# RT-bench: after RT-8a (api_t hot/cold field reorder).
# Captured against baseline-pre-rt4.txt.
#
# What changed: api_t fields reorganised so the eight pointers
# touched on every CALL / MCALL / alloc / write-barrier / O_AGE
# fit on the first cache line of the global `api_g`.  Pure layout
# change -- no semantic / codegen impact, drift stable.
#
# Delta vs pre-rt4 (median of 3 runs):

[bn_loop]
count    30000000:  17 -> 14   ns/op  (-18%)
xor      30000000:  17 -> 14   ns/op  (-18%)

[bn_arith]
mix1      5000000:  21 -> 16   ns/op  (-24%)
mix2      5000000:  24 -> 18   ns/op  (-25%)
bits      5000000:  19 -> 16   ns/op  (-16%)

[bn_branch]
alt       5000000:  26 -> 21   ns/op  (-19%)
tri3      5000000:  45 -> 36   ns/op  (-20%)

[bn_call]
1arg      2000000:  38 -> 32   ns/op  (-16%)
2arg      2000000:  40 -> 34   ns/op  (-15%)

[bn_mcall]
negneg    1000000:  43 -> 32   ns/op  (-26%)
abs       1000000:  40 -> 29   ns/op  (-27%)

[bn_list]
iget      2000000:  36 -> 29   ns/op  (-19%)
iset      2000000:  18 -> 16   ns/op  (-11%)
lsize     2000000:  26 -> 23   ns/op  (-12%)

[bn_gc.wbarrier]
wbarrier   500000:  48 -> 44   ns/op  (-8%)

[bn_gc.megamcall]
megamcall  500000:  60 -> 52   ns/op  (-13%)

[bn_gc.closure]
closure    500000:  395 -> 397 ns/op  (unchanged; closure
                                        bottleneck is hooks_heap,
                                        not api_t -- RT-5 target)

[bn_gc.deepgc]
deepgc        200:  ~0.9-1.2 ms -> ~0.85-1.0 ms per gc()
                                        (small win from frame-walk
                                        touching api.frame in the
                                        hot line)

# Headline:
# - Dispatch-bound microbenchmarks: -15% to -27% across the board.
# - Specifically, the .negneg / .abs MCALL path (-26% to -27%) benefits
#   most because MCALL writes api.method on every call -- pre-RT-8a
#   that was line 0 byte 16 while api.frame/args/hgp were also
#   line 0; the eviction was just whatever else got pulled in.
#   With api.method, args, frame, hgp, theap0, pgmod, puwh all on
#   line 0, the MCALL dispatch sequence reads/writes one line
#   instead of cross-line touches.
# - Write-barrier (wbarrier) improves by only 8% because the
#   pgmod/theap0 pair was already adjacent in the original layout;
#   the win comes solely from keeping them on the same line as api.frame.
