65816-llvm-mos/scripts/runInMameCycles.sh
Scott Duensing da095402ec Updated
2026-06-02 23:17:57 -05:00

199 lines
6.6 KiB
Bash
Executable file

#!/usr/bin/env bash
# runInMameCycles.sh — measure emulated CPU time between START / DONE
# markers via MAME's emu.time(), or sample PC for function-attribution
# profiling.
#
# Two modes:
#
# runInMameCycles.sh <binary> <iters>
# Cycle-counting mode (default). Captures emu.time() at the
# START/DONE marker writes and reports cyc_per_call.
#
# runInMameCycles.sh <binary> <iters> --sample
# PC-sampling mode. In addition to cycle counting, registers
# emu.register_periodic to read the CPU PC at ~1ms simulated
# intervals between START and DONE, accumulating per-PC hit
# counts. Output adds `SAMPLE 0xPC N` lines (one per unique
# PC observed) plus `SAMPLES total=N` summary. Consumed by
# scripts/profile.sh which joins against a link816 --map to
# produce a (function, hits, hits%) attribution table.
#
# Optional flags (after the positional args):
#   --clock-hz N   Override CLOCK_HZ. Default 1023000 (IIgs slow
#                  mode, the rate the IIgs CPU starts at — we boot
#                  the binary without ROM init so we stay slow
#                  unless the binary itself writes $80 to $C036).
#   --fast-mode    Shortcut for --clock-hz 2864000 (IIgs fast mode,
#                  2.8 MHz). Use when the binary explicitly enables
#                  fast mode OR when running through GS/OS which
#                  defaults to fast.
#   --secs N       Value handed to MAME's -seconds_to_run. Default 30.
#                  Raise it for benchmarks that need longer than that
#                  to reach the DONE marker.
#
# The binary MUST:
# 1. Switch DBR to bank 2 (so the marker writes are observable
# at $025000 / $025002 — bank 0 there is also fine but harder
# to find atomically).
# 2. Write 0xA1A1 to $025000 *immediately before* the bench loop.
# 3. Write 0xA2A2 to $025002 *immediately after* the bench loop.
# 4. while(1){} after the DONE marker.
#
# Output (stdout) in both modes:
# MAME-CYCLES iters=N delta_us=... cyc_per_call=... ...
# --sample mode additionally emits SAMPLE / SAMPLES lines.
# Exit 0 on success, 1 on time-out / missing markers.
# Strict mode: stop on unhandled command failures, unset variables and
# failing pipeline stages.
set -euo pipefail
# Shared helpers live next to this script; die() used below is expected
# to come from there.
source "$(dirname "$0")/common.sh"

if [ $# -lt 1 ]; then
    die "usage: $0 <binary> [<iters>] [--sample] [--clock-hz N|--fast-mode] [--secs N]"
fi
BIN="$1"
shift

# Defaults. CLOCK_HZ starts at IIgs slow mode (1.023 MHz); profile users
# probing GS/OS demos via --fast-mode get 2864000 Hz. SECS is forwarded to
# MAME's -seconds_to_run.
ITERS=100
SAMPLE_MODE=0
CLOCK_HZ=1023000
SECS=30

# Accepted spellings for numeric option values. They are pasted verbatim
# into generated Lua source and onto the MAME command line, so anything
# else is rejected here instead of surfacing later as a Lua syntax error.
UINT_RE='^[0-9]+$'
NUM_RE='^[0-9]+([.][0-9]+)?$'

# Consume the positional iters argument if it is a bare number.
if [ $# -ge 1 ] && [[ "$1" =~ $UINT_RE ]]; then
    ITERS="$1"
    shift
fi
# iters is the divisor for cyc_per_call, so zero would make the result
# meaningless (division by zero in the Lua report).
if [[ "$ITERS" =~ ^0+$ ]]; then
    die "iters must be greater than zero (got '$ITERS')"
fi

# Remaining arguments are flags, in any order.
while [ $# -gt 0 ]; do
    case "$1" in
        --sample)
            SAMPLE_MODE=1
            shift
            ;;
        --clock-hz)
            shift
            [ $# -ge 1 ] || die "--clock-hz needs a value"
            [[ "$1" =~ $NUM_RE ]] || die "--clock-hz needs a numeric value (got '$1')"
            CLOCK_HZ="$1"
            shift
            ;;
        --fast-mode)
            CLOCK_HZ=2864000
            shift
            ;;
        --secs)
            shift
            [ $# -ge 1 ] || die "--secs needs a value"
            [[ "$1" =~ $UINT_RE ]] || die "--secs needs a whole number of seconds (got '$1')"
            SECS="$1"
            shift
            ;;
        *)
            die "unknown option '$1'"
            ;;
    esac
done

[ -f "$BIN" ] || die "binary not found: $BIN"
# Generate the MAME autoboot script in a temp file that is removed when
# this script exits, whatever the exit path.
LUA_PATH=$(mktemp --suffix=.lua)
trap 'rm -f "$LUA_PATH"' EXIT

# The binary path ends up inside a Lua double-quoted string literal, so
# backslashes and double quotes must be escaped or the generated script
# is malformed (or opens the wrong file). Backslashes go first so the
# escapes added for quotes are not doubled. Paths containing newlines
# are not handled.
LUA_BIN_LITERAL=${BIN//\\/\\\\}
LUA_BIN_LITERAL=${LUA_BIN_LITERAL//\"/\\\"}

# NOTE: the heredoc delimiter is unquoted on purpose so that the shell
# substitutes SAMPLE_MODE, CLOCK_HZ, ITERS and the escaped binary path
# into the Lua text. Keep dollar signs, backticks and backslashes out of
# the Lua body unless a shell expansion is intended.
cat > "$LUA_PATH" <<EOF
-- Frames seen so far; the image is injected on frame 30.
local frame = 0
local loaded = false
-- emu.time() readings taken on the first polled frame where each marker
-- is visible. Polling happens once per frame, so both readings are
-- quantised to frame boundaries.
local start_t = nil
local done_t = nil
local sampling = $SAMPLE_MODE
-- Total samples taken and per-address hit counts (key = PB * 0x10000 + PC).
local sample_count = 0
local samples = {}

emu.register_frame_done(function()
    frame = frame + 1
    local cpu = manager.machine.devices[":maincpu"]
    local mem = cpu.spaces["program"]

    if frame == 30 and not loaded then
        -- Copy the image byte by byte into the program space from 0x001000.
        local f = io.open("$LUA_BIN_LITERAL", "rb")
        if not f then print("BIN-MISSING"); manager.machine:exit(); return end
        local data = f:read("*all"); f:close()
        for i = 1, #data do
            local addr = 0x001000 + i - 1
            -- Bytes that would land in 0x00C000-0x00CFFF are skipped;
            -- presumably that range is memory-mapped I/O (TODO confirm).
            if not (addr >= 0x00C000 and addr < 0x00D000) then
                mem:write_u8(addr, data:byte(i))
            end
        end
        loaded = true
        -- Point the CPU at the start of the image with a fixed register state.
        cpu.state["PC"].value = 0x1000
        cpu.state["PB"].value = 0x00
        cpu.state["DB"].value = 0x00
        cpu.state["D"].value = 0x00
        cpu.state["P"].value = 0x34
        cpu.state["E"].value = 0
        cpu.state["S"].value = 0x01FF
        print("MAME-LOADED bytes=" .. #data)
        return
    end

    if not loaded then return end

    -- Poll markers on every frame after load. Capture emu.time()
    -- the first frame each marker appears.
    if not start_t and mem:read_u16(0x025000) == 0xa1a1 then
        start_t = emu.time()
        print(string.format("MAME-MARK START frame=%d t=%.9f", frame, start_t))
    end
    if start_t and not done_t and mem:read_u16(0x025002) == 0xa2a2 then
        done_t = emu.time()
        print(string.format("MAME-MARK DONE frame=%d t=%.9f", frame, done_t))
        -- Convert elapsed emulated seconds to cycles at the configured clock.
        local delta = done_t - start_t
        local delta_us = delta * 1e6
        local cyc = delta * $CLOCK_HZ
        local per_call = cyc / $ITERS
        print(string.format("MAME-CYCLES iters=$ITERS delta_us=%.3f total_cyc=%.0f cyc_per_call=%.2f",
            delta_us, cyc, per_call))
        if sampling == 1 then
            -- One SAMPLE line per distinct address; pairs() order is unspecified.
            print(string.format("SAMPLES total=%d", sample_count))
            for pc, n in pairs(samples) do
                print(string.format("SAMPLE 0x%06x %d", pc, n))
            end
        end
        -- Measurement is complete; stop the emulator.
        manager.machine:exit()
    end
end)

-- Periodic PC sampler. Fires on a simulated-time schedule that the
-- MAME core resolves to ~1ms intervals (precise rate depends on MAME's
-- scheduler granularity). We accumulate per-PC hit counts between the
-- START and DONE markers; samples taken before START or after DONE are
-- ignored. Captures the 24-bit (PB:PC) PC so multi-bank code attributes
-- correctly. Per the reviewer revision, attribution downstream uses
-- (hits, hits%) — NOT emu.time() weighting — so each callback contributes
-- exactly one count regardless of the inter-sample interval.
if sampling == 1 then
    emu.register_periodic(function()
        if not start_t or done_t then return end
        local cpu = manager.machine.devices[":maincpu"]
        local pc = cpu.state["PC"].value
        local pb = cpu.state["PB"].value
        local full = (pb * 0x10000) + pc
        samples[full] = (samples[full] or 0) + 1
        sample_count = sample_count + 1
    end)
end
EOF
# Only lines emitted by the Lua script are kept from MAME's combined
# stdout/stderr: MAME-* always, SAMPLE/SAMPLES when sampling is on.
if [ "$SAMPLE_MODE" = "1" ]; then
    GREP_PAT="^MAME-|^SAMPLE"
else
    GREP_PAT="^MAME-"
fi

# Run MAME headless and unthrottled, with a 90 s wall-clock ceiling.
# The trailing "|| true" matters: under "set -e -o pipefail" a grep that
# matches nothing (exit 1), a timeout (exit 124) or a non-zero MAME exit
# would otherwise abort the script right here, before the diagnostic
# below can be printed. Success is decided solely by whether a
# MAME-CYCLES line was captured.
OUT=$(SDL_VIDEODRIVER=dummy SDL_AUDIODRIVER=dummy timeout 90 mame apple2gs \
    -rompath "$PROJECT_ROOT/tools/mame/roms" \
    -plugins -autoboot_script "$LUA_PATH" \
    -video none -sound none -nothrottle -seconds_to_run "$SECS" 2>&1 \
    | grep -E "$GREP_PAT") || true

# Forward whatever was captured; print nothing at all when there is none.
if [ -n "$OUT" ]; then
    printf '%s\n' "$OUT"
fi

# Test in-process rather than piping into "grep -q": with pipefail, grep
# exiting at the first match can leave the writer with SIGPIPE on large
# --sample output, which would turn a successful run into a failure.
if [[ "$OUT" == *MAME-CYCLES* ]]; then
    exit 0
fi
warn "no MAME-CYCLES output — markers not observed within $SECS sec"
exit 1