#!/usr/bin/env bash
# runInMameCycles.sh — measure emulated CPU time between START / DONE
# markers via MAME's emu.time(), or sample PC for function-attribution
# profiling.
#
# Two modes:
#
#   runInMameCycles.sh <bin> [<iters>]
#       Cycle-counting mode (default).  Captures emu.time() at the
#       START/DONE marker writes and reports cyc_per_call.
#       <iters> is the number of bench-loop iterations the binary runs;
#       it is only used as the divisor for cyc_per_call (default 100).
#
#   runInMameCycles.sh <bin> [<iters>] --sample
#       PC-sampling mode.  In addition to cycle counting, registers
#       emu.register_periodic to read the CPU PC at ~1ms simulated
#       intervals between START and DONE, accumulating per-PC hit
#       counts.  Output adds `SAMPLE 0xPC N` lines (one per unique
#       PC observed) plus `SAMPLES total=N` summary.  Consumed by
#       scripts/profile.sh which joins against a link816 --map to
#       produce a (function, hits, hits%) attribution table.
#
# Optional flags (after the positional args):
#   --clock-hz N    Override CLOCK_HZ.  Default 1023000 (IIgs slow
#                   mode, the rate the IIgs CPU starts at — we boot
#                   the binary without ROM init so we stay slow
#                   unless the binary itself writes $80 to $C036).
#   --fast-mode     Shortcut for --clock-hz 2864000 (IIgs fast mode,
#                   2.8 MHz).  Use when the binary explicitly enables
#                   fast mode OR when running through GS/OS which
#                   defaults to fast.
#   --secs N        Value handed to MAME's -seconds_to_run, i.e. how
#                   long the run may last before we give up waiting
#                   for the markers.  Default 30.
#
# The binary MUST:
#   1. Switch DBR to bank 2 (so the marker writes are observable
#      at $025000 / $025002 — bank 0 there is also fine but harder
#      to find atomically).
#   2. Write 0xA1A1 to $025000 *immediately before* the bench loop.
#   3. Write 0xA2A2 to $025002 *immediately after* the bench loop.
#   4. while(1){} after the DONE marker.
#
# Output (stdout) in both modes:
#   MAME-CYCLES iters=N delta_us=... cyc_per_call=... ...
# --sample mode additionally emits SAMPLE / SAMPLES lines.
# Exit 0 on success, 1 on time-out / missing markers.

set -euo pipefail

# common.sh lives next to this script; die / warn / PROJECT_ROOT used
# below are expected to come from it.
source "$(dirname "$0")/common.sh"

# The binary path is the only mandatory argument.
if [[ $# -lt 1 ]]; then
    die "usage: $0 <bin> [<iters>] [--sample] [--clock-hz N|--fast-mode] [--secs N]"
fi

BIN="$1"
shift

# Defaults; the optional positional <iters> and the flags override them.
ITERS=100
SAMPLE_MODE=0

# Default to IIgs slow mode (1.023 MHz).
Profile users probing GS/OS # demos via --fast-mode get 2864000 Hz. CLOCK_HZ=1023000 SECS=30 # Consume positional iters arg if it's a bare number. if [ $# -ge 1 ] && [[ "$1" =~ ^[0-9]+$ ]]; then ITERS="$1" shift fi while [ $# -gt 0 ]; do case "$1" in --sample) SAMPLE_MODE=1 shift ;; --clock-hz) shift [ $# -ge 1 ] || die "--clock-hz needs a value" CLOCK_HZ="$1" shift ;; --fast-mode) CLOCK_HZ=2864000 shift ;; --secs) shift [ $# -ge 1 ] || die "--secs needs a value" SECS="$1" shift ;; *) die "unknown option '$1'" ;; esac done [ -f "$BIN" ] || die "binary not found: $BIN" LUA_PATH=$(mktemp --suffix=.lua) trap 'rm -f "$LUA_PATH"' EXIT cat > "$LUA_PATH" <= 0x00C000 and addr < 0x00D000) then mem:write_u8(addr, data:byte(i)) end end loaded = true cpu.state["PC"].value = 0x1000 cpu.state["PB"].value = 0x00 cpu.state["DB"].value = 0x00 cpu.state["D"].value = 0x00 cpu.state["P"].value = 0x34 cpu.state["E"].value = 0 cpu.state["S"].value = 0x01FF print("MAME-LOADED bytes=" .. #data) return end if not loaded then return end -- Poll markers on every frame after load. Capture emu.time() -- the first frame each marker appears. if not start_t and mem:read_u16(0x025000) == 0xa1a1 then start_t = emu.time() print(string.format("MAME-MARK START frame=%d t=%.9f", frame, start_t)) end if start_t and not done_t and mem:read_u16(0x025002) == 0xa2a2 then done_t = emu.time() print(string.format("MAME-MARK DONE frame=%d t=%.9f", frame, done_t)) local delta = done_t - start_t local delta_us = delta * 1e6 local cyc = delta * $CLOCK_HZ local per_call = cyc / $ITERS print(string.format("MAME-CYCLES iters=$ITERS delta_us=%.3f total_cyc=%.0f cyc_per_call=%.2f", delta_us, cyc, per_call)) if sampling == 1 then print(string.format("SAMPLES total=%d", sample_count)) for pc, n in pairs(samples) do print(string.format("SAMPLE 0x%06x %d", pc, n)) end end manager.machine:exit() end end) -- Periodic PC sampler. 
-- Fires on a simulated-time schedule that the
-- MAME core resolves to ~1ms intervals (precise rate depends on MAME's
-- scheduler granularity).  We accumulate per-PC hit counts between the
-- START and DONE markers; samples taken before START or after DONE are
-- ignored.  Captures the 24-bit (PB:PC) PC so multi-bank code attributes
-- correctly.  Per the reviewer revision, attribution downstream uses
-- (hits, hits%) — NOT emu.time() weighting — so each callback contributes
-- exactly one count regardless of the inter-sample interval.
if sampling == 1 then
    emu.register_periodic(function()
        -- Only count while the benchmark window is open.
        if not start_t or done_t then return end
        local cpu = manager.machine.devices[":maincpu"]
        local pc = cpu.state["PC"].value
        local pb = cpu.state["PB"].value
        -- Fold program bank and PC into a single 24-bit key.
        local full = (pb * 0x10000) + pc
        samples[full] = (samples[full] or 0) + 1
        sample_count = sample_count + 1
    end)
end
EOF

# Keep only the lines the Lua script prints: MAME-* status lines always,
# SAMPLE / SAMPLES lines as well when PC sampling is enabled.
if [[ "$SAMPLE_MODE" == "1" ]]; then
    GREP_PAT="^MAME-|^SAMPLE"
else
    GREP_PAT="^MAME-"
fi

# Run MAME headless with the generated autoboot script and capture the
# filtered output.  The pipeline is allowed to fail (`|| true`): with
# `set -e -o pipefail` a grep that matches nothing, a non-zero MAME exit
# or a `timeout` kill would otherwise abort the script right here, before
# the captured lines are printed and before the diagnostic below can run.
# Success is decided solely by the presence of a MAME-CYCLES line.
OUT=$(
    SDL_VIDEODRIVER=dummy SDL_AUDIODRIVER=dummy timeout 90 mame apple2gs \
        -rompath "$PROJECT_ROOT/tools/mame/roms" \
        -plugins -autoboot_script "$LUA_PATH" \
        -video none -sound none -nothrottle -seconds_to_run "$SECS" 2>&1 \
        | grep -E "$GREP_PAT"
) || true

# Relay whatever was captured (partial output is still useful for debugging).
if [[ -n "$OUT" ]]; then
    printf '%s\n' "$OUT"
fi

# In-shell pattern match instead of `echo | grep -q`: with pipefail, grep -q
# exiting on the early MAME-CYCLES match can SIGPIPE the writer when the
# SAMPLE list is large, which would misreport a successful run as a failure.
if [[ "$OUT" == *MAME-CYCLES* ]]; then
    exit 0
fi

warn "no MAME-CYCLES output — markers not observed within $SECS sec"
exit 1