#!/usr/bin/env bash
# runInMameCycles.sh — measure emulated CPU time between START / DONE
# markers via MAME's emu.time(), or sample PC for function-attribution
# profiling.
#
# Two modes:
#
#   runInMameCycles.sh <binary> [<iters>]
#       Cycle-counting mode (default).  Captures emu.time() when the
#       START / DONE markers are first observed (they are polled once
#       per video frame) and reports cyc_per_call.  <iters> defaults
#       to 100 when omitted.
#
#   runInMameCycles.sh <binary> [<iters>] --sample
#       PC-sampling mode.  In addition to cycle counting, registers
#       emu.register_periodic to read the CPU PC at ~1ms simulated
#       intervals between START and DONE, accumulating per-PC hit
#       counts.  Output adds `SAMPLE 0xPC N` lines (one per unique
#       PC observed) plus a `SAMPLES total=N` summary.  Consumed by
#       scripts/profile.sh, which joins against a link816 --map to
#       produce a (function, hits, hits%) attribution table.
#
# Optional flags (after the positional args):
#   --clock-hz N   Override CLOCK_HZ.  Default 1023000 (IIgs slow
#                  mode, the rate the IIgs CPU starts at — we boot
#                  the binary without ROM init so we stay slow
#                  unless the binary itself writes $80 to $C036).
#   --fast-mode    Shortcut for --clock-hz 2864000 (IIgs fast mode,
#                  2.8 MHz).  Use when the binary explicitly enables
#                  fast mode OR when running through GS/OS, which
#                  defaults to fast.
#   --secs N       Value passed to MAME's -seconds_to_run.  Default 30.
#
# The binary MUST:
#   1. Switch DBR to bank 2 (so the marker writes are observable
#      at $025000 / $025002 — bank 0 there is also fine but harder
#      to find atomically).
#   2. Write 0xA1A1 to $025000 *immediately before* the bench loop.
#   3. Write 0xA2A2 to $025002 *immediately after* the bench loop.
#   4. while(1){} after the DONE marker.
#
# Output (stdout) in both modes:
#   MAME-CYCLES iters=N delta_us=... cyc_per_call=... ...
# --sample mode additionally emits SAMPLE / SAMPLES lines.
# Exit 0 on success, 1 on time-out / missing markers.

set -euo pipefail
source "$(dirname "$0")/common.sh"

if [ $# -lt 1 ]; then
    die "usage: $0 <binary> [<iters>] [--sample] [--clock-hz N|--fast-mode] [--secs N]"
fi

# First positional argument: path of the binary image to load.
BIN="$1"
shift

# Defaults; each may be overridden by the arguments parsed below.
ITERS=100
SAMPLE_MODE=0
# Default to IIgs slow mode (1.023 MHz). Profile users probing GS/OS
# demos via --fast-mode get 2864000 Hz.
CLOCK_HZ=1023000
SECS=30

# Consume positional iters arg if it's a bare number.
if [ $# -ge 1 ] && [[ "$1" =~ ^[0-9]+$ ]]; then
    ITERS="$1"
    shift
fi

# Remaining arguments are flags; anything unrecognised is an error.
while [ $# -gt 0 ]; do
    case "$1" in
        --sample)
            SAMPLE_MODE=1
            shift
            ;;
        --clock-hz)
            shift
            [ $# -ge 1 ] || die "--clock-hz needs a value"
            CLOCK_HZ="$1"
            shift
            ;;
        --fast-mode)
            CLOCK_HZ=2864000
            shift
            ;;
        --secs)
            shift
            [ $# -ge 1 ] || die "--secs needs a value"
            SECS="$1"
            shift
            ;;
        *)
            die "unknown option '$1'"
            ;;
    esac
done

# ITERS and CLOCK_HZ are spliced verbatim into the generated Lua script and
# SECS goes straight onto the MAME command line.  Reject malformed values
# here: a Lua syntax error would be filtered out of the captured output and
# only show up as a misleading "markers not observed" failure.
number_re='^[0-9]+([.][0-9]+)?([eE][+-]?[0-9]+)?$'
if [[ "$ITERS" =~ ^0+$ ]]; then
    die "<iters> must be greater than zero"
fi
if ! [[ "$CLOCK_HZ" =~ $number_re ]]; then
    die "--clock-hz needs a numeric value, got '$CLOCK_HZ'"
fi
if ! [[ "$SECS" =~ ^[0-9]+$ ]]; then
    die "--secs needs a whole number of seconds, got '$SECS'"
fi

[ -f "$BIN" ] || die "binary not found: $BIN"

# The path is embedded in a double-quoted Lua string literal below, so
# escape backslashes and double quotes first; otherwise such a path would
# produce a syntactically broken (or silently wrong) script.
BIN_LUA=${BIN//\\/\\\\}
BIN_LUA=${BIN_LUA//\"/\\\"}

LUA_PATH=$(mktemp --suffix=.lua)
trap 'rm -f "$LUA_PATH"' EXIT

# Generate the MAME autoboot script.  The heredoc delimiter is deliberately
# unquoted so the shell substitutes SAMPLE_MODE, BIN_LUA, CLOCK_HZ and ITERS
# into the Lua text.  Do not put dollar signs, backticks or backslashes in
# the Lua source unless a shell expansion is intended.
cat > "$LUA_PATH" <<EOF
local frame = 0
local loaded = false
local start_t = nil
local done_t = nil
local sampling = $SAMPLE_MODE
local sample_count = 0
local samples = {}

emu.register_frame_done(function()
    frame = frame + 1
    local cpu = manager.machine.devices[":maincpu"]
    local mem = cpu.spaces["program"]

    -- On frame 30, copy the binary into memory starting at 0x001000 and
    -- point the CPU at it.
    if frame == 30 and not loaded then
        local f = io.open("$BIN_LUA", "rb")
        if not f then print("BIN-MISSING"); manager.machine:exit(); return end
        local data = f:read("*all"); f:close()
        for i = 1, #data do
            local addr = 0x001000 + i - 1
            -- Bytes that would land in 0x00C000-0x00CFFF are not written
            -- (presumably the I/O page; confirm against the memory map).
            if not (addr >= 0x00C000 and addr < 0x00D000) then
                mem:write_u8(addr, data:byte(i))
            end
        end
        loaded = true
        -- Initial register state: execution starts at 00:1000 with the
        -- stack pointer at 0x01FF.
        cpu.state["PC"].value = 0x1000
        cpu.state["PB"].value = 0x00
        cpu.state["DB"].value = 0x00
        cpu.state["D"].value = 0x00
        cpu.state["P"].value = 0x34
        cpu.state["E"].value = 0
        cpu.state["S"].value = 0x01FF
        print("MAME-LOADED bytes=" .. #data)
        return
    end

    if not loaded then return end

    -- Poll markers on every frame after load. Capture emu.time()
    -- the first frame each marker appears.
    if not start_t and mem:read_u16(0x025000) == 0xa1a1 then
        start_t = emu.time()
        print(string.format("MAME-MARK START frame=%d t=%.9f", frame, start_t))
    end
    if start_t and not done_t and mem:read_u16(0x025002) == 0xa2a2 then
        done_t = emu.time()
        print(string.format("MAME-MARK DONE frame=%d t=%.9f", frame, done_t))
        -- Convert the elapsed emulated seconds to microseconds and to CPU
        -- cycles at the configured clock rate.
        local delta = done_t - start_t
        local delta_us = delta * 1e6
        local cyc = delta * $CLOCK_HZ
        local per_call = cyc / $ITERS
        print(string.format("MAME-CYCLES iters=$ITERS delta_us=%.3f total_cyc=%.0f cyc_per_call=%.2f",
            delta_us, cyc, per_call))
        if sampling == 1 then
            print(string.format("SAMPLES total=%d", sample_count))
            -- pairs() order is unspecified; consumers must not rely on
            -- the SAMPLE lines being sorted.
            for pc, n in pairs(samples) do
                print(string.format("SAMPLE 0x%06x %d", pc, n))
            end
        end
        manager.machine:exit()
    end
end)

-- Periodic PC sampler. Fires on a simulated-time schedule that the
-- MAME core resolves to ~1ms intervals (precise rate depends on MAME's
-- scheduler granularity). We accumulate per-PC hit counts between the
-- START and DONE markers; samples taken before START or after DONE are
-- ignored. Captures the 24-bit (PB:PC) PC so multi-bank code attributes
-- correctly. Per the reviewer revision, attribution downstream uses
-- (hits, hits%) — NOT emu.time() weighting — so each callback contributes
-- exactly one count regardless of the inter-sample interval.
if sampling == 1 then
    emu.register_periodic(function()
        if not start_t or done_t then return end
        local cpu = manager.machine.devices[":maincpu"]
        local pc = cpu.state["PC"].value
        local pb = cpu.state["PB"].value
        local full = (pb * 0x10000) + pc
        samples[full] = (samples[full] or 0) + 1
        sample_count = sample_count + 1
    end)
end
EOF

# Only forward the lines downstream tooling parses: MAME-* always, plus
# SAMPLE / SAMPLES lines in sampling mode.
if [ "$SAMPLE_MODE" = "1" ]; then
    GREP_PAT="^MAME-|^SAMPLE"
else
    GREP_PAT="^MAME-"
fi

# Run MAME headless and capture the filtered output.  The trailing
# `|| true` matters: under `set -e -o pipefail` a non-zero status from
# timeout/mame, or from grep finding no matching line, would otherwise
# abort the script at this assignment and skip both the echo and the
# diagnostic below.
OUT=$(SDL_VIDEODRIVER=dummy SDL_AUDIODRIVER=dummy timeout 90 mame apple2gs \
    -rompath "$PROJECT_ROOT/tools/mame/roms" \
    -plugins -autoboot_script "$LUA_PATH" \
    -video none -sound none -nothrottle -seconds_to_run "$SECS" 2>&1 \
    | grep -E "$GREP_PAT") || true

if [ -n "$OUT" ]; then
    printf '%s\n' "$OUT"
fi

# Success is defined by the MAME-CYCLES line, not by MAME's exit status.
# A here-string avoids an `echo | grep -q` pipeline, which pipefail can
# report as failed when grep exits early and echo receives SIGPIPE.
if grep -q "MAME-CYCLES" <<<"$OUT"; then
    exit 0
fi
warn "no MAME-CYCLES output — markers not observed within $SECS sec"
exit 1