65816-llvm-mos/scripts/mameDebug.py
Scott Duensing da095402ec Updated
2026-06-02 23:17:57 -05:00

618 lines
25 KiB
Python
Executable file

#!/usr/bin/env python3
# mameDebug.py - Python front-end for source-level debugging of W65816
# binaries inside MAME. Wraps MAME's autoboot-Lua + -debug -oslog stream
# into a GDB-style interactive prompt plus a default-on --trace check
# that drives the source-PC resolver end-to-end.
#
# Phase 3.1 of the gap-closure plan.
#
# Two modes:
#
# --trace Set bp at `main` (or another symbol), run until first
# BP-HIT line surfaces on -oslog, capture the PC, resolve
# it through scripts/pc2line.py. Exits 0 on resolved
# hit. This is the default-on smoke check; it runs
# unconditionally in scripts/smokeTest.sh.
#
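# (default) Interactive (dbg) prompt — gated behind DEBUGGER_E2E=1
# in the environment, because driving MAME's debugger
# across a TTY isn't reliable in CI. Parses the GDB
# subset b/c/s/n/finish/p &SYM/p VAR/q; s, n and finish are
# accepted but currently only print a TODO, because they
# need a bidirectional MAME RPC that is not implemented yet.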
#
# Critical reviewer-flagged constraints (do not violate):
# - cpu.debug:bpset(addr) ONE-arg form CRASHES MAME. Always use the
# 3-arg form:
# bpset(pc, '', 'logerror "BP-HIT PC=%X A=%X X=%X Y=%X S=%X DBR=%X\\n",pc,a,x,y,s,db; go')
# - DO NOT call cpu.debug:go() from add_machine_pause_notifier
# callbacks (reentrancy SEGFAULT — see SESSION_RECOVERY.md).
# - MAME under -debug starts with execution_state='stop'. The Lua
# boot script must explicitly assign 'run' to kick simulation.
# - Multi-frame `bt` is out of scope — requires DW_AT_frame_base or
# per-function frame-size sidecar. `finish` is provided instead.
#
# Usage:
# scripts/mameDebug.py --trace --bin demos/helloBeep_dbg.bin \
# --map demos/helloBeep_dbg.map \
# --dwarf demos/helloBeep_dbg.dwarf \
# [--break main]
#
# DEBUGGER_E2E=1 scripts/mameDebug.py --bin ... --map ... --dwarf ...
import argparse
import os
import re
import subprocess
import sys
import tempfile
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT = os.path.dirname(SCRIPT_DIR)
# ---- Map + DWARF helpers ---------------------------------------------
def loadMapSyms(path):
    """Read a link816 .map file into an address-sorted symbol table.

    Only lines that begin with "0x" once stripped are considered. The
    first whitespace-separated field is parsed as a hexadecimal address
    and the second is taken as the symbol name. Lines with fewer than
    two fields, or whose first field is not valid hex, are ignored.

    Returns a list of (addr, sym) tuples sorted ascending.
    """
    table = []
    with open(path) as mapFile:
        for raw in mapFile:
            text = raw.strip()
            if not text.startswith("0x"):
                continue
            fields = text.split()
            if len(fields) < 2:
                continue
            try:
                address = int(fields[0], 16)
            except ValueError:
                # Looked like an address but is not valid hex; skip it.
                continue
            table.append((address, fields[1]))
    return sorted(table)
def lookupSym(syms, name):
    """Look `name` up in a [(addr, sym), ...] table.

    Returns the address of the first entry whose symbol equals `name`,
    or None when no entry matches.
    """
    return next((addr for addr, sym in syms if sym == name), None)
def resolveBreakpoint(spec, syms, dwarf, mapPath):
    """Resolve a breakpoint spec to a PC.

    spec     -- either a symbol name (`FUNC`) or `FILE:LINE`.
    syms     -- [(addr, sym), ...] table, as returned by loadMapSyms.
    dwarf    -- path handed to pc2line.py as --sidecar.
    mapPath  -- path handed to pc2line.py as --map.

    Returns the address as an int, or None when the spec cannot be
    resolved. A failure to run `pc2line.py --dump` (missing interpreter,
    non-zero exit) is reported on stderr and treated as "unresolved", so
    callers only ever have to handle None.
    """
    if ":" not in spec:
        # Pure symbol name.
        return lookupSym(syms, spec)

    # FILE:LINE — dump the pc2line table and search it.
    file_part, line_part = spec.rsplit(":", 1)
    try:
        line_num = int(line_part)
    except ValueError:
        return None

    cmd = ["python3", os.path.join(SCRIPT_DIR, "pc2line.py"),
           "--sidecar", dwarf, "--map", mapPath, "--dump"]
    try:
        out = subprocess.check_output(cmd, text=True)
    except (OSError, subprocess.CalledProcessError) as e:
        print(f"mameDebug: pc2line --dump failed: {e}", file=sys.stderr)
        return None

    wanted_line = str(line_num)
    for ln in out.splitlines():
        parts = ln.split()
        if len(parts) < 2:
            continue
        pc_hex, file_line = parts[0], parts[1]
        if ":" not in file_line:
            continue
        f, l = file_line.rsplit(":", 1)
        if f == file_part and l == wanted_line:
            try:
                return int(pc_hex, 16)
            except ValueError:
                # First column was not a hex PC; keep scanning.
                continue
    return None
# ---- Lua boot script builder ----------------------------------------
# Text of the MAME autoboot Lua script. buildLuaScript() fills it in with
# str.format(); the placeholders are {bin_path}, {load_at}, {start_pc},
# {bp_list}, {finish_lua} and {exit_frame}. `{{` / `}}` are literal braces
# for the Lua table constructor. It is a raw string, so the `\n` inside the
# logerror format strings reaches the Lua source as a backslash escape
# rather than as a Python newline.
LUA_TEMPLATE = r"""
-- mameDebug autoboot script (generated by scripts/mameDebug.py)
local BIN_PATH = "{bin_path}"
local LOAD_AT = 0x{load_at:04x}
local START_PC = 0x{start_pc:06x}
local BPS = {{ {bp_list} }}
local FINISH = {finish_lua}
local installed = false
local frame = 0
local finish_state = "armed" -- "armed" -> "ret-installed" -> "done"
local cpu, dbg, mem
emu.register_frame_done(function()
frame = frame + 1
if frame == 30 and not installed then
cpu = manager.machine.devices[":maincpu"]
dbg = cpu.debug
mem = cpu.spaces["program"]
local f = io.open(BIN_PATH, "rb")
if not f then
print("MAMEDBG-BIN-MISSING " .. BIN_PATH)
manager.machine:exit()
return
end
local data = f:read("*all")
f:close()
-- Skip the IIgs IO window; otherwise stray rodata pad bytes can
-- clobber soft switches. Matches runInMame.sh.
for i = 1, #data do
local addr = LOAD_AT + i - 1
if not (addr >= 0x00C000 and addr < 0x00D000) then
mem:write_u8(addr, data:byte(i))
end
end
-- START_PC may be either LOAD_AT (run crt0 first; requires the
-- crt0 to work standalone — true for crt0.s smoke harness,
-- NOT for crt0Gsos.s which expects Loader-applied relocations)
-- or the bp target itself (jump-to-bp; works for any image).
-- The Python front-end picks based on whether the binary's
-- __start is the OMF-style crt0 or the flat smoke crt0.
cpu.state["PC"].value = START_PC
cpu.state["PB"].value = 0x00
cpu.state["DB"].value = 0x00
cpu.state["D"].value = 0x00
-- P=0x04 (M=0, X=0, I=0): matches the state crt0 leaves before
-- JSL main, so jumping straight to main with this P is honest.
-- Demos that bp before crt0 finishes still work — bpset matches
-- on the PC regardless of P.
cpu.state["P"].value = 0x04
cpu.state["E"].value = 0
cpu.state["S"].value = 0x01FF
-- Install breakpoints in the 3-arg form (the 1-arg form crashes
-- MAME). `; go` resumes execution from the action string itself,
-- avoiding the reentrancy SEGFAULT documented in SESSION_RECOVERY.
-- If FINISH is true, the action also stamps the 24-bit return
-- PC (read from the JSL frame on the stack: PCL@s+1, PCH@s+2,
-- PBR@s+3) plus a 0xFEED marker into bank-2 scratch
-- ($020000..$020005) so the register_periodic poller can read
-- it and install a one-shot bp at the post-call PC. Nested
-- bpset inside the action string itself does NOT fire in this
-- MAME version (verified by spike), so we route the install
-- through register_periodic.
for _, pc in ipairs(BPS) do
local action
if FINISH then
action = 'logerror "MAMEDBG-BP PC=%X A=%X X=%X Y=%X S=%X DBR=%X\n",pc,a,x,y,s,db; ' ..
'w@0x020000=b@(s+1) + (b@(s+2)<<8); w@0x020002=b@(s+3); w@0x020004=0xFEED; go'
else
action = 'logerror "MAMEDBG-BP PC=%X A=%X X=%X Y=%X S=%X DBR=%X\n",pc,a,x,y,s,db; go'
end
dbg:bpset(pc, '', action)
end
-- Resume execution. Under -debug MAME pauses at startup; the
-- bpset action's "; go" tail handles re-resuming after each
-- hit, but the FIRST kick needs an explicit :go() from the
-- autoboot script. register_frame_done is a safe context
-- (NOT the add_machine_pause_notifier path which has the
-- documented reentrancy SEGFAULT).
dbg:go()
print(string.format("MAMEDBG-LOADED bytes=%d bps=%d finish=%s", #data, #BPS, tostring(FINISH)))
installed = true
end
if frame == {exit_frame} then
print("MAMEDBG-EXIT frame=" .. frame)
manager.machine:exit()
end
end)
-- Finish poller: when the entry bp has fired (marker == 0xFEED),
-- read the return-PC triplet from bank-2 scratch and install a
-- one-shot bp at (PC + 1). Polling cost: a couple of mem reads per
-- periodic tick; install latency vs RTL determines whether the bp
-- catches the function before it exits. For typical main() with
-- substantial body, the latency is fine. For 3-NOP toys, the bp
-- may install after RTL — that's an acceptable proof-of-concept
-- limitation noted in the docstring.
emu.register_periodic(function()
if not FINISH or finish_state ~= "armed" or not mem then return end
local marker = mem:read_u16(0x020004)
if marker == 0xFEED then
local ret_lo16 = mem:read_u16(0x020000)
local ret_bank = mem:read_u8(0x020002)
local ret_pc = (ret_bank * 0x10000) + ret_lo16 + 1
dbg:bpset(ret_pc, '',
'logerror "MAMEDBG-RET PC=%X A=%X X=%X Y=%X S=%X DBR=%X\n",pc,a,x,y,s,db; go')
print(string.format("MAMEDBG-FINISH-ARMED ret_pc=0x%06X", ret_pc))
finish_state = "ret-installed"
mem:write_u16(0x020004, 0)
end
end)
"""
def buildLuaScript(bin_path, load_at, bp_pcs, exit_frame, start_pc=None,
                   finish=False):
    """Return the Lua autoboot script text for one MAME run.

    bin_path    -- path of the flat binary the Lua script opens and
                   copies into memory. It is embedded in a Lua
                   double-quoted string, so backslashes, double quotes,
                   newlines and carriage returns are escaped here; an
                   unescaped path containing any of them would produce a
                   broken or misread script.
    load_at     -- address the image is written to (int).
    bp_pcs      -- iterable of breakpoint PCs (ints); may be empty.
    exit_frame  -- frame number at which the script exits the machine.
    start_pc    -- initial PC once the image is in memory. None means
                   "run from load_at" (i.e. through the crt0); pass a
                   specific PC to jump straight to a breakpoint target,
                   which crt0Gsos / crt0Gno images need because their
                   startup expects the GS/OS Loader to have applied
                   relocations.
    finish      -- when True each entry bp also stamps the 24-bit return
                   PC into bank-2 scratch, and the script's
                   register_periodic poller installs a bp at
                   (return PC + 1).
    """
    # Escape for a Lua "..." literal. Backslash must be handled first so
    # the escapes added afterwards are not doubled.
    lua_bin_path = (str(bin_path)
                    .replace("\\", "\\\\")
                    .replace('"', '\\"')
                    .replace("\n", "\\n")
                    .replace("\r", "\\r"))
    bp_list = ", ".join(f"0x{p:06x}" for p in bp_pcs)
    if start_pc is None:
        start_pc = load_at
    return LUA_TEMPLATE.format(
        bin_path=lua_bin_path,
        load_at=load_at,
        start_pc=start_pc,
        bp_list=bp_list,
        exit_frame=exit_frame,
        finish_lua="true" if finish else "false",
    )
# ---- MAME launcher ---------------------------------------------------
def runMame(lua_path, seconds, debug_flag, oslog=True):
    """Launch MAME (apple2gs) under an autoboot script and collect its output.

    lua_path    -- path of the Lua script passed as -autoboot_script.
    seconds     -- value for -seconds_to_run; the host-side timeout is
                   seconds + 20.
    debug_flag  -- when true, add `-debug -debugger none`.
    oslog       -- when true, add `-oslog`.

    Returns stdout followed by stderr as a single str. If the host-side
    timeout expires, whatever output was captured so far is returned
    instead of raising.
    """
    def asText(chunk):
        # TimeoutExpired carries bytes (or None) for captured output on
        # POSIX even when text=True was requested; normalise to str so
        # callers can always splitlines() and regex-search the result.
        if chunk is None:
            return ""
        if isinstance(chunk, bytes):
            return chunk.decode(errors="replace")
        return chunk

    env = dict(os.environ)
    env["SDL_VIDEODRIVER"] = "dummy"
    env["SDL_AUDIODRIVER"] = "dummy"
    rom_path = os.path.join(ROOT, "tools/mame/roms")
    args = ["mame", "apple2gs",
            "-rompath", rom_path,
            "-ramsize", "1m",
            "-window",
            "-seconds_to_run", str(seconds),
            "-autoboot_script", lua_path,
            "-video", "none", "-sound", "none", "-nothrottle"]
    if debug_flag:
        # -debugger none keeps us headless while -debug enables bpset
        # plumbing. -oslog routes `logerror` output to stderr where we
        # can grep MAMEDBG-BP lines.
        args[1:1] = ["-debug", "-debugger", "none"]
    if oslog:
        args.append("-oslog")
    timeout_s = seconds + 20  # generous: mame startup is ~5-8s
    try:
        proc = subprocess.run(
            args, env=env, capture_output=True, text=True,
            timeout=timeout_s)
    except subprocess.TimeoutExpired as e:
        return asText(e.stdout) + asText(e.stderr)
    return proc.stdout + proc.stderr
# ---- --trace mode ----------------------------------------------------
# `logerror` lines look like:
# MAMEDBG-BP PC=106E A=1234 X=0 Y=38 S=1FF DBR=0
# Register dump that follows the tag on every `logerror` line. Six hex
# captures, in order: PC, A, X, Y, S, DBR.
_REG_DUMP = (
    r"\s+PC=([0-9A-Fa-f]+)\s+A=([0-9A-Fa-f]+)\s+X=([0-9A-Fa-f]+)"
    r"\s+Y=([0-9A-Fa-f]+)\s+S=([0-9A-Fa-f]+)\s+DBR=([0-9A-Fa-f]+)"
)
# Entry-breakpoint hit line.
BP_RE = re.compile("MAMEDBG-BP" + _REG_DUMP)
# Return-breakpoint hit line (only emitted when the finish poller armed it).
RET_RE = re.compile("MAMEDBG-RET" + _REG_DUMP)
def _tracePc2line(args, pc):
    """Resolve one PC through pc2line.py for --trace.

    Returns pc2line's stripped stdout, or None (after printing a
    `[trace] FAIL` line to stderr) when pc2line cannot be run or exits
    non-zero.
    """
    cmd = ["python3", os.path.join(SCRIPT_DIR, "pc2line.py"),
           "--sidecar", args.dwarf, "--map", args.map, f"0x{pc:06x}"]
    try:
        return subprocess.check_output(cmd, text=True).strip()
    except (OSError, subprocess.CalledProcessError) as e:
        print(f"[trace] FAIL: pc2line could not be run: {e}", file=sys.stderr)
        return None


def traceMode(args):
    """--trace: set a bp at <break>, run MAME once, resolve the first hit.

    Reads args.map, args.dwarf, args.bin, args.break_at (default "main"),
    args.load_at, args.from_start, args.start_at, args.finish,
    args.seconds and args.verbose.

    With --finish the generated Lua script additionally has the entry
    bp's action stamp the return address (taken from the 24-bit JSL
    frame at S+1..S+3) into scratch memory, and a register_periodic
    poller inside MAME installs a second bp at the post-call PC. The
    Python side does no polling; it only looks for the resulting
    MAMEDBG-RET line in the captured output.

    Returns 0 when the hit (and, with --finish, the return hit) was
    captured and the entry PC resolved, 1 on any capture/resolution
    failure, and 2 when the breakpoint or --start-at spec cannot be
    resolved.
    """
    syms = loadMapSyms(args.map)
    target = args.break_at or "main"
    pc = resolveBreakpoint(target, syms, args.dwarf, args.map)
    if pc is None:
        print(f"mameDebug: cannot resolve breakpoint '{target}'", file=sys.stderr)
        return 2
    print(f"[trace] break {target} -> 0x{pc:06x}")
    load_at = args.load_at
    # Default: jump straight to the bp target. crt0Gsos / crt0Gno
    # binaries' __start expects the GS/OS Loader to have already
    # applied IMM24 relocations, which isn't the case when we load
    # the flat .bin into bank 0 directly. --from-start forces start
    # at LOAD_AT (use only with crt0.s smoke binaries, which run
    # standalone). --start-at overrides with a user-supplied entry
    # point (FUNC or hex) — useful with --finish where the bp is a
    # deep callee and we want to start at its outer caller so the JSL
    # frame is set up. --from-start wins when both are given.
    if args.from_start:
        start_pc = load_at
    elif args.start_at:
        spec = args.start_at
        try:
            start_pc = int(spec, 0)
        except ValueError:
            start_pc = lookupSym(syms, spec)
            if start_pc is None:
                print(f"mameDebug: --start-at '{spec}' not in map",
                      file=sys.stderr)
                return 2
    else:
        start_pc = pc
    # NOTE(review): exit_frame is fixed at 120, so the script exits the
    # machine at frame 120 whatever --seconds says — confirm intended.
    lua_text = buildLuaScript(
        args.bin, load_at, [pc], exit_frame=120,
        start_pc=start_pc,
        finish=args.finish,
    )
    lf = tempfile.NamedTemporaryFile("w", suffix=".lua", delete=False)
    lua_path = lf.name
    try:
        # Write inside the try so the file is removed even if the write
        # itself fails.
        with lf:
            lf.write(lua_text)
        out = runMame(lua_path, seconds=args.seconds, debug_flag=True)
    finally:
        os.unlink(lua_path)
    if args.verbose:
        sys.stderr.write(out)
    bps = []
    rets = []
    for ln in out.splitlines():
        m = BP_RE.search(ln)
        if m:
            bps.append(m.group(1))
        m = RET_RE.search(ln)
        if m:
            rets.append(m.group(1))
    if not bps:
        print("[trace] FAIL: no BP-HIT in -oslog output", file=sys.stderr)
        # Print a sample of the output to diagnose
        tail = out.splitlines()[-20:]
        for ln in tail:
            sys.stderr.write(f" > {ln}\n")
        return 1
    hit_pc = int(bps[0], 16)
    print(f"[trace] BP-HIT PC=0x{hit_pc:06x} (count={len(bps)})")
    # Run pc2line.py to resolve to source.
    resolved = _tracePc2line(args, hit_pc)
    if resolved is None:
        return 1
    print(f"[trace] {resolved}")
    # Assert pc2line resolved (non-empty FILE/LINE/FUNC).
    if "NOT_FOUND" in resolved or "FILE=?" in resolved:
        print("[trace] FAIL: pc2line could not resolve the captured PC",
              file=sys.stderr)
        return 1
    if args.finish:
        if not rets:
            print("[trace] FAIL: --finish requested but no MAMEDBG-RET "
                  "in -oslog output (function may have returned before "
                  "the register_periodic poller installed the ret bp; "
                  "see mameDebug.py docstring)", file=sys.stderr)
            return 1
        ret_pc = int(rets[0], 16)
        print(f"[trace] RET PC=0x{ret_pc:06x} (count={len(rets)})")
        # The return PC is printed as resolved by pc2line but, unlike the
        # entry PC, is not checked for NOT_FOUND.
        ret_resolved = _tracePc2line(args, ret_pc)
        if ret_resolved is None:
            return 1
        print(f"[trace] {ret_resolved}")
    print("[trace] OK")
    return 0
# ---- Interactive (dbg) prompt (gated behind DEBUGGER_E2E=1) ---------
# Help text printed when the interactive prompt starts and in response to
# `?`. It lists the whole command surface the parser accepts; `s`, `n` and
# `finish` are recognised by interactiveMode() but currently only print a
# TODO message.
INTERACTIVE_HELP = """
Commands:
b FUNC | FILE:LINE set breakpoint
c continue
s single-step instruction
n step-over (temp-bp at jsl_pc+4, since JSL is 4B)
finish run-until-current-frame-RTL/RTS (i.e. until S
moves above its current value)
p &GLOBAL print address of a global symbol (map lookup)
p VAR print formal-parameter / local for current PC.
Uses the most-recent BP-HIT S register; routes
through pc2line.py --locals.
q | quit exit the debugger
? this help
"""
def interactiveMode(args):
    """Stub interactive (dbg) prompt — gated behind DEBUGGER_E2E=1.

    A real interactive loop would need a bidirectional MAME-Lua RPC
    (-oslog is one-way), which is deferred. The original notes say the
    pieces for that are staged in `mameDebug.lua.tmpl`; that file is not
    visible from here. What the prompt does today:

      b SPEC     resolve FUNC or FILE:LINE and queue the PC (duplicates
                 are not queued twice).
      c          launch one MAME run with every queued bp and print each
                 MAMEDBG-BP hit resolved through pc2line; remembers the
                 last hit's PC and S for `p VAR`.
      s/n/finish accepted, print a TODO.
      p &SYM     map lookup of a global's address.
      p VAR      pc2line --locals at the last hit, filtered to VAR (all
                 locals are shown when VAR is not found).
      q / quit / EOF   leave the prompt.
      ?          print the help text.

    pc2line failures are reported and the prompt keeps running, for the
    `b` and `c` commands as well as `p VAR`.

    Returns 2 when DEBUGGER_E2E is not "1", otherwise 0 once the prompt
    exits.
    """
    if os.environ.get("DEBUGGER_E2E", "0") != "1":
        print("mameDebug: interactive mode is gated behind DEBUGGER_E2E=1",
              file=sys.stderr)
        print(" use --trace for the smoke-checkable path",
              file=sys.stderr)
        return 2
    syms = loadMapSyms(args.map)
    # Common pc2line.py command prefix; each use appends its own options.
    pc2line_cmd = ["python3", os.path.join(SCRIPT_DIR, "pc2line.py"),
                   "--sidecar", args.dwarf, "--map", args.map]
    pc2line_errors = (OSError, subprocess.CalledProcessError)
    print("mameDebug interactive (DEBUGGER_E2E=1). Type ? for help.")
    print(INTERACTIVE_HELP)
    bp_pcs = []
    last_hit_pc = None
    last_hit_sp = None
    while True:
        try:
            line = input("(dbg) ").strip()
        except EOFError:
            print()
            break
        if not line:
            continue
        if line in ("q", "quit"):
            break
        if line == "?":
            print(INTERACTIVE_HELP)
            continue
        if line.startswith("b "):
            spec = line[2:].strip()
            try:
                pc = resolveBreakpoint(spec, syms, args.dwarf, args.map)
            except pc2line_errors as e:
                # FILE:LINE resolution shells out to pc2line; don't let a
                # failure there take the whole prompt down.
                print(f" pc2line --dump failed: {e}")
                continue
            if pc is None:
                print(f" cannot resolve {spec!r}")
                continue
            if pc not in bp_pcs:
                # A second bp at the same PC would make every hit log twice.
                bp_pcs.append(pc)
            print(f" breakpoint at 0x{pc:06x}")
            continue
        if line == "c":
            if not bp_pcs:
                print(" no breakpoints set; nothing to continue toward")
                continue
            # Launch one MAME run with the queued bps, surface every hit.
            start_pc = args.load_at if args.from_start else bp_pcs[0]
            lua_text = buildLuaScript(args.bin, args.load_at, bp_pcs,
                                      exit_frame=240, start_pc=start_pc)
            lf = tempfile.NamedTemporaryFile(
                "w", suffix=".lua", delete=False)
            lua_path = lf.name
            try:
                # Write inside the try so the temp file is removed even
                # if the write fails.
                with lf:
                    lf.write(lua_text)
                out = runMame(lua_path, seconds=args.seconds,
                              debug_flag=True)
            finally:
                os.unlink(lua_path)
            for ln in out.splitlines():
                m = BP_RE.search(ln)
                if not m:
                    continue
                hit_pc = int(m.group(1), 16)
                hit_sp = int(m.group(5), 16)
                last_hit_pc = hit_pc
                last_hit_sp = hit_sp
                try:
                    resolved = subprocess.check_output(
                        pc2line_cmd + [f"0x{hit_pc:06x}"],
                        text=True).strip()
                except pc2line_errors as e:
                    # Still record the hit (above) so `p VAR` can be tried.
                    print(f" HIT PC=0x{hit_pc:06x} (S=0x{hit_sp:04x}); "
                          f"pc2line failed: {e}")
                    continue
                print(f" HIT {resolved} (S=0x{hit_sp:04x})")
            continue
        if line in ("s", "n", "finish"):
            # These need request-reply with the simulator; not in this
            # slice.
            print(f" TODO: '{line}' requires bidirectional MAME RPC "
                  "(deferred follow-up — see mameDebug.py docstring)")
            continue
        if line.startswith("p &"):
            sym = line[3:].strip()
            addr = lookupSym(syms, sym)
            if addr is None:
                print(f" no such symbol: {sym}")
            else:
                print(f" &{sym} = 0x{addr:06x}")
            continue
        if line.startswith("p "):
            # `p VAR` — formal-parameter / local lookup at the most
            # recent BP-HIT. Routes through pc2line.py --locals with
            # the captured PC + S. Output is filtered to the line
            # whose VAR= matches `var` (if no match, all locals are
            # shown so the user can see what's in scope).
            var = line[2:].strip()
            if last_hit_pc is None or last_hit_sp is None:
                print(" no recent breakpoint hit; run `c` first")
                continue
            try:
                out = subprocess.check_output(
                    pc2line_cmd + ["--locals", "--sp",
                                   f"0x{last_hit_sp:04x}",
                                   f"0x{last_hit_pc:06x}"],
                    text=True)
            except pc2line_errors as e:
                print(f" pc2line --locals failed: {e}")
                continue
            shown = False
            for ln_out in out.splitlines():
                if ln_out.startswith(f"VAR={var} ") or \
                        ln_out.startswith(f"VAR={var}\t"):
                    print(f" {ln_out}")
                    shown = True
            if not shown:
                # Variable name didn't match anything in scope. Print
                # everything so the user can see what's available.
                for ln_out in out.splitlines():
                    print(f" {ln_out}")
            continue
        print(f" unknown command: {line!r}. Type ? for help.")
    return 0
# ---- main ------------------------------------------------------------
def main():
    """Parse the command line, check the input files, and run one mode.

    Returns the process exit status: 2 when any of --bin / --map /
    --dwarf does not exist, otherwise whatever traceMode (with --trace)
    or interactiveMode (without) returns.
    """
    parser = argparse.ArgumentParser(
        description="GDB-style debugger front-end for W65816 + MAME")
    parser.add_argument("--bin", required=True,
                        help="flat .bin image to load")
    parser.add_argument("--map", required=True, help="link816 .map")
    parser.add_argument("--dwarf", required=True,
                        help="link816 --debug-out sidecar")
    # int(s, 0) accepts 0x-prefixed hex as well as decimal.
    parser.add_argument("--load-at", type=lambda s: int(s, 0),
                        default=0x1000,
                        help="bank-0 load address (default 0x1000)")
    parser.add_argument("--break", dest="break_at", default=None,
                        help="breakpoint for --trace (FUNC or FILE:LINE). "
                             "Default: 'main'")
    parser.add_argument("--seconds", type=int, default=4,
                        help="MAME simulated seconds (default 4)")
    parser.add_argument("--trace", action="store_true",
                        help="default-on smoke mode: set bp, capture one "
                             "BP-HIT, resolve via pc2line, exit 0")
    parser.add_argument("--from-start", action="store_true",
                        help="start execution at LOAD_AT (i.e. through "
                             "the crt0). Default is to jump straight to "
                             "the bp target — required for crt0Gsos/Gno "
                             "binaries since their startup expects the "
                             "GS/OS Loader to have applied relocations.")
    parser.add_argument("--start-at", default=None,
                        help="override the initial PC: FUNC name or hex "
                             "address. Default = the bp target. Use to "
                             "set bp inside a deeper callee while still "
                             "starting from main() (so the JSL frame is "
                             "on the stack for --finish).")
    parser.add_argument("--finish", action="store_true",
                        help="trace + finish: also install a one-shot bp "
                             "at the breakpointed function's RTL return "
                             "address, prove the entry+return pair fires "
                             "end-to-end. Drives the `finish`-command "
                             "primitive in the interactive shell.")
    parser.add_argument("--verbose", "-v", action="store_true",
                        help="dump full MAME output to stderr")
    opts = parser.parse_args()

    # Checked in this order; the first missing input ends the run.
    required_inputs = (("--bin", opts.bin),
                       ("--map", opts.map),
                       ("--dwarf", opts.dwarf))
    for flag, path in required_inputs:
        if not os.path.exists(path):
            print(f"mameDebug: missing {flag} {path}", file=sys.stderr)
            return 2

    runner = traceMode if opts.trace else interactiveMode
    return runner(opts)
# Script entry point: run main() and use its return value as the process
# exit status. Nothing runs when the file is imported as a module.
if __name__ == "__main__":
    sys.exit(main())