65816-llvm-mos/scripts/pc2line.py
2026-05-30 19:40:29 -05:00

435 lines
14 KiB
Python
Executable file

#!/usr/bin/env python3
# pc2line.py - PC -> source location resolver for W65816 binaries.
#
# Parses the DWARF .debug_line section from link816's debug sidecar
# (`--debug-out FILE`) and answers `file:line` queries for runtime PCs.
# The sidecar already has reloc-applied PCs — they refer to the final
# linked image — so we don't need link-map cross-walking.
#
# Standard tools (pyelftools, llvm-dwarfdump, addr2line) all choke on
# the W65816 ELF: pyelftools asserts on the DWARF address size, the
# others can't apply our target-specific relocations. So we ship a
# small DWARF 5 line-program decoder (DWARF §6.2) — ~200 LOC, no deps.
#
# Usage:
# scripts/pc2line.py --sidecar foo.dwarf 0x123A
# scripts/pc2line.py --sidecar foo.dwarf --dump
# scripts/pc2line.py --sidecar foo.dwarf 0x123A 0x4567
import argparse
import os
import struct
import sys
# ---- ULEB128 / SLEB128 -----------------------------------------------
def readUleb(buf, off):
    """Decode one unsigned LEB128 integer from `buf` starting at `off`.

    Each byte contributes its low seven bits, least-significant group
    first; a clear top bit marks the final byte.

    Returns `(value, next_off)` where `next_off` is the offset just past
    the last byte consumed.
    """
    value = 0
    bit_pos = 0
    byte = 0x80  # primed so the loop body runs at least once
    while byte & 0x80:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bit_pos
        bit_pos += 7
    return value, off
def readSleb(buf, off):
    """Decode one signed LEB128 integer from `buf` starting at `off`.

    Bytes are accumulated exactly as for the unsigned encoding; if the
    final byte has bit 0x40 set (and fewer than 64 bits were read) the
    result is sign-extended to a negative value.

    Returns `(value, next_off)` where `next_off` is the offset just past
    the last byte consumed.
    """
    max_bits = 64
    value = 0
    bit_pos = 0
    byte = 0x80  # primed so the loop body runs at least once
    while byte & 0x80:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bit_pos
        bit_pos += 7
    if (byte & 0x40) and bit_pos < max_bits:
        # Only bits below `bit_pos` are set so far, so subtracting
        # 2**bit_pos is the same as OR-ing in the sign-extension mask.
        value -= 1 << bit_pos
    return value, off
def readU8(buf, off):
    """Read the unsigned byte at `off`; return `(value, off + 1)`."""
    value = buf[off]
    return value, off + 1
def readU16(buf, off):
    """Read a little-endian unsigned 16-bit value at `off`.

    Returns `(value, off + 2)`.
    """
    (value,) = struct.unpack_from("<H", buf, off)
    return value, off + 2
def readU32(buf, off):
    """Read a little-endian unsigned 32-bit value at `off`.

    Returns `(value, off + 4)`.
    """
    (value,) = struct.unpack_from("<I", buf, off)
    return value, off + 4
def readCStr(buf, off):
    """Read a NUL-terminated string that starts at `off`.

    The bytes before the terminator are decoded as UTF-8, with
    undecodable sequences replaced rather than raising.

    Returns `(text, next_off)` where `next_off` is the offset just past
    the terminating NUL.
    """
    terminator = buf.index(0, off)
    text = buf[off:terminator].decode("utf-8", "replace")
    return text, terminator + 1
# ---- DWARF 5 line-program standard opcodes ---------------------------
# Standard opcodes: non-zero single-byte opcodes below the header's
# opcode_base. runLineProgram dispatches on these values.
DW_LNS_copy = 1
DW_LNS_advance_pc = 2
DW_LNS_advance_line = 3
DW_LNS_set_file = 4
DW_LNS_set_column = 5
DW_LNS_negate_stmt = 6
DW_LNS_set_basic_block = 7
DW_LNS_const_add_pc = 8
DW_LNS_fixed_advance_pc = 9
DW_LNS_set_prologue_end = 10
DW_LNS_set_epilogue_begin = 11
DW_LNS_set_isa = 12
# Extended opcodes: the sub-opcode byte that follows a 0x00 opcode byte
# and a ULEB128 length.
DW_LNE_end_sequence = 1
DW_LNE_set_address = 2
# Not interpreted by runLineProgram; it is skipped using its length.
DW_LNE_set_discriminator = 4
# ---- Line-program header parsing (DWARF 5 §6.2.4) --------------------
def parseDwarf5Header(buf, off, section_end):
    """Parse one DWARF 5 line-program header that begins at `off`.

    Returns `(header, body_off)`. `header` is a dict with the keys
    "end" (offset in `buf` one past this unit), "version", "addr_size",
    "seg_size", "min_inst_length", "max_ops_per_inst", "default_is_stmt",
    "line_base" (already converted to signed), "line_range",
    "opcode_base", "std_op_lens", "dirs" and "files". Each entry of
    "dirs"/"files" is a dict keyed by content-type code whose values are
    whatever readForm returned for the declared form.

    Two header fields are deliberately not trusted, because LLVM-mos
    clang emits them as zeros in the .o (the section-relative diffs
    that should produce them go through a reloc link816 can't apply):

    * unit_length == 0 is replaced by "everything up to `section_end`";
    * header_length is read and discarded; `body_off` is simply where
      the forward parse of the prologue finished.

    Raises NotImplementedError for 64-bit DWARF and for any version
    other than 5.
    """

    def readEntryTable(pos):
        # Layout shared by the directory and file tables: a u8 count of
        # (content-type, form) ULEB128 pairs, then a ULEB128 entry count,
        # then the entries themselves encoded per that format.
        fmt_count, pos = readU8(buf, pos)
        fmt = []
        for _ in range(fmt_count):
            content_type, pos = readUleb(buf, pos)
            form, pos = readUleb(buf, pos)
            fmt.append((content_type, form))
        entry_count, pos = readUleb(buf, pos)
        entries = []
        for _ in range(entry_count):
            entry = {}
            for content_type, form in fmt:
                value, pos = readForm(buf, pos, form)
                entry[content_type] = value
            entries.append(entry)
        return entries, pos

    unit_length, off = readU32(buf, off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF not handled")
    if unit_length == 0:
        unit_length = section_end - off
    unit_end = off + unit_length

    version, off = readU16(buf, off)
    if version != 5:
        raise NotImplementedError(f"DWARF v{version} not handled (v5 only)")

    addr_size, off = readU8(buf, off)
    seg_size, off = readU8(buf, off)
    _header_length, off = readU32(buf, off)  # unreliable; see docstring
    min_inst_length, off = readU8(buf, off)
    max_ops_per_inst, off = readU8(buf, off)
    default_is_stmt, off = readU8(buf, off)
    raw_line_base, off = readU8(buf, off)
    # line_base is a signed byte stored in an unsigned read.
    line_base = raw_line_base - 0x100 if raw_line_base >= 0x80 else raw_line_base
    line_range, off = readU8(buf, off)
    opcode_base, off = readU8(buf, off)

    # standard_opcode_lengths holds opcode_base - 1 single-byte entries.
    std_count = opcode_base - 1
    std_op_lens = list(buf[off:off + std_count])
    off += std_count

    dirs, off = readEntryTable(off)
    files, off = readEntryTable(off)

    header = {
        "end": unit_end,
        "version": version,
        "addr_size": addr_size,
        "seg_size": seg_size,
        "min_inst_length": min_inst_length,
        "max_ops_per_inst": max_ops_per_inst,
        "default_is_stmt": default_is_stmt,
        "line_base": line_base,
        "line_range": line_range,
        "opcode_base": opcode_base,
        "std_op_lens": std_op_lens,
        "dirs": dirs,
        "files": files,
    }
    return header, off
# DWARF forms (subset — what we see in our line-program file/dir entries).
DW_FORM_string = 0x08
DW_FORM_strp = 0x0e
DW_FORM_udata = 0x0f
DW_FORM_data16 = 0x1e
DW_FORM_line_strp = 0x1f
DW_FORM_strx = 0x1a
DW_FORM_strx1 = 0x26
DW_FORM_strx2 = 0x27
DW_FORM_strx3 = 0x28
DW_FORM_strx4 = 0x29


def readForm(buf, off, form):
    """Read one attribute value encoded as `form` from `buf` at `off`.

    Returns `(value, next_off)`. The value type depends on the form:

    * DW_FORM_string              -> decoded str (inline C string)
    * DW_FORM_strp / line_strp    -> int, a 32-bit string-section offset
    * DW_FORM_strx, strx1..strx4  -> int, a string index
    * DW_FORM_udata               -> int
    * DW_FORM_data16              -> str, the 16 bytes as lowercase hex

    Offsets and indices are returned unresolved; the caller maps them to
    text.

    Raises NotImplementedError for any other form, and IndexError if a
    DW_FORM_strx3 operand is cut short by the end of `buf`.
    """
    if form == DW_FORM_string:
        return readCStr(buf, off)
    if form in (DW_FORM_strp, DW_FORM_line_strp):
        return readU32(buf, off)
    if form in (DW_FORM_udata, DW_FORM_strx):
        return readUleb(buf, off)
    if form == DW_FORM_data16:
        return (buf[off:off + 16].hex(), off + 16)
    if form == DW_FORM_strx1:
        return readU8(buf, off)
    if form == DW_FORM_strx2:
        return readU16(buf, off)
    if form == DW_FORM_strx3:
        # 3-byte little-endian index; struct has no 24-bit code.
        raw = buf[off:off + 3]
        if len(raw) != 3:
            raise IndexError("truncated DW_FORM_strx3 operand")
        return int.from_bytes(raw, "little"), off + 3
    if form == DW_FORM_strx4:
        return readU32(buf, off)
    raise NotImplementedError(f"DW_FORM 0x{form:x} not handled")
# ---- Sidecar parser --------------------------------------------------
def loadSidecarSection(path, section_name):
    """Collect every `section_name` payload stored in a link816 sidecar.

    The file is scanned for header lines of the form

        ; OBJ <path> SEC <name> SIZE <bytes> RELOCS_APPLIED <n> RELOCS_SKIPPED <n>

    and the SIZE bytes that follow the header's newline are taken as
    that section's raw payload. Scanning resumes after the payload, so
    payload bytes are never mistaken for headers.

    Returns a list of `(name, payload)` tuples in file order, one per
    matching section; the list is empty when nothing matches. A header
    that lacks SEC/SIZE, or whose SIZE is missing, non-numeric or
    negative, is skipped and scanning continues on the next line.
    """
    with open(path, "rb") as f:
        data = f.read()

    needle = b"; OBJ "
    matches = []
    pos = 0
    while True:
        hdr_at = data.find(needle, pos)
        if hdr_at < 0:
            break
        nl = data.find(b"\n", hdr_at)
        if nl < 0:
            break
        payload_start = nl + 1
        fields = data[hdr_at:nl].decode("utf-8", "replace").split()
        try:
            sec = fields[fields.index("SEC") + 1]
            size = int(fields[fields.index("SIZE") + 1])
        except (ValueError, IndexError):
            # Keyword absent, value missing, or SIZE not a number.
            pos = payload_start
            continue
        if size < 0:
            # A negative size would move the cursor backwards and
            # rescan this same header forever.
            pos = payload_start
            continue
        if sec == section_name:
            matches.append((sec, data[payload_start:payload_start + size]))
        pos = payload_start + size
    return matches
# ---- Line-program decoder --------------------------------------------
def runLineProgram(buf, header, h_end):
    """Execute a line-number program and yield the rows it emits.

    `buf` holds the program bytes (decoding starts at offset 0),
    `header` is the dict built by parseDwarf5Header, and `h_end` is the
    offset in `buf` at which decoding stops.

    Yields `(addr, file, line, end_seq)` tuples. `end_seq` is True only
    for the row produced by DW_LNE_end_sequence, after which the address,
    file and line registers are reset to 0, 1 and 1. Rows come from
    DW_LNS_copy, from every special opcode, and from end_sequence.
    """
    pc = 0
    cur_line = 1
    cur_file = 1
    stmt_flag = bool(header["default_is_stmt"])
    pos = 0

    while pos < h_end:
        opcode, pos = readU8(buf, pos)

        if opcode == 0:
            # Extended opcode: ULEB128 length, then a sub-opcode byte and
            # its operands. `length` covers the sub-opcode byte too, so
            # the next instruction always starts `length` bytes on.
            length, pos = readUleb(buf, pos)
            ext_op = buf[pos]
            operand_at = pos + 1
            pos += length
            if ext_op == DW_LNE_end_sequence:
                yield (pc, cur_file, cur_line, True)
                pc, cur_line, cur_file = 0, 1, 1
                stmt_flag = bool(header["default_is_stmt"])
            elif ext_op == DW_LNE_set_address:
                width = header["addr_size"]
                if width == 4:
                    pc, _ = readU32(buf, operand_at)
                elif width == 2:
                    pc, _ = readU16(buf, operand_at)
                else:
                    pc = int.from_bytes(
                        buf[operand_at:operand_at + width], "little")
            # Any other extended opcode is skipped via `length`.
            continue

        if opcode >= header["opcode_base"]:
            # Special opcode (most common): one byte advances both the
            # address and the line, then emits a row.
            adjusted = opcode - header["opcode_base"]
            addr_steps, line_step = divmod(adjusted, header["line_range"])
            pc += addr_steps * header["min_inst_length"]
            cur_line += header["line_base"] + line_step
            yield (pc, cur_file, cur_line, False)
            continue

        # Standard opcode.
        if opcode == DW_LNS_copy:
            yield (pc, cur_file, cur_line, False)
        elif opcode == DW_LNS_advance_pc:
            delta, pos = readUleb(buf, pos)
            pc += delta * header["min_inst_length"]
        elif opcode == DW_LNS_advance_line:
            delta, pos = readSleb(buf, pos)
            cur_line += delta
        elif opcode == DW_LNS_set_file:
            cur_file, pos = readUleb(buf, pos)
        elif opcode in (DW_LNS_set_column, DW_LNS_set_isa):
            # One ULEB128 operand, consumed and discarded.
            _, pos = readUleb(buf, pos)
        elif opcode == DW_LNS_negate_stmt:
            stmt_flag = not stmt_flag
        elif opcode == DW_LNS_const_add_pc:
            # Same address advance special opcode 255 would apply.
            steps = (255 - header["opcode_base"]) // header["line_range"]
            pc += steps * header["min_inst_length"]
        elif opcode == DW_LNS_fixed_advance_pc:
            # Operand is a raw u16, not scaled by min_inst_length.
            delta, pos = readU16(buf, pos)
            pc += delta
        elif opcode in (DW_LNS_set_basic_block,
                        DW_LNS_set_prologue_end,
                        DW_LNS_set_epilogue_begin):
            pass
        else:
            # Unknown std op — skip operands per std_op_lens.
            for _ in range(header["std_op_lens"][opcode - 1]):
                _, pos = readUleb(buf, pos)
# ---- Main ------------------------------------------------------------
def buildTable(sidecar_path):
    """Decode every .debug_line unit in the sidecar into a flat row list.

    Returns a list of `(pc, file_idx, line, file_table)` tuples, in the
    order the line programs emit them. `file_idx` is the line program's
    file register value and `file_table` is the list of file names
    declared by the unit that produced the row (the same list object is
    shared by all rows of a unit). End-of-sequence rows are dropped.

    File names stored as integer offsets are looked up in the
    concatenated .debug_line_str payloads; the string is read from that
    offset up to the next NUL, so an offset pointing into the middle of
    a longer string still resolves. An offset that cannot be resolved is
    rendered as "@<offset>", and a missing name as "?".

    A unit whose header raises NotImplementedError is reported on stderr
    and ends processing of that section.
    """
    line_sections = loadSidecarSection(sidecar_path, ".debug_line")
    line_str = b"".join(
        p for _, p in loadSidecarSection(sidecar_path, ".debug_line_str"))

    def resolveName(name_val):
        # Turn a DW_LNCT_path value into display text.
        if isinstance(name_val, str):
            return name_val
        if isinstance(name_val, int):
            end = -1
            if 0 <= name_val < len(line_str):
                end = line_str.find(b"\0", name_val)
            if end < 0:
                return f"@{name_val}"
            return line_str[name_val:end].decode("utf-8", "replace")
        return "?"

    rows = []
    for _, payload in line_sections:
        unit_off = 0
        while unit_off < len(payload):
            try:
                hdr, body_start = parseDwarf5Header(
                    payload, unit_off, len(payload))
            except NotImplementedError as e:
                print(f"pc2line: {e}", file=sys.stderr)
                break
            # File entry's name field is at content type DW_LNCT_path=1.
            file_tbl = [resolveName(fe.get(1)) for fe in hdr["files"]]
            body = payload[body_start:hdr["end"]]
            for pc, fidx, ln, end_seq in runLineProgram(body, hdr, len(body)):
                if not end_seq:
                    rows.append((pc, fidx, ln, file_tbl))
            unit_off = hdr["end"]
    return rows
def query(table, pc):
    """Find the table row that covers `pc`.

    `table` is a list of `(pc, file_idx, line, file_table)` rows. The
    row with the largest address that is still <= `pc` wins; among rows
    with equal addresses the first one in `table` is used.

    Returns `(row_pc, file_basename, line)`, or None when every row lies
    above `pc`. The file name is "?" when `file_idx` is outside the
    row's file table.

    File indices are 0-based: this tool only accepts DWARF 5 line
    tables, where index 0 is the primary source file, so `file_idx` is
    used directly as the list index.
    """
    best = None
    for row in table:
        if row[0] <= pc and (best is None or row[0] > best[0]):
            best = row
    if best is None:
        return None
    f_idx = best[1]
    files = best[3]
    fname = files[f_idx] if 0 <= f_idx < len(files) else "?"
    return (best[0], os.path.basename(fname), best[2])
def loadMapSymbols(path):
    """Read `addr symbol` pairs from a link816 .map file.

    Only lines whose stripped text starts with "0x" and that have at
    least two whitespace-separated fields are used; the first field is
    parsed as hexadecimal and the second taken as the symbol name. Lines
    whose address does not parse are ignored.

    Returns a list of `(addr, name)` tuples sorted ascending, or an
    empty list when `path` is falsy or does not exist.
    """
    if not path or not os.path.exists(path):
        return []
    found = []
    with open(path) as map_file:
        for raw in map_file:
            text = raw.strip()
            if not text.startswith("0x"):
                continue
            fields = text.split()
            if len(fields) < 2:
                continue
            try:
                addr = int(fields[0], 16)
            except ValueError:
                continue
            found.append((addr, fields[1]))
    return sorted(found)
def funcAt(syms, pc):
    """Name the symbol with the largest address that is <= `pc`.

    `syms` is a list of `(addr, name)` tuples sorted by address, which
    is binary-searched. Returns "?" when no symbol qualifies or when
    the matching name is empty.
    """
    match = None
    lo = 0
    hi = len(syms) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        if syms[mid][0] > pc:
            hi = mid - 1
            continue
        # Candidate found; keep looking to the right for a closer one.
        match = mid
        lo = mid + 1
    if match is None:
        return "?"
    return syms[match][1] or "?"
def main():
    """Command-line entry point.

    Builds the PC -> line table from --sidecar, optionally loads symbol
    names from --map, then either prints the whole table (--dump) or
    resolves each positional PC.

    Returns the process exit status: 0 on success, 2 when neither
    --dump nor any PC was given.
    """
    ap = argparse.ArgumentParser(description="PC -> source resolver")
    ap.add_argument("--sidecar", required=True,
                    help="link816 --debug-out file")
    ap.add_argument("--map", help="link816 .map (optional — for function names)")
    ap.add_argument("--dump", action="store_true",
                    help="print the full PC->line table")
    ap.add_argument("pcs", nargs="*", help="PCs to resolve (hex 0x... or decimal)")
    args = ap.parse_args()

    table = buildTable(args.sidecar)
    syms = loadMapSymbols(args.map)

    if args.dump:
        for pc, fidx, ln, ft in sorted(table):
            # DWARF 5 file indices are 0-based (index 0 is the primary
            # source file), so the register value indexes `ft` directly.
            fname = ft[fidx] if 0 <= fidx < len(ft) else "?"
            func = funcAt(syms, pc)
            print(f"0x{pc:06x}\t{os.path.basename(fname)}:{ln}\t{func}")
        return 0

    if not args.pcs:
        print(f"pc2line: built {len(table)} entries", file=sys.stderr)
        print("pass PCs as positional args to resolve, or --dump for the table")
        return 2

    for s in args.pcs:
        pc = int(s, 0)  # base 0: accepts 0x-prefixed hex or decimal
        row = query(table, pc)
        func = funcAt(syms, pc)
        if row is None:
            print(f"PC=0x{pc:06x} NOT_FOUND FUNC={func}")
        else:
            _, fname, ln = row
            print(f"PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func}")
    return 0


if __name__ == "__main__":
    sys.exit(main())