435 lines
14 KiB
Python
Executable file
435 lines
14 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# pc2line.py - PC -> source location resolver for W65816 binaries.
|
|
#
|
|
# Parses the DWARF .debug_line section from link816's debug sidecar
|
|
# (`--debug-out FILE`) and answers `file:line` queries for runtime PCs.
|
|
# The sidecar already has reloc-applied PCs — they refer to the final
|
|
# linked image — so we don't need link-map cross-walking.
|
|
#
|
|
# Standard tools (pyelftools, llvm-dwarfdump, addr2line) all choke on
|
|
# the W65816 ELF: pyelftools asserts on the DWARF address size, the
|
|
# others can't apply our target-specific relocations. So we ship a
|
|
# small DWARF 5 line-program decoder (DWARF §6.2) — ~200 LOC, no deps.
|
|
#
|
|
# Usage:
|
|
# scripts/pc2line.py --sidecar foo.dwarf 0x123A
|
|
# scripts/pc2line.py --sidecar foo.dwarf --dump
|
|
# scripts/pc2line.py --sidecar foo.dwarf 0x123A 0x4567
|
|
|
|
import argparse
|
|
import os
|
|
import struct
|
|
import sys
|
|
|
|
|
|
# ---- ULEB128 / SLEB128 -----------------------------------------------
|
|
|
|
def readUleb(buf, off):
    """Decode one unsigned LEB128 value from `buf` beginning at `off`.

    Each byte contributes its low seven bits, least-significant group
    first; a clear high bit marks the final byte.

    Returns a `(value, next_off)` pair where `next_off` is the offset of
    the byte following the encoded value.  Indexing past the end of `buf`
    is not caught here, so a truncated encoding propagates whatever
    `buf[off]` raises.
    """
    value = 0
    bits = 0
    more = True
    while more:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bits
        bits += 7
        # The continuation flag lives in the top bit of every byte.
        more = bool(byte & 0x80)
    return value, off
|
|
|
|
|
|
def readSleb(buf, off):
    """Decode one signed LEB128 value from `buf` beginning at `off`.

    Each byte contributes its low seven bits, least-significant group
    first; a clear high bit marks the final byte, and bit 6 of that final
    byte is the sign bit.

    Returns a `(value, next_off)` pair where `next_off` is the offset of
    the byte following the encoded value.  A truncated encoding propagates
    whatever `buf[off]` raises.
    """
    result = 0
    shift = 0
    while True:
        b = buf[off]
        off += 1
        result |= (b & 0x7F) << shift
        shift += 7
        if (b & 0x80) == 0:
            break
    # Python integers are unbounded, so sign-extend whenever the sign bit is
    # set.  The fixed-width `shift < 64` guard used by C decoders would skip
    # the extension for encodings of ten or more bytes (e.g. a padded int64)
    # and return a large positive number instead of the negative value.
    if b & 0x40:
        result -= 1 << shift
    return result, off
|
|
|
|
|
|
def readU8(buf, off):
    """Read the single byte at `off`; return `(value, off + 1)`."""
    value = buf[off]
    return value, off + 1
|
|
|
|
|
|
def readU16(buf, off):
    """Read a little-endian unsigned 16-bit value at `off`.

    Returns `(value, off + 2)`.
    """
    (value,) = struct.unpack_from("<H", buf, off)
    return value, off + 2
|
|
|
|
|
|
def readU32(buf, off):
    """Read a little-endian unsigned 32-bit value at `off`.

    Returns `(value, off + 4)`.
    """
    (value,) = struct.unpack_from("<I", buf, off)
    return value, off + 4
|
|
|
|
|
|
def readCStr(buf, off):
    """Read a NUL-terminated string starting at `off`.

    The bytes up to (not including) the terminator are decoded as UTF-8
    with undecodable sequences replaced.  Returns `(text, next_off)` where
    `next_off` is the offset just past the terminator.  `buf.index` raises
    ValueError when no NUL byte follows `off`.
    """
    terminator = buf.index(0, off)
    text = buf[off:terminator].decode("utf-8", "replace")
    return text, terminator + 1
|
|
|
|
|
|
# ---- DWARF 5 line-program standard opcodes ---------------------------
|
|
|
|
# Standard line-program opcodes; their values run consecutively from 1 to 12.
(
    DW_LNS_copy,
    DW_LNS_advance_pc,
    DW_LNS_advance_line,
    DW_LNS_set_file,
    DW_LNS_set_column,
    DW_LNS_negate_stmt,
    DW_LNS_set_basic_block,
    DW_LNS_const_add_pc,
    DW_LNS_fixed_advance_pc,
    DW_LNS_set_prologue_end,
    DW_LNS_set_epilogue_begin,
    DW_LNS_set_isa,
) = range(1, 13)

# Extended opcodes (the sub-opcode byte that follows a 0x00 opcode and a
# ULEB128 length).
DW_LNE_end_sequence = 0x01
DW_LNE_set_address = 0x02
DW_LNE_set_discriminator = 0x04
|
|
|
|
|
|
# ---- Line-program header parsing (DWARF 5 §6.2.4) --------------------
|
|
|
|
def parseDwarf5Header(buf, off, section_end):
    """Parse one DWARF 5 line-program header starting at `off`.

    Returns `(header, body_off)`: `header` is a dict with the keys `end`,
    `version`, `addr_size`, `seg_size`, `min_inst_length`,
    `max_ops_per_inst`, `default_is_stmt`, `line_base` (signed),
    `line_range`, `opcode_base`, `std_op_lens`, `dirs` and `files`;
    `body_off` is the offset of the first line-program opcode.

    Raises NotImplementedError for 64-bit DWARF (unit_length 0xFFFFFFFF)
    and for any version other than 5.

    LLVM-mos clang emits unit_length and header_length as zeros in the
    .o (the section-relative diffs that should produce them go through
    a reloc link816 can't apply).  Both are tolerated: a zero unit_length
    means "the unit runs to `section_end`", and header_length is read
    only to step over it — the body is taken to start wherever the
    forward parse of the prologue finishes.
    """
    unit_length, off = readU32(buf, off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF not handled")
    if unit_length == 0:
        # Unrelocated length: assume the unit fills the rest of the section.
        unit_length = section_end - off
    unit_end = off + unit_length

    version, off = readU16(buf, off)
    if version != 5:
        raise NotImplementedError(f"DWARF v{version} not handled (v5 only)")

    addr_size, off = readU8(buf, off)
    seg_size, off = readU8(buf, off)
    _, off = readU32(buf, off)  # header_length: skipped, see docstring
    min_inst_length, off = readU8(buf, off)
    max_ops_per_inst, off = readU8(buf, off)
    default_is_stmt, off = readU8(buf, off)

    # line_base is stored as a signed byte.
    line_base, off = readU8(buf, off)
    if line_base & 0x80:
        line_base -= 0x100

    line_range, off = readU8(buf, off)
    opcode_base, off = readU8(buf, off)

    # standard_opcode_lengths holds opcode_base - 1 single-byte entries.
    lengths_end = off + opcode_base - 1
    std_op_lens = list(buf[off:lengths_end])
    off = lengths_end

    # Directory table first, then file table; both share one layout.
    dirs, off = _readEntryTable(buf, off)
    files, off = _readEntryTable(buf, off)

    header = {
        "end": unit_end,
        "version": version,
        "addr_size": addr_size,
        "seg_size": seg_size,
        "min_inst_length": min_inst_length,
        "max_ops_per_inst": max_ops_per_inst,
        "default_is_stmt": default_is_stmt,
        "line_base": line_base,
        "line_range": line_range,
        "opcode_base": opcode_base,
        "std_op_lens": std_op_lens,
        "dirs": dirs,
        "files": files,
    }
    return header, off


def _readEntryTable(buf, off):
    """Read one entry-format list plus its entries (directory or file table).

    Layout: a one-byte format count, that many (content type, form)
    ULEB128 pairs, a ULEB128 entry count, then each entry's fields in
    format order.  Returns `(entries, next_off)` where every entry is a
    dict mapping content-type code to the decoded form value.
    """
    format_count, off = readU8(buf, off)
    formats = []
    for _ in range(format_count):
        content_type, off = readUleb(buf, off)
        form, off = readUleb(buf, off)
        formats.append((content_type, form))

    entry_count, off = readUleb(buf, off)
    entries = []
    for _ in range(entry_count):
        fields = {}
        for content_type, form in formats:
            fields[content_type], off = readForm(buf, off, form)
        entries.append(fields)
    return entries, off
|
|
|
|
|
|
# DWARF forms (subset — what we see in our line-program file/dir entries).
|
|
# Listed in ascending numeric order.
DW_FORM_string = 0x08      # inline NUL-terminated string
DW_FORM_strp = 0x0e        # 4-byte string-section offset
DW_FORM_udata = 0x0f       # ULEB128 unsigned constant
DW_FORM_strx = 0x1a        # ULEB128 string index
DW_FORM_data16 = 0x1e      # 16 raw bytes
DW_FORM_line_strp = 0x1f   # 4-byte .debug_line_str offset
DW_FORM_strx1 = 0x26       # 1-byte string index
DW_FORM_strx2 = 0x27       # 2-byte string index
DW_FORM_strx3 = 0x28       # 3-byte string index
DW_FORM_strx4 = 0x29       # 4-byte string index
|
|
|
|
|
|
def readForm(buf, off, form):
    """Decode one attribute value of DWARF form `form` at `off`.

    Returns `(value, next_off)`.  The value type depends on the form:

    * DW_FORM_string            -> str (inline NUL-terminated string)
    * DW_FORM_strp / line_strp  -> int (4-byte section offset, unresolved)
    * DW_FORM_udata             -> int (ULEB128)
    * DW_FORM_data16            -> str (32 hex digits)
    * DW_FORM_strx, strx1..4    -> int (string index, unresolved)

    Raises NotImplementedError for any other form, and IndexError when a
    DW_FORM_strx3 value is truncated.
    """
    if form == DW_FORM_string:
        return readCStr(buf, off)
    if form == DW_FORM_strp or form == DW_FORM_line_strp:
        return readU32(buf, off)
    if form == DW_FORM_udata:
        return readUleb(buf, off)
    if form == DW_FORM_data16:
        return (buf[off:off + 16].hex(), off + 16)
    if form == DW_FORM_strx:
        return readUleb(buf, off)
    if form == DW_FORM_strx1:
        return readU8(buf, off)
    if form == DW_FORM_strx2:
        return readU16(buf, off)
    if form == DW_FORM_strx3:
        # 3-byte little-endian index; struct has no 24-bit code, so decode
        # by hand and reject short reads rather than silently truncating.
        raw = buf[off:off + 3]
        if len(raw) != 3:
            raise IndexError("truncated DW_FORM_strx3 value")
        return int.from_bytes(raw, "little"), off + 3
    if form == DW_FORM_strx4:
        return readU32(buf, off)
    raise NotImplementedError(f"DW_FORM 0x{form:x} not handled")
|
|
|
|
|
|
# ---- Sidecar parser --------------------------------------------------
|
|
|
|
def loadSidecarSection(path, section_name):
    """Collect every `section_name` payload from the link816 sidecar.

    The sidecar is scanned for header lines of the form

        ; OBJ <path> SEC <name> SIZE <bytes> RELOCS_APPLIED <n> RELOCS_SKIPPED <n>

    each followed, after the newline, by `<bytes>` bytes of raw payload.
    Headers lacking a SEC or SIZE field are stepped over.

    Returns a list of `(name, payload)` tuples in file order — one per
    matching header, empty when the section never appears.
    """
    with open(path, "rb") as fh:
        blob = fh.read()

    marker = "; OBJ ".encode()
    matches = []
    cursor = 0
    while True:
        start = blob.find(marker, cursor)
        if start < 0:
            break
        eol = blob.find(b"\n", start)
        if eol < 0:
            break
        cursor = eol + 1

        fields = blob[start:eol].decode("utf-8", "replace").split()
        if "SEC" not in fields or "SIZE" not in fields:
            continue

        name = fields[fields.index("SEC") + 1]
        nbytes = int(fields[fields.index("SIZE") + 1])
        payload = blob[cursor:cursor + nbytes]
        # Resume scanning after the payload so its bytes are never mistaken
        # for another header.
        cursor += nbytes
        if name == section_name:
            matches.append((name, payload))
    return matches
|
|
|
|
|
|
# ---- Line-program decoder --------------------------------------------
|
|
|
|
def runLineProgram(buf, header, h_end):
    """Execute the line-number program in `buf[0:h_end]`.

    `header` is the dict produced by parseDwarf5Header.  This is a
    generator yielding `(pc, file_idx, line, end_sequence)` tuples: one
    per DW_LNS_copy and per special opcode (end_sequence False), and one
    per DW_LNE_end_sequence (end_sequence True), after which the state
    machine registers are reset.  The is_stmt register is tracked but not
    reported.
    """
    pos = 0
    pc = 0
    cur_line = 1
    cur_file = 1
    stmt = bool(header["default_is_stmt"])

    while pos < h_end:
        opcode, pos = readU8(buf, pos)

        if opcode == 0:
            # Extended opcode: ULEB128 length, then sub-opcode + operands.
            # The length counts the sub-opcode byte, so stepping `length`
            # from here lands on the next instruction whatever it was.
            length, pos = readUleb(buf, pos)
            ext_op = buf[pos]
            operand_at = pos + 1
            if ext_op == DW_LNE_end_sequence:
                yield (pc, cur_file, cur_line, True)
                pc, cur_line, cur_file = 0, 1, 1
                stmt = bool(header["default_is_stmt"])
            elif ext_op == DW_LNE_set_address:
                width = header["addr_size"]
                if width == 4:
                    pc, _ = readU32(buf, operand_at)
                elif width == 2:
                    pc, _ = readU16(buf, operand_at)
                else:
                    pc = int.from_bytes(buf[operand_at:operand_at + width], "little")
            pos += length
            continue

        if opcode >= header["opcode_base"]:
            # Special opcode (most common): advances pc and line, emits a row.
            delta = opcode - header["opcode_base"]
            pc += (delta // header["line_range"]) * header["min_inst_length"]
            cur_line += header["line_base"] + (delta % header["line_range"])
            yield (pc, cur_file, cur_line, False)
            continue

        # Standard opcode.
        if opcode == DW_LNS_copy:
            yield (pc, cur_file, cur_line, False)
        elif opcode == DW_LNS_advance_pc:
            step, pos = readUleb(buf, pos)
            pc += step * header["min_inst_length"]
        elif opcode == DW_LNS_advance_line:
            step, pos = readSleb(buf, pos)
            cur_line += step
        elif opcode == DW_LNS_set_file:
            cur_file, pos = readUleb(buf, pos)
        elif opcode in (DW_LNS_set_column, DW_LNS_set_isa):
            # One ULEB128 operand that this decoder does not track.
            _, pos = readUleb(buf, pos)
        elif opcode == DW_LNS_negate_stmt:
            stmt = not stmt
        elif opcode in (DW_LNS_set_basic_block,
                        DW_LNS_set_prologue_end,
                        DW_LNS_set_epilogue_begin):
            # Flag-only opcodes with no operands; nothing to record.
            pass
        elif opcode == DW_LNS_const_add_pc:
            # Same address advance as special opcode 255, without a row.
            delta = (255 - header["opcode_base"]) // header["line_range"]
            pc += delta * header["min_inst_length"]
        elif opcode == DW_LNS_fixed_advance_pc:
            # Operand is a raw 16-bit value, not scaled by min_inst_length.
            step, pos = readU16(buf, pos)
            pc += step
        else:
            # Unknown std op — skip operands per std_op_lens.
            operand_count = header["std_op_lens"][opcode - 1]
            for _ in range(operand_count):
                _, pos = readUleb(buf, pos)
|
|
|
|
|
|
# ---- Main ------------------------------------------------------------
|
|
|
|
def buildTable(sidecar_path):
    """Decode every .debug_line unit in the sidecar into a flat row list.

    Returns a list of `(pc, file_idx, line, file_table)` tuples, one per
    row emitted by the line programs (end-of-sequence rows are dropped).
    `file_table` is the list of file names of the unit the row came from;
    rows of the same unit share one list object.

    A unit whose header raises NotImplementedError (64-bit DWARF, version
    other than 5, unknown form) is reported on stderr and ends processing
    of that section payload.
    """
    line_sections = loadSidecarSection(sidecar_path, ".debug_line")
    # All .debug_line_str payloads are concatenated in file order.
    line_str = b"".join(
        payload
        for _, payload in loadSidecarSection(sidecar_path, ".debug_line_str")
    )

    def lineStrAt(offset):
        """Return the NUL-terminated string at `offset`, or '@<offset>'."""
        # Resolve by reading at the offset itself, so an offset that points
        # into the middle of a longer string is still honoured.
        if 0 <= offset < len(line_str):
            end = line_str.find(b"\0", offset)
            if end >= 0:
                return line_str[offset:end].decode("utf-8", "replace")
        return f"@{offset}"

    rows = []
    for _, payload in line_sections:
        i = 0
        while i < len(payload):
            try:
                hdr, body_start = parseDwarf5Header(payload, i, len(payload))
            except NotImplementedError as e:
                print(f"pc2line: {e}", file=sys.stderr)
                break

            file_tbl = []
            for fe in hdr["files"]:
                # File entry's name field is at content type DW_LNCT_path=1
                name_val = fe.get(1)
                if isinstance(name_val, str):
                    file_tbl.append(name_val)
                elif isinstance(name_val, int):
                    # NOTE(review): every integer is treated as a
                    # .debug_line_str offset; strx-style indices would need
                    # a different table — confirm the producer never emits
                    # them for paths.
                    file_tbl.append(lineStrAt(name_val))
                else:
                    file_tbl.append("?")

            body = payload[body_start:hdr["end"]]
            for pc, fidx, ln, end_seq in runLineProgram(body, hdr, len(body)):
                if end_seq:
                    continue
                rows.append((pc, fidx, ln, file_tbl))
            i = hdr["end"]
    return rows
|
|
|
|
|
|
def query(table, pc):
    """Resolve `pc` to the closest preceding line-table row.

    `table` is a sequence of `(pc, file_idx, line, file_table)` rows.  The
    row with the largest address not exceeding `pc` wins (the first such
    row when several share that address).

    Returns `(row_pc, file_basename, line)`, or None when no row address
    is <= `pc`.  The file name is "?" when the row's file index falls
    outside its file table.
    """
    best = max(
        (row for row in table if row[0] <= pc),
        key=lambda row: row[0],
        default=None,
    )
    if best is None:
        return None
    row_pc, f_idx, line, files = best
    # DWARF 5 numbers file_names entries from 0 (entry 0 is the primary
    # source file), so the file register indexes the table directly; the
    # 1-based convention only applies to DWARF <= 4, which is rejected by
    # the header parser.
    fname = files[f_idx] if 0 <= f_idx < len(files) else "?"
    return (row_pc, os.path.basename(fname), line)
|
|
|
|
|
|
def loadMapSymbols(path):
    """Read `addr symbol` pairs from a link816 .map file.

    Only lines whose stripped text starts with "0x" and has at least two
    whitespace-separated fields are considered; the first field is parsed
    as hexadecimal and the second taken as the symbol name.  Lines whose
    address does not parse are skipped.

    Returns a list of `(addr, name)` tuples sorted ascending, or an empty
    list when `path` is falsy or does not exist.
    """
    if not path or not os.path.exists(path):
        return []

    symbols = []
    with open(path) as fh:
        for raw in fh:
            text = raw.strip()
            if not text.startswith("0x"):
                continue
            fields = text.split()
            if len(fields) < 2:
                continue
            try:
                addr = int(fields[0], 16)
            except ValueError:
                continue
            symbols.append((addr, fields[1]))
    return sorted(symbols)
|
|
|
|
|
|
def funcAt(syms, pc):
    """Binary-search `syms` for the symbol covering `pc`.

    `syms` is a list of `(addr, name)` tuples sorted by address, as
    returned by loadMapSymbols.  Returns the name of the entry with the
    largest address <= `pc`, or "?" when there is none (or the name is
    empty).
    """
    left = 0
    right = len(syms) - 1
    found = -1
    while left <= right:
        probe = (left + right) // 2
        if pc < syms[probe][0]:
            right = probe - 1
            continue
        # Candidate at or below pc; keep looking for a later one.
        found = probe
        left = probe + 1
    if found < 0:
        return "?"
    return syms[found][1] or "?"
|
|
|
|
|
|
def main():
    """Command-line entry point.

    Builds the PC->line table from `--sidecar`, optionally loads symbol
    names from `--map`, then either dumps the whole table (`--dump`) or
    resolves each positional PC.

    Returns the process exit status: 0 on success, 2 when neither
    `--dump` nor any PC was given.
    """
    ap = argparse.ArgumentParser(description="PC -> source resolver")
    ap.add_argument("--sidecar", required=True,
                    help="link816 --debug-out file")
    ap.add_argument("--map", help="link816 .map (optional — for function names)")
    ap.add_argument("--dump", action="store_true",
                    help="print the full PC->line table")
    ap.add_argument("pcs", nargs="*", help="PCs to resolve (hex 0x... or decimal)")
    args = ap.parse_args()

    table = buildTable(args.sidecar)
    syms = loadMapSymbols(args.map)

    if args.dump:
        for pc, fidx, ln, ft in sorted(table):
            # DWARF 5 file indices are 0-based (entry 0 is the primary
            # source file), so index the file table directly.
            fname = ft[fidx] if 0 <= fidx < len(ft) else "?"
            func = funcAt(syms, pc)
            print(f"0x{pc:06x}\t{os.path.basename(fname)}:{ln}\t{func}")
        return 0

    if not args.pcs:
        print(f"pc2line: built {len(table)} entries", file=sys.stderr)
        print("pass PCs as positional args to resolve, or --dump for the table")
        return 2

    for s in args.pcs:
        # Base 0 lets int() honour a 0x prefix and fall back to decimal.
        pc = int(s, 0)
        row = query(table, pc)
        func = funcAt(syms, pc)
        if row is None:
            print(f"PC=0x{pc:06x} NOT_FOUND FUNC={func}")
        else:
            _, fname, ln = row
            print(f"PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func}")
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|