65816-llvm-mos/scripts/pc2line.py
Scott Duensing 3388f3c5a5 More updates
2026-06-03 20:46:31 -05:00

1755 lines
63 KiB
Python
Executable file

#!/usr/bin/env python3
# pc2line.py - PC -> source location resolver for W65816 binaries.
#
# Parses the DWARF .debug_line section from link816's debug sidecar
# (`--debug-out FILE`) and answers `file:line` queries for runtime PCs.
# The sidecar already has reloc-applied PCs — they refer to the final
# linked image — so we don't need link-map cross-walking.
#
# Standard tools (pyelftools, llvm-dwarfdump, addr2line) all choke on
# the W65816 ELF: pyelftools asserts on the DWARF address size, the
# others can't apply our target-specific relocations. So we ship a
# small DWARF 5 line-program decoder (DWARF §6.2) — ~200 LOC, no deps.
#
# Usage:
# scripts/pc2line.py --sidecar foo.dwarf 0x123A
# scripts/pc2line.py --sidecar foo.dwarf --dump
# scripts/pc2line.py --sidecar foo.dwarf 0x123A 0x4567
# scripts/pc2line.py --sidecar foo.dwarf --list-cus
# scripts/pc2line.py --sidecar foo.dwarf --dump-dies 0x123A
# scripts/pc2line.py --sidecar foo.dwarf --locals --sp 0x1FA 0x123A
import argparse
import os
import struct
import sys
# ---- ULEB128 / SLEB128 -----------------------------------------------
def readUleb(buf, off):
    """Decode one unsigned LEB128 integer from `buf` starting at `off`.

    Each byte contributes its low 7 bits, least-significant group first;
    a clear high bit marks the final byte.

    Returns (value, offset just past the last byte consumed).
    Raises IndexError if the buffer ends before the final byte.
    """
    value = 0
    bit_pos = 0
    while True:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bit_pos
        if not (byte & 0x80):
            return value, off
        bit_pos += 7
def readSleb(buf, off):
    """Decode one signed LEB128 integer from `buf` starting at `off`.

    The payload bits are accumulated exactly as for ULEB128; if bit 6 of
    the final byte is set and fewer than 64 bits were consumed, the
    result is sign-extended.

    Returns (value, offset just past the last byte consumed).
    Raises IndexError if the buffer ends before the final byte.
    """
    value = 0
    bit_pos = 0
    while True:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bit_pos
        bit_pos += 7
        if not (byte & 0x80):
            break
    negative = bool(byte & 0x40)
    if negative and bit_pos < 64:
        # value only has bits below bit_pos, so subtracting 2**bit_pos
        # is the same as OR-ing in the sign extension.
        value -= 1 << bit_pos
    return value, off
def readU8(buf, off):
    """Read one unsigned byte at `off`; returns (value, off + 1)."""
    value = buf[off]
    return value, off + 1
def readU16(buf, off):
    """Read a little-endian unsigned 16-bit value; returns (value, off + 2).

    Raises struct.error if fewer than two bytes remain at `off`.
    """
    (value,) = struct.unpack_from("<H", buf, off)
    return value, off + 2
def readU32(buf, off):
    """Read a little-endian unsigned 32-bit value; returns (value, off + 4).

    Raises struct.error if fewer than four bytes remain at `off`.
    """
    (value,) = struct.unpack_from("<I", buf, off)
    return value, off + 4
def readCStr(buf, off):
    """Read a NUL-terminated string starting at `off`.

    Bytes are decoded as UTF-8 with undecodable sequences replaced.
    Returns (text, offset just past the terminating NUL).
    Raises ValueError if no NUL byte exists at or after `off`.
    """
    terminator = buf.index(0, off)
    raw = buf[off:terminator]
    return raw.decode("utf-8", "replace"), terminator + 1
# ---- DWARF 5 line-program standard opcodes ---------------------------
# Standard opcodes. runLineProgram treats any opcode byte below the
# header's opcode_base as standard and dispatches on these values.
DW_LNS_copy = 1
DW_LNS_advance_pc = 2
DW_LNS_advance_line = 3
DW_LNS_set_file = 4
DW_LNS_set_column = 5
DW_LNS_negate_stmt = 6
DW_LNS_set_basic_block = 7
DW_LNS_const_add_pc = 8
DW_LNS_fixed_advance_pc = 9
DW_LNS_set_prologue_end = 10
DW_LNS_set_epilogue_begin = 11
DW_LNS_set_isa = 12
# Extended opcodes. In the byte stream these follow a 0 opcode byte and
# a ULEB128 length (see the `op == 0` branch of runLineProgram).
DW_LNE_end_sequence = 1
DW_LNE_set_address = 2
DW_LNE_set_discriminator = 4
# ---- Line-program header parsing (DWARF 5 §6.2.4) --------------------
def parseDwarf5Header(buf, off, section_end):
    """Decode a DWARF 5 line-program header starting at `off`.

    Returns (header_dict, body_off), where body_off is the offset at
    which the prologue parse finished, i.e. the first line-program
    opcode.

    header_dict keys: "end" (offset just past this unit), "version",
    "addr_size", "seg_size", "header_length", "min_inst_length",
    "max_ops_per_inst", "default_is_stmt", "line_base" (signed),
    "line_range", "opcode_base", "std_op_lens", "dirs", "files".
    Each dirs/files entry is a dict mapping content-type code to the
    value returned by readForm.

    Notes on the two length fields:
      * Post Phase-1.3 (the FK_Data_4 -> R_W65816_DATA32 reloc fix in
        W65816ELFObjectWriter) unit_length and header_length arrive
        correctly populated for fresh -g builds.
      * Older sidecars carry zeros there. unit_length == 0 therefore
        falls back to "rest of the section" (`section_end - off`).
      * header_length is recorded but never used to locate the body:
        the variable-length directory/file tables have been consumed by
        the time we return, so the forward scan is the source of truth
        whether or not header_length is populated.

    Raises NotImplementedError for 64-bit DWARF or any version other
    than 5.
    """
    def readEntryTable(pos):
        # One "entry format + entries" table (used for both the
        # directory table and the file table). The format count is a
        # single byte; the entry count is ULEB128.
        fmt_count, pos = readU8(buf, pos)
        entry_fmt = []
        for _ in range(fmt_count):
            content_type, pos = readUleb(buf, pos)
            form, pos = readUleb(buf, pos)
            entry_fmt.append((content_type, form))
        entry_count, pos = readUleb(buf, pos)
        entries = []
        for _ in range(entry_count):
            entry = {}
            for content_type, form in entry_fmt:
                entry[content_type], pos = readForm(buf, pos, form)
            entries.append(entry)
        return entries, pos

    header = {}
    unit_length, off = readU32(buf, off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF not handled")
    if unit_length == 0:
        # Unrelocated length slot from an older sidecar.
        unit_length = section_end - off
    header["end"] = off + unit_length

    version, off = readU16(buf, off)
    header["version"] = version
    if version != 5:
        raise NotImplementedError(f"DWARF v{version} not handled (v5 only)")

    for key in ("addr_size", "seg_size"):
        header[key], off = readU8(buf, off)
    # Recorded for tools that want it; not used for positioning.
    header["header_length"], off = readU32(buf, off)
    for key in ("min_inst_length", "max_ops_per_inst", "default_is_stmt"):
        header[key], off = readU8(buf, off)

    # line_base is a signed byte.
    raw_line_base, off = readU8(buf, off)
    if raw_line_base >= 0x80:
        raw_line_base -= 0x100
    header["line_base"] = raw_line_base

    for key in ("line_range", "opcode_base"):
        header[key], off = readU8(buf, off)

    # standard_opcode_lengths has opcode_base - 1 one-byte entries.
    operand_table_len = header["opcode_base"] - 1
    header["std_op_lens"] = list(buf[off:off + operand_table_len])
    off += operand_table_len

    dirs, off = readEntryTable(off)
    files, off = readEntryTable(off)
    header["dirs"] = dirs
    header["files"] = files
    return header, off
# DWARF forms (subset — what we see in our line-program file/dir entries
# and .debug_info DIE attributes for the Phase 3.2 DIE walker). Form codes
# are from DWARF 5 §7.5.5.
# How each form is decoded is defined by readForm; the inline notes
# below summarize what readForm does for the less obvious ones.
DW_FORM_addr = 0x01  # width = the CU's addr_size
DW_FORM_block2 = 0x03  # u16 length, then that many bytes
DW_FORM_block4 = 0x04  # u32 length, then that many bytes
DW_FORM_data2 = 0x05
DW_FORM_data4 = 0x06
DW_FORM_data8 = 0x07
DW_FORM_string = 0x08  # inline NUL-terminated string
DW_FORM_block = 0x09  # ULEB128 length, then that many bytes
DW_FORM_block1 = 0x0a  # u8 length, then that many bytes
DW_FORM_data1 = 0x0b
DW_FORM_flag = 0x0c
DW_FORM_sdata = 0x0d  # SLEB128
DW_FORM_strp = 0x0e  # offset; width depends on dwarf_size
DW_FORM_udata = 0x0f  # ULEB128
DW_FORM_ref_addr = 0x10  # offset; width depends on dwarf_size
DW_FORM_ref1 = 0x11
DW_FORM_ref2 = 0x12
DW_FORM_ref4 = 0x13
DW_FORM_ref8 = 0x14
DW_FORM_ref_udata = 0x15
DW_FORM_indirect = 0x16  # not decoded by readForm (raises NotImplementedError)
DW_FORM_sec_offset = 0x17  # offset; width depends on dwarf_size
DW_FORM_exprloc = 0x18  # ULEB128 length, then expression bytes
DW_FORM_flag_present = 0x19  # no bytes in the DIE; readForm returns 1
DW_FORM_strx = 0x1a  # ULEB128 index
DW_FORM_addrx = 0x1b  # ULEB128 index
DW_FORM_ref_sup4 = 0x1c
DW_FORM_strp_sup = 0x1d
DW_FORM_data16 = 0x1e  # readForm returns the 16 bytes as a hex string
DW_FORM_line_strp = 0x1f  # offset; width depends on dwarf_size
DW_FORM_ref_sig8 = 0x20
DW_FORM_implicit_const = 0x21  # value lives in the abbrev entry, not the DIE
DW_FORM_loclistx = 0x22
DW_FORM_rnglistx = 0x23
DW_FORM_ref_sup8 = 0x24
DW_FORM_strx1 = 0x25
DW_FORM_strx2 = 0x26
DW_FORM_strx3 = 0x27  # 3-byte little-endian index
DW_FORM_strx4 = 0x28
DW_FORM_addrx1 = 0x29
DW_FORM_addrx2 = 0x2a
DW_FORM_addrx3 = 0x2b  # 3-byte little-endian index
DW_FORM_addrx4 = 0x2c
# DWARF 5 tag codes — only the ones we look at for the DIE walker.
DW_TAG_array_type = 0x01
DW_TAG_class_type = 0x02
DW_TAG_enumeration_type = 0x04
DW_TAG_formal_parameter = 0x05  # listed as "param" by _flatVarList
DW_TAG_lexical_block = 0x0b  # descended into by _flatVarList
DW_TAG_member = 0x0d
DW_TAG_pointer_type = 0x0f
DW_TAG_compile_unit = 0x11
DW_TAG_structure_type = 0x13
DW_TAG_subroutine_type = 0x15
DW_TAG_typedef = 0x16
DW_TAG_union_type = 0x17
DW_TAG_inheritance = 0x1c
DW_TAG_inlined_subroutine = 0x1d  # descended into by _flatVarList
DW_TAG_subrange_type = 0x21
DW_TAG_base_type = 0x24
DW_TAG_const_type = 0x26
DW_TAG_subprogram = 0x2e  # the tag findSubprogramForPc searches for
DW_TAG_variable = 0x34  # listed as "var" by _flatVarList
DW_TAG_volatile_type = 0x35
DW_TAG_restrict_type = 0x37
# DWARF 5 attribute codes (subset), listed in ascending code order so a
# duplicated or transposed value is easy to spot. The reverse-name map
# used for printing keys on the numeric value, so every code here must
# be unique.
DW_AT_sibling = 0x01
DW_AT_location = 0x02
DW_AT_name = 0x03
DW_AT_byte_size = 0x0b
DW_AT_bit_size = 0x0d
DW_AT_stmt_list = 0x10
DW_AT_low_pc = 0x11
DW_AT_high_pc = 0x12
DW_AT_language = 0x13
DW_AT_comp_dir = 0x1b
DW_AT_const_value = 0x1c
DW_AT_inline = 0x20
DW_AT_producer = 0x25
DW_AT_prototyped = 0x27
DW_AT_start_scope = 0x2c
DW_AT_upper_bound = 0x2f
DW_AT_abstract_origin = 0x31
DW_AT_artificial = 0x34
DW_AT_data_member_location = 0x38
DW_AT_decl_file = 0x3a
DW_AT_decl_line = 0x3b
DW_AT_encoding = 0x3e
DW_AT_external = 0x3f
DW_AT_frame_base = 0x40
DW_AT_specification = 0x47
DW_AT_type = 0x49
DW_AT_ranges = 0x55
DW_AT_call_column = 0x57
DW_AT_call_file = 0x58
DW_AT_call_line = 0x59
DW_AT_str_offsets_base = 0x72
DW_AT_addr_base = 0x73
DW_AT_rnglists_base = 0x74
# 0x7d is call_return_pc; call_origin is 0x7f. (call_origin was
# previously given 0x7d here, which mislabelled call_return_pc
# attributes when printing.)
DW_AT_call_return_pc = 0x7d
DW_AT_call_origin = 0x7f
DW_AT_loclists_base = 0x8c
# Reverse-name tables for printing. Built by enumerating the module
# namespace once; values that aren't constants are filtered out.
def _buildNameMap(prefix):
    """Build a {code: short_name} map from module-level constants.

    Every module global whose name starts with `prefix` and whose value
    is an int contributes one entry; the key is the value and the short
    name is the global's name with `prefix` removed. If two constants
    share a value, the one encountered later in the namespace wins.
    """
    cut = len(prefix)
    return {
        value: name[cut:]
        for name, value in globals().items()
        if name.startswith(prefix) and isinstance(value, int)
    }
def readForm(buf, off, form, addr_size=4, dwarf_size=4, implicit_val=None):
    """Decode one attribute value of DWARF form `form` located at `off`.

    Returns (value, new_off). What `value` is depends on the form:
      * block / exprloc forms: a bytes object with the payload;
      * refN forms: the raw CU-local DIE offset;
      * sec_offset / strp / strx / addrx families: the raw offset or
        index (the caller resolves it through the right table);
      * data16: the 16 bytes rendered as a hex string;
      * flag_present: 1, consuming nothing;
      * implicit_const: `implicit_val`, consuming nothing.

    `addr_size` is the CU's address size (4 on W65816 since DWARF treats
    24-bit PCs as 32-bit). `dwarf_size` is 4 for 32-bit DWARF, 8 for
    64-bit (we only support 32-bit).

    Raises NotImplementedError for any form not listed below.
    """
    def readLittle(width):
        # Slice-based little-endian read (no bounds error on a short
        # buffer, unlike the struct-based readU16/readU32).
        return (int.from_bytes(buf[off:off + width], "little"), off + width)

    def readPayload(length, start):
        return (bytes(buf[start:start + length]), start + length)

    if form == DW_FORM_addr:
        if addr_size == 4:
            return readU32(buf, off)
        if addr_size == 2:
            return readU16(buf, off)
        return readLittle(addr_size)

    # Fixed-width scalars, references and indices.
    if form in (DW_FORM_data1, DW_FORM_flag, DW_FORM_ref1,
                DW_FORM_strx1, DW_FORM_addrx1):
        return readU8(buf, off)
    if form in (DW_FORM_data2, DW_FORM_ref2,
                DW_FORM_strx2, DW_FORM_addrx2):
        return readU16(buf, off)
    if form in (DW_FORM_strx3, DW_FORM_addrx3):
        value = buf[off] | (buf[off + 1] << 8) | (buf[off + 2] << 16)
        return (value, off + 3)
    if form in (DW_FORM_data4, DW_FORM_ref4, DW_FORM_ref_sup4,
                DW_FORM_strx4, DW_FORM_addrx4):
        return readU32(buf, off)
    if form in (DW_FORM_data8, DW_FORM_ref8,
                DW_FORM_ref_sig8, DW_FORM_ref_sup8):
        return readLittle(8)
    if form == DW_FORM_data16:
        return (buf[off:off + 16].hex(), off + 16)

    # Variable-length scalars.
    if form == DW_FORM_sdata:
        return readSleb(buf, off)
    if form in (DW_FORM_udata, DW_FORM_ref_udata, DW_FORM_strx,
                DW_FORM_addrx, DW_FORM_loclistx, DW_FORM_rnglistx):
        return readUleb(buf, off)
    if form == DW_FORM_string:
        return readCStr(buf, off)

    # Section offsets: 4 bytes in 32-bit DWARF, otherwise 8.
    if form in (DW_FORM_strp, DW_FORM_line_strp, DW_FORM_strp_sup,
                DW_FORM_sec_offset, DW_FORM_ref_addr):
        if dwarf_size == 4:
            return readU32(buf, off)
        return readLittle(8)

    # Forms that occupy no bytes in the DIE.
    if form == DW_FORM_flag_present:
        return (1, off)
    if form == DW_FORM_implicit_const:
        return (implicit_val, off)

    # Length-prefixed byte blocks.
    if form == DW_FORM_block1:
        length, start = readU8(buf, off)
        return readPayload(length, start)
    if form == DW_FORM_block2:
        length, start = readU16(buf, off)
        return readPayload(length, start)
    if form == DW_FORM_block4:
        length, start = readU32(buf, off)
        return readPayload(length, start)
    if form in (DW_FORM_block, DW_FORM_exprloc):
        length, start = readUleb(buf, off)
        return readPayload(length, start)

    raise NotImplementedError(f"DW_FORM 0x{form:x} not handled")
# ---- Sidecar parser --------------------------------------------------
def loadSidecarSection(path, section_name):
    """Collect every `section_name` payload from a link816 debug sidecar.

    The sidecar is a sequence of records. Each record is a one-line
    text header of the form

        ; OBJ <path> SEC <name> SIZE <bytes> RELOCS_APPLIED <n> RELOCS_SKIPPED <n>

    followed immediately by SIZE raw payload bytes.

    Returns a list of (section_name, payload_bytes) tuples, one per
    matching record in file order. The list is empty when the sidecar
    has no such section. A payload is shorter than SIZE only if the
    file itself is truncated.

    Header lines that lack a usable SEC or SIZE field (token missing,
    no value after it, non-numeric or negative size) are skipped
    instead of raising or moving the scan cursor backwards.

    Raises OSError if `path` cannot be read.
    """
    with open(path, "rb") as f:
        data = f.read()
    needle = b"; OBJ "
    matches = []
    pos = 0
    while True:
        start = data.find(needle, pos)
        if start < 0:
            break
        eol = data.find(b"\n", start)
        if eol < 0:
            break
        # Whatever happens, resume no earlier than the line after this
        # header so the scan always makes forward progress.
        pos = eol + 1
        fields = data[start:eol].decode("utf-8", "replace").split()
        try:
            name = fields[fields.index("SEC") + 1]
            size = int(fields[fields.index("SIZE") + 1])
        except (ValueError, IndexError):
            continue
        if size < 0:
            continue
        if name == section_name:
            matches.append((name, data[pos:pos + size]))
        # Skip the payload so header-like bytes inside it are not
        # mistaken for a record header.
        pos += size
    return matches
# ---- Line-program decoder --------------------------------------------
def runLineProgram(buf, header, h_end):
    """Walk a line program, yielding one tuple per emitted row.

    Yields (pc, file_idx, line, end_sequence) 4-tuples. `end_sequence`
    is True only for the row produced by DW_LNE_end_sequence; rows from
    DW_LNS_copy and from special opcodes carry False.

    Decoding starts at offset 0 of `buf` and stops once the offset
    reaches `h_end`.
    NOTE(review): this presumably expects the caller to pass the program
    body already sliced to start at its first opcode, with `h_end`
    relative to that slice — confirm against the call site.

    `header` is the dict produced by parseDwarf5Header; the keys read
    here are default_is_stmt, opcode_base, addr_size, min_inst_length,
    line_range, line_base and std_op_lens.

    is_stmt is tracked but is not part of the yielded rows; column,
    basic_block, prologue_end, epilogue_begin, isa and discriminator
    are parsed past and discarded.
    """
    off = 0
    # State-machine registers.
    addr = 0
    line = 1
    file = 1
    is_stmt = bool(header["default_is_stmt"])
    while off < h_end:
        op, off = readU8(buf, off)
        if op == 0:
            # Extended opcode.
            # `ln` counts the sub-opcode byte plus its operands, so
            # `off + ln` (with `off` pointing at the sub-opcode) is the
            # start of the next opcode regardless of which sub-opcode
            # this is.
            ln, off = readUleb(buf, off)
            sub = buf[off]
            sub_off = off + 1
            if sub == DW_LNE_end_sequence:
                yield (addr, file, line, True)
                # Reset the registers for the next sequence.
                addr = 0
                line = 1
                file = 1
                is_stmt = bool(header["default_is_stmt"])
                off = off + ln
            elif sub == DW_LNE_set_address:
                addr_size = header["addr_size"]
                if addr_size == 4:
                    a, _ = readU32(buf, sub_off)
                elif addr_size == 2:
                    a, _ = readU16(buf, sub_off)
                else:
                    a = int.from_bytes(buf[sub_off:sub_off + addr_size], "little")
                addr = a
                off = off + ln
            else:
                # Any other extended opcode (including
                # DW_LNE_set_discriminator) is skipped by length.
                off = off + ln
        elif op < header["opcode_base"]:
            # Standard opcode.
            if op == DW_LNS_copy:
                yield (addr, file, line, False)
            elif op == DW_LNS_advance_pc:
                inc, off = readUleb(buf, off)
                addr += inc * header["min_inst_length"]
            elif op == DW_LNS_advance_line:
                inc, off = readSleb(buf, off)
                line += inc
            elif op == DW_LNS_set_file:
                f, off = readUleb(buf, off)
                file = f
            elif op == DW_LNS_set_column:
                _, off = readUleb(buf, off)
            elif op == DW_LNS_negate_stmt:
                is_stmt = not is_stmt
            elif op == DW_LNS_set_basic_block:
                pass
            elif op == DW_LNS_const_add_pc:
                # Advance by the address increment special opcode 255
                # would apply, without touching `line` or emitting a row.
                adj = (255 - header["opcode_base"]) // header["line_range"]
                addr += adj * header["min_inst_length"]
            elif op == DW_LNS_fixed_advance_pc:
                # Operand is a plain u16 and is not scaled by
                # min_inst_length.
                inc, off = readU16(buf, off)
                addr += inc
            elif op == DW_LNS_set_prologue_end:
                pass
            elif op == DW_LNS_set_epilogue_begin:
                pass
            elif op == DW_LNS_set_isa:
                _, off = readUleb(buf, off)
            else:
                # Unknown std op — skip operands per std_op_lens.
                args = header["std_op_lens"][op - 1]
                for _ in range(args):
                    _, off = readUleb(buf, off)
        else:
            # Special opcode (most common).
            # One byte encodes both an address advance and a line
            # advance, then emits a row.
            adj = op - header["opcode_base"]
            addr += (adj // header["line_range"]) * header["min_inst_length"]
            line += header["line_base"] + (adj % header["line_range"])
            yield (addr, file, line, False)
# ---- .debug_abbrev parser --------------------------------------------
#
# Each CU has an abbrev_offset that points into .debug_abbrev. A table
# at that offset is a list of abbrev entries terminated by code 0.
# Each entry is: ULEB code, ULEB tag, u8 children, then (ULEB attr,
# ULEB form, optional SLEB implicit_const if form==implicit_const)*
# terminated by attr==0 form==0.
def parseAbbrevTable(buf, base):
    """Decode the abbreviation table that begins at offset `base`.

    Entries are read until a zero abbreviation code or the end of `buf`.

    Returns {code: (tag, has_children, attr_specs)} where attr_specs is
    a list of (attr, form, implicit_const) tuples; implicit_const is
    None unless the form is DW_FORM_implicit_const.
    """
    abbrevs = {}
    pos = base
    limit = len(buf)
    while pos < limit:
        code, pos = readUleb(buf, pos)
        if not code:
            # Code 0 terminates the table.
            break
        tag, pos = readUleb(buf, pos)
        children_flag, pos = readU8(buf, pos)
        specs = []
        while True:
            attr, pos = readUleb(buf, pos)
            form, pos = readUleb(buf, pos)
            if (attr, form) == (0, 0):
                # (0, 0) terminates this entry's attribute list.
                break
            implicit = None
            if form == DW_FORM_implicit_const:
                # The constant is stored here in the abbrev, not in the DIE.
                implicit, pos = readSleb(buf, pos)
            specs.append((attr, form, implicit))
        abbrevs[code] = (tag, bool(children_flag), specs)
    return abbrevs
# ---- .debug_str / .debug_line_str / .debug_str_offsets resolution ----
#
# In a link816 sidecar the per-CU .debug_str_offsets entries are zeroed
# (the .rela.debug_str_offsets relocs target .debug_str, which isn't in
# the resolveSym kind-set — text/rodata/bss/init_array). This is a known
# Phase-3.2-slice-1 gap noted in the GAP_CLOSURE_PLAN: until link816 is
# extended to resolve intra-debug relocs, strx-form names come out as
# `@<idx>` placeholders. Line-strp names DO resolve because line_str
# is referenced by literal offset inside .debug_line, not by a reloc.
def resolveStr(strtab, offset):
    """Return the NUL-terminated string at `offset` in `strtab`.

    Returns None when the table is missing/empty, the offset is at or
    past its end, or no terminating NUL follows the offset. Bytes are
    decoded as UTF-8 with undecodable sequences replaced.
    """
    if strtab and offset < len(strtab):
        nul = strtab.find(b"\0", offset)
        if nul >= 0:
            return strtab[offset:nul].decode("utf-8", "replace")
    return None
def resolveStrx(str_offsets, debug_str, str_offsets_base, idx, dwarf_size=4):
    """Resolve string index `idx` through the str_offsets table.

    DWARF 5 §7.26: `str_offsets_base` points past the table header and
    each entry is `dwarf_size` bytes wide. The entry's value is then
    looked up in `debug_str`.

    Returns the string, or None when either table is None, the entry
    lies outside `str_offsets`, or the string itself cannot be resolved.
    """
    if str_offsets is None or debug_str is None:
        return None
    entry_start = str_offsets_base + idx * dwarf_size
    if entry_start + dwarf_size > len(str_offsets):
        return None
    # 4-byte entries for 32-bit DWARF; any other dwarf_size reads 8.
    read_width = 4 if dwarf_size == 4 else 8
    raw_entry = str_offsets[entry_start:entry_start + read_width]
    return resolveStr(debug_str, int.from_bytes(raw_entry, "little"))
def resolveAddrx(addr_section, addr_base, idx, addr_size=4):
    """Resolve address index `idx` through the addr table.

    DWARF 5 §7.27: `addr_base` points past the table header; each entry
    is `addr_size` little-endian bytes.

    Returns the address, or None when the table is None or the entry
    would extend past its end.
    """
    if addr_section is None:
        return None
    first = addr_base + idx * addr_size
    last = first + addr_size
    if last > len(addr_section):
        return None
    return int.from_bytes(addr_section[first:last], "little")
# ---- .debug_info DIE walker -------------------------------------------
class Die:
    """One parsed debugging information entry.

    Attributes:
      offset   -- CU-relative offset of the DIE (for ref4 lookup)
      tag      -- DW_TAG_* code
      attrs    -- {at_code: (raw_value, form_code)}
      children -- child Die objects, in the order they were appended
      parent   -- enclosing Die, or None
    """

    def __init__(self, offset, tag, attrs):
        self.offset = offset
        self.tag = tag
        self.attrs = attrs
        self.children = []
        self.parent = None

    def get(self, at_code):
        """Return the raw value of attribute `at_code`, or None if absent."""
        record = self.attrs.get(at_code)
        return None if record is None else record[0]

    def getRaw(self, at_code):
        """Return the (raw_value, form_code) pair for `at_code`, or None."""
        return self.attrs.get(at_code)
class Cu:
    """One compile unit: header fields, abbrev table, root DIE, and
    references to the string/address tables its DIEs index into."""

    def __init__(self):
        # Unit header.
        self.version = 0
        self.unit_type = 0
        self.addr_size = 4
        self.dwarf_size = 4
        self.abbrev_off = 0
        # Extents within .debug_info.
        self.cu_start = 0       # offset of the CU header
        self.body_start = 0     # offset of the first DIE
        self.body_end = 0
        # Decoded content.
        self.abbrev = {}
        self.root = None
        # Bases for indirect lookups (resolved from CU root attrs).
        self.str_offsets_base = 0
        self.addr_base = 0
        # Per-CU references to the global section tables.
        self.debug_str = None
        self.line_str = None
        self.str_offsets = None
        self.addr_section = None
        # Source file paths and PC ranges from the matching .debug_line CU.
        self.files = []

    def lookupStr(self, idx):
        """Resolve a strx index; returns `@strx<idx>` if unresolvable."""
        text = resolveStrx(self.str_offsets, self.debug_str,
                           self.str_offsets_base, idx, self.dwarf_size)
        return f"@strx{idx}" if text is None else text

    def lookupLineStr(self, off):
        """Resolve a .debug_line_str offset; returns `@linestr0x<off>`
        if unresolvable."""
        text = resolveStr(self.line_str, off)
        return f"@linestr0x{off:x}" if text is None else text

    def lookupStrp(self, off):
        """Resolve a .debug_str offset; returns `@str0x<off>` if
        unresolvable."""
        text = resolveStr(self.debug_str, off)
        return f"@str0x{off:x}" if text is None else text

    def lookupAddr(self, idx):
        """Resolve an addrx index to an address, or None if unresolvable."""
        return resolveAddrx(self.addr_section, self.addr_base, idx,
                            self.addr_size)
def parseDie(buf, off, cu, parent=None):
    """Parse a single DIE (and, recursively, its children) at `off`.

    `off` is a .debug_info-absolute offset. The DIE's `offset` is
    recorded CU-relative (`off - cu.cu_start`) for ref4 lookup.
    Attribute values are stored raw, as (value, form) pairs; strx/addrx
    indices are not resolved here.

    Returns (die, new_off). `die` is None in two cases:
      * a null entry (abbrev code 0), which ends a sibling list;
        new_off is just past the zero byte;
      * an abbrev code missing from `cu.abbrev` (malformed input).
        new_off is then at least `cu.body_end`, so that every enclosing
        child loop stops as well and no further bytes of this CU are
        decoded from an unknown position.
    """
    die_off = off
    code, off = readUleb(buf, off)
    if code == 0:
        return (None, off)
    abbrev = cu.abbrev.get(code)
    if abbrev is None:
        # Without the abbrev we cannot tell how long this DIE is, so
        # nothing after it in the CU can be located. Returning `off`
        # here would look like an ordinary null terminator: only the
        # innermost sibling list would stop and the ancestors would go
        # on decoding from the middle of this DIE.
        return (None, max(off, cu.body_end))
    tag, has_kids, attr_list = abbrev
    attrs = {}
    for at, fm, ic in attr_list:
        val, off = readForm(buf, off, fm,
                            addr_size=cu.addr_size,
                            dwarf_size=cu.dwarf_size,
                            implicit_val=ic)
        attrs[at] = (val, fm)
    die = Die(die_off - cu.cu_start, tag, attrs)
    die.parent = parent
    if has_kids:
        # Children run until a null entry or the end of the CU body.
        while off < cu.body_end:
            child, off = parseDie(buf, off, cu, parent=die)
            if child is None:
                break
            die.children.append(child)
    return (die, off)
def parseDebugInfoCu(info_buf, info_off, abbrev_buf, sections):
    """Parse one unit header + DIE tree at .debug_info offset `info_off`.

    `sections` supplies the tables this unit's DIEs index into, under
    the keys "debug_str", "line_str", "str_offsets" and "addr".

    Returns (cu, next_off), where next_off is the start of the next
    unit (or the end of the section).

    Header handling:
      * DWARF 4 and 5 header layouts are supported (32-bit DWARF only).
      * A unit_length of 0 is treated as "rest of the section", the
        same tolerance parseDwarf5Header applies to unrelocated length
        slots in older sidecars.
      * DWARF 5 skeleton / split-compile units carry an 8-byte dwo_id
        after the common header, and type / split-type units an 8-byte
        type signature plus an offset-sized type_offset. Those fields
        are skipped so the first DIE is read from the right place.

    Raises NotImplementedError for 64-bit DWARF or an unsupported
    version.
    """
    cu = Cu()
    cu.cu_start = info_off
    cu.debug_str = sections["debug_str"]
    cu.line_str = sections["line_str"]
    cu.str_offsets = sections["str_offsets"]
    cu.addr_section = sections["addr"]
    unit_length, off = readU32(info_buf, info_off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF .debug_info not handled")
    if unit_length == 0:
        # Unrelocated length slot: assume the unit fills the section.
        next_off = len(info_buf)
    else:
        next_off = off + unit_length
    cu.dwarf_size = 4
    cu.version, off = readU16(info_buf, off)
    if cu.version == 5:
        cu.unit_type, off = readU8(info_buf, off)
        cu.addr_size, off = readU8(info_buf, off)
        cu.abbrev_off, off = readU32(info_buf, off)
        if cu.unit_type in (0x04, 0x05):
            # DW_UT_skeleton / DW_UT_split_compile: dwo_id (8 bytes).
            off += 8
        elif cu.unit_type in (0x02, 0x06):
            # DW_UT_type / DW_UT_split_type: type_signature (8 bytes)
            # followed by type_offset (offset-sized).
            off += 8 + cu.dwarf_size
    elif cu.version == 4:
        # v4 swaps the order: abbrev offset first, then address size.
        cu.abbrev_off, off = readU32(info_buf, off)
        cu.addr_size, off = readU8(info_buf, off)
        cu.unit_type = 0
    else:
        raise NotImplementedError(f"DWARF v{cu.version} CU not handled")
    cu.body_start = off
    cu.body_end = next_off
    cu.abbrev = parseAbbrevTable(abbrev_buf, cu.abbrev_off)
    # parseDie keeps strx/addrx values as raw indices, so the base
    # attributes on the root DIE can be picked up after the whole tree
    # has been parsed.
    cu.root, _ = parseDie(info_buf, off, cu)
    if cu.root is not None:
        sob = cu.root.getRaw(DW_AT_str_offsets_base)
        if sob is not None:
            cu.str_offsets_base = sob[0]
        ab = cu.root.getRaw(DW_AT_addr_base)
        if ab is not None:
            cu.addr_base = ab[0]
    return cu, next_off
def parseAllCus(payload_map):
    """Parse every CU found in the sidecar's .debug_info payloads.

    `payload_map` maps a section name to the list of payloads for that
    section, one per contributing input object (slice).

    Returns list[Cu]; empty when there is no .debug_info or no
    .debug_abbrev payload.
    """
    info_payloads = payload_map.get(".debug_info", [])
    abbrev_payloads = payload_map.get(".debug_abbrev", [])
    if not (info_payloads and abbrev_payloads):
        return []

    # Tables that may also be per-slice; each is indexed like the
    # .debug_info payloads, with b"" standing in when a slice has none.
    side_tables = (
        ("debug_str", ".debug_str"),
        ("line_str", ".debug_line_str"),
        ("str_offsets", ".debug_str_offsets"),
        ("addr", ".debug_addr"),
    )

    cus = []
    # A CU's abbrev_off is relative to the .debug_abbrev section of ITS
    # own .o, so the i-th .debug_info is paired with the i-th
    # .debug_abbrev; zip() stops at the shorter of the two lists.
    slices = zip(info_payloads, abbrev_payloads)
    for i, (info_buf, abbrev_buf) in enumerate(slices):
        slice_sections = {}
        for key, sec_name in side_tables:
            payloads = payload_map.get(sec_name, [])
            slice_sections[key] = payloads[i] if i < len(payloads) else b""
        off = 0
        limit = len(info_buf)
        while off < limit:
            try:
                cu, off = parseDebugInfoCu(info_buf, off, abbrev_buf,
                                           slice_sections)
            except NotImplementedError as e:
                # Give up on the rest of this slice; later slices are
                # still attempted.
                print(f"pc2line: skipping CU: {e}", file=sys.stderr)
                break
            cus.append(cu)
    return cus
# ---- DIE search / attribute formatting -------------------------------
# Lazily-built {code: short_name} caches for tagName / attrName /
# formName. Each stays None until its accessor is first called.
_TAG_NAMES = None
_AT_NAMES = None
_FORM_NAMES = None
def tagName(code):
    """Return the short name for a DW_TAG_ code (prefix stripped), or
    `DW_TAG_<0x..>` for a code with no matching constant."""
    global _TAG_NAMES
    names = _TAG_NAMES
    if names is None:
        # First use: build the reverse map and cache it.
        names = _TAG_NAMES = _buildNameMap("DW_TAG_")
    fallback = f"DW_TAG_<0x{code:x}>"
    return names.get(code, fallback)
def attrName(code):
    """Return the short name for a DW_AT_ code (prefix stripped), or
    `DW_AT_<0x..>` for a code with no matching constant."""
    global _AT_NAMES
    names = _AT_NAMES
    if names is None:
        # First use: build the reverse map and cache it.
        names = _AT_NAMES = _buildNameMap("DW_AT_")
    fallback = f"DW_AT_<0x{code:x}>"
    return names.get(code, fallback)
def formName(code):
    """Return the short name for a DW_FORM_ code (prefix stripped), or
    `DW_FORM_<0x..>` for a code with no matching constant."""
    global _FORM_NAMES
    names = _FORM_NAMES
    if names is None:
        # First use: build the reverse map and cache it.
        names = _FORM_NAMES = _buildNameMap("DW_FORM_")
    fallback = f"DW_FORM_<0x{code:x}>"
    return names.get(code, fallback)
def dieName(cu, die):
    """Return the DIE's DW_AT_name as a string.

    Returns None when the DIE has no name attribute or the attribute
    uses a form other than string, strp, line_strp or strx*. Indexed
    and offset forms are resolved through `cu`, which substitutes an
    `@...` placeholder when the table lookup fails.
    """
    record = die.getRaw(DW_AT_name)
    if record is None:
        return None
    value, form = record
    if form == DW_FORM_string:
        # Inline string: already decoded.
        return value
    if form == DW_FORM_line_strp:
        return cu.lookupLineStr(value)
    if form == DW_FORM_strp:
        return cu.lookupStrp(value)
    indexed_forms = (DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2,
                     DW_FORM_strx3, DW_FORM_strx4)
    return cu.lookupStr(value) if form in indexed_forms else None
def diePcRange(cu, die):
    """Return (low_pc, high_pc) for a DIE, or (None, None).

    (None, None) means the DIE has no DW_AT_low_pc or its low_pc index
    could not be resolved through the address table.

    DW_AT_high_pc is interpreted by form:
      * absent            -> (lo, lo), an empty range;
      * DW_FORM_addr      -> absolute address;
      * DW_FORM_addrx*    -> absolute address, resolved through the
                             address table like low_pc; if the index
                             cannot be resolved the range collapses to
                             (lo, lo) so callers always get two ints;
      * anything else     -> offset from low_pc (DWARF 5 §3.3.1.1; the
                             default clang emits).
    """
    addrx_forms = (DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2,
                   DW_FORM_addrx3, DW_FORM_addrx4)
    lo_rec = die.getRaw(DW_AT_low_pc)
    if lo_rec is None:
        return (None, None)
    lo_val, lo_form = lo_rec
    lo = cu.lookupAddr(lo_val) if lo_form in addrx_forms else lo_val
    if lo is None:
        return (None, None)
    hi_rec = die.getRaw(DW_AT_high_pc)
    if hi_rec is None:
        return (lo, lo)
    hi_val, hi_form = hi_rec
    if hi_form == DW_FORM_addr:
        return (lo, hi_val)
    if hi_form in addrx_forms:
        # An addrx-form high_pc is an address (table index), not an
        # offset; adding the index to low_pc would give a bogus range.
        hi = cu.lookupAddr(hi_val)
        return (lo, lo if hi is None else hi)
    # Offset form (DWARF 5 default for clang).
    return (lo, lo + hi_val)
def findSubprogramForPc(cus, pc):
    """Find the subprogram whose [low, high) range contains `pc`.

    Only DW_TAG_subprogram DIEs that are direct children of a CU's root
    DIE are considered. CUs are searched in order and the first match
    wins.

    Returns (cu, subprogram_die), or (None, None) when nothing matches.
    """
    for cu in cus:
        root = cu.root
        if root is None:
            continue
        for die in root.children:
            if die.tag == DW_TAG_subprogram:
                lo, hi = diePcRange(cu, die)
                if lo is not None and lo <= pc < hi:
                    return (cu, die)
    return (None, None)
def formatAttr(cu, at, val, form):
    """Render one attribute value as text for --dump-dies output.

    DW_AT_name in a string form is resolved to the string itself.
    Otherwise the rendering depends on the form: expression/block bytes
    as `<expr HEX>`, addresses as 6-digit hex (addrx indices resolved
    through `cu`), flags as true/false. Remaining ints print as
    decimal plus hex, remaining bytes as hex, anything else via repr().
    """
    strx_forms = (DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2,
                  DW_FORM_strx3, DW_FORM_strx4)
    addrx_forms = (DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2,
                   DW_FORM_addrx3, DW_FORM_addrx4)
    byte_forms = (DW_FORM_exprloc, DW_FORM_block, DW_FORM_block1,
                  DW_FORM_block2, DW_FORM_block4)

    if at == DW_AT_name:
        if form in strx_forms:
            return cu.lookupStr(val)
        if form == DW_FORM_strp:
            return cu.lookupStrp(val)
        if form == DW_FORM_line_strp:
            return cu.lookupLineStr(val)
        if form == DW_FORM_string:
            return val
        # A name in any other form falls through to the generic rules.

    if form in byte_forms:
        return f"<expr {val.hex()}>"
    if form in addrx_forms:
        resolved = cu.lookupAddr(val)
        return f"addrx[{val}]=?" if resolved is None else f"0x{resolved:06x}"
    if form == DW_FORM_addr:
        return f"0x{val:06x}"
    if form in (DW_FORM_flag, DW_FORM_flag_present):
        return "true" if val else "false"

    # Generic fallbacks by Python type.
    if isinstance(val, int):
        return f"{val} (0x{val:x})"
    if isinstance(val, bytes):
        return val.hex()
    return repr(val)
def dumpDieRecursive(cu, die, depth, out):
    """Append a text dump of `die` and its whole subtree to list `out`.

    Each DIE contributes a header line (offset, tag, quoted name if
    any) followed by one line per attribute other than DW_AT_name;
    children follow depth-first, indented one level deeper.
    """
    pad = " " * depth
    name = dieName(cu, die)
    label = f" \"{name}\"" if name else ""
    out.append(f"{pad}<0x{die.offset:x}> {tagName(die.tag)}{label}")
    for at, (val, form) in die.attrs.items():
        if at != DW_AT_name:
            # The name is already shown on the header line.
            out.append(f"{pad} {attrName(at)} ({formName(form)}) = "
                       f"{formatAttr(cu, at, val, form)}")
    for kid in die.children:
        dumpDieRecursive(cu, kid, depth + 1, out)
def dumpDiesAtPc(cus, pc):
    """Print the subprogram containing `pc`: first its full DIE tree,
    then a flat list of its parameters and variables
    (DW_TAG_formal_parameter + DW_TAG_variable), descending into
    lexical blocks and inlined subroutines.

    Returns 1 (after a message on stderr) when no subprogram covers
    `pc`; returns None on success.
    """
    cu, subprogram = findSubprogramForPc(cus, pc)
    if subprogram is None:
        print(f"pc2line: no DW_TAG_subprogram covers PC 0x{pc:06x}",
              file=sys.stderr)
        return 1

    low, high = diePcRange(cu, subprogram)
    display_name = dieName(cu, subprogram) or "<unnamed>"
    print(f"PC 0x{pc:06x} -> subprogram {display_name!r} "
          f"[0x{low:06x}, 0x{high:06x})")

    tree_lines = []
    dumpDieRecursive(cu, subprogram, 0, tree_lines)
    print("\n".join(tree_lines))

    # Also list parameters + variables in flat form for easy parsing.
    print("\n; variables under this subprogram (DW_TAG_formal_parameter "
          "+ DW_TAG_variable):")
    _flatVarList(cu, subprogram, [], None)
    return None
def _flatVarList(cu, die, scope_stack, _unused):
    """Print every formal_parameter / variable under `die`, one per line.

    Lexical blocks and inlined subroutines are descended into; the
    chain of enclosing scopes (with PC ranges where known) is appended
    to each printed line. `scope_stack` is the list of scope labels
    accumulated so far and is never mutated. `_unused` is ignored.
    """
    for child in die.children:
        tag = child.tag

        if tag == DW_TAG_lexical_block:
            lo, hi = diePcRange(cu, child)
            # A block with no resolvable range adds no scope label.
            if lo is None:
                nested = list(scope_stack)
            else:
                nested = [*scope_stack, f"block[0x{lo:06x},0x{hi:06x})"]
            _flatVarList(cu, child, nested, None)
            continue

        if tag == DW_TAG_inlined_subroutine:
            lo, hi = diePcRange(cu, child)
            inlined_name = dieName(cu, child) or "<inlined>"
            if lo is None:
                label = f"inlined {inlined_name}"
            else:
                label = f"inlined {inlined_name}[0x{lo:06x},0x{hi:06x})"
            _flatVarList(cu, child, [*scope_stack, label], None)
            continue

        if tag == DW_TAG_formal_parameter:
            kind = "param"
        elif tag == DW_TAG_variable:
            kind = "var"
        else:
            continue

        name = dieName(cu, child) or "<unnamed>"
        location = child.getRaw(DW_AT_location)
        if location is None:
            loc_str = "<no location>"
        else:
            loc_val, loc_form = location
            loc_str = formatAttr(cu, DW_AT_location, loc_val, loc_form)
        scope_str = (" @ " + " > ".join(scope_stack)) if scope_stack else ""
        print(f" {kind}\t{name}\tloc={loc_str}{scope_str}")
# ---- DW_OP evaluator (Phase 3.2 slice 2) -----------------------------
#
# DWARF expression opcodes from §7.7.1. We support only the subset
# needed for -O0 stack-resident locals and trivial -O2 IMG locals:
# DW_OP_addr (0x03): absolute address
# DW_OP_constN/Nu/Ns: small constants
# DW_OP_regN (0x50..0x57): register location (where N <= 7)
# DW_OP_bregN (0x70..0x77): register + signed offset
# DW_OP_fbreg (0x91): frame-base + signed offset
# DW_OP_call_frame_cfa (0x9c): equivalent to frame-base in our ABI
# DW_OP_regx (0x90): ULEB-encoded register
# DW_OP_bregx (0x92): ULEB-encoded register + signed offset
# DW_OP_plus_uconst (0x23): pop, add ULEB, push
# DW_OP_plus (0x22): pop two, push sum
# DW_OP_deref (0x06): pop addr, push *(addr) (memory-aware mode)
# DW_OP_stack_value (0x9f): final result IS the value (not the addr)
# DW_OP_piece (0x93): composite; not handled — returns None
#
# We return a structured `LocResult` so callers can distinguish
# memory addresses, register-resident values, IMG-slot addresses,
# and composite/unsupported expressions.
DW_OP_addr = 0x03
DW_OP_deref = 0x06
DW_OP_const1u = 0x08
DW_OP_const1s = 0x09
DW_OP_const2u = 0x0a
DW_OP_const2s = 0x0b
DW_OP_const4u = 0x0c
DW_OP_const4s = 0x0d
DW_OP_const8u = 0x0e
DW_OP_const8s = 0x0f
DW_OP_constu = 0x10
DW_OP_consts = 0x11
DW_OP_plus = 0x22
DW_OP_plus_uconst = 0x23
# Base opcodes of the per-register families: register N is encoded as
# base + N. _evalFrameBase accepts 0x50..0x6f as regN and 0x70..0x8f as
# bregN.
DW_OP_reg0 = 0x50
DW_OP_breg0 = 0x70
DW_OP_regx = 0x90  # register number follows as ULEB128
DW_OP_fbreg = 0x91
DW_OP_bregx = 0x92  # ULEB128 register, then SLEB128 offset
DW_OP_piece = 0x93
DW_OP_call_frame_cfa = 0x9c
DW_OP_stack_value = 0x9f
# Map W65816 Dwarf register numbers (from W65816RegisterInfo.td) to
# their direct-page memory locations. IMG0..IMG7 at $D0..$DE,
# IMG8..IMG15 at $C0..$CE (two bytes each). DPF0 at $F0.
# SP (DwarfRegNum=3) handled separately via the stack-pointer parameter.
# Keys are DWARF register numbers; values are direct-page byte
# addresses. Register numbers absent from this table have no DP
# mapping here.
W65816_DW_REG_TO_DP = {
    16: 0xD0, 17: 0xD2, 18: 0xD4, 19: 0xD6, # IMG0..IMG3
    20: 0xD8, 21: 0xDA, 22: 0xDC, 23: 0xDE, # IMG4..IMG7
    24: 0xF0, # DPF0
    32: 0xC0, 33: 0xC2, 34: 0xC4, 35: 0xC6, # IMG8..IMG11
    36: 0xC8, 37: 0xCA, 38: 0xCC, 39: 0xCE, # IMG12..IMG15
}
class LocResult:
    """Outcome of evaluating a DWARF location expression.

    `kind` selects which of the other fields is meaningful; fields not
    supplied as keyword arguments are None.

      "memory"      addr    -- 24-bit byte address in target memory
      "register"    reg_dw  -- DWARF register number
                    dp_addr -- mapped DP byte address (None if the
                               register is not a W65816 DP-mapped reg)
      "value"       value   -- the computed value (DW_OP_stack_value)
      "composite"   pieces  -- list of LocResult, one per DW_OP_piece
      "unsupported" reason  -- human-readable explanation
    """

    _FIELDS = ("addr", "reg_dw", "dp_addr", "value", "pieces", "reason")

    def __init__(self, kind, **kw):
        self.kind = kind
        for field in self._FIELDS:
            setattr(self, field, kw.get(field))

    def displayAddr(self):
        """Return the fixed memory byte address this result denotes
        (RAM address, or the DP address of a mapped register), else
        None."""
        kind = self.kind
        if kind == "memory":
            return self.addr
        return self.dp_addr if kind == "register" else None

    def __repr__(self):
        kind = self.kind
        if kind == "memory":
            return f"mem[0x{self.addr:06x}]"
        if kind == "register":
            if self.dp_addr is None:
                return f"reg{self.reg_dw}"
            return f"reg{self.reg_dw}@DP[0x{self.dp_addr:02x}]"
        if kind == "value":
            return f"value(0x{self.value:x})"
        if kind == "composite":
            inner = ", ".join(repr(piece) for piece in self.pieces)
            return "composite[" + inner + "]"
        # "unsupported" and any unrecognized kind.
        return f"unsupported({self.reason})"
def _evalFrameBase(cu, sub_die, sp_value):
    """Resolve a subprogram's DW_AT_frame_base to a byte address.

    The result is the address that DW_OP_fbreg offsets are relative
    to, or None when it cannot be determined (no SP snapshot, a
    non-SP register, an empty / non-bytes expression, or an opcode
    this function does not recognise).

    For W65816 -O0, clang emits `frame_base = DW_OP_reg3` (SP). Our
    ABI is empty-descending: S points to the next-free byte. LLVM's
    PEI assigns FrameOffset assuming full-descending, then
    W65816RegisterInfo::eliminateFrameIndex adds +1 for locals (see
    feedback_stack_skew.md). Hence every SP-based form resolves to
    `sp_value + 1` (plus the signed offset for the breg forms).

    Only the first opcode of the expression is examined. `cu` is
    accepted for signature symmetry and is not used.
    """
    fb_rec = sub_die.getRaw(DW_AT_frame_base)
    if fb_rec is None:
        # No frame_base attribute: best-effort fallback to the SP-derived base.
        return None if sp_value is None else sp_value + 1

    raw, _form = fb_rec
    if not isinstance(raw, (bytes, bytearray)):
        return None
    expr = bytes(raw)
    if not expr:
        return None

    # Step 1: decode the leading opcode into (register, signed offset).
    opcode = expr[0]
    delta = 0
    if 0x50 <= opcode <= 0x6f:              # DW_OP_reg0..reg31
        reg = opcode - 0x50
    elif opcode == DW_OP_regx:
        reg, _ = readUleb(expr, 1)
    elif opcode == DW_OP_call_frame_cfa:
        # Per our ABI, the CFA is the SP value at function entry,
        # which equals current S + 1 for an empty-descending stack,
        # i.e. the same result as the plain SP register form.
        reg = 3
    elif opcode == DW_OP_bregx:
        reg, nxt = readUleb(expr, 1)
        delta, _ = readSleb(expr, nxt)
    elif 0x70 <= opcode <= 0x8f:            # DW_OP_breg0..breg31
        reg = opcode - 0x70
        delta, _ = readSleb(expr, 1)
    else:
        return None

    # Step 2: only SP (DWARF register 3) can be turned into an address.
    if reg != 3 or sp_value is None:
        return None
    return sp_value + 1 + delta
def _dwRegisterLoc(reg, sp_value):
    """Build the "register" LocResult for DWARF register number `reg`.

    Shared by DW_OP_regN and DW_OP_regx so both spellings of the same
    register give the same answer. SP (register 3) with a known
    `sp_value` is reported at the post-skew stack address
    `sp_value + 1`; every other register uses W65816_DW_REG_TO_DP
    (None when unmapped).
    """
    if reg == 3 and sp_value is not None:
        # SP-as-register is unusual but legal; treat as the
        # post-skew stack address (i.e., the frame-base byte).
        return LocResult("register", reg_dw=reg, dp_addr=sp_value + 1)
    return LocResult("register", reg_dw=reg,
                     dp_addr=W65816_DW_REG_TO_DP.get(reg))


def _evalDwOpSteps(expr, frame_base, sp_value):
    """Decode-and-execute loop behind evalDwOp.

    Truncated operands surface as IndexError (byte / LEB128 reads) or
    struct.error (fixed-width reads); evalDwOp converts those into an
    "unsupported" result.
    """
    stack = []
    is_value = False
    off = 0
    n = len(expr)
    while off < n:
        op = expr[off]
        off += 1
        if op == DW_OP_addr:
            # 4-byte addr in our DWARF.
            addr, off = readU32(expr, off)
            stack.append(addr)
            continue
        if op == DW_OP_const1u:
            v, off = readU8(expr, off)
            stack.append(v)
            continue
        if op == DW_OP_const1s:
            v, off = readU8(expr, off)
            if v >= 0x80:
                v -= 0x100
            stack.append(v)
            continue
        if op == DW_OP_const2u:
            v, off = readU16(expr, off)
            stack.append(v)
            continue
        if op == DW_OP_const2s:
            v, off = readU16(expr, off)
            if v >= 0x8000:
                v -= 0x10000
            stack.append(v)
            continue
        if op == DW_OP_const4u:
            v, off = readU32(expr, off)
            stack.append(v)
            continue
        if op == DW_OP_const4s:
            v, off = readU32(expr, off)
            if v >= 0x80000000:
                v -= 0x100000000
            stack.append(v)
            continue
        if op == DW_OP_constu:
            v, off = readUleb(expr, off)
            stack.append(v)
            continue
        if op == DW_OP_consts:
            v, off = readSleb(expr, off)
            stack.append(v)
            continue
        if op == DW_OP_plus:
            if len(stack) < 2:
                return LocResult("unsupported",
                                 reason="DW_OP_plus stack underflow")
            rhs = stack.pop()
            lhs = stack.pop()
            stack.append(lhs + rhs)
            continue
        if op == DW_OP_plus_uconst:
            inc, off = readUleb(expr, off)
            if not stack:
                return LocResult("unsupported",
                                 reason="DW_OP_plus_uconst stack underflow")
            stack.append(stack.pop() + inc)
            continue
        if op == DW_OP_fbreg:
            ofs, off = readSleb(expr, off)
            if frame_base is None:
                return LocResult("unsupported",
                                 reason="DW_OP_fbreg without frame_base")
            stack.append(frame_base + ofs)
            continue
        if op == DW_OP_call_frame_cfa:
            if sp_value is None:
                return LocResult("unsupported",
                                 reason="DW_OP_call_frame_cfa without SP")
            stack.append(sp_value + 1)
            continue
        if 0x50 <= op <= 0x6f:
            # DW_OP_regN - the value is in register N; not a memory addr.
            # We return a register LocResult immediately (anything that
            # follows in the expression is not evaluated) to keep the
            # W65816 DP-mapping precise.
            return _dwRegisterLoc(op - 0x50, sp_value)
        if op == DW_OP_regx:
            reg, off = readUleb(expr, off)
            return _dwRegisterLoc(reg, sp_value)
        if 0x70 <= op <= 0x8f or op == DW_OP_bregx:
            # DW_OP_bregN / DW_OP_bregx - register + signed offset,
            # pushed as an address. The two forms differ only in how the
            # register number is encoded and in their diagnostic text.
            if op == DW_OP_bregx:
                reg, off = readUleb(expr, off)
                sp_label = "DW_OP_bregx(SP)"
                reg_label = f"DW_OP_bregx({reg})"
            else:
                reg = op - 0x70
                sp_label = "DW_OP_breg3"
                reg_label = f"DW_OP_breg{reg}"
            ofs, off = readSleb(expr, off)
            if reg == 3:
                if sp_value is None:
                    return LocResult("unsupported",
                                     reason=f"{sp_label} without SP")
                stack.append(sp_value + 1 + ofs)
            else:
                dp = W65816_DW_REG_TO_DP.get(reg)
                if dp is None:
                    return LocResult("unsupported",
                                     reason=f"{reg_label} unmapped")
                stack.append(dp + ofs)
            continue
        if op == DW_OP_deref:
            # Need a memory read function to honor this; out of scope.
            return LocResult("unsupported", reason="DW_OP_deref not handled")
        if op == DW_OP_stack_value:
            is_value = True
            break
        if op == DW_OP_piece:
            # Composite - out of scope for this slice (multi-piece i32
            # IMG-resident locals). Surfacing as unsupported is honest.
            return LocResult("unsupported",
                             reason="DW_OP_piece composite not handled")
        return LocResult("unsupported", reason=f"DW_OP 0x{op:x} not handled")
    if not stack:
        return LocResult("unsupported", reason="empty expression stack")
    top = stack[-1]
    if is_value:
        return LocResult("value", value=top & 0xFFFFFFFF)
    return LocResult("memory", addr=top & 0xFFFFFF)


def evalDwOp(expr, cu=None, frame_base=None, sp_value=None):
    """Evaluate a DWARF expression `expr` (bytes). Returns a LocResult.

    `frame_base`: byte address the frame base resolves to (caller
    pre-computes it via _evalFrameBase); needed for DW_OP_fbreg.
    `sp_value`: S register snapshot, needed for DW_OP_reg3 /
    DW_OP_breg3 / DW_OP_bregx(3) / DW_OP_call_frame_cfa.
    `cu` is accepted for call-site symmetry and is not used.

    Stack-based: most ops push/pop the implicit DWARF stack. At the
    end, the top-of-stack is the byte address (masked to 24 bits),
    unless DW_OP_stack_value flipped that to "the top IS the value"
    (masked to 32 bits). Register ops return a "register" result
    immediately.

    This function never raises for malformed input: an empty
    expression, an unhandled opcode, a stack underflow, or operands
    that run past the end of `expr` all yield
    LocResult("unsupported", reason=...).
    """
    if expr is None or len(expr) == 0:
        return LocResult("unsupported", reason="empty expression")
    try:
        return _evalDwOpSteps(expr, frame_base, sp_value)
    except (IndexError, struct.error):
        # An operand read ran off the end of the expression bytes.
        return LocResult("unsupported",
                         reason="truncated DWARF expression")
# ---- Type-chain resolution for --locals ------------------------------
def _findDieByOffset(cu, target_cu_off):
    """Locate the DIE in `cu` whose `offset` equals `target_cu_off`.

    Performs a depth-first walk of the tree rooted at `cu.root` using
    an explicit work list (no recursion). Returns the matching DIE, or
    None when the CU has no root or no DIE carries that offset.
    """
    root = cu.root
    if root is None:
        return None
    pending = [root]
    while pending:
        node = pending.pop()
        if node.offset == target_cu_off:
            return node
        # Children are queued behind the current node; the last one
        # queued is examined first.
        pending.extend(node.children)
    return None
def typeChain(cu, die):
    """Resolve a DW_AT_type chain into a printable C-ish type string.

    Starting at `die`, follows DW_AT_type references through
    pointer / const / volatile / restrict wrappers until a naming DIE
    is reached: base_type, typedef, array_type (size taken from the
    first subrange child), structure / union / enum / class type, or
    subroutine_type. `restrict` is followed but not printed.

    Returns:
      "<no type>"              when `die` is None
      "...<unresolved>..."     when a DW_AT_type reference points at an
                               offset that is not in this CU
      "<cyclic>"               when the chain loops back on itself
      "<unresolved tag 0x..>"  when an unknown tag is encountered
      otherwise the best-effort type text (qualifiers are gathered as a
      prefix and pointer stars as a suffix, so qualifier placement is
      approximate).
    """
    if die is None:
        return "<no type>"
    visited = set()
    prefix = ""
    suffix = ""
    cur = die
    while cur is not None and cur.offset not in visited:
        visited.add(cur.offset)
        tag = cur.tag
        if tag in (DW_TAG_pointer_type, DW_TAG_const_type,
                   DW_TAG_volatile_type, DW_TAG_restrict_type):
            if tag == DW_TAG_pointer_type:
                suffix = "*" + suffix
            elif tag == DW_TAG_const_type:
                prefix = "const " + prefix
            elif tag == DW_TAG_volatile_type:
                prefix = "volatile " + prefix
            t = cur.getRaw(DW_AT_type)
            if t is None:
                # A wrapper without DW_AT_type wraps `void`. Qualifiers
                # collected so far are kept on every path (including
                # pointer-to-void, e.g. `void *const`).
                void = "void " if tag == DW_TAG_pointer_type else "void"
                return (prefix + void + suffix).strip()
            cur = _findDieByOffset(cu, t[0])
            continue
        if tag in (DW_TAG_typedef, DW_TAG_base_type):
            nm = dieName(cu, cur) or "?"
            return (prefix + nm + suffix).strip()
        if tag == DW_TAG_array_type:
            # Look for first DW_TAG_subrange_type child for size.
            bound = None
            for ch in cur.children:
                if ch.tag == DW_TAG_subrange_type:
                    ub = ch.getRaw(DW_AT_upper_bound)
                    # Only a plain integer bound can be turned into an
                    # element count; other encodings (e.g. an
                    # expression block) print as an unsized array.
                    if ub is not None and isinstance(ub[0], int):
                        bound = ub[0] + 1
                    break
            t = cur.getRaw(DW_AT_type)
            elem = "?"
            if t is not None:
                elem = typeChain(cu, _findDieByOffset(cu, t[0]))
            bnd_str = "" if bound is None else str(bound)
            return f"{prefix}{elem}[{bnd_str}]{suffix}".strip()
        if tag in (DW_TAG_structure_type, DW_TAG_union_type,
                   DW_TAG_enumeration_type, DW_TAG_class_type):
            kw = {DW_TAG_structure_type: "struct",
                  DW_TAG_union_type: "union",
                  DW_TAG_enumeration_type: "enum",
                  DW_TAG_class_type: "class"}[tag]
            nm = dieName(cu, cur) or "<anon>"
            return f"{prefix}{kw} {nm}{suffix}".strip()
        if tag == DW_TAG_subroutine_type:
            return f"{prefix}<func>{suffix}".strip()
        # Unknown link in the chain - bail.
        return f"<unresolved tag 0x{tag:x}>"
    if cur is None:
        # The last DW_AT_type reference did not resolve to any DIE in
        # this CU; that is a dangling reference, not a cycle.
        return f"{prefix}<unresolved>{suffix}".strip()
    return "<cyclic>"
def varTypeStr(cu, var_die):
    """Render the type of a variable / parameter DIE as C-ish text.

    Looks up the DIE referenced by `var_die`'s DW_AT_type inside `cu`
    and formats it with typeChain. A DIE that has no DW_AT_type
    attribute yields the literal "<no DW_AT_type>".
    """
    type_ref = var_die.getRaw(DW_AT_type)
    if type_ref is not None:
        return typeChain(cu, _findDieByOffset(cu, type_ref[0]))
    return "<no DW_AT_type>"
# ---- --locals 0xPC mode (Phase 3.2 slice 2) -------------------------
def _collectLocals(cu, die, pc, out, scope_stack):
    """Append to `out` the parameter / variable DIEs visible at `pc`.

    Scans the direct children of `die`:
      * formal_parameter and variable DIEs are appended to `out`;
      * lexical_block DIEs are descended into when they have no PC
        range or when `pc` falls inside [lo, hi);
      * inlined_subroutine DIEs are skipped without descent (out of
        scope for this slice).

    `scope_stack` is passed through to recursive calls unchanged and
    is not otherwise used. Nothing is returned.
    """
    for node in die.children:
        tag = node.tag
        if tag == DW_TAG_lexical_block:
            lo, hi = diePcRange(cu, node)
            # A block without a range is treated as always in scope.
            if lo is None or lo <= pc < hi:
                _collectLocals(cu, node, pc, out, scope_stack)
        elif tag == DW_TAG_inlined_subroutine:
            # Inlined: out of scope for this slice; skip descent.
            pass
        elif tag in (DW_TAG_formal_parameter, DW_TAG_variable):
            out.append(node)
def _locateVar(cu, var_die, frame_base, sp_value):
    """Evaluate DW_AT_location of `var_die` into a LocResult.

    A missing attribute, or one whose value is not an expression byte
    string, is reported as an "unsupported" LocResult.
    """
    loc_rec = var_die.getRaw(DW_AT_location)
    if loc_rec is None:
        return LocResult("unsupported", reason="no DW_AT_location")
    raw, form = loc_rec
    if not isinstance(raw, (bytes, bytearray)):
        return LocResult("unsupported",
                         reason=f"DW_AT_location form 0x{form:x}")
    return evalDwOp(bytes(raw), cu=cu,
                    frame_base=frame_base, sp_value=sp_value)


def localsAtPc(cus, pc, sp_value=None):
    """Collect the parameters and variables in scope at `pc`.

    Returns a triple `(cu, sub, entries)`:
      cu      - the compile unit containing the subprogram
      sub     - the subprogram DIE covering `pc`
      entries - list of (name, type_str, location_result, var_die),
                one per formal_parameter / variable DIE in scope

    When no subprogram covers `pc` the result is `(None, None, [])`.

    `sp_value` is the S register snapshot. It is needed to resolve
    DW_OP_fbreg / DW_OP_call_frame_cfa expressions; without it those
    locations come back as "unsupported".
    """
    cu, sub = findSubprogramForPc(cus, pc)
    if sub is None:
        return (None, None, [])
    frame_base = _evalFrameBase(cu, sub, sp_value)
    in_scope = []
    _collectLocals(cu, sub, pc, in_scope, [])
    entries = []
    for var_die in in_scope:
        name = dieName(cu, var_die) or "<unnamed>"
        type_str = varTypeStr(cu, var_die)
        where = _locateVar(cu, var_die, frame_base, sp_value)
        entries.append((name, type_str, where, var_die))
    return (cu, sub, entries)
def printLocals(cus, pc, sp_value):
    """Print the parameters and variables of the subprogram at `pc`.

    A `; PC ... in subprogram ..., S=...` header line is printed
    first, followed by one line per variable:

      VAR=<name> TYPE=<type> ADDR=0x...            (memory location)
      VAR=<name> TYPE=<type> REG=DW<n>             (register, no DP map)
      VAR=<name> TYPE=<type> ADDR=0x... REG=DW<n>  (DP-mapped register)
      VAR=<name> TYPE=<type> VALUE=0x...           (DW_OP_stack_value path)
      VAR=<name> TYPE=<type> COMPOSITE=<unsupported>
      VAR=<name> TYPE=<type> UNSUPPORTED=<reason>

    Returns 0 on success (including "nothing in scope"), or 1 after
    writing a diagnostic to stderr when no subprogram covers `pc`.
    """
    cu, sub, locs = localsAtPc(cus, pc, sp_value=sp_value)
    if sub is None:
        print(f"pc2line: no DW_TAG_subprogram covers PC 0x{pc:06x}",
              file=sys.stderr)
        return 1

    sub_name = dieName(cu, sub) or "<unnamed>"
    sp_str = "<unset>" if sp_value is None else f"0x{sp_value:06x}"
    print(f"; PC 0x{pc:06x} in subprogram {sub_name!r}, S={sp_str}")

    if not locs:
        print("; (no formal_parameter or DW_TAG_variable in scope)")
        return 0

    for name, ty, loc, _die in locs:
        kind = loc.kind
        if kind == "memory":
            tail = f" ADDR=0x{loc.addr:06x}"
        elif kind == "register":
            tail = f" REG=DW{loc.reg_dw}"
            if loc.dp_addr is not None:
                # A mapped register additionally shows its byte address.
                tail = f" ADDR=0x{loc.dp_addr:06x}" + tail
        elif kind == "value":
            tail = f" VALUE=0x{loc.value:x}"
        elif kind == "composite":
            tail = " COMPOSITE=<unsupported>"
        else:
            tail = f" UNSUPPORTED={loc.reason}"
        print(f"VAR={name} TYPE={ty}" + tail)
    return 0
# ---- Main ------------------------------------------------------------
def loadSidecarSectionsAll(path):
    """Read every section payload out of a sidecar file.

    The file is scanned for header lines containing `; OBJ `. A
    header that carries both a `SEC <name>` and a `SIZE <n>` token
    pair is followed by exactly `n` raw payload bytes, which are
    skipped over before the scan resumes (so payload bytes are never
    mistaken for headers). Headers lacking either token are ignored.

    Returns {section_name: [payload_bytes, ...]} with payloads in
    file order.
    """
    with open(path, "rb") as fh:
        blob = fh.read()

    sections = {}
    cursor = 0
    while True:
        hdr_at = blob.find(b"; OBJ ", cursor)
        if hdr_at < 0:
            break
        eol = blob.find(b"\n", hdr_at)
        if eol < 0:
            # Header line is not newline-terminated: nothing follows it.
            break
        tokens = blob[hdr_at:eol].decode("utf-8", "replace").split()
        cursor = eol + 1
        if "SEC" not in tokens or "SIZE" not in tokens:
            continue
        name = tokens[tokens.index("SEC") + 1]
        nbytes = int(tokens[tokens.index("SIZE") + 1])
        sections.setdefault(name, []).append(blob[cursor:cursor + nbytes])
        cursor += nbytes
    return sections
def buildTable(sidecar_path):
    """Decode every .debug_line program in the sidecar into rows.

    Returns a list of `(pc, file_idx, line, file_table)` tuples, where
    `file_table` is the list of file names of the line program the row
    came from. Rows flagged as end-of-sequence are left out.

    File names given as offsets are looked up in the concatenated
    .debug_line_str payloads; an offset with no string prints as
    `@<offset>` and a missing name as `?`. If parseDwarf5Header raises
    NotImplementedError, the message is written to stderr and the rest
    of that section is skipped.
    """
    line_sections = loadSidecarSection(sidecar_path, ".debug_line")
    str_sections = loadSidecarSection(sidecar_path, ".debug_line_str")
    line_str = b"".join(chunk for _, chunk in str_sections)

    # Index every NUL-terminated string by the offset it starts at.
    strings_by_off = {}
    pos = 0
    while pos < len(line_str):
        nul = line_str.find(b"\0", pos)
        if nul < 0:
            break
        strings_by_off[pos] = line_str[pos:nul].decode("utf-8", "replace")
        pos = nul + 1

    def fileName(entry):
        # File entry's name field is at content type DW_LNCT_path=1.
        path = entry.get(1)
        if isinstance(path, str):
            return path
        if isinstance(path, int):
            return strings_by_off.get(path, f"@{path}")
        return "?"

    rows = []
    for _sec_name, payload in line_sections:
        cursor = 0
        while cursor < len(payload):
            try:
                hdr, body_start = parseDwarf5Header(payload, cursor,
                                                    len(payload))
            except NotImplementedError as e:
                print(f"pc2line: {e}", file=sys.stderr)
                break
            file_tbl = [fileName(fe) for fe in hdr["files"]]
            body = payload[body_start:hdr["end"]]
            rows.extend(
                (pc, fidx, ln, file_tbl)
                for pc, fidx, ln, end_seq
                in runLineProgram(body, hdr, len(body))
                if not end_seq)
            cursor = hdr["end"]
    return rows
def query(table, pc):
    """Look up the line-table row that governs `pc`.

    Picks the row with the largest PC that is <= `pc` (the first such
    row when several share that PC) and returns
    `(row_pc, file_basename, line)`. Returns None when every row lies
    above `pc` or the table is empty. A file index of 0, or one past
    the end of the row's file table, is rendered as "?".
    """
    hit = max((row for row in table if row[0] <= pc),
              key=lambda row: row[0], default=None)
    if hit is None:
        return None
    file_idx = hit[1]
    names = hit[3]
    # File indices are 1-based here.
    if file_idx != 0 and file_idx <= len(names):
        fname = names[file_idx - 1]
    else:
        fname = "?"
    return (hit[0], os.path.basename(fname), hit[2])
def loadMapSymbols(path):
    """Read `addr symbol` pairs from a link816 .map file.

    Only lines that (after stripping) start with `0x` and have at
    least two whitespace-separated fields are considered; the first
    field is parsed as hex and lines where that fails are skipped.
    Returns the `(addr, name)` tuples sorted ascending, or an empty
    list when `path` is falsy or does not exist.
    """
    if not path or not os.path.exists(path):
        return []
    found = []
    with open(path) as fh:
        for raw in fh:
            text = raw.strip()
            if not text.startswith("0x"):
                continue
            fields = text.split()
            if len(fields) < 2:
                continue
            try:
                addr = int(fields[0], 16)
            except ValueError:
                # Not a hex address after all; ignore the line.
                continue
            found.append((addr, fields[1]))
    return sorted(found)
def funcAt(syms, pc):
    """Name the symbol that most closely precedes `pc`.

    `syms` must be a list of `(addr, name)` tuples sorted by address.
    Returns the name of the last entry whose address is <= `pc`, or
    "?" when there is no such entry (or its name is empty).
    """
    # Half-open binary search for the first entry with addr > pc.
    left, right = 0, len(syms)
    while left < right:
        probe = (left + right) // 2
        if syms[probe][0] <= pc:
            left = probe + 1
        else:
            right = probe
    if left == 0:
        return "?"
    return syms[left - 1][1] or "?"
# ---- Frame sidecar (.debug_frame_w65816) -----------------------------
#
# Each record is exactly 12 bytes:
# +0 uint32_t fnPcStart (24-bit final-image address, zero-padded)
# +4 uint32_t fnPcEnd (one past the last instruction)
# +8 uint16_t frameSize (bytes that the prologue subtracts from S)
# +10 uint8_t rtlBytes (3 for JSL/RTL; reserved for inline RTS)
# +11 uint8_t pad (must be 0; reserved for future flags)
#
# Records are emitted in object-file order by W65816AsmPrinter and
# concatenated unchanged by link816's `.debug_*` sidecar pipeline.
FRAME_RECORD_SIZE = 12


def loadFrameRecords(sidecar_path):
    """Parse the .debug_frame_w65816 records from a link816 sidecar.

    Returns a list of `(pcStart, pcEnd, frameSize, rtlBytes)` tuples
    sorted ascending (frameAt relies on that order). Both PCs are
    masked to 24 bits. The list is empty when the section is absent
    (older sidecars / hand-written .s objects with no frame records).

    A chunk whose length is not a whole number of records is skipped
    in its entirety; the other chunks are still parsed, so one bad
    input object does not disable backtraces for the whole sidecar.
    """
    records = []
    for _name, payload in loadSidecarSection(sidecar_path,
                                             ".debug_frame_w65816"):
        if len(payload) % FRAME_RECORD_SIZE:
            # Truncated / corrupt chunk.
            continue
        for base in range(0, len(payload), FRAME_RECORD_SIZE):
            # Little-endian u32, u32, u16, u8; the trailing pad byte
            # is not read.
            pc_start, pc_end, frame_sz, rtl_bytes = struct.unpack_from(
                "<IIHB", payload, base)
            pc_start &= 0xFFFFFF
            pc_end &= 0xFFFFFF
            # Skip placeholder rows (both endpoints 0): the AsmPrinter
            # guard normally filters these, but a relocation that
            # resolved an entire empty function to bank 0 / addr 0
            # would still leak through.
            if pc_start == 0 and pc_end == 0:
                continue
            records.append((pc_start, pc_end, frame_sz, rtl_bytes))
    # Sorted by pcStart so lookups can binary-search (CoreMark has
    # ~150 records; Lua ~600).
    records.sort()
    return records
def frameAt(records, pc):
    """Return the frame record governing `pc`, or None.

    `records` must be sorted by pcStart (loadFrameRecords guarantees
    this). The result is the record with the largest pcStart <= `pc`;
    None is returned only when `pc` lies below every record or the
    list is empty.

    Note that pcEnd is NOT checked: a `pc` in the gap after a function
    still yields the nearest preceding record, which is useful for
    diagnostics. Callers that need strict containment should compare
    `pc` against the record's pcEnd (exclusive) themselves.
    """
    # Half-open binary search for the first record with pcStart > pc.
    lo, hi = 0, len(records)
    while lo < hi:
        mid = (lo + hi) // 2
        if records[mid][0] <= pc:
            lo = mid + 1
        else:
            hi = mid
    if lo == 0:
        return None
    return records[lo - 1]
def _parseIntArg(text):
    """argparse `type=` hook: parse a decimal or 0x-prefixed integer.

    Raises argparse.ArgumentTypeError so that a malformed number is
    reported as a normal usage error instead of a traceback.
    """
    try:
        return int(text, 0)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid integer {text!r} "
            "(expected hex 0x... or decimal)") from None


def _runPerPc(pcs, handler):
    """Call `handler(pc)` for each PC in order.

    Returns the last non-zero status any call produced, or 0 when
    every call returned a falsy status.
    """
    rc = 0
    for pc in pcs:
        status = handler(pc)
        if status:
            rc = status
    return rc


def _printCuList(cus):
    """Print each compile unit followed by its top-level subprograms."""
    for ci, cu in enumerate(cus):
        cu_name = dieName(cu, cu.root) if cu.root else None
        print(f"CU #{ci} v{cu.version} addr_size={cu.addr_size} "
              f"name={cu_name!r}")
        if cu.root is None:
            continue
        for child in cu.root.children:
            if child.tag != DW_TAG_subprogram:
                continue
            lo, hi = diePcRange(cu, child)
            nm = dieName(cu, child) or "<unnamed>"
            if lo is None:
                print(f" subprogram {nm!r} (no PC range)")
            else:
                print(f" subprogram {nm!r} "
                      f"[0x{lo:06x}, 0x{hi:06x})")


def main():
    """Command-line entry point; returns the process exit status.

    Modes, in order of precedence:
      --list-cus   list compile units and their subprograms
      --locals     print parameters / locals at each given PC
      --dump-dies  dump the DIE subtree covering each given PC
      --dump       print the whole PC -> line table
      (default)    resolve each given PC to FILE / LINE / FUNC

    Returns 0 on success and 2 when a mode that needs PCs was given
    none. The --locals / --dump-dies modes return the last non-zero
    status reported by printLocals / dumpDiesAtPc. Malformed numeric
    arguments are rejected by argparse itself (usage error).
    """
    ap = argparse.ArgumentParser(description="PC -> source resolver")
    ap.add_argument("--sidecar", required=True,
                    help="link816 --debug-out file")
    ap.add_argument("--map", help="link816 .map (optional — for function names)")
    ap.add_argument("--dump", action="store_true",
                    help="print the full PC->line table")
    ap.add_argument("--dump-dies", action="store_true",
                    help="dump the DIE subtree for the subprogram covering"
                         " a given PC (pass PC as a positional arg)")
    ap.add_argument("--list-cus", action="store_true",
                    help="list compile units and their top-level subprograms")
    ap.add_argument("--locals", action="store_true",
                    help="print formal parameters + locals in scope at "
                         "the given PC. Requires --sp for stack-resident "
                         "locals (DW_OP_fbreg). Multiple PCs supported.")
    ap.add_argument("--sp", default=None, type=_parseIntArg,
                    help="MAME S-register snapshot (16-bit or 24-bit "
                         "hex). Used as the W65816 stack pointer when "
                         "resolving DW_OP_fbreg / DW_OP_call_frame_cfa.")
    ap.add_argument("pcs", nargs="*", type=_parseIntArg,
                    help="PCs to resolve (hex 0x... or decimal)")
    args = ap.parse_args()
    syms = loadMapSymbols(args.map)

    sp_value = args.sp
    if sp_value is not None:
        # SP is the 16-bit S register; bank-0 stack lives in bank 0,
        # so 24-bit byte addresses are just 0x000000 | (S & 0xFFFF).
        # NOTE(review): for non-negative input this mask changes
        # nothing, and values above 0xFFFF pass through untouched -
        # confirm whether a wider canonicalisation was intended.
        if sp_value <= 0xFFFF:
            sp_value &= 0xFFFF

    # DIE-walker modes use parseAllCus; the legacy line-table mode uses
    # buildTable. Both work off the same sidecar file.
    if args.dump_dies or args.list_cus or args.locals:
        cus = parseAllCus(loadSidecarSectionsAll(args.sidecar))
        if args.list_cus:
            _printCuList(cus)
            return 0
        mode = "--locals" if args.locals else "--dump-dies"
        if not args.pcs:
            print(f"pc2line: {mode} needs a PC", file=sys.stderr)
            return 2
        if args.locals:
            return _runPerPc(
                args.pcs, lambda pc: printLocals(cus, pc, sp_value))
        return _runPerPc(args.pcs, lambda pc: dumpDiesAtPc(cus, pc))

    table = buildTable(args.sidecar)
    if args.dump:
        for pc, fidx, ln, ft in sorted(table):
            fname = ft[fidx - 1] if 0 < fidx <= len(ft) else "?"
            func = funcAt(syms, pc)
            print(f"0x{pc:06x}\t{os.path.basename(fname)}:{ln}\t{func}")
        return 0
    if not args.pcs:
        print(f"pc2line: built {len(table)} entries", file=sys.stderr)
        print("pass PCs as positional args to resolve, or --dump for the table")
        return 2
    for pc in args.pcs:
        row = query(table, pc)
        func = funcAt(syms, pc)
        if row is None:
            print(f"PC=0x{pc:06x} NOT_FOUND FUNC={func}")
        else:
            _row_pc, fname, ln = row
            print(f"PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func}")
    return 0


if __name__ == "__main__":
    sys.exit(main())