#!/usr/bin/env python3 # pc2line.py - PC -> source location resolver for W65816 binaries. # # Parses the DWARF .debug_line section from link816's debug sidecar # (`--debug-out FILE`) and answers `file:line` queries for runtime PCs. # The sidecar already has reloc-applied PCs — they refer to the final # linked image — so we don't need link-map cross-walking. # # Standard tools (pyelftools, llvm-dwarfdump, addr2line) all choke on # the W65816 ELF: pyelftools asserts on the DWARF address size, the # others can't apply our target-specific relocations. So we ship a # small DWARF 5 line-program decoder (DWARF §6.2) — ~200 LOC, no deps. # # Usage: # scripts/pc2line.py --sidecar foo.dwarf 0x123A # scripts/pc2line.py --sidecar foo.dwarf --dump # scripts/pc2line.py --sidecar foo.dwarf 0x123A 0x4567 # scripts/pc2line.py --sidecar foo.dwarf --list-cus # scripts/pc2line.py --sidecar foo.dwarf --dump-dies 0x123A # scripts/pc2line.py --sidecar foo.dwarf --locals --sp 0x1FA 0x123A import argparse import os import struct import sys # ---- ULEB128 / SLEB128 ----------------------------------------------- def readUleb(buf, off): result = 0 shift = 0 while True: b = buf[off]; off += 1 result |= (b & 0x7F) << shift if (b & 0x80) == 0: break shift += 7 return result, off def readSleb(buf, off): result = 0 shift = 0 size = 64 while True: b = buf[off]; off += 1 result |= (b & 0x7F) << shift shift += 7 if (b & 0x80) == 0: break if shift < size and (b & 0x40): result |= -(1 << shift) return result, off def readU8(buf, off): return buf[off], off + 1 def readU16(buf, off): return struct.unpack_from(" R_W65816_DATA32 reloc fix in W65816ELFObjectWriter), unit_length and header_length come out of the linker correctly populated for fresh -g builds: the backend now emits a 4-byte fixup for each, and link816 patches the resolved value into all 4 bytes of the slot. 
We still keep the tolerant zero-fallback so older sidecars (produced before the reloc fix landed, or before link816 was rebuilt) keep decoding: unit_length=0 falls back to section size, header_length=0 lets the prologue forward-scan land us at the right body offset on its own. The body offset is taken from the post-prologue position even when header_length is non-zero: the prologue's variable-length file/dir entries are already consumed by the time we reach that point, so the forward scan IS the source of truth. The header_length field is informational from our perspective. """ h = {} unit_length, off = readU32(buf, off) if unit_length == 0xFFFFFFFF: raise NotImplementedError("64-bit DWARF not handled") if unit_length == 0: unit_length = section_end - off h["end"] = off + unit_length h["version"], off = readU16(buf, off) if h["version"] != 5: raise NotImplementedError(f"DWARF v{h['version']} not handled (v5 only)") h["addr_size"], off = readU8(buf, off) h["seg_size"], off = readU8(buf, off) # header_length is now reloc-resolved post Phase 1.3; we still # forward-scan the prologue, but we record the value for tools # that want it. h["header_length"], off = readU32(buf, off) h["min_inst_length"], off = readU8(buf, off) h["max_ops_per_inst"], off = readU8(buf, off) h["default_is_stmt"], off = readU8(buf, off) h["line_base"], off = readU8(buf, off) if h["line_base"] >= 0x80: # signed h["line_base"] -= 0x100 h["line_range"], off = readU8(buf, off) h["opcode_base"], off = readU8(buf, off) # standard_opcode_lengths[opcode_base-1] h["std_op_lens"] = list(buf[off:off + h["opcode_base"] - 1]) off += h["opcode_base"] - 1 # directory format + entries. 
dir_fmt_count, off = readU8(buf, off) dir_fmt = [] for _ in range(dir_fmt_count): ct, off = readUleb(buf, off) fm, off = readUleb(buf, off) dir_fmt.append((ct, fm)) dir_count, off = readUleb(buf, off) dirs = [] for _ in range(dir_count): entry = {} for ct, fm in dir_fmt: val, off = readForm(buf, off, fm) entry[ct] = val dirs.append(entry) # file format + entries. file_fmt_count, off = readU8(buf, off) file_fmt = [] for _ in range(file_fmt_count): ct, off = readUleb(buf, off) fm, off = readUleb(buf, off) file_fmt.append((ct, fm)) file_count, off = readUleb(buf, off) files = [] for _ in range(file_count): entry = {} for ct, fm in file_fmt: val, off = readForm(buf, off, fm) entry[ct] = val files.append(entry) h["dirs"] = dirs h["files"] = files # Body starts where the prologue parse ended — header_length above # is unreliable in our sidecar (see docstring). return h, off # DWARF forms (subset — what we see in our line-program file/dir entries # and .debug_info DIE attributes for the Phase 3.2 DIE walker). Form codes # are from DWARF 5 §7.5.5. 
# DW_FORM_* attribute form codes, listed in ascending numeric order.
# readForm() decodes every form below except DW_FORM_indirect, for which
# it raises NotImplementedError.
DW_FORM_addr = 0x01
DW_FORM_block2 = 0x03
DW_FORM_block4 = 0x04
DW_FORM_data2 = 0x05
DW_FORM_data4 = 0x06
DW_FORM_data8 = 0x07
DW_FORM_string = 0x08
DW_FORM_block = 0x09
DW_FORM_block1 = 0x0a
DW_FORM_data1 = 0x0b
DW_FORM_flag = 0x0c
DW_FORM_sdata = 0x0d
DW_FORM_strp = 0x0e
DW_FORM_udata = 0x0f
DW_FORM_ref_addr = 0x10
DW_FORM_ref1 = 0x11
DW_FORM_ref2 = 0x12
DW_FORM_ref4 = 0x13
DW_FORM_ref8 = 0x14
DW_FORM_ref_udata = 0x15
DW_FORM_indirect = 0x16
DW_FORM_sec_offset = 0x17
DW_FORM_exprloc = 0x18
DW_FORM_flag_present = 0x19
DW_FORM_strx = 0x1a
DW_FORM_addrx = 0x1b
DW_FORM_ref_sup4 = 0x1c
DW_FORM_strp_sup = 0x1d
DW_FORM_data16 = 0x1e
DW_FORM_line_strp = 0x1f
DW_FORM_ref_sig8 = 0x20
DW_FORM_implicit_const = 0x21
DW_FORM_loclistx = 0x22
DW_FORM_rnglistx = 0x23
DW_FORM_ref_sup8 = 0x24
DW_FORM_strx1 = 0x25
DW_FORM_strx2 = 0x26
DW_FORM_strx3 = 0x27
DW_FORM_strx4 = 0x28
DW_FORM_addrx1 = 0x29
DW_FORM_addrx2 = 0x2a
DW_FORM_addrx3 = 0x2b
DW_FORM_addrx4 = 0x2c

# DWARF 5 tag codes — only the ones we look at for the DIE walker.
DW_TAG_array_type = 0x01
DW_TAG_class_type = 0x02
DW_TAG_enumeration_type = 0x04
DW_TAG_formal_parameter = 0x05
DW_TAG_lexical_block = 0x0b
DW_TAG_member = 0x0d
DW_TAG_pointer_type = 0x0f
DW_TAG_compile_unit = 0x11
DW_TAG_structure_type = 0x13
DW_TAG_subroutine_type = 0x15
DW_TAG_typedef = 0x16
DW_TAG_union_type = 0x17
DW_TAG_inheritance = 0x1c
DW_TAG_inlined_subroutine = 0x1d
DW_TAG_subrange_type = 0x21
DW_TAG_base_type = 0x24
DW_TAG_const_type = 0x26
DW_TAG_subprogram = 0x2e
DW_TAG_variable = 0x34
DW_TAG_volatile_type = 0x35
DW_TAG_restrict_type = 0x37

# DWARF 5 attribute codes (subset).
DW_AT_sibling = 0x01
DW_AT_location = 0x02
DW_AT_name = 0x03
DW_AT_byte_size = 0x0b
DW_AT_stmt_list = 0x10
DW_AT_low_pc = 0x11
DW_AT_high_pc = 0x12
DW_AT_language = 0x13
DW_AT_comp_dir = 0x1b
DW_AT_const_value = 0x1c
DW_AT_upper_bound = 0x2f
DW_AT_prototyped = 0x27
DW_AT_producer = 0x25
DW_AT_start_scope = 0x2c
DW_AT_bit_size = 0x0d
DW_AT_encoding = 0x3e
DW_AT_frame_base = 0x40
DW_AT_specification = 0x47
DW_AT_type = 0x49
DW_AT_ranges = 0x55
DW_AT_external = 0x3f
DW_AT_decl_file = 0x3a
DW_AT_decl_line = 0x3b
DW_AT_artificial = 0x34
DW_AT_data_member_location = 0x38
DW_AT_call_file = 0x58
DW_AT_call_line = 0x59
DW_AT_call_column = 0x57
# DWARF 5 assigns 0x7d to DW_AT_call_return_pc and 0x7f to
# DW_AT_call_origin; the two must not be conflated or the reverse-name
# table mislabels call-site attributes.
DW_AT_call_return_pc = 0x7d
DW_AT_call_origin = 0x7f
DW_AT_abstract_origin = 0x31
DW_AT_inline = 0x20
DW_AT_str_offsets_base = 0x72
DW_AT_addr_base = 0x73
DW_AT_rnglists_base = 0x74
DW_AT_loclists_base = 0x8c


# Reverse-name tables for printing. Built by enumerating the module
# namespace once; values that aren't constants are filtered out.
def _buildNameMap(prefix):
    """Return {code: short_name} for module-level ints named `prefix`*.

    `short_name` is the global's name with `prefix` stripped. If two
    globals share a value, whichever is enumerated last wins.
    """
    return {
        v: k[len(prefix):]
        for k, v in globals().items()
        if k.startswith(prefix) and isinstance(v, int)
    }


def readForm(buf, off, form, addr_size=4, dwarf_size=4, implicit_val=None):
    """Read a single attribute value at `off` using DWARF form `form`.

    Returns (value, new_off). For block/exprloc forms, value is a bytes
    object; for refN forms, value is the raw CU-local DIE offset; for
    sec_offset / strx / addrx forms, value is the index/offset (caller
    resolves through the appropriate table). DW_FORM_data16 yields a hex
    string; DW_FORM_flag_present yields 1 and DW_FORM_implicit_const
    yields `implicit_val`, both without consuming any bytes.

    `addr_size` is the CU's address size (4 on W65816 since DWARF treats
    24-bit PCs as 32-bit). `dwarf_size` is 4 for 32-bit DWARF, 8 for
    64-bit (we only support 32-bit).

    Raises NotImplementedError for any form not listed below (this
    includes DW_FORM_indirect).
    """
    if form == DW_FORM_addr:
        if addr_size == 4:
            return readU32(buf, off)
        if addr_size == 2:
            return readU16(buf, off)
        return (int.from_bytes(buf[off:off + addr_size], "little"),
                off + addr_size)
    if form in (DW_FORM_data1, DW_FORM_flag, DW_FORM_ref1):
        return readU8(buf, off)
    if form in (DW_FORM_data2, DW_FORM_ref2):
        return readU16(buf, off)
    if form in (DW_FORM_data4, DW_FORM_ref4, DW_FORM_ref_sup4):
        return readU32(buf, off)
    if form in (DW_FORM_data8, DW_FORM_ref8, DW_FORM_ref_sig8,
                DW_FORM_ref_sup8):
        return (int.from_bytes(buf[off:off + 8], "little"), off + 8)
    if form == DW_FORM_data16:
        return (buf[off:off + 16].hex(), off + 16)
    if form == DW_FORM_sdata:
        return readSleb(buf, off)
    if form in (DW_FORM_udata, DW_FORM_ref_udata):
        return readUleb(buf, off)
    if form == DW_FORM_string:
        return readCStr(buf, off)
    # Section-offset-sized forms: their width follows the DWARF format
    # (32-bit vs 64-bit), not the target address size.
    if form in (DW_FORM_strp, DW_FORM_line_strp, DW_FORM_strp_sup,
                DW_FORM_sec_offset, DW_FORM_ref_addr):
        if dwarf_size == 4:
            return readU32(buf, off)
        return (int.from_bytes(buf[off:off + 8], "little"), off + 8)
    if form == DW_FORM_flag_present:
        return (1, off)
    if form == DW_FORM_implicit_const:
        return (implicit_val, off)
    if form in (DW_FORM_strx, DW_FORM_addrx, DW_FORM_loclistx,
                DW_FORM_rnglistx):
        return readUleb(buf, off)
    if form in (DW_FORM_strx1, DW_FORM_addrx1):
        return readU8(buf, off)
    if form in (DW_FORM_strx2, DW_FORM_addrx2):
        return readU16(buf, off)
    if form in (DW_FORM_strx3, DW_FORM_addrx3):
        # 3-byte little-endian index; no struct format covers this width.
        v = buf[off] | (buf[off + 1] << 8) | (buf[off + 2] << 16)
        return (v, off + 3)
    if form in (DW_FORM_strx4, DW_FORM_addrx4):
        return readU32(buf, off)
    if form == DW_FORM_block1:
        n, off = readU8(buf, off)
        return (bytes(buf[off:off + n]), off + n)
    if form == DW_FORM_block2:
        n, off = readU16(buf, off)
        return (bytes(buf[off:off + n]), off + n)
    if form == DW_FORM_block4:
        n, off = readU32(buf, off)
        return (bytes(buf[off:off + n]), off + n)
    if form in (DW_FORM_block, DW_FORM_exprloc):
        n, off = readUleb(buf, off)
        return (bytes(buf[off:off + n]), off + n)
    raise NotImplementedError(f"DW_FORM 0x{form:x} not handled")


# ---- Sidecar parser --------------------------------------------------
def loadSidecarSection(path, section_name):
    """Collect every `section_name` payload from the link816 sidecar.

    The sidecar is scanned for header lines starting with `; OBJ `. A
    header is split on whitespace; the token after `SEC` is the section
    name and the token after `SIZE` is the payload length in bytes. The
    payload is the `SIZE` bytes that follow the header's newline.

    Returns a list of (section_name, payload_bytes) tuples in file order
    (one per matching header); the list is empty when nothing matches.
    Raises ValueError if a header's SIZE token is not an integer.
    """
    with open(path, "rb") as f:
        data = f.read()
    needle = b"; OBJ "
    sections = []
    i = 0
    while True:
        h = data.find(needle, i)
        if h < 0:
            break
        nl = data.find(b"\n", h)
        if nl < 0:
            break
        header = data[h:nl].decode("utf-8", "replace")
        parts = header.split()
        if "SEC" in parts and "SIZE" in parts:
            sec = parts[parts.index("SEC") + 1]
            size = int(parts[parts.index("SIZE") + 1])
            payload_start = nl + 1
            payload = data[payload_start:payload_start + size]
            sections.append((sec, payload))
            # Resume after the payload so payload bytes that happen to
            # contain the needle are never mistaken for a header.
            i = payload_start + size
        else:
            i = nl + 1
    return [(name, p) for name, p in sections if name == section_name]


# ---- Line-program decoder --------------------------------------------
def runLineProgram(buf, header, h_end):
    """Walk the line program; yield (pc, file_idx, line, end_sequence).

    Decoding starts at offset 0 of `buf` and stops once the offset
    reaches `h_end`. `header` supplies addr_size, opcode_base,
    line_base, line_range, min_inst_length, default_is_stmt and
    std_op_lens. A row is yielded for DW_LNS_copy and for every special
    opcode with end_sequence=False, and for DW_LNE_end_sequence with
    end_sequence=True (after which the state machine registers reset).
    The is_stmt register is tracked but not part of the yielded rows.
    """
    off = 0
    addr = 0
    line = 1
    file = 1
    is_stmt = bool(header["default_is_stmt"])
    while off < h_end:
        op, off = readU8(buf, off)
        if op == 0:
            # Extended opcode: ULEB length, then sub-opcode + operands.
            ln, off = readUleb(buf, off)
            sub = buf[off]
            sub_off = off + 1
            if sub == DW_LNE_end_sequence:
                yield (addr, file, line, True)
                addr = 0
                line = 1
                file = 1
                is_stmt = bool(header["default_is_stmt"])
            elif sub == DW_LNE_set_address:
                addr_size = header["addr_size"]
                if addr_size == 4:
                    a, _ = readU32(buf, sub_off)
                elif addr_size == 2:
                    a, _ = readU16(buf, sub_off)
                else:
                    a = int.from_bytes(buf[sub_off:sub_off + addr_size],
                                       "little")
                addr = a
            # Known or not, the declared length tells us where the next
            # opcode starts.
            off = off + ln
        elif op < header["opcode_base"]:
            # Standard opcode.
            if op == DW_LNS_copy:
                yield (addr, file, line, False)
            elif op == DW_LNS_advance_pc:
                inc, off = readUleb(buf, off)
                addr += inc * header["min_inst_length"]
            elif op == DW_LNS_advance_line:
                inc, off = readSleb(buf, off)
                line += inc
            elif op == DW_LNS_set_file:
                f, off = readUleb(buf, off)
                file = f
            elif op == DW_LNS_set_column:
                _, off = readUleb(buf, off)
            elif op == DW_LNS_negate_stmt:
                is_stmt = not is_stmt
            elif op == DW_LNS_set_basic_block:
                pass
            elif op == DW_LNS_const_add_pc:
                # Advance by the address increment of special opcode 255.
                adj = (255 - header["opcode_base"]) // header["line_range"]
                addr += adj * header["min_inst_length"]
            elif op == DW_LNS_fixed_advance_pc:
                inc, off = readU16(buf, off)
                addr += inc
            elif op == DW_LNS_set_prologue_end:
                pass
            elif op == DW_LNS_set_epilogue_begin:
                pass
            elif op == DW_LNS_set_isa:
                _, off = readUleb(buf, off)
            else:
                # Unknown std op — skip operands per std_op_lens.
                args = header["std_op_lens"][op - 1]
                for _ in range(args):
                    _, off = readUleb(buf, off)
        else:
            # Special opcode (most common).
            adj = op - header["opcode_base"]
            addr += (adj // header["line_range"]) * header["min_inst_length"]
            line += header["line_base"] + (adj % header["line_range"])
            yield (addr, file, line, False)


# ---- .debug_abbrev parser --------------------------------------------
#
# Each CU has an abbrev_offset that points into .debug_abbrev. A table
# at that offset is a list of abbrev entries terminated by code 0.
# Each entry is: ULEB code, ULEB tag, u8 children, then (ULEB attr,
# ULEB form, optional SLEB implicit_const if form==implicit_const)*
# terminated by attr==0 form==0.
def parseAbbrevTable(buf, base):
    """Parse the abbrev table starting at offset `base`.

    Returns {code: (tag, has_children, [(attr, form, implicit_const), ...])}.
    `implicit_const` is None unless the form is DW_FORM_implicit_const.
    Parsing stops at the terminating code 0 or at the end of `buf`.
    """
    table = {}
    off = base
    while off < len(buf):
        code, off = readUleb(buf, off)
        if code == 0:
            break
        tag, off = readUleb(buf, off)
        has_kids, off = readU8(buf, off)
        attrs = []
        while True:
            at, off = readUleb(buf, off)
            fm, off = readUleb(buf, off)
            if at == 0 and fm == 0:
                break
            ic = None
            if fm == DW_FORM_implicit_const:
                ic, off = readSleb(buf, off)
            attrs.append((at, fm, ic))
        table[code] = (tag, bool(has_kids), attrs)
    return table


# ---- .debug_str / .debug_line_str / .debug_str_offsets resolution ----
#
# In a link816 sidecar the per-CU .debug_str_offsets entries are zeroed
# (the .rela.debug_str_offsets relocs target .debug_str, which isn't in
# the resolveSym kind-set — text/rodata/bss/init_array). This is a known
# Phase-3.2-slice-1 gap noted in the GAP_CLOSURE_PLAN: until link816 is
# extended to resolve intra-debug relocs, strx-form names come out as
# `@strx<N>` placeholders. Line-strp names DO resolve because line_str
# is referenced by literal offset inside .debug_line, not by a reloc.
def resolveStr(strtab, offset):
    """Return the NUL-terminated string at `offset` in `strtab`.

    Returns None when the table is empty/missing, the offset is past
    the end, or no terminating NUL follows the offset. Bytes are decoded
    as UTF-8 with replacement of invalid sequences.
    """
    if not strtab or offset >= len(strtab):
        return None
    end = strtab.find(b"\0", offset)
    if end < 0:
        return None
    return strtab[offset:end].decode("utf-8", "replace")


def resolveStrx(str_offsets, debug_str, str_offsets_base, idx, dwarf_size=4):
    """DWARF 5 §7.26: index into the str_offsets table.

    The base points past the header. Each entry is `dwarf_size` bytes.
    Returns the resolved string, or None if either table is missing or
    the entry lies outside `str_offsets`.
    """
    if str_offsets is None or debug_str is None:
        return None
    entry_off = str_offsets_base + idx * dwarf_size
    if entry_off + dwarf_size > len(str_offsets):
        return None
    if dwarf_size == 4:
        off = int.from_bytes(str_offsets[entry_off:entry_off + 4], "little")
    else:
        off = int.from_bytes(str_offsets[entry_off:entry_off + 8], "little")
    return resolveStr(debug_str, off)


def resolveAddrx(addr_section, addr_base, idx, addr_size=4):
    """DWARF 5 §7.27: index into the addr table. Base points past header.

    Returns the little-endian address, or None if the section is missing
    or the entry lies outside it.
    """
    if addr_section is None:
        return None
    entry_off = addr_base + idx * addr_size
    if entry_off + addr_size > len(addr_section):
        return None
    return int.from_bytes(addr_section[entry_off:entry_off + addr_size],
                          "little")


# ---- .debug_info DIE walker -------------------------------------------
class Die:
    """A parsed DIE: tag, attributes, children, CU-relative offset."""

    def __init__(self, offset, tag, attrs):
        self.offset = offset  # CU-relative offset (for ref4 lookup)
        self.tag = tag
        self.attrs = attrs  # {at_code: (raw_value, form_code)}
        self.children = []
        self.parent = None

    def get(self, at_code):
        """Return the raw value of attribute `at_code`, or None if absent."""
        rec = self.attrs.get(at_code)
        if rec is None:
            return None
        return rec[0]

    def getRaw(self, at_code):
        """Return the (raw_value, form_code) pair for `at_code`, or None."""
        return self.attrs.get(at_code)


class Cu:
    """One compile unit: header info, abbrev table, root DIE."""

    def __init__(self):
        self.version = 0
        self.unit_type = 0
        self.addr_size = 4
        self.dwarf_size = 4
        self.abbrev_off = 0
        self.cu_start = 0  # offset of CU header in .debug_info
        self.body_start = 0  # offset of first DIE
        self.body_end = 0
        self.abbrev = {}
        self.root = None
        # Bases for indirect lookups (resolved from CU root attrs):
        self.str_offsets_base = 0
        self.addr_base = 0
        # Per-CU references to the global section tables:
        self.debug_str = None
        self.line_str = None
        self.str_offsets = None
        self.addr_section = None
        # Source file paths and PC ranges from the matching .debug_line CU.
        self.files = []

    def lookupStr(self, idx):
        """Resolve a strx index; returns an `@strx<idx>` placeholder on failure."""
        s = resolveStrx(self.str_offsets, self.debug_str,
                        self.str_offsets_base, idx, self.dwarf_size)
        if s is None:
            return f"@strx{idx}"
        return s

    def lookupLineStr(self, off):
        """Resolve a .debug_line_str offset; placeholder string on failure."""
        s = resolveStr(self.line_str, off)
        if s is None:
            return f"@linestr0x{off:x}"
        return s

    def lookupStrp(self, off):
        """Resolve a .debug_str offset; placeholder string on failure."""
        s = resolveStr(self.debug_str, off)
        if s is None:
            return f"@str0x{off:x}"
        return s

    def lookupAddr(self, idx):
        """Resolve an addrx index to an address, or None if unresolvable."""
        return resolveAddrx(self.addr_section, self.addr_base, idx,
                            self.addr_size)


def parseDie(buf, off, cu, parent=None):
    """Parse a single DIE (and its children) starting at `off`.

    Returns (die or None for null terminator, new_off). `off` is a
    .debug_info-absolute offset; we record `cu_offset = off - cu.cu_start`
    for ref4 lookup. An abbrev code missing from `cu.abbrev` is reported
    the same way as a null terminator, which ends the enclosing sibling
    list.
    """
    die_off = off
    code, off = readUleb(buf, off)
    if code == 0:
        return (None, off)
    if code not in cu.abbrev:
        # Malformed — stop walking the rest of this CU defensively.
        return (None, off)
    tag, has_kids, attr_list = cu.abbrev[code]
    attrs = {}
    for at, fm, ic in attr_list:
        val, off = readForm(buf, off, fm, addr_size=cu.addr_size,
                            dwarf_size=cu.dwarf_size, implicit_val=ic)
        attrs[at] = (val, fm)
    die = Die(die_off - cu.cu_start, tag, attrs)
    die.parent = parent
    if has_kids:
        while off < cu.body_end:
            child, off = parseDie(buf, off, cu, parent=die)
            if child is None:
                break
            die.children.append(child)
    return (die, off)


def parseDebugInfoCu(info_buf, info_off, abbrev_buf, sections):
    """Parse one CU header + DIE tree starting at .debug_info offset `info_off`.

    `sections` must provide the keys "debug_str", "line_str",
    "str_offsets" and "addr". Returns (cu, next_off) where next_off is
    the start of the next CU (or end of section). Raises
    NotImplementedError for 64-bit DWARF or a CU version other than 4/5.
    """
    cu = Cu()
    cu.cu_start = info_off
    cu.debug_str = sections["debug_str"]
    cu.line_str = sections["line_str"]
    cu.str_offsets = sections["str_offsets"]
    cu.addr_section = sections["addr"]
    off = info_off
    unit_length, off = readU32(info_buf, off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF .debug_info not handled")
    next_off = off + unit_length
    cu.dwarf_size = 4
    cu.version, off = readU16(info_buf, off)
    if cu.version == 5:
        cu.unit_type, off = readU8(info_buf, off)
        cu.addr_size, off = readU8(info_buf, off)
        cu.abbrev_off, off = readU32(info_buf, off)
    elif cu.version == 4:
        # v4 has no unit_type and swaps abbrev_off / addr_size.
        cu.abbrev_off, off = readU32(info_buf, off)
        cu.addr_size, off = readU8(info_buf, off)
        cu.unit_type = 0
    else:
        raise NotImplementedError(f"DWARF v{cu.version} CU not handled")
    cu.body_start = off
    cu.body_end = next_off
    cu.abbrev = parseAbbrevTable(abbrev_buf, cu.abbrev_off)
    # Walk the root DIE (the CU DIE). Resolve base attrs first so that
    # children parsed in the same call can use them — but parseDie doesn't
    # actually consume strx/addrx (they stay as raw indices), so we can
    # do the base resolution post-parse.
    cu.root, _ = parseDie(info_buf, off, cu)
    if cu.root is not None:
        sob = cu.root.getRaw(DW_AT_str_offsets_base)
        if sob is not None:
            cu.str_offsets_base = sob[0]
        ab = cu.root.getRaw(DW_AT_addr_base)
        if ab is not None:
            cu.addr_base = ab[0]
    return cu, next_off


def parseAllCus(payload_map):
    """Parse every CU from concatenated .debug_info bytes.

    `payload_map` maps a section name to a list of per-slice payloads.
    Returns list[Cu]; empty if .debug_info or .debug_abbrev is absent.
    A CU that raises NotImplementedError is reported on stderr and ends
    the walk of its slice.
    """
    info_payloads = payload_map.get(".debug_info", [])
    abbrev_payloads = payload_map.get(".debug_abbrev", [])
    if not info_payloads or not abbrev_payloads:
        return []
    cus = []
    # Each input .o slice contributes one .debug_info + one .debug_abbrev
    # section (DWARF v5 single-CU is the norm for clang). We concatenate
    # the payloads but reset abbrev_base per slice — within the same .o,
    # the CU's abbrev_off is relative to ITS .debug_abbrev section. So
    # we walk slice-by-slice, pairing the i-th .debug_info with the i-th
    # .debug_abbrev.
    n = min(len(info_payloads), len(abbrev_payloads))
    for i in range(n):
        info_buf = info_payloads[i]
        abbrev_buf = abbrev_payloads[i]
        # Sections like .debug_addr / .debug_str_offsets may also be
        # per-slice; index them similarly.
        slice_sections = {}
        for key, sec_name in [
            ("debug_str", ".debug_str"),
            ("line_str", ".debug_line_str"),
            ("str_offsets", ".debug_str_offsets"),
            ("addr", ".debug_addr"),
        ]:
            payloads = payload_map.get(sec_name, [])
            slice_sections[key] = payloads[i] if i < len(payloads) else b""
        off = 0
        while off < len(info_buf):
            try:
                cu, off = parseDebugInfoCu(info_buf, off, abbrev_buf,
                                           slice_sections)
            except NotImplementedError as e:
                print(f"pc2line: skipping CU: {e}", file=sys.stderr)
                break
            cus.append(cu)
    return cus


# ---- DIE search / attribute formatting -------------------------------
# Reverse-name tables, built lazily on first use.
_TAG_NAMES = None
_AT_NAMES = None
_FORM_NAMES = None


def tagName(code):
    """Return the short tag name for `code`, or a DW_TAG_<0x..> fallback."""
    global _TAG_NAMES
    if _TAG_NAMES is None:
        _TAG_NAMES = _buildNameMap("DW_TAG_")
    return _TAG_NAMES.get(code, f"DW_TAG_<0x{code:x}>")


def attrName(code):
    """Return the short attribute name for `code`, or a DW_AT_<0x..> fallback."""
    global _AT_NAMES
    if _AT_NAMES is None:
        _AT_NAMES = _buildNameMap("DW_AT_")
    return _AT_NAMES.get(code, f"DW_AT_<0x{code:x}>")


def formName(code):
    """Return the short form name for `code`, or a DW_FORM_<0x..> fallback."""
    global _FORM_NAMES
    if _FORM_NAMES is None:
        _FORM_NAMES = _buildNameMap("DW_FORM_")
    return _FORM_NAMES.get(code, f"DW_FORM_<0x{code:x}>")


def dieName(cu, die):
    """Resolve DW_AT_name for a DIE, or return None.

    None is also returned when the name uses a form other than
    strx*/strp/line_strp/string.
    """
    rec = die.getRaw(DW_AT_name)
    if rec is None:
        return None
    val, form = rec
    if form in (DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2,
                DW_FORM_strx3, DW_FORM_strx4):
        return cu.lookupStr(val)
    if form == DW_FORM_strp:
        return cu.lookupStrp(val)
    if form == DW_FORM_line_strp:
        return cu.lookupLineStr(val)
    if form == DW_FORM_string:
        return val
    return None


def diePcRange(cu, die):
    """Return (low_pc, high_pc) for a DIE, or (None, None).

    DWARF 5 high_pc is commonly an offset from low_pc (data4/udata) per
    §3.3.1.1. We handle that and the absolute-PC case. An absolute
    high_pc may be encoded as DW_FORM_addr or as any addrx* index into
    .debug_addr; both are address-class and are NOT added to low_pc.

    (None, None) is returned when low_pc is absent or its addrx index
    cannot be resolved. A missing high_pc, or an addrx high_pc that
    cannot be resolved, yields the empty range (low, low).
    """
    addrx_forms = (DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2,
                   DW_FORM_addrx3, DW_FORM_addrx4)
    lo_rec = die.getRaw(DW_AT_low_pc)
    if lo_rec is None:
        return (None, None)
    lo_val, lo_form = lo_rec
    lo = cu.lookupAddr(lo_val) if lo_form in addrx_forms else lo_val
    if lo is None:
        return (None, None)
    hi_rec = die.getRaw(DW_AT_high_pc)
    if hi_rec is None:
        return (lo, lo)
    hi_val, hi_form = hi_rec
    if hi_form == DW_FORM_addr:
        return (lo, hi_val)
    if hi_form in addrx_forms:
        # Address-class via index: resolve through .debug_addr rather
        # than treating the index as a byte offset from low_pc.
        hi = cu.lookupAddr(hi_val)
        return (lo, lo if hi is None else hi)
    # Offset form (DWARF 5 default for clang).
    return (lo, lo + hi_val)


def findSubprogramForPc(cus, pc):
    """Return (cu, subprogram_die) whose [low, high) contains pc.

    Only DW_TAG_subprogram DIEs that are direct children of a CU root
    are considered. Returns (None, None) when nothing matches.
    """
    for cu in cus:
        if cu.root is None:
            continue
        for child in cu.root.children:
            if child.tag != DW_TAG_subprogram:
                continue
            lo, hi = diePcRange(cu, child)
            if lo is None:
                continue
            if lo <= pc < hi:
                return (cu, child)
    return (None, None)


def formatAttr(cu, at, val, form):
    """Pretty-print an attribute value for --dump-dies output."""
    if at == DW_AT_name:
        if form in (DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2,
                    DW_FORM_strx3, DW_FORM_strx4):
            return cu.lookupStr(val)
        if form == DW_FORM_strp:
            return cu.lookupStrp(val)
        if form == DW_FORM_line_strp:
            return cu.lookupLineStr(val)
        if form == DW_FORM_string:
            return val
    # NOTE(review): block/exprloc values are rendered as an empty string,
    # so the bytes -> hex fallback at the bottom never sees them —
    # confirm this is the intended output.
    if form == DW_FORM_exprloc or form in (DW_FORM_block, DW_FORM_block1,
                                           DW_FORM_block2, DW_FORM_block4):
        return ""
    if form in (DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2,
                DW_FORM_addrx3, DW_FORM_addrx4):
        a = cu.lookupAddr(val)
        if a is None:
            return f"addrx[{val}]=?"
        return f"0x{a:06x}"
    if form == DW_FORM_addr:
        return f"0x{val:06x}"
    if form in (DW_FORM_flag, DW_FORM_flag_present):
        return "true" if val else "false"
    if isinstance(val, int):
        return f"{val} (0x{val:x})"
    if isinstance(val, bytes):
        return val.hex()
    return repr(val)


def dumpDieRecursive(cu, die, depth, out):
    """Append a text rendering of `die` and its subtree to the list `out`.

    One line for the DIE itself, one per attribute other than DW_AT_name
    (the name is shown on the DIE line), then each child one level deeper.
    """
    indent = " " * depth
    name = dieName(cu, die)
    name_str = f" \"{name}\"" if name else ""
    out.append(f"{indent}<0x{die.offset:x}> {tagName(die.tag)}{name_str}")
    for at, (val, form) in die.attrs.items():
        if at == DW_AT_name:
            continue
        out.append(f"{indent} {attrName(at)} ({formName(form)}) = "
                   f"{formatAttr(cu, at, val, form)}")
    for child in die.children:
        dumpDieRecursive(cu, child, depth + 1, out)


def dumpDiesAtPc(cus, pc):
    """Print the subprogram containing `pc` and its variable/parameter
    children (DW_TAG_formal_parameter + DW_TAG_variable). Lexical blocks
    are descended into recursively.

    Returns 1 (after a message on stderr) when no subprogram covers
    `pc`; otherwise prints to stdout and returns None.
    """
    cu, sub = findSubprogramForPc(cus, pc)
    if sub is None:
        print(f"pc2line: no DW_TAG_subprogram covers PC 0x{pc:06x}",
              file=sys.stderr)
        return 1
    lo, hi = diePcRange(cu, sub)
    sub_name = dieName(cu, sub) or ""
    print(f"PC 0x{pc:06x} -> subprogram {sub_name!r} "
          f"[0x{lo:06x}, 0x{hi:06x})")
    out = []
    dumpDieRecursive(cu, sub, 0, out)
    print("\n".join(out))
    # Also list parameters + variables in flat form for easy parsing.
    print("\n; variables under this subprogram (DW_TAG_formal_parameter "
          "+ DW_TAG_variable):")
    _flatVarList(cu, sub, [], None)


def _flatVarList(cu, die, scope_stack, _unused):
    """Walk DIE tree under `die`, printing each formal_parameter / variable
    along with its containing lexical block PC range (if any).

    `scope_stack` is the list of enclosing scope labels; it is copied,
    never mutated, when descending. `_unused` is ignored.
    """
    for child in die.children:
        if child.tag == DW_TAG_lexical_block:
            lo, hi = diePcRange(cu, child)
            new_scope = list(scope_stack)
            if lo is not None:
                new_scope.append(f"block[0x{lo:06x},0x{hi:06x})")
            _flatVarList(cu, child, new_scope, None)
            continue
        if child.tag == DW_TAG_inlined_subroutine:
            lo, hi = diePcRange(cu, child)
            name = dieName(cu, child) or ""
            new_scope = list(scope_stack)
            if lo is not None:
                new_scope.append(f"inlined {name}[0x{lo:06x},0x{hi:06x})")
            else:
                new_scope.append(f"inlined {name}")
            _flatVarList(cu, child, new_scope, None)
            continue
        if child.tag not in (DW_TAG_formal_parameter, DW_TAG_variable):
            continue
        name = dieName(cu, child) or ""
        loc_rec = child.getRaw(DW_AT_location)
        if loc_rec is None:
            loc_str = ""
        else:
            lv, lf = loc_rec
            loc_str = formatAttr(cu, DW_AT_location, lv, lf)
        kind = "param" if child.tag == DW_TAG_formal_parameter else "var"
        scope_str = (" @ " + " > ".join(scope_stack)) if scope_stack else ""
        print(f" {kind}\t{name}\tloc={loc_str}{scope_str}")


# ---- DW_OP evaluator (Phase 3.2 slice 2) -----------------------------
#
# DWARF expression opcodes from §7.7.1.
We support only the subset # needed for -O0 stack-resident locals and trivial -O2 IMG locals: # DW_OP_addr (0x03): absolute address # DW_OP_constN/Nu/Ns: small constants # DW_OP_regN (0x50..0x57): register location (where N <= 7) # DW_OP_bregN (0x70..0x77): register + signed offset # DW_OP_fbreg (0x91): frame-base + signed offset # DW_OP_call_frame_cfa (0x9c): equivalent to frame-base in our ABI # DW_OP_regx (0x90): ULEB-encoded register # DW_OP_bregx (0x92): ULEB-encoded register + signed offset # DW_OP_plus_uconst (0x23): pop, add ULEB, push # DW_OP_plus (0x22): pop two, push sum # DW_OP_deref (0x06): pop addr, push *(addr) (memory-aware mode) # DW_OP_stack_value (0x9f): final result IS the value (not the addr) # DW_OP_piece (0x93): composite; not handled — returns None # # We return a structured `LocResult` so callers can distinguish # memory addresses, register-resident values, IMG-slot addresses, # and composite/unsupported expressions. DW_OP_addr = 0x03 DW_OP_deref = 0x06 DW_OP_const1u = 0x08 DW_OP_const1s = 0x09 DW_OP_const2u = 0x0a DW_OP_const2s = 0x0b DW_OP_const4u = 0x0c DW_OP_const4s = 0x0d DW_OP_const8u = 0x0e DW_OP_const8s = 0x0f DW_OP_constu = 0x10 DW_OP_consts = 0x11 DW_OP_plus = 0x22 DW_OP_plus_uconst = 0x23 DW_OP_reg0 = 0x50 DW_OP_breg0 = 0x70 DW_OP_regx = 0x90 DW_OP_fbreg = 0x91 DW_OP_bregx = 0x92 DW_OP_piece = 0x93 DW_OP_call_frame_cfa = 0x9c DW_OP_stack_value = 0x9f # Map W65816 Dwarf register numbers (from W65816RegisterInfo.td) to # their direct-page memory locations. IMG0..IMG7 at $D0..$DE, # IMG8..IMG15 at $C0..$CE (two bytes each). DPF0 at $F0. # SP (DwarfRegNum=3) handled separately via the stack-pointer parameter. W65816_DW_REG_TO_DP = { 16: 0xD0, 17: 0xD2, 18: 0xD4, 19: 0xD6, # IMG0..IMG3 20: 0xD8, 21: 0xDA, 22: 0xDC, 23: 0xDE, # IMG4..IMG7 24: 0xF0, # DPF0 32: 0xC0, 33: 0xC2, 34: 0xC4, 35: 0xC6, # IMG8..IMG11 36: 0xC8, 37: 0xCA, 38: 0xCC, 39: 0xCE, # IMG12..IMG15 } class LocResult: """Result of evaluating a DWARF location expression. 
kind ∈ {"memory", "register", "value", "composite", "unsupported"}. memory: addr = 24-bit byte address in target memory register: reg_dw = DWARF register number; dp_addr = mapped DP byte address (or None if not a W65816 DP-mapped reg) value: value = the computed value (DW_OP_stack_value path) composite: pieces = list of LocResult (one per DW_OP_piece) unsupported: reason = human-readable string """ def __init__(self, kind, **kw): self.kind = kind self.addr = kw.get("addr") self.reg_dw = kw.get("reg_dw") self.dp_addr = kw.get("dp_addr") self.value = kw.get("value") self.pieces = kw.get("pieces") self.reason = kw.get("reason") def displayAddr(self): """If the result corresponds to a fixed memory byte address (either RAM or DP-mapped register), return it; else None.""" if self.kind == "memory": return self.addr if self.kind == "register": return self.dp_addr return None def __repr__(self): if self.kind == "memory": return f"mem[0x{self.addr:06x}]" if self.kind == "register": if self.dp_addr is not None: return f"reg{self.reg_dw}@DP[0x{self.dp_addr:02x}]" return f"reg{self.reg_dw}" if self.kind == "value": return f"value(0x{self.value:x})" if self.kind == "composite": return "composite[" + ", ".join(repr(p) for p in self.pieces) + "]" return f"unsupported({self.reason})" def _evalFrameBase(cu, sub_die, sp_value): """Evaluate DW_AT_frame_base for a subprogram. Returns the frame-base BYTE ADDRESS (i.e., what DW_OP_fbreg offsets are relative to), or None if unresolvable. For W65816 -O0, clang emits `frame_base = DW_OP_reg3` (SP). Our ABI is empty-descending: S points to the next-free byte. LLVM's PEI assigns FrameOffset assuming full-descending, then W65816RegisterInfo::eliminateFrameIndex adds +1 for locals (see feedback_stack_skew.md). So `fb + fbreg_offset` where fb = sp_value + 1 yields the correct local byte address. """ fb_rec = sub_die.getRaw(DW_AT_frame_base) if fb_rec is None: # No frame_base — fall back to SP itself (best effort). 
def evalDwOp(expr, cu=None, frame_base=None, sp_value=None):
    """Evaluate a DWARF location expression and return a LocResult.

    Parameters:
      expr        bytes of the expression (None or empty -> unsupported).
      cu          accepted for call-site symmetry; not used here.
      frame_base  byte address DW_OP_fbreg offsets are relative to
                  (pre-computed by the caller), or None.
      sp_value    stack-pointer snapshot used for register 3 based
                  operations and DW_OP_call_frame_cfa, or None.

    The evaluator is stack based.  When the expression ends, the top of
    the stack is reported as a memory address (masked to 24 bits), or,
    if DW_OP_stack_value was seen, as a value (masked to 32 bits).
    DW_OP_regN / DW_OP_regx return a "register" result immediately.

    Never raises on malformed input: unknown opcodes, DW_OP_deref,
    DW_OP_piece, missing frame_base / SP, operand-stack underflow and
    expressions truncated in the middle of an operand all come back as
    LocResult("unsupported", reason=...).
    """
    if expr is None or len(expr) == 0:
        return LocResult("unsupported", reason="empty expression")

    def signed(raw, bits):
        # Reinterpret an unsigned `bits`-wide integer as two's complement.
        return raw - (1 << bits) if raw >= (1 << (bits - 1)) else raw

    stack = []
    is_value = False
    off = 0
    n = len(expr)
    try:
        while off < n:
            op = expr[off]
            off += 1

            if op == DW_OP_addr:
                # 4-byte addr in our DWARF.
                addr, off = readU32(expr, off)
                stack.append(addr)
                continue

            # ---- constants ------------------------------------------
            if op == DW_OP_const1u:
                v, off = readU8(expr, off)
                stack.append(v)
                continue
            if op == DW_OP_const1s:
                v, off = readU8(expr, off)
                stack.append(signed(v, 8))
                continue
            if op == DW_OP_const2u:
                v, off = readU16(expr, off)
                stack.append(v)
                continue
            if op == DW_OP_const2s:
                v, off = readU16(expr, off)
                stack.append(signed(v, 16))
                continue
            if op == DW_OP_const4u:
                v, off = readU32(expr, off)
                stack.append(v)
                continue
            if op == DW_OP_const4s:
                v, off = readU32(expr, off)
                stack.append(signed(v, 32))
                continue
            if op == DW_OP_constu:
                v, off = readUleb(expr, off)
                stack.append(v)
                continue
            if op == DW_OP_consts:
                v, off = readSleb(expr, off)
                stack.append(v)
                continue

            # ---- arithmetic -----------------------------------------
            if op == DW_OP_plus:
                if len(stack) < 2:
                    return LocResult("unsupported",
                                     reason="DW_OP_plus stack underflow")
                rhs = stack.pop()
                lhs = stack.pop()
                stack.append(lhs + rhs)
                continue
            if op == DW_OP_plus_uconst:
                inc, off = readUleb(expr, off)
                if not stack:
                    return LocResult("unsupported",
                                     reason="DW_OP_plus_uconst stack underflow")
                stack.append(stack.pop() + inc)
                continue

            # ---- frame / CFA relative -------------------------------
            if op == DW_OP_fbreg:
                ofs, off = readSleb(expr, off)
                if frame_base is None:
                    return LocResult("unsupported",
                                     reason="DW_OP_fbreg without frame_base")
                stack.append(frame_base + ofs)
                continue
            if op == DW_OP_call_frame_cfa:
                if sp_value is None:
                    return LocResult("unsupported",
                                     reason="DW_OP_call_frame_cfa without SP")
                stack.append(sp_value + 1)
                continue

            # ---- register locations ---------------------------------
            if 0x50 <= op <= 0x6f:
                # DW_OP_regN: the object lives in register N, not in
                # memory, so a register LocResult is returned at once.
                reg = op - 0x50
                if reg == 3 and sp_value is not None:
                    # SP-as-register is unusual but legal; report the
                    # post-skew stack address (the frame-base byte).
                    return LocResult("register", reg_dw=reg,
                                     dp_addr=sp_value + 1)
                return LocResult("register", reg_dw=reg,
                                 dp_addr=W65816_DW_REG_TO_DP.get(reg))
            if op == DW_OP_regx:
                reg, off = readUleb(expr, off)
                return LocResult("register", reg_dw=reg,
                                 dp_addr=W65816_DW_REG_TO_DP.get(reg))

            # ---- register + offset addresses ------------------------
            if 0x70 <= op <= 0x8f:
                # DW_OP_bregN: register N + signed offset, pushed as an
                # address.
                reg = op - 0x70
                ofs, off = readSleb(expr, off)
                if reg == 3:
                    if sp_value is None:
                        return LocResult("unsupported",
                                         reason="DW_OP_breg3 without SP")
                    stack.append(sp_value + 1 + ofs)
                else:
                    dp = W65816_DW_REG_TO_DP.get(reg)
                    if dp is None:
                        return LocResult("unsupported",
                                         reason=f"DW_OP_breg{reg} unmapped")
                    stack.append(dp + ofs)
                continue
            if op == DW_OP_bregx:
                reg, off = readUleb(expr, off)
                ofs, off = readSleb(expr, off)
                if reg == 3:
                    if sp_value is None:
                        return LocResult("unsupported",
                                         reason="DW_OP_bregx(SP) without SP")
                    stack.append(sp_value + 1 + ofs)
                else:
                    dp = W65816_DW_REG_TO_DP.get(reg)
                    if dp is None:
                        return LocResult("unsupported",
                                         reason=f"DW_OP_bregx({reg}) unmapped")
                    stack.append(dp + ofs)
                continue

            # ---- explicitly unsupported / terminators ---------------
            if op == DW_OP_deref:
                # Needs a target-memory read callback; out of scope.
                return LocResult("unsupported",
                                 reason="DW_OP_deref not handled")
            if op == DW_OP_stack_value:
                is_value = True
                break
            if op == DW_OP_piece:
                # Composite locations are out of scope for this slice;
                # surfacing them as unsupported is honest.
                return LocResult("unsupported",
                                 reason="DW_OP_piece composite not handled")
            return LocResult("unsupported",
                             reason=f"DW_OP 0x{op:x} not handled")
    except (IndexError, struct.error):
        # An operand ran past the end of the expression bytes.
        return LocResult("unsupported", reason="truncated DWARF expression")

    if not stack:
        return LocResult("unsupported", reason="empty expression stack")
    top = stack[-1]
    if is_value:
        return LocResult("value", value=top & 0xFFFFFFFF)
    return LocResult("memory", addr=top & 0xFFFFFF)
return LocResult("register", reg_dw=reg, dp_addr=sp_value + 1) return LocResult("register", reg_dw=reg, dp_addr=W65816_DW_REG_TO_DP.get(reg)) if op == DW_OP_regx: reg, off = readUleb(expr, off) return LocResult("register", reg_dw=reg, dp_addr=W65816_DW_REG_TO_DP.get(reg)) if 0x70 <= op <= 0x8f: # DW_OP_bregN — register N + signed offset, pushed as address. reg = op - 0x70 ofs, off = readSleb(expr, off) if reg == 3: if sp_value is None: return LocResult("unsupported", reason="DW_OP_breg3 without SP") stack.append(sp_value + 1 + ofs) else: dp = W65816_DW_REG_TO_DP.get(reg) if dp is None: return LocResult("unsupported", reason=f"DW_OP_breg{reg} unmapped") stack.append(dp + ofs) continue if op == DW_OP_bregx: reg, off = readUleb(expr, off) ofs, off = readSleb(expr, off) if reg == 3: if sp_value is None: return LocResult("unsupported", reason="DW_OP_bregx(SP) without SP") stack.append(sp_value + 1 + ofs) else: dp = W65816_DW_REG_TO_DP.get(reg) if dp is None: return LocResult("unsupported", reason=f"DW_OP_bregx({reg}) unmapped") stack.append(dp + ofs) continue if op == DW_OP_deref: # Need a memory read function to honor this; out of scope. return LocResult("unsupported", reason="DW_OP_deref not handled") if op == DW_OP_stack_value: is_value = True break if op == DW_OP_piece: # Composite — out of scope for this slice (multi-piece i32 # IMG-resident locals). Surfacing as unsupported is honest. return LocResult("unsupported", reason="DW_OP_piece composite not handled") return LocResult("unsupported", reason=f"DW_OP 0x{op:x} not handled") if not stack: return LocResult("unsupported", reason="empty expression stack") top = stack[-1] if is_value: return LocResult("value", value=top & 0xFFFFFFFF) return LocResult("memory", addr=top & 0xFFFFFF) # ---- Type-chain resolution for --locals ------------------------------ def _findDieByOffset(cu, target_cu_off): """Walk the CU's DIE tree to find the DIE whose cu-relative offset matches `target_cu_off`. 
def typeChain(cu, die):
    """Render the type DIE `die` (and its DW_AT_type chain) as a C-ish string.

    Walks pointer / const / volatile / restrict links until a naming
    DIE is reached:

      * pointer_type adds "*" to the suffix; const/volatile add their
        keyword to the prefix; restrict is followed without adding text.
      * A link with no DW_AT_type means `void`; qualifiers collected so
        far are kept (e.g. const -> untyped pointer gives "const void *").
      * typedef / base_type end the chain with the DIE's name ("?" if
        unnamed).
      * array_type renders "<elem>[<n>]", where n comes from the first
        subrange child's integer DW_AT_upper_bound + 1; the brackets are
        left empty when no usable bound exists.
      * struct / union / enum / class render as "<keyword> <name>".
      * subroutine_type renders just the collected prefix and suffix.

    Best effort: a None `die`, an unknown tag, a dangling reference or
    a cyclic chain all produce an empty string.
    """
    if die is None:
        return ""

    # Text each qualifier contributes to the prefix (restrict: nothing).
    qualifiers = {
        DW_TAG_const_type: "const ",
        DW_TAG_volatile_type: "volatile ",
        DW_TAG_restrict_type: "",
    }
    aggregates = {
        DW_TAG_structure_type: "struct",
        DW_TAG_union_type: "union",
        DW_TAG_enumeration_type: "enum",
        DW_TAG_class_type: "class",
    }

    visited = set()  # DIE offsets already seen; guards against cycles
    prefix = ""
    suffix = ""
    cur = die
    while cur is not None and cur.offset not in visited:
        visited.add(cur.offset)
        tag = cur.tag

        if tag == DW_TAG_pointer_type or tag in qualifiers:
            if tag == DW_TAG_pointer_type:
                suffix = "*" + suffix
                void_text = "void "  # historical spelling: "void *"
            else:
                prefix = qualifiers[tag] + prefix
                void_text = "void"
            t = cur.getRaw(DW_AT_type)
            if t is None:
                # Keep the accumulated qualifiers; the pointer branch
                # previously discarded them.
                return (prefix + void_text + suffix).strip()
            cur = _findDieByOffset(cu, t[0])
            continue

        if tag in (DW_TAG_typedef, DW_TAG_base_type):
            nm = dieName(cu, cur) or "?"
            return (prefix + nm + suffix).strip()

        if tag == DW_TAG_array_type:
            # Only the first DW_TAG_subrange_type child is consulted.
            bound = None
            for ch in cur.children:
                if ch.tag == DW_TAG_subrange_type:
                    ub = ch.getRaw(DW_AT_upper_bound)
                    # A bound stored as an expression or reference is
                    # not an int; treat it as unknown rather than
                    # failing on `+ 1`.
                    if ub is not None and isinstance(ub[0], int):
                        bound = ub[0] + 1
                    break
            t = cur.getRaw(DW_AT_type)
            elem = "?"
            if t is not None:
                elem = typeChain(cu, _findDieByOffset(cu, t[0]))
            bnd_str = "" if bound is None else str(bound)
            return f"{prefix}{elem}[{bnd_str}]{suffix}".strip()

        if tag in aggregates:
            kw = aggregates[tag]
            nm = dieName(cu, cur) or ""
            return f"{prefix}{kw} {nm}{suffix}".strip()

        if tag == DW_TAG_subroutine_type:
            return f"{prefix}{suffix}".strip()

        # Unknown link in the chain: bail.
        return ""
    return ""
def varTypeStr(cu, var_die):
    """Return the printable type of a variable / parameter DIE.

    Follows the DIE's DW_AT_type reference and renders it with
    typeChain(); a DIE without DW_AT_type yields an empty string.
    """
    type_ref = var_die.getRaw(DW_AT_type)
    if type_ref is None:
        return ""
    return typeChain(cu, _findDieByOffset(cu, type_ref[0]))


# ---- --locals 0xPC mode (Phase 3.2 slice 2) -------------------------

def _collectLocals(cu, die, pc, out, scope_stack):
    """Append to `out` the parameter / variable DIEs visible at `pc`.

    Direct children of `die` that are formal parameters or variables
    are collected.  Lexical blocks are descended into unless they
    report a PC range that excludes `pc` (a block with no low PC is
    treated as in scope).  Inlined subroutines are not descended into.
    `scope_stack` is only passed through to recursive calls.
    """
    wanted = (DW_TAG_formal_parameter, DW_TAG_variable)
    for node in die.children:
        kind = node.tag
        if kind == DW_TAG_lexical_block:
            lo, hi = diePcRange(cu, node)
            if lo is None or lo <= pc < hi:
                _collectLocals(cu, node, pc, out, scope_stack)
        elif kind == DW_TAG_inlined_subroutine:
            # Inlined bodies are out of scope for this slice.
            pass
        elif kind in wanted:
            out.append(node)


def localsAtPc(cus, pc, sp_value=None):
    """Describe every parameter / variable in scope at `pc`.

    Returns `(cu, subprogram_die, entries)` where each entry is
    `(name, type_str, LocResult, var_die)`.  When no subprogram covers
    `pc` the result is `(None, None, [])`.

    `sp_value` is the stack-pointer snapshot; it feeds the frame-base
    computation and the evaluation of each DW_AT_location expression.
    Variables without DW_AT_location, or whose location is not an
    inline expression, get an "unsupported" LocResult.
    """
    cu, sub = findSubprogramForPc(cus, pc)
    if sub is None:
        return (None, None, [])

    frame_base = _evalFrameBase(cu, sub, sp_value)
    in_scope = []
    _collectLocals(cu, sub, pc, in_scope, [])

    def locate(var):
        record = var.getRaw(DW_AT_location)
        if record is None:
            return LocResult("unsupported", reason="no DW_AT_location")
        value, form = record
        if not isinstance(value, (bytes, bytearray)):
            return LocResult("unsupported",
                             reason=f"DW_AT_location form 0x{form:x}")
        return evalDwOp(bytes(value), cu=cu, frame_base=frame_base,
                        sp_value=sp_value)

    entries = []
    for var in in_scope:
        name = dieName(cu, var) or ""
        type_str = varTypeStr(cu, var)
        entries.append((name, type_str, locate(var), var))
    return (cu, sub, entries)


def printLocals(cus, pc, sp_value):
    """Print the locals in scope at `pc`; return a process exit status.

    A "; PC ..." banner line naming the subprogram and S value comes
    first, then one line per variable starting with `VAR=... TYPE=...`
    and followed by fields chosen by the location kind:

      memory                 ADDR=0x......
      register (DP-mapped)   ADDR=0x...... REG=DW<n>
      register (unmapped)    REG=DW<n>
      value                  VALUE=0x...
      composite              COMPOSITE=
      anything else          UNSUPPORTED=<reason>

    Returns 1 (after a message on stderr) when no subprogram covers
    `pc`, otherwise 0.
    """
    cu, sub, locs = localsAtPc(cus, pc, sp_value=sp_value)
    if sub is None:
        print(f"pc2line: no DW_TAG_subprogram covers PC 0x{pc:06x}",
              file=sys.stderr)
        return 1

    sub_name = dieName(cu, sub) or ""
    sp_str = "" if sp_value is None else f"0x{sp_value:06x}"
    print(f"; PC 0x{pc:06x} in subprogram {sub_name!r}, S={sp_str}")

    if not locs:
        print("; (no formal_parameter or DW_TAG_variable in scope)")
        return 0

    for name, ty, loc, _die in locs:
        fields = [f"VAR={name} TYPE={ty}"]
        kind = loc.kind
        if kind == "memory":
            fields.append(f"ADDR=0x{loc.addr:06x}")
        elif kind == "register":
            if loc.dp_addr is not None:
                fields.append(f"ADDR=0x{loc.dp_addr:06x}")
            fields.append(f"REG=DW{loc.reg_dw}")
        elif kind == "value":
            fields.append(f"VALUE=0x{loc.value:x}")
        elif kind == "composite":
            fields.append("COMPOSITE=")
        else:
            fields.append(f"UNSUPPORTED={loc.reason}")
        print(" ".join(fields))
    return 0
[payload_bytes, ...]} from the sidecar.""" with open(path, "rb") as f: data = f.read() needle = b"; OBJ " out = {} i = 0 while True: h = data.find(needle, i) if h < 0: break nl = data.find(b"\n", h) if nl < 0: break header = data[h:nl].decode("utf-8", "replace") parts = header.split() if "SEC" in parts and "SIZE" in parts: sec = parts[parts.index("SEC") + 1] size = int(parts[parts.index("SIZE") + 1]) payload_start = nl + 1 payload = data[payload_start:payload_start + size] out.setdefault(sec, []).append(payload) i = payload_start + size else: i = nl + 1 return out def buildTable(sidecar_path): """Return list of (pc, file_idx, line, file_table).""" line_sections = loadSidecarSection(sidecar_path, ".debug_line") line_str_sections = loadSidecarSection(sidecar_path, ".debug_line_str") line_str = b"" for _, p in line_str_sections: line_str += p rows = [] file_tables = [] line_str_offsets = {} # Precompute line_str cstring offsets. off = 0 while off < len(line_str): end = line_str.find(b"\0", off) if end < 0: break line_str_offsets[off] = line_str[off:end].decode("utf-8", "replace") off = end + 1 for sec_name, payload in line_sections: i = 0 while i < len(payload): try: hdr, body_start = parseDwarf5Header(payload, i, len(payload)) except NotImplementedError as e: print(f"pc2line: {e}", file=sys.stderr) break file_tbl = [] for fe in hdr["files"]: # File entry's name field is at content type DW_LNCT_path=1 name_val = fe.get(1) if isinstance(name_val, str): file_tbl.append(name_val) elif isinstance(name_val, int): file_tbl.append(line_str_offsets.get(name_val, f"@{name_val}")) else: file_tbl.append("?") body = payload[body_start:hdr["end"]] for pc, fidx, ln, end_seq in runLineProgram(body, hdr, len(body)): if end_seq: continue rows.append((pc, fidx, ln, file_tbl)) i = hdr["end"] return rows def query(table, pc): """Return (pc, file, line) or None — largest-pc-<=-query match.""" candidates = [r for r in table if r[0] <= pc] if not candidates: return None best = 
def query(table, pc):
    """Look up the table row that covers `pc`.

    The winner is the row with the greatest address not above `pc`;
    among rows sharing that address the earliest one in `table` wins.
    Returns `(row_pc, file_basename, line)` or None when every row
    lies above `pc`.

    The row's file index is used 1-based against its file table; index
    0 and indexes past the end of the table are reported as "?".
    NOTE(review): DWARF 5 numbers file entries from 0 - confirm that
    the header parser's file list is laid out for 1-based lookup.
    """
    winner = None
    for row in table:
        if row[0] > pc:
            continue
        if winner is None or row[0] > winner[0]:
            winner = row
    if winner is None:
        return None

    row_pc, file_idx, line_no, names = winner[0], winner[1], winner[2], winner[3]
    if file_idx != 0 and file_idx <= len(names):
        fname = names[file_idx - 1]
    else:
        fname = "?"
    return (row_pc, os.path.basename(fname), line_no)


def loadMapSymbols(path):
    """Read `addr symbol` pairs from a link816 .map file.

    Only lines whose first token starts with "0x" and that have at
    least two tokens are considered; the first token is parsed as hex
    and the second is the symbol name.  Lines whose address does not
    parse are ignored.  Returns the pairs as a sorted list of
    `(addr, name)` tuples, or an empty list when `path` is falsy or
    does not exist.
    """
    if not (path and os.path.exists(path)):
        return []

    entries = []
    with open(path) as handle:
        for raw in handle:
            fields = raw.split()
            if len(fields) < 2 or not fields[0].startswith("0x"):
                continue
            try:
                addr = int(fields[0], 16)
            except ValueError:
                continue
            entries.append((addr, fields[1]))
    return sorted(entries)


def funcAt(syms, pc):
    """Return the name of the last symbol at or below `pc`, else '?'.

    `syms` must be sorted by address (as loadMapSymbols returns it);
    the lookup is a binary search.  An empty symbol name is also
    reported as '?'.
    """
    first, last = 0, len(syms) - 1
    hit = -1
    while first <= last:
        probe = (first + last) // 2
        if syms[probe][0] > pc:
            last = probe - 1
        else:
            hit = probe
            first = probe + 1
    if hit < 0:
        return "?"
    return syms[hit][1] or "?"
or decimal)") args = ap.parse_args() syms = loadMapSymbols(args.map) sp_value = None if args.sp is not None: sp_value = int(args.sp, 0) # SP is the 16-bit S register; bank-0 stack lives in bank 0, # so 24-bit byte addresses are just 0x000000 | (S & 0xFFFF). # Caller may pass either form; canonicalise to 24-bit. if sp_value <= 0xFFFF: sp_value &= 0xFFFF # DIE-walker modes use parseAllCus; the legacy line-table mode uses # buildTable. Both work off the same sidecar file. if args.dump_dies or args.list_cus or args.locals: payload_map = loadSidecarSectionsAll(args.sidecar) cus = parseAllCus(payload_map) if args.list_cus: for ci, cu in enumerate(cus): cu_name = dieName(cu, cu.root) if cu.root else None print(f"CU #{ci} v{cu.version} addr_size={cu.addr_size} " f"name={cu_name!r}") if cu.root is None: continue for child in cu.root.children: if child.tag != DW_TAG_subprogram: continue lo, hi = diePcRange(cu, child) nm = dieName(cu, child) or "" if lo is None: print(f" subprogram {nm!r} (no PC range)") else: print(f" subprogram {nm!r} " f"[0x{lo:06x}, 0x{hi:06x})") return 0 if args.locals: if not args.pcs: print("pc2line: --locals needs a PC", file=sys.stderr) return 2 rc = 0 for s in args.pcs: pc = int(s, 0) r = printLocals(cus, pc, sp_value) if r: rc = r return rc # --dump-dies path if not args.pcs: print("pc2line: --dump-dies needs a PC", file=sys.stderr) return 2 rc = 0 for s in args.pcs: pc = int(s, 0) r = dumpDiesAtPc(cus, pc) if r: rc = r return rc table = buildTable(args.sidecar) if args.dump: for pc, fidx, ln, ft in sorted(table): fname = ft[fidx - 1] if 0 < fidx <= len(ft) else "?" 
func = funcAt(syms, pc) print(f"0x{pc:06x}\t{os.path.basename(fname)}:{ln}\t{func}") return 0 if not args.pcs: print(f"pc2line: built {len(table)} entries", file=sys.stderr) print("pass PCs as positional args to resolve, or --dump for the table") return 2 for s in args.pcs: pc = int(s, 0) row = query(table, pc) func = funcAt(syms, pc) if row is None: print(f"PC=0x{pc:06x} NOT_FOUND FUNC={func}") else: r_pc, fname, ln = row print(f"PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func}") return 0 if __name__ == "__main__": sys.exit(main())