1755 lines
63 KiB
Python
Executable file
1755 lines
63 KiB
Python
Executable file
#!/usr/bin/env python3
|
|
# pc2line.py - PC -> source location resolver for W65816 binaries.
|
|
#
|
|
# Parses the DWARF .debug_line section from link816's debug sidecar
|
|
# (`--debug-out FILE`) and answers `file:line` queries for runtime PCs.
|
|
# The sidecar already has reloc-applied PCs — they refer to the final
|
|
# linked image — so we don't need link-map cross-walking.
|
|
#
|
|
# Standard tools (pyelftools, llvm-dwarfdump, addr2line) all choke on
|
|
# the W65816 ELF: pyelftools asserts on the DWARF address size, the
|
|
# others can't apply our target-specific relocations. So we ship a
|
|
# small DWARF 5 line-program decoder (DWARF §6.2) — ~200 LOC, no deps.
|
|
#
|
|
# Usage:
|
|
# scripts/pc2line.py --sidecar foo.dwarf 0x123A
|
|
# scripts/pc2line.py --sidecar foo.dwarf --dump
|
|
# scripts/pc2line.py --sidecar foo.dwarf 0x123A 0x4567
|
|
# scripts/pc2line.py --sidecar foo.dwarf --list-cus
|
|
# scripts/pc2line.py --sidecar foo.dwarf --dump-dies 0x123A
|
|
# scripts/pc2line.py --sidecar foo.dwarf --locals --sp 0x1FA 0x123A
|
|
|
|
import argparse
|
|
import os
|
|
import struct
|
|
import sys
|
|
|
|
|
|
# ---- ULEB128 / SLEB128 -----------------------------------------------
|
|
|
|
def readUleb(buf, off):
    """Decode one unsigned LEB128 integer from `buf` starting at `off`.

    Every byte contributes its low seven bits, least-significant group
    first; a byte whose top bit is clear ends the number.

    Returns (value, offset_of_the_byte_after_the_number).
    """
    value = 0
    bit_pos = 0
    more = True
    while more:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bit_pos
        bit_pos += 7
        # Top bit set means another group follows.
        more = bool(byte & 0x80)
    return value, off
|
|
|
|
|
|
def readSleb(buf, off):
    """Decode one signed LEB128 integer from `buf` starting at `off`.

    Groups of seven bits are accumulated exactly as for the unsigned
    encoding. Once the final byte is seen, the value is sign-extended when
    that byte's 0x40 bit is set and fewer than 64 bits were accumulated.

    Returns (value, offset_of_the_byte_after_the_number).
    """
    width_limit = 64
    value = 0
    bit_pos = 0
    while True:
        byte = buf[off]
        off += 1
        value |= (byte & 0x7F) << bit_pos
        bit_pos += 7
        if not byte & 0x80:
            break
    if (byte & 0x40) and bit_pos < width_limit:
        # `value` only has bits below `bit_pos`, so subtracting 2**bit_pos
        # is the same as OR-ing in an infinite run of sign bits.
        value -= 1 << bit_pos
    return value, off
|
|
|
|
|
|
def readU8(buf, off):
    """Return (byte_at_off, off + 1)."""
    value = buf[off]
    return value, off + 1
|
|
|
|
|
|
def readU16(buf, off):
    """Return (little-endian unsigned 16-bit value at `off`, off + 2)."""
    (value,) = struct.unpack_from("<H", buf, off)
    return value, off + 2
|
|
|
|
|
|
def readU32(buf, off):
    """Return (little-endian unsigned 32-bit value at `off`, off + 4)."""
    (value,) = struct.unpack_from("<I", buf, off)
    return value, off + 4
|
|
|
|
|
|
def readCStr(buf, off):
    """Read the NUL-terminated string that starts at `off`.

    The bytes up to (not including) the terminator are decoded as UTF-8
    with undecodable sequences replaced. Returns (text, offset_after_NUL).
    """
    nul_at = buf.index(0, off)
    text = buf[off:nul_at].decode("utf-8", "replace")
    return text, nul_at + 1
|
|
|
|
|
|
# ---- DWARF 5 line-program standard opcodes ---------------------------
|
|
|
|
# Standard opcodes: a single opcode byte below the header's opcode_base.
DW_LNS_copy = 1
DW_LNS_advance_pc = 2
DW_LNS_advance_line = 3
DW_LNS_set_file = 4
DW_LNS_set_column = 5
DW_LNS_negate_stmt = 6
DW_LNS_set_basic_block = 7
DW_LNS_const_add_pc = 8
DW_LNS_fixed_advance_pc = 9
DW_LNS_set_prologue_end = 10
DW_LNS_set_epilogue_begin = 11
DW_LNS_set_isa = 12

# Extended opcodes: introduced by a 0x00 byte and a ULEB128 length, with the
# sub-opcode below as the first byte of that length.
DW_LNE_end_sequence = 1
DW_LNE_set_address = 2
DW_LNE_set_discriminator = 4
|
|
|
|
|
|
# ---- Line-program header parsing (DWARF 5 §6.2.4) --------------------
|
|
|
|
def parseDwarf5Header(buf, off, section_end):
    """Decode one DWARF 5 line-program unit header starting at `off`.

    Returns (header, body_off). `header` is a dict with the keys `end`,
    `version`, `addr_size`, `seg_size`, `header_length`, `min_inst_length`,
    `max_ops_per_inst`, `default_is_stmt`, `line_base` (already converted to
    a signed value), `line_range`, `opcode_base`, `std_op_lens`, `dirs` and
    `files`. `dirs` / `files` are lists of {content_type_code: value} dicts.
    `body_off` is the offset just past the last file entry.

    Since Phase 1.3 (the FK_Data_4 -> R_W65816_DATA32 reloc fix in
    W65816ELFObjectWriter) fresh -g builds carry correct unit_length and
    header_length values. Older sidecars may have them zeroed, so:
      * unit_length == 0 falls back to "everything up to `section_end`";
      * header_length is recorded but never used to locate the body. The
        body offset is always wherever the forward scan of the
        variable-length directory / file tables finishes.

    Raises NotImplementedError for 64-bit DWARF (unit_length 0xFFFFFFFF)
    and for any version other than 5.
    """

    def readEntryTable(pos):
        # One table = u8 format count, (content type, form) ULEB pairs,
        # ULEB entry count, then the entries laid out per that format.
        fmt_count, pos = readU8(buf, pos)
        layout = []
        for _ in range(fmt_count):
            content_type, pos = readUleb(buf, pos)
            form, pos = readUleb(buf, pos)
            layout.append((content_type, form))
        entry_count, pos = readUleb(buf, pos)
        entries = []
        for _ in range(entry_count):
            fields = {}
            for content_type, form in layout:
                value, pos = readForm(buf, pos, form)
                fields[content_type] = value
            entries.append(fields)
        return entries, pos

    unit_length, off = readU32(buf, off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF not handled")
    if not unit_length:
        # Pre-reloc-fix sidecar: treat the rest of the section as the unit.
        unit_length = section_end - off
    header = {"end": off + unit_length}

    version, off = readU16(buf, off)
    header["version"] = version
    if version != 5:
        raise NotImplementedError(f"DWARF v{version} not handled (v5 only)")

    for key in ("addr_size", "seg_size"):
        header[key], off = readU8(buf, off)
    # Informational only; see the docstring.
    header["header_length"], off = readU32(buf, off)
    for key in ("min_inst_length", "max_ops_per_inst", "default_is_stmt"):
        header[key], off = readU8(buf, off)

    # line_base is stored as one byte but is a signed quantity.
    raw_line_base, off = readU8(buf, off)
    if raw_line_base >= 0x80:
        raw_line_base -= 0x100
    header["line_base"] = raw_line_base

    for key in ("line_range", "opcode_base"):
        header[key], off = readU8(buf, off)

    # standard_opcode_lengths has opcode_base - 1 one-byte entries.
    std_count = header["opcode_base"] - 1
    header["std_op_lens"] = list(buf[off:off + std_count])
    off += std_count

    dirs, off = readEntryTable(off)
    files, off = readEntryTable(off)
    header["dirs"] = dirs
    header["files"] = files
    return header, off
|
|
|
|
|
|
# DWARF forms (subset — what we see in our line-program file/dir entries
|
|
# and .debug_info DIE attributes for the Phase 3.2 DIE walker). Form codes
|
|
# are from DWARF 5 §7.5.5.
|
|
# Attribute form codes, in numeric order. readForm() decodes every form in
# this list except DW_FORM_indirect.
DW_FORM_addr = 0x01
DW_FORM_block2 = 0x03
DW_FORM_block4 = 0x04
DW_FORM_data2 = 0x05
DW_FORM_data4 = 0x06
DW_FORM_data8 = 0x07
DW_FORM_string = 0x08
DW_FORM_block = 0x09
DW_FORM_block1 = 0x0a
DW_FORM_data1 = 0x0b
DW_FORM_flag = 0x0c
DW_FORM_sdata = 0x0d
DW_FORM_strp = 0x0e
DW_FORM_udata = 0x0f
DW_FORM_ref_addr = 0x10
DW_FORM_ref1 = 0x11
DW_FORM_ref2 = 0x12
DW_FORM_ref4 = 0x13
DW_FORM_ref8 = 0x14
DW_FORM_ref_udata = 0x15
DW_FORM_indirect = 0x16
DW_FORM_sec_offset = 0x17
DW_FORM_exprloc = 0x18
DW_FORM_flag_present = 0x19
DW_FORM_strx = 0x1a
DW_FORM_addrx = 0x1b
DW_FORM_ref_sup4 = 0x1c
DW_FORM_strp_sup = 0x1d
DW_FORM_data16 = 0x1e
DW_FORM_line_strp = 0x1f
DW_FORM_ref_sig8 = 0x20
DW_FORM_implicit_const = 0x21
DW_FORM_loclistx = 0x22
DW_FORM_rnglistx = 0x23
DW_FORM_ref_sup8 = 0x24
DW_FORM_strx1 = 0x25
DW_FORM_strx2 = 0x26
DW_FORM_strx3 = 0x27
DW_FORM_strx4 = 0x28
DW_FORM_addrx1 = 0x29
DW_FORM_addrx2 = 0x2a
DW_FORM_addrx3 = 0x2b
DW_FORM_addrx4 = 0x2c
|
|
|
|
|
|
# DWARF 5 tag codes — only the ones we look at for the DIE walker.
|
|
# DIE tag codes, in numeric order. tagName() turns these back into text.
DW_TAG_array_type = 0x01
DW_TAG_class_type = 0x02
DW_TAG_enumeration_type = 0x04
DW_TAG_formal_parameter = 0x05
DW_TAG_lexical_block = 0x0b
DW_TAG_member = 0x0d
DW_TAG_pointer_type = 0x0f
DW_TAG_compile_unit = 0x11
DW_TAG_structure_type = 0x13
DW_TAG_subroutine_type = 0x15
DW_TAG_typedef = 0x16
DW_TAG_union_type = 0x17
DW_TAG_inheritance = 0x1c
DW_TAG_inlined_subroutine = 0x1d
DW_TAG_subrange_type = 0x21
DW_TAG_base_type = 0x24
DW_TAG_const_type = 0x26
DW_TAG_subprogram = 0x2e
DW_TAG_variable = 0x34
DW_TAG_volatile_type = 0x35
DW_TAG_restrict_type = 0x37
|
|
|
|
|
|
# DWARF 5 attribute codes (subset).
|
|
# Kept in numeric order so a wrong or duplicated code stands out. Each value
# must be unique: _buildNameMap() inverts this table for printing, and a
# duplicate would silently shadow another attribute's name.
DW_AT_sibling = 0x01
DW_AT_location = 0x02
DW_AT_name = 0x03
DW_AT_byte_size = 0x0b
DW_AT_bit_size = 0x0d
DW_AT_stmt_list = 0x10
DW_AT_low_pc = 0x11
DW_AT_high_pc = 0x12
DW_AT_language = 0x13
DW_AT_comp_dir = 0x1b
DW_AT_const_value = 0x1c
DW_AT_inline = 0x20
DW_AT_producer = 0x25
DW_AT_prototyped = 0x27
DW_AT_start_scope = 0x2c
DW_AT_upper_bound = 0x2f
DW_AT_abstract_origin = 0x31
DW_AT_artificial = 0x34
DW_AT_data_member_location = 0x38
DW_AT_decl_file = 0x3a
DW_AT_decl_line = 0x3b
DW_AT_encoding = 0x3e
DW_AT_external = 0x3f
DW_AT_frame_base = 0x40
DW_AT_specification = 0x47
DW_AT_type = 0x49
DW_AT_ranges = 0x55
DW_AT_call_column = 0x57
DW_AT_call_file = 0x58
DW_AT_call_line = 0x59
DW_AT_str_offsets_base = 0x72
DW_AT_addr_base = 0x73
DW_AT_rnglists_base = 0x74
# 0x7f, not 0x7d: in DWARF 5, 0x7d is DW_AT_call_return_pc and 0x7e is
# DW_AT_call_value.
DW_AT_call_origin = 0x7f
DW_AT_loclists_base = 0x8c
|
|
|
|
|
|
# Reverse-name tables for printing. Built by enumerating the module
|
|
# namespace once; values that aren't constants are filtered out.
|
|
def _buildNameMap(prefix):
    """Invert the module's `prefix`-named integer constants.

    Scans the module globals for names beginning with `prefix` whose value
    is an int and returns {value: name_with_prefix_removed}. If two names
    share a value, the one encountered later in the scan wins.
    """
    cut = len(prefix)
    return {
        value: name[cut:]
        for name, value in globals().items()
        if name.startswith(prefix) and isinstance(value, int)
    }
|
|
|
|
|
|
def readForm(buf, off, form, addr_size=4, dwarf_size=4, implicit_val=None):
    """Decode one attribute value encoded with DWARF form `form` at `off`.

    Returns (value, offset_after_value). The shape of `value` depends on
    the form:
      * block* / exprloc      -> bytes holding the block contents
      * ref1/2/4/8, ref_udata, ref_addr, ref_sup*, ref_sig8
                              -> the raw reference as stored (unresolved)
      * sec_offset, strp, line_strp, strp_sup, strx*, addrx*, loclistx,
        rnglistx              -> the raw offset / index; the caller
                                 resolves it through the matching table
      * string                -> the decoded inline string
      * data16                -> the 16 bytes rendered as a hex string
      * flag_present          -> 1 (consumes no bytes)
      * implicit_const        -> `implicit_val` (consumes no bytes)
      * everything else handled here -> an int

    `addr_size` is the CU's address size (4 on W65816 since DWARF treats
    24-bit PCs as 32-bit). `dwarf_size` is 4 for 32-bit DWARF; any other
    value selects 8-byte offsets (only 32-bit is supported end to end).

    Raises NotImplementedError for a form with no case below, e.g.
    DW_FORM_indirect.
    """

    def sliceInt(width):
        # Little-endian integer taken from a slice of `width` bytes.
        return int.from_bytes(buf[off:off + width], "little"), off + width

    def blockAt(length, start):
        # `length` payload bytes beginning right after the length prefix.
        return bytes(buf[start:start + length]), start + length

    if form == DW_FORM_addr:
        if addr_size == 4:
            return readU32(buf, off)
        if addr_size == 2:
            return readU16(buf, off)
        return sliceInt(addr_size)

    # Fixed-width scalars, grouped by width.
    if form in (DW_FORM_data1, DW_FORM_flag, DW_FORM_ref1,
                DW_FORM_strx1, DW_FORM_addrx1):
        return readU8(buf, off)
    if form in (DW_FORM_data2, DW_FORM_ref2,
                DW_FORM_strx2, DW_FORM_addrx2):
        return readU16(buf, off)
    if form in (DW_FORM_strx3, DW_FORM_addrx3):
        value = buf[off] | (buf[off + 1] << 8) | (buf[off + 2] << 16)
        return (value, off + 3)
    if form in (DW_FORM_data4, DW_FORM_ref4, DW_FORM_ref_sup4,
                DW_FORM_strx4, DW_FORM_addrx4):
        return readU32(buf, off)
    if form in (DW_FORM_data8, DW_FORM_ref8,
                DW_FORM_ref_sig8, DW_FORM_ref_sup8):
        return sliceInt(8)
    if form == DW_FORM_data16:
        return (buf[off:off + 16].hex(), off + 16)

    # Variable-length scalars and inline strings.
    if form == DW_FORM_sdata:
        return readSleb(buf, off)
    if form in (DW_FORM_udata, DW_FORM_ref_udata, DW_FORM_strx,
                DW_FORM_addrx, DW_FORM_loclistx, DW_FORM_rnglistx):
        return readUleb(buf, off)
    if form == DW_FORM_string:
        return readCStr(buf, off)

    # Section offsets: width follows the DWARF format, not the address size.
    if form in (DW_FORM_strp, DW_FORM_line_strp, DW_FORM_strp_sup,
                DW_FORM_sec_offset, DW_FORM_ref_addr):
        if dwarf_size == 4:
            return readU32(buf, off)
        return sliceInt(8)

    # Forms whose value lives in the abbreviation, not in the DIE bytes.
    if form == DW_FORM_flag_present:
        return (1, off)
    if form == DW_FORM_implicit_const:
        return (implicit_val, off)

    # Length-prefixed blocks.
    if form == DW_FORM_block1:
        length, data_off = readU8(buf, off)
        return blockAt(length, data_off)
    if form == DW_FORM_block2:
        length, data_off = readU16(buf, off)
        return blockAt(length, data_off)
    if form == DW_FORM_block4:
        length, data_off = readU32(buf, off)
        return blockAt(length, data_off)
    if form in (DW_FORM_block, DW_FORM_exprloc):
        length, data_off = readUleb(buf, off)
        return blockAt(length, data_off)

    raise NotImplementedError(f"DW_FORM 0x{form:x} not handled")
|
|
|
|
|
|
# ---- Sidecar parser --------------------------------------------------
|
|
|
|
def loadSidecarSection(path, section_name):
    """Collect every `section_name` payload stored in a link816 sidecar.

    The file is scanned for header lines of the form

        ; OBJ <path> SEC <name> SIZE <bytes> RELOCS_APPLIED <n> RELOCS_SKIPPED <n>

    Each header is followed, right after its newline, by SIZE raw payload
    bytes. Scanning resumes after that payload, so bytes inside a payload
    are never mistaken for a header.

    Returns a list of (section_name, payload_bytes) tuples in file order,
    one per matching header; the list is empty when the section does not
    appear. A payload is shorter than SIZE if the file ends early.

    Header lines that cannot be interpreted (no SEC / SIZE token, nothing
    after the token, or a SIZE that is not a non-negative integer) are
    skipped, the same way lines lacking SEC / SIZE always were, instead of
    raising. A negative SIZE in particular must not be honoured: it would
    move the scan position backwards and re-find the same header forever.
    """
    with open(path, "rb") as f:
        data = f.read()

    needle = b"; OBJ "
    matches = []
    pos = 0
    while True:
        header_at = data.find(needle, pos)
        if header_at < 0:
            break
        newline_at = data.find(b"\n", header_at)
        if newline_at < 0:
            break
        # Default resume point: the line after this header.
        pos = newline_at + 1
        parts = data[header_at:newline_at].decode("utf-8", "replace").split()
        try:
            sec = parts[parts.index("SEC") + 1]
            size = int(parts[parts.index("SIZE") + 1])
        except (ValueError, IndexError):
            # Not a section header we understand.
            continue
        if size < 0:
            continue
        if sec == section_name:
            matches.append((sec, data[pos:pos + size]))
        # Jump over the payload so its bytes are not scanned for headers.
        pos += size
    return matches
|
|
|
|
|
|
# ---- Line-program decoder --------------------------------------------
|
|
|
|
def runLineProgram(buf, header, h_end):
    """Execute a line-number program and yield the rows it emits.

    `buf` is read from offset 0 up to (not including) `h_end`; `header` is
    a dict as produced by parseDwarf5Header.

    Yields 4-tuples (pc, file_idx, line, end_of_sequence). The last element
    is True only for the row emitted by DW_LNE_end_sequence, after which
    the pc / line / file / is_stmt registers return to their initial
    values (0 / 1 / 1 / header default).
    """
    pc = 0
    src_line = 1
    file_idx = 1
    stmt = bool(header["default_is_stmt"])

    pos = 0
    while pos < h_end:
        opcode, pos = readU8(buf, pos)

        if opcode == 0:
            # Extended opcode: ULEB length, then sub-opcode + operands.
            ext_len, pos = readUleb(buf, pos)
            ext_op = buf[pos]
            operand_at = pos + 1
            if ext_op == DW_LNE_end_sequence:
                yield (pc, file_idx, src_line, True)
                pc = 0
                src_line = 1
                file_idx = 1
                stmt = bool(header["default_is_stmt"])
            elif ext_op == DW_LNE_set_address:
                addr_size = header["addr_size"]
                if addr_size == 4:
                    pc, _ = readU32(buf, operand_at)
                elif addr_size == 2:
                    pc, _ = readU16(buf, operand_at)
                else:
                    pc = int.from_bytes(
                        buf[operand_at:operand_at + addr_size], "little")
            # The length counts the sub-opcode byte, so this lands on the
            # next opcode whether or not the sub-opcode was understood.
            pos += ext_len
            continue

        if opcode < header["opcode_base"]:
            # Standard opcode.
            if opcode == DW_LNS_copy:
                yield (pc, file_idx, src_line, False)
            elif opcode == DW_LNS_advance_pc:
                steps, pos = readUleb(buf, pos)
                pc += steps * header["min_inst_length"]
            elif opcode == DW_LNS_advance_line:
                delta, pos = readSleb(buf, pos)
                src_line += delta
            elif opcode == DW_LNS_set_file:
                file_idx, pos = readUleb(buf, pos)
            elif opcode in (DW_LNS_set_column, DW_LNS_set_isa):
                # One ULEB operand that is not tracked.
                _, pos = readUleb(buf, pos)
            elif opcode == DW_LNS_negate_stmt:
                stmt = not stmt
            elif opcode in (DW_LNS_set_basic_block,
                            DW_LNS_set_prologue_end,
                            DW_LNS_set_epilogue_begin):
                # Flag-only opcodes with no operands; nothing to record.
                pass
            elif opcode == DW_LNS_const_add_pc:
                # Same address advance special opcode 255 would apply.
                steps = (255 - header["opcode_base"]) // header["line_range"]
                pc += steps * header["min_inst_length"]
            elif opcode == DW_LNS_fixed_advance_pc:
                # Raw 16-bit operand, not scaled by min_inst_length.
                delta, pos = readU16(buf, pos)
                pc += delta
            else:
                # Unknown standard opcode: skip as many ULEB operands as the
                # header's standard_opcode_lengths entry says it takes.
                for _ in range(header["std_op_lens"][opcode - 1]):
                    _, pos = readUleb(buf, pos)
            continue

        # Special opcode (most common): advances pc and line, emits a row.
        adjusted = opcode - header["opcode_base"]
        pc_steps, line_step = divmod(adjusted, header["line_range"])
        pc += pc_steps * header["min_inst_length"]
        src_line += header["line_base"] + line_step
        yield (pc, file_idx, src_line, False)
|
|
|
|
|
|
# ---- .debug_abbrev parser --------------------------------------------
|
|
#
|
|
# Each CU has an abbrev_offset that points into .debug_abbrev. A table
|
|
# at that offset is a list of abbrev entries terminated by code 0.
|
|
# Each entry is: ULEB code, ULEB tag, u8 children, then (ULEB attr,
|
|
# ULEB form, optional SLEB implicit_const if form==implicit_const)*
|
|
# terminated by attr==0 form==0.
|
|
|
|
def parseAbbrevTable(buf, base):
    """Decode the abbreviation table that begins at offset `base` of `buf`.

    Declarations are read until a zero abbreviation code or the end of
    `buf`. Each declaration is: ULEB code, ULEB tag, one has-children byte,
    then (ULEB attribute, ULEB form) pairs ending with a 0/0 pair. A pair
    whose form is DW_FORM_implicit_const is followed by an SLEB constant.

    Returns {code: (tag, has_children, [(attr, form, implicit_const), ...])}
    where implicit_const is None for every other form.
    """
    abbrevs = {}
    pos = base
    limit = len(buf)
    while pos < limit:
        code, pos = readUleb(buf, pos)
        if not code:
            # Zero code terminates the table.
            break
        tag, pos = readUleb(buf, pos)
        children_flag, pos = readU8(buf, pos)

        specs = []
        while True:
            attr, pos = readUleb(buf, pos)
            form, pos = readUleb(buf, pos)
            if not (attr or form):
                break
            const = None
            if form == DW_FORM_implicit_const:
                const, pos = readSleb(buf, pos)
            specs.append((attr, form, const))

        abbrevs[code] = (tag, bool(children_flag), specs)
    return abbrevs
|
|
|
|
|
|
# ---- .debug_str / .debug_line_str / .debug_str_offsets resolution ----
|
|
#
|
|
# In a link816 sidecar the per-CU .debug_str_offsets entries are zeroed
|
|
# (the .rela.debug_str_offsets relocs target .debug_str, which isn't in
|
|
# the resolveSym kind-set — text/rodata/bss/init_array). This is a known
|
|
# Phase-3.2-slice-1 gap noted in the GAP_CLOSURE_PLAN: until link816 is
|
|
# extended to resolve intra-debug relocs, strx-form names come out as
|
|
# `@<idx>` placeholders. Line-strp names DO resolve because line_str
|
|
# is referenced by literal offset inside .debug_line, not by a reloc.
|
|
|
|
def resolveStr(strtab, offset):
    """Return the NUL-terminated string at `offset` in `strtab`.

    Returns None when `strtab` is empty or None, when `offset` is at or
    past its end, or when no terminator follows `offset`. The bytes are
    decoded as UTF-8 with undecodable sequences replaced.
    """
    if strtab and offset < len(strtab):
        nul_at = strtab.find(b"\0", offset)
        if nul_at >= 0:
            return strtab[offset:nul_at].decode("utf-8", "replace")
    return None
|
|
|
|
|
|
def resolveStrx(str_offsets, debug_str, str_offsets_base, idx, dwarf_size=4):
    """Resolve string index `idx` through the str_offsets table.

    `str_offsets_base` is the offset of entry 0 (it points past the table
    header); each entry is `dwarf_size` bytes. The entry holds an offset
    into `debug_str`, which is then looked up with resolveStr.

    Returns None when either table is None, when the entry would extend
    past the end of `str_offsets`, or when resolveStr cannot find the
    string.
    """
    if str_offsets is None or debug_str is None:
        return None
    slot = str_offsets_base + idx * dwarf_size
    if slot + dwarf_size > len(str_offsets):
        return None
    # 4-byte entries for 32-bit DWARF; every other size reads 8 bytes.
    width = 4 if dwarf_size == 4 else 8
    str_off = int.from_bytes(str_offsets[slot:slot + width], "little")
    return resolveStr(debug_str, str_off)
|
|
|
|
|
|
def resolveAddrx(addr_section, addr_base, idx, addr_size=4):
    """Fetch entry `idx` of the address table as a little-endian integer.

    `addr_base` is the offset of entry 0 (it points past the table header)
    and each entry is `addr_size` bytes. Returns None when `addr_section`
    is None or the entry would extend past its end.
    """
    if addr_section is None:
        return None
    start = addr_base + idx * addr_size
    stop = start + addr_size
    if stop > len(addr_section):
        return None
    return int.from_bytes(addr_section[start:stop], "little")
|
|
|
|
|
|
# ---- .debug_info DIE walker -------------------------------------------
|
|
|
|
class Die:
    """One debugging information entry plus its place in the DIE tree."""

    def __init__(self, offset, tag, attrs):
        # `offset` is CU-relative so ref4-style references can be matched.
        self.offset = offset
        self.tag = tag
        # Maps attribute code -> (raw_value, form_code).
        self.attrs = attrs
        self.parent = None
        self.children = []

    def getRaw(self, at_code):
        """Return the (raw_value, form_code) pair for `at_code`, or None."""
        return self.attrs.get(at_code)

    def get(self, at_code):
        """Return only the raw value for `at_code`, or None if absent."""
        rec = self.attrs.get(at_code)
        return None if rec is None else rec[0]
|
|
|
|
|
|
class Cu:
    """State for one compile unit: header fields, abbrevs and DIE tree."""

    def __init__(self):
        # Unit header.
        self.version = 0
        self.unit_type = 0
        self.addr_size = 4
        self.dwarf_size = 4
        self.abbrev_off = 0
        # Extent of the unit inside .debug_info.
        self.cu_start = 0    # where the CU header begins
        self.body_start = 0  # where the first DIE begins
        self.body_end = 0
        # Decoded contents.
        self.abbrev = {}
        self.root = None
        # Table bases for indexed forms, taken from the root DIE's attrs.
        self.str_offsets_base = 0
        self.addr_base = 0
        # Section tables this unit resolves strings / addresses through.
        self.debug_str = None
        self.line_str = None
        self.str_offsets = None
        self.addr_section = None
        # Source file paths from the matching .debug_line unit.
        self.files = []

    def lookupStr(self, idx):
        """Resolve a strx index; yields `@strx<idx>` when it cannot be."""
        text = resolveStrx(self.str_offsets, self.debug_str,
                           self.str_offsets_base, idx, self.dwarf_size)
        return f"@strx{idx}" if text is None else text

    def lookupLineStr(self, off):
        """Resolve a line_strp offset; `@linestr0x<off>` when it cannot be."""
        text = resolveStr(self.line_str, off)
        return f"@linestr0x{off:x}" if text is None else text

    def lookupStrp(self, off):
        """Resolve a strp offset; `@str0x<off>` when it cannot be."""
        text = resolveStr(self.debug_str, off)
        return f"@str0x{off:x}" if text is None else text

    def lookupAddr(self, idx):
        """Resolve an addrx index to an address, or None when it cannot be."""
        return resolveAddrx(self.addr_section, self.addr_base, idx,
                            self.addr_size)
|
|
|
|
|
|
def parseDie(buf, off, cu, parent=None):
    """Decode the DIE at `off` and, recursively, all of its children.

    `off` is an absolute offset into the .debug_info buffer; the resulting
    Die stores `off - cu.cu_start` so CU-relative references can find it.

    Returns (die, next_off). `die` is None for a null entry (abbreviation
    code 0) and also when the code is missing from `cu.abbrev`; callers
    treat both as "no more siblings here".
    """
    start = off
    code, off = readUleb(buf, off)
    # Code 0 is the sibling-list terminator. An unknown code means the data
    # is malformed; report it the same way rather than guess at a layout.
    if code == 0 or code not in cu.abbrev:
        return (None, off)

    tag, has_kids, attr_specs = cu.abbrev[code]
    values = {}
    for attr, form, implicit in attr_specs:
        raw, off = readForm(buf, off, form,
                            addr_size=cu.addr_size,
                            dwarf_size=cu.dwarf_size,
                            implicit_val=implicit)
        values[attr] = (raw, form)

    node = Die(start - cu.cu_start, tag, values)
    node.parent = parent

    if has_kids:
        kids = node.children
        while off < cu.body_end:
            kid, off = parseDie(buf, off, cu, parent=node)
            if kid is None:
                break
            kids.append(kid)
    return (node, off)
|
|
|
|
|
|
def parseDebugInfoCu(info_buf, info_off, abbrev_buf, sections):
    """Decode the unit header and DIE tree at .debug_info offset `info_off`.

    `sections` must provide the keys "debug_str", "line_str", "str_offsets"
    and "addr"; their values are attached to the returned Cu for later
    string / address lookups.

    Returns (cu, next_off) where next_off is the offset just past this unit
    (header offset + 4 + unit_length).

    Raises NotImplementedError for 64-bit DWARF and for unit versions other
    than 4 and 5.
    """
    cu = Cu()
    cu.cu_start = info_off
    cu.debug_str = sections["debug_str"]
    cu.line_str = sections["line_str"]
    cu.str_offsets = sections["str_offsets"]
    cu.addr_section = sections["addr"]

    unit_length, pos = readU32(info_buf, info_off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF .debug_info not handled")
    unit_end = pos + unit_length
    cu.dwarf_size = 4

    version, pos = readU16(info_buf, pos)
    cu.version = version
    if version == 5:
        # v5 order: unit_type, address_size, abbrev_offset.
        cu.unit_type, pos = readU8(info_buf, pos)
        cu.addr_size, pos = readU8(info_buf, pos)
        cu.abbrev_off, pos = readU32(info_buf, pos)
    elif version == 4:
        # v4 order: abbrev_offset, address_size; there is no unit_type.
        cu.abbrev_off, pos = readU32(info_buf, pos)
        cu.addr_size, pos = readU8(info_buf, pos)
        cu.unit_type = 0
    else:
        raise NotImplementedError(f"DWARF v{cu.version} CU not handled")

    cu.body_start = pos
    cu.body_end = unit_end
    cu.abbrev = parseAbbrevTable(abbrev_buf, cu.abbrev_off)

    # parseDie leaves strx / addrx values as raw indices, so the table
    # bases can be picked off the root DIE after the whole tree is built.
    cu.root, _ = parseDie(info_buf, pos, cu)
    root = cu.root
    if root is not None:
        for at_code, field in ((DW_AT_str_offsets_base, "str_offsets_base"),
                               (DW_AT_addr_base, "addr_base")):
            rec = root.getRaw(at_code)
            if rec is not None:
                setattr(cu, field, rec[0])
    return cu, unit_end
|
|
|
|
|
|
def parseAllCus(payload_map):
    """Parse every compile unit found in the sidecar's .debug_info slices.

    `payload_map` maps a section name to a list of payloads, one per input
    object. The i-th .debug_info payload is paired with the i-th
    .debug_abbrev payload, because a CU's abbrev offset is relative to its
    own object's .debug_abbrev. The string / address sections are indexed
    per slice the same way and default to b"" when a slice has none.

    A CU that raises NotImplementedError is reported on stderr and ends
    parsing of that slice; CUs already parsed are kept.

    Returns a list of Cu objects (empty when either .debug_info or
    .debug_abbrev is absent).
    """
    info_payloads = payload_map.get(".debug_info", [])
    abbrev_payloads = payload_map.get(".debug_abbrev", [])
    if not (info_payloads and abbrev_payloads):
        return []

    aux_sections = (
        ("debug_str", ".debug_str"),
        ("line_str", ".debug_line_str"),
        ("str_offsets", ".debug_str_offsets"),
        ("addr", ".debug_addr"),
    )

    cus = []
    # zip() stops at the shorter list, so unpaired slices are ignored.
    for i, (info_buf, abbrev_buf) in enumerate(
            zip(info_payloads, abbrev_payloads)):
        slice_sections = {}
        for key, sec_name in aux_sections:
            payloads = payload_map.get(sec_name, [])
            slice_sections[key] = payloads[i] if i < len(payloads) else b""

        off = 0
        while off < len(info_buf):
            try:
                cu, off = parseDebugInfoCu(info_buf, off, abbrev_buf,
                                           slice_sections)
            except NotImplementedError as e:
                print(f"pc2line: skipping CU: {e}", file=sys.stderr)
                break
            cus.append(cu)
    return cus
|
|
|
|
|
|
# ---- DIE search / attribute formatting -------------------------------
|
|
|
|
# Caches for the reverse (numeric code -> name) tables. Each stays None
# until tagName / attrName / formName is first called, which fills it in
# from _buildNameMap.
_TAG_NAMES = None
_AT_NAMES = None
_FORM_NAMES = None
|
|
|
|
|
|
def tagName(code):
    """Return the name of tag `code` with the DW_TAG_ prefix removed.

    Unknown codes are rendered as `DW_TAG_<0x..>`. The reverse table is
    built on first use and cached in the module-level _TAG_NAMES.
    """
    global _TAG_NAMES
    names = _TAG_NAMES
    if names is None:
        names = _buildNameMap("DW_TAG_")
        _TAG_NAMES = names
    placeholder = f"DW_TAG_<0x{code:x}>"
    return names.get(code, placeholder)
|
|
|
|
|
|
def attrName(code):
    """Return the name of attribute `code` with the DW_AT_ prefix removed.

    Unknown codes are rendered as `DW_AT_<0x..>`. The reverse table is
    built on first use and cached in the module-level _AT_NAMES.
    """
    global _AT_NAMES
    names = _AT_NAMES
    if names is None:
        names = _buildNameMap("DW_AT_")
        _AT_NAMES = names
    placeholder = f"DW_AT_<0x{code:x}>"
    return names.get(code, placeholder)
|
|
|
|
|
|
def formName(code):
    """Return the name of form `code` with the DW_FORM_ prefix removed.

    Unknown codes are rendered as `DW_FORM_<0x..>`. The reverse table is
    built on first use and cached in the module-level _FORM_NAMES.
    """
    global _FORM_NAMES
    names = _FORM_NAMES
    if names is None:
        names = _buildNameMap("DW_FORM_")
        _FORM_NAMES = names
    placeholder = f"DW_FORM_<0x{code:x}>"
    return names.get(code, placeholder)
|
|
|
|
|
|
def dieName(cu, die):
    """Return the DIE's DW_AT_name as text, or None.

    None is returned when the attribute is absent or uses a form other
    than an inline string, strp, line_strp or one of the strx forms. The
    table-backed forms go through the Cu lookup helpers, so an unresolved
    entry comes back as that helper's `@...` placeholder rather than None.
    """
    rec = die.getRaw(DW_AT_name)
    if rec is None:
        return None
    raw, form = rec
    if form == DW_FORM_string:
        # Inline string: readForm already decoded it.
        return raw
    if form == DW_FORM_strp:
        return cu.lookupStrp(raw)
    if form == DW_FORM_line_strp:
        return cu.lookupLineStr(raw)
    strx_forms = (DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2,
                  DW_FORM_strx3, DW_FORM_strx4)
    if form in strx_forms:
        return cu.lookupStr(raw)
    return None
|
|
|
|
|
|
def diePcRange(cu, die):
    """Return the half-open PC range (low_pc, high_pc) of a DIE.

    Returns (None, None) when the DIE has no DW_AT_low_pc or its low_pc
    index cannot be resolved, and (lo, lo) when DW_AT_high_pc is absent.

    Per DWARF 5 §2.17.2, DW_AT_high_pc is either
      * of class address (DW_FORM_addr, or an addrx form indexing the
        address table): the absolute address one past the last
        instruction; or
      * of class constant (data4 / udata / ...; what clang emits): an
        unsigned offset from low_pc.

    An addrx-form high_pc is resolved through `cu.lookupAddr`; treating
    that index as an offset would produce a bogus `lo + index` end. If
    the index cannot be resolved the range degrades to (lo, lo), the same
    as a DIE without a high_pc.
    """
    addrx_forms = (DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2,
                   DW_FORM_addrx3, DW_FORM_addrx4)

    lo_rec = die.getRaw(DW_AT_low_pc)
    if lo_rec is None:
        return (None, None)
    lo_val, lo_form = lo_rec
    lo = cu.lookupAddr(lo_val) if lo_form in addrx_forms else lo_val
    if lo is None:
        return (None, None)

    hi_rec = die.getRaw(DW_AT_high_pc)
    if hi_rec is None:
        return (lo, lo)
    hi_val, hi_form = hi_rec
    if hi_form == DW_FORM_addr:
        # Absolute end address stored inline.
        return (lo, hi_val)
    if hi_form in addrx_forms:
        # Absolute end address stored in the address table.
        hi = cu.lookupAddr(hi_val)
        return (lo, lo if hi is None else hi)
    # Constant class: offset from low_pc.
    return (lo, lo + hi_val)
|
|
|
|
|
|
def findSubprogramForPc(cus, pc):
    """Find the subprogram whose [low_pc, high_pc) range contains `pc`.

    Only DW_TAG_subprogram DIEs that are direct children of a CU's root
    DIE are considered; the first match in iteration order wins.

    Returns (cu, subprogram_die), or (None, None) when nothing matches.
    """
    for cu in cus:
        root = cu.root
        if root is None:
            continue
        for die in root.children:
            if die.tag == DW_TAG_subprogram:
                lo, hi = diePcRange(cu, die)
                if lo is not None and lo <= pc < hi:
                    return (cu, die)
    return (None, None)
|
|
|
|
|
|
def formatAttr(cu, at, val, form):
    """Render one attribute value as text for the --dump-dies output.

    Rules, applied in order:
      * DW_AT_name in a string form -> the resolved string
      * block / exprloc forms       -> `<expr HEX>`
      * addrx forms                 -> resolved address as 0x%06x, or
                                       `addrx[N]=?` when unresolved
      * DW_FORM_addr                -> 0x%06x
      * flag / flag_present         -> `true` / `false`
      * any other int               -> `N (0xN)`
      * any other bytes             -> hex digits
      * anything else               -> repr()
    """
    if at == DW_AT_name:
        if form == DW_FORM_string:
            return val
        if form == DW_FORM_strp:
            return cu.lookupStrp(val)
        if form == DW_FORM_line_strp:
            return cu.lookupLineStr(val)
        if form in (DW_FORM_strx, DW_FORM_strx1, DW_FORM_strx2,
                    DW_FORM_strx3, DW_FORM_strx4):
            return cu.lookupStr(val)
        # A name in any other form falls through to the generic rules.

    if form in (DW_FORM_exprloc, DW_FORM_block, DW_FORM_block1,
                DW_FORM_block2, DW_FORM_block4):
        return f"<expr {val.hex()}>"

    if form in (DW_FORM_addrx, DW_FORM_addrx1, DW_FORM_addrx2,
                DW_FORM_addrx3, DW_FORM_addrx4):
        resolved = cu.lookupAddr(val)
        if resolved is None:
            return f"addrx[{val}]=?"
        return f"0x{resolved:06x}"

    if form == DW_FORM_addr:
        return f"0x{val:06x}"

    if form in (DW_FORM_flag, DW_FORM_flag_present):
        if val:
            return "true"
        return "false"

    if isinstance(val, int):
        return f"{val} (0x{val:x})"
    if isinstance(val, bytes):
        return val.hex()
    return repr(val)
|
|
|
|
|
|
def dumpDieRecursive(cu, die, depth, out):
    """Append a text rendering of `die` and its subtree to the list `out`.

    Each DIE produces a heading line (offset, tag name and, if it has one,
    its quoted name) followed by one line per attribute other than
    DW_AT_name. Children are rendered the same way at `depth + 1`; the
    indent is one space per depth level.
    """
    pad = " " * depth
    label = dieName(cu, die)
    quoted = f' "{label}"' if label else ""
    out.append(f"{pad}<0x{die.offset:x}> {tagName(die.tag)}{quoted}")

    for at, (val, form) in die.attrs.items():
        # The name is already part of the heading line.
        if at != DW_AT_name:
            rendered = formatAttr(cu, at, val, form)
            out.append(
                f"{pad} {attrName(at)} ({formName(form)}) = {rendered}")

    for kid in die.children:
        dumpDieRecursive(cu, kid, depth + 1, out)
|
|
|
|
|
|
def dumpDiesAtPc(cus, pc):
    """Print the subprogram that covers `pc` and the variables under it.

    Output on stdout: a one-line summary with the subprogram's name and PC
    range, the full DIE dump of the subprogram, then a flat list of its
    parameters and variables (nested lexical blocks and inlined
    subroutines included).

    Returns 1 after printing a message on stderr when no subprogram covers
    `pc`; otherwise falls off the end and returns None.
    """
    cu, sub = findSubprogramForPc(cus, pc)
    if sub is None:
        message = f"pc2line: no DW_TAG_subprogram covers PC 0x{pc:06x}"
        print(message, file=sys.stderr)
        return 1

    lo, hi = diePcRange(cu, sub)
    sub_name = dieName(cu, sub) or "<unnamed>"
    banner = (f"PC 0x{pc:06x} -> subprogram {sub_name!r} "
              f"[0x{lo:06x}, 0x{hi:06x})")
    print(banner)

    lines = []
    dumpDieRecursive(cu, sub, 0, lines)
    print("\n".join(lines))

    # Flat, tab-separated listing that is easier for scripts to consume.
    print("\n; variables under this subprogram "
          "(DW_TAG_formal_parameter + DW_TAG_variable):")
    _flatVarList(cu, sub, [], None)
|
|
|
|
|
|
def _flatVarList(cu, die, scope_stack, _unused):
    """Print one flat line per parameter / variable found under `die`.

    Lexical blocks and inlined subroutines are not printed themselves:
    they are descended into, and every variable found inside carries
    the chain of enclosing scopes after an " @ " marker.  A lexical
    block without a PC range adds no scope label; an inlined subroutine
    always adds one (with its range when known).  `scope_stack` is not
    mutated — each nested level works on its own copy.  The final
    parameter is ignored.
    """
    for node in die.children:
        tag = node.tag
        if tag == DW_TAG_lexical_block:
            lo, hi = diePcRange(cu, node)
            nested = list(scope_stack)
            if lo is not None:
                nested.append(f"block[0x{lo:06x},0x{hi:06x})")
            _flatVarList(cu, node, nested, None)
        elif tag == DW_TAG_inlined_subroutine:
            lo, hi = diePcRange(cu, node)
            callee = dieName(cu, node) or "<inlined>"
            if lo is None:
                scope_label = f"inlined {callee}"
            else:
                scope_label = f"inlined {callee}[0x{lo:06x},0x{hi:06x})"
            _flatVarList(cu, node, [*scope_stack, scope_label], None)
        elif tag in (DW_TAG_formal_parameter, DW_TAG_variable):
            label = dieName(cu, node) or "<unnamed>"
            loc_rec = node.getRaw(DW_AT_location)
            if loc_rec is None:
                where = "<no location>"
            else:
                raw, form = loc_rec
                where = formatAttr(cu, DW_AT_location, raw, form)
            kind = "var" if tag != DW_TAG_formal_parameter else "param"
            trail = (" @ " + " > ".join(scope_stack)) if scope_stack else ""
            print(f" {kind}\t{label}\tloc={where}{trail}")
|
|
|
|
|
|
# ---- DW_OP evaluator (Phase 3.2 slice 2) -----------------------------
|
|
#
|
|
# DWARF expression opcodes from §7.7.1. We support only the subset
|
|
# needed for -O0 stack-resident locals and trivial -O2 IMG locals:
|
|
# DW_OP_addr (0x03): absolute address
|
|
# DW_OP_constN/Nu/Ns: small constants
|
|
# DW_OP_regN (0x50..0x6f): register location (N = 0..31)
|
|
# DW_OP_bregN (0x70..0x8f): register + signed offset (N = 0..31)
|
|
# DW_OP_fbreg (0x91): frame-base + signed offset
|
|
# DW_OP_call_frame_cfa (0x9c): equivalent to frame-base in our ABI
|
|
# DW_OP_regx (0x90): ULEB-encoded register
|
|
# DW_OP_bregx (0x92): ULEB-encoded register + signed offset
|
|
# DW_OP_plus_uconst (0x23): pop, add ULEB, push
|
|
# DW_OP_plus (0x22): pop two, push sum
|
|
# DW_OP_deref (0x06): pop addr, push *(addr); needs a target-memory reader, so it is reported as unsupported
|
|
# DW_OP_stack_value (0x9f): final result IS the value (not the addr)
|
|
# DW_OP_piece (0x93): composite; not handled — reported as an unsupported LocResult
|
|
#
|
|
# We return a structured `LocResult` so callers can distinguish
|
|
# memory addresses, register-resident values, IMG-slot addresses,
|
|
# and composite/unsupported expressions.
|
|
|
|
# DWARF expression opcode values used by the evaluator below.

# Literal encodings: an absolute address, then fixed-width (1/2/4/8 byte,
# unsigned/signed) and LEB128 (constu/consts) constants.
DW_OP_addr = 0x03
DW_OP_deref = 0x06
DW_OP_const1u = 0x08
DW_OP_const1s = 0x09
DW_OP_const2u = 0x0a
DW_OP_const2s = 0x0b
DW_OP_const4u = 0x0c
DW_OP_const4s = 0x0d
DW_OP_const8u = 0x0e
DW_OP_const8s = 0x0f
DW_OP_constu = 0x10
DW_OP_consts = 0x11
# Arithmetic on the expression stack.
DW_OP_plus = 0x22
DW_OP_plus_uconst = 0x23
# Bases of the short-form register opcodes: DW_OP_regN is
# DW_OP_reg0 + N and DW_OP_bregN is DW_OP_breg0 + N.
DW_OP_reg0 = 0x50
DW_OP_breg0 = 0x70
# Long-form register, frame-base and composite opcodes.
DW_OP_regx = 0x90
DW_OP_fbreg = 0x91
DW_OP_bregx = 0x92
DW_OP_piece = 0x93
DW_OP_call_frame_cfa = 0x9c
DW_OP_stack_value = 0x9f
|
|
|
|
|
|
# W65816 DWARF register number (per W65816RegisterInfo.td) -> direct-page
# byte address of the register's backing storage.  The IMG registers are
# two bytes wide, so consecutive registers are two bytes apart:
#   DWARF 16..23 = IMG0..IMG7   at $D0..$DE
#   DWARF 24     = DPF0         at $F0
#   DWARF 32..39 = IMG8..IMG15  at $C0..$CE
# SP (DwarfRegNum=3) is deliberately absent: it is resolved from the
# stack-pointer parameter instead of a fixed address.
W65816_DW_REG_TO_DP = {
    **{16 + i: 0xD0 + 2 * i for i in range(8)},  # IMG0..IMG7
    24: 0xF0,                                    # DPF0
    **{32 + i: 0xC0 + 2 * i for i in range(8)},  # IMG8..IMG15
}
|
|
|
|
|
|
class LocResult:
    """Result of evaluating a DWARF location expression.

    `kind` is one of "memory", "register", "value", "composite" or
    "unsupported"; it selects which of the other attributes is set
    (all the others stay None):

      memory:      addr    = byte address in target memory
      register:    reg_dw  = DWARF register number; dp_addr = mapped
                             byte address (or None if the register has
                             no mapping)
      value:       value   = the computed value (DW_OP_stack_value path)
      composite:   pieces  = list of LocResult (one per DW_OP_piece)
      unsupported: reason  = human-readable string
    """

    def __init__(self, kind, **kw):
        """Store `kind` plus whichever payload keywords were supplied."""
        self.kind = kind
        self.addr = kw.get("addr")
        self.reg_dw = kw.get("reg_dw")
        self.dp_addr = kw.get("dp_addr")
        self.value = kw.get("value")
        self.pieces = kw.get("pieces")
        self.reason = kw.get("reason")

    def displayAddr(self):
        """If the result corresponds to a fixed memory byte address
        (either RAM or DP-mapped register), return it; else None."""
        if self.kind == "memory":
            return self.addr
        if self.kind == "register":
            return self.dp_addr
        return None

    def __repr__(self):
        if self.kind == "memory":
            return f"mem[0x{self.addr:06x}]"
        if self.kind == "register":
            if self.dp_addr is not None:
                return f"reg{self.reg_dw}@DP[0x{self.dp_addr:02x}]"
            return f"reg{self.reg_dw}"
        if self.kind == "value":
            return f"value(0x{self.value:x})"
        if self.kind == "composite":
            # `pieces` defaults to None when the keyword was omitted;
            # a debugging repr must not raise on that.
            pieces = self.pieces or ()
            return "composite[" + ", ".join(repr(p) for p in pieces) + "]"
        return f"unsupported({self.reason})"
|
|
|
|
|
|
def _evalFrameBase(cu, sub_die, sp_value):
    """Evaluate DW_AT_frame_base for a subprogram.  Returns the
    frame-base BYTE ADDRESS (i.e., what DW_OP_fbreg offsets are
    relative to), or None if unresolvable.

    For W65816 -O0, clang emits `frame_base = DW_OP_reg3` (SP).  Our
    ABI is empty-descending: S points to the next-free byte.  LLVM's
    PEI assigns FrameOffset assuming full-descending, then
    W65816RegisterInfo::eliminateFrameIndex adds +1 for locals (see
    feedback_stack_skew.md).  So `fb + fbreg_offset` where
    fb = sp_value + 1 yields the correct local byte address.

    Only the first opcode of the expression is examined.  Recognised
    forms are DW_OP_regN / DW_OP_regx / DW_OP_bregN / DW_OP_bregx naming
    DWARF register 3 (SP) and DW_OP_call_frame_cfa; anything else, a
    missing `sp_value`, or a truncated operand yields None.  `cu` is
    accepted for signature symmetry and is not used.
    """
    sp_reg = 3  # DwarfRegNum of the stack pointer

    fb_rec = sub_die.getRaw(DW_AT_frame_base)
    if fb_rec is None:
        # No frame_base — fall back to SP itself (best effort).
        return None if sp_value is None else sp_value + 1

    val, _form = fb_rec
    if not isinstance(val, (bytes, bytearray)):
        return None
    expr = bytes(val)
    if not expr:
        return None

    op = expr[0]
    try:
        if 0x50 <= op <= 0x6f:              # DW_OP_reg0..reg31
            reg, ofs = op - 0x50, 0
        elif 0x70 <= op <= 0x8f:            # DW_OP_breg0..breg31
            reg = op - 0x70
            ofs, _ = readSleb(expr, 1)
        elif op == DW_OP_regx:
            reg, _ = readUleb(expr, 1)
            ofs = 0
        elif op == DW_OP_bregx:
            reg, nxt = readUleb(expr, 1)
            ofs, _ = readSleb(expr, nxt)
        elif op == DW_OP_call_frame_cfa:
            # Per our ABI, the CFA is the SP value at function entry,
            # which equals current S + 1 for an empty-descending stack.
            reg, ofs = sp_reg, 0
        else:
            return None
    except IndexError:
        # Operand runs past the end of the expression: unresolvable.
        return None

    if reg != sp_reg or sp_value is None:
        return None
    return sp_value + 1 + ofs
|
|
|
|
|
|
# struct formats (little-endian) for the fixed-width DW_OP_const* opcodes.
_DW_OP_CONST_FORMATS = {
    DW_OP_const1u: "<B", DW_OP_const1s: "<b",
    DW_OP_const2u: "<H", DW_OP_const2s: "<h",
    DW_OP_const4u: "<I", DW_OP_const4s: "<i",
    DW_OP_const8u: "<Q", DW_OP_const8s: "<q",
}


def _dwRegisterResult(reg, sp_value):
    """Build the "register" LocResult for DWARF register `reg`."""
    if reg == 3 and sp_value is not None:
        # SP-as-register is unusual but legal; treat as the
        # post-skew stack address (i.e., the frame-base byte).
        return LocResult("register", reg_dw=reg, dp_addr=sp_value + 1)
    return LocResult("register", reg_dw=reg,
                     dp_addr=W65816_DW_REG_TO_DP.get(reg))


def _evalDwOpUnchecked(expr, frame_base, sp_value):
    """Run the expression; may raise IndexError / struct.error on
    stack underflow or operands that run past the end of `expr`."""
    stack = []
    is_value = False
    off = 0
    n = len(expr)
    while off < n:
        op = expr[off]
        off += 1
        if op == DW_OP_addr:
            # 4-byte addr in our DWARF.
            addr, off = readU32(expr, off)
            stack.append(addr)
        elif op in _DW_OP_CONST_FORMATS:
            fmt = _DW_OP_CONST_FORMATS[op]
            stack.append(struct.unpack_from(fmt, expr, off)[0])
            off += struct.calcsize(fmt)
        elif op == DW_OP_constu:
            v, off = readUleb(expr, off)
            stack.append(v)
        elif op == DW_OP_consts:
            v, off = readSleb(expr, off)
            stack.append(v)
        elif op == DW_OP_plus:
            rhs = stack.pop()
            lhs = stack.pop()
            stack.append(lhs + rhs)
        elif op == DW_OP_plus_uconst:
            inc, off = readUleb(expr, off)
            stack.append(stack.pop() + inc)
        elif op == DW_OP_fbreg:
            ofs, off = readSleb(expr, off)
            if frame_base is None:
                return LocResult("unsupported",
                                 reason="DW_OP_fbreg without frame_base")
            stack.append(frame_base + ofs)
        elif op == DW_OP_call_frame_cfa:
            if sp_value is None:
                return LocResult("unsupported",
                                 reason="DW_OP_call_frame_cfa without SP")
            stack.append(sp_value + 1)
        elif 0x50 <= op <= 0x6f:
            # DW_OP_regN — the value is in register N; not a memory addr.
            # Returned immediately to keep the W65816 DP-mapping precise.
            return _dwRegisterResult(op - 0x50, sp_value)
        elif op == DW_OP_regx:
            reg, off = readUleb(expr, off)
            return _dwRegisterResult(reg, sp_value)
        elif 0x70 <= op <= 0x8f:
            # DW_OP_bregN — register N + signed offset, pushed as address.
            reg = op - 0x70
            ofs, off = readSleb(expr, off)
            if reg == 3:
                if sp_value is None:
                    return LocResult("unsupported",
                                     reason="DW_OP_breg3 without SP")
                stack.append(sp_value + 1 + ofs)
            else:
                dp = W65816_DW_REG_TO_DP.get(reg)
                if dp is None:
                    return LocResult("unsupported",
                                     reason=f"DW_OP_breg{reg} unmapped")
                stack.append(dp + ofs)
        elif op == DW_OP_bregx:
            reg, off = readUleb(expr, off)
            ofs, off = readSleb(expr, off)
            if reg == 3:
                if sp_value is None:
                    return LocResult("unsupported",
                                     reason="DW_OP_bregx(SP) without SP")
                stack.append(sp_value + 1 + ofs)
            else:
                dp = W65816_DW_REG_TO_DP.get(reg)
                if dp is None:
                    return LocResult("unsupported",
                                     reason=f"DW_OP_bregx({reg}) unmapped")
                stack.append(dp + ofs)
        elif op == DW_OP_deref:
            # Need a memory read function to honor this; out of scope.
            return LocResult("unsupported", reason="DW_OP_deref not handled")
        elif op == DW_OP_stack_value:
            is_value = True
            break
        elif op == DW_OP_piece:
            # Composite — out of scope for this slice (multi-piece i32
            # IMG-resident locals).  Surfacing as unsupported is honest.
            return LocResult("unsupported",
                             reason="DW_OP_piece composite not handled")
        else:
            return LocResult("unsupported",
                             reason=f"DW_OP 0x{op:x} not handled")

    if not stack:
        return LocResult("unsupported", reason="empty expression stack")
    top = stack[-1]
    if is_value:
        return LocResult("value", value=top & 0xFFFFFFFF)
    return LocResult("memory", addr=top & 0xFFFFFF)


def evalDwOp(expr, cu=None, frame_base=None, sp_value=None):
    """Evaluate a DWARF expression `expr` (bytes).  Returns a LocResult.

    `frame_base`: byte address frame-base resolves to (caller pre-computed
    via _evalFrameBase).  `sp_value`: S register snapshot (for
    DW_OP_reg3 / DW_OP_breg3 / DW_OP_call_frame_cfa).  `cu` is accepted
    for API symmetry and is not used.

    Stack-based: most ops push/pop the implicit DWARF stack.  At the
    end, the top-of-stack is the byte address (masked to 24 bits) unless
    DW_OP_stack_value flipped that to "the top IS the value" (masked to
    32 bits).  Register opcodes return a "register" result immediately.

    Never raises on malformed input: an empty expression, an unknown or
    unimplemented opcode, a stack underflow, or an operand that runs
    past the end of `expr` all produce an "unsupported" LocResult with
    a reason string.
    """
    if expr is None or len(expr) == 0:
        return LocResult("unsupported", reason="empty expression")
    try:
        return _evalDwOpUnchecked(expr, frame_base, sp_value)
    except (IndexError, struct.error):
        # IndexError: pop from an empty stack or LEB128/byte read past
        # the end; struct.error: fixed-width operand past the end.
        return LocResult("unsupported",
                         reason="truncated or malformed expression")
|
|
|
|
|
|
# ---- Type-chain resolution for --locals ------------------------------
|
|
|
|
def _findDieByOffset(cu, target_cu_off):
|
|
"""Walk the CU's DIE tree to find the DIE whose cu-relative offset
|
|
matches `target_cu_off`. Returns None if not found."""
|
|
if cu.root is None:
|
|
return None
|
|
stack = [cu.root]
|
|
while stack:
|
|
d = stack.pop()
|
|
if d.offset == target_cu_off:
|
|
return d
|
|
stack.extend(d.children)
|
|
return None
|
|
|
|
|
|
def typeChain(cu, die):
    """Resolve DW_AT_type chain into a printable C-ish type string.

    Handles base_type, pointer_type, const/volatile/restrict_type,
    typedef, array_type (with subrange), structure/union/enum/class
    types and subroutine_type.  Qualifiers accumulate in front of the
    name and `*` markers behind it; restrict is followed but not
    printed.

    Best-effort placeholders:
      "<no type>"              `die` itself is None
      "<unresolved>"           a DW_AT_type reference in the chain does
                               not resolve to a DIE in this CU
      "<unresolved tag 0x..>"  the chain reaches a tag not handled here
      "<cyclic>"               the chain loops back onto a visited DIE
    """
    if die is None:
        return "<no type>"

    qualifier_text = {
        DW_TAG_const_type: "const ",
        DW_TAG_volatile_type: "volatile ",
        DW_TAG_restrict_type: "",
    }
    aggregate_kw = {
        DW_TAG_structure_type: "struct",
        DW_TAG_union_type: "union",
        DW_TAG_enumeration_type: "enum",
        DW_TAG_class_type: "class",
    }

    visited = set()
    prefix = ""
    suffix = ""
    cur = die
    while cur is not None:
        if cur.offset in visited:
            return "<cyclic>"
        visited.add(cur.offset)
        tag = cur.tag

        if tag == DW_TAG_pointer_type or tag in qualifier_text:
            if tag == DW_TAG_pointer_type:
                suffix = "*" + suffix
            else:
                prefix = qualifier_text[tag] + prefix
            ref = cur.getRaw(DW_AT_type)
            if ref is None:
                # No referenced type means "void".
                if tag == DW_TAG_pointer_type:
                    return ("void " + suffix).strip()
                return (prefix + "void" + suffix).strip()
            cur = _findDieByOffset(cu, ref[0])
            continue

        if tag in (DW_TAG_typedef, DW_TAG_base_type):
            nm = dieName(cu, cur) or "?"
            return (prefix + nm + suffix).strip()

        if tag == DW_TAG_array_type:
            # Only the first DW_TAG_subrange_type child is consulted.
            bound = None
            for ch in cur.children:
                if ch.tag == DW_TAG_subrange_type:
                    ub = ch.getRaw(DW_AT_upper_bound)
                    # The bound may be encoded as an expression
                    # (non-integer value); print "[]" for those
                    # instead of failing on the `+ 1`.
                    if ub is not None and isinstance(ub[0], int):
                        bound = ub[0] + 1
                    break
            ref = cur.getRaw(DW_AT_type)
            elem = "?"
            if ref is not None:
                elem = typeChain(cu, _findDieByOffset(cu, ref[0]))
            bnd_str = "" if bound is None else str(bound)
            return f"{prefix}{elem}[{bnd_str}]{suffix}".strip()

        if tag in aggregate_kw:
            nm = dieName(cu, cur) or "<anon>"
            return f"{prefix}{aggregate_kw[tag]} {nm}{suffix}".strip()

        if tag == DW_TAG_subroutine_type:
            return f"{prefix}<func>{suffix}".strip()

        # Unknown link in the chain — bail.
        return f"<unresolved tag 0x{tag:x}>"

    # Fell off the chain: a DW_AT_type offset had no matching DIE.
    return "<unresolved>"
|
|
|
|
|
|
def varTypeStr(cu, var_die):
    """Return the C-ish type string of a variable / parameter DIE.

    The DIE's DW_AT_type reference is looked up in `cu` and rendered
    with typeChain(); a DIE without DW_AT_type yields "<no DW_AT_type>".
    """
    type_ref = var_die.getRaw(DW_AT_type)
    if type_ref is not None:
        return typeChain(cu, _findDieByOffset(cu, type_ref[0]))
    return "<no DW_AT_type>"
|
|
|
|
|
|
# ---- --locals 0xPC mode (Phase 3.2 slice 2) -------------------------
|
|
|
|
def _collectLocals(cu, die, pc, out, scope_stack):
    """Append to `out` every formal_parameter / variable DIE under
    `die` that is in scope at `pc`.

    A lexical block is entered when it has no PC range or when its
    range [lo, hi) contains `pc`.  Inlined subroutines are not entered
    (out of scope for this slice).  `scope_stack` is only passed along;
    nothing reads it here.
    """
    for node in die.children:
        tag = node.tag
        if tag == DW_TAG_lexical_block:
            lo, hi = diePcRange(cu, node)
            if lo is None or lo <= pc < hi:
                _collectLocals(cu, node, pc, out, scope_stack)
        elif tag == DW_TAG_inlined_subroutine:
            # Inlined: out of scope for this slice; skip descent.
            pass
        elif tag in (DW_TAG_formal_parameter, DW_TAG_variable):
            out.append(node)
|
|
|
|
|
|
def localsAtPc(cus, pc, sp_value=None):
    """Collect the parameters and variables in scope at `pc`.

    Returns a triple `(cu, sub, entries)` where `cu` / `sub` are the
    compile unit and subprogram DIE covering `pc`, and `entries` holds
    one `(name, type_str, location_result, var_die)` tuple per
    formal_parameter / variable DIE in scope.  When no subprogram
    covers `pc` the result is `(None, None, [])`.

    `sp_value` is the S register snapshot.  It is needed to resolve
    DW_OP_fbreg / DW_OP_call_frame_cfa expressions; without it those
    locations come back as "unsupported".
    """
    cu, sub = findSubprogramForPc(cus, pc)
    if sub is None:
        return (None, None, [])

    frame_base = _evalFrameBase(cu, sub, sp_value)
    in_scope = []
    _collectLocals(cu, sub, pc, in_scope, [])

    entries = []
    for var in in_scope:
        name = dieName(cu, var) or "<unnamed>"
        ty = varTypeStr(cu, var)
        loc_rec = var.getRaw(DW_AT_location)
        if loc_rec is None:
            where = LocResult("unsupported", reason="no DW_AT_location")
        else:
            raw, form = loc_rec
            if not isinstance(raw, (bytes, bytearray)):
                # Only inline expression blocks are evaluated.
                where = LocResult("unsupported",
                                  reason=f"DW_AT_location form 0x{form:x}")
            else:
                where = evalDwOp(bytes(raw), cu=cu,
                                 frame_base=frame_base, sp_value=sp_value)
        entries.append((name, ty, where, var))
    return (cu, sub, entries)
|
|
|
|
|
|
def printLocals(cus, pc, sp_value):
    """Print the parameters and variables in scope at `pc`.

    A "; ..." comment line naming the subprogram and S comes first,
    then one space-separated KEY=value line per variable:
      VAR=<name> TYPE=<type> ADDR=0x...            (memory location)
      VAR=<name> TYPE=<type> REG=DW<n>             (register, no DP map)
      VAR=<name> TYPE=<type> ADDR=0x... REG=DW<n>  (DP-mapped register)
      VAR=<name> TYPE=<type> VALUE=0x...           (DW_OP_stack_value path)
      VAR=<name> TYPE=<type> COMPOSITE=<unsupported>
      VAR=<name> TYPE=<type> UNSUPPORTED=<reason>

    Returns 0 on success and 1 (after a stderr diagnostic) when no
    subprogram covers `pc`.
    """
    cu, sub, locs = localsAtPc(cus, pc, sp_value=sp_value)
    if sub is None:
        print(f"pc2line: no DW_TAG_subprogram covers PC 0x{pc:06x}",
              file=sys.stderr)
        return 1

    sub_name = dieName(cu, sub) or "<unnamed>"
    sp_str = "<unset>" if sp_value is None else f"0x{sp_value:06x}"
    print(f"; PC 0x{pc:06x} in subprogram {sub_name!r}, S={sp_str}")

    if not locs:
        print("; (no formal_parameter or DW_TAG_variable in scope)")
        return 0

    for name, ty, loc, _die in locs:
        fields = [f"VAR={name}", f"TYPE={ty}"]
        kind = loc.kind
        if kind == "memory":
            fields.append(f"ADDR=0x{loc.addr:06x}")
        elif kind == "register":
            if loc.dp_addr is not None:
                fields.append(f"ADDR=0x{loc.dp_addr:06x}")
            fields.append(f"REG=DW{loc.reg_dw}")
        elif kind == "value":
            fields.append(f"VALUE=0x{loc.value:x}")
        elif kind == "composite":
            fields.append("COMPOSITE=<unsupported>")
        else:
            fields.append(f"UNSUPPORTED={loc.reason}")
        print(" ".join(fields))
    return 0
|
|
|
|
|
|
# ---- Main ------------------------------------------------------------
|
|
|
|
def _sidecarHeaderFields(fields):
    """Pull (section_name, size) out of a split "; OBJ ..." header.

    Returns (None, 0) when the header lacks a SEC or SIZE keyword, a
    keyword is the last token, or the size is not a non-negative
    decimal integer.
    """
    try:
        sec = fields[fields.index("SEC") + 1]
        size = int(fields[fields.index("SIZE") + 1])
    except (ValueError, IndexError):
        return None, 0
    if size < 0:
        return None, 0
    return sec, size


def loadSidecarSectionsAll(path):
    """Return {section_name: [payload_bytes, ...]} from the sidecar.

    The file is scanned for header lines starting with "; OBJ " that
    carry `SEC <name>` and `SIZE <n>` tokens; the `n` bytes after the
    header's newline are that section's payload, and scanning resumes
    after the payload.  Payloads of the same section are kept in file
    order.  A payload that runs past end-of-file is returned short.

    Headers that cannot be parsed (missing keyword or value,
    non-numeric or negative size) are skipped rather than raising; a
    negative size in particular must never be honoured, because it
    would move the scan position backwards.
    """
    with open(path, "rb") as f:
        data = f.read()

    needle = b"; OBJ "
    out = {}
    pos = 0
    while True:
        hdr_at = data.find(needle, pos)
        if hdr_at < 0:
            break
        nl = data.find(b"\n", hdr_at)
        if nl < 0:
            break
        pos = nl + 1  # first byte after the header line
        fields = data[hdr_at:nl].decode("utf-8", "replace").split()
        sec, size = _sidecarHeaderFields(fields)
        if sec is None:
            continue
        out.setdefault(sec, []).append(data[pos:pos + size])
        pos += size
    return out
|
|
|
|
|
|
def buildTable(sidecar_path):
    """Decode every line program in the sidecar's .debug_line chunks.

    Returns a list of `(pc, file_idx, line, file_table)` rows, one per
    line-table row that is not an end-of-sequence marker.  `file_table`
    is the list of file names of the line program that produced the
    row; names stored as .debug_line_str offsets are resolved, falling
    back to "@<offset>" when the offset is unknown and "?" when the
    entry has no usable path.

    A chunk whose header parser raises NotImplementedError is abandoned
    (with a stderr note); rows decoded before that point are kept.
    """
    line_sections = loadSidecarSection(sidecar_path, ".debug_line")
    line_str_sections = loadSidecarSection(sidecar_path, ".debug_line_str")
    line_str = b"".join(chunk for _, chunk in line_str_sections)

    # Index each NUL-terminated string of .debug_line_str by the offset
    # at which it starts.
    strings_by_off = {}
    cursor = 0
    while cursor < len(line_str):
        nul = line_str.find(b"\0", cursor)
        if nul < 0:
            break
        strings_by_off[cursor] = line_str[cursor:nul].decode("utf-8", "replace")
        cursor = nul + 1

    rows = []
    for _sec_name, payload in line_sections:
        pos = 0
        limit = len(payload)
        while pos < limit:
            try:
                hdr, body_start = parseDwarf5Header(payload, pos, limit)
            except NotImplementedError as err:
                print(f"pc2line: {err}", file=sys.stderr)
                break

            names = []
            for entry in hdr["files"]:
                # Content type 1 is DW_LNCT_path: the file's name.
                path_val = entry.get(1)
                if isinstance(path_val, str):
                    names.append(path_val)
                elif isinstance(path_val, int):
                    names.append(strings_by_off.get(path_val, f"@{path_val}"))
                else:
                    names.append("?")

            body = payload[body_start:hdr["end"]]
            for pc, fidx, ln, end_seq in runLineProgram(body, hdr, len(body)):
                if not end_seq:
                    rows.append((pc, fidx, ln, names))
            pos = hdr["end"]
    return rows
|
|
|
|
|
|
def query(table, pc):
    """Look up the line-table row that applies to `pc`.

    The row with the largest PC that is <= `pc` wins (the first such
    row in table order on ties).  Returns `(row_pc, file_basename,
    line)`, or None when every row lies above `pc`.  File indices are
    treated as 1-based into the row's file table; index 0 or an index
    past the end of the table yields "?".
    """
    hit = max((row for row in table if row[0] <= pc),
              key=lambda row: row[0], default=None)
    if hit is None:
        return None

    file_idx = hit[1]
    names = hit[3]
    if file_idx != 0 and file_idx <= len(names):
        fname = names[file_idx - 1]
    else:
        fname = "?"
    return (hit[0], os.path.basename(fname), hit[2])
|
|
|
|
|
|
def loadMapSymbols(path):
    """Load `addr symbol` lines from a link816 .map.  Return sorted list.

    Only lines whose first token starts with "0x" and parses as a hex
    number, followed by at least one more token, are used; everything
    else is ignored.  The result is a list of `(addr, symbol)` tuples
    sorted ascending.  An empty / None `path` or a path that does not
    exist yields an empty list.

    The file is decoded as UTF-8 with undecodable bytes replaced, so
    the result does not depend on the locale and a stray non-ASCII byte
    cannot abort the load.
    """
    if not path or not os.path.exists(path):
        return []
    syms = []
    with open(path, encoding="utf-8", errors="replace") as f:
        for raw in f:
            fields = raw.split()
            if len(fields) < 2 or not fields[0].startswith("0x"):
                continue
            try:
                addr = int(fields[0], 16)
            except ValueError:
                # "0x" prefix but not a hex number: not a symbol line.
                continue
            syms.append((addr, fields[1]))
    syms.sort()
    return syms
|
|
|
|
|
|
def funcAt(syms, pc):
    """Name of the symbol with the largest address <= `pc`, or '?'.

    `syms` must be a list of `(addr, name)` pairs sorted by address.
    When several symbols share the winning address the last one in the
    list is used.  '?' is returned when no symbol lies at or below
    `pc`, and also when the winning name is empty.
    """
    # Half-open binary search for the first entry with addr > pc.
    lo, hi = 0, len(syms)
    while lo < hi:
        mid = (lo + hi) // 2
        if syms[mid][0] <= pc:
            lo = mid + 1
        else:
            hi = mid
    if lo == 0:
        return "?"
    return syms[lo - 1][1] or "?"
|
|
|
|
|
|
# ---- Frame sidecar (.debug_frame_w65816) -----------------------------
#
# Each record is exactly 12 bytes:
# +0 uint32_t fnPcStart (24-bit final-image address, zero-padded)
# +4 uint32_t fnPcEnd (one past the last instruction)
# +8 uint16_t frameSize (bytes that the prologue subtracts from S)
# +10 uint8_t rtlBytes (3 for JSL/RTL; reserved for inline RTS)
# +11 uint8_t pad (must be 0; reserved for future flags)
#
# Records are emitted in object-file order by W65816AsmPrinter and
# concatenated unchanged by link816's `.debug_*` sidecar pipeline.
#
# Size of one record in bytes: 4 + 4 + 2 + 1 + 1 per the layout above.
FRAME_RECORD_SIZE = 12
|
|
|
|
|
|
def loadFrameRecords(sidecar_path):
    """Parse .debug_frame_w65816 from the link816 sidecar.

    Returns a list of `(pcStart, pcEnd, frameSize, rtlBytes)` tuples
    sorted ascending.  Both PCs are masked to 24 bits.  The list is
    empty when the section is absent (older sidecars / hand-written .s
    objects with no frame records).
    """
    records = []
    for _name, payload in loadSidecarSection(sidecar_path,
                                             ".debug_frame_w65816"):
        if len(payload) % FRAME_RECORD_SIZE:
            # Truncated / corrupt chunk: ignore it entirely but keep
            # parsing the others (one bad input object shouldn't
            # disable bt across the whole sidecar).
            continue
        for base in range(0, len(payload), FRAME_RECORD_SIZE):
            # <IIHBB: pcStart, pcEnd, frameSize, rtlBytes, pad.
            raw_start, raw_end, frame_sz, rtl_bytes, _pad = \
                struct.unpack_from("<IIHBB", payload, base)
            pc_start = raw_start & 0xFFFFFF
            pc_end = raw_end & 0xFFFFFF
            # Skip placeholder rows (both endpoints 0): the AsmPrinter
            # guard normally filters these, but a relocation that
            # resolved an entire empty function to bank 0 / addr 0
            # would still leak through.
            if pc_start or pc_end:
                records.append((pc_start, pc_end, frame_sz, rtl_bytes))
    # Sorted by pcStart so lookups can binary-search.
    records.sort()
    return records
|
|
|
|
|
|
def frameAt(records, pc):
    """Return the record with the largest pcStart <= `pc`, or None.

    `records` must be sorted by pcStart (loadFrameRecords guarantees
    this).  None is returned only when every record starts above `pc`
    (or `records` is empty).

    NOTE: the returned record is the nearest preceding function, not
    necessarily one that covers `pc` — pcEnd (exclusive) is not
    checked, so a `pc` in an inter-function gap still returns the
    function before the gap.  That is useful for diagnostics; callers
    that need strict coverage must compare `pc` against the record's
    pcEnd themselves.
    """
    # Half-open binary search for the first record with pcStart > pc.
    lo, hi = 0, len(records)
    while lo < hi:
        mid = (lo + hi) // 2
        if records[mid][0] <= pc:
            lo = mid + 1
        else:
            hi = mid
    if lo == 0:
        return None
    return records[lo - 1]
|
|
|
|
|
|
def _parseIntArg(text):
    """argparse `type=` converter: hex (0x...), octal, binary or decimal."""
    try:
        return int(text, 0)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid integer value: {text!r}") from None


def main():
    """Command-line entry point.  Returns the process exit status.

    Modes, in order of precedence:
      --list-cus    list compile units and their top-level subprograms
      --locals      print parameters / locals in scope at each PC
      --dump-dies   dump the subprogram DIE subtree for each PC
      --dump        print the whole PC -> line table
      (default)     resolve each positional PC to FILE / LINE / FUNC

    Exit status: 0 on success, 1 if a DIE-mode PC had no covering
    subprogram, 2 when a mode that needs PCs was given none.  Malformed
    PC / --sp values are rejected by argparse with a usage error.
    """
    ap = argparse.ArgumentParser(description="PC -> source resolver")
    ap.add_argument("--sidecar", required=True,
                    help="link816 --debug-out file")
    ap.add_argument("--map", help="link816 .map (optional — for function names)")
    ap.add_argument("--dump", action="store_true",
                    help="print the full PC->line table")
    ap.add_argument("--dump-dies", action="store_true",
                    help="dump the DIE subtree for the subprogram covering"
                         " a given PC (pass PC as a positional arg)")
    ap.add_argument("--list-cus", action="store_true",
                    help="list compile units and their top-level subprograms")
    ap.add_argument("--locals", action="store_true",
                    help="print formal parameters + locals in scope at "
                         "the given PC. Requires --sp for stack-resident "
                         "locals (DW_OP_fbreg). Multiple PCs supported.")
    ap.add_argument("--sp", default=None, type=_parseIntArg,
                    help="MAME S-register snapshot (16-bit or 24-bit "
                         "hex). Used as the W65816 stack pointer when "
                         "resolving DW_OP_fbreg / DW_OP_call_frame_cfa.")
    ap.add_argument("pcs", nargs="*", type=_parseIntArg,
                    help="PCs to resolve (hex 0x... or decimal)")
    args = ap.parse_args()

    syms = loadMapSymbols(args.map)
    sp_value = args.sp
    if sp_value is not None:
        # SP is the 16-bit S register; the stack lives in bank 0, so a
        # 16-bit value already is its own 24-bit byte address.  Caller
        # may pass either form; canonicalise to 24 bits.
        sp_value &= 0xFFFFFF

    # DIE-walker modes use parseAllCus; the legacy line-table mode uses
    # buildTable.  Both work off the same sidecar file.
    if args.dump_dies or args.list_cus or args.locals:
        cus = parseAllCus(loadSidecarSectionsAll(args.sidecar))

        if args.list_cus:
            for ci, cu in enumerate(cus):
                cu_name = dieName(cu, cu.root) if cu.root else None
                print(f"CU #{ci} v{cu.version} addr_size={cu.addr_size} "
                      f"name={cu_name!r}")
                if cu.root is None:
                    continue
                for child in cu.root.children:
                    if child.tag != DW_TAG_subprogram:
                        continue
                    lo, hi = diePcRange(cu, child)
                    nm = dieName(cu, child) or "<unnamed>"
                    if lo is None:
                        print(f" subprogram {nm!r} (no PC range)")
                    else:
                        print(f" subprogram {nm!r} "
                              f"[0x{lo:06x}, 0x{hi:06x})")
            return 0

        # --locals takes precedence over --dump-dies when both are set.
        flag = "--locals" if args.locals else "--dump-dies"
        if not args.pcs:
            print(f"pc2line: {flag} needs a PC", file=sys.stderr)
            return 2
        rc = 0
        for pc in args.pcs:
            if args.locals:
                r = printLocals(cus, pc, sp_value)
            else:
                r = dumpDiesAtPc(cus, pc)
            if r:
                # Remember the failure but keep processing the rest.
                rc = r
        return rc

    table = buildTable(args.sidecar)
    if args.dump:
        for pc, fidx, ln, ft in sorted(table):
            fname = ft[fidx - 1] if 0 < fidx <= len(ft) else "?"
            func = funcAt(syms, pc)
            print(f"0x{pc:06x}\t{os.path.basename(fname)}:{ln}\t{func}")
        return 0

    if not args.pcs:
        print(f"pc2line: built {len(table)} entries", file=sys.stderr)
        print("pass PCs as positional args to resolve, or --dump for the table")
        return 2

    for pc in args.pcs:
        row = query(table, pc)
        func = funcAt(syms, pc)
        if row is None:
            print(f"PC=0x{pc:06x} NOT_FOUND FUNC={func}")
        else:
            _row_pc, fname, ln = row
            print(f"PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func}")
    return 0
|
|
|
|
|
|
# Script entry point: run main() and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|