#!/usr/bin/env python3
# pc2line.py - PC -> source location resolver for W65816 binaries.
#
# Parses the DWARF .debug_line section from link816's debug sidecar
# (`--debug-out FILE`) and answers `file:line` queries for runtime PCs.
# The sidecar already has reloc-applied PCs — they refer to the final
# linked image — so we don't need link-map cross-walking.
#
# Standard tools (pyelftools, llvm-dwarfdump, addr2line) all choke on
# the W65816 ELF: pyelftools asserts on the DWARF address size, the
# others can't apply our target-specific relocations. So we ship a
# small DWARF 5 line-program decoder (DWARF §6.2) — ~200 LOC, no deps.
#
# Usage:
#   scripts/pc2line.py --sidecar foo.dwarf 0x123A
#   scripts/pc2line.py --sidecar foo.dwarf --dump
#   scripts/pc2line.py --sidecar foo.dwarf 0x123A 0x4567

import argparse
import os
import struct
import sys


# ---- ULEB128 / SLEB128 -----------------------------------------------

def readUleb(buf, off):
    """Decode one unsigned LEB128 value from `buf` starting at `off`.

    Returns (value, offset just past the last byte consumed).
    """
    result = 0
    shift = 0
    while True:
        b = buf[off]
        off += 1
        result |= (b & 0x7F) << shift
        if not b & 0x80:
            return result, off
        shift += 7


def readSleb(buf, off):
    """Decode one signed LEB128 value from `buf` starting at `off`.

    Returns (value, offset just past the last byte consumed).
    """
    result = 0
    shift = 0
    while True:
        b = buf[off]
        off += 1
        result |= (b & 0x7F) << shift
        shift += 7
        if not b & 0x80:
            break
    # Bit 6 of the final byte is the sign bit. Python ints are unbounded,
    # so sign-extend unconditionally: a C-style "shift < 64" guard would
    # turn a 10-byte encoding of a negative value into a large positive.
    if b & 0x40:
        result |= -(1 << shift)
    return result, off


# ---- Fixed-size readers ----------------------------------------------
# NOTE(review): the original text of readU16 (after its format string),
# readU32 and readCStr was lost in extraction. They are reconstructed from
# their call sites, which all unpack a `(value, new_offset)` pair — confirm
# against the pristine file.

def readU8(buf, off):
    """Return (byte at `off`, off + 1)."""
    return buf[off], off + 1


def readU16(buf, off):
    """Return (little-endian u16 at `off`, off + 2)."""
    return struct.unpack_from("<H", buf, off)[0], off + 2


def readU32(buf, off):
    """Return (little-endian u32 at `off`, off + 4)."""
    return struct.unpack_from("<I", buf, off)[0], off + 4


def readCStr(buf, off):
    """Return (NUL-terminated string at `off`, offset past the NUL)."""
    end = buf.find(b"\0", off)
    if end < 0:
        # Unterminated: take everything that is left.
        end = len(buf)
    return buf[off:end].decode("utf-8", "replace"), end + 1


# ---- Line-program opcodes (DWARF 5 §6.2.5) ----------------------------
# NOTE(review): these definitions were lost in extraction; the values are
# the standard DWARF encodings for the names runLineProgram references.
DW_LNS_copy = 0x01
DW_LNS_advance_pc = 0x02
DW_LNS_advance_line = 0x03
DW_LNS_set_file = 0x04
DW_LNS_set_column = 0x05
DW_LNS_negate_stmt = 0x06
DW_LNS_set_basic_block = 0x07
DW_LNS_const_add_pc = 0x08
DW_LNS_fixed_advance_pc = 0x09
DW_LNS_set_prologue_end = 0x0a
DW_LNS_set_epilogue_begin = 0x0b
DW_LNS_set_isa = 0x0c

DW_LNE_end_sequence = 0x01
DW_LNE_set_address = 0x02
DW_LNE_set_discriminator = 0x04


# ---- Header parser ---------------------------------------------------

def _readEntryTable(buf, off):
    """Read one DWARF 5 entry-format list plus the entries it describes.

    Used for both the directory table and the file table, which share the
    same layout. Returns (list of {content_type: value} dicts, new offset).
    """
    fmt_count, off = readU8(buf, off)
    fmt = []
    for _ in range(fmt_count):
        content_type, off = readUleb(buf, off)
        form, off = readUleb(buf, off)
        fmt.append((content_type, form))
    count, off = readUleb(buf, off)
    entries = []
    for _ in range(count):
        entry = {}
        for content_type, form in fmt:
            entry[content_type], off = readForm(buf, off, form)
        entries.append(entry)
    return entries, off


def parseDwarf5Header(buf, off, end):
    """Parse one DWARF 5 line-program unit header starting at `off`.

    NOTE(review): everything in this function up to the `line_base` read
    was lost in extraction and is reconstructed from the standard DWARF 5
    header layout and from the keys the rest of this file reads (`end`,
    `addr_size`, `default_is_stmt`, `min_inst_length`) — confirm against
    the pristine file.

    Args:
        buf: bytes of a .debug_line section.
        off: offset of the unit's `unit_length` field.
        end: offset one past the last usable byte of `buf`.

    Returns:
        (header dict, offset of the first line-program opcode). The body
        offset is where header parsing stopped; the `header_length` field
        is recorded but deliberately not used to locate the body.

    Raises:
        NotImplementedError: 64-bit DWARF, a version other than 5, or an
            entry form that readForm does not handle.
    """
    h = {}
    unit_length, off = readU32(buf, off)
    if unit_length == 0xFFFFFFFF:
        raise NotImplementedError("64-bit DWARF line units not handled")
    h["unit_length"] = unit_length
    # unit_length counts the bytes that follow the length field itself.
    h["end"] = min(off + unit_length, end)
    h["version"], off = readU16(buf, off)
    if h["version"] != 5:
        raise NotImplementedError(
            f"DWARF line-table version {h['version']} not handled")
    h["addr_size"], off = readU8(buf, off)
    h["seg_sel_size"], off = readU8(buf, off)
    h["header_length"], off = readU32(buf, off)
    h["min_inst_length"], off = readU8(buf, off)
    h["max_ops_per_inst"], off = readU8(buf, off)
    h["default_is_stmt"], off = readU8(buf, off)
    h["line_base"], off = readU8(buf, off)
    if h["line_base"] >= 0x80:  # signed
        h["line_base"] -= 0x100
    h["line_range"], off = readU8(buf, off)
    h["opcode_base"], off = readU8(buf, off)
    # standard_opcode_lengths[opcode_base-1]
    h["std_op_lens"] = list(buf[off:off + h["opcode_base"] - 1])
    off += h["opcode_base"] - 1
    # directory format + entries, then file format + entries.
    h["dirs"], off = _readEntryTable(buf, off)
    h["files"], off = _readEntryTable(buf, off)
    # Body starts where the prologue parse ended, not at header_length.
    return h, off


# DWARF forms (subset — what we see in our line-program file/dir entries).
DW_FORM_data2 = 0x05
DW_FORM_data4 = 0x06
DW_FORM_data8 = 0x07
DW_FORM_string = 0x08
DW_FORM_data1 = 0x0b
DW_FORM_strp = 0x0e
DW_FORM_udata = 0x0f
DW_FORM_data16 = 0x1e
DW_FORM_line_strp = 0x1f
DW_FORM_strx = 0x1a
DW_FORM_strx1 = 0x26
DW_FORM_strx2 = 0x27
DW_FORM_strx3 = 0x28
DW_FORM_strx4 = 0x29


def readForm(buf, off, form):
    """Read one attribute value encoded with DWARF form `form`.

    Returns (value, new offset). Inline strings come back as `str`,
    data16 as a hex string, every other handled form as an int (string
    offsets and indices are returned raw, not resolved).

    Raises:
        NotImplementedError: `form` is not one of the handled forms.
    """
    if form == DW_FORM_string:
        return readCStr(buf, off)
    if form in (DW_FORM_strp, DW_FORM_line_strp, DW_FORM_strx4,
                DW_FORM_data4):
        return readU32(buf, off)
    if form in (DW_FORM_udata, DW_FORM_strx):
        return readUleb(buf, off)
    if form in (DW_FORM_strx1, DW_FORM_data1):
        return readU8(buf, off)
    if form in (DW_FORM_strx2, DW_FORM_data2):
        return readU16(buf, off)
    if form == DW_FORM_strx3:
        return int.from_bytes(buf[off:off + 3], "little"), off + 3
    if form == DW_FORM_data8:
        return struct.unpack_from("<Q", buf, off)[0], off + 8
    if form == DW_FORM_data16:
        return (buf[off:off + 16].hex(), off + 16)
    raise NotImplementedError(f"DW_FORM 0x{form:x} not handled")


# ---- Sidecar parser --------------------------------------------------

def loadSidecarSection(path, section_name):
    """Return every `section_name` payload found in the link816 sidecar.

    The sidecar is scanned for `; OBJ ...` header lines. A header that
    carries both a `SEC <name>` and a `SIZE <n>` token pair is followed
    by exactly <n> bytes of raw payload.

    Returns:
        List of (section name, payload bytes) tuples in file order, one
        per matching header; empty when the section never appears.
    """
    with open(path, "rb") as f:
        data = f.read()
    needle = b"; OBJ "
    sections = []
    i = 0
    while True:
        h = data.find(needle, i)
        if h < 0:
            break
        nl = data.find(b"\n", h)
        if nl < 0:
            break
        parts = data[h:nl].decode("utf-8", "replace").split()
        if "SEC" in parts and "SIZE" in parts:
            sec = parts[parts.index("SEC") + 1]
            size = int(parts[parts.index("SIZE") + 1])
            payload_start = nl + 1
            sections.append((sec, data[payload_start:payload_start + size]))
            # Skip the payload so binary bytes are never scanned for headers.
            i = payload_start + size
        else:
            i = nl + 1
    return [(name, p) for name, p in sections if name == section_name]


# ---- Line-program decoder --------------------------------------------

def runLineProgram(buf, header, h_end):
    """Walk the line program; yield (pc, file_idx, line, end_seq) tuples.

    Args:
        buf: the opcode bytes of one unit (header already stripped).
        header: dict produced by parseDwarf5Header.
        h_end: offset in `buf` at which to stop decoding.

    One tuple is yielded per emitted row: for DW_LNS_copy and special
    opcodes `end_seq` is False, for DW_LNE_end_sequence it is True, after
    which the state machine registers are reset.
    """
    opcode_base = header["opcode_base"]
    line_base = header["line_base"]
    line_range = header["line_range"]
    min_inst = header["min_inst_length"]
    default_is_stmt = bool(header["default_is_stmt"])

    off = 0
    addr, file, line, is_stmt = 0, 1, 1, default_is_stmt
    while off < h_end:
        op, off = readU8(buf, off)
        if op == 0:
            # Extended opcode: ULEB length, then sub-opcode + operands.
            ln, off = readUleb(buf, off)
            sub = buf[off]
            if sub == DW_LNE_end_sequence:
                yield (addr, file, line, True)
                addr, file, line, is_stmt = 0, 1, 1, default_is_stmt
            elif sub == DW_LNE_set_address:
                operand = buf[off + 1:off + 1 + header["addr_size"]]
                addr = int.from_bytes(operand, "little")
            # `ln` covers the sub-opcode byte too, so this lands on the
            # next opcode whether or not the sub-opcode was understood.
            off += ln
        elif op < opcode_base:
            # Standard opcode.
            if op == DW_LNS_copy:
                yield (addr, file, line, False)
            elif op == DW_LNS_advance_pc:
                inc, off = readUleb(buf, off)
                addr += inc * min_inst
            elif op == DW_LNS_advance_line:
                inc, off = readSleb(buf, off)
                line += inc
            elif op == DW_LNS_set_file:
                file, off = readUleb(buf, off)
            elif op in (DW_LNS_set_column, DW_LNS_set_isa):
                _, off = readUleb(buf, off)
            elif op == DW_LNS_negate_stmt:
                is_stmt = not is_stmt
            elif op in (DW_LNS_set_basic_block, DW_LNS_set_prologue_end,
                        DW_LNS_set_epilogue_begin):
                pass
            elif op == DW_LNS_const_add_pc:
                # Same address advance as special opcode 255.
                addr += ((255 - opcode_base) // line_range) * min_inst
            elif op == DW_LNS_fixed_advance_pc:
                # Operand is a raw uhalf, not scaled by min_inst_length.
                inc, off = readU16(buf, off)
                addr += inc
            else:
                # Unknown std op — skip operands per std_op_lens.
                for _ in range(header["std_op_lens"][op - 1]):
                    _, off = readUleb(buf, off)
        else:
            # Special opcode (most common).
            adj = op - opcode_base
            addr += (adj // line_range) * min_inst
            line += line_base + (adj % line_range)
            yield (addr, file, line, False)


# ---- Main ------------------------------------------------------------

def _lineStrAt(line_str, off):
    """Return the NUL-terminated string at `off` in .debug_line_str.

    Resolved on demand so that an offset pointing into the middle of a
    stored string still yields its suffix. Falls back to `@<off>` when
    the offset is out of range or the string is unterminated.
    """
    if not 0 <= off < len(line_str):
        return f"@{off}"
    end = line_str.find(b"\0", off)
    if end < 0:
        return f"@{off}"
    return line_str[off:end].decode("utf-8", "replace")


def buildTable(sidecar_path):
    """Return list of (pc, file_idx, line, file_table) rows.

    Decodes every unit of every `.debug_line` payload in the sidecar.
    `file_table` is the list of file names of the unit the row came from,
    in header order; `file_idx` is the raw line-program file register.
    End-of-sequence rows are not included.

    A unit that cannot be decoded (unsupported feature or truncated data)
    is reported on stderr and ends processing of its section; rows
    decoded before that point are kept.
    """
    line_sections = loadSidecarSection(sidecar_path, ".debug_line")
    line_str = b"".join(
        p for _, p in loadSidecarSection(sidecar_path, ".debug_line_str"))

    rows = []
    for _, payload in line_sections:
        i = 0
        while i < len(payload):
            try:
                hdr, body_start = parseDwarf5Header(payload, i, len(payload))
                file_tbl = []
                for fe in hdr["files"]:
                    # File entry's name field is at content type DW_LNCT_path=1
                    name_val = fe.get(1)
                    if isinstance(name_val, str):
                        file_tbl.append(name_val)
                    elif isinstance(name_val, int):
                        file_tbl.append(_lineStrAt(line_str, name_val))
                    else:
                        file_tbl.append("?")
                body = payload[body_start:hdr["end"]]
                for pc, fidx, ln, end_seq in runLineProgram(
                        body, hdr, len(body)):
                    if not end_seq:
                        rows.append((pc, fidx, ln, file_tbl))
            except NotImplementedError as e:
                print(f"pc2line: {e}", file=sys.stderr)
                break
            except (IndexError, struct.error):
                print(f"pc2line: truncated .debug_line unit at offset {i}",
                      file=sys.stderr)
                break
            i = hdr["end"]
    return rows


def query(table, pc):
    """Return (pc, file, line) or None — largest-pc-<=-query match.

    `file` is the basename of the matching row's file, or "?" when the
    row's file index falls outside its file table. DWARF 5 numbers file
    entries from 0, so the index is used directly.
    """
    best = max((r for r in table if r[0] <= pc),
               key=lambda r: r[0], default=None)
    if best is None:
        return None
    row_pc, f_idx, line, file_tbl = best
    fname = file_tbl[f_idx] if 0 <= f_idx < len(file_tbl) else "?"
    return (row_pc, os.path.basename(fname), line)


def loadMapSymbols(path):
    """Load `addr symbol` lines from a link816 .map. Return sorted list.

    Only lines starting with `0x` are considered. Returns a list of
    (address, name) tuples, empty when `path` is falsy or missing.
    """
    if not path or not os.path.exists(path):
        return []
    syms = []
    with open(path, encoding="utf-8", errors="replace") as f:
        for ln in f:
            parts = ln.split()
            if len(parts) < 2 or not parts[0].startswith("0x"):
                continue
            try:
                syms.append((int(parts[0], 16), parts[1]))
            except ValueError:
                # Not a hex address after all — ignore the line.
                pass
    syms.sort()
    return syms


def funcAt(syms, pc):
    """Return the symbol name with largest addr <= pc, or '?'.

    `syms` must be sorted by address (as loadMapSymbols returns it).
    """
    lo, hi = 0, len(syms) - 1
    best = None
    while lo <= hi:
        mid = (lo + hi) // 2
        if syms[mid][0] <= pc:
            best = syms[mid][1]
            lo = mid + 1
        else:
            hi = mid - 1
    return best or "?"
def main():
    """Command-line entry point: resolve PCs or dump the whole table.

    Returns the process exit status: 0 on success, 1 when at least one
    PC argument could not be parsed as a number, 2 when neither --dump
    nor any PC was given.
    """
    ap = argparse.ArgumentParser(description="PC -> source resolver")
    ap.add_argument("--sidecar", required=True,
                    help="link816 --debug-out file")
    ap.add_argument("--map",
                    help="link816 .map (optional — for function names)")
    ap.add_argument("--dump", action="store_true",
                    help="print the full PC->line table")
    ap.add_argument("pcs", nargs="*",
                    help="PCs to resolve (hex 0x... or decimal)")
    args = ap.parse_args()

    table = buildTable(args.sidecar)
    syms = loadMapSymbols(args.map)

    if args.dump:
        for pc, fidx, ln, ft in sorted(table):
            # DWARF 5 numbers file entries from 0, so index directly.
            fname = ft[fidx] if 0 <= fidx < len(ft) else "?"
            func = funcAt(syms, pc)
            print(f"0x{pc:06x}\t{os.path.basename(fname)}:{ln}\t{func}")
        return 0

    if not args.pcs:
        print(f"pc2line: built {len(table)} entries", file=sys.stderr)
        print("pass PCs as positional args to resolve, or --dump for the table")
        return 2

    status = 0
    for s in args.pcs:
        try:
            # Base 0 accepts 0x.., 0o.., 0b.. and plain decimal.
            pc = int(s, 0)
        except ValueError:
            # Report and keep going so one typo does not hide the
            # answers for the remaining PCs.
            print(f"pc2line: invalid PC {s!r} (expected hex 0x... or decimal)",
                  file=sys.stderr)
            status = 1
            continue
        row = query(table, pc)
        func = funcAt(syms, pc)
        if row is None:
            print(f"PC=0x{pc:06x} NOT_FOUND FUNC={func}")
        else:
            _, fname, ln = row
            print(f"PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func}")
    return status


if __name__ == "__main__":
    sys.exit(main())