// link816 — minimal flat-binary linker for W65816 ELF .o files. // // Reads one or more ELF32 object files (produced by llvm-mc / clang -c // with the W65816 backend), concatenates their .text* / .rodata* / // .data* sections at consecutive addresses starting from a given base, // builds a global symbol table, resolves the W65816 ELF relocations, // and writes a flat binary suitable for loading into a 65816 emulator // or further wrapping by omfEmit. // // Standalone — no LLVM dependency. Parses ELF32-LE structures // directly with the layout from /usr/include/elf.h. // // Supported relocation types (per W65816ELFObjectWriter): // 1 R_W65816_IMM8 — 1-byte absolute // 2 R_W65816_IMM16 — 2-byte LE absolute // 3 R_W65816_IMM24 — 3-byte LE absolute (JSL targets) // 4 R_W65816_PCREL8 — 1-byte signed PC-relative // 5 R_W65816_PCREL16 — 2-byte signed PC-relative // // CLI mirrors the Python tool exactly: // link816 -o out.bin --text-base 0x8000 --bss-base 0x2000 a.o b.o ... // [--rodata-base ADDR] [--map FILE] #include #include #include #include #include #include #include #include #include #include #include #include namespace { // ---------------------------------------------------------------- ELF32 layout // We only need the LE host-side parsing path. Field names mirror // /usr/include/elf.h so a reader can cross-check against the spec. 
/// ELF32 file header (Elf32_Ehdr). Copied byte-for-byte from file offset 0,
/// so the field order and widths must match the on-disk layout exactly.
struct Elf32Ehdr {
  uint8_t e_ident[16];
  uint16_t e_type;
  uint16_t e_machine;
  uint32_t e_version;
  uint32_t e_entry;
  uint32_t e_phoff;
  uint32_t e_shoff;
  uint32_t e_flags;
  uint16_t e_ehsize;
  uint16_t e_phentsize;
  uint16_t e_phnum;
  uint16_t e_shentsize;
  uint16_t e_shnum;
  uint16_t e_shstrndx;
};

/// ELF32 section header (Elf32_Shdr).
struct Elf32Shdr {
  uint32_t sh_name;
  uint32_t sh_type;
  uint32_t sh_flags;
  uint32_t sh_addr;
  uint32_t sh_offset;
  uint32_t sh_size;
  uint32_t sh_link;
  uint32_t sh_info;
  uint32_t sh_addralign;
  uint32_t sh_entsize;
};

static constexpr uint32_t SHT_NULL = 0;
static constexpr uint32_t SHT_PROGBITS = 1;
static constexpr uint32_t SHT_SYMTAB = 2;
static constexpr uint32_t SHT_STRTAB = 3;
static constexpr uint32_t SHT_RELA = 4;
static constexpr uint32_t SHT_NOBITS = 8;

/// ELF32 symbol table entry (Elf32_Sym).
struct Elf32Sym {
  uint32_t st_name;
  uint32_t st_value;
  uint32_t st_size;
  uint8_t st_info;
  uint8_t st_other;
  uint16_t st_shndx;
};

static constexpr uint16_t SHN_UNDEF = 0;
static constexpr uint16_t SHN_ABS = 0xFFF1;
static constexpr uint16_t SHN_COMMON = 0xFFF2;

/// Low nibble of st_info: the symbol type (STT_*).
constexpr uint8_t ELF32_ST_TYPE(uint8_t i) { return i & 0x0F; }
/// High nibble of st_info: the symbol binding (STB_*).
constexpr uint8_t ELF32_ST_BIND(uint8_t i) { return (i >> 4) & 0x0F; }

static constexpr uint8_t STB_LOCAL = 0;
static constexpr uint8_t STB_GLOBAL = 1;
static constexpr uint8_t STB_WEAK = 2;
static constexpr uint8_t STT_NOTYPE = 0;
static constexpr uint8_t STT_OBJECT = 1;
static constexpr uint8_t STT_FUNC = 2;
static constexpr uint8_t STT_SECTION = 3;

/// ELF32 relocation entry with explicit addend (Elf32_Rela).
struct Elf32Rela {
  uint32_t r_offset;
  uint32_t r_info;
  int32_t r_addend;
};

/// Upper 24 bits of r_info: index into the symbol table.
constexpr uint32_t ELF32_R_SYM(uint32_t i) { return i >> 8; }
/// Low 8 bits of r_info: the relocation type.
constexpr uint32_t ELF32_R_TYPE(uint32_t i) { return i & 0xFF; }

// The parser memcpy()s these records straight out of the file image, so a
// compiler that inserted padding would silently misread every field. Pin
// the sizes to the ELF32 on-disk sizes at compile time.
static_assert(sizeof(Elf32Ehdr) == 52, "Elf32Ehdr must match the ELF32 file layout");
static_assert(sizeof(Elf32Shdr) == 40, "Elf32Shdr must match the ELF32 file layout");
static_assert(sizeof(Elf32Sym) == 16, "Elf32Sym must match the ELF32 file layout");
static_assert(sizeof(Elf32Rela) == 12, "Elf32Rela must match the ELF32 file layout");

// W65816 reloc type numbers — match W65816ELFObjectWriter.
static constexpr uint8_t R_W65816_IMM8 = 1; static constexpr uint8_t R_W65816_IMM16 = 2; static constexpr uint8_t R_W65816_IMM24 = 3; static constexpr uint8_t R_W65816_PCREL8 = 4; static constexpr uint8_t R_W65816_PCREL16 = 5; // ---------------------------------------------------------------- Helpers [[noreturn]] static void die(const std::string &msg) { std::fprintf(stderr, "link816: %s\n", msg.c_str()); std::exit(1); } static std::vector readFile(const std::string &path) { std::ifstream f(path, std::ios::binary); if (!f) die("cannot open '" + path + "' for reading"); std::vector buf((std::istreambuf_iterator(f)), std::istreambuf_iterator()); return buf; } static std::string sectionKind(const std::string &name) { if (name == ".text" || name.rfind(".text.", 0) == 0) return "text"; if (name == ".rodata" || name.rfind(".rodata.", 0) == 0) return "rodata"; if (name == ".data" || name.rfind(".data.", 0) == 0) return "rodata"; if (name == ".bss" || name.rfind(".bss.", 0) == 0) return "bss"; // .init_array entries are 16-bit function pointers; treat as // rodata so they end up in the read-only image and get a stable // address. The linker emits __init_array_start/_end so crt0 can // walk them. Same for .fini_array (destructors). if (name == ".init_array" || name.rfind(".init_array.", 0) == 0) return "init_array"; if (name == ".fini_array" || name.rfind(".fini_array.", 0) == 0) return "fini_array"; return ""; } // ---------------------------------------------------------------- ELF parser struct Section { std::string name; uint32_t type; uint32_t size; uint32_t fileOffset; uint32_t link; uint32_t info; }; struct Symbol { std::string name; uint32_t value; // st_value uint16_t shndx; uint8_t type; // STT_* uint8_t bind; // STB_LOCAL / STB_GLOBAL / STB_WEAK }; struct Reloc { uint32_t offset; // within target section uint32_t symIdx; uint8_t type; int32_t addend; }; struct InputObject { std::string path; std::vector raw; std::vector
sections; std::vector symbols; // relocs indexed by target section id std::map> relocs; void parse() { if (raw.size() < sizeof(Elf32Ehdr)) die("'" + path + "': file too small to be ELF"); if (raw[0] != 0x7f || raw[1] != 'E' || raw[2] != 'L' || raw[3] != 'F') die("'" + path + "': not an ELF file"); if (raw[4] != 1) // ELFCLASS32 die("'" + path + "': not 32-bit ELF"); if (raw[5] != 1) // ELFDATA2LSB die("'" + path + "': not little-endian ELF"); Elf32Ehdr hdr; std::memcpy(&hdr, raw.data(), sizeof(hdr)); if (hdr.e_shoff == 0 || hdr.e_shnum == 0) die("'" + path + "': no section table"); if (hdr.e_shentsize != sizeof(Elf32Shdr)) die("'" + path + "': unexpected section header size"); // Section header string table — used to look up section names. Elf32Shdr shstrhdr; std::memcpy(&shstrhdr, raw.data() + hdr.e_shoff + hdr.e_shstrndx * sizeof(Elf32Shdr), sizeof(shstrhdr)); const char *shstrtab = reinterpret_cast( raw.data() + shstrhdr.sh_offset); sections.resize(hdr.e_shnum); std::vector shdrs(hdr.e_shnum); for (size_t i = 0; i < hdr.e_shnum; ++i) { std::memcpy(&shdrs[i], raw.data() + hdr.e_shoff + i * sizeof(Elf32Shdr), sizeof(Elf32Shdr)); sections[i].name = std::string(shstrtab + shdrs[i].sh_name); sections[i].type = shdrs[i].sh_type; sections[i].size = shdrs[i].sh_size; sections[i].fileOffset = shdrs[i].sh_offset; sections[i].link = shdrs[i].sh_link; sections[i].info = shdrs[i].sh_info; } // Find the symbol table and its string table. size_t symtabIdx = (size_t)-1, symstrtabIdx = (size_t)-1; for (size_t i = 0; i < sections.size(); ++i) { if (sections[i].type == SHT_SYMTAB) { symtabIdx = i; symstrtabIdx = sections[i].link; break; } } if (symtabIdx == (size_t)-1) { // Object with no symbols is unusual but legal — treat as empty. 
return; } const char *symstrtab = reinterpret_cast( raw.data() + sections[symstrtabIdx].fileOffset); size_t numSyms = sections[symtabIdx].size / sizeof(Elf32Sym); symbols.resize(numSyms); for (size_t i = 0; i < numSyms; ++i) { Elf32Sym sym; std::memcpy(&sym, raw.data() + sections[symtabIdx].fileOffset + i * sizeof(Elf32Sym), sizeof(Elf32Sym)); symbols[i].name = std::string(symstrtab + sym.st_name); symbols[i].value = sym.st_value; symbols[i].shndx = sym.st_shndx; symbols[i].type = ELF32_ST_TYPE(sym.st_info); symbols[i].bind = ELF32_ST_BIND(sym.st_info); } // Walk RELA sections; index by their target section (sh_info). for (size_t i = 0; i < sections.size(); ++i) { if (sections[i].type != SHT_RELA) continue; uint32_t targetSec = sections[i].info; size_t numRels = sections[i].size / sizeof(Elf32Rela); std::vector &out = relocs[targetSec]; out.reserve(numRels); for (size_t j = 0; j < numRels; ++j) { Elf32Rela r; std::memcpy(&r, raw.data() + sections[i].fileOffset + j * sizeof(Elf32Rela), sizeof(Elf32Rela)); Reloc R; R.offset = r.r_offset; R.symIdx = ELF32_R_SYM(r.r_info); R.type = static_cast(ELF32_R_TYPE(r.r_info)); R.addend = r.r_addend; out.push_back(R); } } } const uint8_t *sectionData(uint32_t idx) const { return raw.data() + sections[idx].fileOffset; } std::vector sectionsByKind(const std::string &kind) const { std::vector out; for (size_t i = 0; i < sections.size(); ++i) { if (sections[i].size == 0) continue; if (sectionKind(sections[i].name) == kind) out.push_back(static_cast(i)); } return out; } }; // ---------------------------------------------------------------- Linker struct Layout { uint32_t textBase, textSize; uint32_t rodataBase, rodataSize; uint32_t bssBase, bssSize; uint32_t initBase, initSize; }; static void applyReloc(std::vector &buf, uint32_t off, uint32_t patchAddr, uint32_t target, uint8_t rtype, const std::string &symName) { int64_t Signed; switch (rtype) { case R_W65816_IMM8: if (target > 0xFF) die("R_W65816_IMM8 to '" + symName + "' = 0x" + 
std::to_string(target) + " out of range"); buf[off] = static_cast(target & 0xFF); break; case R_W65816_IMM16: if (target > 0xFFFF) die("R_W65816_IMM16 to '" + symName + "' = 0x" + std::to_string(target) + " out of range"); buf[off] = static_cast(target & 0xFF); buf[off + 1] = static_cast((target >> 8) & 0xFF); break; case R_W65816_IMM24: if (target > 0xFFFFFF) die("R_W65816_IMM24 to '" + symName + "' = 0x" + std::to_string(target) + " out of range"); buf[off] = static_cast(target & 0xFF); buf[off + 1] = static_cast((target >> 8) & 0xFF); buf[off + 2] = static_cast((target >> 16) & 0xFF); break; case R_W65816_PCREL8: Signed = static_cast(target) - (static_cast(patchAddr) + 1); if (Signed < -128 || Signed > 127) { char msg[256]; std::snprintf(msg, sizeof(msg), "R_W65816_PCREL8 to '%s' out of branch range (%lld bytes)", symName.c_str(), (long long)Signed); die(msg); } buf[off] = static_cast(Signed & 0xFF); break; case R_W65816_PCREL16: Signed = static_cast(target) - (static_cast(patchAddr) + 2); if (Signed < -32768 || Signed > 32767) die("R_W65816_PCREL16 to '" + symName + "' out of BRL range"); buf[off] = static_cast(Signed & 0xFF); buf[off + 1] = static_cast((Signed >> 8) & 0xFF); break; default: { char msg[128]; std::snprintf(msg, sizeof(msg), "unhandled relocation type %u to '%s'", rtype, symName.c_str()); die(msg); } } } struct Linker { std::vector> objs; uint32_t textBase = 0x8000; uint32_t rodataBase = 0; uint32_t bssBase = 0x2000; bool gcSections = true; // Per-section identity: (object index, section index within obj). using SecID = std::pair; std::set liveSecs; std::map symToSection; // Build the "global symbol name -> (objIdx, secIdx) where defined" // map. Honors weak vs strong: strong def overrides weak; first // weak-only def wins. Used by computeLiveSet() to follow cross- // object reloc references back to their defining section. 
void buildSymToSection() { std::map strongSeen; for (size_t fi = 0; fi < objs.size(); ++fi) { const auto &obj = *objs[fi]; for (const Symbol &sym : obj.symbols) { if (sym.name.empty()) continue; if (sym.bind == STB_LOCAL) continue; if (sym.shndx == SHN_UNDEF || sym.shndx == SHN_ABS || sym.shndx == SHN_COMMON || sym.shndx >= obj.sections.size()) continue; bool thisStrong = (sym.bind != STB_WEAK); auto sit = strongSeen.find(sym.name); if (sit == strongSeen.end()) { symToSection[sym.name] = {fi, sym.shndx}; strongSeen[sym.name] = thisStrong; } else if (thisStrong && !sit->second) { symToSection[sym.name] = {fi, sym.shndx}; sit->second = true; } } } } // Compute the live-section set via BFS from roots (entry point, // init_array sections — crt0 walks them at runtime). Without // gc-sections, every section is implicitly live. void computeLiveSet() { if (!gcSections) return; buildSymToSection(); std::vector work; auto markLive = [&](SecID s) { if (liveSecs.insert(s).second) work.push_back(s); }; // Roots: entry symbols. __start is the canonical crt0 entry; // also keep main (crt0 calls it) and __indirTarget (used by // __jsl_indir). Plus any defined symbol whose name starts // with __ (linker-defined globals like __heap_start are also // synthesized but their section refs follow naturally). for (const char *root : {"__start", "_start", "main", "__indirTarget", "__jsl_indir"}) { auto it = symToSection.find(root); if (it != symToSection.end()) markLive(it->second); } // crt0's init-loop walks .init_array via the linker-defined // boundary symbols __init_array_start/_end. All init_array // sections must therefore be considered live. Same for // .fini_array if any object provides it. for (size_t fi = 0; fi < objs.size(); ++fi) { for (uint32_t idx : objs[fi]->sectionsByKind("init_array")) markLive({fi, idx}); } // BFS: each live section's relocs reference symbols whose // defining sections are in turn live. 
Local refs via section // symbols (STT_SECTION) resolve within the same object. for (size_t i = 0; i < work.size(); ++i) { SecID cur = work[i]; const auto &obj = *objs[cur.first]; auto relIt = obj.relocs.find(cur.second); if (relIt == obj.relocs.end()) continue; for (const Reloc &r : relIt->second) { if (r.symIdx >= obj.symbols.size()) continue; const Symbol &sym = obj.symbols[r.symIdx]; if (sym.shndx != SHN_UNDEF && sym.shndx != SHN_ABS && sym.shndx != SHN_COMMON && sym.shndx < obj.sections.size()) { // Local def (incl. STT_SECTION refs). markLive({cur.first, sym.shndx}); continue; } // External — look up the global definition. auto sit = symToSection.find(sym.name); if (sit != symToSection.end()) markLive(sit->second); // Else: undefined external; resolveSym() will die later // (or the user explicitly declared the ref weak). } } } bool isLive(size_t fi, uint32_t idx) const { if (!gcSections) return true; return liveSecs.count({fi, idx}) > 0; } // Per-object, per-section: in-merged-text/rodata/bss offset. struct ObjOffsets { uint32_t textBaseInMerged = 0; uint32_t rodataBaseInMerged = 0; uint32_t bssBaseInMerged = 0; uint32_t initBaseInMerged = 0; std::map textWithin; std::map rodataWithin; std::map bssWithin; std::map initWithin; }; std::vector objOff; std::map globalSyms; void addObject(const std::string &path) { auto o = std::make_unique(); o->path = path; o->raw = readFile(path); o->parse(); objs.push_back(std::move(o)); } // Resolve a reloc to (target, name) using the symbol table and the // per-object section base map. Requires link() to have populated // objOff/globalSyms/lastLayout first. Returns false when the // referenced section is one we don't track (e.g. another .debug_* // section); strict callers should die() on false, lenient callers // (the DWARF sidecar) should leave the bytes object-local. 
bool resolveSym(const InputObject &obj, const ObjOffsets &oo, const Reloc &r, uint32_t &target, std::string &resolvedName) const { if (r.symIdx >= obj.symbols.size()) die(obj.path + ": reloc symIdx out of range"); const Symbol &sym = obj.symbols[r.symIdx]; if (sym.type == STT_SECTION) { if (sym.shndx >= obj.sections.size()) die(obj.path + ": section symbol shndx out of range"); const auto &refSec = obj.sections[sym.shndx]; std::string kind = sectionKind(refSec.name); uint32_t base = 0; if (kind == "text") { auto wIt = oo.textWithin.find(sym.shndx); base = lastLayout.textBase + oo.textBaseInMerged + (wIt == oo.textWithin.end() ? 0 : wIt->second); } else if (kind == "rodata") { auto wIt = oo.rodataWithin.find(sym.shndx); base = lastLayout.rodataBase + oo.rodataBaseInMerged + (wIt == oo.rodataWithin.end() ? 0 : wIt->second); } else if (kind == "bss") { auto wIt = oo.bssWithin.find(sym.shndx); base = lastLayout.bssBase + oo.bssBaseInMerged + (wIt == oo.bssWithin.end() ? 0 : wIt->second); } else if (kind == "init_array") { auto wIt = oo.initWithin.find(sym.shndx); base = lastLayout.initBase + oo.initBaseInMerged + (wIt == oo.initWithin.end() ? 0 : wIt->second); } else { resolvedName = refSec.name; return false; } target = base + r.addend; resolvedName = refSec.name; return true; } auto sIt = globalSyms.find(sym.name); if (sIt == globalSyms.end()) { // Undefined symbol — for the strict link path the caller // dies; for the DWARF sidecar this just means "leave the // bytes alone". resolvedName = sym.name; return false; } target = sIt->second + r.addend; resolvedName = sym.name; return true; } Layout link(std::vector &outImage) { // 1. Layout: each obj's sections at running offsets. objOff.resize(objs.size()); uint32_t curText = 0, curRodata = 0, curBss = 0, curInit = 0; // gc-sections: compute the live-section set before accumulating // so dead sections drop out of every later layout/reloc step. 
computeLiveSet(); for (size_t fi = 0; fi < objs.size(); ++fi) { ObjOffsets &oo = objOff[fi]; oo.textBaseInMerged = curText; for (uint32_t idx : objs[fi]->sectionsByKind("text")) { if (!isLive(fi, idx)) continue; oo.textWithin[idx] = curText - oo.textBaseInMerged; curText += objs[fi]->sections[idx].size; } oo.rodataBaseInMerged = curRodata; for (uint32_t idx : objs[fi]->sectionsByKind("rodata")) { if (!isLive(fi, idx)) continue; oo.rodataWithin[idx] = curRodata - oo.rodataBaseInMerged; curRodata += objs[fi]->sections[idx].size; } oo.bssBaseInMerged = curBss; for (uint32_t idx : objs[fi]->sectionsByKind("bss")) { if (!isLive(fi, idx)) continue; oo.bssWithin[idx] = curBss - oo.bssBaseInMerged; curBss += objs[fi]->sections[idx].size; } oo.initBaseInMerged = curInit; for (uint32_t idx : objs[fi]->sectionsByKind("init_array")) { if (!isLive(fi, idx)) continue; oo.initWithin[idx] = curInit - oo.initBaseInMerged; curInit += objs[fi]->sections[idx].size; } } Layout L; L.textBase = textBase; L.textSize = curText; L.bssSize = curBss; L.rodataBase = rodataBase ? rodataBase : (textBase + curText); L.rodataSize = curRodata; // Reject a --rodata-base that overlaps text. Without this // check, the gap between text-end and rodata-base goes // negative, the unsigned subtraction wraps to ~4GB, and the // image-write loop creates a multi-gigabyte file with no // diagnostic. Caught while sweeping --rodata-base values // in a strtok layout-sensitivity test. if (rodataBase && L.rodataBase < L.textBase + L.textSize) { char msg[160]; std::snprintf(msg, sizeof(msg), "--rodata-base 0x%X overlaps text 0x%X+%u " "(rodata must start at or after 0x%X)", L.rodataBase, L.textBase, L.textSize, L.textBase + L.textSize); die(msg); } // Hard-fail if text crosses into the IO window ($C000-$CFFF). // Code there would fetch instructions from hardware registers. // Programs that grow this big need to split into bank 1 (not // currently supported by this linker). 
if (L.textBase < 0xC000 && L.textBase + L.textSize > 0xC000) { char msg[160]; std::snprintf(msg, sizeof(msg), "text [0x%X+%u] crosses IIgs IO window 0xC000-0xCFFF — " "shrink the program or split into bank 1", L.textBase, L.textSize); die(msg); } // Auto-skip the IO window ($C000-$CFFF) if rodata would land // there. Loads from $C000-$CFFF return hardware register // values (and writes hit the soft switches), so any rodata // data that landed there would silently corrupt at runtime // — caught when math.o grew past ~28KB and pushed string // literals into the IO range, breaking smoke #86 (hash // table strcmp returned garbage because the keys read back // as IO register values). Catches both "starts before IO, // crosses in" and "starts inside IO" cases. if (!rodataBase && L.rodataBase < 0xD000 && L.rodataBase + L.rodataSize > 0xC000) { // Page-align upward past the IO window. L.rodataBase = 0xD000; // Pad the image so the gap between text-end and rodata- // start is just zeros. The runInMame loader skips // writes to the IO range so the soft switches stay // intact. } // .init_array goes immediately after .rodata in the image. L.initBase = L.rodataBase + L.rodataSize; L.initSize = curInit; // Init_array can also land in IO if rodata ends just before // or starts inside. if (L.initBase < 0xD000 && L.initBase + L.initSize > 0xC000) { L.initBase = 0xD000; } // After all skips, sanity-check we haven't gone past the LC // ceiling. The IIgs LC area is $D000-$FFFF (12KB usable when // bank 1 is selected; the $E000-$FFFF chunk is common to both // banks). crt0's `lda $C083` read-twice enables RAM read+write // for the entire LC range, so we can use through $FFFF. 
if (L.initBase + L.initSize > 0x10000u) { char msg[160]; std::snprintf(msg, sizeof(msg), "rodata + init_array [0x%X+%u] exceeds bank-0 LC " "ceiling 0x10000 — shrink the runtime or split into bank 1", L.rodataBase, (unsigned)(L.initBase + L.initSize - L.rodataBase)); die(msg); } uint32_t initBase = L.initBase; // bss-base safety: default 0x2000 only works if text doesn't // grow past it. When text + rodata + init_array would // overflow the 0x2000 bss start, shift bss above them so // crt0's bss-init doesn't zero loaded text bytes. Caller // can still force a specific bssBase via --bss-base. // // IIgs bank-0 hazard zones: // $C000-$CFFF: IO and soft switches (ALWAYS unusable — // reads/writes hit hardware registers). // $D000-$DFFF: Language Card 1 area. Read-only ROM by // default; crt0 enables LC1 RAM via the // $C083 soft switch (read-twice trick) so // BSS placed here is writable. // $E000-$FFFF: bank-0 ROM area, also LC-switched but // we don't enable it (less common need). // Skip past the IO window if BSS would land there; LC1 // ($D000-$DFFF) IS now usable thanks to crt0's soft-switch // enable. Above $DFFF means BSS exceeds 16-bit range — // bail clearly rather than silently corrupt. uint32_t loadEnd = L.initBase + L.initSize; L.bssBase = bssBase; if (L.bssBase < loadEnd) { // Page-align upward for nicer addresses in the map. L.bssBase = (loadEnd + 0xFF) & ~0xFFu; if (L.bssBase >= 0xC000 && L.bssBase < 0xD000) { L.bssBase = 0xD000; } } if (L.bssBase + L.bssSize > 0x10000u) { char msg[160]; std::snprintf(msg, sizeof(msg), "bss [0x%X+%u] exceeds bank-0 LC ceiling 0x10000 — " "shrink the runtime or split into bank 1", L.bssBase, L.bssSize); die(msg); } // Publish layout now so resolveSym() can read it during reloc // application (it's a const member that uses lastLayout). lastLayout = L; // Synthesize linker-defined symbols so crt0 / startup code // can find the section extents. These must NOT be in the // input objects; we provide them. 
globalSyms["__text_start"] = L.textBase; globalSyms["__text_end"] = L.textBase + L.textSize; globalSyms["__rodata_start"] = L.rodataBase; globalSyms["__rodata_end"] = L.rodataBase + L.rodataSize; globalSyms["__init_array_start"] = initBase; globalSyms["__init_array_end"] = initBase + curInit; globalSyms["__bss_start"] = L.bssBase; globalSyms["__bss_end"] = L.bssBase + L.bssSize; // __heap_start / __heap_end: pick the largest contiguous safe // range above bss_end. Without this, the previous hardcoded // heap_end=$BF00 gave heap_end < heap_start whenever BSS // spilled into LC1 — malloc immediately returned NULL. // Skip the IO window if heap_start would land there. uint32_t heapStart = L.bssBase + L.bssSize; if (heapStart >= 0xC000 && heapStart < 0xD000) { heapStart = 0xD000; // skip IO window } globalSyms["__heap_start"] = heapStart; if (heapStart < 0xC000) { globalSyms["__heap_end"] = 0xBF00; } else if (heapStart < 0x10000u) { // Heap in LC area ($D000-$FFFF, 12KB usable). crt0's // $C083 read-twice enables read+write for the whole range. globalSyms["__heap_end"] = 0x10000u; } else { // Unreachable — bssBase + bssSize > 0x10000 check above. globalSyms["__heap_end"] = heapStart; } // 2. Build global symbol map. Honor weak vs strong binding: // - strong def overrides any prior weak def // - strong + strong is a multiple-definition error // - weak + weak: first wins (any choice would be valid) // - weak after strong: ignored // Without this, the previous "last def wins" rule meant a weak // libc stub (e.g. putchar) could silently overwrite a user's // strong override depending on link order. 
std::map isStrong; // name -> strong-def seen for (size_t fi = 0; fi < objs.size(); ++fi) { const auto &obj = *objs[fi]; const auto &oo = objOff[fi]; for (const Symbol &sym : obj.symbols) { if (sym.name.empty()) continue; if (sym.shndx == SHN_UNDEF || sym.shndx == SHN_ABS || sym.shndx == SHN_COMMON || sym.shndx >= obj.sections.size()) continue; // Skip dead sections under gc-sections — their symbols // would otherwise resolve to whatever junk address the // missing oo.{text,rodata,bss,init}Within entry implies. if (!isLive(fi, sym.shndx)) continue; const auto &sec = obj.sections[sym.shndx]; std::string kind = sectionKind(sec.name); uint32_t addr = 0; if (kind == "text") { auto it = oo.textWithin.find(sym.shndx); addr = textBase + oo.textBaseInMerged + (it == oo.textWithin.end() ? 0 : it->second) + sym.value; } else if (kind == "rodata") { auto it = oo.rodataWithin.find(sym.shndx); addr = L.rodataBase + oo.rodataBaseInMerged + (it == oo.rodataWithin.end() ? 0 : it->second) + sym.value; } else if (kind == "bss") { auto it = oo.bssWithin.find(sym.shndx); addr = L.bssBase + oo.bssBaseInMerged + (it == oo.bssWithin.end() ? 0 : it->second) + sym.value; } else if (kind == "init_array") { auto it = oo.initWithin.find(sym.shndx); addr = initBase + oo.initBaseInMerged + (it == oo.initWithin.end() ? 0 : it->second) + sym.value; } else { continue; } bool thisStrong = (sym.bind != STB_WEAK); auto sit = isStrong.find(sym.name); if (sit == isStrong.end()) { globalSyms[sym.name] = addr; isStrong[sym.name] = thisStrong; } else if (thisStrong && !sit->second) { // strong over weak — replace. globalSyms[sym.name] = addr; sit->second = true; } else if (thisStrong && sit->second) { die("multiple strong definitions of '" + sym.name + "'"); } // weak after strong, or weak after weak: keep first. } } // 3. Build text and rodata buffers. Skip dead sections under // gc-sections (isLive() returns true for everything when gc // is off). 
std::vector textBuf; textBuf.reserve(curText); for (size_t fi = 0; fi < objs.size(); ++fi) { for (uint32_t idx : objs[fi]->sectionsByKind("text")) { if (!isLive(fi, idx)) continue; const uint8_t *p = objs[fi]->sectionData(idx); textBuf.insert(textBuf.end(), p, p + objs[fi]->sections[idx].size); } } std::vector rodataBuf; rodataBuf.reserve(curRodata); for (size_t fi = 0; fi < objs.size(); ++fi) { for (uint32_t idx : objs[fi]->sectionsByKind("rodata")) { if (!isLive(fi, idx)) continue; const uint8_t *p = objs[fi]->sectionData(idx); rodataBuf.insert(rodataBuf.end(), p, p + objs[fi]->sections[idx].size); } } // 4. Apply relocations to text buffer. for (size_t fi = 0; fi < objs.size(); ++fi) { const auto &obj = *objs[fi]; const auto &oo = objOff[fi]; for (uint32_t textIdx : obj.sectionsByKind("text")) { if (!isLive(fi, textIdx)) continue; auto it = obj.relocs.find(textIdx); if (it == obj.relocs.end()) continue; uint32_t inMerged = oo.textBaseInMerged + oo.textWithin.at(textIdx); for (const Reloc &r : it->second) { uint32_t patchOff = inMerged + r.offset; uint32_t patchAddr = textBase + patchOff; uint32_t target; std::string resolvedName; if (!resolveSym(obj, oo, r, target, resolvedName)) die(obj.path + ": .text reloc to unresolved '" + resolvedName + "'"); applyReloc(textBuf, patchOff, patchAddr, target, r.type, resolvedName); } } } // 4b. Apply relocations to rodata/data buffer. Globals like // `int *p = &v;` need their initializer patched at link time // (the .o emits a placeholder 0 + a R_W65816_IMM16 reloc). // Without this, every initialized pointer or function-pointer // table in the program reads 0 at runtime. 
for (size_t fi = 0; fi < objs.size(); ++fi) { const auto &obj = *objs[fi]; const auto &oo = objOff[fi]; for (uint32_t rdIdx : obj.sectionsByKind("rodata")) { if (!isLive(fi, rdIdx)) continue; auto it = obj.relocs.find(rdIdx); if (it == obj.relocs.end()) continue; uint32_t inMerged = oo.rodataBaseInMerged + oo.rodataWithin.at(rdIdx); for (const Reloc &r : it->second) { uint32_t patchOff = inMerged + r.offset; uint32_t patchAddr = L.rodataBase + patchOff; uint32_t target; std::string resolvedName; if (!resolveSym(obj, oo, r, target, resolvedName)) die(obj.path + ": .rodata reloc to unresolved '" + resolvedName + "'"); applyReloc(rodataBuf, patchOff, patchAddr, target, r.type, resolvedName); } } } // 5. Compose output: text || (gap) || rodata. bss is virtual. outImage.clear(); outImage = std::move(textBuf); if (L.rodataBase != textBase + curText) { uint32_t gap = L.rodataBase - (textBase + curText); outImage.insert(outImage.end(), gap, 0); } outImage.insert(outImage.end(), rodataBuf.begin(), rodataBuf.end()); // Build init_array buffer + apply its relocations (entries are // 16-bit function pointers needing IMM16 reloc). 
std::vector initBuf; initBuf.reserve(curInit); for (size_t fi = 0; fi < objs.size(); ++fi) { for (uint32_t idx : objs[fi]->sectionsByKind("init_array")) { if (!isLive(fi, idx)) continue; const uint8_t *p = objs[fi]->sectionData(idx); initBuf.insert(initBuf.end(), p, p + objs[fi]->sections[idx].size); } } for (size_t fi = 0; fi < objs.size(); ++fi) { const auto &obj = *objs[fi]; const auto &oo = objOff[fi]; for (uint32_t idx : obj.sectionsByKind("init_array")) { if (!isLive(fi, idx)) continue; auto it = obj.relocs.find(idx); if (it == obj.relocs.end()) continue; uint32_t inMerged = oo.initBaseInMerged + oo.initWithin.at(idx); for (const Reloc &r : it->second) { uint32_t target; std::string resolvedName; if (!resolveSym(obj, oo, r, target, resolvedName)) die(obj.path + ": .init_array reloc to unresolved '" + resolvedName + "'"); uint32_t patchOff = inMerged + r.offset; uint32_t patchAddr = initBase + patchOff; applyReloc(initBuf, patchOff, patchAddr, target, r.type, resolvedName); } } } outImage.insert(outImage.end(), initBuf.begin(), initBuf.end()); return L; } // ---------------------------------------------------------------- // DWARF sidecar. Walks each input object and concatenates every // section whose name starts with `.debug_`. Each section is // prefixed by an ASCII-readable header line: // // ; OBJ SEC SIZE RELOCS // // followed by the section bytes after applying any text/rodata/bss/ // init_array relocations from `.rela.`. This means PCs in // .debug_info / .debug_line / .debug_aranges resolve to final-image // addresses and a consumer like llvm-dwarfdump (or a custom MAME // overlay) can map them back to source lines. // // Intra-debug references (e.g., .debug_info -> .debug_str offsets) // are *not* renumbered; we concatenate sections without recompacting, // so the original object-local offsets stay correct relative to each // object's slice of the sidecar. A multi-TU consumer would need to // walk the slice headers to find the right base. 
void writeDebugSidecar(const std::string &path) const { std::ofstream f(path, std::ios::binary); if (!f) die("cannot open '" + path + "' for writing"); f << "; llvm816 link816 DWARF sidecar v1\n"; f << "; text/rodata/bss/init_array relocs applied to final-image addresses\n"; f << "; intra-debug refs left object-local (per-OBJ slice scope)\n"; size_t total = 0; size_t kept = 0; size_t patched = 0; for (size_t fi = 0; fi < objs.size(); ++fi) { const InputObject &obj = *objs[fi]; const ObjOffsets &oo = objOff[fi]; for (uint32_t idx = 0; idx < obj.sections.size(); ++idx) { const Section &sec = obj.sections[idx]; if (sec.name.rfind(".debug_", 0) != 0) continue; if (sec.size == 0) continue; std::vector data(sec.size); std::memcpy(data.data(), obj.raw.data() + sec.fileOffset, sec.size); size_t applied = 0; size_t skipped = 0; auto it = obj.relocs.find(idx); if (it != obj.relocs.end()) { for (const Reloc &r : it->second) { uint32_t target; std::string resolvedName; if (!resolveSym(obj, oo, r, target, resolvedName)) { skipped++; continue; } if (r.offset + 3 > sec.size) { // Out-of-range offset; defensively skip. skipped++; continue; } // patchAddr is only meaningful for PCREL types, // which DWARF doesn't use. Pass 0; applyReloc // ignores it for absolute types. applyReloc(data, r.offset, 0, target, r.type, resolvedName); applied++; } } patched += applied; char hdr[256]; std::snprintf(hdr, sizeof(hdr), "; OBJ %s SEC %s SIZE %u RELOCS_APPLIED %zu RELOCS_SKIPPED %zu\n", obj.path.c_str(), sec.name.c_str(), sec.size, applied, skipped); f.write(hdr, std::strlen(hdr)); f.write(reinterpret_cast(data.data()), sec.size); f << "\n"; total += sec.size; kept++; } } std::fprintf(stderr, "debug sidecar: %zu sections, %zu bytes, %zu relocs applied -> %s\n", kept, total, patched, path.c_str()); } void writeMap(const std::string &path) const { std::ofstream f(path); if (!f) die("cannot open '" + path + "' for writing"); char buf[256]; // Section layout summary at top. 
std::snprintf(buf, sizeof(buf), "# section layout\n" ".text : 0x%06x .. 0x%06x (%6u bytes)\n" ".rodata : 0x%06x .. 0x%06x (%6u bytes)\n" ".bss : 0x%06x .. 0x%06x (%6u bytes)\n", lastLayout.textBase, lastLayout.textBase + lastLayout.textSize, lastLayout.textSize, lastLayout.rodataBase, lastLayout.rodataBase + lastLayout.rodataSize, lastLayout.rodataSize, lastLayout.bssBase, lastLayout.bssBase + lastLayout.bssSize, lastLayout.bssSize); f.write(buf, std::strlen(buf)); // Per-input-file contributions to .text (size in bytes). std::snprintf(buf, sizeof(buf), "\n# per-input-file .text contributions\n"); f.write(buf, std::strlen(buf)); for (size_t fi = 0; fi < objs.size(); ++fi) { uint32_t bytes = 0; for (uint32_t idx : objs[fi]->sectionsByKind("text")) bytes += objs[fi]->sections[idx].size; std::snprintf(buf, sizeof(buf), "%6u %s\n", bytes, objs[fi]->path.c_str()); f.write(buf, std::strlen(buf)); } // Symbol table sorted by address. std::snprintf(buf, sizeof(buf), "\n# global symbols (sorted by address)\n"); f.write(buf, std::strlen(buf)); std::vector> sorted; for (const auto &kv : globalSyms) sorted.emplace_back(kv.second, kv.first); std::sort(sorted.begin(), sorted.end()); for (const auto &p : sorted) { std::snprintf(buf, sizeof(buf), "0x%06x %s\n", p.first, p.second.c_str()); f.write(buf, std::strlen(buf)); } // Backwards-compat: also emit the old `name = 0x...` lines so // existing smoke greps still match. for (const auto &kv : globalSyms) { std::snprintf(buf, sizeof(buf), "%s = 0x%06x\n", kv.first.c_str(), kv.second); f.write(buf, std::strlen(buf)); } } // Stash the last layout so writeMap can use it. 
Layout lastLayout; }; // ---------------------------------------------------------------- CLI static uint32_t parseInt(const std::string &s) { char *end = nullptr; unsigned long v = std::strtoul(s.c_str(), &end, 0); if (end == s.c_str() || *end != '\0') die("bad numeric value '" + s + "'"); // 65816 addresses are 24-bit; reject anything that doesn't fit so // a typo like `--text-base 0x100000000` doesn't silently wrap to 0. if (v > 0xFFFFFF) die("address '" + s + "' exceeds 24-bit range"); return static_cast(v); } static void usage(const char *argv0) { std::fprintf(stderr, "usage: %s -o [--text-base ADDR] [--rodata-base ADDR]\n" " [--bss-base ADDR] [--map FILE] [--debug-out FILE]\n" " [--no-gc-sections]\n" " ...\n", argv0); std::exit(2); } } // anonymous namespace int main(int argc, char **argv) { std::string outPath; std::string mapPath; std::string debugOutPath; Linker linker; int i = 1; while (i < argc) { std::string a = argv[i]; if (a == "-o" || a == "--output") { if (++i >= argc) usage(argv[0]); outPath = argv[i++]; } else if (a == "--text-base") { if (++i >= argc) usage(argv[0]); linker.textBase = parseInt(argv[i++]); } else if (a == "--rodata-base") { if (++i >= argc) usage(argv[0]); linker.rodataBase = parseInt(argv[i++]); } else if (a == "--bss-base") { if (++i >= argc) usage(argv[0]); linker.bssBase = parseInt(argv[i++]); } else if (a == "--map") { if (++i >= argc) usage(argv[0]); mapPath = argv[i++]; } else if (a == "--debug-out") { if (++i >= argc) usage(argv[0]); debugOutPath = argv[i++]; } else if (a == "--gc-sections") { // Drop sections not reachable from __start / main / // init_array. Requires `-ffunction-sections` (so each // function is in its own section). Significantly shrinks // text for programs that link the whole runtime but only // use a fraction of it. ON by default; --no-gc-sections // disables. 
linker.gcSections = true; i++; } else if (a == "--no-gc-sections") { linker.gcSections = false; i++; } else if (a == "-h" || a == "--help") { usage(argv[0]); } else if (!a.empty() && a[0] == '-') { die("unknown option '" + a + "'"); } else { linker.addObject(a); i++; } } if (outPath.empty() || linker.objs.empty()) usage(argv[0]); std::vector image; Layout L = linker.link(image); std::ofstream f(outPath, std::ios::binary); if (!f) die("cannot open '" + outPath + "' for writing"); f.write(reinterpret_cast(image.data()), image.size()); if (!mapPath.empty()) linker.writeMap(mapPath); if (!debugOutPath.empty()) linker.writeDebugSidecar(debugOutPath); std::fprintf(stderr, "linked: text=[0x%04x+%u] rodata=[0x%04x+%u] bss=[0x%04x+%u] " "-> %s (%zu bytes)\n", L.textBase, L.textSize, L.rodataBase, L.rodataSize, L.bssBase, L.bssSize, outPath.c_str(), image.size()); return 0; }