65816-llvm-mos/src/link816/link816.cpp
Scott Duensing 81694c5971 Checkpoint
2026-05-02 19:17:23 -05:00

1126 lines
48 KiB
C++

// link816 — minimal flat-binary linker for W65816 ELF .o files.
//
// Reads one or more ELF32 object files (produced by llvm-mc / clang -c
// with the W65816 backend), concatenates their .text* / .rodata* /
// .data* sections at consecutive addresses starting from a given base,
// builds a global symbol table, resolves the W65816 ELF relocations,
// and writes a flat binary suitable for loading into a 65816 emulator
// or further wrapping by omfEmit.
//
// Standalone — no LLVM dependency. Parses ELF32-LE structures
// directly with the layout from /usr/include/elf.h.
//
// Supported relocation types (per W65816ELFObjectWriter):
// 1 R_W65816_IMM8 — 1-byte absolute
// 2 R_W65816_IMM16 — 2-byte LE absolute
// 3 R_W65816_IMM24 — 3-byte LE absolute (JSL targets)
// 4 R_W65816_PCREL8 — 1-byte signed PC-relative
// 5 R_W65816_PCREL16 — 2-byte signed PC-relative
//
// CLI mirrors the Python tool exactly:
// link816 -o out.bin --text-base 0x8000 --bss-base 0x2000 a.o b.o ...
// [--rodata-base ADDR] [--map FILE]
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <map>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>
namespace {
// ---------------------------------------------------------------- ELF32 layout
// We only need the LE host-side parsing path. Field names mirror
// /usr/include/elf.h so a reader can cross-check against the spec.
// ELF32 file header. InputObject::parse() memcpy()s the first
// sizeof(Elf32Ehdr) bytes of an object file straight into this struct, so
// the member order and widths must match the on-disk header byte for byte.
// Only e_ident (checked via the raw bytes), e_shoff, e_shentsize, e_shnum
// and e_shstrndx are consulted by this linker; the remaining fields exist
// to keep the layout right.
struct Elf32Ehdr {
    uint8_t  e_ident[16];  // magic, class and data-encoding bytes
    uint16_t e_type;
    uint16_t e_machine;
    uint32_t e_version;
    uint32_t e_entry;
    uint32_t e_phoff;
    uint32_t e_shoff;      // file offset of the section header table
    uint32_t e_flags;
    uint16_t e_ehsize;
    uint16_t e_phentsize;
    uint16_t e_phnum;
    uint16_t e_shentsize;  // size of one section header entry
    uint16_t e_shnum;      // number of section header entries
    uint16_t e_shstrndx;   // index of the section-name string table
};
// The raw-memcpy parsing path silently breaks if the compiler ever pads
// this struct; fail the build instead.
static_assert(sizeof(Elf32Ehdr) == 52,
              "Elf32Ehdr must match the 52-byte on-disk ELF32 header");
// ELF32 section header entry. parse() copies entries out of the file with
// memcpy() and also compares the file's e_shentsize against
// sizeof(Elf32Shdr), so this layout must match the on-disk entry exactly.
struct Elf32Shdr {
    uint32_t sh_name;       // offset of the name in the section-name string table
    uint32_t sh_type;       // SHT_*
    uint32_t sh_flags;
    uint32_t sh_addr;
    uint32_t sh_offset;     // file offset of the section contents
    uint32_t sh_size;       // size of the section in bytes
    uint32_t sh_link;       // for SHT_SYMTAB: index of its string table
    uint32_t sh_info;       // for SHT_RELA: index of the section being patched
    uint32_t sh_addralign;
    uint32_t sh_entsize;
};
// Guard the raw-memcpy parsing path against unexpected padding.
static_assert(sizeof(Elf32Shdr) == 40,
              "Elf32Shdr must match the 40-byte on-disk ELF32 section header");
// Section header type codes (Elf32Shdr::sh_type). The parser compares
// against SHT_SYMTAB and SHT_RELA; the others are listed for reference.
constexpr uint32_t SHT_NULL{0};
constexpr uint32_t SHT_PROGBITS{1};
constexpr uint32_t SHT_SYMTAB{2};
constexpr uint32_t SHT_STRTAB{3};
constexpr uint32_t SHT_RELA{4};
constexpr uint32_t SHT_NOBITS{8};
// ELF32 symbol table entry. parse() derives the symbol count from
// sh_size / sizeof(Elf32Sym) and memcpy()s each entry, so the layout must
// match the on-disk entry exactly.
struct Elf32Sym {
    uint32_t st_name;   // offset of the name in the symbol string table
    uint32_t st_value;  // symbol value; added to the section's placed address
    uint32_t st_size;
    uint8_t  st_info;   // binding in the high nibble, type in the low nibble
    uint8_t  st_other;
    uint16_t st_shndx;  // defining section index, or an SHN_* reserved value
};
// Guard the raw-memcpy parsing path against unexpected padding.
static_assert(sizeof(Elf32Sym) == 16,
              "Elf32Sym must match the 16-byte on-disk ELF32 symbol entry");
// Reserved values of Elf32Sym::st_shndx that do not name a real section.
constexpr uint16_t SHN_UNDEF{0};
constexpr uint16_t SHN_ABS{0xFFF1};
constexpr uint16_t SHN_COMMON{0xFFF2};

// Symbol binding: the upper four bits of st_info.
constexpr uint8_t STB_LOCAL{0};
constexpr uint8_t STB_GLOBAL{1};
constexpr uint8_t STB_WEAK{2};

// Symbol type: the lower four bits of st_info.
constexpr uint8_t STT_NOTYPE{0};
constexpr uint8_t STT_OBJECT{1};
constexpr uint8_t STT_FUNC{2};
constexpr uint8_t STT_SECTION{3};

// Extract the symbol type (low nibble) from an st_info byte.
inline uint8_t ELF32_ST_TYPE(uint8_t i) {
    return static_cast<uint8_t>(i & 0x0F);
}

// Extract the symbol binding (high nibble) from an st_info byte.
inline uint8_t ELF32_ST_BIND(uint8_t i) {
    return static_cast<uint8_t>((i >> 4) & 0x0F);
}
// ELF32 relocation entry with explicit addend. parse() derives the entry
// count from sh_size / sizeof(Elf32Rela) and memcpy()s each entry, so the
// layout must match the on-disk entry exactly.
struct Elf32Rela {
    uint32_t r_offset;  // byte offset of the patch site within the target section
    uint32_t r_info;    // symbol index (upper 24 bits) and reloc type (low 8 bits)
    int32_t  r_addend;  // signed constant added to the symbol address
};
// Guard the raw-memcpy parsing path against unexpected padding.
static_assert(sizeof(Elf32Rela) == 12,
              "Elf32Rela must match the 12-byte on-disk ELF32 RELA entry");
// Symbol-table index carried in the upper 24 bits of r_info.
inline uint32_t ELF32_R_SYM(uint32_t i) {
    return i >> 8;
}

// Relocation type carried in the low byte of r_info.
inline uint32_t ELF32_R_TYPE(uint32_t i) {
    return i & 0xFF;
}

// W65816 relocation type numbers; these must stay in step with
// W65816ELFObjectWriter.
constexpr uint8_t R_W65816_IMM8{1};
constexpr uint8_t R_W65816_IMM16{2};
constexpr uint8_t R_W65816_IMM24{3};
constexpr uint8_t R_W65816_PCREL8{4};
constexpr uint8_t R_W65816_PCREL16{5};
// ---------------------------------------------------------------- Helpers
// Report a fatal error on stderr as "link816: <msg>" and terminate the
// process with exit status 1. Never returns.
[[noreturn]] static void die(const std::string &msg) {
    std::fputs("link816: ", stderr);
    std::fputs(msg.c_str(), stderr);
    std::fputc('\n', stderr);
    std::exit(1);
}
// Load the entire file at `path` into memory as raw bytes.
// Calls die() if the file cannot be opened.
static std::vector<uint8_t> readFile(const std::string &path) {
    std::ifstream in(path, std::ios::binary);
    if (!in) {
        die("cannot open '" + path + "' for reading");
    }
    std::vector<uint8_t> bytes;
    bytes.assign(std::istreambuf_iterator<char>(in),
                 std::istreambuf_iterator<char>());
    return bytes;
}
// Classify an input section by name into the output region it feeds.
//
// Returns one of "text", "rodata", "bss", "init_array", "fini_array", or
// "" for sections the linker does not place. A name matches a family when
// it is exactly the stem (".text") or a dotted subsection of it
// (".text.foo"). Note that .data* is reported as "rodata": initialized
// data is merged into the same image region as read-only data.
// .init_array* and .fini_array* each get their own kind so the linker can
// place them separately (it emits __init_array_start/_end for crt0 to walk).
static std::string sectionKind(const std::string &name) {
    struct Rule {
        const char *stem;
        const char *kind;
    };
    static const Rule kRules[] = {
        {".text", "text"},
        {".rodata", "rodata"},
        {".data", "rodata"},
        {".bss", "bss"},
        {".init_array", "init_array"},
        {".fini_array", "fini_array"},
    };
    // True when `name` is the stem itself or starts with "<stem>.".
    const auto inFamily = [&name](const std::string &stem) {
        if (name == stem) return true;
        const std::string dotted = stem + ".";
        return name.compare(0, dotted.size(), dotted) == 0;
    };
    for (const Rule &rule : kRules) {
        if (inFamily(rule.stem)) return rule.kind;
    }
    return "";
}
// ---------------------------------------------------------------- ELF parser
// One section header of an input object, reduced to the fields the linker
// uses. Filled in by InputObject::parse() from the matching Elf32Shdr.
struct Section {
std::string name; // resolved through the section-name string table (sh_name)
uint32_t type; // sh_type (SHT_*)
uint32_t size; // sh_size, in bytes
uint32_t fileOffset; // sh_offset: start of the contents within the file
uint32_t link; // sh_link; parse() reads it on SHT_SYMTAB as the string-table index
uint32_t info; // sh_info; parse() reads it on SHT_RELA as the target section index
};
// One symbol-table entry of an input object, decoded by
// InputObject::parse() from the matching Elf32Sym.
struct Symbol {
std::string name; // resolved through the symbol string table (st_name)
uint32_t value; // st_value; the linker adds it to the placed section address
uint16_t shndx; // st_shndx: defining section index, or SHN_UNDEF/SHN_ABS/SHN_COMMON
uint8_t type; // STT_* (low nibble of st_info)
uint8_t bind; // STB_LOCAL / STB_GLOBAL / STB_WEAK (high nibble of st_info)
};
// One relocation, decoded by InputObject::parse() from an Elf32Rela entry.
// Relocs are stored per target section in InputObject::relocs.
struct Reloc {
uint32_t offset; // r_offset: byte offset of the patch site within the target section
uint32_t symIdx; // index into the owning object's symbol table (ELF32_R_SYM)
uint8_t type; // R_W65816_* (ELF32_R_TYPE)
int32_t addend; // r_addend: signed constant added to the resolved address
};
struct InputObject {
std::string path;
std::vector<uint8_t> raw;
std::vector<Section> sections;
std::vector<Symbol> symbols;
// relocs indexed by target section id
std::map<uint32_t, std::vector<Reloc>> relocs;
void parse() {
if (raw.size() < sizeof(Elf32Ehdr))
die("'" + path + "': file too small to be ELF");
if (raw[0] != 0x7f || raw[1] != 'E' || raw[2] != 'L' || raw[3] != 'F')
die("'" + path + "': not an ELF file");
if (raw[4] != 1) // ELFCLASS32
die("'" + path + "': not 32-bit ELF");
if (raw[5] != 1) // ELFDATA2LSB
die("'" + path + "': not little-endian ELF");
Elf32Ehdr hdr;
std::memcpy(&hdr, raw.data(), sizeof(hdr));
if (hdr.e_shoff == 0 || hdr.e_shnum == 0)
die("'" + path + "': no section table");
if (hdr.e_shentsize != sizeof(Elf32Shdr))
die("'" + path + "': unexpected section header size");
// Section header string table — used to look up section names.
Elf32Shdr shstrhdr;
std::memcpy(&shstrhdr,
raw.data() + hdr.e_shoff + hdr.e_shstrndx * sizeof(Elf32Shdr),
sizeof(shstrhdr));
const char *shstrtab = reinterpret_cast<const char *>(
raw.data() + shstrhdr.sh_offset);
sections.resize(hdr.e_shnum);
std::vector<Elf32Shdr> shdrs(hdr.e_shnum);
for (size_t i = 0; i < hdr.e_shnum; ++i) {
std::memcpy(&shdrs[i],
raw.data() + hdr.e_shoff + i * sizeof(Elf32Shdr),
sizeof(Elf32Shdr));
sections[i].name = std::string(shstrtab + shdrs[i].sh_name);
sections[i].type = shdrs[i].sh_type;
sections[i].size = shdrs[i].sh_size;
sections[i].fileOffset = shdrs[i].sh_offset;
sections[i].link = shdrs[i].sh_link;
sections[i].info = shdrs[i].sh_info;
}
// Find the symbol table and its string table.
size_t symtabIdx = (size_t)-1, symstrtabIdx = (size_t)-1;
for (size_t i = 0; i < sections.size(); ++i) {
if (sections[i].type == SHT_SYMTAB) {
symtabIdx = i;
symstrtabIdx = sections[i].link;
break;
}
}
if (symtabIdx == (size_t)-1) {
// Object with no symbols is unusual but legal — treat as empty.
return;
}
const char *symstrtab = reinterpret_cast<const char *>(
raw.data() + sections[symstrtabIdx].fileOffset);
size_t numSyms = sections[symtabIdx].size / sizeof(Elf32Sym);
symbols.resize(numSyms);
for (size_t i = 0; i < numSyms; ++i) {
Elf32Sym sym;
std::memcpy(&sym,
raw.data() + sections[symtabIdx].fileOffset
+ i * sizeof(Elf32Sym),
sizeof(Elf32Sym));
symbols[i].name = std::string(symstrtab + sym.st_name);
symbols[i].value = sym.st_value;
symbols[i].shndx = sym.st_shndx;
symbols[i].type = ELF32_ST_TYPE(sym.st_info);
symbols[i].bind = ELF32_ST_BIND(sym.st_info);
}
// Walk RELA sections; index by their target section (sh_info).
for (size_t i = 0; i < sections.size(); ++i) {
if (sections[i].type != SHT_RELA) continue;
uint32_t targetSec = sections[i].info;
size_t numRels = sections[i].size / sizeof(Elf32Rela);
std::vector<Reloc> &out = relocs[targetSec];
out.reserve(numRels);
for (size_t j = 0; j < numRels; ++j) {
Elf32Rela r;
std::memcpy(&r,
raw.data() + sections[i].fileOffset
+ j * sizeof(Elf32Rela),
sizeof(Elf32Rela));
Reloc R;
R.offset = r.r_offset;
R.symIdx = ELF32_R_SYM(r.r_info);
R.type = static_cast<uint8_t>(ELF32_R_TYPE(r.r_info));
R.addend = r.r_addend;
out.push_back(R);
}
}
}
const uint8_t *sectionData(uint32_t idx) const {
return raw.data() + sections[idx].fileOffset;
}
std::vector<uint32_t> sectionsByKind(const std::string &kind) const {
std::vector<uint32_t> out;
for (size_t i = 0; i < sections.size(); ++i) {
if (sections[i].size == 0) continue;
if (sectionKind(sections[i].name) == kind)
out.push_back(static_cast<uint32_t>(i));
}
return out;
}
};
// ---------------------------------------------------------------- Linker
// Final address and size, in bytes, of each output region as decided by
// Linker::link(). "init" is the merged .init_array region.
struct Layout {
uint32_t textBase, textSize;
uint32_t rodataBase, rodataSize;
uint32_t bssBase, bssSize;
uint32_t initBase, initSize;
};
// Patch one relocation into `buf`.
//
//   buf       merged region buffer being patched
//   off       byte offset of the patch site within `buf`
//   patchAddr final address of the patch site (used by PC-relative types only)
//   target    resolved symbol address plus addend
//   rtype     one of the R_W65816_* relocation types
//   symName   name of the referenced symbol/section, for diagnostics
//
// Absolute types store `target` little-endian in 1/2/3 bytes. PC-relative
// types store target - (patchAddr + width), i.e. the displacement from the
// byte following the operand. Calls die() on an unknown type, on a patch
// site outside `buf`, or when the value does not fit the field.
static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
                       uint32_t patchAddr, uint32_t target,
                       uint8_t rtype, const std::string &symName) {
    // Field width and family for each supported type.
    size_t width = 0;
    bool pcRelative = false;
    const char *typeName = "";
    switch (rtype) {
    case R_W65816_IMM8:    width = 1; typeName = "R_W65816_IMM8";    break;
    case R_W65816_IMM16:   width = 2; typeName = "R_W65816_IMM16";   break;
    case R_W65816_IMM24:   width = 3; typeName = "R_W65816_IMM24";   break;
    case R_W65816_PCREL8:  width = 1; pcRelative = true;             break;
    case R_W65816_PCREL16: width = 2; pcRelative = true;             break;
    default:
        die("unhandled relocation type " +
            std::to_string(static_cast<unsigned>(rtype)) + " to '" +
            symName + "'");
    }

    // Refuse to write outside the buffer; a bad r_offset in the object
    // would otherwise corrupt memory. 64-bit sum so `off` cannot wrap.
    if (static_cast<uint64_t>(off) + width > buf.size())
        die("relocation to '" + symName + "' at offset " +
            std::to_string(off) + " lies outside its " +
            std::to_string(buf.size()) + "-byte region");

    uint32_t value = 0;
    if (!pcRelative) {
        const uint32_t maxValue = (1u << (8 * width)) - 1u;
        if (target > maxValue) {
            // Print the value in hex so it matches the "0x" prefix.
            char hex[16];
            std::snprintf(hex, sizeof(hex), "%X", static_cast<unsigned>(target));
            die(std::string(typeName) + " to '" + symName + "' = 0x" + hex +
                " out of range");
        }
        value = target;
    } else {
        const int64_t delta =
            static_cast<int64_t>(target) -
            (static_cast<int64_t>(patchAddr) + static_cast<int64_t>(width));
        if (rtype == R_W65816_PCREL8) {
            if (delta < -128 || delta > 127)
                die("R_W65816_PCREL8 to '" + symName +
                    "' out of branch range (" + std::to_string(delta) +
                    " bytes)");
        } else {
            if (delta < -32768 || delta > 32767)
                die("R_W65816_PCREL16 to '" + symName +
                    "' out of BRL range");
        }
        // Two's-complement truncation keeps the low bytes of the signed
        // displacement, which is what the branch operand encodes.
        value = static_cast<uint32_t>(delta);
    }

    // Store little-endian.
    for (size_t i = 0; i < width; ++i)
        buf[off + i] = static_cast<uint8_t>((value >> (8 * i)) & 0xFF);
}
struct Linker {
// Loaded input objects, in the order addObject() was called.
std::vector<std::unique_ptr<InputObject>> objs;
// Requested base address of the merged text region.
uint32_t textBase = 0x8000;
// Requested rodata base; 0 means "place directly after text" (see link()).
uint32_t rodataBase = 0;
// Requested bss base; link() may move it if it falls below the loaded image.
uint32_t bssBase = 0x2000;
// When true, sections not reachable from the roots are dropped.
bool gcSections = true;
// Per-section identity: (object index, section index within obj).
using SecID = std::pair<size_t, uint32_t>;
// Sections found reachable by computeLiveSet(); only consulted when
// gcSections is true.
std::set<SecID> liveSecs;
// Non-local symbol name -> section that defines it; built by
// buildSymToSection().
std::map<std::string, SecID> symToSection;
// Build the "global symbol name -> (objIdx, secIdx) where defined"
// map. Honors weak vs strong: strong def overrides weak; first
// weak-only def wins. Used by computeLiveSet() to follow cross-
// object reloc references back to their defining section.
void buildSymToSection() {
std::map<std::string, bool> strongSeen;
for (size_t fi = 0; fi < objs.size(); ++fi) {
const auto &obj = *objs[fi];
for (const Symbol &sym : obj.symbols) {
if (sym.name.empty()) continue;
if (sym.bind == STB_LOCAL) continue;
if (sym.shndx == SHN_UNDEF || sym.shndx == SHN_ABS ||
sym.shndx == SHN_COMMON ||
sym.shndx >= obj.sections.size())
continue;
bool thisStrong = (sym.bind != STB_WEAK);
auto sit = strongSeen.find(sym.name);
if (sit == strongSeen.end()) {
symToSection[sym.name] = {fi, sym.shndx};
strongSeen[sym.name] = thisStrong;
} else if (thisStrong && !sit->second) {
symToSection[sym.name] = {fi, sym.shndx};
sit->second = true;
}
}
}
}
// Compute the live-section set via BFS from roots (entry point,
// init_array sections — crt0 walks them at runtime). Without
// gc-sections, every section is implicitly live.
void computeLiveSet() {
if (!gcSections) return;
buildSymToSection();
std::vector<SecID> work;
auto markLive = [&](SecID s) {
if (liveSecs.insert(s).second) work.push_back(s);
};
// Roots: entry symbols. __start is the canonical crt0 entry;
// also keep main (crt0 calls it) and __indirTarget (used by
// __jsl_indir). Plus any defined symbol whose name starts
// with __ (linker-defined globals like __heap_start are also
// synthesized but their section refs follow naturally).
for (const char *root : {"__start", "_start", "main",
"__indirTarget", "__jsl_indir"}) {
auto it = symToSection.find(root);
if (it != symToSection.end()) markLive(it->second);
}
// crt0's init-loop walks .init_array via the linker-defined
// boundary symbols __init_array_start/_end. All init_array
// sections must therefore be considered live. Same for
// .fini_array if any object provides it.
for (size_t fi = 0; fi < objs.size(); ++fi) {
for (uint32_t idx : objs[fi]->sectionsByKind("init_array"))
markLive({fi, idx});
}
// BFS: each live section's relocs reference symbols whose
// defining sections are in turn live. Local refs via section
// symbols (STT_SECTION) resolve within the same object.
for (size_t i = 0; i < work.size(); ++i) {
SecID cur = work[i];
const auto &obj = *objs[cur.first];
auto relIt = obj.relocs.find(cur.second);
if (relIt == obj.relocs.end()) continue;
for (const Reloc &r : relIt->second) {
if (r.symIdx >= obj.symbols.size()) continue;
const Symbol &sym = obj.symbols[r.symIdx];
if (sym.shndx != SHN_UNDEF &&
sym.shndx != SHN_ABS &&
sym.shndx != SHN_COMMON &&
sym.shndx < obj.sections.size()) {
// Local def (incl. STT_SECTION refs).
markLive({cur.first, sym.shndx});
continue;
}
// External — look up the global definition.
auto sit = symToSection.find(sym.name);
if (sit != symToSection.end()) markLive(sit->second);
// Else: undefined external; resolveSym() will die later
// (or the user explicitly declared the ref weak).
}
}
}
// Whether section `idx` of object `fi` is kept in the output. With
// gcSections off every section counts as live; otherwise only those
// recorded in liveSecs do.
bool isLive(size_t fi, uint32_t idx) const {
    return !gcSections || liveSecs.find(SecID(fi, idx)) != liveSecs.end();
}
// Per-object, per-section: in-merged-text/rodata/bss offset.
// Filled in by link() during layout. For each region kind:
//   <kind>BaseInMerged  offset within the merged region at which this
//                       object's first section of that kind is placed
//   <kind>Within        section index -> offset of that section relative
//                       to <kind>BaseInMerged; only live, non-empty
//                       sections get an entry
// A section's final address is therefore
//   region base + <kind>BaseInMerged + <kind>Within[section].
struct ObjOffsets {
uint32_t textBaseInMerged = 0;
uint32_t rodataBaseInMerged = 0;
uint32_t bssBaseInMerged = 0;
uint32_t initBaseInMerged = 0; // merged .init_array region
std::map<uint32_t, uint32_t> textWithin;
std::map<uint32_t, uint32_t> rodataWithin;
std::map<uint32_t, uint32_t> bssWithin;
std::map<uint32_t, uint32_t> initWithin;
};
// Layout offsets per input object; link() sizes it to objs.size() and
// indexes it with the same object index as objs.
std::vector<ObjOffsets> objOff;
// Symbol name -> final address. Holds both the linker-synthesized symbols
// (__text_start, __bss_end, ...) and the symbols defined by live sections.
std::map<std::string, uint32_t> globalSyms;
void addObject(const std::string &path) {
auto o = std::make_unique<InputObject>();
o->path = path;
o->raw = readFile(path);
o->parse();
objs.push_back(std::move(o));
}
// Resolve a reloc to (target, name) using the symbol table and the
// per-object section base map. Requires link() to have populated
// objOff/globalSyms/lastLayout first. Returns false when the
// referenced section is one we don't track (e.g. another .debug_*
// section); strict callers should die() on false, lenient callers
// (the DWARF sidecar) should leave the bytes object-local.
bool resolveSym(const InputObject &obj, const ObjOffsets &oo,
const Reloc &r,
uint32_t &target, std::string &resolvedName) const {
if (r.symIdx >= obj.symbols.size())
die(obj.path + ": reloc symIdx out of range");
const Symbol &sym = obj.symbols[r.symIdx];
if (sym.type == STT_SECTION) {
if (sym.shndx >= obj.sections.size())
die(obj.path + ": section symbol shndx out of range");
const auto &refSec = obj.sections[sym.shndx];
std::string kind = sectionKind(refSec.name);
uint32_t base = 0;
if (kind == "text") {
auto wIt = oo.textWithin.find(sym.shndx);
base = lastLayout.textBase + oo.textBaseInMerged
+ (wIt == oo.textWithin.end() ? 0 : wIt->second);
} else if (kind == "rodata") {
auto wIt = oo.rodataWithin.find(sym.shndx);
base = lastLayout.rodataBase + oo.rodataBaseInMerged
+ (wIt == oo.rodataWithin.end() ? 0 : wIt->second);
} else if (kind == "bss") {
auto wIt = oo.bssWithin.find(sym.shndx);
base = lastLayout.bssBase + oo.bssBaseInMerged
+ (wIt == oo.bssWithin.end() ? 0 : wIt->second);
} else if (kind == "init_array") {
auto wIt = oo.initWithin.find(sym.shndx);
base = lastLayout.initBase + oo.initBaseInMerged
+ (wIt == oo.initWithin.end() ? 0 : wIt->second);
} else {
resolvedName = refSec.name;
return false;
}
target = base + r.addend;
resolvedName = refSec.name;
return true;
}
auto sIt = globalSyms.find(sym.name);
if (sIt == globalSyms.end()) {
// Undefined symbol — for the strict link path the caller
// dies; for the DWARF sidecar this just means "leave the
// bytes alone".
resolvedName = sym.name;
return false;
}
target = sIt->second + r.addend;
resolvedName = sym.name;
return true;
}
// Lay out every loaded object, resolve symbols, apply relocations and
// produce the flat output image.
//
//   outImage   out: replaced with text || gap || rodata || gap || init_array,
//              where the first byte corresponds to address textBase. Gaps
//              are zero-filled so every byte sits at (address - textBase).
//              bss occupies no bytes in the image.
//
// Returns the final Layout (also stored in lastLayout). As side effects it
// fills objOff and globalSyms. Calls die() on layout violations (rodata
// overlapping text, text crossing the IO window, regions exceeding
// 0x10000), on multiple strong definitions, and on unresolved relocations.
Layout link(std::vector<uint8_t> &outImage) {
    // 1. Layout: each obj's sections at running offsets.
    objOff.resize(objs.size());
    uint32_t curText = 0, curRodata = 0, curBss = 0, curInit = 0;
    // gc-sections: compute the live-section set before accumulating
    // so dead sections drop out of every later layout/reloc step.
    computeLiveSet();
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        ObjOffsets &oo = objOff[fi];
        oo.textBaseInMerged = curText;
        for (uint32_t idx : objs[fi]->sectionsByKind("text")) {
            if (!isLive(fi, idx)) continue;
            oo.textWithin[idx] = curText - oo.textBaseInMerged;
            curText += objs[fi]->sections[idx].size;
        }
        oo.rodataBaseInMerged = curRodata;
        for (uint32_t idx : objs[fi]->sectionsByKind("rodata")) {
            if (!isLive(fi, idx)) continue;
            oo.rodataWithin[idx] = curRodata - oo.rodataBaseInMerged;
            curRodata += objs[fi]->sections[idx].size;
        }
        oo.bssBaseInMerged = curBss;
        for (uint32_t idx : objs[fi]->sectionsByKind("bss")) {
            if (!isLive(fi, idx)) continue;
            oo.bssWithin[idx] = curBss - oo.bssBaseInMerged;
            curBss += objs[fi]->sections[idx].size;
        }
        oo.initBaseInMerged = curInit;
        for (uint32_t idx : objs[fi]->sectionsByKind("init_array")) {
            if (!isLive(fi, idx)) continue;
            oo.initWithin[idx] = curInit - oo.initBaseInMerged;
            curInit += objs[fi]->sections[idx].size;
        }
    }
    Layout L;
    L.textBase = textBase;
    L.textSize = curText;
    L.bssSize = curBss;
    L.rodataBase = rodataBase ? rodataBase : (textBase + curText);
    L.rodataSize = curRodata;
    // Reject a --rodata-base that overlaps text. Without this
    // check, the gap between text-end and rodata-base goes
    // negative, the unsigned subtraction wraps to ~4GB, and the
    // image-write loop creates a multi-gigabyte file with no
    // diagnostic. Caught while sweeping --rodata-base values
    // in a strtok layout-sensitivity test.
    if (rodataBase && L.rodataBase < L.textBase + L.textSize) {
        char msg[160];
        std::snprintf(msg, sizeof(msg),
                      "--rodata-base 0x%X overlaps text 0x%X+%u "
                      "(rodata must start at or after 0x%X)",
                      L.rodataBase, L.textBase, L.textSize,
                      L.textBase + L.textSize);
        die(msg);
    }
    // Hard-fail if text crosses into the IO window ($C000-$CFFF).
    // Code there would fetch instructions from hardware registers.
    // Programs that grow this big need to split into bank 1 (not
    // currently supported by this linker).
    if (L.textBase < 0xC000 &&
        L.textBase + L.textSize > 0xC000) {
        char msg[160];
        std::snprintf(msg, sizeof(msg),
                      "text [0x%X+%u] crosses IIgs IO window 0xC000-0xCFFF — "
                      "shrink the program or split into bank 1",
                      L.textBase, L.textSize);
        die(msg);
    }
    // Auto-skip the IO window ($C000-$CFFF) if rodata would land
    // there. Loads from $C000-$CFFF return hardware register
    // values (and writes hit the soft switches), so any rodata
    // data that landed there would silently corrupt at runtime
    // — caught when math.o grew past ~28KB and pushed string
    // literals into the IO range, breaking smoke #86 (hash
    // table strcmp returned garbage because the keys read back
    // as IO register values). Catches both "starts before IO,
    // crosses in" and "starts inside IO" cases. The image composer
    // below zero-pads the resulting gap; the runInMame loader skips
    // writes to the IO range so the soft switches stay intact.
    if (!rodataBase &&
        L.rodataBase < 0xD000 &&
        L.rodataBase + L.rodataSize > 0xC000) {
        // Move up past the IO window.
        L.rodataBase = 0xD000;
    }
    // .init_array goes immediately after .rodata in the image.
    L.initBase = L.rodataBase + L.rodataSize;
    L.initSize = curInit;
    // Init_array can also land in IO if rodata ends just before
    // or starts inside.
    if (L.initBase < 0xD000 &&
        L.initBase + L.initSize > 0xC000) {
        L.initBase = 0xD000;
    }
    // After all skips, sanity-check we haven't gone past the LC
    // ceiling. The IIgs LC area is $D000-$FFFF (12KB usable when
    // bank 1 is selected; the $E000-$FFFF chunk is common to both
    // banks). crt0's `lda $C083` read-twice enables RAM read+write
    // for the entire LC range, so we can use through $FFFF.
    if (L.initBase + L.initSize > 0x10000u) {
        char msg[160];
        std::snprintf(msg, sizeof(msg),
                      "rodata + init_array [0x%X+%u] exceeds bank-0 LC "
                      "ceiling 0x10000 — shrink the runtime or split into bank 1",
                      L.rodataBase,
                      (unsigned)(L.initBase + L.initSize - L.rodataBase));
        die(msg);
    }
    uint32_t initBase = L.initBase;
    // bss-base safety: default 0x2000 only works if text doesn't
    // grow past it. When text + rodata + init_array would
    // overflow the 0x2000 bss start, shift bss above them so
    // crt0's bss-init doesn't zero loaded text bytes. Caller
    // can still force a specific bssBase via --bss-base.
    //
    // NOTE(review): the test below compares bssBase with the END of the
    // loaded image only, so any bssBase numerically below that end is
    // moved, even when bss sits entirely below textBase and overlaps
    // nothing (e.g. the defaults 0x2000 / 0x8000). Confirm whether that
    // is intended before relying on --bss-base.
    //
    // IIgs bank-0 hazard zones:
    //   $C000-$CFFF: IO and soft switches (ALWAYS unusable —
    //                reads/writes hit hardware registers).
    //   $D000-$DFFF: Language Card 1 area. Read-only ROM by
    //                default; crt0 enables LC1 RAM via the
    //                $C083 soft switch (read-twice trick) so
    //                BSS placed here is writable.
    //   $E000-$FFFF: bank-0 ROM area, also LC-switched but
    //                we don't enable it (less common need).
    // Skip past the IO window if BSS would land there; LC1
    // ($D000-$DFFF) IS now usable thanks to crt0's soft-switch
    // enable. Past $FFFF means BSS exceeds 16-bit range —
    // bail clearly rather than silently corrupt.
    uint32_t loadEnd = L.initBase + L.initSize;
    L.bssBase = bssBase;
    if (L.bssBase < loadEnd) {
        // Page-align upward for nicer addresses in the map.
        L.bssBase = (loadEnd + 0xFF) & ~0xFFu;
        if (L.bssBase >= 0xC000 && L.bssBase < 0xD000) {
            L.bssBase = 0xD000;
        }
    }
    if (L.bssBase + L.bssSize > 0x10000u) {
        char msg[160];
        std::snprintf(msg, sizeof(msg),
                      "bss [0x%X+%u] exceeds bank-0 LC ceiling 0x10000 — "
                      "shrink the runtime or split into bank 1",
                      L.bssBase, L.bssSize);
        die(msg);
    }
    // Publish layout now so resolveSym() can read it during reloc
    // application (it's a const member that uses lastLayout).
    lastLayout = L;
    // Synthesize linker-defined symbols so crt0 / startup code
    // can find the section extents. These must NOT be in the
    // input objects; we provide them.
    globalSyms["__text_start"] = L.textBase;
    globalSyms["__text_end"] = L.textBase + L.textSize;
    globalSyms["__rodata_start"] = L.rodataBase;
    globalSyms["__rodata_end"] = L.rodataBase + L.rodataSize;
    globalSyms["__init_array_start"] = initBase;
    globalSyms["__init_array_end"] = initBase + curInit;
    globalSyms["__bss_start"] = L.bssBase;
    globalSyms["__bss_end"] = L.bssBase + L.bssSize;
    // __heap_start / __heap_end: pick the largest contiguous safe
    // range above bss_end. Without this, the previous hardcoded
    // heap_end=$BF00 gave heap_end < heap_start whenever BSS
    // spilled into LC1 — malloc immediately returned NULL.
    // Skip the IO window if heap_start would land there.
    uint32_t heapStart = L.bssBase + L.bssSize;
    if (heapStart >= 0xC000 && heapStart < 0xD000) {
        heapStart = 0xD000; // skip IO window
    }
    globalSyms["__heap_start"] = heapStart;
    if (heapStart < 0xC000) {
        globalSyms["__heap_end"] = 0xBF00;
    } else if (heapStart < 0x10000u) {
        // Heap in LC area ($D000-$FFFF, 12KB usable). crt0's
        // $C083 read-twice enables read+write for the whole range.
        globalSyms["__heap_end"] = 0x10000u;
    } else {
        // bss ends exactly at 0x10000 (anything larger was rejected
        // above): the heap is empty.
        globalSyms["__heap_end"] = heapStart;
    }
    // 2. Build global symbol map. Honor weak vs strong binding:
    //   - strong def overrides any prior weak def
    //   - strong + strong is a multiple-definition error
    //   - weak + weak: first wins (any choice would be valid)
    //   - weak after strong: ignored
    // Without this, the previous "last def wins" rule meant a weak
    // libc stub (e.g. putchar) could silently overwrite a user's
    // strong override depending on link order.
    std::map<std::string, bool> isStrong; // name -> strong-def seen
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        const auto &obj = *objs[fi];
        const auto &oo = objOff[fi];
        for (const Symbol &sym : obj.symbols) {
            if (sym.name.empty()) continue;
            if (sym.shndx == SHN_UNDEF || sym.shndx == SHN_ABS ||
                sym.shndx == SHN_COMMON || sym.shndx >= obj.sections.size())
                continue;
            // Skip dead sections under gc-sections — their symbols
            // would otherwise resolve to whatever junk address the
            // missing oo.{text,rodata,bss,init}Within entry implies.
            if (!isLive(fi, sym.shndx)) continue;
            const auto &sec = obj.sections[sym.shndx];
            std::string kind = sectionKind(sec.name);
            uint32_t addr = 0;
            if (kind == "text") {
                auto it = oo.textWithin.find(sym.shndx);
                addr = textBase + oo.textBaseInMerged
                     + (it == oo.textWithin.end() ? 0 : it->second)
                     + sym.value;
            } else if (kind == "rodata") {
                auto it = oo.rodataWithin.find(sym.shndx);
                addr = L.rodataBase + oo.rodataBaseInMerged
                     + (it == oo.rodataWithin.end() ? 0 : it->second)
                     + sym.value;
            } else if (kind == "bss") {
                auto it = oo.bssWithin.find(sym.shndx);
                addr = L.bssBase + oo.bssBaseInMerged
                     + (it == oo.bssWithin.end() ? 0 : it->second)
                     + sym.value;
            } else if (kind == "init_array") {
                auto it = oo.initWithin.find(sym.shndx);
                addr = initBase + oo.initBaseInMerged
                     + (it == oo.initWithin.end() ? 0 : it->second)
                     + sym.value;
            } else {
                continue;
            }
            bool thisStrong = (sym.bind != STB_WEAK);
            auto sit = isStrong.find(sym.name);
            if (sit == isStrong.end()) {
                globalSyms[sym.name] = addr;
                isStrong[sym.name] = thisStrong;
            } else if (thisStrong && !sit->second) {
                // strong over weak — replace.
                globalSyms[sym.name] = addr;
                sit->second = true;
            } else if (thisStrong && sit->second) {
                die("multiple strong definitions of '" + sym.name + "'");
            }
            // weak after strong, or weak after weak: keep first.
        }
    }
    // 3. Build text and rodata buffers. Skip dead sections under
    // gc-sections (isLive() returns true for everything when gc
    // is off).
    std::vector<uint8_t> textBuf;
    textBuf.reserve(curText);
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        for (uint32_t idx : objs[fi]->sectionsByKind("text")) {
            if (!isLive(fi, idx)) continue;
            const uint8_t *p = objs[fi]->sectionData(idx);
            textBuf.insert(textBuf.end(), p, p + objs[fi]->sections[idx].size);
        }
    }
    std::vector<uint8_t> rodataBuf;
    rodataBuf.reserve(curRodata);
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        for (uint32_t idx : objs[fi]->sectionsByKind("rodata")) {
            if (!isLive(fi, idx)) continue;
            const uint8_t *p = objs[fi]->sectionData(idx);
            rodataBuf.insert(rodataBuf.end(), p,
                             p + objs[fi]->sections[idx].size);
        }
    }
    // 4. Apply relocations to text buffer.
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        const auto &obj = *objs[fi];
        const auto &oo = objOff[fi];
        for (uint32_t textIdx : obj.sectionsByKind("text")) {
            if (!isLive(fi, textIdx)) continue;
            auto it = obj.relocs.find(textIdx);
            if (it == obj.relocs.end()) continue;
            uint32_t inMerged = oo.textBaseInMerged + oo.textWithin.at(textIdx);
            for (const Reloc &r : it->second) {
                uint32_t patchOff = inMerged + r.offset;
                uint32_t patchAddr = textBase + patchOff;
                uint32_t target;
                std::string resolvedName;
                if (!resolveSym(obj, oo, r, target, resolvedName))
                    die(obj.path + ": .text reloc to unresolved '"
                        + resolvedName + "'");
                applyReloc(textBuf, patchOff, patchAddr, target, r.type,
                           resolvedName);
            }
        }
    }
    // 4b. Apply relocations to rodata/data buffer. Globals like
    // `int *p = &v;` need their initializer patched at link time
    // (the .o emits a placeholder 0 + a R_W65816_IMM16 reloc).
    // Without this, every initialized pointer or function-pointer
    // table in the program reads 0 at runtime.
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        const auto &obj = *objs[fi];
        const auto &oo = objOff[fi];
        for (uint32_t rdIdx : obj.sectionsByKind("rodata")) {
            if (!isLive(fi, rdIdx)) continue;
            auto it = obj.relocs.find(rdIdx);
            if (it == obj.relocs.end()) continue;
            uint32_t inMerged = oo.rodataBaseInMerged + oo.rodataWithin.at(rdIdx);
            for (const Reloc &r : it->second) {
                uint32_t patchOff = inMerged + r.offset;
                uint32_t patchAddr = L.rodataBase + patchOff;
                uint32_t target;
                std::string resolvedName;
                if (!resolveSym(obj, oo, r, target, resolvedName))
                    die(obj.path + ": .rodata reloc to unresolved '"
                        + resolvedName + "'");
                applyReloc(rodataBuf, patchOff, patchAddr, target,
                           r.type, resolvedName);
            }
        }
    }
    // 5. Compose output: text || (gap) || rodata. bss is virtual.
    outImage.clear();
    outImage = std::move(textBuf);
    if (L.rodataBase != textBase + curText) {
        uint32_t gap = L.rodataBase - (textBase + curText);
        outImage.insert(outImage.end(), gap, 0);
    }
    outImage.insert(outImage.end(), rodataBuf.begin(), rodataBuf.end());
    // Build init_array buffer + apply its relocations (entries are
    // 16-bit function pointers needing IMM16 reloc).
    std::vector<uint8_t> initBuf;
    initBuf.reserve(curInit);
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        for (uint32_t idx : objs[fi]->sectionsByKind("init_array")) {
            if (!isLive(fi, idx)) continue;
            const uint8_t *p = objs[fi]->sectionData(idx);
            initBuf.insert(initBuf.end(), p,
                           p + objs[fi]->sections[idx].size);
        }
    }
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        const auto &obj = *objs[fi];
        const auto &oo = objOff[fi];
        for (uint32_t idx : obj.sectionsByKind("init_array")) {
            if (!isLive(fi, idx)) continue;
            auto it = obj.relocs.find(idx);
            if (it == obj.relocs.end()) continue;
            uint32_t inMerged = oo.initBaseInMerged + oo.initWithin.at(idx);
            for (const Reloc &r : it->second) {
                uint32_t target;
                std::string resolvedName;
                if (!resolveSym(obj, oo, r, target, resolvedName))
                    die(obj.path + ": .init_array reloc to unresolved '"
                        + resolvedName + "'");
                uint32_t patchOff = inMerged + r.offset;
                uint32_t patchAddr = initBase + patchOff;
                applyReloc(initBuf, patchOff, patchAddr, target, r.type,
                           resolvedName);
            }
        }
    }
    if (!initBuf.empty()) {
        // init_array normally follows rodata directly, but the IO-window
        // skip above may have moved initBase up to 0xD000. Zero-pad the
        // difference so the entries sit at image offset
        // (initBase - textBase), the address __init_array_start
        // advertises. Nothing is padded when there are no entries.
        const uint32_t rodataEnd = L.rodataBase + L.rodataSize;
        if (initBase > rodataEnd)
            outImage.insert(outImage.end(), initBase - rodataEnd, 0);
        outImage.insert(outImage.end(), initBuf.begin(), initBuf.end());
    }
    return L;
}
// ----------------------------------------------------------------
// DWARF sidecar. Walks each input object and concatenates every
// non-empty section whose name starts with `.debug_`. Each section is
// prefixed by an ASCII-readable header line:
//
//   ; OBJ <objpath> SEC <sectionname> SIZE <bytes> RELOCS_APPLIED <n> RELOCS_SKIPPED <m>
//
// followed by the section bytes after applying any text/rodata/bss/
// init_array relocations from `.rela.<sec>`, then a single newline.
// This means PCs in .debug_info / .debug_line / .debug_aranges resolve
// to final-image addresses and a consumer like llvm-dwarfdump (or a
// custom MAME overlay) can map them back to source lines.
//
// Relocations that resolveSym() cannot map into the image, and
// relocations whose patch site does not fit inside the section, are
// counted as skipped and leave the original bytes in place.
//
// Intra-debug references (e.g., .debug_info -> .debug_str offsets)
// are *not* renumbered; we concatenate sections without recompacting,
// so the original object-local offsets stay correct relative to each
// object's slice of the sidecar. A multi-TU consumer would need to
// walk the slice headers to find the right base.
//
// Must be called after link(). Calls die() if `path` cannot be opened.
// A one-line summary is printed to stderr.
void writeDebugSidecar(const std::string &path) const {
    std::ofstream f(path, std::ios::binary);
    if (!f) die("cannot open '" + path + "' for writing");
    f << "; llvm816 link816 DWARF sidecar v1\n";
    f << "; text/rodata/bss/init_array relocs applied to final-image addresses\n";
    f << "; intra-debug refs left object-local (per-OBJ slice scope)\n";
    size_t total = 0;
    size_t kept = 0;
    size_t patched = 0;
    for (size_t fi = 0; fi < objs.size(); ++fi) {
        const InputObject &obj = *objs[fi];
        const ObjOffsets &oo = objOff[fi];
        for (uint32_t idx = 0; idx < obj.sections.size(); ++idx) {
            const Section &sec = obj.sections[idx];
            if (sec.name.rfind(".debug_", 0) != 0) continue;
            if (sec.size == 0) continue;
            std::vector<uint8_t> data(sec.size);
            std::memcpy(data.data(), obj.raw.data() + sec.fileOffset,
                        sec.size);
            size_t applied = 0;
            size_t skipped = 0;
            auto it = obj.relocs.find(idx);
            if (it != obj.relocs.end()) {
                for (const Reloc &r : it->second) {
                    uint32_t target;
                    std::string resolvedName;
                    if (!resolveSym(obj, oo, r, target, resolvedName)) {
                        skipped++;
                        continue;
                    }
                    // Bytes this relocation type writes. Unknown types
                    // are treated as the widest (3) here; applyReloc()
                    // rejects them.
                    uint64_t width = 3;
                    if (r.type == R_W65816_IMM8 || r.type == R_W65816_PCREL8)
                        width = 1;
                    else if (r.type == R_W65816_IMM16 ||
                             r.type == R_W65816_PCREL16)
                        width = 2;
                    // 64-bit sum: a huge r.offset must not wrap around
                    // and slip past the check.
                    if (static_cast<uint64_t>(r.offset) + width > sec.size) {
                        // Out-of-range offset; defensively skip.
                        skipped++;
                        continue;
                    }
                    // patchAddr is only meaningful for PCREL types,
                    // which DWARF doesn't use. Pass 0; applyReloc
                    // ignores it for absolute types.
                    applyReloc(data, r.offset, 0, target, r.type,
                               resolvedName);
                    applied++;
                }
            }
            patched += applied;
            // Built as a std::string so a long object path cannot
            // truncate the line and swallow its terminating newline.
            const std::string hdr =
                "; OBJ " + obj.path + " SEC " + sec.name +
                " SIZE " + std::to_string(sec.size) +
                " RELOCS_APPLIED " + std::to_string(applied) +
                " RELOCS_SKIPPED " + std::to_string(skipped) + "\n";
            f.write(hdr.data(), static_cast<std::streamsize>(hdr.size()));
            f.write(reinterpret_cast<const char *>(data.data()), sec.size);
            f << "\n";
            total += sec.size;
            kept++;
        }
    }
    std::fprintf(stderr,
                 "debug sidecar: %zu sections, %zu bytes, %zu relocs applied -> %s\n",
                 kept, total, patched, path.c_str());
}
void writeMap(const std::string &path) const {
std::ofstream f(path);
if (!f) die("cannot open '" + path + "' for writing");
char buf[256];
// Section layout summary at top.
std::snprintf(buf, sizeof(buf),
"# section layout\n"
".text : 0x%06x .. 0x%06x (%6u bytes)\n"
".rodata : 0x%06x .. 0x%06x (%6u bytes)\n"
".bss : 0x%06x .. 0x%06x (%6u bytes)\n",
lastLayout.textBase,
lastLayout.textBase + lastLayout.textSize,
lastLayout.textSize,
lastLayout.rodataBase,
lastLayout.rodataBase + lastLayout.rodataSize,
lastLayout.rodataSize,
lastLayout.bssBase,
lastLayout.bssBase + lastLayout.bssSize,
lastLayout.bssSize);
f.write(buf, std::strlen(buf));
// Per-input-file contributions to .text (size in bytes).
std::snprintf(buf, sizeof(buf), "\n# per-input-file .text contributions\n");
f.write(buf, std::strlen(buf));
for (size_t fi = 0; fi < objs.size(); ++fi) {
uint32_t bytes = 0;
for (uint32_t idx : objs[fi]->sectionsByKind("text"))
bytes += objs[fi]->sections[idx].size;
std::snprintf(buf, sizeof(buf), "%6u %s\n", bytes,
objs[fi]->path.c_str());
f.write(buf, std::strlen(buf));
}
// Symbol table sorted by address.
std::snprintf(buf, sizeof(buf), "\n# global symbols (sorted by address)\n");
f.write(buf, std::strlen(buf));
std::vector<std::pair<uint32_t, std::string>> sorted;
for (const auto &kv : globalSyms) sorted.emplace_back(kv.second, kv.first);
std::sort(sorted.begin(), sorted.end());
for (const auto &p : sorted) {
std::snprintf(buf, sizeof(buf), "0x%06x %s\n",
p.first, p.second.c_str());
f.write(buf, std::strlen(buf));
}
// Backwards-compat: also emit the old `name = 0x...` lines so
// existing smoke greps still match.
for (const auto &kv : globalSyms) {
std::snprintf(buf, sizeof(buf), "%s = 0x%06x\n",
kv.first.c_str(), kv.second);
f.write(buf, std::strlen(buf));
}
}
// Stash the last layout so writeMap can use it: writeMap reads the
// text/rodata/bss base and size fields for its section summary.
// NOTE(review): presumably assigned by link() before it returns; the
// assignment is outside this excerpt, so confirm against link().
Layout lastLayout;
};
// ---------------------------------------------------------------- CLI
// Parses a command-line address.  Accepts decimal, 0x-prefixed hex and
// 0-prefixed octal (strtoul base 0).  Dies with a diagnostic when the
// text is not a complete number, is negative, or does not fit in the
// 65816's 24-bit address space.
static uint32_t parseInt(const std::string &s) {
    // strtoul accepts a leading '-' and negates the result modulo
    // 2^N, so "-18446744073709551615" would come back as 1 on LP64.
    // No valid address contains a minus sign; reject it up front.
    if (s.find('-') != std::string::npos)
        die("bad numeric value '" + s + "'");
    char *end = nullptr;
    const unsigned long v = std::strtoul(s.c_str(), &end, 0);
    // Require at least one digit and no trailing garbage.
    if (end == s.c_str() || *end != '\0')
        die("bad numeric value '" + s + "'");
    // 65816 addresses are 24-bit; reject anything that doesn't fit so
    // a typo like `--text-base 0x100000000` doesn't silently wrap to 0.
    // (On overflow strtoul returns ULONG_MAX, which also lands here.)
    if (v > 0xFFFFFF)
        die("address '" + s + "' exceeds 24-bit range");
    return static_cast<uint32_t>(v);
}
// Prints the command-line synopsis to stderr and terminates the process
// with exit status 2.  Never returns; marked [[noreturn]] so callers
// that bail out with `usage(...)` before reading argv[i] are understood
// by the compiler and static analyzers.
//   argv0 - program name to show in the synopsis.
[[noreturn]] static void usage(const char *argv0) {
    std::fprintf(stderr,
        "usage: %s -o <output> [--text-base ADDR] [--rodata-base ADDR]\n"
        " [--bss-base ADDR] [--map FILE] [--debug-out FILE]\n"
        " [--no-gc-sections]\n"
        " <input.o> ...\n",
        argv0);
    std::exit(2);
}
} // anonymous namespace
int main(int argc, char **argv) {
std::string outPath;
std::string mapPath;
std::string debugOutPath;
Linker linker;
int i = 1;
while (i < argc) {
std::string a = argv[i];
if (a == "-o" || a == "--output") {
if (++i >= argc) usage(argv[0]);
outPath = argv[i++];
} else if (a == "--text-base") {
if (++i >= argc) usage(argv[0]);
linker.textBase = parseInt(argv[i++]);
} else if (a == "--rodata-base") {
if (++i >= argc) usage(argv[0]);
linker.rodataBase = parseInt(argv[i++]);
} else if (a == "--bss-base") {
if (++i >= argc) usage(argv[0]);
linker.bssBase = parseInt(argv[i++]);
} else if (a == "--map") {
if (++i >= argc) usage(argv[0]);
mapPath = argv[i++];
} else if (a == "--debug-out") {
if (++i >= argc) usage(argv[0]);
debugOutPath = argv[i++];
} else if (a == "--gc-sections") {
// Drop sections not reachable from __start / main /
// init_array. Requires `-ffunction-sections` (so each
// function is in its own section). Significantly shrinks
// text for programs that link the whole runtime but only
// use a fraction of it. ON by default; --no-gc-sections
// disables.
linker.gcSections = true;
i++;
} else if (a == "--no-gc-sections") {
linker.gcSections = false;
i++;
} else if (a == "-h" || a == "--help") {
usage(argv[0]);
} else if (!a.empty() && a[0] == '-') {
die("unknown option '" + a + "'");
} else {
linker.addObject(a);
i++;
}
}
if (outPath.empty() || linker.objs.empty()) usage(argv[0]);
std::vector<uint8_t> image;
Layout L = linker.link(image);
std::ofstream f(outPath, std::ios::binary);
if (!f) die("cannot open '" + outPath + "' for writing");
f.write(reinterpret_cast<const char *>(image.data()), image.size());
if (!mapPath.empty()) linker.writeMap(mapPath);
if (!debugOutPath.empty()) linker.writeDebugSidecar(debugOutPath);
std::fprintf(stderr,
"linked: text=[0x%04x+%u] rodata=[0x%04x+%u] bss=[0x%04x+%u] "
"-> %s (%zu bytes)\n",
L.textBase, L.textSize, L.rodataBase, L.rodataSize,
L.bssBase, L.bssSize,
outPath.c_str(), image.size());
return 0;
}