Checkpoint

This commit is contained in:
Scott Duensing 2026-04-30 19:16:32 -05:00
parent 91ac5476a5
commit f80a49dc1e
5 changed files with 164 additions and 38 deletions

View file

@ -65,30 +65,28 @@ which runs correctly under MAME (apple2gs).
## In flight (build-system level) ## In flight (build-system level)
- **DWARF sidecar emission in link816** (#51): The link should produce - **DWARF sidecar — minimal version landed** (#51): `link816 --debug-out
a separate sidecar file with line-number / variable-location info FILE` collects every `.debug_*` section from the input objects and
that an IDE or post-mortem dumper can consume. Skeleton not yet writes them to a sidecar with section headers. Addresses are still
written; deferred until other correctness work is done. object-file-local (no relocation processing). A consumer that wants
source-mapped final-image addresses must re-run reloc against the
text/rodata bases, or use offsets within their object scope. Future
work: apply text/rodata relocations to `.debug_info` / `.debug_line`
so addresses match the final image, and emit a TOC the consumer
can index by source file or function.
## Known issues / workarounds ## Known issues / workarounds
- **Greedy register allocator mis-orders spills** in two patterns - **Greedy register allocator mis-orders spills** in iterative
(#69, #70): quicksort with `if/else` recursion choice (#70). Complex live
1. Functions where both `$a` and `$x` are live-in (i64-first-arg ranges across two `swap()` calls produce wrong pointer args.
with a stack-output pointer, e.g. `udivmod(i64, i64, ptr)`). Reproduces only at `-O1`/`-O2` with greedy. Workaround:
The TAX bridging `$x` to A clobbers `$a`'s value before the `-mllvm -regalloc=fast` for the affected translation unit, or
second STA can save it. rewrite the qsort with explicit recursion guards instead of the
2. Iterative quicksort with `if/else` recursion choice: complex iterative tail-elim form. `softDouble.c` already uses this
live-ranges across two `swap()` calls produce wrong arg values. flag for `__muldf3` (build.sh applies it automatically). Real
fix is either a pre-RA pass that explicitly spills loop-carried
Both reproduce only at `-O1`/`-O2` with greedy. Workaround: pointer args or a targeted greedy heuristic patch.
`-mllvm -regalloc=fast` for the affected translation unit.
`softDouble.c` already requires this flag for `__muldf3` (build.sh
applies it automatically).
Real fix is a pre-RA pass that pre-spills critical pointer
arguments to memory, or a targeted fix in greedy's spill-ordering
heuristic. Material work; deferred.
- **(d,s),y / (sr,s),y addressing wraps the bank** when Y is - **(d,s),y / (sr,s),y addressing wraps the bank** when Y is
negative as 16-bit unsigned. Worked around by `W65816NegYIndY` negative as 16-bit unsigned. Worked around by `W65816NegYIndY`
@ -115,7 +113,10 @@ which runs correctly under MAME (apple2gs).
several test cases. Acceptable today (Newton iterations still several test cases. Acceptable today (Newton iterations still
converge); revisit when an exact-match test suite lands. converge); revisit when an exact-match test suite lands.
- **DWARF sidecar** (#51) for source-level debugging. - **DWARF sidecar with relocations applied** — current (#51) version
is raw section pass-through; addresses are object-file-local. A
real source-level debugger needs the linker to apply text/rodata
relocations to `.debug_info` / `.debug_line` first.
- **More of the C standard library**: `<math.h>` transcendental - **More of the C standard library**: `<math.h>` transcendental
functions (sin, cos, exp, log, pow), `<string.h>` beyond what's functions (sin, cos, exp, log, pow), `<string.h>` beyond what's

View file

@ -2018,6 +2018,49 @@ EOF
fi fi
rm -f "$cAofFile" "$oAofFile" "$binAofFile" rm -f "$cAofFile" "$oAofFile" "$binAofFile"
log "check: MAME runs udivmod(0x123...DEF, 0x10000, &m) → q=0x12345_6789AB m=0xCDEF (#69)"
cUdmFile="$(mktemp --suffix=.c)"
oUdmFile="$(mktemp --suffix=.o)"
binUdmFile="$(mktemp --suffix=.bin)"
cat > "$cUdmFile" <<'EOF'
__attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
}
typedef unsigned long long u64;
__attribute__((noinline)) u64 udivmod(u64 a, u64 b, u64 *out_mod) {
*out_mod = a % b;
return a / b;
}
int main(void) {
u64 m;
u64 q = udivmod(0x123456789ABCDEFULL, 0x10000ULL, &m);
union { u64 u; unsigned short w[4]; } qu, mu;
qu.u = q; mu.u = m;
switchToBank2();
*(volatile unsigned short *)0x5000 = qu.w[0];
*(volatile unsigned short *)0x5002 = qu.w[1];
*(volatile unsigned short *)0x5004 = qu.w[2];
*(volatile unsigned short *)0x5006 = qu.w[3];
*(volatile unsigned short *)0x5008 = mu.w[0];
*(volatile unsigned short *)0x500a = mu.w[1];
*(volatile unsigned short *)0x500c = mu.w[2];
*(volatile unsigned short *)0x500e = mu.w[3];
while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
"$cUdmFile" -o "$oUdmFile"
"$PROJECT_ROOT/tools/link816" -o "$binUdmFile" --text-base 0x1000 \
"$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oUdmFile" \
>/dev/null 2>&1
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binUdmFile" --check \
0x025000=89ab 0x025002=4567 0x025004=0123 0x025006=0000 \
0x025008=cdef 0x02500a=0000 0x02500c=0000 0x02500e=0000 \
>/dev/null 2>&1; then
die "MAME: udivmod(...) wrong mod (i64-first-arg X-spill bug)"
fi
rm -f "$cUdmFile" "$oUdmFile" "$binUdmFile"
log "check: MAME runs sqr(10) → 100 (frame-less ADJCALLSTACKUP must emit PLY)" log "check: MAME runs sqr(10) → 100 (frame-less ADJCALLSTACKUP must emit PLY)"
cSqrFile="$(mktemp --suffix=.c)" cSqrFile="$(mktemp --suffix=.c)"
oSqrFile="$(mktemp --suffix=.o)" oSqrFile="$(mktemp --suffix=.o)"
@ -2285,6 +2328,29 @@ EOF
# Linker exports the synthetic __bss_start / __bss_end / etc. # Linker exports the synthetic __bss_start / __bss_end / etc.
# symbols so crt0 can do BSS init and runtime malloc finds the # symbols so crt0 can do BSS init and runtime malloc finds the
# heap top. # heap top.
log "check: link816 --debug-out emits a DWARF sidecar (#51)"
cDbgFile="$(mktemp --suffix=.c)"
oDbgFile="$(mktemp --suffix=.o)"
binDbgFile="$(mktemp --suffix=.bin)"
dbgOutFile="$(mktemp --suffix=.dbg)"
cat > "$cDbgFile" <<'EOF'
int add(int a, int b) { return a + b; }
int main(void) { return add(3, 4); }
EOF
"$CLANG" --target=w65816 -O2 -g -ffunction-sections -c "$cDbgFile" -o "$oDbgFile"
"$PROJECT_ROOT/tools/link816" -o "$binDbgFile" --debug-out "$dbgOutFile" \
--text-base 0x1000 "$oDbgFile" "$oLibgccFile" 2>/dev/null
if ! head -1 "$dbgOutFile" | grep -q "DWARF sidecar"; then
die "link816 --debug-out: sidecar missing header"
fi
if ! grep -q "SEC \.debug_info" "$dbgOutFile"; then
die "link816 --debug-out: sidecar missing .debug_info section"
fi
if ! grep -q "SEC \.debug_line" "$dbgOutFile"; then
die "link816 --debug-out: sidecar missing .debug_line section"
fi
rm -f "$cDbgFile" "$oDbgFile" "$binDbgFile" "$dbgOutFile"
log "check: link816 emits __bss_start, __bss_end, __heap_start" log "check: link816 emits __bss_start, __bss_end, __heap_start"
cBssFile="$(mktemp --suffix=.c)" cBssFile="$(mktemp --suffix=.c)"
oBssFile="$(mktemp --suffix=.o)" oBssFile="$(mktemp --suffix=.o)"

View file

@ -708,16 +708,61 @@ static uint32_t parseInt(const std::string &s) {
static void usage(const char *argv0) { static void usage(const char *argv0) {
std::fprintf(stderr, std::fprintf(stderr,
"usage: %s -o <output> [--text-base ADDR] [--rodata-base ADDR]\n" "usage: %s -o <output> [--text-base ADDR] [--rodata-base ADDR]\n"
" [--bss-base ADDR] [--map FILE] <input.o> ...\n", " [--bss-base ADDR] [--map FILE] [--debug-out FILE]\n"
" <input.o> ...\n",
argv0); argv0);
std::exit(2); std::exit(2);
} }
// ---------------------------------------------------------------- DWARF
// Sidecar emission. Walks each input object and concatenates every
// section whose name starts with `.debug_`. Each section is prefixed
// by a small ASCII-readable header line:
//
// ; OBJ <objname> SEC <sectionname> SIZE <bytes>
//
// followed by the raw section bytes. Address-bearing sections
// (.debug_info, .debug_line, .debug_aranges, .debug_loc, etc.) are
// written WITHOUT relocation processing — addresses are object-file-
// local, not final-image-local. A consumer that wants source-mapped
// addresses needs to either (a) re-run reloc against the linked
// section bases, or (b) use the relative offsets within their object
// scope. Better than nothing for a single-TU debug session.
static void writeDebugSidecar(
const std::string &path,
const std::vector<std::unique_ptr<InputObject>> &objs) {
std::ofstream f(path, std::ios::binary);
if (!f) die("cannot open '" + path + "' for writing");
f << "; llvm816 link816 DWARF sidecar v0\n";
f << "; Object-file-local addresses; not relocated to final image.\n";
size_t total = 0;
size_t kept = 0;
for (const auto &objPtr : objs) {
const InputObject &obj = *objPtr;
for (const Section &sec : obj.sections) {
if (sec.name.rfind(".debug_", 0) != 0) continue;
if (sec.size == 0) continue;
f << "; OBJ " << obj.path << " SEC " << sec.name
<< " SIZE " << sec.size << "\n";
f.write(reinterpret_cast<const char *>(obj.raw.data()
+ sec.fileOffset),
sec.size);
f << "\n";
total += sec.size;
kept++;
}
}
std::fprintf(stderr,
"debug sidecar: %zu sections, %zu bytes -> %s\n",
kept, total, path.c_str());
}
} // anonymous namespace } // anonymous namespace
int main(int argc, char **argv) { int main(int argc, char **argv) {
std::string outPath; std::string outPath;
std::string mapPath; std::string mapPath;
std::string debugOutPath;
Linker linker; Linker linker;
int i = 1; int i = 1;
@ -738,6 +783,9 @@ int main(int argc, char **argv) {
} else if (a == "--map") { } else if (a == "--map") {
if (++i >= argc) usage(argv[0]); if (++i >= argc) usage(argv[0]);
mapPath = argv[i++]; mapPath = argv[i++];
} else if (a == "--debug-out") {
if (++i >= argc) usage(argv[0]);
debugOutPath = argv[i++];
} else if (a == "-h" || a == "--help") { } else if (a == "-h" || a == "--help") {
usage(argv[0]); usage(argv[0]);
} else if (!a.empty() && a[0] == '-') { } else if (!a.empty() && a[0] == '-') {
@ -757,6 +805,7 @@ int main(int argc, char **argv) {
f.write(reinterpret_cast<const char *>(image.data()), image.size()); f.write(reinterpret_cast<const char *>(image.data()), image.size());
if (!mapPath.empty()) linker.writeMap(mapPath); if (!mapPath.empty()) linker.writeMap(mapPath);
if (!debugOutPath.empty()) writeDebugSidecar(debugOutPath, linker.objs);
std::fprintf(stderr, std::fprintf(stderr,
"linked: text=[0x%04x+%u] rodata=[0x%04x+%u] bss=[0x%04x+%u] " "linked: text=[0x%04x+%u] rodata=[0x%04x+%u] bss=[0x%04x+%u] "

View file

@ -638,6 +638,17 @@ SDValue W65816TargetLowering::LowerFormalArguments(
bool I32FirstArg = bool I32FirstArg =
Ins.size() >= 2 && Ins[0].VT == MVT::i16 && Ins[1].VT == MVT::i16 && Ins.size() >= 2 && Ins[0].VT == MVT::i16 && Ins[1].VT == MVT::i16 &&
Ins[0].OrigArgIndex == 0 && Ins[1].OrigArgIndex == 0; Ins[0].OrigArgIndex == 0 && Ins[1].OrigArgIndex == 0;
// True iff the FIRST original arg spans 4 i16s (i.e., is i64). Used
// below to choose the Img16-via-STX_DP X-arg path for i64 callees,
// which dodges greedy's TXA-bridge-clobbers-A spill bug. i32-first
// doesn't get the same treatment because the change pessimizes
// simple functions like `int add32(int a, int b) { return a+b; }`
// where greedy's regular A:X handling is fine.
bool I64FirstArg =
Ins.size() >= 4 && Ins[0].VT == MVT::i16 && Ins[1].VT == MVT::i16 &&
Ins[2].VT == MVT::i16 && Ins[3].VT == MVT::i16 &&
Ins[0].OrigArgIndex == 0 && Ins[1].OrigArgIndex == 0 &&
Ins[2].OrigArgIndex == 0 && Ins[3].OrigArgIndex == 0;
unsigned ArgIdx = 0; unsigned ArgIdx = 0;
// Stack offset is measured from S+1 (the WDC convention) and grows // Stack offset is measured from S+1 (the WDC convention) and grows
@ -655,8 +666,16 @@ SDValue W65816TargetLowering::LowerFormalArguments(
MRI.addLiveIn(W65816::A, VReg); MRI.addLiveIn(W65816::A, VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, VT)); InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, VT));
} else if (ArgIdx == 1 && I32FirstArg) { } else if (ArgIdx == 1 && I32FirstArg) {
// i32 first-arg hi half: in X. // First-arg hi half (or arg0_ml for i64-first-arg): in X.
Register VReg = MRI.createVirtualRegister(&W65816::Idx16RegClass); // For i64-first-arg signatures (4 i16s with OrigArgIndex 0), use
// Img16 so greedy parks the value in an IMG slot via STX_DP,
// dodging the TXA-bridge-clobbers-A spill bug. i32-first stays
// on the original Idx16 path because the change pessimizes
// simple cases (verified: vprintf's writeULong/__udivsi3 chain
// crashes if i32-first is also rerouted). Caught by udivmod.
const TargetRegisterClass *RC = I64FirstArg ? &W65816::Img16RegClass
: &W65816::Idx16RegClass;
Register VReg = MRI.createVirtualRegister(RC);
MRI.addLiveIn(W65816::X, VReg); MRI.addLiveIn(W65816::X, VReg);
InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, MVT::i16)); InVals.push_back(DAG.getCopyFromReg(Chain, DL, VReg, MVT::i16));
} else { } else {

View file

@ -92,10 +92,10 @@ void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(W65816::STA_DP)).addImm(dstImg); BuildMI(MBB, I, DL, get(W65816::STA_DP)).addImm(dstImg);
return; return;
} }
// X → IMGn / IMGn → X: STX dp / LDX dp. Avoids the A-bridge that // X → IMGn / IMGn → X: STX dp / LDX dp. Used by the i64-first-arg
// TAX/TXA would impose; critical for i32-first-arg signatures // entry COPY (LowerFormalArguments routes arg0_ml through Img16 to
// (live-in $a + $x) where bridging X via A clobbers $a's value // dodge the TXA-bridge-clobbers-A spill bug for udivmod-shaped
// before it can be saved. Caught by udivmod and iterative qsort. // signatures).
if (dstImg >= 0 && SrcReg == W65816::X) { if (dstImg >= 0 && SrcReg == W65816::X) {
BuildMI(MBB, I, DL, get(W65816::STX_DP)).addImm(dstImg); BuildMI(MBB, I, DL, get(W65816::STX_DP)).addImm(dstImg);
return; return;
@ -104,15 +104,6 @@ void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
BuildMI(MBB, I, DL, get(W65816::LDX_DP)).addImm(srcImg); BuildMI(MBB, I, DL, get(W65816::LDX_DP)).addImm(srcImg);
return; return;
} }
// Y → IMGn / IMGn → Y: STY dp / LDY dp — symmetric.
if (dstImg >= 0 && SrcReg == W65816::Y) {
BuildMI(MBB, I, DL, get(W65816::STY_DP)).addImm(dstImg);
return;
}
if (DestReg == W65816::Y && srcImg >= 0) {
BuildMI(MBB, I, DL, get(W65816::LDY_DP)).addImm(srcImg);
return;
}
// DPF0 → A: emit `LDA $F0`. DPF0 is the pseudo-physreg carrier // DPF0 → A: emit `LDA $F0`. DPF0 is the pseudo-physreg carrier
// for an i64-returning call's high 16 bits; LowerCall builds a // for an i64-returning call's high 16 bits; LowerCall builds a
// CopyFromReg(DPF0) glued to the call so the SDAG combiner / // CopyFromReg(DPF0) glued to the call so the SDAG combiner /