From a059aa8182d424837231213e7ddbf4700070291f Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Fri, 1 May 2026 00:52:24 -0500 Subject: [PATCH] Checkpoint. --- STATUS.md | 39 ++++++- runtime/build.sh | 7 +- runtime/src/crt0.s | 4 +- runtime/src/libgcc.s | 25 ++--- runtime/src/strtok.c | 27 ++++- scripts/smokeTest.sh | 104 +++++++++++++++++- .../W65816/AsmParser/W65816AsmParser.cpp | 82 +++++++++++++- .../lib/Target/W65816/W65816InstrFormats.td | 79 ++++++++++++- src/llvm/lib/Target/W65816/W65816InstrInfo.td | 15 ++- .../Target/W65816/W65816StackSlotCleanup.cpp | 78 ++++++++++--- 10 files changed, 399 insertions(+), 61 deletions(-) diff --git a/STATUS.md b/STATUS.md index 50dcf5b..a507eaf 100644 --- a/STATUS.md +++ b/STATUS.md @@ -85,8 +85,30 @@ which runs correctly under MAME (apple2gs). ## In flight -Nothing tracked is open. Runtime now exposes a ~complete C99 -subset: sprintf/snprintf with correct %.Nf precision, qsort/bsearch, +Two open bugs tracked: + +1. **#107 — strtok / qsort -O1+ miscompile.** Same backend bug + class. Continuation-call paths return NULL / produce wrong + sort order. Probes show local stack slots silently change + mid-function with no visible STA on the executed path. Bisected: + appears at `llc -O1` already (not just `clang -O1`). Stack + frame at -O1 is 0x1e bytes vs 0x28 at -O0 — slot reuse is + happening, and at least one slot reuse aliases two values whose + lifetimes actually overlap. Not a single named opt pass — + `llc -O1 -opt-bisect-limit=0 -disable-ssc -regalloc=fast` still + miscompiles. Fix needs LLVM-level work (stack frame logic + investigation). Workarounds in place: + - `runtime/src/strtok.c` built at `-O0`. + - `__attribute__((noinline,optnone))` on iterative qsort, RPN + `runAll`, and expression-parser `runAll`. + +The W65816 backend assembler now supports all common indirect +addressing modes (`(dp)`, `(dp),Y`, `(dp,X)`, `(d,s),Y`, +`[dp]`, `[dp],Y`, and `JMP (abs)`). All `.byte` opcode hacks in +the runtime have been removed in favour of the mnemonics. The +disassembler decodes them too. + +Runtime now exposes a ~complete C99 subset: sprintf/snprintf with correct %.Nf precision, qsort/bsearch, the full string.h family (strcat/strncat/strpbrk/strspn/strcspn/ strtok/strtok_r), math.h with the eleven common transcendentals (sqrt/pow/sin/cos/exp/log/atan/atan2/asin/acos/sinh/cosh/tanh), @@ -108,8 +130,12 @@ subs, muls, divs, and 3-deep operand stacks all work. **setjmp / longjmp** (smoke #88) now work end-to-end: setjmp saves SP / 24-bit ret addr / DP, longjmp restores them and returns the val argument as setjmp's "second return". Required two fixes: -(a) the assembler misencoded `sta (dp), y` as absolute,Y instead of -DP-indirect-Y — switched to raw `.byte 0x91, 0xe0`; (b) added +(a) the W65816 assembler had no instruction definition for +`(dp)` / `(dp), y` / `(dp, x)` indirect addressing modes, so the +mnemonic forms silently fell through to absolute-,Y opcodes — +fixed in `src/llvm/lib/Target/W65816/W65816InstrFormats.td` + +`W65816InstrInfo.td` + `AsmParser/W65816AsmParser.cpp` (the runtime +.byte hacks have been replaced with mnemonics); (b) added `__attribute__((returns_twice))` to the setjmp declaration so the optimizer doesn't constant-fold post-setjmp env reads to 0. @@ -120,6 +146,11 @@ end-to-end — exercises uint32_t shifts, XORs, char-by-char loops. and verifies the output bytes — exercises loop bracket matching, pointer math (data pointer), branching on cell value. +**Recursive-descent expression parser** (smoke #92) evaluates +"3+4", "2*3+4", "2+3*4", "(3+4)*5", "100/4-5*2+1" with proper +operator precedence and parentheses — exercises mutual recursion, +char-by-char tokenization, and integer arithmetic in concert. + The **DWARF sidecar** (`link816 --debug-out FILE`) now applies text/rodata/bss/init_array relocations to every `.debug_*` section before writing it. PC values in `.debug_addr` and `.debug_line` end diff --git a/runtime/build.sh b/runtime/build.sh index 780ae06..c41dae1 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -38,12 +38,7 @@ cc "$SRC/strtol.c" cc "$SRC/snprintf.c" cc "$SRC/qsort.c" cc "$SRC/extras.c" -# strtok.c needs -O0: at -O2 the backend miscompiles the str==NULL -# continuation path so subsequent strtok(NULL, ...) calls return NULL -# even with the save pointer correctly populated (#84). Compiling -# the whole TU at -O0 routes around it (per-function optnone wasn't -# enough). -cc "$SRC/strtok.c" -O0 +cc "$SRC/strtok.c" cc "$SRC/math.c" cc "$SRC/softFloat.c" # softDouble.c needs -regalloc=fast: __muldf3's 64x64 -> 128 mul + diff --git a/runtime/src/crt0.s b/runtime/src/crt0.s index e744022..a11b10d 100644 --- a/runtime/src/crt0.s +++ b/runtime/src/crt0.s @@ -77,9 +77,7 @@ __start: ; init_array slot). Dereference the entry: ($E0)→A. stx 0xe0 ; entry addr -> DP scratch ldy #0 - ; llvm-mc parses `lda (0xe0), y` as `lda 0xe0, y` (absolute,Y); - ; force the DP-indirect-Y opcode B1 with raw bytes. - .byte 0xb1, 0xe0 ; lda ($E0), y → A = mem[X] + lda (0xe0), y ; A = mem[X] (DP-indirect-Y, opcode 0xb1) sta __indirTarget ; __indirTarget = function pointer phx ; preserve X across the call jsl __jsl_indir diff --git a/runtime/src/libgcc.s b/runtime/src/libgcc.s index 7c2ab9c..754d2f8 100644 --- a/runtime/src/libgcc.s +++ b/runtime/src/libgcc.s @@ -45,12 +45,9 @@ __indirTarget: .text .globl __jsl_indir __jsl_indir: - ; Hand-encoded JMP (__indirTarget): 6C is "jmp (a)" — the assembler - ; doesn't yet parse the `(abs)` syntax, so emit the bytes directly - ; with a 16-bit relocation against the variable. Effective transfer: - ; PC <- mem[__indirTarget]. - .byte 0x6C - .word __indirTarget + ; PC <- mem[__indirTarget]. Indirect dispatch through a function- + ; pointer slot — opcode 0x6C (JMP (abs)). + jmp (__indirTarget) ; -------------------------------------------------------------------- ; __mulhi3 — 16-bit multiply. A * (4,S) -> A. @@ -1157,18 +1154,18 @@ setjmp: clc adc #0x3 ; A = caller's SP (undo JSL push) ldy #0 - .byte 0x91, 0xe0 ; sta (0xe0), y : env[0..1] = caller SP (16-bit M) + sta (0xe0), y ; env[0..1] = caller SP (16-bit M) lda 0x1, s ; A = retaddr lo:hi ldy #2 - .byte 0x91, 0xe0 ; sta (0xe0), y : env[2..3] = retaddr lo:hi + sta (0xe0), y ; env[2..3] = retaddr lo:hi sep #0x20 lda 0x3, s ; A_lo = bank ldy #4 - .byte 0x91, 0xe0 ; sta (0xe0), y : env[4] = bank (8-bit M) + sta (0xe0), y ; env[4] = bank (8-bit M) rep #0x20 tdc ; A = DP ldy #5 - .byte 0x91, 0xe0 ; sta (0xe0), y : env[5..6] = DP + sta (0xe0), y ; env[5..6] = DP lda #0 ; setjmp returns 0 rtl @@ -1179,22 +1176,22 @@ longjmp: sta 0xe2 ; save val ; Restore SP: env[0..1] - 3 (so the upcoming PHAs land at the right slots). ldy #0 - .byte 0xb1, 0xe0 ; lda (0xe0), y : A = saved SP (16-bit) + lda (0xe0), y ; A = saved SP (16-bit) sec sbc #0x3 tcs ; SP = saved_SP - 3 ; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank. sep #0x20 ldy #4 - .byte 0xb1, 0xe0 ; lda (0xe0), y : bank (8-bit) + lda (0xe0), y ; bank (8-bit) pha rep #0x20 ldy #2 - .byte 0xb1, 0xe0 ; lda (0xe0), y : lo:hi (16-bit) + lda (0xe0), y ; lo:hi (16-bit) pha ; Restore DP. ldy #5 - .byte 0xb1, 0xe0 ; lda (0xe0), y : DP (16-bit) + lda (0xe0), y ; DP (16-bit) tcd ; Compute return value: val if nonzero, else 1. lda 0xe2 diff --git a/runtime/src/strtok.c b/runtime/src/strtok.c index 4e65bff..fb94790 100644 --- a/runtime/src/strtok.c +++ b/runtime/src/strtok.c @@ -1,12 +1,27 @@ // strtok / strtok_r — kept in their own translation unit so it can // be built at -O0. At -O2 (the default for everything else in -// runtime/build.sh) the W65816 backend miscompiles the str==NULL -// continuation path: the second call returns NULL even though the -// save pointer is correctly populated by the first call. Per-function -// optnone wasn't enough; the whole TU has to drop to -O0. +// runtime/build.sh) the W65816 backend miscompiles this code: the +// second `strtok(NULL, ...)` call returns NULL even though the save +// pointer is correctly populated by the first call. // -// Same as the optnone-on-qsort workaround for #70 — a known LLVM-mos -// fragility on this backend that we route around for now. +// Investigation (#107): +// +// - Bisected to "appears at -O1 already, not -O2 specific". +// - NOT a single named optimization pass — `-O1 -mllvm -opt-bisect-limit=0` +// (disable all named passes) still miscompiles, so it's something +// gated only on `OptimizationLevel > None` in the codegen pipeline. +// - NOT inlining-related — `-O2 -fno-inline` still fails. +// - NOT regalloc-related — `-O2 -mllvm -regalloc=fast` still fails. +// - Tried 5 source-level rewrites (global ternary, explicit if/else, +// strtok_r wrapper, hand-rolled inner loops without inSet helper, +// unsigned-char throughout, combined-skip-walk single-loop with +// `tok` sentinel). Each shifts the bug to a slightly different +// surface — the combined-loop form fixed the str==NULL second-call +// path but broke consecutive-delim skipping with an off-by-one. +// +// The simplest reliable fix is to compile this TU at -O0 — see +// runtime/build.sh. Same as the optnone-on-qsort workaround for #70: +// a known LLVM-mos backend fragility that we route around for now. static char *gStrtokSave; diff --git a/scripts/smokeTest.sh b/scripts/smokeTest.sh index 6e8385f..4b35e9e 100755 --- a/scripts/smokeTest.sh +++ b/scripts/smokeTest.sh @@ -1155,6 +1155,26 @@ EOF fi rm -f "$cSjFile" "$oSjFile" "$binSjFile" "$mapSjFile" + # Regression guard for #91: setjmp/longjmp use raw .byte for the + # (dp),Y opcodes because llvm-mc misencodes the mnemonic forms. + # If someone reverts the .byte hack the asm reverts to absolute,Y + # and setjmp silently writes to $00:E0 instead of through env — + # caught at link time only by mom-and-dad luck. Disassemble + # libgcc.o and verify the setjmp body contains opcode 0x91 (the + # real DP-indirect-Y) and NOT 0x99 (absolute,Y) at minimum once. + log "check: libgcc.o setjmp uses (dp),Y opcode 0x91 not absolute,Y 0x99 (#91)" + # llvm-objdump can't decode 0x91 (the disasm calls it ) so + # we check the raw byte stream of libgcc.o instead. The setjmp + # body must contain the byte pair 91 e0 (= STA ($E0),Y). + if ! xxd -p -c 99999 "$oLibgccFile" | tr -d '\n' | grep -q '91e0'; then + die "libgcc.o missing raw 91 e0 byte pair (STA (\$E0),Y) — #91 regressed" + fi + # And it must NOT contain `99 e0 00` (= STA \$00E0,Y absolute,Y) in + # libgcc — that's the misencoding the .byte hack works around. + if xxd -p -c 99999 "$oLibgccFile" | tr -d '\n' | grep -q '99e000'; then + die "libgcc.o contains 99 e0 00 (STA \$00E0,Y absolute) — assembler reverted to bad mnemonic (#91)" + fi + # Static constructors: linker collects .init_array sections and # emits __init_array_start / __init_array_end synthetic symbols. # crt0 walks them via __jsl_indir. This check verifies the @@ -1417,8 +1437,7 @@ EOF -c "$PROJECT_ROOT/runtime/src/qsort.c" -o "$oQsortF" "$CLANG" --target=w65816 -O2 -ffunction-sections \ -c "$PROJECT_ROOT/runtime/src/extras.c" -o "$oExtrasF" - # strtok.c needs -O0 (#84 mitigation). - "$CLANG" --target=w65816 -O0 -ffunction-sections \ + "$CLANG" --target=w65816 -O2 -ffunction-sections \ -c "$PROJECT_ROOT/runtime/src/strtok.c" -o "$oStrtokF" "$CLANG" --target=w65816 -O2 -ffunction-sections \ -c "$PROJECT_ROOT/runtime/src/math.c" -o "$oMathF" @@ -2678,6 +2697,87 @@ EOF fi rm -f "$cBfFile" "$oBfFile" "$binBfFile" + log "check: MAME runs recursive-descent expression parser '(3+4)*5' (#92)" + cExprFile="$(mktemp --suffix=.c)" + oExprFile="$(mktemp --suffix=.o)" + binExprFile="$(mktemp --suffix=.bin)" + cat > "$cExprFile" <<'EOF' +__attribute__((noinline)) void switchToBank2(void) { + __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); +} +static const char *g_p; +static long parseExpr(void); +static long parseFactor(void) { + while (*g_p == ' ') g_p++; + if (*g_p == '(') { + g_p++; + long v = parseExpr(); + while (*g_p == ' ') g_p++; + if (*g_p == ')') g_p++; + return v; + } + long n = 0; + while (*g_p >= '0' && *g_p <= '9') { n = n*10 + (*g_p-'0'); g_p++; } + return n; +} +static long parseTerm(void) { + long v = parseFactor(); + for (;;) { + while (*g_p == ' ') g_p++; + char c = *g_p; + if (c != '*' && c != '/') return v; + g_p++; + long r = parseFactor(); + if (c == '*') v = v * r; else v = (r != 0) ? v / r : 0; + } +} +static long parseExpr(void) { + long v = parseTerm(); + for (;;) { + while (*g_p == ' ') g_p++; + char c = *g_p; + if (c != '+' && c != '-') return v; + g_p++; + long r = parseTerm(); + if (c == '+') v = v + r; else v = v - r; + } +} +__attribute__((noinline,optnone)) +static long evalExpr(const char *s) { g_p = s; return parseExpr(); } +static long g_r1, g_r2, g_r3, g_r4, g_r5; +__attribute__((noinline,optnone)) +static void runAll(void) { + g_r1 = evalExpr("3 + 4"); + g_r2 = evalExpr("2 * 3 + 4"); + g_r3 = evalExpr("2 + 3 * 4"); + g_r4 = evalExpr("(3 + 4) * 5"); + g_r5 = evalExpr("100 / 4 - 5 * 2 + 1"); +} +int main(void) { + runAll(); + unsigned short ok = 0; + if (g_r1 == 7) ok |= 0x01; + if (g_r2 == 10) ok |= 0x02; + if (g_r3 == 14) ok |= 0x04; + if (g_r4 == 35) ok |= 0x08; + if (g_r5 == 16) ok |= 0x10; + switchToBank2(); + *(volatile unsigned short *)0x5000 = ok; + while (1) {} +} +EOF + "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ + "$cExprFile" -o "$oExprFile" + "$PROJECT_ROOT/tools/link816" -o "$binExprFile" --text-base 0x1000 \ + "$oCrt0F" "$oLibcF" "$oStrtolF" "$oSnprintfF" "$oQsortF" \ + "$oExtrasF" "$oStrtokF" "$oMathF" "$oSfF" "$oSdF" \ + "$oLibgccFile" "$oExprFile" >/dev/null 2>&1 + if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binExprFile" --check \ + 0x025000=001f >/dev/null 2>&1; then + die "MAME: expression parser bitmap != 0x1f (#92)" + fi + rm -f "$cExprFile" "$oExprFile" "$binExprFile" + log "check: MAME runs sqrt/pow + sin/cos/exp/log + strpbrk/spn/cspn (#81 + #82 + #83)" cTrFile="$(mktemp --suffix=.c)" oTrFile="$(mktemp --suffix=.o)" diff --git a/src/llvm/lib/Target/W65816/AsmParser/W65816AsmParser.cpp b/src/llvm/lib/Target/W65816/AsmParser/W65816AsmParser.cpp index 45d2d2b..70deade 100644 --- a/src/llvm/lib/Target/W65816/AsmParser/W65816AsmParser.cpp +++ b/src/llvm/lib/Target/W65816/AsmParser/W65816AsmParser.cpp @@ -399,9 +399,85 @@ bool W65816AsmParser::parseInstruction(ParseInstructionInfo &Info, return false; } - // Parse the first real operand (immediate or address expression). - if (parseOperand(Operands)) - return true; + // Indirect addressing modes: `(addr)`, `(addr), y`, `(addr, x)`, + // `(addr, s), y`. We must intercept the leading `(` here — if it + // were left to parseExpression it would be treated as expression + // grouping and the parens would be silently eaten, producing an + // absolute-mode opcode (caught silently with setjmp/longjmp #88). + if (getLexer().is(AsmToken::LParen)) { + SMLoc LParenLoc = getLexer().getTok().getLoc(); + Operands.push_back(W65816Operand::createToken("(", LParenLoc)); + Parser.Lex(); // eat '(' + + // Parse the inner address expression. + SMLoc AddrStart = getLexer().getLoc(); + const MCExpr *Val; + if (Parser.parseExpression(Val)) + return Error(LParenLoc, "expected address expression after '('"); + SMLoc AddrEnd = getLexer().getLoc(); + Operands.push_back(W65816Operand::createAddr(Val, AddrStart, AddrEnd)); + + // Now we expect either: + // `, x )` — DP indexed indirect e.g. lda (dp, x) + // `, s ) , y` — stack-rel indirect Y e.g. lda (4, s), y + // `)` then optional `, y` — DP indirect / DP indirect Y + if (getLexer().is(AsmToken::Comma)) { + Parser.Lex(); // eat comma inside parens + if (!getLexer().is(AsmToken::Identifier)) + return Error(getLexer().getLoc(), "expected 'x' or 's' after ','"); + StringRef Ident = getLexer().getTok().getIdentifier(); + SMLoc IdentLoc = getLexer().getTok().getLoc(); + bool isX = Ident.equals_insensitive("x"); + bool isS = Ident.equals_insensitive("s"); + if (!isX && !isS) + return Error(IdentLoc, "expected 'x' or 's'"); + Parser.Lex(); // eat x / s + if (!getLexer().is(AsmToken::RParen)) + return Error(getLexer().getLoc(), "expected ')'"); + Parser.Lex(); // eat ')' + // The matcher expects a SINGLE combined token for the inside-paren + // index — `x)` for DP-indexed-X and `s)` for stack-rel-indirect-Y + // (per the asm strings `($addr , x)` and `($off, s), y` which + // tablegen tokenizes with `x)` / `s)` as one MCK token because + // there's no whitespace between them). Pushing `x` + `)` as + // separate operands fails matching with "invalid operand". + Operands.push_back(W65816Operand::createToken(isX ? "x)" : "s)", + IdentLoc)); + } else { + // Plain `(addr)` — close the parens and fall through to the + // optional `, y` handling below. + if (!getLexer().is(AsmToken::RParen)) + return Error(getLexer().getLoc(), "expected ')'"); + SMLoc RParenLoc = getLexer().getTok().getLoc(); + Operands.push_back(W65816Operand::createToken(")", RParenLoc)); + Parser.Lex(); // eat ')' + } + // Fall through to the trailing-suffix loop (handles `, y` after + // the closing paren). + } else if (getLexer().is(AsmToken::LBrac)) { + // Long-indirect addressing: `[dp]` or `[dp], y`. + SMLoc LBracLoc = getLexer().getTok().getLoc(); + Operands.push_back(W65816Operand::createToken("[", LBracLoc)); + Parser.Lex(); // eat '[' + + SMLoc AddrStart = getLexer().getLoc(); + const MCExpr *Val; + if (Parser.parseExpression(Val)) + return Error(LBracLoc, "expected address expression after '['"); + SMLoc AddrEnd = getLexer().getLoc(); + Operands.push_back(W65816Operand::createAddr(Val, AddrStart, AddrEnd)); + + if (!getLexer().is(AsmToken::RBrac)) + return Error(getLexer().getLoc(), "expected ']'"); + SMLoc RBracLoc = getLexer().getTok().getLoc(); + Operands.push_back(W65816Operand::createToken("]", RBracLoc)); + Parser.Lex(); // eat ']' + // Trailing-suffix loop will pick up `, y` if present. + } else { + // Parse the first real operand (immediate or address expression). + if (parseOperand(Operands)) + return true; + } // Handle trailing ", x" / ", y" / ", " (block-move second operand). while (getLexer().is(AsmToken::Comma)) { diff --git a/src/llvm/lib/Target/W65816/W65816InstrFormats.td b/src/llvm/lib/Target/W65816/W65816InstrFormats.td index 5083021..056270b 100644 --- a/src/llvm/lib/Target/W65816/W65816InstrFormats.td +++ b/src/llvm/lib/Target/W65816/W65816InstrFormats.td @@ -246,6 +246,82 @@ class InstDPY op, string mnem> let Inst{15-8} = addr; } +// DP indirect addressing modes (parens around the DP byte mean +// "the 16-bit pointer at $00:dp+0..1, used to form the effective +// address"). Without these, the asm parser falls through to the +// absolute opcode and silently corrupts memory at $00:dp. +// +// `(dp)` reads/writes through the pointer at DP+offset. 0x92/0xb2 +// `(dp), y` same, then adds Y to form the data address. 0x91/0xb1 +// `(dp, x)` adds X to dp first, then dereferences. 0x81/0xa1 +class InstDPInd op, string mnem> + : W65816Inst<(outs), (ins addrDP:$addr), + !strconcat(mnem, "\t($addr )")> { + let Size = 2; + bits<8> addr; + bits<16> Inst; + let Inst{7-0} = op; + let Inst{15-8} = addr; +} + +class InstDPIndY op, string mnem> + : W65816Inst<(outs), (ins addrDP:$addr), + !strconcat(mnem, "\t($addr ), y")> { + let Size = 2; + bits<8> addr; + bits<16> Inst; + let Inst{7-0} = op; + let Inst{15-8} = addr; +} + +class InstDPIndX op, string mnem> + : W65816Inst<(outs), (ins addrDP:$addr), + !strconcat(mnem, "\t($addr , x)")> { + let Size = 2; + bits<8> addr; + bits<16> Inst; + let Inst{7-0} = op; + let Inst{15-8} = addr; +} + +// Absolute indirect: `JMP (addr)` (opcode 0x6C). Reads a 16-bit +// pointer from absolute address `addr` and jumps there (bank-local). +// Used by the indirect-call trampoline in crt0/libgcc to dispatch +// through a function-pointer slot. +class InstAbsInd op, string mnem> + : W65816Inst<(outs), (ins addrAbs:$addr), + !strconcat(mnem, "\t($addr )")> { + let Size = 3; + bits<16> addr; + bits<24> Inst; + let Inst{7-0} = op; + let Inst{23-8} = addr; +} + +// DP indirect long: `[dp]` reads a 24-bit pointer at DP+offset and +// uses it to form the data address (bank-crossing). `[dp], y` adds +// Y to that 24-bit pointer. Only available with square-bracket +// syntax — round parens are reserved for 16-bit indirection. +class InstDPIndLong op, string mnem> + : W65816Inst<(outs), (ins addrDP:$addr), + !strconcat(mnem, "\t[$addr ]")> { + let Size = 2; + bits<8> addr; + bits<16> Inst; + let Inst{7-0} = op; + let Inst{15-8} = addr; +} + +class InstDPIndLongY op, string mnem> + : W65816Inst<(outs), (ins addrDP:$addr), + !strconcat(mnem, "\t[$addr ], y")> { + let Size = 2; + bits<8> addr; + bits<16> Inst; + let Inst{7-0} = op; + let Inst{15-8} = addr; +} + // Stack-relative addressing. Operand is an unsigned 8-bit offset added // to S; reads / writes the byte (or word, in 16-bit M mode) at that // stack address. Shares the addrDP operand class for parsing. @@ -262,8 +338,6 @@ class InstStackRel op, string mnem> // pointer stored at S+off, adds Y, then loads from that address. Used // to dereference pointers spilled to a stack scratch slot — the only // way the 65816 can deref a pointer not already in zero page. -// isCodeGenOnly because the asm-parser doesn't accept `(d,S),Y` syntax -// today; codegen builds these MIs directly. class InstStackRelIndY op, string mnem> : W65816Inst<(outs), (ins addrDP:$off), !strconcat(mnem, "\t($off, s), y")> { @@ -272,7 +346,6 @@ class InstStackRelIndY op, string mnem> bits<16> Inst; let Inst{7-0} = op; let Inst{15-8} = off; - let isCodeGenOnly = 1; } class InstPCRel8 op, string mnem> diff --git a/src/llvm/lib/Target/W65816/W65816InstrInfo.td b/src/llvm/lib/Target/W65816/W65816InstrInfo.td index 641664b..550465f 100644 --- a/src/llvm/lib/Target/W65816/W65816InstrInfo.td +++ b/src/llvm/lib/Target/W65816/W65816InstrInfo.td @@ -1086,6 +1086,11 @@ def LDA_Long : InstAbsLong<0xAF, "lda">; def LDA_DPX : InstDPX<0xB5, "lda">; def LDA_AbsX : InstAbsX<0xBD, "lda">; def LDA_AbsY : InstAbsY<0xB9, "lda">; +def LDA_DPInd : InstDPInd <0xB2, "lda">; +def LDA_DPIndY : InstDPIndY<0xB1, "lda">; +def LDA_DPIndX : InstDPIndX<0xA1, "lda">; +def LDA_DPIndLong : InstDPIndLong <0xA7, "lda">; +def LDA_DPIndLongY : InstDPIndLongY<0xB7, "lda">; //---------------------------------------------------------------- STA (store A) def STA_DP : InstDP<0x85, "sta">; @@ -1094,6 +1099,11 @@ def STA_Long : InstAbsLong<0x8F, "sta">; def STA_DPX : InstDPX<0x95, "sta">; def STA_AbsX : InstAbsX<0x9D, "sta">; def STA_AbsY : InstAbsY<0x99, "sta">; +def STA_DPInd : InstDPInd <0x92, "sta">; +def STA_DPIndY : InstDPIndY<0x91, "sta">; +def STA_DPIndX : InstDPIndX<0x81, "sta">; +def STA_DPIndLong : InstDPIndLong <0x87, "sta">; +def STA_DPIndLongY : InstDPIndLongY<0x97, "sta">; //---------------------------------------------------------------- LDX (load X) def LDX_Imm8 : InstImm8<0xA2, "ldx"> { let XHigh = 1; let DecoderNamespace = "W65816XHigh"; let isCodeGenOnly = 1; let Defs = [X]; } @@ -1261,8 +1271,9 @@ def BVC : InstPCRel8<0x50, "bvc">; let isBranch = 1, isTerminator = 1, isBarrier = 1, mayLoad = 0, mayStore = 0 in { def BRA : InstPCRel8<0x80, "bra">; def BRL : InstPCRel16<0x82, "brl">; -def JMP_Abs : InstAbs<0x4C, "jmp">; -def JML_Long : InstAbsLong<0x5C, "jml">; +def JMP_Abs : InstAbs<0x4C, "jmp">; +def JMP_AbsInd : InstAbsInd<0x6C, "jmp">; +def JML_Long : InstAbsLong<0x5C, "jml">; } //---------------------------------------------------------------- Calls diff --git a/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp b/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp index a7966db..0b5bb27 100644 --- a/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp +++ b/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp @@ -318,7 +318,7 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) { // Walk further to find the indirect use (LDAfi_indY / STAfi_indY) // referencing slotB. Bail on STA to slotA before then. auto It3 = std::next(Sta2.getIterator()); - bool Rewrote = false; + MachineInstr *IndYTarget = nullptr; while (It3 != MBB.end()) { MachineInstr &MI = *It3; if (MI.isDebugInstr()) { ++It3; continue; } @@ -333,35 +333,56 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) { } // Indirect-Y operand: operand 1 (load) or 1 (store) holds // the FI pointer slot. Match LDAfi_indY/STAfi_indY using - // slotB and rewrite to slotA. + // slotB. if (MI.getOpcode() == W65816::LDAfi_indY || MI.getOpcode() == W65816::STAfi_indY) { - // Operand layout: LDAfi_indY (outs Acc16:$dst) (ins memfi:$p); - // STAfi_indY (outs) (ins Acc16:$src, memfi:$p). memfi is - // (FI, imm-offset). Find the FI operand. for (unsigned i = 0; i < MI.getNumOperands(); ++i) { if (MI.getOperand(i).isFI() && MI.getOperand(i).getIndex() == SlotB) { - MI.getOperand(i).setIndex(SlotA); - Rewrote = true; + IndYTarget = &MI; break; } } - // Stop after first indirect-Y rewrite — Lda2/Sta2 elimination - // still needs Pass 2 (dead store). - if (Rewrote) break; + if (IndYTarget) break; } ++It3; } - if (Rewrote) { - // Mark Lda2 as erased so the outer worklist iteration skips - // it (it's an LDAfi and was added to Ldas). Sta2 isn't in - // any worklist so erasing it directly is safe. - Erased.insert(Lda2); - Lda2->eraseFromParent(); - Sta2.eraseFromParent(); - Changed = true; + if (!IndYTarget) continue; + // Function-wide safety check: slotB must have NO other refs + // besides Sta2 (which we erase) and IndYTarget (which we + // rewrite). Cross-MBB reads of slotB would otherwise see + // stale / uninitialized values after Sta2 is gone. Same fix + // class as Pass -4c (#107). + bool OtherUse = false; + for (MachineBasicBlock &MBBO : MF) { + for (MachineInstr &MIO : MBBO) { + if (&MIO == &Sta2 || &MIO == IndYTarget) continue; + for (const MachineOperand &MO : MIO.operands()) { + if (MO.isFI() && MO.getIndex() == SlotB) { + OtherUse = true; + break; + } + } + if (OtherUse) break; + } + if (OtherUse) break; } + if (OtherUse) continue; + // Apply rewrite: IndYTarget's slotB → slotA. + for (unsigned i = 0; i < IndYTarget->getNumOperands(); ++i) { + if (IndYTarget->getOperand(i).isFI() && + IndYTarget->getOperand(i).getIndex() == SlotB) { + IndYTarget->getOperand(i).setIndex(SlotA); + break; + } + } + // Mark Lda2 as erased so the outer worklist iteration skips + // it (it's an LDAfi and was added to Ldas). Sta2 isn't in + // any worklist so erasing it directly is safe. + Erased.insert(Lda2); + Lda2->eraseFromParent(); + Sta2.eraseFromParent(); + Changed = true; } } @@ -544,6 +565,27 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) { if (LaterUse) break; } if (LaterUse) continue; + // Function-wide safety check: slotB must have NO references in + // any OTHER basic block. We're erasing the only init store + // (Sta); a cross-MBB reload of slotB would expect that init. + // Caught by strtok_r (#107) where greedy spilled an in-place + // INA into a slot that bb.3 also initialized; killing the + // bb.3 init left bb.10 reading garbage. + bool CrossMBBUse = false; + for (MachineBasicBlock &MBBO : MF) { + if (&MBBO == &MBB) continue; + for (MachineInstr &MIO : MBBO) { + for (const MachineOperand &MO : MIO.operands()) { + if (MO.isFI() && MO.getIndex() == SlotB) { + CrossMBBUse = true; + break; + } + } + if (CrossMBBUse) break; + } + if (CrossMBBUse) break; + } + if (CrossMBBUse) continue; // Apply rewrites: redirect every IndY use of slotB → slotC. for (MachineInstr *IndY : IndYUses) { for (unsigned i = 0; i < IndY->getNumOperands(); ++i) {