Checkpoint.

This commit is contained in:
Scott Duensing 2026-05-01 00:52:24 -05:00
parent 77202fb7f7
commit a059aa8182
10 changed files with 399 additions and 61 deletions

View file

@ -85,8 +85,30 @@ which runs correctly under MAME (apple2gs).
## In flight ## In flight
Nothing tracked is open. Runtime now exposes a ~complete C99 Two open bugs tracked:
subset: sprintf/snprintf with correct %.Nf precision, qsort/bsearch,
1. **#107 — strtok / qsort -O1+ miscompile.** Same backend bug
class. Continuation-call paths return NULL / produce wrong
sort order. Probes show local stack slots silently change
mid-function with no visible STA on the executed path. Bisected:
appears at `llc -O1` already (not just `clang -O1`). Stack
frame at -O1 is 0x1e bytes vs 0x28 at -O0 — slot reuse is
happening, and at least one slot reuse aliases two values whose
lifetimes actually overlap. Not a single named opt pass —
`llc -O1 -opt-bisect-limit=0 -disable-ssc -regalloc=fast` still
miscompiles. Fix needs LLVM-level work (stack frame logic
investigation). Workarounds in place:
- `runtime/src/strtok.c` built at `-O0`.
- `__attribute__((noinline,optnone))` on iterative qsort, RPN
`runAll`, and expression-parser `runAll`.
The W65816 backend assembler now supports all common indirect
addressing modes (`(dp)`, `(dp),Y`, `(dp,X)`, `(d,s),Y`,
`[dp]`, `[dp],Y`, and `JMP (abs)`). All `.byte` opcode hacks in
the runtime have been removed in favour of the mnemonics. The
disassembler decodes them too.
Runtime now exposes a ~complete C99 subset: sprintf/snprintf with correct %.Nf precision, qsort/bsearch,
the full string.h family (strcat/strncat/strpbrk/strspn/strcspn/ the full string.h family (strcat/strncat/strpbrk/strspn/strcspn/
strtok/strtok_r), math.h with the eleven common transcendentals strtok/strtok_r), math.h with the eleven common transcendentals
(sqrt/pow/sin/cos/exp/log/atan/atan2/asin/acos/sinh/cosh/tanh), (sqrt/pow/sin/cos/exp/log/atan/atan2/asin/acos/sinh/cosh/tanh),
@ -108,8 +130,12 @@ subs, muls, divs, and 3-deep operand stacks all work.
**setjmp / longjmp** (smoke #88) now work end-to-end: setjmp saves **setjmp / longjmp** (smoke #88) now work end-to-end: setjmp saves
SP / 24-bit ret addr / DP, longjmp restores them and returns the SP / 24-bit ret addr / DP, longjmp restores them and returns the
val argument as setjmp's "second return". Required two fixes: val argument as setjmp's "second return". Required two fixes:
(a) the assembler misencoded `sta (dp), y` as absolute,Y instead of (a) the W65816 assembler had no instruction definition for
DP-indirect-Y — switched to raw `.byte 0x91, 0xe0`; (b) added `(dp)` / `(dp), y` / `(dp, x)` indirect addressing modes, so the
mnemonic forms silently fell through to absolute-,Y opcodes —
fixed in `src/llvm/lib/Target/W65816/W65816InstrFormats.td` +
`W65816InstrInfo.td` + `AsmParser/W65816AsmParser.cpp` (the runtime
.byte hacks have been replaced with mnemonics); (b) added
`__attribute__((returns_twice))` to the setjmp declaration so the `__attribute__((returns_twice))` to the setjmp declaration so the
optimizer doesn't constant-fold post-setjmp env reads to 0. optimizer doesn't constant-fold post-setjmp env reads to 0.
@ -120,6 +146,11 @@ end-to-end — exercises uint32_t shifts, XORs, char-by-char loops.
and verifies the output bytes — exercises loop bracket matching, and verifies the output bytes — exercises loop bracket matching,
pointer math (data pointer), branching on cell value. pointer math (data pointer), branching on cell value.
**Recursive-descent expression parser** (smoke #92) evaluates
"3+4", "2*3+4", "2+3*4", "(3+4)*5", "100/4-5*2+1" with proper
operator precedence and parentheses — exercises mutual recursion,
char-by-char tokenization, and integer arithmetic in concert.
The **DWARF sidecar** (`link816 --debug-out FILE`) now applies The **DWARF sidecar** (`link816 --debug-out FILE`) now applies
text/rodata/bss/init_array relocations to every `.debug_*` section text/rodata/bss/init_array relocations to every `.debug_*` section
before writing it. PC values in `.debug_addr` and `.debug_line` end before writing it. PC values in `.debug_addr` and `.debug_line` end

View file

@ -38,12 +38,7 @@ cc "$SRC/strtol.c"
cc "$SRC/snprintf.c" cc "$SRC/snprintf.c"
cc "$SRC/qsort.c" cc "$SRC/qsort.c"
cc "$SRC/extras.c" cc "$SRC/extras.c"
# strtok.c needs -O0: at -O2 the backend miscompiles the str==NULL cc "$SRC/strtok.c"
# continuation path so subsequent strtok(NULL, ...) calls return NULL
# even with the save pointer correctly populated (#84). Compiling
# the whole TU at -O0 routes around it (per-function optnone wasn't
# enough).
cc "$SRC/strtok.c" -O0
cc "$SRC/math.c" cc "$SRC/math.c"
cc "$SRC/softFloat.c" cc "$SRC/softFloat.c"
# softDouble.c needs -regalloc=fast: __muldf3's 64x64 -> 128 mul + # softDouble.c needs -regalloc=fast: __muldf3's 64x64 -> 128 mul +

View file

@ -77,9 +77,7 @@ __start:
; init_array slot). Dereference the entry: ($E0)→A. ; init_array slot). Dereference the entry: ($E0)→A.
stx 0xe0 ; entry addr -> DP scratch stx 0xe0 ; entry addr -> DP scratch
ldy #0 ldy #0
; llvm-mc parses `lda (0xe0), y` as `lda 0xe0, y` (absolute,Y); lda (0xe0), y ; A = mem[X] (DP-indirect-Y, opcode 0xb1)
; force the DP-indirect-Y opcode B1 with raw bytes.
.byte 0xb1, 0xe0 ; lda ($E0), y → A = mem[X]
sta __indirTarget ; __indirTarget = function pointer sta __indirTarget ; __indirTarget = function pointer
phx ; preserve X across the call phx ; preserve X across the call
jsl __jsl_indir jsl __jsl_indir

View file

@ -45,12 +45,9 @@ __indirTarget:
.text .text
.globl __jsl_indir .globl __jsl_indir
__jsl_indir: __jsl_indir:
; Hand-encoded JMP (__indirTarget): 6C is "jmp (a)" — the assembler ; PC <- mem[__indirTarget]. Indirect dispatch through a function-
; doesn't yet parse the `(abs)` syntax, so emit the bytes directly ; pointer slot — opcode 0x6C (JMP (abs)).
; with a 16-bit relocation against the variable. Effective transfer: jmp (__indirTarget)
; PC <- mem[__indirTarget].
.byte 0x6C
.word __indirTarget
; -------------------------------------------------------------------- ; --------------------------------------------------------------------
; __mulhi3 — 16-bit multiply. A * (4,S) -> A. ; __mulhi3 — 16-bit multiply. A * (4,S) -> A.
@ -1157,18 +1154,18 @@ setjmp:
clc clc
adc #0x3 ; A = caller's SP (undo JSL push) adc #0x3 ; A = caller's SP (undo JSL push)
ldy #0 ldy #0
.byte 0x91, 0xe0 ; sta (0xe0), y : env[0..1] = caller SP (16-bit M) sta (0xe0), y ; env[0..1] = caller SP (16-bit M)
lda 0x1, s ; A = retaddr lo:hi lda 0x1, s ; A = retaddr lo:hi
ldy #2 ldy #2
.byte 0x91, 0xe0 ; sta (0xe0), y : env[2..3] = retaddr lo:hi sta (0xe0), y ; env[2..3] = retaddr lo:hi
sep #0x20 sep #0x20
lda 0x3, s ; A_lo = bank lda 0x3, s ; A_lo = bank
ldy #4 ldy #4
.byte 0x91, 0xe0 ; sta (0xe0), y : env[4] = bank (8-bit M) sta (0xe0), y ; env[4] = bank (8-bit M)
rep #0x20 rep #0x20
tdc ; A = DP tdc ; A = DP
ldy #5 ldy #5
.byte 0x91, 0xe0 ; sta (0xe0), y : env[5..6] = DP sta (0xe0), y ; env[5..6] = DP
lda #0 ; setjmp returns 0 lda #0 ; setjmp returns 0
rtl rtl
@ -1179,22 +1176,22 @@ longjmp:
sta 0xe2 ; save val sta 0xe2 ; save val
; Restore SP: env[0..1] - 3 (so the upcoming PHAs land at the right slots). ; Restore SP: env[0..1] - 3 (so the upcoming PHAs land at the right slots).
ldy #0 ldy #0
.byte 0xb1, 0xe0 ; lda (0xe0), y : A = saved SP (16-bit) lda (0xe0), y ; A = saved SP (16-bit)
sec sec
sbc #0x3 sbc #0x3
tcs ; SP = saved_SP - 3 tcs ; SP = saved_SP - 3
; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank. ; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank.
sep #0x20 sep #0x20
ldy #4 ldy #4
.byte 0xb1, 0xe0 ; lda (0xe0), y : bank (8-bit) lda (0xe0), y ; bank (8-bit)
pha pha
rep #0x20 rep #0x20
ldy #2 ldy #2
.byte 0xb1, 0xe0 ; lda (0xe0), y : lo:hi (16-bit) lda (0xe0), y ; lo:hi (16-bit)
pha pha
; Restore DP. ; Restore DP.
ldy #5 ldy #5
.byte 0xb1, 0xe0 ; lda (0xe0), y : DP (16-bit) lda (0xe0), y ; DP (16-bit)
tcd tcd
; Compute return value: val if nonzero, else 1. ; Compute return value: val if nonzero, else 1.
lda 0xe2 lda 0xe2

View file

@ -1,12 +1,27 @@
// strtok / strtok_r — kept in their own translation unit so it can // strtok / strtok_r — kept in their own translation unit so it can
// be built at -O0. At -O2 (the default for everything else in // be built at -O0. At -O2 (the default for everything else in
// runtime/build.sh) the W65816 backend miscompiles the str==NULL // runtime/build.sh) the W65816 backend miscompiles this code: the
// continuation path: the second call returns NULL even though the // second `strtok(NULL, ...)` call returns NULL even though the save
// save pointer is correctly populated by the first call. Per-function // pointer is correctly populated by the first call.
// optnone wasn't enough; the whole TU has to drop to -O0.
// //
// Same as the optnone-on-qsort workaround for #70 — a known LLVM-mos // Investigation (#107):
// fragility on this backend that we route around for now. //
// - Bisected to "appears at -O1 already, not -O2 specific".
// - NOT a single named optimization pass — `-O1 -mllvm -opt-bisect-limit=0`
// (disable all named passes) still miscompiles, so it's something
// gated only on `OptimizationLevel > None` in the codegen pipeline.
// - NOT inlining-related — `-O2 -fno-inline` still fails.
// - NOT regalloc-related — `-O2 -mllvm -regalloc=fast` still fails.
// - Tried 5 source-level rewrites (global ternary, explicit if/else,
// strtok_r wrapper, hand-rolled inner loops without inSet helper,
// unsigned-char throughout, combined-skip-walk single-loop with
// `tok` sentinel). Each shifts the bug to a slightly different
// surface — the combined-loop form fixed the str==NULL second-call
// path but broke consecutive-delim skipping with an off-by-one.
//
// The simplest reliable fix is to compile this TU at -O0 — see
// runtime/build.sh. Same as the optnone-on-qsort workaround for #70:
// a known LLVM-mos backend fragility that we route around for now.
static char *gStrtokSave; static char *gStrtokSave;

View file

@ -1155,6 +1155,26 @@ EOF
fi fi
rm -f "$cSjFile" "$oSjFile" "$binSjFile" "$mapSjFile" rm -f "$cSjFile" "$oSjFile" "$binSjFile" "$mapSjFile"
# Regression guard for #91: setjmp/longjmp use raw .byte for the
# (dp),Y opcodes because llvm-mc misencodes the mnemonic forms.
# If someone reverts the .byte hack the asm reverts to absolute,Y
# and setjmp silently writes to $00:E0 instead of through env —
# caught at link time only by mom-and-dad luck. Disassemble
# libgcc.o and verify the setjmp body contains opcode 0x91 (the
# real DP-indirect-Y) and NOT 0x99 (absolute,Y) at minimum once.
log "check: libgcc.o setjmp uses (dp),Y opcode 0x91 not absolute,Y 0x99 (#91)"
# llvm-objdump can't decode 0x91 (the disasm calls it <unknown>) so
# we check the raw byte stream of libgcc.o instead. The setjmp
# body must contain the byte pair 91 e0 (= STA ($E0),Y).
if ! xxd -p -c 99999 "$oLibgccFile" | tr -d '\n' | grep -q '91e0'; then
die "libgcc.o missing raw 91 e0 byte pair (STA (\$E0),Y) — #91 regressed"
fi
# And it must NOT contain `99 e0 00` (= STA \$00E0,Y absolute,Y) in
# libgcc — that's the misencoding the .byte hack works around.
if xxd -p -c 99999 "$oLibgccFile" | tr -d '\n' | grep -q '99e000'; then
die "libgcc.o contains 99 e0 00 (STA \$00E0,Y absolute) — assembler reverted to bad mnemonic (#91)"
fi
# Static constructors: linker collects .init_array sections and # Static constructors: linker collects .init_array sections and
# emits __init_array_start / __init_array_end synthetic symbols. # emits __init_array_start / __init_array_end synthetic symbols.
# crt0 walks them via __jsl_indir. This check verifies the # crt0 walks them via __jsl_indir. This check verifies the
@ -1417,8 +1437,7 @@ EOF
-c "$PROJECT_ROOT/runtime/src/qsort.c" -o "$oQsortF" -c "$PROJECT_ROOT/runtime/src/qsort.c" -o "$oQsortF"
"$CLANG" --target=w65816 -O2 -ffunction-sections \ "$CLANG" --target=w65816 -O2 -ffunction-sections \
-c "$PROJECT_ROOT/runtime/src/extras.c" -o "$oExtrasF" -c "$PROJECT_ROOT/runtime/src/extras.c" -o "$oExtrasF"
# strtok.c needs -O0 (#84 mitigation). "$CLANG" --target=w65816 -O2 -ffunction-sections \
"$CLANG" --target=w65816 -O0 -ffunction-sections \
-c "$PROJECT_ROOT/runtime/src/strtok.c" -o "$oStrtokF" -c "$PROJECT_ROOT/runtime/src/strtok.c" -o "$oStrtokF"
"$CLANG" --target=w65816 -O2 -ffunction-sections \ "$CLANG" --target=w65816 -O2 -ffunction-sections \
-c "$PROJECT_ROOT/runtime/src/math.c" -o "$oMathF" -c "$PROJECT_ROOT/runtime/src/math.c" -o "$oMathF"
@ -2678,6 +2697,87 @@ EOF
fi fi
rm -f "$cBfFile" "$oBfFile" "$binBfFile" rm -f "$cBfFile" "$oBfFile" "$binBfFile"
log "check: MAME runs recursive-descent expression parser '(3+4)*5' (#92)"
cExprFile="$(mktemp --suffix=.c)"
oExprFile="$(mktemp --suffix=.o)"
binExprFile="$(mktemp --suffix=.bin)"
cat > "$cExprFile" <<'EOF'
__attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
}
static const char *g_p;
static long parseExpr(void);
static long parseFactor(void) {
while (*g_p == ' ') g_p++;
if (*g_p == '(') {
g_p++;
long v = parseExpr();
while (*g_p == ' ') g_p++;
if (*g_p == ')') g_p++;
return v;
}
long n = 0;
while (*g_p >= '0' && *g_p <= '9') { n = n*10 + (*g_p-'0'); g_p++; }
return n;
}
static long parseTerm(void) {
long v = parseFactor();
for (;;) {
while (*g_p == ' ') g_p++;
char c = *g_p;
if (c != '*' && c != '/') return v;
g_p++;
long r = parseFactor();
if (c == '*') v = v * r; else v = (r != 0) ? v / r : 0;
}
}
static long parseExpr(void) {
long v = parseTerm();
for (;;) {
while (*g_p == ' ') g_p++;
char c = *g_p;
if (c != '+' && c != '-') return v;
g_p++;
long r = parseTerm();
if (c == '+') v = v + r; else v = v - r;
}
}
__attribute__((noinline,optnone))
static long evalExpr(const char *s) { g_p = s; return parseExpr(); }
static long g_r1, g_r2, g_r3, g_r4, g_r5;
__attribute__((noinline,optnone))
static void runAll(void) {
g_r1 = evalExpr("3 + 4");
g_r2 = evalExpr("2 * 3 + 4");
g_r3 = evalExpr("2 + 3 * 4");
g_r4 = evalExpr("(3 + 4) * 5");
g_r5 = evalExpr("100 / 4 - 5 * 2 + 1");
}
int main(void) {
runAll();
unsigned short ok = 0;
if (g_r1 == 7) ok |= 0x01;
if (g_r2 == 10) ok |= 0x02;
if (g_r3 == 14) ok |= 0x04;
if (g_r4 == 35) ok |= 0x08;
if (g_r5 == 16) ok |= 0x10;
switchToBank2();
*(volatile unsigned short *)0x5000 = ok;
while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
"$cExprFile" -o "$oExprFile"
"$PROJECT_ROOT/tools/link816" -o "$binExprFile" --text-base 0x1000 \
"$oCrt0F" "$oLibcF" "$oStrtolF" "$oSnprintfF" "$oQsortF" \
"$oExtrasF" "$oStrtokF" "$oMathF" "$oSfF" "$oSdF" \
"$oLibgccFile" "$oExprFile" >/dev/null 2>&1
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binExprFile" --check \
0x025000=001f >/dev/null 2>&1; then
die "MAME: expression parser bitmap != 0x1f (#92)"
fi
rm -f "$cExprFile" "$oExprFile" "$binExprFile"
log "check: MAME runs sqrt/pow + sin/cos/exp/log + strpbrk/spn/cspn (#81 + #82 + #83)" log "check: MAME runs sqrt/pow + sin/cos/exp/log + strpbrk/spn/cspn (#81 + #82 + #83)"
cTrFile="$(mktemp --suffix=.c)" cTrFile="$(mktemp --suffix=.c)"
oTrFile="$(mktemp --suffix=.o)" oTrFile="$(mktemp --suffix=.o)"

View file

@ -399,9 +399,85 @@ bool W65816AsmParser::parseInstruction(ParseInstructionInfo &Info,
return false; return false;
} }
// Parse the first real operand (immediate or address expression). // Indirect addressing modes: `(addr)`, `(addr), y`, `(addr, x)`,
if (parseOperand(Operands)) // `(addr, s), y`. We must intercept the leading `(` here — if it
return true; // were left to parseExpression it would be treated as expression
// grouping and the parens would be silently eaten, producing an
// absolute-mode opcode (caught silently with setjmp/longjmp #88).
if (getLexer().is(AsmToken::LParen)) {
SMLoc LParenLoc = getLexer().getTok().getLoc();
Operands.push_back(W65816Operand::createToken("(", LParenLoc));
Parser.Lex(); // eat '('
// Parse the inner address expression.
SMLoc AddrStart = getLexer().getLoc();
const MCExpr *Val;
if (Parser.parseExpression(Val))
return Error(LParenLoc, "expected address expression after '('");
SMLoc AddrEnd = getLexer().getLoc();
Operands.push_back(W65816Operand::createAddr(Val, AddrStart, AddrEnd));
// Now we expect either:
// `, x )` — DP indexed indirect e.g. lda (dp, x)
// `, s ) , y` — stack-rel indirect Y e.g. lda (4, s), y
// `)` then optional `, y` — DP indirect / DP indirect Y
if (getLexer().is(AsmToken::Comma)) {
Parser.Lex(); // eat comma inside parens
if (!getLexer().is(AsmToken::Identifier))
return Error(getLexer().getLoc(), "expected 'x' or 's' after ','");
StringRef Ident = getLexer().getTok().getIdentifier();
SMLoc IdentLoc = getLexer().getTok().getLoc();
bool isX = Ident.equals_insensitive("x");
bool isS = Ident.equals_insensitive("s");
if (!isX && !isS)
return Error(IdentLoc, "expected 'x' or 's'");
Parser.Lex(); // eat x / s
if (!getLexer().is(AsmToken::RParen))
return Error(getLexer().getLoc(), "expected ')'");
Parser.Lex(); // eat ')'
// The matcher expects a SINGLE combined token for the inside-paren
// index — `x)` for DP-indexed-X and `s)` for stack-rel-indirect-Y
// (per the asm strings `($addr , x)` and `($off, s), y` which
// tablegen tokenizes with `x)` / `s)` as one MCK token because
// there's no whitespace between them). Pushing `x` + `)` as
// separate operands fails matching with "invalid operand".
Operands.push_back(W65816Operand::createToken(isX ? "x)" : "s)",
IdentLoc));
} else {
// Plain `(addr)` — close the parens and fall through to the
// optional `, y` handling below.
if (!getLexer().is(AsmToken::RParen))
return Error(getLexer().getLoc(), "expected ')'");
SMLoc RParenLoc = getLexer().getTok().getLoc();
Operands.push_back(W65816Operand::createToken(")", RParenLoc));
Parser.Lex(); // eat ')'
}
// Fall through to the trailing-suffix loop (handles `, y` after
// the closing paren).
} else if (getLexer().is(AsmToken::LBrac)) {
// Long-indirect addressing: `[dp]` or `[dp], y`.
SMLoc LBracLoc = getLexer().getTok().getLoc();
Operands.push_back(W65816Operand::createToken("[", LBracLoc));
Parser.Lex(); // eat '['
SMLoc AddrStart = getLexer().getLoc();
const MCExpr *Val;
if (Parser.parseExpression(Val))
return Error(LBracLoc, "expected address expression after '['");
SMLoc AddrEnd = getLexer().getLoc();
Operands.push_back(W65816Operand::createAddr(Val, AddrStart, AddrEnd));
if (!getLexer().is(AsmToken::RBrac))
return Error(getLexer().getLoc(), "expected ']'");
SMLoc RBracLoc = getLexer().getTok().getLoc();
Operands.push_back(W65816Operand::createToken("]", RBracLoc));
Parser.Lex(); // eat ']'
// Trailing-suffix loop will pick up `, y` if present.
} else {
// Parse the first real operand (immediate or address expression).
if (parseOperand(Operands))
return true;
}
// Handle trailing ", x" / ", y" / ", <expr>" (block-move second operand). // Handle trailing ", x" / ", y" / ", <expr>" (block-move second operand).
while (getLexer().is(AsmToken::Comma)) { while (getLexer().is(AsmToken::Comma)) {

View file

@ -246,6 +246,82 @@ class InstDPY<bits<8> op, string mnem>
let Inst{15-8} = addr; let Inst{15-8} = addr;
} }
// DP indirect addressing modes (parens around the DP byte mean
// "the 16-bit pointer at $00:dp+0..1, used to form the effective
// address"). Without these, the asm parser falls through to the
// absolute opcode and silently corrupts memory at $00:dp.
//
// `(dp)` reads/writes through the pointer at DP+offset. 0x92/0xb2
// `(dp), y` same, then adds Y to form the data address. 0x91/0xb1
// `(dp, x)` adds X to dp first, then dereferences. 0x81/0xa1
class InstDPInd<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrDP:$addr),
!strconcat(mnem, "\t($addr )")> {
let Size = 2;
bits<8> addr;
bits<16> Inst;
let Inst{7-0} = op;
let Inst{15-8} = addr;
}
class InstDPIndY<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrDP:$addr),
!strconcat(mnem, "\t($addr ), y")> {
let Size = 2;
bits<8> addr;
bits<16> Inst;
let Inst{7-0} = op;
let Inst{15-8} = addr;
}
class InstDPIndX<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrDP:$addr),
!strconcat(mnem, "\t($addr , x)")> {
let Size = 2;
bits<8> addr;
bits<16> Inst;
let Inst{7-0} = op;
let Inst{15-8} = addr;
}
// Absolute indirect: `JMP (addr)` (opcode 0x6C). Reads a 16-bit
// pointer from absolute address `addr` and jumps there (bank-local).
// Used by the indirect-call trampoline in crt0/libgcc to dispatch
// through a function-pointer slot.
class InstAbsInd<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrAbs:$addr),
!strconcat(mnem, "\t($addr )")> {
let Size = 3;
bits<16> addr;
bits<24> Inst;
let Inst{7-0} = op;
let Inst{23-8} = addr;
}
// DP indirect long: `[dp]` reads a 24-bit pointer at DP+offset and
// uses it to form the data address (bank-crossing). `[dp], y` adds
// Y to that 24-bit pointer. Only available with square-bracket
// syntax round parens are reserved for 16-bit indirection.
class InstDPIndLong<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrDP:$addr),
!strconcat(mnem, "\t[$addr ]")> {
let Size = 2;
bits<8> addr;
bits<16> Inst;
let Inst{7-0} = op;
let Inst{15-8} = addr;
}
class InstDPIndLongY<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrDP:$addr),
!strconcat(mnem, "\t[$addr ], y")> {
let Size = 2;
bits<8> addr;
bits<16> Inst;
let Inst{7-0} = op;
let Inst{15-8} = addr;
}
// Stack-relative addressing. Operand is an unsigned 8-bit offset added // Stack-relative addressing. Operand is an unsigned 8-bit offset added
// to S; reads / writes the byte (or word, in 16-bit M mode) at that // to S; reads / writes the byte (or word, in 16-bit M mode) at that
// stack address. Shares the addrDP operand class for parsing. // stack address. Shares the addrDP operand class for parsing.
@ -262,8 +338,6 @@ class InstStackRel<bits<8> op, string mnem>
// pointer stored at S+off, adds Y, then loads from that address. Used // pointer stored at S+off, adds Y, then loads from that address. Used
// to dereference pointers spilled to a stack scratch slot the only // to dereference pointers spilled to a stack scratch slot the only
// way the 65816 can deref a pointer not already in zero page. // way the 65816 can deref a pointer not already in zero page.
// isCodeGenOnly because the asm-parser doesn't accept `(d,S),Y` syntax
// today; codegen builds these MIs directly.
class InstStackRelIndY<bits<8> op, string mnem> class InstStackRelIndY<bits<8> op, string mnem>
: W65816Inst<(outs), (ins addrDP:$off), : W65816Inst<(outs), (ins addrDP:$off),
!strconcat(mnem, "\t($off, s), y")> { !strconcat(mnem, "\t($off, s), y")> {
@ -272,7 +346,6 @@ class InstStackRelIndY<bits<8> op, string mnem>
bits<16> Inst; bits<16> Inst;
let Inst{7-0} = op; let Inst{7-0} = op;
let Inst{15-8} = off; let Inst{15-8} = off;
let isCodeGenOnly = 1;
} }
class InstPCRel8<bits<8> op, string mnem> class InstPCRel8<bits<8> op, string mnem>

View file

@ -1086,6 +1086,11 @@ def LDA_Long : InstAbsLong<0xAF, "lda">;
def LDA_DPX : InstDPX<0xB5, "lda">; def LDA_DPX : InstDPX<0xB5, "lda">;
def LDA_AbsX : InstAbsX<0xBD, "lda">; def LDA_AbsX : InstAbsX<0xBD, "lda">;
def LDA_AbsY : InstAbsY<0xB9, "lda">; def LDA_AbsY : InstAbsY<0xB9, "lda">;
def LDA_DPInd : InstDPInd <0xB2, "lda">;
def LDA_DPIndY : InstDPIndY<0xB1, "lda">;
def LDA_DPIndX : InstDPIndX<0xA1, "lda">;
def LDA_DPIndLong : InstDPIndLong <0xA7, "lda">;
def LDA_DPIndLongY : InstDPIndLongY<0xB7, "lda">;
//---------------------------------------------------------------- STA (store A) //---------------------------------------------------------------- STA (store A)
def STA_DP : InstDP<0x85, "sta">; def STA_DP : InstDP<0x85, "sta">;
@ -1094,6 +1099,11 @@ def STA_Long : InstAbsLong<0x8F, "sta">;
def STA_DPX : InstDPX<0x95, "sta">; def STA_DPX : InstDPX<0x95, "sta">;
def STA_AbsX : InstAbsX<0x9D, "sta">; def STA_AbsX : InstAbsX<0x9D, "sta">;
def STA_AbsY : InstAbsY<0x99, "sta">; def STA_AbsY : InstAbsY<0x99, "sta">;
def STA_DPInd : InstDPInd <0x92, "sta">;
def STA_DPIndY : InstDPIndY<0x91, "sta">;
def STA_DPIndX : InstDPIndX<0x81, "sta">;
def STA_DPIndLong : InstDPIndLong <0x87, "sta">;
def STA_DPIndLongY : InstDPIndLongY<0x97, "sta">;
//---------------------------------------------------------------- LDX (load X) //---------------------------------------------------------------- LDX (load X)
def LDX_Imm8 : InstImm8<0xA2, "ldx"> { let XHigh = 1; let DecoderNamespace = "W65816XHigh"; let isCodeGenOnly = 1; let Defs = [X]; } def LDX_Imm8 : InstImm8<0xA2, "ldx"> { let XHigh = 1; let DecoderNamespace = "W65816XHigh"; let isCodeGenOnly = 1; let Defs = [X]; }
@ -1261,8 +1271,9 @@ def BVC : InstPCRel8<0x50, "bvc">;
let isBranch = 1, isTerminator = 1, isBarrier = 1, mayLoad = 0, mayStore = 0 in { let isBranch = 1, isTerminator = 1, isBarrier = 1, mayLoad = 0, mayStore = 0 in {
def BRA : InstPCRel8<0x80, "bra">; def BRA : InstPCRel8<0x80, "bra">;
def BRL : InstPCRel16<0x82, "brl">; def BRL : InstPCRel16<0x82, "brl">;
def JMP_Abs : InstAbs<0x4C, "jmp">; def JMP_Abs : InstAbs<0x4C, "jmp">;
def JML_Long : InstAbsLong<0x5C, "jml">; def JMP_AbsInd : InstAbsInd<0x6C, "jmp">;
def JML_Long : InstAbsLong<0x5C, "jml">;
} }
//---------------------------------------------------------------- Calls //---------------------------------------------------------------- Calls

View file

@ -318,7 +318,7 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
// Walk further to find the indirect use (LDAfi_indY / STAfi_indY) // Walk further to find the indirect use (LDAfi_indY / STAfi_indY)
// referencing slotB. Bail on STA to slotA before then. // referencing slotB. Bail on STA to slotA before then.
auto It3 = std::next(Sta2.getIterator()); auto It3 = std::next(Sta2.getIterator());
bool Rewrote = false; MachineInstr *IndYTarget = nullptr;
while (It3 != MBB.end()) { while (It3 != MBB.end()) {
MachineInstr &MI = *It3; MachineInstr &MI = *It3;
if (MI.isDebugInstr()) { ++It3; continue; } if (MI.isDebugInstr()) { ++It3; continue; }
@ -333,35 +333,56 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
} }
// Indirect-Y operand: operand 1 (load) or 1 (store) holds // Indirect-Y operand: operand 1 (load) or 1 (store) holds
// the FI pointer slot. Match LDAfi_indY/STAfi_indY using // the FI pointer slot. Match LDAfi_indY/STAfi_indY using
// slotB and rewrite to slotA. // slotB.
if (MI.getOpcode() == W65816::LDAfi_indY || if (MI.getOpcode() == W65816::LDAfi_indY ||
MI.getOpcode() == W65816::STAfi_indY) { MI.getOpcode() == W65816::STAfi_indY) {
// Operand layout: LDAfi_indY (outs Acc16:$dst) (ins memfi:$p);
// STAfi_indY (outs) (ins Acc16:$src, memfi:$p). memfi is
// (FI, imm-offset). Find the FI operand.
for (unsigned i = 0; i < MI.getNumOperands(); ++i) { for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
if (MI.getOperand(i).isFI() && if (MI.getOperand(i).isFI() &&
MI.getOperand(i).getIndex() == SlotB) { MI.getOperand(i).getIndex() == SlotB) {
MI.getOperand(i).setIndex(SlotA); IndYTarget = &MI;
Rewrote = true;
break; break;
} }
} }
// Stop after first indirect-Y rewrite — Lda2/Sta2 elimination if (IndYTarget) break;
// still needs Pass 2 (dead store).
if (Rewrote) break;
} }
++It3; ++It3;
} }
if (Rewrote) { if (!IndYTarget) continue;
// Mark Lda2 as erased so the outer worklist iteration skips // Function-wide safety check: slotB must have NO other refs
// it (it's an LDAfi and was added to Ldas). Sta2 isn't in // besides Sta2 (which we erase) and IndYTarget (which we
// any worklist so erasing it directly is safe. // rewrite). Cross-MBB reads of slotB would otherwise see
Erased.insert(Lda2); // stale / uninitialized values after Sta2 is gone. Same fix
Lda2->eraseFromParent(); // class as Pass -4c (#107).
Sta2.eraseFromParent(); bool OtherUse = false;
Changed = true; for (MachineBasicBlock &MBBO : MF) {
for (MachineInstr &MIO : MBBO) {
if (&MIO == &Sta2 || &MIO == IndYTarget) continue;
for (const MachineOperand &MO : MIO.operands()) {
if (MO.isFI() && MO.getIndex() == SlotB) {
OtherUse = true;
break;
}
}
if (OtherUse) break;
}
if (OtherUse) break;
} }
if (OtherUse) continue;
// Apply rewrite: IndYTarget's slotB → slotA.
for (unsigned i = 0; i < IndYTarget->getNumOperands(); ++i) {
if (IndYTarget->getOperand(i).isFI() &&
IndYTarget->getOperand(i).getIndex() == SlotB) {
IndYTarget->getOperand(i).setIndex(SlotA);
break;
}
}
// Mark Lda2 as erased so the outer worklist iteration skips
// it (it's an LDAfi and was added to Ldas). Sta2 isn't in
// any worklist so erasing it directly is safe.
Erased.insert(Lda2);
Lda2->eraseFromParent();
Sta2.eraseFromParent();
Changed = true;
} }
} }
@ -544,6 +565,27 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
if (LaterUse) break; if (LaterUse) break;
} }
if (LaterUse) continue; if (LaterUse) continue;
// Function-wide safety check: slotB must have NO references in
// any OTHER basic block. We're erasing the only init store
// (Sta); a cross-MBB reload of slotB would expect that init.
// Caught by strtok_r (#107) where greedy spilled an in-place
// INA into a slot that bb.3 also initialized; killing the
// bb.3 init left bb.10 reading garbage.
bool CrossMBBUse = false;
for (MachineBasicBlock &MBBO : MF) {
if (&MBBO == &MBB) continue;
for (MachineInstr &MIO : MBBO) {
for (const MachineOperand &MO : MIO.operands()) {
if (MO.isFI() && MO.getIndex() == SlotB) {
CrossMBBUse = true;
break;
}
}
if (CrossMBBUse) break;
}
if (CrossMBBUse) break;
}
if (CrossMBBUse) continue;
// Apply rewrites: redirect every IndY use of slotB → slotC. // Apply rewrites: redirect every IndY use of slotB → slotC.
for (MachineInstr *IndY : IndYUses) { for (MachineInstr *IndY : IndYUses) {
for (unsigned i = 0; i < IndY->getNumOperands(); ++i) { for (unsigned i = 0; i < IndY->getNumOperands(); ++i) {