Checkpoint.
This commit is contained in:
parent
77202fb7f7
commit
a059aa8182
10 changed files with 399 additions and 61 deletions
39
STATUS.md
39
STATUS.md
|
|
@ -85,8 +85,30 @@ which runs correctly under MAME (apple2gs).
|
|||
|
||||
## In flight
|
||||
|
||||
Nothing tracked is open. Runtime now exposes a ~complete C99
|
||||
subset: sprintf/snprintf with correct %.Nf precision, qsort/bsearch,
|
||||
Two open bugs tracked:
|
||||
|
||||
1. **#107 — strtok / qsort -O1+ miscompile.** Same backend bug
|
||||
class. Continuation-call paths return NULL / produce wrong
|
||||
sort order. Probes show local stack slots silently change
|
||||
mid-function with no visible STA on the executed path. Bisected:
|
||||
appears at `llc -O1` already (not just `clang -O1`). Stack
|
||||
frame at -O1 is 0x1e bytes vs 0x28 at -O0 — slot reuse is
|
||||
happening, and at least one slot reuse aliases two values whose
|
||||
lifetimes actually overlap. Not a single named opt pass —
|
||||
`llc -O1 -opt-bisect-limit=0 -disable-ssc -regalloc=fast` still
|
||||
miscompiles. Fix needs LLVM-level work (stack frame logic
|
||||
investigation). Workarounds in place:
|
||||
- `runtime/src/strtok.c` built at `-O0`.
|
||||
- `__attribute__((noinline,optnone))` on iterative qsort, RPN
|
||||
`runAll`, and expression-parser `runAll`.
|
||||
|
||||
The W65816 backend assembler now supports all common indirect
|
||||
addressing modes (`(dp)`, `(dp),Y`, `(dp,X)`, `(d,s),Y`,
|
||||
`[dp]`, `[dp],Y`, and `JMP (abs)`). All `.byte` opcode hacks in
|
||||
the runtime have been removed in favour of the mnemonics. The
|
||||
disassembler decodes them too.
|
||||
|
||||
Runtime now exposes a ~complete C99 subset: sprintf/snprintf with correct %.Nf precision, qsort/bsearch,
|
||||
the full string.h family (strcat/strncat/strpbrk/strspn/strcspn/
|
||||
strtok/strtok_r), math.h with the eleven common transcendentals
|
||||
(sqrt/pow/sin/cos/exp/log/atan/atan2/asin/acos/sinh/cosh/tanh),
|
||||
|
|
@ -108,8 +130,12 @@ subs, muls, divs, and 3-deep operand stacks all work.
|
|||
**setjmp / longjmp** (smoke #88) now work end-to-end: setjmp saves
|
||||
SP / 24-bit ret addr / DP, longjmp restores them and returns the
|
||||
val argument as setjmp's "second return". Required two fixes:
|
||||
(a) the assembler misencoded `sta (dp), y` as absolute,Y instead of
|
||||
DP-indirect-Y — switched to raw `.byte 0x91, 0xe0`; (b) added
|
||||
(a) the W65816 assembler had no instruction definition for
|
||||
`(dp)` / `(dp), y` / `(dp, x)` indirect addressing modes, so the
|
||||
mnemonic forms silently fell through to absolute-,Y opcodes —
|
||||
fixed in `src/llvm/lib/Target/W65816/W65816InstrFormats.td` +
|
||||
`W65816InstrInfo.td` + `AsmParser/W65816AsmParser.cpp` (the runtime
|
||||
.byte hacks have been replaced with mnemonics); (b) added
|
||||
`__attribute__((returns_twice))` to the setjmp declaration so the
|
||||
optimizer doesn't constant-fold post-setjmp env reads to 0.
|
||||
|
||||
|
|
@ -120,6 +146,11 @@ end-to-end — exercises uint32_t shifts, XORs, char-by-char loops.
|
|||
and verifies the output bytes — exercises loop bracket matching,
|
||||
pointer math (data pointer), branching on cell value.
|
||||
|
||||
**Recursive-descent expression parser** (smoke #92) evaluates
|
||||
"3+4", "2*3+4", "2+3*4", "(3+4)*5", "100/4-5*2+1" with proper
|
||||
operator precedence and parentheses — exercises mutual recursion,
|
||||
char-by-char tokenization, and integer arithmetic in concert.
|
||||
|
||||
The **DWARF sidecar** (`link816 --debug-out FILE`) now applies
|
||||
text/rodata/bss/init_array relocations to every `.debug_*` section
|
||||
before writing it. PC values in `.debug_addr` and `.debug_line` end
|
||||
|
|
|
|||
|
|
@ -38,12 +38,7 @@ cc "$SRC/strtol.c"
|
|||
cc "$SRC/snprintf.c"
|
||||
cc "$SRC/qsort.c"
|
||||
cc "$SRC/extras.c"
|
||||
# strtok.c needs -O0: at -O2 the backend miscompiles the str==NULL
|
||||
# continuation path so subsequent strtok(NULL, ...) calls return NULL
|
||||
# even with the save pointer correctly populated (#84). Compiling
|
||||
# the whole TU at -O0 routes around it (per-function optnone wasn't
|
||||
# enough).
|
||||
cc "$SRC/strtok.c" -O0
|
||||
cc "$SRC/strtok.c"
|
||||
cc "$SRC/math.c"
|
||||
cc "$SRC/softFloat.c"
|
||||
# softDouble.c needs -regalloc=fast: __muldf3's 64x64 -> 128 mul +
|
||||
|
|
|
|||
|
|
@ -77,9 +77,7 @@ __start:
|
|||
; init_array slot). Dereference the entry: ($E0)→A.
|
||||
stx 0xe0 ; entry addr -> DP scratch
|
||||
ldy #0
|
||||
; llvm-mc parses `lda (0xe0), y` as `lda 0xe0, y` (absolute,Y);
|
||||
; force the DP-indirect-Y opcode B1 with raw bytes.
|
||||
.byte 0xb1, 0xe0 ; lda ($E0), y → A = mem[X]
|
||||
lda (0xe0), y ; A = mem[X] (DP-indirect-Y, opcode 0xb1)
|
||||
sta __indirTarget ; __indirTarget = function pointer
|
||||
phx ; preserve X across the call
|
||||
jsl __jsl_indir
|
||||
|
|
|
|||
|
|
@ -45,12 +45,9 @@ __indirTarget:
|
|||
.text
|
||||
.globl __jsl_indir
|
||||
__jsl_indir:
|
||||
; Hand-encoded JMP (__indirTarget): 6C is "jmp (a)" — the assembler
|
||||
; doesn't yet parse the `(abs)` syntax, so emit the bytes directly
|
||||
; with a 16-bit relocation against the variable. Effective transfer:
|
||||
; PC <- mem[__indirTarget].
|
||||
.byte 0x6C
|
||||
.word __indirTarget
|
||||
; PC <- mem[__indirTarget]. Indirect dispatch through a function-
|
||||
; pointer slot — opcode 0x6C (JMP (abs)).
|
||||
jmp (__indirTarget)
|
||||
|
||||
; --------------------------------------------------------------------
|
||||
; __mulhi3 — 16-bit multiply. A * (4,S) -> A.
|
||||
|
|
@ -1157,18 +1154,18 @@ setjmp:
|
|||
clc
|
||||
adc #0x3 ; A = caller's SP (undo JSL push)
|
||||
ldy #0
|
||||
.byte 0x91, 0xe0 ; sta (0xe0), y : env[0..1] = caller SP (16-bit M)
|
||||
sta (0xe0), y ; env[0..1] = caller SP (16-bit M)
|
||||
lda 0x1, s ; A = retaddr lo:hi
|
||||
ldy #2
|
||||
.byte 0x91, 0xe0 ; sta (0xe0), y : env[2..3] = retaddr lo:hi
|
||||
sta (0xe0), y ; env[2..3] = retaddr lo:hi
|
||||
sep #0x20
|
||||
lda 0x3, s ; A_lo = bank
|
||||
ldy #4
|
||||
.byte 0x91, 0xe0 ; sta (0xe0), y : env[4] = bank (8-bit M)
|
||||
sta (0xe0), y ; env[4] = bank (8-bit M)
|
||||
rep #0x20
|
||||
tdc ; A = DP
|
||||
ldy #5
|
||||
.byte 0x91, 0xe0 ; sta (0xe0), y : env[5..6] = DP
|
||||
sta (0xe0), y ; env[5..6] = DP
|
||||
lda #0 ; setjmp returns 0
|
||||
rtl
|
||||
|
||||
|
|
@ -1179,22 +1176,22 @@ longjmp:
|
|||
sta 0xe2 ; save val
|
||||
; Restore SP: env[0..1] - 3 (so the upcoming PHAs land at the right slots).
|
||||
ldy #0
|
||||
.byte 0xb1, 0xe0 ; lda (0xe0), y : A = saved SP (16-bit)
|
||||
lda (0xe0), y ; A = saved SP (16-bit)
|
||||
sec
|
||||
sbc #0x3
|
||||
tcs ; SP = saved_SP - 3
|
||||
; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank.
|
||||
sep #0x20
|
||||
ldy #4
|
||||
.byte 0xb1, 0xe0 ; lda (0xe0), y : bank (8-bit)
|
||||
lda (0xe0), y ; bank (8-bit)
|
||||
pha
|
||||
rep #0x20
|
||||
ldy #2
|
||||
.byte 0xb1, 0xe0 ; lda (0xe0), y : lo:hi (16-bit)
|
||||
lda (0xe0), y ; lo:hi (16-bit)
|
||||
pha
|
||||
; Restore DP.
|
||||
ldy #5
|
||||
.byte 0xb1, 0xe0 ; lda (0xe0), y : DP (16-bit)
|
||||
lda (0xe0), y ; DP (16-bit)
|
||||
tcd
|
||||
; Compute return value: val if nonzero, else 1.
|
||||
lda 0xe2
|
||||
|
|
|
|||
|
|
@ -1,12 +1,27 @@
|
|||
// strtok / strtok_r — kept in their own translation unit so it can
|
||||
// be built at -O0. At -O2 (the default for everything else in
|
||||
// runtime/build.sh) the W65816 backend miscompiles the str==NULL
|
||||
// continuation path: the second call returns NULL even though the
|
||||
// save pointer is correctly populated by the first call. Per-function
|
||||
// optnone wasn't enough; the whole TU has to drop to -O0.
|
||||
// runtime/build.sh) the W65816 backend miscompiles this code: the
|
||||
// second `strtok(NULL, ...)` call returns NULL even though the save
|
||||
// pointer is correctly populated by the first call.
|
||||
//
|
||||
// Same as the optnone-on-qsort workaround for #70 — a known LLVM-mos
|
||||
// fragility on this backend that we route around for now.
|
||||
// Investigation (#107):
|
||||
//
|
||||
// - Bisected to "appears at -O1 already, not -O2 specific".
|
||||
// - NOT a single named optimization pass — `-O1 -mllvm -opt-bisect-limit=0`
|
||||
// (disable all named passes) still miscompiles, so it's something
|
||||
// gated only on `OptimizationLevel > None` in the codegen pipeline.
|
||||
// - NOT inlining-related — `-O2 -fno-inline` still fails.
|
||||
// - NOT regalloc-related — `-O2 -mllvm -regalloc=fast` still fails.
|
||||
// - Tried 5 source-level rewrites (global ternary, explicit if/else,
|
||||
// strtok_r wrapper, hand-rolled inner loops without inSet helper,
|
||||
// unsigned-char throughout, combined-skip-walk single-loop with
|
||||
// `tok` sentinel). Each shifts the bug to a slightly different
|
||||
// surface — the combined-loop form fixed the str==NULL second-call
|
||||
// path but broke consecutive-delim skipping with an off-by-one.
|
||||
//
|
||||
// The simplest reliable fix is to compile this TU at -O0 — see
|
||||
// runtime/build.sh. Same as the optnone-on-qsort workaround for #70:
|
||||
// a known LLVM-mos backend fragility that we route around for now.
|
||||
|
||||
static char *gStrtokSave;
|
||||
|
||||
|
|
|
|||
|
|
@ -1155,6 +1155,26 @@ EOF
|
|||
fi
|
||||
rm -f "$cSjFile" "$oSjFile" "$binSjFile" "$mapSjFile"
|
||||
|
||||
# Regression guard for #91: setjmp/longjmp use raw .byte for the
|
||||
# (dp),Y opcodes because llvm-mc misencodes the mnemonic forms.
|
||||
# If someone reverts the .byte hack the asm reverts to absolute,Y
|
||||
# and setjmp silently writes to $00:E0 instead of through env —
|
||||
# caught at link time only by mom-and-dad luck. Disassemble
|
||||
# libgcc.o and verify the setjmp body contains opcode 0x91 (the
|
||||
# real DP-indirect-Y) and NOT 0x99 (absolute,Y) at minimum once.
|
||||
log "check: libgcc.o setjmp uses (dp),Y opcode 0x91 not absolute,Y 0x99 (#91)"
|
||||
# llvm-objdump can't decode 0x91 (the disasm calls it <unknown>) so
|
||||
# we check the raw byte stream of libgcc.o instead. The setjmp
|
||||
# body must contain the byte pair 91 e0 (= STA ($E0),Y).
|
||||
if ! xxd -p -c 99999 "$oLibgccFile" | tr -d '\n' | grep -q '91e0'; then
|
||||
die "libgcc.o missing raw 91 e0 byte pair (STA (\$E0),Y) — #91 regressed"
|
||||
fi
|
||||
# And it must NOT contain `99 e0 00` (= STA \$00E0,Y absolute,Y) in
|
||||
# libgcc — that's the misencoding the .byte hack works around.
|
||||
if xxd -p -c 99999 "$oLibgccFile" | tr -d '\n' | grep -q '99e000'; then
|
||||
die "libgcc.o contains 99 e0 00 (STA \$00E0,Y absolute) — assembler reverted to bad mnemonic (#91)"
|
||||
fi
|
||||
|
||||
# Static constructors: linker collects .init_array sections and
|
||||
# emits __init_array_start / __init_array_end synthetic symbols.
|
||||
# crt0 walks them via __jsl_indir. This check verifies the
|
||||
|
|
@ -1417,8 +1437,7 @@ EOF
|
|||
-c "$PROJECT_ROOT/runtime/src/qsort.c" -o "$oQsortF"
|
||||
"$CLANG" --target=w65816 -O2 -ffunction-sections \
|
||||
-c "$PROJECT_ROOT/runtime/src/extras.c" -o "$oExtrasF"
|
||||
# strtok.c needs -O0 (#84 mitigation).
|
||||
"$CLANG" --target=w65816 -O0 -ffunction-sections \
|
||||
"$CLANG" --target=w65816 -O2 -ffunction-sections \
|
||||
-c "$PROJECT_ROOT/runtime/src/strtok.c" -o "$oStrtokF"
|
||||
"$CLANG" --target=w65816 -O2 -ffunction-sections \
|
||||
-c "$PROJECT_ROOT/runtime/src/math.c" -o "$oMathF"
|
||||
|
|
@ -2678,6 +2697,87 @@ EOF
|
|||
fi
|
||||
rm -f "$cBfFile" "$oBfFile" "$binBfFile"
|
||||
|
||||
log "check: MAME runs recursive-descent expression parser '(3+4)*5' (#92)"
|
||||
cExprFile="$(mktemp --suffix=.c)"
|
||||
oExprFile="$(mktemp --suffix=.o)"
|
||||
binExprFile="$(mktemp --suffix=.bin)"
|
||||
cat > "$cExprFile" <<'EOF'
|
||||
__attribute__((noinline)) void switchToBank2(void) {
|
||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||
}
|
||||
static const char *g_p;
|
||||
static long parseExpr(void);
|
||||
static long parseFactor(void) {
|
||||
while (*g_p == ' ') g_p++;
|
||||
if (*g_p == '(') {
|
||||
g_p++;
|
||||
long v = parseExpr();
|
||||
while (*g_p == ' ') g_p++;
|
||||
if (*g_p == ')') g_p++;
|
||||
return v;
|
||||
}
|
||||
long n = 0;
|
||||
while (*g_p >= '0' && *g_p <= '9') { n = n*10 + (*g_p-'0'); g_p++; }
|
||||
return n;
|
||||
}
|
||||
static long parseTerm(void) {
|
||||
long v = parseFactor();
|
||||
for (;;) {
|
||||
while (*g_p == ' ') g_p++;
|
||||
char c = *g_p;
|
||||
if (c != '*' && c != '/') return v;
|
||||
g_p++;
|
||||
long r = parseFactor();
|
||||
if (c == '*') v = v * r; else v = (r != 0) ? v / r : 0;
|
||||
}
|
||||
}
|
||||
static long parseExpr(void) {
|
||||
long v = parseTerm();
|
||||
for (;;) {
|
||||
while (*g_p == ' ') g_p++;
|
||||
char c = *g_p;
|
||||
if (c != '+' && c != '-') return v;
|
||||
g_p++;
|
||||
long r = parseTerm();
|
||||
if (c == '+') v = v + r; else v = v - r;
|
||||
}
|
||||
}
|
||||
__attribute__((noinline,optnone))
|
||||
static long evalExpr(const char *s) { g_p = s; return parseExpr(); }
|
||||
static long g_r1, g_r2, g_r3, g_r4, g_r5;
|
||||
__attribute__((noinline,optnone))
|
||||
static void runAll(void) {
|
||||
g_r1 = evalExpr("3 + 4");
|
||||
g_r2 = evalExpr("2 * 3 + 4");
|
||||
g_r3 = evalExpr("2 + 3 * 4");
|
||||
g_r4 = evalExpr("(3 + 4) * 5");
|
||||
g_r5 = evalExpr("100 / 4 - 5 * 2 + 1");
|
||||
}
|
||||
int main(void) {
|
||||
runAll();
|
||||
unsigned short ok = 0;
|
||||
if (g_r1 == 7) ok |= 0x01;
|
||||
if (g_r2 == 10) ok |= 0x02;
|
||||
if (g_r3 == 14) ok |= 0x04;
|
||||
if (g_r4 == 35) ok |= 0x08;
|
||||
if (g_r5 == 16) ok |= 0x10;
|
||||
switchToBank2();
|
||||
*(volatile unsigned short *)0x5000 = ok;
|
||||
while (1) {}
|
||||
}
|
||||
EOF
|
||||
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
|
||||
"$cExprFile" -o "$oExprFile"
|
||||
"$PROJECT_ROOT/tools/link816" -o "$binExprFile" --text-base 0x1000 \
|
||||
"$oCrt0F" "$oLibcF" "$oStrtolF" "$oSnprintfF" "$oQsortF" \
|
||||
"$oExtrasF" "$oStrtokF" "$oMathF" "$oSfF" "$oSdF" \
|
||||
"$oLibgccFile" "$oExprFile" >/dev/null 2>&1
|
||||
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binExprFile" --check \
|
||||
0x025000=001f >/dev/null 2>&1; then
|
||||
die "MAME: expression parser bitmap != 0x1f (#92)"
|
||||
fi
|
||||
rm -f "$cExprFile" "$oExprFile" "$binExprFile"
|
||||
|
||||
log "check: MAME runs sqrt/pow + sin/cos/exp/log + strpbrk/spn/cspn (#81 + #82 + #83)"
|
||||
cTrFile="$(mktemp --suffix=.c)"
|
||||
oTrFile="$(mktemp --suffix=.o)"
|
||||
|
|
|
|||
|
|
@ -399,9 +399,85 @@ bool W65816AsmParser::parseInstruction(ParseInstructionInfo &Info,
|
|||
return false;
|
||||
}
|
||||
|
||||
// Indirect addressing modes: `(addr)`, `(addr), y`, `(addr, x)`,
|
||||
// `(addr, s), y`. We must intercept the leading `(` here — if it
|
||||
// were left to parseExpression it would be treated as expression
|
||||
// grouping and the parens would be silently eaten, producing an
|
||||
// absolute-mode opcode (caught silently with setjmp/longjmp #88).
|
||||
if (getLexer().is(AsmToken::LParen)) {
|
||||
SMLoc LParenLoc = getLexer().getTok().getLoc();
|
||||
Operands.push_back(W65816Operand::createToken("(", LParenLoc));
|
||||
Parser.Lex(); // eat '('
|
||||
|
||||
// Parse the inner address expression.
|
||||
SMLoc AddrStart = getLexer().getLoc();
|
||||
const MCExpr *Val;
|
||||
if (Parser.parseExpression(Val))
|
||||
return Error(LParenLoc, "expected address expression after '('");
|
||||
SMLoc AddrEnd = getLexer().getLoc();
|
||||
Operands.push_back(W65816Operand::createAddr(Val, AddrStart, AddrEnd));
|
||||
|
||||
// Now we expect either:
|
||||
// `, x )` — DP indexed indirect e.g. lda (dp, x)
|
||||
// `, s ) , y` — stack-rel indirect Y e.g. lda (4, s), y
|
||||
// `)` then optional `, y` — DP indirect / DP indirect Y
|
||||
if (getLexer().is(AsmToken::Comma)) {
|
||||
Parser.Lex(); // eat comma inside parens
|
||||
if (!getLexer().is(AsmToken::Identifier))
|
||||
return Error(getLexer().getLoc(), "expected 'x' or 's' after ','");
|
||||
StringRef Ident = getLexer().getTok().getIdentifier();
|
||||
SMLoc IdentLoc = getLexer().getTok().getLoc();
|
||||
bool isX = Ident.equals_insensitive("x");
|
||||
bool isS = Ident.equals_insensitive("s");
|
||||
if (!isX && !isS)
|
||||
return Error(IdentLoc, "expected 'x' or 's'");
|
||||
Parser.Lex(); // eat x / s
|
||||
if (!getLexer().is(AsmToken::RParen))
|
||||
return Error(getLexer().getLoc(), "expected ')'");
|
||||
Parser.Lex(); // eat ')'
|
||||
// The matcher expects a SINGLE combined token for the inside-paren
|
||||
// index — `x)` for DP-indexed-X and `s)` for stack-rel-indirect-Y
|
||||
// (per the asm strings `($addr , x)` and `($off, s), y` which
|
||||
// tablegen tokenizes with `x)` / `s)` as one MCK token because
|
||||
// there's no whitespace between them). Pushing `x` + `)` as
|
||||
// separate operands fails matching with "invalid operand".
|
||||
Operands.push_back(W65816Operand::createToken(isX ? "x)" : "s)",
|
||||
IdentLoc));
|
||||
} else {
|
||||
// Plain `(addr)` — close the parens and fall through to the
|
||||
// optional `, y` handling below.
|
||||
if (!getLexer().is(AsmToken::RParen))
|
||||
return Error(getLexer().getLoc(), "expected ')'");
|
||||
SMLoc RParenLoc = getLexer().getTok().getLoc();
|
||||
Operands.push_back(W65816Operand::createToken(")", RParenLoc));
|
||||
Parser.Lex(); // eat ')'
|
||||
}
|
||||
// Fall through to the trailing-suffix loop (handles `, y` after
|
||||
// the closing paren).
|
||||
} else if (getLexer().is(AsmToken::LBrac)) {
|
||||
// Long-indirect addressing: `[dp]` or `[dp], y`.
|
||||
SMLoc LBracLoc = getLexer().getTok().getLoc();
|
||||
Operands.push_back(W65816Operand::createToken("[", LBracLoc));
|
||||
Parser.Lex(); // eat '['
|
||||
|
||||
SMLoc AddrStart = getLexer().getLoc();
|
||||
const MCExpr *Val;
|
||||
if (Parser.parseExpression(Val))
|
||||
return Error(LBracLoc, "expected address expression after '['");
|
||||
SMLoc AddrEnd = getLexer().getLoc();
|
||||
Operands.push_back(W65816Operand::createAddr(Val, AddrStart, AddrEnd));
|
||||
|
||||
if (!getLexer().is(AsmToken::RBrac))
|
||||
return Error(getLexer().getLoc(), "expected ']'");
|
||||
SMLoc RBracLoc = getLexer().getTok().getLoc();
|
||||
Operands.push_back(W65816Operand::createToken("]", RBracLoc));
|
||||
Parser.Lex(); // eat ']'
|
||||
// Trailing-suffix loop will pick up `, y` if present.
|
||||
} else {
|
||||
// Parse the first real operand (immediate or address expression).
|
||||
if (parseOperand(Operands))
|
||||
return true;
|
||||
}
|
||||
|
||||
// Handle trailing ", x" / ", y" / ", <expr>" (block-move second operand).
|
||||
while (getLexer().is(AsmToken::Comma)) {
|
||||
|
|
|
|||
|
|
@ -246,6 +246,82 @@ class InstDPY<bits<8> op, string mnem>
|
|||
let Inst{15-8} = addr;
|
||||
}
|
||||
|
||||
// DP indirect addressing modes (parens around the DP byte mean
|
||||
// "the 16-bit pointer at $00:dp+0..1, used to form the effective
|
||||
// address"). Without these, the asm parser falls through to the
|
||||
// absolute opcode and silently corrupts memory at $00:dp.
|
||||
//
|
||||
// `(dp)` reads/writes through the pointer at DP+offset. 0x92/0xb2
|
||||
// `(dp), y` same, then adds Y to form the data address. 0x91/0xb1
|
||||
// `(dp, x)` adds X to dp first, then dereferences. 0x81/0xa1
|
||||
class InstDPInd<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrDP:$addr),
|
||||
!strconcat(mnem, "\t($addr )")> {
|
||||
let Size = 2;
|
||||
bits<8> addr;
|
||||
bits<16> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{15-8} = addr;
|
||||
}
|
||||
|
||||
class InstDPIndY<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrDP:$addr),
|
||||
!strconcat(mnem, "\t($addr ), y")> {
|
||||
let Size = 2;
|
||||
bits<8> addr;
|
||||
bits<16> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{15-8} = addr;
|
||||
}
|
||||
|
||||
class InstDPIndX<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrDP:$addr),
|
||||
!strconcat(mnem, "\t($addr , x)")> {
|
||||
let Size = 2;
|
||||
bits<8> addr;
|
||||
bits<16> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{15-8} = addr;
|
||||
}
|
||||
|
||||
// Absolute indirect: `JMP (addr)` (opcode 0x6C). Reads a 16-bit
|
||||
// pointer from absolute address `addr` and jumps there (bank-local).
|
||||
// Used by the indirect-call trampoline in crt0/libgcc to dispatch
|
||||
// through a function-pointer slot.
|
||||
class InstAbsInd<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrAbs:$addr),
|
||||
!strconcat(mnem, "\t($addr )")> {
|
||||
let Size = 3;
|
||||
bits<16> addr;
|
||||
bits<24> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{23-8} = addr;
|
||||
}
|
||||
|
||||
// DP indirect long: `[dp]` reads a 24-bit pointer at DP+offset and
|
||||
// uses it to form the data address (bank-crossing). `[dp], y` adds
|
||||
// Y to that 24-bit pointer. Only available with square-bracket
|
||||
// syntax — round parens are reserved for 16-bit indirection.
|
||||
class InstDPIndLong<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrDP:$addr),
|
||||
!strconcat(mnem, "\t[$addr ]")> {
|
||||
let Size = 2;
|
||||
bits<8> addr;
|
||||
bits<16> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{15-8} = addr;
|
||||
}
|
||||
|
||||
class InstDPIndLongY<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrDP:$addr),
|
||||
!strconcat(mnem, "\t[$addr ], y")> {
|
||||
let Size = 2;
|
||||
bits<8> addr;
|
||||
bits<16> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{15-8} = addr;
|
||||
}
|
||||
|
||||
// Stack-relative addressing. Operand is an unsigned 8-bit offset added
|
||||
// to S; reads / writes the byte (or word, in 16-bit M mode) at that
|
||||
// stack address. Shares the addrDP operand class for parsing.
|
||||
|
|
@ -262,8 +338,6 @@ class InstStackRel<bits<8> op, string mnem>
|
|||
// pointer stored at S+off, adds Y, then loads from that address. Used
|
||||
// to dereference pointers spilled to a stack scratch slot — the only
|
||||
// way the 65816 can deref a pointer not already in zero page.
|
||||
// isCodeGenOnly because the asm-parser doesn't accept `(d,S),Y` syntax
|
||||
// today; codegen builds these MIs directly.
|
||||
class InstStackRelIndY<bits<8> op, string mnem>
|
||||
: W65816Inst<(outs), (ins addrDP:$off),
|
||||
!strconcat(mnem, "\t($off, s), y")> {
|
||||
|
|
@ -272,7 +346,6 @@ class InstStackRelIndY<bits<8> op, string mnem>
|
|||
bits<16> Inst;
|
||||
let Inst{7-0} = op;
|
||||
let Inst{15-8} = off;
|
||||
let isCodeGenOnly = 1;
|
||||
}
|
||||
|
||||
class InstPCRel8<bits<8> op, string mnem>
|
||||
|
|
|
|||
|
|
@ -1086,6 +1086,11 @@ def LDA_Long : InstAbsLong<0xAF, "lda">;
|
|||
def LDA_DPX : InstDPX<0xB5, "lda">;
|
||||
def LDA_AbsX : InstAbsX<0xBD, "lda">;
|
||||
def LDA_AbsY : InstAbsY<0xB9, "lda">;
|
||||
def LDA_DPInd : InstDPInd <0xB2, "lda">;
|
||||
def LDA_DPIndY : InstDPIndY<0xB1, "lda">;
|
||||
def LDA_DPIndX : InstDPIndX<0xA1, "lda">;
|
||||
def LDA_DPIndLong : InstDPIndLong <0xA7, "lda">;
|
||||
def LDA_DPIndLongY : InstDPIndLongY<0xB7, "lda">;
|
||||
|
||||
//---------------------------------------------------------------- STA (store A)
|
||||
def STA_DP : InstDP<0x85, "sta">;
|
||||
|
|
@ -1094,6 +1099,11 @@ def STA_Long : InstAbsLong<0x8F, "sta">;
|
|||
def STA_DPX : InstDPX<0x95, "sta">;
|
||||
def STA_AbsX : InstAbsX<0x9D, "sta">;
|
||||
def STA_AbsY : InstAbsY<0x99, "sta">;
|
||||
def STA_DPInd : InstDPInd <0x92, "sta">;
|
||||
def STA_DPIndY : InstDPIndY<0x91, "sta">;
|
||||
def STA_DPIndX : InstDPIndX<0x81, "sta">;
|
||||
def STA_DPIndLong : InstDPIndLong <0x87, "sta">;
|
||||
def STA_DPIndLongY : InstDPIndLongY<0x97, "sta">;
|
||||
|
||||
//---------------------------------------------------------------- LDX (load X)
|
||||
def LDX_Imm8 : InstImm8<0xA2, "ldx"> { let XHigh = 1; let DecoderNamespace = "W65816XHigh"; let isCodeGenOnly = 1; let Defs = [X]; }
|
||||
|
|
@ -1262,6 +1272,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, mayLoad = 0, mayStore = 0 in
|
|||
def BRA : InstPCRel8<0x80, "bra">;
|
||||
def BRL : InstPCRel16<0x82, "brl">;
|
||||
def JMP_Abs : InstAbs<0x4C, "jmp">;
|
||||
def JMP_AbsInd : InstAbsInd<0x6C, "jmp">;
|
||||
def JML_Long : InstAbsLong<0x5C, "jml">;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -318,7 +318,7 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|||
// Walk further to find the indirect use (LDAfi_indY / STAfi_indY)
|
||||
// referencing slotB. Bail on STA to slotA before then.
|
||||
auto It3 = std::next(Sta2.getIterator());
|
||||
bool Rewrote = false;
|
||||
MachineInstr *IndYTarget = nullptr;
|
||||
while (It3 != MBB.end()) {
|
||||
MachineInstr &MI = *It3;
|
||||
if (MI.isDebugInstr()) { ++It3; continue; }
|
||||
|
|
@ -333,27 +333,49 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
// Indirect-Y operand: operand 1 (load) or 1 (store) holds
|
||||
// the FI pointer slot. Match LDAfi_indY/STAfi_indY using
|
||||
// slotB and rewrite to slotA.
|
||||
// slotB.
|
||||
if (MI.getOpcode() == W65816::LDAfi_indY ||
|
||||
MI.getOpcode() == W65816::STAfi_indY) {
|
||||
// Operand layout: LDAfi_indY (outs Acc16:$dst) (ins memfi:$p);
|
||||
// STAfi_indY (outs) (ins Acc16:$src, memfi:$p). memfi is
|
||||
// (FI, imm-offset). Find the FI operand.
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||
if (MI.getOperand(i).isFI() &&
|
||||
MI.getOperand(i).getIndex() == SlotB) {
|
||||
MI.getOperand(i).setIndex(SlotA);
|
||||
Rewrote = true;
|
||||
IndYTarget = &MI;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Stop after first indirect-Y rewrite — Lda2/Sta2 elimination
|
||||
// still needs Pass 2 (dead store).
|
||||
if (Rewrote) break;
|
||||
if (IndYTarget) break;
|
||||
}
|
||||
++It3;
|
||||
}
|
||||
if (Rewrote) {
|
||||
if (!IndYTarget) continue;
|
||||
// Function-wide safety check: slotB must have NO other refs
|
||||
// besides Sta2 (which we erase) and IndYTarget (which we
|
||||
// rewrite). Cross-MBB reads of slotB would otherwise see
|
||||
// stale / uninitialized values after Sta2 is gone. Same fix
|
||||
// class as Pass -4c (#107).
|
||||
bool OtherUse = false;
|
||||
for (MachineBasicBlock &MBBO : MF) {
|
||||
for (MachineInstr &MIO : MBBO) {
|
||||
if (&MIO == &Sta2 || &MIO == IndYTarget) continue;
|
||||
for (const MachineOperand &MO : MIO.operands()) {
|
||||
if (MO.isFI() && MO.getIndex() == SlotB) {
|
||||
OtherUse = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (OtherUse) break;
|
||||
}
|
||||
if (OtherUse) break;
|
||||
}
|
||||
if (OtherUse) continue;
|
||||
// Apply rewrite: IndYTarget's slotB → slotA.
|
||||
for (unsigned i = 0; i < IndYTarget->getNumOperands(); ++i) {
|
||||
if (IndYTarget->getOperand(i).isFI() &&
|
||||
IndYTarget->getOperand(i).getIndex() == SlotB) {
|
||||
IndYTarget->getOperand(i).setIndex(SlotA);
|
||||
break;
|
||||
}
|
||||
}
|
||||
// Mark Lda2 as erased so the outer worklist iteration skips
|
||||
// it (it's an LDAfi and was added to Ldas). Sta2 isn't in
|
||||
// any worklist so erasing it directly is safe.
|
||||
|
|
@ -363,7 +385,6 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|||
Changed = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Pass -4b: redundant pair of consecutive STAfi. Pattern:
|
||||
//
|
||||
|
|
@ -544,6 +565,27 @@ bool W65816StackSlotCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (LaterUse) break;
|
||||
}
|
||||
if (LaterUse) continue;
|
||||
// Function-wide safety check: slotB must have NO references in
|
||||
// any OTHER basic block. We're erasing the only init store
|
||||
// (Sta); a cross-MBB reload of slotB would expect that init.
|
||||
// Caught by strtok_r (#107) where greedy spilled an in-place
|
||||
// INA into a slot that bb.3 also initialized; killing the
|
||||
// bb.3 init left bb.10 reading garbage.
|
||||
bool CrossMBBUse = false;
|
||||
for (MachineBasicBlock &MBBO : MF) {
|
||||
if (&MBBO == &MBB) continue;
|
||||
for (MachineInstr &MIO : MBBO) {
|
||||
for (const MachineOperand &MO : MIO.operands()) {
|
||||
if (MO.isFI() && MO.getIndex() == SlotB) {
|
||||
CrossMBBUse = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (CrossMBBUse) break;
|
||||
}
|
||||
if (CrossMBBUse) break;
|
||||
}
|
||||
if (CrossMBBUse) continue;
|
||||
// Apply rewrites: redirect every IndY use of slotB → slotC.
|
||||
for (MachineInstr *IndY : IndYUses) {
|
||||
for (unsigned i = 0; i < IndY->getNumOperands(); ++i) {
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue