#!/usr/bin/env bash
# W65816 backend smoke test.  Run after any change to confirm the
# scaffold still builds and llc still registers the target.  Non-zero
# exit on any failure.
#
# Usage: scripts/smokeTest.sh [--build]
#   --build   Run ninja to (re)build LLVMW65816* + llc before testing.
#             Without this flag the script assumes tools/llvm-mos-build
#             is already up to date.

set -euo pipefail

# common.sh is expected to provide log/warn/die plus TOOLS_DIR and
# PROJECT_ROOT: all are used below but none is defined in this file.
source "$(dirname "$0")/common.sh"

# Resource caps for child compilers.  A bug in the W65816 backend can send
# clang/llc into a runaway combine/inserter loop that allocates tens of GB
# of RAM.  When that happens the kernel OOM-killer takes down the entire
# tmux scope (bash, the compiler, and the parent Claude Code session with
# it).  Bounding virtual memory and CPU time here turns "OOM kills the
# terminal" into "compiler dies with SIGSEGV / SIGXCPU and we get a clean
# error."  Numbers are well above what a healthy compile of these tiny
# test inputs needs (~200 MB / a few seconds), so legitimate work is
# unaffected.
ulimit -v $((10 * 1024 * 1024))   # 10 GB virtual memory ceiling
ulimit -t 90                      # 90 CPU-seconds per process

BUILD_DIR="$TOOLS_DIR/llvm-mos-build"
LLC="$BUILD_DIR/bin/llc"
LLVM_MC="$BUILD_DIR/bin/llvm-mc"

doBuild=0
for arg in "$@"; do
    case "$arg" in
        --build) doBuild=1 ;;
        *) die "unknown flag: $arg" ;;
    esac
done

if [ "$doBuild" -eq 1 ]; then
    log "ninja LLVMW65816* llc llvm-mc llvm-objdump"
    ninja -C "$BUILD_DIR" LLVMW65816Info LLVMW65816Desc LLVMW65816CodeGen \
        LLVMW65816AsmParser LLVMW65816Disassembler llc llvm-mc llvm-objdump
fi

# Checked AFTER the optional build: the message recommends --build, so
# --build must be able to create llc on a tree that does not have it yet.
[ -x "$LLC" ] || die "llc not found at $LLC; run setup.sh and applyBackend.sh, or pass --build"

# 1. Target must be registered.
log "check: llc --version lists w65816"
if ! "$LLC" --version 2>/dev/null | grep -q "^[[:space:]]*w65816[[:space:]]"; then
    die "llc does not list the w65816 target"
fi

# 2. Empty IR must compile to nothing.
log "check: llc -march=w65816 -filetype=null /dev/null exits 0"
"$LLC" -march=w65816 -filetype=null /dev/null

# 3.
#    Trivial IR that shouldn't touch our (unimplemented) codegen paths.
tmp="$(mktemp --suffix=.ll)"
trap 'rm -f "$tmp"' EXIT
cat > "$tmp" <<'EOF'
; ModuleID = 'smoke'
target triple = "w65816-unknown-unknown"
; Empty module: exercises target initialization only.
EOF
log "check: llc accepts an empty module with w65816 triple"
"$LLC" -filetype=null "$tmp"
# Later checks replace the EXIT trap with lists that no longer name $tmp,
# so remove it as soon as it has served its purpose.
rm -f "$tmp"

# 4. MC layer round-trip.  Assemble a representative mix of addressing
#    modes and mode-switching instructions and grep for the expected
#    encoding bytes.  Hex-byte strings are stable across llvm-mc
#    formatting changes, unlike full-line string matching.
if [ -x "$LLVM_MC" ]; then
    log "check: llvm-mc -arch=w65816 emits expected encodings"
    # Only exercise instructions that round-trip cleanly:
    #   - LDA/LDX/LDY immediates without explicit force use the _Imm16
    #     form (codegen-dominant path).  A pure `lda #x` assembles to
    #     LDA_Imm16 since the _Imm8 variant is isCodeGenOnly.
    mcInput='
        nop
        rep #0x30
        sep #0x20
        lda #0x1234
        sta 0x10
        sta 0x1000
        sta 0x010000
        mvn 0x01, 0x02
        jsl 0x012345'
    mcOut="$(printf '%s\n' "$mcInput" | "$LLVM_MC" -arch=w65816 -show-encoding 2>&1)"

    # assertHas ENCODING
    #   Dies (after dumping the llvm-mc output to stderr) unless the fixed
    #   string ENCODING appears in $mcOut.  A here-string is used instead
    #   of `printf | grep -q` so an early grep exit cannot turn into a
    #   SIGPIPE failure under `set -o pipefail`.
    assertHas() {
        if ! grep -qF -- "$1" <<<"$mcOut"; then
            warn "missing expected encoding: $1"
            printf '%s\n' "$mcOut" >&2
            die "llvm-mc did not produce expected encoding"
        fi
    }
    assertHas "[0xea]"
    assertHas "[0xc2,0x30]"
    assertHas "[0xe2,0x20]"
    assertHas "[0xa9,0x34,0x12]"
    assertHas "[0x85,0x10]"
    assertHas "[0x8d,0x00,0x10]"
    assertHas "[0x8f,0x00,0x00,0x01]"
    assertHas "[0x54,0x01,0x02]"
    assertHas "[0x22,0x45,0x23,0x01]"
else
    warn "llvm-mc not built; skipping MC round-trip check"
fi

# 5. Disassembler round-trip.  A raw byte stream fed to llvm-mc
#    --disassemble should produce the mnemonic we expect.
if [ -x "$LLVM_MC" ]; then
    log "check: llvm-mc --disassemble decodes bytes back to mnemonics"
    disasmOut="$(printf '0xea 0xa9 0x34 0x12 0x85 0x10 0x8d 0x00 0x10 0x6b\n' \
        | "$LLVM_MC" --disassemble --triple=w65816 2>&1)"
    for mnem in "nop" "lda #0x1234" "sta 0x10" "sta 0x1000" "rtl"; do
        # Here-string rather than `printf | grep -q`: immune to SIGPIPE
        # being reported as a failure under pipefail.
        if ! grep -qF -- "$mnem" <<<"$disasmOut"; then
            warn "disassembler missing: $mnem"
            printf '%s\n' "$disasmOut" >&2
            die "disassembler round-trip failed"
        fi
    done
fi

# 6. End-to-end codegen: IR -> asm -> ELF -> disassembly.
#    This is the first real codegen test: verifies that our LowerReturn,
#    DAG pattern for the i16 constant pseudo, and prologue-emitting
#    frame lowering produce runnable 65816 machine code.
OBJDUMP="$BUILD_DIR/bin/llvm-objdump"
if [ -x "$LLC" ] && [ -x "$LLVM_MC" ] && [ -x "$OBJDUMP" ]; then
    log "check: end-to-end IR -> asm -> ELF -> disasm for a trivial function"
    irFile="$(mktemp --suffix=.ll)"
    sFile="$(mktemp --suffix=.s)"
    oFile="$(mktemp --suffix=.o)"
    trap 'rm -f "$irFile" "$sFile" "$oFile"' EXIT
    cat > "$irFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @answer() {
  ret i16 42
}
EOF
    "$LLC" -march=w65816 "$irFile" -o "$sFile"
    "$LLVM_MC" -arch=w65816 -filetype=obj "$sFile" -o "$oFile"
    disasm="$("$OBJDUMP" --triple=w65816 -d "$oFile" 2>&1)"
    for expect in "rep #0x30" "lda #0x2a" "rtl"; do
        if ! grep -qF -- "$expect" <<<"$disasm"; then
            warn "end-to-end pipeline missing: $expect"
            printf '%s\n' "$disasm" >&2
            die "end-to-end pipeline failed"
        fi
    done
    # The next check reuses the names irFile/sFile and its trap drops
    # oFile, so clean up now or these three files are leaked.
    rm -f "$irFile" "$sFile" "$oFile"
fi

# 7. Real codegen check: a non-trivial function exercising globals,
#    arithmetic, branches, bitwise.  This tests our DAG selection
#    patterns and AsmPrinter pseudo expansions.
if [ -x "$LLC" ]; then
    log "check: llc compiles a multi-pattern function"
    irFile="$(mktemp --suffix=.ll)"
    sFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile"' EXIT
    cat > "$irFile" <<'EOF'
target triple = "w65816-unknown-unknown"
@a = global i16 0
@b = global i16 0
define i16 @demo() {
  %x = load i16, ptr @a
  %y = load i16, ptr @b
  %s = add i16 %x, %y
  %m = and i16 %s, 4095
  %c = icmp ult i16 %m, 100
  br i1 %c, label %lo, label %hi
lo:
  ret i16 0
hi:
  ret i16 %m
}
EOF
    "$LLC" -march=w65816 "$irFile" -o "$sFile"
    for expect in "rep #0x30" "lda a" "clc" "adc b" "and #0xfff" "cmp #0x64" "bcs" "rtl"; do
        if ! grep -qF -- "$expect" "$sFile"; then
            warn "multi-pattern test missing: $expect"
            cat "$sFile" >&2
            die "multi-pattern test failed"
        fi
    done
    # Passed: the files are no longer needed.  The variables stay set, so
    # later traps that still name them simply `rm -f` a missing path.
    rm -f "$irFile" "$sFile"
fi

# 8. Function call check: caller passes i16 in A, callee adds, returns.
if [ -x "$LLC" ]; then
    log "check: llc compiles a function call (single i16 arg in A)"
    irCallFile="$(mktemp --suffix=.ll)"
    sCallFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile"' EXIT
    cat > "$irCallFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @inc(i16 %x) {
  %r = add i16 %x, 1
  ret i16 %r
}
define i16 @caller() {
  %r = call i16 @inc(i16 41)
  ret i16 %r
}
EOF
    "$LLC" -march=w65816 "$irCallFile" -o "$sCallFile"
    # Caller passes 41 in A and JSL's inc.  Inc is now an `inc a`
    # peephole (was clc; adc #1 before the INA_PSEUDO pattern).
    for expect in "lda #0x29" "jsl inc" "inc a"; do
        if ! grep -qF -- "$expect" "$sCallFile"; then
            warn "call test missing: $expect"
            cat "$sCallFile" >&2
            die "call test failed"
        fi
    done
    rm -f "$irCallFile" "$sCallFile"
fi

# 9. Multi-arg sum: 3-arg function reads args 1 and 2 via stack-relative
#    addressing.
if [ -x "$LLC" ]; then
    log "check: llc compiles a 3-arg function (stack-relative reads)"
    irMaFile="$(mktemp --suffix=.ll)"
    sMaFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile"' EXIT
    cat > "$irMaFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @sum3(i16 %a, i16 %b, i16 %c) {
  %ab = add i16 %a, %b
  %r = add i16 %ab, %c
  ret i16 %r
}
EOF
    "$LLC" -march=w65816 "$irMaFile" -o "$sMaFile"
    for expect in "adc 0x4, s" "adc 0x6, s" "rtl"; do
        if ! grep -qF -- "$expect" "$sMaFile"; then
            warn "multi-arg test missing: $expect"
            cat "$sMaFile" >&2
            die "multi-arg test failed"
        fi
    done
    # Passed: remove this check's temp files right away so they cannot be
    # leaked by a later trap that omits them.
    rm -f "$irMaFile" "$sMaFile"
fi

# 10. i8 codegen: an i8 add+1 lowers to a single inc-A in 16-bit M.
#     (We always use a 16-bit M prologue now — the per-function "pure-i8"
#     heuristic was a silent miscompile.  See feedback_callframe_spadj.md
#     and feedback_pure_i8_misencoded_imm.md.)
if [ -x "$LLC" ]; then
    log "check: llc compiles i8 add+1 to a single inc a"
    irI8File="$(mktemp --suffix=.ll)"
    sI8File="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File"' EXIT
    cat > "$irI8File" <<'EOF'
target triple = "w65816-unknown-unknown"
define i8 @i8_inc(i8 %x) {
  %r = add i8 %x, 1
  ret i8 %r
}
EOF
    "$LLC" -march=w65816 "$irI8File" -o "$sI8File"
    for expect in "rep #0x30" "inc a" "rtl"; do
        if ! grep -qF -- "$expect" "$sI8File"; then
            warn "i8 test missing: $expect"
            cat "$sI8File" >&2
            die "i8 test failed"
        fi
    done
    # The function should NOT enter in 8-bit M (no SEP #$20 in prologue).
    if grep -qE '^\s*sep\s+#0x20' "$sI8File"; then
        cat "$sI8File" >&2
        die "i8 test: pure-i8 SEP #\$20 prologue regressed (silent-miscompile risk)"
    fi
    rm -f "$irI8File" "$sI8File"
fi

# 11a. SETCC via clang: a > b returns 0/1.  Exercises the multi-branch
#      CC path (BEQ + BPL diamond, since SETGT can't be a single Bxx).
CLANG="$BUILD_DIR/bin/clang"
if [ -x "$CLANG" ]; then
    log "check: clang compiles a > b via multi-branch SETCC"
    cFile="$(mktemp --suffix=.c)"
    sCmpFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile"' EXIT
    cat > "$cFile" <<'EOF'
int gt(int a, int b) { return a > b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile" -o "$sCmpFile"
    # Expect a stack-relative CMP (offset depends on current spill
    # behaviour — fast regalloc adds 2 PHA prologue bytes vs greedy
    # which had no frame; either is acceptable as long as we cmp
    # against b through a stack-relative slot), then BEQ + BPL forming
    # the multi-branch diamond.
    for expect in "lda #0x1" "beq" "bpl" "lda #0x0"; do
        if ! grep -qF -- "$expect" "$sCmpFile"; then
            warn "setcc gt test missing: $expect"
            cat "$sCmpFile" >&2
            die "setcc gt test failed"
        fi
    done
    if ! grep -qE '^\s*cmp\s+0x[0-9a-f]+,\s*s\s*$' "$sCmpFile"; then
        cat "$sCmpFile" >&2
        die "setcc gt test missing: cmp ,s (stack-relative compare to arg b)"
    fi
    # Check 12 reassigns cFile and its trap omits sCmpFile, so these two
    # would otherwise be leaked; remove them now that the check passed.
    rm -f "$cFile" "$sCmpFile"
fi

# 11b. SELECT via clang: c ? a : b returns one of two constants.
if [ -x "$CLANG" ]; then
    log "check: clang compiles c ? 100 : 200 via SELECT_CC"
    cFile2="$(mktemp --suffix=.c)"
    sSelFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile"' EXIT
    cat > "$cFile2" <<'EOF'
int sel(int c) { return c ? 100 : 200; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile2" -o "$sSelFile"
    for expect in "cmp #0x0" "lda #0xc8" "beq" "lda #0x64"; do
        if ! grep -qF -- "$expect" "$sSelFile"; then
            warn "select test missing: $expect"
            cat "$sSelFile" >&2
            die "select test failed"
        fi
    done
    rm -f "$cFile2" "$sSelFile"
fi

# 11c. Two-Acc16 op via clang: a - b where both are non-foldable Acc16.
#      Caller-side b lives in memory (FI), so this matches via SBCfi without
#      the spill — but a + b + c chains through a true two-Acc16 add.
if [ -x "$CLANG" ]; then
    log "check: clang compiles two-Acc16 ops via spill (chained add)"
    cFile3="$(mktemp --suffix=.c)"
    sChainFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile"' EXIT
    cat > "$cFile3" <<'EOF'
// max3 forces two-Acc16: outer SELECT_CC compares one Acc16 PHI value
// to another Acc16 PHI value (m vs c, both computed values).
int max3(int a, int b, int c) {
    int m = a > b ? a : b;
    return m > c ? m : c;
}
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile3" -o "$sChainFile"
    # Expect cmp against a stack-relative slot - the signature of the
    # two-Acc16 CMP_RR custom inserter.  (Earlier this test also
    # required an `sta d,s` spill, but greedy regalloc + WidenAcc16
    # avoids that spill entirely on this pattern.)
    if ! grep -qE 'cmp 0x[0-9a-f]+, s' "$sChainFile"; then
        cat "$sChainFile" >&2
        die "two-Acc16 (max3) didn't cmp via stack-relative"
    fi
    # Passed: drop the temp files now; later traps do not list them all.
    rm -f "$cFile3" "$sChainFile"
fi

# 11d. Multiply via libcall.
if [ -x "$CLANG" ]; then
    log "check: clang emits __mulhi3 libcall for i16 multiply"
    cFile4="$(mktemp --suffix=.c)"
    sMulFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile"' EXIT
    cat > "$cFile4" <<'EOF'
int mul(int a, int b) { return a * b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile4" -o "$sMulFile"
    if ! grep -qF "jsl __mulhi3" "$sMulFile"; then
        cat "$sMulFile" >&2
        die "expected jsl __mulhi3"
    fi
    # Note: the original SPAdj-miscompile guard (which asserted specific
    # offsets like `lda 6,s` for arg b after one PHA) was tied to the
    # greedy-regalloc layout.  Under fast regalloc, the spill structure
    # changes call-by-call, so structural offset checks become brittle.
    # The fix for the underlying bug (SPAdj added in
    # W65816RegisterInfo::eliminateFrameIndex, plus
    # hasReservedCallFrame=false in W65816FrameLowering) is unit-verified
    # by the existence of the SPAdj-tracking code paths and was
    # sim-verified on mul(7,13) returning 91.
    #
    # sMulFile is reused as a name by the i32 multiply check further down,
    # so remove this one before the variable is overwritten.
    rm -f "$cFile4" "$sMulFile"
fi

# 11e. Variable shift via libcall.
if [ -x "$CLANG" ]; then
    log "check: clang emits __ashlhi3 libcall for variable i16 shift"
    cFile5="$(mktemp --suffix=.c)"
    sShfFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile"' EXIT
    cat > "$cFile5" <<'EOF'
int shf(int x, int n) { return x << n; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile5" -o "$sShfFile"
    if ! grep -qF "jsl __ashlhi3" "$sShfFile"; then
        cat "$sShfFile" >&2
        die "expected jsl __ashlhi3"
    fi
    rm -f "$cFile5" "$sShfFile"
fi

# 11f. Pointer deref: *p loads via stack-relative-indirect-Y.
if [ -x "$CLANG" ]; then
    log "check: clang compiles *p via LDA (slot,s),y"
    cFile6="$(mktemp --suffix=.c)"
    sPtrFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile"' EXIT
    cat > "$cFile6" <<'EOF'
int load_ptr(const int *p) { return *p; }
void store_ptr(int *p, int v) { *p = v; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile6" -o "$sPtrFile"
    for expect in "ldy #0x0" "lda (0x" "sta (0x"; do
        if ! grep -qF -- "$expect" "$sPtrFile"; then
            warn "ptr-deref test missing: $expect"
            cat "$sPtrFile" >&2
            die "ptr-deref test failed"
        fi
    done
    rm -f "$cFile6" "$sPtrFile"
fi

# 11g. i8 store via pointer: *p = v wraps the STA in SEP/REP so only
#      1 byte is written.  Both load_byte and store_byte must compile.
if [ -x "$CLANG" ]; then
    log "check: clang compiles *p = v with SEP/REP-wrapped STA (i8 store)"
    cFile7="$(mktemp --suffix=.c)"
    sBptrFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile"' EXIT
    cat > "$cFile7" <<'EOF'
unsigned char loadb(const unsigned char *p) { return *p; }
void storeb(unsigned char *p, unsigned char v) { *p = v; }
unsigned char incb(unsigned char *p) { return ++*p; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile7" -o "$sBptrFile"
    # storeb body should contain SEP #$20 ... STA (slot,s),y ... REP #$20.
    if ! grep -qF "sep #0x20" "$sBptrFile" \
        || ! grep -qF "rep #0x20" "$sBptrFile" \
        || ! grep -qE 'sta \(0x[0-9a-f]+, s\), y' "$sBptrFile"; then
        cat "$sBptrFile" >&2
        die "i8 ptr-store test missing SEP/STA/REP sequence"
    fi
    # All three functions must produce labels.
    for sym in loadb storeb incb; do
        if ! grep -qE "^${sym}:" "$sBptrFile"; then
            cat "$sBptrFile" >&2
            die "i8 ptr test: missing function ${sym}"
        fi
    done
    # Correctness check: storeb's prologue must NOT clobber A.  A holds
    # the pointer arg on entry; the first body op must spill A intact.
    # The fixed prologue uses N/2 PHAs (small N) or TAY/TSC/.../TYA
    # (large N).  Either way, the first non-prologue op should be a
    # `sta NN,s` that captures arg0=p.  If we see TSC anywhere in the
    # prologue WITHOUT a TAY before it, that's the broken form (A
    # clobbered by TSC, then the spill stores garbage SP value as if
    # it were the pointer).
    #
    # The mnemonic is matched with any leading/trailing whitespace so the
    # check fires whether the asm printer indents with a tab or spaces.
    # (The check only tests that a TAY line exists in the body, not that
    # it precedes the TSC.)
    storeb_body="$(sed -n '/^storeb:/,/^\.Lfunc_end/p' "$sBptrFile")"
    if grep -qE '^[[:space:]]*tsc[[:space:]]*$' <<<"$storeb_body" \
        && ! grep -qE '^[[:space:]]*tay[[:space:]]*$' <<<"$storeb_body"; then
        cat "$sBptrFile" >&2
        die "storeb prologue uses bare TSC without TAY — A (the pointer arg) gets clobbered before being spilled. Byte store writes to the wrong address. Use PHA-based prologue or TAY/TSC/.../TYA bracket."
    fi
    # Also: the pointer arg must end up in a stack slot for the
    # subsequent `sta (NN,s),y` indirect store.  This happens via
    # either an explicit `sta NN,s` spill OR via the prologue's PHA
    # alone (which pushes A — the pointer — to the slot for free; the
    # eliminateFrameIndex prologue-PHA fold elides the redundant
    # explicit STA).  The earlier `sta (0x..., s), y` regex already
    # confirms the indirect store is from a stack slot — i.e. that
    # SOMETHING put the pointer there.
    #
    # Passed: remove this check's temp files now; later traps omit them.
    rm -f "$cFile7" "$sBptrFile"
fi

# 11h. i8 global access stays in 8-bit M (no over-read).  bump_gb must
#      get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence.
if [ -x "$CLANG" ]; then
    log "check: clang keeps pure-i8 global access in 8-bit M (no wide-read regression)"
    cFile8="$(mktemp --suffix=.c)"
    sGbFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile"' EXIT
    cat > "$cFile8" <<'EOF'
unsigned char gb;
void bump_gb(void) { gb++; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile8" -o "$sGbFile"
    # Must use 8-bit M prologue (sep #$20), not the 16-bit one.
    if ! grep -qF "sep #0x20" "$sGbFile"; then
        cat "$sGbFile" >&2
        die "bump_gb test: expected sep #\$20 prologue (got 16-bit M)"
    fi
    rm -f "$cFile8" "$sGbFile"
fi

# 11j. Runtime library assembles and exports all expected libcalls.
#      This is the destination of every __mulhi3/__ashlhi3/etc. that clang
#      emits — without it, generated code links to nothing.
RUNTIME_SH="$PROJECT_ROOT/runtime/build.sh"
RUNTIME_OBJ="$PROJECT_ROOT/runtime/libgcc.o"
if [ -x "$RUNTIME_SH" ]; then
    log "check: runtime/build.sh assembles libgcc.o with all libcall symbols"
    "$RUNTIME_SH" >/dev/null
    if [ ! -f "$RUNTIME_OBJ" ]; then
        die "runtime/build.sh did not produce libgcc.o"
    fi
    # Last whitespace-separated field of every symbol-table line.
    syms="$("$BUILD_DIR/bin/llvm-objdump" -t "$RUNTIME_OBJ" 2>&1 | awk '{print $NF}')"
    for need in __mulhi3 __ashlhi3 __ashrhi3 __lshrhi3 __divhi3 __udivhi3 __modhi3 __umodhi3; do
        if ! grep -qx -- "$need" <<<"$syms"; then
            printf '%s\n' "$syms" >&2
            die "runtime missing symbol: $need"
        fi
    done
fi

# 11m. Real-world surface area: a non-trivial program that exercises
#      struct-field deref, char* iteration, multiply, shift, and a bit-twiddle
#      function.  Validates the backend compiles a realistic C input
#      end-to-end without crashing.  Doesn't assert specific asm; just
#      success and that the function bodies are non-empty.
if [ -x "$CLANG" ]; then
    log "check: clang compiles a real-world multi-function program"
    cFile12="$(mktemp --suffix=.c)"
    sBigFile="$(mktemp --suffix=.s)"
    # cFile9..cFile11 and their .s partners are only assigned by checks
    # that run AFTER this one, so they are expanded with ${var:-}: a bare
    # "$cFile9" would abort the trap under `set -u` before rm ever ran.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "${cFile9:-}" "${sEqbFile:-}" "${cFile10:-}" "${sSgnFile:-}" "${cFile11:-}" "${sCallsFile:-}" "$cFile12" "$sBigFile"' EXIT
    # NOTE(review): the bodies of fnv16 and the head of ctz16 were lost
    # from this file (everything between a '<' and the next '>' was
    # stripped).  They are reconstructed below to match the description
    # above (multiply, shift, bit-twiddle); confirm against history.
    cat > "$cFile12" <<'EOF'
typedef unsigned char u8;
typedef unsigned int u16;
struct Node { u16 data; struct Node *next; };
u16 list_sum(const struct Node *h) {
    u16 s=0;
    while(h){ s+=h->data; h=h->next; }
    return s;
}
int strcmp_test(const char *a, const char *b) {
    while (*a && *a == *b) { a++; b++; }
    return (unsigned char)*a - (unsigned char)*b;
}
u16 fnv16(const u8 *p, u16 n) {
    u16 h=0x811C;
    for (u16 i=0;i<n;i++) { h ^= p[i]; h *= 0x0193; }
    return h;
}
u16 ctz16(u16 x) {
    u16 n=0;
    if (!x) return 16;
    if (!(x & 0xFF)) { n+=8; x>>=8; }
    if (!(x & 0x0F)) { n+=4; x>>=4; }
    if (!(x & 0x03)) { n+=2; x>>=2; }
    if (!(x & 0x01)) n+=1;
    return n;
}
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile12" -o "$sBigFile"
    for sym in list_sum strcmp_test fnv16 ctz16; do
        if ! grep -qE "^${sym}:" "$sBigFile"; then
            cat "$sBigFile" >&2
            die "real-world test missing function: $sym"
        fi
    done
    # The traps installed by the following checks do not list these two.
    rm -f "$cFile12" "$sBigFile"
fi

# 11l.
#      Linkage contract: every libcall clang generates from arithmetic
#      ops must match a symbol provided by runtime/libgcc.o.  We can't run a
#      real link yet (no w65816-aware linker), but we can verify the symbol
#      names line up — drift here would be a silent runtime crash.
if [ -x "$CLANG" ] && [ -f "$RUNTIME_OBJ" ]; then
    log "check: every libcall clang emits has a matching definition in libgcc.o"
    cFile11="$(mktemp --suffix=.c)"
    sCallsFile="$(mktemp --suffix=.s)"
    # cFile9/sEqbFile/cFile10/sSgnFile are assigned by checks that run
    # later in the file; ${var:-} keeps the trap from aborting on an
    # unbound variable under `set -u`.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "${cFile9:-}" "${sEqbFile:-}" "${cFile10:-}" "${sSgnFile:-}" "$cFile11" "$sCallsFile"' EXIT
    cat > "$cFile11" <<'EOF'
int m1(int a, int b) { return a * b; }
unsigned int m2(unsigned int a, unsigned int b) { return a * b; }
int s1(int x, int n) { return x << n; }
unsigned int s2(unsigned int x, int n) { return x >> n; }
int s3(int x, int n) { return x >> n; }
int d1(int a, int b) { return a / b; }
unsigned int d2(unsigned int a, unsigned int b) { return a / b; }
int r1(int a, int b) { return a % b; }
unsigned int r2(unsigned int a, unsigned int b) { return a % b; }
long m3(long a, long b) { return a * b; }
unsigned long m4(unsigned long a, unsigned long b) { return a * b; }
long s4(long x, int n) { return x << n; }
long s5(long x, int n) { return x >> n; }
unsigned long s6(unsigned long x, int n) { return x >> n; }
long d3(long a, long b) { return a / b; }
unsigned long d4(unsigned long a, unsigned long b) { return a / b; }
long r3(long a, long b) { return a % b; }
unsigned long r4(unsigned long a, unsigned long b) { return a % b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile11" -o "$sCallsFile"
    # Symbol-table rows whose second field is "g": the last field is the name.
    runtime_syms="$("$BUILD_DIR/bin/llvm-objdump" -t "$RUNTIME_OBJ" 2>&1 | awk '$2 == "g" {print $NF}')"
    # Unique `__name` targets of every `jsl __name` in the generated asm.
    emitted="$(grep -oE 'jsl __[a-z0-9]+' "$sCallsFile" | awk '{print $2}' | sort -u)"
    # $emitted is deliberately unquoted: one symbol per word.
    for sym in $emitted; do
        if ! grep -qx -- "$sym" <<<"$runtime_syms"; then
            warn "clang emitted libcall $sym but runtime/libgcc.o has no such symbol"
            printf 'runtime exports:\n%s\n' "$runtime_syms" >&2
            printf 'clang emitted:\n%s\n' "$emitted" >&2
            die "libcall name drift: $sym missing from runtime"
        fi
    done
    # The traps installed by the following checks do not list these two.
    rm -f "$cFile11" "$sCallsFile"
fi

# 11k. signed i8 compare: forces 16-bit M prologue (instrLowersToWide)
#      because the SEXT lowering needs i16 ops.  Verifies both that the
#      code compiles AND that the prologue is REP #$30 (not the 8-bit M
#      fast path, which would silently corrupt the SEXT mask).
if [ -x "$CLANG" ]; then
    log "check: signed i8 compare gets 16-bit M prologue + emits cmp"
    cFile10="$(mktemp --suffix=.c)"
    sSgnFile="$(mktemp --suffix=.s)"
    # cFile9/sEqbFile are assigned by the check after this one.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "${cFile9:-}" "${sEqbFile:-}" "$cFile10" "$sSgnFile"' EXIT
    cat > "$cFile10" <<'EOF'
signed char sgnlt(signed char a, signed char b) { return a < b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile10" -o "$sSgnFile"
    # Must use 16-bit M (rep #$30), not the 8-bit fast path.
    if ! grep -qF "rep #0x30" "$sSgnFile"; then
        cat "$sSgnFile" >&2
        die "sgnlt: expected rep #\$30 prologue (i8 signed cmp needs 16-bit M)"
    fi
    # Must NOT contain the 8-bit prologue, which would mean we never
    # transitioned (the SEXT injection's ora #$ff00 would silently
    # truncate to ora #$00 in 8-bit M).
    # NOTE(review): this condition requires "rep #0x30" to be absent, but
    # the check just above has already died in that case, so this branch
    # cannot be taken as written.  Kept unchanged; confirm the intent.
    if grep -qF "rep #0x10" "$sSgnFile" && ! grep -qF "rep #0x30" "$sSgnFile"; then
        cat "$sSgnFile" >&2
        die "sgnlt: only saw 8-bit M prologue, SEXT high-byte mask would be dropped"
    fi
    rm -f "$cFile10" "$sSgnFile"
fi

# 11i. i8 equality compare on two stack args (eqbyte): exercises i8
#      SETCC promotion through Lower*CC.
if [ -x "$CLANG" ]; then log "check: clang lowers i8 == i8 via promoted i16 cmp" cFile9="$(mktemp --suffix=.c)" sEqbFile="$(mktemp --suffix=.s)" trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "$cFile9" "$sEqbFile"' EXIT cat > "$cFile9" <<'EOF' unsigned char eqbyte(unsigned char a, unsigned char b) { return a == b; } EOF "$CLANG" --target=w65816 -O2 -S "$cFile9" -o "$sEqbFile" # Must produce a cmp + beq (the eq diamond). if ! grep -qE 'cmp ' "$sEqbFile" || ! grep -qF "beq" "$sEqbFile"; then cat "$sEqbFile" >&2 die "eqbyte test: expected cmp + beq sequence" fi fi # 12. Real C through clang. Uses the clang front-end if it has been # built; skipped otherwise (clang takes 15-30 minutes to build the # first time; afterwards rebuilds are fast). CLANG="$BUILD_DIR/bin/clang" if [ -x "$CLANG" ] && [ -x "$OBJDUMP" ]; then log "check: clang -target w65816 -O2 compiles a tiny C function" cFile="$(mktemp --suffix=.c)" oFile2="$(mktemp --suffix=.o)" trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2"' EXIT cat > "$cFile" <<'EOF' int answer(void) { return 42; } EOF "$CLANG" --target=w65816 -O2 -c "$cFile" -o "$oFile2" disasm="$("$OBJDUMP" --triple=w65816 -d "$oFile2" 2>&1)" for expect in "rep #0x30" "lda #0x2a" "rtl"; do if ! printf '%s\n' "$disasm" | grep -qF "$expect"; then warn "clang test missing: $expect" printf '%s\n' "$disasm" >&2 die "clang end-to-end test failed" fi done # 13. i32 (long) compile path. Type legalization splits i32 into # two i16 halves; the high half flows through the (add FrameIndex, # 2) shape, which previously crashed ISel with "Cannot select # FrameIndex<-2>". 
# ...continuing check 13's description (still inside the check-12 guard):
    # SelectFrameIndex now folds (add FI, const) so the split loads land
    # on a stack-relative addressing mode.
    # Return ABI: low->A, high->X (TAX in the epilogue).
    # Also asserts the native ADC carry chain (CLC + ADC + ADC) is in
    # place — task #49 replaced the bloated SETCC-based carry detect
    # (lda;cmp;bcc;lda) with a direct ADDC/ADDE-pattern lowering that
    # uses the C flag in P as a Glue-modeled physreg.
    log "check: clang compiles a long add (i32 split + A:X return)"
    cI32File="$(mktemp --suffix=.c)"
    oI32File="$(mktemp --suffix=.o)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File"' EXIT
    cat > "$cI32File" <<'EOF'
long add32(long a, long b) { return a + b; }
EOF
    "$CLANG" --target=w65816 -O2 -c "$cI32File" -o "$oI32File"
    disasmI32="$("$OBJDUMP" --triple=w65816 -d "$oI32File" 2>&1)"
    # TAX confirms the high-half-into-X part of the return ABI fired.
    # Without it, both halves would pile into A and one would be lost.
    # Exactly one CLC and exactly two ADCs prove the native carry chain
    # is wired (one CLC for lo, ADC lo, ADC hi-with-carry); a regression
    # to the SETCC path would show two CLCs and a bcc/cmp.
    for expect in "tax" "rtl" "clc" "adc"; do
        if ! grep -qF -- "$expect" <<<"$disasmI32"; then
            warn "i32 add test missing: $expect"
            printf '%s\n' "$disasmI32" >&2
            die "i32 add end-to-end test failed"
        fi
    done
    # `grep -c` exits 1 when the count is 0; `|| true` keeps set -e quiet
    # while still capturing the printed "0".
    nClc="$(grep -cE '\bclc\b' <<<"$disasmI32" || true)"
    nAdc="$(grep -cE '\badc\b' <<<"$disasmI32" || true)"
    nBcc="$(grep -cE '\bbcc\b' <<<"$disasmI32" || true)"
    if [ "$nClc" != "1" ] || [ "$nAdc" != "2" ] || [ "$nBcc" != "0" ]; then
        warn "i32 add carry-chain shape wrong (clc=$nClc adc=$nAdc bcc=$nBcc, want 1/2/0)"
        printf '%s\n' "$disasmI32" >&2
        die "i32 add carry-chain regression"
    fi
    # Lock the post-StackSlotCleanup instruction count: should be ~11 for
    # add32 (rep + pha + clc + adc + sta + txa + adc + tax + lda + ply + rtl
    # — i32-first-arg in A:X means arg0_hi loads as TXA, no LDAfi).  If
    # this regresses meaningfully (say >14) the cleanup pass, the
    # rematerialization flag, or the A:X first-arg ABI has been broken.
    # NOTE(review): the pattern is anchored at column 0; if llvm-objdump
    # indents its "addr:" column the count is always 0 and this guard can
    # never fire — confirm against real output.
    nInsns="$(grep -cE '^[0-9a-f]+:' <<<"$disasmI32" || true)"
    if [ "$nInsns" -gt 14 ]; then
        warn "i32 add bloat (got $nInsns insns, want <=14 — was 25 pre-cleanup, 11 post)"
        printf '%s\n' "$disasmI32" >&2
        die "i32 add code-quality regression"
    fi
    # The A:X arg0 ABI moves arg0_hi out of the stack slot, so the
    # asm should contain TXA (X→A for the hi-half ADC tied input)
    # exactly once.  A regression to "load arg0_hi from stack" would
    # remove the TXA and add an extra LDA.
    nTxa="$(grep -cE '\btxa\b' <<<"$disasmI32" || true)"
    if [ "$nTxa" != "1" ]; then
        warn "i32 add: expected exactly 1 txa (i32-first-arg-in-A:X path); got $nTxa"
        printf '%s\n' "$disasmI32" >&2
        die "i32 add A:X first-arg ABI regression"
    fi
    # Passed: remove this check's temp files right away.
    rm -f "$cI32File" "$oI32File"

    # i32 carry chain on two-Acc16 (no foldable load): exercises the
    # ADD_RR + ADDE_RR custom-inserter path.  fib32 has live a/b values
    # the inserter must spill to a fresh slot; pre-fix this crashed at
    # ISel with "Cannot select: adde reg, reg".
# (still inside the check-12 `if clang && objdump` guard)
    log "check: clang compiles a 32-bit fib loop (ADDE_RR inserter path)"
    cFibFile="$(mktemp --suffix=.c)"
    sFibFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile"' EXIT
    cat > "$cFibFile" <<'EOF'
unsigned long fib32(unsigned long n) {
    unsigned long a = 0, b = 1, t;
    while (n > 0) { t = a + b; a = b; b = t; n--; }
    return a;
}
EOF
    # `2>&1 >/dev/null`: compiler diagnostics go to this script's stdout,
    # the compiler's own stdout is discarded.
    if ! "$CLANG" --target=w65816 -O2 -S "$cFibFile" -o "$sFibFile" 2>&1 >/dev/null; then
        die "i32 fib (ADDE_RR inserter) failed to compile"
    fi
    if ! grep -qE '\bclc\b' "$sFibFile" || ! grep -qE '\badc\b' "$sFibFile"; then
        warn "i32 fib output missing clc/adc"
        die "i32 fib carry-chain regression"
    fi
    # Passed: remove this check's temp files right away.
    rm -f "$cFibFile" "$sFibFile"

    # i32 multiply via __mulsi3 libcall: tests the multi-i16-return path
    # (RetCC_W65816 assigning A then X for 2 i16 returns) plus the i32
    # arg push side.  Pre-fix this hit "multi-return calls not yet
    # supported (Ins.size=4)" when LowerCallTo split the i32 return.
    log "check: clang compiles a long multiply via __mulsi3 libcall"
    cMulFile="$(mktemp --suffix=.c)"
    sMulFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile"' EXIT
    cat > "$cMulFile" <<'EOF'
unsigned long mul32(unsigned long a, unsigned long b) { return a * b; }
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cMulFile" -o "$sMulFile" 2>&1 >/dev/null; then
        die "i32 mul via __mulsi3 failed to compile"
    fi
    if ! grep -q '__mulsi3' "$sMulFile"; then
        die "i32 mul did not emit __mulsi3 libcall"
    fi
    rm -f "$cMulFile" "$sMulFile"

    # i32 shift-by-1 (SHL/SRL): the type-legalizer's SHL_PARTS / SRL_PARTS
    # expansion needs `(srl x, 15)` or `(shl x, 15)` for the carry-cross-
    # halves slot.  Without inline patterns those fall to __lshrhi3 /
    # __ashlhi3 libcalls (~10 byte overhead per shift).
# ...continuing the shift-by-1 description (still inside the check-12 guard):
    # SRL15A and SHL15A pseudos handle them inline (`ASL/LSR; LDA #0;
    # ROL/ROR`, 3 bytes).  Verify the shift-by-1 output doesn't contain
    # a hi3 libcall.
    log "check: clang i32 shift-by-1 stays inline (no __lshrhi3 / __ashlhi3 libcall)"
    cSh1File="$(mktemp --suffix=.c)"
    sSh1File="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File"' EXIT
    cat > "$cSh1File" <<'EOF'
unsigned long shl1(unsigned long a) { return a << 1; }
unsigned long shr1(unsigned long a) { return a >> 1; }
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cSh1File" -o "$sSh1File" 2>&1 >/dev/null; then
        die "i32 shift-by-1 failed to compile"
    fi
    if grep -qE '__lshrhi3|__ashlhi3' "$sSh1File"; then
        warn "i32 shift-by-1 still calling i16 shift libcall — SRL15A/SHL15A pattern not firing"
        die "i32 shift-by-1 regression"
    fi
    # The alloca check's trap further down omits these two; remove them now.
    rm -f "$cSh1File" "$sSh1File"

    # Varargs (<stdarg.h>): LowerFormalArguments creates a fixed FI
    # for the first vararg slot when IsVarArg; LowerVASTART stores
    # its address to the va_list pointer.  VAARG/VACOPY/VAEND use
    # default LLVM expansions.  Pre-fix this hit
    # "vararg functions not yet supported" fatal error.
    log "check: clang compiles a vararg function (<stdarg.h>)"
    cVaFile="$(mktemp --suffix=.c)"
    sVaFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File" "$cVaFile" "$sVaFile"' EXIT
    # The header name had been stripped from the #include line (leaving a
    # bare `#include `, which does not compile); va_list/va_start/va_arg
    # below come from <stdarg.h>.
    cat > "$cVaFile" <<'EOF'
#include <stdarg.h>
int sumArgs(int n, ...) {
    va_list args;
    va_start(args, n);
    int sum = 0;
    for (int i = 0; i < n; i++) sum += va_arg(args, int);
    va_end(args);
    return sum;
}
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cVaFile" -o "$sVaFile" 2>&1 >/dev/null; then
        die "vararg function failed to compile"
    fi
    rm -f "$cVaFile" "$sVaFile"

    # Stack-array LEA: `char arr[16]; arr[i] = ...` needs the address
    # of an alloca'd object as an i16 value.  Pre-fix this hit "Cannot
    # select: FrameIndex<0>" because addr_fi only matches in load/store
    # contexts.  W65816DAGToDAGISel::Select now lowers a bare
    # ISD::FrameIndex to ADDframe (FI, 0); eliminateFrameIndex expands
    # ADDframe into TSC + CLC + ADC #disp.
    log "check: clang takes the address of a stack-allocated array"
    cAllocaFile="$(mktemp --suffix=.c)"
    sAllocaFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile"' EXIT
    cat > "$cAllocaFile" <<'EOF'
extern void use_buffer(char *p);
void writeBytes(char v) {
    char tmp[8];
    for (int i = 0; i < 8; i++) tmp[i] = v + i;
    use_buffer(tmp);  // forces &tmp[0] to escape
}
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cAllocaFile" -o "$sAllocaFile" 2>&1 >/dev/null; then
        die "alloca'd-array address failed to compile"
    fi
    # The TSC; CLC; ADC #disp triple is the LEA expansion of ADDframe;
    # at least one occurrence proves the pseudo wired through.
    if ! grep -qE '^\s*tsc' "$sAllocaFile"; then
        die "alloca'd-array LEA missing TSC (ADDframe expansion broken)"
    fi
    # i8 stores into the alloca slot must be 8-bit (SEP/REP bracketed).
    # A bare 16-bit `sta d,S` with M=0 writes 2 bytes and corrupts the
    # next slot or the return address.  The writeBytes function unrolls
    # to 8 i8 stores (one per `tmp[i] = v + i`); each must be inside a
    # `sep #$20 ... rep #$20` pair.  Count `sta d,S` occurrences inside
    # vs. outside SEP/REP — at least 8 must be inside.
    # (The awk program tracks a flag set by `sep #0x20` and cleared by
    # `rep #0x20`, and counts only the stack-relative STAs seen while the
    # flag is set.)
    if ! awk '
        /^\s*sep\s+#0x20\s*$/ { sep = 1; next }
        /^\s*rep\s+#0x20\s*$/ { sep = 0; next }
        /^\s*sta\s+0x[0-9a-f]+,\s*s\s*$/ { if (sep) inside++ }
        END { if (inside < 8) { print "INSIDE=" inside "; want >= 8"; exit 1 } }
    ' "$sAllocaFile"; then
        die "alloca'd-array i8 stores not properly SEP/REP bracketed (8-bit store regression)"
    fi
    rm -f "$cAllocaFile" "$sAllocaFile"

    # Same correctness check for i8 stores to *globals* in an M=0
    # function.  STA8abs in AsmPrinter must wrap with SEP/REP when
    # UsesAcc8 is false; bare `sta g+N` in M=0 writes 2 bytes and
    # corrupts the next global.
    log "check: clang i8 store to global in M=0 mode is SEP/REP bracketed"
    cGlobFile="$(mktemp --suffix=.c)"
    sGlobFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cGlobFile" "$sGlobFile"' EXIT
    cat > "$cGlobFile" <<'EOF'
char g[4];
void writeMixed(int x) {
    g[0] = (char)x;
    g[1] = (char)(x + 1);
    g[2] = (char)(x + 2);
    g[3] = (char)(x + 3);
}
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cGlobFile" -o "$sGlobFile" 2>&1 >/dev/null; then
        die "global-i8-store M=0 test failed to compile"
    fi
    # Each `sta g+N` (or `sta g`) must sit inside SEP/REP brackets.
    # awk exits 1 on the first such store seen while the flag is clear.
    if ! awk '
        /^\s*sep\s+#0x20\s*$/ { sep = 1; next }
        /^\s*rep\s+#0x20\s*$/ { sep = 0; next }
        /^\s*sta\s+g(\+[0-9]+)?\s*$/ { if (!sep) { print "NAKED:" $0; exit 1 } }
    ' "$sGlobFile"; then
        die "i8 store to global in M=0 emits naked 16-bit STA (would clobber adjacent global)"
    fi
    rm -f "$cGlobFile" "$sGlobFile"

    # signed-byte arithmetic (`(int)(*p) - (int)(*q)` style — strcmp).
    # Exercises three formerly-missing patterns: SEXTLOAD i16 from i8
    # (we Expand it to (sext (load))), sext_inreg i16 from i8 (the
    # `((x & 0xFF) ^ 0x80) - 0x80` tablegen Pat), and extloadi8 from
    # an Acc16 register pointer (LDAptr / "high byte don't care").
log "check: clang compiles a signed-byte strcmp (sextload + sext_inreg + extload-via-ptr)"
cStrFile=$(mktemp --suffix=.c)
sStrFile=$(mktemp --suffix=.s)
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile"' EXIT
cat <<'EOF' > "$cStrFile"
int strcmp32(const char *a, const char *b) {
    while (*a && *a == *b) { a++; b++; }
    return (int)(*a) - (int)(*b);
}
EOF
# Compile-only check: success of the -S step is all that is asserted.
"$CLANG" --target=w65816 -O2 -S "$cStrFile" -o "$sStrFile" 2>&1 >/dev/null \
    || die "signed-byte strcmp failed to compile"

# Calls through a function pointer.  The caller is expected to store
# the target into __indirTarget and then JSL to the __jsl_indir
# trampoline (runtime/src/libgcc.s), which does JMP (__indirTarget).
# Before the fix, LowerCall reported a fatal error for this input.
log "check: clang compiles an indirect call (via __jsl_indir trampoline)"
cIndFile=$(mktemp --suffix=.c)
sIndFile=$(mktemp --suffix=.s)
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile"' EXIT
cat <<'EOF' > "$cIndFile"
typedef int (*BinOp)(int, int);
int doOp(BinOp op, int x, int y) { return op(x, y); }
EOF
"$CLANG" --target=w65816 -O2 -S "$cIndFile" -o "$sIndFile" 2>&1 >/dev/null \
    || die "indirect call failed to compile"
# Both halves of the trampoline protocol must show up in the assembly.
grep -q '__indirTarget' "$sIndFile" \
    || die "indirect call missing __indirTarget store"
grep -q '__jsl_indir' "$sIndFile" \
    || die "indirect call missing JSL to __jsl_indir trampoline"

# SEP/REP toggle coalescing (W65816SepRepCleanup, addPreEmitPass).
# A single STA8fi expands to `SEP #$20 ; STA d,S ; REP #$20`, so two
# adjacent i8 stores leave a redundant `REP #$20 ; SEP #$20` pair in
# the post-PEI stream that the cleanup pass is supposed to delete.
# The IR uses volatile stores so the store-merger cannot fuse the two
# i8 stores into one i16 and no 16-bit-mode code lands between them.
log "check: SEP/REP toggle pass coalesces back-to-back i8 alloca stores"
irCoalesceFile=$(mktemp --suffix=.ll)
sCoalesceFile=$(mktemp --suffix=.s)
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile"' EXIT
cat <<'EOF' > "$irCoalesceFile"
declare void @sink(ptr)
define void @adjacent(i8 %v) {
  %p = alloca [2 x i8], align 1
  %p0 = getelementptr inbounds [2 x i8], ptr %p, i16 0, i16 0
  %p1 = getelementptr inbounds [2 x i8], ptr %p, i16 0, i16 1
  store volatile i8 %v, ptr %p0
  store volatile i8 %v, ptr %p1
  call void @sink(ptr %p)
  ret void
}
EOF
"$LLC" -march=w65816 -O2 "$irCoalesceFile" -o "$sCoalesceFile" 2>&1 >/dev/null \
    || die "SEP/REP coalescing test failed to compile"
# First pass over the assembly: fail on any `rep #0x20` / `sep #0x20`
# pair (either order) with no other instruction between them.  The
# awk variable `lastToggle` remembers the most recent toggle and is
# reset by any other line that starts with a lowercase mnemonic.
awk '
    BEGIN { lastToggle = "" }
    /^\s*sep\s+#0x20\s*$/ {
        if (lastToggle == "rep") {
            print "TOGGLE: rep then sep at line " NR
            exit 1
        }
        lastToggle = "sep"
        next
    }
    /^\s*rep\s+#0x20\s*$/ {
        if (lastToggle == "sep") {
            print "TOGGLE: sep then rep at line " NR
            exit 1
        }
        lastToggle = "rep"
        next
    }
    /^\s*[a-z]/ { lastToggle = "" }
' "$sCoalesceFile" || {
    cat "$sCoalesceFile" >&2
    die "SEP/REP cleanup pass left an adjacent REP/SEP toggle in the output"
}
# Second pass: require at least two back-to-back `sta d,S` lines
# inside a single sep..rep region, i.e. both stores executed in M=1
# with no toggle between them.
awk '
    /^\s*sep\s+#0x20\s*$/ { m8 = 1; run = 0; next }
    /^\s*rep\s+#0x20\s*$/ { m8 = 0; run = 0; next }
    /^\s*sta\s+0x[0-9a-f]+,\s*s\s*$/ {
        if (m8 && ++run >= 2)
            ok = 1
        next
    }
    /^\s*[a-z]/ { run = 0 }
    END { if (!ok) exit 1 }
' "$sCoalesceFile" || {
    cat "$sCoalesceFile" >&2
    die "SEP/REP cleanup pass: no two consecutive sta d,S found inside one SEP/REP region"
}

# Mixed-mode regression guard.  A function that increments a char
# global and returns it must not use 8-bit-M-only encodings for i16
# immediates.  With the old per-function "pure-i8" prologue, the late
# sign-extension `and #$ff; eor #$80; sbc #$80` was emitted as 3-byte
# i16 immediates but executed in M=1: the CPU consumed only the low
# byte of each immediate, so every following opcode was decoded one
# byte off and the immediate's high byte ran as an opcode (often
# $00 = BRK).  Now the prologue is REP #$30 only (no SEP) and each
# i8 op carries its own SEP/REP wrap.
log "check: mixed i8/i16 in one function — no SEP-only-prologue miscompile"
cMixFile=$(mktemp --suffix=.c)
sMixFile=$(mktemp --suffix=.s)
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile" "$cMixFile" "$sMixFile"' EXIT
cat <<'EOF' > "$cMixFile"
char g;
char inc_g(void) { g++; return g; }
EOF
"$CLANG" --target=w65816 -O2 -S "$cMixFile" -o "$sMixFile"
# The first mode-affecting instruction in the output is taken to be
# the prologue.  It has to be `rep #0x30`; meeting `sep #0x20` or
# `rep #0x10` first, or finding no `rep #0x30` at all, is a failure.
awk '
    BEGIN { seen = 0 }
    /^\s*rep\s+#0x30\s*$/ {
        seen = 1
        exit 0
    }
    /^\s*(sep\s+#0x20|rep\s+#0x10)\s*$/ { exit 1 }
    END { if (!seen) exit 1 }
' "$sMixFile" || {
    cat "$sMixFile" >&2
    die "mixed i8/i16: prologue is not the expected REP #\$30 (8-bit-M-prologue regression)"
}

# Linker check.  tools/link816 (built from src/link816/link816.cpp)
# concatenates one or more ELF .o files, resolves W65816 relocations
# (R_W65816_IMM8/IMM16/IMM24/PCREL8/16, plus generic FK_Data_*) and
# writes a flat binary.  The check links a minimal program that calls
# __mulhi3, then inspects the JSL operand and confirms it equals the
# post-link address of __mulhi3 taken from the symbol map.
# Link a one-function program against libgcc and confirm that the JSL
# inside `mul` targets the address the link map reports for __mulhi3.
log "check: link816 resolves a libcall to libgcc"
cLinkFile="$(mktemp --suffix=.c)"
oLinkFile="$(mktemp --suffix=.o)"
oLibgccFile="$(mktemp --suffix=.o)"
binLinkFile="$(mktemp --suffix=.bin)"
mapLinkFile="$(mktemp --suffix=.map)"
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile" "$cMixFile" "$sMixFile" "$cLinkFile" "$oLinkFile" "$oLibgccFile" "$binLinkFile" "$mapLinkFile"' EXIT
cat > "$cLinkFile" <<'EOF'
int mul(int a, int b) { return a * b; }
EOF
"$CLANG" --target=w65816 -O2 -c "$cLinkFile" -o "$oLinkFile"
# $oLibgccFile is reused as a link input by every later check in
# this section.
"$LLVM_MC" -arch=w65816 -filetype=obj \
    "$PROJECT_ROOT/runtime/src/libgcc.s" -o "$oLibgccFile"
# The linker's stderr is suppressed, so a non-zero exit must be
# handled here; otherwise `set -e` would end the script without any
# message.  mktemp pre-creates the output, hence the -s (non-empty)
# test rather than -e.
if ! "$PROJECT_ROOT/tools/link816" -o "$binLinkFile" \
        --text-base 0x8000 --map "$mapLinkFile" \
        "$oLinkFile" "$oLibgccFile" 2>/dev/null \
    || [ ! -s "$binLinkFile" ]; then
    die "link816 produced empty/missing binary"
fi
# Map lines are parsed as `name = address`.
mul_addr=$(awk -F' = ' '$1 == "mul" { print $2 }' "$mapLinkFile")
mulhi3_addr=$(awk -F' = ' '$1 == "__mulhi3" { print $2 }' "$mapLinkFile")
if [ -z "$mul_addr" ] || [ -z "$mulhi3_addr" ]; then
    cat "$mapLinkFile" >&2
    die "link map missing 'mul' or '__mulhi3' symbol"
fi
# mul's body is short — the JSL to __mulhi3 should appear near the
# start.  Read mul's bytes (mul_addr - 0x8000 = file offset) and
# search for `0x22 lo mid hi` matching __mulhi3's address.
mul_off=$((mul_addr - 0x8000))
expect_lo=$(printf '%02x' "$((mulhi3_addr & 0xff))")
expect_mid=$(printf '%02x' "$(((mulhi3_addr >> 8) & 0xff))")
expect_hi=$(printf '%02x' "$(((mulhi3_addr >> 16) & 0xff))")
# Hexdump mul's first 32 bytes and look for the JSL pattern.
if ! od -An -tx1 -N 32 -j "$mul_off" "$binLinkFile" \
        | tr -s ' \n' ' ' \
        | grep -qE " 22 ${expect_lo} ${expect_mid} ${expect_hi}( |$)"; then
    od -An -tx1 -N 32 -j "$mul_off" "$binLinkFile" >&2
    die "link816: mul's JSL operand does not point at __mulhi3 (expected 22 ${expect_lo} ${expect_mid} ${expect_hi})"
fi

# Soft-float runtime: compile runtime/src/softFloat.c, then link a
# tiny float-using program against it.  Confirms (a) the real
# soft-float helpers compile (which exercises the W65816BranchExpand
# pass — the C-based __addsf3 has internal Bxx targets > 128 bytes
# and would error at link time without the inversion-and-jump
# transform), (b) all the libcalls clang emits for float ops have
# matching definitions in softFloat.o.
log "check: soft-float runtime links (real impl, not stubs)"
cFltFile="$(mktemp --suffix=.c)"
oFltFile="$(mktemp --suffix=.o)"
oSfFile="$(mktemp --suffix=.o)"
binFltFile="$(mktemp --suffix=.bin)"
mapFltFile="$(mktemp --suffix=.map)"
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile" "$cMixFile" "$sMixFile" "$cLinkFile" "$oLinkFile" "$oLibgccFile" "$binLinkFile" "$mapLinkFile" "$cFltFile" "$oFltFile" "$oSfFile" "$binFltFile" "$mapFltFile"' EXIT
cat > "$cFltFile" <<'EOF'
float fadd(float a, float b) { return a + b; }
float fmul(float a, float b) { return a * b; }
int feq(float a, float b) { return a == b; }
int toInt(float x) { return (int)x; }
float fromInt(int n) { return (float)n; }
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c "$cFltFile" -o "$oFltFile"
"$CLANG" --target=w65816 -O2 -ffunction-sections \
    -c "$PROJECT_ROOT/runtime/src/softFloat.c" -o "$oSfFile"
if ! "$PROJECT_ROOT/tools/link816" -o "$binFltFile" \
        --text-base 0x8000 --map "$mapFltFile" \
        "$oFltFile" "$oSfFile" "$oLibgccFile" 2>/dev/null \
    || [ ! -s "$binFltFile" ]; then
    die "soft-float runtime failed to link"
fi
# Verify the critical libcall symbols are present in the link map.
if ! grep -q "__addsf3" "$mapFltFile"; then
    die "soft-float map missing __addsf3"
fi
if ! grep -q "__mulsf3" "$mapFltFile"; then
    die "soft-float map missing __mulsf3"
fi
if ! grep -q "__fixsfsi" "$mapFltFile"; then
    die "soft-float map missing __fixsfsi"
fi

# Soft-double runtime: compile runtime/src/softDouble.c (was a stub
# returning zero; now a real IEEE 754 binary64 implementation in C).
# Confirms (a) the C version compiles end-to-end (greedy regalloc
# + WidenAcc16 unblocked the prior Register Coalescer crash on
# this code), (b) all the libcalls clang emits for double ops
# have matching definitions.
# NOTE(review): from here on the scratch files are not listed in any
# EXIT trap; they are removed by the explicit `rm -f` at the end of
# each check, so they are left behind when a check dies.
log "check: soft-double runtime compiles (real impl, not stubs)"
cDblFile="$(mktemp --suffix=.c)"
oDblFile="$(mktemp --suffix=.o)"
oSdFile="$(mktemp --suffix=.o)"
binDblFile="$(mktemp --suffix=.bin)"
mapDblFile="$(mktemp --suffix=.map)"
cat > "$cDblFile" <<'EOF'
double dadd(double a, double b) { return a + b; }
double dmul(double a, double b) { return a * b; }
int deq(double a, double b) { return a == b; }
int toInt(double x) { return (int)x; }
double fromInt(int n) { return (double)n; }
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c "$cDblFile" -o "$oDblFile"
"$CLANG" --target=w65816 -O2 -ffunction-sections \
    -c "$PROJECT_ROOT/runtime/src/softDouble.c" -o "$oSdFile"
if ! "$PROJECT_ROOT/tools/link816" -o "$binDblFile" \
        --text-base 0x8000 --map "$mapDblFile" \
        "$oDblFile" "$oSdFile" "$oLibgccFile" 2>/dev/null \
    || [ ! -s "$binDblFile" ]; then
    die "soft-double runtime failed to link"
fi
if ! grep -q "__adddf3" "$mapDblFile"; then
    die "soft-double map missing __adddf3"
fi
if ! grep -q "__muldf3" "$mapDblFile"; then
    die "soft-double map missing __muldf3"
fi
if ! grep -q "__fixdfsi" "$mapDblFile"; then
    die "soft-double map missing __fixdfsi"
fi
rm -f "$cDblFile" "$oDblFile" "$oSdFile" "$binDblFile" "$mapDblFile"

# setjmp/longjmp from libgcc.s.  Compile a tiny program that uses
# both and verify the symbols are present in the link map.
log "check: setjmp/longjmp link from libgcc"
cSjFile="$(mktemp --suffix=.c)"
oSjFile="$(mktemp --suffix=.o)"
binSjFile="$(mktemp --suffix=.bin)"
mapSjFile="$(mktemp --suffix=.map)"
cat > "$cSjFile" <<'EOF'
typedef unsigned char jmp_buf[8];
int setjmp(jmp_buf env);
void longjmp(jmp_buf env, int val) __attribute__((noreturn));
jmp_buf env;
int trip(int x) {
    if (setjmp(env) == 0) {
        if (x > 5) longjmp(env, 42);
        return 1;
    }
    return 0;
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c "$cSjFile" -o "$oSjFile"
"$PROJECT_ROOT/tools/link816" -o "$binSjFile" \
    --text-base 0x8000 --map "$mapSjFile" \
    "$oSjFile" "$oLibgccFile" 2>/dev/null \
    || die "setjmp/longjmp test failed to link"
if ! grep -q "^setjmp" "$mapSjFile" || ! grep -q "^longjmp" "$mapSjFile"; then
    die "setjmp/longjmp not in linked map"
fi
rm -f "$cSjFile" "$oSjFile" "$binSjFile" "$mapSjFile"

# Static constructors: linker collects .init_array sections and
# emits __init_array_start / __init_array_end synthetic symbols.
# crt0 walks them via __jsl_indir.  This check verifies the
# linker collection — runtime verification is on the IIgs side
# (blocked by ROM IRQ pre-empting injected programs).
log "check: linker collects .init_array and emits boundary symbols"
cInitFile="$(mktemp --suffix=.c)"
oInitFile="$(mktemp --suffix=.o)"
binInitFile="$(mktemp --suffix=.bin)"
mapInitFile="$(mktemp --suffix=.map)"
cat > "$cInitFile" <<'EOF'
volatile unsigned short m = 0x1111;
__attribute__((constructor)) static void ctor1(void) { m = 0xAAAA; }
int main(void) { return m; }
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c "$cInitFile" -o "$oInitFile"
"$PROJECT_ROOT/tools/link816" -o "$binInitFile" \
    --text-base 0x8000 --map "$mapInitFile" \
    "$oInitFile" "$oLibgccFile" 2>/dev/null \
    || die "init_array test failed to link"
if ! grep -q "^__init_array_start" "$mapInitFile" \
    || ! grep -q "^__init_array_end" "$mapInitFile" \
    || ! grep -q "^ctor1" "$mapInitFile"; then
    die "init_array boundary symbols or ctor not in map"
fi
# Sanity: __init_array_end > __init_array_start (non-empty).  The
# addresses are 0x-prefixed hex, which shell arithmetic evaluates
# directly, so compare numerically rather than as strings.
s=$(grep -E "^__init_array_start = " "$mapInitFile" | grep -oE '0x[0-9a-f]+' | head -1)
e=$(grep -E "^__init_array_end = " "$mapInitFile" | grep -oE '0x[0-9a-f]+' | head -1)
if (( e <= s )); then
    die "init_array is empty even though ctor1 is defined"
fi
rm -f "$cInitFile" "$oInitFile" "$binInitFile" "$mapInitFile"

# Static constructors RUN end-to-end: build crt0+main+ctor program,
# load into MAME, and verify the constructor wrote a sentinel value
# into a BSS variable.  This proves crt0's init_array walk works
# at runtime (not just that the linker emitted boundary symbols).
# The whole group is skipped when `mame` is not on PATH or the ROM
# directory is missing.
if command -v mame >/dev/null && [ -d "$PROJECT_ROOT/tools/mame/roms" ]; then
    log "check: MAME runs static constructors via crt0 init_array walk"
    cCMameFile="$(mktemp --suffix=.c)"
    oCMameFile="$(mktemp --suffix=.o)"
    oCrt0File="$(mktemp --suffix=.o)"
    binCMameFile="$(mktemp --suffix=.bin)"
    cat > "$cCMameFile" <<'EOF'
volatile unsigned short ctorRan = 0;
__attribute__((constructor)) static void initFn(void) { ctorRan = 0xABCD; }
int main(void) { while (1) {} return 0; }
EOF
    "$CLANG" --target=w65816 -O2 -ffunction-sections -c \
        "$cCMameFile" -o "$oCMameFile"
    "$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-mc" -arch=w65816 \
        -filetype=obj "$PROJECT_ROOT/runtime/src/crt0.s" -o "$oCrt0File"
    "$PROJECT_ROOT/tools/link816" -o "$binCMameFile" \
        --text-base 0x1000 \
        "$oCrt0File" "$oCMameFile" "$oLibgccFile" 2>/dev/null \
        || die "constructor MAME test failed to link"
    # ctorRan lives in BSS at $2000 (linker layout).  Read $00:2000
    # via the runner; expect 0xABCD if the constructor ran.
    if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \
            "$binCMameFile" 0x002000 abcd >/dev/null 2>&1; then
        warn "MAME: constructor did not run (read \$2000 != 0xABCD)"
        die "constructor end-to-end failed"
    fi
    rm -f "$cCMameFile" "$oCMameFile" "$binCMameFile"

    # Soft-float runtime executes correctly: compute 1.5f + 2.5f and
    # verify the IEEE 754 bit pattern matches 0x40800000.
    log "check: MAME runs soft-float __addsf3 → bit pattern correct"
    cFltMame="$(mktemp --suffix=.c)"
    oFltMame="$(mktemp --suffix=.o)"
    oSfMame="$(mktemp --suffix=.o)"
    binFltMame="$(mktemp --suffix=.bin)"
    # Reuse oCrt0File from the constructor test above.
    cat > "$cFltMame" <<'EOF'
__attribute__((noinline)) static void switchToBank2(void) {
    __asm__ volatile ("sep #0x20\n.byte 0xa9, 0x02\npha\nplb\nrep #0x20\n" ::: "memory");
}
int main(void) {
    float a = 1.5f, b = 2.5f;
    float c = a + b;
    unsigned long bits;
    __builtin_memcpy(&bits, &c, 4);
    switchToBank2();
    *(volatile unsigned short *)0x5000 = (unsigned short)(bits & 0xFFFF);
    *(volatile unsigned short *)0x5002 = (unsigned short)(bits >> 16);
    while (1) {}
    return 0;
}
EOF
    "$CLANG" --target=w65816 -O2 -ffunction-sections -c "$cFltMame" -o "$oFltMame"
    "$CLANG" --target=w65816 -O2 -ffunction-sections \
        -c "$PROJECT_ROOT/runtime/src/softFloat.c" -o "$oSfMame"
    "$PROJECT_ROOT/tools/link816" -o "$binFltMame" \
        --text-base 0x1000 \
        "$oCrt0File" "$oFltMame" "$oSfMame" "$oLibgccFile" 2>/dev/null \
        || die "soft-float MAME test failed to link"
    # Low word at $02:5000, high word at $02:5002.
    if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binFltMame" --check \
            0x025000=0000 0x025002=4080 >/dev/null 2>&1; then
        die "soft-float MAME: 1.5+2.5 != 4.0 (bit pattern wrong)"
    fi
    rm -f "$cFltMame" "$oFltMame" "$oSfMame" "$binFltMame"

    # Soft-double runtime executes correctly: compute 1.5 + 2.5 and
    # verify IEEE 754 binary64 bit pattern = 0x4010000000000000.
    log "check: MAME runs soft-double __adddf3 → bit pattern correct"
    cDblMame="$(mktemp --suffix=.c)"
    oDblMame="$(mktemp --suffix=.o)"
    oSdMame="$(mktemp --suffix=.o)"
    binDblMame="$(mktemp --suffix=.bin)"
    cat > "$cDblMame" <<'EOF'
__attribute__((noinline)) static void switchToBank2(void) {
    __asm__ volatile ("sep #0x20\n.byte 0xa9, 0x02\npha\nplb\nrep #0x20\n" ::: "memory");
}
int main(void) {
    double a = 1.5, b = 2.5;
    double c = a + b;
    unsigned long long bits;
    __builtin_memcpy(&bits, &c, 8);
    switchToBank2();
    *(volatile unsigned short *)0x5000 = (unsigned short)(bits & 0xFFFF);
    *(volatile unsigned short *)0x5002 = (unsigned short)((bits >> 16) & 0xFFFF);
    *(volatile unsigned short *)0x5004 = (unsigned short)((bits >> 32) & 0xFFFF);
    *(volatile unsigned short *)0x5006 = (unsigned short)((bits >> 48) & 0xFFFF);
    while (1) {}
    return 0;
}
EOF
    "$CLANG" --target=w65816 -O2 -ffunction-sections -c "$cDblMame" -o "$oDblMame"
    "$CLANG" --target=w65816 -O2 -ffunction-sections \
        -c "$PROJECT_ROOT/runtime/src/softDouble.c" -o "$oSdMame"
    "$PROJECT_ROOT/tools/link816" -o "$binDblMame" \
        --text-base 0x1000 \
        "$oCrt0File" "$oDblMame" "$oSdMame" "$oLibgccFile" 2>/dev/null \
        || die "soft-double MAME test failed to link"
    if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binDblMame" --check \
            0x025000=0000 0x025002=0000 0x025004=0000 0x025006=4010 \
            >/dev/null 2>&1; then
        die "soft-double MAME: 1.5+2.5 != 4.0 (bit pattern wrong)"
    fi
    # oCrt0File was shared by the three MAME checks; drop it last.
    rm -f "$cDblMame" "$oDblMame" "$oSdMame" "$binDblMame" "$oCrt0File"
fi

# Fuzzer: generate 20 small random C programs and verify all compile.
# Catches backend crashes / lowering gaps the hand-written checks miss.
log "check: random C fuzzer (20 programs compile cleanly)"
if ! python3 "$PROJECT_ROOT/scripts/fuzzCompile.py" -n 20 -q > /dev/null; then
    die "random C fuzzer found compile failures"
fi

# C++ basics: virtual call (vtable indirect), Itanium ABI symbol
# mangling, global ctor → .init_array entry.  Compile-only check.
log "check: clang++ compiles class with virtual + non-trivial ctor" cppFile="$(mktemp --suffix=.cc)" oCppFile="$(mktemp --suffix=.o)" binCppFile="$(mktemp --suffix=.bin)" mapCppFile="$(mktemp --suffix=.map)" CLANGXX="${CLANG%clang}clang++" cat > "$cppFile" <<'EOF' extern int sideEffect(int); struct Base { virtual int v(int x) const { return x + 1; } }; struct Derived : Base { int v(int x) const override { return x * 2; } Derived() { sideEffect(99); } }; Derived g; int call(Base *b, int x) { return b->v(x); } EOF "$CLANGXX" --target=w65816 -O2 -ffunction-sections \ -fno-exceptions -fno-rtti -c "$cppFile" -o "$oCppFile" # Just check the .o has the expected sections / mangled symbols. syms="$("$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-objdump" \ --triple=w65816 -t "$oCppFile" 2>/dev/null)" secs="$("$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-objdump" \ --triple=w65816 -h "$oCppFile" 2>/dev/null)" if ! printf '%s\n' "$syms" | grep -qE '_Z4callP4Basei'; then die "C++: no Itanium-mangled call symbol" fi if ! printf '%s\n' "$secs" | grep -qE '\.init_array'; then die "C++: no .init_array for non-trivial global ctor" fi rm -f "$cppFile" "$oCppFile" "$binCppFile" "$mapCppFile" # End-to-end MAME execution: compile a tiny C program that writes # a known value to $E0 (DP), assemble + link to a raw flat binary, # load into MAME's apple2gs RAM at $1000, set PC, run, read back # $E0, verify the value matches. This is the first byte-level # runtime correctness check in the suite — proves compile-link-run # actually works, not just that asm-pattern grep matches. if command -v mame >/dev/null && [ -d "$PROJECT_ROOT/tools/mame/roms" ]; then log "check: MAME runs compiled code and reads back expected value" cMameFile="$(mktemp --suffix=.c)" sMameFile="$(mktemp --suffix=.s)" oMameFile="$(mktemp --suffix=.o)" binMameFile="$(mktemp --suffix=.bin)" # Write directly to DP $E0..$E1 from C. 
cat > "$cMameFile" <<'EOF' void _start(void) { *(volatile unsigned short *)0xE0 = 0x1234 + 0x5678; // 0x68AC while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cMameFile" -o "$oMameFile" # Link with text-base 0x1000 so PC-relative branches resolve # correctly when loaded at that address. "$PROJECT_ROOT/tools/link816" -o "$binMameFile" \ --text-base 0x1000 "$oMameFile" "$oLibgccFile" 2>/dev/null if [ ! -s "$binMameFile" ]; then die "MAME: failed to link test binary" fi if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binMameFile" 0xe0 68ac >/dev/null 2>&1; then die "MAME: read at \$E0 != 0x68AC after running compiled C" fi rm -f "$cMameFile" "$sMameFile" "$oMameFile" "$binMameFile" # Recursive call regression: catches the empty-descending-SP # off-by-one in eliminateFrameIndex. fact(5)=120 ($0078) and the # value passes through main() → fact(5) → result-store, which # only works if locals don't collide with JSL retaddr push. log "check: MAME runs recursive fact(5) → 120 (off-by-one regression)" cFactFile="$(mktemp --suffix=.c)" oFactFile="$(mktemp --suffix=.o)" binFactFile="$(mktemp --suffix=.bin)" cat > "$cFactFile" <<'EOF' __attribute__((noinline)) void switchToBank2(void) { __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); } unsigned short fact(unsigned short n) { if (n <= 1) return 1; return n * fact(n - 1); } int main(void) { unsigned short r = fact(5); switchToBank2(); *(volatile unsigned short *)0x5000 = r; while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cFactFile" -o "$oFactFile" oLibcF="$(mktemp --suffix=.o)" oSfF="$(mktemp --suffix=.o)" oSdF="$(mktemp --suffix=.o)" "$CLANG" --target=w65816 -O2 -ffunction-sections \ -c "$PROJECT_ROOT/runtime/src/libc.c" -o "$oLibcF" "$CLANG" --target=w65816 -O2 -ffunction-sections \ -c "$PROJECT_ROOT/runtime/src/softFloat.c" -o "$oSfF" "$CLANG" --target=w65816 -O2 -ffunction-sections \ -c "$PROJECT_ROOT/runtime/src/softDouble.c" -o "$oSdF" 
oCrt0F="$(mktemp --suffix=.o)" "$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-mc" -arch=w65816 \ -filetype=obj "$PROJECT_ROOT/runtime/src/crt0.s" -o "$oCrt0F" "$PROJECT_ROOT/tools/link816" -o "$binFactFile" --text-base 0x1000 \ "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oFactFile" \ >/dev/null 2>&1 if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binFactFile" 0x025000 0078 >/dev/null 2>&1; then die "MAME: fact(5) != 120 (off-by-one stack-rel skew regression)" fi rm -f "$cFactFile" "$oFactFile" "$binFactFile" # Loop with flag-corrupting TXA between counter-DEC and BNE. # Canary for the PHP/PLP wrap fix that excludes stack-rel ops: # without the wrap-tightening, the PHP-saved P gets clobbered # by an in-wrap sta d,S and PLP loads garbage, making BNE # branch forever. Iterative fib(10) = 55 ($0037). log "check: MAME runs iterative fib(10) → 55 (PHP/PLP wrap regression)" cFibFile2="$(mktemp --suffix=.c)" oFibFile2="$(mktemp --suffix=.o)" binFibFile2="$(mktemp --suffix=.bin)" cat > "$cFibFile2" <<'EOF' __attribute__((noinline)) void switchToBank2(void) { __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); } __attribute__((noinline)) unsigned short fib(unsigned short n) { if (n < 2) return n; unsigned short a = 0, b = 1; for (unsigned short i = 2; i <= n; i++) { unsigned short t = a + b; a = b; b = t; } return b; } int main(void) { unsigned short r = fib(10); switchToBank2(); *(volatile unsigned short *)0x5000 = r; while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cFibFile2" -o "$oFibFile2" "$PROJECT_ROOT/tools/link816" -o "$binFibFile2" --text-base 0x1000 \ "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oFibFile2" \ >/dev/null 2>&1 if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binFibFile2" 0x025000 0037 >/dev/null 2>&1; then die "MAME: iterative fib(10) != 55 (PHP/PLP wrap regression)" fi rm -f "$cFibFile2" "$oFibFile2" "$binFibFile2" # Recursive fib with phi-resolution across loop-exit edge. 
# Canary for the SpillToX cross-block-use check: without it, # the peephole elided the loop's STA-to-merge-slot and the # merge block read the stale bb.0-init value (0) instead of # the loop accumulator. fib(7)=13 ($000D). log "check: MAME runs recursive fib(7) → 13 (SpillToX cross-block regression)" cFibFile3="$(mktemp --suffix=.c)" oFibFile3="$(mktemp --suffix=.o)" binFibFile3="$(mktemp --suffix=.bin)" cat > "$cFibFile3" <<'EOF' __attribute__((noinline)) void switchToBank2(void) { __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); } unsigned short fib(unsigned short n) { if (n < 2) return n; return fib(n-1) + fib(n-2); } int main(void) { unsigned short r = fib(7); switchToBank2(); *(volatile unsigned short *)0x5000 = r; while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cFibFile3" -o "$oFibFile3" "$PROJECT_ROOT/tools/link816" -o "$binFibFile3" --text-base 0x1000 \ "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oFibFile3" \ >/dev/null 2>&1 if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binFibFile3" 0x025000 000d >/dev/null 2>&1; then die "MAME: recursive fib(7) != 13 (SpillToX cross-block regression)" fi rm -f "$cFibFile3" "$oFibFile3" "$binFibFile3" # Array-sum loop with indirect deref + counter-DEC + LDA # between DEC and BNE. Canary for the disp-bump-inside-wrap # fix: PHP decrements S, so any stack-rel inside the wrap # needs ImmOffset += 1 to compensate. sum 11+22+...+88 = 396 # ($018C). 
log "check: MAME runs array sumTable → 396 (disp-bump-inside-wrap regression)" cArrFile="$(mktemp --suffix=.c)" oArrFile="$(mktemp --suffix=.o)" binArrFile="$(mktemp --suffix=.bin)" cat > "$cArrFile" <<'EOF' __attribute__((noinline)) void switchToBank2(void) { __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); } unsigned short table[8] = { 11, 22, 33, 44, 55, 66, 77, 88 }; __attribute__((noinline)) unsigned short sumTable(unsigned short *arr, unsigned short n) { unsigned short s = 0; for (unsigned short i = 0; i < n; i++) s += arr[i]; return s; } int main(void) { unsigned short r = sumTable(table, 8); switchToBank2(); *(volatile unsigned short *)0x5000 = r; while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cArrFile" -o "$oArrFile" "$PROJECT_ROOT/tools/link816" -o "$binArrFile" --text-base 0x1000 \ "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oArrFile" \ >/dev/null 2>&1 if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binArrFile" 0x025000 018c >/dev/null 2>&1; then die "MAME: sumTable(11..88) != 396 (disp-bump-inside-wrap regression)" fi rm -f "$cArrFile" "$oArrFile" "$binArrFile" # Pointer-to-pointer dereference: catches the linker missing # .data relocations. `int *p=&v; int **pp=&p;` initializers # need the linker to patch &p into pp's storage; without that, # **pp reads zero. 
log "check: MAME runs **pp dereference → 0xBEEF (data-reloc regression)"
cPtrFile="$(mktemp --suffix=.c)"
oPtrFile="$(mktemp --suffix=.o)"
binPtrFile="$(mktemp --suffix=.bin)"
cat > "$cPtrFile" <<'EOF'
__attribute__((noinline)) void switchToBank2(void) {
    __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
}
unsigned short v = 0xBEEF;
unsigned short *p = &v;
unsigned short **pp = &p;
int main(void) {
    unsigned short x = **pp;
    switchToBank2();
    *(volatile unsigned short *)0x5000 = x;
    while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
    "$cPtrFile" -o "$oPtrFile"
# Linker output stays hidden on success, but is replayed on failure so a
# broken link is not reduced to a silent `set -e` exit.
if ! linkLog="$("$PROJECT_ROOT/tools/link816" -o "$binPtrFile" --text-base 0x1000 \
        "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oPtrFile" 2>&1)"; then
    printf '%s\n' "$linkLog" >&2
    die "link816 failed for the **pp dereference test"
fi
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \
        "$binPtrFile" 0x025000 beef >/dev/null 2>&1; then
    die "MAME: **pp != 0xBEEF (data-reloc regression)"
fi
rm -f "$cPtrFile" "$oPtrFile" "$binPtrFile"

# i32 libcall with arg0 in A:X — catches the SpillToX clobber
# of live-in $x.  shiftRight(0x12345678, 4) = 0x01234567.
log "check: MAME runs i32 (a >> n) libcall → 0x01234567 (X-live SpillToX regression)"
cI32File="$(mktemp --suffix=.c)"
oI32File="$(mktemp --suffix=.o)"
binI32File="$(mktemp --suffix=.bin)"
# The 32-bit result is written as two 16-bit halves (low word at $5000,
# high word at $5002) so each half can be checked separately.
cat > "$cI32File" <<'EOF'
__attribute__((noinline)) void switchToBank2(void) {
    __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
}
__attribute__((noinline))
unsigned long shiftRight(unsigned long a, int n) { return a >> n; }
int main(void) {
    unsigned long s = shiftRight(0x12345678UL, 4);
    switchToBank2();
    *(volatile unsigned short *)0x5000 = (unsigned short)(s & 0xFFFF);
    *(volatile unsigned short *)0x5002 = (unsigned short)((s >> 16) & 0xFFFF);
    while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
    "$cI32File" -o "$oI32File"
if ! linkLog="$("$PROJECT_ROOT/tools/link816" -o "$binI32File" --text-base 0x1000 \
        "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oI32File" 2>&1)"; then
    printf '%s\n' "$linkLog" >&2
    die "link816 failed for the i32 shiftRight test"
fi
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \
        "$binI32File" --check 0x025000=4567 0x025002=0123 >/dev/null 2>&1; then
    die "MAME: shiftRight(0x12345678, 4) != 0x01234567 (X-live SpillToX regression)"
fi
rm -f "$cI32File" "$oI32File" "$binI32File"

# Variadic int sum.  Catches the va_arg-aligns-up bug.  Default
# va_arg expansion rounds ap to the type's preferred alignment
# (S16 = 2 bytes), but PHA-pushed varargs land at byte-granular
# addresses, so aligning skips the low byte.
log "check: MAME runs vararg sum(3,10,20,30) → 60 (VAARG-no-align regression)"
cVaFile="$(mktemp --suffix=.c)"
oVaFile="$(mktemp --suffix=.o)"
binVaFile="$(mktemp --suffix=.bin)"
# <stdarg.h> supplies va_list / va_start / va_arg / va_end used by sum().
cat > "$cVaFile" <<'EOF'
#include <stdarg.h>
__attribute__((noinline)) void switchToBank2(void) {
    __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
}
int sum(int n, ...) {
    va_list ap;
    va_start(ap, n);
    int s = 0;
    for (int i = 0; i < n; i++) s += va_arg(ap, int);
    va_end(ap);
    return s;
}
int main(void) {
    int s = sum(3, 10, 20, 30);
    switchToBank2();
    *(volatile unsigned short *)0x5000 = (unsigned short)s;
    while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
    "$cVaFile" -o "$oVaFile"
if ! linkLog="$("$PROJECT_ROOT/tools/link816" -o "$binVaFile" --text-base 0x1000 \
        "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oVaFile" 2>&1)"; then
    printf '%s\n' "$linkLog" >&2
    die "link816 failed for the vararg sum test"
fi
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \
        "$binVaFile" 0x025000 003c >/dev/null 2>&1; then
    die "MAME: sum(3,10,20,30) != 60 (VAARG-no-align regression)"
fi
rm -f "$cVaFile" "$oVaFile" "$binVaFile"

# Negative-index pointer access (`p[-1]`).  Catches the
# 24-bit-Y-add bug in (sr,S),Y that crosses bank boundaries
# for signed-negative Y.  arr[-1] from &data[2] should give
# data[1] = 22 ($0016).
log "check: MAME runs p[-1] indirect → 22 (negative-Y indy regression)"
cNyFile="$(mktemp --suffix=.c)"
oNyFile="$(mktemp --suffix=.o)"
binNyFile="$(mktemp --suffix=.bin)"
# readPrev() is noinline so the p[-1] access goes through a real pointer
# argument instead of being folded to a constant address.
cat > "$cNyFile" <<'EOF'
__attribute__((noinline)) void switchToBank2(void) {
    __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
}
unsigned short data[4] = { 11, 22, 33, 44 };
__attribute__((noinline))
unsigned short readPrev(unsigned short *p) { return p[-1]; }
int main(void) {
    unsigned short r = readPrev(&data[2]);
    switchToBank2();
    *(volatile unsigned short *)0x5000 = r;
    while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections -c \
    "$cNyFile" -o "$oNyFile"
# Linker output stays hidden on success, but is replayed on failure so a
# broken link is not reduced to a silent `set -e` exit.
if ! linkLog="$("$PROJECT_ROOT/tools/link816" -o "$binNyFile" --text-base 0x1000 \
        "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oNyFile" 2>&1)"; then
    printf '%s\n' "$linkLog" >&2
    die "link816 failed for the p[-1] indirect test"
fi
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \
        "$binNyFile" 0x025000 0016 >/dev/null 2>&1; then
    die "MAME: p[-1] != 22 (negative-Y indy regression)"
fi
rm -f "$cNyFile" "$oNyFile" "$binNyFile"

# Loop with conditional dual-effect on n (n+=10 vs n+=1) and on
# fmt (advance 2 vs 1).  Catches the TiedDefSpill cross-block
# redirect bug — without dominance check, the exit returns the
# iter-N-1 value from the spill slot rather than iter-N.
log "check: MAME runs parse2('HABCD') → 13 (TiedDefSpill dominance)" cP2File="$(mktemp --suffix=.c)" oP2File="$(mktemp --suffix=.o)" binP2File="$(mktemp --suffix=.bin)" cat > "$cP2File" <<'EOF' __attribute__((noinline)) void switchToBank2(void) { __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); } __attribute__((noinline)) int parse(const char *fmt) { int n = 0; while (*fmt) { char c = *fmt++; if (c == 'A') { char spec = *fmt++; (void)spec; n += 10; } else { n++; } } return n; } int main(void) { int r = parse("HABCD"); switchToBank2(); *(volatile unsigned short *)0x5000 = (unsigned short)r; while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cP2File" -o "$oP2File" "$PROJECT_ROOT/tools/link816" -o "$binP2File" --text-base 0x1000 \ "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oP2File" \ >/dev/null 2>&1 if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binP2File" 0x025000 000d >/dev/null 2>&1; then die "MAME: parse('HABCD') != 13 (TiedDefSpill dominance regression)" fi rm -f "$cP2File" "$oP2File" "$binP2File" # Bubble sort with the loop form that compiles correctly # (i=1..n; inner j+1 "$cBsFile" <<'EOF' __attribute__((noinline)) void switchToBank2(void) { __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); } unsigned short data[4] = { 4, 1, 3, 2 }; __attribute__((noinline)) void bubbleSort(unsigned short *arr, unsigned short n) { for (unsigned short i = 1; i < n; i++) { for (unsigned short j = 0; j + 1 < n - i + 1; j++) { if (arr[j] > arr[j+1]) { unsigned short t = arr[j]; arr[j] = arr[j+1]; arr[j+1] = t; } } } } int main(void) { bubbleSort(data, 4); unsigned short d0 = data[0], d1 = data[1], d2 = data[2], d3 = data[3]; switchToBank2(); *(volatile unsigned short *)0x5000 = d0; *(volatile unsigned short *)0x5002 = d1; *(volatile unsigned short *)0x5004 = d2; *(volatile unsigned short *)0x5006 = d3; while (1) {} } EOF "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ "$cBsFile" -o "$oBsFile" 
"$PROJECT_ROOT/tools/link816" -o "$binBsFile" --text-base 0x1000 \ "$oCrt0F" "$oLibcF" "$oSfF" "$oSdF" "$oLibgccFile" "$oBsFile" \ >/dev/null 2>&1 if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$binBsFile" --check 0x025000=0001 0x025002=0002 \ 0x025004=0003 0x025006=0004 >/dev/null 2>&1; then die "MAME: bubbleSort([4,1,3,2]) != [1,2,3,4]" fi rm -f "$cBsFile" "$oBsFile" "$binBsFile" \ "$oLibcF" "$oSfF" "$oSdF" "$oCrt0F" else warn "MAME or apple2gs ROMs not installed; skipping end-to-end test" fi # Inline asm with W65816 register constraints — required for # toolbox calls and hand-tuned asm kernels. Verify the compiler # accepts 'a' / 'x' / 'y' as register-class constraints AND # routes them to the actual registers. log "check: inline asm with W65816 register constraints" cAsmFile="$(mktemp --suffix=.c)" sAsmFile="$(mktemp --suffix=.s)" trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile" "$cMixFile" "$sMixFile" "$cLinkFile" "$oLinkFile" "$oLibgccFile" "$binLinkFile" "$mapLinkFile" "$cFltFile" "$oFltFile" "$oSfFile" "$binFltFile" "$mapFltFile" "$cAsmFile" "$sAsmFile"' EXIT cat > "$cAsmFile" <<'EOF' int incA(int x) { int r; __asm__ volatile ("inc a" : "=a"(r) : "a"(x)); return r; } EOF "$CLANG" --target=w65816 -O2 -S "$cAsmFile" -o "$sAsmFile" if ! grep -qE '^\s*inc a\s*$' "$sAsmFile"; then cat "$sAsmFile" >&2 die "inline asm: 'inc a' missing from output" fi # Linker exports the synthetic __bss_start / __bss_end / etc. # symbols so crt0 can do BSS init and runtime malloc finds the # heap top. 
log "check: link816 emits __bss_start, __bss_end, __heap_start"
cBssFile="$(mktemp --suffix=.c)"
oBssFile="$(mktemp --suffix=.o)"
binBssFile="$(mktemp --suffix=.bin)"
mapBssFile="$(mktemp --suffix=.map)"
# Re-register the EXIT trap with the four new temp files appended; the
# single-quoted list is expanded only when the trap fires.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile" "$cMixFile" "$sMixFile" "$cLinkFile" "$oLinkFile" "$oLibgccFile" "$binLinkFile" "$mapLinkFile" "$cFltFile" "$oFltFile" "$oSfFile" "$binFltFile" "$mapFltFile" "$cAsmFile" "$sAsmFile" "$cBssFile" "$oBssFile" "$binBssFile" "$mapBssFile"' EXIT
# Four uninitialised globals give the object some BSS to lay out.
cat > "$cBssFile" <<'EOF'
char a, b, c, d;
int main(void) { return 0; }
EOF
"$CLANG" --target=w65816 -O2 -c "$cBssFile" -o "$oBssFile"
# stderr stays silenced as before; the explicit die names the failing
# step instead of leaving a bare `set -e` exit.
"$PROJECT_ROOT/tools/link816" -o "$binBssFile" \
    --text-base 0x8000 --bss-base 0x2000 --map "$mapBssFile" \
    "$oBssFile" "$oLibgccFile" 2>/dev/null \
    || die "link816 failed for the synthetic-symbol check"
# Each symbol must appear in the map file as a line starting "<name> = ".
for sym in __bss_start __bss_end __heap_start __text_start; do
    if ! grep -q "^${sym} = " "$mapBssFile"; then
        die "linker missing synthetic symbol: ${sym}"
    fi
done

# OMF emitter — wrap the linked binary as a single-segment OMF
# file ready for IIgs loading.
log "check: omfEmit produces a valid OMF v2.1 single-segment file" omfFile="$(mktemp --suffix=.omf)" trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile" "$irCoalesceFile" "$sCoalesceFile" "$cMixFile" "$sMixFile" "$cLinkFile" "$oLinkFile" "$oLibgccFile" "$binLinkFile" "$mapLinkFile" "$cFltFile" "$oFltFile" "$oSfFile" "$binFltFile" "$mapFltFile" "$cAsmFile" "$sAsmFile" "$cBssFile" "$oBssFile" "$binBssFile" "$mapBssFile" "$omfFile"' EXIT "$PROJECT_ROOT/tools/omfEmit" \ --input "$binBssFile" --map "$mapBssFile" \ --base 0x8000 --entry main --output "$omfFile" 2>/dev/null if [ ! -s "$omfFile" ]; then die "omfEmit produced empty/missing OMF" fi # Sanity-check the OMF: VERSION byte at offset 15 should be 0x21 # (OMF v2.1). KIND at offset 20-21 should be 0x0000 (CODE). ver=$(od -An -tx1 -N 1 -j 15 "$omfFile" | tr -d ' ') if [ "$ver" != "21" ]; then die "OMF version byte at offset 15 is 0x$ver (expected 0x21 = v2.1)" fi fi log "all smoke checks passed"