#!/usr/bin/env bash
# W65816 backend smoke test. Run after any change to confirm the
# scaffold still builds and llc still registers the target. Non-zero
# exit on any failure.
#
# Usage: scripts/smokeTest.sh [--build]
#   --build   Run ninja to (re)build LLVMW65816* + llc before testing.
#             Without this flag the script assumes tools/llvm-mos-build
#             is already up to date.

set -euo pipefail

source "$(dirname "$0")/common.sh"

# Resource caps for child compilers. A bug in the W65816 backend can send
# clang/llc into a runaway combine/inserter loop that allocates tens of GB
# of RAM. When that happens the kernel OOM-killer takes down the entire
# tmux scope (bash, the compiler, and the parent Claude Code session with
# it). Bounding virtual memory and CPU time here turns "OOM kills the
# terminal" into "compiler dies with SIGSEGV / SIGXCPU and we get a clean
# error." Numbers are well above what a healthy compile of these tiny
# test inputs needs (~200 MB / a few seconds), so legitimate work is
# unaffected.
ulimit -v $((4 * 1024 * 1024))   # 4 GB virtual memory ceiling
ulimit -t 90                     # 90 CPU-seconds per process

BUILD_DIR="$TOOLS_DIR/llvm-mos-build"
LLC="$BUILD_DIR/bin/llc"
LLVM_MC="$BUILD_DIR/bin/llvm-mc"

doBuild=0
for arg in "$@"; do
    case "$arg" in
        --build) doBuild=1 ;;
        *) die "unknown flag: $arg" ;;
    esac
done

if [ "$doBuild" -eq 1 ]; then
    log "ninja LLVMW65816* llc llvm-mc llvm-objdump"
    ninja -C "$BUILD_DIR" LLVMW65816Info LLVMW65816Desc LLVMW65816CodeGen \
        LLVMW65816AsmParser LLVMW65816Disassembler llc llvm-mc llvm-objdump
fi

# Checked only after the optional build: the message recommends --build,
# so --build must get the chance to produce llc before we give up.
[ -x "$LLC" ] || die "llc not found at $LLC; run setup.sh and applyBackend.sh, or pass --build"

# 1. Target must be registered.
log "check: llc --version lists w65816"
# Capture first, then grep: with `pipefail`, `llc | grep -q` can report a
# failure when grep exits early and llc is killed by SIGPIPE.
llcTargets="$("$LLC" --version 2>/dev/null || true)"
if ! grep -q "^[[:space:]]*w65816[[:space:]]" <<<"$llcTargets"; then
    die "llc does not list the w65816 target"
fi

# 2. Empty IR must compile to nothing.
log "check: llc -march=w65816 -filetype=null /dev/null exits 0"
"$LLC" -march=w65816 -filetype=null /dev/null

# 3. Trivial IR that shouldn't touch our (unimplemented) codegen paths.
tmp="$(mktemp --suffix=.ll)"
# The trap covers a failure inside this check; the file is removed
# explicitly once the check passes, because any later `trap ... EXIT`
# replaces this handler.
trap 'rm -f -- "$tmp"' EXIT
cat > "$tmp" <<'EOF'
; ModuleID = 'smoke'
target triple = "w65816-unknown-unknown"
; Empty module: exercises target initialization only.
EOF
log "check: llc accepts an empty module with w65816 triple"
"$LLC" -filetype=null "$tmp"
rm -f -- "$tmp"

# 4. MC layer round-trip. Assemble a representative mix of addressing
#    modes and mode-switching instructions and grep for the expected
#    encoding bytes. Hex-byte strings are stable across llvm-mc
#    formatting changes, unlike full-line string matching.
if [ -x "$LLVM_MC" ]; then
    log "check: llvm-mc -arch=w65816 emits expected encodings"
    # Only exercise instructions that round-trip cleanly:
    #   - LDA/LDX/LDY immediates without explicit force use the _Imm16
    #     form (codegen-dominant path). A pure `lda #x` assembles to
    #     LDA_Imm16 since the _Imm8 variant is isCodeGenOnly.
    mcInput='
        nop
        rep #0x30
        sep #0x20
        lda #0x1234
        sta 0x10
        sta 0x1000
        sta 0x010000
        mvn 0x01, 0x02
        jsl 0x012345'
    mcOut="$(printf '%s\n' "$mcInput" | "$LLVM_MC" -arch=w65816 -show-encoding 2>&1)"

    # assertHas ENCODING
    # Dies (after dumping llvm-mc's output to stderr) unless $mcOut
    # contains ENCODING as a fixed string.
    assertHas() {
        if ! grep -qF -- "$1" <<<"$mcOut"; then
            warn "missing expected encoding: $1"
            printf '%s\n' "$mcOut" >&2
            die "llvm-mc did not produce expected encoding"
        fi
    }
    assertHas "[0xea]"
    assertHas "[0xc2,0x30]"
    assertHas "[0xe2,0x20]"
    assertHas "[0xa9,0x34,0x12]"
    assertHas "[0x85,0x10]"
    assertHas "[0x8d,0x00,0x10]"
    assertHas "[0x8f,0x00,0x00,0x01]"
    assertHas "[0x54,0x01,0x02]"
    assertHas "[0x22,0x45,0x23,0x01]"
else
    warn "llvm-mc not built; skipping MC round-trip check"
fi

# 5. Disassembler round-trip. A raw byte stream fed to llvm-mc
#    --disassemble should produce the mnemonic we expect.
if [ -x "$LLVM_MC" ]; then
    log "check: llvm-mc --disassemble decodes bytes back to mnemonics"
    disasmOut="$(printf '0xea 0xa9 0x34 0x12 0x85 0x10 0x8d 0x00 0x10 0x6b\n' \
        | "$LLVM_MC" --disassemble --triple=w65816 2>&1)"
    for mnem in "nop" "lda #0x1234" "sta 0x10" "sta 0x1000" "rtl"; do
        if ! grep -qF -- "$mnem" <<<"$disasmOut"; then
            warn "disassembler missing: $mnem"
            printf '%s\n' "$disasmOut" >&2
            die "disassembler round-trip failed"
        fi
    done
fi

# 6. End-to-end codegen: IR -> asm -> ELF -> disassembly.
#    This is the first real codegen test: verifies that our LowerReturn,
#    DAG pattern for the i16 constant pseudo, and prologue-emitting
#    frame lowering produce runnable 65816 machine code.
OBJDUMP="$BUILD_DIR/bin/llvm-objdump"
if [ -x "$LLC" ] && [ -x "$LLVM_MC" ] && [ -x "$OBJDUMP" ]; then
    log "check: end-to-end IR -> asm -> ELF -> disasm for a trivial function"
    irFile="$(mktemp --suffix=.ll)"
    sFile="$(mktemp --suffix=.s)"
    oFile="$(mktemp --suffix=.o)"
    # The trap handles a failure inside this check. The files are also
    # removed explicitly on success, because every later `trap ... EXIT`
    # replaces this handler and later checks reuse $irFile/$sFile.
    trap 'rm -f -- "$irFile" "$sFile" "$oFile"' EXIT
    cat > "$irFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @answer() {
  ret i16 42
}
EOF
    "$LLC" -march=w65816 "$irFile" -o "$sFile"
    "$LLVM_MC" -arch=w65816 -filetype=obj "$sFile" -o "$oFile"
    disasm="$("$OBJDUMP" --triple=w65816 -d "$oFile" 2>&1)"
    for expect in "rep #0x30" "lda #0x2a" "rtl"; do
        if ! grep -qF -- "$expect" <<<"$disasm"; then
            warn "end-to-end pipeline missing: $expect"
            printf '%s\n' "$disasm" >&2
            die "end-to-end pipeline failed"
        fi
    done
    rm -f -- "$irFile" "$sFile" "$oFile"
fi

# 7. Real codegen check: a non-trivial function exercising globals,
#    arithmetic, branches, bitwise. This tests our DAG selection
#    patterns and AsmPrinter pseudo expansions.
if [ -x "$LLC" ]; then
    log "check: llc compiles a multi-pattern function"
    irFile="$(mktemp --suffix=.ll)"
    sFile="$(mktemp --suffix=.s)"
    # The trap handles a failure inside this check; the files are removed
    # explicitly on success because the next `trap ... EXIT` replaces
    # this handler.
    trap 'rm -f -- "$irFile" "$sFile"' EXIT
    cat > "$irFile" <<'EOF'
target triple = "w65816-unknown-unknown"
@a = global i16 0
@b = global i16 0
define i16 @demo() {
  %x = load i16, ptr @a
  %y = load i16, ptr @b
  %s = add i16 %x, %y
  %m = and i16 %s, 4095
  %c = icmp ult i16 %m, 100
  br i1 %c, label %lo, label %hi
lo:
  ret i16 0
hi:
  ret i16 %m
}
EOF
    "$LLC" -march=w65816 "$irFile" -o "$sFile"
    for expect in "rep #0x30" "lda a" "clc" "adc b" "and #0xfff" "cmp #0x64" "bcs" "rtl"; do
        if ! grep -qF -- "$expect" "$sFile"; then
            warn "multi-pattern test missing: $expect"
            cat "$sFile" >&2
            die "multi-pattern test failed"
        fi
    done
    rm -f -- "$irFile" "$sFile"
fi

# 8. Function call check: caller passes i16 in A, callee adds, returns.
if [ -x "$LLC" ]; then
    log "check: llc compiles a function call (single i16 arg in A)"
    irCallFile="$(mktemp --suffix=.ll)"
    sCallFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$irCallFile" "$sCallFile"' EXIT
    cat > "$irCallFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @inc(i16 %x) {
  %r = add i16 %x, 1
  ret i16 %r
}
define i16 @caller() {
  %r = call i16 @inc(i16 41)
  ret i16 %r
}
EOF
    "$LLC" -march=w65816 "$irCallFile" -o "$sCallFile"
    # Caller passes 41 in A and JSL's inc. Inc is now an `inc a`
    # peephole (was clc; adc #1 before the INA_PSEUDO pattern).
    for expect in "lda #0x29" "jsl inc" "inc a"; do
        if ! grep -qF -- "$expect" "$sCallFile"; then
            warn "call test missing: $expect"
            cat "$sCallFile" >&2
            die "call test failed"
        fi
    done
    rm -f -- "$irCallFile" "$sCallFile"
fi

# 9. Multi-arg sum: 3-arg function reads args 1 and 2 via stack-relative
#    addressing.
# Temp-file handling in the checks below: each check installs a trap that
# names only its own files (for the failure path) and removes them itself
# once it passes. A cumulative trap list is fragile: every later
# `trap ... EXIT` replaces the handler, and a list that names a variable
# that was never assigned aborts the handler under `set -u`.
if [ -x "$LLC" ]; then
    log "check: llc compiles a 3-arg function (stack-relative reads)"
    irMaFile="$(mktemp --suffix=.ll)"
    sMaFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$irMaFile" "$sMaFile"' EXIT
    cat > "$irMaFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @sum3(i16 %a, i16 %b, i16 %c) {
  %ab = add i16 %a, %b
  %r = add i16 %ab, %c
  ret i16 %r
}
EOF
    "$LLC" -march=w65816 "$irMaFile" -o "$sMaFile"
    for expect in "adc 0x4, s" "adc 0x6, s" "rtl"; do
        if ! grep -qF -- "$expect" "$sMaFile"; then
            warn "multi-arg test missing: $expect"
            cat "$sMaFile" >&2
            die "multi-arg test failed"
        fi
    done
    rm -f -- "$irMaFile" "$sMaFile"
fi

# 10. i8 codegen: pure-i8 function uses SEP #$20 prologue and `inc a`.
if [ -x "$LLC" ]; then
    log "check: llc compiles a pure-i8 function (SEP #\$20 prologue)"
    irI8File="$(mktemp --suffix=.ll)"
    sI8File="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$irI8File" "$sI8File"' EXIT
    cat > "$irI8File" <<'EOF'
target triple = "w65816-unknown-unknown"
define i8 @i8_inc(i8 %x) {
  %r = add i8 %x, 1
  ret i8 %r
}
EOF
    "$LLC" -march=w65816 "$irI8File" -o "$sI8File"
    for expect in "sep #0x20" "inc a" "rtl"; do
        if ! grep -qF -- "$expect" "$sI8File"; then
            warn "i8 test missing: $expect"
            cat "$sI8File" >&2
            die "i8 test failed"
        fi
    done
    rm -f -- "$irI8File" "$sI8File"
fi

# 11a. SETCC via clang: a > b returns 0/1. Exercises the multi-branch
#     CC path (BEQ + BPL diamond, since SETGT can't be a single Bxx).
CLANG="$BUILD_DIR/bin/clang"
if [ -x "$CLANG" ]; then
    log "check: clang compiles a > b via multi-branch SETCC"
    cFile="$(mktemp --suffix=.c)"
    sCmpFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile" "$sCmpFile"' EXIT
    cat > "$cFile" <<'EOF'
int gt(int a, int b) { return a > b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile" -o "$sCmpFile"
    # Expect a CMP, then BEQ + BPL forming the multi-branch diamond.
    for expect in "cmp 0x4, s" "lda #0x1" "beq" "bpl" "lda #0x0"; do
        if ! grep -qF -- "$expect" "$sCmpFile"; then
            warn "setcc gt test missing: $expect"
            cat "$sCmpFile" >&2
            die "setcc gt test failed"
        fi
    done
    rm -f -- "$cFile" "$sCmpFile"
fi

# 11b. SELECT via clang: c ? a : b returns one of two constants.
if [ -x "$CLANG" ]; then
    log "check: clang compiles c ? 100 : 200 via SELECT_CC"
    cFile2="$(mktemp --suffix=.c)"
    sSelFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile2" "$sSelFile"' EXIT
    cat > "$cFile2" <<'EOF'
int sel(int c) { return c ? 100 : 200; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile2" -o "$sSelFile"
    for expect in "cmp #0x0" "lda #0xc8" "beq" "lda #0x64"; do
        if ! grep -qF -- "$expect" "$sSelFile"; then
            warn "select test missing: $expect"
            cat "$sSelFile" >&2
            die "select test failed"
        fi
    done
    rm -f -- "$cFile2" "$sSelFile"
fi

# 11c. Two-Acc16 op via clang: a - b where both are non-foldable Acc16.
# Caller-side b lives in memory (FI), so this matches via SBCfi without
# the spill — but a + b + c chains through a true two-Acc16 add.
if [ -x "$CLANG" ]; then
    log "check: clang compiles two-Acc16 ops via spill (chained add)"
    cFile3="$(mktemp --suffix=.c)"
    sChainFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile3" "$sChainFile"' EXIT
    cat > "$cFile3" <<'EOF'
// max3 forces two-Acc16: outer SELECT_CC compares one Acc16 PHI value
// to another Acc16 PHI value (m vs c, both computed values).
int max3(int a, int b, int c) {
    int m = a > b ? a : b;
    return m > c ? m : c;
}
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile3" -o "$sChainFile"
    # Expect at least one sta-spill paired with cmp to a stack-relative
    # slot - the signature of the two-Acc16 CMP_RR custom inserter.
    if ! grep -qE 'sta 0x[0-9a-f]+, s' "$sChainFile" \
        || ! grep -qE 'cmp 0x[0-9a-f]+, s' "$sChainFile"; then
        cat "$sChainFile" >&2
        die "two-Acc16 (max3) didn't spill+cmp via stack-relative"
    fi
    rm -f -- "$cFile3" "$sChainFile"
fi

# 11d. Multiply via libcall.
if [ -x "$CLANG" ]; then
    log "check: clang emits __mulhi3 libcall for i16 multiply"
    cFile4="$(mktemp --suffix=.c)"
    sMulFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile4" "$sMulFile"' EXIT
    cat > "$cFile4" <<'EOF'
int mul(int a, int b) { return a * b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile4" -o "$sMulFile"
    if ! grep -qF "jsl __mulhi3" "$sMulFile"; then
        cat "$sMulFile" >&2
        die "expected jsl __mulhi3"
    fi
    rm -f -- "$cFile4" "$sMulFile"
fi

# 11e. Variable shift via libcall.
if [ -x "$CLANG" ]; then
    log "check: clang emits __ashlhi3 libcall for variable i16 shift"
    cFile5="$(mktemp --suffix=.c)"
    sShfFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile5" "$sShfFile"' EXIT
    cat > "$cFile5" <<'EOF'
int shf(int x, int n) { return x << n; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile5" -o "$sShfFile"
    if ! grep -qF "jsl __ashlhi3" "$sShfFile"; then
        cat "$sShfFile" >&2
        die "expected jsl __ashlhi3"
    fi
    rm -f -- "$cFile5" "$sShfFile"
fi

# 11f. Pointer deref: *p loads via stack-relative-indirect-Y.
if [ -x "$CLANG" ]; then
    log "check: clang compiles *p via LDA (slot,s),y"
    cFile6="$(mktemp --suffix=.c)"
    sPtrFile="$(mktemp --suffix=.s)"
    # The trap names only this check's files (failure path); they are
    # removed explicitly on success because the next `trap ... EXIT`
    # replaces this handler.
    trap 'rm -f -- "$cFile6" "$sPtrFile"' EXIT
    cat > "$cFile6" <<'EOF'
int load_ptr(const int *p) { return *p; }
void store_ptr(int *p, int v) { *p = v; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile6" -o "$sPtrFile"
    for expect in "ldy #0x0" "lda (0x" "sta (0x"; do
        if ! grep -qF -- "$expect" "$sPtrFile"; then
            warn "ptr-deref test missing: $expect"
            cat "$sPtrFile" >&2
            die "ptr-deref test failed"
        fi
    done
    rm -f -- "$cFile6" "$sPtrFile"
fi

# 11g. i8 store via pointer: *p = v wraps the STA in SEP/REP so only
# 1 byte is written. Both load_byte and store_byte must compile.
if [ -x "$CLANG" ]; then
    log "check: clang compiles *p = v with SEP/REP-wrapped STA (i8 store)"
    cFile7="$(mktemp --suffix=.c)"
    sBptrFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile7" "$sBptrFile"' EXIT
    cat > "$cFile7" <<'EOF'
unsigned char loadb(const unsigned char *p) { return *p; }
void storeb(unsigned char *p, unsigned char v) { *p = v; }
unsigned char incb(unsigned char *p) { return ++*p; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile7" -o "$sBptrFile"
    # The output should contain SEP #$20 ... STA (slot,s),y ... REP #$20
    # (matched anywhere in the file, not only inside storeb).
    if ! grep -qF "sep #0x20" "$sBptrFile" \
        || ! grep -qF "rep #0x20" "$sBptrFile" \
        || ! grep -qE 'sta \(0x[0-9a-f]+, s\), y' "$sBptrFile"; then
        cat "$sBptrFile" >&2
        die "i8 ptr-store test missing SEP/STA/REP sequence"
    fi
    # All three functions must produce labels.
    for sym in loadb storeb incb; do
        if ! grep -qE "^${sym}:" "$sBptrFile"; then
            cat "$sBptrFile" >&2
            die "i8 ptr test: missing function ${sym}"
        fi
    done
    # Correctness check: storeb's prologue must NOT clobber A. A holds
    # the pointer arg on entry; the first body op must spill A intact.
    # The fixed prologue uses N/2 PHAs (small N) or TAY/TSC/.../TYA
    # (large N). Either way, the first non-prologue op should be a
    # `sta NN,s` that captures arg0=p. If we see TSC anywhere in the
    # prologue WITHOUT a TAY before it, that's the broken form (A
    # clobbered by TSC, then the spill stores garbage SP value as if
    # it were the pointer).
    #
    # The patterns accept any run of leading whitespace so they hold
    # whether the asm printer indents with a tab or with spaces.
    storeb_body="$(sed -n '/^storeb:/,/^\.Lfunc_end/p' "$sBptrFile")"
    if grep -qE '^[[:space:]]+tsc$' <<<"$storeb_body" \
        && ! grep -qE '^[[:space:]]+tay$' <<<"$storeb_body"; then
        cat "$sBptrFile" >&2
        die "storeb prologue uses bare TSC without TAY — A (the pointer arg) gets clobbered before being spilled. Byte store writes to the wrong address. Use PHA-based prologue or TAY/TSC/.../TYA bracket."
    fi
    # Also: there must be at least one `sta NN,s` in the body (the spill
    # of the pointer arg).
    if ! grep -qE '^[[:space:]]+sta 0x[0-9a-f]+, s$' <<<"$storeb_body"; then
        cat "$sBptrFile" >&2
        die "storeb missing pointer-arg spill (sta NN,s)"
    fi
    rm -f -- "$cFile7" "$sBptrFile"
fi

# 11h. i8 global access stays in 8-bit M (no over-read). bump_gb must
# get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence.
if [ -x "$CLANG" ]; then
    log "check: clang keeps pure-i8 global access in 8-bit M (no wide-read regression)"
    cFile8="$(mktemp --suffix=.c)"
    sGbFile="$(mktemp --suffix=.s)"
    # The trap names only this check's files (failure path); they are
    # removed explicitly on success because the next `trap ... EXIT`
    # replaces this handler.
    trap 'rm -f -- "$cFile8" "$sGbFile"' EXIT
    cat > "$cFile8" <<'EOF'
unsigned char gb;
void bump_gb(void) { gb++; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile8" -o "$sGbFile"
    # Must use 8-bit M prologue (sep #$20), not the 16-bit one.
    if ! grep -qF "sep #0x20" "$sGbFile"; then
        cat "$sGbFile" >&2
        die "bump_gb test: expected sep #\$20 prologue (got 16-bit M)"
    fi
    rm -f -- "$cFile8" "$sGbFile"
fi

# 11j. Runtime library assembles and exports all expected libcalls.
# This is the destination of every __mulhi3/__ashlhi3/etc. that clang
# emits — without it, generated code links to nothing.
RUNTIME_SH="$PROJECT_ROOT/runtime/build.sh"
RUNTIME_OBJ="$PROJECT_ROOT/runtime/libgcc.o"
if [ -x "$RUNTIME_SH" ]; then
    log "check: runtime/build.sh assembles libgcc.o with all libcall symbols"
    "$RUNTIME_SH" >/dev/null
    if [ ! -f "$RUNTIME_OBJ" ]; then
        die "runtime/build.sh did not produce libgcc.o"
    fi
    # Fail loudly if the tool is missing: otherwise the shell's error
    # text is swallowed by `2>&1 | awk` below and `pipefail` ends the
    # script without any diagnostic.
    [ -x "$BUILD_DIR/bin/llvm-objdump" ] \
        || die "llvm-objdump not found at $BUILD_DIR/bin/llvm-objdump; pass --build"
    # Last field of every symbol-table line, i.e. the symbol name.
    syms="$("$BUILD_DIR/bin/llvm-objdump" -t "$RUNTIME_OBJ" 2>&1 | awk '{print $NF}')"
    for need in __mulhi3 __ashlhi3 __ashrhi3 __lshrhi3 __divhi3 __udivhi3 __modhi3 __umodhi3; do
        if ! grep -qx -- "$need" <<<"$syms"; then
            printf '%s\n' "$syms" >&2
            die "runtime missing symbol: $need"
        fi
    done
fi

# 11m. Real-world surface area: a non-trivial program that exercises
# struct-field deref, char* iteration, multiply, shift, and a bit-twiddle
# function. Validates the backend compiles a realistic C input
# end-to-end without crashing. Doesn't assert specific asm; just
# success and that the function bodies are non-empty.
if [ -x "$CLANG" ]; then
    log "check: clang compiles a real-world multi-function program"
    cFile12="$(mktemp --suffix=.c)"
    sBigFile="$(mktemp --suffix=.s)"
    # Name only this check's files. The checks that create $cFile9,
    # $cFile10 and $cFile11 run later in the script, so listing them here
    # would abort the EXIT handler with "unbound variable" under `set -u`
    # if this check fails. The files are removed explicitly on success
    # because the next `trap ... EXIT` replaces this handler.
    trap 'rm -f -- "$cFile12" "$sBigFile"' EXIT
    # NOTE(review): the body of fnv16's loop and the first lines of ctz16
    # were reconstructed (the text between `i<` and `x>>=8` had been
    # lost); confirm the exact statements against the upstream script.
    cat > "$cFile12" <<'EOF'
typedef unsigned char u8;
typedef unsigned int u16;
struct Node { u16 data; struct Node *next; };
u16 list_sum(const struct Node *h) {
    u16 s=0;
    while(h){ s+=h->data; h=h->next; }
    return s;
}
int strcmp_test(const char *a, const char *b) {
    while (*a && *a == *b) { a++; b++; }
    return (unsigned char)*a - (unsigned char)*b;
}
u16 fnv16(const u8 *p, u16 n) {
    u16 h=0x811C;
    for (u16 i=0;i<n;i++) { h ^= p[i]; h *= 0x0193; }
    return h;
}
u16 ctz16(u16 x) {
    if (!x) return 16;
    u16 n=0;
    if (!(x & 0xFF)) { n+=8; x>>=8; }
    if (!(x & 0x0F)) { n+=4; x>>=4; }
    if (!(x & 0x03)) { n+=2; x>>=2; }
    if (!(x & 0x01)) n+=1;
    return n;
}
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile12" -o "$sBigFile"
    for sym in list_sum strcmp_test fnv16 ctz16; do
        if ! grep -qE "^${sym}:" "$sBigFile"; then
            cat "$sBigFile" >&2
            die "real-world test missing function: $sym"
        fi
    done
    rm -f -- "$cFile12" "$sBigFile"
fi

# 11l. Linkage contract: every libcall clang generates from arithmetic
# ops must match a symbol provided by runtime/libgcc.o. We can't run a
# real link yet (no w65816-aware linker), but we can verify the symbol
# names line up — drift here would be a silent runtime crash.
if [ -x "$CLANG" ] && [ -f "$RUNTIME_OBJ" ]; then
    log "check: every libcall clang emits has a matching definition in libgcc.o"
    cFile11="$(mktemp --suffix=.c)"
    sCallsFile="$(mktemp --suffix=.s)"
    # Name only this check's files: $cFile9 and $cFile10 are assigned by
    # checks that run later, and an unset name in the handler aborts it
    # under `set -u`. Removed explicitly on success because the next
    # `trap ... EXIT` replaces this handler.
    trap 'rm -f -- "$cFile11" "$sCallsFile"' EXIT
    cat > "$cFile11" <<'EOF'
int m1(int a, int b) { return a * b; }
unsigned int m2(unsigned int a, unsigned int b) { return a * b; }
int s1(int x, int n) { return x << n; }
unsigned int s2(unsigned int x, int n) { return x >> n; }
int s3(int x, int n) { return x >> n; }
int d1(int a, int b) { return a / b; }
unsigned int d2(unsigned int a, unsigned int b) { return a / b; }
int r1(int a, int b) { return a % b; }
unsigned int r2(unsigned int a, unsigned int b) { return a % b; }
long m3(long a, long b) { return a * b; }
unsigned long m4(unsigned long a, unsigned long b) { return a * b; }
long s4(long x, int n) { return x << n; }
long s5(long x, int n) { return x >> n; }
unsigned long s6(unsigned long x, int n) { return x >> n; }
long d3(long a, long b) { return a / b; }
unsigned long d4(unsigned long a, unsigned long b) { return a / b; }
long r3(long a, long b) { return a % b; }
unsigned long r4(unsigned long a, unsigned long b) { return a % b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile11" -o "$sCallsFile"
    # Fail loudly if the tool is missing: otherwise the shell's error
    # text is swallowed by `2>&1 | awk` and `pipefail` ends the script
    # without any diagnostic.
    [ -x "$BUILD_DIR/bin/llvm-objdump" ] \
        || die "llvm-objdump not found at $BUILD_DIR/bin/llvm-objdump; pass --build"
    # Symbol-table lines whose second field is "g": keep the last field
    # (the symbol name).
    runtime_syms="$("$BUILD_DIR/bin/llvm-objdump" -t "$RUNTIME_OBJ" 2>&1 | awk '$2 == "g" {print $NF}')"
    # `|| true`: a grep with no matches must reach the explicit check
    # below instead of ending the script silently via `pipefail`.
    emitted="$(grep -oE 'jsl __[a-z0-9]+' "$sCallsFile" | awk '{print $2}' | sort -u || true)"
    if [ -z "$emitted" ]; then
        cat "$sCallsFile" >&2
        die "libcall test: clang emitted no jsl __* libcalls to check"
    fi
    # Unquoted on purpose: $emitted is a newline-separated symbol list.
    for sym in $emitted; do
        if ! grep -qx -- "$sym" <<<"$runtime_syms"; then
            warn "clang emitted libcall $sym but runtime/libgcc.o has no such symbol"
            printf 'runtime exports:\n%s\n' "$runtime_syms" >&2
            printf 'clang emitted:\n%s\n' "$emitted" >&2
            die "libcall name drift: $sym missing from runtime"
        fi
    done
    rm -f -- "$cFile11" "$sCallsFile"
fi

# 11k. signed i8 compare: forces 16-bit M prologue (instrLowersToWide)
# because the SEXT lowering needs i16 ops. Verifies both that the
# code compiles AND that the prologue is REP #$30 (not the 8-bit M
# fast path, which would silently corrupt the SEXT mask).
if [ -x "$CLANG" ]; then
    log "check: signed i8 compare gets 16-bit M prologue + emits cmp"
    cFile10="$(mktemp --suffix=.c)"
    sSgnFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile10" "$sSgnFile"' EXIT
    cat > "$cFile10" <<'EOF'
signed char sgnlt(signed char a, signed char b) { return a < b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile10" -o "$sSgnFile"
    # Must use 16-bit M (rep #$30), not the 8-bit fast path.
    if ! grep -qF "rep #0x30" "$sSgnFile"; then
        cat "$sSgnFile" >&2
        # More specific diagnosis when only `rep #0x10` is present,
        # which would mean we never transitioned (the SEXT injection's
        # ora #\$ff00 would silently truncate to ora #\$00 in 8-bit M).
        # This used to be a separate test placed after the `rep #0x30`
        # check, where it could never fire.
        if grep -qF "rep #0x10" "$sSgnFile"; then
            die "sgnlt: only saw 8-bit M prologue, SEXT high-byte mask would be dropped"
        fi
        die "sgnlt: expected rep #\$30 prologue (i8 signed cmp needs 16-bit M)"
    fi
    rm -f -- "$cFile10" "$sSgnFile"
fi

# 11i. i8 equality compare on two stack args (eqbyte): exercises i8
# SETCC promotion through Lower*CC.
# Temp-file handling in the checks below: each check installs a trap that
# names only its own files (for the failure path) and removes them itself
# once it passes. Every `trap ... EXIT` replaces the previous handler, so
# a hand-maintained cumulative list silently drops whatever it forgets.
if [ -x "$CLANG" ]; then
    log "check: clang lowers i8 == i8 via promoted i16 cmp"
    cFile9="$(mktemp --suffix=.c)"
    sEqbFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFile9" "$sEqbFile"' EXIT
    cat > "$cFile9" <<'EOF'
unsigned char eqbyte(unsigned char a, unsigned char b) { return a == b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile9" -o "$sEqbFile"
    # Must produce a cmp + beq (the eq diamond).
    if ! grep -qE 'cmp ' "$sEqbFile" || ! grep -qF "beq" "$sEqbFile"; then
        cat "$sEqbFile" >&2
        die "eqbyte test: expected cmp + beq sequence"
    fi
    rm -f -- "$cFile9" "$sEqbFile"
fi

# 12. Real C through clang. Uses the clang front-end if it has been
#     built; skipped otherwise (clang takes 15-30 minutes to build the
#     first time; afterwards rebuilds are fast).
CLANG="$BUILD_DIR/bin/clang"
if [ -x "$CLANG" ] && [ -x "$OBJDUMP" ]; then
    log "check: clang -target w65816 -O2 compiles a tiny C function"
    cFile="$(mktemp --suffix=.c)"
    oFile2="$(mktemp --suffix=.o)"
    trap 'rm -f -- "$cFile" "$oFile2"' EXIT
    cat > "$cFile" <<'EOF'
int answer(void) { return 42; }
EOF
    "$CLANG" --target=w65816 -O2 -c "$cFile" -o "$oFile2"
    disasm="$("$OBJDUMP" --triple=w65816 -d "$oFile2" 2>&1)"
    for expect in "rep #0x30" "lda #0x2a" "rtl"; do
        if ! grep -qF -- "$expect" <<<"$disasm"; then
            warn "clang test missing: $expect"
            printf '%s\n' "$disasm" >&2
            die "clang end-to-end test failed"
        fi
    done
    rm -f -- "$cFile" "$oFile2"

    # 13. i32 (long) compile path. Type legalization splits i32 into
    #     two i16 halves; the high half flows through the (add FrameIndex,
    #     2) shape, which previously crashed ISel with "Cannot select
    #     FrameIndex<-2>". SelectFrameIndex now folds (add FI, const) so
    #     the split loads land on a stack-relative addressing mode.
    #     Return ABI: low->A, high->X (TAX in the epilogue).
    #     Also asserts the native ADC carry chain (CLC + ADC + ADC) is in
    #     place — task #49 replaced the bloated SETCC-based carry detect
    #     (lda;cmp;bcc;lda) with a direct ADDC/ADDE-pattern lowering that
    #     uses the C flag in P as a Glue-modeled physreg.
    log "check: clang compiles a long add (i32 split + A:X return)"
    cI32File="$(mktemp --suffix=.c)"
    oI32File="$(mktemp --suffix=.o)"
    trap 'rm -f -- "$cI32File" "$oI32File"' EXIT
    cat > "$cI32File" <<'EOF'
long add32(long a, long b) { return a + b; }
EOF
    "$CLANG" --target=w65816 -O2 -c "$cI32File" -o "$oI32File"
    disasmI32="$("$OBJDUMP" --triple=w65816 -d "$oI32File" 2>&1)"
    # TAX confirms the high-half-into-X part of the return ABI fired.
    # Without it, both halves would pile into A and one would be lost.
    # Exactly one CLC and exactly two ADCs prove the native carry chain
    # is wired (one CLC for lo, ADC lo, ADC hi-with-carry); a regression
    # to the SETCC path would show two CLCs and a bcc/cmp.
    for expect in "tax" "rtl" "clc" "adc"; do
        if ! grep -qF -- "$expect" <<<"$disasmI32"; then
            warn "i32 add test missing: $expect"
            printf '%s\n' "$disasmI32" >&2
            die "i32 add end-to-end test failed"
        fi
    done
    # `grep -c` exits 1 on a zero count; `|| true` keeps `set -e` quiet.
    nClc="$(grep -cE '\bclc\b' <<<"$disasmI32" || true)"
    nAdc="$(grep -cE '\badc\b' <<<"$disasmI32" || true)"
    nBcc="$(grep -cE '\bbcc\b' <<<"$disasmI32" || true)"
    if [ "$nClc" != "1" ] || [ "$nAdc" != "2" ] || [ "$nBcc" != "0" ]; then
        warn "i32 add carry-chain shape wrong (clc=$nClc adc=$nAdc bcc=$nBcc, want 1/2/0)"
        printf '%s\n' "$disasmI32" >&2
        die "i32 add carry-chain regression"
    fi
    # Lock the post-StackSlotCleanup instruction count: should be ~11 for
    # add32 (rep + pha + clc + adc + sta + txa + adc + tax + lda + ply + rtl
    # — i32-first-arg in A:X means arg0_hi loads as TXA, no LDAfi). If
    # this regresses meaningfully (say >14) the cleanup pass, the
    # rematerialization flag, or the A:X first-arg ABI has been broken.
    #
    # llvm-objdump right-aligns the address column, so instruction lines
    # begin with spaces; allow leading whitespace or the count is always
    # 0 and this guard can never trip.
    nInsns="$(grep -cE '^[[:space:]]*[0-9a-f]+:' <<<"$disasmI32" || true)"
    if [ "$nInsns" -gt 14 ]; then
        warn "i32 add bloat (got $nInsns insns, want <=14 — was 25 pre-cleanup, 11 post)"
        printf '%s\n' "$disasmI32" >&2
        die "i32 add code-quality regression"
    fi
    # The A:X arg0 ABI moves arg0_hi out of the stack slot, so the
    # asm should contain TXA (X→A for the hi-half ADC tied input)
    # exactly once. A regression to "load arg0_hi from stack" would
    # remove the TXA and add an extra LDA.
    nTxa="$(grep -cE '\btxa\b' <<<"$disasmI32" || true)"
    if [ "$nTxa" != "1" ]; then
        warn "i32 add: expected exactly 1 txa (i32-first-arg-in-A:X path); got $nTxa"
        printf '%s\n' "$disasmI32" >&2
        die "i32 add A:X first-arg ABI regression"
    fi
    rm -f -- "$cI32File" "$oI32File"

    # i32 carry chain on two-Acc16 (no foldable load): exercises the
    # ADD_RR + ADDE_RR custom-inserter path. fib32 has live a/b values
    # the inserter must spill to a fresh slot; pre-fix this crashed at
    # ISel with "Cannot select: adde reg, reg".
    log "check: clang compiles a 32-bit fib loop (ADDE_RR inserter path)"
    cFibFile="$(mktemp --suffix=.c)"
    sFibFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cFibFile" "$sFibFile"' EXIT
    cat > "$cFibFile" <<'EOF'
unsigned long fib32(unsigned long n) {
    unsigned long a = 0, b = 1, t;
    while (n > 0) { t = a + b; a = b; b = t; n--; }
    return a;
}
EOF
    # `2>&1 >/dev/null` (here and below): clang's stderr goes to this
    # script's stdout, clang's own stdout is discarded.
    if ! "$CLANG" --target=w65816 -O2 -S "$cFibFile" -o "$sFibFile" 2>&1 >/dev/null; then
        die "i32 fib (ADDE_RR inserter) failed to compile"
    fi
    if ! grep -qE '\bclc\b' "$sFibFile" || ! grep -qE '\badc\b' "$sFibFile"; then
        warn "i32 fib output missing clc/adc"
        die "i32 fib carry-chain regression"
    fi
    rm -f -- "$cFibFile" "$sFibFile"

    # i32 multiply via __mulsi3 libcall: tests the multi-i16-return path
    # (RetCC_W65816 assigning A then X for 2 i16 returns) plus the i32
    # arg push side. Pre-fix this hit "multi-return calls not yet
    # supported (Ins.size=4)" when LowerCallTo split the i32 return.
    log "check: clang compiles a long multiply via __mulsi3 libcall"
    cMulFile="$(mktemp --suffix=.c)"
    sMulFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cMulFile" "$sMulFile"' EXIT
    cat > "$cMulFile" <<'EOF'
unsigned long mul32(unsigned long a, unsigned long b) { return a * b; }
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cMulFile" -o "$sMulFile" 2>&1 >/dev/null; then
        die "i32 mul via __mulsi3 failed to compile"
    fi
    if ! grep -q '__mulsi3' "$sMulFile"; then
        die "i32 mul did not emit __mulsi3 libcall"
    fi
    rm -f -- "$cMulFile" "$sMulFile"

    # i32 shift-by-1 (SHL/SRL): the type-legalizer's SHL_PARTS / SRL_PARTS
    # expansion needs `(srl x, 15)` or `(shl x, 15)` for the carry-cross-
    # halves slot. Without inline patterns those fall to __lshrhi3 /
    # __ashlhi3 libcalls (~10 byte overhead per shift). SRL15A and
    # SHL15A pseudos handle them inline (`ASL/LSR; LDA #0; ROL/ROR`,
    # 3 bytes). Verify the shift-by-1 output doesn't contain a hi3
    # libcall.
    log "check: clang i32 shift-by-1 stays inline (no __lshrhi3 / __ashlhi3 libcall)"
    cSh1File="$(mktemp --suffix=.c)"
    sSh1File="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cSh1File" "$sSh1File"' EXIT
    cat > "$cSh1File" <<'EOF'
unsigned long shl1(unsigned long a) { return a << 1; }
unsigned long shr1(unsigned long a) { return a >> 1; }
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cSh1File" -o "$sSh1File" 2>&1 >/dev/null; then
        die "i32 shift-by-1 failed to compile"
    fi
    if grep -qE '__lshrhi3|__ashlhi3' "$sSh1File"; then
        warn "i32 shift-by-1 still calling i16 shift libcall — SRL15A/SHL15A pattern not firing"
        die "i32 shift-by-1 regression"
    fi
    rm -f -- "$cSh1File" "$sSh1File"

    # Varargs (<stdarg.h>): LowerFormalArguments creates a fixed FI
    # for the first vararg slot when IsVarArg; LowerVASTART stores
    # its address to the va_list pointer. VAARG/VACOPY/VAEND use
    # default LLVM expansions. Pre-fix this hit
    # "vararg functions not yet supported" fatal error.
    log "check: clang compiles a vararg function (<stdarg.h>)"
    cVaFile="$(mktemp --suffix=.c)"
    sVaFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cVaFile" "$sVaFile"' EXIT
    cat > "$cVaFile" <<'EOF'
#include <stdarg.h>
int sumArgs(int n, ...) {
    va_list args;
    va_start(args, n);
    int sum = 0;
    for (int i = 0; i < n; i++) sum += va_arg(args, int);
    va_end(args);
    return sum;
}
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cVaFile" -o "$sVaFile" 2>&1 >/dev/null; then
        die "vararg function failed to compile"
    fi
    rm -f -- "$cVaFile" "$sVaFile"

    # Stack-array LEA: `char arr[16]; arr[i] = ...` needs the address
    # of an alloca'd object as an i16 value. Pre-fix this hit "Cannot
    # select: FrameIndex<0>" because addr_fi only matches in load/store
    # contexts. W65816DAGToDAGISel::Select now lowers a bare
    # ISD::FrameIndex to ADDframe (FI, 0); eliminateFrameIndex expands
    # ADDframe into TSC + CLC + ADC #disp.
    log "check: clang takes the address of a stack-allocated array"
    cAllocaFile="$(mktemp --suffix=.c)"
    sAllocaFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cAllocaFile" "$sAllocaFile"' EXIT
    cat > "$cAllocaFile" <<'EOF'
void writeBytes(char *out, char v) {
    char tmp[8];
    for (int i = 0; i < 8; i++) tmp[i] = v + i;
    for (int i = 0; i < 8; i++) out[i] = tmp[i];
}
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cAllocaFile" -o "$sAllocaFile" 2>&1 >/dev/null; then
        die "alloca'd-array address failed to compile"
    fi
    # The TSC; CLC; ADC #disp triple is the LEA expansion of ADDframe;
    # at least one occurrence proves the pseudo wired through.
    if ! grep -qE '^\s*tsc' "$sAllocaFile"; then
        die "alloca'd-array LEA missing TSC (ADDframe expansion broken)"
    fi
    rm -f -- "$cAllocaFile" "$sAllocaFile"

    # signed-byte arithmetic (`(int)(*p) - (int)(*q)` style — strcmp).
    # Exercises three formerly-missing patterns: SEXTLOAD i16 from i8
    # (we Expand it to (sext (load))), sext_inreg i16 from i8 (the
    # `((x & 0xFF) ^ 0x80) - 0x80` tablegen Pat), and extloadi8 from
    # an Acc16 register pointer (LDAptr / "high byte don't care").
    log "check: clang compiles a signed-byte strcmp (sextload + sext_inreg + extload-via-ptr)"
    cStrFile="$(mktemp --suffix=.c)"
    sStrFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cStrFile" "$sStrFile"' EXIT
    cat > "$cStrFile" <<'EOF'
int strcmp32(const char *a, const char *b) {
    while (*a && *a == *b) { a++; b++; }
    return (int)(*a) - (int)(*b);
}
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cStrFile" -o "$sStrFile" 2>&1 >/dev/null; then
        die "signed-byte strcmp failed to compile"
    fi
    rm -f -- "$cStrFile" "$sStrFile"

    # Indirect calls (function pointers). Lowered via the runtime
    # trampoline at runtime/src/libgcc.s::__jsl_indir, which does
    # JMP (__indirTarget) — caller stores target to __indirTarget then
    # JSL __jsl_indir. Pre-fix, LowerCall reported a fatal error.
    log "check: clang compiles an indirect call (via __jsl_indir trampoline)"
    cIndFile="$(mktemp --suffix=.c)"
    sIndFile="$(mktemp --suffix=.s)"
    trap 'rm -f -- "$cIndFile" "$sIndFile"' EXIT
    cat > "$cIndFile" <<'EOF'
typedef int (*BinOp)(int, int);
int doOp(BinOp op, int x, int y) { return op(x, y); }
EOF
    if ! "$CLANG" --target=w65816 -O2 -S "$cIndFile" -o "$sIndFile" 2>&1 >/dev/null; then
        die "indirect call failed to compile"
    fi
    if ! grep -q '__indirTarget' "$sIndFile"; then
        die "indirect call missing __indirTarget store"
    fi
    if ! grep -q '__jsl_indir' "$sIndFile"; then
        die "indirect call missing JSL to __jsl_indir trampoline"
    fi
    rm -f -- "$cIndFile" "$sIndFile"
fi

log "all smoke checks passed"