840 lines
37 KiB
Bash
Executable file
840 lines
37 KiB
Bash
Executable file
#!/usr/bin/env bash
# W65816 backend smoke test. Run after any change to confirm the
# scaffold still builds and llc still registers the target. Non-zero
# exit on any failure.
#
# Usage: scripts/smokeTest.sh [--build]
#   --build   Run ninja to (re)build LLVMW65816* + llc before testing.
#             Without this flag the script assumes tools/llvm-mos-build
#             is already up to date.

set -euo pipefail
# NOTE(review): TOOLS_DIR, PROJECT_ROOT, log, warn and die are not defined
# in this file; presumably common.sh provides them.
source "$(dirname "$0")/common.sh"

# Resource caps for child compilers. A bug in the W65816 backend can send
# clang/llc into a runaway combine/inserter loop that allocates tens of GB
# of RAM. When that happens the kernel OOM-killer takes down the entire
# tmux scope (bash, the compiler, and the parent Claude Code session with
# it). Bounding virtual memory and CPU time here turns "OOM kills the
# terminal" into "compiler dies with SIGSEGV / SIGXCPU and we get a clean
# error." Numbers are well above what a healthy compile of these tiny
# test inputs needs (~200 MB / a few seconds), so legitimate work is
# unaffected.
ulimit -v $((4 * 1024 * 1024))  # 4 GB virtual memory ceiling
ulimit -t 90                    # 90 CPU-seconds per process

BUILD_DIR="$TOOLS_DIR/llvm-mos-build"
LLC="$BUILD_DIR/bin/llc"
LLVM_MC="$BUILD_DIR/bin/llvm-mc"

# Only one flag is recognised; anything else is a usage error.
doBuild=0
for arg in "$@"; do
    case "$arg" in
        --build) doBuild=1 ;;
        *)       die "unknown flag: $arg" ;;
    esac
done

if [ "$doBuild" -eq 1 ]; then
    # ninja needs a configured build tree; say so explicitly instead of
    # letting ninja fail with a bare chdir error.
    [ -d "$BUILD_DIR" ] || die "build dir not found at $BUILD_DIR; run setup.sh and applyBackend.sh"
    log "ninja LLVMW65816* llc llvm-mc llvm-objdump"
    ninja -C "$BUILD_DIR" LLVMW65816Info LLVMW65816Desc LLVMW65816CodeGen \
        LLVMW65816AsmParser LLVMW65816Disassembler llc llvm-mc llvm-objdump
fi

# Checked after the optional build: with the check first, `--build` on a
# tree that has never produced llc died with "... or pass --build" even
# though --build had been passed.
[ -x "$LLC" ] || die "llc not found at $LLC; run setup.sh and applyBackend.sh, or pass --build"

# 1. Target must be registered.
log "check: llc --version lists w65816"
# Capture the output first and match it afterwards. Piping llc straight
# into `grep -q` under `set -o pipefail` can report a failure when grep
# exits on the first match and llc is then killed by SIGPIPE mid-write.
# A non-zero exit from llc is still treated as "target not listed".
llcVersion="$("$LLC" --version 2>/dev/null)" || llcVersion=""
if ! grep -q "^[[:space:]]*w65816[[:space:]]" <<<"$llcVersion"; then
    die "llc does not list the w65816 target"
fi

# 2. Empty IR must compile to nothing.
log "check: llc -march=w65816 -filetype=null /dev/null exits 0"
"$LLC" -march=w65816 -filetype=null /dev/null

# 3. Trivial IR that shouldn't touch our (unimplemented) codegen paths.
tmp="$(mktemp --suffix=.ll)"
trap 'rm -f "$tmp"' EXIT
cat > "$tmp" <<'EOF'
; ModuleID = 'smoke'
target triple = "w65816-unknown-unknown"

; Empty module: exercises target initialization only.
EOF
log "check: llc accepts an empty module with w65816 triple"
"$LLC" -filetype=null "$tmp"
# Bash keeps only one EXIT handler, and later checks install their own
# without listing $tmp. Delete it here on the success path so it cannot
# leak; the trap above still covers a failure of the llc run.
rm -f "$tmp"

# 4. MC layer round-trip. Feed llvm-mc a representative mix of addressing
#    modes and mode-switching instructions, then look for the expected
#    encoding bytes. Matching on the hex-byte strings stays stable across
#    llvm-mc formatting changes, unlike matching whole output lines.
if [[ ! -x "$LLVM_MC" ]]; then
    warn "llvm-mc not built; skipping MC round-trip check"
else
    log "check: llvm-mc -arch=w65816 emits expected encodings"
    # Only instructions that round-trip cleanly are exercised:
    #   - LDA/LDX/LDY immediates without explicit force use the _Imm16
    #     form (codegen-dominant path). A pure `lda #x` assembles to
    #     LDA_Imm16 since the _Imm8 variant is isCodeGenOnly.
    mcInput=' nop
rep #0x30
sep #0x20
lda #0x1234
sta 0x10
sta 0x1000
sta 0x010000
mvn 0x01, 0x02
jsl 0x012345'
    # stderr is folded into the captured text so diagnostics show up when
    # an assertion below dumps $mcOut.
    mcOut="$("$LLVM_MC" -arch=w65816 -show-encoding 2>&1 <<<"$mcInput")"

    # assertHas ENCODING
    # Succeeds when ENCODING occurs (as a fixed string) in $mcOut;
    # otherwise prints the captured llvm-mc output to stderr and dies.
    assertHas() {
        if grep -qF "$1" <<<"$mcOut"; then
            return 0
        fi
        warn "missing expected encoding: $1"
        printf '%s\n' "$mcOut" >&2
        die "llvm-mc did not produce expected encoding"
    }

    # One entry per input instruction, in the same order as $mcInput.
    mcExpected=(
        "[0xea]"
        "[0xc2,0x30]"
        "[0xe2,0x20]"
        "[0xa9,0x34,0x12]"
        "[0x85,0x10]"
        "[0x8d,0x00,0x10]"
        "[0x8f,0x00,0x00,0x01]"
        "[0x54,0x01,0x02]"
        "[0x22,0x45,0x23,0x01]"
    )
    for enc in "${mcExpected[@]}"; do
        assertHas "$enc"
    done
fi

# 5. Disassembler round-trip. A raw byte stream handed to
#    `llvm-mc --disassemble` should come back as the mnemonics we expect.
#    Silently skipped when llvm-mc has not been built.
if [[ -x "$LLVM_MC" ]]; then
    log "check: llvm-mc --disassemble decodes bytes back to mnemonics"
    disasmBytes='0xea 0xa9 0x34 0x12 0x85 0x10 0x8d 0x00 0x10 0x6b'
    disasmOut="$("$LLVM_MC" --disassemble --triple=w65816 2>&1 <<<"$disasmBytes")"
    disasmWant=("nop" "lda #0x1234" "sta 0x10" "sta 0x1000" "rtl")
    for text in "${disasmWant[@]}"; do
        if grep -qF "$text" <<<"$disasmOut"; then
            continue
        fi
        warn "disassembler missing: $text"
        printf '%s\n' "$disasmOut" >&2
        die "disassembler round-trip failed"
    done
fi

# 6. End-to-end codegen: IR -> asm -> ELF -> disassembly.
#    This is the first real codegen test: verifies that our LowerReturn,
#    DAG pattern for the i16 constant pseudo, and prologue-emitting
#    frame lowering produce runnable 65816 machine code.
OBJDUMP="$BUILD_DIR/bin/llvm-objdump"
if [ -x "$LLC" ] && [ -x "$LLVM_MC" ] && [ -x "$OBJDUMP" ]; then
    log "check: end-to-end IR -> asm -> ELF -> disasm for a trivial function"
    irFile="$(mktemp --suffix=.ll)"
    sFile="$(mktemp --suffix=.s)"
    oFile="$(mktemp --suffix=.o)"
    # Bash keeps a single EXIT handler, so this one replaces the handler
    # check 3 installed for $tmp; keep $tmp listed so it is still removed
    # if we fail here. `${tmp:-}` keeps the handler valid under `set -u`
    # even if $tmp was never assigned.
    trap 'rm -f "${tmp:-}" "$irFile" "$sFile" "$oFile"' EXIT
    cat > "$irFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @answer() { ret i16 42 }
EOF
    "$LLC" -march=w65816 "$irFile" -o "$sFile"
    "$LLVM_MC" -arch=w65816 -filetype=obj "$sFile" -o "$oFile"
    disasm="$("$OBJDUMP" --triple=w65816 -d "$oFile" 2>&1)"
    for expect in "rep #0x30" "lda #0x2a" "rtl"; do
        if ! grep -qF "$expect" <<<"$disasm"; then
            warn "end-to-end pipeline missing: $expect"
            printf '%s\n' "$disasm" >&2
            die "end-to-end pipeline failed"
        fi
    done
    # Remove the temp files now: the next check reuses the irFile/sFile
    # names and installs an EXIT handler that does not list $oFile, so
    # these three paths would otherwise be lost and never deleted.
    rm -f "$irFile" "$sFile" "$oFile"
fi

# 7. Real codegen check: one non-trivial function that touches globals,
#    arithmetic, branches and bitwise ops, exercising our DAG selection
#    patterns and AsmPrinter pseudo expansions.
if [[ -x "$LLC" ]]; then
    log "check: llc compiles a multi-pattern function"
    irFile="$(mktemp --suffix=.ll)"
    sFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile"' EXIT
    cat > "$irFile" <<'EOF'
target triple = "w65816-unknown-unknown"
@a = global i16 0
@b = global i16 0
define i16 @demo() {
%x = load i16, ptr @a
%y = load i16, ptr @b
%s = add i16 %x, %y
%m = and i16 %s, 4095
%c = icmp ult i16 %m, 100
br i1 %c, label %lo, label %hi
lo:
ret i16 0
hi:
ret i16 %m
}
EOF
    "$LLC" -march=w65816 "$irFile" -o "$sFile"
    # Every fragment must appear somewhere in the generated assembly;
    # order is not checked.
    demoWant=(
        "rep #0x30"
        "lda a"
        "clc"
        "adc b"
        "and #0xfff"
        "cmp #0x64"
        "bcs"
        "rtl"
    )
    for frag in "${demoWant[@]}"; do
        if grep -qF "$frag" "$sFile"; then
            continue
        fi
        warn "multi-pattern test missing: $frag"
        cat "$sFile" >&2
        die "multi-pattern test failed"
    done
fi

# 8. Function call check: the caller passes an i16 in A, the callee adds
#    to it and returns.
if [[ -x "$LLC" ]]; then
    log "check: llc compiles a function call (single i16 arg in A)"
    irCallFile="$(mktemp --suffix=.ll)"
    sCallFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile"' EXIT
    cat > "$irCallFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @inc(i16 %x) {
%r = add i16 %x, 1
ret i16 %r
}
define i16 @caller() {
%r = call i16 @inc(i16 41)
ret i16 %r
}
EOF
    "$LLC" -march=w65816 "$irCallFile" -o "$sCallFile"
    # The caller loads 41 into A and JSLs to inc. The callee body is now
    # the `inc a` peephole (it was clc; adc #1 before the INA_PSEUDO
    # pattern).
    callWant=("lda #0x29" "jsl inc" "inc a")
    for frag in "${callWant[@]}"; do
        grep -qF "$frag" "$sCallFile" || {
            warn "call test missing: $frag"
            cat "$sCallFile" >&2
            die "call test failed"
        }
    done
fi

# 9. Multi-arg sum: a 3-argument function must read arguments 1 and 2
#    through stack-relative addressing.
if [[ -x "$LLC" ]]; then
    log "check: llc compiles a 3-arg function (stack-relative reads)"
    irMaFile="$(mktemp --suffix=.ll)"
    sMaFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile"' EXIT
    cat > "$irMaFile" <<'EOF'
target triple = "w65816-unknown-unknown"
define i16 @sum3(i16 %a, i16 %b, i16 %c) {
%ab = add i16 %a, %b
%r = add i16 %ab, %c
ret i16 %r
}
EOF
    "$LLC" -march=w65816 "$irMaFile" -o "$sMaFile"
    sum3Want=("adc 0x4, s" "adc 0x6, s" "rtl")
    for frag in "${sum3Want[@]}"; do
        if grep -qF "$frag" "$sMaFile"; then
            continue
        fi
        warn "multi-arg test missing: $frag"
        cat "$sMaFile" >&2
        die "multi-arg test failed"
    done
fi

# 10. i8 codegen: a function that only touches i8 values should get the
#     SEP #$20 prologue and increment with `inc a`.
if [[ -x "$LLC" ]]; then
    log "check: llc compiles a pure-i8 function (SEP #\$20 prologue)"
    irI8File="$(mktemp --suffix=.ll)"
    sI8File="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File"' EXIT
    cat > "$irI8File" <<'EOF'
target triple = "w65816-unknown-unknown"
define i8 @i8_inc(i8 %x) {
%r = add i8 %x, 1
ret i8 %r
}
EOF
    "$LLC" -march=w65816 "$irI8File" -o "$sI8File"
    for frag in "sep #0x20" "inc a" "rtl"; do
        grep -qF "$frag" "$sI8File" || {
            warn "i8 test missing: $frag"
            cat "$sI8File" >&2
            die "i8 test failed"
        }
    done
fi

# 11a. SETCC via clang: a > b returns 0/1. Exercises the multi-branch
#      CC path (BEQ + BPL diamond, since SETGT can't be a single Bxx).
#      Skipped when clang has not been built.
CLANG="$BUILD_DIR/bin/clang"
if [ -x "$CLANG" ]; then
    log "check: clang compiles a > b via multi-branch SETCC"
    cFile="$(mktemp --suffix=.c)"
    sCmpFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile"' EXIT
    cat > "$cFile" <<'EOF'
int gt(int a, int b) { return a > b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile" -o "$sCmpFile"
    # Expect a CMP, then BEQ + BPL forming the multi-branch diamond.
    for expect in "cmp 0x4, s" "lda #0x1" "beq" "bpl" "lda #0x0"; do
        if ! grep -qF "$expect" "$sCmpFile"; then
            warn "setcc gt test missing: $expect"
            cat "$sCmpFile" >&2
            die "setcc gt test failed"
        fi
    done
    # Bash keeps a single EXIT handler. The handlers installed from
    # section 12 onward no longer list $sCmpFile, and $cFile is reassigned
    # there, so remove both files now; the trap above only covers failures
    # inside this check.
    rm -f "$cFile" "$sCmpFile"
fi

# 11b. SELECT via clang: c ? a : b returns one of two constants.
#      Skipped when clang has not been built.
if [ -x "$CLANG" ]; then
    log "check: clang compiles c ? 100 : 200 via SELECT_CC"
    cFile2="$(mktemp --suffix=.c)"
    sSelFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile"' EXIT
    cat > "$cFile2" <<'EOF'
int sel(int c) { return c ? 100 : 200; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile2" -o "$sSelFile"
    # 0xc8 = 200 and 0x64 = 100: both constants must be materialised,
    # selected by a compare against zero plus a BEQ.
    for expect in "cmp #0x0" "lda #0xc8" "beq" "lda #0x64"; do
        if ! grep -qF "$expect" "$sSelFile"; then
            warn "select test missing: $expect"
            cat "$sSelFile" >&2
            die "select test failed"
        fi
    done
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path; the trap
    # above only protects against a failure inside this check.
    rm -f "$cFile2" "$sSelFile"
fi

# 11c. Two-Acc16 op via clang: a - b where both are non-foldable Acc16.
#      Caller-side b lives in memory (FI), so this matches via SBCfi without
#      the spill — but a + b + c chains through a true two-Acc16 add.
#      NOTE(review): the input actually compiled below is max3 (two
#      chained selects), not a chained add; the log text still says
#      "chained add". Confirm which wording is intended.
if [ -x "$CLANG" ]; then
    log "check: clang compiles two-Acc16 ops via spill (chained add)"
    cFile3="$(mktemp --suffix=.c)"
    sChainFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile"' EXIT
    cat > "$cFile3" <<'EOF'
// max3 forces two-Acc16: outer SELECT_CC compares one Acc16 PHI value
// to another Acc16 PHI value (m vs c, both computed values).
int max3(int a, int b, int c) {
int m = a > b ? a : b;
return m > c ? m : c;
}
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile3" -o "$sChainFile"
    # Expect at least one sta-spill paired with cmp to a stack-relative
    # slot - the signature of the two-Acc16 CMP_RR custom inserter.
    if ! grep -qE 'sta 0x[0-9a-f]+, s' "$sChainFile" \
        || ! grep -qE 'cmp 0x[0-9a-f]+, s' "$sChainFile"; then
        cat "$sChainFile" >&2
        die "two-Acc16 (max3) didn't spill+cmp via stack-relative"
    fi
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path.
    rm -f "$cFile3" "$sChainFile"
fi

# 11d. Multiply via libcall: an i16 `a * b` must lower to `jsl __mulhi3`.
#      Skipped when clang has not been built.
if [ -x "$CLANG" ]; then
    log "check: clang emits __mulhi3 libcall for i16 multiply"
    cFile4="$(mktemp --suffix=.c)"
    sMulFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile"' EXIT
    cat > "$cFile4" <<'EOF'
int mul(int a, int b) { return a * b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile4" -o "$sMulFile"
    if ! grep -qF "jsl __mulhi3" "$sMulFile"; then
        cat "$sMulFile" >&2
        die "expected jsl __mulhi3"
    fi
    # $sMulFile is reassigned by the i32 multiply check later in the
    # script and $cFile4 is absent from the later EXIT handlers, so
    # delete both here on the success path.
    rm -f "$cFile4" "$sMulFile"
fi

# 11e. Variable shift via libcall: an i16 `x << n` with a run-time count
#      must lower to `jsl __ashlhi3`. Skipped when clang has not been built.
if [ -x "$CLANG" ]; then
    log "check: clang emits __ashlhi3 libcall for variable i16 shift"
    cFile5="$(mktemp --suffix=.c)"
    sShfFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile"' EXIT
    cat > "$cFile5" <<'EOF'
int shf(int x, int n) { return x << n; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile5" -o "$sShfFile"
    if ! grep -qF "jsl __ashlhi3" "$sShfFile"; then
        cat "$sShfFile" >&2
        die "expected jsl __ashlhi3"
    fi
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path.
    rm -f "$cFile5" "$sShfFile"
fi

# 11f. Pointer deref: *p loads via stack-relative-indirect-Y.
#      Both the load and the store through a pointer are compiled.
#      Skipped when clang has not been built.
if [ -x "$CLANG" ]; then
    log "check: clang compiles *p via LDA (slot,s),y"
    cFile6="$(mktemp --suffix=.c)"
    sPtrFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile"' EXIT
    cat > "$cFile6" <<'EOF'
int load_ptr(const int *p) { return *p; }
void store_ptr(int *p, int v) { *p = v; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile6" -o "$sPtrFile"
    # Y is zeroed, then the load and the store each go through an
    # indirect `(0x..` operand.
    for expect in "ldy #0x0" "lda (0x" "sta (0x"; do
        if ! grep -qF "$expect" "$sPtrFile"; then
            warn "ptr-deref test missing: $expect"
            cat "$sPtrFile" >&2
            die "ptr-deref test failed"
        fi
    done
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path.
    rm -f "$cFile6" "$sPtrFile"
fi

# 11g. i8 store via pointer: *p = v wraps the STA in SEP/REP so only
#      1 byte is written. Both load_byte and store_byte must compile.
#      Skipped when clang has not been built.
if [ -x "$CLANG" ]; then
    log "check: clang compiles *p = v with SEP/REP-wrapped STA (i8 store)"
    cFile7="$(mktemp --suffix=.c)"
    sBptrFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile"' EXIT
    cat > "$cFile7" <<'EOF'
unsigned char loadb(const unsigned char *p) { return *p; }
void storeb(unsigned char *p, unsigned char v) { *p = v; }
unsigned char incb(unsigned char *p) { return ++*p; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile7" -o "$sBptrFile"
    # storeb body should contain SEP #$20 ... STA (slot,s),y ... REP #$20.
    # (The three fragments are searched for file-wide, not in sequence.)
    if ! grep -qF "sep #0x20" "$sBptrFile" \
        || ! grep -qF "rep #0x20" "$sBptrFile" \
        || ! grep -qE 'sta \(0x[0-9a-f]+, s\), y' "$sBptrFile"; then
        cat "$sBptrFile" >&2
        die "i8 ptr-store test missing SEP/STA/REP sequence"
    fi
    # All three functions must produce labels.
    for sym in loadb storeb incb; do
        if ! grep -qE "^${sym}:" "$sBptrFile"; then
            cat "$sBptrFile" >&2
            die "i8 ptr test: missing function ${sym}"
        fi
    done
    # Correctness check: storeb's prologue must NOT clobber A. A holds
    # the pointer arg on entry; the first body op must spill A intact.
    # The fixed prologue uses N/2 PHAs (small N) or TAY/TSC/.../TYA
    # (large N). Either way, the first non-prologue op should be a
    # `sta NN,s` that captures arg0=p. If we see TSC anywhere in the
    # prologue WITHOUT a TAY before it, that's the broken form (A
    # clobbered by TSC, then the spill stores garbage SP value as if
    # it were the pointer).
    #
    # The line-anchored patterns below accept any run of leading
    # whitespace (and any whitespace between mnemonic and operand)
    # instead of exactly one space, so they match whether the assembler
    # output indents instructions with a tab or with spaces.
    storeb_body="$(sed -n '/^storeb:/,/^\.Lfunc_end/p' "$sBptrFile")"
    if grep -qE '^[[:space:]]+tsc$' <<<"$storeb_body" \
        && ! grep -qE '^[[:space:]]+tay$' <<<"$storeb_body"; then
        cat "$sBptrFile" >&2
        die "storeb prologue uses bare TSC without TAY — A (the pointer arg) gets clobbered before being spilled. Byte store writes to the wrong address. Use PHA-based prologue or TAY/TSC/.../TYA bracket."
    fi
    # Also: there must be at least one `sta NN,s` in the body (the spill
    # of the pointer arg).
    if ! grep -qE '^[[:space:]]+sta[[:space:]]+0x[0-9a-f]+, s$' <<<"$storeb_body"; then
        cat "$sBptrFile" >&2
        die "storeb missing pointer-arg spill (sta NN,s)"
    fi
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path.
    rm -f "$cFile7" "$sBptrFile"
fi

# 11h. i8 global access stays in 8-bit M (no over-read). bump_gb must
#      get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence.
#      Only the prologue is asserted below; the lda/inc/sta shape is not.
if [ -x "$CLANG" ]; then
    log "check: clang keeps pure-i8 global access in 8-bit M (no wide-read regression)"
    cFile8="$(mktemp --suffix=.c)"
    sGbFile="$(mktemp --suffix=.s)"
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile"' EXIT
    cat > "$cFile8" <<'EOF'
unsigned char gb;
void bump_gb(void) { gb++; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile8" -o "$sGbFile"
    # Must use 8-bit M prologue (sep #$20), not the 16-bit one.
    if ! grep -qF "sep #0x20" "$sGbFile"; then
        cat "$sGbFile" >&2
        die "bump_gb test: expected sep #\$20 prologue (got 16-bit M)"
    fi
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path.
    rm -f "$cFile8" "$sGbFile"
fi

# 11j. The runtime library must assemble and export every expected
#      libcall. It is the destination of each __mulhi3/__ashlhi3/etc.
#      that clang emits — without it, generated code links to nothing.
#      Skipped when runtime/build.sh is not executable.
RUNTIME_SH="$PROJECT_ROOT/runtime/build.sh"
RUNTIME_OBJ="$PROJECT_ROOT/runtime/libgcc.o"
if [[ -x "$RUNTIME_SH" ]]; then
    log "check: runtime/build.sh assembles libgcc.o with all libcall symbols"
    "$RUNTIME_SH" >/dev/null
    [[ -f "$RUNTIME_OBJ" ]] || die "runtime/build.sh did not produce libgcc.o"
    # Last whitespace-separated field of every symbol-table line.
    syms="$("$BUILD_DIR/bin/llvm-objdump" -t "$RUNTIME_OBJ" 2>&1 | awk '{print $NF}')"
    runtimeWant=(
        __mulhi3
        __ashlhi3
        __ashrhi3
        __lshrhi3
        __divhi3
        __udivhi3
        __modhi3
        __umodhi3
    )
    for libcall in "${runtimeWant[@]}"; do
        if grep -qx "$libcall" <<<"$syms"; then
            continue
        fi
        printf '%s\n' "$syms" >&2
        die "runtime missing symbol: $libcall"
    done
fi

# 11m. Real-world surface area: a non-trivial program that exercises
#      struct-field deref, char* iteration, multiply, shift, and a bit-twiddle
#      function. Validates the backend compiles a realistic C input
#      end-to-end without crashing. Doesn't assert specific asm; just
#      success and that a label is emitted for each function.
if [ -x "$CLANG" ]; then
    log "check: clang compiles a real-world multi-function program"
    cFile12="$(mktemp --suffix=.c)"
    sBigFile="$(mktemp --suffix=.s)"
    # cFile9..cFile11 and their .s partners are assigned by checks that
    # run AFTER this one (11l, 11k, 11i). Expanding them bare would make
    # the handler abort with "unbound variable" under `set -u` if this
    # check fails, so nothing would be removed; `${var:-}` keeps it safe.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "${cFile9:-}" "${sEqbFile:-}" "${cFile10:-}" "${sSgnFile:-}" "${cFile11:-}" "${sCallsFile:-}" "$cFile12" "$sBigFile"' EXIT
    cat > "$cFile12" <<'EOF'
typedef unsigned char u8;
typedef unsigned int u16;
struct Node { u16 data; struct Node *next; };
u16 list_sum(const struct Node *h) {
u16 s=0; while(h){ s+=h->data; h=h->next; } return s;
}
int strcmp_test(const char *a, const char *b) {
while (*a && *a == *b) { a++; b++; }
return (unsigned char)*a - (unsigned char)*b;
}
u16 fnv16(const u8 *p, u16 n) {
u16 h=0x811C; for (u16 i=0;i<n;i++){ h^=p[i]; h=h*0x101; } return h;
}
u16 ctz16(u16 x) {
if (!x) return 16;
u16 n=0;
if (!(x & 0xFF)) { n+=8; x>>=8; }
if (!(x & 0x0F)) { n+=4; x>>=4; }
if (!(x & 0x03)) { n+=2; x>>=2; }
if (!(x & 0x01)) n+=1;
return n;
}
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile12" -o "$sBigFile"
    for sym in list_sum strcmp_test fnv16 ctz16; do
        if ! grep -qE "^${sym}:" "$sBigFile"; then
            cat "$sBigFile" >&2
            die "real-world test missing function: $sym"
        fi
    done
    # Later checks replace the EXIT handler with lists that omit these
    # two files, so delete them here on the success path.
    rm -f "$cFile12" "$sBigFile"
fi

# 11l. Linkage contract: every libcall clang generates from arithmetic
#      ops must match a symbol provided by runtime/libgcc.o. We can't run a
#      real link yet (no w65816-aware linker), but we can verify the symbol
#      names line up — drift here would be a silent runtime crash.
#      Skipped when clang or runtime/libgcc.o is missing.
if [ -x "$CLANG" ] && [ -f "$RUNTIME_OBJ" ]; then
    log "check: every libcall clang emits has a matching definition in libgcc.o"
    cFile11="$(mktemp --suffix=.c)"
    sCallsFile="$(mktemp --suffix=.s)"
    # cFile9/cFile10 (and partners) are assigned by later checks, so they
    # are expanded with `${var:-}` to keep the handler valid under
    # `set -u`. cFile12/sBigFile from check 11m are listed as well so
    # this handler does not drop them.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "${cFile9:-}" "${sEqbFile:-}" "${cFile10:-}" "${sSgnFile:-}" "$cFile11" "$sCallsFile" "${cFile12:-}" "${sBigFile:-}"' EXIT
    cat > "$cFile11" <<'EOF'
int m1(int a, int b) { return a * b; }
unsigned int m2(unsigned int a, unsigned int b) { return a * b; }
int s1(int x, int n) { return x << n; }
unsigned int s2(unsigned int x, int n) { return x >> n; }
int s3(int x, int n) { return x >> n; }
int d1(int a, int b) { return a / b; }
unsigned int d2(unsigned int a, unsigned int b) { return a / b; }
int r1(int a, int b) { return a % b; }
unsigned int r2(unsigned int a, unsigned int b) { return a % b; }
long m3(long a, long b) { return a * b; }
unsigned long m4(unsigned long a, unsigned long b) { return a * b; }
long s4(long x, int n) { return x << n; }
long s5(long x, int n) { return x >> n; }
unsigned long s6(unsigned long x, int n) { return x >> n; }
long d3(long a, long b) { return a / b; }
unsigned long d4(unsigned long a, unsigned long b) { return a / b; }
long r3(long a, long b) { return a % b; }
unsigned long r4(unsigned long a, unsigned long b) { return a % b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile11" -o "$sCallsFile"
    # Names of symbols whose second objdump column is "g".
    runtime_syms="$("$BUILD_DIR/bin/llvm-objdump" -t "$RUNTIME_OBJ" 2>&1 | awk '$2 == "g" {print $NF}')"
    # Unique `__name` targets of every `jsl __...` in the output. grep
    # exits 1 when nothing matches, which under pipefail + errexit would
    # end the script with no message; report that case explicitly.
    emitted="$(grep -oE 'jsl __[a-z0-9]+' "$sCallsFile" | awk '{print $2}' | sort -u)" || true
    if [ -z "$emitted" ]; then
        cat "$sCallsFile" >&2
        die "libcall test: clang emitted no jsl __* libcalls to check"
    fi
    while IFS= read -r sym; do
        # -F: compare the symbol literally rather than as a regex.
        if ! grep -qxF -- "$sym" <<<"$runtime_syms"; then
            warn "clang emitted libcall $sym but runtime/libgcc.o has no such symbol"
            printf 'runtime exports:\n%s\n' "$runtime_syms" >&2
            printf 'clang emitted:\n%s\n' "$emitted" >&2
            die "libcall name drift: $sym missing from runtime"
        fi
    done <<<"$emitted"
    # Later checks replace the EXIT handler with lists that omit these
    # two files, so delete them here on the success path.
    rm -f "$cFile11" "$sCallsFile"
fi

# 11k. signed i8 compare: forces 16-bit M prologue (instrLowersToWide)
#      because the SEXT lowering needs i16 ops. Verifies both that the
#      code compiles AND that the prologue is REP #$30 (not the 8-bit M
#      fast path, which would silently corrupt the SEXT mask).
if [ -x "$CLANG" ]; then
    log "check: signed i8 compare gets 16-bit M prologue + emits cmp"
    cFile10="$(mktemp --suffix=.c)"
    sSgnFile="$(mktemp --suffix=.s)"
    # cFile9/sEqbFile are assigned by the check that runs after this one,
    # and cFile11/sCallsFile only exist when check 11l ran, so all four
    # use `${var:-}` to keep the handler valid under `set -u`. The files
    # from checks 11l and 11m are listed so this handler does not drop
    # them.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "${cFile9:-}" "${sEqbFile:-}" "$cFile10" "$sSgnFile" "${cFile11:-}" "${sCallsFile:-}" "${cFile12:-}" "${sBigFile:-}"' EXIT
    cat > "$cFile10" <<'EOF'
signed char sgnlt(signed char a, signed char b) { return a < b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile10" -o "$sSgnFile"
    # Must use 16-bit M (rep #$30), not the 8-bit fast path. In 8-bit M
    # the SEXT injection's ora #$ff00 would silently truncate to
    # ora #$00.
    if ! grep -qF "rep #0x30" "$sSgnFile"; then
        cat "$sSgnFile" >&2
        die "sgnlt: expected rep #\$30 prologue (i8 signed cmp needs 16-bit M)"
    fi
    # A second test used to follow ("rep #0x10" present AND "rep #0x30"
    # absent). It could never fire, because the check above has already
    # died whenever "rep #0x30" is absent, so it was dropped.
    # NOTE(review): nothing here asserts that the 8-bit prologue is
    # absent, nor that a cmp is emitted as the log line suggests.
    #
    # Later checks replace the EXIT handler with lists that omit these
    # two files, so delete them here on the success path.
    rm -f "$cFile10" "$sSgnFile"
fi

# 11i. i8 equality compare on two stack args (eqbyte): exercises i8
#      SETCC promotion through Lower*CC.
#      Skipped when clang has not been built.
if [ -x "$CLANG" ]; then
    log "check: clang lowers i8 == i8 via promoted i16 cmp"
    cFile9="$(mktemp --suffix=.c)"
    sEqbFile="$(mktemp --suffix=.s)"
    # This handler replaces the ones installed by checks 11m, 11l and
    # 11k, which run before this check. Their files are kept in the list
    # (guarded with `${var:-}` for `set -u`, since 11l may have been
    # skipped) so a failure here still removes them.
    trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$sCmpFile" "$cFile2" "$sSelFile" "$cFile3" "$sChainFile" "$cFile4" "$sMulFile" "$cFile5" "$sShfFile" "$cFile6" "$sPtrFile" "$cFile7" "$sBptrFile" "$cFile8" "$sGbFile" "$cFile9" "$sEqbFile" "${cFile10:-}" "${sSgnFile:-}" "${cFile11:-}" "${sCallsFile:-}" "${cFile12:-}" "${sBigFile:-}"' EXIT
    cat > "$cFile9" <<'EOF'
unsigned char eqbyte(unsigned char a, unsigned char b) { return a == b; }
EOF
    "$CLANG" --target=w65816 -O2 -S "$cFile9" -o "$sEqbFile"
    # Must produce a cmp + beq (the eq diamond).
    if ! grep -qE 'cmp ' "$sEqbFile" || ! grep -qF "beq" "$sEqbFile"; then
        cat "$sEqbFile" >&2
        die "eqbyte test: expected cmp + beq sequence"
    fi
    # The EXIT handlers installed from section 12 onward do not list
    # these files, so delete them here on the success path.
    rm -f "$cFile9" "$sEqbFile"
fi

# 12. Real C through clang. Uses the clang front-end if it has been
|
|
# built; skipped otherwise (clang takes 15-30 minutes to build the
|
|
# first time; afterwards rebuilds are fast).
|
|
CLANG="$BUILD_DIR/bin/clang"
|
|
if [ -x "$CLANG" ] && [ -x "$OBJDUMP" ]; then
|
|
log "check: clang -target w65816 -O2 compiles a tiny C function"
|
|
cFile="$(mktemp --suffix=.c)"
|
|
oFile2="$(mktemp --suffix=.o)"
|
|
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2"' EXIT
|
|
cat > "$cFile" <<'EOF'
|
|
int answer(void) { return 42; }
|
|
EOF
|
|
"$CLANG" --target=w65816 -O2 -c "$cFile" -o "$oFile2"
|
|
disasm="$("$OBJDUMP" --triple=w65816 -d "$oFile2" 2>&1)"
|
|
for expect in "rep #0x30" "lda #0x2a" "rtl"; do
|
|
if ! printf '%s\n' "$disasm" | grep -qF "$expect"; then
|
|
warn "clang test missing: $expect"
|
|
printf '%s\n' "$disasm" >&2
|
|
die "clang end-to-end test failed"
|
|
fi
|
|
done
|
|
|
|
# 13. i32 (long) compile path. Type legalization splits i32 into
|
|
# two i16 halves; the high half flows through the (add FrameIndex,
|
|
# 2) shape, which previously crashed ISel with "Cannot select
|
|
# FrameIndex<-2>". SelectFrameIndex now folds (add FI, const) so
|
|
# the split loads land on a stack-relative addressing mode.
|
|
# Return ABI: low->A, high->X (TAX in the epilogue).
|
|
# Also asserts the native ADC carry chain (CLC + ADC + ADC) is in
|
|
# place — task #49 replaced the bloated SETCC-based carry detect
|
|
# (lda;cmp;bcc;lda) with a direct ADDC/ADDE-pattern lowering that
|
|
# uses the C flag in P as a Glue-modeled physreg.
|
|
log "check: clang compiles a long add (i32 split + A:X return)"
|
|
cI32File="$(mktemp --suffix=.c)"
|
|
oI32File="$(mktemp --suffix=.o)"
|
|
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File"' EXIT
|
|
cat > "$cI32File" <<'EOF'
|
|
long add32(long a, long b) { return a + b; }
|
|
EOF
|
|
"$CLANG" --target=w65816 -O2 -c "$cI32File" -o "$oI32File"
|
|
disasmI32="$("$OBJDUMP" --triple=w65816 -d "$oI32File" 2>&1)"
|
|
# TAX confirms the high-half-into-X part of the return ABI fired.
|
|
# Without it, both halves would pile into A and one would be lost.
|
|
# Exactly one CLC and exactly two ADCs prove the native carry chain
|
|
# is wired (one CLC for lo, ADC lo, ADC hi-with-carry); a regression
|
|
# to the SETCC path would show two CLCs and a bcc/cmp.
|
|
for expect in "tax" "rtl" "clc" "adc"; do
|
|
if ! printf '%s\n' "$disasmI32" | grep -qF "$expect"; then
|
|
warn "i32 add test missing: $expect"
|
|
printf '%s\n' "$disasmI32" >&2
|
|
die "i32 add end-to-end test failed"
|
|
fi
|
|
done
|
|
nClc="$(printf '%s\n' "$disasmI32" | grep -cE '\bclc\b' || true)"
|
|
nAdc="$(printf '%s\n' "$disasmI32" | grep -cE '\badc\b' || true)"
|
|
nBcc="$(printf '%s\n' "$disasmI32" | grep -cE '\bbcc\b' || true)"
|
|
if [ "$nClc" != "1" ] || [ "$nAdc" != "2" ] || [ "$nBcc" != "0" ]; then
|
|
warn "i32 add carry-chain shape wrong (clc=$nClc adc=$nAdc bcc=$nBcc, want 1/2/0)"
|
|
printf '%s\n' "$disasmI32" >&2
|
|
die "i32 add carry-chain regression"
|
|
fi
|
|
# Lock the post-StackSlotCleanup instruction count: should be ~11 for
|
|
# add32 (rep + pha + clc + adc + sta + txa + adc + tax + lda + ply + rtl
|
|
# — i32-first-arg in A:X means arg0_hi loads as TXA, no LDAfi). If
|
|
# this regresses meaningfully (say >14) the cleanup pass, the
|
|
# rematerialization flag, or the A:X first-arg ABI has been broken.
|
|
nInsns="$(printf '%s\n' "$disasmI32" | grep -cE '^[0-9a-f]+:' || true)"
|
|
if [ "$nInsns" -gt 14 ]; then
|
|
warn "i32 add bloat (got $nInsns insns, want <=14 — was 25 pre-cleanup, 11 post)"
|
|
printf '%s\n' "$disasmI32" >&2
|
|
die "i32 add code-quality regression"
|
|
fi
|
|
# The A:X arg0 ABI moves arg0_hi out of the stack slot, so the
|
|
# asm should contain TXA (X→A for the hi-half ADC tied input)
|
|
# exactly once. A regression to "load arg0_hi from stack" would
|
|
# remove the TXA and add an extra LDA.
|
|
nTxa="$(printf '%s\n' "$disasmI32" | grep -cE '\btxa\b' || true)"
|
|
if [ "$nTxa" != "1" ]; then
|
|
warn "i32 add: expected exactly 1 txa (i32-first-arg-in-A:X path); got $nTxa"
|
|
printf '%s\n' "$disasmI32" >&2
|
|
die "i32 add A:X first-arg ABI regression"
|
|
fi
|
|
|
|
# i32 carry chain on two-Acc16 (no foldable load): exercises the
|
|
# ADD_RR + ADDE_RR custom-inserter path. fib32 has live a/b values
|
|
# the inserter must spill to a fresh slot; pre-fix this crashed at
|
|
# ISel with "Cannot select: adde reg, reg".
|
|
log "check: clang compiles a 32-bit fib loop (ADDE_RR inserter path)"
|
|
cFibFile="$(mktemp --suffix=.c)"
|
|
sFibFile="$(mktemp --suffix=.s)"
|
|
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile"' EXIT
|
|
cat > "$cFibFile" <<'EOF'
|
|
unsigned long fib32(unsigned long n) {
|
|
unsigned long a = 0, b = 1, t;
|
|
while (n > 0) { t = a + b; a = b; b = t; n--; }
|
|
return a;
|
|
}
|
|
EOF
|
|
if ! "$CLANG" --target=w65816 -O2 -S "$cFibFile" -o "$sFibFile" 2>&1 >/dev/null; then
|
|
die "i32 fib (ADDE_RR inserter) failed to compile"
|
|
fi
|
|
if ! grep -qE '\bclc\b' "$sFibFile" || ! grep -qE '\badc\b' "$sFibFile"; then
|
|
warn "i32 fib output missing clc/adc"
|
|
die "i32 fib carry-chain regression"
|
|
fi
|
|
|
|
# i32 multiply via __mulsi3 libcall: tests the multi-i16-return path
# (RetCC_W65816 assigning A then X for 2 i16 returns) plus the i32
# arg push side.  Pre-fix this hit "multi-return calls not yet
# supported (Ins.size=4)" when LowerCallTo split the i32 return.
log "check: clang compiles a long multiply via __mulsi3 libcall"
cMulFile="$(mktemp --suffix=.c)"
sMulFile="$(mktemp --suffix=.s)"
# `trap ... EXIT` replaces the previously registered handler, so the list
# has to repeat every temp file created so far plus the two new ones.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile"' EXIT
cat > "$cMulFile" <<'EOF'
unsigned long mul32(unsigned long a, unsigned long b) { return a * b; }
EOF
# `2>&1 >/dev/null`: clang's stderr is sent to this script's stdout and
# clang's own stdout is discarded, so diagnostics stay visible on failure.
if ! "$CLANG" --target=w65816 -O2 -S "$cMulFile" -o "$sMulFile" 2>&1 >/dev/null; then
    die "i32 mul via __mulsi3 failed to compile"
fi
# The libcall symbol must be referenced in the generated assembly.
if ! grep -q '__mulsi3' "$sMulFile"; then
    die "i32 mul did not emit __mulsi3 libcall"
fi

# i32 shift-by-1 (SHL/SRL): the type-legalizer's SHL_PARTS / SRL_PARTS
# expansion needs `(srl x, 15)` or `(shl x, 15)` for the carry-cross-
# halves slot.  Without inline patterns those fall to __lshrhi3 /
# __ashlhi3 libcalls (~10 byte overhead per shift).  SRL15A and
# SHL15A pseudos handle them inline (`ASL/LSR; LDA #0; ROL/ROR`,
# 3 bytes).  Verify the shift-by-1 output doesn't contain a hi3
# libcall.
log "check: clang i32 shift-by-1 stays inline (no __lshrhi3 / __ashlhi3 libcall)"
cSh1File="$(mktemp --suffix=.c)"
sSh1File="$(mktemp --suffix=.s)"
# `trap ... EXIT` replaces the previously registered handler, so the list
# has to repeat every temp file created so far plus the two new ones.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File"' EXIT
cat > "$cSh1File" <<'EOF'
unsigned long shl1(unsigned long a) { return a << 1; }
unsigned long shr1(unsigned long a) { return a >> 1; }
EOF
# `2>&1 >/dev/null`: clang's stderr is sent to this script's stdout and
# clang's own stdout is discarded, so diagnostics stay visible on failure.
if ! "$CLANG" --target=w65816 -O2 -S "$cSh1File" -o "$sSh1File" 2>&1 >/dev/null; then
    die "i32 shift-by-1 failed to compile"
fi
# Any reference to either i16 shift helper means the inline path was missed.
if grep -qE '__lshrhi3|__ashlhi3' "$sSh1File"; then
    warn "i32 shift-by-1 still calling i16 shift libcall — SRL15A/SHL15A pattern not firing"
    die "i32 shift-by-1 regression"
fi

# Varargs (<stdarg.h>): LowerFormalArguments creates a fixed FI
# for the first vararg slot when IsVarArg; LowerVASTART stores
# its address to the va_list pointer.  VAARG/VACOPY/VAEND use
# default LLVM expansions.  Pre-fix this hit
# "vararg functions not yet supported" fatal error.
log "check: clang compiles a vararg function (<stdarg.h>)"
cVaFile="$(mktemp --suffix=.c)"
sVaFile="$(mktemp --suffix=.s)"
# `trap ... EXIT` replaces the previously registered handler, so the list
# has to repeat every temp file created so far plus the two new ones.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File" "$cVaFile" "$sVaFile"' EXIT
cat > "$cVaFile" <<'EOF'
#include <stdarg.h>
int sumArgs(int n, ...) {
    va_list args;
    va_start(args, n);
    int sum = 0;
    for (int i = 0; i < n; i++) sum += va_arg(args, int);
    va_end(args);
    return sum;
}
EOF
# Compile-only check: the generated assembly is not inspected further.
# `2>&1 >/dev/null`: clang's stderr is sent to this script's stdout and
# clang's own stdout is discarded, so diagnostics stay visible on failure.
if ! "$CLANG" --target=w65816 -O2 -S "$cVaFile" -o "$sVaFile" 2>&1 >/dev/null; then
    die "vararg function failed to compile"
fi

# Stack-array LEA: `char arr[16]; arr[i] = ...` needs the address
# of an alloca'd object as an i16 value.  Pre-fix this hit "Cannot
# select: FrameIndex<0>" because addr_fi only matches in load/store
# contexts.  W65816DAGToDAGISel::Select now lowers a bare
# ISD::FrameIndex to ADDframe (FI, 0); eliminateFrameIndex expands
# ADDframe into TSC + CLC + ADC #disp.
log "check: clang takes the address of a stack-allocated array"
cAllocaFile="$(mktemp --suffix=.c)"
sAllocaFile="$(mktemp --suffix=.s)"
# `trap ... EXIT` replaces the previously registered handler, so the list
# has to repeat every temp file created so far.  The shift-by-1 and vararg
# temp files are included here; dropping them would leak four files per run.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File" "$cVaFile" "$sVaFile" "$cAllocaFile" "$sAllocaFile"' EXIT
cat > "$cAllocaFile" <<'EOF'
void writeBytes(char *out, char v) {
    char tmp[8];
    for (int i = 0; i < 8; i++) tmp[i] = v + i;
    for (int i = 0; i < 8; i++) out[i] = tmp[i];
}
EOF
# `2>&1 >/dev/null`: clang's stderr is sent to this script's stdout and
# clang's own stdout is discarded, so diagnostics stay visible on failure.
if ! "$CLANG" --target=w65816 -O2 -S "$cAllocaFile" -o "$sAllocaFile" 2>&1 >/dev/null; then
    die "alloca'd-array address failed to compile"
fi
# The TSC; CLC; ADC #disp triple is the LEA expansion of ADDframe;
# at least one occurrence proves the pseudo wired through.
if ! grep -qE '^\s*tsc' "$sAllocaFile"; then
    die "alloca'd-array LEA missing TSC (ADDframe expansion broken)"
fi

# signed-byte arithmetic (`(int)(*p) - (int)(*q)` style — strcmp).
# Exercises three formerly-missing patterns: SEXTLOAD i16 from i8
# (we Expand it to (sext (load))), sext_inreg i16 from i8 (the
# `((x & 0xFF) ^ 0x80) - 0x80` tablegen Pat), and extloadi8 from
# an Acc16 register pointer (LDAptr / "high byte don't care").
# NOTE(review): the sign-extension paths are only reached if plain `char`
# is signed for this target — confirm.
log "check: clang compiles a signed-byte strcmp (sextload + sext_inreg + extload-via-ptr)"
cStrFile="$(mktemp --suffix=.c)"
sStrFile="$(mktemp --suffix=.s)"
# `trap ... EXIT` replaces the previously registered handler, so the list
# has to repeat every temp file created so far.  The shift-by-1 and vararg
# temp files are included here; dropping them would leak four files per run.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File" "$cVaFile" "$sVaFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile"' EXIT
cat > "$cStrFile" <<'EOF'
int strcmp32(const char *a, const char *b) {
    while (*a && *a == *b) { a++; b++; }
    return (int)(*a) - (int)(*b);
}
EOF
# Compile-only check: the generated assembly is not inspected further.
# `2>&1 >/dev/null`: clang's stderr is sent to this script's stdout and
# clang's own stdout is discarded, so diagnostics stay visible on failure.
if ! "$CLANG" --target=w65816 -O2 -S "$cStrFile" -o "$sStrFile" 2>&1 >/dev/null; then
    die "signed-byte strcmp failed to compile"
fi

# Indirect calls (function pointers).  Lowered via the runtime
# trampoline at runtime/src/libgcc.s::__jsl_indir, which does
# JMP (__indirTarget) — caller stores target to __indirTarget then
# JSL __jsl_indir.  Pre-fix, LowerCall reported a fatal error.
log "check: clang compiles an indirect call (via __jsl_indir trampoline)"
cIndFile="$(mktemp --suffix=.c)"
sIndFile="$(mktemp --suffix=.s)"
# `trap ... EXIT` replaces the previously registered handler.  This is the
# last registration in the script, so it must name every temp file created
# above, including the shift-by-1 and vararg files, or they are never removed.
trap 'rm -f "$irFile" "$sFile" "$irCallFile" "$sCallFile" "$irMaFile" "$sMaFile" "$irI8File" "$sI8File" "$cFile" "$oFile2" "$cI32File" "$oI32File" "$cFibFile" "$sFibFile" "$cMulFile" "$sMulFile" "$cSh1File" "$sSh1File" "$cVaFile" "$sVaFile" "$cAllocaFile" "$sAllocaFile" "$cStrFile" "$sStrFile" "$cIndFile" "$sIndFile"' EXIT
cat > "$cIndFile" <<'EOF'
typedef int (*BinOp)(int, int);
int doOp(BinOp op, int x, int y) { return op(x, y); }
EOF
# `2>&1 >/dev/null`: clang's stderr is sent to this script's stdout and
# clang's own stdout is discarded, so diagnostics stay visible on failure.
if ! "$CLANG" --target=w65816 -O2 -S "$cIndFile" -o "$sIndFile" 2>&1 >/dev/null; then
    die "indirect call failed to compile"
fi
# Both halves of the trampoline protocol must show up in the assembly:
# a reference to the target slot and a reference to the trampoline itself.
if ! grep -q '__indirTarget' "$sIndFile"; then
    die "indirect call missing __indirTarget store"
fi
if ! grep -q '__jsl_indir' "$sIndFile"; then
    die "indirect call missing JSL to __jsl_indir trampoline"
fi
fi
# Final status line; each failing check above reports through die instead.
log "all smoke checks passed"