Checkpoint

This commit is contained in:
Scott Duensing 2026-05-03 01:40:15 -05:00
parent c4da4b77b3
commit 17899ee960
12 changed files with 782 additions and 13 deletions

View file

@ -114,7 +114,7 @@ which runs correctly under MAME (apple2gs).
image addresses. image addresses.
- `runtime/build.sh` builds crt0, libc, soft-float, soft-double, - `runtime/build.sh` builds crt0, libc, soft-float, soft-double,
libgcc into linkable objects. libgcc into linkable objects.
- `scripts/smokeTest.sh` runs 122 end-to-end checks at -O2: - `scripts/smokeTest.sh` runs 123 end-to-end checks at -O2:
scalar ops, control flow, calling conventions, MAME execution scalar ops, control flow, calling conventions, MAME execution
regressions, link816 bss-base safety + weak-symbol resolution + regressions, link816 bss-base safety + weak-symbol resolution +
heap_end-vs-heap_start sanity, iigs/toolbox.h compile + link, heap_end-vs-heap_start sanity, iigs/toolbox.h compile + link,
@ -219,14 +219,27 @@ RAM through $FFFF, gaining 8KB of bank-0 space.)
## Yet to come ## Yet to come
- **C++ exceptions**`dynamic_cast` works (via libcxxabi shim, - **C++ exceptions through clang `-fsjlj-exceptions`** — the SJLJ
see "What works"); `throw`/`try`/`catch` does not. Implementing runtime IS implemented (`runtime/src/libcxxabiSjlj.c` provides
exceptions needs the full Itanium unwind ABI: `__cxa_throw`, `__cxa_throw`, `__cxa_allocate_exception`, `__cxa_begin_catch`,
`__cxa_allocate_exception`, `_Unwind_RaiseException`, a `__cxa_end_catch`, `__cxa_rethrow`, `_Unwind_SjLj_Register/
personality routine, and DWARF `.eh_frame` data the unwinder Unregister/RaiseException/Resume`, plus a no-op `__gxx_personality
consumes to restore registers per-frame. The 65816's lack of _sj0`). The W65816 backend has SJLJ wiring: `MCAsmInfo` selects
any existing unwinder makes this a real project — defer until `ExceptionHandling::SjLj` so clang's `SjLjEHPrepare` runs; a
someone needs exception-based code on the IIgs. custom `W65816SjLjFinalize` IR pass (in
`src/llvm/lib/Target/W65816/`) finishes the lowering by inserting
an actual `setjmp` + dispatch block, building a per-function
catch table referenced via the lsda field, and rewriting the
`eh.typeid.for` calls to use typeinfo addresses as selectors.
Throw/catch round-trip works end-to-end **when driven from
pure C** (smoke test "SJLJ exception runtime"); the C++
frontend path crashes at runtime because clang's `-O2`
lowering of the volatile call_site store before `__cxa_throw`
routes the value to the wrong stack address — a separate
W65816 isel bug for `store volatile i32 N, <stack-relative GEP>`
that needs its own debugging session. Until that's fixed,
raw C code can use the SJLJ runtime directly; C++ `try/catch`
still requires `-fno-exceptions`.
- **GS/OS validated against a real ProDOS volume** — the wrapper - **GS/OS validated against a real ProDOS volume** — the wrapper
contract (PHA + PEA 0 + LDX + JSL $E100A8 + post-call SP fixup) contract (PHA + PEA 0 + LDX + JSL $E100A8 + post-call SP fixup)

View file

@ -42,6 +42,7 @@ cc "$SRC/strtok.c"
cc "$SRC/math.c" cc "$SRC/math.c"
cc "$SRC/softFloat.c" cc "$SRC/softFloat.c"
cc "$SRC/libcxxabi.c" cc "$SRC/libcxxabi.c"
cc "$SRC/libcxxabiSjlj.c"
asm "$SRC/iigsGsos.s" asm "$SRC/iigsGsos.s"
# softDouble.c builds at -O1: __muldf3's u64 live-range pressure # softDouble.c builds at -O1: __muldf3's u64 live-range pressure
# overflows the greedy allocator at -O2. dpack is already noinline # overflows the greedy allocator at -O2. dpack is already noinline

View file

@ -146,3 +146,39 @@ void abiPureVirtual(void) {
while (1) { while (1) {
} }
} }
// Fundamental typeinfo objects. The Itanium ABI lists a fixed roster
// of typeinfo objects for built-in types; libcxxabi normally provides
// them. We supply the ones likely to be thrown. Each is a TypeInfo
// (vptr + name) where the vptr is the fundamental-type-info vtable
// address. Since our personality compares typeinfo pointers directly
// (no hierarchy walk for fundamental types), the contents only need
// to be stable addresses.
const void *abiFundTypeInfoVtable[3] __asm__("_ZTVN10__cxxabiv123__fundamental_type_infoE") = { 0, 0, 0 };
// Helper macro to declare one fundamental typeinfo + its name string.
#define FUND_TI(MANGLED, NAMECHAR, NAMESTR) \
static const char abiTSname_##NAMECHAR[] = NAMESTR; \
const TypeInfo MANGLED __asm__(#MANGLED) = { \
&abiFundTypeInfoVtable[2], abiTSname_##NAMECHAR \
}
// Itanium mangling for built-in types: i=int, j=unsigned int, c=char,
// h=unsigned char, s=short, t=unsigned short, l=long, m=unsigned long,
// x=long long, y=unsigned long long, b=bool, v=void, Pv=void*.
FUND_TI(_ZTIi, i, "i");
FUND_TI(_ZTIj, j, "j");
FUND_TI(_ZTIc, c, "c");
FUND_TI(_ZTIh, h, "h");
FUND_TI(_ZTIs, s, "s");
FUND_TI(_ZTIt, t, "t");
FUND_TI(_ZTIl, l, "l");
FUND_TI(_ZTIm, m, "m");
FUND_TI(_ZTIb, b, "b");
FUND_TI(_ZTIv, v, "v");
// Pointer-to-void typeinfo. Used when throwing void* (or any pointer
// caught as void*). Itanium classifies it as __pointer_type_info.
const void *abiPtrTypeInfoVtable[3] __asm__("_ZTVN10__cxxabiv119__pointer_type_infoE") = { 0, 0, 0 };
static const char abiTSname_Pv[] = "Pv";
const TypeInfo _ZTIPv __asm__("_ZTIPv") = { &abiPtrTypeInfoVtable[2], abiTSname_Pv };

224
runtime/src/libcxxabiSjlj.c Normal file
View file

@ -0,0 +1,224 @@
// SJLJ exception runtime for the W65816 backend.
//
// Works with the IR generated by clang's `-fsjlj-exceptions` lowering
// after our W65816SjLjFinalize pass has rewritten things. The shape:
//
// - clang+SjLjEHPrepare emit a per-function "function context" alloca:
// struct FnCtx {
// void *prev; // [0] linked list ptr (set by Register)
// int call_site; // [1] active call_site index (set by clang)
// int data[4]; // [2] [exn_obj, selector, ...] — set by us
// void *personality; // [3] @__gxx_personality_sj0 (unused here)
// void *lsda; // [4] our pass replaces with catch_table_ptr
// void *jbuf[5]; // [5] jmp_buf
// };
// - On entry: clang code calls _Unwind_SjLj_Register(&FnCtx) and then
// setjmp(&FnCtx.jbuf). setjmp returns 0 first time → normal flow;
// when an exception unwinds to here, longjmp(&jbuf, 1) returns
// non-zero → our finalize pass dispatches via switch on FnCtx.call_site.
// - At each invoke: clang stores N → FnCtx.call_site, calls the
// function, then clears it (or stores next CS).
// - Landing pad expects FnCtx.data[0] = exception ptr, FnCtx.data[1]
// = selector, where selector is the (i32)(uintptr_t)&typeinfo for
// the matched catch (per our pass's eh.typeid.for rewrite).
//
// Catch table layout (built by W65816SjLjFinalize):
// short call_site_1
// short typeinfo_addr_1
// short call_site_2
// short typeinfo_addr_2
// ...
// short 0 ; sentinel
// short 0
// Stored as FnCtx.lsda; matches "if active call_site equals this row's
// call_site AND thrown type matches this row's typeinfo, this catch fires".
#include <stdint.h>
#include <stddef.h>
extern void *malloc(unsigned int);
extern void free(void *);
extern int setjmp(void *jb);
extern void longjmp(void *jb, int v) __attribute__((noreturn));
typedef struct TypeInfo {
const void *vptr;
const char *name;
} TypeInfo;
// __dynamic_cast lives in libcxxabi.c — we reuse it for catch matching.
extern void *abiDynamicCast(const void *src, const TypeInfo *srcType,
const TypeInfo *dstType, int32_t hint)
__asm__("__dynamic_cast");
// Function context layout (matches what SjLjEHPrepare emits). We
// only access fields we read; everything else is opaque.
typedef struct FnCtx {
struct FnCtx *prev;
uint32_t call_site;
uint32_t data[4];
void *personality;
uint16_t *lsda; // catch table ptr (set by our pass)
void *jbuf[5];
} FnCtx;
// Active fn_ctx stack. SjLjEHPrepare doesn't actually require a
// thread-local because the IIgs is single-threaded; one global suffices.
static FnCtx *gActive = 0;
// Currently-in-flight exception. Set by __cxa_throw; consumed by
// __cxa_begin_catch. Holds the user object (the part returned by
// __cxa_begin_catch) and the typeinfo pointer used for matching.
typedef struct ExcHeader {
const TypeInfo *type;
void (*dtor)(void *);
// The user exception object follows immediately after this header.
} ExcHeader;
static ExcHeader *gCurrentExc = 0;
void _Unwind_SjLj_Register(FnCtx *ctx) {
ctx->prev = gActive;
gActive = ctx;
}
void _Unwind_SjLj_Unregister(FnCtx *ctx) {
// SjLjEHPrepare puts unregister at every return. Pop the stack
// assuming LIFO order (which it is for non-pathological code).
if (gActive == ctx) {
gActive = ctx->prev;
}
}
// Walk the catch table for a fn_ctx, find a matching catch for the
// thrown type, return its row's typeinfo (which is what we'll store
// in selector). Returns 0 if no match.
static const TypeInfo *findCatch(FnCtx *ctx, const TypeInfo *thrownType, void *thrownObj, void **adjustedObjOut) {
if (!ctx->lsda) {
return 0;
}
uint16_t *p = ctx->lsda;
uint16_t cs = (uint16_t)ctx->call_site;
for (;;) {
uint16_t row_cs = p[0];
uint16_t row_ti = p[1];
if (row_cs == 0 && row_ti == 0) {
return 0;
}
if (row_cs == cs) {
const TypeInfo *catchType = (const TypeInfo *)(uintptr_t)row_ti;
if (thrownType == catchType) {
*adjustedObjOut = thrownObj;
return catchType;
}
}
p += 2;
}
}
void _Unwind_SjLj_RaiseException(ExcHeader *exc) __attribute__((noreturn));
void _Unwind_SjLj_RaiseException(ExcHeader *exc) {
gCurrentExc = exc;
for (FnCtx *ctx = gActive; ctx; ctx = ctx->prev) {
void *adjustedObj = (void *)(exc + 1);
const TypeInfo *match =
findCatch(ctx, exc->type, adjustedObj, &adjustedObj);
if (match) {
gActive = ctx;
ctx->data[0] = (uint32_t)(uintptr_t)exc;
ctx->data[1] = (uint32_t)(uintptr_t)match;
longjmp(ctx->jbuf, 1);
}
}
extern void abort(void) __attribute__((noreturn));
abort();
}
void _Unwind_SjLj_Resume(void *unused) __attribute__((noreturn));
void _Unwind_SjLj_Resume(void *unused) {
(void)unused;
if (gCurrentExc) {
_Unwind_SjLj_RaiseException(gCurrentExc);
}
extern void abort(void) __attribute__((noreturn));
abort();
}
// Personality routine — never actually called in our scheme (we
// dispatch via call_site directly). Provided as a symbol because
// SjLjEHPrepare emits `store @__gxx_personality_sj0, fn_ctx[3]`
// at function entry. Returns "continue unwinding" if ever invoked.
int __gxx_personality_sj0(int version, int actions, uint64_t excClass,
void *exc, void *ctx) {
(void)version; (void)actions; (void)excClass; (void)exc; (void)ctx;
return 8; // _URC_CONTINUE_UNWIND
}
// Itanium C++ ABI surface.
void *__cxa_allocate_exception(unsigned int sz) {
void *p = malloc(sizeof(ExcHeader) + sz);
if (!p) {
extern void abort(void) __attribute__((noreturn));
abort();
}
// Zero the header; the user object isn't initialized.
ExcHeader *h = (ExcHeader *)p;
h->type = 0;
h->dtor = 0;
return (void *)(h + 1); // user object pointer
}
void __cxa_free_exception(void *user) {
if (user) {
free((char *)user - sizeof(ExcHeader));
}
}
void __cxa_throw(void *user, const TypeInfo *type, void (*dtor)(void *))
__attribute__((noreturn));
void __cxa_throw(void *user, const TypeInfo *type, void (*dtor)(void *)) {
ExcHeader *h = (ExcHeader *)user - 1;
h->type = type;
h->dtor = dtor;
_Unwind_SjLj_RaiseException(h);
extern void abort(void) __attribute__((noreturn));
abort();
}
void *__cxa_begin_catch(void *exc) {
// exc is the ExcHeader pointer (per landing pad's data[0] write).
ExcHeader *h = (ExcHeader *)exc;
return (void *)(h + 1); // hand back the user object pointer
}
void __cxa_end_catch(void) {
if (gCurrentExc) {
if (gCurrentExc->dtor) {
gCurrentExc->dtor(gCurrentExc + 1);
}
free(gCurrentExc);
gCurrentExc = 0;
}
}
void __cxa_rethrow(void) __attribute__((noreturn));
void __cxa_rethrow(void) {
if (gCurrentExc) {
_Unwind_SjLj_RaiseException(gCurrentExc);
}
extern void abort(void) __attribute__((noreturn));
abort();
}

View file

@ -1159,12 +1159,15 @@ longjmp:
sta 0xe0 ; jmp_buf addr -> DP scratch sta 0xe0 ; jmp_buf addr -> DP scratch
lda 0x4, s ; A = val (2nd arg, on stack) lda 0x4, s ; A = val (2nd arg, on stack)
sta 0xe2 ; save val sta 0xe2 ; save val
; Restore SP: env[0..1] - 3 (so the upcoming PHAs land at the right slots). ; Restore SP: env[0..1]. TCS directly — the PHAs below consume
; 3 bytes (1 bank + 2 retaddr); RTL pulls them back; net post-RTL
; S = saved_SP. (Earlier this subtracted 3 before TCS, leaving
; S = saved_SP - 3 after RTL, mangling caller's stack-relative
; reads — caught by an SJLJ EH test where main's locals shifted
; by 3 bytes after longjmp.)
ldy #0 ldy #0
lda (0xe0), y ; A = saved SP (16-bit) lda (0xe0), y ; A = saved SP (16-bit)
sec tcs ; SP = saved_SP
sbc #0x3
tcs ; SP = saved_SP - 3
; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank. ; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank.
sep #0x20 sep #0x20
ldy #4 ldy #4

View file

@ -3996,6 +3996,81 @@ EOF
fi fi
rm -f "$cppRttiFile" "$oCppRttiFile" "$oCxxAbiFile" "$binCppRttiFile" rm -f "$cppRttiFile" "$oCppRttiFile" "$oCxxAbiFile" "$binCppRttiFile"
# SJLJ exception runtime end-to-end via pure C. Drives the
# libcxxabiSjlj runtime directly (manual fn_ctx setup + setjmp
# + __cxa_throw + __cxa_begin_catch) since the C++ frontend
# path through clang's `-fsjlj-exceptions` lowering currently
# mis-routes the volatile call_site store at -O2 (separate
# backend isel issue, see STATUS "Yet to come"). This test
# exercises every runtime function and is also the canary for
# the longjmp SP-restore fix (subtracting #3 before TCS left
# caller's S 3 bytes off; locals shifted across the longjmp).
log "check: MAME runs SJLJ exception runtime (throw/catch round-trip)"
cSjeFile="$(mktemp --suffix=.c)"
oSjeFile="$(mktemp --suffix=.o)"
oSjeRt="$(mktemp --suffix=.o)"
oSjeAbi="$(mktemp --suffix=.o)"
binSjeFile="$(mktemp --suffix=.bin)"
cat > "$cSjeFile" <<'EOF'
extern void *__cxa_allocate_exception(unsigned int);
extern void __cxa_throw(void *, const void *, void (*)(void *)) __attribute__((noreturn));
extern void *__cxa_begin_catch(void *);
extern void __cxa_end_catch(void);
extern int setjmp(void *jb);
extern const char _ZTIi[];
typedef struct FnCtx {
struct FnCtx *prev;
unsigned int call_site_lo, call_site_hi;
unsigned int data[8];
void *personality;
void *lsda;
char jbuf[10];
} FnCtx;
extern void _Unwind_SjLj_Register(FnCtx *);
static unsigned short ctab[4];
int main(void) {
ctab[0] = 1;
ctab[1] = (unsigned short)(unsigned long)_ZTIi;
ctab[2] = 0;
ctab[3] = 0;
*(volatile unsigned short *)0x5000 = 0xa1a1;
FnCtx ctx;
ctx.personality = 0;
ctx.lsda = (void *)ctab;
_Unwind_SjLj_Register(&ctx);
volatile int r = setjmp(ctx.jbuf);
if (r == 0) {
ctx.call_site_lo = 1;
void *p = __cxa_allocate_exception(2);
*(int *)p = 42;
__cxa_throw(p, _ZTIi, 0);
}
void *u = __cxa_begin_catch((void *)ctx.data[0]);
*(volatile unsigned short *)0x5002 = (unsigned short)*(int *)u;
__cxa_end_catch();
*(volatile unsigned short *)0x5004 = 0xc1c1;
while (1) {}
}
EOF
"$CLANG" --target=w65816 -O2 -ffunction-sections \
-I"$PROJECT_ROOT/runtime/include" \
-c "$PROJECT_ROOT/runtime/src/libcxxabiSjlj.c" -o "$oSjeRt"
"$CLANG" --target=w65816 -O2 -ffunction-sections \
-I"$PROJECT_ROOT/runtime/include" \
-c "$PROJECT_ROOT/runtime/src/libcxxabi.c" -o "$oSjeAbi"
"$CLANG" --target=w65816 -O2 -ffunction-sections \
-I"$PROJECT_ROOT/runtime/include" \
-c "$cSjeFile" -o "$oSjeFile"
"$PROJECT_ROOT/tools/link816" -o "$binSjeFile" --text-base 0x1000 \
"$oCrt0F" "$oLibgccFile" "$oLibcF" \
"$oSjeAbi" "$oSjeRt" "$oSjeFile" \
>/dev/null 2>&1
if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binSjeFile" --check \
0x005000=a1a1 0x005002=002a 0x005004=c1c1 >/dev/null 2>&1; then
die "MAME: SJLJ throw/catch round-trip failed (libcxxabiSjlj or longjmp regression)"
fi
rm -f "$cSjeFile" "$oSjeFile" "$oSjeRt" "$oSjeAbi" "$binSjeFile"
# Real-world: hex dumper using memory-backed file I/O. Reads # Real-world: hex dumper using memory-backed file I/O. Reads
# 16 bytes from a registered "in" file, writes a hex+ASCII # 16 bytes from a registered "in" file, writes a hex+ASCII
# dump to a registered "out" file via fprintf. Verifies the # dump to a registered "out" file via fprintf. Verifies the

View file

@ -33,6 +33,7 @@ add_llvm_target(W65816CodeGen
W65816SpillToX.cpp W65816SpillToX.cpp
W65816NegYIndY.cpp W65816NegYIndY.cpp
W65816PreSpillCrossCall.cpp W65816PreSpillCrossCall.cpp
W65816SjLjFinalize.cpp
W65816TargetMachine.cpp W65816TargetMachine.cpp
W65816AsmPrinter.cpp W65816AsmPrinter.cpp
W65816MCInstLower.cpp W65816MCInstLower.cpp

View file

@ -11,6 +11,7 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "W65816MCAsmInfo.h" #include "W65816MCAsmInfo.h"
#include "llvm/MC/MCDwarf.h"
using namespace llvm; using namespace llvm;
void W65816MCAsmInfo::anchor() {} void W65816MCAsmInfo::anchor() {}
@ -27,4 +28,12 @@ W65816MCAsmInfo::W65816MCAsmInfo(const Triple &TT) {
UsesELFSectionDirectiveForBSS = true; UsesELFSectionDirectiveForBSS = true;
SupportsDebugInformation = true; SupportsDebugInformation = true;
// C++ exceptions use the SJLJ model. Selecting SJLJ here makes
// TargetPassConfig::addPassesToHandleExceptions add SjLjEHPrepare,
// which converts invoke/landingpad/resume IR into eh.sjlj.* intrinsics
// + a function-context struct in the function's prologue. The
// backend then lowers those intrinsics in W65816ISelLowering, and
// the runtime side lives in runtime/src/libcxxabiSjlj.c.
ExceptionsType = ExceptionHandling::SjLj;
} }

View file

@ -109,6 +109,13 @@ FunctionPass *createW65816NegYIndY();
// "ran out of registers". See W65816PreSpillCrossCall.cpp. // "ran out of registers". See W65816PreSpillCrossCall.cpp.
FunctionPass *createW65816PreSpillCrossCall(); FunctionPass *createW65816PreSpillCrossCall();
// IR pass: finishes the SjLjEHPrepare lowering by inserting a setjmp
// at function entry and a dispatch block, then erasing the eh.sjlj.*
// intrinsics our backend doesn't lower natively. Runs after
// SjLjEHPrepare in addPassesToHandleExceptions. See
// W65816SjLjFinalize.cpp.
FunctionPass *createW65816SjLjFinalize();
void initializeW65816AsmPrinterPass(PassRegistry &); void initializeW65816AsmPrinterPass(PassRegistry &);
void initializeW65816DAGToDAGISelLegacyPass(PassRegistry &); void initializeW65816DAGToDAGISelLegacyPass(PassRegistry &);
void initializeW65816StackSlotCleanupPass(PassRegistry &); void initializeW65816StackSlotCleanupPass(PassRegistry &);
@ -120,6 +127,7 @@ void initializeW65816WidenAcc16Pass(PassRegistry &);
void initializeW65816SpillToXPass(PassRegistry &); void initializeW65816SpillToXPass(PassRegistry &);
void initializeW65816NegYIndYPass(PassRegistry &); void initializeW65816NegYIndYPass(PassRegistry &);
void initializeW65816PreSpillCrossCallPass(PassRegistry &); void initializeW65816PreSpillCrossCallPass(PassRegistry &);
void initializeW65816SjLjFinalizePass(PassRegistry &);
} // namespace llvm } // namespace llvm

View file

@ -92,6 +92,31 @@ W65816TargetLowering::W65816TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VACOPY, MVT::Other, Expand);
setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand);
// C++ exceptions (SJLJ model) — clang lowers exception machinery into
// these intrinsics via SjLjEHPrepare. We don't have native handling
// for any of them on this target; mark Expand so LegalizeDAG falls
// back to its no-op stubs (setjmp returns 0, longjmp is a no-op,
// setup_dispatch is a chain pass-through). The actual EH semantics
// are provided at runtime by libcxxabi (__cxa_throw etc.) calling
// _Unwind_SjLj_RaiseException, which in turn longjmps via the
// function context the prologue prepared. See
// runtime/src/libcxxabiSjlj.c for the runtime side.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Expand);
setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Expand);
setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom);
// stacksave / stackrestore — used by SjLjEHPrepare to save/restore SP
// around invoke calls. The jmp_buf already captures SP via TSC in
// our setjmp implementation, so these are redundant here. Lower
// stacksave to a constant 0 (the value is stored into the function
// context but never used for restoration on our target) and
// stackrestore to a chain pass-through (no-op).
setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
setOperationAction(ISD::FRAMEADDR, MVT::i16, Expand);
setOperationAction(ISD::RETURNADDR, MVT::i16, Expand);
setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i16, Expand);
setOperationAction(ISD::EH_DWARF_CFA, MVT::i16, Expand);
// The 65816 has no hardware multiplier or divider. Multiply by a // The 65816 has no hardware multiplier or divider. Multiply by a
// power-of-two constant is auto-rewritten to shifts by the DAG // power-of-two constant is auto-rewritten to shifts by the DAG
// combiner; arbitrary multiply / divide / mod go through libcalls // combiner; arbitrary multiply / divide / mod go through libcalls
@ -490,6 +515,12 @@ SDValue W65816TargetLowering::LowerOperation(SDValue Op,
case ISD::SHL: case ISD::SHL:
case ISD::SRL: case ISD::SRL:
case ISD::SRA: return LowerShift(Op, DAG); case ISD::SRA: return LowerShift(Op, DAG);
// SJLJ EH: setup_dispatch is a no-op on this target — the dispatcher
// logic lives entirely in the SJLJ runtime (_Unwind_SjLj_Resume +
// longjmp into the function context's jmp_buf). The isel layer
// doesn't need to emit any code; just thread the chain through.
case ISD::EH_SJLJ_SETUP_DISPATCH:
return Op.getOperand(0);
case ISD::DYNAMIC_STACKALLOC: return LowerDynamicStackalloc(Op, DAG); case ISD::DYNAMIC_STACKALLOC: return LowerDynamicStackalloc(Op, DAG);
default: default:
llvm_unreachable("W65816: unexpected operation in LowerOperation"); llvm_unreachable("W65816: unexpected operation in LowerOperation");

View file

@ -0,0 +1,356 @@
//===-- W65816SjLjFinalize.cpp - Finish SJLJ EH lowering -----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// SjLjEHPrepare leaves IR with a function-context alloca, a few marker
// intrinsics (eh.sjlj.lsda / setup.dispatch / functioncontext / callsite),
// and `invoke` instructions whose unwind dest is a landing pad block that
// reads exception+selector from the function context. The expectation
// is that the BACKEND finishes lowering by inserting an actual setjmp
// at function entry and a switch-on-call-site dispatch block.
//
// On targets like ARM that's done with custom inserter pseudos
// (Int_eh_sjlj_setjmp + EmitSjLjDispatchBlock). We don't have any of
// that machinery, so this pass does it at IR level instead. Concretely:
//
// 1. Find the function-context alloca and the _Unwind_SjLj_Register
// call (SjLjEHPrepare placed both at function entry).
// 2. After the Register call, insert:
// %r = call i16 @setjmp(ptr %jbuf) ; jbuf is fn_ctx[5]
// %is_unwind = icmp ne i16 %r, 0
// br i1 %is_unwind, label %eh.dispatch, label %normal_entry
// 3. Build %eh.dispatch:
// %cs = load i32, ptr %call_site_field
// switch i32 %cs, label %resume_unreachable
// [ i32 1, label %lpad1
// i32 2, label %lpad2
// ... ]
// The lpadN blocks already exist (originally invoke's unwind dest).
// 4. Convert each `invoke F(args) to %normal unwind to %lpad`
// to a regular `call F(args); br %normal`. The eh.sjlj.callsite(N)
// intrinsic just before the invoke recorded N — we extract the
// mapping from the explicit `store i32 N, ptr %call_site_field`
// that SjLjEHPrepare also emitted.
// 5. Erase eh.sjlj.lsda / setup.dispatch / functioncontext / callsite
// intrinsic calls. lsda's result is replaced with null — our
// personality routine doesn't consume an LSDA pointer; it knows
// the catch types from the function context's data array which
// _Unwind_SjLj_RaiseException populated.
//
// The runtime side (runtime/src/libcxxabiSjlj.c) provides
// _Unwind_SjLj_Register/Unregister, _Unwind_SjLj_RaiseException,
// _Unwind_SjLj_Resume, and __gxx_personality_sj0 — plus the libcxxabi
// surface (__cxa_throw, __cxa_begin_catch, etc.).
//
//===---------------------------------------------------------------------===//
#include "W65816.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Module.h"
#include "llvm/Pass.h"
using namespace llvm;
#define DEBUG_TYPE "w65816-sjlj-finalize"
namespace {
class W65816SjLjFinalize : public FunctionPass {
public:
static char ID;
W65816SjLjFinalize() : FunctionPass(ID) {}
StringRef getPassName() const override {
return "W65816 SJLJ EH finalize";
}
bool runOnFunction(Function &F) override;
};
} // namespace
char W65816SjLjFinalize::ID = 0;
INITIALIZE_PASS(W65816SjLjFinalize, DEBUG_TYPE,
"W65816 SJLJ EH finalize", false, false)
FunctionPass *llvm::createW65816SjLjFinalize() {
return new W65816SjLjFinalize();
}
// Match the personality recorded by clang for SJLJ EH.
static bool hasSjLjPersonality(const Function &F) {
if (!F.hasPersonalityFn())
return false;
const Constant *P = F.getPersonalityFn();
if (auto *Fn = dyn_cast<Function>(P->stripPointerCasts()))
return Fn->getName() == "__gxx_personality_sj0";
return false;
}
// Find the alloca SjLjEHPrepare used for the function context. It's
// the only alloca whose first use is a GEP storing the personality fn
// (field 3) — but a more reliable marker is the eh.sjlj.functioncontext
// intrinsic call, which takes the alloca as its sole argument.
static AllocaInst *findFnCtxAlloca(Function &F) {
for (Instruction &I : instructions(F)) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
if (II->getIntrinsicID() == Intrinsic::eh_sjlj_functioncontext) {
return cast<AllocaInst>(II->getArgOperand(0)->stripPointerCasts());
}
}
}
return nullptr;
}
bool W65816SjLjFinalize::runOnFunction(Function &F) {
if (!hasSjLjPersonality(F))
return false;
AllocaInst *FnCtx = findFnCtxAlloca(F);
if (!FnCtx)
return false;
Module &M = *F.getParent();
LLVMContext &Ctx = F.getContext();
Type *I32Ty = Type::getInt32Ty(Ctx);
Type *I16Ty = Type::getInt16Ty(Ctx);
Type *PtrTy = PointerType::getUnqual(Ctx);
Type *FnCtxTy = FnCtx->getAllocatedType();
// Walk invokes; build the call-site → landing-pad map. The
// call-site index for each invoke is the i32 stored to fn_ctx[1]
// immediately before the invoke (SjLjEHPrepare placed it).
SmallVector<InvokeInst *, 4> Invokes;
DenseMap<int, BasicBlock *> CSToLPad;
for (BasicBlock &BB : F) {
if (auto *II = dyn_cast<InvokeInst>(BB.getTerminator())) {
Invokes.push_back(II);
// Walk backward from the invoke for the most recent
// `store i32 <const>, ptr <call_site_gep>`.
int CS = -1;
for (auto It = std::next(BB.rbegin()); It != BB.rend(); ++It) {
if (auto *SI = dyn_cast<StoreInst>(&*It)) {
if (auto *C = dyn_cast<ConstantInt>(SI->getValueOperand())) {
if (auto *GEP = dyn_cast<GetElementPtrInst>(SI->getPointerOperand())) {
if (GEP->getPointerOperand() == FnCtx) {
CS = (int)C->getSExtValue();
break;
}
}
}
}
}
if (CS > 0)
CSToLPad[CS] = II->getUnwindDest();
}
}
if (Invokes.empty())
return false;
// Find the call to _Unwind_SjLj_Register — our setjmp insertion point
// is right after it (so the function context is fully populated).
CallInst *RegisterCall = nullptr;
for (Instruction &I : F.getEntryBlock()) {
if (auto *CI = dyn_cast<CallInst>(&I)) {
if (CI->getCalledFunction() &&
CI->getCalledFunction()->getName() == "_Unwind_SjLj_Register") {
RegisterCall = CI;
break;
}
}
}
if (!RegisterCall)
return false;
// Materialize: %r = call setjmp(ptr %jbuf)
// jbuf is fn_ctx field 5.
BasicBlock *EntryBB = RegisterCall->getParent();
IRBuilder<> Builder(RegisterCall->getNextNode());
Value *Jbuf = Builder.CreateStructGEP(FnCtxTy, FnCtx, 5, "jbuf");
// Our setjmp signature: int setjmp(void *jb). We treat its return
// as i16 here to match the W65816 ABI; the runtime returns 0 on the
// initial call and a nonzero value when longjmp comes back.
FunctionCallee SetjmpFn =
M.getOrInsertFunction("setjmp", FunctionType::get(I16Ty, {PtrTy}, false));
// Mark setjmp as returns_twice so LLVM doesn't optimize across it.
if (auto *SF = dyn_cast<Function>(SetjmpFn.getCallee())) {
SF->addFnAttr(Attribute::ReturnsTwice);
}
CallInst *SetjmpCall = Builder.CreateCall(SetjmpFn, {Jbuf}, "sj.r");
SetjmpCall->setCanReturnTwice();
Value *IsUnwind = Builder.CreateICmpNE(
SetjmpCall, ConstantInt::get(I16Ty, 0), "sj.is_unwind");
// Split entry block: instructions after our br go into a new block;
// we'll branch there on the first-time setjmp return.
BasicBlock *EHContinueBB =
EntryBB->splitBasicBlock(Builder.GetInsertPoint(), "sj.first_entry");
// splitBasicBlock created an unconditional br at the split point;
// replace it with our conditional branch to dispatch vs continue.
EntryBB->getTerminator()->eraseFromParent();
Builder.SetInsertPoint(EntryBB);
// Build dispatch block: switch on call_site.
BasicBlock *DispatchBB = BasicBlock::Create(Ctx, "sj.dispatch", &F);
IRBuilder<> DBuilder(DispatchBB);
Value *CallSiteGEP =
DBuilder.CreateStructGEP(FnCtxTy, FnCtx, 1, "cs.gep");
LoadInst *CallSite =
DBuilder.CreateLoad(I32Ty, CallSiteGEP, /*isVolatile=*/true, "cs");
// Default case: if we ever land here with an unknown call_site, just
// unreachable — the runtime should never longjmp with an out-of-range
// index. But a defensive fallback is to spin (terminate).
BasicBlock *DispatchUnreachable =
BasicBlock::Create(Ctx, "sj.dispatch.unreachable", &F);
new UnreachableInst(Ctx, DispatchUnreachable);
SwitchInst *SI =
DBuilder.CreateSwitch(CallSite, DispatchUnreachable, CSToLPad.size());
for (auto &KV : CSToLPad) {
SI->addCase(cast<ConstantInt>(ConstantInt::get(I32Ty, KV.first)),
KV.second);
}
// Final entry-block terminator: if (is_unwind) goto dispatch else continue.
Builder.CreateCondBr(IsUnwind, DispatchBB, EHContinueBB);
// The landing-pad blocks each start with a `landingpad { ptr, i32 }`
// instruction. After we convert invokes to plain calls, those
// blocks are no longer reached via an unwind edge — the verifier
// requires landingpads to be reached only from invoke unwind dests.
// Remove the landingpad insts (they're no-ops now; the real
// exception ptr / selector come from explicit fn_ctx.data loads
// SjLjEHPrepare emitted right after). Replace landingpad uses with
// poison since downstream code reads via GEPs, not via the inst's
// result.
SmallVector<LandingPadInst *, 4> LPads;
for (auto &KV : CSToLPad) {
if (LandingPadInst *LP = KV.second->getLandingPadInst())
LPads.push_back(LP);
}
// De-dup (multiple call_sites can share a landing pad).
std::sort(LPads.begin(), LPads.end());
LPads.erase(std::unique(LPads.begin(), LPads.end()), LPads.end());
for (LandingPadInst *LP : LPads) {
LP->replaceAllUsesWith(PoisonValue::get(LP->getType()));
LP->eraseFromParent();
}
// The function's "personality" attribute references @__gxx_personality_sj0,
// which would normally require landingpads. Drop it since we have none.
F.setPersonalityFn(nullptr);
// Convert each invoke to a regular call + br to its normal dest.
for (InvokeInst *II : Invokes) {
SmallVector<Value *, 8> Args(II->args());
SmallVector<OperandBundleDef, 1> Bundles;
II->getOperandBundlesAsDefs(Bundles);
CallInst *CI = CallInst::Create(II->getFunctionType(),
II->getCalledOperand(), Args, Bundles, "",
II->getIterator());
CI->setCallingConv(II->getCallingConv());
CI->setAttributes(II->getAttributes());
CI->setDebugLoc(II->getDebugLoc());
if (!II->getType()->isVoidTy())
II->replaceAllUsesWith(CI);
BasicBlock *NormalDest = II->getNormalDest();
BasicBlock *UnwindDest = II->getUnwindDest();
BranchInst::Create(NormalDest, II->getIterator());
// Drop the unwind-dest PHI predecessor entries for this invoke's BB.
UnwindDest->removePredecessor(II->getParent());
II->eraseFromParent();
}
// Build per-function catch table. Format (flat array of i16 pairs):
// [cs1, ti_addr1, cs1, ti_addr2, ..., cs2, ti_addr1, ..., 0, 0]
// Each (call_site, typeinfo_address) row encodes "if a throw is in
// flight while call_site is active, try to catch with this typeinfo".
// Terminated by a (0, 0) sentinel. Catch table address is stored in
// fn_ctx[4] (the lsda field) by replacing eh.sjlj.lsda's result.
//
// To make the landing pad's selector compare work without a real
// selector value: we set selector = (i32)(uintptr_t)&typeinfo at
// longjmp time, and rewrite the landing pad's
// eh.typeid.for(@TI) calls to also yield (i32)(uintptr_t)&TI. The
// icmp eq then succeeds for the matched catch.
SmallVector<Constant *, 16> TableRows;
// Walk each invoke (now lowered to call+br); we kept its call_site
// → unwind_dest mapping, so re-derive from CSToLPad and the lpad's
// landingpad instruction's catch clauses.
for (auto &KV : CSToLPad) {
int CS = KV.first;
BasicBlock *LPadBB = KV.second;
LandingPadInst *LP = LPadBB->getLandingPadInst();
if (!LP)
continue;
for (unsigned i = 0; i < LP->getNumClauses(); i++) {
if (LP->isCatch(i)) {
Constant *TIClause = cast<Constant>(LP->getClause(i));
// Skip catch-all (null TI) for now — rare, and our personality
// would need to handle it specially.
if (TIClause->isNullValue())
continue;
TableRows.push_back(ConstantInt::get(I16Ty, CS));
TableRows.push_back(ConstantExpr::getPtrToInt(TIClause, I16Ty));
}
}
}
// Append (0, 0) sentinel.
TableRows.push_back(ConstantInt::get(I16Ty, 0));
TableRows.push_back(ConstantInt::get(I16Ty, 0));
ArrayType *TableArrTy = ArrayType::get(I16Ty, TableRows.size());
Constant *TableInit = ConstantArray::get(TableArrTy, TableRows);
std::string TableName = "_W65SJLJ_CATCHTAB_" + F.getName().str();
GlobalVariable *Table = new GlobalVariable(
M, TableArrTy, /*isConstant=*/true, GlobalValue::InternalLinkage,
TableInit, TableName);
// Replace eh.sjlj.lsda → catch-table address; rewrite eh.typeid.for
// to (i32) ptrtoint of its typeinfo arg; erase setup_dispatch /
// functioncontext / callsite intrinsic markers.
SmallVector<Instruction *, 8> ToErase;
for (Instruction &I : instructions(F)) {
if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::eh_sjlj_lsda: {
Constant *TableAddr = ConstantExpr::getBitCast(Table, PtrTy);
II->replaceAllUsesWith(TableAddr);
ToErase.push_back(II);
break;
}
case Intrinsic::eh_typeid_for: {
// Replace with: zext (ptrtoint TI to i16) to i32.
IRBuilder<> TBuilder(II);
Value *TI = II->getArgOperand(0);
Value *AsI16 = TBuilder.CreatePtrToInt(TI, I16Ty);
Value *AsI32 = TBuilder.CreateZExt(AsI16, I32Ty);
II->replaceAllUsesWith(AsI32);
ToErase.push_back(II);
break;
}
case Intrinsic::eh_sjlj_setup_dispatch:
case Intrinsic::eh_sjlj_functioncontext:
case Intrinsic::eh_sjlj_callsite:
ToErase.push_back(II);
break;
default:
break;
}
}
}
for (Instruction *I : ToErase)
I->eraseFromParent();
return true;
}

View file

@ -47,6 +47,7 @@ LLVMInitializeW65816Target() {
initializeW65816SpillToXPass(PR); initializeW65816SpillToXPass(PR);
initializeW65816NegYIndYPass(PR); initializeW65816NegYIndYPass(PR);
initializeW65816PreSpillCrossCallPass(PR); initializeW65816PreSpillCrossCallPass(PR);
initializeW65816SjLjFinalizePass(PR);
// Default IndVarSimplify's exit-value rewriter to "never". The // Default IndVarSimplify's exit-value rewriter to "never". The
// closed-form replacement frequently widens an i16 induction var // closed-form replacement frequently widens an i16 induction var
@ -100,6 +101,7 @@ public:
void addPostRegAlloc() override; void addPostRegAlloc() override;
void addPreEmitPass() override; void addPreEmitPass() override;
void addMachineSSAOptimization() override; void addMachineSSAOptimization() override;
void addISelPrepare() override;
// W65816's only 16-bit ALU register is A. At -O1+ we use BASIC // W65816's only 16-bit ALU register is A. At -O1+ we use BASIC
// regalloc instead of greedy: greedy fails ("ran out of registers // regalloc instead of greedy: greedy fails ("ran out of registers
@ -127,6 +129,16 @@ TargetPassConfig *W65816TargetMachine::createPassConfig(PassManagerBase &PM) {
return new W65816PassConfig(*this, PM); return new W65816PassConfig(*this, PM);
} }
void W65816PassConfig::addISelPrepare() {
// SjLjEHPrepare ran in addPassesToHandleExceptions just before this;
// our finalize pass inserts an actual setjmp at function entry +
// a switch-on-call_site dispatch block, and erases the eh.sjlj.*
// intrinsics our backend doesn't natively lower. Must run BEFORE
// the base ISelPrepare passes so isel sees the cleaned IR.
addPass(createW65816SjLjFinalize());
TargetPassConfig::addISelPrepare();
}
void W65816PassConfig::addMachineSSAOptimization() { void W65816PassConfig::addMachineSSAOptimization() {
// MachineCSE used to be disabled here because it incorrectly // MachineCSE used to be disabled here because it incorrectly
// eliminated "redundant" CMP instructions: P was considered // eliminated "redundant" CMP instructions: P was considered