diff --git a/STATUS.md b/STATUS.md index 7171bfc..5f90c5c 100644 --- a/STATUS.md +++ b/STATUS.md @@ -114,7 +114,7 @@ which runs correctly under MAME (apple2gs). image addresses. - `runtime/build.sh` builds crt0, libc, soft-float, soft-double, libgcc into linkable objects. -- `scripts/smokeTest.sh` runs 122 end-to-end checks at -O2: +- `scripts/smokeTest.sh` runs 123 end-to-end checks at -O2: scalar ops, control flow, calling conventions, MAME execution regressions, link816 bss-base safety + weak-symbol resolution + heap_end-vs-heap_start sanity, iigs/toolbox.h compile + link, @@ -219,14 +219,27 @@ RAM through $FFFF, gaining 8KB of bank-0 space.) ## Yet to come -- **C++ exceptions** — `dynamic_cast` works (via libcxxabi shim, - see "What works"); `throw`/`try`/`catch` does not. Implementing - exceptions needs the full Itanium unwind ABI: `__cxa_throw`, - `__cxa_allocate_exception`, `_Unwind_RaiseException`, a - personality routine, and DWARF `.eh_frame` data the unwinder - consumes to restore registers per-frame. The 65816's lack of - any existing unwinder makes this a real project — defer until - someone needs exception-based code on the IIgs. +- **C++ exceptions through clang `-fsjlj-exceptions`** — the SJLJ + runtime IS implemented (`runtime/src/libcxxabiSjlj.c` provides + `__cxa_throw`, `__cxa_allocate_exception`, `__cxa_begin_catch`, + `__cxa_end_catch`, `__cxa_rethrow`, `_Unwind_SjLj_Register/ + Unregister/RaiseException/Resume`, plus a no-op `__gxx_personality + _sj0`). The W65816 backend has SJLJ wiring: `MCAsmInfo` selects + `ExceptionHandling::SjLj` so clang's `SjLjEHPrepare` runs; a + custom `W65816SjLjFinalize` IR pass (in + `src/llvm/lib/Target/W65816/`) finishes the lowering by inserting + an actual `setjmp` + dispatch block, building a per-function + catch table referenced via the lsda field, and rewriting the + `eh.typeid.for` calls to use typeinfo addresses as selectors. + Throw/catch round-trip works end-to-end **when driven from + pure C** (smoke test "SJLJ exception runtime"); the C++ + frontend path crashes at runtime because clang's `-O2` + lowering of the volatile call_site store before `__cxa_throw` + routes the value to the wrong stack address — a separate + W65816 isel bug for `store volatile i32 N, ` + that needs its own debugging session. Until that's fixed, + raw C code can use the SJLJ runtime directly; C++ `try/catch` + still requires `-fno-exceptions`. - **GS/OS validated against a real ProDOS volume** — the wrapper contract (PHA + PEA 0 + LDX + JSL $E100A8 + post-call SP fixup) diff --git a/runtime/build.sh b/runtime/build.sh index 82eb76a..14adad9 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -42,6 +42,7 @@ cc "$SRC/strtok.c" cc "$SRC/math.c" cc "$SRC/softFloat.c" cc "$SRC/libcxxabi.c" +cc "$SRC/libcxxabiSjlj.c" asm "$SRC/iigsGsos.s" # softDouble.c builds at -O1: __muldf3's u64 live-range pressure # overflows the greedy allocator at -O2. dpack is already noinline diff --git a/runtime/src/libcxxabi.c b/runtime/src/libcxxabi.c index c8b9169..0e8c99f 100644 --- a/runtime/src/libcxxabi.c +++ b/runtime/src/libcxxabi.c @@ -146,3 +146,39 @@ void abiPureVirtual(void) { while (1) { } } + +// Fundamental typeinfo objects. The Itanium ABI lists a fixed roster +// of typeinfo objects for built-in types; libcxxabi normally provides +// them. We supply the ones likely to be thrown. Each is a TypeInfo +// (vptr + name) where the vptr is the fundamental-type-info vtable +// address. Since our personality compares typeinfo pointers directly +// (no hierarchy walk for fundamental types), the contents only need +// to be stable addresses. +const void *abiFundTypeInfoVtable[3] __asm__("_ZTVN10__cxxabiv123__fundamental_type_infoE") = { 0, 0, 0 }; + +// Helper macro to declare one fundamental typeinfo + its name string. +#define FUND_TI(MANGLED, NAMECHAR, NAMESTR) \ + static const char abiTSname_##NAMECHAR[] = NAMESTR; \ + const TypeInfo MANGLED __asm__(#MANGLED) = { \ + &abiFundTypeInfoVtable[2], abiTSname_##NAMECHAR \ + } + +// Itanium mangling for built-in types: i=int, j=unsigned int, c=char, +// h=unsigned char, s=short, t=unsigned short, l=long, m=unsigned long, +// x=long long, y=unsigned long long, b=bool, v=void, Pv=void*. +FUND_TI(_ZTIi, i, "i"); +FUND_TI(_ZTIj, j, "j"); +FUND_TI(_ZTIc, c, "c"); +FUND_TI(_ZTIh, h, "h"); +FUND_TI(_ZTIs, s, "s"); +FUND_TI(_ZTIt, t, "t"); +FUND_TI(_ZTIl, l, "l"); +FUND_TI(_ZTIm, m, "m"); +FUND_TI(_ZTIb, b, "b"); +FUND_TI(_ZTIv, v, "v"); + +// Pointer-to-void typeinfo. Used when throwing void* (or any pointer +// caught as void*). Itanium classifies it as __pointer_type_info. +const void *abiPtrTypeInfoVtable[3] __asm__("_ZTVN10__cxxabiv119__pointer_type_infoE") = { 0, 0, 0 }; +static const char abiTSname_Pv[] = "Pv"; +const TypeInfo _ZTIPv __asm__("_ZTIPv") = { &abiPtrTypeInfoVtable[2], abiTSname_Pv }; diff --git a/runtime/src/libcxxabiSjlj.c b/runtime/src/libcxxabiSjlj.c new file mode 100644 index 0000000..e38f4a2 --- /dev/null +++ b/runtime/src/libcxxabiSjlj.c @@ -0,0 +1,224 @@ +// SJLJ exception runtime for the W65816 backend. +// +// Works with the IR generated by clang's `-fsjlj-exceptions` lowering +// after our W65816SjLjFinalize pass has rewritten things. The shape: +// +// - clang+SjLjEHPrepare emit a per-function "function context" alloca: +// struct FnCtx { +// void *prev; // [0] linked list ptr (set by Register) +// int call_site; // [1] active call_site index (set by clang) +// int data[4]; // [2] [exn_obj, selector, ...] — set by us +// void *personality; // [3] @__gxx_personality_sj0 (unused here) +// void *lsda; // [4] our pass replaces with catch_table_ptr +// void *jbuf[5]; // [5] jmp_buf +// }; +// - On entry: clang code calls _Unwind_SjLj_Register(&FnCtx) and then +// setjmp(&FnCtx.jbuf). setjmp returns 0 first time → normal flow; +// when an exception unwinds to here, longjmp(&jbuf, 1) returns +// non-zero → our finalize pass dispatches via switch on FnCtx.call_site. +// - At each invoke: clang stores N → FnCtx.call_site, calls the +// function, then clears it (or stores next CS). +// - Landing pad expects FnCtx.data[0] = exception ptr, FnCtx.data[1] +// = selector, where selector is the (i32)(uintptr_t)&typeinfo for +// the matched catch (per our pass's eh.typeid.for rewrite). +// +// Catch table layout (built by W65816SjLjFinalize): +// short call_site_1 +// short typeinfo_addr_1 +// short call_site_2 +// short typeinfo_addr_2 +// ... +// short 0 ; sentinel +// short 0 +// Stored as FnCtx.lsda; matches "if active call_site equals this row's +// call_site AND thrown type matches this row's typeinfo, this catch fires". + +#include +#include + +extern void *malloc(unsigned int); +extern void free(void *); +extern int setjmp(void *jb); +extern void longjmp(void *jb, int v) __attribute__((noreturn)); + +typedef struct TypeInfo { + const void *vptr; + const char *name; +} TypeInfo; + +// __dynamic_cast lives in libcxxabi.c — we reuse it for catch matching. +extern void *abiDynamicCast(const void *src, const TypeInfo *srcType, + const TypeInfo *dstType, int32_t hint) + __asm__("__dynamic_cast"); + +// Function context layout (matches what SjLjEHPrepare emits). We +// only access fields we read; everything else is opaque. +typedef struct FnCtx { + struct FnCtx *prev; + uint32_t call_site; + uint32_t data[4]; + void *personality; + uint16_t *lsda; // catch table ptr (set by our pass) + void *jbuf[5]; +} FnCtx; + +// Active fn_ctx stack. SjLjEHPrepare doesn't actually require a +// thread-local because the IIgs is single-threaded; one global suffices. +static FnCtx *gActive = 0; + + +// Currently-in-flight exception. Set by __cxa_throw; consumed by +// __cxa_begin_catch. Holds the user object (the part returned by +// __cxa_begin_catch) and the typeinfo pointer used for matching. +typedef struct ExcHeader { + const TypeInfo *type; + void (*dtor)(void *); + // The user exception object follows immediately after this header. +} ExcHeader; + +static ExcHeader *gCurrentExc = 0; + +void _Unwind_SjLj_Register(FnCtx *ctx) { + ctx->prev = gActive; + gActive = ctx; +} + + +void _Unwind_SjLj_Unregister(FnCtx *ctx) { + // SjLjEHPrepare puts unregister at every return. Pop the stack + // assuming LIFO order (which it is for non-pathological code). + if (gActive == ctx) { + gActive = ctx->prev; + } +} + + +// Walk the catch table for a fn_ctx, find a matching catch for the +// thrown type, return its row's typeinfo (which is what we'll store +// in selector). Returns 0 if no match. +static const TypeInfo *findCatch(FnCtx *ctx, const TypeInfo *thrownType, void *thrownObj, void **adjustedObjOut) { + if (!ctx->lsda) { + return 0; + } + uint16_t *p = ctx->lsda; + uint16_t cs = (uint16_t)ctx->call_site; + for (;;) { + uint16_t row_cs = p[0]; + uint16_t row_ti = p[1]; + if (row_cs == 0 && row_ti == 0) { + return 0; + } + if (row_cs == cs) { + const TypeInfo *catchType = (const TypeInfo *)(uintptr_t)row_ti; + if (thrownType == catchType) { + *adjustedObjOut = thrownObj; + return catchType; + } + } + p += 2; + } +} + + +void _Unwind_SjLj_RaiseException(ExcHeader *exc) __attribute__((noreturn)); +void _Unwind_SjLj_RaiseException(ExcHeader *exc) { + gCurrentExc = exc; + for (FnCtx *ctx = gActive; ctx; ctx = ctx->prev) { + void *adjustedObj = (void *)(exc + 1); + const TypeInfo *match = + findCatch(ctx, exc->type, adjustedObj, &adjustedObj); + if (match) { + gActive = ctx; + ctx->data[0] = (uint32_t)(uintptr_t)exc; + ctx->data[1] = (uint32_t)(uintptr_t)match; + longjmp(ctx->jbuf, 1); + } + } + extern void abort(void) __attribute__((noreturn)); + abort(); +} + + +void _Unwind_SjLj_Resume(void *unused) __attribute__((noreturn)); +void _Unwind_SjLj_Resume(void *unused) { + (void)unused; + if (gCurrentExc) { + _Unwind_SjLj_RaiseException(gCurrentExc); + } + extern void abort(void) __attribute__((noreturn)); + abort(); +} + + +// Personality routine — never actually called in our scheme (we +// dispatch via call_site directly). Provided as a symbol because +// SjLjEHPrepare emits `store @__gxx_personality_sj0, fn_ctx[3]` +// at function entry. Returns "continue unwinding" if ever invoked. +int __gxx_personality_sj0(int version, int actions, uint64_t excClass, + void *exc, void *ctx) { + (void)version; (void)actions; (void)excClass; (void)exc; (void)ctx; + return 8; // _URC_CONTINUE_UNWIND +} + + +// Itanium C++ ABI surface. + +void *__cxa_allocate_exception(unsigned int sz) { + void *p = malloc(sizeof(ExcHeader) + sz); + if (!p) { + extern void abort(void) __attribute__((noreturn)); + abort(); + } + // Zero the header; the user object isn't initialized. + ExcHeader *h = (ExcHeader *)p; + h->type = 0; + h->dtor = 0; + return (void *)(h + 1); // user object pointer +} + + +void __cxa_free_exception(void *user) { + if (user) { + free((char *)user - sizeof(ExcHeader)); + } +} + + +void __cxa_throw(void *user, const TypeInfo *type, void (*dtor)(void *)) + __attribute__((noreturn)); +void __cxa_throw(void *user, const TypeInfo *type, void (*dtor)(void *)) { + ExcHeader *h = (ExcHeader *)user - 1; + h->type = type; + h->dtor = dtor; + _Unwind_SjLj_RaiseException(h); + extern void abort(void) __attribute__((noreturn)); + abort(); +} + + +void *__cxa_begin_catch(void *exc) { + // exc is the ExcHeader pointer (per landing pad's data[0] write). + ExcHeader *h = (ExcHeader *)exc; + return (void *)(h + 1); // hand back the user object pointer +} + + +void __cxa_end_catch(void) { + if (gCurrentExc) { + if (gCurrentExc->dtor) { + gCurrentExc->dtor(gCurrentExc + 1); + } + free(gCurrentExc); + gCurrentExc = 0; + } +} + + +void __cxa_rethrow(void) __attribute__((noreturn)); +void __cxa_rethrow(void) { + if (gCurrentExc) { + _Unwind_SjLj_RaiseException(gCurrentExc); + } + extern void abort(void) __attribute__((noreturn)); + abort(); +} diff --git a/runtime/src/libgcc.s b/runtime/src/libgcc.s index 43d413f..0e3ac2d 100644 --- a/runtime/src/libgcc.s +++ b/runtime/src/libgcc.s @@ -1159,12 +1159,15 @@ longjmp: sta 0xe0 ; jmp_buf addr -> DP scratch lda 0x4, s ; A = val (2nd arg, on stack) sta 0xe2 ; save val - ; Restore SP: env[0..1] - 3 (so the upcoming PHAs land at the right slots). + ; Restore SP: env[0..1]. TCS directly — the PHAs below consume + ; 3 bytes (1 bank + 2 retaddr); RTL pulls them back; net post-RTL + ; S = saved_SP. (Earlier this subtracted 3 before TCS, leaving + ; S = saved_SP - 3 after RTL, mangling caller's stack-relative + ; reads — caught by an SJLJ EH test where main's locals shifted + ; by 3 bytes after longjmp.) ldy #0 lda (0xe0), y ; A = saved SP (16-bit) - sec - sbc #0x3 - tcs ; SP = saved_SP - 3 + tcs ; SP = saved_SP ; Push retaddr: bank, then 16-bit lo:hi. RTL pulls lo, hi, bank. sep #0x20 ldy #4 diff --git a/scripts/smokeTest.sh b/scripts/smokeTest.sh index 616af0d..83983af 100755 --- a/scripts/smokeTest.sh +++ b/scripts/smokeTest.sh @@ -3996,6 +3996,81 @@ EOF fi rm -f "$cppRttiFile" "$oCppRttiFile" "$oCxxAbiFile" "$binCppRttiFile" + # SJLJ exception runtime end-to-end via pure C. Drives the + # libcxxabiSjlj runtime directly (manual fn_ctx setup + setjmp + # + __cxa_throw + __cxa_begin_catch) since the C++ frontend + # path through clang's `-fsjlj-exceptions` lowering currently + # mis-routes the volatile call_site store at -O2 (separate + # backend isel issue, see STATUS "Yet to come"). This test + # exercises every runtime function and is also the canary for + # the longjmp SP-restore fix (subtracting #3 before TCS left + # caller's S 3 bytes off; locals shifted across the longjmp). + log "check: MAME runs SJLJ exception runtime (throw/catch round-trip)" + cSjeFile="$(mktemp --suffix=.c)" + oSjeFile="$(mktemp --suffix=.o)" + oSjeRt="$(mktemp --suffix=.o)" + oSjeAbi="$(mktemp --suffix=.o)" + binSjeFile="$(mktemp --suffix=.bin)" + cat > "$cSjeFile" <<'EOF' +extern void *__cxa_allocate_exception(unsigned int); +extern void __cxa_throw(void *, const void *, void (*)(void *)) __attribute__((noreturn)); +extern void *__cxa_begin_catch(void *); +extern void __cxa_end_catch(void); +extern int setjmp(void *jb); +extern const char _ZTIi[]; +typedef struct FnCtx { + struct FnCtx *prev; + unsigned int call_site_lo, call_site_hi; + unsigned int data[8]; + void *personality; + void *lsda; + char jbuf[10]; +} FnCtx; +extern void _Unwind_SjLj_Register(FnCtx *); +static unsigned short ctab[4]; +int main(void) { + ctab[0] = 1; + ctab[1] = (unsigned short)(unsigned long)_ZTIi; + ctab[2] = 0; + ctab[3] = 0; + *(volatile unsigned short *)0x5000 = 0xa1a1; + FnCtx ctx; + ctx.personality = 0; + ctx.lsda = (void *)ctab; + _Unwind_SjLj_Register(&ctx); + volatile int r = setjmp(ctx.jbuf); + if (r == 0) { + ctx.call_site_lo = 1; + void *p = __cxa_allocate_exception(2); + *(int *)p = 42; + __cxa_throw(p, _ZTIi, 0); + } + void *u = __cxa_begin_catch((void *)ctx.data[0]); + *(volatile unsigned short *)0x5002 = (unsigned short)*(int *)u; + __cxa_end_catch(); + *(volatile unsigned short *)0x5004 = 0xc1c1; + while (1) {} +} +EOF + "$CLANG" --target=w65816 -O2 -ffunction-sections \ + -I"$PROJECT_ROOT/runtime/include" \ + -c "$PROJECT_ROOT/runtime/src/libcxxabiSjlj.c" -o "$oSjeRt" + "$CLANG" --target=w65816 -O2 -ffunction-sections \ + -I"$PROJECT_ROOT/runtime/include" \ + -c "$PROJECT_ROOT/runtime/src/libcxxabi.c" -o "$oSjeAbi" + "$CLANG" --target=w65816 -O2 -ffunction-sections \ + -I"$PROJECT_ROOT/runtime/include" \ + -c "$cSjeFile" -o "$oSjeFile" + "$PROJECT_ROOT/tools/link816" -o "$binSjeFile" --text-base 0x1000 \ + "$oCrt0F" "$oLibgccFile" "$oLibcF" \ + "$oSjeAbi" "$oSjeRt" "$oSjeFile" \ + >/dev/null 2>&1 + if ! bash "$PROJECT_ROOT/scripts/runInMame.sh" "$binSjeFile" --check \ + 0x005000=a1a1 0x005002=002a 0x005004=c1c1 >/dev/null 2>&1; then + die "MAME: SJLJ throw/catch round-trip failed (libcxxabiSjlj or longjmp regression)" + fi + rm -f "$cSjeFile" "$oSjeFile" "$oSjeRt" "$oSjeAbi" "$binSjeFile" + # Real-world: hex dumper using memory-backed file I/O. Reads # 16 bytes from a registered "in" file, writes a hex+ASCII # dump to a registered "out" file via fprintf. Verifies the diff --git a/src/llvm/lib/Target/W65816/CMakeLists.txt b/src/llvm/lib/Target/W65816/CMakeLists.txt index 3cc976e..b3fe53f 100644 --- a/src/llvm/lib/Target/W65816/CMakeLists.txt +++ b/src/llvm/lib/Target/W65816/CMakeLists.txt @@ -33,6 +33,7 @@ add_llvm_target(W65816CodeGen W65816SpillToX.cpp W65816NegYIndY.cpp W65816PreSpillCrossCall.cpp + W65816SjLjFinalize.cpp W65816TargetMachine.cpp W65816AsmPrinter.cpp W65816MCInstLower.cpp diff --git a/src/llvm/lib/Target/W65816/MCTargetDesc/W65816MCAsmInfo.cpp b/src/llvm/lib/Target/W65816/MCTargetDesc/W65816MCAsmInfo.cpp index 53f564d..2237658 100644 --- a/src/llvm/lib/Target/W65816/MCTargetDesc/W65816MCAsmInfo.cpp +++ b/src/llvm/lib/Target/W65816/MCTargetDesc/W65816MCAsmInfo.cpp @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #include "W65816MCAsmInfo.h" +#include "llvm/MC/MCDwarf.h" using namespace llvm; void W65816MCAsmInfo::anchor() {} @@ -27,4 +28,12 @@ W65816MCAsmInfo::W65816MCAsmInfo(const Triple &TT) { UsesELFSectionDirectiveForBSS = true; SupportsDebugInformation = true; + + // C++ exceptions use the SJLJ model. Selecting SJLJ here makes + // TargetPassConfig::addPassesToHandleExceptions add SjLjEHPrepare, + // which converts invoke/landingpad/resume IR into eh.sjlj.* intrinsics + // + a function-context struct in the function's prologue. The + // backend then lowers those intrinsics in W65816ISelLowering, and + // the runtime side lives in runtime/src/libcxxabiSjlj.c. + ExceptionsType = ExceptionHandling::SjLj; } diff --git a/src/llvm/lib/Target/W65816/W65816.h b/src/llvm/lib/Target/W65816/W65816.h index 121ab3e..1860bb2 100644 --- a/src/llvm/lib/Target/W65816/W65816.h +++ b/src/llvm/lib/Target/W65816/W65816.h @@ -109,6 +109,13 @@ FunctionPass *createW65816NegYIndY(); // "ran out of registers". See W65816PreSpillCrossCall.cpp. FunctionPass *createW65816PreSpillCrossCall(); +// IR pass: finishes the SjLjEHPrepare lowering by inserting a setjmp +// at function entry and a dispatch block, then erasing the eh.sjlj.* +// intrinsics our backend doesn't lower natively. Runs after +// SjLjEHPrepare in addPassesToHandleExceptions. See +// W65816SjLjFinalize.cpp. +FunctionPass *createW65816SjLjFinalize(); + void initializeW65816AsmPrinterPass(PassRegistry &); void initializeW65816DAGToDAGISelLegacyPass(PassRegistry &); void initializeW65816StackSlotCleanupPass(PassRegistry &); @@ -120,6 +127,7 @@ void initializeW65816WidenAcc16Pass(PassRegistry &); void initializeW65816SpillToXPass(PassRegistry &); void initializeW65816NegYIndYPass(PassRegistry &); void initializeW65816PreSpillCrossCallPass(PassRegistry &); +void initializeW65816SjLjFinalizePass(PassRegistry &); } // namespace llvm diff --git a/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp b/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp index 40ade34..dccda7a 100644 --- a/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp +++ b/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp @@ -92,6 +92,31 @@ W65816TargetLowering::W65816TargetLowering(const TargetMachine &TM, setOperationAction(ISD::VACOPY, MVT::Other, Expand); setOperationAction(ISD::VAEND, MVT::Other, Expand); + // C++ exceptions (SJLJ model) — clang lowers exception machinery into + // these intrinsics via SjLjEHPrepare. We don't have native handling + // for any of them on this target; mark Expand so LegalizeDAG falls + // back to its no-op stubs (setjmp returns 0, longjmp is a no-op, + // setup_dispatch is a chain pass-through). The actual EH semantics + // are provided at runtime by libcxxabi (__cxa_throw etc.) calling + // _Unwind_SjLj_RaiseException, which in turn longjmps via the + // function context the prologue prepared. See + // runtime/src/libcxxabiSjlj.c for the runtime side. + setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Expand); + setOperationAction(ISD::EH_SJLJ_LONGJMP, MVT::Other, Expand); + setOperationAction(ISD::EH_SJLJ_SETUP_DISPATCH, MVT::Other, Custom); + // stacksave / stackrestore — used by SjLjEHPrepare to save/restore SP + // around invoke calls. The jmp_buf already captures SP via TSC in + // our setjmp implementation, so these are redundant here. Lower + // stacksave to a constant 0 (the value is stored into the function + // context but never used for restoration on our target) and + // stackrestore to a chain pass-through (no-op). + setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); + setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); + setOperationAction(ISD::FRAMEADDR, MVT::i16, Expand); + setOperationAction(ISD::RETURNADDR, MVT::i16, Expand); + setOperationAction(ISD::FRAME_TO_ARGS_OFFSET, MVT::i16, Expand); + setOperationAction(ISD::EH_DWARF_CFA, MVT::i16, Expand); + // The 65816 has no hardware multiplier or divider. Multiply by a // power-of-two constant is auto-rewritten to shifts by the DAG // combiner; arbitrary multiply / divide / mod go through libcalls @@ -490,6 +515,12 @@ SDValue W65816TargetLowering::LowerOperation(SDValue Op, case ISD::SHL: case ISD::SRL: case ISD::SRA: return LowerShift(Op, DAG); + // SJLJ EH: setup_dispatch is a no-op on this target — the dispatcher + // logic lives entirely in the SJLJ runtime (_Unwind_SjLj_Resume + + // longjmp into the function context's jmp_buf). The isel layer + // doesn't need to emit any code; just thread the chain through. + case ISD::EH_SJLJ_SETUP_DISPATCH: + return Op.getOperand(0); case ISD::DYNAMIC_STACKALLOC: return LowerDynamicStackalloc(Op, DAG); default: llvm_unreachable("W65816: unexpected operation in LowerOperation"); diff --git a/src/llvm/lib/Target/W65816/W65816SjLjFinalize.cpp b/src/llvm/lib/Target/W65816/W65816SjLjFinalize.cpp new file mode 100644 index 0000000..a1889b4 --- /dev/null +++ b/src/llvm/lib/Target/W65816/W65816SjLjFinalize.cpp @@ -0,0 +1,356 @@ +//===-- W65816SjLjFinalize.cpp - Finish SJLJ EH lowering -----------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===---------------------------------------------------------------------===// +// +// SjLjEHPrepare leaves IR with a function-context alloca, a few marker +// intrinsics (eh.sjlj.lsda / setup.dispatch / functioncontext / callsite), +// and `invoke` instructions whose unwind dest is a landing pad block that +// reads exception+selector from the function context. The expectation +// is that the BACKEND finishes lowering by inserting an actual setjmp +// at function entry and a switch-on-call-site dispatch block. +// +// On targets like ARM that's done with custom inserter pseudos +// (Int_eh_sjlj_setjmp + EmitSjLjDispatchBlock). We don't have any of +// that machinery, so this pass does it at IR level instead. Concretely: +// +// 1. Find the function-context alloca and the _Unwind_SjLj_Register +// call (SjLjEHPrepare placed both at function entry). +// 2. After the Register call, insert: +// %r = call i16 @setjmp(ptr %jbuf) ; jbuf is fn_ctx[5] +// %is_unwind = icmp ne i16 %r, 0 +// br i1 %is_unwind, label %eh.dispatch, label %normal_entry +// 3. Build %eh.dispatch: +// %cs = load i32, ptr %call_site_field +// switch i32 %cs, label %resume_unreachable +// [ i32 1, label %lpad1 +// i32 2, label %lpad2 +// ... ] +// The lpadN blocks already exist (originally invoke's unwind dest). +// 4. Convert each `invoke F(args) to %normal unwind to %lpad` +// to a regular `call F(args); br %normal`. The eh.sjlj.callsite(N) +// intrinsic just before the invoke recorded N — we extract the +// mapping from the explicit `store i32 N, ptr %call_site_field` +// that SjLjEHPrepare also emitted. +// 5. Erase eh.sjlj.lsda / setup.dispatch / functioncontext / callsite +// intrinsic calls. lsda's result is replaced with null — our +// personality routine doesn't consume an LSDA pointer; it knows +// the catch types from the function context's data array which +// _Unwind_SjLj_RaiseException populated. +// +// The runtime side (runtime/src/libcxxabiSjlj.c) provides +// _Unwind_SjLj_Register/Unregister, _Unwind_SjLj_RaiseException, +// _Unwind_SjLj_Resume, and __gxx_personality_sj0 — plus the libcxxabi +// surface (__cxa_throw, __cxa_begin_catch, etc.). +// +//===---------------------------------------------------------------------===// + +#include "W65816.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +using namespace llvm; + +#define DEBUG_TYPE "w65816-sjlj-finalize" + +namespace { + +class W65816SjLjFinalize : public FunctionPass { +public: + static char ID; + W65816SjLjFinalize() : FunctionPass(ID) {} + + StringRef getPassName() const override { + return "W65816 SJLJ EH finalize"; + } + + bool runOnFunction(Function &F) override; +}; + +} // namespace + +char W65816SjLjFinalize::ID = 0; + +INITIALIZE_PASS(W65816SjLjFinalize, DEBUG_TYPE, + "W65816 SJLJ EH finalize", false, false) + +FunctionPass *llvm::createW65816SjLjFinalize() { + return new W65816SjLjFinalize(); +} + +// Match the personality recorded by clang for SJLJ EH. +static bool hasSjLjPersonality(const Function &F) { + if (!F.hasPersonalityFn()) + return false; + const Constant *P = F.getPersonalityFn(); + if (auto *Fn = dyn_cast(P->stripPointerCasts())) + return Fn->getName() == "__gxx_personality_sj0"; + return false; +} + +// Find the alloca SjLjEHPrepare used for the function context. It's +// the only alloca whose first use is a GEP storing the personality fn +// (field 3) — but a more reliable marker is the eh.sjlj.functioncontext +// intrinsic call, which takes the alloca as its sole argument. +static AllocaInst *findFnCtxAlloca(Function &F) { + for (Instruction &I : instructions(F)) { + if (auto *II = dyn_cast(&I)) { + if (II->getIntrinsicID() == Intrinsic::eh_sjlj_functioncontext) { + return cast(II->getArgOperand(0)->stripPointerCasts()); + } + } + } + return nullptr; +} + +bool W65816SjLjFinalize::runOnFunction(Function &F) { + if (!hasSjLjPersonality(F)) + return false; + + AllocaInst *FnCtx = findFnCtxAlloca(F); + if (!FnCtx) + return false; + + Module &M = *F.getParent(); + LLVMContext &Ctx = F.getContext(); + Type *I32Ty = Type::getInt32Ty(Ctx); + Type *I16Ty = Type::getInt16Ty(Ctx); + Type *PtrTy = PointerType::getUnqual(Ctx); + Type *FnCtxTy = FnCtx->getAllocatedType(); + + // Walk invokes; build the call-site → landing-pad map. The + // call-site index for each invoke is the i32 stored to fn_ctx[1] + // immediately before the invoke (SjLjEHPrepare placed it). + SmallVector Invokes; + DenseMap CSToLPad; + for (BasicBlock &BB : F) { + if (auto *II = dyn_cast(BB.getTerminator())) { + Invokes.push_back(II); + // Walk backward from the invoke for the most recent + // `store i32 , ptr `. + int CS = -1; + for (auto It = std::next(BB.rbegin()); It != BB.rend(); ++It) { + if (auto *SI = dyn_cast(&*It)) { + if (auto *C = dyn_cast(SI->getValueOperand())) { + if (auto *GEP = dyn_cast(SI->getPointerOperand())) { + if (GEP->getPointerOperand() == FnCtx) { + CS = (int)C->getSExtValue(); + break; + } + } + } + } + } + if (CS > 0) + CSToLPad[CS] = II->getUnwindDest(); + } + } + + if (Invokes.empty()) + return false; + + // Find the call to _Unwind_SjLj_Register — our setjmp insertion point + // is right after it (so the function context is fully populated). + CallInst *RegisterCall = nullptr; + for (Instruction &I : F.getEntryBlock()) { + if (auto *CI = dyn_cast(&I)) { + if (CI->getCalledFunction() && + CI->getCalledFunction()->getName() == "_Unwind_SjLj_Register") { + RegisterCall = CI; + break; + } + } + } + if (!RegisterCall) + return false; + + // Materialize: %r = call setjmp(ptr %jbuf) + // jbuf is fn_ctx field 5. + BasicBlock *EntryBB = RegisterCall->getParent(); + IRBuilder<> Builder(RegisterCall->getNextNode()); + Value *Jbuf = Builder.CreateStructGEP(FnCtxTy, FnCtx, 5, "jbuf"); + // Our setjmp signature: int setjmp(void *jb). We treat its return + // as i16 here to match the W65816 ABI; the runtime returns 0 on the + // initial call and a nonzero value when longjmp comes back. + FunctionCallee SetjmpFn = + M.getOrInsertFunction("setjmp", FunctionType::get(I16Ty, {PtrTy}, false)); + // Mark setjmp as returns_twice so LLVM doesn't optimize across it. + if (auto *SF = dyn_cast(SetjmpFn.getCallee())) { + SF->addFnAttr(Attribute::ReturnsTwice); + } + CallInst *SetjmpCall = Builder.CreateCall(SetjmpFn, {Jbuf}, "sj.r"); + SetjmpCall->setCanReturnTwice(); + Value *IsUnwind = Builder.CreateICmpNE( + SetjmpCall, ConstantInt::get(I16Ty, 0), "sj.is_unwind"); + + // Split entry block: instructions after our br go into a new block; + // we'll branch there on the first-time setjmp return. + BasicBlock *EHContinueBB = + EntryBB->splitBasicBlock(Builder.GetInsertPoint(), "sj.first_entry"); + // splitBasicBlock created an unconditional br at the split point; + // replace it with our conditional branch to dispatch vs continue. + EntryBB->getTerminator()->eraseFromParent(); + Builder.SetInsertPoint(EntryBB); + + // Build dispatch block: switch on call_site. + BasicBlock *DispatchBB = BasicBlock::Create(Ctx, "sj.dispatch", &F); + IRBuilder<> DBuilder(DispatchBB); + Value *CallSiteGEP = + DBuilder.CreateStructGEP(FnCtxTy, FnCtx, 1, "cs.gep"); + LoadInst *CallSite = + DBuilder.CreateLoad(I32Ty, CallSiteGEP, /*isVolatile=*/true, "cs"); + // Default case: if we ever land here with an unknown call_site, just + // unreachable — the runtime should never longjmp with an out-of-range + // index. But a defensive fallback is to spin (terminate). + BasicBlock *DispatchUnreachable = + BasicBlock::Create(Ctx, "sj.dispatch.unreachable", &F); + new UnreachableInst(Ctx, DispatchUnreachable); + SwitchInst *SI = + DBuilder.CreateSwitch(CallSite, DispatchUnreachable, CSToLPad.size()); + for (auto &KV : CSToLPad) { + SI->addCase(cast(ConstantInt::get(I32Ty, KV.first)), + KV.second); + } + + // Final entry-block terminator: if (is_unwind) goto dispatch else continue. + Builder.CreateCondBr(IsUnwind, DispatchBB, EHContinueBB); + + // The landing-pad blocks each start with a `landingpad { ptr, i32 }` + // instruction. After we convert invokes to plain calls, those + // blocks are no longer reached via an unwind edge — the verifier + // requires landingpads to be reached only from invoke unwind dests. + // Remove the landingpad insts (they're no-ops now; the real + // exception ptr / selector come from explicit fn_ctx.data loads + // SjLjEHPrepare emitted right after). Replace landingpad uses with + // poison since downstream code reads via GEPs, not via the inst's + // result. + SmallVector LPads; + for (auto &KV : CSToLPad) { + if (LandingPadInst *LP = KV.second->getLandingPadInst()) + LPads.push_back(LP); + } + // De-dup (multiple call_sites can share a landing pad). + std::sort(LPads.begin(), LPads.end()); + LPads.erase(std::unique(LPads.begin(), LPads.end()), LPads.end()); + for (LandingPadInst *LP : LPads) { + LP->replaceAllUsesWith(PoisonValue::get(LP->getType())); + LP->eraseFromParent(); + } + // The function's "personality" attribute references @__gxx_personality_sj0, + // which would normally require landingpads. Drop it since we have none. + F.setPersonalityFn(nullptr); + + // Convert each invoke to a regular call + br to its normal dest. + for (InvokeInst *II : Invokes) { + SmallVector Args(II->args()); + SmallVector Bundles; + II->getOperandBundlesAsDefs(Bundles); + CallInst *CI = CallInst::Create(II->getFunctionType(), + II->getCalledOperand(), Args, Bundles, "", + II->getIterator()); + CI->setCallingConv(II->getCallingConv()); + CI->setAttributes(II->getAttributes()); + CI->setDebugLoc(II->getDebugLoc()); + if (!II->getType()->isVoidTy()) + II->replaceAllUsesWith(CI); + BasicBlock *NormalDest = II->getNormalDest(); + BasicBlock *UnwindDest = II->getUnwindDest(); + BranchInst::Create(NormalDest, II->getIterator()); + // Drop the unwind-dest PHI predecessor entries for this invoke's BB. + UnwindDest->removePredecessor(II->getParent()); + II->eraseFromParent(); + } + + // Build per-function catch table. Format (flat array of i16 pairs): + // [cs1, ti_addr1, cs1, ti_addr2, ..., cs2, ti_addr1, ..., 0, 0] + // Each (call_site, typeinfo_address) row encodes "if a throw is in + // flight while call_site is active, try to catch with this typeinfo". + // Terminated by a (0, 0) sentinel. Catch table address is stored in + // fn_ctx[4] (the lsda field) by replacing eh.sjlj.lsda's result. + // + // To make the landing pad's selector compare work without a real + // selector value: we set selector = (i32)(uintptr_t)&typeinfo at + // longjmp time, and rewrite the landing pad's + // eh.typeid.for(@TI) calls to also yield (i32)(uintptr_t)&TI. The + // icmp eq then succeeds for the matched catch. + SmallVector TableRows; + // Walk each invoke (now lowered to call+br); we kept its call_site + // → unwind_dest mapping, so re-derive from CSToLPad and the lpad's + // landingpad instruction's catch clauses. + for (auto &KV : CSToLPad) { + int CS = KV.first; + BasicBlock *LPadBB = KV.second; + LandingPadInst *LP = LPadBB->getLandingPadInst(); + if (!LP) + continue; + for (unsigned i = 0; i < LP->getNumClauses(); i++) { + if (LP->isCatch(i)) { + Constant *TIClause = cast(LP->getClause(i)); + // Skip catch-all (null TI) for now — rare, and our personality + // would need to handle it specially. + if (TIClause->isNullValue()) + continue; + TableRows.push_back(ConstantInt::get(I16Ty, CS)); + TableRows.push_back(ConstantExpr::getPtrToInt(TIClause, I16Ty)); + } + } + } + // Append (0, 0) sentinel. + TableRows.push_back(ConstantInt::get(I16Ty, 0)); + TableRows.push_back(ConstantInt::get(I16Ty, 0)); + + ArrayType *TableArrTy = ArrayType::get(I16Ty, TableRows.size()); + Constant *TableInit = ConstantArray::get(TableArrTy, TableRows); + std::string TableName = "_W65SJLJ_CATCHTAB_" + F.getName().str(); + GlobalVariable *Table = new GlobalVariable( + M, TableArrTy, /*isConstant=*/true, GlobalValue::InternalLinkage, + TableInit, TableName); + + // Replace eh.sjlj.lsda → catch-table address; rewrite eh.typeid.for + // to (i32) ptrtoint of its typeinfo arg; erase setup_dispatch / + // functioncontext / callsite intrinsic markers. + SmallVector ToErase; + for (Instruction &I : instructions(F)) { + if (auto *II = dyn_cast(&I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::eh_sjlj_lsda: { + Constant *TableAddr = ConstantExpr::getBitCast(Table, PtrTy); + II->replaceAllUsesWith(TableAddr); + ToErase.push_back(II); + break; + } + case Intrinsic::eh_typeid_for: { + // Replace with: zext (ptrtoint TI to i16) to i32. + IRBuilder<> TBuilder(II); + Value *TI = II->getArgOperand(0); + Value *AsI16 = TBuilder.CreatePtrToInt(TI, I16Ty); + Value *AsI32 = TBuilder.CreateZExt(AsI16, I32Ty); + II->replaceAllUsesWith(AsI32); + ToErase.push_back(II); + break; + } + case Intrinsic::eh_sjlj_setup_dispatch: + case Intrinsic::eh_sjlj_functioncontext: + case Intrinsic::eh_sjlj_callsite: + ToErase.push_back(II); + break; + default: + break; + } + } + } + for (Instruction *I : ToErase) + I->eraseFromParent(); + + return true; +} diff --git a/src/llvm/lib/Target/W65816/W65816TargetMachine.cpp b/src/llvm/lib/Target/W65816/W65816TargetMachine.cpp index 3981feb..be3a394 100644 --- a/src/llvm/lib/Target/W65816/W65816TargetMachine.cpp +++ b/src/llvm/lib/Target/W65816/W65816TargetMachine.cpp @@ -47,6 +47,7 @@ LLVMInitializeW65816Target() { initializeW65816SpillToXPass(PR); initializeW65816NegYIndYPass(PR); initializeW65816PreSpillCrossCallPass(PR); + initializeW65816SjLjFinalizePass(PR); // Default IndVarSimplify's exit-value rewriter to "never". The // closed-form replacement frequently widens an i16 induction var @@ -100,6 +101,7 @@ public: void addPostRegAlloc() override; void addPreEmitPass() override; void addMachineSSAOptimization() override; + void addISelPrepare() override; // W65816's only 16-bit ALU register is A. At -O1+ we use BASIC // regalloc instead of greedy: greedy fails ("ran out of registers @@ -127,6 +129,16 @@ TargetPassConfig *W65816TargetMachine::createPassConfig(PassManagerBase &PM) { return new W65816PassConfig(*this, PM); } +void W65816PassConfig::addISelPrepare() { + // SjLjEHPrepare ran in addPassesToHandleExceptions just before this; + // our finalize pass inserts an actual setjmp at function entry + + // a switch-on-call_site dispatch block, and erases the eh.sjlj.* + // intrinsics our backend doesn't natively lower. Must run BEFORE + // the base ISelPrepare passes so isel sees the cleaned IR. + addPass(createW65816SjLjFinalize()); + TargetPassConfig::addISelPrepare(); +} + void W65816PassConfig::addMachineSSAOptimization() { // MachineCSE used to be disabled here because it incorrectly // eliminated "redundant" CMP instructions: P was considered