From dc2505c4af3f67d63b9ccf5a3ec05c05a54ec0fc Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Mon, 4 May 2026 00:18:52 -0500 Subject: [PATCH] Checkpoint --- runtime/build.sh | 5 +- runtime/src/timeExt.c | 35 ++++++----- scripts/smokeTest.sh | 22 +++++++ .../lib/Target/W65816/W65816AsmPrinter.cpp | 58 ++++++++++++++++--- src/llvm/lib/Target/W65816/W65816InstrInfo.td | 30 ++++++++-- 5 files changed, 118 insertions(+), 32 deletions(-) diff --git a/runtime/build.sh b/runtime/build.sh index a258f2e..215e6ad 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -41,10 +41,7 @@ cc "$SRC/sscanf.c" cc "$SRC/qsort.c" cc "$SRC/extras.c" cc "$SRC/strtok.c" -cc "$SRC/timeExt.c" -O1 -# timeExt.c at -O1: -O2 generates code where strftime's directive -# switch overflows the W65816's 8-bit signed stack-relative offset -# range. -O1 keeps the per-function frame small enough. +cc "$SRC/timeExt.c" cc "$SRC/math.c" cc "$SRC/softFloat.c" cc "$SRC/libcxxabi.c" diff --git a/runtime/src/timeExt.c b/runtime/src/timeExt.c index 12b19df..7b19952 100644 --- a/runtime/src/timeExt.c +++ b/runtime/src/timeExt.c @@ -35,26 +35,29 @@ double difftime(time_t end, time_t start) { // gmtime / localtime: convert seconds-since-1970 to broken-down time. // "local" is identical to "gm" — no timezone support. -// Convert days-since-1970 to (year, days-into-year). Uses 4-year -// cycles where possible to keep the loop short and to avoid clang -// generating code that misbehaves on this target. -// gmtime: KNOWN BROKEN at all -O levels. The year-decomposition loop -// (subtract years from `days` until what's left fits in one year) -// triggers a W65816 backend codegen issue — the loop doesn't iterate -// correctly under either -O2 (frame overflow) or -O1/-O0 (wrong -// values returned). For now, gmtime fills in fields with zeros and -// just stashes the input epoch in tm_sec/tm_min as low/mid 16-bit. -// asctime/strftime/mktime work correctly on a user-supplied struct -// tm. Workaround for callers that need decomposition: build the -// struct tm manually. +// gmtime KNOWN-BROKEN: every algorithm tried (year-by-year subtract, +// year-by-year add, Howard Hinnant pure-arithmetic, table-lookup +// binary search, table-lookup linear scan) returns garbage from this +// TU even though the same source compiles correctly in user code at +// -O2. Worse, adding *any* date-decomposition code corrupts the +// sec/min/hour fields too — strongly suggests regalloc-pressure +// interaction with the larger frame from neighbouring functions in +// timeExt.c. Stub: fill seconds/minutes/hours correctly (which work +// when they are the only computation in the function body) and leave +// date fields at the 1970-01-01 sentinel. Workaround for users: +// build a struct tm by hand and pass to mktime/asctime/strftime — +// those all work correctly. struct tm *gmtime(const time_t *t) { long secs = *t; - __gmtimeBuf.tm_sec = (int)(secs & 0xFFFF); - __gmtimeBuf.tm_min = (int)((secs >> 16) & 0xFFFF); - __gmtimeBuf.tm_hour = 0; + int sec = (int)(secs % 60L); secs /= 60L; + int min = (int)(secs % 60L); secs /= 60L; + int hour = (int)(secs % 24L); + __gmtimeBuf.tm_sec = sec; + __gmtimeBuf.tm_min = min; + __gmtimeBuf.tm_hour = hour; __gmtimeBuf.tm_mday = 1; __gmtimeBuf.tm_mon = 0; - __gmtimeBuf.tm_year = 70; // 1970, sentinel "not decomposed" + __gmtimeBuf.tm_year = 70; // 1970 sentinel — year decomp is broken __gmtimeBuf.tm_wday = 4; // Jan 1 1970 was Thursday __gmtimeBuf.tm_yday = 0; __gmtimeBuf.tm_isdst = -1; diff --git a/scripts/smokeTest.sh b/scripts/smokeTest.sh index cc7f7cd..bf6a3e4 100755 --- a/scripts/smokeTest.sh +++ b/scripts/smokeTest.sh @@ -503,6 +503,28 @@ EOF : fi +# 11g+. i8 store via constant-int address (MMIO style) lowers to STA8long +# (sta long, 0x8F) — bank-explicit, NOT [dp],Y or DBR-relative `sta abs`. +# Required so `*(uint8*)0xC035 = v` works under GS/OS Loader where DBR != 0. +# See feedback_const_addr_byte_store.md for the rationale. +if [ -x "$CLANG" ]; then + log "check: i8 store to const-int address lowers to sta long (bank-explicit)" + cFileC="$(mktemp --suffix=.c)" + sFileC="$(mktemp --suffix=.s)" + cat > "$cFileC" <<'EOF' +void mmio(unsigned char v) { *(volatile unsigned char *)0xC035 = v; } +EOF + "$CLANG" --target=w65816 -O2 -S "$cFileC" -o "$sFileC" + # Must contain `sta 0xc035` (assembler picks long form for 24-bit addr). + # Must NOT contain `sta [` (the old [dp],Y route). + if ! grep -qE 'sta 0xc035' "$sFileC" \ + || grep -qE 'sta \[' "$sFileC"; then + cat "$sFileC" >&2 + die "i8 const-addr store: expected STA8long (sta long), got [dp],Y route" + fi + rm -f "$cFileC" "$sFileC" +fi + # 11h. i8 global access stays in 8-bit M (no over-read). bump_gb must # get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence. if [ -x "$CLANG" ]; then diff --git a/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp b/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp index d8463e6..ac07ba2 100644 --- a/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp +++ b/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp @@ -45,6 +45,7 @@ public: SkipNextSepImm = -1; SkipNextStaAbs = false; SkipNextPush16 = false; + SkipNextSta8Wrap = false; } // Reset on MBB entry too — labels emit before the MIs of a new MBB, // and a stale flag from a previous MBB's last LDAi8imm could @@ -53,6 +54,7 @@ public: SkipNextSepImm = -1; SkipNextStaAbs = false; SkipNextPush16 = false; + SkipNextSta8Wrap = false; AsmPrinter::emitBasicBlockStart(MBB); } @@ -71,6 +73,12 @@ public: // by the LDAi16imm + PUSH16 peephole). bool SkipNextPush16 = false; + // When true, the next STA8abs / STA8long should skip emitting its + // opening SEP (we already entered M=8 via the preceding LDAi8imm + // collapse) and skip its closing REP (the LDAi8imm consumer will + // restore M=16 itself). Avoids 4 B / 6 cyc per byte-store-of-imm. + bool SkipNextSta8Wrap = false; + static char ID; }; @@ -378,6 +386,18 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { SkipRep = true; SkipNextSepImm = 0x20; } + // STA8abs / STA8long don't expose their SEP at MIR — the wrap is + // emitted at MC layer. Detect them here so we can elide the + // closing REP and the store's opening SEP+REP wrap entirely: + // the 4 emitted bytes (REP/SEP between LDAi8imm and STA, plus + // REP after STA) all collapse, leaving SEP/LDA/STA/REP as the + // tight sequence the user wrote. + else if (It != MI->getParent()->end() && + (It->getOpcode() == W65816::STA8abs || + It->getOpcode() == W65816::STA8long)) { + SkipRep = true; + SkipNextSta8Wrap = true; + } if (!SkipRep) { MCInst Rep; Rep.setOpcode(W65816::REP); Rep.addOperand(MCOperand::createImm(0x20)); @@ -523,22 +543,44 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Rep); return; } - case W65816::STA8abs: { - // STA_Abs is 16-bit when M=0, 8-bit when M=1. Pure-i8 functions - // run with M=1 and a bare STA is correct. M=0 functions need an - // SEP/REP wrap so the STA stores only one byte — without it, the - // store clobbers the byte at addr+1 (potentially another global). + case W65816::STA8abs: + case W65816::STA8long: { + // STA_Abs / STA_Long are 16-bit when M=0, 8-bit when M=1. Pure-i8 + // functions run with M=1 and a bare STA is correct. M=0 functions + // need an SEP/REP wrap so the STA stores only one byte — without + // it, the store clobbers the byte at addr+1. STA8long differs + // from STA8abs only in the underlying opcode (0x8F vs 0x8D): long + // is bank-explicit, abs is DBR-relative. Long is required for + // const-int MMIO addresses since the data bank is non-zero under + // GS/OS Loader. + bool IsLong = MI->getOpcode() == W65816::STA8long; bool UsesAcc8 = MI->getMF() ->getInfo() ->getUsesAcc8(); - if (!UsesAcc8) { + // SkipOpenSep: LDAi8imm collapse already left M=8, so skip our + // opening SEP — but we still own the closing REP since LDAi8imm + // dropped its. + bool SkipOpenSep = SkipNextSta8Wrap; + SkipNextSta8Wrap = false; + if (!UsesAcc8 && !SkipOpenSep) { MCInst Sep; Sep.setOpcode(W65816::SEP); Sep.addOperand(MCOperand::createImm(0x20)); EmitToStreamer(*OutStreamer, Sep); } MCInst Sta; - Sta.setOpcode(W65816::STA_Abs); - Sta.addOperand(lowerOperand(MI->getOperand(1), MCInstLowering)); + Sta.setOpcode(IsLong ? W65816::STA_Long : W65816::STA_Abs); + MCOperand Addr = lowerOperand(MI->getOperand(1), MCInstLowering); + // STA_Long takes a 24-bit absolute address. When the input is a + // const-int cast through a 16-bit pointer, TableGen sign-extends + // the 16-bit value into the i32 imm operand: 0xC035 (i16) becomes + // 0xFFFFC035 (i64). Mask to 16 bits to recover the original + // pointer; the resulting encoding has bank=0 explicit. Users who + // need a banked address should construct a far pointer rather than + // casting an int. + if (IsLong && Addr.isImm()) { + Addr = MCOperand::createImm(Addr.getImm() & 0xFFFFu); + } + Sta.addOperand(Addr); EmitToStreamer(*OutStreamer, Sta); if (!UsesAcc8) { MCInst Rep; Rep.setOpcode(W65816::REP); diff --git a/src/llvm/lib/Target/W65816/W65816InstrInfo.td b/src/llvm/lib/Target/W65816/W65816InstrInfo.td index 5545c3a..e067f21 100644 --- a/src/llvm/lib/Target/W65816/W65816InstrInfo.td +++ b/src/llvm/lib/Target/W65816/W65816InstrInfo.td @@ -247,6 +247,13 @@ def LDA8abs : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr), let mayStore = 1, hasSideEffects = 0, mayLoad = 0 in { def STA8abs : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr), "# STA8abs $src, $addr", []>; +// STA8long: 8-bit absolute-long store. Same pattern as STA8abs but +// the AsmPrinter emits STA_Long (0x8F) — a true 24-bit bank-explicit +// store — instead of STA_Abs (0x8D, DBR-relative). Used for MMIO via +// a constant integer address; the i32imm carries the full 24-bit +// physical address. See the (store Acc8, (iPTR imm)) pattern. +def STA8long : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr), + "# STA8long $src, $addr", []>; } def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))), (LDA8abs tglobaladdr:$g)>; @@ -256,6 +263,16 @@ def : Pat<(store Acc8:$src, (W65816Wrapper tglobaladdr:$g)), (STA8abs Acc8:$src, tglobaladdr:$g)>; def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)), (STA8abs Acc8:$src, texternalsym:$s)>; +// Byte store via a constant-int address (MMIO-style: `*(volatile uint8 *)0x70 +// = v`). Without this, the i8 store falls through to STBptr ([dp],Y), which +// is 16 B / 30 cyc. We route through STA8long (sta abs-long, opcode 0x8F) +// rather than STA8abs because a const-int address is a physical 24-bit +// pointer and must NOT track DBR — under the GS/OS Loader the data bank is +// non-zero, so DBR-relative `sta abs` would land in the wrong bank. +def : Pat<(store Acc8:$src, (iPTR imm:$addr)), + (STA8long Acc8:$src, (i32 imm:$addr))>; +def : Pat<(truncstorei8 Acc16:$src, (iPTR imm:$addr)), + (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8), (i32 imm:$addr))>; // Load 16 bits via a 16-bit absolute address. Currently only matches // loads from a Wrapper(global); direct constant-pointer loads come once @@ -278,10 +295,15 @@ def : Pat<(store Acc16:$src, (W65816Wrapper tglobaladdr:$g)), (STAabs Acc16:$src, tglobaladdr:$g)>; def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)), (STAabs Acc16:$src, texternalsym:$s)>; -// Store via a constant-int address (MMIO-style fixed pointer like -// `*(volatile uint16 *)0x5000 = v`). Lower to STAabs (DBR-relative, -// opcode 0x8D) — keeps the access shorter than going through STAptr -// (which would also be DBR-relative via (sr,s),Y, but 4-5 bytes longer). +// Store via a constant-int address (`*(volatile uint16 *)0x5000 = v`). +// Lowers to STAabs (0x8D, DBR-relative) — DELIBERATELY asymmetric with the +// i8 case (STA8long, bank-explicit). Rationale: most 65816 MMIO is i8 +// (e.g. `*(uint8*)0xC035`) where users expect bank=0 always. Const-int +// i16 is mostly used as a DBR-relative idiom in test code that switches +// DBR and verifies a write lands in the new bank. Switching i16 to +// bank-explicit broke 10+ existing tests with no real-world i16 MMIO +// use case to justify it. Users who need bank-explicit i16 should +// declare a global or split into two i8 stores. def : Pat<(store Acc16:$src, (iPTR imm:$addr)), (STAabs Acc16:$src, (i32 imm:$addr))>;