Checkpoint

This commit is contained in:
Scott Duensing 2026-05-04 00:18:52 -05:00
parent bb3aad3911
commit dc2505c4af
5 changed files with 118 additions and 32 deletions

View file

@ -41,10 +41,7 @@ cc "$SRC/sscanf.c"
cc "$SRC/qsort.c"
cc "$SRC/extras.c"
cc "$SRC/strtok.c"
cc "$SRC/timeExt.c" -O1
# timeExt.c at -O1: -O2 generates code where strftime's directive
# switch overflows the W65816's 8-bit signed stack-relative offset
# range. -O1 keeps the per-function frame small enough.
cc "$SRC/timeExt.c"
cc "$SRC/math.c"
cc "$SRC/softFloat.c"
cc "$SRC/libcxxabi.c"

View file

@ -35,26 +35,29 @@ double difftime(time_t end, time_t start) {
// gmtime / localtime: convert seconds-since-1970 to broken-down time.
// "local" is identical to "gm" — no timezone support.
// Convert days-since-1970 to (year, days-into-year). Uses 4-year
// cycles where possible to keep the loop short and to avoid clang
// generating code that misbehaves on this target.
// gmtime: KNOWN BROKEN at all -O levels. The year-decomposition loop
// (subtract years from `days` until what's left fits in one year)
// triggers a W65816 backend codegen issue — the loop doesn't iterate
// correctly under either -O2 (frame overflow) or -O1/-O0 (wrong
// values returned). For now, gmtime fills in fields with zeros and
// just stashes the input epoch in tm_sec/tm_min as low/mid 16-bit.
// asctime/strftime/mktime work correctly on a user-supplied struct
// tm. Workaround for callers that need decomposition: build the
// struct tm manually.
// gmtime KNOWN-BROKEN: every algorithm tried (year-by-year subtract,
// year-by-year add, Howard Hinnant pure-arithmetic, table-lookup
// binary search, table-lookup linear scan) returns garbage from this
// TU even though the same source compiles correctly in user code at
// -O2. Worse, adding *any* date-decomposition code corrupts the
// sec/min/hour fields too — strongly suggests regalloc-pressure
// interaction with the larger frame from neighbouring functions in
// timeExt.c. Stub: fill seconds/minutes/hours correctly (which work
// when they are the only computation in the function body) and leave
// date fields at the 1970-01-01 sentinel. Workaround for users:
// build a struct tm by hand and pass to mktime/asctime/strftime —
// those all work correctly.
struct tm *gmtime(const time_t *t) {
long secs = *t;
__gmtimeBuf.tm_sec = (int)(secs & 0xFFFF);
__gmtimeBuf.tm_min = (int)((secs >> 16) & 0xFFFF);
__gmtimeBuf.tm_hour = 0;
int sec = (int)(secs % 60L); secs /= 60L;
int min = (int)(secs % 60L); secs /= 60L;
int hour = (int)(secs % 24L);
__gmtimeBuf.tm_sec = sec;
__gmtimeBuf.tm_min = min;
__gmtimeBuf.tm_hour = hour;
__gmtimeBuf.tm_mday = 1;
__gmtimeBuf.tm_mon = 0;
__gmtimeBuf.tm_year = 70; // 1970, sentinel "not decomposed"
__gmtimeBuf.tm_year = 70; // 1970 sentinel — year decomp is broken
__gmtimeBuf.tm_wday = 4; // Jan 1 1970 was Thursday
__gmtimeBuf.tm_yday = 0;
__gmtimeBuf.tm_isdst = -1;

View file

@ -503,6 +503,28 @@ EOF
:
fi
# 11g+. i8 store via constant-int address (MMIO style) lowers to STA8long
# (sta long, 0x8F) — bank-explicit, NOT [dp],Y or DBR-relative `sta abs`.
# Required so `*(uint8*)0xC035 = v` works under GS/OS Loader where DBR != 0.
# See feedback_const_addr_byte_store.md for the rationale.
if [ -x "$CLANG" ]; then
log "check: i8 store to const-int address lowers to sta long (bank-explicit)"
cFileC="$(mktemp --suffix=.c)"
sFileC="$(mktemp --suffix=.s)"
cat > "$cFileC" <<'EOF'
void mmio(unsigned char v) { *(volatile unsigned char *)0xC035 = v; }
EOF
"$CLANG" --target=w65816 -O2 -S "$cFileC" -o "$sFileC"
# Must contain `sta 0xc035` (assembler picks long form for 24-bit addr).
# Must NOT contain `sta [` (the old [dp],Y route).
if ! grep -qE 'sta 0xc035' "$sFileC" \
|| grep -qE 'sta \[' "$sFileC"; then
cat "$sFileC" >&2
die "i8 const-addr store: expected STA8long (sta long), got [dp],Y route"
fi
rm -f "$cFileC" "$sFileC"
fi
# 11h. i8 global access stays in 8-bit M (no over-read). bump_gb must
# get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence.
if [ -x "$CLANG" ]; then

View file

@ -45,6 +45,7 @@ public:
SkipNextSepImm = -1;
SkipNextStaAbs = false;
SkipNextPush16 = false;
SkipNextSta8Wrap = false;
}
// Reset on MBB entry too — labels emit before the MIs of a new MBB,
// and a stale flag from a previous MBB's last LDAi8imm could
@ -53,6 +54,7 @@ public:
SkipNextSepImm = -1;
SkipNextStaAbs = false;
SkipNextPush16 = false;
SkipNextSta8Wrap = false;
AsmPrinter::emitBasicBlockStart(MBB);
}
@ -71,6 +73,12 @@ public:
// by the LDAi16imm + PUSH16 peephole).
bool SkipNextPush16 = false;
// When true, the next STA8abs / STA8long should skip emitting its
// opening SEP (we already entered M=8 via the preceding LDAi8imm
// collapse) and skip its closing REP (the LDAi8imm consumer will
// restore M=16 itself). Avoids 4 B / 6 cyc per byte-store-of-imm.
bool SkipNextSta8Wrap = false;
static char ID;
};
@ -378,6 +386,18 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
SkipRep = true;
SkipNextSepImm = 0x20;
}
// STA8abs / STA8long don't expose their SEP at MIR — the wrap is
// emitted at MC layer. Detect them here so we can elide the
// closing REP and the store's opening SEP+REP wrap entirely:
// the 4 emitted bytes (REP/SEP between LDAi8imm and STA, plus
// REP after STA) all collapse, leaving SEP/LDA/STA/REP as the
// tight sequence the user wrote.
else if (It != MI->getParent()->end() &&
(It->getOpcode() == W65816::STA8abs ||
It->getOpcode() == W65816::STA8long)) {
SkipRep = true;
SkipNextSta8Wrap = true;
}
if (!SkipRep) {
MCInst Rep; Rep.setOpcode(W65816::REP);
Rep.addOperand(MCOperand::createImm(0x20));
@ -523,22 +543,44 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
EmitToStreamer(*OutStreamer, Rep);
return;
}
case W65816::STA8abs: {
// STA_Abs is 16-bit when M=0, 8-bit when M=1. Pure-i8 functions
// run with M=1 and a bare STA is correct. M=0 functions need an
// SEP/REP wrap so the STA stores only one byte — without it, the
// store clobbers the byte at addr+1 (potentially another global).
case W65816::STA8abs:
case W65816::STA8long: {
// STA_Abs / STA_Long are 16-bit when M=0, 8-bit when M=1. Pure-i8
// functions run with M=1 and a bare STA is correct. M=0 functions
// need an SEP/REP wrap so the STA stores only one byte — without
// it, the store clobbers the byte at addr+1. STA8long differs
// from STA8abs only in the underlying opcode (0x8F vs 0x8D): long
// is bank-explicit, abs is DBR-relative. Long is required for
// const-int MMIO addresses since the data bank is non-zero under
// GS/OS Loader.
bool IsLong = MI->getOpcode() == W65816::STA8long;
bool UsesAcc8 = MI->getMF()
->getInfo<W65816MachineFunctionInfo>()
->getUsesAcc8();
if (!UsesAcc8) {
// SkipOpenSep: LDAi8imm collapse already left M=8, so skip our
// opening SEP — but we still own the closing REP since LDAi8imm
// dropped its.
bool SkipOpenSep = SkipNextSta8Wrap;
SkipNextSta8Wrap = false;
if (!UsesAcc8 && !SkipOpenSep) {
MCInst Sep; Sep.setOpcode(W65816::SEP);
Sep.addOperand(MCOperand::createImm(0x20));
EmitToStreamer(*OutStreamer, Sep);
}
MCInst Sta;
Sta.setOpcode(W65816::STA_Abs);
Sta.addOperand(lowerOperand(MI->getOperand(1), MCInstLowering));
Sta.setOpcode(IsLong ? W65816::STA_Long : W65816::STA_Abs);
MCOperand Addr = lowerOperand(MI->getOperand(1), MCInstLowering);
// STA_Long takes a 24-bit absolute address. When the input is a
// const-int cast through a 16-bit pointer, TableGen sign-extends
// the 16-bit value into the i32 imm operand: 0xC035 (i16) becomes
// 0xFFFFC035 (i64). Mask to 16 bits to recover the original
// pointer; the resulting encoding has bank=0 explicit. Users who
// need a banked address should construct a far pointer rather than
// casting an int.
if (IsLong && Addr.isImm()) {
Addr = MCOperand::createImm(Addr.getImm() & 0xFFFFu);
}
Sta.addOperand(Addr);
EmitToStreamer(*OutStreamer, Sta);
if (!UsesAcc8) {
MCInst Rep; Rep.setOpcode(W65816::REP);

View file

@ -247,6 +247,13 @@ def LDA8abs : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
let mayStore = 1, hasSideEffects = 0, mayLoad = 0 in {
def STA8abs : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
"# STA8abs $src, $addr", []>;
// STA8long: 8-bit absolute-long store. Same pattern as STA8abs but
// the AsmPrinter emits STA_Long (0x8F) a true 24-bit bank-explicit
// store instead of STA_Abs (0x8D, DBR-relative). Used for MMIO via
// a constant integer address; the i32imm carries the full 24-bit
// physical address. See the (store Acc8, (iPTR imm)) pattern.
def STA8long : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
"# STA8long $src, $addr", []>;
}
def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))),
(LDA8abs tglobaladdr:$g)>;
@ -256,6 +263,16 @@ def : Pat<(store Acc8:$src, (W65816Wrapper tglobaladdr:$g)),
(STA8abs Acc8:$src, tglobaladdr:$g)>;
def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)),
(STA8abs Acc8:$src, texternalsym:$s)>;
// Byte store via a constant-int address (MMIO-style: `*(volatile uint8 *)0x70
// = v`). Without this, the i8 store falls through to STBptr ([dp],Y), which
// is 16 B / 30 cyc. We route through STA8long (sta abs-long, opcode 0x8F)
// rather than STA8abs because a const-int address is a physical 24-bit
// pointer and must NOT track DBR under the GS/OS Loader the data bank is
// non-zero, so DBR-relative `sta abs` would land in the wrong bank.
def : Pat<(store Acc8:$src, (iPTR imm:$addr)),
(STA8long Acc8:$src, (i32 imm:$addr))>;
def : Pat<(truncstorei8 Acc16:$src, (iPTR imm:$addr)),
(STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8), (i32 imm:$addr))>;
// Load 16 bits via a 16-bit absolute address. Currently only matches
// loads from a Wrapper(global); direct constant-pointer loads come once
@ -278,10 +295,15 @@ def : Pat<(store Acc16:$src, (W65816Wrapper tglobaladdr:$g)),
(STAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)),
(STAabs Acc16:$src, texternalsym:$s)>;
// Store via a constant-int address (MMIO-style fixed pointer like
// `*(volatile uint16 *)0x5000 = v`). Lower to STAabs (DBR-relative,
// opcode 0x8D) keeps the access shorter than going through STAptr
// (which would also be DBR-relative via (sr,s),Y, but 4-5 bytes longer).
// Store via a constant-int address (`*(volatile uint16 *)0x5000 = v`).
// Lowers to STAabs (0x8D, DBR-relative) DELIBERATELY asymmetric with the
// i8 case (STA8long, bank-explicit). Rationale: most 65816 MMIO is i8
// (e.g. `*(uint8*)0xC035`) where users expect bank=0 always. Const-int
// i16 is mostly used as a DBR-relative idiom in test code that switches
// DBR and verifies a write lands in the new bank. Switching i16 to
// bank-explicit broke 10+ existing tests with no real-world i16 MMIO
// use case to justify it. Users who need bank-explicit i16 should
// declare a global or split into two i8 stores.
def : Pat<(store Acc16:$src, (iPTR imm:$addr)),
(STAabs Acc16:$src, (i32 imm:$addr))>;