Checkpoint
This commit is contained in:
parent
bb3aad3911
commit
dc2505c4af
5 changed files with 118 additions and 32 deletions
|
|
@ -41,10 +41,7 @@ cc "$SRC/sscanf.c"
|
||||||
cc "$SRC/qsort.c"
|
cc "$SRC/qsort.c"
|
||||||
cc "$SRC/extras.c"
|
cc "$SRC/extras.c"
|
||||||
cc "$SRC/strtok.c"
|
cc "$SRC/strtok.c"
|
||||||
cc "$SRC/timeExt.c" -O1
|
cc "$SRC/timeExt.c"
|
||||||
# timeExt.c at -O1: -O2 generates code where strftime's directive
|
|
||||||
# switch overflows the W65816's 8-bit signed stack-relative offset
|
|
||||||
# range. -O1 keeps the per-function frame small enough.
|
|
||||||
cc "$SRC/math.c"
|
cc "$SRC/math.c"
|
||||||
cc "$SRC/softFloat.c"
|
cc "$SRC/softFloat.c"
|
||||||
cc "$SRC/libcxxabi.c"
|
cc "$SRC/libcxxabi.c"
|
||||||
|
|
|
||||||
|
|
@ -35,26 +35,29 @@ double difftime(time_t end, time_t start) {
|
||||||
|
|
||||||
// gmtime / localtime: convert seconds-since-1970 to broken-down time.
|
// gmtime / localtime: convert seconds-since-1970 to broken-down time.
|
||||||
// "local" is identical to "gm" — no timezone support.
|
// "local" is identical to "gm" — no timezone support.
|
||||||
// Convert days-since-1970 to (year, days-into-year). Uses 4-year
|
// gmtime KNOWN-BROKEN: every algorithm tried (year-by-year subtract,
|
||||||
// cycles where possible to keep the loop short and to avoid clang
|
// year-by-year add, Howard Hinnant pure-arithmetic, table-lookup
|
||||||
// generating code that misbehaves on this target.
|
// binary search, table-lookup linear scan) returns garbage from this
|
||||||
// gmtime: KNOWN BROKEN at all -O levels. The year-decomposition loop
|
// TU even though the same source compiles correctly in user code at
|
||||||
// (subtract years from `days` until what's left fits in one year)
|
// -O2. Worse, adding *any* date-decomposition code corrupts the
|
||||||
// triggers a W65816 backend codegen issue — the loop doesn't iterate
|
// sec/min/hour fields too — strongly suggests regalloc-pressure
|
||||||
// correctly under either -O2 (frame overflow) or -O1/-O0 (wrong
|
// interaction with the larger frame from neighbouring functions in
|
||||||
// values returned). For now, gmtime fills in fields with zeros and
|
// timeExt.c. Stub: fill seconds/minutes/hours correctly (which work
|
||||||
// just stashes the input epoch in tm_sec/tm_min as low/mid 16-bit.
|
// when they are the only computation in the function body) and leave
|
||||||
// asctime/strftime/mktime work correctly on a user-supplied struct
|
// date fields at the 1970-01-01 sentinel. Workaround for users:
|
||||||
// tm. Workaround for callers that need decomposition: build the
|
// build a struct tm by hand and pass to mktime/asctime/strftime —
|
||||||
// struct tm manually.
|
// those all work correctly.
|
||||||
struct tm *gmtime(const time_t *t) {
|
struct tm *gmtime(const time_t *t) {
|
||||||
long secs = *t;
|
long secs = *t;
|
||||||
__gmtimeBuf.tm_sec = (int)(secs & 0xFFFF);
|
int sec = (int)(secs % 60L); secs /= 60L;
|
||||||
__gmtimeBuf.tm_min = (int)((secs >> 16) & 0xFFFF);
|
int min = (int)(secs % 60L); secs /= 60L;
|
||||||
__gmtimeBuf.tm_hour = 0;
|
int hour = (int)(secs % 24L);
|
||||||
|
__gmtimeBuf.tm_sec = sec;
|
||||||
|
__gmtimeBuf.tm_min = min;
|
||||||
|
__gmtimeBuf.tm_hour = hour;
|
||||||
__gmtimeBuf.tm_mday = 1;
|
__gmtimeBuf.tm_mday = 1;
|
||||||
__gmtimeBuf.tm_mon = 0;
|
__gmtimeBuf.tm_mon = 0;
|
||||||
__gmtimeBuf.tm_year = 70; // 1970, sentinel "not decomposed"
|
__gmtimeBuf.tm_year = 70; // 1970 sentinel — year decomp is broken
|
||||||
__gmtimeBuf.tm_wday = 4; // Jan 1 1970 was Thursday
|
__gmtimeBuf.tm_wday = 4; // Jan 1 1970 was Thursday
|
||||||
__gmtimeBuf.tm_yday = 0;
|
__gmtimeBuf.tm_yday = 0;
|
||||||
__gmtimeBuf.tm_isdst = -1;
|
__gmtimeBuf.tm_isdst = -1;
|
||||||
|
|
|
||||||
|
|
@ -503,6 +503,28 @@ EOF
|
||||||
:
|
:
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# 11g+. i8 store via constant-int address (MMIO style) lowers to STA8long
|
||||||
|
# (sta long, 0x8F) — bank-explicit, NOT [dp],Y or DBR-relative `sta abs`.
|
||||||
|
# Required so `*(uint8*)0xC035 = v` works under GS/OS Loader where DBR != 0.
|
||||||
|
# See feedback_const_addr_byte_store.md for the rationale.
|
||||||
|
if [ -x "$CLANG" ]; then
|
||||||
|
log "check: i8 store to const-int address lowers to sta long (bank-explicit)"
|
||||||
|
cFileC="$(mktemp --suffix=.c)"
|
||||||
|
sFileC="$(mktemp --suffix=.s)"
|
||||||
|
cat > "$cFileC" <<'EOF'
|
||||||
|
void mmio(unsigned char v) { *(volatile unsigned char *)0xC035 = v; }
|
||||||
|
EOF
|
||||||
|
"$CLANG" --target=w65816 -O2 -S "$cFileC" -o "$sFileC"
|
||||||
|
# Must contain `sta 0xc035` (assembler picks long form for 24-bit addr).
|
||||||
|
# Must NOT contain `sta [` (the old [dp],Y route).
|
||||||
|
if ! grep -qE 'sta 0xc035' "$sFileC" \
|
||||||
|
|| grep -qE 'sta \[' "$sFileC"; then
|
||||||
|
cat "$sFileC" >&2
|
||||||
|
die "i8 const-addr store: expected STA8long (sta long), got [dp],Y route"
|
||||||
|
fi
|
||||||
|
rm -f "$cFileC" "$sFileC"
|
||||||
|
fi
|
||||||
|
|
||||||
# 11h. i8 global access stays in 8-bit M (no over-read). bump_gb must
|
# 11h. i8 global access stays in 8-bit M (no over-read). bump_gb must
|
||||||
# get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence.
|
# get the SEP #$20 prologue and emit a single-byte lda/inc/sta sequence.
|
||||||
if [ -x "$CLANG" ]; then
|
if [ -x "$CLANG" ]; then
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,7 @@ public:
|
||||||
SkipNextSepImm = -1;
|
SkipNextSepImm = -1;
|
||||||
SkipNextStaAbs = false;
|
SkipNextStaAbs = false;
|
||||||
SkipNextPush16 = false;
|
SkipNextPush16 = false;
|
||||||
|
SkipNextSta8Wrap = false;
|
||||||
}
|
}
|
||||||
// Reset on MBB entry too — labels emit before the MIs of a new MBB,
|
// Reset on MBB entry too — labels emit before the MIs of a new MBB,
|
||||||
// and a stale flag from a previous MBB's last LDAi8imm could
|
// and a stale flag from a previous MBB's last LDAi8imm could
|
||||||
|
|
@ -53,6 +54,7 @@ public:
|
||||||
SkipNextSepImm = -1;
|
SkipNextSepImm = -1;
|
||||||
SkipNextStaAbs = false;
|
SkipNextStaAbs = false;
|
||||||
SkipNextPush16 = false;
|
SkipNextPush16 = false;
|
||||||
|
SkipNextSta8Wrap = false;
|
||||||
AsmPrinter::emitBasicBlockStart(MBB);
|
AsmPrinter::emitBasicBlockStart(MBB);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -71,6 +73,12 @@ public:
|
||||||
// by the LDAi16imm + PUSH16 peephole).
|
// by the LDAi16imm + PUSH16 peephole).
|
||||||
bool SkipNextPush16 = false;
|
bool SkipNextPush16 = false;
|
||||||
|
|
||||||
|
// When true, the next STA8abs / STA8long should skip emitting its
|
||||||
|
// opening SEP (we already entered M=8 via the preceding LDAi8imm
|
||||||
|
// collapse); the store still emits its closing REP, because that
|
||||||
|
// LDAi8imm dropped its own. Avoids 4 B / 6 cyc per byte-store-of-imm.
|
||||||
|
bool SkipNextSta8Wrap = false;
|
||||||
|
|
||||||
static char ID;
|
static char ID;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -378,6 +386,18 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
SkipRep = true;
|
SkipRep = true;
|
||||||
SkipNextSepImm = 0x20;
|
SkipNextSepImm = 0x20;
|
||||||
}
|
}
|
||||||
|
// STA8abs / STA8long don't expose their SEP at MIR — the wrap is
|
||||||
|
// emitted at MC layer. Detect them here so we can elide our
|
||||||
|
// closing REP and the store's opening SEP: those 4 emitted
|
||||||
|
// bytes (REP + SEP between LDAi8imm and STA) collapse, while
|
||||||
|
// the store keeps its own closing REP, leaving SEP/LDA/STA/REP as the
|
||||||
|
// tight sequence the user wrote.
|
||||||
|
else if (It != MI->getParent()->end() &&
|
||||||
|
(It->getOpcode() == W65816::STA8abs ||
|
||||||
|
It->getOpcode() == W65816::STA8long)) {
|
||||||
|
SkipRep = true;
|
||||||
|
SkipNextSta8Wrap = true;
|
||||||
|
}
|
||||||
if (!SkipRep) {
|
if (!SkipRep) {
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(0x20));
|
||||||
|
|
@ -523,22 +543,44 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
case W65816::STA8abs: {
|
case W65816::STA8abs:
|
||||||
// STA_Abs is 16-bit when M=0, 8-bit when M=1. Pure-i8 functions
|
case W65816::STA8long: {
|
||||||
// run with M=1 and a bare STA is correct. M=0 functions need an
|
// STA_Abs / STA_Long are 16-bit when M=0, 8-bit when M=1. Pure-i8
|
||||||
// SEP/REP wrap so the STA stores only one byte — without it, the
|
// functions run with M=1 and a bare STA is correct. M=0 functions
|
||||||
// store clobbers the byte at addr+1 (potentially another global).
|
// need an SEP/REP wrap so the STA stores only one byte — without
|
||||||
|
// it, the store clobbers the byte at addr+1. STA8long differs
|
||||||
|
// from STA8abs only in the underlying opcode (0x8F vs 0x8D): long
|
||||||
|
// is bank-explicit, abs is DBR-relative. Long is required for
|
||||||
|
// const-int MMIO addresses since the data bank is non-zero under
|
||||||
|
// GS/OS Loader.
|
||||||
|
bool IsLong = MI->getOpcode() == W65816::STA8long;
|
||||||
bool UsesAcc8 = MI->getMF()
|
bool UsesAcc8 = MI->getMF()
|
||||||
->getInfo<W65816MachineFunctionInfo>()
|
->getInfo<W65816MachineFunctionInfo>()
|
||||||
->getUsesAcc8();
|
->getUsesAcc8();
|
||||||
if (!UsesAcc8) {
|
// SkipOpenSep: LDAi8imm collapse already left M=8, so skip our
|
||||||
|
// opening SEP — but we still own the closing REP since LDAi8imm
|
||||||
|
// dropped its.
|
||||||
|
bool SkipOpenSep = SkipNextSta8Wrap;
|
||||||
|
SkipNextSta8Wrap = false;
|
||||||
|
if (!UsesAcc8 && !SkipOpenSep) {
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(0x20));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
}
|
}
|
||||||
MCInst Sta;
|
MCInst Sta;
|
||||||
Sta.setOpcode(W65816::STA_Abs);
|
Sta.setOpcode(IsLong ? W65816::STA_Long : W65816::STA_Abs);
|
||||||
Sta.addOperand(lowerOperand(MI->getOperand(1), MCInstLowering));
|
MCOperand Addr = lowerOperand(MI->getOperand(1), MCInstLowering);
|
||||||
|
// STA_Long takes a 24-bit absolute address. When the input is a
|
||||||
|
// const-int cast through a 16-bit pointer, TableGen sign-extends
|
||||||
|
// the 16-bit value into the i32 imm operand: 0xC035 (i16) becomes
|
||||||
|
// 0xFFFFC035 (i32). Mask to 16 bits to recover the original
|
||||||
|
// pointer; the resulting encoding has bank=0 explicit. Users who
|
||||||
|
// need a banked address should construct a far pointer rather than
|
||||||
|
// casting an int.
|
||||||
|
if (IsLong && Addr.isImm()) {
|
||||||
|
Addr = MCOperand::createImm(Addr.getImm() & 0xFFFFu);
|
||||||
|
}
|
||||||
|
Sta.addOperand(Addr);
|
||||||
EmitToStreamer(*OutStreamer, Sta);
|
EmitToStreamer(*OutStreamer, Sta);
|
||||||
if (!UsesAcc8) {
|
if (!UsesAcc8) {
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
|
|
|
||||||
|
|
@ -247,6 +247,13 @@ def LDA8abs : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
|
||||||
let mayStore = 1, hasSideEffects = 0, mayLoad = 0 in {
|
let mayStore = 1, hasSideEffects = 0, mayLoad = 0 in {
|
||||||
def STA8abs : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
|
def STA8abs : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
|
||||||
"# STA8abs $src, $addr", []>;
|
"# STA8abs $src, $addr", []>;
|
||||||
|
// STA8long: 8-bit absolute-long store. Same pattern as STA8abs but
|
||||||
|
// the AsmPrinter emits STA_Long (0x8F) — a true 24-bit bank-explicit
|
||||||
|
// store — instead of STA_Abs (0x8D, DBR-relative). Used for MMIO via
|
||||||
|
// a constant integer address; the AsmPrinter masks an immediate $addr to
|
||||||
|
// 16 bits, so bank 0 is encoded. See the (store Acc8, (iPTR imm)) pattern.
|
||||||
|
def STA8long : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
|
||||||
|
"# STA8long $src, $addr", []>;
|
||||||
}
|
}
|
||||||
def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))),
|
def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))),
|
||||||
(LDA8abs tglobaladdr:$g)>;
|
(LDA8abs tglobaladdr:$g)>;
|
||||||
|
|
@ -256,6 +263,16 @@ def : Pat<(store Acc8:$src, (W65816Wrapper tglobaladdr:$g)),
|
||||||
(STA8abs Acc8:$src, tglobaladdr:$g)>;
|
(STA8abs Acc8:$src, tglobaladdr:$g)>;
|
||||||
def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)),
|
def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)),
|
||||||
(STA8abs Acc8:$src, texternalsym:$s)>;
|
(STA8abs Acc8:$src, texternalsym:$s)>;
|
||||||
|
// Byte store via a constant-int address (MMIO-style: `*(volatile uint8 *)0x70
|
||||||
|
// = v`). Without this, the i8 store falls through to STBptr ([dp],Y), which
|
||||||
|
// is 16 B / 30 cyc. We route through STA8long (sta abs-long, opcode 0x8F)
|
||||||
|
// rather than STA8abs because a const-int address is a physical 24-bit
|
||||||
|
// pointer and must NOT track DBR — under the GS/OS Loader the data bank is
|
||||||
|
// non-zero, so DBR-relative `sta abs` would land in the wrong bank.
|
||||||
|
def : Pat<(store Acc8:$src, (iPTR imm:$addr)),
|
||||||
|
(STA8long Acc8:$src, (i32 imm:$addr))>;
|
||||||
|
def : Pat<(truncstorei8 Acc16:$src, (iPTR imm:$addr)),
|
||||||
|
(STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8), (i32 imm:$addr))>;
|
||||||
|
|
||||||
// Load 16 bits via a 16-bit absolute address. Currently only matches
|
// Load 16 bits via a 16-bit absolute address. Currently only matches
|
||||||
// loads from a Wrapper(global); direct constant-pointer loads come once
|
// loads from a Wrapper(global); direct constant-pointer loads come once
|
||||||
|
|
@ -278,10 +295,15 @@ def : Pat<(store Acc16:$src, (W65816Wrapper tglobaladdr:$g)),
|
||||||
(STAabs Acc16:$src, tglobaladdr:$g)>;
|
(STAabs Acc16:$src, tglobaladdr:$g)>;
|
||||||
def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)),
|
def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)),
|
||||||
(STAabs Acc16:$src, texternalsym:$s)>;
|
(STAabs Acc16:$src, texternalsym:$s)>;
|
||||||
// Store via a constant-int address (MMIO-style fixed pointer like
|
// Store via a constant-int address (`*(volatile uint16 *)0x5000 = v`).
|
||||||
// `*(volatile uint16 *)0x5000 = v`). Lower to STAabs (DBR-relative,
|
// Lowers to STAabs (0x8D, DBR-relative) — DELIBERATELY asymmetric with the
|
||||||
// opcode 0x8D) — keeps the access shorter than going through STAptr
|
// i8 case (STA8long, bank-explicit). Rationale: most 65816 MMIO is i8
|
||||||
// (which would also be DBR-relative via (sr,s),Y, but 4-5 bytes longer).
|
// (e.g. `*(uint8*)0xC035`) where users expect bank=0 always. Const-int
|
||||||
|
// i16 is mostly used as a DBR-relative idiom in test code that switches
|
||||||
|
// DBR and verifies a write lands in the new bank. Switching i16 to
|
||||||
|
// bank-explicit broke 10+ existing tests with no real-world i16 MMIO
|
||||||
|
// use case to justify it. Users who need bank-explicit i16 should
|
||||||
|
// declare a global or split into two i8 stores.
|
||||||
def : Pat<(store Acc16:$src, (iPTR imm:$addr)),
|
def : Pat<(store Acc16:$src, (iPTR imm:$addr)),
|
||||||
(STAabs Acc16:$src, (i32 imm:$addr))>;
|
(STAabs Acc16:$src, (i32 imm:$addr))>;
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue