diff --git a/STATUS.md b/STATUS.md index 5587f38..70060b7 100644 --- a/STATUS.md +++ b/STATUS.md @@ -66,12 +66,28 @@ which runs correctly under MAME (apple2gs). — pre-compute or cache when possible. - `setjmp` / `longjmp` from libgcc.s. - Static constructors via crt0's init_array walk. -- `<stdio.h>` file I/O against an in-memory FS: `mfsRegister - (path, buf, size, cap, writable)` stages a buffer as a named - file; `fopen`/`fread`/`fwrite`/`fseek`/`ftell`/`fclose`/`fgetc` - /`fgets`/`ungetc`/`fprintf` operate on it via a per-FILE - (kind, buf, size, cap, pos, eof, err, unget) record. stdin/ - stdout/stderr route through `putchar` as before. +- `<stdio.h>` file I/O with two backends: + - **mfs** — `mfsRegister(path, buf, size, cap, writable)` stages a + memory buffer as a named file. Used by smoke tests that don't + have a real disk. Fully validated end-to-end. + - **GS/OS** — `fopen` falls through to `gsosOpen` for any path not + in the mfs table. Routes through the GS/OS class-1 dispatcher + via wrappers in `runtime/src/iigsGsos.s` (Open/Read/Write/Close/ + SetMark/GetMark/SetEOF/GetEOF). The full stdio surface + (`fread/fwrite/fseek/ftell/fclose/fgetc/fputc/fputs/fgets/ungetc/ + feof/ferror/clearerr/rewind/fprintf/vfprintf`) dispatches on + backend. link816 honors weak symbols so programs that don't use + the GS/OS backend don't have to link `iigsGsos.o`. + - **Validation status:** code path compiles and links, and the + `runViaFinder.sh --data` injection rig works, but `fopen` + `gsosOpen` hangs + when invoked under real GS/OS 6.0.2 (JSL $E100A8 doesn't return); + root cause not yet diagnosed. Stub-dispatcher GS/OS smoke (the + existing one) validates the wrapper contract independently. An + XFAIL'd end-to-end smoke is in `scripts/smokeTest.sh` gated + behind `GSOS_FILE_SMOKE=1` for use after the dispatcher path is + fixed. `runViaFinder.sh --data /PATH=local_file` is the + automated-injection mechanism for runtime-test data files. 
+ - stdin/stdout/stderr route through `putchar` as before. - ``: wcslen / wcscmp / wcsncmp / wcscpy / wcsncpy / wcscat / wcschr / wcsrchr; mbtowc / wctomb / mbstowcs / wcstombs / mblen with the trivial 1:1 byte<->wide mapping @@ -175,9 +191,9 @@ which runs correctly under MAME (apple2gs). - `scripts/benchCyclesPrecise.sh` measures per-call cycle counts via MAME's emulated time counter. Eight benchmarks under - `benchmarks/`. Current numbers: popcount 6888 cyc, bsearch - 1108, memcmp 1569, strcpy 3580, dotProduct 4774, fib(10) 14152, - sumOfSquares 49104. Speed is the optimization priority, not + `benchmarks/`. Current numbers: popcount 4876 cyc, bsearch + 938, memcmp 1330, strcpy 3325, dotProduct 4007, fib(10) 12958, + sumOfSquares 40920. Speed is the optimization priority, not size. **Backend register allocation:** @@ -243,22 +259,15 @@ for the common-case C / minimal-C++ workload. Priority is speed **Speed wins queued, ranked by expected impact:** -- **u16×u16 → u32 multiply path.** sumOfSquares is 982 cyc/iter - bottlenecked by `__mulsi3` for what's effectively a 16×16 - multiply (both inputs are zext from u16). Adding a `__umulhi3` - libcall + SDAG hook to detect `MUL(zext(a), zext(b))` could - roughly halve the iteration cost. - -- **Fold `while (x != 0)` for i32 to `lda lo; ora hi; bne`.** - The combiner currently materializes a SETCC boolean and re-tests - it, generating ~10 redundant ops in every i32-iteration loop. - Hot in popcount, CRC, and any BigInt-style code. - - **ptr32 pointer-increment overhead.** `*p++` under ptr32 emits a full 32-bit `ADC` chain even when the high half is provably - unchanged. strcpy and memcmp pay 30+ cycles per byte for what - should be 15-20. Needs a peephole or SDAG combine for `i32 + 1` - with provably-no-carry-into-hi. + unchanged, and LSR rewrites `*p++` into base+offset (worse on + W65816). strcpy/memcmp pay 30+ cycles per byte for what should + be 15-20. 
Tried `-disable-lsr` (strcpy −10%, dotProduct +10%) + and TTI `isLSRCostLess` override (memcmp +22% — worse); both + too risky without per-loop heuristics. Needs either a peephole + for `i32 + 1` with provably-no-carry-into-hi or per-loop LSR + override based on pointer-vs-array access pattern. - **Greedy regalloc retry.** Currently blocked on an upstream LLVM `LiveRangeEdit::eliminateDeadDef` assertion when our diff --git a/runtime/include/iigs/gsos.h b/runtime/include/iigs/gsos.h index ea8a4fd..43237b6 100644 --- a/runtime/include/iigs/gsos.h +++ b/runtime/include/iigs/gsos.h @@ -22,8 +22,10 @@ // $2012 Read // $2013 Write // $2014 Close -// $2026 GetEOF -// $2027 SetEOF +// $2016 SetMark +// $2017 GetMark +// $2018 SetEOF +// $2019 GetEOF // $2029 Quit (special — no return) // See "GS/OS Reference" for the full ~50 calls and parm-block layouts. @@ -72,6 +74,13 @@ typedef struct { unsigned long eof; } EOFRecGS; +// Class-1 SetMark/GetMark parm block — 32-bit position within file. NOTE(review): the GS/OS Reference documents SetMark (and SetEOF) as pCount=3 {refNum, base, displacement}; only GetMark/GetEOF use this {refNum, position} shape — confirm before relying on it for SetMark. +typedef struct { + unsigned short pCount; // 2 + unsigned short refNum; + unsigned long position; // [in for SetMark, out for GetMark] +} MarkRecGS; + // Open / Read / Write / Close wrappers. Each returns 0 on success or // a non-zero GS/OS error code (see gsos.h reference for codes). 
The // parm block lives on the caller's stack; you set the input fields @@ -86,6 +95,8 @@ extern unsigned short gsosWrite (IORecGS *p); extern unsigned short gsosClose (RefNumRecGS *p); extern unsigned short gsosGetEOF (EOFRecGS *p); extern unsigned short gsosSetEOF (EOFRecGS *p); +extern unsigned short gsosSetMark(MarkRecGS *p); +extern unsigned short gsosGetMark(MarkRecGS *p); #ifdef __cplusplus } diff --git a/runtime/src/iigsGsos.s b/runtime/src/iigsGsos.s index 93c7043..14009cb 100644 --- a/runtime/src/iigsGsos.s +++ b/runtime/src/iigsGsos.s @@ -32,6 +32,8 @@ .globl gsosClose .globl gsosGetEOF .globl gsosSetEOF + .globl gsosSetMark + .globl gsosGetMark gsosOpen: pha @@ -110,3 +112,29 @@ gsosSetEOF: tcs lda 0xe4 rtl + +gsosSetMark: + pha ; push A (presumably the parm-block pointer word from the C caller — confirm against the calling convention) + pea 0 ; push a zero word; 4 bytes are now on the stack + ldx #0x2016 ; X = class-1 call number for SetMark + jsl 0xe100a8 ; NOTE(review): the GS/OS Reference describes $E100A8 as the inline-parameter entry (call number + parm pointer follow the JSL) and $E100B0 as the stack entry — confirm this push-then-JSL sequence matches the real dispatcher; possibly related to the hang recorded in STATUS.md + sta 0xe4 ; park A in $e4 while the stack is repaired + tsc + clc + adc #4 ; drop the 4 bytes pushed above + tcs + lda 0xe4 ; reload the parked value as the return in A + rtl + +gsosGetMark: + pha ; push A (same sequence as gsosSetMark) + pea 0 ; push a zero word; 4 bytes are now on the stack + ldx #0x2017 ; X = class-1 call number for GetMark + jsl 0xe100a8 ; NOTE(review): same entry-point question as in gsosSetMark + sta 0xe4 ; park A in $e4 while the stack is repaired + tsc + clc + adc #4 ; drop the 4 bytes pushed above + tcs + lda 0xe4 ; reload the parked value as the return in A + rtl diff --git a/runtime/src/libc.c b/runtime/src/libc.c index 1235b53..973a73a 100644 --- a/runtime/src/libc.c +++ b/runtime/src/libc.c @@ -18,6 +18,56 @@ typedef int ssize_t; typedef unsigned char u8; typedef unsigned short u16; +// GS/OS class-1 file-call hooks. Resolved at link time by the +// iigsGsos.s wrappers (which themselves dispatch through $E100A8). +// Declared inline here to avoid pulling iigs/gsos.h's full type +// surface into libc.c. The parm-block types are local matches of +// iigs/gsos.h's structs — kept layout-equivalent so callers in +// iigs/gsos.h can interoperate. 
+typedef struct { + u16 pCount; + u16 refNum; + void *pathname; +} __GsosOpenParm; +typedef struct { + u16 pCount; + u16 refNum; + void *dataBuffer; + unsigned long requestCount; + unsigned long transferCount; +} __GsosIORecGS; +typedef struct { + u16 pCount; + u16 refNum; +} __GsosRefNumRecGS; +typedef struct { + u16 pCount; + u16 refNum; + unsigned long eof; +} __GsosEOFRecGS; +typedef struct { + u16 pCount; + u16 refNum; + unsigned long position; +} __GsosMarkRecGS; +// Weak so programs that never call into the GS/OS file backend don't +// drag iigsGsos.o into the link. fopen guards GSOS path on a NULL +// check (see __gsosAvailable below). +extern u16 gsosOpen (__GsosOpenParm *p) __attribute__((weak)); +extern u16 gsosRead (__GsosIORecGS *p) __attribute__((weak)); +extern u16 gsosWrite (__GsosIORecGS *p) __attribute__((weak)); +extern u16 gsosClose (__GsosRefNumRecGS *p) __attribute__((weak)); +extern u16 gsosGetEOF (__GsosEOFRecGS *p) __attribute__((weak)); +extern u16 gsosSetEOF (__GsosEOFRecGS *p) __attribute__((weak)); +extern u16 gsosSetMark(__GsosMarkRecGS *p) __attribute__((weak)); +extern u16 gsosGetMark(__GsosMarkRecGS *p) __attribute__((weak)); + +static int __gsosAvailable(void) { + // gsosOpen is the entry point — if iigsGsos.o is linked, all the + // wrappers are present (they're all in one .s file). + return gsosOpen != (u16 (*)(__GsosOpenParm *))0; +} + // ---- string.h ---- void *memcpy(void *dst, const void *src, size_t n) { @@ -793,35 +843,40 @@ clock_t clock(void) { return (clock_t)(__vblBase + now); } -// ---- FILE* abstraction (memory-backed FS) ---- +// ---- FILE* abstraction (memory-backed FS + GS/OS pass-through) ---- // // stdin / stdout / stderr are tagged as kind=STDIO and route through // putchar / fgetc-from-keyboard; opening a regular file allocates a -// FILE slot and keeps a (buf, size, pos, writable) record. 
Programs -// stage files into the FS at startup via mfsRegister(name, ptr, size, -// writable) and then use the standard fopen/fread/fwrite/fseek API. +// FILE slot. Two backends: // -// Why in-memory rather than GS/OS-backed: the smoke harness doesn't -// boot ProDOS, so toolbox-FS calls would crash MAME. An in-RAM FS -// covers the common need (parser/printer that wants a FILE*) without -// pulling in GS/OS init. A future GS/OS backend can replace -// fopenImpl/etc. without touching callers. +// kind=MEM — backed by an mfsRegister'd in-memory buffer. Used by +// smoke tests that don't have a real disk; staged via +// mfsRegister(name, ptr, size, cap, writable) at startup. +// kind=GSOS — backed by a real GS/OS file. fopen falls through to +// gsosOpen for any path not in the mfs table, so callers +// with a mounted ProDOS volume get true file I/O via +// the GS/OS class-1 dispatcher (Open/Read/Write/Close/ +// SetMark/GetMark/SetEOF/GetEOF). Requires a GS/OS- +// hosted environment; in a bare MAME boot (no ProDOS +// volume) gsosOpen fails and fopen returns NULL. // // FILE-table layout: 8 entries. Slot 0..2 are stdin/stdout/stderr // (immutable); 3..7 are user-allocated by fopen. 
Each entry has: -// kind (0=stdio in/out/err, 1=memory) -// buf (memory buffer base) -// size (logical size in bytes) -// cap (allocated capacity — for write-grow) -// pos (current seek position) +// kind (0=stdin, 1=stdout, 2=stderr, 3=memory, 4=GS/OS) +// buf (memory buffer base; unused for GS/OS) +// size (logical size in bytes; unused for GS/OS — read on demand) +// cap (allocated capacity — for write-grow; unused for GS/OS) +// pos (current seek position; unused for GS/OS — Mark is authoritative) // eof, err flags // writable (1 if opened for "w" or "r+" or "a") // ungetc holding cell (-1 = empty) +// refNum (GS/OS file reference; only valid when kind=GSOS) #define FILE_KIND_STDIN 0 #define FILE_KIND_STDOUT 1 #define FILE_KIND_STDERR 2 #define FILE_KIND_MEM 3 +#define FILE_KIND_GSOS 4 typedef struct __sFILE { u8 kind; @@ -834,13 +889,14 @@ typedef struct __sFILE { size_t pos; int unget; // -1 if no pushed-back char const char *path; // borrowed from caller, NULL for stdio + unsigned short refNum; // GS/OS file reference (kind=GSOS only) } FILE; #define MFS_MAX_FILES 8 static FILE __mfs[MFS_MAX_FILES] = { - { FILE_KIND_STDIN, 0, 0, 0, 0, 0, 0, 0, -1, 0 }, - { FILE_KIND_STDOUT, 1, 0, 0, 0, 0, 0, 0, -1, 0 }, - { FILE_KIND_STDERR, 1, 0, 0, 0, 0, 0, 0, -1, 0 }, + { FILE_KIND_STDIN, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, + { FILE_KIND_STDOUT, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, + { FILE_KIND_STDERR, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, }; FILE *stdin = &__mfs[0]; @@ -910,6 +966,16 @@ int fputc(int c, FILE *stream) { if (stream->pos > stream->size) stream->size = stream->pos; return (int)(unsigned char)c; } + if (stream->kind == FILE_KIND_GSOS) { + if (!stream->writable) { stream->err = 1; return -1; } + unsigned char b = (unsigned char)c; + __GsosIORecGS r = { 4, stream->refNum, &b, 1, 0 }; + if (gsosWrite(&r) != 0 || r.transferCount != 1) { + stream->err = 1; + return -1; + } + return (int)b; + } return -1; } @@ -919,7 +985,7 @@ int fputs(const char *s, FILE *stream) { while 
(*s) { putchar(*s); s++; } return 0; } - if (stream->kind == FILE_KIND_MEM) { + if (stream->kind == FILE_KIND_MEM || stream->kind == FILE_KIND_GSOS) { while (*s) { if (fputc(*s, stream) == -1) return -1; s++; @@ -934,6 +1000,14 @@ int fflush(FILE *stream) { (void)stream; return 0; } int fclose(FILE *stream) { if (!stream) return -1; // Don't close stdin/stdout/stderr — they're long-lived statics. + if (stream->kind == FILE_KIND_GSOS) { + __GsosRefNumRecGS c = { 1, stream->refNum }; + u16 closeRc = gsosClose(&c); // keep the status; the slot is released either way + stream->kind = 0; + stream->refNum = 0; + stream->path = (const char *)0; + return closeRc == 0 ? 0 : -1; // report a failed GS/OS close instead of always claiming success + } if (stream->kind != FILE_KIND_MEM) return 0; stream->kind = 0; stream->buf = (char *)0; @@ -949,6 +1023,7 @@ extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap); // Forward decl for vfprintf so fprintf can call it. int vfprintf(FILE *stream, const char *fmt, va_list ap); +size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); // Opaque pos-update helper. The vfprintf body's `stream->pos += // written` got DSE'd under p:32:16 + size_t=unsigned long when called @@ -982,6 +1057,20 @@ int vfprintf(FILE *stream, const char *fmt, va_list ap) { if (!stream) return -1; if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR) return vprintf(fmt, ap); + if (stream->kind == FILE_KIND_GSOS) { + // Format into a stack buffer, then push to GS/OS via fwrite. + // 256 bytes covers most format-string outputs; longer strings + // get truncated (caller can break up the format if needed). + if (!stream->writable) { stream->err = 1; return -1; } + char tmp[256]; + int n = vsnprintf(tmp, sizeof(tmp), fmt, ap); + if (n < 0) { stream->err = 1; return -1; } + size_t outLen = ((size_t)n < sizeof(tmp) - 1) + ? (size_t)n : sizeof(tmp) - 1; + size_t w = fwrite(tmp, 1, outLen, stream); + if (w != outLen) return -1; + return (int)outLen; // bytes actually written — differs from n when the output was truncated + } if (stream->kind == FILE_KIND_MEM) { // Format into the file's tail. Use the memory buffer that // remains as a snprintf target. 
Caller is responsible for @@ -1067,6 +1156,22 @@ static void initFileMem(FILE *f, const MfsEntry *reg, int wantWrite) { } } +// Scratch GSString for fopen's gsosOpen call. Single static buffer is +// fine — fopen is non-reentrant on a single-threaded target. +static struct { + u16 length; + char text[256]; +} __gsosPathBuf; + +static int __buildGSString(const char *path) { + size_t n = 0; + while (path[n] && n < 256) n++; + if (path[n]) return -1; // path > 256 chars + __gsosPathBuf.length = (u16)n; + for (size_t i = 0; i < n; i++) __gsosPathBuf.text[i] = path[i]; + return 0; +} + FILE *fopen(const char *path, const char *mode) { if (!path || !mode) return (FILE *)0; int wantWrite = 0; @@ -1078,7 +1183,8 @@ FILE *fopen(const char *path, const char *mode) { else if (mode[0] == 'a') { wantWrite = 1; append = 1; wantRead = (mode[1] == '+' || (mode[1] == 'b' && mode[2] == '+')); } else return (FILE *)0; - // Locate registration. + // Locate mfs registration first. Backwards-compat: any path + // staged via mfsRegister(path, ...) routes to memory backend. MfsEntry *reg = (MfsEntry *)0; for (int i = 0; i < MFS_MAX_REG; i++) { if (__mfsReg[i].inUse && strcmp(__mfsReg[i].path, path) == 0) { @@ -1086,8 +1192,7 @@ FILE *fopen(const char *path, const char *mode) { break; } } - if (!reg) return (FILE *)0; - if (wantWrite && !reg->writable) return (FILE *)0; + if (reg && wantWrite && !reg->writable) return (FILE *)0; // Allocate a FILE slot (3..MAX-1 — 0..2 are stdin/out/err). FILE *f = (FILE *)0; @@ -1099,20 +1204,80 @@ FILE *fopen(const char *path, const char *mode) { } if (!f) return (FILE *)0; - initFileMem(f, reg, wantWrite); - (void)wantRead; + if (reg) { + initFileMem(f, reg, wantWrite); + (void)wantRead; + if (truncate) f->size = 0; + if (append) f->pos = f->size; + return f; + } - if (truncate) f->size = 0; - if (append) f->pos = f->size; + // No mfs registration — try GS/OS. 
Requires iigsGsos.o linked + // (weak references; absent in some smoke tests) AND a mounted + // ProDOS volume + Tool Locator init at run time. Either missing + // → NULL. + if (!__gsosAvailable()) return (FILE *)0; + if (__buildGSString(path) < 0) return (FILE *)0; + __GsosOpenParm op = { 2, 0, &__gsosPathBuf }; + if (gsosOpen(&op) != 0) return (FILE *)0; + + f->kind = FILE_KIND_GSOS; + f->writable = (u8)(wantWrite ? 1 : 0); + f->eof = 0; + f->err = 0; + f->buf = (char *)0; + f->size = 0; + f->cap = 0; + f->pos = 0; + f->unget = -1; + f->path = path; + f->refNum = op.refNum; + if (truncate) { + // "w" / "w+" — truncate to zero length. A failed SetEOF only sets the stream's err flag; the FILE is still returned. NOTE(review): the GS/OS Reference documents SetEOF as pCount=3 {refNum, base, displacement} — confirm this 2-parameter block is accepted. + __GsosEOFRecGS e = { 2, op.refNum, 0 }; + if (gsosSetEOF(&e) != 0) f->err = 1; + } + if (append) { + // "a" / "a+" — position at end-of-file (GetEOF, then SetMark to that offset; failures are ignored). NOTE(review): the GS/OS Reference documents SetMark as pCount=3 {refNum, base, displacement} — confirm this 2-parameter block is accepted. + __GsosEOFRecGS e = { 2, op.refNum, 0 }; + if (gsosGetEOF(&e) == 0) { + __GsosMarkRecGS m = { 2, op.refNum, e.eof }; + gsosSetMark(&m); + } + } + (void)wantRead; return f; } size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { - if (!stream || stream->kind != FILE_KIND_MEM) return 0; + if (!stream) return 0; if (size == 0 || nmemb == 0) return 0; // Avoid 32-bit overflow on size * nmemb: cap nmemb so each item // (size bytes) fits in remaining 16-bit address space. if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size; + if (stream->kind == FILE_KIND_GSOS) { + // Drain unget byte first if present. 
+ char *out = (char *)ptr; + unsigned long total = (unsigned long)size * (unsigned long)nmemb; + unsigned long offset = 0; + if (stream->unget >= 0 && total > 0) { + *out++ = (char)stream->unget; + stream->unget = -1; + offset = 1; + } + if (offset >= total) return offset / size; + __GsosIORecGS r = { + 4, stream->refNum, out, total - offset, 0 + }; + u16 rc = gsosRead(&r); + unsigned long got = offset + r.transferCount; + if (rc != 0 || r.transferCount < total - offset) { + stream->eof = 1; + if (rc != 0 && rc != 0x4C) stream->err = 1; // 0x4C = eofErr + } + return (size_t)(got / size); + } + if (stream->kind != FILE_KIND_MEM) return 0; char *out = (char *)ptr; size_t items = 0; while (items < nmemb) { @@ -1150,6 +1315,16 @@ size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { } return items; } + if (stream->kind == FILE_KIND_GSOS) { + if (!stream->writable) { stream->err = 1; return 0; } + unsigned long total = (unsigned long)size * (unsigned long)nmemb; + __GsosIORecGS r = { 4, stream->refNum, (void *)in, total, 0 }; + if (gsosWrite(&r) != 0) { + stream->err = 1; + return (size_t)(r.transferCount / size); + } + return (size_t)(r.transferCount / size); + } if (stream->kind != FILE_KIND_MEM) return 0; if (!stream->writable) { stream->err = 1; return 0; } size_t items = 0; @@ -1174,7 +1349,31 @@ size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { #define SEEK_END 2 int fseek(FILE *stream, long offset, int whence) { - if (!stream || stream->kind != FILE_KIND_MEM) return -1; + if (!stream) return -1; + if (stream->kind == FILE_KIND_GSOS) { + long base; + if (whence == SEEK_SET) { + base = 0; + } else if (whence == SEEK_CUR) { + __GsosMarkRecGS m = { 2, stream->refNum, 0 }; + if (gsosGetMark(&m) != 0) return -1; + base = (long)m.position; + } else if (whence == SEEK_END) { + __GsosEOFRecGS e = { 2, stream->refNum, 0 }; + if (gsosGetEOF(&e) != 0) return -1; + base = (long)e.eof; + } else { + return -1; + } + long target = base 
+ offset; + if (target < 0) return -1; + __GsosMarkRecGS m = { 2, stream->refNum, (unsigned long)target }; // NOTE(review): the GS/OS Reference documents SetMark as pCount=3 {refNum, base, displacement} — confirm this 2-parameter block is accepted + if (gsosSetMark(&m) != 0) return -1; + stream->eof = 0; // a successful seek clears the EOF indicator + stream->unget = -1; // and discards any pushed-back byte + return 0; + } + if (stream->kind != FILE_KIND_MEM) return -1; long base; if (whence == SEEK_SET) base = 0; else if (whence == SEEK_CUR) base = (long)stream->pos; @@ -1189,7 +1388,13 @@ int fseek(FILE *stream, long offset, int whence) { } long ftell(FILE *stream) { - if (!stream || stream->kind != FILE_KIND_MEM) return -1L; + if (!stream) return -1L; + if (stream->kind == FILE_KIND_GSOS) { + __GsosMarkRecGS m = { 2, stream->refNum, 0 }; + if (gsosGetMark(&m) != 0) return -1L; + return (long)m.position; // the GS/OS mark as reported; a pending unget byte is not subtracted here + } + if (stream->kind != FILE_KIND_MEM) return -1L; return (long)stream->pos; } @@ -1205,6 +1410,17 @@ int fgetc(FILE *stream) { return (int)(unsigned char)stream->buf[stream->pos++]; } if (stream->kind == FILE_KIND_STDIN) return getchar(); + if (stream->kind == FILE_KIND_GSOS) { + unsigned char b = 0; + __GsosIORecGS r = { 4, stream->refNum, &b, 1, 0 }; + u16 rc = gsosRead(&r); + if (rc != 0 || r.transferCount != 1) { + stream->eof = 1; + if (rc != 0 && rc != 0x4C) stream->err = 1; // 0x4C = eofErr: plain end-of-file is not flagged as an error + return -1; + } + return (int)b; + } return -1; } diff --git a/runtime/src/libgcc.s b/runtime/src/libgcc.s index 7309dd0..d7f8f04 100644 --- a/runtime/src/libgcc.s +++ b/runtime/src/libgcc.s @@ -323,15 +323,38 @@ __mulsi3: stz 0xe8 stz 0xea ; Fast path: if multiplier's high half ($e2) is 0, we only - ; need 16 loop iterations (the full 32-iter shift-out would - ; just shift in zeros after iter 16). Common in C code where - ; both source operands are zext'd from i16 — e.g. `i*i` with - ; i a `unsigned short`. Saves ~half the multiply cycles in - ; that case (sumOfSquares: 80000 → ~40000 cyc/call). + ; need 16 loop iterations AND we can drop the multiplier-hi + ; shift step entirely (lsr $e2 + bcc + ora #$8000) — that step + ; only ever fires when hi has bits to shift out, which it + ; doesn't here. 
Saves ~8 cyc/iter × 16 iters = ~128 cyc/call + ; vs the generic 16-iter path. Common in C code where both + ; source operands are zext'd from i16 (e.g. `i*i` with i a + ; `unsigned short`) — sumOfSquares benchmark hits this on every + ; iteration. lda 0xe2 bne .Lmulsi_full ldy #0x10 - bra .Lmulsi_loop +.Lmulsi_u16_loop: + ; Test bit 0 of multiplier (lo word). + lda 0xe0 + lsr a + sta 0xe0 + bcc .Lmulsi_u16_noadd + clc + lda 0xe8 + adc 0xe4 + sta 0xe8 + lda 0xea + adc 0xe6 + sta 0xea +.Lmulsi_u16_noadd: + asl 0xe4 + rol 0xe6 + dey + bne .Lmulsi_u16_loop + ldx 0xea + lda 0xe8 + rtl .Lmulsi_full: ldy #0x20 .Lmulsi_loop: @@ -349,12 +372,6 @@ __mulsi3: adc 0xe6 sta 0xea .Lmulsi_noadd: - ; Shift multiplier right (32-bit, hi-into-lo) — we already shifted - ; the lo half above, but the bit shifted out went to carry. We - ; need to also bring the lo bit of the hi half into bit 15 of lo, - ; and shift hi right. Simpler: do a full 32-bit shift right - ; before the LSR. Restructure: - ; ; Shift multiplicand left (32-bit, carry chain). asl 0xe4 rol 0xe6 diff --git a/scripts/runViaFinder.sh b/scripts/runViaFinder.sh index 1e907ad..51c820f 100755 --- a/scripts/runViaFinder.sh +++ b/scripts/runViaFinder.sh @@ -3,10 +3,13 @@ # Lua keyboard automation to launch a user OMF, sample memory at # specific frames to verify the program executed. # -# Usage: runViaFinder.sh --check =... -# The OMF file is injected as /SYSTEM.DISK/HELLO (top-level on the -# boot disk). Lua then waits for Finder, types S+Cmd-O to open the -# System.Disk volume window, then H+Cmd-O to launch HELLO. +# Usage: runViaFinder.sh [--data /DATA/NAME=local_file]... +# --check =... +# The OMF file is injected as /DATA/HELLO on a separate 800K data +# disk; Lua drives Finder to open the Data volume and launch HELLO. +# Each --data option also injects an arbitrary file (raw bytes) onto +# the same disk under the given path — used for stdio smoke tests +# that need a known file present at runtime. 
# # Memory checks happen at frame 5400 (~90s emulated, well after the # launch path completes) and exit 0 / 1 depending on whether each @@ -22,7 +25,15 @@ set -euo pipefail OMF="$1" shift [ -f "$OMF" ] || { echo "missing: $OMF" >&2; exit 2; } -[ "${1:-}" = "--check" ] || { echo "usage: $0 --check =..." >&2; exit 2; } + +# Collect optional --data injections before --check. +DATA_INJECTS=() +while [ $# -gt 0 ] && [ "$1" = "--data" ]; do + [ $# -ge 2 ] || { echo "usage: $0 [--data /DATA/NAME=path]... --check =..." >&2; exit 2; } + DATA_INJECTS+=("$2") + shift 2 +done +[ "${1:-}" = "--check" ] || { echo "usage: $0 [--data /DATA/NAME=path]... --check =..." >&2; exit 2; } shift PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" @@ -43,6 +54,21 @@ cp "$SYSDISK" "$WORK/disk.po" cp "$OMF" "$WORK/HELLO#B30000" "$CADIUS" ADDFILE "$WORK/data.po" /DATA "$WORK/HELLO#B30000" >/dev/null +# Inject extra data files. Path syntax: /DATA/NAME=local_file, split at the FIRST '=' so a local path may itself contain '='. +# Each gets type=$06 (BIN, generic data) so GS/OS treats it as a +# plain file readable via gsosOpen. The ${arr[@]+...} expansion keeps an empty list safe under `set -u` on bash < 4.4. +for inj in ${DATA_INJECTS[@]+"${DATA_INJECTS[@]}"}; do + targetPath="${inj%%=*}" + srcPath="${inj#*=}" + [ -f "$srcPath" ] || { echo "missing data injection source: $srcPath" >&2; exit 2; } + # cadius ADDFILE uses the basename of the source as the on-disk name, + # with #TTAAAAAA suffix selecting type+aux. Strip the leading + # /VOL/ from targetPath to get the in-volume name. + inVolName="${targetPath##*/}" + cp "$srcPath" "$WORK/${inVolName}#060000" + "$CADIUS" ADDFILE "$WORK/data.po" /DATA "$WORK/${inVolName}#060000" >/dev/null +done + LUA_CHECKS="" EXPECTS=() for pair in "$@"; do diff --git a/scripts/smokeTest.sh b/scripts/smokeTest.sh index d22d12e..877b77a 100755 --- a/scripts/smokeTest.sh +++ b/scripts/smokeTest.sh @@ -5459,6 +5459,91 @@ print(f'OK: {nCreloc} cRELOC opcodes match sidecar') rm -f "$cR1" "$oR1" "$binR1" "$mapR1" "$relR1" "$omfR1" fi +# GS/OS file I/O end-to-end smoke. 
Builds a test program that calls +# fopen on a real ProDOS path (no mfsRegister), reads bytes via the +# GS/OS class-1 dispatcher, writes a marker. Validates the full +# FILE_KIND_GSOS surface: gsosOpen → gsosRead → gsosClose, the libc.c +# fopen fallthrough from the mfs lookup, and weak-link resolution to +# iigsGsos.o. Disabled by default — set GSOS_FILE_SMOKE=1 to enable. +# +# Status (2026-05-08): the program LINKS cleanly and the test rig +# (runViaFinder.sh + cadius --data injection) all work. When run +# under real GS/OS 6.0.2 in MAME the gsosOpen call hangs the CPU +# (never returns from $E100A8); root cause not yet diagnosed — +# possibly a parm-block bank issue or a Loader-state assumption the +# wrapper makes that's incorrect for class-1 Open under real GS/OS. +# The stub-dispatcher GS/OS smoke (existing) validates the wrapper +# contract, so this is specific to the dispatcher's behaviour. +# +# Manual repro after fix: +# GSOS_FILE_SMOKE=1 bash scripts/smokeTest.sh +CADIUS=${CADIUS:-/tmp/cadius/cadius} +SYSDISK=${SYSDISK:-$PROJECT_ROOT/tools/gsos/sys602.po} +if [ "${GSOS_FILE_SMOKE:-0}" = "1" ] \ + && [ -x "$CLANG" ] && [ -x "$CADIUS" ] && [ -f "$SYSDISK" ] \ + && command -v mame >/dev/null 2>&1; then + log "check: GS/OS fopen/fread reads /DATA/TESTFILE via runViaFinder" + cGsfFile="$(mktemp --suffix=.c)" + oGsfFile="$(mktemp --suffix=.o)" + binGsf="$(mktemp --suffix=.bin)" + mapGsf="$(mktemp --suffix=.map)" + relGsf="$(mktemp --suffix=.reloc)" + omfGsf="$(mktemp --suffix=.omf)" + testFileGsf="$(mktemp --suffix=.dat)" + printf 'Hello, world!' 
> "$testFileGsf" + cat > "$cGsfFile" <<'EOF' +extern struct __sFILE *fopen(const char *path, const char *mode); +extern unsigned long fread(void *p, unsigned long s, unsigned long n, struct __sFILE *f); +extern int fclose(struct __sFILE *f); +static char rbuf[16]; +__attribute__((noinline)) static int strnequ(const char *a, const char *b, int n) { + for (int i = 0; i < n; i++) if (a[i] != b[i]) return 0; + return 1; +} +int main(void) { + unsigned char ok = 0; + struct __sFILE *f = fopen("/DATA/TESTFILE", "r"); + if (f) { + ok |= 0x10; + unsigned long n = fread(rbuf, 1, 13, f); + if (n == 13) ok |= 0x20; + if (strnequ(rbuf, "Hello, world!", 13)) ok |= 0x40; + if (fclose(f) == 0) ok |= 0x80; + } + *(volatile unsigned char *)0x70 = ok; + return 0; +} +EOF + "$CLANG" --target=w65816 -O2 -ffunction-sections -c \ + "$cGsfFile" -o "$oGsfFile" + "$PROJECT_ROOT/tools/link816" -o "$binGsf" --text-base 0x1000 \ + --map "$mapGsf" --reloc-out "$relGsf" \ + "$PROJECT_ROOT/runtime/crt0Gsos.o" "$oGsfFile" \ + "$PROJECT_ROOT/runtime/libc.o" \ + "$PROJECT_ROOT/runtime/snprintf.o" \ + "$PROJECT_ROOT/runtime/extras.o" \ + "$PROJECT_ROOT/runtime/softFloat.o" \ + "$PROJECT_ROOT/runtime/softDouble.o" \ + "$PROJECT_ROOT/runtime/iigsGsos.o" \ + "$PROJECT_ROOT/runtime/libgcc.o" >/dev/null 2>&1 + "$PROJECT_ROOT/tools/omfEmit" --input "$binGsf" --map "$mapGsf" \ + --base 0x1000 --entry __start --output "$omfGsf" \ + --name HELLO --expressload --relocs "$relGsf" >/dev/null 2>&1 + if [ ! -s "$omfGsf" ]; then + die "GS/OS file smoke: omfEmit produced empty/missing OMF" + fi + if ! 
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfGsf" \ + --data "/DATA/TESTFILE=$testFileGsf" \ + --check 0x70=0xf0 >/dev/null 2>&1; then + bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfGsf" \ + --data "/DATA/TESTFILE=$testFileGsf" \ + --check 0x70=0xf0 2>&1 | tail -5 >&2 + die "GS/OS fopen/fread end-to-end smoke FAILED (marker != 0xf0)" + fi + rm -f "$cGsfFile" "$oGsfFile" "$binGsf" "$mapGsf" "$relGsf" \ + "$omfGsf" "$testFileGsf" +fi + # W65816 codegen-shape regression pins. Tiny FileCheck assertions on # specific lowering behaviors that have broken before; runs in well # under a second. See scripts/runFileCheckTests.sh. diff --git a/src/link816/link816.cpp b/src/link816/link816.cpp index 7744f9c..58323b8 100644 --- a/src/link816/link816.cpp +++ b/src/link816/link816.cpp @@ -604,10 +604,18 @@ struct Linker { } auto sIt = globalSyms.find(sym.name); if (sIt == globalSyms.end()) { - // Undefined symbol — for the strict link path the caller - // dies; for the DWARF sidecar this just means "leave the - // bytes alone". + // Undefined symbol. If the reference itself is weak, ELF + // semantics say "leave the address as 0" — do that and + // return resolved-true so the caller doesn't error out. + // This lets libc reference optional helpers (e.g. the + // GS/OS file backend in iigsGsos.s) without forcing every + // caller to link iigsGsos.o. Strong unresolved → caller + // dies. 
resolvedName = sym.name; + if (sym.bind == STB_WEAK) { + target = 0 + r.addend; + return true; + } return false; } target = sIt->second + r.addend; diff --git a/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp b/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp index c1879e9..866228c 100644 --- a/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp +++ b/src/llvm/lib/Target/W65816/W65816ISelLowering.cpp @@ -546,6 +546,18 @@ SDValue W65816TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LH = extractWide32Hi(DAG, DL, LHS); SDValue RL = extractWide32Lo(DAG, DL, RHS); SDValue RH = extractWide32Hi(DAG, DL, RHS); + // Fast path: i32 == 0 / != 0 → (LL | LH) cmp 0. Drops two i16 + // setcc materializations + an AND + (for NE) an XOR; the BR_CC + // can branch directly on the OR-test. Hot in `while (x)` and + // any i32-counter loop test. Only taken when RHS is a literal zero constant node. + if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + isa<ConstantSDNode>(RHS) && + cast<ConstantSDNode>(RHS)->isZero()) { + SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i16, LL, LH); + SDValue Z16 = DAG.getConstant(0, DL, MVT::i16); + return DAG.getNode(ISD::BR_CC, DL, MVT::Other, Chain, + DAG.getCondCode(CC), Or, Z16, Dest); + } SDValue Bool; if (CC == ISD::SETEQ || CC == ISD::SETNE) { SDValue EqLo = DAG.getSetCC(DL, MVT::i16, LL, RL, ISD::SETEQ); @@ -624,6 +636,15 @@ SDValue W65816TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { SDValue LH = extractWide32Hi(DAG, DL, LHS); SDValue RL = extractWide32Lo(DAG, DL, RHS); SDValue RH = extractWide32Hi(DAG, DL, RHS); + // Fast path: i32 == 0 / != 0 → (LL | LH) cmp 0. One i16 OR + one + // i16 setcc instead of two setcc + AND (+ XOR for NE). 
+ if ((CC == ISD::SETEQ || CC == ISD::SETNE) && + isa<ConstantSDNode>(RHS) && + cast<ConstantSDNode>(RHS)->isZero()) { + SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i16, LL, LH); + SDValue Z16 = DAG.getConstant(0, DL, MVT::i16); + return DAG.getSetCC(DL, VT, Or, Z16, CC); + } if (CC == ISD::SETEQ || CC == ISD::SETNE) { SDValue EqLo = DAG.getSetCC(DL, VT, LL, RL, ISD::SETEQ); SDValue EqHi = DAG.getSetCC(DL, VT, LH, RH, ISD::SETEQ); diff --git a/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp b/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp index 0485599..4918ca1 100644 --- a/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp +++ b/src/llvm/lib/Target/W65816/W65816StackSlotCleanup.cpp @@ -260,8 +260,58 @@ static bool tryEliminateLoadAfterStore(MachineBasicBlock &MBB, MI.getOperand(0).isReg() && MI.getOperand(0).getReg() == StoredReg) { // A volatile load is observable — never elide, even if the - // value is provably the same as the prior store. - if (MI.hasOrderedMemoryRef() || StaMI.hasOrderedMemoryRef()) + // value is provably the same as the prior store. But STAfi/ + // LDAfi target compiler-managed stack spill slots, which are + // by construction never volatile — `hasOrderedMemoryRef()` + // returns true here only because both lack explicit memops + // (the conservative "no info → treat as ordered" default). + // Check the actual memops if present; absence is fine. + auto isReallyVolatile = [](const MachineInstr &I) { + for (auto *MMO : I.memoperands()) + if (MMO->isVolatile() || MMO->isAtomic()) + return true; + return false; + }; + if (isReallyVolatile(MI) || isReallyVolatile(StaMI)) + return false; + // LDA sets N/Z based on the loaded value. Dropping it would + // expose stale N/Z from before the STA→LDA pair to the next + // flag-reading op (e.g. a branch). Only safe to drop if the + // immediately-following op overwrites N/Z. 
+ auto opSetsNZ = [](unsigned Op) { + switch (Op) { + case W65816::LDAfi: + case W65816::LDAi16imm: + case W65816::LDAabs: + case W65816::ANDi16imm: case W65816::ANDabs: + case W65816::ORAi16imm: case W65816::ORAabs: + case W65816::EORi16imm: case W65816::EORabs: + case W65816::ADCi16imm: case W65816::ADCabs: case W65816::ADCfi: + case W65816::SBCi16imm: case W65816::SBCabs: case W65816::SBCfi: + case W65816::ADCEi16imm: case W65816::ADCEabs: case W65816::ADCEfi: + case W65816::SBCEi16imm: case W65816::SBCEabs: case W65816::SBCEfi: + case W65816::ASLA16: case W65816::LSRA16: + case W65816::ASLA8: case W65816::LSRA8: + return true; + default: + return false; + } + }; + // Walk past further STAfi pseudos (spill stores) — they preserve + // A's flags. Only walk past STAfi specifically; STA_DP and + // pointer-indirect stores have inserter-driven expansions that + // can introduce flag-touching ops, and walking past those broke + // a printf smoke test. + auto NextIt = std::next(MI.getIterator()); + while (NextIt != MBB.end()) { + if (NextIt->isDebugInstr()) { ++NextIt; continue; } + if (NextIt->getOpcode() == W65816::STAfi) { ++NextIt; continue; } + break; + } + if (NextIt == MBB.end() || NextIt->isBranch() || NextIt->isReturn()) + return false; + if (!NextIt->definesRegister(W65816::P, TRI) && + !opSetsNZ(NextIt->getOpcode())) return false; MI.eraseFromParent(); return true;