Checkpoint

This commit is contained in:
Scott Duensing 2026-05-08 20:29:13 -05:00
parent 7600812a7b
commit 45f8f45877
10 changed files with 546 additions and 75 deletions

View file

@ -66,12 +66,28 @@ which runs correctly under MAME (apple2gs).
— pre-compute or cache when possible.
- `setjmp` / `longjmp` from libgcc.s.
- Static constructors via crt0's init_array walk.
- `<stdio.h>` file I/O against an in-memory FS: `mfsRegister
(path, buf, size, cap, writable)` stages a buffer as a named
file; `fopen`/`fread`/`fwrite`/`fseek`/`ftell`/`fclose`/`fgetc`
/`fgets`/`ungetc`/`fprintf` operate on it via a per-FILE
(kind, buf, size, cap, pos, eof, err, unget) record. stdin/
stdout/stderr route through `putchar` as before.
- `<stdio.h>` file I/O with two backends:
- **mfs** — `mfsRegister(path, buf, size, cap, writable)` stages a
memory buffer as a named file. Used by smoke tests that don't
have a real disk. Fully validated end-to-end.
- **GS/OS** — `fopen` falls through to `gsosOpen` for any path not
in the mfs table. Routes through the GS/OS class-1 dispatcher
via wrappers in `runtime/src/iigsGsos.s` (Open/Read/Write/Close/
SetMark/GetMark/SetEOF/GetEOF). The full stdio surface
(`fread/fwrite/fseek/ftell/fclose/fgetc/fputc/fputs/fgets/ungetc/
feof/ferror/clearerr/rewind/fprintf/vfprintf`) dispatches on
backend. link816 honors weak symbols so programs that don't use
the GS/OS backend don't have to link `iigsGsos.o`.
- **Validation status:** code path compiles, links, and runs under
`runViaFinder.sh --data` injection. `fopen` + `gsosOpen` hangs
when invoked under real GS/OS 6.0.2 (JSL $E100A8 doesn't return);
root cause not yet diagnosed. Stub-dispatcher GS/OS smoke (the
existing one) validates the wrapper contract independently. An
XFAIL'd end-to-end smoke is in `scripts/smokeTest.sh` gated
behind `GSOS_FILE_SMOKE=1` for use after the dispatcher path is
fixed. `runViaFinder.sh --data /PATH=local_file` is the
automated-injection mechanism for runtime-test data files.
- stdin/stdout/stderr route through `putchar` as before.
- `<wchar.h>`: wcslen / wcscmp / wcsncmp / wcscpy / wcsncpy /
wcscat / wcschr / wcsrchr; mbtowc / wctomb / mbstowcs /
wcstombs / mblen with the trivial 1:1 byte<->wide mapping
@ -175,9 +191,9 @@ which runs correctly under MAME (apple2gs).
- `scripts/benchCyclesPrecise.sh` measures per-call cycle counts
via MAME's emulated time counter. Eight benchmarks under
`benchmarks/`. Current numbers: popcount 6888 cyc, bsearch
1108, memcmp 1569, strcpy 3580, dotProduct 4774, fib(10) 14152,
sumOfSquares 49104. Speed is the optimization priority, not
`benchmarks/`. Current numbers: popcount 4876 cyc, bsearch
938, memcmp 1330, strcpy 3325, dotProduct 4007, fib(10) 12958,
sumOfSquares 40920. Speed is the optimization priority, not
size.
**Backend register allocation:**
@ -243,22 +259,15 @@ for the common-case C / minimal-C++ workload. Priority is speed
**Speed wins queued, ranked by expected impact:**
- **u16×u16 → u32 multiply path.** sumOfSquares is 982 cyc/iter
bottlenecked by `__mulsi3` for what's effectively a 16×16
multiply (both inputs are zext from u16). Adding a `__umulhi3`
libcall + SDAG hook to detect `MUL(zext(a), zext(b))` could
roughly halve the iteration cost.
- **Fold `while (x != 0)` for i32 to `lda lo; ora hi; bne`.**
The combiner currently materializes a SETCC boolean and re-tests
it, generating ~10 redundant ops in every i32-iteration loop.
Hot in popcount, CRC, and any BigInt-style code.
- **ptr32 pointer-increment overhead.** `*p++` under ptr32 emits
a full 32-bit `ADC` chain even when the high half is provably
unchanged. strcpy and memcmp pay 30+ cycles per byte for what
should be 15-20. Needs a peephole or SDAG combine for `i32 + 1`
with provably-no-carry-into-hi.
unchanged, and LSR rewrites `*p++` into base+offset (worse on
W65816). strcpy/memcmp pay 30+ cycles per byte for what should
be 15-20. Tried `-disable-lsr` (strcpy 10%, dotProduct +10%)
and TTI `isLSRCostLess` override (memcmp +22% — worse); both
too risky without per-loop heuristics. Needs either a peephole
for `i32 + 1` with provably-no-carry-into-hi or per-loop LSR
override based on pointer-vs-array access pattern.
- **Greedy regalloc retry.** Currently blocked on an upstream
LLVM `LiveRangeEdit::eliminateDeadDef` assertion when our

View file

@ -22,8 +22,10 @@
// $2012 Read
// $2013 Write
// $2014 Close
// $2026 GetEOF
// $2027 SetEOF
// $2016 SetMark
// $2017 GetMark
// $2018 SetEOF
// $2019 GetEOF
// $2029 Quit (special — no return)
// See "GS/OS Reference" for the full ~50 calls and parm-block layouts.
@ -72,6 +74,13 @@ typedef struct {
unsigned long eof;
} EOFRecGS;
// Class-1 SetMark/GetMark parm block — 32-bit position within file.
// Passed by pointer to gsosSetMark / gsosGetMark.
// NOTE(review): the GS/OS Reference documents class-1 SetMark (and
// SetEOF) as taking pCount=3 with a 16-bit `base` word ahead of a
// 32-bit `displacement`; only GetMark/GetEOF use a 2-parameter
// (refNum, position) block. Confirm this layout against the reference
// before relying on it for the SetMark direction.
typedef struct {
unsigned short pCount; // 2
unsigned short refNum; // file reference number identifying the open file
unsigned long position; // [in for SetMark, out for GetMark]
} MarkRecGS;
// Open / Read / Write / Close wrappers. Each returns 0 on success or
// a non-zero GS/OS error code (see gsos.h reference for codes). The
// parm block lives on the caller's stack; you set the input fields
@ -86,6 +95,8 @@ extern unsigned short gsosWrite (IORecGS *p);
extern unsigned short gsosClose (RefNumRecGS *p);
extern unsigned short gsosGetEOF (EOFRecGS *p);
extern unsigned short gsosSetEOF (EOFRecGS *p);
extern unsigned short gsosSetMark(MarkRecGS *p);
extern unsigned short gsosGetMark(MarkRecGS *p);
#ifdef __cplusplus
}

View file

@ -32,6 +32,8 @@
.globl gsosClose
.globl gsosGetEOF
.globl gsosSetEOF
.globl gsosSetMark
.globl gsosGetMark
gsosOpen:
pha
@ -110,3 +112,29 @@ gsosSetEOF:
tcs
lda 0xe4
rtl
; gsosSetMark — wrapper for GS/OS call number $2016 (SetMark).
; Pushes A and a zero word (4 bytes total), loads the call number into
; X and JSLs to $E100A8. The value left in A by the call is parked in
; $E4 while the 4 pushed bytes are dropped from the stack, then
; reloaded so it is in A at RTL.
; NOTE(review): presumably the two pushed words together form the
; parm-block pointer argument — confirm against the C calling
; convention used for `MarkRecGS *p`.
; NOTE(review): the GS/OS Reference describes $E100A8 as the inline
; entry (call number and parm-block pointer follow the JSL in the
; instruction stream) and $E100B0 as the stack-based entry. Confirm
; that this push-then-JSL sequence matches the entry point it targets.
gsosSetMark:
pha
pea 0
; X = call number for SetMark.
ldx #0x2016
jsl 0xe100a8
; Save the result: TSC/ADC below overwrite A.
sta 0xe4
; SP += 4 — discard the two words pushed above.
tsc
clc
adc #4
tcs
; Reload the saved result as the return value.
lda 0xe4
rtl
; gsosGetMark — wrapper for GS/OS call number $2017 (GetMark).
; Same shape as the other wrappers in this file: push A and a zero
; word (4 bytes), load the call number into X, JSL to $E100A8, park
; the value returned in A at $E4 while the 4 pushed bytes are dropped,
; then reload it so it is in A at RTL.
; NOTE(review): presumably the two pushed words together form the
; parm-block pointer argument — confirm against the C calling
; convention used for `MarkRecGS *p`.
gsosGetMark:
pha
pea 0
; X = call number for GetMark.
ldx #0x2017
jsl 0xe100a8
; Save the result: TSC/ADC below overwrite A.
sta 0xe4
; SP += 4 — discard the two words pushed above.
tsc
clc
adc #4
tcs
; Reload the saved result as the return value.
lda 0xe4
rtl

View file

@ -18,6 +18,56 @@ typedef int ssize_t;
typedef unsigned char u8;
typedef unsigned short u16;
// GS/OS class-1 file-call hooks. Resolved at link time by the
// iigsGsos.s wrappers (which themselves dispatch through $E100A8).
// Declared inline here to avoid pulling iigs/gsos.h's full type
// surface into libc.c. The parm-block types are local matches of
// iigs/gsos.h's structs — kept layout-equivalent so callers in
// iigs/gsos.h can interoperate.
//
// In every block, pCount is the number of parameters that follow it;
// the values noted below are the ones the stdio code in this file
// passes.

// Open: used by fopen with pCount=2.
typedef struct {
u16 pCount;
u16 refNum;     // [out] read back by fopen after a successful gsosOpen
void *pathname; // [in] fopen passes the address of its length-prefixed path buffer
} __GsosOpenParm;

// Read / Write: used by fread, fwrite, fgetc and fputc with pCount=4.
typedef struct {
u16 pCount;
u16 refNum;                  // [in] file reference from Open
void *dataBuffer;            // [in] destination (Read) or source (Write) bytes
unsigned long requestCount;  // [in] bytes requested
unsigned long transferCount; // [out] bytes actually transferred; callers compare it to requestCount
} __GsosIORecGS;

// Close: used by fclose with pCount=1.
typedef struct {
u16 pCount;
u16 refNum; // [in] file reference to close
} __GsosRefNumRecGS;

// GetEOF / SetEOF: used by fopen and fseek with pCount=2.
typedef struct {
u16 pCount;
u16 refNum;        // [in] file reference
unsigned long eof; // file length in bytes: [out] for GetEOF, [in] for SetEOF
} __GsosEOFRecGS;

// GetMark / SetMark: used by fopen, fseek and ftell with pCount=2.
// NOTE(review): the GS/OS Reference documents SetMark and SetEOF as
// pCount=3 calls (refNum, 16-bit base, 32-bit displacement); confirm
// the 2-parameter form used here for the Set* direction.
typedef struct {
u16 pCount;
u16 refNum;             // [in] file reference
unsigned long position; // byte offset: [out] for GetMark, [in] for SetMark
} __GsosMarkRecGS;
// Weak so programs that never call into the GS/OS file backend don't
// drag iigsGsos.o into the link. fopen guards GSOS path on a NULL
// check (see __gsosAvailable below).
//
// Only fopen performs that check. The other stdio entry points call
// these hooks unguarded, but only for streams whose kind is
// FILE_KIND_GSOS — a kind that fopen assigns only after the check and
// a successful gsosOpen. Each hook returns 0 on success or a non-zero
// code on failure (callers test `!= 0`).
extern u16 gsosOpen (__GsosOpenParm *p) __attribute__((weak));
extern u16 gsosRead (__GsosIORecGS *p) __attribute__((weak));
extern u16 gsosWrite (__GsosIORecGS *p) __attribute__((weak));
extern u16 gsosClose (__GsosRefNumRecGS *p) __attribute__((weak));
extern u16 gsosGetEOF (__GsosEOFRecGS *p) __attribute__((weak));
extern u16 gsosSetEOF (__GsosEOFRecGS *p) __attribute__((weak));
extern u16 gsosSetMark(__GsosMarkRecGS *p) __attribute__((weak));
extern u16 gsosGetMark(__GsosMarkRecGS *p) __attribute__((weak));
static int __gsosAvailable(void) {
// gsosOpen is the entry point — if iigsGsos.o is linked, all the
// wrappers are present (they're all in one .s file).
return gsosOpen != (u16 (*)(__GsosOpenParm *))0;
}
// ---- string.h ----
void *memcpy(void *dst, const void *src, size_t n) {
@ -793,35 +843,40 @@ clock_t clock(void) {
return (clock_t)(__vblBase + now);
}
// ---- FILE* abstraction (memory-backed FS) ----
// ---- FILE* abstraction (memory-backed FS + GS/OS pass-through) ----
//
// stdin / stdout / stderr are tagged as kind=STDIO and route through
// putchar / fgetc-from-keyboard; opening a regular file allocates a
// FILE slot and keeps a (buf, size, pos, writable) record. Programs
// stage files into the FS at startup via mfsRegister(name, ptr, size,
// writable) and then use the standard fopen/fread/fwrite/fseek API.
// FILE slot. Two backends:
//
// Why in-memory rather than GS/OS-backed: the smoke harness doesn't
// boot ProDOS, so toolbox-FS calls would crash MAME. An in-RAM FS
// covers the common need (parser/printer that wants a FILE*) without
// pulling in GS/OS init. A future GS/OS backend can replace
// fopenImpl/etc. without touching callers.
// kind=MEM — backed by an mfsRegister'd in-memory buffer. Used by
// smoke tests that don't have a real disk; staged via
// mfsRegister(name, ptr, size, cap, writable) at startup.
// kind=GSOS — backed by a real GS/OS file. fopen falls through to
// gsosOpen for any path not in the mfs table, so callers
// with a mounted ProDOS volume get true file I/O via
// the GS/OS class-1 dispatcher (Open/Read/Write/Close/
// SetMark/GetMark/SetEOF/GetEOF). Requires a GS/OS-
// hosted environment; in a bare MAME boot (no ProDOS
// volume) gsosOpen fails and fopen returns NULL.
//
// FILE-table layout: 8 entries. Slot 0..2 are stdin/stdout/stderr
// (immutable); 3..7 are user-allocated by fopen. Each entry has:
// kind (0=stdio in/out/err, 1=memory)
// buf (memory buffer base)
// size (logical size in bytes)
// cap (allocated capacity — for write-grow)
// pos (current seek position)
// kind (0=stdin, 1=stdout, 2=stderr, 3=memory, 4=GS/OS)
// buf (memory buffer base; unused for GS/OS)
// size (logical size in bytes; unused for GS/OS — read on demand)
// cap (allocated capacity — for write-grow; unused for GS/OS)
// pos (current seek position; unused for GS/OS — Mark is authoritative)
// eof, err flags
// writable (1 if opened for "w" or "r+" or "a")
// ungetc holding cell (-1 = empty)
// refNum (GS/OS file reference; only valid when kind=GSOS)
#define FILE_KIND_STDIN 0
#define FILE_KIND_STDOUT 1
#define FILE_KIND_STDERR 2
#define FILE_KIND_MEM 3
#define FILE_KIND_GSOS 4
typedef struct __sFILE {
u8 kind;
@ -834,13 +889,14 @@ typedef struct __sFILE {
size_t pos;
int unget; // -1 if no pushed-back char
const char *path; // borrowed from caller, NULL for stdio
unsigned short refNum; // GS/OS file reference (kind=GSOS only)
} FILE;
#define MFS_MAX_FILES 8
static FILE __mfs[MFS_MAX_FILES] = {
{ FILE_KIND_STDIN, 0, 0, 0, 0, 0, 0, 0, -1, 0 },
{ FILE_KIND_STDOUT, 1, 0, 0, 0, 0, 0, 0, -1, 0 },
{ FILE_KIND_STDERR, 1, 0, 0, 0, 0, 0, 0, -1, 0 },
{ FILE_KIND_STDIN, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 },
{ FILE_KIND_STDOUT, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0 },
{ FILE_KIND_STDERR, 1, 0, 0, 0, 0, 0, 0, -1, 0, 0 },
};
FILE *stdin = &__mfs[0];
@ -910,6 +966,16 @@ int fputc(int c, FILE *stream) {
if (stream->pos > stream->size) stream->size = stream->pos;
return (int)(unsigned char)c;
}
if (stream->kind == FILE_KIND_GSOS) {
if (!stream->writable) { stream->err = 1; return -1; }
unsigned char b = (unsigned char)c;
__GsosIORecGS r = { 4, stream->refNum, &b, 1, 0 };
if (gsosWrite(&r) != 0 || r.transferCount != 1) {
stream->err = 1;
return -1;
}
return (int)b;
}
return -1;
}
@ -919,7 +985,7 @@ int fputs(const char *s, FILE *stream) {
while (*s) { putchar(*s); s++; }
return 0;
}
if (stream->kind == FILE_KIND_MEM) {
if (stream->kind == FILE_KIND_MEM || stream->kind == FILE_KIND_GSOS) {
while (*s) {
if (fputc(*s, stream) == -1) return -1;
s++;
@ -934,6 +1000,14 @@ int fflush(FILE *stream) { (void)stream; return 0; }
int fclose(FILE *stream) {
if (!stream) return -1;
// Don't close stdin/stdout/stderr — they're long-lived statics.
if (stream->kind == FILE_KIND_GSOS) {
__GsosRefNumRecGS c = { 1, stream->refNum };
gsosClose(&c);
stream->kind = 0;
stream->refNum = 0;
stream->path = (const char *)0;
return 0;
}
if (stream->kind != FILE_KIND_MEM) return 0;
stream->kind = 0;
stream->buf = (char *)0;
@ -949,6 +1023,7 @@ extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap);
// Forward decl for vfprintf so fprintf can call it.
int vfprintf(FILE *stream, const char *fmt, va_list ap);
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
// Opaque pos-update helper. The vfprintf body's `stream->pos +=
// written` got DSE'd under p:32:16 + size_t=unsigned long when called
@ -982,6 +1057,20 @@ int vfprintf(FILE *stream, const char *fmt, va_list ap) {
if (!stream) return -1;
if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR)
return vprintf(fmt, ap);
if (stream->kind == FILE_KIND_GSOS) {
// Format into a stack buffer, then push to GS/OS via fwrite.
// 256 bytes covers most format-string outputs; longer strings
// get truncated (caller can break up the format if needed).
if (!stream->writable) { stream->err = 1; return -1; }
char tmp[256];
int n = vsnprintf(tmp, sizeof(tmp), fmt, ap);
if (n < 0) { stream->err = 1; return -1; }
size_t outLen = ((size_t)n < sizeof(tmp) - 1)
? (size_t)n : sizeof(tmp) - 1;
size_t w = fwrite(tmp, 1, outLen, stream);
if (w != outLen) return -1;
return n;
}
if (stream->kind == FILE_KIND_MEM) {
// Format into the file's tail. Use the memory buffer that
// remains as a snprintf target. Caller is responsible for
@ -1067,6 +1156,22 @@ static void initFileMem(FILE *f, const MfsEntry *reg, int wantWrite) {
}
}
// Scratch GSString for fopen's gsosOpen call. Single static buffer is
// fine — fopen is non-reentrant on a single-threaded target.
// Filled by __buildGSString; fopen then passes this buffer's address
// as the pathname field of the Open parm block.
static struct {
// Number of bytes of text[] in use.
u16 length;
// Path bytes, stored without a NUL terminator; the array size is the
// longest path __buildGSString accepts.
char text[256];
} __gsosPathBuf;
// Stage `path` in __gsosPathBuf as a length-prefixed string.
// Returns 0 on success. Returns -1, leaving the buffer untouched, when
// `path` holds more than 256 characters before its terminating NUL.
static int __buildGSString(const char *path) {
    // Measure first so an over-long path is rejected before any byte
    // of the scratch buffer is overwritten.
    size_t len = 0;
    for (;;) {
        if (len >= 256 || path[len] == '\0') break;
        len++;
    }
    if (path[len] != '\0') return -1;

    const char *src = path;
    char *dst = __gsosPathBuf.text;
    for (size_t left = len; left != 0; left--) *dst++ = *src++;
    __gsosPathBuf.length = (u16)len;
    return 0;
}
FILE *fopen(const char *path, const char *mode) {
if (!path || !mode) return (FILE *)0;
int wantWrite = 0;
@ -1078,7 +1183,8 @@ FILE *fopen(const char *path, const char *mode) {
else if (mode[0] == 'a') { wantWrite = 1; append = 1; wantRead = (mode[1] == '+' || (mode[1] == 'b' && mode[2] == '+')); }
else return (FILE *)0;
// Locate registration.
// Locate mfs registration first. Backwards-compat: any path
// staged via mfsRegister(path, ...) routes to memory backend.
MfsEntry *reg = (MfsEntry *)0;
for (int i = 0; i < MFS_MAX_REG; i++) {
if (__mfsReg[i].inUse && strcmp(__mfsReg[i].path, path) == 0) {
@ -1086,8 +1192,7 @@ FILE *fopen(const char *path, const char *mode) {
break;
}
}
if (!reg) return (FILE *)0;
if (wantWrite && !reg->writable) return (FILE *)0;
if (reg && wantWrite && !reg->writable) return (FILE *)0;
// Allocate a FILE slot (3..MAX-1 — 0..2 are stdin/out/err).
FILE *f = (FILE *)0;
@ -1099,20 +1204,80 @@ FILE *fopen(const char *path, const char *mode) {
}
if (!f) return (FILE *)0;
if (reg) {
initFileMem(f, reg, wantWrite);
(void)wantRead;
if (truncate) f->size = 0;
if (append) f->pos = f->size;
return f;
}
// No mfs registration — try GS/OS. Requires iigsGsos.o linked
// (weak references; absent in some smoke tests) AND a mounted
// ProDOS volume + Tool Locator init at run time. Either missing
// → NULL.
if (!__gsosAvailable()) return (FILE *)0;
if (__buildGSString(path) < 0) return (FILE *)0;
__GsosOpenParm op = { 2, 0, &__gsosPathBuf };
if (gsosOpen(&op) != 0) return (FILE *)0;
f->kind = FILE_KIND_GSOS;
f->writable = (u8)(wantWrite ? 1 : 0);
f->eof = 0;
f->err = 0;
f->buf = (char *)0;
f->size = 0;
f->cap = 0;
f->pos = 0;
f->unget = -1;
f->path = path;
f->refNum = op.refNum;
if (truncate) {
// "w" / "w+" — truncate to zero length.
__GsosEOFRecGS e = { 2, op.refNum, 0 };
if (gsosSetEOF(&e) != 0) f->err = 1;
}
if (append) {
// "a" / "a+" — position at end-of-file.
__GsosEOFRecGS e = { 2, op.refNum, 0 };
if (gsosGetEOF(&e) == 0) {
__GsosMarkRecGS m = { 2, op.refNum, e.eof };
gsosSetMark(&m);
}
}
(void)wantRead;
return f;
}
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
if (!stream || stream->kind != FILE_KIND_MEM) return 0;
if (!stream) return 0;
if (size == 0 || nmemb == 0) return 0;
// Avoid 32-bit overflow on size * nmemb: cap nmemb so each item
// (size bytes) fits in remaining 16-bit address space.
if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size;
if (stream->kind == FILE_KIND_GSOS) {
// Drain unget byte first if present.
char *out = (char *)ptr;
unsigned long total = (unsigned long)size * (unsigned long)nmemb;
unsigned long offset = 0;
if (stream->unget >= 0 && total > 0) {
*out++ = (char)stream->unget;
stream->unget = -1;
offset = 1;
}
if (offset >= total) return offset / size;
__GsosIORecGS r = {
4, stream->refNum, out, total - offset, 0
};
u16 rc = gsosRead(&r);
unsigned long got = offset + r.transferCount;
if (rc != 0 || r.transferCount < total - offset) {
stream->eof = 1;
if (rc != 0 && rc != 0x4C) stream->err = 1; // 0x4C = eofErr
}
return (size_t)(got / size);
}
if (stream->kind != FILE_KIND_MEM) return 0;
char *out = (char *)ptr;
size_t items = 0;
while (items < nmemb) {
@ -1150,6 +1315,16 @@ size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) {
}
return items;
}
if (stream->kind == FILE_KIND_GSOS) {
if (!stream->writable) { stream->err = 1; return 0; }
unsigned long total = (unsigned long)size * (unsigned long)nmemb;
__GsosIORecGS r = { 4, stream->refNum, (void *)in, total, 0 };
if (gsosWrite(&r) != 0) {
stream->err = 1;
return (size_t)(r.transferCount / size);
}
return (size_t)(r.transferCount / size);
}
if (stream->kind != FILE_KIND_MEM) return 0;
if (!stream->writable) { stream->err = 1; return 0; }
size_t items = 0;
@ -1174,7 +1349,31 @@ size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) {
#define SEEK_END 2
int fseek(FILE *stream, long offset, int whence) {
if (!stream || stream->kind != FILE_KIND_MEM) return -1;
if (!stream) return -1;
if (stream->kind == FILE_KIND_GSOS) {
long base;
if (whence == SEEK_SET) {
base = 0;
} else if (whence == SEEK_CUR) {
__GsosMarkRecGS m = { 2, stream->refNum, 0 };
if (gsosGetMark(&m) != 0) return -1;
base = (long)m.position;
} else if (whence == SEEK_END) {
__GsosEOFRecGS e = { 2, stream->refNum, 0 };
if (gsosGetEOF(&e) != 0) return -1;
base = (long)e.eof;
} else {
return -1;
}
long target = base + offset;
if (target < 0) return -1;
__GsosMarkRecGS m = { 2, stream->refNum, (unsigned long)target };
if (gsosSetMark(&m) != 0) return -1;
stream->eof = 0;
stream->unget = -1;
return 0;
}
if (stream->kind != FILE_KIND_MEM) return -1;
long base;
if (whence == SEEK_SET) base = 0;
else if (whence == SEEK_CUR) base = (long)stream->pos;
@ -1189,7 +1388,13 @@ int fseek(FILE *stream, long offset, int whence) {
}
// Report the current position of `stream`, or -1L if it has none.
//   - NULL stream: -1L.
//   - GS/OS stream: the position is not cached in the FILE record; it
//     is read from GS/OS via gsosGetMark. A non-zero result from that
//     call yields -1L.
//   - memory stream: the cached `pos` field.
//   - anything else (stdin/stdout/stderr): -1L.
// The stream kind must be dispatched before any MEM-only rejection;
// a leading `kind != FILE_KIND_MEM` guard would return -1L for GS/OS
// streams and leave the gsosGetMark branch unreachable.
long ftell(FILE *stream) {
    if (!stream) return -1L;
    if (stream->kind == FILE_KIND_GSOS) {
        __GsosMarkRecGS m = { 2, stream->refNum, 0 };
        if (gsosGetMark(&m) != 0) return -1L;
        return (long)m.position;
    }
    if (stream->kind != FILE_KIND_MEM) return -1L;
    return (long)stream->pos;
}
@ -1205,6 +1410,17 @@ int fgetc(FILE *stream) {
return (int)(unsigned char)stream->buf[stream->pos++];
}
if (stream->kind == FILE_KIND_STDIN) return getchar();
if (stream->kind == FILE_KIND_GSOS) {
unsigned char b = 0;
__GsosIORecGS r = { 4, stream->refNum, &b, 1, 0 };
u16 rc = gsosRead(&r);
if (rc != 0 || r.transferCount != 1) {
stream->eof = 1;
if (rc != 0 && rc != 0x4C) stream->err = 1;
return -1;
}
return (int)b;
}
return -1;
}

View file

@ -323,15 +323,38 @@ __mulsi3:
stz 0xe8
stz 0xea
; Fast path: if multiplier's high half ($e2) is 0, we only
; need 16 loop iterations (the full 32-iter shift-out would
; just shift in zeros after iter 16). Common in C code where
; both source operands are zext'd from i16 — e.g. `i*i` with
; i a `unsigned short`. Saves ~half the multiply cycles in
; that case (sumOfSquares: 80000 → ~40000 cyc/call).
; need 16 loop iterations AND we can drop the multiplier-hi
; shift step entirely (lsr $e2 + bcc + ora #$8000) — that step
; only ever fires when hi has bits to shift out, which it
; doesn't here. Saves ~8 cyc/iter × 16 iters = ~128 cyc/call
; vs the generic 16-iter path. Common in C code where both
; source operands are zext'd from i16 (e.g. `i*i` with i a
; `unsigned short`) — sumOfSquares benchmark hits this on every
; iteration.
lda 0xe2
bne .Lmulsi_full
ldy #0x10
bra .Lmulsi_loop
.Lmulsi_u16_loop:
; Test bit 0 of multiplier (lo word).
lda 0xe0
lsr a
sta 0xe0
bcc .Lmulsi_u16_noadd
clc
lda 0xe8
adc 0xe4
sta 0xe8
lda 0xea
adc 0xe6
sta 0xea
.Lmulsi_u16_noadd:
asl 0xe4
rol 0xe6
dey
bne .Lmulsi_u16_loop
ldx 0xea
lda 0xe8
rtl
.Lmulsi_full:
ldy #0x20
.Lmulsi_loop:
@ -349,12 +372,6 @@ __mulsi3:
adc 0xe6
sta 0xea
.Lmulsi_noadd:
; Shift multiplier right (32-bit, hi-into-lo) — we already shifted
; the lo half above, but the bit shifted out went to carry. We
; need to also bring the lo bit of the hi half into bit 15 of lo,
; and shift hi right. Simpler: do a full 32-bit shift right
; before the LSR. Restructure:
;
; Shift multiplicand left (32-bit, carry chain).
asl 0xe4
rol 0xe6

View file

@ -3,10 +3,13 @@
# Lua keyboard automation to launch a user OMF, sample memory at
# specific frames to verify the program executed.
#
# Usage: runViaFinder.sh <omf-file> --check <addr>=<value>...
# The OMF file is injected as /SYSTEM.DISK/HELLO (top-level on the
# boot disk). Lua then waits for Finder, types S+Cmd-O to open the
# System.Disk volume window, then H+Cmd-O to launch HELLO.
# Usage: runViaFinder.sh <omf-file> [--data /DATA/NAME=local_file]...
# --check <addr>=<value>...
# The OMF file is injected as /DATA/HELLO on a separate 800K data
# disk; Lua drives Finder to open the Data volume and launch HELLO.
# Each --data option also injects an arbitrary file (raw bytes) onto
# the same disk under the given path — used for stdio smoke tests
# that need a known file present at runtime.
#
# Memory checks happen at frame 5400 (~90s emulated, well after the
# launch path completes) and exit 0 / 1 depending on whether each
@ -22,7 +25,15 @@ set -euo pipefail
OMF="$1"
shift
[ -f "$OMF" ] || { echo "missing: $OMF" >&2; exit 2; }
[ "${1:-}" = "--check" ] || { echo "usage: $0 <omf> --check <addr>=<val>..." >&2; exit 2; }
# Collect optional --data injections before --check.
DATA_INJECTS=()
while [ $# -gt 0 ] && [ "$1" = "--data" ]; do
[ $# -ge 2 ] || { echo "usage: $0 <omf> [--data /DATA/NAME=path]... --check <addr>=<val>..." >&2; exit 2; }
DATA_INJECTS+=("$2")
shift 2
done
[ "${1:-}" = "--check" ] || { echo "usage: $0 <omf> [--data /DATA/NAME=path]... --check <addr>=<val>..." >&2; exit 2; }
shift
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
@ -43,6 +54,21 @@ cp "$SYSDISK" "$WORK/disk.po"
cp "$OMF" "$WORK/HELLO#B30000"
"$CADIUS" ADDFILE "$WORK/data.po" /DATA "$WORK/HELLO#B30000" >/dev/null
# Inject extra data files. Spec syntax: /DATA/NAME=local_file.
# Each gets type=$06 (BIN, generic data) so GS/OS treats it as a
# plain file readable via gsosOpen.
#
# The ${arr[@]+"${arr[@]}"} form expands to nothing when no --data
# option was given. A plain "${DATA_INJECTS[@]}" on an empty array is
# reported as an unbound variable under `set -u` on bash older than
# 4.4, which would abort every run that only passes --check.
for inj in ${DATA_INJECTS[@]+"${DATA_INJECTS[@]}"}; do
    # Require a non-empty target and a non-empty source around an '='.
    case "$inj" in
        ?*=?*) ;;
        *) echo "bad --data spec (want /DATA/NAME=local_file): $inj" >&2; exit 2 ;;
    esac
    # Split both halves at the FIRST '=' so a local path that itself
    # contains '=' is kept intact.
    targetPath="${inj%%=*}"
    srcPath="${inj#*=}"
    [ -f "$srcPath" ] || { echo "missing data injection source: $srcPath" >&2; exit 2; }
    # cadius ADDFILE uses the basename of the source as the on-disk name,
    # with #TTAAAAAA suffix selecting type+aux. Strip the leading
    # /VOL/ from targetPath to get the in-volume name.
    inVolName="${targetPath##*/}"
    [ -n "$inVolName" ] || { echo "bad --data spec (empty file name): $inj" >&2; exit 2; }
    cp "$srcPath" "$WORK/${inVolName}#060000"
    "$CADIUS" ADDFILE "$WORK/data.po" /DATA "$WORK/${inVolName}#060000" >/dev/null
done
LUA_CHECKS=""
EXPECTS=()
for pair in "$@"; do

View file

@ -5459,6 +5459,91 @@ print(f'OK: {nCreloc} cRELOC opcodes match sidecar')
rm -f "$cR1" "$oR1" "$binR1" "$mapR1" "$relR1" "$omfR1"
fi
# GS/OS file I/O end-to-end smoke. Builds a test program that calls
# fopen on a real ProDOS path (no mfsRegister), reads bytes via the
# GS/OS class-1 dispatcher, writes a marker. Validates the full
# FILE_KIND_GSOS surface: gsosOpen → gsosRead → gsosClose, the libc.c
# fopen fallthrough from the mfs lookup, and weak-link resolution to
# iigsGsos.o. Disabled by default — set GSOS_FILE_SMOKE=1 to enable.
#
# Status (2026-05-08): the program LINKS cleanly and the test rig
# (runViaFinder.sh + cadius --data injection) all work. When run
# under real GS/OS 6.0.2 in MAME the gsosOpen call hangs the CPU
# (never returns from $E100A8); root cause not yet diagnosed —
# possibly a parm-block bank issue or a Loader-state assumption the
# wrapper makes that's incorrect for class-1 Open under real GS/OS.
# The stub-dispatcher GS/OS smoke (existing) validates the wrapper
# contract, so this is specific to the dispatcher's behaviour.
#
# Manual repro after fix:
# GSOS_FILE_SMOKE=1 bash scripts/smokeTest.sh
# Tool/disk locations are overridable from the environment.
CADIUS=${CADIUS:-/tmp/cadius/cadius}
SYSDISK=${SYSDISK:-$PROJECT_ROOT/tools/gsos/sys602.po}
# Runs only when explicitly enabled AND every external dependency
# (clang, cadius, the system disk image, mame) is present.
if [ "${GSOS_FILE_SMOKE:-0}" = "1" ] \
    && [ -x "$CLANG" ] && [ -x "$CADIUS" ] && [ -f "$SYSDISK" ] \
    && command -v mame >/dev/null 2>&1; then
    log "check: GS/OS fopen/fread reads /DATA/TESTFILE via runViaFinder"
    cGsfFile="$(mktemp --suffix=.c)"
    oGsfFile="$(mktemp --suffix=.o)"
    binGsf="$(mktemp --suffix=.bin)"
    mapGsf="$(mktemp --suffix=.map)"
    relGsf="$(mktemp --suffix=.reloc)"
    omfGsf="$(mktemp --suffix=.omf)"
    testFileGsf="$(mktemp --suffix=.dat)"
    # 13-byte payload the test program expects to read back.
    printf 'Hello, world!' > "$testFileGsf"
    # Test program: each successful step sets one bit of the marker
    # byte at $70 (0x10 fopen, 0x20 fread count, 0x40 contents,
    # 0x80 fclose), so a full pass leaves 0xf0.
    cat > "$cGsfFile" <<'EOF'
extern struct __sFILE *fopen(const char *path, const char *mode);
extern unsigned long fread(void *p, unsigned long s, unsigned long n, struct __sFILE *f);
extern int fclose(struct __sFILE *f);
static char rbuf[16];
__attribute__((noinline)) static int strnequ(const char *a, const char *b, int n) {
for (int i = 0; i < n; i++) if (a[i] != b[i]) return 0;
return 1;
}
int main(void) {
unsigned char ok = 0;
struct __sFILE *f = fopen("/DATA/TESTFILE", "r");
if (f) {
ok |= 0x10;
unsigned long n = fread(rbuf, 1, 13, f);
if (n == 13) ok |= 0x20;
if (strnequ(rbuf, "Hello, world!", 13)) ok |= 0x40;
if (fclose(f) == 0) ok |= 0x80;
}
*(volatile unsigned char *)0x70 = ok;
return 0;
}
EOF
    "$CLANG" --target=w65816 -O2 -ffunction-sections -c \
        "$cGsfFile" -o "$oGsfFile"
    "$PROJECT_ROOT/tools/link816" -o "$binGsf" --text-base 0x1000 \
        --map "$mapGsf" --reloc-out "$relGsf" \
        "$PROJECT_ROOT/runtime/crt0Gsos.o" "$oGsfFile" \
        "$PROJECT_ROOT/runtime/libc.o" \
        "$PROJECT_ROOT/runtime/snprintf.o" \
        "$PROJECT_ROOT/runtime/extras.o" \
        "$PROJECT_ROOT/runtime/softFloat.o" \
        "$PROJECT_ROOT/runtime/softDouble.o" \
        "$PROJECT_ROOT/runtime/iigsGsos.o" \
        "$PROJECT_ROOT/runtime/libgcc.o" >/dev/null 2>&1
    "$PROJECT_ROOT/tools/omfEmit" --input "$binGsf" --map "$mapGsf" \
        --base 0x1000 --entry __start --output "$omfGsf" \
        --name HELLO --expressload --relocs "$relGsf" >/dev/null 2>&1
    if [ ! -s "$omfGsf" ]; then
        die "GS/OS file smoke: omfEmit produced empty/missing OMF"
    fi
    # Run the emulator session ONCE and keep its output. Re-running
    # runViaFinder.sh just to show the tail would repeat the whole
    # Finder-driven boot, and the second run's output is not
    # guaranteed to match the run that actually failed.
    if ! gsfOut="$(bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfGsf" \
            --data "/DATA/TESTFILE=$testFileGsf" \
            --check 0x70=0xf0 2>&1)"; then
        printf '%s\n' "$gsfOut" | tail -5 >&2
        die "GS/OS fopen/fread end-to-end smoke FAILED (marker != 0xf0)"
    fi
    rm -f "$cGsfFile" "$oGsfFile" "$binGsf" "$mapGsf" "$relGsf" \
        "$omfGsf" "$testFileGsf"
fi
# W65816 codegen-shape regression pins. Tiny FileCheck assertions on
# specific lowering behaviors that have broken before; runs in well
# under a second. See scripts/runFileCheckTests.sh.

View file

@ -604,10 +604,18 @@ struct Linker {
}
auto sIt = globalSyms.find(sym.name);
if (sIt == globalSyms.end()) {
// Undefined symbol — for the strict link path the caller
// dies; for the DWARF sidecar this just means "leave the
// bytes alone".
// Undefined symbol. If the reference itself is weak, ELF
// semantics say "leave the address as 0" — do that and
// return resolved-true so the caller doesn't error out.
// This lets libc reference optional helpers (e.g. the
// GS/OS file backend in iigsGsos.s) without forcing every
// caller to link iigsGsos.o. Strong unresolved → caller
// dies.
resolvedName = sym.name;
if (sym.bind == STB_WEAK) {
target = 0 + r.addend;
return true;
}
return false;
}
target = sIt->second + r.addend;

View file

@ -546,6 +546,18 @@ SDValue W65816TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
SDValue LH = extractWide32Hi(DAG, DL, LHS);
SDValue RL = extractWide32Lo(DAG, DL, RHS);
SDValue RH = extractWide32Hi(DAG, DL, RHS);
// Fast path: i32 == 0 / != 0 → (LL | LH) cmp 0. Drops two i16
// setcc materializations + an AND + (for NE) an XOR; the BR_CC
// can branch directly on the OR-test. Hot in `while (x)` and
// any i32-counter loop test.
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
isa<ConstantSDNode>(RHS) &&
cast<ConstantSDNode>(RHS)->isZero()) {
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i16, LL, LH);
SDValue Z16 = DAG.getConstant(0, DL, MVT::i16);
return DAG.getNode(ISD::BR_CC, DL, MVT::Other, Chain,
DAG.getCondCode(CC), Or, Z16, Dest);
}
SDValue Bool;
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
SDValue EqLo = DAG.getSetCC(DL, MVT::i16, LL, RL, ISD::SETEQ);
@ -624,6 +636,15 @@ SDValue W65816TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
SDValue LH = extractWide32Hi(DAG, DL, LHS);
SDValue RL = extractWide32Lo(DAG, DL, RHS);
SDValue RH = extractWide32Hi(DAG, DL, RHS);
// Fast path: i32 == 0 / != 0 → (LL | LH) cmp 0. One i16 OR + one
// i16 setcc instead of two setcc + AND (+ XOR for NE).
if ((CC == ISD::SETEQ || CC == ISD::SETNE) &&
isa<ConstantSDNode>(RHS) &&
cast<ConstantSDNode>(RHS)->isZero()) {
SDValue Or = DAG.getNode(ISD::OR, DL, MVT::i16, LL, LH);
SDValue Z16 = DAG.getConstant(0, DL, MVT::i16);
return DAG.getSetCC(DL, VT, Or, Z16, CC);
}
if (CC == ISD::SETEQ || CC == ISD::SETNE) {
SDValue EqLo = DAG.getSetCC(DL, VT, LL, RL, ISD::SETEQ);
SDValue EqHi = DAG.getSetCC(DL, VT, LH, RH, ISD::SETEQ);

View file

@ -260,8 +260,58 @@ static bool tryEliminateLoadAfterStore(MachineBasicBlock &MBB,
MI.getOperand(0).isReg() &&
MI.getOperand(0).getReg() == StoredReg) {
// A volatile load is observable — never elide, even if the
// value is provably the same as the prior store.
if (MI.hasOrderedMemoryRef() || StaMI.hasOrderedMemoryRef())
// value is provably the same as the prior store. But STAfi/
// LDAfi target compiler-managed stack spill slots, which are
// by construction never volatile — `hasOrderedMemoryRef()`
// returns true here only because both lack explicit memops
// (the conservative "no info → treat as ordered" default).
// Check the actual memops if present; absence is fine.
auto isReallyVolatile = [](const MachineInstr &I) {
for (auto *MMO : I.memoperands())
if (MMO->isVolatile() || MMO->isAtomic())
return true;
return false;
};
if (isReallyVolatile(MI) || isReallyVolatile(StaMI))
return false;
// LDA sets N/Z based on the loaded value. Dropping it would
// expose stale N/Z from before the STA→LDA pair to the next
// flag-reading op (e.g. a branch). Only safe to drop if the
// immediately-following op overwrites N/Z.
auto opSetsNZ = [](unsigned Op) {
switch (Op) {
case W65816::LDAfi:
case W65816::LDAi16imm:
case W65816::LDAabs:
case W65816::ANDi16imm: case W65816::ANDabs:
case W65816::ORAi16imm: case W65816::ORAabs:
case W65816::EORi16imm: case W65816::EORabs:
case W65816::ADCi16imm: case W65816::ADCabs: case W65816::ADCfi:
case W65816::SBCi16imm: case W65816::SBCabs: case W65816::SBCfi:
case W65816::ADCEi16imm: case W65816::ADCEabs: case W65816::ADCEfi:
case W65816::SBCEi16imm: case W65816::SBCEabs: case W65816::SBCEfi:
case W65816::ASLA16: case W65816::LSRA16:
case W65816::ASLA8: case W65816::LSRA8:
return true;
default:
return false;
}
};
// Walk past further STAfi pseudos (spill stores) — they preserve
// A's flags. Only walk past STAfi specifically; STA_DP and
// pointer-indirect stores have inserter-driven expansions that
// can introduce flag-touching ops, and walking past those broke
// a printf smoke test.
auto NextIt = std::next(MI.getIterator());
while (NextIt != MBB.end()) {
if (NextIt->isDebugInstr()) { ++NextIt; continue; }
if (NextIt->getOpcode() == W65816::STAfi) { ++NextIt; continue; }
break;
}
if (NextIt == MBB.end() || NextIt->isBranch() || NextIt->isReturn())
return false;
if (!NextIt->definesRegister(W65816::P, TRI) &&
!opSetsNZ(NextIt->getOpcode()))
return false;
MI.eraseFromParent();
return true;