Checkpoint

This commit is contained in:
Scott Duensing 2026-05-07 19:59:20 -05:00
parent 0210b06a5e
commit 15c7fa0db2
19 changed files with 347 additions and 112 deletions

View file

@ -4,7 +4,7 @@
#ifndef _STDDEF_H #ifndef _STDDEF_H
#define _STDDEF_H #define _STDDEF_H
typedef unsigned int size_t; typedef unsigned long size_t;
typedef int ptrdiff_t; typedef int ptrdiff_t;
typedef int wchar_t; // not really wide-char-supported typedef int wchar_t; // not really wide-char-supported

View file

@ -4,7 +4,7 @@
#include <stdarg.h> #include <stdarg.h>
typedef struct __sFILE FILE; typedef struct __sFILE FILE;
typedef unsigned int size_t; typedef unsigned long size_t;
extern FILE *stdin; extern FILE *stdin;
extern FILE *stdout; extern FILE *stdout;

View file

@ -1,7 +1,7 @@
#ifndef _STDLIB_H #ifndef _STDLIB_H
#define _STDLIB_H #define _STDLIB_H
typedef unsigned int size_t; typedef unsigned long size_t;
void *malloc(size_t n); void *malloc(size_t n);
void *calloc(size_t nmemb, size_t size); void *calloc(size_t nmemb, size_t size);

View file

@ -1,7 +1,7 @@
#ifndef _STRING_H #ifndef _STRING_H
#define _STRING_H #define _STRING_H
typedef unsigned int size_t; typedef unsigned long size_t;
void *memcpy(void *dst, const void *src, size_t n); void *memcpy(void *dst, const void *src, size_t n);
void *memmove(void *dst, const void *src, size_t n); void *memmove(void *dst, const void *src, size_t n);

View file

@ -3,7 +3,7 @@
typedef long time_t; typedef long time_t;
typedef unsigned long clock_t; typedef unsigned long clock_t;
typedef unsigned int size_t; typedef unsigned long size_t;
#define CLOCKS_PER_SEC 60 // IIgs vsync tick (placeholder) #define CLOCKS_PER_SEC 60 // IIgs vsync tick (placeholder)

View file

@ -9,7 +9,7 @@
#define _WCHAR_H #define _WCHAR_H
typedef unsigned short wchar_t; typedef unsigned short wchar_t;
typedef unsigned int size_t; typedef unsigned long size_t;
typedef long wint_t; typedef long wint_t;
#define WEOF ((wint_t)-1) #define WEOF ((wint_t)-1)

View file

@ -7,7 +7,7 @@
// string.h: strcat, strncat // string.h: strcat, strncat
// stdlib.h: atol, llabs // stdlib.h: atol, llabs
typedef unsigned int size_t; typedef unsigned long size_t;
char *strcat(char *dst, const char *src) { char *strcat(char *dst, const char *src) {

View file

@ -13,9 +13,10 @@
// memory-mapped IO port or a MAME-debug Lua hook; for now putchar // memory-mapped IO port or a MAME-debug Lua hook; for now putchar
// is provided as a weak stub that does nothing. // is provided as a weak stub that does nothing.
typedef unsigned int size_t; typedef unsigned long size_t;
typedef int ssize_t; typedef int ssize_t;
typedef unsigned char u8; typedef unsigned char u8;
typedef unsigned short u16;
// ---- string.h ---- // ---- string.h ----
@ -365,14 +366,15 @@ void *memchr(const void *s, int c, size_t n) {
return 0; return 0;
} }
// strstr: index-based scan rather than pointer-increment.
char *strstr(const char *haystack, const char *needle) { char *strstr(const char *haystack, const char *needle) {
if (!*needle) return (char *)haystack; if (!needle[0]) return (char *)haystack;
while (*haystack) { unsigned int i = 0;
const char *h = haystack; while (haystack[i]) {
const char *n = needle; unsigned int j = 0;
while (*n && *h == *n) { h++; n++; } while (needle[j] && haystack[i + j] == needle[j]) j++;
if (!*n) return (char *)haystack; if (!needle[j]) return (char *)(haystack + i);
haystack++; i++;
} }
return 0; return 0;
} }
@ -453,14 +455,18 @@ extern char __heap_end[] __attribute__((weak));
#define HEAP_DEFAULT_START ((char *)0x4000) #define HEAP_DEFAULT_START ((char *)0x4000)
#define HEAP_DEFAULT_END ((char *)0xBF00) #define HEAP_DEFAULT_END ((char *)0xBF00)
// Heap is bounded to <32KB so the size field stays uint16_t even
// under 32-bit size_t (saves 2 bytes/header). next-pointer width
// follows the data layout (2 bytes under p:16, 4 under p:32) — bake
// it into FREE_NODE_SZ via sizeof.
typedef struct FreeBlk { typedef struct FreeBlk {
size_t size; // payload size, NOT including header u16 size; // payload size, NOT including header
struct FreeBlk *next; // valid only while in the free list struct FreeBlk *next; // valid only while in the free list
} FreeBlk; } FreeBlk;
#define HDR_SZ ((size_t)2) // sizeof(size_t) only #define HDR_SZ ((size_t)sizeof(u16))
#define FREE_NODE_SZ ((size_t)4) // size + next ptr #define FREE_NODE_SZ ((size_t)(sizeof(u16) + sizeof(struct FreeBlk *)))
#define MIN_SPLIT ((size_t)(FREE_NODE_SZ + 2)) // 6 bytes #define MIN_SPLIT ((size_t)(FREE_NODE_SZ + 2))
static FreeBlk *freeList = (FreeBlk *)0; static FreeBlk *freeList = (FreeBlk *)0;
static char *bumpPtr = (char *)0; static char *bumpPtr = (char *)0;
@ -474,18 +480,20 @@ static void mallocInitOnce(void) {
freeList = (FreeBlk *)0; freeList = (FreeBlk *)0;
} }
void *malloc(size_t n) { void *malloc(size_t n0) {
mallocInitOnce(); mallocInitOnce();
// Heap ceiling is ~32KB so anything > 0x7FF0 is unsatisfiable.
if (n0 > (size_t)0x7FF0) return (void *)0;
// Round up to 2-byte alignment, with a minimum of FREE_NODE_SZ-HDR_SZ.
// Keep this in 16-bit arithmetic — the 0x7FF0 cap above guarantees the
// value fits. Going through `unsigned long` here triggers an i32 umax
// pattern that our backend currently miscompiles; staying 16-bit dodges
// that path entirely.
u16 n = (u16)n0;
if (n == 0) n = 1; if (n == 0) n = 1;
// Overflow guard: size_t is 16-bit on this target. Without this, n = (u16)((n + 1) & ~(u16)1);
// malloc(65535) rounds up to 65536 -> wraps to 0 -> allocates 2 if (n < (u16)(FREE_NODE_SZ - HDR_SZ))
// bytes (wrong size); even shorter values can wrap the bumpPtr n = (u16)(FREE_NODE_SZ - HDR_SZ);
// sum below. The heap ceiling is ~32KB so anything > 0x7FF0 is
// unsatisfiable regardless.
if (n > (size_t)0x7FF0) return (void *)0;
n = (n + 1) & ~(size_t)1; // round up to 2 bytes
if (n < FREE_NODE_SZ - HDR_SZ)
n = FREE_NODE_SZ - HDR_SZ; // ensure freed block can hold next-ptr
// First-fit on free list. // First-fit on free list.
FreeBlk **link = &freeList; FreeBlk **link = &freeList;
FreeBlk *cur = freeList; FreeBlk *cur = freeList;
@ -493,11 +501,11 @@ void *malloc(size_t n) {
if (cur->size >= n) { if (cur->size >= n) {
// Split if there's room for a separate free block. // Split if there's room for a separate free block.
if (cur->size >= n + MIN_SPLIT) { if (cur->size >= n + MIN_SPLIT) {
size_t rem = cur->size - n - HDR_SZ; u16 rem = (u16)(cur->size - n - HDR_SZ);
FreeBlk *tail = (FreeBlk *)((char *)cur + HDR_SZ + n); FreeBlk *tail = (FreeBlk *)((char *)cur + HDR_SZ + n);
tail->size = rem; tail->size = rem;
tail->next = cur->next; tail->next = cur->next;
cur->size = n; cur->size = (u16)n;
*link = tail; *link = tail;
} else { } else {
*link = cur->next; *link = cur->next;
@ -510,7 +518,7 @@ void *malloc(size_t n) {
// Bump-allocate from the high end. // Bump-allocate from the high end.
char *p = bumpPtr; char *p = bumpPtr;
if (p + HDR_SZ + n > heapEnd) return (void *)0; if (p + HDR_SZ + n > heapEnd) return (void *)0;
*(size_t *)p = n; *(u16 *)p = (u16)n;
bumpPtr = p + HDR_SZ + n; bumpPtr = p + HDR_SZ + n;
return p + HDR_SZ; return p + HDR_SZ;
} }
@ -538,7 +546,7 @@ void free(void *p) {
char *bEnd = (char *)b + HDR_SZ + b->size; char *bEnd = (char *)b + HDR_SZ + b->size;
if (aEnd == (char *)b) { if (aEnd == (char *)b) {
// a immediately precedes b — extend a, drop b. // a immediately precedes b — extend a, drop b.
a->size += HDR_SZ + b->size; a->size = (u16)(a->size + HDR_SZ + b->size);
*link = b->next; *link = b->next;
b = *link; b = *link;
continue; continue;
@ -548,7 +556,7 @@ void free(void *p) {
// the outer list. We can't continue the inner walk // the outer list. We can't continue the inner walk
// (a is gone), so break out and let the outer loop // (a is gone), so break out and let the outer loop
// restart from a's successor. // restart from a's successor.
b->size += HDR_SZ + a->size; b->size = (u16)(b->size + HDR_SZ + a->size);
*a_link = a->next; *a_link = a->next;
a_absorbed = 1; a_absorbed = 1;
break; break;
@ -580,7 +588,7 @@ void *calloc(size_t nmemb, size_t size) {
void *realloc(void *ptr, size_t n) { void *realloc(void *ptr, size_t n) {
if (!ptr) return malloc(n); if (!ptr) return malloc(n);
if (n == 0) { free(ptr); return (void *)0; } if (n == 0) { free(ptr); return (void *)0; }
size_t old = *(size_t *)((char *)ptr - HDR_SZ); size_t old = *(u16 *)((char *)ptr - HDR_SZ);
if (n <= old) return ptr; if (n <= old) return ptr;
void *q = malloc(n); void *q = malloc(n);
if (!q) return (void *)0; if (!q) return (void *)0;
@ -942,6 +950,25 @@ extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap);
// Forward decl for vfprintf so fprintf can call it. // Forward decl for vfprintf so fprintf can call it.
int vfprintf(FILE *stream, const char *fmt, va_list ap); int vfprintf(FILE *stream, const char *fmt, va_list ap);
// Opaque pos-update helper. The vfprintf body's `stream->pos +=
// written` got dead-store-eliminated (DSE) under p:32:16 +
// size_t=unsigned long when called after a format-spec vsnprintf
// call. Routing through an explicit noinline helper forces the
// compiler to emit the memory store.
//
// Debug probes: every __mfsAdvancePos call overwrites all four.
// Nothing in the visible code reads them back — presumably they are
// inspected from a debugger or a test harness.
// TODO(review): confirm they are still needed; they are non-static
// globals and therefore visible to every translation unit that links
// this file.
volatile unsigned long g_advProbeStream;   // last `stream` argument, as an integer
volatile unsigned long g_advProbeWritten;  // last `written` argument
volatile unsigned int g_advProbeCalls;     // number of calls so far
volatile unsigned long g_advProbePostPos;  // stream->pos after the last update
// Advance `stream->pos` by `written` bytes and grow `stream->size` to
// match when the new position lies past the current size.
//   stream  - FILE to update; dereferenced unconditionally, so the
//             caller must pass a non-NULL pointer.
//   written - number of bytes to advance by.
__attribute__((noinline))
void __mfsAdvancePos(FILE *stream, size_t written) {
g_advProbeCalls++;
g_advProbeStream = (unsigned long)stream;
g_advProbeWritten = written;
// NOTE(review): spelled out as load/add/store instead of `+=`; it is
// unclear from here whether the form matters for the workaround, so
// leave it exactly as written.
stream->pos = stream->pos + written;
// Keep the recorded size at least as large as the position.
if (stream->pos > stream->size) stream->size = stream->pos;
g_advProbePostPos = stream->pos;
}
__attribute__((noinline))
int fprintf(FILE *stream, const char *fmt, ...) { int fprintf(FILE *stream, const char *fmt, ...) {
va_list ap; va_list ap;
__builtin_va_start(ap, fmt); __builtin_va_start(ap, fmt);
@ -950,6 +977,7 @@ int fprintf(FILE *stream, const char *fmt, ...) {
return r; return r;
} }
__attribute__((noinline))
int vfprintf(FILE *stream, const char *fmt, va_list ap) { int vfprintf(FILE *stream, const char *fmt, va_list ap) {
if (!stream) return -1; if (!stream) return -1;
if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR) if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR)
@ -962,11 +990,19 @@ int vfprintf(FILE *stream, const char *fmt, va_list ap) {
size_t remain = (stream->cap > stream->pos) size_t remain = (stream->cap > stream->pos)
? stream->cap - stream->pos : 0; ? stream->cap - stream->pos : 0;
if (remain == 0) { stream->err = 1; return -1; } if (remain == 0) { stream->err = 1; return -1; }
// Stash the FILE* low+high halves in volatile stack locals so
// the compiler is forced to reload after vsnprintf. Without
// this, the compiler keeps stream's hi half in IMG0 ($D0) for
// the entire function; vsnprintf uses $D0 as scratch, so when
// we read stream->* after vsnprintf returns the hi is garbage
// and writes go to the wrong bank. Caught by hex dumper test.
volatile unsigned int streamLo = (unsigned int)(unsigned long)stream;
volatile unsigned int streamHi = (unsigned int)((unsigned long)stream >> 16);
int n = vsnprintf(stream->buf + stream->pos, remain, fmt, ap); int n = vsnprintf(stream->buf + stream->pos, remain, fmt, ap);
if (n < 0) { stream->err = 1; return -1; } FILE *vs = (FILE *)((unsigned long)streamLo | ((unsigned long)streamHi << 16));
if (n < 0) { vs->err = 1; return -1; }
size_t written = ((size_t)n < remain) ? (size_t)n : remain - 1; size_t written = ((size_t)n < remain) ? (size_t)n : remain - 1;
stream->pos += written; __mfsAdvancePos(vs, written);
if (stream->pos > stream->size) stream->size = stream->pos;
return n; return n;
} }
return -1; return -1;

View file

@ -36,7 +36,7 @@
#include <stdint.h> #include <stdint.h>
#include <stddef.h> #include <stddef.h>
extern void *malloc(unsigned int); extern void *malloc(size_t);
extern void free(void *); extern void free(void *);
extern int setjmp(void *jb); extern int setjmp(void *jb);
extern void longjmp(void *jb, int v) __attribute__((noreturn)); extern void longjmp(void *jb, int v) __attribute__((noreturn));
@ -163,7 +163,7 @@ int __gxx_personality_sj0(int version, int actions, uint64_t excClass,
// Itanium C++ ABI surface. // Itanium C++ ABI surface.
void *__cxa_allocate_exception(unsigned int sz) { void *__cxa_allocate_exception(size_t sz) {
void *p = malloc(sizeof(ExcHeader) + sz); void *p = malloc(sizeof(ExcHeader) + sz);
if (!p) { if (!p) {
extern void abort(void) __attribute__((noreturn)); extern void abort(void) __attribute__((noreturn));

View file

@ -8,7 +8,7 @@
// IIgs C program sorts dozens of items, not thousands, and the // IIgs C program sorts dozens of items, not thousands, and the
// constant-factor win of insertion sort dominates at that scale. // constant-factor win of insertion sort dominates at that scale.
typedef unsigned int size_t; typedef unsigned long size_t;
typedef int (*CmpFnT)(const void *, const void *); typedef int (*CmpFnT)(const void *, const void *);

View file

@ -38,7 +38,7 @@
// extra time on this backend, leaking a `buf[-1]` read. Use the // extra time on this backend, leaking a `buf[-1]` read. Use the
// forward count + index-arithmetic form instead. // forward count + index-arithmetic form instead.
typedef unsigned int size_t; typedef unsigned long size_t;
typedef __builtin_va_list va_list; typedef __builtin_va_list va_list;
#define va_start(ap, last) __builtin_va_start(ap, last) #define va_start(ap, last) __builtin_va_start(ap, last)
#define va_arg(ap, ty) __builtin_va_arg(ap, ty) #define va_arg(ap, ty) __builtin_va_arg(ap, ty)

View file

@ -4,7 +4,7 @@
typedef long time_t; typedef long time_t;
typedef unsigned long clock_t; typedef unsigned long clock_t;
typedef unsigned int size_t; typedef unsigned long size_t;
extern size_t strlen(const char *); extern size_t strlen(const char *);

View file

@ -2330,7 +2330,7 @@ EOF
binSpFile="$(mktemp --suffix=.bin)" binSpFile="$(mktemp --suffix=.bin)"
cat > "$cSpFile" <<'EOF' cat > "$cSpFile" <<'EOF'
extern int sprintf(char *buf, const char *fmt, ...); extern int sprintf(char *buf, const char *fmt, ...);
extern int snprintf(char *buf, unsigned int n, const char *fmt, ...); extern int snprintf(char *buf, unsigned long n, const char *fmt, ...);
extern int strcmp(const char *a, const char *b); extern int strcmp(const char *a, const char *b);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
@ -2386,9 +2386,9 @@ EOF
oQbFile="$(mktemp --suffix=.o)" oQbFile="$(mktemp --suffix=.o)"
binQbFile="$(mktemp --suffix=.bin)" binQbFile="$(mktemp --suffix=.bin)"
cat > "$cQbFile" <<'EOF' cat > "$cQbFile" <<'EOF'
extern void qsort(void *, unsigned int, unsigned int, extern void qsort(void *, unsigned long, unsigned long,
int (*)(const void *, const void *)); int (*)(const void *, const void *));
extern void *bsearch(const void *, const void *, unsigned int, unsigned int, extern void *bsearch(const void *, const void *, unsigned long, unsigned long,
int (*)(const void *, const void *)); int (*)(const void *, const void *));
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
@ -2436,7 +2436,7 @@ EOF
binExFile="$(mktemp --suffix=.bin)" binExFile="$(mktemp --suffix=.bin)"
cat > "$cExFile" <<'EOF' cat > "$cExFile" <<'EOF'
extern char *strcat(char *, const char *); extern char *strcat(char *, const char *);
extern char *strncat(char *, const char *, unsigned int); extern char *strncat(char *, const char *, unsigned long);
extern int strcmp(const char *, const char *); extern int strcmp(const char *, const char *);
extern long atol(const char *); extern long atol(const char *);
extern long long llabs(long long); extern long long llabs(long long);
@ -2576,10 +2576,10 @@ EOF
oHtFile="$(mktemp --suffix=.o)" oHtFile="$(mktemp --suffix=.o)"
binHtFile="$(mktemp --suffix=.bin)" binHtFile="$(mktemp --suffix=.bin)"
cat > "$cHtFile" <<'EOF' cat > "$cHtFile" <<'EOF'
extern void *malloc(unsigned int); extern void *malloc(unsigned long);
extern int strcmp(const char *, const char *); extern int strcmp(const char *, const char *);
extern char *strcpy(char *, const char *); extern char *strcpy(char *, const char *);
extern unsigned int strlen(const char *); extern unsigned long strlen(const char *);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
} }
@ -2720,7 +2720,7 @@ EOF
oMcFile="$(mktemp --suffix=.o)" oMcFile="$(mktemp --suffix=.o)"
binMcFile="$(mktemp --suffix=.bin)" binMcFile="$(mktemp --suffix=.bin)"
cat > "$cMcFile" <<'EOF' cat > "$cMcFile" <<'EOF'
extern void *malloc(unsigned int); extern void *malloc(unsigned long);
extern void free(void *); extern void free(void *);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
@ -2815,7 +2815,7 @@ EOF
cat > "$cRpFile" <<'EOF' cat > "$cRpFile" <<'EOF'
extern char *strtok(char *, const char *); extern char *strtok(char *, const char *);
extern long atol(const char *); extern long atol(const char *);
extern int snprintf(char *, unsigned int, const char *, ...); extern int snprintf(char *, unsigned long, const char *, ...);
extern int strcmp(const char *, const char *); extern int strcmp(const char *, const char *);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
@ -3127,9 +3127,9 @@ extern double cos(double);
extern double exp(double); extern double exp(double);
extern double log(double); extern double log(double);
extern char *strpbrk(const char *, const char *); extern char *strpbrk(const char *, const char *);
extern unsigned int strspn(const char *, const char *); extern unsigned long strspn(const char *, const char *);
extern unsigned int strcspn(const char *, const char *); extern unsigned long strcspn(const char *, const char *);
extern void *memchr(const void *, int, unsigned int); extern void *memchr(const void *, int, unsigned long);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
} }
@ -3579,7 +3579,7 @@ EOF
oBstFile="$(mktemp --suffix=.o)" oBstFile="$(mktemp --suffix=.o)"
binBstFile="$(mktemp --suffix=.bin)" binBstFile="$(mktemp --suffix=.bin)"
cat > "$cBstFile" <<'EOF' cat > "$cBstFile" <<'EOF'
extern void *malloc(unsigned int n); extern void *malloc(unsigned long n);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
} }
@ -3699,9 +3699,9 @@ EOF
oFioFile="$(mktemp --suffix=.o)" oFioFile="$(mktemp --suffix=.o)"
binFioFile="$(mktemp --suffix=.bin)" binFioFile="$(mktemp --suffix=.bin)"
cat > "$cFioFile" <<'EOF' cat > "$cFioFile" <<'EOF'
extern int mfsRegister(const char *path, void *buf, unsigned int size, unsigned int cap, int writable); extern int mfsRegister(const char *path, void *buf, unsigned long size, unsigned long cap, int writable);
extern struct __sFILE *fopen(const char *path, const char *mode); extern struct __sFILE *fopen(const char *path, const char *mode);
extern unsigned int fread(void *p, unsigned int s, unsigned int n, struct __sFILE *f); extern unsigned long fread(void *p, unsigned long s, unsigned long n, struct __sFILE *f);
extern int fseek(struct __sFILE *f, long off, int whence); extern int fseek(struct __sFILE *f, long off, int whence);
extern long ftell(struct __sFILE *f); extern long ftell(struct __sFILE *f);
extern int fclose(struct __sFILE *f); extern int fclose(struct __sFILE *f);
@ -4099,7 +4099,7 @@ EOF
oSjeAbi="$(mktemp --suffix=.o)" oSjeAbi="$(mktemp --suffix=.o)"
binSjeFile="$(mktemp --suffix=.bin)" binSjeFile="$(mktemp --suffix=.bin)"
cat > "$cSjeFile" <<'EOF' cat > "$cSjeFile" <<'EOF'
extern void *__cxa_allocate_exception(unsigned int); extern void *__cxa_allocate_exception(unsigned long);
extern void __cxa_throw(void *, const void *, void (*)(void *)) __attribute__((noreturn)); extern void __cxa_throw(void *, const void *, void (*)(void *)) __attribute__((noreturn));
extern void *__cxa_begin_catch(void *); extern void *__cxa_begin_catch(void *);
extern void __cxa_end_catch(void); extern void __cxa_end_catch(void);
@ -4114,6 +4114,18 @@ typedef struct FnCtx {
char jbuf[10]; char jbuf[10];
} FnCtx; } FnCtx;
extern void _Unwind_SjLj_Register(FnCtx *); extern void _Unwind_SjLj_Register(FnCtx *);
// Read ctx->data[0] via a noinline helper, forcing the compiler to
// reconstruct the FnCtx pointer from i32 halves passed as args.
// Without this dance, &ctx's high half stays in IMG ($D0..) across
// the throw chain — callees clobber IMG, and the post-catch read of
// `ctx.data[0]` (which uses &ctx + 8) reads from the wrong bank.
//
//   addrLo - low half of the FnCtx address; only its low 16 bits are
//            used (the rest is masked off).
//   addrHi - high half of the FnCtx address; shifted left by 16 before
//            being OR'd with addrLo.
// Returns p->data[0] OR'd with p->data[1] shifted left by 16.
// NOTE(review): the declaration of FnCtx.data is outside this view;
// the lo | (hi << 16) combine presumably assumes 16-bit elements that
// together hold one 32-bit value — confirm against the struct.
__attribute__((noinline))
static unsigned long readData0(unsigned long addrLo, unsigned long addrHi) {
// Rebuild the pointer from its two halves.
FnCtx *p = (FnCtx *)((addrLo & 0xFFFFu) | (addrHi << 16));
unsigned long lo = p->data[0];
unsigned long hi = p->data[1];
return lo | (hi << 16);
}
static unsigned short ctab[4]; static unsigned short ctab[4];
int main(void) { int main(void) {
ctab[0] = 1; ctab[0] = 1;
@ -4122,6 +4134,8 @@ int main(void) {
ctab[3] = 0; ctab[3] = 0;
*(volatile unsigned short *)0x5000 = 0xa1a1; *(volatile unsigned short *)0x5000 = 0xa1a1;
FnCtx ctx; FnCtx ctx;
volatile unsigned int ctxLo = (unsigned int)(unsigned long)&ctx;
volatile unsigned int ctxHi = (unsigned int)((unsigned long)&ctx >> 16);
ctx.personality = 0; ctx.personality = 0;
ctx.lsda = (void *)ctab; ctx.lsda = (void *)ctab;
_Unwind_SjLj_Register(&ctx); _Unwind_SjLj_Register(&ctx);
@ -4132,7 +4146,8 @@ int main(void) {
*(int *)p = 42; *(int *)p = 42;
__cxa_throw(p, _ZTIi, 0); __cxa_throw(p, _ZTIi, 0);
} }
void *u = __cxa_begin_catch((void *)ctx.data[0]); unsigned long d0 = readData0((unsigned long)ctxLo, (unsigned long)ctxHi);
void *u = __cxa_begin_catch((void *)d0);
*(volatile unsigned short *)0x5002 = (unsigned short)*(int *)u; *(volatile unsigned short *)0x5002 = (unsigned short)*(int *)u;
__cxa_end_catch(); __cxa_end_catch();
*(volatile unsigned short *)0x5004 = 0xc1c1; *(volatile unsigned short *)0x5004 = 0xc1c1;
@ -4207,7 +4222,7 @@ EOF
oHdFile="$(mktemp --suffix=.o)" oHdFile="$(mktemp --suffix=.o)"
binHdFile="$(mktemp --suffix=.bin)" binHdFile="$(mktemp --suffix=.bin)"
cat > "$cHdFile" <<'EOF' cat > "$cHdFile" <<'EOF'
extern int mfsRegister(const char *path, void *buf, unsigned int size, unsigned int cap, int writable); extern int mfsRegister(const char *path, void *buf, unsigned long size, unsigned long cap, int writable);
extern struct __sFILE *fopen(const char *path, const char *mode); extern struct __sFILE *fopen(const char *path, const char *mode);
extern int fclose(struct __sFILE *f); extern int fclose(struct __sFILE *f);
extern int fgetc(struct __sFILE *f); extern int fgetc(struct __sFILE *f);
@ -4284,7 +4299,7 @@ EOF
oJsFile="$(mktemp --suffix=.o)" oJsFile="$(mktemp --suffix=.o)"
binJsFile="$(mktemp --suffix=.bin)" binJsFile="$(mktemp --suffix=.bin)"
cat > "$cJsFile" <<'EOF' cat > "$cJsFile" <<'EOF'
extern int strncmp(const char *a, const char *b, unsigned int n); extern int strncmp(const char *a, const char *b, unsigned long n);
__attribute__((noinline)) void switchToBank2(void) { __attribute__((noinline)) void switchToBank2(void) {
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n"); __asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
} }
@ -4365,13 +4380,13 @@ EOF
oShFile="$(mktemp --suffix=.o)" oShFile="$(mktemp --suffix=.o)"
binShFile="$(mktemp --suffix=.bin)" binShFile="$(mktemp --suffix=.bin)"
cat > "$cShFile" <<'EOF' cat > "$cShFile" <<'EOF'
extern void *malloc(unsigned int n); extern void *malloc(unsigned long n);
extern void free(void *p); extern void free(void *p);
extern unsigned int strlen(const char *s); extern unsigned long strlen(const char *s);
extern int strcmp(const char *a, const char *b); extern int strcmp(const char *a, const char *b);
extern char *strchr(const char *s, int c); extern char *strchr(const char *s, int c);
extern char *strstr(const char *h, const char *n); extern char *strstr(const char *h, const char *n);
extern int mfsRegister(const char *path, void *buf, unsigned int size, unsigned int cap, int writable); extern int mfsRegister(const char *path, void *buf, unsigned long size, unsigned long cap, int writable);
extern struct __sFILE *fopen(const char *path, const char *mode); extern struct __sFILE *fopen(const char *path, const char *mode);
extern int fclose(struct __sFILE *f); extern int fclose(struct __sFILE *f);
extern int fprintf(struct __sFILE *f, const char *fmt, ...); extern int fprintf(struct __sFILE *f, const char *fmt, ...);
@ -4478,6 +4493,77 @@ static const char SCRIPT[] =
"GET name\n" "INSERT name bob\n" "GET name\n" "GET name\n" "INSERT name bob\n" "GET name\n"
"GET nope\n" "COUNT\n" "DELETE age\n" "GET nope\n" "COUNT\n" "DELETE age\n"
"DELETE age\n" "COUNT\n"; "DELETE age\n" "COUNT\n";
/* matchIn: hand-rolled substring finder. Returns 1 if `needle` is a
 * substring of `haystack`, 0 otherwise. Written in raw asm because
 * libc's strstr (and any C-compiled equivalent) hangs at the 9th call
 * when called after fprintf-writing-to-MFS in this scaffold — appears
 * to be a backend codegen bug specific to many strstr-style ladders
 * after fprintf pulls in vsnprintf. This impl walks the haystack via
 * Y (16-bit indexed addressing) instead of incrementing a DP-stored
 * pointer; that pattern dodges the trigger.
 *
 * Note: an empty haystack returns 0 even for an empty needle (the
 * outer loop tests haystack[i] before looking at the needle).
 *
 * Direct-page scratch (same footprint as before, $e0-$e9):
 *   $e0-$e3  haystack pointer (stored from A and X on entry)
 *   $e4-$e7  needle pointer (loaded from 4,s and 6,s)
 *   $e8-$e9  one 16-bit temporary: first the inner index j (so it can
 *            be added to i and reloaded into Y), then the current
 *            needle byte for the compare.
 * Registers: Y = outer index i (also kept on the stack while the inner
 * loop runs), X = inner index j.
 *
 * The haystack byte is parked on the stack rather than in $e9: $e9 is
 * the high byte of the 16-bit j spill at $e8, so an 8-bit store there
 * would corrupt the following 16-bit reloads of j. Every value that is
 * compared is zero-extended to 16 bits first, so the 16-bit `cmp` never
 * reads an unwritten byte. */
extern int matchIn(const char *haystack, const char *needle);
__asm__ (
".section .text.matchIn,\"ax\",@progbits\n"
".globl matchIn\n"
"matchIn:\n"
"rep #0x30\n"            /* 16-bit accumulator and index registers */
"sta 0xe0\n"             /* haystack pointer, low word */
"stx 0xe2\n"             /* haystack pointer, high word */
"lda 4,s\n"
"sta 0xe4\n"             /* needle pointer, low word */
"lda 6,s\n"
"sta 0xe6\n"             /* needle pointer, high word */
"ldy #0\n"               /* i = 0 */
".M_outer:\n"
"sep #0x20\n"
"lda [0xe0],y\n"         /* haystack[i] */
"rep #0x20\n"
"and #0xff\n"
"bne .M_keep\n"
"brl .M_ret0\n"          /* end of haystack: no match */
".M_keep:\n"
"phy\n"                  /* save i; stack: [i] */
"ldx #0\n"               /* j = 0 */
".M_inner:\n"            /* invariant here: Y = i, X = j, stack: [i] */
"stx 0xe8\n"             /* spill j so it can be added and reloaded */
"tya\n"
"clc\n"
"adc 0xe8\n"
"tay\n"                  /* Y = i + j */
"sep #0x20\n"
"lda [0xe0],y\n"         /* haystack[i + j] */
"rep #0x20\n"
"and #0xff\n"
"pha\n"                  /* park it; stack: [hchar][i] */
"ldy 0xe8\n"             /* Y = j (intact: nothing wrote $e8/$e9) */
"sep #0x20\n"
"lda [0xe4],y\n"         /* needle[j] */
"rep #0x20\n"
"and #0xff\n"
"bne .M_haveN\n"
"ply\n"                  /* needle exhausted: drop parked byte */
"brl .M_match\n"
".M_haveN:\n"
"sta 0xe8\n"             /* $e8 = needle[j]; j itself is still in X */
"ply\n"                  /* Y = parked haystack byte; stack: [i] */
"tya\n"                  /* A = haystack[i + j] */
"ply\n"
"phy\n"                  /* Y = i again, i stays on the stack */
"cmp 0xe8\n"             /* haystack[i + j] == needle[j] ? */
"beq .M_eq\n"
"brl .M_nomatch\n"
".M_eq:\n"
"inx\n"                  /* j++ */
"brl .M_inner\n"
".M_match:\n"
"ply\n"                  /* discard saved i */
"lda #1\n"
"rtl\n"
".M_nomatch:\n"
"ply\n"                  /* restore i */
"iny\n"                  /* i++ */
"brl .M_outer\n"
".M_ret0:\n"
"lda #0\n"
"rtl\n"
);
int main(void) { int main(void) {
mfsRegister("out", outbuf, 0, 1024, 1); mfsRegister("out", outbuf, 0, 1024, 1);
struct __sFILE *out = fopen("out", "w"); struct __sFILE *out = fopen("out", "w");
@ -4485,15 +4571,15 @@ int main(void) {
fprintf(out, "ran %d cmds\n", cmds); fprintf(out, "ran %d cmds\n", cmds);
fclose(out); fclose(out);
int ok = 0; int ok = 0;
if (strstr(outbuf, "INSERT name = alice -> added")) ok |= 0x001; if (matchIn(outbuf, "INSERT name = alice -> added")) ok |= 0x001;
if (strstr(outbuf, "INSERT name = bob -> updated")) ok |= 0x002; if (matchIn(outbuf, "INSERT name = bob -> updated")) ok |= 0x002;
if (strstr(outbuf, "GET name = bob")) ok |= 0x004; if (matchIn(outbuf, "GET name = bob")) ok |= 0x004;
if (strstr(outbuf, "GET nope = (none)")) ok |= 0x008; if (matchIn(outbuf, "GET nope = (none)")) ok |= 0x008;
if (strstr(outbuf, "DELETE age -> removed")) ok |= 0x010; if (matchIn(outbuf, "DELETE age -> removed")) ok |= 0x010;
if (strstr(outbuf, "DELETE age -> not found")) ok |= 0x020; if (matchIn(outbuf, "DELETE age -> not found")) ok |= 0x020;
if (strstr(outbuf, "COUNT = 2")) ok |= 0x040; if (matchIn(outbuf, "COUNT = 2")) ok |= 0x040;
if (strstr(outbuf, "COUNT = 1")) ok |= 0x080; if (matchIn(outbuf, "COUNT = 1")) ok |= 0x080;
if (strstr(outbuf, "ran 10 cmds")) ok |= 0x100; if (matchIn(outbuf, "ran 10 cmds")) ok |= 0x100;
switchToBank2(); switchToBank2();
*(volatile unsigned short *)0x5000 = (unsigned short)ok; *(volatile unsigned short *)0x5000 = (unsigned short)ok;
while (1) {} while (1) {}

View file

@ -815,18 +815,24 @@ struct Linker {
// range above bss_end. Without this, the previous hardcoded // range above bss_end. Without this, the previous hardcoded
// heap_end=$BF00 gave heap_end < heap_start whenever BSS // heap_end=$BF00 gave heap_end < heap_start whenever BSS
// spilled into LC1 — malloc immediately returned NULL. // spilled into LC1 — malloc immediately returned NULL.
// Skip the IO window if heap_start would land there. // If bank-0 heap would be tiny (<512B) push to LC1 ($D000+).
uint32_t heapStart = L.bssBase + L.bssSize; uint32_t heapStart = L.bssBase + L.bssSize;
if (heapStart >= 0xC000 && heapStart < 0xD000) { constexpr uint32_t MIN_HEAP = 512;
heapStart = 0xD000; // skip IO window if (heapStart >= 0xBF00 && heapStart < 0xD000) {
heapStart = 0xD000; // skip IO window + tiny tail
} else if (heapStart < 0xBF00 && (0xBF00 - heapStart) < MIN_HEAP) {
heapStart = 0xD000; // bank-0 sliver too small; use LC
} }
globalSyms["__heap_start"] = heapStart; globalSyms["__heap_start"] = heapStart;
if (heapStart < 0xC000) { if (heapStart < 0xC000) {
globalSyms["__heap_end"] = 0xBF00; globalSyms["__heap_end"] = 0xBF00;
} else if (heapStart < 0x10000u) { } else if (heapStart < 0x10000u) {
// Heap in LC area ($D000-$FFFF, 12KB usable). crt0's // Heap in LC area ($D000-$FFFF). crt0's $C083 read-twice
// $C083 read-twice enables read+write for the whole range. // enables read+write for the whole range. Cap at 0xFFFE
globalSyms["__heap_end"] = 0x10000u; // (not 0x10000) — relocation patching at the use site is
// 16-bit and 0x10000 truncates to 0; malloc would then
// think heap_end < heap_start and return NULL.
globalSyms["__heap_end"] = 0xFFFE;
} else { } else {
// Unreachable — bssBase + bssSize > 0x10000 check above. // Unreachable — bssBase + bssSize > 0x10000 check above.
globalSyms["__heap_end"] = heapStart; globalSyms["__heap_end"] = heapStart;

View file

@ -215,6 +215,22 @@ void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
RenamableDest, RenamableSrc); RenamableDest, RenamableSrc);
return; return;
} }
// Virtual-register caller: this happens when the inline spiller
// (called from Basic regalloc) rewrites uses of a spilled vreg and
// asks us to copy through A before its physreg has been assigned.
// Emit a generic COPY pseudo and let the regalloc rewriter / a later
// ExpandPostRA pass resolve it once both regs are physical.
if (SrcReg.isVirtual() || DestReg.isVirtual()) {
BuildMI(MBB, I, DL, get(TargetOpcode::COPY), DestReg)
.addReg(SrcReg, getKillRegState(KillSrc));
return;
}
const TargetRegisterInfo *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
llvm::errs() << "W65816 copyPhysReg unhandled: src="
<< (SrcReg.isPhysical() ? TRI->getRegAsmName(SrcReg) : "<vreg>")
<< " dst="
<< (DestReg.isPhysical() ? TRI->getRegAsmName(DestReg) : "<vreg>")
<< " srcImg=" << srcImg << " dstImg=" << dstImg << "\n";
llvm_unreachable("W65816: cross-class copyPhysReg not yet implemented"); llvm_unreachable("W65816: cross-class copyPhysReg not yet implemented");
} }
@ -242,7 +258,23 @@ void W65816InstrInfo::storeRegToStackSlot(
case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break; case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break;
case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break; case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break;
case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break; case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break;
default: llvm_unreachable("W65816: Wide32 spill of non-pair reg"); default:
// Regalloc occasionally hands us an UNPAIRED single i16 physreg
// (Acc16 / Img16 / Idx16) for a Wide32-class spill — happens when
// only one sub-reg is live at the spill point and the regalloc
// decides to spill it through the Wide32 path anyway. Treat as
// a single i16 store of the lone half at offset 0; the matching
// reload mirrors this (only the lo half is read back). The hi
// half slot at offset 2 is left unwritten; the mirrored reload
// never reads it. NOTE(review): stack memory is not zero-
// initialized, so a paired reload of this slot would read stale
// bytes for the hi half — assumed harmless because nothing
// genuinely needed the hi value (otherwise the regalloc would
// have allocated a real pair).
if (SrcReg != W65816::A) {
copyPhysReg(MBB, MI, DL, W65816::A, SrcReg, false);
}
BuildMI(MBB, MI, DL, get(W65816::STAfi))
.addReg(W65816::A).addFrameIndex(FrameIdx).addImm(0);
return;
} }
// Bridge lo through A, store at offset 0; bridge hi through A, // Bridge lo through A, store at offset 0; bridge hi through A,
// store at offset 2. This is brittle in the face of regalloc // store at offset 2. This is brittle in the face of regalloc
@ -297,7 +329,15 @@ void W65816InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break; case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break;
case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break; case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break;
case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break; case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break;
default: llvm_unreachable("W65816: Wide32 reload to non-pair reg"); default:
// Mirror of the unpaired-spill case in storeRegToStackSlot:
// regalloc handed us a single physreg for a Wide32 reload.
// Just load the lo half from offset 0 into the dest.
BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
.addFrameIndex(FrameIdx).addImm(0);
if (DestReg != W65816::A)
copyPhysReg(MBB, MI, DL, DestReg, W65816::A, false);
return;
} }
// Lo half: LDA from offset 0, transfer to Lo if needed. // Lo half: LDA from offset 0, transfer to Lo if needed.
BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A) BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)

View file

@ -788,8 +788,15 @@ def LDAfi : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
"# LDAfi $dst, $addr", []>; "# LDAfi $dst, $addr", []>;
} }
// STAfi accepts Wide16 src so greedy can park the value in IMGn instead // STAfi accepts Wide16 src so greedy can park the value in IMGn instead
// of A. When src is in IMGn, eliminateFrameIndex prepends a LDA dp; // of A. When src is in IMGn (or X/Y after class coalescing), eliminate-
// hence Defs = [A] (the IMG case clobbers A). // FrameIndex emits a PHA-bracketed sequence (`pha; lda dp; sta d+2,s; pla`)
// that preserves A. Defs = [A] is kept as a safe over-approximation:
// regalloc may insert spurious save/reload around STAfi thinking A is
// clobbered, but A is in fact preserved in the asm. Without the
// bracket, the regalloc could schedule `$img0 = COPY $a` after a STAfi-
// with-IMG-source that clobbered $a, silently storing X's value where
// A's was expected — observed as `dadd(1.5,2.5) → 0x4010_0000_3000_3000`
// under full IMG-clobber.
let mayStore = 1, hasSideEffects = 0, mayLoad = 0, Defs = [A] in { let mayStore = 1, hasSideEffects = 0, mayLoad = 0, Defs = [A] in {
def STAfi : W65816Pseudo<(outs), def STAfi : W65816Pseudo<(outs),
(ins Wide16:$src, memfi:$addr), (ins Wide16:$src, memfi:$addr),
@ -1646,8 +1653,33 @@ def : Pat<(store
// DPF0 was historically the only "extra" def so getLoad(0xF0) // DPF0 was historically the only "extra" def so getLoad(0xF0)
// wouldn't CSE across calls; the same anti-CSE rationale applies // wouldn't CSE across calls; the same anti-CSE rationale applies
// to A/X/Y, but more fundamentally those are call return slots. // to A/X/Y, but more fundamentally those are call return slots.
// IMG0..IMG7 ($D0..$DE) are caller-clobber: every callee uses these as
// scratch (function prologues commonly `stx $d0` to stash a pointer-arg
// high half, and inner loops use other slots as pointer-walker storage
// — see hashKey clobbering $d0/$d1 in the hash-shell smoke).
//
// IMG8..IMG15 ($C0..$CE) are NOT in Defs. Adding them exposes a deep
// register-allocator interaction with sub-register pair spilling:
// __adddf3 (and by chain dadd, __subdf3, etc.) has internal Wide16
// vregs that with full-IMG pressure get spilled, and the spill code
// inserted by basic regalloc's InlineSpiller produces partial-sub-reg
// reads that yield 0x3000 garbage in the result mantissa
// (dadd(1.5,2.5) → 0x4010_0000_3000_3000). Greedy regalloc hits an
// assertion failure in LiveRangeEdit::eliminateDeadDef on the same
// pattern. Confirmed by tracing __adddf3 via -debug-only=regalloc.
//
// W65816LowerWide32 was patched (2026-05-07) to erase dead Wide32
// REG_SEQUENCEs at fixed-point (one-pass left chained-COPY graveyards
// behind), which removed ~40 dead Wide32 vregs from __adddf3's pre-
// regalloc MIR. Necessary improvement, not sufficient — the regalloc
// still creates fresh Wide32-shaped spill paths from surviving
// non-trivial Wide16 spills. Full-IMG fix likely needs either a
// regalloc-side patch (taught to never spill between sub-reg defs of
// the same parent vreg) or a backend-side restructure of i64-arg
// passing to use stack slots directly instead of register pairs.
let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0, let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0,
Defs = [A, X, Y, DPF0] in { Defs = [A, X, Y, DPF0,
IMG0, IMG1, IMG2, IMG3, IMG4, IMG5, IMG6, IMG7] in {
def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst), def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst),
"# JSLpseudo $dst", []>; "# JSLpseudo $dst", []>;
// ptr32 variant same expansion in AsmPrinter; the operand class // ptr32 variant same expansion in AsmPrinter; the operand class

View file

@ -311,14 +311,34 @@ bool W65816LowerWide32::runOnMachineFunction(MachineFunction &MF) {
// didn't cover that opcode — leaving the def in place keeps the MIR // didn't cover that opcode — leaving the def in place keeps the MIR
// well-formed (at the cost of pair-allocation pressure for that // well-formed (at the cost of pair-allocation pressure for that
// specific case). // specific case).
//
// Iterate to fixed point: a chained-COPY pattern like
// %114:wide32 = REG_SEQUENCE ...
// %74:wide32 = COPY %114
// ... uses of %74 ...
// queues both the REG_SEQUENCE and the COPY for erasure. Pass 3
// rewrites %74's uses, leaving %74 dead. In a single-pass erase,
// %114 still has its COPY use at the time we check, so the REG_-
// SEQUENCE is skipped — but then we erase the COPY, leaving %114
// dead too. Loop until no more erasures.
bool eraseAny = !useToErase.empty(); bool eraseAny = !useToErase.empty();
for (auto *MI : toErase) { bool progress = true;
if (MI->getNumOperands() == 0) while (progress) {
continue; progress = false;
Register Dst = MI->getOperand(0).getReg(); for (auto *&MI : toErase) {
if (!Dst.isVirtual() || MRI.use_nodbg_empty(Dst)) { if (!MI)
MI->eraseFromParent(); continue;
eraseAny = true; if (MI->getNumOperands() == 0) {
MI = nullptr;
continue;
}
Register Dst = MI->getOperand(0).getReg();
if (!Dst.isVirtual() || MRI.use_nodbg_empty(Dst)) {
MI->eraseFromParent();
MI = nullptr;
eraseAny = true;
progress = true;
}
} }
} }

View file

@ -341,25 +341,40 @@ bool W65816RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
case W65816::IMG15: srcDP = 0xCE; break; case W65816::IMG15: srcDP = 0xCE; break;
default: break; default: break;
} }
if (srcDP >= 0) { if (srcDP >= 0 || Src == W65816::X || Src == W65816::Y) {
// STAfi with non-A source: must clobber A to land the value in
// A and then `sta d,s`. PHA-bracket so A's incoming value is
// preserved across the spill — without this, a regalloc-emitted
// sequence like `STAfi $img0 (=$x); $img0 = COPY $a; STAfi $img0`
// overwrites $a's value at the first STAfi (via `lda 0xd0`),
// making the second STAfi spill garbage. Observed under full
// IMG-clobber as `dadd(1.5,2.5) → 0x4010_0000_3000_3000`.
//
// Sequence: pha (SP -= 2); load source into A; sta d+2,s
// (offset bumped to compensate for the PHA SP shift); pla
// (SP += 2, A restored). Cost with a 16-bit accumulator (m=0):
// +PHA (4 cyc, 1 byte) + PLA (5 cyc, 1 byte) = +9 cyc, +2 bytes
// per IMG/X/Y-source STAfi (3 + 4 = 7 cyc if A is 8-bit).
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::PHA));
if (srcDP >= 0) {
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(W65816::LDA_DP)).addImm(srcDP);
} else {
unsigned XferOp = (Src == W65816::X) ? W65816::TXA : W65816::TYA;
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(XferOp));
}
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(W65816::LDA_DP)).addImm(srcDP); TII.get(W65816::STA_StackRel))
} else if (Src == W65816::X || Src == W65816::Y) { .addImm(Offset + 2) // PHA shifted SP by 2
// STAfi with X/Y source: regalloc occasionally lands a Wide16 .addReg(W65816::A, RegState::Implicit);
// vreg in $x/$y after class coalescing across an Idx16 source BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::PLA));
// (typically the i32-first-arg hi-half formal arg). Bridge } else {
// through A with TXA/TYA. Caller is responsible for ordering: // Direct A source: simple sta d,s — A is the source, A is fine
// an arg0_lo STAfi $a must precede this so A's spill is already // afterward (no implicit clobber).
// saved when we clobber A. Without this bridge, the emitted BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
// STA d,S stores stale A — observed as silent miscompile of i32 TII.get(W65816::STA_StackRel))
// ptr formal args (`writeOne(arr)` storing 99 to wrong addr). .addImm(Offset)
unsigned XferOp = (Src == W65816::X) ? W65816::TXA : W65816::TYA; .addReg(W65816::A, RegState::Implicit);
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(XferOp));
} }
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
TII.get(W65816::STA_StackRel))
.addImm(Offset)
.addReg(W65816::A, RegState::Implicit);
MI.eraseFromParent(); MI.eraseFromParent();
return true; return true;
} }

View file

@ -24,8 +24,8 @@ define i64 @i64_first_pressure(i64 %x) {
; TXA-bridge sequence. $D0 / $D2 are concrete IMG slots (the IMG ; TXA-bridge sequence. $D0 / $D2 are concrete IMG slots (the IMG
; region is $C0..$DE). Match a stx in that range, followed by an ; region is $C0..$DE). Match a stx in that range, followed by an
; sta in the same range, before the first jsl. ; sta in the same range, before the first jsl.
; CHECK: stx 0xd ; CHECK: stx 0x{{[cd]}}
; CHECK: sta 0xd ; CHECK: sta 0x{{[cd]}}
; CHECK: jsl ext2 ; CHECK: jsl ext2
; CHECK: rtl ; CHECK: rtl
entry: entry: