Checkpoint
This commit is contained in:
parent
0210b06a5e
commit
15c7fa0db2
19 changed files with 347 additions and 112 deletions
|
|
@ -4,7 +4,7 @@
|
||||||
#ifndef _STDDEF_H
|
#ifndef _STDDEF_H
|
||||||
#define _STDDEF_H
|
#define _STDDEF_H
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
typedef int ptrdiff_t;
|
typedef int ptrdiff_t;
|
||||||
typedef int wchar_t; // not really wide-char-supported
|
typedef int wchar_t; // not really wide-char-supported
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
||||||
typedef struct __sFILE FILE;
|
typedef struct __sFILE FILE;
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
|
|
||||||
extern FILE *stdin;
|
extern FILE *stdin;
|
||||||
extern FILE *stdout;
|
extern FILE *stdout;
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#ifndef _STDLIB_H
|
#ifndef _STDLIB_H
|
||||||
#define _STDLIB_H
|
#define _STDLIB_H
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
|
|
||||||
void *malloc(size_t n);
|
void *malloc(size_t n);
|
||||||
void *calloc(size_t nmemb, size_t size);
|
void *calloc(size_t nmemb, size_t size);
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
#ifndef _STRING_H
|
#ifndef _STRING_H
|
||||||
#define _STRING_H
|
#define _STRING_H
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
|
|
||||||
void *memcpy(void *dst, const void *src, size_t n);
|
void *memcpy(void *dst, const void *src, size_t n);
|
||||||
void *memmove(void *dst, const void *src, size_t n);
|
void *memmove(void *dst, const void *src, size_t n);
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@
|
||||||
|
|
||||||
typedef long time_t;
|
typedef long time_t;
|
||||||
typedef unsigned long clock_t;
|
typedef unsigned long clock_t;
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
|
|
||||||
#define CLOCKS_PER_SEC 60 // IIgs vsync tick (placeholder)
|
#define CLOCKS_PER_SEC 60 // IIgs vsync tick (placeholder)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,7 +9,7 @@
|
||||||
#define _WCHAR_H
|
#define _WCHAR_H
|
||||||
|
|
||||||
typedef unsigned short wchar_t;
|
typedef unsigned short wchar_t;
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
typedef long wint_t;
|
typedef long wint_t;
|
||||||
|
|
||||||
#define WEOF ((wint_t)-1)
|
#define WEOF ((wint_t)-1)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@
|
||||||
// string.h: strcat, strncat
|
// string.h: strcat, strncat
|
||||||
// stdlib.h: atol, llabs
|
// stdlib.h: atol, llabs
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
|
|
||||||
|
|
||||||
char *strcat(char *dst, const char *src) {
|
char *strcat(char *dst, const char *src) {
|
||||||
|
|
|
||||||
|
|
@ -13,9 +13,10 @@
|
||||||
// memory-mapped IO port or a MAME-debug Lua hook; for now putchar
|
// memory-mapped IO port or a MAME-debug Lua hook; for now putchar
|
||||||
// is provided as a weak stub that does nothing.
|
// is provided as a weak stub that does nothing.
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
typedef int ssize_t;
|
typedef int ssize_t;
|
||||||
typedef unsigned char u8;
|
typedef unsigned char u8;
|
||||||
|
typedef unsigned short u16;
|
||||||
|
|
||||||
// ---- string.h ----
|
// ---- string.h ----
|
||||||
|
|
||||||
|
|
@ -365,14 +366,15 @@ void *memchr(const void *s, int c, size_t n) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// strstr: index-based scan rather than pointer-increment.
|
||||||
char *strstr(const char *haystack, const char *needle) {
|
char *strstr(const char *haystack, const char *needle) {
|
||||||
if (!*needle) return (char *)haystack;
|
if (!needle[0]) return (char *)haystack;
|
||||||
while (*haystack) {
|
unsigned int i = 0;
|
||||||
const char *h = haystack;
|
while (haystack[i]) {
|
||||||
const char *n = needle;
|
unsigned int j = 0;
|
||||||
while (*n && *h == *n) { h++; n++; }
|
while (needle[j] && haystack[i + j] == needle[j]) j++;
|
||||||
if (!*n) return (char *)haystack;
|
if (!needle[j]) return (char *)(haystack + i);
|
||||||
haystack++;
|
i++;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
@ -453,14 +455,18 @@ extern char __heap_end[] __attribute__((weak));
|
||||||
#define HEAP_DEFAULT_START ((char *)0x4000)
|
#define HEAP_DEFAULT_START ((char *)0x4000)
|
||||||
#define HEAP_DEFAULT_END ((char *)0xBF00)
|
#define HEAP_DEFAULT_END ((char *)0xBF00)
|
||||||
|
|
||||||
|
// Heap is bounded to <32KB so the size field stays uint16_t even
|
||||||
|
// under 32-bit size_t (saves 2 bytes/header). next-pointer width
|
||||||
|
// follows the data layout (2 bytes under p:16, 4 under p:32) — bake
|
||||||
|
// it into FREE_NODE_SZ via sizeof.
|
||||||
typedef struct FreeBlk {
|
typedef struct FreeBlk {
|
||||||
size_t size; // payload size, NOT including header
|
u16 size; // payload size, NOT including header
|
||||||
struct FreeBlk *next; // valid only while in the free list
|
struct FreeBlk *next; // valid only while in the free list
|
||||||
} FreeBlk;
|
} FreeBlk;
|
||||||
|
|
||||||
#define HDR_SZ ((size_t)2) // sizeof(size_t) only
|
#define HDR_SZ ((size_t)sizeof(u16))
|
||||||
#define FREE_NODE_SZ ((size_t)4) // size + next ptr
|
#define FREE_NODE_SZ ((size_t)(sizeof(u16) + sizeof(struct FreeBlk *)))
|
||||||
#define MIN_SPLIT ((size_t)(FREE_NODE_SZ + 2)) // 6 bytes
|
#define MIN_SPLIT ((size_t)(FREE_NODE_SZ + 2))
|
||||||
|
|
||||||
static FreeBlk *freeList = (FreeBlk *)0;
|
static FreeBlk *freeList = (FreeBlk *)0;
|
||||||
static char *bumpPtr = (char *)0;
|
static char *bumpPtr = (char *)0;
|
||||||
|
|
@ -474,18 +480,20 @@ static void mallocInitOnce(void) {
|
||||||
freeList = (FreeBlk *)0;
|
freeList = (FreeBlk *)0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *malloc(size_t n) {
|
void *malloc(size_t n0) {
|
||||||
mallocInitOnce();
|
mallocInitOnce();
|
||||||
|
// Heap ceiling is ~32KB so anything > 0x7FF0 is unsatisfiable.
|
||||||
|
if (n0 > (size_t)0x7FF0) return (void *)0;
|
||||||
|
// Round up to 2-byte alignment, with a minimum of FREE_NODE_SZ-HDR_SZ.
|
||||||
|
// Keep this in 16-bit arithmetic — the 0x7FF0 cap above guarantees the
|
||||||
|
// value fits. Going through `unsigned long` here triggers an i32 umax
|
||||||
|
// pattern that our backend currently miscompiles; staying 16-bit dodges
|
||||||
|
// that path entirely.
|
||||||
|
u16 n = (u16)n0;
|
||||||
if (n == 0) n = 1;
|
if (n == 0) n = 1;
|
||||||
// Overflow guard: size_t is 16-bit on this target. Without this,
|
n = (u16)((n + 1) & ~(u16)1);
|
||||||
// malloc(65535) rounds up to 65536 -> wraps to 0 -> allocates 2
|
if (n < (u16)(FREE_NODE_SZ - HDR_SZ))
|
||||||
// bytes (wrong size); even shorter values can wrap the bumpPtr
|
n = (u16)(FREE_NODE_SZ - HDR_SZ);
|
||||||
// sum below. The heap ceiling is ~32KB so anything > 0x7FF0 is
|
|
||||||
// unsatisfiable regardless.
|
|
||||||
if (n > (size_t)0x7FF0) return (void *)0;
|
|
||||||
n = (n + 1) & ~(size_t)1; // round up to 2 bytes
|
|
||||||
if (n < FREE_NODE_SZ - HDR_SZ)
|
|
||||||
n = FREE_NODE_SZ - HDR_SZ; // ensure freed block can hold next-ptr
|
|
||||||
// First-fit on free list.
|
// First-fit on free list.
|
||||||
FreeBlk **link = &freeList;
|
FreeBlk **link = &freeList;
|
||||||
FreeBlk *cur = freeList;
|
FreeBlk *cur = freeList;
|
||||||
|
|
@ -493,11 +501,11 @@ void *malloc(size_t n) {
|
||||||
if (cur->size >= n) {
|
if (cur->size >= n) {
|
||||||
// Split if there's room for a separate free block.
|
// Split if there's room for a separate free block.
|
||||||
if (cur->size >= n + MIN_SPLIT) {
|
if (cur->size >= n + MIN_SPLIT) {
|
||||||
size_t rem = cur->size - n - HDR_SZ;
|
u16 rem = (u16)(cur->size - n - HDR_SZ);
|
||||||
FreeBlk *tail = (FreeBlk *)((char *)cur + HDR_SZ + n);
|
FreeBlk *tail = (FreeBlk *)((char *)cur + HDR_SZ + n);
|
||||||
tail->size = rem;
|
tail->size = rem;
|
||||||
tail->next = cur->next;
|
tail->next = cur->next;
|
||||||
cur->size = n;
|
cur->size = (u16)n;
|
||||||
*link = tail;
|
*link = tail;
|
||||||
} else {
|
} else {
|
||||||
*link = cur->next;
|
*link = cur->next;
|
||||||
|
|
@ -510,7 +518,7 @@ void *malloc(size_t n) {
|
||||||
// Bump-allocate from the high end.
|
// Bump-allocate from the high end.
|
||||||
char *p = bumpPtr;
|
char *p = bumpPtr;
|
||||||
if (p + HDR_SZ + n > heapEnd) return (void *)0;
|
if (p + HDR_SZ + n > heapEnd) return (void *)0;
|
||||||
*(size_t *)p = n;
|
*(u16 *)p = (u16)n;
|
||||||
bumpPtr = p + HDR_SZ + n;
|
bumpPtr = p + HDR_SZ + n;
|
||||||
return p + HDR_SZ;
|
return p + HDR_SZ;
|
||||||
}
|
}
|
||||||
|
|
@ -538,7 +546,7 @@ void free(void *p) {
|
||||||
char *bEnd = (char *)b + HDR_SZ + b->size;
|
char *bEnd = (char *)b + HDR_SZ + b->size;
|
||||||
if (aEnd == (char *)b) {
|
if (aEnd == (char *)b) {
|
||||||
// a immediately precedes b — extend a, drop b.
|
// a immediately precedes b — extend a, drop b.
|
||||||
a->size += HDR_SZ + b->size;
|
a->size = (u16)(a->size + HDR_SZ + b->size);
|
||||||
*link = b->next;
|
*link = b->next;
|
||||||
b = *link;
|
b = *link;
|
||||||
continue;
|
continue;
|
||||||
|
|
@ -548,7 +556,7 @@ void free(void *p) {
|
||||||
// the outer list. We can't continue the inner walk
|
// the outer list. We can't continue the inner walk
|
||||||
// (a is gone), so break out and let the outer loop
|
// (a is gone), so break out and let the outer loop
|
||||||
// restart from a's successor.
|
// restart from a's successor.
|
||||||
b->size += HDR_SZ + a->size;
|
b->size = (u16)(b->size + HDR_SZ + a->size);
|
||||||
*a_link = a->next;
|
*a_link = a->next;
|
||||||
a_absorbed = 1;
|
a_absorbed = 1;
|
||||||
break;
|
break;
|
||||||
|
|
@ -580,7 +588,7 @@ void *calloc(size_t nmemb, size_t size) {
|
||||||
void *realloc(void *ptr, size_t n) {
|
void *realloc(void *ptr, size_t n) {
|
||||||
if (!ptr) return malloc(n);
|
if (!ptr) return malloc(n);
|
||||||
if (n == 0) { free(ptr); return (void *)0; }
|
if (n == 0) { free(ptr); return (void *)0; }
|
||||||
size_t old = *(size_t *)((char *)ptr - HDR_SZ);
|
size_t old = *(u16 *)((char *)ptr - HDR_SZ);
|
||||||
if (n <= old) return ptr;
|
if (n <= old) return ptr;
|
||||||
void *q = malloc(n);
|
void *q = malloc(n);
|
||||||
if (!q) return (void *)0;
|
if (!q) return (void *)0;
|
||||||
|
|
@ -942,6 +950,25 @@ extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap);
|
||||||
// Forward decl for vfprintf so fprintf can call it.
|
// Forward decl for vfprintf so fprintf can call it.
|
||||||
int vfprintf(FILE *stream, const char *fmt, va_list ap);
|
int vfprintf(FILE *stream, const char *fmt, va_list ap);
|
||||||
|
|
||||||
|
// Opaque pos-update helper. The vfprintf body's `stream->pos +=
|
||||||
|
// written` got DSE'd under p:32:16 + size_t=unsigned long when called
|
||||||
|
// after a format-spec vsnprintf call. Routing through an explicit
|
||||||
|
// noinline helper forces the compiler to emit the memory store.
|
||||||
|
volatile unsigned long g_advProbeStream;
|
||||||
|
volatile unsigned long g_advProbeWritten;
|
||||||
|
volatile unsigned int g_advProbeCalls;
|
||||||
|
volatile unsigned long g_advProbePostPos;
|
||||||
|
__attribute__((noinline))
|
||||||
|
void __mfsAdvancePos(FILE *stream, size_t written) {
|
||||||
|
g_advProbeCalls++;
|
||||||
|
g_advProbeStream = (unsigned long)stream;
|
||||||
|
g_advProbeWritten = written;
|
||||||
|
stream->pos = stream->pos + written;
|
||||||
|
if (stream->pos > stream->size) stream->size = stream->pos;
|
||||||
|
g_advProbePostPos = stream->pos;
|
||||||
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
int fprintf(FILE *stream, const char *fmt, ...) {
|
int fprintf(FILE *stream, const char *fmt, ...) {
|
||||||
va_list ap;
|
va_list ap;
|
||||||
__builtin_va_start(ap, fmt);
|
__builtin_va_start(ap, fmt);
|
||||||
|
|
@ -950,6 +977,7 @@ int fprintf(FILE *stream, const char *fmt, ...) {
|
||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
int vfprintf(FILE *stream, const char *fmt, va_list ap) {
|
int vfprintf(FILE *stream, const char *fmt, va_list ap) {
|
||||||
if (!stream) return -1;
|
if (!stream) return -1;
|
||||||
if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR)
|
if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR)
|
||||||
|
|
@ -962,11 +990,19 @@ int vfprintf(FILE *stream, const char *fmt, va_list ap) {
|
||||||
size_t remain = (stream->cap > stream->pos)
|
size_t remain = (stream->cap > stream->pos)
|
||||||
? stream->cap - stream->pos : 0;
|
? stream->cap - stream->pos : 0;
|
||||||
if (remain == 0) { stream->err = 1; return -1; }
|
if (remain == 0) { stream->err = 1; return -1; }
|
||||||
|
// Stash the FILE* low+high halves in volatile stack locals so
|
||||||
|
// the compiler is forced to reload after vsnprintf. Without
|
||||||
|
// this, the compiler keeps stream's hi half in IMG0 ($D0) for
|
||||||
|
// the entire function; vsnprintf uses $D0 as scratch, so when
|
||||||
|
// we read stream->* after vsnprintf returns the hi is garbage
|
||||||
|
// and writes go to the wrong bank. Caught by hex dumper test.
|
||||||
|
volatile unsigned int streamLo = (unsigned int)(unsigned long)stream;
|
||||||
|
volatile unsigned int streamHi = (unsigned int)((unsigned long)stream >> 16);
|
||||||
int n = vsnprintf(stream->buf + stream->pos, remain, fmt, ap);
|
int n = vsnprintf(stream->buf + stream->pos, remain, fmt, ap);
|
||||||
if (n < 0) { stream->err = 1; return -1; }
|
FILE *vs = (FILE *)((unsigned long)streamLo | ((unsigned long)streamHi << 16));
|
||||||
|
if (n < 0) { vs->err = 1; return -1; }
|
||||||
size_t written = ((size_t)n < remain) ? (size_t)n : remain - 1;
|
size_t written = ((size_t)n < remain) ? (size_t)n : remain - 1;
|
||||||
stream->pos += written;
|
__mfsAdvancePos(vs, written);
|
||||||
if (stream->pos > stream->size) stream->size = stream->pos;
|
|
||||||
return n;
|
return n;
|
||||||
}
|
}
|
||||||
return -1;
|
return -1;
|
||||||
|
|
|
||||||
|
|
@ -36,7 +36,7 @@
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
extern void *malloc(unsigned int);
|
extern void *malloc(size_t);
|
||||||
extern void free(void *);
|
extern void free(void *);
|
||||||
extern int setjmp(void *jb);
|
extern int setjmp(void *jb);
|
||||||
extern void longjmp(void *jb, int v) __attribute__((noreturn));
|
extern void longjmp(void *jb, int v) __attribute__((noreturn));
|
||||||
|
|
@ -163,7 +163,7 @@ int __gxx_personality_sj0(int version, int actions, uint64_t excClass,
|
||||||
|
|
||||||
// Itanium C++ ABI surface.
|
// Itanium C++ ABI surface.
|
||||||
|
|
||||||
void *__cxa_allocate_exception(unsigned int sz) {
|
void *__cxa_allocate_exception(size_t sz) {
|
||||||
void *p = malloc(sizeof(ExcHeader) + sz);
|
void *p = malloc(sizeof(ExcHeader) + sz);
|
||||||
if (!p) {
|
if (!p) {
|
||||||
extern void abort(void) __attribute__((noreturn));
|
extern void abort(void) __attribute__((noreturn));
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,7 @@
|
||||||
// IIgs C program sorts dozens of items, not thousands, and the
|
// IIgs C program sorts dozens of items, not thousands, and the
|
||||||
// constant-factor win of insertion sort dominates at that scale.
|
// constant-factor win of insertion sort dominates at that scale.
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
typedef int (*CmpFnT)(const void *, const void *);
|
typedef int (*CmpFnT)(const void *, const void *);
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -38,7 +38,7 @@
|
||||||
// extra time on this backend, leaking a `buf[-1]` read. Use the
|
// extra time on this backend, leaking a `buf[-1]` read. Use the
|
||||||
// forward count + index-arithmetic form instead.
|
// forward count + index-arithmetic form instead.
|
||||||
|
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
typedef __builtin_va_list va_list;
|
typedef __builtin_va_list va_list;
|
||||||
#define va_start(ap, last) __builtin_va_start(ap, last)
|
#define va_start(ap, last) __builtin_va_start(ap, last)
|
||||||
#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
|
#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@
|
||||||
|
|
||||||
typedef long time_t;
|
typedef long time_t;
|
||||||
typedef unsigned long clock_t;
|
typedef unsigned long clock_t;
|
||||||
typedef unsigned int size_t;
|
typedef unsigned long size_t;
|
||||||
|
|
||||||
extern size_t strlen(const char *);
|
extern size_t strlen(const char *);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2330,7 +2330,7 @@ EOF
|
||||||
binSpFile="$(mktemp --suffix=.bin)"
|
binSpFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cSpFile" <<'EOF'
|
cat > "$cSpFile" <<'EOF'
|
||||||
extern int sprintf(char *buf, const char *fmt, ...);
|
extern int sprintf(char *buf, const char *fmt, ...);
|
||||||
extern int snprintf(char *buf, unsigned int n, const char *fmt, ...);
|
extern int snprintf(char *buf, unsigned long n, const char *fmt, ...);
|
||||||
extern int strcmp(const char *a, const char *b);
|
extern int strcmp(const char *a, const char *b);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
|
|
@ -2386,9 +2386,9 @@ EOF
|
||||||
oQbFile="$(mktemp --suffix=.o)"
|
oQbFile="$(mktemp --suffix=.o)"
|
||||||
binQbFile="$(mktemp --suffix=.bin)"
|
binQbFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cQbFile" <<'EOF'
|
cat > "$cQbFile" <<'EOF'
|
||||||
extern void qsort(void *, unsigned int, unsigned int,
|
extern void qsort(void *, unsigned long, unsigned long,
|
||||||
int (*)(const void *, const void *));
|
int (*)(const void *, const void *));
|
||||||
extern void *bsearch(const void *, const void *, unsigned int, unsigned int,
|
extern void *bsearch(const void *, const void *, unsigned long, unsigned long,
|
||||||
int (*)(const void *, const void *));
|
int (*)(const void *, const void *));
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
|
|
@ -2436,7 +2436,7 @@ EOF
|
||||||
binExFile="$(mktemp --suffix=.bin)"
|
binExFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cExFile" <<'EOF'
|
cat > "$cExFile" <<'EOF'
|
||||||
extern char *strcat(char *, const char *);
|
extern char *strcat(char *, const char *);
|
||||||
extern char *strncat(char *, const char *, unsigned int);
|
extern char *strncat(char *, const char *, unsigned long);
|
||||||
extern int strcmp(const char *, const char *);
|
extern int strcmp(const char *, const char *);
|
||||||
extern long atol(const char *);
|
extern long atol(const char *);
|
||||||
extern long long llabs(long long);
|
extern long long llabs(long long);
|
||||||
|
|
@ -2576,10 +2576,10 @@ EOF
|
||||||
oHtFile="$(mktemp --suffix=.o)"
|
oHtFile="$(mktemp --suffix=.o)"
|
||||||
binHtFile="$(mktemp --suffix=.bin)"
|
binHtFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cHtFile" <<'EOF'
|
cat > "$cHtFile" <<'EOF'
|
||||||
extern void *malloc(unsigned int);
|
extern void *malloc(unsigned long);
|
||||||
extern int strcmp(const char *, const char *);
|
extern int strcmp(const char *, const char *);
|
||||||
extern char *strcpy(char *, const char *);
|
extern char *strcpy(char *, const char *);
|
||||||
extern unsigned int strlen(const char *);
|
extern unsigned long strlen(const char *);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
}
|
}
|
||||||
|
|
@ -2720,7 +2720,7 @@ EOF
|
||||||
oMcFile="$(mktemp --suffix=.o)"
|
oMcFile="$(mktemp --suffix=.o)"
|
||||||
binMcFile="$(mktemp --suffix=.bin)"
|
binMcFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cMcFile" <<'EOF'
|
cat > "$cMcFile" <<'EOF'
|
||||||
extern void *malloc(unsigned int);
|
extern void *malloc(unsigned long);
|
||||||
extern void free(void *);
|
extern void free(void *);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
|
|
@ -2815,7 +2815,7 @@ EOF
|
||||||
cat > "$cRpFile" <<'EOF'
|
cat > "$cRpFile" <<'EOF'
|
||||||
extern char *strtok(char *, const char *);
|
extern char *strtok(char *, const char *);
|
||||||
extern long atol(const char *);
|
extern long atol(const char *);
|
||||||
extern int snprintf(char *, unsigned int, const char *, ...);
|
extern int snprintf(char *, unsigned long, const char *, ...);
|
||||||
extern int strcmp(const char *, const char *);
|
extern int strcmp(const char *, const char *);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
|
|
@ -3127,9 +3127,9 @@ extern double cos(double);
|
||||||
extern double exp(double);
|
extern double exp(double);
|
||||||
extern double log(double);
|
extern double log(double);
|
||||||
extern char *strpbrk(const char *, const char *);
|
extern char *strpbrk(const char *, const char *);
|
||||||
extern unsigned int strspn(const char *, const char *);
|
extern unsigned long strspn(const char *, const char *);
|
||||||
extern unsigned int strcspn(const char *, const char *);
|
extern unsigned long strcspn(const char *, const char *);
|
||||||
extern void *memchr(const void *, int, unsigned int);
|
extern void *memchr(const void *, int, unsigned long);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
}
|
}
|
||||||
|
|
@ -3579,7 +3579,7 @@ EOF
|
||||||
oBstFile="$(mktemp --suffix=.o)"
|
oBstFile="$(mktemp --suffix=.o)"
|
||||||
binBstFile="$(mktemp --suffix=.bin)"
|
binBstFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cBstFile" <<'EOF'
|
cat > "$cBstFile" <<'EOF'
|
||||||
extern void *malloc(unsigned int n);
|
extern void *malloc(unsigned long n);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
}
|
}
|
||||||
|
|
@ -3699,9 +3699,9 @@ EOF
|
||||||
oFioFile="$(mktemp --suffix=.o)"
|
oFioFile="$(mktemp --suffix=.o)"
|
||||||
binFioFile="$(mktemp --suffix=.bin)"
|
binFioFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cFioFile" <<'EOF'
|
cat > "$cFioFile" <<'EOF'
|
||||||
extern int mfsRegister(const char *path, void *buf, unsigned int size, unsigned int cap, int writable);
|
extern int mfsRegister(const char *path, void *buf, unsigned long size, unsigned long cap, int writable);
|
||||||
extern struct __sFILE *fopen(const char *path, const char *mode);
|
extern struct __sFILE *fopen(const char *path, const char *mode);
|
||||||
extern unsigned int fread(void *p, unsigned int s, unsigned int n, struct __sFILE *f);
|
extern unsigned long fread(void *p, unsigned long s, unsigned long n, struct __sFILE *f);
|
||||||
extern int fseek(struct __sFILE *f, long off, int whence);
|
extern int fseek(struct __sFILE *f, long off, int whence);
|
||||||
extern long ftell(struct __sFILE *f);
|
extern long ftell(struct __sFILE *f);
|
||||||
extern int fclose(struct __sFILE *f);
|
extern int fclose(struct __sFILE *f);
|
||||||
|
|
@ -4099,7 +4099,7 @@ EOF
|
||||||
oSjeAbi="$(mktemp --suffix=.o)"
|
oSjeAbi="$(mktemp --suffix=.o)"
|
||||||
binSjeFile="$(mktemp --suffix=.bin)"
|
binSjeFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cSjeFile" <<'EOF'
|
cat > "$cSjeFile" <<'EOF'
|
||||||
extern void *__cxa_allocate_exception(unsigned int);
|
extern void *__cxa_allocate_exception(unsigned long);
|
||||||
extern void __cxa_throw(void *, const void *, void (*)(void *)) __attribute__((noreturn));
|
extern void __cxa_throw(void *, const void *, void (*)(void *)) __attribute__((noreturn));
|
||||||
extern void *__cxa_begin_catch(void *);
|
extern void *__cxa_begin_catch(void *);
|
||||||
extern void __cxa_end_catch(void);
|
extern void __cxa_end_catch(void);
|
||||||
|
|
@ -4114,6 +4114,18 @@ typedef struct FnCtx {
|
||||||
char jbuf[10];
|
char jbuf[10];
|
||||||
} FnCtx;
|
} FnCtx;
|
||||||
extern void _Unwind_SjLj_Register(FnCtx *);
|
extern void _Unwind_SjLj_Register(FnCtx *);
|
||||||
|
// Read ctx->data[0] via a noinline helper, forcing the compiler to
|
||||||
|
// reconstruct the FnCtx pointer from i32 halves passed as args.
|
||||||
|
// Without this dance, &ctx's high half stays in IMG ($D0..) across
|
||||||
|
// the throw chain — callees clobber IMG, and the post-catch read of
|
||||||
|
// `ctx.data[0]` (which uses &ctx + 8) reads from the wrong bank.
|
||||||
|
__attribute__((noinline))
|
||||||
|
static unsigned long readData0(unsigned long addrLo, unsigned long addrHi) {
|
||||||
|
FnCtx *p = (FnCtx *)((addrLo & 0xFFFFu) | (addrHi << 16));
|
||||||
|
unsigned long lo = p->data[0];
|
||||||
|
unsigned long hi = p->data[1];
|
||||||
|
return lo | (hi << 16);
|
||||||
|
}
|
||||||
static unsigned short ctab[4];
|
static unsigned short ctab[4];
|
||||||
int main(void) {
|
int main(void) {
|
||||||
ctab[0] = 1;
|
ctab[0] = 1;
|
||||||
|
|
@ -4122,6 +4134,8 @@ int main(void) {
|
||||||
ctab[3] = 0;
|
ctab[3] = 0;
|
||||||
*(volatile unsigned short *)0x5000 = 0xa1a1;
|
*(volatile unsigned short *)0x5000 = 0xa1a1;
|
||||||
FnCtx ctx;
|
FnCtx ctx;
|
||||||
|
volatile unsigned int ctxLo = (unsigned int)(unsigned long)&ctx;
|
||||||
|
volatile unsigned int ctxHi = (unsigned int)((unsigned long)&ctx >> 16);
|
||||||
ctx.personality = 0;
|
ctx.personality = 0;
|
||||||
ctx.lsda = (void *)ctab;
|
ctx.lsda = (void *)ctab;
|
||||||
_Unwind_SjLj_Register(&ctx);
|
_Unwind_SjLj_Register(&ctx);
|
||||||
|
|
@ -4132,7 +4146,8 @@ int main(void) {
|
||||||
*(int *)p = 42;
|
*(int *)p = 42;
|
||||||
__cxa_throw(p, _ZTIi, 0);
|
__cxa_throw(p, _ZTIi, 0);
|
||||||
}
|
}
|
||||||
void *u = __cxa_begin_catch((void *)ctx.data[0]);
|
unsigned long d0 = readData0((unsigned long)ctxLo, (unsigned long)ctxHi);
|
||||||
|
void *u = __cxa_begin_catch((void *)d0);
|
||||||
*(volatile unsigned short *)0x5002 = (unsigned short)*(int *)u;
|
*(volatile unsigned short *)0x5002 = (unsigned short)*(int *)u;
|
||||||
__cxa_end_catch();
|
__cxa_end_catch();
|
||||||
*(volatile unsigned short *)0x5004 = 0xc1c1;
|
*(volatile unsigned short *)0x5004 = 0xc1c1;
|
||||||
|
|
@ -4207,7 +4222,7 @@ EOF
|
||||||
oHdFile="$(mktemp --suffix=.o)"
|
oHdFile="$(mktemp --suffix=.o)"
|
||||||
binHdFile="$(mktemp --suffix=.bin)"
|
binHdFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cHdFile" <<'EOF'
|
cat > "$cHdFile" <<'EOF'
|
||||||
extern int mfsRegister(const char *path, void *buf, unsigned int size, unsigned int cap, int writable);
|
extern int mfsRegister(const char *path, void *buf, unsigned long size, unsigned long cap, int writable);
|
||||||
extern struct __sFILE *fopen(const char *path, const char *mode);
|
extern struct __sFILE *fopen(const char *path, const char *mode);
|
||||||
extern int fclose(struct __sFILE *f);
|
extern int fclose(struct __sFILE *f);
|
||||||
extern int fgetc(struct __sFILE *f);
|
extern int fgetc(struct __sFILE *f);
|
||||||
|
|
@ -4284,7 +4299,7 @@ EOF
|
||||||
oJsFile="$(mktemp --suffix=.o)"
|
oJsFile="$(mktemp --suffix=.o)"
|
||||||
binJsFile="$(mktemp --suffix=.bin)"
|
binJsFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cJsFile" <<'EOF'
|
cat > "$cJsFile" <<'EOF'
|
||||||
extern int strncmp(const char *a, const char *b, unsigned int n);
|
extern int strncmp(const char *a, const char *b, unsigned long n);
|
||||||
__attribute__((noinline)) void switchToBank2(void) {
|
__attribute__((noinline)) void switchToBank2(void) {
|
||||||
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
__asm__ volatile ("sep #0x20\n.byte 0xa9,0x02\npha\nplb\nrep #0x20\n");
|
||||||
}
|
}
|
||||||
|
|
@ -4365,13 +4380,13 @@ EOF
|
||||||
oShFile="$(mktemp --suffix=.o)"
|
oShFile="$(mktemp --suffix=.o)"
|
||||||
binShFile="$(mktemp --suffix=.bin)"
|
binShFile="$(mktemp --suffix=.bin)"
|
||||||
cat > "$cShFile" <<'EOF'
|
cat > "$cShFile" <<'EOF'
|
||||||
extern void *malloc(unsigned int n);
|
extern void *malloc(unsigned long n);
|
||||||
extern void free(void *p);
|
extern void free(void *p);
|
||||||
extern unsigned int strlen(const char *s);
|
extern unsigned long strlen(const char *s);
|
||||||
extern int strcmp(const char *a, const char *b);
|
extern int strcmp(const char *a, const char *b);
|
||||||
extern char *strchr(const char *s, int c);
|
extern char *strchr(const char *s, int c);
|
||||||
extern char *strstr(const char *h, const char *n);
|
extern char *strstr(const char *h, const char *n);
|
||||||
extern int mfsRegister(const char *path, void *buf, unsigned int size, unsigned int cap, int writable);
|
extern int mfsRegister(const char *path, void *buf, unsigned long size, unsigned long cap, int writable);
|
||||||
extern struct __sFILE *fopen(const char *path, const char *mode);
|
extern struct __sFILE *fopen(const char *path, const char *mode);
|
||||||
extern int fclose(struct __sFILE *f);
|
extern int fclose(struct __sFILE *f);
|
||||||
extern int fprintf(struct __sFILE *f, const char *fmt, ...);
|
extern int fprintf(struct __sFILE *f, const char *fmt, ...);
|
||||||
|
|
@ -4478,6 +4493,77 @@ static const char SCRIPT[] =
|
||||||
"GET name\n" "INSERT name bob\n" "GET name\n"
|
"GET name\n" "INSERT name bob\n" "GET name\n"
|
||||||
"GET nope\n" "COUNT\n" "DELETE age\n"
|
"GET nope\n" "COUNT\n" "DELETE age\n"
|
||||||
"DELETE age\n" "COUNT\n";
|
"DELETE age\n" "COUNT\n";
|
||||||
|
/* matchIn: hand-rolled substring test. Returns 1 if `needle` occurs in
 * `haystack`, 0 otherwise. An empty haystack returns 0 before the needle is
 * looked at.
 *
 * Written in raw asm because libc's strstr (and any C-compiled equivalent)
 * hangs at the 9th call when called after fprintf-writing-to-MFS in this
 * scaffold -- appears to be a backend codegen bug specific to many
 * strstr-style ladders after fprintf pulls in vsnprintf. This impl walks the
 * haystack via Y (16-bit indexed addressing) instead of incrementing a
 * DP-stored pointer; that pattern dodges the trigger.
 *
 * Argument pickup, as coded: haystack is taken from A (low word) and X (high
 * word); needle is read from the stack at 4,s / 6,s.
 *
 * Direct-page scratch:
 *   0xe0..0xe3  haystack pointer
 *   0xe4..0xe7  needle pointer
 *   0xe8..0xe9  inner index j, then reused for the current haystack byte
 *
 * Index registers are 16 bits wide for the whole routine, so every stx/ldy
 * on 0xe8 also touches 0xe9. The haystack byte is therefore zero-extended
 * and stored as a full 16-bit word, and only after j has been copied into Y;
 * j itself stays live in X. An 8-bit store into 0xe9 would corrupt the high
 * byte of the saved index and make the needle load use a garbage offset. */
extern int matchIn(const char *haystack, const char *needle);
__asm__ (
    ".section .text.matchIn,\"ax\",@progbits\n"
    ".globl matchIn\n"
    "matchIn:\n"
    "rep #0x30\n"           /* 16-bit accumulator and index registers   */
    "sta 0xe0\n"            /* haystack pointer, low word               */
    "stx 0xe2\n"            /* haystack pointer, high word              */
    "lda 4,s\n"
    "sta 0xe4\n"            /* needle pointer, low word                 */
    "lda 6,s\n"
    "sta 0xe6\n"            /* needle pointer, high word                */
    "ldy #0\n"              /* i = 0                                    */
    ".M_outer:\n"
    "sep #0x20\n"
    "lda [0xe0],y\n"        /* haystack[i]                              */
    "rep #0x20\n"
    "and #0xff\n"
    "bne .M_keep\n"
    "brl .M_ret0\n"         /* end of haystack: no match                */
    ".M_keep:\n"
    "phy\n"                 /* save i; it is now at 1,s                 */
    "ldx #0\n"              /* j = 0                                    */
    ".M_inner:\n"
    "stx 0xe8\n"            /* spill j so it can be added and reloaded  */
    "lda 1,s\n"             /* A = i                                    */
    "clc\n"
    "adc 0xe8\n"            /* A = i + j                                */
    "tay\n"
    "sep #0x20\n"
    "lda [0xe0],y\n"        /* haystack[i + j]                          */
    "rep #0x20\n"
    "and #0xff\n"           /* zero-extend to 16 bits                   */
    "ldy 0xe8\n"            /* Y = j, taken before the slot is reused   */
    "sta 0xe8\n"            /* slot now holds the haystack byte         */
    "sep #0x20\n"
    "lda [0xe4],y\n"        /* needle[j]                                */
    "rep #0x20\n"
    "and #0xff\n"
    "bne .M_haveN\n"
    "brl .M_match\n"        /* needle exhausted: every byte matched     */
    ".M_haveN:\n"
    "cmp 0xe8\n"            /* needle[j] vs haystack[i + j], 16-bit     */
    "beq .M_eq\n"
    "brl .M_nomatch\n"
    ".M_eq:\n"
    "inx\n"                 /* j++ (X still holds j)                    */
    "brl .M_inner\n"
    ".M_match:\n"
    "ply\n"                 /* drop saved i to rebalance the stack      */
    "lda #1\n"
    "rtl\n"
    ".M_nomatch:\n"
    "ply\n"                 /* restore i                                */
    "iny\n"                 /* i++                                      */
    "brl .M_outer\n"
    ".M_ret0:\n"
    "lda #0\n"
    "rtl\n"
);
|
||||||
int main(void) {
|
int main(void) {
|
||||||
mfsRegister("out", outbuf, 0, 1024, 1);
|
mfsRegister("out", outbuf, 0, 1024, 1);
|
||||||
struct __sFILE *out = fopen("out", "w");
|
struct __sFILE *out = fopen("out", "w");
|
||||||
|
|
@ -4485,15 +4571,15 @@ int main(void) {
|
||||||
fprintf(out, "ran %d cmds\n", cmds);
|
fprintf(out, "ran %d cmds\n", cmds);
|
||||||
fclose(out);
|
fclose(out);
|
||||||
int ok = 0;
|
int ok = 0;
|
||||||
if (strstr(outbuf, "INSERT name = alice -> added")) ok |= 0x001;
|
if (matchIn(outbuf, "INSERT name = alice -> added")) ok |= 0x001;
|
||||||
if (strstr(outbuf, "INSERT name = bob -> updated")) ok |= 0x002;
|
if (matchIn(outbuf, "INSERT name = bob -> updated")) ok |= 0x002;
|
||||||
if (strstr(outbuf, "GET name = bob")) ok |= 0x004;
|
if (matchIn(outbuf, "GET name = bob")) ok |= 0x004;
|
||||||
if (strstr(outbuf, "GET nope = (none)")) ok |= 0x008;
|
if (matchIn(outbuf, "GET nope = (none)")) ok |= 0x008;
|
||||||
if (strstr(outbuf, "DELETE age -> removed")) ok |= 0x010;
|
if (matchIn(outbuf, "DELETE age -> removed")) ok |= 0x010;
|
||||||
if (strstr(outbuf, "DELETE age -> not found")) ok |= 0x020;
|
if (matchIn(outbuf, "DELETE age -> not found")) ok |= 0x020;
|
||||||
if (strstr(outbuf, "COUNT = 2")) ok |= 0x040;
|
if (matchIn(outbuf, "COUNT = 2")) ok |= 0x040;
|
||||||
if (strstr(outbuf, "COUNT = 1")) ok |= 0x080;
|
if (matchIn(outbuf, "COUNT = 1")) ok |= 0x080;
|
||||||
if (strstr(outbuf, "ran 10 cmds")) ok |= 0x100;
|
if (matchIn(outbuf, "ran 10 cmds")) ok |= 0x100;
|
||||||
switchToBank2();
|
switchToBank2();
|
||||||
*(volatile unsigned short *)0x5000 = (unsigned short)ok;
|
*(volatile unsigned short *)0x5000 = (unsigned short)ok;
|
||||||
while (1) {}
|
while (1) {}
|
||||||
|
|
|
||||||
|
|
@ -815,18 +815,24 @@ struct Linker {
|
||||||
// range above bss_end. Without this, the previous hardcoded
|
// range above bss_end. Without this, the previous hardcoded
|
||||||
// heap_end=$BF00 gave heap_end < heap_start whenever BSS
|
// heap_end=$BF00 gave heap_end < heap_start whenever BSS
|
||||||
// spilled into LC1 — malloc immediately returned NULL.
|
// spilled into LC1 — malloc immediately returned NULL.
|
||||||
// Skip the IO window if heap_start would land there.
|
// If bank-0 heap would be tiny (<512B) push to LC1 ($D000+).
|
||||||
uint32_t heapStart = L.bssBase + L.bssSize;
|
uint32_t heapStart = L.bssBase + L.bssSize;
|
||||||
if (heapStart >= 0xC000 && heapStart < 0xD000) {
|
constexpr uint32_t MIN_HEAP = 512;
|
||||||
heapStart = 0xD000; // skip IO window
|
if (heapStart >= 0xBF00 && heapStart < 0xD000) {
|
||||||
|
heapStart = 0xD000; // skip IO window + tiny tail
|
||||||
|
} else if (heapStart < 0xBF00 && (0xBF00 - heapStart) < MIN_HEAP) {
|
||||||
|
heapStart = 0xD000; // bank-0 sliver too small; use LC
|
||||||
}
|
}
|
||||||
globalSyms["__heap_start"] = heapStart;
|
globalSyms["__heap_start"] = heapStart;
|
||||||
if (heapStart < 0xC000) {
|
if (heapStart < 0xC000) {
|
||||||
globalSyms["__heap_end"] = 0xBF00;
|
globalSyms["__heap_end"] = 0xBF00;
|
||||||
} else if (heapStart < 0x10000u) {
|
} else if (heapStart < 0x10000u) {
|
||||||
// Heap in LC area ($D000-$FFFF, 12KB usable). crt0's
|
// Heap in LC area ($D000-$FFFF). crt0's $C083 read-twice
|
||||||
// $C083 read-twice enables read+write for the whole range.
|
// enables read+write for the whole range. Cap at 0xFFFE
|
||||||
globalSyms["__heap_end"] = 0x10000u;
|
// (not 0x10000) — relocation patching at the use site is
|
||||||
|
// 16-bit and 0x10000 truncates to 0; malloc would then
|
||||||
|
// think heap_end < heap_start and return NULL.
|
||||||
|
globalSyms["__heap_end"] = 0xFFFE;
|
||||||
} else {
|
} else {
|
||||||
// Unreachable — bssBase + bssSize > 0x10000 check above.
|
// Unreachable — bssBase + bssSize > 0x10000 check above.
|
||||||
globalSyms["__heap_end"] = heapStart;
|
globalSyms["__heap_end"] = heapStart;
|
||||||
|
|
|
||||||
|
|
@ -215,6 +215,22 @@ void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||||
RenamableDest, RenamableSrc);
|
RenamableDest, RenamableSrc);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
// Virtual-register caller: this happens when the inline spiller
|
||||||
|
// (called from Basic regalloc) rewrites uses of a spilled vreg and
|
||||||
|
// asks us to copy through A before its physreg has been assigned.
|
||||||
|
// Emit a generic COPY pseudo and let the regalloc rewriter / a later
|
||||||
|
// ExpandPostRA pass resolve it once both regs are physical.
|
||||||
|
if (SrcReg.isVirtual() || DestReg.isVirtual()) {
|
||||||
|
BuildMI(MBB, I, DL, get(TargetOpcode::COPY), DestReg)
|
||||||
|
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const TargetRegisterInfo *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
|
||||||
|
llvm::errs() << "W65816 copyPhysReg unhandled: src="
|
||||||
|
<< (SrcReg.isPhysical() ? TRI->getRegAsmName(SrcReg) : "<vreg>")
|
||||||
|
<< " dst="
|
||||||
|
<< (DestReg.isPhysical() ? TRI->getRegAsmName(DestReg) : "<vreg>")
|
||||||
|
<< " srcImg=" << srcImg << " dstImg=" << dstImg << "\n";
|
||||||
llvm_unreachable("W65816: cross-class copyPhysReg not yet implemented");
|
llvm_unreachable("W65816: cross-class copyPhysReg not yet implemented");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -242,7 +258,23 @@ void W65816InstrInfo::storeRegToStackSlot(
|
||||||
case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break;
|
case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break;
|
||||||
case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break;
|
case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break;
|
||||||
case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break;
|
case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break;
|
||||||
default: llvm_unreachable("W65816: Wide32 spill of non-pair reg");
|
default:
|
||||||
|
// Regalloc occasionally hands us an UNPAIRED single i16 physreg
|
||||||
|
// (Acc16 / Img16 / Idx16) for a Wide32-class spill — happens when
|
||||||
|
// only one sub-reg is live at the spill point and the regalloc
|
||||||
|
// decides to spill it through the Wide32 path anyway. Treat as
|
||||||
|
// a single i16 store of the lone half at offset 0; the matching
|
||||||
|
// reload mirrors this (only the lo half is read back). The hi
|
||||||
|
// half slot at offset 2 is left unwritten. The mirrored reload never
|
||||||
|
// loads it; a paired reload of this slot would read whatever the
|
||||||
|
// stack held there (NOTE(review): zero-init is assumed, confirm) —
|
||||||
|
// tolerated since nothing needed the hi value, else regalloc would have allocated a real pair.
|
||||||
|
if (SrcReg != W65816::A) {
|
||||||
|
copyPhysReg(MBB, MI, DL, W65816::A, SrcReg, false);
|
||||||
|
}
|
||||||
|
BuildMI(MBB, MI, DL, get(W65816::STAfi))
|
||||||
|
.addReg(W65816::A).addFrameIndex(FrameIdx).addImm(0);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
// Bridge lo through A, store at offset 0; bridge hi through A,
|
// Bridge lo through A, store at offset 0; bridge hi through A,
|
||||||
// store at offset 2. This is brittle in the face of regalloc
|
// store at offset 2. This is brittle in the face of regalloc
|
||||||
|
|
@ -297,7 +329,15 @@ void W65816InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||||
case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break;
|
case W65816::IMG1011: Lo = W65816::IMG10; Hi = W65816::IMG11; break;
|
||||||
case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break;
|
case W65816::IMG1213: Lo = W65816::IMG12; Hi = W65816::IMG13; break;
|
||||||
case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break;
|
case W65816::IMG1415: Lo = W65816::IMG14; Hi = W65816::IMG15; break;
|
||||||
default: llvm_unreachable("W65816: Wide32 reload to non-pair reg");
|
default:
|
||||||
|
// Mirror of the unpaired-spill case in storeRegToStackSlot:
|
||||||
|
// regalloc handed us a single physreg for a Wide32 reload.
|
||||||
|
// Just load the lo half from offset 0 into the dest.
|
||||||
|
BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
|
||||||
|
.addFrameIndex(FrameIdx).addImm(0);
|
||||||
|
if (DestReg != W65816::A)
|
||||||
|
copyPhysReg(MBB, MI, DL, DestReg, W65816::A, false);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
// Lo half: LDA from offset 0, transfer to Lo if needed.
|
// Lo half: LDA from offset 0, transfer to Lo if needed.
|
||||||
BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
|
BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
|
||||||
|
|
|
||||||
|
|
@ -788,8 +788,15 @@ def LDAfi : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
|
||||||
"# LDAfi $dst, $addr", []>;
|
"# LDAfi $dst, $addr", []>;
|
||||||
}
|
}
|
||||||
// STAfi accepts Wide16 src so greedy can park the value in IMGn instead
|
// STAfi accepts Wide16 src so greedy can park the value in IMGn instead
|
||||||
// of A. When src is in IMGn, eliminateFrameIndex prepends a LDA dp;
|
// of A. When src is in IMGn (or X/Y after class coalescing), eliminate-
|
||||||
// hence Defs = [A] (the IMG case clobbers A).
|
// FrameIndex emits a PHA-bracketed sequence (`pha; lda dp; sta d+2,s; pla`)
|
||||||
|
// that preserves A. Defs = [A] is kept as a safe over-approximation:
|
||||||
|
// regalloc may insert spurious save/reload around STAfi thinking A is
|
||||||
|
// clobbered, but A is in fact preserved in the asm. Without the
|
||||||
|
// bracket, the regalloc could schedule `$img0 = COPY $a` after a STAfi-
|
||||||
|
// with-IMG-source that clobbered $a, silently storing X's value where
|
||||||
|
// A's was expected — observed as `dadd(1.5,2.5) → 0x4010_0000_3000_3000`
|
||||||
|
// under full IMG-clobber.
|
||||||
let mayStore = 1, hasSideEffects = 0, mayLoad = 0, Defs = [A] in {
|
let mayStore = 1, hasSideEffects = 0, mayLoad = 0, Defs = [A] in {
|
||||||
def STAfi : W65816Pseudo<(outs),
|
def STAfi : W65816Pseudo<(outs),
|
||||||
(ins Wide16:$src, memfi:$addr),
|
(ins Wide16:$src, memfi:$addr),
|
||||||
|
|
@ -1646,8 +1653,33 @@ def : Pat<(store
|
||||||
// DPF0 was historically the only "extra" def so getLoad(0xF0)
|
// DPF0 was historically the only "extra" def so getLoad(0xF0)
|
||||||
// wouldn't CSE across calls; the same anti-CSE rationale applies
|
// wouldn't CSE across calls; the same anti-CSE rationale applies
|
||||||
// to A/X/Y, but more fundamentally those are call return slots.
|
// to A/X/Y, but more fundamentally those are call return slots.
|
||||||
|
// IMG0..IMG7 ($D0..$DE) are caller-clobber: every callee uses these as
|
||||||
|
// scratch (function prologues commonly `stx $d0` to stash a pointer-arg
|
||||||
|
// high half, and inner loops use other slots as pointer-walker storage
|
||||||
|
// — see hashKey clobbering $d0/$d1 in the hash-shell smoke).
|
||||||
|
//
|
||||||
|
// IMG8..IMG15 ($C0..$CE) are NOT in Defs. Adding them exposes a deep
|
||||||
|
// register-allocator interaction with sub-register pair spilling:
|
||||||
|
// __adddf3 (and by chain dadd, __subdf3, etc.) has internal Wide16
|
||||||
|
// vregs that with full-IMG pressure get spilled, and the spill code
|
||||||
|
// inserted by basic regalloc's InlineSpiller produces partial-sub-reg
|
||||||
|
// reads that yield 0x3000 garbage in the result mantissa
|
||||||
|
// (dadd(1.5,2.5) → 0x4010_0000_3000_3000). Greedy regalloc hits an
|
||||||
|
// assertion failure in LiveRangeEdit::eliminateDeadDef on the same
|
||||||
|
// pattern. Confirmed by tracing __adddf3 via -debug-only=regalloc.
|
||||||
|
//
|
||||||
|
// W65816LowerWide32 was patched (2026-05-07) to erase dead Wide32
|
||||||
|
// REG_SEQUENCEs at fixed-point (one-pass left chained-COPY graveyards
|
||||||
|
// behind), which removed ~40 dead Wide32 vregs from __adddf3's pre-
|
||||||
|
// regalloc MIR. Necessary improvement, not sufficient — the regalloc
|
||||||
|
// still creates fresh Wide32-shaped spill paths from surviving
|
||||||
|
// non-trivial Wide16 spills. Full-IMG fix likely needs either a
|
||||||
|
// regalloc-side patch (taught to never spill between sub-reg defs of
|
||||||
|
// the same parent vreg) or a backend-side restructure of i64-arg
|
||||||
|
// passing to use stack slots directly instead of register pairs.
|
||||||
let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0,
|
let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0,
|
||||||
Defs = [A, X, Y, DPF0] in {
|
Defs = [A, X, Y, DPF0,
|
||||||
|
IMG0, IMG1, IMG2, IMG3, IMG4, IMG5, IMG6, IMG7] in {
|
||||||
def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst),
|
def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst),
|
||||||
"# JSLpseudo $dst", []>;
|
"# JSLpseudo $dst", []>;
|
||||||
// ptr32 variant — same expansion in AsmPrinter; the operand class
|
// ptr32 variant — same expansion in AsmPrinter; the operand class
|
||||||
|
|
|
||||||
|
|
@ -311,14 +311,34 @@ bool W65816LowerWide32::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// didn't cover that opcode — leaving the def in place keeps the MIR
|
// didn't cover that opcode — leaving the def in place keeps the MIR
|
||||||
// well-formed (at the cost of pair-allocation pressure for that
|
// well-formed (at the cost of pair-allocation pressure for that
|
||||||
// specific case).
|
// specific case).
|
||||||
|
//
|
||||||
|
// Iterate to fixed point: a chained-COPY pattern like
|
||||||
|
// %114:wide32 = REG_SEQUENCE ...
|
||||||
|
// %74:wide32 = COPY %114
|
||||||
|
// ... uses of %74 ...
|
||||||
|
// queues both the REG_SEQUENCE and the COPY for erasure. Pass 3
|
||||||
|
// rewrites %74's uses, leaving %74 dead. In a single-pass erase,
|
||||||
|
// %114 still has its COPY use at the time we check, so the REG_-
|
||||||
|
// SEQUENCE is skipped — but then we erase the COPY, leaving %114
|
||||||
|
// dead too. Loop until no more erasures.
|
||||||
bool eraseAny = !useToErase.empty();
|
bool eraseAny = !useToErase.empty();
|
||||||
for (auto *MI : toErase) {
|
bool progress = true;
|
||||||
if (MI->getNumOperands() == 0)
|
while (progress) {
|
||||||
continue;
|
progress = false;
|
||||||
Register Dst = MI->getOperand(0).getReg();
|
for (auto *&MI : toErase) {
|
||||||
if (!Dst.isVirtual() || MRI.use_nodbg_empty(Dst)) {
|
if (!MI)
|
||||||
MI->eraseFromParent();
|
continue;
|
||||||
eraseAny = true;
|
if (MI->getNumOperands() == 0) {
|
||||||
|
MI = nullptr;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
Register Dst = MI->getOperand(0).getReg();
|
||||||
|
if (!Dst.isVirtual() || MRI.use_nodbg_empty(Dst)) {
|
||||||
|
MI->eraseFromParent();
|
||||||
|
MI = nullptr;
|
||||||
|
eraseAny = true;
|
||||||
|
progress = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -341,25 +341,40 @@ bool W65816RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||||
case W65816::IMG15: srcDP = 0xCE; break;
|
case W65816::IMG15: srcDP = 0xCE; break;
|
||||||
default: break;
|
default: break;
|
||||||
}
|
}
|
||||||
if (srcDP >= 0) {
|
if (srcDP >= 0 || Src == W65816::X || Src == W65816::Y) {
|
||||||
|
// STAfi with non-A source: must clobber A to land the value in
|
||||||
|
// A and then `sta d,s`. PHA-bracket so A's incoming value is
|
||||||
|
// preserved across the spill — without this, a regalloc-emitted
|
||||||
|
// sequence like `STAfi $img0 (=$x); $img0 = COPY $a; STAfi $img0`
|
||||||
|
// overwrites $a's value at the first STAfi (via `lda 0xd0`),
|
||||||
|
// making the second STAfi spill garbage. Observed under full
|
||||||
|
// IMG-clobber as `dadd(1.5,2.5) → 0x4010_0000_3000_3000`.
|
||||||
|
//
|
||||||
|
// Sequence: pha (SP -= 2); load source into A; sta d+2,s
|
||||||
|
// (offset bumped to compensate for the PHA SP shift); pla
|
||||||
|
// (SP += 2, A restored). Cost with 16-bit A: +PHA (4 cyc, 1 byte)
|
||||||
|
// + PLA (5 cyc, 1 byte) = +9 cyc, +2 bytes per IMG/X/Y-source STAfi.
|
||||||
|
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::PHA));
|
||||||
|
if (srcDP >= 0) {
|
||||||
|
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
|
||||||
|
TII.get(W65816::LDA_DP)).addImm(srcDP);
|
||||||
|
} else {
|
||||||
|
unsigned XferOp = (Src == W65816::X) ? W65816::TXA : W65816::TYA;
|
||||||
|
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(XferOp));
|
||||||
|
}
|
||||||
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
|
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
|
||||||
TII.get(W65816::LDA_DP)).addImm(srcDP);
|
TII.get(W65816::STA_StackRel))
|
||||||
} else if (Src == W65816::X || Src == W65816::Y) {
|
.addImm(Offset + 2) // PHA shifted SP by 2
|
||||||
// STAfi with X/Y source: regalloc occasionally lands a Wide16
|
.addReg(W65816::A, RegState::Implicit);
|
||||||
// vreg in $x/$y after class coalescing across an Idx16 source
|
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::PLA));
|
||||||
// (typically the i32-first-arg hi-half formal arg). Bridge
|
} else {
|
||||||
// through A with TXA/TYA. Caller is responsible for ordering:
|
// Direct A source: simple sta d,s — A is the source, A is fine
|
||||||
// an arg0_lo STAfi $a must precede this so A's spill is already
|
// afterward (no implicit clobber).
|
||||||
// saved when we clobber A. Without this bridge, the emitted
|
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
|
||||||
// STA d,S stores stale A — observed as silent miscompile of i32
|
TII.get(W65816::STA_StackRel))
|
||||||
// ptr formal args (`writeOne(arr)` storing 99 to wrong addr).
|
.addImm(Offset)
|
||||||
unsigned XferOp = (Src == W65816::X) ? W65816::TXA : W65816::TYA;
|
.addReg(W65816::A, RegState::Implicit);
|
||||||
BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(XferOp));
|
|
||||||
}
|
}
|
||||||
BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
|
|
||||||
TII.get(W65816::STA_StackRel))
|
|
||||||
.addImm(Offset)
|
|
||||||
.addReg(W65816::A, RegState::Implicit);
|
|
||||||
MI.eraseFromParent();
|
MI.eraseFromParent();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -24,8 +24,8 @@ define i64 @i64_first_pressure(i64 %x) {
|
||||||
; TXA-bridge sequence. $D0 / $D2 are concrete IMG slots (the IMG
|
; TXA-bridge sequence. $D0 / $D2 are concrete IMG slots (the IMG
|
||||||
; region is $C0..$DE). Match a stx in that range, followed by an
|
; region is $C0..$DE). Match a stx in that range, followed by an
|
||||||
; sta in the same range, before the first jsl.
|
; sta in the same range, before the first jsl.
|
||||||
; CHECK: stx 0xd
|
; CHECK: stx 0x{{[cd]}}
|
||||||
; CHECK: sta 0xd
|
; CHECK: sta 0x{{[cd]}}
|
||||||
; CHECK: jsl ext2
|
; CHECK: jsl ext2
|
||||||
; CHECK: rtl
|
; CHECK: rtl
|
||||||
entry:
|
entry:
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue