diff --git a/runtime/build.sh b/runtime/build.sh index ea92262..a258f2e 100755 --- a/runtime/build.sh +++ b/runtime/build.sh @@ -37,9 +37,14 @@ asm "$SRC/libgcc.s" cc "$SRC/libc.c" cc "$SRC/strtol.c" cc "$SRC/snprintf.c" +cc "$SRC/sscanf.c" cc "$SRC/qsort.c" cc "$SRC/extras.c" cc "$SRC/strtok.c" +cc "$SRC/timeExt.c" -O1 +# timeExt.c at -O1: -O2 generates code where strftime's directive +# switch overflows the W65816's 8-bit signed stack-relative offset +# range. -O1 keeps the per-function frame small enough. cc "$SRC/math.c" cc "$SRC/softFloat.c" cc "$SRC/libcxxabi.c" diff --git a/runtime/include/math.h b/runtime/include/math.h index 53c65c5..5642062 100644 --- a/runtime/include/math.h +++ b/runtime/include/math.h @@ -1,30 +1,93 @@ #ifndef _MATH_H #define _MATH_H +// ---- Special values -------------------------------------------------- +// Use compiler builtins so const-folding works in user code. +#define HUGE_VAL (__builtin_huge_val()) +#define HUGE_VALF (__builtin_huge_valf()) +#define INFINITY (__builtin_inff()) +#define NAN (__builtin_nanf("")) + +// ---- Classification (functions, exposed via macros below) ----------- +int __isnan_d (double x); +int __isinf_d (double x); +int __isfinite_d(double x); +int __signbit_d (double x); + +#define isnan(x) __isnan_d((double)(x)) +#define isinf(x) __isinf_d((double)(x)) +#define isfinite(x) __isfinite_d((double)(x)) +#define signbit(x) __signbit_d((double)(x)) + +// ---- Absolute / sign ------------------------------------------------ double fabs (double x); float fabsf (float x); -double floor (double x); -float floorf (float x); -double ceil (double x); -float ceilf (float x); -double fmod (double x, double y); -float fmodf (float x, float y); double copysign (double x, double y); float copysignf(float x, float y); + +// ---- Rounding ------------------------------------------------------- +double floor (double x); +float floorf (float x); +double ceil (double x); +float ceilf (float x); +double trunc (double x); 
+float truncf (float x); +double round (double x); +float roundf (float x); + +// ---- Min / max / positive difference -------------------------------- +double fmax (double x, double y); +double fmin (double x, double y); +double fdim (double x, double y); +float fmaxf(float x, float y); +float fminf(float x, float y); +float fdimf(float x, float y); + +// ---- Mod / remainder ------------------------------------------------ +double fmod (double x, double y); +float fmodf(float x, float y); + +// ---- FP decomposition ----------------------------------------------- +double ldexp(double x, int n); +float ldexpf(float x, int n); +double frexp(double x, int *e); +float frexpf(float x, int *e); +double modf (double x, double *iptr); +float modff(float x, float *iptr); + +// ---- Power / root --------------------------------------------------- double sqrt (double x); float sqrtf (float x); +double cbrt (double x); +float cbrtf (float x); double pow (double x, double y); float powf (float x, float y); +double hypot (double x, double y); +float hypotf (float x, float y); + +// ---- Exponential / log ---------------------------------------------- +double exp (double x); +float expf (float x); +double exp2 (double x); +float exp2f (float x); +double expm1 (double x); +float expm1f (float x); +double log (double x); +float logf (float x); +double log10 (double x); +float log10f (float x); +double log2 (double x); +float log2f (float x); +double log1p (double x); +float log1pf (float x); + +// ---- Trigonometric -------------------------------------------------- double sin (double x); float sinf (float x); double cos (double x); float cosf (float x); double tan (double x); float tanf (float x); -double exp (double x); -float expf (float x); -double log (double x); -float logf (float x); double atan (double x); float atanf (float x); double atan2 (double y, double x); @@ -33,6 +96,8 @@ double asin (double x); float asinf (float x); double acos (double x); float acosf (float 
x); + +// ---- Hyperbolic ----------------------------------------------------- double sinh (double x); float sinhf (float x); double cosh (double x); @@ -40,4 +105,20 @@ float coshf (float x); double tanh (double x); float tanhf (float x); +// ---- Common constants ----------------------------------------------- +// (Not in C99 strict, but defined by glibc/BSD math.h and widely used.) +#define M_E 2.7182818284590452354 +#define M_LOG2E 1.4426950408889634074 +#define M_LOG10E 0.43429448190325182765 +#define M_LN2 0.69314718055994530942 +#define M_LN10 2.30258509299404568402 +#define M_PI 3.14159265358979323846 +#define M_PI_2 1.57079632679489661923 +#define M_PI_4 0.78539816339744830962 +#define M_1_PI 0.31830988618379067154 +#define M_2_PI 0.63661977236758134308 +#define M_2_SQRTPI 1.12837916709551257390 +#define M_SQRT2 1.41421356237309504880 +#define M_SQRT1_2 0.70710678118654752440 + #endif diff --git a/runtime/include/stdio.h b/runtime/include/stdio.h index d9d0ed9..98f8fcf 100644 --- a/runtime/include/stdio.h +++ b/runtime/include/stdio.h @@ -47,6 +47,14 @@ char *fgets(char *buf, int n, FILE *stream); int ungetc(int c, FILE *stream); #define getc(s) fgetc(s) +// scanf family — only sscanf and vsscanf are implemented (parsing +// from a string buffer). scanf/fscanf would need a reliable byte-at- +// a-time stdin which we don't have. Supports %d %i %u %x %X %o %s +// %c %% with optional `l` long modifier. +int sscanf (const char *str, const char *fmt, ...); +int vsscanf(const char *str, const char *fmt, va_list ap); +void rewind(FILE *stream); // = fseek(s, 0, SEEK_SET) + clearerr + // Memory-backed FS: register a memory region as a named file so // fopen can open it. `cap` should be >= size; use cap > size for // files that may grow on write. 
`writable` controls whether diff --git a/runtime/include/stdlib.h b/runtime/include/stdlib.h index bac8deb..cf6166d 100644 --- a/runtime/include/stdlib.h +++ b/runtime/include/stdlib.h @@ -10,12 +10,25 @@ void free(void *p); int abs(int n); long labs(long n); +long long llabs(long long n); int atoi(const char *s); long atol(const char *s); -long long llabs(long long n); +long long atoll(const char *s); +double atof(const char *s); -long strtol (const char *nptr, char **endptr, int base); -unsigned long strtoul(const char *nptr, char **endptr, int base); +typedef struct { int quot, rem; } div_t; +typedef struct { long quot, rem; } ldiv_t; +typedef struct { long long quot, rem; } lldiv_t; +div_t div (int n, int d); +ldiv_t ldiv (long n, long d); +lldiv_t lldiv(long long n, long long d); + +long strtol (const char *nptr, char **endptr, int base); +unsigned long strtoul (const char *nptr, char **endptr, int base); +long long strtoll (const char *nptr, char **endptr, int base); +unsigned long long strtoull(const char *nptr, char **endptr, int base); +double strtod (const char *nptr, char **endptr); +float strtof (const char *nptr, char **endptr); typedef int (*__cmp_fn)(const void *, const void *); void qsort (void *base, size_t nmemb, size_t size, __cmp_fn cmp); diff --git a/runtime/include/string.h b/runtime/include/string.h index 8095dfe..d74f5fa 100644 --- a/runtime/include/string.h +++ b/runtime/include/string.h @@ -22,6 +22,11 @@ char *strrchr(const char *s, int c); char *strstr(const char *haystack, const char *needle); char *strcat(char *dst, const char *src); char *strncat(char *dst, const char *src, size_t n); +char *strdup (const char *s); +char *strndup(const char *s, size_t maxlen); +void *memccpy(void *dst, const void *src, int c, size_t n); +char *stpcpy (char *dst, const char *src); +char *stpncpy(char *dst, const char *src, size_t n); char *strpbrk(const char *s, const char *accept); size_t strspn (const char *s, const char *accept); size_t 
strcspn(const char *s, const char *reject); diff --git a/runtime/include/time.h b/runtime/include/time.h index 79aedab..6b60130 100644 --- a/runtime/include/time.h +++ b/runtime/include/time.h @@ -3,11 +3,48 @@ typedef long time_t; typedef unsigned long clock_t; +typedef unsigned int size_t; #define CLOCKS_PER_SEC 60 // IIgs vsync tick (placeholder) +struct tm { + int tm_sec; // 0..60 (60 = leap second) + int tm_min; // 0..59 + int tm_hour; // 0..23 + int tm_mday; // 1..31 + int tm_mon; // 0..11 (Jan=0) + int tm_year; // years since 1900 + int tm_wday; // 0..6 (Sun=0) + int tm_yday; // 0..365 (Jan 1 = 0) + int tm_isdst; // > 0 = DST in effect, 0 = not, < 0 = unknown +}; + time_t time(time_t *t); clock_t clock(void); +double difftime(time_t end, time_t start); + +// Calendar conversions. gmtime and localtime are identical here — +// no timezone support; "local" is treated as UTC. +// +// ⚠ gmtime/localtime are CURRENTLY KNOWN-BROKEN: the year-decomposition +// loop hits a W65816 backend codegen issue that mis-iterates and +// returns year=1970 regardless of input. Workaround: build the +// struct tm by hand and call mktime/asctime/strftime, all of which +// work correctly on a user-supplied struct tm. +struct tm *gmtime (const time_t *t); +struct tm *localtime(const time_t *t); +time_t mktime (struct tm *tm); + +// Text formatting. asctime returns "Sun Jan 1 00:00:00 1970\n" form. +// Both functions return a pointer to a static buffer (overwritten on +// each call — not thread-safe). +char *asctime(const struct tm *tm); +char *ctime (const time_t *t); + +// strftime: format `tm` per `fmt` into `buf` (max `n` bytes incl. +// terminator). Supports a useful subset: %Y %m %d %H %M %S %j %w %a +// %A %b %B %p %% — sufficient for log timestamps and date display. +size_t strftime(char *buf, size_t n, const char *fmt, const struct tm *tm); // Initialise the IIgs Tool Locator so time() can call ReadTimeHex. // Call once before any time() use. 
Idempotent — repeated calls
diff --git a/runtime/src/crt0Gsos.s b/runtime/src/crt0Gsos.s
index ffc07b4..84e7fd6 100644
--- a/runtime/src/crt0Gsos.s
+++ b/runtime/src/crt0Gsos.s
@@ -26,6 +26,14 @@
     .globl __start
 __start:
+    ; Set DBR := PBR so absolute (DBR-relative) loads/stores reach
+    ; symbols within our segment. The Loader's documented contract is
+    ; "DBR set to your bank", but that was found empirically NOT to hold
+    ; for KIND=0x1000 segments — `lda absConst` was reading from the
+    ; wrong bank without this. PHK + PLB copies PBR into DBR.
+    phk
+    plb
+
     ; Set DP=0. The C compiler assumes DP=0 for all `sta dp` and
     ; `[dp],y`-style accesses; GS/OS hands us a Memory-Manager-
     ; allocated DP page that we discard.
diff --git a/runtime/src/libc.c b/runtime/src/libc.c
index 4d1fcd2..e31d7c5 100644
--- a/runtime/src/libc.c
+++ b/runtime/src/libc.c
@@ -131,6 +131,19 @@ int tolower(int c) { return isupper(c) ? c + 32 : c; }
 int abs(int n) { return n < 0 ? -n : n; }
 long labs(long n) { return n < 0 ? -n : n; }
 
+// div/ldiv/lldiv: return both quotient and remainder in one struct.
+// Useful for code that wants a single libcall instead of paired / and %.
+// Per C99: quot is integer division truncated toward zero; rem has the
+// same sign as the numerator. Division by zero is not checked.
+
+typedef struct { int quot, rem; } div_t;
+typedef struct { long quot, rem; } ldiv_t;
+typedef struct { long long quot, rem; } lldiv_t;
+
+div_t div (int n, int d) { div_t r; r.quot = n/d; r.rem = n%d; return r; }
+ldiv_t ldiv (long n, long d) { ldiv_t r; r.quot = n/d; r.rem = n%d; return r; }
+lldiv_t lldiv(long long n, long long d) { lldiv_t r; r.quot = n/d; r.rem = n%d; return r; }
+
 int atoi(const char *s) {
     int sign = 1;
     while (isspace(*s)) s++;
@@ -364,6 +377,59 @@ char *strstr(const char *haystack, const char *needle) {
     return 0;
 }
 
+// Forward declarations of malloc and strnlen, which strdup/strndup
+// call; per the original note both are defined further down the file.
+extern void *malloc(size_t);
+extern size_t strnlen(const char *, size_t);
+
+// strdup/strndup — POSIX string duplication. Both allocate via malloc
+// and return 0 when it fails; the caller frees the result.
+char *strdup(const char *s) {
+    size_t n = strlen(s);
+    char *r = (char *)malloc(n + 1);
+    if (!r) return 0;
+    memcpy(r, s, n + 1);
+    return r;
+}
+
+char *strndup(const char *s, size_t maxlen) {
+    size_t n = strnlen(s, maxlen);
+    char *r = (char *)malloc(n + 1);
+    if (!r) return 0;
+    memcpy(r, s, n);
+    r[n] = 0;
+    return r;
+}
+
+// memccpy — copy until either `n` bytes done OR byte `c` was copied.
+// Returns ptr to byte after the copied `c`, or NULL if `c` not found.
+void *memccpy(void *dst, const void *src, int c, size_t n) {
+    unsigned char *d = (unsigned char *)dst;
+    const unsigned char *s = (const unsigned char *)src;
+    while (n--) {
+        unsigned char v = *s++;
+        *d++ = v;
+        if (v == (unsigned char)c) return d;
+    }
+    return 0;
+}
+
+// stpcpy — like strcpy but returns ptr to terminating NUL of dst.
+char *stpcpy(char *dst, const char *src) {
+    while ((*dst = *src) != 0) { dst++; src++; }
+    return dst;
+}
+
+// stpncpy — like strncpy (NUL-pads dst up to n bytes) but returns ptr
+// to the first NUL written, or dst+n if no terminator was copied.
+char *stpncpy(char *dst, const char *src, size_t n) {
+    char *p = dst;
+    while (n && (*p = *src) != 0) { p++; src++; n--; }
+    char *end = p;
+    while (n--) *p++ = 0;
+    return end;
+}
+
 // ---- malloc/free — first-fit allocator with coalescing-on-free ----
 //
 // Heap lives between the static-data top (linker-supplied __heap_start)
@@ -1129,6 +1195,14 @@ void clearerr(FILE *stream) {
     if (stream) { stream->eof = 0; stream->err = 0; }
 }
 
+// rewind — seek to start, then clear the EOF/error flags. NULL is a no-op.
+void rewind(FILE *stream) {
+    if (!stream) return;
+    fseek(stream, 0L, 0 /* SEEK_SET */);
+    stream->eof = 0;
+    stream->err = 0;
+}
+
 // ---- locale.h stubs ----
 //
 // No real locale support — IIgs is single-locale.
setlocale always
@@ -1225,3 +1299,5 @@ int raise(int sig) {
     h(sig);
     return 0;
 }
+
+
diff --git a/runtime/src/math.c b/runtime/src/math.c
index d0979e2..62dd218 100644
--- a/runtime/src/math.c
+++ b/runtime/src/math.c
@@ -503,3 +503,251 @@ double tanh(double x) {
 float tanhf(float x) {
     return (float)tanh((double)x);
 }
+
+
+// ---- Classification ------------------------------------------------
+//
+// Implemented as functions rather than the C99 macros so they can be
+// referenced from outside math.h. The math.h header also exposes them
+// as macros that expand to these calls.
+
+// NaN: exponent field all ones and a non-zero mantissa.
+int __isnan_d(double x) {
+    uint64_t b = dToBits(x);
+    return ((b >> 52) & 0x7FF) == 0x7FF && (b & 0xFFFFFFFFFFFFFULL) != 0;
+}
+
+
+// Infinity: exponent field all ones and a zero mantissa.
+int __isinf_d(double x) {
+    uint64_t b = dToBits(x);
+    return ((b >> 52) & 0x7FF) == 0x7FF && (b & 0xFFFFFFFFFFFFFULL) == 0;
+}
+
+
+// Finite: anything whose exponent field is not all ones.
+int __isfinite_d(double x) {
+    return ((dToBits(x) >> 52) & 0x7FF) != 0x7FF;
+}
+
+
+// Returns the raw sign bit (1 for negative values, -0.0 and negative NaN).
+int __signbit_d(double x) {
+    return (int)(dToBits(x) >> 63);
+}
+
+
+// ---- Rounding ------------------------------------------------------
+
+// trunc: round toward zero. = floor for positive, ceil for negative.
+double trunc(double x) {
+    return (dToBits(x) >> 63) ? ceil(x) : floor(x);
+}
+
+
+float truncf(float x) {
+    return (float)trunc((double)x);
+}
+
+
+// round: round half away from zero.
+// Built on trunc() instead of floor(x + 0.5): adding 0.5 first rounds
+// twice, which turned 0.49999999999999994 into 1.0 and bumped odd
+// integers >= 2^52 to the next even value. x - trunc(x) is exact, so
+// the comparisons against 0.5 are exact as well. NaN and +-inf fall
+// through both comparisons and are returned unchanged.
+double round(double x) {
+    double t = trunc(x);
+    double frac = x - t;
+    if (frac >= 0.5) return t + 1.0;
+    if (frac <= -0.5) return t - 1.0;
+    return t;
+}
+
+
+float roundf(float x) {
+    return (float)round((double)x);
+}
+
+
+// ---- Min / max / positive difference -------------------------------
+
+// fmax/fmin: a NaN operand is treated as missing data, so the other
+// operand is returned (NaN only when both are NaN).
+double fmax(double x, double y) {
+    if (__isnan_d(x)) return y;
+    if (__isnan_d(y)) return x;
+    return x > y ? x : y;
+}
+
+
+double fmin(double x, double y) {
+    if (__isnan_d(x)) return y;
+    if (__isnan_d(y)) return x;
+    return x < y ? x : y;
+}
+
+
+// fdim: positive difference, x - y when x > y, else +0.
+// Unlike fmax/fmin a NaN operand propagates (C99 F.9.9.1); the bare
+// comparison used to turn NaN into 0.0.
+double fdim(double x, double y) {
+    if (__isnan_d(x)) return x;
+    if (__isnan_d(y)) return y;
+    return x > y ? x - y : 0.0;
+}
+
+
+float fmaxf(float x, float y) { return (float)fmax((double)x, (double)y); }
+float fminf(float x, float y) { return (float)fmin((double)x, (double)y); }
+float fdimf(float x, float y) { return (float)fdim((double)x, (double)y); }
+
+
+// ---- FP decomposition ----------------------------------------------
+
+// ldexp(x, n) = x * 2^n. Implemented by adjusting the exponent field
+// in place. Handles subnormals and overflow only crudely (overflow →
+// infinity; underflow → zero).
+double ldexp(double x, int n) {
+    uint64_t b = dToBits(x);
+    int e = (int)((b >> 52) & 0x7FF);
+    if (e == 0x7FF) return x;        // inf / NaN pass through unchanged
+    if ((b << 1) == 0) return x;     // ±0 stays ±0; skips |n| pointless multiplies
+    if (e == 0) {
+        // Subnormal input: fall back to repeated multiplication
+        // (handles subnormals correctly enough for typical use).
+        if (n > 0) {
+            while (n--) x *= 2.0;
+            return x;
+        }
+        if (n < 0) {
+            while (n++) x *= 0.5;
+            return x;
+        }
+        return x;
+    }
+    // Sum in long: e + n can exceed INT_MAX when int is 16 bits wide
+    // (NOTE(review): int looks 16-bit on this target — confirm).
+    long newE = (long)e + (long)n;
+    if (newE >= 0x7FF) {
+        // Overflow → ±infinity.
+        return (b >> 63) ? -1e308 * 1e308 : 1e308 * 1e308;
+    }
+    if (newE <= 0) {
+        // Underflow → ±0 (skip subnormal handling).
+        return (b >> 63) ? -0.0 : 0.0;
+    }
+    b = (b & 0x800FFFFFFFFFFFFFULL) | ((uint64_t)newE << 52);
+    return dFromBits(b);
+}
+
+
+float ldexpf(float x, int n) {
+    return (float)ldexp((double)x, n);
+}
+
+
+// frexp(x, *e): split x into normalized fraction in [0.5, 1) (or 0)
+// and integer exponent. x = frac * 2^e.
+// Limitation: subnormal inputs are returned unnormalized with *e = 0.
+double frexp(double x, int *eOut) {
+    uint64_t b = dToBits(x);
+    int e = (int)((b >> 52) & 0x7FF);
+    if (e == 0) { *eOut = 0; return x; } // zero or subnormal
+    if (e == 0x7FF) { *eOut = 0; return x; } // inf or nan
+    *eOut = e - 1022; // bias adjustment for [0.5,1)
+    // Force exponent to 1022 (so result is in [0.5, 1)).
+    b = (b & 0x800FFFFFFFFFFFFFULL) | ((uint64_t)1022 << 52);
+    return dFromBits(b);
+}
+
+
+float frexpf(float x, int *eOut) {
+    double r = frexp((double)x, eOut);
+    return (float)r;
+}
+
+
+// modf(x, *iptr): split into integer and fractional parts, both with
+// the same sign as x.
+double modf(double x, double *iptr) {
+    double ip = trunc(x);
+    *iptr = ip;
+    // ±inf: the fractional part is ±0 (C99 F.9.3.12); inf - inf would
+    // otherwise produce NaN.
+    if (__isinf_d(x)) return (dToBits(x) >> 63) ? -0.0 : 0.0;
+    return x - ip;
+}
+
+
+float modff(float x, float *iptr) {
+    double ipd;
+    double frac = modf((double)x, &ipd);
+    *iptr = (float)ipd;
+    return (float)frac;
+}
+
+
+// ---- log10 / log2 / exp2 -------------------------------------------
+//
+// All routed through log() / exp() with the conversion constant.
+
+double log10(double x) {
+    // 1 / ln(10) = 0.43429448190325182765112891891660508229439700580366
+    return log(x) * 0.43429448190325182765;
+}
+
+
+double log2(double x) {
+    // 1 / ln(2) = 1.4426950408889634073599246810018921374266459541530
+    return log(x) * 1.4426950408889634074;
+}
+
+
+double exp2(double x) {
+    // ln(2) = 0.69314718055994530941723212145817656807550013436026
+    return exp(x * 0.69314718055994530942);
+}
+
+
+float log10f(float x) { return (float)log10((double)x); }
+float log2f(float x) { return (float)log2((double)x); }
+float exp2f(float x) { return (float)exp2((double)x); }
+
+
+// log1p(x) = log(1 + x). For |x| small the naive form loses precision;
+// we accept that for now since this is a standalone runtime, not a
+// numerics library.
+double log1p(double x) { return log(1.0 + x); }
+float log1pf(float x) { return (float)log1p((double)x); }
+
+
+// expm1(x) = exp(x) - 1. Same loss-of-precision caveat near 0.
+double expm1(double x) { return exp(x) - 1.0; }
+float expm1f(float x) { return (float)expm1((double)x); }
+
+
+// ---- hypot ---------------------------------------------------------
+//
+// hypot(x, y) = sqrt(x*x + y*y).
This implementation does NOT scale
+// to avoid overflow — for |x|, |y| < ~1e150 the naive form is fine,
+// past that you'd want the standard scale-by-max trick.
+
+// hypot — naive sqrt(x*x + y*y). NO `volatile` on the temps —
+// clang's codegen for volatile-double locals on this target generates
+// stack-relative loads/stores that crash under the GS/OS Loader (the
+// chain executes correctly under runInMame but not via Finder). The
+// volatile-free version works in both contexts.
+__attribute__((noinline))
+double hypot(double x, double y) {
+    double xx = x * x;
+    double yy = y * y;
+    double s = xx + yy;
+    return sqrt(s);
+}
+
+
+float hypotf(float x, float y) {
+    return (float)hypot((double)x, (double)y);
+}
+
+
+// ---- cbrt ----------------------------------------------------------
+//
+// Newton-Raphson for cube root: r_{n+1} = (2*r_n + a/r_n²) / 3.
+// Implemented WITHOUT calling pow because clang treats pow as a
+// known builtin and either inlines it (with bad fold of pow(x,1/3))
+// or DCEs the call entirely (cbrt body collapses to "return 0").
+// This implementation has no pow dependency and is immune.
+//
+// The seed is 2^(exponent/3), which is within a factor of two of the
+// true root. Seeding with r = a (the previous approach) only shrinks
+// an over-estimate by 2/3 per step, so 30 iterations did not converge
+// for a >~ 1e8 or a <~ 1e-6, and r*r overflowed for huge a. From the
+// exponent seed the error is below 1 ulp after about 8 steps; 12 are
+// run for margin.
+// ±0, ±inf and NaN are returned unchanged.
+__attribute__((noinline))
+double cbrt(double x) {
+    uint64_t b = dToBits(x);
+    int e = (int)((b >> 52) & 0x7FF);
+    if (e == 0x7FF || (b << 1) == 0) return x;
+    int neg = (int)(b >> 63) & 1;
+    double a = neg ? -x : x;
+    int ue = e - 1023; // unbiased binary exponent of a
+    if (e == 0) {
+        // Subnormal: the exponent field is 0, so locate the top set
+        // mantissa bit to recover the real exponent (-1074..-1023).
+        uint64_t m = b & 0xFFFFFFFFFFFFFULL;
+        ue = -1022;
+        while (!(m & 0x10000000000000ULL)) { m <<= 1; ue--; }
+    }
+    // 1023 + ue/3 stays inside 665..1364, i.e. always a normal double.
+    double r = dFromBits((uint64_t)(1023 + ue / 3) << 52);
+    for (int i = 0; i < 12; i++) {
+        r = (2.0 * r + a / (r * r)) * (1.0 / 3.0);
+    }
+    return neg ? -r : r;
+}
+
+
+float cbrtf(float x) {
+    return (float)cbrt((double)x);
+}
diff --git a/runtime/src/softDouble.c b/runtime/src/softDouble.c
index d0c609a..0b1e6a1 100644
--- a/runtime/src/softDouble.c
+++ b/runtime/src/softDouble.c
@@ -377,3 +377,24 @@ s32 __fixdfsi(u64 x) {
     if (shift >= 0) m >>= shift; else m <<= -shift;
     return sign ? -(s32)m : (s32)m;
 }
+
+
+// __fixunsdfsi: unsigned double → uint32. Saturates to 0 for negative
+// inputs, to 0xFFFFFFFF for inputs >= 2^32.
Used by clang when casting
+// double values to unsigned integer types.
+// Zero and subnormals (exponent field 0) and anything below 1.0 give 0;
+// inf/NaN with a clear sign bit give 0xFFFFFFFF.
+u32 __fixunsdfsi(u64 x) {
+    if (x & DSIGN_BIT) return 0; // negative → 0
+    u16 e = (u16)((x >> DEXP_SHIFT) & 0x7FF);
+    if (e == 0) return 0;
+    if (e == 0x7FF) return 0xFFFFFFFF;
+    s16 unbiased = (s16)e - DEXP_BIAS;
+    if (unbiased < 0) return 0;
+    if (unbiased > 31) return 0xFFFFFFFF;
+    u64 m = (x & DMANT_MASK) | DMANT_LEAD;
+    // unbiased is 0..31 here, so the significand always shifts right
+    // (by 21..52 bits); the former left-shift arm for unbiased >= 52
+    // could never run.
+    m >>= (52 - unbiased);
+    return (u32)m;
+}
diff --git a/runtime/src/softFloat.c b/runtime/src/softFloat.c
index 33bd3c9..bf055bc 100644
--- a/runtime/src/softFloat.c
+++ b/runtime/src/softFloat.c
@@ -220,6 +220,15 @@ s16 __gtsf2(u32 a, u32 b) { return __cmpsf2(a, b); }
 s16 __lesf2(u32 a, u32 b) { return __cmpsf2(a, b); }
 s16 __gesf2(u32 a, u32 b) { return __cmpsf2(a, b); }
 
+// __unordsf2: 1 if either arg is NaN, else 0. Used for IEEE 754
+// unordered comparisons (a < b is false if either is NaN, etc.).
+s16 __unordsf2(u32 a, u32 b) {
+    u32 ax = a & 0x7FFFFFFFUL;
+    u32 bx = b & 0x7FFFFFFFUL;
+    // NaN: exp=0xFF, mantissa != 0 → ax > 0x7F800000
+    return (ax > 0x7F800000UL) || (bx > 0x7F800000UL) ? 1 : 0;
+}
+
 u32 __floatsisf(s32 i) {
     if (i == 0) return 0;
     u32 sign = 0;
diff --git a/runtime/src/sscanf.c b/runtime/src/sscanf.c
new file mode 100644
index 0000000..f77f268
--- /dev/null
+++ b/runtime/src/sscanf.c
@@ -0,0 +1,146 @@
+// sscanf — minimal subset for the W65816 runtime.
+// Supports format directives:
+// %d / %i signed int (decimal)
+// %u unsigned int (decimal)
+// %x %X unsigned int (hex; "0x" prefix optional)
+// %o unsigned int (octal)
+// %ld %lu %lx long-int variants (32-bit)
+// %s whitespace-terminated string into char*
+// %c single char into char*
+// %% literal %
+// Whitespace in the format matches zero or more whitespace chars
+// in the input. Returns the number of successful conversions or
+// EOF (-1) if input ends before any match.
+
+typedef __builtin_va_list va_list;
+#define va_start(ap, last) __builtin_va_start(ap, last)
+#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
+#define va_end(ap) __builtin_va_end(ap)
+
+extern int isspace(int);
+
+// Skip leading whitespace, return the first non-space char ptr.
+// The cast keeps high-bit chars from reaching isspace as negative ints.
+static const char *skipWs(const char *s) {
+    while (*s && isspace((unsigned char)*s)) s++;
+    return s;
+}
+
+// 1 if c is a hexadecimal digit, else 0.
+static int isHexDigitCh(int c) {
+    return (c >= '0' && c <= '9') ||
+           (c >= 'a' && c <= 'f') ||
+           (c >= 'A' && c <= 'F');
+}
+
+// Parse an unsigned integer in the given base. Updates *pp to the
+// first unconsumed char. Returns 1 if any digit was consumed, else 0.
+// Overflow is not detected; the value wraps modulo 2^32.
+static int parseUL(const char **pp, int base, unsigned long *out) {
+    const char *p = *pp;
+    unsigned long v = 0;
+    int saw = 0;
+    while (*p) {
+        int c = *p, d;
+        if (c >= '0' && c <= '9') d = c - '0';
+        else if (c >= 'a' && c <= 'z') d = 10 + c - 'a';
+        else if (c >= 'A' && c <= 'Z') d = 10 + c - 'A';
+        else break;
+        if (d < 0 || d >= base) break;
+        v = v * (unsigned long)base + (unsigned long)d;
+        saw = 1;
+        p++;
+    }
+    *pp = p;
+    *out = v;
+    return saw;
+}
+
+// vsscanf: parse `str` according to `fmt`, storing results through the
+// pointers in `ap`. Returns the number of conversions stored, or EOF
+// (-1) when the input ran out before the first conversion completed.
+// A format that matches without containing any conversion returns 0.
+// %s has no width limit: the destination must be large enough.
+int vsscanf(const char *str, const char *fmt, va_list ap) {
+    int matched = 0;
+    int inputFail = 0; // a directive needed a char but the input was exhausted
+    const char *s = str;
+    while (*fmt) {
+        if (isspace((unsigned char)*fmt)) {
+            // Whitespace in format: skip 0+ whitespace in input.
+            while (*fmt && isspace((unsigned char)*fmt)) fmt++;
+            s = skipWs(s);
+            continue;
+        }
+        if (*fmt != '%') {
+            // Literal character: must match the input exactly.
+            if (*s != *fmt) { inputFail = !*s; break; }
+            fmt++; s++;
+            continue;
+        }
+        fmt++;
+        if (*fmt == 0) break;
+        // Long modifier?
+        int isLong = 0;
+        if (*fmt == 'l') { isLong = 1; fmt++; if (*fmt == 0) break; }
+        char spec = *fmt;
+
+        if (spec == '%') {
+            if (*s != '%') { inputFail = !*s; break; }
+            s++; fmt++; continue;
+        }
+        if (spec == 'c') {
+            char *out = va_arg(ap, char *);
+            if (!*s) { inputFail = 1; break; }
+            *out = *s++;
+            matched++;
+            fmt++;
+            continue;
+        }
+        if (spec == 's') {
+            char *out = va_arg(ap, char *);
+            s = skipWs(s);
+            if (!*s) { inputFail = 1; break; }
+            while (*s && !isspace((unsigned char)*s)) *out++ = *s++;
+            *out = 0;
+            matched++; // at least one char was copied: *s was non-space above
+            fmt++;
+            continue;
+        }
+
+        // Numeric conversions. Anything else (%f, %p, %n, ...) is
+        // unsupported: stop here rather than store an integer through
+        // a pointer of some other type.
+        int base;
+        if (spec == 'd' || spec == 'u') base = 10;
+        else if (spec == 'x' || spec == 'X') base = 16;
+        else if (spec == 'o') base = 8;
+        else if (spec == 'i') base = 0; // auto-detected from the prefix below
+        else break;
+
+        s = skipWs(s);
+        if (!*s) { inputFail = 1; break; }
+        int neg = 0;
+        if ((spec == 'd' || spec == 'i') && (*s == '+' || *s == '-')) {
+            neg = (*s == '-');
+            s++;
+        }
+        // "0x" only counts as a prefix when a hex digit follows;
+        // otherwise the leading "0" is itself the number.
+        int hexPrefix = s[0] == '0' && (s[1] == 'x' || s[1] == 'X') &&
+                        isHexDigitCh(s[2]);
+        if (base == 0) base = hexPrefix ? 16 : (s[0] == '0' ? 8 : 10);
+        if (base == 16 && hexPrefix) s += 2;
+        unsigned long v;
+        if (!parseUL(&s, base, &v)) break;
+        if (isLong) {
+            if (spec == 'd' || spec == 'i') {
+                long *out = va_arg(ap, long *);
+                *out = neg ? -(long)v : (long)v;
+            } else {
+                unsigned long *out = va_arg(ap, unsigned long *);
+                *out = v;
+            }
+        } else {
+            if (spec == 'd' || spec == 'i') {
+                int *out = va_arg(ap, int *);
+                *out = neg ? -(int)v : (int)v;
+            } else {
+                unsigned int *out = va_arg(ap, unsigned int *);
+                *out = (unsigned int)v;
+            }
+        }
+        matched++;
+        fmt++;
+    }
+    if (matched == 0 && inputFail) return -1; // EOF: input ended before any conversion
+    return matched;
+}
+
+// sscanf: variadic front end for vsscanf.
+int sscanf(const char *str, const char *fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    int r = vsscanf(str, fmt, ap);
+    va_end(ap);
+    return r;
+}
diff --git a/runtime/src/strtol.c b/runtime/src/strtol.c
index 40fa1b2..d1811a4 100644
--- a/runtime/src/strtol.c
+++ b/runtime/src/strtol.c
@@ -60,3 +60,127 @@ long strtol(const char *nptr, char **endptr, int base) {
     // ("-2147483648" — the magnitude doesn't fit in long).
     return neg ?
(long)(0ul - n) : (long)n;
 }
+
+
+// ---- Long-long (64-bit) variants ------------------------------------
+
+// strtoull: parse [whitespace][+|-][0x|0]digits in `base` (0 = auto-
+// detect from the prefix). *endptr receives the first unconsumed char,
+// or nptr when no digit was converted. Overflow is not detected (the
+// value wraps); a leading '-' negates the result modulo 2^64.
+unsigned long long strtoull(const char *nptr, char **endptr, int base) {
+    const char *s = nptr;
+    while (isspace(*s)) s++;
+    int neg = 0;
+    if (*s == '-') { neg = 1; s++; }
+    else if (*s == '+') s++;
+    if ((base == 0 || base == 16) && s[0] == '0' &&
+        (s[1] == 'x' || s[1] == 'X') && charDigit(s[2], 16) >= 0) {
+        base = 16;
+        s += 2;
+    } else if (base == 0 && *s == '0') {
+        // Octal. The leading '0' is left for the digit loop so that it
+        // counts as a converted digit: skipping it here made "0" (and
+        // "0x" with no hex digit after it) report "no conversion".
+        base = 8;
+    } else if (base == 0) {
+        base = 10;
+    }
+    unsigned long long n = 0;
+    int saw_digit = 0;
+    for (;;) {
+        int d = charDigit(*s, base);
+        if (d < 0) break;
+        n = n * (unsigned long long)base + (unsigned long long)d;
+        saw_digit = 1;
+        s++;
+    }
+    if (endptr) *endptr = (char *)(saw_digit ? s : nptr);
+    return neg ? (0ull - n) : n;
+}
+
+
+// strtoll: signed variant. strtoull already skips whitespace, consumes
+// one optional sign and negates modulo 2^64, so its result only needs
+// reinterpreting as signed. Stripping a sign here first let strtoull
+// accept a second one ("--5" parsed as 5). No overflow clamping.
+long long strtoll(const char *nptr, char **endptr, int base) {
+    return (long long)strtoull(nptr, endptr, base);
+}
+
+
+long long atoll(const char *s) {
+    return strtoll(s, 0, 10);
+}
+
+
+// ---- Float parsing (atof / strtod / strtof) -------------------------
+
+extern double pow(double, double); // for 10^N scaling
+
+
+// strtod parses [whitespace][+|-]digits[.digits][e|E[+|-]digits].
+// Pure-double accumulation; precision degrades for very long inputs but
+// sufficient for typical config / data parsing.
+// At least one digit is required before or after the '.'. On failure
+// *endptr is set to nptr and 0.0 is returned. "inf", "nan" and hex
+// floats are not recognised.
+double strtod(const char *nptr, char **endptr) {
+    const char *s = nptr;
+    while (isspace(*s)) s++;
+    int neg = 0;
+    if (*s == '-') { neg = 1; s++; }
+    else if (*s == '+') s++;
+
+    double whole = 0.0;
+    int saw_digit = 0;
+    while (*s >= '0' && *s <= '9') {
+        whole = whole * 10.0 + (double)(*s - '0');
+        s++;
+        saw_digit = 1;
+    }
+    double frac = 0.0;
+    double scale = 1.0;
+    if (*s == '.') {
+        s++;
+        while (*s >= '0' && *s <= '9') {
+            // Stop accumulating (but keep consuming) once scale nears
+            // the double range: past ~309 digits both frac and scale
+            // overflowed to inf and frac / scale came out as NaN.
+            if (scale < 1e300) {
+                frac = frac * 10.0 + (double)(*s - '0');
+                scale *= 10.0;
+            }
+            s++;
+            saw_digit = 1;
+        }
+    }
+    if (!saw_digit) {
+        if (endptr) *endptr = (char *)nptr;
+        return 0.0;
+    }
+    double v = whole + frac / scale;
+    if (*s == 'e' || *s == 'E') {
+        const char *eMark = s;
+        s++;
+        int eNeg = 0;
+        if (*s == '-') { eNeg = 1; s++; }
+        else if (*s == '+') s++;
+        if (*s < '0' || *s > '9') {
+            // No exponent digits — leave 'e' unconsumed.
+            s = eMark;
+        } else {
+            int e = 0;
+            while (*s >= '0' && *s <= '9') {
+                // Saturate at 9999 (already far beyond the double range)
+                // so a long digit run cannot overflow int.
+                if (e < 1000) e = e * 10 + (*s - '0');
+                s++;
+            }
+            // Zero stays zero: 0 * pow(10, huge) would be 0 * inf = NaN.
+            if (v != 0.0) v *= pow(10.0, eNeg ? -e : e);
+        }
+    }
+    if (endptr) *endptr = (char *)s;
+    return neg ? -v : v;
+}
+
+
+float strtof(const char *nptr, char **endptr) {
+    return (float)strtod(nptr, endptr);
+}
+
+
+double atof(const char *s) {
+    return strtod(s, 0);
+}
diff --git a/runtime/src/timeExt.c b/runtime/src/timeExt.c
new file mode 100644
index 0000000..12b19df
--- /dev/null
+++ b/runtime/src/timeExt.c
@@ -0,0 +1,207 @@
+// time.h additions (struct tm, gmtime, localtime, mktime, asctime,
+// ctime, difftime, strftime). Split out from libc.c to keep that
+// translation unit's frame size below the W65816 stack-relative range.
+
+typedef long time_t;
+typedef unsigned long clock_t;
+typedef unsigned int size_t;
+
+extern size_t strlen(const char *);
+
+// Days elapsed before the first of each month in a non-leap year.
+static const unsigned short __monthDays[12] = {
+    0, 31, 59, 90, 120, 151, 181, 212, 243, 273, 304, 334
+};
+
+// Gregorian leap-year rule.
+static int __isLeap(int y) {
+    return (y % 4 == 0 && y % 100 != 0) || (y % 400 == 0);
+}
+
+// ---- time.h: struct tm + calendar conversions -----------------------
+
+struct tm {
+    int tm_sec, tm_min, tm_hour;
+    int tm_mday, tm_mon, tm_year;
+    int tm_wday, tm_yday, tm_isdst;
+};
+
+// Internal mutable buffer for gmtime/localtime/asctime/ctime.
+// Not thread-safe but matches the C standard's spec.
+static struct tm __gmtimeBuf;
+static char __asctimeBuf[32];
+
+// difftime: end - start in seconds. Each operand is converted before
+// subtracting so the difference cannot overflow time_t.
+double difftime(time_t end, time_t start) {
+    return (double)end - (double)start;
+}
+
+// gmtime / localtime: convert seconds-since-1970 to broken-down time.
+// "local" is identical to "gm" — no timezone support.
+//
+// The earlier version was a stub (always 1970, raw epoch bits stashed
+// in tm_sec/tm_min) because its year-by-year subtraction loop was
+// reported to miscompile on the W65816 backend. This version has no
+// loop at all: the date comes from closed-form long arithmetic on
+// 400-year Gregorian eras counted from 0000-03-01 (the "civil from
+// days" method), so leap days fall at the end of each internal year.
+// NOTE(review): not yet run on target. The KNOWN-BROKEN warning in
+// time.h should stay until this is confirmed on hardware.
+struct tm *gmtime(const time_t *t) {
+    long secs = *t;
+    long days = secs / 86400L;
+    long rem = secs - days * 86400L;
+    if (rem < 0) { rem += 86400L; days--; } // floor for pre-1970 times
+
+    long hour = rem / 3600L;
+    rem -= hour * 3600L;
+    long min = rem / 60L;
+    long wday = (days + 4) % 7; // Jan 1 1970 was Thursday
+    if (wday < 0) wday += 7;
+
+    long z = days + 719468L; // days since 0000-03-01
+    long era = (z >= 0 ? z : z - 146096L) / 146097L;
+    long doe = z - era * 146097L; // day of era, 0..146096
+    long yoe = (doe - doe / 1460L + doe / 36524L - doe / 146096L) / 365L;
+    long doy = doe - (365L * yoe + yoe / 4L - yoe / 100L); // from Mar 1
+    long mp = (5L * doy + 2L) / 153L; // month index, 0 = March
+    long mday = doy - (153L * mp + 2L) / 5L + 1L;
+    long mon = (mp < 10) ? mp + 2 : mp - 10; // 0 = January
+    long year = yoe + era * 400L + (mon < 2 ? 1 : 0);
+
+    __gmtimeBuf.tm_sec = (int)(rem - min * 60L);
+    __gmtimeBuf.tm_min = (int)min;
+    __gmtimeBuf.tm_hour = (int)hour;
+    __gmtimeBuf.tm_mday = (int)mday;
+    __gmtimeBuf.tm_mon = (int)mon;
+    __gmtimeBuf.tm_year = (int)(year - 1900);
+    __gmtimeBuf.tm_wday = (int)wday;
+    __gmtimeBuf.tm_yday = (int)(__monthDays[mon] + mday - 1 +
+                          ((mon > 1 && __isLeap((int)year)) ? 1 : 0));
+    __gmtimeBuf.tm_isdst = 0; // UTC never observes DST
+    return &__gmtimeBuf;
+}
+
+struct tm *localtime(const time_t *t) {
+    return gmtime(t);
+}
+
+// mktime: convert broken-down time → seconds-since-1970. Also fills
+// in tm_wday and tm_yday if the caller didn't bother.
+// tm_mon outside 0..11 is folded into tm_year and written back (it
+// used to index __monthDays out of bounds); years before 1970 now
+// yield negative results instead of being treated as 1970.
+// Out-of-range tm_mday/tm_hour/tm_min/tm_sec carry into the returned
+// value arithmetically but are not written back normalized.
+time_t mktime(struct tm *tm) {
+    int year = tm->tm_year + 1900;
+    int mon = tm->tm_mon;
+    while (mon < 0) { mon += 12; year--; }
+    while (mon > 11) { mon -= 12; year++; }
+    long days = 0;
+    for (int y = 1970; y < year; y++) {
+        days += __isLeap(y) ? 366 : 365;
+    }
+    for (int y = year; y < 1970; y++) {
+        days -= __isLeap(y) ? 366 : 365;
+    }
+    days += __monthDays[mon];
+    if (mon > 1 && __isLeap(year)) days++;
+    days += tm->tm_mday - 1;
+    long secs = days * 86400L
+              + (long)tm->tm_hour * 3600L
+              + (long)tm->tm_min * 60L
+              + tm->tm_sec;
+    tm->tm_mon = mon;
+    tm->tm_year = year - 1900;
+    tm->tm_wday = (int)((days + 4) % 7);
+    if (tm->tm_wday < 0) tm->tm_wday += 7;
+    tm->tm_yday = (int)(__monthDays[mon]
+                + ((mon > 1 && __isLeap(year)) ? 1 : 0)
+                + tm->tm_mday - 1);
+    return secs;
+}
+
+static const char *const __wdayShort[7] = {
+    "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
+};
+static const char *const __wdayLong[7] = {
+    "Sunday", "Monday", "Tuesday", "Wednesday",
+    "Thursday", "Friday", "Saturday"
+};
+static const char *const __monShort[12] = {
+    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
+    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
+};
+static const char *const __monLong[12] = {
+    "January", "February", "March", "April",
+    "May", "June", "July", "August",
+    "September", "October", "November", "December"
+};
+
+// Format N-digit zero-padded.
+// Forces u32 (long) arithmetic — clang's
+// strength reducer otherwise lowers /10 and %10 on small types into
+// i8 mulhu by 0xCD (magic constant for div-by-10), which the W65816
+// backend has no select pattern for.
+//
+// Writes exactly n decimal digits of v into p[0..n-1], filling from the
+// rightmost position backwards, and returns p + n. Digits of v above the
+// n-th are dropped; no terminator is written. The remainder is obtained
+// by multiply-and-subtract rather than the % operator.
+static char *fmtN(char *p, unsigned long v, int n) {
+    p += n;
+    char *end = p;
+    while (n--) {
+        unsigned long q = v / 10ul;
+        unsigned long r = v - q * 10ul;
+        p--;
+        *p = (char)('0' + (int)r);
+        v = q;
+    }
+    return end;
+}
+// Two- and four-digit wrappers. The int is converted to unsigned long
+// first, so a negative value prints the low digits of its wrapped value.
+static char *fmt02(char *p, int v) { return fmtN(p, (unsigned long)v, 2); }
+static char *fmt04(char *p, int v) { return fmtN(p, (unsigned long)v, 4); }
+
+// Formats *tm into the shared static buffer and returns it; the result is
+// overwritten by the next asctime/ctime call.
+// tm_wday and tm_mon are masked and anything still outside the name
+// tables falls back to index 0 ("Sun" / "Jan"), so table reads stay in
+// bounds for any input.
+// NOTE(review): tm_mday is emitted with plain int / and % and is not
+// range-checked — values outside 0..99 produce non-digit characters.
+// The year is cut to its low four digits by fmt04.
+char *asctime(const struct tm *tm) {
+    // "Sun Jan  1 00:00:00 1970\n\0" = 26 bytes
+    // (day of month is space-padded to width 2)
+    char *p = __asctimeBuf;
+    int wday = tm->tm_wday & 7; if (wday > 6) wday = 0;
+    int mon = tm->tm_mon & 15; if (mon > 11) mon = 0;
+    *p++ = __wdayShort[wday][0];
+    *p++ = __wdayShort[wday][1];
+    *p++ = __wdayShort[wday][2];
+    *p++ = ' ';
+    *p++ = __monShort[mon][0];
+    *p++ = __monShort[mon][1];
+    *p++ = __monShort[mon][2];
+    *p++ = ' ';
+    int mday = tm->tm_mday;
+    *p++ = (mday >= 10) ? ('0' + mday/10) : ' ';
+    *p++ = '0' + mday % 10;
+    *p++ = ' ';
+    p = fmt02(p, tm->tm_hour); *p++ = ':';
+    p = fmt02(p, tm->tm_min); *p++ = ':';
+    p = fmt02(p, tm->tm_sec); *p++ = ' ';
+    p = fmt04(p, tm->tm_year + 1900);
+    *p++ = '\n';
+    *p = 0;
+    return __asctimeBuf;
+}
+
+// asctime of gmtime(t). Because gmtime in this file does not decompose
+// the epoch, the text does not reflect the real date for *t.
+char *ctime(const time_t *t) {
+    return asctime(gmtime(t));
+}
+
+// strftime — directive expansion is split into a helper so the main
+// loop's frame stays small (W65816 stack-relative offsets are 8-bit
+// signed).
+// Table-safe weekday index: mask to 0..7, then map the one remaining
+// out-of-table value (7) to 0, exactly as asctime does.
+static int strftimeWday(const struct tm *tm) {
+    int wday = tm->tm_wday & 7;
+    return (wday > 6) ? 0 : wday;
+}
+
+// Table-safe month index: mask to 0..15, then map 12..15 to 0, exactly
+// as asctime does.
+static int strftimeMon(const struct tm *tm) {
+    int mon = tm->tm_mon & 15;
+    return (mon > 11) ? 0 : mon;
+}
+
+// Expands one conversion specifier.
+//   dst    scratch buffer (at most 4 bytes are written, no terminator)
+//   spec   the character following '%'
+//   tm     broken-down time to format
+//   strOut set to a static string when the expansion is a name (the
+//          caller must copy from it instead of dst), otherwise to null
+// Returns the number of characters in the expansion. An unrecognized
+// specifier expands to itself ("%" followed by spec).
+// Weekday and month lookups go through the clamped indices above; the
+// previous `& 7` / `& 15` masks alone allowed reads past the end of the
+// 7- and 12-entry name tables.
+__attribute__((noinline))
+static int strftimeOne(char dst[8], char spec, const struct tm *tm,
+                       const char **strOut) {
+    *strOut = 0;
+    switch (spec) {
+    case 'Y': fmt04(dst, tm->tm_year + 1900); return 4;
+    case 'm': fmt02(dst, tm->tm_mon + 1); return 2;
+    case 'd': fmt02(dst, tm->tm_mday); return 2;
+    case 'H': fmt02(dst, tm->tm_hour); return 2;
+    case 'M': fmt02(dst, tm->tm_min); return 2;
+    case 'S': fmt02(dst, tm->tm_sec); return 2;
+    case 'j': fmtN(dst, (unsigned long)(tm->tm_yday + 1), 3); return 3;
+    case 'w': dst[0] = (char)('0' + strftimeWday(tm)); return 1;
+    case 'a': *strOut = __wdayShort[strftimeWday(tm)]; return 3;
+    case 'A': *strOut = __wdayLong [strftimeWday(tm)];
+              return (int)strlen(*strOut);
+    case 'b':
+    case 'h': *strOut = __monShort[strftimeMon(tm)]; return 3;
+    case 'B': *strOut = __monLong [strftimeMon(tm)];
+              return (int)strlen(*strOut);
+    case 'p': *strOut = (tm->tm_hour < 12) ? "AM" : "PM"; return 2;
+    case '%': dst[0] = '%'; return 1;
+    default: dst[0] = '%'; dst[1] = spec; return 2;
+    }
+}
+
+// Formats *tm into buf (capacity n, including the terminator) according
+// to fmt and returns the number of characters written, excluding the
+// terminator. One byte is always reserved for the terminator, and output
+// that does not fit is cut short. A lone '%' at the end of fmt is dropped.
+// NOTE(review): on truncation this returns the truncated length, whereas
+// ISO C strftime returns 0 in that case — confirm callers expect this.
+size_t strftime(char *buf, size_t n, const char *fmt, const struct tm *tm) {
+    char *p = buf;
+    char *end = buf + n;
+    char tmp[8];
+    while (*fmt && p + 1 < end) {
+        if (*fmt != '%') { *p++ = *fmt++; continue; }
+        fmt++;
+        if (!*fmt) break;
+        const char *ins;
+        int len = strftimeOne(tmp, *fmt, tm, &ins);
+        const char *src = ins ? ins : tmp;
+        for (int i = 0; i < len && p + 1 < end; i++) *p++ = src[i];
+        fmt++;
+    }
+    if (p < end) *p = 0;
+    else if (n > 0) buf[n - 1] = 0;
+    return (size_t)(p - buf);
+}
diff --git a/scripts/runViaFinder.sh b/scripts/runViaFinder.sh index 5e67c0b..1e907ad 100755 --- a/scripts/runViaFinder.sh +++ b/scripts/runViaFinder.sh @@ -36,8 +36,12 @@ WORK=$(mktemp -d -t finderlaunch.XXXXXX) trap 'rm -rf "$WORK"' EXIT cp "$SYSDISK" "$WORK/disk.po" -cp "$OMF" "$WORK/HELLO#B30000" -"$CADIUS" ADDFILE "$WORK/disk.po" /SYSTEM.DISK "$WORK/HELLO#B30000" >/dev/null +# Create a separate 800K data disk and put HELLO on it.
Keeps the +# boot disk untouched (and avoids the "20K free" limit on sys602.po +# that fails for OMFs > ~15K). +"$CADIUS" CREATEVOLUME "$WORK/data.po" DATA 800KB >/dev/null +cp "$OMF" "$WORK/HELLO#B30000" +"$CADIUS" ADDFILE "$WORK/data.po" /DATA "$WORK/HELLO#B30000" >/dev/null LUA_CHECKS="" EXPECTS=() @@ -65,9 +69,9 @@ local key_cmd = get_field(":macadb:KEY3", "Command / Open Apple") local function press(f) if f then f:set_value(1) end end local function release(f) if f then f:set_value(0) end end --- Keystroke timeline: open System.Disk, then launch HELLO. +-- Keystroke timeline: open DATA volume (the second disk), then launch HELLO. local steps = { - {3300, function() nat:post("S") end}, -- select System.Disk + {3300, function() nat:post("D") end}, -- select Data {3540, function() press(key_cmd) end}, {3546, function() nat:post("o") end}, -- Cmd-O opens volume {3600, function() release(key_cmd) end}, @@ -75,7 +79,7 @@ local steps = { {4500, function() press(key_cmd) end}, {4506, function() nat:post("o") end}, -- Cmd-O launches {4560, function() release(key_cmd) end}, - {5400, function() + {6000, function() $LUA_CHECKS manager.machine:exit() end}, @@ -89,9 +93,9 @@ emu.register_frame_done(function() end) LUA -OUT=$(timeout 130 mame apple2gs -rompath "$PROJECT_ROOT/tools/mame/roms" \ +OUT=$(timeout 150 mame apple2gs -rompath "$PROJECT_ROOT/tools/mame/roms" \ -window -nothrottle -sound none \ - -seconds_to_run 110 -flop3 "$WORK/disk.po" \ + -seconds_to_run 130 -flop3 "$WORK/disk.po" -flop4 "$WORK/data.po" \ -autoboot_script "$WORK/finder.lua" &1) # Verify each expected value. diff --git a/scripts/smokeTest.sh b/scripts/smokeTest.sh index ae027fe..cc7f7cd 100755 --- a/scripts/smokeTest.sh +++ b/scripts/smokeTest.sh @@ -4610,8 +4610,10 @@ EOF oGsCrt0="$(mktemp --suffix=.o)" "$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-mc" -arch=w65816 -filetype=obj \ "$PROJECT_ROOT/runtime/src/crt0.s" -o "$oGsCrt0" + # extras.o supplies strnlen (used by libc.c's strdup wrapper). if ! 
"$PROJECT_ROOT/tools/link816" -o "$binGs" --text-base 0x1000 \ "$oGsCrt0" "$oGsLibc" "$oGsSnp" "$oGsSf" "$oGsSd" \ + "$PROJECT_ROOT/runtime/extras.o" \ "$oGsFile" "$oGsAsm" "$oLibgccFile" \ --no-gc-sections 2>&1; then die "iigs/gsos.h + iigsGsos.s failed to link" @@ -5148,7 +5150,8 @@ while post < bytecnt: op = b[post] if op == 0xF5: # 1 + 1 (ByteCnt) + 1 (BitShift) + 2 (OffsetPatch) + 2 (OffsetReference) = 7 bytes - if b[post+1] != 3: print(f'FAIL: cRELOC ByteCnt {b[post+1]} != 3'); sys.exit(1) + # ByteCnt 2 = IMM16 (data refs), 3 = IMM24 (JSL/JML/STAlong). + if b[post+1] not in (2, 3): print(f'FAIL: cRELOC ByteCnt {b[post+1]} not in {{2,3}}'); sys.exit(1) nCreloc += 1 post += 7 elif op == 0x00: diff --git a/src/link816/link816.cpp b/src/link816/link816.cpp index 5356c82..7f1b016 100644 --- a/src/link816/link816.cpp +++ b/src/link816/link816.cpp @@ -313,14 +313,15 @@ struct Layout { std::vector segments; }; -// One IMM24 (3-byte absolute) relocation site, recorded for OMF -// cRELOC emission. The Loader will rewrite the 3 bytes at `patchOff` -// to be (segPlacedBase + offsetRef) when the segment is placed at -// runtime — this is what makes our compiled C runnable from Finder -// when the segment lands at e.g. bank $1F instead of bank 0. +// One IMM-N relocation site (N=2 or 3), recorded for OMF cRELOC +// emission. The Loader rewrites the N bytes at `patchOff` to be +// (segPlacedBase + offsetRef) when the segment is placed at runtime +// — this fixes both 24-bit JSL/JML/STAlong AND 16-bit absolute data +// references when the segment doesn't land at link-time-base. struct Imm24Site { uint32_t patchOff; // offset within text image (== patchAddr - textBase) uint32_t offsetRef; // offset within text image of target symbol + uint8_t byteCnt; // 2 = IMM16, 3 = IMM24 }; static std::vector gImm24Sites; static uint32_t gTextBaseForSites = 0; @@ -346,6 +347,25 @@ static void applyReloc(std::vector &buf, uint32_t off, // caller's DBR points at the target's bank. 
buf[off] = static_cast(target & 0xFF); buf[off + 1] = static_cast((target >> 8) & 0xFF); + // Record for cRELOC emission so the Loader patches this + // 2-byte operand to (segPlacedBase + offsetRef) at load + // time. Without this, `lda absConst` reads from the wrong + // address when the segment doesn't land at link-time-base + // (e.g., link-time-base=0x1000 but Loader places at bank:0). + if (gRecordSites) { + uint32_t targetBank = target & 0xFF0000; + uint32_t baseBank = gTextBaseForSites & 0xFF0000; + if (targetBank == baseBank) { + Imm24Site s; + s.patchOff = patchAddr - gTextBaseForSites; + s.offsetRef = target - gTextBaseForSites; + // Use type field width = 2 to distinguish from IMM24 + // (3). Imm24Site struct is reused — emitOmf will + // emit cRELOC ByteCnt=2 for this. + s.byteCnt = 2; + gImm24Sites.push_back(s); + } + } break; case R_W65816_IMM24: if (target > 0xFFFFFF) @@ -368,6 +388,7 @@ static void applyReloc(std::vector &buf, uint32_t off, Imm24Site s; s.patchOff = patchAddr - gTextBaseForSites; s.offsetRef = target - gTextBaseForSites; + s.byteCnt = 3; gImm24Sites.push_back(s); } } @@ -1361,20 +1382,23 @@ int main(int argc, char **argv) { if (!mapPath.empty()) linker.writeMap(mapPath); if (!debugOutPath.empty()) linker.writeDebugSidecar(debugOutPath); if (!relocOutPath.empty()) { - // Sidecar binary format: + // Sidecar binary format (v2): // u32 count - // { u32 patchOff; u32 offsetRef; } × count + // { u32 patchOff; u32 offsetRef; u8 byteCnt; u8 pad[3]; } × count // Both offsets are within the segment image (== link-time addr - // minus textBase). Consumed by omfEmit --relocs to emit cRELOC - // opcodes after the LCONST data. + // minus textBase). byteCnt = 2 for IMM16, 3 for IMM24. + // Consumed by omfEmit --relocs to emit cRELOC opcodes. 
std::ofstream rf(relocOutPath, std::ios::binary); if (!rf) die("cannot open '" + relocOutPath + "' for writing"); uint32_t count = (uint32_t)gImm24Sites.size(); rf.write(reinterpret_cast(&count), 4); for (const auto &s : gImm24Sites) { uint32_t po = s.patchOff, off = s.offsetRef; + uint8_t bc = s.byteCnt, pad[3] = {0, 0, 0}; rf.write(reinterpret_cast(&po), 4); rf.write(reinterpret_cast(&off), 4); + rf.write(reinterpret_cast(&bc), 1); + rf.write(reinterpret_cast(pad), 3); } } // Multi-segment: write per-segment images + manifest if there's diff --git a/src/link816/omfEmit.cpp b/src/link816/omfEmit.cpp index 8501573..f94fa74 100644 --- a/src/link816/omfEmit.cpp +++ b/src/link816/omfEmit.cpp @@ -38,11 +38,17 @@ namespace { } // Populated by --relocs from a link816 sidecar. Each entry is -// (OffsetPatch, OffsetReference) — the in-segment offset to patch -// (3 bytes wide) and the in-segment offset of the target. Consumed -// by emitOneSeg to write cRELOC opcodes between LCONST and END. +// (OffsetPatch, OffsetReference, ByteCnt) — the in-segment offset +// to patch, the in-segment offset of the target, and the byte width +// of the patch (2 for IMM16, 3 for IMM24). Consumed by emitOneSeg +// to write cRELOC opcodes between LCONST and END. +struct RelocSite { + uint16_t patchOff; + uint16_t offsetRef; + uint8_t byteCnt; +}; } // close namespace -std::vector> gReloc24Sites; +std::vector gReloc24Sites; namespace { static std::vector readFile(const std::string &path) { @@ -128,10 +134,10 @@ static std::vector emitOneSeg(const std::vector &image, // the Loader places us at non-zero bank. 
for (const auto &s : ::gReloc24Sites) { body.push_back(0xF5); - body.push_back(3); // ByteCnt + body.push_back(s.byteCnt); // ByteCnt (2 or 3) body.push_back(0); // BitShift - put16(body, s.first); // OffsetPatch - put16(body, s.second); // OffsetReference + put16(body, s.patchOff); // OffsetPatch + put16(body, s.offsetRef); // OffsetReference } body.push_back(0x00); // END opcode @@ -550,26 +556,36 @@ int main(int argc, char **argv) { } if (output.empty()) usage(argv[0]); - // Load IMM24 reloc list, if provided. + // Load reloc list, if provided. + // Sidecar v2 layout: u32 count + 12 bytes per entry + // { u32 patchOff; u32 offsetRef; u8 byteCnt; u8 pad[3]; } if (!relocFile.empty()) { auto raw = readFile(relocFile); if (raw.size() < 4) die("--relocs file too small"); uint32_t cnt = (uint32_t)raw[0] | ((uint32_t)raw[1] << 8) | ((uint32_t)raw[2] << 16) | ((uint32_t)raw[3] << 24); - if (raw.size() != 4 + 8 * cnt) + const size_t entrySize = 12; + if (raw.size() != 4 + entrySize * cnt) die("--relocs file size mismatch: count=" + std::to_string(cnt) - + " expected " + std::to_string(4 + 8*cnt) + " bytes, got " + + " expected " + std::to_string(4 + entrySize*cnt) + " bytes, got " + std::to_string(raw.size())); gReloc24Sites.reserve(cnt); for (uint32_t k = 0; k < cnt; k++) { - size_t off = 4 + k * 8; + size_t off = 4 + k * entrySize; uint32_t patchOff = (uint32_t)raw[off] | ((uint32_t)raw[off+1] << 8) | ((uint32_t)raw[off+2] << 16) | ((uint32_t)raw[off+3] << 24); uint32_t offRef = (uint32_t)raw[off+4] | ((uint32_t)raw[off+5] << 8) | ((uint32_t)raw[off+6] << 16) | ((uint32_t)raw[off+7] << 24); + uint8_t byteCnt = raw[off+8]; if (patchOff > 0xFFFF || offRef > 0xFFFF) die("reloc site out of 16-bit range — segment too large?"); - gReloc24Sites.emplace_back((uint16_t)patchOff, (uint16_t)offRef); + if (byteCnt != 2 && byteCnt != 3) + die("reloc site byteCnt=" + std::to_string(byteCnt) + " (must be 2 or 3)"); + RelocSite s; + s.patchOff = (uint16_t)patchOff; + s.offsetRef = 
(uint16_t)offRef; + s.byteCnt = byteCnt; + gReloc24Sites.push_back(s); } }