65816-llvm-mos/runtime/src/strtok.c
Scott Duensing a059aa8182 Checkpoint.
2026-05-01 00:52:24 -05:00

90 lines
2.8 KiB
C

// strtok / strtok_r — kept in their own translation unit so it can
// be built at -O0. At -O2 (the default for everything else in
// runtime/build.sh) the W65816 backend miscompiles this code: the
// second `strtok(NULL, ...)` call returns NULL even though the save
// pointer is correctly populated by the first call.
//
// Investigation (#107):
//
// - Bisected to "appears at -O1 already, not -O2 specific".
// - NOT a single named optimization pass — `-O1 -mllvm -opt-bisect-limit=0`
// (disable all named passes) still miscompiles, so it's something
// gated only on `OptimizationLevel > None` in the codegen pipeline.
// - NOT inlining-related — `-O2 -fno-inline` still fails.
// - NOT regalloc-related — `-O2 -mllvm -regalloc=fast` still fails.
// - Tried several source-level rewrites (global ternary, explicit if/else,
// strtok_r wrapper, hand-rolled inner loops without inSet helper,
// unsigned-char throughout, combined-skip-walk single-loop with
// `tok` sentinel). Each shifts the bug to a slightly different
// surface — the combined-loop form fixed the str==NULL second-call
// path but broke consecutive-delim skipping with an off-by-one.
//
// The simplest reliable fix is to compile this TU at -O0 — see
// runtime/build.sh. Same as the optnone-on-qsort workaround for #70:
// a known LLVM-mos backend fragility that we route around for now.
// Continuation state for strtok(): strtok() passes the address of this
// file-local pointer to strtok_r() as its saveptr argument, so it carries
// the resume position from one strtok() call to the next.
static char *gStrtokSave;
// strtok_r — extract the next token from a NUL-terminated string.
//
//   str      string to tokenize, or a null pointer to continue from the
//            position stored in *saveptr.
//   delim    NUL-terminated set of delimiter bytes.
//   saveptr  caller-owned resume position; read only when str is null,
//            and rewritten on every path that reaches the scan.
//
// Returns a pointer to the start of the next token, which is
// NUL-terminated in place by overwriting the delimiter that ends it.
// Returns a null pointer when no token remains. *saveptr is left
// pointing just past the overwritten delimiter, or set to null once the
// end of the string has been reached. If both str and *saveptr are null
// the function returns null without writing *saveptr.
//
// All byte comparisons are done as unsigned char. The delimiter
// membership scan is written inline in both phases rather than through
// a helper function.
char *strtok_r(char *str, const char *delim, char **saveptr) {
    // *saveptr is only consulted when the caller asks to continue.
    unsigned char *cursor =
        (unsigned char *)(str != (char *)0 ? str : *saveptr);
    if (cursor == (unsigned char *)0) {
        return (char *)0;
    }

    const unsigned char *set = (const unsigned char *)delim;

    // Phase 1: advance past any run of leading delimiters.
    for (;; cursor++) {
        unsigned char ch = *cursor;
        if (ch == 0) {
            // Only delimiters (or nothing) were left: no token.
            *saveptr = (char *)0;
            return (char *)0;
        }
        const unsigned char *probe = set;
        while (*probe != 0 && *probe != ch) {
            probe++;
        }
        if (*probe == 0) {
            // Ran off the end of the set: ch starts a token.
            break;
        }
    }

    unsigned char *start = cursor;

    // Phase 2: walk to the delimiter or NUL that ends the token.
    for (;; cursor++) {
        unsigned char ch = *cursor;
        if (ch == 0) {
            // Token runs to the end of the string; nothing to resume.
            *saveptr = (char *)0;
            return (char *)start;
        }
        const unsigned char *probe = set;
        while (*probe != 0 && *probe != ch) {
            probe++;
        }
        if (*probe != 0) {
            // ch is a delimiter: terminate the token and remember where
            // the next call should pick up.
            *cursor = 0;
            *saveptr = (char *)(cursor + 1);
            return (char *)start;
        }
    }
}
// strtok — non-reentrant tokenizer.
//
// Forwards to strtok_r() using the file-local gStrtokSave as the resume
// position, so every caller shares one tokenizing state. The arguments
// and return value are those of strtok_r(): pass the string on the
// first call and a null pointer on later calls to continue.
char *strtok(char *str, const char *delim) {
    char *token = strtok_r(str, delim, &gStrtokSave);
    return token;
}