65816-llvm-mos/runtime/include/c++/iigs/path.h
Scott Duensing da095402ec Updated
2026-06-02 23:17:57 -05:00

391 lines
12 KiB
C++

// iigs/path.h - ProDOS / GS/OS aware path operations for C++ (Phase 5.4).
//
// ProDOS and GS/OS impose a small set of structural rules on pathnames
// that std::filesystem-style C++ code routinely violates:
//
// - Component length: <= 15 chars for ProDOS native; <= 64 chars for
// GS/OS class-1 paths (HFS/AppleShare). We
// validate against 64 so callers that target the
// class-1 FST surface are happy; the per-volume
// ProDOS limit is the caller's problem (caller
// can check with iigs::path::isProdosNative).
// - Component count: <= 8 directory components for ProDOS hierarchical
// (4-byte FILE_INFO header limit). GS/OS does not
// hard-limit but most real disks honor the rule.
// - Separator: ':' (IIgs GS/OS preferred) OR '/' (ProDOS native).
// We auto-detect: ':' wins if both appear (matches
// GS/OS conventions); '/' otherwise. Operations
// emit using the input string's detected separator.
//
// API surface (all are `static inline` so this header is dependency-free
// for callers — link of cxxStreamProbe demonstrates this):
//
// bool pathNormalize(const char *in, char *out, size_t outLen);
// Collapse runs of separators, strip trailing separator (unless
// the path is just ":") and rewrite ".." segments by popping the
// previous component. Returns false on overflow or validation
// failure (component > 64 chars / depth > 8 / output buffer too
// small). Output may equal input.
//
// bool pathJoin(const char *base, const char *leaf, char *out,
// size_t outLen);
// Glue `base` and `leaf` with the auto-detected separator. If
// `leaf` is absolute (begins with the separator) it replaces
// `base` outright. Returns false on overflow or component-rule
// violation in the result.
//
// bool pathSplit(const char *path, char *parent, size_t parentLen,
// char *leaf, size_t leafLen);
// Decompose `path` into the parent-directory portion and the
// final component. Mirrors POSIX dirname+basename but writes to
// caller-supplied buffers (no static scratch — re-entrant).
// Returns false on overflow.
//
// Recommended `cout` replacement:
//
// #include <iigs/path.h>
// #include <etl/string_stream.h>
// #include <etl/to_string.h>
// #include <stdio.h>
//
// etl::string<128> buf;
// etl::string_stream ss(buf);
// ss << "/USR/BIN/" << 42 << ":" << etl::hex << 0xC0DE;
// printf("%s\n", ss.str().c_str());
//
// The full std::iostream / std::regex / std::filesystem / std::format
// surfaces are explicitly out of scope on the W65816 - see
// docs/GAP_CLOSURE_PLAN.md Phase 5.4 step 7 for rationale (size,
// locale dependencies, GS/OS-fopen mismatch). iigs::path + ETL
// string_stream/format are the supported replacements.
#ifndef IIGS_PATH_H_CXX
#define IIGS_PATH_H_CXX
#include <stdint.h>
#include <stddef.h>
namespace iigs {
namespace path {
// ---- ProDOS / GS/OS structural limits --------------------------------
// kMaxComponentLen is the GS/OS class-1 ceiling (64 chars). ProDOS
// native is tighter (15); callers that need the strict ProDOS rule
// should use isProdosNative() on their own component.
//
// Longest single component pathNormalize() accepts, in chars; pathJoin()
// also uses it (together with kMaxDepth) to size its scratch buffer.
static const size_t kMaxComponentLen = 64;
// Largest number of components pathNormalize() keeps on its component
// stack; one more component than this makes it return false.
static const size_t kMaxDepth = 8;
// Separator pathJoin() falls back to when neither `base` nor `leaf`
// contains a separator of its own.
static const char kPreferredSep = ':';
// ---- Forward declarations (alphabetized) -----------------------------
// Prototypes for the small helpers defined further down in this header,
// so they can be referenced regardless of definition order.
// detectSep:      which separator (':' or '/') a path uses, 0 if none.
// isProdosNative: strict ProDOS file-name check for one component.
// isSep:          true for ':' or '/'.
// strLenLocal:    header-local strlen.
static inline char detectSep(const char *p);
static inline bool isProdosNative(const char *component);
static inline bool isSep(char c);
static inline size_t strLenLocal(const char *s);
// ---- isSep — reports whether `c` is one of the two path separators
// this header recognizes: ':' (GS/OS) or '/' (ProDOS).
static inline bool isSep(char c) {
    switch (c) {
    case ':':
    case '/':
        return true;
    default:
        return false;
    }
}
// ---- detectSep — picks the separator a path is written with.
// ':' wins whenever it appears anywhere in the string (GS/OS convention),
// even if a '/' comes first; otherwise '/' is returned if one is present.
// Returns 0 for a NULL pointer or a string with no separator at all.
static inline char detectSep(const char *p) {
    char found = 0;
    if (p) {
        for (const char *cur = p; *cur != '\0'; ++cur) {
            if (*cur == ':') {
                // A colon settles it immediately; no need to scan further.
                return ':';
            }
            if (*cur == '/') {
                // Remember the slash but keep looking for a colon.
                found = '/';
            }
        }
    }
    return found;
}
// ---- isProdosNative — strict ProDOS-8 / ProDOS-16 file-name check for a
// single component: 1 to 15 characters, the first an ASCII letter, every
// later one an ASCII letter, digit or '.'. NULL and "" are rejected.
// Callers that do not need the strict rule simply do not call this.
static inline bool isProdosNative(const char *component) {
    if (!component || component[0] == '\0') {
        return false;
    }
    for (size_t idx = 0; component[idx] != '\0'; idx++) {
        if (idx >= 15) {
            // A 16th character exists: too long for ProDOS.
            return false;
        }
        const char ch = component[idx];
        const bool letter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        if (idx == 0) {
            // Leading character must be a letter.
            if (!letter) {
                return false;
            }
            continue;
        }
        const bool digit = (ch >= '0' && ch <= '9');
        if (!letter && !digit && ch != '.') {
            return false;
        }
    }
    return true;
}
// ---- strLenLocal — header-local strlen so this file does not depend on
// string.h being in scope. Returns the number of chars before the NUL.
// `s` must be a valid NUL-terminated string; NULL is not checked here.
static inline size_t strLenLocal(const char *s) {
    const char *end = s;
    while (*end != '\0') {
        ++end;
    }
    return (size_t)(end - s);
}
// ---- pathNormalize ---------------------------------------------------
// Canonicalize `in` into `out` (capacity `outLen`, terminator included):
//   - runs of separators collapse to one, and both ':' and '/' in the
//     input are rewritten to the separator chosen by detectSep();
//   - "." components are dropped;
//   - ".." pops the previous component. On a rooted path ".." at the
//     root is a no-op; on a relative path with nothing left to pop the
//     call fails;
//   - trailing separators are dropped, except that a rooted path that
//     reduces to nothing is returned as the single root separator.
// A rooted input always yields a rooted output, and a relative input
// always yields a relative output: a relative path that reduces to
// nothing (e.g. "a/.." or "./") is returned as the empty string rather
// than being promoted to the root.
// A string with no separator at all is copied through unchanged.
// Returns false if `in` or `out` is NULL, `outLen` is 0, a component is
// longer than kMaxComponentLen, more than kMaxDepth components would be
// live at once, or the result does not fit in `out`.
// `out` may be the same buffer as `in`: output never gets ahead of the
// input position being read.
static inline bool pathNormalize(const char *in, char *out, size_t outLen) {
    if (!in || !out || outLen == 0) {
        return false;
    }
    char sep = detectSep(in);
    if (sep == 0) {
        // Pure name - validate the length and copy through, NUL included.
        size_t inLen = strLenLocal(in);
        if (inLen > kMaxComponentLen || inLen + 1 > outLen) {
            return false;
        }
        for (size_t i = 0; i <= inLen; i++) {
            out[i] = in[i];
        }
        return true;
    }
    // Component stack - byte offsets into `out` where each live component
    // starts, so `..` can rewind to the start of the previous one.
    size_t stack[kMaxDepth];
    size_t depth = 0;
    size_t outPos = 0;
    // Remember rootedness from the input itself; the root separator lives
    // at out[0] and must survive every later rewind.
    const bool rooted = isSep(in[0]);
    if (rooted) {
        if (outLen < 2) {
            return false;
        }
        out[outPos++] = sep;
    }
    // Lowest offset a `..` rewind may strip a separator from: never eat
    // the root separator of a rooted path.
    const size_t floorPos = rooted ? 1 : 0;
    size_t i = 0;
    while (in[i]) {
        // Skip runs of separators.
        while (in[i] && isSep(in[i])) {
            i++;
        }
        if (!in[i]) {
            break;
        }
        // Delimit one component as the span [start..i).
        size_t start = i;
        while (in[i] && !isSep(in[i])) {
            i++;
        }
        size_t compLen = i - start;
        if (compLen > kMaxComponentLen) {
            return false;
        }
        // ".." handling.
        if (compLen == 2 && in[start] == '.' && in[start + 1] == '.') {
            if (depth == 0) {
                // Nothing to pop: stay at the root for rooted paths,
                // fail for relative ones.
                if (rooted) {
                    continue;
                }
                return false;
            }
            outPos = stack[--depth];
            // Drop the separator that preceded the popped component,
            // unless it is the root separator.
            if (outPos > floorPos && out[outPos - 1] == sep) {
                outPos--;
            }
            continue;
        }
        // "." is a no-op.
        if (compLen == 1 && in[start] == '.') {
            continue;
        }
        if (depth >= kMaxDepth) {
            return false;
        }
        // Insert a separator before this component if the output is
        // non-empty and doesn't already end in one.
        if (outPos > 0 && out[outPos - 1] != sep) {
            if (outPos + 1 >= outLen) {
                return false;
            }
            out[outPos++] = sep;
        }
        stack[depth++] = outPos;
        // Need room for the component plus the final terminator.
        if (outPos + compLen + 1 > outLen) {
            return false;
        }
        for (size_t k = 0; k < compLen; k++) {
            out[outPos++] = in[start + k];
        }
    }
    // At this point the output never ends in a separator unless it is
    // exactly the root, and outPos == 0 means a relative path collapsed
    // to nothing; both are returned as-is.
    out[outPos] = 0;
    return true;
}
// ---- pathJoin --------------------------------------------------------
// Build "<base><sep><leaf>" and hand it to pathNormalize, whose result
// (and return value) becomes ours. When `leaf` starts with a separator,
// or `base` is NULL or empty, `leaf` alone is normalized instead.
// The joining separator is taken from `base`, then from `leaf`, and
// finally defaults to kPreferredSep when neither contains one.
// Returns false if `leaf` or `out` is NULL, `outLen` is 0, the combined
// string does not fit the internal scratch buffer, or pathNormalize
// rejects the result.
static inline bool pathJoin(const char *base, const char *leaf, char *out, size_t outLen) {
    if (!leaf || !out || outLen == 0) {
        return false;
    }
    // Absolute leaf, or nothing to prepend: the leaf is the whole answer.
    const bool leafOnly = isSep(leaf[0]) || !base || !*base;
    if (leafOnly) {
        return pathNormalize(leaf, out, outLen);
    }
    // Separator preference: base's own, then leaf's, then the default.
    char joinSep = detectSep(base);
    if (joinSep == 0) {
        joinSep = detectSep(leaf);
        if (joinSep == 0) {
            joinSep = kPreferredSep;
        }
    }
    // Scratch is sized from the component and depth limits; `cap` keeps
    // one byte back for the terminator.
    char scratch[kMaxComponentLen * (kMaxDepth + 1) + 2];
    const size_t cap = sizeof(scratch) - 1;
    size_t used = 0;
    for (const char *src = base; *src; src++) {
        if (used >= cap) {
            return false;
        }
        scratch[used++] = *src;
    }
    // Add the joining separator unless base already ends in it.
    if (used == 0 || scratch[used - 1] != joinSep) {
        if (used >= cap) {
            return false;
        }
        scratch[used++] = joinSep;
    }
    for (const char *src = leaf; *src; src++) {
        if (used >= cap) {
            return false;
        }
        scratch[used++] = *src;
    }
    scratch[used] = 0;
    return pathNormalize(scratch, out, outLen);
}
// ---- pathSplit -------------------------------------------------------
// Decompose `path` at the last occurrence of its detected separator
// (see detectSep) into `parent` and `leaf`:
//   - no separator:      parent = "",  leaf = whole string;
//   - separator at [0]:  parent = that single separator, leaf = the rest;
//   - otherwise:         parent = text before the last separator,
//                        leaf   = text after it (may be empty).
// Either output may be NULL, in which case that side is discarded and
// its length argument is ignored. `parentLen` / `leafLen` are buffer
// capacities including the terminator.
// Returns false if `path` is NULL or if any non-NULL output is too small
// (a zero-capacity buffer is always too small). Both sides are checked
// before anything is written, so on failure neither buffer is modified.
// The output buffers should not overlap `path`: each side is copied
// straight out of `path` after the other may already have been written.
static inline bool pathSplit(const char *path, char *parent, size_t parentLen, char *leaf, size_t leafLen) {
    if (!path) {
        return false;
    }
    char sep = detectSep(path);
    size_t pathLen = strLenLocal(path);
    // Find the last separator, scanning from the end; pathLen means none.
    size_t lastSep = pathLen;
    if (sep) {
        for (size_t i = pathLen; i > 0; i--) {
            if (path[i - 1] == sep) {
                lastSep = i - 1;
                break;
            }
        }
    }
    // Work out both spans before touching any output.
    size_t parentN = 0;
    size_t leafStart = 0;
    if (lastSep != pathLen) {
        // Keep the separator itself only when it is the root (offset 0).
        parentN = (lastSep == 0) ? 1 : lastSep;
        leafStart = lastSep + 1;
    }
    size_t leafN = pathLen - leafStart;
    if (parent && parentN + 1 > parentLen) {
        return false;
    }
    if (leaf && leafN + 1 > leafLen) {
        return false;
    }
    if (parent) {
        for (size_t i = 0; i < parentN; i++) {
            parent[i] = path[i];
        }
        parent[parentN] = 0;
    }
    if (leaf) {
        for (size_t i = 0; i < leafN; i++) {
            leaf[i] = path[leafStart + i];
        }
        leaf[leafN] = 0;
    }
    return true;
}
} // namespace path
} // namespace iigs
#endif // IIGS_PATH_H_CXX