391 lines
12 KiB
C++
391 lines
12 KiB
C++
// iigs/path.h - ProDOS / GS/OS aware path operations for C++ (Phase 5.4).
//
// ProDOS and GS/OS impose a small set of structural rules on pathnames
// that std::filesystem-style C++ code routinely violates:
//
//   - Component length: <= 15 chars for ProDOS native; <= 64 chars for
//                       GS/OS class-1 paths (HFS/AppleShare). We
//                       validate against 64 so callers that target the
//                       class-1 FST surface are happy; the per-volume
//                       ProDOS limit is the caller's problem (caller
//                       can check with iigs::path::isProdosNative).
//   - Component count:  <= 8 directory components for ProDOS hierarchical
//                       (4-byte FILE_INFO header limit). GS/OS does not
//                       hard-limit but most real disks honor the rule.
//   - Separator:        ':' (IIgs GS/OS preferred) OR '/' (ProDOS native).
//                       We auto-detect: ':' wins if both appear (matches
//                       GS/OS conventions); '/' otherwise. Operations
//                       emit using the input string's detected separator.
//
// API surface (all are `static inline` so this header is dependency-free
// for callers — the link of cxxStreamProbe demonstrates this):
//
//   bool pathNormalize(const char *in, char *out, size_t outLen);
//     Collapse runs of separators, strip trailing separator (unless
//     the path is just ":") and rewrite ".." segments by popping the
//     previous component. Returns false on overflow or validation
//     failure (component > 64 chars / depth > 8 / output buffer too
//     small). Output may equal input.
//
//   bool pathJoin(const char *base, const char *leaf, char *out,
//                 size_t outLen);
//     Glue `base` and `leaf` with the auto-detected separator. If
//     `leaf` is absolute (begins with the separator) it replaces
//     `base` outright. Returns false on overflow or component-rule
//     violation in the result.
//
//   bool pathSplit(const char *path, char *parent, size_t parentLen,
//                  char *leaf, size_t leafLen);
//     Decompose `path` into the parent-directory portion and the
//     final component. Mirrors POSIX dirname+basename but writes to
//     caller-supplied buffers (no static scratch — re-entrant).
//     Returns false on overflow.
//
// Recommended `cout` replacement:
//
//   #include <iigs/path.h>
//   #include <etl/string_stream.h>
//   #include <etl/to_string.h>
//   #include <stdio.h>
//
//   etl::string<128> buf;
//   etl::string_stream ss(buf);
//   ss << "/USR/BIN/" << 42 << ":" << etl::hex << 0xC0DE;
//   printf("%s\n", ss.str().c_str());
//
// The full std::iostream / std::regex / std::filesystem / std::format
// surfaces are explicitly out of scope on the W65816 - see
// docs/GAP_CLOSURE_PLAN.md Phase 5.4 step 7 for rationale (size,
// locale dependencies, GS/OS-fopen mismatch). iigs::path + ETL
// string_stream/format are the supported replacements.
|
|
|
|
#ifndef IIGS_PATH_H_CXX
|
|
#define IIGS_PATH_H_CXX
|
|
|
|
#include <stdint.h>
|
|
#include <stddef.h>
|
|
|
|
namespace iigs {
|
|
namespace path {
|
|
|
|
|
|
// ---- Structural limits enforced by this header -----------------------
// kMaxComponentLen : longest single path component accepted (64, the
//                    GS/OS class-1 ceiling). ProDOS native volumes are
//                    stricter (15); use isProdosNative() on a component
//                    when that tighter rule matters.
// kMaxDepth        : largest number of components a normalized path may
//                    hold.
// kPreferredSep    : separator used when neither input of a join
//                    contains one.
static const size_t kMaxComponentLen = 64;
static const size_t kMaxDepth        = 8;
static const char   kPreferredSep    = ':';

// ---- Forward declarations (alphabetized) -----------------------------
// Prototypes for the small helpers defined below.
static inline char   detectSep(const char *p);
static inline bool   isProdosNative(const char *component);
static inline bool   isSep(char c);
static inline size_t strLenLocal(const char *s);
|
|
|
|
|
|
// ---- isSep — classify a single character as a path separator.
// Returns true for ':' and for '/', false for every other character
// (including the NUL terminator).
static inline bool isSep(char c) {
    switch (c) {
    case ':':
    case '/':
        return true;
    default:
        return false;
    }
}
|
|
|
|
|
|
// ---- detectSep — pick the separator a path string is written with.
// Returns ':' if the string contains a ':' anywhere (it wins even when a
// '/' appears earlier), otherwise '/' if the string contains a '/',
// otherwise 0. A NULL pointer and a separator-free name both yield 0.
static inline char detectSep(const char *p) {
    if (!p) {
        return 0;
    }
    char fallback = 0;  // becomes '/' once a slash has been seen
    for (const char *cur = p; *cur != '\0'; ++cur) {
        if (*cur == ':') {
            // A colon settles the question immediately.
            return ':';
        }
        if (*cur == '/') {
            fallback = '/';
        }
    }
    return fallback;
}
|
|
|
|
|
|
// ---- isProdosNative — strict ProDOS native name check for one component.
// Returns true only when `component` is non-NULL, non-empty, at most 15
// characters long, starts with an ASCII letter, and every following
// character is an ASCII letter, an ASCII digit or '.'. Anything else
// (including separators and spaces) yields false.
static inline bool isProdosNative(const char *component) {
    const size_t prodosMaxLen = 15;

    if (!component || component[0] == '\0') {
        return false;
    }
    for (size_t idx = 0; component[idx] != '\0'; idx++) {
        if (idx >= prodosMaxLen) {
            // A 16th character exists: too long, whatever it is.
            return false;
        }
        const char ch = component[idx];
        const bool isLetter = (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
        const bool isDigit = (ch >= '0' && ch <= '9');
        if (idx == 0) {
            // The leading character must be a letter.
            if (!isLetter) {
                return false;
            }
        } else if (!isLetter && !isDigit && ch != '.') {
            return false;
        }
    }
    return true;
}
|
|
|
|
|
|
// ---- strLenLocal — header-local strlen replacement.
// Counts the characters before the NUL terminator of `s`, so this header
// does not need string.h. `s` must be a valid NUL-terminated string; a
// NULL pointer is not checked for.
static inline size_t strLenLocal(const char *s) {
    const char *end = s;
    while (*end != '\0') {
        ++end;
    }
    return (size_t)(end - s);
}
|
|
|
|
|
|
// ---- pathNormalize ---------------------------------------------------
|
|
// Collapse `//`, drop trailing separators (keep a single one only if
|
|
// path is exactly the separator), and resolve `..` by popping the
|
|
// previous component. Returns false on overflow or rule violation.
|
|
static inline bool pathNormalize(const char *in, char *out, size_t outLen) {
|
|
if (!in || !out || outLen == 0) {
|
|
return false;
|
|
}
|
|
char sep = detectSep(in);
|
|
if (sep == 0) {
|
|
// Pure name - copy through, capped at outLen.
|
|
size_t inLen = strLenLocal(in);
|
|
if (inLen > kMaxComponentLen) {
|
|
return false;
|
|
}
|
|
if (inLen + 1 > outLen) {
|
|
return false;
|
|
}
|
|
for (size_t i = 0; i <= inLen; i++) {
|
|
out[i] = in[i];
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Component stack - record byte offsets into `out` of each component
|
|
// start so `..` can rewind.
|
|
size_t stack[kMaxDepth];
|
|
size_t depth = 0;
|
|
size_t outPos = 0;
|
|
|
|
// Leading-separator preservation: emit one if input starts with sep.
|
|
if (isSep(in[0])) {
|
|
if (outPos + 1 >= outLen) {
|
|
return false;
|
|
}
|
|
out[outPos++] = sep;
|
|
}
|
|
|
|
size_t i = 0;
|
|
while (in[i]) {
|
|
// Skip runs of separators.
|
|
while (in[i] && isSep(in[i])) {
|
|
i++;
|
|
}
|
|
if (!in[i]) {
|
|
break;
|
|
}
|
|
// Read one component into a scratch span [start..end).
|
|
size_t start = i;
|
|
while (in[i] && !isSep(in[i])) {
|
|
i++;
|
|
}
|
|
size_t compLen = i - start;
|
|
if (compLen > kMaxComponentLen) {
|
|
return false;
|
|
}
|
|
|
|
// ".." handling.
|
|
if (compLen == 2 && in[start] == '.' && in[start + 1] == '.') {
|
|
if (depth == 0) {
|
|
// Cannot rewind past the root. Treat as no-op for
|
|
// absolute paths, fail for relative ones (matches
|
|
// most std::filesystem implementations).
|
|
if (outPos > 0 && out[0] == sep) {
|
|
continue;
|
|
}
|
|
return false;
|
|
}
|
|
outPos = stack[--depth];
|
|
// Drop the trailing separator that brought us here (if any).
|
|
if (outPos > 0 && out[outPos - 1] == sep) {
|
|
outPos--;
|
|
}
|
|
continue;
|
|
}
|
|
// "." is also a no-op.
|
|
if (compLen == 1 && in[start] == '.') {
|
|
continue;
|
|
}
|
|
|
|
if (depth >= kMaxDepth) {
|
|
return false;
|
|
}
|
|
// Insert a separator before this component if the output is
|
|
// non-empty and doesn't already end in one.
|
|
if (outPos > 0 && out[outPos - 1] != sep) {
|
|
if (outPos + 1 >= outLen) {
|
|
return false;
|
|
}
|
|
out[outPos++] = sep;
|
|
}
|
|
stack[depth++] = outPos;
|
|
|
|
if (outPos + compLen + 1 > outLen) {
|
|
return false;
|
|
}
|
|
for (size_t k = 0; k < compLen; k++) {
|
|
out[outPos++] = in[start + k];
|
|
}
|
|
}
|
|
|
|
// Strip lone trailing separator (but keep "/" / ":" itself).
|
|
if (outPos > 1 && out[outPos - 1] == sep) {
|
|
outPos--;
|
|
}
|
|
if (outPos == 0) {
|
|
// All input was separators.
|
|
if (outLen < 2) {
|
|
return false;
|
|
}
|
|
out[outPos++] = sep;
|
|
}
|
|
out[outPos] = 0;
|
|
return true;
|
|
}
|
|
|
|
|
|
// ---- pathJoin --------------------------------------------------------
|
|
// Concatenate `base` + sep + `leaf`. If `leaf` is absolute (begins with
|
|
// a separator) it wins outright. The result is run through
|
|
// pathNormalize so callers get a canonical form back.
|
|
static inline bool pathJoin(const char *base, const char *leaf, char *out, size_t outLen) {
|
|
if (!leaf || !out || outLen == 0) {
|
|
return false;
|
|
}
|
|
// Leaf-is-absolute short-circuit.
|
|
if (isSep(leaf[0])) {
|
|
return pathNormalize(leaf, out, outLen);
|
|
}
|
|
if (!base || !*base) {
|
|
return pathNormalize(leaf, out, outLen);
|
|
}
|
|
char sep = detectSep(base);
|
|
if (sep == 0) {
|
|
sep = detectSep(leaf);
|
|
}
|
|
if (sep == 0) {
|
|
sep = kPreferredSep;
|
|
}
|
|
|
|
// Build "<base><sep><leaf>" in a scratch buffer then normalize.
|
|
char scratch[kMaxComponentLen * (kMaxDepth + 1) + 2];
|
|
size_t pos = 0;
|
|
const char *p = base;
|
|
while (*p && pos < sizeof(scratch) - 1) {
|
|
scratch[pos++] = *p++;
|
|
}
|
|
if (*p) {
|
|
return false;
|
|
}
|
|
// Avoid double-separator if base already ends in one.
|
|
if (pos == 0 || scratch[pos - 1] != sep) {
|
|
if (pos >= sizeof(scratch) - 1) {
|
|
return false;
|
|
}
|
|
scratch[pos++] = sep;
|
|
}
|
|
p = leaf;
|
|
while (*p && pos < sizeof(scratch) - 1) {
|
|
scratch[pos++] = *p++;
|
|
}
|
|
if (*p) {
|
|
return false;
|
|
}
|
|
scratch[pos] = 0;
|
|
return pathNormalize(scratch, out, outLen);
|
|
}
|
|
|
|
|
|
// ---- pathSplit -------------------------------------------------------
|
|
// Decompose `path` into `parent` + `leaf`. Either output may be NULL
|
|
// (in which case that side is discarded — useful when the caller only
|
|
// wants one half). Returns false on overflow.
|
|
static inline bool pathSplit(const char *path, char *parent, size_t parentLen, char *leaf, size_t leafLen) {
|
|
if (!path) {
|
|
return false;
|
|
}
|
|
char sep = detectSep(path);
|
|
size_t pathLen = strLenLocal(path);
|
|
|
|
// Find the last separator.
|
|
size_t lastSep = pathLen;
|
|
if (sep) {
|
|
for (size_t i = 0; i < pathLen; i++) {
|
|
if (path[i] == sep) {
|
|
lastSep = i;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (lastSep == pathLen) {
|
|
// No separator. Parent is empty, leaf is the whole string.
|
|
if (parent && parentLen > 0) {
|
|
parent[0] = 0;
|
|
}
|
|
if (leaf) {
|
|
if (pathLen + 1 > leafLen) {
|
|
return false;
|
|
}
|
|
for (size_t i = 0; i <= pathLen; i++) {
|
|
leaf[i] = path[i];
|
|
}
|
|
}
|
|
return true;
|
|
}
|
|
|
|
if (parent) {
|
|
// Parent is everything up to lastSep (with trailing sep stripped
|
|
// unless lastSep == 0, i.e. path is rooted and parent is just sep).
|
|
size_t parentN = lastSep == 0 ? 1 : lastSep;
|
|
if (parentN + 1 > parentLen) {
|
|
return false;
|
|
}
|
|
for (size_t i = 0; i < parentN; i++) {
|
|
parent[i] = path[i];
|
|
}
|
|
parent[parentN] = 0;
|
|
}
|
|
if (leaf) {
|
|
size_t leafN = pathLen - lastSep - 1;
|
|
if (leafN + 1 > leafLen) {
|
|
return false;
|
|
}
|
|
for (size_t i = 0; i < leafN; i++) {
|
|
leaf[i] = path[lastSep + 1 + i];
|
|
}
|
|
leaf[leafN] = 0;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
|
|
} // namespace path
|
|
} // namespace iigs
|
|
|
|
#endif // IIGS_PATH_H_CXX
|