// iigs/path.h - ProDOS / GS/OS aware path operations for C++ (Phase 5.4).
//
// ProDOS and GS/OS impose a small set of structural rules on pathnames
// that std::filesystem-style C++ code routinely violates:
//
//   - Component length: <= 15 chars for ProDOS native; <= 64 chars for
//                       GS/OS class-1 paths (HFS/AppleShare).  We
//                       validate against 64 so callers that target the
//                       class-1 FST surface are happy; the per-volume
//                       ProDOS limit is the caller's problem (caller
//                       can check with iigs::path::isProdosNative).
//   - Component count:  <= 8 directory components for ProDOS hierarchical
//                       (4-byte FILE_INFO header limit).  GS/OS does not
//                       hard-limit but most real disks honor the rule.
//   - Separator:        ':' (IIgs GS/OS preferred) OR '/' (ProDOS native).
//                       We auto-detect: ':' wins if both appear (matches
//                       GS/OS conventions); '/' otherwise.  Operations
//                       emit using the input string's detected separator.
//
// API surface (all are `static inline` so this header is dependency-free
// for callers — link of cxxStreamProbe demonstrates this):
//
//   bool pathNormalize(const char *in, char *out, size_t outLen);
//       Collapse runs of separators, strip trailing separator (unless
//       the path is just ":"), drop "." segments and rewrite ".."
//       segments by popping the previous component.  ".." at the root
//       of an absolute path is a no-op; ".." that would climb above the
//       start of a relative path fails.  A relative path that cancels
//       out completely ("a:..") normalizes to the empty string.
//       Returns false on overflow or validation failure (component > 64
//       chars / depth > 8 / output buffer too small).  Output may equal
//       input.
//
//   bool pathJoin(const char *base, const char *leaf, char *out,
//                 size_t outLen);
//       Glue `base` and `leaf` with the auto-detected separator.  If
//       `leaf` is absolute (begins with the separator) it replaces
//       `base` outright.  Returns false on overflow or component-rule
//       violation in the result.
//
//   bool pathSplit(const char *path, char *parent, size_t parentLen,
//                  char *leaf, size_t leafLen);
//       Decompose `path` into the parent-directory portion and the
//       final component.  Mirrors POSIX dirname+basename but writes to
//       caller-supplied buffers (no static scratch — re-entrant).
//       Returns false on overflow; nothing is written in that case.
//
// Recommended `cout` replacement:
//
//   NOTE(review): the header names in this example were lost from the
//   copy of the file under review; the four below are the presumed
//   originals — confirm against the ETL tree in use.
//
//   #include <stdio.h>
//   #include <etl/string.h>
//   #include <etl/string_stream.h>
//   #include <etl/format_spec.h>
//
//   etl::string<128> buf;
//   etl::string_stream ss(buf);
//   ss << "/USR/BIN/" << 42 << ":" << etl::hex << 0xC0DE;
//   printf("%s\n", ss.str().c_str());
//
// The full std::iostream / std::regex / std::filesystem / std::format
// surfaces are explicit out-of-scope on the W65816 - see
// docs/GAP_CLOSURE_PLAN.md Phase 5.4 step 7 for rationale (size,
// locale dependencies, GS/OS-fopen mismatch).  iigs::path + ETL
// string_stream/format are the supported replacements.

#ifndef IIGS_PATH_H_CXX
#define IIGS_PATH_H_CXX

// NOTE(review): both include targets were stripped from the copy under
// review.  <stddef.h> is required for size_t; <stdint.h> is the presumed
// second header — confirm.
#include <stddef.h>
#include <stdint.h>

namespace iigs {
namespace path {

// ---- ProDOS / GS/OS structural limits --------------------------------
// kMaxComponentLen is the GS/OS class-1 ceiling (64 chars).  ProDOS
// native is tighter (15); callers that need the strict ProDOS rule
// should use isProdosNative() on their own component.
static const size_t kMaxComponentLen = 64;
static const size_t kMaxDepth        = 8;
static const char   kPreferredSep    = ':';

// ---- Forward declarations (alphabetized) -----------------------------
static inline char   detectSep(const char *p);
static inline bool   isProdosNative(const char *component);
static inline bool   isSep(char c);
static inline size_t strLenLocal(const char *s);

// ---- isSep — true if `c` is either of the two recognized separators.
static inline bool isSep(char c) {
    return c == ':' || c == '/';
}

// ---- detectSep — pick the separator a path string uses.
// Returns ':' if a colon appears anywhere in `p` (GS/OS convention: ':'
// wins when both kinds are present), otherwise '/' if a slash appears,
// otherwise 0 (pure name, or `p` is NULL).
static inline char detectSep(const char *p) {
    if (!p) {
        return 0;
    }
    bool sawSlash = false;
    while (*p) {
        if (*p == ':') {
            return ':';
        }
        if (*p == '/') {
            sawSlash = true;
        }
        p++;
    }
    return sawSlash ? '/' : 0;
}

// ---- isProdosNative — true if `component` fits the ProDOS-8 / ProDOS-16
// native rules: <= 15 chars, first char alpha, remainder alnum or '.'.
// NULL and the empty string are rejected.
// Strict by design: callers that don't care can ignore.
static inline bool isProdosNative(const char *component) {
    if (!component || !*component) {
        return false;
    }
    char c0 = component[0];
    bool firstAlpha = (c0 >= 'A' && c0 <= 'Z') || (c0 >= 'a' && c0 <= 'z');
    if (!firstAlpha) {
        return false;
    }
    size_t n = 0;
    const char *p = component;
    while (*p) {
        char c = *p;
        bool alnum = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
                     (c >= '0' && c <= '9') || c == '.';
        if (!alnum) {
            return false;
        }
        n++;
        if (n > 15) {
            return false;
        }
        p++;
    }
    return true;
}

// ---- strLenLocal — small inline strlen so this header is self-contained
// (callers might use iigs::path before string.h is in scope on some TUs).
// `s` must be non-NULL; every caller in this header checks first.
static inline size_t strLenLocal(const char *s) {
    size_t n = 0;
    while (s[n]) {
        n++;
    }
    return n;
}

// ---- pathNormalize ---------------------------------------------------
// Collapse runs of separators, drop trailing separators (a path that is
// only separators becomes a single one), drop "." and resolve ".." by
// popping the previous component.  Both ':' and '/' are treated as
// separators on input; the output uses the one detectSep() picks.
//
//   in      NUL-terminated input path (must be non-NULL).
//   out     destination buffer; may be the same pointer as `in`, since
//           the write position never passes the read position.
//   outLen  size of `out` in bytes, including room for the NUL.
//
// Returns true and a NUL-terminated result on success.  Returns false if
// an argument is NULL/zero, a component exceeds kMaxComponentLen, more
// than kMaxDepth components remain, the result does not fit, or ".."
// would climb above the start of a relative path.  `out` contents are
// unspecified after a false return.
//
// ".." at the root of an absolute path is a no-op and the root separator
// is always preserved (":a:..:b" -> ":b").  A relative path that reduces
// to nothing ("a:..", "./") yields the empty string — the same "no
// directory" value pathSplit produces and pathJoin accepts as a base.
static inline bool pathNormalize(const char *in, char *out, size_t outLen) {
    if (!in || !out || outLen == 0) {
        return false;
    }

    const char sep = detectSep(in);
    if (sep == 0) {
        // Pure name - copy through (terminator included), capped at outLen.
        size_t inLen = strLenLocal(in);
        if (inLen > kMaxComponentLen) {
            return false;
        }
        if (inLen + 1 > outLen) {
            return false;
        }
        for (size_t i = 0; i <= inLen; i++) {
            out[i] = in[i];
        }
        return true;
    }

    // Component stack - record byte offsets into `out` of each component
    // start so `..` can rewind.
    size_t stack[kMaxDepth];
    size_t depth  = 0;
    size_t outPos = 0;

    // Decide absoluteness from the input BEFORE writing anything, so the
    // answer stays valid when `out` aliases `in`.
    const bool absolute = isSep(in[0]);
    if (absolute) {
        if (outLen < 2) {  // need room for the separator and the NUL
            return false;
        }
        out[outPos++] = sep;
    }
    // Bytes at the front of `out` that ".." must never rewind past.
    const size_t rootLen = outPos;

    size_t i = 0;
    while (in[i]) {
        // Skip runs of separators.
        while (in[i] && isSep(in[i])) {
            i++;
        }
        if (!in[i]) {
            break;
        }

        // Delimit one component as the span [start..i).
        size_t start = i;
        while (in[i] && !isSep(in[i])) {
            i++;
        }
        size_t compLen = i - start;
        if (compLen > kMaxComponentLen) {
            return false;
        }

        // "." is a no-op.
        if (compLen == 1 && in[start] == '.') {
            continue;
        }

        // ".." handling.
        if (compLen == 2 && in[start] == '.' && in[start + 1] == '.') {
            if (depth == 0) {
                // Cannot rewind past the root.  Treat as no-op for
                // absolute paths, fail for relative ones.
                if (absolute) {
                    continue;
                }
                return false;
            }
            outPos = stack[--depth];
            // Drop the separator that joined the popped component to its
            // predecessor — but never the root separator itself.
            if (outPos > rootLen && out[outPos - 1] == sep) {
                outPos--;
            }
            continue;
        }

        if (depth >= kMaxDepth) {
            return false;
        }

        // Insert a separator before this component if the output is
        // non-empty and doesn't already end in one.
        if (outPos > 0 && out[outPos - 1] != sep) {
            if (outPos + 1 >= outLen) {
                return false;
            }
            out[outPos++] = sep;
        }

        stack[depth++] = outPos;
        if (outPos + compLen + 1 > outLen) {
            return false;
        }
        for (size_t k = 0; k < compLen; k++) {
            out[outPos++] = in[start + k];
        }
    }

    // At this point the output is either empty (relative path that
    // cancelled out), just the root separator, or ends in a component;
    // it never carries a trailing separator that needs stripping.
    out[outPos] = 0;
    return true;
}

// ---- pathJoin --------------------------------------------------------
// Concatenate `base` + sep + `leaf`.  If `leaf` is absolute (begins with
// a separator) it wins outright; if `base` is NULL or empty the result
// is just `leaf`.  The result is run through pathNormalize so callers
// get a canonical form back.
//
// The separator is the one detected in `base`, else the one detected in
// `leaf`, else kPreferredSep.
//
// Returns false if `leaf` or `out` is NULL, `outLen` is 0, the combined
// string does not fit the internal scratch buffer, or pathNormalize
// rejects the result.
static inline bool pathJoin(const char *base, const char *leaf, char *out,
                            size_t outLen) {
    if (!leaf || !out || outLen == 0) {
        return false;
    }

    // Leaf-is-absolute short-circuit.
    if (isSep(leaf[0])) {
        return pathNormalize(leaf, out, outLen);
    }
    if (!base || !*base) {
        return pathNormalize(leaf, out, outLen);
    }

    char sep = detectSep(base);
    if (sep == 0) {
        sep = detectSep(leaf);
    }
    if (sep == 0) {
        sep = kPreferredSep;
    }

    // Build base + sep + leaf in a scratch buffer then normalize.  The
    // buffer is sized for kMaxDepth + 1 maximum-length components.
    char scratch[kMaxComponentLen * (kMaxDepth + 1) + 2];
    size_t pos = 0;

    const char *p = base;
    while (*p && pos < sizeof(scratch) - 1) {
        scratch[pos++] = *p++;
    }
    if (*p) {  // base did not fit
        return false;
    }

    // Avoid double-separator if base already ends in one.
    if (pos == 0 || scratch[pos - 1] != sep) {
        if (pos >= sizeof(scratch) - 1) {
            return false;
        }
        scratch[pos++] = sep;
    }

    p = leaf;
    while (*p && pos < sizeof(scratch) - 1) {
        scratch[pos++] = *p++;
    }
    if (*p) {  // leaf did not fit
        return false;
    }
    scratch[pos] = 0;

    return pathNormalize(scratch, out, outLen);
}

// ---- pathSplit -------------------------------------------------------
// Decompose `path` into `parent` + `leaf` at the last occurrence of the
// separator detectSep() reports for `path`.  Either output may be NULL
// (in which case that side is discarded — useful when the caller only
// wants one half).
//
//   - No separator:      parent = "",  leaf = whole string.
//   - Separator at [0]:  parent = the separator alone (path is rooted).
//   - Otherwise:         parent = text before the last separator,
//                        leaf   = text after it (may be empty).
//
// Returns false if `path` is NULL or if a requested output (plus its
// NUL) does not fit its buffer.  Both sizes are checked before anything
// is written, so a false return leaves `parent` and `leaf` untouched.
static inline bool pathSplit(const char *path, char *parent, size_t parentLen,
                             char *leaf, size_t leafLen) {
    if (!path) {
        return false;
    }

    const char   sep     = detectSep(path);
    const size_t pathLen = strLenLocal(path);

    // Find the last separator; pathLen means "none".
    size_t lastSep = pathLen;
    if (sep) {
        for (size_t i = 0; i < pathLen; i++) {
            if (path[i] == sep) {
                lastSep = i;
            }
        }
    }

    // Work out both spans first.
    size_t parentN;
    size_t leafStart;
    if (lastSep == pathLen) {
        // No separator.  Parent is empty, leaf is the whole string.
        parentN   = 0;
        leafStart = 0;
    } else {
        // Parent is everything up to lastSep, with the trailing sep
        // stripped unless lastSep == 0 (rooted path: parent is just sep).
        parentN   = (lastSep == 0) ? 1 : lastSep;
        leafStart = lastSep + 1;
    }
    const size_t leafN = pathLen - leafStart;

    // Validate every requested output before touching either buffer.
    if (parent && parentN + 1 > parentLen) {
        return false;
    }
    if (leaf && leafN + 1 > leafLen) {
        return false;
    }

    if (parent) {
        for (size_t i = 0; i < parentN; i++) {
            parent[i] = path[i];
        }
        parent[parentN] = 0;
    }
    if (leaf) {
        for (size_t i = 0; i < leafN; i++) {
            leaf[i] = path[leafStart + i];
        }
        leaf[leafN] = 0;
    }
    return true;
}

} // namespace path
} // namespace iigs

#endif // IIGS_PATH_H_CXX