// dvxhlpc.c -- DVX Help file compiler // // Host-side tool (Linux) that converts .dvxhelp source files into // the .hlp binary format consumed by the DVX help viewer. // // Usage: // dvxhlpc -o output.hlp [-w 76] [-i imagedir] input1.dvxhelp [...] // // Five-pass algorithm: // 1. Parse source files, build topic/TOC/index/image arrays // 2. Word-wrap text and list-item records // 3. Build deduplicated string table // 4. Generate trigram search index // 5. Serialize binary .hlp file #define _POSIX_C_SOURCE 200809L #include "../apps/dvxhelp/hlpformat.h" #include #include #include #include #include #include #include #include #include // --------------------------------------------------------------------------- // Limits // --------------------------------------------------------------------------- #define MAX_TOPICS 1024 #define MAX_TOC_ENTRIES 2048 #define MAX_INDEX_ENTRIES 4096 #define MAX_IMAGES 256 #define MAX_RECORDS_PER_TOPIC 512 #define MAX_LINE_LEN 1024 #define MAX_TRIGRAMS 65536 #define MAX_INCLUDE_DEPTH 16 #define DEFAULT_WRAP_WIDTH 76 #define LIST_INDENT 2 #define INITIAL_STRTAB_SIZE 65536 #define INITIAL_BUF_SIZE 65536 // --------------------------------------------------------------------------- // Compiler data structures // --------------------------------------------------------------------------- typedef struct { uint8_t type; uint8_t flags; char *data; int32_t dataLen; } RecordT; typedef struct { char id[128]; char title[256]; RecordT *records; int32_t recordCount; int32_t recordCap; bool isDefault; } TopicT; typedef struct { char title[256]; int32_t topicIdx; int32_t depth; } TocEntryT; typedef struct { char keyword[128]; int32_t topicIdx; } IndexEntryT; typedef struct { char path[520]; int32_t poolOffset; int32_t fileSize; } ImageRefT; // String table entry for deduplication typedef struct { char *str; int32_t offset; } StrEntryT; // Trigram posting list typedef struct { uint8_t trigram[3]; uint16_t *postings; int32_t postingCount; int32_t postingCap; } TrigramT; // Dynamic buffer for serialization typedef struct { uint8_t *data; int32_t size; int32_t cap; } BufT; // --------------------------------------------------------------------------- // Globals // --------------------------------------------------------------------------- static TopicT topics[MAX_TOPICS]; static int32_t topicCount = 0; static TocEntryT tocEntries[MAX_TOC_ENTRIES]; static int32_t tocCount = 0; static IndexEntryT indexEntries[MAX_INDEX_ENTRIES]; static int32_t indexCount = 0; static ImageRefT imageRefs[MAX_IMAGES]; static int32_t imageCount = 0; static char *strTab = NULL; static int32_t strTabSize = 0; static int32_t strTabCap = 0; static StrEntryT *strEntries = NULL; static int32_t strEntryCount = 0; static int32_t strEntryCap = 0; static TrigramT trigrams[MAX_TRIGRAMS]; static int32_t trigramCount = 0; static int32_t wrapWidth = DEFAULT_WRAP_WIDTH; static char imageDir[260] = "."; static const char *htmlPath = NULL; static int32_t errorCount = 0; // Parse state static const char *currentFile = NULL; static int32_t currentLine = 0; // --------------------------------------------------------------------------- // Prototypes // --------------------------------------------------------------------------- static void addImageRef(const char *filename); static void addIndexEntry(const char *keyword, int32_t topicIdx); static RecordT *addRecord(TopicT *topic, uint8_t type, uint8_t flags, const char *data, int32_t dataLen); static void addTocEntry(const char *title, int32_t topicIdx, int32_t depth); static TopicT *addTopic(const char *id); static void addTrigram(uint8_t a, uint8_t b, uint8_t c, uint16_t topicIdx); static void bufAppend(BufT *buf, const void *data, int32_t len); static void bufInit(BufT *buf); static void buildSearchIndex(void); static int compareIndexEntries(const void *a, const void *b); static int compareTrigrams(const void *a, const void *b); static void emitError(const char *fmt, ...); static void emitWarning(const char *fmt, ...); static int32_t findImage(const char *filename); static void flushParagraph(TopicT *topic, char *para, int32_t paraLen, uint8_t type, uint8_t flags); static void freeAll(void); static void parseDirective(const char *line, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth); static void parseFile(const char *path, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth); static void pass1Parse(int32_t fileCount, char **files); static void pass2Wrap(void); static void pass3StringTable(void); static void pass4SearchIndex(void); static int pass5Serialize(const char *outputPath); static int emitHtml(const char *outputPath); static int32_t strTableAdd(const char *str); static int32_t strTableFind(const char *str); static char *wordWrap(const char *text, int32_t width, int32_t indent); static void usage(void); // --------------------------------------------------------------------------- // emitError / emitWarning // --------------------------------------------------------------------------- static void emitError(const char *fmt, ...) { va_list ap; va_start(ap, fmt); fprintf(stderr, "error: %s:%d: ", currentFile ? currentFile : "", currentLine); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); errorCount++; } static void emitWarning(const char *fmt, ...) { va_list ap; va_start(ap, fmt); fprintf(stderr, "warning: %s:%d: ", currentFile ? currentFile : "", currentLine); vfprintf(stderr, fmt, ap); fprintf(stderr, "\n"); va_end(ap); } // --------------------------------------------------------------------------- // usage // --------------------------------------------------------------------------- static void usage(void) { fprintf(stderr, "Usage: dvxhlpc -o output.hlp [-w width] [-i imagedir] [--html out.html] input.dvxhelp [...]\n"); exit(1); } // --------------------------------------------------------------------------- // Buffer helpers // --------------------------------------------------------------------------- static void bufInit(BufT *buf) { buf->cap = INITIAL_BUF_SIZE; buf->size = 0; buf->data = malloc(buf->cap); if (!buf->data) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } } static void bufAppend(BufT *buf, const void *data, int32_t len) { while (buf->size + len > buf->cap) { buf->cap *= 2; buf->data = realloc(buf->data, buf->cap); if (!buf->data) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } } memcpy(buf->data + buf->size, data, len); buf->size += len; } // --------------------------------------------------------------------------- // Topic management // --------------------------------------------------------------------------- static TopicT *addTopic(const char *id) { if (topicCount >= MAX_TOPICS) { emitError("too many topics (max %d)", MAX_TOPICS); return NULL; } TopicT *t = &topics[topicCount++]; memset(t, 0, sizeof(*t)); snprintf(t->id, sizeof(t->id), "%s", id); t->recordCap = 32; t->records = malloc(sizeof(RecordT) * t->recordCap); if (!t->records) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } return t; } static RecordT *addRecord(TopicT *topic, uint8_t type, uint8_t flags, const char *data, int32_t dataLen) { if (topic->recordCount >= MAX_RECORDS_PER_TOPIC) { emitError("too many records in topic '%s' (max %d)", topic->id, MAX_RECORDS_PER_TOPIC); return NULL; } if (topic->recordCount >= topic->recordCap) { topic->recordCap *= 2; topic->records = realloc(topic->records, sizeof(RecordT) * topic->recordCap); if (!topic->records) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } } RecordT *r = &topic->records[topic->recordCount++]; r->type = type; r->flags = flags; r->dataLen = dataLen; if (data && dataLen > 0) { r->data = malloc(dataLen + 1); memcpy(r->data, data, dataLen); r->data[dataLen] = '\0'; } else { r->data = NULL; r->dataLen = 0; } return r; } // --------------------------------------------------------------------------- // TOC / Index / Image management // --------------------------------------------------------------------------- static void addTocEntry(const char *title, int32_t topicIdx, int32_t depth) { if (tocCount >= MAX_TOC_ENTRIES) { emitError("too many TOC entries (max %d)", MAX_TOC_ENTRIES); return; } TocEntryT *e = &tocEntries[tocCount++]; snprintf(e->title, sizeof(e->title), "%s", title); e->title[sizeof(e->title) - 1] = '\0'; e->topicIdx = topicIdx; e->depth = depth; } static void addIndexEntry(const char *keyword, int32_t topicIdx) { if (indexCount >= MAX_INDEX_ENTRIES) { emitError("too many index entries (max %d)", MAX_INDEX_ENTRIES); return; } IndexEntryT *e = &indexEntries[indexCount++]; snprintf(e->keyword, sizeof(e->keyword), "%s", keyword); e->keyword[sizeof(e->keyword) - 1] = '\0'; e->topicIdx = topicIdx; } static int32_t findImage(const char *filename) { for (int32_t i = 0; i < imageCount; i++) { // Compare just the filename portion const char *base = strrchr(imageRefs[i].path, '/'); if (!base) { base = imageRefs[i].path; } else { base++; } if (strcmp(base, filename) == 0) { return i; } } return -1; } static void addImageRef(const char *filename) { if (findImage(filename) >= 0) { return; } if (imageCount >= MAX_IMAGES) { emitError("too many images (max %d)", MAX_IMAGES); return; } ImageRefT *img = &imageRefs[imageCount++]; snprintf(img->path, sizeof(img->path), "%s/%s", imageDir, filename); img->poolOffset = 0; img->fileSize = 0; } // --------------------------------------------------------------------------- // Word wrap // --------------------------------------------------------------------------- static char *wordWrap(const char *text, int32_t width, int32_t indent) { if (!text || !*text) { return strdup(""); } int32_t textLen = strlen(text); // Worst case: every word on its own line + indent char *result = malloc(textLen * 2 + 1); if (!result) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } int32_t outPos = 0; int32_t col = 0; bool firstLine = true; const char *p = text; while (*p) { // Skip leading whitespace while (*p == ' ' || *p == '\t') { p++; } if (!*p) { break; } // Find end of word const char *wordStart = p; while (*p && *p != ' ' && *p != '\t' && *p != '\n') { p++; } int32_t wordLen = (int32_t)(p - wordStart); // Check if word fits on current line if (col > 0 && col + 1 + wordLen > width) { // Wrap to next line result[outPos++] = '\n'; for (int32_t i = 0; i < indent; i++) { result[outPos++] = ' '; } col = indent; firstLine = false; } else if (col > 0) { // Add space between words result[outPos++] = ' '; col++; } else if (!firstLine) { // Start of continuation line for (int32_t i = 0; i < indent; i++) { result[outPos++] = ' '; } col = indent; } memcpy(result + outPos, wordStart, wordLen); outPos += wordLen; col += wordLen; // Skip newlines in source (join continuation lines) while (*p == '\n') { p++; } } result[outPos] = '\0'; return result; } // --------------------------------------------------------------------------- // Paragraph flush // --------------------------------------------------------------------------- static void flushParagraph(TopicT *topic, char *para, int32_t paraLen, uint8_t type, uint8_t flags) { if (!topic || paraLen <= 0) { return; } // Trim trailing whitespace while (paraLen > 0 && (para[paraLen - 1] == ' ' || para[paraLen - 1] == '\n' || para[paraLen - 1] == '\r')) { paraLen--; } if (paraLen <= 0) { return; } para[paraLen] = '\0'; addRecord(topic, type, flags, para, paraLen); } // --------------------------------------------------------------------------- // Pass 1: Parse // --------------------------------------------------------------------------- static void parseDirective(const char *line, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth) { char directive[32] = {0}; const char *rest = NULL; // Extract directive name const char *p = line + 1; // skip '.' int32_t i = 0; while (*p && !isspace(*p) && i < (int32_t)sizeof(directive) - 1) { directive[i++] = *p++; } directive[i] = '\0'; // Skip whitespace after directive while (*p && isspace(*p)) { p++; } rest = p; // Handle .topic if (strcmp(directive, "topic") == 0) { if (!*rest) { emitError(".topic requires an ID"); return; } // Flush pending paragraph flushParagraph(*curTopic, para, *paraLen, HLP_REC_TEXT, 0); *paraLen = 0; // Close any open blocks *inList = false; *inTable = false; *inCode = false; *inNote = false; // Extract topic ID (first word) char id[128] = {0}; int32_t j = 0; while (*rest && !isspace(*rest) && j < (int32_t)sizeof(id) - 1) { id[j++] = *rest++; } id[j] = '\0'; *curTopic = addTopic(id); return; } // All other directives need a current topic (except .include) if (strcmp(directive, "include") == 0) { if (!*rest) { emitError(".include requires a filename"); return; } // Flush pending paragraph flushParagraph(*curTopic, para, *paraLen, *inCode ? HLP_REC_CODE : (*inTable ? HLP_REC_TABLE : HLP_REC_TEXT), *inNote ? *noteFlags : 0); *paraLen = 0; // Build path relative to current file's directory char includePath[260] = {0}; if (rest[0] == '/') { snprintf(includePath, sizeof(includePath), "%s", rest); } else { snprintf(includePath, sizeof(includePath), "%s", currentFile); char *slash = strrchr(includePath, '/'); if (slash) { slash[1] = '\0'; } else { includePath[0] = '\0'; } snprintf(includePath + strlen(includePath), sizeof(includePath) - strlen(includePath), "%s", rest); } // Trim trailing whitespace from path int32_t len = strlen(includePath); while (len > 0 && isspace(includePath[len - 1])) { includePath[--len] = '\0'; } parseFile(includePath, curTopic, inList, inTable, inCode, inNote, noteFlags, para, paraLen, includeDepth + 1); return; } if (!*curTopic) { emitError("directive .%s outside of a topic", directive); return; } // Flush pending paragraph before most directives if (strcmp(directive, "item") != 0) { uint8_t flushType = HLP_REC_TEXT; uint8_t flushFlags = 0; if (*inCode) { flushType = HLP_REC_CODE; } else if (*inTable) { flushType = HLP_REC_TABLE; } else if (*inNote) { flushType = HLP_REC_NOTE; flushFlags = *noteFlags; } flushParagraph(*curTopic, para, *paraLen, flushType, flushFlags); *paraLen = 0; } if (strcmp(directive, "title") == 0) { snprintf((*curTopic)->title, sizeof((*curTopic)->title), "%s", rest); (*curTopic)->title[sizeof((*curTopic)->title) - 1] = '\0'; } else if (strcmp(directive, "toc") == 0) { // .toc int32_t depth = 0; if (isdigit(*rest)) { depth = *rest - '0'; rest++; while (isspace(*rest)) { rest++; } } addTocEntry(rest, topicCount - 1, depth); } else if (strcmp(directive, "h1") == 0) { addRecord(*curTopic, HLP_REC_HEADING1, 0, rest, strlen(rest)); } else if (strcmp(directive, "h2") == 0) { addRecord(*curTopic, HLP_REC_HEADING2, 0, rest, strlen(rest)); } else if (strcmp(directive, "h3") == 0) { addRecord(*curTopic, HLP_REC_HEADING3, 0, rest, strlen(rest)); } else if (strcmp(directive, "image") == 0) { if (!*rest) { emitError(".image requires a filename"); return; } // Trim trailing whitespace char imgFile[260]; snprintf(imgFile, sizeof(imgFile), "%s", rest); imgFile[sizeof(imgFile) - 1] = '\0'; int32_t len = strlen(imgFile); while (len > 0 && isspace(imgFile[len - 1])) { imgFile[--len] = '\0'; } addImageRef(imgFile); addRecord(*curTopic, HLP_REC_IMAGE, 0, imgFile, strlen(imgFile)); } else if (strcmp(directive, "link") == 0) { // .link char linkTarget[128] = {0}; int32_t j = 0; while (*rest && !isspace(*rest) && j < (int32_t)sizeof(linkTarget) - 1) { linkTarget[j++] = *rest++; } linkTarget[j] = '\0'; while (isspace(*rest)) { rest++; } // Store as "target\0display text" int32_t targetLen = strlen(linkTarget); int32_t displayLen = strlen(rest); int32_t totalLen = targetLen + 1 + displayLen; char *linkData = malloc(totalLen + 1); memcpy(linkData, linkTarget, targetLen); linkData[targetLen] = '\0'; memcpy(linkData + targetLen + 1, rest, displayLen); linkData[totalLen] = '\0'; addRecord(*curTopic, HLP_REC_LINK, 0, linkData, totalLen); free(linkData); } else if (strcmp(directive, "list") == 0) { *inList = true; } else if (strcmp(directive, "item") == 0) { if (!*inList) { emitWarning(".item outside of .list"); } // Flush any pending item flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0); *paraLen = 0; // Start new item with the rest text if (*rest) { int32_t len = strlen(rest); memcpy(para, rest, len); *paraLen = len; } } else if (strcmp(directive, "endlist") == 0) { // Flush final list item flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0); *paraLen = 0; *inList = false; } else if (strcmp(directive, "table") == 0) { *inTable = true; } else if (strcmp(directive, "endtable") == 0) { *inTable = false; } else if (strcmp(directive, "code") == 0) { *inCode = true; } else if (strcmp(directive, "endcode") == 0) { *inCode = false; } else if (strcmp(directive, "note") == 0) { *inNote = true; if (strncmp(rest, "tip", 3) == 0) { *noteFlags = HLP_NOTE_TIP; } else if (strncmp(rest, "warning", 7) == 0) { *noteFlags = HLP_NOTE_WARNING; } else { *noteFlags = HLP_NOTE_INFO; } } else if (strcmp(directive, "endnote") == 0) { *inNote = false; } else if (strcmp(directive, "index") == 0) { if (!*rest) { emitError(".index requires a keyword"); return; } addIndexEntry(rest, topicCount - 1); } else if (strcmp(directive, "hr") == 0) { addRecord(*curTopic, HLP_REC_HRULE, 0, NULL, 0); } else if (strcmp(directive, "default") == 0) { (*curTopic)->isDefault = true; } else if (strcmp(directive, "wrap") == 0) { int32_t w = atoi(rest); if (w > 0) { wrapWidth = w; } } else { emitWarning("unknown directive .%s", directive); } } static void parseFile(const char *path, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth) { if (includeDepth > MAX_INCLUDE_DEPTH) { emitError("include depth exceeded (max %d)", MAX_INCLUDE_DEPTH); return; } FILE *f = fopen(path, "r"); if (!f) { emitError("cannot open '%s': %s", path, strerror(errno)); return; } // Save and set parse state const char *savedFile = currentFile; int32_t savedLine = currentLine; currentFile = path; currentLine = 0; fprintf(stderr, " parsing %s\n", path); char line[MAX_LINE_LEN]; while (fgets(line, sizeof(line), f)) { currentLine++; // Strip trailing newline/CR int32_t len = strlen(line); while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) { line[--len] = '\0'; } // Check for directive if (line[0] == '.') { parseDirective(line, curTopic, inList, inTable, inCode, inNote, noteFlags, para, paraLen, includeDepth); continue; } // Plain text line if (!*curTopic) { // Ignore text outside topics continue; } if (*inCode || *inTable) { // In code/table blocks, preserve lines verbatim if (*paraLen > 0) { para[(*paraLen)++] = '\n'; } memcpy(para + *paraLen, line, len); *paraLen += len; para[*paraLen] = '\0'; } else if (*inList) { // In list, accumulate continuation text for current item if (len == 0) { // Empty line flushes current item flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0); *paraLen = 0; } else { if (*paraLen > 0) { para[(*paraLen)++] = ' '; } memcpy(para + *paraLen, line, len); *paraLen += len; para[*paraLen] = '\0'; } } else if (*inNote) { // In note block if (len == 0) { // Empty line separates paragraphs flushParagraph(*curTopic, para, *paraLen, HLP_REC_NOTE, *noteFlags); *paraLen = 0; } else { if (*paraLen > 0) { para[(*paraLen)++] = ' '; } memcpy(para + *paraLen, line, len); *paraLen += len; para[*paraLen] = '\0'; } } else { // Normal text if (len == 0) { // Empty line ends paragraph flushParagraph(*curTopic, para, *paraLen, HLP_REC_TEXT, 0); *paraLen = 0; } else { if (*paraLen > 0) { para[(*paraLen)++] = ' '; } memcpy(para + *paraLen, line, len); *paraLen += len; para[*paraLen] = '\0'; } } } fclose(f); // Restore parse state currentFile = savedFile; currentLine = savedLine; } static void pass1Parse(int32_t fileCount, char **files) { fprintf(stderr, "Pass 1: Parsing %d input file(s)...\n", fileCount); TopicT *curTopic = NULL; bool inList = false; bool inTable = false; bool inCode = false; bool inNote = false; uint8_t noteFlags = 0; // Paragraph buffer (shared across files for multi-file topics) char para[MAX_LINE_LEN * 64]; int32_t paraLen = 0; for (int32_t i = 0; i < fileCount; i++) { parseFile(files[i], &curTopic, &inList, &inTable, &inCode, &inNote, ¬eFlags, para, ¶Len, 0); } // Flush any remaining paragraph if (curTopic && paraLen > 0) { uint8_t type = HLP_REC_TEXT; uint8_t flags = 0; if (inCode) { type = HLP_REC_CODE; } else if (inTable) { type = HLP_REC_TABLE; } else if (inNote) { type = HLP_REC_NOTE; flags = noteFlags; } else if (inList) { type = HLP_REC_LIST_ITEM; } flushParagraph(curTopic, para, paraLen, type, flags); } fprintf(stderr, " %d topic(s), %d TOC entries, %d index entries, %d image(s)\n", topicCount, tocCount, indexCount, imageCount); } // --------------------------------------------------------------------------- // Pass 2: Word wrap // --------------------------------------------------------------------------- static void pass2Wrap(void) { fprintf(stderr, "Pass 2: Word wrapping at %d columns...\n", wrapWidth); for (int32_t t = 0; t < topicCount; t++) { TopicT *topic = &topics[t]; for (int32_t r = 0; r < topic->recordCount; r++) { RecordT *rec = &topic->records[r]; if (rec->type == HLP_REC_TEXT || rec->type == HLP_REC_NOTE) { char *wrapped = wordWrap(rec->data, wrapWidth, 0); free(rec->data); rec->data = wrapped; rec->dataLen = strlen(wrapped); } else if (rec->type == HLP_REC_LIST_ITEM) { char *wrapped = wordWrap(rec->data, wrapWidth - LIST_INDENT, LIST_INDENT); free(rec->data); rec->data = wrapped; rec->dataLen = strlen(wrapped); } // CODE, TABLE, HEADING, etc. are not wrapped } } } // --------------------------------------------------------------------------- // Pass 3: String table // --------------------------------------------------------------------------- static int32_t strTableFind(const char *str) { for (int32_t i = 0; i < strEntryCount; i++) { if (strcmp(strEntries[i].str, str) == 0) { return strEntries[i].offset; } } return -1; } static int32_t strTableAdd(const char *str) { // Check for existing int32_t existing = strTableFind(str); if (existing >= 0) { return existing; } int32_t len = strlen(str); // Grow string table buffer while (strTabSize + len + 1 > strTabCap) { strTabCap *= 2; strTab = realloc(strTab, strTabCap); if (!strTab) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } } // Grow entries array if (strEntryCount >= strEntryCap) { strEntryCap *= 2; strEntries = realloc(strEntries, sizeof(StrEntryT) * strEntryCap); if (!strEntries) { fprintf(stderr, "fatal: out of memory\n"); exit(1); } } int32_t offset = strTabSize; memcpy(strTab + strTabSize, str, len + 1); strTabSize += len + 1; strEntries[strEntryCount].str = strTab + offset; strEntries[strEntryCount].offset = offset; strEntryCount++; return offset; } static void pass3StringTable(void) { fprintf(stderr, "Pass 3: Building string table...\n"); strTabCap = INITIAL_STRTAB_SIZE; strTabSize = 0; strTab = malloc(strTabCap); strEntryCap = 1024; strEntryCount = 0; strEntries = malloc(sizeof(StrEntryT) * strEntryCap); // Add empty string at offset 0 strTableAdd(""); // Add all topic IDs and titles for (int32_t i = 0; i < topicCount; i++) { strTableAdd(topics[i].id); strTableAdd(topics[i].title); } // Add TOC titles for (int32_t i = 0; i < tocCount; i++) { strTableAdd(tocEntries[i].title); } // Add index keywords for (int32_t i = 0; i < indexCount; i++) { strTableAdd(indexEntries[i].keyword); } fprintf(stderr, " %d unique strings, %d bytes\n", strEntryCount, strTabSize); } // --------------------------------------------------------------------------- // Pass 4: Search index (trigram) // --------------------------------------------------------------------------- static int compareTrigrams(const void *a, const void *b) { const TrigramT *ta = (const TrigramT *)a; const TrigramT *tb = (const TrigramT *)b; int32_t d = (int32_t)ta->trigram[0] - (int32_t)tb->trigram[0]; if (d != 0) { return d; } d = (int32_t)ta->trigram[1] - (int32_t)tb->trigram[1]; if (d != 0) { return d; } return (int32_t)ta->trigram[2] - (int32_t)tb->trigram[2]; } static void addTrigram(uint8_t a, uint8_t b, uint8_t c, uint16_t topicIdx) { // Find existing trigram TrigramT *tri = NULL; for (int32_t i = 0; i < trigramCount; i++) { if (trigrams[i].trigram[0] == a && trigrams[i].trigram[1] == b && trigrams[i].trigram[2] == c) { tri = &trigrams[i]; break; } } if (!tri) { if (trigramCount >= MAX_TRIGRAMS) { return; } tri = &trigrams[trigramCount++]; tri->trigram[0] = a; tri->trigram[1] = b; tri->trigram[2] = c; tri->postingCap = 8; tri->postingCount = 0; tri->postings = malloc(sizeof(uint16_t) * tri->postingCap); } // Check if topic already in posting list for (int32_t i = 0; i < tri->postingCount; i++) { if (tri->postings[i] == topicIdx) { return; } } // Add to posting list if (tri->postingCount >= tri->postingCap) { tri->postingCap *= 2; tri->postings = realloc(tri->postings, sizeof(uint16_t) * tri->postingCap); } tri->postings[tri->postingCount++] = topicIdx; } static void buildSearchIndex(void) { for (int32_t t = 0; t < topicCount; t++) { TopicT *topic = &topics[t]; // Collect all searchable text for this topic BufT textBuf; bufInit(&textBuf); // Add title int32_t titleLen = strlen(topic->title); bufAppend(&textBuf, topic->title, titleLen); uint8_t space = ' '; bufAppend(&textBuf, &space, 1); for (int32_t r = 0; r < topic->recordCount; r++) { RecordT *rec = &topic->records[r]; if (!rec->data || rec->dataLen <= 0) { continue; } if (rec->type == HLP_REC_TEXT || rec->type == HLP_REC_HEADING1 || rec->type == HLP_REC_HEADING2 || rec->type == HLP_REC_HEADING3 || rec->type == HLP_REC_LIST_ITEM || rec->type == HLP_REC_NOTE || rec->type == HLP_REC_CODE) { bufAppend(&textBuf, rec->data, rec->dataLen); bufAppend(&textBuf, &space, 1); } } // Lowercase the text for (int32_t i = 0; i < textBuf.size; i++) { textBuf.data[i] = tolower(textBuf.data[i]); } // Generate trigrams for (int32_t i = 0; i + 2 < textBuf.size; i++) { uint8_t a = textBuf.data[i]; uint8_t b = textBuf.data[i + 1]; uint8_t c = textBuf.data[i + 2]; if (isalnum(a) && isalnum(b) && isalnum(c)) { addTrigram(a, b, c, (uint16_t)t); } } free(textBuf.data); } } static void pass4SearchIndex(void) { fprintf(stderr, "Pass 4: Building search index...\n"); buildSearchIndex(); // Sort trigrams qsort(trigrams, trigramCount, sizeof(TrigramT), compareTrigrams); fprintf(stderr, " %d trigrams\n", trigramCount); } // --------------------------------------------------------------------------- // Pass 5: Serialize // --------------------------------------------------------------------------- static int compareIndexEntries(const void *a, const void *b) { return strcasecmp(((const IndexEntryT *)a)->keyword, ((const IndexEntryT *)b)->keyword); } // --------------------------------------------------------------------------- // HTML output // --------------------------------------------------------------------------- static const char sBase64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; static void base64Encode(FILE *f, const uint8_t *data, int32_t len) { for (int32_t i = 0; i < len; i += 3) { uint32_t b = (uint32_t)data[i] << 16; if (i + 1 < len) { b |= (uint32_t)data[i + 1] << 8; } if (i + 2 < len) { b |= (uint32_t)data[i + 2]; } fputc(sBase64[(b >> 18) & 0x3F], f); fputc(sBase64[(b >> 12) & 0x3F], f); fputc((i + 1 < len) ? sBase64[(b >> 6) & 0x3F] : '=', f); fputc((i + 2 < len) ? sBase64[b & 0x3F] : '=', f); } } static void htmlEscapeWrite(FILE *f, const char *text) { for (const char *p = text; *p; p++) { switch (*p) { case '<': fputs("<", f); break; case '>': fputs(">", f); break; case '&': fputs("&", f); break; case '"': fputs(""", f); break; default: fputc(*p, f); break; } } } static void htmlWritePreformatted(FILE *f, const char *text) { fprintf(f, "
");
    htmlEscapeWrite(f, text);
    fprintf(f, "
\n"); } static void htmlWriteImage(FILE *f, const RecordT *rec) { // rec->data is the image filename (from parse time) // Find the image ref and embed the file as base64 int32_t imgIdx = findImage(rec->data); if (imgIdx < 0) { fprintf(f, "

[Image: %s not found]

\n", rec->data); return; } FILE *imgFile = fopen(imageRefs[imgIdx].path, "rb"); if (!imgFile) { fprintf(f, "

[Image: %s could not be read]

\n", rec->data); return; } fseek(imgFile, 0, SEEK_END); long imgSize = ftell(imgFile); fseek(imgFile, 0, SEEK_SET); uint8_t *imgData = malloc(imgSize); if (!imgData) { fclose(imgFile); return; } if (fread(imgData, 1, imgSize, imgFile) != (size_t)imgSize) { free(imgData); fclose(imgFile); fprintf(f, "

[Image: %s read error]

\n", rec->data); return; } fclose(imgFile); fprintf(f, "

\"%s\"

\n", rec->data); free(imgData); } static void htmlWriteRecords(FILE *f, const TopicT *topic) { bool inList = false; for (int32_t i = 0; i < topic->recordCount; i++) { const RecordT *rec = &topic->records[i]; // Close list if we're leaving list items if (inList && rec->type != HLP_REC_LIST_ITEM) { fprintf(f, "\n"); inList = false; } switch (rec->type) { case HLP_REC_TEXT: fprintf(f, "

"); htmlEscapeWrite(f, rec->data); fprintf(f, "

\n"); break; case HLP_REC_HEADING1: fprintf(f, "

"); htmlEscapeWrite(f, rec->data); fprintf(f, "

\n"); break; case HLP_REC_HEADING2: fprintf(f, "

"); htmlEscapeWrite(f, rec->data); fprintf(f, "

\n"); break; case HLP_REC_HEADING3: fprintf(f, "

"); htmlEscapeWrite(f, rec->data); fprintf(f, "

\n"); break; case HLP_REC_IMAGE: htmlWriteImage(f, rec); break; case HLP_REC_LINK: { // data format: "topicId\0displayText" const char *targetId = rec->data; const char *displayTxt = rec->data + strlen(rec->data) + 1; if (displayTxt > rec->data + rec->dataLen) { displayTxt = targetId; } fprintf(f, "

", targetId); htmlEscapeWrite(f, displayTxt); fprintf(f, "

\n"); break; } case HLP_REC_LIST_ITEM: if (!inList) { fprintf(f, "
    \n"); inList = true; } fprintf(f, "
  • "); htmlEscapeWrite(f, rec->data); fprintf(f, "
  • \n"); break; case HLP_REC_TABLE: htmlWritePreformatted(f, rec->data); break; case HLP_REC_CODE: fprintf(f, "
    ");
                    htmlEscapeWrite(f, rec->data);
                    fprintf(f, "
    \n"); break; case HLP_REC_HRULE: fprintf(f, "
    \n"); break; case HLP_REC_NOTE: { const char *label = "Note"; if (rec->flags == HLP_NOTE_TIP) { label = "Tip"; } else if (rec->flags == HLP_NOTE_WARNING) { label = "Warning"; } fprintf(f, "
    %s: ", label); htmlEscapeWrite(f, rec->data); fprintf(f, "
    \n"); break; } case HLP_REC_END: break; default: break; } } if (inList) { fprintf(f, "
\n"); } } static void htmlWriteTocEntry(FILE *f, int32_t idx) { const TocEntryT *entry = &tocEntries[idx]; if (entry->topicIdx >= 0 && entry->topicIdx < topicCount) { fprintf(f, "
  • ", topics[entry->topicIdx].id); htmlEscapeWrite(f, entry->title); fprintf(f, ""); } else { fprintf(f, "
  • "); htmlEscapeWrite(f, entry->title); fprintf(f, ""); } // Check if next entries are children (deeper depth) int32_t myDepth = entry->depth; int32_t next = idx + 1; if (next < tocCount && tocEntries[next].depth > myDepth) { fprintf(f, "\n
      \n"); while (next < tocCount && tocEntries[next].depth > myDepth) { htmlWriteTocEntry(f, next); // Skip past this entry's subtree int32_t childDepth = tocEntries[next].depth; next++; while (next < tocCount && tocEntries[next].depth > childDepth) { next++; } } fprintf(f, "
    \n"); } fprintf(f, "
  • \n"); } static int emitHtml(const char *outputPath) { FILE *f = fopen(outputPath, "w"); if (!f) { fprintf(stderr, "error: cannot open %s for writing\n", outputPath); return -1; } // Find a title from the default topic or the first topic const char *docTitle = "DVX Help"; for (int32_t i = 0; i < topicCount; i++) { if (topics[i].isDefault && topics[i].title[0]) { docTitle = topics[i].title; break; } } fprintf(f, "\n\n\n"); fprintf(f, "\n"); fprintf(f, ""); htmlEscapeWrite(f, docTitle); fprintf(f, "\n"); fprintf(f, "\n"); fprintf(f, "\n\n"); // TOC sidebar fprintf(f, "\n"); // Main content fprintf(f, "
    \n"); for (int32_t i = 0; i < topicCount; i++) { fprintf(f, "
    \n", topics[i].id); fprintf(f, "

    "); htmlEscapeWrite(f, topics[i].title); fprintf(f, "

    \n"); htmlWriteRecords(f, &topics[i]); fprintf(f, "
    \n"); } fprintf(f, "
    \n\n\n"); fclose(f); return 0; } static int pass5Serialize(const char *outputPath) { fprintf(stderr, "Pass 5: Serializing to %s...\n", outputPath); FILE *f = fopen(outputPath, "wb"); if (!f) { fprintf(stderr, "error: cannot create '%s': %s\n", outputPath, strerror(errno)); return 1; } HlpHeaderT hdr; memset(&hdr, 0, sizeof(hdr)); hdr.magic = HLP_MAGIC; hdr.version = HLP_VERSION; hdr.topicCount = topicCount; hdr.wrapWidth = wrapWidth; // Find default topic hdr.defaultTopicStr = 0; for (int32_t i = 0; i < topicCount; i++) { if (topics[i].isDefault) { hdr.defaultTopicStr = strTableFind(topics[i].id); break; } } uint32_t offset = 0; // --- 1. Image pool --- hdr.imagePoolOffset = offset; for (int32_t i = 0; i < imageCount; i++) { ImageRefT *img = &imageRefs[i]; FILE *imgFile = fopen(img->path, "rb"); if (!imgFile) { fprintf(stderr, "error: cannot open image '%s': %s\n", img->path, strerror(errno)); fclose(f); return 1; } fseek(imgFile, 0, SEEK_END); img->fileSize = ftell(imgFile); fseek(imgFile, 0, SEEK_SET); img->poolOffset = offset - hdr.imagePoolOffset; uint8_t *imgData = malloc(img->fileSize); if (fread(imgData, 1, img->fileSize, imgFile) != (size_t)img->fileSize) { fprintf(stderr, "error: cannot read image '%s'\n", img->path); free(imgData); fclose(imgFile); fclose(f); return 1; } fwrite(imgData, 1, img->fileSize, f); offset += img->fileSize; free(imgData); fclose(imgFile); fprintf(stderr, " image: %s (%d bytes)\n", img->path, img->fileSize); } hdr.imagePoolSize = offset - hdr.imagePoolOffset; // --- 2. Topic content records --- uint32_t *topicContentOffsets = calloc(topicCount, sizeof(uint32_t)); uint32_t *topicContentSizes = calloc(topicCount, sizeof(uint32_t)); for (int32_t t = 0; t < topicCount; t++) { TopicT *topic = &topics[t]; topicContentOffsets[t] = offset; uint32_t startOffset = offset; for (int32_t r = 0; r < topic->recordCount; r++) { RecordT *rec = &topic->records[r]; HlpRecordHdrT recHdr; recHdr.type = rec->type; recHdr.flags = rec->flags; if (rec->type == HLP_REC_IMAGE) { // Replace filename with HlpImageRefT int32_t imgIdx = findImage(rec->data); if (imgIdx < 0) { fprintf(stderr, "error: image '%s' not found in references\n", rec->data); recHdr.length = 0; fwrite(&recHdr, sizeof(recHdr), 1, f); offset += sizeof(recHdr); } else { HlpImageRefT imgRef; imgRef.imageOffset = imageRefs[imgIdx].poolOffset; imgRef.imageSize = imageRefs[imgIdx].fileSize; recHdr.length = sizeof(HlpImageRefT); fwrite(&recHdr, sizeof(recHdr), 1, f); fwrite(&imgRef, sizeof(imgRef), 1, f); offset += sizeof(recHdr) + sizeof(HlpImageRefT); } } else if (rec->type == HLP_REC_LINK) { // Payload is "target\0display" -- write as-is recHdr.length = rec->dataLen; fwrite(&recHdr, sizeof(recHdr), 1, f); fwrite(rec->data, 1, rec->dataLen, f); offset += sizeof(recHdr) + rec->dataLen; } else if (rec->type == HLP_REC_HRULE) { recHdr.length = 0; fwrite(&recHdr, sizeof(recHdr), 1, f); offset += sizeof(recHdr); } else { // Text, headings, list items, code, table, note recHdr.length = rec->dataLen; fwrite(&recHdr, sizeof(recHdr), 1, f); if (rec->dataLen > 0) { fwrite(rec->data, 1, rec->dataLen, f); } offset += sizeof(recHdr) + rec->dataLen; } } // Write end-of-topic record HlpRecordHdrT endRec; endRec.type = HLP_REC_END; endRec.flags = 0; endRec.length = 0; fwrite(&endRec, sizeof(endRec), 1, f); offset += sizeof(endRec); topicContentSizes[t] = offset - startOffset; } // --- 3. TOC entries --- hdr.tocOffset = offset; hdr.tocCount = tocCount; for (int32_t i = 0; i < tocCount; i++) { HlpTocEntryT entry; entry.titleStr = strTableFind(tocEntries[i].title); entry.topicIdx = (tocEntries[i].topicIdx >= 0) ? (uint16_t)tocEntries[i].topicIdx : 0xFFFF; entry.depth = (uint8_t)tocEntries[i].depth; entry.flags = 0; fwrite(&entry, sizeof(entry), 1, f); offset += sizeof(entry); } // --- 4. Keyword index entries (sorted) --- qsort(indexEntries, indexCount, sizeof(IndexEntryT), compareIndexEntries); hdr.indexOffset = offset; hdr.indexCount = indexCount; for (int32_t i = 0; i < indexCount; i++) { HlpIndexEntryT entry; entry.keywordStr = strTableFind(indexEntries[i].keyword); entry.topicIdx = (uint16_t)indexEntries[i].topicIdx; entry.reserved = 0; fwrite(&entry, sizeof(entry), 1, f); offset += sizeof(entry); } // --- 5. Search index --- hdr.searchOffset = offset; { HlpSearchHeaderT searchHdr; searchHdr.trigramCount = trigramCount; fwrite(&searchHdr, sizeof(searchHdr), 1, f); offset += sizeof(searchHdr); // Calculate posting list offsets // Posting lists come after the trigram entry array uint32_t postingBase = sizeof(searchHdr) + sizeof(HlpTrigramEntryT) * trigramCount; uint32_t postingOff = 0; // Write trigram entries for (int32_t i = 0; i < trigramCount; i++) { HlpTrigramEntryT entry; entry.trigram[0] = trigrams[i].trigram[0]; entry.trigram[1] = trigrams[i].trigram[1]; entry.trigram[2] = trigrams[i].trigram[2]; entry.postingCount = (uint8_t)trigrams[i].postingCount; entry.postingOffset = postingBase + postingOff; fwrite(&entry, sizeof(entry), 1, f); offset += sizeof(entry); postingOff += sizeof(uint16_t) * trigrams[i].postingCount; } // Write posting lists for (int32_t i = 0; i < trigramCount; i++) { fwrite(trigrams[i].postings, sizeof(uint16_t), trigrams[i].postingCount, f); offset += sizeof(uint16_t) * trigrams[i].postingCount; } } hdr.searchSize = offset - hdr.searchOffset; // --- 6. String table --- hdr.stringTableOffset = offset; hdr.stringTableSize = strTabSize; fwrite(strTab, 1, strTabSize, f); offset += strTabSize; // --- 7. Topic directory (sorted by topic ID) --- hdr.topicDirOffset = offset; HlpTopicDirT *topicDir = calloc(topicCount, sizeof(HlpTopicDirT)); for (int32_t i = 0; i < topicCount; i++) { topicDir[i].topicIdStr = strTableFind(topics[i].id); topicDir[i].titleStr = strTableFind(topics[i].title); topicDir[i].contentOffset = topicContentOffsets[i]; topicDir[i].contentSize = topicContentSizes[i]; topicDir[i].reserved = 0; } // Topic directory is NOT sorted -- indices must match what TOC and // index entries reference (assigned sequentially during parsing). fwrite(topicDir, sizeof(HlpTopicDirT), topicCount, f); offset += sizeof(HlpTopicDirT) * topicCount; // --- 8. Header (at EOF) --- fwrite(&hdr, sizeof(hdr), 1, f); offset += sizeof(hdr); fclose(f); free(topicDir); free(topicContentOffsets); free(topicContentSizes); fprintf(stderr, " wrote %u bytes\n", offset); return 0; } // --------------------------------------------------------------------------- // Cleanup // --------------------------------------------------------------------------- static void freeAll(void) { for (int32_t i = 0; i < topicCount; i++) { for (int32_t r = 0; r < topics[i].recordCount; r++) { free(topics[i].records[r].data); } free(topics[i].records); } for (int32_t i = 0; i < trigramCount; i++) { free(trigrams[i].postings); } free(strTab); free(strEntries); } // --------------------------------------------------------------------------- // main // --------------------------------------------------------------------------- int main(int argc, char **argv) { const char *outputPath = NULL; char *inputFiles[256]; int32_t inputCount = 0; // Parse command-line arguments for (int32_t i = 1; i < argc; i++) { if (strcmp(argv[i], "-o") == 0) { if (++i >= argc) { usage(); } outputPath = argv[i]; } else if (strcmp(argv[i], "-w") == 0) { if (++i >= argc) { usage(); } wrapWidth = atoi(argv[i]); if (wrapWidth < 20) { fprintf(stderr, "error: wrap width must be >= 20\n"); return 1; } } else if (strcmp(argv[i], "-i") == 0) { if (++i >= argc) { usage(); } snprintf(imageDir, sizeof(imageDir), "%s", argv[i]); imageDir[sizeof(imageDir) - 1] = '\0'; } else if (strcmp(argv[i], "--html") == 0) { if (++i >= argc) { usage(); } htmlPath = argv[i]; } else if (argv[i][0] == '-') { fprintf(stderr, "error: unknown option '%s'\n", argv[i]); usage(); } else { if (inputCount >= 256) { fprintf(stderr, "error: too many input files\n"); return 1; } inputFiles[inputCount++] = argv[i]; } } if (!outputPath || inputCount == 0) { usage(); } fprintf(stderr, "dvxhlpc: DVX Help Compiler\n"); pass1Parse(inputCount, inputFiles); if (errorCount > 0) { fprintf(stderr, "Aborting due to %d error(s).\n", errorCount); freeAll(); return 1; } pass2Wrap(); // Emit HTML if requested (uses wrapped text, before binary passes) if (htmlPath) { if (emitHtml(htmlPath) == 0) { fprintf(stderr, "HTML: wrote %s\n", htmlPath); } } pass3StringTable(); pass4SearchIndex(); int result = pass5Serialize(outputPath); if (result == 0) { fprintf(stderr, "Done. %d topic(s), %d TOC entries, %d index keywords, %d trigrams.\n", topicCount, tocCount, indexCount, trigramCount); } freeAll(); return result; }