// Source: DVX_GUI/tools/dvxhlpc.c (1804 lines, 56 KiB, C)

// dvxhlpc.c -- DVX Help file compiler
//
// Host-side tool (Linux) that converts .dhs source files into
// the .hlp binary format consumed by the DVX help viewer.
//
// Usage:
// dvxhlpc -o output.hlp [-i imagedir] [--html out.html] [--quiet] input.dhs [@filelist] [...]
//
// Five-pass algorithm:
// 1. Parse source files, build topic/TOC/index/image arrays
// 2. Paragraph pass (currently a no-op: lines are joined while parsing and the viewer wraps text at display time)
// 3. Build deduplicated string table
// 4. Generate trigram search index
// 5. Serialize binary .hlp file
#define _POSIX_C_SOURCE 200809L
#include "../apps/dvxhelp/hlpformat.h"
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
// ---------------------------------------------------------------------------
// Limits
// ---------------------------------------------------------------------------
// Capacity of the static topics[] array; addTopic() reports an error beyond it.
#define MAX_TOPICS 1024
// Capacity of the static tocEntries[] array.
#define MAX_TOC_ENTRIES 2048
// Capacity of the static indexEntries[] array.
#define MAX_INDEX_ENTRIES 4096
// Capacity of the static imageRefs[] array.
#define MAX_IMAGES 256
// Upper bound on records in one topic, enforced by addRecord().
#define MAX_RECORDS_PER_TOPIC 512
// Size of the per-line read buffer in parseFile(); the shared paragraph
// buffer in pass1Parse() is sized as a multiple of this.
#define MAX_LINE_LEN 1024
// Capacity of the static trigrams[] array; further trigrams are dropped.
#define MAX_TRIGRAMS 65536
// Deepest allowed nesting of .include directives.
#define MAX_INCLUDE_DEPTH 16
// Maximum number of distinct .section names handled when regrouping the TOC.
#define MAX_SECTIONS 64
// Initial byte capacity of the string table (doubles on demand).
#define INITIAL_STRTAB_SIZE 65536
// Initial byte capacity of a BufT (doubles on demand).
#define INITIAL_BUF_SIZE 65536
// ---------------------------------------------------------------------------
// Compiler data structures
// ---------------------------------------------------------------------------
// One content record inside a topic (paragraph, heading, image, link, ...).
typedef struct {
uint8_t type; // HLP_REC_* record kind
uint8_t flags; // kind-specific flags (image alignment, note flavour), else 0
char *data; // heap copy of the payload, NUL-terminated; NULL when empty
int32_t dataLen; // payload length in bytes, excluding the terminator
} RecordT;
// A help topic: identity, title and the growable list of its records.
typedef struct {
char id[128]; // topic ID (first word after .topic)
char title[256]; // set by the .title directive
char section[64]; // section group (e.g., "Libraries") or empty for root
RecordT *records; // heap array, grown by doubling in addRecord()
int32_t recordCount; // number of records in use
int32_t recordCap; // allocated capacity of records[]
bool isDefault; // set by the .default directive
} TopicT;
// One table-of-contents line.
typedef struct {
char title[256]; // text shown in the TOC
char section[64]; // section group this entry belongs to
int32_t topicIdx; // index into topics[], or -1 for a section header entry
int32_t depth; // nesting level (0 = top level)
} TocEntryT;
// One keyword index entry created by the .index directive.
typedef struct {
char keyword[128]; // keyword text
int32_t topicIdx; // index into topics[] of the topic that declared it
} IndexEntryT;
// A referenced image file.
typedef struct {
char path[520]; // "<imageDir>/<filename>" as built by addImageRef()
int32_t poolOffset; // initialised to 0; presumably filled in during serialization -- TODO confirm
int32_t fileSize; // initialised to 0; presumably filled in during serialization -- TODO confirm
} ImageRefT;
// String table entry for deduplication
typedef struct {
char *str; // points into strTab at `offset`; only valid until strTab is reallocated
int32_t offset; // byte offset of the string inside strTab
} StrEntryT;
// Trigram posting list
typedef struct {
uint8_t trigram[3]; // the three (lowercased, alphanumeric) bytes
uint16_t *postings; // heap array of topic indices containing the trigram
int32_t postingCount; // entries in use
int32_t postingCap; // allocated capacity of postings[]
} TrigramT;
// Dynamic buffer for serialization
typedef struct {
uint8_t *data; // heap storage, owned by the buffer's user (freed with free())
int32_t size; // bytes currently stored
int32_t cap; // allocated capacity in bytes
} BufT;
// ---------------------------------------------------------------------------
// Globals
// ---------------------------------------------------------------------------
// Parsed content, filled during pass 1. Each array has a matching count.
static TopicT topics[MAX_TOPICS];
static int32_t topicCount = 0;
static TocEntryT tocEntries[MAX_TOC_ENTRIES];
static int32_t tocCount = 0;
static IndexEntryT indexEntries[MAX_INDEX_ENTRIES];
static int32_t indexCount = 0;
static ImageRefT imageRefs[MAX_IMAGES];
static int32_t imageCount = 0;
// Deduplicated string table (pass 3): raw bytes plus one entry per string.
static char *strTab = NULL;
static int32_t strTabSize = 0;
static int32_t strTabCap = 0;
static StrEntryT *strEntries = NULL;
static int32_t strEntryCount = 0;
static int32_t strEntryCap = 0;
// Trigram search index (pass 4).
static TrigramT trigrams[MAX_TRIGRAMS];
static int32_t trigramCount = 0;
// Settings and diagnostics state.
static char imageDir[260] = "."; // directory prefixed to image filenames
static const char *htmlPath = NULL;
static int32_t errorCount = 0; // incremented by emitError()
static bool quietMode = false; // suppresses hlpcInfo() output
// Parse state
static const char *currentFile = NULL; // file named in diagnostics
static int32_t currentLine = 0; // 1-based line number within currentFile
static char currentSection[64] = {0}; // active .section name, empty if none
// ---------------------------------------------------------------------------
// Prototypes
// ---------------------------------------------------------------------------
// Forward declarations for the file-local functions, in roughly
// alphabetical order, so definitions may appear in any order below.
static void addImageRef(const char *filename);
static void addIndexEntry(const char *keyword, int32_t topicIdx);
static RecordT *addRecord(TopicT *topic, uint8_t type, uint8_t flags, const char *data, int32_t dataLen);
static void addTocEntry(const char *title, int32_t topicIdx, int32_t depth);
static TopicT *addTopic(const char *id);
static void addTrigram(uint8_t a, uint8_t b, uint8_t c, uint16_t topicIdx);
static void bufAppend(BufT *buf, const void *data, int32_t len);
static void bufInit(BufT *buf);
static void buildSearchIndex(void);
static int compareIndexEntries(const void *a, const void *b);
static int compareTrigrams(const void *a, const void *b);
static void emitError(const char *fmt, ...);
static void emitWarning(const char *fmt, ...);
static int32_t findImage(const char *filename);
static void flushParagraph(TopicT *topic, char *para, int32_t paraLen, uint8_t type, uint8_t flags);
static void freeAll(void);
static void hlpcInfo(const char *fmt, ...);
static void parseDirective(const char *line, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth);
static void parseFile(const char *path, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth);
static void pass1Parse(int32_t fileCount, char **files);
static void pass2Wrap(void);
static void regroupTocBySections(void);
static void pass3StringTable(void);
static void pass4SearchIndex(void);
static int pass5Serialize(const char *outputPath);
static int emitHtml(const char *outputPath);
static int32_t strTableAdd(const char *str);
static int32_t strTableFind(const char *str);
static void usage(void);
// ---------------------------------------------------------------------------
// emitError / emitWarning
// ---------------------------------------------------------------------------
static void emitError(const char *fmt, ...) {
va_list ap;
va_start(ap, fmt);
fprintf(stderr, "error: %s:%d: ", currentFile ? currentFile : "<unknown>", (int)currentLine);
vfprintf(stderr, fmt, ap);
fprintf(stderr, "\n");
va_end(ap);
errorCount++;
}
// Print "warning: <file>:<line>: <message>" to stderr for the current parse
// position. Warnings do not affect errorCount.
static void emitWarning(const char *fmt, ...) {
    const char *where = currentFile ? currentFile : "<unknown>";
    va_list args;

    fprintf(stderr, "warning: %s:%d: ", where, (int)currentLine);
    va_start(args, fmt);
    vfprintf(stderr, fmt, args);
    va_end(args);
    fputc('\n', stderr);
}
// printf-style progress output on stderr; silent when quietMode is set.
static void hlpcInfo(const char *fmt, ...) {
    if (!quietMode) {
        va_list args;
        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        va_end(args);
    }
}
// ---------------------------------------------------------------------------
// usage
// ---------------------------------------------------------------------------
// Print the command-line synopsis to stderr and terminate with status 1.
static void usage(void) {
    static const char synopsis[] =
        "Usage: dvxhlpc -o output.hlp [-i imagedir] [--html out.html] [--quiet] input.dhs [@filelist] [...]\n";

    fputs(synopsis, stderr);
    exit(1);
}
// ---------------------------------------------------------------------------
// Buffer helpers
// ---------------------------------------------------------------------------
// Prepare an empty buffer with INITIAL_BUF_SIZE bytes of storage.
// Terminates the program if the allocation fails.
static void bufInit(BufT *buf) {
    uint8_t *storage = malloc(INITIAL_BUF_SIZE);

    buf->cap = INITIAL_BUF_SIZE;
    buf->size = 0;
    buf->data = storage;
    if (storage == NULL) {
        fprintf(stderr, "fatal: out of memory\n");
        exit(1);
    }
}
// Append `len` bytes from `data` to the buffer, growing the storage by
// doubling as needed.
//
// A non-positive `len` is a no-op. The program terminates if the buffer would
// exceed INT32_MAX bytes or if memory cannot be obtained; the capacity
// arithmetic is guarded so it can never overflow the signed 32-bit fields.
static void bufAppend(BufT *buf, const void *data, int32_t len) {
    if (len <= 0) {
        return;
    }
    if (len > INT32_MAX - buf->size) {
        fprintf(stderr, "fatal: buffer too large\n");
        exit(1);
    }

    int32_t needed = buf->size + len;
    if (needed > buf->cap) {
        int32_t newCap = buf->cap > 0 ? buf->cap : INITIAL_BUF_SIZE;
        while (newCap < needed) {
            // Saturate instead of overflowing when doubling past 1 GiB.
            newCap = (newCap > INT32_MAX / 2) ? INT32_MAX : newCap * 2;
        }
        uint8_t *grown = realloc(buf->data, (size_t)newCap);
        if (!grown) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        buf->data = grown;
        buf->cap = newCap;
    }

    memcpy(buf->data + buf->size, data, (size_t)len);
    buf->size = needed;
}
// ---------------------------------------------------------------------------
// Topic management
// ---------------------------------------------------------------------------
static TopicT *addTopic(const char *id) {
if (topicCount >= MAX_TOPICS) {
emitError("too many topics (max %d)", MAX_TOPICS);
return NULL;
}
TopicT *t = &topics[topicCount++];
memset(t, 0, sizeof(*t));
snprintf(t->id, sizeof(t->id), "%s", id);
snprintf(t->section, sizeof(t->section), "%s", currentSection);
t->recordCap = 32;
t->records = malloc(sizeof(RecordT) * t->recordCap);
if (!t->records) {
fprintf(stderr, "fatal: out of memory\n");
exit(1);
}
return t;
}
// Append a record to `topic`.
//
// type/flags  stored verbatim in the new record
// data        payload to copy (may be NULL); dataLen bytes are duplicated
//             and NUL-terminated. An empty payload is stored as NULL/0.
//
// Returns the new record, or NULL (after reporting an error) when the topic
// already holds MAX_RECORDS_PER_TOPIC records. Terminates the program on
// allocation failure, like the other allocators in this file.
static RecordT *addRecord(TopicT *topic, uint8_t type, uint8_t flags, const char *data, int32_t dataLen) {
    if (topic->recordCount >= MAX_RECORDS_PER_TOPIC) {
        emitError("too many records in topic '%s' (max %d)", topic->id, MAX_RECORDS_PER_TOPIC);
        return NULL;
    }

    if (topic->recordCount >= topic->recordCap) {
        int32_t newCap = topic->recordCap > 0 ? topic->recordCap * 2 : 32;
        RecordT *grown = realloc(topic->records, sizeof(RecordT) * (size_t)newCap);
        if (!grown) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        topic->records = grown;
        topic->recordCap = newCap;
    }

    // Duplicate the payload before committing the record so a failed
    // allocation can never leave a half-initialised entry behind.
    char *copy = NULL;
    int32_t copyLen = 0;
    if (data && dataLen > 0) {
        copy = malloc((size_t)dataLen + 1);
        if (!copy) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        memcpy(copy, data, (size_t)dataLen);
        copy[dataLen] = '\0';
        copyLen = dataLen;
    }

    RecordT *r = &topic->records[topic->recordCount++];
    r->type = type;
    r->flags = flags;
    r->data = copy;
    r->dataLen = copyLen;
    return r;
}
// ---------------------------------------------------------------------------
// TOC / Index / Image management
// ---------------------------------------------------------------------------
// Append a TOC line pointing at topic `topicIdx` with the given nesting
// depth, tagged with the current section. Reports an error and does nothing
// once MAX_TOC_ENTRIES is reached.
static void addTocEntry(const char *title, int32_t topicIdx, int32_t depth) {
    if (tocCount >= MAX_TOC_ENTRIES) {
        emitError("too many TOC entries (max %d)", MAX_TOC_ENTRIES);
        return;
    }

    TocEntryT *entry = &tocEntries[tocCount];
    tocCount++;

    // snprintf always NUL-terminates, truncating over-long titles.
    snprintf(entry->title, sizeof(entry->title), "%s", title);
    snprintf(entry->section, sizeof(entry->section), "%s", currentSection);
    entry->depth = depth;
    entry->topicIdx = topicIdx;
}
// Append a keyword -> topic mapping to the index. Reports an error and does
// nothing once MAX_INDEX_ENTRIES is reached.
static void addIndexEntry(const char *keyword, int32_t topicIdx) {
    if (indexCount >= MAX_INDEX_ENTRIES) {
        emitError("too many index entries (max %d)", MAX_INDEX_ENTRIES);
        return;
    }

    IndexEntryT *entry = &indexEntries[indexCount];
    indexCount++;

    // snprintf always NUL-terminates, truncating over-long keywords.
    snprintf(entry->keyword, sizeof(entry->keyword), "%s", keyword);
    entry->topicIdx = topicIdx;
}
static int32_t findImage(const char *filename) {
for (int32_t i = 0; i < imageCount; i++) {
// Compare just the filename portion
const char *base = strrchr(imageRefs[i].path, '/');
if (!base) {
base = imageRefs[i].path;
} else {
base++;
}
if (strcmp(base, filename) == 0) {
return i;
}
}
return -1;
}
// Register an image file unless it is already known. The stored path is
// "<imageDir>/<filename>"; offset and size start at zero.
static void addImageRef(const char *filename) {
    bool alreadyKnown = findImage(filename) >= 0;
    if (alreadyKnown) {
        return;
    }
    if (imageCount >= MAX_IMAGES) {
        emitError("too many images (max %d)", MAX_IMAGES);
        return;
    }

    ImageRefT *ref = &imageRefs[imageCount];
    imageCount++;

    ref->fileSize = 0;
    ref->poolOffset = 0;
    snprintf(ref->path, sizeof(ref->path), "%s/%s", imageDir, filename);
}
// ---------------------------------------------------------------------------
// ---------------------------------------------------------------------------
// Paragraph flush
// ---------------------------------------------------------------------------
// Emit the pending paragraph text as one record of the given type/flags.
// Trailing spaces, newlines and carriage returns are removed first; nothing
// is emitted when there is no topic or no text remains. The buffer is
// NUL-terminated in place at the trimmed length.
static void flushParagraph(TopicT *topic, char *para, int32_t paraLen, uint8_t type, uint8_t flags) {
    if (topic == NULL) {
        return;
    }

    int32_t end = paraLen;
    while (end > 0) {
        char last = para[end - 1];
        if (last != ' ' && last != '\n' && last != '\r') {
            break;
        }
        end--;
    }

    if (end > 0) {
        para[end] = '\0';
        addRecord(topic, type, flags, para, end);
    }
}
// ---------------------------------------------------------------------------
// Pass 1: Parse
// ---------------------------------------------------------------------------
static void parseDirective(const char *line, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth) {
char directive[32] = {0};
const char *rest = NULL;
// Extract directive name
const char *p = line + 1; // skip '.'
int32_t i = 0;
while (*p && !isspace(*p) && i < (int32_t)sizeof(directive) - 1) {
directive[i++] = *p++;
}
directive[i] = '\0';
// Skip whitespace after directive
while (*p && isspace(*p)) {
p++;
}
rest = p;
// Handle .topic
if (strcmp(directive, "topic") == 0) {
if (!*rest) {
emitError(".topic requires an ID");
return;
}
// Flush pending paragraph
flushParagraph(*curTopic, para, *paraLen, HLP_REC_TEXT, 0);
*paraLen = 0;
// Close any open blocks
*inList = false;
*inTable = false;
*inCode = false;
*inNote = false;
// Extract topic ID (first word)
char id[128] = {0};
int32_t j = 0;
while (*rest && !isspace(*rest) && j < (int32_t)sizeof(id) - 1) {
id[j++] = *rest++;
}
id[j] = '\0';
*curTopic = addTopic(id);
return;
}
// All other directives need a current topic (except .include)
if (strcmp(directive, "include") == 0) {
if (!*rest) {
emitError(".include requires a filename");
return;
}
// Flush pending paragraph
flushParagraph(*curTopic, para, *paraLen, *inCode ? HLP_REC_CODE : (*inTable ? HLP_REC_TABLE : HLP_REC_TEXT), *inNote ? *noteFlags : 0);
*paraLen = 0;
// Build path relative to current file's directory
char includePath[260] = {0};
if (rest[0] == '/') {
snprintf(includePath, sizeof(includePath), "%s", rest);
} else {
snprintf(includePath, sizeof(includePath), "%s", currentFile);
char *slash = strrchr(includePath, '/');
if (slash) {
slash[1] = '\0';
} else {
includePath[0] = '\0';
}
snprintf(includePath + strlen(includePath), sizeof(includePath) - strlen(includePath), "%s", rest);
}
// Trim trailing whitespace from path
int32_t len = strlen(includePath);
while (len > 0 && isspace(includePath[len - 1])) {
includePath[--len] = '\0';
}
parseFile(includePath, curTopic, inList, inTable, inCode, inNote, noteFlags, para, paraLen, includeDepth + 1);
return;
}
// Handle .section -- sets section group for subsequent topics/TOC entries
if (strcmp(directive, "section") == 0) {
if (*rest) {
snprintf(currentSection, sizeof(currentSection), "%s", rest);
currentSection[sizeof(currentSection) - 1] = '\0';
} else {
currentSection[0] = '\0';
}
return;
}
if (!*curTopic) {
emitError("directive .%s outside of a topic", directive);
return;
}
// Flush pending paragraph before most directives
if (strcmp(directive, "item") != 0) {
uint8_t flushType = HLP_REC_TEXT;
uint8_t flushFlags = 0;
if (*inCode) {
flushType = HLP_REC_CODE;
} else if (*inTable) {
flushType = HLP_REC_TABLE;
} else if (*inNote) {
flushType = HLP_REC_NOTE;
flushFlags = *noteFlags;
}
flushParagraph(*curTopic, para, *paraLen, flushType, flushFlags);
*paraLen = 0;
}
if (strcmp(directive, "title") == 0) {
snprintf((*curTopic)->title, sizeof((*curTopic)->title), "%s", rest);
(*curTopic)->title[sizeof((*curTopic)->title) - 1] = '\0';
} else if (strcmp(directive, "toc") == 0) {
// .toc <depth> <text>
int32_t depth = 0;
if (isdigit(*rest)) {
depth = *rest - '0';
rest++;
while (isspace(*rest)) {
rest++;
}
}
addTocEntry(rest, topicCount - 1, depth);
} else if (strcmp(directive, "h1") == 0) {
addRecord(*curTopic, HLP_REC_HEADING1, 0, rest, strlen(rest));
} else if (strcmp(directive, "h2") == 0) {
addRecord(*curTopic, HLP_REC_HEADING2, 0, rest, strlen(rest));
} else if (strcmp(directive, "h3") == 0) {
addRecord(*curTopic, HLP_REC_HEADING3, 0, rest, strlen(rest));
} else if (strcmp(directive, "image") == 0) {
if (!*rest) {
emitError(".image requires a filename");
return;
}
// Parse: .image filename [left|center|right]
char imgFile[260];
snprintf(imgFile, sizeof(imgFile), "%s", rest);
imgFile[sizeof(imgFile) - 1] = '\0';
uint8_t alignFlags = HLP_IMG_LEFT;
// Split off optional alignment keyword after filename
char *space = strchr(imgFile, ' ');
if (space) {
*space = '\0';
char *align = space + 1;
while (*align == ' ') { align++; }
if (strcasecmp(align, "center") == 0) {
alignFlags = HLP_IMG_CENTER;
} else if (strcasecmp(align, "right") == 0) {
alignFlags = HLP_IMG_RIGHT;
}
}
// Trim trailing whitespace from filename
int32_t len = strlen(imgFile);
while (len > 0 && isspace(imgFile[len - 1])) {
imgFile[--len] = '\0';
}
addImageRef(imgFile);
addRecord(*curTopic, HLP_REC_IMAGE, alignFlags, imgFile, strlen(imgFile));
} else if (strcmp(directive, "link") == 0) {
// .link <topic-id> <display text>
char linkTarget[128] = {0};
int32_t j = 0;
while (*rest && !isspace(*rest) && j < (int32_t)sizeof(linkTarget) - 1) {
linkTarget[j++] = *rest++;
}
linkTarget[j] = '\0';
while (isspace(*rest)) {
rest++;
}
// Store as "target\0display text"
int32_t targetLen = strlen(linkTarget);
int32_t displayLen = strlen(rest);
int32_t totalLen = targetLen + 1 + displayLen;
char *linkData = malloc(totalLen + 1);
memcpy(linkData, linkTarget, targetLen);
linkData[targetLen] = '\0';
memcpy(linkData + targetLen + 1, rest, displayLen);
linkData[totalLen] = '\0';
addRecord(*curTopic, HLP_REC_LINK, 0, linkData, totalLen);
free(linkData);
} else if (strcmp(directive, "list") == 0) {
*inList = true;
} else if (strcmp(directive, "item") == 0) {
if (!*inList) {
emitWarning(".item outside of .list");
}
// Flush any pending item
flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0);
*paraLen = 0;
// Start new item with the rest text
if (*rest) {
int32_t len = strlen(rest);
memcpy(para, rest, len);
*paraLen = len;
}
} else if (strcmp(directive, "endlist") == 0) {
// Flush final list item
flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0);
*paraLen = 0;
*inList = false;
} else if (strcmp(directive, "table") == 0) {
*inTable = true;
} else if (strcmp(directive, "endtable") == 0) {
*inTable = false;
} else if (strcmp(directive, "code") == 0) {
*inCode = true;
} else if (strcmp(directive, "endcode") == 0) {
*inCode = false;
} else if (strcmp(directive, "note") == 0) {
*inNote = true;
if (strncmp(rest, "tip", 3) == 0) {
*noteFlags = HLP_NOTE_TIP;
} else if (strncmp(rest, "warning", 7) == 0) {
*noteFlags = HLP_NOTE_WARNING;
} else {
*noteFlags = HLP_NOTE_INFO;
}
} else if (strcmp(directive, "endnote") == 0) {
*inNote = false;
} else if (strcmp(directive, "index") == 0) {
if (!*rest) {
emitError(".index requires a keyword");
return;
}
addIndexEntry(rest, topicCount - 1);
} else if (strcmp(directive, "hr") == 0) {
addRecord(*curTopic, HLP_REC_HRULE, 0, NULL, 0);
} else if (strcmp(directive, "default") == 0) {
(*curTopic)->isDefault = true;
} else {
emitWarning("unknown directive .%s", directive);
}
}
static void parseFile(const char *path, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth) {
if (includeDepth > MAX_INCLUDE_DEPTH) {
emitError("include depth exceeded (max %d)", MAX_INCLUDE_DEPTH);
return;
}
FILE *f = fopen(path, "r");
if (!f) {
emitError("cannot open '%s': %s", path, strerror(errno));
return;
}
// Save and set parse state
const char *savedFile = currentFile;
int32_t savedLine = currentLine;
char savedSection[64];
memcpy(savedSection, currentSection, sizeof(savedSection));
currentFile = path;
currentLine = 0;
hlpcInfo(" parsing %s\n", path);
char line[MAX_LINE_LEN];
while (fgets(line, sizeof(line), f)) {
currentLine++;
// Strip trailing newline/CR
int32_t len = strlen(line);
while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
line[--len] = '\0';
}
// Check for directive. Inside code/table blocks, only .endcode/.endtable
// are recognized — everything else is verbatim content.
if (line[0] == '.') {
if (*inCode && strncmp(line, ".endcode", 8) != 0) {
// Fall through to verbatim handling below
} else if (*inTable && strncmp(line, ".endtable", 9) != 0) {
// Fall through to verbatim handling below
} else {
parseDirective(line, curTopic, inList, inTable, inCode, inNote, noteFlags, para, paraLen, includeDepth);
continue;
}
}
// Plain text line
if (!*curTopic) {
// Ignore text outside topics
continue;
}
if (*inCode || *inTable) {
// In code/table blocks, preserve lines verbatim
if (*paraLen > 0) {
para[(*paraLen)++] = '\n';
}
memcpy(para + *paraLen, line, len);
*paraLen += len;
para[*paraLen] = '\0';
} else if (*inList) {
// In list, accumulate continuation text for current item
if (len == 0) {
// Empty line flushes current item
flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0);
*paraLen = 0;
} else {
if (*paraLen > 0) {
para[(*paraLen)++] = ' ';
}
memcpy(para + *paraLen, line, len);
*paraLen += len;
para[*paraLen] = '\0';
}
} else if (*inNote) {
// In note block
if (len == 0) {
// Empty line separates paragraphs
flushParagraph(*curTopic, para, *paraLen, HLP_REC_NOTE, *noteFlags);
*paraLen = 0;
} else {
if (*paraLen > 0) {
para[(*paraLen)++] = ' ';
}
memcpy(para + *paraLen, line, len);
*paraLen += len;
para[*paraLen] = '\0';
}
} else {
// Normal text
if (len == 0) {
// Empty line ends paragraph
flushParagraph(*curTopic, para, *paraLen, HLP_REC_TEXT, 0);
*paraLen = 0;
} else {
if (*paraLen > 0) {
para[(*paraLen)++] = ' ';
}
memcpy(para + *paraLen, line, len);
*paraLen += len;
para[*paraLen] = '\0';
}
}
}
fclose(f);
// Restore parse state
currentFile = savedFile;
currentLine = savedLine;
memcpy(currentSection, savedSection, sizeof(currentSection));
}
// ---------------------------------------------------------------------------
// regroupTocBySections -- reorder TOC entries by .section directives
//
// Entries with no section stay at the front in their original order.
// Each unique section (in order of first appearance) gets a depth-0
// header entry inserted, and all entries in that section are placed
// underneath with their depths bumped by 1.
// ---------------------------------------------------------------------------
// Reorder tocEntries[] by section.
//
// Unsectioned entries keep their order at the front. Each distinct section
// name (in order of first appearance, at most MAX_SECTIONS) then contributes
// a depth-0 header entry with topicIdx -1, followed by its entries with
// their depth increased by one.
//
// The scratch array is sized for every original entry plus one header per
// section, and the result is clamped to MAX_TOC_ENTRIES (with an error)
// before being copied back, so neither array can be overrun.
static void regroupTocBySections(void) {
    // Collect unique section names in order of first appearance
    char sectionNames[MAX_SECTIONS][64];
    int32_t sectionCount = 0;
    for (int32_t i = 0; i < tocCount; i++) {
        if (tocEntries[i].section[0] == '\0') {
            continue;
        }
        bool found = false;
        for (int32_t s = 0; s < sectionCount; s++) {
            if (strcmp(sectionNames[s], tocEntries[i].section) == 0) {
                found = true;
                break;
            }
        }
        if (!found) {
            if (sectionCount >= MAX_SECTIONS) {
                emitWarning("too many sections (max %d)", MAX_SECTIONS);
                break;
            }
            snprintf(sectionNames[sectionCount], sizeof(sectionNames[0]), "%s", tocEntries[i].section);
            sectionCount++;
        }
    }
    if (sectionCount == 0) {
        return;
    }

    // Worst case: every original entry plus one header per section.
    size_t scratchCap = (size_t)tocCount + (size_t)sectionCount;
    TocEntryT *newToc = malloc(sizeof(TocEntryT) * scratchCap);
    if (!newToc) {
        fprintf(stderr, "fatal: out of memory\n");
        exit(1);
    }
    int32_t newCount = 0;

    // Unsectioned entries first (original order, unchanged depths)
    for (int32_t i = 0; i < tocCount; i++) {
        if (tocEntries[i].section[0] == '\0') {
            newToc[newCount++] = tocEntries[i];
        }
    }

    // Each section: header entry + grouped entries with depth+1
    for (int32_t s = 0; s < sectionCount; s++) {
        // Insert section header (no topic)
        TocEntryT *hdr = &newToc[newCount++];
        snprintf(hdr->title, sizeof(hdr->title), "%s", sectionNames[s]);
        hdr->section[0] = '\0';
        hdr->topicIdx = -1;
        hdr->depth = 0;
        // All entries belonging to this section
        for (int32_t i = 0; i < tocCount; i++) {
            if (strcmp(tocEntries[i].section, sectionNames[s]) == 0) {
                newToc[newCount] = tocEntries[i];
                newToc[newCount].depth += 1;
                newCount++;
            }
        }
    }

    // The added headers may push the total past the static array's capacity.
    if (newCount > MAX_TOC_ENTRIES) {
        emitError("too many TOC entries after adding section headers (max %d)", MAX_TOC_ENTRIES);
        newCount = MAX_TOC_ENTRIES;
    }

    memcpy(tocEntries, newToc, sizeof(TocEntryT) * (size_t)newCount);
    tocCount = newCount;
    free(newToc);
    hlpcInfo(" regrouped TOC: %d sections, %d entries\n", sectionCount, tocCount);
}
// Pass 1: parse every input file in order, sharing topic and block state
// across files, then flush leftover text and regroup the TOC by section.
static void pass1Parse(int32_t fileCount, char **files) {
    hlpcInfo("Pass 1: Parsing %d input file(s)...\n", fileCount);

    // Block state carried from one file to the next
    TopicT *curTopic = NULL;
    bool inList = false, inTable = false, inCode = false, inNote = false;
    uint8_t noteFlags = 0;

    // Paragraph buffer (shared across files for multi-file topics)
    char para[MAX_LINE_LEN * 64];
    int32_t paraLen = 0;

    for (int32_t fileIdx = 0; fileIdx < fileCount; fileIdx++) {
        // Each top-level file starts outside any section
        currentSection[0] = '\0';
        parseFile(files[fileIdx], &curTopic, &inList, &inTable, &inCode, &inNote, &noteFlags, para, &paraLen, 0);
    }

    // Flush any remaining paragraph, typed by whichever block is still open
    if (curTopic != NULL && paraLen > 0) {
        uint8_t kind;
        uint8_t kindFlags = 0;
        if (inCode) {
            kind = HLP_REC_CODE;
        } else if (inTable) {
            kind = HLP_REC_TABLE;
        } else if (inNote) {
            kind = HLP_REC_NOTE;
            kindFlags = noteFlags;
        } else if (inList) {
            kind = HLP_REC_LIST_ITEM;
        } else {
            kind = HLP_REC_TEXT;
        }
        flushParagraph(curTopic, para, paraLen, kind, kindFlags);
    }

    // Regroup TOC entries by section
    regroupTocBySections();

    hlpcInfo(" %d topic(s), %d TOC entries, %d index entries, %d image(s)\n",
             topicCount, tocCount, indexCount, imageCount);
}
// ---------------------------------------------------------------------------
// Pass 2: Word wrap
// ---------------------------------------------------------------------------
// Pass 2 placeholder: prints the progress line and does no other work.
static void pass2Wrap(void) {
hlpcInfo("Pass 2: Joining paragraph lines...\n");
// TEXT, LIST_ITEM, and NOTE records are stored as flowing text.
// The viewer wraps them at display time based on actual window width.
// No record is modified here: continuation lines are already joined with
// single spaces while parsing (see parseFile), so nothing remains to do.
// CODE, TABLE, HEADING, etc. are not touched.
}
// ---------------------------------------------------------------------------
// Pass 3: String table
// ---------------------------------------------------------------------------
static int32_t strTableFind(const char *str) {
for (int32_t i = 0; i < strEntryCount; i++) {
if (strcmp(strEntries[i].str, str) == 0) {
return strEntries[i].offset;
}
}
return -1;
}
static int32_t strTableAdd(const char *str) {
// Check for existing
int32_t existing = strTableFind(str);
if (existing >= 0) {
return existing;
}
int32_t len = strlen(str);
// Grow string table buffer
while (strTabSize + len + 1 > strTabCap) {
strTabCap *= 2;
strTab = realloc(strTab, strTabCap);
if (!strTab) {
fprintf(stderr, "fatal: out of memory\n");
exit(1);
}
}
// Grow entries array
if (strEntryCount >= strEntryCap) {
strEntryCap *= 2;
strEntries = realloc(strEntries, sizeof(StrEntryT) * strEntryCap);
if (!strEntries) {
fprintf(stderr, "fatal: out of memory\n");
exit(1);
}
}
int32_t offset = strTabSize;
memcpy(strTab + strTabSize, str, len + 1);
strTabSize += len + 1;
strEntries[strEntryCount].str = strTab + offset;
strEntries[strEntryCount].offset = offset;
strEntryCount++;
return offset;
}
// Pass 3: build the deduplicated string table.
//
// The table always starts with the empty string at offset 0, followed by
// topic IDs and titles, TOC titles and index keywords in that order.
// Terminates the program if the initial allocations fail.
static void pass3StringTable(void) {
    hlpcInfo("Pass 3: Building string table...\n");

    strTabCap = INITIAL_STRTAB_SIZE;
    strTabSize = 0;
    strTab = malloc((size_t)strTabCap);
    strEntryCap = 1024;
    strEntryCount = 0;
    strEntries = malloc(sizeof(StrEntryT) * (size_t)strEntryCap);
    if (!strTab || !strEntries) {
        fprintf(stderr, "fatal: out of memory\n");
        exit(1);
    }

    // Add empty string at offset 0
    strTableAdd("");

    // Add all topic IDs and titles
    for (int32_t i = 0; i < topicCount; i++) {
        strTableAdd(topics[i].id);
        strTableAdd(topics[i].title);
    }
    // Add TOC titles
    for (int32_t i = 0; i < tocCount; i++) {
        strTableAdd(tocEntries[i].title);
    }
    // Add index keywords
    for (int32_t i = 0; i < indexCount; i++) {
        strTableAdd(indexEntries[i].keyword);
    }
    hlpcInfo(" %d unique strings, %d bytes\n", strEntryCount, strTabSize);
}
// ---------------------------------------------------------------------------
// Pass 4: Search index (trigram)
// ---------------------------------------------------------------------------
// qsort comparator: orders TrigramT entries by their three bytes, most
// significant first. Returns the difference of the first differing byte.
static int compareTrigrams(const void *a, const void *b) {
    const uint8_t *lhs = ((const TrigramT *)a)->trigram;
    const uint8_t *rhs = ((const TrigramT *)b)->trigram;

    for (int k = 0; k < 3; k++) {
        int32_t diff = (int32_t)lhs[k] - (int32_t)rhs[k];
        if (diff != 0) {
            return diff;
        }
    }
    return 0;
}
// Record that topic `topicIdx` contains the trigram (a, b, c).
//
// The trigram is looked up linearly and created on first use; once
// MAX_TRIGRAMS distinct trigrams exist, new ones are silently dropped.
// A topic appears at most once in a posting list. Terminates the program
// on allocation failure.
static void addTrigram(uint8_t a, uint8_t b, uint8_t c, uint16_t topicIdx) {
    // Find existing trigram
    TrigramT *tri = NULL;
    for (int32_t i = 0; i < trigramCount; i++) {
        if (trigrams[i].trigram[0] == a && trigrams[i].trigram[1] == b && trigrams[i].trigram[2] == c) {
            tri = &trigrams[i];
            break;
        }
    }

    if (!tri) {
        if (trigramCount >= MAX_TRIGRAMS) {
            return;
        }
        // Allocate before claiming the slot so a failure leaves no
        // half-initialised entry behind.
        uint16_t *postings = malloc(sizeof(uint16_t) * 8);
        if (!postings) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        tri = &trigrams[trigramCount++];
        tri->trigram[0] = a;
        tri->trigram[1] = b;
        tri->trigram[2] = c;
        tri->postingCap = 8;
        tri->postingCount = 0;
        tri->postings = postings;
    }

    // Check if topic already in posting list
    for (int32_t i = 0; i < tri->postingCount; i++) {
        if (tri->postings[i] == topicIdx) {
            return;
        }
    }

    // Add to posting list
    if (tri->postingCount >= tri->postingCap) {
        int32_t newCap = tri->postingCap * 2;
        uint16_t *grown = realloc(tri->postings, sizeof(uint16_t) * (size_t)newCap);
        if (!grown) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        tri->postings = grown;
        tri->postingCap = newCap;
    }
    tri->postings[tri->postingCount++] = topicIdx;
}
static void buildSearchIndex(void) {
for (int32_t t = 0; t < topicCount; t++) {
TopicT *topic = &topics[t];
// Collect all searchable text for this topic
BufT textBuf;
bufInit(&textBuf);
// Add title
int32_t titleLen = strlen(topic->title);
bufAppend(&textBuf, topic->title, titleLen);
uint8_t space = ' ';
bufAppend(&textBuf, &space, 1);
for (int32_t r = 0; r < topic->recordCount; r++) {
RecordT *rec = &topic->records[r];
if (!rec->data || rec->dataLen <= 0) {
continue;
}
if (rec->type == HLP_REC_TEXT || rec->type == HLP_REC_HEADING1 || rec->type == HLP_REC_HEADING2 || rec->type == HLP_REC_HEADING3 || rec->type == HLP_REC_LIST_ITEM || rec->type == HLP_REC_NOTE || rec->type == HLP_REC_CODE) {
bufAppend(&textBuf, rec->data, rec->dataLen);
bufAppend(&textBuf, &space, 1);
}
}
// Lowercase the text
for (int32_t i = 0; i < textBuf.size; i++) {
textBuf.data[i] = tolower(textBuf.data[i]);
}
// Generate trigrams
for (int32_t i = 0; i + 2 < textBuf.size; i++) {
uint8_t a = textBuf.data[i];
uint8_t b = textBuf.data[i + 1];
uint8_t c = textBuf.data[i + 2];
if (isalnum(a) && isalnum(b) && isalnum(c)) {
addTrigram(a, b, c, (uint16_t)t);
}
}
free(textBuf.data);
}
}
// Pass 4: build the trigram search index, then sort trigrams[] with
// compareTrigrams (ascending by the three trigram bytes) and report the count.
static void pass4SearchIndex(void) {
hlpcInfo("Pass 4: Building search index...\n");
buildSearchIndex();
// Sort trigrams
qsort(trigrams, trigramCount, sizeof(TrigramT), compareTrigrams);
hlpcInfo(" %d trigrams\n", trigramCount);
}
// ---------------------------------------------------------------------------
// Pass 5: Serialize
// ---------------------------------------------------------------------------
static int compareIndexEntries(const void *a, const void *b) {
return strcasecmp(((const IndexEntryT *)a)->keyword, ((const IndexEntryT *)b)->keyword);
}
// ---------------------------------------------------------------------------
// HTML output
// ---------------------------------------------------------------------------
// Standard base64 alphabet (RFC 4648, with '+' and '/').
static const char sBase64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Write the base64 encoding of data[0..len) to `f`, with '=' padding on the
// final group. No line breaks are inserted; nothing is written for len <= 0.
static void base64Encode(FILE *f, const uint8_t *data, int32_t len) {
    int32_t pos = 0;

    while (pos < len) {
        int32_t remaining = len - pos;

        // Pack up to three input bytes into a 24-bit group
        uint32_t group = (uint32_t)data[pos] << 16;
        if (remaining > 1) {
            group |= (uint32_t)data[pos + 1] << 8;
        }
        if (remaining > 2) {
            group |= (uint32_t)data[pos + 2];
        }

        // Emit four output symbols, padding where input bytes are missing
        char out[4];
        out[0] = sBase64[(group >> 18) & 0x3F];
        out[1] = sBase64[(group >> 12) & 0x3F];
        out[2] = (remaining > 1) ? sBase64[(group >> 6) & 0x3F] : '=';
        out[3] = (remaining > 2) ? sBase64[group & 0x3F] : '=';
        for (int k = 0; k < 4; k++) {
            fputc(out[k], f);
        }

        pos += 3;
    }
}
// Write `text` to `f`, replacing the HTML-special characters <, >, & and "
// with their entities. All other bytes are written unchanged.
static void htmlEscapeWrite(FILE *f, const char *text) {
    const char *cursor = text;

    while (*cursor != '\0') {
        char ch = *cursor++;

        if (ch == '<') {
            fputs("&lt;", f);
        } else if (ch == '>') {
            fputs("&gt;", f);
        } else if (ch == '&') {
            fputs("&amp;", f);
        } else if (ch == '"') {
            fputs("&quot;", f);
        } else {
            fputc(ch, f);
        }
    }
}
// Write `text` as an HTML-escaped <pre> block followed by a newline.
static void htmlWritePreformatted(FILE *f, const char *text) {
    fputs("<pre>", f);
    htmlEscapeWrite(f, text);
    fputs("</pre>\n", f);
}
// Write the placeholder paragraph used when an image cannot be embedded:
// "<p><em>[Image: <name> <reason>]</em></p>". The name is HTML-escaped.
static void htmlWriteImageProblem(FILE *f, const char *name, const char *reason) {
    fputs("<p><em>[Image: ", f);
    htmlEscapeWrite(f, name);
    fprintf(f, " %s]</em></p>\n", reason);
}

// Embed the image named by an IMAGE record as a base64 data: URI.
//
// rec->data is the image filename recorded at parse time. The file is looked
// up through findImage() and read in full. Any failure (unknown image,
// unreadable file, size that cannot be determined, out of memory, short
// read) produces a visible placeholder instead of an <img> element. The
// filename is HTML-escaped wherever it is written. The MIME type is always
// image/bmp.
static void htmlWriteImage(FILE *f, const RecordT *rec) {
    const char *name = rec->data ? rec->data : "";

    int32_t imgIdx = findImage(name);
    if (imgIdx < 0) {
        htmlWriteImageProblem(f, name, "not found");
        return;
    }

    FILE *imgFile = fopen(imageRefs[imgIdx].path, "rb");
    if (!imgFile) {
        htmlWriteImageProblem(f, name, "could not be read");
        return;
    }

    // Determine the file size; ftell returns -1 on failure, which must not
    // reach malloc or the int32_t length passed to base64Encode.
    long imgSize = -1;
    if (fseek(imgFile, 0, SEEK_END) == 0) {
        imgSize = ftell(imgFile);
    }
    if (imgSize <= 0 || imgSize > INT32_MAX || fseek(imgFile, 0, SEEK_SET) != 0) {
        fclose(imgFile);
        htmlWriteImageProblem(f, name, "read error");
        return;
    }

    uint8_t *imgData = malloc((size_t)imgSize);
    if (!imgData) {
        fclose(imgFile);
        htmlWriteImageProblem(f, name, "could not be read");
        return;
    }

    size_t got = fread(imgData, 1, (size_t)imgSize, imgFile);
    fclose(imgFile);
    if (got != (size_t)imgSize) {
        free(imgData);
        htmlWriteImageProblem(f, name, "read error");
        return;
    }

    fprintf(f, "<p><img src=\"data:image/bmp;base64,");
    base64Encode(f, imgData, (int32_t)imgSize);
    fputs("\" alt=\"", f);
    htmlEscapeWrite(f, name);
    fputs("\"></p>\n", f);
    free(imgData);
}
// Write openTag, the HTML-escaped text, then closeTag.
static void htmlWriteElement(FILE *f, const char *openTag, const char *text, const char *closeTag) {
    fputs(openTag, f);
    htmlEscapeWrite(f, text);
    fputs(closeTag, f);
}

// Render all records of a topic as HTML.
//
// Consecutive LIST_ITEM records are wrapped in a single <ul>. Headings 1-3
// map to <h2>-<h4>, tables and code to <pre>, notes to <blockquote>. Records
// with no payload (data == NULL, e.g. a heading directive without text) are
// rendered with empty text instead of being dereferenced. Link targets are
// HTML-escaped inside the href attribute.
static void htmlWriteRecords(FILE *f, const TopicT *topic) {
    bool inList = false;

    for (int32_t i = 0; i < topic->recordCount; i++) {
        const RecordT *rec = &topic->records[i];
        // addRecord stores empty payloads as NULL
        const char *text = rec->data ? rec->data : "";

        // Close list if we're leaving list items
        if (inList && rec->type != HLP_REC_LIST_ITEM) {
            fprintf(f, "</ul>\n");
            inList = false;
        }

        switch (rec->type) {
            case HLP_REC_TEXT:
                htmlWriteElement(f, "<p>", text, "</p>\n");
                break;
            case HLP_REC_HEADING1:
                htmlWriteElement(f, "<h2>", text, "</h2>\n");
                break;
            case HLP_REC_HEADING2:
                htmlWriteElement(f, "<h3>", text, "</h3>\n");
                break;
            case HLP_REC_HEADING3:
                htmlWriteElement(f, "<h4>", text, "</h4>\n");
                break;
            case HLP_REC_IMAGE:
                htmlWriteImage(f, rec);
                break;
            case HLP_REC_LINK: {
                // data format: "topicId\0displayText"
                const char *targetId = text;
                const char *displayTxt = targetId;
                size_t targetLen = strlen(targetId);
                // Only step past the target when the payload really holds a
                // second string; otherwise show the target itself.
                if (rec->data && (int64_t)targetLen + 1 <= (int64_t)rec->dataLen) {
                    displayTxt = rec->data + targetLen + 1;
                }
                // A link without display text would be invisible: show the target.
                if (*displayTxt == '\0') {
                    displayTxt = targetId;
                }
                htmlWriteElement(f, "<p><a href=\"#", targetId, "\">");
                htmlWriteElement(f, "", displayTxt, "</a></p>\n");
                break;
            }
            case HLP_REC_LIST_ITEM:
                if (!inList) {
                    fprintf(f, "<ul>\n");
                    inList = true;
                }
                htmlWriteElement(f, "<li>", text, "</li>\n");
                break;
            case HLP_REC_TABLE:
                htmlWritePreformatted(f, text);
                break;
            case HLP_REC_CODE:
                htmlWriteElement(f, "<pre><code>", text, "</code></pre>\n");
                break;
            case HLP_REC_HRULE:
                fprintf(f, "<hr>\n");
                break;
            case HLP_REC_NOTE: {
                const char *label = "Note";
                if (rec->flags == HLP_NOTE_TIP) {
                    label = "Tip";
                } else if (rec->flags == HLP_NOTE_WARNING) {
                    label = "Warning";
                }
                fprintf(f, "<blockquote><strong>%s:</strong> ", label);
                htmlWriteElement(f, "", text, "</blockquote>\n");
                break;
            }
            case HLP_REC_END:
                break;
            default:
                break;
        }
    }

    if (inList) {
        fprintf(f, "</ul>\n");
    }
}
// Write tocEntries[idx] and its whole subtree as a nested HTML list item.
//
// An entry whose topicIdx refers to a valid topic becomes a link to that
// topic's anchor; any other entry is rendered as a bold, non-clickable
// heading. The entries that follow with a greater depth are treated as
// descendants: direct children are written recursively inside a nested <ul>.
//
// The caller is responsible for skipping past the subtree afterwards.
static void htmlWriteTocEntry(FILE *f, int32_t idx) {
    const TocEntryT *entry    = &tocEntries[idx];
    bool             hasTopic = entry->topicIdx >= 0 && entry->topicIdx < topicCount;

    if (hasTopic) {
        // The topic id goes into an attribute value and must be escaped.
        fputs("<li><a href=\"#", f);
        htmlEscapeWrite(f, topics[entry->topicIdx].id);
        fputs("\">", f);
        htmlEscapeWrite(f, entry->title);
        fputs("</a>", f);
    } else {
        fputs("<li><strong>", f);
        htmlEscapeWrite(f, entry->title);
        fputs("</strong>", f);
    }

    // Entries immediately following with a greater depth are descendants.
    int32_t myDepth = entry->depth;
    int32_t next    = idx + 1;

    if (next < tocCount && tocEntries[next].depth > myDepth) {
        fputs("\n<ul>\n", f);

        while (next < tocCount && tocEntries[next].depth > myDepth) {
            htmlWriteTocEntry(f, next);

            // The recursive call already wrote this child's subtree; step over it.
            int32_t childDepth = tocEntries[next].depth;
            next++;

            while (next < tocCount && tocEntries[next].depth > childDepth) {
                next++;
            }
        }

        fputs("</ul>\n", f);
    }

    fputs("</li>\n", f);
}
// Write the whole help document as a single self-contained HTML page.
//
// Layout: a <nav> sidebar holding the table of contents (and the keyword
// index when there is one), followed by a <main> element with one
// <div class="topic" id="..."> per topic, in topic order.
//
// outputPath: file to create or overwrite.
// Returns 0 on success, -1 if the file cannot be opened or if writing or
// closing it fails (an error message is printed to stderr in both cases).
static int emitHtml(const char *outputPath) {
    static const char css[] =
        "body { font-family: sans-serif; margin: 0; padding: 0; display: flex; }\n"
        "nav { width: 250px; min-width: 250px; background: #f0f0f0; padding: 16px;\n"
        " border-right: 1px solid #ccc; height: 100vh; overflow-y: auto;\n"
        " position: sticky; top: 0; box-sizing: border-box; }\n"
        "nav ul { list-style: none; padding-left: 16px; margin: 4px 0; }\n"
        "nav > ul { padding-left: 0; }\n"
        "nav a { text-decoration: none; color: #0066cc; }\n"
        "nav a:hover { text-decoration: underline; }\n"
        "main { flex: 1; padding: 24px 32px; max-width: 800px; }\n"
        "h1 { border-bottom: 2px solid #333; padding-bottom: 4px; }\n"
        "h2 { border-bottom: 1px solid #999; padding-bottom: 2px; margin-top: 32px; }\n"
        "h3 { margin-top: 24px; }\n"
        "pre { background: #f8f8f8; border: 1px solid #ddd; padding: 8px;\n"
        " overflow-x: auto; font-size: 14px; }\n"
        "blockquote { background: #fffde7; border-left: 4px solid #ffc107;\n"
        " padding: 8px 12px; margin: 12px 0; }\n"
        "hr { border: none; border-top: 1px solid #ccc; margin: 24px 0; }\n"
        "img { max-width: 100%; }\n"
        ".topic { margin-bottom: 48px; }\n";

    FILE *f = fopen(outputPath, "w");

    if (!f) {
        fprintf(stderr, "error: cannot open %s for writing\n", outputPath);
        return -1;
    }

    // Page title: the first default topic that has a title, otherwise a
    // generic fallback.
    const char *docTitle = "DVX Help";

    for (int32_t i = 0; i < topicCount; i++) {
        if (topics[i].isDefault && topics[i].title[0]) {
            docTitle = topics[i].title;
            break;
        }
    }

    fputs("<!DOCTYPE html>\n<html>\n<head>\n", f);
    fputs("<meta charset=\"utf-8\">\n", f);
    fputs("<title>", f);
    htmlEscapeWrite(f, docTitle);
    fputs("</title>\n", f);
    fputs("<style>\n", f);
    fputs(css, f);
    fputs("</style>\n", f);
    fputs("</head>\n<body>\n", f);

    // TOC sidebar. htmlWriteTocEntry() writes an entry together with its
    // subtree, so only entries that start a new subtree are visited here.
    fputs("<nav>\n<h3>Contents</h3>\n<ul>\n", f);

    for (int32_t i = 0; i < tocCount; ) {
        htmlWriteTocEntry(f, i);

        int32_t myDepth = tocEntries[i].depth;
        i++;

        while (i < tocCount && tocEntries[i].depth > myDepth) {
            i++;
        }
    }

    fputs("</ul>\n", f);

    // Keyword index in the sidebar; entries without a valid topic are skipped.
    if (indexCount > 0) {
        fputs("<h3>Index</h3>\n<ul>\n", f);

        for (int32_t i = 0; i < indexCount; i++) {
            int32_t topicIdx = indexEntries[i].topicIdx;

            if (topicIdx < 0 || topicIdx >= topicCount) {
                continue;
            }

            // Topic ids are placed in attribute values, so escape them.
            fputs("<li><a href=\"#", f);
            htmlEscapeWrite(f, topics[topicIdx].id);
            fputs("\">", f);
            htmlEscapeWrite(f, indexEntries[i].keyword);
            fputs("</a></li>\n", f);
        }

        fputs("</ul>\n", f);
    }

    fputs("</nav>\n", f);

    // Main content: one anchored section per topic.
    fputs("<main>\n", f);

    for (int32_t i = 0; i < topicCount; i++) {
        fputs("<div class=\"topic\" id=\"", f);
        htmlEscapeWrite(f, topics[i].id);
        fputs("\">\n", f);
        fputs("<h1>", f);
        htmlEscapeWrite(f, topics[i].title);
        fputs("</h1>\n", f);
        htmlWriteRecords(f, &topics[i]);
        fputs("</div>\n", f);
    }

    fputs("</main>\n</body>\n</html>\n", f);

    // stdio buffers output, so a failed write (disk full, I/O error) may only
    // surface in the stream's error flag or when the final flush in fclose()
    // fails. Check both before reporting success.
    bool writeFailed = ferror(f) != 0;

    if (fclose(f) != 0) {
        writeFailed = true;
    }

    if (writeFailed) {
        fprintf(stderr, "error: failed to write %s\n", outputPath);
        return -1;
    }

    return 0;
}
static int pass5Serialize(const char *outputPath) {
hlpcInfo("Pass 5: Serializing to %s...\n", outputPath);
FILE *f = fopen(outputPath, "wb");
if (!f) {
fprintf(stderr, "error: cannot create '%s': %s\n", outputPath, strerror(errno));
return 1;
}
HlpHeaderT hdr;
memset(&hdr, 0, sizeof(hdr));
hdr.magic = HLP_MAGIC;
hdr.version = HLP_VERSION;
hdr.topicCount = topicCount;
// Find default topic
hdr.defaultTopicStr = 0;
for (int32_t i = 0; i < topicCount; i++) {
if (topics[i].isDefault) {
hdr.defaultTopicStr = strTableFind(topics[i].id);
break;
}
}
uint32_t offset = 0;
// --- 1. Image pool ---
hdr.imagePoolOffset = offset;
for (int32_t i = 0; i < imageCount; i++) {
ImageRefT *img = &imageRefs[i];
FILE *imgFile = fopen(img->path, "rb");
if (!imgFile) {
fprintf(stderr, "error: cannot open image '%s': %s\n", img->path, strerror(errno));
fclose(f);
return 1;
}
fseek(imgFile, 0, SEEK_END);
img->fileSize = ftell(imgFile);
fseek(imgFile, 0, SEEK_SET);
img->poolOffset = offset - hdr.imagePoolOffset;
uint8_t *imgData = malloc(img->fileSize);
if (fread(imgData, 1, img->fileSize, imgFile) != (size_t)img->fileSize) {
fprintf(stderr, "error: cannot read image '%s'\n", img->path);
free(imgData);
fclose(imgFile);
fclose(f);
return 1;
}
fwrite(imgData, 1, img->fileSize, f);
offset += img->fileSize;
free(imgData);
fclose(imgFile);
hlpcInfo(" image: %s (%d bytes)\n", img->path, img->fileSize);
}
hdr.imagePoolSize = offset - hdr.imagePoolOffset;
// --- 2. Topic content records ---
uint32_t *topicContentOffsets = calloc(topicCount, sizeof(uint32_t));
uint32_t *topicContentSizes = calloc(topicCount, sizeof(uint32_t));
for (int32_t t = 0; t < topicCount; t++) {
TopicT *topic = &topics[t];
topicContentOffsets[t] = offset;
uint32_t startOffset = offset;
for (int32_t r = 0; r < topic->recordCount; r++) {
RecordT *rec = &topic->records[r];
HlpRecordHdrT recHdr;
recHdr.type = rec->type;
recHdr.flags = rec->flags;
if (rec->type == HLP_REC_IMAGE) {
// Replace filename with HlpImageRefT
int32_t imgIdx = findImage(rec->data);
if (imgIdx < 0) {
fprintf(stderr, "error: image '%s' not found in references\n", rec->data);
recHdr.length = 0;
fwrite(&recHdr, sizeof(recHdr), 1, f);
offset += sizeof(recHdr);
} else {
HlpImageRefT imgRef;
imgRef.imageOffset = imageRefs[imgIdx].poolOffset;
imgRef.imageSize = imageRefs[imgIdx].fileSize;
recHdr.length = sizeof(HlpImageRefT);
fwrite(&recHdr, sizeof(recHdr), 1, f);
fwrite(&imgRef, sizeof(imgRef), 1, f);
offset += sizeof(recHdr) + sizeof(HlpImageRefT);
}
} else if (rec->type == HLP_REC_LINK) {
// Payload is "target\0display" -- write as-is
recHdr.length = rec->dataLen;
fwrite(&recHdr, sizeof(recHdr), 1, f);
fwrite(rec->data, 1, rec->dataLen, f);
offset += sizeof(recHdr) + rec->dataLen;
} else if (rec->type == HLP_REC_HRULE) {
recHdr.length = 0;
fwrite(&recHdr, sizeof(recHdr), 1, f);
offset += sizeof(recHdr);
} else {
// Text, headings, list items, code, table, note
recHdr.length = rec->dataLen;
fwrite(&recHdr, sizeof(recHdr), 1, f);
if (rec->dataLen > 0) {
fwrite(rec->data, 1, rec->dataLen, f);
}
offset += sizeof(recHdr) + rec->dataLen;
}
}
// Write end-of-topic record
HlpRecordHdrT endRec;
endRec.type = HLP_REC_END;
endRec.flags = 0;
endRec.length = 0;
fwrite(&endRec, sizeof(endRec), 1, f);
offset += sizeof(endRec);
topicContentSizes[t] = offset - startOffset;
}
// --- 3. TOC entries ---
hdr.tocOffset = offset;
hdr.tocCount = tocCount;
for (int32_t i = 0; i < tocCount; i++) {
HlpTocEntryT entry;
entry.titleStr = strTableFind(tocEntries[i].title);
entry.topicIdx = (tocEntries[i].topicIdx >= 0) ? (uint16_t)tocEntries[i].topicIdx : 0xFFFF;
entry.depth = (uint8_t)tocEntries[i].depth;
entry.flags = 0;
fwrite(&entry, sizeof(entry), 1, f);
offset += sizeof(entry);
}
// --- 4. Keyword index entries (sorted) ---
qsort(indexEntries, indexCount, sizeof(IndexEntryT), compareIndexEntries);
hdr.indexOffset = offset;
hdr.indexCount = indexCount;
for (int32_t i = 0; i < indexCount; i++) {
HlpIndexEntryT entry;
entry.keywordStr = strTableFind(indexEntries[i].keyword);
entry.topicIdx = (uint16_t)indexEntries[i].topicIdx;
entry.reserved = 0;
fwrite(&entry, sizeof(entry), 1, f);
offset += sizeof(entry);
}
// --- 5. Search index ---
hdr.searchOffset = offset;
{
HlpSearchHeaderT searchHdr;
searchHdr.trigramCount = trigramCount;
fwrite(&searchHdr, sizeof(searchHdr), 1, f);
offset += sizeof(searchHdr);
// Calculate posting list offsets
// Posting lists come after the trigram entry array
uint32_t postingBase = sizeof(searchHdr) + sizeof(HlpTrigramEntryT) * trigramCount;
uint32_t postingOff = 0;
// Write trigram entries
for (int32_t i = 0; i < trigramCount; i++) {
HlpTrigramEntryT entry;
entry.trigram[0] = trigrams[i].trigram[0];
entry.trigram[1] = trigrams[i].trigram[1];
entry.trigram[2] = trigrams[i].trigram[2];
entry.postingCount = (uint8_t)trigrams[i].postingCount;
entry.postingOffset = postingBase + postingOff;
fwrite(&entry, sizeof(entry), 1, f);
offset += sizeof(entry);
postingOff += sizeof(uint16_t) * trigrams[i].postingCount;
}
// Write posting lists
for (int32_t i = 0; i < trigramCount; i++) {
fwrite(trigrams[i].postings, sizeof(uint16_t), trigrams[i].postingCount, f);
offset += sizeof(uint16_t) * trigrams[i].postingCount;
}
}
hdr.searchSize = offset - hdr.searchOffset;
// --- 6. String table ---
hdr.stringTableOffset = offset;
hdr.stringTableSize = strTabSize;
fwrite(strTab, 1, strTabSize, f);
offset += strTabSize;
// --- 7. Topic directory (sorted by topic ID) ---
hdr.topicDirOffset = offset;
HlpTopicDirT *topicDir = calloc(topicCount, sizeof(HlpTopicDirT));
for (int32_t i = 0; i < topicCount; i++) {
topicDir[i].topicIdStr = strTableFind(topics[i].id);
topicDir[i].titleStr = strTableFind(topics[i].title);
topicDir[i].contentOffset = topicContentOffsets[i];
topicDir[i].contentSize = topicContentSizes[i];
topicDir[i].reserved = 0;
}
// Topic directory is NOT sorted -- indices must match what TOC and
// index entries reference (assigned sequentially during parsing).
fwrite(topicDir, sizeof(HlpTopicDirT), topicCount, f);
offset += sizeof(HlpTopicDirT) * topicCount;
// --- 8. Header (at EOF) ---
fwrite(&hdr, sizeof(hdr), 1, f);
offset += sizeof(hdr);
fclose(f);
free(topicDir);
free(topicContentOffsets);
free(topicContentSizes);
hlpcInfo(" wrote %u bytes\n", offset);
return 0;
}
// ---------------------------------------------------------------------------
// Cleanup
// ---------------------------------------------------------------------------
static void freeAll(void) {
for (int32_t i = 0; i < topicCount; i++) {
for (int32_t r = 0; r < topics[i].recordCount; r++) {
free(topics[i].records[r].data);
}
free(topics[i].records);
}
for (int32_t i = 0; i < trigramCount; i++) {
free(trigrams[i].postings);
}
free(strTab);
free(strEntries);
}
// ---------------------------------------------------------------------------
// main
// ---------------------------------------------------------------------------
// Entry point: parse the command line, then run the compiler passes.
//
// Arguments recognised here:
//   -o <file>      output .hlp file (required)
//   -i <dir>       image directory (copied into imageDir)
//   --html <file>  additionally emit a single-page HTML rendering
//   --quiet        sets quietMode
//   @<file>        response file: one input filename per line; blank lines
//                  and lines starting with '#' are ignored
//   anything else not starting with '-' is taken as an input file
//
// Exit status: 0 on success; 1 if parsing reported errors, if the .hlp file
// could not be written, or if a requested HTML output could not be written
// (the .hlp file is still produced in that last case).
int main(int argc, char **argv) {
    const char   *outputPath = NULL;
    char         *inputFiles[256];
    int32_t       inputCount = 0;
    const int32_t maxInputs  = (int32_t)(sizeof(inputFiles) / sizeof(inputFiles[0]));

    // Parse command-line arguments
    for (int32_t i = 1; i < argc; i++) {
        if (strcmp(argv[i], "-o") == 0) {
            if (++i >= argc) {
                usage();
            }

            outputPath = argv[i];
        } else if (strcmp(argv[i], "-i") == 0) {
            if (++i >= argc) {
                usage();
            }

            // snprintf always NUL-terminates (truncating if necessary).
            snprintf(imageDir, sizeof(imageDir), "%s", argv[i]);
        } else if (strcmp(argv[i], "--html") == 0) {
            if (++i >= argc) {
                usage();
            }

            htmlPath = argv[i];
        } else if (strcmp(argv[i], "--quiet") == 0) {
            quietMode = true;
        } else if (argv[i][0] == '-') {
            fprintf(stderr, "error: unknown option '%s'\n", argv[i]);
            usage();
        } else if (argv[i][0] == '@') {
            // Response file: read filenames from the file, one per line
            const char *respPath = argv[i] + 1;
            FILE       *rf       = fopen(respPath, "r");

            if (!rf) {
                fprintf(stderr, "error: cannot open response file '%s'\n", respPath);
                return 1;
            }

            #define RESP_LINE_MAX 260
            // Static storage: inputFiles[] keeps pointing into this array
            // after the loop, and slots are indexed by inputCount so several
            // response files can be combined.
            static char respLines[sizeof(inputFiles) / sizeof(inputFiles[0])][RESP_LINE_MAX];
            char        rline[RESP_LINE_MAX];

            while (fgets(rline, (int)sizeof(rline), rf)) {
                int32_t rlen = (int32_t)strlen(rline);

                // A full buffer without a newline means fgets() stopped in
                // the middle of a line. Unless only the line terminator (or
                // EOF) is left, the filename is too long; reject it instead
                // of silently splitting it into several bogus inputs.
                if (rlen == RESP_LINE_MAX - 1 && rline[rlen - 1] != '\n') {
                    int c = fgetc(rf);

                    if (c != EOF && c != '\n' && c != '\r') {
                        fprintf(stderr, "error: line too long in response file '%s' (max %d characters)\n",
                                respPath, RESP_LINE_MAX - 1);
                        fclose(rf);
                        return 1;
                    }

                    if (c != EOF) {
                        ungetc(c, rf);
                    }
                }

                // Strip trailing newline, carriage return and spaces.
                while (rlen > 0 && (rline[rlen - 1] == '\n' || rline[rlen - 1] == '\r' || rline[rlen - 1] == ' ')) {
                    rline[--rlen] = '\0';
                }

                if (rlen == 0 || rline[0] == '#') {
                    continue;
                }

                if (inputCount >= maxInputs) {
                    fprintf(stderr, "error: too many input files\n");
                    fclose(rf);
                    return 1;
                }

                snprintf(respLines[inputCount], sizeof(respLines[inputCount]), "%s", rline);
                inputFiles[inputCount] = respLines[inputCount];
                inputCount++;
            }

            fclose(rf);
        } else {
            if (inputCount >= maxInputs) {
                fprintf(stderr, "error: too many input files\n");
                return 1;
            }

            inputFiles[inputCount++] = argv[i];
        }
    }

    if (!outputPath || inputCount == 0) {
        usage();
    }

    hlpcInfo("dvxhlpc: DVX Help Compiler\n");

    pass1Parse(inputCount, inputFiles);

    if (errorCount > 0) {
        fprintf(stderr, "Aborting due to %d error(s).\n", (int)errorCount);
        freeAll();
        return 1;
    }

    pass2Wrap();

    // Emit HTML if requested (uses wrapped text, before binary passes).
    // A failure does not stop the binary passes, but it is remembered so the
    // exit status tells the caller that a requested output is missing.
    int htmlResult = 0;

    if (htmlPath) {
        if (emitHtml(htmlPath) == 0) {
            hlpcInfo("HTML: wrote %s\n", htmlPath);
        } else {
            htmlResult = 1;
        }
    }

    pass3StringTable();
    pass4SearchIndex();

    int result = pass5Serialize(outputPath);

    if (result == 0) {
        hlpcInfo("Done. %d topic(s), %d TOC entries, %d index keywords, %d trigrams.\n",
                 topicCount, tocCount, indexCount, trigramCount);
    }

    freeAll();

    return (result != 0) ? result : htmlResult;
}