1804 lines
56 KiB
C
1804 lines
56 KiB
C
// dvxhlpc.c -- DVX Help file compiler
|
|
//
|
|
// Host-side tool (Linux) that converts .dhs source files into
|
|
// the .hlp binary format consumed by the DVX help viewer.
|
|
//
|
|
// Usage:
|
|
//   dvxhlpc -o output.hlp [-i imagedir] [--html out.html] [--quiet] input.dhs [@filelist] [...]
|
|
//
|
|
// Five-pass algorithm:
|
|
// 1. Parse source files, build topic/TOC/index/image arrays
|
|
//   2. Join paragraph lines (word-wrapping is done by the viewer at display time)
|
|
// 3. Build deduplicated string table
|
|
// 4. Generate trigram search index
|
|
// 5. Serialize binary .hlp file
|
|
|
|
#define _POSIX_C_SOURCE 200809L
|
|
|
|
#include "../apps/dvxhelp/hlpformat.h"
|
|
|
|
#include <ctype.h>
|
|
#include <errno.h>
|
|
#include <stdbool.h>
|
|
#include <stdarg.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <strings.h>
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Limits
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Fixed capacities of the compiler's static tables and scratch buffers.
#define MAX_TOPICS            1024   // entries in topics[]
#define MAX_TOC_ENTRIES       2048   // entries in tocEntries[]
#define MAX_INDEX_ENTRIES     4096   // entries in indexEntries[]
#define MAX_IMAGES            256    // entries in imageRefs[]
#define MAX_RECORDS_PER_TOPIC 512    // records a single topic may hold
#define MAX_LINE_LEN          1024   // source line buffer size (bytes)
#define MAX_TRIGRAMS          65536  // entries in trigrams[]
#define MAX_INCLUDE_DEPTH     16     // deepest allowed .include nesting
#define MAX_SECTIONS          64     // distinct .section names in the TOC
#define INITIAL_STRTAB_SIZE   65536  // starting size of the string table
#define INITIAL_BUF_SIZE      65536  // starting size of a BufT
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Compiler data structures
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// One content record of a topic (heading, paragraph, image, link, ...).
typedef struct {
    uint8_t  type;     // record kind (an HLP_REC_* value)
    uint8_t  flags;    // kind-specific flags (note kind, image alignment)
    char    *data;     // heap copy of the payload, NUL-terminated; NULL when empty
    int32_t  dataLen;  // payload length in bytes, excluding the terminator
} RecordT;
|
|
|
|
typedef struct {
|
|
char id[128];
|
|
char title[256];
|
|
char section[64]; // section group (e.g., "Libraries") or empty for root
|
|
RecordT *records;
|
|
int32_t recordCount;
|
|
int32_t recordCap;
|
|
bool isDefault;
|
|
} TopicT;
|
|
|
|
// One line of the table of contents.
typedef struct {
    char    title[256];   // text shown in the TOC
    char    section[64];  // section group this entry belongs to
    int32_t topicIdx;     // index into topics[], or -1 for a section header
    int32_t depth;        // nesting level (0 = top level)
} TocEntryT;
|
|
|
|
// One keyword of the help index and the topic it points at.
typedef struct {
    char    keyword[128];  // keyword text given to .index
    int32_t topicIdx;      // index into topics[]
} IndexEntryT;
|
|
|
|
// An image referenced by some .image directive.
typedef struct {
    char    path[520];   // "<imageDir>/<filename>" as built by addImageRef
    int32_t poolOffset;  // starts at 0; presumably set when the image pool is written -- TODO confirm
    int32_t fileSize;    // starts at 0; presumably set when the file is read -- TODO confirm
} ImageRefT;
|
|
|
|
// String table entry used for deduplication: one per unique string.
typedef struct {
    char    *str;     // pointer to the string's bytes inside strTab
    int32_t  offset;  // byte offset of the string within strTab
} StrEntryT;
|
|
|
|
// Trigram posting list: the topics whose text contains a given 3-byte sequence.
typedef struct {
    uint8_t   trigram[3];    // the three bytes of the trigram
    uint16_t *postings;      // heap array of topic indices, no duplicates
    int32_t   postingCount;  // postings in use
    int32_t   postingCap;    // postings allocated
} TrigramT;
|
|
|
|
// Growable byte buffer used for serialization and scratch text.
typedef struct {
    uint8_t *data;  // heap storage
    int32_t  size;  // bytes in use
    int32_t  cap;   // bytes allocated
} BufT;
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Globals
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static TopicT topics[MAX_TOPICS];
|
|
static int32_t topicCount = 0;
|
|
|
|
static TocEntryT tocEntries[MAX_TOC_ENTRIES];
|
|
static int32_t tocCount = 0;
|
|
|
|
static IndexEntryT indexEntries[MAX_INDEX_ENTRIES];
|
|
static int32_t indexCount = 0;
|
|
|
|
static ImageRefT imageRefs[MAX_IMAGES];
|
|
static int32_t imageCount = 0;
|
|
|
|
static char *strTab = NULL;
|
|
static int32_t strTabSize = 0;
|
|
static int32_t strTabCap = 0;
|
|
|
|
static StrEntryT *strEntries = NULL;
|
|
static int32_t strEntryCount = 0;
|
|
static int32_t strEntryCap = 0;
|
|
|
|
static TrigramT trigrams[MAX_TRIGRAMS];
|
|
static int32_t trigramCount = 0;
|
|
|
|
static char imageDir[260] = ".";
|
|
static const char *htmlPath = NULL;
|
|
static int32_t errorCount = 0;
|
|
static bool quietMode = false;
|
|
|
|
// Parse state
|
|
static const char *currentFile = NULL;
|
|
static int32_t currentLine = 0;
|
|
static char currentSection[64] = {0};
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Prototypes
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static void addImageRef(const char *filename);
|
|
static void addIndexEntry(const char *keyword, int32_t topicIdx);
|
|
static RecordT *addRecord(TopicT *topic, uint8_t type, uint8_t flags, const char *data, int32_t dataLen);
|
|
static void addTocEntry(const char *title, int32_t topicIdx, int32_t depth);
|
|
static TopicT *addTopic(const char *id);
|
|
static void addTrigram(uint8_t a, uint8_t b, uint8_t c, uint16_t topicIdx);
|
|
static void bufAppend(BufT *buf, const void *data, int32_t len);
|
|
static void bufInit(BufT *buf);
|
|
static void buildSearchIndex(void);
|
|
static int compareIndexEntries(const void *a, const void *b);
|
|
static int compareTrigrams(const void *a, const void *b);
|
|
static void emitError(const char *fmt, ...);
|
|
static void emitWarning(const char *fmt, ...);
|
|
static int32_t findImage(const char *filename);
|
|
static void flushParagraph(TopicT *topic, char *para, int32_t paraLen, uint8_t type, uint8_t flags);
|
|
static void freeAll(void);
|
|
static void hlpcInfo(const char *fmt, ...);
|
|
static void parseDirective(const char *line, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth);
|
|
static void parseFile(const char *path, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth);
|
|
static void pass1Parse(int32_t fileCount, char **files);
|
|
static void pass2Wrap(void);
|
|
static void regroupTocBySections(void);
|
|
static void pass3StringTable(void);
|
|
static void pass4SearchIndex(void);
|
|
static int pass5Serialize(const char *outputPath);
|
|
static int emitHtml(const char *outputPath);
|
|
static int32_t strTableAdd(const char *str);
|
|
static int32_t strTableFind(const char *str);
|
|
static void usage(void);
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// emitError / emitWarning
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static void emitError(const char *fmt, ...) {
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
fprintf(stderr, "error: %s:%d: ", currentFile ? currentFile : "<unknown>", (int)currentLine);
|
|
vfprintf(stderr, fmt, ap);
|
|
fprintf(stderr, "\n");
|
|
va_end(ap);
|
|
errorCount++;
|
|
}
|
|
|
|
|
|
static void emitWarning(const char *fmt, ...) {
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
fprintf(stderr, "warning: %s:%d: ", currentFile ? currentFile : "<unknown>", (int)currentLine);
|
|
vfprintf(stderr, fmt, ap);
|
|
fprintf(stderr, "\n");
|
|
va_end(ap);
|
|
}
|
|
|
|
|
|
static void hlpcInfo(const char *fmt, ...) {
|
|
if (quietMode) {
|
|
return;
|
|
}
|
|
va_list ap;
|
|
va_start(ap, fmt);
|
|
vfprintf(stderr, fmt, ap);
|
|
va_end(ap);
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// usage
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Print the command-line synopsis to stderr and terminate with exit status 1.
static void usage(void) {
    fputs("Usage: dvxhlpc -o output.hlp [-i imagedir] [--html out.html] [--quiet] input.dhs [@filelist] [...]\n", stderr);
    exit(1);
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Buffer helpers
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Prepare an empty buffer with INITIAL_BUF_SIZE bytes of storage.
// Terminates the program when the allocation fails.
static void bufInit(BufT *buf) {
    uint8_t *storage = malloc(INITIAL_BUF_SIZE);

    if (storage == NULL) {
        fprintf(stderr, "fatal: out of memory\n");
        exit(1);
    }

    buf->data = storage;
    buf->size = 0;
    buf->cap  = INITIAL_BUF_SIZE;
}
|
|
|
|
|
|
// Append len bytes from data to buf, growing the storage geometrically.
//
//   buf  - buffer to extend; a buffer with cap == 0 is grown from scratch
//   data - bytes to copy (not read when len <= 0)
//   len  - number of bytes; zero or negative lengths are ignored
//
// Terminates the program when memory runs out or when the buffer would
// exceed what an int32_t size can describe.
static void bufAppend(BufT *buf, const void *data, int32_t len) {
    if (len <= 0) {
        return;
    }

    // Reject sizes that would overflow the int32_t bookkeeping.
    if (len > INT32_MAX - buf->size) {
        fprintf(stderr, "fatal: buffer too large\n");
        exit(1);
    }

    int32_t needed = buf->size + len;

    if (needed > buf->cap) {
        int32_t newCap = buf->cap > 0 ? buf->cap : INITIAL_BUF_SIZE;

        // Double until it fits, saturating instead of overflowing.
        while (newCap < needed) {
            newCap = (newCap > INT32_MAX / 2) ? INT32_MAX : newCap * 2;
        }

        uint8_t *grown = realloc(buf->data, (size_t)newCap);
        if (!grown) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        buf->data = grown;
        buf->cap  = newCap;
    }

    memcpy(buf->data + buf->size, data, (size_t)len);
    buf->size = needed;
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Topic management
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static TopicT *addTopic(const char *id) {
|
|
if (topicCount >= MAX_TOPICS) {
|
|
emitError("too many topics (max %d)", MAX_TOPICS);
|
|
return NULL;
|
|
}
|
|
TopicT *t = &topics[topicCount++];
|
|
memset(t, 0, sizeof(*t));
|
|
snprintf(t->id, sizeof(t->id), "%s", id);
|
|
snprintf(t->section, sizeof(t->section), "%s", currentSection);
|
|
t->recordCap = 32;
|
|
t->records = malloc(sizeof(RecordT) * t->recordCap);
|
|
if (!t->records) {
|
|
fprintf(stderr, "fatal: out of memory\n");
|
|
exit(1);
|
|
}
|
|
return t;
|
|
}
|
|
|
|
|
|
// Append a record to a topic.
//
//   topic   - topic to extend (must not be NULL)
//   type    - record kind (HLP_REC_* value)
//   flags   - kind-specific flags
//   data    - payload bytes, may contain embedded NULs; may be NULL
//   dataLen - payload length; when data is NULL or dataLen <= 0 the record
//             is stored with no payload (data == NULL, dataLen == 0)
//
// The payload is copied to the heap with a trailing NUL added. Returns the
// new record, or NULL (after reporting an error) when the topic already
// holds MAX_RECORDS_PER_TOPIC records. Terminates the program on
// out-of-memory.
static RecordT *addRecord(TopicT *topic, uint8_t type, uint8_t flags, const char *data, int32_t dataLen) {
    if (topic->recordCount >= MAX_RECORDS_PER_TOPIC) {
        emitError("too many records in topic '%s' (max %d)", topic->id, MAX_RECORDS_PER_TOPIC);
        return NULL;
    }

    if (topic->recordCount >= topic->recordCap) {
        int32_t  newCap = topic->recordCap > 0 ? topic->recordCap * 2 : 32;
        RecordT *grown  = realloc(topic->records, sizeof(RecordT) * (size_t)newCap);
        if (!grown) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        topic->records   = grown;
        topic->recordCap = newCap;
    }

    // Copy the payload first so a failed allocation never leaves a
    // half-initialized record behind.
    char    *copy    = NULL;
    int32_t  copyLen = 0;
    if (data && dataLen > 0) {
        copy = malloc((size_t)dataLen + 1);
        if (!copy) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        memcpy(copy, data, (size_t)dataLen);
        copy[dataLen] = '\0';
        copyLen = dataLen;
    }

    RecordT *r = &topic->records[topic->recordCount++];
    r->type    = type;
    r->flags   = flags;
    r->data    = copy;
    r->dataLen = copyLen;
    return r;
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// TOC / Index / Image management
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static void addTocEntry(const char *title, int32_t topicIdx, int32_t depth) {
|
|
if (tocCount >= MAX_TOC_ENTRIES) {
|
|
emitError("too many TOC entries (max %d)", MAX_TOC_ENTRIES);
|
|
return;
|
|
}
|
|
TocEntryT *e = &tocEntries[tocCount++];
|
|
snprintf(e->title, sizeof(e->title), "%s", title);
|
|
e->title[sizeof(e->title) - 1] = '\0';
|
|
snprintf(e->section, sizeof(e->section), "%s", currentSection);
|
|
e->topicIdx = topicIdx;
|
|
e->depth = depth;
|
|
}
|
|
|
|
|
|
static void addIndexEntry(const char *keyword, int32_t topicIdx) {
|
|
if (indexCount >= MAX_INDEX_ENTRIES) {
|
|
emitError("too many index entries (max %d)", MAX_INDEX_ENTRIES);
|
|
return;
|
|
}
|
|
IndexEntryT *e = &indexEntries[indexCount++];
|
|
snprintf(e->keyword, sizeof(e->keyword), "%s", keyword);
|
|
e->keyword[sizeof(e->keyword) - 1] = '\0';
|
|
e->topicIdx = topicIdx;
|
|
}
|
|
|
|
|
|
static int32_t findImage(const char *filename) {
|
|
for (int32_t i = 0; i < imageCount; i++) {
|
|
// Compare just the filename portion
|
|
const char *base = strrchr(imageRefs[i].path, '/');
|
|
if (!base) {
|
|
base = imageRefs[i].path;
|
|
} else {
|
|
base++;
|
|
}
|
|
if (strcmp(base, filename) == 0) {
|
|
return i;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
static void addImageRef(const char *filename) {
|
|
if (findImage(filename) >= 0) {
|
|
return;
|
|
}
|
|
if (imageCount >= MAX_IMAGES) {
|
|
emitError("too many images (max %d)", MAX_IMAGES);
|
|
return;
|
|
}
|
|
ImageRefT *img = &imageRefs[imageCount++];
|
|
snprintf(img->path, sizeof(img->path), "%s/%s", imageDir, filename);
|
|
img->poolOffset = 0;
|
|
img->fileSize = 0;
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// ---------------------------------------------------------------------------
|
|
// Paragraph flush
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Emit the accumulated paragraph text as one record of the given type.
//
// Trailing spaces, newlines and carriage returns are dropped first; if
// nothing remains (or topic is NULL) no record is added. The buffer is
// NUL-terminated in place at the trimmed length, so para must be writable.
// The caller is responsible for resetting its own length counter.
static void flushParagraph(TopicT *topic, char *para, int32_t paraLen, uint8_t type, uint8_t flags) {
    if (topic == NULL) {
        return;
    }

    int32_t end = paraLen;
    while (end > 0) {
        char last = para[end - 1];
        if (last != ' ' && last != '\n' && last != '\r') {
            break;
        }
        end--;
    }

    if (end <= 0) {
        return;
    }

    para[end] = '\0';
    addRecord(topic, type, flags, para, end);
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Pass 1: Parse
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Copy the next whitespace-delimited word at *cursor into dst (always
// NUL-terminated) and advance *cursor past the copied characters. Copying
// stops early when dst is full, leaving *cursor in the middle of the word.
static void directiveTakeWord(const char **cursor, char *dst, size_t dstSize) {
    const char *s = *cursor;
    size_t      n = 0;

    // Cast to unsigned char: passing a negative char to <ctype.h> is undefined.
    while (*s && !isspace((unsigned char)*s) && n + 1 < dstSize) {
        dst[n++] = *s++;
    }
    dst[n]  = '\0';
    *cursor = s;
}


// Return s advanced past any leading whitespace.
static const char *directiveSkipSpace(const char *s) {
    while (*s && isspace((unsigned char)*s)) {
        s++;
    }
    return s;
}


// Flush the pending paragraph with the record type implied by the open
// blocks (code, table, note, list -- in that order of precedence, matching
// the final flush in pass1Parse) and reset the paragraph length.
static void directiveFlushPending(TopicT *topic, bool inList, bool inTable, bool inCode, bool inNote, uint8_t noteFlags, char *para, int32_t *paraLen) {
    uint8_t type  = HLP_REC_TEXT;
    uint8_t flags = 0;

    if (inCode) {
        type = HLP_REC_CODE;
    } else if (inTable) {
        type = HLP_REC_TABLE;
    } else if (inNote) {
        type  = HLP_REC_NOTE;
        flags = noteFlags;
    } else if (inList) {
        type = HLP_REC_LIST_ITEM;
    }

    flushParagraph(topic, para, *paraLen, type, flags);
    *paraLen = 0;
}


// Interpret one directive line (a line starting with '.').
//
//   line         - the full source line, including the leading '.'
//   curTopic     - in/out: topic currently being filled (NULL before the
//                  first .topic)
//   inList/inTable/inCode/inNote
//                - in/out: which block kinds are currently open
//   noteFlags    - in/out: HLP_NOTE_* kind of the open note
//   para/paraLen - in/out: shared paragraph buffer and its fill level
//   includeDepth - nesting depth of the file being parsed; .include parses
//                  the named file at depth + 1
//
// .topic, .include and .section work without a current topic; every other
// directive reports an error when no topic is open. Text pending in the
// paragraph buffer is flushed before every directive except .item, which
// flushes it as the previous list item itself.
static void parseDirective(const char *line, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth) {
    // Split "<.name> <rest>"
    const char *p = line + 1; // skip '.'
    char        directive[32];
    directiveTakeWord(&p, directive, sizeof(directive));
    const char *rest = directiveSkipSpace(p);

    // .topic <id> -- starts a new topic and closes every open block
    if (strcmp(directive, "topic") == 0) {
        if (!*rest) {
            emitError(".topic requires an ID");
            return;
        }

        directiveFlushPending(*curTopic, *inList, *inTable, *inCode, *inNote, *noteFlags, para, paraLen);

        *inList  = false;
        *inTable = false;
        *inCode  = false;
        *inNote  = false;

        // The ID is the first word; anything after it is ignored.
        char id[128];
        directiveTakeWord(&rest, id, sizeof(id));

        *curTopic = addTopic(id);
        return;
    }

    // .include <file> -- absolute, or relative to the including file's directory
    if (strcmp(directive, "include") == 0) {
        if (!*rest) {
            emitError(".include requires a filename");
            return;
        }

        directiveFlushPending(*curTopic, *inList, *inTable, *inCode, *inNote, *noteFlags, para, paraLen);

        char includePath[260];
        int  written;

        if (rest[0] == '/') {
            written = snprintf(includePath, sizeof(includePath), "%s", rest);
        } else {
            char dir[260];
            snprintf(dir, sizeof(dir), "%s", currentFile ? currentFile : "");
            char *slash = strrchr(dir, '/');
            if (slash) {
                slash[1] = '\0';
            } else {
                dir[0] = '\0';
            }
            written = snprintf(includePath, sizeof(includePath), "%s%s", dir, rest);
        }

        // A truncated path would name a different file; refuse it.
        if (written < 0 || (size_t)written >= sizeof(includePath)) {
            emitError(".include path too long (max %d characters)", (int)sizeof(includePath) - 1);
            return;
        }

        // Trim trailing whitespace from path
        int32_t len = (int32_t)strlen(includePath);
        while (len > 0 && isspace((unsigned char)includePath[len - 1])) {
            includePath[--len] = '\0';
        }

        parseFile(includePath, curTopic, inList, inTable, inCode, inNote, noteFlags, para, paraLen, includeDepth + 1);
        return;
    }

    // .section [name] -- sets (or, with no name, clears) the section group
    // for subsequent topics/TOC entries
    if (strcmp(directive, "section") == 0) {
        snprintf(currentSection, sizeof(currentSection), "%s", rest);
        return;
    }

    // Everything below needs a current topic.
    if (!*curTopic) {
        emitError("directive .%s outside of a topic", directive);
        return;
    }

    // Flush pending paragraph before most directives. .item is excluded
    // because it flushes the previous item itself.
    if (strcmp(directive, "item") != 0) {
        directiveFlushPending(*curTopic, *inList, *inTable, *inCode, *inNote, *noteFlags, para, paraLen);
    }

    if (strcmp(directive, "title") == 0) {
        snprintf((*curTopic)->title, sizeof((*curTopic)->title), "%s", rest);

    } else if (strcmp(directive, "toc") == 0) {
        // .toc <depth> <text> -- depth is an optional single digit
        int32_t depth = 0;
        if (isdigit((unsigned char)*rest)) {
            depth = *rest - '0';
            rest  = directiveSkipSpace(rest + 1);
        }
        addTocEntry(rest, topicCount - 1, depth);

    } else if (strcmp(directive, "h1") == 0) {
        addRecord(*curTopic, HLP_REC_HEADING1, 0, rest, (int32_t)strlen(rest));

    } else if (strcmp(directive, "h2") == 0) {
        addRecord(*curTopic, HLP_REC_HEADING2, 0, rest, (int32_t)strlen(rest));

    } else if (strcmp(directive, "h3") == 0) {
        addRecord(*curTopic, HLP_REC_HEADING3, 0, rest, (int32_t)strlen(rest));

    } else if (strcmp(directive, "image") == 0) {
        if (!*rest) {
            emitError(".image requires a filename");
            return;
        }

        // Parse: .image filename [left|center|right]
        char imgFile[260];
        snprintf(imgFile, sizeof(imgFile), "%s", rest);

        uint8_t alignFlags = HLP_IMG_LEFT;

        // Split off optional alignment keyword after filename
        char *space = strchr(imgFile, ' ');
        if (space) {
            *space = '\0';
            char *align = space + 1;
            while (*align == ' ') {
                align++;
            }
            if (strcasecmp(align, "center") == 0) {
                alignFlags = HLP_IMG_CENTER;
            } else if (strcasecmp(align, "right") == 0) {
                alignFlags = HLP_IMG_RIGHT;
            }
        }

        // Trim trailing whitespace from filename
        int32_t len = (int32_t)strlen(imgFile);
        while (len > 0 && isspace((unsigned char)imgFile[len - 1])) {
            imgFile[--len] = '\0';
        }

        addImageRef(imgFile);
        addRecord(*curTopic, HLP_REC_IMAGE, alignFlags, imgFile, len);

    } else if (strcmp(directive, "link") == 0) {
        // .link <topic-id> <display text>
        char linkTarget[128];
        directiveTakeWord(&rest, linkTarget, sizeof(linkTarget));
        rest = directiveSkipSpace(rest);

        // Store as "target\0display text"
        int32_t targetLen  = (int32_t)strlen(linkTarget);
        int32_t displayLen = (int32_t)strlen(rest);
        int32_t totalLen   = targetLen + 1 + displayLen;

        char *linkData = malloc((size_t)totalLen + 1);
        if (!linkData) {
            fprintf(stderr, "fatal: out of memory\n");
            exit(1);
        }
        memcpy(linkData, linkTarget, (size_t)targetLen);
        linkData[targetLen] = '\0';
        memcpy(linkData + targetLen + 1, rest, (size_t)displayLen);
        linkData[totalLen] = '\0';

        addRecord(*curTopic, HLP_REC_LINK, 0, linkData, totalLen);
        free(linkData);

    } else if (strcmp(directive, "list") == 0) {
        *inList = true;

    } else if (strcmp(directive, "item") == 0) {
        if (!*inList) {
            emitWarning(".item outside of .list");
        }

        // Flush any pending item
        flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0);
        *paraLen = 0;

        // Start new item with the rest text
        if (*rest) {
            int32_t len = (int32_t)strlen(rest);
            memcpy(para, rest, (size_t)len);
            *paraLen = len;
        }

    } else if (strcmp(directive, "endlist") == 0) {
        // The final item was already flushed as a list item by the common
        // flush above; this only catches anything still pending.
        flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0);
        *paraLen = 0;
        *inList  = false;

    } else if (strcmp(directive, "table") == 0) {
        *inTable = true;

    } else if (strcmp(directive, "endtable") == 0) {
        *inTable = false;

    } else if (strcmp(directive, "code") == 0) {
        *inCode = true;

    } else if (strcmp(directive, "endcode") == 0) {
        *inCode = false;

    } else if (strcmp(directive, "note") == 0) {
        // .note [tip|warning] -- anything else is an info note
        *inNote = true;
        if (strncmp(rest, "tip", 3) == 0) {
            *noteFlags = HLP_NOTE_TIP;
        } else if (strncmp(rest, "warning", 7) == 0) {
            *noteFlags = HLP_NOTE_WARNING;
        } else {
            *noteFlags = HLP_NOTE_INFO;
        }

    } else if (strcmp(directive, "endnote") == 0) {
        *inNote = false;

    } else if (strcmp(directive, "index") == 0) {
        if (!*rest) {
            emitError(".index requires a keyword");
            return;
        }
        addIndexEntry(rest, topicCount - 1);

    } else if (strcmp(directive, "hr") == 0) {
        addRecord(*curTopic, HLP_REC_HRULE, 0, NULL, 0);

    } else if (strcmp(directive, "default") == 0) {
        (*curTopic)->isDefault = true;

    } else {
        emitWarning("unknown directive .%s", directive);
    }
}
|
|
|
|
|
|
static void parseFile(const char *path, TopicT **curTopic, bool *inList, bool *inTable, bool *inCode, bool *inNote, uint8_t *noteFlags, char *para, int32_t *paraLen, int32_t includeDepth) {
|
|
if (includeDepth > MAX_INCLUDE_DEPTH) {
|
|
emitError("include depth exceeded (max %d)", MAX_INCLUDE_DEPTH);
|
|
return;
|
|
}
|
|
|
|
FILE *f = fopen(path, "r");
|
|
if (!f) {
|
|
emitError("cannot open '%s': %s", path, strerror(errno));
|
|
return;
|
|
}
|
|
|
|
// Save and set parse state
|
|
const char *savedFile = currentFile;
|
|
int32_t savedLine = currentLine;
|
|
char savedSection[64];
|
|
memcpy(savedSection, currentSection, sizeof(savedSection));
|
|
currentFile = path;
|
|
currentLine = 0;
|
|
|
|
hlpcInfo(" parsing %s\n", path);
|
|
|
|
char line[MAX_LINE_LEN];
|
|
while (fgets(line, sizeof(line), f)) {
|
|
currentLine++;
|
|
|
|
// Strip trailing newline/CR
|
|
int32_t len = strlen(line);
|
|
while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r')) {
|
|
line[--len] = '\0';
|
|
}
|
|
|
|
// Check for directive. Inside code/table blocks, only .endcode/.endtable
|
|
// are recognized — everything else is verbatim content.
|
|
if (line[0] == '.') {
|
|
if (*inCode && strncmp(line, ".endcode", 8) != 0) {
|
|
// Fall through to verbatim handling below
|
|
} else if (*inTable && strncmp(line, ".endtable", 9) != 0) {
|
|
// Fall through to verbatim handling below
|
|
} else {
|
|
parseDirective(line, curTopic, inList, inTable, inCode, inNote, noteFlags, para, paraLen, includeDepth);
|
|
continue;
|
|
}
|
|
}
|
|
|
|
// Plain text line
|
|
if (!*curTopic) {
|
|
// Ignore text outside topics
|
|
continue;
|
|
}
|
|
|
|
if (*inCode || *inTable) {
|
|
// In code/table blocks, preserve lines verbatim
|
|
if (*paraLen > 0) {
|
|
para[(*paraLen)++] = '\n';
|
|
}
|
|
memcpy(para + *paraLen, line, len);
|
|
*paraLen += len;
|
|
para[*paraLen] = '\0';
|
|
} else if (*inList) {
|
|
// In list, accumulate continuation text for current item
|
|
if (len == 0) {
|
|
// Empty line flushes current item
|
|
flushParagraph(*curTopic, para, *paraLen, HLP_REC_LIST_ITEM, 0);
|
|
*paraLen = 0;
|
|
} else {
|
|
if (*paraLen > 0) {
|
|
para[(*paraLen)++] = ' ';
|
|
}
|
|
memcpy(para + *paraLen, line, len);
|
|
*paraLen += len;
|
|
para[*paraLen] = '\0';
|
|
}
|
|
} else if (*inNote) {
|
|
// In note block
|
|
if (len == 0) {
|
|
// Empty line separates paragraphs
|
|
flushParagraph(*curTopic, para, *paraLen, HLP_REC_NOTE, *noteFlags);
|
|
*paraLen = 0;
|
|
} else {
|
|
if (*paraLen > 0) {
|
|
para[(*paraLen)++] = ' ';
|
|
}
|
|
memcpy(para + *paraLen, line, len);
|
|
*paraLen += len;
|
|
para[*paraLen] = '\0';
|
|
}
|
|
} else {
|
|
// Normal text
|
|
if (len == 0) {
|
|
// Empty line ends paragraph
|
|
flushParagraph(*curTopic, para, *paraLen, HLP_REC_TEXT, 0);
|
|
*paraLen = 0;
|
|
} else {
|
|
if (*paraLen > 0) {
|
|
para[(*paraLen)++] = ' ';
|
|
}
|
|
memcpy(para + *paraLen, line, len);
|
|
*paraLen += len;
|
|
para[*paraLen] = '\0';
|
|
}
|
|
}
|
|
}
|
|
|
|
fclose(f);
|
|
|
|
// Restore parse state
|
|
currentFile = savedFile;
|
|
currentLine = savedLine;
|
|
memcpy(currentSection, savedSection, sizeof(currentSection));
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// regroupTocBySections -- reorder TOC entries by .section directives
|
|
//
|
|
// Entries with no section stay at the front in their original order.
|
|
// Each unique section (in order of first appearance) gets a depth-0
|
|
// header entry inserted, and all entries in that section are placed
|
|
// underneath with their depths bumped by 1.
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static void regroupTocBySections(void) {
|
|
// Collect unique section names in order of first appearance
|
|
char sectionNames[MAX_SECTIONS][64];
|
|
int32_t sectionCount = 0;
|
|
|
|
for (int32_t i = 0; i < tocCount; i++) {
|
|
if (tocEntries[i].section[0] == '\0') {
|
|
continue;
|
|
}
|
|
bool found = false;
|
|
for (int32_t s = 0; s < sectionCount; s++) {
|
|
if (strcmp(sectionNames[s], tocEntries[i].section) == 0) {
|
|
found = true;
|
|
break;
|
|
}
|
|
}
|
|
if (!found) {
|
|
if (sectionCount >= MAX_SECTIONS) {
|
|
emitWarning("too many sections (max %d)", MAX_SECTIONS);
|
|
break;
|
|
}
|
|
snprintf(sectionNames[sectionCount], sizeof(sectionNames[0]), "%s", tocEntries[i].section);
|
|
sectionCount++;
|
|
}
|
|
}
|
|
|
|
if (sectionCount == 0) {
|
|
return;
|
|
}
|
|
|
|
TocEntryT *newToc = malloc(sizeof(TocEntryT) * MAX_TOC_ENTRIES);
|
|
if (!newToc) {
|
|
fprintf(stderr, "fatal: out of memory\n");
|
|
exit(1);
|
|
}
|
|
int32_t newCount = 0;
|
|
|
|
// Unsectioned entries first (original order, unchanged depths)
|
|
for (int32_t i = 0; i < tocCount; i++) {
|
|
if (tocEntries[i].section[0] == '\0') {
|
|
newToc[newCount++] = tocEntries[i];
|
|
}
|
|
}
|
|
|
|
// Each section: header entry + grouped entries with depth+1
|
|
for (int32_t s = 0; s < sectionCount; s++) {
|
|
// Insert section header (no topic)
|
|
TocEntryT *hdr = &newToc[newCount++];
|
|
snprintf(hdr->title, sizeof(hdr->title), "%s", sectionNames[s]);
|
|
hdr->section[0] = '\0';
|
|
hdr->topicIdx = -1;
|
|
hdr->depth = 0;
|
|
|
|
// All entries belonging to this section
|
|
for (int32_t i = 0; i < tocCount; i++) {
|
|
if (strcmp(tocEntries[i].section, sectionNames[s]) == 0) {
|
|
newToc[newCount] = tocEntries[i];
|
|
newToc[newCount].depth += 1;
|
|
newCount++;
|
|
}
|
|
}
|
|
}
|
|
|
|
memcpy(tocEntries, newToc, sizeof(TocEntryT) * newCount);
|
|
tocCount = newCount;
|
|
free(newToc);
|
|
|
|
hlpcInfo(" regrouped TOC: %d sections, %d entries\n", sectionCount, tocCount);
|
|
}
|
|
|
|
|
|
static void pass1Parse(int32_t fileCount, char **files) {
|
|
hlpcInfo("Pass 1: Parsing %d input file(s)...\n", fileCount);
|
|
|
|
TopicT *curTopic = NULL;
|
|
bool inList = false;
|
|
bool inTable = false;
|
|
bool inCode = false;
|
|
bool inNote = false;
|
|
uint8_t noteFlags = 0;
|
|
|
|
// Paragraph buffer (shared across files for multi-file topics)
|
|
char para[MAX_LINE_LEN * 64];
|
|
int32_t paraLen = 0;
|
|
|
|
for (int32_t i = 0; i < fileCount; i++) {
|
|
currentSection[0] = '\0';
|
|
parseFile(files[i], &curTopic, &inList, &inTable, &inCode, &inNote, ¬eFlags, para, ¶Len, 0);
|
|
}
|
|
|
|
// Flush any remaining paragraph
|
|
if (curTopic && paraLen > 0) {
|
|
uint8_t type = HLP_REC_TEXT;
|
|
uint8_t flags = 0;
|
|
if (inCode) {
|
|
type = HLP_REC_CODE;
|
|
} else if (inTable) {
|
|
type = HLP_REC_TABLE;
|
|
} else if (inNote) {
|
|
type = HLP_REC_NOTE;
|
|
flags = noteFlags;
|
|
} else if (inList) {
|
|
type = HLP_REC_LIST_ITEM;
|
|
}
|
|
flushParagraph(curTopic, para, paraLen, type, flags);
|
|
}
|
|
|
|
// Regroup TOC entries by section
|
|
regroupTocBySections();
|
|
|
|
hlpcInfo(" %d topic(s), %d TOC entries, %d index entries, %d image(s)\n",
|
|
topicCount, tocCount, indexCount, imageCount);
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Pass 2: Word wrap
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Pass 2: currently only announces itself.
//
// TEXT, LIST_ITEM and NOTE records are kept as single flowing strings and
// the viewer wraps them at display time to the actual window width, so no
// wrapping happens here. CODE, TABLE, HEADING and the remaining record
// kinds are left untouched as well.
static void pass2Wrap(void) {
    hlpcInfo("Pass 2: Joining paragraph lines...\n");
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Pass 3: String table
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static int32_t strTableFind(const char *str) {
|
|
for (int32_t i = 0; i < strEntryCount; i++) {
|
|
if (strcmp(strEntries[i].str, str) == 0) {
|
|
return strEntries[i].offset;
|
|
}
|
|
}
|
|
return -1;
|
|
}
|
|
|
|
|
|
static int32_t strTableAdd(const char *str) {
|
|
// Check for existing
|
|
int32_t existing = strTableFind(str);
|
|
if (existing >= 0) {
|
|
return existing;
|
|
}
|
|
|
|
int32_t len = strlen(str);
|
|
|
|
// Grow string table buffer
|
|
while (strTabSize + len + 1 > strTabCap) {
|
|
strTabCap *= 2;
|
|
strTab = realloc(strTab, strTabCap);
|
|
if (!strTab) {
|
|
fprintf(stderr, "fatal: out of memory\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
// Grow entries array
|
|
if (strEntryCount >= strEntryCap) {
|
|
strEntryCap *= 2;
|
|
strEntries = realloc(strEntries, sizeof(StrEntryT) * strEntryCap);
|
|
if (!strEntries) {
|
|
fprintf(stderr, "fatal: out of memory\n");
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
int32_t offset = strTabSize;
|
|
memcpy(strTab + strTabSize, str, len + 1);
|
|
strTabSize += len + 1;
|
|
|
|
strEntries[strEntryCount].str = strTab + offset;
|
|
strEntries[strEntryCount].offset = offset;
|
|
strEntryCount++;
|
|
|
|
return offset;
|
|
}
|
|
|
|
|
|
static void pass3StringTable(void) {
|
|
hlpcInfo("Pass 3: Building string table...\n");
|
|
|
|
strTabCap = INITIAL_STRTAB_SIZE;
|
|
strTabSize = 0;
|
|
strTab = malloc(strTabCap);
|
|
|
|
strEntryCap = 1024;
|
|
strEntryCount = 0;
|
|
strEntries = malloc(sizeof(StrEntryT) * strEntryCap);
|
|
|
|
// Add empty string at offset 0
|
|
strTableAdd("");
|
|
|
|
// Add all topic IDs and titles
|
|
for (int32_t i = 0; i < topicCount; i++) {
|
|
strTableAdd(topics[i].id);
|
|
strTableAdd(topics[i].title);
|
|
}
|
|
|
|
// Add TOC titles
|
|
for (int32_t i = 0; i < tocCount; i++) {
|
|
strTableAdd(tocEntries[i].title);
|
|
}
|
|
|
|
// Add index keywords
|
|
for (int32_t i = 0; i < indexCount; i++) {
|
|
strTableAdd(indexEntries[i].keyword);
|
|
}
|
|
|
|
hlpcInfo(" %d unique strings, %d bytes\n", strEntryCount, strTabSize);
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Pass 4: Search index (trigram)
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static int compareTrigrams(const void *a, const void *b) {
|
|
const TrigramT *ta = (const TrigramT *)a;
|
|
const TrigramT *tb = (const TrigramT *)b;
|
|
int32_t d = (int32_t)ta->trigram[0] - (int32_t)tb->trigram[0];
|
|
if (d != 0) {
|
|
return d;
|
|
}
|
|
d = (int32_t)ta->trigram[1] - (int32_t)tb->trigram[1];
|
|
if (d != 0) {
|
|
return d;
|
|
}
|
|
return (int32_t)ta->trigram[2] - (int32_t)tb->trigram[2];
|
|
}
|
|
|
|
|
|
static void addTrigram(uint8_t a, uint8_t b, uint8_t c, uint16_t topicIdx) {
|
|
// Find existing trigram
|
|
TrigramT *tri = NULL;
|
|
for (int32_t i = 0; i < trigramCount; i++) {
|
|
if (trigrams[i].trigram[0] == a && trigrams[i].trigram[1] == b && trigrams[i].trigram[2] == c) {
|
|
tri = &trigrams[i];
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (!tri) {
|
|
if (trigramCount >= MAX_TRIGRAMS) {
|
|
return;
|
|
}
|
|
tri = &trigrams[trigramCount++];
|
|
tri->trigram[0] = a;
|
|
tri->trigram[1] = b;
|
|
tri->trigram[2] = c;
|
|
tri->postingCap = 8;
|
|
tri->postingCount = 0;
|
|
tri->postings = malloc(sizeof(uint16_t) * tri->postingCap);
|
|
}
|
|
|
|
// Check if topic already in posting list
|
|
for (int32_t i = 0; i < tri->postingCount; i++) {
|
|
if (tri->postings[i] == topicIdx) {
|
|
return;
|
|
}
|
|
}
|
|
|
|
// Add to posting list
|
|
if (tri->postingCount >= tri->postingCap) {
|
|
tri->postingCap *= 2;
|
|
tri->postings = realloc(tri->postings, sizeof(uint16_t) * tri->postingCap);
|
|
}
|
|
tri->postings[tri->postingCount++] = topicIdx;
|
|
}
|
|
|
|
|
|
static void buildSearchIndex(void) {
|
|
for (int32_t t = 0; t < topicCount; t++) {
|
|
TopicT *topic = &topics[t];
|
|
|
|
// Collect all searchable text for this topic
|
|
BufT textBuf;
|
|
bufInit(&textBuf);
|
|
|
|
// Add title
|
|
int32_t titleLen = strlen(topic->title);
|
|
bufAppend(&textBuf, topic->title, titleLen);
|
|
uint8_t space = ' ';
|
|
bufAppend(&textBuf, &space, 1);
|
|
|
|
for (int32_t r = 0; r < topic->recordCount; r++) {
|
|
RecordT *rec = &topic->records[r];
|
|
if (!rec->data || rec->dataLen <= 0) {
|
|
continue;
|
|
}
|
|
if (rec->type == HLP_REC_TEXT || rec->type == HLP_REC_HEADING1 || rec->type == HLP_REC_HEADING2 || rec->type == HLP_REC_HEADING3 || rec->type == HLP_REC_LIST_ITEM || rec->type == HLP_REC_NOTE || rec->type == HLP_REC_CODE) {
|
|
bufAppend(&textBuf, rec->data, rec->dataLen);
|
|
bufAppend(&textBuf, &space, 1);
|
|
}
|
|
}
|
|
|
|
// Lowercase the text
|
|
for (int32_t i = 0; i < textBuf.size; i++) {
|
|
textBuf.data[i] = tolower(textBuf.data[i]);
|
|
}
|
|
|
|
// Generate trigrams
|
|
for (int32_t i = 0; i + 2 < textBuf.size; i++) {
|
|
uint8_t a = textBuf.data[i];
|
|
uint8_t b = textBuf.data[i + 1];
|
|
uint8_t c = textBuf.data[i + 2];
|
|
if (isalnum(a) && isalnum(b) && isalnum(c)) {
|
|
addTrigram(a, b, c, (uint16_t)t);
|
|
}
|
|
}
|
|
|
|
free(textBuf.data);
|
|
}
|
|
}
|
|
|
|
|
|
static void pass4SearchIndex(void) {
|
|
hlpcInfo("Pass 4: Building search index...\n");
|
|
|
|
buildSearchIndex();
|
|
|
|
// Sort trigrams
|
|
qsort(trigrams, trigramCount, sizeof(TrigramT), compareTrigrams);
|
|
|
|
hlpcInfo(" %d trigrams\n", trigramCount);
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Pass 5: Serialize
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static int compareIndexEntries(const void *a, const void *b) {
|
|
return strcasecmp(((const IndexEntryT *)a)->keyword, ((const IndexEntryT *)b)->keyword);
|
|
}
|
|
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// HTML output
|
|
// ---------------------------------------------------------------------------
|
|
|
|
// Base64 alphabet, indexed by 6-bit value.
static const char sBase64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Writes `len` bytes of `data` to `f` as base64 text.
//
// Input is consumed in groups of up to three bytes; every group produces
// exactly four output characters, with '=' padding standing in for the
// characters that a short final group cannot supply. Nothing is written
// when `len` is zero or negative, and no line breaks are inserted.
static void base64Encode(FILE *f, const uint8_t *data, int32_t len) {
    int32_t pos = 0;

    while (pos < len) {
        bool haveSecond = (pos + 1 < len);
        bool haveThird  = (pos + 2 < len);

        // Pack the available bytes into the top 24 bits of `group`.
        uint32_t group = (uint32_t)data[pos] << 16;

        if (haveSecond) {
            group |= (uint32_t)data[pos + 1] << 8;
        }

        if (haveThird) {
            group |= (uint32_t)data[pos + 2];
        }

        char quad[4];
        quad[0] = sBase64[(group >> 18) & 0x3F];
        quad[1] = sBase64[(group >> 12) & 0x3F];
        quad[2] = haveSecond ? sBase64[(group >> 6) & 0x3F] : '=';
        quad[3] = haveThird ? sBase64[group & 0x3F] : '=';

        for (int32_t k = 0; k < 4; k++) {
            fputc(quad[k], f);
        }

        pos += 3;
    }
}
|
|
|
|
|
|
// Writes the NUL-terminated string `text` to `f` with the HTML-significant
// characters replaced by entities:
//   <  ->  &lt;     >  ->  &gt;     &  ->  &amp;     "  ->  &quot;
// Every other byte is copied through unchanged. Escaping the double quote
// makes the output usable inside double-quoted attribute values as well as
// in element content.
static void htmlEscapeWrite(FILE *f, const char *text) {
    for (const char *p = text; *p; p++) {
        switch (*p) {
            case '<':
                fputs("&lt;", f);
                break;

            case '>':
                fputs("&gt;", f);
                break;

            case '&':
                fputs("&amp;", f);
                break;

            case '"':
                fputs("&quot;", f);
                break;

            default:
                fputc(*p, f);
                break;
        }
    }
}
|
|
|
|
|
|
// Emits `text` as a <pre> block: the opening tag, the text passed through
// htmlEscapeWrite(), then the closing tag followed by a newline.
static void htmlWritePreformatted(FILE *f, const char *text) {
    fputs("<pre>", f);
    htmlEscapeWrite(f, text);
    fputs("</pre>\n", f);
}
|
|
|
|
|
|
static void htmlWriteImage(FILE *f, const RecordT *rec) {
|
|
// rec->data is the image filename (from parse time)
|
|
// Find the image ref and embed the file as base64
|
|
int32_t imgIdx = findImage(rec->data);
|
|
|
|
if (imgIdx < 0) {
|
|
fprintf(f, "<p><em>[Image: %s not found]</em></p>\n", rec->data);
|
|
return;
|
|
}
|
|
|
|
FILE *imgFile = fopen(imageRefs[imgIdx].path, "rb");
|
|
|
|
if (!imgFile) {
|
|
fprintf(f, "<p><em>[Image: %s could not be read]</em></p>\n", rec->data);
|
|
return;
|
|
}
|
|
|
|
fseek(imgFile, 0, SEEK_END);
|
|
long imgSize = ftell(imgFile);
|
|
fseek(imgFile, 0, SEEK_SET);
|
|
|
|
uint8_t *imgData = malloc(imgSize);
|
|
|
|
if (!imgData) {
|
|
fclose(imgFile);
|
|
return;
|
|
}
|
|
|
|
if (fread(imgData, 1, imgSize, imgFile) != (size_t)imgSize) {
|
|
free(imgData);
|
|
fclose(imgFile);
|
|
fprintf(f, "<p><em>[Image: %s read error]</em></p>\n", rec->data);
|
|
return;
|
|
}
|
|
|
|
fclose(imgFile);
|
|
|
|
fprintf(f, "<p><img src=\"data:image/bmp;base64,");
|
|
base64Encode(f, imgData, (int32_t)imgSize);
|
|
fprintf(f, "\" alt=\"%s\"></p>\n", rec->data);
|
|
free(imgData);
|
|
}
|
|
|
|
|
|
static void htmlWriteRecords(FILE *f, const TopicT *topic) {
|
|
bool inList = false;
|
|
|
|
for (int32_t i = 0; i < topic->recordCount; i++) {
|
|
const RecordT *rec = &topic->records[i];
|
|
|
|
// Close list if we're leaving list items
|
|
if (inList && rec->type != HLP_REC_LIST_ITEM) {
|
|
fprintf(f, "</ul>\n");
|
|
inList = false;
|
|
}
|
|
|
|
switch (rec->type) {
|
|
case HLP_REC_TEXT:
|
|
fprintf(f, "<p>");
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</p>\n");
|
|
break;
|
|
|
|
case HLP_REC_HEADING1:
|
|
fprintf(f, "<h2>");
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</h2>\n");
|
|
break;
|
|
|
|
case HLP_REC_HEADING2:
|
|
fprintf(f, "<h3>");
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</h3>\n");
|
|
break;
|
|
|
|
case HLP_REC_HEADING3:
|
|
fprintf(f, "<h4>");
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</h4>\n");
|
|
break;
|
|
|
|
case HLP_REC_IMAGE:
|
|
htmlWriteImage(f, rec);
|
|
break;
|
|
|
|
case HLP_REC_LINK: {
|
|
// data format: "topicId\0displayText"
|
|
const char *targetId = rec->data;
|
|
const char *displayTxt = rec->data + strlen(rec->data) + 1;
|
|
|
|
if (displayTxt > rec->data + rec->dataLen) {
|
|
displayTxt = targetId;
|
|
}
|
|
|
|
fprintf(f, "<p><a href=\"#%s\">", targetId);
|
|
htmlEscapeWrite(f, displayTxt);
|
|
fprintf(f, "</a></p>\n");
|
|
break;
|
|
}
|
|
|
|
case HLP_REC_LIST_ITEM:
|
|
if (!inList) {
|
|
fprintf(f, "<ul>\n");
|
|
inList = true;
|
|
}
|
|
|
|
fprintf(f, "<li>");
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</li>\n");
|
|
break;
|
|
|
|
case HLP_REC_TABLE:
|
|
htmlWritePreformatted(f, rec->data);
|
|
break;
|
|
|
|
case HLP_REC_CODE:
|
|
fprintf(f, "<pre><code>");
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</code></pre>\n");
|
|
break;
|
|
|
|
case HLP_REC_HRULE:
|
|
fprintf(f, "<hr>\n");
|
|
break;
|
|
|
|
case HLP_REC_NOTE: {
|
|
const char *label = "Note";
|
|
|
|
if (rec->flags == HLP_NOTE_TIP) {
|
|
label = "Tip";
|
|
} else if (rec->flags == HLP_NOTE_WARNING) {
|
|
label = "Warning";
|
|
}
|
|
|
|
fprintf(f, "<blockquote><strong>%s:</strong> ", label);
|
|
htmlEscapeWrite(f, rec->data);
|
|
fprintf(f, "</blockquote>\n");
|
|
break;
|
|
}
|
|
|
|
case HLP_REC_END:
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (inList) {
|
|
fprintf(f, "</ul>\n");
|
|
}
|
|
}
|
|
|
|
|
|
static void htmlWriteTocEntry(FILE *f, int32_t idx) {
|
|
const TocEntryT *entry = &tocEntries[idx];
|
|
|
|
if (entry->topicIdx >= 0 && entry->topicIdx < topicCount) {
|
|
fprintf(f, "<li><a href=\"#%s\">", topics[entry->topicIdx].id);
|
|
htmlEscapeWrite(f, entry->title);
|
|
fprintf(f, "</a>");
|
|
} else {
|
|
fprintf(f, "<li><strong>");
|
|
htmlEscapeWrite(f, entry->title);
|
|
fprintf(f, "</strong>");
|
|
}
|
|
|
|
// Check if next entries are children (deeper depth)
|
|
int32_t myDepth = entry->depth;
|
|
int32_t next = idx + 1;
|
|
|
|
if (next < tocCount && tocEntries[next].depth > myDepth) {
|
|
fprintf(f, "\n<ul>\n");
|
|
|
|
while (next < tocCount && tocEntries[next].depth > myDepth) {
|
|
htmlWriteTocEntry(f, next);
|
|
|
|
// Skip past this entry's subtree
|
|
int32_t childDepth = tocEntries[next].depth;
|
|
next++;
|
|
|
|
while (next < tocCount && tocEntries[next].depth > childDepth) {
|
|
next++;
|
|
}
|
|
}
|
|
|
|
fprintf(f, "</ul>\n");
|
|
}
|
|
|
|
fprintf(f, "</li>\n");
|
|
}
|
|
|
|
|
|
static int emitHtml(const char *outputPath) {
|
|
FILE *f = fopen(outputPath, "w");
|
|
|
|
if (!f) {
|
|
fprintf(stderr, "error: cannot open %s for writing\n", outputPath);
|
|
return -1;
|
|
}
|
|
|
|
// Find a title from the default topic or the first topic
|
|
const char *docTitle = "DVX Help";
|
|
|
|
for (int32_t i = 0; i < topicCount; i++) {
|
|
if (topics[i].isDefault && topics[i].title[0]) {
|
|
docTitle = topics[i].title;
|
|
break;
|
|
}
|
|
}
|
|
|
|
fprintf(f, "<!DOCTYPE html>\n<html>\n<head>\n");
|
|
fprintf(f, "<meta charset=\"utf-8\">\n");
|
|
fprintf(f, "<title>");
|
|
htmlEscapeWrite(f, docTitle);
|
|
fprintf(f, "</title>\n");
|
|
fprintf(f, "<style>\n");
|
|
fprintf(f, "body { font-family: sans-serif; margin: 0; padding: 0; display: flex; }\n");
|
|
fprintf(f, "nav { width: 250px; min-width: 250px; background: #f0f0f0; padding: 16px;\n");
|
|
fprintf(f, " border-right: 1px solid #ccc; height: 100vh; overflow-y: auto;\n");
|
|
fprintf(f, " position: sticky; top: 0; box-sizing: border-box; }\n");
|
|
fprintf(f, "nav ul { list-style: none; padding-left: 16px; margin: 4px 0; }\n");
|
|
fprintf(f, "nav > ul { padding-left: 0; }\n");
|
|
fprintf(f, "nav a { text-decoration: none; color: #0066cc; }\n");
|
|
fprintf(f, "nav a:hover { text-decoration: underline; }\n");
|
|
fprintf(f, "main { flex: 1; padding: 24px 32px; max-width: 800px; }\n");
|
|
fprintf(f, "h1 { border-bottom: 2px solid #333; padding-bottom: 4px; }\n");
|
|
fprintf(f, "h2 { border-bottom: 1px solid #999; padding-bottom: 2px; margin-top: 32px; }\n");
|
|
fprintf(f, "h3 { margin-top: 24px; }\n");
|
|
fprintf(f, "pre { background: #f8f8f8; border: 1px solid #ddd; padding: 8px;\n");
|
|
fprintf(f, " overflow-x: auto; font-size: 14px; }\n");
|
|
fprintf(f, "blockquote { background: #fffde7; border-left: 4px solid #ffc107;\n");
|
|
fprintf(f, " padding: 8px 12px; margin: 12px 0; }\n");
|
|
fprintf(f, "hr { border: none; border-top: 1px solid #ccc; margin: 24px 0; }\n");
|
|
fprintf(f, "img { max-width: 100%%; }\n");
|
|
fprintf(f, ".topic { margin-bottom: 48px; }\n");
|
|
fprintf(f, "</style>\n");
|
|
fprintf(f, "</head>\n<body>\n");
|
|
|
|
// TOC sidebar
|
|
fprintf(f, "<nav>\n<h3>Contents</h3>\n<ul>\n");
|
|
|
|
for (int32_t i = 0; i < tocCount; ) {
|
|
htmlWriteTocEntry(f, i);
|
|
|
|
// Skip past this entry's subtree
|
|
int32_t myDepth = tocEntries[i].depth;
|
|
i++;
|
|
|
|
while (i < tocCount && tocEntries[i].depth > myDepth) {
|
|
i++;
|
|
}
|
|
}
|
|
|
|
fprintf(f, "</ul>\n");
|
|
|
|
// Index section in the sidebar
|
|
if (indexCount > 0) {
|
|
fprintf(f, "<h3>Index</h3>\n<ul>\n");
|
|
|
|
for (int32_t i = 0; i < indexCount; i++) {
|
|
if (indexEntries[i].topicIdx >= 0 && indexEntries[i].topicIdx < topicCount) {
|
|
fprintf(f, "<li><a href=\"#%s\">", topics[indexEntries[i].topicIdx].id);
|
|
htmlEscapeWrite(f, indexEntries[i].keyword);
|
|
fprintf(f, "</a></li>\n");
|
|
}
|
|
}
|
|
|
|
fprintf(f, "</ul>\n");
|
|
}
|
|
|
|
fprintf(f, "</nav>\n");
|
|
|
|
// Main content
|
|
fprintf(f, "<main>\n");
|
|
|
|
for (int32_t i = 0; i < topicCount; i++) {
|
|
fprintf(f, "<div class=\"topic\" id=\"%s\">\n", topics[i].id);
|
|
fprintf(f, "<h1>");
|
|
htmlEscapeWrite(f, topics[i].title);
|
|
fprintf(f, "</h1>\n");
|
|
htmlWriteRecords(f, &topics[i]);
|
|
fprintf(f, "</div>\n");
|
|
}
|
|
|
|
fprintf(f, "</main>\n</body>\n</html>\n");
|
|
fclose(f);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static int pass5Serialize(const char *outputPath) {
|
|
hlpcInfo("Pass 5: Serializing to %s...\n", outputPath);
|
|
|
|
FILE *f = fopen(outputPath, "wb");
|
|
if (!f) {
|
|
fprintf(stderr, "error: cannot create '%s': %s\n", outputPath, strerror(errno));
|
|
return 1;
|
|
}
|
|
|
|
HlpHeaderT hdr;
|
|
memset(&hdr, 0, sizeof(hdr));
|
|
hdr.magic = HLP_MAGIC;
|
|
hdr.version = HLP_VERSION;
|
|
hdr.topicCount = topicCount;
|
|
|
|
// Find default topic
|
|
hdr.defaultTopicStr = 0;
|
|
for (int32_t i = 0; i < topicCount; i++) {
|
|
if (topics[i].isDefault) {
|
|
hdr.defaultTopicStr = strTableFind(topics[i].id);
|
|
break;
|
|
}
|
|
}
|
|
|
|
uint32_t offset = 0;
|
|
|
|
// --- 1. Image pool ---
|
|
hdr.imagePoolOffset = offset;
|
|
for (int32_t i = 0; i < imageCount; i++) {
|
|
ImageRefT *img = &imageRefs[i];
|
|
FILE *imgFile = fopen(img->path, "rb");
|
|
if (!imgFile) {
|
|
fprintf(stderr, "error: cannot open image '%s': %s\n", img->path, strerror(errno));
|
|
fclose(f);
|
|
return 1;
|
|
}
|
|
fseek(imgFile, 0, SEEK_END);
|
|
img->fileSize = ftell(imgFile);
|
|
fseek(imgFile, 0, SEEK_SET);
|
|
img->poolOffset = offset - hdr.imagePoolOffset;
|
|
|
|
uint8_t *imgData = malloc(img->fileSize);
|
|
if (fread(imgData, 1, img->fileSize, imgFile) != (size_t)img->fileSize) {
|
|
fprintf(stderr, "error: cannot read image '%s'\n", img->path);
|
|
free(imgData);
|
|
fclose(imgFile);
|
|
fclose(f);
|
|
return 1;
|
|
}
|
|
fwrite(imgData, 1, img->fileSize, f);
|
|
offset += img->fileSize;
|
|
free(imgData);
|
|
fclose(imgFile);
|
|
hlpcInfo(" image: %s (%d bytes)\n", img->path, img->fileSize);
|
|
}
|
|
hdr.imagePoolSize = offset - hdr.imagePoolOffset;
|
|
|
|
// --- 2. Topic content records ---
|
|
uint32_t *topicContentOffsets = calloc(topicCount, sizeof(uint32_t));
|
|
uint32_t *topicContentSizes = calloc(topicCount, sizeof(uint32_t));
|
|
|
|
for (int32_t t = 0; t < topicCount; t++) {
|
|
TopicT *topic = &topics[t];
|
|
topicContentOffsets[t] = offset;
|
|
uint32_t startOffset = offset;
|
|
|
|
for (int32_t r = 0; r < topic->recordCount; r++) {
|
|
RecordT *rec = &topic->records[r];
|
|
HlpRecordHdrT recHdr;
|
|
recHdr.type = rec->type;
|
|
recHdr.flags = rec->flags;
|
|
|
|
if (rec->type == HLP_REC_IMAGE) {
|
|
// Replace filename with HlpImageRefT
|
|
int32_t imgIdx = findImage(rec->data);
|
|
if (imgIdx < 0) {
|
|
fprintf(stderr, "error: image '%s' not found in references\n", rec->data);
|
|
recHdr.length = 0;
|
|
fwrite(&recHdr, sizeof(recHdr), 1, f);
|
|
offset += sizeof(recHdr);
|
|
} else {
|
|
HlpImageRefT imgRef;
|
|
imgRef.imageOffset = imageRefs[imgIdx].poolOffset;
|
|
imgRef.imageSize = imageRefs[imgIdx].fileSize;
|
|
recHdr.length = sizeof(HlpImageRefT);
|
|
fwrite(&recHdr, sizeof(recHdr), 1, f);
|
|
fwrite(&imgRef, sizeof(imgRef), 1, f);
|
|
offset += sizeof(recHdr) + sizeof(HlpImageRefT);
|
|
}
|
|
} else if (rec->type == HLP_REC_LINK) {
|
|
// Payload is "target\0display" -- write as-is
|
|
recHdr.length = rec->dataLen;
|
|
fwrite(&recHdr, sizeof(recHdr), 1, f);
|
|
fwrite(rec->data, 1, rec->dataLen, f);
|
|
offset += sizeof(recHdr) + rec->dataLen;
|
|
} else if (rec->type == HLP_REC_HRULE) {
|
|
recHdr.length = 0;
|
|
fwrite(&recHdr, sizeof(recHdr), 1, f);
|
|
offset += sizeof(recHdr);
|
|
} else {
|
|
// Text, headings, list items, code, table, note
|
|
recHdr.length = rec->dataLen;
|
|
fwrite(&recHdr, sizeof(recHdr), 1, f);
|
|
if (rec->dataLen > 0) {
|
|
fwrite(rec->data, 1, rec->dataLen, f);
|
|
}
|
|
offset += sizeof(recHdr) + rec->dataLen;
|
|
}
|
|
}
|
|
|
|
// Write end-of-topic record
|
|
HlpRecordHdrT endRec;
|
|
endRec.type = HLP_REC_END;
|
|
endRec.flags = 0;
|
|
endRec.length = 0;
|
|
fwrite(&endRec, sizeof(endRec), 1, f);
|
|
offset += sizeof(endRec);
|
|
|
|
topicContentSizes[t] = offset - startOffset;
|
|
}
|
|
|
|
// --- 3. TOC entries ---
|
|
hdr.tocOffset = offset;
|
|
hdr.tocCount = tocCount;
|
|
for (int32_t i = 0; i < tocCount; i++) {
|
|
HlpTocEntryT entry;
|
|
entry.titleStr = strTableFind(tocEntries[i].title);
|
|
entry.topicIdx = (tocEntries[i].topicIdx >= 0) ? (uint16_t)tocEntries[i].topicIdx : 0xFFFF;
|
|
entry.depth = (uint8_t)tocEntries[i].depth;
|
|
entry.flags = 0;
|
|
fwrite(&entry, sizeof(entry), 1, f);
|
|
offset += sizeof(entry);
|
|
}
|
|
|
|
// --- 4. Keyword index entries (sorted) ---
|
|
qsort(indexEntries, indexCount, sizeof(IndexEntryT), compareIndexEntries);
|
|
hdr.indexOffset = offset;
|
|
hdr.indexCount = indexCount;
|
|
for (int32_t i = 0; i < indexCount; i++) {
|
|
HlpIndexEntryT entry;
|
|
entry.keywordStr = strTableFind(indexEntries[i].keyword);
|
|
entry.topicIdx = (uint16_t)indexEntries[i].topicIdx;
|
|
entry.reserved = 0;
|
|
fwrite(&entry, sizeof(entry), 1, f);
|
|
offset += sizeof(entry);
|
|
}
|
|
|
|
// --- 5. Search index ---
|
|
hdr.searchOffset = offset;
|
|
{
|
|
HlpSearchHeaderT searchHdr;
|
|
searchHdr.trigramCount = trigramCount;
|
|
fwrite(&searchHdr, sizeof(searchHdr), 1, f);
|
|
offset += sizeof(searchHdr);
|
|
|
|
// Calculate posting list offsets
|
|
// Posting lists come after the trigram entry array
|
|
uint32_t postingBase = sizeof(searchHdr) + sizeof(HlpTrigramEntryT) * trigramCount;
|
|
uint32_t postingOff = 0;
|
|
|
|
// Write trigram entries
|
|
for (int32_t i = 0; i < trigramCount; i++) {
|
|
HlpTrigramEntryT entry;
|
|
entry.trigram[0] = trigrams[i].trigram[0];
|
|
entry.trigram[1] = trigrams[i].trigram[1];
|
|
entry.trigram[2] = trigrams[i].trigram[2];
|
|
entry.postingCount = (uint8_t)trigrams[i].postingCount;
|
|
entry.postingOffset = postingBase + postingOff;
|
|
fwrite(&entry, sizeof(entry), 1, f);
|
|
offset += sizeof(entry);
|
|
postingOff += sizeof(uint16_t) * trigrams[i].postingCount;
|
|
}
|
|
|
|
// Write posting lists
|
|
for (int32_t i = 0; i < trigramCount; i++) {
|
|
fwrite(trigrams[i].postings, sizeof(uint16_t), trigrams[i].postingCount, f);
|
|
offset += sizeof(uint16_t) * trigrams[i].postingCount;
|
|
}
|
|
}
|
|
hdr.searchSize = offset - hdr.searchOffset;
|
|
|
|
// --- 6. String table ---
|
|
hdr.stringTableOffset = offset;
|
|
hdr.stringTableSize = strTabSize;
|
|
fwrite(strTab, 1, strTabSize, f);
|
|
offset += strTabSize;
|
|
|
|
// --- 7. Topic directory (sorted by topic ID) ---
|
|
hdr.topicDirOffset = offset;
|
|
HlpTopicDirT *topicDir = calloc(topicCount, sizeof(HlpTopicDirT));
|
|
for (int32_t i = 0; i < topicCount; i++) {
|
|
topicDir[i].topicIdStr = strTableFind(topics[i].id);
|
|
topicDir[i].titleStr = strTableFind(topics[i].title);
|
|
topicDir[i].contentOffset = topicContentOffsets[i];
|
|
topicDir[i].contentSize = topicContentSizes[i];
|
|
topicDir[i].reserved = 0;
|
|
}
|
|
// Topic directory is NOT sorted -- indices must match what TOC and
|
|
// index entries reference (assigned sequentially during parsing).
|
|
fwrite(topicDir, sizeof(HlpTopicDirT), topicCount, f);
|
|
offset += sizeof(HlpTopicDirT) * topicCount;
|
|
|
|
// --- 8. Header (at EOF) ---
|
|
fwrite(&hdr, sizeof(hdr), 1, f);
|
|
offset += sizeof(hdr);
|
|
|
|
fclose(f);
|
|
|
|
free(topicDir);
|
|
free(topicContentOffsets);
|
|
free(topicContentSizes);
|
|
|
|
hlpcInfo(" wrote %u bytes\n", offset);
|
|
return 0;
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// Cleanup
|
|
// ---------------------------------------------------------------------------
|
|
|
|
static void freeAll(void) {
|
|
for (int32_t i = 0; i < topicCount; i++) {
|
|
for (int32_t r = 0; r < topics[i].recordCount; r++) {
|
|
free(topics[i].records[r].data);
|
|
}
|
|
free(topics[i].records);
|
|
}
|
|
for (int32_t i = 0; i < trigramCount; i++) {
|
|
free(trigrams[i].postings);
|
|
}
|
|
free(strTab);
|
|
free(strEntries);
|
|
}
|
|
|
|
|
|
// ---------------------------------------------------------------------------
|
|
// main
|
|
// ---------------------------------------------------------------------------
|
|
|
|
int main(int argc, char **argv) {
|
|
const char *outputPath = NULL;
|
|
char *inputFiles[256];
|
|
int32_t inputCount = 0;
|
|
|
|
// Parse command-line arguments
|
|
for (int32_t i = 1; i < argc; i++) {
|
|
if (strcmp(argv[i], "-o") == 0) {
|
|
if (++i >= argc) {
|
|
usage();
|
|
}
|
|
outputPath = argv[i];
|
|
} else if (strcmp(argv[i], "-i") == 0) {
|
|
if (++i >= argc) {
|
|
usage();
|
|
}
|
|
snprintf(imageDir, sizeof(imageDir), "%s", argv[i]);
|
|
imageDir[sizeof(imageDir) - 1] = '\0';
|
|
} else if (strcmp(argv[i], "--html") == 0) {
|
|
if (++i >= argc) {
|
|
usage();
|
|
}
|
|
htmlPath = argv[i];
|
|
} else if (strcmp(argv[i], "--quiet") == 0) {
|
|
quietMode = true;
|
|
} else if (argv[i][0] == '-') {
|
|
fprintf(stderr, "error: unknown option '%s'\n", argv[i]);
|
|
usage();
|
|
} else if (argv[i][0] == '@') {
|
|
// Response file: read filenames from the file, one per line
|
|
FILE *rf = fopen(argv[i] + 1, "r");
|
|
|
|
if (!rf) {
|
|
fprintf(stderr, "error: cannot open response file '%s'\n", argv[i] + 1);
|
|
return 1;
|
|
}
|
|
|
|
#define RESP_LINE_MAX 260
|
|
static char respLines[256][RESP_LINE_MAX];
|
|
char rline[RESP_LINE_MAX];
|
|
|
|
while (fgets(rline, (int)sizeof(rline), rf)) {
|
|
int32_t rlen = (int32_t)strlen(rline);
|
|
|
|
while (rlen > 0 && (rline[rlen - 1] == '\n' || rline[rlen - 1] == '\r' || rline[rlen - 1] == ' ')) {
|
|
rline[--rlen] = '\0';
|
|
}
|
|
|
|
if (rlen == 0 || rline[0] == '#') {
|
|
continue;
|
|
}
|
|
|
|
if (inputCount >= 256) {
|
|
fprintf(stderr, "error: too many input files\n");
|
|
fclose(rf);
|
|
return 1;
|
|
}
|
|
|
|
snprintf(respLines[inputCount], sizeof(respLines[inputCount]), "%s", rline);
|
|
inputFiles[inputCount] = respLines[inputCount];
|
|
inputCount++;
|
|
}
|
|
|
|
fclose(rf);
|
|
} else {
|
|
if (inputCount >= 256) {
|
|
fprintf(stderr, "error: too many input files\n");
|
|
return 1;
|
|
}
|
|
inputFiles[inputCount++] = argv[i];
|
|
}
|
|
}
|
|
|
|
if (!outputPath || inputCount == 0) {
|
|
usage();
|
|
}
|
|
|
|
hlpcInfo("dvxhlpc: DVX Help Compiler\n");
|
|
|
|
pass1Parse(inputCount, inputFiles);
|
|
if (errorCount > 0) {
|
|
fprintf(stderr, "Aborting due to %d error(s).\n", (int)errorCount);
|
|
freeAll();
|
|
return 1;
|
|
}
|
|
|
|
pass2Wrap();
|
|
|
|
// Emit HTML if requested (uses wrapped text, before binary passes)
|
|
if (htmlPath) {
|
|
if (emitHtml(htmlPath) == 0) {
|
|
hlpcInfo("HTML: wrote %s\n", htmlPath);
|
|
}
|
|
}
|
|
|
|
pass3StringTable();
|
|
pass4SearchIndex();
|
|
|
|
int result = pass5Serialize(outputPath);
|
|
if (result == 0) {
|
|
hlpcInfo("Done. %d topic(s), %d TOC entries, %d index keywords, %d trigrams.\n",
|
|
topicCount, tocCount, indexCount, trigramCount);
|
|
}
|
|
|
|
freeAll();
|
|
return result;
|
|
}
|