DVX_GUI/src/apps/kpunch/dvxbasic/compiler/obfuscate.c

625 lines
18 KiB
C

// The MIT License (MIT)
//
// Copyright (C) 2026 Scott Duensing
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to
// deal in the Software without restriction, including without limitation the
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
// sell copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
// IN THE SOFTWARE.
// obfuscate.c -- Release build name obfuscation
//
// See obfuscate.h for the high-level description.
#include "obfuscate.h"
#include "basEvents.h"
#include "../runtime/values.h"
#include <ctype.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// ============================================================
// Name map
// ============================================================
// One original-name -> obfuscated-name pair held by the name map.
// Both strings are heap copies that nameMapFree() releases.
typedef struct {
char *orig; // original name (strdup'd, case-preserved)
char *mapped; // new name (strdup'd, "C1" .. "Cn")
} NameEntryT;
// Growable array of name pairs. Entries are appended in discovery order and
// searched with a linear, case-insensitive scan (see nameMapLookup).
typedef struct {
NameEntryT *entries; // heap array of pairs; NULL until the first add
int32_t count; // number of entries currently in use
int32_t cap; // number of entries the array has room for
} NameMapT;
// Function prototypes (alphabetical). The two non-static functions are the
// entry points visible to other translation units; every static function is
// a helper private to this file.
void basObfuscateNames(BasModuleT *mod, const char **frmTexts, const int32_t *frmLens, int32_t frmCount, BasObfFrmT *outFrms);
int32_t basStripFrmComments(const char *src, int32_t srcLen, uint8_t *outBuf, int32_t outCap);
static void collectNamesFromFrm(const char *text, int32_t len, NameMapT *map);
static int32_t findFormEndPos(const char *text, int32_t len);
static bool isEventSuffix(const char *suffix);
static bool isIdentChar(int c);
static bool isValidIdent(const char *name);
static const char *nameMapAdd(NameMapT *m, const char *name);
static void nameMapFree(NameMapT *m);
static void nameMapInit(NameMapT *m);
static const char *nameMapLookup(const NameMapT *m, const char *name);
static const char *readToken(const char *p, const char *end, char *buf, int32_t bufSize);
static void replaceConstant(BasModuleT *mod, int32_t idx, const char *newText);
static int32_t rewriteFrmText(const char *src, int32_t srcLen, const NameMapT *map, uint8_t *out, int32_t outCap);
static void rewriteModuleConstants(BasModuleT *mod, const NameMapT *map);
static void rewriteModuleFormVars(BasModuleT *mod, const NameMapT *map);
static void rewriteModuleProcs(BasModuleT *mod, const NameMapT *map);
static const char *skipWhitespace(const char *p, const char *end);
// ============================================================
// Top-level entry point
// ============================================================
// Replace form/control names with generated "C<n>" names, consistently across
// every .frm text and the compiled module.
//
//   mod      - module whose constants, procedure names and form-variable
//              records are rewritten in place
//   frmTexts - frmCount .frm texts; individual entries may be NULL
//   frmLens  - byte length of each text
//   frmCount - number of forms
//   outFrms  - frmCount result slots. Each receives a malloc'd buffer holding
//              the rewritten form (cut after the outermost form's End) and its
//              length, or NULL/0 when the input was empty or memory ran out.
//              The buffers are not freed here; they are handed to the caller.
//
// Does nothing when mod is NULL, frmCount is negative, or forms are announced
// but one of the arrays is missing.
void basObfuscateNames(BasModuleT *mod, const char **frmTexts, const int32_t *frmLens, int32_t frmCount, BasObfFrmT *outFrms) {
    if (!mod || frmCount < 0) {
        return;
    }
    // All three arrays are indexed below, so they must exist once there is
    // at least one form. Bail out before touching the module so the module
    // and the forms cannot end up renamed inconsistently.
    if (frmCount > 0 && (!frmTexts || !frmLens || !outFrms)) {
        return;
    }

    NameMapT map;
    nameMapInit(&map);

    // Pass 1: collect all names from all .frm texts
    for (int32_t i = 0; i < frmCount; i++) {
        if (frmTexts[i] && frmLens[i] > 0) {
            collectNamesFromFrm(frmTexts[i], frmLens[i], &map);
        }
    }

    // Length of the longest replacement name; drives the output size bound.
    int32_t maxMappedLen = 1;
    for (int32_t i = 0; i < map.count; i++) {
        const char *m = map.entries[i].mapped;
        if (m) {
            int32_t mLen = (int32_t)strlen(m);
            if (mLen > maxMappedLen) {
                maxMappedLen = mLen;
            }
        }
    }

    // Pass 2: rewrite each .frm
    for (int32_t i = 0; i < frmCount; i++) {
        outFrms[i].data = NULL;
        outFrms[i].len  = 0;
        if (!frmTexts[i] || frmLens[i] <= 0) {
            continue;
        }

        int32_t strippedLen = findFormEndPos(frmTexts[i], frmLens[i]);

        // Worst-case output size. Two identifiers are always separated by at
        // least one non-identifier byte, so there are at most
        // (strippedLen + 1) / 2 of them, and each can grow by at most
        // maxMappedLen - 1 bytes (a 1-char name becoming the longest mapped
        // name). One extra byte is reserved for the trailing newline. A fixed
        // slack could be exceeded, which would silently truncate the form.
        int64_t worst = (int64_t)strippedLen
                      + (((int64_t)strippedLen + 1) / 2) * (int64_t)(maxMappedLen - 1)
                      + 1;
        if (worst > INT32_MAX) {
            continue;
        }
        int32_t  outCap = (int32_t)worst;
        uint8_t *outBuf = malloc((size_t)outCap);
        if (!outBuf) {
            continue;
        }

        int32_t outLen = rewriteFrmText(frmTexts[i], strippedLen, &map, outBuf, outCap);

        // Ensure trailing newline
        if (outLen > 0 && outBuf[outLen - 1] != '\n' && outLen < outCap) {
            outBuf[outLen++] = '\n';
        }

        // Give back the unused tail of the worst-case allocation. A failed
        // shrink is harmless: the original buffer is still valid.
        if (outLen > 0 && outLen < outCap) {
            uint8_t *shrunk = realloc(outBuf, (size_t)outLen);
            if (shrunk) {
                outBuf = shrunk;
            }
        }

        outFrms[i].data = outBuf;
        outFrms[i].len  = outLen;
    }

    // Pass 3: rewrite module
    rewriteModuleConstants(mod, &map);
    rewriteModuleProcs(mod, &map);
    rewriteModuleFormVars(mod, &map);

    nameMapFree(&map);
}
// Copy src to outBuf line by line, dropping comments and blank lines.
//
// For every input line (terminated by CR, LF or CRLF):
//   - everything from the first apostrophe outside double quotes is removed,
//   - a line whose first word is REM (any case) is removed entirely,
//   - leading and trailing spaces/tabs are removed,
//   - lines left empty are skipped; all others are written followed by '\n'.
//
// Copying stops at the first line that would leave fewer than one spare byte
// in outBuf. The output is not NUL-terminated. Returns the number of bytes
// written, or 0 for NULL/empty arguments.
int32_t basStripFrmComments(const char *src, int32_t srcLen, uint8_t *outBuf, int32_t outCap) {
    if (src == NULL || outBuf == NULL || srcLen <= 0 || outCap <= 0) {
        return 0;
    }

    int32_t written = 0;
    int32_t pos     = 0;

    while (pos < srcLen) {
        // Delimit the current line and step over its terminator.
        const int32_t begin = pos;
        for (; pos < srcLen; pos++) {
            if (src[pos] == '\n' || src[pos] == '\r') {
                break;
            }
        }
        const int32_t eol = pos;
        if (pos < srcLen && src[pos] == '\r') {
            pos++;
        }
        if (pos < srcLen && src[pos] == '\n') {
            pos++;
        }

        // The content stops at the first apostrophe that is not inside a
        // double-quoted string.
        int32_t stop   = eol;
        bool    quoted = false;
        for (int32_t k = begin; k < eol; k++) {
            if (src[k] == '"') {
                quoted = !quoted;
                continue;
            }
            if (src[k] == '\'' && !quoted) {
                stop = k;
                break;
            }
        }

        // First byte that is not indentation.
        int32_t first = begin;
        while (first < stop && (src[first] == ' ' || src[first] == '\t')) {
            first++;
        }

        // A line that starts with the word REM is a comment as a whole.
        const int32_t remaining = stop - first;
        if (remaining >= 3 && strncasecmp(src + first, "REM", 3) == 0) {
            if (remaining == 3 || src[first + 3] == ' ' || src[first + 3] == '\t') {
                stop = first;
            }
        }

        // Drop trailing blanks.
        while (stop > first && (src[stop - 1] == ' ' || src[stop - 1] == '\t')) {
            stop--;
        }

        // Nothing left on this line.
        if (stop <= first) {
            continue;
        }

        // Indentation is not copied: the form parser trims every line
        // anyway, so keeping it would only enlarge the embedded resource.
        const int32_t lineLen = stop - first;
        if (written + lineLen + 1 >= outCap) {
            break;
        }
        memcpy(outBuf + written, src + first, lineLen);
        written += lineLen;
        outBuf[written++] = '\n';
    }

    return written;
}
// ============================================================
// Pass 1: collect all form/control names from .frm texts
// ============================================================
// Walk a .frm text one line at a time and register the <Name> of every
// "Begin <Type> <Name>" line in the map. Lines without a type token are
// ignored, as are names that are not valid identifiers.
static void collectNamesFromFrm(const char *text, int32_t len, NameMapT *map) {
    const char *const stop = text + len;
    const char       *cur  = text;

    while (cur < stop) {
        // Delimit the line; accept CR, LF and CRLF terminators.
        const char *bol = cur;
        for (; cur < stop; cur++) {
            if (*cur == '\n' || *cur == '\r') {
                break;
            }
        }
        const char *eol = cur;
        if (cur < stop && *cur == '\r') {
            cur++;
        }
        if (cur < stop && *cur == '\n') {
            cur++;
        }

        // Only lines whose first word is "Begin" are of interest.
        const char *tok     = skipWhitespace(bol, eol);
        const bool  isBegin = (eol - tok) >= 6 && strncasecmp(tok, "Begin ", 6) == 0;
        if (!isBegin) {
            continue;
        }

        // First token after the keyword: the control type.
        char typeName[64];
        tok = readToken(skipWhitespace(tok + 6, eol), eol, typeName, sizeof(typeName));
        if (typeName[0] == '\0') {
            continue;
        }

        // Second token: the control (or form) name.
        char ctrlName[64];
        readToken(skipWhitespace(tok, eol), eol, ctrlName, sizeof(ctrlName));
        if (ctrlName[0] != '\0' && isValidIdent(ctrlName)) {
            nameMapAdd(map, ctrlName);
        }
    }
}
// ============================================================
// Pass 2: strip BASIC code from .frm text (everything after outer End)
// ============================================================
// Locate the end of the form description inside a .frm text.
//
// Begin/End lines are counted as nesting. Once a "Begin Form ..." line has
// been seen, the End line that brings the nesting back to zero closes the
// form; the returned value is the offset just past that line (terminator
// included). When no such End exists the whole length is returned.
static int32_t findFormEndPos(const char *text, int32_t len) {
    const char *const stop    = text + len;
    const char       *cur     = text;
    int32_t           depth   = 0;
    bool              sawForm = false;

    while (cur < stop) {
        // Delimit the line; accept CR, LF and CRLF terminators.
        const char *bol = cur;
        for (; cur < stop; cur++) {
            if (*cur == '\n' || *cur == '\r') {
                break;
            }
        }
        const char *eol = cur;
        if (cur < stop && *cur == '\r') {
            cur++;
        }
        if (cur < stop && *cur == '\n') {
            cur++;
        }

        const char   *tok   = skipWhitespace(bol, eol);
        const int32_t avail = (int32_t)(eol - tok);

        if (avail >= 6 && strncasecmp(tok, "Begin ", 6) == 0) {
            // Until the form has been found, check whether this Begin opens it.
            if (!sawForm) {
                const char *kind = skipWhitespace(tok + 6, eol);
                sawForm = (eol - kind) >= 5 && strncasecmp(kind, "Form ", 5) == 0;
            }
            depth++;
            continue;
        }

        // Anything that is not the standalone word "End" leaves the depth alone.
        if (avail < 3 || strncasecmp(tok, "End", 3) != 0) {
            continue;
        }
        if (avail != 3 && tok[3] != ' ' && tok[3] != '\t' && tok[3] != '\r') {
            continue;
        }

        depth--;
        if (sawForm && depth == 0) {
            return (int32_t)(cur - text);
        }
    }

    return len;
}
// Report whether suffix matches, ignoring case, one of the entries of the
// NULL-terminated basEventSuffixes table.
static bool isEventSuffix(const char *suffix) {
    int32_t idx = 0;

    while (basEventSuffixes[idx] != NULL) {
        if (strcasecmp(suffix, basEventSuffixes[idx]) == 0) {
            return true;
        }
        idx++;
    }

    return false;
}
// ============================================================
// Identifier checks, name map, and token reading
// ============================================================
// True when c may appear inside an identifier: a letter, a digit or '_'.
// c follows the <ctype.h> convention (an unsigned char value or EOF).
static bool isIdentChar(int c) {
    if (c == '_') {
        return true;
    }
    return isalnum(c) != 0;
}
// True when name is a non-empty identifier: it starts with a letter or '_'
// and continues with letters, digits or '_' only. NULL is not an identifier.
static bool isValidIdent(const char *name) {
    if (name == NULL || name[0] == '\0') {
        return false;
    }

    const unsigned char first = (unsigned char)name[0];
    if (first != '_' && !isalpha(first)) {
        return false;
    }

    // The first character is already known to be acceptable.
    for (const char *cur = name + 1; *cur != '\0'; cur++) {
        const unsigned char ch = (unsigned char)*cur;
        if (ch != '_' && !isalnum(ch)) {
            return false;
        }
    }

    return true;
}
// Register name in the map unless an entry matching it (ignoring case)
// already exists, assigning the next generated name "C<count + 1>".
//
// Returns the mapped name, which stays owned by the map, or NULL when an
// argument is NULL or memory could not be allocated. On failure the map is
// left unchanged apart from possibly having grown its capacity.
static const char *nameMapAdd(NameMapT *m, const char *name) {
    if (!m || !name) {
        return NULL;
    }

    const char *existing = nameMapLookup(m, name);
    if (existing) {
        return existing;
    }

    // Grow the array geometrically once it is full.
    if (m->count >= m->cap) {
        int32_t     newCap     = m->cap == 0 ? 16 : m->cap * 2;
        NameEntryT *newEntries = realloc(m->entries, (size_t)newCap * sizeof(*newEntries));
        if (!newEntries) {
            return NULL;
        }
        m->entries = newEntries;
        m->cap     = newCap;
    }

    char mapped[16];
    snprintf(mapped, sizeof(mapped), "C%ld", (long)(m->count + 1));

    // Make both copies before publishing the entry: an entry with a NULL
    // string would be dereferenced by the next case-insensitive lookup.
    char *origCopy   = strdup(name);
    char *mappedCopy = strdup(mapped);
    if (!origCopy || !mappedCopy) {
        free(origCopy);
        free(mappedCopy);
        return NULL;
    }

    NameEntryT *slot = &m->entries[m->count];
    slot->orig   = origCopy;
    slot->mapped = mappedCopy;
    m->count++;

    return slot->mapped;
}
// Release every string held by the map and the entry array itself, leaving
// the map empty (same state as after nameMapInit).
static void nameMapFree(NameMapT *m) {
    while (m->count > 0) {
        m->count--;
        free(m->entries[m->count].mapped);
        free(m->entries[m->count].orig);
    }

    free(m->entries);
    m->cap     = 0;
    m->count   = 0;
    m->entries = NULL;
}
// Put the map into its empty state: no array, zero entries, zero capacity.
static void nameMapInit(NameMapT *m) {
    *m = (NameMapT){ .entries = NULL, .count = 0, .cap = 0 };
}
// Look up an original name (case-insensitive). Returns mapped name or NULL.
static const char *nameMapLookup(const NameMapT *m, const char *name) {
for (int32_t i = 0; i < m->count; i++) {
if (strcasecmp(m->entries[i].orig, name) == 0) {
return m->entries[i].mapped;
}
}
return NULL;
}
// Copy the next whitespace-delimited token from [p, end) into buf.
//
// A token ends at a space, tab, CR, LF or at end. At most bufSize - 1
// characters are stored and buf is always NUL-terminated when it has room
// for at least one byte. A token longer than the buffer is stored truncated,
// but it is still consumed completely, so the returned pointer is always the
// first character after the token -- never a position inside it.
static const char *readToken(const char *p, const char *end, char *buf, int32_t bufSize) {
    // Without a usable buffer nothing is stored; the token is only skipped.
    const int32_t room = (buf != NULL && bufSize > 0) ? bufSize - 1 : 0;
    int32_t       len  = 0;

    while (p < end && *p != ' ' && *p != '\t' && *p != '\r' && *p != '\n') {
        if (len < room) {
            buf[len++] = *p;
        }
        p++;
    }

    if (buf != NULL && bufSize > 0) {
        buf[len] = '\0';
    }
    return p;
}
// ============================================================
// Module rewriting
// ============================================================
// Swap constant-pool entry idx of mod for a new string built from newText.
// The old string is unreferenced only after the new one exists; when
// basStringNew returns NULL the entry is left as it was.
static void replaceConstant(BasModuleT *mod, int32_t idx, const char *newText) {
    const int32_t textLen     = (int32_t)strlen(newText);
    BasStringT   *replacement = basStringNew(newText, textLen);

    if (replacement != NULL) {
        basStringUnref(mod->constants[idx]);
        mod->constants[idx] = replacement;
    }
}
// Copy src to out, replacing every identifier found outside double-quoted
// strings with its mapped name when the map has one (case-insensitive).
// Everything else -- quoted text, punctuation, unmapped identifiers -- is
// copied through unchanged.
//
// Output beyond outCap is dropped. Returns the number of bytes written,
// or 0 when an argument is NULL.
static int32_t rewriteFrmText(const char *src, int32_t srcLen, const NameMapT *map, uint8_t *out, int32_t outCap) {
    if (!src || !map || !out) {
        return 0;
    }

    int32_t outLen = 0;
    int32_t i      = 0;
    bool    inStr  = false;

    while (i < srcLen) {
        char c = src[i];

        // Quotes toggle string mode and are copied like any other byte.
        if (c == '"') {
            inStr = !inStr;
            if (outLen < outCap) {
                out[outLen++] = (uint8_t)c;
            }
            i++;
            continue;
        }

        // Identifier outside a string: letter or '_' followed by any run
        // of identifier characters.
        if (!inStr && (isalpha((unsigned char)c) || c == '_')) {
            int32_t identStart = i;
            while (i < srcLen && isIdentChar((unsigned char)src[i])) {
                i++;
            }
            int32_t identLen = i - identStart;

            // Look the identifier up only when it fits the scratch buffer
            // whole. A truncated copy could at best produce a false match
            // on a prefix, so over-long identifiers are treated as unmapped.
            const char *mapped = NULL;
            char        ident[128];
            if (identLen < (int32_t)sizeof(ident)) {
                memcpy(ident, src + identStart, (size_t)identLen);
                ident[identLen] = '\0';
                mapped = nameMapLookup(map, ident);
            }

            // Unmapped identifiers are emitted straight from the source so
            // that none of their characters are lost, whatever their length.
            const char *emit    = mapped ? mapped : src + identStart;
            int32_t     emitLen = mapped ? (int32_t)strlen(mapped) : identLen;
            for (int32_t k = 0; k < emitLen && outLen < outCap; k++) {
                out[outLen++] = (uint8_t)emit[k];
            }
            continue;
        }

        if (outLen < outCap) {
            out[outLen++] = (uint8_t)c;
        }
        i++;
    }

    return outLen;
}
// Replace every constant-pool string of mod whose entire text equals a
// mapped name (ignoring case) with the mapped name. Empty pool slots and
// non-matching constants are left alone.
static void rewriteModuleConstants(BasModuleT *mod, const NameMapT *map) {
    for (int32_t idx = 0; idx < mod->constCount; idx++) {
        const BasStringT *text        = mod->constants[idx];
        const char       *replacement = text ? nameMapLookup(map, text->data) : NULL;

        if (replacement != NULL) {
            replaceConstant(mod, idx, replacement);
        }
    }
}
// Rename the owning form recorded in each form-variable info entry of mod
// when that form name has a mapping. The new name is written in place,
// bounded by the size of the formName field.
static void rewriteModuleFormVars(BasModuleT *mod, const NameMapT *map) {
    for (int32_t idx = 0; idx < mod->formVarInfoCount; idx++) {
        BasFormVarInfoT *info    = mod->formVarInfo + idx;
        const char      *newName = nameMapLookup(map, info->formName);

        if (newName == NULL) {
            continue;
        }
        snprintf(info->formName, sizeof(info->formName), "%s", newName);
    }
}
// Apply the name map to the procedure table of mod.
//
// For every named procedure:
//   - its formName is replaced when the form has a mapping, and
//   - a name of the shape "<owner>_<event>", where <event> is a known event
//     suffix and <owner> has a mapping, becomes "<mapped>_<event>".
// The split is made at the LAST underscore, so owner names may themselves
// contain underscores.
static void rewriteModuleProcs(BasModuleT *mod, const NameMapT *map) {
    for (int32_t idx = 0; idx < mod->procCount; idx++) {
        BasProcEntryT *entry = mod->procs + idx;
        if (entry->name[0] == '\0') {
            continue;
        }

        // The owning form name binds form-scope variables at runtime; keep
        // it in step with the renamed form.
        if (entry->formName[0] != '\0') {
            const char *newForm = nameMapLookup(map, entry->formName);
            if (newForm != NULL) {
                snprintf(entry->formName, sizeof(entry->formName), "%s", newForm);
            }
        }

        // Only "<something>_<EventSuffix>" names are event handlers.
        char *sep = strrchr(entry->name, '_');
        if (sep == NULL || !isEventSuffix(sep + 1)) {
            continue;
        }

        // NUL-terminated copy of the part before the separator, for lookup.
        char    owner[BAS_MAX_PROC_NAME];
        int32_t ownerLen = (int32_t)(sep - entry->name);
        if (ownerLen > (int32_t)sizeof(owner) - 1) {
            ownerLen = (int32_t)sizeof(owner) - 1;
        }
        memcpy(owner, entry->name, ownerLen);
        owner[ownerLen] = '\0';

        const char *newOwner = nameMapLookup(map, owner);
        if (newOwner == NULL) {
            continue;
        }

        // Build the new name in a scratch buffer first: the suffix still
        // points into entry->name, which is about to be overwritten.
        char renamed[BAS_MAX_PROC_NAME];
        snprintf(renamed, sizeof(renamed), "%s_%s", newOwner, sep + 1);
        snprintf(entry->name, sizeof(entry->name), "%s", renamed);
    }
}
// ============================================================
// .frm parsing helpers
// ============================================================
// Advance p past spaces and tabs without going beyond end.
// Returns the first position that holds neither, or end.
static const char *skipWhitespace(const char *p, const char *end) {
    for (; p < end; p++) {
        if (*p != ' ' && *p != '\t') {
            break;
        }
    }
    return p;
}