basic2c/basic2c.c
2026-02-21 18:51:40 -06:00

5571 lines
182 KiB
C
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// ============================================================================
// basic2c.c - A BASIC to C Transpiler
//
// Translates BASIC source code into equivalent C source code.
//
// Supported features:
// - Classic line-numbered BASIC and named labels (GOTO, GOSUB/RETURN)
// - Modern structured BASIC (SUB, FUNCTION, IF/END IF, etc.)
// - Data types: BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING
// - User-defined types (TYPE/END TYPE) with nesting and SIZEOF
// - Dynamic arrays (DIM, REDIM), multidimensional (up to N-D)
// - Parameter passing: BYVAL (by value) and BYREF (by reference)
// - LOCAL and STATIC variable declarations inside SUB/FUNCTION
// - Constants (CONST) with compile-time substitution
// - Control flow: IF/ELSEIF/ELSE, FOR/NEXT, WHILE/WEND, DO/LOOP,
// SELECT CASE, ON GOTO, ON GOSUB, EXIT, CONTINUE
// - PRINT statement with ? shortcut, PRINT USING for formatted output
// - Operators: arithmetic, comparison, string concatenation (+, &),
// bitwise/logical AND, OR, NOT, XOR
// - DATA/READ/RESTORE for inline data
// - File I/O: OPEN/CLOSE, PRINT #, INPUT #, LINE INPUT #, WRITE #
// - Random-access file I/O: GET, PUT with record numbers
// - String functions: LEN, MID$, LEFT$, RIGHT$, STR$, VAL, CHR$,
// ASC, UCASE$, LCASE$, INSTR, STRING$, LTRIM$, RTRIM$, TRIM$,
// SPACE$, HEX$, OCT$, MID$ assignment
// - Print functions: TAB, SPC for cursor positioning
// - Math functions: ABS, INT, SQR, SIN, COS, TAN, ATN, LOG, EXP,
// SGN, RND (optional argument ignored), RANDOMIZE
// - Array functions: LBOUND, UBOUND
// - I/O functions: EOF, LOF, FREEFILE
// - SWAP for exchanging variable values
// - $INCLUDE metacommand for file inclusion with nested include
// support, circular detection, and file+line error reporting
// - Extensible built-in functions via builtins.def (compile-time)
// - External function definitions via functions.def (runtime)
// - Debug and release runtime modes (--release or -r flag)
//
// Usage: basic2c [--release|-r] input.bas [output.c]
// If output.c is omitted, C code is written to stdout.
//
// Build: cc -o basic2c basic2c.c -lm
//
// Architecture:
// 1. Preprocessor - processes $INCLUDE directives, builds line map
// 2. Lexer - tokenizes BASIC source (case-insensitive keywords)
// 3. Parser - recursive descent, builds an AST
// 4. Codegen - walks AST, emits C source with a small runtime library
// ============================================================================
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
// -----------------------------------------------------------------------
// Section 1: Constants and Limits
// -----------------------------------------------------------------------
// Fixed capacities for the statically sized buffers and tables declared
// further down in this file.
#define MAX_TOKEN_LEN 4096 // max length of one token string (size of Token.sval)
#define MAX_IDENT 128 // max identifier length (including the terminating NUL)
#define MAX_PARAMS 32 // max parameters per SUB/FUNCTION
#define MAX_SYMBOLS 2048 // symbol table capacity
#define MAX_GOSUB_SITES 512 // max GOSUB return-point IDs
#define MAX_LINE_LABELS 4096 // max classic line-number labels (also sizes the GOTO target tables)
#define MAX_NODES 65536 // AST node pool size
#define MAX_ARGS 64 // max arguments in a PRINT / CALL list
#define MAX_SOURCE_LINES 65536 // max lines in preprocessed source
#define MAX_INCLUDE_DEPTH 16 // max nested $INCLUDE depth
#define MAX_INCLUDE_FILES 64 // max distinct included filenames
#define MAX_EXTERN_FUNCS 128 // max external function definitions
#define MAX_EXTERN_CODE 256 // max C code template length (including the terminating NUL)
// -----------------------------------------------------------------------
// Section 2: Enumerations
// -----------------------------------------------------------------------
// Token types produced by the lexer. nextToken() stores one of these in
// gTok.type; keyword tokens are assigned by matching the scanned word
// against gKeywords[].
typedef enum {
TOK_EOF = 0,
TOK_NEWLINE, // end of line (statement separator)
TOK_COLON, // : (statement separator on same line)
TOK_INT_LIT, // integer literal
TOK_DBL_LIT, // floating-point literal
TOK_STR_LIT, // "..." string literal
TOK_IDENT, // identifier (variable / sub / function name)
// ---------- keywords ----------
TOK_DIM, TOK_REDIM, TOK_AS,
TOK_BYTE, TOK_INTEGER, TOK_LONG, TOK_FLOAT, TOK_DOUBLE, TOK_STRING,
TOK_LET, TOK_PRINT, TOK_INPUT,
TOK_IF, TOK_THEN, TOK_ELSE, TOK_ELSEIF, TOK_END,
TOK_FOR, TOK_TO, TOK_STEP, TOK_NEXT,
TOK_WHILE, TOK_WEND,
TOK_DO, TOK_LOOP, TOK_UNTIL,
TOK_GOTO, TOK_GOSUB, TOK_RETURN,
TOK_SUB, TOK_FUNCTION, TOK_CALL,
TOK_BYVAL, TOK_BYREF,
TOK_LOCAL, TOK_STATIC,
TOK_EXIT,
TOK_AND, TOK_OR, TOK_NOT, TOK_MOD, TOK_XOR,
// NOTE(review): gKeywords[] has no "SWAP" entry, so nextToken() never
// yields TOK_SWAP; presumably SWAP is matched as an identifier by the
// parser - confirm.
TOK_SELECT, TOK_CASE, TOK_SWAP, TOK_CONST, TOK_ON,
// nextToken() swallows a REM comment itself and returns TOK_NEWLINE before
// consulting the keyword table, so the lexer does not emit TOK_REM.
TOK_REM,
TOK_OPEN, TOK_CLOSE, TOK_OUTPUT, TOK_APPEND, TOK_BINARY,
TOK_LINE, TOK_WRITE,
// DATA, READ, RESTORE, GET, PUT, RANDOM, SIZEOF are contextual
// keywords (checked as TOK_IDENT to avoid colliding with variable names)
// ---------- operators / punctuation ----------
TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_BSLASH,
TOK_CARET,
TOK_EQ, TOK_NE, TOK_LT, TOK_GT, TOK_LE, TOK_GE,
TOK_LPAREN, TOK_RPAREN, TOK_COMMA, TOK_SEMI,
TOK_AMP, // & string concatenation
TOK_HASH, // # file number prefix
TOK_DOT, // . member access
TOK_TYPE // TYPE keyword (user-defined types)
} TokenType;
// AST node kinds. The trailing comment on each enumerator documents how the
// generic Node fields (a, b, c, d, ival, ival2, dval, sval, dataType) are
// used for that kind of node.
typedef enum {
NODE_PROGRAM, // root: a = first top-level item (linked->next)
NODE_BLOCK, // block of statements: a = first stmt
NODE_INT_LIT, // ival = value
NODE_DBL_LIT, // dval = value
NODE_STR_LIT, // sval = string content
NODE_IDENT, // sval = name
NODE_ARRAY_REF, // sval = name, a = index exprs (linked list)
NODE_BINOP, // ival = op token, a = left, b = right
NODE_UNOP, // ival = op token, a = operand
NODE_ASSIGN, // a = target (IDENT/ARRAY_REF), b = value
NODE_DIM, // sval=name, dataType, a=sizes (list), ival=ndims
NODE_REDIM, // sval=name, dataType, a=sizes (list), ival=ndims
NODE_PRINT, // a = first print-item (linked->next)
NODE_PRINT_ITEM, // a = expr, ival = separator after (';'=1,','=2)
NODE_PRINT_USING, // a = format expr, b = value list (linked->next)
NODE_INPUT, // sval = prompt (or NULL), a = first var ->next
NODE_IF, // a=cond, b=then-block, c=else-part
NODE_FOR, // sval=var, a=start, b=end, c=step, d=body
NODE_WHILE, // a=cond, b=body
NODE_DO_LOOP, // a=cond, b=body, ival bits: 1=UNTIL,2=bottom
NODE_GOTO, // ival=line# or sval=label
NODE_GOSUB, // ival=line#, ival2=return-point-id
NODE_RETURN, // a=expr (FUNCTION return) or NULL
NODE_LABEL, // ival=line number
NODE_SUB, // sval=name, a=param list, b=body
NODE_FUNC, // sval=name, a=params, b=body, dataType=ret
NODE_PARAM, // sval=name, dataType, ival=passMode
NODE_CALL, // sval=name, a=arg list (linked->next)
NODE_FUNC_CALL, // sval=name, a=arg list (linked->next)
NODE_EXIT, // ival=what (TOK_FOR,TOK_WHILE,TOK_DO,etc)
NODE_CONTINUE, // ival=what (TOK_FOR,TOK_WHILE,TOK_DO)
NODE_LOCAL, // sval=name, dataType
NODE_STATIC, // sval=name, dataType
NODE_END, // END statement
NODE_OPEN, // a=filename, b=file# expr, ival=mode
NODE_CLOSE, // b=file# expr
NODE_FILE_PRINT, // b=file# expr, a=print items
NODE_FILE_INPUT, // b=file# expr, a=variable list
NODE_LINE_INPUT, // b=file# expr, a=target variable
NODE_FILE_WRITE, // b=file# expr, a=expression list
NODE_DATA, // a=linked list of literal items, line=source ln
NODE_READ, // a=linked list of NODE_IDENT vars to read into
NODE_RESTORE, // ival=target line number (0=beginning)
NODE_TYPE_DEF, // sval=type name, ival=udtIndex
NODE_DOT_ACCESS, // a=base expr, sval=field name, ival2=udtIndex
NODE_GET, // a=file# expr, b=record# expr, c=var
NODE_PUT, // a=file# expr, b=record# expr, c=var
NODE_SELECT, // a=test expr, b=first NODE_CASE (linked->next)
NODE_CASE, // a=value exprs (linked), b=body block, ival=flags
NODE_SWAP, // a=first var, b=second var
NODE_CONST_DECL, // sval=name, a=value expr
NODE_RANDOMIZE, // a=seed expr (or NULL)
NODE_ON_GOTO, // a=expr, b=label list (NODE_INT_LIT/NODE_IDENT)
NODE_ON_GOSUB, // a=expr, b=label list, ival2=first return-point-id
NODE_MID_ASSIGN // a=target string var, b=start, c=len, d=replacement
} NodeType;
// BASIC data types, ordered by numeric promotion rank so that
// promoteType() can simply take the maximum of two types.
// Note that TYPE_STR and TYPE_UDT sort above every numeric type;
// promoteType() special-cases TYPE_STR but not TYPE_UDT, so a UDT operand
// paired with a numeric one yields TYPE_UDT.
typedef enum {
TYPE_VOID = 0, // used for SUB (no return value)
TYPE_BYTE, // BYTE -> uint8_t
TYPE_INT, // INTEGER -> int16_t
TYPE_LONG, // LONG -> int32_t
TYPE_FLOAT, // FLOAT -> float
TYPE_DBL, // DOUBLE -> double
TYPE_STR, // STRING -> char*
TYPE_UDT // user-defined TYPE -> struct
} DataType;
// Parameter passing modes, stored per parameter in Symbol.paramModes[].
// isByrefParam() tests for PASS_BYREF.
typedef enum {
PASS_BYVAL = 0,
PASS_BYREF = 1
} PassMode;
// -----------------------------------------------------------------------
// Section 3: Data Structures
// -----------------------------------------------------------------------
// A single token from the lexer. nextToken() resets ival, dval and sval
// before scanning, so fields not relevant to the token type are zero/empty.
typedef struct {
TokenType type;
int line; // source line where token appears
int ival; // integer value (for TOK_INT_LIT)
double dval; // double value (for TOK_DBL_LIT)
char sval[MAX_TOKEN_LEN]; // string payload: literal text, number text, or identifier/keyword spelling
} Token;
// AST node - a compact tagged structure.
// Child pointers a,b,c,d have node-type-specific meanings (see the NodeType enum).
// The 'next' pointer chains siblings (statement lists, param lists).
// Nodes are handed out zero-initialized by newNode().
typedef struct Node {
NodeType type;
DataType dataType; // expression result type / decl type
int ival; // multi-purpose int (operator, flags)
int ival2; // secondary int (e.g. gosub return id)
double dval; // double literal value
char *sval; // identifier name / string literal
struct Node *a, *b, *c, *d;// child pointers
struct Node *next; // next sibling in a list
int line; // source line for error messages
} Node;
// Symbol table entry - tracks variables, arrays, subs, and functions.
// Entries live in gSyms[] and are found by case-insensitive name
// (see symLookup()).
typedef struct {
char name[MAX_IDENT];
DataType dataType;
int isArray; // 1 if dynamic array
int ndims; // number of dimensions (0=scalar)
int isFunc; // 1 = FUNCTION, 2 = SUB
int paramCount;
DataType paramTypes[MAX_PARAMS];
PassMode paramModes[MAX_PARAMS];
char paramNames[MAX_PARAMS][MAX_IDENT];
DataType returnType; // for functions
int udtIndex; // index into gUdts[] for TYPE_UDT
} Symbol;
// User-defined type (UDT) support.
// Each TYPE definition is stored as one UdtDef in gUdts[]; a field that is
// itself a UDT refers to its definition through UdtField.udtIndex.
#define MAX_UDTS 64
#define MAX_UDT_FIELDS 32
// One field of a user-defined type.
typedef struct {
char name[MAX_IDENT];
DataType dataType;
int strLen; // >0 for STRING * N (fixed-length)
int udtIndex; // index into gUdts[] if TYPE_UDT
} UdtField;
// One user-defined type: its name and its fields in declaration order.
typedef struct {
char name[MAX_IDENT];
UdtField fields[MAX_UDT_FIELDS];
int fieldCount;
} UdtDef;
static UdtDef gUdts[MAX_UDTS];
static int gUdtCount = 0; // number of entries of gUdts[] in use
static int gLastUdtIndex = -1; // side-channel from parseType()
// -----------------------------------------------------------------------
// Section 4: Global State
// -----------------------------------------------------------------------
// Runtime mode: 0=debug (with error checks), 1=release (minimal)
static int gRelease = 0;
// Line map: maps merged-source line numbers to original file + line.
// fatal() indexes it with (line - 1), i.e. merged line numbers are 1-based.
typedef struct {
const char *fileName; // interned filename pointer
int origLine; // 1-based line in original file
} LineMapEntry;
static LineMapEntry gLineMap[MAX_SOURCE_LINES];
static int gLineMapCount = 0; // number of entries of gLineMap[] in use
// Interned filename pool: each distinct filename is stored once so that
// other tables can share a single stable pointer to it.
static char *gFileNames[MAX_INCLUDE_FILES];
static int gFileNameCount = 0;

// Return the pooled copy of 'name', adding it to the pool on first use.
//
// name    - NUL-terminated filename; compared case-sensitively with strcmp().
// returns - pointer to the pool's own copy; it stays valid until exit.
//
// Exits with status 1 if the pool already holds MAX_INCLUDE_FILES names or
// if memory for the copy cannot be allocated.
static const char *internFileName(const char *name) {
    for (int i = 0; i < gFileNameCount; i++)
        if (strcmp(gFileNames[i], name) == 0) return gFileNames[i];
    if (gFileNameCount >= MAX_INCLUDE_FILES) {
        fprintf(stderr, "Too many include files (max %d)\n", MAX_INCLUDE_FILES);
        exit(1);
    }
    // Copy by hand with an explicit allocation check: a NULL stored in the
    // pool would crash the strcmp() above on the next lookup.
    size_t bytes = strlen(name) + 1;
    char *copy = malloc(bytes);
    if (!copy) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    memcpy(copy, name, bytes);
    gFileNames[gFileNameCount] = copy;
    return gFileNames[gFileNameCount++];
}
// Source code being lexed. peekChar()/readChar() read from gSrc[gSrcPos]
// and treat gSrcPos >= gSrcLen as end of input.
static const char *gSrc = NULL; // source text
static int gSrcPos = 0; // current read position
static int gSrcLen = 0; // total source length
static int gLine = 1; // current source line number
// Current token for the recursive-descent parser, filled in by nextToken().
// (Only a single current token is declared here; there is no separate peek token.)
static Token gTok; // current token
// AST node pool - simple bump allocator (nodes live until exit)
static Node gNodePool[MAX_NODES];
static int gNodeCount = 0;
// Symbol table
static Symbol gSyms[MAX_SYMBOLS];
static int gSymCount = 0;
// GOSUB bookkeeping: count of GOSUB sites for generating return switch
static int gGosubCount = 0;
// Collected line-number labels for the RETURN dispatch table
static int gLineLabels[MAX_LINE_LABELS];
static int gLineLabelCount = 0;
// Line numbers that are actually targeted by GOTO or GOSUB.
// Only these need C labels emitted to avoid -Wunused-label.
static int gGotoTargets[MAX_LINE_LABELS];
static int gGotoTargetCount = 0;
// Named (string) labels targeted by GOTO or GOSUB
// (heap copies made by recordGotoStrTarget(), compared case-insensitively)
static char *gGotoStrTargets[MAX_LINE_LABELS];
static int gGotoStrTargetCount = 0;
// Compile-time constant table (for CONST declarations)
#define MAX_CONSTS 256
// One CONST definition: its name, its type, and its value.
// NOTE(review): presumably numVal is used for numeric constants and strVal
// for string constants; the code that fills these is not in view - confirm.
typedef struct {
char name[MAX_IDENT];
DataType dataType;
double numVal;
char strVal[MAX_TOKEN_LEN];
} ConstDef;
static ConstDef gConsts[MAX_CONSTS];
static int gConstCount = 0; // number of entries of gConsts[] in use
// External function definitions (loaded from functions.def).
// Entries are filled by loadExternFuncs() and found by case-insensitive
// name through externFuncLookup().
typedef struct {
char name[MAX_IDENT]; // BASIC function name (e.g., "CEIL")
DataType returnType; // return type
char cCode[MAX_EXTERN_CODE]; // C code template (% = arg, %1 %2 = numbered)
} ExternFunc;
static ExternFunc gExternFuncs[MAX_EXTERN_FUNCS];
static int gExternFuncCount = 0; // number of entries of gExternFuncs[] in use
// Built-in function definitions (from builtins.def at compile time).
// builtins.def is expected to consist of BUILTIN(name, type, c_code) lines;
// the temporary BUILTIN macro below turns each into one table initializer.
typedef struct {
const char *name;
DataType returnType;
const char *cCode;
} BuiltinDef;
#define BUILTIN(n, t, c) {n, t, c},
static const BuiltinDef gBuiltinDefs[] = {
#include "builtins.def"
{NULL, 0, NULL} // sentinel: builtinDefLookup() stops at the NULL name
};
#undef BUILTIN
// Code-generator state
static int gIndent = 0; // current indentation depth (emit() writes 4 spaces per level)
static FILE *gOut = NULL; // output file handle
// Track whether we are inside a SUB/FUNCTION (for scope)
static int gInFunc = 0;
static const char *gFuncName = NULL; // current function name (looked up by isByrefParam())
static DataType gFuncRet = TYPE_VOID;
// -----------------------------------------------------------------------
// Section 5: Utility Functions
// -----------------------------------------------------------------------
// Report a fatal error with source file/line and exit
static void fatal(int line, const char *fmt, ...) {
va_list ap;
if (line > 0 && line <= gLineMapCount) {
LineMapEntry *e = &gLineMap[line - 1];
fprintf(stderr, "Error (%s:%d): ", e->fileName, e->origLine);
} else {
fprintf(stderr, "Error (line %d): ", line);
}
va_start(ap, fmt);
vfprintf(stderr, fmt, ap);
va_end(ap);
fprintf(stderr, "\n");
exit(1);
}
// Hand out the next free AST node from gNodePool.
//
// type    - node kind to store in the new node.
// line    - source line recorded in the node (also used for the error report).
// returns - a node whose other fields are all zero.
//
// Calls fatal() when the pool is exhausted.
static Node *newNode(NodeType type, int line) {
    if (gNodeCount >= MAX_NODES)
        fatal(line, "AST node pool exhausted (max %d)", MAX_NODES);

    Node *node = gNodePool + gNodeCount;
    gNodeCount++;

    memset(node, 0, sizeof *node);
    node->line = line;
    node->type = type;
    return node;
}
// Return a freshly malloc'ed copy of 's', or NULL when 's' is NULL.
// Prints "Out of memory" and exits with status 1 if allocation fails.
static char *strDup(const char *s) {
    if (s == NULL)
        return NULL;

    size_t bytes = strlen(s) + 1;   // include the terminating NUL
    char *copy = malloc(bytes);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    memcpy(copy, s, bytes);
    return copy;
}
// Case-insensitive string equality test.
//
// returns - 0 when the strings are equal ignoring case, non-zero otherwise
//           (this is NOT an ordering like strcmp()). Two NULL pointers
//           compare equal; NULL versus non-NULL compares unequal.
static int strIcmp(const char *a, const char *b) {
    if (a == NULL || b == NULL)
        return a != b;

    for (; *a != '\0' && *b != '\0'; a++, b++) {
        int ca = toupper((unsigned char)*a);
        int cb = toupper((unsigned char)*b);
        if (ca != cb)
            return 1;
    }
    // Equal only if both strings ended at the same position.
    return *a != *b;
}
// Look up an external function by name; returns pointer or NULL
static ExternFunc *externFuncLookup(const char *name) {
for (int i = 0; i < gExternFuncCount; i++)
if (strIcmp(gExternFuncs[i].name, name) == 0)
return &gExternFuncs[i];
return NULL;
}
// Look up a built-in function definition by name; returns pointer or NULL
static const BuiltinDef *builtinDefLookup(const char *name) {
for (int i = 0; gBuiltinDefs[i].name; i++)
if (strIcmp(gBuiltinDefs[i].name, name) == 0)
return &gBuiltinDefs[i];
return NULL;
}
// Map a type name read from a definition file to a DataType.
//
// Leading spaces are skipped; the rest of the string must match one of the
// known names exactly (ignoring case). Anything else, including an empty
// string, yields TYPE_DBL.
static DataType parseTypeName(const char *s) {
    static const struct {
        const char *word;
        DataType type;
    } kTypeNames[] = {
        {"byte",    TYPE_BYTE},
        {"integer", TYPE_INT},
        {"long",    TYPE_LONG},
        {"float",   TYPE_FLOAT},
        {"double",  TYPE_DBL},
        {"string",  TYPE_STR},
    };

    while (*s == ' ')
        s++;

    for (size_t i = 0; i < sizeof kTypeNames / sizeof kTypeNames[0]; i++) {
        if (strIcmp(s, kTypeNames[i].word) == 0)
            return kTypeNames[i].type;
    }
    return TYPE_DBL;  // default
}
// Load external function definitions from a text file into gExternFuncs[].
//
// Each definition occupies one line of the form
//     name : type : c_code
// Lines whose first non-blank character is '#' are comments; blank lines
// and lines without two ':' separators are ignored, as are lines whose
// name part is empty (such an entry could never be looked up).
//
// filename - path of the definition file. A file that cannot be opened is
//            not an error: no external functions are loaded.
//
// Name and type are trimmed of surrounding spaces/tabs; the type is mapped
// with parseTypeName(). The C template is trimmed and, if it does not fit
// in MAX_EXTERN_CODE, truncated with a warning. Loading stops with a
// warning once MAX_EXTERN_FUNCS entries have been stored.
static void loadExternFuncs(const char *filename) {
    FILE *f = fopen(filename, "r");
    if (!f) return;  // file not found is OK, just no external funcs

    char line[512];
    while (fgets(line, sizeof(line), f)) {
        // An over-long line fills the buffer before its newline is reached.
        // Discard the remainder so it is not parsed as a separate entry.
        if (!strchr(line, '\n')) {
            int c;
            while ((c = fgetc(f)) != EOF && c != '\n')
                ;
        }

        // Skip comments and blank lines
        char *p = line;
        while (*p == ' ' || *p == '\t') p++;
        if (*p == '#' || *p == '\n' || *p == '\0') continue;

        // Parse: name : type : c_code
        char *colon1 = strchr(p, ':');
        if (!colon1) continue;
        char *colon2 = strchr(colon1 + 1, ':');
        if (!colon2) continue;

        // Name is [p, colon1) with trailing blanks removed. nameEnd is an
        // exclusive end pointer, so it never moves before the buffer start.
        char *name = p;
        char *nameEnd = colon1;
        while (nameEnd > name && (nameEnd[-1] == ' ' || nameEnd[-1] == '\t'))
            nameEnd--;
        if (nameEnd == name) continue;  // empty name: nothing to register
        *nameEnd = '\0';

        if (gExternFuncCount >= MAX_EXTERN_FUNCS) {
            fprintf(stderr, "Warning: too many external functions, ignoring rest\n");
            break;
        }
        ExternFunc *ef = &gExternFuncs[gExternFuncCount];
        snprintf(ef->name, sizeof ef->name, "%s", name);

        // Type is (colon1, colon2), trimmed on both sides.
        char *typeStr = colon1 + 1;
        while (*typeStr == ' ' || *typeStr == '\t') typeStr++;
        char *typeEnd = colon2;
        while (typeEnd > typeStr && (typeEnd[-1] == ' ' || typeEnd[-1] == '\t'))
            typeEnd--;
        *typeEnd = '\0';
        ef->returnType = parseTypeName(typeStr);

        // C code template: trim leading blanks and trailing blanks/newline.
        char *code = colon2 + 1;
        while (*code == ' ' || *code == '\t') code++;
        size_t codeLen = strlen(code);
        while (codeLen > 0 && (code[codeLen-1] == '\n' || code[codeLen-1] == '\r' ||
                               code[codeLen-1] == ' '  || code[codeLen-1] == '\t'))
            codeLen--;
        if (codeLen >= MAX_EXTERN_CODE) {
            fprintf(stderr,
                    "Warning: C code for external function '%s' truncated to %d characters\n",
                    ef->name, MAX_EXTERN_CODE - 1);
            codeLen = MAX_EXTERN_CODE - 1;
        }
        memcpy(ef->cCode, code, codeLen);
        ef->cCode[codeLen] = '\0';

        gExternFuncCount++;
    }
    fclose(f);
}
// Look up a symbol by name; returns pointer to entry or NULL
static Symbol *symLookup(const char *name) {
for (int i = 0; i < gSymCount; i++)
if (strIcmp(gSyms[i].name, name) == 0)
return &gSyms[i];
return NULL;
}
// Forward declaration
static int isKeyword(const char *name);
// Return the symbol table entry for 'name', creating it if necessary.
//
// An existing entry (matched case-insensitively) is returned unchanged.
// A new entry is zero-filled apart from its name, which is truncated to
// MAX_IDENT - 1 characters.
//
// Calls fatal() if 'name' is a reserved keyword or the table is full.
static Symbol *symAdd(const char *name) {
    if (isKeyword(name))
        fatal(gLine, "Cannot use keyword '%s' as identifier", name);

    Symbol *existing = symLookup(name);
    if (existing != NULL)
        return existing;

    if (gSymCount >= MAX_SYMBOLS)
        fatal(gLine, "Symbol table full");

    Symbol *fresh = gSyms + gSymCount;
    gSymCount++;
    memset(fresh, 0, sizeof *fresh);
    // The memset above guarantees the final byte stays NUL.
    strncpy(fresh->name, name, MAX_IDENT - 1);
    return fresh;
}
// Find a user-defined type by case-insensitive name.
// Returns its index in gUdts[], or -1 if no such type exists.
static int udtLookup(const char *name) {
    int idx = 0;
    while (idx < gUdtCount) {
        if (strIcmp(gUdts[idx].name, name) == 0)
            return idx;
        idx++;
    }
    return -1;
}
// Look up a field within a UDT; returns field index or -1
static int udtFieldLookup(int udtIdx, const char *field) {
if (udtIdx < 0 || udtIdx >= gUdtCount) return -1;
UdtDef *u = &gUdts[udtIdx];
for (int i = 0; i < u->fieldCount; i++)
if (strIcmp(u->fields[i].name, field) == 0)
return i;
return -1;
}
// Tell whether 'name' is a BYREF parameter of the function currently being
// processed (gFuncName). Used during code generation to emit pointer
// dereferences for BYREF params.
//
// returns - 1 if so; 0 if not, or when outside any SUB/FUNCTION, or when
//           the current function has no symbol table entry.
static int isByrefParam(const char *name) {
    if (!gInFunc || gFuncName == NULL)
        return 0;

    const Symbol *fn = symLookup(gFuncName);
    if (fn == NULL)
        return 0;

    for (int p = 0; p < fn->paramCount; p++) {
        if (fn->paramModes[p] != PASS_BYREF)
            continue;
        if (strIcmp(fn->paramNames[p], name) == 0)
            return 1;
    }
    return 0;
}
// Write printf-style formatted text to gOut, preceded by the current
// indentation (four spaces per gIndent level).
static void emit(const char *fmt, ...) {
    int pad = gIndent * 4;
    while (pad-- > 0)
        fputc(' ', gOut);

    va_list args;
    va_start(args, fmt);
    vfprintf(gOut, fmt, args);
    va_end(args);
}
// Write printf-style formatted text to gOut exactly as given, with no
// indentation prefix.
static void emitRaw(const char *fmt, ...) {
    va_list args;

    va_start(args, fmt);
    vfprintf(gOut, fmt, args);
    va_end(args);
}
// Remember a line-number label for later GOSUB dispatch.
// Duplicates are ignored; calls fatal() when gLineLabels[] is full.
static void recordLineLabel(int lnum) {
    int i = 0;
    while (i < gLineLabelCount) {
        if (gLineLabels[i] == lnum)
            return;  // already recorded
        i++;
    }

    if (gLineLabelCount >= MAX_LINE_LABELS)
        fatal(gLine, "Too many line labels");

    gLineLabels[gLineLabelCount] = lnum;
    gLineLabelCount++;
}
// Mark a line number as the target of a GOTO/GOSUB so that its C label
// gets emitted. Duplicates are ignored; calls fatal() when
// gGotoTargets[] is full.
static void recordGotoTarget(int lnum) {
    int i = 0;
    while (i < gGotoTargetCount) {
        if (gGotoTargets[i] == lnum)
            return;  // already recorded
        i++;
    }

    if (gGotoTargetCount >= MAX_LINE_LABELS)
        fatal(gLine, "Too many goto targets");

    gGotoTargets[gGotoTargetCount] = lnum;
    gGotoTargetCount++;
}
// Return 1 if the line number was recorded as a GOTO/GOSUB target,
// 0 otherwise.
static int isGotoTarget(int lnum) {
    const int *end = gGotoTargets + gGotoTargetCount;
    for (const int *t = gGotoTargets; t < end; t++) {
        if (*t == lnum)
            return 1;
    }
    return 0;
}
// Mark a named label as the target of a GOTO/GOSUB so that its C label
// gets emitted. Names are compared case-insensitively and stored as heap
// copies. Duplicates are ignored; calls fatal() when gGotoStrTargets[]
// is full.
static void recordGotoStrTarget(const char *name) {
    int i = 0;
    while (i < gGotoStrTargetCount) {
        if (strIcmp(gGotoStrTargets[i], name) == 0)
            return;  // already recorded
        i++;
    }

    if (gGotoStrTargetCount >= MAX_LINE_LABELS)
        fatal(gLine, "Too many named goto targets");

    gGotoStrTargets[gGotoStrTargetCount] = strDup(name);
    gGotoStrTargetCount++;
}
// Return 1 if the named label was recorded as a GOTO/GOSUB target
// (case-insensitive match), 0 otherwise.
static int isGotoStrTarget(const char *name) {
    int i = 0;
    while (i < gGotoStrTargetCount) {
        if (strIcmp(gGotoStrTargets[i], name) == 0)
            return 1;
        i++;
    }
    return 0;
}
// Work out the data type of a variable from its name.
//
// A trailing type suffix decides first: '$' -> STRING, '%' -> INTEGER,
// '!' -> FLOAT, '#' -> DOUBLE. Without a suffix the symbol table is
// consulted; unknown names default to INTEGER.
static DataType inferVarType(const char *name) {
    size_t len = strlen(name);
    if (len > 0) {
        switch (name[len - 1]) {
        case '$': return TYPE_STR;
        case '%': return TYPE_INT;
        case '!': return TYPE_FLOAT;
        case '#': return TYPE_DBL;
        default:  break;
        }
    }

    const Symbol *sym = symLookup(name);
    return sym ? sym->dataType : TYPE_INT;  // default
}
// Result type of combining two operand types in an expression.
//
// If either operand is a STRING the result is STRING. Otherwise the
// operand with the higher DataType value wins, which for numeric types
// means the wider one (BYTE < INT < LONG < FLOAT < DBL).
static DataType promoteType(DataType a, DataType b) {
    if (a != TYPE_STR && b != TYPE_STR)
        return (b < a) ? a : b;
    return TYPE_STR;
}
// Produce a C-safe spelling of a BASIC identifier by dropping one trailing
// type-suffix character ($, %, # or !).
//
// name    - BASIC identifier; NULL yields the literal "_null_".
// returns - pointer to one of 8 static buffers used in rotation, so up to
//           8 results can be alive at once (e.g. within one emit() call).
//           Names are truncated to MAX_IDENT - 1 characters before the
//           suffix check.
static const char *cleanName(const char *name) {
    static char bufs[8][MAX_IDENT];
    static int idx = 0;

    if (name == NULL)
        return "_null_";

    char *out = bufs[idx & 7];
    idx++;

    strncpy(out, name, MAX_IDENT - 1);
    out[MAX_IDENT - 1] = '\0';

    size_t n = strlen(out);
    if (n > 0) {
        switch (out[n - 1]) {
        case '$':
        case '%':
        case '#':
        case '!':
            out[n - 1] = '\0';
            break;
        default:
            break;
        }
    }
    return out;
}
// -----------------------------------------------------------------------
// Section 6: Lexer
//
// The lexer reads characters from gSrc and produces tokens one at a time.
// BASIC keywords are case-insensitive; identifiers preserve case.
// -----------------------------------------------------------------------
// Keyword table: maps keyword strings to token types
static struct { const char *kw; TokenType tok; } gKeywords[] = {
{"DIM", TOK_DIM}, {"REDIM", TOK_REDIM},
{"AS", TOK_AS}, {"BYTE", TOK_BYTE},
{"INTEGER", TOK_INTEGER},
{"LONG", TOK_LONG}, {"FLOAT", TOK_FLOAT},
{"DOUBLE", TOK_DOUBLE}, {"STRING", TOK_STRING},
{"LET", TOK_LET}, {"PRINT", TOK_PRINT},
{"INPUT", TOK_INPUT}, {"IF", TOK_IF},
{"THEN", TOK_THEN}, {"ELSE", TOK_ELSE},
{"ELSEIF", TOK_ELSEIF}, {"END", TOK_END},
{"FOR", TOK_FOR}, {"TO", TOK_TO},
{"STEP", TOK_STEP}, {"NEXT", TOK_NEXT},
{"WHILE", TOK_WHILE}, {"WEND", TOK_WEND},
{"DO", TOK_DO}, {"LOOP", TOK_LOOP},
{"UNTIL", TOK_UNTIL}, {"GOTO", TOK_GOTO},
{"GOSUB", TOK_GOSUB}, {"RETURN", TOK_RETURN},
{"SUB", TOK_SUB}, {"FUNCTION", TOK_FUNCTION},
{"CALL", TOK_CALL}, {"BYVAL", TOK_BYVAL},
{"BYREF", TOK_BYREF}, {"LOCAL", TOK_LOCAL},
{"STATIC", TOK_STATIC}, {"EXIT", TOK_EXIT},
{"AND", TOK_AND}, {"OR", TOK_OR},
{"NOT", TOK_NOT}, {"MOD", TOK_MOD},
{"REM", TOK_REM},
{"OPEN", TOK_OPEN}, {"CLOSE", TOK_CLOSE},
{"OUTPUT", TOK_OUTPUT}, {"APPEND", TOK_APPEND},
{"BINARY", TOK_BINARY}, {"LINE", TOK_LINE},
{"WRITE", TOK_WRITE},
{"TYPE", TOK_TYPE},
{"XOR", TOK_XOR},
{"SELECT", TOK_SELECT}, {"CASE", TOK_CASE},
{"CONST", TOK_CONST},
{"ON", TOK_ON},
{NULL, TOK_EOF}
};
// Return 1 if 'name' matches an entry of gKeywords[] (ignoring case),
// 0 otherwise.
static int isKeyword(const char *name) {
    int k = 0;
    while (gKeywords[k].kw != NULL) {
        if (strIcmp(name, gKeywords[k].kw) == 0)
            return 1;
        k++;
    }
    return 0;
}
// Return the character at the read position without consuming it,
// as an unsigned char value, or EOF at end of source.
static int peekChar(void) {
    return (gSrcPos < gSrcLen) ? (unsigned char)gSrc[gSrcPos] : EOF;
}
// Consume and return the character at the read position (as an unsigned
// char value), or EOF at end of source. Consuming a '\n' advances gLine.
static int readChar(void) {
    if (gSrcPos >= gSrcLen)
        return EOF;

    unsigned char c = (unsigned char)gSrc[gSrcPos];
    gSrcPos++;
    if (c == '\n')
        gLine++;
    return c;
}
// Advance the read position past spaces and tabs. Newlines are NOT
// skipped because they are statement separators.
static void skipSpaces(void) {
    for (; gSrcPos < gSrcLen; gSrcPos++) {
        char c = gSrc[gSrcPos];
        if (c != ' ' && c != '\t')
            return;
    }
}
// Read the next token into gTok
static void nextToken(void) {
skipSpaces();
gTok.line = gLine;
gTok.sval[0] = '\0';
gTok.ival = 0;
gTok.dval = 0.0;
int ch = peekChar();
// End of file
if (ch == EOF) { gTok.type = TOK_EOF; return; }
// Newline statement separator
if (ch == '\n') {
readChar();
gTok.type = TOK_NEWLINE;
return;
}
// Carriage return (handle \r\n)
if (ch == '\r') {
readChar();
if (peekChar() == '\n') readChar();
gTok.type = TOK_NEWLINE;
return;
}
// Single-line comment: ' or REM
if (ch == '\'') {
// Skip until end of line
while (peekChar() != '\n' && peekChar() != EOF)
readChar();
gTok.type = TOK_NEWLINE; // treat comment as newline
if (peekChar() == '\n') readChar();
return;
}
// String literal
if (ch == '"') {
readChar(); // consume opening quote
int i = 0;
while (peekChar() != '"' && peekChar() != '\n' && peekChar() != EOF) {
if (i < MAX_TOKEN_LEN - 1)
gTok.sval[i++] = (char)readChar();
else
readChar();
}
gTok.sval[i] = '\0';
if (peekChar() == '"') readChar(); // consume closing quote
gTok.type = TOK_STR_LIT;
return;
}
// Number literal (integer or double)
if (isdigit(ch) || (ch == '.' && isdigit(gSrc[gSrcPos+1]))) {
int i = 0;
int hasDot = 0;
while (isdigit(peekChar()) || peekChar() == '.') {
if (peekChar() == '.') {
if (hasDot) break; // second dot ends the number
hasDot = 1;
}
if (i < MAX_TOKEN_LEN - 1)
gTok.sval[i++] = (char)readChar();
else
readChar();
}
gTok.sval[i] = '\0';
if (hasDot) {
gTok.type = TOK_DBL_LIT;
gTok.dval = atof(gTok.sval);
} else {
gTok.type = TOK_INT_LIT;
gTok.ival = atoi(gTok.sval);
}
return;
}
// Identifier or keyword
if (isalpha(ch) || ch == '_') {
int i = 0;
while (isalnum(peekChar()) || peekChar() == '_') {
if (i < MAX_TOKEN_LEN - 1)
gTok.sval[i++] = (char)readChar();
else
readChar();
}
// Allow trailing $, %, #, ! for type suffixes:
// $ = STRING, % = INTEGER (int16_t),
// # = DOUBLE, ! = FLOAT
if (peekChar()=='$' || peekChar()=='%' ||
peekChar()=='#' || peekChar()=='!') {
if (i < MAX_TOKEN_LEN - 1)
gTok.sval[i++] = (char)readChar();
}
gTok.sval[i] = '\0';
// Check for REM (rest of line is comment)
if (strIcmp(gTok.sval, "REM") == 0) {
while (peekChar() != '\n' && peekChar() != EOF)
readChar();
gTok.type = TOK_NEWLINE;
if (peekChar() == '\n') readChar();
return;
}
// Check keyword table
for (int k = 0; gKeywords[k].kw; k++) {
if (strIcmp(gTok.sval, gKeywords[k].kw) == 0) {
gTok.type = gKeywords[k].tok;
return;
}
}
// Not a keyword it is an identifier
gTok.type = TOK_IDENT;
return;
}
// Operators and punctuation
readChar();
switch (ch) {
case '+': gTok.type = TOK_PLUS; return;
case '-': gTok.type = TOK_MINUS; return;
case '*': gTok.type = TOK_STAR; return;
case '/': gTok.type = TOK_SLASH; return;
case '\\':gTok.type = TOK_BSLASH; return;
case '^': gTok.type = TOK_CARET; return;
case '&': gTok.type = TOK_AMP; return;
case '#': gTok.type = TOK_HASH; return;
case '.': gTok.type = TOK_DOT; return;
case '(': gTok.type = TOK_LPAREN; return;
case ')': gTok.type = TOK_RPAREN; return;
case ',': gTok.type = TOK_COMMA; return;
case ';': gTok.type = TOK_SEMI; return;
case ':': gTok.type = TOK_COLON; return;
case '?': gTok.type = TOK_PRINT; return;
case '=': gTok.type = TOK_EQ; return;
case '<':
if (peekChar() == '=') { readChar(); gTok.type = TOK_LE; }
else if (peekChar() == '>') { readChar(); gTok.type = TOK_NE; }
else gTok.type = TOK_LT;
return;
case '>':
if (peekChar() == '=') { readChar(); gTok.type = TOK_GE; }
else gTok.type = TOK_GT;
return;
default:
fatal(gLine, "Unexpected character '%c' (0x%02X)", ch, ch);
}
}
// Return 1 if the current token has type 't', 0 otherwise.
// Does not consume the token.
static int tokIs(TokenType t) {
    return (gTok.type == t) ? 1 : 0;
}
// If the current token has type 't', advance to the next token and
// return 1; otherwise leave the token in place and return 0.
static int tokAccept(TokenType t) {
    if (gTok.type != t)
        return 0;
    nextToken();
    return 1;
}
// Require the current token to have type 't' and advance past it.
// On a mismatch, fatal() reports the expected and actual token type
// numbers together with the token text.
static void tokExpect(TokenType t) {
    if (!tokIs(t))
        fatal(gTok.line, "Expected token type %d, got %d ('%s')",
              t, gTok.type, gTok.sval);
    nextToken();
}
// Advance past any run of statement separators (newline and ':' tokens).
static void skipEol(void) {
    for (;;) {
        if (gTok.type != TOK_NEWLINE && gTok.type != TOK_COLON)
            break;
        nextToken();
    }
}
// -----------------------------------------------------------------------
// Section 7: Parser - Recursive Descent
//
// Grammar (simplified):
// program = { sub_decl | func_decl | statement }
// statement = dim | redim | type_def | assignment | print | input
// | if | for | while | do_loop | goto | gosub | return
// | call | exit | local | static | end | label | open
// | close | data | read | restore | get | put | line_input
// expression = or_expr
// or_expr = and_expr { OR and_expr }
// and_expr = not_expr { AND not_expr }
// not_expr = NOT not_expr | cmp_expr
// cmp_expr = add_expr { (= | <> | < | > | <= | >=) add_expr }
// add_expr = mul_expr { (+ | - | &) mul_expr }
// mul_expr = idiv_expr { (* | /) idiv_expr }
// idiv_expr = mod_expr { '\' mod_expr }
// mod_expr = power_expr { MOD power_expr }
// power_expr = unary_expr { ^ unary_expr }
// unary_expr = [+ | -] primary
// primary = INT_LIT | DBL_LIT | STR_LIT | ident['('args')'][.field...]
// | '(' expression ')' | SIZEOF'('type_name')'
// -----------------------------------------------------------------------
// Forward declarations for mutually recursive parser functions
static Node *parseExpr(void);
static Node *parseStatement(void);
static int dataIndexForLine(int lnum);
static int dataIndexForLabel(const char *name);
static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3);
static void skipNewlines(void) {
while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken();
}
// ---- Expression parser ----
// Parse the ".field[.field...]" suffix that may follow a UDT-typed value.
//   base     - node the first '.' applies to
//   udtIndex - index into gUdts of base's type; a negative value parses nothing
//   ln       - source line recorded on created nodes and used in error messages
// Each field access becomes a NODE_DOT_ACCESS whose `a` is the value being
// accessed, `sval` the field name, `ival2` the index of the containing UDT,
// and `ival` the fixed string length for STRING * N fields.
// Returns the outermost access node, or `base` unchanged if no '.' follows.
static Node *parsePrimaryDotChain(Node *base, int udtIndex, int ln) {
    Node *cur = base;
    int curUdt = udtIndex;
    while (curUdt >= 0 && tokIs(TOK_DOT)) {
        nextToken();
        if (!tokIs(TOK_IDENT))
            fatal(ln, "Expected field name after '.'");
        int fi = udtFieldLookup(curUdt, gTok.sval);
        if (fi < 0)
            fatal(ln, "Unknown field '%s' in type '%s'",
                  gTok.sval, gUdts[curUdt].name);
        Node *dot = newNode(NODE_DOT_ACCESS, ln);
        dot->a = cur;
        dot->sval = strDup(gTok.sval);
        dot->ival2 = curUdt;
        UdtField *uf = &gUdts[curUdt].fields[fi];
        dot->dataType = uf->dataType;
        if (uf->dataType == TYPE_STR && uf->strLen > 0)
            dot->ival = uf->strLen;
        cur = dot;
        // Only a nested UDT field can be followed by a further '.'
        curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
        nextToken();
    }
    return cur;
}

// Parse a primary expression: a literal, a parenthesized expression, or an
// identifier (variable, array element, function call, constant), including
// any trailing ".field" chain on UDT values.
// Returns the expression node; calls fatal() if no expression starts here.
static Node *parsePrimary(void) {
    int ln = gTok.line;

    // Integer literal
    if (tokIs(TOK_INT_LIT)) {
        Node *n = newNode(NODE_INT_LIT, ln);
        n->ival = gTok.ival;
        n->dataType = TYPE_INT;
        nextToken();
        return n;
    }

    // Double literal
    if (tokIs(TOK_DBL_LIT)) {
        Node *n = newNode(NODE_DBL_LIT, ln);
        n->dval = gTok.dval;
        n->dataType = TYPE_DBL;
        nextToken();
        return n;
    }

    // String literal
    if (tokIs(TOK_STR_LIT)) {
        Node *n = newNode(NODE_STR_LIT, ln);
        n->sval = strDup(gTok.sval);
        n->dataType = TYPE_STR;
        nextToken();
        return n;
    }

    // Parenthesized expression
    if (tokIs(TOK_LPAREN)) {
        nextToken();
        Node *n = parseExpr();
        tokExpect(TOK_RPAREN);
        return n;
    }

    // Identifier: variable, array element, or function call
    if (tokIs(TOK_IDENT)) {
        // Copy the name: gTok is overwritten by the nextToken() below.
        char name[MAX_TOKEN_LEN];
        strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
        name[MAX_TOKEN_LEN - 1] = '\0';
        nextToken();

        // '(' introduces an array access or a function call
        if (tokIs(TOK_LPAREN)) {
            nextToken();
            // Collect the comma-separated argument list, linked via ->next
            Node *args = NULL, *tail = NULL;
            if (!tokIs(TOK_RPAREN)) {
                Node *arg = parseExpr();
                args = tail = arg;
                while (tokAccept(TOK_COMMA)) {
                    arg = parseExpr();
                    tail->next = arg;
                    tail = arg;
                }
            }
            tokExpect(TOK_RPAREN);

            // Determine if this is a known array or function
            Symbol *s = symLookup(name);

            // SIZEOF(TypeName) — compile-time sizeof
            if (strIcmp(name, "SIZEOF") == 0) {
                // args should be one identifier — the UDT name
                Node *n = newNode(NODE_FUNC_CALL, ln);
                n->sval = strDup("SIZEOF");
                n->a = args;
                n->dataType = TYPE_LONG;
                return n;
            }

            if (s && s->isArray) {
                Node *n = newNode(NODE_ARRAY_REF, ln);
                n->sval = strDup(name);
                n->a = args;  // index expression(s)
                n->dataType = s->dataType;
                n->ival2 = s->udtIndex;
                // Dot-access on an array element: arr(i).field[.field...]
                if (s->dataType == TYPE_UDT && tokIs(TOK_DOT))
                    return parsePrimaryDotChain(n, s->udtIndex, ln);
                return n;
            } else {
                // Treat as function call
                Node *n = newNode(NODE_FUNC_CALL, ln);
                n->sval = strDup(name);
                n->a = args;
                // Infer return type: user FUNCTION, then hard-coded built-ins,
                // then external functions, builtin table, and finally the name.
                if (s && s->isFunc == 1) {
                    n->dataType = s->returnType;
                } else if (strIcmp(name,"LOF")==0) {
                    n->dataType = TYPE_LONG;
                } else if (strIcmp(name,"VAL")==0 || strIcmp(name,"ABS")==0) {
                    n->dataType = TYPE_DBL;
                } else if (strIcmp(name,"LEN")==0 || strIcmp(name,"ASC")==0 ||
                           strIcmp(name,"INT")==0 || strIcmp(name,"INSTR")==0 ||
                           strIcmp(name,"EOF")==0 || strIcmp(name,"FREEFILE")==0 ||
                           strIcmp(name,"LBOUND")==0 || strIcmp(name,"UBOUND")==0) {
                    n->dataType = TYPE_INT;
                } else {
                    // Check external functions and compile-time builtins
                    ExternFunc *ef = externFuncLookup(name);
                    if (ef) {
                        n->dataType = ef->returnType;
                    } else {
                        const BuiltinDef *bd = builtinDefLookup(name);
                        if (bd) {
                            n->dataType = bd->returnType;
                        } else {
                            n->dataType = inferVarType(name);
                        }
                    }
                }
                return n;
            }
        }

        // RND without parentheses — treat as RND()
        if (strIcmp(name, "RND") == 0) {
            Node *n = newNode(NODE_FUNC_CALL, ln);
            n->sval = strDup("RND");
            n->a = NULL;
            n->dataType = TYPE_DBL;
            return n;
        }

        // Compile-time constants are substituted by a literal node
        for (int ci = 0; ci < gConstCount; ci++) {
            if (strIcmp(name, gConsts[ci].name) == 0) {
                if (gConsts[ci].dataType == TYPE_STR) {
                    Node *n = newNode(NODE_STR_LIT, ln);
                    n->sval = strDup(gConsts[ci].strVal);
                    n->dataType = TYPE_STR;
                    return n;
                } else {
                    double v = gConsts[ci].numVal;
                    // Whole-valued, non-floating constants become integer literals.
                    // NOTE(review): (int)v is not range-checked; a constant
                    // outside int range would make this cast undefined — confirm
                    // whether such constants can reach this point.
                    if (v == (int)v && gConsts[ci].dataType != TYPE_DBL &&
                        gConsts[ci].dataType != TYPE_FLOAT) {
                        Node *n = newNode(NODE_INT_LIT, ln);
                        n->ival = (int)v;
                        n->dataType = gConsts[ci].dataType;
                        return n;
                    } else {
                        Node *n = newNode(NODE_DBL_LIT, ln);
                        n->dval = v;
                        n->dataType = gConsts[ci].dataType;
                        return n;
                    }
                }
            }
        }

        // Plain variable reference — check for dot-access (supports chaining)
        {
            Symbol *s = symLookup(name);
            if (s && s->dataType == TYPE_UDT && tokIs(TOK_DOT)) {
                Node *base = newNode(NODE_IDENT, ln);
                base->sval = strDup(name);
                base->dataType = TYPE_UDT;
                return parsePrimaryDotChain(base, s->udtIndex, ln);
            }
        }

        // Ordinary variable: type comes from inferVarType(name)
        Node *n = newNode(NODE_IDENT, ln);
        n->sval = strDup(name);
        n->dataType = inferVarType(name);
        return n;
    }

    fatal(ln, "Expected expression, got token type %d ('%s')",
          gTok.type, gTok.sval);
    return NULL; // unreachable
}
// Unary: [+|-] primary
// A sign is followed by another unary expression, so signs may be stacked.
// Unary '+' produces no node; unary '-' produces a NODE_UNOP that carries the
// operand's data type.
static Node *parseUnary(void) {
    int ln = gTok.line;
    if (!tokIs(TOK_MINUS) && !tokIs(TOK_PLUS))
        return parsePrimary();

    int sign = gTok.type;
    nextToken();
    Node *inner = parseUnary();
    if (sign != TOK_PLUS) {
        Node *neg = newNode(NODE_UNOP, ln);
        neg->ival = sign;
        neg->a = inner;
        neg->dataType = inner->dataType;
        return neg;
    }
    return inner;  // unary + is a no-op
}
// Power: unary { ^ unary }  (right-associative)
// The right operand is parsed by recursing into parsePower, so a ^ b ^ c
// groups as a ^ (b ^ c). The result type is always TYPE_DBL.
static Node *parsePower(void) {
    Node *base = parseUnary();
    if (!tokIs(TOK_CARET))
        return base;

    int ln = gTok.line;
    nextToken();
    Node *exponent = parsePower();
    Node *pw = newNode(NODE_BINOP, ln);
    pw->ival = TOK_CARET;
    pw->a = base;
    pw->b = exponent;
    pw->dataType = TYPE_DBL;
    return pw;
}
// MOD: power { MOD power }
// Left-associative; every MOD node is typed TYPE_INT.
static Node *parseMod(void) {
    Node *acc = parsePower();
    for (;;) {
        if (!tokIs(TOK_MOD))
            break;
        int ln = gTok.line;
        nextToken();
        Node *rhs = parsePower();
        Node *op = newNode(NODE_BINOP, ln);
        op->ival = TOK_MOD;
        op->a = acc;
        op->b = rhs;
        op->dataType = TYPE_INT;
        acc = op;
    }
    return acc;
}
// Integer division: mod { '\' mod }
// Left-associative; every '\' node is typed TYPE_INT.
static Node *parseIdiv(void) {
    Node *acc = parseMod();
    for (;;) {
        if (!tokIs(TOK_BSLASH))
            break;
        int ln = gTok.line;
        nextToken();
        Node *rhs = parseMod();
        Node *op = newNode(NODE_BINOP, ln);
        op->ival = TOK_BSLASH;
        op->a = acc;
        op->b = rhs;
        op->dataType = TYPE_INT;
        acc = op;
    }
    return acc;
}
// Multiply / divide: idiv { (*|/) idiv }
// Left-associative. '/' always yields TYPE_DBL; '*' yields the promoted type
// of its two operands.
static Node *parseMuldiv(void) {
    Node *acc = parseIdiv();
    for (;;) {
        if (!tokIs(TOK_STAR) && !tokIs(TOK_SLASH))
            break;
        int ln = gTok.line;
        int oper = gTok.type;
        nextToken();
        Node *rhs = parseIdiv();
        Node *bin = newNode(NODE_BINOP, ln);
        bin->ival = oper;
        bin->a = acc;
        bin->b = rhs;
        if (oper == TOK_SLASH) {
            bin->dataType = TYPE_DBL;
        } else {
            bin->dataType = promoteType(acc->dataType, rhs->dataType);
        }
        acc = bin;
    }
    return acc;
}
// Add / subtract / string concat: muldiv { (+|-|&) muldiv }
// '&' always concatenates; '+' concatenates when either operand is a string.
// Every other case gets the promoted type of the two operands.
static Node *parseAddsub(void) {
    Node *acc = parseMuldiv();
    for (;;) {
        if (!tokIs(TOK_PLUS) && !tokIs(TOK_MINUS) && !tokIs(TOK_AMP))
            break;
        int ln = gTok.line;
        int oper = gTok.type;
        nextToken();
        Node *rhs = parseMuldiv();
        Node *bin = newNode(NODE_BINOP, ln);
        bin->ival = oper;
        bin->a = acc;
        bin->b = rhs;

        int isConcat = (oper == TOK_AMP);
        if (!isConcat && oper == TOK_PLUS)
            isConcat = (acc->dataType == TYPE_STR || rhs->dataType == TYPE_STR);

        if (isConcat)
            bin->dataType = TYPE_STR;
        else
            bin->dataType = promoteType(acc->dataType, rhs->dataType);
        acc = bin;
    }
    return acc;
}
// Comparison: addsub { (=|<>|<|>|<=|>=) addsub }
// Left-associative; each comparison node is typed TYPE_INT (boolean result).
static Node *parseComparison(void) {
    Node *acc = parseAddsub();
    for (;;) {
        int isRelOp = tokIs(TOK_EQ) || tokIs(TOK_NE) || tokIs(TOK_LT) ||
                      tokIs(TOK_GT) || tokIs(TOK_LE) || tokIs(TOK_GE);
        if (!isRelOp)
            break;
        int ln = gTok.line;
        int oper = gTok.type;
        nextToken();
        Node *rhs = parseAddsub();
        Node *cmp = newNode(NODE_BINOP, ln);
        cmp->ival = oper;
        cmp->a = acc;
        cmp->b = rhs;
        cmp->dataType = TYPE_INT;
        acc = cmp;
    }
    return acc;
}
// NOT: NOT not_expr | comparison
// NOT applies to another not_expr, so it may be repeated; the node is TYPE_INT.
static Node *parseNot(void) {
    if (!tokIs(TOK_NOT))
        return parseComparison();

    int ln = gTok.line;
    nextToken();
    Node *inner = parseNot();
    Node *inv = newNode(NODE_UNOP, ln);
    inv->ival = TOK_NOT;
    inv->a = inner;
    inv->dataType = TYPE_INT;
    return inv;
}
// AND: not { AND not }
// Left-associative; every AND node is typed TYPE_INT.
static Node *parseAnd(void) {
    Node *acc = parseNot();
    for (;;) {
        if (!tokIs(TOK_AND))
            break;
        int ln = gTok.line;
        nextToken();
        Node *rhs = parseNot();
        Node *conj = newNode(NODE_BINOP, ln);
        conj->ival = TOK_AND;
        conj->a = acc;
        conj->b = rhs;
        conj->dataType = TYPE_INT;
        acc = conj;
    }
    return acc;
}
// OR / XOR: and { (OR|XOR) and } — top-level expression rule
// OR and XOR share this precedence level and associate to the left; the
// resulting nodes are typed TYPE_INT.
static Node *parseOr(void) {
    Node *acc = parseAnd();
    for (;;) {
        if (!tokIs(TOK_OR) && !tokIs(TOK_XOR))
            break;
        int ln = gTok.line;
        int oper = gTok.type;
        nextToken();
        Node *rhs = parseAnd();
        Node *disj = newNode(NODE_BINOP, ln);
        disj->ival = oper;
        disj->a = acc;
        disj->b = rhs;
        disj->dataType = TYPE_INT;
        acc = disj;
    }
    return acc;
}
// Expression entry point: parsing starts at the lowest-precedence rule
// (OR/XOR) and returns the root node of the expression tree.
static Node *parseExpr(void) {
    Node *root = parseOr();
    return root;
}
// ---- Statement parsers ----
// Parse a data-type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING, or UDT name)
static DataType parseType(void) {
if (tokAccept(TOK_BYTE)) return TYPE_BYTE;
if (tokAccept(TOK_INTEGER)) return TYPE_INT;
if (tokAccept(TOK_LONG)) return TYPE_LONG;
if (tokAccept(TOK_FLOAT)) return TYPE_FLOAT;
if (tokAccept(TOK_DOUBLE)) return TYPE_DBL;
if (tokAccept(TOK_STRING)) return TYPE_STR;
// Check for user-defined type name
if (tokIs(TOK_IDENT)) {
int idx = udtLookup(gTok.sval);
if (idx >= 0) {
gLastUdtIndex = idx;
nextToken();
return TYPE_UDT;
}
}
fatal(gTok.line,
"Expected type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING)");
return TYPE_INT;
}
// Parse a TYPE ... END TYPE definition:
//   TYPE name
//     field AS type | field AS STRING * N
//     ...
//   END TYPE
// The new type is appended to gUdts before its fields are parsed, and a
// NODE_TYPE_DEF node is returned with sval = type name and ival = gUdts index.
// Fatal errors: missing type name, too many types or fields, a malformed
// field, a non-positive STRING * N length, or a missing END TYPE.
static Node *parseTypeDef(void) {
    int ln = gTok.line;
    tokExpect(TOK_TYPE);
    if (!tokIs(TOK_IDENT))
        fatal(ln, "Expected type name after TYPE");
    char tname[MAX_IDENT];
    strncpy(tname, gTok.sval, MAX_IDENT - 1);
    tname[MAX_IDENT - 1] = '\0';
    nextToken();

    if (gUdtCount >= MAX_UDTS)
        fatal(ln, "Too many TYPE definitions (max %d)", MAX_UDTS);
    int udtIdx = gUdtCount++;
    UdtDef *u = &gUdts[udtIdx];
    memset(u, 0, sizeof(*u));
    // The memset above guarantees NUL termination of the copied name.
    strncpy(u->name, tname, MAX_IDENT - 1);

    // Skip newlines before fields
    skipNewlines();

    // Parse fields until END TYPE
    int closed = 0;
    while (!tokIs(TOK_EOF)) {
        // Check for END TYPE: peek one token past END, restoring the lexer
        // state if the next token is not TYPE.
        if (tokIs(TOK_END)) {
            int sp = gSrcPos;
            int sl = gLine;
            Token st = gTok;
            nextToken();
            if (tokIs(TOK_TYPE)) {
                nextToken();  // consume TYPE
                closed = 1;
                break;
            }
            // Not END TYPE — restore
            gSrcPos = sp;
            gLine = sl;
            gTok = st;
        }

        // Parse field: name AS type
        if (!tokIs(TOK_IDENT))
            fatal(gTok.line, "Expected field name in TYPE definition");
        if (u->fieldCount >= MAX_UDT_FIELDS)
            fatal(gTok.line, "Too many fields in TYPE (max %d)", MAX_UDT_FIELDS);
        UdtField *f = &u->fields[u->fieldCount];
        strncpy(f->name, gTok.sval, MAX_IDENT - 1);
        f->name[MAX_IDENT - 1] = '\0';
        nextToken();
        tokExpect(TOK_AS);

        // STRING fields must be fixed length: STRING * N
        if (tokIs(TOK_STRING)) {
            nextToken();
            if (tokAccept(TOK_STAR)) {
                if (!tokIs(TOK_INT_LIT))
                    fatal(gTok.line, "Expected integer after STRING *");
                // A length of 0 would later be indistinguishable from
                // "not a fixed-length string", so reject it here.
                if (gTok.ival <= 0)
                    fatal(gTok.line, "STRING * N length must be positive");
                f->strLen = gTok.ival;
                nextToken();
            } else {
                fatal(gTok.line,
                      "STRING fields in TYPE require fixed length (STRING * N)");
            }
            f->dataType = TYPE_STR;
            f->udtIndex = -1;
        } else {
            // parseType() sets gLastUdtIndex only for user-defined types.
            gLastUdtIndex = -1;
            f->dataType = parseType();
            f->strLen = 0;
            f->udtIndex = gLastUdtIndex;
        }
        u->fieldCount++;

        // Skip newlines between fields
        skipNewlines();
    }

    // Reaching end of input without END TYPE is an error, not a valid type.
    if (!closed)
        fatal(ln, "TYPE '%s' is missing END TYPE", tname);

    Node *n = newNode(NODE_TYPE_DEF, ln);
    n->sval = strDup(tname);
    n->ival = udtIdx;
    return n;
}
// Parse DIM statement: DIM name[(size[, size, ...])] AS type
// Returns a NODE_DIM with sval = variable name, dataType = declared type,
// a = linked list of dimension size expressions (NULL for a scalar),
// ival = number of dimensions, ival2 = UDT index (-1 unless the type is a UDT).
// The variable is also registered in the symbol table.
static Node *parseDim(void) {
    int ln = gTok.line;
    tokExpect(TOK_DIM);
    if (!tokIs(TOK_IDENT))
        fatal(ln, "Expected identifier after DIM");
    char name[MAX_TOKEN_LEN];
    strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
    // strncpy does not terminate when the source fills the buffer
    name[MAX_TOKEN_LEN - 1] = '\0';
    nextToken();

    // Optional dimension list
    Node *size = NULL;
    int ndims = 0;
    if (tokAccept(TOK_LPAREN)) {
        size = parseExpr();
        ndims = 1;
        Node *tail = size;
        while (tokAccept(TOK_COMMA)) {
            Node *dim = parseExpr();
            tail->next = dim;
            tail = dim;
            ndims++;
        }
        tokExpect(TOK_RPAREN);
    }

    tokExpect(TOK_AS);
    // parseType() sets gLastUdtIndex only for user-defined types.
    gLastUdtIndex = -1;
    DataType dt = parseType();

    Node *n = newNode(NODE_DIM, ln);
    n->sval = strDup(name);
    n->dataType = dt;
    n->a = size;
    n->ival = ndims;
    n->ival2 = gLastUdtIndex;

    // Register in symbol table
    Symbol *s = symAdd(name);
    s->dataType = dt;
    s->isArray = (ndims > 0);
    s->ndims = ndims;
    s->udtIndex = gLastUdtIndex;
    return n;
}
// Parse REDIM statement: REDIM name(size, ...) AS type
// Returns a NODE_REDIM with sval = array name, dataType = declared type,
// a = linked list of dimension size expressions, ival = number of dimensions.
// Unlike DIM, the dimension list is mandatory and the symbol table is not
// touched here.
static Node *parseRedim(void) {
    int ln = gTok.line;
    tokExpect(TOK_REDIM);
    if (!tokIs(TOK_IDENT))
        fatal(ln, "Expected identifier after REDIM");
    char name[MAX_TOKEN_LEN];
    strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
    // strncpy does not terminate when the source fills the buffer
    name[MAX_TOKEN_LEN - 1] = '\0';
    nextToken();

    tokExpect(TOK_LPAREN);
    Node *size = parseExpr();
    int ndims = 1;
    Node *tail = size;
    while (tokAccept(TOK_COMMA)) {
        Node *dim = parseExpr();
        tail->next = dim;
        tail = dim;
        ndims++;
    }
    tokExpect(TOK_RPAREN);

    tokExpect(TOK_AS);
    DataType dt = parseType();

    Node *n = newNode(NODE_REDIM, ln);
    n->sval = strDup(name);
    n->dataType = dt;
    n->a = size;
    n->ival = ndims;
    return n;
}
// PRINT statement support. Grammar: PRINT [expr { (;|,) expr } [;]]
// (the helper functions come first; parsePrint itself follows them)
// Helper: parse a file number, written as '#' followed by an expression.
// Returns the expression node; the '#' itself is required and consumed.
static Node *parseFileNumber(void) {
    tokExpect(TOK_HASH);
    Node *fileNum = parseExpr();
    return fileNum;
}
// Helper: parse the item list shared by PRINT and PRINT #.
// Returns a linked list of NODE_PRINT_ITEM nodes. Each item holds its
// expression in `a` and the separator that followed it in `ival`:
//   0 = none (last item), 1 = ';' (no space), 2 = ',' (tab).
// A trailing separator at the end of the statement ends the list.
static Node *parsePrintItems(int ln) {
    Node *first = NULL;
    Node *last = NULL;
    for (;;) {
        Node *entry = newNode(NODE_PRINT_ITEM, ln);
        entry->a = parseExpr();

        // Classify (and consume) the separator after this item
        int sep = 0;
        if (tokIs(TOK_SEMI))
            sep = 1;
        else if (tokIs(TOK_COMMA))
            sep = 2;
        entry->ival = sep;
        if (sep != 0)
            nextToken();

        // Append to the list
        if (first == NULL)
            first = entry;
        else
            last->next = entry;
        last = entry;

        // No separator: the list is finished
        if (sep == 0)
            break;
        // Separator followed by end of statement: trailing separator
        if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF))
            break;
    }
    return first;
}
// Parse a PRINT statement in one of its three forms:
//   PRINT #n, items         -> NODE_FILE_PRINT  (b = file number, a = items)
//   PRINT USING fmt; v; ... -> NODE_PRINT_USING (a = format, b = value list)
//   PRINT [items]           -> NODE_PRINT       (a = items, NULL when empty)
static Node *parsePrint(void) {
    int ln = gTok.line;
    tokExpect(TOK_PRINT);

    // File-directed PRINT: PRINT #n, ...
    if (tokIs(TOK_HASH)) {
        Node *filePrint = newNode(NODE_FILE_PRINT, ln);
        filePrint->b = parseFileNumber();
        tokExpect(TOK_COMMA);
        // "PRINT #n," with nothing after it just writes a newline to the file
        int nothingFollows =
            tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF);
        if (!nothingFollows)
            filePrint->a = parsePrintItems(ln);
        return filePrint;
    }

    // PRINT USING "format"; value1; value2; ...
    if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "USING") == 0) {
        nextToken();
        Node *usingNode = newNode(NODE_PRINT_USING, ln);
        usingNode->a = parseExpr();  // format string expression
        // Either ';' or ',' may follow the format
        if (!tokAccept(TOK_SEMI))
            tokExpect(TOK_COMMA);

        // Values form a linked list, separated by ';' or ','
        Node *first = NULL;
        Node *last = NULL;
        for (;;) {
            if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF))
                break;
            Node *value = parseExpr();
            if (first == NULL)
                first = value;
            else
                last->next = value;
            last = value;
            if (!tokAccept(TOK_SEMI) && !tokAccept(TOK_COMMA))
                break;
        }
        usingNode->b = first;
        return usingNode;
    }

    // Plain PRINT; with no items it prints just a newline
    Node *plain = newNode(NODE_PRINT, ln);
    if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF))
        return plain;
    plain->a = parsePrintItems(ln);
    return plain;
}
// INPUT statement support. Grammar: INPUT ["prompt";] var {, var}
// (the variable-list helper comes first; parseInput itself follows it)
// Helper: parse the comma-separated variable list of an INPUT statement.
// Returns a linked list of NODE_IDENT nodes, each typed by inferVarType().
// At least one variable is required.
static Node *parseInputVars(int ln) {
    Node *first = NULL;
    Node *last = NULL;
    for (;;) {
        if (!tokIs(TOK_IDENT))
            fatal(ln, "Expected variable name in INPUT");
        Node *var = newNode(NODE_IDENT, ln);
        var->sval = strDup(gTok.sval);
        var->dataType = inferVarType(gTok.sval);
        nextToken();

        if (first == NULL)
            first = var;
        else
            last->next = var;
        last = var;

        if (!tokAccept(TOK_COMMA))
            break;
    }
    return first;
}
// Parse an INPUT statement:
//   INPUT #n, var {, var}        -> NODE_FILE_INPUT (b = file number, a = vars)
//   INPUT ["prompt"(;|,)] var... -> NODE_INPUT      (sval = prompt, a = vars)
static Node *parseInput(void) {
    int ln = gTok.line;
    tokExpect(TOK_INPUT);

    // File-directed INPUT: INPUT #n, var1, var2
    if (tokIs(TOK_HASH)) {
        Node *fileInput = newNode(NODE_FILE_INPUT, ln);
        fileInput->b = parseFileNumber();
        tokExpect(TOK_COMMA);
        fileInput->a = parseInputVars(ln);
        return fileInput;
    }

    Node *consoleInput = newNode(NODE_INPUT, ln);
    // Optional string-literal prompt, optionally followed by ';' or ','
    if (tokIs(TOK_STR_LIT)) {
        consoleInput->sval = strDup(gTok.sval);
        nextToken();
        int hasSeparator = tokIs(TOK_SEMI) || tokIs(TOK_COMMA);
        if (hasSeparator)
            nextToken();
    }
    consoleInput->a = parseInputVars(ln);
    return consoleInput;
}
// Parse an ELSEIF chain, representing it as a nested NODE_IF:
//   ELSEIF expr THEN \n block { ELSEIF ... } [ELSE block]
// In the returned node, a = condition, b = THEN block, and c = the next
// ELSEIF (another NODE_IF) or the ELSE block, when present.
// The closing END IF is left for the caller (parseIf) to consume.
static Node *parseElseifChain(void) {
    int ln = gTok.line;
    tokExpect(TOK_ELSEIF);
    Node *test = parseExpr();
    tokExpect(TOK_THEN);

    Node *branch = newNode(NODE_IF, ln);
    branch->a = test;
    skipEol();
    branch->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END);

    if (tokIs(TOK_ELSEIF)) {
        // Further ELSEIF: recurse so the chain nests to the right
        branch->c = parseElseifChain();
        return branch;
    }
    if (tokAccept(TOK_ELSE)) {
        skipEol();
        branch->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
    }
    return branch;
}
// Parse IF block:
// IF expr THEN stmt (single-line)
// IF expr THEN \n block {ELSEIF...} [ELSE block] END IF
static Node *parseIf(void) {
int ln = gTok.line;
tokExpect(TOK_IF);
Node *cond = parseExpr();
tokExpect(TOK_THEN);
Node *n = newNode(NODE_IF, ln);
n->a = cond;
// Single-line IF: statement on same line after THEN
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
Node *stmt = parseStatement();
Node *blk = newNode(NODE_BLOCK, ln);
blk->a = stmt;
n->b = blk;
return n;
}
// Multi-line IF
skipEol();
n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END);
// ELSEIF chain: parse as a nested IF node
if (tokIs(TOK_ELSEIF)) {
n->c = parseElseifChain();
}
// ELSE block
else if (tokAccept(TOK_ELSE)) {
skipEol();
n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
}
// END IF
tokExpect(TOK_END);
tokExpect(TOK_IF);
return n;
}
// Parse FOR loop: FOR var = start TO end [STEP step] \n block NEXT [var]
// Returns a NODE_FOR with sval = loop variable, a = start, b = end,
// c = step (NULL when STEP is omitted), d = body block.
// An identifier after NEXT is consumed but not compared with the loop variable.
static Node *parseFor(void) {
    int ln = gTok.line;
    tokExpect(TOK_FOR);
    if (!tokIs(TOK_IDENT))
        fatal(ln, "Expected variable after FOR");
    char name[MAX_TOKEN_LEN];
    strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
    // strncpy does not terminate when the source fills the buffer
    name[MAX_TOKEN_LEN - 1] = '\0';
    nextToken();

    tokExpect(TOK_EQ);
    Node *start = parseExpr();
    tokExpect(TOK_TO);
    Node *end = parseExpr();
    Node *step = NULL;
    if (tokAccept(TOK_STEP)) {
        step = parseExpr();
    }

    Node *n = newNode(NODE_FOR, ln);
    n->sval = strDup(name);
    n->a = start;
    n->b = end;
    n->c = step;
    skipEol();
    n->d = parseBlock(TOK_NEXT, TOK_EOF, TOK_EOF);
    tokExpect(TOK_NEXT);
    // Optional variable name after NEXT
    if (tokIs(TOK_IDENT)) nextToken();
    return n;
}
// Parse WHILE loop: WHILE expr \n block WEND
// Returns a NODE_WHILE with a = condition and b = body block.
static Node *parseWhile(void) {
    int ln = gTok.line;
    tokExpect(TOK_WHILE);
    Node *test = parseExpr();

    Node *loop = newNode(NODE_WHILE, ln);
    loop->a = test;
    skipEol();
    Node *body = parseBlock(TOK_WEND, TOK_EOF, TOK_EOF);
    loop->b = body;
    tokExpect(TOK_WEND);
    return loop;
}
// Parse DO/LOOP:
//   DO [WHILE|UNTIL expr] \n block LOOP [WHILE|UNTIL expr]
// Returns a NODE_DO_LOOP with a = condition (unset for an unconditional
// loop), b = body block, and ival = flags:
//   bit0 = condition is UNTIL (vs WHILE), bit1 = condition is at the bottom.
// A condition may appear on DO or on LOOP, but not both; the node has room
// for only one, so giving both is a fatal error rather than silently
// discarding the first.
static Node *parseDoLoop(void) {
    int ln = gTok.line;
    tokExpect(TOK_DO);
    Node *n = newNode(NODE_DO_LOOP, ln);
    n->ival = 0;  // flags: bit0 = isUntil, bit1 = conditionAtBottom

    // Optional top condition
    int hasTopCond = 0;
    if (tokIs(TOK_WHILE)) {
        nextToken();
        n->a = parseExpr();
        hasTopCond = 1;
    } else if (tokIs(TOK_UNTIL)) {
        nextToken();
        n->a = parseExpr();
        n->ival |= 1;  // UNTIL (vs WHILE)
        hasTopCond = 1;
    }

    skipEol();
    n->b = parseBlock(TOK_LOOP, TOK_EOF, TOK_EOF);
    tokExpect(TOK_LOOP);

    // Optional bottom condition
    if (tokIs(TOK_WHILE) || tokIs(TOK_UNTIL)) {
        if (hasTopCond)
            fatal(gTok.line,
                  "DO loop cannot have a condition on both DO and LOOP");
        int isUntil = tokIs(TOK_UNTIL);
        nextToken();
        n->a = parseExpr();
        n->ival = isUntil ? 3 : 2;  // 2 = WHILE at bottom, 3 = UNTIL at bottom
    }
    // If no condition at all, infinite loop (DO...LOOP)
    return n;
}
// Parse SUB declaration:
// SUB name([BYVAL|BYREF] param AS type, ...) \n block END SUB
static Node *parseSub(void) {
int ln = gTok.line;
tokExpect(TOK_SUB);
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected subroutine name after SUB");
char name[MAX_TOKEN_LEN];
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
nextToken();
// Parse parameter list
Node *params = NULL, *ptail = NULL;
int pcount = 0;
Symbol *sym = symAdd(name);
sym->isFunc = 2; // SUB
sym->returnType = TYPE_VOID;
if (tokAccept(TOK_LPAREN)) {
while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) {
PassMode pm = PASS_BYVAL;
if (tokAccept(TOK_BYREF)) pm = PASS_BYREF;
else tokAccept(TOK_BYVAL); // optional BYVAL
if (!tokIs(TOK_IDENT))
fatal(gTok.line, "Expected parameter name");
Node *p = newNode(NODE_PARAM, gTok.line);
p->sval = strDup(gTok.sval);
p->ival = pm;
nextToken();
tokExpect(TOK_AS);
p->dataType = parseType();
// Record param in the function's symbol entry
if (pcount >= MAX_PARAMS)
fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS);
strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1);
sym->paramTypes[pcount] = p->dataType;
sym->paramModes[pcount] = pm;
pcount++;
// Also register the parameter as a variable for type inference
// inside the function body
Symbol *psym = symAdd(p->sval);
psym->dataType = p->dataType;
if (!params) params = ptail = p;
else { ptail->next = p; ptail = p; }
if (!tokAccept(TOK_COMMA)) break;
}
tokExpect(TOK_RPAREN);
}
sym->paramCount = pcount;
Node *n = newNode(NODE_SUB, ln);
n->sval = strDup(name);
n->a = params;
skipEol();
n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
tokExpect(TOK_END);
tokExpect(TOK_SUB);
return n;
}
// Parse FUNCTION declaration:
// FUNCTION name([params]) AS type \n block END FUNCTION
static Node *parseFunction(void) {
int ln = gTok.line;
tokExpect(TOK_FUNCTION);
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected function name after FUNCTION");
char name[MAX_TOKEN_LEN];
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
nextToken();
// Parse parameter list
Node *params = NULL, *ptail = NULL;
int pcount = 0;
Symbol *sym = symAdd(name);
sym->isFunc = 1; // FUNCTION
if (tokAccept(TOK_LPAREN)) {
while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) {
PassMode pm = PASS_BYVAL;
if (tokAccept(TOK_BYREF)) pm = PASS_BYREF;
else tokAccept(TOK_BYVAL);
if (!tokIs(TOK_IDENT))
fatal(gTok.line, "Expected parameter name");
Node *p = newNode(NODE_PARAM, gTok.line);
p->sval = strDup(gTok.sval);
p->ival = pm;
nextToken();
tokExpect(TOK_AS);
p->dataType = parseType();
if (pcount >= MAX_PARAMS)
fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS);
strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1);
sym->paramTypes[pcount] = p->dataType;
sym->paramModes[pcount] = pm;
pcount++;
// Register parameter as variable for type inference
Symbol *psym = symAdd(p->sval);
psym->dataType = p->dataType;
if (!params) params = ptail = p;
else { ptail->next = p; ptail = p; }
if (!tokAccept(TOK_COMMA)) break;
}
tokExpect(TOK_RPAREN);
}
sym->paramCount = pcount;
// Return type
tokExpect(TOK_AS);
DataType ret = parseType();
sym->returnType = ret;
sym->dataType = ret;
Node *n = newNode(NODE_FUNC, ln);
n->sval = strDup(name);
n->dataType = ret;
n->a = params;
skipEol();
n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
tokExpect(TOK_END);
tokExpect(TOK_FUNCTION);
return n;
}
// Parse LOCAL declaration: LOCAL name AS type
static Node *parseLocal(void) {
int ln = gTok.line;
tokExpect(TOK_LOCAL);
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected variable name after LOCAL");
Node *n = newNode(NODE_LOCAL, ln);
n->sval = strDup(gTok.sval);
nextToken();
tokExpect(TOK_AS);
n->dataType = parseType();
return n;
}
// Parse STATIC declaration: STATIC name AS type
static Node *parseStatic(void) {
int ln = gTok.line;
tokExpect(TOK_STATIC);
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected variable name after STATIC");
Node *n = newNode(NODE_STATIC, ln);
n->sval = strDup(gTok.sval);
nextToken();
tokExpect(TOK_AS);
n->dataType = parseType();
return n;
}
// Parse DATA statement: DATA literal, literal, ...
// Items can be integer, double or string literals; numeric literals may be
// preceded by '-'. A '-' in front of a string literal is a fatal error.
// Returns a NODE_DATA whose `a` is the linked list of literal nodes.
// Note: the "DATA" keyword is already consumed by parseStatement
static Node *parseData(void) {
    int ln = gTok.line;
    Node *n = newNode(NODE_DATA, ln);
    Node *head = NULL, *tail = NULL;
    do {
        Node *item = NULL;
        // Handle negative numeric literals
        int neg = 0;
        if (tokIs(TOK_MINUS)) {
            neg = 1;
            nextToken();
        }
        if (tokIs(TOK_INT_LIT)) {
            item = newNode(NODE_INT_LIT, ln);
            item->ival = neg ? -gTok.ival : gTok.ival;
            item->dataType = TYPE_INT;
            nextToken();
        } else if (tokIs(TOK_DBL_LIT)) {
            item = newNode(NODE_DBL_LIT, ln);
            item->dval = neg ? -gTok.dval : gTok.dval;
            item->dataType = TYPE_DBL;
            nextToken();
        } else if (tokIs(TOK_STR_LIT)) {
            // A sign only makes sense in front of a number; do not drop it
            // silently.
            if (neg)
                fatal(ln, "Expected numeric literal after '-' in DATA statement");
            item = newNode(NODE_STR_LIT, ln);
            item->sval = strDup(gTok.sval);
            item->dataType = TYPE_STR;
            nextToken();
        } else {
            fatal(ln, "Expected literal value in DATA statement");
        }
        if (!head) head = tail = item;
        else { tail->next = item; tail = item; }
    } while (tokAccept(TOK_COMMA));
    n->a = head;
    return n;
}
// Parse READ statement: READ var1, var2, ...
// Returns a NODE_READ whose `a` is a linked list of NODE_IDENT nodes, each
// typed by inferVarType(). At least one variable is required.
// Note: the "READ" keyword is already consumed by parseStatement
static Node *parseRead(void) {
    int ln = gTok.line;
    Node *readNode = newNode(NODE_READ, ln);
    Node *first = NULL;
    Node *last = NULL;
    for (;;) {
        if (!tokIs(TOK_IDENT))
            fatal(ln, "Expected variable name in READ");
        Node *var = newNode(NODE_IDENT, ln);
        var->sval = strDup(gTok.sval);
        var->dataType = inferVarType(gTok.sval);
        nextToken();

        if (first == NULL)
            first = var;
        else
            last->next = var;
        last = var;

        if (!tokAccept(TOK_COMMA))
            break;
    }
    readNode->a = first;
    return readNode;
}
// Parse RESTORE statement: RESTORE [line_number | label]
// Returns a NODE_RESTORE. A numeric target is stored in ival and a label
// name in sval; with no target neither field is assigned.
// Note: the "RESTORE" keyword is already consumed by parseStatement
static Node *parseRestore(void) {
    int ln = gTok.line;
    Node *restore = newNode(NODE_RESTORE, ln);

    if (tokIs(TOK_INT_LIT)) {
        restore->ival = gTok.ival;
        nextToken();
        return restore;
    }
    if (tokIs(TOK_IDENT)) {
        restore->sval = strDup(gTok.sval);
        nextToken();
    }
    return restore;
}
// Parse OPEN statement:
//   OPEN "filename" FOR INPUT|OUTPUT|APPEND|BINARY|RANDOM AS #n [LEN = expr]
// Returns a NODE_OPEN with a = filename expression, b = file number,
// c = record length expression (only when LEN = is given), and ival = mode:
//   0 = INPUT, 1 = OUTPUT, 2 = APPEND, 3 = BINARY, 4 = RANDOM.
// RANDOM and LEN are matched as identifiers, not dedicated tokens.
static Node *parseOpen(void) {
    int ln = gTok.line;
    tokExpect(TOK_OPEN);
    Node *open = newNode(NODE_OPEN, ln);
    open->a = parseExpr();  // filename expression
    tokExpect(TOK_FOR);

    // Work out the access mode from the token after FOR
    int mode = -1;
    if (tokIs(TOK_INPUT))
        mode = 0;
    else if (tokIs(TOK_OUTPUT))
        mode = 1;
    else if (tokIs(TOK_APPEND))
        mode = 2;
    else if (tokIs(TOK_BINARY))
        mode = 3;
    else if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOM") == 0)
        mode = 4;

    if (mode < 0) {
        fatal(ln, "Expected INPUT, OUTPUT, APPEND, BINARY, or RANDOM after FOR");
    } else {
        open->ival = mode;
        nextToken();
    }

    tokExpect(TOK_AS);
    open->b = parseFileNumber();  // file number expression

    // Optional LEN = expr for RANDOM access
    int hasLen = tokIs(TOK_IDENT) && strIcmp(gTok.sval, "LEN") == 0;
    if (hasLen) {
        nextToken();
        tokExpect(TOK_EQ);
        open->c = parseExpr();
    }
    return open;
}
// Parse CLOSE statement: CLOSE #n
// Returns a NODE_CLOSE whose `b` is the file number expression.
static Node *parseClose(void) {
    int ln = gTok.line;
    tokExpect(TOK_CLOSE);
    Node *closeNode = newNode(NODE_CLOSE, ln);
    Node *fileNum = parseFileNumber();
    closeNode->b = fileNum;
    return closeNode;
}
// Parse a single statement
static Node *parseStatement(void) {
int ln = gTok.line;
// Line-number label: a bare integer at the start of a statement
if (tokIs(TOK_INT_LIT)) {
int lnum = gTok.ival;
nextToken();
// If followed by a statement, this is a labeled line
Node *lbl = newNode(NODE_LABEL, ln);
lbl->ival = lnum;
recordLineLabel(lnum);
// If there's a statement on this line, chain it
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
Node *stmt = parseStatement();
lbl->next = stmt;
}
return lbl;
}
// Named label: identifier followed by colon (e.g., myLabel:)
// Must peek ahead to distinguish from statement separator colons.
// Only treat as label if the NEXT token is a colon.
if (tokIs(TOK_IDENT)) {
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
char labelName[MAX_IDENT];
strncpy(labelName, gTok.sval, MAX_IDENT - 1);
labelName[MAX_IDENT - 1] = '\0';
nextToken();
if (tokIs(TOK_COLON)) {
if (isKeyword(labelName))
fatal(ln, "Cannot use keyword '%s' as label", labelName);
nextToken();
Node *lbl = newNode(NODE_LABEL, ln);
lbl->ival = 0; // 0 = named label, not numeric
lbl->sval = strDup(labelName);
// If there's a statement on this line, chain it
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
Node *stmt = parseStatement();
lbl->next = stmt;
}
return lbl;
}
// Not a label — restore state
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
}
// TYPE ... END TYPE
if (tokIs(TOK_TYPE)) return parseTypeDef();
// DIM
if (tokIs(TOK_DIM)) return parseDim();
// REDIM
if (tokIs(TOK_REDIM)) return parseRedim();
// PRINT
if (tokIs(TOK_PRINT)) return parsePrint();
// INPUT / INPUT #
if (tokIs(TOK_INPUT)) return parseInput();
// OPEN
if (tokIs(TOK_OPEN)) return parseOpen();
// CLOSE
if (tokIs(TOK_CLOSE)) return parseClose();
// DATA, READ, RESTORE are contextual keywords — checked as identifiers
// to avoid colliding with user variable names like "data(i)".
// We peek ahead: DATA is a keyword only when NOT followed by '(' or '='.
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "DATA") == 0) {
// Save state and peek at next token
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
nextToken();
if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) {
// It's a DATA statement
return parseData();
}
// Restore — it's a variable named "data"
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
}
// READ (contextual keyword — same peek-ahead logic)
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "READ") == 0) {
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
nextToken();
if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) {
return parseRead();
}
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
}
// RESTORE (contextual keyword)
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RESTORE") == 0) {
nextToken();
return parseRestore();
}
// GET #filenum, record, variable (contextual keyword)
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "GET") == 0) {
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
nextToken();
if (tokIs(TOK_HASH)) {
Node *n = newNode(NODE_GET, ln);
n->a = parseFileNumber();
tokExpect(TOK_COMMA);
n->b = parseExpr();
tokExpect(TOK_COMMA);
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected variable name in GET");
Node *v = newNode(NODE_IDENT, ln);
v->sval = strDup(gTok.sval);
v->dataType = inferVarType(gTok.sval);
nextToken();
n->c = v;
return n;
}
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
}
// PUT #filenum, record, variable (contextual keyword)
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "PUT") == 0) {
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
nextToken();
if (tokIs(TOK_HASH)) {
Node *n = newNode(NODE_PUT, ln);
n->a = parseFileNumber();
tokExpect(TOK_COMMA);
n->b = parseExpr();
tokExpect(TOK_COMMA);
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected variable name in PUT");
Node *v = newNode(NODE_IDENT, ln);
v->sval = strDup(gTok.sval);
v->dataType = inferVarType(gTok.sval);
nextToken();
n->c = v;
return n;
}
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
}
// RANDOMIZE [seed] (contextual keyword)
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOMIZE") == 0) {
nextToken();
Node *n = newNode(NODE_RANDOMIZE, ln);
// Optional seed expression
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF) &&
!tokIs(TOK_ELSE)) {
n->a = parseExpr();
}
return n;
}
// MID$ assignment: MID$(s$, start, len) = replacement$
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "MID$") == 0) {
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
nextToken();
if (tokIs(TOK_LPAREN)) {
nextToken();
Node *n = newNode(NODE_MID_ASSIGN, ln);
// Parse target string variable
if (!tokIs(TOK_IDENT)) fatal(ln, "Expected string variable in MID$ assignment");
n->a = newNode(NODE_IDENT, ln);
n->a->sval = strDup(gTok.sval);
n->a->dataType = TYPE_STR;
nextToken();
tokExpect(TOK_COMMA);
n->b = parseExpr(); // start position
if (tokAccept(TOK_COMMA)) {
n->c = parseExpr(); // length
} else {
// No length — use large value
Node *big = newNode(NODE_INT_LIT, ln);
big->ival = 32767;
n->c = big;
}
tokExpect(TOK_RPAREN);
tokExpect(TOK_EQ);
n->d = parseExpr(); // replacement string
return n;
}
// Not MID$ assignment — restore
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
}
// LINE INPUT #
if (tokIs(TOK_LINE)) {
nextToken();
tokExpect(TOK_INPUT);
int lln = ln;
Node *n = newNode(NODE_LINE_INPUT, lln);
n->b = parseFileNumber();
tokExpect(TOK_COMMA);
if (!tokIs(TOK_IDENT))
fatal(lln, "Expected variable name in LINE INPUT");
Node *v = newNode(NODE_IDENT, lln);
v->sval = strDup(gTok.sval);
v->dataType = TYPE_STR;
nextToken();
n->a = v;
return n;
}
// WRITE #
if (tokIs(TOK_WRITE)) {
nextToken();
Node *n = newNode(NODE_FILE_WRITE, ln);
n->b = parseFileNumber();
tokExpect(TOK_COMMA);
Node *head = NULL, *tail = NULL;
do {
Node *e = parseExpr();
if (!head) head = tail = e;
else { tail->next = e; tail = e; }
} while (tokAccept(TOK_COMMA));
n->a = head;
return n;
}
// IF
if (tokIs(TOK_IF)) return parseIf();
// FOR
if (tokIs(TOK_FOR)) return parseFor();
// WHILE
if (tokIs(TOK_WHILE)) return parseWhile();
// DO
if (tokIs(TOK_DO)) return parseDoLoop();
// SUB
if (tokIs(TOK_SUB)) return parseSub();
// FUNCTION
if (tokIs(TOK_FUNCTION)) return parseFunction();
// LOCAL
if (tokIs(TOK_LOCAL)) return parseLocal();
// STATIC
if (tokIs(TOK_STATIC)) return parseStatic();
// GOTO
if (tokIs(TOK_GOTO)) {
nextToken();
Node *n = newNode(NODE_GOTO, ln);
if (tokIs(TOK_INT_LIT)) {
n->ival = gTok.ival;
recordGotoTarget(n->ival);
nextToken();
} else if (tokIs(TOK_IDENT)) {
n->sval = strDup(gTok.sval);
recordGotoStrTarget(n->sval);
nextToken();
} else {
fatal(ln, "Expected line number or label after GOTO");
}
return n;
}
// GOSUB
if (tokIs(TOK_GOSUB)) {
nextToken();
Node *n = newNode(NODE_GOSUB, ln);
if (tokIs(TOK_INT_LIT)) {
n->ival = gTok.ival;
recordGotoTarget(n->ival);
nextToken();
} else if (tokIs(TOK_IDENT)) {
n->sval = strDup(gTok.sval);
recordGotoStrTarget(n->sval);
nextToken();
} else {
fatal(ln, "Expected line number or label after GOSUB");
}
if (gGosubCount >= MAX_GOSUB_SITES)
fatal(ln, "Too many GOSUB sites (max %d)", MAX_GOSUB_SITES);
n->ival2 = gGosubCount++;
return n;
}
// RETURN
if (tokIs(TOK_RETURN)) {
nextToken();
Node *n = newNode(NODE_RETURN, ln);
// Optional return expression for FUNCTION
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
n->a = parseExpr();
}
return n;
}
// EXIT (FOR | WHILE | DO | SUB | FUNCTION)
if (tokIs(TOK_EXIT)) {
nextToken();
Node *n = newNode(NODE_EXIT, ln);
if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); }
else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); }
else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); }
else if (tokIs(TOK_SUB)) { n->ival = TOK_SUB; nextToken(); }
else if (tokIs(TOK_FUNCTION)){n->ival = TOK_FUNCTION;nextToken(); }
else fatal(ln, "Expected FOR, WHILE, DO, SUB, or FUNCTION after EXIT");
return n;
}
// CONTINUE (FOR | WHILE | DO) — contextual keyword
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "CONTINUE") == 0) {
nextToken();
Node *n = newNode(NODE_CONTINUE, ln);
if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); }
else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); }
else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); }
else fatal(ln, "Expected FOR, WHILE, or DO after CONTINUE");
return n;
}
// END (program termination)
if (tokIs(TOK_END)) {
// Peek ahead: END IF / END SUB / END FUNCTION are handled by callers.
// Bare END means program exit.
// Save position to check next token
int savePos = gSrcPos;
int saveLine = gLine;
Token saveTok = gTok;
nextToken();
// If followed by IF, SUB, FUNCTION, or SELECT, put END back (the caller handles it)
if (tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION) ||
tokIs(TOK_SELECT)) {
gSrcPos = savePos;
gLine = saveLine;
gTok = saveTok;
return NULL; // signal to caller: block terminator reached
}
// Bare END
Node *n = newNode(NODE_END, ln);
return n;
}
// CONST name = value
if (tokIs(TOK_CONST)) {
nextToken();
if (!tokIs(TOK_IDENT)) fatal(ln, "Expected identifier after CONST");
char cname[MAX_IDENT];
strncpy(cname, gTok.sval, MAX_IDENT - 1);
cname[MAX_IDENT - 1] = '\0';
nextToken();
tokExpect(TOK_EQ);
// Parse the value — must be a literal
Node *val = parseExpr();
Node *n = newNode(NODE_CONST_DECL, ln);
n->sval = strDup(cname);
n->a = val;
// Store in constant table
if (gConstCount < MAX_CONSTS) {
strncpy(gConsts[gConstCount].name, cname, MAX_IDENT - 1);
if (val->type == NODE_STR_LIT) {
gConsts[gConstCount].dataType = TYPE_STR;
strncpy(gConsts[gConstCount].strVal, val->sval, MAX_TOKEN_LEN - 1);
} else if (val->type == NODE_DBL_LIT) {
gConsts[gConstCount].dataType = TYPE_DBL;
gConsts[gConstCount].numVal = val->dval;
} else if (val->type == NODE_INT_LIT) {
gConsts[gConstCount].dataType = TYPE_INT;
gConsts[gConstCount].numVal = val->ival;
} else if (val->type == NODE_UNOP && val->ival == TOK_MINUS) {
// Handle negative constants like CONST X = -1
gConsts[gConstCount].dataType = TYPE_DBL;
if (val->a->type == NODE_INT_LIT)
gConsts[gConstCount].numVal = -(double)val->a->ival;
else
gConsts[gConstCount].numVal = -val->a->dval;
} else {
fatal(ln, "CONST value must be a literal");
}
gConstCount++;
}
return n;
}
// SWAP var1, var2 (contextual keyword)
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "SWAP") == 0) {
nextToken();
Node *n = newNode(NODE_SWAP, ln);
n->a = parseExpr();
tokExpect(TOK_COMMA);
n->b = parseExpr();
return n;
}
// SELECT CASE expr ... CASE ... END SELECT
if (tokIs(TOK_SELECT)) {
nextToken();
tokExpect(TOK_CASE);
Node *n = newNode(NODE_SELECT, ln);
n->a = parseExpr();
skipNewlines();
// Parse CASE blocks
Node *caseHead = NULL, *caseTail = NULL;
while (tokIs(TOK_CASE)) {
nextToken();
Node *cb = newNode(NODE_CASE, gLine);
// CASE ELSE
if (tokIs(TOK_ELSE)) {
nextToken();
cb->ival = 1; // flag: CASE ELSE
} else {
// Parse comma-separated values/ranges
Node *valHead = NULL, *valTail = NULL;
for (;;) {
Node *v;
// CASE IS >/</>=/<=/<>/= expr
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "IS") == 0) {
nextToken();
// Expect a comparison operator
int op = gTok.type;
if (op != TOK_EQ && op != TOK_NE && op != TOK_LT &&
op != TOK_GT && op != TOK_LE && op != TOK_GE)
fatal(gLine, "Expected comparison operator after IS");
nextToken();
v = newNode(NODE_BINOP, gLine);
v->ival = op;
v->a = NULL; // placeholder: test expr filled at codegen
v->b = parseExpr();
v->ival2 = 1; // flag: IS comparison
} else {
v = parseExpr();
// Check for TO (range)
if (tokIs(TOK_TO)) {
nextToken();
Node *range = newNode(NODE_BINOP, gLine);
range->ival = TOK_TO; // reuse TO token as range marker
range->a = v;
range->b = parseExpr();
range->ival2 = 2; // flag: range
v = range;
}
}
if (!valHead) { valHead = valTail = v; }
else { valTail->next = v; valTail = v; }
if (!tokAccept(TOK_COMMA)) break;
}
cb->a = valHead;
}
skipNewlines();
// Parse body until next CASE or END SELECT
Node *bodyHead = NULL, *bodyTail = NULL;
while (!tokIs(TOK_CASE) && !tokIs(TOK_END) && !tokIs(TOK_EOF)) {
Node *s = parseStatement();
if (!s) break;
if (!bodyHead) { bodyHead = bodyTail = s; }
else { bodyTail->next = s; bodyTail = s; }
skipNewlines();
}
cb->b = bodyHead;
if (!caseHead) { caseHead = caseTail = cb; }
else { caseTail->next = cb; caseTail = cb; }
}
// Expect END SELECT
tokExpect(TOK_END);
tokExpect(TOK_SELECT);
n->b = caseHead;
return n;
}
// ON expr GOTO/GOSUB label1, label2, ...
if (tokIs(TOK_ON)) {
nextToken();
Node *expr = parseExpr();
if (tokIs(TOK_GOTO)) {
nextToken();
Node *n = newNode(NODE_ON_GOTO, ln);
n->a = expr;
// Parse comma-separated labels
Node *labHead = NULL, *labTail = NULL;
for (;;) {
Node *lab;
if (tokIs(TOK_INT_LIT)) {
lab = newNode(NODE_INT_LIT, gLine);
lab->ival = gTok.ival;
recordGotoTarget(lab->ival);
nextToken();
} else if (tokIs(TOK_IDENT)) {
lab = newNode(NODE_IDENT, gLine);
lab->sval = strDup(gTok.sval);
recordGotoStrTarget(lab->sval);
nextToken();
} else {
fatal(gLine, "Expected label in ON GOTO");
}
if (!labHead) { labHead = labTail = lab; }
else { labTail->next = lab; labTail = lab; }
if (!tokAccept(TOK_COMMA)) break;
}
n->b = labHead;
return n;
} else if (tokIs(TOK_GOSUB)) {
nextToken();
Node *n = newNode(NODE_ON_GOSUB, ln);
n->a = expr;
n->ival2 = gGosubCount; // first return-point id
// Parse comma-separated labels
Node *labHead = NULL, *labTail = NULL;
for (;;) {
Node *lab;
if (tokIs(TOK_INT_LIT)) {
lab = newNode(NODE_INT_LIT, gLine);
lab->ival = gTok.ival;
recordGotoTarget(lab->ival);
nextToken();
} else if (tokIs(TOK_IDENT)) {
lab = newNode(NODE_IDENT, gLine);
lab->sval = strDup(gTok.sval);
recordGotoStrTarget(lab->sval);
nextToken();
} else {
fatal(gLine, "Expected label in ON GOSUB");
}
gGosubCount++; // allocate return-point id for each target
if (!labHead) { labHead = labTail = lab; }
else { labTail->next = lab; labTail = lab; }
if (!tokAccept(TOK_COMMA)) break;
}
n->b = labHead;
return n;
} else {
fatal(ln, "Expected GOTO or GOSUB after ON expression");
}
}
// CALL name(args)
if (tokIs(TOK_CALL)) {
nextToken();
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected subroutine name after CALL");
Node *n = newNode(NODE_CALL, ln);
n->sval = strDup(gTok.sval);
nextToken();
// Parse argument list
Node *args = NULL, *atail = NULL;
if (tokAccept(TOK_LPAREN)) {
while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) {
Node *arg = parseExpr();
if (!args) args = atail = arg;
else { atail->next = arg; atail = arg; }
if (!tokAccept(TOK_COMMA)) break;
}
tokExpect(TOK_RPAREN);
}
n->a = args;
return n;
}
// LET assignment or implicit assignment/call
if (tokIs(TOK_LET)) nextToken(); // consume optional LET
if (tokIs(TOK_IDENT)) {
char name[MAX_TOKEN_LEN];
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
nextToken();
// Array element assignment or sub call: name(args) [= expr]
if (tokIs(TOK_LPAREN)) {
nextToken();
Node *idx = parseExpr();
Node *tail = idx;
while (tokAccept(TOK_COMMA)) {
Node *dimIdx = parseExpr();
tail->next = dimIdx;
tail = dimIdx;
}
tokExpect(TOK_RPAREN);
// Check for array-element dot-access assignment: arr(i).field[.field...] = expr
if (tokIs(TOK_DOT)) {
Symbol *s = symLookup(name);
if (s && s->dataType == TYPE_UDT) {
Node *cur = newNode(NODE_ARRAY_REF, ln);
cur->sval = strDup(name);
cur->a = idx;
cur->dataType = TYPE_UDT;
cur->ival2 = s->udtIndex;
int curUdt = s->udtIndex;
while (curUdt >= 0 && tokIs(TOK_DOT)) {
nextToken();
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected field name after '.'");
int fi = udtFieldLookup(curUdt, gTok.sval);
if (fi < 0)
fatal(ln, "Unknown field '%s'", gTok.sval);
Node *dot = newNode(NODE_DOT_ACCESS, ln);
dot->a = cur;
dot->sval = strDup(gTok.sval);
dot->ival2 = curUdt;
UdtField *uf = &gUdts[curUdt].fields[fi];
dot->dataType = uf->dataType;
if (uf->dataType == TYPE_STR && uf->strLen > 0)
dot->ival = uf->strLen;
cur = dot;
curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
nextToken();
}
tokExpect(TOK_EQ);
Node *val = parseExpr();
Node *n = newNode(NODE_ASSIGN, ln);
n->a = cur;
n->b = val;
return n;
}
}
if (tokAccept(TOK_EQ)) {
// Array element assignment: name(i, j, ...) = expr
Node *val = parseExpr();
Node *target = newNode(NODE_ARRAY_REF, ln);
target->sval = strDup(name);
target->a = idx;
target->dataType = inferVarType(name);
Node *n = newNode(NODE_ASSIGN, ln);
n->a = target;
n->b = val;
return n;
}
// Not an assignment, so it must be a sub call: name(args)
Node *n = newNode(NODE_CALL, ln);
n->sval = strDup(name);
n->a = idx;
return n;
}
// Dot-access assignment: var.field[.field...] = expr
if (tokIs(TOK_DOT)) {
Symbol *s = symLookup(name);
if (s && s->dataType == TYPE_UDT) {
Node *cur = newNode(NODE_IDENT, ln);
cur->sval = strDup(name);
cur->dataType = TYPE_UDT;
int curUdt = s->udtIndex;
while (curUdt >= 0 && tokIs(TOK_DOT)) {
nextToken();
if (!tokIs(TOK_IDENT))
fatal(ln, "Expected field name after '.'");
int fi = udtFieldLookup(curUdt, gTok.sval);
if (fi < 0)
fatal(ln, "Unknown field '%s' in type '%s'",
gTok.sval, gUdts[curUdt].name);
Node *dot = newNode(NODE_DOT_ACCESS, ln);
dot->a = cur;
dot->sval = strDup(gTok.sval);
dot->ival2 = curUdt;
UdtField *uf = &gUdts[curUdt].fields[fi];
dot->dataType = uf->dataType;
if (uf->dataType == TYPE_STR && uf->strLen > 0)
dot->ival = uf->strLen;
cur = dot;
curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
nextToken();
}
tokExpect(TOK_EQ);
Node *val = parseExpr();
Node *n = newNode(NODE_ASSIGN, ln);
n->a = cur;
n->b = val;
return n;
}
}
// Simple variable assignment: name = expr
if (tokAccept(TOK_EQ)) {
Node *val = parseExpr();
Node *target = newNode(NODE_IDENT, ln);
target->sval = strDup(name);
target->dataType = inferVarType(name);
Node *n = newNode(NODE_ASSIGN, ln);
n->a = target;
n->b = val;
return n;
}
// Implicit sub call without CALL keyword: name arg1, arg2, ...
Node *n = newNode(NODE_CALL, ln);
n->sval = strDup(name);
Node *args = NULL, *atail = NULL;
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
Node *arg = parseExpr();
args = atail = arg;
while (tokAccept(TOK_COMMA)) {
arg = parseExpr();
atail->next = arg;
atail = arg;
}
}
n->a = args;
return n;
}
fatal(ln, "Unexpected token '%s' (type %d)", gTok.sval, gTok.type);
return NULL;
}
// Collect statements into a NODE_BLOCK until a terminator is reached.
//
// Parsing stops (without consuming the terminator) when the current token is
// EOF or one of end1/end2/end3, when END is followed by IF, SUB or FUNCTION,
// or when parseStatement() returns NULL.  The statements are stored as a
// linked list in the returned node's ->a field.
static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3) {
    Node *block = newNode(NODE_BLOCK, gTok.line);
    Node *first = NULL;
    Node *last = NULL;

    for (;;) {
        if (tokIs(TOK_EOF)) break;
        skipEol();
        if (tokIs(end1) || tokIs(end2) || tokIs(end3)) break;
        if (tokIs(TOK_EOF)) break;

        if (tokIs(TOK_END)) {
            // One-token lookahead: snapshot the lexer, peek past END, then
            // rewind unconditionally so END itself is never consumed here.
            int posBefore = gSrcPos;
            int lineBefore = gLine;
            Token tokBefore = gTok;
            nextToken();
            int closesBlock =
                tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION);
            gSrcPos = posBefore;
            gLine = lineBefore;
            gTok = tokBefore;
            // END IF / END SUB / END FUNCTION belong to the caller; a bare
            // END falls through to parseStatement().
            if (closesBlock) break;
        }

        Node *stmt = parseStatement();
        if (stmt == NULL) break; // parseStatement signalled a terminator

        // A statement may arrive as a chain (e.g. label + statement):
        // splice the whole chain in and move `last` to its end.
        if (first == NULL) {
            first = stmt;
            last = stmt;
        } else {
            last->next = stmt;
        }
        while (last->next != NULL) last = last->next;
    }

    block->a = first;
    return block;
}
// Parser entry point: loads the first token, parses statements until end of
// input, and returns a NODE_PROGRAM node whose ->a is the top-level block.
static Node *parseProgram(void) {
    nextToken(); // load the first token before any tokIs() check runs
    Node *body = parseBlock(TOK_EOF, TOK_EOF, TOK_EOF);
    Node *root = newNode(NODE_PROGRAM, 1);
    root->a = body;
    return root;
}
// -----------------------------------------------------------------------
// Section 8: Code Generator
//
// Walks the AST and emits C source code. The output includes:
// - A runtime library for strings, file I/O, and dynamic arrays
// (debug or release variant based on --release flag)
// - Packed struct definitions for user-defined types
// - A static DATA pool for DATA/READ/RESTORE
// - Forward declarations for SUBs and FUNCTIONs
// - SUB/FUNCTION implementations as C functions
// - A main() function containing global code
// -----------------------------------------------------------------------
// Forward declarations
// These generator routines are called before their definitions appear
// (e.g. genBuiltinCall calls genExpr, and genExpr calls genArrayFlatIndex),
// so they are declared up front.
static void genExpr(Node *n);   // emit C for one expression node
static void genStmt(Node *n);   // emit C for one statement node
static void genArrayFlatIndex(const char *name, Node *indices); // emit the flattened index of an array access
static void genBlock(Node *blk); // NOTE(review): defined outside this section; presumably emits each statement of a block
// Map a BASIC data type to the spelling of the C type used for it in the
// generated code.  Any type without a direct mapping yields "void".
static const char *cTypeStr(DataType dt) {
    const char *spelling = "void";

    if (dt == TYPE_BYTE)       spelling = "uint8_t";
    else if (dt == TYPE_INT)   spelling = "int16_t";
    else if (dt == TYPE_LONG)  spelling = "int32_t";
    else if (dt == TYPE_FLOAT) spelling = "float";
    else if (dt == TYPE_DBL)   spelling = "double";
    else if (dt == TYPE_STR)   spelling = "char*";

    return spelling;
}
// Return the C struct type string ("struct _b_<Name>") for a user-defined
// type, or "void" when udtIndex is out of range.
//
// The result lives in one of four static buffers used in rotation, so up to
// four results can be alive at once (e.g. as arguments of a single emit
// call); a fifth call overwrites the oldest one.
static const char *cUdtTypeStr(int udtIndex) {
    enum { SLOT_COUNT = 4 };
    static char slots[SLOT_COUNT][MAX_IDENT + 16];
    static unsigned nextSlot = 0;

    char *out = slots[nextSlot];
    // Wrap explicitly so the counter stays bounded instead of incrementing
    // a signed int forever.
    nextSlot = (nextSlot + 1u) % SLOT_COUNT;

    if (udtIndex >= 0 && udtIndex < gUdtCount)
        snprintf(out, sizeof slots[0], "struct _b_%s",
                 cleanName(gUdts[udtIndex].name));
    else
        snprintf(out, sizeof slots[0], "void");
    return out;
}
// Return the C expression used to initialise a freshly declared variable of
// the given BASIC type.  Only FLOAT, DOUBLE and STRING need something other
// than a plain "0".
static const char *cDefaultVal(DataType dt) {
    if (dt == TYPE_FLOAT) return "0.0f";
    if (dt == TYPE_DBL)   return "0.0";
    if (dt == TYPE_STR)   return "_bstr(\"\")";
    return "0";
}
// Return the printf conversion used to print a value of the given type.
// int16_t is promoted to int when passed through varargs, so %d is safe;
// int32_t is int on all modern platforms, so %d works there too.
static const char *cFmt(DataType dt) {
    if (dt == TYPE_STR) return "%s";
    if (dt == TYPE_BYTE) return "%u";
    if (dt == TYPE_FLOAT || dt == TYPE_DBL) return "%g";
    return "%d"; // INTEGER, LONG and anything unrecognised
}
// Return the scanf conversion used to read a value of the given type.
// Types not listed here fall back to the int16_t conversion.
static const char *cScanfFmt(DataType dt) {
    if (dt == TYPE_BYTE)  return "%hhu"; // uint8_t
    if (dt == TYPE_LONG)  return "%d";   // int32_t
    if (dt == TYPE_FLOAT) return "%f";
    if (dt == TYPE_DBL)   return "%lf";
    return "%hd";                        // int16_t needs the short format
}
// Return 1 when the expression node is typed as a string, 0 otherwise
// (including when n is NULL).
static int isStringExpr(Node *n) {
    return n != NULL && n->dataType == TYPE_STR;
}
// Return non-zero when `name` is a built-in function that yields a string.
// External function definitions take precedence, then the compile-time
// builtin table, then the handful of functions that genBuiltinCall
// generates by hand.
static int isBuiltinStrFunc(const char *name) {
    static const char *const handWritten[] = {
        "MID$", "LEFT$", "RIGHT$", "STRING$"
    };

    ExternFunc *ext = externFuncLookup(name);
    if (ext != NULL) return ext->returnType == TYPE_STR;

    const BuiltinDef *def = builtinDefLookup(name);
    if (def != NULL) return def->returnType == TYPE_STR;

    for (size_t i = 0; i < sizeof handWritten / sizeof handWritten[0]; i++) {
        if (strIcmp(name, handWritten[i]) == 0) return 1;
    }
    return 0;
}
// Return non-zero when `name` is any built-in function: an external
// function definition, a compile-time builtin, a built-in string function,
// or one of the functions genBuiltinCall generates by hand.
static int isBuiltinFunc(const char *name) {
    static const char *const handWritten[] = {
        "LEN", "VAL", "ASC", "INT", "ABS", "INSTR",
        "EOF", "LOF", "FREEFILE", "LBOUND", "UBOUND"
    };

    if (externFuncLookup(name)) return 1;
    if (builtinDefLookup(name)) return 1;
    if (isBuiltinStrFunc(name)) return 1;

    for (size_t i = 0; i < sizeof handWritten / sizeof handWritten[0]; i++) {
        if (strIcmp(name, handWritten[i]) == 0) return 1;
    }
    return 0;
}
// Return the length of the ->next chain starting at n (0 for NULL).
static int countList(Node *n) {
    int length = 0;
    for (Node *cur = n; cur != NULL; cur = cur->next) {
        length++;
    }
    return length;
}
// Stop translation when a built-in that dereferences its argument as a C
// string was called without one.  genExpr() renders a missing argument as
// the literal 0, which would produce strlen(0) / atof(0) / (0)[0].
static void genBuiltinRequireArg(const char *basicName, int argc) {
    if (argc < 1) fatal(0, "%s requires an argument", basicName);
}

// Emit "<cFunc>(<first argument>)".
static void genBuiltinUnary(const char *cFunc, Node *args) {
    emitRaw("%s(", cFunc);
    genExpr(args);
    emitRaw(")");
}

// Emit "<cFunc>(<arg1>, <arg2>)", failing when fewer than two arguments
// were supplied.
static void genBuiltinBinary(const char *basicName, const char *cFunc,
                             Node *args, int argc) {
    if (argc < 2) fatal(0, "%s requires 2 arguments", basicName);
    emitRaw("%s(", cFunc);
    genExpr(args);
    emitRaw(", ");
    genExpr(args->next);
    emitRaw(")");
}

// Expand a C code template: a bare '%' stands for the first argument and
// "%1".."%9" for the numbered argument; every other character is copied
// verbatim.  A placeholder with no matching argument expands to 0.
static void genBuiltinTemplate(const char *tmpl, Node *args) {
    const char *t = tmpl;
    while (*t) {
        if (*t != '%') {
            emitRaw("%c", *t);
            t++;
            continue;
        }
        t++;
        int argNum = 0;
        if (*t >= '1' && *t <= '9') {
            argNum = *t - '1';
            t++;
        }
        Node *arg = args;
        for (int i = 0; i < argNum && arg; i++)
            arg = arg->next;
        if (arg) genExpr(arg);
        else emitRaw("0"); // missing arg
    }
}

// Generate code for a built-in function call.
//
// name: BASIC function name (matched case-insensitively).
// args: linked list of argument expression nodes (may be NULL).
//
// Functions needing validation or custom code are handled explicitly;
// anything else is looked up in the external-function table and then the
// compile-time builtin table and expanded from its C template.  A name found
// nowhere is emitted as a plain C call.
static void genBuiltinCall(const char *name, Node *args) {
    int argc = countList(args);

    if (strIcmp(name, "LEN") == 0) {
        genBuiltinRequireArg("LEN", argc);
        emitRaw("((int)strlen(");
        genExpr(args);
        emitRaw("))");
    } else if (strIcmp(name, "VAL") == 0) {
        genBuiltinRequireArg("VAL", argc);
        genBuiltinUnary("atof", args);
    } else if (strIcmp(name, "ASC") == 0) {
        genBuiltinRequireArg("ASC", argc);
        emitRaw("((int)(unsigned char)(");
        genExpr(args);
        emitRaw(")[0])");
    } else if (strIcmp(name, "INT") == 0) {
        // BASIC INT rounds toward negative infinity (INT(-2.5) = -3); a bare
        // (int) cast would truncate toward zero instead.
        emitRaw("((int)floor((double)(");
        genExpr(args);
        emitRaw(")))");
    } else if (strIcmp(name, "ABS") == 0) {
        genBuiltinUnary("_babs", args);
    } else if (strIcmp(name, "MID$") == 0) {
        if (argc < 2) fatal(0, "MID$ requires at least 2 arguments");
        emitRaw("_bmid(");
        genExpr(args);
        emitRaw(", ");
        genExpr(args->next);
        if (argc >= 3) {
            emitRaw(", ");
            genExpr(args->next->next);
        } else {
            emitRaw(", -1"); // no length given
        }
        emitRaw(")");
    } else if (strIcmp(name, "LEFT$") == 0) {
        genBuiltinBinary("LEFT$", "_bleft", args, argc);
    } else if (strIcmp(name, "RIGHT$") == 0) {
        genBuiltinBinary("RIGHT$", "_bright", args, argc);
    } else if (strIcmp(name, "INSTR") == 0) {
        genBuiltinBinary("INSTR", "_binstr", args, argc);
    } else if (strIcmp(name, "STRING$") == 0) {
        genBuiltinBinary("STRING$", "_bstring_rep", args, argc);
    } else if (strIcmp(name, "EOF") == 0) {
        genBuiltinUnary("_beof", args);
    } else if (strIcmp(name, "LOF") == 0) {
        genBuiltinUnary("_blof", args);
    } else if (strIcmp(name, "FREEFILE") == 0) {
        emitRaw("_bfreefile()");
    // --- Array functions (need special codegen) ---
    } else if (strIcmp(name, "LBOUND") == 0) {
        emitRaw("0");
    } else if (strIcmp(name, "UBOUND") == 0) {
        if (args && args->type == NODE_IDENT) {
            emitRaw("(%s_size - 1)", cleanName(args->sval));
        } else {
            fatal(0, "UBOUND requires an array name");
        }
    } else {
        // External function definitions win over compile-time builtins.
        const char *tmpl = NULL;
        ExternFunc *ef = externFuncLookup(name);
        if (ef) {
            tmpl = ef->cCode;
        } else {
            const BuiltinDef *bd = builtinDefLookup(name);
            if (bd) tmpl = bd->cCode;
        }
        if (tmpl) {
            genBuiltinTemplate(tmpl, args);
        } else {
            // Unknown builtin: emit as an ordinary C call
            emitRaw("%s(", cleanName(name));
            for (Node *a = args; a; a = a->next) {
                if (a != args) emitRaw(", ");
                genExpr(a);
            }
            emitRaw(")");
        }
    }
}
// Emit a double as a C floating-point literal.
// Uses the fewest of 15..17 significant digits that read back as the same
// value, and appends ".0" when the text would otherwise look like an integer
// literal (e.g. 2.0 printed as "2"), so the literal keeps type double in the
// generated code.
static void genDoubleLiteral(double value) {
    char text[64];
    for (int digits = 15; digits <= 17; digits++) {
        double back = 0.0;
        snprintf(text, sizeof text, "%.*g", digits, value);
        if (sscanf(text, "%lf", &back) == 1 && back == value) break;
    }
    emitRaw("%s", text);
    // '.', an exponent, or inf/nan (both contain 'n') already mark it as
    // non-integer text.
    if (!strpbrk(text, ".eEn")) emitRaw(".0");
}

// Generate code for an expression node.
// A NULL node is emitted as the literal 0.  Node kinds that are not
// expressions produce a comment followed by 0 so the output stays
// syntactically valid.
static void genExpr(Node *n) {
    if (!n) { emitRaw("0"); return; }
    switch (n->type) {
    case NODE_INT_LIT:
        emitRaw("%d", n->ival);
        break;
    case NODE_DBL_LIT:
        genDoubleLiteral(n->dval);
        break;
    case NODE_STR_LIT:
        // Emit as a C string literal
        emitRaw("\"");
        for (const char *p = n->sval; p && *p; p++) {
            if (*p == '"') emitRaw("\\\"");
            else if (*p == '\\') emitRaw("\\\\");
            else if (*p == '\n') emitRaw("\\n");
            else if (*p == '\t') emitRaw("\\t");
            else emitRaw("%c", *p);
        }
        emitRaw("\"");
        break;
    case NODE_IDENT: {
        const char *cn = cleanName(n->sval);
        // Inside a function, check if this is the function name (return var)
        if (gInFunc && gFuncName && strIcmp(n->sval, gFuncName) == 0) {
            emitRaw("%s_ret", cn);
        } else if (isByrefParam(n->sval)) {
            // BYREF parameter: dereference the pointer
            emitRaw("(*%s)", cn);
        } else {
            emitRaw("%s", cn);
        }
        break;
    }
    case NODE_ARRAY_REF:
        emitRaw("%s[", cleanName(n->sval));
        genArrayFlatIndex(n->sval, n->a);
        emitRaw("]");
        break;
    case NODE_DOT_ACCESS:
        // base.field: base is in n->a, field name in n->sval
        genExpr(n->a);
        emitRaw(".%s", cleanName(n->sval));
        break;
    case NODE_UNOP:
        if (n->ival == TOK_MINUS) {
            emitRaw("(-(");
            genExpr(n->a);
            emitRaw("))");
        } else if (n->ival == TOK_NOT) {
            // If operand is a comparison, use logical NOT for cleaner code
            int isCmp = (n->a->type == NODE_BINOP &&
                         (n->a->ival == TOK_EQ || n->a->ival == TOK_NE ||
                          n->a->ival == TOK_LT || n->a->ival == TOK_GT ||
                          n->a->ival == TOK_LE || n->a->ival == TOK_GE ||
                          n->a->ival == TOK_AND || n->a->ival == TOK_OR));
            if (isCmp || n->a->type == NODE_UNOP) {
                emitRaw("(!(");
                genExpr(n->a);
                emitRaw("))");
            } else {
                emitRaw("(~(int)(");
                genExpr(n->a);
                emitRaw("))");
            }
        }
        break;
    case NODE_BINOP: {
        int op = n->ival;
        // String concatenation
        if (n->dataType == TYPE_STR && (op == TOK_PLUS || op == TOK_AMP)) {
            emitRaw("_bconcat(");
            genExpr(n->a);
            emitRaw(", ");
            genExpr(n->b);
            emitRaw(")");
            break;
        }
        // String comparison
        if (isStringExpr(n->a) && isStringExpr(n->b)) {
            const char *cmpOp;
            switch (op) {
            case TOK_EQ: cmpOp = "==0"; break;
            case TOK_NE: cmpOp = "!=0"; break;
            case TOK_LT: cmpOp = "<0"; break;
            case TOK_GT: cmpOp = ">0"; break;
            case TOK_LE: cmpOp = "<=0"; break;
            case TOK_GE: cmpOp = ">=0"; break;
            default: cmpOp = "==0"; break;
            }
            emitRaw("(strcmp(");
            genExpr(n->a);
            emitRaw(", ");
            genExpr(n->b);
            emitRaw(")%s)", cmpOp);
            break;
        }
        // Power operator: emit as pow() call
        if (op == TOK_CARET) {
            emitRaw("pow(");
            genExpr(n->a);
            emitRaw(", ");
            genExpr(n->b);
            emitRaw(")");
            break;
        }
        // Integer division: cast operands to int
        if (op == TOK_BSLASH) {
            emitRaw("((int)(");
            genExpr(n->a);
            emitRaw(") / (int)(");
            genExpr(n->b);
            emitRaw("))");
            break;
        }
        // Float division: BASIC '/' always produces a floating-point result
        if (op == TOK_SLASH) {
            emitRaw("((double)(");
            genExpr(n->a);
            emitRaw(") / (double)(");
            genExpr(n->b);
            emitRaw("))");
            break;
        }
        // All other numeric and logical binary operators
        emitRaw("(");
        genExpr(n->a);
        switch (op) {
        case TOK_PLUS: emitRaw(" + "); break;
        case TOK_MINUS: emitRaw(" - "); break;
        case TOK_STAR: emitRaw(" * "); break;
        case TOK_MOD: emitRaw(" %% "); break;
        case TOK_EQ: emitRaw(" == "); break;
        case TOK_NE: emitRaw(" != "); break;
        case TOK_LT: emitRaw(" < "); break;
        case TOK_GT: emitRaw(" > "); break;
        case TOK_LE: emitRaw(" <= "); break;
        case TOK_GE: emitRaw(" >= "); break;
        case TOK_AND: emitRaw(" & "); break;
        case TOK_OR: emitRaw(" | "); break;
        case TOK_XOR: emitRaw(" ^ "); break;
        default: emitRaw(" ? "); break;
        }
        genExpr(n->b);
        emitRaw(")");
        break;
    }
    case NODE_FUNC_CALL:
        // SIZEOF(TypeName): emit sizeof(struct _b_TypeName)
        if (strIcmp(n->sval, "SIZEOF") == 0 && n->a &&
            n->a->type == NODE_IDENT) {
            int ui = udtLookup(n->a->sval);
            if (ui >= 0) {
                emitRaw("(long)sizeof(%s)", cUdtTypeStr(ui));
                break;
            }
        }
        if (isBuiltinFunc(n->sval)) {
            genBuiltinCall(n->sval, n->a);
        } else {
            emitRaw("%s(", cleanName(n->sval));
            // Generate arguments, applying BYREF (&) where needed
            Symbol *fsym = symLookup(n->sval);
            int pi = 0;
            for (Node *a = n->a; a; a = a->next, pi++) {
                if (a != n->a) emitRaw(", ");
                int needRef = (fsym && pi < fsym->paramCount &&
                               fsym->paramModes[pi] == PASS_BYREF);
                if (needRef && a->type == NODE_IDENT) {
                    if (gInFunc && gFuncName &&
                        strIcmp(a->sval, gFuncName) == 0) {
                        // The enclosing function's own name denotes its
                        // return variable, which is stored as <name>_ret.
                        emitRaw("&%s_ret", cleanName(a->sval));
                    } else if (isByrefParam(a->sval)) {
                        // Already a pointer in the generated code: forward it
                        // unchanged instead of taking its address again.
                        emitRaw("%s", cleanName(a->sval));
                    } else {
                        emitRaw("&%s", cleanName(a->sval));
                    }
                } else if (needRef && a->type == NODE_ARRAY_REF) {
                    emitRaw("&%s[", cleanName(a->sval));
                    genArrayFlatIndex(a->sval, a->a);
                    emitRaw("]");
                } else {
                    genExpr(a);
                }
            }
            emitRaw(")");
        }
        break;
    default:
        emitRaw("/* unknown expr node %d */0", n->type);
        break;
    }
}
// Emit the C declaration of one scalar variable, initialised to its type's
// default value.  isStatic selects a `static` declaration.
static void genVarDecl(const char *name, DataType dt, int isStatic) {
    const char *ident = cleanName(name);

    // emit() starts the line; the declaration itself is appended with
    // emitRaw().
    if (!isStatic) {
        emit("");
    } else {
        emit("static ");
    }

    if (dt != TYPE_STR) {
        emitRaw("%s %s _BUNUSED = %s;\n", cTypeStr(dt), ident, cDefaultVal(dt));
        return;
    }
    emitRaw("char *%s _BUNUSED = _bstr(\"\");\n", ident);
}
// Emit the flattened (row-major) index expression for an array access.
// With zero or one index the index expression is emitted as is.  With more,
// the indices are folded left to right:
//   3D: (((i) * name_dim1 + (j)) * name_dim2 + (k))
static void genArrayFlatIndex(const char *name, Node *indices) {
    // Private copy of the cleaned name, taken before any nested genExpr call.
    char arr[MAX_IDENT];
    snprintf(arr, sizeof arr, "%s", cleanName(name));

    int rank = 0;
    for (Node *ix = indices; ix != NULL; ix = ix->next) {
        rank++;
    }

    if (rank <= 1) {
        genExpr(indices);
        return;
    }

    // rank-1 parentheses wrap the fold steps, one more wraps the first index.
    for (int opened = 0; opened < rank; opened++) {
        emitRaw("(");
    }
    genExpr(indices);
    emitRaw(")");

    int dimNo = 1;
    for (Node *ix = indices->next; ix != NULL; ix = ix->next, dimNo++) {
        emitRaw(" * %s_dim%d + (", arr, dimNo);
        genExpr(ix);
        emitRaw("))");
    }
}
// Generate a DIM array declaration (supports multidimensional arrays).
//
// name:     BASIC array name.
// dt:       element type.
// sizeList: linked list of upper-bound expressions, one per dimension; may
//           be NULL for a 1-D array, in which case only the declarations are
//           emitted and no storage is allocated.
// ndims:    number of dimensions.
//
// Emits the element pointer, <name>_size (plus <name>_dimK for each
// dimension of an N-D array), the size computation (upper bound + 1 per
// dimension), a calloc() of the storage and, for string arrays, a loop
// setting every element to an empty string.
static void genDimArray(const char *name, DataType dt, Node *sizeList,
                        int ndims) {
    char cn[MAX_IDENT];
    strncpy(cn, cleanName(name), MAX_IDENT - 1);
    cn[MAX_IDENT - 1] = '\0';
    const char *elemType = cTypeStr(dt);

    emit("%s *%s _BUNUSED = NULL;\n", elemType, cn);

    if (ndims <= 1) {
        // 1D: backward-compatible
        emit("int %s_size _BUNUSED = 0;\n", cn);
        if (!sizeList) return; // declared only; nothing to allocate
        emit("%s_size = (", cn);
        genExpr(sizeList);
        emitRaw(") + 1;\n");
    } else {
        // Multi-dimensional
        for (int i = 0; i < ndims; i++) {
            emit("int %s_dim%d _BUNUSED = 0;\n", cn, i);
        }
        emit("int %s_size _BUNUSED = 0;\n", cn);

        Node *dim = sizeList;
        for (int i = 0; i < ndims; i++) {
            emit("%s_dim%d = (", cn, i);
            genExpr(dim); // a missing size expression is emitted as 0
            emitRaw(") + 1;\n");
            // Do not step past the end when fewer sizes than ndims exist.
            if (dim) dim = dim->next;
        }

        emit("%s_size = ", cn);
        for (int i = 0; i < ndims; i++) {
            if (i > 0) emitRaw(" * ");
            emitRaw("%s_dim%d", cn, i);
        }
        emitRaw(";\n");
    }

    emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
         cn, elemType, cn, elemType);
    if (dt == TYPE_STR) {
        emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
             cn, cn);
    }
}
// Generate code for a PRINT statement as a single printf() call followed by
// _bfree_temps().  Item separators: 1 = semicolon (nothing printed),
// 2 = comma (tab); a final item without a separator ends the line.
static void genPrint(Node *pr) {
    Node *items = pr->a;

    if (items == NULL) {
        // Bare PRINT: newline only
        emit("printf(\"\\n\");\n");
        return;
    }

    // Pass 1: the format string
    emit("printf(\"");
    for (Node *cur = items; cur != NULL; cur = cur->next) {
        if (cur->a != NULL) {
            emitRaw("%s", cFmt(cur->a->dataType));
        }
        switch (cur->ival) {
        case 1: // semicolon: no separator
            break;
        case 2: // comma: tab
            emitRaw("\\t");
            break;
        default:
            // Last item with no trailing separator: add newline
            if (cur->next == NULL) emitRaw("\\n");
            break;
        }
    }
    emitRaw("\"");

    // Pass 2: one argument per item that carries an expression
    for (Node *cur = items; cur != NULL; cur = cur->next) {
        if (cur->a == NULL) continue;
        emitRaw(", ");
        genExpr(cur->a);
    }
    emitRaw(");\n");

    // Free temporary strings created during expression evaluation
    emit("_bfree_temps();\n");
}
// Generate code for a PRINT USING statement: _busing_init(<format>), then
// one _busing_str()/_busing_num() call per value depending on its type,
// then _busing_end() and _bfree_temps().
static void genPrintUsing(Node *pu) {
    emit("_busing_init(");
    genExpr(pu->a);
    emitRaw(");\n");

    Node *item = pu->b;
    while (item != NULL) {
        const char *formatter =
            (item->dataType == TYPE_STR) ? "_busing_str" : "_busing_num";
        emit("%s(", formatter);
        genExpr(item);
        emitRaw(");\n");
        item = item->next;
    }

    emit("_busing_end();\n");
    emit("_bfree_temps();\n");
}
// Emit the characters of `s` escaped for use inside a C string literal
// (quote, backslash, newline and tab), without the surrounding quotes.
static void genInputEmitEscaped(const char *s) {
    for (; s && *s; s++) {
        switch (*s) {
        case '"':  emitRaw("\\\""); break;
        case '\\': emitRaw("\\\\"); break;
        case '\n': emitRaw("\\n"); break;
        case '\t': emitRaw("\\t"); break;
        default:   emitRaw("%c", *s); break;
        }
    }
}

// Generate code for an INPUT statement.
//
// Prints the prompt stored in inp->sval (or "? " when there is none),
// flushes stdout, then reads each variable in inp->a: strings through
// fgets() into a 1024-byte buffer with the newline stripped, numbers
// through scanf() with the conversion matching the variable's type.
static void genInput(Node *inp) {
    if (inp->sval) {
        // The prompt becomes a C string literal, so it must be escaped:
        // a raw backslash or quote would corrupt the generated code.
        emit("printf(\"%%s\", \"");
        genInputEmitEscaped(inp->sval);
        emitRaw("\");\n");
    } else {
        emit("printf(\"? \");\n");
    }
    emit("fflush(stdout);\n");

    for (Node *v = inp->a; v; v = v->next) {
        // A BYREF parameter is already a pointer in the generated function,
        // so it is passed as is; everything else needs its address taken.
        const char *addrOf = isByrefParam(v->sval) ? "" : "&";
        if (v->dataType == TYPE_STR) {
            emit("{ char _buf[1024]; if(fgets(_buf, sizeof(_buf), stdin)) {\n");
            gIndent++;
            emit("_buf[strcspn(_buf, \"\\n\")] = 0;\n");
            emit("_bstr_assign(%s%s, _buf);\n", addrOf, cleanName(v->sval));
            gIndent--;
            emit("} }\n");
        } else {
            emit("scanf(\"%s\", %s%s);\n",
                 cScanfFmt(v->dataType), addrOf, cleanName(v->sval));
        }
    }
}
// Generate code for an assignment statement (n->a = target, n->b = value).
// Three target shapes are distinguished, in this order:
//   1. the enclosing FUNCTION's own name  -> its <name>_ret variable
//   2. a UDT field (dot access)           -> field store
//   3. a plain variable or array element  -> direct store
// String stores go through _bstr_assign(), except fixed-length string
// fields, which use strncpy().  Every path ends with _bfree_temps().
static void genAssign(Node *n) {
    Node *lhs = n->a;
    Node *rhs = n->b;

    // 1. Assignment to the function return variable
    if (gInFunc && gFuncName && lhs->type == NODE_IDENT &&
        strIcmp(lhs->sval, gFuncName) == 0) {
        const char *fn = cleanName(lhs->sval);
        int stringReturn = (gFuncRet == TYPE_STR);
        if (stringReturn) {
            emit("_bstr_assign(&%s_ret, ", fn);
        } else {
            emit("%s_ret = ", fn);
        }
        genExpr(rhs);
        if (stringReturn) {
            emitRaw(");\n");
        } else {
            emitRaw(";\n");
        }
        emit("_bfree_temps();\n");
        return;
    }

    // 2. Dot-access assignment: var.field = expr
    if (lhs->type == NODE_DOT_ACCESS) {
        int fixedLen = lhs->ival; // >0 for fixed-length STRING * N
        int stringField = (lhs->dataType == TYPE_STR);

        if (stringField && fixedLen > 0) {
            // Fixed-length string: strncpy, then terminate explicitly
            emit("strncpy(");
            genExpr(lhs->a);
            emitRaw(".%s, ", cleanName(lhs->sval));
            genExpr(rhs);
            emitRaw(", %d);\n", fixedLen);
            emit("");
            genExpr(lhs->a);
            emitRaw(".%s[%d] = '\\0';\n", cleanName(lhs->sval), fixedLen);
        } else if (stringField) {
            // Dynamic string inside a struct
            emit("_bstr_assign(&(");
            genExpr(lhs->a);
            emitRaw(".%s), ", cleanName(lhs->sval));
            genExpr(rhs);
            emitRaw(");\n");
        } else {
            // Numeric field
            emit("");
            genExpr(lhs->a);
            emitRaw(".%s = ", cleanName(lhs->sval));
            genExpr(rhs);
            emitRaw(";\n");
        }
        emit("_bfree_temps();\n");
        return;
    }

    // 3. Plain variable or array element
    int isElement = (lhs->type == NODE_ARRAY_REF);
    // BYREF parameters are pointers in the generated code
    int viaPointer = (lhs->type == NODE_IDENT && isByrefParam(lhs->sval));
    int stringStore = (lhs->dataType == TYPE_STR ||
                       (rhs != NULL && rhs->dataType == TYPE_STR));

    if (stringStore) {
        if (isElement) {
            emit("_bstr_assign(&%s[", cleanName(lhs->sval));
            genArrayFlatIndex(lhs->sval, lhs->a);
            emitRaw("], ");
        } else if (viaPointer) {
            emit("_bstr_assign(%s, ", cleanName(lhs->sval));
        } else {
            emit("_bstr_assign(&%s, ", cleanName(lhs->sval));
        }
        genExpr(rhs);
        emitRaw(");\n");
    } else {
        if (isElement) {
            emit("%s[", cleanName(lhs->sval));
            genArrayFlatIndex(lhs->sval, lhs->a);
            emitRaw("] = ");
        } else if (viaPointer) {
            emit("(*%s) = ", cleanName(lhs->sval));
        } else {
            emit("%s = ", cleanName(lhs->sval));
        }
        genExpr(rhs);
        emitRaw(";\n");
    }
    emit("_bfree_temps();\n");
}
// Generate the C definition of a SUB or FUNCTION node.
//
// BYREF parameters become pointers; BYVAL strings become `const char *`.
// A FUNCTION gets a local <name>_ret variable, initialised to the default
// for its return type and returned at the end of the body.  The global
// function context (gInFunc, gFuncName, gFuncRet) is set while the
// definition is emitted and restored afterwards.
static void genFuncDef(Node *n) {
    int returnsValue = (n->type == NODE_FUNC);
    DataType retType = returnsValue ? n->dataType : TYPE_VOID;

    // Keep a private copy of the cleaned name so it survives the further
    // cleanName() calls made while emitting parameters and the body.
    char cName[MAX_IDENT];
    strncpy(cName, cleanName(n->sval), MAX_IDENT - 1);
    cName[MAX_IDENT - 1] = '\0';

    // Enter the function context, remembering the previous one
    int outerInFunc = gInFunc;
    const char *outerFuncName = gFuncName;
    DataType outerFuncRet = gFuncRet;
    gInFunc = 1;
    gFuncName = n->sval;
    gFuncRet = retType;

    // Signature
    emitRaw("%s %s(", cTypeStr(retType), cName);
    if (n->a == NULL) {
        emitRaw("void"); // no params
    }
    for (Node *param = n->a; param != NULL; param = param->next) {
        if (param != n->a) emitRaw(", ");
        if (param->ival == PASS_BYREF) {
            emitRaw("%s *%s", cTypeStr(param->dataType), cleanName(param->sval));
        } else if (param->dataType == TYPE_STR) {
            emitRaw("const char *%s", cleanName(param->sval));
        } else {
            emitRaw("%s %s", cTypeStr(param->dataType), cleanName(param->sval));
        }
    }
    emitRaw(") {\n");
    gIndent++;

    // FUNCTION only: the return variable, named <FuncName>_ret
    if (returnsValue) {
        emit("%s %s_ret = %s;\n", cTypeStr(retType), cName, cDefaultVal(retType));
    }

    if (n->b) genBlock(n->b);

    if (returnsValue) {
        emit("return %s_ret;\n", cName);
    }
    gIndent--;
    emitRaw("}\n\n");

    // Leave the function context
    gInFunc = outerInFunc;
    gFuncName = outerFuncName;
    gFuncRet = outerFuncRet;
}
// Generate code for a single statement.
//
// Dispatches on n->type and writes the equivalent C through emit() (used
// here to start each generated line) and emitRaw() (used to continue a
// line, and for labels). A NULL node is ignored. Node types with no
// runtime code (TYPE, DATA, CONST, SUB/FUNCTION definitions) emit nothing;
// unknown node types emit a C comment naming the type number.
//
// Reads the globals gInFunc / gFuncName / gFuncRet (current function
// context), gRelease (omit runtime checks), gGosubCount (number of GOSUB
// return points) and adjusts gIndent around nested blocks.
static void genStmt(Node *n) {
    if (!n) return;
    switch (n->type) {
    case NODE_LABEL:
        // Only emit C labels that are actually targeted by GOTO/GOSUB,
        // to avoid -Wunused-label warnings.
        if (n->sval) {
            // Named label
            if (isGotoStrTarget(n->sval))
                emitRaw("%s: ;\n", cleanName(n->sval));
        } else if (isGotoTarget(n->ival)) {
            // Line-number label
            emitRaw("L%d: ;\n", n->ival);
        }
        break;
    case NODE_TYPE_DEF:
        // TYPE definitions are emitted globally in generate(), not here
        break;
    case NODE_DIM:
        if (n->dataType == TYPE_UDT && n->ival == 0) {
            // UDT scalar: struct _b_Name var; memset(&var, 0, sizeof(var));
            const char *uts = cUdtTypeStr(n->ival2);
            char cn[MAX_IDENT];
            strncpy(cn, cleanName(n->sval), MAX_IDENT - 1);
            cn[MAX_IDENT - 1] = '\0';
            emit("%s %s _BUNUSED;\n", uts, cn);
            emit("memset(&%s, 0, sizeof(%s));\n", cn, cn);
        } else if (n->dataType == TYPE_UDT && n->ival > 0) {
            // UDT array
            const char *uts = cUdtTypeStr(n->ival2);
            char cn[MAX_IDENT];
            strncpy(cn, cleanName(n->sval), MAX_IDENT - 1);
            cn[MAX_IDENT - 1] = '\0';
            emit("%s *%s _BUNUSED = NULL;\n", uts, cn);
            emit("int %s_size _BUNUSED = 0;\n", cn);
            // Compute size and allocate
            if (n->a) {
                if (n->ival <= 1) {
                    // One dimension: element count is upper bound + 1
                    emit("%s_size = (", cn);
                    genExpr(n->a);
                    emitRaw(") + 1;\n");
                } else {
                    // N dimensions: one <name>_dim<i> per dimension, and
                    // the total size is their product
                    Node *dim = n->a;
                    for (int i = 0; i < n->ival; i++, dim = dim->next) {
                        emit("int %s_dim%d _BUNUSED = (", cn, i);
                        genExpr(dim);
                        emitRaw(") + 1;\n");
                    }
                    emit("%s_size = ", cn);
                    for (int i = 0; i < n->ival; i++) {
                        if (i > 0) emitRaw(" * ");
                        emitRaw("%s_dim%d", cn, i);
                    }
                    emitRaw(";\n");
                }
                emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
                     cn, uts, cn, uts);
            }
        } else if (n->ival) { // array -- n->ival is ndims
            genDimArray(n->sval, n->dataType, n->a, n->ival);
        } else { // scalar
            genVarDecl(n->sval, n->dataType, 0);
        }
        break;
    case NODE_REDIM: {
        char rcn[MAX_IDENT];
        strncpy(rcn, cleanName(n->sval), MAX_IDENT - 1);
        rcn[MAX_IDENT - 1] = '\0';
        int ndims = n->ival;
        if (ndims <= 1) {
            // 1D REDIM: backward-compatible realloc; any newly added
            // tail is zero-filled
            emit("{ int _old_sz = %s_size;\n", rcn);
            gIndent++;
            emit("%s_size = (", rcn);
            genExpr(n->a);
            emitRaw(") + 1;\n");
            emit("%s = (%s*)realloc(%s, %s_size * sizeof(%s));\n",
                 rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType));
            emit("if (%s_size > _old_sz)\n", rcn);
            gIndent++;
            emit("memset(%s + _old_sz, 0, (%s_size - _old_sz) * sizeof(%s));\n",
                 rcn, rcn, cTypeStr(n->dataType));
            gIndent--;
            gIndent--;
            emit("}\n");
        } else {
            // Multi-dim REDIM: recompute dims, free + calloc
            emit("{\n");
            gIndent++;
            Node *dim = n->a;
            for (int i = 0; i < ndims; i++, dim = dim->next) {
                emit("%s_dim%d = (", rcn, i);
                genExpr(dim);
                emitRaw(") + 1;\n");
            }
            emit("%s_size = ", rcn);
            for (int i = 0; i < ndims; i++) {
                if (i > 0) emitRaw(" * ");
                emitRaw("%s_dim%d", rcn, i);
            }
            emitRaw(";\n");
            emit("free(%s);\n", rcn);
            // Exactly four arguments for the four %s: the element size
            // must be sizeof(<element type>), not sizeof(<array pointer>).
            emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
                 rcn, cTypeStr(n->dataType), rcn, cTypeStr(n->dataType));
            if (n->dataType == TYPE_STR) {
                // String elements start out as empty strings, not NULL
                emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
                     rcn, rcn);
            }
            gIndent--;
            emit("}\n");
        }
        break;
    }
    case NODE_LOCAL:
        genVarDecl(n->sval, n->dataType, 0);
        break;
    case NODE_STATIC:
        genVarDecl(n->sval, n->dataType, 1);
        break;
    case NODE_ASSIGN:
        genAssign(n);
        break;
    case NODE_PRINT:
        genPrint(n);
        break;
    case NODE_PRINT_USING:
        genPrintUsing(n);
        break;
    case NODE_INPUT:
        genInput(n);
        break;
    case NODE_IF:
        emit("if (");
        genExpr(n->a);
        emitRaw(") {\n");
        gIndent++;
        if (n->b) genBlock(n->b);
        gIndent--;
        if (n->c) {
            if (n->c->type == NODE_IF) {
                // ELSEIF: emit as "} else if (...)"
                emit("} else ");
                // Don't indent the nested if
                genStmt(n->c);
                return; // the nested if handles its own closing
            } else {
                emit("} else {\n");
                gIndent++;
                genBlock(n->c);
                gIndent--;
            }
        }
        emit("}\n");
        break;
    case NODE_FOR: {
        // Keep a private copy of the cleaned loop-variable name, as the
        // DIM/REDIM cases do: the genExpr() calls below may call
        // cleanName() again before the name is used for the last time.
        char vn[MAX_IDENT];
        strncpy(vn, cleanName(n->sval), MAX_IDENT - 1);
        vn[MAX_IDENT - 1] = '\0';
        // Determine the C type for the loop variable
        const char *vtype = cTypeStr(inferVarType(n->sval));
        // When inside a function, the loop variable may not be declared
        // locally. Wrap in a block and declare the variable to be safe.
        // Any prior LOCAL/DIM of the same name has _BUNUSED to suppress
        // shadowing warnings.
        int needDecl = gInFunc;
        if (n->c) {
            // FOR with STEP: use a block with a step variable so the
            // step expression is evaluated once, and the comparison
            // direction adapts to the sign of the step at runtime.
            emit("{ /* FOR %s with STEP */\n", vn);
            gIndent++;
            if (needDecl) emit("%s %s;\n", vtype, vn);
            emit("%s _step_%s = ", vtype, vn);
            genExpr(n->c);
            emitRaw(";\n");
            emit("for (%s = ", vn);
            genExpr(n->a);
            emitRaw("; _step_%s > 0 ? %s <= ", vn, vn);
            genExpr(n->b);
            emitRaw(" : %s >= ", vn);
            genExpr(n->b);
            emitRaw("; %s += _step_%s) {\n", vn, vn);
            gIndent++;
            if (n->d) genBlock(n->d);
            gIndent--;
            emit("}\n");
            gIndent--;
            emit("}\n");
        } else {
            // Default step = 1: simple ascending loop
            if (needDecl) {
                emit("{ %s %s;\n", vtype, vn);
                gIndent++;
            }
            emit("for (%s = ", vn);
            genExpr(n->a);
            emitRaw("; %s <= ", vn);
            genExpr(n->b);
            emitRaw("; %s++) {\n", vn);
            gIndent++;
            if (n->d) genBlock(n->d);
            gIndent--;
            emit("}\n");
            if (needDecl) {
                gIndent--;
                emit("}\n");
            }
        }
        break;
    }
    case NODE_WHILE:
        emit("while (");
        genExpr(n->a);
        emitRaw(") {\n");
        gIndent++;
        if (n->b) genBlock(n->b);
        gIndent--;
        emit("}\n");
        break;
    case NODE_DO_LOOP: {
        // n->ival bit 0: condition is UNTIL (negated); bit 1: condition
        // is tested at the bottom of the loop
        int isUntil = n->ival & 1;
        int atBottom = n->ival & 2;
        if (!n->a) {
            // Infinite loop: DO ... LOOP
            emit("for (;;) {\n");
        } else if (atBottom) {
            emit("do {\n");
        } else {
            // Condition at top
            emit("while (");
            if (isUntil) emitRaw("!(");
            genExpr(n->a);
            if (isUntil) emitRaw(")");
            emitRaw(") {\n");
        }
        gIndent++;
        if (n->b) genBlock(n->b);
        gIndent--;
        if (atBottom && n->a) {
            emit("} while (");
            if (isUntil) emitRaw("!(");
            genExpr(n->a);
            if (isUntil) emitRaw(")");
            emitRaw(");\n");
        } else {
            emit("}\n");
        }
        break;
    }
    case NODE_GOTO:
        if (n->sval) {
            emit("goto %s;\n", cleanName(n->sval));
        } else if (n->ival) {
            emit("goto L%d;\n", n->ival);
        }
        break;
    case NODE_GOSUB:
        // Push this site's return-point id (n->ival2), jump to the
        // target, and place the matching _gr<id> label right after.
        if (!gRelease)
            emit("if (_gosub_sp >= _GOSUB_MAX) { fprintf(stderr, \"GOSUB stack overflow\\n\"); exit(1); }\n");
        emit("_gosub_stack[_gosub_sp++] = %d;\n", n->ival2);
        if (n->sval)
            emit("goto %s;\n", cleanName(n->sval));
        else
            emit("goto L%d;\n", n->ival);
        emitRaw("_gr%d: ;\n", n->ival2);
        break;
    case NODE_RETURN:
        if (gInFunc) {
            // Return from FUNCTION
            if (n->a) {
                if (gFuncRet == TYPE_STR) {
                    emit("_bstr_assign(&%s_ret, ",
                         cleanName(gFuncName));
                    genExpr(n->a);
                    emitRaw(");\n");
                } else {
                    emit("%s_ret = ", cleanName(gFuncName));
                    genExpr(n->a);
                    emitRaw(";\n");
                }
            }
            emit("return %s_ret;\n", cleanName(gFuncName));
        } else {
            // RETURN from GOSUB: dispatch back using the stack
            if (!gRelease)
                emit("if (_gosub_sp <= 0) { fprintf(stderr, \"RETURN without GOSUB\\n\"); exit(1); }\n");
            emit("switch (_gosub_stack[--_gosub_sp]) {\n");
            for (int i = 0; i < gGosubCount; i++) {
                emit(" case %d: goto _gr%d;\n", i, i);
            }
            emit("}\n");
        }
        break;
    case NODE_EXIT:
        // n->ival holds the token of the construct being exited
        if (n->ival == TOK_FOR || n->ival == TOK_WHILE || n->ival == TOK_DO)
            emit("break;\n");
        else if (n->ival == TOK_SUB)
            emit("return;\n");
        else if (n->ival == TOK_FUNCTION && gInFunc && gFuncName)
            emit("return %s_ret;\n", cleanName(gFuncName));
        break;
    case NODE_CONTINUE:
        emit("continue;\n");
        break;
    case NODE_CALL: {
        const char *cn = cleanName(n->sval);
        emit("%s(", cn);
        // The callee's symbol (if known) tells which arguments are BYREF
        Symbol *fsym = symLookup(n->sval);
        int pi = 0;
        for (Node *a = n->a; a; a = a->next, pi++) {
            if (a != n->a) emitRaw(", ");
            int needRef = (fsym && pi < fsym->paramCount &&
                           fsym->paramModes[pi] == PASS_BYREF);
            if (needRef && a->type == NODE_IDENT) {
                if (isByrefParam(a->sval)) {
                    // The argument is itself a BYREF parameter of the
                    // enclosing function, i.e. already a pointer:
                    // forward it unchanged instead of taking its address.
                    emitRaw("%s", cleanName(a->sval));
                } else {
                    emitRaw("&%s", cleanName(a->sval));
                }
            } else if (needRef && a->type == NODE_ARRAY_REF) {
                // Address of one element; index the flat storage the
                // same way array assignment does.
                emitRaw("&%s[", cleanName(a->sval));
                genArrayFlatIndex(a->sval, a->a);
                emitRaw("]");
            } else {
                genExpr(a);
            }
        }
        emitRaw(");\n");
        emit("_bfree_temps();\n");
        break;
    }
    case NODE_SUB:
    case NODE_FUNC:
        // These are generated separately before main()
        break;
    case NODE_END:
        emit("exit(0);\n");
        break;
    case NODE_OPEN: {
        if (n->ival == 4) {
            // RANDOM mode: n->c is the optional record length
            emit("_bfile_open_random(");
            genExpr(n->b);
            emitRaw(", ");
            genExpr(n->a);
            emitRaw(", ");
            if (n->c) {
                genExpr(n->c);
            } else {
                emitRaw("0");
            }
            emitRaw(");\n");
        } else {
            // n->ival indexes the fopen mode string
            const char *modes[] = {"r", "w", "a", "rb"};
            emit("_bfile_open(");
            genExpr(n->b);
            emitRaw(", ");
            genExpr(n->a);
            emitRaw(", \"%s\");\n", modes[n->ival]);
        }
        break;
    }
    case NODE_GET:
        // GET #filenum, record, variable
        // Seek to (record - 1) * record length, then read one record
        emit("fseek(_bfile_get(");
        genExpr(n->a);
        emitRaw("), (");
        genExpr(n->b);
        emitRaw(" - 1) * _bfile_reclen[");
        genExpr(n->a);
        emitRaw("], SEEK_SET);\n");
        emit("fread(&%s, _bfile_reclen[", cleanName(n->c->sval));
        genExpr(n->a);
        emitRaw("], 1, _bfile_get(");
        genExpr(n->a);
        emitRaw("));\n");
        break;
    case NODE_PUT:
        // PUT #filenum, record, variable
        // Seek to (record - 1) * record length, then write one record
        emit("fseek(_bfile_get(");
        genExpr(n->a);
        emitRaw("), (");
        genExpr(n->b);
        emitRaw(" - 1) * _bfile_reclen[");
        genExpr(n->a);
        emitRaw("], SEEK_SET);\n");
        emit("fwrite(&%s, _bfile_reclen[", cleanName(n->c->sval));
        genExpr(n->a);
        emitRaw("], 1, _bfile_get(");
        genExpr(n->a);
        emitRaw("));\n");
        break;
    case NODE_CLOSE:
        emit("_bfile_close(");
        genExpr(n->b);
        emitRaw(");\n");
        break;
    case NODE_FILE_PRINT: {
        Node *item = n->a;
        if (!item) {
            // PRINT #n, alone = write newline
            emit("fprintf(_bfile_get(");
            genExpr(n->b);
            emitRaw("), \"\\n\");\n");
            break;
        }
        // Build fprintf with format string and arguments
        emit("fprintf(_bfile_get(");
        genExpr(n->b);
        emitRaw("), \"");
        // First pass: the format string, one conversion per item
        for (Node *it = item; it; it = it->next) {
            if (it->a) emitRaw("%s", cFmt(it->a->dataType));
            if (it->ival == 1) {
                // semicolon: no separator
            } else if (it->ival == 2) { emitRaw("\\t"); }
            else if (!it->next) { emitRaw("\\n"); }
        }
        emitRaw("\"");
        // Second pass: the argument expressions, in the same order
        for (Node *it = item; it; it = it->next) {
            if (it->a) { emitRaw(", "); genExpr(it->a); }
        }
        emitRaw(");\n");
        emit("_bfree_temps();\n");
        break;
    }
    case NODE_FILE_INPUT:
        // One read per target variable: strings via _bline_input,
        // numbers via fscanf with the type's scanf format
        for (Node *v = n->a; v; v = v->next) {
            if (v->dataType == TYPE_STR) {
                emit("_bline_input(");
                genExpr(n->b);
                emitRaw(", &%s);\n", cleanName(v->sval));
            } else {
                emit("fscanf(_bfile_get(");
                genExpr(n->b);
                emitRaw("), \"%s\", &%s);\n",
                        cScanfFmt(v->dataType), cleanName(v->sval));
            }
        }
        break;
    case NODE_LINE_INPUT:
        emit("_bline_input(");
        genExpr(n->b);
        emitRaw(", &%s);\n", cleanName(n->a->sval));
        break;
    case NODE_FILE_WRITE: {
        // WRITE # outputs CSV-style: strings quoted, comma-separated, newline
        int first = 1;
        for (Node *e = n->a; e; e = e->next) {
            if (!first) {
                emit("fprintf(_bfile_get(");
                genExpr(n->b);
                emitRaw("), \",\");\n");
            }
            first = 0;
            if (e->dataType == TYPE_STR) {
                emit("fprintf(_bfile_get(");
                genExpr(n->b);
                emitRaw("), \"\\\"%%s\\\"\", ");
                genExpr(e);
                emitRaw(");\n");
            } else {
                emit("fprintf(_bfile_get(");
                genExpr(n->b);
                emitRaw("), \"%s\", ", cFmt(e->dataType));
                genExpr(e);
                emitRaw(");\n");
            }
        }
        emit("fprintf(_bfile_get(");
        genExpr(n->b);
        emitRaw("), \"\\n\");\n");
        emit("_bfree_temps();\n");
        break;
    }
    case NODE_DATA:
        // No-op: DATA items are collected and emitted as a static array
        break;
    case NODE_READ:
        // Each target consumes the next entry of the _bdata table
        for (Node *v = n->a; v; v = v->next) {
            if (v->dataType == TYPE_STR) {
                emit("_bstr_assign(&%s, _bdata[_bdata_pos].str);\n",
                     cleanName(v->sval));
            } else {
                emit("%s = (%s)_bdata[_bdata_pos].num;\n",
                     cleanName(v->sval), cTypeStr(v->dataType));
            }
            emit("_bdata_pos++;\n");
        }
        break;
    case NODE_RESTORE:
        // RESTORE <label>, RESTORE <line number>, or plain RESTORE
        if (n->sval) {
            emit("_bdata_pos = %d;\n", dataIndexForLabel(n->sval));
        } else if (n->ival != 0) {
            emit("_bdata_pos = %d;\n", dataIndexForLine(n->ival));
        } else {
            emit("_bdata_pos = 0;\n");
        }
        break;
    case NODE_CONST_DECL:
        // No runtime code for constants - they're substituted at parse time
        break;
    case NODE_SWAP: {
        // Determine the type from the left operand
        DataType swapType = n->a->dataType;
        const char *ctype = "double";
        if (swapType == TYPE_BYTE) ctype = "uint8_t";
        else if (swapType == TYPE_INT) ctype = "int16_t";
        else if (swapType == TYPE_LONG) ctype = "int32_t";
        else if (swapType == TYPE_FLOAT) ctype = "float";
        else if (swapType == TYPE_DBL) ctype = "double";
        if (swapType == TYPE_STR) {
            // String swap: just swap the pointers
            emit("{ char *_swap_tmp = ");
            genExpr(n->a);
            emitRaw("; ");
            genExpr(n->a);
            emitRaw(" = ");
            genExpr(n->b);
            emitRaw("; ");
            genExpr(n->b);
            emitRaw(" = _swap_tmp; }\n");
        } else {
            emit("{ %s _swap_tmp = ", ctype);
            genExpr(n->a);
            emitRaw("; ");
            genExpr(n->a);
            emitRaw(" = ");
            genExpr(n->b);
            emitRaw("; ");
            genExpr(n->b);
            emitRaw(" = _swap_tmp; }\n");
        }
        break;
    }
    case NODE_RANDOMIZE:
        if (n->a) {
            emit("srand((unsigned)(");
            genExpr(n->a);
            emitRaw("));\n");
        } else {
            emit("srand((unsigned)time(NULL));\n");
        }
        break;
    case NODE_SELECT: {
        // Emit test expression into a temp variable
        static int selectId = 0; // gives every SELECT its own temp name
        int sid = selectId++;
        DataType stype = n->a->dataType;
        if (stype == TYPE_STR) {
            emit("{ const char *_sel%d = ", sid);
            genExpr(n->a);
            emitRaw(";\n");
        } else {
            emit("{ double _sel%d = ", sid);
            genExpr(n->a);
            emitRaw(";\n");
        }
        // Emit CASE blocks as if/else if chain
        int first = 1;
        for (Node *c = n->b; c; c = c->next) {
            if (c->ival == 1) {
                // CASE ELSE
                if (!first) emit("} else {\n");
                else emit("{\n");
            } else {
                if (!first) emit("} else if (");
                else emit("if (");
                // Emit condition for each value, joined with ||
                int firstVal = 1;
                for (Node *v = c->a; v; v = v->next) {
                    if (!firstVal) emitRaw(" || ");
                    if (v->ival2 == 1) {
                        // IS comparison: v->ival is the comparison op, v->b is the value
                        const char *op = "";
                        switch (v->ival) {
                        case TOK_EQ: op = "== "; break;
                        case TOK_NE: op = "!= "; break;
                        case TOK_LT: op = "< "; break;
                        case TOK_GT: op = "> "; break;
                        case TOK_LE: op = "<= "; break;
                        case TOK_GE: op = ">= "; break;
                        default: break;
                        }
                        if (stype == TYPE_STR) {
                            // Strings must be ordered by content, not by
                            // pointer value: compare strcmp() against 0
                            emitRaw("(strcmp(_sel%d, ", sid);
                            genExpr(v->b);
                            emitRaw(") %s0)", op);
                        } else {
                            emitRaw("(_sel%d %s", sid, op);
                            genExpr(v->b);
                            emitRaw(")");
                        }
                    } else if (v->ival2 == 2) {
                        // Range: v->a TO v->b
                        if (stype == TYPE_STR) {
                            // Inclusive range on string content
                            emitRaw("(strcmp(_sel%d, ", sid);
                            genExpr(v->a);
                            emitRaw(") >= 0 && strcmp(_sel%d, ", sid);
                            genExpr(v->b);
                            emitRaw(") <= 0)");
                        } else {
                            emitRaw("(_sel%d >= ", sid);
                            genExpr(v->a);
                            emitRaw(" && _sel%d <= ", sid);
                            genExpr(v->b);
                            emitRaw(")");
                        }
                    } else {
                        // Single value
                        if (stype == TYPE_STR) {
                            emitRaw("(strcmp(_sel%d, ", sid);
                            genExpr(v);
                            emitRaw(") == 0)");
                        } else {
                            emitRaw("(_sel%d == ", sid);
                            genExpr(v);
                            emitRaw(")");
                        }
                    }
                    firstVal = 0;
                }
                emitRaw(") {\n");
            }
            gIndent++;
            for (Node *s = c->b; s; s = s->next)
                genStmt(s);
            gIndent--;
            first = 0;
        }
        // Close the last CASE body (if any), then the temp's scope
        if (!first) emit("}\n");
        emit("}\n");
        break;
    }
    case NODE_ON_GOTO:
        // ON expr GOTO t1, t2, ...: case i jumps to the i-th target
        emit("switch ((int)(");
        genExpr(n->a);
        emitRaw(")) {\n");
        {
            int idx = 1;
            for (Node *lab = n->b; lab; lab = lab->next, idx++) {
                if (lab->type == NODE_INT_LIT) {
                    emit(" case %d: goto L%d; break;\n", idx, lab->ival);
                } else {
                    emit(" case %d: goto %s; break;\n", idx, cleanName(lab->sval));
                }
            }
        }
        emit("}\n");
        break;
    case NODE_ON_GOSUB:
        // ON expr GOSUB t1, t2, ...: like ON GOTO, but each case first
        // pushes its own return-point id
        emit("switch ((int)(");
        genExpr(n->a);
        emitRaw(")) {\n");
        {
            int idx = 1;
            int rpid = n->ival2; // first return-point id
            for (Node *lab = n->b; lab; lab = lab->next, idx++, rpid++) {
                if (lab->type == NODE_INT_LIT) {
                    emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto L%d; break;\n",
                         idx, rpid, lab->ival);
                } else {
                    emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto %s; break;\n",
                         idx, rpid, cleanName(lab->sval));
                }
            }
        }
        emit("}\n");
        // Emit return labels
        {
            int rpid = n->ival2;
            for (Node *lab = n->b; lab; lab = lab->next, rpid++) {
                emitRaw("_gr%d: ;\n", rpid);
            }
        }
        break;
    case NODE_MID_ASSIGN:
        // _bmid_assign(&<n->a>, <n->b>, <n->c>, <n->d>)
        emit("_bmid_assign(&");
        genExpr(n->a);
        emitRaw(", ");
        genExpr(n->b);
        emitRaw(", ");
        genExpr(n->c);
        emitRaw(", ");
        genExpr(n->d);
        emitRaw(");\n");
        break;
    default:
        emit("/* unhandled node type %d */\n", n->type);
        break;
    }
}
// Emit every statement of a block, in order.
//
// blk may be a NODE_BLOCK wrapper (its statements hang off blk->a) or the
// first statement of a bare ->next-linked list. NULL emits nothing.
static void genBlock(Node *blk) {
    if (blk == NULL) {
        return;
    }
    Node *stmt = blk;
    if (blk->type == NODE_BLOCK) {
        stmt = blk->a; // unwrap to the first contained statement
    }
    for (; stmt != NULL; stmt = stmt->next) {
        genStmt(stmt);
    }
}
// Gather every SUB/FUNCTION node reachable through NODE_PROGRAM and
// NODE_BLOCK containers into funcs[], in traversal order.
//
// *count is the number of entries already stored and is advanced for each
// node added; fatal() is called when a further entry would exceed max.
static void collectFuncs(Node *n, Node **funcs, int *count, int max) {
    if (n == NULL) {
        return;
    }
    switch (n->type) {
    case NODE_SUB:
    case NODE_FUNC:
        if (*count >= max) {
            fatal(n->line, "Too many SUB/FUNCTION definitions (max %d)", max);
        }
        funcs[*count] = n;
        *count += 1;
        break;
    case NODE_BLOCK:
    case NODE_PROGRAM:
        // Containers: visit each child statement in turn
        for (Node *child = n->a; child != NULL; child = child->next) {
            collectFuncs(child, funcs, count, max);
        }
        break;
    default:
        break;
    }
}
// Walk the AST and append every NODE_DATA node to data[], in traversal
// order. *count is the number of entries stored so far; fatal() is called
// when a further entry would exceed max.
//
// As a side effect, a NODE_LABEL immediately followed (via ->next) by a
// NODE_DATA copies its identity onto that DATA node - the label name into
// sval, or otherwise the line number into ival - so RESTORE can find it.
static void collectData(Node *n, Node **data, int *count, int max) {
    if (n == NULL) {
        return;
    }
    switch (n->type) {
    case NODE_DATA:
        if (*count >= max) {
            fatal(n->line, "Too many DATA statements (max %d)", max);
        }
        data[*count] = n;
        *count += 1;
        break;
    case NODE_LABEL: {
        // Tag the DATA statement that directly follows this label
        Node *follower = n->next;
        if (follower != NULL && follower->type == NODE_DATA) {
            if (n->sval) {
                follower->sval = n->sval;
            } else {
                follower->ival = n->ival;
            }
        }
        break;
    }
    case NODE_BLOCK:
    case NODE_PROGRAM:
        // Containers: visit each child statement
        for (Node *child = n->a; child != NULL; child = child->next) {
            collectData(child, data, count, max);
        }
        break;
    case NODE_SUB:
    case NODE_FUNC:
        // DATA is global in BASIC, so SUB/FUNCTION bodies count too
        collectData(n->b, data, count, max);
        break;
    case NODE_IF:
        // THEN branch, then ELSE/ELSEIF branch
        collectData(n->b, data, count, max);
        collectData(n->c, data, count, max);
        break;
    case NODE_FOR:
        // FOR keeps its body in ->d
        collectData(n->d, data, count, max);
        break;
    case NODE_WHILE:
    case NODE_DO_LOOP:
        collectData(n->b, data, count, max);
        break;
    case NODE_SELECT:
        // Every statement of every CASE body
        for (Node *arm = n->b; arm != NULL; arm = arm->next) {
            for (Node *stmt = arm->b; stmt != NULL; stmt = stmt->next) {
                collectData(stmt, data, count, max);
            }
        }
        break;
    default:
        break;
    }
}
// RESTORE <line number> support: parallel tables pairing a BASIC line
// number with a data index; the first gDataLineCount slots are in use.
#define MAX_DATA_LINES 512
static int gDataLineNums[MAX_DATA_LINES];
static int gDataLineIdxs[MAX_DATA_LINES];
static int gDataLineCount = 0;
// RESTORE <label> support: the same pairing, keyed by label name; the
// first gDataLabelCount slots are in use.
static char *gDataLabelNames[MAX_DATA_LINES];
static int gDataLabelIdxs[MAX_DATA_LINES];
static int gDataLabelCount = 0;
// Return the data index recorded for BASIC line number lnum. The first
// matching slot wins; an unknown line number yields 0 (start of the data).
static int dataIndexForLine(int lnum) {
    int slot = 0;
    while (slot < gDataLineCount && gDataLineNums[slot] != lnum) {
        slot++;
    }
    return (slot < gDataLineCount) ? gDataLineIdxs[slot] : 0;
}
// Return the data index recorded for the RESTORE label called name.
// Names are matched with strIcmp() (presumably case-insensitive - confirm
// against its definition); the first matching slot wins and an unknown
// label yields 0 (start of the data).
static int dataIndexForLabel(const char *name) {
    int slot = 0;
    while (slot < gDataLabelCount && strIcmp(gDataLabelNames[slot], name) != 0) {
        slot++;
    }
    return (slot < gDataLabelCount) ? gDataLabelIdxs[slot] : 0;
}
// Emit the runtime library (debug or release variant). Provides string
// operations, temp management, file I/O, and dynamic array support.
static void emitRuntime(void) {
// Common headers and defines — same in both modes
fprintf(gOut,
"/* ---- BASIC Runtime Library (%s) ---- */\n"
"#include <stdio.h>\n"
"#include <stdlib.h>\n"
"#include <string.h>\n"
"#include <stdint.h>\n"
"#include <math.h>\n"
"#include <ctype.h>\n"
"#include <time.h>\n\n"
"#ifdef __GNUC__\n"
"#define _BUNUSED __attribute__((unused))\n"
"#else\n"
"#define _BUNUSED\n"
"#endif\n\n"
"/* Temporary string pool: collects intermediate strings for cleanup */\n"
"#define _BMAX_TEMPS 256\n"
"static char *_btemps[_BMAX_TEMPS] _BUNUSED;\n"
"static int _btmp_count _BUNUSED = 0;\n\n"
"/* Register a heap string as temporary (will be freed by _bfree_temps) */\n"
"static _BUNUSED char *_btmp(char *s) {\n"
" if (_btmp_count < _BMAX_TEMPS) _btemps[_btmp_count++] = s;\n"
" return s;\n"
"}\n\n"
"/* Free all registered temporary strings */\n"
"static _BUNUSED void _bfree_temps(void) {\n"
" for (int i = 0; i < _btmp_count; i++) free(_btemps[i]);\n"
" _btmp_count = 0;\n"
"}\n\n",
gRelease ? "release" : "debug"
);
// String functions — debug vs release
if (gRelease) {
fprintf(gOut,
"static _BUNUSED char *_bstr(const char *s) {\n"
" char *d = (char*)malloc(strlen(s) + 1);\n"
" strcpy(d, s);\n"
" return d;\n"
"}\n\n"
"static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n"
" if (*dest) free(*dest);\n"
" *dest = _bstr(src);\n"
"}\n\n"
"static _BUNUSED char *_bconcat(const char *a, const char *b) {\n"
" size_t la = strlen(a), lb = strlen(b);\n"
" char *r = (char*)malloc(la + lb + 1);\n"
" memcpy(r, a, la);\n"
" memcpy(r + la, b, lb);\n"
" r[la + lb] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bchr(int code) {\n"
" char *r = (char*)malloc(2);\n"
" r[0] = (char)code; r[1] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bstr_of_int(double val) {\n"
" char *r = (char*)malloc(64);\n"
" if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n"
" else sprintf(r, \"%%g\", val);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bmid(const char *s, int start, int len) {\n"
" int slen = (int)strlen(s);\n"
" start--;\n"
" if (start < 0) start = 0;\n"
" if (start >= slen) return _btmp(_bstr(\"\"));\n"
" if (len < 0 || start + len > slen) len = slen - start;\n"
" char *r = (char*)malloc(len + 1);\n"
" memcpy(r, s + start, len);\n"
" r[len] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n"
"static _BUNUSED char *_bright(const char *s, int n) {\n"
" int slen = (int)strlen(s);\n"
" if (n >= slen) return _btmp(_bstr(s));\n"
" return _btmp(_bstr(s + slen - n));\n"
"}\n\n"
"static _BUNUSED char *_bucase(const char *s) {\n"
" char *r = _bstr(s);\n"
" for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_blcase(const char *s) {\n"
" char *r = _bstr(s);\n"
" for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n"
" const char *p = strstr(haystack, needle);\n"
" return p ? (int)(p - haystack) + 1 : 0;\n"
"}\n\n"
"static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n"
);
} else {
fprintf(gOut,
"static _BUNUSED char *_bstr(const char *s) {\n"
" if (!s) s = \"\";\n"
" char *d = (char*)malloc(strlen(s) + 1);\n"
" if (!d) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
" strcpy(d, s);\n"
" return d;\n"
"}\n\n"
"static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n"
" if (*dest) free(*dest);\n"
" *dest = _bstr(src ? src : \"\");\n"
"}\n\n"
"static _BUNUSED char *_bconcat(const char *a, const char *b) {\n"
" if (!a) a = \"\";\n"
" if (!b) b = \"\";\n"
" size_t la = strlen(a), lb = strlen(b);\n"
" char *r = (char*)malloc(la + lb + 1);\n"
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
" memcpy(r, a, la);\n"
" memcpy(r + la, b, lb);\n"
" r[la + lb] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bchr(int code) {\n"
" char *r = (char*)malloc(2);\n"
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
" r[0] = (char)code; r[1] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bstr_of_int(double val) {\n"
" char *r = (char*)malloc(64);\n"
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
" if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n"
" else sprintf(r, \"%%g\", val);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bmid(const char *s, int start, int len) {\n"
" if (!s) return _btmp(_bstr(\"\"));\n"
" int slen = (int)strlen(s);\n"
" start--;\n"
" if (start < 0) start = 0;\n"
" if (start >= slen) return _btmp(_bstr(\"\"));\n"
" if (len < 0 || start + len > slen) len = slen - start;\n"
" char *r = (char*)malloc(len + 1);\n"
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
" memcpy(r, s + start, len);\n"
" r[len] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n"
"static _BUNUSED char *_bright(const char *s, int n) {\n"
" if (!s) return _btmp(_bstr(\"\"));\n"
" int slen = (int)strlen(s);\n"
" if (n >= slen) return _btmp(_bstr(s));\n"
" return _btmp(_bstr(s + slen - n));\n"
"}\n\n"
"static _BUNUSED char *_bucase(const char *s) {\n"
" if (!s) return _btmp(_bstr(\"\"));\n"
" char *r = _bstr(s);\n"
" for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_blcase(const char *s) {\n"
" if (!s) return _btmp(_bstr(\"\"));\n"
" char *r = _bstr(s);\n"
" for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n"
" if (!haystack || !needle) return 0;\n"
" const char *p = strstr(haystack, needle);\n"
" return p ? (int)(p - haystack) + 1 : 0;\n"
"}\n\n"
"static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n"
);
}
// Additional string runtime functions (same in both modes)
fprintf(gOut,
"static _BUNUSED char *_bltrim(const char *s) {\n"
" while (*s == ' ') s++;\n"
" return _btmp(_bstr(s));\n"
"}\n\n"
"static _BUNUSED char *_brtrim(const char *s) {\n"
" char *r = _bstr(s);\n"
" int len = (int)strlen(r);\n"
" while (len > 0 && r[len-1] == ' ') len--;\n"
" r[len] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_btrim(const char *s) {\n"
" while (*s == ' ') s++;\n"
" char *r = _bstr(s);\n"
" int len = (int)strlen(r);\n"
" while (len > 0 && r[len-1] == ' ') len--;\n"
" r[len] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bspace(int n) {\n"
" if (n < 0) n = 0;\n"
" char *r = (char*)malloc(n + 1);\n"
" memset(r, ' ', n);\n"
" r[n] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_btab(int col) {\n"
" if (col < 1) col = 1;\n"
" char *r = (char*)malloc(col);\n"
" memset(r, ' ', col - 1);\n"
" r[col - 1] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bhex(int n) {\n"
" char *r = (char*)malloc(20);\n"
" sprintf(r, \"%%X\", (unsigned)n);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_boct(int n) {\n"
" char *r = (char*)malloc(24);\n"
" sprintf(r, \"%%o\", (unsigned)n);\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bstring_rep(int n, const char *ch) {\n"
" if (n < 0) n = 0;\n"
" char *r = (char*)malloc(n + 1);\n"
" memset(r, ch[0], n);\n"
" r[n] = '\\0';\n"
" return _btmp(r);\n"
"}\n\n"
"static _BUNUSED char *_bgetenv(const char *name) {\n"
" const char *val = getenv(name);\n"
" return _btmp(_bstr(val ? val : \"\"));\n"
"}\n\n"
"static _BUNUSED void _bmid_assign(char **dest, int start, int len, const char *repl) {\n"
" int dlen = (int)strlen(*dest);\n"
" int rlen = (int)strlen(repl);\n"
" start--;\n"
" if (start < 0 || start >= dlen) return;\n"
" if (len > dlen - start) len = dlen - start;\n"
" if (rlen < len) len = rlen;\n"
" memcpy(*dest + start, repl, len);\n"
"}\n\n"
"/* PRINT USING support */\n"
"static const char *_busing_fmt _BUNUSED;\n"
"static const char *_busing_pos _BUNUSED;\n\n"
"static _BUNUSED void _busing_init(const char *fmt) {\n"
" _busing_fmt = _busing_pos = fmt ? fmt : \"\";\n"
"}\n\n"
"static _BUNUSED void _busing_num(double val) {\n"
" const char *p = _busing_pos;\n"
" int width = 0, decimals = -1, dollar = 0, plus = 0, aster = 0, tminus = 0;\n"
" /* Skip literal chars until we find a numeric format start */\n"
" while (*p) {\n"
" if (*p == '#') break;\n"
" if (*p == '*' && p[1] == '*') break;\n"
" if (*p == '$' && p[1] == '$') break;\n"
" if (*p == '+' && (p[1] == '#' || p[1] == '$' || p[1] == '*')) break;\n"
" if (*p == '!' || *p == '&' || *p == '\\\\') break;\n"
" putchar(*p++);\n"
" }\n"
" if (!*p) { _busing_pos = _busing_fmt; return; }\n"
" /* Parse numeric format */\n"
" if (*p == '+') { plus = 1; p++; }\n"
" while (*p == '*') { aster++; width++; p++; }\n"
" while (*p == '$') { dollar++; p++; if (dollar > 1) width++; }\n"
" while (*p == '#' || *p == ',') { if (*p == '#') width++; p++; }\n"
" if (*p == '.') { p++; decimals = 0; while (*p == '#') { decimals++; p++; } }\n"
" if (*p == '-') { tminus = 1; p++; }\n"
" _busing_pos = p;\n"
" /* Format the number */\n"
" char buf[64];\n"
" double absval = val < 0 ? -val : val;\n"
" int neg = (val < 0);\n"
" if (decimals >= 0) {\n"
" snprintf(buf, sizeof(buf), \"%%.*f\", decimals, absval);\n"
" } else {\n"
" snprintf(buf, sizeof(buf), \"%%.0f\", absval);\n"
" }\n"
" int totalw = width + (decimals >= 0 ? decimals + 1 : 0);\n"
" int len = (int)strlen(buf);\n"
" int signw = (plus || neg) ? 1 : 0;\n"
" int dollarw = dollar ? 1 : 0;\n"
" int pad = totalw - len - signw - dollarw;\n"
" if (pad < 0) pad = 0;\n"
" for (int i = 0; i < pad; i++) putchar(aster >= 2 ? '*' : ' ');\n"
" if (plus) putchar(neg ? '-' : '+');\n"
" else if (neg && !tminus) putchar('-');\n"
" if (dollar) putchar('$');\n"
" printf(\"%%s\", buf);\n"
" if (tminus && neg) putchar('-');\n"
"}\n\n"
"static _BUNUSED void _busing_str(const char *val) {\n"
" const char *p = _busing_pos;\n"
" if (!val) val = \"\";\n"
" /* Skip literal chars, print them */\n"
" while (*p && *p != '!' && *p != '&' && *p != '\\\\' && *p != '#') {\n"
" putchar(*p++);\n"
" }\n"
" if (!*p) { _busing_pos = _busing_fmt; return; }\n"
" if (*p == '!') {\n"
" /* First character only */\n"
" putchar(val[0] ? val[0] : ' ');\n"
" _busing_pos = p + 1;\n"
" } else if (*p == '&') {\n"
" /* Entire string */\n"
" printf(\"%%s\", val);\n"
" _busing_pos = p + 1;\n"
" } else if (*p == '\\\\') {\n"
" /* Fixed width: count chars between backslashes */\n"
" p++;\n"
" int width = 2;\n"
" while (*p && *p != '\\\\') { width++; p++; }\n"
" if (*p == '\\\\') p++;\n"
" _busing_pos = p;\n"
" int len = (int)strlen(val);\n"
" for (int i = 0; i < width; i++)\n"
" putchar(i < len ? val[i] : ' ');\n"
" } else {\n"
" _busing_pos = p;\n"
" }\n"
"}\n\n"
"static _BUNUSED void _busing_end(void) {\n"
" putchar('\\n');\n"
" _busing_pos = _busing_fmt;\n"
"}\n\n"
);
// Only emit GOSUB stack if there are GOSUB sites, to avoid
// -Wunused-variable warnings.
if (gGosubCount > 0) {
fprintf(gOut,
"/* GOSUB return stack */\n"
"#define _GOSUB_MAX %d\n"
"static int _gosub_stack[_GOSUB_MAX];\n"
"static int _gosub_sp = 0;\n\n",
MAX_GOSUB_SITES
);
}
// File I/O runtime — debug vs release
fprintf(gOut,
"/* File I/O support */\n"
"#define _BMAX_FILES 16\n"
"static FILE *_bfiles[_BMAX_FILES] _BUNUSED = {0};\n"
"static long _bfile_reclen[_BMAX_FILES] _BUNUSED = {0};\n\n"
);
if (gRelease) {
fprintf(gOut,
"static _BUNUSED FILE *_bfile_get(int fnum) {\n"
" return _bfiles[fnum];\n"
"}\n\n"
"static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n"
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
" _bfiles[fnum] = fopen(fname, mode);\n"
"}\n\n"
"static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n"
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
" _bfiles[fnum] = fopen(fname, \"r+b\");\n"
" if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n"
" _bfile_reclen[fnum] = reclen;\n"
"}\n\n"
"static _BUNUSED void _bfile_close(int fnum) {\n"
" if (_bfiles[fnum]) { fclose(_bfiles[fnum]); _bfiles[fnum] = NULL; }\n"
"}\n\n"
"static _BUNUSED int _beof(int fnum) {\n"
" if (!_bfiles[fnum]) return -1;\n"
" int c = fgetc(_bfiles[fnum]);\n"
" if (c == EOF) return -1;\n"
" ungetc(c, _bfiles[fnum]);\n"
" return 0;\n"
"}\n\n"
"static _BUNUSED long _blof(int fnum) {\n"
" if (!_bfiles[fnum]) return 0;\n"
" long cur = ftell(_bfiles[fnum]);\n"
" fseek(_bfiles[fnum], 0, SEEK_END);\n"
" long sz = ftell(_bfiles[fnum]);\n"
" fseek(_bfiles[fnum], cur, SEEK_SET);\n"
" return sz;\n"
"}\n\n"
"static _BUNUSED int _bfreefile(void) {\n"
" for (int i = 1; i < _BMAX_FILES; i++)\n"
" if (!_bfiles[i]) return i;\n"
" return 0;\n"
"}\n\n"
"static _BUNUSED void _bline_input(int fnum, char **dest) {\n"
" if (!_bfiles[fnum]) return;\n"
" char _buf[4096];\n"
" if (fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n"
" _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n"
" _bstr_assign(dest, _buf);\n"
" }\n"
"}\n\n"
);
} else {
fprintf(gOut,
"static _BUNUSED FILE *_bfile_get(int fnum) {\n"
" if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) {\n"
" fprintf(stderr, \"Bad file number %%d\\n\", fnum);\n"
" exit(1);\n"
" }\n"
" return _bfiles[fnum];\n"
"}\n\n"
"static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n"
" if (fnum < 1 || fnum >= _BMAX_FILES) {\n"
" fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n"
" exit(1);\n"
" }\n"
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
" _bfiles[fnum] = fopen(fname, mode);\n"
" if (!_bfiles[fnum]) {\n"
" fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n"
" exit(1);\n"
" }\n"
"}\n\n"
"static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n"
" if (fnum < 1 || fnum >= _BMAX_FILES) {\n"
" fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n"
" exit(1);\n"
" }\n"
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
" _bfiles[fnum] = fopen(fname, \"r+b\");\n"
" if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n"
" if (!_bfiles[fnum]) {\n"
" fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n"
" exit(1);\n"
" }\n"
" _bfile_reclen[fnum] = reclen;\n"
"}\n\n"
"static _BUNUSED void _bfile_close(int fnum) {\n"
" if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum]) {\n"
" fclose(_bfiles[fnum]);\n"
" _bfiles[fnum] = NULL;\n"
" }\n"
"}\n\n"
"static _BUNUSED int _beof(int fnum) {\n"
" if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return -1;\n"
" int c = fgetc(_bfiles[fnum]);\n"
" if (c == EOF) return -1;\n"
" ungetc(c, _bfiles[fnum]);\n"
" return 0;\n"
"}\n\n"
"static _BUNUSED long _blof(int fnum) {\n"
" if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return 0;\n"
" long cur = ftell(_bfiles[fnum]);\n"
" fseek(_bfiles[fnum], 0, SEEK_END);\n"
" long sz = ftell(_bfiles[fnum]);\n"
" fseek(_bfiles[fnum], cur, SEEK_SET);\n"
" return sz;\n"
"}\n\n"
"static _BUNUSED int _bfreefile(void) {\n"
" for (int i = 1; i < _BMAX_FILES; i++)\n"
" if (!_bfiles[i]) return i;\n"
" return 0;\n"
"}\n\n"
"static _BUNUSED void _bline_input(int fnum, char **dest) {\n"
" char _buf[4096];\n"
" if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum] &&\n"
" fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n"
" _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n"
" _bstr_assign(dest, _buf);\n"
" }\n"
"}\n\n"
);
}
// DATA/READ support — same in both modes
fprintf(gOut,
"/* DATA/READ support */\n"
"typedef struct { int is_str; double num; const char *str; } _BDataItem;\n\n"
);
}
// Main code generation: emit the full C source file from the AST.
//
// Everything is written to gOut in this order:
//   1. the runtime library (emitRuntime)
//   2. packed struct definitions for user-defined types
//   3. the DATA pool (_bdata) with its count and read-position variables
//   4. forward declarations for every SUB/FUNCTION
//   5. the SUB/FUNCTION implementations (genFuncDef)
//   6. main(), containing every top-level statement that is not a
//      SUB/FUNCTION definition
//
// Side effects: resets and refills the RESTORE lookup tables
// (gDataLineNums/gDataLineIdxs and gDataLabelNames/gDataLabelIdxs) while the
// DATA pool is emitted, and leaves gIndent at 0 on return.
static void generate(Node *prog) {
    // Emit the runtime library
    emitRuntime();

    // Emit UDT struct definitions (packed for binary I/O compatibility)
    if (gUdtCount > 0) {
        fprintf(gOut, "/* User-defined types */\n");
        fprintf(gOut, "#pragma pack(push, 1)\n");
        for (int i = 0; i < gUdtCount; i++) {
            UdtDef *u = &gUdts[i];
            fprintf(gOut, "struct _b_%s {\n", cleanName(u->name));
            for (int j = 0; j < u->fieldCount; j++) {
                UdtField *f = &u->fields[j];
                if (f->dataType == TYPE_STR && f->strLen > 0) {
                    // String field with a declared length: stored inline as
                    // a char array of strLen + 1 bytes
                    fprintf(gOut, " char %s[%d];\n",
                            cleanName(f->name), f->strLen + 1);
                } else if (f->dataType == TYPE_UDT) {
                    // Nested user-defined type
                    fprintf(gOut, " %s %s;\n",
                            cUdtTypeStr(f->udtIndex), cleanName(f->name));
                } else {
                    fprintf(gOut, " %s %s;\n",
                            cTypeStr(f->dataType), cleanName(f->name));
                }
            }
            fprintf(gOut, "};\n");
        }
        fprintf(gOut, "#pragma pack(pop)\n\n");
    }

    // Collect all DATA nodes and emit the data pool
    Node *dataNodes[4096];
    int dataNodeCount = 0;
    collectData(prog, dataNodes, &dataNodeCount, 4096);
    if (dataNodeCount > 0) {
        // Emit the data pool array
        fprintf(gOut, "/* DATA pool */\n");
        fprintf(gOut, "static _BDataItem _bdata[] = {\n");
        int totalItems = 0;
        gDataLineCount = 0;
        gDataLabelCount = 0;
        for (int di = 0; di < dataNodeCount; di++) {
            Node *dn = dataNodes[di];
            // Record BASIC-line-number-to-index mapping for RESTORE.
            // dn->ival is set by collectData when DATA follows a numeric label.
            if (dn->ival != 0 && gDataLineCount < MAX_DATA_LINES) {
                gDataLineNums[gDataLineCount] = dn->ival;
                gDataLineIdxs[gDataLineCount] = totalItems;
                gDataLineCount++;
            }
            // Record named-label-to-index mapping for RESTORE.
            // dn->sval is set by collectData when DATA follows a named label.
            if (dn->sval && gDataLabelCount < MAX_DATA_LINES) {
                gDataLabelNames[gDataLabelCount] = dn->sval;
                gDataLabelIdxs[gDataLabelCount] = totalItems;
                gDataLabelCount++;
            }
            for (Node *item = dn->a; item; item = item->next) {
                if (item->dataType == TYPE_STR) {
                    // Escape the string for C output (quotes and backslashes)
                    fprintf(gOut, " {1, 0, \"");
                    for (const char *p = item->sval; *p; p++) {
                        if (*p == '"') fprintf(gOut, "\\\"");
                        else if (*p == '\\') fprintf(gOut, "\\\\");
                        else fputc(*p, gOut);
                    }
                    fprintf(gOut, "\"},\n");
                } else if (item->dataType == TYPE_DBL) {
                    // 17 significant digits round-trip any double exactly;
                    // plain %g keeps only 6 and would silently change values
                    // such as 1234567.5.
                    fprintf(gOut, " {0, %.17g, NULL},\n", item->dval);
                } else {
                    fprintf(gOut, " {0, %d, NULL},\n", item->ival);
                }
                totalItems++;
            }
        }
        fprintf(gOut, "};\n");
        fprintf(gOut, "static int _bdata_count _BUNUSED = %d;\n", totalItems);
        fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n");
    } else {
        // No DATA statements — emit empty placeholder
        fprintf(gOut, "static _BDataItem _bdata[] _BUNUSED = {{0,0,NULL}};\n");
        fprintf(gOut, "static int _bdata_count _BUNUSED = 0;\n");
        fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n");
    }

    // Collect all SUB/FUNCTION definitions
    Node *funcs[256];
    int funcCount = 0;
    collectFuncs(prog, funcs, &funcCount, 256);

    // Emit forward declarations for SUBs and FUNCTIONs
    if (funcCount > 0) {
        fprintf(gOut, "/* Forward declarations */\n");
        for (int i = 0; i < funcCount; i++) {
            Node *f = funcs[i];
            int isFunc = (f->type == NODE_FUNC);
            // A SUB has no return value
            DataType ret = isFunc ? f->dataType : TYPE_VOID;
            fprintf(gOut, "%s %s(", cTypeStr(ret), cleanName(f->sval));
            int first = 1;
            for (Node *p = f->a; p; p = p->next) {
                if (!first) fprintf(gOut, ", ");
                first = 0;
                if (p->ival == PASS_BYREF)
                    fprintf(gOut, "%s*", cTypeStr(p->dataType));
                else if (p->dataType == TYPE_STR)
                    fprintf(gOut, "const char*");
                else
                    fprintf(gOut, "%s", cTypeStr(p->dataType));
            }
            // No parameters at all: emit an explicit (void) prototype
            if (first) fprintf(gOut, "void");
            fprintf(gOut, ");\n");
        }
        fprintf(gOut, "\n");
    }

    // Emit SUB/FUNCTION implementations
    for (int i = 0; i < funcCount; i++) {
        genFuncDef(funcs[i]);
    }

    // Emit main() with global (non-function) statements
    fprintf(gOut, "/* Main program */\n");
    fprintf(gOut, "int main(void) {\n");
    gIndent = 1;
    // Walk the top-level block and emit non-function statements
    Node *blk = (prog->type == NODE_PROGRAM) ? prog->a : prog;
    Node *s = (blk && blk->type == NODE_BLOCK) ? blk->a : blk;
    while (s) {
        // Skip SUB/FUNCTION definitions (already emitted above)
        if (s->type != NODE_SUB && s->type != NODE_FUNC) {
            genStmt(s);
        }
        s = s->next;
    }
    emit("return 0;\n");
    gIndent = 0;
    fprintf(gOut, "}\n");
}
// -----------------------------------------------------------------------
// Section 9: Main Entry Point
// -----------------------------------------------------------------------
// Read an entire file into a malloc'd, NUL-terminated buffer.
//
// Returns NULL when the file cannot be opened, sized, or read, when the
// allocation fails, or when the file is larger than the int range used for
// gSrcLen. The caller owns the returned buffer and must free() it.
static char *readFile(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;

    // Determine the file size by seeking to the end
    long len = -1;
    if (fseek(f, 0, SEEK_END) == 0) len = ftell(f);

    char *buf = NULL;
    // (unsigned)-1 >> 1 is the largest value an int can hold; anything
    // bigger is too large for the int-based gSrcLen.
    if (len >= 0 && len <= (long)((unsigned)-1 >> 1) &&
        fseek(f, 0, SEEK_SET) == 0) {
        buf = malloc((size_t)len + 1);
        if (buf) {
            size_t nread = fread(buf, 1, (size_t)len, f);
            if (ferror(f)) {
                // A read error must not be passed off as a (truncated) file
                free(buf);
                buf = NULL;
            } else {
                buf[nread] = '\0';
            }
        }
    }
    fclose(f);
    return buf;
}
// -----------------------------------------------------------------------
// $INCLUDE preprocessor
// -----------------------------------------------------------------------
// Extract the directory part of a file path (returns a malloc'd string that
// the caller must free).
//
// The directory is everything before the last '/'. A path with no '/' yields
// ".", and a path whose only '/' is the first character (e.g. "/prog.bas")
// yields the empty string. Exits with status 1 if memory cannot be allocated,
// so the result is never NULL.
static char *dirName(const char *path) {
    const char *last = strrchr(path, '/');
    // Pick the text to copy: the prefix before the last slash, or "."
    const char *src = last ? path : ".";
    size_t len = last ? (size_t)(last - path) : 1;

    char *dir = malloc(len + 1);
    if (!dir) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    memcpy(dir, src, len);
    dir[len] = '\0';
    return dir;
}
// Join directory and filename (returns a malloc'd string that the caller
// must free).
//
// If `file` is absolute (starts with '/') the result is a copy of `file` and
// `dir` is ignored; otherwise the result is dir + "/" + file. Exits with
// status 1 if memory cannot be allocated, so the result is never NULL.
static char *pathJoin(const char *dir, const char *file) {
    size_t flen = strlen(file);
    // Bytes placed in front of `file`: dir plus one '/' separator, or nothing
    // at all when `file` is already absolute.
    size_t prefix = (file[0] == '/') ? 0 : strlen(dir) + 1;

    char *result = malloc(prefix + flen + 1);
    if (!result) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    if (prefix > 0) {
        memcpy(result, dir, prefix - 1);
        result[prefix - 1] = '/';
    }
    memcpy(result + prefix, file, flen + 1);  // flen + 1 copies the NUL too
    return result;
}
// Growing buffer for source assembly.
// `data` is kept NUL-terminated after every operation, so it can be used as
// an ordinary C string at any time.
typedef struct {
    char *data;   // heap buffer holding the text plus a terminating NUL
    size_t len;   // number of text bytes in data, not counting the NUL
    size_t cap;   // allocated size of data in bytes
} SourceBuf;

// Initialise an empty buffer with a 4096-byte allocation.
// Exits with status 1 if the allocation fails.
static void sbInit(SourceBuf *sb) {
    sb->cap = 4096;
    sb->len = 0;
    sb->data = malloc(sb->cap);
    if (!sb->data) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    sb->data[0] = '\0';
}

// Append the n bytes at s to the buffer, doubling the capacity as often as
// needed to make room. Exits with status 1 if the buffer cannot be grown.
static void sbAppend(SourceBuf *sb, const char *s, size_t n) {
    size_t need = sb->len + n + 1;  // text so far + new bytes + NUL
    if (need > sb->cap) {
        // Work out the final capacity first so realloc runs only once
        size_t newCap = sb->cap ? sb->cap : 4096;
        while (newCap < need) {
            if (newCap > (size_t)-1 / 2) {
                // Doubling would wrap around; settle for the exact size
                newCap = need;
                break;
            }
            newCap *= 2;
        }
        // Keep the old pointer until realloc is known to have succeeded
        char *grown = realloc(sb->data, newCap);
        if (!grown) {
            fprintf(stderr, "Error: Out of memory\n");
            exit(1);
        }
        sb->data = grown;
        sb->cap = newCap;
    }
    memcpy(sb->data + sb->len, s, n);
    sb->len += n;
    sb->data[sb->len] = '\0';
}
// Case-insensitive comparison of at most n leading characters.
// Returns 0 when the two strings agree (ignoring case) over the first n
// characters or up to a shared terminating NUL, otherwise the difference
// between the first pair of upper-cased characters that disagree.
static int strNIcmp(const char *a, const char *b, size_t n) {
    // Walk both strings as unsigned bytes so toupper() gets a valid argument
    const unsigned char *pa = (const unsigned char *)a;
    const unsigned char *pb = (const unsigned char *)b;

    for (; n > 0; n--, pa++, pb++) {
        int ua = toupper(*pa);
        int ub = toupper(*pb);
        int diff = ua - ub;
        if (diff != 0) {
            return diff;
        }
        if (ua == 0) {
            // Both strings ended together before n characters were seen
            break;
        }
    }
    return 0;
}
// Process a source file, expanding $INCLUDE directives.
// Appends to the SourceBuf and gLineMap.
//
// filePath      file to read
// sb            buffer that receives the expanded source text
// includeStack  paths of the files currently being expanded; entries
//               [0, includeDepth) are the enclosing files, and this call
//               stores filePath at index includeDepth
// includeDepth  nesting level of this file (0 for the top-level source)
//
// A directive is a comment line of the form   '$INCLUDE: 'filename'
// (case-insensitive, optional blanks before the leading quote, after it, and
// after the colon). The named file is resolved relative to the directory of
// the including file and expanded in place of the directive line. A line that
// only partly matches the form is kept as an ordinary source line.
//
// Prints a message and exits with status 1 when includes nest too deeply,
// when a file includes itself through the current chain, or when a file
// cannot be read.
static void preprocessFile(const char *filePath, SourceBuf *sb,
                           const char **includeStack, int includeDepth) {
    // Check depth
    if (includeDepth >= MAX_INCLUDE_DEPTH) {
        fprintf(stderr, "Error: $INCLUDE nested too deeply (max %d) at '%s'\n",
                MAX_INCLUDE_DEPTH, filePath);
        exit(1);
    }
    // Check circular includes
    for (int i = 0; i < includeDepth; i++) {
        if (strcmp(includeStack[i], filePath) == 0) {
            fprintf(stderr, "Error: Circular $INCLUDE detected: '%s'\n", filePath);
            exit(1);
        }
    }
    // Read file
    char *text = readFile(filePath);
    if (!text) {
        fprintf(stderr, "Error: Cannot open '%s'", filePath);
        if (includeDepth > 0)
            fprintf(stderr, " (included from '%s')", includeStack[includeDepth - 1]);
        fprintf(stderr, "\n");
        exit(1);
    }
    const char *fname = internFileName(filePath);
    char *baseDir = dirName(filePath);
    // Push onto include stack
    includeStack[includeDepth] = filePath;

    // Process line by line
    const char *p = text;
    int origLine = 0;
    while (*p) {
        origLine++;
        // Find end of line
        const char *lineStart = p;
        while (*p && *p != '\n') p++;
        const char *lineEnd = p;  // one past the last character of the line
        size_t lineLen = (size_t)(lineEnd - lineStart);
        if (*p == '\n') p++;  // consume newline

        // Check for '$INCLUDE: directive
        // Format: '$INCLUDE: 'filename'
        // Leading spaces are allowed before the '
        const char *s = lineStart;
        while (s < lineEnd && (*s == ' ' || *s == '\t')) s++;
        int isInclude = 0;
        char incFile[MAX_TOKEN_LEN] = {0};
        // Check for ' (comment start) followed by $INCLUDE:
        if (s < lineEnd && *s == '\'') {
            s++;  // skip '
            // Skip optional spaces between ' and $
            while (s < lineEnd && (*s == ' ' || *s == '\t')) s++;
            // Compare the remaining length instead of forming s + 9, which
            // could point past the end of the buffer on a short last line.
            if ((size_t)(lineEnd - s) >= 9 && strNIcmp(s, "$INCLUDE:", 9) == 0) {
                s += 9;
                // Skip spaces
                while (s < lineEnd && (*s == ' ' || *s == '\t')) s++;
                // Extract filename between single quotes
                if (s < lineEnd && *s == '\'') {
                    s++;
                    const char *fnStart = s;
                    while (s < lineEnd && *s != '\'') s++;
                    // Require a non-empty name and a closing quote on the line
                    if (s > fnStart && s < lineEnd) {
                        size_t fnLen = (size_t)(s - fnStart);
                        if (fnLen < MAX_TOKEN_LEN) {
                            memcpy(incFile, fnStart, fnLen);
                            incFile[fnLen] = '\0';
                            isInclude = 1;
                        }
                    }
                }
            }
        }

        if (isInclude) {
            // Resolve path relative to current file's directory
            char *resolvedPath = pathJoin(baseDir, incFile);
            preprocessFile(resolvedPath, sb, includeStack, includeDepth + 1);
            free(resolvedPath);
        } else {
            // Record line map entry
            if (gLineMapCount < MAX_SOURCE_LINES) {
                gLineMap[gLineMapCount].fileName = fname;
                gLineMap[gLineMapCount].origLine = origLine;
                gLineMapCount++;
            }
            // Append line (with newline)
            sbAppend(sb, lineStart, lineLen);
            sbAppend(sb, "\n", 1);
        }
    }
    free(baseDir);
    free(text);
}
// Top-level preprocessor entry point.
// Expands filePath, together with everything it pulls in through $INCLUDE,
// into a single NUL-terminated buffer and returns that buffer.
static char *preprocessSource(const char *filePath) {
    // Chain of files currently being expanded; preprocessFile fills it in,
    // starting with filePath itself at index 0.
    const char *openFiles[MAX_INCLUDE_DEPTH];
    SourceBuf assembled;

    sbInit(&assembled);
    preprocessFile(filePath, &assembled, openFiles, 0);
    return assembled.data;
}
// Load functions.def from the directory that contains `path`.
// Returns 1 when `path` has a directory component (a load was attempted) and
// 0 when it is a bare name, in which case nothing is loaded.
static int loadExternFuncsNextTo(const char *path) {
    static const char defName[] = "functions.def";
    const char *lastSlash = strrchr(path, '/');
    if (!lastSlash) return 0;

    size_t dirLen = (size_t)(lastSlash - path) + 1;  // keep the trailing '/'
    char *defPath = malloc(dirLen + sizeof defName); // sizeof includes the NUL
    if (!defPath) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    memcpy(defPath, path, dirLen);
    memcpy(defPath + dirLen, defName, sizeof defName);
    loadExternFuncs(defPath);
    free(defPath);
    return 1;
}

// Program entry point.
//
// Usage: basic2c [--release|-r] input.bas [output.c]
//
// Loads external function definitions, preprocesses and parses the BASIC
// source, and writes the generated C to output.c (or stdout when no output
// file is named). Returns 0 on success and 1 on a usage error, when the
// output file cannot be created, when the source is too large, or when
// writing the output fails.
int main(int argc, char **argv) {
    // Check for --release / -r flag (only recognised as the first argument)
    int argi = 1;
    if (argc > 1 && (strcmp(argv[1], "--release") == 0 ||
                     strcmp(argv[1], "-r") == 0)) {
        gRelease = 1;
        argi++;
    }
    if (argi >= argc) {
        fprintf(stderr, "Usage: basic2c [--release|-r] input.bas [output.c]\n");
        fprintf(stderr, "External functions can be defined in functions.def\n");
        return 1;
    }

    // Load external function definitions from functions.def in binary's
    // directory. Binary in current directory or bare name: try the current
    // directory instead.
    if (!loadExternFuncsNextTo(argv[0])) {
        loadExternFuncs("functions.def");
    }
    // Also load from input file's directory (may add more or override).
    // A bare input name loads nothing here.
    loadExternFuncsNextTo(argv[argi]);

    // Read and preprocess source file (expands $INCLUDE directives)
    char *source = preprocessSource(argv[argi]);

    // Open output file (or stdout)
    if (argi + 1 < argc) {
        gOut = fopen(argv[argi + 1], "w");
        if (!gOut) {
            fprintf(stderr, "Error: Cannot create '%s'\n", argv[argi + 1]);
            free(source);
            return 1;
        }
    } else {
        gOut = stdout;
    }

    // Initialize lexer state
    gSrc = source;
    gSrcPos = 0;
    size_t slen = strlen(source);
    // gSrcLen is an int; (unsigned)-1 >> 1 is the largest value it can hold
    if (slen > (size_t)((unsigned)-1 >> 1)) {
        fprintf(stderr, "Error: Source file too large (%zu bytes)\n", slen);
        if (gOut != stdout) fclose(gOut);
        free(source);
        return 1;
    }
    gSrcLen = (int)slen;
    gLine = 1;

    // Parse the BASIC source into an AST
    Node *program = parseProgram();
    // Generate C code from the AST
    generate(program);

    // Cleanup. Buffered output is only known to be written once the stream
    // has been flushed or closed, so check both the error flag and that call.
    int writeFailed = (ferror(gOut) != 0);
    if (gOut != stdout) {
        if (fclose(gOut) != 0) writeFailed = 1;
    } else if (fflush(stdout) != 0) {
        writeFailed = 1;
    }
    free(source);
    if (writeFailed) {
        fprintf(stderr, "Error: Failed to write output\n");
        return 1;
    }
    return 0;
}