// ============================================================================
// basic2c.c - A BASIC to C Transpiler
//
// Translates BASIC source code into equivalent C source code.
//
// Supported features:
//   - Classic line-numbered BASIC and named labels (GOTO, GOSUB/RETURN)
//   - Modern structured BASIC (SUB, FUNCTION, IF/END IF, etc.)
//   - Data types: BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING
//   - User-defined types (TYPE/END TYPE) with nesting and SIZEOF
//   - Dynamic arrays (DIM, REDIM), multidimensional (up to N-D)
//   - Parameter passing: BYVAL (by value) and BYREF (by reference)
//   - LOCAL and STATIC variable declarations inside SUB/FUNCTION
//   - Constants (CONST) with compile-time substitution
//   - Control flow: IF/ELSEIF/ELSE, FOR/NEXT, WHILE/WEND, DO/LOOP,
//     SELECT CASE, ON GOTO, ON GOSUB, EXIT, CONTINUE
//   - PRINT statement with ? shortcut, PRINT USING for formatted output
//   - Operators: arithmetic, comparison, string concatenation (+, &),
//     bitwise/logical AND, OR, NOT, XOR
//   - DATA/READ/RESTORE for inline data
//   - File I/O: OPEN/CLOSE, PRINT #, INPUT #, LINE INPUT #, WRITE #
//   - Random-access file I/O: GET, PUT with record numbers
//   - String functions: LEN, MID$, LEFT$, RIGHT$, STR$, VAL, CHR$,
//     ASC, UCASE$, LCASE$, INSTR, STRING$, LTRIM$, RTRIM$, TRIM$,
//     SPACE$, HEX$, OCT$, MID$ assignment
//   - Print functions: TAB, SPC for cursor positioning
//   - Math functions: ABS, INT, SQR, SIN, COS, TAN, ATN, LOG, EXP,
//     SGN, RND (optional argument ignored), RANDOMIZE
//   - Array functions: LBOUND, UBOUND
//   - I/O functions: EOF, LOF, FREEFILE
//   - SWAP for exchanging variable values
//   - $INCLUDE metacommand for file inclusion with nested include
//     support, circular detection, and file+line error reporting
//   - Extensible built-in functions via builtins.def (compile-time)
//   - External function definitions via functions.def (runtime)
//   - Debug and release runtime modes (--release or -r flag)
//
// Usage: basic2c [--release|-r] input.bas [output.c]
//        If output.c is omitted, C code is written to stdout.
//
// Build: cc -o basic2c basic2c.c -lm
//
// Architecture:
//   1. Preprocessor - processes $INCLUDE directives, builds line map
//   2. Lexer        - tokenizes BASIC source (case-insensitive keywords)
//   3. Parser       - recursive descent, builds an AST
//   4. Codegen      - walks AST, emits C source with a small runtime library
// ============================================================================

// Standard headers used by this translation unit:
//   stdio.h  - FILE, fprintf, vfprintf, fopen, fgets
//   stdlib.h - malloc, exit, atoi, atof
//   string.h - strlen, strcmp, strchr, memset, strncpy
//   ctype.h  - isdigit, isalpha, isalnum, toupper
//   stdarg.h - va_list for the printf-style helpers
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>

// -----------------------------------------------------------------------
// Section 1: Constants and Limits
// -----------------------------------------------------------------------
#define MAX_TOKEN_LEN     4096   // max length of one token string
#define MAX_IDENT         128    // max identifier length
#define MAX_PARAMS        32     // max parameters per SUB/FUNCTION
#define MAX_SYMBOLS       2048   // symbol table capacity
#define MAX_GOSUB_SITES   512    // max GOSUB return-point IDs
#define MAX_LINE_LABELS   4096   // max classic line-number labels
#define MAX_NODES         65536  // AST node pool size
#define MAX_ARGS          64     // max arguments in a PRINT / CALL list
#define MAX_SOURCE_LINES  65536  // max lines in preprocessed source
#define MAX_INCLUDE_DEPTH 16     // max nested $INCLUDE depth
#define MAX_INCLUDE_FILES 64     // max distinct included filenames
#define MAX_EXTERN_FUNCS  128    // max external function definitions
#define MAX_EXTERN_CODE   256    // max C code template length

// -----------------------------------------------------------------------
// Section 2: Enumerations
// -----------------------------------------------------------------------

// Token types produced by the lexer
typedef enum {
    TOK_EOF = 0,
    TOK_NEWLINE,    // end of line (statement separator)
    TOK_COLON,      // : (statement separator on same line)
    TOK_INT_LIT,    // integer literal
    TOK_DBL_LIT,    // floating-point literal
    TOK_STR_LIT,    // "..." string literal
    TOK_IDENT,      // identifier (variable / sub / function name)

    // ---------- keywords ----------
    TOK_DIM, TOK_REDIM, TOK_AS,
    TOK_BYTE, TOK_INTEGER, TOK_LONG, TOK_FLOAT, TOK_DOUBLE, TOK_STRING,
    TOK_LET, TOK_PRINT, TOK_INPUT,
    TOK_IF, TOK_THEN, TOK_ELSE, TOK_ELSEIF, TOK_END,
    TOK_FOR, TOK_TO, TOK_STEP, TOK_NEXT,
    TOK_WHILE, TOK_WEND,
    TOK_DO, TOK_LOOP, TOK_UNTIL,
    TOK_GOTO, TOK_GOSUB, TOK_RETURN,
    TOK_SUB, TOK_FUNCTION, TOK_CALL,
    TOK_BYVAL, TOK_BYREF,
    TOK_LOCAL, TOK_STATIC,
    TOK_EXIT,
    TOK_AND, TOK_OR, TOK_NOT, TOK_MOD, TOK_XOR,
    TOK_SELECT, TOK_CASE, TOK_SWAP, TOK_CONST, TOK_ON,
    TOK_REM,
    TOK_OPEN, TOK_CLOSE, TOK_OUTPUT, TOK_APPEND, TOK_BINARY,
    TOK_LINE, TOK_WRITE,
    // DATA, READ, RESTORE, GET, PUT, RANDOM, SIZEOF are contextual
    // keywords (checked as TOK_IDENT to avoid colliding with variable names)

    // ---------- operators / punctuation ----------
    TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_BSLASH,
    TOK_CARET,
    TOK_EQ, TOK_NE, TOK_LT, TOK_GT, TOK_LE, TOK_GE,
    TOK_LPAREN, TOK_RPAREN,
    TOK_COMMA, TOK_SEMI,
    TOK_AMP,        // & string concatenation
    TOK_HASH,       // # file number prefix
    TOK_DOT,        // . member access
    TOK_TYPE        // TYPE keyword (user-defined types)
} TokenType;

// AST node kinds
typedef enum {
    NODE_PROGRAM,       // root: a = first top-level item (linked->next)
    NODE_BLOCK,         // block of statements: a = first stmt
    NODE_INT_LIT,       // ival = value
    NODE_DBL_LIT,       // dval = value
    NODE_STR_LIT,       // sval = string content
    NODE_IDENT,         // sval = name
    NODE_ARRAY_REF,     // sval = name, a = index exprs (linked list)
    NODE_BINOP,         // ival = op token, a = left, b = right
    NODE_UNOP,          // ival = op token, a = operand
    NODE_ASSIGN,        // a = target (IDENT/ARRAY_REF), b = value
    NODE_DIM,           // sval=name, dataType, a=sizes (list), ival=ndims
    NODE_REDIM,         // sval=name, dataType, a=sizes (list), ival=ndims
    NODE_PRINT,         // a = first print-item (linked->next)
    NODE_PRINT_ITEM,    // a = expr, ival = separator after (';'=1,','=2)
    NODE_PRINT_USING,   // a = format expr, b = value list (linked->next)
    NODE_INPUT,         // sval = prompt (or NULL), a = first var ->next
    NODE_IF,            // a=cond, b=then-block, c=else-part
    NODE_FOR,           // sval=var, a=start, b=end, c=step, d=body
    NODE_WHILE,         // a=cond, b=body
    NODE_DO_LOOP,       // a=cond, b=body, ival bits: 1=UNTIL,2=bottom
    NODE_GOTO,          // ival=line# or sval=label
    NODE_GOSUB,         // ival=line#, ival2=return-point-id
    NODE_RETURN,        // a=expr (FUNCTION return) or NULL
    NODE_LABEL,         // ival=line number
    NODE_SUB,           // sval=name, a=param list, b=body
    NODE_FUNC,          // sval=name, a=params, b=body, dataType=ret
    NODE_PARAM,         // sval=name, dataType, ival=passMode
    NODE_CALL,          // sval=name, a=arg list (linked->next)
    NODE_FUNC_CALL,     // sval=name, a=arg list (linked->next)
    NODE_EXIT,          // ival=what (TOK_FOR,TOK_WHILE,TOK_DO,etc)
    NODE_CONTINUE,      // ival=what (TOK_FOR,TOK_WHILE,TOK_DO)
    NODE_LOCAL,         // sval=name, dataType
    NODE_STATIC,        // sval=name, dataType
    NODE_END,           // END statement
    NODE_OPEN,          // a=filename, b=file# expr, ival=mode
    NODE_CLOSE,         // b=file# expr
    NODE_FILE_PRINT,    // b=file# expr, a=print items
    NODE_FILE_INPUT,    // b=file# expr, a=variable list
    NODE_LINE_INPUT,    // b=file# expr, a=target variable
    NODE_FILE_WRITE,    // b=file# expr, a=expression list
    NODE_DATA,          // a=linked list of literal items, line=source ln
    NODE_READ,          // a=linked list of NODE_IDENT vars to read into
    NODE_RESTORE,       // ival=target line number (0=beginning)
    NODE_TYPE_DEF,      // sval=type name, ival=udtIndex
    NODE_DOT_ACCESS,    // a=base expr, sval=field name, ival2=udtIndex
    NODE_GET,           // a=file# expr, b=record# expr, c=var
    NODE_PUT,           // a=file# expr, b=record# expr, c=var
    NODE_SELECT,        // a=test expr, b=first NODE_CASE (linked->next)
    NODE_CASE,          // a=value exprs (linked), b=body block, ival=flags
    NODE_SWAP,          // a=first var, b=second var
    NODE_CONST_DECL,    // sval=name, a=value expr
    NODE_RANDOMIZE,     // a=seed expr (or NULL)
    NODE_ON_GOTO,       // a=expr, b=label list (NODE_INT_LIT/NODE_IDENT)
    NODE_ON_GOSUB,      // a=expr, b=label list, ival2=first return-point-id
    NODE_MID_ASSIGN     // a=target string var, b=start, c=len, d=replacement
} NodeType;

// BASIC data types – ordered by numeric promotion rank so that
// promoteType() can simply take the maximum of two types.
typedef enum {
    TYPE_VOID = 0,  // used for SUB (no return value)
    TYPE_BYTE,      // BYTE    -> uint8_t
    TYPE_INT,       // INTEGER -> int16_t
    TYPE_LONG,      // LONG    -> int32_t
    TYPE_FLOAT,     // FLOAT   -> float
    TYPE_DBL,       // DOUBLE  -> double
    TYPE_STR,       // STRING  -> char*
    TYPE_UDT        // user-defined TYPE -> struct
} DataType;

// Parameter passing modes
typedef enum {
    PASS_BYVAL = 0,
    PASS_BYREF = 1
} PassMode;

// -----------------------------------------------------------------------
// Section 3: Data Structures
// -----------------------------------------------------------------------

// A single token from the lexer
typedef struct {
    TokenType type;
    int line;                   // source line where token appears
    int ival;                   // integer value (for TOK_INT_LIT)
    double dval;                // double value (for TOK_DBL_LIT)
    char sval[MAX_TOKEN_LEN];   // string payload
} Token;

// AST node – compact tagged structure.
// Child pointers a,b,c,d have node-type-specific meanings (see enum).
// The 'next' pointer chains siblings (statement lists, param lists). typedef struct Node { NodeType type; DataType dataType; // expression result type / decl type int ival; // multi-purpose int (operator, flags) int ival2; // secondary int (e.g. gosub return id) double dval; // double literal value char *sval; // identifier name / string literal struct Node *a, *b, *c, *d;// child pointers struct Node *next; // next sibling in a list int line; // source line for error messages } Node; // Symbol table entry – tracks variables, arrays, subs, functions typedef struct { char name[MAX_IDENT]; DataType dataType; int isArray; // 1 if dynamic array int ndims; // number of dimensions (0=scalar) int isFunc; // 1 = FUNCTION, 2 = SUB int paramCount; DataType paramTypes[MAX_PARAMS]; PassMode paramModes[MAX_PARAMS]; char paramNames[MAX_PARAMS][MAX_IDENT]; DataType returnType; // for functions int udtIndex; // index into gUdts[] for TYPE_UDT } Symbol; // User-defined type (UDT) support #define MAX_UDTS 64 #define MAX_UDT_FIELDS 32 typedef struct { char name[MAX_IDENT]; DataType dataType; int strLen; // >0 for STRING * N (fixed-length) int udtIndex; // index into gUdts[] if TYPE_UDT } UdtField; typedef struct { char name[MAX_IDENT]; UdtField fields[MAX_UDT_FIELDS]; int fieldCount; } UdtDef; static UdtDef gUdts[MAX_UDTS]; static int gUdtCount = 0; static int gLastUdtIndex = -1; // side-channel from parseType() // ----------------------------------------------------------------------- // Section 4: Global State // ----------------------------------------------------------------------- // Runtime mode: 0=debug (with error checks), 1=release (minimal) static int gRelease = 0; // Line map: maps merged-source line numbers to original file + line typedef struct { const char *fileName; // interned filename pointer int origLine; // 1-based line in original file } LineMapEntry; static LineMapEntry gLineMap[MAX_SOURCE_LINES]; static int gLineMapCount = 0; // Interned filename pool static 
char *gFileNames[MAX_INCLUDE_FILES]; static int gFileNameCount = 0; static const char *internFileName(const char *name) { for (int i = 0; i < gFileNameCount; i++) if (strcmp(gFileNames[i], name) == 0) return gFileNames[i]; if (gFileNameCount >= MAX_INCLUDE_FILES) { fprintf(stderr, "Too many include files (max %d)\n", MAX_INCLUDE_FILES); exit(1); } gFileNames[gFileNameCount] = strdup(name); return gFileNames[gFileNameCount++]; } // Source code static const char *gSrc = NULL; // source text static int gSrcPos = 0; // current read position static int gSrcLen = 0; // total source length static int gLine = 1; // current source line number // Current and peek tokens for the recursive-descent parser static Token gTok; // current token // AST node pool – simple bump allocator (nodes live until exit) static Node gNodePool[MAX_NODES]; static int gNodeCount = 0; // Symbol table static Symbol gSyms[MAX_SYMBOLS]; static int gSymCount = 0; // GOSUB bookkeeping: count of GOSUB sites for generating return switch static int gGosubCount = 0; // Collected line-number labels for the RETURN dispatch table static int gLineLabels[MAX_LINE_LABELS]; static int gLineLabelCount = 0; // Line numbers that are actually targeted by GOTO or GOSUB. // Only these need C labels emitted to avoid -Wunused-label. 
static int gGotoTargets[MAX_LINE_LABELS]; static int gGotoTargetCount = 0; // Named (string) labels targeted by GOTO or GOSUB static char *gGotoStrTargets[MAX_LINE_LABELS]; static int gGotoStrTargetCount = 0; // Compile-time constant table (for CONST declarations) #define MAX_CONSTS 256 typedef struct { char name[MAX_IDENT]; DataType dataType; double numVal; char strVal[MAX_TOKEN_LEN]; } ConstDef; static ConstDef gConsts[MAX_CONSTS]; static int gConstCount = 0; // External function definitions (loaded from functions.def) typedef struct { char name[MAX_IDENT]; // BASIC function name (e.g., "CEIL") DataType returnType; // return type char cCode[MAX_EXTERN_CODE]; // C code template (% = arg, %1 %2 = numbered) } ExternFunc; static ExternFunc gExternFuncs[MAX_EXTERN_FUNCS]; static int gExternFuncCount = 0; // Built-in function definitions (from builtins.def at compile time) typedef struct { const char *name; DataType returnType; const char *cCode; } BuiltinDef; #define BUILTIN(n, t, c) {n, t, c}, static const BuiltinDef gBuiltinDefs[] = { #include "builtins.def" {NULL, 0, NULL} // sentinel }; #undef BUILTIN // Code-generator state static int gIndent = 0; // current indentation depth static FILE *gOut = NULL; // output file handle // Track whether we are inside a SUB/FUNCTION (for scope) static int gInFunc = 0; static const char *gFuncName = NULL; // current function name static DataType gFuncRet = TYPE_VOID; // ----------------------------------------------------------------------- // Section 5: Utility Functions // ----------------------------------------------------------------------- // Report a fatal error with source file/line and exit static void fatal(int line, const char *fmt, ...) 
{ va_list ap; if (line > 0 && line <= gLineMapCount) { LineMapEntry *e = &gLineMap[line - 1]; fprintf(stderr, "Error (%s:%d): ", e->fileName, e->origLine); } else { fprintf(stderr, "Error (line %d): ", line); } va_start(ap, fmt); vfprintf(stderr, fmt, ap); va_end(ap); fprintf(stderr, "\n"); exit(1); } // Allocate a new AST node from the pool static Node *newNode(NodeType type, int line) { if (gNodeCount >= MAX_NODES) fatal(line, "AST node pool exhausted (max %d)", MAX_NODES); Node *n = &gNodePool[gNodeCount++]; memset(n, 0, sizeof(*n)); n->type = type; n->line = line; return n; } // Duplicate a string into heap memory static char *strDup(const char *s) { if (!s) return NULL; char *d = malloc(strlen(s) + 1); if (!d) { fprintf(stderr, "Out of memory\n"); exit(1); } strcpy(d, s); return d; } // Case-insensitive string comparison static int strIcmp(const char *a, const char *b) { if (!a || !b) return (a != b); while (*a && *b) { if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) return 1; a++; b++; } return *a != *b; } // Look up an external function by name; returns pointer or NULL static ExternFunc *externFuncLookup(const char *name) { for (int i = 0; i < gExternFuncCount; i++) if (strIcmp(gExternFuncs[i].name, name) == 0) return &gExternFuncs[i]; return NULL; } // Look up a built-in function definition by name; returns pointer or NULL static const BuiltinDef *builtinDefLookup(const char *name) { for (int i = 0; gBuiltinDefs[i].name; i++) if (strIcmp(gBuiltinDefs[i].name, name) == 0) return &gBuiltinDefs[i]; return NULL; } // Parse a type name from definition file static DataType parseTypeName(const char *s) { while (*s == ' ') s++; if (strIcmp(s, "byte") == 0) return TYPE_BYTE; if (strIcmp(s, "integer") == 0) return TYPE_INT; if (strIcmp(s, "long") == 0) return TYPE_LONG; if (strIcmp(s, "float") == 0) return TYPE_FLOAT; if (strIcmp(s, "double") == 0) return TYPE_DBL; if (strIcmp(s, "string") == 0) return TYPE_STR; return TYPE_DBL; // default } // Load 
external function definitions from a file // Format: name : type : c_code // Lines starting with # are comments, blank lines ignored static void loadExternFuncs(const char *filename) { FILE *f = fopen(filename, "r"); if (!f) return; // file not found is OK, just no external funcs char line[512]; while (fgets(line, sizeof(line), f)) { // Skip comments and blank lines char *p = line; while (*p == ' ' || *p == '\t') p++; if (*p == '#' || *p == '\n' || *p == '\0') continue; // Parse: name : type : c_code char *colon1 = strchr(p, ':'); if (!colon1) continue; char *colon2 = strchr(colon1 + 1, ':'); if (!colon2) continue; if (gExternFuncCount >= MAX_EXTERN_FUNCS) { fprintf(stderr, "Warning: too many external functions, ignoring rest\n"); break; } ExternFunc *ef = &gExternFuncs[gExternFuncCount]; // Extract name (trim whitespace) *colon1 = '\0'; char *name = p; while (*name == ' ' || *name == '\t') name++; char *nameEnd = colon1 - 1; while (nameEnd > name && (*nameEnd == ' ' || *nameEnd == '\t')) nameEnd--; nameEnd[1] = '\0'; strncpy(ef->name, name, MAX_IDENT - 1); ef->name[MAX_IDENT - 1] = '\0'; // Extract type *colon2 = '\0'; char *typeStr = colon1 + 1; while (*typeStr == ' ' || *typeStr == '\t') typeStr++; char *typeEnd = colon2 - 1; while (typeEnd > typeStr && (*typeEnd == ' ' || *typeEnd == '\t')) typeEnd--; typeEnd[1] = '\0'; ef->returnType = parseTypeName(typeStr); // Extract C code template (trim leading whitespace and trailing newline) char *code = colon2 + 1; while (*code == ' ' || *code == '\t') code++; size_t codeLen = strlen(code); while (codeLen > 0 && (code[codeLen-1] == '\n' || code[codeLen-1] == '\r' || code[codeLen-1] == ' ' || code[codeLen-1] == '\t')) codeLen--; if (codeLen >= MAX_EXTERN_CODE) codeLen = MAX_EXTERN_CODE - 1; strncpy(ef->cCode, code, codeLen); ef->cCode[codeLen] = '\0'; gExternFuncCount++; } fclose(f); } // Look up a symbol by name; returns pointer to entry or NULL static Symbol *symLookup(const char *name) { for (int i = 0; i < 
gSymCount; i++) if (strIcmp(gSyms[i].name, name) == 0) return &gSyms[i]; return NULL; } // Forward declaration static int isKeyword(const char *name); static Symbol *symAdd(const char *name) { if (isKeyword(name)) fatal(gLine, "Cannot use keyword '%s' as identifier", name); Symbol *s = symLookup(name); if (s) return s; if (gSymCount >= MAX_SYMBOLS) fatal(gLine, "Symbol table full"); s = &gSyms[gSymCount++]; memset(s, 0, sizeof(*s)); strncpy(s->name, name, MAX_IDENT - 1); return s; } // Look up a user-defined type by name; returns index or -1 static int udtLookup(const char *name) { for (int i = 0; i < gUdtCount; i++) if (strIcmp(gUdts[i].name, name) == 0) return i; return -1; } // Look up a field within a UDT; returns field index or -1 static int udtFieldLookup(int udtIdx, const char *field) { if (udtIdx < 0 || udtIdx >= gUdtCount) return -1; UdtDef *u = &gUdts[udtIdx]; for (int i = 0; i < u->fieldCount; i++) if (strIcmp(u->fields[i].name, field) == 0) return i; return -1; } // Check if 'name' is a BYREF parameter of the current function. // Returns 1 if so, 0 otherwise. Used during code generation to // emit pointer dereferences for BYREF params. static int isByrefParam(const char *name) { if (!gInFunc || !gFuncName) return 0; Symbol *fsym = symLookup(gFuncName); if (!fsym) return 0; for (int i = 0; i < fsym->paramCount; i++) { if (strIcmp(fsym->paramNames[i], name) == 0 && fsym->paramModes[i] == PASS_BYREF) return 1; } return 0; } // Emit indented text to the output file static void emit(const char *fmt, ...) { va_list ap; for (int i = 0; i < gIndent * 4; i++) fputc(' ', gOut); va_start(ap, fmt); vfprintf(gOut, fmt, ap); va_end(ap); } // Emit text without leading indentation static void emitRaw(const char *fmt, ...) 
{ va_list ap; va_start(ap, fmt); vfprintf(gOut, fmt, ap); va_end(ap); } // Record a line-number label for later GOSUB dispatch static void recordLineLabel(int lnum) { for (int i = 0; i < gLineLabelCount; i++) if (gLineLabels[i] == lnum) return; // already recorded if (gLineLabelCount >= MAX_LINE_LABELS) fatal(gLine, "Too many line labels"); gLineLabels[gLineLabelCount++] = lnum; } // Record a line number as a GOTO/GOSUB target so its label is emitted static void recordGotoTarget(int lnum) { for (int i = 0; i < gGotoTargetCount; i++) if (gGotoTargets[i] == lnum) return; if (gGotoTargetCount >= MAX_LINE_LABELS) fatal(gLine, "Too many goto targets"); gGotoTargets[gGotoTargetCount++] = lnum; } // Check whether a line number is a GOTO/GOSUB target static int isGotoTarget(int lnum) { for (int i = 0; i < gGotoTargetCount; i++) if (gGotoTargets[i] == lnum) return 1; return 0; } // Record a named label as a GOTO/GOSUB target so its C label is emitted static void recordGotoStrTarget(const char *name) { for (int i = 0; i < gGotoStrTargetCount; i++) if (strIcmp(gGotoStrTargets[i], name) == 0) return; if (gGotoStrTargetCount >= MAX_LINE_LABELS) fatal(gLine, "Too many named goto targets"); gGotoStrTargets[gGotoStrTargetCount++] = strDup(name); } // Check whether a named label is a GOTO/GOSUB target static int isGotoStrTarget(const char *name) { for (int i = 0; i < gGotoStrTargetCount; i++) if (strIcmp(gGotoStrTargets[i], name) == 0) return 1; return 0; } // Infer the data type of a variable from its name suffix. // Names ending in '$' -> STRING, '%' -> INTEGER, '!' -> FLOAT, // '#' -> DOUBLE; otherwise check the symbol table, default INTEGER. 
static DataType inferVarType(const char *name) { int len = (int)strlen(name); if (len > 0 && name[len-1] == '$') return TYPE_STR; if (len > 0 && name[len-1] == '%') return TYPE_INT; if (len > 0 && name[len-1] == '!') return TYPE_FLOAT; if (len > 0 && name[len-1] == '#') return TYPE_DBL; Symbol *s = symLookup(name); if (s) return s->dataType; return TYPE_INT; // default } // Return the wider of two numeric types for expression promotion. // The DataType enum is ordered so that a higher value = wider type // (BYTE < INT < LONG < FLOAT < DBL). STRING is handled separately. static DataType promoteType(DataType a, DataType b) { if (a == TYPE_STR || b == TYPE_STR) return TYPE_STR; return (a > b) ? a : b; } // Strip type-suffix characters ($, %, #, !) from an identifier for C output. // Uses a rotating set of 8 static buffers so multiple calls within a // single expression don't clobber each other. static const char *cleanName(const char *name) { static char bufs[8][MAX_IDENT]; static int idx = 0; if (!name) return "_null_"; char *buf = bufs[idx++ & 7]; strncpy(buf, name, MAX_IDENT - 1); buf[MAX_IDENT - 1] = '\0'; int len = (int)strlen(buf); if (len > 0 && (buf[len-1]=='$' || buf[len-1]=='%' || buf[len-1]=='#' || buf[len-1]=='!')) buf[len-1] = '\0'; return buf; } // ----------------------------------------------------------------------- // Section 6: Lexer // // The lexer reads characters from gSrc and produces tokens one at a time. // BASIC keywords are case-insensitive; identifiers preserve case. 
// ----------------------------------------------------------------------- // Keyword table: maps keyword strings to token types static struct { const char *kw; TokenType tok; } gKeywords[] = { {"DIM", TOK_DIM}, {"REDIM", TOK_REDIM}, {"AS", TOK_AS}, {"BYTE", TOK_BYTE}, {"INTEGER", TOK_INTEGER}, {"LONG", TOK_LONG}, {"FLOAT", TOK_FLOAT}, {"DOUBLE", TOK_DOUBLE}, {"STRING", TOK_STRING}, {"LET", TOK_LET}, {"PRINT", TOK_PRINT}, {"INPUT", TOK_INPUT}, {"IF", TOK_IF}, {"THEN", TOK_THEN}, {"ELSE", TOK_ELSE}, {"ELSEIF", TOK_ELSEIF}, {"END", TOK_END}, {"FOR", TOK_FOR}, {"TO", TOK_TO}, {"STEP", TOK_STEP}, {"NEXT", TOK_NEXT}, {"WHILE", TOK_WHILE}, {"WEND", TOK_WEND}, {"DO", TOK_DO}, {"LOOP", TOK_LOOP}, {"UNTIL", TOK_UNTIL}, {"GOTO", TOK_GOTO}, {"GOSUB", TOK_GOSUB}, {"RETURN", TOK_RETURN}, {"SUB", TOK_SUB}, {"FUNCTION", TOK_FUNCTION}, {"CALL", TOK_CALL}, {"BYVAL", TOK_BYVAL}, {"BYREF", TOK_BYREF}, {"LOCAL", TOK_LOCAL}, {"STATIC", TOK_STATIC}, {"EXIT", TOK_EXIT}, {"AND", TOK_AND}, {"OR", TOK_OR}, {"NOT", TOK_NOT}, {"MOD", TOK_MOD}, {"REM", TOK_REM}, {"OPEN", TOK_OPEN}, {"CLOSE", TOK_CLOSE}, {"OUTPUT", TOK_OUTPUT}, {"APPEND", TOK_APPEND}, {"BINARY", TOK_BINARY}, {"LINE", TOK_LINE}, {"WRITE", TOK_WRITE}, {"TYPE", TOK_TYPE}, {"XOR", TOK_XOR}, {"SELECT", TOK_SELECT}, {"CASE", TOK_CASE}, {"CONST", TOK_CONST}, {"ON", TOK_ON}, {NULL, TOK_EOF} }; // Check if a name is a keyword static int isKeyword(const char *name) { for (int k = 0; gKeywords[k].kw; k++) if (strIcmp(name, gKeywords[k].kw) == 0) return 1; return 0; } // Peek at the current character without advancing static int peekChar(void) { if (gSrcPos >= gSrcLen) return EOF; return (unsigned char)gSrc[gSrcPos]; } // Read and advance past the current character static int readChar(void) { if (gSrcPos >= gSrcLen) return EOF; int ch = (unsigned char)gSrc[gSrcPos++]; if (ch == '\n') gLine++; return ch; } // Skip whitespace (spaces and tabs) but NOT newlines static void skipSpaces(void) { while (gSrcPos < gSrcLen) { int ch = gSrc[gSrcPos]; 
if (ch == ' ' || ch == '\t') gSrcPos++; else break; } } // Read the next token into gTok static void nextToken(void) { skipSpaces(); gTok.line = gLine; gTok.sval[0] = '\0'; gTok.ival = 0; gTok.dval = 0.0; int ch = peekChar(); // End of file if (ch == EOF) { gTok.type = TOK_EOF; return; } // Newline – statement separator if (ch == '\n') { readChar(); gTok.type = TOK_NEWLINE; return; } // Carriage return (handle \r\n) if (ch == '\r') { readChar(); if (peekChar() == '\n') readChar(); gTok.type = TOK_NEWLINE; return; } // Single-line comment: ' or REM if (ch == '\'') { // Skip until end of line while (peekChar() != '\n' && peekChar() != EOF) readChar(); gTok.type = TOK_NEWLINE; // treat comment as newline if (peekChar() == '\n') readChar(); return; } // String literal if (ch == '"') { readChar(); // consume opening quote int i = 0; while (peekChar() != '"' && peekChar() != '\n' && peekChar() != EOF) { if (i < MAX_TOKEN_LEN - 1) gTok.sval[i++] = (char)readChar(); else readChar(); } gTok.sval[i] = '\0'; if (peekChar() == '"') readChar(); // consume closing quote gTok.type = TOK_STR_LIT; return; } // Number literal (integer or double) if (isdigit(ch) || (ch == '.' && isdigit(gSrc[gSrcPos+1]))) { int i = 0; int hasDot = 0; while (isdigit(peekChar()) || peekChar() == '.') { if (peekChar() == '.') { if (hasDot) break; // second dot ends the number hasDot = 1; } if (i < MAX_TOKEN_LEN - 1) gTok.sval[i++] = (char)readChar(); else readChar(); } gTok.sval[i] = '\0'; if (hasDot) { gTok.type = TOK_DBL_LIT; gTok.dval = atof(gTok.sval); } else { gTok.type = TOK_INT_LIT; gTok.ival = atoi(gTok.sval); } return; } // Identifier or keyword if (isalpha(ch) || ch == '_') { int i = 0; while (isalnum(peekChar()) || peekChar() == '_') { if (i < MAX_TOKEN_LEN - 1) gTok.sval[i++] = (char)readChar(); else readChar(); } // Allow trailing $, %, #, ! for type suffixes: // $ = STRING, % = INTEGER (int16_t), // # = DOUBLE, ! 
= FLOAT if (peekChar()=='$' || peekChar()=='%' || peekChar()=='#' || peekChar()=='!') { if (i < MAX_TOKEN_LEN - 1) gTok.sval[i++] = (char)readChar(); } gTok.sval[i] = '\0'; // Check for REM (rest of line is comment) if (strIcmp(gTok.sval, "REM") == 0) { while (peekChar() != '\n' && peekChar() != EOF) readChar(); gTok.type = TOK_NEWLINE; if (peekChar() == '\n') readChar(); return; } // Check keyword table for (int k = 0; gKeywords[k].kw; k++) { if (strIcmp(gTok.sval, gKeywords[k].kw) == 0) { gTok.type = gKeywords[k].tok; return; } } // Not a keyword – it is an identifier gTok.type = TOK_IDENT; return; } // Operators and punctuation readChar(); switch (ch) { case '+': gTok.type = TOK_PLUS; return; case '-': gTok.type = TOK_MINUS; return; case '*': gTok.type = TOK_STAR; return; case '/': gTok.type = TOK_SLASH; return; case '\\':gTok.type = TOK_BSLASH; return; case '^': gTok.type = TOK_CARET; return; case '&': gTok.type = TOK_AMP; return; case '#': gTok.type = TOK_HASH; return; case '.': gTok.type = TOK_DOT; return; case '(': gTok.type = TOK_LPAREN; return; case ')': gTok.type = TOK_RPAREN; return; case ',': gTok.type = TOK_COMMA; return; case ';': gTok.type = TOK_SEMI; return; case ':': gTok.type = TOK_COLON; return; case '?': gTok.type = TOK_PRINT; return; case '=': gTok.type = TOK_EQ; return; case '<': if (peekChar() == '=') { readChar(); gTok.type = TOK_LE; } else if (peekChar() == '>') { readChar(); gTok.type = TOK_NE; } else gTok.type = TOK_LT; return; case '>': if (peekChar() == '=') { readChar(); gTok.type = TOK_GE; } else gTok.type = TOK_GT; return; default: fatal(gLine, "Unexpected character '%c' (0x%02X)", ch, ch); } } // Check if the current token matches a given type static int tokIs(TokenType t) { return gTok.type == t; } // Consume current token if it matches; returns 1 on match, 0 otherwise static int tokAccept(TokenType t) { if (gTok.type == t) { nextToken(); return 1; } return 0; } // Require the current token to be of a given type; fatal error 
otherwise static void tokExpect(TokenType t) { if (gTok.type != t) fatal(gTok.line, "Expected token type %d, got %d ('%s')", t, gTok.type, gTok.sval); nextToken(); } // Skip newlines and colons (statement separators) static void skipEol(void) { while (gTok.type == TOK_NEWLINE || gTok.type == TOK_COLON) nextToken(); } // ----------------------------------------------------------------------- // Section 7: Parser – Recursive Descent // // Grammar (simplified): // program = { sub_decl | func_decl | statement } // statement = dim | redim | type_def | assignment | print | input // | if | for | while | do_loop | goto | gosub | return // | call | exit | local | static | end | label | open // | close | data | read | restore | get | put | line_input // expression = or_expr // or_expr = and_expr { OR and_expr } // and_expr = not_expr { AND not_expr } // not_expr = NOT not_expr | cmp_expr // cmp_expr = add_expr { (= | <> | < | > | <= | >=) add_expr } // add_expr = mul_expr { (+ | - | &) mul_expr } // mul_expr = idiv_expr { (* | /) idiv_expr } // idiv_expr = mod_expr { '\' mod_expr } // mod_expr = power_expr { MOD power_expr } // power_expr = unary_expr { ^ unary_expr } // unary_expr = [+ | -] primary // primary = INT_LIT | DBL_LIT | STR_LIT | ident['('args')'][.field...] 
// | '(' expression ')' | SIZEOF'('type_name')' // ----------------------------------------------------------------------- // Forward declarations for mutually recursive parser functions static Node *parseExpr(void); static Node *parseStatement(void); static int dataIndexForLine(int lnum); static int dataIndexForLabel(const char *name); static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3); static void skipNewlines(void) { while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken(); } // ---- Expression parser ---- // Parse a primary expression (literals, variables, function calls, parens) static Node *parsePrimary(void) { int ln = gTok.line; // Integer literal if (tokIs(TOK_INT_LIT)) { Node *n = newNode(NODE_INT_LIT, ln); n->ival = gTok.ival; n->dataType = TYPE_INT; nextToken(); return n; } // Double literal if (tokIs(TOK_DBL_LIT)) { Node *n = newNode(NODE_DBL_LIT, ln); n->dval = gTok.dval; n->dataType = TYPE_DBL; nextToken(); return n; } // String literal if (tokIs(TOK_STR_LIT)) { Node *n = newNode(NODE_STR_LIT, ln); n->sval = strDup(gTok.sval); n->dataType = TYPE_STR; nextToken(); return n; } // Parenthesized expression if (tokIs(TOK_LPAREN)) { nextToken(); Node *n = parseExpr(); tokExpect(TOK_RPAREN); return n; } // Identifier: variable, array element, or function call if (tokIs(TOK_IDENT)) { char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); name[MAX_TOKEN_LEN - 1] = '\0'; nextToken(); // Check for '(' – array access or function call if (tokIs(TOK_LPAREN)) { nextToken(); // Collect argument list Node *args = NULL, *tail = NULL; if (!tokIs(TOK_RPAREN)) { Node *arg = parseExpr(); args = tail = arg; while (tokAccept(TOK_COMMA)) { arg = parseExpr(); tail->next = arg; tail = arg; } } tokExpect(TOK_RPAREN); // Determine if this is a known array or function Symbol *s = symLookup(name); // SIZEOF(TypeName) — compile-time sizeof if (strIcmp(name, "SIZEOF") == 0) { // args should be one identifier — the UDT name Node *n = 
newNode(NODE_FUNC_CALL, ln); n->sval = strDup("SIZEOF"); n->a = args; n->dataType = TYPE_LONG; return n; } if (s && s->isArray) { Node *n = newNode(NODE_ARRAY_REF, ln); n->sval = strDup(name); n->a = args; // index expression n->dataType = s->dataType; n->ival2 = s->udtIndex; // Check for dot-access on array element: arr(i).field[.field...] if (s->dataType == TYPE_UDT && tokIs(TOK_DOT)) { Node *cur = n; int curUdt = s->udtIndex; while (curUdt >= 0 && tokIs(TOK_DOT)) { nextToken(); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected field name after '.'"); int fi = udtFieldLookup(curUdt, gTok.sval); if (fi < 0) fatal(ln, "Unknown field '%s' in type '%s'", gTok.sval, gUdts[curUdt].name); Node *dot = newNode(NODE_DOT_ACCESS, ln); dot->a = cur; dot->sval = strDup(gTok.sval); dot->ival2 = curUdt; UdtField *uf = &gUdts[curUdt].fields[fi]; dot->dataType = uf->dataType; if (uf->dataType == TYPE_STR && uf->strLen > 0) dot->ival = uf->strLen; cur = dot; curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1; nextToken(); } return cur; } return n; } else { // Treat as function call Node *n = newNode(NODE_FUNC_CALL, ln); n->sval = strDup(name); n->a = args; // Infer return type: check symbol table, built-ins, name if (s && s->isFunc == 1) { n->dataType = s->returnType; } else if (strIcmp(name,"LOF")==0) { n->dataType = TYPE_LONG; } else if (strIcmp(name,"VAL")==0 || strIcmp(name,"ABS")==0) { n->dataType = TYPE_DBL; } else if (strIcmp(name,"LEN")==0 || strIcmp(name,"ASC")==0 || strIcmp(name,"INT")==0 || strIcmp(name,"INSTR")==0 || strIcmp(name,"EOF")==0 || strIcmp(name,"FREEFILE")==0 || strIcmp(name,"LBOUND")==0 || strIcmp(name,"UBOUND")==0) { n->dataType = TYPE_INT; } else { // Check external functions and compile-time builtins ExternFunc *ef = externFuncLookup(name); if (ef) { n->dataType = ef->returnType; } else { const BuiltinDef *bd = builtinDefLookup(name); if (bd) { n->dataType = bd->returnType; } else { n->dataType = inferVarType(name); } } } return n; } } // RND without 
parentheses — treat as RND() if (strIcmp(name, "RND") == 0) { Node *n = newNode(NODE_FUNC_CALL, ln); n->sval = strDup("RND"); n->a = NULL; n->dataType = TYPE_DBL; return n; } // Check compile-time constants for (int ci = 0; ci < gConstCount; ci++) { if (strIcmp(name, gConsts[ci].name) == 0) { if (gConsts[ci].dataType == TYPE_STR) { Node *n = newNode(NODE_STR_LIT, ln); n->sval = strDup(gConsts[ci].strVal); n->dataType = TYPE_STR; return n; } else { double v = gConsts[ci].numVal; if (v == (int)v && gConsts[ci].dataType != TYPE_DBL && gConsts[ci].dataType != TYPE_FLOAT) { Node *n = newNode(NODE_INT_LIT, ln); n->ival = (int)v; n->dataType = gConsts[ci].dataType; return n; } else { Node *n = newNode(NODE_DBL_LIT, ln); n->dval = v; n->dataType = gConsts[ci].dataType; return n; } } } } // Plain variable reference — check for dot-access (supports chaining) { Symbol *s = symLookup(name); if (s && s->dataType == TYPE_UDT && tokIs(TOK_DOT)) { Node *base = newNode(NODE_IDENT, ln); base->sval = strDup(name); base->dataType = TYPE_UDT; Node *cur = base; int curUdt = s->udtIndex; while (curUdt >= 0 && tokIs(TOK_DOT)) { nextToken(); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected field name after '.'"); int fi = udtFieldLookup(curUdt, gTok.sval); if (fi < 0) fatal(ln, "Unknown field '%s' in type '%s'", gTok.sval, gUdts[curUdt].name); Node *dot = newNode(NODE_DOT_ACCESS, ln); dot->a = cur; dot->sval = strDup(gTok.sval); dot->ival2 = curUdt; UdtField *uf = &gUdts[curUdt].fields[fi]; dot->dataType = uf->dataType; if (uf->dataType == TYPE_STR && uf->strLen > 0) dot->ival = uf->strLen; cur = dot; curUdt = (uf->dataType == TYPE_UDT) ? 
uf->udtIndex : -1; nextToken(); } return cur; } } Node *n = newNode(NODE_IDENT, ln); n->sval = strDup(name); n->dataType = inferVarType(name); return n; } fatal(ln, "Expected expression, got token type %d ('%s')", gTok.type, gTok.sval); return NULL; // unreachable } // Unary: [+|-] primary static Node *parseUnary(void) { int ln = gTok.line; if (tokIs(TOK_MINUS) || tokIs(TOK_PLUS)) { int op = gTok.type; nextToken(); Node *operand = parseUnary(); if (op == TOK_PLUS) return operand; // unary + is a no-op Node *n = newNode(NODE_UNOP, ln); n->ival = op; n->a = operand; n->dataType = operand->dataType; return n; } return parsePrimary(); } // Power: unary { ^ unary } (right-associative) static Node *parsePower(void) { Node *left = parseUnary(); if (tokIs(TOK_CARET)) { int ln = gTok.line; nextToken(); Node *right = parsePower(); // right-associative Node *n = newNode(NODE_BINOP, ln); n->ival = TOK_CARET; n->a = left; n->b = right; n->dataType = TYPE_DBL; return n; } return left; } // MOD: power { MOD power } static Node *parseMod(void) { Node *left = parsePower(); while (tokIs(TOK_MOD)) { int ln = gTok.line; nextToken(); Node *right = parsePower(); Node *n = newNode(NODE_BINOP, ln); n->ival = TOK_MOD; n->a = left; n->b = right; n->dataType = TYPE_INT; left = n; } return left; } // Integer division: mod { '\' mod } static Node *parseIdiv(void) { Node *left = parseMod(); while (tokIs(TOK_BSLASH)) { int ln = gTok.line; nextToken(); Node *right = parseMod(); Node *n = newNode(NODE_BINOP, ln); n->ival = TOK_BSLASH; n->a = left; n->b = right; n->dataType = TYPE_INT; left = n; } return left; } // Multiply / divide: idiv { (*|/) idiv } static Node *parseMuldiv(void) { Node *left = parseIdiv(); while (tokIs(TOK_STAR) || tokIs(TOK_SLASH)) { int ln = gTok.line; int op = gTok.type; nextToken(); Node *right = parseIdiv(); Node *n = newNode(NODE_BINOP, ln); n->ival = op; n->a = left; n->b = right; // Division always promotes to double; multiplication promotes // to the wider of the two 
operand types. n->dataType = (op == TOK_SLASH) ? TYPE_DBL : promoteType(left->dataType, right->dataType); left = n; } return left; } // Add / subtract / string concat: muldiv { (+|-|&) muldiv } static Node *parseAddsub(void) { Node *left = parseMuldiv(); while (tokIs(TOK_PLUS) || tokIs(TOK_MINUS) || tokIs(TOK_AMP)) { int ln = gTok.line; int op = gTok.type; nextToken(); Node *right = parseMuldiv(); Node *n = newNode(NODE_BINOP, ln); n->ival = op; n->a = left; n->b = right; // String concatenation if (op == TOK_AMP || (op == TOK_PLUS && (left->dataType == TYPE_STR || right->dataType == TYPE_STR))) n->dataType = TYPE_STR; else n->dataType = promoteType(left->dataType, right->dataType); left = n; } return left; } // Comparison: addsub { (=|<>|<|>|<=|>=) addsub } static Node *parseComparison(void) { Node *left = parseAddsub(); while (tokIs(TOK_EQ) || tokIs(TOK_NE) || tokIs(TOK_LT) || tokIs(TOK_GT) || tokIs(TOK_LE) || tokIs(TOK_GE)) { int ln = gTok.line; int op = gTok.type; nextToken(); Node *right = parseAddsub(); Node *n = newNode(NODE_BINOP, ln); n->ival = op; n->a = left; n->b = right; n->dataType = TYPE_INT; // comparisons yield integer (boolean) left = n; } return left; } // NOT: NOT not_expr | comparison static Node *parseNot(void) { if (tokIs(TOK_NOT)) { int ln = gTok.line; nextToken(); Node *operand = parseNot(); Node *n = newNode(NODE_UNOP, ln); n->ival = TOK_NOT; n->a = operand; n->dataType = TYPE_INT; return n; } return parseComparison(); } // AND: not { AND not } static Node *parseAnd(void) { Node *left = parseNot(); while (tokIs(TOK_AND)) { int ln = gTok.line; nextToken(); Node *right = parseNot(); Node *n = newNode(NODE_BINOP, ln); n->ival = TOK_AND; n->a = left; n->b = right; n->dataType = TYPE_INT; left = n; } return left; } // OR: and { OR and } — top-level expression rule static Node *parseOr(void) { Node *left = parseAnd(); while (tokIs(TOK_OR) || tokIs(TOK_XOR)) { int ln = gTok.line; int op = gTok.type; nextToken(); Node *right = parseAnd(); Node *n 
= newNode(NODE_BINOP, ln); n->ival = op; n->a = left; n->b = right; n->dataType = TYPE_INT; left = n; } return left; } // Top-level expression entry point static Node *parseExpr(void) { return parseOr(); } // ---- Statement parsers ---- // Parse a data-type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING, or UDT name) static DataType parseType(void) { if (tokAccept(TOK_BYTE)) return TYPE_BYTE; if (tokAccept(TOK_INTEGER)) return TYPE_INT; if (tokAccept(TOK_LONG)) return TYPE_LONG; if (tokAccept(TOK_FLOAT)) return TYPE_FLOAT; if (tokAccept(TOK_DOUBLE)) return TYPE_DBL; if (tokAccept(TOK_STRING)) return TYPE_STR; // Check for user-defined type name if (tokIs(TOK_IDENT)) { int idx = udtLookup(gTok.sval); if (idx >= 0) { gLastUdtIndex = idx; nextToken(); return TYPE_UDT; } } fatal(gTok.line, "Expected type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING)"); return TYPE_INT; } // Parse TYPE ... END TYPE definition static Node *parseTypeDef(void) { int ln = gTok.line; tokExpect(TOK_TYPE); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected type name after TYPE"); char tname[MAX_IDENT]; strncpy(tname, gTok.sval, MAX_IDENT - 1); tname[MAX_IDENT - 1] = '\0'; nextToken(); if (gUdtCount >= MAX_UDTS) fatal(ln, "Too many TYPE definitions (max %d)", MAX_UDTS); int udtIdx = gUdtCount++; UdtDef *u = &gUdts[udtIdx]; memset(u, 0, sizeof(*u)); strncpy(u->name, tname, MAX_IDENT - 1); // Skip newlines before fields while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken(); // Parse fields until END TYPE while (!tokIs(TOK_EOF)) { // Check for END TYPE if (tokIs(TOK_END)) { int sp = gSrcPos; int sl = gLine; Token st = gTok; nextToken(); if (tokIs(TOK_TYPE)) { nextToken(); // consume TYPE break; } // Not END TYPE — restore gSrcPos = sp; gLine = sl; gTok = st; } // Parse field: name AS type if (!tokIs(TOK_IDENT)) fatal(gTok.line, "Expected field name in TYPE definition"); if (u->fieldCount >= MAX_UDT_FIELDS) fatal(gTok.line, "Too many fields in TYPE (max %d)", MAX_UDT_FIELDS); UdtField *f = 
&u->fields[u->fieldCount]; strncpy(f->name, gTok.sval, MAX_IDENT - 1); f->name[MAX_IDENT - 1] = '\0'; nextToken(); tokExpect(TOK_AS); // Check for STRING * N (fixed-length string) if (tokIs(TOK_STRING)) { nextToken(); if (tokAccept(TOK_STAR)) { if (!tokIs(TOK_INT_LIT)) fatal(gTok.line, "Expected integer after STRING *"); f->strLen = gTok.ival; nextToken(); } else { fatal(gTok.line, "STRING fields in TYPE require fixed length (STRING * N)"); } f->dataType = TYPE_STR; f->udtIndex = -1; } else { gLastUdtIndex = -1; f->dataType = parseType(); f->strLen = 0; f->udtIndex = gLastUdtIndex; } u->fieldCount++; // Skip newlines between fields while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken(); } Node *n = newNode(NODE_TYPE_DEF, ln); n->sval = strDup(tname); n->ival = udtIdx; return n; } // Parse DIM statement: DIM name[(size[, size, ...])] AS type static Node *parseDim(void) { int ln = gTok.line; tokExpect(TOK_DIM); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected identifier after DIM"); char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); nextToken(); Node *size = NULL; int ndims = 0; if (tokAccept(TOK_LPAREN)) { size = parseExpr(); ndims = 1; Node *tail = size; while (tokAccept(TOK_COMMA)) { Node *dim = parseExpr(); tail->next = dim; tail = dim; ndims++; } tokExpect(TOK_RPAREN); } tokExpect(TOK_AS); gLastUdtIndex = -1; DataType dt = parseType(); Node *n = newNode(NODE_DIM, ln); n->sval = strDup(name); n->dataType = dt; n->a = size; n->ival = ndims; n->ival2 = gLastUdtIndex; // Register in symbol table Symbol *s = symAdd(name); s->dataType = dt; s->isArray = (ndims > 0); s->ndims = ndims; s->udtIndex = gLastUdtIndex; return n; } // Parse REDIM statement: REDIM name(size, ...) 
AS type static Node *parseRedim(void) { int ln = gTok.line; tokExpect(TOK_REDIM); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected identifier after REDIM"); char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); nextToken(); tokExpect(TOK_LPAREN); Node *size = parseExpr(); int ndims = 1; Node *tail = size; while (tokAccept(TOK_COMMA)) { Node *dim = parseExpr(); tail->next = dim; tail = dim; ndims++; } tokExpect(TOK_RPAREN); tokExpect(TOK_AS); DataType dt = parseType(); Node *n = newNode(NODE_REDIM, ln); n->sval = strDup(name); n->dataType = dt; n->a = size; n->ival = ndims; return n; } // Parse PRINT statement: PRINT [expr { (;|,) expr } [;]] // Helper: parse a file number (#expr) static Node *parseFileNumber(void) { tokExpect(TOK_HASH); return parseExpr(); } // Helper: parse print items (shared by PRINT and PRINT #) static Node *parsePrintItems(int ln) { Node *head = NULL, *tail = NULL; while (1) { Node *item = newNode(NODE_PRINT_ITEM, ln); item->a = parseExpr(); // Check for separator after this item if (tokIs(TOK_SEMI)) { item->ival = 1; // semicolon: no space nextToken(); } else if (tokIs(TOK_COMMA)) { item->ival = 2; // comma: tab nextToken(); } else { item->ival = 0; // end of print list } if (!head) head = tail = item; else { tail->next = item; tail = item; } // If no separator or end of statement, stop if (item->ival == 0) break; // If separator at end of line, stop (trailing separator) if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) break; } return head; } static Node *parsePrint(void) { int ln = gTok.line; tokExpect(TOK_PRINT); // File-directed PRINT: PRINT #n, ... if (tokIs(TOK_HASH)) { Node *fpr = newNode(NODE_FILE_PRINT, ln); fpr->b = parseFileNumber(); tokExpect(TOK_COMMA); if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) return fpr; // PRINT #n, alone = write newline to file fpr->a = parsePrintItems(ln); return fpr; } // PRINT USING "format"; value1; value2; ... 
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "USING") == 0) { nextToken(); Node *pu = newNode(NODE_PRINT_USING, ln); pu->a = parseExpr(); // format string expression if (!tokAccept(TOK_SEMI)) tokExpect(TOK_COMMA); // allow ; or , after format // Parse values as linked list Node *head = NULL, *tail = NULL; while (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { Node *val = parseExpr(); if (!head) head = tail = val; else { tail->next = val; tail = val; } if (!tokAccept(TOK_SEMI) && !tokAccept(TOK_COMMA)) break; } pu->b = head; return pu; } Node *pr = newNode(NODE_PRINT, ln); // Empty PRINT (just a newline) if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) { return pr; } pr->a = parsePrintItems(ln); return pr; } // Parse INPUT statement: INPUT ["prompt";] var {, var} // Helper: parse a comma-separated variable list for INPUT static Node *parseInputVars(int ln) { Node *head = NULL, *tail = NULL; do { if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable name in INPUT"); Node *v = newNode(NODE_IDENT, ln); v->sval = strDup(gTok.sval); v->dataType = inferVarType(gTok.sval); nextToken(); if (!head) head = tail = v; else { tail->next = v; tail = v; } } while (tokAccept(TOK_COMMA)); return head; } static Node *parseInput(void) { int ln = gTok.line; tokExpect(TOK_INPUT); // File-directed INPUT: INPUT #n, var1, var2 if (tokIs(TOK_HASH)) { Node *finp = newNode(NODE_FILE_INPUT, ln); finp->b = parseFileNumber(); tokExpect(TOK_COMMA); finp->a = parseInputVars(ln); return finp; } Node *inp = newNode(NODE_INPUT, ln); // Optional string prompt if (tokIs(TOK_STR_LIT)) { inp->sval = strDup(gTok.sval); nextToken(); if (tokIs(TOK_SEMI) || tokIs(TOK_COMMA)) nextToken(); // consume separator after prompt } inp->a = parseInputVars(ln); return inp; } // Parse an ELSEIF chain as a nested IF node. // ELSEIF expr THEN \n block { ELSEIF ... } [ELSE block] // The caller (parseIf) consumes the final END IF. 
static Node *parseElseifChain(void) { int ln = gTok.line; tokExpect(TOK_ELSEIF); Node *cond = parseExpr(); tokExpect(TOK_THEN); Node *n = newNode(NODE_IF, ln); n->a = cond; skipEol(); n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END); if (tokIs(TOK_ELSEIF)) { n->c = parseElseifChain(); } else if (tokAccept(TOK_ELSE)) { skipEol(); n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF); } // END IF is consumed by the top-level parseIf return n; } // Parse IF block: // IF expr THEN stmt (single-line) // IF expr THEN \n block {ELSEIF...} [ELSE block] END IF static Node *parseIf(void) { int ln = gTok.line; tokExpect(TOK_IF); Node *cond = parseExpr(); tokExpect(TOK_THEN); Node *n = newNode(NODE_IF, ln); n->a = cond; // Single-line IF: statement on same line after THEN if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { Node *stmt = parseStatement(); Node *blk = newNode(NODE_BLOCK, ln); blk->a = stmt; n->b = blk; return n; } // Multi-line IF skipEol(); n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END); // ELSEIF chain: parse as a nested IF node if (tokIs(TOK_ELSEIF)) { n->c = parseElseifChain(); } // ELSE block else if (tokAccept(TOK_ELSE)) { skipEol(); n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF); } // END IF tokExpect(TOK_END); tokExpect(TOK_IF); return n; } // Parse FOR loop: FOR var = start TO end [STEP step] \n block NEXT [var] static Node *parseFor(void) { int ln = gTok.line; tokExpect(TOK_FOR); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable after FOR"); char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); nextToken(); tokExpect(TOK_EQ); Node *start = parseExpr(); tokExpect(TOK_TO); Node *end = parseExpr(); Node *step = NULL; if (tokAccept(TOK_STEP)) { step = parseExpr(); } Node *n = newNode(NODE_FOR, ln); n->sval = strDup(name); n->a = start; n->b = end; n->c = step; skipEol(); n->d = parseBlock(TOK_NEXT, TOK_EOF, TOK_EOF); tokExpect(TOK_NEXT); // Optional variable name after NEXT if (tokIs(TOK_IDENT)) nextToken(); return n; } // Parse 
WHILE loop: WHILE expr \n block WEND static Node *parseWhile(void) { int ln = gTok.line; tokExpect(TOK_WHILE); Node *cond = parseExpr(); Node *n = newNode(NODE_WHILE, ln); n->a = cond; skipEol(); n->b = parseBlock(TOK_WEND, TOK_EOF, TOK_EOF); tokExpect(TOK_WEND); return n; } // Parse DO/LOOP: // DO [WHILE|UNTIL expr] \n block LOOP [WHILE|UNTIL expr] static Node *parseDoLoop(void) { int ln = gTok.line; tokExpect(TOK_DO); Node *n = newNode(NODE_DO_LOOP, ln); n->ival = 0; // flags: bit0 = isUntil, bit1 = conditionAtBottom // Optional top condition if (tokIs(TOK_WHILE)) { nextToken(); n->a = parseExpr(); } else if (tokIs(TOK_UNTIL)) { nextToken(); n->a = parseExpr(); n->ival |= 1; // UNTIL (vs WHILE) } skipEol(); n->b = parseBlock(TOK_LOOP, TOK_EOF, TOK_EOF); tokExpect(TOK_LOOP); // Optional bottom condition if (tokIs(TOK_WHILE)) { nextToken(); n->a = parseExpr(); n->ival = 2; // condition at bottom } else if (tokIs(TOK_UNTIL)) { nextToken(); n->a = parseExpr(); n->ival = 3; // until + at bottom } // If no condition at all, infinite loop (DO...LOOP) return n; } // Parse SUB declaration: // SUB name([BYVAL|BYREF] param AS type, ...) 
\n block END SUB static Node *parseSub(void) { int ln = gTok.line; tokExpect(TOK_SUB); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected subroutine name after SUB"); char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); nextToken(); // Parse parameter list Node *params = NULL, *ptail = NULL; int pcount = 0; Symbol *sym = symAdd(name); sym->isFunc = 2; // SUB sym->returnType = TYPE_VOID; if (tokAccept(TOK_LPAREN)) { while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) { PassMode pm = PASS_BYVAL; if (tokAccept(TOK_BYREF)) pm = PASS_BYREF; else tokAccept(TOK_BYVAL); // optional BYVAL if (!tokIs(TOK_IDENT)) fatal(gTok.line, "Expected parameter name"); Node *p = newNode(NODE_PARAM, gTok.line); p->sval = strDup(gTok.sval); p->ival = pm; nextToken(); tokExpect(TOK_AS); p->dataType = parseType(); // Record param in the function's symbol entry if (pcount >= MAX_PARAMS) fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS); strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1); sym->paramTypes[pcount] = p->dataType; sym->paramModes[pcount] = pm; pcount++; // Also register the parameter as a variable for type inference // inside the function body Symbol *psym = symAdd(p->sval); psym->dataType = p->dataType; if (!params) params = ptail = p; else { ptail->next = p; ptail = p; } if (!tokAccept(TOK_COMMA)) break; } tokExpect(TOK_RPAREN); } sym->paramCount = pcount; Node *n = newNode(NODE_SUB, ln); n->sval = strDup(name); n->a = params; skipEol(); n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF); tokExpect(TOK_END); tokExpect(TOK_SUB); return n; } // Parse FUNCTION declaration: // FUNCTION name([params]) AS type \n block END FUNCTION static Node *parseFunction(void) { int ln = gTok.line; tokExpect(TOK_FUNCTION); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected function name after FUNCTION"); char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); nextToken(); // Parse parameter list Node *params = NULL, *ptail = NULL; int pcount = 0; Symbol *sym = symAdd(name); 
sym->isFunc = 1; // FUNCTION if (tokAccept(TOK_LPAREN)) { while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) { PassMode pm = PASS_BYVAL; if (tokAccept(TOK_BYREF)) pm = PASS_BYREF; else tokAccept(TOK_BYVAL); if (!tokIs(TOK_IDENT)) fatal(gTok.line, "Expected parameter name"); Node *p = newNode(NODE_PARAM, gTok.line); p->sval = strDup(gTok.sval); p->ival = pm; nextToken(); tokExpect(TOK_AS); p->dataType = parseType(); if (pcount >= MAX_PARAMS) fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS); strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1); sym->paramTypes[pcount] = p->dataType; sym->paramModes[pcount] = pm; pcount++; // Register parameter as variable for type inference Symbol *psym = symAdd(p->sval); psym->dataType = p->dataType; if (!params) params = ptail = p; else { ptail->next = p; ptail = p; } if (!tokAccept(TOK_COMMA)) break; } tokExpect(TOK_RPAREN); } sym->paramCount = pcount; // Return type tokExpect(TOK_AS); DataType ret = parseType(); sym->returnType = ret; sym->dataType = ret; Node *n = newNode(NODE_FUNC, ln); n->sval = strDup(name); n->dataType = ret; n->a = params; skipEol(); n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF); tokExpect(TOK_END); tokExpect(TOK_FUNCTION); return n; } // Parse LOCAL declaration: LOCAL name AS type static Node *parseLocal(void) { int ln = gTok.line; tokExpect(TOK_LOCAL); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable name after LOCAL"); Node *n = newNode(NODE_LOCAL, ln); n->sval = strDup(gTok.sval); nextToken(); tokExpect(TOK_AS); n->dataType = parseType(); return n; } // Parse STATIC declaration: STATIC name AS type static Node *parseStatic(void) { int ln = gTok.line; tokExpect(TOK_STATIC); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable name after STATIC"); Node *n = newNode(NODE_STATIC, ln); n->sval = strDup(gTok.sval); nextToken(); tokExpect(TOK_AS); n->dataType = parseType(); return n; } // Parse DATA statement: DATA literal, literal, ... 
// Items can be integers, doubles, strings, or negative numbers. // Note: the "DATA" keyword is already consumed by parseStatement static Node *parseData(void) { int ln = gTok.line; Node *n = newNode(NODE_DATA, ln); Node *head = NULL, *tail = NULL; do { Node *item = NULL; // Handle negative numeric literals int neg = 0; if (tokIs(TOK_MINUS)) { neg = 1; nextToken(); } if (tokIs(TOK_INT_LIT)) { item = newNode(NODE_INT_LIT, ln); item->ival = neg ? -gTok.ival : gTok.ival; item->dataType = TYPE_INT; nextToken(); } else if (tokIs(TOK_DBL_LIT)) { item = newNode(NODE_DBL_LIT, ln); item->dval = neg ? -gTok.dval : gTok.dval; item->dataType = TYPE_DBL; nextToken(); } else if (tokIs(TOK_STR_LIT)) { item = newNode(NODE_STR_LIT, ln); item->sval = strDup(gTok.sval); item->dataType = TYPE_STR; nextToken(); } else { fatal(ln, "Expected literal value in DATA statement"); } if (!head) head = tail = item; else { tail->next = item; tail = item; } } while (tokAccept(TOK_COMMA)); n->a = head; return n; } // Parse READ statement: READ var1, var2, ... 
// Note: the "READ" keyword is already consumed by parseStatement static Node *parseRead(void) { int ln = gTok.line; Node *n = newNode(NODE_READ, ln); Node *head = NULL, *tail = NULL; do { if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable name in READ"); Node *v = newNode(NODE_IDENT, ln); v->sval = strDup(gTok.sval); v->dataType = inferVarType(gTok.sval); nextToken(); if (!head) head = tail = v; else { tail->next = v; tail = v; } } while (tokAccept(TOK_COMMA)); n->a = head; return n; } // Parse RESTORE statement: RESTORE [line_number] // Note: the "RESTORE" keyword is already consumed by parseStatement static Node *parseRestore(void) { int ln = gTok.line; Node *n = newNode(NODE_RESTORE, ln); if (tokIs(TOK_INT_LIT)) { n->ival = gTok.ival; nextToken(); } else if (tokIs(TOK_IDENT)) { n->sval = strDup(gTok.sval); nextToken(); } return n; } // Parse OPEN statement: OPEN "filename" FOR INPUT|OUTPUT|APPEND|BINARY|RANDOM AS #n [LEN = expr] static Node *parseOpen(void) { int ln = gTok.line; tokExpect(TOK_OPEN); Node *n = newNode(NODE_OPEN, ln); n->a = parseExpr(); // filename expression tokExpect(TOK_FOR); if (tokIs(TOK_INPUT)) { n->ival = 0; nextToken(); } else if (tokIs(TOK_OUTPUT)) { n->ival = 1; nextToken(); } else if (tokIs(TOK_APPEND)) { n->ival = 2; nextToken(); } else if (tokIs(TOK_BINARY)) { n->ival = 3; nextToken(); } else if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOM") == 0) { n->ival = 4; nextToken(); } else fatal(ln, "Expected INPUT, OUTPUT, APPEND, BINARY, or RANDOM after FOR"); tokExpect(TOK_AS); n->b = parseFileNumber(); // file number expression // Optional LEN = expr for RANDOM access if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "LEN") == 0) { nextToken(); tokExpect(TOK_EQ); n->c = parseExpr(); } return n; } // Parse CLOSE statement: CLOSE #n static Node *parseClose(void) { int ln = gTok.line; tokExpect(TOK_CLOSE); Node *n = newNode(NODE_CLOSE, ln); n->b = parseFileNumber(); return n; } // Parse a single statement static Node *parseStatement(void) { 
int ln = gTok.line; // Line-number label: a bare integer at the start of a statement if (tokIs(TOK_INT_LIT)) { int lnum = gTok.ival; nextToken(); // If followed by a statement, this is a labeled line Node *lbl = newNode(NODE_LABEL, ln); lbl->ival = lnum; recordLineLabel(lnum); // If there's a statement on this line, chain it if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { Node *stmt = parseStatement(); lbl->next = stmt; } return lbl; } // Named label: identifier followed by colon (e.g., myLabel:) // Must peek ahead to distinguish from statement separator colons. // Only treat as label if the NEXT token is a colon. if (tokIs(TOK_IDENT)) { int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; char labelName[MAX_IDENT]; strncpy(labelName, gTok.sval, MAX_IDENT - 1); labelName[MAX_IDENT - 1] = '\0'; nextToken(); if (tokIs(TOK_COLON)) { if (isKeyword(labelName)) fatal(ln, "Cannot use keyword '%s' as label", labelName); nextToken(); Node *lbl = newNode(NODE_LABEL, ln); lbl->ival = 0; // 0 = named label, not numeric lbl->sval = strDup(labelName); // If there's a statement on this line, chain it if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { Node *stmt = parseStatement(); lbl->next = stmt; } return lbl; } // Not a label — restore state gSrcPos = savePos; gLine = saveLine; gTok = saveTok; } // TYPE ... END TYPE if (tokIs(TOK_TYPE)) return parseTypeDef(); // DIM if (tokIs(TOK_DIM)) return parseDim(); // REDIM if (tokIs(TOK_REDIM)) return parseRedim(); // PRINT if (tokIs(TOK_PRINT)) return parsePrint(); // INPUT / INPUT # if (tokIs(TOK_INPUT)) return parseInput(); // OPEN if (tokIs(TOK_OPEN)) return parseOpen(); // CLOSE if (tokIs(TOK_CLOSE)) return parseClose(); // DATA, READ, RESTORE are contextual keywords — checked as identifiers // to avoid colliding with user variable names like "data(i)". // We peek ahead: DATA is a keyword only when NOT followed by '(' or '='. 
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "DATA") == 0) { // Save state and peek at next token int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) { // It's a DATA statement return parseData(); } // Restore — it's a variable named "data" gSrcPos = savePos; gLine = saveLine; gTok = saveTok; } // READ (contextual keyword — same peek-ahead logic) if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "READ") == 0) { int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) { return parseRead(); } gSrcPos = savePos; gLine = saveLine; gTok = saveTok; } // RESTORE (contextual keyword) if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RESTORE") == 0) { nextToken(); return parseRestore(); } // GET #filenum, record, variable (contextual keyword) if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "GET") == 0) { int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); if (tokIs(TOK_HASH)) { Node *n = newNode(NODE_GET, ln); n->a = parseFileNumber(); tokExpect(TOK_COMMA); n->b = parseExpr(); tokExpect(TOK_COMMA); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable name in GET"); Node *v = newNode(NODE_IDENT, ln); v->sval = strDup(gTok.sval); v->dataType = inferVarType(gTok.sval); nextToken(); n->c = v; return n; } gSrcPos = savePos; gLine = saveLine; gTok = saveTok; } // PUT #filenum, record, variable (contextual keyword) if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "PUT") == 0) { int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); if (tokIs(TOK_HASH)) { Node *n = newNode(NODE_PUT, ln); n->a = parseFileNumber(); tokExpect(TOK_COMMA); n->b = parseExpr(); tokExpect(TOK_COMMA); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected variable name in PUT"); Node *v = newNode(NODE_IDENT, ln); v->sval = strDup(gTok.sval); v->dataType = inferVarType(gTok.sval); nextToken(); n->c = v; return n; } gSrcPos = savePos; gLine = saveLine; gTok = 
saveTok; } // RANDOMIZE [seed] (contextual keyword) if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOMIZE") == 0) { nextToken(); Node *n = newNode(NODE_RANDOMIZE, ln); // Optional seed expression if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF) && !tokIs(TOK_ELSE)) { n->a = parseExpr(); } return n; } // MID$ assignment: MID$(s$, start, len) = replacement$ if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "MID$") == 0) { int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); if (tokIs(TOK_LPAREN)) { nextToken(); Node *n = newNode(NODE_MID_ASSIGN, ln); // Parse target string variable if (!tokIs(TOK_IDENT)) fatal(ln, "Expected string variable in MID$ assignment"); n->a = newNode(NODE_IDENT, ln); n->a->sval = strDup(gTok.sval); n->a->dataType = TYPE_STR; nextToken(); tokExpect(TOK_COMMA); n->b = parseExpr(); // start position if (tokAccept(TOK_COMMA)) { n->c = parseExpr(); // length } else { // No length — use large value Node *big = newNode(NODE_INT_LIT, ln); big->ival = 32767; n->c = big; } tokExpect(TOK_RPAREN); tokExpect(TOK_EQ); n->d = parseExpr(); // replacement string return n; } // Not MID$ assignment — restore gSrcPos = savePos; gLine = saveLine; gTok = saveTok; } // LINE INPUT # if (tokIs(TOK_LINE)) { nextToken(); tokExpect(TOK_INPUT); int lln = ln; Node *n = newNode(NODE_LINE_INPUT, lln); n->b = parseFileNumber(); tokExpect(TOK_COMMA); if (!tokIs(TOK_IDENT)) fatal(lln, "Expected variable name in LINE INPUT"); Node *v = newNode(NODE_IDENT, lln); v->sval = strDup(gTok.sval); v->dataType = TYPE_STR; nextToken(); n->a = v; return n; } // WRITE # if (tokIs(TOK_WRITE)) { nextToken(); Node *n = newNode(NODE_FILE_WRITE, ln); n->b = parseFileNumber(); tokExpect(TOK_COMMA); Node *head = NULL, *tail = NULL; do { Node *e = parseExpr(); if (!head) head = tail = e; else { tail->next = e; tail = e; } } while (tokAccept(TOK_COMMA)); n->a = head; return n; } // IF if (tokIs(TOK_IF)) return parseIf(); // FOR if (tokIs(TOK_FOR)) return 
parseFor(); // WHILE if (tokIs(TOK_WHILE)) return parseWhile(); // DO if (tokIs(TOK_DO)) return parseDoLoop(); // SUB if (tokIs(TOK_SUB)) return parseSub(); // FUNCTION if (tokIs(TOK_FUNCTION)) return parseFunction(); // LOCAL if (tokIs(TOK_LOCAL)) return parseLocal(); // STATIC if (tokIs(TOK_STATIC)) return parseStatic(); // GOTO if (tokIs(TOK_GOTO)) { nextToken(); Node *n = newNode(NODE_GOTO, ln); if (tokIs(TOK_INT_LIT)) { n->ival = gTok.ival; recordGotoTarget(n->ival); nextToken(); } else if (tokIs(TOK_IDENT)) { n->sval = strDup(gTok.sval); recordGotoStrTarget(n->sval); nextToken(); } else { fatal(ln, "Expected line number or label after GOTO"); } return n; } // GOSUB if (tokIs(TOK_GOSUB)) { nextToken(); Node *n = newNode(NODE_GOSUB, ln); if (tokIs(TOK_INT_LIT)) { n->ival = gTok.ival; recordGotoTarget(n->ival); nextToken(); } else if (tokIs(TOK_IDENT)) { n->sval = strDup(gTok.sval); recordGotoStrTarget(n->sval); nextToken(); } else { fatal(ln, "Expected line number or label after GOSUB"); } if (gGosubCount >= MAX_GOSUB_SITES) fatal(ln, "Too many GOSUB sites (max %d)", MAX_GOSUB_SITES); n->ival2 = gGosubCount++; return n; } // RETURN if (tokIs(TOK_RETURN)) { nextToken(); Node *n = newNode(NODE_RETURN, ln); // Optional return expression for FUNCTION if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { n->a = parseExpr(); } return n; } // EXIT (FOR | WHILE | DO | SUB | FUNCTION) if (tokIs(TOK_EXIT)) { nextToken(); Node *n = newNode(NODE_EXIT, ln); if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); } else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); } else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); } else if (tokIs(TOK_SUB)) { n->ival = TOK_SUB; nextToken(); } else if (tokIs(TOK_FUNCTION)){n->ival = TOK_FUNCTION;nextToken(); } else fatal(ln, "Expected FOR, WHILE, DO, SUB, or FUNCTION after EXIT"); return n; } // CONTINUE (FOR | WHILE | DO) — contextual keyword if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "CONTINUE") == 0) { 
nextToken(); Node *n = newNode(NODE_CONTINUE, ln); if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); } else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); } else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); } else fatal(ln, "Expected FOR, WHILE, or DO after CONTINUE"); return n; } // END (program termination) if (tokIs(TOK_END)) { // Peek ahead: END IF / END SUB / END FUNCTION are handled by callers. // Bare END means program exit. // Save position to check next token int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); // If followed by IF, SUB, FUNCTION, SELECT – put it back (the caller handles it) if (tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION) || tokIs(TOK_SELECT)) { gSrcPos = savePos; gLine = saveLine; gTok = saveTok; return NULL; // signal to caller: block terminator reached } // Bare END Node *n = newNode(NODE_END, ln); return n; } // CONST name = value if (tokIs(TOK_CONST)) { nextToken(); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected identifier after CONST"); char cname[MAX_IDENT]; strncpy(cname, gTok.sval, MAX_IDENT - 1); cname[MAX_IDENT - 1] = '\0'; nextToken(); tokExpect(TOK_EQ); // Parse the value — must be a literal Node *val = parseExpr(); Node *n = newNode(NODE_CONST_DECL, ln); n->sval = strDup(cname); n->a = val; // Store in constant table if (gConstCount < MAX_CONSTS) { strncpy(gConsts[gConstCount].name, cname, MAX_IDENT - 1); if (val->type == NODE_STR_LIT) { gConsts[gConstCount].dataType = TYPE_STR; strncpy(gConsts[gConstCount].strVal, val->sval, MAX_TOKEN_LEN - 1); } else if (val->type == NODE_DBL_LIT) { gConsts[gConstCount].dataType = TYPE_DBL; gConsts[gConstCount].numVal = val->dval; } else if (val->type == NODE_INT_LIT) { gConsts[gConstCount].dataType = TYPE_INT; gConsts[gConstCount].numVal = val->ival; } else if (val->type == NODE_UNOP && val->ival == TOK_MINUS) { // Handle negative constants like CONST X = -1 gConsts[gConstCount].dataType = TYPE_DBL; if (val->a->type == NODE_INT_LIT) 
gConsts[gConstCount].numVal = -(double)val->a->ival; else gConsts[gConstCount].numVal = -val->a->dval; } else { fatal(ln, "CONST value must be a literal"); } gConstCount++; } return n; } // SWAP var1, var2 (contextual keyword) if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "SWAP") == 0) { nextToken(); Node *n = newNode(NODE_SWAP, ln); n->a = parseExpr(); tokExpect(TOK_COMMA); n->b = parseExpr(); return n; } // SELECT CASE expr ... CASE ... END SELECT if (tokIs(TOK_SELECT)) { nextToken(); tokExpect(TOK_CASE); Node *n = newNode(NODE_SELECT, ln); n->a = parseExpr(); skipNewlines(); // Parse CASE blocks Node *caseHead = NULL, *caseTail = NULL; while (tokIs(TOK_CASE)) { nextToken(); Node *cb = newNode(NODE_CASE, gLine); // CASE ELSE if (tokIs(TOK_ELSE)) { nextToken(); cb->ival = 1; // flag: CASE ELSE } else { // Parse comma-separated values/ranges Node *valHead = NULL, *valTail = NULL; for (;;) { Node *v; // CASE IS >/=/<=/<>/= expr if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "IS") == 0) { nextToken(); // Expect a comparison operator int op = gTok.type; if (op != TOK_EQ && op != TOK_NE && op != TOK_LT && op != TOK_GT && op != TOK_LE && op != TOK_GE) fatal(gLine, "Expected comparison operator after IS"); nextToken(); v = newNode(NODE_BINOP, gLine); v->ival = op; v->a = NULL; // placeholder: test expr filled at codegen v->b = parseExpr(); v->ival2 = 1; // flag: IS comparison } else { v = parseExpr(); // Check for TO (range) if (tokIs(TOK_TO)) { nextToken(); Node *range = newNode(NODE_BINOP, gLine); range->ival = TOK_TO; // reuse TO token as range marker range->a = v; range->b = parseExpr(); range->ival2 = 2; // flag: range v = range; } } if (!valHead) { valHead = valTail = v; } else { valTail->next = v; valTail = v; } if (!tokAccept(TOK_COMMA)) break; } cb->a = valHead; } skipNewlines(); // Parse body until next CASE or END SELECT Node *bodyHead = NULL, *bodyTail = NULL; while (!tokIs(TOK_CASE) && !tokIs(TOK_END) && !tokIs(TOK_EOF)) { Node *s = parseStatement(); if (!s) break; 
if (!bodyHead) { bodyHead = bodyTail = s; } else { bodyTail->next = s; bodyTail = s; } skipNewlines(); } cb->b = bodyHead; if (!caseHead) { caseHead = caseTail = cb; } else { caseTail->next = cb; caseTail = cb; } } // Expect END SELECT tokExpect(TOK_END); tokExpect(TOK_SELECT); n->b = caseHead; return n; } // ON expr GOTO/GOSUB label1, label2, ... if (tokIs(TOK_ON)) { nextToken(); Node *expr = parseExpr(); if (tokIs(TOK_GOTO)) { nextToken(); Node *n = newNode(NODE_ON_GOTO, ln); n->a = expr; // Parse comma-separated labels Node *labHead = NULL, *labTail = NULL; for (;;) { Node *lab; if (tokIs(TOK_INT_LIT)) { lab = newNode(NODE_INT_LIT, gLine); lab->ival = gTok.ival; recordGotoTarget(lab->ival); nextToken(); } else if (tokIs(TOK_IDENT)) { lab = newNode(NODE_IDENT, gLine); lab->sval = strDup(gTok.sval); recordGotoStrTarget(lab->sval); nextToken(); } else { fatal(gLine, "Expected label in ON GOTO"); } if (!labHead) { labHead = labTail = lab; } else { labTail->next = lab; labTail = lab; } if (!tokAccept(TOK_COMMA)) break; } n->b = labHead; return n; } else if (tokIs(TOK_GOSUB)) { nextToken(); Node *n = newNode(NODE_ON_GOSUB, ln); n->a = expr; n->ival2 = gGosubCount; // first return-point id // Parse comma-separated labels Node *labHead = NULL, *labTail = NULL; for (;;) { Node *lab; if (tokIs(TOK_INT_LIT)) { lab = newNode(NODE_INT_LIT, gLine); lab->ival = gTok.ival; recordGotoTarget(lab->ival); nextToken(); } else if (tokIs(TOK_IDENT)) { lab = newNode(NODE_IDENT, gLine); lab->sval = strDup(gTok.sval); recordGotoStrTarget(lab->sval); nextToken(); } else { fatal(gLine, "Expected label in ON GOSUB"); } gGosubCount++; // allocate return-point id for each target if (!labHead) { labHead = labTail = lab; } else { labTail->next = lab; labTail = lab; } if (!tokAccept(TOK_COMMA)) break; } n->b = labHead; return n; } else { fatal(ln, "Expected GOTO or GOSUB after ON expression"); } } // CALL name(args) if (tokIs(TOK_CALL)) { nextToken(); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected 
subroutine name after CALL"); Node *n = newNode(NODE_CALL, ln); n->sval = strDup(gTok.sval); nextToken(); // Parse argument list Node *args = NULL, *atail = NULL; if (tokAccept(TOK_LPAREN)) { while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) { Node *arg = parseExpr(); if (!args) args = atail = arg; else { atail->next = arg; atail = arg; } if (!tokAccept(TOK_COMMA)) break; } tokExpect(TOK_RPAREN); } n->a = args; return n; } // LET assignment or implicit assignment/call if (tokIs(TOK_LET)) nextToken(); // consume optional LET if (tokIs(TOK_IDENT)) { char name[MAX_TOKEN_LEN]; strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); nextToken(); // Array element assignment or sub call: name(args) [= expr] if (tokIs(TOK_LPAREN)) { nextToken(); Node *idx = parseExpr(); Node *tail = idx; while (tokAccept(TOK_COMMA)) { Node *dimIdx = parseExpr(); tail->next = dimIdx; tail = dimIdx; } tokExpect(TOK_RPAREN); // Check for array-element dot-access assignment: arr(i).field[.field...] = expr if (tokIs(TOK_DOT)) { Symbol *s = symLookup(name); if (s && s->dataType == TYPE_UDT) { Node *cur = newNode(NODE_ARRAY_REF, ln); cur->sval = strDup(name); cur->a = idx; cur->dataType = TYPE_UDT; cur->ival2 = s->udtIndex; int curUdt = s->udtIndex; while (curUdt >= 0 && tokIs(TOK_DOT)) { nextToken(); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected field name after '.'"); int fi = udtFieldLookup(curUdt, gTok.sval); if (fi < 0) fatal(ln, "Unknown field '%s'", gTok.sval); Node *dot = newNode(NODE_DOT_ACCESS, ln); dot->a = cur; dot->sval = strDup(gTok.sval); dot->ival2 = curUdt; UdtField *uf = &gUdts[curUdt].fields[fi]; dot->dataType = uf->dataType; if (uf->dataType == TYPE_STR && uf->strLen > 0) dot->ival = uf->strLen; cur = dot; curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1; nextToken(); } tokExpect(TOK_EQ); Node *val = parseExpr(); Node *n = newNode(NODE_ASSIGN, ln); n->a = cur; n->b = val; return n; } } if (tokAccept(TOK_EQ)) { // Array element assignment: name(i, j, ...) 
= expr Node *val = parseExpr(); Node *target = newNode(NODE_ARRAY_REF, ln); target->sval = strDup(name); target->a = idx; target->dataType = inferVarType(name); Node *n = newNode(NODE_ASSIGN, ln); n->a = target; n->b = val; return n; } // Not an assignment – must be a sub call: name(args) Node *n = newNode(NODE_CALL, ln); n->sval = strDup(name); n->a = idx; return n; } // Dot-access assignment: var.field[.field...] = expr if (tokIs(TOK_DOT)) { Symbol *s = symLookup(name); if (s && s->dataType == TYPE_UDT) { Node *cur = newNode(NODE_IDENT, ln); cur->sval = strDup(name); cur->dataType = TYPE_UDT; int curUdt = s->udtIndex; while (curUdt >= 0 && tokIs(TOK_DOT)) { nextToken(); if (!tokIs(TOK_IDENT)) fatal(ln, "Expected field name after '.'"); int fi = udtFieldLookup(curUdt, gTok.sval); if (fi < 0) fatal(ln, "Unknown field '%s' in type '%s'", gTok.sval, gUdts[curUdt].name); Node *dot = newNode(NODE_DOT_ACCESS, ln); dot->a = cur; dot->sval = strDup(gTok.sval); dot->ival2 = curUdt; UdtField *uf = &gUdts[curUdt].fields[fi]; dot->dataType = uf->dataType; if (uf->dataType == TYPE_STR && uf->strLen > 0) dot->ival = uf->strLen; cur = dot; curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1; nextToken(); } tokExpect(TOK_EQ); Node *val = parseExpr(); Node *n = newNode(NODE_ASSIGN, ln); n->a = cur; n->b = val; return n; } } // Simple variable assignment: name = expr if (tokAccept(TOK_EQ)) { Node *val = parseExpr(); Node *target = newNode(NODE_IDENT, ln); target->sval = strDup(name); target->dataType = inferVarType(name); Node *n = newNode(NODE_ASSIGN, ln); n->a = target; n->b = val; return n; } // Implicit sub call without CALL keyword: name arg1, arg2, ... 
Node *n = newNode(NODE_CALL, ln); n->sval = strDup(name); Node *args = NULL, *atail = NULL; if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { Node *arg = parseExpr(); args = atail = arg; while (tokAccept(TOK_COMMA)) { arg = parseExpr(); atail->next = arg; atail = arg; } } n->a = args; return n; } fatal(ln, "Unexpected token '%s' (type %d)", gTok.sval, gTok.type); return NULL; } // Parse a block of statements until one of the terminator tokens is seen. // Returns a NODE_BLOCK containing the linked list of statements. static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3) { Node *blk = newNode(NODE_BLOCK, gTok.line); Node *head = NULL, *tail = NULL; while (!tokIs(TOK_EOF)) { skipEol(); if (tokIs(end1) || tokIs(end2) || tokIs(end3)) break; if (tokIs(TOK_EOF)) break; // Check for END followed by IF/SUB/FUNCTION as block terminator if (tokIs(TOK_END)) { int savePos = gSrcPos; int saveLine = gLine; Token saveTok = gTok; nextToken(); if (tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION)) { // Restore – the caller will handle END IF/SUB/FUNCTION gSrcPos = savePos; gLine = saveLine; gTok = saveTok; break; } // Restore and let parseStatement handle bare END gSrcPos = savePos; gLine = saveLine; gTok = saveTok; } Node *stmt = parseStatement(); if (!stmt) break; // NULL signals block terminator // Flatten: if statement has a ->next chain (e.g., label + stmt), // append the entire chain if (!head) head = tail = stmt; else { tail->next = stmt; } while (tail->next) tail = tail->next; } blk->a = head; return blk; } // Parse the entire program static Node *parseProgram(void) { nextToken(); // prime the first token Node *blk = parseBlock(TOK_EOF, TOK_EOF, TOK_EOF); Node *prog = newNode(NODE_PROGRAM, 1); prog->a = blk; return prog; } // ----------------------------------------------------------------------- // Section 8: Code Generator // // Walks the AST and emits C source code. 
The output includes: // - A runtime library for strings, file I/O, and dynamic arrays // (debug or release variant based on --release flag) // - Packed struct definitions for user-defined types // - A static DATA pool for DATA/READ/RESTORE // - Forward declarations for SUBs and FUNCTIONs // - SUB/FUNCTION implementations as C functions // - A main() function containing global code // ----------------------------------------------------------------------- // Forward declarations static void genExpr(Node *n); static void genStmt(Node *n); static void genArrayFlatIndex(const char *name, Node *indices); static void genBlock(Node *blk); // Return the C type string for a BASIC data type static const char *cTypeStr(DataType dt) { switch (dt) { case TYPE_BYTE: return "uint8_t"; case TYPE_INT: return "int16_t"; case TYPE_LONG: return "int32_t"; case TYPE_FLOAT: return "float"; case TYPE_DBL: return "double"; case TYPE_STR: return "char*"; default: return "void"; } } // Return the C struct type string for a UDT (uses rotating buffer) static const char *cUdtTypeStr(int udtIndex) { static char bufs[4][MAX_IDENT + 16]; static int bi = 0; char *buf = bufs[bi++ & 3]; if (udtIndex >= 0 && udtIndex < gUdtCount) snprintf(buf, MAX_IDENT + 16, "struct _b_%s", cleanName(gUdts[udtIndex].name)); else snprintf(buf, MAX_IDENT + 16, "void"); return buf; } // Return a C default-value expression for a data type static const char *cDefaultVal(DataType dt) { switch (dt) { case TYPE_BYTE: return "0"; case TYPE_INT: return "0"; case TYPE_LONG: return "0"; case TYPE_FLOAT: return "0.0f"; case TYPE_DBL: return "0.0"; case TYPE_STR: return "_bstr(\"\")"; default: return "0"; } } // Return a printf format specifier for a data type. // int16_t is promoted to int in varargs so %d is safe. // int32_t is int on all modern platforms so %d works. 
static const char *cFmt(DataType dt) { switch (dt) { case TYPE_BYTE: return "%u"; case TYPE_INT: return "%d"; case TYPE_LONG: return "%d"; case TYPE_FLOAT: return "%g"; case TYPE_DBL: return "%g"; case TYPE_STR: return "%s"; default: return "%d"; } } // Return a scanf format specifier for a data type static const char *cScanfFmt(DataType dt) { switch (dt) { case TYPE_BYTE: return "%hhu"; // uint8_t case TYPE_INT: return "%hd"; // int16_t needs short format case TYPE_LONG: return "%d"; // int32_t case TYPE_FLOAT: return "%f"; case TYPE_DBL: return "%lf"; default: return "%hd"; } } // Determine if an expression node produces a string type static int isStringExpr(Node *n) { if (!n) return 0; return n->dataType == TYPE_STR; } // Check if a name corresponds to a built-in BASIC function that returns // a string. Names ending in '$' are string functions. static int isBuiltinStrFunc(const char *name) { // Check external functions first ExternFunc *ef = externFuncLookup(name); if (ef) return ef->returnType == TYPE_STR; // Check compile-time builtins const BuiltinDef *bd = builtinDefLookup(name); if (bd) return bd->returnType == TYPE_STR; // Functions with special handling in genBuiltinCall return (strIcmp(name, "MID$") == 0 || strIcmp(name, "LEFT$") == 0 || strIcmp(name, "RIGHT$") == 0 || strIcmp(name, "STRING$") == 0); } // Check if a name is a built-in BASIC function static int isBuiltinFunc(const char *name) { // Check external functions if (externFuncLookup(name)) return 1; // Check compile-time builtins if (builtinDefLookup(name)) return 1; // Functions with special handling in genBuiltinCall return (isBuiltinStrFunc(name) || strIcmp(name, "LEN") == 0 || strIcmp(name, "VAL") == 0 || strIcmp(name, "ASC") == 0 || strIcmp(name, "INT") == 0 || strIcmp(name, "ABS") == 0 || strIcmp(name, "INSTR") == 0 || strIcmp(name, "EOF") == 0 || strIcmp(name, "LOF") == 0 || strIcmp(name, "FREEFILE") == 0 || strIcmp(name, "LBOUND") == 0 || strIcmp(name, "UBOUND") == 0); } // Count the 
number of nodes in a linked list static int countList(Node *n) { int c = 0; while (n) { c++; n = n->next; } return c; } // Generate code for a built-in function call static void genBuiltinCall(const char *name, Node *args) { int argc = countList(args); // Functions with special handling (validation, multiple args, etc.) if (strIcmp(name, "LEN") == 0) { emitRaw("((int)strlen("); genExpr(args); emitRaw("))"); } else if (strIcmp(name, "VAL") == 0) { emitRaw("atof("); genExpr(args); emitRaw(")"); } else if (strIcmp(name, "ASC") == 0) { emitRaw("((int)(unsigned char)("); genExpr(args); emitRaw(")[0])"); } else if (strIcmp(name, "INT") == 0) { emitRaw("((int)("); genExpr(args); emitRaw("))"); } else if (strIcmp(name, "ABS") == 0) { emitRaw("_babs("); genExpr(args); emitRaw(")"); } else if (strIcmp(name, "MID$") == 0) { if (argc < 2) fatal(0, "MID$ requires at least 2 arguments"); emitRaw("_bmid("); genExpr(args); emitRaw(", "); genExpr(args->next); if (argc >= 3 && args->next->next) { emitRaw(", "); genExpr(args->next->next); } else { emitRaw(", -1"); } emitRaw(")"); } else if (strIcmp(name, "LEFT$") == 0) { if (argc < 2) fatal(0, "LEFT$ requires 2 arguments"); emitRaw("_bleft("); genExpr(args); emitRaw(", "); genExpr(args->next); emitRaw(")"); } else if (strIcmp(name, "RIGHT$") == 0) { if (argc < 2) fatal(0, "RIGHT$ requires 2 arguments"); emitRaw("_bright("); genExpr(args); emitRaw(", "); genExpr(args->next); emitRaw(")"); } else if (strIcmp(name, "INSTR") == 0) { if (argc < 2) fatal(0, "INSTR requires 2 arguments"); emitRaw("_binstr("); genExpr(args); emitRaw(", "); genExpr(args->next); emitRaw(")"); } else if (strIcmp(name, "STRING$") == 0) { if (argc < 2) fatal(0, "STRING$ requires 2 arguments"); emitRaw("_bstring_rep("); genExpr(args); emitRaw(", "); genExpr(args->next); emitRaw(")"); } else if (strIcmp(name, "EOF") == 0) { emitRaw("_beof("); genExpr(args); emitRaw(")"); } else if (strIcmp(name, "LOF") == 0) { emitRaw("_blof("); genExpr(args); emitRaw(")"); } else 
if (strIcmp(name, "FREEFILE") == 0) { emitRaw("_bfreefile()"); // --- Array functions (need special codegen) --- } else if (strIcmp(name, "LBOUND") == 0) { emitRaw("0"); } else if (strIcmp(name, "UBOUND") == 0) { if (args && args->type == NODE_IDENT) { emitRaw("(%s_size - 1)", cleanName(args->sval)); } else { fatal(0, "UBOUND requires an array name"); } } else { // Check external function definitions and compile-time builtins const char *tmpl = NULL; ExternFunc *ef = externFuncLookup(name); if (ef) { tmpl = ef->cCode; } else { const BuiltinDef *bd = builtinDefLookup(name); if (bd) tmpl = bd->cCode; } if (tmpl) { // Expand template: % = first arg, %1 %2 etc = numbered args const char *t = tmpl; while (*t) { if (*t == '%') { t++; int argNum = 0; if (*t >= '1' && *t <= '9') { argNum = *t - '1'; t++; } // Find the nth argument Node *arg = args; for (int i = 0; i < argNum && arg; i++) arg = arg->next; if (arg) genExpr(arg); else emitRaw("0"); // missing arg } else { char c[2] = {*t, '\0'}; emitRaw("%s", c); t++; } } } else { // Unknown builtin – just emit as-is emitRaw("%s(", cleanName(name)); for (Node *a = args; a; a = a->next) { if (a != args) emitRaw(", "); genExpr(a); } emitRaw(")"); } } } // Generate code for an expression node static void genExpr(Node *n) { if (!n) { emitRaw("0"); return; } switch (n->type) { case NODE_INT_LIT: emitRaw("%d", n->ival); break; case NODE_DBL_LIT: emitRaw("%g", n->dval); break; case NODE_STR_LIT: // Emit as a C string literal emitRaw("\""); for (const char *p = n->sval; p && *p; p++) { if (*p == '"') emitRaw("\\\""); else if (*p == '\\') emitRaw("\\\\"); else if (*p == '\n') emitRaw("\\n"); else if (*p == '\t') emitRaw("\\t"); else emitRaw("%c", *p); } emitRaw("\""); break; case NODE_IDENT: { const char *cn = cleanName(n->sval); // Inside a function, check if this is the function name (return var) if (gInFunc && gFuncName && strIcmp(n->sval, gFuncName) == 0) { emitRaw("%s_ret", cn); } else if (isByrefParam(n->sval)) { // BYREF 
parameter: dereference the pointer emitRaw("(*%s)", cn); } else { emitRaw("%s", cn); } break; } case NODE_ARRAY_REF: emitRaw("%s[", cleanName(n->sval)); genArrayFlatIndex(n->sval, n->a); emitRaw("]"); break; case NODE_DOT_ACCESS: // base.field — base is in n->a, field name in n->sval genExpr(n->a); emitRaw(".%s", cleanName(n->sval)); break; case NODE_UNOP: if (n->ival == TOK_MINUS) { emitRaw("(-("); genExpr(n->a); emitRaw("))"); } else if (n->ival == TOK_NOT) { // If operand is a comparison, use logical NOT for cleaner code int isCmp = (n->a->type == NODE_BINOP && (n->a->ival == TOK_EQ || n->a->ival == TOK_NE || n->a->ival == TOK_LT || n->a->ival == TOK_GT || n->a->ival == TOK_LE || n->a->ival == TOK_GE || n->a->ival == TOK_AND || n->a->ival == TOK_OR)); if (isCmp || n->a->type == NODE_UNOP) { emitRaw("(!("); genExpr(n->a); emitRaw("))"); } else { emitRaw("(~(int)("); genExpr(n->a); emitRaw("))"); } } break; case NODE_BINOP: { int op = n->ival; // String concatenation if (n->dataType == TYPE_STR && (op == TOK_PLUS || op == TOK_AMP)) { emitRaw("_bconcat("); genExpr(n->a); emitRaw(", "); genExpr(n->b); emitRaw(")"); break; } // String comparison if (isStringExpr(n->a) && isStringExpr(n->b)) { const char *cmpOp; switch (op) { case TOK_EQ: cmpOp = "==0"; break; case TOK_NE: cmpOp = "!=0"; break; case TOK_LT: cmpOp = "<0"; break; case TOK_GT: cmpOp = ">0"; break; case TOK_LE: cmpOp = "<=0"; break; case TOK_GE: cmpOp = ">=0"; break; default: cmpOp = "==0"; break; } emitRaw("(strcmp("); genExpr(n->a); emitRaw(", "); genExpr(n->b); emitRaw(")%s)", cmpOp); break; } // Power operator: emit as pow() call if (op == TOK_CARET) { emitRaw("pow("); genExpr(n->a); emitRaw(", "); genExpr(n->b); emitRaw(")"); break; } // Integer division: cast operands to int if (op == TOK_BSLASH) { emitRaw("((int)("); genExpr(n->a); emitRaw(") / (int)("); genExpr(n->b); emitRaw("))"); break; } // Float division: BASIC '/' always produces a floating-point result if (op == TOK_SLASH) { 
emitRaw("((double)("); genExpr(n->a); emitRaw(") / (double)("); genExpr(n->b); emitRaw("))"); break; } // All other numeric and logical binary operators emitRaw("("); genExpr(n->a); switch (op) { case TOK_PLUS: emitRaw(" + "); break; case TOK_MINUS: emitRaw(" - "); break; case TOK_STAR: emitRaw(" * "); break; case TOK_MOD: emitRaw(" %% "); break; case TOK_EQ: emitRaw(" == "); break; case TOK_NE: emitRaw(" != "); break; case TOK_LT: emitRaw(" < "); break; case TOK_GT: emitRaw(" > "); break; case TOK_LE: emitRaw(" <= "); break; case TOK_GE: emitRaw(" >= "); break; case TOK_AND: emitRaw(" & "); break; case TOK_OR: emitRaw(" | "); break; case TOK_XOR: emitRaw(" ^ "); break; default: emitRaw(" ? "); break; } genExpr(n->b); emitRaw(")"); break; } case NODE_FUNC_CALL: // SIZEOF(TypeName) — emit sizeof(struct _b_TypeName) if (strIcmp(n->sval, "SIZEOF") == 0 && n->a && n->a->type == NODE_IDENT) { int ui = udtLookup(n->a->sval); if (ui >= 0) { emitRaw("(long)sizeof(%s)", cUdtTypeStr(ui)); break; } } if (isBuiltinFunc(n->sval)) { genBuiltinCall(n->sval, n->a); } else { emitRaw("%s(", cleanName(n->sval)); // Generate arguments, applying BYREF (&) where needed Symbol *fsym = symLookup(n->sval); int pi = 0; for (Node *a = n->a; a; a = a->next, pi++) { if (a != n->a) emitRaw(", "); int needRef = (fsym && pi < fsym->paramCount && fsym->paramModes[pi] == PASS_BYREF); if (needRef && a->type == NODE_IDENT) { emitRaw("&%s", cleanName(a->sval)); } else if (needRef && a->type == NODE_ARRAY_REF) { emitRaw("&%s[", cleanName(a->sval)); genArrayFlatIndex(a->sval, a->a); emitRaw("]"); } else { genExpr(a); } } emitRaw(")"); } break; default: emitRaw("/* unknown expr node %d */0", n->type); break; } } // Generate a variable declaration in C static void genVarDecl(const char *name, DataType dt, int isStatic) { const char *cn = cleanName(name); if (isStatic) emit("static "); else emit(""); if (dt == TYPE_STR) emitRaw("char *%s _BUNUSED = _bstr(\"\");\n", cn); else emitRaw("%s %s _BUNUSED = 
%s;\n", cTypeStr(dt), cn, cDefaultVal(dt)); } // Emit a row-major flattened index for multidimensional array access. // For 1D, just emits the single index expression (backward compatible). // For nD, emits: ((i0) * nameDim1 + (i1)) * nameDim2 + (i2) ... static void genArrayFlatIndex(const char *name, Node *indices) { char cn[MAX_IDENT]; strncpy(cn, cleanName(name), MAX_IDENT - 1); cn[MAX_IDENT - 1] = '\0'; // Count dimensions int ndims = 0; for (Node *p = indices; p; p = p->next) ndims++; if (ndims <= 1) { genExpr(indices); } else { // Row-major: fold left: acc = idx[0], for k=1..n-1: acc = acc * dimK + idx[k] // For 3D: (((i) * dim1 + (j)) * dim2 + (k)) Node *idx = indices; // Emit opening parens for nesting: need (ndims-1) wrapping levels for (int i = 1; i < ndims; i++) emitRaw("("); emitRaw("("); genExpr(idx); emitRaw(")"); idx = idx->next; int dimIdx = 1; while (idx) { emitRaw(" * %s_dim%d + (", cn, dimIdx); genExpr(idx); emitRaw("))"); idx = idx->next; dimIdx++; } } } // Generate a DIM array declaration (supports multidimensional) static void genDimArray(const char *name, DataType dt, Node *sizeList, int ndims) { char cn[MAX_IDENT]; strncpy(cn, cleanName(name), MAX_IDENT - 1); cn[MAX_IDENT - 1] = '\0'; emit("%s *%s _BUNUSED = NULL;\n", cTypeStr(dt), cn); if (ndims <= 1) { // 1D: backward-compatible emit("int %s_size _BUNUSED = 0;\n", cn); if (sizeList) { emit("%s_size = (", cn); genExpr(sizeList); emitRaw(") + 1;\n"); emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n", cn, cTypeStr(dt), cn, cTypeStr(dt)); if (dt == TYPE_STR) { emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n", cn, cn); } } } else { // Multi-dimensional Node *dim = sizeList; for (int i = 0; i < ndims; i++, dim = dim->next) { emit("int %s_dim%d _BUNUSED = 0;\n", cn, i); } emit("int %s_size _BUNUSED = 0;\n", cn); dim = sizeList; for (int i = 0; i < ndims; i++, dim = dim->next) { emit("%s_dim%d = (", cn, i); genExpr(dim); emitRaw(") + 1;\n"); } emit("%s_size = ", cn); for (int i = 
0; i < ndims; i++) { if (i > 0) emitRaw(" * "); emitRaw("%s_dim%d", cn, i); } emitRaw(";\n"); emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n", cn, cTypeStr(dt), cn, cTypeStr(dt)); if (dt == TYPE_STR) { emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n", cn, cn); } } } // Generate code for a PRINT statement static void genPrint(Node *pr) { Node *item = pr->a; // Empty PRINT: just a newline if (!item) { emit("printf(\"\\n\");\n"); return; } // Build printf call with format string and arguments emit("printf(\""); // First pass: build format string for (Node *it = item; it; it = it->next) { if (it->a) { emitRaw("%s", cFmt(it->a->dataType)); } // Separator if (it->ival == 1) { // semicolon: no separator } else if (it->ival == 2) { emitRaw("\\t"); // comma: tab } else if (!it->next) { // Last item with no trailing separator: add newline emitRaw("\\n"); } } emitRaw("\""); // Second pass: arguments for (Node *it = item; it; it = it->next) { if (it->a) { emitRaw(", "); genExpr(it->a); } } emitRaw(");\n"); // Free temporary strings created during expression evaluation emit("_bfree_temps();\n"); } // Generate code for a PRINT USING statement static void genPrintUsing(Node *pu) { // Initialize format parser with format string emit("_busing_init("); genExpr(pu->a); emitRaw(");\n"); // Format and print each value for (Node *val = pu->b; val; val = val->next) { if (val->dataType == TYPE_STR) { emit("_busing_str("); } else { emit("_busing_num("); } genExpr(val); emitRaw(");\n"); } // Print newline and cleanup emit("_busing_end();\n"); emit("_bfree_temps();\n"); } // Generate code for an INPUT statement static void genInput(Node *inp) { // Print prompt if any if (inp->sval) { emit("printf(\"%%s\", \"%s\");\n", inp->sval); } else { emit("printf(\"? 
\");\n"); } emit("fflush(stdout);\n"); // Read each variable for (Node *v = inp->a; v; v = v->next) { if (v->dataType == TYPE_STR) { emit("{ char _buf[1024]; if(fgets(_buf, sizeof(_buf), stdin)) {\n"); gIndent++; emit("_buf[strcspn(_buf, \"\\n\")] = 0;\n"); emit("_bstr_assign(&%s, _buf);\n", cleanName(v->sval)); gIndent--; emit("} }\n"); } else { emit("scanf(\"%s\", &%s);\n", cScanfFmt(v->dataType), cleanName(v->sval)); } } } // Generate code for an assignment statement static void genAssign(Node *n) { Node *target = n->a; Node *value = n->b; // Check if we're assigning to the function return variable if (gInFunc && gFuncName && target->type == NODE_IDENT && strIcmp(target->sval, gFuncName) == 0) { const char *cn = cleanName(target->sval); if (gFuncRet == TYPE_STR) { emit("_bstr_assign(&%s_ret, ", cn); genExpr(value); emitRaw(");\n"); } else { emit("%s_ret = ", cn); genExpr(value); emitRaw(";\n"); } emit("_bfree_temps();\n"); return; } // Dot-access assignment: var.field = expr if (target->type == NODE_DOT_ACCESS) { int strLen = target->ival; // >0 for fixed-length STRING * N if (target->dataType == TYPE_STR && strLen > 0) { // Fixed-length string: strncpy + null terminate emit("strncpy("); genExpr(target->a); emitRaw(".%s, ", cleanName(target->sval)); genExpr(value); emitRaw(", %d);\n", strLen); emit(""); genExpr(target->a); emitRaw(".%s[%d] = '\\0';\n", cleanName(target->sval), strLen); } else if (target->dataType == TYPE_STR) { // Dynamic string in struct — unusual but handle it emit("_bstr_assign(&("); genExpr(target->a); emitRaw(".%s), ", cleanName(target->sval)); genExpr(value); emitRaw(");\n"); } else { // Numeric field emit(""); genExpr(target->a); emitRaw(".%s = ", cleanName(target->sval)); genExpr(value); emitRaw(";\n"); } emit("_bfree_temps();\n"); return; } // Check if target is a BYREF parameter (needs pointer dereference) int byref = (target->type == NODE_IDENT && isByrefParam(target->sval)); // String assignment uses _bstr_assign if (target->dataType 
== TYPE_STR || isStringExpr(value)) { if (target->type == NODE_ARRAY_REF) { emit("_bstr_assign(&%s[", cleanName(target->sval)); genArrayFlatIndex(target->sval, target->a); emitRaw("], "); } else if (byref) { emit("_bstr_assign(%s, ", cleanName(target->sval)); } else { emit("_bstr_assign(&%s, ", cleanName(target->sval)); } genExpr(value); emitRaw(");\n"); } else { // Numeric assignment if (target->type == NODE_ARRAY_REF) { emit("%s[", cleanName(target->sval)); genArrayFlatIndex(target->sval, target->a); emitRaw("] = "); } else if (byref) { emit("(*%s) = ", cleanName(target->sval)); } else { emit("%s = ", cleanName(target->sval)); } genExpr(value); emitRaw(";\n"); } emit("_bfree_temps();\n"); } // Generate a SUB or FUNCTION definition static void genFuncDef(Node *n) { int isFunc = (n->type == NODE_FUNC); // Store a permanent copy of the clean function name so it survives // additional cleanName() calls during parameter/body emission char fname[MAX_IDENT]; strncpy(fname, cleanName(n->sval), MAX_IDENT - 1); fname[MAX_IDENT - 1] = '\0'; DataType ret = isFunc ? 
n->dataType : TYPE_VOID; // Save and set function context int prevInFunc = gInFunc; const char *prevFuncName = gFuncName; DataType prevFuncRet = gFuncRet; gInFunc = 1; gFuncName = n->sval; gFuncRet = ret; // Function signature emitRaw("%s %s(", cTypeStr(ret), fname); int first = 1; for (Node *p = n->a; p; p = p->next) { if (!first) emitRaw(", "); first = 0; if (p->ival == PASS_BYREF) { emitRaw("%s *%s", cTypeStr(p->dataType), cleanName(p->sval)); } else { if (p->dataType == TYPE_STR) emitRaw("const char *%s", cleanName(p->sval)); else emitRaw("%s %s", cTypeStr(p->dataType), cleanName(p->sval)); } } if (first) emitRaw("void"); // no params emitRaw(") {\n"); gIndent++; // For FUNCTION: declare the return variable (named _ret) if (isFunc) { emit("%s %s_ret = %s;\n", cTypeStr(ret), fname, cDefaultVal(ret)); } // Generate body if (n->b) genBlock(n->b); // Return statement for FUNCTION if (isFunc) { emit("return %s_ret;\n", fname); } gIndent--; emitRaw("}\n\n"); // Restore context gInFunc = prevInFunc; gFuncName = prevFuncName; gFuncRet = prevFuncRet; } // Generate code for a single statement static void genStmt(Node *n) { if (!n) return; switch (n->type) { case NODE_LABEL: // Only emit C labels that are actually targeted by GOTO/GOSUB, // to avoid -Wunused-label warnings. 
if (n->sval) { // Named label if (isGotoStrTarget(n->sval)) emitRaw("%s: ;\n", cleanName(n->sval)); } else if (isGotoTarget(n->ival)) { emitRaw("L%d: ;\n", n->ival); } break; case NODE_TYPE_DEF: // TYPE definitions are emitted globally in generate(), not here break; case NODE_DIM: if (n->dataType == TYPE_UDT && n->ival == 0) { // UDT scalar: struct _b_Name var; memset(&var, 0, sizeof(var)); const char *uts = cUdtTypeStr(n->ival2); char cn[MAX_IDENT]; strncpy(cn, cleanName(n->sval), MAX_IDENT - 1); cn[MAX_IDENT - 1] = '\0'; emit("%s %s _BUNUSED;\n", uts, cn); emit("memset(&%s, 0, sizeof(%s));\n", cn, cn); } else if (n->dataType == TYPE_UDT && n->ival > 0) { // UDT array const char *uts = cUdtTypeStr(n->ival2); char cn[MAX_IDENT]; strncpy(cn, cleanName(n->sval), MAX_IDENT - 1); cn[MAX_IDENT - 1] = '\0'; emit("%s *%s _BUNUSED = NULL;\n", uts, cn); emit("int %s_size _BUNUSED = 0;\n", cn); // Compute size and allocate if (n->a) { if (n->ival <= 1) { emit("%s_size = (", cn); genExpr(n->a); emitRaw(") + 1;\n"); } else { Node *dim = n->a; for (int i = 0; i < n->ival; i++, dim = dim->next) { emit("int %s_dim%d _BUNUSED = (", cn, i); genExpr(dim); emitRaw(") + 1;\n"); } emit("%s_size = ", cn); for (int i = 0; i < n->ival; i++) { if (i > 0) emitRaw(" * "); emitRaw("%s_dim%d", cn, i); } emitRaw(";\n"); } emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n", cn, uts, cn, uts); } } else if (n->ival) { // array -- n->ival is ndims genDimArray(n->sval, n->dataType, n->a, n->ival); } else { // scalar genVarDecl(n->sval, n->dataType, 0); } break; case NODE_REDIM: { char rcn[MAX_IDENT]; strncpy(rcn, cleanName(n->sval), MAX_IDENT - 1); rcn[MAX_IDENT - 1] = '\0'; int ndims = n->ival; if (ndims <= 1) { // 1D REDIM: backward-compatible realloc emit("{ int _old_sz = %s_size;\n", rcn); gIndent++; emit("%s_size = (", rcn); genExpr(n->a); emitRaw(") + 1;\n"); emit("%s = (%s*)realloc(%s, %s_size * sizeof(%s));\n", rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType)); emit("if (%s_size > 
_old_sz)\n", rcn); gIndent++; emit("memset(%s + _old_sz, 0, (%s_size - _old_sz) * sizeof(%s));\n", rcn, rcn, cTypeStr(n->dataType)); gIndent--; gIndent--; emit("}\n"); } else { // Multi-dim REDIM: recompute dims, free + calloc emit("{\n"); gIndent++; Node *dim = n->a; for (int i = 0; i < ndims; i++, dim = dim->next) { emit("%s_dim%d = (", rcn, i); genExpr(dim); emitRaw(") + 1;\n"); } emit("%s_size = ", rcn); for (int i = 0; i < ndims; i++) { if (i > 0) emitRaw(" * "); emitRaw("%s_dim%d", rcn, i); } emitRaw(";\n"); emit("free(%s);\n", rcn); emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n", rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType)); if (n->dataType == TYPE_STR) { emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n", rcn, rcn); } gIndent--; emit("}\n"); } break; } case NODE_LOCAL: genVarDecl(n->sval, n->dataType, 0); break; case NODE_STATIC: genVarDecl(n->sval, n->dataType, 1); break; case NODE_ASSIGN: genAssign(n); break; case NODE_PRINT: genPrint(n); break; case NODE_PRINT_USING: genPrintUsing(n); break; case NODE_INPUT: genInput(n); break; case NODE_IF: emit("if ("); genExpr(n->a); emitRaw(") {\n"); gIndent++; if (n->b) genBlock(n->b); gIndent--; if (n->c) { if (n->c->type == NODE_IF) { // ELSEIF: emit as "} else if (...)" emit("} else "); // Don't indent the nested if genStmt(n->c); return; // the nested if handles its own closing } else { emit("} else {\n"); gIndent++; genBlock(n->c); gIndent--; } } emit("}\n"); break; case NODE_FOR: { const char *vn = cleanName(n->sval); // Determine the C type for the loop variable const char *vtype = cTypeStr(inferVarType(n->sval)); // When inside a function, the loop variable may not be declared // locally. Wrap in a block and declare the variable to be safe. // Any prior LOCAL/DIM of the same name has _BUNUSED to suppress // shadowing warnings. 
int needDecl = gInFunc; if (n->c) { // FOR with STEP: use a block with a step variable so the // step expression is evaluated once, and the comparison // direction adapts to the sign of the step at runtime. emit("{ /* FOR %s with STEP */\n", vn); gIndent++; if (needDecl) emit("%s %s;\n", vtype, vn); emit("%s _step_%s = ", vtype, vn); genExpr(n->c); emitRaw(";\n"); emit("for (%s = ", vn); genExpr(n->a); emitRaw("; _step_%s > 0 ? %s <= ", vn, vn); genExpr(n->b); emitRaw(" : %s >= ", vn); genExpr(n->b); emitRaw("; %s += _step_%s) {\n", vn, vn); gIndent++; if (n->d) genBlock(n->d); gIndent--; emit("}\n"); gIndent--; emit("}\n"); } else { // Default step = 1: simple ascending loop if (needDecl) { emit("{ %s %s;\n", vtype, vn); gIndent++; } emit("for (%s = ", vn); genExpr(n->a); emitRaw("; %s <= ", vn); genExpr(n->b); emitRaw("; %s++) {\n", vn); gIndent++; if (n->d) genBlock(n->d); gIndent--; emit("}\n"); if (needDecl) { gIndent--; emit("}\n"); } } break; } case NODE_WHILE: emit("while ("); genExpr(n->a); emitRaw(") {\n"); gIndent++; if (n->b) genBlock(n->b); gIndent--; emit("}\n"); break; case NODE_DO_LOOP: { int isUntil = n->ival & 1; int atBottom = n->ival & 2; if (!n->a) { // Infinite loop: DO ... 
LOOP emit("for (;;) {\n"); } else if (atBottom) { emit("do {\n"); } else { // Condition at top emit("while ("); if (isUntil) emitRaw("!("); genExpr(n->a); if (isUntil) emitRaw(")"); emitRaw(") {\n"); } gIndent++; if (n->b) genBlock(n->b); gIndent--; if (atBottom && n->a) { emit("} while ("); if (isUntil) emitRaw("!("); genExpr(n->a); if (isUntil) emitRaw(")"); emitRaw(");\n"); } else { emit("}\n"); } break; } case NODE_GOTO: if (n->sval) { emit("goto %s;\n", cleanName(n->sval)); } else if (n->ival) { emit("goto L%d;\n", n->ival); } break; case NODE_GOSUB: if (!gRelease) emit("if (_gosub_sp >= _GOSUB_MAX) { fprintf(stderr, \"GOSUB stack overflow\\n\"); exit(1); }\n"); emit("_gosub_stack[_gosub_sp++] = %d;\n", n->ival2); if (n->sval) emit("goto %s;\n", cleanName(n->sval)); else emit("goto L%d;\n", n->ival); emitRaw("_gr%d: ;\n", n->ival2); break; case NODE_RETURN: if (gInFunc) { // Return from FUNCTION if (n->a) { if (gFuncRet == TYPE_STR) { emit("_bstr_assign(&%s_ret, ", cleanName(gFuncName)); genExpr(n->a); emitRaw(");\n"); } else { emit("%s_ret = ", cleanName(gFuncName)); genExpr(n->a); emitRaw(";\n"); } } emit("return %s_ret;\n", cleanName(gFuncName)); } else { // RETURN from GOSUB: dispatch back using the stack if (!gRelease) emit("if (_gosub_sp <= 0) { fprintf(stderr, \"RETURN without GOSUB\\n\"); exit(1); }\n"); emit("switch (_gosub_stack[--_gosub_sp]) {\n"); for (int i = 0; i < gGosubCount; i++) { emit(" case %d: goto _gr%d;\n", i, i); } emit("}\n"); } break; case NODE_EXIT: if (n->ival == TOK_FOR || n->ival == TOK_WHILE || n->ival == TOK_DO) emit("break;\n"); else if (n->ival == TOK_SUB) emit("return;\n"); else if (n->ival == TOK_FUNCTION && gInFunc && gFuncName) emit("return %s_ret;\n", cleanName(gFuncName)); break; case NODE_CONTINUE: emit("continue;\n"); break; case NODE_CALL: { const char *cn = cleanName(n->sval); emit("%s(", cn); Symbol *fsym = symLookup(n->sval); int pi = 0; for (Node *a = n->a; a; a = a->next, pi++) { if (a != n->a) emitRaw(", "); int 
needRef = (fsym && pi < fsym->paramCount && fsym->paramModes[pi] == PASS_BYREF); if (needRef && a->type == NODE_IDENT) { emitRaw("&%s", cleanName(a->sval)); } else if (needRef && a->type == NODE_ARRAY_REF) { emitRaw("&%s[", cleanName(a->sval)); genExpr(a->a); emitRaw("]"); } else { genExpr(a); } } emitRaw(");\n"); emit("_bfree_temps();\n"); break; } case NODE_SUB: case NODE_FUNC: // These are generated separately before main() break; case NODE_END: emit("exit(0);\n"); break; case NODE_OPEN: { if (n->ival == 4) { // RANDOM mode emit("_bfile_open_random("); genExpr(n->b); emitRaw(", "); genExpr(n->a); emitRaw(", "); if (n->c) { genExpr(n->c); } else { emitRaw("0"); } emitRaw(");\n"); } else { const char *modes[] = {"r", "w", "a", "rb"}; emit("_bfile_open("); genExpr(n->b); emitRaw(", "); genExpr(n->a); emitRaw(", \"%s\");\n", modes[n->ival]); } break; } case NODE_GET: // GET #filenum, record, variable emit("fseek(_bfile_get("); genExpr(n->a); emitRaw("), ("); genExpr(n->b); emitRaw(" - 1) * _bfile_reclen["); genExpr(n->a); emitRaw("], SEEK_SET);\n"); emit("fread(&%s, _bfile_reclen[", cleanName(n->c->sval)); genExpr(n->a); emitRaw("], 1, _bfile_get("); genExpr(n->a); emitRaw("));\n"); break; case NODE_PUT: // PUT #filenum, record, variable emit("fseek(_bfile_get("); genExpr(n->a); emitRaw("), ("); genExpr(n->b); emitRaw(" - 1) * _bfile_reclen["); genExpr(n->a); emitRaw("], SEEK_SET);\n"); emit("fwrite(&%s, _bfile_reclen[", cleanName(n->c->sval)); genExpr(n->a); emitRaw("], 1, _bfile_get("); genExpr(n->a); emitRaw("));\n"); break; case NODE_CLOSE: emit("_bfile_close("); genExpr(n->b); emitRaw(");\n"); break; case NODE_FILE_PRINT: { Node *item = n->a; if (!item) { // PRINT #n, alone = write newline emit("fprintf(_bfile_get("); genExpr(n->b); emitRaw("), \"\\n\");\n"); break; } // Build fprintf with format string and arguments emit("fprintf(_bfile_get("); genExpr(n->b); emitRaw("), \""); for (Node *it = item; it; it = it->next) { if (it->a) emitRaw("%s", 
cFmt(it->a->dataType)); if (it->ival == 1) { // semicolon: no separator } else if (it->ival == 2) { emitRaw("\\t"); } else if (!it->next) { emitRaw("\\n"); } } emitRaw("\""); for (Node *it = item; it; it = it->next) { if (it->a) { emitRaw(", "); genExpr(it->a); } } emitRaw(");\n"); emit("_bfree_temps();\n"); break; } case NODE_FILE_INPUT: for (Node *v = n->a; v; v = v->next) { if (v->dataType == TYPE_STR) { emit("_bline_input("); genExpr(n->b); emitRaw(", &%s);\n", cleanName(v->sval)); } else { emit("fscanf(_bfile_get("); genExpr(n->b); emitRaw("), \"%s\", &%s);\n", cScanfFmt(v->dataType), cleanName(v->sval)); } } break; case NODE_LINE_INPUT: emit("_bline_input("); genExpr(n->b); emitRaw(", &%s);\n", cleanName(n->a->sval)); break; case NODE_FILE_WRITE: { // WRITE # outputs CSV-style: strings quoted, comma-separated, newline int first = 1; for (Node *e = n->a; e; e = e->next) { if (!first) { emit("fprintf(_bfile_get("); genExpr(n->b); emitRaw("), \",\");\n"); } first = 0; if (e->dataType == TYPE_STR) { emit("fprintf(_bfile_get("); genExpr(n->b); emitRaw("), \"\\\"%%s\\\"\", "); genExpr(e); emitRaw(");\n"); } else { emit("fprintf(_bfile_get("); genExpr(n->b); emitRaw("), \"%s\", ", cFmt(e->dataType)); genExpr(e); emitRaw(");\n"); } } emit("fprintf(_bfile_get("); genExpr(n->b); emitRaw("), \"\\n\");\n"); emit("_bfree_temps();\n"); break; } case NODE_DATA: // No-op: DATA items are collected and emitted as a static array break; case NODE_READ: for (Node *v = n->a; v; v = v->next) { if (v->dataType == TYPE_STR) { emit("_bstr_assign(&%s, _bdata[_bdata_pos].str);\n", cleanName(v->sval)); } else { emit("%s = (%s)_bdata[_bdata_pos].num;\n", cleanName(v->sval), cTypeStr(v->dataType)); } emit("_bdata_pos++;\n"); } break; case NODE_RESTORE: if (n->sval) { emit("_bdata_pos = %d;\n", dataIndexForLabel(n->sval)); } else if (n->ival != 0) { emit("_bdata_pos = %d;\n", dataIndexForLine(n->ival)); } else { emit("_bdata_pos = 0;\n"); } break; case NODE_CONST_DECL: // No runtime code 
for constants — they're substituted at parse time break; case NODE_SWAP: { // Determine the type from the left operand DataType swapType = n->a->dataType; const char *ctype = "double"; if (swapType == TYPE_BYTE) ctype = "uint8_t"; else if (swapType == TYPE_INT) ctype = "int16_t"; else if (swapType == TYPE_LONG) ctype = "int32_t"; else if (swapType == TYPE_FLOAT) ctype = "float"; else if (swapType == TYPE_DBL) ctype = "double"; if (swapType == TYPE_STR) { // String swap: just swap the pointers emit("{ char *_swap_tmp = "); genExpr(n->a); emitRaw("; "); genExpr(n->a); emitRaw(" = "); genExpr(n->b); emitRaw("; "); genExpr(n->b); emitRaw(" = _swap_tmp; }\n"); } else { emit("{ %s _swap_tmp = ", ctype); genExpr(n->a); emitRaw("; "); genExpr(n->a); emitRaw(" = "); genExpr(n->b); emitRaw("; "); genExpr(n->b); emitRaw(" = _swap_tmp; }\n"); } break; } case NODE_RANDOMIZE: if (n->a) { emit("srand((unsigned)("); genExpr(n->a); emitRaw("));\n"); } else { emit("srand((unsigned)time(NULL));\n"); } break; case NODE_SELECT: { // Emit test expression into a temp variable static int selectId = 0; int sid = selectId++; DataType stype = n->a->dataType; if (stype == TYPE_STR) { emit("{ const char *_sel%d = ", sid); genExpr(n->a); emitRaw(";\n"); } else { emit("{ double _sel%d = ", sid); genExpr(n->a); emitRaw(";\n"); } // Emit CASE blocks as if/else if chain int first = 1; for (Node *c = n->b; c; c = c->next) { if (c->ival == 1) { // CASE ELSE if (!first) emit("} else {\n"); else emit("{\n"); } else { if (!first) emit("} else if ("); else emit("if ("); // Emit condition for each value, joined with || int firstVal = 1; for (Node *v = c->a; v; v = v->next) { if (!firstVal) emitRaw(" || "); if (v->ival2 == 1) { // IS comparison: v->ival is the comparison op, v->b is the value emitRaw("(_sel%d ", sid); switch (v->ival) { case TOK_EQ: emitRaw("== "); break; case TOK_NE: emitRaw("!= "); break; case TOK_LT: emitRaw("< "); break; case TOK_GT: emitRaw("> "); break; case TOK_LE: emitRaw("<= "); 
break; case TOK_GE: emitRaw(">= "); break; } genExpr(v->b); emitRaw(")"); } else if (v->ival2 == 2) { // Range: v->a TO v->b emitRaw("(_sel%d >= ", sid); genExpr(v->a); emitRaw(" && _sel%d <= ", sid); genExpr(v->b); emitRaw(")"); } else { // Single value if (stype == TYPE_STR) { emitRaw("(strcmp(_sel%d, ", sid); genExpr(v); emitRaw(") == 0)"); } else { emitRaw("(_sel%d == ", sid); genExpr(v); emitRaw(")"); } } firstVal = 0; } emitRaw(") {\n"); } gIndent++; for (Node *s = c->b; s; s = s->next) genStmt(s); gIndent--; first = 0; } if (!first) emit("}\n"); emit("}\n"); break; } case NODE_ON_GOTO: emit("switch ((int)("); genExpr(n->a); emitRaw(")) {\n"); { int idx = 1; for (Node *lab = n->b; lab; lab = lab->next, idx++) { if (lab->type == NODE_INT_LIT) { emit(" case %d: goto L%d; break;\n", idx, lab->ival); } else { emit(" case %d: goto %s; break;\n", idx, cleanName(lab->sval)); } } } emit("}\n"); break; case NODE_ON_GOSUB: emit("switch ((int)("); genExpr(n->a); emitRaw(")) {\n"); { int idx = 1; int rpid = n->ival2; // first return-point id for (Node *lab = n->b; lab; lab = lab->next, idx++, rpid++) { if (lab->type == NODE_INT_LIT) { emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto L%d; break;\n", idx, rpid, lab->ival); } else { emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto %s; break;\n", idx, rpid, cleanName(lab->sval)); } } } emit("}\n"); // Emit return labels { int rpid = n->ival2; for (Node *lab = n->b; lab; lab = lab->next, rpid++) { emitRaw("_gr%d: ;\n", rpid); } } break; case NODE_MID_ASSIGN: emit("_bmid_assign(&"); genExpr(n->a); emitRaw(", "); genExpr(n->b); emitRaw(", "); genExpr(n->c); emitRaw(", "); genExpr(n->d); emitRaw(");\n"); break; default: emit("/* unhandled node type %d */\n", n->type); break; } } // Generate code for a block (linked list of statements) static void genBlock(Node *blk) { if (!blk) return; Node *s = (blk->type == NODE_BLOCK) ? 
blk->a : blk; while (s) { genStmt(s); s = s->next; } } // Collect all SUB/FUNCTION nodes from the AST into an array static void collectFuncs(Node *n, Node **funcs, int *count, int max) { if (!n) return; if (n->type == NODE_SUB || n->type == NODE_FUNC) { if (*count >= max) fatal(n->line, "Too many SUB/FUNCTION definitions (max %d)", max); funcs[(*count)++] = n; } if (n->type == NODE_BLOCK || n->type == NODE_PROGRAM) { Node *s = n->a; while (s) { collectFuncs(s, funcs, count, max); s = s->next; } } } // Recursively collect all NODE_DATA nodes from the AST static void collectData(Node *n, Node **data, int *count, int max) { if (!n) return; if (n->type == NODE_DATA) { if (*count >= max) fatal(n->line, "Too many DATA statements (max %d)", max); data[(*count)++] = n; } // When a NODE_LABEL is followed by NODE_DATA via ->next, tag the // DATA node with the BASIC line number (stored in ival) or named // label (stored in sval) so that RESTORE can find it. if (n->type == NODE_LABEL && n->next && n->next->type == NODE_DATA) { if (n->sval) n->next->sval = n->sval; else n->next->ival = n->ival; } // Walk into blocks, programs, and sub/function bodies if (n->type == NODE_BLOCK || n->type == NODE_PROGRAM) { Node *s = n->a; while (s) { collectData(s, data, count, max); s = s->next; } } // Also collect from SUB/FUNCTION bodies (DATA is global in BASIC) if (n->type == NODE_SUB || n->type == NODE_FUNC) { collectData(n->b, data, count, max); } // Walk if/else branches if (n->type == NODE_IF) { collectData(n->b, data, count, max); collectData(n->c, data, count, max); } // Walk loop bodies if (n->type == NODE_FOR || n->type == NODE_WHILE || n->type == NODE_DO_LOOP) { Node *body = (n->type == NODE_FOR) ? 
n->d : n->b; collectData(body, data, count, max); } // Walk SELECT CASE bodies if (n->type == NODE_SELECT) { for (Node *c = n->b; c; c = c->next) { for (Node *s = c->b; s; s = s->next) collectData(s, data, count, max); } } } // Global storage for RESTORE line-number-to-data-index mapping #define MAX_DATA_LINES 512 static int gDataLineNums[MAX_DATA_LINES]; static int gDataLineIdxs[MAX_DATA_LINES]; static int gDataLineCount = 0; // Global storage for RESTORE named-label-to-data-index mapping static char *gDataLabelNames[MAX_DATA_LINES]; static int gDataLabelIdxs[MAX_DATA_LINES]; static int gDataLabelCount = 0; // Look up the data index for a RESTORE target line number static int dataIndexForLine(int lnum) { for (int i = 0; i < gDataLineCount; i++) if (gDataLineNums[i] == lnum) return gDataLineIdxs[i]; return 0; // fallback to beginning } // Look up the data index for a RESTORE target named label static int dataIndexForLabel(const char *name) { for (int i = 0; i < gDataLabelCount; i++) if (strIcmp(gDataLabelNames[i], name) == 0) return gDataLabelIdxs[i]; return 0; // fallback to beginning } // Emit the runtime library (debug or release variant). Provides string // operations, temp management, file I/O, and dynamic array support. 
static void emitRuntime(void) { // Common headers and defines — same in both modes fprintf(gOut, "/* ---- BASIC Runtime Library (%s) ---- */\n" "#include \n" "#include \n" "#include \n" "#include \n" "#include \n" "#include \n" "#include \n\n" "#ifdef __GNUC__\n" "#define _BUNUSED __attribute__((unused))\n" "#else\n" "#define _BUNUSED\n" "#endif\n\n" "/* Temporary string pool: collects intermediate strings for cleanup */\n" "#define _BMAX_TEMPS 256\n" "static char *_btemps[_BMAX_TEMPS] _BUNUSED;\n" "static int _btmp_count _BUNUSED = 0;\n\n" "/* Register a heap string as temporary (will be freed by _bfree_temps) */\n" "static _BUNUSED char *_btmp(char *s) {\n" " if (_btmp_count < _BMAX_TEMPS) _btemps[_btmp_count++] = s;\n" " return s;\n" "}\n\n" "/* Free all registered temporary strings */\n" "static _BUNUSED void _bfree_temps(void) {\n" " for (int i = 0; i < _btmp_count; i++) free(_btemps[i]);\n" " _btmp_count = 0;\n" "}\n\n", gRelease ? "release" : "debug" ); // String functions — debug vs release if (gRelease) { fprintf(gOut, "static _BUNUSED char *_bstr(const char *s) {\n" " char *d = (char*)malloc(strlen(s) + 1);\n" " strcpy(d, s);\n" " return d;\n" "}\n\n" "static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n" " if (*dest) free(*dest);\n" " *dest = _bstr(src);\n" "}\n\n" "static _BUNUSED char *_bconcat(const char *a, const char *b) {\n" " size_t la = strlen(a), lb = strlen(b);\n" " char *r = (char*)malloc(la + lb + 1);\n" " memcpy(r, a, la);\n" " memcpy(r + la, b, lb);\n" " r[la + lb] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bchr(int code) {\n" " char *r = (char*)malloc(2);\n" " r[0] = (char)code; r[1] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bstr_of_int(double val) {\n" " char *r = (char*)malloc(64);\n" " if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n" " else sprintf(r, \"%%g\", val);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bmid(const char *s, int start, int len) {\n" " 
int slen = (int)strlen(s);\n" " start--;\n" " if (start < 0) start = 0;\n" " if (start >= slen) return _btmp(_bstr(\"\"));\n" " if (len < 0 || start + len > slen) len = slen - start;\n" " char *r = (char*)malloc(len + 1);\n" " memcpy(r, s + start, len);\n" " r[len] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n" "static _BUNUSED char *_bright(const char *s, int n) {\n" " int slen = (int)strlen(s);\n" " if (n >= slen) return _btmp(_bstr(s));\n" " return _btmp(_bstr(s + slen - n));\n" "}\n\n" "static _BUNUSED char *_bucase(const char *s) {\n" " char *r = _bstr(s);\n" " for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_blcase(const char *s) {\n" " char *r = _bstr(s);\n" " for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n" " const char *p = strstr(haystack, needle);\n" " return p ? (int)(p - haystack) + 1 : 0;\n" "}\n\n" "static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n" ); } else { fprintf(gOut, "static _BUNUSED char *_bstr(const char *s) {\n" " if (!s) s = \"\";\n" " char *d = (char*)malloc(strlen(s) + 1);\n" " if (!d) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" " strcpy(d, s);\n" " return d;\n" "}\n\n" "static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n" " if (*dest) free(*dest);\n" " *dest = _bstr(src ? 
src : \"\");\n" "}\n\n" "static _BUNUSED char *_bconcat(const char *a, const char *b) {\n" " if (!a) a = \"\";\n" " if (!b) b = \"\";\n" " size_t la = strlen(a), lb = strlen(b);\n" " char *r = (char*)malloc(la + lb + 1);\n" " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" " memcpy(r, a, la);\n" " memcpy(r + la, b, lb);\n" " r[la + lb] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bchr(int code) {\n" " char *r = (char*)malloc(2);\n" " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" " r[0] = (char)code; r[1] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bstr_of_int(double val) {\n" " char *r = (char*)malloc(64);\n" " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" " if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n" " else sprintf(r, \"%%g\", val);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bmid(const char *s, int start, int len) {\n" " if (!s) return _btmp(_bstr(\"\"));\n" " int slen = (int)strlen(s);\n" " start--;\n" " if (start < 0) start = 0;\n" " if (start >= slen) return _btmp(_bstr(\"\"));\n" " if (len < 0 || start + len > slen) len = slen - start;\n" " char *r = (char*)malloc(len + 1);\n" " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" " memcpy(r, s + start, len);\n" " r[len] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n" "static _BUNUSED char *_bright(const char *s, int n) {\n" " if (!s) return _btmp(_bstr(\"\"));\n" " int slen = (int)strlen(s);\n" " if (n >= slen) return _btmp(_bstr(s));\n" " return _btmp(_bstr(s + slen - n));\n" "}\n\n" "static _BUNUSED char *_bucase(const char *s) {\n" " if (!s) return _btmp(_bstr(\"\"));\n" " char *r = _bstr(s);\n" " for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_blcase(const char *s) {\n" " if (!s) return _btmp(_bstr(\"\"));\n" " char *r = _bstr(s);\n" " 
for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n" " if (!haystack || !needle) return 0;\n" " const char *p = strstr(haystack, needle);\n" " return p ? (int)(p - haystack) + 1 : 0;\n" "}\n\n" "static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n" ); } // Additional string runtime functions (same in both modes) fprintf(gOut, "static _BUNUSED char *_bltrim(const char *s) {\n" " while (*s == ' ') s++;\n" " return _btmp(_bstr(s));\n" "}\n\n" "static _BUNUSED char *_brtrim(const char *s) {\n" " char *r = _bstr(s);\n" " int len = (int)strlen(r);\n" " while (len > 0 && r[len-1] == ' ') len--;\n" " r[len] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_btrim(const char *s) {\n" " while (*s == ' ') s++;\n" " char *r = _bstr(s);\n" " int len = (int)strlen(r);\n" " while (len > 0 && r[len-1] == ' ') len--;\n" " r[len] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bspace(int n) {\n" " if (n < 0) n = 0;\n" " char *r = (char*)malloc(n + 1);\n" " memset(r, ' ', n);\n" " r[n] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_btab(int col) {\n" " if (col < 1) col = 1;\n" " char *r = (char*)malloc(col);\n" " memset(r, ' ', col - 1);\n" " r[col - 1] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bhex(int n) {\n" " char *r = (char*)malloc(20);\n" " sprintf(r, \"%%X\", (unsigned)n);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_boct(int n) {\n" " char *r = (char*)malloc(24);\n" " sprintf(r, \"%%o\", (unsigned)n);\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bstring_rep(int n, const char *ch) {\n" " if (n < 0) n = 0;\n" " char *r = (char*)malloc(n + 1);\n" " memset(r, ch[0], n);\n" " r[n] = '\\0';\n" " return _btmp(r);\n" "}\n\n" "static _BUNUSED char *_bgetenv(const char *name) {\n" " const char *val = getenv(name);\n" " return _btmp(_bstr(val ? 
val : \"\"));\n" "}\n\n" "static _BUNUSED void _bmid_assign(char **dest, int start, int len, const char *repl) {\n" " int dlen = (int)strlen(*dest);\n" " int rlen = (int)strlen(repl);\n" " start--;\n" " if (start < 0 || start >= dlen) return;\n" " if (len > dlen - start) len = dlen - start;\n" " if (rlen < len) len = rlen;\n" " memcpy(*dest + start, repl, len);\n" "}\n\n" "/* PRINT USING support */\n" "static const char *_busing_fmt _BUNUSED;\n" "static const char *_busing_pos _BUNUSED;\n\n" "static _BUNUSED void _busing_init(const char *fmt) {\n" " _busing_fmt = _busing_pos = fmt ? fmt : \"\";\n" "}\n\n" "static _BUNUSED void _busing_num(double val) {\n" " const char *p = _busing_pos;\n" " int width = 0, decimals = -1, dollar = 0, plus = 0, aster = 0, tminus = 0;\n" " /* Skip literal chars until we find a numeric format start */\n" " while (*p) {\n" " if (*p == '#') break;\n" " if (*p == '*' && p[1] == '*') break;\n" " if (*p == '$' && p[1] == '$') break;\n" " if (*p == '+' && (p[1] == '#' || p[1] == '$' || p[1] == '*')) break;\n" " if (*p == '!' || *p == '&' || *p == '\\\\') break;\n" " putchar(*p++);\n" " }\n" " if (!*p) { _busing_pos = _busing_fmt; return; }\n" " /* Parse numeric format */\n" " if (*p == '+') { plus = 1; p++; }\n" " while (*p == '*') { aster++; width++; p++; }\n" " while (*p == '$') { dollar++; p++; if (dollar > 1) width++; }\n" " while (*p == '#' || *p == ',') { if (*p == '#') width++; p++; }\n" " if (*p == '.') { p++; decimals = 0; while (*p == '#') { decimals++; p++; } }\n" " if (*p == '-') { tminus = 1; p++; }\n" " _busing_pos = p;\n" " /* Format the number */\n" " char buf[64];\n" " double absval = val < 0 ? -val : val;\n" " int neg = (val < 0);\n" " if (decimals >= 0) {\n" " snprintf(buf, sizeof(buf), \"%%.*f\", decimals, absval);\n" " } else {\n" " snprintf(buf, sizeof(buf), \"%%.0f\", absval);\n" " }\n" " int totalw = width + (decimals >= 0 ? decimals + 1 : 0);\n" " int len = (int)strlen(buf);\n" " int signw = (plus || neg) ? 
1 : 0;\n" " int dollarw = dollar ? 1 : 0;\n" " int pad = totalw - len - signw - dollarw;\n" " if (pad < 0) pad = 0;\n" " for (int i = 0; i < pad; i++) putchar(aster >= 2 ? '*' : ' ');\n" " if (plus) putchar(neg ? '-' : '+');\n" " else if (neg && !tminus) putchar('-');\n" " if (dollar) putchar('$');\n" " printf(\"%%s\", buf);\n" " if (tminus && neg) putchar('-');\n" "}\n\n" "static _BUNUSED void _busing_str(const char *val) {\n" " const char *p = _busing_pos;\n" " if (!val) val = \"\";\n" " /* Skip literal chars, print them */\n" " while (*p && *p != '!' && *p != '&' && *p != '\\\\' && *p != '#') {\n" " putchar(*p++);\n" " }\n" " if (!*p) { _busing_pos = _busing_fmt; return; }\n" " if (*p == '!') {\n" " /* First character only */\n" " putchar(val[0] ? val[0] : ' ');\n" " _busing_pos = p + 1;\n" " } else if (*p == '&') {\n" " /* Entire string */\n" " printf(\"%%s\", val);\n" " _busing_pos = p + 1;\n" " } else if (*p == '\\\\') {\n" " /* Fixed width: count chars between backslashes */\n" " p++;\n" " int width = 2;\n" " while (*p && *p != '\\\\') { width++; p++; }\n" " if (*p == '\\\\') p++;\n" " _busing_pos = p;\n" " int len = (int)strlen(val);\n" " for (int i = 0; i < width; i++)\n" " putchar(i < len ? val[i] : ' ');\n" " } else {\n" " _busing_pos = p;\n" " }\n" "}\n\n" "static _BUNUSED void _busing_end(void) {\n" " putchar('\\n');\n" " _busing_pos = _busing_fmt;\n" "}\n\n" ); // Only emit GOSUB stack if there are GOSUB sites, to avoid // -Wunused-variable warnings. 
if (gGosubCount > 0) { fprintf(gOut, "/* GOSUB return stack */\n" "#define _GOSUB_MAX %d\n" "static int _gosub_stack[_GOSUB_MAX];\n" "static int _gosub_sp = 0;\n\n", MAX_GOSUB_SITES ); } // File I/O runtime — debug vs release fprintf(gOut, "/* File I/O support */\n" "#define _BMAX_FILES 16\n" "static FILE *_bfiles[_BMAX_FILES] _BUNUSED = {0};\n" "static long _bfile_reclen[_BMAX_FILES] _BUNUSED = {0};\n\n" ); if (gRelease) { fprintf(gOut, "static _BUNUSED FILE *_bfile_get(int fnum) {\n" " return _bfiles[fnum];\n" "}\n\n" "static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n" " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" " _bfiles[fnum] = fopen(fname, mode);\n" "}\n\n" "static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n" " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" " _bfiles[fnum] = fopen(fname, \"r+b\");\n" " if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n" " _bfile_reclen[fnum] = reclen;\n" "}\n\n" "static _BUNUSED void _bfile_close(int fnum) {\n" " if (_bfiles[fnum]) { fclose(_bfiles[fnum]); _bfiles[fnum] = NULL; }\n" "}\n\n" "static _BUNUSED int _beof(int fnum) {\n" " if (!_bfiles[fnum]) return -1;\n" " int c = fgetc(_bfiles[fnum]);\n" " if (c == EOF) return -1;\n" " ungetc(c, _bfiles[fnum]);\n" " return 0;\n" "}\n\n" "static _BUNUSED long _blof(int fnum) {\n" " if (!_bfiles[fnum]) return 0;\n" " long cur = ftell(_bfiles[fnum]);\n" " fseek(_bfiles[fnum], 0, SEEK_END);\n" " long sz = ftell(_bfiles[fnum]);\n" " fseek(_bfiles[fnum], cur, SEEK_SET);\n" " return sz;\n" "}\n\n" "static _BUNUSED int _bfreefile(void) {\n" " for (int i = 1; i < _BMAX_FILES; i++)\n" " if (!_bfiles[i]) return i;\n" " return 0;\n" "}\n\n" "static _BUNUSED void _bline_input(int fnum, char **dest) {\n" " if (!_bfiles[fnum]) return;\n" " char _buf[4096];\n" " if (fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n" " _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n" " _bstr_assign(dest, _buf);\n" " }\n" "}\n\n" ); } else { 
fprintf(gOut, "static _BUNUSED FILE *_bfile_get(int fnum) {\n" " if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) {\n" " fprintf(stderr, \"Bad file number %%d\\n\", fnum);\n" " exit(1);\n" " }\n" " return _bfiles[fnum];\n" "}\n\n" "static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n" " if (fnum < 1 || fnum >= _BMAX_FILES) {\n" " fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n" " exit(1);\n" " }\n" " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" " _bfiles[fnum] = fopen(fname, mode);\n" " if (!_bfiles[fnum]) {\n" " fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n" " exit(1);\n" " }\n" "}\n\n" "static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n" " if (fnum < 1 || fnum >= _BMAX_FILES) {\n" " fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n" " exit(1);\n" " }\n" " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" " _bfiles[fnum] = fopen(fname, \"r+b\");\n" " if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n" " if (!_bfiles[fnum]) {\n" " fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n" " exit(1);\n" " }\n" " _bfile_reclen[fnum] = reclen;\n" "}\n\n" "static _BUNUSED void _bfile_close(int fnum) {\n" " if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum]) {\n" " fclose(_bfiles[fnum]);\n" " _bfiles[fnum] = NULL;\n" " }\n" "}\n\n" "static _BUNUSED int _beof(int fnum) {\n" " if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return -1;\n" " int c = fgetc(_bfiles[fnum]);\n" " if (c == EOF) return -1;\n" " ungetc(c, _bfiles[fnum]);\n" " return 0;\n" "}\n\n" "static _BUNUSED long _blof(int fnum) {\n" " if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return 0;\n" " long cur = ftell(_bfiles[fnum]);\n" " fseek(_bfiles[fnum], 0, SEEK_END);\n" " long sz = ftell(_bfiles[fnum]);\n" " fseek(_bfiles[fnum], cur, SEEK_SET);\n" " return sz;\n" "}\n\n" "static _BUNUSED int _bfreefile(void) {\n" " for (int i = 1; i < _BMAX_FILES; i++)\n" " if (!_bfiles[i]) return i;\n" " 
return 0;\n" "}\n\n" "static _BUNUSED void _bline_input(int fnum, char **dest) {\n" " char _buf[4096];\n" " if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum] &&\n" " fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n" " _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n" " _bstr_assign(dest, _buf);\n" " }\n" "}\n\n" ); } // DATA/READ support — same in both modes fprintf(gOut, "/* DATA/READ support */\n" "typedef struct { int is_str; double num; const char *str; } _BDataItem;\n\n" ); } // Main code generation: emit the full C source file from the AST static void generate(Node *prog) { // Emit the runtime library emitRuntime(); // Emit UDT struct definitions (packed for binary I/O compatibility) if (gUdtCount > 0) { fprintf(gOut, "/* User-defined types */\n"); fprintf(gOut, "#pragma pack(push, 1)\n"); for (int i = 0; i < gUdtCount; i++) { UdtDef *u = &gUdts[i]; fprintf(gOut, "struct _b_%s {\n", cleanName(u->name)); for (int j = 0; j < u->fieldCount; j++) { UdtField *f = &u->fields[j]; if (f->dataType == TYPE_STR && f->strLen > 0) { fprintf(gOut, " char %s[%d];\n", cleanName(f->name), f->strLen + 1); } else if (f->dataType == TYPE_UDT) { fprintf(gOut, " %s %s;\n", cUdtTypeStr(f->udtIndex), cleanName(f->name)); } else { fprintf(gOut, " %s %s;\n", cTypeStr(f->dataType), cleanName(f->name)); } } fprintf(gOut, "};\n"); } fprintf(gOut, "#pragma pack(pop)\n\n"); } // Collect all DATA nodes and emit the data pool Node *dataNodes[4096]; int dataNodeCount = 0; collectData(prog, dataNodes, &dataNodeCount, 4096); if (dataNodeCount > 0) { // Emit the data pool array fprintf(gOut, "/* DATA pool */\n"); fprintf(gOut, "static _BDataItem _bdata[] = {\n"); int totalItems = 0; gDataLineCount = 0; gDataLabelCount = 0; for (int di = 0; di < dataNodeCount; di++) { Node *dn = dataNodes[di]; // Record BASIC-line-number-to-index mapping for RESTORE. // dn->ival is set by collectData when DATA follows a numeric label. 
if (dn->ival != 0 && gDataLineCount < MAX_DATA_LINES) { gDataLineNums[gDataLineCount] = dn->ival; gDataLineIdxs[gDataLineCount] = totalItems; gDataLineCount++; } // Record named-label-to-index mapping for RESTORE. // dn->sval is set by collectData when DATA follows a named label. if (dn->sval && gDataLabelCount < MAX_DATA_LINES) { gDataLabelNames[gDataLabelCount] = dn->sval; gDataLabelIdxs[gDataLabelCount] = totalItems; gDataLabelCount++; } for (Node *item = dn->a; item; item = item->next) { if (item->dataType == TYPE_STR) { // Escape the string for C output fprintf(gOut, " {1, 0, \""); for (const char *p = item->sval; *p; p++) { if (*p == '"') fprintf(gOut, "\\\""); else if (*p == '\\') fprintf(gOut, "\\\\"); else fputc(*p, gOut); } fprintf(gOut, "\"},\n"); } else if (item->dataType == TYPE_DBL) { fprintf(gOut, " {0, %g, NULL},\n", item->dval); } else { fprintf(gOut, " {0, %d, NULL},\n", item->ival); } totalItems++; } } fprintf(gOut, "};\n"); fprintf(gOut, "static int _bdata_count _BUNUSED = %d;\n", totalItems); fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n"); } else { // No DATA statements — emit empty placeholder fprintf(gOut, "static _BDataItem _bdata[] _BUNUSED = {{0,0,NULL}};\n"); fprintf(gOut, "static int _bdata_count _BUNUSED = 0;\n"); fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n"); } // Collect all SUB/FUNCTION definitions Node *funcs[256]; int funcCount = 0; collectFuncs(prog, funcs, &funcCount, 256); // Emit forward declarations for SUBs and FUNCTIONs if (funcCount > 0) { fprintf(gOut, "/* Forward declarations */\n"); for (int i = 0; i < funcCount; i++) { Node *f = funcs[i]; int isFunc = (f->type == NODE_FUNC); DataType ret = isFunc ? 
f->dataType : TYPE_VOID; fprintf(gOut, "%s %s(", cTypeStr(ret), cleanName(f->sval)); int first = 1; for (Node *p = f->a; p; p = p->next) { if (!first) fprintf(gOut, ", "); first = 0; if (p->ival == PASS_BYREF) fprintf(gOut, "%s*", cTypeStr(p->dataType)); else if (p->dataType == TYPE_STR) fprintf(gOut, "const char*"); else fprintf(gOut, "%s", cTypeStr(p->dataType)); } if (first) fprintf(gOut, "void"); fprintf(gOut, ");\n"); } fprintf(gOut, "\n"); } // Emit SUB/FUNCTION implementations for (int i = 0; i < funcCount; i++) { genFuncDef(funcs[i]); } // Emit main() with global (non-function) statements fprintf(gOut, "/* Main program */\n"); fprintf(gOut, "int main(void) {\n"); gIndent = 1; // Walk the top-level block and emit non-function statements Node *blk = (prog->type == NODE_PROGRAM) ? prog->a : prog; Node *s = (blk && blk->type == NODE_BLOCK) ? blk->a : blk; while (s) { // Skip SUB/FUNCTION definitions (already emitted above) if (s->type != NODE_SUB && s->type != NODE_FUNC) { genStmt(s); } s = s->next; } emit("return 0;\n"); gIndent = 0; fprintf(gOut, "}\n"); } // ----------------------------------------------------------------------- // Section 9: Main Entry Point // ----------------------------------------------------------------------- // Read an entire file into a malloc'd buffer. Returns NULL on failure. 
static char *readFile(const char *path) { FILE *f = fopen(path, "rb"); if (!f) return NULL; if (fseek(f, 0, SEEK_END) != 0) { fclose(f); return NULL; } long len = ftell(f); if (len < 0) { fclose(f); return NULL; } if (len > (long)((unsigned)-1 >> 1)) { // File too large for int-based gSrcLen fclose(f); return NULL; } rewind(f); char *buf = (char *)malloc((size_t)len + 1); if (!buf) { fclose(f); return NULL; } size_t nread = fread(buf, 1, (size_t)len, f); buf[nread] = '\0'; fclose(f); return buf; } // ----------------------------------------------------------------------- // $INCLUDE preprocessor // ----------------------------------------------------------------------- // Extract directory part of a file path (returns malloc'd string) static char *dirName(const char *path) { const char *last = strrchr(path, '/'); if (!last) return strdup("."); size_t len = (size_t)(last - path); char *dir = (char *)malloc(len + 1); memcpy(dir, path, len); dir[len] = '\0'; return dir; } // Join directory and filename (returns malloc'd string) static char *pathJoin(const char *dir, const char *file) { // If file is absolute, return copy of file if (file[0] == '/') return strdup(file); size_t dlen = strlen(dir); size_t flen = strlen(file); char *result = (char *)malloc(dlen + 1 + flen + 1); memcpy(result, dir, dlen); result[dlen] = '/'; memcpy(result + dlen + 1, file, flen); result[dlen + 1 + flen] = '\0'; return result; } // Growing buffer for source assembly typedef struct { char *data; size_t len; size_t cap; } SourceBuf; static void sbInit(SourceBuf *sb) { sb->cap = 4096; sb->data = (char *)malloc(sb->cap); sb->len = 0; sb->data[0] = '\0'; } static void sbAppend(SourceBuf *sb, const char *s, size_t n) { while (sb->len + n + 1 > sb->cap) { sb->cap *= 2; sb->data = (char *)realloc(sb->data, sb->cap); } memcpy(sb->data + sb->len, s, n); sb->len += n; sb->data[sb->len] = '\0'; } // Case-insensitive prefix check static int strNIcmp(const char *a, const char *b, size_t n) { for (size_t 
i = 0; i < n; i++) { int ca = toupper((unsigned char)a[i]); int cb = toupper((unsigned char)b[i]); if (ca != cb) return ca - cb; if (ca == 0) return 0; } return 0; } // Process a source file, expanding $INCLUDE directives. // Appends to the SourceBuf and gLineMap. static void preprocessFile(const char *filePath, SourceBuf *sb, const char **includeStack, int includeDepth) { // Check depth if (includeDepth >= MAX_INCLUDE_DEPTH) { fprintf(stderr, "Error: $INCLUDE nested too deeply (max %d) at '%s'\n", MAX_INCLUDE_DEPTH, filePath); exit(1); } // Check circular includes for (int i = 0; i < includeDepth; i++) { if (strcmp(includeStack[i], filePath) == 0) { fprintf(stderr, "Error: Circular $INCLUDE detected: '%s'\n", filePath); exit(1); } } // Read file char *text = readFile(filePath); if (!text) { fprintf(stderr, "Error: Cannot open '%s'", filePath); if (includeDepth > 0) fprintf(stderr, " (included from '%s')", includeStack[includeDepth - 1]); fprintf(stderr, "\n"); exit(1); } const char *fname = internFileName(filePath); char *baseDir = dirName(filePath); // Push onto include stack includeStack[includeDepth] = filePath; // Process line by line const char *p = text; int origLine = 0; while (*p) { origLine++; // Find end of line const char *lineStart = p; while (*p && *p != '\n') p++; size_t lineLen = (size_t)(p - lineStart); if (*p == '\n') p++; // consume newline // Check for '$INCLUDE: directive // Format: '$INCLUDE: 'filename' // Leading spaces are allowed before the ' const char *s = lineStart; while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++; int isInclude = 0; char incFile[MAX_TOKEN_LEN] = {0}; // Check for ' (comment start) followed by $INCLUDE: if (s < lineStart + lineLen && *s == '\'') { s++; // skip ' // Skip optional spaces between ' and $ while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++; if (s + 9 <= lineStart + lineLen && strNIcmp(s, "$INCLUDE:", 9) == 0) { s += 9; // Skip spaces while (s < lineStart + lineLen && (*s == ' ' 
|| *s == '\t')) s++; // Extract filename between single quotes if (s < lineStart + lineLen && *s == '\'') { s++; const char *fnStart = s; while (s < lineStart + lineLen && *s != '\'') s++; if (s > fnStart && s < lineStart + lineLen) { size_t fnLen = (size_t)(s - fnStart); if (fnLen < MAX_TOKEN_LEN) { memcpy(incFile, fnStart, fnLen); incFile[fnLen] = '\0'; isInclude = 1; } } } } } if (isInclude) { // Resolve path relative to current file's directory char *resolvedPath = pathJoin(baseDir, incFile); preprocessFile(resolvedPath, sb, includeStack, includeDepth + 1); free(resolvedPath); } else { // Record line map entry if (gLineMapCount < MAX_SOURCE_LINES) { gLineMap[gLineMapCount].fileName = fname; gLineMap[gLineMapCount].origLine = origLine; gLineMapCount++; } // Append line (with newline) sbAppend(sb, lineStart, lineLen); sbAppend(sb, "\n", 1); } } free(baseDir); free(text); } // Top-level preprocessor entry point static char *preprocessSource(const char *filePath) { SourceBuf sb; sbInit(&sb); const char *includeStack[MAX_INCLUDE_DEPTH]; preprocessFile(filePath, &sb, includeStack, 0); return sb.data; } int main(int argc, char **argv) { // Check for --release / -r flag int argi = 1; if (argc > 1 && (strcmp(argv[1], "--release") == 0 || strcmp(argv[1], "-r") == 0)) { gRelease = 1; argi++; } if (argi >= argc) { fprintf(stderr, "Usage: basic2c [--release|-r] input.bas [output.c]\n"); fprintf(stderr, "External functions can be defined in functions.def\n"); return 1; } // Load external function definitions from functions.def in binary's directory { const char *binPath = argv[0]; const char *lastSlash = strrchr(binPath, '/'); if (lastSlash) { size_t dirLen = lastSlash - binPath + 1; char *defPath = malloc(dirLen + 14); // "functions.def" + null memcpy(defPath, binPath, dirLen); strcpy(defPath + dirLen, "functions.def"); loadExternFuncs(defPath); free(defPath); } else { // Binary in current directory or bare name - try current directory loadExternFuncs("functions.def"); } } 
// Also load from input file's directory (may add more or override) { const char *inputPath = argv[argi]; const char *lastSlash = strrchr(inputPath, '/'); if (lastSlash) { size_t dirLen = lastSlash - inputPath + 1; char *defPath = malloc(dirLen + 14); // "functions.def" + null memcpy(defPath, inputPath, dirLen); strcpy(defPath + dirLen, "functions.def"); loadExternFuncs(defPath); free(defPath); } // Don't load from current dir again if binary was there } // Read and preprocess source file (expands $INCLUDE directives) char *source = preprocessSource(argv[argi]); // Open output file (or stdout) if (argi + 1 < argc) { gOut = fopen(argv[argi + 1], "w"); if (!gOut) { fprintf(stderr, "Error: Cannot create '%s'\n", argv[argi + 1]); free(source); return 1; } } else { gOut = stdout; } // Initialize lexer state gSrc = source; gSrcPos = 0; size_t slen = strlen(source); if (slen > (size_t)((unsigned)-1 >> 1)) { fprintf(stderr, "Error: Source file too large (%zu bytes)\n", slen); free(source); return 1; } gSrcLen = (int)slen; gLine = 1; // Parse the BASIC source into an AST Node *program = parseProgram(); // Generate C code from the AST generate(program); // Cleanup if (gOut != stdout) fclose(gOut); free(source); return 0; }