5571 lines
182 KiB
C
5571 lines
182 KiB
C
// ============================================================================
// basic2c.c - A BASIC to C Transpiler
//
// Translates BASIC source code into equivalent C source code.
//
// Supported features:
//   - Classic line-numbered BASIC and named labels (GOTO, GOSUB/RETURN)
//   - Modern structured BASIC (SUB, FUNCTION, IF/END IF, etc.)
//   - Data types: BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING
//   - User-defined types (TYPE/END TYPE) with nesting and SIZEOF
//   - Dynamic arrays (DIM, REDIM), multidimensional (up to N-D)
//   - Parameter passing: BYVAL (by value) and BYREF (by reference)
//   - LOCAL and STATIC variable declarations inside SUB/FUNCTION
//   - Constants (CONST) with compile-time substitution
//   - Control flow: IF/ELSEIF/ELSE, FOR/NEXT, WHILE/WEND, DO/LOOP,
//     SELECT CASE, ON GOTO, ON GOSUB, EXIT, CONTINUE
//   - PRINT statement with ? shortcut, PRINT USING for formatted output
//   - Operators: arithmetic, comparison, string concatenation (+, &),
//     bitwise/logical AND, OR, NOT, XOR
//   - DATA/READ/RESTORE for inline data
//   - File I/O: OPEN/CLOSE, PRINT #, INPUT #, LINE INPUT #, WRITE #
//   - Random-access file I/O: GET, PUT with record numbers
//   - String functions: LEN, MID$, LEFT$, RIGHT$, STR$, VAL, CHR$,
//     ASC, UCASE$, LCASE$, INSTR, STRING$, LTRIM$, RTRIM$, TRIM$,
//     SPACE$, HEX$, OCT$, MID$ assignment
//   - Print functions: TAB, SPC for cursor positioning
//   - Math functions: ABS, INT, SQR, SIN, COS, TAN, ATN, LOG, EXP,
//     SGN, RND (optional argument ignored), RANDOMIZE
//   - Array functions: LBOUND, UBOUND
//   - I/O functions: EOF, LOF, FREEFILE
//   - SWAP for exchanging variable values
//   - $INCLUDE metacommand for file inclusion with nested include
//     support, circular detection, and file+line error reporting
//   - Extensible built-in functions via builtins.def (compile-time)
//   - External function definitions via functions.def (runtime)
//   - Debug and release runtime modes (--release or -r flag)
//
// Usage: basic2c [--release|-r] input.bas [output.c]
//        If output.c is omitted, C code is written to stdout.
//
// Build: cc -o basic2c basic2c.c -lm
//
// Architecture:
//   1. Preprocessor - processes $INCLUDE directives, builds line map
//   2. Lexer        - tokenizes BASIC source (case-insensitive keywords)
//   3. Parser       - recursive descent, builds an AST
//   4. Codegen      - walks AST, emits C source with a small runtime library
// ============================================================================
|
||
|
||
#include <ctype.h>
#include <limits.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 1: Constants and Limits
|
||
// -----------------------------------------------------------------------
|
||
#define MAX_TOKEN_LEN 4096 // max length of one token string
|
||
#define MAX_IDENT 128 // max identifier length
|
||
#define MAX_PARAMS 32 // max parameters per SUB/FUNCTION
|
||
#define MAX_SYMBOLS 2048 // symbol table capacity
|
||
#define MAX_GOSUB_SITES 512 // max GOSUB return-point IDs
|
||
#define MAX_LINE_LABELS 4096 // max classic line-number labels
|
||
#define MAX_NODES 65536 // AST node pool size
|
||
#define MAX_ARGS 64 // max arguments in a PRINT / CALL list
|
||
#define MAX_SOURCE_LINES 65536 // max lines in preprocessed source
|
||
#define MAX_INCLUDE_DEPTH 16 // max nested $INCLUDE depth
|
||
#define MAX_INCLUDE_FILES 64 // max distinct included filenames
|
||
#define MAX_EXTERN_FUNCS 128 // max external function definitions
|
||
#define MAX_EXTERN_CODE 256 // max C code template length
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 2: Enumerations
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Token kinds produced by the lexer. TOK_EOF is pinned to 0; every other
// value follows from declaration order.
typedef enum {
    // ---------- structure and literals ----------
    TOK_EOF = 0,
    TOK_NEWLINE,    // end of line (statement separator)
    TOK_COLON,      // : (statement separator on same line)
    TOK_INT_LIT,    // integer literal
    TOK_DBL_LIT,    // floating-point literal
    TOK_STR_LIT,    // "..." string literal
    TOK_IDENT,      // identifier (variable / sub / function name)

    // ---------- keywords ----------
    // DATA, READ, RESTORE, GET, PUT, RANDOM, SIZEOF are contextual
    // keywords (checked as TOK_IDENT to avoid colliding with variable names)
    TOK_DIM, TOK_REDIM, TOK_AS,
    TOK_BYTE, TOK_INTEGER, TOK_LONG, TOK_FLOAT, TOK_DOUBLE, TOK_STRING,
    TOK_LET, TOK_PRINT, TOK_INPUT,
    TOK_IF, TOK_THEN, TOK_ELSE, TOK_ELSEIF, TOK_END,
    TOK_FOR, TOK_TO, TOK_STEP, TOK_NEXT,
    TOK_WHILE, TOK_WEND,
    TOK_DO, TOK_LOOP, TOK_UNTIL,
    TOK_GOTO, TOK_GOSUB, TOK_RETURN,
    TOK_SUB, TOK_FUNCTION, TOK_CALL,
    TOK_BYVAL, TOK_BYREF,
    TOK_LOCAL, TOK_STATIC,
    TOK_EXIT,
    TOK_AND, TOK_OR, TOK_NOT, TOK_MOD, TOK_XOR,
    TOK_SELECT, TOK_CASE, TOK_SWAP, TOK_CONST, TOK_ON,
    TOK_REM,
    TOK_OPEN, TOK_CLOSE, TOK_OUTPUT, TOK_APPEND, TOK_BINARY,
    TOK_LINE, TOK_WRITE,

    // ---------- operators / punctuation ----------
    TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_BSLASH,
    TOK_CARET,
    TOK_EQ, TOK_NE, TOK_LT, TOK_GT, TOK_LE, TOK_GE,
    TOK_LPAREN, TOK_RPAREN, TOK_COMMA, TOK_SEMI,
    TOK_AMP,        // & string concatenation
    TOK_HASH,       // # file number prefix
    TOK_DOT,        // . member access

    // Declared last although it is a keyword (TYPE, user-defined types).
    TOK_TYPE
} TokenType;
|
||
|
||
// AST node kinds. Each comment lists which Node fields the kind uses
// (a/b/c/d = child pointers, sval/ival/ival2/dval = payload).
typedef enum {
    // ---------- containers ----------
    NODE_PROGRAM,       // root: a = first top-level item (linked->next)
    NODE_BLOCK,         // block of statements: a = first stmt

    // ---------- expressions ----------
    NODE_INT_LIT,       // ival = value
    NODE_DBL_LIT,       // dval = value
    NODE_STR_LIT,       // sval = string content
    NODE_IDENT,         // sval = name
    NODE_ARRAY_REF,     // sval = name, a = index exprs (linked list)
    NODE_BINOP,         // ival = op token, a = left, b = right
    NODE_UNOP,          // ival = op token, a = operand

    // ---------- statements ----------
    NODE_ASSIGN,        // a = target (IDENT/ARRAY_REF), b = value
    NODE_DIM,           // sval=name, dataType, a=sizes (list), ival=ndims
    NODE_REDIM,         // sval=name, dataType, a=sizes (list), ival=ndims
    NODE_PRINT,         // a = first print-item (linked->next)
    NODE_PRINT_ITEM,    // a = expr, ival = separator after (';'=1,','=2)
    NODE_PRINT_USING,   // a = format expr, b = value list (linked->next)
    NODE_INPUT,         // sval = prompt (or NULL), a = first var ->next
    NODE_IF,            // a=cond, b=then-block, c=else-part
    NODE_FOR,           // sval=var, a=start, b=end, c=step, d=body
    NODE_WHILE,         // a=cond, b=body
    NODE_DO_LOOP,       // a=cond, b=body, ival bits: 1=UNTIL,2=bottom
    NODE_GOTO,          // ival=line# or sval=label
    NODE_GOSUB,         // ival=line#, ival2=return-point-id
    NODE_RETURN,        // a=expr (FUNCTION return) or NULL
    NODE_LABEL,         // ival=line number
    NODE_SUB,           // sval=name, a=param list, b=body
    NODE_FUNC,          // sval=name, a=params, b=body, dataType=ret
    NODE_PARAM,         // sval=name, dataType, ival=passMode
    NODE_CALL,          // sval=name, a=arg list (linked->next)
    NODE_FUNC_CALL,     // sval=name, a=arg list (linked->next)
    NODE_EXIT,          // ival=what (TOK_FOR,TOK_WHILE,TOK_DO,etc)
    NODE_CONTINUE,      // ival=what (TOK_FOR,TOK_WHILE,TOK_DO)
    NODE_LOCAL,         // sval=name, dataType
    NODE_STATIC,        // sval=name, dataType
    NODE_END,           // END statement

    // ---------- file I/O ----------
    NODE_OPEN,          // a=filename, b=file# expr, ival=mode
    NODE_CLOSE,         // b=file# expr
    NODE_FILE_PRINT,    // b=file# expr, a=print items
    NODE_FILE_INPUT,    // b=file# expr, a=variable list
    NODE_LINE_INPUT,    // b=file# expr, a=target variable
    NODE_FILE_WRITE,    // b=file# expr, a=expression list

    // ---------- DATA / READ / RESTORE ----------
    NODE_DATA,          // a=linked list of literal items, line=source ln
    NODE_READ,          // a=linked list of NODE_IDENT vars to read into
    NODE_RESTORE,       // ival=target line number (0=beginning)

    // ---------- user-defined types, random access, misc ----------
    NODE_TYPE_DEF,      // sval=type name, ival=udtIndex
    NODE_DOT_ACCESS,    // a=base expr, sval=field name, ival2=udtIndex
    NODE_GET,           // a=file# expr, b=record# expr, c=var
    NODE_PUT,           // a=file# expr, b=record# expr, c=var
    NODE_SELECT,        // a=test expr, b=first NODE_CASE (linked->next)
    NODE_CASE,          // a=value exprs (linked), b=body block, ival=flags
    NODE_SWAP,          // a=first var, b=second var
    NODE_CONST_DECL,    // sval=name, a=value expr
    NODE_RANDOMIZE,     // a=seed expr (or NULL)
    NODE_ON_GOTO,       // a=expr, b=label list (NODE_INT_LIT/NODE_IDENT)
    NODE_ON_GOSUB,      // a=expr, b=label list, ival2=first return-point-id
    NODE_MID_ASSIGN     // a=target string var, b=start, c=len, d=replacement
} NodeType;
|
||
|
||
// BASIC data types. The numeric members are declared in promotion-rank
// order (BYTE < INT < LONG < FLOAT < DBL) so that promoteType() can pick
// the wider of two numeric types by taking the larger enum value.
typedef enum {
    TYPE_VOID = 0,  // used for SUB (no return value)
    TYPE_BYTE,      // BYTE    -> uint8_t
    TYPE_INT,       // INTEGER -> int16_t
    TYPE_LONG,      // LONG    -> int32_t
    TYPE_FLOAT,     // FLOAT   -> float
    TYPE_DBL,       // DOUBLE  -> double
    TYPE_STR,       // STRING  -> char*
    TYPE_UDT        // user-defined TYPE -> struct
} DataType;
|
||
|
||
// How an argument is handed to a SUB/FUNCTION parameter.
typedef enum {
    PASS_BYVAL = 0,     // BYVAL: pass by value
    PASS_BYREF = 1      // BYREF: pass by reference
} PassMode;
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 3: Data Structures
|
||
// -----------------------------------------------------------------------
|
||
|
||
// A single token from the lexer
|
||
typedef struct {
|
||
TokenType type;
|
||
int line; // source line where token appears
|
||
int ival; // integer value (for TOK_INT_LIT)
|
||
double dval; // double value (for TOK_DBL_LIT)
|
||
char sval[MAX_TOKEN_LEN]; // string payload
|
||
} Token;
|
||
|
||
// AST node – compact tagged structure.
|
||
// Child pointers a,b,c,d have node-type-specific meanings (see enum).
|
||
// The 'next' pointer chains siblings (statement lists, param lists).
|
||
typedef struct Node {
|
||
NodeType type;
|
||
DataType dataType; // expression result type / decl type
|
||
int ival; // multi-purpose int (operator, flags)
|
||
int ival2; // secondary int (e.g. gosub return id)
|
||
double dval; // double literal value
|
||
char *sval; // identifier name / string literal
|
||
struct Node *a, *b, *c, *d;// child pointers
|
||
struct Node *next; // next sibling in a list
|
||
int line; // source line for error messages
|
||
} Node;
|
||
|
||
// Symbol table entry – tracks variables, arrays, subs, functions
|
||
typedef struct {
|
||
char name[MAX_IDENT];
|
||
DataType dataType;
|
||
int isArray; // 1 if dynamic array
|
||
int ndims; // number of dimensions (0=scalar)
|
||
int isFunc; // 1 = FUNCTION, 2 = SUB
|
||
int paramCount;
|
||
DataType paramTypes[MAX_PARAMS];
|
||
PassMode paramModes[MAX_PARAMS];
|
||
char paramNames[MAX_PARAMS][MAX_IDENT];
|
||
DataType returnType; // for functions
|
||
int udtIndex; // index into gUdts[] for TYPE_UDT
|
||
} Symbol;
|
||
|
||
// User-defined type (UDT) support
|
||
#define MAX_UDTS 64
|
||
#define MAX_UDT_FIELDS 32
|
||
|
||
typedef struct {
|
||
char name[MAX_IDENT];
|
||
DataType dataType;
|
||
int strLen; // >0 for STRING * N (fixed-length)
|
||
int udtIndex; // index into gUdts[] if TYPE_UDT
|
||
} UdtField;
|
||
|
||
typedef struct {
|
||
char name[MAX_IDENT];
|
||
UdtField fields[MAX_UDT_FIELDS];
|
||
int fieldCount;
|
||
} UdtDef;
|
||
|
||
static UdtDef gUdts[MAX_UDTS];
|
||
static int gUdtCount = 0;
|
||
static int gLastUdtIndex = -1; // side-channel from parseType()
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 4: Global State
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Runtime mode: 0=debug (with error checks), 1=release (minimal)
|
||
static int gRelease = 0;
|
||
|
||
// Line map entry: ties one merged-source line back to the file and line
// it originally came from.
typedef struct {
    // Interned filename pointer
    const char *fileName;
    // 1-based line in the original file
    int origLine;
} LineMapEntry;
|
||
|
||
static LineMapEntry gLineMap[MAX_SOURCE_LINES];
|
||
static int gLineMapCount = 0;
|
||
|
||
// Interned filename pool
|
||
static char *gFileNames[MAX_INCLUDE_FILES];
|
||
static int gFileNameCount = 0;
|
||
|
||
static const char *internFileName(const char *name) {
|
||
for (int i = 0; i < gFileNameCount; i++)
|
||
if (strcmp(gFileNames[i], name) == 0) return gFileNames[i];
|
||
if (gFileNameCount >= MAX_INCLUDE_FILES) {
|
||
fprintf(stderr, "Too many include files (max %d)\n", MAX_INCLUDE_FILES);
|
||
exit(1);
|
||
}
|
||
gFileNames[gFileNameCount] = strdup(name);
|
||
return gFileNames[gFileNameCount++];
|
||
}
|
||
|
||
// Source code
|
||
static const char *gSrc = NULL; // source text
|
||
static int gSrcPos = 0; // current read position
|
||
static int gSrcLen = 0; // total source length
|
||
static int gLine = 1; // current source line number
|
||
|
||
// Current and peek tokens for the recursive-descent parser
|
||
static Token gTok; // current token
|
||
|
||
// AST node pool – simple bump allocator (nodes live until exit)
|
||
static Node gNodePool[MAX_NODES];
|
||
static int gNodeCount = 0;
|
||
|
||
// Symbol table
|
||
static Symbol gSyms[MAX_SYMBOLS];
|
||
static int gSymCount = 0;
|
||
|
||
// GOSUB bookkeeping: count of GOSUB sites for generating return switch
|
||
static int gGosubCount = 0;
|
||
|
||
// Collected line-number labels for the RETURN dispatch table
|
||
static int gLineLabels[MAX_LINE_LABELS];
|
||
static int gLineLabelCount = 0;
|
||
|
||
// Line numbers that are actually targeted by GOTO or GOSUB.
|
||
// Only these need C labels emitted to avoid -Wunused-label.
|
||
static int gGotoTargets[MAX_LINE_LABELS];
|
||
static int gGotoTargetCount = 0;
|
||
|
||
// Named (string) labels targeted by GOTO or GOSUB
|
||
static char *gGotoStrTargets[MAX_LINE_LABELS];
|
||
static int gGotoStrTargetCount = 0;
|
||
|
||
// Compile-time constant table (for CONST declarations)
|
||
#define MAX_CONSTS 256
|
||
typedef struct {
|
||
char name[MAX_IDENT];
|
||
DataType dataType;
|
||
double numVal;
|
||
char strVal[MAX_TOKEN_LEN];
|
||
} ConstDef;
|
||
static ConstDef gConsts[MAX_CONSTS];
|
||
static int gConstCount = 0;
|
||
|
||
// External function definitions (loaded from functions.def)
|
||
typedef struct {
|
||
char name[MAX_IDENT]; // BASIC function name (e.g., "CEIL")
|
||
DataType returnType; // return type
|
||
char cCode[MAX_EXTERN_CODE]; // C code template (% = arg, %1 %2 = numbered)
|
||
} ExternFunc;
|
||
static ExternFunc gExternFuncs[MAX_EXTERN_FUNCS];
|
||
static int gExternFuncCount = 0;
|
||
|
||
// Built-in function definitions (from builtins.def at compile time)
|
||
typedef struct {
|
||
const char *name;
|
||
DataType returnType;
|
||
const char *cCode;
|
||
} BuiltinDef;
|
||
|
||
#define BUILTIN(n, t, c) {n, t, c},
|
||
static const BuiltinDef gBuiltinDefs[] = {
|
||
#include "builtins.def"
|
||
{NULL, 0, NULL} // sentinel
|
||
};
|
||
#undef BUILTIN
|
||
|
||
// Code-generator state
|
||
static int gIndent = 0; // current indentation depth
|
||
static FILE *gOut = NULL; // output file handle
|
||
|
||
// Track whether we are inside a SUB/FUNCTION (for scope)
|
||
static int gInFunc = 0;
|
||
static const char *gFuncName = NULL; // current function name
|
||
static DataType gFuncRet = TYPE_VOID;
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 5: Utility Functions
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Report a fatal error with source file/line and exit
|
||
static void fatal(int line, const char *fmt, ...) {
|
||
va_list ap;
|
||
if (line > 0 && line <= gLineMapCount) {
|
||
LineMapEntry *e = &gLineMap[line - 1];
|
||
fprintf(stderr, "Error (%s:%d): ", e->fileName, e->origLine);
|
||
} else {
|
||
fprintf(stderr, "Error (line %d): ", line);
|
||
}
|
||
va_start(ap, fmt);
|
||
vfprintf(stderr, fmt, ap);
|
||
va_end(ap);
|
||
fprintf(stderr, "\n");
|
||
exit(1);
|
||
}
|
||
|
||
|
||
// Allocate a new AST node from the pool
|
||
static Node *newNode(NodeType type, int line) {
|
||
if (gNodeCount >= MAX_NODES)
|
||
fatal(line, "AST node pool exhausted (max %d)", MAX_NODES);
|
||
Node *n = &gNodePool[gNodeCount++];
|
||
memset(n, 0, sizeof(*n));
|
||
n->type = type;
|
||
n->line = line;
|
||
return n;
|
||
}
|
||
|
||
|
||
// Return a heap-allocated copy of 's', or NULL when 's' is NULL.
// Prints "Out of memory" and exits if the allocation fails.
static char *strDup(const char *s) {
    if (s == NULL)
        return NULL;

    size_t size = strlen(s) + 1;    // include the terminator
    char *copy = malloc(size);
    if (copy == NULL) {
        fprintf(stderr, "Out of memory\n");
        exit(1);
    }
    memcpy(copy, s, size);
    return copy;
}
|
||
|
||
|
||
// Case-insensitive equality test.
// Returns 0 when the strings match ignoring case, 1 otherwise (this is not
// an ordering comparison). Two NULL pointers compare equal; a NULL and a
// non-NULL pointer compare unequal.
static int strIcmp(const char *a, const char *b) {
    if (a == NULL || b == NULL)
        return a != b;

    for (; *a != '\0' && *b != '\0'; a++, b++) {
        int ca = toupper((unsigned char)*a);
        int cb = toupper((unsigned char)*b);
        if (ca != cb)
            return 1;
    }

    // Equal only if both strings ended at the same point.
    return *a != *b;
}
|
||
|
||
|
||
// Look up an external function by name; returns pointer or NULL
|
||
static ExternFunc *externFuncLookup(const char *name) {
|
||
for (int i = 0; i < gExternFuncCount; i++)
|
||
if (strIcmp(gExternFuncs[i].name, name) == 0)
|
||
return &gExternFuncs[i];
|
||
return NULL;
|
||
}
|
||
|
||
|
||
// Look up a built-in function definition by name; returns pointer or NULL
|
||
static const BuiltinDef *builtinDefLookup(const char *name) {
|
||
for (int i = 0; gBuiltinDefs[i].name; i++)
|
||
if (strIcmp(gBuiltinDefs[i].name, name) == 0)
|
||
return &gBuiltinDefs[i];
|
||
return NULL;
|
||
}
|
||
|
||
|
||
// Parse a type name from definition file
|
||
static DataType parseTypeName(const char *s) {
|
||
while (*s == ' ') s++;
|
||
if (strIcmp(s, "byte") == 0) return TYPE_BYTE;
|
||
if (strIcmp(s, "integer") == 0) return TYPE_INT;
|
||
if (strIcmp(s, "long") == 0) return TYPE_LONG;
|
||
if (strIcmp(s, "float") == 0) return TYPE_FLOAT;
|
||
if (strIcmp(s, "double") == 0) return TYPE_DBL;
|
||
if (strIcmp(s, "string") == 0) return TYPE_STR;
|
||
return TYPE_DBL; // default
|
||
}
|
||
|
||
|
||
// Load external function definitions from a file
|
||
// Format: name : type : c_code
|
||
// Lines starting with # are comments, blank lines ignored
|
||
static void loadExternFuncs(const char *filename) {
|
||
FILE *f = fopen(filename, "r");
|
||
if (!f) return; // file not found is OK, just no external funcs
|
||
|
||
char line[512];
|
||
while (fgets(line, sizeof(line), f)) {
|
||
// Skip comments and blank lines
|
||
char *p = line;
|
||
while (*p == ' ' || *p == '\t') p++;
|
||
if (*p == '#' || *p == '\n' || *p == '\0') continue;
|
||
|
||
// Parse: name : type : c_code
|
||
char *colon1 = strchr(p, ':');
|
||
if (!colon1) continue;
|
||
char *colon2 = strchr(colon1 + 1, ':');
|
||
if (!colon2) continue;
|
||
|
||
if (gExternFuncCount >= MAX_EXTERN_FUNCS) {
|
||
fprintf(stderr, "Warning: too many external functions, ignoring rest\n");
|
||
break;
|
||
}
|
||
|
||
ExternFunc *ef = &gExternFuncs[gExternFuncCount];
|
||
|
||
// Extract name (trim whitespace)
|
||
*colon1 = '\0';
|
||
char *name = p;
|
||
while (*name == ' ' || *name == '\t') name++;
|
||
char *nameEnd = colon1 - 1;
|
||
while (nameEnd > name && (*nameEnd == ' ' || *nameEnd == '\t')) nameEnd--;
|
||
nameEnd[1] = '\0';
|
||
strncpy(ef->name, name, MAX_IDENT - 1);
|
||
ef->name[MAX_IDENT - 1] = '\0';
|
||
|
||
// Extract type
|
||
*colon2 = '\0';
|
||
char *typeStr = colon1 + 1;
|
||
while (*typeStr == ' ' || *typeStr == '\t') typeStr++;
|
||
char *typeEnd = colon2 - 1;
|
||
while (typeEnd > typeStr && (*typeEnd == ' ' || *typeEnd == '\t')) typeEnd--;
|
||
typeEnd[1] = '\0';
|
||
ef->returnType = parseTypeName(typeStr);
|
||
|
||
// Extract C code template (trim leading whitespace and trailing newline)
|
||
char *code = colon2 + 1;
|
||
while (*code == ' ' || *code == '\t') code++;
|
||
size_t codeLen = strlen(code);
|
||
while (codeLen > 0 && (code[codeLen-1] == '\n' || code[codeLen-1] == '\r' ||
|
||
code[codeLen-1] == ' ' || code[codeLen-1] == '\t'))
|
||
codeLen--;
|
||
if (codeLen >= MAX_EXTERN_CODE) codeLen = MAX_EXTERN_CODE - 1;
|
||
strncpy(ef->cCode, code, codeLen);
|
||
ef->cCode[codeLen] = '\0';
|
||
|
||
gExternFuncCount++;
|
||
}
|
||
|
||
fclose(f);
|
||
}
|
||
|
||
|
||
// Look up a symbol by name; returns pointer to entry or NULL
|
||
static Symbol *symLookup(const char *name) {
|
||
for (int i = 0; i < gSymCount; i++)
|
||
if (strIcmp(gSyms[i].name, name) == 0)
|
||
return &gSyms[i];
|
||
return NULL;
|
||
}
|
||
|
||
|
||
// Forward declaration
|
||
static int isKeyword(const char *name);
|
||
|
||
static Symbol *symAdd(const char *name) {
|
||
if (isKeyword(name))
|
||
fatal(gLine, "Cannot use keyword '%s' as identifier", name);
|
||
Symbol *s = symLookup(name);
|
||
if (s) return s;
|
||
if (gSymCount >= MAX_SYMBOLS)
|
||
fatal(gLine, "Symbol table full");
|
||
s = &gSyms[gSymCount++];
|
||
memset(s, 0, sizeof(*s));
|
||
strncpy(s->name, name, MAX_IDENT - 1);
|
||
return s;
|
||
}
|
||
|
||
|
||
// Look up a user-defined type by name; returns index or -1
|
||
static int udtLookup(const char *name) {
|
||
for (int i = 0; i < gUdtCount; i++)
|
||
if (strIcmp(gUdts[i].name, name) == 0)
|
||
return i;
|
||
return -1;
|
||
}
|
||
|
||
|
||
// Look up a field within a UDT; returns field index or -1
|
||
static int udtFieldLookup(int udtIdx, const char *field) {
|
||
if (udtIdx < 0 || udtIdx >= gUdtCount) return -1;
|
||
UdtDef *u = &gUdts[udtIdx];
|
||
for (int i = 0; i < u->fieldCount; i++)
|
||
if (strIcmp(u->fields[i].name, field) == 0)
|
||
return i;
|
||
return -1;
|
||
}
|
||
|
||
|
||
// Check if 'name' is a BYREF parameter of the current function.
|
||
// Returns 1 if so, 0 otherwise. Used during code generation to
|
||
// emit pointer dereferences for BYREF params.
|
||
static int isByrefParam(const char *name) {
|
||
if (!gInFunc || !gFuncName) return 0;
|
||
Symbol *fsym = symLookup(gFuncName);
|
||
if (!fsym) return 0;
|
||
for (int i = 0; i < fsym->paramCount; i++) {
|
||
if (strIcmp(fsym->paramNames[i], name) == 0 &&
|
||
fsym->paramModes[i] == PASS_BYREF)
|
||
return 1;
|
||
}
|
||
return 0;
|
||
}
|
||
|
||
|
||
// Emit indented text to the output file
|
||
static void emit(const char *fmt, ...) {
|
||
va_list ap;
|
||
for (int i = 0; i < gIndent * 4; i++) fputc(' ', gOut);
|
||
va_start(ap, fmt);
|
||
vfprintf(gOut, fmt, ap);
|
||
va_end(ap);
|
||
}
|
||
|
||
|
||
// Emit text without leading indentation
|
||
static void emitRaw(const char *fmt, ...) {
|
||
va_list ap;
|
||
va_start(ap, fmt);
|
||
vfprintf(gOut, fmt, ap);
|
||
va_end(ap);
|
||
}
|
||
|
||
|
||
// Record a line-number label for later GOSUB dispatch
|
||
static void recordLineLabel(int lnum) {
|
||
for (int i = 0; i < gLineLabelCount; i++)
|
||
if (gLineLabels[i] == lnum) return; // already recorded
|
||
if (gLineLabelCount >= MAX_LINE_LABELS)
|
||
fatal(gLine, "Too many line labels");
|
||
gLineLabels[gLineLabelCount++] = lnum;
|
||
}
|
||
|
||
|
||
// Record a line number as a GOTO/GOSUB target so its label is emitted
|
||
static void recordGotoTarget(int lnum) {
|
||
for (int i = 0; i < gGotoTargetCount; i++)
|
||
if (gGotoTargets[i] == lnum) return;
|
||
if (gGotoTargetCount >= MAX_LINE_LABELS)
|
||
fatal(gLine, "Too many goto targets");
|
||
gGotoTargets[gGotoTargetCount++] = lnum;
|
||
}
|
||
|
||
|
||
// Check whether a line number is a GOTO/GOSUB target
|
||
static int isGotoTarget(int lnum) {
|
||
for (int i = 0; i < gGotoTargetCount; i++)
|
||
if (gGotoTargets[i] == lnum) return 1;
|
||
return 0;
|
||
}
|
||
|
||
|
||
// Record a named label as a GOTO/GOSUB target so its C label is emitted
|
||
static void recordGotoStrTarget(const char *name) {
|
||
for (int i = 0; i < gGotoStrTargetCount; i++)
|
||
if (strIcmp(gGotoStrTargets[i], name) == 0) return;
|
||
if (gGotoStrTargetCount >= MAX_LINE_LABELS)
|
||
fatal(gLine, "Too many named goto targets");
|
||
gGotoStrTargets[gGotoStrTargetCount++] = strDup(name);
|
||
}
|
||
|
||
|
||
// Check whether a named label is a GOTO/GOSUB target
|
||
static int isGotoStrTarget(const char *name) {
|
||
for (int i = 0; i < gGotoStrTargetCount; i++)
|
||
if (strIcmp(gGotoStrTargets[i], name) == 0) return 1;
|
||
return 0;
|
||
}
|
||
|
||
|
||
// Infer the data type of a variable from its name suffix.
|
||
// Names ending in '$' -> STRING, '%' -> INTEGER, '!' -> FLOAT,
|
||
// '#' -> DOUBLE; otherwise check the symbol table, default INTEGER.
|
||
static DataType inferVarType(const char *name) {
|
||
int len = (int)strlen(name);
|
||
if (len > 0 && name[len-1] == '$') return TYPE_STR;
|
||
if (len > 0 && name[len-1] == '%') return TYPE_INT;
|
||
if (len > 0 && name[len-1] == '!') return TYPE_FLOAT;
|
||
if (len > 0 && name[len-1] == '#') return TYPE_DBL;
|
||
Symbol *s = symLookup(name);
|
||
if (s) return s->dataType;
|
||
return TYPE_INT; // default
|
||
}
|
||
|
||
|
||
// Return the wider of two numeric types for expression promotion.
|
||
// The DataType enum is ordered so that a higher value = wider type
|
||
// (BYTE < INT < LONG < FLOAT < DBL). STRING is handled separately.
|
||
static DataType promoteType(DataType a, DataType b) {
|
||
if (a == TYPE_STR || b == TYPE_STR) return TYPE_STR;
|
||
return (a > b) ? a : b;
|
||
}
|
||
|
||
|
||
// Strip type-suffix characters ($, %, #, !) from an identifier for C output.
|
||
// Uses a rotating set of 8 static buffers so multiple calls within a
|
||
// single expression don't clobber each other.
|
||
static const char *cleanName(const char *name) {
|
||
static char bufs[8][MAX_IDENT];
|
||
static int idx = 0;
|
||
if (!name) return "_null_";
|
||
char *buf = bufs[idx++ & 7];
|
||
strncpy(buf, name, MAX_IDENT - 1);
|
||
buf[MAX_IDENT - 1] = '\0';
|
||
int len = (int)strlen(buf);
|
||
if (len > 0 && (buf[len-1]=='$' || buf[len-1]=='%' ||
|
||
buf[len-1]=='#' || buf[len-1]=='!'))
|
||
buf[len-1] = '\0';
|
||
return buf;
|
||
}
|
||
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 6: Lexer
|
||
//
|
||
// The lexer reads characters from gSrc and produces tokens one at a time.
|
||
// BASIC keywords are case-insensitive; identifiers preserve case.
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Keyword table: maps keyword strings to token types
|
||
static struct { const char *kw; TokenType tok; } gKeywords[] = {
|
||
{"DIM", TOK_DIM}, {"REDIM", TOK_REDIM},
|
||
{"AS", TOK_AS}, {"BYTE", TOK_BYTE},
|
||
{"INTEGER", TOK_INTEGER},
|
||
{"LONG", TOK_LONG}, {"FLOAT", TOK_FLOAT},
|
||
{"DOUBLE", TOK_DOUBLE}, {"STRING", TOK_STRING},
|
||
{"LET", TOK_LET}, {"PRINT", TOK_PRINT},
|
||
{"INPUT", TOK_INPUT}, {"IF", TOK_IF},
|
||
{"THEN", TOK_THEN}, {"ELSE", TOK_ELSE},
|
||
{"ELSEIF", TOK_ELSEIF}, {"END", TOK_END},
|
||
{"FOR", TOK_FOR}, {"TO", TOK_TO},
|
||
{"STEP", TOK_STEP}, {"NEXT", TOK_NEXT},
|
||
{"WHILE", TOK_WHILE}, {"WEND", TOK_WEND},
|
||
{"DO", TOK_DO}, {"LOOP", TOK_LOOP},
|
||
{"UNTIL", TOK_UNTIL}, {"GOTO", TOK_GOTO},
|
||
{"GOSUB", TOK_GOSUB}, {"RETURN", TOK_RETURN},
|
||
{"SUB", TOK_SUB}, {"FUNCTION", TOK_FUNCTION},
|
||
{"CALL", TOK_CALL}, {"BYVAL", TOK_BYVAL},
|
||
{"BYREF", TOK_BYREF}, {"LOCAL", TOK_LOCAL},
|
||
{"STATIC", TOK_STATIC}, {"EXIT", TOK_EXIT},
|
||
{"AND", TOK_AND}, {"OR", TOK_OR},
|
||
{"NOT", TOK_NOT}, {"MOD", TOK_MOD},
|
||
{"REM", TOK_REM},
|
||
{"OPEN", TOK_OPEN}, {"CLOSE", TOK_CLOSE},
|
||
{"OUTPUT", TOK_OUTPUT}, {"APPEND", TOK_APPEND},
|
||
{"BINARY", TOK_BINARY}, {"LINE", TOK_LINE},
|
||
{"WRITE", TOK_WRITE},
|
||
{"TYPE", TOK_TYPE},
|
||
{"XOR", TOK_XOR},
|
||
{"SELECT", TOK_SELECT}, {"CASE", TOK_CASE},
|
||
{"CONST", TOK_CONST},
|
||
{"ON", TOK_ON},
|
||
{NULL, TOK_EOF}
|
||
};
|
||
|
||
// Check if a name is a keyword
|
||
static int isKeyword(const char *name) {
|
||
for (int k = 0; gKeywords[k].kw; k++)
|
||
if (strIcmp(name, gKeywords[k].kw) == 0)
|
||
return 1;
|
||
return 0;
|
||
}
|
||
|
||
// Peek at the current character without advancing
|
||
static int peekChar(void) {
|
||
if (gSrcPos >= gSrcLen) return EOF;
|
||
return (unsigned char)gSrc[gSrcPos];
|
||
}
|
||
|
||
|
||
// Read and advance past the current character
|
||
static int readChar(void) {
|
||
if (gSrcPos >= gSrcLen) return EOF;
|
||
int ch = (unsigned char)gSrc[gSrcPos++];
|
||
if (ch == '\n') gLine++;
|
||
return ch;
|
||
}
|
||
|
||
|
||
// Skip whitespace (spaces and tabs) but NOT newlines
|
||
static void skipSpaces(void) {
|
||
while (gSrcPos < gSrcLen) {
|
||
int ch = gSrc[gSrcPos];
|
||
if (ch == ' ' || ch == '\t')
|
||
gSrcPos++;
|
||
else
|
||
break;
|
||
}
|
||
}
|
||
|
||
|
||
// Read the next token into gTok
|
||
static void nextToken(void) {
|
||
skipSpaces();
|
||
gTok.line = gLine;
|
||
gTok.sval[0] = '\0';
|
||
gTok.ival = 0;
|
||
gTok.dval = 0.0;
|
||
|
||
int ch = peekChar();
|
||
|
||
// End of file
|
||
if (ch == EOF) { gTok.type = TOK_EOF; return; }
|
||
|
||
// Newline – statement separator
|
||
if (ch == '\n') {
|
||
readChar();
|
||
gTok.type = TOK_NEWLINE;
|
||
return;
|
||
}
|
||
|
||
// Carriage return (handle \r\n)
|
||
if (ch == '\r') {
|
||
readChar();
|
||
if (peekChar() == '\n') readChar();
|
||
gTok.type = TOK_NEWLINE;
|
||
return;
|
||
}
|
||
|
||
// Single-line comment: ' or REM
|
||
if (ch == '\'') {
|
||
// Skip until end of line
|
||
while (peekChar() != '\n' && peekChar() != EOF)
|
||
readChar();
|
||
gTok.type = TOK_NEWLINE; // treat comment as newline
|
||
if (peekChar() == '\n') readChar();
|
||
return;
|
||
}
|
||
|
||
// String literal
|
||
if (ch == '"') {
|
||
readChar(); // consume opening quote
|
||
int i = 0;
|
||
while (peekChar() != '"' && peekChar() != '\n' && peekChar() != EOF) {
|
||
if (i < MAX_TOKEN_LEN - 1)
|
||
gTok.sval[i++] = (char)readChar();
|
||
else
|
||
readChar();
|
||
}
|
||
gTok.sval[i] = '\0';
|
||
if (peekChar() == '"') readChar(); // consume closing quote
|
||
gTok.type = TOK_STR_LIT;
|
||
return;
|
||
}
|
||
|
||
// Number literal (integer or double)
|
||
if (isdigit(ch) || (ch == '.' && isdigit(gSrc[gSrcPos+1]))) {
|
||
int i = 0;
|
||
int hasDot = 0;
|
||
while (isdigit(peekChar()) || peekChar() == '.') {
|
||
if (peekChar() == '.') {
|
||
if (hasDot) break; // second dot ends the number
|
||
hasDot = 1;
|
||
}
|
||
if (i < MAX_TOKEN_LEN - 1)
|
||
gTok.sval[i++] = (char)readChar();
|
||
else
|
||
readChar();
|
||
}
|
||
gTok.sval[i] = '\0';
|
||
if (hasDot) {
|
||
gTok.type = TOK_DBL_LIT;
|
||
gTok.dval = atof(gTok.sval);
|
||
} else {
|
||
gTok.type = TOK_INT_LIT;
|
||
gTok.ival = atoi(gTok.sval);
|
||
}
|
||
return;
|
||
}
|
||
|
||
// Identifier or keyword
|
||
if (isalpha(ch) || ch == '_') {
|
||
int i = 0;
|
||
while (isalnum(peekChar()) || peekChar() == '_') {
|
||
if (i < MAX_TOKEN_LEN - 1)
|
||
gTok.sval[i++] = (char)readChar();
|
||
else
|
||
readChar();
|
||
}
|
||
// Allow trailing $, %, #, ! for type suffixes:
|
||
// $ = STRING, % = INTEGER (int16_t),
|
||
// # = DOUBLE, ! = FLOAT
|
||
if (peekChar()=='$' || peekChar()=='%' ||
|
||
peekChar()=='#' || peekChar()=='!') {
|
||
if (i < MAX_TOKEN_LEN - 1)
|
||
gTok.sval[i++] = (char)readChar();
|
||
}
|
||
gTok.sval[i] = '\0';
|
||
|
||
// Check for REM (rest of line is comment)
|
||
if (strIcmp(gTok.sval, "REM") == 0) {
|
||
while (peekChar() != '\n' && peekChar() != EOF)
|
||
readChar();
|
||
gTok.type = TOK_NEWLINE;
|
||
if (peekChar() == '\n') readChar();
|
||
return;
|
||
}
|
||
|
||
// Check keyword table
|
||
for (int k = 0; gKeywords[k].kw; k++) {
|
||
if (strIcmp(gTok.sval, gKeywords[k].kw) == 0) {
|
||
gTok.type = gKeywords[k].tok;
|
||
return;
|
||
}
|
||
}
|
||
|
||
// Not a keyword – it is an identifier
|
||
gTok.type = TOK_IDENT;
|
||
return;
|
||
}
|
||
|
||
// Operators and punctuation
|
||
readChar();
|
||
switch (ch) {
|
||
case '+': gTok.type = TOK_PLUS; return;
|
||
case '-': gTok.type = TOK_MINUS; return;
|
||
case '*': gTok.type = TOK_STAR; return;
|
||
case '/': gTok.type = TOK_SLASH; return;
|
||
case '\\':gTok.type = TOK_BSLASH; return;
|
||
case '^': gTok.type = TOK_CARET; return;
|
||
case '&': gTok.type = TOK_AMP; return;
|
||
case '#': gTok.type = TOK_HASH; return;
|
||
case '.': gTok.type = TOK_DOT; return;
|
||
case '(': gTok.type = TOK_LPAREN; return;
|
||
case ')': gTok.type = TOK_RPAREN; return;
|
||
case ',': gTok.type = TOK_COMMA; return;
|
||
case ';': gTok.type = TOK_SEMI; return;
|
||
case ':': gTok.type = TOK_COLON; return;
|
||
case '?': gTok.type = TOK_PRINT; return;
|
||
case '=': gTok.type = TOK_EQ; return;
|
||
case '<':
|
||
if (peekChar() == '=') { readChar(); gTok.type = TOK_LE; }
|
||
else if (peekChar() == '>') { readChar(); gTok.type = TOK_NE; }
|
||
else gTok.type = TOK_LT;
|
||
return;
|
||
case '>':
|
||
if (peekChar() == '=') { readChar(); gTok.type = TOK_GE; }
|
||
else gTok.type = TOK_GT;
|
||
return;
|
||
default:
|
||
fatal(gLine, "Unexpected character '%c' (0x%02X)", ch, ch);
|
||
}
|
||
}
|
||
|
||
|
||
// Check if the current token matches a given type
|
||
static int tokIs(TokenType t) { return gTok.type == t; }
|
||
|
||
// Consume current token if it matches; returns 1 on match, 0 otherwise
|
||
static int tokAccept(TokenType t) {
|
||
if (gTok.type == t) { nextToken(); return 1; }
|
||
return 0;
|
||
}
|
||
|
||
|
||
// Require the current token to be of a given type; fatal error otherwise
|
||
static void tokExpect(TokenType t) {
|
||
if (gTok.type != t)
|
||
fatal(gTok.line, "Expected token type %d, got %d ('%s')",
|
||
t, gTok.type, gTok.sval);
|
||
nextToken();
|
||
}
|
||
|
||
|
||
// Skip newlines and colons (statement separators)
|
||
static void skipEol(void) {
|
||
while (gTok.type == TOK_NEWLINE || gTok.type == TOK_COLON)
|
||
nextToken();
|
||
}
|
||
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 7: Parser – Recursive Descent
|
||
//
|
||
// Grammar (simplified):
|
||
// program = { sub_decl | func_decl | statement }
|
||
// statement = dim | redim | type_def | assignment | print | input
|
||
// | if | for | while | do_loop | goto | gosub | return
|
||
// | call | exit | local | static | end | label | open
|
||
// | close | data | read | restore | get | put | line_input
|
||
// expression = or_expr
|
||
// or_expr = and_expr { (OR | XOR) and_expr }
|
||
// and_expr = not_expr { AND not_expr }
|
||
// not_expr = NOT not_expr | cmp_expr
|
||
// cmp_expr = add_expr { (= | <> | < | > | <= | >=) add_expr }
|
||
// add_expr = mul_expr { (+ | - | &) mul_expr }
|
||
// mul_expr = idiv_expr { (* | /) idiv_expr }
|
||
// idiv_expr = mod_expr { '\' mod_expr }
|
||
// mod_expr = power_expr { MOD power_expr }
|
||
// power_expr = unary_expr { ^ unary_expr }
|
||
// unary_expr = (+ | -) unary_expr | primary
|
||
// primary = INT_LIT | DBL_LIT | STR_LIT | ident['('args')'][.field...]
|
||
// | '(' expression ')' | SIZEOF'('type_name')'
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Forward declarations for mutually recursive parser functions
|
||
static Node *parseExpr(void);
|
||
static Node *parseStatement(void);
|
||
static int dataIndexForLine(int lnum);
|
||
static int dataIndexForLabel(const char *name);
|
||
static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3);
|
||
|
||
static void skipNewlines(void) {
|
||
while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken();
|
||
}
|
||
|
||
// ---- Expression parser ----
|
||
|
||
// Parse a primary expression (literals, variables, function calls, parens)
|
||
static Node *parsePrimary(void) {
|
||
int ln = gTok.line;
|
||
|
||
// Integer literal
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
Node *n = newNode(NODE_INT_LIT, ln);
|
||
n->ival = gTok.ival;
|
||
n->dataType = TYPE_INT;
|
||
nextToken();
|
||
return n;
|
||
}
|
||
|
||
// Double literal
|
||
if (tokIs(TOK_DBL_LIT)) {
|
||
Node *n = newNode(NODE_DBL_LIT, ln);
|
||
n->dval = gTok.dval;
|
||
n->dataType = TYPE_DBL;
|
||
nextToken();
|
||
return n;
|
||
}
|
||
|
||
// String literal
|
||
if (tokIs(TOK_STR_LIT)) {
|
||
Node *n = newNode(NODE_STR_LIT, ln);
|
||
n->sval = strDup(gTok.sval);
|
||
n->dataType = TYPE_STR;
|
||
nextToken();
|
||
return n;
|
||
}
|
||
|
||
// Parenthesized expression
|
||
if (tokIs(TOK_LPAREN)) {
|
||
nextToken();
|
||
Node *n = parseExpr();
|
||
tokExpect(TOK_RPAREN);
|
||
return n;
|
||
}
|
||
|
||
// Identifier: variable, array element, or function call
|
||
if (tokIs(TOK_IDENT)) {
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
name[MAX_TOKEN_LEN - 1] = '\0';
|
||
nextToken();
|
||
|
||
// Check for '(' – array access or function call
|
||
if (tokIs(TOK_LPAREN)) {
|
||
nextToken();
|
||
// Collect argument list
|
||
Node *args = NULL, *tail = NULL;
|
||
if (!tokIs(TOK_RPAREN)) {
|
||
Node *arg = parseExpr();
|
||
args = tail = arg;
|
||
while (tokAccept(TOK_COMMA)) {
|
||
arg = parseExpr();
|
||
tail->next = arg;
|
||
tail = arg;
|
||
}
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
|
||
// Determine if this is a known array or function
|
||
Symbol *s = symLookup(name);
|
||
|
||
// SIZEOF(TypeName) — compile-time sizeof
|
||
if (strIcmp(name, "SIZEOF") == 0) {
|
||
// args should be one identifier — the UDT name
|
||
Node *n = newNode(NODE_FUNC_CALL, ln);
|
||
n->sval = strDup("SIZEOF");
|
||
n->a = args;
|
||
n->dataType = TYPE_LONG;
|
||
return n;
|
||
}
|
||
|
||
if (s && s->isArray) {
|
||
Node *n = newNode(NODE_ARRAY_REF, ln);
|
||
n->sval = strDup(name);
|
||
n->a = args; // index expression
|
||
n->dataType = s->dataType;
|
||
n->ival2 = s->udtIndex;
|
||
// Check for dot-access on array element: arr(i).field[.field...]
|
||
if (s->dataType == TYPE_UDT && tokIs(TOK_DOT)) {
|
||
Node *cur = n;
|
||
int curUdt = s->udtIndex;
|
||
while (curUdt >= 0 && tokIs(TOK_DOT)) {
|
||
nextToken();
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected field name after '.'");
|
||
int fi = udtFieldLookup(curUdt, gTok.sval);
|
||
if (fi < 0)
|
||
fatal(ln, "Unknown field '%s' in type '%s'",
|
||
gTok.sval, gUdts[curUdt].name);
|
||
Node *dot = newNode(NODE_DOT_ACCESS, ln);
|
||
dot->a = cur;
|
||
dot->sval = strDup(gTok.sval);
|
||
dot->ival2 = curUdt;
|
||
UdtField *uf = &gUdts[curUdt].fields[fi];
|
||
dot->dataType = uf->dataType;
|
||
if (uf->dataType == TYPE_STR && uf->strLen > 0)
|
||
dot->ival = uf->strLen;
|
||
cur = dot;
|
||
curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
|
||
nextToken();
|
||
}
|
||
return cur;
|
||
}
|
||
return n;
|
||
} else {
|
||
// Treat as function call
|
||
Node *n = newNode(NODE_FUNC_CALL, ln);
|
||
n->sval = strDup(name);
|
||
n->a = args;
|
||
// Infer return type: check symbol table, built-ins, name
|
||
if (s && s->isFunc == 1) {
|
||
n->dataType = s->returnType;
|
||
} else if (strIcmp(name,"LOF")==0) {
|
||
n->dataType = TYPE_LONG;
|
||
} else if (strIcmp(name,"VAL")==0 || strIcmp(name,"ABS")==0) {
|
||
n->dataType = TYPE_DBL;
|
||
} else if (strIcmp(name,"LEN")==0 || strIcmp(name,"ASC")==0 ||
|
||
strIcmp(name,"INT")==0 || strIcmp(name,"INSTR")==0 ||
|
||
strIcmp(name,"EOF")==0 || strIcmp(name,"FREEFILE")==0 ||
|
||
strIcmp(name,"LBOUND")==0 || strIcmp(name,"UBOUND")==0) {
|
||
n->dataType = TYPE_INT;
|
||
} else {
|
||
// Check external functions and compile-time builtins
|
||
ExternFunc *ef = externFuncLookup(name);
|
||
if (ef) {
|
||
n->dataType = ef->returnType;
|
||
} else {
|
||
const BuiltinDef *bd = builtinDefLookup(name);
|
||
if (bd) {
|
||
n->dataType = bd->returnType;
|
||
} else {
|
||
n->dataType = inferVarType(name);
|
||
}
|
||
}
|
||
}
|
||
return n;
|
||
}
|
||
}
|
||
|
||
// RND without parentheses — treat as RND()
|
||
if (strIcmp(name, "RND") == 0) {
|
||
Node *n = newNode(NODE_FUNC_CALL, ln);
|
||
n->sval = strDup("RND");
|
||
n->a = NULL;
|
||
n->dataType = TYPE_DBL;
|
||
return n;
|
||
}
|
||
|
||
// Check compile-time constants
|
||
for (int ci = 0; ci < gConstCount; ci++) {
|
||
if (strIcmp(name, gConsts[ci].name) == 0) {
|
||
if (gConsts[ci].dataType == TYPE_STR) {
|
||
Node *n = newNode(NODE_STR_LIT, ln);
|
||
n->sval = strDup(gConsts[ci].strVal);
|
||
n->dataType = TYPE_STR;
|
||
return n;
|
||
} else {
|
||
double v = gConsts[ci].numVal;
|
||
if (v == (int)v && gConsts[ci].dataType != TYPE_DBL &&
|
||
gConsts[ci].dataType != TYPE_FLOAT) {
|
||
Node *n = newNode(NODE_INT_LIT, ln);
|
||
n->ival = (int)v;
|
||
n->dataType = gConsts[ci].dataType;
|
||
return n;
|
||
} else {
|
||
Node *n = newNode(NODE_DBL_LIT, ln);
|
||
n->dval = v;
|
||
n->dataType = gConsts[ci].dataType;
|
||
return n;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// Plain variable reference — check for dot-access (supports chaining)
|
||
{
|
||
Symbol *s = symLookup(name);
|
||
if (s && s->dataType == TYPE_UDT && tokIs(TOK_DOT)) {
|
||
Node *base = newNode(NODE_IDENT, ln);
|
||
base->sval = strDup(name);
|
||
base->dataType = TYPE_UDT;
|
||
Node *cur = base;
|
||
int curUdt = s->udtIndex;
|
||
while (curUdt >= 0 && tokIs(TOK_DOT)) {
|
||
nextToken();
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected field name after '.'");
|
||
int fi = udtFieldLookup(curUdt, gTok.sval);
|
||
if (fi < 0)
|
||
fatal(ln, "Unknown field '%s' in type '%s'",
|
||
gTok.sval, gUdts[curUdt].name);
|
||
Node *dot = newNode(NODE_DOT_ACCESS, ln);
|
||
dot->a = cur;
|
||
dot->sval = strDup(gTok.sval);
|
||
dot->ival2 = curUdt;
|
||
UdtField *uf = &gUdts[curUdt].fields[fi];
|
||
dot->dataType = uf->dataType;
|
||
if (uf->dataType == TYPE_STR && uf->strLen > 0)
|
||
dot->ival = uf->strLen;
|
||
cur = dot;
|
||
curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
|
||
nextToken();
|
||
}
|
||
return cur;
|
||
}
|
||
}
|
||
|
||
Node *n = newNode(NODE_IDENT, ln);
|
||
n->sval = strDup(name);
|
||
n->dataType = inferVarType(name);
|
||
return n;
|
||
}
|
||
|
||
fatal(ln, "Expected expression, got token type %d ('%s')",
|
||
gTok.type, gTok.sval);
|
||
return NULL; // unreachable
|
||
}
|
||
|
||
|
||
// Unary: [+|-] primary
|
||
static Node *parseUnary(void) {
|
||
int ln = gTok.line;
|
||
if (tokIs(TOK_MINUS) || tokIs(TOK_PLUS)) {
|
||
int op = gTok.type;
|
||
nextToken();
|
||
Node *operand = parseUnary();
|
||
if (op == TOK_PLUS) return operand; // unary + is a no-op
|
||
Node *n = newNode(NODE_UNOP, ln);
|
||
n->ival = op;
|
||
n->a = operand;
|
||
n->dataType = operand->dataType;
|
||
return n;
|
||
}
|
||
return parsePrimary();
|
||
}
|
||
|
||
|
||
// Power: unary { ^ unary } (right-associative)
|
||
static Node *parsePower(void) {
|
||
Node *left = parseUnary();
|
||
if (tokIs(TOK_CARET)) {
|
||
int ln = gTok.line;
|
||
nextToken();
|
||
Node *right = parsePower(); // right-associative
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = TOK_CARET;
|
||
n->a = left;
|
||
n->b = right;
|
||
n->dataType = TYPE_DBL;
|
||
return n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// MOD: power { MOD power }
|
||
static Node *parseMod(void) {
|
||
Node *left = parsePower();
|
||
while (tokIs(TOK_MOD)) {
|
||
int ln = gTok.line;
|
||
nextToken();
|
||
Node *right = parsePower();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = TOK_MOD;
|
||
n->a = left;
|
||
n->b = right;
|
||
n->dataType = TYPE_INT;
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// Integer division: mod { '\' mod }
|
||
static Node *parseIdiv(void) {
|
||
Node *left = parseMod();
|
||
while (tokIs(TOK_BSLASH)) {
|
||
int ln = gTok.line;
|
||
nextToken();
|
||
Node *right = parseMod();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = TOK_BSLASH;
|
||
n->a = left;
|
||
n->b = right;
|
||
n->dataType = TYPE_INT;
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// Multiply / divide: idiv { (*|/) idiv }
|
||
static Node *parseMuldiv(void) {
|
||
Node *left = parseIdiv();
|
||
while (tokIs(TOK_STAR) || tokIs(TOK_SLASH)) {
|
||
int ln = gTok.line;
|
||
int op = gTok.type;
|
||
nextToken();
|
||
Node *right = parseIdiv();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = op;
|
||
n->a = left;
|
||
n->b = right;
|
||
// Division always promotes to double; multiplication promotes
|
||
// to the wider of the two operand types.
|
||
n->dataType = (op == TOK_SLASH) ? TYPE_DBL :
|
||
promoteType(left->dataType, right->dataType);
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// Add / subtract / string concat: muldiv { (+|-|&) muldiv }
|
||
static Node *parseAddsub(void) {
|
||
Node *left = parseMuldiv();
|
||
while (tokIs(TOK_PLUS) || tokIs(TOK_MINUS) || tokIs(TOK_AMP)) {
|
||
int ln = gTok.line;
|
||
int op = gTok.type;
|
||
nextToken();
|
||
Node *right = parseMuldiv();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = op;
|
||
n->a = left;
|
||
n->b = right;
|
||
// String concatenation
|
||
if (op == TOK_AMP || (op == TOK_PLUS &&
|
||
(left->dataType == TYPE_STR || right->dataType == TYPE_STR)))
|
||
n->dataType = TYPE_STR;
|
||
else
|
||
n->dataType = promoteType(left->dataType, right->dataType);
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// Comparison: addsub { (=|<>|<|>|<=|>=) addsub }
|
||
static Node *parseComparison(void) {
|
||
Node *left = parseAddsub();
|
||
while (tokIs(TOK_EQ) || tokIs(TOK_NE) || tokIs(TOK_LT) ||
|
||
tokIs(TOK_GT) || tokIs(TOK_LE) || tokIs(TOK_GE)) {
|
||
int ln = gTok.line;
|
||
int op = gTok.type;
|
||
nextToken();
|
||
Node *right = parseAddsub();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = op;
|
||
n->a = left;
|
||
n->b = right;
|
||
n->dataType = TYPE_INT; // comparisons yield integer (boolean)
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// NOT: NOT not_expr | comparison
|
||
static Node *parseNot(void) {
|
||
if (tokIs(TOK_NOT)) {
|
||
int ln = gTok.line;
|
||
nextToken();
|
||
Node *operand = parseNot();
|
||
Node *n = newNode(NODE_UNOP, ln);
|
||
n->ival = TOK_NOT;
|
||
n->a = operand;
|
||
n->dataType = TYPE_INT;
|
||
return n;
|
||
}
|
||
return parseComparison();
|
||
}
|
||
|
||
|
||
// AND: not { AND not }
|
||
static Node *parseAnd(void) {
|
||
Node *left = parseNot();
|
||
while (tokIs(TOK_AND)) {
|
||
int ln = gTok.line;
|
||
nextToken();
|
||
Node *right = parseNot();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = TOK_AND;
|
||
n->a = left;
|
||
n->b = right;
|
||
n->dataType = TYPE_INT;
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// OR: and { OR and } — top-level expression rule
|
||
static Node *parseOr(void) {
|
||
Node *left = parseAnd();
|
||
while (tokIs(TOK_OR) || tokIs(TOK_XOR)) {
|
||
int ln = gTok.line;
|
||
int op = gTok.type;
|
||
nextToken();
|
||
Node *right = parseAnd();
|
||
Node *n = newNode(NODE_BINOP, ln);
|
||
n->ival = op;
|
||
n->a = left;
|
||
n->b = right;
|
||
n->dataType = TYPE_INT;
|
||
left = n;
|
||
}
|
||
return left;
|
||
}
|
||
|
||
|
||
// Top-level expression entry point
|
||
static Node *parseExpr(void) {
|
||
return parseOr();
|
||
}
|
||
|
||
|
||
// ---- Statement parsers ----
|
||
|
||
// Parse a data-type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING, or UDT name)
|
||
static DataType parseType(void) {
|
||
if (tokAccept(TOK_BYTE)) return TYPE_BYTE;
|
||
if (tokAccept(TOK_INTEGER)) return TYPE_INT;
|
||
if (tokAccept(TOK_LONG)) return TYPE_LONG;
|
||
if (tokAccept(TOK_FLOAT)) return TYPE_FLOAT;
|
||
if (tokAccept(TOK_DOUBLE)) return TYPE_DBL;
|
||
if (tokAccept(TOK_STRING)) return TYPE_STR;
|
||
// Check for user-defined type name
|
||
if (tokIs(TOK_IDENT)) {
|
||
int idx = udtLookup(gTok.sval);
|
||
if (idx >= 0) {
|
||
gLastUdtIndex = idx;
|
||
nextToken();
|
||
return TYPE_UDT;
|
||
}
|
||
}
|
||
fatal(gTok.line,
|
||
"Expected type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING)");
|
||
return TYPE_INT;
|
||
}
|
||
|
||
|
||
// Parse TYPE ... END TYPE definition
|
||
static Node *parseTypeDef(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_TYPE);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected type name after TYPE");
|
||
|
||
char tname[MAX_IDENT];
|
||
strncpy(tname, gTok.sval, MAX_IDENT - 1);
|
||
tname[MAX_IDENT - 1] = '\0';
|
||
nextToken();
|
||
|
||
if (gUdtCount >= MAX_UDTS)
|
||
fatal(ln, "Too many TYPE definitions (max %d)", MAX_UDTS);
|
||
|
||
int udtIdx = gUdtCount++;
|
||
UdtDef *u = &gUdts[udtIdx];
|
||
memset(u, 0, sizeof(*u));
|
||
strncpy(u->name, tname, MAX_IDENT - 1);
|
||
|
||
// Skip newlines before fields
|
||
while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken();
|
||
|
||
// Parse fields until END TYPE
|
||
while (!tokIs(TOK_EOF)) {
|
||
// Check for END TYPE
|
||
if (tokIs(TOK_END)) {
|
||
int sp = gSrcPos;
|
||
int sl = gLine;
|
||
Token st = gTok;
|
||
nextToken();
|
||
if (tokIs(TOK_TYPE)) {
|
||
nextToken(); // consume TYPE
|
||
break;
|
||
}
|
||
// Not END TYPE — restore
|
||
gSrcPos = sp;
|
||
gLine = sl;
|
||
gTok = st;
|
||
}
|
||
|
||
// Parse field: name AS type
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(gTok.line, "Expected field name in TYPE definition");
|
||
|
||
if (u->fieldCount >= MAX_UDT_FIELDS)
|
||
fatal(gTok.line, "Too many fields in TYPE (max %d)", MAX_UDT_FIELDS);
|
||
|
||
UdtField *f = &u->fields[u->fieldCount];
|
||
strncpy(f->name, gTok.sval, MAX_IDENT - 1);
|
||
f->name[MAX_IDENT - 1] = '\0';
|
||
nextToken();
|
||
|
||
tokExpect(TOK_AS);
|
||
|
||
// Check for STRING * N (fixed-length string)
|
||
if (tokIs(TOK_STRING)) {
|
||
nextToken();
|
||
if (tokAccept(TOK_STAR)) {
|
||
if (!tokIs(TOK_INT_LIT))
|
||
fatal(gTok.line, "Expected integer after STRING *");
|
||
f->strLen = gTok.ival;
|
||
nextToken();
|
||
} else {
|
||
fatal(gTok.line,
|
||
"STRING fields in TYPE require fixed length (STRING * N)");
|
||
}
|
||
f->dataType = TYPE_STR;
|
||
f->udtIndex = -1;
|
||
} else {
|
||
gLastUdtIndex = -1;
|
||
f->dataType = parseType();
|
||
f->strLen = 0;
|
||
f->udtIndex = gLastUdtIndex;
|
||
}
|
||
u->fieldCount++;
|
||
|
||
// Skip newlines between fields
|
||
while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken();
|
||
}
|
||
|
||
Node *n = newNode(NODE_TYPE_DEF, ln);
|
||
n->sval = strDup(tname);
|
||
n->ival = udtIdx;
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse DIM statement: DIM name[(size[, size, ...])] AS type
|
||
static Node *parseDim(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_DIM);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected identifier after DIM");
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
nextToken();
|
||
|
||
Node *size = NULL;
|
||
int ndims = 0;
|
||
if (tokAccept(TOK_LPAREN)) {
|
||
size = parseExpr();
|
||
ndims = 1;
|
||
Node *tail = size;
|
||
while (tokAccept(TOK_COMMA)) {
|
||
Node *dim = parseExpr();
|
||
tail->next = dim;
|
||
tail = dim;
|
||
ndims++;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
}
|
||
|
||
tokExpect(TOK_AS);
|
||
gLastUdtIndex = -1;
|
||
DataType dt = parseType();
|
||
|
||
Node *n = newNode(NODE_DIM, ln);
|
||
n->sval = strDup(name);
|
||
n->dataType = dt;
|
||
n->a = size;
|
||
n->ival = ndims;
|
||
n->ival2 = gLastUdtIndex;
|
||
|
||
// Register in symbol table
|
||
Symbol *s = symAdd(name);
|
||
s->dataType = dt;
|
||
s->isArray = (ndims > 0);
|
||
s->ndims = ndims;
|
||
s->udtIndex = gLastUdtIndex;
|
||
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse REDIM statement: REDIM name(size, ...) AS type
|
||
static Node *parseRedim(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_REDIM);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected identifier after REDIM");
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
nextToken();
|
||
tokExpect(TOK_LPAREN);
|
||
Node *size = parseExpr();
|
||
int ndims = 1;
|
||
Node *tail = size;
|
||
while (tokAccept(TOK_COMMA)) {
|
||
Node *dim = parseExpr();
|
||
tail->next = dim;
|
||
tail = dim;
|
||
ndims++;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
tokExpect(TOK_AS);
|
||
DataType dt = parseType();
|
||
|
||
Node *n = newNode(NODE_REDIM, ln);
|
||
n->sval = strDup(name);
|
||
n->dataType = dt;
|
||
n->a = size;
|
||
n->ival = ndims;
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse PRINT statement: PRINT [expr { (;|,) expr } [;]]
|
||
// Helper: parse a file number (#expr)
|
||
static Node *parseFileNumber(void) {
|
||
tokExpect(TOK_HASH);
|
||
return parseExpr();
|
||
}
|
||
|
||
|
||
// Helper: parse print items (shared by PRINT and PRINT #)
|
||
static Node *parsePrintItems(int ln) {
|
||
Node *head = NULL, *tail = NULL;
|
||
while (1) {
|
||
Node *item = newNode(NODE_PRINT_ITEM, ln);
|
||
item->a = parseExpr();
|
||
|
||
// Check for separator after this item
|
||
if (tokIs(TOK_SEMI)) {
|
||
item->ival = 1; // semicolon: no space
|
||
nextToken();
|
||
} else if (tokIs(TOK_COMMA)) {
|
||
item->ival = 2; // comma: tab
|
||
nextToken();
|
||
} else {
|
||
item->ival = 0; // end of print list
|
||
}
|
||
|
||
if (!head) head = tail = item;
|
||
else { tail->next = item; tail = item; }
|
||
|
||
// If no separator or end of statement, stop
|
||
if (item->ival == 0) break;
|
||
// If separator at end of line, stop (trailing separator)
|
||
if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF))
|
||
break;
|
||
}
|
||
return head;
|
||
}
|
||
|
||
|
||
static Node *parsePrint(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_PRINT);
|
||
|
||
// File-directed PRINT: PRINT #n, ...
|
||
if (tokIs(TOK_HASH)) {
|
||
Node *fpr = newNode(NODE_FILE_PRINT, ln);
|
||
fpr->b = parseFileNumber();
|
||
tokExpect(TOK_COMMA);
|
||
if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF))
|
||
return fpr; // PRINT #n, alone = write newline to file
|
||
fpr->a = parsePrintItems(ln);
|
||
return fpr;
|
||
}
|
||
|
||
// PRINT USING "format"; value1; value2; ...
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "USING") == 0) {
|
||
nextToken();
|
||
Node *pu = newNode(NODE_PRINT_USING, ln);
|
||
pu->a = parseExpr(); // format string expression
|
||
if (!tokAccept(TOK_SEMI))
|
||
tokExpect(TOK_COMMA); // allow ; or , after format
|
||
// Parse values as linked list
|
||
Node *head = NULL, *tail = NULL;
|
||
while (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
|
||
Node *val = parseExpr();
|
||
if (!head) head = tail = val;
|
||
else { tail->next = val; tail = val; }
|
||
if (!tokAccept(TOK_SEMI) && !tokAccept(TOK_COMMA))
|
||
break;
|
||
}
|
||
pu->b = head;
|
||
return pu;
|
||
}
|
||
|
||
Node *pr = newNode(NODE_PRINT, ln);
|
||
|
||
// Empty PRINT (just a newline)
|
||
if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) {
|
||
return pr;
|
||
}
|
||
|
||
pr->a = parsePrintItems(ln);
|
||
return pr;
|
||
}
|
||
|
||
|
||
// Parse INPUT statement: INPUT ["prompt";] var {, var}
|
||
// Helper: parse a comma-separated variable list for INPUT
|
||
static Node *parseInputVars(int ln) {
|
||
Node *head = NULL, *tail = NULL;
|
||
do {
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable name in INPUT");
|
||
Node *v = newNode(NODE_IDENT, ln);
|
||
v->sval = strDup(gTok.sval);
|
||
v->dataType = inferVarType(gTok.sval);
|
||
nextToken();
|
||
if (!head) head = tail = v;
|
||
else { tail->next = v; tail = v; }
|
||
} while (tokAccept(TOK_COMMA));
|
||
return head;
|
||
}
|
||
|
||
|
||
static Node *parseInput(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_INPUT);
|
||
|
||
// File-directed INPUT: INPUT #n, var1, var2
|
||
if (tokIs(TOK_HASH)) {
|
||
Node *finp = newNode(NODE_FILE_INPUT, ln);
|
||
finp->b = parseFileNumber();
|
||
tokExpect(TOK_COMMA);
|
||
finp->a = parseInputVars(ln);
|
||
return finp;
|
||
}
|
||
|
||
Node *inp = newNode(NODE_INPUT, ln);
|
||
|
||
// Optional string prompt
|
||
if (tokIs(TOK_STR_LIT)) {
|
||
inp->sval = strDup(gTok.sval);
|
||
nextToken();
|
||
if (tokIs(TOK_SEMI) || tokIs(TOK_COMMA))
|
||
nextToken(); // consume separator after prompt
|
||
}
|
||
|
||
inp->a = parseInputVars(ln);
|
||
return inp;
|
||
}
|
||
|
||
|
||
// Parse an ELSEIF chain as a nested IF node.
|
||
// ELSEIF expr THEN \n block { ELSEIF ... } [ELSE block]
|
||
// The caller (parseIf) consumes the final END IF.
|
||
static Node *parseElseifChain(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_ELSEIF);
|
||
Node *cond = parseExpr();
|
||
tokExpect(TOK_THEN);
|
||
|
||
Node *n = newNode(NODE_IF, ln);
|
||
n->a = cond;
|
||
|
||
skipEol();
|
||
n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END);
|
||
|
||
if (tokIs(TOK_ELSEIF)) {
|
||
n->c = parseElseifChain();
|
||
} else if (tokAccept(TOK_ELSE)) {
|
||
skipEol();
|
||
n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
|
||
}
|
||
// END IF is consumed by the top-level parseIf
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse IF block:
|
||
// IF expr THEN stmt (single-line)
|
||
// IF expr THEN \n block {ELSEIF...} [ELSE block] END IF
|
||
static Node *parseIf(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_IF);
|
||
Node *cond = parseExpr();
|
||
tokExpect(TOK_THEN);
|
||
|
||
Node *n = newNode(NODE_IF, ln);
|
||
n->a = cond;
|
||
|
||
// Single-line IF: statement on same line after THEN
|
||
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
|
||
Node *stmt = parseStatement();
|
||
Node *blk = newNode(NODE_BLOCK, ln);
|
||
blk->a = stmt;
|
||
n->b = blk;
|
||
return n;
|
||
}
|
||
|
||
// Multi-line IF
|
||
skipEol();
|
||
n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END);
|
||
|
||
// ELSEIF chain: parse as a nested IF node
|
||
if (tokIs(TOK_ELSEIF)) {
|
||
n->c = parseElseifChain();
|
||
}
|
||
// ELSE block
|
||
else if (tokAccept(TOK_ELSE)) {
|
||
skipEol();
|
||
n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
|
||
}
|
||
|
||
// END IF
|
||
tokExpect(TOK_END);
|
||
tokExpect(TOK_IF);
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse FOR loop: FOR var = start TO end [STEP step] \n block NEXT [var]
|
||
static Node *parseFor(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_FOR);
|
||
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable after FOR");
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
nextToken();
|
||
|
||
tokExpect(TOK_EQ);
|
||
Node *start = parseExpr();
|
||
tokExpect(TOK_TO);
|
||
Node *end = parseExpr();
|
||
|
||
Node *step = NULL;
|
||
if (tokAccept(TOK_STEP)) {
|
||
step = parseExpr();
|
||
}
|
||
|
||
Node *n = newNode(NODE_FOR, ln);
|
||
n->sval = strDup(name);
|
||
n->a = start;
|
||
n->b = end;
|
||
n->c = step;
|
||
|
||
skipEol();
|
||
n->d = parseBlock(TOK_NEXT, TOK_EOF, TOK_EOF);
|
||
tokExpect(TOK_NEXT);
|
||
// Optional variable name after NEXT
|
||
if (tokIs(TOK_IDENT)) nextToken();
|
||
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse WHILE loop: WHILE expr \n block WEND
|
||
static Node *parseWhile(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_WHILE);
|
||
Node *cond = parseExpr();
|
||
|
||
Node *n = newNode(NODE_WHILE, ln);
|
||
n->a = cond;
|
||
|
||
skipEol();
|
||
n->b = parseBlock(TOK_WEND, TOK_EOF, TOK_EOF);
|
||
tokExpect(TOK_WEND);
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse DO/LOOP:
|
||
// DO [WHILE|UNTIL expr] \n block LOOP [WHILE|UNTIL expr]
|
||
static Node *parseDoLoop(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_DO);
|
||
|
||
Node *n = newNode(NODE_DO_LOOP, ln);
|
||
n->ival = 0; // flags: bit0 = isUntil, bit1 = conditionAtBottom
|
||
|
||
// Optional top condition
|
||
if (tokIs(TOK_WHILE)) {
|
||
nextToken();
|
||
n->a = parseExpr();
|
||
} else if (tokIs(TOK_UNTIL)) {
|
||
nextToken();
|
||
n->a = parseExpr();
|
||
n->ival |= 1; // UNTIL (vs WHILE)
|
||
}
|
||
|
||
skipEol();
|
||
n->b = parseBlock(TOK_LOOP, TOK_EOF, TOK_EOF);
|
||
tokExpect(TOK_LOOP);
|
||
|
||
// Optional bottom condition
|
||
if (tokIs(TOK_WHILE)) {
|
||
nextToken();
|
||
n->a = parseExpr();
|
||
n->ival = 2; // condition at bottom
|
||
} else if (tokIs(TOK_UNTIL)) {
|
||
nextToken();
|
||
n->a = parseExpr();
|
||
n->ival = 3; // until + at bottom
|
||
}
|
||
|
||
// If no condition at all, infinite loop (DO...LOOP)
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse SUB declaration:
|
||
// SUB name([BYVAL|BYREF] param AS type, ...) \n block END SUB
|
||
static Node *parseSub(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_SUB);
|
||
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected subroutine name after SUB");
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
nextToken();
|
||
|
||
// Parse parameter list
|
||
Node *params = NULL, *ptail = NULL;
|
||
int pcount = 0;
|
||
Symbol *sym = symAdd(name);
|
||
sym->isFunc = 2; // SUB
|
||
sym->returnType = TYPE_VOID;
|
||
|
||
if (tokAccept(TOK_LPAREN)) {
|
||
while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) {
|
||
PassMode pm = PASS_BYVAL;
|
||
if (tokAccept(TOK_BYREF)) pm = PASS_BYREF;
|
||
else tokAccept(TOK_BYVAL); // optional BYVAL
|
||
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(gTok.line, "Expected parameter name");
|
||
Node *p = newNode(NODE_PARAM, gTok.line);
|
||
p->sval = strDup(gTok.sval);
|
||
p->ival = pm;
|
||
nextToken();
|
||
|
||
tokExpect(TOK_AS);
|
||
p->dataType = parseType();
|
||
|
||
// Record param in the function's symbol entry
|
||
if (pcount >= MAX_PARAMS)
|
||
fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS);
|
||
strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1);
|
||
sym->paramTypes[pcount] = p->dataType;
|
||
sym->paramModes[pcount] = pm;
|
||
pcount++;
|
||
|
||
// Also register the parameter as a variable for type inference
|
||
// inside the function body
|
||
Symbol *psym = symAdd(p->sval);
|
||
psym->dataType = p->dataType;
|
||
|
||
if (!params) params = ptail = p;
|
||
else { ptail->next = p; ptail = p; }
|
||
|
||
if (!tokAccept(TOK_COMMA)) break;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
}
|
||
sym->paramCount = pcount;
|
||
|
||
Node *n = newNode(NODE_SUB, ln);
|
||
n->sval = strDup(name);
|
||
n->a = params;
|
||
|
||
skipEol();
|
||
n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
|
||
tokExpect(TOK_END);
|
||
tokExpect(TOK_SUB);
|
||
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse FUNCTION declaration:
|
||
// FUNCTION name([params]) AS type \n block END FUNCTION
|
||
static Node *parseFunction(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_FUNCTION);
|
||
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected function name after FUNCTION");
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
nextToken();
|
||
|
||
// Parse parameter list
|
||
Node *params = NULL, *ptail = NULL;
|
||
int pcount = 0;
|
||
Symbol *sym = symAdd(name);
|
||
sym->isFunc = 1; // FUNCTION
|
||
|
||
if (tokAccept(TOK_LPAREN)) {
|
||
while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) {
|
||
PassMode pm = PASS_BYVAL;
|
||
if (tokAccept(TOK_BYREF)) pm = PASS_BYREF;
|
||
else tokAccept(TOK_BYVAL);
|
||
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(gTok.line, "Expected parameter name");
|
||
Node *p = newNode(NODE_PARAM, gTok.line);
|
||
p->sval = strDup(gTok.sval);
|
||
p->ival = pm;
|
||
nextToken();
|
||
|
||
tokExpect(TOK_AS);
|
||
p->dataType = parseType();
|
||
|
||
if (pcount >= MAX_PARAMS)
|
||
fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS);
|
||
strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1);
|
||
sym->paramTypes[pcount] = p->dataType;
|
||
sym->paramModes[pcount] = pm;
|
||
pcount++;
|
||
|
||
// Register parameter as variable for type inference
|
||
Symbol *psym = symAdd(p->sval);
|
||
psym->dataType = p->dataType;
|
||
|
||
if (!params) params = ptail = p;
|
||
else { ptail->next = p; ptail = p; }
|
||
|
||
if (!tokAccept(TOK_COMMA)) break;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
}
|
||
sym->paramCount = pcount;
|
||
|
||
// Return type
|
||
tokExpect(TOK_AS);
|
||
DataType ret = parseType();
|
||
sym->returnType = ret;
|
||
sym->dataType = ret;
|
||
|
||
Node *n = newNode(NODE_FUNC, ln);
|
||
n->sval = strDup(name);
|
||
n->dataType = ret;
|
||
n->a = params;
|
||
|
||
skipEol();
|
||
n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF);
|
||
tokExpect(TOK_END);
|
||
tokExpect(TOK_FUNCTION);
|
||
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse LOCAL declaration: LOCAL name AS type
|
||
static Node *parseLocal(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_LOCAL);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable name after LOCAL");
|
||
Node *n = newNode(NODE_LOCAL, ln);
|
||
n->sval = strDup(gTok.sval);
|
||
nextToken();
|
||
tokExpect(TOK_AS);
|
||
n->dataType = parseType();
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse STATIC declaration: STATIC name AS type
|
||
static Node *parseStatic(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_STATIC);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable name after STATIC");
|
||
Node *n = newNode(NODE_STATIC, ln);
|
||
n->sval = strDup(gTok.sval);
|
||
nextToken();
|
||
tokExpect(TOK_AS);
|
||
n->dataType = parseType();
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse DATA statement: DATA literal, literal, ...
|
||
// Items can be integers, doubles, strings, or negative numbers.
|
||
// Note: the "DATA" keyword is already consumed by parseStatement
|
||
static Node *parseData(void) {
|
||
int ln = gTok.line;
|
||
Node *n = newNode(NODE_DATA, ln);
|
||
Node *head = NULL, *tail = NULL;
|
||
|
||
do {
|
||
Node *item = NULL;
|
||
// Handle negative numeric literals
|
||
int neg = 0;
|
||
if (tokIs(TOK_MINUS)) {
|
||
neg = 1;
|
||
nextToken();
|
||
}
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
item = newNode(NODE_INT_LIT, ln);
|
||
item->ival = neg ? -gTok.ival : gTok.ival;
|
||
item->dataType = TYPE_INT;
|
||
nextToken();
|
||
} else if (tokIs(TOK_DBL_LIT)) {
|
||
item = newNode(NODE_DBL_LIT, ln);
|
||
item->dval = neg ? -gTok.dval : gTok.dval;
|
||
item->dataType = TYPE_DBL;
|
||
nextToken();
|
||
} else if (tokIs(TOK_STR_LIT)) {
|
||
item = newNode(NODE_STR_LIT, ln);
|
||
item->sval = strDup(gTok.sval);
|
||
item->dataType = TYPE_STR;
|
||
nextToken();
|
||
} else {
|
||
fatal(ln, "Expected literal value in DATA statement");
|
||
}
|
||
if (!head) head = tail = item;
|
||
else { tail->next = item; tail = item; }
|
||
} while (tokAccept(TOK_COMMA));
|
||
|
||
n->a = head;
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse READ statement: READ var1, var2, ...
|
||
// Note: the "READ" keyword is already consumed by parseStatement
|
||
static Node *parseRead(void) {
|
||
int ln = gTok.line;
|
||
Node *n = newNode(NODE_READ, ln);
|
||
Node *head = NULL, *tail = NULL;
|
||
|
||
do {
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable name in READ");
|
||
Node *v = newNode(NODE_IDENT, ln);
|
||
v->sval = strDup(gTok.sval);
|
||
v->dataType = inferVarType(gTok.sval);
|
||
nextToken();
|
||
if (!head) head = tail = v;
|
||
else { tail->next = v; tail = v; }
|
||
} while (tokAccept(TOK_COMMA));
|
||
|
||
n->a = head;
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse RESTORE statement: RESTORE [line_number]
|
||
// Note: the "RESTORE" keyword is already consumed by parseStatement
|
||
static Node *parseRestore(void) {
|
||
int ln = gTok.line;
|
||
Node *n = newNode(NODE_RESTORE, ln);
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
n->ival = gTok.ival;
|
||
nextToken();
|
||
} else if (tokIs(TOK_IDENT)) {
|
||
n->sval = strDup(gTok.sval);
|
||
nextToken();
|
||
}
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse OPEN statement: OPEN "filename" FOR INPUT|OUTPUT|APPEND|BINARY|RANDOM AS #n [LEN = expr]
|
||
static Node *parseOpen(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_OPEN);
|
||
Node *n = newNode(NODE_OPEN, ln);
|
||
n->a = parseExpr(); // filename expression
|
||
tokExpect(TOK_FOR);
|
||
if (tokIs(TOK_INPUT)) { n->ival = 0; nextToken(); }
|
||
else if (tokIs(TOK_OUTPUT)) { n->ival = 1; nextToken(); }
|
||
else if (tokIs(TOK_APPEND)) { n->ival = 2; nextToken(); }
|
||
else if (tokIs(TOK_BINARY)) { n->ival = 3; nextToken(); }
|
||
else if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOM") == 0)
|
||
{ n->ival = 4; nextToken(); }
|
||
else fatal(ln, "Expected INPUT, OUTPUT, APPEND, BINARY, or RANDOM after FOR");
|
||
tokExpect(TOK_AS);
|
||
n->b = parseFileNumber(); // file number expression
|
||
// Optional LEN = expr for RANDOM access
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "LEN") == 0) {
|
||
nextToken();
|
||
tokExpect(TOK_EQ);
|
||
n->c = parseExpr();
|
||
}
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse CLOSE statement: CLOSE #n
|
||
static Node *parseClose(void) {
|
||
int ln = gTok.line;
|
||
tokExpect(TOK_CLOSE);
|
||
Node *n = newNode(NODE_CLOSE, ln);
|
||
n->b = parseFileNumber();
|
||
return n;
|
||
}
|
||
|
||
|
||
// Parse a single statement
|
||
static Node *parseStatement(void) {
|
||
int ln = gTok.line;
|
||
|
||
// Line-number label: a bare integer at the start of a statement
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
int lnum = gTok.ival;
|
||
nextToken();
|
||
// If followed by a statement, this is a labeled line
|
||
Node *lbl = newNode(NODE_LABEL, ln);
|
||
lbl->ival = lnum;
|
||
recordLineLabel(lnum);
|
||
|
||
// If there's a statement on this line, chain it
|
||
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
|
||
Node *stmt = parseStatement();
|
||
lbl->next = stmt;
|
||
}
|
||
return lbl;
|
||
}
|
||
|
||
// Named label: identifier followed by colon (e.g., myLabel:)
|
||
// Must peek ahead to distinguish from statement separator colons.
|
||
// Only treat as label if the NEXT token is a colon.
|
||
if (tokIs(TOK_IDENT)) {
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
char labelName[MAX_IDENT];
|
||
strncpy(labelName, gTok.sval, MAX_IDENT - 1);
|
||
labelName[MAX_IDENT - 1] = '\0';
|
||
nextToken();
|
||
if (tokIs(TOK_COLON)) {
|
||
if (isKeyword(labelName))
|
||
fatal(ln, "Cannot use keyword '%s' as label", labelName);
|
||
nextToken();
|
||
Node *lbl = newNode(NODE_LABEL, ln);
|
||
lbl->ival = 0; // 0 = named label, not numeric
|
||
lbl->sval = strDup(labelName);
|
||
// If there's a statement on this line, chain it
|
||
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
|
||
Node *stmt = parseStatement();
|
||
lbl->next = stmt;
|
||
}
|
||
return lbl;
|
||
}
|
||
// Not a label — restore state
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
// TYPE ... END TYPE
|
||
if (tokIs(TOK_TYPE)) return parseTypeDef();
|
||
|
||
// DIM
|
||
if (tokIs(TOK_DIM)) return parseDim();
|
||
|
||
// REDIM
|
||
if (tokIs(TOK_REDIM)) return parseRedim();
|
||
|
||
// PRINT
|
||
if (tokIs(TOK_PRINT)) return parsePrint();
|
||
|
||
// INPUT / INPUT #
|
||
if (tokIs(TOK_INPUT)) return parseInput();
|
||
|
||
// OPEN
|
||
if (tokIs(TOK_OPEN)) return parseOpen();
|
||
|
||
// CLOSE
|
||
if (tokIs(TOK_CLOSE)) return parseClose();
|
||
|
||
// DATA, READ, RESTORE are contextual keywords — checked as identifiers
|
||
// to avoid colliding with user variable names like "data(i)".
|
||
// We peek ahead: DATA is a keyword only when NOT followed by '(' or '='.
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "DATA") == 0) {
|
||
// Save state and peek at next token
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) {
|
||
// It's a DATA statement
|
||
return parseData();
|
||
}
|
||
// Restore — it's a variable named "data"
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
// READ (contextual keyword — same peek-ahead logic)
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "READ") == 0) {
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) {
|
||
return parseRead();
|
||
}
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
// RESTORE (contextual keyword)
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RESTORE") == 0) {
|
||
nextToken();
|
||
return parseRestore();
|
||
}
|
||
|
||
// GET #filenum, record, variable (contextual keyword)
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "GET") == 0) {
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
if (tokIs(TOK_HASH)) {
|
||
Node *n = newNode(NODE_GET, ln);
|
||
n->a = parseFileNumber();
|
||
tokExpect(TOK_COMMA);
|
||
n->b = parseExpr();
|
||
tokExpect(TOK_COMMA);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable name in GET");
|
||
Node *v = newNode(NODE_IDENT, ln);
|
||
v->sval = strDup(gTok.sval);
|
||
v->dataType = inferVarType(gTok.sval);
|
||
nextToken();
|
||
n->c = v;
|
||
return n;
|
||
}
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
// PUT #filenum, record, variable (contextual keyword)
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "PUT") == 0) {
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
if (tokIs(TOK_HASH)) {
|
||
Node *n = newNode(NODE_PUT, ln);
|
||
n->a = parseFileNumber();
|
||
tokExpect(TOK_COMMA);
|
||
n->b = parseExpr();
|
||
tokExpect(TOK_COMMA);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected variable name in PUT");
|
||
Node *v = newNode(NODE_IDENT, ln);
|
||
v->sval = strDup(gTok.sval);
|
||
v->dataType = inferVarType(gTok.sval);
|
||
nextToken();
|
||
n->c = v;
|
||
return n;
|
||
}
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
// RANDOMIZE [seed] (contextual keyword)
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOMIZE") == 0) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_RANDOMIZE, ln);
|
||
// Optional seed expression
|
||
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF) &&
|
||
!tokIs(TOK_ELSE)) {
|
||
n->a = parseExpr();
|
||
}
|
||
return n;
|
||
}
|
||
|
||
// MID$ assignment: MID$(s$, start, len) = replacement$
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "MID$") == 0) {
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
if (tokIs(TOK_LPAREN)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_MID_ASSIGN, ln);
|
||
// Parse target string variable
|
||
if (!tokIs(TOK_IDENT)) fatal(ln, "Expected string variable in MID$ assignment");
|
||
n->a = newNode(NODE_IDENT, ln);
|
||
n->a->sval = strDup(gTok.sval);
|
||
n->a->dataType = TYPE_STR;
|
||
nextToken();
|
||
tokExpect(TOK_COMMA);
|
||
n->b = parseExpr(); // start position
|
||
if (tokAccept(TOK_COMMA)) {
|
||
n->c = parseExpr(); // length
|
||
} else {
|
||
// No length — use large value
|
||
Node *big = newNode(NODE_INT_LIT, ln);
|
||
big->ival = 32767;
|
||
n->c = big;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
tokExpect(TOK_EQ);
|
||
n->d = parseExpr(); // replacement string
|
||
return n;
|
||
}
|
||
// Not MID$ assignment — restore
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
// LINE INPUT #
|
||
if (tokIs(TOK_LINE)) {
|
||
nextToken();
|
||
tokExpect(TOK_INPUT);
|
||
int lln = ln;
|
||
Node *n = newNode(NODE_LINE_INPUT, lln);
|
||
n->b = parseFileNumber();
|
||
tokExpect(TOK_COMMA);
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(lln, "Expected variable name in LINE INPUT");
|
||
Node *v = newNode(NODE_IDENT, lln);
|
||
v->sval = strDup(gTok.sval);
|
||
v->dataType = TYPE_STR;
|
||
nextToken();
|
||
n->a = v;
|
||
return n;
|
||
}
|
||
|
||
// WRITE #
|
||
if (tokIs(TOK_WRITE)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_FILE_WRITE, ln);
|
||
n->b = parseFileNumber();
|
||
tokExpect(TOK_COMMA);
|
||
Node *head = NULL, *tail = NULL;
|
||
do {
|
||
Node *e = parseExpr();
|
||
if (!head) head = tail = e;
|
||
else { tail->next = e; tail = e; }
|
||
} while (tokAccept(TOK_COMMA));
|
||
n->a = head;
|
||
return n;
|
||
}
|
||
|
||
// IF
|
||
if (tokIs(TOK_IF)) return parseIf();
|
||
|
||
// FOR
|
||
if (tokIs(TOK_FOR)) return parseFor();
|
||
|
||
// WHILE
|
||
if (tokIs(TOK_WHILE)) return parseWhile();
|
||
|
||
// DO
|
||
if (tokIs(TOK_DO)) return parseDoLoop();
|
||
|
||
// SUB
|
||
if (tokIs(TOK_SUB)) return parseSub();
|
||
|
||
// FUNCTION
|
||
if (tokIs(TOK_FUNCTION)) return parseFunction();
|
||
|
||
// LOCAL
|
||
if (tokIs(TOK_LOCAL)) return parseLocal();
|
||
|
||
// STATIC
|
||
if (tokIs(TOK_STATIC)) return parseStatic();
|
||
|
||
// GOTO
|
||
if (tokIs(TOK_GOTO)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_GOTO, ln);
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
n->ival = gTok.ival;
|
||
recordGotoTarget(n->ival);
|
||
nextToken();
|
||
} else if (tokIs(TOK_IDENT)) {
|
||
n->sval = strDup(gTok.sval);
|
||
recordGotoStrTarget(n->sval);
|
||
nextToken();
|
||
} else {
|
||
fatal(ln, "Expected line number or label after GOTO");
|
||
}
|
||
return n;
|
||
}
|
||
|
||
// GOSUB
|
||
if (tokIs(TOK_GOSUB)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_GOSUB, ln);
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
n->ival = gTok.ival;
|
||
recordGotoTarget(n->ival);
|
||
nextToken();
|
||
} else if (tokIs(TOK_IDENT)) {
|
||
n->sval = strDup(gTok.sval);
|
||
recordGotoStrTarget(n->sval);
|
||
nextToken();
|
||
} else {
|
||
fatal(ln, "Expected line number or label after GOSUB");
|
||
}
|
||
if (gGosubCount >= MAX_GOSUB_SITES)
|
||
fatal(ln, "Too many GOSUB sites (max %d)", MAX_GOSUB_SITES);
|
||
n->ival2 = gGosubCount++;
|
||
return n;
|
||
}
|
||
|
||
// RETURN
|
||
if (tokIs(TOK_RETURN)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_RETURN, ln);
|
||
// Optional return expression for FUNCTION
|
||
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
|
||
n->a = parseExpr();
|
||
}
|
||
return n;
|
||
}
|
||
|
||
// EXIT (FOR | WHILE | DO | SUB | FUNCTION)
|
||
if (tokIs(TOK_EXIT)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_EXIT, ln);
|
||
if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); }
|
||
else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); }
|
||
else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); }
|
||
else if (tokIs(TOK_SUB)) { n->ival = TOK_SUB; nextToken(); }
|
||
else if (tokIs(TOK_FUNCTION)){n->ival = TOK_FUNCTION;nextToken(); }
|
||
else fatal(ln, "Expected FOR, WHILE, DO, SUB, or FUNCTION after EXIT");
|
||
return n;
|
||
}
|
||
|
||
// CONTINUE (FOR | WHILE | DO) — contextual keyword
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "CONTINUE") == 0) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_CONTINUE, ln);
|
||
if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); }
|
||
else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); }
|
||
else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); }
|
||
else fatal(ln, "Expected FOR, WHILE, or DO after CONTINUE");
|
||
return n;
|
||
}
|
||
|
||
// END (program termination)
|
||
if (tokIs(TOK_END)) {
|
||
// Peek ahead: END IF / END SUB / END FUNCTION are handled by callers.
|
||
// Bare END means program exit.
|
||
// Save position to check next token
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
|
||
// If followed by IF, SUB, FUNCTION, SELECT – put it back (the caller handles it)
|
||
if (tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION) ||
|
||
tokIs(TOK_SELECT)) {
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
return NULL; // signal to caller: block terminator reached
|
||
}
|
||
|
||
// Bare END
|
||
Node *n = newNode(NODE_END, ln);
|
||
return n;
|
||
}
|
||
|
||
// CONST name = value
|
||
if (tokIs(TOK_CONST)) {
|
||
nextToken();
|
||
if (!tokIs(TOK_IDENT)) fatal(ln, "Expected identifier after CONST");
|
||
char cname[MAX_IDENT];
|
||
strncpy(cname, gTok.sval, MAX_IDENT - 1);
|
||
cname[MAX_IDENT - 1] = '\0';
|
||
nextToken();
|
||
tokExpect(TOK_EQ);
|
||
// Parse the value — must be a literal
|
||
Node *val = parseExpr();
|
||
Node *n = newNode(NODE_CONST_DECL, ln);
|
||
n->sval = strDup(cname);
|
||
n->a = val;
|
||
// Store in constant table
|
||
if (gConstCount < MAX_CONSTS) {
|
||
strncpy(gConsts[gConstCount].name, cname, MAX_IDENT - 1);
|
||
if (val->type == NODE_STR_LIT) {
|
||
gConsts[gConstCount].dataType = TYPE_STR;
|
||
strncpy(gConsts[gConstCount].strVal, val->sval, MAX_TOKEN_LEN - 1);
|
||
} else if (val->type == NODE_DBL_LIT) {
|
||
gConsts[gConstCount].dataType = TYPE_DBL;
|
||
gConsts[gConstCount].numVal = val->dval;
|
||
} else if (val->type == NODE_INT_LIT) {
|
||
gConsts[gConstCount].dataType = TYPE_INT;
|
||
gConsts[gConstCount].numVal = val->ival;
|
||
} else if (val->type == NODE_UNOP && val->ival == TOK_MINUS) {
|
||
// Handle negative constants like CONST X = -1
|
||
gConsts[gConstCount].dataType = TYPE_DBL;
|
||
if (val->a->type == NODE_INT_LIT)
|
||
gConsts[gConstCount].numVal = -(double)val->a->ival;
|
||
else
|
||
gConsts[gConstCount].numVal = -val->a->dval;
|
||
} else {
|
||
fatal(ln, "CONST value must be a literal");
|
||
}
|
||
gConstCount++;
|
||
}
|
||
return n;
|
||
}
|
||
|
||
// SWAP var1, var2 (contextual keyword)
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "SWAP") == 0) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_SWAP, ln);
|
||
n->a = parseExpr();
|
||
tokExpect(TOK_COMMA);
|
||
n->b = parseExpr();
|
||
return n;
|
||
}
|
||
|
||
// SELECT CASE expr ... CASE ... END SELECT
|
||
if (tokIs(TOK_SELECT)) {
|
||
nextToken();
|
||
tokExpect(TOK_CASE);
|
||
Node *n = newNode(NODE_SELECT, ln);
|
||
n->a = parseExpr();
|
||
skipNewlines();
|
||
// Parse CASE blocks
|
||
Node *caseHead = NULL, *caseTail = NULL;
|
||
while (tokIs(TOK_CASE)) {
|
||
nextToken();
|
||
Node *cb = newNode(NODE_CASE, gLine);
|
||
// CASE ELSE
|
||
if (tokIs(TOK_ELSE)) {
|
||
nextToken();
|
||
cb->ival = 1; // flag: CASE ELSE
|
||
} else {
|
||
// Parse comma-separated values/ranges
|
||
Node *valHead = NULL, *valTail = NULL;
|
||
for (;;) {
|
||
Node *v;
|
||
// CASE IS >/</>=/<=/<>/= expr
|
||
if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "IS") == 0) {
|
||
nextToken();
|
||
// Expect a comparison operator
|
||
int op = gTok.type;
|
||
if (op != TOK_EQ && op != TOK_NE && op != TOK_LT &&
|
||
op != TOK_GT && op != TOK_LE && op != TOK_GE)
|
||
fatal(gLine, "Expected comparison operator after IS");
|
||
nextToken();
|
||
v = newNode(NODE_BINOP, gLine);
|
||
v->ival = op;
|
||
v->a = NULL; // placeholder: test expr filled at codegen
|
||
v->b = parseExpr();
|
||
v->ival2 = 1; // flag: IS comparison
|
||
} else {
|
||
v = parseExpr();
|
||
// Check for TO (range)
|
||
if (tokIs(TOK_TO)) {
|
||
nextToken();
|
||
Node *range = newNode(NODE_BINOP, gLine);
|
||
range->ival = TOK_TO; // reuse TO token as range marker
|
||
range->a = v;
|
||
range->b = parseExpr();
|
||
range->ival2 = 2; // flag: range
|
||
v = range;
|
||
}
|
||
}
|
||
if (!valHead) { valHead = valTail = v; }
|
||
else { valTail->next = v; valTail = v; }
|
||
if (!tokAccept(TOK_COMMA)) break;
|
||
}
|
||
cb->a = valHead;
|
||
}
|
||
skipNewlines();
|
||
// Parse body until next CASE or END SELECT
|
||
Node *bodyHead = NULL, *bodyTail = NULL;
|
||
while (!tokIs(TOK_CASE) && !tokIs(TOK_END) && !tokIs(TOK_EOF)) {
|
||
Node *s = parseStatement();
|
||
if (!s) break;
|
||
if (!bodyHead) { bodyHead = bodyTail = s; }
|
||
else { bodyTail->next = s; bodyTail = s; }
|
||
skipNewlines();
|
||
}
|
||
cb->b = bodyHead;
|
||
if (!caseHead) { caseHead = caseTail = cb; }
|
||
else { caseTail->next = cb; caseTail = cb; }
|
||
}
|
||
// Expect END SELECT
|
||
tokExpect(TOK_END);
|
||
tokExpect(TOK_SELECT);
|
||
n->b = caseHead;
|
||
return n;
|
||
}
|
||
|
||
// ON expr GOTO/GOSUB label1, label2, ...
|
||
if (tokIs(TOK_ON)) {
|
||
nextToken();
|
||
Node *expr = parseExpr();
|
||
if (tokIs(TOK_GOTO)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_ON_GOTO, ln);
|
||
n->a = expr;
|
||
// Parse comma-separated labels
|
||
Node *labHead = NULL, *labTail = NULL;
|
||
for (;;) {
|
||
Node *lab;
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
lab = newNode(NODE_INT_LIT, gLine);
|
||
lab->ival = gTok.ival;
|
||
recordGotoTarget(lab->ival);
|
||
nextToken();
|
||
} else if (tokIs(TOK_IDENT)) {
|
||
lab = newNode(NODE_IDENT, gLine);
|
||
lab->sval = strDup(gTok.sval);
|
||
recordGotoStrTarget(lab->sval);
|
||
nextToken();
|
||
} else {
|
||
fatal(gLine, "Expected label in ON GOTO");
|
||
}
|
||
if (!labHead) { labHead = labTail = lab; }
|
||
else { labTail->next = lab; labTail = lab; }
|
||
if (!tokAccept(TOK_COMMA)) break;
|
||
}
|
||
n->b = labHead;
|
||
return n;
|
||
} else if (tokIs(TOK_GOSUB)) {
|
||
nextToken();
|
||
Node *n = newNode(NODE_ON_GOSUB, ln);
|
||
n->a = expr;
|
||
n->ival2 = gGosubCount; // first return-point id
|
||
// Parse comma-separated labels
|
||
Node *labHead = NULL, *labTail = NULL;
|
||
for (;;) {
|
||
Node *lab;
|
||
if (tokIs(TOK_INT_LIT)) {
|
||
lab = newNode(NODE_INT_LIT, gLine);
|
||
lab->ival = gTok.ival;
|
||
recordGotoTarget(lab->ival);
|
||
nextToken();
|
||
} else if (tokIs(TOK_IDENT)) {
|
||
lab = newNode(NODE_IDENT, gLine);
|
||
lab->sval = strDup(gTok.sval);
|
||
recordGotoStrTarget(lab->sval);
|
||
nextToken();
|
||
} else {
|
||
fatal(gLine, "Expected label in ON GOSUB");
|
||
}
|
||
gGosubCount++; // allocate return-point id for each target
|
||
if (!labHead) { labHead = labTail = lab; }
|
||
else { labTail->next = lab; labTail = lab; }
|
||
if (!tokAccept(TOK_COMMA)) break;
|
||
}
|
||
n->b = labHead;
|
||
return n;
|
||
} else {
|
||
fatal(ln, "Expected GOTO or GOSUB after ON expression");
|
||
}
|
||
}
|
||
|
||
// CALL name(args)
|
||
if (tokIs(TOK_CALL)) {
|
||
nextToken();
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected subroutine name after CALL");
|
||
Node *n = newNode(NODE_CALL, ln);
|
||
n->sval = strDup(gTok.sval);
|
||
nextToken();
|
||
|
||
// Parse argument list
|
||
Node *args = NULL, *atail = NULL;
|
||
if (tokAccept(TOK_LPAREN)) {
|
||
while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) {
|
||
Node *arg = parseExpr();
|
||
if (!args) args = atail = arg;
|
||
else { atail->next = arg; atail = arg; }
|
||
if (!tokAccept(TOK_COMMA)) break;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
}
|
||
n->a = args;
|
||
return n;
|
||
}
|
||
|
||
// LET assignment or implicit assignment/call
|
||
if (tokIs(TOK_LET)) nextToken(); // consume optional LET
|
||
|
||
if (tokIs(TOK_IDENT)) {
|
||
char name[MAX_TOKEN_LEN];
|
||
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1);
|
||
nextToken();
|
||
|
||
// Array element assignment or sub call: name(args) [= expr]
|
||
if (tokIs(TOK_LPAREN)) {
|
||
nextToken();
|
||
Node *idx = parseExpr();
|
||
Node *tail = idx;
|
||
while (tokAccept(TOK_COMMA)) {
|
||
Node *dimIdx = parseExpr();
|
||
tail->next = dimIdx;
|
||
tail = dimIdx;
|
||
}
|
||
tokExpect(TOK_RPAREN);
|
||
|
||
// Check for array-element dot-access assignment: arr(i).field[.field...] = expr
|
||
if (tokIs(TOK_DOT)) {
|
||
Symbol *s = symLookup(name);
|
||
if (s && s->dataType == TYPE_UDT) {
|
||
Node *cur = newNode(NODE_ARRAY_REF, ln);
|
||
cur->sval = strDup(name);
|
||
cur->a = idx;
|
||
cur->dataType = TYPE_UDT;
|
||
cur->ival2 = s->udtIndex;
|
||
int curUdt = s->udtIndex;
|
||
while (curUdt >= 0 && tokIs(TOK_DOT)) {
|
||
nextToken();
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected field name after '.'");
|
||
int fi = udtFieldLookup(curUdt, gTok.sval);
|
||
if (fi < 0)
|
||
fatal(ln, "Unknown field '%s'", gTok.sval);
|
||
Node *dot = newNode(NODE_DOT_ACCESS, ln);
|
||
dot->a = cur;
|
||
dot->sval = strDup(gTok.sval);
|
||
dot->ival2 = curUdt;
|
||
UdtField *uf = &gUdts[curUdt].fields[fi];
|
||
dot->dataType = uf->dataType;
|
||
if (uf->dataType == TYPE_STR && uf->strLen > 0)
|
||
dot->ival = uf->strLen;
|
||
cur = dot;
|
||
curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
|
||
nextToken();
|
||
}
|
||
tokExpect(TOK_EQ);
|
||
Node *val = parseExpr();
|
||
Node *n = newNode(NODE_ASSIGN, ln);
|
||
n->a = cur;
|
||
n->b = val;
|
||
return n;
|
||
}
|
||
}
|
||
|
||
if (tokAccept(TOK_EQ)) {
|
||
// Array element assignment: name(i, j, ...) = expr
|
||
Node *val = parseExpr();
|
||
Node *target = newNode(NODE_ARRAY_REF, ln);
|
||
target->sval = strDup(name);
|
||
target->a = idx;
|
||
target->dataType = inferVarType(name);
|
||
|
||
Node *n = newNode(NODE_ASSIGN, ln);
|
||
n->a = target;
|
||
n->b = val;
|
||
return n;
|
||
}
|
||
|
||
// Not an assignment – must be a sub call: name(args)
|
||
Node *n = newNode(NODE_CALL, ln);
|
||
n->sval = strDup(name);
|
||
n->a = idx;
|
||
return n;
|
||
}
|
||
|
||
// Dot-access assignment: var.field[.field...] = expr
|
||
if (tokIs(TOK_DOT)) {
|
||
Symbol *s = symLookup(name);
|
||
if (s && s->dataType == TYPE_UDT) {
|
||
Node *cur = newNode(NODE_IDENT, ln);
|
||
cur->sval = strDup(name);
|
||
cur->dataType = TYPE_UDT;
|
||
int curUdt = s->udtIndex;
|
||
while (curUdt >= 0 && tokIs(TOK_DOT)) {
|
||
nextToken();
|
||
if (!tokIs(TOK_IDENT))
|
||
fatal(ln, "Expected field name after '.'");
|
||
int fi = udtFieldLookup(curUdt, gTok.sval);
|
||
if (fi < 0)
|
||
fatal(ln, "Unknown field '%s' in type '%s'",
|
||
gTok.sval, gUdts[curUdt].name);
|
||
Node *dot = newNode(NODE_DOT_ACCESS, ln);
|
||
dot->a = cur;
|
||
dot->sval = strDup(gTok.sval);
|
||
dot->ival2 = curUdt;
|
||
UdtField *uf = &gUdts[curUdt].fields[fi];
|
||
dot->dataType = uf->dataType;
|
||
if (uf->dataType == TYPE_STR && uf->strLen > 0)
|
||
dot->ival = uf->strLen;
|
||
cur = dot;
|
||
curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1;
|
||
nextToken();
|
||
}
|
||
tokExpect(TOK_EQ);
|
||
Node *val = parseExpr();
|
||
Node *n = newNode(NODE_ASSIGN, ln);
|
||
n->a = cur;
|
||
n->b = val;
|
||
return n;
|
||
}
|
||
}
|
||
|
||
// Simple variable assignment: name = expr
|
||
if (tokAccept(TOK_EQ)) {
|
||
Node *val = parseExpr();
|
||
Node *target = newNode(NODE_IDENT, ln);
|
||
target->sval = strDup(name);
|
||
target->dataType = inferVarType(name);
|
||
|
||
Node *n = newNode(NODE_ASSIGN, ln);
|
||
n->a = target;
|
||
n->b = val;
|
||
return n;
|
||
}
|
||
|
||
// Implicit sub call without CALL keyword: name arg1, arg2, ...
|
||
Node *n = newNode(NODE_CALL, ln);
|
||
n->sval = strDup(name);
|
||
Node *args = NULL, *atail = NULL;
|
||
if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) {
|
||
Node *arg = parseExpr();
|
||
args = atail = arg;
|
||
while (tokAccept(TOK_COMMA)) {
|
||
arg = parseExpr();
|
||
atail->next = arg;
|
||
atail = arg;
|
||
}
|
||
}
|
||
n->a = args;
|
||
return n;
|
||
}
|
||
|
||
fatal(ln, "Unexpected token '%s' (type %d)", gTok.sval, gTok.type);
|
||
return NULL;
|
||
}
|
||
|
||
|
||
// Parse a block of statements until one of the terminator tokens is seen.
|
||
// Returns a NODE_BLOCK containing the linked list of statements.
|
||
static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3) {
|
||
Node *blk = newNode(NODE_BLOCK, gTok.line);
|
||
Node *head = NULL, *tail = NULL;
|
||
|
||
while (!tokIs(TOK_EOF)) {
|
||
skipEol();
|
||
if (tokIs(end1) || tokIs(end2) || tokIs(end3))
|
||
break;
|
||
if (tokIs(TOK_EOF)) break;
|
||
|
||
// Check for END followed by IF/SUB/FUNCTION as block terminator
|
||
if (tokIs(TOK_END)) {
|
||
int savePos = gSrcPos;
|
||
int saveLine = gLine;
|
||
Token saveTok = gTok;
|
||
nextToken();
|
||
if (tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION)) {
|
||
// Restore – the caller will handle END IF/SUB/FUNCTION
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
break;
|
||
}
|
||
// Restore and let parseStatement handle bare END
|
||
gSrcPos = savePos;
|
||
gLine = saveLine;
|
||
gTok = saveTok;
|
||
}
|
||
|
||
Node *stmt = parseStatement();
|
||
if (!stmt) break; // NULL signals block terminator
|
||
|
||
// Flatten: if statement has a ->next chain (e.g., label + stmt),
|
||
// append the entire chain
|
||
if (!head) head = tail = stmt;
|
||
else { tail->next = stmt; }
|
||
while (tail->next) tail = tail->next;
|
||
}
|
||
|
||
blk->a = head;
|
||
return blk;
|
||
}
|
||
|
||
|
||
// Parse the entire program
|
||
static Node *parseProgram(void) {
|
||
nextToken(); // prime the first token
|
||
Node *blk = parseBlock(TOK_EOF, TOK_EOF, TOK_EOF);
|
||
Node *prog = newNode(NODE_PROGRAM, 1);
|
||
prog->a = blk;
|
||
return prog;
|
||
}
|
||
|
||
|
||
// -----------------------------------------------------------------------
// Section 8: Code Generator
//
// Walks the AST and emits C source code. The output includes:
//   - A runtime library for strings, file I/O, and dynamic arrays
//     (debug or release variant based on --release flag)
//   - Packed struct definitions for user-defined types
//   - A static DATA pool for DATA/READ/RESTORE
//   - Forward declarations for SUBs and FUNCTIONs
//   - SUB/FUNCTION implementations as C functions
//   - A main() function containing global code
// -----------------------------------------------------------------------
|
||
|
||
// Forward declarations
|
||
static void genExpr(Node *n);
|
||
static void genStmt(Node *n);
|
||
static void genArrayFlatIndex(const char *name, Node *indices);
|
||
static void genBlock(Node *blk);
|
||
|
||
// Return the C type string for a BASIC data type
|
||
static const char *cTypeStr(DataType dt) {
|
||
switch (dt) {
|
||
case TYPE_BYTE: return "uint8_t";
|
||
case TYPE_INT: return "int16_t";
|
||
case TYPE_LONG: return "int32_t";
|
||
case TYPE_FLOAT: return "float";
|
||
case TYPE_DBL: return "double";
|
||
case TYPE_STR: return "char*";
|
||
default: return "void";
|
||
}
|
||
}
|
||
|
||
|
||
// Return the C struct type string for a UDT (uses rotating buffer)
|
||
static const char *cUdtTypeStr(int udtIndex) {
|
||
static char bufs[4][MAX_IDENT + 16];
|
||
static int bi = 0;
|
||
char *buf = bufs[bi++ & 3];
|
||
if (udtIndex >= 0 && udtIndex < gUdtCount)
|
||
snprintf(buf, MAX_IDENT + 16, "struct _b_%s", cleanName(gUdts[udtIndex].name));
|
||
else
|
||
snprintf(buf, MAX_IDENT + 16, "void");
|
||
return buf;
|
||
}
|
||
|
||
|
||
// Return a C default-value expression for a data type
|
||
static const char *cDefaultVal(DataType dt) {
|
||
switch (dt) {
|
||
case TYPE_BYTE: return "0";
|
||
case TYPE_INT: return "0";
|
||
case TYPE_LONG: return "0";
|
||
case TYPE_FLOAT: return "0.0f";
|
||
case TYPE_DBL: return "0.0";
|
||
case TYPE_STR: return "_bstr(\"\")";
|
||
default: return "0";
|
||
}
|
||
}
|
||
|
||
|
||
// Return a printf format specifier for a data type.
|
||
// int16_t is promoted to int in varargs so %d is safe.
|
||
// int32_t is int on all modern platforms so %d works.
|
||
static const char *cFmt(DataType dt) {
|
||
switch (dt) {
|
||
case TYPE_BYTE: return "%u";
|
||
case TYPE_INT: return "%d";
|
||
case TYPE_LONG: return "%d";
|
||
case TYPE_FLOAT: return "%g";
|
||
case TYPE_DBL: return "%g";
|
||
case TYPE_STR: return "%s";
|
||
default: return "%d";
|
||
}
|
||
}
|
||
|
||
|
||
// Return a scanf format specifier for a data type
|
||
static const char *cScanfFmt(DataType dt) {
|
||
switch (dt) {
|
||
case TYPE_BYTE: return "%hhu"; // uint8_t
|
||
case TYPE_INT: return "%hd"; // int16_t needs short format
|
||
case TYPE_LONG: return "%d"; // int32_t
|
||
case TYPE_FLOAT: return "%f";
|
||
case TYPE_DBL: return "%lf";
|
||
default: return "%hd";
|
||
}
|
||
}
|
||
|
||
|
||
// Determine if an expression node produces a string type
|
||
static int isStringExpr(Node *n) {
|
||
if (!n) return 0;
|
||
return n->dataType == TYPE_STR;
|
||
}
|
||
|
||
|
||
// Check if a name corresponds to a built-in BASIC function that returns
|
||
// a string. Names ending in '$' are string functions.
|
||
static int isBuiltinStrFunc(const char *name) {
|
||
// Check external functions first
|
||
ExternFunc *ef = externFuncLookup(name);
|
||
if (ef) return ef->returnType == TYPE_STR;
|
||
|
||
// Check compile-time builtins
|
||
const BuiltinDef *bd = builtinDefLookup(name);
|
||
if (bd) return bd->returnType == TYPE_STR;
|
||
|
||
// Functions with special handling in genBuiltinCall
|
||
return (strIcmp(name, "MID$") == 0 ||
|
||
strIcmp(name, "LEFT$") == 0 ||
|
||
strIcmp(name, "RIGHT$") == 0 ||
|
||
strIcmp(name, "STRING$") == 0);
|
||
}
|
||
|
||
|
||
// Report whether `name` is any built-in BASIC function.
// A name qualifies if it is an external function, a compile-time builtin,
// a built-in string function (isBuiltinStrFunc), or one of the non-string
// functions handled specially in genBuiltinCall. Returns 1 or 0.
static int isBuiltinFunc(const char *name) {
    static const char *const specialFuncs[] = {
        "LEN", "VAL", "ASC", "INT", "ABS", "INSTR",
        "EOF", "LOF", "FREEFILE", "LBOUND", "UBOUND"
    };
    const int specialCount = (int)(sizeof specialFuncs / sizeof specialFuncs[0]);

    if (externFuncLookup(name)) {
        return 1;
    }
    if (builtinDefLookup(name)) {
        return 1;
    }
    if (isBuiltinStrFunc(name)) {
        return 1;
    }

    for (int i = 0; i < specialCount; i++) {
        if (strIcmp(name, specialFuncs[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
|
||
|
||
|
||
// Count the number of nodes in a linked list
|
||
static int countList(Node *n) {
|
||
int c = 0;
|
||
while (n) { c++; n = n->next; }
|
||
return c;
|
||
}
|
||
|
||
|
||
// Generate code for a built-in function call
|
||
static void genBuiltinCall(const char *name, Node *args) {
|
||
int argc = countList(args);
|
||
|
||
// Functions with special handling (validation, multiple args, etc.)
|
||
if (strIcmp(name, "LEN") == 0) {
|
||
emitRaw("((int)strlen(");
|
||
genExpr(args);
|
||
emitRaw("))");
|
||
} else if (strIcmp(name, "VAL") == 0) {
|
||
emitRaw("atof(");
|
||
genExpr(args);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "ASC") == 0) {
|
||
emitRaw("((int)(unsigned char)(");
|
||
genExpr(args);
|
||
emitRaw(")[0])");
|
||
} else if (strIcmp(name, "INT") == 0) {
|
||
emitRaw("((int)(");
|
||
genExpr(args);
|
||
emitRaw("))");
|
||
} else if (strIcmp(name, "ABS") == 0) {
|
||
emitRaw("_babs(");
|
||
genExpr(args);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "MID$") == 0) {
|
||
if (argc < 2) fatal(0, "MID$ requires at least 2 arguments");
|
||
emitRaw("_bmid(");
|
||
genExpr(args);
|
||
emitRaw(", ");
|
||
genExpr(args->next);
|
||
if (argc >= 3 && args->next->next) {
|
||
emitRaw(", ");
|
||
genExpr(args->next->next);
|
||
} else {
|
||
emitRaw(", -1");
|
||
}
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "LEFT$") == 0) {
|
||
if (argc < 2) fatal(0, "LEFT$ requires 2 arguments");
|
||
emitRaw("_bleft(");
|
||
genExpr(args);
|
||
emitRaw(", ");
|
||
genExpr(args->next);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "RIGHT$") == 0) {
|
||
if (argc < 2) fatal(0, "RIGHT$ requires 2 arguments");
|
||
emitRaw("_bright(");
|
||
genExpr(args);
|
||
emitRaw(", ");
|
||
genExpr(args->next);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "INSTR") == 0) {
|
||
if (argc < 2) fatal(0, "INSTR requires 2 arguments");
|
||
emitRaw("_binstr(");
|
||
genExpr(args);
|
||
emitRaw(", ");
|
||
genExpr(args->next);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "STRING$") == 0) {
|
||
if (argc < 2) fatal(0, "STRING$ requires 2 arguments");
|
||
emitRaw("_bstring_rep("); genExpr(args); emitRaw(", ");
|
||
genExpr(args->next); emitRaw(")");
|
||
} else if (strIcmp(name, "EOF") == 0) {
|
||
emitRaw("_beof(");
|
||
genExpr(args);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "LOF") == 0) {
|
||
emitRaw("_blof(");
|
||
genExpr(args);
|
||
emitRaw(")");
|
||
} else if (strIcmp(name, "FREEFILE") == 0) {
|
||
emitRaw("_bfreefile()");
|
||
// --- Array functions (need special codegen) ---
|
||
} else if (strIcmp(name, "LBOUND") == 0) {
|
||
emitRaw("0");
|
||
} else if (strIcmp(name, "UBOUND") == 0) {
|
||
if (args && args->type == NODE_IDENT) {
|
||
emitRaw("(%s_size - 1)", cleanName(args->sval));
|
||
} else {
|
||
fatal(0, "UBOUND requires an array name");
|
||
}
|
||
} else {
|
||
// Check external function definitions and compile-time builtins
|
||
const char *tmpl = NULL;
|
||
ExternFunc *ef = externFuncLookup(name);
|
||
if (ef) {
|
||
tmpl = ef->cCode;
|
||
} else {
|
||
const BuiltinDef *bd = builtinDefLookup(name);
|
||
if (bd) tmpl = bd->cCode;
|
||
}
|
||
|
||
if (tmpl) {
|
||
// Expand template: % = first arg, %1 %2 etc = numbered args
|
||
const char *t = tmpl;
|
||
while (*t) {
|
||
if (*t == '%') {
|
||
t++;
|
||
int argNum = 0;
|
||
if (*t >= '1' && *t <= '9') {
|
||
argNum = *t - '1';
|
||
t++;
|
||
}
|
||
// Find the nth argument
|
||
Node *arg = args;
|
||
for (int i = 0; i < argNum && arg; i++)
|
||
arg = arg->next;
|
||
if (arg) genExpr(arg);
|
||
else emitRaw("0"); // missing arg
|
||
} else {
|
||
char c[2] = {*t, '\0'};
|
||
emitRaw("%s", c);
|
||
t++;
|
||
}
|
||
}
|
||
} else {
|
||
// Unknown builtin – just emit as-is
|
||
emitRaw("%s(", cleanName(name));
|
||
for (Node *a = args; a; a = a->next) {
|
||
if (a != args) emitRaw(", ");
|
||
genExpr(a);
|
||
}
|
||
emitRaw(")");
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// Generate code for an expression node
|
||
static void genExpr(Node *n) {
|
||
if (!n) { emitRaw("0"); return; }
|
||
|
||
switch (n->type) {
|
||
case NODE_INT_LIT:
|
||
emitRaw("%d", n->ival);
|
||
break;
|
||
|
||
case NODE_DBL_LIT:
|
||
emitRaw("%g", n->dval);
|
||
break;
|
||
|
||
case NODE_STR_LIT:
|
||
// Emit as a C string literal
|
||
emitRaw("\"");
|
||
for (const char *p = n->sval; p && *p; p++) {
|
||
if (*p == '"') emitRaw("\\\"");
|
||
else if (*p == '\\') emitRaw("\\\\");
|
||
else if (*p == '\n') emitRaw("\\n");
|
||
else if (*p == '\t') emitRaw("\\t");
|
||
else emitRaw("%c", *p);
|
||
}
|
||
emitRaw("\"");
|
||
break;
|
||
|
||
case NODE_IDENT: {
|
||
const char *cn = cleanName(n->sval);
|
||
// Inside a function, check if this is the function name (return var)
|
||
if (gInFunc && gFuncName && strIcmp(n->sval, gFuncName) == 0) {
|
||
emitRaw("%s_ret", cn);
|
||
} else if (isByrefParam(n->sval)) {
|
||
// BYREF parameter: dereference the pointer
|
||
emitRaw("(*%s)", cn);
|
||
} else {
|
||
emitRaw("%s", cn);
|
||
}
|
||
break;
|
||
}
|
||
|
||
case NODE_ARRAY_REF:
|
||
emitRaw("%s[", cleanName(n->sval));
|
||
genArrayFlatIndex(n->sval, n->a);
|
||
emitRaw("]");
|
||
break;
|
||
|
||
case NODE_DOT_ACCESS:
|
||
// base.field — base is in n->a, field name in n->sval
|
||
genExpr(n->a);
|
||
emitRaw(".%s", cleanName(n->sval));
|
||
break;
|
||
|
||
case NODE_UNOP:
|
||
if (n->ival == TOK_MINUS) {
|
||
emitRaw("(-(");
|
||
genExpr(n->a);
|
||
emitRaw("))");
|
||
} else if (n->ival == TOK_NOT) {
|
||
// If operand is a comparison, use logical NOT for cleaner code
|
||
int isCmp = (n->a->type == NODE_BINOP &&
|
||
(n->a->ival == TOK_EQ || n->a->ival == TOK_NE ||
|
||
n->a->ival == TOK_LT || n->a->ival == TOK_GT ||
|
||
n->a->ival == TOK_LE || n->a->ival == TOK_GE ||
|
||
n->a->ival == TOK_AND || n->a->ival == TOK_OR));
|
||
if (isCmp || n->a->type == NODE_UNOP) {
|
||
emitRaw("(!(");
|
||
genExpr(n->a);
|
||
emitRaw("))");
|
||
} else {
|
||
emitRaw("(~(int)(");
|
||
genExpr(n->a);
|
||
emitRaw("))");
|
||
}
|
||
}
|
||
break;
|
||
|
||
case NODE_BINOP: {
|
||
int op = n->ival;
|
||
// String concatenation
|
||
if (n->dataType == TYPE_STR && (op == TOK_PLUS || op == TOK_AMP)) {
|
||
emitRaw("_bconcat(");
|
||
genExpr(n->a);
|
||
emitRaw(", ");
|
||
genExpr(n->b);
|
||
emitRaw(")");
|
||
break;
|
||
}
|
||
// String comparison
|
||
if (isStringExpr(n->a) && isStringExpr(n->b)) {
|
||
const char *cmpOp;
|
||
switch (op) {
|
||
case TOK_EQ: cmpOp = "==0"; break;
|
||
case TOK_NE: cmpOp = "!=0"; break;
|
||
case TOK_LT: cmpOp = "<0"; break;
|
||
case TOK_GT: cmpOp = ">0"; break;
|
||
case TOK_LE: cmpOp = "<=0"; break;
|
||
case TOK_GE: cmpOp = ">=0"; break;
|
||
default: cmpOp = "==0"; break;
|
||
}
|
||
emitRaw("(strcmp(");
|
||
genExpr(n->a);
|
||
emitRaw(", ");
|
||
genExpr(n->b);
|
||
emitRaw(")%s)", cmpOp);
|
||
break;
|
||
}
|
||
// Power operator: emit as pow() call
|
||
if (op == TOK_CARET) {
|
||
emitRaw("pow(");
|
||
genExpr(n->a);
|
||
emitRaw(", ");
|
||
genExpr(n->b);
|
||
emitRaw(")");
|
||
break;
|
||
}
|
||
// Integer division: cast operands to int
|
||
if (op == TOK_BSLASH) {
|
||
emitRaw("((int)(");
|
||
genExpr(n->a);
|
||
emitRaw(") / (int)(");
|
||
genExpr(n->b);
|
||
emitRaw("))");
|
||
break;
|
||
}
|
||
// Float division: BASIC '/' always produces a floating-point result
|
||
if (op == TOK_SLASH) {
|
||
emitRaw("((double)(");
|
||
genExpr(n->a);
|
||
emitRaw(") / (double)(");
|
||
genExpr(n->b);
|
||
emitRaw("))");
|
||
break;
|
||
}
|
||
// All other numeric and logical binary operators
|
||
emitRaw("(");
|
||
genExpr(n->a);
|
||
switch (op) {
|
||
case TOK_PLUS: emitRaw(" + "); break;
|
||
case TOK_MINUS: emitRaw(" - "); break;
|
||
case TOK_STAR: emitRaw(" * "); break;
|
||
case TOK_MOD: emitRaw(" %% "); break;
|
||
case TOK_EQ: emitRaw(" == "); break;
|
||
case TOK_NE: emitRaw(" != "); break;
|
||
case TOK_LT: emitRaw(" < "); break;
|
||
case TOK_GT: emitRaw(" > "); break;
|
||
case TOK_LE: emitRaw(" <= "); break;
|
||
case TOK_GE: emitRaw(" >= "); break;
|
||
case TOK_AND: emitRaw(" & "); break;
|
||
case TOK_OR: emitRaw(" | "); break;
|
||
case TOK_XOR: emitRaw(" ^ "); break;
|
||
default: emitRaw(" ? "); break;
|
||
}
|
||
genExpr(n->b);
|
||
emitRaw(")");
|
||
break;
|
||
}
|
||
|
||
case NODE_FUNC_CALL:
|
||
// SIZEOF(TypeName) — emit sizeof(struct _b_TypeName)
|
||
if (strIcmp(n->sval, "SIZEOF") == 0 && n->a &&
|
||
n->a->type == NODE_IDENT) {
|
||
int ui = udtLookup(n->a->sval);
|
||
if (ui >= 0) {
|
||
emitRaw("(long)sizeof(%s)", cUdtTypeStr(ui));
|
||
break;
|
||
}
|
||
}
|
||
if (isBuiltinFunc(n->sval)) {
|
||
genBuiltinCall(n->sval, n->a);
|
||
} else {
|
||
emitRaw("%s(", cleanName(n->sval));
|
||
// Generate arguments, applying BYREF (&) where needed
|
||
Symbol *fsym = symLookup(n->sval);
|
||
int pi = 0;
|
||
for (Node *a = n->a; a; a = a->next, pi++) {
|
||
if (a != n->a) emitRaw(", ");
|
||
int needRef = (fsym && pi < fsym->paramCount &&
|
||
fsym->paramModes[pi] == PASS_BYREF);
|
||
if (needRef && a->type == NODE_IDENT) {
|
||
emitRaw("&%s", cleanName(a->sval));
|
||
} else if (needRef && a->type == NODE_ARRAY_REF) {
|
||
emitRaw("&%s[", cleanName(a->sval));
|
||
genArrayFlatIndex(a->sval, a->a);
|
||
emitRaw("]");
|
||
} else {
|
||
genExpr(a);
|
||
}
|
||
}
|
||
emitRaw(")");
|
||
}
|
||
break;
|
||
|
||
default:
|
||
emitRaw("/* unknown expr node %d */0", n->type);
|
||
break;
|
||
}
|
||
}
|
||
|
||
|
||
// Generate a variable declaration in C
|
||
static void genVarDecl(const char *name, DataType dt, int isStatic) {
|
||
const char *cn = cleanName(name);
|
||
if (isStatic) emit("static ");
|
||
else emit("");
|
||
|
||
if (dt == TYPE_STR)
|
||
emitRaw("char *%s _BUNUSED = _bstr(\"\");\n", cn);
|
||
else
|
||
emitRaw("%s %s _BUNUSED = %s;\n", cTypeStr(dt), cn, cDefaultVal(dt));
|
||
}
|
||
|
||
|
||
// Emit a row-major flattened index for multidimensional array access.
|
||
// For 1D, just emits the single index expression (backward compatible).
|
||
// For nD, emits: ((i0) * nameDim1 + (i1)) * nameDim2 + (i2) ...
|
||
static void genArrayFlatIndex(const char *name, Node *indices) {
|
||
char cn[MAX_IDENT];
|
||
strncpy(cn, cleanName(name), MAX_IDENT - 1);
|
||
cn[MAX_IDENT - 1] = '\0';
|
||
|
||
// Count dimensions
|
||
int ndims = 0;
|
||
for (Node *p = indices; p; p = p->next) ndims++;
|
||
|
||
if (ndims <= 1) {
|
||
genExpr(indices);
|
||
} else {
|
||
// Row-major: fold left: acc = idx[0], for k=1..n-1: acc = acc * dimK + idx[k]
|
||
// For 3D: (((i) * dim1 + (j)) * dim2 + (k))
|
||
Node *idx = indices;
|
||
// Emit opening parens for nesting: need (ndims-1) wrapping levels
|
||
for (int i = 1; i < ndims; i++) emitRaw("(");
|
||
emitRaw("(");
|
||
genExpr(idx);
|
||
emitRaw(")");
|
||
idx = idx->next;
|
||
int dimIdx = 1;
|
||
while (idx) {
|
||
emitRaw(" * %s_dim%d + (", cn, dimIdx);
|
||
genExpr(idx);
|
||
emitRaw("))");
|
||
idx = idx->next;
|
||
dimIdx++;
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// Generate a DIM array declaration (supports multidimensional)
|
||
static void genDimArray(const char *name, DataType dt, Node *sizeList,
|
||
int ndims) {
|
||
char cn[MAX_IDENT];
|
||
strncpy(cn, cleanName(name), MAX_IDENT - 1);
|
||
cn[MAX_IDENT - 1] = '\0';
|
||
|
||
emit("%s *%s _BUNUSED = NULL;\n", cTypeStr(dt), cn);
|
||
|
||
if (ndims <= 1) {
|
||
// 1D: backward-compatible
|
||
emit("int %s_size _BUNUSED = 0;\n", cn);
|
||
if (sizeList) {
|
||
emit("%s_size = (", cn);
|
||
genExpr(sizeList);
|
||
emitRaw(") + 1;\n");
|
||
emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
|
||
cn, cTypeStr(dt), cn, cTypeStr(dt));
|
||
if (dt == TYPE_STR) {
|
||
emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
|
||
cn, cn);
|
||
}
|
||
}
|
||
} else {
|
||
// Multi-dimensional
|
||
Node *dim = sizeList;
|
||
for (int i = 0; i < ndims; i++, dim = dim->next) {
|
||
emit("int %s_dim%d _BUNUSED = 0;\n", cn, i);
|
||
}
|
||
emit("int %s_size _BUNUSED = 0;\n", cn);
|
||
|
||
dim = sizeList;
|
||
for (int i = 0; i < ndims; i++, dim = dim->next) {
|
||
emit("%s_dim%d = (", cn, i);
|
||
genExpr(dim);
|
||
emitRaw(") + 1;\n");
|
||
}
|
||
|
||
emit("%s_size = ", cn);
|
||
for (int i = 0; i < ndims; i++) {
|
||
if (i > 0) emitRaw(" * ");
|
||
emitRaw("%s_dim%d", cn, i);
|
||
}
|
||
emitRaw(";\n");
|
||
|
||
emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
|
||
cn, cTypeStr(dt), cn, cTypeStr(dt));
|
||
if (dt == TYPE_STR) {
|
||
emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
|
||
cn, cn);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// Generate code for a PRINT statement
|
||
static void genPrint(Node *pr) {
|
||
Node *item = pr->a;
|
||
|
||
// Empty PRINT: just a newline
|
||
if (!item) {
|
||
emit("printf(\"\\n\");\n");
|
||
return;
|
||
}
|
||
|
||
// Build printf call with format string and arguments
|
||
emit("printf(\"");
|
||
|
||
// First pass: build format string
|
||
for (Node *it = item; it; it = it->next) {
|
||
if (it->a) {
|
||
emitRaw("%s", cFmt(it->a->dataType));
|
||
}
|
||
// Separator
|
||
if (it->ival == 1) {
|
||
// semicolon: no separator
|
||
} else if (it->ival == 2) {
|
||
emitRaw("\\t"); // comma: tab
|
||
} else if (!it->next) {
|
||
// Last item with no trailing separator: add newline
|
||
emitRaw("\\n");
|
||
}
|
||
}
|
||
emitRaw("\"");
|
||
|
||
// Second pass: arguments
|
||
for (Node *it = item; it; it = it->next) {
|
||
if (it->a) {
|
||
emitRaw(", ");
|
||
genExpr(it->a);
|
||
}
|
||
}
|
||
emitRaw(");\n");
|
||
|
||
// Free temporary strings created during expression evaluation
|
||
emit("_bfree_temps();\n");
|
||
}
|
||
|
||
|
||
// Generate code for a PRINT USING statement
|
||
static void genPrintUsing(Node *pu) {
|
||
// Initialize format parser with format string
|
||
emit("_busing_init(");
|
||
genExpr(pu->a);
|
||
emitRaw(");\n");
|
||
|
||
// Format and print each value
|
||
for (Node *val = pu->b; val; val = val->next) {
|
||
if (val->dataType == TYPE_STR) {
|
||
emit("_busing_str(");
|
||
} else {
|
||
emit("_busing_num(");
|
||
}
|
||
genExpr(val);
|
||
emitRaw(");\n");
|
||
}
|
||
|
||
// Print newline and cleanup
|
||
emit("_busing_end();\n");
|
||
emit("_bfree_temps();\n");
|
||
}
|
||
|
||
|
||
// Generate code for an INPUT statement
|
||
static void genInput(Node *inp) {
|
||
// Print prompt if any
|
||
if (inp->sval) {
|
||
emit("printf(\"%%s\", \"%s\");\n", inp->sval);
|
||
} else {
|
||
emit("printf(\"? \");\n");
|
||
}
|
||
emit("fflush(stdout);\n");
|
||
|
||
// Read each variable
|
||
for (Node *v = inp->a; v; v = v->next) {
|
||
if (v->dataType == TYPE_STR) {
|
||
emit("{ char _buf[1024]; if(fgets(_buf, sizeof(_buf), stdin)) {\n");
|
||
gIndent++;
|
||
emit("_buf[strcspn(_buf, \"\\n\")] = 0;\n");
|
||
emit("_bstr_assign(&%s, _buf);\n", cleanName(v->sval));
|
||
gIndent--;
|
||
emit("} }\n");
|
||
} else {
|
||
emit("scanf(\"%s\", &%s);\n",
|
||
cScanfFmt(v->dataType), cleanName(v->sval));
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// Generate code for an assignment statement
|
||
static void genAssign(Node *n) {
|
||
Node *target = n->a;
|
||
Node *value = n->b;
|
||
|
||
// Check if we're assigning to the function return variable
|
||
if (gInFunc && gFuncName && target->type == NODE_IDENT &&
|
||
strIcmp(target->sval, gFuncName) == 0) {
|
||
const char *cn = cleanName(target->sval);
|
||
if (gFuncRet == TYPE_STR) {
|
||
emit("_bstr_assign(&%s_ret, ", cn);
|
||
genExpr(value);
|
||
emitRaw(");\n");
|
||
} else {
|
||
emit("%s_ret = ", cn);
|
||
genExpr(value);
|
||
emitRaw(";\n");
|
||
}
|
||
emit("_bfree_temps();\n");
|
||
return;
|
||
}
|
||
|
||
// Dot-access assignment: var.field = expr
|
||
if (target->type == NODE_DOT_ACCESS) {
|
||
int strLen = target->ival; // >0 for fixed-length STRING * N
|
||
if (target->dataType == TYPE_STR && strLen > 0) {
|
||
// Fixed-length string: strncpy + null terminate
|
||
emit("strncpy(");
|
||
genExpr(target->a);
|
||
emitRaw(".%s, ", cleanName(target->sval));
|
||
genExpr(value);
|
||
emitRaw(", %d);\n", strLen);
|
||
emit("");
|
||
genExpr(target->a);
|
||
emitRaw(".%s[%d] = '\\0';\n", cleanName(target->sval), strLen);
|
||
} else if (target->dataType == TYPE_STR) {
|
||
// Dynamic string in struct — unusual but handle it
|
||
emit("_bstr_assign(&(");
|
||
genExpr(target->a);
|
||
emitRaw(".%s), ", cleanName(target->sval));
|
||
genExpr(value);
|
||
emitRaw(");\n");
|
||
} else {
|
||
// Numeric field
|
||
emit("");
|
||
genExpr(target->a);
|
||
emitRaw(".%s = ", cleanName(target->sval));
|
||
genExpr(value);
|
||
emitRaw(";\n");
|
||
}
|
||
emit("_bfree_temps();\n");
|
||
return;
|
||
}
|
||
|
||
// Check if target is a BYREF parameter (needs pointer dereference)
|
||
int byref = (target->type == NODE_IDENT && isByrefParam(target->sval));
|
||
|
||
// String assignment uses _bstr_assign
|
||
if (target->dataType == TYPE_STR || isStringExpr(value)) {
|
||
if (target->type == NODE_ARRAY_REF) {
|
||
emit("_bstr_assign(&%s[", cleanName(target->sval));
|
||
genArrayFlatIndex(target->sval, target->a);
|
||
emitRaw("], ");
|
||
} else if (byref) {
|
||
emit("_bstr_assign(%s, ", cleanName(target->sval));
|
||
} else {
|
||
emit("_bstr_assign(&%s, ", cleanName(target->sval));
|
||
}
|
||
genExpr(value);
|
||
emitRaw(");\n");
|
||
} else {
|
||
// Numeric assignment
|
||
if (target->type == NODE_ARRAY_REF) {
|
||
emit("%s[", cleanName(target->sval));
|
||
genArrayFlatIndex(target->sval, target->a);
|
||
emitRaw("] = ");
|
||
} else if (byref) {
|
||
emit("(*%s) = ", cleanName(target->sval));
|
||
} else {
|
||
emit("%s = ", cleanName(target->sval));
|
||
}
|
||
genExpr(value);
|
||
emitRaw(";\n");
|
||
}
|
||
emit("_bfree_temps();\n");
|
||
}
|
||
|
||
|
||
// Generate a SUB or FUNCTION definition
|
||
static void genFuncDef(Node *n) {
|
||
int isFunc = (n->type == NODE_FUNC);
|
||
// Store a permanent copy of the clean function name so it survives
|
||
// additional cleanName() calls during parameter/body emission
|
||
char fname[MAX_IDENT];
|
||
strncpy(fname, cleanName(n->sval), MAX_IDENT - 1);
|
||
fname[MAX_IDENT - 1] = '\0';
|
||
DataType ret = isFunc ? n->dataType : TYPE_VOID;
|
||
|
||
// Save and set function context
|
||
int prevInFunc = gInFunc;
|
||
const char *prevFuncName = gFuncName;
|
||
DataType prevFuncRet = gFuncRet;
|
||
gInFunc = 1;
|
||
gFuncName = n->sval;
|
||
gFuncRet = ret;
|
||
|
||
// Function signature
|
||
emitRaw("%s %s(", cTypeStr(ret), fname);
|
||
int first = 1;
|
||
for (Node *p = n->a; p; p = p->next) {
|
||
if (!first) emitRaw(", ");
|
||
first = 0;
|
||
if (p->ival == PASS_BYREF) {
|
||
emitRaw("%s *%s", cTypeStr(p->dataType), cleanName(p->sval));
|
||
} else {
|
||
if (p->dataType == TYPE_STR)
|
||
emitRaw("const char *%s", cleanName(p->sval));
|
||
else
|
||
emitRaw("%s %s", cTypeStr(p->dataType), cleanName(p->sval));
|
||
}
|
||
}
|
||
if (first) emitRaw("void"); // no params
|
||
emitRaw(") {\n");
|
||
gIndent++;
|
||
|
||
// For FUNCTION: declare the return variable (named <FuncName>_ret)
|
||
if (isFunc) {
|
||
emit("%s %s_ret = %s;\n", cTypeStr(ret), fname, cDefaultVal(ret));
|
||
}
|
||
|
||
// Generate body
|
||
if (n->b) genBlock(n->b);
|
||
|
||
// Return statement for FUNCTION
|
||
if (isFunc) {
|
||
emit("return %s_ret;\n", fname);
|
||
}
|
||
|
||
gIndent--;
|
||
emitRaw("}\n\n");
|
||
|
||
// Restore context
|
||
gInFunc = prevInFunc;
|
||
gFuncName = prevFuncName;
|
||
gFuncRet = prevFuncRet;
|
||
}
|
||
|
||
|
||
// Generate code for a single statement
|
||
static void genStmt(Node *n) {
|
||
if (!n) return;
|
||
|
||
switch (n->type) {
|
||
case NODE_LABEL:
|
||
// Only emit C labels that are actually targeted by GOTO/GOSUB,
|
||
// to avoid -Wunused-label warnings.
|
||
if (n->sval) {
|
||
// Named label
|
||
if (isGotoStrTarget(n->sval))
|
||
emitRaw("%s: ;\n", cleanName(n->sval));
|
||
} else if (isGotoTarget(n->ival)) {
|
||
emitRaw("L%d: ;\n", n->ival);
|
||
}
|
||
break;
|
||
|
||
case NODE_TYPE_DEF:
|
||
// TYPE definitions are emitted globally in generate(), not here
|
||
break;
|
||
|
||
case NODE_DIM:
|
||
if (n->dataType == TYPE_UDT && n->ival == 0) {
|
||
// UDT scalar: struct _b_Name var; memset(&var, 0, sizeof(var));
|
||
const char *uts = cUdtTypeStr(n->ival2);
|
||
char cn[MAX_IDENT];
|
||
strncpy(cn, cleanName(n->sval), MAX_IDENT - 1);
|
||
cn[MAX_IDENT - 1] = '\0';
|
||
emit("%s %s _BUNUSED;\n", uts, cn);
|
||
emit("memset(&%s, 0, sizeof(%s));\n", cn, cn);
|
||
} else if (n->dataType == TYPE_UDT && n->ival > 0) {
|
||
// UDT array
|
||
const char *uts = cUdtTypeStr(n->ival2);
|
||
char cn[MAX_IDENT];
|
||
strncpy(cn, cleanName(n->sval), MAX_IDENT - 1);
|
||
cn[MAX_IDENT - 1] = '\0';
|
||
emit("%s *%s _BUNUSED = NULL;\n", uts, cn);
|
||
emit("int %s_size _BUNUSED = 0;\n", cn);
|
||
// Compute size and allocate
|
||
if (n->a) {
|
||
if (n->ival <= 1) {
|
||
emit("%s_size = (", cn);
|
||
genExpr(n->a);
|
||
emitRaw(") + 1;\n");
|
||
} else {
|
||
Node *dim = n->a;
|
||
for (int i = 0; i < n->ival; i++, dim = dim->next) {
|
||
emit("int %s_dim%d _BUNUSED = (", cn, i);
|
||
genExpr(dim);
|
||
emitRaw(") + 1;\n");
|
||
}
|
||
emit("%s_size = ", cn);
|
||
for (int i = 0; i < n->ival; i++) {
|
||
if (i > 0) emitRaw(" * ");
|
||
emitRaw("%s_dim%d", cn, i);
|
||
}
|
||
emitRaw(";\n");
|
||
}
|
||
emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
|
||
cn, uts, cn, uts);
|
||
}
|
||
} else if (n->ival) { // array -- n->ival is ndims
|
||
genDimArray(n->sval, n->dataType, n->a, n->ival);
|
||
} else { // scalar
|
||
genVarDecl(n->sval, n->dataType, 0);
|
||
}
|
||
break;
|
||
|
||
case NODE_REDIM: {
|
||
char rcn[MAX_IDENT];
|
||
strncpy(rcn, cleanName(n->sval), MAX_IDENT - 1);
|
||
rcn[MAX_IDENT - 1] = '\0';
|
||
int ndims = n->ival;
|
||
|
||
if (ndims <= 1) {
|
||
// 1D REDIM: backward-compatible realloc
|
||
emit("{ int _old_sz = %s_size;\n", rcn);
|
||
gIndent++;
|
||
emit("%s_size = (", rcn);
|
||
genExpr(n->a);
|
||
emitRaw(") + 1;\n");
|
||
emit("%s = (%s*)realloc(%s, %s_size * sizeof(%s));\n",
|
||
rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType));
|
||
emit("if (%s_size > _old_sz)\n", rcn);
|
||
gIndent++;
|
||
emit("memset(%s + _old_sz, 0, (%s_size - _old_sz) * sizeof(%s));\n",
|
||
rcn, rcn, cTypeStr(n->dataType));
|
||
gIndent--;
|
||
gIndent--;
|
||
emit("}\n");
|
||
} else {
|
||
// Multi-dim REDIM: recompute dims, free + calloc
|
||
emit("{\n");
|
||
gIndent++;
|
||
Node *dim = n->a;
|
||
for (int i = 0; i < ndims; i++, dim = dim->next) {
|
||
emit("%s_dim%d = (", rcn, i);
|
||
genExpr(dim);
|
||
emitRaw(") + 1;\n");
|
||
}
|
||
emit("%s_size = ", rcn);
|
||
for (int i = 0; i < ndims; i++) {
|
||
if (i > 0) emitRaw(" * ");
|
||
emitRaw("%s_dim%d", rcn, i);
|
||
}
|
||
emitRaw(";\n");
|
||
emit("free(%s);\n", rcn);
|
||
emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
|
||
rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType));
|
||
if (n->dataType == TYPE_STR) {
|
||
emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
|
||
rcn, rcn);
|
||
}
|
||
gIndent--;
|
||
emit("}\n");
|
||
}
|
||
break;
|
||
}
|
||
|
||
case NODE_LOCAL:
|
||
genVarDecl(n->sval, n->dataType, 0);
|
||
break;
|
||
|
||
case NODE_STATIC:
|
||
genVarDecl(n->sval, n->dataType, 1);
|
||
break;
|
||
|
||
case NODE_ASSIGN:
|
||
genAssign(n);
|
||
break;
|
||
|
||
case NODE_PRINT:
|
||
genPrint(n);
|
||
break;
|
||
|
||
case NODE_PRINT_USING:
|
||
genPrintUsing(n);
|
||
break;
|
||
|
||
case NODE_INPUT:
|
||
genInput(n);
|
||
break;
|
||
|
||
case NODE_IF:
|
||
emit("if (");
|
||
genExpr(n->a);
|
||
emitRaw(") {\n");
|
||
gIndent++;
|
||
if (n->b) genBlock(n->b);
|
||
gIndent--;
|
||
if (n->c) {
|
||
if (n->c->type == NODE_IF) {
|
||
// ELSEIF: emit as "} else if (...)"
|
||
emit("} else ");
|
||
// Don't indent the nested if
|
||
genStmt(n->c);
|
||
return; // the nested if handles its own closing
|
||
} else {
|
||
emit("} else {\n");
|
||
gIndent++;
|
||
genBlock(n->c);
|
||
gIndent--;
|
||
}
|
||
}
|
||
emit("}\n");
|
||
break;
|
||
|
||
case NODE_FOR: {
|
||
const char *vn = cleanName(n->sval);
|
||
// Determine the C type for the loop variable
|
||
const char *vtype = cTypeStr(inferVarType(n->sval));
|
||
// When inside a function, the loop variable may not be declared
|
||
// locally. Wrap in a block and declare the variable to be safe.
|
||
// Any prior LOCAL/DIM of the same name has _BUNUSED to suppress
|
||
// shadowing warnings.
|
||
int needDecl = gInFunc;
|
||
if (n->c) {
|
||
// FOR with STEP: use a block with a step variable so the
|
||
// step expression is evaluated once, and the comparison
|
||
// direction adapts to the sign of the step at runtime.
|
||
emit("{ /* FOR %s with STEP */\n", vn);
|
||
gIndent++;
|
||
if (needDecl) emit("%s %s;\n", vtype, vn);
|
||
emit("%s _step_%s = ", vtype, vn);
|
||
genExpr(n->c);
|
||
emitRaw(";\n");
|
||
emit("for (%s = ", vn);
|
||
genExpr(n->a);
|
||
emitRaw("; _step_%s > 0 ? %s <= ", vn, vn);
|
||
genExpr(n->b);
|
||
emitRaw(" : %s >= ", vn);
|
||
genExpr(n->b);
|
||
emitRaw("; %s += _step_%s) {\n", vn, vn);
|
||
gIndent++;
|
||
if (n->d) genBlock(n->d);
|
||
gIndent--;
|
||
emit("}\n");
|
||
gIndent--;
|
||
emit("}\n");
|
||
} else {
|
||
// Default step = 1: simple ascending loop
|
||
if (needDecl) {
|
||
emit("{ %s %s;\n", vtype, vn);
|
||
gIndent++;
|
||
}
|
||
emit("for (%s = ", vn);
|
||
genExpr(n->a);
|
||
emitRaw("; %s <= ", vn);
|
||
genExpr(n->b);
|
||
emitRaw("; %s++) {\n", vn);
|
||
gIndent++;
|
||
if (n->d) genBlock(n->d);
|
||
gIndent--;
|
||
emit("}\n");
|
||
if (needDecl) {
|
||
gIndent--;
|
||
emit("}\n");
|
||
}
|
||
}
|
||
break;
|
||
}
|
||
|
||
case NODE_WHILE:
|
||
emit("while (");
|
||
genExpr(n->a);
|
||
emitRaw(") {\n");
|
||
gIndent++;
|
||
if (n->b) genBlock(n->b);
|
||
gIndent--;
|
||
emit("}\n");
|
||
break;
|
||
|
||
case NODE_DO_LOOP: {
|
||
int isUntil = n->ival & 1;
|
||
int atBottom = n->ival & 2;
|
||
|
||
if (!n->a) {
|
||
// Infinite loop: DO ... LOOP
|
||
emit("for (;;) {\n");
|
||
} else if (atBottom) {
|
||
emit("do {\n");
|
||
} else {
|
||
// Condition at top
|
||
emit("while (");
|
||
if (isUntil) emitRaw("!(");
|
||
genExpr(n->a);
|
||
if (isUntil) emitRaw(")");
|
||
emitRaw(") {\n");
|
||
}
|
||
gIndent++;
|
||
if (n->b) genBlock(n->b);
|
||
gIndent--;
|
||
if (atBottom && n->a) {
|
||
emit("} while (");
|
||
if (isUntil) emitRaw("!(");
|
||
genExpr(n->a);
|
||
if (isUntil) emitRaw(")");
|
||
emitRaw(");\n");
|
||
} else {
|
||
emit("}\n");
|
||
}
|
||
break;
|
||
}
|
||
|
||
case NODE_GOTO:
|
||
if (n->sval) {
|
||
emit("goto %s;\n", cleanName(n->sval));
|
||
} else if (n->ival) {
|
||
emit("goto L%d;\n", n->ival);
|
||
}
|
||
break;
|
||
|
||
case NODE_GOSUB:
|
||
if (!gRelease)
|
||
emit("if (_gosub_sp >= _GOSUB_MAX) { fprintf(stderr, \"GOSUB stack overflow\\n\"); exit(1); }\n");
|
||
emit("_gosub_stack[_gosub_sp++] = %d;\n", n->ival2);
|
||
if (n->sval)
|
||
emit("goto %s;\n", cleanName(n->sval));
|
||
else
|
||
emit("goto L%d;\n", n->ival);
|
||
emitRaw("_gr%d: ;\n", n->ival2);
|
||
break;
|
||
|
||
case NODE_RETURN:
|
||
if (gInFunc) {
|
||
// Return from FUNCTION
|
||
if (n->a) {
|
||
if (gFuncRet == TYPE_STR) {
|
||
emit("_bstr_assign(&%s_ret, ",
|
||
cleanName(gFuncName));
|
||
genExpr(n->a);
|
||
emitRaw(");\n");
|
||
} else {
|
||
emit("%s_ret = ", cleanName(gFuncName));
|
||
genExpr(n->a);
|
||
emitRaw(";\n");
|
||
}
|
||
}
|
||
emit("return %s_ret;\n", cleanName(gFuncName));
|
||
} else {
|
||
// RETURN from GOSUB: dispatch back using the stack
|
||
if (!gRelease)
|
||
emit("if (_gosub_sp <= 0) { fprintf(stderr, \"RETURN without GOSUB\\n\"); exit(1); }\n");
|
||
emit("switch (_gosub_stack[--_gosub_sp]) {\n");
|
||
for (int i = 0; i < gGosubCount; i++) {
|
||
emit(" case %d: goto _gr%d;\n", i, i);
|
||
}
|
||
emit("}\n");
|
||
}
|
||
break;
|
||
|
||
case NODE_EXIT:
|
||
if (n->ival == TOK_FOR || n->ival == TOK_WHILE || n->ival == TOK_DO)
|
||
emit("break;\n");
|
||
else if (n->ival == TOK_SUB)
|
||
emit("return;\n");
|
||
else if (n->ival == TOK_FUNCTION && gInFunc && gFuncName)
|
||
emit("return %s_ret;\n", cleanName(gFuncName));
|
||
break;
|
||
|
||
case NODE_CONTINUE:
|
||
emit("continue;\n");
|
||
break;
|
||
|
||
case NODE_CALL: {
|
||
const char *cn = cleanName(n->sval);
|
||
emit("%s(", cn);
|
||
Symbol *fsym = symLookup(n->sval);
|
||
int pi = 0;
|
||
for (Node *a = n->a; a; a = a->next, pi++) {
|
||
if (a != n->a) emitRaw(", ");
|
||
int needRef = (fsym && pi < fsym->paramCount &&
|
||
fsym->paramModes[pi] == PASS_BYREF);
|
||
if (needRef && a->type == NODE_IDENT) {
|
||
emitRaw("&%s", cleanName(a->sval));
|
||
} else if (needRef && a->type == NODE_ARRAY_REF) {
|
||
emitRaw("&%s[", cleanName(a->sval));
|
||
genExpr(a->a);
|
||
emitRaw("]");
|
||
} else {
|
||
genExpr(a);
|
||
}
|
||
}
|
||
emitRaw(");\n");
|
||
emit("_bfree_temps();\n");
|
||
break;
|
||
}
|
||
|
||
case NODE_SUB:
|
||
case NODE_FUNC:
|
||
// These are generated separately before main()
|
||
break;
|
||
|
||
case NODE_END:
|
||
emit("exit(0);\n");
|
||
break;
|
||
|
||
case NODE_OPEN: {
|
||
if (n->ival == 4) {
|
||
// RANDOM mode
|
||
emit("_bfile_open_random(");
|
||
genExpr(n->b);
|
||
emitRaw(", ");
|
||
genExpr(n->a);
|
||
emitRaw(", ");
|
||
if (n->c) {
|
||
genExpr(n->c);
|
||
} else {
|
||
emitRaw("0");
|
||
}
|
||
emitRaw(");\n");
|
||
} else {
|
||
const char *modes[] = {"r", "w", "a", "rb"};
|
||
emit("_bfile_open(");
|
||
genExpr(n->b);
|
||
emitRaw(", ");
|
||
genExpr(n->a);
|
||
emitRaw(", \"%s\");\n", modes[n->ival]);
|
||
}
|
||
break;
|
||
}
|
||
|
||
case NODE_GET:
|
||
// GET #filenum, record, variable
|
||
emit("fseek(_bfile_get(");
|
||
genExpr(n->a);
|
||
emitRaw("), (");
|
||
genExpr(n->b);
|
||
emitRaw(" - 1) * _bfile_reclen[");
|
||
genExpr(n->a);
|
||
emitRaw("], SEEK_SET);\n");
|
||
emit("fread(&%s, _bfile_reclen[", cleanName(n->c->sval));
|
||
genExpr(n->a);
|
||
emitRaw("], 1, _bfile_get(");
|
||
genExpr(n->a);
|
||
emitRaw("));\n");
|
||
break;
|
||
|
||
case NODE_PUT:
|
||
// PUT #filenum, record, variable
|
||
emit("fseek(_bfile_get(");
|
||
genExpr(n->a);
|
||
emitRaw("), (");
|
||
genExpr(n->b);
|
||
emitRaw(" - 1) * _bfile_reclen[");
|
||
genExpr(n->a);
|
||
emitRaw("], SEEK_SET);\n");
|
||
emit("fwrite(&%s, _bfile_reclen[", cleanName(n->c->sval));
|
||
genExpr(n->a);
|
||
emitRaw("], 1, _bfile_get(");
|
||
genExpr(n->a);
|
||
emitRaw("));\n");
|
||
break;
|
||
|
||
case NODE_CLOSE:
|
||
emit("_bfile_close(");
|
||
genExpr(n->b);
|
||
emitRaw(");\n");
|
||
break;
|
||
|
||
case NODE_FILE_PRINT: {
|
||
Node *item = n->a;
|
||
if (!item) {
|
||
// PRINT #n, alone = write newline
|
||
emit("fprintf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \"\\n\");\n");
|
||
break;
|
||
}
|
||
// Build fprintf with format string and arguments
|
||
emit("fprintf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \"");
|
||
for (Node *it = item; it; it = it->next) {
|
||
if (it->a) emitRaw("%s", cFmt(it->a->dataType));
|
||
if (it->ival == 1) {
|
||
// semicolon: no separator
|
||
} else if (it->ival == 2) { emitRaw("\\t"); }
|
||
else if (!it->next) { emitRaw("\\n"); }
|
||
}
|
||
emitRaw("\"");
|
||
for (Node *it = item; it; it = it->next) {
|
||
if (it->a) { emitRaw(", "); genExpr(it->a); }
|
||
}
|
||
emitRaw(");\n");
|
||
emit("_bfree_temps();\n");
|
||
break;
|
||
}
|
||
|
||
case NODE_FILE_INPUT:
|
||
for (Node *v = n->a; v; v = v->next) {
|
||
if (v->dataType == TYPE_STR) {
|
||
emit("_bline_input(");
|
||
genExpr(n->b);
|
||
emitRaw(", &%s);\n", cleanName(v->sval));
|
||
} else {
|
||
emit("fscanf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \"%s\", &%s);\n",
|
||
cScanfFmt(v->dataType), cleanName(v->sval));
|
||
}
|
||
}
|
||
break;
|
||
|
||
case NODE_LINE_INPUT:
|
||
emit("_bline_input(");
|
||
genExpr(n->b);
|
||
emitRaw(", &%s);\n", cleanName(n->a->sval));
|
||
break;
|
||
|
||
case NODE_FILE_WRITE: {
|
||
// WRITE # outputs CSV-style: strings quoted, comma-separated, newline
|
||
int first = 1;
|
||
for (Node *e = n->a; e; e = e->next) {
|
||
if (!first) {
|
||
emit("fprintf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \",\");\n");
|
||
}
|
||
first = 0;
|
||
if (e->dataType == TYPE_STR) {
|
||
emit("fprintf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \"\\\"%%s\\\"\", ");
|
||
genExpr(e);
|
||
emitRaw(");\n");
|
||
} else {
|
||
emit("fprintf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \"%s\", ", cFmt(e->dataType));
|
||
genExpr(e);
|
||
emitRaw(");\n");
|
||
}
|
||
}
|
||
emit("fprintf(_bfile_get(");
|
||
genExpr(n->b);
|
||
emitRaw("), \"\\n\");\n");
|
||
emit("_bfree_temps();\n");
|
||
break;
|
||
}
|
||
|
||
case NODE_DATA:
|
||
// No-op: DATA items are collected and emitted as a static array
|
||
break;
|
||
|
||
case NODE_READ:
|
||
for (Node *v = n->a; v; v = v->next) {
|
||
if (v->dataType == TYPE_STR) {
|
||
emit("_bstr_assign(&%s, _bdata[_bdata_pos].str);\n",
|
||
cleanName(v->sval));
|
||
} else {
|
||
emit("%s = (%s)_bdata[_bdata_pos].num;\n",
|
||
cleanName(v->sval), cTypeStr(v->dataType));
|
||
}
|
||
emit("_bdata_pos++;\n");
|
||
}
|
||
break;
|
||
|
||
case NODE_RESTORE:
|
||
if (n->sval) {
|
||
emit("_bdata_pos = %d;\n", dataIndexForLabel(n->sval));
|
||
} else if (n->ival != 0) {
|
||
emit("_bdata_pos = %d;\n", dataIndexForLine(n->ival));
|
||
} else {
|
||
emit("_bdata_pos = 0;\n");
|
||
}
|
||
break;
|
||
|
||
case NODE_CONST_DECL:
|
||
// No runtime code for constants — they're substituted at parse time
|
||
break;
|
||
|
||
case NODE_SWAP: {
|
||
// Determine the type from the left operand
|
||
DataType swapType = n->a->dataType;
|
||
const char *ctype = "double";
|
||
if (swapType == TYPE_BYTE) ctype = "uint8_t";
|
||
else if (swapType == TYPE_INT) ctype = "int16_t";
|
||
else if (swapType == TYPE_LONG) ctype = "int32_t";
|
||
else if (swapType == TYPE_FLOAT) ctype = "float";
|
||
else if (swapType == TYPE_DBL) ctype = "double";
|
||
|
||
if (swapType == TYPE_STR) {
|
||
// String swap: just swap the pointers
|
||
emit("{ char *_swap_tmp = ");
|
||
genExpr(n->a);
|
||
emitRaw("; ");
|
||
genExpr(n->a);
|
||
emitRaw(" = ");
|
||
genExpr(n->b);
|
||
emitRaw("; ");
|
||
genExpr(n->b);
|
||
emitRaw(" = _swap_tmp; }\n");
|
||
} else {
|
||
emit("{ %s _swap_tmp = ", ctype);
|
||
genExpr(n->a);
|
||
emitRaw("; ");
|
||
genExpr(n->a);
|
||
emitRaw(" = ");
|
||
genExpr(n->b);
|
||
emitRaw("; ");
|
||
genExpr(n->b);
|
||
emitRaw(" = _swap_tmp; }\n");
|
||
}
|
||
break;
|
||
}
|
||
|
||
case NODE_RANDOMIZE:
|
||
if (n->a) {
|
||
emit("srand((unsigned)(");
|
||
genExpr(n->a);
|
||
emitRaw("));\n");
|
||
} else {
|
||
emit("srand((unsigned)time(NULL));\n");
|
||
}
|
||
break;
|
||
|
||
case NODE_SELECT: {
|
||
// Emit test expression into a temp variable
|
||
static int selectId = 0;
|
||
int sid = selectId++;
|
||
DataType stype = n->a->dataType;
|
||
if (stype == TYPE_STR) {
|
||
emit("{ const char *_sel%d = ", sid);
|
||
genExpr(n->a);
|
||
emitRaw(";\n");
|
||
} else {
|
||
emit("{ double _sel%d = ", sid);
|
||
genExpr(n->a);
|
||
emitRaw(";\n");
|
||
}
|
||
// Emit CASE blocks as if/else if chain
|
||
int first = 1;
|
||
for (Node *c = n->b; c; c = c->next) {
|
||
if (c->ival == 1) {
|
||
// CASE ELSE
|
||
if (!first) emit("} else {\n");
|
||
else emit("{\n");
|
||
} else {
|
||
if (!first) emit("} else if (");
|
||
else emit("if (");
|
||
// Emit condition for each value, joined with ||
|
||
int firstVal = 1;
|
||
for (Node *v = c->a; v; v = v->next) {
|
||
if (!firstVal) emitRaw(" || ");
|
||
if (v->ival2 == 1) {
|
||
// IS comparison: v->ival is the comparison op, v->b is the value
|
||
emitRaw("(_sel%d ", sid);
|
||
switch (v->ival) {
|
||
case TOK_EQ: emitRaw("== "); break;
|
||
case TOK_NE: emitRaw("!= "); break;
|
||
case TOK_LT: emitRaw("< "); break;
|
||
case TOK_GT: emitRaw("> "); break;
|
||
case TOK_LE: emitRaw("<= "); break;
|
||
case TOK_GE: emitRaw(">= "); break;
|
||
}
|
||
genExpr(v->b);
|
||
emitRaw(")");
|
||
} else if (v->ival2 == 2) {
|
||
// Range: v->a TO v->b
|
||
emitRaw("(_sel%d >= ", sid);
|
||
genExpr(v->a);
|
||
emitRaw(" && _sel%d <= ", sid);
|
||
genExpr(v->b);
|
||
emitRaw(")");
|
||
} else {
|
||
// Single value
|
||
if (stype == TYPE_STR) {
|
||
emitRaw("(strcmp(_sel%d, ", sid);
|
||
genExpr(v);
|
||
emitRaw(") == 0)");
|
||
} else {
|
||
emitRaw("(_sel%d == ", sid);
|
||
genExpr(v);
|
||
emitRaw(")");
|
||
}
|
||
}
|
||
firstVal = 0;
|
||
}
|
||
emitRaw(") {\n");
|
||
}
|
||
gIndent++;
|
||
for (Node *s = c->b; s; s = s->next)
|
||
genStmt(s);
|
||
gIndent--;
|
||
first = 0;
|
||
}
|
||
if (!first) emit("}\n");
|
||
emit("}\n");
|
||
break;
|
||
}
|
||
|
||
case NODE_ON_GOTO:
|
||
emit("switch ((int)(");
|
||
genExpr(n->a);
|
||
emitRaw(")) {\n");
|
||
{
|
||
int idx = 1;
|
||
for (Node *lab = n->b; lab; lab = lab->next, idx++) {
|
||
if (lab->type == NODE_INT_LIT) {
|
||
emit(" case %d: goto L%d; break;\n", idx, lab->ival);
|
||
} else {
|
||
emit(" case %d: goto %s; break;\n", idx, cleanName(lab->sval));
|
||
}
|
||
}
|
||
}
|
||
emit("}\n");
|
||
break;
|
||
|
||
case NODE_ON_GOSUB:
|
||
emit("switch ((int)(");
|
||
genExpr(n->a);
|
||
emitRaw(")) {\n");
|
||
{
|
||
int idx = 1;
|
||
int rpid = n->ival2; // first return-point id
|
||
for (Node *lab = n->b; lab; lab = lab->next, idx++, rpid++) {
|
||
if (lab->type == NODE_INT_LIT) {
|
||
emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto L%d; break;\n",
|
||
idx, rpid, lab->ival);
|
||
} else {
|
||
emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto %s; break;\n",
|
||
idx, rpid, cleanName(lab->sval));
|
||
}
|
||
}
|
||
}
|
||
emit("}\n");
|
||
// Emit return labels
|
||
{
|
||
int rpid = n->ival2;
|
||
for (Node *lab = n->b; lab; lab = lab->next, rpid++) {
|
||
emitRaw("_gr%d: ;\n", rpid);
|
||
}
|
||
}
|
||
break;
|
||
|
||
case NODE_MID_ASSIGN:
|
||
emit("_bmid_assign(&");
|
||
genExpr(n->a);
|
||
emitRaw(", ");
|
||
genExpr(n->b);
|
||
emitRaw(", ");
|
||
genExpr(n->c);
|
||
emitRaw(", ");
|
||
genExpr(n->d);
|
||
emitRaw(");\n");
|
||
break;
|
||
|
||
default:
|
||
emit("/* unhandled node type %d */\n", n->type);
|
||
break;
|
||
}
|
||
}
|
||
|
||
|
||
// Generate code for a block (linked list of statements)
|
||
static void genBlock(Node *blk) {
|
||
if (!blk) return;
|
||
Node *s = (blk->type == NODE_BLOCK) ? blk->a : blk;
|
||
while (s) {
|
||
genStmt(s);
|
||
s = s->next;
|
||
}
|
||
}
|
||
|
||
|
||
// Collect all SUB/FUNCTION nodes from the AST into an array
|
||
static void collectFuncs(Node *n, Node **funcs, int *count, int max) {
|
||
if (!n) return;
|
||
if (n->type == NODE_SUB || n->type == NODE_FUNC) {
|
||
if (*count >= max)
|
||
fatal(n->line, "Too many SUB/FUNCTION definitions (max %d)", max);
|
||
funcs[(*count)++] = n;
|
||
}
|
||
if (n->type == NODE_BLOCK || n->type == NODE_PROGRAM) {
|
||
Node *s = n->a;
|
||
while (s) {
|
||
collectFuncs(s, funcs, count, max);
|
||
s = s->next;
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// Recursively collect all NODE_DATA nodes from the AST
|
||
static void collectData(Node *n, Node **data, int *count, int max) {
|
||
if (!n) return;
|
||
if (n->type == NODE_DATA) {
|
||
if (*count >= max)
|
||
fatal(n->line, "Too many DATA statements (max %d)", max);
|
||
data[(*count)++] = n;
|
||
}
|
||
// When a NODE_LABEL is followed by NODE_DATA via ->next, tag the
|
||
// DATA node with the BASIC line number (stored in ival) or named
|
||
// label (stored in sval) so that RESTORE can find it.
|
||
if (n->type == NODE_LABEL && n->next && n->next->type == NODE_DATA) {
|
||
if (n->sval)
|
||
n->next->sval = n->sval;
|
||
else
|
||
n->next->ival = n->ival;
|
||
}
|
||
// Walk into blocks, programs, and sub/function bodies
|
||
if (n->type == NODE_BLOCK || n->type == NODE_PROGRAM) {
|
||
Node *s = n->a;
|
||
while (s) {
|
||
collectData(s, data, count, max);
|
||
s = s->next;
|
||
}
|
||
}
|
||
// Also collect from SUB/FUNCTION bodies (DATA is global in BASIC)
|
||
if (n->type == NODE_SUB || n->type == NODE_FUNC) {
|
||
collectData(n->b, data, count, max);
|
||
}
|
||
// Walk if/else branches
|
||
if (n->type == NODE_IF) {
|
||
collectData(n->b, data, count, max);
|
||
collectData(n->c, data, count, max);
|
||
}
|
||
// Walk loop bodies
|
||
if (n->type == NODE_FOR || n->type == NODE_WHILE || n->type == NODE_DO_LOOP) {
|
||
Node *body = (n->type == NODE_FOR) ? n->d : n->b;
|
||
collectData(body, data, count, max);
|
||
}
|
||
// Walk SELECT CASE bodies
|
||
if (n->type == NODE_SELECT) {
|
||
for (Node *c = n->b; c; c = c->next) {
|
||
for (Node *s = c->b; s; s = s->next)
|
||
collectData(s, data, count, max);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
// RESTORE lookup tables.  Each table maps a RESTORE target to the index of
// the first item of the matching DATA statement inside the emitted data
// pool.  Both tables share one capacity.
#define MAX_DATA_LINES 512

// Targets given as BASIC line numbers: gDataLineNums[i] -> gDataLineIdxs[i]
static int gDataLineCount = 0;
static int gDataLineNums[MAX_DATA_LINES];
static int gDataLineIdxs[MAX_DATA_LINES];

// Targets given as named labels: gDataLabelNames[i] -> gDataLabelIdxs[i]
static int gDataLabelCount = 0;
static char *gDataLabelNames[MAX_DATA_LINES];
static int gDataLabelIdxs[MAX_DATA_LINES];
|
||
|
||
// Look up the data index for a RESTORE target line number
|
||
static int dataIndexForLine(int lnum) {
|
||
for (int i = 0; i < gDataLineCount; i++)
|
||
if (gDataLineNums[i] == lnum) return gDataLineIdxs[i];
|
||
return 0; // fallback to beginning
|
||
}
|
||
|
||
|
||
// Look up the data index for a RESTORE target named label
|
||
static int dataIndexForLabel(const char *name) {
|
||
for (int i = 0; i < gDataLabelCount; i++)
|
||
if (strIcmp(gDataLabelNames[i], name) == 0)
|
||
return gDataLabelIdxs[i];
|
||
return 0; // fallback to beginning
|
||
}
|
||
|
||
|
||
// Emit the runtime library (debug or release variant). Provides string
|
||
// operations, temp management, file I/O, and dynamic array support.
|
||
static void emitRuntime(void) {
|
||
// Common headers and defines — same in both modes
|
||
fprintf(gOut,
|
||
"/* ---- BASIC Runtime Library (%s) ---- */\n"
|
||
"#include <stdio.h>\n"
|
||
"#include <stdlib.h>\n"
|
||
"#include <string.h>\n"
|
||
"#include <stdint.h>\n"
|
||
"#include <math.h>\n"
|
||
"#include <ctype.h>\n"
|
||
"#include <time.h>\n\n"
|
||
"#ifdef __GNUC__\n"
|
||
"#define _BUNUSED __attribute__((unused))\n"
|
||
"#else\n"
|
||
"#define _BUNUSED\n"
|
||
"#endif\n\n"
|
||
|
||
"/* Temporary string pool: collects intermediate strings for cleanup */\n"
|
||
"#define _BMAX_TEMPS 256\n"
|
||
"static char *_btemps[_BMAX_TEMPS] _BUNUSED;\n"
|
||
"static int _btmp_count _BUNUSED = 0;\n\n"
|
||
|
||
"/* Register a heap string as temporary (will be freed by _bfree_temps) */\n"
|
||
"static _BUNUSED char *_btmp(char *s) {\n"
|
||
" if (_btmp_count < _BMAX_TEMPS) _btemps[_btmp_count++] = s;\n"
|
||
" return s;\n"
|
||
"}\n\n"
|
||
|
||
"/* Free all registered temporary strings */\n"
|
||
"static _BUNUSED void _bfree_temps(void) {\n"
|
||
" for (int i = 0; i < _btmp_count; i++) free(_btemps[i]);\n"
|
||
" _btmp_count = 0;\n"
|
||
"}\n\n",
|
||
gRelease ? "release" : "debug"
|
||
);
|
||
|
||
// String functions — debug vs release
|
||
if (gRelease) {
|
||
fprintf(gOut,
|
||
"static _BUNUSED char *_bstr(const char *s) {\n"
|
||
" char *d = (char*)malloc(strlen(s) + 1);\n"
|
||
" strcpy(d, s);\n"
|
||
" return d;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n"
|
||
" if (*dest) free(*dest);\n"
|
||
" *dest = _bstr(src);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bconcat(const char *a, const char *b) {\n"
|
||
" size_t la = strlen(a), lb = strlen(b);\n"
|
||
" char *r = (char*)malloc(la + lb + 1);\n"
|
||
" memcpy(r, a, la);\n"
|
||
" memcpy(r + la, b, lb);\n"
|
||
" r[la + lb] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bchr(int code) {\n"
|
||
" char *r = (char*)malloc(2);\n"
|
||
" r[0] = (char)code; r[1] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bstr_of_int(double val) {\n"
|
||
" char *r = (char*)malloc(64);\n"
|
||
" if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n"
|
||
" else sprintf(r, \"%%g\", val);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bmid(const char *s, int start, int len) {\n"
|
||
" int slen = (int)strlen(s);\n"
|
||
" start--;\n"
|
||
" if (start < 0) start = 0;\n"
|
||
" if (start >= slen) return _btmp(_bstr(\"\"));\n"
|
||
" if (len < 0 || start + len > slen) len = slen - start;\n"
|
||
" char *r = (char*)malloc(len + 1);\n"
|
||
" memcpy(r, s + start, len);\n"
|
||
" r[len] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n"
|
||
|
||
"static _BUNUSED char *_bright(const char *s, int n) {\n"
|
||
" int slen = (int)strlen(s);\n"
|
||
" if (n >= slen) return _btmp(_bstr(s));\n"
|
||
" return _btmp(_bstr(s + slen - n));\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bucase(const char *s) {\n"
|
||
" char *r = _bstr(s);\n"
|
||
" for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_blcase(const char *s) {\n"
|
||
" char *r = _bstr(s);\n"
|
||
" for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n"
|
||
" const char *p = strstr(haystack, needle);\n"
|
||
" return p ? (int)(p - haystack) + 1 : 0;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n"
|
||
);
|
||
} else {
|
||
fprintf(gOut,
|
||
"static _BUNUSED char *_bstr(const char *s) {\n"
|
||
" if (!s) s = \"\";\n"
|
||
" char *d = (char*)malloc(strlen(s) + 1);\n"
|
||
" if (!d) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
|
||
" strcpy(d, s);\n"
|
||
" return d;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n"
|
||
" if (*dest) free(*dest);\n"
|
||
" *dest = _bstr(src ? src : \"\");\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bconcat(const char *a, const char *b) {\n"
|
||
" if (!a) a = \"\";\n"
|
||
" if (!b) b = \"\";\n"
|
||
" size_t la = strlen(a), lb = strlen(b);\n"
|
||
" char *r = (char*)malloc(la + lb + 1);\n"
|
||
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
|
||
" memcpy(r, a, la);\n"
|
||
" memcpy(r + la, b, lb);\n"
|
||
" r[la + lb] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bchr(int code) {\n"
|
||
" char *r = (char*)malloc(2);\n"
|
||
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
|
||
" r[0] = (char)code; r[1] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bstr_of_int(double val) {\n"
|
||
" char *r = (char*)malloc(64);\n"
|
||
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
|
||
" if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n"
|
||
" else sprintf(r, \"%%g\", val);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bmid(const char *s, int start, int len) {\n"
|
||
" if (!s) return _btmp(_bstr(\"\"));\n"
|
||
" int slen = (int)strlen(s);\n"
|
||
" start--;\n"
|
||
" if (start < 0) start = 0;\n"
|
||
" if (start >= slen) return _btmp(_bstr(\"\"));\n"
|
||
" if (len < 0 || start + len > slen) len = slen - start;\n"
|
||
" char *r = (char*)malloc(len + 1);\n"
|
||
" if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n"
|
||
" memcpy(r, s + start, len);\n"
|
||
" r[len] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n"
|
||
|
||
"static _BUNUSED char *_bright(const char *s, int n) {\n"
|
||
" if (!s) return _btmp(_bstr(\"\"));\n"
|
||
" int slen = (int)strlen(s);\n"
|
||
" if (n >= slen) return _btmp(_bstr(s));\n"
|
||
" return _btmp(_bstr(s + slen - n));\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bucase(const char *s) {\n"
|
||
" if (!s) return _btmp(_bstr(\"\"));\n"
|
||
" char *r = _bstr(s);\n"
|
||
" for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_blcase(const char *s) {\n"
|
||
" if (!s) return _btmp(_bstr(\"\"));\n"
|
||
" char *r = _bstr(s);\n"
|
||
" for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n"
|
||
" if (!haystack || !needle) return 0;\n"
|
||
" const char *p = strstr(haystack, needle);\n"
|
||
" return p ? (int)(p - haystack) + 1 : 0;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n"
|
||
);
|
||
}
|
||
|
||
// Additional string runtime functions (same in both modes)
|
||
fprintf(gOut,
|
||
"static _BUNUSED char *_bltrim(const char *s) {\n"
|
||
" while (*s == ' ') s++;\n"
|
||
" return _btmp(_bstr(s));\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_brtrim(const char *s) {\n"
|
||
" char *r = _bstr(s);\n"
|
||
" int len = (int)strlen(r);\n"
|
||
" while (len > 0 && r[len-1] == ' ') len--;\n"
|
||
" r[len] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_btrim(const char *s) {\n"
|
||
" while (*s == ' ') s++;\n"
|
||
" char *r = _bstr(s);\n"
|
||
" int len = (int)strlen(r);\n"
|
||
" while (len > 0 && r[len-1] == ' ') len--;\n"
|
||
" r[len] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bspace(int n) {\n"
|
||
" if (n < 0) n = 0;\n"
|
||
" char *r = (char*)malloc(n + 1);\n"
|
||
" memset(r, ' ', n);\n"
|
||
" r[n] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_btab(int col) {\n"
|
||
" if (col < 1) col = 1;\n"
|
||
" char *r = (char*)malloc(col);\n"
|
||
" memset(r, ' ', col - 1);\n"
|
||
" r[col - 1] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bhex(int n) {\n"
|
||
" char *r = (char*)malloc(20);\n"
|
||
" sprintf(r, \"%%X\", (unsigned)n);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_boct(int n) {\n"
|
||
" char *r = (char*)malloc(24);\n"
|
||
" sprintf(r, \"%%o\", (unsigned)n);\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bstring_rep(int n, const char *ch) {\n"
|
||
" if (n < 0) n = 0;\n"
|
||
" char *r = (char*)malloc(n + 1);\n"
|
||
" memset(r, ch[0], n);\n"
|
||
" r[n] = '\\0';\n"
|
||
" return _btmp(r);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED char *_bgetenv(const char *name) {\n"
|
||
" const char *val = getenv(name);\n"
|
||
" return _btmp(_bstr(val ? val : \"\"));\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bmid_assign(char **dest, int start, int len, const char *repl) {\n"
|
||
" int dlen = (int)strlen(*dest);\n"
|
||
" int rlen = (int)strlen(repl);\n"
|
||
" start--;\n"
|
||
" if (start < 0 || start >= dlen) return;\n"
|
||
" if (len > dlen - start) len = dlen - start;\n"
|
||
" if (rlen < len) len = rlen;\n"
|
||
" memcpy(*dest + start, repl, len);\n"
|
||
"}\n\n"
|
||
|
||
"/* PRINT USING support */\n"
|
||
"static const char *_busing_fmt _BUNUSED;\n"
|
||
"static const char *_busing_pos _BUNUSED;\n\n"
|
||
|
||
"static _BUNUSED void _busing_init(const char *fmt) {\n"
|
||
" _busing_fmt = _busing_pos = fmt ? fmt : \"\";\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _busing_num(double val) {\n"
|
||
" const char *p = _busing_pos;\n"
|
||
" int width = 0, decimals = -1, dollar = 0, plus = 0, aster = 0, tminus = 0;\n"
|
||
" /* Skip literal chars until we find a numeric format start */\n"
|
||
" while (*p) {\n"
|
||
" if (*p == '#') break;\n"
|
||
" if (*p == '*' && p[1] == '*') break;\n"
|
||
" if (*p == '$' && p[1] == '$') break;\n"
|
||
" if (*p == '+' && (p[1] == '#' || p[1] == '$' || p[1] == '*')) break;\n"
|
||
" if (*p == '!' || *p == '&' || *p == '\\\\') break;\n"
|
||
" putchar(*p++);\n"
|
||
" }\n"
|
||
" if (!*p) { _busing_pos = _busing_fmt; return; }\n"
|
||
" /* Parse numeric format */\n"
|
||
" if (*p == '+') { plus = 1; p++; }\n"
|
||
" while (*p == '*') { aster++; width++; p++; }\n"
|
||
" while (*p == '$') { dollar++; p++; if (dollar > 1) width++; }\n"
|
||
" while (*p == '#' || *p == ',') { if (*p == '#') width++; p++; }\n"
|
||
" if (*p == '.') { p++; decimals = 0; while (*p == '#') { decimals++; p++; } }\n"
|
||
" if (*p == '-') { tminus = 1; p++; }\n"
|
||
" _busing_pos = p;\n"
|
||
" /* Format the number */\n"
|
||
" char buf[64];\n"
|
||
" double absval = val < 0 ? -val : val;\n"
|
||
" int neg = (val < 0);\n"
|
||
" if (decimals >= 0) {\n"
|
||
" snprintf(buf, sizeof(buf), \"%%.*f\", decimals, absval);\n"
|
||
" } else {\n"
|
||
" snprintf(buf, sizeof(buf), \"%%.0f\", absval);\n"
|
||
" }\n"
|
||
" int totalw = width + (decimals >= 0 ? decimals + 1 : 0);\n"
|
||
" int len = (int)strlen(buf);\n"
|
||
" int signw = (plus || neg) ? 1 : 0;\n"
|
||
" int dollarw = dollar ? 1 : 0;\n"
|
||
" int pad = totalw - len - signw - dollarw;\n"
|
||
" if (pad < 0) pad = 0;\n"
|
||
" for (int i = 0; i < pad; i++) putchar(aster >= 2 ? '*' : ' ');\n"
|
||
" if (plus) putchar(neg ? '-' : '+');\n"
|
||
" else if (neg && !tminus) putchar('-');\n"
|
||
" if (dollar) putchar('$');\n"
|
||
" printf(\"%%s\", buf);\n"
|
||
" if (tminus && neg) putchar('-');\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _busing_str(const char *val) {\n"
|
||
" const char *p = _busing_pos;\n"
|
||
" if (!val) val = \"\";\n"
|
||
" /* Skip literal chars, print them */\n"
|
||
" while (*p && *p != '!' && *p != '&' && *p != '\\\\' && *p != '#') {\n"
|
||
" putchar(*p++);\n"
|
||
" }\n"
|
||
" if (!*p) { _busing_pos = _busing_fmt; return; }\n"
|
||
" if (*p == '!') {\n"
|
||
" /* First character only */\n"
|
||
" putchar(val[0] ? val[0] : ' ');\n"
|
||
" _busing_pos = p + 1;\n"
|
||
" } else if (*p == '&') {\n"
|
||
" /* Entire string */\n"
|
||
" printf(\"%%s\", val);\n"
|
||
" _busing_pos = p + 1;\n"
|
||
" } else if (*p == '\\\\') {\n"
|
||
" /* Fixed width: count chars between backslashes */\n"
|
||
" p++;\n"
|
||
" int width = 2;\n"
|
||
" while (*p && *p != '\\\\') { width++; p++; }\n"
|
||
" if (*p == '\\\\') p++;\n"
|
||
" _busing_pos = p;\n"
|
||
" int len = (int)strlen(val);\n"
|
||
" for (int i = 0; i < width; i++)\n"
|
||
" putchar(i < len ? val[i] : ' ');\n"
|
||
" } else {\n"
|
||
" _busing_pos = p;\n"
|
||
" }\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _busing_end(void) {\n"
|
||
" putchar('\\n');\n"
|
||
" _busing_pos = _busing_fmt;\n"
|
||
"}\n\n"
|
||
);
|
||
|
||
// Only emit GOSUB stack if there are GOSUB sites, to avoid
|
||
// -Wunused-variable warnings.
|
||
if (gGosubCount > 0) {
|
||
fprintf(gOut,
|
||
"/* GOSUB return stack */\n"
|
||
"#define _GOSUB_MAX %d\n"
|
||
"static int _gosub_stack[_GOSUB_MAX];\n"
|
||
"static int _gosub_sp = 0;\n\n",
|
||
MAX_GOSUB_SITES
|
||
);
|
||
}
|
||
|
||
// File I/O runtime — debug vs release
|
||
fprintf(gOut,
|
||
"/* File I/O support */\n"
|
||
"#define _BMAX_FILES 16\n"
|
||
"static FILE *_bfiles[_BMAX_FILES] _BUNUSED = {0};\n"
|
||
"static long _bfile_reclen[_BMAX_FILES] _BUNUSED = {0};\n\n"
|
||
);
|
||
|
||
if (gRelease) {
|
||
fprintf(gOut,
|
||
"static _BUNUSED FILE *_bfile_get(int fnum) {\n"
|
||
" return _bfiles[fnum];\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n"
|
||
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
|
||
" _bfiles[fnum] = fopen(fname, mode);\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n"
|
||
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
|
||
" _bfiles[fnum] = fopen(fname, \"r+b\");\n"
|
||
" if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n"
|
||
" _bfile_reclen[fnum] = reclen;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bfile_close(int fnum) {\n"
|
||
" if (_bfiles[fnum]) { fclose(_bfiles[fnum]); _bfiles[fnum] = NULL; }\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED int _beof(int fnum) {\n"
|
||
" if (!_bfiles[fnum]) return -1;\n"
|
||
" int c = fgetc(_bfiles[fnum]);\n"
|
||
" if (c == EOF) return -1;\n"
|
||
" ungetc(c, _bfiles[fnum]);\n"
|
||
" return 0;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED long _blof(int fnum) {\n"
|
||
" if (!_bfiles[fnum]) return 0;\n"
|
||
" long cur = ftell(_bfiles[fnum]);\n"
|
||
" fseek(_bfiles[fnum], 0, SEEK_END);\n"
|
||
" long sz = ftell(_bfiles[fnum]);\n"
|
||
" fseek(_bfiles[fnum], cur, SEEK_SET);\n"
|
||
" return sz;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED int _bfreefile(void) {\n"
|
||
" for (int i = 1; i < _BMAX_FILES; i++)\n"
|
||
" if (!_bfiles[i]) return i;\n"
|
||
" return 0;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bline_input(int fnum, char **dest) {\n"
|
||
" if (!_bfiles[fnum]) return;\n"
|
||
" char _buf[4096];\n"
|
||
" if (fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n"
|
||
" _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n"
|
||
" _bstr_assign(dest, _buf);\n"
|
||
" }\n"
|
||
"}\n\n"
|
||
);
|
||
} else {
|
||
fprintf(gOut,
|
||
"static _BUNUSED FILE *_bfile_get(int fnum) {\n"
|
||
" if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) {\n"
|
||
" fprintf(stderr, \"Bad file number %%d\\n\", fnum);\n"
|
||
" exit(1);\n"
|
||
" }\n"
|
||
" return _bfiles[fnum];\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n"
|
||
" if (fnum < 1 || fnum >= _BMAX_FILES) {\n"
|
||
" fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n"
|
||
" exit(1);\n"
|
||
" }\n"
|
||
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
|
||
" _bfiles[fnum] = fopen(fname, mode);\n"
|
||
" if (!_bfiles[fnum]) {\n"
|
||
" fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n"
|
||
" exit(1);\n"
|
||
" }\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n"
|
||
" if (fnum < 1 || fnum >= _BMAX_FILES) {\n"
|
||
" fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n"
|
||
" exit(1);\n"
|
||
" }\n"
|
||
" if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n"
|
||
" _bfiles[fnum] = fopen(fname, \"r+b\");\n"
|
||
" if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n"
|
||
" if (!_bfiles[fnum]) {\n"
|
||
" fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n"
|
||
" exit(1);\n"
|
||
" }\n"
|
||
" _bfile_reclen[fnum] = reclen;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bfile_close(int fnum) {\n"
|
||
" if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum]) {\n"
|
||
" fclose(_bfiles[fnum]);\n"
|
||
" _bfiles[fnum] = NULL;\n"
|
||
" }\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED int _beof(int fnum) {\n"
|
||
" if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return -1;\n"
|
||
" int c = fgetc(_bfiles[fnum]);\n"
|
||
" if (c == EOF) return -1;\n"
|
||
" ungetc(c, _bfiles[fnum]);\n"
|
||
" return 0;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED long _blof(int fnum) {\n"
|
||
" if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return 0;\n"
|
||
" long cur = ftell(_bfiles[fnum]);\n"
|
||
" fseek(_bfiles[fnum], 0, SEEK_END);\n"
|
||
" long sz = ftell(_bfiles[fnum]);\n"
|
||
" fseek(_bfiles[fnum], cur, SEEK_SET);\n"
|
||
" return sz;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED int _bfreefile(void) {\n"
|
||
" for (int i = 1; i < _BMAX_FILES; i++)\n"
|
||
" if (!_bfiles[i]) return i;\n"
|
||
" return 0;\n"
|
||
"}\n\n"
|
||
|
||
"static _BUNUSED void _bline_input(int fnum, char **dest) {\n"
|
||
" char _buf[4096];\n"
|
||
" if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum] &&\n"
|
||
" fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n"
|
||
" _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n"
|
||
" _bstr_assign(dest, _buf);\n"
|
||
" }\n"
|
||
"}\n\n"
|
||
);
|
||
}
|
||
|
||
// DATA/READ support — same in both modes
|
||
fprintf(gOut,
|
||
"/* DATA/READ support */\n"
|
||
"typedef struct { int is_str; double num; const char *str; } _BDataItem;\n\n"
|
||
);
|
||
}
|
||
|
||
|
||
// Main code generation: emit the full C source file from the AST
|
||
static void generate(Node *prog) {
|
||
// Emit the runtime library
|
||
emitRuntime();
|
||
|
||
// Emit UDT struct definitions (packed for binary I/O compatibility)
|
||
if (gUdtCount > 0) {
|
||
fprintf(gOut, "/* User-defined types */\n");
|
||
fprintf(gOut, "#pragma pack(push, 1)\n");
|
||
for (int i = 0; i < gUdtCount; i++) {
|
||
UdtDef *u = &gUdts[i];
|
||
fprintf(gOut, "struct _b_%s {\n", cleanName(u->name));
|
||
for (int j = 0; j < u->fieldCount; j++) {
|
||
UdtField *f = &u->fields[j];
|
||
if (f->dataType == TYPE_STR && f->strLen > 0) {
|
||
fprintf(gOut, " char %s[%d];\n",
|
||
cleanName(f->name), f->strLen + 1);
|
||
} else if (f->dataType == TYPE_UDT) {
|
||
fprintf(gOut, " %s %s;\n",
|
||
cUdtTypeStr(f->udtIndex), cleanName(f->name));
|
||
} else {
|
||
fprintf(gOut, " %s %s;\n",
|
||
cTypeStr(f->dataType), cleanName(f->name));
|
||
}
|
||
}
|
||
fprintf(gOut, "};\n");
|
||
}
|
||
fprintf(gOut, "#pragma pack(pop)\n\n");
|
||
}
|
||
|
||
// Collect all DATA nodes and emit the data pool
|
||
Node *dataNodes[4096];
|
||
int dataNodeCount = 0;
|
||
collectData(prog, dataNodes, &dataNodeCount, 4096);
|
||
|
||
if (dataNodeCount > 0) {
|
||
// Emit the data pool array
|
||
fprintf(gOut, "/* DATA pool */\n");
|
||
fprintf(gOut, "static _BDataItem _bdata[] = {\n");
|
||
int totalItems = 0;
|
||
gDataLineCount = 0;
|
||
gDataLabelCount = 0;
|
||
for (int di = 0; di < dataNodeCount; di++) {
|
||
Node *dn = dataNodes[di];
|
||
// Record BASIC-line-number-to-index mapping for RESTORE.
|
||
// dn->ival is set by collectData when DATA follows a numeric label.
|
||
if (dn->ival != 0 && gDataLineCount < MAX_DATA_LINES) {
|
||
gDataLineNums[gDataLineCount] = dn->ival;
|
||
gDataLineIdxs[gDataLineCount] = totalItems;
|
||
gDataLineCount++;
|
||
}
|
||
// Record named-label-to-index mapping for RESTORE.
|
||
// dn->sval is set by collectData when DATA follows a named label.
|
||
if (dn->sval && gDataLabelCount < MAX_DATA_LINES) {
|
||
gDataLabelNames[gDataLabelCount] = dn->sval;
|
||
gDataLabelIdxs[gDataLabelCount] = totalItems;
|
||
gDataLabelCount++;
|
||
}
|
||
for (Node *item = dn->a; item; item = item->next) {
|
||
if (item->dataType == TYPE_STR) {
|
||
// Escape the string for C output
|
||
fprintf(gOut, " {1, 0, \"");
|
||
for (const char *p = item->sval; *p; p++) {
|
||
if (*p == '"') fprintf(gOut, "\\\"");
|
||
else if (*p == '\\') fprintf(gOut, "\\\\");
|
||
else fputc(*p, gOut);
|
||
}
|
||
fprintf(gOut, "\"},\n");
|
||
} else if (item->dataType == TYPE_DBL) {
|
||
fprintf(gOut, " {0, %g, NULL},\n", item->dval);
|
||
} else {
|
||
fprintf(gOut, " {0, %d, NULL},\n", item->ival);
|
||
}
|
||
totalItems++;
|
||
}
|
||
}
|
||
fprintf(gOut, "};\n");
|
||
fprintf(gOut, "static int _bdata_count _BUNUSED = %d;\n", totalItems);
|
||
fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n");
|
||
} else {
|
||
// No DATA statements — emit empty placeholder
|
||
fprintf(gOut, "static _BDataItem _bdata[] _BUNUSED = {{0,0,NULL}};\n");
|
||
fprintf(gOut, "static int _bdata_count _BUNUSED = 0;\n");
|
||
fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n");
|
||
}
|
||
|
||
// Collect all SUB/FUNCTION definitions
|
||
Node *funcs[256];
|
||
int funcCount = 0;
|
||
collectFuncs(prog, funcs, &funcCount, 256);
|
||
|
||
// Emit forward declarations for SUBs and FUNCTIONs
|
||
if (funcCount > 0) {
|
||
fprintf(gOut, "/* Forward declarations */\n");
|
||
for (int i = 0; i < funcCount; i++) {
|
||
Node *f = funcs[i];
|
||
int isFunc = (f->type == NODE_FUNC);
|
||
DataType ret = isFunc ? f->dataType : TYPE_VOID;
|
||
fprintf(gOut, "%s %s(", cTypeStr(ret), cleanName(f->sval));
|
||
int first = 1;
|
||
for (Node *p = f->a; p; p = p->next) {
|
||
if (!first) fprintf(gOut, ", ");
|
||
first = 0;
|
||
if (p->ival == PASS_BYREF)
|
||
fprintf(gOut, "%s*", cTypeStr(p->dataType));
|
||
else if (p->dataType == TYPE_STR)
|
||
fprintf(gOut, "const char*");
|
||
else
|
||
fprintf(gOut, "%s", cTypeStr(p->dataType));
|
||
}
|
||
if (first) fprintf(gOut, "void");
|
||
fprintf(gOut, ");\n");
|
||
}
|
||
fprintf(gOut, "\n");
|
||
}
|
||
|
||
// Emit SUB/FUNCTION implementations
|
||
for (int i = 0; i < funcCount; i++) {
|
||
genFuncDef(funcs[i]);
|
||
}
|
||
|
||
// Emit main() with global (non-function) statements
|
||
fprintf(gOut, "/* Main program */\n");
|
||
fprintf(gOut, "int main(void) {\n");
|
||
gIndent = 1;
|
||
|
||
// Walk the top-level block and emit non-function statements
|
||
Node *blk = (prog->type == NODE_PROGRAM) ? prog->a : prog;
|
||
Node *s = (blk && blk->type == NODE_BLOCK) ? blk->a : blk;
|
||
while (s) {
|
||
// Skip SUB/FUNCTION definitions (already emitted above)
|
||
if (s->type != NODE_SUB && s->type != NODE_FUNC) {
|
||
genStmt(s);
|
||
}
|
||
s = s->next;
|
||
}
|
||
|
||
emit("return 0;\n");
|
||
gIndent = 0;
|
||
fprintf(gOut, "}\n");
|
||
}
|
||
|
||
|
||
// -----------------------------------------------------------------------
|
||
// Section 9: Main Entry Point
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Read an entire file into a malloc'd, NUL-terminated buffer.
//
// path: file to read (opened in binary mode).
// Returns the buffer (caller must free it), or NULL when the file cannot be
// opened, measured, allocated or read, or when it is larger than INT_MAX
// bytes (the lexer tracks the source length in an int, gSrcLen).
static char *readFile(const char *path) {
    FILE *f = fopen(path, "rb");
    if (!f) return NULL;

    // Determine the file size by seeking to the end.
    if (fseek(f, 0, SEEK_END) != 0) { fclose(f); return NULL; }
    long len = ftell(f);
    if (len < 0) { fclose(f); return NULL; }
    if (len > (long)((unsigned)-1 >> 1)) {
        // File too large for int-based gSrcLen
        fclose(f);
        return NULL;
    }
    rewind(f);

    char *buf = (char *)malloc((size_t)len + 1);
    if (!buf) { fclose(f); return NULL; }

    size_t nread = fread(buf, 1, (size_t)len, f);
    if (ferror(f)) {
        // A read error would otherwise hand back a silently truncated source.
        free(buf);
        fclose(f);
        return NULL;
    }
    buf[nread] = '\0';  // nread <= len, so this stays inside the buffer
    fclose(f);
    return buf;
}
|
||
|
||
|
||
// -----------------------------------------------------------------------
|
||
// $INCLUDE preprocessor
|
||
// -----------------------------------------------------------------------
|
||
|
||
// Extract the directory part of a file path: everything before the last '/'.
//
// A path without any '/' yields "."; a path whose only '/' is the leading one
// (e.g. "/prog.bas") yields "", which pathJoin() turns back into "/file".
// Returns a malloc'd string owned by the caller.  Prints an error and exits
// if memory cannot be allocated.
static char *dirName(const char *path) {
    const char *last = strrchr(path, '/');
    const char *src = last ? path : ".";
    size_t len = last ? (size_t)(last - path) : 1;

    char *dir = (char *)malloc(len + 1);
    if (!dir) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    memcpy(dir, src, len);
    dir[len] = '\0';
    return dir;
}
|
||
|
||
// Join a directory and a file name as "dir/file".
//
// If file is absolute (starts with '/'), dir is ignored and a copy of file is
// returned.  No normalisation is done: an empty dir gives "/file" and a dir
// that already ends in '/' gives a doubled slash.
// Returns a malloc'd string owned by the caller.  Prints an error and exits
// if memory cannot be allocated.
static char *pathJoin(const char *dir, const char *file) {
    int isAbsolute = (file[0] == '/');
    size_t dlen = isAbsolute ? 0 : strlen(dir);
    size_t flen = strlen(file);

    // Room for dir + '/' + file + NUL (one spare byte for absolute names).
    char *result = (char *)malloc(dlen + 1 + flen + 1);
    if (!result) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }

    char *out = result;
    if (!isAbsolute) {
        memcpy(out, dir, dlen);
        out += dlen;
        *out++ = '/';
    }
    memcpy(out, file, flen);
    out[flen] = '\0';
    return result;
}
|
||
|
||
// Growing buffer for source assembly.
// Invariant after sbInit(): data holds len bytes followed by a NUL, and
// len + 1 <= cap.
typedef struct {
    char *data;   // heap storage (malloc/realloc), always NUL-terminated
    size_t len;   // bytes stored, not counting the terminating NUL
    size_t cap;   // bytes allocated for data
} SourceBuf;

// Initialise sb as an empty buffer with a 4 KiB initial capacity.
// Prints an error and exits if memory cannot be allocated.
static void sbInit(SourceBuf *sb) {
    sb->cap = 4096;
    sb->len = 0;
    sb->data = (char *)malloc(sb->cap);
    if (!sb->data) {
        fprintf(stderr, "Error: Out of memory\n");
        exit(1);
    }
    sb->data[0] = '\0';
}

// Append the n bytes at s to sb, growing the storage geometrically as needed
// and keeping the contents NUL-terminated.
// Prints an error and exits if memory cannot be allocated.
static void sbAppend(SourceBuf *sb, const char *s, size_t n) {
    size_t needed = sb->len + n + 1;  // payload plus terminating NUL

    if (needed > sb->cap) {
        size_t newCap = sb->cap;
        while (newCap < needed) {
            // Stop doubling before it could wrap around to zero.
            if (newCap > (size_t)-1 / 2) {
                newCap = needed;
                break;
            }
            newCap *= 2;
        }
        // Keep the old pointer until realloc is known to have succeeded.
        char *grown = (char *)realloc(sb->data, newCap);
        if (!grown) {
            fprintf(stderr, "Error: Out of memory\n");
            exit(1);
        }
        sb->data = grown;
        sb->cap = newCap;
    }

    memcpy(sb->data + sb->len, s, n);
    sb->len += n;
    sb->data[sb->len] = '\0';
}
|
||
|
||
// Case-insensitive comparison of at most n characters (used as a prefix
// check for directives).
//
// Compares a and b character by character after upper-casing each one.
// Returns 0 if the first n characters match, or if both strings end before n
// characters; otherwise returns the difference of the first pair of
// upper-cased characters that differ (negative if a sorts first).
static int strNIcmp(const char *a, const char *b, size_t n) {
    for (size_t i = 0; i < n; i++) {
        // Cast through unsigned char: toupper() is undefined for negative
        // values other than EOF.
        int ca = toupper((unsigned char)a[i]);
        int cb = toupper((unsigned char)b[i]);
        if (ca != cb) return ca - cb;
        if (ca == 0) return 0;  // both strings ended together
    }
    return 0;
}
|
||
|
||
// Process a source file, expanding $INCLUDE directives.
|
||
// Appends to the SourceBuf and gLineMap.
|
||
static void preprocessFile(const char *filePath, SourceBuf *sb,
|
||
const char **includeStack, int includeDepth) {
|
||
// Check depth
|
||
if (includeDepth >= MAX_INCLUDE_DEPTH) {
|
||
fprintf(stderr, "Error: $INCLUDE nested too deeply (max %d) at '%s'\n",
|
||
MAX_INCLUDE_DEPTH, filePath);
|
||
exit(1);
|
||
}
|
||
|
||
// Check circular includes
|
||
for (int i = 0; i < includeDepth; i++) {
|
||
if (strcmp(includeStack[i], filePath) == 0) {
|
||
fprintf(stderr, "Error: Circular $INCLUDE detected: '%s'\n", filePath);
|
||
exit(1);
|
||
}
|
||
}
|
||
|
||
// Read file
|
||
char *text = readFile(filePath);
|
||
if (!text) {
|
||
fprintf(stderr, "Error: Cannot open '%s'", filePath);
|
||
if (includeDepth > 0)
|
||
fprintf(stderr, " (included from '%s')", includeStack[includeDepth - 1]);
|
||
fprintf(stderr, "\n");
|
||
exit(1);
|
||
}
|
||
|
||
const char *fname = internFileName(filePath);
|
||
char *baseDir = dirName(filePath);
|
||
|
||
// Push onto include stack
|
||
includeStack[includeDepth] = filePath;
|
||
|
||
// Process line by line
|
||
const char *p = text;
|
||
int origLine = 0;
|
||
while (*p) {
|
||
origLine++;
|
||
|
||
// Find end of line
|
||
const char *lineStart = p;
|
||
while (*p && *p != '\n') p++;
|
||
size_t lineLen = (size_t)(p - lineStart);
|
||
if (*p == '\n') p++; // consume newline
|
||
|
||
// Check for '$INCLUDE: directive
|
||
// Format: '$INCLUDE: 'filename'
|
||
// Leading spaces are allowed before the '
|
||
const char *s = lineStart;
|
||
while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++;
|
||
|
||
int isInclude = 0;
|
||
char incFile[MAX_TOKEN_LEN] = {0};
|
||
|
||
// Check for ' (comment start) followed by $INCLUDE:
|
||
if (s < lineStart + lineLen && *s == '\'') {
|
||
s++; // skip '
|
||
// Skip optional spaces between ' and $
|
||
while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++;
|
||
if (s + 9 <= lineStart + lineLen && strNIcmp(s, "$INCLUDE:", 9) == 0) {
|
||
s += 9;
|
||
// Skip spaces
|
||
while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++;
|
||
// Extract filename between single quotes
|
||
if (s < lineStart + lineLen && *s == '\'') {
|
||
s++;
|
||
const char *fnStart = s;
|
||
while (s < lineStart + lineLen && *s != '\'') s++;
|
||
if (s > fnStart && s < lineStart + lineLen) {
|
||
size_t fnLen = (size_t)(s - fnStart);
|
||
if (fnLen < MAX_TOKEN_LEN) {
|
||
memcpy(incFile, fnStart, fnLen);
|
||
incFile[fnLen] = '\0';
|
||
isInclude = 1;
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
if (isInclude) {
|
||
// Resolve path relative to current file's directory
|
||
char *resolvedPath = pathJoin(baseDir, incFile);
|
||
preprocessFile(resolvedPath, sb, includeStack, includeDepth + 1);
|
||
free(resolvedPath);
|
||
} else {
|
||
// Record line map entry
|
||
if (gLineMapCount < MAX_SOURCE_LINES) {
|
||
gLineMap[gLineMapCount].fileName = fname;
|
||
gLineMap[gLineMapCount].origLine = origLine;
|
||
gLineMapCount++;
|
||
}
|
||
// Append line (with newline)
|
||
sbAppend(sb, lineStart, lineLen);
|
||
sbAppend(sb, "\n", 1);
|
||
}
|
||
}
|
||
|
||
free(baseDir);
|
||
free(text);
|
||
}
|
||
|
||
// Top-level preprocessor entry point
|
||
static char *preprocessSource(const char *filePath) {
|
||
SourceBuf sb;
|
||
sbInit(&sb);
|
||
const char *includeStack[MAX_INCLUDE_DEPTH];
|
||
preprocessFile(filePath, &sb, includeStack, 0);
|
||
return sb.data;
|
||
}
|
||
|
||
|
||
int main(int argc, char **argv) {
|
||
// Check for --release / -r flag
|
||
int argi = 1;
|
||
if (argc > 1 && (strcmp(argv[1], "--release") == 0 ||
|
||
strcmp(argv[1], "-r") == 0)) {
|
||
gRelease = 1;
|
||
argi++;
|
||
}
|
||
|
||
if (argi >= argc) {
|
||
fprintf(stderr, "Usage: basic2c [--release|-r] input.bas [output.c]\n");
|
||
fprintf(stderr, "External functions can be defined in functions.def\n");
|
||
return 1;
|
||
}
|
||
|
||
// Load external function definitions from functions.def in binary's directory
|
||
{
|
||
const char *binPath = argv[0];
|
||
const char *lastSlash = strrchr(binPath, '/');
|
||
if (lastSlash) {
|
||
size_t dirLen = lastSlash - binPath + 1;
|
||
char *defPath = malloc(dirLen + 14); // "functions.def" + null
|
||
memcpy(defPath, binPath, dirLen);
|
||
strcpy(defPath + dirLen, "functions.def");
|
||
loadExternFuncs(defPath);
|
||
free(defPath);
|
||
} else {
|
||
// Binary in current directory or bare name - try current directory
|
||
loadExternFuncs("functions.def");
|
||
}
|
||
}
|
||
|
||
// Also load from input file's directory (may add more or override)
|
||
{
|
||
const char *inputPath = argv[argi];
|
||
const char *lastSlash = strrchr(inputPath, '/');
|
||
if (lastSlash) {
|
||
size_t dirLen = lastSlash - inputPath + 1;
|
||
char *defPath = malloc(dirLen + 14); // "functions.def" + null
|
||
memcpy(defPath, inputPath, dirLen);
|
||
strcpy(defPath + dirLen, "functions.def");
|
||
loadExternFuncs(defPath);
|
||
free(defPath);
|
||
}
|
||
// Don't load from current dir again if binary was there
|
||
}
|
||
|
||
// Read and preprocess source file (expands $INCLUDE directives)
|
||
char *source = preprocessSource(argv[argi]);
|
||
|
||
// Open output file (or stdout)
|
||
if (argi + 1 < argc) {
|
||
gOut = fopen(argv[argi + 1], "w");
|
||
if (!gOut) {
|
||
fprintf(stderr, "Error: Cannot create '%s'\n", argv[argi + 1]);
|
||
free(source);
|
||
return 1;
|
||
}
|
||
} else {
|
||
gOut = stdout;
|
||
}
|
||
|
||
// Initialize lexer state
|
||
gSrc = source;
|
||
gSrcPos = 0;
|
||
size_t slen = strlen(source);
|
||
if (slen > (size_t)((unsigned)-1 >> 1)) {
|
||
fprintf(stderr, "Error: Source file too large (%zu bytes)\n", slen);
|
||
free(source);
|
||
return 1;
|
||
}
|
||
gSrcLen = (int)slen;
|
||
gLine = 1;
|
||
|
||
// Parse the BASIC source into an AST
|
||
Node *program = parseProgram();
|
||
|
||
// Generate C code from the AST
|
||
generate(program);
|
||
|
||
// Cleanup
|
||
if (gOut != stdout) fclose(gOut);
|
||
free(source);
|
||
|
||
return 0;
|
||
}
|