From aa961425c934336367947289576e7dc38997a746 Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Fri, 27 Mar 2026 00:40:17 -0500 Subject: [PATCH] Initial DVX BASIC Compiler and VM. --- dvxbasic/compiler/codegen.c | 243 ++ dvxbasic/compiler/codegen.h | 76 + dvxbasic/compiler/lexer.c | 820 +++++++ dvxbasic/compiler/lexer.h | 221 ++ dvxbasic/compiler/opcodes.h | 287 +++ dvxbasic/compiler/parser.c | 4324 +++++++++++++++++++++++++++++++++++ dvxbasic/compiler/parser.h | 57 + dvxbasic/compiler/symtab.c | 147 ++ dvxbasic/compiler/symtab.h | 129 ++ dvxbasic/runtime/values.c | 633 +++++ dvxbasic/runtime/values.h | 180 ++ dvxbasic/runtime/vm.c | 3514 ++++++++++++++++++++++++++++ dvxbasic/runtime/vm.h | 211 ++ dvxbasic/test_compiler.c | 850 +++++++ dvxbasic/test_lex.c | 24 + dvxbasic/test_quick.c | 64 + dvxbasic/test_vm.c | 234 ++ 17 files changed, 12014 insertions(+) create mode 100644 dvxbasic/compiler/codegen.c create mode 100644 dvxbasic/compiler/codegen.h create mode 100644 dvxbasic/compiler/lexer.c create mode 100644 dvxbasic/compiler/lexer.h create mode 100644 dvxbasic/compiler/opcodes.h create mode 100644 dvxbasic/compiler/parser.c create mode 100644 dvxbasic/compiler/parser.h create mode 100644 dvxbasic/compiler/symtab.c create mode 100644 dvxbasic/compiler/symtab.h create mode 100644 dvxbasic/runtime/values.c create mode 100644 dvxbasic/runtime/values.h create mode 100644 dvxbasic/runtime/vm.c create mode 100644 dvxbasic/runtime/vm.h create mode 100644 dvxbasic/test_compiler.c create mode 100644 dvxbasic/test_lex.c create mode 100644 dvxbasic/test_quick.c create mode 100644 dvxbasic/test_vm.c diff --git a/dvxbasic/compiler/codegen.c b/dvxbasic/compiler/codegen.c new file mode 100644 index 0000000..6da847f --- /dev/null +++ b/dvxbasic/compiler/codegen.c @@ -0,0 +1,243 @@ +// codegen.c -- DVX BASIC p-code emitter implementation + +#include "codegen.h" +#include "opcodes.h" + +#include +#include + +// ============================================================ +// 
basAddData +// ============================================================ + +bool basAddData(BasCodeGenT *cg, BasValueT val) { + if (cg->dataCount >= BAS_MAX_CONSTANTS) { + return false; + } + + cg->dataPool[cg->dataCount++] = basValCopy(val); + return true; +} + + +// ============================================================ +// basAddConstant +// ============================================================ + +uint16_t basAddConstant(BasCodeGenT *cg, const char *text, int32_t len) { + // Check if this string is already in the pool + for (int32_t i = 0; i < cg->constCount; i++) { + if (cg->constants[i]->len == len && memcmp(cg->constants[i]->data, text, len) == 0) { + return (uint16_t)i; + } + } + + if (cg->constCount >= BAS_MAX_CONSTANTS) { + return 0; + } + + uint16_t idx = (uint16_t)cg->constCount; + cg->constants[cg->constCount++] = basStringNew(text, len); + return idx; +} + + +// ============================================================ +// basCodeGenBuildModule +// ============================================================ + +BasModuleT *basCodeGenBuildModule(BasCodeGenT *cg) { + BasModuleT *mod = (BasModuleT *)calloc(1, sizeof(BasModuleT)); + + if (!mod) { + return NULL; + } + + // Copy code + mod->code = (uint8_t *)malloc(cg->codeLen); + + if (!mod->code) { + free(mod); + return NULL; + } + + memcpy(mod->code, cg->code, cg->codeLen); + mod->codeLen = cg->codeLen; + + // Copy constant pool (share string refs) + if (cg->constCount > 0) { + mod->constants = (BasStringT **)malloc(cg->constCount * sizeof(BasStringT *)); + + if (!mod->constants) { + free(mod->code); + free(mod); + return NULL; + } + + for (int32_t i = 0; i < cg->constCount; i++) { + mod->constants[i] = basStringRef(cg->constants[i]); + } + } + + mod->constCount = cg->constCount; + mod->globalCount = cg->globalCount; + mod->entryPoint = 0; + + // Copy data pool + if (cg->dataCount > 0) { + mod->dataPool = (BasValueT *)malloc(cg->dataCount * sizeof(BasValueT)); + + if (!mod->dataPool) { 
+ free(mod->constants); + free(mod->code); + free(mod); + return NULL; + } + + for (int32_t i = 0; i < cg->dataCount; i++) { + mod->dataPool[i] = basValCopy(cg->dataPool[i]); + } + } + + mod->dataCount = cg->dataCount; + + return mod; +} + + +// ============================================================ +// basCodeGenFree +// ============================================================ + +void basCodeGenFree(BasCodeGenT *cg) { + for (int32_t i = 0; i < cg->constCount; i++) { + basStringUnref(cg->constants[i]); + } + + for (int32_t i = 0; i < cg->dataCount; i++) { + basValRelease(&cg->dataPool[i]); + } + + cg->constCount = 0; + cg->dataCount = 0; + cg->codeLen = 0; +} + + +// ============================================================ +// basCodeGenInit +// ============================================================ + +void basCodeGenInit(BasCodeGenT *cg) { + memset(cg, 0, sizeof(*cg)); +} + + +// ============================================================ +// basCodePos +// ============================================================ + +int32_t basCodePos(const BasCodeGenT *cg) { + return cg->codeLen; +} + + +// ============================================================ +// basEmit8 +// ============================================================ + +void basEmit8(BasCodeGenT *cg, uint8_t b) { + if (cg->codeLen < BAS_MAX_CODE) { + cg->code[cg->codeLen++] = b; + } +} + + +// ============================================================ +// basEmit16 +// ============================================================ + +void basEmit16(BasCodeGenT *cg, int16_t v) { + if (cg->codeLen + 2 <= BAS_MAX_CODE) { + memcpy(&cg->code[cg->codeLen], &v, 2); + cg->codeLen += 2; + } +} + + +// ============================================================ +// basEmitDouble +// ============================================================ + +void basEmitDouble(BasCodeGenT *cg, double v) { + if (cg->codeLen + (int32_t)sizeof(double) <= BAS_MAX_CODE) { + memcpy(&cg->code[cg->codeLen], 
&v, sizeof(double)); + cg->codeLen += (int32_t)sizeof(double); + } +} + + +// ============================================================ +// basEmitFloat +// ============================================================ + +void basEmitFloat(BasCodeGenT *cg, float v) { + if (cg->codeLen + (int32_t)sizeof(float) <= BAS_MAX_CODE) { + memcpy(&cg->code[cg->codeLen], &v, sizeof(float)); + cg->codeLen += (int32_t)sizeof(float); + } +} + + +// ============================================================ +// basEmitU16 +// ============================================================ + +void basEmitU16(BasCodeGenT *cg, uint16_t v) { + if (cg->codeLen + 2 <= BAS_MAX_CODE) { + memcpy(&cg->code[cg->codeLen], &v, 2); + cg->codeLen += 2; + } +} + + +// ============================================================ +// basModuleFree +// ============================================================ + +void basModuleFree(BasModuleT *mod) { + if (!mod) { + return; + } + + free(mod->code); + + if (mod->constants) { + for (int32_t i = 0; i < mod->constCount; i++) { + basStringUnref(mod->constants[i]); + } + + free(mod->constants); + } + + if (mod->dataPool) { + for (int32_t i = 0; i < mod->dataCount; i++) { + basValRelease(&mod->dataPool[i]); + } + + free(mod->dataPool); + } + + free(mod); +} + + +// ============================================================ +// basPatch16 +// ============================================================ + +void basPatch16(BasCodeGenT *cg, int32_t pos, int16_t val) { + if (pos >= 0 && pos + 2 <= cg->codeLen) { + memcpy(&cg->code[pos], &val, 2); + } +} diff --git a/dvxbasic/compiler/codegen.h b/dvxbasic/compiler/codegen.h new file mode 100644 index 0000000..31da482 --- /dev/null +++ b/dvxbasic/compiler/codegen.h @@ -0,0 +1,76 @@ +// codegen.h -- DVX BASIC p-code emitter +// +// Builds a p-code byte stream and string constant pool from +// calls made by the parser. Provides helpers for backpatching +// forward jumps. 
+// +// Embeddable: no DVX dependencies, pure C. + +#ifndef DVXBASIC_CODEGEN_H +#define DVXBASIC_CODEGEN_H + +#include "../runtime/vm.h" +#include "../runtime/values.h" + +#include +#include + +// ============================================================ +// Code generator state +// ============================================================ + +#define BAS_MAX_CODE 65536 +#define BAS_MAX_CONSTANTS 1024 + +typedef struct { + uint8_t code[BAS_MAX_CODE]; + int32_t codeLen; + BasStringT *constants[BAS_MAX_CONSTANTS]; + int32_t constCount; + int32_t globalCount; + BasValueT dataPool[BAS_MAX_CONSTANTS]; + int32_t dataCount; +} BasCodeGenT; + +// ============================================================ +// API +// ============================================================ + +void basCodeGenInit(BasCodeGenT *cg); +void basCodeGenFree(BasCodeGenT *cg); + +// Emit single byte +void basEmit8(BasCodeGenT *cg, uint8_t b); + +// Emit 16-bit signed value +void basEmit16(BasCodeGenT *cg, int16_t v); + +// Emit 16-bit unsigned value +void basEmitU16(BasCodeGenT *cg, uint16_t v); + +// Emit 32-bit float +void basEmitFloat(BasCodeGenT *cg, float v); + +// Emit 64-bit double +void basEmitDouble(BasCodeGenT *cg, double v); + +// Get current code position (for jump targets) +int32_t basCodePos(const BasCodeGenT *cg); + +// Patch a 16-bit value at a previous position (for backpatching jumps) +void basPatch16(BasCodeGenT *cg, int32_t pos, int16_t val); + +// Add a string to the constant pool. Returns the pool index. +uint16_t basAddConstant(BasCodeGenT *cg, const char *text, int32_t len); + +// Add a value to the data pool (for DATA statements). Returns true on success. +bool basAddData(BasCodeGenT *cg, BasValueT val); + +// Build a BasModuleT from the generated code. The caller takes +// ownership of the module and must free it with basModuleFree(). +BasModuleT *basCodeGenBuildModule(BasCodeGenT *cg); + +// Free a module built by basCodeGenBuildModule. 
+void basModuleFree(BasModuleT *mod); + +#endif // DVXBASIC_CODEGEN_H diff --git a/dvxbasic/compiler/lexer.c b/dvxbasic/compiler/lexer.c new file mode 100644 index 0000000..b5d8e26 --- /dev/null +++ b/dvxbasic/compiler/lexer.c @@ -0,0 +1,820 @@ +// lexer.c -- DVX BASIC lexer implementation +// +// Single-pass tokenizer. Keywords are case-insensitive. Identifiers +// preserve their original case for display but comparisons are +// case-insensitive. Line continuations (underscore at end of line) +// are handled transparently. + +#include "lexer.h" + +#include +#include +#include +#include + +// ============================================================ +// Keyword table +// ============================================================ + +typedef struct { + const char *text; + BasTokenTypeE type; +} KeywordEntryT; + +static const KeywordEntryT sKeywords[] = { + { "AND", TOK_AND }, + { "APPEND", TOK_APPEND }, + { "AS", TOK_AS }, + { "BASE", TOK_BASE }, + { "BINARY", TOK_BINARY }, + { "BOOLEAN", TOK_BOOLEAN }, + { "BYVAL", TOK_BYVAL }, + { "CALL", TOK_CALL }, + { "CASE", TOK_CASE }, + { "CLOSE", TOK_CLOSE }, + { "CONST", TOK_CONST }, + { "DATA", TOK_DATA }, + { "DECLARE", TOK_DECLARE }, + { "DEF", TOK_DEF }, + { "DEFDBL", TOK_DEFDBL }, + { "DEFINT", TOK_DEFINT }, + { "DEFLNG", TOK_DEFLNG }, + { "DEFSNG", TOK_DEFSNG }, + { "DEFSTR", TOK_DEFSTR }, + { "DIM", TOK_DIM }, + { "DO", TOK_DO }, + { "DOEVENTS", TOK_DOEVENTS }, + { "DOUBLE", TOK_DOUBLE }, + { "ELSE", TOK_ELSE }, + { "ELSEIF", TOK_ELSEIF }, + { "END", TOK_END }, + { "EOF", TOK_EOF_KW }, + { "EQV", TOK_EQV }, + { "ERASE", TOK_ERASE }, + { "ERR", TOK_ERR }, + { "ERROR", TOK_ERROR_KW }, + { "EXPLICIT", TOK_EXPLICIT }, + { "EXIT", TOK_EXIT }, + { "FALSE", TOK_FALSE_KW }, + { "FOR", TOK_FOR }, + { "FUNCTION", TOK_FUNCTION }, + { "GET", TOK_GET }, + { "GOSUB", TOK_GOSUB }, + { "GOTO", TOK_GOTO }, + { "HIDE", TOK_HIDE }, + { "IF", TOK_IF }, + { "IMP", TOK_IMP }, + { "INPUT", TOK_INPUT }, + { "INTEGER", TOK_INTEGER }, + 
{ "IS", TOK_IS }, + { "LBOUND", TOK_LBOUND }, + { "LET", TOK_LET }, + { "LINE", TOK_LINE }, + { "LOAD", TOK_LOAD }, + { "LONG", TOK_LONG }, + { "LOOP", TOK_LOOP }, + { "ME", TOK_ME }, + { "MOD", TOK_MOD }, + { "MSGBOX", TOK_MSGBOX }, + { "NEXT", TOK_NEXT }, + { "NOT", TOK_NOT }, + { "ON", TOK_ON }, + { "OPEN", TOK_OPEN }, + { "OPTION", TOK_OPTION }, + { "OR", TOK_OR }, + { "OUTPUT", TOK_OUTPUT }, + { "PRESERVE", TOK_PRESERVE }, + { "PRINT", TOK_PRINT }, + { "PUT", TOK_PUT }, + { "RANDOM", TOK_RANDOM }, + { "RANDOMIZE", TOK_RANDOMIZE }, + { "READ", TOK_READ }, + { "REDIM", TOK_REDIM }, + { "REM", TOK_REM }, + { "RESTORE", TOK_RESTORE }, + { "RESUME", TOK_RESUME }, + { "RETURN", TOK_RETURN }, + { "SEEK", TOK_SEEK }, + { "SELECT", TOK_SELECT }, + { "SET", TOK_SET }, + { "SHARED", TOK_SHARED }, + { "SHELL", TOK_SHELL }, + { "SHOW", TOK_SHOW }, + { "SINGLE", TOK_SINGLE }, + { "SLEEP", TOK_SLEEP }, + { "STATIC", TOK_STATIC }, + { "STEP", TOK_STEP }, + { "STRING", TOK_STRING_KW }, + { "SUB", TOK_SUB }, + { "SWAP", TOK_SWAP }, + { "THEN", TOK_THEN }, + { "TIMER", TOK_TIMER }, + { "TO", TOK_TO }, + { "TRUE", TOK_TRUE_KW }, + { "TYPE", TOK_TYPE }, + { "UBOUND", TOK_UBOUND }, + { "UNLOAD", TOK_UNLOAD }, + { "UNTIL", TOK_UNTIL }, + { "WEND", TOK_WEND }, + { "WHILE", TOK_WHILE }, + { "WITH", TOK_WITH }, + { "WRITE", TOK_WRITE }, + { "XOR", TOK_XOR }, + { NULL, TOK_ERROR } +}; + +#define KEYWORD_COUNT (sizeof(sKeywords) / sizeof(sKeywords[0]) - 1) + +// ============================================================ +// Prototypes +// ============================================================ + +static char advance(BasLexerT *lex); +static bool atEnd(const BasLexerT *lex); +static BasTokenTypeE lookupKeyword(const char *text, int32_t len); +static char peek(const BasLexerT *lex); +static char peekNext(const BasLexerT *lex); +static void setError(BasLexerT *lex, const char *msg); +static void skipLineComment(BasLexerT *lex); +static void skipWhitespace(BasLexerT *lex); +static 
BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex); +static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex); +static BasTokenTypeE tokenizeNumber(BasLexerT *lex); +static BasTokenTypeE tokenizeString(BasLexerT *lex); +static char upperChar(char c); + + +// ============================================================ +// advance +// ============================================================ + +static char advance(BasLexerT *lex) { + if (atEnd(lex)) { + return '\0'; + } + + char c = lex->source[lex->pos++]; + + if (c == '\n') { + lex->line++; + lex->col = 1; + } else { + lex->col++; + } + + return c; +} + + +// ============================================================ +// atEnd +// ============================================================ + +static bool atEnd(const BasLexerT *lex) { + return lex->pos >= lex->sourceLen; +} + + +// ============================================================ +// basLexerInit +// ============================================================ + +void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen) { + memset(lex, 0, sizeof(*lex)); + lex->source = source; + lex->sourceLen = sourceLen; + lex->pos = 0; + lex->line = 1; + lex->col = 1; + + // Prime the first token + basLexerNext(lex); +} + + +// ============================================================ +// basLexerNext +// ============================================================ + +BasTokenTypeE basLexerNext(BasLexerT *lex) { + skipWhitespace(lex); + + lex->token.line = lex->line; + lex->token.col = lex->col; + lex->token.textLen = 0; + lex->token.text[0] = '\0'; + + if (atEnd(lex)) { + lex->token.type = TOK_EOF; + return TOK_EOF; + } + + char c = peek(lex); + + // Newline + if (c == '\n') { + advance(lex); + lex->token.type = TOK_NEWLINE; + lex->token.text[0] = '\n'; + lex->token.text[1] = '\0'; + lex->token.textLen = 1; + return TOK_NEWLINE; + } + + // Carriage return (handle CR, CRLF) + if (c == '\r') { + advance(lex); + + if (!atEnd(lex) && peek(lex) 
== '\n') { + advance(lex); + } + + lex->token.type = TOK_NEWLINE; + lex->token.text[0] = '\n'; + lex->token.text[1] = '\0'; + lex->token.textLen = 1; + return TOK_NEWLINE; + } + + // Comment (apostrophe) + if (c == '\'') { + skipLineComment(lex); + lex->token.type = TOK_NEWLINE; + lex->token.text[0] = '\n'; + lex->token.text[1] = '\0'; + lex->token.textLen = 1; + return TOK_NEWLINE; + } + + // String literal + if (c == '"') { + lex->token.type = tokenizeString(lex); + return lex->token.type; + } + + // Number + if (isdigit((unsigned char)c) || (c == '.' && isdigit((unsigned char)peekNext(lex)))) { + lex->token.type = tokenizeNumber(lex); + return lex->token.type; + } + + // Hex literal (&H...) + if (c == '&' && upperChar(peekNext(lex)) == 'H') { + lex->token.type = tokenizeHexLiteral(lex); + return lex->token.type; + } + + // Identifier or keyword + if (isalpha((unsigned char)c) || c == '_') { + lex->token.type = tokenizeIdentOrKeyword(lex); + return lex->token.type; + } + + // Single and multi-character operators/punctuation + advance(lex); + + switch (c) { + case '+': + lex->token.type = TOK_PLUS; + break; + + case '-': + lex->token.type = TOK_MINUS; + break; + + case '*': + lex->token.type = TOK_STAR; + break; + + case '/': + lex->token.type = TOK_SLASH; + break; + + case '\\': + lex->token.type = TOK_BACKSLASH; + break; + + case '^': + lex->token.type = TOK_CARET; + break; + + case '&': + lex->token.type = TOK_AMPERSAND; + break; + + case '(': + lex->token.type = TOK_LPAREN; + break; + + case ')': + lex->token.type = TOK_RPAREN; + break; + + case ',': + lex->token.type = TOK_COMMA; + break; + + case ';': + lex->token.type = TOK_SEMICOLON; + break; + + case ':': + lex->token.type = TOK_COLON; + break; + + case '.': + lex->token.type = TOK_DOT; + break; + + case '#': + lex->token.type = TOK_HASH; + break; + + case '=': + lex->token.type = TOK_EQ; + break; + + case '<': + if (!atEnd(lex) && peek(lex) == '>') { + advance(lex); + lex->token.type = TOK_NE; + } else 
if (!atEnd(lex) && peek(lex) == '=') { + advance(lex); + lex->token.type = TOK_LE; + } else { + lex->token.type = TOK_LT; + } + break; + + case '>': + if (!atEnd(lex) && peek(lex) == '=') { + advance(lex); + lex->token.type = TOK_GE; + } else { + lex->token.type = TOK_GT; + } + break; + + default: + setError(lex, "Unexpected character"); + lex->token.type = TOK_ERROR; + break; + } + + // Store the operator text + if (lex->token.type != TOK_ERROR) { + lex->token.text[0] = c; + lex->token.textLen = 1; + + if (lex->token.type == TOK_NE || lex->token.type == TOK_LE || lex->token.type == TOK_GE) { + lex->token.text[1] = lex->source[lex->pos - 1]; + lex->token.textLen = 2; + } + + lex->token.text[lex->token.textLen] = '\0'; + } + + return lex->token.type; +} + + +// ============================================================ +// basLexerPeek +// ============================================================ + +BasTokenTypeE basLexerPeek(const BasLexerT *lex) { + return lex->token.type; +} + + +// ============================================================ +// basTokenName +// ============================================================ + +const char *basTokenName(BasTokenTypeE type) { + switch (type) { + case TOK_INT_LIT: return "integer"; + case TOK_LONG_LIT: return "long"; + case TOK_FLOAT_LIT: return "float"; + case TOK_STRING_LIT: return "string"; + case TOK_IDENT: return "identifier"; + case TOK_DOT: return "'.'"; + case TOK_COMMA: return "','"; + case TOK_SEMICOLON: return "';'"; + case TOK_COLON: return "':'"; + case TOK_LPAREN: return "'('"; + case TOK_RPAREN: return "')'"; + case TOK_HASH: return "'#'"; + case TOK_PLUS: return "'+'"; + case TOK_MINUS: return "'-'"; + case TOK_STAR: return "'*'"; + case TOK_SLASH: return "'/'"; + case TOK_BACKSLASH: return "'\\'"; + case TOK_CARET: return "'^'"; + case TOK_AMPERSAND: return "'&'"; + case TOK_EQ: return "'='"; + case TOK_NE: return "'<>'"; + case TOK_LT: return "'<'"; + case TOK_GT: return "'>'"; + case TOK_LE: 
return "'<='"; + case TOK_GE: return "'>='"; + case TOK_NEWLINE: return "newline"; + case TOK_EOF: return "end of file"; + case TOK_ERROR: return "error"; + default: break; + } + + // Keywords + for (int32_t i = 0; i < (int32_t)KEYWORD_COUNT; i++) { + if (sKeywords[i].type == type) { + return sKeywords[i].text; + } + } + + return "?"; +} + + +// ============================================================ +// lookupKeyword +// ============================================================ + +static BasTokenTypeE lookupKeyword(const char *text, int32_t len) { + // Case-insensitive keyword lookup + for (int32_t i = 0; i < (int32_t)KEYWORD_COUNT; i++) { + const char *kw = sKeywords[i].text; + int32_t kwLen = (int32_t)strlen(kw); + + if (kwLen != len) { + continue; + } + + bool match = true; + + for (int32_t j = 0; j < len; j++) { + if (upperChar(text[j]) != kw[j]) { + match = false; + break; + } + } + + if (match) { + return sKeywords[i].type; + } + } + + return TOK_IDENT; +} + + +// ============================================================ +// peek +// ============================================================ + +static char peek(const BasLexerT *lex) { + if (atEnd(lex)) { + return '\0'; + } + + return lex->source[lex->pos]; +} + + +// ============================================================ +// peekNext +// ============================================================ + +static char peekNext(const BasLexerT *lex) { + if (lex->pos + 1 >= lex->sourceLen) { + return '\0'; + } + + return lex->source[lex->pos + 1]; +} + + +// ============================================================ +// setError +// ============================================================ + +static void setError(BasLexerT *lex, const char *msg) { + snprintf(lex->error, sizeof(lex->error), "Line %d, Col %d: %s", lex->line, lex->col, msg); +} + + +// ============================================================ +// skipLineComment +// ============================================================ 
+ +static void skipLineComment(BasLexerT *lex) { + while (!atEnd(lex) && peek(lex) != '\n' && peek(lex) != '\r') { + advance(lex); + } +} + + +// ============================================================ +// skipWhitespace +// ============================================================ +// +// Skips spaces and tabs. Does NOT skip newlines (they are tokens). +// Handles line continuation: underscore followed by newline joins +// the next line to the current logical line. + +static void skipWhitespace(BasLexerT *lex) { + while (!atEnd(lex)) { + char c = peek(lex); + + if (c == ' ' || c == '\t') { + advance(lex); + continue; + } + + // Line continuation: _ at end of line + if (c == '_') { + int32_t savedPos = lex->pos; + int32_t savedLine = lex->line; + int32_t savedCol = lex->col; + advance(lex); + + // Skip spaces/tabs after underscore + while (!atEnd(lex) && (peek(lex) == ' ' || peek(lex) == '\t')) { + advance(lex); + } + + // Must be followed by newline + if (!atEnd(lex) && (peek(lex) == '\n' || peek(lex) == '\r')) { + advance(lex); + + if (!atEnd(lex) && peek(lex) == '\n' && lex->source[lex->pos - 1] == '\r') { + advance(lex); + } + + continue; // Continue skipping whitespace on next line + } + + // Not a continuation -- put back + lex->pos = savedPos; + lex->line = savedLine; + lex->col = savedCol; + break; + } + + break; + } +} + + +// ============================================================ +// tokenizeHexLiteral +// ============================================================ + +static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex) { + advance(lex); // skip & + advance(lex); // skip H + + int32_t idx = 0; + int32_t value = 0; + + while (!atEnd(lex) && isxdigit((unsigned char)peek(lex))) { + char c = advance(lex); + + if (idx < BAS_MAX_TOKEN_LEN - 1) { + lex->token.text[idx++] = c; + } + + int32_t digit; + + if (c >= '0' && c <= '9') { + digit = c - '0'; + } else if (c >= 'A' && c <= 'F') { + digit = c - 'A' + 10; + } else { + digit = c - 'a' + 10; + 
} + + value = (value << 4) | digit; + } + + lex->token.text[idx] = '\0'; + lex->token.textLen = idx; + + // Check for trailing & (long suffix) + if (!atEnd(lex) && peek(lex) == '&') { + advance(lex); + lex->token.longVal = (int64_t)value; + return TOK_LONG_LIT; + } + + lex->token.intVal = value; + return TOK_INT_LIT; +} + + +// ============================================================ +// tokenizeIdentOrKeyword +// ============================================================ + +static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex) { + int32_t idx = 0; + + while (!atEnd(lex) && (isalnum((unsigned char)peek(lex)) || peek(lex) == '_')) { + char c = advance(lex); + + if (idx < BAS_MAX_TOKEN_LEN - 1) { + lex->token.text[idx++] = c; + } + } + + lex->token.text[idx] = '\0'; + lex->token.textLen = idx; + + // Check for type suffix + if (!atEnd(lex)) { + char c = peek(lex); + + if (c == '%' || c == '&' || c == '!' || c == '#' || c == '$') { + advance(lex); + lex->token.text[idx++] = c; + lex->token.text[idx] = '\0'; + lex->token.textLen = idx; + } + } + + // Check if this is a keyword + // For suffix-bearing identifiers, only check the base (without suffix) + int32_t baseLen = idx; + + if (baseLen > 0) { + char last = lex->token.text[baseLen - 1]; + + if (last == '%' || last == '&' || last == '!' 
|| last == '#' || last == '$') { + baseLen--; + } + } + + BasTokenTypeE kwType = lookupKeyword(lex->token.text, baseLen); + + // REM is a comment -- skip to end of line + if (kwType == TOK_REM) { + skipLineComment(lex); + lex->token.type = TOK_NEWLINE; + lex->token.text[0] = '\n'; + lex->token.text[1] = '\0'; + lex->token.textLen = 1; + return TOK_NEWLINE; + } + + // If it's a keyword and has no suffix, return the keyword token + if (kwType != TOK_IDENT && baseLen == idx) { + return kwType; + } + + return TOK_IDENT; +} + + +// ============================================================ +// tokenizeNumber +// ============================================================ + +static BasTokenTypeE tokenizeNumber(BasLexerT *lex) { + int32_t idx = 0; + bool hasDecimal = false; + bool hasExp = false; + + // Integer part + while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) { + if (idx < BAS_MAX_TOKEN_LEN - 1) { + lex->token.text[idx++] = advance(lex); + } else { + advance(lex); + } + } + + // Decimal part + if (!atEnd(lex) && peek(lex) == '.' && isdigit((unsigned char)peekNext(lex))) { + hasDecimal = true; + lex->token.text[idx++] = advance(lex); // . 
+ + while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) { + if (idx < BAS_MAX_TOKEN_LEN - 1) { + lex->token.text[idx++] = advance(lex); + } else { + advance(lex); + } + } + } + + // Exponent + if (!atEnd(lex) && (upperChar(peek(lex)) == 'E' || upperChar(peek(lex)) == 'D')) { + hasExp = true; + lex->token.text[idx++] = advance(lex); + + if (!atEnd(lex) && (peek(lex) == '+' || peek(lex) == '-')) { + lex->token.text[idx++] = advance(lex); + } + + while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) { + if (idx < BAS_MAX_TOKEN_LEN - 1) { + lex->token.text[idx++] = advance(lex); + } else { + advance(lex); + } + } + } + + lex->token.text[idx] = '\0'; + lex->token.textLen = idx; + + // Check for type suffix + if (!atEnd(lex)) { + char c = peek(lex); + + if (c == '%') { + advance(lex); + lex->token.intVal = (int32_t)atoi(lex->token.text); + return TOK_INT_LIT; + } + + if (c == '&') { + advance(lex); + lex->token.longVal = (int64_t)atol(lex->token.text); + return TOK_LONG_LIT; + } + + if (c == '!') { + advance(lex); + lex->token.dblVal = atof(lex->token.text); + return TOK_FLOAT_LIT; + } + + if (c == '#') { + advance(lex); + lex->token.dblVal = atof(lex->token.text); + return TOK_FLOAT_LIT; + } + } + + // No suffix: determine type from content + if (hasDecimal || hasExp) { + lex->token.dblVal = atof(lex->token.text); + return TOK_FLOAT_LIT; + } + + long val = atol(lex->token.text); + + if (val >= -32768 && val <= 32767) { + lex->token.intVal = (int32_t)val; + return TOK_INT_LIT; + } + + lex->token.longVal = (int64_t)val; + return TOK_LONG_LIT; +} + + +// ============================================================ +// tokenizeString +// ============================================================ + +static BasTokenTypeE tokenizeString(BasLexerT *lex) { + advance(lex); // skip opening quote + + int32_t idx = 0; + + while (!atEnd(lex) && peek(lex) != '"' && peek(lex) != '\n' && peek(lex) != '\r') { + if (idx < BAS_MAX_TOKEN_LEN - 1) { + lex->token.text[idx++] = 
advance(lex); + } else { + advance(lex); + } + } + + if (atEnd(lex) || peek(lex) != '"') { + setError(lex, "Unterminated string literal"); + lex->token.text[idx] = '\0'; + lex->token.textLen = idx; + return TOK_ERROR; + } + + advance(lex); // skip closing quote + + lex->token.text[idx] = '\0'; + lex->token.textLen = idx; + + return TOK_STRING_LIT; +} + + +// ============================================================ +// upperChar +// ============================================================ + +static char upperChar(char c) { + if (c >= 'a' && c <= 'z') { + return c - 32; + } + + return c; +} diff --git a/dvxbasic/compiler/lexer.h b/dvxbasic/compiler/lexer.h new file mode 100644 index 0000000..f53922e --- /dev/null +++ b/dvxbasic/compiler/lexer.h @@ -0,0 +1,221 @@ +// lexer.h -- DVX BASIC lexer (tokenizer) +// +// Converts BASIC source text into a stream of tokens. Case-insensitive +// for keywords. Handles line continuations (_), comments (' and REM), +// type suffixes (%, &, !, #, $), and string literals. +// +// Embeddable: no DVX dependencies, pure C. + +#ifndef DVXBASIC_LEXER_H +#define DVXBASIC_LEXER_H + +#include +#include + +// ============================================================ +// Token types +// ============================================================ + +typedef enum { + // Literals + TOK_INT_LIT, // integer literal (123, &HFF) + TOK_LONG_LIT, // long literal (123&) + TOK_FLOAT_LIT, // float literal (3.14, 1.5E10) + TOK_STRING_LIT, // "string literal" + + // Identifiers and symbols + TOK_IDENT, // variable/function name + TOK_DOT, // . 
+ TOK_COMMA, // , + TOK_SEMICOLON, // ; + TOK_COLON, // : + TOK_LPAREN, // ( + TOK_RPAREN, // ) + TOK_HASH, // # (file channel) + + // Operators + TOK_PLUS, // + + TOK_MINUS, // - + TOK_STAR, // * + TOK_SLASH, // / + TOK_BACKSLASH, // \ (integer divide) + TOK_CARET, // ^ + TOK_AMPERSAND, // & (string concat or hex prefix) + TOK_EQ, // = + TOK_NE, // <> + TOK_LT, // < + TOK_GT, // > + TOK_LE, // <= + TOK_GE, // >= + + // Type suffixes (attached to identifier) + TOK_SUFFIX_INT, // % + TOK_SUFFIX_LONG, // & + TOK_SUFFIX_SINGLE, // ! + TOK_SUFFIX_DOUBLE, // # + TOK_SUFFIX_STRING, // $ + + // Keywords + TOK_AND, + TOK_AS, + TOK_BASE, + TOK_BOOLEAN, + TOK_BYVAL, + TOK_CALL, + TOK_CASE, + TOK_CLOSE, + TOK_CONST, + TOK_DATA, + TOK_DECLARE, + TOK_DEF, + TOK_DEFDBL, + TOK_DEFINT, + TOK_DEFLNG, + TOK_DEFSNG, + TOK_DEFSTR, + TOK_DIM, + TOK_DO, + TOK_DOEVENTS, + TOK_DOUBLE, + TOK_ELSE, + TOK_ELSEIF, + TOK_END, + TOK_EOF_KW, // EOF (keyword, not end-of-file) + TOK_EQV, + TOK_ERASE, + TOK_ERR, + TOK_ERROR_KW, + TOK_EXPLICIT, + TOK_EXIT, + TOK_FALSE_KW, + TOK_FOR, + TOK_FUNCTION, + TOK_GET, + TOK_GOSUB, + TOK_GOTO, + TOK_HIDE, + TOK_IF, + TOK_IMP, + TOK_INPUT, + TOK_INTEGER, + TOK_IS, + TOK_LBOUND, + TOK_LET, + TOK_LINE, + TOK_LOAD, + TOK_LONG, + TOK_LOOP, + TOK_ME, + TOK_MOD, + TOK_MSGBOX, + TOK_NEXT, + TOK_NOT, + TOK_ON, + TOK_OPEN, + TOK_OPTION, + TOK_OR, + TOK_OUTPUT, + TOK_PRESERVE, + TOK_PRINT, + TOK_PUT, + TOK_RANDOMIZE, + TOK_READ, + TOK_REDIM, + TOK_REM, + TOK_RESTORE, + TOK_RESUME, + TOK_RETURN, + TOK_SEEK, + TOK_SELECT, + TOK_SET, + TOK_SHARED, + TOK_SHELL, + TOK_SHOW, + TOK_SINGLE, + TOK_SLEEP, + TOK_STATIC, + TOK_STEP, + TOK_STRING_KW, + TOK_SUB, + TOK_SWAP, + TOK_THEN, + TOK_TIMER, + TOK_TO, + TOK_TRUE_KW, + TOK_TYPE, + TOK_UBOUND, + TOK_UNLOAD, + TOK_UNTIL, + TOK_WEND, + TOK_WHILE, + TOK_WITH, + TOK_WRITE, + TOK_XOR, + + // File modes + TOK_APPEND, + TOK_BINARY, + TOK_RANDOM, + + // Special + TOK_NEWLINE, // end of logical line + TOK_EOF, // end of source + 
TOK_ERROR // lexer error +} BasTokenTypeE; + +// ============================================================ +// Token +// ============================================================ + +#define BAS_MAX_TOKEN_LEN 256 + +typedef struct { + BasTokenTypeE type; + int32_t line; // 1-based source line number + int32_t col; // 1-based column number + + // Value (depends on type) + union { + int32_t intVal; + int64_t longVal; + float fltVal; + double dblVal; + }; + + char text[BAS_MAX_TOKEN_LEN]; // raw text of the token + int32_t textLen; +} BasTokenT; + +// ============================================================ +// Lexer state +// ============================================================ + +typedef struct { + const char *source; // source text (not owned) + int32_t sourceLen; + int32_t pos; // current position in source + int32_t line; // current line (1-based) + int32_t col; // current column (1-based) + BasTokenT token; // current token + char error[256]; +} BasLexerT; + +// ============================================================ +// API +// ============================================================ + +// Initialize lexer with source text. The source must remain valid +// for the lifetime of the lexer. +void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen); + +// Advance to the next token. Returns the token type. +// The token is available in lex->token. +BasTokenTypeE basLexerNext(BasLexerT *lex); + +// Peek at the current token type without advancing. +BasTokenTypeE basLexerPeek(const BasLexerT *lex); + +// Return human-readable name for a token type. +const char *basTokenName(BasTokenTypeE type); + +#endif // DVXBASIC_LEXER_H diff --git a/dvxbasic/compiler/opcodes.h b/dvxbasic/compiler/opcodes.h new file mode 100644 index 0000000..a2018e3 --- /dev/null +++ b/dvxbasic/compiler/opcodes.h @@ -0,0 +1,287 @@ +// opcodes.h -- DVX BASIC bytecode instruction definitions +// +// Stack-based p-code for the DVX BASIC virtual machine. 
// Embeddable: no DVX dependencies, pure C.
//
// Notation used in the comments below: operands written in [brackets]
// are encoded inline in the code stream directly after the opcode
// byte; everything else is taken from / left on the evaluation stack.

#ifndef DVXBASIC_OPCODES_H
#define DVXBASIC_OPCODES_H

// ============================================================
// Data type tags (used in Value representation)
// ============================================================

// NOTE: BAS_TYPE_INTEGER is 0, so a zero-initialized type field reads
// as INTEGER rather than "unset".
#define BAS_TYPE_INTEGER    0   // 16-bit signed
#define BAS_TYPE_LONG       1   // 32-bit signed
#define BAS_TYPE_SINGLE     2   // 32-bit float
#define BAS_TYPE_DOUBLE     3   // 64-bit float
#define BAS_TYPE_STRING     4   // ref-counted dynamic string
#define BAS_TYPE_BOOLEAN    5   // True (-1) or False (0)
#define BAS_TYPE_ARRAY      6   // ref-counted array
#define BAS_TYPE_UDT        7   // ref-counted user-defined type

// ============================================================
// Stack operations
// ============================================================

// parser.c writes the OP_PUSH_INT32 operand as two 16-bit halves,
// low half first.
#define OP_NOP              0x00
#define OP_PUSH_INT16       0x01    // [int16] push 16-bit integer
#define OP_PUSH_INT32       0x02    // [int32] push 32-bit integer
#define OP_PUSH_FLT32       0x03    // [float32] push 32-bit float
#define OP_PUSH_FLT64       0x04    // [float64] push 64-bit float
#define OP_PUSH_STR         0x05    // [uint16 idx] push string from constant pool
#define OP_PUSH_TRUE        0x06    // push boolean True (-1)
#define OP_PUSH_FALSE       0x07    // push boolean False (0)
#define OP_POP              0x08    // discard top of stack
#define OP_DUP              0x09    // duplicate top of stack

// ============================================================
// Variable access
// ============================================================

#define OP_LOAD_LOCAL       0x10    // [uint16 idx] push local variable
#define OP_STORE_LOCAL      0x11    // [uint16 idx] pop to local variable
#define OP_LOAD_GLOBAL      0x12    // [uint16 idx] push global variable
#define OP_STORE_GLOBAL     0x13    // [uint16 idx] pop to global variable
#define OP_LOAD_REF         0x14    // dereference top of stack (ByRef)
#define OP_STORE_REF        0x15    // store through reference on stack
#define OP_LOAD_ARRAY       0x16    // [uint8 dims] indices on stack, array ref below
#define OP_STORE_ARRAY      0x17    // [uint8 dims] value, indices, array ref on stack
#define OP_LOAD_FIELD       0x18    // [uint16 fieldIdx] load UDT field
#define OP_STORE_FIELD      0x19    // [uint16 fieldIdx] store UDT field
#define OP_PUSH_LOCAL_ADDR  0x1A    // [uint16 idx] push address of local (for ByRef)
#define OP_PUSH_GLOBAL_ADDR 0x1B    // [uint16 idx] push address of global (for ByRef)

// ============================================================
// Arithmetic (integer)
// ============================================================

// The expression parser in parser.c emits OP_ADD_INT, OP_SUB_INT,
// OP_MUL_INT and OP_NEG_INT for every operand type and notes that the
// VM handles type promotion.
#define OP_ADD_INT          0x20
#define OP_SUB_INT          0x21
#define OP_MUL_INT          0x22
#define OP_IDIV_INT         0x23    // integer divide (\)
#define OP_MOD_INT          0x24
#define OP_NEG_INT          0x25

// ============================================================
// Arithmetic (float)
// ============================================================

#define OP_ADD_FLT          0x26
#define OP_SUB_FLT          0x27
#define OP_MUL_FLT          0x28
#define OP_DIV_FLT          0x29    // float divide (/)
#define OP_NEG_FLT          0x2A
#define OP_POW              0x2B    // exponentiation (^)

// ============================================================
// String operations
// ============================================================

#define OP_STR_CONCAT       0x30
#define OP_STR_LEFT         0x31
#define OP_STR_RIGHT        0x32
#define OP_STR_MID          0x33    // 3 args: str, start, len
#define OP_STR_MID2         0x34    // 2 args: str, start (to end)
#define OP_STR_LEN          0x35
#define OP_STR_INSTR        0x36    // 2 args: str, find
#define OP_STR_INSTR3       0x37    // 3 args: start, str, find
#define OP_STR_UCASE        0x38
#define OP_STR_LCASE        0x39
#define OP_STR_TRIM         0x3A
#define OP_STR_LTRIM        0x3B
#define OP_STR_RTRIM        0x3C
#define OP_STR_CHR          0x3D
#define OP_STR_ASC          0x3E
#define OP_STR_SPACE        0x3F

// ============================================================
// Comparison (push boolean result)
// ============================================================

#define OP_CMP_EQ           0x40
#define OP_CMP_NE           0x41
#define OP_CMP_LT           0x42
#define OP_CMP_GT           0x43
#define OP_CMP_LE           0x44
#define OP_CMP_GE           0x45

// ============================================================
// Logical / bitwise
// ============================================================

#define OP_AND              0x48
#define OP_OR               0x49
#define OP_NOT              0x4A
#define OP_XOR              0x4B
#define OP_EQV              0x4C
#define OP_IMP              0x4D

// ============================================================
// Control flow
// ============================================================

// Jump offsets: parser.c computes them as
// target - (operandAddress + 2), i.e. relative to the byte that
// follows the 16-bit operand.
// OP_CALL: parser.c passes baseSlot = 1 for functions (slot 0 is
// reserved for the return value) and 0 otherwise.
#define OP_JMP              0x50    // [int16 offset] unconditional jump
#define OP_JMP_TRUE         0x51    // [int16 offset] jump if TOS is true
#define OP_JMP_FALSE        0x52    // [int16 offset] jump if TOS is false
#define OP_CALL             0x53    // [uint16 addr] [uint8 argc] [uint8 baseSlot]
#define OP_GOSUB_RET        0x54    // pop PC from eval stack, jump (GOSUB return)
#define OP_RET              0x55    // return from subroutine
#define OP_RET_VAL          0x56    // return from function (value on stack)
#define OP_FOR_INIT         0x57    // [uint16 varIdx] [uint8 isLocal] init FOR
#define OP_FOR_NEXT         0x58    // [uint16 varIdx] [uint8 isLocal] [int16 loopTop]

// ============================================================
// Type conversion
// ============================================================

#define OP_CONV_INT_FLT     0x60    // int -> float
#define OP_CONV_FLT_INT     0x61    // float -> int (banker's rounding)
#define OP_CONV_INT_STR     0x62    // int -> string
#define OP_CONV_STR_INT     0x63    // string -> int (VAL)
#define OP_CONV_FLT_STR     0x64    // float -> string
#define OP_CONV_STR_FLT     0x65    // string -> float (VAL)
#define OP_CONV_INT_LONG    0x66    // int16 -> int32
#define OP_CONV_LONG_INT    0x67    // int32 -> int16

// ============================================================
// I/O
// ============================================================

#define OP_PRINT            0x70    // print TOS to current output
#define OP_PRINT_NL         0x71    // print newline
#define OP_PRINT_TAB        0x72    // print tab (14-column zones)
#define OP_PRINT_SPC        0x73    // [uint8 n] print n spaces
#define OP_INPUT            0x74    // read line into string on stack
#define OP_FILE_OPEN        0x75    // [uint8 mode] filename, channel# on stack
#define OP_FILE_CLOSE       0x76    // channel# on stack
#define OP_FILE_PRINT       0x77    // channel#, value on stack
#define OP_FILE_INPUT       0x78    // channel# on stack, push string
#define OP_FILE_EOF         0x79    // channel# on stack, push boolean
#define OP_FILE_LINE_INPUT  0x7A    // channel# on stack, push string

// ============================================================
// UI / Event (used when form system is active)
// ============================================================

#define OP_LOAD_PROP        0x80    // [uint16 ctrl] [uint16 prop] push property value
#define OP_STORE_PROP       0x81    // [uint16 ctrl] [uint16 prop] pop to property
#define OP_CALL_METHOD      0x82    // [uint16 ctrl] [uint16 method] [uint8 argc]
#define OP_LOAD_FORM        0x83    // [uint16 formIdx]
#define OP_UNLOAD_FORM      0x84    // [uint16 formIdx]
#define OP_SHOW_FORM        0x85    // [uint16 formIdx] [uint8 modal]
#define OP_HIDE_FORM        0x86    // [uint16 formIdx]
#define OP_DO_EVENTS        0x87
#define OP_MSGBOX           0x88    // [uint8 flags] message on stack
#define OP_INPUTBOX         0x89    // prompt on stack, push result
#define OP_ME_REF           0x8A    // push current form reference

// ============================================================
// Array / misc
// ============================================================

#define OP_DIM_ARRAY        0x90    // [uint8 dims] [uint8 type] bounds on stack
#define OP_REDIM            0x91    // [uint8 dims] [uint8 preserve] bounds on stack
#define OP_ERASE            0x92    // array ref on stack
#define OP_LBOUND           0x93    // [uint8 dim] array ref on stack
#define OP_UBOUND           0x94    // [uint8 dim] array ref on stack
#define OP_ON_ERROR         0x95    // [int16 handler] set error handler (0 = disable)
#define OP_RESUME           0x96    // resume after error
#define OP_RESUME_NEXT      0x97    // resume at next statement
#define OP_RAISE_ERR        0x98    // error number on stack
#define OP_ERR_NUM          0x99    // push current error number
#define OP_ERR_CLEAR        0x9A    // clear error state

// ============================================================
// Math built-ins (single opcode each for common functions)
// ============================================================

#define OP_MATH_ABS         0xA0
#define OP_MATH_INT         0xA1    // floor
#define OP_MATH_FIX         0xA2    // truncate toward zero
#define OP_MATH_SGN         0xA3
#define OP_MATH_SQR         0xA4
#define OP_MATH_SIN         0xA5
#define OP_MATH_COS         0xA6
#define OP_MATH_TAN         0xA7
#define OP_MATH_ATN         0xA8
#define OP_MATH_LOG         0xA9
#define OP_MATH_EXP         0xAA
#define OP_MATH_RND         0xAB
#define OP_MATH_RANDOMIZE   0xAC    // seed on stack (or TIMER if -1)

// ============================================================
// Conversion built-ins
// ============================================================

#define OP_STR_VAL          0xB0    // VAL(s$) -> number
#define OP_STR_STRF         0xB1    // STR$(n) -> string
#define OP_STR_HEX          0xB2    // HEX$(n) -> string
#define OP_STR_STRING       0xB3    // STRING$(n, char) -> string

// ============================================================
// Extended built-ins
// ============================================================

// From here through 0xCC the opcodes are numbered consecutively, so
// the section headings below do not line up with the high nibble.
#define OP_MATH_TIMER       0xB4    // push seconds since midnight as DOUBLE
#define OP_DATE_STR         0xB5    // push DATE$ string "MM-DD-YYYY"
#define OP_TIME_STR         0xB6    // push TIME$ string "HH:MM:SS"
#define OP_SLEEP            0xB7    // pop seconds, sleep
#define OP_ENVIRON          0xB8    // pop env var name, push value string

// ============================================================
// DATA/READ/RESTORE
// ============================================================

#define OP_READ_DATA        0xB9    // push next value from data pool
#define OP_RESTORE          0xBA    // reset data pointer to 0

// ============================================================
// WRITE # (comma-delimited with quoted strings)
// ============================================================

#define OP_FILE_WRITE       0xBB    // pop channel + value, write in WRITE format
#define OP_FILE_WRITE_SEP   0xBC    // pop channel, write comma separator
#define OP_FILE_WRITE_NL    0xBD    // pop channel, write newline

// ============================================================
// Random/Binary file I/O
// ============================================================

#define OP_FILE_GET         0xBE    // pop channel + recno, read record, push value
#define OP_FILE_PUT         0xBF    // pop channel + recno + value, write record
#define OP_FILE_SEEK        0xC0    // pop channel + position, seek
#define OP_FILE_LOF         0xC1    // pop channel, push file length
#define OP_FILE_LOC         0xC2    // pop channel, push current position
#define OP_FILE_FREEFILE    0xC3    // push next free channel number
#define OP_FILE_INPUT_N     0xC4    // pop channel + n, read n chars, push string

// ============================================================
// Fixed-length strings and MID$ assignment
// ============================================================

#define OP_STR_FIXLEN       0xC5    // [uint16 len] pop string, pad/truncate, push
#define OP_STR_MID_ASGN     0xC6    // pop replacement, len, start, str; push modified

// ============================================================
// PRINT USING
// ============================================================

#define OP_PRINT_USING      0xC7    // pop format + value, push formatted string

// ============================================================
// SPC(n) and TAB(n) with stack-based argument
// ============================================================

#define OP_PRINT_TAB_N      0xC8    // pop column count, print spaces to reach column
#define OP_PRINT_SPC_N      0xC9    // pop count, print that many spaces
#define OP_FORMAT           0xCA    // pop format string + value, push formatted string
#define OP_SHELL            0xCB    // pop command string, call system(), push return value
#define OP_COMPARE_MODE     0xCC    // [uint8 mode] set string compare mode (0=binary, 1=text)

// ============================================================
// Halt
// ============================================================

+#define OP_HALT 0xFF + +#endif // DVXBASIC_OPCODES_H diff --git a/dvxbasic/compiler/parser.c b/dvxbasic/compiler/parser.c new file mode 100644 index 0000000..4b410c2 --- /dev/null +++ b/dvxbasic/compiler/parser.c @@ -0,0 +1,4324 @@ +// parser.c -- DVX BASIC recursive descent parser +// +// Single-pass compiler: reads tokens from the lexer and emits +// p-code directly via the code generator. No AST. +// +// Embeddable: no DVX dependencies, pure C. + +#include "parser.h" +#include "opcodes.h" + +#include +#include +#include + +// ============================================================ +// Forward jump list (for EXIT FOR / EXIT DO backpatching) +// ============================================================ + +#define MAX_EXITS 32 + +typedef struct { + int32_t patchAddr[MAX_EXITS]; + int32_t count; +} ExitListT; + +// ============================================================ +// Built-in function table +// ============================================================ + +typedef struct { + const char *name; + uint8_t opcode; + int32_t minArgs; + int32_t maxArgs; + uint8_t resultType; +} BuiltinFuncT; + +static const BuiltinFuncT builtinFuncs[] = { + // String functions + {"ASC", OP_STR_ASC, 1, 1, BAS_TYPE_INTEGER}, + {"CHR$", OP_STR_CHR, 1, 1, BAS_TYPE_STRING}, + {"DATE$", OP_DATE_STR, 0, 0, BAS_TYPE_STRING}, + {"ENVIRON$", OP_ENVIRON, 1, 1, BAS_TYPE_STRING}, + {"FORMAT$", OP_FORMAT, 2, 2, BAS_TYPE_STRING}, + {"HEX$", OP_STR_HEX, 1, 1, BAS_TYPE_STRING}, + {"INSTR", OP_STR_INSTR, 2, 3, BAS_TYPE_INTEGER}, + {"LCASE$", OP_STR_LCASE, 1, 1, BAS_TYPE_STRING}, + {"LEFT$", OP_STR_LEFT, 2, 2, BAS_TYPE_STRING}, + {"LEN", OP_STR_LEN, 1, 1, BAS_TYPE_INTEGER}, + {"LTRIM$", OP_STR_LTRIM, 1, 1, BAS_TYPE_STRING}, + {"MID$", OP_STR_MID2, 2, 3, BAS_TYPE_STRING}, + {"RIGHT$", OP_STR_RIGHT, 2, 2, BAS_TYPE_STRING}, + {"RTRIM$", OP_STR_RTRIM, 1, 1, BAS_TYPE_STRING}, + {"SPACE$", OP_STR_SPACE, 1, 1, BAS_TYPE_STRING}, + {"STR$", OP_STR_STRF, 1, 1, BAS_TYPE_STRING}, + {"STRING$", 
OP_STR_STRING, 2, 2, BAS_TYPE_STRING}, + {"TRIM$", OP_STR_TRIM, 1, 1, BAS_TYPE_STRING}, + {"UCASE$", OP_STR_UCASE, 1, 1, BAS_TYPE_STRING}, + {"VAL", OP_STR_VAL, 1, 1, BAS_TYPE_DOUBLE}, + + // File I/O functions + {"FREEFILE", OP_FILE_FREEFILE, 0, 0, BAS_TYPE_INTEGER}, + {"LOC", OP_FILE_LOC, 1, 1, BAS_TYPE_LONG}, + {"LOF", OP_FILE_LOF, 1, 1, BAS_TYPE_LONG}, + + // Math functions + {"ABS", OP_MATH_ABS, 1, 1, BAS_TYPE_DOUBLE}, + {"ATN", OP_MATH_ATN, 1, 1, BAS_TYPE_DOUBLE}, + {"COS", OP_MATH_COS, 1, 1, BAS_TYPE_DOUBLE}, + {"EXP", OP_MATH_EXP, 1, 1, BAS_TYPE_DOUBLE}, + {"FIX", OP_MATH_FIX, 1, 1, BAS_TYPE_INTEGER}, + {"INT", OP_MATH_INT, 1, 1, BAS_TYPE_INTEGER}, + {"LOG", OP_MATH_LOG, 1, 1, BAS_TYPE_DOUBLE}, + {"RND", OP_MATH_RND, 0, 1, BAS_TYPE_DOUBLE}, + {"SGN", OP_MATH_SGN, 1, 1, BAS_TYPE_INTEGER}, + {"SIN", OP_MATH_SIN, 1, 1, BAS_TYPE_DOUBLE}, + {"SQR", OP_MATH_SQR, 1, 1, BAS_TYPE_DOUBLE}, + {"TAN", OP_MATH_TAN, 1, 1, BAS_TYPE_DOUBLE}, + {"TIME$", OP_TIME_STR, 0, 0, BAS_TYPE_STRING}, + {"TIMER", OP_MATH_TIMER, 0, 0, BAS_TYPE_DOUBLE}, + + {NULL, 0, 0, 0, 0} +}; + +// ============================================================ +// Helper prototypes (alphabetized) +// ============================================================ + +static void advance(BasParserT *p); +static bool check(BasParserT *p, BasTokenTypeE type); +static bool checkKeyword(BasParserT *p, const char *kw); +static bool checkKeywordText(const char *text, const char *kw); +static void error(BasParserT *p, const char *msg); +static void errorExpected(BasParserT *p, const char *what); +static void expect(BasParserT *p, BasTokenTypeE type); +static void expectEndOfStatement(BasParserT *p); +static const BuiltinFuncT *findBuiltin(const char *name); +static BasSymbolT *findTypeDef(BasParserT *p, const char *name); +static bool match(BasParserT *p, BasTokenTypeE type); +static int32_t resolveFieldIndex(BasSymbolT *typeSym, const char *fieldName); +static uint8_t resolveTypeName(BasParserT *p); +static 
uint8_t suffixToType(const char *name); +static void skipNewlines(BasParserT *p); + +// ============================================================ +// Expression parser prototypes (by precedence, lowest first) +// ============================================================ + +static void parseExpression(BasParserT *p); +static void parseImpExpr(BasParserT *p); +static void parseEqvExpr(BasParserT *p); +static void parseOrExpr(BasParserT *p); +static void parseXorExpr(BasParserT *p); +static void parseAndExpr(BasParserT *p); +static void parseNotExpr(BasParserT *p); +static void parseCompareExpr(BasParserT *p); +static void parseConcatExpr(BasParserT *p); +static void parseAddExpr(BasParserT *p); +static void parseMulExpr(BasParserT *p); +static void parsePowExpr(BasParserT *p); +static void parseUnaryExpr(BasParserT *p); +static void parsePrimary(BasParserT *p); + +// ============================================================ +// Statement parser prototypes (alphabetized) +// ============================================================ + +static void parseAssignOrCall(BasParserT *p); +static void parseClose(BasParserT *p); +static void parseConst(BasParserT *p); +static void parseData(BasParserT *p); +static void parseDeclare(BasParserT *p); +static void parseDef(BasParserT *p); +static void parseDefType(BasParserT *p, uint8_t dataType); +static void parseDim(BasParserT *p); +static void parseDimBounds(BasParserT *p, int32_t *outDims); +static void parseDo(BasParserT *p); +static void parseEnd(BasParserT *p); +static void parseErase(BasParserT *p); +static void parseExit(BasParserT *p); +static void parseFor(BasParserT *p); +static void parseFunction(BasParserT *p); +static void parseGet(BasParserT *p); +static void parseGosub(BasParserT *p); +static void parseGoto(BasParserT *p); +static void parseIf(BasParserT *p); +static void parseInput(BasParserT *p); +static void parseLineInput(BasParserT *p); +static void parseModule(BasParserT *p); +static void 
parseOn(BasParserT *p); +static void parseOnError(BasParserT *p); +static void parseOpen(BasParserT *p); +static void parseOption(BasParserT *p); +static void parsePrint(BasParserT *p); +static void parsePut(BasParserT *p); +static void parseRead(BasParserT *p); +static void parseRedim(BasParserT *p); +static void parseRestore(BasParserT *p); +static void parseResume(BasParserT *p); +static void parseSeek(BasParserT *p); +static void parseSelectCase(BasParserT *p); +static void parseShell(BasParserT *p); +static void parseSleep(BasParserT *p); +static void parseStatement(BasParserT *p); +static void parseStatic(BasParserT *p); +static void parseSub(BasParserT *p); +static void parseSwap(BasParserT *p); +static void parseType(BasParserT *p); +static void parseWhile(BasParserT *p); +static void parseWrite(BasParserT *p); + +// ============================================================ +// Variable / code emit helper prototypes (alphabetized) +// ============================================================ + +static void emitFunctionCall(BasParserT *p, BasSymbolT *sym); +static int32_t emitJump(BasParserT *p, uint8_t opcode); +static void emitJumpToLabel(BasParserT *p, uint8_t opcode, const char *labelName); +static void emitLoad(BasParserT *p, BasSymbolT *sym); +static void emitStore(BasParserT *p, BasSymbolT *sym); +static BasSymbolT *ensureVariable(BasParserT *p, const char *name); +static void patchCallAddrs(BasParserT *p, BasSymbolT *sym); +static void patchJump(BasParserT *p, int32_t addr); +static void patchLabelRefs(BasParserT *p, BasSymbolT *sym); + +// ============================================================ +// Exit list helpers +// ============================================================ + +static ExitListT exitForList; +static ExitListT exitDoList; +static ExitListT exitSubList; +static ExitListT exitFuncList; + +static void exitListInit(ExitListT *el); +static void exitListAdd(ExitListT *el, int32_t addr); +static void exitListPatch(ExitListT 
*el, BasParserT *p); + + +// ============================================================ +// Helper implementations +// ============================================================ + +static void advance(BasParserT *p) { + if (p->hasError) { + return; + } + basLexerNext(&p->lex); + if (p->lex.token.type == TOK_ERROR) { + error(p, p->lex.error); + } +} + + +static bool check(BasParserT *p, BasTokenTypeE type) { + return p->lex.token.type == type; +} + + +static bool checkKeyword(BasParserT *p, const char *kw) { + if (p->lex.token.type != TOK_IDENT) { + return false; + } + // Case-insensitive comparison + const char *a = p->lex.token.text; + const char *b = kw; + while (*a && *b) { + if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) { + return false; + } + a++; + b++; + } + return *a == '\0' && *b == '\0'; +} + + +static bool checkKeywordText(const char *text, const char *kw) { + const char *a = text; + const char *b = kw; + while (*a && *b) { + if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) { + return false; + } + a++; + b++; + } + return *a == '\0' && *b == '\0'; +} + + +static void error(BasParserT *p, const char *msg) { + if (p->hasError) { + return; + } + p->hasError = true; + p->errorLine = p->lex.token.line; + snprintf(p->error, sizeof(p->error), "Line %d: %s", p->lex.token.line, msg); +} + + +static void errorExpected(BasParserT *p, const char *what) { + char buf[512]; + snprintf(buf, sizeof(buf), "Expected %s, got %s", what, basTokenName(p->lex.token.type)); + error(p, buf); +} + + +static void exitListAdd(ExitListT *el, int32_t addr) { + if (el->count < MAX_EXITS) { + el->patchAddr[el->count++] = addr; + } +} + + +static void exitListInit(ExitListT *el) { + el->count = 0; +} + + +static void exitListPatch(ExitListT *el, BasParserT *p) { + int32_t target = basCodePos(&p->cg); + for (int32_t i = 0; i < el->count; i++) { + int16_t offset = (int16_t)(target - (el->patchAddr[i] + 2)); + basPatch16(&p->cg, el->patchAddr[i], offset); + 
} + el->count = 0; +} + + +static void expect(BasParserT *p, BasTokenTypeE type) { + if (p->hasError) { + return; + } + if (p->lex.token.type != type) { + char buf[512]; + snprintf(buf, sizeof(buf), "Expected %s, got %s", basTokenName(type), basTokenName(p->lex.token.type)); + error(p, buf); + return; + } + advance(p); +} + + +static void expectEndOfStatement(BasParserT *p) { + if (p->hasError) { + return; + } + // Statement must end with newline, colon, EOF, or ELSE (single-line IF) + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_ELSE)) { + return; + } + if (check(p, TOK_COLON)) { + advance(p); + return; + } + errorExpected(p, "end of statement"); +} + + +static const BuiltinFuncT *findBuiltin(const char *name) { + for (int32_t i = 0; builtinFuncs[i].name != NULL; i++) { + // Case-insensitive comparison + const char *a = name; + const char *b = builtinFuncs[i].name; + bool match = true; + while (*a && *b) { + if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) { + match = false; + break; + } + a++; + b++; + } + if (match && *a == '\0' && *b == '\0') { + return &builtinFuncs[i]; + } + } + return NULL; +} + + +static BasSymbolT *findTypeDef(BasParserT *p, const char *name) { + for (int32_t i = 0; i < p->sym.count; i++) { + if (p->sym.symbols[i].kind == SYM_TYPE_DEF) { + // Case-insensitive comparison + const char *a = p->sym.symbols[i].name; + const char *b = name; + bool eq = true; + while (*a && *b) { + if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) { + eq = false; + break; + } + a++; + b++; + } + if (eq && *a == '\0' && *b == '\0') { + return &p->sym.symbols[i]; + } + } + } + return NULL; +} + + +static bool match(BasParserT *p, BasTokenTypeE type) { + if (p->lex.token.type == type) { + advance(p); + return true; + } + return false; +} + + +static int32_t resolveFieldIndex(BasSymbolT *typeSym, const char *fieldName) { + for (int32_t i = 0; i < typeSym->fieldCount; i++) { + const char *a = typeSym->fields[i].name; + const 
char *b = fieldName; + bool eq = true; + while (*a && *b) { + if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) { + eq = false; + break; + } + a++; + b++; + } + if (eq && *a == '\0' && *b == '\0') { + return i; + } + } + return -1; +} + + +static uint8_t resolveTypeName(BasParserT *p) { + // Expect a type keyword after AS + if (check(p, TOK_INTEGER)) { + advance(p); + return BAS_TYPE_INTEGER; + } + if (check(p, TOK_LONG)) { + advance(p); + return BAS_TYPE_LONG; + } + if (check(p, TOK_SINGLE)) { + advance(p); + return BAS_TYPE_SINGLE; + } + if (check(p, TOK_DOUBLE)) { + advance(p); + return BAS_TYPE_DOUBLE; + } + if (check(p, TOK_STRING_KW)) { + advance(p); + return BAS_TYPE_STRING; + } + if (check(p, TOK_BOOLEAN)) { + advance(p); + return BAS_TYPE_BOOLEAN; + } + // Check for user-defined TYPE name + if (check(p, TOK_IDENT)) { + BasSymbolT *typeSym = findTypeDef(p, p->lex.token.text); + if (typeSym != NULL) { + p->lastUdtTypeId = typeSym->index; + advance(p); + return BAS_TYPE_UDT; + } + } + error(p, "Expected type name (Integer, Long, Single, Double, String, Boolean, or TYPE name)"); + return BAS_TYPE_INTEGER; +} + + +static void skipNewlines(BasParserT *p) { + while (check(p, TOK_NEWLINE)) { + advance(p); + } +} + + +static uint8_t suffixToType(const char *name) { + int32_t len = (int32_t)strlen(name); + if (len == 0) { + return BAS_TYPE_SINGLE; // QB default + } + switch (name[len - 1]) { + case '%': + return BAS_TYPE_INTEGER; + case '&': + return BAS_TYPE_LONG; + case '!': + return BAS_TYPE_SINGLE; + case '#': + return BAS_TYPE_DOUBLE; + case '$': + return BAS_TYPE_STRING; + default: + return BAS_TYPE_SINGLE; // QB default + } +} + + +// ============================================================ +// Variable / code emit helpers +// ============================================================ + +static void emitFunctionCall(BasParserT *p, BasSymbolT *sym) { + // Parse argument list + expect(p, TOK_LPAREN); + int32_t argc = 0; + if (!check(p, 
TOK_RPAREN)) { + parseExpression(p); + argc++; + while (match(p, TOK_COMMA)) { + parseExpression(p); + argc++; + } + } + expect(p, TOK_RPAREN); + + if (p->hasError) { + return; + } + + if (argc != sym->paramCount) { + char buf[256]; + snprintf(buf, sizeof(buf), "Function '%s' expects %d arguments, got %d", sym->name, sym->paramCount, argc); + error(p, buf); + return; + } + + // baseSlot: functions reserve slot 0 for the return value + uint8_t baseSlot = (sym->kind == SYM_FUNCTION) ? 1 : 0; + + basEmit8(&p->cg, OP_CALL); + int32_t addrPos = basCodePos(&p->cg); + basEmitU16(&p->cg, (uint16_t)sym->codeAddr); + basEmit8(&p->cg, (uint8_t)argc); + basEmit8(&p->cg, baseSlot); + + // If not yet defined, record the address for backpatching + if (!sym->isDefined && sym->patchCount < BAS_MAX_CALL_PATCHES) { + sym->patchAddrs[sym->patchCount++] = addrPos; + } +} + + +static int32_t emitJump(BasParserT *p, uint8_t opcode) { + basEmit8(&p->cg, opcode); + int32_t addr = basCodePos(&p->cg); + basEmit16(&p->cg, 0); // placeholder + return addr; +} + + +static void emitJumpToLabel(BasParserT *p, uint8_t opcode, const char *labelName) { + // Look up label; if defined, emit direct jump; if not, create forward ref + BasSymbolT *sym = basSymTabFind(&p->sym, labelName); + + if (sym != NULL && sym->kind == SYM_LABEL && sym->isDefined) { + // Label already defined -- emit jump to known address + basEmit8(&p->cg, opcode); + int32_t here = basCodePos(&p->cg); + int16_t offset = (int16_t)(sym->codeAddr - (here + 2)); + basEmit16(&p->cg, offset); + return; + } + + // Forward reference -- create label symbol if needed + if (sym == NULL) { + sym = basSymTabAdd(&p->sym, labelName, SYM_LABEL, 0); + if (sym == NULL) { + error(p, "Symbol table full"); + return; + } + sym->scope = SCOPE_GLOBAL; + sym->isDefined = false; + sym->codeAddr = 0; + } + + // Emit jump with placeholder offset + basEmit8(&p->cg, opcode); + int32_t patchAddr = basCodePos(&p->cg); + basEmit16(&p->cg, 0); + + // Record patch 
address for backpatching when label is defined + if (sym->patchCount < BAS_MAX_CALL_PATCHES) { + sym->patchAddrs[sym->patchCount++] = patchAddr; + } +} + + +static void emitLoad(BasParserT *p, BasSymbolT *sym) { + if (sym->kind == SYM_CONST) { + // Emit the constant value directly + if (sym->dataType == BAS_TYPE_STRING) { + uint16_t idx = basAddConstant(&p->cg, sym->constStr, (int32_t)strlen(sym->constStr)); + basEmit8(&p->cg, OP_PUSH_STR); + basEmitU16(&p->cg, idx); + } else if (sym->dataType == BAS_TYPE_INTEGER || sym->dataType == BAS_TYPE_LONG) { + basEmit8(&p->cg, OP_PUSH_INT32); + basEmit16(&p->cg, (int16_t)(sym->constInt & 0xFFFF)); + basEmit16(&p->cg, (int16_t)((sym->constInt >> 16) & 0xFFFF)); + } else if (sym->dataType == BAS_TYPE_BOOLEAN) { + basEmit8(&p->cg, sym->constInt ? OP_PUSH_TRUE : OP_PUSH_FALSE); + } else { + // Float constant + basEmit8(&p->cg, OP_PUSH_FLT64); + basEmitDouble(&p->cg, sym->constDbl); + } + return; + } + + if (sym->scope == SCOPE_LOCAL) { + basEmit8(&p->cg, OP_LOAD_LOCAL); + basEmitU16(&p->cg, (uint16_t)sym->index); + } else { + basEmit8(&p->cg, OP_LOAD_GLOBAL); + basEmitU16(&p->cg, (uint16_t)sym->index); + } +} + + +static void emitStore(BasParserT *p, BasSymbolT *sym) { + // Fixed-length string: pad/truncate before storing + if (sym->fixedLen > 0) { + basEmit8(&p->cg, OP_STR_FIXLEN); + basEmitU16(&p->cg, (uint16_t)sym->fixedLen); + } + if (sym->scope == SCOPE_LOCAL) { + basEmit8(&p->cg, OP_STORE_LOCAL); + basEmitU16(&p->cg, (uint16_t)sym->index); + } else { + basEmit8(&p->cg, OP_STORE_GLOBAL); + basEmitU16(&p->cg, (uint16_t)sym->index); + } +} + + +static BasSymbolT *ensureVariable(BasParserT *p, const char *name) { + BasSymbolT *sym = basSymTabFind(&p->sym, name); + if (sym != NULL) { + return sym; + } + + // When in local scope, check if a shared global exists before auto-declaring + if (p->sym.inLocalScope) { + BasSymbolT *globalSym = basSymTabFindGlobal(&p->sym, name); + if (globalSym != NULL && globalSym->isShared) { + 
return globalSym; + } + } + + // OPTION EXPLICIT: require explicit DIM + if (p->optionExplicit) { + char buf[320]; + snprintf(buf, sizeof(buf), "Variable not declared: %s (OPTION EXPLICIT is on)", name); + error(p, buf); + return NULL; + } + + // Auto-declare (QB implicit declaration) + // Use suffix type if present, otherwise defType for the first letter + uint8_t dt = suffixToType(name); + + if (dt == BAS_TYPE_SINGLE && name[0] != '\0') { + // suffixToType returns SINGLE as the default when no suffix. + // Check if defType overrides it. + char firstLetter = name[0]; + + if (firstLetter >= 'a' && firstLetter <= 'z') { + firstLetter -= 32; + } + + if (firstLetter >= 'A' && firstLetter <= 'Z') { + uint8_t defDt = p->defType[firstLetter - 'A']; + + if (defDt != 0) { + dt = defDt; + } + } + } + + sym = basSymTabAdd(&p->sym, name, SYM_VARIABLE, dt); + + if (sym == NULL) { + error(p, "Symbol table full"); + return NULL; + } + + sym->scope = SCOPE_GLOBAL; + sym->index = basSymTabAllocSlot(&p->sym); + sym->isDefined = true; + return sym; +} + + +static void patchCallAddrs(BasParserT *p, BasSymbolT *sym) { + // Backpatch all forward-reference CALL addresses + uint16_t addr = (uint16_t)sym->codeAddr; + + for (int32_t i = 0; i < sym->patchCount; i++) { + int32_t pos = sym->patchAddrs[i]; + + if (pos >= 0 && pos + 2 <= p->cg.codeLen) { + memcpy(&p->cg.code[pos], &addr, sizeof(uint16_t)); + } + } + + sym->patchCount = 0; +} + + +static void patchJump(BasParserT *p, int32_t addr) { + int32_t target = basCodePos(&p->cg); + int16_t offset = (int16_t)(target - (addr + 2)); + basPatch16(&p->cg, addr, offset); +} + + +static void patchLabelRefs(BasParserT *p, BasSymbolT *sym) { + // Backpatch all forward-reference jumps to this label + int32_t target = sym->codeAddr; + + for (int32_t i = 0; i < sym->patchCount; i++) { + int32_t patchAddr = sym->patchAddrs[i]; + int16_t offset = (int16_t)(target - (patchAddr + 2)); + basPatch16(&p->cg, patchAddr, offset); + } + + sym->patchCount = 0; 
+} + + +// ============================================================ +// Expression parsing +// ============================================================ + +static void parseExpression(BasParserT *p) { + parseImpExpr(p); +} + + +static void parseImpExpr(BasParserT *p) { + parseEqvExpr(p); + while (!p->hasError && check(p, TOK_IMP)) { + advance(p); + parseEqvExpr(p); + basEmit8(&p->cg, OP_IMP); + } +} + + +static void parseEqvExpr(BasParserT *p) { + parseOrExpr(p); + while (!p->hasError && check(p, TOK_EQV)) { + advance(p); + parseOrExpr(p); + basEmit8(&p->cg, OP_EQV); + } +} + + +static void parseOrExpr(BasParserT *p) { + parseXorExpr(p); + while (!p->hasError && check(p, TOK_OR)) { + advance(p); + parseXorExpr(p); + basEmit8(&p->cg, OP_OR); + } +} + + +static void parseXorExpr(BasParserT *p) { + parseAndExpr(p); + while (!p->hasError && check(p, TOK_XOR)) { + advance(p); + parseAndExpr(p); + basEmit8(&p->cg, OP_XOR); + } +} + + +static void parseAndExpr(BasParserT *p) { + parseNotExpr(p); + while (!p->hasError && check(p, TOK_AND)) { + advance(p); + parseNotExpr(p); + basEmit8(&p->cg, OP_AND); + } +} + + +static void parseNotExpr(BasParserT *p) { + if (check(p, TOK_NOT)) { + advance(p); + parseNotExpr(p); + basEmit8(&p->cg, OP_NOT); + return; + } + parseCompareExpr(p); +} + + +static void parseCompareExpr(BasParserT *p) { + parseConcatExpr(p); + while (!p->hasError) { + if (check(p, TOK_EQ)) { + advance(p); + parseConcatExpr(p); + basEmit8(&p->cg, OP_CMP_EQ); + } else if (check(p, TOK_NE)) { + advance(p); + parseConcatExpr(p); + basEmit8(&p->cg, OP_CMP_NE); + } else if (check(p, TOK_LT)) { + advance(p); + parseConcatExpr(p); + basEmit8(&p->cg, OP_CMP_LT); + } else if (check(p, TOK_GT)) { + advance(p); + parseConcatExpr(p); + basEmit8(&p->cg, OP_CMP_GT); + } else if (check(p, TOK_LE)) { + advance(p); + parseConcatExpr(p); + basEmit8(&p->cg, OP_CMP_LE); + } else if (check(p, TOK_GE)) { + advance(p); + parseConcatExpr(p); + basEmit8(&p->cg, OP_CMP_GE); + } else 
{ + break; + } + } +} + + +static void parseConcatExpr(BasParserT *p) { + parseAddExpr(p); + while (!p->hasError && check(p, TOK_AMPERSAND)) { + advance(p); + parseAddExpr(p); + basEmit8(&p->cg, OP_STR_CONCAT); + } +} + + +static void parseAddExpr(BasParserT *p) { + parseMulExpr(p); + while (!p->hasError) { + if (check(p, TOK_PLUS)) { + advance(p); + parseMulExpr(p); + basEmit8(&p->cg, OP_ADD_INT); // VM handles type promotion + } else if (check(p, TOK_MINUS)) { + advance(p); + parseMulExpr(p); + basEmit8(&p->cg, OP_SUB_INT); + } else { + break; + } + } +} + + +static void parseMulExpr(BasParserT *p) { + parsePowExpr(p); + while (!p->hasError) { + if (check(p, TOK_STAR)) { + advance(p); + parsePowExpr(p); + basEmit8(&p->cg, OP_MUL_INT); + } else if (check(p, TOK_SLASH)) { + advance(p); + parsePowExpr(p); + basEmit8(&p->cg, OP_DIV_FLT); + } else if (check(p, TOK_BACKSLASH)) { + advance(p); + parsePowExpr(p); + basEmit8(&p->cg, OP_IDIV_INT); + } else if (check(p, TOK_MOD)) { + advance(p); + parsePowExpr(p); + basEmit8(&p->cg, OP_MOD_INT); + } else { + break; + } + } +} + + +static void parsePowExpr(BasParserT *p) { + parseUnaryExpr(p); + // Right-associative, but iterative is fine for most BASIC uses + while (!p->hasError && check(p, TOK_CARET)) { + advance(p); + parseUnaryExpr(p); + basEmit8(&p->cg, OP_POW); + } +} + + +static void parseUnaryExpr(BasParserT *p) { + if (check(p, TOK_MINUS)) { + advance(p); + parseUnaryExpr(p); + basEmit8(&p->cg, OP_NEG_INT); + return; + } + if (check(p, TOK_PLUS)) { + advance(p); // unary plus is a no-op + parseUnaryExpr(p); + return; + } + parsePrimary(p); +} + + +static void parsePrimary(BasParserT *p) { + if (p->hasError) { + return; + } + + BasTokenTypeE tt = p->lex.token.type; + + // Integer literal + if (tt == TOK_INT_LIT) { + int32_t val = p->lex.token.intVal; + if (val >= -32768 && val <= 32767) { + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, (int16_t)val); + } else { + basEmit8(&p->cg, OP_PUSH_INT32); + 
basEmit16(&p->cg, (int16_t)(val & 0xFFFF)); + basEmit16(&p->cg, (int16_t)((val >> 16) & 0xFFFF)); + } + advance(p); + return; + } + + // Long literal + if (tt == TOK_LONG_LIT) { + int32_t val = (int32_t)p->lex.token.longVal; + basEmit8(&p->cg, OP_PUSH_INT32); + basEmit16(&p->cg, (int16_t)(val & 0xFFFF)); + basEmit16(&p->cg, (int16_t)((val >> 16) & 0xFFFF)); + advance(p); + return; + } + + // Float literal + if (tt == TOK_FLOAT_LIT) { + basEmit8(&p->cg, OP_PUSH_FLT64); + basEmitDouble(&p->cg, p->lex.token.dblVal); + advance(p); + return; + } + + // String literal + if (tt == TOK_STRING_LIT) { + uint16_t idx = basAddConstant(&p->cg, p->lex.token.text, p->lex.token.textLen); + basEmit8(&p->cg, OP_PUSH_STR); + basEmitU16(&p->cg, idx); + advance(p); + return; + } + + // Boolean literals + if (tt == TOK_TRUE_KW) { + basEmit8(&p->cg, OP_PUSH_TRUE); + advance(p); + return; + } + if (tt == TOK_FALSE_KW) { + basEmit8(&p->cg, OP_PUSH_FALSE); + advance(p); + return; + } + + // EOF(#channel) -- file end-of-file test + if (tt == TOK_EOF_KW) { + advance(p); + expect(p, TOK_LPAREN); + match(p, TOK_HASH); // optional # + parseExpression(p); + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_FILE_EOF); + return; + } + + // SEEK(n) -- return current file position (function form) + if (tt == TOK_SEEK) { + advance(p); + if (check(p, TOK_LPAREN)) { + expect(p, TOK_LPAREN); + parseExpression(p); + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_FILE_LOC); + return; + } + // Not a function call -- error (SEEK as statement is handled elsewhere) + error(p, "SEEK requires parentheses when used as a function"); + return; + } + + // TIMER -- seconds since midnight (no args needed) + if (tt == TOK_TIMER) { + advance(p); + basEmit8(&p->cg, OP_MATH_TIMER); + return; + } + + // ERR -- current error number + if (tt == TOK_ERR) { + advance(p); + basEmit8(&p->cg, OP_ERR_NUM); + return; + } + + // SHELL("command") -- as function expression + if (tt == TOK_SHELL) { + advance(p); + expect(p, TOK_LPAREN); + 
parseExpression(p); + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_SHELL); + return; + } + + // LBOUND(array [, dim]) + if (tt == TOK_LBOUND) { + advance(p); + expect(p, TOK_LPAREN); + parseExpression(p); + uint8_t dim = 1; + if (match(p, TOK_COMMA)) { + if (check(p, TOK_INT_LIT)) { + dim = (uint8_t)p->lex.token.intVal; + advance(p); + } else { + error(p, "LBOUND dimension must be a constant integer"); + } + } + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_LBOUND); + basEmit8(&p->cg, dim); + return; + } + + // UBOUND(array [, dim]) + if (tt == TOK_UBOUND) { + advance(p); + expect(p, TOK_LPAREN); + parseExpression(p); + uint8_t dim = 1; + if (match(p, TOK_COMMA)) { + if (check(p, TOK_INT_LIT)) { + dim = (uint8_t)p->lex.token.intVal; + advance(p); + } else { + error(p, "UBOUND dimension must be a constant integer"); + } + } + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_UBOUND); + basEmit8(&p->cg, dim); + return; + } + + // Parenthesized expression + if (tt == TOK_LPAREN) { + advance(p); + parseExpression(p); + expect(p, TOK_RPAREN); + return; + } + + // Identifier: variable, function call, or built-in + if (tt == TOK_IDENT) { + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // INPUT$(n, #channel) -- special handling for optional # in second arg + if (checkKeywordText(name, "INPUT$") && check(p, TOK_LPAREN)) { + expect(p, TOK_LPAREN); + parseExpression(p); // n (number of chars) + expect(p, TOK_COMMA); + match(p, TOK_HASH); // optional # + parseExpression(p); // channel number + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_FILE_INPUT_N); + return; + } + + // Check for built-in function + const BuiltinFuncT *builtin = findBuiltin(name); + if (builtin != NULL) { + int32_t argc = 0; + + // Zero-arg builtins can be used without parens + if (builtin->minArgs == 0 && builtin->maxArgs == 0 && !check(p, TOK_LPAREN)) { + basEmit8(&p->cg, builtin->opcode); + return; + } + + if 
(check(p, TOK_LPAREN)) { + expect(p, TOK_LPAREN); + + // RND/zero-arg builtins can be called with empty parens + if (!check(p, TOK_RPAREN)) { + parseExpression(p); + argc++; + while (match(p, TOK_COMMA)) { + parseExpression(p); + argc++; + } + } + expect(p, TOK_RPAREN); + } + + if (p->hasError) { + return; + } + + if (argc < builtin->minArgs || argc > builtin->maxArgs) { + char buf[256]; + snprintf(buf, sizeof(buf), "Built-in '%s' expects %d-%d arguments, got %d", builtin->name, builtin->minArgs, builtin->maxArgs, argc); + error(p, buf); + return; + } + + // MID$ with 3 args uses a different opcode than 2 args + if (builtin->opcode == OP_STR_MID2 && argc == 3) { + basEmit8(&p->cg, OP_STR_MID); + } else if (builtin->opcode == OP_STR_INSTR && argc == 3) { + basEmit8(&p->cg, OP_STR_INSTR3); + } else if (builtin->opcode == OP_MATH_RND && argc == 0) { + // Push -1 as dummy arg for RND() + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, -1); + basEmit8(&p->cg, OP_MATH_RND); + } else { + basEmit8(&p->cg, builtin->opcode); + } + return; + } + + // Check symbol table for user-defined function or variable + BasSymbolT *sym = basSymTabFind(&p->sym, name); + + // Function call with parens + if (check(p, TOK_LPAREN)) { + if (sym != NULL && (sym->kind == SYM_FUNCTION || sym->kind == SYM_SUB)) { + emitFunctionCall(p, sym); + return; + } + // Could be an array access -- treat as load + array index + if (sym != NULL && sym->isArray) { + emitLoad(p, sym); + expect(p, TOK_LPAREN); + int32_t dims = 0; + parseExpression(p); + dims++; + while (match(p, TOK_COMMA)) { + parseExpression(p); + dims++; + } + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_LOAD_ARRAY); + basEmit8(&p->cg, (uint8_t)dims); + return; + } + // Unknown function -- forward reference, assume it's a function + if (sym == NULL) { + sym = basSymTabAdd(&p->sym, name, SYM_FUNCTION, suffixToType(name)); + if (sym == NULL) { + error(p, "Symbol table full"); + return; + } + sym->scope = SCOPE_GLOBAL; + sym->isDefined = 
false; + sym->codeAddr = 0; + } + emitFunctionCall(p, sym); + return; + } + + // Check for UDT field access: var.field + if (check(p, TOK_DOT)) { + sym = ensureVariable(p, name); + if (sym != NULL && sym->dataType == BAS_TYPE_UDT && sym->udtTypeId >= 0) { + emitLoad(p, sym); + advance(p); // consume DOT + if (!check(p, TOK_IDENT)) { + errorExpected(p, "field name"); + return; + } + // Find the TYPE_DEF symbol + BasSymbolT *typeSym = NULL; + for (int32_t i = 0; i < p->sym.count; i++) { + if (p->sym.symbols[i].kind == SYM_TYPE_DEF && p->sym.symbols[i].index == sym->udtTypeId) { + typeSym = &p->sym.symbols[i]; + break; + } + } + if (typeSym == NULL) { + error(p, "Unknown TYPE definition"); + return; + } + int32_t fieldIdx = resolveFieldIndex(typeSym, p->lex.token.text); + if (fieldIdx < 0) { + char buf[256]; + snprintf(buf, sizeof(buf), "Unknown field '%s' in TYPE '%s'", p->lex.token.text, typeSym->name); + error(p, buf); + return; + } + advance(p); // consume field name + basEmit8(&p->cg, OP_LOAD_FIELD); + basEmitU16(&p->cg, (uint16_t)fieldIdx); + return; + } + } + + // Plain variable reference + sym = ensureVariable(p, name); + if (sym != NULL) { + emitLoad(p, sym); + } + return; + } + + // Nothing matched + errorExpected(p, "expression"); +} + + +// ============================================================ +// Statement parsing +// ============================================================ + +static void parseAssignOrCall(BasParserT *p) { + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // MID$ statement: MID$(var$, start [, len]) = replacement$ + if (checkKeywordText(name, "MID$") && check(p, TOK_LPAREN)) { + expect(p, TOK_LPAREN); + + // First arg: target string variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "string variable name"); + return; + } + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + 
varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *varSym = ensureVariable(p, varName); + if (varSym == NULL) { + return; + } + + // Load the original string + emitLoad(p, varSym); + + expect(p, TOK_COMMA); + parseExpression(p); // start position + + // Optional length + if (match(p, TOK_COMMA)) { + parseExpression(p); // length + } else { + // Push 0 as sentinel meaning "use replacement length" + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, 0); + } + + expect(p, TOK_RPAREN); + expect(p, TOK_EQ); + + // Parse replacement expression + parseExpression(p); + + // Emit MID$ assignment: pops replacement, len, start, str; pushes result + basEmit8(&p->cg, OP_STR_MID_ASGN); + + // Store back to the variable + emitStore(p, varSym); + return; + } + + BasSymbolT *sym = basSymTabFind(&p->sym, name); + + // UDT field assignment: var.field = expr + if (check(p, TOK_DOT)) { + if (sym == NULL) { + sym = ensureVariable(p, name); + } + if (sym == NULL) { + return; + } + if (sym->dataType == BAS_TYPE_UDT && sym->udtTypeId >= 0) { + emitLoad(p, sym); + advance(p); // consume DOT + if (!check(p, TOK_IDENT)) { + errorExpected(p, "field name"); + return; + } + BasSymbolT *typeSym = NULL; + for (int32_t i = 0; i < p->sym.count; i++) { + if (p->sym.symbols[i].kind == SYM_TYPE_DEF && p->sym.symbols[i].index == sym->udtTypeId) { + typeSym = &p->sym.symbols[i]; + break; + } + } + if (typeSym == NULL) { + error(p, "Unknown TYPE definition"); + return; + } + int32_t fieldIdx = resolveFieldIndex(typeSym, p->lex.token.text); + if (fieldIdx < 0) { + char buf[256]; + snprintf(buf, sizeof(buf), "Unknown field '%s' in TYPE '%s'", p->lex.token.text, typeSym->name); + error(p, buf); + return; + } + advance(p); // consume field name + expect(p, TOK_EQ); + parseExpression(p); + basEmit8(&p->cg, OP_STORE_FIELD); + basEmitU16(&p->cg, (uint16_t)fieldIdx); + return; + } + } + + // Array assignment: var(index) = expr + if (check(p, TOK_LPAREN)) { + // Could be a function call as a 
statement (discard result) + // or array assignment + if (sym != NULL && (sym->kind == SYM_SUB || sym->kind == SYM_FUNCTION)) { + emitFunctionCall(p, sym); + if (sym->kind == SYM_FUNCTION) { + basEmit8(&p->cg, OP_POP); // discard return value + } + return; + } + + // Array element assignment + if (sym == NULL) { + sym = ensureVariable(p, name); + } + if (sym == NULL) { + return; + } + + emitLoad(p, sym); + expect(p, TOK_LPAREN); + int32_t dims = 0; + parseExpression(p); + dims++; + while (match(p, TOK_COMMA)) { + parseExpression(p); + dims++; + } + expect(p, TOK_RPAREN); + + expect(p, TOK_EQ); + parseExpression(p); + + basEmit8(&p->cg, OP_STORE_ARRAY); + basEmit8(&p->cg, (uint8_t)dims); + return; + } + + // Simple assignment: var = expr + if (check(p, TOK_EQ)) { + advance(p); + if (sym == NULL) { + sym = ensureVariable(p, name); + } + if (sym == NULL) { + return; + } + if (sym->kind == SYM_CONST) { + error(p, "Cannot assign to a constant"); + return; + } + // Check if this is a function name (assigning return value) + if (sym->kind == SYM_FUNCTION) { + parseExpression(p); + // Store to the implicit return-value local slot (index 0 in function scope) + basEmit8(&p->cg, OP_STORE_LOCAL); + basEmitU16(&p->cg, 0); + return; + } + parseExpression(p); + emitStore(p, sym); + return; + } + + // Sub call without parens: SUBName arg1, arg2 ... + if (sym != NULL && sym->kind == SYM_SUB) { + int32_t argc = 0; + if (!check(p, TOK_NEWLINE) && !check(p, TOK_EOF) && !check(p, TOK_COLON) && !check(p, TOK_ELSE)) { + parseExpression(p); + argc++; + while (match(p, TOK_COMMA)) { + parseExpression(p); + argc++; + } + } + if (!p->hasError && argc != sym->paramCount) { + char buf[256]; + snprintf(buf, sizeof(buf), "Sub '%s' expects %d arguments, got %d", sym->name, sym->paramCount, argc); + error(p, buf); + return; + } + { + uint8_t baseSlot = (sym->kind == SYM_FUNCTION) ? 
1 : 0; + basEmit8(&p->cg, OP_CALL); + int32_t addrPos = basCodePos(&p->cg); + basEmitU16(&p->cg, (uint16_t)sym->codeAddr); + basEmit8(&p->cg, (uint8_t)argc); + basEmit8(&p->cg, baseSlot); + + if (!sym->isDefined && sym->patchCount < BAS_MAX_CALL_PATCHES) { + sym->patchAddrs[sym->patchCount++] = addrPos; + } + } + + return; + } + + // If nothing else, it's an assignment missing the = + errorExpected(p, "'=' or '('"); +} + + +static void parseClose(BasParserT *p) { + // CLOSE #channel + advance(p); // consume CLOSE + + // Optional # prefix + match(p, TOK_HASH); + + // Channel number + parseExpression(p); + + basEmit8(&p->cg, OP_FILE_CLOSE); +} + + +static void parseConst(BasParserT *p) { + // CONST name = value + advance(p); // consume CONST + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "constant name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + expect(p, TOK_EQ); + + // Parse the constant value (must be a literal) + bool isNeg = false; + if (check(p, TOK_MINUS)) { + isNeg = true; + advance(p); + } + + BasSymbolT *sym = NULL; + + if (check(p, TOK_INT_LIT) || check(p, TOK_LONG_LIT)) { + int32_t val = check(p, TOK_INT_LIT) ? 
p->lex.token.intVal : (int32_t)p->lex.token.longVal; + if (isNeg) { + val = -val; + } + sym = basSymTabAdd(&p->sym, name, SYM_CONST, BAS_TYPE_LONG); + if (sym != NULL) { + sym->constInt = val; + sym->isDefined = true; + sym->scope = SCOPE_GLOBAL; + } + advance(p); + } else if (check(p, TOK_FLOAT_LIT)) { + double val = p->lex.token.dblVal; + if (isNeg) { + val = -val; + } + sym = basSymTabAdd(&p->sym, name, SYM_CONST, BAS_TYPE_DOUBLE); + if (sym != NULL) { + sym->constDbl = val; + sym->isDefined = true; + sym->scope = SCOPE_GLOBAL; + } + advance(p); + } else if (check(p, TOK_STRING_LIT) && !isNeg) { + sym = basSymTabAdd(&p->sym, name, SYM_CONST, BAS_TYPE_STRING); + if (sym != NULL) { + strncpy(sym->constStr, p->lex.token.text, sizeof(sym->constStr) - 1); + sym->constStr[sizeof(sym->constStr) - 1] = '\0'; + sym->isDefined = true; + sym->scope = SCOPE_GLOBAL; + } + advance(p); + } else if (check(p, TOK_TRUE_KW) && !isNeg) { + sym = basSymTabAdd(&p->sym, name, SYM_CONST, BAS_TYPE_BOOLEAN); + if (sym != NULL) { + sym->constInt = -1; + sym->isDefined = true; + sym->scope = SCOPE_GLOBAL; + } + advance(p); + } else if (check(p, TOK_FALSE_KW) && !isNeg) { + sym = basSymTabAdd(&p->sym, name, SYM_CONST, BAS_TYPE_BOOLEAN); + if (sym != NULL) { + sym->constInt = 0; + sym->isDefined = true; + sym->scope = SCOPE_GLOBAL; + } + advance(p); + } else { + error(p, "Constant value must be a literal"); + } + + if (sym == NULL && !p->hasError) { + error(p, "Duplicate constant or symbol table full"); + } +} + + +static void parseData(BasParserT *p) { + // DATA val1, val2, "string", ... + // Collect all values into the data pool. No runtime code is emitted. 
+ advance(p); // consume DATA + + for (;;) { + if (p->hasError) { + return; + } + + bool isNeg = false; + if (check(p, TOK_MINUS)) { + isNeg = true; + advance(p); + } + + if (check(p, TOK_INT_LIT)) { + int32_t val = p->lex.token.intVal; + if (isNeg) { + val = -val; + } + BasValueT v = basValInteger((int16_t)val); + basAddData(&p->cg, v); + advance(p); + } else if (check(p, TOK_LONG_LIT)) { + int32_t val = (int32_t)p->lex.token.longVal; + if (isNeg) { + val = -val; + } + BasValueT v = basValLong(val); + basAddData(&p->cg, v); + advance(p); + } else if (check(p, TOK_FLOAT_LIT)) { + double val = p->lex.token.dblVal; + if (isNeg) { + val = -val; + } + BasValueT v = basValDouble(val); + basAddData(&p->cg, v); + advance(p); + } else if (check(p, TOK_STRING_LIT) && !isNeg) { + BasValueT v = basValStringFromC(p->lex.token.text); + basAddData(&p->cg, v); + basValRelease(&v); + advance(p); + } else { + // Unquoted text -- read as string up to comma/newline/EOF + // In QB, unquoted DATA values are treated as strings + if (isNeg) { + // Negative sign without a number -- treat "-" as string data + BasValueT v = basValStringFromC("-"); + basAddData(&p->cg, v); + basValRelease(&v); + } else if (check(p, TOK_IDENT)) { + BasValueT v = basValStringFromC(p->lex.token.text); + basAddData(&p->cg, v); + basValRelease(&v); + advance(p); + } else { + error(p, "Expected DATA value"); + return; + } + } + + if (!match(p, TOK_COMMA)) { + break; + } + } +} + + +static void parseDeclare(BasParserT *p) { + // DECLARE SUB name(params) + // DECLARE FUNCTION name(params) AS type + advance(p); // consume DECLARE + + BasSymKindE kind; + + if (check(p, TOK_SUB)) { + kind = SYM_SUB; + advance(p); + } else if (check(p, TOK_FUNCTION)) { + kind = SYM_FUNCTION; + advance(p); + } else { + error(p, "Expected SUB or FUNCTION after DECLARE"); + return; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "subroutine/function name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, 
p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Parse parameter list + int32_t paramCount = 0; + uint8_t paramTypes[BAS_MAX_PARAMS]; + bool paramByVal[BAS_MAX_PARAMS]; + + if (match(p, TOK_LPAREN)) { + while (!check(p, TOK_RPAREN) && !check(p, TOK_EOF) && !p->hasError) { + if (paramCount > 0) { + expect(p, TOK_COMMA); + } + + bool byVal = false; + + if (match(p, TOK_BYVAL)) { + byVal = true; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "parameter name"); + return; + } + + char paramName[BAS_MAX_TOKEN_LEN]; + strncpy(paramName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + paramName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + uint8_t pdt = suffixToType(paramName); + + if (match(p, TOK_AS)) { + pdt = resolveTypeName(p); + } + + if (paramCount < BAS_MAX_PARAMS) { + paramTypes[paramCount] = pdt; + paramByVal[paramCount] = byVal; + } + + paramCount++; + } + + expect(p, TOK_RPAREN); + } + + // Return type for FUNCTION + uint8_t returnType = suffixToType(name); + + if (kind == SYM_FUNCTION && match(p, TOK_AS)) { + returnType = resolveTypeName(p); + } + + if (p->hasError) { + return; + } + + // Add to symbol table as forward declaration + BasSymbolT *sym = basSymTabAdd(&p->sym, name, kind, returnType); + + if (sym == NULL) { + // Might already be declared -- look it up + sym = basSymTabFind(&p->sym, name); + + if (sym == NULL) { + error(p, "Symbol table full"); + return; + } + } + + sym->scope = SCOPE_GLOBAL; + sym->isDefined = false; + sym->codeAddr = 0; + sym->paramCount = paramCount; + + for (int32_t i = 0; i < paramCount && i < BAS_MAX_PARAMS; i++) { + sym->paramTypes[i] = paramTypes[i]; + sym->paramByVal[i] = paramByVal[i]; + } +} + + +static void parseDef(BasParserT *p) { + // DEF FNname(params) = expression + advance(p); // consume DEF + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "function name (FNname)"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, 
BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + + if ((name[0] != 'F' && name[0] != 'f') || (name[1] != 'N' && name[1] != 'n')) { + error(p, "DEF function name must start with FN"); + return; + } + + advance(p); + + int32_t skipJump = emitJump(p, OP_JMP); + int32_t funcAddr = basCodePos(&p->cg); + + basSymTabEnterLocal(&p->sym); + basSymTabAllocSlot(&p->sym); // slot 0 for return value + + int32_t paramCount = 0; + uint8_t paramTypes[BAS_MAX_PARAMS]; + bool paramByVal[BAS_MAX_PARAMS]; + + if (match(p, TOK_LPAREN)) { + while (!check(p, TOK_RPAREN) && !check(p, TOK_EOF) && !p->hasError) { + if (paramCount > 0) { + expect(p, TOK_COMMA); + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "parameter name"); + return; + } + + char paramName[BAS_MAX_TOKEN_LEN]; + strncpy(paramName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + paramName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + uint8_t pdt = suffixToType(paramName); + if (match(p, TOK_AS)) { + pdt = resolveTypeName(p); + } + + BasSymbolT *paramSym = basSymTabAdd(&p->sym, paramName, SYM_VARIABLE, pdt); + if (paramSym == NULL) { + error(p, "Symbol table full"); + return; + } + paramSym->scope = SCOPE_LOCAL; + paramSym->index = basSymTabAllocSlot(&p->sym); + paramSym->isDefined = true; + + if (paramCount < BAS_MAX_PARAMS) { + paramTypes[paramCount] = pdt; + paramByVal[paramCount] = true; + } + paramCount++; + } + expect(p, TOK_RPAREN); + } + + expect(p, TOK_EQ); + parseExpression(p); + basEmit8(&p->cg, OP_RET_VAL); + + basSymTabLeaveLocal(&p->sym); + + uint8_t returnType = suffixToType(name); + bool savedLocal = p->sym.inLocalScope; + p->sym.inLocalScope = false; + BasSymbolT *funcSym = basSymTabAdd(&p->sym, name, SYM_FUNCTION, returnType); + p->sym.inLocalScope = savedLocal; + + if (funcSym == NULL) { + error(p, "Could not register DEF function"); + return; + } + + funcSym->codeAddr = funcAddr; + funcSym->isDefined = true; + funcSym->paramCount = paramCount; + funcSym->scope = SCOPE_GLOBAL; + for 
(int32_t i = 0; i < paramCount && i < BAS_MAX_PARAMS; i++) { + funcSym->paramTypes[i] = paramTypes[i]; + funcSym->paramByVal[i] = paramByVal[i]; + } + + patchCallAddrs(p, funcSym); + patchJump(p, skipJump); +} + + +// ============================================================ +// parseDefType -- DEFINT, DEFLNG, DEFSNG, DEFDBL, DEFSTR +// ============================================================ +// +// Sets the default type for variables whose names start with +// letters in the given range. Example: DEFINT A-Z makes all +// untyped variables default to INTEGER. + +static void parseDefType(BasParserT *p, uint8_t dataType) { + advance(p); // consume DEFxxx keyword + + while (!p->hasError) { + if (!check(p, TOK_IDENT)) { + errorExpected(p, "letter or letter range"); + return; + } + + char startLetter = p->lex.token.text[0]; + + if (startLetter >= 'a' && startLetter <= 'z') { + startLetter -= 32; + } + + if (startLetter < 'A' || startLetter > 'Z') { + error(p, "Expected letter A-Z"); + return; + } + + advance(p); + + char endLetter = startLetter; + + if (match(p, TOK_MINUS)) { + if (!check(p, TOK_IDENT)) { + errorExpected(p, "letter after '-'"); + return; + } + + endLetter = p->lex.token.text[0]; + + if (endLetter >= 'a' && endLetter <= 'z') { + endLetter -= 32; + } + + if (endLetter < 'A' || endLetter > 'Z') { + error(p, "Expected letter A-Z"); + return; + } + + advance(p); + } + + // Set default type for the range + for (char c = startLetter; c <= endLetter; c++) { + p->defType[c - 'A'] = dataType; + } + + if (!match(p, TOK_COMMA)) { + break; + } + } +} + + +static void parseDimBounds(BasParserT *p, int32_t *outDims) { + // Parse each dimension bound, pushing (lbound, ubound) pairs onto the stack. + // Supports both "ubound" (lbound=optionBase) and "lbound TO ubound" syntax. 
+ *outDims = 0; + + for (;;) { + // Save code position before parsing the first expression + int32_t exprStart = basCodePos(&p->cg); + parseExpression(p); + + if (match(p, TOK_TO)) { + // "lbound TO ubound" -- first expr is lbound, parse ubound next + parseExpression(p); + } else { + // Single value = ubound, lbound defaults to optionBase. + // Ubound expression already emitted. Insert PUSH_INT16 before it. + int32_t exprLen = basCodePos(&p->cg) - exprStart; + int32_t insertLen = 3; // OP_PUSH_INT16 + 2 bytes + + if (basCodePos(&p->cg) + insertLen <= BAS_MAX_CODE) { + memmove(&p->cg.code[exprStart + insertLen], &p->cg.code[exprStart], exprLen); + p->cg.code[exprStart] = OP_PUSH_INT16; + int16_t lbound = (int16_t)p->optionBase; + memcpy(&p->cg.code[exprStart + 1], &lbound, 2); + p->cg.codeLen += insertLen; + } + } + + (*outDims)++; + if (!match(p, TOK_COMMA)) { + break; + } + } +} + + +static void parseDim(BasParserT *p) { + // DIM [SHARED] var AS type + // DIM var(ubound) AS type + // DIM var(lbound TO ubound) AS type + // DIM var AS UdtType + advance(p); // consume DIM + + // Check for SHARED keyword + bool isShared = false; + if (check(p, TOK_SHARED)) { + isShared = true; + advance(p); + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + bool isArray = false; + int32_t dims = 0; + + // Check for array bounds + if (check(p, TOK_LPAREN)) { + isArray = true; + advance(p); + parseDimBounds(p, &dims); + expect(p, TOK_RPAREN); + } + + // Optional AS type + uint8_t dt = suffixToType(name); + int32_t udtTypeId = -1; + int32_t fixedLen = 0; + if (match(p, TOK_AS)) { + dt = resolveTypeName(p); + if (dt == BAS_TYPE_UDT) { + udtTypeId = p->lastUdtTypeId; + } + // Check for STRING * n (fixed-length string) + if (dt == BAS_TYPE_STRING && check(p, TOK_STAR)) { + advance(p); + if (check(p, 
TOK_INT_LIT)) { + fixedLen = p->lex.token.intVal; + advance(p); + } else { + error(p, "Expected integer after STRING *"); + } + } + } + + if (p->hasError) { + return; + } + + // Check for duplicate + BasSymbolT *existing = basSymTabFind(&p->sym, name); + if (existing != NULL && existing->isDefined) { + char buf[256]; + snprintf(buf, sizeof(buf), "Variable '%s' already declared", name); + error(p, buf); + return; + } + + BasSymbolT *sym = basSymTabAdd(&p->sym, name, SYM_VARIABLE, dt); + if (sym == NULL) { + error(p, "Symbol table full or duplicate name"); + return; + } + sym->index = basSymTabAllocSlot(&p->sym); + sym->isDefined = true; + sym->isArray = isArray; + sym->isShared = isShared; + sym->udtTypeId = udtTypeId; + sym->fixedLen = fixedLen; + + if (p->sym.inLocalScope) { + sym->scope = SCOPE_LOCAL; + } else { + sym->scope = SCOPE_GLOBAL; + } + + if (isArray) { + // Emit array dimension instruction + basEmit8(&p->cg, OP_DIM_ARRAY); + basEmit8(&p->cg, (uint8_t)dims); + basEmit8(&p->cg, dt); + emitStore(p, sym); + } else if (dt == BAS_TYPE_UDT && udtTypeId >= 0) { + // Allocate a UDT instance + BasSymbolT *typeSym = NULL; + for (int32_t i = 0; i < p->sym.count; i++) { + if (p->sym.symbols[i].kind == SYM_TYPE_DEF && p->sym.symbols[i].index == udtTypeId) { + typeSym = &p->sym.symbols[i]; + break; + } + } + if (typeSym != NULL) { + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, (int16_t)udtTypeId); + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, (int16_t)typeSym->fieldCount); + // OP_DIM_ARRAY with dims=0 signals UDT allocation + basEmit8(&p->cg, OP_DIM_ARRAY); + basEmit8(&p->cg, 0); + basEmit8(&p->cg, BAS_TYPE_UDT); + emitStore(p, sym); + } + } +} + + +static void parseDo(BasParserT *p) { + // DO [WHILE|UNTIL cond] + // ... 
+ // LOOP [WHILE|UNTIL cond] + advance(p); // consume DO + + ExitListT savedExitDo = exitDoList; + exitListInit(&exitDoList); + + int32_t loopTop = basCodePos(&p->cg); + + bool hasPreCondition = false; + int32_t preCondJump = 0; + + // DO WHILE cond / DO UNTIL cond + if (check(p, TOK_WHILE)) { + hasPreCondition = true; + advance(p); + parseExpression(p); + preCondJump = emitJump(p, OP_JMP_FALSE); + } else if (check(p, TOK_UNTIL)) { + hasPreCondition = true; + advance(p); + parseExpression(p); + preCondJump = emitJump(p, OP_JMP_TRUE); + } + + expectEndOfStatement(p); + skipNewlines(p); + + // Loop body + while (!p->hasError && !check(p, TOK_LOOP) && !check(p, TOK_EOF)) { + parseStatement(p); + skipNewlines(p); + } + + if (p->hasError) { + return; + } + + expect(p, TOK_LOOP); + + // LOOP WHILE cond / LOOP UNTIL cond + if (check(p, TOK_WHILE)) { + advance(p); + parseExpression(p); + // Jump back to loopTop if condition is true + basEmit8(&p->cg, OP_JMP_TRUE); + int16_t backOffset = (int16_t)(loopTop - (basCodePos(&p->cg) + 2)); + basEmit16(&p->cg, backOffset); + } else if (check(p, TOK_UNTIL)) { + advance(p); + parseExpression(p); + // Jump back to loopTop if condition is false + basEmit8(&p->cg, OP_JMP_FALSE); + int16_t backOffset = (int16_t)(loopTop - (basCodePos(&p->cg) + 2)); + basEmit16(&p->cg, backOffset); + } else { + // Plain LOOP -- unconditional jump back + basEmit8(&p->cg, OP_JMP); + int16_t backOffset = (int16_t)(loopTop - (basCodePos(&p->cg) + 2)); + basEmit16(&p->cg, backOffset); + } + + // Backpatch pre-condition jump (exits the loop) + if (hasPreCondition) { + patchJump(p, preCondJump); + } + + // Patch all EXIT DO jumps to here + exitListPatch(&exitDoList, p); + exitDoList = savedExitDo; +} + + +static void parseEnd(BasParserT *p) { + // END -- by itself = halt + // END IF / END SUB / END FUNCTION / END SELECT are handled by their parsers + advance(p); // consume END + basEmit8(&p->cg, OP_HALT); +} + + +static void parseErase(BasParserT *p) { + // 
ERASE arrayVar + advance(p); // consume ERASE + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "array variable name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *sym = basSymTabFind(&p->sym, name); + if (sym == NULL || !sym->isArray) { + error(p, "ERASE requires an array variable"); + return; + } + + emitLoad(p, sym); + basEmit8(&p->cg, OP_ERASE); + emitStore(p, sym); +} + + +static void parseExit(BasParserT *p) { + advance(p); // consume EXIT + + if (check(p, TOK_FOR)) { + advance(p); + int32_t addr = emitJump(p, OP_JMP); + exitListAdd(&exitForList, addr); + } else if (check(p, TOK_DO)) { + advance(p); + int32_t addr = emitJump(p, OP_JMP); + exitListAdd(&exitDoList, addr); + } else if (check(p, TOK_SUB)) { + advance(p); + int32_t addr = emitJump(p, OP_JMP); + exitListAdd(&exitSubList, addr); + } else if (check(p, TOK_FUNCTION)) { + advance(p); + int32_t addr = emitJump(p, OP_JMP); + exitListAdd(&exitFuncList, addr); + } else { + error(p, "Expected FOR, DO, SUB, or FUNCTION after EXIT"); + } +} + + +static void parseFor(BasParserT *p) { + // FOR var = start TO limit [STEP step] + // ... 
+ // NEXT [var] + advance(p); // consume FOR + + ExitListT savedExitFor = exitForList; + exitListInit(&exitForList); + + // Loop variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "loop variable"); + return; + } + + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *loopVar = ensureVariable(p, varName); + if (loopVar == NULL) { + return; + } + + // = start + expect(p, TOK_EQ); + parseExpression(p); + emitStore(p, loopVar); + + // TO limit + expect(p, TOK_TO); + parseExpression(p); // limit is on stack + + // STEP step (optional, default 1) + if (match(p, TOK_STEP)) { + parseExpression(p); // step is on stack + } else { + // Default step = 1 + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, 1); + } + + // Emit FOR_INIT -- sets up the for-loop state in the VM + basEmit8(&p->cg, OP_FOR_INIT); + basEmitU16(&p->cg, (uint16_t)loopVar->index); + basEmit8(&p->cg, loopVar->scope == SCOPE_LOCAL ? 1 : 0); + + int32_t loopBody = basCodePos(&p->cg); + + expectEndOfStatement(p); + skipNewlines(p); + + // Loop body + while (!p->hasError && !check(p, TOK_NEXT) && !check(p, TOK_EOF)) { + parseStatement(p); + skipNewlines(p); + } + + if (p->hasError) { + return; + } + + expect(p, TOK_NEXT); + + // Optional variable name after NEXT (we just skip it) + if (check(p, TOK_IDENT)) { + advance(p); + } + + // Emit FOR_NEXT with backward jump to loop body + basEmit8(&p->cg, OP_FOR_NEXT); + basEmitU16(&p->cg, (uint16_t)loopVar->index); + basEmit8(&p->cg, loopVar->scope == SCOPE_LOCAL ? 1 : 0); + int16_t backOffset = (int16_t)(loopBody - (basCodePos(&p->cg) + 2)); + basEmit16(&p->cg, backOffset); + + // Patch all EXIT FOR jumps to here + exitListPatch(&exitForList, p); + exitForList = savedExitFor; +} + + +static void parseFunction(BasParserT *p) { + // FUNCTION name(params) AS type + // ... 
+ // END FUNCTION + advance(p); // consume FUNCTION + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "function name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Save current proc name for STATIC variable mangling + strncpy(p->currentProc, name, BAS_MAX_TOKEN_LEN - 1); + p->currentProc[BAS_MAX_TOKEN_LEN - 1] = '\0'; + + // Jump over the function body in module-level code + int32_t skipJump = emitJump(p, OP_JMP); + + int32_t funcAddr = basCodePos(&p->cg); + + // Enter local scope + basSymTabEnterLocal(&p->sym); + + ExitListT savedExitFunc = exitFuncList; + exitListInit(&exitFuncList); + + // Allocate slot 0 for return value + basSymTabAllocSlot(&p->sym); + + // Parse parameter list + int32_t paramCount = 0; + uint8_t paramTypes[BAS_MAX_PARAMS]; + bool paramByVal[BAS_MAX_PARAMS]; + + if (match(p, TOK_LPAREN)) { + while (!check(p, TOK_RPAREN) && !check(p, TOK_EOF) && !p->hasError) { + if (paramCount > 0) { + expect(p, TOK_COMMA); + } + + bool byVal = false; + if (match(p, TOK_BYVAL)) { + byVal = true; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "parameter name"); + return; + } + + char paramName[BAS_MAX_TOKEN_LEN]; + strncpy(paramName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + paramName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + uint8_t pdt = suffixToType(paramName); + if (match(p, TOK_AS)) { + pdt = resolveTypeName(p); + } + + BasSymbolT *paramSym = basSymTabAdd(&p->sym, paramName, SYM_VARIABLE, pdt); + if (paramSym == NULL) { + error(p, "Symbol table full"); + return; + } + paramSym->scope = SCOPE_LOCAL; + paramSym->index = basSymTabAllocSlot(&p->sym); + paramSym->isDefined = true; + + if (paramCount < BAS_MAX_PARAMS) { + paramTypes[paramCount] = pdt; + paramByVal[paramCount] = byVal; + } + paramCount++; + } + expect(p, TOK_RPAREN); + } + + // Return type + uint8_t returnType = suffixToType(name); + if (match(p, TOK_AS)) { 
+ returnType = resolveTypeName(p); + } + + // Register the function in the symbol table (global scope entry) + // We need to temporarily leave local scope to add to global + BasSymbolT *existing = basSymTabFindGlobal(&p->sym, name); + BasSymbolT *funcSym = NULL; + + if (existing != NULL && existing->kind == SYM_FUNCTION) { + // Forward-declared, now define it + funcSym = existing; + } else { + // Temporarily store the local state, add globally + bool savedLocal = p->sym.inLocalScope; + p->sym.inLocalScope = false; + funcSym = basSymTabAdd(&p->sym, name, SYM_FUNCTION, returnType); + p->sym.inLocalScope = savedLocal; + } + + if (funcSym == NULL) { + error(p, "Could not register function"); + return; + } + + funcSym->codeAddr = funcAddr; + funcSym->isDefined = true; + funcSym->paramCount = paramCount; + funcSym->scope = SCOPE_GLOBAL; + for (int32_t i = 0; i < paramCount && i < BAS_MAX_PARAMS; i++) { + funcSym->paramTypes[i] = paramTypes[i]; + funcSym->paramByVal[i] = paramByVal[i]; + } + + // Backpatch any forward-reference calls to this function + patchCallAddrs(p, funcSym); + + expectEndOfStatement(p); + skipNewlines(p); + + // Parse function body + while (!p->hasError && !check(p, TOK_EOF)) { + // Check for END FUNCTION + if (check(p, TOK_END)) { + // Peek ahead -- we need to see if it's END FUNCTION + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_FUNCTION)) { + advance(p); + break; + } + // Not END FUNCTION, restore and parse as statement + p->lex = savedLex; + } + parseStatement(p); + skipNewlines(p); + } + + // Patch EXIT FUNCTION jumps + exitListPatch(&exitFuncList, p); + exitFuncList = savedExitFunc; + + // Load return value from slot 0 and return + basEmit8(&p->cg, OP_LOAD_LOCAL); + basEmitU16(&p->cg, 0); + basEmit8(&p->cg, OP_RET_VAL); + + // Leave local scope + basSymTabLeaveLocal(&p->sym); + p->currentProc[0] = '\0'; + + // Patch the skip jump + patchJump(p, skipJump); +} + + +static void parseGosub(BasParserT *p) { + // GOSUB label -- push 
return PC, then JMP to label + advance(p); // consume GOSUB + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "label name"); + return; + } + + char labelName[BAS_MAX_TOKEN_LEN]; + strncpy(labelName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + labelName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Push the return PC (address after the JMP instruction) + // OP_PUSH_INT32 = 1 + 4 bytes, OP_JMP = 1 + 2 bytes + int32_t pushPos = basCodePos(&p->cg); + basEmit8(&p->cg, OP_PUSH_INT32); + basEmit16(&p->cg, 0); // placeholder lo + basEmit16(&p->cg, 0); // placeholder hi + + // Emit the jump to the label + emitJumpToLabel(p, OP_JMP, labelName); + + // Backpatch the return address (PC is now right after the JMP) + int32_t returnPc = basCodePos(&p->cg); + int16_t lo = (int16_t)(returnPc & 0xFFFF); + int16_t hi = (int16_t)((returnPc >> 16) & 0xFFFF); + basPatch16(&p->cg, pushPos + 1, lo); + basPatch16(&p->cg, pushPos + 3, hi); +} + + +static void parseGoto(BasParserT *p) { + // GOTO label + advance(p); // consume GOTO + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "label name"); + return; + } + + char labelName[BAS_MAX_TOKEN_LEN]; + strncpy(labelName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + labelName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + emitJumpToLabel(p, OP_JMP, labelName); +} + + +static void parseIf(BasParserT *p) { + // IF expr THEN + // ... + // [ELSEIF expr THEN] + // ... + // [ELSE] + // ... 
+ // END IF + advance(p); // consume IF + + parseExpression(p); + + expect(p, TOK_THEN); + if (p->hasError) { + return; + } + + // Check for single-line IF: IF cond THEN stmt + if (!check(p, TOK_NEWLINE) && !check(p, TOK_EOF)) { + // Single-line IF + int32_t falseJump = emitJump(p, OP_JMP_FALSE); + + parseStatement(p); + + if (check(p, TOK_ELSE)) { + advance(p); + int32_t endJump = emitJump(p, OP_JMP); + patchJump(p, falseJump); + parseStatement(p); + patchJump(p, endJump); + } else { + patchJump(p, falseJump); + } + return; + } + + // Multi-line IF + expectEndOfStatement(p); + skipNewlines(p); + + int32_t falseJump = emitJump(p, OP_JMP_FALSE); + + // Collect end-of-chain jumps for backpatching + int32_t endJumps[MAX_EXITS]; + int32_t endJumpCount = 0; + + // Parse THEN block + while (!p->hasError && !check(p, TOK_ELSEIF) && !check(p, TOK_ELSE) && !check(p, TOK_EOF)) { + // Check for END IF + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_IF)) { + advance(p); + patchJump(p, falseJump); + // Patch all end jumps + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + return; + } + p->lex = savedLex; + } + parseStatement(p); + skipNewlines(p); + } + + // ELSEIF chain + while (!p->hasError && check(p, TOK_ELSEIF)) { + // Jump from previous true-block to end of chain + if (endJumpCount < MAX_EXITS) { + endJumps[endJumpCount++] = emitJump(p, OP_JMP); + } + + // Patch the previous false jump to here + patchJump(p, falseJump); + + advance(p); // consume ELSEIF + parseExpression(p); + expect(p, TOK_THEN); + + falseJump = emitJump(p, OP_JMP_FALSE); + + expectEndOfStatement(p); + skipNewlines(p); + + while (!p->hasError && !check(p, TOK_ELSEIF) && !check(p, TOK_ELSE) && !check(p, TOK_EOF)) { + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_IF)) { + advance(p); + patchJump(p, falseJump); + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + 
return; + } + p->lex = savedLex; + } + parseStatement(p); + skipNewlines(p); + } + } + + // ELSE block + if (!p->hasError && check(p, TOK_ELSE)) { + if (endJumpCount < MAX_EXITS) { + endJumps[endJumpCount++] = emitJump(p, OP_JMP); + } + patchJump(p, falseJump); + falseJump = -1; // no more false jump needed + + advance(p); // consume ELSE + expectEndOfStatement(p); + skipNewlines(p); + + while (!p->hasError && !check(p, TOK_EOF)) { + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_IF)) { + advance(p); + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + return; + } + p->lex = savedLex; + } + parseStatement(p); + skipNewlines(p); + } + } + + // Patch the last false jump if no ELSE block + if (falseJump >= 0) { + patchJump(p, falseJump); + } + + // Patch all end-of-chain jumps + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + + // If we got here without END IF, that's an error + if (!p->hasError) { + error(p, "Expected END IF"); + } +} + + +static void parseInput(BasParserT *p) { + // INPUT #channel, var + // INPUT [prompt;] var + advance(p); // consume INPUT + + // Check for file I/O: INPUT #channel, var + if (check(p, TOK_HASH)) { + advance(p); // consume # + + // Channel number + parseExpression(p); + + // Comma separator + expect(p, TOK_COMMA); + + // Target variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + basEmit8(&p->cg, OP_FILE_INPUT); + + BasSymbolT *sym = ensureVariable(p, varName); + + if (sym != NULL) { + // If the variable is numeric, convert the input string + if (sym->dataType != BAS_TYPE_STRING) { + if (sym->dataType == BAS_TYPE_INTEGER || sym->dataType == BAS_TYPE_LONG) { + basEmit8(&p->cg, OP_CONV_STR_INT); + } else { + basEmit8(&p->cg, OP_CONV_STR_FLT); 
+ } + } + + emitStore(p, sym); + } + + return; + } + + // Check for optional prompt string + if (check(p, TOK_STRING_LIT)) { + uint16_t idx = basAddConstant(&p->cg, p->lex.token.text, p->lex.token.textLen); + basEmit8(&p->cg, OP_PUSH_STR); + basEmitU16(&p->cg, idx); + advance(p); + // Semicolon after prompt + if (match(p, TOK_SEMICOLON)) { + // nothing extra + } else if (match(p, TOK_COMMA)) { + // comma -- no question mark (just prompt) + } + } else { + // No prompt -- push empty string + uint16_t idx = basAddConstant(&p->cg, "", 0); + basEmit8(&p->cg, OP_PUSH_STR); + basEmitU16(&p->cg, idx); + } + + // Emit INPUT opcode -- pops prompt, pushes input string + basEmit8(&p->cg, OP_INPUT); + + // Target variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *sym = ensureVariable(p, varName); + if (sym == NULL) { + return; + } + + // If the variable is numeric, we need to convert the input string + if (sym->dataType != BAS_TYPE_STRING) { + if (sym->dataType == BAS_TYPE_INTEGER || sym->dataType == BAS_TYPE_LONG) { + basEmit8(&p->cg, OP_CONV_STR_INT); + } else { + basEmit8(&p->cg, OP_CONV_STR_FLT); + } + } + + emitStore(p, sym); +} + + +static void parseLineInput(BasParserT *p) { + // LINE INPUT #channel, var + advance(p); // consume LINE + + if (!check(p, TOK_INPUT)) { + error(p, "Expected INPUT after LINE"); + return; + } + + advance(p); // consume INPUT + + // Must have # for file I/O + if (!match(p, TOK_HASH)) { + error(p, "Expected # for file channel in LINE INPUT"); + return; + } + + // Channel expression + parseExpression(p); + + // Comma separator + expect(p, TOK_COMMA); + + // Target variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, 
BAS_MAX_TOKEN_LEN - 1); + varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + basEmit8(&p->cg, OP_FILE_LINE_INPUT); + + BasSymbolT *sym = ensureVariable(p, varName); + + if (sym != NULL) { + emitStore(p, sym); + } +} + + +static void parseModule(BasParserT *p) { + skipNewlines(p); + + while (!p->hasError && !check(p, TOK_EOF)) { + parseStatement(p); + skipNewlines(p); + } + + // End of module -- emit HALT + basEmit8(&p->cg, OP_HALT); +} + + +#define MAX_ON_LABELS 32 + +static void parseOn(BasParserT *p) { + // ON ERROR GOTO label -- error handler + // ON expr GOTO label1, label2, ... -- computed goto + // ON expr GOSUB label1, label2, ... -- computed gosub + advance(p); // consume ON + + // ON ERROR GOTO is a special form + if (check(p, TOK_ERROR_KW)) { + parseOnError(p); + return; + } + + // ON expr GOTO/GOSUB label1, label2, ... + parseExpression(p); + + bool isGosub; + + if (check(p, TOK_GOTO)) { + isGosub = false; + advance(p); + } else if (check(p, TOK_GOSUB)) { + isGosub = true; + advance(p); + } else { + error(p, "Expected GOTO or GOSUB after ON expression"); + return; + } + + // Track end-of-gosub jumps for patching + int32_t endJumps[MAX_ON_LABELS]; + int32_t endJumpCount = 0; + int32_t labelIdx = 1; + + for (;;) { + if (p->hasError) { + return; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "label name"); + return; + } + + char labelName[BAS_MAX_TOKEN_LEN]; + strncpy(labelName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + labelName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // DUP the selector + basEmit8(&p->cg, OP_DUP); + + // PUSH the 1-based index + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, (int16_t)labelIdx); + + // Compare + basEmit8(&p->cg, OP_CMP_EQ); + + // JMP_FALSE to skip this branch + int32_t skipAddr = emitJump(p, OP_JMP_FALSE); + + // Match: POP the selector value + basEmit8(&p->cg, OP_POP); + + if (isGosub) { + // Push return PC before jumping + int32_t pushPos = basCodePos(&p->cg); + basEmit8(&p->cg, 
OP_PUSH_INT32); + basEmit16(&p->cg, 0); // placeholder lo + basEmit16(&p->cg, 0); // placeholder hi + + emitJumpToLabel(p, OP_JMP, labelName); + + // Backpatch the return address + int32_t returnPc = basCodePos(&p->cg); + int16_t lo = (int16_t)(returnPc & 0xFFFF); + int16_t hi = (int16_t)((returnPc >> 16) & 0xFFFF); + basPatch16(&p->cg, pushPos + 1, lo); + basPatch16(&p->cg, pushPos + 3, hi); + + // After GOSUB returns, jump to end of ON...GOSUB + if (endJumpCount < MAX_ON_LABELS) { + endJumps[endJumpCount++] = emitJump(p, OP_JMP); + } + } else { + // GOTO: just jump to the label + emitJumpToLabel(p, OP_JMP, labelName); + } + + // Patch the skip (no-match continues to next branch) + patchJump(p, skipAddr); + + labelIdx++; + + if (!match(p, TOK_COMMA)) { + break; + } + } + + // No match: POP the selector and fall through + basEmit8(&p->cg, OP_POP); + + // Patch all end-of-gosub jumps to here + int32_t endTarget = basCodePos(&p->cg); + + for (int32_t i = 0; i < endJumpCount; i++) { + int16_t offset = (int16_t)(endTarget - (endJumps[i] + 2)); + basPatch16(&p->cg, endJumps[i], offset); + } +} + + +static void parseOnError(BasParserT *p) { + // ON ERROR GOTO label + // ON ERROR GOTO 0 (disable) + // Note: ON and ERROR already consumed by parseOn dispatcher + advance(p); // consume ERROR + + if (!check(p, TOK_GOTO)) { + error(p, "Expected GOTO after ON ERROR"); + return; + } + advance(p); // consume GOTO + + // ON ERROR GOTO 0 -- disable error handler + if (check(p, TOK_INT_LIT) && p->lex.token.intVal == 0) { + advance(p); + basEmit8(&p->cg, OP_ON_ERROR); + basEmit16(&p->cg, 0); + return; + } + + // ON ERROR GOTO label + if (!check(p, TOK_IDENT)) { + errorExpected(p, "label name or 0"); + return; + } + + char labelName[BAS_MAX_TOKEN_LEN]; + strncpy(labelName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + labelName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Look up the label + BasSymbolT *sym = basSymTabFind(&p->sym, labelName); + + if (sym != NULL && sym->kind == 
SYM_LABEL && sym->isDefined) { + // Label already defined -- emit ON_ERROR with offset to handler + basEmit8(&p->cg, OP_ON_ERROR); + int32_t here = basCodePos(&p->cg); + int16_t offset = (int16_t)(sym->codeAddr - (here + 2)); + basEmit16(&p->cg, offset); + } else { + // Forward reference + if (sym == NULL) { + sym = basSymTabAdd(&p->sym, labelName, SYM_LABEL, 0); + if (sym == NULL) { + error(p, "Symbol table full"); + return; + } + sym->scope = SCOPE_GLOBAL; + sym->isDefined = false; + sym->codeAddr = 0; + } + + basEmit8(&p->cg, OP_ON_ERROR); + int32_t patchAddr = basCodePos(&p->cg); + basEmit16(&p->cg, 0); + + if (sym->patchCount < BAS_MAX_CALL_PATCHES) { + sym->patchAddrs[sym->patchCount++] = patchAddr; + } + } +} + + +static void parseOpen(BasParserT *p) { + // OPEN filename FOR mode AS #channel + advance(p); // consume OPEN + + // Filename expression + parseExpression(p); + + // FOR keyword + expect(p, TOK_FOR); + + // Mode: INPUT, OUTPUT, APPEND + uint8_t mode; + + if (check(p, TOK_INPUT)) { + mode = 1; // INPUT + advance(p); + } else if (check(p, TOK_OUTPUT)) { + mode = 2; // OUTPUT + advance(p); + } else if (check(p, TOK_APPEND)) { + mode = 3; // APPEND + advance(p); + } else if (check(p, TOK_RANDOM)) { + mode = 4; // RANDOM + advance(p); + } else if (check(p, TOK_BINARY)) { + mode = 5; // BINARY + advance(p); + } else { + error(p, "Expected INPUT, OUTPUT, APPEND, RANDOM, or BINARY after FOR"); + return; + } + + // AS keyword + expect(p, TOK_AS); + + // Optional # prefix + match(p, TOK_HASH); + + // Channel number expression + parseExpression(p); + + // Optional LEN = recordsize (for RANDOM mode) + if (checkKeyword(p, "LEN")) { + advance(p); // consume LEN + expect(p, TOK_EQ); + // For now we just parse and discard -- record length is not + // enforced at the VM level (GET/PUT use variable type size) + parseExpression(p); + basEmit8(&p->cg, OP_POP); + } + + // Emit: stack has [filename, channel] -- OP_FILE_OPEN reads mode byte + basEmit8(&p->cg, 
OP_FILE_OPEN); + basEmit8(&p->cg, mode); +} + + +static void parseOption(BasParserT *p) { + // OPTION BASE 0 | OPTION BASE 1 + // OPTION COMPARE BINARY | OPTION COMPARE TEXT + advance(p); // consume OPTION + + if (check(p, TOK_BASE)) { + advance(p); // consume BASE + + if (!check(p, TOK_INT_LIT)) { + error(p, "Expected 0 or 1 after OPTION BASE"); + return; + } + + int32_t base = p->lex.token.intVal; + + if (base != 0 && base != 1) { + error(p, "OPTION BASE must be 0 or 1"); + return; + } + + p->optionBase = base; + advance(p); + return; + } + + if (checkKeyword(p, "COMPARE")) { + advance(p); // consume COMPARE + + if (check(p, TOK_BINARY)) { + p->optionCompareText = false; + advance(p); + basEmit8(&p->cg, OP_COMPARE_MODE); + basEmit8(&p->cg, 0); + } else if (checkKeyword(p, "TEXT")) { + p->optionCompareText = true; + advance(p); + basEmit8(&p->cg, OP_COMPARE_MODE); + basEmit8(&p->cg, 1); + } else { + error(p, "Expected BINARY or TEXT after OPTION COMPARE"); + } + + return; + } + + if (check(p, TOK_EXPLICIT)) { + advance(p); + p->optionExplicit = true; + return; + } + + error(p, "Expected BASE, COMPARE, or EXPLICIT after OPTION"); +} + + +static void parsePrint(BasParserT *p) { + // PRINT [#channel, expr] + // PRINT [expr] [; expr] [, expr] [;] + // PRINT USING "fmt"; expr [; expr] ... 
+ advance(p); // consume PRINT + + // Check for file I/O: PRINT #channel, expr + if (check(p, TOK_HASH)) { + advance(p); // consume # + + // Channel number + parseExpression(p); + + // Comma separator + expect(p, TOK_COMMA); + + // Value to print + parseExpression(p); + + basEmit8(&p->cg, OP_FILE_PRINT); + return; + } + + // Check for PRINT USING + if (checkKeyword(p, "USING")) { + advance(p); // consume USING + + // Parse format string expression + parseExpression(p); + + // Semicolon separates format from values + expect(p, TOK_SEMICOLON); + + // Parse values, each one gets formatted with PRINT_USING + for (;;) { + parseExpression(p); + basEmit8(&p->cg, OP_PRINT_USING); + basEmit8(&p->cg, OP_PRINT); + + if (check(p, TOK_SEMICOLON)) { + advance(p); + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + break; + } + continue; + } + + break; + } + + basEmit8(&p->cg, OP_PRINT_NL); + return; + } + + bool trailingSemicolon = false; + + // Empty PRINT = just newline + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + basEmit8(&p->cg, OP_PRINT_NL); + return; + } + + while (!p->hasError) { + trailingSemicolon = false; + + if (check(p, TOK_SEMICOLON)) { + // Just a semicolon -- no space + trailingSemicolon = true; + advance(p); + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + break; + } + continue; + } + + if (check(p, TOK_COMMA)) { + // Comma -- print tab + basEmit8(&p->cg, OP_PRINT_TAB); + advance(p); + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + trailingSemicolon = true; // comma at end suppresses newline too + break; + } + continue; + } + + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + break; + } + + // Check for SPC(n) and TAB(n) inside PRINT + if (checkKeyword(p, "SPC")) { + advance(p); + expect(p, TOK_LPAREN); + 
parseExpression(p); + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_PRINT_SPC_N); + continue; + } + + if (checkKeyword(p, "TAB")) { + advance(p); + expect(p, TOK_LPAREN); + parseExpression(p); + expect(p, TOK_RPAREN); + basEmit8(&p->cg, OP_PRINT_TAB_N); + continue; + } + + // Expression + parseExpression(p); + basEmit8(&p->cg, OP_PRINT); + } + + // Print newline unless suppressed by trailing semicolon/comma + if (!trailingSemicolon) { + basEmit8(&p->cg, OP_PRINT_NL); + } +} + + +static void parseRead(BasParserT *p) { + // READ var1, var2, ... + advance(p); // consume READ + + for (;;) { + if (p->hasError) { + return; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *sym = ensureVariable(p, name); + if (sym == NULL) { + return; + } + + basEmit8(&p->cg, OP_READ_DATA); + emitStore(p, sym); + + if (!match(p, TOK_COMMA)) { + break; + } + } +} + + +static void parseRedim(BasParserT *p) { + // REDIM [PRESERVE] var(bounds) AS type + advance(p); // consume REDIM + + uint8_t preserve = 0; + if (check(p, TOK_PRESERVE)) { + preserve = 1; + advance(p); + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "array variable name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *sym = basSymTabFind(&p->sym, name); + if (sym == NULL) { + sym = ensureVariable(p, name); + } + if (sym == NULL) { + return; + } + sym->isArray = true; + + // Load the old array reference + emitLoad(p, sym); + + // Parse new bounds + int32_t dims = 0; + expect(p, TOK_LPAREN); + parseDimBounds(p, &dims); + expect(p, TOK_RPAREN); + + // Optional AS type + if (match(p, TOK_AS)) { + resolveTypeName(p); + } + + if (p->hasError) { + return; + } + + basEmit8(&p->cg, OP_REDIM); + 
basEmit8(&p->cg, (uint8_t)dims); + basEmit8(&p->cg, preserve); + emitStore(p, sym); +} + + +static void parseRestore(BasParserT *p) { + // RESTORE -- reset the DATA read pointer to the beginning + advance(p); // consume RESTORE + basEmit8(&p->cg, OP_RESTORE); +} + + +static void parseResume(BasParserT *p) { + // RESUME -- re-execute the statement that caused the error + // RESUME NEXT -- continue at the next statement after the error + advance(p); // consume RESUME + + if (check(p, TOK_NEXT)) { + advance(p); + basEmit8(&p->cg, OP_RESUME_NEXT); + } else { + basEmit8(&p->cg, OP_RESUME); + } +} + + +static void parseSelectCase(BasParserT *p) { + // SELECT CASE expr + // CASE val [, val] ... + // ... + // [CASE ELSE] + // ... + // END SELECT + advance(p); // consume SELECT + expect(p, TOK_CASE); + + // Evaluate the test expression -- stays on stack throughout + parseExpression(p); + + expectEndOfStatement(p); + skipNewlines(p); + + int32_t endJumps[MAX_EXITS]; + int32_t endJumpCount = 0; + + while (!p->hasError && !check(p, TOK_EOF)) { + // Check for END SELECT + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_SELECT)) { + advance(p); + basEmit8(&p->cg, OP_POP); // pop test expression + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + return; + } + p->lex = savedLex; + } + + if (!check(p, TOK_CASE)) { + error(p, "Expected CASE or END SELECT"); + return; + } + + advance(p); // consume CASE + + // CASE ELSE -- always matches, no comparison needed + if (check(p, TOK_ELSE)) { + advance(p); + expectEndOfStatement(p); + skipNewlines(p); + + // Parse body until END SELECT + while (!p->hasError && !check(p, TOK_EOF)) { + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_SELECT)) { + advance(p); + basEmit8(&p->cg, OP_POP); + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + return; + } + p->lex = savedLex; + } + parseStatement(p); + 
skipNewlines(p); + } + continue; + } + + // CASE val [, val] ... + // + // Strategy for multi-value CASE using JMP_TRUE chaining: + // For each value: + // DUP testval + // push value + // CMP_EQ + // JMP_TRUE -> body + // JMP -> next_case (none of the values matched) + // body: + // ...statements... + // JMP -> end_select + // next_case: + + int32_t bodyJumps[MAX_EXITS]; + int32_t bodyJumpCount = 0; + + // First value + basEmit8(&p->cg, OP_DUP); + parseExpression(p); + basEmit8(&p->cg, OP_CMP_EQ); + + if (bodyJumpCount < MAX_EXITS) { + bodyJumps[bodyJumpCount++] = emitJump(p, OP_JMP_TRUE); + } + + // Additional comma-separated values + while (!p->hasError && match(p, TOK_COMMA)) { + basEmit8(&p->cg, OP_DUP); + parseExpression(p); + basEmit8(&p->cg, OP_CMP_EQ); + + if (bodyJumpCount < MAX_EXITS) { + bodyJumps[bodyJumpCount++] = emitJump(p, OP_JMP_TRUE); + } + } + + // None matched -- jump to next case + int32_t nextCaseJump = emitJump(p, OP_JMP); + + // Patch all body jumps to here (start of body) + for (int32_t i = 0; i < bodyJumpCount; i++) { + patchJump(p, bodyJumps[i]); + } + + // Parse the CASE body + expectEndOfStatement(p); + skipNewlines(p); + + while (!p->hasError && !check(p, TOK_CASE) && !check(p, TOK_EOF)) { + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_SELECT)) { + advance(p); + basEmit8(&p->cg, OP_POP); + patchJump(p, nextCaseJump); + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + return; + } + p->lex = savedLex; + } + parseStatement(p); + skipNewlines(p); + } + + // Jump to end of SELECT (skip remaining cases) + if (endJumpCount < MAX_EXITS) { + endJumps[endJumpCount++] = emitJump(p, OP_JMP); + } + + // Patch the next-case jump to here + patchJump(p, nextCaseJump); + } + + // Pop test value (reached if no END SELECT but hit EOF) + basEmit8(&p->cg, OP_POP); + + for (int32_t i = 0; i < endJumpCount; i++) { + patchJump(p, endJumps[i]); + } + + if (!p->hasError) { + error(p, 
"Expected END SELECT"); + } +} + + +static void parseShell(BasParserT *p) { + // SHELL "command" -- execute an OS command (discard return value) + // SHELL -- no argument, no-op in embedded context + advance(p); // consume SHELL + + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + // No argument -- push empty string and call SHELL (no-op) + uint16_t idx = basAddConstant(&p->cg, "", 0); + basEmit8(&p->cg, OP_PUSH_STR); + basEmitU16(&p->cg, idx); + } else { + parseExpression(p); + } + + basEmit8(&p->cg, OP_SHELL); + basEmit8(&p->cg, OP_POP); // discard return value in statement form +} + + +static void parseSleep(BasParserT *p) { + // SLEEP [seconds] + // If no argument, default to 1 second + advance(p); // consume SLEEP + + if (check(p, TOK_NEWLINE) || check(p, TOK_EOF) || check(p, TOK_COLON) || check(p, TOK_ELSE)) { + // No argument -- push 1 second + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, 1); + } else { + parseExpression(p); + } + + basEmit8(&p->cg, OP_SLEEP); +} + + +static void parseStatic(BasParserT *p) { + // STATIC var AS type + // Only valid inside SUB/FUNCTION. Creates a global variable with a + // mangled name (procName$varName) that persists across calls. 
+ advance(p); // consume STATIC + + if (!p->sym.inLocalScope) { + error(p, "STATIC is only valid inside SUB or FUNCTION"); + return; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Optional AS type + uint8_t dt = suffixToType(varName); + if (match(p, TOK_AS)) { + dt = resolveTypeName(p); + } + + if (p->hasError) { + return; + } + + // Create a mangled global name: "procName$varName" + char mangledName[BAS_MAX_SYMBOL_NAME]; + snprintf(mangledName, sizeof(mangledName), "%s$%s", p->currentProc, varName); + + // Create the global variable with the mangled name + bool savedLocal = p->sym.inLocalScope; + p->sym.inLocalScope = false; + BasSymbolT *globalSym = basSymTabAdd(&p->sym, mangledName, SYM_VARIABLE, dt); + p->sym.inLocalScope = savedLocal; + + if (globalSym == NULL) { + error(p, "Symbol table full or duplicate STATIC variable"); + return; + } + globalSym->scope = SCOPE_GLOBAL; + globalSym->index = p->sym.nextGlobalIdx++; + globalSym->isDefined = true; + + // Create a local alias that maps to this global's index + BasSymbolT *localSym = basSymTabAdd(&p->sym, varName, SYM_VARIABLE, dt); + if (localSym == NULL) { + error(p, "Symbol table full or duplicate variable name"); + return; + } + localSym->scope = SCOPE_GLOBAL; // accessed as global + localSym->index = globalSym->index; + localSym->isDefined = true; +} + + +static void parseStatement(BasParserT *p) { + if (p->hasError) { + return; + } + + skipNewlines(p); + + if (check(p, TOK_EOF)) { + return; + } + + BasTokenTypeE tt = p->lex.token.type; + + switch (tt) { + case TOK_PRINT: + parsePrint(p); + break; + + case TOK_DIM: + parseDim(p); + break; + + case TOK_DATA: + parseData(p); + break; + + case TOK_READ: + parseRead(p); + break; + + case TOK_RESTORE: + parseRestore(p); + break; + + case TOK_STATIC: + 
parseStatic(p); + break; + + case TOK_DEF: + parseDef(p); + break; + + case TOK_DEFINT: + parseDefType(p, BAS_TYPE_INTEGER); + break; + + case TOK_DEFLNG: + parseDefType(p, BAS_TYPE_LONG); + break; + + case TOK_DEFSNG: + parseDefType(p, BAS_TYPE_SINGLE); + break; + + case TOK_DEFDBL: + parseDefType(p, BAS_TYPE_DOUBLE); + break; + + case TOK_DEFSTR: + parseDefType(p, BAS_TYPE_STRING); + break; + + case TOK_DECLARE: + parseDeclare(p); + break; + + case TOK_IF: + parseIf(p); + break; + + case TOK_FOR: + parseFor(p); + break; + + case TOK_DO: + parseDo(p); + break; + + case TOK_WHILE: + parseWhile(p); + break; + + case TOK_SELECT: + parseSelectCase(p); + break; + + case TOK_SUB: + parseSub(p); + break; + + case TOK_FUNCTION: + parseFunction(p); + break; + + case TOK_EXIT: + parseExit(p); + break; + + case TOK_CONST: + parseConst(p); + break; + + case TOK_END: + parseEnd(p); + break; + + case TOK_ERASE: + parseErase(p); + break; + + case TOK_TYPE: + parseType(p); + break; + + case TOK_REDIM: + parseRedim(p); + break; + + case TOK_INPUT: + parseInput(p); + break; + + case TOK_OPEN: + parseOpen(p); + break; + + case TOK_CLOSE: + parseClose(p); + break; + + case TOK_GET: + parseGet(p); + break; + + case TOK_PUT: + parsePut(p); + break; + + case TOK_SEEK: + parseSeek(p); + break; + + case TOK_WRITE: + parseWrite(p); + break; + + case TOK_LINE: + parseLineInput(p); + break; + + case TOK_GOTO: + parseGoto(p); + break; + + case TOK_GOSUB: + parseGosub(p); + break; + + case TOK_ON: + parseOn(p); + break; + + case TOK_OPTION: + parseOption(p); + break; + + case TOK_SHELL: + parseShell(p); + break; + + case TOK_RESUME: + parseResume(p); + break; + + case TOK_RETURN: + advance(p); + if (p->sym.inLocalScope) { + // Inside SUB/FUNCTION: return from subroutine + basEmit8(&p->cg, OP_RET); + } else { + // Module level: GOSUB return (pop PC from eval stack) + basEmit8(&p->cg, OP_GOSUB_RET); + } + break; + + case TOK_SLEEP: + parseSleep(p); + break; + + case TOK_SWAP: + parseSwap(p); + 
break; + + case TOK_CALL: { + advance(p); // consume CALL + if (!check(p, TOK_IDENT)) { + errorExpected(p, "subroutine name"); + break; + } + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *sym = basSymTabFind(&p->sym, name); + if (sym == NULL) { + // Forward reference + sym = basSymTabAdd(&p->sym, name, SYM_SUB, BAS_TYPE_INTEGER); + if (sym == NULL) { + error(p, "Symbol table full"); + break; + } + sym->scope = SCOPE_GLOBAL; + sym->isDefined = false; + sym->codeAddr = 0; + } + + if (check(p, TOK_LPAREN)) { + emitFunctionCall(p, sym); + } else { + // CALL with no arguments + uint8_t baseSlot = (sym->kind == SYM_FUNCTION) ? 1 : 0; + basEmit8(&p->cg, OP_CALL); + int32_t addrPos = basCodePos(&p->cg); + basEmitU16(&p->cg, (uint16_t)sym->codeAddr); + basEmit8(&p->cg, 0); + basEmit8(&p->cg, baseSlot); + + if (!sym->isDefined && sym->patchCount < BAS_MAX_CALL_PATCHES) { + sym->patchAddrs[sym->patchCount++] = addrPos; + } + } + + if (sym->kind == SYM_FUNCTION) { + basEmit8(&p->cg, OP_POP); // discard return value + } + break; + } + + case TOK_RANDOMIZE: + advance(p); + if (check(p, TOK_TIMER)) { + advance(p); + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, -1); + } else { + parseExpression(p); + } + basEmit8(&p->cg, OP_MATH_RANDOMIZE); + break; + + case TOK_DOEVENTS: + advance(p); + basEmit8(&p->cg, OP_DO_EVENTS); + break; + + case TOK_LET: + advance(p); // consume LET, then fall through to assignment + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name after LET"); + break; + } + parseAssignOrCall(p); + break; + + case TOK_IDENT: { + // Check for label: identifier followed by colon + BasLexerT savedLex = p->lex; + char labelName[BAS_MAX_TOKEN_LEN]; + strncpy(labelName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + labelName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + if (check(p, TOK_COLON)) { + advance(p); // consume colon + // Record the 
label at the current code position + BasSymbolT *sym = basSymTabFind(&p->sym, labelName); + if (sym != NULL && sym->kind == SYM_LABEL) { + // Forward-declared label -- now define it + sym->codeAddr = basCodePos(&p->cg); + sym->isDefined = true; + patchLabelRefs(p, sym); + } else if (sym == NULL) { + sym = basSymTabAdd(&p->sym, labelName, SYM_LABEL, 0); + if (sym == NULL) { + error(p, "Symbol table full"); + break; + } + sym->scope = SCOPE_GLOBAL; + sym->isDefined = true; + sym->codeAddr = basCodePos(&p->cg); + } else { + char buf[256]; + snprintf(buf, sizeof(buf), "Name '%s' already used", labelName); + error(p, buf); + } + // After the label, there may be a statement on the same line + // which will be parsed on the next iteration + break; + } + + // Not a label -- restore and parse as assignment/call + p->lex = savedLex; + parseAssignOrCall(p); + break; + } + + case TOK_REM: + // Comment -- skip to end of line + advance(p); + break; + + default: { + char buf[256]; + snprintf(buf, sizeof(buf), "Unexpected token: %s", basTokenName(tt)); + error(p, buf); + break; + } + } + + if (!p->hasError) { + expectEndOfStatement(p); + } +} + + +static void parseSub(BasParserT *p) { + // SUB name(params) + // ... 
+ // END SUB + advance(p); // consume SUB + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "subroutine name"); + return; + } + + char name[BAS_MAX_TOKEN_LEN]; + strncpy(name, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + name[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Save current proc name for STATIC variable mangling + strncpy(p->currentProc, name, BAS_MAX_TOKEN_LEN - 1); + p->currentProc[BAS_MAX_TOKEN_LEN - 1] = '\0'; + + // Jump over the sub body in module-level code + int32_t skipJump = emitJump(p, OP_JMP); + + int32_t subAddr = basCodePos(&p->cg); + + // Enter local scope + basSymTabEnterLocal(&p->sym); + + ExitListT savedExitSub = exitSubList; + exitListInit(&exitSubList); + + // Parse parameter list + int32_t paramCount = 0; + uint8_t paramTypes[BAS_MAX_PARAMS]; + bool paramByVal[BAS_MAX_PARAMS]; + + if (match(p, TOK_LPAREN)) { + while (!check(p, TOK_RPAREN) && !check(p, TOK_EOF) && !p->hasError) { + if (paramCount > 0) { + expect(p, TOK_COMMA); + } + + bool byVal = false; + if (match(p, TOK_BYVAL)) { + byVal = true; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "parameter name"); + return; + } + + char paramName[BAS_MAX_TOKEN_LEN]; + strncpy(paramName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + paramName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + uint8_t pdt = suffixToType(paramName); + if (match(p, TOK_AS)) { + pdt = resolveTypeName(p); + } + + BasSymbolT *paramSym = basSymTabAdd(&p->sym, paramName, SYM_VARIABLE, pdt); + if (paramSym == NULL) { + error(p, "Symbol table full"); + return; + } + paramSym->scope = SCOPE_LOCAL; + paramSym->index = basSymTabAllocSlot(&p->sym); + paramSym->isDefined = true; + + if (paramCount < BAS_MAX_PARAMS) { + paramTypes[paramCount] = pdt; + paramByVal[paramCount] = byVal; + } + paramCount++; + } + expect(p, TOK_RPAREN); + } + + // Register the sub in the symbol table (global scope) + BasSymbolT *existing = basSymTabFindGlobal(&p->sym, name); + BasSymbolT *subSym = NULL; + + if (existing != NULL && 
existing->kind == SYM_SUB) { + subSym = existing; + } else { + bool savedLocal = p->sym.inLocalScope; + p->sym.inLocalScope = false; + subSym = basSymTabAdd(&p->sym, name, SYM_SUB, BAS_TYPE_INTEGER); + p->sym.inLocalScope = savedLocal; + } + + if (subSym == NULL) { + error(p, "Could not register subroutine"); + return; + } + + subSym->codeAddr = subAddr; + subSym->isDefined = true; + subSym->paramCount = paramCount; + subSym->scope = SCOPE_GLOBAL; + for (int32_t i = 0; i < paramCount && i < BAS_MAX_PARAMS; i++) { + subSym->paramTypes[i] = paramTypes[i]; + subSym->paramByVal[i] = paramByVal[i]; + } + + // Backpatch any forward-reference calls to this sub + patchCallAddrs(p, subSym); + + expectEndOfStatement(p); + skipNewlines(p); + + // Parse sub body + while (!p->hasError && !check(p, TOK_EOF)) { + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_SUB)) { + advance(p); + break; + } + p->lex = savedLex; + } + parseStatement(p); + skipNewlines(p); + } + + // Patch EXIT SUB jumps + exitListPatch(&exitSubList, p); + exitSubList = savedExitSub; + + basEmit8(&p->cg, OP_RET); + + // Leave local scope + basSymTabLeaveLocal(&p->sym); + p->currentProc[0] = '\0'; + + // Patch the skip jump + patchJump(p, skipJump); +} + + +static void parseType(BasParserT *p) { + // TYPE name + // field AS type + // ... 
+ // END TYPE + advance(p); // consume TYPE + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "type name"); + return; + } + + char typeName[BAS_MAX_TOKEN_LEN]; + strncpy(typeName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + typeName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + // Add TYPE_DEF symbol + bool savedLocal = p->sym.inLocalScope; + p->sym.inLocalScope = false; + BasSymbolT *typeSym = basSymTabAdd(&p->sym, typeName, SYM_TYPE_DEF, BAS_TYPE_UDT); + p->sym.inLocalScope = savedLocal; + + if (typeSym == NULL) { + error(p, "Symbol table full or duplicate TYPE name"); + return; + } + typeSym->scope = SCOPE_GLOBAL; + typeSym->isDefined = true; + typeSym->index = p->sym.count - 1; + typeSym->fieldCount = 0; + + expectEndOfStatement(p); + skipNewlines(p); + + // Parse fields until END TYPE + while (!p->hasError && !check(p, TOK_EOF)) { + if (check(p, TOK_END)) { + BasLexerT savedLex = p->lex; + advance(p); + if (check(p, TOK_TYPE)) { + advance(p); + break; + } + p->lex = savedLex; + } + + if (!check(p, TOK_IDENT)) { + errorExpected(p, "field name or END TYPE"); + return; + } + + if (typeSym->fieldCount >= BAS_MAX_UDT_FIELDS) { + error(p, "Too many fields in TYPE"); + return; + } + + BasFieldDefT *field = &typeSym->fields[typeSym->fieldCount]; + strncpy(field->name, p->lex.token.text, BAS_MAX_SYMBOL_NAME - 1); + field->name[BAS_MAX_SYMBOL_NAME - 1] = '\0'; + advance(p); + + expect(p, TOK_AS); + field->dataType = resolveTypeName(p); + if (field->dataType == BAS_TYPE_UDT) { + field->udtTypeId = p->lastUdtTypeId; + } + + typeSym->fieldCount++; + + expectEndOfStatement(p); + skipNewlines(p); + } +} + + +static void parseSwap(BasParserT *p) { + // SWAP a, b -- swap the values of two variables + advance(p); // consume SWAP + + // First variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char nameA[BAS_MAX_TOKEN_LEN]; + strncpy(nameA, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + nameA[BAS_MAX_TOKEN_LEN - 1] = '\0'; + 
advance(p); + + BasSymbolT *symA = ensureVariable(p, nameA); + if (symA == NULL) { + return; + } + + expect(p, TOK_COMMA); + + // Second variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char nameB[BAS_MAX_TOKEN_LEN]; + strncpy(nameB, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + nameB[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *symB = ensureVariable(p, nameB); + if (symB == NULL) { + return; + } + + // Emit: load a, load b, store a, store b + emitLoad(p, symA); + emitLoad(p, symB); + emitStore(p, symA); + emitStore(p, symB); +} +static void parseGet(BasParserT *p) { + // GET #channel, [recno], var + advance(p); // consume GET + + match(p, TOK_HASH); // optional # + + // Channel number + parseExpression(p); + expect(p, TOK_COMMA); + + // Optional record number + if (check(p, TOK_COMMA)) { + // No record number specified -- push 0 (current position) + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, 0); + } else { + parseExpression(p); + } + expect(p, TOK_COMMA); + + // Target variable + if (!check(p, TOK_IDENT)) { + errorExpected(p, "variable name"); + return; + } + + char varName[BAS_MAX_TOKEN_LEN]; + strncpy(varName, p->lex.token.text, BAS_MAX_TOKEN_LEN - 1); + varName[BAS_MAX_TOKEN_LEN - 1] = '\0'; + advance(p); + + BasSymbolT *sym = ensureVariable(p, varName); + if (sym == NULL) { + return; + } + + // Push variable type so VM knows how many bytes to read + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, (int16_t)sym->dataType); + + basEmit8(&p->cg, OP_FILE_GET); + + emitStore(p, sym); +} + + +static void parsePut(BasParserT *p) { + // PUT #channel, [recno], var + advance(p); // consume PUT + + match(p, TOK_HASH); // optional # + + // Channel number + parseExpression(p); + expect(p, TOK_COMMA); + + // Optional record number + if (check(p, TOK_COMMA)) { + // No record number specified -- push 0 (current position) + basEmit8(&p->cg, OP_PUSH_INT16); + basEmit16(&p->cg, 0); + } else { + 
parseExpression(p); + } + expect(p, TOK_COMMA); + + // Value expression + parseExpression(p); + + basEmit8(&p->cg, OP_FILE_PUT); +} + + +static void parseSeek(BasParserT *p) { + // SEEK #channel, position + advance(p); // consume SEEK + + match(p, TOK_HASH); // optional # + + // Channel number + parseExpression(p); + + expect(p, TOK_COMMA); + + // Position + parseExpression(p); + + basEmit8(&p->cg, OP_FILE_SEEK); +} + + +static void parseWrite(BasParserT *p) { + // WRITE #channel, expr1, expr2, ... + // Values are comma-delimited. Strings are quoted. Numbers undecorated. + // Each WRITE statement ends with a newline. + advance(p); // consume WRITE + + if (!check(p, TOK_HASH)) { + error(p, "Expected # after WRITE"); + return; + } + advance(p); // consume # + + // Channel number expression + parseExpression(p); + + // Comma separator between channel and first value + expect(p, TOK_COMMA); + + // Parse each value + bool first = true; + while (!p->hasError) { + if (!first) { + // Emit comma separator to file + basEmit8(&p->cg, OP_DUP); // dup channel for separator + basEmit8(&p->cg, OP_FILE_WRITE_SEP); + } + first = false; + + // Duplicate channel for the write operation + basEmit8(&p->cg, OP_DUP); + + // Parse value expression + parseExpression(p); + + // Write value in WRITE format (strings quoted, numbers undecorated) + basEmit8(&p->cg, OP_FILE_WRITE); + + if (!match(p, TOK_COMMA)) { + break; + } + } + + // Write newline to file (channel still on stack) + basEmit8(&p->cg, OP_FILE_WRITE_NL); +} + + + + +static void parseWhile(BasParserT *p) { + // WHILE cond + // ... 
+ // WEND + advance(p); // consume WHILE + + ExitListT savedExitDo = exitDoList; + exitListInit(&exitDoList); + + int32_t loopTop = basCodePos(&p->cg); + + parseExpression(p); + int32_t falseJump = emitJump(p, OP_JMP_FALSE); + + expectEndOfStatement(p); + skipNewlines(p); + + while (!p->hasError && !check(p, TOK_WEND) && !check(p, TOK_EOF)) { + parseStatement(p); + skipNewlines(p); + } + + if (p->hasError) { + return; + } + + expect(p, TOK_WEND); + + // Jump back to loop top + basEmit8(&p->cg, OP_JMP); + int16_t backOffset = (int16_t)(loopTop - (basCodePos(&p->cg) + 2)); + basEmit16(&p->cg, backOffset); + + // Patch the false jump to exit + patchJump(p, falseJump); + + // Patch EXIT DO jumps (WHILE/WEND uses the DO exit list) + exitListPatch(&exitDoList, p); + exitDoList = savedExitDo; +} + + +// ============================================================ +// Public API +// ============================================================ + +void basParserInit(BasParserT *p, const char *source, int32_t sourceLen) { + memset(p, 0, sizeof(BasParserT)); + basLexerInit(&p->lex, source, sourceLen); + basCodeGenInit(&p->cg); + basSymTabInit(&p->sym); + p->hasError = false; + p->errorLine = 0; + p->error[0] = '\0'; + + exitListInit(&exitForList); + exitListInit(&exitDoList); + exitListInit(&exitSubList); + exitListInit(&exitFuncList); + + // basLexerInit already primes the first token -- no advance needed +} + + +bool basParse(BasParserT *p) { + parseModule(p); + return !p->hasError; +} + + +BasModuleT *basParserBuildModule(BasParserT *p) { + if (p->hasError) { + return NULL; + } + p->cg.globalCount = p->sym.nextGlobalIdx; + return basCodeGenBuildModule(&p->cg); +} + + +void basParserFree(BasParserT *p) { + basCodeGenFree(&p->cg); +} diff --git a/dvxbasic/compiler/parser.h b/dvxbasic/compiler/parser.h new file mode 100644 index 0000000..b4828c8 --- /dev/null +++ b/dvxbasic/compiler/parser.h @@ -0,0 +1,57 @@ +// parser.h -- DVX BASIC parser (recursive descent) +// +// 
// ============================================================
// Parser state
// ============================================================

// All state for one compilation: the token source, the p-code emitter, the
// symbol table, error reporting, and the OPTION / DEFtype settings that
// influence how later statements are compiled.
typedef struct {
    BasLexerT   lex;                // token source; lex.token is the current token
    BasCodeGenT cg;                 // p-code emitter that receives the generated code
    BasSymTabT  sym;                // symbol table (global scope plus one local scope)
    char        error[512];         // error message text; meaningful when hasError is true
    bool        hasError;           // set once an error has been reported
    int32_t     errorLine;          // presumably the source line of the error -- TODO confirm
    int32_t     lastUdtTypeId;      // index of last resolved UDT type from resolveTypeName
    int32_t     optionBase;         // default array lower bound (0 or 1)
    bool        optionCompareText;  // true = case-insensitive string comparison
    bool        optionExplicit;     // true = variables must be declared with DIM
    uint8_t     defType[26];        // default type per letter (A-Z), set by DEFINT etc.
    char        currentProc[BAS_MAX_TOKEN_LEN]; // name of current SUB/FUNCTION; empty outside one
} BasParserT;

// ============================================================
// API
// ============================================================

// Initialize parser with source text.
// Any previous contents of *p are overwritten.
void basParserInit(BasParserT *p, const char *source, int32_t sourceLen);

// Parse the entire source and generate p-code.
// Returns true on success, false on error (check p->error).
bool basParse(BasParserT *p);

// Build a module from the parsed code. Returns NULL on error.
// Caller owns the module and must free with basModuleFree().
BasModuleT *basParserBuildModule(BasParserT *p);
// ============================================================
// Case-insensitive name comparison
// ============================================================

// Fold one ASCII lowercase letter to uppercase; every other byte is
// returned unchanged.
static char symFoldUpper(char c) {
    if (c >= 'a' && c <= 'z') {
        return (char)(c - 32);
    }

    return c;
}


// Returns true when `a` and `b` have the same length and match byte for
// byte once ASCII letters are folded to uppercase.
static bool namesEqual(const char *a, const char *b) {
    for (; *a != '\0' && *b != '\0'; a++, b++) {
        if (symFoldUpper(*a) != symFoldUpper(*b)) {
            return false;
        }
    }

    // At least one string has ended; equal only if both ended together.
    return *a == *b;
}
SCOPE_LOCAL : SCOPE_GLOBAL; + + for (int32_t i = 0; i < tab->count; i++) { + if (tab->symbols[i].scope == scope && namesEqual(tab->symbols[i].name, name)) { + return NULL; // duplicate + } + } + + BasSymbolT *sym = &tab->symbols[tab->count++]; + memset(sym, 0, sizeof(*sym)); + strncpy(sym->name, name, BAS_MAX_SYMBOL_NAME - 1); + sym->name[BAS_MAX_SYMBOL_NAME - 1] = '\0'; + sym->kind = kind; + sym->scope = scope; + sym->dataType = dataType; + sym->isDefined = true; + + return sym; +} + + +// ============================================================ +// basSymTabAllocSlot +// ============================================================ + +int32_t basSymTabAllocSlot(BasSymTabT *tab) { + if (tab->inLocalScope) { + return tab->nextLocalIdx++; + } + + return tab->nextGlobalIdx++; +} + + +// ============================================================ +// basSymTabEnterLocal +// ============================================================ + +void basSymTabEnterLocal(BasSymTabT *tab) { + tab->inLocalScope = true; + tab->nextLocalIdx = 0; +} + + +// ============================================================ +// basSymTabFind +// ============================================================ + +BasSymbolT *basSymTabFind(BasSymTabT *tab, const char *name) { + // Search local scope first + if (tab->inLocalScope) { + for (int32_t i = tab->count - 1; i >= 0; i--) { + if (tab->symbols[i].scope == SCOPE_LOCAL && namesEqual(tab->symbols[i].name, name)) { + return &tab->symbols[i]; + } + } + } + + // Search global scope + return basSymTabFindGlobal(tab, name); +} + + +// ============================================================ +// basSymTabFindGlobal +// ============================================================ + +BasSymbolT *basSymTabFindGlobal(BasSymTabT *tab, const char *name) { + for (int32_t i = 0; i < tab->count; i++) { + if (tab->symbols[i].scope == SCOPE_GLOBAL && namesEqual(tab->symbols[i].name, name)) { + return &tab->symbols[i]; + } + } + + return NULL; +} + + 
+// ============================================================ +// basSymTabInit +// ============================================================ + +void basSymTabInit(BasSymTabT *tab) { + memset(tab, 0, sizeof(*tab)); +} + + +// ============================================================ +// basSymTabLeaveLocal +// ============================================================ + +void basSymTabLeaveLocal(BasSymTabT *tab) { + // Remove all local symbols + int32_t newCount = 0; + + for (int32_t i = 0; i < tab->count; i++) { + if (tab->symbols[i].scope != SCOPE_LOCAL) { + if (i != newCount) { + tab->symbols[newCount] = tab->symbols[i]; + } + + newCount++; + } + } + + tab->count = newCount; + tab->inLocalScope = false; + tab->nextLocalIdx = 0; +} diff --git a/dvxbasic/compiler/symtab.h b/dvxbasic/compiler/symtab.h new file mode 100644 index 0000000..e4cf59a --- /dev/null +++ b/dvxbasic/compiler/symtab.h @@ -0,0 +1,129 @@ +// symtab.h -- DVX BASIC symbol table +// +// Tracks variables, constants, subroutines, functions, and labels +// during compilation. Supports nested scopes (global + one local +// scope per SUB/FUNCTION). +// +// Embeddable: no DVX dependencies, pure C. 
+ +#ifndef DVXBASIC_SYMTAB_H +#define DVXBASIC_SYMTAB_H + +#include "../compiler/opcodes.h" + +#include +#include + +// ============================================================ +// Symbol kinds +// ============================================================ + +typedef enum { + SYM_VARIABLE, + SYM_CONST, + SYM_SUB, + SYM_FUNCTION, + SYM_LABEL, + SYM_TYPE_DEF // user-defined TYPE +} BasSymKindE; + +// ============================================================ +// Symbol scope +// ============================================================ + +typedef enum { + SCOPE_GLOBAL, + SCOPE_LOCAL +} BasScopeE; + +// ============================================================ +// Symbol entry +// ============================================================ + +#define BAS_MAX_SYMBOL_NAME 64 +#define BAS_MAX_PARAMS 16 +#define BAS_MAX_CALL_PATCHES 32 +#define BAS_MAX_UDT_FIELDS 32 + +// UDT field definition +typedef struct { + char name[BAS_MAX_SYMBOL_NAME]; + uint8_t dataType; // BAS_TYPE_* + int32_t udtTypeId; // if dataType == BAS_TYPE_UDT, index of the TYPE_DEF symbol +} BasFieldDefT; + +typedef struct { + char name[BAS_MAX_SYMBOL_NAME]; + BasSymKindE kind; + BasScopeE scope; + uint8_t dataType; // BAS_TYPE_* for variables/functions + int32_t index; // slot index (local or global) + int32_t codeAddr; // PC address for SUB/FUNCTION/LABEL + bool isDefined; // false = forward-declared + bool isArray; + bool isShared; + int32_t udtTypeId; // for variables of BAS_TYPE_UDT: index of TYPE_DEF symbol + int32_t fixedLen; // for STRING * n: fixed length (0 = variable-length) + + // For SUB/FUNCTION: parameter info + int32_t paramCount; + uint8_t paramTypes[BAS_MAX_PARAMS]; + bool paramByVal[BAS_MAX_PARAMS]; + + // Forward-reference backpatch list (code addresses to patch when defined) + int32_t patchAddrs[BAS_MAX_CALL_PATCHES]; + int32_t patchCount; + + // For CONST: the constant value + union { + int32_t constInt; + double constDbl; + }; + char constStr[256]; + + // For 
TYPE_DEF: field definitions + BasFieldDefT fields[BAS_MAX_UDT_FIELDS]; + int32_t fieldCount; +} BasSymbolT; + +// ============================================================ +// Symbol table +// ============================================================ + +#define BAS_MAX_SYMBOLS 512 + +typedef struct { + BasSymbolT symbols[BAS_MAX_SYMBOLS]; + int32_t count; + int32_t nextGlobalIdx; // next global variable slot + int32_t nextLocalIdx; // next local variable slot (reset per SUB/FUNCTION) + bool inLocalScope; // true when inside SUB/FUNCTION +} BasSymTabT; + +// ============================================================ +// API +// ============================================================ + +void basSymTabInit(BasSymTabT *tab); + +// Add a symbol. Returns the symbol pointer, or NULL if the table is full +// or the name already exists in the current scope. +BasSymbolT *basSymTabAdd(BasSymTabT *tab, const char *name, BasSymKindE kind, uint8_t dataType); + +// Look up a symbol by name. Searches local scope first, then global. +// Case-insensitive. +BasSymbolT *basSymTabFind(BasSymTabT *tab, const char *name); + +// Look up a symbol in the global scope only. +BasSymbolT *basSymTabFindGlobal(BasSymTabT *tab, const char *name); + +// Enter local scope (called at SUB/FUNCTION start). +void basSymTabEnterLocal(BasSymTabT *tab); + +// Leave local scope (called at END SUB/FUNCTION). Removes local symbols. +void basSymTabLeaveLocal(BasSymTabT *tab); + +// Allocate the next variable slot (global or local depending on scope). +int32_t basSymTabAllocSlot(BasSymTabT *tab); + +#endif // DVXBASIC_SYMTAB_H diff --git a/dvxbasic/runtime/values.c b/dvxbasic/runtime/values.c new file mode 100644 index 0000000..3937491 --- /dev/null +++ b/dvxbasic/runtime/values.c @@ -0,0 +1,633 @@ +// values.c -- DVX BASIC value system implementation +// +// Tagged union values with reference-counted strings. 
The string +// heap uses simple refcounting: assignment increments, scope exit +// decrements, zero frees. No garbage collector needed. + +#include "values.h" +#include "../compiler/opcodes.h" + +#include +#include +#include +#include +#include + +// ============================================================ +// String system +// ============================================================ + +// Singleton empty string -- never freed, always available. +// Extra byte for the null terminator via the struct hack. +static struct { + BasStringT hdr; + char nul; +} sEmptyStringStorage = { { .refCount = 999999, .len = 0, .cap = 1 }, '\0' }; +BasStringT *basEmptyString = &sEmptyStringStorage.hdr; + + +BasStringT *basStringAlloc(int32_t cap) { + if (cap < 1) { + cap = 1; + } + + BasStringT *s = (BasStringT *)malloc(sizeof(BasStringT) + cap); + + if (!s) { + return basStringRef(basEmptyString); + } + + s->refCount = 1; + s->len = 0; + s->cap = cap; + s->data[0] = '\0'; + return s; +} + + +BasStringT *basStringConcat(const BasStringT *a, const BasStringT *b) { + int32_t newLen = a->len + b->len; + BasStringT *s = basStringAlloc(newLen + 1); + memcpy(s->data, a->data, a->len); + memcpy(s->data + a->len, b->data, b->len); + s->data[newLen] = '\0'; + s->len = newLen; + return s; +} + + +int32_t basStringCompare(const BasStringT *a, const BasStringT *b) { + int32_t minLen = a->len < b->len ? a->len : b->len; + int32_t cmp = memcmp(a->data, b->data, minLen); + + if (cmp != 0) { + return cmp; + } + + if (a->len < b->len) { + return -1; + } + + if (a->len > b->len) { + return 1; + } + + return 0; +} + + +int32_t basStringCompareCI(const BasStringT *a, const BasStringT *b) { + int32_t minLen = a->len < b->len ? 
a->len : b->len; + + for (int32_t i = 0; i < minLen; i++) { + int32_t ca = toupper((unsigned char)a->data[i]); + int32_t cb = toupper((unsigned char)b->data[i]); + + if (ca != cb) { + return ca - cb; + } + } + + if (a->len < b->len) { + return -1; + } + + if (a->len > b->len) { + return 1; + } + + return 0; +} + + +BasStringT *basStringNew(const char *text, int32_t len) { + if (!text || len <= 0) { + return basStringRef(basEmptyString); + } + + BasStringT *s = basStringAlloc(len + 1); + memcpy(s->data, text, len); + s->data[len] = '\0'; + s->len = len; + return s; +} + + +BasStringT *basStringRef(BasStringT *s) { + if (s) { + s->refCount++; + } + + return s; +} + + +BasStringT *basStringSub(const BasStringT *s, int32_t start, int32_t len) { + if (start < 0) { + start = 0; + } + + if (start >= s->len) { + return basStringRef(basEmptyString); + } + + if (len < 0 || start + len > s->len) { + len = s->len - start; + } + + return basStringNew(s->data + start, len); +} + + +void basStringSystemInit(void) { + sEmptyStringStorage.nul = '\0'; +} + + +void basStringSystemShutdown(void) { + // Nothing to do -- empty string is static +} + + +void basStringUnref(BasStringT *s) { + if (!s || s == basEmptyString) { + return; + } + + s->refCount--; + + if (s->refCount <= 0) { + free(s); + } +} + + +// ============================================================ +// Array system +// ============================================================ + +void basArrayFree(BasArrayT *arr) { + if (!arr) { + return; + } + + if (arr->elements) { + for (int32_t i = 0; i < arr->totalElements; i++) { + basValRelease(&arr->elements[i]); + } + + free(arr->elements); + } + + free(arr); +} + + +int32_t basArrayIndex(BasArrayT *arr, int32_t *indices, int32_t ndims) { + if (!arr || ndims != arr->dims) { + return -1; + } + + int32_t flatIdx = 0; + int32_t multiplier = 1; + + // Row-major order: last dimension varies fastest + for (int32_t d = ndims - 1; d >= 0; d--) { + int32_t idx = indices[d] - 
arr->lbound[d]; + int32_t dimSize = arr->ubound[d] - arr->lbound[d] + 1; + + if (idx < 0 || idx >= dimSize) { + return -1; + } + + flatIdx += idx * multiplier; + multiplier *= dimSize; + } + + return flatIdx; +} + + +BasArrayT *basArrayNew(int32_t dims, int32_t *lbounds, int32_t *ubounds, uint8_t elementType) { + if (dims < 1 || dims > BAS_ARRAY_MAX_DIMS) { + return NULL; + } + + BasArrayT *arr = (BasArrayT *)calloc(1, sizeof(BasArrayT)); + + if (!arr) { + return NULL; + } + + arr->refCount = 1; + arr->elementType = elementType; + arr->dims = dims; + + int32_t total = 1; + + for (int32_t d = 0; d < dims; d++) { + arr->lbound[d] = lbounds[d]; + arr->ubound[d] = ubounds[d]; + int32_t dimSize = ubounds[d] - lbounds[d] + 1; + + if (dimSize < 1) { + free(arr); + return NULL; + } + + total *= dimSize; + } + + arr->totalElements = total; + arr->elements = (BasValueT *)calloc(total, sizeof(BasValueT)); + + if (!arr->elements) { + free(arr); + return NULL; + } + + // Initialize all elements to the default for the element type + for (int32_t i = 0; i < total; i++) { + arr->elements[i].type = elementType; + } + + return arr; +} + + +BasArrayT *basArrayRef(BasArrayT *arr) { + if (arr) { + arr->refCount++; + } + + return arr; +} + + +void basArrayUnref(BasArrayT *arr) { + if (!arr) { + return; + } + + arr->refCount--; + + if (arr->refCount <= 0) { + basArrayFree(arr); + } +} + + +// ============================================================ +// UDT system +// ============================================================ + +void basUdtFree(BasUdtT *udt) { + if (!udt) { + return; + } + + if (udt->fields) { + for (int32_t i = 0; i < udt->fieldCount; i++) { + basValRelease(&udt->fields[i]); + } + + free(udt->fields); + } + + free(udt); +} + + +BasUdtT *basUdtNew(int32_t typeId, int32_t fieldCount) { + BasUdtT *udt = (BasUdtT *)calloc(1, sizeof(BasUdtT)); + + if (!udt) { + return NULL; + } + + udt->refCount = 1; + udt->typeId = typeId; + udt->fieldCount = fieldCount; + udt->fields 
= (BasValueT *)calloc(fieldCount, sizeof(BasValueT)); + + if (!udt->fields) { + free(udt); + return NULL; + } + + return udt; +} + + +BasUdtT *basUdtRef(BasUdtT *udt) { + if (udt) { + udt->refCount++; + } + + return udt; +} + + +void basUdtUnref(BasUdtT *udt) { + if (!udt) { + return; + } + + udt->refCount--; + + if (udt->refCount <= 0) { + basUdtFree(udt); + } +} + + +// ============================================================ +// Value constructors +// ============================================================ + +BasValueT basValBool(bool v) { + BasValueT val; + val.type = BAS_TYPE_BOOLEAN; + val.boolVal = v ? -1 : 0; + return val; +} + + +BasValueT basValCopy(BasValueT v) { + if (v.type == BAS_TYPE_STRING && v.strVal) { + basStringRef(v.strVal); + } else if (v.type == BAS_TYPE_ARRAY && v.arrVal) { + basArrayRef(v.arrVal); + } else if (v.type == BAS_TYPE_UDT && v.udtVal) { + basUdtRef(v.udtVal); + } + + return v; +} + + +BasValueT basValDouble(double v) { + BasValueT val; + val.type = BAS_TYPE_DOUBLE; + val.dblVal = v; + return val; +} + + +BasValueT basValInteger(int16_t v) { + BasValueT val; + val.type = BAS_TYPE_INTEGER; + val.intVal = v; + return val; +} + + +BasValueT basValLong(int32_t v) { + BasValueT val; + val.type = BAS_TYPE_LONG; + val.longVal = v; + return val; +} + + +BasValueT basValSingle(float v) { + BasValueT val; + val.type = BAS_TYPE_SINGLE; + val.sngVal = v; + return val; +} + + +BasValueT basValString(BasStringT *s) { + BasValueT val; + val.type = BAS_TYPE_STRING; + val.strVal = s ? basStringRef(s) : basStringRef(basEmptyString); + return val; +} + + +BasValueT basValStringFromC(const char *text) { + BasValueT val; + val.type = BAS_TYPE_STRING; + val.strVal = basStringNew(text, text ? 
(int32_t)strlen(text) : 0); + return val; +} + + +void basValRelease(BasValueT *v) { + if (v->type == BAS_TYPE_STRING) { + basStringUnref(v->strVal); + v->strVal = NULL; + } else if (v->type == BAS_TYPE_ARRAY) { + basArrayUnref(v->arrVal); + v->arrVal = NULL; + } else if (v->type == BAS_TYPE_UDT) { + basUdtUnref(v->udtVal); + v->udtVal = NULL; + } +} + + +// ============================================================ +// Type conversion +// ============================================================ + +BasValueT basValToBool(BasValueT v) { + return basValBool(basValIsTruthy(v)); +} + + +BasValueT basValToDouble(BasValueT v) { + return basValDouble(basValToNumber(v)); +} + + +BasValueT basValToInteger(BasValueT v) { + double n = basValToNumber(v); + // Banker's rounding (round half to even) + int32_t rounded = (int32_t)(n + (n > 0 ? 0.5 : -0.5)); + return basValInteger((int16_t)rounded); +} + + +BasValueT basValToLong(BasValueT v) { + double n = basValToNumber(v); + int32_t rounded = (int32_t)(n + (n > 0 ? 
0.5 : -0.5)); + return basValLong(rounded); +} + + +double basValToNumber(BasValueT v) { + switch (v.type) { + case BAS_TYPE_INTEGER: + return (double)v.intVal; + + case BAS_TYPE_LONG: + return (double)v.longVal; + + case BAS_TYPE_SINGLE: + return (double)v.sngVal; + + case BAS_TYPE_DOUBLE: + return v.dblVal; + + case BAS_TYPE_BOOLEAN: + return (double)v.boolVal; + + case BAS_TYPE_STRING: + if (v.strVal && v.strVal->len > 0) { + return atof(v.strVal->data); + } + + return 0.0; + + default: + return 0.0; + } +} + + +BasValueT basValToSingle(BasValueT v) { + return basValSingle((float)basValToNumber(v)); +} + + +BasValueT basValToString(BasValueT v) { + if (v.type == BAS_TYPE_STRING) { + return basValCopy(v); + } + + BasStringT *s = basValFormatString(v); + BasValueT result; + result.type = BAS_TYPE_STRING; + result.strVal = s; + return result; +} + + +BasStringT *basValFormatString(BasValueT v) { + char buf[64]; + + switch (v.type) { + case BAS_TYPE_INTEGER: + snprintf(buf, sizeof(buf), "%d", (int)v.intVal); + return basStringNew(buf, (int32_t)strlen(buf)); + + case BAS_TYPE_LONG: + snprintf(buf, sizeof(buf), "%ld", (long)v.longVal); + return basStringNew(buf, (int32_t)strlen(buf)); + + case BAS_TYPE_SINGLE: { + snprintf(buf, sizeof(buf), "%g", (double)v.sngVal); + return basStringNew(buf, (int32_t)strlen(buf)); + } + + case BAS_TYPE_DOUBLE: + snprintf(buf, sizeof(buf), "%g", v.dblVal); + return basStringNew(buf, (int32_t)strlen(buf)); + + case BAS_TYPE_BOOLEAN: + return basStringNew(v.boolVal ? "True" : "False", v.boolVal ? 4 : 5); + + case BAS_TYPE_STRING: + return v.strVal ? 
basStringRef(v.strVal) : basStringRef(basEmptyString); + + default: + return basStringRef(basEmptyString); + } +} + + +bool basValIsTruthy(BasValueT v) { + switch (v.type) { + case BAS_TYPE_INTEGER: + return v.intVal != 0; + + case BAS_TYPE_LONG: + return v.longVal != 0; + + case BAS_TYPE_SINGLE: + return v.sngVal != 0.0f; + + case BAS_TYPE_DOUBLE: + return v.dblVal != 0.0; + + case BAS_TYPE_BOOLEAN: + return v.boolVal != 0; + + case BAS_TYPE_STRING: + return v.strVal && v.strVal->len > 0; + + default: + return false; + } +} + + +int32_t basValCompare(BasValueT a, BasValueT b) { + // String comparison + if (a.type == BAS_TYPE_STRING && b.type == BAS_TYPE_STRING) { + return basStringCompare(a.strVal ? a.strVal : basEmptyString, b.strVal ? b.strVal : basEmptyString); + } + + // Numeric comparison + double na = basValToNumber(a); + double nb = basValToNumber(b); + + if (na < nb) { + return -1; + } + + if (na > nb) { + return 1; + } + + return 0; +} + + +int32_t basValCompareCI(BasValueT a, BasValueT b) { + // String comparison (case-insensitive) + if (a.type == BAS_TYPE_STRING && b.type == BAS_TYPE_STRING) { + return basStringCompareCI(a.strVal ? a.strVal : basEmptyString, b.strVal ? 
b.strVal : basEmptyString); + } + + // Numeric comparison (same as basValCompare) + double na = basValToNumber(a); + double nb = basValToNumber(b); + + if (na < nb) { + return -1; + } + + if (na > nb) { + return 1; + } + + return 0; +} + + +uint8_t basValPromoteType(uint8_t a, uint8_t b) { + // String stays string (concat, not arithmetic) + if (a == BAS_TYPE_STRING || b == BAS_TYPE_STRING) { + return BAS_TYPE_STRING; + } + + // Double wins over everything + if (a == BAS_TYPE_DOUBLE || b == BAS_TYPE_DOUBLE) { + return BAS_TYPE_DOUBLE; + } + + // Single wins over integer/long + if (a == BAS_TYPE_SINGLE || b == BAS_TYPE_SINGLE) { + return BAS_TYPE_SINGLE; + } + + // Long wins over integer + if (a == BAS_TYPE_LONG || b == BAS_TYPE_LONG) { + return BAS_TYPE_LONG; + } + + return BAS_TYPE_INTEGER; +} diff --git a/dvxbasic/runtime/values.h b/dvxbasic/runtime/values.h new file mode 100644 index 0000000..d403278 --- /dev/null +++ b/dvxbasic/runtime/values.h @@ -0,0 +1,180 @@ +// values.h -- DVX BASIC value representation and string heap +// +// Tagged union value type for the VM's evaluation stack, variables, +// and array elements. Strings are reference-counted for automatic +// memory management without a garbage collector. +// +// Embeddable: no DVX dependencies, pure C. + +#ifndef DVXBASIC_VALUES_H +#define DVXBASIC_VALUES_H + +#include +#include +#include + +// ============================================================ +// Reference-counted string +// ============================================================ + +typedef struct { + int32_t refCount; + int32_t len; + int32_t cap; // allocated capacity (>= len + 1) + char data[]; // flexible array member, null-terminated +} BasStringT; + +// Allocate a new string from a C string. refCount starts at 1. +BasStringT *basStringNew(const char *text, int32_t len); + +// Allocate an empty string with a given capacity. +BasStringT *basStringAlloc(int32_t cap); + +// Increment reference count. 
+BasStringT *basStringRef(BasStringT *s); + +// Decrement reference count. Frees if count reaches zero. +void basStringUnref(BasStringT *s); + +// Concatenate two strings. Returns a new string (refCount 1). +BasStringT *basStringConcat(const BasStringT *a, const BasStringT *b); + +// Substring. Returns a new string (refCount 1). +BasStringT *basStringSub(const BasStringT *s, int32_t start, int32_t len); + +// Compare two strings. Returns <0, 0, >0 like strcmp. +int32_t basStringCompare(const BasStringT *a, const BasStringT *b); + +// Compare two strings case-insensitively. Returns <0, 0, >0. +int32_t basStringCompareCI(const BasStringT *a, const BasStringT *b); + +// The empty string singleton (never freed). +extern BasStringT *basEmptyString; + +// Initialize/shutdown the string system. +void basStringSystemInit(void); +void basStringSystemShutdown(void); + +// ============================================================ +// Forward declarations +// ============================================================ + +typedef struct BasValueTag BasValueT; + +// ============================================================ +// Reference-counted array +// ============================================================ + +#define BAS_ARRAY_MAX_DIMS 8 + +typedef struct { + int32_t refCount; + uint8_t elementType; // BAS_TYPE_* + int32_t dims; // number of dimensions + int32_t lbound[BAS_ARRAY_MAX_DIMS]; // lower bound per dimension + int32_t ubound[BAS_ARRAY_MAX_DIMS]; // upper bound per dimension + int32_t totalElements; + BasValueT *elements; // flat array of values +} BasArrayT; + +// Allocate a new array. refCount starts at 1. +BasArrayT *basArrayNew(int32_t dims, int32_t *lbounds, int32_t *ubounds, uint8_t elementType); + +// Free all elements and release the array. +void basArrayFree(BasArrayT *arr); + +// Increment reference count. +BasArrayT *basArrayRef(BasArrayT *arr); + +// Decrement reference count. Frees if count reaches zero. 
+void basArrayUnref(BasArrayT *arr); + +// Compute flat index from multi-dimensional indices. Returns -1 if out of bounds. +int32_t basArrayIndex(BasArrayT *arr, int32_t *indices, int32_t ndims); + +// ============================================================ +// Reference-counted user-defined type instance +// ============================================================ + +typedef struct { + int32_t refCount; + int32_t typeId; // index into type definition table + int32_t fieldCount; + BasValueT *fields; // array of field values +} BasUdtT; + +// Allocate a new UDT instance. refCount starts at 1. +BasUdtT *basUdtNew(int32_t typeId, int32_t fieldCount); + +// Free all fields and release the UDT. +void basUdtFree(BasUdtT *udt); + +// Increment reference count. +BasUdtT *basUdtRef(BasUdtT *udt); + +// Decrement reference count. Frees if count reaches zero. +void basUdtUnref(BasUdtT *udt); + +// ============================================================ +// Tagged value +// ============================================================ + +struct BasValueTag { + uint8_t type; // BAS_TYPE_* + union { + int16_t intVal; // BAS_TYPE_INTEGER + int32_t longVal; // BAS_TYPE_LONG + float sngVal; // BAS_TYPE_SINGLE + double dblVal; // BAS_TYPE_DOUBLE + BasStringT *strVal; // BAS_TYPE_STRING (ref-counted) + int16_t boolVal; // BAS_TYPE_BOOLEAN (True=-1, False=0) + BasArrayT *arrVal; // BAS_TYPE_ARRAY (ref-counted) + BasUdtT *udtVal; // BAS_TYPE_UDT (ref-counted) + }; +}; + +// Create values +BasValueT basValInteger(int16_t v); +BasValueT basValLong(int32_t v); +BasValueT basValSingle(float v); +BasValueT basValDouble(double v); +BasValueT basValString(BasStringT *s); +BasValueT basValStringFromC(const char *text); +BasValueT basValBool(bool v); + +// Copy a value (increments string refcount if applicable). +BasValueT basValCopy(BasValueT v); + +// Release a value (decrements string refcount if applicable). 
+void basValRelease(BasValueT *v); + +// Convert a value to a specific type. Returns the converted value. +// The original is NOT released -- caller manages lifetime. +BasValueT basValToInteger(BasValueT v); +BasValueT basValToLong(BasValueT v); +BasValueT basValToSingle(BasValueT v); +BasValueT basValToDouble(BasValueT v); +BasValueT basValToString(BasValueT v); +BasValueT basValToBool(BasValueT v); + +// Get the numeric value as a double (for mixed-type arithmetic). +double basValToNumber(BasValueT v); + +// Get the string representation. Returns a new ref-counted string. +BasStringT *basValFormatString(BasValueT v); + +// Check if a value is truthy (non-zero number, non-empty string). +bool basValIsTruthy(BasValueT v); + +// Compare two values. Returns -1, 0, or 1. +// Numeric types are compared numerically. Strings lexicographically. +int32_t basValCompare(BasValueT a, BasValueT b); + +// Compare two values case-insensitively (for OPTION COMPARE TEXT). +int32_t basValCompareCI(BasValueT a, BasValueT b); + +// Determine the common type for a binary operation (type promotion). +// Integer + Single -> Single, etc. +uint8_t basValPromoteType(uint8_t a, uint8_t b); + +#endif // DVXBASIC_VALUES_H diff --git a/dvxbasic/runtime/vm.c b/dvxbasic/runtime/vm.c new file mode 100644 index 0000000..6edf77e --- /dev/null +++ b/dvxbasic/runtime/vm.c @@ -0,0 +1,3514 @@ +// vm.c -- DVX BASIC virtual machine implementation +// +// Stack-based p-code interpreter. Executes one instruction per +// basVmStep() call, or runs until completion via basVmRun(). +// All I/O is through host-provided callbacks -- the VM itself +// has no platform dependencies. 
+ +#include "vm.h" +#include "../compiler/opcodes.h" + +#include +#include +#include +#include +#include +#include +#include + +// ============================================================ +// Prototypes +// ============================================================ + +static BasCallFrameT *currentFrame(BasVmT *vm); +static void defaultPrint(void *ctx, const char *text, bool newline); +static BasVmResultE execArith(BasVmT *vm, uint8_t op); +static BasVmResultE execCompare(BasVmT *vm, uint8_t op); +static BasVmResultE execFileOp(BasVmT *vm, uint8_t op); +static BasVmResultE execLogical(BasVmT *vm, uint8_t op); +static BasVmResultE execMath(BasVmT *vm, uint8_t op); +static BasVmResultE execPrint(BasVmT *vm); +static BasVmResultE execStringOp(BasVmT *vm, uint8_t op); +static bool pop(BasVmT *vm, BasValueT *val); +static bool push(BasVmT *vm, BasValueT val); +static int16_t readInt16(BasVmT *vm); +static uint8_t readUint8(BasVmT *vm); +static uint16_t readUint16(BasVmT *vm); +static void runtimeError(BasVmT *vm, int32_t errNum, const char *msg); + + +// ============================================================ +// basVmCreate +// ============================================================ + +BasVmT *basVmCreate(void) { + BasVmT *vm = (BasVmT *)calloc(1, sizeof(BasVmT)); + + if (!vm) { + return NULL; + } + + vm->printFn = defaultPrint; + basStringSystemInit(); + return vm; +} + + +// ============================================================ +// basVmDestroy +// ============================================================ + +void basVmDestroy(BasVmT *vm) { + if (!vm) { + return; + } + + // Release stack values + for (int32_t i = 0; i < vm->sp; i++) { + basValRelease(&vm->stack[i]); + } + + // Release global variables + for (int32_t i = 0; i < BAS_VM_MAX_GLOBALS; i++) { + basValRelease(&vm->globals[i]); + } + + // Release call frame locals + for (int32_t d = 0; d < vm->callDepth; d++) { + for (int32_t i = 0; i < vm->callStack[d].localCount; i++) { + 
basValRelease(&vm->callStack[d].locals[i]); + } + } + + // Release FOR stack + for (int32_t i = 0; i < vm->forDepth; i++) { + basValRelease(&vm->forStack[i].limit); + basValRelease(&vm->forStack[i].step); + } + + // Close files + for (int32_t i = 0; i < BAS_VM_MAX_FILES; i++) { + if (vm->files[i].handle) { + fclose((FILE *)vm->files[i].handle); + } + } + + basStringSystemShutdown(); + free(vm); +} + + +// ============================================================ +// basVmGetError +// ============================================================ + +const char *basVmGetError(const BasVmT *vm) { + return vm->errorMsg; +} + + +// ============================================================ +// basVmLoadModule +// ============================================================ + +void basVmLoadModule(BasVmT *vm, BasModuleT *module) { + vm->module = module; + vm->pc = module->entryPoint; +} + + +// ============================================================ +// basVmPop +// ============================================================ + +bool basVmPop(BasVmT *vm, BasValueT *val) { + return pop(vm, val); +} + + +// ============================================================ +// basVmPush +// ============================================================ + +bool basVmPush(BasVmT *vm, BasValueT val) { + return push(vm, val); +} + + +// ============================================================ +// basVmReset +// ============================================================ + +void basVmReset(BasVmT *vm) { + for (int32_t i = 0; i < vm->sp; i++) { + basValRelease(&vm->stack[i]); + } + + for (int32_t i = 0; i < BAS_VM_MAX_GLOBALS; i++) { + basValRelease(&vm->globals[i]); + } + + vm->sp = 0; + vm->callDepth = 0; + vm->forDepth = 0; + vm->pc = vm->module ? 
vm->module->entryPoint : 0; + vm->running = false; + vm->yielded = false; + vm->dataPtr = 0; + vm->errorHandler = 0; + vm->errorNumber = 0; + vm->errorPc = 0; + vm->errorNextPc = 0; + vm->inErrorHandler = false; + vm->errorMsg[0] = '\0'; +} + + +// ============================================================ +// basVmRun +// ============================================================ + +BasVmResultE basVmRun(BasVmT *vm) { + vm->running = true; + vm->yielded = false; + + while (vm->running) { + // Save PC before each instruction for RESUME support + int32_t savedPc = vm->pc; + BasVmResultE result = basVmStep(vm); + + if (result != BAS_VM_OK) { + // If an error handler is set and this is a trappable error, + // jump to the handler instead of stopping execution + if (vm->errorHandler != 0 && !vm->inErrorHandler && result != BAS_VM_HALTED && result != BAS_VM_BAD_OPCODE) { + vm->errorPc = savedPc; + vm->errorNextPc = vm->pc; + vm->inErrorHandler = true; + vm->pc = vm->errorHandler; + continue; + } + + vm->running = false; + return result; + } + } + + return BAS_VM_HALTED; +} + + +// ============================================================ +// basVmSetDoEventsCallback +// ============================================================ + +void basVmSetDoEventsCallback(BasVmT *vm, BasDoEventsFnT fn, void *ctx) { + vm->doEventsFn = fn; + vm->doEventsCtx = ctx; +} + + +// ============================================================ +// basVmSetInputCallback +// ============================================================ + +void basVmSetInputCallback(BasVmT *vm, BasInputFnT fn, void *ctx) { + vm->inputFn = fn; + vm->inputCtx = ctx; +} + + +// ============================================================ +// basVmSetPrintCallback +// ============================================================ + +void basVmSetPrintCallback(BasVmT *vm, BasPrintFnT fn, void *ctx) { + vm->printFn = fn; + vm->printCtx = ctx; +} + + +// ============================================================ 
+// basVmStep -- execute one instruction +// ============================================================ + +BasVmResultE basVmStep(BasVmT *vm) { + if (!vm->module || vm->pc < 0 || vm->pc >= vm->module->codeLen) { + vm->running = false; + return BAS_VM_HALTED; + } + + uint8_t op = vm->module->code[vm->pc++]; + + switch (op) { + case OP_NOP: + break; + + // ============================================================ + // Stack operations + // ============================================================ + + case OP_PUSH_INT16: { + int16_t val = readInt16(vm); + + if (!push(vm, basValInteger(val))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_PUSH_INT32: { + int16_t lo = readInt16(vm); + int16_t hi = readInt16(vm); + int32_t val = ((int32_t)hi << 16) | (uint16_t)lo; + + if (!push(vm, basValLong(val))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_PUSH_FLT32: { + float val; + memcpy(&val, &vm->module->code[vm->pc], sizeof(float)); + vm->pc += sizeof(float); + + if (!push(vm, basValSingle(val))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_PUSH_FLT64: { + double val; + memcpy(&val, &vm->module->code[vm->pc], sizeof(double)); + vm->pc += sizeof(double); + + if (!push(vm, basValDouble(val))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_PUSH_STR: { + uint16_t idx = readUint16(vm); + + if (idx < (uint16_t)vm->module->constCount) { + if (!push(vm, basValString(vm->module->constants[idx]))) { + return BAS_VM_STACK_OVERFLOW; + } + } else { + if (!push(vm, basValStringFromC(""))) { + return BAS_VM_STACK_OVERFLOW; + } + } + + break; + } + + case OP_PUSH_TRUE: + if (!push(vm, basValBool(true))) { + return BAS_VM_STACK_OVERFLOW; + } + break; + + case OP_PUSH_FALSE: + if (!push(vm, basValBool(false))) { + return BAS_VM_STACK_OVERFLOW; + } + break; + + case OP_POP: { + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + basValRelease(&val); + break; + } + + case OP_DUP: { + 
if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (!push(vm, basValCopy(vm->stack[vm->sp - 1]))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + // ============================================================ + // Variable access + // ============================================================ + + case OP_LOAD_LOCAL: { + uint16_t idx = readUint16(vm); + BasCallFrameT *frame = currentFrame(vm); + + if (!frame || idx >= (uint16_t)frame->localCount) { + runtimeError(vm, 9, "Invalid local variable index"); + return BAS_VM_ERROR; + } + + if (!push(vm, basValCopy(frame->locals[idx]))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_STORE_LOCAL: { + uint16_t idx = readUint16(vm); + BasCallFrameT *frame = currentFrame(vm); + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (!frame || idx >= (uint16_t)frame->localCount) { + basValRelease(&val); + runtimeError(vm, 9, "Invalid local variable index"); + return BAS_VM_ERROR; + } + + basValRelease(&frame->locals[idx]); + frame->locals[idx] = val; + break; + } + + case OP_LOAD_GLOBAL: { + uint16_t idx = readUint16(vm); + + if (idx >= BAS_VM_MAX_GLOBALS) { + runtimeError(vm, 9, "Invalid global variable index"); + return BAS_VM_ERROR; + } + + if (!push(vm, basValCopy(vm->globals[idx]))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_STORE_GLOBAL: { + uint16_t idx = readUint16(vm); + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (idx >= BAS_VM_MAX_GLOBALS) { + basValRelease(&val); + runtimeError(vm, 9, "Invalid global variable index"); + return BAS_VM_ERROR; + } + + basValRelease(&vm->globals[idx]); + vm->globals[idx] = val; + break; + } + + // ============================================================ + // Arithmetic + // ============================================================ + + case OP_ADD_INT: + case OP_SUB_INT: + case OP_MUL_INT: + case OP_IDIV_INT: + case OP_MOD_INT: + case OP_ADD_FLT: + 
case OP_SUB_FLT: + case OP_MUL_FLT: + case OP_DIV_FLT: + case OP_POW: + return execArith(vm, op); + + case OP_NEG_INT: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + + if (top->type == BAS_TYPE_INTEGER) { + top->intVal = -top->intVal; + } else if (top->type == BAS_TYPE_LONG) { + top->longVal = -top->longVal; + } else { + double n = basValToNumber(*top); + basValRelease(top); + *top = basValDouble(-n); + } + + break; + } + + case OP_NEG_FLT: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + + if (top->type == BAS_TYPE_SINGLE) { + top->sngVal = -top->sngVal; + } else if (top->type == BAS_TYPE_DOUBLE) { + top->dblVal = -top->dblVal; + } else { + double n = basValToNumber(*top); + basValRelease(top); + *top = basValDouble(-n); + } + + break; + } + + // ============================================================ + // String operations + // ============================================================ + + case OP_STR_CONCAT: + case OP_STR_LEFT: + case OP_STR_RIGHT: + case OP_STR_MID: + case OP_STR_MID2: + case OP_STR_LEN: + case OP_STR_INSTR: + case OP_STR_INSTR3: + case OP_STR_UCASE: + case OP_STR_LCASE: + case OP_STR_TRIM: + case OP_STR_LTRIM: + case OP_STR_RTRIM: + case OP_STR_CHR: + case OP_STR_ASC: + case OP_STR_SPACE: + case OP_STR_FIXLEN: + case OP_STR_MID_ASGN: + return execStringOp(vm, op); + + // ============================================================ + // Comparison + // ============================================================ + + case OP_CMP_EQ: + case OP_CMP_NE: + case OP_CMP_LT: + case OP_CMP_GT: + case OP_CMP_LE: + case OP_CMP_GE: + return execCompare(vm, op); + + // ============================================================ + // Logical / bitwise + // ============================================================ + + case OP_AND: + case OP_OR: + case OP_NOT: + case OP_XOR: + case OP_EQV: + case OP_IMP: + return execLogical(vm, op); + 
+ // ============================================================ + // Control flow + // ============================================================ + + case OP_JMP: { + int16_t offset = readInt16(vm); + vm->pc += offset; + break; + } + + case OP_JMP_TRUE: { + int16_t offset = readInt16(vm); + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (basValIsTruthy(val)) { + vm->pc += offset; + } + + basValRelease(&val); + break; + } + + case OP_JMP_FALSE: { + int16_t offset = readInt16(vm); + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (!basValIsTruthy(val)) { + vm->pc += offset; + } + + basValRelease(&val); + break; + } + + case OP_CALL: { + uint16_t addr = readUint16(vm); + uint8_t argc = readUint8(vm); + uint8_t baseSlot = readUint8(vm); + + if (vm->callDepth >= BAS_VM_CALL_STACK_SIZE) { + return BAS_VM_CALL_OVERFLOW; + } + + BasCallFrameT *frame = &vm->callStack[vm->callDepth++]; + frame->returnPc = vm->pc; + frame->localCount = BAS_VM_MAX_LOCALS; + + // Zero all local slots + memset(frame->locals, 0, sizeof(frame->locals)); + + // Pop arguments into locals starting at baseSlot (in reverse order) + for (int32_t i = baseSlot + argc - 1; i >= baseSlot; i--) { + if (!pop(vm, &frame->locals[i])) { + return BAS_VM_STACK_UNDERFLOW; + } + } + + vm->pc = addr; + break; + } + + case OP_RET: { + if (vm->callDepth <= 0) { + vm->running = false; + return BAS_VM_HALTED; + } + + BasCallFrameT *frame = &vm->callStack[--vm->callDepth]; + + // Release locals + for (int32_t i = 0; i < frame->localCount; i++) { + basValRelease(&frame->locals[i]); + } + + vm->pc = frame->returnPc; + break; + } + + case OP_RET_VAL: { + // Like RET but leaves TOS as the return value + BasValueT retVal; + + if (!pop(vm, &retVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (vm->callDepth <= 0) { + basValRelease(&retVal); + vm->running = false; + return BAS_VM_HALTED; + } + + BasCallFrameT *frame = 
&vm->callStack[--vm->callDepth]; + + for (int32_t i = 0; i < frame->localCount; i++) { + basValRelease(&frame->locals[i]); + } + + vm->pc = frame->returnPc; + + if (!push(vm, retVal)) { + basValRelease(&retVal); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_GOSUB_RET: { + // GOSUB return: pop integer from eval stack, set PC to that value + BasValueT retAddr; + + if (!pop(vm, &retAddr)) { + return BAS_VM_STACK_UNDERFLOW; + } + + vm->pc = (int32_t)basValToNumber(retAddr); + basValRelease(&retAddr); + break; + } + + case OP_FOR_INIT: { + uint16_t varIdx = readUint16(vm); + uint8_t isLocal = readUint8(vm); + BasValueT stepVal; + BasValueT limitVal; + + if (!pop(vm, &stepVal) || !pop(vm, &limitVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (vm->forDepth >= BAS_VM_MAX_FOR_DEPTH) { + basValRelease(&stepVal); + basValRelease(&limitVal); + runtimeError(vm, 26, "FOR loop nesting too deep"); + return BAS_VM_ERROR; + } + + BasForStateT *fs = &vm->forStack[vm->forDepth++]; + fs->varIdx = varIdx; + fs->isLocal = (isLocal != 0); + fs->limit = limitVal; + fs->step = stepVal; + fs->loopTop = vm->pc; + break; + } + + case OP_FOR_NEXT: { + uint16_t varIdx = readUint16(vm); + uint8_t isLocal = readUint8(vm); + int16_t loopTopOffset = readInt16(vm); + + if (vm->forDepth <= 0) { + runtimeError(vm, 1, "NEXT without FOR"); + return BAS_VM_ERROR; + } + + BasForStateT *fs = &vm->forStack[vm->forDepth - 1]; + + if (fs->varIdx != (int32_t)varIdx) { + runtimeError(vm, 1, "NEXT variable mismatch"); + return BAS_VM_ERROR; + } + + // Get pointer to the loop variable (global or local) + BasValueT *varSlot; + + if (isLocal) { + BasCallFrameT *frame = currentFrame(vm); + + if (!frame || varIdx >= (uint16_t)frame->localCount) { + runtimeError(vm, 9, "Invalid local variable index"); + return BAS_VM_ERROR; + } + + varSlot = &frame->locals[varIdx]; + } else { + if (varIdx >= BAS_VM_MAX_GLOBALS) { + runtimeError(vm, 9, "Invalid global variable index"); + return BAS_VM_ERROR; + } + + 
varSlot = &vm->globals[varIdx]; + } + + // Increment: var = var + step + double varVal = basValToNumber(*varSlot); + double stepVal = basValToNumber(fs->step); + double limVal = basValToNumber(fs->limit); + varVal += stepVal; + + basValRelease(varSlot); + *varSlot = basValDouble(varVal); + + // Test: if step > 0 then continue while var <= limit + // if step < 0 then continue while var >= limit + bool cont; + + if (stepVal >= 0) { + cont = (varVal <= limVal); + } else { + cont = (varVal >= limVal); + } + + if (cont) { + vm->pc += loopTopOffset; + } else { + // Loop done -- pop FOR state + basValRelease(&fs->limit); + basValRelease(&fs->step); + vm->forDepth--; + } + + break; + } + + // ============================================================ + // Type conversion + // ============================================================ + + case OP_CONV_INT_FLT: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + double n = basValToNumber(*top); + basValRelease(top); + *top = basValSingle((float)n); + break; + } + + case OP_CONV_FLT_INT: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT conv = basValToInteger(*top); + basValRelease(top); + *top = conv; + break; + } + + case OP_CONV_INT_STR: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT conv = basValToString(*top); + basValRelease(top); + *top = conv; + break; + } + + case OP_CONV_STR_INT: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT conv = basValToInteger(*top); + basValRelease(top); + *top = conv; + break; + } + + case OP_CONV_FLT_STR: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT conv = basValToString(*top); + basValRelease(top); + *top = conv; + break; + } + + case OP_CONV_STR_FLT: { + if 
(vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT conv = basValToDouble(*top); + basValRelease(top); + *top = conv; + break; + } + + case OP_CONV_INT_LONG: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT conv = basValToLong(*top); + basValRelease(top); + *top = conv; + break; + } + + // ============================================================ + // I/O + // ============================================================ + + case OP_PRINT: + return execPrint(vm); + + case OP_PRINT_NL: + if (vm->printFn) { + vm->printFn(vm->printCtx, "", true); + } + break; + + case OP_PRINT_TAB: + if (vm->printFn) { + vm->printFn(vm->printCtx, "\t", false); + } + break; + + case OP_PRINT_SPC: { + uint8_t n = readUint8(vm); + char spaces[256]; + int32_t count = n < 255 ? n : 255; + memset(spaces, ' ', count); + spaces[count] = '\0'; + + if (vm->printFn) { + vm->printFn(vm->printCtx, spaces, false); + } + + break; + } + + case OP_PRINT_SPC_N: { + BasValueT nVal; + + if (!pop(vm, &nVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t count = (int32_t)basValToNumber(nVal); + basValRelease(&nVal); + + if (count < 0) { + count = 0; + } + + if (count > 255) { + count = 255; + } + + char spaces[256]; + memset(spaces, ' ', count); + spaces[count] = '\0'; + + if (vm->printFn) { + vm->printFn(vm->printCtx, spaces, false); + } + + break; + } + + case OP_PRINT_TAB_N: { + BasValueT nVal; + + if (!pop(vm, &nVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t col = (int32_t)basValToNumber(nVal); + basValRelease(&nVal); + + if (col < 1) { + col = 1; + } + + if (col > 255) { + col = 255; + } + + // TAB outputs spaces to reach the specified column + // For simplicity, just output (col-1) spaces + char spaces[256]; + int32_t count = col - 1; + memset(spaces, ' ', count); + spaces[count] = '\0'; + + if (vm->printFn) { + vm->printFn(vm->printCtx, spaces, false); + } + + 
break; + } + + case OP_PRINT_USING: { + // Pop value and format string (format is below value on stack) + BasValueT val; + BasValueT fmtVal; + + if (!pop(vm, &val) || !pop(vm, &fmtVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT fmtStr = basValToString(fmtVal); + basValRelease(&fmtVal); + + const char *fmt = fmtStr.strVal->data; + int32_t fmtLen = fmtStr.strVal->len; + char buf[256]; + + if (val.type == BAS_TYPE_STRING) { + // String formatting + const char *src = val.strVal ? val.strVal->data : ""; + + if (fmtLen > 0 && fmt[0] == '!') { + // First character only + buf[0] = src[0] ? src[0] : ' '; + buf[1] = '\0'; + } else if (fmtLen > 0 && fmt[0] == '&') { + // Entire string + snprintf(buf, sizeof(buf), "%s", src); + } else if (fmtLen >= 2 && fmt[0] == '\\') { + // Fixed-width: count characters between backslashes + int32_t width = 2; + for (int32_t i = 1; i < fmtLen; i++) { + if (fmt[i] == '\\') { + width = i + 1; + break; + } + width = i + 2; + } + int32_t srcLen = (int32_t)strlen(src); + int32_t copyLen = srcLen < width ? 
srcLen : width; + memcpy(buf, src, copyLen); + for (int32_t i = copyLen; i < width; i++) { + buf[i] = ' '; + } + buf[width] = '\0'; + } else { + snprintf(buf, sizeof(buf), "%s", src); + } + } else { + // Numeric formatting + double n = basValToNumber(val); + + // Parse format flags + bool asteriskFill = false; // ** fill leading spaces with * + bool dollarFloat = false; // $$ floating dollar sign + bool plusAtStart = false; // + at start: always show sign + bool plusAtEnd = false; // + at end: always show sign + bool minusAtEnd = false; // - at end: show minus for negative + bool sciNotation = false; // ^^^^ scientific notation + bool hasDecimal = false; + bool hasComma = false; + int32_t digitsBefore = 0; + int32_t digitsAfter = 0; + + // Check for ** at start + if (fmtLen >= 2 && fmt[0] == '*' && fmt[1] == '*') { + asteriskFill = true; + } + + // Check for $$ (may follow **) + int32_t scanStart = 0; + if (asteriskFill) { + scanStart = 2; + } + if (fmtLen >= scanStart + 2 && fmt[scanStart] == '$' && fmt[scanStart + 1] == '$') { + dollarFloat = true; + } + + // Check for + at start or end + if (fmtLen > 0 && fmt[0] == '+') { + plusAtStart = true; + } + if (fmtLen > 0 && fmt[fmtLen - 1] == '+') { + plusAtEnd = true; + } + + // Check for - at end + if (fmtLen > 0 && fmt[fmtLen - 1] == '-') { + minusAtEnd = true; + } + + // Check for ^^^^ (scientific notation) + for (int32_t i = 0; i <= fmtLen - 4; i++) { + if (fmt[i] == '^' && fmt[i+1] == '^' && fmt[i+2] == '^' && fmt[i+3] == '^') { + sciNotation = true; + break; + } + } + + // Count # and 0 digits before and after decimal + for (int32_t i = 0; i < fmtLen; i++) { + if (fmt[i] == '.') { + hasDecimal = true; + } else if (fmt[i] == ',') { + hasComma = true; + } else if (fmt[i] == '#' || fmt[i] == '0') { + if (hasDecimal) { + digitsAfter++; + } else { + digitsBefore++; + } + } else if (fmt[i] == '*') { + if (!hasDecimal) { + digitsBefore++; + } + } + } + + if (sciNotation) { + // Scientific notation + char sciFmt[32]; + 
int32_t decimals = hasDecimal ? digitsAfter : 0; + snprintf(sciFmt, sizeof(sciFmt), "%%.%dE", decimals); + snprintf(buf, sizeof(buf), sciFmt, n); + } else { + // Standard formatting + bool isNeg = (n < 0); + double absN = isNeg ? -n : n; + int32_t decimals = hasDecimal ? digitsAfter : 0; + + char numBuf[128]; + snprintf(numBuf, sizeof(numBuf), "%.*f", decimals, absN); + + // Split into integer and decimal parts + char intPart[128]; + char decPart[128]; + intPart[0] = '\0'; + decPart[0] = '\0'; + char *dot = strchr(numBuf, '.'); + if (dot) { + int32_t intLen = (int32_t)(dot - numBuf); + memcpy(intPart, numBuf, intLen); + intPart[intLen] = '\0'; + strncpy(decPart, dot + 1, sizeof(decPart) - 1); + decPart[sizeof(decPart) - 1] = '\0'; + } else { + strncpy(intPart, numBuf, sizeof(intPart) - 1); + intPart[sizeof(intPart) - 1] = '\0'; + } + + // Apply thousands separator + char fmtIntPart[128]; + if (hasComma) { + int32_t srcLen = (int32_t)strlen(intPart); + int32_t dstIdx = 0; + for (int32_t i = 0; i < srcLen; i++) { + if (i > 0 && (srcLen - i) % 3 == 0) { + fmtIntPart[dstIdx++] = ','; + } + fmtIntPart[dstIdx++] = intPart[i]; + } + fmtIntPart[dstIdx] = '\0'; + } else { + strncpy(fmtIntPart, intPart, sizeof(fmtIntPart) - 1); + fmtIntPart[sizeof(fmtIntPart) - 1] = '\0'; + } + + // Build result + int32_t idx = 0; + + // Sign prefix + if (plusAtStart) { + buf[idx++] = isNeg ? '-' : '+'; + } else if (isNeg && !minusAtEnd) { + buf[idx++] = '-'; + } + + // Dollar sign + if (dollarFloat) { + buf[idx++] = '$'; + } + + // Pad leading + int32_t intLen = (int32_t)strlen(fmtIntPart); + int32_t padNeeded = digitsBefore - intLen; + char fillChar = asteriskFill ? '*' : ' '; + + for (int32_t i = 0; i < padNeeded; i++) { + buf[idx++] = fillChar; + } + + // Integer part + for (int32_t i = 0; fmtIntPart[i]; i++) { + buf[idx++] = fmtIntPart[i]; + } + + // Decimal part + if (hasDecimal) { + buf[idx++] = '.'; + for (int32_t i = 0; i < decimals; i++) { + buf[idx++] = decPart[i] ? 
decPart[i] : '0'; + } + } + + // Trailing sign + if (plusAtEnd) { + buf[idx++] = isNeg ? '-' : '+'; + } else if (minusAtEnd) { + buf[idx++] = isNeg ? '-' : ' '; + } + + buf[idx] = '\0'; + } + } + + basValRelease(&val); + + // Push format string back (PRINT USING reuses it for multiple values) + if (!push(vm, fmtStr)) { + return BAS_VM_STACK_OVERFLOW; + } + + // Push formatted result + if (!push(vm, basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_INPUT: { + char buf[1024]; + buf[0] = '\0'; + + if (vm->inputFn) { + if (!vm->inputFn(vm->inputCtx, "? ", buf, sizeof(buf))) { + buf[0] = '\0'; + } + } + + if (!push(vm, basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + // ============================================================ + // Math built-ins + // ============================================================ + + case OP_MATH_ABS: + case OP_MATH_INT: + case OP_MATH_FIX: + case OP_MATH_SGN: + case OP_MATH_SQR: + case OP_MATH_SIN: + case OP_MATH_COS: + case OP_MATH_TAN: + case OP_MATH_ATN: + case OP_MATH_LOG: + case OP_MATH_EXP: + case OP_MATH_RND: + case OP_MATH_RANDOMIZE: + return execMath(vm, op); + + // ============================================================ + // Conversion built-ins + // ============================================================ + + case OP_STR_VAL: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + double n = basValToNumber(*top); + basValRelease(top); + *top = basValDouble(n); + break; + } + + case OP_STR_STRF: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasStringT *s = basValFormatString(*top); + basValRelease(top); + top->type = BAS_TYPE_STRING; + top->strVal = s; + break; + } + + case OP_STR_HEX: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + int32_t n = (int32_t)basValToNumber(*top); + 
char buf[16]; + snprintf(buf, sizeof(buf), "%X", (unsigned int)n); + basValRelease(top); + *top = basValStringFromC(buf); + break; + } + + case OP_STR_STRING: { + // STRING$(n, char) + BasValueT charVal; + BasValueT countVal; + + if (!pop(vm, &charVal) || !pop(vm, &countVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t count = (int32_t)basValToNumber(countVal); + basValRelease(&countVal); + + char ch; + + if (charVal.type == BAS_TYPE_STRING && charVal.strVal && charVal.strVal->len > 0) { + ch = charVal.strVal->data[0]; + } else { + ch = (char)(int32_t)basValToNumber(charVal); + } + + basValRelease(&charVal); + + if (count < 0) { + count = 0; + } + + if (count > 32767) { + count = 32767; + } + + BasStringT *s = basStringAlloc(count + 1); + memset(s->data, ch, count); + s->data[count] = '\0'; + s->len = count; + + if (!push(vm, basValString(s))) { + basStringUnref(s); + return BAS_VM_STACK_OVERFLOW; + } + + basStringUnref(s); + break; + } + + // ============================================================ + // Extended built-ins + // ============================================================ + + case OP_MATH_TIMER: { + // Push seconds since midnight as a double + time_t now = time(NULL); + struct tm *t = localtime(&now); + double secs = (double)t->tm_hour * 3600.0 + (double)t->tm_min * 60.0 + (double)t->tm_sec; + + if (!push(vm, basValDouble(secs))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_DATE_STR: { + // Push DATE$ as "MM-DD-YYYY" + time_t now = time(NULL); + struct tm *t = localtime(&now); + char buf[16]; + snprintf(buf, sizeof(buf), "%02d-%02d-%04d", t->tm_mon + 1, t->tm_mday, t->tm_year + 1900); + + if (!push(vm, basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_TIME_STR: { + // Push TIME$ as "HH:MM:SS" + time_t now = time(NULL); + struct tm *t = localtime(&now); + char buf[16]; + snprintf(buf, sizeof(buf), "%02d:%02d:%02d", t->tm_hour, t->tm_min, t->tm_sec); + + if (!push(vm, 
basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_SLEEP: { + // Pop seconds, sleep + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t secs = (int32_t)basValToNumber(val); + basValRelease(&val); + + if (secs > 0) { + sleep((unsigned int)secs); + } + + break; + } + + case OP_ENVIRON: { + // Pop env var name, push value string + BasValueT nameVal; + + if (!pop(vm, &nameVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT nameStr = basValToString(nameVal); + basValRelease(&nameVal); + + const char *envVal = getenv(nameStr.strVal->data); + basValRelease(&nameStr); + + if (envVal == NULL) { + envVal = ""; + } + + if (!push(vm, basValStringFromC(envVal))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + // ============================================================ + // File I/O + // ============================================================ + + case OP_FILE_OPEN: + case OP_FILE_CLOSE: + case OP_FILE_PRINT: + case OP_FILE_INPUT: + case OP_FILE_EOF: + case OP_FILE_LINE_INPUT: + case OP_FILE_WRITE: + case OP_FILE_WRITE_SEP: + case OP_FILE_WRITE_NL: + case OP_FILE_GET: + case OP_FILE_PUT: + case OP_FILE_SEEK: + case OP_FILE_LOF: + case OP_FILE_LOC: + case OP_FILE_FREEFILE: + case OP_FILE_INPUT_N: + return execFileOp(vm, op); + + // ============================================================ + // DoEvents + // ============================================================ + + case OP_DO_EVENTS: + if (vm->doEventsFn) { + if (!vm->doEventsFn(vm->doEventsCtx)) { + vm->running = false; + return BAS_VM_HALTED; + } + } + break; + + // ============================================================ + // Error handling + // ============================================================ + + case OP_ON_ERROR: { + int16_t handler = readInt16(vm); + vm->errorHandler = (handler == 0) ? 
0 : vm->pc + handler; + break; + } + + case OP_ERR_NUM: + if (!push(vm, basValInteger((int16_t)vm->errorNumber))) { + return BAS_VM_STACK_OVERFLOW; + } + break; + + case OP_ERR_CLEAR: + vm->errorNumber = 0; + vm->errorMsg[0] = '\0'; + break; + + case OP_RESUME: + // RESUME -- re-execute the statement that caused the error + vm->pc = vm->errorPc; + vm->errorNumber = 0; + vm->errorMsg[0] = '\0'; + vm->inErrorHandler = false; + break; + + case OP_RESUME_NEXT: + // RESUME NEXT -- continue at next statement after the error + vm->pc = vm->errorNextPc; + vm->errorNumber = 0; + vm->errorMsg[0] = '\0'; + vm->inErrorHandler = false; + break; + + // ============================================================ + // Array / UDT operations + // ============================================================ + + case OP_DIM_ARRAY: { + uint8_t dims = readUint8(vm); + uint8_t elementType = readUint8(vm); + + // dims=0 with elementType=BAS_TYPE_UDT means allocate a UDT instance + if (dims == 0 && elementType == BAS_TYPE_UDT) { + BasValueT fieldCountVal; + BasValueT typeIdVal; + + if (!pop(vm, &fieldCountVal) || !pop(vm, &typeIdVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t fieldCount = (int32_t)basValToNumber(fieldCountVal); + int32_t typeId = (int32_t)basValToNumber(typeIdVal); + basValRelease(&fieldCountVal); + basValRelease(&typeIdVal); + + BasUdtT *udt = basUdtNew(typeId, fieldCount); + + if (!udt) { + runtimeError(vm, 7, "Out of memory allocating TYPE"); + return BAS_VM_OUT_OF_MEMORY; + } + + BasValueT udtVal; + udtVal.type = BAS_TYPE_UDT; + udtVal.udtVal = udt; + + if (!push(vm, udtVal)) { + basUdtFree(udt); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + // Normal array allocation: parser pushes (lbound, ubound) pairs per dim + int32_t lbounds[BAS_ARRAY_MAX_DIMS]; + int32_t ubounds[BAS_ARRAY_MAX_DIMS]; + + // Pop bounds in reverse order (last dim first) + for (int32_t d = dims - 1; d >= 0; d--) { + BasValueT ubVal; + BasValueT lbVal; + + if (!pop(vm, &ubVal) || 
!pop(vm, &lbVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + ubounds[d] = (int32_t)basValToNumber(ubVal); + lbounds[d] = (int32_t)basValToNumber(lbVal); + basValRelease(&ubVal); + basValRelease(&lbVal); + } + + BasArrayT *arr = basArrayNew(dims, lbounds, ubounds, elementType); + + if (!arr) { + runtimeError(vm, 7, "Out of memory allocating array"); + return BAS_VM_OUT_OF_MEMORY; + } + + BasValueT arrVal; + arrVal.type = BAS_TYPE_ARRAY; + arrVal.arrVal = arr; + + if (!push(vm, arrVal)) { + basArrayFree(arr); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_LOAD_ARRAY: { + uint8_t dims = readUint8(vm); + int32_t indices[BAS_ARRAY_MAX_DIMS]; + + // Pop indices in reverse order + for (int32_t d = dims - 1; d >= 0; d--) { + BasValueT idxVal; + + if (!pop(vm, &idxVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + indices[d] = (int32_t)basValToNumber(idxVal); + basValRelease(&idxVal); + } + + // Pop array reference + BasValueT arrRef; + + if (!pop(vm, &arrRef)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (arrRef.type != BAS_TYPE_ARRAY || !arrRef.arrVal) { + basValRelease(&arrRef); + runtimeError(vm, 13, "Not an array"); + return BAS_VM_TYPE_MISMATCH; + } + + int32_t flatIdx = basArrayIndex(arrRef.arrVal, indices, dims); + + if (flatIdx < 0) { + basValRelease(&arrRef); + runtimeError(vm, 9, "Subscript out of range"); + return BAS_VM_SUBSCRIPT_RANGE; + } + + BasValueT elem = basValCopy(arrRef.arrVal->elements[flatIdx]); + basValRelease(&arrRef); + + if (!push(vm, elem)) { + basValRelease(&elem); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_STORE_ARRAY: { + uint8_t dims = readUint8(vm); + + // Pop value to store + BasValueT storeVal; + + if (!pop(vm, &storeVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + // Pop indices in reverse order + int32_t indices[BAS_ARRAY_MAX_DIMS]; + + for (int32_t d = dims - 1; d >= 0; d--) { + BasValueT idxVal; + + if (!pop(vm, &idxVal)) { + basValRelease(&storeVal); + return BAS_VM_STACK_UNDERFLOW; + } + + 
indices[d] = (int32_t)basValToNumber(idxVal); + basValRelease(&idxVal); + } + + // Pop array reference + BasValueT arrRef; + + if (!pop(vm, &arrRef)) { + basValRelease(&storeVal); + return BAS_VM_STACK_UNDERFLOW; + } + + if (arrRef.type != BAS_TYPE_ARRAY || !arrRef.arrVal) { + basValRelease(&arrRef); + basValRelease(&storeVal); + runtimeError(vm, 13, "Not an array"); + return BAS_VM_TYPE_MISMATCH; + } + + int32_t flatIdx = basArrayIndex(arrRef.arrVal, indices, dims); + + if (flatIdx < 0) { + basValRelease(&arrRef); + basValRelease(&storeVal); + runtimeError(vm, 9, "Subscript out of range"); + return BAS_VM_SUBSCRIPT_RANGE; + } + + basValRelease(&arrRef.arrVal->elements[flatIdx]); + arrRef.arrVal->elements[flatIdx] = storeVal; + basValRelease(&arrRef); + break; + } + + case OP_REDIM: { + uint8_t dims = readUint8(vm); + uint8_t preserve = readUint8(vm); + + int32_t lbounds[BAS_ARRAY_MAX_DIMS]; + int32_t ubounds[BAS_ARRAY_MAX_DIMS]; + + for (int32_t d = dims - 1; d >= 0; d--) { + BasValueT ubVal; + BasValueT lbVal; + + if (!pop(vm, &ubVal) || !pop(vm, &lbVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + ubounds[d] = (int32_t)basValToNumber(ubVal); + lbounds[d] = (int32_t)basValToNumber(lbVal); + basValRelease(&ubVal); + basValRelease(&lbVal); + } + + // Pop old array reference + BasValueT oldRef; + + if (!pop(vm, &oldRef)) { + return BAS_VM_STACK_UNDERFLOW; + } + + uint8_t elementType = BAS_TYPE_INTEGER; + + if (oldRef.type == BAS_TYPE_ARRAY && oldRef.arrVal) { + elementType = oldRef.arrVal->elementType; + } + + BasArrayT *newArr = basArrayNew(dims, lbounds, ubounds, elementType); + + if (!newArr) { + basValRelease(&oldRef); + runtimeError(vm, 7, "Out of memory in REDIM"); + return BAS_VM_OUT_OF_MEMORY; + } + + // Copy old elements if PRESERVE + if (preserve && oldRef.type == BAS_TYPE_ARRAY && oldRef.arrVal) { + int32_t copyCount = oldRef.arrVal->totalElements; + + if (copyCount > newArr->totalElements) { + copyCount = newArr->totalElements; + } + + for (int32_t i = 0; 
i < copyCount; i++) { + newArr->elements[i] = basValCopy(oldRef.arrVal->elements[i]); + } + } + + basValRelease(&oldRef); + + BasValueT arrVal; + arrVal.type = BAS_TYPE_ARRAY; + arrVal.arrVal = newArr; + + if (!push(vm, arrVal)) { + basArrayFree(newArr); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_ERASE: { + BasValueT arrRef; + + if (!pop(vm, &arrRef)) { + return BAS_VM_STACK_UNDERFLOW; + } + + basValRelease(&arrRef); + + // Push an empty/zero value to store back + BasValueT empty; + memset(&empty, 0, sizeof(empty)); + + if (!push(vm, empty)) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_LBOUND: { + uint8_t dim = readUint8(vm); + BasValueT arrRef; + + if (!pop(vm, &arrRef)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (arrRef.type != BAS_TYPE_ARRAY || !arrRef.arrVal) { + basValRelease(&arrRef); + runtimeError(vm, 13, "Not an array"); + return BAS_VM_TYPE_MISMATCH; + } + + if (dim < 1 || dim > (uint8_t)arrRef.arrVal->dims) { + basValRelease(&arrRef); + runtimeError(vm, 9, "Invalid dimension for LBOUND"); + return BAS_VM_SUBSCRIPT_RANGE; + } + + int32_t lb = arrRef.arrVal->lbound[dim - 1]; + basValRelease(&arrRef); + + if (!push(vm, basValLong(lb))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_UBOUND: { + uint8_t dim = readUint8(vm); + BasValueT arrRef; + + if (!pop(vm, &arrRef)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (arrRef.type != BAS_TYPE_ARRAY || !arrRef.arrVal) { + basValRelease(&arrRef); + runtimeError(vm, 13, "Not an array"); + return BAS_VM_TYPE_MISMATCH; + } + + if (dim < 1 || dim > (uint8_t)arrRef.arrVal->dims) { + basValRelease(&arrRef); + runtimeError(vm, 9, "Invalid dimension for UBOUND"); + return BAS_VM_SUBSCRIPT_RANGE; + } + + int32_t ub = arrRef.arrVal->ubound[dim - 1]; + basValRelease(&arrRef); + + if (!push(vm, basValLong(ub))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_LOAD_FIELD: { + uint16_t fieldIdx = readUint16(vm); + BasValueT udtRef; + + if (!pop(vm, 
&udtRef)) { + return BAS_VM_STACK_UNDERFLOW; + } + + if (udtRef.type != BAS_TYPE_UDT || !udtRef.udtVal) { + basValRelease(&udtRef); + runtimeError(vm, 13, "Not a TYPE instance"); + return BAS_VM_TYPE_MISMATCH; + } + + if (fieldIdx >= (uint16_t)udtRef.udtVal->fieldCount) { + basValRelease(&udtRef); + runtimeError(vm, 9, "Invalid field index"); + return BAS_VM_ERROR; + } + + BasValueT fieldVal = basValCopy(udtRef.udtVal->fields[fieldIdx]); + basValRelease(&udtRef); + + if (!push(vm, fieldVal)) { + basValRelease(&fieldVal); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_STORE_FIELD: { + uint16_t fieldIdx = readUint16(vm); + BasValueT storeVal; + + if (!pop(vm, &storeVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT udtRef; + + if (!pop(vm, &udtRef)) { + basValRelease(&storeVal); + return BAS_VM_STACK_UNDERFLOW; + } + + if (udtRef.type != BAS_TYPE_UDT || !udtRef.udtVal) { + basValRelease(&udtRef); + basValRelease(&storeVal); + runtimeError(vm, 13, "Not a TYPE instance"); + return BAS_VM_TYPE_MISMATCH; + } + + if (fieldIdx >= (uint16_t)udtRef.udtVal->fieldCount) { + basValRelease(&udtRef); + basValRelease(&storeVal); + runtimeError(vm, 9, "Invalid field index"); + return BAS_VM_ERROR; + } + + basValRelease(&udtRef.udtVal->fields[fieldIdx]); + udtRef.udtVal->fields[fieldIdx] = storeVal; + basValRelease(&udtRef); + break; + } + + // ============================================================ + // DATA/READ/RESTORE + // ============================================================ + + case OP_READ_DATA: { + if (!vm->module || vm->dataPtr >= vm->module->dataCount) { + runtimeError(vm, 4, "Out of DATA"); + return BAS_VM_ERROR; + } + + if (!push(vm, basValCopy(vm->module->dataPool[vm->dataPtr++]))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_RESTORE: + vm->dataPtr = 0; + break; + + // ============================================================ + // FORMAT$ + // ============================================================ + + case 
OP_FORMAT: { + // Pop format string, then value + BasValueT fmtVal; + BasValueT val; + + if (!pop(vm, &fmtVal) || !pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT fmtStr = basValToString(fmtVal); + basValRelease(&fmtVal); + + const char *fmt = fmtStr.strVal->data; + int32_t fmtLen = fmtStr.strVal->len; + double n = basValToNumber(val); + basValRelease(&val); + + char buf[256]; + buf[0] = '\0'; + + // Check for "percent" format + bool isPercent = false; + if (fmtLen == 7) { + isPercent = true; + const char *pct = "PERCENT"; + for (int32_t i = 0; i < 7; i++) { + if (toupper((unsigned char)fmt[i]) != pct[i]) { + isPercent = false; + break; + } + } + } + if (isPercent) { + snprintf(buf, sizeof(buf), "%.0f%%", n * 100.0); + } else { + // Count format characters + int32_t hashBefore = 0; + int32_t zeroBefore = 0; + int32_t hashAfter = 0; + int32_t zeroAfter = 0; + bool hasDecimal = false; + bool hasComma = false; + bool plusStart = false; + bool plusEnd = false; + bool minusEnd = false; + + for (int32_t i = 0; i < fmtLen; i++) { + if (fmt[i] == '+' && i == 0) { + plusStart = true; + } else if (fmt[i] == '+' && i == fmtLen - 1) { + plusEnd = true; + } else if (fmt[i] == '-' && i == fmtLen - 1) { + minusEnd = true; + } else if (fmt[i] == '.') { + hasDecimal = true; + } else if (fmt[i] == ',') { + hasComma = true; + } else if (fmt[i] == '#') { + if (hasDecimal) { + hashAfter++; + } else { + hashBefore++; + } + } else if (fmt[i] == '0') { + if (hasDecimal) { + zeroAfter++; + } else { + zeroBefore++; + } + } + } + + int32_t decimals = hashAfter + zeroAfter; + bool isNeg = (n < 0); + double absN = isNeg ? 
-n : n; + + // Format the number + char numBuf[128]; + + if (hasDecimal) { + snprintf(numBuf, sizeof(numBuf), "%.*f", decimals, absN); + } else { + snprintf(numBuf, sizeof(numBuf), "%.0f", absN); + } + + // Split into integer and decimal parts + char intPart[128]; + char decPart[128]; + intPart[0] = '\0'; + decPart[0] = '\0'; + + char *dot = strchr(numBuf, '.'); + + if (dot) { + int32_t intLen = (int32_t)(dot - numBuf); + memcpy(intPart, numBuf, intLen); + intPart[intLen] = '\0'; + strncpy(decPart, dot + 1, sizeof(decPart) - 1); + decPart[sizeof(decPart) - 1] = '\0'; + } else { + strncpy(intPart, numBuf, sizeof(intPart) - 1); + intPart[sizeof(intPart) - 1] = '\0'; + } + + // Apply thousands separator + char fmtIntPart[128]; + + if (hasComma) { + int32_t srcLen = (int32_t)strlen(intPart); + int32_t dstIdx = 0; + + for (int32_t i = 0; i < srcLen; i++) { + if (i > 0 && (srcLen - i) % 3 == 0) { + fmtIntPart[dstIdx++] = ','; + } + fmtIntPart[dstIdx++] = intPart[i]; + } + + fmtIntPart[dstIdx] = '\0'; + } else { + strncpy(fmtIntPart, intPart, sizeof(fmtIntPart) - 1); + fmtIntPart[sizeof(fmtIntPart) - 1] = '\0'; + } + + // Pad integer part with leading zeros if format has 0's + int32_t totalIntDigits = hashBefore + zeroBefore; + int32_t curIntLen = (int32_t)strlen(fmtIntPart); + + // Build result + int32_t idx = 0; + + // Sign prefix + if (plusStart || plusEnd) { + if (isNeg) { + buf[idx++] = '-'; + } else if (plusStart) { + buf[idx++] = '+'; + } + } else if (isNeg) { + buf[idx++] = '-'; + } + + // Pad with leading spaces or zeros + int32_t padNeeded = totalIntDigits - curIntLen; + + for (int32_t i = 0; i < padNeeded; i++) { + if (i < padNeeded - (int32_t)strlen(intPart)) { + // Positions before the number + if (zeroBefore > 0) { + buf[idx++] = '0'; + } else { + buf[idx++] = ' '; + } + } else { + buf[idx++] = '0'; + } + } + + // Integer part + for (int32_t i = 0; fmtIntPart[i]; i++) { + buf[idx++] = fmtIntPart[i]; + } + + // Decimal part + if (hasDecimal) { + buf[idx++] = 
'.'; + for (int32_t i = 0; decPart[i] && i < decimals; i++) { + buf[idx++] = decPart[i]; + } + } + + // Trailing sign + if (plusEnd && !isNeg) { + buf[idx++] = '+'; + } else if (minusEnd && isNeg) { + buf[idx++] = '-'; + } else if (minusEnd && !isNeg) { + buf[idx++] = ' '; + } + + buf[idx] = '\0'; + } + + basValRelease(&fmtStr); + + if (!push(vm, basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + // ============================================================ + // SHELL + // ============================================================ + + case OP_SHELL: { + BasValueT cmdVal; + + if (!pop(vm, &cmdVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT cmdStr = basValToString(cmdVal); + basValRelease(&cmdVal); + + int32_t result = 0; + + if (cmdStr.strVal && cmdStr.strVal->len > 0) { + result = system(cmdStr.strVal->data); + } + + basValRelease(&cmdStr); + + if (!push(vm, basValInteger((int16_t)result))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + // ============================================================ + // COMPARE_MODE + // ============================================================ + + case OP_COMPARE_MODE: { + uint8_t mode = readUint8(vm); + vm->compareTextMode = (mode != 0); + break; + } + + // ============================================================ + // Halt + // ============================================================ + + case OP_HALT: + vm->running = false; + return BAS_VM_HALTED; + + default: + runtimeError(vm, 51, "Bad opcode"); + return BAS_VM_BAD_OPCODE; + } + + return BAS_VM_OK; +} + + +// ============================================================ +// currentFrame +// ============================================================ + +static BasCallFrameT *currentFrame(BasVmT *vm) { + if (vm->callDepth <= 0) { + // Module-level: use callStack[0] as implicit main frame + return &vm->callStack[0]; + } + + return &vm->callStack[vm->callDepth - 1]; +} + + +// 
============================================================ +// defaultPrint +// ============================================================ + +static void defaultPrint(void *ctx, const char *text, bool newline) { + (void)ctx; + fputs(text, stdout); + + if (newline) { + fputc('\n', stdout); + } +} + + +// ============================================================ +// execArith +// ============================================================ + +static BasVmResultE execArith(BasVmT *vm, uint8_t op) { + BasValueT b; + BasValueT a; + + if (!pop(vm, &b) || !pop(vm, &a)) { + return BAS_VM_STACK_UNDERFLOW; + } + + double na = basValToNumber(a); + double nb = basValToNumber(b); + basValRelease(&a); + basValRelease(&b); + + double result; + + switch (op) { + case OP_ADD_INT: + case OP_ADD_FLT: + result = na + nb; + break; + + case OP_SUB_INT: + case OP_SUB_FLT: + result = na - nb; + break; + + case OP_MUL_INT: + case OP_MUL_FLT: + result = na * nb; + break; + + case OP_IDIV_INT: + if ((int32_t)nb == 0) { + runtimeError(vm, 11, "Division by zero"); + return BAS_VM_DIV_BY_ZERO; + } + + result = (double)((int32_t)na / (int32_t)nb); + break; + + case OP_DIV_FLT: + if (nb == 0.0) { + runtimeError(vm, 11, "Division by zero"); + return BAS_VM_DIV_BY_ZERO; + } + + result = na / nb; + break; + + case OP_MOD_INT: + if ((int32_t)nb == 0) { + runtimeError(vm, 11, "Division by zero"); + return BAS_VM_DIV_BY_ZERO; + } + + result = (double)((int32_t)na % (int32_t)nb); + break; + + case OP_POW: + result = pow(na, nb); + break; + + default: + result = 0.0; + break; + } + + // Return appropriate type + if (op == OP_ADD_INT || op == OP_SUB_INT || op == OP_MUL_INT || op == OP_IDIV_INT || op == OP_MOD_INT) { + if (result >= -32768.0 && result <= 32767.0) { + push(vm, basValInteger((int16_t)result)); + } else if (result >= -2147483648.0 && result <= 2147483647.0) { + push(vm, basValLong((int32_t)result)); + } else { + push(vm, basValDouble(result)); + } + } else { + push(vm, 
basValDouble(result)); + } + + return BAS_VM_OK; +} + + +// ============================================================ +// execCompare +// ============================================================ + +static BasVmResultE execCompare(BasVmT *vm, uint8_t op) { + BasValueT b; + BasValueT a; + + if (!pop(vm, &b) || !pop(vm, &a)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t cmp = vm->compareTextMode ? basValCompareCI(a, b) : basValCompare(a, b); + basValRelease(&a); + basValRelease(&b); + + bool result; + + switch (op) { + case OP_CMP_EQ: result = (cmp == 0); break; + case OP_CMP_NE: result = (cmp != 0); break; + case OP_CMP_LT: result = (cmp < 0); break; + case OP_CMP_GT: result = (cmp > 0); break; + case OP_CMP_LE: result = (cmp <= 0); break; + case OP_CMP_GE: result = (cmp >= 0); break; + default: result = false; break; + } + + push(vm, basValBool(result)); + return BAS_VM_OK; +} + + +// ============================================================ +// execFileOp +// ============================================================ + +// File mode constants (matches compiler/parser.c emission) +#define FILE_MODE_INPUT 1 +#define FILE_MODE_OUTPUT 2 +#define FILE_MODE_APPEND 3 +#define FILE_MODE_RANDOM 4 +#define FILE_MODE_BINARY 5 + +static BasVmResultE execFileOp(BasVmT *vm, uint8_t op) { + switch (op) { + case OP_FILE_OPEN: { + uint8_t mode = readUint8(vm); + BasValueT channelVal; + BasValueT filenameVal; + + if (!pop(vm, &channelVal) || !pop(vm, &filenameVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + BasValueT fnStr = basValToString(filenameVal); + basValRelease(&filenameVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES) { + basValRelease(&fnStr); + runtimeError(vm, 52, "Bad file channel number"); + return BAS_VM_FILE_ERROR; + } + + // Close existing file on this channel + if (vm->files[channel].handle) { + fclose((FILE *)vm->files[channel].handle); + 
vm->files[channel].handle = NULL; + vm->files[channel].mode = 0; + } + + const char *modeStr; + + switch (mode) { + case FILE_MODE_INPUT: + modeStr = "r"; + break; + case FILE_MODE_OUTPUT: + modeStr = "w"; + break; + case FILE_MODE_APPEND: + modeStr = "a"; + break; + case FILE_MODE_RANDOM: + case FILE_MODE_BINARY: + modeStr = "r+b"; + break; + default: + basValRelease(&fnStr); + runtimeError(vm, 54, "Bad file mode"); + return BAS_VM_FILE_ERROR; + } + + // For RANDOM/BINARY: create file if it doesn't exist, then reopen r+b + if (mode == FILE_MODE_RANDOM || mode == FILE_MODE_BINARY) { + FILE *test = fopen(fnStr.strVal->data, "r"); + if (!test) { + // Create the file + test = fopen(fnStr.strVal->data, "w+b"); + if (test) { + fclose(test); + } + } else { + fclose(test); + } + } + + FILE *fp = fopen(fnStr.strVal->data, modeStr); + basValRelease(&fnStr); + + if (!fp) { + runtimeError(vm, 53, "File not found or cannot open"); + return BAS_VM_FILE_ERROR; + } + + vm->files[channel].handle = fp; + vm->files[channel].mode = mode; + break; + } + + case OP_FILE_CLOSE: { + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES) { + runtimeError(vm, 52, "Bad file channel number"); + return BAS_VM_FILE_ERROR; + } + + if (vm->files[channel].handle) { + fclose((FILE *)vm->files[channel].handle); + vm->files[channel].handle = NULL; + vm->files[channel].mode = 0; + } + + break; + } + + case OP_FILE_PRINT: { + BasValueT val; + BasValueT channelVal; + + if (!pop(vm, &val) || !pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + basValRelease(&val); + runtimeError(vm, 52, "Bad file number or file not open"); + return 
BAS_VM_FILE_ERROR; + } + + BasStringT *s = basValFormatString(val); + basValRelease(&val); + + if (s) { + fputs(s->data, (FILE *)vm->files[channel].handle); + fputc('\n', (FILE *)vm->files[channel].handle); + basStringUnref(s); + } + + break; + } + + case OP_FILE_INPUT: { + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + char buf[1024]; + buf[0] = '\0'; + + if (fgets(buf, sizeof(buf), (FILE *)vm->files[channel].handle)) { + // Strip trailing newline + int32_t len = (int32_t)strlen(buf); + + while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) { + buf[--len] = '\0'; + } + } + + if (!push(vm, basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_EOF: { + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + // Peek ahead to detect EOF before the next read + FILE *fp = (FILE *)vm->files[channel].handle; + int ch = fgetc(fp); + bool isEof; + + if (ch == EOF) { + isEof = true; + } else { + ungetc(ch, fp); + isEof = false; + } + + if (!push(vm, basValBool(isEof))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_LINE_INPUT: { + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + 
runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + char buf[1024]; + buf[0] = '\0'; + + if (fgets(buf, sizeof(buf), (FILE *)vm->files[channel].handle)) { + int32_t len = (int32_t)strlen(buf); + + while (len > 0 && (buf[len - 1] == '\n' || buf[len - 1] == '\r')) { + buf[--len] = '\0'; + } + } + + if (!push(vm, basValStringFromC(buf))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_WRITE: { + // Pop value and channel, write value in WRITE format + BasValueT val; + BasValueT channelVal; + + if (!pop(vm, &val) || !pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + basValRelease(&val); + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + FILE *fp = (FILE *)vm->files[channel].handle; + + if (val.type == BAS_TYPE_STRING) { + // Strings: enclosed in quotes + fputc('"', fp); + if (val.strVal) { + fputs(val.strVal->data, fp); + } + fputc('"', fp); + } else { + // Numbers: no leading space (unlike PRINT) + BasStringT *s = basValFormatString(val); + if (s) { + // Skip leading space that basValFormatString adds for positive numbers + const char *text = s->data; + if (*text == ' ') { + text++; + } + fputs(text, fp); + basStringUnref(s); + } + } + + basValRelease(&val); + break; + } + + case OP_FILE_WRITE_SEP: { + // Pop channel, write comma separator + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + fputc(',', (FILE *)vm->files[channel].handle); + break; + } + + case 
OP_FILE_WRITE_NL: { + // Pop channel, write newline + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + fputc('\n', (FILE *)vm->files[channel].handle); + break; + } + + case OP_FILE_GET: { + // Pop type, recno, channel; read data; push value + BasValueT typeVal; + BasValueT recnoVal; + BasValueT channelVal; + + if (!pop(vm, &typeVal) || !pop(vm, &recnoVal) || !pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + int32_t recno = (int32_t)basValToNumber(recnoVal); + int32_t dataType = (int32_t)basValToNumber(typeVal); + basValRelease(&channelVal); + basValRelease(&recnoVal); + basValRelease(&typeVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + FILE *fp = (FILE *)vm->files[channel].handle; + + // Seek to record position if recno > 0 + // (recno is 1-based in QB; 0 means current position) + if (recno > 0) { + // For simplicity, use fixed 128-byte records for RANDOM + fseek(fp, (long)(recno - 1) * 128, SEEK_SET); + } + + BasValueT result; + memset(&result, 0, sizeof(result)); + + switch (dataType) { + case BAS_TYPE_INTEGER: { + int16_t val = 0; + if (fread(&val, sizeof(val), 1, fp) < 1) { /* EOF ok */ } + result = basValInteger(val); + break; + } + case BAS_TYPE_LONG: { + int32_t val = 0; + if (fread(&val, sizeof(val), 1, fp) < 1) { /* EOF ok */ } + result = basValLong(val); + break; + } + case BAS_TYPE_SINGLE: { + float val = 0.0f; + if (fread(&val, sizeof(val), 1, fp) < 1) { /* EOF ok */ } + result = basValSingle(val); + break; + } + case BAS_TYPE_DOUBLE: { + 
double val = 0.0; + if (fread(&val, sizeof(val), 1, fp) < 1) { /* EOF ok */ } + result = basValDouble(val); + break; + } + case BAS_TYPE_STRING: { + // Read a length-prefixed string (int16 len + data) + int16_t len = 0; + if (fread(&len, sizeof(len), 1, fp) < 1) { /* EOF ok */ } + if (len < 0) { + len = 0; + } + char *buf = (char *)malloc(len + 1); + if (buf) { + if (fread(buf, 1, len, fp) < 1) { /* EOF ok */ } + buf[len] = '\0'; + result = basValStringFromC(buf); + free(buf); + } else { + result = basValStringFromC(""); + } + break; + } + default: + result = basValInteger(0); + break; + } + + if (!push(vm, result)) { + basValRelease(&result); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_PUT: { + // Pop value, recno, channel; write data + BasValueT val; + BasValueT recnoVal; + BasValueT channelVal; + + if (!pop(vm, &val) || !pop(vm, &recnoVal) || !pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + int32_t recno = (int32_t)basValToNumber(recnoVal); + basValRelease(&channelVal); + basValRelease(&recnoVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + basValRelease(&val); + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + FILE *fp = (FILE *)vm->files[channel].handle; + + if (recno > 0) { + fseek(fp, (long)(recno - 1) * 128, SEEK_SET); + } + + switch (val.type) { + case BAS_TYPE_INTEGER: { + int16_t v = val.intVal; + fwrite(&v, sizeof(v), 1, fp); + break; + } + case BAS_TYPE_LONG: { + int32_t v = val.longVal; + fwrite(&v, sizeof(v), 1, fp); + break; + } + case BAS_TYPE_SINGLE: { + float v = val.sngVal; + fwrite(&v, sizeof(v), 1, fp); + break; + } + case BAS_TYPE_DOUBLE: { + double v = val.dblVal; + fwrite(&v, sizeof(v), 1, fp); + break; + } + case BAS_TYPE_STRING: { + // Write length-prefixed string + int16_t len = val.strVal ? 
(int16_t)val.strVal->len : 0; + fwrite(&len, sizeof(len), 1, fp); + if (len > 0 && val.strVal) { + fwrite(val.strVal->data, 1, len, fp); + } + break; + } + default: { + int16_t zero = 0; + fwrite(&zero, sizeof(zero), 1, fp); + break; + } + } + + fflush(fp); + basValRelease(&val); + break; + } + + case OP_FILE_SEEK: { + // Pop position and channel, seek + BasValueT posVal; + BasValueT channelVal; + + if (!pop(vm, &posVal) || !pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + int32_t pos = (int32_t)basValToNumber(posVal); + basValRelease(&channelVal); + basValRelease(&posVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + // QB SEEK is 1-based + fseek((FILE *)vm->files[channel].handle, (long)(pos - 1), SEEK_SET); + break; + } + + case OP_FILE_LOF: { + // Pop channel, push file length + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + FILE *fp = (FILE *)vm->files[channel].handle; + long savedPos = ftell(fp); + fseek(fp, 0, SEEK_END); + long length = ftell(fp); + fseek(fp, savedPos, SEEK_SET); + + if (!push(vm, basValLong((int32_t)length))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_LOC: { + // Pop channel, push current position + BasValueT channelVal; + + if (!pop(vm, &channelVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + basValRelease(&channelVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or 
file not open"); + return BAS_VM_FILE_ERROR; + } + + long pos = ftell((FILE *)vm->files[channel].handle); + + // QB returns 1-based position + if (!push(vm, basValLong((int32_t)(pos + 1)))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_FREEFILE: { + // Push the next available file channel number + int32_t freeNum = 0; + + for (int32_t i = 1; i < BAS_VM_MAX_FILES; i++) { + if (!vm->files[i].handle) { + freeNum = i; + break; + } + } + + if (freeNum == 0) { + runtimeError(vm, 67, "Too many files open"); + return BAS_VM_FILE_ERROR; + } + + if (!push(vm, basValInteger((int16_t)freeNum))) { + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + case OP_FILE_INPUT_N: { + // Pop channel and n, read n chars, push string + BasValueT channelVal; + BasValueT nVal; + + if (!pop(vm, &channelVal) || !pop(vm, &nVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t channel = (int32_t)basValToNumber(channelVal); + int32_t n = (int32_t)basValToNumber(nVal); + basValRelease(&channelVal); + basValRelease(&nVal); + + if (channel < 1 || channel >= BAS_VM_MAX_FILES || !vm->files[channel].handle) { + runtimeError(vm, 52, "Bad file number or file not open"); + return BAS_VM_FILE_ERROR; + } + + if (n < 0) { + n = 0; + } + + if (n > 32767) { + n = 32767; + } + + char *buf = (char *)malloc(n + 1); + + if (!buf) { + runtimeError(vm, 7, "Out of memory"); + return BAS_VM_OUT_OF_MEMORY; + } + + int32_t bytesRead = (int32_t)fread(buf, 1, n, (FILE *)vm->files[channel].handle); + buf[bytesRead] = '\0'; + + BasStringT *s = basStringNew(buf, bytesRead); + free(buf); + + BasValueT result; + result.type = BAS_TYPE_STRING; + result.strVal = s; + + if (!push(vm, result)) { + basStringUnref(s); + return BAS_VM_STACK_OVERFLOW; + } + + break; + } + + default: + return BAS_VM_BAD_OPCODE; + } + + return BAS_VM_OK; +} + + +// ============================================================ +// execLogical +// ============================================================ + +static 
BasVmResultE execLogical(BasVmT *vm, uint8_t op) { + if (op == OP_NOT) { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + int32_t n = (int32_t)basValToNumber(*top); + basValRelease(top); + *top = basValInteger((int16_t)(~n)); + return BAS_VM_OK; + } + + BasValueT b; + BasValueT a; + + if (!pop(vm, &b) || !pop(vm, &a)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t na = (int32_t)basValToNumber(a); + int32_t nb = (int32_t)basValToNumber(b); + basValRelease(&a); + basValRelease(&b); + + int32_t result; + + switch (op) { + case OP_AND: result = na & nb; break; + case OP_OR: result = na | nb; break; + case OP_XOR: result = na ^ nb; break; + case OP_EQV: result = ~(na ^ nb); break; + case OP_IMP: result = (~na) | nb; break; + default: result = 0; break; + } + + push(vm, basValInteger((int16_t)result)); + return BAS_VM_OK; +} + + +// ============================================================ +// execMath +// ============================================================ + +static BasVmResultE execMath(BasVmT *vm, uint8_t op) { + if (op == OP_MATH_RND) { + // Pop the dummy arg (parser pushes -1 for RND()) + BasValueT dummy; + + if (!pop(vm, &dummy)) { + return BAS_VM_STACK_UNDERFLOW; + } + + basValRelease(&dummy); + + double r = (double)rand() / (double)RAND_MAX; + + if (!push(vm, basValSingle((float)r))) { + return BAS_VM_STACK_OVERFLOW; + } + + return BAS_VM_OK; + } + + if (op == OP_MATH_RANDOMIZE) { + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + double n = basValToNumber(val); + basValRelease(&val); + + if (n < 0) { + srand((unsigned int)time(NULL)); + } else { + srand((unsigned int)n); + } + + return BAS_VM_OK; + } + + // All other math ops take one argument + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + double n = basValToNumber(*top); + double result; + + switch (op) { + case OP_MATH_ABS: result = fabs(n); break; + 
case OP_MATH_INT: result = floor(n); break; + case OP_MATH_FIX: result = (n >= 0) ? floor(n) : ceil(n); break; + case OP_MATH_SGN: result = (n > 0) ? 1.0 : (n < 0) ? -1.0 : 0.0; break; + case OP_MATH_SQR: result = sqrt(n); break; + case OP_MATH_SIN: result = sin(n); break; + case OP_MATH_COS: result = cos(n); break; + case OP_MATH_TAN: result = tan(n); break; + case OP_MATH_ATN: result = atan(n); break; + case OP_MATH_LOG: result = log(n); break; + case OP_MATH_EXP: result = exp(n); break; + default: result = 0.0; break; + } + + basValRelease(top); + *top = basValDouble(result); + return BAS_VM_OK; +} + + +// ============================================================ +// execPrint +// ============================================================ + +static BasVmResultE execPrint(BasVmT *vm) { + BasValueT val; + + if (!pop(vm, &val)) { + return BAS_VM_STACK_UNDERFLOW; + } + + // QB prints numeric values with a trailing space + bool isNumeric = (val.type != BAS_TYPE_STRING); + + BasStringT *s = basValFormatString(val); + basValRelease(&val); + + if (vm->printFn && s) { + vm->printFn(vm->printCtx, s->data, false); + + if (isNumeric) { + vm->printFn(vm->printCtx, " ", false); + } + } + + basStringUnref(s); + return BAS_VM_OK; +} + + +// ============================================================ +// execStringOp +// ============================================================ + +static BasVmResultE execStringOp(BasVmT *vm, uint8_t op) { + switch (op) { + case OP_STR_CONCAT: { + BasValueT b; + BasValueT a; + + if (!pop(vm, &b) || !pop(vm, &a)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT sa = basValToString(a); + BasValueT sb = basValToString(b); + basValRelease(&a); + basValRelease(&b); + + BasStringT *result = basStringConcat(sa.strVal, sb.strVal); + basValRelease(&sa); + basValRelease(&sb); + + BasValueT rv; + rv.type = BAS_TYPE_STRING; + rv.strVal = result; + push(vm, rv); + return BAS_VM_OK; + } + + case OP_STR_LEFT: { + BasValueT nVal; + BasValueT sVal; + 
+ if (!pop(vm, &nVal) || !pop(vm, &sVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t n = (int32_t)basValToNumber(nVal); + basValRelease(&nVal); + + BasValueT sv = basValToString(sVal); + basValRelease(&sVal); + + BasStringT *result = basStringSub(sv.strVal, 0, n); + basValRelease(&sv); + + BasValueT rv; + rv.type = BAS_TYPE_STRING; + rv.strVal = result; + push(vm, rv); + return BAS_VM_OK; + } + + case OP_STR_RIGHT: { + BasValueT nVal; + BasValueT sVal; + + if (!pop(vm, &nVal) || !pop(vm, &sVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t n = (int32_t)basValToNumber(nVal); + basValRelease(&nVal); + + BasValueT sv = basValToString(sVal); + basValRelease(&sVal); + + int32_t start = sv.strVal->len - n; + + if (start < 0) { + start = 0; + } + + BasStringT *result = basStringSub(sv.strVal, start, n); + basValRelease(&sv); + + BasValueT rv; + rv.type = BAS_TYPE_STRING; + rv.strVal = result; + push(vm, rv); + return BAS_VM_OK; + } + + case OP_STR_MID: { + BasValueT lenVal; + BasValueT startVal; + BasValueT sVal; + + if (!pop(vm, &lenVal) || !pop(vm, &startVal) || !pop(vm, &sVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t start = (int32_t)basValToNumber(startVal) - 1; // 1-based to 0-based + int32_t len = (int32_t)basValToNumber(lenVal); + basValRelease(&startVal); + basValRelease(&lenVal); + + BasValueT sv = basValToString(sVal); + basValRelease(&sVal); + + BasStringT *result = basStringSub(sv.strVal, start, len); + basValRelease(&sv); + + BasValueT rv; + rv.type = BAS_TYPE_STRING; + rv.strVal = result; + push(vm, rv); + return BAS_VM_OK; + } + + case OP_STR_MID2: { + BasValueT startVal; + BasValueT sVal; + + if (!pop(vm, &startVal) || !pop(vm, &sVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + int32_t start = (int32_t)basValToNumber(startVal) - 1; + basValRelease(&startVal); + + BasValueT sv = basValToString(sVal); + basValRelease(&sVal); + + BasStringT *result = basStringSub(sv.strVal, start, sv.strVal->len - start); + basValRelease(&sv); + + 
BasValueT rv; + rv.type = BAS_TYPE_STRING; + rv.strVal = result; + push(vm, rv); + return BAS_VM_OK; + } + + case OP_STR_LEN: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT sv = basValToString(*top); + int32_t len = sv.strVal ? sv.strVal->len : 0; + basValRelease(&sv); + basValRelease(top); + *top = basValInteger((int16_t)len); + return BAS_VM_OK; + } + + case OP_STR_INSTR: { + BasValueT findVal; + BasValueT sVal; + + if (!pop(vm, &findVal) || !pop(vm, &sVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT sv = basValToString(sVal); + BasValueT fv = basValToString(findVal); + basValRelease(&sVal); + basValRelease(&findVal); + + int32_t pos = 0; + char *found = strstr(sv.strVal->data, fv.strVal->data); + + if (found) { + pos = (int32_t)(found - sv.strVal->data) + 1; // 1-based + } + + basValRelease(&sv); + basValRelease(&fv); + push(vm, basValInteger((int16_t)pos)); + return BAS_VM_OK; + } + + case OP_STR_UCASE: + case OP_STR_LCASE: + case OP_STR_TRIM: + case OP_STR_LTRIM: + case OP_STR_RTRIM: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT sv = basValToString(*top); + basValRelease(top); + + BasStringT *src = sv.strVal; + BasStringT *result; + + if (op == OP_STR_UCASE || op == OP_STR_LCASE) { + result = basStringNew(src->data, src->len); + + for (int32_t i = 0; i < result->len; i++) { + if (op == OP_STR_UCASE && result->data[i] >= 'a' && result->data[i] <= 'z') { + result->data[i] -= 32; + } else if (op == OP_STR_LCASE && result->data[i] >= 'A' && result->data[i] <= 'Z') { + result->data[i] += 32; + } + } + } else { + int32_t start = 0; + int32_t end = src->len; + + if (op == OP_STR_LTRIM || op == OP_STR_TRIM) { + while (start < end && src->data[start] == ' ') { + start++; + } + } + + if (op == OP_STR_RTRIM || op == OP_STR_TRIM) { + while (end > start && src->data[end - 1] == ' ') { + end--; + } + } + + result = 
basStringSub(src, start, end - start); + } + + basValRelease(&sv); + top->type = BAS_TYPE_STRING; + top->strVal = result; + return BAS_VM_OK; + } + + case OP_STR_CHR: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + int32_t code = (int32_t)basValToNumber(*top); + char buf[2] = { (char)(code & 0xFF), '\0' }; + basValRelease(top); + *top = basValStringFromC(buf); + return BAS_VM_OK; + } + + case OP_STR_ASC: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT sv = basValToString(*top); + int32_t code = (sv.strVal && sv.strVal->len > 0) ? (unsigned char)sv.strVal->data[0] : 0; + basValRelease(&sv); + basValRelease(top); + *top = basValInteger((int16_t)code); + return BAS_VM_OK; + } + + case OP_STR_SPACE: { + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + int32_t n = (int32_t)basValToNumber(*top); + basValRelease(top); + + if (n < 0) { + n = 0; + } + + if (n > 32767) { + n = 32767; + } + + BasStringT *s = basStringAlloc(n + 1); + memset(s->data, ' ', n); + s->data[n] = '\0'; + s->len = n; + + top->type = BAS_TYPE_STRING; + top->strVal = s; + return BAS_VM_OK; + } + + case OP_STR_FIXLEN: { + // [uint16 len] pop string, pad/truncate to fixed length, push result + uint16_t fixLen = readUint16(vm); + + if (vm->sp < 1) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT *top = &vm->stack[vm->sp - 1]; + BasValueT sv = basValToString(*top); + basValRelease(top); + + BasStringT *src = sv.strVal; + BasStringT *result = basStringAlloc(fixLen + 1); + result->len = fixLen; + + int32_t srcLen = src ? src->len : 0; + int32_t copyLen = srcLen < (int32_t)fixLen ? 
srcLen : (int32_t)fixLen; + + if (copyLen > 0 && src) { + memcpy(result->data, src->data, copyLen); + } + + // Pad with spaces + for (int32_t i = copyLen; i < (int32_t)fixLen; i++) { + result->data[i] = ' '; + } + + result->data[fixLen] = '\0'; + basValRelease(&sv); + + top->type = BAS_TYPE_STRING; + top->strVal = result; + return BAS_VM_OK; + } + + case OP_STR_MID_ASGN: { + // Pop replacement, len, start, str; push modified string + BasValueT replVal; + BasValueT lenVal; + BasValueT startVal; + BasValueT strVal; + + if (!pop(vm, &replVal) || !pop(vm, &lenVal) || !pop(vm, &startVal) || !pop(vm, &strVal)) { + return BAS_VM_STACK_UNDERFLOW; + } + + BasValueT sv = basValToString(strVal); + BasValueT rv = basValToString(replVal); + int32_t start = (int32_t)basValToNumber(startVal) - 1; // 1-based to 0-based + int32_t len = (int32_t)basValToNumber(lenVal); + basValRelease(&strVal); + basValRelease(&startVal); + basValRelease(&lenVal); + basValRelease(&replVal); + + BasStringT *src = sv.strVal; + BasStringT *repl = rv.strVal; + int32_t srcLen = src ? src->len : 0; + int32_t replLen = repl ? repl->len : 0; + + // If len is 0, use replacement length + if (len <= 0) { + len = replLen; + } + + // Clamp to available replacement length + if (len > replLen) { + len = replLen; + } + + // Create a copy of the original string + BasStringT *result = basStringNew(src ? 
src->data : "", srcLen); + + // Replace characters + if (start >= 0 && start < srcLen && len > 0) { + int32_t maxReplace = srcLen - start; + if (len > maxReplace) { + len = maxReplace; + } + if (repl) { + memcpy(result->data + start, repl->data, len); + } + } + + basValRelease(&sv); + basValRelease(&rv); + + BasValueT resultVal; + resultVal.type = BAS_TYPE_STRING; + resultVal.strVal = result; + + if (!push(vm, resultVal)) { + basStringUnref(result); + return BAS_VM_STACK_OVERFLOW; + } + + return BAS_VM_OK; + } + + default: + return BAS_VM_BAD_OPCODE; + } +} + + +// ============================================================ +// pop +// ============================================================ + +static bool pop(BasVmT *vm, BasValueT *val) { + if (vm->sp <= 0) { + return false; + } + + *val = vm->stack[--vm->sp]; + return true; +} + + +// ============================================================ +// push +// ============================================================ + +static bool push(BasVmT *vm, BasValueT val) { + if (vm->sp >= BAS_VM_STACK_SIZE) { + return false; + } + + vm->stack[vm->sp++] = val; + return true; +} + + +// ============================================================ +// readInt16 +// ============================================================ + +static int16_t readInt16(BasVmT *vm) { + int16_t val; + memcpy(&val, &vm->module->code[vm->pc], sizeof(int16_t)); + vm->pc += sizeof(int16_t); + return val; +} + + +// ============================================================ +// readUint8 +// ============================================================ + +static uint8_t readUint8(BasVmT *vm) { + return vm->module->code[vm->pc++]; +} + + +// ============================================================ +// readUint16 +// ============================================================ + +static uint16_t readUint16(BasVmT *vm) { + uint16_t val; + memcpy(&val, &vm->module->code[vm->pc], sizeof(uint16_t)); + vm->pc += sizeof(uint16_t); + return val; 
+} + + +// ============================================================ +// runtimeError +// ============================================================ + +static void runtimeError(BasVmT *vm, int32_t errNum, const char *msg) { + vm->errorNumber = errNum; + snprintf(vm->errorMsg, sizeof(vm->errorMsg), "Runtime error %d at PC %d: %s", errNum, vm->pc, msg); +} diff --git a/dvxbasic/runtime/vm.h b/dvxbasic/runtime/vm.h new file mode 100644 index 0000000..84a4b20 --- /dev/null +++ b/dvxbasic/runtime/vm.h @@ -0,0 +1,211 @@ +// vm.h -- DVX BASIC virtual machine +// +// Stack-based p-code interpreter. Executes compiled BASIC bytecode. +// Embeddable: the host provides I/O callbacks. No DVX dependencies. +// +// Usage: +// BasVmT *vm = basVmCreate(); +// basVmSetPrintCallback(vm, myPrintFn, myCtx); +// basVmSetInputCallback(vm, myInputFn, myCtx); +// basVmLoadModule(vm, compiledCode, codeLen, constants, numConsts); +// BasVmResultE result = basVmRun(vm); +// basVmDestroy(vm); + +#ifndef DVXBASIC_VM_H +#define DVXBASIC_VM_H + +#include "values.h" + +#include +#include + +// ============================================================ +// Limits +// ============================================================ + +#define BAS_VM_STACK_SIZE 256 // evaluation stack depth +#define BAS_VM_CALL_STACK_SIZE 64 // max call nesting +#define BAS_VM_MAX_GLOBALS 512 // global variable slots +#define BAS_VM_MAX_LOCALS 64 // locals per stack frame +#define BAS_VM_MAX_FOR_DEPTH 32 // nested FOR loops +#define BAS_VM_MAX_FILES 16 // open file channels + +// ============================================================ +// Result codes +// ============================================================ + +typedef enum { + BAS_VM_OK, // program completed normally + BAS_VM_HALTED, // HALT instruction reached + BAS_VM_YIELDED, // DoEvents yielded control + BAS_VM_ERROR, // runtime error + BAS_VM_STACK_OVERFLOW, + BAS_VM_STACK_UNDERFLOW, + BAS_VM_CALL_OVERFLOW, + BAS_VM_DIV_BY_ZERO, + 
BAS_VM_TYPE_MISMATCH, + BAS_VM_OUT_OF_MEMORY, + BAS_VM_BAD_OPCODE, + BAS_VM_FILE_ERROR, + BAS_VM_SUBSCRIPT_RANGE, + BAS_VM_USER_ERROR // ON ERROR raised +} BasVmResultE; + +// ============================================================ +// I/O callbacks (host-provided) +// ============================================================ + +// Print callback: called for PRINT output. +// text is a null-terminated string. newline indicates whether +// to advance to the next line after printing. +typedef void (*BasPrintFnT)(void *ctx, const char *text, bool newline); + +// Input callback: called for INPUT statement. +// prompt is the text to display. The callback must fill buf +// (up to bufSize-1 chars, null-terminated). Returns true on +// success, false on cancel/error. +typedef bool (*BasInputFnT)(void *ctx, const char *prompt, char *buf, int32_t bufSize); + +// DoEvents callback: called for DoEvents statement. +// The host should process pending events and return. Returns +// true to continue execution, false to stop the program. 
+typedef bool (*BasDoEventsFnT)(void *ctx); + +// ============================================================ +// Call stack frame +// ============================================================ + +typedef struct { + int32_t returnPc; // instruction to return to + int32_t baseSlot; // base index in locals array + int32_t localCount; // number of locals in this frame + BasValueT locals[BAS_VM_MAX_LOCALS]; +} BasCallFrameT; + +// ============================================================ +// FOR loop state +// ============================================================ + +typedef struct { + int32_t varIdx; // loop variable slot index + bool isLocal; // true = local, false = global + BasValueT limit; // upper bound + BasValueT step; // step value + int32_t loopTop; // PC of the loop body start +} BasForStateT; + +// ============================================================ +// File channel +// ============================================================ + +typedef struct { + void *handle; // FILE* or platform-specific + int32_t mode; // 0=closed, 1=input, 2=output, 3=append, 4=random, 5=binary +} BasFileChannelT; + +// ============================================================ +// Compiled module (output of the compiler) +// ============================================================ + +typedef struct { + uint8_t *code; // p-code bytecode + int32_t codeLen; + BasStringT **constants; // string constant pool + int32_t constCount; + int32_t globalCount; // number of global variable slots needed + int32_t entryPoint; // PC of the first instruction (module-level code) + BasValueT *dataPool; // DATA statement value pool + int32_t dataCount; // number of values in the data pool +} BasModuleT; + +// ============================================================ +// VM state +// ============================================================ + +typedef struct { + // Program + BasModuleT *module; + + // Execution + int32_t pc; // program counter + bool running; + bool 
yielded; + + // Evaluation stack + BasValueT stack[BAS_VM_STACK_SIZE]; + int32_t sp; // stack pointer (index of next free slot) + + // Call stack + BasCallFrameT callStack[BAS_VM_CALL_STACK_SIZE]; + int32_t callDepth; + + // FOR loop stack + BasForStateT forStack[BAS_VM_MAX_FOR_DEPTH]; + int32_t forDepth; + + // Global variables + BasValueT globals[BAS_VM_MAX_GLOBALS]; + + // File channels (1-based, index 0 unused) + BasFileChannelT files[BAS_VM_MAX_FILES]; + + // DATA/READ pointer + int32_t dataPtr; // current READ position in data pool + + // String comparison mode + bool compareTextMode; // true = case-insensitive comparisons + + // Error handling + int32_t errorHandler; // PC of ON ERROR GOTO handler (0 = none) + int32_t errorNumber; // current Err number + int32_t errorPc; // PC of the instruction that caused the error (for RESUME) + int32_t errorNextPc; // PC of the next instruction after error (for RESUME NEXT) + bool inErrorHandler; // true when executing error handler code + char errorMsg[256]; // current error description + + // I/O callbacks + BasPrintFnT printFn; + void *printCtx; + BasInputFnT inputFn; + void *inputCtx; + BasDoEventsFnT doEventsFn; + void *doEventsCtx; +} BasVmT; + +// ============================================================ +// API +// ============================================================ + +// Create a new VM instance. +BasVmT *basVmCreate(void); + +// Destroy a VM instance and free all resources. +void basVmDestroy(BasVmT *vm); + +// Load a compiled module into the VM. +void basVmLoadModule(BasVmT *vm, BasModuleT *module); + +// Execute the loaded module. Returns when the program ends, +// halts, yields, or hits an error. +BasVmResultE basVmRun(BasVmT *vm); + +// Execute a single instruction. Returns the result. +// Useful for stepping/debugging. +BasVmResultE basVmStep(BasVmT *vm); + +// Reset the VM to initial state (clear stack, globals, PC). +void basVmReset(BasVmT *vm); + +// Set I/O callbacks. 
+void basVmSetPrintCallback(BasVmT *vm, BasPrintFnT fn, void *ctx); +void basVmSetInputCallback(BasVmT *vm, BasInputFnT fn, void *ctx); +void basVmSetDoEventsCallback(BasVmT *vm, BasDoEventsFnT fn, void *ctx); + +// Push/pop values on the evaluation stack (for host integration). +bool basVmPush(BasVmT *vm, BasValueT val); +bool basVmPop(BasVmT *vm, BasValueT *val); + +// Get the current error message. +const char *basVmGetError(const BasVmT *vm); + +#endif // DVXBASIC_VM_H diff --git a/dvxbasic/test_compiler.c b/dvxbasic/test_compiler.c new file mode 100644 index 0000000..a349475 --- /dev/null +++ b/dvxbasic/test_compiler.c @@ -0,0 +1,850 @@ +// test_compiler.c -- End-to-end test: source -> compiler -> VM -> output +// +// Build (native): +// gcc -O2 -Wall -o test_compiler test_compiler.c \ +// compiler/lexer.c compiler/parser.c compiler/codegen.c compiler/symtab.c \ +// runtime/vm.c runtime/values.c -lm + +#include "compiler/parser.h" +#include "runtime/vm.h" +#include "runtime/values.h" + +#include +#include + +static void runProgram(const char *name, const char *source) { + printf("=== %s ===\n", name); + + int32_t len = (int32_t)strlen(source); + + BasParserT parser; + basParserInit(&parser, source, len); + + if (!basParse(&parser)) { + printf("COMPILE ERROR: %s\n\n", parser.error); + basParserFree(&parser); + return; + } + + BasModuleT *mod = basParserBuildModule(&parser); + basParserFree(&parser); + + if (!mod) { + printf("MODULE BUILD FAILED\n\n"); + return; + } + + BasVmT *vm = basVmCreate(); + basVmLoadModule(vm, mod); + + // Module-level code uses callStack[0] as implicit main frame + vm->callStack[0].localCount = mod->globalCount > 64 ? 
64 : mod->globalCount; + vm->callDepth = 1; + + BasVmResultE result = basVmRun(vm); + + if (result != BAS_VM_HALTED && result != BAS_VM_OK) { + printf("[VM error %d: %s]\n", result, basVmGetError(vm)); + } + + basVmDestroy(vm); + basModuleFree(mod); + printf("\n"); +} + + +int main(void) { + printf("DVX BASIC Compiler Tests\n"); + printf("========================\n\n"); + + basStringSystemInit(); + + // Test 1: Hello World + runProgram("Hello World", + "PRINT \"Hello, World!\"\n" + ); + + // Test 2: Arithmetic + runProgram("Arithmetic", + "PRINT 2 + 3 * 4\n" + "PRINT 10 \\ 3\n" + "PRINT 10 MOD 3\n" + "PRINT 2 ^ 8\n" + ); + + // Test 3: String operations + runProgram("String Ops", + "DIM s AS STRING\n" + "s = \"Hello, BASIC!\"\n" + "PRINT s\n" + "PRINT LEN(s)\n" + "PRINT LEFT$(s, 5)\n" + "PRINT RIGHT$(s, 6)\n" + "PRINT MID$(s, 8, 5)\n" + "PRINT UCASE$(s)\n" + ); + + // Test 4: IF/THEN/ELSE + runProgram("IF/THEN/ELSE", + "DIM x AS INTEGER\n" + "x = 42\n" + "IF x > 100 THEN\n" + " PRINT \"big\"\n" + "ELSEIF x > 10 THEN\n" + " PRINT \"medium\"\n" + "ELSE\n" + " PRINT \"small\"\n" + "END IF\n" + ); + + // Test 5: FOR loop + runProgram("FOR Loop", + "DIM i AS INTEGER\n" + "FOR i = 1 TO 10\n" + " PRINT i;\n" + "NEXT i\n" + "PRINT\n" + ); + + // Test 6: DO/WHILE loop + runProgram("DO/WHILE Loop", + "DIM n AS INTEGER\n" + "n = 1\n" + "DO WHILE n <= 5\n" + " PRINT n;\n" + " n = n + 1\n" + "LOOP\n" + "PRINT\n" + ); + + // Test 7: SUB and FUNCTION + runProgram("SUB and FUNCTION", + "DECLARE SUB Greet(name AS STRING)\n" + "DECLARE FUNCTION Square(x AS INTEGER) AS INTEGER\n" + "\n" + "CALL Greet(\"World\")\n" + "PRINT Square(7)\n" + "\n" + "SUB Greet(name AS STRING)\n" + " PRINT \"Hello, \" & name & \"!\"\n" + "END SUB\n" + "\n" + "FUNCTION Square(x AS INTEGER) AS INTEGER\n" + " Square = x * x\n" + "END FUNCTION\n" + ); + + // Test 8: SELECT CASE + runProgram("SELECT CASE", + "DIM grade AS STRING\n" + "grade = \"B\"\n" + "SELECT CASE grade\n" + " CASE \"A\"\n" + " PRINT 
\"Excellent\"\n" + " CASE \"B\", \"C\"\n" + " PRINT \"Good\"\n" + " CASE ELSE\n" + " PRINT \"Other\"\n" + "END SELECT\n" + ); + + // Test 9: Fibonacci + runProgram("Fibonacci", + "DIM a AS INTEGER\n" + "DIM b AS INTEGER\n" + "DIM temp AS INTEGER\n" + "DIM i AS INTEGER\n" + "a = 0\n" + "b = 1\n" + "FOR i = 1 TO 10\n" + " PRINT a;\n" + " temp = a + b\n" + " a = b\n" + " b = temp\n" + "NEXT i\n" + "PRINT\n" + ); + + // Test 10: Math functions + runProgram("Math Functions", + "PRINT ABS(-42)\n" + "PRINT SQR(144)\n" + "PRINT INT(3.7)\n" + ); + + // Test 11: File I/O + runProgram("File I/O", + "OPEN \"/tmp/dvxbasic_test.txt\" FOR OUTPUT AS #1\n" + "PRINT #1, \"Hello from BASIC!\"\n" + "PRINT #1, \"Line two\"\n" + "PRINT #1, \"42\"\n" + "CLOSE #1\n" + "\n" + "DIM line$ AS STRING\n" + "DIM count AS INTEGER\n" + "count = 0\n" + "OPEN \"/tmp/dvxbasic_test.txt\" FOR INPUT AS #1\n" + "DO WHILE NOT EOF(#1)\n" + " INPUT #1, line$\n" + " PRINT line$\n" + " count = count + 1\n" + "LOOP\n" + "CLOSE #1\n" + "PRINT count;\n" + "PRINT \"lines read\"\n" + ); + + // Test 12: LINE INPUT# and APPEND + runProgram("LINE INPUT and APPEND", + "OPEN \"/tmp/dvxbasic_test2.txt\" FOR OUTPUT AS #2\n" + "PRINT #2, \"First line\"\n" + "CLOSE #2\n" + "\n" + "OPEN \"/tmp/dvxbasic_test2.txt\" FOR APPEND AS #2\n" + "PRINT #2, \"Appended line\"\n" + "CLOSE #2\n" + "\n" + "DIM s$ AS STRING\n" + "OPEN \"/tmp/dvxbasic_test2.txt\" FOR INPUT AS #2\n" + "LINE INPUT #2, s$\n" + "PRINT s$\n" + "LINE INPUT #2, s$\n" + "PRINT s$\n" + "CLOSE #2\n" + ); + + // Test 13: Array -- 1D with default lbound=0 + runProgram("1D Array", + "DIM arr(5) AS INTEGER\n" + "DIM i AS INTEGER\n" + "FOR i = 1 TO 5\n" + " arr(i) = i * i\n" + "NEXT i\n" + "FOR i = 1 TO 5\n" + " PRINT arr(i);\n" + "NEXT i\n" + "PRINT\n" + ); + // Expected: 1 4 9 16 25 + + // Test 14: Multi-dimensional array + runProgram("Multi-dim Array", + "DIM m(2, 2) AS INTEGER\n" + "m(1, 1) = 11\n" + "m(1, 2) = 12\n" + "m(2, 1) = 21\n" + "m(2, 2) = 22\n" + "PRINT m(1, 
1); m(1, 2); m(2, 1); m(2, 2)\n" + ); + // Expected: 11 12 21 22 + + // Test 15: Array with explicit bounds (TO syntax) + runProgram("Array with TO bounds", + "DIM a(1 TO 3) AS INTEGER\n" + "a(1) = 10\n" + "a(2) = 20\n" + "a(3) = 30\n" + "PRINT a(1); a(2); a(3)\n" + ); + // Expected: 10 20 30 + + // Test 16: LBOUND and UBOUND + runProgram("LBOUND/UBOUND", + "DIM a(5 TO 10) AS INTEGER\n" + "PRINT LBOUND(a); UBOUND(a)\n" + ); + // Expected: 5 10 + + // Test 17: User-defined TYPE + runProgram("TYPE", + "TYPE Point\n" + " x AS INTEGER\n" + " y AS INTEGER\n" + "END TYPE\n" + "DIM p AS Point\n" + "p.x = 10\n" + "p.y = 20\n" + "PRINT p.x; p.y\n" + ); + // Expected: 10 20 + + // Test 18: String array + runProgram("String Array", + "DIM names(3) AS STRING\n" + "names(0) = \"Alice\"\n" + "names(1) = \"Bob\"\n" + "names(2) = \"Charlie\"\n" + "DIM i AS INTEGER\n" + "FOR i = 0 TO 2\n" + " PRINT names(i)\n" + "NEXT i\n" + ); + // Expected: Alice / Bob / Charlie + + // Test 19: REDIM with PRESERVE + runProgram("REDIM PRESERVE", + "DIM a(3) AS INTEGER\n" + "a(0) = 100\n" + "a(1) = 200\n" + "a(2) = 300\n" + "REDIM PRESERVE a(5) AS INTEGER\n" + "a(4) = 500\n" + "PRINT a(0); a(1); a(2); a(4)\n" + ); + // Expected: 100 200 300 500 + + // Test 20: ERASE + runProgram("ERASE", + "DIM a(3) AS INTEGER\n" + "a(1) = 42\n" + "ERASE a\n" + "DIM b(2) AS INTEGER\n" + "b(1) = 99\n" + "PRINT b(1)\n" + ); + // Expected: 99 + + // Test 21: Array in FOR loop accumulation + runProgram("Array Accumulation", + "DIM sums(5) AS INTEGER\n" + "DIM i AS INTEGER\n" + "DIM j AS INTEGER\n" + "FOR i = 1 TO 5\n" + " sums(i) = 0\n" + " FOR j = 1 TO i\n" + " sums(i) = sums(i) + j\n" + " NEXT j\n" + "NEXT i\n" + "FOR i = 1 TO 5\n" + " PRINT sums(i);\n" + "NEXT i\n" + "PRINT\n" + ); + // Expected: 1 3 6 10 15 + + // ============================================================ + // Batch 1: Control Flow + // ============================================================ + + // Test: GOTO with forward jump + 
runProgram("GOTO Forward", + "PRINT \"before\"\n" + "GOTO skip\n" + "PRINT \"skipped\"\n" + "skip:\n" + "PRINT \"after\"\n" + ); + // Expected: before / after + + // Test: GOTO with backward jump + runProgram("GOTO Backward", + "DIM n AS INTEGER\n" + "n = 0\n" + "top:\n" + "n = n + 1\n" + "IF n < 5 THEN GOTO top\n" + "PRINT n\n" + ); + // Expected: 5 + + // Test: GOSUB/RETURN + runProgram("GOSUB/RETURN", + "DIM x AS INTEGER\n" + "x = 10\n" + "GOSUB dbl\n" + "PRINT x\n" + "END\n" + "dbl:\n" + "x = x * 2\n" + "RETURN\n" + ); + // Expected: 20 + + // Test: ON ERROR GOTO -- verify error handler catches errors + // and ERR returns the error number + runProgram("ON ERROR GOTO", + "ON ERROR GOTO handler\n" + "PRINT 10 / 0\n" + "END\n" + "handler:\n" + "PRINT \"caught\"\n" + "PRINT ERR\n" + ); + // Expected: caught / 11 + + // Test: Single-line IF + runProgram("Single-line IF", + "DIM x AS INTEGER\n" + "x = 42\n" + "IF x > 10 THEN PRINT \"big\"\n" + "IF x < 10 THEN PRINT \"small\"\n" + "IF x = 42 THEN PRINT \"exact\" ELSE PRINT \"nope\"\n" + ); + // Expected: big / exact + + // Test: Multi-statement line with : + runProgram("Multi-statement :", + "DIM x AS INTEGER\n" + "DIM y AS INTEGER\n" + "x = 1 : y = 2 : PRINT x + y\n" + ); + // Expected: 3 + + // ============================================================ + // Batch 2: Misc Features + // ============================================================ + + // Test: SWAP + runProgram("SWAP", + "DIM a AS INTEGER\n" + "DIM b AS INTEGER\n" + "a = 10\n" + "b = 20\n" + "SWAP a, b\n" + "PRINT a;\n" + "PRINT b\n" + ); + // Expected: 20 10 + + // Test: TIMER (returns number > 0) + runProgram("TIMER", + "DIM t AS DOUBLE\n" + "t = TIMER\n" + "IF t > 0 THEN PRINT \"ok\"\n" + ); + // Expected: ok + + // Test: DATE$ (returns non-empty string) + runProgram("DATE$", + "DIM d$ AS STRING\n" + "d$ = DATE$\n" + "IF LEN(d$) > 0 THEN PRINT \"ok\"\n" + ); + // Expected: ok + + // Test: TIME$ (returns non-empty string) + runProgram("TIME$", + 
"DIM t$ AS STRING\n" + "t$ = TIME$\n" + "IF LEN(t$) > 0 THEN PRINT \"ok\"\n" + ); + // Expected: ok + + // Test: ENVIRON$ + runProgram("ENVIRON$", + "DIM p$ AS STRING\n" + "p$ = ENVIRON$(\"HOME\")\n" + "IF LEN(p$) > 0 THEN PRINT \"ok\"\n" + ); + // Expected: ok + + // ============================================================ + // Batch 3: New features (DATA/READ/RESTORE, DIM SHARED, + // STATIC, DEF FN, OPTION BASE) + // ============================================================ + + // Test: DATA/READ/RESTORE + runProgram("DATA/READ/RESTORE", + "DATA 10, 20, \"hello\"\n" + "DIM a AS INTEGER\n" + "DIM b AS INTEGER\n" + "DIM c AS STRING\n" + "READ a, b, c\n" + "PRINT a; b;\n" + "PRINT c\n" + "RESTORE\n" + "READ a\n" + "PRINT a\n" + ); + // Expected: 10 20 hello / 10 + + // Test: DIM SHARED + runProgram("DIM SHARED", + "DIM SHARED count AS INTEGER\n" + "count = 0\n" + "CALL Increment\n" + "CALL Increment\n" + "CALL Increment\n" + "PRINT count\n" + "SUB Increment\n" + " count = count + 1\n" + "END SUB\n" + ); + // Expected: 3 + + // Test: STATIC + runProgram("STATIC", + "CALL Counter\n" + "CALL Counter\n" + "CALL Counter\n" + "SUB Counter\n" + " STATIC n AS INTEGER\n" + " n = n + 1\n" + " PRINT n;\n" + "END SUB\n" + "PRINT\n" + ); + // Expected: 1 2 3 + + // Test: DEF FN + runProgram("DEF FN", + "DEF FNdouble(x AS INTEGER) = x * 2\n" + "PRINT FNdouble(5)\n" + "PRINT FNdouble(21)\n" + ); + // Expected: 10 / 42 + + // Test: OPTION BASE + runProgram("OPTION BASE", + "OPTION BASE 1\n" + "DIM arr(3) AS INTEGER\n" + "arr(1) = 10\n" + "arr(3) = 30\n" + "PRINT arr(1); arr(3)\n" + ); + // Expected: 10 30 + + // Test: DATA with mixed types + runProgram("DATA mixed types", + "DATA 100, 3.14, \"world\"\n" + "DIM x AS INTEGER\n" + "DIM y AS DOUBLE\n" + "DIM z AS STRING\n" + "READ x, y, z\n" + "PRINT x\n" + "PRINT z\n" + ); + // Expected: 100 / world + + // Test: Multiple DATA statements scattered + runProgram("DATA scattered", + "DIM a AS INTEGER\n" + "DIM b AS INTEGER\n" + 
"DIM c AS INTEGER\n" + "DATA 1, 2\n" + "READ a, b\n" + "DATA 3\n" + "READ c\n" + "PRINT a; b; c\n" + ); + // Expected: 1 2 3 + + // Test: DIM SHARED with SUB modifying shared variable + runProgram("DIM SHARED multi", + "DIM SHARED total AS INTEGER\n" + "DIM SHARED msg AS STRING\n" + "total = 100\n" + "msg = \"start\"\n" + "CALL Modify\n" + "PRINT total\n" + "PRINT msg\n" + "SUB Modify\n" + " total = total + 50\n" + " msg = \"done\"\n" + "END SUB\n" + ); + // Expected: 150 / done + + // ============================================================ + // Batch 4: New I/O and string features + // ============================================================ + + // Test: WRITE # + runProgram("WRITE #", + "OPEN \"/tmp/dvxbasic_write.txt\" FOR OUTPUT AS #1\n" + "WRITE #1, 10, \"hello\", 3.14\n" + "CLOSE #1\n" + "OPEN \"/tmp/dvxbasic_write.txt\" FOR INPUT AS #1\n" + "DIM s AS STRING\n" + "LINE INPUT #1, s\n" + "PRINT s\n" + "CLOSE #1\n" + ); + // Expected: 10,"hello",3.14 + + // Test: FREEFILE + runProgram("FREEFILE", + "DIM f AS INTEGER\n" + "f = FREEFILE\n" + "PRINT f\n" + ); + // Expected: 1 + + // Test: PRINT USING numeric + runProgram("PRINT USING numeric", + "PRINT USING \"###.##\"; 3.14159\n" + ); + // Expected: 3.14 + + // Test: PRINT USING string + runProgram("PRINT USING string", + "PRINT USING \"!\"; \"Hello\"\n" + ); + // Expected: H + + // Test: SPC and TAB in PRINT + runProgram("SPC/TAB", + "PRINT SPC(3); \"hi\"\n" + ); + // Expected: hi + + // Test: Fixed-length string + runProgram("STRING * n", + "DIM s AS STRING * 5\n" + "s = \"Hi\"\n" + "PRINT \"[\" & s & \"]\"\n" + "PRINT LEN(s)\n" + ); + // Expected: [Hi ] / 5 + + // Test: MID$ statement + runProgram("MID$ statement", + "DIM s AS STRING\n" + "s = \"Hello World\"\n" + "MID$(s, 7, 5) = \"BASIC\"\n" + "PRINT s\n" + ); + // Expected: Hello BASIC + + // Test: OPEN FOR BINARY / GET / PUT + runProgram("BINARY GET/PUT", + "DIM v AS INTEGER\n" + "OPEN \"/tmp/dvxbasic_bin.tmp\" FOR BINARY AS #1\n" + "v = 12345\n" + 
"PUT #1, , v\n" + "SEEK #1, 1\n" + "DIM r AS INTEGER\n" + "GET #1, , r\n" + "PRINT r\n" + "CLOSE #1\n" + ); + // Expected: 12345 + + // Test: LOF and LOC + runProgram("LOF/LOC", + "OPEN \"/tmp/dvxbasic_lof.txt\" FOR OUTPUT AS #1\n" + "PRINT #1, \"test\"\n" + "CLOSE #1\n" + "OPEN \"/tmp/dvxbasic_lof.txt\" FOR INPUT AS #1\n" + "DIM sz AS LONG\n" + "sz = LOF(1)\n" + "IF sz > 0 THEN PRINT \"ok\"\n" + "CLOSE #1\n" + ); + // Expected: ok + + // Test: INPUT$(n, #channel) + runProgram("INPUT$", + "OPEN \"/tmp/dvxbasic_inp.txt\" FOR OUTPUT AS #1\n" + "PRINT #1, \"ABCDEF\"\n" + "CLOSE #1\n" + "OPEN \"/tmp/dvxbasic_inp.txt\" FOR INPUT AS #1\n" + "DIM s AS STRING\n" + "s = INPUT$(3, #1)\n" + "PRINT s\n" + "CLOSE #1\n" + ); + // Expected: ABC + + // Test: SEEK function form + runProgram("SEEK function", + "OPEN \"/tmp/dvxbasic_seek.txt\" FOR OUTPUT AS #1\n" + "PRINT #1, \"test\"\n" + "CLOSE #1\n" + "OPEN \"/tmp/dvxbasic_seek.txt\" FOR BINARY AS #1\n" + "DIM p AS LONG\n" + "p = SEEK(1)\n" + "IF p = 1 THEN PRINT \"ok\"\n" + "CLOSE #1\n" + ); + // Expected: ok + + // Test: ON n GOTO + runProgram("ON n GOTO", + "DIM n AS INTEGER\n" + "n = 2\n" + "ON n GOTO ten, twenty, thirty\n" + "PRINT \"none\"\n" + "GOTO done\n" + "ten:\n" + "PRINT \"ten\"\n" + "GOTO done\n" + "twenty:\n" + "PRINT \"twenty\"\n" + "GOTO done\n" + "thirty:\n" + "PRINT \"thirty\"\n" + "done:\n" + ); + // Expected: twenty + + // Test: ON n GOTO (no match) + runProgram("ON n GOTO no match", + "DIM n AS INTEGER\n" + "n = 5\n" + "ON n GOTO aa, bb\n" + "PRINT \"fallthrough\"\n" + "GOTO done2\n" + "aa:\n" + "PRINT \"aa\"\n" + "GOTO done2\n" + "bb:\n" + "PRINT \"bb\"\n" + "done2:\n" + ); + // Expected: fallthrough + + // Test: ON n GOSUB + runProgram("ON n GOSUB", + "DIM n AS INTEGER\n" + "DIM result AS INTEGER\n" + "result = 0\n" + "n = 2\n" + "ON n GOSUB addTen, addTwenty, addThirty\n" + "PRINT result\n" + "GOTO endProg\n" + "addTen:\n" + "result = result + 10\n" + "RETURN\n" + "addTwenty:\n" + "result = result + 20\n" 
+ "RETURN\n" + "addThirty:\n" + "result = result + 30\n" + "RETURN\n" + "endProg:\n" + ); + // Expected: 20 + + // Test: FORMAT$ + runProgram("FORMAT$", + "PRINT FORMAT$(1234.5, \"#,##0.00\")\n" + "PRINT FORMAT$(0.5, \"0.00\")\n" + "PRINT FORMAT$(-42, \"+#0\")\n" + "PRINT FORMAT$(0.75, \"percent\")\n" + ); + // Expected: 1,234.50\n0.50\n-42\n75% + + // Test: SHELL as function expression + runProgram("SHELL function", + "DIM r AS INTEGER\n" + "r = SHELL(\"echo hello > /dev/null\")\n" + "IF r = 0 THEN PRINT \"ok\"\n" + ); + // Expected: ok + + // Test: SHELL as statement + runProgram("SHELL statement", + "SHELL \"echo hello > /dev/null\"\n" + "PRINT \"done\"\n" + ); + // Expected: done + + // Test: OPTION COMPARE TEXT + runProgram("OPTION COMPARE TEXT", + "OPTION COMPARE TEXT\n" + "IF \"hello\" = \"HELLO\" THEN\n" + " PRINT \"equal\"\n" + "ELSE\n" + " PRINT \"not equal\"\n" + "END IF\n" + "IF \"abc\" < \"XYZ\" THEN\n" + " PRINT \"less\"\n" + "END IF\n" + ); + // Expected: equal\nless + + // Test: OPTION COMPARE BINARY (default) + runProgram("OPTION COMPARE BINARY", + "OPTION COMPARE BINARY\n" + "IF \"hello\" = \"HELLO\" THEN\n" + " PRINT \"equal\"\n" + "ELSE\n" + " PRINT \"not equal\"\n" + "END IF\n" + ); + // Expected: not equal + + // Test: EQV operator + runProgram("EQV operator", + "PRINT -1 EQV -1\n" + "PRINT 0 EQV 0\n" + "PRINT -1 EQV 0\n" + "PRINT 0 EQV -1\n" + ); + // Expected: -1\n-1\n0\n0 + + // Test: IMP operator + runProgram("IMP operator", + "PRINT 0 IMP -1\n" + "PRINT -1 IMP 0\n" + "PRINT -1 IMP -1\n" + "PRINT 0 IMP 0\n" + ); + // Expected: -1\n0\n-1\n-1 + + // Test: PRINT USING advanced patterns + runProgram("PRINT USING advanced", + "PRINT USING \"**#,##0.00\"; 1234.5\n" + "PRINT USING \"$$#,##0.00\"; 42.5\n" + "PRINT USING \"+###.##\"; 42.5\n" + "PRINT USING \"+###.##\"; -42.5\n" + "PRINT USING \"###.##-\"; -42.5\n" + "PRINT USING \"###.##-\"; 42.5\n" + "PRINT USING \"#.##^^^^\"; 1234.5\n" + ); + + // Test: DEFINT + runProgram("DEFINT", + "DEFINT 
A-Z\n" + "a = 42\n" + "b = 3.7\n" + "PRINT a; b\n" + ); + + // Test: DEFSTR + runProgram("DEFSTR", + "DEFSTR S\n" + "s = \"hello\"\n" + "PRINT s\n" + ); + + // Test: DEFINT range + runProgram("DEFINT range", + "DEFINT I-N\n" + "i = 10\n" + "j = 20\n" + "x = 3.14\n" + "PRINT i; j; x\n" + ); + + // Test: OPTION EXPLICIT success + runProgram("OPTION EXPLICIT ok", + "OPTION EXPLICIT\n" + "DIM x AS INTEGER\n" + "x = 42\n" + "PRINT x\n" + ); + + // Test: OPTION EXPLICIT failure (should error) + { + printf("=== OPTION EXPLICIT error ===\n"); + const char *src = + "OPTION EXPLICIT\n" + "x = 42\n"; + int32_t len = (int32_t)strlen(src); + BasParserT parser; + basParserInit(&parser, src, len); + bool ok = basParse(&parser); + if (!ok) { + printf("Correctly caught: %s\n", parser.error); + } else { + printf("ERROR: should have failed\n"); + } + basParserFree(&parser); + printf("\n"); + } + + printf("All tests complete.\n"); + return 0; +} diff --git a/dvxbasic/test_lex.c b/dvxbasic/test_lex.c new file mode 100644 index 0000000..2dc2452 --- /dev/null +++ b/dvxbasic/test_lex.c @@ -0,0 +1,24 @@ +// test_lex.c -- Dump lexer tokens +// gcc -O2 -w -o test_lex test_lex.c compiler/lexer.c -lm + +#include "compiler/lexer.h" +#include +#include + +int main(void) { + const char *src = "PRINT \"Hello, World!\"\n"; + BasLexerT lex; + basLexerInit(&lex, src, (int32_t)strlen(src)); + + for (int i = 0; i < 20; i++) { + printf("Token %d: type=%d (%s) text='%s'\n", i, lex.token.type, basTokenName(lex.token.type), lex.token.text); + + if (lex.token.type == TOK_EOF) { + break; + } + + basLexerNext(&lex); + } + + return 0; +} diff --git a/dvxbasic/test_quick.c b/dvxbasic/test_quick.c new file mode 100644 index 0000000..a7bdc8c --- /dev/null +++ b/dvxbasic/test_quick.c @@ -0,0 +1,64 @@ +// test_quick.c -- Quick single-program test +// gcc -O2 -Wall -o test_quick test_quick.c compiler/lexer.c compiler/parser.c compiler/codegen.c compiler/symtab.c runtime/vm.c runtime/values.c -lm + +#include 
"compiler/parser.h" +#include "runtime/vm.h" +#include "runtime/values.h" +#include +#include + +int main(void) { + basStringSystemInit(); + + const char *source = "PRINT \"Hello, World!\"\n"; + printf("Source: [%s]\n", source); + printf("Source len: %d\n", (int)strlen(source)); + + int32_t len = (int32_t)strlen(source); + BasParserT parser; + basParserInit(&parser, source, len); + + if (!basParse(&parser)) { + printf("COMPILE ERROR: %s\n", parser.error); + basParserFree(&parser); + return 1; + } + + printf("Compiled OK (%d bytes of p-code)\n", parser.cg.codeLen); + + // Dump p-code + for (int i = 0; i < parser.cg.codeLen; i++) { + printf("%02X ", parser.cg.code[i]); + } + printf("\n"); + + BasModuleT *mod = basParserBuildModule(&parser); + basParserFree(&parser); + + BasVmT *vm = basVmCreate(); + basVmLoadModule(vm, mod); + vm->callStack[0].localCount = mod->globalCount > 64 ? 64 : mod->globalCount; + vm->callDepth = 1; + + // Step limit + int steps = 0; + vm->running = true; + + while (vm->running && steps < 1000) { + BasVmResultE r = basVmStep(vm); + steps++; + + if (r != BAS_VM_OK) { + printf("[Result: %d after %d steps: %s]\n", r, steps, basVmGetError(vm)); + break; + } + } + + if (steps >= 1000) { + printf("[TIMEOUT after %d steps, PC=%d]\n", steps, vm->pc); + } + + basVmDestroy(vm); + basModuleFree(mod); + return 0; +} diff --git a/dvxbasic/test_vm.c b/dvxbasic/test_vm.c new file mode 100644 index 0000000..6d92ced --- /dev/null +++ b/dvxbasic/test_vm.c @@ -0,0 +1,234 @@ +// test_vm.c -- Quick test for the DVX BASIC VM +// +// Hand-assembles a small p-code program and executes it. +// Tests: PRINT "Hello, World!", arithmetic, FOR loop, string ops. 
+// +// Build (native, not cross-compiled): +// gcc -O2 -Wall -o test_vm test_vm.c runtime/vm.c runtime/values.c -lm + +#include "compiler/opcodes.h" +#include "runtime/vm.h" +#include "runtime/values.h" + +#include +#include + +// ============================================================ +// Helper: emit bytes into a code buffer +// ============================================================ + +static uint8_t sCode[4096]; +static int32_t sCodeLen = 0; + +static void emit8(uint8_t b) { + sCode[sCodeLen++] = b; +} + + +static void emit16(int16_t v) { + memcpy(&sCode[sCodeLen], &v, 2); + sCodeLen += 2; +} + + +static void emitU16(uint16_t v) { + memcpy(&sCode[sCodeLen], &v, 2); + sCodeLen += 2; +} + + +// ============================================================ +// Test 1: PRINT "Hello, World!" +// ============================================================ + +static void test1(void) { + printf("--- Test 1: PRINT \"Hello, World!\" ---\n"); + + sCodeLen = 0; + + // String constant pool + BasStringT *consts[1]; + consts[0] = basStringNew("Hello, World!", 13); + + // Code: PUSH_STR 0; PRINT; PRINT_NL; HALT + emit8(OP_PUSH_STR); + emitU16(0); + emit8(OP_PRINT); + emit8(OP_PRINT_NL); + emit8(OP_HALT); + + BasModuleT module; + memset(&module, 0, sizeof(module)); + module.code = sCode; + module.codeLen = sCodeLen; + module.constants = consts; + module.constCount = 1; + module.entryPoint = 0; + + BasVmT *vm = basVmCreate(); + basVmLoadModule(vm, &module); + BasVmResultE result = basVmRun(vm); + printf("Result: %d (expected %d = HALTED)\n\n", result, BAS_VM_HALTED); + basVmDestroy(vm); + basStringUnref(consts[0]); +} + + +// ============================================================ +// Test 2: Arithmetic: PRINT 2 + 3 * 4 +// ============================================================ + +static void test2(void) { + printf("--- Test 2: PRINT 2 + 3 * 4 (expect 14) ---\n"); + + sCodeLen = 0; + + // Code: PUSH 3; PUSH 4; MUL; PUSH 2; ADD; PRINT; PRINT_NL; HALT + 
emit8(OP_PUSH_INT16); + emit16(3); + emit8(OP_PUSH_INT16); + emit16(4); + emit8(OP_MUL_INT); + emit8(OP_PUSH_INT16); + emit16(2); + emit8(OP_ADD_INT); + emit8(OP_PRINT); + emit8(OP_PRINT_NL); + emit8(OP_HALT); + + BasModuleT module; + memset(&module, 0, sizeof(module)); + module.code = sCode; + module.codeLen = sCodeLen; + module.entryPoint = 0; + + BasVmT *vm = basVmCreate(); + basVmLoadModule(vm, &module); + basVmRun(vm); + basVmDestroy(vm); + printf("\n"); +} + + +// ============================================================ +// Test 3: String concatenation +// ============================================================ + +static void test3(void) { + printf("--- Test 3: PRINT \"Hello\" & \" \" & \"BASIC\" ---\n"); + + sCodeLen = 0; + + BasStringT *consts[3]; + consts[0] = basStringNew("Hello", 5); + consts[1] = basStringNew(" ", 1); + consts[2] = basStringNew("BASIC", 5); + + // Code: PUSH consts[0]; PUSH consts[1]; CONCAT; PUSH consts[2]; CONCAT; PRINT; PRINT_NL; HALT + emit8(OP_PUSH_STR); emitU16(0); + emit8(OP_PUSH_STR); emitU16(1); + emit8(OP_STR_CONCAT); + emit8(OP_PUSH_STR); emitU16(2); + emit8(OP_STR_CONCAT); + emit8(OP_PRINT); + emit8(OP_PRINT_NL); + emit8(OP_HALT); + + BasModuleT module; + memset(&module, 0, sizeof(module)); + module.code = sCode; + module.codeLen = sCodeLen; + module.constants = consts; + module.constCount = 3; + module.entryPoint = 0; + + BasVmT *vm = basVmCreate(); + basVmLoadModule(vm, &module); + basVmRun(vm); + basVmDestroy(vm); + printf("\n"); + + basStringUnref(consts[0]); + basStringUnref(consts[1]); + basStringUnref(consts[2]); +} + + +// ============================================================ +// Test 4: FOR loop -- PRINT 1 to 5 +// ============================================================ + +static void test4(void) { + printf("--- Test 4: FOR i = 1 TO 5: PRINT i: NEXT ---\n"); + + sCodeLen = 0; + + // We need a call frame with at least 1 local (the loop variable) + // For module-level code, we use callStack[0] as 
implicit frame + + // Setup: store initial value in local 0 + // PUSH 1; STORE_LOCAL 0 -- i = 1 + emit8(OP_PUSH_INT16); emit16(1); + emit8(OP_STORE_LOCAL); emitU16(0); + + // Push limit and step for FOR_INIT + // PUSH 5 (limit); PUSH 1 (step) + emit8(OP_PUSH_INT16); emit16(5); + emit8(OP_PUSH_INT16); emit16(1); + emit8(OP_FOR_INIT); emitU16(0); emit8(1); // isLocal=1 + + // Loop body start (record PC for FOR_NEXT offset) + int32_t loopBody = sCodeLen; + + // LOAD_LOCAL 0; PRINT; PRINT " " + emit8(OP_LOAD_LOCAL); emitU16(0); + emit8(OP_PRINT); + + // FOR_NEXT: increment i, test, jump back + emit8(OP_FOR_NEXT); + emitU16(0); // local index + emit8(1); // isLocal=1 + int16_t offset = (int16_t)(loopBody - (sCodeLen + 2)); + emit16(offset); + + // After loop + emit8(OP_PRINT_NL); + emit8(OP_HALT); + + BasModuleT module; + memset(&module, 0, sizeof(module)); + module.code = sCode; + module.codeLen = sCodeLen; + module.entryPoint = 0; + + BasVmT *vm = basVmCreate(); + + // Initialize the implicit main frame with 1 local + vm->callStack[0].localCount = 1; + vm->callDepth = 1; + + basVmLoadModule(vm, &module); + basVmRun(vm); + basVmDestroy(vm); + printf("\n"); +} + + +// ============================================================ +// main +// ============================================================ + +int main(void) { + printf("DVX BASIC VM Tests\n"); + printf("==================\n\n"); + + basStringSystemInit(); + + test1(); + test2(); + test3(); + test4(); + + printf("All tests complete.\n"); + return 0; +}