Initial DVX BASIC Compiler and VM.

This commit is contained in:
Scott Duensing 2026-03-27 00:40:17 -05:00
parent 89690ca97c
commit aa961425c9
17 changed files with 12014 additions and 0 deletions

243
dvxbasic/compiler/codegen.c Normal file
View file

@ -0,0 +1,243 @@
// codegen.c -- DVX BASIC p-code emitter implementation
#include "codegen.h"
#include "opcodes.h"
#include <stdlib.h>
#include <string.h>
// ============================================================
// basAddData
// ============================================================
// Append a copy of val (made with basValCopy) to the DATA pool.
// Returns true when the value was stored, false when the pool already
// holds BAS_MAX_CONSTANTS entries (nothing is stored in that case).
bool basAddData(BasCodeGenT *cg, BasValueT val) {
    bool hasRoom = cg->dataCount < BAS_MAX_CONSTANTS;

    if (hasRoom) {
        cg->dataPool[cg->dataCount] = basValCopy(val);
        cg->dataCount += 1;
    }

    return hasRoom;
}
// ============================================================
// basAddConstant
// ============================================================
// Intern a string in the constant pool and return its index.
//
// If a string with the same length and bytes is already pooled, its
// existing index is returned and nothing is added.
//
// Returns 0 when the string cannot be added (pool already holds
// BAS_MAX_CONSTANTS entries, or the string object could not be created).
// Note that 0 is also a valid index, so callers cannot detect the
// failure from the return value alone.
uint16_t basAddConstant(BasCodeGenT *cg, const char *text, int32_t len) {
    // Reuse an existing entry when the same bytes are already pooled.
    for (int32_t i = 0; i < cg->constCount; i++) {
        const BasStringT *pooled = cg->constants[i];
        if (pooled->len == len && memcmp(pooled->data, text, (size_t)len) == 0) {
            return (uint16_t)i;
        }
    }

    if (cg->constCount >= BAS_MAX_CONSTANTS) {
        return 0;
    }

    BasStringT *str = basStringNew(text, len);
    if (!str) {
        // NOTE(review): assumes basStringNew reports failure with NULL --
        // confirm. A NULL must never enter the pool because the duplicate
        // scan above dereferences every entry.
        return 0;
    }

    uint16_t idx = (uint16_t)cg->constCount;
    cg->constants[cg->constCount++] = str;
    return idx;
}
// ============================================================
// basCodeGenBuildModule
// ============================================================
// Build a heap-allocated BasModuleT from the code generator state.
//
// The module gets its own copy of the emitted code, a constant table that
// shares the generator's strings via basStringRef, and a data pool whose
// values are produced with basValCopy. entryPoint is set to 0.
//
// Returns NULL if any allocation fails; in that case nothing is leaked and
// no string reference has been taken. On success the caller owns the
// module and releases it with basModuleFree().
BasModuleT *basCodeGenBuildModule(BasCodeGenT *cg) {
    BasModuleT *mod = (BasModuleT *)calloc(1, sizeof(BasModuleT));
    if (!mod) {
        return NULL;
    }

    // malloc(0) is allowed to return NULL, which would make an empty
    // program look like an out-of-memory failure; always ask for >= 1 byte.
    size_t codeBytes = cg->codeLen > 0 ? (size_t)cg->codeLen : 1;

    // Allocate every buffer up front, before any string reference is taken,
    // so the failure path only ever has plain memory to release.
    mod->code = (uint8_t *)malloc(codeBytes);
    if (cg->constCount > 0) {
        mod->constants = (BasStringT **)malloc((size_t)cg->constCount * sizeof(BasStringT *));
    }
    if (cg->dataCount > 0) {
        mod->dataPool = (BasValueT *)malloc((size_t)cg->dataCount * sizeof(BasValueT));
    }

    if (!mod->code ||
        (cg->constCount > 0 && !mod->constants) ||
        (cg->dataCount > 0 && !mod->dataPool)) {
        free(mod->dataPool);
        free(mod->constants);
        free(mod->code);
        free(mod);
        return NULL;
    }

    // Copy code
    if (cg->codeLen > 0) {
        memcpy(mod->code, cg->code, (size_t)cg->codeLen);
    }
    mod->codeLen = cg->codeLen;

    // Copy constant pool (share string refs)
    for (int32_t i = 0; i < cg->constCount; i++) {
        mod->constants[i] = basStringRef(cg->constants[i]);
    }
    mod->constCount = cg->constCount;

    mod->globalCount = cg->globalCount;
    mod->entryPoint = 0;

    // Copy data pool
    for (int32_t i = 0; i < cg->dataCount; i++) {
        mod->dataPool[i] = basValCopy(cg->dataPool[i]);
    }
    mod->dataCount = cg->dataCount;

    return mod;
}
// ============================================================
// basCodeGenFree
// ============================================================
// Release everything the code generator holds: unref each pooled string,
// release each DATA value, then reset the constant, data and code counters
// to zero. globalCount is left untouched.
void basCodeGenFree(BasCodeGenT *cg) {
    int32_t idx = 0;

    while (idx < cg->constCount) {
        basStringUnref(cg->constants[idx]);
        idx++;
    }

    idx = 0;
    while (idx < cg->dataCount) {
        basValRelease(&cg->dataPool[idx]);
        idx++;
    }

    cg->codeLen = 0;
    cg->dataCount = 0;
    cg->constCount = 0;
}
// ============================================================
// basCodeGenInit
// ============================================================
// Reset the whole generator state (code buffer, pools and counters) to
// zero bytes.
void basCodeGenInit(BasCodeGenT *cg) {
    memset(cg, 0, sizeof(BasCodeGenT));
}
// ============================================================
// basCodePos
// ============================================================
// Return the number of code bytes emitted so far, i.e. the offset at which
// the next emitted byte will be written.
int32_t basCodePos(const BasCodeGenT *cg) {
    const int32_t nextOffset = cg->codeLen;
    return nextOffset;
}
// ============================================================
// basEmit8
// ============================================================
// Append one byte to the code buffer. When the buffer already holds
// BAS_MAX_CODE bytes the byte is silently dropped.
void basEmit8(BasCodeGenT *cg, uint8_t b) {
    if (cg->codeLen >= BAS_MAX_CODE) {
        return;
    }

    cg->code[cg->codeLen] = b;
    cg->codeLen += 1;
}
// ============================================================
// basEmit16
// ============================================================
// Append a signed 16-bit value to the code buffer in the host's byte
// order. The value is silently dropped if fewer than 2 bytes remain.
void basEmit16(BasCodeGenT *cg, int16_t v) {
    const int32_t endPos = cg->codeLen + 2;

    if (endPos > BAS_MAX_CODE) {
        return;
    }

    memcpy(cg->code + cg->codeLen, &v, 2);
    cg->codeLen = endPos;
}
// ============================================================
// basEmitDouble
// ============================================================
// Append the raw bytes of a double to the code buffer (host
// representation). The value is silently dropped if it does not fit.
void basEmitDouble(BasCodeGenT *cg, double v) {
    const int32_t width = (int32_t)sizeof(double);
    const int32_t endPos = cg->codeLen + width;

    if (endPos > BAS_MAX_CODE) {
        return;
    }

    memcpy(cg->code + cg->codeLen, &v, sizeof(double));
    cg->codeLen = endPos;
}
// ============================================================
// basEmitFloat
// ============================================================
// Append the raw bytes of a float to the code buffer (host
// representation). The value is silently dropped if it does not fit.
void basEmitFloat(BasCodeGenT *cg, float v) {
    const int32_t width = (int32_t)sizeof(float);
    const int32_t endPos = cg->codeLen + width;

    if (endPos > BAS_MAX_CODE) {
        return;
    }

    memcpy(cg->code + cg->codeLen, &v, sizeof(float));
    cg->codeLen = endPos;
}
// ============================================================
// basEmitU16
// ============================================================
// Append an unsigned 16-bit value to the code buffer in the host's byte
// order. The value is silently dropped if fewer than 2 bytes remain.
void basEmitU16(BasCodeGenT *cg, uint16_t v) {
    const int32_t endPos = cg->codeLen + 2;

    if (endPos > BAS_MAX_CODE) {
        return;
    }

    memcpy(cg->code + cg->codeLen, &v, 2);
    cg->codeLen = endPos;
}
// ============================================================
// basModuleFree
// ============================================================
// Destroy a module: free its code buffer, unref every constant string and
// free the table, release every DATA value and free the pool, then free
// the module itself. Passing NULL is a no-op.
void basModuleFree(BasModuleT *mod) {
    if (mod == NULL) {
        return;
    }

    free(mod->code);

    if (mod->constants != NULL) {
        int32_t c = 0;
        while (c < mod->constCount) {
            basStringUnref(mod->constants[c]);
            c++;
        }
        free(mod->constants);
    }

    if (mod->dataPool != NULL) {
        int32_t d = 0;
        while (d < mod->dataCount) {
            basValRelease(&mod->dataPool[d]);
            d++;
        }
        free(mod->dataPool);
    }

    free(mod);
}
// ============================================================
// basPatch16
// ============================================================
// Overwrite the 16-bit value stored at code offset pos (host byte order).
// Used to backpatch jump operands once the target is known.
//
// The patch is silently ignored unless both bytes lie inside the code
// emitted so far, i.e. 0 <= pos and pos + 2 <= codeLen.
void basPatch16(BasCodeGenT *cg, int32_t pos, int16_t val) {
    // Written as "pos <= codeLen - 2" rather than "pos + 2 <= codeLen" so
    // that a huge pos cannot overflow the signed addition.
    if (pos >= 0 && pos <= cg->codeLen - 2) {
        memcpy(&cg->code[pos], &val, 2);
    }
}

View file

@ -0,0 +1,76 @@
// codegen.h -- DVX BASIC p-code emitter
//
// Builds a p-code byte stream and string constant pool from
// calls made by the parser. Provides helpers for backpatching
// forward jumps.
//
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_CODEGEN_H
#define DVXBASIC_CODEGEN_H
#include "../runtime/vm.h"
#include "../runtime/values.h"
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// Code generator state
// ============================================================
// Capacity of the p-code buffer, in bytes.
#define BAS_MAX_CODE 65536
// Capacity (in entries) of both the string constant pool and the DATA pool.
#define BAS_MAX_CONSTANTS 1024
// Code generator state. All storage is inline and fixed-size, so the
// struct itself is large (the code buffer alone is BAS_MAX_CODE bytes).
typedef struct {
// Emitted p-code bytes; only the first codeLen bytes are meaningful.
uint8_t code[BAS_MAX_CODE];
// Number of bytes emitted so far (also the next write offset).
int32_t codeLen;
// String constant pool; entries [0, constCount) are valid.
BasStringT *constants[BAS_MAX_CONSTANTS];
int32_t constCount;
// Copied into the built module's globalCount by basCodeGenBuildModule.
// NOTE(review): presumably the number of global variable slots and
// maintained by the parser -- confirm against the caller.
int32_t globalCount;
// Values collected from DATA statements; entries [0, dataCount) are valid.
BasValueT dataPool[BAS_MAX_CONSTANTS];
int32_t dataCount;
} BasCodeGenT;
// ============================================================
// API
// ============================================================
void basCodeGenInit(BasCodeGenT *cg);
void basCodeGenFree(BasCodeGenT *cg);
// Emit single byte
void basEmit8(BasCodeGenT *cg, uint8_t b);
// Emit 16-bit signed value
void basEmit16(BasCodeGenT *cg, int16_t v);
// Emit 16-bit unsigned value
void basEmitU16(BasCodeGenT *cg, uint16_t v);
// Emit 32-bit float
void basEmitFloat(BasCodeGenT *cg, float v);
// Emit 64-bit double
void basEmitDouble(BasCodeGenT *cg, double v);
// Get current code position (for jump targets)
int32_t basCodePos(const BasCodeGenT *cg);
// Patch a 16-bit value at a previous position (for backpatching jumps)
void basPatch16(BasCodeGenT *cg, int32_t pos, int16_t val);
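// Add a string to the constant pool (identical strings share one entry).
// Returns the pool index. Also returns 0 when the pool is full, so that
// failure cannot be told apart from index 0 by the return value alone.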
uint16_t basAddConstant(BasCodeGenT *cg, const char *text, int32_t len);
// Add a value to the data pool (for DATA statements). Returns true on success.
bool basAddData(BasCodeGenT *cg, BasValueT val);
// Build a BasModuleT from the generated code. The caller takes
// ownership of the module and must free it with basModuleFree().
BasModuleT *basCodeGenBuildModule(BasCodeGenT *cg);
// Free a module built by basCodeGenBuildModule.
void basModuleFree(BasModuleT *mod);
#endif // DVXBASIC_CODEGEN_H

820
dvxbasic/compiler/lexer.c Normal file
View file

@ -0,0 +1,820 @@
// lexer.c -- DVX BASIC lexer implementation
//
// Single-pass tokenizer. Keywords are case-insensitive. Identifiers
// preserve their original case for display but comparisons are
// case-insensitive. Line continuations (underscore at end of line)
// are handled transparently.
#include "lexer.h"
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// ============================================================
// Keyword table
// ============================================================
// One row of the keyword table: a keyword spelling and the token type the
// lexer reports for it.
typedef struct {
const char *text; // keyword spelling; table entries are upper case
BasTokenTypeE type; // token type returned when the spelling matches
} KeywordEntryT;
// Reserved words recognized by the lexer, spelled in upper case.
// lookupKeyword() upper-cases the source text before comparing, which is
// what makes keywords case-insensitive. The table is searched linearly, so
// the order of entries does not affect results. The final { NULL,
// TOK_ERROR } row is a sentinel and is not a keyword.
static const KeywordEntryT sKeywords[] = {
{ "AND", TOK_AND },
{ "APPEND", TOK_APPEND },
{ "AS", TOK_AS },
{ "BASE", TOK_BASE },
{ "BINARY", TOK_BINARY },
{ "BOOLEAN", TOK_BOOLEAN },
{ "BYVAL", TOK_BYVAL },
{ "CALL", TOK_CALL },
{ "CASE", TOK_CASE },
{ "CLOSE", TOK_CLOSE },
{ "CONST", TOK_CONST },
{ "DATA", TOK_DATA },
{ "DECLARE", TOK_DECLARE },
{ "DEF", TOK_DEF },
{ "DEFDBL", TOK_DEFDBL },
{ "DEFINT", TOK_DEFINT },
{ "DEFLNG", TOK_DEFLNG },
{ "DEFSNG", TOK_DEFSNG },
{ "DEFSTR", TOK_DEFSTR },
{ "DIM", TOK_DIM },
{ "DO", TOK_DO },
{ "DOEVENTS", TOK_DOEVENTS },
{ "DOUBLE", TOK_DOUBLE },
{ "ELSE", TOK_ELSE },
{ "ELSEIF", TOK_ELSEIF },
{ "END", TOK_END },
{ "EOF", TOK_EOF_KW },
{ "EQV", TOK_EQV },
{ "ERASE", TOK_ERASE },
{ "ERR", TOK_ERR },
{ "ERROR", TOK_ERROR_KW },
{ "EXPLICIT", TOK_EXPLICIT },
{ "EXIT", TOK_EXIT },
{ "FALSE", TOK_FALSE_KW },
{ "FOR", TOK_FOR },
{ "FUNCTION", TOK_FUNCTION },
{ "GET", TOK_GET },
{ "GOSUB", TOK_GOSUB },
{ "GOTO", TOK_GOTO },
{ "HIDE", TOK_HIDE },
{ "IF", TOK_IF },
{ "IMP", TOK_IMP },
{ "INPUT", TOK_INPUT },
{ "INTEGER", TOK_INTEGER },
{ "IS", TOK_IS },
{ "LBOUND", TOK_LBOUND },
{ "LET", TOK_LET },
{ "LINE", TOK_LINE },
{ "LOAD", TOK_LOAD },
{ "LONG", TOK_LONG },
{ "LOOP", TOK_LOOP },
{ "ME", TOK_ME },
{ "MOD", TOK_MOD },
{ "MSGBOX", TOK_MSGBOX },
{ "NEXT", TOK_NEXT },
{ "NOT", TOK_NOT },
{ "ON", TOK_ON },
{ "OPEN", TOK_OPEN },
{ "OPTION", TOK_OPTION },
{ "OR", TOK_OR },
{ "OUTPUT", TOK_OUTPUT },
{ "PRESERVE", TOK_PRESERVE },
{ "PRINT", TOK_PRINT },
{ "PUT", TOK_PUT },
{ "RANDOM", TOK_RANDOM },
{ "RANDOMIZE", TOK_RANDOMIZE },
{ "READ", TOK_READ },
{ "REDIM", TOK_REDIM },
{ "REM", TOK_REM },
{ "RESTORE", TOK_RESTORE },
{ "RESUME", TOK_RESUME },
{ "RETURN", TOK_RETURN },
{ "SEEK", TOK_SEEK },
{ "SELECT", TOK_SELECT },
{ "SET", TOK_SET },
{ "SHARED", TOK_SHARED },
{ "SHELL", TOK_SHELL },
{ "SHOW", TOK_SHOW },
{ "SINGLE", TOK_SINGLE },
{ "SLEEP", TOK_SLEEP },
{ "STATIC", TOK_STATIC },
{ "STEP", TOK_STEP },
{ "STRING", TOK_STRING_KW },
{ "SUB", TOK_SUB },
{ "SWAP", TOK_SWAP },
{ "THEN", TOK_THEN },
{ "TIMER", TOK_TIMER },
{ "TO", TOK_TO },
{ "TRUE", TOK_TRUE_KW },
{ "TYPE", TOK_TYPE },
{ "UBOUND", TOK_UBOUND },
{ "UNLOAD", TOK_UNLOAD },
{ "UNTIL", TOK_UNTIL },
{ "WEND", TOK_WEND },
{ "WHILE", TOK_WHILE },
{ "WITH", TOK_WITH },
{ "WRITE", TOK_WRITE },
{ "XOR", TOK_XOR },
{ NULL, TOK_ERROR }
};
// Number of real keywords in sKeywords (the trailing sentinel is excluded).
#define KEYWORD_COUNT (sizeof(sKeywords) / sizeof(sKeywords[0]) - 1)
// ============================================================
// Prototypes
// ============================================================
static char advance(BasLexerT *lex);
static bool atEnd(const BasLexerT *lex);
static BasTokenTypeE lookupKeyword(const char *text, int32_t len);
static char peek(const BasLexerT *lex);
static char peekNext(const BasLexerT *lex);
static void setError(BasLexerT *lex, const char *msg);
static void skipLineComment(BasLexerT *lex);
static void skipWhitespace(BasLexerT *lex);
static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex);
static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex);
static BasTokenTypeE tokenizeNumber(BasLexerT *lex);
static BasTokenTypeE tokenizeString(BasLexerT *lex);
static char upperChar(char c);
// ============================================================
// advance
// ============================================================
// Consume and return the next source character, keeping the line/column
// counters up to date. Returns '\0' without consuming anything at end of
// input.
//
// A line ends at '\n' or at a '\r' that is not followed by '\n'. For a
// CRLF pair the line is counted once, on the '\n'. Counting the lone CR
// matters because basLexerNext already treats it as a newline token;
// without it, sources with CR-only line endings would report every token
// on line 1.
static char advance(BasLexerT *lex) {
    if (atEnd(lex)) {
        return '\0';
    }

    char c = lex->source[lex->pos++];

    bool loneCr = (c == '\r') &&
                  (lex->pos >= lex->sourceLen || lex->source[lex->pos] != '\n');

    if (c == '\n' || loneCr) {
        lex->line++;
        lex->col = 1;
    } else {
        lex->col++;
    }

    return c;
}
// ============================================================
// atEnd
// ============================================================
// True once the read position has reached (or passed) the end of the
// source buffer.
static bool atEnd(const BasLexerT *lex) {
    bool hasMore = lex->pos < lex->sourceLen;
    return !hasMore;
}
// ============================================================
// basLexerInit
// ============================================================
// Prepare a lexer over source[0 .. sourceLen) and read the first token so
// that lex->token is valid as soon as this returns. The source buffer is
// referenced, not copied.
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen) {
    memset(lex, 0, sizeof(*lex));

    // Positions are 1-based for line/column, 0-based for the byte offset.
    lex->line = 1;
    lex->col = 1;
    lex->pos = 0;

    lex->sourceLen = sourceLen;
    lex->source = source;

    // Prime the first token
    basLexerNext(lex);
}
// ============================================================
// basLexerNext
// ============================================================
// Scan the next token into lex->token and return its type.
//
// Leading blanks and line continuations are skipped first. The token's
// line/col are the position where the token starts. Line ends (LF, CR,
// CRLF) and apostrophe comments all yield TOK_NEWLINE with text "\n"; a
// comment does not consume the line ending that follows it. Literals and
// identifiers are delegated to the tokenize* helpers. Anything else is an
// operator or punctuation character; an unknown character sets the lexer
// error and yields TOK_ERROR with empty text.
BasTokenTypeE basLexerNext(BasLexerT *lex) {
    BasTokenT *tok = &lex->token;

    skipWhitespace(lex);

    tok->line = lex->line;
    tok->col = lex->col;
    tok->text[0] = '\0';
    tok->textLen = 0;

    if (atEnd(lex)) {
        tok->type = TOK_EOF;
        return TOK_EOF;
    }

    const char c = peek(lex);

    // Line ends and apostrophe comments all become a NEWLINE token.
    if (c == '\n' || c == '\r' || c == '\'') {
        if (c == '\'') {
            skipLineComment(lex);
        } else {
            advance(lex);
            // Fold the LF of a CRLF pair into the same token.
            if (c == '\r' && !atEnd(lex) && peek(lex) == '\n') {
                advance(lex);
            }
        }
        tok->type = TOK_NEWLINE;
        tok->text[0] = '\n';
        tok->text[1] = '\0';
        tok->textLen = 1;
        return TOK_NEWLINE;
    }

    // String literal
    if (c == '"') {
        tok->type = tokenizeString(lex);
        return tok->type;
    }

    // Number (digits, or a '.' immediately followed by a digit)
    if (isdigit((unsigned char)c) || (c == '.' && isdigit((unsigned char)peekNext(lex)))) {
        tok->type = tokenizeNumber(lex);
        return tok->type;
    }

    // Hex literal (&H...)
    if (c == '&' && upperChar(peekNext(lex)) == 'H') {
        tok->type = tokenizeHexLiteral(lex);
        return tok->type;
    }

    // Identifier or keyword
    if (isalpha((unsigned char)c) || c == '_') {
        tok->type = tokenizeIdentOrKeyword(lex);
        return tok->type;
    }

    // Operators and punctuation: consume the first character, then look at
    // the following one for the two-character forms <>, <= and >=.
    advance(lex);

    BasTokenTypeE kind;
    switch (c) {
        case '+':  kind = TOK_PLUS;      break;
        case '-':  kind = TOK_MINUS;     break;
        case '*':  kind = TOK_STAR;      break;
        case '/':  kind = TOK_SLASH;     break;
        case '\\': kind = TOK_BACKSLASH; break;
        case '^':  kind = TOK_CARET;     break;
        case '&':  kind = TOK_AMPERSAND; break;
        case '(':  kind = TOK_LPAREN;    break;
        case ')':  kind = TOK_RPAREN;    break;
        case ',':  kind = TOK_COMMA;     break;
        case ';':  kind = TOK_SEMICOLON; break;
        case ':':  kind = TOK_COLON;     break;
        case '.':  kind = TOK_DOT;       break;
        case '#':  kind = TOK_HASH;      break;
        case '=':  kind = TOK_EQ;        break;

        case '<':
            // peek() yields '\0' at end of input, so no separate end check
            // is needed before comparing.
            if (peek(lex) == '>') {
                advance(lex);
                kind = TOK_NE;
            } else if (peek(lex) == '=') {
                advance(lex);
                kind = TOK_LE;
            } else {
                kind = TOK_LT;
            }
            break;

        case '>':
            if (peek(lex) == '=') {
                advance(lex);
                kind = TOK_GE;
            } else {
                kind = TOK_GT;
            }
            break;

        default:
            setError(lex, "Unexpected character");
            kind = TOK_ERROR;
            break;
    }

    tok->type = kind;

    // Store the operator text (left empty for an error token).
    if (kind != TOK_ERROR) {
        int32_t n = 0;
        tok->text[n++] = c;
        if (kind == TOK_NE || kind == TOK_LE || kind == TOK_GE) {
            // Second character is the one just consumed.
            tok->text[n++] = lex->source[lex->pos - 1];
        }
        tok->text[n] = '\0';
        tok->textLen = n;
    }

    return kind;
}
// ============================================================
// basLexerPeek
// ============================================================
BasTokenTypeE basLexerPeek(const BasLexerT *lex) {
return lex->token.type;
}
// ============================================================
// basTokenName
// ============================================================
// Return a short human-readable name for a token type, for use in
// diagnostics. Literals, punctuation and the special tokens have fixed
// names; keyword tokens return their spelling from the keyword table.
// Any type that matches neither yields "?". The returned string is
// static and must not be freed.
const char *basTokenName(BasTokenTypeE type) {
    const char *fixed = NULL;

    switch (type) {
        case TOK_INT_LIT:    fixed = "integer";     break;
        case TOK_LONG_LIT:   fixed = "long";        break;
        case TOK_FLOAT_LIT:  fixed = "float";       break;
        case TOK_STRING_LIT: fixed = "string";      break;
        case TOK_IDENT:      fixed = "identifier";  break;
        case TOK_DOT:        fixed = "'.'";         break;
        case TOK_COMMA:      fixed = "','";         break;
        case TOK_SEMICOLON:  fixed = "';'";         break;
        case TOK_COLON:      fixed = "':'";         break;
        case TOK_LPAREN:     fixed = "'('";         break;
        case TOK_RPAREN:     fixed = "')'";         break;
        case TOK_HASH:       fixed = "'#'";         break;
        case TOK_PLUS:       fixed = "'+'";         break;
        case TOK_MINUS:      fixed = "'-'";         break;
        case TOK_STAR:       fixed = "'*'";         break;
        case TOK_SLASH:      fixed = "'/'";         break;
        case TOK_BACKSLASH:  fixed = "'\\'";        break;
        case TOK_CARET:      fixed = "'^'";         break;
        case TOK_AMPERSAND:  fixed = "'&'";         break;
        case TOK_EQ:         fixed = "'='";         break;
        case TOK_NE:         fixed = "'<>'";        break;
        case TOK_LT:         fixed = "'<'";         break;
        case TOK_GT:         fixed = "'>'";         break;
        case TOK_LE:         fixed = "'<='";        break;
        case TOK_GE:         fixed = "'>='";        break;
        case TOK_NEWLINE:    fixed = "newline";     break;
        case TOK_EOF:        fixed = "end of file"; break;
        case TOK_ERROR:      fixed = "error";       break;
        default:
            break;
    }

    if (fixed != NULL) {
        return fixed;
    }

    // Keywords: walk the table up to its NULL-text sentinel.
    for (const KeywordEntryT *kw = sKeywords; kw->text != NULL; kw++) {
        if (kw->type == type) {
            return kw->text;
        }
    }

    return "?";
}
// ============================================================
// lookupKeyword
// ============================================================
static BasTokenTypeE lookupKeyword(const char *text, int32_t len) {
// Case-insensitive keyword lookup
for (int32_t i = 0; i < (int32_t)KEYWORD_COUNT; i++) {
const char *kw = sKeywords[i].text;
int32_t kwLen = (int32_t)strlen(kw);
if (kwLen != len) {
continue;
}
bool match = true;
for (int32_t j = 0; j < len; j++) {
if (upperChar(text[j]) != kw[j]) {
match = false;
break;
}
}
if (match) {
return sKeywords[i].type;
}
}
return TOK_IDENT;
}
// ============================================================
// peek
// ============================================================
// Return the character at the read position without consuming it, or
// '\0' at end of input.
static char peek(const BasLexerT *lex) {
    return atEnd(lex) ? '\0' : lex->source[lex->pos];
}
// ============================================================
// peekNext
// ============================================================
// Return the character one past the read position without consuming
// anything, or '\0' when that position is beyond the end of input.
static char peekNext(const BasLexerT *lex) {
    const int32_t ahead = lex->pos + 1;
    return (ahead < lex->sourceLen) ? lex->source[ahead] : '\0';
}
// ============================================================
// setError
// ============================================================
// Record an error message in lex->error, prefixed with the lexer's
// current line and column (the scan position at the time of the call, not
// the start of the offending token). The message is truncated to fit the
// buffer and is always NUL-terminated.
static void setError(BasLexerT *lex, const char *msg) {
    // line/col are int32_t, which is not guaranteed to be the same type as
    // int; cast so the arguments match the %d conversions exactly.
    snprintf(lex->error, sizeof(lex->error), "Line %d, Col %d: %s",
             (int)lex->line, (int)lex->col, msg);
}
// ============================================================
// skipLineComment
// ============================================================
// Consume characters up to, but not including, the next '\n' or '\r' (or
// the end of input). The line ending itself is left for the caller.
static void skipLineComment(BasLexerT *lex) {
    for (;;) {
        if (atEnd(lex)) {
            break;
        }

        char next = peek(lex);
        if (next == '\n' || next == '\r') {
            break;
        }

        advance(lex);
    }
}
// ============================================================
// skipWhitespace
// ============================================================
//
// Skips spaces and tabs. Does NOT skip newlines (they are tokens).
// Handles line continuation: underscore followed by newline joins
// the next line to the current logical line.
// Skip spaces, tabs and line continuations in front of the next token.
// A continuation is an underscore, optional blanks, then a line ending
// (LF, CR or CRLF); it joins the next physical line to the current one.
// An underscore not followed by a line ending is left in place so that it
// can start an identifier.
static void skipWhitespace(BasLexerT *lex) {
    for (;;) {
        // Plain blanks.
        while (!atEnd(lex) && (peek(lex) == ' ' || peek(lex) == '\t')) {
            advance(lex);
        }

        // Anything other than '_' ends the whitespace run.
        if (atEnd(lex) || peek(lex) != '_') {
            return;
        }

        // Tentatively treat the underscore as a continuation marker;
        // remember where we were in case it is not one.
        const int32_t markPos = lex->pos;
        const int32_t markLine = lex->line;
        const int32_t markCol = lex->col;

        advance(lex);
        while (!atEnd(lex) && (peek(lex) == ' ' || peek(lex) == '\t')) {
            advance(lex);
        }

        // peek() yields '\0' at end of input, which is neither line ending.
        const char ending = peek(lex);
        if (ending != '\n' && ending != '\r') {
            // Not a continuation -- rewind to the underscore and stop.
            lex->pos = markPos;
            lex->line = markLine;
            lex->col = markCol;
            return;
        }

        // Swallow the line ending, including the LF of a CRLF pair, then
        // keep skipping blanks on the next line.
        advance(lex);
        if (ending == '\r' && peek(lex) == '\n') {
            advance(lex);
        }
    }
}
// ============================================================
// tokenizeHexLiteral
// ============================================================
// Scan a hexadecimal literal of the form &Hdigits with an optional
// trailing '&'. The caller has verified that the input starts with "&H"
// (either case).
//
// The token text holds the hex digits only (no prefix or suffix),
// truncated to the token buffer. The value is the low 32 bits of the
// digits, interpreted as a signed 32-bit number. With a trailing '&' the
// result is TOK_LONG_LIT (value in longVal); otherwise TOK_INT_LIT (value
// in intVal). A literal with no digits evaluates to 0.
static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex) {
    advance(lex); // skip &
    advance(lex); // skip H

    int32_t idx = 0;
    // Accumulate unsigned: shifting a signed value into or past the sign
    // bit is undefined behavior, which 8+ digit literals would trigger.
    uint32_t bits = 0;

    while (!atEnd(lex) && isxdigit((unsigned char)peek(lex))) {
        char c = advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }

        uint32_t digit;
        if (c >= '0' && c <= '9') {
            digit = (uint32_t)(c - '0');
        } else if (c >= 'A' && c <= 'F') {
            digit = (uint32_t)(c - 'A' + 10);
        } else {
            digit = (uint32_t)(c - 'a' + 10);
        }

        bits = (bits << 4) | digit;
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen = idx;

    // Reinterpret the 32-bit pattern as signed (e.g. FFFFFFFF -> -1).
    int32_t value = (int32_t)bits;

    // Check for trailing & (long suffix)
    if (!atEnd(lex) && peek(lex) == '&') {
        advance(lex);
        lex->token.longVal = (int64_t)value;
        return TOK_LONG_LIT;
    }

    lex->token.intVal = value;
    return TOK_INT_LIT;
}
// ============================================================
// tokenizeIdentOrKeyword
// ============================================================
// Scan an identifier or keyword starting at the current position.
//
// The name is a run of letters, digits and underscores, optionally
// followed by one type suffix (% & ! # $). The token text holds the name
// plus the suffix, truncated to the token buffer; excess characters are
// consumed but not stored.
//
// Returns:
//   - TOK_NEWLINE when the name is REM: the rest of the line is skipped
//     as a comment and the token becomes a newline token.
//   - the keyword's token type when the name is a keyword with no suffix.
//   - TOK_IDENT otherwise (including keyword spellings with a suffix).
static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex) {
    int32_t idx = 0;

    while (!atEnd(lex) && (isalnum((unsigned char)peek(lex)) || peek(lex) == '_')) {
        char c = advance(lex);
        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }
    }

    // Length of the name without any suffix; this is what is compared
    // against the keyword table.
    const int32_t baseLen = idx;

    // Check for type suffix
    bool hasSuffix = false;
    if (!atEnd(lex)) {
        char c = peek(lex);
        if (c == '%' || c == '&' || c == '!' || c == '#' || c == '$') {
            advance(lex);
            hasSuffix = true;
            // Only store the suffix if there is still room for it and the
            // terminator; a maximum-length name would otherwise write past
            // the end of the text buffer.
            if (idx < BAS_MAX_TOKEN_LEN - 1) {
                lex->token.text[idx++] = c;
            }
        }
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen = idx;

    BasTokenTypeE kwType = lookupKeyword(lex->token.text, baseLen);

    // REM is a comment -- skip to end of line
    if (kwType == TOK_REM) {
        skipLineComment(lex);
        lex->token.type = TOK_NEWLINE;
        lex->token.text[0] = '\n';
        lex->token.text[1] = '\0';
        lex->token.textLen = 1;
        return TOK_NEWLINE;
    }

    // A keyword spelling with a type suffix is an ordinary identifier.
    if (kwType != TOK_IDENT && !hasSuffix) {
        return kwType;
    }

    return TOK_IDENT;
}
// ============================================================
// tokenizeNumber
// ============================================================
static BasTokenTypeE tokenizeNumber(BasLexerT *lex) {
int32_t idx = 0;
bool hasDecimal = false;
bool hasExp = false;
// Integer part
while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
if (idx < BAS_MAX_TOKEN_LEN - 1) {
lex->token.text[idx++] = advance(lex);
} else {
advance(lex);
}
}
// Decimal part
if (!atEnd(lex) && peek(lex) == '.' && isdigit((unsigned char)peekNext(lex))) {
hasDecimal = true;
lex->token.text[idx++] = advance(lex); // .
while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
if (idx < BAS_MAX_TOKEN_LEN - 1) {
lex->token.text[idx++] = advance(lex);
} else {
advance(lex);
}
}
}
// Exponent
if (!atEnd(lex) && (upperChar(peek(lex)) == 'E' || upperChar(peek(lex)) == 'D')) {
hasExp = true;
lex->token.text[idx++] = advance(lex);
if (!atEnd(lex) && (peek(lex) == '+' || peek(lex) == '-')) {
lex->token.text[idx++] = advance(lex);
}
while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
if (idx < BAS_MAX_TOKEN_LEN - 1) {
lex->token.text[idx++] = advance(lex);
} else {
advance(lex);
}
}
}
lex->token.text[idx] = '\0';
lex->token.textLen = idx;
// Check for type suffix
if (!atEnd(lex)) {
char c = peek(lex);
if (c == '%') {
advance(lex);
lex->token.intVal = (int32_t)atoi(lex->token.text);
return TOK_INT_LIT;
}
if (c == '&') {
advance(lex);
lex->token.longVal = (int64_t)atol(lex->token.text);
return TOK_LONG_LIT;
}
if (c == '!') {
advance(lex);
lex->token.dblVal = atof(lex->token.text);
return TOK_FLOAT_LIT;
}
if (c == '#') {
advance(lex);
lex->token.dblVal = atof(lex->token.text);
return TOK_FLOAT_LIT;
}
}
// No suffix: determine type from content
if (hasDecimal || hasExp) {
lex->token.dblVal = atof(lex->token.text);
return TOK_FLOAT_LIT;
}
long val = atol(lex->token.text);
if (val >= -32768 && val <= 32767) {
lex->token.intVal = (int32_t)val;
return TOK_INT_LIT;
}
lex->token.longVal = (int64_t)val;
return TOK_LONG_LIT;
}
// ============================================================
// tokenizeString
// ============================================================
// Scan a double-quoted string literal. The caller has verified that the
// current character is the opening quote.
//
// The token text receives the characters between the quotes, truncated to
// the token buffer (excess characters are consumed but not stored). A
// string must close on the same line: reaching a line ending or the end
// of input first sets the lexer error and returns TOK_ERROR, with the
// text holding what was read so far. Otherwise returns TOK_STRING_LIT.
static BasTokenTypeE tokenizeString(BasLexerT *lex) {
    BasTokenT *tok = &lex->token;
    int32_t stored = 0;

    advance(lex); // skip opening quote

    for (;;) {
        if (atEnd(lex)) {
            break;
        }

        char c = peek(lex);
        if (c == '"' || c == '\n' || c == '\r') {
            break;
        }

        advance(lex);
        if (stored < BAS_MAX_TOKEN_LEN - 1) {
            tok->text[stored++] = c;
        }
    }

    const bool closed = !atEnd(lex) && peek(lex) == '"';

    if (closed) {
        advance(lex); // skip closing quote
    } else {
        setError(lex, "Unterminated string literal");
    }

    tok->text[stored] = '\0';
    tok->textLen = stored;

    return closed ? TOK_STRING_LIT : TOK_ERROR;
}
// ============================================================
// upperChar
// ============================================================
// Convert an ASCII lower-case letter to upper case; every other character
// is returned unchanged. Unlike toupper(), this never depends on the
// locale.
static char upperChar(char c) {
    const bool isLower = (c >= 'a') && (c <= 'z');
    return isLower ? (char)(c - 32) : c;
}

221
dvxbasic/compiler/lexer.h Normal file
View file

@ -0,0 +1,221 @@
// lexer.h -- DVX BASIC lexer (tokenizer)
//
// Converts BASIC source text into a stream of tokens. Case-insensitive
// for keywords. Handles line continuations (_), comments (' and REM),
// type suffixes (%, &, !, #, $), and string literals.
//
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_LEXER_H
#define DVXBASIC_LEXER_H
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// Token types
// ============================================================
// Every kind of token the lexer can report. Enumerator values are
// assigned implicitly, so inserting or reordering entries changes their
// numeric values.
typedef enum {
// Literals
TOK_INT_LIT, // integer literal (123, &HFF)
TOK_LONG_LIT, // long literal (123&)
TOK_FLOAT_LIT, // float literal (3.14, 1.5E10)
TOK_STRING_LIT, // "string literal"
// Identifiers and symbols
TOK_IDENT, // variable/function name
TOK_DOT, // .
TOK_COMMA, // ,
TOK_SEMICOLON, // ;
TOK_COLON, // :
TOK_LPAREN, // (
TOK_RPAREN, // )
TOK_HASH, // # (file channel)
// Operators
TOK_PLUS, // +
TOK_MINUS, // -
TOK_STAR, // *
TOK_SLASH, // /
TOK_BACKSLASH, // \ (integer divide)
TOK_CARET, // ^
TOK_AMPERSAND, // & (string concat or hex prefix)
TOK_EQ, // =
TOK_NE, // <>
TOK_LT, // <
TOK_GT, // >
TOK_LE, // <=
TOK_GE, // >=
// Type suffixes (attached to identifier)
// NOTE(review): lexer.c in this commit appends the suffix character to
// the TOK_IDENT text and never returns these token types -- confirm
// whether they are used elsewhere.
TOK_SUFFIX_INT, // %
TOK_SUFFIX_LONG, // &
TOK_SUFFIX_SINGLE, // !
TOK_SUFFIX_DOUBLE, // #
TOK_SUFFIX_STRING, // $
// Keywords
TOK_AND,
TOK_AS,
TOK_BASE,
TOK_BOOLEAN,
TOK_BYVAL,
TOK_CALL,
TOK_CASE,
TOK_CLOSE,
TOK_CONST,
TOK_DATA,
TOK_DECLARE,
TOK_DEF,
TOK_DEFDBL,
TOK_DEFINT,
TOK_DEFLNG,
TOK_DEFSNG,
TOK_DEFSTR,
TOK_DIM,
TOK_DO,
TOK_DOEVENTS,
TOK_DOUBLE,
TOK_ELSE,
TOK_ELSEIF,
TOK_END,
TOK_EOF_KW, // EOF (keyword, not end-of-file)
TOK_EQV,
TOK_ERASE,
TOK_ERR,
TOK_ERROR_KW, // ERROR (keyword, not a lexer error)
TOK_EXPLICIT,
TOK_EXIT,
TOK_FALSE_KW,
TOK_FOR,
TOK_FUNCTION,
TOK_GET,
TOK_GOSUB,
TOK_GOTO,
TOK_HIDE,
TOK_IF,
TOK_IMP,
TOK_INPUT,
TOK_INTEGER,
TOK_IS,
TOK_LBOUND,
TOK_LET,
TOK_LINE,
TOK_LOAD,
TOK_LONG,
TOK_LOOP,
TOK_ME,
TOK_MOD,
TOK_MSGBOX,
TOK_NEXT,
TOK_NOT,
TOK_ON,
TOK_OPEN,
TOK_OPTION,
TOK_OR,
TOK_OUTPUT,
TOK_PRESERVE,
TOK_PRINT,
TOK_PUT,
TOK_RANDOMIZE,
TOK_READ,
TOK_REDIM,
TOK_REM, // REM comment; the lexer turns it into TOK_NEWLINE
TOK_RESTORE,
TOK_RESUME,
TOK_RETURN,
TOK_SEEK,
TOK_SELECT,
TOK_SET,
TOK_SHARED,
TOK_SHELL,
TOK_SHOW,
TOK_SINGLE,
TOK_SLEEP,
TOK_STATIC,
TOK_STEP,
TOK_STRING_KW, // STRING (keyword, not a string literal)
TOK_SUB,
TOK_SWAP,
TOK_THEN,
TOK_TIMER,
TOK_TO,
TOK_TRUE_KW,
TOK_TYPE,
TOK_UBOUND,
TOK_UNLOAD,
TOK_UNTIL,
TOK_WEND,
TOK_WHILE,
TOK_WITH,
TOK_WRITE,
TOK_XOR,
// File modes
TOK_APPEND,
TOK_BINARY,
TOK_RANDOM,
// Special
TOK_NEWLINE, // end of logical line
TOK_EOF, // end of source
TOK_ERROR // lexer error
} BasTokenTypeE;
// ============================================================
// Token
// ============================================================
// Size of the token text buffer, including the terminating NUL. Token
// text longer than BAS_MAX_TOKEN_LEN - 1 characters is truncated.
#define BAS_MAX_TOKEN_LEN 256
// A single token as produced by the lexer.
typedef struct {
BasTokenTypeE type;
int32_t line; // 1-based source line number
int32_t col; // 1-based column number
// Value (depends on type)
// In lexer.c, intVal is set for TOK_INT_LIT, longVal for TOK_LONG_LIT
// and dblVal for TOK_FLOAT_LIT. The members share storage, so only the
// one matching the token type is meaningful.
// NOTE(review): fltVal is not written by lexer.c in this commit.
union {
int32_t intVal;
int64_t longVal;
float fltVal;
double dblVal;
};
char text[BAS_MAX_TOKEN_LEN]; // raw text of the token
int32_t textLen; // number of characters in text, not counting the NUL
} BasTokenT;
// ============================================================
// Lexer state
// ============================================================
// Lexer state: a cursor over a caller-owned source buffer plus the most
// recently scanned token.
typedef struct {
const char *source; // source text (not owned)
int32_t sourceLen; // number of bytes in source
int32_t pos; // current position in source
int32_t line; // current line (1-based)
int32_t col; // current column (1-based)
BasTokenT token; // current token
char error[256]; // last error message ("Line N, Col M: ..."); all zero after basLexerInit until an error occurs
} BasLexerT;
// ============================================================
// API
// ============================================================
// Initialize lexer with source text. The source must remain valid
// for the lifetime of the lexer.
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen);
// Advance to the next token. Returns the token type.
// The token is available in lex->token.
BasTokenTypeE basLexerNext(BasLexerT *lex);
// Peek at the current token type without advancing.
BasTokenTypeE basLexerPeek(const BasLexerT *lex);
// Return human-readable name for a token type.
const char *basTokenName(BasTokenTypeE type);
#endif // DVXBASIC_LEXER_H

287
dvxbasic/compiler/opcodes.h Normal file
View file

@ -0,0 +1,287 @@
// opcodes.h -- DVX BASIC bytecode instruction definitions
//
// Stack-based p-code for the DVX BASIC virtual machine.
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_OPCODES_H
#define DVXBASIC_OPCODES_H
// ============================================================
// Data type tags (used in Value representation)
// ============================================================
#define BAS_TYPE_INTEGER 0 // 16-bit signed
#define BAS_TYPE_LONG 1 // 32-bit signed
#define BAS_TYPE_SINGLE 2 // 32-bit float
#define BAS_TYPE_DOUBLE 3 // 64-bit float
#define BAS_TYPE_STRING 4 // ref-counted dynamic string
#define BAS_TYPE_BOOLEAN 5 // True (-1) or False (0)
#define BAS_TYPE_ARRAY 6 // ref-counted array
#define BAS_TYPE_UDT 7 // ref-counted user-defined type
// ============================================================
// Stack operations
// ============================================================
#define OP_NOP 0x00
#define OP_PUSH_INT16 0x01 // [int16] push 16-bit integer
#define OP_PUSH_INT32 0x02 // [int32] push 32-bit integer
#define OP_PUSH_FLT32 0x03 // [float32] push 32-bit float
#define OP_PUSH_FLT64 0x04 // [float64] push 64-bit float
#define OP_PUSH_STR 0x05 // [uint16 idx] push string from constant pool
#define OP_PUSH_TRUE 0x06 // push boolean True (-1)
#define OP_PUSH_FALSE 0x07 // push boolean False (0)
#define OP_POP 0x08 // discard top of stack
#define OP_DUP 0x09 // duplicate top of stack
// ============================================================
// Variable access
// ============================================================
#define OP_LOAD_LOCAL 0x10 // [uint16 idx] push local variable
#define OP_STORE_LOCAL 0x11 // [uint16 idx] pop to local variable
#define OP_LOAD_GLOBAL 0x12 // [uint16 idx] push global variable
#define OP_STORE_GLOBAL 0x13 // [uint16 idx] pop to global variable
#define OP_LOAD_REF 0x14 // dereference top of stack (ByRef)
#define OP_STORE_REF 0x15 // store through reference on stack
#define OP_LOAD_ARRAY 0x16 // [uint8 dims] indices on stack, array ref below
#define OP_STORE_ARRAY 0x17 // [uint8 dims] value, indices, array ref on stack
#define OP_LOAD_FIELD 0x18 // [uint16 fieldIdx] load UDT field
#define OP_STORE_FIELD 0x19 // [uint16 fieldIdx] store UDT field
#define OP_PUSH_LOCAL_ADDR 0x1A // [uint16 idx] push address of local (for ByRef)
#define OP_PUSH_GLOBAL_ADDR 0x1B // [uint16 idx] push address of global (for ByRef)
// ============================================================
// Arithmetic (integer)
// ============================================================
#define OP_ADD_INT 0x20
#define OP_SUB_INT 0x21
#define OP_MUL_INT 0x22
#define OP_IDIV_INT 0x23 // integer divide (\)
#define OP_MOD_INT 0x24
#define OP_NEG_INT 0x25
// ============================================================
// Arithmetic (float)
// ============================================================
#define OP_ADD_FLT 0x26
#define OP_SUB_FLT 0x27
#define OP_MUL_FLT 0x28
#define OP_DIV_FLT 0x29 // float divide (/)
#define OP_NEG_FLT 0x2A
#define OP_POW 0x2B // exponentiation (^)
// ============================================================
// String operations
// ============================================================
#define OP_STR_CONCAT 0x30
#define OP_STR_LEFT 0x31
#define OP_STR_RIGHT 0x32
#define OP_STR_MID 0x33 // 3 args: str, start, len
#define OP_STR_MID2 0x34 // 2 args: str, start (to end)
#define OP_STR_LEN 0x35
#define OP_STR_INSTR 0x36 // 2 args: str, find
#define OP_STR_INSTR3 0x37 // 3 args: start, str, find
#define OP_STR_UCASE 0x38
#define OP_STR_LCASE 0x39
#define OP_STR_TRIM 0x3A
#define OP_STR_LTRIM 0x3B
#define OP_STR_RTRIM 0x3C
#define OP_STR_CHR 0x3D
#define OP_STR_ASC 0x3E
#define OP_STR_SPACE 0x3F
// ============================================================
// Comparison (push boolean result)
// ============================================================
#define OP_CMP_EQ 0x40
#define OP_CMP_NE 0x41
#define OP_CMP_LT 0x42
#define OP_CMP_GT 0x43
#define OP_CMP_LE 0x44
#define OP_CMP_GE 0x45
// ============================================================
// Logical / bitwise
// ============================================================
#define OP_AND 0x48
#define OP_OR 0x49
#define OP_NOT 0x4A
#define OP_XOR 0x4B
#define OP_EQV 0x4C
#define OP_IMP 0x4D
// ============================================================
// Control flow
// ============================================================
#define OP_JMP 0x50 // [int16 offset] unconditional jump
#define OP_JMP_TRUE 0x51 // [int16 offset] jump if TOS is true
#define OP_JMP_FALSE 0x52 // [int16 offset] jump if TOS is false
#define OP_CALL 0x53 // [uint16 addr] [uint8 argc] [uint8 baseSlot]
#define OP_GOSUB_RET 0x54 // pop PC from eval stack, jump (GOSUB return)
#define OP_RET 0x55 // return from subroutine
#define OP_RET_VAL 0x56 // return from function (value on stack)
#define OP_FOR_INIT 0x57 // [uint16 varIdx] [uint8 isLocal] init FOR
#define OP_FOR_NEXT 0x58 // [uint16 varIdx] [uint8 isLocal] [int16 loopTop]
// ============================================================
// Type conversion
// ============================================================
#define OP_CONV_INT_FLT 0x60 // int -> float
#define OP_CONV_FLT_INT 0x61 // float -> int (banker's rounding)
#define OP_CONV_INT_STR 0x62 // int -> string
#define OP_CONV_STR_INT 0x63 // string -> int (VAL)
#define OP_CONV_FLT_STR 0x64 // float -> string
#define OP_CONV_STR_FLT 0x65 // string -> float (VAL)
#define OP_CONV_INT_LONG 0x66 // int16 -> int32
#define OP_CONV_LONG_INT 0x67 // int32 -> int16
// ============================================================
// I/O
// ============================================================
#define OP_PRINT 0x70 // print TOS to current output
#define OP_PRINT_NL 0x71 // print newline
#define OP_PRINT_TAB 0x72 // print tab (14-column zones)
#define OP_PRINT_SPC 0x73 // [uint8 n] print n spaces
#define OP_INPUT 0x74 // read line into string on stack
#define OP_FILE_OPEN 0x75 // [uint8 mode] filename, channel# on stack
#define OP_FILE_CLOSE 0x76 // channel# on stack
#define OP_FILE_PRINT 0x77 // channel#, value on stack
#define OP_FILE_INPUT 0x78 // channel# on stack, push string
#define OP_FILE_EOF 0x79 // channel# on stack, push boolean
#define OP_FILE_LINE_INPUT 0x7A // channel# on stack, push string
// ============================================================
// UI / Event (used when form system is active)
// ============================================================
#define OP_LOAD_PROP 0x80 // [uint16 ctrl] [uint16 prop] push property value
#define OP_STORE_PROP 0x81 // [uint16 ctrl] [uint16 prop] pop to property
#define OP_CALL_METHOD 0x82 // [uint16 ctrl] [uint16 method] [uint8 argc]
#define OP_LOAD_FORM 0x83 // [uint16 formIdx]
#define OP_UNLOAD_FORM 0x84 // [uint16 formIdx]
#define OP_SHOW_FORM 0x85 // [uint16 formIdx] [uint8 modal]
#define OP_HIDE_FORM 0x86 // [uint16 formIdx]
#define OP_DO_EVENTS 0x87
#define OP_MSGBOX 0x88 // [uint8 flags] message on stack
#define OP_INPUTBOX 0x89 // prompt on stack, push result
#define OP_ME_REF 0x8A // push current form reference
// ============================================================
// Array / misc
// ============================================================
#define OP_DIM_ARRAY 0x90 // [uint8 dims] [uint8 type] bounds on stack
#define OP_REDIM 0x91 // [uint8 dims] [uint8 preserve] bounds on stack
#define OP_ERASE 0x92 // array ref on stack
#define OP_LBOUND 0x93 // [uint8 dim] array ref on stack
#define OP_UBOUND 0x94 // [uint8 dim] array ref on stack
#define OP_ON_ERROR 0x95 // [int16 handler] set error handler (0 = disable)
#define OP_RESUME 0x96 // resume after error
#define OP_RESUME_NEXT 0x97 // resume at next statement
#define OP_RAISE_ERR 0x98 // error number on stack
#define OP_ERR_NUM 0x99 // push current error number
#define OP_ERR_CLEAR 0x9A // clear error state
// ============================================================
// Math built-ins (single opcode each for common functions)
// ============================================================
#define OP_MATH_ABS 0xA0
#define OP_MATH_INT 0xA1 // floor
#define OP_MATH_FIX 0xA2 // truncate toward zero
#define OP_MATH_SGN 0xA3
#define OP_MATH_SQR 0xA4
#define OP_MATH_SIN 0xA5
#define OP_MATH_COS 0xA6
#define OP_MATH_TAN 0xA7
#define OP_MATH_ATN 0xA8
#define OP_MATH_LOG 0xA9
#define OP_MATH_EXP 0xAA
#define OP_MATH_RND 0xAB
#define OP_MATH_RANDOMIZE 0xAC // seed on stack (or TIMER if -1)
// ============================================================
// Conversion built-ins
// ============================================================
#define OP_STR_VAL 0xB0 // VAL(s$) -> number
#define OP_STR_STRF 0xB1 // STR$(n) -> string
#define OP_STR_HEX 0xB2 // HEX$(n) -> string
#define OP_STR_STRING 0xB3 // STRING$(n, char) -> string
// ============================================================
// Extended built-ins
// ============================================================
#define OP_MATH_TIMER 0xB4 // push seconds since midnight as DOUBLE
#define OP_DATE_STR 0xB5 // push DATE$ string "MM-DD-YYYY"
#define OP_TIME_STR 0xB6 // push TIME$ string "HH:MM:SS"
#define OP_SLEEP 0xB7 // pop seconds, sleep
#define OP_ENVIRON 0xB8 // pop env var name, push value string
// ============================================================
// DATA/READ/RESTORE
// ============================================================
#define OP_READ_DATA 0xB9 // push next value from data pool
#define OP_RESTORE 0xBA // reset data pointer to 0
// ============================================================
// WRITE # (comma-delimited with quoted strings)
// ============================================================
#define OP_FILE_WRITE 0xBB // pop channel + value, write in WRITE format
#define OP_FILE_WRITE_SEP 0xBC // pop channel, write comma separator
#define OP_FILE_WRITE_NL 0xBD // pop channel, write newline
// ============================================================
// Random/Binary file I/O
// ============================================================
#define OP_FILE_GET 0xBE // pop channel + recno, read record, push value
#define OP_FILE_PUT 0xBF // pop channel + recno + value, write record
#define OP_FILE_SEEK 0xC0 // pop channel + position, seek
#define OP_FILE_LOF 0xC1 // pop channel, push file length
#define OP_FILE_LOC 0xC2 // pop channel, push current position
#define OP_FILE_FREEFILE 0xC3 // push next free channel number
#define OP_FILE_INPUT_N 0xC4 // pop channel + n, read n chars, push string
// ============================================================
// Fixed-length strings and MID$ assignment
// ============================================================
#define OP_STR_FIXLEN 0xC5 // [uint16 len] pop string, pad/truncate, push
#define OP_STR_MID_ASGN 0xC6 // pop replacement, len, start, str; push modified
// ============================================================
// PRINT USING
// ============================================================
#define OP_PRINT_USING 0xC7 // pop format + value, push formatted string
// ============================================================
// SPC(n) and TAB(n) with stack-based argument
// ============================================================
#define OP_PRINT_TAB_N 0xC8 // pop column count, print spaces to reach column
#define OP_PRINT_SPC_N 0xC9 // pop count, print that many spaces
#define OP_FORMAT 0xCA // pop format string + value, push formatted string
#define OP_SHELL 0xCB // pop command string, call system(), push return value
#define OP_COMPARE_MODE 0xCC // [uint8 mode] set string compare mode (0=binary, 1=text)
// ============================================================
// Halt
// ============================================================
#define OP_HALT 0xFF
#endif // DVXBASIC_OPCODES_H

4324
dvxbasic/compiler/parser.c Normal file

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,57 @@
// parser.h -- DVX BASIC parser (recursive descent)
//
// Single-pass compiler: reads tokens from the lexer and emits
// p-code directly via the code generator. No AST. Forward
// references to SUBs/FUNCTIONs are resolved via backpatching.
//
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_PARSER_H
#define DVXBASIC_PARSER_H
#include "lexer.h"
#include "codegen.h"
#include "symtab.h"
#include "../runtime/vm.h"
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// Parser state
// ============================================================
// Complete state for one compilation. The lexer, code generator, and
// symbol table are embedded by value (not pointers), so the size of this
// struct includes all three.
typedef struct {
// Token source (lexer.h).
BasLexerT lex;
// P-code emitter (codegen.h).
BasCodeGenT cg;
// Symbol table (symtab.h).
BasSymTabT sym;
// Error message text; basParse() callers are told to check this on failure.
char error[512];
// NOTE(review): presumably true once error[] holds a message -- confirm in parser.c.
bool hasError;
// NOTE(review): presumably the source line the error was reported on -- confirm in parser.c.
int32_t errorLine;
int32_t lastUdtTypeId; // index of last resolved UDT type from resolveTypeName
int32_t optionBase; // default array lower bound (0 or 1)
bool optionCompareText; // true = case-insensitive string comparison
bool optionExplicit; // true = variables must be declared with DIM
uint8_t defType[26]; // default type per letter (A-Z), set by DEFINT etc.
char currentProc[BAS_MAX_TOKEN_LEN]; // name of current SUB/FUNCTION
} BasParserT;
// ============================================================
// API
// ============================================================
// Initialize parser state for the given source text.
//   p         - parser to initialize
//   source    - BASIC source text
//   sourceLen - length of source
// NOTE(review): whether the source buffer must stay valid for the life of
// the parser is not stated here -- confirm in parser.c / lexer.c.
void basParserInit(BasParserT *p, const char *source, int32_t sourceLen);
// Parse the entire source and generate p-code.
// Returns true on success, false on error (check p->error).
bool basParse(BasParserT *p);
// Build a module from the parsed code. Returns NULL on error.
// Caller owns the module and must free with basModuleFree().
BasModuleT *basParserBuildModule(BasParserT *p);
// Free resources held by the parser.
void basParserFree(BasParserT *p);
#endif // DVXBASIC_PARSER_H

147
dvxbasic/compiler/symtab.c Normal file
View file

@ -0,0 +1,147 @@
// symtab.c -- DVX BASIC symbol table implementation
#include "symtab.h"
#include <ctype.h>
#include <string.h>
// ============================================================
// Case-insensitive name comparison
// ============================================================
// Fold an ASCII lowercase letter to uppercase; any other character is
// returned unchanged.
static char symFoldUpper(char ch) {
    if (ch >= 'a' && ch <= 'z') {
        return (char)(ch - 32);
    }
    return ch;
}

// Compare two NUL-terminated names, ignoring ASCII letter case.
// Returns true only when both names have the same length and every
// character matches after case folding.
static bool namesEqual(const char *a, const char *b) {
    for (; *a != '\0' && *b != '\0'; a++, b++) {
        if (symFoldUpper(*a) != symFoldUpper(*b)) {
            return false;
        }
    }
    // At least one string has ended; they match only if both ended.
    return *a == *b;
}
// ============================================================
// basSymTabAdd
// ============================================================
// Append a symbol to the table in the current scope (local when
// tab->inLocalScope is set, otherwise global).
// Returns the new entry, or NULL when the table already holds
// BAS_MAX_SYMBOLS entries or the name (case-insensitive) already exists
// in that same scope. The entry is zero-filled, the name is truncated to
// BAS_MAX_SYMBOL_NAME - 1 characters, and isDefined is set to true.
BasSymbolT *basSymTabAdd(BasSymTabT *tab, const char *name, BasSymKindE kind, uint8_t dataType) {
    if (tab->count >= BAS_MAX_SYMBOLS) {
        return NULL;
    }

    BasScopeE curScope = SCOPE_GLOBAL;
    if (tab->inLocalScope) {
        curScope = SCOPE_LOCAL;
    }

    // A name may appear once per scope; the other scope is not consulted.
    for (int32_t n = 0; n < tab->count; n++) {
        const BasSymbolT *existing = &tab->symbols[n];
        if (existing->scope != curScope) {
            continue;
        }
        if (namesEqual(existing->name, name)) {
            return NULL;
        }
    }

    BasSymbolT *entry = &tab->symbols[tab->count];
    tab->count++;

    memset(entry, 0, sizeof(*entry));
    strncpy(entry->name, name, BAS_MAX_SYMBOL_NAME - 1);
    entry->name[BAS_MAX_SYMBOL_NAME - 1] = '\0';
    entry->isDefined = true;
    entry->dataType = dataType;
    entry->scope = curScope;
    entry->kind = kind;
    return entry;
}
// ============================================================
// basSymTabAllocSlot
// ============================================================
// Hand out the next variable slot index for the current scope and advance
// that scope's counter (local counter inside a SUB/FUNCTION, global
// counter otherwise).
int32_t basSymTabAllocSlot(BasSymTabT *tab) {
    int32_t *counter = tab->inLocalScope ? &tab->nextLocalIdx : &tab->nextGlobalIdx;
    int32_t slot = *counter;
    *counter = slot + 1;
    return slot;
}
// ============================================================
// basSymTabEnterLocal
// ============================================================
// Switch the table into local scope and restart local slot numbering
// at zero.
void basSymTabEnterLocal(BasSymTabT *tab) {
    tab->nextLocalIdx = 0;
    tab->inLocalScope = true;
}
// ============================================================
// basSymTabFind
// ============================================================
// Look up a name (case-insensitive). While in local scope, local symbols
// are searched first, newest entry to oldest; if none matches, or when
// not in local scope, the global symbols are searched.
// Returns NULL when the name is not found.
BasSymbolT *basSymTabFind(BasSymTabT *tab, const char *name) {
    if (!tab->inLocalScope) {
        return basSymTabFindGlobal(tab, name);
    }

    int32_t pos = tab->count;
    while (pos-- > 0) {
        BasSymbolT *cand = &tab->symbols[pos];
        if (cand->scope == SCOPE_LOCAL && namesEqual(cand->name, name)) {
            return cand;
        }
    }

    // No local match: fall back to globals.
    return basSymTabFindGlobal(tab, name);
}
// ============================================================
// basSymTabFindGlobal
// ============================================================
// Look up a name (case-insensitive) among global-scope symbols only,
// oldest entry first. Returns NULL when there is no match.
BasSymbolT *basSymTabFindGlobal(BasSymTabT *tab, const char *name) {
    BasSymbolT *cur = tab->symbols;
    BasSymbolT *end = cur + tab->count;

    for (; cur < end; cur++) {
        if (cur->scope != SCOPE_GLOBAL) {
            continue;
        }
        if (namesEqual(cur->name, name)) {
            return cur;
        }
    }
    return NULL;
}
// ============================================================
// basSymTabInit
// ============================================================
// Reset the whole table to zero: no symbols, both slot counters at 0,
// global scope. The table is cleared in place.
void basSymTabInit(BasSymTabT *tab) {
    memset(tab, 0, sizeof(BasSymTabT));
}
// ============================================================
// basSymTabLeaveLocal
// ============================================================
// Leave local scope: drop every SCOPE_LOCAL symbol by compacting the
// remaining entries toward the front of the array (relative order kept),
// then return to global scope and reset the local slot counter.
// Entries are moved, so BasSymbolT pointers obtained earlier may no
// longer refer to the same symbol.
void basSymTabLeaveLocal(BasSymTabT *tab) {
    int32_t kept = 0;

    for (int32_t src = 0; src < tab->count; src++) {
        if (tab->symbols[src].scope == SCOPE_LOCAL) {
            continue;
        }
        // Skip the self-copy when nothing has been removed yet.
        if (src != kept) {
            tab->symbols[kept] = tab->symbols[src];
        }
        kept++;
    }

    tab->count = kept;
    tab->nextLocalIdx = 0;
    tab->inLocalScope = false;
}

129
dvxbasic/compiler/symtab.h Normal file
View file

@ -0,0 +1,129 @@
// symtab.h -- DVX BASIC symbol table
//
// Tracks variables, constants, subroutines, functions, labels, and
// user-defined types during compilation. Supports two scope levels:
// the global scope plus one local scope at a time (per SUB/FUNCTION).
//
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_SYMTAB_H
#define DVXBASIC_SYMTAB_H
#include "../compiler/opcodes.h"
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// Symbol kinds
// ============================================================
// What a symbol table entry names. Stored in BasSymbolT.kind.
typedef enum {
SYM_VARIABLE,
SYM_CONST,
SYM_SUB,
SYM_FUNCTION,
SYM_LABEL,
SYM_TYPE_DEF // user-defined TYPE
} BasSymKindE;
// ============================================================
// Symbol scope
// ============================================================
// Scope a symbol belongs to. Stored in BasSymbolT.scope.
// SCOPE_GLOBAL is 0, so a zero-filled entry defaults to global.
typedef enum {
SCOPE_GLOBAL,
SCOPE_LOCAL
} BasScopeE;
// ============================================================
// Symbol entry
// ============================================================
// Fixed capacities of the arrays embedded in the symbol structures below.
#define BAS_MAX_SYMBOL_NAME 64 // size of name buffers, including the NUL terminator
#define BAS_MAX_PARAMS 16 // capacity of paramTypes[] / paramByVal[]
#define BAS_MAX_CALL_PATCHES 32 // capacity of patchAddrs[]
#define BAS_MAX_UDT_FIELDS 32 // capacity of fields[] in a TYPE definition
// One field of a user-defined TYPE. Instances live in BasSymbolT.fields[].
typedef struct {
char name[BAS_MAX_SYMBOL_NAME]; // field name
uint8_t dataType; // BAS_TYPE_*
int32_t udtTypeId; // if dataType == BAS_TYPE_UDT, index of the TYPE_DEF symbol
} BasFieldDefT;
// One symbol table entry. A single struct serves every symbol kind; the
// field groups below are only meaningful for the kinds named in their
// comments. basSymTabAdd() zero-fills a new entry, then sets name, kind,
// scope, dataType, and isDefined = true; all other fields are left for
// the caller to fill in.
typedef struct {
char name[BAS_MAX_SYMBOL_NAME]; // symbol name (lookups are case-insensitive)
BasSymKindE kind;
BasScopeE scope;
uint8_t dataType; // BAS_TYPE_* for variables/functions
int32_t index; // slot index (local or global)
int32_t codeAddr; // PC address for SUB/FUNCTION/LABEL
bool isDefined; // false = forward-declared
bool isArray;
bool isShared;
int32_t udtTypeId; // for variables of BAS_TYPE_UDT: index of TYPE_DEF symbol
int32_t fixedLen; // for STRING * n: fixed length (0 = variable-length)
// For SUB/FUNCTION: parameter info
int32_t paramCount;
uint8_t paramTypes[BAS_MAX_PARAMS];
bool paramByVal[BAS_MAX_PARAMS];
// Forward-reference backpatch list (code addresses to patch when defined)
int32_t patchAddrs[BAS_MAX_CALL_PATCHES];
int32_t patchCount;
// For CONST: the constant value.
// constInt and constDbl share storage (anonymous union); constStr is separate.
// NOTE(review): which member is valid presumably follows dataType -- confirm in parser.c.
union {
int32_t constInt;
double constDbl;
};
char constStr[256];
// For TYPE_DEF: field definitions
BasFieldDefT fields[BAS_MAX_UDT_FIELDS];
int32_t fieldCount;
} BasSymbolT;
// ============================================================
// Symbol table
// ============================================================
// Maximum number of entries (global and local combined) in the table.
#define BAS_MAX_SYMBOLS 512
// The symbol table. Entries are stored inline in a fixed array, so this
// struct is large (BAS_MAX_SYMBOLS entries, each embedding its own field
// and patch arrays); it is cleared with basSymTabInit().
typedef struct {
BasSymbolT symbols[BAS_MAX_SYMBOLS];
int32_t count; // number of entries in use
int32_t nextGlobalIdx; // next global variable slot
int32_t nextLocalIdx; // next local variable slot (reset per SUB/FUNCTION)
bool inLocalScope; // true when inside SUB/FUNCTION
} BasSymTabT;
// ============================================================
// API
// ============================================================
// Zero the table: no symbols, slot counters at 0, global scope.
void basSymTabInit(BasSymTabT *tab);
// Add a symbol to the current scope. Returns the symbol pointer, or NULL
// if the table is full or the name already exists in the current scope
// (names are compared case-insensitively). Names longer than
// BAS_MAX_SYMBOL_NAME - 1 characters are truncated.
BasSymbolT *basSymTabAdd(BasSymTabT *tab, const char *name, BasSymKindE kind, uint8_t dataType);
// Look up a symbol by name. Searches local scope first (only while in
// local scope), then global. Case-insensitive. Returns NULL if not found.
BasSymbolT *basSymTabFind(BasSymTabT *tab, const char *name);
// Look up a symbol in the global scope only. Returns NULL if not found.
BasSymbolT *basSymTabFindGlobal(BasSymTabT *tab, const char *name);
// Enter local scope (called at SUB/FUNCTION start). Resets the local
// slot counter to 0.
void basSymTabEnterLocal(BasSymTabT *tab);
// Leave local scope (called at END SUB/FUNCTION). Removes local symbols
// by compacting the array, so previously returned BasSymbolT pointers
// may no longer refer to the same symbol.
void basSymTabLeaveLocal(BasSymTabT *tab);
// Allocate the next variable slot (global or local depending on scope).
int32_t basSymTabAllocSlot(BasSymTabT *tab);
#endif // DVXBASIC_SYMTAB_H

633
dvxbasic/runtime/values.c Normal file
View file

@ -0,0 +1,633 @@
// values.c -- DVX BASIC value system implementation
//
// Tagged union values with reference-counted strings. The string
// heap uses simple refcounting: assignment increments, scope exit
// decrements, zero frees. No garbage collector needed.
#include "values.h"
#include "../compiler/opcodes.h"
#include <ctype.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// ============================================================
// String system
// ============================================================
// Singleton empty string -- never freed, always available.
// BasStringT ends in a flexible array member, so the wrapper struct adds
// one extra byte (nul) intended to serve as data[0], the terminator.
// NOTE(review): embedding a struct that has a flexible array member inside
// another struct is a compiler extension rather than strict ISO C -- confirm
// the supported toolchains accept it.
// refCount starts at an arbitrary large value; basStringUnref() returns
// early for this object, so it is never decremented or freed.
static struct {
BasStringT hdr;
char nul;
} sEmptyStringStorage = { { .refCount = 999999, .len = 0, .cap = 1 }, '\0' };
// Public handle to the singleton (declared extern in values.h).
BasStringT *basEmptyString = &sEmptyStringStorage.hdr;
// Allocate an empty string (len 0, refCount 1) whose data buffer holds
// `cap` bytes; requests below 1 are raised to 1.
// Never returns NULL: if malloc fails, a new reference to the shared
// empty string is returned instead. That fallback has cap == 1 no matter
// what was requested, so callers must not write into it.
BasStringT *basStringAlloc(int32_t cap) {
    int32_t bytes = (cap < 1) ? 1 : cap;

    BasStringT *str = (BasStringT *)malloc(sizeof(BasStringT) + bytes);
    if (str == NULL) {
        return basStringRef(basEmptyString);
    }

    str->data[0] = '\0';
    str->cap = bytes;
    str->len = 0;
    str->refCount = 1;
    return str;
}
BasStringT *basStringConcat(const BasStringT *a, const BasStringT *b) {
int32_t newLen = a->len + b->len;
BasStringT *s = basStringAlloc(newLen + 1);
memcpy(s->data, a->data, a->len);
memcpy(s->data + a->len, b->data, b->len);
s->data[newLen] = '\0';
s->len = newLen;
return s;
}
// Byte-wise (case-sensitive) comparison. The common prefix is compared
// with memcmp and its non-zero result is returned as-is; when the prefix
// is equal, the shorter string sorts first (-1 / 1), and equal lengths
// give 0.
int32_t basStringCompare(const BasStringT *a, const BasStringT *b) {
    int32_t shared = (b->len > a->len) ? a->len : b->len;
    int32_t order = memcmp(a->data, b->data, shared);

    if (order == 0 && a->len != b->len) {
        order = (a->len < b->len) ? -1 : 1;
    }
    return order;
}
// Case-insensitive comparison: each byte is folded with toupper() before
// comparing. Returns the difference of the first mismatching folded
// bytes; if the common prefix matches, the shorter string sorts first
// (-1 / 1), and equal lengths give 0.
int32_t basStringCompareCI(const BasStringT *a, const BasStringT *b) {
    int32_t shared = (b->len > a->len) ? a->len : b->len;

    for (int32_t pos = 0; pos < shared; pos++) {
        // Cast through unsigned char: toupper() requires a non-negative value.
        int32_t left = toupper((unsigned char)a->data[pos]);
        int32_t right = toupper((unsigned char)b->data[pos]);
        if (left != right) {
            return left - right;
        }
    }

    if (a->len == b->len) {
        return 0;
    }
    return (a->len < b->len) ? -1 : 1;
}
// Create a string holding a copy of the first `len` bytes of `text`,
// NUL-terminated, with refCount 1.
// Returns a reference to the shared empty string when text is NULL,
// len <= 0, len is too large to add a terminator, or allocation fails.
BasStringT *basStringNew(const char *text, int32_t len) {
    if (!text || len <= 0 || len == INT32_MAX) {
        return basStringRef(basEmptyString);
    }

    BasStringT *s = basStringAlloc(len + 1);

    // basStringAlloc() hands back the shared empty string on allocation
    // failure. Its buffer is a single byte, so it must never be written.
    if (s == basEmptyString) {
        return s;
    }

    memcpy(s->data, text, len);
    s->data[len] = '\0';
    s->len = len;
    return s;
}
// Take an additional reference on s and return s. NULL is passed through
// unchanged.
BasStringT *basStringRef(BasStringT *s) {
    if (s == NULL) {
        return NULL;
    }
    s->refCount += 1;
    return s;
}
// Extract a substring as a new string.
//   start - 0-based offset into s; negative values are treated as 0
//   len   - number of bytes wanted; a negative value, or one that runs
//           past the end of s, means "through the end of s"
// Returns a reference to the shared empty string when start is at or
// beyond the end of s (or the resulting length is 0).
BasStringT *basStringSub(const BasStringT *s, int32_t start, int32_t len) {
    if (start < 0) {
        start = 0;
    }

    if (start >= s->len) {
        return basStringRef(basEmptyString);
    }

    // Compare against the remaining length rather than computing
    // start + len, which could overflow int32 for very large len.
    int32_t avail = s->len - start;
    if (len < 0 || len > avail) {
        len = avail;
    }

    return basStringNew(s->data + start, len);
}
// Initialize the string system. The only state is the static empty
// string; this re-writes its terminator byte.
void basStringSystemInit(void) {
sEmptyStringStorage.nul = '\0';
}
// Shut down the string system. Currently a no-op.
void basStringSystemShutdown(void) {
// Nothing to do -- empty string is static
}
// Drop one reference to s and free it once the count reaches zero (or
// below). NULL and the shared empty string are ignored, so the singleton
// is never decremented or freed.
void basStringUnref(BasStringT *s) {
    if (s == NULL) {
        return;
    }
    if (s == basEmptyString) {
        return;
    }

    s->refCount -= 1;
    if (s->refCount > 0) {
        return;
    }
    free(s);
}
// ============================================================
// Array system
// ============================================================
// Destroy an array unconditionally (the reference count is not checked):
// release every element value, free the element storage, then free the
// array header. NULL is ignored.
void basArrayFree(BasArrayT *arr) {
    if (arr == NULL) {
        return;
    }

    BasValueT *items = arr->elements;
    if (items != NULL) {
        int32_t n = 0;
        while (n < arr->totalElements) {
            basValRelease(&items[n]);
            n++;
        }
        free(items);
    }

    free(arr);
}
// Translate per-dimension indices into an offset into arr->elements.
// Layout is row-major: the last dimension varies fastest.
// Returns -1 when arr is NULL, ndims does not match the array's
// dimension count, or any index lies outside its [lbound, ubound] range.
int32_t basArrayIndex(BasArrayT *arr, int32_t *indices, int32_t ndims) {
    if (arr == NULL || arr->dims != ndims) {
        return -1;
    }

    // Horner form of the row-major sum: walking from the first dimension,
    // scale the running offset by each dimension's extent and add that
    // dimension's zero-based index.
    int32_t offset = 0;
    for (int32_t d = 0; d < ndims; d++) {
        int32_t extent = arr->ubound[d] - arr->lbound[d] + 1;
        int32_t rel = indices[d] - arr->lbound[d];

        if (rel < 0 || rel >= extent) {
            return -1;
        }
        offset = offset * extent + rel;
    }
    return offset;
}
// Allocate a new array with refCount 1.
//   dims        - number of dimensions, 1..BAS_ARRAY_MAX_DIMS
//   lbounds     - lower bound for each dimension (dims entries)
//   ubounds     - upper bound for each dimension (dims entries)
//   elementType - BAS_TYPE_* tag stored in every element
// Elements are zero-filled apart from their type tag.
// Returns NULL when dims is out of range, any dimension has
// ubound < lbound, the total element count does not fit in int32, or
// memory cannot be allocated.
BasArrayT *basArrayNew(int32_t dims, int32_t *lbounds, int32_t *ubounds, uint8_t elementType) {
    if (dims < 1 || dims > BAS_ARRAY_MAX_DIMS) {
        return NULL;
    }

    // Size the array in 64-bit arithmetic before allocating anything.
    // Both (ubound - lbound + 1) and the running product can exceed
    // int32, which would otherwise wrap into a bogus element count.
    int64_t total = 1;

    for (int32_t d = 0; d < dims; d++) {
        int64_t dimSize = (int64_t)ubounds[d] - (int64_t)lbounds[d] + 1;

        if (dimSize < 1) {
            return NULL;
        }

        // total <= INT32_MAX and dimSize <= 2^32 here, so the product
        // always fits in int64.
        total *= dimSize;

        if (total > INT32_MAX) {
            return NULL;
        }
    }

    BasArrayT *arr = (BasArrayT *)calloc(1, sizeof(BasArrayT));

    if (!arr) {
        return NULL;
    }

    arr->refCount = 1;
    arr->elementType = elementType;
    arr->dims = dims;

    for (int32_t d = 0; d < dims; d++) {
        arr->lbound[d] = lbounds[d];
        arr->ubound[d] = ubounds[d];
    }

    arr->totalElements = (int32_t)total;
    arr->elements = (BasValueT *)calloc((size_t)total, sizeof(BasValueT));

    if (!arr->elements) {
        free(arr);
        return NULL;
    }

    // Tag every element with the element type; payloads stay zeroed.
    for (int32_t i = 0; i < arr->totalElements; i++) {
        arr->elements[i].type = elementType;
    }

    return arr;
}
// Take an additional reference on arr and return arr. NULL is passed
// through unchanged.
BasArrayT *basArrayRef(BasArrayT *arr) {
    if (arr == NULL) {
        return NULL;
    }
    arr->refCount += 1;
    return arr;
}
// Drop one reference to arr; when the count reaches zero (or below) the
// array and its elements are destroyed via basArrayFree(). NULL is ignored.
void basArrayUnref(BasArrayT *arr) {
    if (arr == NULL) {
        return;
    }

    arr->refCount -= 1;
    if (arr->refCount > 0) {
        return;
    }
    basArrayFree(arr);
}
// ============================================================
// UDT system
// ============================================================
// Destroy a UDT instance unconditionally (the reference count is not
// checked): release every field value, free the field storage, then free
// the instance. NULL is ignored.
void basUdtFree(BasUdtT *udt) {
    if (udt == NULL) {
        return;
    }

    BasValueT *slots = udt->fields;
    if (slots != NULL) {
        int32_t n = 0;
        while (n < udt->fieldCount) {
            basValRelease(&slots[n]);
            n++;
        }
        free(slots);
    }

    free(udt);
}
// Allocate a new UDT instance with refCount 1 and fieldCount zero-filled
// field values.
//   typeId     - stored as-is in the instance
//   fieldCount - number of fields; must not be negative
// Returns NULL when fieldCount is negative or memory cannot be allocated.
BasUdtT *basUdtNew(int32_t typeId, int32_t fieldCount) {
    if (fieldCount < 0) {
        return NULL;
    }

    BasUdtT *udt = (BasUdtT *)calloc(1, sizeof(BasUdtT));

    if (!udt) {
        return NULL;
    }

    udt->refCount = 1;
    udt->typeId = typeId;
    udt->fieldCount = fieldCount;

    // calloc(0, ...) is allowed to return NULL, which would be mistaken
    // for an allocation failure. Always request at least one slot so a
    // zero-field instance still gets a valid fields pointer.
    size_t slots = fieldCount > 0 ? (size_t)fieldCount : 1;
    udt->fields = (BasValueT *)calloc(slots, sizeof(BasValueT));

    if (!udt->fields) {
        free(udt);
        return NULL;
    }

    return udt;
}
// Take an additional reference on udt and return udt. NULL is passed
// through unchanged.
BasUdtT *basUdtRef(BasUdtT *udt) {
    if (udt == NULL) {
        return NULL;
    }
    udt->refCount += 1;
    return udt;
}
// Drop one reference to udt; when the count reaches zero (or below) the
// instance and its fields are destroyed via basUdtFree(). NULL is ignored.
void basUdtUnref(BasUdtT *udt) {
    if (udt == NULL) {
        return;
    }

    udt->refCount -= 1;
    if (udt->refCount > 0) {
        return;
    }
    basUdtFree(udt);
}
// ============================================================
// Value constructors
// ============================================================
// Build a BAS_TYPE_BOOLEAN value. BASIC booleans are stored as
// -1 (True) or 0 (False).
BasValueT basValBool(bool v) {
    BasValueT out;
    out.type = BAS_TYPE_BOOLEAN;
    if (v) {
        out.boolVal = -1;
    } else {
        out.boolVal = 0;
    }
    return out;
}
// Return a copy of v that owns its own reference: for string, array, and
// UDT values with a non-NULL payload the payload's reference count is
// incremented. Other types are returned unchanged.
BasValueT basValCopy(BasValueT v) {
    switch (v.type) {
        case BAS_TYPE_STRING:
            if (v.strVal) {
                basStringRef(v.strVal);
            }
            break;

        case BAS_TYPE_ARRAY:
            if (v.arrVal) {
                basArrayRef(v.arrVal);
            }
            break;

        case BAS_TYPE_UDT:
            if (v.udtVal) {
                basUdtRef(v.udtVal);
            }
            break;

        default:
            break;
    }
    return v;
}
// Build a BAS_TYPE_DOUBLE value holding v.
BasValueT basValDouble(double v) {
    BasValueT out;
    out.dblVal = v;
    out.type = BAS_TYPE_DOUBLE;
    return out;
}
// Build a BAS_TYPE_INTEGER (16-bit) value holding v.
BasValueT basValInteger(int16_t v) {
    BasValueT out;
    out.intVal = v;
    out.type = BAS_TYPE_INTEGER;
    return out;
}
// Build a BAS_TYPE_LONG (32-bit) value holding v.
BasValueT basValLong(int32_t v) {
    BasValueT out;
    out.longVal = v;
    out.type = BAS_TYPE_LONG;
    return out;
}
// Build a BAS_TYPE_SINGLE (32-bit float) value holding v.
BasValueT basValSingle(float v) {
    BasValueT out;
    out.sngVal = v;
    out.type = BAS_TYPE_SINGLE;
    return out;
}
// Build a BAS_TYPE_STRING value that takes its own reference on s.
// A NULL s is replaced by the shared empty string. The caller keeps its
// own reference to s.
BasValueT basValString(BasStringT *s) {
    BasStringT *target = s ? s : basEmptyString;

    BasValueT out;
    out.type = BAS_TYPE_STRING;
    out.strVal = basStringRef(target);
    return out;
}
// Build a BAS_TYPE_STRING value from a NUL-terminated C string. The text
// is copied via basStringNew(); a NULL pointer is passed on with length 0.
BasValueT basValStringFromC(const char *text) {
    int32_t count = 0;
    if (text) {
        count = (int32_t)strlen(text);
    }

    BasValueT out;
    out.type = BAS_TYPE_STRING;
    out.strVal = basStringNew(text, count);
    return out;
}
// Give up the reference held by *v. String, array, and UDT payloads are
// unreferenced and their pointer is set to NULL; the type tag is left
// as-is. Other types hold no reference and are untouched.
void basValRelease(BasValueT *v) {
    switch (v->type) {
        case BAS_TYPE_STRING:
            basStringUnref(v->strVal);
            v->strVal = NULL;
            break;

        case BAS_TYPE_ARRAY:
            basArrayUnref(v->arrVal);
            v->arrVal = NULL;
            break;

        case BAS_TYPE_UDT:
            basUdtUnref(v->udtVal);
            v->udtVal = NULL;
            break;

        default:
            break;
    }
}
// ============================================================
// Type conversion
// ============================================================
// Convert v to a BAS_TYPE_BOOLEAN value using basValIsTruthy().
BasValueT basValToBool(BasValueT v) {
    bool truth = basValIsTruthy(v);
    return basValBool(truth);
}
// Convert v to a BAS_TYPE_DOUBLE value using basValToNumber().
BasValueT basValToDouble(BasValueT v) {
    double number = basValToNumber(v);
    return basValDouble(number);
}
// Round n to the nearest integer, with exact halves going to the even
// neighbour (banker's rounding: 0.5 -> 0, 1.5 -> 2, 2.5 -> 2, -2.5 -> -2).
// The result is clamped to the int32 range so the final cast is always
// defined; NaN yields 0.
static int32_t basRoundHalfEven(double n) {
    if (n != n) {
        return 0; // NaN
    }

    double lower = floor(n);
    double frac = n - lower; // exact for finite n, in [0, 1)
    double rounded;

    if (frac > 0.5) {
        rounded = lower + 1.0;
    } else if (frac < 0.5) {
        rounded = lower;
    } else {
        // Exactly halfway (or +/-infinity, where frac is NaN and the
        // clamp below decides): pick the even neighbour.
        rounded = (fmod(lower, 2.0) == 0.0) ? lower : lower + 1.0;
    }

    if (rounded >= 2147483647.0) {
        return INT32_MAX;
    }

    if (rounded <= -2147483648.0) {
        return INT32_MIN;
    }

    return (int32_t)rounded;
}

// Convert v to a BAS_TYPE_INTEGER value: numeric value via
// basValToNumber(), rounded with banker's rounding, then narrowed to
// int16. Values outside the int16 range are narrowed by the cast, not
// reported as an error.
BasValueT basValToInteger(BasValueT v) {
    int32_t rounded = basRoundHalfEven(basValToNumber(v));
    return basValInteger((int16_t)rounded);
}

// Convert v to a BAS_TYPE_LONG value: numeric value via basValToNumber(),
// rounded with banker's rounding and clamped to the int32 range.
BasValueT basValToLong(BasValueT v) {
    return basValLong(basRoundHalfEven(basValToNumber(v)));
}
// Numeric value of v as a double. Integer, long, single, double, and
// boolean payloads are widened; a non-empty string is parsed with atof();
// an empty/NULL string and every other type give 0.0.
double basValToNumber(BasValueT v) {
    double result = 0.0;

    switch (v.type) {
        case BAS_TYPE_INTEGER:
            result = (double)v.intVal;
            break;

        case BAS_TYPE_LONG:
            result = (double)v.longVal;
            break;

        case BAS_TYPE_SINGLE:
            result = (double)v.sngVal;
            break;

        case BAS_TYPE_DOUBLE:
            result = v.dblVal;
            break;

        case BAS_TYPE_BOOLEAN:
            result = (double)v.boolVal;
            break;

        case BAS_TYPE_STRING:
            if (v.strVal && v.strVal->len > 0) {
                result = atof(v.strVal->data);
            }
            break;

        default:
            break;
    }

    return result;
}
// Convert v to a BAS_TYPE_SINGLE value: numeric value via
// basValToNumber(), narrowed to float.
BasValueT basValToSingle(BasValueT v) {
    float narrowed = (float)basValToNumber(v);
    return basValSingle(narrowed);
}
// Convert v to a BAS_TYPE_STRING value. A string input is returned as a
// referenced copy; anything else is formatted with basValFormatString().
// Either way the result holds its own reference.
BasValueT basValToString(BasValueT v) {
    if (v.type != BAS_TYPE_STRING) {
        BasValueT out;
        out.type = BAS_TYPE_STRING;
        out.strVal = basValFormatString(v);
        return out;
    }
    return basValCopy(v);
}
// Produce the text form of v as a string the caller owns one reference to.
//   integer / long  - decimal digits
//   single / double - printf "%g" formatting
//   boolean         - "True" or "False"
//   string          - the same string with an extra reference (the shared
//                     empty string if the payload is NULL)
//   anything else   - the shared empty string
BasStringT *basValFormatString(BasValueT v) {
    char text[64];

    // Types that need no numeric formatting are handled up front.
    if (v.type == BAS_TYPE_STRING) {
        return v.strVal ? basStringRef(v.strVal) : basStringRef(basEmptyString);
    }

    if (v.type == BAS_TYPE_BOOLEAN) {
        if (v.boolVal) {
            return basStringNew("True", 4);
        }
        return basStringNew("False", 5);
    }

    switch (v.type) {
        case BAS_TYPE_INTEGER:
            snprintf(text, sizeof(text), "%d", (int)v.intVal);
            break;

        case BAS_TYPE_LONG:
            snprintf(text, sizeof(text), "%ld", (long)v.longVal);
            break;

        case BAS_TYPE_SINGLE:
            snprintf(text, sizeof(text), "%g", (double)v.sngVal);
            break;

        case BAS_TYPE_DOUBLE:
            snprintf(text, sizeof(text), "%g", v.dblVal);
            break;

        default:
            return basStringRef(basEmptyString);
    }

    return basStringNew(text, (int32_t)strlen(text));
}
// Truth test used for conditions: numeric and boolean values are true
// when non-zero, strings are true when non-NULL and non-empty, and every
// other type is false.
bool basValIsTruthy(BasValueT v) {
    bool truth = false;

    switch (v.type) {
        case BAS_TYPE_INTEGER:
            truth = (v.intVal != 0);
            break;

        case BAS_TYPE_LONG:
            truth = (v.longVal != 0);
            break;

        case BAS_TYPE_SINGLE:
            truth = (v.sngVal != 0.0f);
            break;

        case BAS_TYPE_DOUBLE:
            truth = (v.dblVal != 0.0);
            break;

        case BAS_TYPE_BOOLEAN:
            truth = (v.boolVal != 0);
            break;

        case BAS_TYPE_STRING:
            truth = (v.strVal && v.strVal->len > 0);
            break;

        default:
            break;
    }

    return truth;
}
// Three-way comparison of two values. When both are strings they are
// compared byte-wise with basStringCompare() (NULL payloads count as the
// empty string). Otherwise both are converted with basValToNumber() and
// compared numerically, giving -1, 0, or 1.
int32_t basValCompare(BasValueT a, BasValueT b) {
    if (a.type == BAS_TYPE_STRING && b.type == BAS_TYPE_STRING) {
        const BasStringT *left = a.strVal ? a.strVal : basEmptyString;
        const BasStringT *right = b.strVal ? b.strVal : basEmptyString;
        return basStringCompare(left, right);
    }

    double left = basValToNumber(a);
    double right = basValToNumber(b);

    // Each comparison yields 0 or 1, so the difference is -1, 0, or 1.
    return (int32_t)(left > right) - (int32_t)(left < right);
}
// Same as basValCompare(), except that two strings are compared
// case-insensitively with basStringCompareCI(). Non-string pairs are
// compared numerically via basValToNumber(), giving -1, 0, or 1.
int32_t basValCompareCI(BasValueT a, BasValueT b) {
    if (a.type == BAS_TYPE_STRING && b.type == BAS_TYPE_STRING) {
        const BasStringT *left = a.strVal ? a.strVal : basEmptyString;
        const BasStringT *right = b.strVal ? b.strVal : basEmptyString;
        return basStringCompareCI(left, right);
    }

    double left = basValToNumber(a);
    double right = basValToNumber(b);

    // Each comparison yields 0 or 1, so the difference is -1, 0, or 1.
    return (int32_t)(left > right) - (int32_t)(left < right);
}
// Pick the result type for a binary operation on types a and b.
// The first of STRING, DOUBLE, SINGLE, LONG that either operand has wins;
// if neither operand is one of those, the result is INTEGER.
uint8_t basValPromoteType(uint8_t a, uint8_t b) {
    // Highest-priority type first.
    static const uint8_t ranking[] = {
        BAS_TYPE_STRING,
        BAS_TYPE_DOUBLE,
        BAS_TYPE_SINGLE,
        BAS_TYPE_LONG
    };

    for (size_t r = 0; r < sizeof(ranking) / sizeof(ranking[0]); r++) {
        if (a == ranking[r] || b == ranking[r]) {
            return ranking[r];
        }
    }
    return BAS_TYPE_INTEGER;
}

180
dvxbasic/runtime/values.h Normal file
View file

@ -0,0 +1,180 @@
// values.h -- DVX BASIC value representation and string heap
//
// Tagged union value type for the VM's evaluation stack, variables,
// and array elements. Strings are reference-counted for automatic
// memory management without a garbage collector.
//
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_VALUES_H
#define DVXBASIC_VALUES_H
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
// ============================================================
// Reference-counted string
// ============================================================
// Header of a heap string. The character bytes follow the header in the
// same allocation (flexible array member), so a string is created and
// freed as a single block.
typedef struct {
int32_t refCount; // number of owners; the string is freed when this drops to zero
int32_t len; // number of bytes in data, not counting the terminator
int32_t cap; // allocated capacity (>= len + 1)
char data[]; // flexible array member, null-terminated
} BasStringT;
// Allocate a new string holding a copy of the first len bytes of text.
// refCount starts at 1. When text is NULL or len <= 0, a reference to the
// shared empty string is returned instead of a new allocation.
BasStringT *basStringNew(const char *text, int32_t len);
// Allocate an empty string with a given capacity (minimum 1).
// Never returns NULL: if allocation fails, a reference to the shared
// empty string (capacity 1) is returned.
BasStringT *basStringAlloc(int32_t cap);
// Increment reference count and return s. NULL is passed through.
BasStringT *basStringRef(BasStringT *s);
// Decrement reference count. Frees if count reaches zero.
// NULL and the empty string singleton are ignored.
void basStringUnref(BasStringT *s);
// Concatenate two strings. Returns a new string (refCount 1).
// Neither argument may be NULL.
BasStringT *basStringConcat(const BasStringT *a, const BasStringT *b);
// Substring starting at 0-based offset start. A negative start is treated
// as 0; a negative len, or one running past the end, means "to the end".
// Returns a new string (refCount 1), or the shared empty string when
// start is at or past the end of s.
BasStringT *basStringSub(const BasStringT *s, int32_t start, int32_t len);
// Compare two strings byte-wise. Returns <0, 0, >0 like strcmp.
int32_t basStringCompare(const BasStringT *a, const BasStringT *b);
// Compare two strings case-insensitively. Returns <0, 0, >0.
int32_t basStringCompareCI(const BasStringT *a, const BasStringT *b);
// The empty string singleton (never freed).
extern BasStringT *basEmptyString;
// Initialize/shutdown the string system.
void basStringSystemInit(void);
void basStringSystemShutdown(void);
// ============================================================
// Forward declarations
// ============================================================
// BasValueT is defined further down; the array and UDT structs below
// only need pointers to it.
typedef struct BasValueTag BasValueT;
// ============================================================
// Reference-counted array
// ============================================================
// Maximum number of dimensions an array can have; sizes the lbound/ubound
// tables inside BasArrayT.
#define BAS_ARRAY_MAX_DIMS 8
// Array object shared by reference count. Elements of every dimension are
// stored in one flat buffer; basArrayIndex() maps indices to a flat offset.
typedef struct {
int32_t refCount;
uint8_t elementType; // BAS_TYPE_*
int32_t dims; // number of dimensions
int32_t lbound[BAS_ARRAY_MAX_DIMS]; // lower bound per dimension
int32_t ubound[BAS_ARRAY_MAX_DIMS]; // upper bound per dimension
int32_t totalElements; // presumably the length of elements[] -- confirm in values.c
BasValueT *elements; // flat array of values
} BasArrayT;
// Allocate a new array. refCount starts at 1.
// lbounds/ubounds are presumably arrays of `dims` entries each (one pair per
// dimension) -- confirm against values.c.
BasArrayT *basArrayNew(int32_t dims, int32_t *lbounds, int32_t *ubounds, uint8_t elementType);
// Free all elements and release the array.
// NOTE(review): this frees regardless of refCount as far as the comment says;
// shared arrays should presumably go through basArrayUnref() instead.
void basArrayFree(BasArrayT *arr);
// Increment reference count.
BasArrayT *basArrayRef(BasArrayT *arr);
// Decrement reference count. Frees if count reaches zero.
void basArrayUnref(BasArrayT *arr);
// Compute flat index from multi-dimensional indices. Returns -1 if out of bounds.
// indices holds ndims entries; callers must check for the -1 sentinel before
// indexing arr->elements.
int32_t basArrayIndex(BasArrayT *arr, int32_t *indices, int32_t ndims);
// ============================================================
// Reference-counted user-defined type instance
// ============================================================
// One instance of a user-defined TYPE, shared by reference count.
// Field values are held in a separately allocated array of BasValueT.
typedef struct {
int32_t refCount;
int32_t typeId; // index into type definition table
int32_t fieldCount; // number of entries in fields[]
BasValueT *fields; // array of field values
} BasUdtT;
// Allocate a new UDT instance. refCount starts at 1.
// typeId and fieldCount correspond to the BasUdtT members of the same name.
BasUdtT *basUdtNew(int32_t typeId, int32_t fieldCount);
// Free all fields and release the UDT.
// NOTE(review): as documented this ignores refCount; shared instances should
// presumably be released with basUdtUnref() instead.
void basUdtFree(BasUdtT *udt);
// Increment reference count.
BasUdtT *basUdtRef(BasUdtT *udt);
// Decrement reference count. Frees if count reaches zero.
void basUdtUnref(BasUdtT *udt);
// ============================================================
// Tagged value
// ============================================================
// Tagged union holding any BASIC value. `type` selects which union member is
// valid. The union is anonymous, so members are accessed directly on the
// value (v.intVal, v.strVal, ...).
struct BasValueTag {
uint8_t type; // BAS_TYPE_*
union {
int16_t intVal; // BAS_TYPE_INTEGER
int32_t longVal; // BAS_TYPE_LONG
float sngVal; // BAS_TYPE_SINGLE
double dblVal; // BAS_TYPE_DOUBLE
BasStringT *strVal; // BAS_TYPE_STRING (ref-counted)
int16_t boolVal; // BAS_TYPE_BOOLEAN (True=-1, False=0)
BasArrayT *arrVal; // BAS_TYPE_ARRAY (ref-counted)
BasUdtT *udtVal; // BAS_TYPE_UDT (ref-counted)
};
};
// Create values
// Each constructor returns a BasValueT by value.
BasValueT basValInteger(int16_t v);
BasValueT basValLong(int32_t v);
BasValueT basValSingle(float v);
BasValueT basValDouble(double v);
// NOTE(review): whether basValString takes its own reference on s or adopts
// the caller's is not visible here -- confirm against values.c.
BasValueT basValString(BasStringT *s);
BasValueT basValStringFromC(const char *text);
BasValueT basValBool(bool v);
// Copy a value (increments string refcount if applicable).
// NOTE(review): BasArrayT and BasUdtT are also ref-counted; the comment only
// mentions strings -- confirm whether array/UDT values are retained too.
BasValueT basValCopy(BasValueT v);
// Release a value (decrements string refcount if applicable).
// NOTE(review): same question as basValCopy for array/UDT values.
void basValRelease(BasValueT *v);
// Convert a value to a specific type. Returns the converted value.
// The original is NOT released -- caller manages lifetime.
BasValueT basValToInteger(BasValueT v);
BasValueT basValToLong(BasValueT v);
BasValueT basValToSingle(BasValueT v);
BasValueT basValToDouble(BasValueT v);
BasValueT basValToString(BasValueT v);
BasValueT basValToBool(BasValueT v);
// Get the numeric value as a double (for mixed-type arithmetic).
double basValToNumber(BasValueT v);
// Get the string representation. Returns a new ref-counted string.
// The caller is presumably responsible for the matching basStringUnref().
BasStringT *basValFormatString(BasValueT v);
// Check if a value is truthy (non-zero number, non-empty string).
bool basValIsTruthy(BasValueT v);
// Compare two values. Returns -1, 0, or 1.
// Numeric types are compared numerically. Strings lexicographically.
int32_t basValCompare(BasValueT a, BasValueT b);
// Compare two values case-insensitively (for OPTION COMPARE TEXT).
int32_t basValCompareCI(BasValueT a, BasValueT b);
// Determine the common type for a binary operation (type promotion).
// Integer + Single -> Single, etc.
// a and b are BAS_TYPE_* tags; the result is the BAS_TYPE_* tag to compute in.
uint8_t basValPromoteType(uint8_t a, uint8_t b);
#endif // DVXBASIC_VALUES_H

3514
dvxbasic/runtime/vm.c Normal file

File diff suppressed because it is too large Load diff

211
dvxbasic/runtime/vm.h Normal file
View file

@ -0,0 +1,211 @@
// vm.h -- DVX BASIC virtual machine
//
// Stack-based p-code interpreter. Executes compiled BASIC bytecode.
// Embeddable: the host provides I/O callbacks. No DVX dependencies.
//
// Usage:
// BasVmT *vm = basVmCreate();
// basVmSetPrintCallback(vm, myPrintFn, myCtx);
// basVmSetInputCallback(vm, myInputFn, myCtx);
// basVmLoadModule(vm, module); // module: BasModuleT * produced by the compiler
// BasVmResultE result = basVmRun(vm);
// basVmDestroy(vm);
#ifndef DVXBASIC_VM_H
#define DVXBASIC_VM_H
#include "values.h"
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// Limits
// ============================================================
// Fixed capacities of the VM. They size the arrays embedded in BasVmT and
// BasCallFrameT, so changing them changes sizeof(BasVmT).
#define BAS_VM_STACK_SIZE 256 // evaluation stack depth
#define BAS_VM_CALL_STACK_SIZE 64 // max call nesting
#define BAS_VM_MAX_GLOBALS 512 // global variable slots
#define BAS_VM_MAX_LOCALS 64 // locals per stack frame
#define BAS_VM_MAX_FOR_DEPTH 32 // nested FOR loops
#define BAS_VM_MAX_FILES 16 // open file channels (BasVmT.files notes index 0 is unused)
// ============================================================
// Result codes
// ============================================================
// Status returned by basVmRun() and basVmStep(). The test drivers treat
// BAS_VM_OK and BAS_VM_HALTED as success and everything else as a failure
// whose text is available from basVmGetError().
typedef enum {
BAS_VM_OK, // program completed normally
BAS_VM_HALTED, // HALT instruction reached
BAS_VM_YIELDED, // DoEvents yielded control
BAS_VM_ERROR, // runtime error
BAS_VM_STACK_OVERFLOW,
BAS_VM_STACK_UNDERFLOW,
BAS_VM_CALL_OVERFLOW,
BAS_VM_DIV_BY_ZERO,
BAS_VM_TYPE_MISMATCH,
BAS_VM_OUT_OF_MEMORY,
BAS_VM_BAD_OPCODE,
BAS_VM_FILE_ERROR,
BAS_VM_SUBSCRIPT_RANGE,
BAS_VM_USER_ERROR // ON ERROR raised
} BasVmResultE;
// ============================================================
// I/O callbacks (host-provided)
// ============================================================
// For all three callbacks, ctx is an opaque host pointer; it is supplied
// together with the function pointer through the matching
// basVmSet*Callback() call (presumably passed back unchanged -- confirm).
//
// Print callback: called for PRINT output.
// text is a null-terminated string. newline indicates whether
// to advance to the next line after printing.
typedef void (*BasPrintFnT)(void *ctx, const char *text, bool newline);
// Input callback: called for INPUT statement.
// prompt is the text to display. The callback must fill buf
// (up to bufSize-1 chars, null-terminated). Returns true on
// success, false on cancel/error.
typedef bool (*BasInputFnT)(void *ctx, const char *prompt, char *buf, int32_t bufSize);
// DoEvents callback: called for DoEvents statement.
// The host should process pending events and return. Returns
// true to continue execution, false to stop the program.
typedef bool (*BasDoEventsFnT)(void *ctx);
// ============================================================
// Call stack frame
// ============================================================
// One entry of the VM call stack. Each frame embeds its own fixed-size
// locals array, so a frame costs BAS_VM_MAX_LOCALS values regardless of
// localCount. The test drivers use callStack[0] as the implicit frame for
// module-level code.
typedef struct {
int32_t returnPc; // instruction to return to
int32_t baseSlot; // base index in locals array
int32_t localCount; // number of locals in this frame
BasValueT locals[BAS_VM_MAX_LOCALS];
} BasCallFrameT;
// ============================================================
// FOR loop state
// ============================================================
// Bookkeeping for one active FOR loop; entries live on BasVmT.forStack.
typedef struct {
int32_t varIdx; // loop variable slot index
bool isLocal; // true = local, false = global
BasValueT limit; // upper bound
BasValueT step; // step value
int32_t loopTop; // PC of the loop body start
} BasForStateT;
// ============================================================
// File channel
// ============================================================
// State of one BASIC file channel (OPEN ... AS #n); stored in BasVmT.files.
typedef struct {
void *handle; // FILE* or platform-specific
int32_t mode; // 0=closed, 1=input, 2=output, 3=append, 4=random, 5=binary
} BasFileChannelT;
// ============================================================
// Compiled module (output of the compiler)
// ============================================================
// A compiled program as handed from the compiler to the VM via
// basVmLoadModule(). The hand-assembled VM tests fill one of these in
// directly, leaving unused members zeroed.
typedef struct {
uint8_t *code; // p-code bytecode
int32_t codeLen; // length of code in bytes
BasStringT **constants; // string constant pool
int32_t constCount; // number of entries in constants
int32_t globalCount; // number of global variable slots needed
int32_t entryPoint; // PC of the first instruction (module-level code)
BasValueT *dataPool; // DATA statement value pool
int32_t dataCount; // number of values in the data pool
} BasModuleT;
// ============================================================
// VM state
// ============================================================
// Complete interpreter state. All stacks and variable tables are fixed-size
// arrays embedded in the struct (see the BAS_VM_* limits), which makes the
// struct large; instances are obtained from basVmCreate().
typedef struct {
// Program
BasModuleT *module; // module set by basVmLoadModule()
// Execution
int32_t pc; // program counter
bool running;
bool yielded;
// Evaluation stack
BasValueT stack[BAS_VM_STACK_SIZE];
int32_t sp; // stack pointer (index of next free slot)
// Call stack
BasCallFrameT callStack[BAS_VM_CALL_STACK_SIZE];
int32_t callDepth; // the test drivers set this to 1 when using callStack[0] as the main frame
// FOR loop stack
BasForStateT forStack[BAS_VM_MAX_FOR_DEPTH];
int32_t forDepth;
// Global variables
BasValueT globals[BAS_VM_MAX_GLOBALS];
// File channels (1-based, index 0 unused)
BasFileChannelT files[BAS_VM_MAX_FILES];
// DATA/READ pointer
int32_t dataPtr; // current READ position in data pool
// String comparison mode
bool compareTextMode; // true = case-insensitive comparisons
// Error handling
// NOTE(review): 0 doubles as "no handler" and as a valid PC (the hand-built
// tests use entryPoint 0) -- confirm a handler can never sit at PC 0.
int32_t errorHandler; // PC of ON ERROR GOTO handler (0 = none)
int32_t errorNumber; // current Err number
int32_t errorPc; // PC of the instruction that caused the error (for RESUME)
int32_t errorNextPc; // PC of the next instruction after error (for RESUME NEXT)
bool inErrorHandler; // true when executing error handler code
char errorMsg[256]; // current error description
// I/O callbacks
BasPrintFnT printFn;
void *printCtx;
BasInputFnT inputFn;
void *inputCtx;
BasDoEventsFnT doEventsFn;
void *doEventsCtx;
} BasVmT;
// ============================================================
// API
// ============================================================
// Create a new VM instance.
// NOTE(review): presumably returns NULL on allocation failure -- confirm in vm.c.
BasVmT *basVmCreate(void);
// Destroy a VM instance and free all resources.
void basVmDestroy(BasVmT *vm);
// Load a compiled module into the VM.
// The test drivers keep the module alive until after basVmDestroy() and free
// it themselves, so the VM presumably borrows the module rather than owning
// it -- confirm in vm.c.
void basVmLoadModule(BasVmT *vm, BasModuleT *module);
// Execute the loaded module. Returns when the program ends,
// halts, yields, or hits an error.
BasVmResultE basVmRun(BasVmT *vm);
// Execute a single instruction. Returns the result.
// Useful for stepping/debugging.
BasVmResultE basVmStep(BasVmT *vm);
// Reset the VM to initial state (clear stack, globals, PC).
void basVmReset(BasVmT *vm);
// Set I/O callbacks.
// fn/ctx pairs correspond to the printFn/printCtx, inputFn/inputCtx and
// doEventsFn/doEventsCtx members of BasVmT.
void basVmSetPrintCallback(BasVmT *vm, BasPrintFnT fn, void *ctx);
void basVmSetInputCallback(BasVmT *vm, BasInputFnT fn, void *ctx);
void basVmSetDoEventsCallback(BasVmT *vm, BasDoEventsFnT fn, void *ctx);
// Push/pop values on the evaluation stack (for host integration).
// Both return bool; presumably false signals overflow/underflow -- confirm.
bool basVmPush(BasVmT *vm, BasValueT val);
bool basVmPop(BasVmT *vm, BasValueT *val);
// Get the current error message.
const char *basVmGetError(const BasVmT *vm);
#endif // DVXBASIC_VM_H

850
dvxbasic/test_compiler.c Normal file
View file

@ -0,0 +1,850 @@
// test_compiler.c -- End-to-end test: source -> compiler -> VM -> output
//
// Build (native):
// gcc -O2 -Wall -o test_compiler test_compiler.c \
// compiler/lexer.c compiler/parser.c compiler/codegen.c compiler/symtab.c \
// runtime/vm.c runtime/values.c -lm
#include "compiler/parser.h"
#include "runtime/vm.h"
#include "runtime/values.h"
#include <stdio.h>
#include <string.h>
static void runProgram(const char *name, const char *source) {
printf("=== %s ===\n", name);
int32_t len = (int32_t)strlen(source);
BasParserT parser;
basParserInit(&parser, source, len);
if (!basParse(&parser)) {
printf("COMPILE ERROR: %s\n\n", parser.error);
basParserFree(&parser);
return;
}
BasModuleT *mod = basParserBuildModule(&parser);
basParserFree(&parser);
if (!mod) {
printf("MODULE BUILD FAILED\n\n");
return;
}
BasVmT *vm = basVmCreate();
basVmLoadModule(vm, mod);
// Module-level code uses callStack[0] as implicit main frame
vm->callStack[0].localCount = mod->globalCount > 64 ? 64 : mod->globalCount;
vm->callDepth = 1;
BasVmResultE result = basVmRun(vm);
if (result != BAS_VM_HALTED && result != BAS_VM_OK) {
printf("[VM error %d: %s]\n", result, basVmGetError(vm));
}
basVmDestroy(vm);
basModuleFree(mod);
printf("\n");
}
// Runs every end-to-end sample program through runProgram() in order.
//
// Nothing is asserted: each program prints its output under a banner and the
// "Expected:" comments record what a correct run should show. Several
// programs use /tmp, /dev/null and the HOME environment variable, so the
// suite is intended for a POSIX host.
//
// The string system is initialized once before the first program and shut
// down once after the last. Always returns 0.
int main(void) {
    printf("DVX BASIC Compiler Tests\n");
    printf("========================\n\n");
    basStringSystemInit();

    // Test 1: Hello World
    runProgram("Hello World",
        "PRINT \"Hello, World!\"\n"
    );

    // Test 2: Arithmetic
    runProgram("Arithmetic",
        "PRINT 2 + 3 * 4\n"
        "PRINT 10 \\ 3\n"
        "PRINT 10 MOD 3\n"
        "PRINT 2 ^ 8\n"
    );

    // Test 3: String operations
    runProgram("String Ops",
        "DIM s AS STRING\n"
        "s = \"Hello, BASIC!\"\n"
        "PRINT s\n"
        "PRINT LEN(s)\n"
        "PRINT LEFT$(s, 5)\n"
        "PRINT RIGHT$(s, 6)\n"
        "PRINT MID$(s, 8, 5)\n"
        "PRINT UCASE$(s)\n"
    );

    // Test 4: IF/THEN/ELSE
    runProgram("IF/THEN/ELSE",
        "DIM x AS INTEGER\n"
        "x = 42\n"
        "IF x > 100 THEN\n"
        " PRINT \"big\"\n"
        "ELSEIF x > 10 THEN\n"
        " PRINT \"medium\"\n"
        "ELSE\n"
        " PRINT \"small\"\n"
        "END IF\n"
    );

    // Test 5: FOR loop
    runProgram("FOR Loop",
        "DIM i AS INTEGER\n"
        "FOR i = 1 TO 10\n"
        " PRINT i;\n"
        "NEXT i\n"
        "PRINT\n"
    );

    // Test 6: DO/WHILE loop
    runProgram("DO/WHILE Loop",
        "DIM n AS INTEGER\n"
        "n = 1\n"
        "DO WHILE n <= 5\n"
        " PRINT n;\n"
        " n = n + 1\n"
        "LOOP\n"
        "PRINT\n"
    );

    // Test 7: SUB and FUNCTION
    runProgram("SUB and FUNCTION",
        "DECLARE SUB Greet(name AS STRING)\n"
        "DECLARE FUNCTION Square(x AS INTEGER) AS INTEGER\n"
        "\n"
        "CALL Greet(\"World\")\n"
        "PRINT Square(7)\n"
        "\n"
        "SUB Greet(name AS STRING)\n"
        " PRINT \"Hello, \" & name & \"!\"\n"
        "END SUB\n"
        "\n"
        "FUNCTION Square(x AS INTEGER) AS INTEGER\n"
        " Square = x * x\n"
        "END FUNCTION\n"
    );

    // Test 8: SELECT CASE
    runProgram("SELECT CASE",
        "DIM grade AS STRING\n"
        "grade = \"B\"\n"
        "SELECT CASE grade\n"
        " CASE \"A\"\n"
        " PRINT \"Excellent\"\n"
        " CASE \"B\", \"C\"\n"
        " PRINT \"Good\"\n"
        " CASE ELSE\n"
        " PRINT \"Other\"\n"
        "END SELECT\n"
    );

    // Test 9: Fibonacci
    runProgram("Fibonacci",
        "DIM a AS INTEGER\n"
        "DIM b AS INTEGER\n"
        "DIM temp AS INTEGER\n"
        "DIM i AS INTEGER\n"
        "a = 0\n"
        "b = 1\n"
        "FOR i = 1 TO 10\n"
        " PRINT a;\n"
        " temp = a + b\n"
        " a = b\n"
        " b = temp\n"
        "NEXT i\n"
        "PRINT\n"
    );

    // Test 10: Math functions
    runProgram("Math Functions",
        "PRINT ABS(-42)\n"
        "PRINT SQR(144)\n"
        "PRINT INT(3.7)\n"
    );

    // Test 11: File I/O
    runProgram("File I/O",
        "OPEN \"/tmp/dvxbasic_test.txt\" FOR OUTPUT AS #1\n"
        "PRINT #1, \"Hello from BASIC!\"\n"
        "PRINT #1, \"Line two\"\n"
        "PRINT #1, \"42\"\n"
        "CLOSE #1\n"
        "\n"
        "DIM line$ AS STRING\n"
        "DIM count AS INTEGER\n"
        "count = 0\n"
        "OPEN \"/tmp/dvxbasic_test.txt\" FOR INPUT AS #1\n"
        "DO WHILE NOT EOF(#1)\n"
        " INPUT #1, line$\n"
        " PRINT line$\n"
        " count = count + 1\n"
        "LOOP\n"
        "CLOSE #1\n"
        "PRINT count;\n"
        "PRINT \"lines read\"\n"
    );

    // Test 12: LINE INPUT# and APPEND
    runProgram("LINE INPUT and APPEND",
        "OPEN \"/tmp/dvxbasic_test2.txt\" FOR OUTPUT AS #2\n"
        "PRINT #2, \"First line\"\n"
        "CLOSE #2\n"
        "\n"
        "OPEN \"/tmp/dvxbasic_test2.txt\" FOR APPEND AS #2\n"
        "PRINT #2, \"Appended line\"\n"
        "CLOSE #2\n"
        "\n"
        "DIM s$ AS STRING\n"
        "OPEN \"/tmp/dvxbasic_test2.txt\" FOR INPUT AS #2\n"
        "LINE INPUT #2, s$\n"
        "PRINT s$\n"
        "LINE INPUT #2, s$\n"
        "PRINT s$\n"
        "CLOSE #2\n"
    );

    // Test 13: Array -- 1D with default lbound=0
    runProgram("1D Array",
        "DIM arr(5) AS INTEGER\n"
        "DIM i AS INTEGER\n"
        "FOR i = 1 TO 5\n"
        " arr(i) = i * i\n"
        "NEXT i\n"
        "FOR i = 1 TO 5\n"
        " PRINT arr(i);\n"
        "NEXT i\n"
        "PRINT\n"
    );
    // Expected: 1 4 9 16 25

    // Test 14: Multi-dimensional array
    runProgram("Multi-dim Array",
        "DIM m(2, 2) AS INTEGER\n"
        "m(1, 1) = 11\n"
        "m(1, 2) = 12\n"
        "m(2, 1) = 21\n"
        "m(2, 2) = 22\n"
        "PRINT m(1, 1); m(1, 2); m(2, 1); m(2, 2)\n"
    );
    // Expected: 11 12 21 22

    // Test 15: Array with explicit bounds (TO syntax)
    runProgram("Array with TO bounds",
        "DIM a(1 TO 3) AS INTEGER\n"
        "a(1) = 10\n"
        "a(2) = 20\n"
        "a(3) = 30\n"
        "PRINT a(1); a(2); a(3)\n"
    );
    // Expected: 10 20 30

    // Test 16: LBOUND and UBOUND
    runProgram("LBOUND/UBOUND",
        "DIM a(5 TO 10) AS INTEGER\n"
        "PRINT LBOUND(a); UBOUND(a)\n"
    );
    // Expected: 5 10

    // Test 17: User-defined TYPE
    runProgram("TYPE",
        "TYPE Point\n"
        " x AS INTEGER\n"
        " y AS INTEGER\n"
        "END TYPE\n"
        "DIM p AS Point\n"
        "p.x = 10\n"
        "p.y = 20\n"
        "PRINT p.x; p.y\n"
    );
    // Expected: 10 20

    // Test 18: String array
    runProgram("String Array",
        "DIM names(3) AS STRING\n"
        "names(0) = \"Alice\"\n"
        "names(1) = \"Bob\"\n"
        "names(2) = \"Charlie\"\n"
        "DIM i AS INTEGER\n"
        "FOR i = 0 TO 2\n"
        " PRINT names(i)\n"
        "NEXT i\n"
    );
    // Expected: Alice / Bob / Charlie

    // Test 19: REDIM with PRESERVE
    runProgram("REDIM PRESERVE",
        "DIM a(3) AS INTEGER\n"
        "a(0) = 100\n"
        "a(1) = 200\n"
        "a(2) = 300\n"
        "REDIM PRESERVE a(5) AS INTEGER\n"
        "a(4) = 500\n"
        "PRINT a(0); a(1); a(2); a(4)\n"
    );
    // Expected: 100 200 300 500

    // Test 20: ERASE
    runProgram("ERASE",
        "DIM a(3) AS INTEGER\n"
        "a(1) = 42\n"
        "ERASE a\n"
        "DIM b(2) AS INTEGER\n"
        "b(1) = 99\n"
        "PRINT b(1)\n"
    );
    // Expected: 99

    // Test 21: Array in FOR loop accumulation
    runProgram("Array Accumulation",
        "DIM sums(5) AS INTEGER\n"
        "DIM i AS INTEGER\n"
        "DIM j AS INTEGER\n"
        "FOR i = 1 TO 5\n"
        " sums(i) = 0\n"
        " FOR j = 1 TO i\n"
        " sums(i) = sums(i) + j\n"
        " NEXT j\n"
        "NEXT i\n"
        "FOR i = 1 TO 5\n"
        " PRINT sums(i);\n"
        "NEXT i\n"
        "PRINT\n"
    );
    // Expected: 1 3 6 10 15

    // ============================================================
    // Batch 1: Control Flow
    // ============================================================

    // Test: GOTO with forward jump
    runProgram("GOTO Forward",
        "PRINT \"before\"\n"
        "GOTO skip\n"
        "PRINT \"skipped\"\n"
        "skip:\n"
        "PRINT \"after\"\n"
    );
    // Expected: before / after

    // Test: GOTO with backward jump
    runProgram("GOTO Backward",
        "DIM n AS INTEGER\n"
        "n = 0\n"
        "top:\n"
        "n = n + 1\n"
        "IF n < 5 THEN GOTO top\n"
        "PRINT n\n"
    );
    // Expected: 5

    // Test: GOSUB/RETURN
    runProgram("GOSUB/RETURN",
        "DIM x AS INTEGER\n"
        "x = 10\n"
        "GOSUB dbl\n"
        "PRINT x\n"
        "END\n"
        "dbl:\n"
        "x = x * 2\n"
        "RETURN\n"
    );
    // Expected: 20

    // Test: ON ERROR GOTO -- verify error handler catches errors
    // and ERR returns the error number
    runProgram("ON ERROR GOTO",
        "ON ERROR GOTO handler\n"
        "PRINT 10 / 0\n"
        "END\n"
        "handler:\n"
        "PRINT \"caught\"\n"
        "PRINT ERR\n"
    );
    // Expected: caught / 11

    // Test: Single-line IF
    runProgram("Single-line IF",
        "DIM x AS INTEGER\n"
        "x = 42\n"
        "IF x > 10 THEN PRINT \"big\"\n"
        "IF x < 10 THEN PRINT \"small\"\n"
        "IF x = 42 THEN PRINT \"exact\" ELSE PRINT \"nope\"\n"
    );
    // Expected: big / exact

    // Test: Multi-statement line with :
    runProgram("Multi-statement :",
        "DIM x AS INTEGER\n"
        "DIM y AS INTEGER\n"
        "x = 1 : y = 2 : PRINT x + y\n"
    );
    // Expected: 3

    // ============================================================
    // Batch 2: Misc Features
    // ============================================================

    // Test: SWAP
    runProgram("SWAP",
        "DIM a AS INTEGER\n"
        "DIM b AS INTEGER\n"
        "a = 10\n"
        "b = 20\n"
        "SWAP a, b\n"
        "PRINT a;\n"
        "PRINT b\n"
    );
    // Expected: 20 10

    // Test: TIMER (returns number > 0)
    runProgram("TIMER",
        "DIM t AS DOUBLE\n"
        "t = TIMER\n"
        "IF t > 0 THEN PRINT \"ok\"\n"
    );
    // Expected: ok

    // Test: DATE$ (returns non-empty string)
    runProgram("DATE$",
        "DIM d$ AS STRING\n"
        "d$ = DATE$\n"
        "IF LEN(d$) > 0 THEN PRINT \"ok\"\n"
    );
    // Expected: ok

    // Test: TIME$ (returns non-empty string)
    runProgram("TIME$",
        "DIM t$ AS STRING\n"
        "t$ = TIME$\n"
        "IF LEN(t$) > 0 THEN PRINT \"ok\"\n"
    );
    // Expected: ok

    // Test: ENVIRON$
    runProgram("ENVIRON$",
        "DIM p$ AS STRING\n"
        "p$ = ENVIRON$(\"HOME\")\n"
        "IF LEN(p$) > 0 THEN PRINT \"ok\"\n"
    );
    // Expected: ok

    // ============================================================
    // Batch 3: New features (DATA/READ/RESTORE, DIM SHARED,
    // STATIC, DEF FN, OPTION BASE)
    // ============================================================

    // Test: DATA/READ/RESTORE
    runProgram("DATA/READ/RESTORE",
        "DATA 10, 20, \"hello\"\n"
        "DIM a AS INTEGER\n"
        "DIM b AS INTEGER\n"
        "DIM c AS STRING\n"
        "READ a, b, c\n"
        "PRINT a; b;\n"
        "PRINT c\n"
        "RESTORE\n"
        "READ a\n"
        "PRINT a\n"
    );
    // Expected: 10 20 hello / 10

    // Test: DIM SHARED
    runProgram("DIM SHARED",
        "DIM SHARED count AS INTEGER\n"
        "count = 0\n"
        "CALL Increment\n"
        "CALL Increment\n"
        "CALL Increment\n"
        "PRINT count\n"
        "SUB Increment\n"
        " count = count + 1\n"
        "END SUB\n"
    );
    // Expected: 3

    // Test: STATIC
    runProgram("STATIC",
        "CALL Counter\n"
        "CALL Counter\n"
        "CALL Counter\n"
        "SUB Counter\n"
        " STATIC n AS INTEGER\n"
        " n = n + 1\n"
        " PRINT n;\n"
        "END SUB\n"
        "PRINT\n"
    );
    // Expected: 1 2 3

    // Test: DEF FN
    runProgram("DEF FN",
        "DEF FNdouble(x AS INTEGER) = x * 2\n"
        "PRINT FNdouble(5)\n"
        "PRINT FNdouble(21)\n"
    );
    // Expected: 10 / 42

    // Test: OPTION BASE
    runProgram("OPTION BASE",
        "OPTION BASE 1\n"
        "DIM arr(3) AS INTEGER\n"
        "arr(1) = 10\n"
        "arr(3) = 30\n"
        "PRINT arr(1); arr(3)\n"
    );
    // Expected: 10 30

    // Test: DATA with mixed types
    runProgram("DATA mixed types",
        "DATA 100, 3.14, \"world\"\n"
        "DIM x AS INTEGER\n"
        "DIM y AS DOUBLE\n"
        "DIM z AS STRING\n"
        "READ x, y, z\n"
        "PRINT x\n"
        "PRINT z\n"
    );
    // Expected: 100 / world

    // Test: Multiple DATA statements scattered
    runProgram("DATA scattered",
        "DIM a AS INTEGER\n"
        "DIM b AS INTEGER\n"
        "DIM c AS INTEGER\n"
        "DATA 1, 2\n"
        "READ a, b\n"
        "DATA 3\n"
        "READ c\n"
        "PRINT a; b; c\n"
    );
    // Expected: 1 2 3

    // Test: DIM SHARED with SUB modifying shared variable
    runProgram("DIM SHARED multi",
        "DIM SHARED total AS INTEGER\n"
        "DIM SHARED msg AS STRING\n"
        "total = 100\n"
        "msg = \"start\"\n"
        "CALL Modify\n"
        "PRINT total\n"
        "PRINT msg\n"
        "SUB Modify\n"
        " total = total + 50\n"
        " msg = \"done\"\n"
        "END SUB\n"
    );
    // Expected: 150 / done

    // ============================================================
    // Batch 4: New I/O and string features
    // ============================================================

    // Test: WRITE #
    runProgram("WRITE #",
        "OPEN \"/tmp/dvxbasic_write.txt\" FOR OUTPUT AS #1\n"
        "WRITE #1, 10, \"hello\", 3.14\n"
        "CLOSE #1\n"
        "OPEN \"/tmp/dvxbasic_write.txt\" FOR INPUT AS #1\n"
        "DIM s AS STRING\n"
        "LINE INPUT #1, s\n"
        "PRINT s\n"
        "CLOSE #1\n"
    );
    // Expected: 10,"hello",3.14

    // Test: FREEFILE
    runProgram("FREEFILE",
        "DIM f AS INTEGER\n"
        "f = FREEFILE\n"
        "PRINT f\n"
    );
    // Expected: 1

    // Test: PRINT USING numeric
    runProgram("PRINT USING numeric",
        "PRINT USING \"###.##\"; 3.14159\n"
    );
    // Expected: 3.14

    // Test: PRINT USING string
    runProgram("PRINT USING string",
        "PRINT USING \"!\"; \"Hello\"\n"
    );
    // Expected: H

    // Test: SPC and TAB in PRINT
    runProgram("SPC/TAB",
        "PRINT SPC(3); \"hi\"\n"
    );
    // Expected: "   hi" (three spaces, then hi)

    // Test: Fixed-length string
    runProgram("STRING * n",
        "DIM s AS STRING * 5\n"
        "s = \"Hi\"\n"
        "PRINT \"[\" & s & \"]\"\n"
        "PRINT LEN(s)\n"
    );
    // Expected: "[Hi   ]" (padded to 5 characters) / 5

    // Test: MID$ statement
    runProgram("MID$ statement",
        "DIM s AS STRING\n"
        "s = \"Hello World\"\n"
        "MID$(s, 7, 5) = \"BASIC\"\n"
        "PRINT s\n"
    );
    // Expected: Hello BASIC

    // Test: OPEN FOR BINARY / GET / PUT
    runProgram("BINARY GET/PUT",
        "DIM v AS INTEGER\n"
        "OPEN \"/tmp/dvxbasic_bin.tmp\" FOR BINARY AS #1\n"
        "v = 12345\n"
        "PUT #1, , v\n"
        "SEEK #1, 1\n"
        "DIM r AS INTEGER\n"
        "GET #1, , r\n"
        "PRINT r\n"
        "CLOSE #1\n"
    );
    // Expected: 12345

    // Test: LOF and LOC
    runProgram("LOF/LOC",
        "OPEN \"/tmp/dvxbasic_lof.txt\" FOR OUTPUT AS #1\n"
        "PRINT #1, \"test\"\n"
        "CLOSE #1\n"
        "OPEN \"/tmp/dvxbasic_lof.txt\" FOR INPUT AS #1\n"
        "DIM sz AS LONG\n"
        "sz = LOF(1)\n"
        "IF sz > 0 THEN PRINT \"ok\"\n"
        "CLOSE #1\n"
    );
    // Expected: ok

    // Test: INPUT$(n, #channel)
    runProgram("INPUT$",
        "OPEN \"/tmp/dvxbasic_inp.txt\" FOR OUTPUT AS #1\n"
        "PRINT #1, \"ABCDEF\"\n"
        "CLOSE #1\n"
        "OPEN \"/tmp/dvxbasic_inp.txt\" FOR INPUT AS #1\n"
        "DIM s AS STRING\n"
        "s = INPUT$(3, #1)\n"
        "PRINT s\n"
        "CLOSE #1\n"
    );
    // Expected: ABC

    // Test: SEEK function form
    runProgram("SEEK function",
        "OPEN \"/tmp/dvxbasic_seek.txt\" FOR OUTPUT AS #1\n"
        "PRINT #1, \"test\"\n"
        "CLOSE #1\n"
        "OPEN \"/tmp/dvxbasic_seek.txt\" FOR BINARY AS #1\n"
        "DIM p AS LONG\n"
        "p = SEEK(1)\n"
        "IF p = 1 THEN PRINT \"ok\"\n"
        "CLOSE #1\n"
    );
    // Expected: ok

    // Test: ON n GOTO
    runProgram("ON n GOTO",
        "DIM n AS INTEGER\n"
        "n = 2\n"
        "ON n GOTO ten, twenty, thirty\n"
        "PRINT \"none\"\n"
        "GOTO done\n"
        "ten:\n"
        "PRINT \"ten\"\n"
        "GOTO done\n"
        "twenty:\n"
        "PRINT \"twenty\"\n"
        "GOTO done\n"
        "thirty:\n"
        "PRINT \"thirty\"\n"
        "done:\n"
    );
    // Expected: twenty

    // Test: ON n GOTO (no match)
    runProgram("ON n GOTO no match",
        "DIM n AS INTEGER\n"
        "n = 5\n"
        "ON n GOTO aa, bb\n"
        "PRINT \"fallthrough\"\n"
        "GOTO done2\n"
        "aa:\n"
        "PRINT \"aa\"\n"
        "GOTO done2\n"
        "bb:\n"
        "PRINT \"bb\"\n"
        "done2:\n"
    );
    // Expected: fallthrough

    // Test: ON n GOSUB
    runProgram("ON n GOSUB",
        "DIM n AS INTEGER\n"
        "DIM result AS INTEGER\n"
        "result = 0\n"
        "n = 2\n"
        "ON n GOSUB addTen, addTwenty, addThirty\n"
        "PRINT result\n"
        "GOTO endProg\n"
        "addTen:\n"
        "result = result + 10\n"
        "RETURN\n"
        "addTwenty:\n"
        "result = result + 20\n"
        "RETURN\n"
        "addThirty:\n"
        "result = result + 30\n"
        "RETURN\n"
        "endProg:\n"
    );
    // Expected: 20

    // Test: FORMAT$
    runProgram("FORMAT$",
        "PRINT FORMAT$(1234.5, \"#,##0.00\")\n"
        "PRINT FORMAT$(0.5, \"0.00\")\n"
        "PRINT FORMAT$(-42, \"+#0\")\n"
        "PRINT FORMAT$(0.75, \"percent\")\n"
    );
    // Expected: 1,234.50\n0.50\n-42\n75%

    // Test: SHELL as function expression
    runProgram("SHELL function",
        "DIM r AS INTEGER\n"
        "r = SHELL(\"echo hello > /dev/null\")\n"
        "IF r = 0 THEN PRINT \"ok\"\n"
    );
    // Expected: ok

    // Test: SHELL as statement
    runProgram("SHELL statement",
        "SHELL \"echo hello > /dev/null\"\n"
        "PRINT \"done\"\n"
    );
    // Expected: done

    // Test: OPTION COMPARE TEXT
    runProgram("OPTION COMPARE TEXT",
        "OPTION COMPARE TEXT\n"
        "IF \"hello\" = \"HELLO\" THEN\n"
        " PRINT \"equal\"\n"
        "ELSE\n"
        " PRINT \"not equal\"\n"
        "END IF\n"
        "IF \"abc\" < \"XYZ\" THEN\n"
        " PRINT \"less\"\n"
        "END IF\n"
    );
    // Expected: equal\nless

    // Test: OPTION COMPARE BINARY (default)
    runProgram("OPTION COMPARE BINARY",
        "OPTION COMPARE BINARY\n"
        "IF \"hello\" = \"HELLO\" THEN\n"
        " PRINT \"equal\"\n"
        "ELSE\n"
        " PRINT \"not equal\"\n"
        "END IF\n"
    );
    // Expected: not equal

    // Test: EQV operator
    runProgram("EQV operator",
        "PRINT -1 EQV -1\n"
        "PRINT 0 EQV 0\n"
        "PRINT -1 EQV 0\n"
        "PRINT 0 EQV -1\n"
    );
    // Expected: -1\n-1\n0\n0

    // Test: IMP operator
    runProgram("IMP operator",
        "PRINT 0 IMP -1\n"
        "PRINT -1 IMP 0\n"
        "PRINT -1 IMP -1\n"
        "PRINT 0 IMP 0\n"
    );
    // Expected: -1\n0\n-1\n-1

    // Test: PRINT USING advanced patterns
    runProgram("PRINT USING advanced",
        "PRINT USING \"**#,##0.00\"; 1234.5\n"
        "PRINT USING \"$$#,##0.00\"; 42.5\n"
        "PRINT USING \"+###.##\"; 42.5\n"
        "PRINT USING \"+###.##\"; -42.5\n"
        "PRINT USING \"###.##-\"; -42.5\n"
        "PRINT USING \"###.##-\"; 42.5\n"
        "PRINT USING \"#.##^^^^\"; 1234.5\n"
    );

    // Test: DEFINT
    runProgram("DEFINT",
        "DEFINT A-Z\n"
        "a = 42\n"
        "b = 3.7\n"
        "PRINT a; b\n"
    );

    // Test: DEFSTR
    runProgram("DEFSTR",
        "DEFSTR S\n"
        "s = \"hello\"\n"
        "PRINT s\n"
    );

    // Test: DEFINT range
    runProgram("DEFINT range",
        "DEFINT I-N\n"
        "i = 10\n"
        "j = 20\n"
        "x = 3.14\n"
        "PRINT i; j; x\n"
    );

    // Test: OPTION EXPLICIT success
    runProgram("OPTION EXPLICIT ok",
        "OPTION EXPLICIT\n"
        "DIM x AS INTEGER\n"
        "x = 42\n"
        "PRINT x\n"
    );

    // Test: OPTION EXPLICIT failure (should error)
    // Driven by hand instead of through runProgram() because here a compile
    // error is the passing outcome.
    {
        printf("=== OPTION EXPLICIT error ===\n");
        const char *src =
            "OPTION EXPLICIT\n"
            "x = 42\n";
        int32_t len = (int32_t)strlen(src);
        BasParserT parser;
        basParserInit(&parser, src, len);
        bool ok = basParse(&parser);

        if (!ok) {
            printf("Correctly caught: %s\n", parser.error);
        } else {
            printf("ERROR: should have failed\n");
        }

        basParserFree(&parser);
        printf("\n");
    }

    printf("All tests complete.\n");

    // Pair the basStringSystemInit() call at the top of main.
    basStringSystemShutdown();
    return 0;
}

24
dvxbasic/test_lex.c Normal file
View file

@ -0,0 +1,24 @@
// test_lex.c -- Dump lexer tokens
// gcc -O2 -w -o test_lex test_lex.c compiler/lexer.c -lm
#include "compiler/lexer.h"
#include <stdio.h>
#include <string.h>
// Lexes a fixed one-line program and prints one line per token (numeric
// type, token name, token text). The token available right after
// basLexerInit() is printed first. Output stops after the EOF token has been
// printed, or after 20 tokens, whichever comes first.
int main(void) {
    const char *src = "PRINT \"Hello, World!\"\n";
    BasLexerT lex;
    int tokenIndex = 0;

    basLexerInit(&lex, src, (int32_t)strlen(src));

    // The cap of 20 keeps the dump finite even if EOF is never produced.
    while (tokenIndex < 20) {
        printf("Token %d: type=%d (%s) text='%s'\n", tokenIndex, lex.token.type, basTokenName(lex.token.type), lex.token.text);

        if (lex.token.type == TOK_EOF) {
            break;
        }

        basLexerNext(&lex);
        tokenIndex++;
    }

    return 0;
}

64
dvxbasic/test_quick.c Normal file
View file

@ -0,0 +1,64 @@
// test_quick.c -- Quick single-program test
// gcc -O2 -Wall -o test_quick test_quick.c compiler/lexer.c compiler/parser.c compiler/codegen.c compiler/symtab.c runtime/vm.c runtime/values.c -lm
#include "compiler/parser.h"
#include "runtime/vm.h"
#include "runtime/values.h"
#include <stdio.h>
#include <string.h>
int main(void) {
basStringSystemInit();
const char *source = "PRINT \"Hello, World!\"\n";
printf("Source: [%s]\n", source);
printf("Source len: %d\n", (int)strlen(source));
int32_t len = (int32_t)strlen(source);
BasParserT parser;
basParserInit(&parser, source, len);
if (!basParse(&parser)) {
printf("COMPILE ERROR: %s\n", parser.error);
basParserFree(&parser);
return 1;
}
printf("Compiled OK (%d bytes of p-code)\n", parser.cg.codeLen);
// Dump p-code
for (int i = 0; i < parser.cg.codeLen; i++) {
printf("%02X ", parser.cg.code[i]);
}
printf("\n");
BasModuleT *mod = basParserBuildModule(&parser);
basParserFree(&parser);
BasVmT *vm = basVmCreate();
basVmLoadModule(vm, mod);
vm->callStack[0].localCount = mod->globalCount > 64 ? 64 : mod->globalCount;
vm->callDepth = 1;
// Step limit
int steps = 0;
vm->running = true;
while (vm->running && steps < 1000) {
BasVmResultE r = basVmStep(vm);
steps++;
if (r != BAS_VM_OK) {
printf("[Result: %d after %d steps: %s]\n", r, steps, basVmGetError(vm));
break;
}
}
if (steps >= 1000) {
printf("[TIMEOUT after %d steps, PC=%d]\n", steps, vm->pc);
}
basVmDestroy(vm);
basModuleFree(mod);
return 0;
}

234
dvxbasic/test_vm.c Normal file
View file

@ -0,0 +1,234 @@
// test_vm.c -- Quick test for the DVX BASIC VM
//
// Hand-assembles a small p-code program and executes it.
// Tests: PRINT "Hello, World!", arithmetic, FOR loop, string ops.
//
// Build (native, not cross-compiled):
// gcc -O2 -Wall -o test_vm test_vm.c runtime/vm.c runtime/values.c -lm
#include "compiler/opcodes.h"
#include "runtime/vm.h"
#include "runtime/values.h"
#include <stdio.h>
#include <string.h>
// ============================================================
// Helper: emit bytes into a code buffer
// ============================================================
// Scratch buffer the tests assemble p-code into, and the number of bytes
// written so far. Each test resets sCodeLen to 0 before emitting.
static uint8_t sCode[4096];
static int32_t sCodeLen = 0;

// Append `count` raw bytes to sCode. If they do not fit, nothing is written,
// sCodeLen is left unchanged and a message goes to stderr, so an oversized
// test program cannot write past the end of the buffer.
static void emitBytes(const void *bytes, size_t count) {
    size_t used = (size_t)sCodeLen;

    if (count > sizeof(sCode) - used) {
        fprintf(stderr, "test_vm: code buffer full (%zu of %zu bytes used, %zu more requested)\n", used, sizeof(sCode), count);
        return;
    }

    memcpy(&sCode[used], bytes, count);
    sCodeLen += (int32_t)count;
}

// Append one byte (an opcode or an 8-bit operand).
static void emit8(uint8_t b) {
    emitBytes(&b, sizeof(b));
}

// Append a signed 16-bit operand as its in-memory representation
// (host byte order).
static void emit16(int16_t v) {
    emitBytes(&v, sizeof(v));
}

// Append an unsigned 16-bit operand as its in-memory representation
// (host byte order).
static void emitU16(uint16_t v) {
    emitBytes(&v, sizeof(v));
}
// ============================================================
// Test 1: PRINT "Hello, World!"
// ============================================================
static void test1(void) {
printf("--- Test 1: PRINT \"Hello, World!\" ---\n");
sCodeLen = 0;
// String constant pool
BasStringT *consts[1];
consts[0] = basStringNew("Hello, World!", 13);
// Code: PUSH_STR 0; PRINT; PRINT_NL; HALT
emit8(OP_PUSH_STR);
emitU16(0);
emit8(OP_PRINT);
emit8(OP_PRINT_NL);
emit8(OP_HALT);
BasModuleT module;
memset(&module, 0, sizeof(module));
module.code = sCode;
module.codeLen = sCodeLen;
module.constants = consts;
module.constCount = 1;
module.entryPoint = 0;
BasVmT *vm = basVmCreate();
basVmLoadModule(vm, &module);
BasVmResultE result = basVmRun(vm);
printf("Result: %d (expected %d = HALTED)\n\n", result, BAS_VM_HALTED);
basVmDestroy(vm);
basStringUnref(consts[0]);
}
// ============================================================
// Test 2: Arithmetic: PRINT 2 + 3 * 4
// ============================================================
static void test2(void) {
printf("--- Test 2: PRINT 2 + 3 * 4 (expect 14) ---\n");
sCodeLen = 0;
// Code: PUSH 3; PUSH 4; MUL; PUSH 2; ADD; PRINT; PRINT_NL; HALT
emit8(OP_PUSH_INT16);
emit16(3);
emit8(OP_PUSH_INT16);
emit16(4);
emit8(OP_MUL_INT);
emit8(OP_PUSH_INT16);
emit16(2);
emit8(OP_ADD_INT);
emit8(OP_PRINT);
emit8(OP_PRINT_NL);
emit8(OP_HALT);
BasModuleT module;
memset(&module, 0, sizeof(module));
module.code = sCode;
module.codeLen = sCodeLen;
module.entryPoint = 0;
BasVmT *vm = basVmCreate();
basVmLoadModule(vm, &module);
basVmRun(vm);
basVmDestroy(vm);
printf("\n");
}
// ============================================================
// Test 3: String concatenation
// ============================================================
static void test3(void) {
printf("--- Test 3: PRINT \"Hello\" & \" \" & \"BASIC\" ---\n");
sCodeLen = 0;
BasStringT *consts[3];
consts[0] = basStringNew("Hello", 5);
consts[1] = basStringNew(" ", 1);
consts[2] = basStringNew("BASIC", 5);
// Code: PUSH consts[0]; PUSH consts[1]; CONCAT; PUSH consts[2]; CONCAT; PRINT; PRINT_NL; HALT
emit8(OP_PUSH_STR); emitU16(0);
emit8(OP_PUSH_STR); emitU16(1);
emit8(OP_STR_CONCAT);
emit8(OP_PUSH_STR); emitU16(2);
emit8(OP_STR_CONCAT);
emit8(OP_PRINT);
emit8(OP_PRINT_NL);
emit8(OP_HALT);
BasModuleT module;
memset(&module, 0, sizeof(module));
module.code = sCode;
module.codeLen = sCodeLen;
module.constants = consts;
module.constCount = 3;
module.entryPoint = 0;
BasVmT *vm = basVmCreate();
basVmLoadModule(vm, &module);
basVmRun(vm);
basVmDestroy(vm);
printf("\n");
basStringUnref(consts[0]);
basStringUnref(consts[1]);
basStringUnref(consts[2]);
}
// ============================================================
// Test 4: FOR loop -- PRINT 1 to 5
// ============================================================
static void test4(void) {
printf("--- Test 4: FOR i = 1 TO 5: PRINT i: NEXT ---\n");
sCodeLen = 0;
// We need a call frame with at least 1 local (the loop variable)
// For module-level code, we use callStack[0] as implicit frame
// Setup: store initial value in local 0
// PUSH 1; STORE_LOCAL 0 -- i = 1
emit8(OP_PUSH_INT16); emit16(1);
emit8(OP_STORE_LOCAL); emitU16(0);
// Push limit and step for FOR_INIT
// PUSH 5 (limit); PUSH 1 (step)
emit8(OP_PUSH_INT16); emit16(5);
emit8(OP_PUSH_INT16); emit16(1);
emit8(OP_FOR_INIT); emitU16(0); emit8(1); // isLocal=1
// Loop body start (record PC for FOR_NEXT offset)
int32_t loopBody = sCodeLen;
// LOAD_LOCAL 0; PRINT; PRINT " "
emit8(OP_LOAD_LOCAL); emitU16(0);
emit8(OP_PRINT);
// FOR_NEXT: increment i, test, jump back
emit8(OP_FOR_NEXT);
emitU16(0); // local index
emit8(1); // isLocal=1
int16_t offset = (int16_t)(loopBody - (sCodeLen + 2));
emit16(offset);
// After loop
emit8(OP_PRINT_NL);
emit8(OP_HALT);
BasModuleT module;
memset(&module, 0, sizeof(module));
module.code = sCode;
module.codeLen = sCodeLen;
module.entryPoint = 0;
BasVmT *vm = basVmCreate();
// Initialize the implicit main frame with 1 local
vm->callStack[0].localCount = 1;
vm->callDepth = 1;
basVmLoadModule(vm, &module);
basVmRun(vm);
basVmDestroy(vm);
printf("\n");
}
// ============================================================
// main
// ============================================================
int main(void) {
printf("DVX BASIC VM Tests\n");
printf("==================\n\n");
basStringSystemInit();
test1();
test2();
test3();
test4();
printf("All tests complete.\n");
return 0;
}