Initial DVX BASIC Compiler and VM.
This commit is contained in:
parent
89690ca97c
commit
aa961425c9
17 changed files with 12014 additions and 0 deletions
243
dvxbasic/compiler/codegen.c
Normal file
243
dvxbasic/compiler/codegen.c
Normal file
|
|
@ -0,0 +1,243 @@
|
|||
// codegen.c -- DVX BASIC p-code emitter implementation
|
||||
|
||||
#include "codegen.h"
|
||||
#include "opcodes.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// ============================================================
|
||||
// basAddData
|
||||
// ============================================================
|
||||
|
||||
// Appends a copy of `val` to the DATA pool.
// Returns false (pool untouched) when all BAS_MAX_CONSTANTS slots are in
// use, true otherwise. The pool stores the result of basValCopy(), so the
// caller keeps ownership of `val`.
bool basAddData(BasCodeGenT *cg, BasValueT val) {
    const bool hasRoom = cg->dataCount < BAS_MAX_CONSTANTS;

    if (hasRoom) {
        const int32_t slot = cg->dataCount;

        cg->dataPool[slot] = basValCopy(val);
        cg->dataCount      = slot + 1;
    }

    return hasRoom;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basAddConstant
|
||||
// ============================================================
|
||||
|
||||
// Interns the string `text` (`len` bytes) in the constant pool and returns
// its pool index. An existing entry with identical length and bytes is
// reused, so equal strings share one slot.
//
// NOTE: when the pool already holds BAS_MAX_CONSTANTS entries the function
// returns 0, which callers cannot tell apart from a genuine index 0.
uint16_t basAddConstant(BasCodeGenT *cg, const char *text, int32_t len) {
    int32_t slot = 0;

    // Linear search for an identical string
    while (slot < cg->constCount) {
        const BasStringT *entry = cg->constants[slot];

        if (entry->len == len && memcmp(entry->data, text, len) == 0) {
            return (uint16_t)slot;
        }

        slot++;
    }

    // Not found: append, unless the pool is full
    if (cg->constCount >= BAS_MAX_CONSTANTS) {
        return 0;
    }

    const uint16_t newIndex = (uint16_t)cg->constCount;

    cg->constants[cg->constCount] = basStringNew(text, len);
    cg->constCount++;

    return newIndex;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basCodeGenBuildModule
|
||||
// ============================================================
|
||||
|
||||
// Builds a heap-allocated module from the generator's current state.
//
// The code bytes and DATA values are copied; constant-pool strings are
// shared with the generator through basStringRef(). Returns NULL if any
// allocation fails, in which case everything acquired so far (including
// string references) is released again. On success the caller owns the
// module and releases it with basModuleFree().
BasModuleT *basCodeGenBuildModule(BasCodeGenT *cg) {
    BasModuleT *mod = (BasModuleT *)calloc(1, sizeof(BasModuleT));

    if (!mod) {
        return NULL;
    }

    // Copy code. malloc(0) is allowed to return NULL, which would make an
    // empty program look like an out-of-memory failure, so always request
    // at least one byte.
    size_t codeBytes = cg->codeLen > 0 ? (size_t)cg->codeLen : 1;

    mod->code = (uint8_t *)malloc(codeBytes);

    if (!mod->code) {
        goto fail;
    }

    if (cg->codeLen > 0) {
        memcpy(mod->code, cg->code, (size_t)cg->codeLen);
    }

    mod->codeLen = cg->codeLen;

    // Copy constant pool (share string refs)
    if (cg->constCount > 0) {
        mod->constants = (BasStringT **)malloc((size_t)cg->constCount * sizeof(BasStringT *));

        if (!mod->constants) {
            goto fail;
        }

        for (int32_t i = 0; i < cg->constCount; i++) {
            mod->constants[i] = basStringRef(cg->constants[i]);
        }
    }

    // From here on basModuleFree() will unref exactly the strings ref'd above.
    mod->constCount  = cg->constCount;
    mod->globalCount = cg->globalCount;
    mod->entryPoint  = 0;

    // Copy data pool
    if (cg->dataCount > 0) {
        mod->dataPool = (BasValueT *)malloc((size_t)cg->dataCount * sizeof(BasValueT));

        if (!mod->dataPool) {
            goto fail;
        }

        for (int32_t i = 0; i < cg->dataCount; i++) {
            mod->dataPool[i] = basValCopy(cg->dataPool[i]);
        }
    }

    mod->dataCount = cg->dataCount;

    return mod;

fail:
    // mod was calloc'd, so members not reached yet are NULL / 0 and
    // basModuleFree() skips them; members that were filled in (notably the
    // ref'd constant strings) are released properly instead of leaking.
    basModuleFree(mod);
    return NULL;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basCodeGenFree
|
||||
// ============================================================
|
||||
|
||||
// Releases everything the generator's pools hold (one basStringUnref per
// constant, one basValRelease per DATA value) and resets the generator to
// an empty state. The BasCodeGenT object itself is not freed.
void basCodeGenFree(BasCodeGenT *cg) {
    int32_t slot;

    slot = 0;
    while (slot < cg->constCount) {
        basStringUnref(cg->constants[slot]);
        slot++;
    }

    slot = 0;
    while (slot < cg->dataCount) {
        basValRelease(&cg->dataPool[slot]);
        slot++;
    }

    // Mark all three areas as empty
    cg->codeLen    = 0;
    cg->dataCount  = 0;
    cg->constCount = 0;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basCodeGenInit
|
||||
// ============================================================
|
||||
|
||||
// Puts the generator into its empty starting state by zero-filling the
// whole structure (code buffer, both pools and all counters).
void basCodeGenInit(BasCodeGenT *cg) {
    memset(cg, 0, sizeof *cg);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basCodePos
|
||||
// ============================================================
|
||||
|
||||
int32_t basCodePos(const BasCodeGenT *cg) {
|
||||
return cg->codeLen;
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basEmit8
|
||||
// ============================================================
|
||||
|
||||
// Appends one byte to the code stream. The byte is silently dropped when
// the buffer already holds BAS_MAX_CODE bytes.
void basEmit8(BasCodeGenT *cg, uint8_t b) {
    if (cg->codeLen >= BAS_MAX_CODE) {
        return;
    }

    cg->code[cg->codeLen] = b;
    cg->codeLen += 1;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basEmit16
|
||||
// ============================================================
|
||||
|
||||
// Appends a signed 16-bit value to the code stream in host byte order.
// Nothing is written when fewer than two bytes of space remain.
void basEmit16(BasCodeGenT *cg, int16_t v) {
    if (cg->codeLen + 2 > BAS_MAX_CODE) {
        return;
    }

    uint8_t *dest = &cg->code[cg->codeLen];

    memcpy(dest, &v, sizeof v);
    cg->codeLen += 2;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basEmitDouble
|
||||
// ============================================================
|
||||
|
||||
// Appends the raw bytes of a double to the code stream (host
// representation). Nothing is written when the value does not fit in the
// remaining buffer space.
void basEmitDouble(BasCodeGenT *cg, double v) {
    const int32_t width = (int32_t)sizeof(double);

    if (cg->codeLen + width > BAS_MAX_CODE) {
        return;
    }

    memcpy(&cg->code[cg->codeLen], &v, sizeof(double));
    cg->codeLen += width;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basEmitFloat
|
||||
// ============================================================
|
||||
|
||||
// Appends the raw bytes of a float to the code stream (host
// representation). Nothing is written when the value does not fit in the
// remaining buffer space.
void basEmitFloat(BasCodeGenT *cg, float v) {
    const int32_t width = (int32_t)sizeof(float);

    if (cg->codeLen + width > BAS_MAX_CODE) {
        return;
    }

    memcpy(&cg->code[cg->codeLen], &v, sizeof(float));
    cg->codeLen += width;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basEmitU16
|
||||
// ============================================================
|
||||
|
||||
// Appends an unsigned 16-bit value to the code stream in host byte order.
// Nothing is written when fewer than two bytes of space remain.
void basEmitU16(BasCodeGenT *cg, uint16_t v) {
    if (cg->codeLen + 2 > BAS_MAX_CODE) {
        return;
    }

    uint8_t *dest = &cg->code[cg->codeLen];

    memcpy(dest, &v, sizeof v);
    cg->codeLen += 2;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basModuleFree
|
||||
// ============================================================
|
||||
|
||||
// Destroys a module: drops one reference on every constant string,
// releases every DATA value, then frees the arrays and the module itself.
// Passing NULL is allowed and does nothing.
void basModuleFree(BasModuleT *mod) {
    if (mod == NULL) {
        return;
    }

    // Element clean-up only runs for arrays that were actually allocated
    if (mod->constants != NULL) {
        int32_t slot = 0;

        while (slot < mod->constCount) {
            basStringUnref(mod->constants[slot]);
            slot++;
        }
    }

    if (mod->dataPool != NULL) {
        int32_t slot = 0;

        while (slot < mod->dataCount) {
            basValRelease(&mod->dataPool[slot]);
            slot++;
        }
    }

    // free(NULL) is a no-op, so the arrays need no further guarding
    free(mod->dataPool);
    free(mod->constants);
    free(mod->code);
    free(mod);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basPatch16
|
||||
// ============================================================
|
||||
|
||||
// Overwrites the two bytes at offset `pos` of the already-emitted code
// with `val` (host byte order). Used to fill in forward-jump operands.
// The call is ignored unless both bytes lie inside the emitted code,
// i.e. 0 <= pos and pos + 2 <= codeLen.
void basPatch16(BasCodeGenT *cg, int32_t pos, int16_t val) {
    // Written as "pos > codeLen - 2" rather than "pos + 2 > codeLen" so a
    // pos close to INT32_MAX cannot trigger signed overflow; codeLen is
    // never negative, so the subtraction itself is safe.
    if (pos < 0 || pos > cg->codeLen - 2) {
        return;
    }

    memcpy(&cg->code[pos], &val, sizeof val);
}
|
||||
76
dvxbasic/compiler/codegen.h
Normal file
76
dvxbasic/compiler/codegen.h
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
// codegen.h -- DVX BASIC p-code emitter
|
||||
//
|
||||
// Builds a p-code byte stream and string constant pool from
|
||||
// calls made by the parser. Provides helpers for backpatching
|
||||
// forward jumps.
|
||||
//
|
||||
// Embeddable: no DVX dependencies, pure C.
|
||||
|
||||
#ifndef DVXBASIC_CODEGEN_H
|
||||
#define DVXBASIC_CODEGEN_H
|
||||
|
||||
#include "../runtime/vm.h"
|
||||
#include "../runtime/values.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// ============================================================
|
||||
// Code generator state
|
||||
// ============================================================
|
||||
|
||||
#define BAS_MAX_CODE 65536
|
||||
#define BAS_MAX_CONSTANTS 1024
|
||||
|
||||
typedef struct {
|
||||
uint8_t code[BAS_MAX_CODE];
|
||||
int32_t codeLen;
|
||||
BasStringT *constants[BAS_MAX_CONSTANTS];
|
||||
int32_t constCount;
|
||||
int32_t globalCount;
|
||||
BasValueT dataPool[BAS_MAX_CONSTANTS];
|
||||
int32_t dataCount;
|
||||
} BasCodeGenT;
|
||||
|
||||
// ============================================================
|
||||
// API
|
||||
// ============================================================
|
||||
|
||||
void basCodeGenInit(BasCodeGenT *cg);
|
||||
void basCodeGenFree(BasCodeGenT *cg);
|
||||
|
||||
// Emit single byte
|
||||
void basEmit8(BasCodeGenT *cg, uint8_t b);
|
||||
|
||||
// Emit 16-bit signed value
|
||||
void basEmit16(BasCodeGenT *cg, int16_t v);
|
||||
|
||||
// Emit 16-bit unsigned value
|
||||
void basEmitU16(BasCodeGenT *cg, uint16_t v);
|
||||
|
||||
// Emit 32-bit float
|
||||
void basEmitFloat(BasCodeGenT *cg, float v);
|
||||
|
||||
// Emit 64-bit double
|
||||
void basEmitDouble(BasCodeGenT *cg, double v);
|
||||
|
||||
// Get current code position (for jump targets)
|
||||
int32_t basCodePos(const BasCodeGenT *cg);
|
||||
|
||||
// Patch a 16-bit value at a previous position (for backpatching jumps)
|
||||
void basPatch16(BasCodeGenT *cg, int32_t pos, int16_t val);
|
||||
|
||||
// Add a string to the constant pool. Returns the pool index.
|
||||
uint16_t basAddConstant(BasCodeGenT *cg, const char *text, int32_t len);
|
||||
|
||||
// Add a value to the data pool (for DATA statements). Returns true on success.
|
||||
bool basAddData(BasCodeGenT *cg, BasValueT val);
|
||||
|
||||
// Build a BasModuleT from the generated code. The caller takes
|
||||
// ownership of the module and must free it with basModuleFree().
|
||||
BasModuleT *basCodeGenBuildModule(BasCodeGenT *cg);
|
||||
|
||||
// Free a module built by basCodeGenBuildModule.
|
||||
void basModuleFree(BasModuleT *mod);
|
||||
|
||||
#endif // DVXBASIC_CODEGEN_H
|
||||
820
dvxbasic/compiler/lexer.c
Normal file
820
dvxbasic/compiler/lexer.c
Normal file
|
|
@ -0,0 +1,820 @@
|
|||
// lexer.c -- DVX BASIC lexer implementation
|
||||
//
|
||||
// Single-pass tokenizer. Keywords are case-insensitive. Identifiers
|
||||
// preserve their original case for display but comparisons are
|
||||
// case-insensitive. Line continuations (underscore at end of line)
|
||||
// are handled transparently.
|
||||
|
||||
#include "lexer.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// ============================================================
|
||||
// Keyword table
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
const char *text;
|
||||
BasTokenTypeE type;
|
||||
} KeywordEntryT;
|
||||
|
||||
static const KeywordEntryT sKeywords[] = {
|
||||
{ "AND", TOK_AND },
|
||||
{ "APPEND", TOK_APPEND },
|
||||
{ "AS", TOK_AS },
|
||||
{ "BASE", TOK_BASE },
|
||||
{ "BINARY", TOK_BINARY },
|
||||
{ "BOOLEAN", TOK_BOOLEAN },
|
||||
{ "BYVAL", TOK_BYVAL },
|
||||
{ "CALL", TOK_CALL },
|
||||
{ "CASE", TOK_CASE },
|
||||
{ "CLOSE", TOK_CLOSE },
|
||||
{ "CONST", TOK_CONST },
|
||||
{ "DATA", TOK_DATA },
|
||||
{ "DECLARE", TOK_DECLARE },
|
||||
{ "DEF", TOK_DEF },
|
||||
{ "DEFDBL", TOK_DEFDBL },
|
||||
{ "DEFINT", TOK_DEFINT },
|
||||
{ "DEFLNG", TOK_DEFLNG },
|
||||
{ "DEFSNG", TOK_DEFSNG },
|
||||
{ "DEFSTR", TOK_DEFSTR },
|
||||
{ "DIM", TOK_DIM },
|
||||
{ "DO", TOK_DO },
|
||||
{ "DOEVENTS", TOK_DOEVENTS },
|
||||
{ "DOUBLE", TOK_DOUBLE },
|
||||
{ "ELSE", TOK_ELSE },
|
||||
{ "ELSEIF", TOK_ELSEIF },
|
||||
{ "END", TOK_END },
|
||||
{ "EOF", TOK_EOF_KW },
|
||||
{ "EQV", TOK_EQV },
|
||||
{ "ERASE", TOK_ERASE },
|
||||
{ "ERR", TOK_ERR },
|
||||
{ "ERROR", TOK_ERROR_KW },
|
||||
{ "EXPLICIT", TOK_EXPLICIT },
|
||||
{ "EXIT", TOK_EXIT },
|
||||
{ "FALSE", TOK_FALSE_KW },
|
||||
{ "FOR", TOK_FOR },
|
||||
{ "FUNCTION", TOK_FUNCTION },
|
||||
{ "GET", TOK_GET },
|
||||
{ "GOSUB", TOK_GOSUB },
|
||||
{ "GOTO", TOK_GOTO },
|
||||
{ "HIDE", TOK_HIDE },
|
||||
{ "IF", TOK_IF },
|
||||
{ "IMP", TOK_IMP },
|
||||
{ "INPUT", TOK_INPUT },
|
||||
{ "INTEGER", TOK_INTEGER },
|
||||
{ "IS", TOK_IS },
|
||||
{ "LBOUND", TOK_LBOUND },
|
||||
{ "LET", TOK_LET },
|
||||
{ "LINE", TOK_LINE },
|
||||
{ "LOAD", TOK_LOAD },
|
||||
{ "LONG", TOK_LONG },
|
||||
{ "LOOP", TOK_LOOP },
|
||||
{ "ME", TOK_ME },
|
||||
{ "MOD", TOK_MOD },
|
||||
{ "MSGBOX", TOK_MSGBOX },
|
||||
{ "NEXT", TOK_NEXT },
|
||||
{ "NOT", TOK_NOT },
|
||||
{ "ON", TOK_ON },
|
||||
{ "OPEN", TOK_OPEN },
|
||||
{ "OPTION", TOK_OPTION },
|
||||
{ "OR", TOK_OR },
|
||||
{ "OUTPUT", TOK_OUTPUT },
|
||||
{ "PRESERVE", TOK_PRESERVE },
|
||||
{ "PRINT", TOK_PRINT },
|
||||
{ "PUT", TOK_PUT },
|
||||
{ "RANDOM", TOK_RANDOM },
|
||||
{ "RANDOMIZE", TOK_RANDOMIZE },
|
||||
{ "READ", TOK_READ },
|
||||
{ "REDIM", TOK_REDIM },
|
||||
{ "REM", TOK_REM },
|
||||
{ "RESTORE", TOK_RESTORE },
|
||||
{ "RESUME", TOK_RESUME },
|
||||
{ "RETURN", TOK_RETURN },
|
||||
{ "SEEK", TOK_SEEK },
|
||||
{ "SELECT", TOK_SELECT },
|
||||
{ "SET", TOK_SET },
|
||||
{ "SHARED", TOK_SHARED },
|
||||
{ "SHELL", TOK_SHELL },
|
||||
{ "SHOW", TOK_SHOW },
|
||||
{ "SINGLE", TOK_SINGLE },
|
||||
{ "SLEEP", TOK_SLEEP },
|
||||
{ "STATIC", TOK_STATIC },
|
||||
{ "STEP", TOK_STEP },
|
||||
{ "STRING", TOK_STRING_KW },
|
||||
{ "SUB", TOK_SUB },
|
||||
{ "SWAP", TOK_SWAP },
|
||||
{ "THEN", TOK_THEN },
|
||||
{ "TIMER", TOK_TIMER },
|
||||
{ "TO", TOK_TO },
|
||||
{ "TRUE", TOK_TRUE_KW },
|
||||
{ "TYPE", TOK_TYPE },
|
||||
{ "UBOUND", TOK_UBOUND },
|
||||
{ "UNLOAD", TOK_UNLOAD },
|
||||
{ "UNTIL", TOK_UNTIL },
|
||||
{ "WEND", TOK_WEND },
|
||||
{ "WHILE", TOK_WHILE },
|
||||
{ "WITH", TOK_WITH },
|
||||
{ "WRITE", TOK_WRITE },
|
||||
{ "XOR", TOK_XOR },
|
||||
{ NULL, TOK_ERROR }
|
||||
};
|
||||
|
||||
#define KEYWORD_COUNT (sizeof(sKeywords) / sizeof(sKeywords[0]) - 1)
|
||||
|
||||
// ============================================================
|
||||
// Prototypes
|
||||
// ============================================================
|
||||
|
||||
static char advance(BasLexerT *lex);
|
||||
static bool atEnd(const BasLexerT *lex);
|
||||
static BasTokenTypeE lookupKeyword(const char *text, int32_t len);
|
||||
static char peek(const BasLexerT *lex);
|
||||
static char peekNext(const BasLexerT *lex);
|
||||
static void setError(BasLexerT *lex, const char *msg);
|
||||
static void skipLineComment(BasLexerT *lex);
|
||||
static void skipWhitespace(BasLexerT *lex);
|
||||
static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex);
|
||||
static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex);
|
||||
static BasTokenTypeE tokenizeNumber(BasLexerT *lex);
|
||||
static BasTokenTypeE tokenizeString(BasLexerT *lex);
|
||||
static char upperChar(char c);
|
||||
|
||||
|
||||
// ============================================================
|
||||
// advance
|
||||
// ============================================================
|
||||
|
||||
// Consumes and returns the character under the cursor, keeping the
// line/column counters in step: '\n' starts a new line at column 1, any
// other character moves one column right. Returns '\0' without consuming
// anything once the end of the source has been reached.
static char advance(BasLexerT *lex) {
    if (atEnd(lex)) {
        return '\0';
    }

    const char consumed = lex->source[lex->pos];

    lex->pos += 1;

    if (consumed != '\n') {
        lex->col += 1;
        return consumed;
    }

    lex->line += 1;
    lex->col = 1;
    return consumed;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// atEnd
|
||||
// ============================================================
|
||||
|
||||
static bool atEnd(const BasLexerT *lex) {
|
||||
return lex->pos >= lex->sourceLen;
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basLexerInit
|
||||
// ============================================================
|
||||
|
||||
// Prepares `lex` to scan `sourceLen` bytes starting at `source` and reads
// the first token, so basLexerPeek() is meaningful right after this call.
// The source buffer is referenced, not copied.
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen) {
    // Zero everything first; this also leaves the cursor (pos) at 0
    memset(lex, 0, sizeof *lex);

    lex->source    = source;
    lex->sourceLen = sourceLen;

    // Positions reported to the user are 1-based
    lex->line = 1;
    lex->col  = 1;

    // Prime the first token
    (void)basLexerNext(lex);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basLexerNext
|
||||
// ============================================================
|
||||
|
||||
// Turns the current token into an end-of-statement marker whose text is a
// single "\n". Returns TOK_NEWLINE for convenience.
static BasTokenTypeE emitNewlineToken(BasLexerT *lex) {
    lex->token.type    = TOK_NEWLINE;
    lex->token.text[0] = '\n';
    lex->token.text[1] = '\0';
    lex->token.textLen = 1;
    return TOK_NEWLINE;
}


// Scans the next token into lex->token and returns its type.
//
// Leading blanks and line continuations are skipped first. Line ends
// (LF, CR, CRLF) produce TOK_NEWLINE, and so does an apostrophe comment
// (the comment text is skipped; the line end that follows it is left in
// place and is reported by the next call). End of input yields TOK_EOF.
// An unrecognised character yields TOK_ERROR with lex->error filled in.
BasTokenTypeE basLexerNext(BasLexerT *lex) {
    skipWhitespace(lex);

    // Start from an empty token positioned at the cursor
    lex->token.line    = lex->line;
    lex->token.col     = lex->col;
    lex->token.textLen = 0;
    lex->token.text[0] = '\0';

    if (atEnd(lex)) {
        lex->token.type = TOK_EOF;
        return TOK_EOF;
    }

    const char first = peek(lex);

    // Line ends, comments and strings are recognised by their first character
    switch (first) {
        case '\n':
            advance(lex);
            return emitNewlineToken(lex);

        case '\r':
            // CR alone or CRLF
            advance(lex);

            if (!atEnd(lex) && peek(lex) == '\n') {
                advance(lex);
            }

            return emitNewlineToken(lex);

        case '\'':
            skipLineComment(lex);
            return emitNewlineToken(lex);

        case '"':
            lex->token.type = tokenizeString(lex);
            return lex->token.type;

        default:
            break;
    }

    // Number: a digit, or '.' immediately followed by a digit
    if (isdigit((unsigned char)first) || (first == '.' && isdigit((unsigned char)peekNext(lex)))) {
        lex->token.type = tokenizeNumber(lex);
        return lex->token.type;
    }

    // Hex literal (&H...)
    if (first == '&' && upperChar(peekNext(lex)) == 'H') {
        lex->token.type = tokenizeHexLiteral(lex);
        return lex->token.type;
    }

    // Identifier or keyword
    if (isalpha((unsigned char)first) || first == '_') {
        lex->token.type = tokenizeIdentOrKeyword(lex);
        return lex->token.type;
    }

    // Operators and punctuation. The first character is consumed here; the
    // relational cases may consume a second one.
    advance(lex);

    BasTokenTypeE kind   = TOK_ERROR;
    int32_t       opLen  = 1;
    char          second = '\0';

    switch (first) {
        case '+':  kind = TOK_PLUS;      break;
        case '-':  kind = TOK_MINUS;     break;
        case '*':  kind = TOK_STAR;      break;
        case '/':  kind = TOK_SLASH;     break;
        case '\\': kind = TOK_BACKSLASH; break;
        case '^':  kind = TOK_CARET;     break;
        case '&':  kind = TOK_AMPERSAND; break;
        case '(':  kind = TOK_LPAREN;    break;
        case ')':  kind = TOK_RPAREN;    break;
        case ',':  kind = TOK_COMMA;     break;
        case ';':  kind = TOK_SEMICOLON; break;
        case ':':  kind = TOK_COLON;     break;
        case '.':  kind = TOK_DOT;       break;
        case '#':  kind = TOK_HASH;      break;
        case '=':  kind = TOK_EQ;        break;

        case '<':
            // "<>", "<=" or plain "<"
            if (!atEnd(lex) && peek(lex) == '>') {
                second = advance(lex);
                opLen  = 2;
                kind   = TOK_NE;
            } else if (!atEnd(lex) && peek(lex) == '=') {
                second = advance(lex);
                opLen  = 2;
                kind   = TOK_LE;
            } else {
                kind = TOK_LT;
            }
            break;

        case '>':
            // ">=" or plain ">"
            if (!atEnd(lex) && peek(lex) == '=') {
                second = advance(lex);
                opLen  = 2;
                kind   = TOK_GE;
            } else {
                kind = TOK_GT;
            }
            break;

        default:
            setError(lex, "Unexpected character");
            kind = TOK_ERROR;
            break;
    }

    lex->token.type = kind;

    // Record the operator spelling; an error token keeps its empty text
    if (kind != TOK_ERROR) {
        lex->token.text[0] = first;

        if (opLen == 2) {
            lex->token.text[1] = second;
        }

        lex->token.textLen     = opLen;
        lex->token.text[opLen] = '\0';
    }

    return lex->token.type;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basLexerPeek
|
||||
// ============================================================
|
||||
|
||||
BasTokenTypeE basLexerPeek(const BasLexerT *lex) {
|
||||
return lex->token.type;
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basTokenName
|
||||
// ============================================================
|
||||
|
||||
// Returns a printable name for a token type: a description for literals
// and structural tokens, the quoted spelling for operators and
// punctuation, the upper-case spelling for keywords, and "?" for anything
// else. The returned string is static and must not be freed.
const char *basTokenName(BasTokenTypeE type) {
    // Names of everything that is not a keyword
    static const struct {
        BasTokenTypeE  type;
        const char    *name;
    } fixedNames[] = {
        { TOK_INT_LIT,    "integer"     },
        { TOK_LONG_LIT,   "long"        },
        { TOK_FLOAT_LIT,  "float"       },
        { TOK_STRING_LIT, "string"      },
        { TOK_IDENT,      "identifier"  },
        { TOK_DOT,        "'.'"         },
        { TOK_COMMA,      "','"         },
        { TOK_SEMICOLON,  "';'"         },
        { TOK_COLON,      "':'"         },
        { TOK_LPAREN,     "'('"         },
        { TOK_RPAREN,     "')'"         },
        { TOK_HASH,       "'#'"         },
        { TOK_PLUS,       "'+'"         },
        { TOK_MINUS,      "'-'"         },
        { TOK_STAR,       "'*'"         },
        { TOK_SLASH,      "'/'"         },
        { TOK_BACKSLASH,  "'\\'"        },
        { TOK_CARET,      "'^'"         },
        { TOK_AMPERSAND,  "'&'"         },
        { TOK_EQ,         "'='"         },
        { TOK_NE,         "'<>'"        },
        { TOK_LT,         "'<'"         },
        { TOK_GT,         "'>'"         },
        { TOK_LE,         "'<='"        },
        { TOK_GE,         "'>='"        },
        { TOK_NEWLINE,    "newline"     },
        { TOK_EOF,        "end of file" },
        { TOK_ERROR,      "error"       }
    };

    const size_t fixedCount = sizeof(fixedNames) / sizeof(fixedNames[0]);

    for (size_t n = 0; n < fixedCount; n++) {
        if (fixedNames[n].type == type) {
            return fixedNames[n].name;
        }
    }

    // Keywords are named by their spelling in the keyword table
    for (int32_t k = 0; k < (int32_t)KEYWORD_COUNT; k++) {
        if (sKeywords[k].type == type) {
            return sKeywords[k].text;
        }
    }

    return "?";
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// lookupKeyword
|
||||
// ============================================================
|
||||
|
||||
static BasTokenTypeE lookupKeyword(const char *text, int32_t len) {
|
||||
// Case-insensitive keyword lookup
|
||||
for (int32_t i = 0; i < (int32_t)KEYWORD_COUNT; i++) {
|
||||
const char *kw = sKeywords[i].text;
|
||||
int32_t kwLen = (int32_t)strlen(kw);
|
||||
|
||||
if (kwLen != len) {
|
||||
continue;
|
||||
}
|
||||
|
||||
bool match = true;
|
||||
|
||||
for (int32_t j = 0; j < len; j++) {
|
||||
if (upperChar(text[j]) != kw[j]) {
|
||||
match = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (match) {
|
||||
return sKeywords[i].type;
|
||||
}
|
||||
}
|
||||
|
||||
return TOK_IDENT;
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// peek
|
||||
// ============================================================
|
||||
|
||||
static char peek(const BasLexerT *lex) {
|
||||
if (atEnd(lex)) {
|
||||
return '\0';
|
||||
}
|
||||
|
||||
return lex->source[lex->pos];
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// peekNext
|
||||
// ============================================================
|
||||
|
||||
static char peekNext(const BasLexerT *lex) {
|
||||
if (lex->pos + 1 >= lex->sourceLen) {
|
||||
return '\0';
|
||||
}
|
||||
|
||||
return lex->source[lex->pos + 1];
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// setError
|
||||
// ============================================================
|
||||
|
||||
// Stores a formatted error message in lex->error, prefixed with the
// lexer's current line and column. The message is truncated if it does
// not fit the buffer.
static void setError(BasLexerT *lex, const char *msg) {
    // %d requires an int argument. The position counters are handled as
    // int32_t elsewhere in this file, and int32_t is not int on every
    // platform, so convert explicitly.
    snprintf(lex->error, sizeof(lex->error), "Line %d, Col %d: %s", (int)lex->line, (int)lex->col, msg);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// skipLineComment
|
||||
// ============================================================
|
||||
|
||||
// Consumes characters up to, but not including, the next line end
// ('\n' or '\r') or the end of the source.
static void skipLineComment(BasLexerT *lex) {
    for (;;) {
        if (atEnd(lex)) {
            return;
        }

        const char ch = peek(lex);

        if (ch == '\n' || ch == '\r') {
            return;
        }

        advance(lex);
    }
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// skipWhitespace
|
||||
// ============================================================
|
||||
//
|
||||
// Skips spaces and tabs. Does NOT skip newlines (they are tokens).
|
||||
// Handles line continuation: underscore followed by newline joins
|
||||
// the next line to the current logical line.
|
||||
|
||||
// Skips blanks (spaces and tabs) in front of the next token. Line ends are
// NOT skipped because they are tokens themselves. The exception is a line
// continuation: an underscore, optional blanks, then a line end (LF, CR or
// CRLF) is swallowed whole and skipping carries on in the next line.
// An underscore that is not followed by a line end is left untouched.
static void skipWhitespace(BasLexerT *lex) {
    for (;;) {
        if (atEnd(lex)) {
            return;
        }

        const char ch = peek(lex);

        if (ch == ' ' || ch == '\t') {
            advance(lex);
            continue;
        }

        if (ch != '_') {
            return;
        }

        // Possible continuation: remember where we are so the underscore
        // can be handed back if this turns out to be something else.
        const int32_t markPos  = lex->pos;
        const int32_t markLine = lex->line;
        const int32_t markCol  = lex->col;

        advance(lex);

        // Blanks are allowed between the underscore and the line end
        while (!atEnd(lex) && (peek(lex) == ' ' || peek(lex) == '\t')) {
            advance(lex);
        }

        const bool atLineEnd = !atEnd(lex) && (peek(lex) == '\n' || peek(lex) == '\r');

        if (!atLineEnd) {
            // Not a continuation -- rewind to the underscore
            lex->pos  = markPos;
            lex->line = markLine;
            lex->col  = markCol;
            return;
        }

        // Swallow the line end; a CR may be followed by an LF (CRLF)
        const char eol = advance(lex);

        if (eol == '\r' && !atEnd(lex) && peek(lex) == '\n') {
            advance(lex);
        }

        // Loop again: keep skipping blanks on the continued line
    }
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// tokenizeHexLiteral
|
||||
// ============================================================
|
||||
|
||||
// Scans a hexadecimal literal of the form &H<digits>, optionally followed
// by '&' (long suffix). The cursor must be on the '&'.
//
// token.text receives the digits only (no "&H" prefix, no suffix),
// truncated to the token buffer size. Returns TOK_LONG_LIT with
// token.longVal set when the suffix is present, otherwise TOK_INT_LIT
// with token.intVal set. The value is the low 32 bits of the digits
// reinterpreted as a signed 32-bit number, so &HFFFFFFFF is -1; hex digits
// beyond the eighth push the earlier ones out. A literal with no digits
// at all evaluates to 0.
static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex) {
    advance(lex); // skip &
    advance(lex); // skip H

    int32_t  idx  = 0;
    uint32_t bits = 0;  // unsigned: shifting into or past the top bit is well defined

    while (!atEnd(lex) && isxdigit((unsigned char)peek(lex))) {
        char c = advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }

        uint32_t digit;

        if (c >= '0' && c <= '9') {
            digit = (uint32_t)(c - '0');
        } else if (c >= 'A' && c <= 'F') {
            digit = (uint32_t)(c - 'A' + 10);
        } else {
            digit = (uint32_t)(c - 'a' + 10);
        }

        bits = (bits << 4) | digit;
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen   = idx;

    // Reinterpret the bit pattern as signed without relying on an
    // out-of-range integer conversion.
    int32_t value;

    memcpy(&value, &bits, sizeof value);

    // Check for trailing & (long suffix)
    if (!atEnd(lex) && peek(lex) == '&') {
        advance(lex);
        lex->token.longVal = (int64_t)value;
        return TOK_LONG_LIT;
    }

    lex->token.intVal = value;
    return TOK_INT_LIT;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// tokenizeIdentOrKeyword
|
||||
// ============================================================
|
||||
|
||||
// Scans an identifier or keyword starting at the cursor.
//
// The name consists of letters, digits and underscores and may end in one
// type suffix (%, &, !, # or $), which is kept as part of token.text.
// Names longer than the token buffer are truncated.
//
// Returns:
//   - TOK_NEWLINE when the name (suffix ignored) is REM: the rest of the
//     line is skipped as a comment;
//   - the keyword's token type when the name is a keyword without suffix;
//   - TOK_IDENT otherwise (including keyword spellings that carry a
//     suffix, e.g. STRING$).
static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex) {
    int32_t idx = 0;

    while (!atEnd(lex) && (isalnum((unsigned char)peek(lex)) || peek(lex) == '_')) {
        char c = advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }
    }

    // Length of the name proper, without any suffix
    int32_t baseLen = idx;

    // Check for type suffix
    if (!atEnd(lex)) {
        char c = peek(lex);

        if (c == '%' || c == '&' || c == '!' || c == '#' || c == '$') {
            advance(lex);

            // The name loop may already have filled the buffer up to the
            // slot reserved for the terminator (assuming, as the guards in
            // this file do, that text[] holds BAS_MAX_TOKEN_LEN chars).
            // Appending then would write past the end, so give up the last
            // stored name character instead: the name is truncated anyway
            // and the suffix carries the type.
            if (idx >= BAS_MAX_TOKEN_LEN - 1 && idx > 0) {
                idx--;
                baseLen = idx;
            }

            lex->token.text[idx++] = c;
        }
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen   = idx;

    // Check if this is a keyword
    // For suffix-bearing identifiers, only check the base (without suffix)
    BasTokenTypeE kwType = lookupKeyword(lex->token.text, baseLen);

    // REM is a comment -- skip to end of line
    if (kwType == TOK_REM) {
        skipLineComment(lex);
        lex->token.type    = TOK_NEWLINE;
        lex->token.text[0] = '\n';
        lex->token.text[1] = '\0';
        lex->token.textLen = 1;
        return TOK_NEWLINE;
    }

    // If it's a keyword and has no suffix, return the keyword token
    if (kwType != TOK_IDENT && baseLen == idx) {
        return kwType;
    }

    return TOK_IDENT;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// tokenizeNumber
|
||||
// ============================================================
|
||||
|
||||
// Appends `ch` to the token text unless the buffer is full (one slot is
// always kept free for the terminator). Characters that do not fit are
// dropped.
static void numAppend(BasLexerT *lex, int32_t *idx, char ch) {
    if (*idx < BAS_MAX_TOKEN_LEN - 1) {
        lex->token.text[(*idx)++] = ch;
    }
}


// Scans a decimal numeric literal starting at the cursor:
//   digits [ '.' digits ] [ (E|D) [+|-] digits ] [ suffix ]
//
// token.text receives the literal as written, without the suffix and
// truncated to the token buffer size. The result type is chosen by the
// suffix when there is one:
//   %  -> TOK_INT_LIT   (token.intVal)
//   &  -> TOK_LONG_LIT  (token.longVal)
//   !  -> TOK_FLOAT_LIT (token.dblVal)
//   #  -> TOK_FLOAT_LIT (token.dblVal)
// Without a suffix, a literal with a decimal point or exponent is
// TOK_FLOAT_LIT; otherwise it is TOK_INT_LIT when the value fits in
// -32768..32767 and TOK_LONG_LIT when it does not.
static BasTokenTypeE tokenizeNumber(BasLexerT *lex) {
    int32_t idx        = 0;
    int32_t expIdx     = -1;   // position of the exponent letter in token.text, -1 if none stored
    bool    hasDecimal = false;
    bool    hasExp     = false;

    // Integer part
    while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
        numAppend(lex, &idx, advance(lex));
    }

    // Decimal part
    if (!atEnd(lex) && peek(lex) == '.' && isdigit((unsigned char)peekNext(lex))) {
        hasDecimal = true;
        numAppend(lex, &idx, advance(lex)); // .

        while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
            numAppend(lex, &idx, advance(lex));
        }
    }

    // Exponent
    if (!atEnd(lex) && (upperChar(peek(lex)) == 'E' || upperChar(peek(lex)) == 'D')) {
        hasExp = true;

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            expIdx = idx;
        }

        numAppend(lex, &idx, advance(lex));

        if (!atEnd(lex) && (peek(lex) == '+' || peek(lex) == '-')) {
            numAppend(lex, &idx, advance(lex));
        }

        while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
            numAppend(lex, &idx, advance(lex));
        }
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen   = idx;

    // Convert once, up front. strtod() only understands 'E'/'e' as the
    // exponent letter, so a BASIC 'D' exponent is swapped for 'E' during
    // the conversion and put back afterwards (token.text stays as written).
    // strtoll()/strtod() are used instead of atol()/atof() because their
    // behaviour on out-of-range input is defined.
    char savedExp = '\0';

    if (expIdx >= 0) {
        savedExp = lex->token.text[expIdx];
        lex->token.text[expIdx] = 'E';
    }

    double    asDouble = strtod(lex->token.text, NULL);
    long long asWhole  = strtoll(lex->token.text, NULL, 10);

    if (expIdx >= 0) {
        lex->token.text[expIdx] = savedExp;
    }

    // Check for type suffix
    if (!atEnd(lex)) {
        char c = peek(lex);

        if (c == '%') {
            advance(lex);
            lex->token.intVal = (int32_t)asWhole;
            return TOK_INT_LIT;
        }

        if (c == '&') {
            advance(lex);
            lex->token.longVal = (int64_t)asWhole;
            return TOK_LONG_LIT;
        }

        if (c == '!' || c == '#') {
            advance(lex);
            lex->token.dblVal = asDouble;
            return TOK_FLOAT_LIT;
        }
    }

    // No suffix: determine type from content
    if (hasDecimal || hasExp) {
        lex->token.dblVal = asDouble;
        return TOK_FLOAT_LIT;
    }

    if (asWhole >= -32768 && asWhole <= 32767) {
        lex->token.intVal = (int32_t)asWhole;
        return TOK_INT_LIT;
    }

    lex->token.longVal = (int64_t)asWhole;
    return TOK_LONG_LIT;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// tokenizeString
|
||||
// ============================================================
|
||||
|
||||
// Scan a double-quoted string literal into lex->token.text.
//
// On entry the current character is the opening quote. Characters are
// collected up to (not including) the closing quote; a literal may not
// span a line break. token.text is always NUL-terminated and
// token.textLen holds the number of characters stored.
//
// Returns TOK_STRING_LIT on success. Returns TOK_ERROR, after calling
// setError(), when the closing quote is missing or when the literal does
// not fit in token.text. An over-long literal is reported instead of
// being silently shortened, because a shortened constant would compile
// into a program that behaves differently from its source.
static BasTokenTypeE tokenizeString(BasLexerT *lex) {
    advance(lex); // skip opening quote

    int32_t idx       = 0;
    bool    truncated = false;

    while (!atEnd(lex) && peek(lex) != '"' && peek(lex) != '\n' && peek(lex) != '\r') {
        char c = advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        } else {
            // Keep consuming so the lexer ends up past the literal,
            // but remember that characters were dropped.
            truncated = true;
        }
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen   = idx;

    if (atEnd(lex) || peek(lex) != '"') {
        setError(lex, "Unterminated string literal");
        return TOK_ERROR;
    }

    advance(lex); // skip closing quote

    if (truncated) {
        setError(lex, "String literal too long");
        return TOK_ERROR;
    }

    return TOK_STRING_LIT;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// upperChar
|
||||
// ============================================================
|
||||
|
||||
// Map an ASCII lowercase letter to its uppercase form; every other
// character is returned unchanged.
static char upperChar(char c) {
    return (c >= 'a' && c <= 'z') ? (char)(c - ('a' - 'A')) : c;
}
|
||||
221
dvxbasic/compiler/lexer.h
Normal file
221
dvxbasic/compiler/lexer.h
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
// lexer.h -- DVX BASIC lexer (tokenizer)
|
||||
//
|
||||
// Converts BASIC source text into a stream of tokens. Case-insensitive
|
||||
// for keywords. Handles line continuations (_), comments (' and REM),
|
||||
// type suffixes (%, &, !, #, $), and string literals.
|
||||
//
|
||||
// Embeddable: no DVX dependencies, pure C.
|
||||
|
||||
#ifndef DVXBASIC_LEXER_H
|
||||
#define DVXBASIC_LEXER_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// ============================================================
|
||||
// Token types
|
||||
// ============================================================
|
||||
|
||||
typedef enum {
|
||||
// Literals
|
||||
TOK_INT_LIT, // integer literal (123, &HFF)
|
||||
TOK_LONG_LIT, // long literal (123&)
|
||||
TOK_FLOAT_LIT, // float literal (3.14, 1.5E10)
|
||||
TOK_STRING_LIT, // "string literal"
|
||||
|
||||
// Identifiers and symbols
|
||||
TOK_IDENT, // variable/function name
|
||||
TOK_DOT, // .
|
||||
TOK_COMMA, // ,
|
||||
TOK_SEMICOLON, // ;
|
||||
TOK_COLON, // :
|
||||
TOK_LPAREN, // (
|
||||
TOK_RPAREN, // )
|
||||
TOK_HASH, // # (file channel)
|
||||
|
||||
// Operators
|
||||
TOK_PLUS, // +
|
||||
TOK_MINUS, // -
|
||||
TOK_STAR, // *
|
||||
TOK_SLASH, // /
|
||||
TOK_BACKSLASH, // \ (integer divide)
|
||||
TOK_CARET, // ^
|
||||
TOK_AMPERSAND, // & (string concat or hex prefix)
|
||||
TOK_EQ, // =
|
||||
TOK_NE, // <>
|
||||
TOK_LT, // <
|
||||
TOK_GT, // >
|
||||
TOK_LE, // <=
|
||||
TOK_GE, // >=
|
||||
|
||||
// Type suffixes (attached to identifier)
|
||||
TOK_SUFFIX_INT, // %
|
||||
TOK_SUFFIX_LONG, // &
|
||||
TOK_SUFFIX_SINGLE, // !
|
||||
TOK_SUFFIX_DOUBLE, // #
|
||||
TOK_SUFFIX_STRING, // $
|
||||
|
||||
// Keywords
|
||||
TOK_AND,
|
||||
TOK_AS,
|
||||
TOK_BASE,
|
||||
TOK_BOOLEAN,
|
||||
TOK_BYVAL,
|
||||
TOK_CALL,
|
||||
TOK_CASE,
|
||||
TOK_CLOSE,
|
||||
TOK_CONST,
|
||||
TOK_DATA,
|
||||
TOK_DECLARE,
|
||||
TOK_DEF,
|
||||
TOK_DEFDBL,
|
||||
TOK_DEFINT,
|
||||
TOK_DEFLNG,
|
||||
TOK_DEFSNG,
|
||||
TOK_DEFSTR,
|
||||
TOK_DIM,
|
||||
TOK_DO,
|
||||
TOK_DOEVENTS,
|
||||
TOK_DOUBLE,
|
||||
TOK_ELSE,
|
||||
TOK_ELSEIF,
|
||||
TOK_END,
|
||||
TOK_EOF_KW, // EOF (keyword, not end-of-file)
|
||||
TOK_EQV,
|
||||
TOK_ERASE,
|
||||
TOK_ERR,
|
||||
TOK_ERROR_KW,
|
||||
TOK_EXPLICIT,
|
||||
TOK_EXIT,
|
||||
TOK_FALSE_KW,
|
||||
TOK_FOR,
|
||||
TOK_FUNCTION,
|
||||
TOK_GET,
|
||||
TOK_GOSUB,
|
||||
TOK_GOTO,
|
||||
TOK_HIDE,
|
||||
TOK_IF,
|
||||
TOK_IMP,
|
||||
TOK_INPUT,
|
||||
TOK_INTEGER,
|
||||
TOK_IS,
|
||||
TOK_LBOUND,
|
||||
TOK_LET,
|
||||
TOK_LINE,
|
||||
TOK_LOAD,
|
||||
TOK_LONG,
|
||||
TOK_LOOP,
|
||||
TOK_ME,
|
||||
TOK_MOD,
|
||||
TOK_MSGBOX,
|
||||
TOK_NEXT,
|
||||
TOK_NOT,
|
||||
TOK_ON,
|
||||
TOK_OPEN,
|
||||
TOK_OPTION,
|
||||
TOK_OR,
|
||||
TOK_OUTPUT,
|
||||
TOK_PRESERVE,
|
||||
TOK_PRINT,
|
||||
TOK_PUT,
|
||||
TOK_RANDOMIZE,
|
||||
TOK_READ,
|
||||
TOK_REDIM,
|
||||
TOK_REM,
|
||||
TOK_RESTORE,
|
||||
TOK_RESUME,
|
||||
TOK_RETURN,
|
||||
TOK_SEEK,
|
||||
TOK_SELECT,
|
||||
TOK_SET,
|
||||
TOK_SHARED,
|
||||
TOK_SHELL,
|
||||
TOK_SHOW,
|
||||
TOK_SINGLE,
|
||||
TOK_SLEEP,
|
||||
TOK_STATIC,
|
||||
TOK_STEP,
|
||||
TOK_STRING_KW,
|
||||
TOK_SUB,
|
||||
TOK_SWAP,
|
||||
TOK_THEN,
|
||||
TOK_TIMER,
|
||||
TOK_TO,
|
||||
TOK_TRUE_KW,
|
||||
TOK_TYPE,
|
||||
TOK_UBOUND,
|
||||
TOK_UNLOAD,
|
||||
TOK_UNTIL,
|
||||
TOK_WEND,
|
||||
TOK_WHILE,
|
||||
TOK_WITH,
|
||||
TOK_WRITE,
|
||||
TOK_XOR,
|
||||
|
||||
// File modes
|
||||
TOK_APPEND,
|
||||
TOK_BINARY,
|
||||
TOK_RANDOM,
|
||||
|
||||
// Special
|
||||
TOK_NEWLINE, // end of logical line
|
||||
TOK_EOF, // end of source
|
||||
TOK_ERROR // lexer error
|
||||
} BasTokenTypeE;
|
||||
|
||||
// ============================================================
|
||||
// Token
|
||||
// ============================================================
|
||||
|
||||
#define BAS_MAX_TOKEN_LEN 256
|
||||
|
||||
typedef struct {
|
||||
BasTokenTypeE type;
|
||||
int32_t line; // 1-based source line number
|
||||
int32_t col; // 1-based column number
|
||||
|
||||
// Value (depends on type)
|
||||
union {
|
||||
int32_t intVal;
|
||||
int64_t longVal;
|
||||
float fltVal;
|
||||
double dblVal;
|
||||
};
|
||||
|
||||
char text[BAS_MAX_TOKEN_LEN]; // raw text of the token
|
||||
int32_t textLen;
|
||||
} BasTokenT;
|
||||
|
||||
// ============================================================
|
||||
// Lexer state
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
const char *source; // source text (not owned)
|
||||
int32_t sourceLen;
|
||||
int32_t pos; // current position in source
|
||||
int32_t line; // current line (1-based)
|
||||
int32_t col; // current column (1-based)
|
||||
BasTokenT token; // current token
|
||||
char error[256];
|
||||
} BasLexerT;
|
||||
|
||||
// ============================================================
|
||||
// API
|
||||
// ============================================================
|
||||
|
||||
// Initialize lexer with source text. The source must remain valid
|
||||
// for the lifetime of the lexer.
|
||||
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen);
|
||||
|
||||
// Advance to the next token. Returns the token type.
|
||||
// The token is available in lex->token.
|
||||
BasTokenTypeE basLexerNext(BasLexerT *lex);
|
||||
|
||||
// Peek at the current token type without advancing.
|
||||
BasTokenTypeE basLexerPeek(const BasLexerT *lex);
|
||||
|
||||
// Return human-readable name for a token type.
|
||||
const char *basTokenName(BasTokenTypeE type);
|
||||
|
||||
#endif // DVXBASIC_LEXER_H
|
||||
287
dvxbasic/compiler/opcodes.h
Normal file
287
dvxbasic/compiler/opcodes.h
Normal file
|
|
@ -0,0 +1,287 @@
|
|||
// opcodes.h -- DVX BASIC bytecode instruction definitions
|
||||
//
|
||||
// Stack-based p-code for the DVX BASIC virtual machine.
|
||||
// Embeddable: no DVX dependencies, pure C.
|
||||
|
||||
#ifndef DVXBASIC_OPCODES_H
|
||||
#define DVXBASIC_OPCODES_H
|
||||
|
||||
// ============================================================
|
||||
// Data type tags (used in Value representation)
|
||||
// ============================================================
|
||||
|
||||
#define BAS_TYPE_INTEGER 0 // 16-bit signed
|
||||
#define BAS_TYPE_LONG 1 // 32-bit signed
|
||||
#define BAS_TYPE_SINGLE 2 // 32-bit float
|
||||
#define BAS_TYPE_DOUBLE 3 // 64-bit float
|
||||
#define BAS_TYPE_STRING 4 // ref-counted dynamic string
|
||||
#define BAS_TYPE_BOOLEAN 5 // True (-1) or False (0)
|
||||
#define BAS_TYPE_ARRAY 6 // ref-counted array
|
||||
#define BAS_TYPE_UDT 7 // ref-counted user-defined type
|
||||
|
||||
// ============================================================
|
||||
// Stack operations
|
||||
// ============================================================
|
||||
|
||||
#define OP_NOP 0x00
|
||||
#define OP_PUSH_INT16 0x01 // [int16] push 16-bit integer
|
||||
#define OP_PUSH_INT32 0x02 // [int32] push 32-bit integer
|
||||
#define OP_PUSH_FLT32 0x03 // [float32] push 32-bit float
|
||||
#define OP_PUSH_FLT64 0x04 // [float64] push 64-bit float
|
||||
#define OP_PUSH_STR 0x05 // [uint16 idx] push string from constant pool
|
||||
#define OP_PUSH_TRUE 0x06 // push boolean True (-1)
|
||||
#define OP_PUSH_FALSE 0x07 // push boolean False (0)
|
||||
#define OP_POP 0x08 // discard top of stack
|
||||
#define OP_DUP 0x09 // duplicate top of stack
|
||||
|
||||
// ============================================================
|
||||
// Variable access
|
||||
// ============================================================
|
||||
|
||||
#define OP_LOAD_LOCAL 0x10 // [uint16 idx] push local variable
|
||||
#define OP_STORE_LOCAL 0x11 // [uint16 idx] pop to local variable
|
||||
#define OP_LOAD_GLOBAL 0x12 // [uint16 idx] push global variable
|
||||
#define OP_STORE_GLOBAL 0x13 // [uint16 idx] pop to global variable
|
||||
#define OP_LOAD_REF 0x14 // dereference top of stack (ByRef)
|
||||
#define OP_STORE_REF 0x15 // store through reference on stack
|
||||
#define OP_LOAD_ARRAY 0x16 // [uint8 dims] indices on stack, array ref below
|
||||
#define OP_STORE_ARRAY 0x17 // [uint8 dims] value, indices, array ref on stack
|
||||
#define OP_LOAD_FIELD 0x18 // [uint16 fieldIdx] load UDT field
|
||||
#define OP_STORE_FIELD 0x19 // [uint16 fieldIdx] store UDT field
|
||||
#define OP_PUSH_LOCAL_ADDR 0x1A // [uint16 idx] push address of local (for ByRef)
|
||||
#define OP_PUSH_GLOBAL_ADDR 0x1B // [uint16 idx] push address of global (for ByRef)
|
||||
|
||||
// ============================================================
|
||||
// Arithmetic (integer)
|
||||
// ============================================================
|
||||
|
||||
#define OP_ADD_INT 0x20
|
||||
#define OP_SUB_INT 0x21
|
||||
#define OP_MUL_INT 0x22
|
||||
#define OP_IDIV_INT 0x23 // integer divide (\)
|
||||
#define OP_MOD_INT 0x24
|
||||
#define OP_NEG_INT 0x25
|
||||
|
||||
// ============================================================
|
||||
// Arithmetic (float)
|
||||
// ============================================================
|
||||
|
||||
#define OP_ADD_FLT 0x26
|
||||
#define OP_SUB_FLT 0x27
|
||||
#define OP_MUL_FLT 0x28
|
||||
#define OP_DIV_FLT 0x29 // float divide (/)
|
||||
#define OP_NEG_FLT 0x2A
|
||||
#define OP_POW 0x2B // exponentiation (^)
|
||||
|
||||
// ============================================================
|
||||
// String operations
|
||||
// ============================================================
|
||||
|
||||
#define OP_STR_CONCAT 0x30
|
||||
#define OP_STR_LEFT 0x31
|
||||
#define OP_STR_RIGHT 0x32
|
||||
#define OP_STR_MID 0x33 // 3 args: str, start, len
|
||||
#define OP_STR_MID2 0x34 // 2 args: str, start (to end)
|
||||
#define OP_STR_LEN 0x35
|
||||
#define OP_STR_INSTR 0x36 // 2 args: str, find
|
||||
#define OP_STR_INSTR3 0x37 // 3 args: start, str, find
|
||||
#define OP_STR_UCASE 0x38
|
||||
#define OP_STR_LCASE 0x39
|
||||
#define OP_STR_TRIM 0x3A
|
||||
#define OP_STR_LTRIM 0x3B
|
||||
#define OP_STR_RTRIM 0x3C
|
||||
#define OP_STR_CHR 0x3D
|
||||
#define OP_STR_ASC 0x3E
|
||||
#define OP_STR_SPACE 0x3F
|
||||
|
||||
// ============================================================
|
||||
// Comparison (push boolean result)
|
||||
// ============================================================
|
||||
|
||||
#define OP_CMP_EQ 0x40
|
||||
#define OP_CMP_NE 0x41
|
||||
#define OP_CMP_LT 0x42
|
||||
#define OP_CMP_GT 0x43
|
||||
#define OP_CMP_LE 0x44
|
||||
#define OP_CMP_GE 0x45
|
||||
|
||||
// ============================================================
|
||||
// Logical / bitwise
|
||||
// ============================================================
|
||||
|
||||
#define OP_AND 0x48
|
||||
#define OP_OR 0x49
|
||||
#define OP_NOT 0x4A
|
||||
#define OP_XOR 0x4B
|
||||
#define OP_EQV 0x4C
|
||||
#define OP_IMP 0x4D
|
||||
|
||||
// ============================================================
|
||||
// Control flow
|
||||
// ============================================================
|
||||
|
||||
#define OP_JMP 0x50 // [int16 offset] unconditional jump
|
||||
#define OP_JMP_TRUE 0x51 // [int16 offset] jump if TOS is true
|
||||
#define OP_JMP_FALSE 0x52 // [int16 offset] jump if TOS is false
|
||||
#define OP_CALL 0x53 // [uint16 addr] [uint8 argc] [uint8 baseSlot]
|
||||
#define OP_GOSUB_RET 0x54 // pop PC from eval stack, jump (GOSUB return)
|
||||
#define OP_RET 0x55 // return from subroutine
|
||||
#define OP_RET_VAL 0x56 // return from function (value on stack)
|
||||
#define OP_FOR_INIT 0x57 // [uint16 varIdx] [uint8 isLocal] init FOR
|
||||
#define OP_FOR_NEXT 0x58 // [uint16 varIdx] [uint8 isLocal] [int16 loopTop]
|
||||
|
||||
// ============================================================
|
||||
// Type conversion
|
||||
// ============================================================
|
||||
|
||||
#define OP_CONV_INT_FLT 0x60 // int -> float
|
||||
#define OP_CONV_FLT_INT 0x61 // float -> int (banker's rounding)
|
||||
#define OP_CONV_INT_STR 0x62 // int -> string
|
||||
#define OP_CONV_STR_INT 0x63 // string -> int (VAL)
|
||||
#define OP_CONV_FLT_STR 0x64 // float -> string
|
||||
#define OP_CONV_STR_FLT 0x65 // string -> float (VAL)
|
||||
#define OP_CONV_INT_LONG 0x66 // int16 -> int32
|
||||
#define OP_CONV_LONG_INT 0x67 // int32 -> int16
|
||||
|
||||
// ============================================================
|
||||
// I/O
|
||||
// ============================================================
|
||||
|
||||
#define OP_PRINT 0x70 // print TOS to current output
|
||||
#define OP_PRINT_NL 0x71 // print newline
|
||||
#define OP_PRINT_TAB 0x72 // print tab (14-column zones)
|
||||
#define OP_PRINT_SPC 0x73 // [uint8 n] print n spaces
|
||||
#define OP_INPUT 0x74 // read line into string on stack
|
||||
#define OP_FILE_OPEN 0x75 // [uint8 mode] filename, channel# on stack
|
||||
#define OP_FILE_CLOSE 0x76 // channel# on stack
|
||||
#define OP_FILE_PRINT 0x77 // channel#, value on stack
|
||||
#define OP_FILE_INPUT 0x78 // channel# on stack, push string
|
||||
#define OP_FILE_EOF 0x79 // channel# on stack, push boolean
|
||||
#define OP_FILE_LINE_INPUT 0x7A // channel# on stack, push string
|
||||
|
||||
// ============================================================
|
||||
// UI / Event (used when form system is active)
|
||||
// ============================================================
|
||||
|
||||
#define OP_LOAD_PROP 0x80 // [uint16 ctrl] [uint16 prop] push property value
|
||||
#define OP_STORE_PROP 0x81 // [uint16 ctrl] [uint16 prop] pop to property
|
||||
#define OP_CALL_METHOD 0x82 // [uint16 ctrl] [uint16 method] [uint8 argc]
|
||||
#define OP_LOAD_FORM 0x83 // [uint16 formIdx]
|
||||
#define OP_UNLOAD_FORM 0x84 // [uint16 formIdx]
|
||||
#define OP_SHOW_FORM 0x85 // [uint16 formIdx] [uint8 modal]
|
||||
#define OP_HIDE_FORM 0x86 // [uint16 formIdx]
|
||||
#define OP_DO_EVENTS 0x87
|
||||
#define OP_MSGBOX 0x88 // [uint8 flags] message on stack
|
||||
#define OP_INPUTBOX 0x89 // prompt on stack, push result
|
||||
#define OP_ME_REF 0x8A // push current form reference
|
||||
|
||||
// ============================================================
|
||||
// Array / misc
|
||||
// ============================================================
|
||||
|
||||
#define OP_DIM_ARRAY 0x90 // [uint8 dims] [uint8 type] bounds on stack
|
||||
#define OP_REDIM 0x91 // [uint8 dims] [uint8 preserve] bounds on stack
|
||||
#define OP_ERASE 0x92 // array ref on stack
|
||||
#define OP_LBOUND 0x93 // [uint8 dim] array ref on stack
|
||||
#define OP_UBOUND 0x94 // [uint8 dim] array ref on stack
|
||||
#define OP_ON_ERROR 0x95 // [int16 handler] set error handler (0 = disable)
|
||||
#define OP_RESUME 0x96 // resume after error
|
||||
#define OP_RESUME_NEXT 0x97 // resume at next statement
|
||||
#define OP_RAISE_ERR 0x98 // error number on stack
|
||||
#define OP_ERR_NUM 0x99 // push current error number
|
||||
#define OP_ERR_CLEAR 0x9A // clear error state
|
||||
|
||||
// ============================================================
|
||||
// Math built-ins (single opcode each for common functions)
|
||||
// ============================================================
|
||||
|
||||
#define OP_MATH_ABS 0xA0
|
||||
#define OP_MATH_INT 0xA1 // floor
|
||||
#define OP_MATH_FIX 0xA2 // truncate toward zero
|
||||
#define OP_MATH_SGN 0xA3
|
||||
#define OP_MATH_SQR 0xA4
|
||||
#define OP_MATH_SIN 0xA5
|
||||
#define OP_MATH_COS 0xA6
|
||||
#define OP_MATH_TAN 0xA7
|
||||
#define OP_MATH_ATN 0xA8
|
||||
#define OP_MATH_LOG 0xA9
|
||||
#define OP_MATH_EXP 0xAA
|
||||
#define OP_MATH_RND 0xAB
|
||||
#define OP_MATH_RANDOMIZE 0xAC // seed on stack (or TIMER if -1)
|
||||
|
||||
// ============================================================
|
||||
// Conversion built-ins
|
||||
// ============================================================
|
||||
|
||||
#define OP_STR_VAL 0xB0 // VAL(s$) -> number
|
||||
#define OP_STR_STRF 0xB1 // STR$(n) -> string
|
||||
#define OP_STR_HEX 0xB2 // HEX$(n) -> string
|
||||
#define OP_STR_STRING 0xB3 // STRING$(n, char) -> string
|
||||
|
||||
// ============================================================
|
||||
// Extended built-ins
|
||||
// ============================================================
|
||||
|
||||
#define OP_MATH_TIMER 0xB4 // push seconds since midnight as DOUBLE
|
||||
#define OP_DATE_STR 0xB5 // push DATE$ string "MM-DD-YYYY"
|
||||
#define OP_TIME_STR 0xB6 // push TIME$ string "HH:MM:SS"
|
||||
#define OP_SLEEP 0xB7 // pop seconds, sleep
|
||||
#define OP_ENVIRON 0xB8 // pop env var name, push value string
|
||||
|
||||
// ============================================================
|
||||
// DATA/READ/RESTORE
|
||||
// ============================================================
|
||||
|
||||
#define OP_READ_DATA 0xB9 // push next value from data pool
|
||||
#define OP_RESTORE 0xBA // reset data pointer to 0
|
||||
|
||||
// ============================================================
|
||||
// WRITE # (comma-delimited with quoted strings)
|
||||
// ============================================================
|
||||
|
||||
#define OP_FILE_WRITE 0xBB // pop channel + value, write in WRITE format
|
||||
#define OP_FILE_WRITE_SEP 0xBC // pop channel, write comma separator
|
||||
#define OP_FILE_WRITE_NL 0xBD // pop channel, write newline
|
||||
|
||||
// ============================================================
|
||||
// Random/Binary file I/O
|
||||
// ============================================================
|
||||
|
||||
#define OP_FILE_GET 0xBE // pop channel + recno, read record, push value
|
||||
#define OP_FILE_PUT 0xBF // pop channel + recno + value, write record
|
||||
#define OP_FILE_SEEK 0xC0 // pop channel + position, seek
|
||||
#define OP_FILE_LOF 0xC1 // pop channel, push file length
|
||||
#define OP_FILE_LOC 0xC2 // pop channel, push current position
|
||||
#define OP_FILE_FREEFILE 0xC3 // push next free channel number
|
||||
#define OP_FILE_INPUT_N 0xC4 // pop channel + n, read n chars, push string
|
||||
|
||||
// ============================================================
|
||||
// Fixed-length strings and MID$ assignment
|
||||
// ============================================================
|
||||
|
||||
#define OP_STR_FIXLEN 0xC5 // [uint16 len] pop string, pad/truncate, push
|
||||
#define OP_STR_MID_ASGN 0xC6 // pop replacement, len, start, str; push modified
|
||||
|
||||
// ============================================================
|
||||
// PRINT USING
|
||||
// ============================================================
|
||||
|
||||
#define OP_PRINT_USING 0xC7 // pop format + value, push formatted string
|
||||
|
||||
// ============================================================
|
||||
// SPC(n) and TAB(n) with stack-based argument
|
||||
// ============================================================
|
||||
|
||||
#define OP_PRINT_TAB_N 0xC8 // pop column count, print spaces to reach column
|
||||
#define OP_PRINT_SPC_N 0xC9 // pop count, print that many spaces
|
||||
#define OP_FORMAT 0xCA // pop format string + value, push formatted string
|
||||
#define OP_SHELL 0xCB // pop command string, call system(), push return value
|
||||
#define OP_COMPARE_MODE 0xCC // [uint8 mode] set string compare mode (0=binary, 1=text)
|
||||
|
||||
// ============================================================
|
||||
// Halt
|
||||
// ============================================================
|
||||
|
||||
#define OP_HALT 0xFF
|
||||
|
||||
#endif // DVXBASIC_OPCODES_H
|
||||
4324
dvxbasic/compiler/parser.c
Normal file
4324
dvxbasic/compiler/parser.c
Normal file
File diff suppressed because it is too large
Load diff
57
dvxbasic/compiler/parser.h
Normal file
57
dvxbasic/compiler/parser.h
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
// parser.h -- DVX BASIC parser (recursive descent)
|
||||
//
|
||||
// Single-pass compiler: reads tokens from the lexer and emits
|
||||
// p-code directly via the code generator. No AST. Forward
|
||||
// references to SUBs/FUNCTIONs are resolved via backpatching.
|
||||
//
|
||||
// Embeddable: no DVX dependencies, pure C.
|
||||
|
||||
#ifndef DVXBASIC_PARSER_H
|
||||
#define DVXBASIC_PARSER_H
|
||||
|
||||
#include "lexer.h"
|
||||
#include "codegen.h"
|
||||
#include "symtab.h"
|
||||
#include "../runtime/vm.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// ============================================================
|
||||
// Parser state
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
BasLexerT lex;
|
||||
BasCodeGenT cg;
|
||||
BasSymTabT sym;
|
||||
char error[512];
|
||||
bool hasError;
|
||||
int32_t errorLine;
|
||||
int32_t lastUdtTypeId; // index of last resolved UDT type from resolveTypeName
|
||||
int32_t optionBase; // default array lower bound (0 or 1)
|
||||
bool optionCompareText; // true = case-insensitive string comparison
|
||||
bool optionExplicit; // true = variables must be declared with DIM
|
||||
uint8_t defType[26]; // default type per letter (A-Z), set by DEFINT etc.
|
||||
char currentProc[BAS_MAX_TOKEN_LEN]; // name of current SUB/FUNCTION
|
||||
} BasParserT;
|
||||
|
||||
// ============================================================
|
||||
// API
|
||||
// ============================================================
|
||||
|
||||
// Initialize parser with source text.
|
||||
void basParserInit(BasParserT *p, const char *source, int32_t sourceLen);
|
||||
|
||||
// Parse the entire source and generate p-code.
|
||||
// Returns true on success, false on error (check p->error).
|
||||
bool basParse(BasParserT *p);
|
||||
|
||||
// Build a module from the parsed code. Returns NULL on error.
|
||||
// Caller owns the module and must free with basModuleFree().
|
||||
BasModuleT *basParserBuildModule(BasParserT *p);
|
||||
|
||||
// Free parser resources.
|
||||
void basParserFree(BasParserT *p);
|
||||
|
||||
#endif // DVXBASIC_PARSER_H
|
||||
147
dvxbasic/compiler/symtab.c
Normal file
147
dvxbasic/compiler/symtab.c
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
// symtab.c -- DVX BASIC symbol table implementation
|
||||
|
||||
#include "symtab.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <string.h>
|
||||
|
||||
// ============================================================
|
||||
// Case-insensitive name comparison
|
||||
// ============================================================
|
||||
|
||||
// Compare two NUL-terminated names, treating the ASCII letters a-z and
// A-Z as equal. Returns true only when both names have the same length
// and match at every position.
static bool namesEqual(const char *a, const char *b) {
    for (; *a != '\0' && *b != '\0'; a++, b++) {
        char left  = *a;
        char right = *b;

        if (left >= 'a' && left <= 'z') {
            left = (char)(left - 32);
        }

        if (right >= 'a' && right <= 'z') {
            right = (char)(right - 32);
        }

        if (left != right) {
            return false;
        }
    }

    // At least one name has ended; they are equal only if both have.
    return *a == *b;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabAdd
|
||||
// ============================================================
|
||||
|
||||
// Add a new symbol to the current scope (local while inside a
// SUB/FUNCTION, global otherwise).
//
// Returns the new entry, zero-filled apart from name, kind, scope,
// dataType and isDefined = true. Returns NULL when:
//   - the table already holds BAS_MAX_SYMBOLS entries,
//   - the name does not fit in BasSymbolT.name, or
//   - a symbol with the same name (ignoring case) already exists in the
//     current scope.
//
// Over-long names are rejected rather than truncated. Lookups compare
// the full requested name against the stored text, so a truncated entry
// could never be found again and would not be detected as a duplicate.
BasSymbolT *basSymTabAdd(BasSymTabT *tab, const char *name, BasSymKindE kind, uint8_t dataType) {
    if (tab->count >= BAS_MAX_SYMBOLS) {
        return NULL;
    }

    size_t nameLen = strlen(name);

    if (nameLen >= BAS_MAX_SYMBOL_NAME) {
        return NULL; // would not round-trip through the name buffer
    }

    // Check for duplicate in current scope
    BasScopeE scope = tab->inLocalScope ? SCOPE_LOCAL : SCOPE_GLOBAL;

    for (int32_t i = 0; i < tab->count; i++) {
        if (tab->symbols[i].scope == scope && namesEqual(tab->symbols[i].name, name)) {
            return NULL; // duplicate
        }
    }

    BasSymbolT *sym = &tab->symbols[tab->count++];
    memset(sym, 0, sizeof(*sym));
    memcpy(sym->name, name, nameLen); // terminator already supplied by memset
    sym->kind      = kind;
    sym->scope     = scope;
    sym->dataType  = dataType;
    sym->isDefined = true;

    return sym;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabAllocSlot
|
||||
// ============================================================
|
||||
|
||||
// Hand out the next unused variable slot index for the active scope and
// advance that scope's counter: the local counter inside a SUB/FUNCTION,
// the global counter otherwise.
int32_t basSymTabAllocSlot(BasSymTabT *tab) {
    int32_t *counter = tab->inLocalScope ? &tab->nextLocalIdx : &tab->nextGlobalIdx;
    int32_t  slot    = *counter;

    *counter = slot + 1;
    return slot;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabEnterLocal
|
||||
// ============================================================
|
||||
|
||||
// Switch the table to local scope and restart local slot numbering at 0.
void basSymTabEnterLocal(BasSymTabT *tab) {
    tab->nextLocalIdx = 0;
    tab->inLocalScope = true;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabFind
|
||||
// ============================================================
|
||||
|
||||
// Resolve a name, ignoring case. While in local scope the local symbols
// are searched first, newest entry to oldest; if none matches (or the
// table is in global scope) the lookup falls back to the global symbols.
// Returns NULL when the name is unknown.
BasSymbolT *basSymTabFind(BasSymTabT *tab, const char *name) {
    if (!tab->inLocalScope) {
        return basSymTabFindGlobal(tab, name);
    }

    int32_t i = tab->count;

    while (i-- > 0) {
        BasSymbolT *candidate = &tab->symbols[i];

        if (candidate->scope != SCOPE_LOCAL) {
            continue;
        }

        if (namesEqual(candidate->name, name)) {
            return candidate;
        }
    }

    return basSymTabFindGlobal(tab, name);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabFindGlobal
|
||||
// ============================================================
|
||||
|
||||
// Return the first global-scope symbol whose name matches (ignoring
// case), scanning from the oldest entry, or NULL if there is none.
BasSymbolT *basSymTabFindGlobal(BasSymTabT *tab, const char *name) {
    BasSymbolT *end = tab->symbols + tab->count;

    for (BasSymbolT *sym = tab->symbols; sym < end; sym++) {
        if (sym->scope != SCOPE_GLOBAL) {
            continue;
        }

        if (namesEqual(sym->name, name)) {
            return sym;
        }
    }

    return NULL;
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabInit
|
||||
// ============================================================
|
||||
|
||||
// Reset the whole table to zero bytes: no symbols, both slot counters at
// 0, and inLocalScope false (global scope).
void basSymTabInit(BasSymTabT *tab) {
    memset(tab, 0, sizeof *tab);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// basSymTabLeaveLocal
|
||||
// ============================================================
|
||||
|
||||
// Drop every local symbol and return to global scope.
//
// Surviving entries are compacted toward the front of the array in their
// original order, so a BasSymbolT pointer taken before this call may
// refer to a different entry afterwards.
void basSymTabLeaveLocal(BasSymTabT *tab) {
    int32_t kept = 0;

    for (int32_t src = 0; src < tab->count; src++) {
        if (tab->symbols[src].scope == SCOPE_LOCAL) {
            continue;
        }

        if (kept != src) {
            tab->symbols[kept] = tab->symbols[src];
        }

        kept++;
    }

    tab->nextLocalIdx = 0;
    tab->inLocalScope = false;
    tab->count        = kept;
}
|
||||
129
dvxbasic/compiler/symtab.h
Normal file
129
dvxbasic/compiler/symtab.h
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
// symtab.h -- DVX BASIC symbol table
|
||||
//
|
||||
// Tracks variables, constants, subroutines, functions, and labels
|
||||
// during compilation. Supports nested scopes (global + one local
|
||||
// scope per SUB/FUNCTION).
|
||||
//
|
||||
// Embeddable: no DVX dependencies, pure C.
|
||||
|
||||
#ifndef DVXBASIC_SYMTAB_H
|
||||
#define DVXBASIC_SYMTAB_H
|
||||
|
||||
#include "../compiler/opcodes.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// ============================================================
|
||||
// Symbol kinds
|
||||
// ============================================================
|
||||
|
||||
// What kind of program entity a symbol-table entry names.
typedef enum {
    SYM_VARIABLE = 0,
    SYM_CONST    = 1,
    SYM_SUB      = 2,
    SYM_FUNCTION = 3,
    SYM_LABEL    = 4,
    SYM_TYPE_DEF = 5    // user-defined TYPE
} BasSymKindE;
|
||||
|
||||
// ============================================================
|
||||
// Symbol scope
|
||||
// ============================================================
|
||||
|
||||
// Scope a symbol belongs to: the module-wide global scope or the local
// scope of the SUB/FUNCTION being compiled.
typedef enum {
    SCOPE_GLOBAL = 0,
    SCOPE_LOCAL  = 1
} BasScopeE;
|
||||
|
||||
// ============================================================
|
||||
// Symbol entry
|
||||
// ============================================================
|
||||
|
||||
// Fixed capacities used by the symbol structures.
#define BAS_MAX_SYMBOL_NAME  64   // bytes in a name buffer, including the NUL
#define BAS_MAX_PARAMS       16   // parameters recorded per SUB/FUNCTION
#define BAS_MAX_CALL_PATCHES 32   // backpatch addresses recorded per symbol
#define BAS_MAX_UDT_FIELDS   32   // fields recorded per user-defined TYPE

// One field of a user-defined TYPE.
typedef struct {
    char    name[BAS_MAX_SYMBOL_NAME];
    uint8_t dataType;    // one of the BAS_TYPE_* tags
    int32_t udtTypeId;   // when dataType == BAS_TYPE_UDT: index of the TYPE_DEF symbol
} BasFieldDefT;
|
||||
|
||||
typedef struct {
|
||||
char name[BAS_MAX_SYMBOL_NAME];
|
||||
BasSymKindE kind;
|
||||
BasScopeE scope;
|
||||
uint8_t dataType; // BAS_TYPE_* for variables/functions
|
||||
int32_t index; // slot index (local or global)
|
||||
int32_t codeAddr; // PC address for SUB/FUNCTION/LABEL
|
||||
bool isDefined; // false = forward-declared
|
||||
bool isArray;
|
||||
bool isShared;
|
||||
int32_t udtTypeId; // for variables of BAS_TYPE_UDT: index of TYPE_DEF symbol
|
||||
int32_t fixedLen; // for STRING * n: fixed length (0 = variable-length)
|
||||
|
||||
// For SUB/FUNCTION: parameter info
|
||||
int32_t paramCount;
|
||||
uint8_t paramTypes[BAS_MAX_PARAMS];
|
||||
bool paramByVal[BAS_MAX_PARAMS];
|
||||
|
||||
// Forward-reference backpatch list (code addresses to patch when defined)
|
||||
int32_t patchAddrs[BAS_MAX_CALL_PATCHES];
|
||||
int32_t patchCount;
|
||||
|
||||
// For CONST: the constant value
|
||||
union {
|
||||
int32_t constInt;
|
||||
double constDbl;
|
||||
};
|
||||
char constStr[256];
|
||||
|
||||
// For TYPE_DEF: field definitions
|
||||
BasFieldDefT fields[BAS_MAX_UDT_FIELDS];
|
||||
int32_t fieldCount;
|
||||
} BasSymbolT;
|
||||
|
||||
// ============================================================
|
||||
// Symbol table
|
||||
// ============================================================
|
||||
|
||||
#define BAS_MAX_SYMBOLS 512
|
||||
|
||||
typedef struct {
|
||||
BasSymbolT symbols[BAS_MAX_SYMBOLS];
|
||||
int32_t count;
|
||||
int32_t nextGlobalIdx; // next global variable slot
|
||||
int32_t nextLocalIdx; // next local variable slot (reset per SUB/FUNCTION)
|
||||
bool inLocalScope; // true when inside SUB/FUNCTION
|
||||
} BasSymTabT;
|
||||
|
||||
// ============================================================
|
||||
// API
|
||||
// ============================================================
|
||||
|
||||
void basSymTabInit(BasSymTabT *tab);
|
||||
|
||||
// Add a symbol. Returns the symbol pointer, or NULL if the table is full
|
||||
// or the name already exists in the current scope.
|
||||
BasSymbolT *basSymTabAdd(BasSymTabT *tab, const char *name, BasSymKindE kind, uint8_t dataType);
|
||||
|
||||
// Look up a symbol by name. Searches local scope first, then global.
|
||||
// Case-insensitive.
|
||||
BasSymbolT *basSymTabFind(BasSymTabT *tab, const char *name);
|
||||
|
||||
// Look up a symbol in the global scope only.
|
||||
BasSymbolT *basSymTabFindGlobal(BasSymTabT *tab, const char *name);
|
||||
|
||||
// Enter local scope (called at SUB/FUNCTION start).
|
||||
void basSymTabEnterLocal(BasSymTabT *tab);
|
||||
|
||||
// Leave local scope (called at END SUB/FUNCTION). Removes local symbols.
|
||||
void basSymTabLeaveLocal(BasSymTabT *tab);
|
||||
|
||||
// Allocate the next variable slot (global or local depending on scope).
|
||||
int32_t basSymTabAllocSlot(BasSymTabT *tab);
|
||||
|
||||
#endif // DVXBASIC_SYMTAB_H
|
||||
633
dvxbasic/runtime/values.c
Normal file
633
dvxbasic/runtime/values.c
Normal file
|
|
@ -0,0 +1,633 @@
|
|||
// values.c -- DVX BASIC value system implementation
|
||||
//
|
||||
// Tagged union values with reference-counted strings. The string
|
||||
// heap uses simple refcounting: assignment increments, scope exit
|
||||
// decrements, zero frees. No garbage collector needed.
|
||||
|
||||
#include "values.h"
|
||||
#include "../compiler/opcodes.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// ============================================================
|
||||
// String system
|
||||
// ============================================================
|
||||
|
||||
// Singleton empty string -- never freed, always available.
|
||||
// Extra byte for the null terminator via the struct hack.
|
||||
static struct {
|
||||
BasStringT hdr;
|
||||
char nul;
|
||||
} sEmptyStringStorage = { { .refCount = 999999, .len = 0, .cap = 1 }, '\0' };
|
||||
BasStringT *basEmptyString = &sEmptyStringStorage.hdr;
|
||||
|
||||
|
||||
BasStringT *basStringAlloc(int32_t cap) {
|
||||
if (cap < 1) {
|
||||
cap = 1;
|
||||
}
|
||||
|
||||
BasStringT *s = (BasStringT *)malloc(sizeof(BasStringT) + cap);
|
||||
|
||||
if (!s) {
|
||||
return basStringRef(basEmptyString);
|
||||
}
|
||||
|
||||
s->refCount = 1;
|
||||
s->len = 0;
|
||||
s->cap = cap;
|
||||
s->data[0] = '\0';
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
BasStringT *basStringConcat(const BasStringT *a, const BasStringT *b) {
|
||||
int32_t newLen = a->len + b->len;
|
||||
BasStringT *s = basStringAlloc(newLen + 1);
|
||||
memcpy(s->data, a->data, a->len);
|
||||
memcpy(s->data + a->len, b->data, b->len);
|
||||
s->data[newLen] = '\0';
|
||||
s->len = newLen;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
int32_t basStringCompare(const BasStringT *a, const BasStringT *b) {
|
||||
int32_t minLen = a->len < b->len ? a->len : b->len;
|
||||
int32_t cmp = memcmp(a->data, b->data, minLen);
|
||||
|
||||
if (cmp != 0) {
|
||||
return cmp;
|
||||
}
|
||||
|
||||
if (a->len < b->len) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (a->len > b->len) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int32_t basStringCompareCI(const BasStringT *a, const BasStringT *b) {
|
||||
int32_t minLen = a->len < b->len ? a->len : b->len;
|
||||
|
||||
for (int32_t i = 0; i < minLen; i++) {
|
||||
int32_t ca = toupper((unsigned char)a->data[i]);
|
||||
int32_t cb = toupper((unsigned char)b->data[i]);
|
||||
|
||||
if (ca != cb) {
|
||||
return ca - cb;
|
||||
}
|
||||
}
|
||||
|
||||
if (a->len < b->len) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (a->len > b->len) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
BasStringT *basStringNew(const char *text, int32_t len) {
|
||||
if (!text || len <= 0) {
|
||||
return basStringRef(basEmptyString);
|
||||
}
|
||||
|
||||
BasStringT *s = basStringAlloc(len + 1);
|
||||
memcpy(s->data, text, len);
|
||||
s->data[len] = '\0';
|
||||
s->len = len;
|
||||
return s;
|
||||
}
|
||||
|
||||
|
||||
// Take a reference on s and return it; NULL is passed through.
//
// The empty-string singleton is left untouched: basStringUnref()
// never decrements it, so counting references on it would only make
// its refCount grow without bound and eventually overflow.
BasStringT *basStringRef(BasStringT *s) {
    if (s && s != basEmptyString) {
        s->refCount++;
    }

    return s;
}
|
||||
|
||||
|
||||
BasStringT *basStringSub(const BasStringT *s, int32_t start, int32_t len) {
|
||||
if (start < 0) {
|
||||
start = 0;
|
||||
}
|
||||
|
||||
if (start >= s->len) {
|
||||
return basStringRef(basEmptyString);
|
||||
}
|
||||
|
||||
if (len < 0 || start + len > s->len) {
|
||||
len = s->len - start;
|
||||
}
|
||||
|
||||
return basStringNew(s->data + start, len);
|
||||
}
|
||||
|
||||
|
||||
void basStringSystemInit(void) {
|
||||
sEmptyStringStorage.nul = '\0';
|
||||
}
|
||||
|
||||
|
||||
// Counterpart of basStringSystemInit(). Intentionally empty: the
// empty string lives in static storage, so there is nothing to free.
void basStringSystemShutdown(void) {
}
|
||||
|
||||
|
||||
// Drop one reference on s and free it once the count reaches zero.
// NULL and the empty-string singleton are ignored.
void basStringUnref(BasStringT *s) {
    if (s != NULL && s != basEmptyString) {
        s->refCount -= 1;

        if (s->refCount < 1) {
            free(s);
        }
    }
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Array system
|
||||
// ============================================================
|
||||
|
||||
// Destroy an array unconditionally (the refCount is not consulted):
// releases every element, then frees the element storage and the
// array header. NULL is ignored.
void basArrayFree(BasArrayT *arr) {
    if (arr == NULL) {
        return;
    }

    if (arr->elements != NULL) {
        int32_t i = 0;

        while (i < arr->totalElements) {
            basValRelease(arr->elements + i);
            i++;
        }

        free(arr->elements);
    }

    free(arr);
}
|
||||
|
||||
|
||||
// Translate a multi-dimensional subscript into an index into
// arr->elements (row-major: the last dimension varies fastest).
//
// indices holds ndims subscripts expressed in the array's own bounds
// (lbound..ubound per dimension). Returns -1 when arr is NULL, the
// dimension count does not match, or any subscript is out of range.
int32_t basArrayIndex(BasArrayT *arr, int32_t *indices, int32_t ndims) {
    if (!arr || ndims != arr->dims) {
        return -1;
    }

    int32_t flatIdx    = 0;
    int32_t multiplier = 1;

    for (int32_t d = ndims - 1; d >= 0; d--) {
        // Widen before subtracting: subscript - lbound and
        // ubound - lbound + 1 can exceed int32_t for extreme values.
        int64_t dimSize = (int64_t)arr->ubound[d] - (int64_t)arr->lbound[d] + 1;
        int64_t idx     = (int64_t)indices[d] - (int64_t)arr->lbound[d];

        if (idx < 0 || idx >= dimSize) {
            return -1;
        }

        flatIdx    += (int32_t)idx * multiplier;
        multiplier *= (int32_t)dimSize;
    }

    return flatIdx;
}
|
||||
|
||||
|
||||
BasArrayT *basArrayNew(int32_t dims, int32_t *lbounds, int32_t *ubounds, uint8_t elementType) {
|
||||
if (dims < 1 || dims > BAS_ARRAY_MAX_DIMS) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
BasArrayT *arr = (BasArrayT *)calloc(1, sizeof(BasArrayT));
|
||||
|
||||
if (!arr) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
arr->refCount = 1;
|
||||
arr->elementType = elementType;
|
||||
arr->dims = dims;
|
||||
|
||||
int32_t total = 1;
|
||||
|
||||
for (int32_t d = 0; d < dims; d++) {
|
||||
arr->lbound[d] = lbounds[d];
|
||||
arr->ubound[d] = ubounds[d];
|
||||
int32_t dimSize = ubounds[d] - lbounds[d] + 1;
|
||||
|
||||
if (dimSize < 1) {
|
||||
free(arr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
total *= dimSize;
|
||||
}
|
||||
|
||||
arr->totalElements = total;
|
||||
arr->elements = (BasValueT *)calloc(total, sizeof(BasValueT));
|
||||
|
||||
if (!arr->elements) {
|
||||
free(arr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Initialize all elements to the default for the element type
|
||||
for (int32_t i = 0; i < total; i++) {
|
||||
arr->elements[i].type = elementType;
|
||||
}
|
||||
|
||||
return arr;
|
||||
}
|
||||
|
||||
|
||||
// Take a reference on arr and return it; NULL is passed through.
BasArrayT *basArrayRef(BasArrayT *arr) {
    if (arr != NULL) {
        arr->refCount += 1;
    }

    return arr;
}
|
||||
|
||||
|
||||
// Drop one reference on arr; the array is destroyed with
// basArrayFree() once the count reaches zero. NULL is ignored.
void basArrayUnref(BasArrayT *arr) {
    if (arr != NULL) {
        arr->refCount -= 1;

        if (arr->refCount < 1) {
            basArrayFree(arr);
        }
    }
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// UDT system
|
||||
// ============================================================
|
||||
|
||||
// Destroy a UDT instance unconditionally (the refCount is not
// consulted): releases every field value, then frees the field
// storage and the instance itself. NULL is ignored.
void basUdtFree(BasUdtT *udt) {
    if (udt == NULL) {
        return;
    }

    if (udt->fields != NULL) {
        int32_t i = 0;

        while (i < udt->fieldCount) {
            basValRelease(udt->fields + i);
            i++;
        }

        free(udt->fields);
    }

    free(udt);
}
|
||||
|
||||
|
||||
BasUdtT *basUdtNew(int32_t typeId, int32_t fieldCount) {
|
||||
BasUdtT *udt = (BasUdtT *)calloc(1, sizeof(BasUdtT));
|
||||
|
||||
if (!udt) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
udt->refCount = 1;
|
||||
udt->typeId = typeId;
|
||||
udt->fieldCount = fieldCount;
|
||||
udt->fields = (BasValueT *)calloc(fieldCount, sizeof(BasValueT));
|
||||
|
||||
if (!udt->fields) {
|
||||
free(udt);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return udt;
|
||||
}
|
||||
|
||||
|
||||
// Take a reference on udt and return it; NULL is passed through.
BasUdtT *basUdtRef(BasUdtT *udt) {
    if (udt != NULL) {
        udt->refCount += 1;
    }

    return udt;
}
|
||||
|
||||
|
||||
// Drop one reference on udt; the instance is destroyed with
// basUdtFree() once the count reaches zero. NULL is ignored.
void basUdtUnref(BasUdtT *udt) {
    if (udt != NULL) {
        udt->refCount -= 1;

        if (udt->refCount < 1) {
            basUdtFree(udt);
        }
    }
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Value constructors
|
||||
// ============================================================
|
||||
|
||||
BasValueT basValBool(bool v) {
|
||||
BasValueT val;
|
||||
val.type = BAS_TYPE_BOOLEAN;
|
||||
val.boolVal = v ? -1 : 0;
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
// Duplicate a value. Strings, arrays, and UDTs are shared rather than
// cloned: a non-NULL payload gets one more reference. All other types
// are plain copies.
BasValueT basValCopy(BasValueT v) {
    switch (v.type) {
        case BAS_TYPE_STRING:
            if (v.strVal != NULL) {
                basStringRef(v.strVal);
            }
            break;

        case BAS_TYPE_ARRAY:
            if (v.arrVal != NULL) {
                basArrayRef(v.arrVal);
            }
            break;

        case BAS_TYPE_UDT:
            if (v.udtVal != NULL) {
                basUdtRef(v.udtVal);
            }
            break;

        default:
            break;
    }

    return v;
}
|
||||
|
||||
|
||||
BasValueT basValDouble(double v) {
|
||||
BasValueT val;
|
||||
val.type = BAS_TYPE_DOUBLE;
|
||||
val.dblVal = v;
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
BasValueT basValInteger(int16_t v) {
|
||||
BasValueT val;
|
||||
val.type = BAS_TYPE_INTEGER;
|
||||
val.intVal = v;
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
BasValueT basValLong(int32_t v) {
|
||||
BasValueT val;
|
||||
val.type = BAS_TYPE_LONG;
|
||||
val.longVal = v;
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
BasValueT basValSingle(float v) {
|
||||
BasValueT val;
|
||||
val.type = BAS_TYPE_SINGLE;
|
||||
val.sngVal = v;
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
// Build a STRING value that shares s. The value holds its own
// reference (the caller keeps theirs); a NULL s yields the shared
// empty string.
BasValueT basValString(BasStringT *s) {
    BasStringT *shared = (s != NULL) ? s : basEmptyString;

    BasValueT result;
    result.type   = BAS_TYPE_STRING;
    result.strVal = basStringRef(shared);
    return result;
}
|
||||
|
||||
|
||||
BasValueT basValStringFromC(const char *text) {
|
||||
BasValueT val;
|
||||
val.type = BAS_TYPE_STRING;
|
||||
val.strVal = basStringNew(text, text ? (int32_t)strlen(text) : 0);
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
// Give up the reference held by *v. For strings, arrays, and UDTs the
// payload is unreferenced and the pointer is cleared; the type tag is
// left as is. Other types need no cleanup.
void basValRelease(BasValueT *v) {
    switch (v->type) {
        case BAS_TYPE_STRING:
            basStringUnref(v->strVal);
            v->strVal = NULL;
            break;

        case BAS_TYPE_ARRAY:
            basArrayUnref(v->arrVal);
            v->arrVal = NULL;
            break;

        case BAS_TYPE_UDT:
            basUdtUnref(v->udtVal);
            v->udtVal = NULL;
            break;

        default:
            break;
    }
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Type conversion
|
||||
// ============================================================
|
||||
|
||||
// Convert to BOOLEAN using the truthiness rules of basValIsTruthy().
BasValueT basValToBool(BasValueT v) {
    bool truthy = basValIsTruthy(v);
    return basValBool(truthy);
}
|
||||
|
||||
|
||||
// Convert to DOUBLE via the numeric reading of basValToNumber().
BasValueT basValToDouble(BasValueT v) {
    double number = basValToNumber(v);
    return basValDouble(number);
}
|
||||
|
||||
|
||||
// Convert to INTEGER (16-bit).
//
// The numeric reading of v is rounded with banker's rounding (round
// half to even), so 0.5 -> 0, 1.5 -> 2, 2.5 -> 2, -2.5 -> -2. rint()
// is used for this; it honours the current floating-point rounding
// mode, which is round-to-nearest-even unless the program changes it.
//
// NaN is treated as 0 and the rounded value is clamped to the int32_t
// range before the conversion, because converting an unrepresentable
// double to an integer type is undefined behaviour. No overflow error
// is raised here: values outside the 16-bit range are narrowed by the
// final cast, as before.
BasValueT basValToInteger(BasValueT v) {
    double n = basValToNumber(v);

    if (isnan(n)) {
        n = 0.0;
    }

    double rounded = rint(n);

    if (rounded > (double)INT32_MAX) {
        rounded = (double)INT32_MAX;
    } else if (rounded < (double)INT32_MIN) {
        rounded = (double)INT32_MIN;
    }

    return basValInteger((int16_t)(int32_t)rounded);
}
|
||||
|
||||
|
||||
// Convert to LONG (32-bit).
//
// The numeric reading of v is rounded with banker's rounding (round
// half to even), matching basValToInteger(). rint() is used for this;
// it honours the current floating-point rounding mode, which is
// round-to-nearest-even unless the program changes it.
//
// NaN is treated as 0 and results outside the int32_t range are
// clamped to INT32_MIN / INT32_MAX, because converting an
// unrepresentable double to an integer type is undefined behaviour.
BasValueT basValToLong(BasValueT v) {
    double n = basValToNumber(v);

    if (isnan(n)) {
        n = 0.0;
    }

    double rounded = rint(n);

    if (rounded > (double)INT32_MAX) {
        rounded = (double)INT32_MAX;
    } else if (rounded < (double)INT32_MIN) {
        rounded = (double)INT32_MIN;
    }

    return basValLong((int32_t)rounded);
}
|
||||
|
||||
|
||||
// Numeric reading of a value as a double. Numeric and boolean types
// are widened directly; a non-empty string is parsed with atof();
// empty/NULL strings and every other type read as 0.0.
double basValToNumber(BasValueT v) {
    double number = 0.0;

    switch (v.type) {
        case BAS_TYPE_INTEGER:
            number = (double)v.intVal;
            break;

        case BAS_TYPE_LONG:
            number = (double)v.longVal;
            break;

        case BAS_TYPE_SINGLE:
            number = (double)v.sngVal;
            break;

        case BAS_TYPE_DOUBLE:
            number = v.dblVal;
            break;

        case BAS_TYPE_BOOLEAN:
            number = (double)v.boolVal;
            break;

        case BAS_TYPE_STRING:
            if (v.strVal != NULL && v.strVal->len > 0) {
                number = atof(v.strVal->data);
            }
            break;

        default:
            break;
    }

    return number;
}
|
||||
|
||||
|
||||
// Convert to SINGLE: the numeric reading of v narrowed to float.
BasValueT basValToSingle(BasValueT v) {
    float narrowed = (float)basValToNumber(v);
    return basValSingle(narrowed);
}
|
||||
|
||||
|
||||
// Convert to STRING. A string input is returned as a copy sharing the
// same payload; anything else is formatted by basValFormatString(),
// whose returned reference is handed to the result.
BasValueT basValToString(BasValueT v) {
    if (v.type != BAS_TYPE_STRING) {
        BasValueT formatted;
        formatted.type   = BAS_TYPE_STRING;
        formatted.strVal = basValFormatString(v);
        return formatted;
    }

    return basValCopy(v);
}
|
||||
|
||||
|
||||
// Textual form of a value, returned as a string the caller owns one
// reference to. Integers print in decimal, SINGLE/DOUBLE use "%g",
// booleans print "True"/"False", strings are returned as-is with an
// added reference, and any other type yields the empty string.
BasStringT *basValFormatString(BasValueT v) {
    char text[64];

    switch (v.type) {
        case BAS_TYPE_INTEGER:
            snprintf(text, sizeof(text), "%d", (int)v.intVal);
            break;

        case BAS_TYPE_LONG:
            snprintf(text, sizeof(text), "%ld", (long)v.longVal);
            break;

        case BAS_TYPE_SINGLE:
            snprintf(text, sizeof(text), "%g", (double)v.sngVal);
            break;

        case BAS_TYPE_DOUBLE:
            snprintf(text, sizeof(text), "%g", v.dblVal);
            break;

        case BAS_TYPE_BOOLEAN:
            if (v.boolVal) {
                return basStringNew("True", 4);
            }
            return basStringNew("False", 5);

        case BAS_TYPE_STRING:
            return basStringRef(v.strVal ? v.strVal : basEmptyString);

        default:
            return basStringRef(basEmptyString);
    }

    // Numeric cases fall through to here with `text` filled in.
    return basStringNew(text, (int32_t)strlen(text));
}
|
||||
|
||||
|
||||
// Truthiness test: numbers and booleans are true when non-zero,
// strings when non-NULL and non-empty; every other type is false.
bool basValIsTruthy(BasValueT v) {
    bool truthy = false;

    switch (v.type) {
        case BAS_TYPE_INTEGER:
            truthy = (v.intVal != 0);
            break;

        case BAS_TYPE_LONG:
            truthy = (v.longVal != 0);
            break;

        case BAS_TYPE_SINGLE:
            truthy = (v.sngVal != 0.0f);
            break;

        case BAS_TYPE_DOUBLE:
            truthy = (v.dblVal != 0.0);
            break;

        case BAS_TYPE_BOOLEAN:
            truthy = (v.boolVal != 0);
            break;

        case BAS_TYPE_STRING:
            truthy = (v.strVal != NULL && v.strVal->len > 0);
            break;

        default:
            break;
    }

    return truthy;
}
|
||||
|
||||
|
||||
// Compare two values. Always returns exactly -1, 0, or 1.
//
// Two strings are compared byte-wise (case-sensitive); a NULL string
// payload counts as the empty string. The raw basStringCompare()
// result can be any negative/positive number, so it is reduced to its
// sign to honour the documented -1/0/1 contract. Every other
// combination is compared numerically via basValToNumber(); if either
// side is NaN the result is 0.
int32_t basValCompare(BasValueT a, BasValueT b) {
    if (a.type == BAS_TYPE_STRING && b.type == BAS_TYPE_STRING) {
        int32_t cmp = basStringCompare(a.strVal ? a.strVal : basEmptyString,
                                       b.strVal ? b.strVal : basEmptyString);
        return (cmp > 0) - (cmp < 0);
    }

    double na = basValToNumber(a);
    double nb = basValToNumber(b);

    return (na > nb) - (na < nb);
}
|
||||
|
||||
|
||||
// Compare two values, ignoring letter case for strings. Always
// returns exactly -1, 0, or 1, like basValCompare().
//
// Two strings are compared with basStringCompareCI() (a NULL payload
// counts as the empty string) and the result is reduced to its sign.
// Every other combination is compared numerically via
// basValToNumber(); if either side is NaN the result is 0.
int32_t basValCompareCI(BasValueT a, BasValueT b) {
    if (a.type == BAS_TYPE_STRING && b.type == BAS_TYPE_STRING) {
        int32_t cmp = basStringCompareCI(a.strVal ? a.strVal : basEmptyString,
                                         b.strVal ? b.strVal : basEmptyString);
        return (cmp > 0) - (cmp < 0);
    }

    double na = basValToNumber(a);
    double nb = basValToNumber(b);

    return (na > nb) - (na < nb);
}
|
||||
|
||||
|
||||
uint8_t basValPromoteType(uint8_t a, uint8_t b) {
|
||||
// String stays string (concat, not arithmetic)
|
||||
if (a == BAS_TYPE_STRING || b == BAS_TYPE_STRING) {
|
||||
return BAS_TYPE_STRING;
|
||||
}
|
||||
|
||||
// Double wins over everything
|
||||
if (a == BAS_TYPE_DOUBLE || b == BAS_TYPE_DOUBLE) {
|
||||
return BAS_TYPE_DOUBLE;
|
||||
}
|
||||
|
||||
// Single wins over integer/long
|
||||
if (a == BAS_TYPE_SINGLE || b == BAS_TYPE_SINGLE) {
|
||||
return BAS_TYPE_SINGLE;
|
||||
}
|
||||
|
||||
// Long wins over integer
|
||||
if (a == BAS_TYPE_LONG || b == BAS_TYPE_LONG) {
|
||||
return BAS_TYPE_LONG;
|
||||
}
|
||||
|
||||
return BAS_TYPE_INTEGER;
|
||||
}
|
||||
180
dvxbasic/runtime/values.h
Normal file
180
dvxbasic/runtime/values.h
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
// values.h -- DVX BASIC value representation and string heap
|
||||
//
|
||||
// Tagged union value type for the VM's evaluation stack, variables,
|
||||
// and array elements. Strings are reference-counted for automatic
|
||||
// memory management without a garbage collector.
|
||||
//
|
||||
// Embeddable: no DVX dependencies, pure C.
|
||||
|
||||
#ifndef DVXBASIC_VALUES_H
|
||||
#define DVXBASIC_VALUES_H
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
|
||||
// ============================================================
|
||||
// Reference-counted string
|
||||
// ============================================================
|
||||
|
||||
// Header of a reference-counted string; the character data follows
// the header in the same allocation.
typedef struct {
    int32_t refCount;
    int32_t len;
    // Allocated capacity of data[] (>= len + 1).
    int32_t cap;
    // Flexible array member holding the null-terminated text.
    char    data[];
} BasStringT;
|
||||
|
||||
// Allocate a new string from a C string. refCount starts at 1.
|
||||
BasStringT *basStringNew(const char *text, int32_t len);
|
||||
|
||||
// Allocate an empty string with a given capacity.
|
||||
BasStringT *basStringAlloc(int32_t cap);
|
||||
|
||||
// Increment reference count.
|
||||
BasStringT *basStringRef(BasStringT *s);
|
||||
|
||||
// Decrement reference count. Frees if count reaches zero.
|
||||
void basStringUnref(BasStringT *s);
|
||||
|
||||
// Concatenate two strings. Returns a new string (refCount 1).
|
||||
BasStringT *basStringConcat(const BasStringT *a, const BasStringT *b);
|
||||
|
||||
// Substring. Returns a new string (refCount 1).
|
||||
BasStringT *basStringSub(const BasStringT *s, int32_t start, int32_t len);
|
||||
|
||||
// Compare two strings. Returns <0, 0, >0 like strcmp.
|
||||
int32_t basStringCompare(const BasStringT *a, const BasStringT *b);
|
||||
|
||||
// Compare two strings case-insensitively. Returns <0, 0, >0.
|
||||
int32_t basStringCompareCI(const BasStringT *a, const BasStringT *b);
|
||||
|
||||
// The empty string singleton (never freed).
|
||||
extern BasStringT *basEmptyString;
|
||||
|
||||
// Initialize/shutdown the string system.
|
||||
void basStringSystemInit(void);
|
||||
void basStringSystemShutdown(void);
|
||||
|
||||
// ============================================================
|
||||
// Forward declarations
|
||||
// ============================================================
|
||||
|
||||
// Declared ahead of its definition so that the array and UDT structs
// below can hold BasValueT pointers.
typedef struct BasValueTag BasValueT;
|
||||
|
||||
// ============================================================
|
||||
// Reference-counted array
|
||||
// ============================================================
|
||||
|
||||
#define BAS_ARRAY_MAX_DIMS 8
|
||||
|
||||
typedef struct {
|
||||
int32_t refCount;
|
||||
uint8_t elementType; // BAS_TYPE_*
|
||||
int32_t dims; // number of dimensions
|
||||
int32_t lbound[BAS_ARRAY_MAX_DIMS]; // lower bound per dimension
|
||||
int32_t ubound[BAS_ARRAY_MAX_DIMS]; // upper bound per dimension
|
||||
int32_t totalElements;
|
||||
BasValueT *elements; // flat array of values
|
||||
} BasArrayT;
|
||||
|
||||
// Allocate a new array. refCount starts at 1.
|
||||
BasArrayT *basArrayNew(int32_t dims, int32_t *lbounds, int32_t *ubounds, uint8_t elementType);
|
||||
|
||||
// Free all elements and release the array.
|
||||
void basArrayFree(BasArrayT *arr);
|
||||
|
||||
// Increment reference count.
|
||||
BasArrayT *basArrayRef(BasArrayT *arr);
|
||||
|
||||
// Decrement reference count. Frees if count reaches zero.
|
||||
void basArrayUnref(BasArrayT *arr);
|
||||
|
||||
// Compute flat index from multi-dimensional indices. Returns -1 if out of bounds.
|
||||
int32_t basArrayIndex(BasArrayT *arr, int32_t *indices, int32_t ndims);
|
||||
|
||||
// ============================================================
|
||||
// Reference-counted user-defined type instance
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
int32_t refCount;
|
||||
int32_t typeId; // index into type definition table
|
||||
int32_t fieldCount;
|
||||
BasValueT *fields; // array of field values
|
||||
} BasUdtT;
|
||||
|
||||
// Allocate a new UDT instance. refCount starts at 1.
|
||||
BasUdtT *basUdtNew(int32_t typeId, int32_t fieldCount);
|
||||
|
||||
// Free all fields and release the UDT.
|
||||
void basUdtFree(BasUdtT *udt);
|
||||
|
||||
// Increment reference count.
|
||||
BasUdtT *basUdtRef(BasUdtT *udt);
|
||||
|
||||
// Decrement reference count. Frees if count reaches zero.
|
||||
void basUdtUnref(BasUdtT *udt);
|
||||
|
||||
// ============================================================
|
||||
// Tagged value
|
||||
// ============================================================
|
||||
|
||||
struct BasValueTag {
|
||||
uint8_t type; // BAS_TYPE_*
|
||||
union {
|
||||
int16_t intVal; // BAS_TYPE_INTEGER
|
||||
int32_t longVal; // BAS_TYPE_LONG
|
||||
float sngVal; // BAS_TYPE_SINGLE
|
||||
double dblVal; // BAS_TYPE_DOUBLE
|
||||
BasStringT *strVal; // BAS_TYPE_STRING (ref-counted)
|
||||
int16_t boolVal; // BAS_TYPE_BOOLEAN (True=-1, False=0)
|
||||
BasArrayT *arrVal; // BAS_TYPE_ARRAY (ref-counted)
|
||||
BasUdtT *udtVal; // BAS_TYPE_UDT (ref-counted)
|
||||
};
|
||||
};
|
||||
|
||||
// Create values
|
||||
BasValueT basValInteger(int16_t v);
|
||||
BasValueT basValLong(int32_t v);
|
||||
BasValueT basValSingle(float v);
|
||||
BasValueT basValDouble(double v);
|
||||
BasValueT basValString(BasStringT *s);
|
||||
BasValueT basValStringFromC(const char *text);
|
||||
BasValueT basValBool(bool v);
|
||||
|
||||
// Copy a value (increments string refcount if applicable).
|
||||
BasValueT basValCopy(BasValueT v);
|
||||
|
||||
// Release a value (decrements string refcount if applicable).
|
||||
void basValRelease(BasValueT *v);
|
||||
|
||||
// Convert a value to a specific type. Returns the converted value.
|
||||
// The original is NOT released -- caller manages lifetime.
|
||||
BasValueT basValToInteger(BasValueT v);
|
||||
BasValueT basValToLong(BasValueT v);
|
||||
BasValueT basValToSingle(BasValueT v);
|
||||
BasValueT basValToDouble(BasValueT v);
|
||||
BasValueT basValToString(BasValueT v);
|
||||
BasValueT basValToBool(BasValueT v);
|
||||
|
||||
// Get the numeric value as a double (for mixed-type arithmetic).
|
||||
double basValToNumber(BasValueT v);
|
||||
|
||||
// Get the string representation. Returns a new ref-counted string.
|
||||
BasStringT *basValFormatString(BasValueT v);
|
||||
|
||||
// Check if a value is truthy (non-zero number, non-empty string).
|
||||
bool basValIsTruthy(BasValueT v);
|
||||
|
||||
// Compare two values. Returns -1, 0, or 1.
|
||||
// Numeric types are compared numerically. Strings lexicographically.
|
||||
int32_t basValCompare(BasValueT a, BasValueT b);
|
||||
|
||||
// Compare two values case-insensitively (for OPTION COMPARE TEXT).
|
||||
int32_t basValCompareCI(BasValueT a, BasValueT b);
|
||||
|
||||
// Determine the common type for a binary operation (type promotion).
|
||||
// Integer + Single -> Single, etc.
|
||||
uint8_t basValPromoteType(uint8_t a, uint8_t b);
|
||||
|
||||
#endif // DVXBASIC_VALUES_H
|
||||
3514
dvxbasic/runtime/vm.c
Normal file
3514
dvxbasic/runtime/vm.c
Normal file
File diff suppressed because it is too large
Load diff
211
dvxbasic/runtime/vm.h
Normal file
211
dvxbasic/runtime/vm.h
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
// vm.h -- DVX BASIC virtual machine
|
||||
//
|
||||
// Stack-based p-code interpreter. Executes compiled BASIC bytecode.
|
||||
// Embeddable: the host provides I/O callbacks. No DVX dependencies.
|
||||
//
|
||||
// Usage:
|
||||
// BasVmT *vm = basVmCreate();
|
||||
// basVmSetPrintCallback(vm, myPrintFn, myCtx);
|
||||
// basVmSetInputCallback(vm, myInputFn, myCtx);
|
||||
//   basVmLoadModule(vm, module);   // module: BasModuleT * produced by the compiler
|
||||
// BasVmResultE result = basVmRun(vm);
|
||||
// basVmDestroy(vm);
|
||||
|
||||
#ifndef DVXBASIC_VM_H
|
||||
#define DVXBASIC_VM_H
|
||||
|
||||
#include "values.h"
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
// ============================================================
|
||||
// Limits
|
||||
// ============================================================
|
||||
|
||||
// Depth of the evaluation stack.
#define BAS_VM_STACK_SIZE       256
// Maximum call nesting.
#define BAS_VM_CALL_STACK_SIZE  64
// Number of global variable slots.
#define BAS_VM_MAX_GLOBALS      512
// Local variable slots per stack frame.
#define BAS_VM_MAX_LOCALS       64
// Maximum nesting of FOR loops.
#define BAS_VM_MAX_FOR_DEPTH    32
// Number of file channels.
#define BAS_VM_MAX_FILES        16
|
||||
|
||||
// ============================================================
|
||||
// Result codes
|
||||
// ============================================================
|
||||
|
||||
// Outcome of running or stepping the VM.
typedef enum {
    // Program completed normally.
    BAS_VM_OK,
    // HALT instruction reached.
    BAS_VM_HALTED,
    // DoEvents yielded control.
    BAS_VM_YIELDED,
    // Runtime error.
    BAS_VM_ERROR,
    BAS_VM_STACK_OVERFLOW,
    BAS_VM_STACK_UNDERFLOW,
    BAS_VM_CALL_OVERFLOW,
    BAS_VM_DIV_BY_ZERO,
    BAS_VM_TYPE_MISMATCH,
    BAS_VM_OUT_OF_MEMORY,
    BAS_VM_BAD_OPCODE,
    BAS_VM_FILE_ERROR,
    BAS_VM_SUBSCRIPT_RANGE,
    // ON ERROR raised.
    BAS_VM_USER_ERROR
} BasVmResultE;
|
||||
|
||||
// ============================================================
|
||||
// I/O callbacks (host-provided)
|
||||
// ============================================================
|
||||
|
||||
// PRINT output hook. text is null-terminated; newline tells the host
// whether to advance to the next line after printing.
typedef void (*BasPrintFnT)(void *ctx, const char *text, bool newline);

// INPUT statement hook. prompt is the text to display. The host fills
// buf with at most bufSize-1 characters plus the terminator and
// returns true on success, false on cancel/error.
typedef bool (*BasInputFnT)(void *ctx, const char *prompt, char *buf, int32_t bufSize);

// DoEvents statement hook. The host processes its pending events and
// returns true to continue execution or false to stop the program.
typedef bool (*BasDoEventsFnT)(void *ctx);
|
||||
|
||||
// ============================================================
|
||||
// Call stack frame
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
int32_t returnPc; // instruction to return to
|
||||
int32_t baseSlot; // base index in locals array
|
||||
int32_t localCount; // number of locals in this frame
|
||||
BasValueT locals[BAS_VM_MAX_LOCALS];
|
||||
} BasCallFrameT;
|
||||
|
||||
// ============================================================
|
||||
// FOR loop state
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
int32_t varIdx; // loop variable slot index
|
||||
bool isLocal; // true = local, false = global
|
||||
BasValueT limit; // upper bound
|
||||
BasValueT step; // step value
|
||||
int32_t loopTop; // PC of the loop body start
|
||||
} BasForStateT;
|
||||
|
||||
// ============================================================
|
||||
// File channel
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
    // FILE* or a platform-specific handle.
    void    *handle;
    // 0=closed, 1=input, 2=output, 3=append, 4=random, 5=binary
    int32_t  mode;
} BasFileChannelT;
|
||||
|
||||
// ============================================================
|
||||
// Compiled module (output of the compiler)
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
uint8_t *code; // p-code bytecode
|
||||
int32_t codeLen;
|
||||
BasStringT **constants; // string constant pool
|
||||
int32_t constCount;
|
||||
int32_t globalCount; // number of global variable slots needed
|
||||
int32_t entryPoint; // PC of the first instruction (module-level code)
|
||||
BasValueT *dataPool; // DATA statement value pool
|
||||
int32_t dataCount; // number of values in the data pool
|
||||
} BasModuleT;
|
||||
|
||||
// ============================================================
|
||||
// VM state
|
||||
// ============================================================
|
||||
|
||||
typedef struct {
|
||||
// Program
|
||||
BasModuleT *module;
|
||||
|
||||
// Execution
|
||||
int32_t pc; // program counter
|
||||
bool running;
|
||||
bool yielded;
|
||||
|
||||
// Evaluation stack
|
||||
BasValueT stack[BAS_VM_STACK_SIZE];
|
||||
int32_t sp; // stack pointer (index of next free slot)
|
||||
|
||||
// Call stack
|
||||
BasCallFrameT callStack[BAS_VM_CALL_STACK_SIZE];
|
||||
int32_t callDepth;
|
||||
|
||||
// FOR loop stack
|
||||
BasForStateT forStack[BAS_VM_MAX_FOR_DEPTH];
|
||||
int32_t forDepth;
|
||||
|
||||
// Global variables
|
||||
BasValueT globals[BAS_VM_MAX_GLOBALS];
|
||||
|
||||
// File channels (1-based, index 0 unused)
|
||||
BasFileChannelT files[BAS_VM_MAX_FILES];
|
||||
|
||||
// DATA/READ pointer
|
||||
int32_t dataPtr; // current READ position in data pool
|
||||
|
||||
// String comparison mode
|
||||
bool compareTextMode; // true = case-insensitive comparisons
|
||||
|
||||
// Error handling
|
||||
int32_t errorHandler; // PC of ON ERROR GOTO handler (0 = none)
|
||||
int32_t errorNumber; // current Err number
|
||||
int32_t errorPc; // PC of the instruction that caused the error (for RESUME)
|
||||
int32_t errorNextPc; // PC of the next instruction after error (for RESUME NEXT)
|
||||
bool inErrorHandler; // true when executing error handler code
|
||||
char errorMsg[256]; // current error description
|
||||
|
||||
// I/O callbacks
|
||||
BasPrintFnT printFn;
|
||||
void *printCtx;
|
||||
BasInputFnT inputFn;
|
||||
void *inputCtx;
|
||||
BasDoEventsFnT doEventsFn;
|
||||
void *doEventsCtx;
|
||||
} BasVmT;
|
||||
|
||||
// ============================================================
|
||||
// API
|
||||
// ============================================================
|
||||
|
||||
// Create a new VM instance.
|
||||
BasVmT *basVmCreate(void);
|
||||
|
||||
// Destroy a VM instance and free all resources.
|
||||
void basVmDestroy(BasVmT *vm);
|
||||
|
||||
// Load a compiled module into the VM.
|
||||
void basVmLoadModule(BasVmT *vm, BasModuleT *module);
|
||||
|
||||
// Execute the loaded module. Returns when the program ends,
|
||||
// halts, yields, or hits an error.
|
||||
BasVmResultE basVmRun(BasVmT *vm);
|
||||
|
||||
// Execute a single instruction. Returns the result.
|
||||
// Useful for stepping/debugging.
|
||||
BasVmResultE basVmStep(BasVmT *vm);
|
||||
|
||||
// Reset the VM to initial state (clear stack, globals, PC).
|
||||
void basVmReset(BasVmT *vm);
|
||||
|
||||
// Set I/O callbacks.
|
||||
void basVmSetPrintCallback(BasVmT *vm, BasPrintFnT fn, void *ctx);
|
||||
void basVmSetInputCallback(BasVmT *vm, BasInputFnT fn, void *ctx);
|
||||
void basVmSetDoEventsCallback(BasVmT *vm, BasDoEventsFnT fn, void *ctx);
|
||||
|
||||
// Push/pop values on the evaluation stack (for host integration).
|
||||
bool basVmPush(BasVmT *vm, BasValueT val);
|
||||
bool basVmPop(BasVmT *vm, BasValueT *val);
|
||||
|
||||
// Get the current error message.
|
||||
const char *basVmGetError(const BasVmT *vm);
|
||||
|
||||
#endif // DVXBASIC_VM_H
|
||||
850
dvxbasic/test_compiler.c
Normal file
850
dvxbasic/test_compiler.c
Normal file
|
|
@ -0,0 +1,850 @@
|
|||
// test_compiler.c -- End-to-end test: source -> compiler -> VM -> output
|
||||
//
|
||||
// Build (native):
|
||||
// gcc -O2 -Wall -o test_compiler test_compiler.c \
|
||||
// compiler/lexer.c compiler/parser.c compiler/codegen.c compiler/symtab.c \
|
||||
// runtime/vm.c runtime/values.c -lm
|
||||
|
||||
#include "compiler/parser.h"
|
||||
#include "runtime/vm.h"
|
||||
#include "runtime/values.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
static void runProgram(const char *name, const char *source) {
|
||||
printf("=== %s ===\n", name);
|
||||
|
||||
int32_t len = (int32_t)strlen(source);
|
||||
|
||||
BasParserT parser;
|
||||
basParserInit(&parser, source, len);
|
||||
|
||||
if (!basParse(&parser)) {
|
||||
printf("COMPILE ERROR: %s\n\n", parser.error);
|
||||
basParserFree(&parser);
|
||||
return;
|
||||
}
|
||||
|
||||
BasModuleT *mod = basParserBuildModule(&parser);
|
||||
basParserFree(&parser);
|
||||
|
||||
if (!mod) {
|
||||
printf("MODULE BUILD FAILED\n\n");
|
||||
return;
|
||||
}
|
||||
|
||||
BasVmT *vm = basVmCreate();
|
||||
basVmLoadModule(vm, mod);
|
||||
|
||||
// Module-level code uses callStack[0] as implicit main frame
|
||||
vm->callStack[0].localCount = mod->globalCount > 64 ? 64 : mod->globalCount;
|
||||
vm->callDepth = 1;
|
||||
|
||||
BasVmResultE result = basVmRun(vm);
|
||||
|
||||
if (result != BAS_VM_HALTED && result != BAS_VM_OK) {
|
||||
printf("[VM error %d: %s]\n", result, basVmGetError(vm));
|
||||
}
|
||||
|
||||
basVmDestroy(vm);
|
||||
basModuleFree(mod);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
int main(void) {
|
||||
printf("DVX BASIC Compiler Tests\n");
|
||||
printf("========================\n\n");
|
||||
|
||||
basStringSystemInit();
|
||||
|
||||
// Test 1: Hello World
|
||||
runProgram("Hello World",
|
||||
"PRINT \"Hello, World!\"\n"
|
||||
);
|
||||
|
||||
// Test 2: Arithmetic
|
||||
runProgram("Arithmetic",
|
||||
"PRINT 2 + 3 * 4\n"
|
||||
"PRINT 10 \\ 3\n"
|
||||
"PRINT 10 MOD 3\n"
|
||||
"PRINT 2 ^ 8\n"
|
||||
);
|
||||
|
||||
// Test 3: String operations
|
||||
runProgram("String Ops",
|
||||
"DIM s AS STRING\n"
|
||||
"s = \"Hello, BASIC!\"\n"
|
||||
"PRINT s\n"
|
||||
"PRINT LEN(s)\n"
|
||||
"PRINT LEFT$(s, 5)\n"
|
||||
"PRINT RIGHT$(s, 6)\n"
|
||||
"PRINT MID$(s, 8, 5)\n"
|
||||
"PRINT UCASE$(s)\n"
|
||||
);
|
||||
|
||||
// Test 4: IF/THEN/ELSE
|
||||
runProgram("IF/THEN/ELSE",
|
||||
"DIM x AS INTEGER\n"
|
||||
"x = 42\n"
|
||||
"IF x > 100 THEN\n"
|
||||
" PRINT \"big\"\n"
|
||||
"ELSEIF x > 10 THEN\n"
|
||||
" PRINT \"medium\"\n"
|
||||
"ELSE\n"
|
||||
" PRINT \"small\"\n"
|
||||
"END IF\n"
|
||||
);
|
||||
|
||||
// Test 5: FOR loop
|
||||
runProgram("FOR Loop",
|
||||
"DIM i AS INTEGER\n"
|
||||
"FOR i = 1 TO 10\n"
|
||||
" PRINT i;\n"
|
||||
"NEXT i\n"
|
||||
"PRINT\n"
|
||||
);
|
||||
|
||||
// Test 6: DO/WHILE loop
|
||||
runProgram("DO/WHILE Loop",
|
||||
"DIM n AS INTEGER\n"
|
||||
"n = 1\n"
|
||||
"DO WHILE n <= 5\n"
|
||||
" PRINT n;\n"
|
||||
" n = n + 1\n"
|
||||
"LOOP\n"
|
||||
"PRINT\n"
|
||||
);
|
||||
|
||||
// Test 7: SUB and FUNCTION
|
||||
runProgram("SUB and FUNCTION",
|
||||
"DECLARE SUB Greet(name AS STRING)\n"
|
||||
"DECLARE FUNCTION Square(x AS INTEGER) AS INTEGER\n"
|
||||
"\n"
|
||||
"CALL Greet(\"World\")\n"
|
||||
"PRINT Square(7)\n"
|
||||
"\n"
|
||||
"SUB Greet(name AS STRING)\n"
|
||||
" PRINT \"Hello, \" & name & \"!\"\n"
|
||||
"END SUB\n"
|
||||
"\n"
|
||||
"FUNCTION Square(x AS INTEGER) AS INTEGER\n"
|
||||
" Square = x * x\n"
|
||||
"END FUNCTION\n"
|
||||
);
|
||||
|
||||
// Test 8: SELECT CASE
|
||||
runProgram("SELECT CASE",
|
||||
"DIM grade AS STRING\n"
|
||||
"grade = \"B\"\n"
|
||||
"SELECT CASE grade\n"
|
||||
" CASE \"A\"\n"
|
||||
" PRINT \"Excellent\"\n"
|
||||
" CASE \"B\", \"C\"\n"
|
||||
" PRINT \"Good\"\n"
|
||||
" CASE ELSE\n"
|
||||
" PRINT \"Other\"\n"
|
||||
"END SELECT\n"
|
||||
);
|
||||
|
||||
// Test 9: Fibonacci
|
||||
runProgram("Fibonacci",
|
||||
"DIM a AS INTEGER\n"
|
||||
"DIM b AS INTEGER\n"
|
||||
"DIM temp AS INTEGER\n"
|
||||
"DIM i AS INTEGER\n"
|
||||
"a = 0\n"
|
||||
"b = 1\n"
|
||||
"FOR i = 1 TO 10\n"
|
||||
" PRINT a;\n"
|
||||
" temp = a + b\n"
|
||||
" a = b\n"
|
||||
" b = temp\n"
|
||||
"NEXT i\n"
|
||||
"PRINT\n"
|
||||
);
|
||||
|
||||
// Test 10: Math functions
|
||||
runProgram("Math Functions",
|
||||
"PRINT ABS(-42)\n"
|
||||
"PRINT SQR(144)\n"
|
||||
"PRINT INT(3.7)\n"
|
||||
);
|
||||
|
||||
// Test 11: File I/O
|
||||
runProgram("File I/O",
|
||||
"OPEN \"/tmp/dvxbasic_test.txt\" FOR OUTPUT AS #1\n"
|
||||
"PRINT #1, \"Hello from BASIC!\"\n"
|
||||
"PRINT #1, \"Line two\"\n"
|
||||
"PRINT #1, \"42\"\n"
|
||||
"CLOSE #1\n"
|
||||
"\n"
|
||||
"DIM line$ AS STRING\n"
|
||||
"DIM count AS INTEGER\n"
|
||||
"count = 0\n"
|
||||
"OPEN \"/tmp/dvxbasic_test.txt\" FOR INPUT AS #1\n"
|
||||
"DO WHILE NOT EOF(#1)\n"
|
||||
" INPUT #1, line$\n"
|
||||
" PRINT line$\n"
|
||||
" count = count + 1\n"
|
||||
"LOOP\n"
|
||||
"CLOSE #1\n"
|
||||
"PRINT count;\n"
|
||||
"PRINT \"lines read\"\n"
|
||||
);
|
||||
|
||||
// Test 12: LINE INPUT# and APPEND
|
||||
runProgram("LINE INPUT and APPEND",
|
||||
"OPEN \"/tmp/dvxbasic_test2.txt\" FOR OUTPUT AS #2\n"
|
||||
"PRINT #2, \"First line\"\n"
|
||||
"CLOSE #2\n"
|
||||
"\n"
|
||||
"OPEN \"/tmp/dvxbasic_test2.txt\" FOR APPEND AS #2\n"
|
||||
"PRINT #2, \"Appended line\"\n"
|
||||
"CLOSE #2\n"
|
||||
"\n"
|
||||
"DIM s$ AS STRING\n"
|
||||
"OPEN \"/tmp/dvxbasic_test2.txt\" FOR INPUT AS #2\n"
|
||||
"LINE INPUT #2, s$\n"
|
||||
"PRINT s$\n"
|
||||
"LINE INPUT #2, s$\n"
|
||||
"PRINT s$\n"
|
||||
"CLOSE #2\n"
|
||||
);
|
||||
|
||||
// Test 13: Array -- 1D with default lbound=0
|
||||
runProgram("1D Array",
|
||||
"DIM arr(5) AS INTEGER\n"
|
||||
"DIM i AS INTEGER\n"
|
||||
"FOR i = 1 TO 5\n"
|
||||
" arr(i) = i * i\n"
|
||||
"NEXT i\n"
|
||||
"FOR i = 1 TO 5\n"
|
||||
" PRINT arr(i);\n"
|
||||
"NEXT i\n"
|
||||
"PRINT\n"
|
||||
);
|
||||
// Expected: 1 4 9 16 25
|
||||
|
||||
// Test 14: Multi-dimensional array
|
||||
runProgram("Multi-dim Array",
|
||||
"DIM m(2, 2) AS INTEGER\n"
|
||||
"m(1, 1) = 11\n"
|
||||
"m(1, 2) = 12\n"
|
||||
"m(2, 1) = 21\n"
|
||||
"m(2, 2) = 22\n"
|
||||
"PRINT m(1, 1); m(1, 2); m(2, 1); m(2, 2)\n"
|
||||
);
|
||||
// Expected: 11 12 21 22
|
||||
|
||||
// Test 15: Array with explicit bounds (TO syntax)
|
||||
runProgram("Array with TO bounds",
|
||||
"DIM a(1 TO 3) AS INTEGER\n"
|
||||
"a(1) = 10\n"
|
||||
"a(2) = 20\n"
|
||||
"a(3) = 30\n"
|
||||
"PRINT a(1); a(2); a(3)\n"
|
||||
);
|
||||
// Expected: 10 20 30
|
||||
|
||||
// Test 16: LBOUND and UBOUND
|
||||
runProgram("LBOUND/UBOUND",
|
||||
"DIM a(5 TO 10) AS INTEGER\n"
|
||||
"PRINT LBOUND(a); UBOUND(a)\n"
|
||||
);
|
||||
// Expected: 5 10
|
||||
|
||||
// Test 17: User-defined TYPE
|
||||
runProgram("TYPE",
|
||||
"TYPE Point\n"
|
||||
" x AS INTEGER\n"
|
||||
" y AS INTEGER\n"
|
||||
"END TYPE\n"
|
||||
"DIM p AS Point\n"
|
||||
"p.x = 10\n"
|
||||
"p.y = 20\n"
|
||||
"PRINT p.x; p.y\n"
|
||||
);
|
||||
// Expected: 10 20
|
||||
|
||||
// Test 18: String array
|
||||
runProgram("String Array",
|
||||
"DIM names(3) AS STRING\n"
|
||||
"names(0) = \"Alice\"\n"
|
||||
"names(1) = \"Bob\"\n"
|
||||
"names(2) = \"Charlie\"\n"
|
||||
"DIM i AS INTEGER\n"
|
||||
"FOR i = 0 TO 2\n"
|
||||
" PRINT names(i)\n"
|
||||
"NEXT i\n"
|
||||
);
|
||||
// Expected: Alice / Bob / Charlie
|
||||
|
||||
// Test 19: REDIM with PRESERVE
|
||||
runProgram("REDIM PRESERVE",
|
||||
"DIM a(3) AS INTEGER\n"
|
||||
"a(0) = 100\n"
|
||||
"a(1) = 200\n"
|
||||
"a(2) = 300\n"
|
||||
"REDIM PRESERVE a(5) AS INTEGER\n"
|
||||
"a(4) = 500\n"
|
||||
"PRINT a(0); a(1); a(2); a(4)\n"
|
||||
);
|
||||
// Expected: 100 200 300 500
|
||||
|
||||
// Test 20: ERASE
|
||||
runProgram("ERASE",
|
||||
"DIM a(3) AS INTEGER\n"
|
||||
"a(1) = 42\n"
|
||||
"ERASE a\n"
|
||||
"DIM b(2) AS INTEGER\n"
|
||||
"b(1) = 99\n"
|
||||
"PRINT b(1)\n"
|
||||
);
|
||||
// Expected: 99
|
||||
|
||||
// Test 21: Array in FOR loop accumulation
|
||||
runProgram("Array Accumulation",
|
||||
"DIM sums(5) AS INTEGER\n"
|
||||
"DIM i AS INTEGER\n"
|
||||
"DIM j AS INTEGER\n"
|
||||
"FOR i = 1 TO 5\n"
|
||||
" sums(i) = 0\n"
|
||||
" FOR j = 1 TO i\n"
|
||||
" sums(i) = sums(i) + j\n"
|
||||
" NEXT j\n"
|
||||
"NEXT i\n"
|
||||
"FOR i = 1 TO 5\n"
|
||||
" PRINT sums(i);\n"
|
||||
"NEXT i\n"
|
||||
"PRINT\n"
|
||||
);
|
||||
// Expected: 1 3 6 10 15
|
||||
|
||||
// ============================================================
|
||||
// Batch 1: Control Flow
|
||||
// ============================================================
|
||||
|
||||
// Test: GOTO with forward jump
|
||||
runProgram("GOTO Forward",
|
||||
"PRINT \"before\"\n"
|
||||
"GOTO skip\n"
|
||||
"PRINT \"skipped\"\n"
|
||||
"skip:\n"
|
||||
"PRINT \"after\"\n"
|
||||
);
|
||||
// Expected: before / after
|
||||
|
||||
// Test: GOTO with backward jump
|
||||
runProgram("GOTO Backward",
|
||||
"DIM n AS INTEGER\n"
|
||||
"n = 0\n"
|
||||
"top:\n"
|
||||
"n = n + 1\n"
|
||||
"IF n < 5 THEN GOTO top\n"
|
||||
"PRINT n\n"
|
||||
);
|
||||
// Expected: 5
|
||||
|
||||
// Test: GOSUB/RETURN
|
||||
runProgram("GOSUB/RETURN",
|
||||
"DIM x AS INTEGER\n"
|
||||
"x = 10\n"
|
||||
"GOSUB dbl\n"
|
||||
"PRINT x\n"
|
||||
"END\n"
|
||||
"dbl:\n"
|
||||
"x = x * 2\n"
|
||||
"RETURN\n"
|
||||
);
|
||||
// Expected: 20
|
||||
|
||||
// Test: ON ERROR GOTO -- verify error handler catches errors
|
||||
// and ERR returns the error number
|
||||
runProgram("ON ERROR GOTO",
|
||||
"ON ERROR GOTO handler\n"
|
||||
"PRINT 10 / 0\n"
|
||||
"END\n"
|
||||
"handler:\n"
|
||||
"PRINT \"caught\"\n"
|
||||
"PRINT ERR\n"
|
||||
);
|
||||
// Expected: caught / 11
|
||||
|
||||
// Test: Single-line IF
|
||||
runProgram("Single-line IF",
|
||||
"DIM x AS INTEGER\n"
|
||||
"x = 42\n"
|
||||
"IF x > 10 THEN PRINT \"big\"\n"
|
||||
"IF x < 10 THEN PRINT \"small\"\n"
|
||||
"IF x = 42 THEN PRINT \"exact\" ELSE PRINT \"nope\"\n"
|
||||
);
|
||||
// Expected: big / exact
|
||||
|
||||
// Test: Multi-statement line with :
|
||||
runProgram("Multi-statement :",
|
||||
"DIM x AS INTEGER\n"
|
||||
"DIM y AS INTEGER\n"
|
||||
"x = 1 : y = 2 : PRINT x + y\n"
|
||||
);
|
||||
// Expected: 3
|
||||
|
||||
// ============================================================
|
||||
// Batch 2: Misc Features
|
||||
// ============================================================
|
||||
|
||||
// Test: SWAP
|
||||
runProgram("SWAP",
|
||||
"DIM a AS INTEGER\n"
|
||||
"DIM b AS INTEGER\n"
|
||||
"a = 10\n"
|
||||
"b = 20\n"
|
||||
"SWAP a, b\n"
|
||||
"PRINT a;\n"
|
||||
"PRINT b\n"
|
||||
);
|
||||
// Expected: 20 10
|
||||
|
||||
// Test: TIMER (returns number > 0)
|
||||
runProgram("TIMER",
|
||||
"DIM t AS DOUBLE\n"
|
||||
"t = TIMER\n"
|
||||
"IF t > 0 THEN PRINT \"ok\"\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// Test: DATE$ (returns non-empty string)
|
||||
runProgram("DATE$",
|
||||
"DIM d$ AS STRING\n"
|
||||
"d$ = DATE$\n"
|
||||
"IF LEN(d$) > 0 THEN PRINT \"ok\"\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// Test: TIME$ (returns non-empty string)
|
||||
runProgram("TIME$",
|
||||
"DIM t$ AS STRING\n"
|
||||
"t$ = TIME$\n"
|
||||
"IF LEN(t$) > 0 THEN PRINT \"ok\"\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// Test: ENVIRON$
|
||||
runProgram("ENVIRON$",
|
||||
"DIM p$ AS STRING\n"
|
||||
"p$ = ENVIRON$(\"HOME\")\n"
|
||||
"IF LEN(p$) > 0 THEN PRINT \"ok\"\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// ============================================================
|
||||
// Batch 3: New features (DATA/READ/RESTORE, DIM SHARED,
|
||||
// STATIC, DEF FN, OPTION BASE)
|
||||
// ============================================================
|
||||
|
||||
// Test: DATA/READ/RESTORE
|
||||
runProgram("DATA/READ/RESTORE",
|
||||
"DATA 10, 20, \"hello\"\n"
|
||||
"DIM a AS INTEGER\n"
|
||||
"DIM b AS INTEGER\n"
|
||||
"DIM c AS STRING\n"
|
||||
"READ a, b, c\n"
|
||||
"PRINT a; b;\n"
|
||||
"PRINT c\n"
|
||||
"RESTORE\n"
|
||||
"READ a\n"
|
||||
"PRINT a\n"
|
||||
);
|
||||
// Expected: 10 20 hello / 10
|
||||
|
||||
// Test: DIM SHARED
|
||||
runProgram("DIM SHARED",
|
||||
"DIM SHARED count AS INTEGER\n"
|
||||
"count = 0\n"
|
||||
"CALL Increment\n"
|
||||
"CALL Increment\n"
|
||||
"CALL Increment\n"
|
||||
"PRINT count\n"
|
||||
"SUB Increment\n"
|
||||
" count = count + 1\n"
|
||||
"END SUB\n"
|
||||
);
|
||||
// Expected: 3
|
||||
|
||||
// Test: STATIC
|
||||
runProgram("STATIC",
|
||||
"CALL Counter\n"
|
||||
"CALL Counter\n"
|
||||
"CALL Counter\n"
|
||||
"SUB Counter\n"
|
||||
" STATIC n AS INTEGER\n"
|
||||
" n = n + 1\n"
|
||||
" PRINT n;\n"
|
||||
"END SUB\n"
|
||||
"PRINT\n"
|
||||
);
|
||||
// Expected: 1 2 3
|
||||
|
||||
// Test: DEF FN
|
||||
runProgram("DEF FN",
|
||||
"DEF FNdouble(x AS INTEGER) = x * 2\n"
|
||||
"PRINT FNdouble(5)\n"
|
||||
"PRINT FNdouble(21)\n"
|
||||
);
|
||||
// Expected: 10 / 42
|
||||
|
||||
// Test: OPTION BASE
|
||||
runProgram("OPTION BASE",
|
||||
"OPTION BASE 1\n"
|
||||
"DIM arr(3) AS INTEGER\n"
|
||||
"arr(1) = 10\n"
|
||||
"arr(3) = 30\n"
|
||||
"PRINT arr(1); arr(3)\n"
|
||||
);
|
||||
// Expected: 10 30
|
||||
|
||||
// Test: DATA with mixed types
|
||||
runProgram("DATA mixed types",
|
||||
"DATA 100, 3.14, \"world\"\n"
|
||||
"DIM x AS INTEGER\n"
|
||||
"DIM y AS DOUBLE\n"
|
||||
"DIM z AS STRING\n"
|
||||
"READ x, y, z\n"
|
||||
"PRINT x\n"
|
||||
"PRINT z\n"
|
||||
);
|
||||
// Expected: 100 / world
|
||||
|
||||
// Test: Multiple DATA statements scattered
|
||||
runProgram("DATA scattered",
|
||||
"DIM a AS INTEGER\n"
|
||||
"DIM b AS INTEGER\n"
|
||||
"DIM c AS INTEGER\n"
|
||||
"DATA 1, 2\n"
|
||||
"READ a, b\n"
|
||||
"DATA 3\n"
|
||||
"READ c\n"
|
||||
"PRINT a; b; c\n"
|
||||
);
|
||||
// Expected: 1 2 3
|
||||
|
||||
// Test: DIM SHARED with SUB modifying shared variable
|
||||
runProgram("DIM SHARED multi",
|
||||
"DIM SHARED total AS INTEGER\n"
|
||||
"DIM SHARED msg AS STRING\n"
|
||||
"total = 100\n"
|
||||
"msg = \"start\"\n"
|
||||
"CALL Modify\n"
|
||||
"PRINT total\n"
|
||||
"PRINT msg\n"
|
||||
"SUB Modify\n"
|
||||
" total = total + 50\n"
|
||||
" msg = \"done\"\n"
|
||||
"END SUB\n"
|
||||
);
|
||||
// Expected: 150 / done
|
||||
|
||||
// ============================================================
|
||||
// Batch 4: New I/O and string features
|
||||
// ============================================================
|
||||
|
||||
// Test: WRITE #
|
||||
runProgram("WRITE #",
|
||||
"OPEN \"/tmp/dvxbasic_write.txt\" FOR OUTPUT AS #1\n"
|
||||
"WRITE #1, 10, \"hello\", 3.14\n"
|
||||
"CLOSE #1\n"
|
||||
"OPEN \"/tmp/dvxbasic_write.txt\" FOR INPUT AS #1\n"
|
||||
"DIM s AS STRING\n"
|
||||
"LINE INPUT #1, s\n"
|
||||
"PRINT s\n"
|
||||
"CLOSE #1\n"
|
||||
);
|
||||
// Expected: 10,"hello",3.14
|
||||
|
||||
// Test: FREEFILE
|
||||
runProgram("FREEFILE",
|
||||
"DIM f AS INTEGER\n"
|
||||
"f = FREEFILE\n"
|
||||
"PRINT f\n"
|
||||
);
|
||||
// Expected: 1
|
||||
|
||||
// Test: PRINT USING numeric
|
||||
runProgram("PRINT USING numeric",
|
||||
"PRINT USING \"###.##\"; 3.14159\n"
|
||||
);
|
||||
// Expected: 3.14
|
||||
|
||||
// Test: PRINT USING string
|
||||
runProgram("PRINT USING string",
|
||||
"PRINT USING \"!\"; \"Hello\"\n"
|
||||
);
|
||||
// Expected: H
|
||||
|
||||
// Test: SPC and TAB in PRINT
|
||||
runProgram("SPC/TAB",
|
||||
"PRINT SPC(3); \"hi\"\n"
|
||||
);
|
||||
// Expected: hi
|
||||
|
||||
// Test: Fixed-length string
|
||||
runProgram("STRING * n",
|
||||
"DIM s AS STRING * 5\n"
|
||||
"s = \"Hi\"\n"
|
||||
"PRINT \"[\" & s & \"]\"\n"
|
||||
"PRINT LEN(s)\n"
|
||||
);
|
||||
// Expected: [Hi ] / 5
|
||||
|
||||
// Test: MID$ statement
|
||||
runProgram("MID$ statement",
|
||||
"DIM s AS STRING\n"
|
||||
"s = \"Hello World\"\n"
|
||||
"MID$(s, 7, 5) = \"BASIC\"\n"
|
||||
"PRINT s\n"
|
||||
);
|
||||
// Expected: Hello BASIC
|
||||
|
||||
// Test: OPEN FOR BINARY / GET / PUT
|
||||
runProgram("BINARY GET/PUT",
|
||||
"DIM v AS INTEGER\n"
|
||||
"OPEN \"/tmp/dvxbasic_bin.tmp\" FOR BINARY AS #1\n"
|
||||
"v = 12345\n"
|
||||
"PUT #1, , v\n"
|
||||
"SEEK #1, 1\n"
|
||||
"DIM r AS INTEGER\n"
|
||||
"GET #1, , r\n"
|
||||
"PRINT r\n"
|
||||
"CLOSE #1\n"
|
||||
);
|
||||
// Expected: 12345
|
||||
|
||||
// Test: LOF and LOC
|
||||
runProgram("LOF/LOC",
|
||||
"OPEN \"/tmp/dvxbasic_lof.txt\" FOR OUTPUT AS #1\n"
|
||||
"PRINT #1, \"test\"\n"
|
||||
"CLOSE #1\n"
|
||||
"OPEN \"/tmp/dvxbasic_lof.txt\" FOR INPUT AS #1\n"
|
||||
"DIM sz AS LONG\n"
|
||||
"sz = LOF(1)\n"
|
||||
"IF sz > 0 THEN PRINT \"ok\"\n"
|
||||
"CLOSE #1\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// Test: INPUT$(n, #channel)
|
||||
runProgram("INPUT$",
|
||||
"OPEN \"/tmp/dvxbasic_inp.txt\" FOR OUTPUT AS #1\n"
|
||||
"PRINT #1, \"ABCDEF\"\n"
|
||||
"CLOSE #1\n"
|
||||
"OPEN \"/tmp/dvxbasic_inp.txt\" FOR INPUT AS #1\n"
|
||||
"DIM s AS STRING\n"
|
||||
"s = INPUT$(3, #1)\n"
|
||||
"PRINT s\n"
|
||||
"CLOSE #1\n"
|
||||
);
|
||||
// Expected: ABC
|
||||
|
||||
// Test: SEEK function form
|
||||
runProgram("SEEK function",
|
||||
"OPEN \"/tmp/dvxbasic_seek.txt\" FOR OUTPUT AS #1\n"
|
||||
"PRINT #1, \"test\"\n"
|
||||
"CLOSE #1\n"
|
||||
"OPEN \"/tmp/dvxbasic_seek.txt\" FOR BINARY AS #1\n"
|
||||
"DIM p AS LONG\n"
|
||||
"p = SEEK(1)\n"
|
||||
"IF p = 1 THEN PRINT \"ok\"\n"
|
||||
"CLOSE #1\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// Test: ON n GOTO
|
||||
runProgram("ON n GOTO",
|
||||
"DIM n AS INTEGER\n"
|
||||
"n = 2\n"
|
||||
"ON n GOTO ten, twenty, thirty\n"
|
||||
"PRINT \"none\"\n"
|
||||
"GOTO done\n"
|
||||
"ten:\n"
|
||||
"PRINT \"ten\"\n"
|
||||
"GOTO done\n"
|
||||
"twenty:\n"
|
||||
"PRINT \"twenty\"\n"
|
||||
"GOTO done\n"
|
||||
"thirty:\n"
|
||||
"PRINT \"thirty\"\n"
|
||||
"done:\n"
|
||||
);
|
||||
// Expected: twenty
|
||||
|
||||
// Test: ON n GOTO (no match)
|
||||
runProgram("ON n GOTO no match",
|
||||
"DIM n AS INTEGER\n"
|
||||
"n = 5\n"
|
||||
"ON n GOTO aa, bb\n"
|
||||
"PRINT \"fallthrough\"\n"
|
||||
"GOTO done2\n"
|
||||
"aa:\n"
|
||||
"PRINT \"aa\"\n"
|
||||
"GOTO done2\n"
|
||||
"bb:\n"
|
||||
"PRINT \"bb\"\n"
|
||||
"done2:\n"
|
||||
);
|
||||
// Expected: fallthrough
|
||||
|
||||
// Test: ON n GOSUB
|
||||
runProgram("ON n GOSUB",
|
||||
"DIM n AS INTEGER\n"
|
||||
"DIM result AS INTEGER\n"
|
||||
"result = 0\n"
|
||||
"n = 2\n"
|
||||
"ON n GOSUB addTen, addTwenty, addThirty\n"
|
||||
"PRINT result\n"
|
||||
"GOTO endProg\n"
|
||||
"addTen:\n"
|
||||
"result = result + 10\n"
|
||||
"RETURN\n"
|
||||
"addTwenty:\n"
|
||||
"result = result + 20\n"
|
||||
"RETURN\n"
|
||||
"addThirty:\n"
|
||||
"result = result + 30\n"
|
||||
"RETURN\n"
|
||||
"endProg:\n"
|
||||
);
|
||||
// Expected: 20
|
||||
|
||||
// Test: FORMAT$
|
||||
runProgram("FORMAT$",
|
||||
"PRINT FORMAT$(1234.5, \"#,##0.00\")\n"
|
||||
"PRINT FORMAT$(0.5, \"0.00\")\n"
|
||||
"PRINT FORMAT$(-42, \"+#0\")\n"
|
||||
"PRINT FORMAT$(0.75, \"percent\")\n"
|
||||
);
|
||||
// Expected: 1,234.50\n0.50\n-42\n75%
|
||||
|
||||
// Test: SHELL as function expression
|
||||
runProgram("SHELL function",
|
||||
"DIM r AS INTEGER\n"
|
||||
"r = SHELL(\"echo hello > /dev/null\")\n"
|
||||
"IF r = 0 THEN PRINT \"ok\"\n"
|
||||
);
|
||||
// Expected: ok
|
||||
|
||||
// Test: SHELL as statement
|
||||
runProgram("SHELL statement",
|
||||
"SHELL \"echo hello > /dev/null\"\n"
|
||||
"PRINT \"done\"\n"
|
||||
);
|
||||
// Expected: done
|
||||
|
||||
// Test: OPTION COMPARE TEXT
|
||||
runProgram("OPTION COMPARE TEXT",
|
||||
"OPTION COMPARE TEXT\n"
|
||||
"IF \"hello\" = \"HELLO\" THEN\n"
|
||||
" PRINT \"equal\"\n"
|
||||
"ELSE\n"
|
||||
" PRINT \"not equal\"\n"
|
||||
"END IF\n"
|
||||
"IF \"abc\" < \"XYZ\" THEN\n"
|
||||
" PRINT \"less\"\n"
|
||||
"END IF\n"
|
||||
);
|
||||
// Expected: equal\nless
|
||||
|
||||
// Test: OPTION COMPARE BINARY (default)
|
||||
runProgram("OPTION COMPARE BINARY",
|
||||
"OPTION COMPARE BINARY\n"
|
||||
"IF \"hello\" = \"HELLO\" THEN\n"
|
||||
" PRINT \"equal\"\n"
|
||||
"ELSE\n"
|
||||
" PRINT \"not equal\"\n"
|
||||
"END IF\n"
|
||||
);
|
||||
// Expected: not equal
|
||||
|
||||
// Test: EQV operator
|
||||
runProgram("EQV operator",
|
||||
"PRINT -1 EQV -1\n"
|
||||
"PRINT 0 EQV 0\n"
|
||||
"PRINT -1 EQV 0\n"
|
||||
"PRINT 0 EQV -1\n"
|
||||
);
|
||||
// Expected: -1\n-1\n0\n0
|
||||
|
||||
// Test: IMP operator
|
||||
runProgram("IMP operator",
|
||||
"PRINT 0 IMP -1\n"
|
||||
"PRINT -1 IMP 0\n"
|
||||
"PRINT -1 IMP -1\n"
|
||||
"PRINT 0 IMP 0\n"
|
||||
);
|
||||
// Expected: -1\n0\n-1\n-1
|
||||
|
||||
// Test: PRINT USING advanced patterns
|
||||
runProgram("PRINT USING advanced",
|
||||
"PRINT USING \"**#,##0.00\"; 1234.5\n"
|
||||
"PRINT USING \"$$#,##0.00\"; 42.5\n"
|
||||
"PRINT USING \"+###.##\"; 42.5\n"
|
||||
"PRINT USING \"+###.##\"; -42.5\n"
|
||||
"PRINT USING \"###.##-\"; -42.5\n"
|
||||
"PRINT USING \"###.##-\"; 42.5\n"
|
||||
"PRINT USING \"#.##^^^^\"; 1234.5\n"
|
||||
);
|
||||
|
||||
// Test: DEFINT
|
||||
runProgram("DEFINT",
|
||||
"DEFINT A-Z\n"
|
||||
"a = 42\n"
|
||||
"b = 3.7\n"
|
||||
"PRINT a; b\n"
|
||||
);
|
||||
|
||||
// Test: DEFSTR
|
||||
runProgram("DEFSTR",
|
||||
"DEFSTR S\n"
|
||||
"s = \"hello\"\n"
|
||||
"PRINT s\n"
|
||||
);
|
||||
|
||||
// Test: DEFINT range
|
||||
runProgram("DEFINT range",
|
||||
"DEFINT I-N\n"
|
||||
"i = 10\n"
|
||||
"j = 20\n"
|
||||
"x = 3.14\n"
|
||||
"PRINT i; j; x\n"
|
||||
);
|
||||
|
||||
// Test: OPTION EXPLICIT success
|
||||
runProgram("OPTION EXPLICIT ok",
|
||||
"OPTION EXPLICIT\n"
|
||||
"DIM x AS INTEGER\n"
|
||||
"x = 42\n"
|
||||
"PRINT x\n"
|
||||
);
|
||||
|
||||
// Test: OPTION EXPLICIT failure (should error)
|
||||
{
|
||||
printf("=== OPTION EXPLICIT error ===\n");
|
||||
const char *src =
|
||||
"OPTION EXPLICIT\n"
|
||||
"x = 42\n";
|
||||
int32_t len = (int32_t)strlen(src);
|
||||
BasParserT parser;
|
||||
basParserInit(&parser, src, len);
|
||||
bool ok = basParse(&parser);
|
||||
if (!ok) {
|
||||
printf("Correctly caught: %s\n", parser.error);
|
||||
} else {
|
||||
printf("ERROR: should have failed\n");
|
||||
}
|
||||
basParserFree(&parser);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
printf("All tests complete.\n");
|
||||
return 0;
|
||||
}
|
||||
24
dvxbasic/test_lex.c
Normal file
24
dvxbasic/test_lex.c
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
// test_lex.c -- Dump lexer tokens
|
||||
// gcc -O2 -w -o test_lex test_lex.c compiler/lexer.c -lm
|
||||
|
||||
#include "compiler/lexer.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
int main(void) {
|
||||
const char *src = "PRINT \"Hello, World!\"\n";
|
||||
BasLexerT lex;
|
||||
basLexerInit(&lex, src, (int32_t)strlen(src));
|
||||
|
||||
for (int i = 0; i < 20; i++) {
|
||||
printf("Token %d: type=%d (%s) text='%s'\n", i, lex.token.type, basTokenName(lex.token.type), lex.token.text);
|
||||
|
||||
if (lex.token.type == TOK_EOF) {
|
||||
break;
|
||||
}
|
||||
|
||||
basLexerNext(&lex);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
64
dvxbasic/test_quick.c
Normal file
64
dvxbasic/test_quick.c
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
// test_quick.c -- Quick single-program test
|
||||
// gcc -O2 -Wall -o test_quick test_quick.c compiler/lexer.c compiler/parser.c compiler/codegen.c compiler/symtab.c runtime/vm.c runtime/values.c -lm
|
||||
|
||||
#include "compiler/parser.h"
|
||||
#include "runtime/vm.h"
|
||||
#include "runtime/values.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
int main(void) {
|
||||
basStringSystemInit();
|
||||
|
||||
const char *source = "PRINT \"Hello, World!\"\n";
|
||||
printf("Source: [%s]\n", source);
|
||||
printf("Source len: %d\n", (int)strlen(source));
|
||||
|
||||
int32_t len = (int32_t)strlen(source);
|
||||
BasParserT parser;
|
||||
basParserInit(&parser, source, len);
|
||||
|
||||
if (!basParse(&parser)) {
|
||||
printf("COMPILE ERROR: %s\n", parser.error);
|
||||
basParserFree(&parser);
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf("Compiled OK (%d bytes of p-code)\n", parser.cg.codeLen);
|
||||
|
||||
// Dump p-code
|
||||
for (int i = 0; i < parser.cg.codeLen; i++) {
|
||||
printf("%02X ", parser.cg.code[i]);
|
||||
}
|
||||
printf("\n");
|
||||
|
||||
BasModuleT *mod = basParserBuildModule(&parser);
|
||||
basParserFree(&parser);
|
||||
|
||||
BasVmT *vm = basVmCreate();
|
||||
basVmLoadModule(vm, mod);
|
||||
vm->callStack[0].localCount = mod->globalCount > 64 ? 64 : mod->globalCount;
|
||||
vm->callDepth = 1;
|
||||
|
||||
// Step limit
|
||||
int steps = 0;
|
||||
vm->running = true;
|
||||
|
||||
while (vm->running && steps < 1000) {
|
||||
BasVmResultE r = basVmStep(vm);
|
||||
steps++;
|
||||
|
||||
if (r != BAS_VM_OK) {
|
||||
printf("[Result: %d after %d steps: %s]\n", r, steps, basVmGetError(vm));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (steps >= 1000) {
|
||||
printf("[TIMEOUT after %d steps, PC=%d]\n", steps, vm->pc);
|
||||
}
|
||||
|
||||
basVmDestroy(vm);
|
||||
basModuleFree(mod);
|
||||
return 0;
|
||||
}
|
||||
234
dvxbasic/test_vm.c
Normal file
234
dvxbasic/test_vm.c
Normal file
|
|
@ -0,0 +1,234 @@
|
|||
// test_vm.c -- Quick test for the DVX BASIC VM
|
||||
//
|
||||
// Hand-assembles a small p-code program and executes it.
|
||||
// Tests: PRINT "Hello, World!", arithmetic, FOR loop, string ops.
|
||||
//
|
||||
// Build (native, not cross-compiled):
|
||||
// gcc -O2 -Wall -o test_vm test_vm.c runtime/vm.c runtime/values.c -lm
|
||||
|
||||
#include "compiler/opcodes.h"
|
||||
#include "runtime/vm.h"
|
||||
#include "runtime/values.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
// ============================================================
|
||||
// Helper: emit bytes into a code buffer
|
||||
// ============================================================
|
||||
|
||||
// Shared p-code assembly buffer and its write cursor. Each test resets
// sCodeLen to 0 before assembling its program.
static uint8_t sCode[4096];
static int32_t sCodeLen = 0;


// Append `count` raw bytes from `src` at the cursor and advance it.
// No bounds checking: callers only assemble tiny programs.
static void emitRaw(const void *src, int32_t count) {
    memcpy(sCode + sCodeLen, src, (size_t)count);
    sCodeLen += count;
}


// Append a single byte (opcode or 8-bit operand).
static void emit8(uint8_t b) {
    sCode[sCodeLen] = b;
    sCodeLen += 1;
}


// Append a signed 16-bit operand in host byte order.
static void emit16(int16_t v) {
    emitRaw(&v, 2);
}


// Append an unsigned 16-bit operand in host byte order.
static void emitU16(uint16_t v) {
    emitRaw(&v, 2);
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Test 1: PRINT "Hello, World!"
|
||||
// ============================================================
|
||||
|
||||
static void test1(void) {
|
||||
printf("--- Test 1: PRINT \"Hello, World!\" ---\n");
|
||||
|
||||
sCodeLen = 0;
|
||||
|
||||
// String constant pool
|
||||
BasStringT *consts[1];
|
||||
consts[0] = basStringNew("Hello, World!", 13);
|
||||
|
||||
// Code: PUSH_STR 0; PRINT; PRINT_NL; HALT
|
||||
emit8(OP_PUSH_STR);
|
||||
emitU16(0);
|
||||
emit8(OP_PRINT);
|
||||
emit8(OP_PRINT_NL);
|
||||
emit8(OP_HALT);
|
||||
|
||||
BasModuleT module;
|
||||
memset(&module, 0, sizeof(module));
|
||||
module.code = sCode;
|
||||
module.codeLen = sCodeLen;
|
||||
module.constants = consts;
|
||||
module.constCount = 1;
|
||||
module.entryPoint = 0;
|
||||
|
||||
BasVmT *vm = basVmCreate();
|
||||
basVmLoadModule(vm, &module);
|
||||
BasVmResultE result = basVmRun(vm);
|
||||
printf("Result: %d (expected %d = HALTED)\n\n", result, BAS_VM_HALTED);
|
||||
basVmDestroy(vm);
|
||||
basStringUnref(consts[0]);
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Test 2: Arithmetic: PRINT 2 + 3 * 4
|
||||
// ============================================================
|
||||
|
||||
static void test2(void) {
|
||||
printf("--- Test 2: PRINT 2 + 3 * 4 (expect 14) ---\n");
|
||||
|
||||
sCodeLen = 0;
|
||||
|
||||
// Code: PUSH 3; PUSH 4; MUL; PUSH 2; ADD; PRINT; PRINT_NL; HALT
|
||||
emit8(OP_PUSH_INT16);
|
||||
emit16(3);
|
||||
emit8(OP_PUSH_INT16);
|
||||
emit16(4);
|
||||
emit8(OP_MUL_INT);
|
||||
emit8(OP_PUSH_INT16);
|
||||
emit16(2);
|
||||
emit8(OP_ADD_INT);
|
||||
emit8(OP_PRINT);
|
||||
emit8(OP_PRINT_NL);
|
||||
emit8(OP_HALT);
|
||||
|
||||
BasModuleT module;
|
||||
memset(&module, 0, sizeof(module));
|
||||
module.code = sCode;
|
||||
module.codeLen = sCodeLen;
|
||||
module.entryPoint = 0;
|
||||
|
||||
BasVmT *vm = basVmCreate();
|
||||
basVmLoadModule(vm, &module);
|
||||
basVmRun(vm);
|
||||
basVmDestroy(vm);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Test 3: String concatenation
|
||||
// ============================================================
|
||||
|
||||
static void test3(void) {
|
||||
printf("--- Test 3: PRINT \"Hello\" & \" \" & \"BASIC\" ---\n");
|
||||
|
||||
sCodeLen = 0;
|
||||
|
||||
BasStringT *consts[3];
|
||||
consts[0] = basStringNew("Hello", 5);
|
||||
consts[1] = basStringNew(" ", 1);
|
||||
consts[2] = basStringNew("BASIC", 5);
|
||||
|
||||
// Code: PUSH consts[0]; PUSH consts[1]; CONCAT; PUSH consts[2]; CONCAT; PRINT; PRINT_NL; HALT
|
||||
emit8(OP_PUSH_STR); emitU16(0);
|
||||
emit8(OP_PUSH_STR); emitU16(1);
|
||||
emit8(OP_STR_CONCAT);
|
||||
emit8(OP_PUSH_STR); emitU16(2);
|
||||
emit8(OP_STR_CONCAT);
|
||||
emit8(OP_PRINT);
|
||||
emit8(OP_PRINT_NL);
|
||||
emit8(OP_HALT);
|
||||
|
||||
BasModuleT module;
|
||||
memset(&module, 0, sizeof(module));
|
||||
module.code = sCode;
|
||||
module.codeLen = sCodeLen;
|
||||
module.constants = consts;
|
||||
module.constCount = 3;
|
||||
module.entryPoint = 0;
|
||||
|
||||
BasVmT *vm = basVmCreate();
|
||||
basVmLoadModule(vm, &module);
|
||||
basVmRun(vm);
|
||||
basVmDestroy(vm);
|
||||
printf("\n");
|
||||
|
||||
basStringUnref(consts[0]);
|
||||
basStringUnref(consts[1]);
|
||||
basStringUnref(consts[2]);
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// Test 4: FOR loop -- PRINT 1 to 5
|
||||
// ============================================================
|
||||
|
||||
static void test4(void) {
|
||||
printf("--- Test 4: FOR i = 1 TO 5: PRINT i: NEXT ---\n");
|
||||
|
||||
sCodeLen = 0;
|
||||
|
||||
// We need a call frame with at least 1 local (the loop variable)
|
||||
// For module-level code, we use callStack[0] as implicit frame
|
||||
|
||||
// Setup: store initial value in local 0
|
||||
// PUSH 1; STORE_LOCAL 0 -- i = 1
|
||||
emit8(OP_PUSH_INT16); emit16(1);
|
||||
emit8(OP_STORE_LOCAL); emitU16(0);
|
||||
|
||||
// Push limit and step for FOR_INIT
|
||||
// PUSH 5 (limit); PUSH 1 (step)
|
||||
emit8(OP_PUSH_INT16); emit16(5);
|
||||
emit8(OP_PUSH_INT16); emit16(1);
|
||||
emit8(OP_FOR_INIT); emitU16(0); emit8(1); // isLocal=1
|
||||
|
||||
// Loop body start (record PC for FOR_NEXT offset)
|
||||
int32_t loopBody = sCodeLen;
|
||||
|
||||
// LOAD_LOCAL 0; PRINT; PRINT " "
|
||||
emit8(OP_LOAD_LOCAL); emitU16(0);
|
||||
emit8(OP_PRINT);
|
||||
|
||||
// FOR_NEXT: increment i, test, jump back
|
||||
emit8(OP_FOR_NEXT);
|
||||
emitU16(0); // local index
|
||||
emit8(1); // isLocal=1
|
||||
int16_t offset = (int16_t)(loopBody - (sCodeLen + 2));
|
||||
emit16(offset);
|
||||
|
||||
// After loop
|
||||
emit8(OP_PRINT_NL);
|
||||
emit8(OP_HALT);
|
||||
|
||||
BasModuleT module;
|
||||
memset(&module, 0, sizeof(module));
|
||||
module.code = sCode;
|
||||
module.codeLen = sCodeLen;
|
||||
module.entryPoint = 0;
|
||||
|
||||
BasVmT *vm = basVmCreate();
|
||||
|
||||
// Initialize the implicit main frame with 1 local
|
||||
vm->callStack[0].localCount = 1;
|
||||
vm->callDepth = 1;
|
||||
|
||||
basVmLoadModule(vm, &module);
|
||||
basVmRun(vm);
|
||||
basVmDestroy(vm);
|
||||
printf("\n");
|
||||
}
|
||||
|
||||
|
||||
// ============================================================
|
||||
// main
|
||||
// ============================================================
|
||||
|
||||
int main(void) {
|
||||
printf("DVX BASIC VM Tests\n");
|
||||
printf("==================\n\n");
|
||||
|
||||
basStringSystemInit();
|
||||
|
||||
test1();
|
||||
test2();
|
||||
test3();
|
||||
test4();
|
||||
|
||||
printf("All tests complete.\n");
|
||||
return 0;
|
||||
}
|
||||
Loading…
Add table
Reference in a new issue