// lexer.h -- DVX BASIC lexer (tokenizer)
//
// Converts BASIC source text into a stream of tokens. Case-insensitive
// for keywords. Handles line continuations (_), comments (' and REM),
// type suffixes (%, &, !, #, $), and string literals.
//
// Embeddable: no DVX dependencies, pure C.

#ifndef DVXBASIC_LEXER_H
#define DVXBASIC_LEXER_H

#include <stdint.h>  // int32_t, int64_t used by the token and lexer structs
// NOTE(review): the second header name was missing from this directive;
// <stdbool.h> is assumed -- confirm against the lexer implementation.
#include <stdbool.h>

// ============================================================
// Token types
// ============================================================
//
// Enumerator order determines the numeric values; append or insert with
// care if any table elsewhere is indexed by token type.

typedef enum {
    // Literals
    TOK_INT_LIT,        // integer literal (123, &HFF)
    TOK_LONG_LIT,       // long literal (123&)
    TOK_FLOAT_LIT,      // float literal (3.14, 1.5E10)
    TOK_STRING_LIT,     // "string literal"

    // Identifiers and symbols
    TOK_IDENT,          // variable/function name
    TOK_DOT,            // .
    TOK_COMMA,          // ,
    TOK_SEMICOLON,      // ;
    TOK_COLON,          // :
    TOK_LPAREN,         // (
    TOK_RPAREN,         // )
    TOK_HASH,           // # (file channel)

    // Operators
    TOK_PLUS,           // +
    TOK_MINUS,          // -
    TOK_STAR,           // *
    TOK_SLASH,          // /
    TOK_BACKSLASH,      // \ (integer divide)
    TOK_CARET,          // ^
    TOK_AMPERSAND,      // & (string concat or hex prefix)
    TOK_EQ,             // =
    TOK_NE,             // <>
    TOK_LT,             // <
    TOK_GT,             // >
    TOK_LE,             // <=
    TOK_GE,             // >=

    // Type suffixes (attached to identifier)
    TOK_SUFFIX_INT,     // %
    TOK_SUFFIX_LONG,    // &
    TOK_SUFFIX_SINGLE,  // !
    TOK_SUFFIX_DOUBLE,  // #
    TOK_SUFFIX_STRING,  // $

    // Keywords
    TOK_AND,
    TOK_APP,
    TOK_AS,
    TOK_BASE,
    TOK_BOOLEAN,
    TOK_BYVAL,
    TOK_CALL,
    TOK_CASE,
    TOK_CLOSE,
    TOK_CONST,
    TOK_DATA,
    TOK_DECLARE,
    TOK_DEF,
    TOK_DEFDBL,
    TOK_DEFINT,
    TOK_DEFLNG,
    TOK_DEFSNG,
    TOK_DEFSTR,
    TOK_DIM,
    TOK_DO,
    TOK_DOEVENTS,
    TOK_DOUBLE,
    TOK_ELSE,
    TOK_ELSEIF,
    TOK_END,
    TOK_EOF_KW,         // EOF (keyword, not end-of-file)
    TOK_EQV,
    TOK_ERASE,
    TOK_ERR,
    TOK_ERROR_KW,
    TOK_EXPLICIT,
    TOK_EXIT,
    TOK_FALSE_KW,
    TOK_FOR,
    TOK_FUNCTION,
    TOK_GET,
    TOK_GOSUB,
    TOK_GOTO,
    TOK_HIDE,
    TOK_IF,
    TOK_IMP,
    TOK_INPUT,
    TOK_INTEGER,
    TOK_IS,
    TOK_LBOUND,
    TOK_LET,
    TOK_LINE,
    TOK_LOAD,
    TOK_LONG,
    TOK_LOOP,
    TOK_ME,
    TOK_MOD,
    TOK_MSGBOX,
    TOK_NEXT,
    TOK_NOT,
    TOK_ON,
    TOK_OPEN,
    TOK_OPTION,
    TOK_OR,
    TOK_OUTPUT,
    TOK_PRESERVE,
    TOK_PRINT,
    TOK_PUT,
    TOK_RANDOMIZE,
    TOK_READ,
    TOK_REDIM,
    TOK_REM,
    TOK_RESTORE,
    TOK_RESUME,
    TOK_RETURN,
    TOK_SEEK,
    TOK_SELECT,
    TOK_SET,
    TOK_SHARED,
    TOK_SHELL,
    TOK_SHOW,
    TOK_SINGLE,
    TOK_SLEEP,
    TOK_INIREAD,
    TOK_INIWRITE,
    TOK_SQLCLOSE,
    TOK_SQLEOF,
    TOK_SQLERROR,
    TOK_SQLEXEC,
    TOK_SQLFIELD,       // SQLField$(rs, col) or SQLField$(rs, "name")
    TOK_SQLFIELDCOUNT,
    TOK_SQLFIELDINT,
    TOK_SQLFIELDDBL,
    TOK_SQLFREERESULT,
    TOK_SQLNEXT,
    TOK_SQLOPEN,
    TOK_SQLQUERY,
    TOK_SQLAFFECTED,
    TOK_STATIC,
    TOK_STEP,
    TOK_STRING_KW,
    TOK_SUB,
    TOK_SWAP,
    TOK_THEN,
    TOK_TIMER,
    TOK_TO,
    TOK_TRUE_KW,
    TOK_TYPE,
    TOK_UBOUND,
    TOK_UNLOAD,
    TOK_UNTIL,
    TOK_WEND,
    TOK_WHILE,
    TOK_WITH,
    TOK_WRITE,
    TOK_XOR,

    // File modes
    TOK_APPEND,
    TOK_BINARY,
    TOK_RANDOM,

    // Special
    TOK_NEWLINE,        // end of logical line
    TOK_EOF,            // end of source
    TOK_ERROR           // lexer error
} BasTokenTypeE;

// ============================================================
// Token
// ============================================================

// Size in bytes of the BasTokenT.text buffer.
#define BAS_MAX_TOKEN_LEN 256

typedef struct {
    BasTokenTypeE type;
    int32_t       line;     // 1-based source line number
    int32_t       col;      // 1-based column number

    // Value (depends on type). Anonymous union (C11): the members share
    // storage, so only the one matching the token type is meaningful.
    union {
        int32_t intVal;
        int64_t longVal;
        float   fltVal;
        double  dblVal;
    };

    char    text[BAS_MAX_TOKEN_LEN];    // raw text of the token
    int32_t textLen;                    // presumably the length of text -- confirm in lexer.c
} BasTokenT;

// ============================================================
// Lexer state
// ============================================================

typedef struct {
    const char *source;     // source text (not owned)
    int32_t     sourceLen;
    int32_t     pos;        // current position in source
    int32_t     line;       // current line (1-based)
    int32_t     col;        // current column (1-based)
    BasTokenT   token;      // current token
    char        error[256]; // presumably the message for a TOK_ERROR result -- confirm in lexer.c
} BasLexerT;

// ============================================================
// API
// ============================================================

// Initialize lexer with source text. The source must remain valid
// for the lifetime of the lexer.
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen);

// Advance to the next token. Returns the token type.
// The token is available in lex->token.
BasTokenTypeE basLexerNext(BasLexerT *lex);

// Peek at the current token type without advancing.
BasTokenTypeE basLexerPeek(const BasLexerT *lex);

// Return human-readable name for a token type.
const char *basTokenName(BasTokenTypeE type);

#endif // DVXBASIC_LEXER_H