// lexer.h -- DVX BASIC lexer (tokenizer)
//
// Converts BASIC source text into a stream of tokens. Case-insensitive
// for keywords. Handles line continuations (_), comments (' and REM),
// type suffixes (%, &, !, #, $), and string literals.
//
// Embeddable: no DVX dependencies, pure C.

#ifndef DVXBASIC_LEXER_H
#define DVXBASIC_LEXER_H

#include <stdint.h>
#include <stdbool.h>

// ============================================================
// Token types
// ============================================================

// Token kinds produced by the lexer.
//
// No enumerator has an explicit value, so each one's numeric value is its
// position in this list (TOK_INT_LIT == 0). Inserting or reordering
// entries renumbers everything that follows.
// NOTE(review): confirm whether basTokenName() or any other table is
// indexed by these values before changing the order.
typedef enum {
    // Literals
    TOK_INT_LIT,        // integer literal (123, &HFF)
    TOK_LONG_LIT,       // long literal (123&)
    TOK_FLOAT_LIT,      // float literal (3.14, 1.5E10)
    TOK_STRING_LIT,     // "string literal"

    // Identifiers and symbols
    TOK_IDENT,          // variable/function name
    TOK_DOT,            // .
    TOK_COMMA,          // ,
    TOK_SEMICOLON,      // ;
    TOK_COLON,          // :
    TOK_LPAREN,         // (
    TOK_RPAREN,         // )
    TOK_HASH,           // # (file channel)

    // Operators
    TOK_PLUS,           // +
    TOK_MINUS,          // -
    TOK_STAR,           // *
    TOK_SLASH,          // /
    TOK_BACKSLASH,      // \ (integer divide)
    TOK_CARET,          // ^
    TOK_AMPERSAND,      // & (string concat or hex prefix)
    TOK_EQ,             // =
    TOK_NE,             // <>
    TOK_LT,             // <
    TOK_GT,             // >
    TOK_LE,             // <=
    TOK_GE,             // >=

    // Type suffixes (attached to identifier)
    TOK_SUFFIX_INT,     // %
    TOK_SUFFIX_LONG,    // &
    TOK_SUFFIX_SINGLE,  // !
    TOK_SUFFIX_DOUBLE,  // #
    TOK_SUFFIX_STRING,  // $

    // Keywords.
    // Mostly alphabetical; TOK_INIREAD through TOK_SQLAFFECTED sit between
    // TOK_SLEEP and TOK_STATIC. The order is kept as-is because it
    // determines the numeric values.
    TOK_AND,
    TOK_APP,
    TOK_AS,
    TOK_BASE,
    TOK_BOOLEAN,
    TOK_BYVAL,
    TOK_CALL,
    TOK_CASE,
    TOK_CLOSE,
    TOK_CONST,
    TOK_DATA,
    TOK_DECLARE,
    TOK_DEF,
    TOK_DEFDBL,
    TOK_DEFINT,
    TOK_DEFLNG,
    TOK_DEFSNG,
    TOK_DEFSTR,
    TOK_DIM,
    TOK_DO,
    TOK_DOEVENTS,
    TOK_DOUBLE,
    TOK_ELSE,
    TOK_ELSEIF,
    TOK_END,
    TOK_EOF_KW,         // EOF (keyword, not end-of-file)
    TOK_EQV,
    TOK_ERASE,
    TOK_ERR,
    TOK_ERROR_KW,       // separate enumerator from TOK_ERROR (lexer error)
    TOK_EXPLICIT,
    TOK_EXIT,
    TOK_FALSE_KW,
    TOK_FOR,
    TOK_FUNCTION,
    TOK_GET,
    TOK_GOSUB,
    TOK_GOTO,
    TOK_HIDE,
    TOK_IF,
    TOK_IMP,
    TOK_INPUT,
    TOK_INTEGER,
    TOK_IS,
    TOK_LBOUND,
    TOK_LET,
    TOK_LINE,
    TOK_LOAD,
    TOK_LONG,
    TOK_LOOP,
    TOK_ME,
    TOK_MOD,
    TOK_MSGBOX,
    TOK_NEXT,
    TOK_NOT,
    TOK_ON,
    TOK_OPEN,
    TOK_OPTION,
    TOK_OR,
    TOK_OUTPUT,
    TOK_PRESERVE,
    TOK_PRINT,
    TOK_PUT,
    TOK_RANDOMIZE,
    TOK_READ,
    TOK_REDIM,
    TOK_REM,
    TOK_RESTORE,
    TOK_RESUME,
    TOK_RETURN,
    TOK_SEEK,
    TOK_SELECT,
    TOK_SET,
    TOK_SHARED,
    TOK_SHELL,
    TOK_SHOW,
    TOK_SINGLE,
    TOK_SLEEP,
    TOK_INIREAD,
    TOK_INIWRITE,
    TOK_SQLCLOSE,
    TOK_SQLEOF,
    TOK_SQLERROR,
    TOK_SQLEXEC,
    TOK_SQLFIELD,       // SQLField$(rs, col) or SQLField$(rs, "name")
    TOK_SQLFIELDCOUNT,
    TOK_SQLFIELDINT,
    TOK_SQLFIELDDBL,
    TOK_SQLFREERESULT,
    TOK_SQLNEXT,
    TOK_SQLOPEN,
    TOK_SQLQUERY,
    TOK_SQLAFFECTED,
    TOK_STATIC,
    TOK_STEP,
    TOK_STRING_KW,      // separate enumerator from TOK_STRING_LIT
    TOK_SUB,
    TOK_SWAP,
    TOK_THEN,
    TOK_TIMER,
    TOK_TO,
    TOK_TRUE_KW,
    TOK_TYPE,
    TOK_UBOUND,
    TOK_UNLOAD,
    TOK_UNTIL,
    TOK_WEND,
    TOK_WHILE,
    TOK_WITH,
    TOK_WRITE,
    TOK_XOR,

    // File modes
    TOK_APPEND,
    TOK_BINARY,
    TOK_RANDOM,

    // Special
    TOK_NEWLINE,        // end of logical line
    TOK_EOF,            // end of source
    TOK_ERROR           // lexer error
} BasTokenTypeE;

// ============================================================
// Token
// ============================================================

#define BAS_MAX_TOKEN_LEN 256
|
|
|
|
typedef struct {
|
|
BasTokenTypeE type;
|
|
int32_t line; // 1-based source line number
|
|
int32_t col; // 1-based column number
|
|
|
|
// Value (depends on type)
|
|
union {
|
|
int32_t intVal;
|
|
int64_t longVal;
|
|
float fltVal;
|
|
double dblVal;
|
|
};
|
|
|
|
char text[BAS_MAX_TOKEN_LEN]; // raw text of the token
|
|
int32_t textLen;
|
|
} BasTokenT;

// ============================================================
// Lexer state
// ============================================================

typedef struct {
|
|
const char *source; // source text (not owned)
|
|
int32_t sourceLen;
|
|
int32_t pos; // current position in source
|
|
int32_t line; // current line (1-based)
|
|
int32_t col; // current column (1-based)
|
|
BasTokenT token; // current token
|
|
char error[256];
|
|
} BasLexerT;

// ============================================================
// API
// ============================================================

// Initialize lexer with source text. The source must remain valid
|
|
// for the lifetime of the lexer.
|
|
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen);
|
|
|
|
// Advance to the next token. Returns the token type.
|
|
// The token is available in lex->token.
|
|
BasTokenTypeE basLexerNext(BasLexerT *lex);
|
|
|
|
// Peek at the current token type without advancing.
|
|
BasTokenTypeE basLexerPeek(const BasLexerT *lex);
|
|
|
|
// Return human-readable name for a token type.
|
|
const char *basTokenName(BasTokenTypeE type);

#endif // DVXBASIC_LEXER_H