DVX_GUI/apps/dvxbasic/compiler/lexer.h

245 lines
5.5 KiB
C

// lexer.h -- DVX BASIC lexer (tokenizer)
//
// Converts BASIC source text into a stream of tokens. Case-insensitive
// for keywords. Handles line continuations (_), comments (' and REM),
// type suffixes (%, &, !, #, $), and string literals.
//
// Embeddable: no DVX dependencies, pure C.
#ifndef DVXBASIC_LEXER_H
#define DVXBASIC_LEXER_H
#include <stdint.h>
#include <stdbool.h>
// ============================================================
// Token types
// ============================================================
// Every kind of token the lexer can report.
//
// No enumerator has an explicit value, so numbering starts at 0 and
// follows declaration order: moving or inserting an entry changes the
// numeric value of everything after it.
typedef enum {
    // ---- Literals ----
    TOK_INT_LIT,            // integer literal, e.g. 123 or &HFF
    TOK_LONG_LIT,           // long literal, e.g. 123&
    TOK_FLOAT_LIT,          // floating-point literal, e.g. 3.14 or 1.5E10
    TOK_STRING_LIT,         // double-quoted string literal

    // ---- Identifiers and punctuation ----
    TOK_IDENT,              // variable or function name
    TOK_DOT,                // .
    TOK_COMMA,              // ,
    TOK_SEMICOLON,          // ;
    TOK_COLON,              // :
    TOK_LPAREN,             // (
    TOK_RPAREN,             // )
    TOK_HASH,               // # used as a file channel marker

    // ---- Operators ----
    TOK_PLUS,               // +
    TOK_MINUS,              // -
    TOK_STAR,               // *
    TOK_SLASH,              // /
    TOK_BACKSLASH,          // '\' integer division
    TOK_CARET,              // ^
    TOK_AMPERSAND,          // & as string concatenation or hex prefix
    TOK_EQ,                 // =
    TOK_NE,                 // <>
    TOK_LT,                 // <
    TOK_GT,                 // >
    TOK_LE,                 // <=
    TOK_GE,                 // >=

    // ---- Type suffixes (written directly after an identifier) ----
    TOK_SUFFIX_INT,         // %
    TOK_SUFFIX_LONG,        // &
    TOK_SUFFIX_SINGLE,      // !
    TOK_SUFFIX_DOUBLE,      // #
    TOK_SUFFIX_STRING,      // $

    // ---- Keywords ----
    TOK_AND,
    TOK_APP,
    TOK_AS,
    TOK_BASE,
    TOK_BOOLEAN,
    TOK_BYVAL,
    TOK_CALL,
    TOK_CASE,
    TOK_CLOSE,
    TOK_CONST,
    TOK_CREATECONTROL,
    TOK_CREATEFORM,
    TOK_DATA,
    TOK_DECLARE,
    TOK_DEF,
    TOK_DEFDBL,
    TOK_DEFINT,
    TOK_DEFLNG,
    TOK_DEFSNG,
    TOK_DEFSTR,
    TOK_DIM,
    TOK_DO,
    TOK_DOEVENTS,
    TOK_DOUBLE,
    TOK_ELSE,
    TOK_ELSEIF,
    TOK_END,
    TOK_EOF_KW,             // the EOF keyword; end of source is TOK_EOF
    TOK_EQV,
    TOK_ERASE,
    TOK_ERR,
    TOK_ERROR_KW,           // keyword token; lexer failures use TOK_ERROR
    TOK_EXPLICIT,
    TOK_EXIT,
    TOK_FALSE_KW,
    TOK_FOR,
    TOK_FUNCTION,
    TOK_GET,
    TOK_GOSUB,
    TOK_GOTO,
    TOK_HIDE,
    TOK_IF,
    TOK_IMP,
    TOK_INPUT,
    TOK_INTEGER,
    TOK_IS,
    TOK_LBOUND,
    TOK_LET,
    TOK_LINE,
    TOK_LOAD,
    TOK_LONG,
    TOK_LOOP,
    TOK_ME,
    TOK_MOD,
    TOK_INPUTBOX,
    TOK_MSGBOX,
    TOK_NEXT,
    TOK_NOT,
    TOK_NOTHING,
    TOK_ON,
    TOK_OPEN,
    TOK_OPTIONAL,
    TOK_OPTION,
    TOK_OR,
    TOK_OUTPUT,
    TOK_PRESERVE,
    TOK_PRINT,
    TOK_PUT,
    TOK_RANDOMIZE,
    TOK_READ,
    TOK_REDIM,
    TOK_REM,
    TOK_REMOVECONTROL,
    TOK_RESTORE,
    TOK_RESUME,
    TOK_RETURN,
    TOK_SEEK,
    TOK_SELECT,
    TOK_SET,
    TOK_SETEVENT,
    TOK_SHARED,
    TOK_SHELL,
    TOK_SHOW,
    TOK_SINGLE,
    TOK_SLEEP,
    TOK_INIREAD,
    TOK_INIWRITE,
    TOK_STATIC,
    TOK_STEP,
    TOK_STRING_KW,
    TOK_SUB,
    TOK_SWAP,
    TOK_THEN,
    TOK_TIMER,
    TOK_TO,
    TOK_TRUE_KW,
    TOK_TYPE,
    TOK_UBOUND,
    TOK_UNLOAD,
    TOK_UNTIL,
    TOK_WEND,
    TOK_WHILE,
    TOK_WITH,
    TOK_WRITE,
    TOK_XOR,

    // ---- Filesystem keywords ----
    TOK_CHDIR,
    TOK_CHDRIVE,
    TOK_CURDIR,
    TOK_DIR,
    TOK_FILECOPY,
    TOK_FILELEN,
    TOK_GETATTR,
    TOK_KILL,
    TOK_MKDIR,
    TOK_NAME,
    TOK_RMDIR,
    TOK_SETATTR,

    // ---- File modes ----
    TOK_APPEND,
    TOK_BINARY,
    TOK_RANDOM,

    // ---- Special ----
    TOK_NEWLINE,            // end of a logical line
    TOK_EOF,                // end of the source text
    TOK_ERROR               // the lexer hit an error
} BasTokenTypeE;
// ============================================================
// Token
// ============================================================
// Capacity, in chars, of the raw-text buffer carried by every token.
#define BAS_MAX_TOKEN_LEN 256

// One lexed token: its kind, where it started in the source, an optional
// numeric value, and the raw text it was read from.
typedef struct {
    BasTokenTypeE type;             // which kind of token this is
    int32_t       line;             // source line of the token, counted from 1
    int32_t       col;              // source column of the token, counted from 1

    // Numeric payload. The members share storage, so only one is
    // meaningful at a time and which one depends on `type`.
    // NOTE(review): the type-to-member mapping is not visible in this
    // header -- confirm against the lexer implementation.
    union {
        int32_t intVal;
        int64_t longVal;
        float   fltVal;
        double  dblVal;
    };

    char    text[BAS_MAX_TOKEN_LEN];    // the token's raw text
    int32_t textLen;                    // presumably the used length of text -- TODO confirm
} BasTokenT;
// ============================================================
// Lexer state
// ============================================================
// Complete state of one lexer instance: the input being scanned, the
// scan cursor, the current token, and an error-message buffer.
typedef struct {
    const char *source;         // input text; borrowed, the lexer does not own it
    int32_t     sourceLen;      // length of the input text
    int32_t     pos;            // current position within source
    int32_t     line;           // current line, counted from 1
    int32_t     col;            // current column, counted from 1
    BasTokenT   token;          // the current token
    char        error[256];     // error text; presumably set on TOK_ERROR -- TODO confirm
} BasLexerT;
// ============================================================
// API
// ============================================================
// Initialize lexer with source text. The source must remain valid
// for the lifetime of the lexer.
//   lex       - lexer state to initialize
//   source    - BASIC source text; the lexer keeps a pointer to it and
//               does not own it (see BasLexerT.source)
//   sourceLen - length of the source text
// NOTE(review): whether the first token is already loaded into
// lex->token on return is not visible from this header -- confirm
// against the implementation before relying on basLexerPeek() right
// after init.
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen);
// Advance to the next token. Returns the token type.
// The token is available in lex->token.
// The enum includes TOK_EOF (end of source) and TOK_ERROR (lexer
// error); presumably lex->error holds the message when TOK_ERROR is
// returned -- TODO confirm.
BasTokenTypeE basLexerNext(BasLexerT *lex);
// Peek at the current token type without advancing.
// Takes the lexer as const, so lexer state is not modified through it.
BasTokenTypeE basLexerPeek(const BasLexerT *lex);
// Return human-readable name for a token type.
// NOTE(review): ownership of the returned string and the result for
// values outside BasTokenTypeE are not visible from this header --
// confirm before freeing or modifying it.
const char *basTokenName(BasTokenTypeE type);
#endif // DVXBASIC_LEXER_H