857 lines
23 KiB
C
857 lines
23 KiB
C
// The MIT License (MIT)
|
|
//
|
|
// Copyright (C) 2026 Scott Duensing
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
// of this software and associated documentation files (the "Software"), to
|
|
// deal in the Software without restriction, including without limitation the
|
|
// rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
// sell copies of the Software, and to permit persons to whom the Software is
|
|
// furnished to do so, subject to the following conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be included in
|
|
// all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
|
|
// IN THE SOFTWARE.
|
|
|
|
// lexer.c -- DVX BASIC lexer implementation
|
|
//
|
|
// Single-pass tokenizer. Keywords are case-insensitive. Identifiers
|
|
// preserve their original case for display but comparisons are
|
|
// case-insensitive. Line continuations (underscore at end of line)
|
|
// are handled transparently.
|
|
|
|
#include "lexer.h"
|
|
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// ============================================================
|
|
// Keyword table
|
|
// ============================================================
|
|
|
|
// One entry in the keyword table: canonical (uppercase) spelling and
// the token type emitted when an identifier matches it.
typedef struct {
    const char *text;    // canonical keyword text, uppercase (e.g. "PRINT")
    BasTokenTypeE type;  // token type produced on a match
} KeywordEntryT;
|
|
|
|
// Keyword table, terminated by a NULL sentinel entry.
//
// Suffix-bearing aliases (CURDIR$, DIR$, INIREAD$, INPUTBOX$) map to the
// same token as the bare name so both spellings lex identically.
//
// The table is mostly alphabetical, but not strictly (CONST, EXPLICIT,
// the INPUTBOX pair, and OPTIONAL are out of order).  This is harmless:
// lookupKeyword() does a linear, exact-length scan, so order only
// affects the index exposed through basLexerKeywordAt()/KeywordClass().
// NOTE(review): do not re-sort without confirming no caller depends on
// the current indices.
static const KeywordEntryT sKeywords[] = {
    { "AND", TOK_AND },
    { "APP", TOK_APP },
    { "APPEND", TOK_APPEND },
    { "AS", TOK_AS },
    { "BASE", TOK_BASE },
    { "BINARY", TOK_BINARY },
    { "BOOLEAN", TOK_BOOLEAN },
    { "BYVAL", TOK_BYVAL },
    { "CALL", TOK_CALL },
    { "CASE", TOK_CASE },
    { "CHDIR", TOK_CHDIR },
    { "CHDRIVE", TOK_CHDRIVE },
    { "CLOSE", TOK_CLOSE },
    { "CREATECONTROL", TOK_CREATECONTROL },
    { "CREATEFORM", TOK_CREATEFORM },
    { "CURDIR", TOK_CURDIR },
    { "CURDIR$", TOK_CURDIR },
    { "CONST", TOK_CONST },
    { "DATA", TOK_DATA },
    { "DECLARE", TOK_DECLARE },
    { "DEF", TOK_DEF },
    { "DEFDBL", TOK_DEFDBL },
    { "DEFINT", TOK_DEFINT },
    { "DEFLNG", TOK_DEFLNG },
    { "DEFSNG", TOK_DEFSNG },
    { "DEFSTR", TOK_DEFSTR },
    { "DIM", TOK_DIM },
    { "DIR", TOK_DIR },
    { "DIR$", TOK_DIR },
    { "DO", TOK_DO },
    { "DOEVENTS", TOK_DOEVENTS },
    { "DOUBLE", TOK_DOUBLE },
    { "ELSE", TOK_ELSE },
    { "ELSEIF", TOK_ELSEIF },
    { "END", TOK_END },
    { "EOF", TOK_EOF_KW },
    { "EQV", TOK_EQV },
    { "ERASE", TOK_ERASE },
    { "ERR", TOK_ERR },
    { "ERROR", TOK_ERROR_KW },
    { "EXPLICIT", TOK_EXPLICIT },
    { "EXIT", TOK_EXIT },
    { "FALSE", TOK_FALSE_KW },
    { "FILECOPY", TOK_FILECOPY },
    { "FILELEN", TOK_FILELEN },
    { "FOR", TOK_FOR },
    { "FUNCTION", TOK_FUNCTION },
    { "GET", TOK_GET },
    { "GETATTR", TOK_GETATTR },
    { "GOSUB", TOK_GOSUB },
    { "GOTO", TOK_GOTO },
    { "HIDE", TOK_HIDE },
    { "IF", TOK_IF },
    { "IMP", TOK_IMP },
    { "INIREAD", TOK_INIREAD },
    { "INIREAD$", TOK_INIREAD },
    { "INIWRITE", TOK_INIWRITE },
    { "INPUT", TOK_INPUT },
    { "INTEGER", TOK_INTEGER },
    { "IS", TOK_IS },
    { "KILL", TOK_KILL },
    { "LBOUND", TOK_LBOUND },
    { "LET", TOK_LET },
    { "LINE", TOK_LINE },
    { "LOAD", TOK_LOAD },
    { "LONG", TOK_LONG },
    { "LOOP", TOK_LOOP },
    { "ME", TOK_ME },
    { "MKDIR", TOK_MKDIR },
    { "MOD", TOK_MOD },
    { "INPUTBOX", TOK_INPUTBOX },
    { "INPUTBOX$", TOK_INPUTBOX },
    { "MSGBOX", TOK_MSGBOX },
    { "NAME", TOK_NAME },
    { "NEXT", TOK_NEXT },
    { "NOT", TOK_NOT },
    { "NOTHING", TOK_NOTHING },
    { "ON", TOK_ON },
    { "OPEN", TOK_OPEN },
    { "OPTIONAL", TOK_OPTIONAL },
    { "OPTION", TOK_OPTION },
    { "OR", TOK_OR },
    { "OUTPUT", TOK_OUTPUT },
    { "PRESERVE", TOK_PRESERVE },
    { "PRINT", TOK_PRINT },
    { "PUT", TOK_PUT },
    { "RANDOM", TOK_RANDOM },
    { "RANDOMIZE", TOK_RANDOMIZE },
    { "READ", TOK_READ },
    { "REDIM", TOK_REDIM },
    { "REM", TOK_REM },
    { "REMOVECONTROL", TOK_REMOVECONTROL },
    { "RESTORE", TOK_RESTORE },
    { "RESUME", TOK_RESUME },
    { "RETURN", TOK_RETURN },
    { "RMDIR", TOK_RMDIR },
    { "SEEK", TOK_SEEK },
    { "SELECT", TOK_SELECT },
    { "SET", TOK_SET },
    { "SETATTR", TOK_SETATTR },
    { "SETEVENT", TOK_SETEVENT },
    { "SHARED", TOK_SHARED },
    { "SHELL", TOK_SHELL },
    { "SHOW", TOK_SHOW },
    { "SINGLE", TOK_SINGLE },
    { "SLEEP", TOK_SLEEP },
    { "STATIC", TOK_STATIC },
    { "STEP", TOK_STEP },
    { "STRING", TOK_STRING_KW },
    { "SUB", TOK_SUB },
    { "SWAP", TOK_SWAP },
    { "THEN", TOK_THEN },
    { "TIMER", TOK_TIMER },
    { "TO", TOK_TO },
    { "TRUE", TOK_TRUE_KW },
    { "TYPE", TOK_TYPE },
    { "UBOUND", TOK_UBOUND },
    { "UNLOAD", TOK_UNLOAD },
    { "UNTIL", TOK_UNTIL },
    { "WEND", TOK_WEND },
    { "WHILE", TOK_WHILE },
    { "WITH", TOK_WITH },
    { "WRITE", TOK_WRITE },
    { "XOR", TOK_XOR },
    { NULL, TOK_ERROR }   // sentinel -- excluded from KEYWORD_COUNT
};
|
|
|
|
// Number of usable keyword entries (the trailing NULL sentinel is
// excluded by the "- 1").
#define KEYWORD_COUNT (sizeof(sKeywords) / sizeof(sKeywords[0]) - 1)


// Function prototypes (alphabetical)
static char advance(BasLexerT *lex);
static bool atEnd(const BasLexerT *lex);
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen);
const char *basLexerKeywordAt(int32_t i);
BasKeywordClassE basLexerKeywordClass(int32_t i);
int32_t basLexerKeywordCount(void);
BasTokenTypeE basLexerNext(BasLexerT *lex);
BasTokenTypeE basLexerPeek(const BasLexerT *lex);
const char *basTokenName(BasTokenTypeE type);
static BasTokenTypeE lookupKeyword(const char *text, int32_t len);
static char peek(const BasLexerT *lex);
static char peekNext(const BasLexerT *lex);
static void setError(BasLexerT *lex, const char *msg);
static void skipLineComment(BasLexerT *lex);
static void skipWhitespace(BasLexerT *lex);
static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex);
static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex);
static BasTokenTypeE tokenizeNumber(BasLexerT *lex);
static BasTokenTypeE tokenizeString(BasLexerT *lex);
static char upperChar(char c);
|
|
|
|
// Consume and return the current character, keeping the line/column
// position in sync.  Returns '\0' once the input is exhausted.
static char advance(BasLexerT *lex) {
    if (atEnd(lex)) {
        return '\0';
    }

    char ch = lex->source[lex->pos];
    lex->pos++;

    if (ch != '\n') {
        lex->col++;
    } else {
        // New physical line: bump the line counter, reset the column.
        lex->line++;
        lex->col = 1;
    }

    return ch;
}
|
|
|
|
|
|
static bool atEnd(const BasLexerT *lex) {
|
|
return lex->pos >= lex->sourceLen;
|
|
}
|
|
|
|
|
|
// Initialize a lexer over `source`.  A negative `sourceLen` means the
// buffer is NUL-terminated and its length should be measured here.
// The first token is scanned immediately so basLexerPeek() is valid
// right after this call.
void basLexerInit(BasLexerT *lex, const char *source, int32_t sourceLen) {
    memset(lex, 0, sizeof(*lex));

    lex->source = source;
    lex->pos = 0;
    lex->line = 1;
    lex->col = 1;

    if (sourceLen >= 0) {
        lex->sourceLen = sourceLen;
    } else {
        lex->sourceLen = (int32_t)strlen(source);
    }

    // Prime the first token
    basLexerNext(lex);
}
|
|
|
|
|
|
// Return the canonical spelling of keyword `i`, or NULL when the index
// is outside [0, KEYWORD_COUNT).
const char *basLexerKeywordAt(int32_t i) {
    bool inRange = (i >= 0) && (i < (int32_t)KEYWORD_COUNT);

    return inRange ? sKeywords[i].text : NULL;
}
|
|
|
|
|
|
// Classify keyword `i` for display purposes: built-in type names,
// literal-like keywords, or everything else.  Out-of-range indices
// report BAS_KW_CLASS_OTHER.
BasKeywordClassE basLexerKeywordClass(int32_t i) {
    if (i < 0 || i >= (int32_t)KEYWORD_COUNT) {
        return BAS_KW_CLASS_OTHER;
    }

    BasTokenTypeE t = sKeywords[i].type;

    // Built-in type names (BOOLEAN, DOUBLE, INTEGER, LONG, SINGLE, STRING).
    if (t == TOK_BOOLEAN || t == TOK_DOUBLE || t == TOK_INTEGER ||
        t == TOK_LONG || t == TOK_SINGLE || t == TOK_STRING_KW) {
        return BAS_KW_CLASS_TYPE;
    }

    // Literal-like keywords (TRUE, FALSE, NOTHING).
    if (t == TOK_TRUE_KW || t == TOK_FALSE_KW || t == TOK_NOTHING) {
        return BAS_KW_CLASS_LITERAL;
    }

    return BAS_KW_CLASS_OTHER;
}
|
|
|
|
|
|
int32_t basLexerKeywordCount(void) {
|
|
return (int32_t)KEYWORD_COUNT;
|
|
}
|
|
|
|
|
|
// Scan and return the next token.  The token is also stored in
// lex->token, which basLexerPeek() returns without rescanning.
//
// Note: physical line breaks (LF, CR, CRLF) and comments (apostrophe,
// and REM inside tokenizeIdentOrKeyword) all surface as TOK_NEWLINE,
// so the parser sees a uniform end-of-statement marker.
BasTokenTypeE basLexerNext(BasLexerT *lex) {
    skipWhitespace(lex);

    // Record where this token starts and reset its text.
    lex->token.line = lex->line;
    lex->token.col = lex->col;
    lex->token.textLen = 0;
    lex->token.text[0] = '\0';

    if (atEnd(lex)) {
        lex->token.type = TOK_EOF;
        return TOK_EOF;
    }

    char c = peek(lex);

    // Newline
    if (c == '\n') {
        advance(lex);
        lex->token.type = TOK_NEWLINE;
        lex->token.text[0] = '\n';
        lex->token.text[1] = '\0';
        lex->token.textLen = 1;
        return TOK_NEWLINE;
    }

    // Carriage return (handle CR, CRLF) -- both collapse to one newline token
    if (c == '\r') {
        advance(lex);

        if (!atEnd(lex) && peek(lex) == '\n') {
            advance(lex);
        }

        lex->token.type = TOK_NEWLINE;
        lex->token.text[0] = '\n';
        lex->token.text[1] = '\0';
        lex->token.textLen = 1;
        return TOK_NEWLINE;
    }

    // Comment (apostrophe) -- skipped to end of line, reported as a
    // newline so statement termination still happens.
    if (c == '\'') {
        skipLineComment(lex);
        lex->token.type = TOK_NEWLINE;
        lex->token.text[0] = '\n';
        lex->token.text[1] = '\0';
        lex->token.textLen = 1;
        return TOK_NEWLINE;
    }

    // String literal
    if (c == '"') {
        lex->token.type = tokenizeString(lex);
        return lex->token.type;
    }

    // Number (a leading '.' only starts a number when a digit follows,
    // so member access like "obj.field" is not misread)
    if (isdigit((unsigned char)c) || (c == '.' && isdigit((unsigned char)peekNext(lex)))) {
        lex->token.type = tokenizeNumber(lex);
        return lex->token.type;
    }

    // Hex literal (&H...)
    if (c == '&' && upperChar(peekNext(lex)) == 'H') {
        lex->token.type = tokenizeHexLiteral(lex);
        return lex->token.type;
    }

    // Identifier or keyword
    if (isalpha((unsigned char)c) || c == '_') {
        lex->token.type = tokenizeIdentOrKeyword(lex);
        return lex->token.type;
    }

    // Single and multi-character operators/punctuation
    advance(lex);

    switch (c) {
        case '+':
            lex->token.type = TOK_PLUS;
            break;

        case '-':
            lex->token.type = TOK_MINUS;
            break;

        case '*':
            lex->token.type = TOK_STAR;
            break;

        case '/':
            lex->token.type = TOK_SLASH;
            break;

        case '\\':
            lex->token.type = TOK_BACKSLASH;
            break;

        case '^':
            lex->token.type = TOK_CARET;
            break;

        case '&':
            lex->token.type = TOK_AMPERSAND;
            break;

        case '(':
            lex->token.type = TOK_LPAREN;
            break;

        case ')':
            lex->token.type = TOK_RPAREN;
            break;

        case ',':
            lex->token.type = TOK_COMMA;
            break;

        case ';':
            lex->token.type = TOK_SEMICOLON;
            break;

        case ':':
            lex->token.type = TOK_COLON;
            break;

        case '.':
            lex->token.type = TOK_DOT;
            break;

        case '#':
            lex->token.type = TOK_HASH;
            break;

        case '?':
            // Classic BASIC shorthand: "?" means PRINT.
            lex->token.type = TOK_PRINT;
            break;

        case '=':
            lex->token.type = TOK_EQ;
            break;

        case '<':
            // '<' may begin '<>' or '<='.
            if (!atEnd(lex) && peek(lex) == '>') {
                advance(lex);
                lex->token.type = TOK_NE;
            } else if (!atEnd(lex) && peek(lex) == '=') {
                advance(lex);
                lex->token.type = TOK_LE;
            } else {
                lex->token.type = TOK_LT;
            }
            break;

        case '>':
            // '>' may begin '>='.
            if (!atEnd(lex) && peek(lex) == '=') {
                advance(lex);
                lex->token.type = TOK_GE;
            } else {
                lex->token.type = TOK_GT;
            }
            break;

        default:
            setError(lex, "Unexpected character");
            lex->token.type = TOK_ERROR;
            break;
    }

    // Store the operator text.  For the two-character operators the
    // second character was just consumed, so it sits at pos - 1.
    if (lex->token.type != TOK_ERROR) {
        lex->token.text[0] = c;
        lex->token.textLen = 1;

        if (lex->token.type == TOK_NE || lex->token.type == TOK_LE || lex->token.type == TOK_GE) {
            lex->token.text[1] = lex->source[lex->pos - 1];
            lex->token.textLen = 2;
        }

        lex->token.text[lex->token.textLen] = '\0';
    }

    return lex->token.type;
}
|
|
|
|
|
|
// Return the type of the already-scanned current token without
// consuming it (the token was produced by the last basLexerNext call,
// or by the priming scan in basLexerInit).
BasTokenTypeE basLexerPeek(const BasLexerT *lex) {
    return lex->token.type;
}
|
|
|
|
|
|
// Human-readable name for a token type, used in diagnostics.
// Literals, punctuation, and structural tokens have fixed names;
// keywords render as their canonical table spelling; anything else
// falls back to "?".
const char *basTokenName(BasTokenTypeE type) {
    static const struct {
        BasTokenTypeE t;
        const char *name;
    } sFixed[] = {
        { TOK_INT_LIT, "integer" },
        { TOK_LONG_LIT, "long" },
        { TOK_FLOAT_LIT, "float" },
        { TOK_STRING_LIT, "string" },
        { TOK_IDENT, "identifier" },
        { TOK_DOT, "'.'" },
        { TOK_COMMA, "','" },
        { TOK_SEMICOLON, "';'" },
        { TOK_COLON, "':'" },
        { TOK_LPAREN, "'('" },
        { TOK_RPAREN, "')'" },
        { TOK_HASH, "'#'" },
        { TOK_PLUS, "'+'" },
        { TOK_MINUS, "'-'" },
        { TOK_STAR, "'*'" },
        { TOK_SLASH, "'/'" },
        { TOK_BACKSLASH, "'\\'" },
        { TOK_CARET, "'^'" },
        { TOK_AMPERSAND, "'&'" },
        { TOK_EQ, "'='" },
        { TOK_NE, "'<>'" },
        { TOK_LT, "'<'" },
        { TOK_GT, "'>'" },
        { TOK_LE, "'<='" },
        { TOK_GE, "'>='" },
        { TOK_NEWLINE, "newline" },
        { TOK_EOF, "end of file" },
        { TOK_ERROR, "error" },
    };

    for (size_t i = 0; i < sizeof(sFixed) / sizeof(sFixed[0]); i++) {
        if (sFixed[i].t == type) {
            return sFixed[i].name;
        }
    }

    // Keywords
    for (int32_t i = 0; i < (int32_t)KEYWORD_COUNT; i++) {
        if (sKeywords[i].type == type) {
            return sKeywords[i].text;
        }
    }

    return "?";
}
|
|
|
|
|
|
static BasTokenTypeE lookupKeyword(const char *text, int32_t len) {
|
|
// Case-insensitive keyword lookup
|
|
for (int32_t i = 0; i < (int32_t)KEYWORD_COUNT; i++) {
|
|
const char *kw = sKeywords[i].text;
|
|
int32_t kwLen = (int32_t)strlen(kw);
|
|
|
|
if (kwLen != len) {
|
|
continue;
|
|
}
|
|
|
|
bool match = true;
|
|
|
|
for (int32_t j = 0; j < len; j++) {
|
|
if (upperChar(text[j]) != kw[j]) {
|
|
match = false;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (match) {
|
|
return sKeywords[i].type;
|
|
}
|
|
}
|
|
|
|
return TOK_IDENT;
|
|
}
|
|
|
|
|
|
static char peek(const BasLexerT *lex) {
|
|
if (atEnd(lex)) {
|
|
return '\0';
|
|
}
|
|
|
|
return lex->source[lex->pos];
|
|
}
|
|
|
|
|
|
static char peekNext(const BasLexerT *lex) {
|
|
if (lex->pos + 1 >= lex->sourceLen) {
|
|
return '\0';
|
|
}
|
|
|
|
return lex->source[lex->pos + 1];
|
|
}
|
|
|
|
|
|
// Record an error message, prefixed with the current source position.
static void setError(BasLexerT *lex, const char *msg) {
    int line = (int)lex->line;
    int col = (int)lex->col;

    snprintf(lex->error, sizeof(lex->error), "Line %d, Col %d: %s", line, col, msg);
}
|
|
|
|
|
|
// Consume characters up to, but not including, the line terminator
// (LF or CR) or end of input.  The newline itself is left for the
// caller so it can still be tokenized.
static void skipLineComment(BasLexerT *lex) {
    for (;;) {
        if (atEnd(lex)) {
            return;
        }

        char c = peek(lex);

        if (c == '\n' || c == '\r') {
            return;
        }

        advance(lex);
    }
}
|
|
|
|
|
|
//
// Skips spaces and tabs. Does NOT skip newlines (they are tokens).
// Handles line continuation: underscore followed by newline joins
// the next line to the current logical line.
//
// The continuation check is speculative: the position is saved before
// consuming the underscore, and restored if no newline follows (so a
// bare "_" still lexes as the start of an identifier).
static void skipWhitespace(BasLexerT *lex) {
    while (!atEnd(lex)) {
        char c = peek(lex);

        if (c == ' ' || c == '\t') {
            advance(lex);
            continue;
        }

        // Line continuation: _ at end of line
        if (c == '_') {
            int32_t savedPos = lex->pos;
            int32_t savedLine = lex->line;
            int32_t savedCol = lex->col;
            advance(lex);

            // Skip spaces/tabs after underscore
            while (!atEnd(lex) && (peek(lex) == ' ' || peek(lex) == '\t')) {
                advance(lex);
            }

            // Must be followed by newline
            if (!atEnd(lex) && (peek(lex) == '\n' || peek(lex) == '\r')) {
                advance(lex);

                // CRLF: the CR was just consumed (pos - 1), so also eat the LF.
                if (!atEnd(lex) && peek(lex) == '\n' && lex->source[lex->pos - 1] == '\r') {
                    advance(lex);
                }

                continue; // Continue skipping whitespace on next line
            }

            // Not a continuation -- put back
            lex->pos = savedPos;
            lex->line = savedLine;
            lex->col = savedCol;
            break;
        }

        break;
    }
}
|
|
|
|
|
|
// Tokenize a hex literal of the form &H<digits>, with an optional
// trailing & marking a long literal.  The caller has seen "&H" but not
// consumed it.
//
// Fix: digits are accumulated into an unsigned value so that the left
// shift can never invoke signed-overflow UB (the original shifted an
// int32_t, which is undefined once the value reaches bit 31, e.g.
// &HFFFFFFFF).  The bit pattern is then reinterpreted as signed,
// preserving the original wrap-around results on two's-complement
// targets.  Digits beyond the token buffer are still folded into the
// value; only the stored text is truncated.
static BasTokenTypeE tokenizeHexLiteral(BasLexerT *lex) {
    advance(lex); // skip &
    advance(lex); // skip H

    int32_t idx = 0;
    uint32_t value = 0;

    while (!atEnd(lex) && isxdigit((unsigned char)peek(lex))) {
        char c = advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }

        uint32_t digit;

        if (c >= '0' && c <= '9') {
            digit = (uint32_t)(c - '0');
        } else if (c >= 'A' && c <= 'F') {
            digit = (uint32_t)(c - 'A' + 10);
        } else {
            digit = (uint32_t)(c - 'a' + 10);
        }

        value = (value << 4) | digit;
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen = idx;

    // Check for trailing & (long suffix)
    if (!atEnd(lex) && peek(lex) == '&') {
        advance(lex);
        lex->token.longVal = (int64_t)(int32_t)value;
        return TOK_LONG_LIT;
    }

    lex->token.intVal = (int32_t)value;
    return TOK_INT_LIT;
}
|
|
|
|
|
|
// Tokenize an identifier or keyword starting at the current position.
//
// The raw spelling (original case, including any type suffix) is kept
// in lex->token.text for display; keyword matching is case-insensitive.
// For suffixed names the full text is tried first, so table entries
// like "CURDIR$" match; otherwise the bare base name is tried.
static BasTokenTypeE tokenizeIdentOrKeyword(BasLexerT *lex) {
    int32_t idx = 0;

    while (!atEnd(lex) && (isalnum((unsigned char)peek(lex)) || peek(lex) == '_')) {
        char c = advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen = idx;

    // Check for type suffix (% integer, & long, ! single, # double, $ string)
    if (!atEnd(lex)) {
        char c = peek(lex);

        if (c == '%' || c == '&' || c == '!' || c == '#' || c == '$') {
            advance(lex);

            // Fix: bounds check added.  The ident loop above can leave
            // idx at BAS_MAX_TOKEN_LEN - 1; the original appended the
            // suffix unconditionally and then wrote the terminator one
            // slot past the buffer limit.  On truncation the suffix is
            // still consumed, matching how over-long idents are handled.
            if (idx < BAS_MAX_TOKEN_LEN - 1) {
                lex->token.text[idx++] = c;
                lex->token.text[idx] = '\0';
                lex->token.textLen = idx;
            }
        }
    }

    // Check if this is a keyword
    // For suffix-bearing identifiers, only check the base (without suffix)
    int32_t baseLen = idx;

    if (baseLen > 0) {
        char last = lex->token.text[baseLen - 1];

        if (last == '%' || last == '&' || last == '!' || last == '#' || last == '$') {
            baseLen--;
        }
    }

    // Try the full text first (including any type suffix). Suffix-bearing
    // keywords like CURDIR$, DIR$, INIREAD$, INPUTBOX$ are listed in the
    // keyword table with their $ and will match here. If the full text
    // isn't a keyword, fall back to the base name (without suffix).
    BasTokenTypeE kwType = lookupKeyword(lex->token.text, idx);
    bool matchedWithSuffix = (kwType != TOK_IDENT && baseLen != idx);

    if (kwType == TOK_IDENT && baseLen != idx) {
        kwType = lookupKeyword(lex->token.text, baseLen);
    }

    // REM is a comment -- skip to end of line
    if (kwType == TOK_REM) {
        skipLineComment(lex);
        lex->token.type = TOK_NEWLINE;
        lex->token.text[0] = '\n';
        lex->token.text[1] = '\0';
        lex->token.textLen = 1;
        return TOK_NEWLINE;
    }

    // Accept the keyword if it's a plain keyword (no suffix on source) or
    // if it explicitly matched a $-suffixed entry in the keyword table.
    if (kwType != TOK_IDENT && (baseLen == idx || matchedWithSuffix)) {
        return kwType;
    }

    return TOK_IDENT;
}
|
|
|
|
|
|
static BasTokenTypeE tokenizeNumber(BasLexerT *lex) {
|
|
int32_t idx = 0;
|
|
bool hasDecimal = false;
|
|
bool hasExp = false;
|
|
|
|
// Integer part
|
|
while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
|
|
if (idx < BAS_MAX_TOKEN_LEN - 1) {
|
|
lex->token.text[idx++] = advance(lex);
|
|
} else {
|
|
advance(lex);
|
|
}
|
|
}
|
|
|
|
// Decimal part
|
|
if (!atEnd(lex) && peek(lex) == '.' && isdigit((unsigned char)peekNext(lex))) {
|
|
hasDecimal = true;
|
|
lex->token.text[idx++] = advance(lex); // .
|
|
|
|
while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
|
|
if (idx < BAS_MAX_TOKEN_LEN - 1) {
|
|
lex->token.text[idx++] = advance(lex);
|
|
} else {
|
|
advance(lex);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Exponent
|
|
if (!atEnd(lex) && (upperChar(peek(lex)) == 'E' || upperChar(peek(lex)) == 'D')) {
|
|
hasExp = true;
|
|
lex->token.text[idx++] = advance(lex);
|
|
|
|
if (!atEnd(lex) && (peek(lex) == '+' || peek(lex) == '-')) {
|
|
lex->token.text[idx++] = advance(lex);
|
|
}
|
|
|
|
while (!atEnd(lex) && isdigit((unsigned char)peek(lex))) {
|
|
if (idx < BAS_MAX_TOKEN_LEN - 1) {
|
|
lex->token.text[idx++] = advance(lex);
|
|
} else {
|
|
advance(lex);
|
|
}
|
|
}
|
|
}
|
|
|
|
lex->token.text[idx] = '\0';
|
|
lex->token.textLen = idx;
|
|
|
|
// Check for type suffix
|
|
if (!atEnd(lex)) {
|
|
char c = peek(lex);
|
|
|
|
if (c == '%') {
|
|
advance(lex);
|
|
lex->token.intVal = (int32_t)atoi(lex->token.text);
|
|
return TOK_INT_LIT;
|
|
}
|
|
|
|
if (c == '&') {
|
|
advance(lex);
|
|
lex->token.longVal = (int64_t)atol(lex->token.text);
|
|
return TOK_LONG_LIT;
|
|
}
|
|
|
|
if (c == '!') {
|
|
advance(lex);
|
|
lex->token.dblVal = atof(lex->token.text);
|
|
return TOK_FLOAT_LIT;
|
|
}
|
|
|
|
if (c == '#') {
|
|
advance(lex);
|
|
lex->token.dblVal = atof(lex->token.text);
|
|
return TOK_FLOAT_LIT;
|
|
}
|
|
}
|
|
|
|
// No suffix: determine type from content
|
|
if (hasDecimal || hasExp) {
|
|
lex->token.dblVal = atof(lex->token.text);
|
|
return TOK_FLOAT_LIT;
|
|
}
|
|
|
|
long val = atol(lex->token.text);
|
|
|
|
if (val >= -32768 && val <= 32767) {
|
|
lex->token.intVal = (int32_t)val;
|
|
return TOK_INT_LIT;
|
|
}
|
|
|
|
lex->token.longVal = (int64_t)val;
|
|
return TOK_LONG_LIT;
|
|
}
|
|
|
|
|
|
// Tokenize a double-quoted string literal; the opening quote has not
// yet been consumed.  Characters beyond the token buffer are consumed
// but dropped.  There is no escape mechanism here -- NOTE(review): in
// particular doubled quotes ("") are not collapsed; confirm that is
// intentional for this dialect.
static BasTokenTypeE tokenizeString(BasLexerT *lex) {
    int32_t idx = 0;

    advance(lex); // skip opening quote

    for (;;) {
        if (atEnd(lex)) {
            break;
        }

        char c = peek(lex);

        if (c == '"' || c == '\n' || c == '\r') {
            break;
        }

        advance(lex);

        if (idx < BAS_MAX_TOKEN_LEN - 1) {
            lex->token.text[idx++] = c;
        }
    }

    lex->token.text[idx] = '\0';
    lex->token.textLen = idx;

    // The literal must close with a quote on the same line.
    if (atEnd(lex) || peek(lex) != '"') {
        setError(lex, "Unterminated string literal");
        return TOK_ERROR;
    }

    advance(lex); // skip closing quote

    return TOK_STRING_LIT;
}
|
|
|
|
|
|
// ASCII-only uppercase fold; every non-lowercase character passes
// through unchanged (deliberately locale-independent, unlike toupper).
static char upperChar(char c) {
    if (c < 'a' || c > 'z') {
        return c;
    }

    return (char)(c - ('a' - 'A'));
}
|