From a0b6f078f8605cecdae6a93cf04e62136f628a11 Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Sat, 21 Feb 2026 18:51:40 -0600 Subject: [PATCH] Initial commit. --- .claude/settings.local.json | 92 + .gitignore | 3 + README.md | 951 ++++++ basic2c.c | 5571 +++++++++++++++++++++++++++++++++++ builtins.def | 45 + functions.def | 12 + test.bas | 202 ++ test_big.bas | 1049 +++++++ test_classic.bas | 17 + test_continue.bas | 46 + test_data.bas | 173 ++ test_fileio.bas | 190 ++ test_inc_b.bas | 6 + test_inc_c.bas | 6 + test_include.bas | 23 + test_include_lib.bas | 10 + test_include_nested.bas | 4 + test_labels.bas | 93 + test_multidim.bas | 176 ++ test_newfeatures.bas | 192 ++ test_redim.bas | 70 + test_types.bas | 338 +++ test_udt.bas | 361 +++ 23 files changed, 9630 insertions(+) create mode 100644 .claude/settings.local.json create mode 100644 .gitignore create mode 100644 README.md create mode 100644 basic2c.c create mode 100644 builtins.def create mode 100644 functions.def create mode 100644 test.bas create mode 100644 test_big.bas create mode 100644 test_classic.bas create mode 100644 test_continue.bas create mode 100644 test_data.bas create mode 100644 test_fileio.bas create mode 100644 test_inc_b.bas create mode 100644 test_inc_c.bas create mode 100644 test_include.bas create mode 100644 test_include_lib.bas create mode 100644 test_include_nested.bas create mode 100644 test_labels.bas create mode 100644 test_multidim.bas create mode 100644 test_newfeatures.bas create mode 100644 test_redim.bas create mode 100644 test_types.bas create mode 100644 test_udt.bas diff --git a/.claude/settings.local.json b/.claude/settings.local.json new file mode 100644 index 0000000..f408449 --- /dev/null +++ b/.claude/settings.local.json @@ -0,0 +1,92 @@ +{ + "permissions": { + "allow": [ + "Bash(cc:*)", + "Bash(./basic2c)", + "Bash(./basic2c:*)", + "Bash(./test_output)", + "Bash(./test_classic:*)", + "Bash(./test_redim:*)", + "Bash(/tmp/t1)", + "Bash(/tmp/t2)", + 
"Bash(/tmp/t3)", + "Bash(./test_big)", + "Bash(/tmp/tbig:*)", + "Bash(/tmp/ttypes:*)", + "Bash(/tmp/ttest:*)", + "Bash(/tmp/tclassic:*)", + "Bash(/tmp/tredim:*)", + "Bash(/tmp/tfileio:*)", + "Bash(/tmp/tdata)", + "Bash(/tmp/test_labels:*)", + "Bash(for f in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_data.bas)", + "Bash(do echo \"=== $f ===\")", + "Bash(if [ $? -ne 0 ])", + "Bash(then echo \"FAIL: $f\")", + "Bash(else echo \"OK\")", + "Bash(fi)", + "Bash(done)", + "Bash(/tmp/test_data)", + "Bash(for f in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_labels.bas)", + "Bash(do echo -n \"$f: \")", + "Bash(for f in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_data.bas test_labels.bas)", + "Bash(for f in test_classic.bas test_data.bas test_labels.bas test_fileio.bas)", + "Bash(do echo \"===== $f =====\")", + "Bash(/tmp/out)", + "Bash(/tmp/test_multidim:*)", + "Bash(for f in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_data.bas test_labels.bas test_multidim.bas)", + "Bash(echo:*)", + "Bash(/tmp/out_udt)", + "Bash(for f in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_data.bas test_labels.bas test_multidim.bas test_udt.bas)", + "Bash(failed=0)", + "Bash(for bas in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_data.bas test_labels.bas test_multidim.bas test_udt.bas)", + "Bash(do:*)", + "Bash(then echo \"FAIL compile: $bas\")", + "Bash(failed=1)", + "Bash(else /tmp/test_out)", + "Bash(then echo \"FAIL run: $bas\")", + "Bash(else echo \"PASS \\(debug\\): $bas\")", + "Bash(if [ $failed -eq 0 ])", + "Bash(then echo \"All debug tests passed\")", + "Bash(else echo \"PASS \\(release\\): $bas\")", + "Bash(then echo \"All release tests passed\")", + "Bash(then echo \"FAIL compile \\($mode\\): $bas\")", + "Bash(then echo 
\"FAIL run \\($mode\\): $bas\")", + "Bash(then echo \"All 20 tests passed \\(10 debug + 10 release\\)\")", + "Bash(printf:*)", + "Bash(/tmp/test_sincos:*)", + "Bash(then echo \"FAIL: $bas\")", + "Bash(else /tmp/t)", + "Bash({ echo \"FAIL run: $bas\")", + "Bash(})", + "Bash([ $failed -eq 0 ])", + "Bash(python3:*)", + "Bash({ echo \"FAIL run \\($mode\\): $bas\")", + "Bash(for bas in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_labels.bas test_data.bas test_multidim.bas test_udt.bas)", + "Bash(for bas in test.bas test_big.bas)", + "Bash(/tmp/test_prog:*)", + "Bash(/tmp/test_nf)", + "Bash(for bas in test.bas test_classic.bas test_redim.bas test_big.bas test_types.bas test_fileio.bas test_labels.bas test_data.bas test_multidim.bas test_udt.bas test_newfeatures.bas)", + "Bash(/tmp/inc_test)", + "Bash(for:*)", + "Bash(/tmp/test_continue)", + "Bash(/tmp/test_print:*)", + "Bash(/tmp/test2)", + "Bash(/tmp/test_tab)", + "Bash(/tmp/test_tab2)", + "Bash(/tmp/test_all)", + "Bash(/tmp/test_rnd)", + "Bash(/tmp/test_rnd2)", + "Bash(/tmp/test_spc:*)", + "Bash(/tmp/test_spc2:*)", + "Bash(./test_extern:*)", + "Bash(./test_timer:*)", + "Bash(cat:*)", + "Bash(./test_final)", + "Bash(./test_using)", + "Bash(./test_debug:*)", + "Bash(./test_using2)", + "Bash(/tmp/testproj/test:*)" + ] + } +} diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ef87550 --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*~ +basic2c +*.o diff --git a/README.md b/README.md new file mode 100644 index 0000000..13712f0 --- /dev/null +++ b/README.md @@ -0,0 +1,951 @@ +# basic2c + +A BASIC-to-C transpiler. Translates BASIC source code into equivalent C source +code with an embedded runtime library. + +## Build + +``` +cc -Wall -o basic2c basic2c.c -lm +``` + +## Usage + +``` +basic2c [--release|-r] input.bas [output.c] +``` + +- If `output.c` is omitted, C code is written to stdout. 
+- `--release` (or `-r`) selects the release runtime (see [Runtime Modes](#runtime-modes)). + +Compile the generated C: + +``` +cc -Wall -o program output.c -lm +``` + +## Architecture + +The transpiler is a single-file C program. A preprocessing pass first expands `$INCLUDE` directives; the result then goes through three phases: + +1. **Lexer** — tokenizes BASIC source (case-insensitive keywords) +2. **Parser** — recursive descent, builds an AST +3. **Codegen** — walks the AST, emits C source with a small runtime library + +## Data Types + +| BASIC Type | C Type | Suffix | Notes | +|----------------|------------|--------|------------------------------| +| `BYTE` | `uint8_t` | | Unsigned 8-bit | +| `INTEGER` | `int16_t` | `%` | Signed 16-bit | +| `LONG` | `int32_t` | | Signed 32-bit | +| `FLOAT` | `float` | `!` | Single precision | +| `DOUBLE` | `double` | `#` | Double precision (default numeric) | +| `STRING` | `char*` | `$` | Dynamic, heap-allocated | + +Type suffixes on variable names are recognized: `name$` is STRING, `count%` is +INTEGER, `total#` is DOUBLE, `rate!` is FLOAT. Variables without a suffix or +explicit type declaration default to DOUBLE. + +Numeric types follow a promotion hierarchy: BYTE < INTEGER < LONG < FLOAT < +DOUBLE. Mixed-type expressions promote to the higher-ranked type. + +## Variables and Arrays + +### Declaration + +```basic +DIM x AS DOUBLE +DIM name AS STRING +DIM count AS INTEGER +``` + +Variables can also be used without declaration — they are implicitly declared +based on their type suffix or as DOUBLE by default. + +### Arrays + +```basic +DIM arr(10) AS INTEGER ' 1D array, indices 0..10 +DIM matrix(3, 4) AS DOUBLE ' 2D array, indices 0..3 x 0..4 +DIM cube(2, 3, 4) AS INTEGER ' 3D array +``` + +Arrays are zero-based. The dimension value is the upper bound (inclusive), so +`DIM arr(10)` allocates 11 elements (0 through 10). 
+ +### REDIM + +```basic +REDIM arr(20) AS INTEGER ' Resize array (contents reset to zero) +REDIM matrix(5, 5) AS DOUBLE ' Resize multidimensional array +``` + +`REDIM` frees the previous allocation and creates a new zero-initialized array. + +## Operators + +### Arithmetic + +| Operator | Description | +|----------|----------------------| +| `+` | Addition | +| `-` | Subtraction / unary negation | +| `*` | Multiplication | +| `/` | Division | +| `\` | Integer division | +| `MOD` | Modulo | +| `^` | Exponentiation | + +### Comparison + +| Operator | Description | +|----------|----------------------| +| `=` | Equal | +| `<>` | Not equal | +| `<` | Less than | +| `>` | Greater than | +| `<=` | Less than or equal | +| `>=` | Greater than or equal| + +### Bitwise / Logical + +| Operator | Description | +|----------|----------------------| +| `AND` | Bitwise AND | +| `OR` | Bitwise OR | +| `NOT` | Bitwise NOT | +| `XOR` | Bitwise XOR | + +These operators work as both bitwise and logical operators. When used with +comparisons (which return 0 or 1), they behave logically: `x > 5 AND y < 10`. +When used with integers, they operate on individual bits: `15 AND 9` gives `9`. 
+ +### String + +| Operator | Description | +|----------|----------------------| +| `+` | Concatenation (when operands are strings) | +| `&` | Concatenation (explicit) | + +## Control Flow + +### IF / THEN / ELSE + +Single-line: +```basic +IF x > 0 THEN PRINT "positive" ELSE PRINT "non-positive" +``` + +Multi-line: +```basic +IF x > 0 THEN + PRINT "positive" +ELSEIF x = 0 THEN + PRINT "zero" +ELSE + PRINT "negative" +END IF +``` + +### FOR / NEXT + +```basic +FOR i = 1 TO 10 + PRINT i +NEXT i + +FOR i = 10 TO 0 STEP -2 + PRINT i +NEXT i +``` + +### WHILE / WEND + +```basic +WHILE x > 0 + x = x - 1 +WEND +``` + +### DO / LOOP + +```basic +DO + x = x + 1 +LOOP UNTIL x >= 10 + +DO WHILE x < 100 + x = x * 2 +LOOP +``` + +### SELECT CASE + +```basic +SELECT CASE grade + CASE 90 TO 100 + PRINT "A" + CASE 80 TO 89 + PRINT "B" + CASE 70 TO 79 + PRINT "C" + CASE IS < 60 + PRINT "F" + CASE ELSE + PRINT "D" +END SELECT +``` + +CASE values support single values (`CASE 1`), comma-separated values +(`CASE 1, 2, 3`), ranges (`CASE 5 TO 10`), comparisons (`CASE IS > 100`), +and a default (`CASE ELSE`). Works with both numeric and string expressions. + +### EXIT + +```basic +EXIT FOR +EXIT WHILE +EXIT DO +EXIT SUB +EXIT FUNCTION +``` + +### CONTINUE + +```basic +CONTINUE FOR +CONTINUE WHILE +CONTINUE DO +``` + +Skips the rest of the current loop iteration and jumps to the next iteration. + +### GOTO + +```basic +GOTO 100 ' Jump to line number +GOTO myLabel ' Jump to named label +``` + +### GOSUB / RETURN + +```basic +GOSUB 200 +GOSUB myRoutine +' ... +200 PRINT "in subroutine" +RETURN + +myRoutine: +PRINT "named routine" +RETURN +``` + +GOSUB uses a compile-time dispatch mechanism — each GOSUB site gets a unique +return-point ID, and RETURN uses a switch statement to jump back. + +### ON GOTO / ON GOSUB + +```basic +ON choice GOTO label1, label2, label3 +ON choice GOSUB routine1, routine2, routine3 +``` + +Branches to the Nth label based on the expression value (1-based). 
If the +value is out of range, execution continues at the next statement. + +### Labels + +Both classic line numbers and named labels are supported: + +```basic +10 PRINT "line 10" +20 GOTO 10 + +myLabel: +PRINT "named label" +GOTO myLabel +``` + +## Constants + +```basic +CONST PI = 3.14159 +CONST MAX_SIZE = 100 +CONST GREETING$ = "Hello" +``` + +Constants are evaluated at compile time and substituted directly into +expressions. They cannot be reassigned. + +## SWAP + +```basic +SWAP a, b +SWAP s1$, s2$ +``` + +Exchanges the values of two variables of the same type. + +## Procedures + +### SUB + +```basic +SUB greet(name AS STRING) + PRINT "Hello, "; name +END SUB + +CALL greet("World") +greet "World" ' CALL keyword is optional +``` + +### FUNCTION + +```basic +FUNCTION square(x AS DOUBLE) AS DOUBLE + square = x * x +END FUNCTION + +PRINT square(5) +``` + +Functions return values by assigning to the function name or using `RETURN expr`. + +### Parameter Passing + +```basic +SUB increment(BYREF x AS INTEGER) + x = x + 1 +END SUB + +SUB display(BYVAL x AS INTEGER) + PRINT x +END SUB +``` + +- `BYREF` (default) — passes a pointer; changes affect the caller's variable +- `BYVAL` — passes a copy; changes are local to the procedure + +### LOCAL and STATIC + +```basic +SUB counter() + STATIC count AS INTEGER + LOCAL temp AS INTEGER + count = count + 1 + temp = count + PRINT temp +END SUB +``` + +- `LOCAL` — declares a variable scoped to the procedure +- `STATIC` — declares a variable that persists across calls + +## User-Defined Types + +### TYPE / END TYPE + +```basic +TYPE PersonRecord + firstName AS STRING * 20 + lastName AS STRING * 30 + age AS INTEGER + salary AS DOUBLE +END TYPE + +DIM person AS PersonRecord +person.firstName = "John" +person.lastName = "Doe" +person.age = 30 +person.salary = 55000.50 +``` + +String fields in TYPE definitions require a fixed length (`STRING * N`). 
Dynamic +strings (`AS STRING` without a length) are not permitted in TYPE definitions +because struct copy would produce dangling pointers. + +Supported field types: `BYTE`, `INTEGER`, `LONG`, `FLOAT`, `DOUBLE`, +`STRING * N`, and other user-defined types (nesting). + +### Nested UDTs + +```basic +TYPE Vec2 + x AS DOUBLE + y AS DOUBLE +END TYPE + +TYPE Circle + center AS Vec2 + radius AS DOUBLE +END TYPE + +DIM c AS Circle +c.center.x = 10.0 +c.center.y = 20.0 +c.radius = 5.0 +``` + +Nesting depth is unlimited. Chained dot-access works for both reads and writes. + +### UDT Arrays + +```basic +DIM points(10) AS Vec2 +points(0).x = 1.5 +points(0).y = 2.5 +``` + +### UDT Assignment + +Whole-struct copy via assignment: + +```basic +DIM a AS Vec2 +DIM b AS Vec2 +a.x = 1.0 +a.y = 2.0 +b = a ' Copies all fields +``` + +Sub-struct copy also works: + +```basic +DIM saved AS Vec2 +saved = c.center ' Copy nested struct out +c.center = saved ' Copy nested struct in +``` + +Array element copy: + +```basic +circles(0) = circles(2) +``` + +### SIZEOF + +```basic +DIM sz AS LONG +sz = SIZEOF(PersonRecord) +``` + +Returns the byte size of a user-defined type. Used primarily with random-access +file I/O to specify record length. 
+ +## Built-in Functions + +### String Functions + +| Function | Description | +|-----------------------|------------------------------------------------| +| `LEN(s$)` | Length of string | +| `MID$(s$, start, len)` | Substring (1-based start position) | +| `LEFT$(s$, n)` | First n characters | +| `RIGHT$(s$, n)` | Last n characters | +| `CHR$(n)` | Character from ASCII code | +| `ASC(s$)` | ASCII code of first character | +| `STR$(n)` | Convert number to string | +| `VAL(s$)` | Convert string to number | +| `UCASE$(s$)` | Convert to uppercase | +| `LCASE$(s$)` | Convert to lowercase | +| `INSTR(haystack$, needle$)` | Find substring position (1-based, 0 if not found) | +| `STRING$(n, char$)` | Repeat a character n times | +| `LTRIM$(s$)` | Remove leading spaces | +| `RTRIM$(s$)` | Remove trailing spaces | +| `TRIM$(s$)` | Remove leading and trailing spaces | +| `SPACE$(n)` | String of n spaces | +| `HEX$(n)` | Hexadecimal string representation | +| `OCT$(n)` | Octal string representation | + +### MID$ Assignment + +```basic +DIM s AS STRING +s = "Hello World" +MID$(s, 7, 5) = "BASIC" ' s is now "Hello BASIC" +``` + +Replaces characters in a string starting at a 1-based position. The length +parameter limits how many characters are replaced. + +### Math Functions + +| Function | Description | +|------------|------------------------------------------| +| `ABS(n)` | Absolute value | +| `INT(n)` | Truncate to integer | +| `SQR(n)` | Square root | +| `SIN(n)` | Sine (radians) | +| `COS(n)` | Cosine (radians) | +| `TAN(n)` | Tangent (radians) | +| `ATN(n)` | Arctangent (returns radians) | +| `LOG(n)` | Natural logarithm | +| `EXP(n)` | e raised to the power n | +| `SGN(n)` | Sign: -1, 0, or 1 | +| `RND` | Random number between 0 and 1 | + +Numeric expressions also support `^` for exponentiation (emitted as `pow()`). 
+ +### Print Formatting Functions + +| Function | Description | +|------------|------------------------------------------| +| `TAB(n)` | Output spaces to reach column n | +| `SPC(n)` | Output exactly n spaces | + +These functions are used within PRINT statements: + +```basic +PRINT "Name"; TAB(20); "Value" +PRINT "A"; SPC(5); "B" ' Outputs "A     B" +``` + +`RND` can be called with or without parentheses, and accepts an optional argument +(which is ignored) for compatibility with other BASIC dialects. Use `RANDOMIZE` +to seed the random number generator: + +```basic +RANDOMIZE ' Seed from system clock +RANDOMIZE 12345 ' Seed with specific value +x = RND ' Random double 0..1 +x = RND(1) ' Same as RND (argument ignored) +``` + +### Array Functions + +| Function | Description | +|---------------|----------------------------------------------| +| `LBOUND(arr)` | Lower bound of array (always 0) | +| `UBOUND(arr)` | Upper bound of array | + +### I/O Functions + +| Function | Description | +|--------------|--------------------------------------------------| +| `EOF(n)` | Returns true (-1) if at end of file n | +| `LOF(n)` | Returns byte length of file n | +| `FREEFILE()` | Returns the next available file number | + +## Console I/O + +### PRINT + +```basic +PRINT "Hello, World!" +PRINT "x = "; x +PRINT x; " "; y ' Semicolon suppresses newline between items +PRINT x, y ' Comma advances to next tab stop +PRINT "no newline"; ' Trailing semicolon suppresses final newline +? "shortcut" ' ? is a shortcut for PRINT +``` + +The `?` character can be used as a shortcut for `PRINT`, for compatibility with +classic BASIC dialects and interactive use. 
+ +### PRINT USING + +```basic +PRINT USING "###.##"; 123.456 ' Outputs: 123.46 +PRINT USING "$$#,###.##"; 1234.56 ' Outputs: $1,234.56 +PRINT USING "+###.##"; -45.6 ' Outputs: -45.60 +PRINT USING "**###.##"; 9.99 ' Outputs: ****9.99 +PRINT USING "!"; "Hello" ' Outputs: H +PRINT USING "&"; "World" ' Outputs: World +PRINT USING "\    \"; "Testing" ' Outputs: Testin (6 chars) +``` + +Format specifiers for numbers: + +| Format | Description | +|--------|-------------| +| `#` | Digit placeholder | +| `.` | Decimal point position | +| `,` | Thousands separator (in format, not output) | +| `+` | Show sign (+ or -) at start | +| `-` | Trailing minus for negative numbers | +| `$$` | Floating dollar sign | +| `**` | Fill leading spaces with asterisks | + +Format specifiers for strings: + +| Format | Description | +|--------|-------------| +| `!` | First character only | +| `&` | Entire string | +| `\ \` | Fixed width (spaces between backslashes + 2) | + +Multiple values can be formatted with one format string: + +```basic +PRINT USING "### + ### = ###"; 10; 20; 30 +' Outputs: 10 + 20 = 30 +``` + +### INPUT + +```basic +INPUT "Enter name: "; name$ +INPUT x +``` + +### LINE INPUT + +```basic +LINE INPUT "Enter text: "; line$ +``` + +Reads an entire line including commas and spaces. + +## File I/O + +### Sequential Files + +```basic +' Write +OPEN "data.txt" FOR OUTPUT AS #1 +PRINT #1, "Hello" +PRINT #1, 42 +CLOSE #1 + +' Read +OPEN "data.txt" FOR INPUT AS #1 +LINE INPUT #1, text$ +INPUT #1, value +CLOSE #1 + +' Append +OPEN "log.txt" FOR APPEND AS #1 +PRINT #1, "new entry" +CLOSE #1 +``` + +### WRITE # + +```basic +WRITE #1, name$, age, salary +``` + +Outputs CSV-style: strings are quoted, values are comma-separated, terminated +with a newline. 
+ +### Binary Files + +```basic +OPEN "file.dat" FOR BINARY AS #1 +``` + +### Random-Access Files + +```basic +TYPE Record + name AS STRING * 20 + value AS DOUBLE +END TYPE + +DIM rec AS Record +rec.name = "test" +rec.value = 3.14 + +OPEN "data.dat" FOR RANDOM AS #1 LEN = SIZEOF(Record) +PUT #1, 1, rec ' Write record at position 1 (1-based) +GET #1, 1, rec ' Read record at position 1 +CLOSE #1 +``` + +Random-access uses `GET` and `PUT` with 1-based record numbers. The `LEN` +clause specifies record size in bytes. Records can be read and written in any +order. + +### File Modes + +| Mode | C Mode | Description | +|----------|--------|--------------------------------------| +| `INPUT` | `"r"` | Read sequential text | +| `OUTPUT` | `"w"` | Write sequential text (truncates) | +| `APPEND` | `"a"` | Append sequential text | +| `BINARY` | `"rb"` | Binary read | +| `RANDOM` | `"r+b"`| Random access (creates if not found) | + +## DATA / READ / RESTORE + +```basic +DATA 10, 20, 30, "hello" + +DIM x AS INTEGER +DIM s AS STRING +READ x ' x = 10 +READ x ' x = 20 +READ x ' x = 30 +READ s ' s = "hello" + +RESTORE ' Reset read pointer to beginning +READ x ' x = 10 again +``` + +`DATA` statements define a pool of literal values. `READ` consumes them in +order. `RESTORE` resets the read pointer (optionally to a specific line number). + +## Comments + +```basic +' This is a comment +REM This is also a comment +x = 5 ' Inline comment +``` + +## $INCLUDE Metacommand + +```basic +'$INCLUDE: 'helpers.bas' +``` + +The `$INCLUDE` metacommand inserts the contents of another file at the point +of the directive, before lexing and parsing. The directive is placed inside a +comment (the leading `'` makes it invisible to editors that don't understand it). + +### Syntax + +The filename is enclosed in single quotes after `'$INCLUDE:`. The keyword is +case-insensitive. Any amount of whitespace may appear between the colon and the +opening quote. 
+ +### Nested Includes + +Included files may themselves contain `$INCLUDE` directives: + +```basic +' main.bas +'$INCLUDE: 'math_lib.bas' +'$INCLUDE: 'string_lib.bas' +``` + +```basic +' math_lib.bas — can include further files +'$INCLUDE: 'constants.bas' +FUNCTION Square(x AS DOUBLE) AS DOUBLE + Square = x * x +END FUNCTION +``` + +### Path Resolution + +Filenames are resolved relative to the **including file's directory**, not the +working directory. If `src/main.bas` includes `'lib/util.bas'`, the transpiler +looks for `src/lib/util.bas`. + +### Error Reporting + +When `$INCLUDE` is used, error messages show the originating file and line: + +``` +Error (math_lib.bas:12): undeclared variable 'q' +``` + +Without includes, the format is the same but shows the input filename: + +``` +Error (main.bas:5): type mismatch +``` + +### Circular Include Detection + +If file A includes file B which includes file A, the transpiler reports a fatal +error rather than looping infinitely: + +``` +Error: Circular include detected: main.bas +``` + +## Extensible Functions + +The transpiler supports two mechanisms for defining additional functions: + +### Built-in Functions (builtins.def) + +The `builtins.def` file is compiled into basic2c and provides functions that are +always available. To add permanent built-in functions, edit `builtins.def` and +recompile basic2c. 
+ +Default built-ins include: + +**Math functions:** + +| Function | Description | +|----------|-------------| +| `SQR(n)` | Square root | +| `SIN(n)` | Sine (radians) | +| `COS(n)` | Cosine (radians) | +| `TAN(n)` | Tangent (radians) | +| `ATN(n)` | Arctangent (returns radians) | +| `LOG(n)` | Natural logarithm | +| `EXP(n)` | e raised to power n | +| `SGN(n)` | Sign: -1, 0, or 1 | +| `RND()` | Random number 0 to 1 | +| `CEIL(n)` | Round up to integer | +| `FLOOR(n)` | Round down to integer | +| `ROUND(n)` | Round to nearest integer | +| `FIX(n)` | Truncate toward zero | +| `FRAC(n)` | Fractional part | +| `HYPOT(x, y)` | Hypotenuse (sqrt(x² + y²)) | +| `MAX(a, b)` | Maximum of two values | +| `MIN(a, b)` | Minimum of two values | + +**String functions:** + +| Function | Description | +|----------|-------------| +| `CHR$(n)` | Character from ASCII code | +| `STR$(n)` | Convert number to string | +| `UCASE$(s)` | Convert to uppercase | +| `LCASE$(s)` | Convert to lowercase | +| `LTRIM$(s)` | Remove leading spaces | +| `RTRIM$(s)` | Remove trailing spaces | +| `TRIM$(s)` | Remove leading and trailing spaces | +| `SPACE$(n)` | String of n spaces | +| `HEX$(n)` | Hexadecimal representation | +| `OCT$(n)` | Octal representation | +| `TAB(n)` | Spaces to reach column n | +| `SPC(n)` | Output n spaces | +| `ENVIRON$(name)` | Get environment variable | + +**System:** + +| Function | Description | +|----------|-------------| +| `TIMER()` | Seconds since program start | + +### External Functions (functions.def) + +The `functions.def` file is read each time `basic2c` runs (no rebuild of the transpiler is needed), from two locations (both if present): + +1. The directory containing the `basic2c` binary (global extensions) +2. The directory containing the input `.bas` file (project-specific) + +Functions from the input file's directory are loaded second, allowing project-specific +definitions to supplement or override earlier ones. 
+ +### Definition Format + +Both `builtins.def` and `functions.def` use the same format: + +``` +# Comment lines start with # +# Format: name : type : c_template + +SQUARE : double : ((%) * (%)) +CUBE : double : ((%) * (%) * (%)) +``` + +Each line defines: +- **name** — The BASIC function name (case-insensitive) +- **type** — Return type: `byte`, `integer`, `long`, `float`, `double`, or `string` +- **c_template** — C code with argument placeholders + +### Argument Placeholders + +- `%` or `%1` — First argument +- `%2` — Second argument +- `%3` — Third argument (and so on) + +Arguments are substituted directly, so use parentheses in templates to ensure +correct precedence: `((%) * (%2))` not `% * %2`. + +### Usage + +```basic +PRINT CEIL(3.7) ' Outputs: 4 +PRINT MAX(5, 10) ' Outputs: 10 +t = TIMER() ' Get elapsed time +PRINT ENVIRON$("HOME") ' Print home directory +``` + +Extensible functions require parentheses, even with no arguments: `TIMER()` not `TIMER`. + +## Runtime Modes + +The transpiler supports two runtime modes selected at transpile time: + +### Debug Mode (default) + +The debug runtime includes error checking and diagnostics: + +- NULL guards on string function arguments +- `malloc`/`calloc` failure checks with error messages +- File number bounds checking +- `fopen` failure reporting with filename +- GOSUB stack overflow/underflow detection +- All errors print to stderr and call `exit(1)` + +### Release Mode (`--release` or `-r`) + +The release runtime strips all diagnostic checks for minimal generated code: + +- No NULL guards on string functions +- No malloc failure checks +- No file number bounds checking +- No GOSUB stack overflow/underflow checks +- ~8% fewer lines of generated C code + +Functional guards are preserved in release mode to prevent crashes: + +- `EOF()` returns true (-1) for NULL file handles (enables file existence checks) +- `LOF()` returns 0 for NULL file handles +- `CLOSE` is a no-op for NULL file handles +- `LINE INPUT` is a 
no-op for NULL file handles +- Temp string pool management (`_bfree_temps`, `_btmp`) +- String variable management (`_bstr_assign`) + +## Limits + +| Resource | Maximum | +|------------------------|---------| +| Token length | 4096 | +| Identifier length | 128 | +| Parameters per procedure | 32 | +| Symbol table entries | 2048 | +| GOSUB return sites | 512 | +| Line number labels | 4096 | +| AST nodes | 65536 | +| Arguments per call | 64 | +| User-defined types | 64 | +| Fields per type | 32 | +| Constants | 256 | +| Include nesting depth | 16 | +| Included files | 64 | +| Total source lines | 65536 | + +## Example + +```basic +TYPE Item + name AS STRING * 20 + price AS DOUBLE +END TYPE + +DIM items(2) AS Item +items(0).name = "Widget" +items(0).price = 9.99 +items(1).name = "Gadget" +items(1).price = 24.95 +items(2).name = "Doohickey" +items(2).price = 4.50 + +DIM i AS INTEGER +DIM total AS DOUBLE +total = 0 +FOR i = 0 TO 2 + PRINT items(i).name; " $"; items(i).price + total = total + items(i).price +NEXT i +PRINT "Total: $"; total +``` + +Transpile and run: + +``` +./basic2c example.bas example.c +cc -Wall -o example example.c -lm +./example +``` diff --git a/basic2c.c b/basic2c.c new file mode 100644 index 0000000..6d06d0c --- /dev/null +++ b/basic2c.c @@ -0,0 +1,5571 @@ +// ============================================================================ +// basic2c.c - A BASIC to C Transpiler +// +// Translates BASIC source code into equivalent C source code. +// +// Supported features: +// - Classic line-numbered BASIC and named labels (GOTO, GOSUB/RETURN) +// - Modern structured BASIC (SUB, FUNCTION, IF/END IF, etc.) 
+// - Data types: BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING +// - User-defined types (TYPE/END TYPE) with nesting and SIZEOF +// - Dynamic arrays (DIM, REDIM), multidimensional (up to N-D) +// - Parameter passing: BYVAL (by value) and BYREF (by reference) +// - LOCAL and STATIC variable declarations inside SUB/FUNCTION +// - Constants (CONST) with compile-time substitution +// - Control flow: IF/ELSEIF/ELSE, FOR/NEXT, WHILE/WEND, DO/LOOP, +// SELECT CASE, ON GOTO, ON GOSUB, EXIT, CONTINUE +// - PRINT statement with ? shortcut, PRINT USING for formatted output +// - Operators: arithmetic, comparison, string concatenation (+, &), +// bitwise/logical AND, OR, NOT, XOR +// - DATA/READ/RESTORE for inline data +// - File I/O: OPEN/CLOSE, PRINT #, INPUT #, LINE INPUT #, WRITE # +// - Random-access file I/O: GET, PUT with record numbers +// - String functions: LEN, MID$, LEFT$, RIGHT$, STR$, VAL, CHR$, +// ASC, UCASE$, LCASE$, INSTR, STRING$, LTRIM$, RTRIM$, TRIM$, +// SPACE$, HEX$, OCT$, MID$ assignment +// - Print functions: TAB, SPC for cursor positioning +// - Math functions: ABS, INT, SQR, SIN, COS, TAN, ATN, LOG, EXP, +// SGN, RND (optional argument ignored), RANDOMIZE +// - Array functions: LBOUND, UBOUND +// - I/O functions: EOF, LOF, FREEFILE +// - SWAP for exchanging variable values +// - $INCLUDE metacommand for file inclusion with nested include +// support, circular detection, and file+line error reporting +// - Extensible built-in functions via builtins.def (compile-time) +// - External function definitions via functions.def (runtime) +// - Debug and release runtime modes (--release or -r flag) +// +// Usage: basic2c [--release|-r] input.bas [output.c] +// If output.c is omitted, C code is written to stdout. +// +// Build: cc -o basic2c basic2c.c -lm +// +// Architecture: +// 1. Preprocessor - processes $INCLUDE directives, builds line map +// 2. Lexer - tokenizes BASIC source (case-insensitive keywords) +// 3. 
//    Parser   - recursive descent, builds an AST
// 4. Codegen  - walks AST, emits C source with a small runtime library
// ============================================================================

// Standard headers. The header names were lost from this copy of the
// source; they are restored from the library calls the file makes:
//   <ctype.h>  toupper, isdigit, isalpha, isalnum
//   <stdarg.h> va_list, va_start, va_end
//   <stdio.h>  FILE, fprintf, vfprintf, fopen, fgets, fputc
//   <stdlib.h> malloc, exit, atoi, atof
//   <string.h> strcmp, strlen, strcpy, strncpy, strchr, memset, strdup
#include <ctype.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

// -----------------------------------------------------------------------
// Section 1: Constants and Limits
// -----------------------------------------------------------------------
#define MAX_TOKEN_LEN     4096   // max length of one token string
#define MAX_IDENT         128    // max identifier length
#define MAX_PARAMS        32     // max parameters per SUB/FUNCTION
#define MAX_SYMBOLS       2048   // symbol table capacity
#define MAX_GOSUB_SITES   512    // max GOSUB return-point IDs
#define MAX_LINE_LABELS   4096   // max classic line-number labels
#define MAX_NODES         65536  // AST node pool size
#define MAX_ARGS          64     // max arguments in a PRINT / CALL list
#define MAX_SOURCE_LINES  65536  // max lines in preprocessed source
#define MAX_INCLUDE_DEPTH 16     // max nested $INCLUDE depth
#define MAX_INCLUDE_FILES 64     // max distinct included filenames
#define MAX_EXTERN_FUNCS  128    // max external function definitions
#define MAX_EXTERN_CODE   256    // max C code template length

// -----------------------------------------------------------------------
// Section 2: Enumerations
// -----------------------------------------------------------------------

// Token types produced by the lexer
typedef enum {
    TOK_EOF = 0,
    TOK_NEWLINE,        // end of line (statement separator)
    TOK_COLON,          // : (statement separator on same line)
    TOK_INT_LIT,        // integer literal
    TOK_DBL_LIT,        // floating-point literal
    TOK_STR_LIT,        // "..." string literal
    TOK_IDENT,          // identifier (variable / sub / function name)

    // ---------- keywords ----------
    TOK_DIM, TOK_REDIM, TOK_AS,
    TOK_BYTE, TOK_INTEGER, TOK_LONG, TOK_FLOAT, TOK_DOUBLE, TOK_STRING,
    TOK_LET, TOK_PRINT, TOK_INPUT,
    TOK_IF, TOK_THEN, TOK_ELSE, TOK_ELSEIF, TOK_END,
    TOK_FOR, TOK_TO, TOK_STEP, TOK_NEXT,
    TOK_WHILE, TOK_WEND,
    TOK_DO, TOK_LOOP, TOK_UNTIL,
    TOK_GOTO, TOK_GOSUB, TOK_RETURN,
    TOK_SUB, TOK_FUNCTION, TOK_CALL,
    TOK_BYVAL, TOK_BYREF,
    TOK_LOCAL, TOK_STATIC,
    TOK_EXIT,
    TOK_AND, TOK_OR, TOK_NOT, TOK_MOD, TOK_XOR,
    TOK_SELECT, TOK_CASE, TOK_SWAP, TOK_CONST, TOK_ON,
    TOK_REM,
    TOK_OPEN, TOK_CLOSE, TOK_OUTPUT, TOK_APPEND, TOK_BINARY,
    TOK_LINE, TOK_WRITE,
    // DATA, READ, RESTORE, GET, PUT, RANDOM, SIZEOF are contextual
    // keywords (checked as TOK_IDENT to avoid colliding with variable names)

    // ---------- operators / punctuation ----------
    TOK_PLUS, TOK_MINUS, TOK_STAR, TOK_SLASH, TOK_BSLASH,
    TOK_CARET,
    TOK_EQ, TOK_NE, TOK_LT, TOK_GT, TOK_LE, TOK_GE,
    TOK_LPAREN, TOK_RPAREN, TOK_COMMA, TOK_SEMI,
    TOK_AMP,            // & string concatenation
    TOK_HASH,           // # file number prefix
    TOK_DOT,            // . member access
    TOK_TYPE            // TYPE keyword (user-defined types)
} TokenType;

// AST node kinds
typedef enum {
    NODE_PROGRAM,       // root: a = first top-level item (linked->next)
    NODE_BLOCK,         // block of statements: a = first stmt
    NODE_INT_LIT,       // ival = value
    NODE_DBL_LIT,       // dval = value
    NODE_STR_LIT,       // sval = string content
    NODE_IDENT,         // sval = name
    NODE_ARRAY_REF,     // sval = name, a = index exprs (linked list)
    NODE_BINOP,         // ival = op token, a = left, b = right
    NODE_UNOP,          // ival = op token, a = operand
    NODE_ASSIGN,        // a = target (IDENT/ARRAY_REF), b = value
    NODE_DIM,           // sval=name, dataType, a=sizes (list), ival=ndims
    NODE_REDIM,         // sval=name, dataType, a=sizes (list), ival=ndims
    NODE_PRINT,         // a = first print-item (linked->next)
    NODE_PRINT_ITEM,    // a = expr, ival = separator after (';'=1,','=2)
    NODE_PRINT_USING,   // a = format expr, b = value list (linked->next)
    NODE_INPUT,         // sval = prompt (or NULL), a = first var ->next
    NODE_IF,            // a=cond, b=then-block, c=else-part
    NODE_FOR,           // sval=var, a=start, b=end, c=step, d=body
    NODE_WHILE,         // a=cond, b=body
    NODE_DO_LOOP,       // a=cond, b=body, ival bits: 1=UNTIL,2=bottom
    NODE_GOTO,          // ival=line# or sval=label
    NODE_GOSUB,         // ival=line#, ival2=return-point-id
    NODE_RETURN,        // a=expr (FUNCTION return) or NULL
    NODE_LABEL,         // ival=line number
    NODE_SUB,           // sval=name, a=param list, b=body
    NODE_FUNC,          // sval=name, a=params, b=body, dataType=ret
    NODE_PARAM,         // sval=name, dataType, ival=passMode
    NODE_CALL,          // sval=name, a=arg list (linked->next)
    NODE_FUNC_CALL,     // sval=name, a=arg list (linked->next)
    NODE_EXIT,          // ival=what (TOK_FOR,TOK_WHILE,TOK_DO,etc)
    NODE_CONTINUE,      // ival=what (TOK_FOR,TOK_WHILE,TOK_DO)
    NODE_LOCAL,         // sval=name, dataType
    NODE_STATIC,        // sval=name, dataType
    NODE_END,           // END statement
    NODE_OPEN,          // a=filename, b=file# expr, ival=mode
    NODE_CLOSE,         // b=file# expr
    NODE_FILE_PRINT,    // b=file# expr, a=print items
    NODE_FILE_INPUT,    // b=file# expr, a=variable list
    NODE_LINE_INPUT,    // b=file# expr, a=target variable
    NODE_FILE_WRITE,    // b=file# expr, a=expression list
    NODE_DATA,          // a=linked list of literal items, line=source ln
    NODE_READ,          // a=linked list of NODE_IDENT vars to read into
    NODE_RESTORE,       // ival=target line number (0=beginning)
    NODE_TYPE_DEF,      // sval=type name, ival=udtIndex
    NODE_DOT_ACCESS,    // a=base expr, sval=field name, ival2=udtIndex
    NODE_GET,           // a=file# expr, b=record# expr, c=var
    NODE_PUT,           // a=file# expr, b=record# expr, c=var
    NODE_SELECT,        // a=test expr, b=first NODE_CASE (linked->next)
    NODE_CASE,          // a=value exprs (linked), b=body block, ival=flags
    NODE_SWAP,          // a=first var, b=second var
    NODE_CONST_DECL,    // sval=name, a=value expr
    NODE_RANDOMIZE,     // a=seed expr (or NULL)
    NODE_ON_GOTO,       // a=expr, b=label list (NODE_INT_LIT/NODE_IDENT)
    NODE_ON_GOSUB,      // a=expr, b=label list, ival2=first return-point-id
    NODE_MID_ASSIGN     // a=target string var, b=start, c=len, d=replacement
} NodeType;

// BASIC data types – ordered by numeric promotion rank so that
// promoteType() can simply take the maximum of two types.
+typedef enum { + TYPE_VOID = 0, // used for SUB (no return value) + TYPE_BYTE, // BYTE -> uint8_t + TYPE_INT, // INTEGER -> int16_t + TYPE_LONG, // LONG -> int32_t + TYPE_FLOAT, // FLOAT -> float + TYPE_DBL, // DOUBLE -> double + TYPE_STR, // STRING -> char* + TYPE_UDT // user-defined TYPE -> struct +} DataType; + +// Parameter passing modes +typedef enum { + PASS_BYVAL = 0, + PASS_BYREF = 1 +} PassMode; + +// ----------------------------------------------------------------------- +// Section 3: Data Structures +// ----------------------------------------------------------------------- + +// A single token from the lexer +typedef struct { + TokenType type; + int line; // source line where token appears + int ival; // integer value (for TOK_INT_LIT) + double dval; // double value (for TOK_DBL_LIT) + char sval[MAX_TOKEN_LEN]; // string payload +} Token; + +// AST node – compact tagged structure. +// Child pointers a,b,c,d have node-type-specific meanings (see enum). +// The 'next' pointer chains siblings (statement lists, param lists). +typedef struct Node { + NodeType type; + DataType dataType; // expression result type / decl type + int ival; // multi-purpose int (operator, flags) + int ival2; // secondary int (e.g. 
gosub return id) + double dval; // double literal value + char *sval; // identifier name / string literal + struct Node *a, *b, *c, *d;// child pointers + struct Node *next; // next sibling in a list + int line; // source line for error messages +} Node; + +// Symbol table entry – tracks variables, arrays, subs, functions +typedef struct { + char name[MAX_IDENT]; + DataType dataType; + int isArray; // 1 if dynamic array + int ndims; // number of dimensions (0=scalar) + int isFunc; // 1 = FUNCTION, 2 = SUB + int paramCount; + DataType paramTypes[MAX_PARAMS]; + PassMode paramModes[MAX_PARAMS]; + char paramNames[MAX_PARAMS][MAX_IDENT]; + DataType returnType; // for functions + int udtIndex; // index into gUdts[] for TYPE_UDT +} Symbol; + +// User-defined type (UDT) support +#define MAX_UDTS 64 +#define MAX_UDT_FIELDS 32 + +typedef struct { + char name[MAX_IDENT]; + DataType dataType; + int strLen; // >0 for STRING * N (fixed-length) + int udtIndex; // index into gUdts[] if TYPE_UDT +} UdtField; + +typedef struct { + char name[MAX_IDENT]; + UdtField fields[MAX_UDT_FIELDS]; + int fieldCount; +} UdtDef; + +static UdtDef gUdts[MAX_UDTS]; +static int gUdtCount = 0; +static int gLastUdtIndex = -1; // side-channel from parseType() + +// ----------------------------------------------------------------------- +// Section 4: Global State +// ----------------------------------------------------------------------- + +// Runtime mode: 0=debug (with error checks), 1=release (minimal) +static int gRelease = 0; + +// Line map: maps merged-source line numbers to original file + line +typedef struct { + const char *fileName; // interned filename pointer + int origLine; // 1-based line in original file +} LineMapEntry; + +static LineMapEntry gLineMap[MAX_SOURCE_LINES]; +static int gLineMapCount = 0; + +// Interned filename pool +static char *gFileNames[MAX_INCLUDE_FILES]; +static int gFileNameCount = 0; + +static const char *internFileName(const char *name) { + for (int i = 0; i < 
gFileNameCount; i++) + if (strcmp(gFileNames[i], name) == 0) return gFileNames[i]; + if (gFileNameCount >= MAX_INCLUDE_FILES) { + fprintf(stderr, "Too many include files (max %d)\n", MAX_INCLUDE_FILES); + exit(1); + } + gFileNames[gFileNameCount] = strdup(name); + return gFileNames[gFileNameCount++]; +} + +// Source code +static const char *gSrc = NULL; // source text +static int gSrcPos = 0; // current read position +static int gSrcLen = 0; // total source length +static int gLine = 1; // current source line number + +// Current and peek tokens for the recursive-descent parser +static Token gTok; // current token + +// AST node pool – simple bump allocator (nodes live until exit) +static Node gNodePool[MAX_NODES]; +static int gNodeCount = 0; + +// Symbol table +static Symbol gSyms[MAX_SYMBOLS]; +static int gSymCount = 0; + +// GOSUB bookkeeping: count of GOSUB sites for generating return switch +static int gGosubCount = 0; + +// Collected line-number labels for the RETURN dispatch table +static int gLineLabels[MAX_LINE_LABELS]; +static int gLineLabelCount = 0; + +// Line numbers that are actually targeted by GOTO or GOSUB. +// Only these need C labels emitted to avoid -Wunused-label. 
+static int gGotoTargets[MAX_LINE_LABELS]; +static int gGotoTargetCount = 0; + +// Named (string) labels targeted by GOTO or GOSUB +static char *gGotoStrTargets[MAX_LINE_LABELS]; +static int gGotoStrTargetCount = 0; + +// Compile-time constant table (for CONST declarations) +#define MAX_CONSTS 256 +typedef struct { + char name[MAX_IDENT]; + DataType dataType; + double numVal; + char strVal[MAX_TOKEN_LEN]; +} ConstDef; +static ConstDef gConsts[MAX_CONSTS]; +static int gConstCount = 0; + +// External function definitions (loaded from functions.def) +typedef struct { + char name[MAX_IDENT]; // BASIC function name (e.g., "CEIL") + DataType returnType; // return type + char cCode[MAX_EXTERN_CODE]; // C code template (% = arg, %1 %2 = numbered) +} ExternFunc; +static ExternFunc gExternFuncs[MAX_EXTERN_FUNCS]; +static int gExternFuncCount = 0; + +// Built-in function definitions (from builtins.def at compile time) +typedef struct { + const char *name; + DataType returnType; + const char *cCode; +} BuiltinDef; + +#define BUILTIN(n, t, c) {n, t, c}, +static const BuiltinDef gBuiltinDefs[] = { +#include "builtins.def" + {NULL, 0, NULL} // sentinel +}; +#undef BUILTIN + +// Code-generator state +static int gIndent = 0; // current indentation depth +static FILE *gOut = NULL; // output file handle + +// Track whether we are inside a SUB/FUNCTION (for scope) +static int gInFunc = 0; +static const char *gFuncName = NULL; // current function name +static DataType gFuncRet = TYPE_VOID; + +// ----------------------------------------------------------------------- +// Section 5: Utility Functions +// ----------------------------------------------------------------------- + +// Report a fatal error with source file/line and exit +static void fatal(int line, const char *fmt, ...) 
{ + va_list ap; + if (line > 0 && line <= gLineMapCount) { + LineMapEntry *e = &gLineMap[line - 1]; + fprintf(stderr, "Error (%s:%d): ", e->fileName, e->origLine); + } else { + fprintf(stderr, "Error (line %d): ", line); + } + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + fprintf(stderr, "\n"); + exit(1); +} + + +// Allocate a new AST node from the pool +static Node *newNode(NodeType type, int line) { + if (gNodeCount >= MAX_NODES) + fatal(line, "AST node pool exhausted (max %d)", MAX_NODES); + Node *n = &gNodePool[gNodeCount++]; + memset(n, 0, sizeof(*n)); + n->type = type; + n->line = line; + return n; +} + + +// Duplicate a string into heap memory +static char *strDup(const char *s) { + if (!s) return NULL; + char *d = malloc(strlen(s) + 1); + if (!d) { fprintf(stderr, "Out of memory\n"); exit(1); } + strcpy(d, s); + return d; +} + + +// Case-insensitive string comparison +static int strIcmp(const char *a, const char *b) { + if (!a || !b) return (a != b); + while (*a && *b) { + if (toupper((unsigned char)*a) != toupper((unsigned char)*b)) + return 1; + a++; b++; + } + return *a != *b; +} + + +// Look up an external function by name; returns pointer or NULL +static ExternFunc *externFuncLookup(const char *name) { + for (int i = 0; i < gExternFuncCount; i++) + if (strIcmp(gExternFuncs[i].name, name) == 0) + return &gExternFuncs[i]; + return NULL; +} + + +// Look up a built-in function definition by name; returns pointer or NULL +static const BuiltinDef *builtinDefLookup(const char *name) { + for (int i = 0; gBuiltinDefs[i].name; i++) + if (strIcmp(gBuiltinDefs[i].name, name) == 0) + return &gBuiltinDefs[i]; + return NULL; +} + + +// Parse a type name from definition file +static DataType parseTypeName(const char *s) { + while (*s == ' ') s++; + if (strIcmp(s, "byte") == 0) return TYPE_BYTE; + if (strIcmp(s, "integer") == 0) return TYPE_INT; + if (strIcmp(s, "long") == 0) return TYPE_LONG; + if (strIcmp(s, "float") == 0) return TYPE_FLOAT; + if 
(strIcmp(s, "double") == 0) return TYPE_DBL; + if (strIcmp(s, "string") == 0) return TYPE_STR; + return TYPE_DBL; // default +} + + +// Load external function definitions from a file +// Format: name : type : c_code +// Lines starting with # are comments, blank lines ignored +static void loadExternFuncs(const char *filename) { + FILE *f = fopen(filename, "r"); + if (!f) return; // file not found is OK, just no external funcs + + char line[512]; + while (fgets(line, sizeof(line), f)) { + // Skip comments and blank lines + char *p = line; + while (*p == ' ' || *p == '\t') p++; + if (*p == '#' || *p == '\n' || *p == '\0') continue; + + // Parse: name : type : c_code + char *colon1 = strchr(p, ':'); + if (!colon1) continue; + char *colon2 = strchr(colon1 + 1, ':'); + if (!colon2) continue; + + if (gExternFuncCount >= MAX_EXTERN_FUNCS) { + fprintf(stderr, "Warning: too many external functions, ignoring rest\n"); + break; + } + + ExternFunc *ef = &gExternFuncs[gExternFuncCount]; + + // Extract name (trim whitespace) + *colon1 = '\0'; + char *name = p; + while (*name == ' ' || *name == '\t') name++; + char *nameEnd = colon1 - 1; + while (nameEnd > name && (*nameEnd == ' ' || *nameEnd == '\t')) nameEnd--; + nameEnd[1] = '\0'; + strncpy(ef->name, name, MAX_IDENT - 1); + ef->name[MAX_IDENT - 1] = '\0'; + + // Extract type + *colon2 = '\0'; + char *typeStr = colon1 + 1; + while (*typeStr == ' ' || *typeStr == '\t') typeStr++; + char *typeEnd = colon2 - 1; + while (typeEnd > typeStr && (*typeEnd == ' ' || *typeEnd == '\t')) typeEnd--; + typeEnd[1] = '\0'; + ef->returnType = parseTypeName(typeStr); + + // Extract C code template (trim leading whitespace and trailing newline) + char *code = colon2 + 1; + while (*code == ' ' || *code == '\t') code++; + size_t codeLen = strlen(code); + while (codeLen > 0 && (code[codeLen-1] == '\n' || code[codeLen-1] == '\r' || + code[codeLen-1] == ' ' || code[codeLen-1] == '\t')) + codeLen--; + if (codeLen >= MAX_EXTERN_CODE) codeLen = 
MAX_EXTERN_CODE - 1; + strncpy(ef->cCode, code, codeLen); + ef->cCode[codeLen] = '\0'; + + gExternFuncCount++; + } + + fclose(f); +} + + +// Look up a symbol by name; returns pointer to entry or NULL +static Symbol *symLookup(const char *name) { + for (int i = 0; i < gSymCount; i++) + if (strIcmp(gSyms[i].name, name) == 0) + return &gSyms[i]; + return NULL; +} + + +// Forward declaration +static int isKeyword(const char *name); + +static Symbol *symAdd(const char *name) { + if (isKeyword(name)) + fatal(gLine, "Cannot use keyword '%s' as identifier", name); + Symbol *s = symLookup(name); + if (s) return s; + if (gSymCount >= MAX_SYMBOLS) + fatal(gLine, "Symbol table full"); + s = &gSyms[gSymCount++]; + memset(s, 0, sizeof(*s)); + strncpy(s->name, name, MAX_IDENT - 1); + return s; +} + + +// Look up a user-defined type by name; returns index or -1 +static int udtLookup(const char *name) { + for (int i = 0; i < gUdtCount; i++) + if (strIcmp(gUdts[i].name, name) == 0) + return i; + return -1; +} + + +// Look up a field within a UDT; returns field index or -1 +static int udtFieldLookup(int udtIdx, const char *field) { + if (udtIdx < 0 || udtIdx >= gUdtCount) return -1; + UdtDef *u = &gUdts[udtIdx]; + for (int i = 0; i < u->fieldCount; i++) + if (strIcmp(u->fields[i].name, field) == 0) + return i; + return -1; +} + + +// Check if 'name' is a BYREF parameter of the current function. +// Returns 1 if so, 0 otherwise. Used during code generation to +// emit pointer dereferences for BYREF params. +static int isByrefParam(const char *name) { + if (!gInFunc || !gFuncName) return 0; + Symbol *fsym = symLookup(gFuncName); + if (!fsym) return 0; + for (int i = 0; i < fsym->paramCount; i++) { + if (strIcmp(fsym->paramNames[i], name) == 0 && + fsym->paramModes[i] == PASS_BYREF) + return 1; + } + return 0; +} + + +// Emit indented text to the output file +static void emit(const char *fmt, ...) 
{ + va_list ap; + for (int i = 0; i < gIndent * 4; i++) fputc(' ', gOut); + va_start(ap, fmt); + vfprintf(gOut, fmt, ap); + va_end(ap); +} + + +// Emit text without leading indentation +static void emitRaw(const char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(gOut, fmt, ap); + va_end(ap); +} + + +// Record a line-number label for later GOSUB dispatch +static void recordLineLabel(int lnum) { + for (int i = 0; i < gLineLabelCount; i++) + if (gLineLabels[i] == lnum) return; // already recorded + if (gLineLabelCount >= MAX_LINE_LABELS) + fatal(gLine, "Too many line labels"); + gLineLabels[gLineLabelCount++] = lnum; +} + + +// Record a line number as a GOTO/GOSUB target so its label is emitted +static void recordGotoTarget(int lnum) { + for (int i = 0; i < gGotoTargetCount; i++) + if (gGotoTargets[i] == lnum) return; + if (gGotoTargetCount >= MAX_LINE_LABELS) + fatal(gLine, "Too many goto targets"); + gGotoTargets[gGotoTargetCount++] = lnum; +} + + +// Check whether a line number is a GOTO/GOSUB target +static int isGotoTarget(int lnum) { + for (int i = 0; i < gGotoTargetCount; i++) + if (gGotoTargets[i] == lnum) return 1; + return 0; +} + + +// Record a named label as a GOTO/GOSUB target so its C label is emitted +static void recordGotoStrTarget(const char *name) { + for (int i = 0; i < gGotoStrTargetCount; i++) + if (strIcmp(gGotoStrTargets[i], name) == 0) return; + if (gGotoStrTargetCount >= MAX_LINE_LABELS) + fatal(gLine, "Too many named goto targets"); + gGotoStrTargets[gGotoStrTargetCount++] = strDup(name); +} + + +// Check whether a named label is a GOTO/GOSUB target +static int isGotoStrTarget(const char *name) { + for (int i = 0; i < gGotoStrTargetCount; i++) + if (strIcmp(gGotoStrTargets[i], name) == 0) return 1; + return 0; +} + + +// Infer the data type of a variable from its name suffix. +// Names ending in '$' -> STRING, '%' -> INTEGER, '!' -> FLOAT, +// '#' -> DOUBLE; otherwise check the symbol table, default INTEGER. 
+static DataType inferVarType(const char *name) { + int len = (int)strlen(name); + if (len > 0 && name[len-1] == '$') return TYPE_STR; + if (len > 0 && name[len-1] == '%') return TYPE_INT; + if (len > 0 && name[len-1] == '!') return TYPE_FLOAT; + if (len > 0 && name[len-1] == '#') return TYPE_DBL; + Symbol *s = symLookup(name); + if (s) return s->dataType; + return TYPE_INT; // default +} + + +// Return the wider of two numeric types for expression promotion. +// The DataType enum is ordered so that a higher value = wider type +// (BYTE < INT < LONG < FLOAT < DBL). STRING is handled separately. +static DataType promoteType(DataType a, DataType b) { + if (a == TYPE_STR || b == TYPE_STR) return TYPE_STR; + return (a > b) ? a : b; +} + + +// Strip type-suffix characters ($, %, #, !) from an identifier for C output. +// Uses a rotating set of 8 static buffers so multiple calls within a +// single expression don't clobber each other. +static const char *cleanName(const char *name) { + static char bufs[8][MAX_IDENT]; + static int idx = 0; + if (!name) return "_null_"; + char *buf = bufs[idx++ & 7]; + strncpy(buf, name, MAX_IDENT - 1); + buf[MAX_IDENT - 1] = '\0'; + int len = (int)strlen(buf); + if (len > 0 && (buf[len-1]=='$' || buf[len-1]=='%' || + buf[len-1]=='#' || buf[len-1]=='!')) + buf[len-1] = '\0'; + return buf; +} + + +// ----------------------------------------------------------------------- +// Section 6: Lexer +// +// The lexer reads characters from gSrc and produces tokens one at a time. +// BASIC keywords are case-insensitive; identifiers preserve case. 
+// ----------------------------------------------------------------------- + +// Keyword table: maps keyword strings to token types +static struct { const char *kw; TokenType tok; } gKeywords[] = { + {"DIM", TOK_DIM}, {"REDIM", TOK_REDIM}, + {"AS", TOK_AS}, {"BYTE", TOK_BYTE}, + {"INTEGER", TOK_INTEGER}, + {"LONG", TOK_LONG}, {"FLOAT", TOK_FLOAT}, + {"DOUBLE", TOK_DOUBLE}, {"STRING", TOK_STRING}, + {"LET", TOK_LET}, {"PRINT", TOK_PRINT}, + {"INPUT", TOK_INPUT}, {"IF", TOK_IF}, + {"THEN", TOK_THEN}, {"ELSE", TOK_ELSE}, + {"ELSEIF", TOK_ELSEIF}, {"END", TOK_END}, + {"FOR", TOK_FOR}, {"TO", TOK_TO}, + {"STEP", TOK_STEP}, {"NEXT", TOK_NEXT}, + {"WHILE", TOK_WHILE}, {"WEND", TOK_WEND}, + {"DO", TOK_DO}, {"LOOP", TOK_LOOP}, + {"UNTIL", TOK_UNTIL}, {"GOTO", TOK_GOTO}, + {"GOSUB", TOK_GOSUB}, {"RETURN", TOK_RETURN}, + {"SUB", TOK_SUB}, {"FUNCTION", TOK_FUNCTION}, + {"CALL", TOK_CALL}, {"BYVAL", TOK_BYVAL}, + {"BYREF", TOK_BYREF}, {"LOCAL", TOK_LOCAL}, + {"STATIC", TOK_STATIC}, {"EXIT", TOK_EXIT}, + {"AND", TOK_AND}, {"OR", TOK_OR}, + {"NOT", TOK_NOT}, {"MOD", TOK_MOD}, + {"REM", TOK_REM}, + {"OPEN", TOK_OPEN}, {"CLOSE", TOK_CLOSE}, + {"OUTPUT", TOK_OUTPUT}, {"APPEND", TOK_APPEND}, + {"BINARY", TOK_BINARY}, {"LINE", TOK_LINE}, + {"WRITE", TOK_WRITE}, + {"TYPE", TOK_TYPE}, + {"XOR", TOK_XOR}, + {"SELECT", TOK_SELECT}, {"CASE", TOK_CASE}, + {"CONST", TOK_CONST}, + {"ON", TOK_ON}, + {NULL, TOK_EOF} +}; + +// Check if a name is a keyword +static int isKeyword(const char *name) { + for (int k = 0; gKeywords[k].kw; k++) + if (strIcmp(name, gKeywords[k].kw) == 0) + return 1; + return 0; +} + +// Peek at the current character without advancing +static int peekChar(void) { + if (gSrcPos >= gSrcLen) return EOF; + return (unsigned char)gSrc[gSrcPos]; +} + + +// Read and advance past the current character +static int readChar(void) { + if (gSrcPos >= gSrcLen) return EOF; + int ch = (unsigned char)gSrc[gSrcPos++]; + if (ch == '\n') gLine++; + return ch; +} + + +// Skip whitespace 
(spaces and tabs) but NOT newlines +static void skipSpaces(void) { + while (gSrcPos < gSrcLen) { + int ch = gSrc[gSrcPos]; + if (ch == ' ' || ch == '\t') + gSrcPos++; + else + break; + } +} + + +// Read the next token into gTok +static void nextToken(void) { + skipSpaces(); + gTok.line = gLine; + gTok.sval[0] = '\0'; + gTok.ival = 0; + gTok.dval = 0.0; + + int ch = peekChar(); + + // End of file + if (ch == EOF) { gTok.type = TOK_EOF; return; } + + // Newline – statement separator + if (ch == '\n') { + readChar(); + gTok.type = TOK_NEWLINE; + return; + } + + // Carriage return (handle \r\n) + if (ch == '\r') { + readChar(); + if (peekChar() == '\n') readChar(); + gTok.type = TOK_NEWLINE; + return; + } + + // Single-line comment: ' or REM + if (ch == '\'') { + // Skip until end of line + while (peekChar() != '\n' && peekChar() != EOF) + readChar(); + gTok.type = TOK_NEWLINE; // treat comment as newline + if (peekChar() == '\n') readChar(); + return; + } + + // String literal + if (ch == '"') { + readChar(); // consume opening quote + int i = 0; + while (peekChar() != '"' && peekChar() != '\n' && peekChar() != EOF) { + if (i < MAX_TOKEN_LEN - 1) + gTok.sval[i++] = (char)readChar(); + else + readChar(); + } + gTok.sval[i] = '\0'; + if (peekChar() == '"') readChar(); // consume closing quote + gTok.type = TOK_STR_LIT; + return; + } + + // Number literal (integer or double) + if (isdigit(ch) || (ch == '.' 
&& isdigit(gSrc[gSrcPos+1]))) { + int i = 0; + int hasDot = 0; + while (isdigit(peekChar()) || peekChar() == '.') { + if (peekChar() == '.') { + if (hasDot) break; // second dot ends the number + hasDot = 1; + } + if (i < MAX_TOKEN_LEN - 1) + gTok.sval[i++] = (char)readChar(); + else + readChar(); + } + gTok.sval[i] = '\0'; + if (hasDot) { + gTok.type = TOK_DBL_LIT; + gTok.dval = atof(gTok.sval); + } else { + gTok.type = TOK_INT_LIT; + gTok.ival = atoi(gTok.sval); + } + return; + } + + // Identifier or keyword + if (isalpha(ch) || ch == '_') { + int i = 0; + while (isalnum(peekChar()) || peekChar() == '_') { + if (i < MAX_TOKEN_LEN - 1) + gTok.sval[i++] = (char)readChar(); + else + readChar(); + } + // Allow trailing $, %, #, ! for type suffixes: + // $ = STRING, % = INTEGER (int16_t), + // # = DOUBLE, ! = FLOAT + if (peekChar()=='$' || peekChar()=='%' || + peekChar()=='#' || peekChar()=='!') { + if (i < MAX_TOKEN_LEN - 1) + gTok.sval[i++] = (char)readChar(); + } + gTok.sval[i] = '\0'; + + // Check for REM (rest of line is comment) + if (strIcmp(gTok.sval, "REM") == 0) { + while (peekChar() != '\n' && peekChar() != EOF) + readChar(); + gTok.type = TOK_NEWLINE; + if (peekChar() == '\n') readChar(); + return; + } + + // Check keyword table + for (int k = 0; gKeywords[k].kw; k++) { + if (strIcmp(gTok.sval, gKeywords[k].kw) == 0) { + gTok.type = gKeywords[k].tok; + return; + } + } + + // Not a keyword – it is an identifier + gTok.type = TOK_IDENT; + return; + } + + // Operators and punctuation + readChar(); + switch (ch) { + case '+': gTok.type = TOK_PLUS; return; + case '-': gTok.type = TOK_MINUS; return; + case '*': gTok.type = TOK_STAR; return; + case '/': gTok.type = TOK_SLASH; return; + case '\\':gTok.type = TOK_BSLASH; return; + case '^': gTok.type = TOK_CARET; return; + case '&': gTok.type = TOK_AMP; return; + case '#': gTok.type = TOK_HASH; return; + case '.': gTok.type = TOK_DOT; return; + case '(': gTok.type = TOK_LPAREN; return; + case ')': gTok.type = 
TOK_RPAREN; return; + case ',': gTok.type = TOK_COMMA; return; + case ';': gTok.type = TOK_SEMI; return; + case ':': gTok.type = TOK_COLON; return; + case '?': gTok.type = TOK_PRINT; return; + case '=': gTok.type = TOK_EQ; return; + case '<': + if (peekChar() == '=') { readChar(); gTok.type = TOK_LE; } + else if (peekChar() == '>') { readChar(); gTok.type = TOK_NE; } + else gTok.type = TOK_LT; + return; + case '>': + if (peekChar() == '=') { readChar(); gTok.type = TOK_GE; } + else gTok.type = TOK_GT; + return; + default: + fatal(gLine, "Unexpected character '%c' (0x%02X)", ch, ch); + } +} + + +// Check if the current token matches a given type +static int tokIs(TokenType t) { return gTok.type == t; } + +// Consume current token if it matches; returns 1 on match, 0 otherwise +static int tokAccept(TokenType t) { + if (gTok.type == t) { nextToken(); return 1; } + return 0; +} + + +// Require the current token to be of a given type; fatal error otherwise +static void tokExpect(TokenType t) { + if (gTok.type != t) + fatal(gTok.line, "Expected token type %d, got %d ('%s')", + t, gTok.type, gTok.sval); + nextToken(); +} + + +// Skip newlines and colons (statement separators) +static void skipEol(void) { + while (gTok.type == TOK_NEWLINE || gTok.type == TOK_COLON) + nextToken(); +} + + +// ----------------------------------------------------------------------- +// Section 7: Parser – Recursive Descent +// +// Grammar (simplified): +// program = { sub_decl | func_decl | statement } +// statement = dim | redim | type_def | assignment | print | input +// | if | for | while | do_loop | goto | gosub | return +// | call | exit | local | static | end | label | open +// | close | data | read | restore | get | put | line_input +// expression = or_expr +// or_expr = and_expr { OR and_expr } +// and_expr = not_expr { AND not_expr } +// not_expr = NOT not_expr | cmp_expr +// cmp_expr = add_expr { (= | <> | < | > | <= | >=) add_expr } +// add_expr = mul_expr { (+ | - | &) mul_expr } 
+// mul_expr = idiv_expr { (* | /) idiv_expr } +// idiv_expr = mod_expr { '\' mod_expr } +// mod_expr = power_expr { MOD power_expr } +// power_expr = unary_expr { ^ unary_expr } +// unary_expr = [+ | -] primary +// primary = INT_LIT | DBL_LIT | STR_LIT | ident['('args')'][.field...] +// | '(' expression ')' | SIZEOF'('type_name')' +// ----------------------------------------------------------------------- + +// Forward declarations for mutually recursive parser functions +static Node *parseExpr(void); +static Node *parseStatement(void); +static int dataIndexForLine(int lnum); +static int dataIndexForLabel(const char *name); +static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3); + +static void skipNewlines(void) { + while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken(); +} + +// ---- Expression parser ---- + +// Parse a primary expression (literals, variables, function calls, parens) +static Node *parsePrimary(void) { + int ln = gTok.line; + + // Integer literal + if (tokIs(TOK_INT_LIT)) { + Node *n = newNode(NODE_INT_LIT, ln); + n->ival = gTok.ival; + n->dataType = TYPE_INT; + nextToken(); + return n; + } + + // Double literal + if (tokIs(TOK_DBL_LIT)) { + Node *n = newNode(NODE_DBL_LIT, ln); + n->dval = gTok.dval; + n->dataType = TYPE_DBL; + nextToken(); + return n; + } + + // String literal + if (tokIs(TOK_STR_LIT)) { + Node *n = newNode(NODE_STR_LIT, ln); + n->sval = strDup(gTok.sval); + n->dataType = TYPE_STR; + nextToken(); + return n; + } + + // Parenthesized expression + if (tokIs(TOK_LPAREN)) { + nextToken(); + Node *n = parseExpr(); + tokExpect(TOK_RPAREN); + return n; + } + + // Identifier: variable, array element, or function call + if (tokIs(TOK_IDENT)) { + char name[MAX_TOKEN_LEN]; + strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + name[MAX_TOKEN_LEN - 1] = '\0'; + nextToken(); + + // Check for '(' – array access or function call + if (tokIs(TOK_LPAREN)) { + nextToken(); + // Collect argument list + Node *args = NULL, *tail = 
NULL; + if (!tokIs(TOK_RPAREN)) { + Node *arg = parseExpr(); + args = tail = arg; + while (tokAccept(TOK_COMMA)) { + arg = parseExpr(); + tail->next = arg; + tail = arg; + } + } + tokExpect(TOK_RPAREN); + + // Determine if this is a known array or function + Symbol *s = symLookup(name); + + // SIZEOF(TypeName) — compile-time sizeof + if (strIcmp(name, "SIZEOF") == 0) { + // args should be one identifier — the UDT name + Node *n = newNode(NODE_FUNC_CALL, ln); + n->sval = strDup("SIZEOF"); + n->a = args; + n->dataType = TYPE_LONG; + return n; + } + + if (s && s->isArray) { + Node *n = newNode(NODE_ARRAY_REF, ln); + n->sval = strDup(name); + n->a = args; // index expression + n->dataType = s->dataType; + n->ival2 = s->udtIndex; + // Check for dot-access on array element: arr(i).field[.field...] + if (s->dataType == TYPE_UDT && tokIs(TOK_DOT)) { + Node *cur = n; + int curUdt = s->udtIndex; + while (curUdt >= 0 && tokIs(TOK_DOT)) { + nextToken(); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected field name after '.'"); + int fi = udtFieldLookup(curUdt, gTok.sval); + if (fi < 0) + fatal(ln, "Unknown field '%s' in type '%s'", + gTok.sval, gUdts[curUdt].name); + Node *dot = newNode(NODE_DOT_ACCESS, ln); + dot->a = cur; + dot->sval = strDup(gTok.sval); + dot->ival2 = curUdt; + UdtField *uf = &gUdts[curUdt].fields[fi]; + dot->dataType = uf->dataType; + if (uf->dataType == TYPE_STR && uf->strLen > 0) + dot->ival = uf->strLen; + cur = dot; + curUdt = (uf->dataType == TYPE_UDT) ? 
uf->udtIndex : -1; + nextToken(); + } + return cur; + } + return n; + } else { + // Treat as function call + Node *n = newNode(NODE_FUNC_CALL, ln); + n->sval = strDup(name); + n->a = args; + // Infer return type: check symbol table, built-ins, name + if (s && s->isFunc == 1) { + n->dataType = s->returnType; + } else if (strIcmp(name,"LOF")==0) { + n->dataType = TYPE_LONG; + } else if (strIcmp(name,"VAL")==0 || strIcmp(name,"ABS")==0) { + n->dataType = TYPE_DBL; + } else if (strIcmp(name,"LEN")==0 || strIcmp(name,"ASC")==0 || + strIcmp(name,"INT")==0 || strIcmp(name,"INSTR")==0 || + strIcmp(name,"EOF")==0 || strIcmp(name,"FREEFILE")==0 || + strIcmp(name,"LBOUND")==0 || strIcmp(name,"UBOUND")==0) { + n->dataType = TYPE_INT; + } else { + // Check external functions and compile-time builtins + ExternFunc *ef = externFuncLookup(name); + if (ef) { + n->dataType = ef->returnType; + } else { + const BuiltinDef *bd = builtinDefLookup(name); + if (bd) { + n->dataType = bd->returnType; + } else { + n->dataType = inferVarType(name); + } + } + } + return n; + } + } + + // RND without parentheses — treat as RND() + if (strIcmp(name, "RND") == 0) { + Node *n = newNode(NODE_FUNC_CALL, ln); + n->sval = strDup("RND"); + n->a = NULL; + n->dataType = TYPE_DBL; + return n; + } + + // Check compile-time constants + for (int ci = 0; ci < gConstCount; ci++) { + if (strIcmp(name, gConsts[ci].name) == 0) { + if (gConsts[ci].dataType == TYPE_STR) { + Node *n = newNode(NODE_STR_LIT, ln); + n->sval = strDup(gConsts[ci].strVal); + n->dataType = TYPE_STR; + return n; + } else { + double v = gConsts[ci].numVal; + if (v == (int)v && gConsts[ci].dataType != TYPE_DBL && + gConsts[ci].dataType != TYPE_FLOAT) { + Node *n = newNode(NODE_INT_LIT, ln); + n->ival = (int)v; + n->dataType = gConsts[ci].dataType; + return n; + } else { + Node *n = newNode(NODE_DBL_LIT, ln); + n->dval = v; + n->dataType = gConsts[ci].dataType; + return n; + } + } + } + } + + // Plain variable reference — check for dot-access 
(supports chaining) + { + Symbol *s = symLookup(name); + if (s && s->dataType == TYPE_UDT && tokIs(TOK_DOT)) { + Node *base = newNode(NODE_IDENT, ln); + base->sval = strDup(name); + base->dataType = TYPE_UDT; + Node *cur = base; + int curUdt = s->udtIndex; + while (curUdt >= 0 && tokIs(TOK_DOT)) { + nextToken(); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected field name after '.'"); + int fi = udtFieldLookup(curUdt, gTok.sval); + if (fi < 0) + fatal(ln, "Unknown field '%s' in type '%s'", + gTok.sval, gUdts[curUdt].name); + Node *dot = newNode(NODE_DOT_ACCESS, ln); + dot->a = cur; + dot->sval = strDup(gTok.sval); + dot->ival2 = curUdt; + UdtField *uf = &gUdts[curUdt].fields[fi]; + dot->dataType = uf->dataType; + if (uf->dataType == TYPE_STR && uf->strLen > 0) + dot->ival = uf->strLen; + cur = dot; + curUdt = (uf->dataType == TYPE_UDT) ? uf->udtIndex : -1; + nextToken(); + } + return cur; + } + } + + Node *n = newNode(NODE_IDENT, ln); + n->sval = strDup(name); + n->dataType = inferVarType(name); + return n; + } + + fatal(ln, "Expected expression, got token type %d ('%s')", + gTok.type, gTok.sval); + return NULL; // unreachable +} + + +// Unary: [+|-] primary +static Node *parseUnary(void) { + int ln = gTok.line; + if (tokIs(TOK_MINUS) || tokIs(TOK_PLUS)) { + int op = gTok.type; + nextToken(); + Node *operand = parseUnary(); + if (op == TOK_PLUS) return operand; // unary + is a no-op + Node *n = newNode(NODE_UNOP, ln); + n->ival = op; + n->a = operand; + n->dataType = operand->dataType; + return n; + } + return parsePrimary(); +} + + +// Power: unary { ^ unary } (right-associative) +static Node *parsePower(void) { + Node *left = parseUnary(); + if (tokIs(TOK_CARET)) { + int ln = gTok.line; + nextToken(); + Node *right = parsePower(); // right-associative + Node *n = newNode(NODE_BINOP, ln); + n->ival = TOK_CARET; + n->a = left; + n->b = right; + n->dataType = TYPE_DBL; + return n; + } + return left; +} + + +// MOD: power { MOD power } +static Node *parseMod(void) { + 
Node *left = parsePower(); + while (tokIs(TOK_MOD)) { + int ln = gTok.line; + nextToken(); + Node *right = parsePower(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = TOK_MOD; + n->a = left; + n->b = right; + n->dataType = TYPE_INT; + left = n; + } + return left; +} + + +// Integer division: mod { '\' mod } +static Node *parseIdiv(void) { + Node *left = parseMod(); + while (tokIs(TOK_BSLASH)) { + int ln = gTok.line; + nextToken(); + Node *right = parseMod(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = TOK_BSLASH; + n->a = left; + n->b = right; + n->dataType = TYPE_INT; + left = n; + } + return left; +} + + +// Multiply / divide: idiv { (*|/) idiv } +static Node *parseMuldiv(void) { + Node *left = parseIdiv(); + while (tokIs(TOK_STAR) || tokIs(TOK_SLASH)) { + int ln = gTok.line; + int op = gTok.type; + nextToken(); + Node *right = parseIdiv(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = op; + n->a = left; + n->b = right; + // Division always promotes to double; multiplication promotes + // to the wider of the two operand types. + n->dataType = (op == TOK_SLASH) ? 
TYPE_DBL : + promoteType(left->dataType, right->dataType); + left = n; + } + return left; +} + + +// Add / subtract / string concat: muldiv { (+|-|&) muldiv } +static Node *parseAddsub(void) { + Node *left = parseMuldiv(); + while (tokIs(TOK_PLUS) || tokIs(TOK_MINUS) || tokIs(TOK_AMP)) { + int ln = gTok.line; + int op = gTok.type; + nextToken(); + Node *right = parseMuldiv(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = op; + n->a = left; + n->b = right; + // String concatenation + if (op == TOK_AMP || (op == TOK_PLUS && + (left->dataType == TYPE_STR || right->dataType == TYPE_STR))) + n->dataType = TYPE_STR; + else + n->dataType = promoteType(left->dataType, right->dataType); + left = n; + } + return left; +} + + +// Comparison: addsub { (=|<>|<|>|<=|>=) addsub } +static Node *parseComparison(void) { + Node *left = parseAddsub(); + while (tokIs(TOK_EQ) || tokIs(TOK_NE) || tokIs(TOK_LT) || + tokIs(TOK_GT) || tokIs(TOK_LE) || tokIs(TOK_GE)) { + int ln = gTok.line; + int op = gTok.type; + nextToken(); + Node *right = parseAddsub(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = op; + n->a = left; + n->b = right; + n->dataType = TYPE_INT; // comparisons yield integer (boolean) + left = n; + } + return left; +} + + +// NOT: NOT not_expr | comparison +static Node *parseNot(void) { + if (tokIs(TOK_NOT)) { + int ln = gTok.line; + nextToken(); + Node *operand = parseNot(); + Node *n = newNode(NODE_UNOP, ln); + n->ival = TOK_NOT; + n->a = operand; + n->dataType = TYPE_INT; + return n; + } + return parseComparison(); +} + + +// AND: not { AND not } +static Node *parseAnd(void) { + Node *left = parseNot(); + while (tokIs(TOK_AND)) { + int ln = gTok.line; + nextToken(); + Node *right = parseNot(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = TOK_AND; + n->a = left; + n->b = right; + n->dataType = TYPE_INT; + left = n; + } + return left; +} + + +// OR: and { OR and } — top-level expression rule +static Node *parseOr(void) { + Node *left = parseAnd(); + while 
(tokIs(TOK_OR) || tokIs(TOK_XOR)) { + int ln = gTok.line; + int op = gTok.type; + nextToken(); + Node *right = parseAnd(); + Node *n = newNode(NODE_BINOP, ln); + n->ival = op; + n->a = left; + n->b = right; + n->dataType = TYPE_INT; + left = n; + } + return left; +} + + +// Top-level expression entry point +static Node *parseExpr(void) { + return parseOr(); +} + + +// ---- Statement parsers ---- + +// Parse a data-type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING, or UDT name) +static DataType parseType(void) { + if (tokAccept(TOK_BYTE)) return TYPE_BYTE; + if (tokAccept(TOK_INTEGER)) return TYPE_INT; + if (tokAccept(TOK_LONG)) return TYPE_LONG; + if (tokAccept(TOK_FLOAT)) return TYPE_FLOAT; + if (tokAccept(TOK_DOUBLE)) return TYPE_DBL; + if (tokAccept(TOK_STRING)) return TYPE_STR; + // Check for user-defined type name + if (tokIs(TOK_IDENT)) { + int idx = udtLookup(gTok.sval); + if (idx >= 0) { + gLastUdtIndex = idx; + nextToken(); + return TYPE_UDT; + } + } + fatal(gTok.line, + "Expected type keyword (BYTE, INTEGER, LONG, FLOAT, DOUBLE, STRING)"); + return TYPE_INT; +} + + +// Parse TYPE ... 
END TYPE definition +static Node *parseTypeDef(void) { + int ln = gTok.line; + tokExpect(TOK_TYPE); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected type name after TYPE"); + + char tname[MAX_IDENT]; + strncpy(tname, gTok.sval, MAX_IDENT - 1); + tname[MAX_IDENT - 1] = '\0'; + nextToken(); + + if (gUdtCount >= MAX_UDTS) + fatal(ln, "Too many TYPE definitions (max %d)", MAX_UDTS); + + int udtIdx = gUdtCount++; + UdtDef *u = &gUdts[udtIdx]; + memset(u, 0, sizeof(*u)); + strncpy(u->name, tname, MAX_IDENT - 1); + + // Skip newlines before fields + while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken(); + + // Parse fields until END TYPE + while (!tokIs(TOK_EOF)) { + // Check for END TYPE + if (tokIs(TOK_END)) { + int sp = gSrcPos; + int sl = gLine; + Token st = gTok; + nextToken(); + if (tokIs(TOK_TYPE)) { + nextToken(); // consume TYPE + break; + } + // Not END TYPE — restore + gSrcPos = sp; + gLine = sl; + gTok = st; + } + + // Parse field: name AS type + if (!tokIs(TOK_IDENT)) + fatal(gTok.line, "Expected field name in TYPE definition"); + + if (u->fieldCount >= MAX_UDT_FIELDS) + fatal(gTok.line, "Too many fields in TYPE (max %d)", MAX_UDT_FIELDS); + + UdtField *f = &u->fields[u->fieldCount]; + strncpy(f->name, gTok.sval, MAX_IDENT - 1); + f->name[MAX_IDENT - 1] = '\0'; + nextToken(); + + tokExpect(TOK_AS); + + // Check for STRING * N (fixed-length string) + if (tokIs(TOK_STRING)) { + nextToken(); + if (tokAccept(TOK_STAR)) { + if (!tokIs(TOK_INT_LIT)) + fatal(gTok.line, "Expected integer after STRING *"); + f->strLen = gTok.ival; + nextToken(); + } else { + fatal(gTok.line, + "STRING fields in TYPE require fixed length (STRING * N)"); + } + f->dataType = TYPE_STR; + f->udtIndex = -1; + } else { + gLastUdtIndex = -1; + f->dataType = parseType(); + f->strLen = 0; + f->udtIndex = gLastUdtIndex; + } + u->fieldCount++; + + // Skip newlines between fields + while (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON)) nextToken(); + } + + Node *n = newNode(NODE_TYPE_DEF, ln); + 
n->sval = strDup(tname); + n->ival = udtIdx; + return n; +} + + +// Parse DIM statement: DIM name[(size[, size, ...])] AS type +static Node *parseDim(void) { + int ln = gTok.line; + tokExpect(TOK_DIM); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected identifier after DIM"); + char name[MAX_TOKEN_LEN]; + strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + nextToken(); + + Node *size = NULL; + int ndims = 0; + if (tokAccept(TOK_LPAREN)) { + size = parseExpr(); + ndims = 1; + Node *tail = size; + while (tokAccept(TOK_COMMA)) { + Node *dim = parseExpr(); + tail->next = dim; + tail = dim; + ndims++; + } + tokExpect(TOK_RPAREN); + } + + tokExpect(TOK_AS); + gLastUdtIndex = -1; + DataType dt = parseType(); + + Node *n = newNode(NODE_DIM, ln); + n->sval = strDup(name); + n->dataType = dt; + n->a = size; + n->ival = ndims; + n->ival2 = gLastUdtIndex; + + // Register in symbol table + Symbol *s = symAdd(name); + s->dataType = dt; + s->isArray = (ndims > 0); + s->ndims = ndims; + s->udtIndex = gLastUdtIndex; + + return n; +} + + +// Parse REDIM statement: REDIM name(size, ...) 
AS type +static Node *parseRedim(void) { + int ln = gTok.line; + tokExpect(TOK_REDIM); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected identifier after REDIM"); + char name[MAX_TOKEN_LEN]; + strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + nextToken(); + tokExpect(TOK_LPAREN); + Node *size = parseExpr(); + int ndims = 1; + Node *tail = size; + while (tokAccept(TOK_COMMA)) { + Node *dim = parseExpr(); + tail->next = dim; + tail = dim; + ndims++; + } + tokExpect(TOK_RPAREN); + tokExpect(TOK_AS); + DataType dt = parseType(); + + Node *n = newNode(NODE_REDIM, ln); + n->sval = strDup(name); + n->dataType = dt; + n->a = size; + n->ival = ndims; + return n; +} + + +// Parse PRINT statement: PRINT [expr { (;|,) expr } [;]] +// Helper: parse a file number (#expr) +static Node *parseFileNumber(void) { + tokExpect(TOK_HASH); + return parseExpr(); +} + + +// Helper: parse print items (shared by PRINT and PRINT #) +static Node *parsePrintItems(int ln) { + Node *head = NULL, *tail = NULL; + while (1) { + Node *item = newNode(NODE_PRINT_ITEM, ln); + item->a = parseExpr(); + + // Check for separator after this item + if (tokIs(TOK_SEMI)) { + item->ival = 1; // semicolon: no space + nextToken(); + } else if (tokIs(TOK_COMMA)) { + item->ival = 2; // comma: tab + nextToken(); + } else { + item->ival = 0; // end of print list + } + + if (!head) head = tail = item; + else { tail->next = item; tail = item; } + + // If no separator or end of statement, stop + if (item->ival == 0) break; + // If separator at end of line, stop (trailing separator) + if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) + break; + } + return head; +} + + +static Node *parsePrint(void) { + int ln = gTok.line; + tokExpect(TOK_PRINT); + + // File-directed PRINT: PRINT #n, ... 
+ if (tokIs(TOK_HASH)) { + Node *fpr = newNode(NODE_FILE_PRINT, ln); + fpr->b = parseFileNumber(); + tokExpect(TOK_COMMA); + if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) + return fpr; // PRINT #n, alone = write newline to file + fpr->a = parsePrintItems(ln); + return fpr; + } + + // PRINT USING "format"; value1; value2; ... + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "USING") == 0) { + nextToken(); + Node *pu = newNode(NODE_PRINT_USING, ln); + pu->a = parseExpr(); // format string expression + if (!tokAccept(TOK_SEMI)) + tokExpect(TOK_COMMA); // allow ; or , after format + // Parse values as linked list + Node *head = NULL, *tail = NULL; + while (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { + Node *val = parseExpr(); + if (!head) head = tail = val; + else { tail->next = val; tail = val; } + if (!tokAccept(TOK_SEMI) && !tokAccept(TOK_COMMA)) + break; + } + pu->b = head; + return pu; + } + + Node *pr = newNode(NODE_PRINT, ln); + + // Empty PRINT (just a newline) + if (tokIs(TOK_NEWLINE) || tokIs(TOK_COLON) || tokIs(TOK_EOF)) { + return pr; + } + + pr->a = parsePrintItems(ln); + return pr; +} + + +// Parse INPUT statement: INPUT ["prompt";] var {, var} +// Helper: parse a comma-separated variable list for INPUT +static Node *parseInputVars(int ln) { + Node *head = NULL, *tail = NULL; + do { + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable name in INPUT"); + Node *v = newNode(NODE_IDENT, ln); + v->sval = strDup(gTok.sval); + v->dataType = inferVarType(gTok.sval); + nextToken(); + if (!head) head = tail = v; + else { tail->next = v; tail = v; } + } while (tokAccept(TOK_COMMA)); + return head; +} + + +static Node *parseInput(void) { + int ln = gTok.line; + tokExpect(TOK_INPUT); + + // File-directed INPUT: INPUT #n, var1, var2 + if (tokIs(TOK_HASH)) { + Node *finp = newNode(NODE_FILE_INPUT, ln); + finp->b = parseFileNumber(); + tokExpect(TOK_COMMA); + finp->a = parseInputVars(ln); + return finp; + } + + Node *inp = 
newNode(NODE_INPUT, ln); + + // Optional string prompt + if (tokIs(TOK_STR_LIT)) { + inp->sval = strDup(gTok.sval); + nextToken(); + if (tokIs(TOK_SEMI) || tokIs(TOK_COMMA)) + nextToken(); // consume separator after prompt + } + + inp->a = parseInputVars(ln); + return inp; +} + + +// Parse an ELSEIF chain as a nested IF node. +// ELSEIF expr THEN \n block { ELSEIF ... } [ELSE block] +// The caller (parseIf) consumes the final END IF. +static Node *parseElseifChain(void) { + int ln = gTok.line; + tokExpect(TOK_ELSEIF); + Node *cond = parseExpr(); + tokExpect(TOK_THEN); + + Node *n = newNode(NODE_IF, ln); + n->a = cond; + + skipEol(); + n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END); + + if (tokIs(TOK_ELSEIF)) { + n->c = parseElseifChain(); + } else if (tokAccept(TOK_ELSE)) { + skipEol(); + n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF); + } + // END IF is consumed by the top-level parseIf + return n; +} + + +// Parse IF block: +// IF expr THEN stmt (single-line) +// IF expr THEN \n block {ELSEIF...} [ELSE block] END IF +static Node *parseIf(void) { + int ln = gTok.line; + tokExpect(TOK_IF); + Node *cond = parseExpr(); + tokExpect(TOK_THEN); + + Node *n = newNode(NODE_IF, ln); + n->a = cond; + + // Single-line IF: statement on same line after THEN + if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { + Node *stmt = parseStatement(); + Node *blk = newNode(NODE_BLOCK, ln); + blk->a = stmt; + n->b = blk; + return n; + } + + // Multi-line IF + skipEol(); + n->b = parseBlock(TOK_ELSE, TOK_ELSEIF, TOK_END); + + // ELSEIF chain: parse as a nested IF node + if (tokIs(TOK_ELSEIF)) { + n->c = parseElseifChain(); + } + // ELSE block + else if (tokAccept(TOK_ELSE)) { + skipEol(); + n->c = parseBlock(TOK_END, TOK_EOF, TOK_EOF); + } + + // END IF + tokExpect(TOK_END); + tokExpect(TOK_IF); + return n; +} + + +// Parse FOR loop: FOR var = start TO end [STEP step] \n block NEXT [var] +static Node *parseFor(void) { + int ln = gTok.line; + tokExpect(TOK_FOR); + + if 
(!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable after FOR"); + char name[MAX_TOKEN_LEN]; + strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + nextToken(); + + tokExpect(TOK_EQ); + Node *start = parseExpr(); + tokExpect(TOK_TO); + Node *end = parseExpr(); + + Node *step = NULL; + if (tokAccept(TOK_STEP)) { + step = parseExpr(); + } + + Node *n = newNode(NODE_FOR, ln); + n->sval = strDup(name); + n->a = start; + n->b = end; + n->c = step; + + skipEol(); + n->d = parseBlock(TOK_NEXT, TOK_EOF, TOK_EOF); + tokExpect(TOK_NEXT); + // Optional variable name after NEXT + if (tokIs(TOK_IDENT)) nextToken(); + + return n; +} + + +// Parse WHILE loop: WHILE expr \n block WEND +static Node *parseWhile(void) { + int ln = gTok.line; + tokExpect(TOK_WHILE); + Node *cond = parseExpr(); + + Node *n = newNode(NODE_WHILE, ln); + n->a = cond; + + skipEol(); + n->b = parseBlock(TOK_WEND, TOK_EOF, TOK_EOF); + tokExpect(TOK_WEND); + return n; +} + + +// Parse DO/LOOP: +// DO [WHILE|UNTIL expr] \n block LOOP [WHILE|UNTIL expr] +static Node *parseDoLoop(void) { + int ln = gTok.line; + tokExpect(TOK_DO); + + Node *n = newNode(NODE_DO_LOOP, ln); + n->ival = 0; // flags: bit0 = isUntil, bit1 = conditionAtBottom + + // Optional top condition + if (tokIs(TOK_WHILE)) { + nextToken(); + n->a = parseExpr(); + } else if (tokIs(TOK_UNTIL)) { + nextToken(); + n->a = parseExpr(); + n->ival |= 1; // UNTIL (vs WHILE) + } + + skipEol(); + n->b = parseBlock(TOK_LOOP, TOK_EOF, TOK_EOF); + tokExpect(TOK_LOOP); + + // Optional bottom condition + if (tokIs(TOK_WHILE)) { + nextToken(); + n->a = parseExpr(); + n->ival = 2; // condition at bottom + } else if (tokIs(TOK_UNTIL)) { + nextToken(); + n->a = parseExpr(); + n->ival = 3; // until + at bottom + } + + // If no condition at all, infinite loop (DO...LOOP) + return n; +} + + +// Parse SUB declaration: +// SUB name([BYVAL|BYREF] param AS type, ...) 
\n block END SUB +static Node *parseSub(void) { + int ln = gTok.line; + tokExpect(TOK_SUB); + + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected subroutine name after SUB"); + char name[MAX_TOKEN_LEN]; + strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + nextToken(); + + // Parse parameter list + Node *params = NULL, *ptail = NULL; + int pcount = 0; + Symbol *sym = symAdd(name); + sym->isFunc = 2; // SUB + sym->returnType = TYPE_VOID; + + if (tokAccept(TOK_LPAREN)) { + while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) { + PassMode pm = PASS_BYVAL; + if (tokAccept(TOK_BYREF)) pm = PASS_BYREF; + else tokAccept(TOK_BYVAL); // optional BYVAL + + if (!tokIs(TOK_IDENT)) + fatal(gTok.line, "Expected parameter name"); + Node *p = newNode(NODE_PARAM, gTok.line); + p->sval = strDup(gTok.sval); + p->ival = pm; + nextToken(); + + tokExpect(TOK_AS); + p->dataType = parseType(); + + // Record param in the function's symbol entry + if (pcount >= MAX_PARAMS) + fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS); + strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1); + sym->paramTypes[pcount] = p->dataType; + sym->paramModes[pcount] = pm; + pcount++; + + // Also register the parameter as a variable for type inference + // inside the function body + Symbol *psym = symAdd(p->sval); + psym->dataType = p->dataType; + + if (!params) params = ptail = p; + else { ptail->next = p; ptail = p; } + + if (!tokAccept(TOK_COMMA)) break; + } + tokExpect(TOK_RPAREN); + } + sym->paramCount = pcount; + + Node *n = newNode(NODE_SUB, ln); + n->sval = strDup(name); + n->a = params; + + skipEol(); + n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF); + tokExpect(TOK_END); + tokExpect(TOK_SUB); + + return n; +} + + +// Parse FUNCTION declaration: +// FUNCTION name([params]) AS type \n block END FUNCTION +static Node *parseFunction(void) { + int ln = gTok.line; + tokExpect(TOK_FUNCTION); + + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected function name after FUNCTION"); + char name[MAX_TOKEN_LEN]; + 
strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + nextToken(); + + // Parse parameter list + Node *params = NULL, *ptail = NULL; + int pcount = 0; + Symbol *sym = symAdd(name); + sym->isFunc = 1; // FUNCTION + + if (tokAccept(TOK_LPAREN)) { + while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) { + PassMode pm = PASS_BYVAL; + if (tokAccept(TOK_BYREF)) pm = PASS_BYREF; + else tokAccept(TOK_BYVAL); + + if (!tokIs(TOK_IDENT)) + fatal(gTok.line, "Expected parameter name"); + Node *p = newNode(NODE_PARAM, gTok.line); + p->sval = strDup(gTok.sval); + p->ival = pm; + nextToken(); + + tokExpect(TOK_AS); + p->dataType = parseType(); + + if (pcount >= MAX_PARAMS) + fatal(gTok.line, "Too many parameters (max %d)", MAX_PARAMS); + strncpy(sym->paramNames[pcount], p->sval, MAX_IDENT - 1); + sym->paramTypes[pcount] = p->dataType; + sym->paramModes[pcount] = pm; + pcount++; + + // Register parameter as variable for type inference + Symbol *psym = symAdd(p->sval); + psym->dataType = p->dataType; + + if (!params) params = ptail = p; + else { ptail->next = p; ptail = p; } + + if (!tokAccept(TOK_COMMA)) break; + } + tokExpect(TOK_RPAREN); + } + sym->paramCount = pcount; + + // Return type + tokExpect(TOK_AS); + DataType ret = parseType(); + sym->returnType = ret; + sym->dataType = ret; + + Node *n = newNode(NODE_FUNC, ln); + n->sval = strDup(name); + n->dataType = ret; + n->a = params; + + skipEol(); + n->b = parseBlock(TOK_END, TOK_EOF, TOK_EOF); + tokExpect(TOK_END); + tokExpect(TOK_FUNCTION); + + return n; +} + + +// Parse LOCAL declaration: LOCAL name AS type +static Node *parseLocal(void) { + int ln = gTok.line; + tokExpect(TOK_LOCAL); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable name after LOCAL"); + Node *n = newNode(NODE_LOCAL, ln); + n->sval = strDup(gTok.sval); + nextToken(); + tokExpect(TOK_AS); + n->dataType = parseType(); + return n; +} + + +// Parse STATIC declaration: STATIC name AS type +static Node *parseStatic(void) { + int ln = gTok.line; + tokExpect(TOK_STATIC); + 
if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable name after STATIC"); + Node *n = newNode(NODE_STATIC, ln); + n->sval = strDup(gTok.sval); + nextToken(); + tokExpect(TOK_AS); + n->dataType = parseType(); + return n; +} + + +// Parse DATA statement: DATA literal, literal, ... +// Items can be integers, doubles, strings, or negative numbers. +// Note: the "DATA" keyword is already consumed by parseStatement +static Node *parseData(void) { + int ln = gTok.line; + Node *n = newNode(NODE_DATA, ln); + Node *head = NULL, *tail = NULL; + + do { + Node *item = NULL; + // Handle negative numeric literals + int neg = 0; + if (tokIs(TOK_MINUS)) { + neg = 1; + nextToken(); + } + if (tokIs(TOK_INT_LIT)) { + item = newNode(NODE_INT_LIT, ln); + item->ival = neg ? -gTok.ival : gTok.ival; + item->dataType = TYPE_INT; + nextToken(); + } else if (tokIs(TOK_DBL_LIT)) { + item = newNode(NODE_DBL_LIT, ln); + item->dval = neg ? -gTok.dval : gTok.dval; + item->dataType = TYPE_DBL; + nextToken(); + } else if (tokIs(TOK_STR_LIT)) { + item = newNode(NODE_STR_LIT, ln); + item->sval = strDup(gTok.sval); + item->dataType = TYPE_STR; + nextToken(); + } else { + fatal(ln, "Expected literal value in DATA statement"); + } + if (!head) head = tail = item; + else { tail->next = item; tail = item; } + } while (tokAccept(TOK_COMMA)); + + n->a = head; + return n; +} + + +// Parse READ statement: READ var1, var2, ... 
+// Note: the "READ" keyword is already consumed by parseStatement +static Node *parseRead(void) { + int ln = gTok.line; + Node *n = newNode(NODE_READ, ln); + Node *head = NULL, *tail = NULL; + + do { + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable name in READ"); + Node *v = newNode(NODE_IDENT, ln); + v->sval = strDup(gTok.sval); + v->dataType = inferVarType(gTok.sval); + nextToken(); + if (!head) head = tail = v; + else { tail->next = v; tail = v; } + } while (tokAccept(TOK_COMMA)); + + n->a = head; + return n; +} + + +// Parse RESTORE statement: RESTORE [line_number] +// Note: the "RESTORE" keyword is already consumed by parseStatement +static Node *parseRestore(void) { + int ln = gTok.line; + Node *n = newNode(NODE_RESTORE, ln); + if (tokIs(TOK_INT_LIT)) { + n->ival = gTok.ival; + nextToken(); + } else if (tokIs(TOK_IDENT)) { + n->sval = strDup(gTok.sval); + nextToken(); + } + return n; +} + + +// Parse OPEN statement: OPEN "filename" FOR INPUT|OUTPUT|APPEND|BINARY|RANDOM AS #n [LEN = expr] +static Node *parseOpen(void) { + int ln = gTok.line; + tokExpect(TOK_OPEN); + Node *n = newNode(NODE_OPEN, ln); + n->a = parseExpr(); // filename expression + tokExpect(TOK_FOR); + if (tokIs(TOK_INPUT)) { n->ival = 0; nextToken(); } + else if (tokIs(TOK_OUTPUT)) { n->ival = 1; nextToken(); } + else if (tokIs(TOK_APPEND)) { n->ival = 2; nextToken(); } + else if (tokIs(TOK_BINARY)) { n->ival = 3; nextToken(); } + else if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOM") == 0) + { n->ival = 4; nextToken(); } + else fatal(ln, "Expected INPUT, OUTPUT, APPEND, BINARY, or RANDOM after FOR"); + tokExpect(TOK_AS); + n->b = parseFileNumber(); // file number expression + // Optional LEN = expr for RANDOM access + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "LEN") == 0) { + nextToken(); + tokExpect(TOK_EQ); + n->c = parseExpr(); + } + return n; +} + + +// Parse CLOSE statement: CLOSE #n +static Node *parseClose(void) { + int ln = gTok.line; + tokExpect(TOK_CLOSE); + Node *n = 
newNode(NODE_CLOSE, ln); + n->b = parseFileNumber(); + return n; +} + + +// Parse a single statement +static Node *parseStatement(void) { + int ln = gTok.line; + + // Line-number label: a bare integer at the start of a statement + if (tokIs(TOK_INT_LIT)) { + int lnum = gTok.ival; + nextToken(); + // If followed by a statement, this is a labeled line + Node *lbl = newNode(NODE_LABEL, ln); + lbl->ival = lnum; + recordLineLabel(lnum); + + // If there's a statement on this line, chain it + if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { + Node *stmt = parseStatement(); + lbl->next = stmt; + } + return lbl; + } + + // Named label: identifier followed by colon (e.g., myLabel:) + // Must peek ahead to distinguish from statement separator colons. + // Only treat as label if the NEXT token is a colon. + if (tokIs(TOK_IDENT)) { + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + char labelName[MAX_IDENT]; + strncpy(labelName, gTok.sval, MAX_IDENT - 1); + labelName[MAX_IDENT - 1] = '\0'; + nextToken(); + if (tokIs(TOK_COLON)) { + if (isKeyword(labelName)) + fatal(ln, "Cannot use keyword '%s' as label", labelName); + nextToken(); + Node *lbl = newNode(NODE_LABEL, ln); + lbl->ival = 0; // 0 = named label, not numeric + lbl->sval = strDup(labelName); + // If there's a statement on this line, chain it + if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { + Node *stmt = parseStatement(); + lbl->next = stmt; + } + return lbl; + } + // Not a label — restore state + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + } + + // TYPE ... 
END TYPE + if (tokIs(TOK_TYPE)) return parseTypeDef(); + + // DIM + if (tokIs(TOK_DIM)) return parseDim(); + + // REDIM + if (tokIs(TOK_REDIM)) return parseRedim(); + + // PRINT + if (tokIs(TOK_PRINT)) return parsePrint(); + + // INPUT / INPUT # + if (tokIs(TOK_INPUT)) return parseInput(); + + // OPEN + if (tokIs(TOK_OPEN)) return parseOpen(); + + // CLOSE + if (tokIs(TOK_CLOSE)) return parseClose(); + + // DATA, READ, RESTORE are contextual keywords — checked as identifiers + // to avoid colliding with user variable names like "data(i)". + // We peek ahead: DATA is a keyword only when NOT followed by '(' or '='. + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "DATA") == 0) { + // Save state and peek at next token + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + nextToken(); + if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) { + // It's a DATA statement + return parseData(); + } + // Restore — it's a variable named "data" + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + } + + // READ (contextual keyword — same peek-ahead logic) + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "READ") == 0) { + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + nextToken(); + if (!tokIs(TOK_LPAREN) && !tokIs(TOK_EQ)) { + return parseRead(); + } + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + } + + // RESTORE (contextual keyword) + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RESTORE") == 0) { + nextToken(); + return parseRestore(); + } + + // GET #filenum, record, variable (contextual keyword) + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "GET") == 0) { + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + nextToken(); + if (tokIs(TOK_HASH)) { + Node *n = newNode(NODE_GET, ln); + n->a = parseFileNumber(); + tokExpect(TOK_COMMA); + n->b = parseExpr(); + tokExpect(TOK_COMMA); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable name in GET"); + Node *v = newNode(NODE_IDENT, ln); + v->sval = 
strDup(gTok.sval); + v->dataType = inferVarType(gTok.sval); + nextToken(); + n->c = v; + return n; + } + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + } + + // PUT #filenum, record, variable (contextual keyword) + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "PUT") == 0) { + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + nextToken(); + if (tokIs(TOK_HASH)) { + Node *n = newNode(NODE_PUT, ln); + n->a = parseFileNumber(); + tokExpect(TOK_COMMA); + n->b = parseExpr(); + tokExpect(TOK_COMMA); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected variable name in PUT"); + Node *v = newNode(NODE_IDENT, ln); + v->sval = strDup(gTok.sval); + v->dataType = inferVarType(gTok.sval); + nextToken(); + n->c = v; + return n; + } + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + } + + // RANDOMIZE [seed] (contextual keyword) + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "RANDOMIZE") == 0) { + nextToken(); + Node *n = newNode(NODE_RANDOMIZE, ln); + // Optional seed expression + if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF) && + !tokIs(TOK_ELSE)) { + n->a = parseExpr(); + } + return n; + } + + // MID$ assignment: MID$(s$, start, len) = replacement$ + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "MID$") == 0) { + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + nextToken(); + if (tokIs(TOK_LPAREN)) { + nextToken(); + Node *n = newNode(NODE_MID_ASSIGN, ln); + // Parse target string variable + if (!tokIs(TOK_IDENT)) fatal(ln, "Expected string variable in MID$ assignment"); + n->a = newNode(NODE_IDENT, ln); + n->a->sval = strDup(gTok.sval); + n->a->dataType = TYPE_STR; + nextToken(); + tokExpect(TOK_COMMA); + n->b = parseExpr(); // start position + if (tokAccept(TOK_COMMA)) { + n->c = parseExpr(); // length + } else { + // No length — use large value + Node *big = newNode(NODE_INT_LIT, ln); + big->ival = 32767; + n->c = big; + } + tokExpect(TOK_RPAREN); + tokExpect(TOK_EQ); + n->d = parseExpr(); // 
replacement string + return n; + } + // Not MID$ assignment — restore + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + } + + // LINE INPUT # + if (tokIs(TOK_LINE)) { + nextToken(); + tokExpect(TOK_INPUT); + int lln = ln; + Node *n = newNode(NODE_LINE_INPUT, lln); + n->b = parseFileNumber(); + tokExpect(TOK_COMMA); + if (!tokIs(TOK_IDENT)) + fatal(lln, "Expected variable name in LINE INPUT"); + Node *v = newNode(NODE_IDENT, lln); + v->sval = strDup(gTok.sval); + v->dataType = TYPE_STR; + nextToken(); + n->a = v; + return n; + } + + // WRITE # + if (tokIs(TOK_WRITE)) { + nextToken(); + Node *n = newNode(NODE_FILE_WRITE, ln); + n->b = parseFileNumber(); + tokExpect(TOK_COMMA); + Node *head = NULL, *tail = NULL; + do { + Node *e = parseExpr(); + if (!head) head = tail = e; + else { tail->next = e; tail = e; } + } while (tokAccept(TOK_COMMA)); + n->a = head; + return n; + } + + // IF + if (tokIs(TOK_IF)) return parseIf(); + + // FOR + if (tokIs(TOK_FOR)) return parseFor(); + + // WHILE + if (tokIs(TOK_WHILE)) return parseWhile(); + + // DO + if (tokIs(TOK_DO)) return parseDoLoop(); + + // SUB + if (tokIs(TOK_SUB)) return parseSub(); + + // FUNCTION + if (tokIs(TOK_FUNCTION)) return parseFunction(); + + // LOCAL + if (tokIs(TOK_LOCAL)) return parseLocal(); + + // STATIC + if (tokIs(TOK_STATIC)) return parseStatic(); + + // GOTO + if (tokIs(TOK_GOTO)) { + nextToken(); + Node *n = newNode(NODE_GOTO, ln); + if (tokIs(TOK_INT_LIT)) { + n->ival = gTok.ival; + recordGotoTarget(n->ival); + nextToken(); + } else if (tokIs(TOK_IDENT)) { + n->sval = strDup(gTok.sval); + recordGotoStrTarget(n->sval); + nextToken(); + } else { + fatal(ln, "Expected line number or label after GOTO"); + } + return n; + } + + // GOSUB + if (tokIs(TOK_GOSUB)) { + nextToken(); + Node *n = newNode(NODE_GOSUB, ln); + if (tokIs(TOK_INT_LIT)) { + n->ival = gTok.ival; + recordGotoTarget(n->ival); + nextToken(); + } else if (tokIs(TOK_IDENT)) { + n->sval = strDup(gTok.sval); + 
recordGotoStrTarget(n->sval); + nextToken(); + } else { + fatal(ln, "Expected line number or label after GOSUB"); + } + if (gGosubCount >= MAX_GOSUB_SITES) + fatal(ln, "Too many GOSUB sites (max %d)", MAX_GOSUB_SITES); + n->ival2 = gGosubCount++; + return n; + } + + // RETURN + if (tokIs(TOK_RETURN)) { + nextToken(); + Node *n = newNode(NODE_RETURN, ln); + // Optional return expression for FUNCTION + if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { + n->a = parseExpr(); + } + return n; + } + + // EXIT (FOR | WHILE | DO | SUB | FUNCTION) + if (tokIs(TOK_EXIT)) { + nextToken(); + Node *n = newNode(NODE_EXIT, ln); + if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); } + else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); } + else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); } + else if (tokIs(TOK_SUB)) { n->ival = TOK_SUB; nextToken(); } + else if (tokIs(TOK_FUNCTION)){n->ival = TOK_FUNCTION;nextToken(); } + else fatal(ln, "Expected FOR, WHILE, DO, SUB, or FUNCTION after EXIT"); + return n; + } + + // CONTINUE (FOR | WHILE | DO) — contextual keyword + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "CONTINUE") == 0) { + nextToken(); + Node *n = newNode(NODE_CONTINUE, ln); + if (tokIs(TOK_FOR)) { n->ival = TOK_FOR; nextToken(); } + else if (tokIs(TOK_WHILE)){ n->ival = TOK_WHILE; nextToken(); } + else if (tokIs(TOK_DO)) { n->ival = TOK_DO; nextToken(); } + else fatal(ln, "Expected FOR, WHILE, or DO after CONTINUE"); + return n; + } + + // END (program termination) + if (tokIs(TOK_END)) { + // Peek ahead: END IF / END SUB / END FUNCTION are handled by callers. + // Bare END means program exit. 
+ // Save position to check next token + int savePos = gSrcPos; + int saveLine = gLine; + Token saveTok = gTok; + nextToken(); + + // If followed by IF, SUB, FUNCTION, SELECT – put it back (the caller handles it) + if (tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION) || + tokIs(TOK_SELECT)) { + gSrcPos = savePos; + gLine = saveLine; + gTok = saveTok; + return NULL; // signal to caller: block terminator reached + } + + // Bare END + Node *n = newNode(NODE_END, ln); + return n; + } + + // CONST name = value + if (tokIs(TOK_CONST)) { + nextToken(); + if (!tokIs(TOK_IDENT)) fatal(ln, "Expected identifier after CONST"); + char cname[MAX_IDENT]; + strncpy(cname, gTok.sval, MAX_IDENT - 1); + cname[MAX_IDENT - 1] = '\0'; + nextToken(); + tokExpect(TOK_EQ); + // Parse the value — must be a literal + Node *val = parseExpr(); + Node *n = newNode(NODE_CONST_DECL, ln); + n->sval = strDup(cname); + n->a = val; + // Store in constant table + if (gConstCount < MAX_CONSTS) { + strncpy(gConsts[gConstCount].name, cname, MAX_IDENT - 1); + if (val->type == NODE_STR_LIT) { + gConsts[gConstCount].dataType = TYPE_STR; + strncpy(gConsts[gConstCount].strVal, val->sval, MAX_TOKEN_LEN - 1); + } else if (val->type == NODE_DBL_LIT) { + gConsts[gConstCount].dataType = TYPE_DBL; + gConsts[gConstCount].numVal = val->dval; + } else if (val->type == NODE_INT_LIT) { + gConsts[gConstCount].dataType = TYPE_INT; + gConsts[gConstCount].numVal = val->ival; + } else if (val->type == NODE_UNOP && val->ival == TOK_MINUS) { + // Handle negative constants like CONST X = -1 + gConsts[gConstCount].dataType = TYPE_DBL; + if (val->a->type == NODE_INT_LIT) + gConsts[gConstCount].numVal = -(double)val->a->ival; + else + gConsts[gConstCount].numVal = -val->a->dval; + } else { + fatal(ln, "CONST value must be a literal"); + } + gConstCount++; + } + return n; + } + + // SWAP var1, var2 (contextual keyword) + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "SWAP") == 0) { + nextToken(); + Node *n = newNode(NODE_SWAP, 
ln); + n->a = parseExpr(); + tokExpect(TOK_COMMA); + n->b = parseExpr(); + return n; + } + + // SELECT CASE expr ... CASE ... END SELECT + if (tokIs(TOK_SELECT)) { + nextToken(); + tokExpect(TOK_CASE); + Node *n = newNode(NODE_SELECT, ln); + n->a = parseExpr(); + skipNewlines(); + // Parse CASE blocks + Node *caseHead = NULL, *caseTail = NULL; + while (tokIs(TOK_CASE)) { + nextToken(); + Node *cb = newNode(NODE_CASE, gLine); + // CASE ELSE + if (tokIs(TOK_ELSE)) { + nextToken(); + cb->ival = 1; // flag: CASE ELSE + } else { + // Parse comma-separated values/ranges + Node *valHead = NULL, *valTail = NULL; + for (;;) { + Node *v; + // CASE IS >/=/<=/<>/= expr + if (tokIs(TOK_IDENT) && strIcmp(gTok.sval, "IS") == 0) { + nextToken(); + // Expect a comparison operator + int op = gTok.type; + if (op != TOK_EQ && op != TOK_NE && op != TOK_LT && + op != TOK_GT && op != TOK_LE && op != TOK_GE) + fatal(gLine, "Expected comparison operator after IS"); + nextToken(); + v = newNode(NODE_BINOP, gLine); + v->ival = op; + v->a = NULL; // placeholder: test expr filled at codegen + v->b = parseExpr(); + v->ival2 = 1; // flag: IS comparison + } else { + v = parseExpr(); + // Check for TO (range) + if (tokIs(TOK_TO)) { + nextToken(); + Node *range = newNode(NODE_BINOP, gLine); + range->ival = TOK_TO; // reuse TO token as range marker + range->a = v; + range->b = parseExpr(); + range->ival2 = 2; // flag: range + v = range; + } + } + if (!valHead) { valHead = valTail = v; } + else { valTail->next = v; valTail = v; } + if (!tokAccept(TOK_COMMA)) break; + } + cb->a = valHead; + } + skipNewlines(); + // Parse body until next CASE or END SELECT + Node *bodyHead = NULL, *bodyTail = NULL; + while (!tokIs(TOK_CASE) && !tokIs(TOK_END) && !tokIs(TOK_EOF)) { + Node *s = parseStatement(); + if (!s) break; + if (!bodyHead) { bodyHead = bodyTail = s; } + else { bodyTail->next = s; bodyTail = s; } + skipNewlines(); + } + cb->b = bodyHead; + if (!caseHead) { caseHead = caseTail = cb; } + else { 
caseTail->next = cb; caseTail = cb; } + } + // Expect END SELECT + tokExpect(TOK_END); + tokExpect(TOK_SELECT); + n->b = caseHead; + return n; + } + + // ON expr GOTO/GOSUB label1, label2, ... + if (tokIs(TOK_ON)) { + nextToken(); + Node *expr = parseExpr(); + if (tokIs(TOK_GOTO)) { + nextToken(); + Node *n = newNode(NODE_ON_GOTO, ln); + n->a = expr; + // Parse comma-separated labels + Node *labHead = NULL, *labTail = NULL; + for (;;) { + Node *lab; + if (tokIs(TOK_INT_LIT)) { + lab = newNode(NODE_INT_LIT, gLine); + lab->ival = gTok.ival; + recordGotoTarget(lab->ival); + nextToken(); + } else if (tokIs(TOK_IDENT)) { + lab = newNode(NODE_IDENT, gLine); + lab->sval = strDup(gTok.sval); + recordGotoStrTarget(lab->sval); + nextToken(); + } else { + fatal(gLine, "Expected label in ON GOTO"); + } + if (!labHead) { labHead = labTail = lab; } + else { labTail->next = lab; labTail = lab; } + if (!tokAccept(TOK_COMMA)) break; + } + n->b = labHead; + return n; + } else if (tokIs(TOK_GOSUB)) { + nextToken(); + Node *n = newNode(NODE_ON_GOSUB, ln); + n->a = expr; + n->ival2 = gGosubCount; // first return-point id + // Parse comma-separated labels + Node *labHead = NULL, *labTail = NULL; + for (;;) { + Node *lab; + if (tokIs(TOK_INT_LIT)) { + lab = newNode(NODE_INT_LIT, gLine); + lab->ival = gTok.ival; + recordGotoTarget(lab->ival); + nextToken(); + } else if (tokIs(TOK_IDENT)) { + lab = newNode(NODE_IDENT, gLine); + lab->sval = strDup(gTok.sval); + recordGotoStrTarget(lab->sval); + nextToken(); + } else { + fatal(gLine, "Expected label in ON GOSUB"); + } + gGosubCount++; // allocate return-point id for each target + if (!labHead) { labHead = labTail = lab; } + else { labTail->next = lab; labTail = lab; } + if (!tokAccept(TOK_COMMA)) break; + } + n->b = labHead; + return n; + } else { + fatal(ln, "Expected GOTO or GOSUB after ON expression"); + } + } + + // CALL name(args) + if (tokIs(TOK_CALL)) { + nextToken(); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected subroutine name 
after CALL"); + Node *n = newNode(NODE_CALL, ln); + n->sval = strDup(gTok.sval); + nextToken(); + + // Parse argument list + Node *args = NULL, *atail = NULL; + if (tokAccept(TOK_LPAREN)) { + while (!tokIs(TOK_RPAREN) && !tokIs(TOK_EOF)) { + Node *arg = parseExpr(); + if (!args) args = atail = arg; + else { atail->next = arg; atail = arg; } + if (!tokAccept(TOK_COMMA)) break; + } + tokExpect(TOK_RPAREN); + } + n->a = args; + return n; + } + + // LET assignment or implicit assignment/call + if (tokIs(TOK_LET)) nextToken(); // consume optional LET + + if (tokIs(TOK_IDENT)) { + char name[MAX_TOKEN_LEN]; + strncpy(name, gTok.sval, MAX_TOKEN_LEN - 1); + nextToken(); + + // Array element assignment or sub call: name(args) [= expr] + if (tokIs(TOK_LPAREN)) { + nextToken(); + Node *idx = parseExpr(); + Node *tail = idx; + while (tokAccept(TOK_COMMA)) { + Node *dimIdx = parseExpr(); + tail->next = dimIdx; + tail = dimIdx; + } + tokExpect(TOK_RPAREN); + + // Check for array-element dot-access assignment: arr(i).field[.field...] = expr + if (tokIs(TOK_DOT)) { + Symbol *s = symLookup(name); + if (s && s->dataType == TYPE_UDT) { + Node *cur = newNode(NODE_ARRAY_REF, ln); + cur->sval = strDup(name); + cur->a = idx; + cur->dataType = TYPE_UDT; + cur->ival2 = s->udtIndex; + int curUdt = s->udtIndex; + while (curUdt >= 0 && tokIs(TOK_DOT)) { + nextToken(); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected field name after '.'"); + int fi = udtFieldLookup(curUdt, gTok.sval); + if (fi < 0) + fatal(ln, "Unknown field '%s'", gTok.sval); + Node *dot = newNode(NODE_DOT_ACCESS, ln); + dot->a = cur; + dot->sval = strDup(gTok.sval); + dot->ival2 = curUdt; + UdtField *uf = &gUdts[curUdt].fields[fi]; + dot->dataType = uf->dataType; + if (uf->dataType == TYPE_STR && uf->strLen > 0) + dot->ival = uf->strLen; + cur = dot; + curUdt = (uf->dataType == TYPE_UDT) ? 
uf->udtIndex : -1; + nextToken(); + } + tokExpect(TOK_EQ); + Node *val = parseExpr(); + Node *n = newNode(NODE_ASSIGN, ln); + n->a = cur; + n->b = val; + return n; + } + } + + if (tokAccept(TOK_EQ)) { + // Array element assignment: name(i, j, ...) = expr + Node *val = parseExpr(); + Node *target = newNode(NODE_ARRAY_REF, ln); + target->sval = strDup(name); + target->a = idx; + target->dataType = inferVarType(name); + + Node *n = newNode(NODE_ASSIGN, ln); + n->a = target; + n->b = val; + return n; + } + + // Not an assignment – must be a sub call: name(args) + Node *n = newNode(NODE_CALL, ln); + n->sval = strDup(name); + n->a = idx; + return n; + } + + // Dot-access assignment: var.field[.field...] = expr + if (tokIs(TOK_DOT)) { + Symbol *s = symLookup(name); + if (s && s->dataType == TYPE_UDT) { + Node *cur = newNode(NODE_IDENT, ln); + cur->sval = strDup(name); + cur->dataType = TYPE_UDT; + int curUdt = s->udtIndex; + while (curUdt >= 0 && tokIs(TOK_DOT)) { + nextToken(); + if (!tokIs(TOK_IDENT)) + fatal(ln, "Expected field name after '.'"); + int fi = udtFieldLookup(curUdt, gTok.sval); + if (fi < 0) + fatal(ln, "Unknown field '%s' in type '%s'", + gTok.sval, gUdts[curUdt].name); + Node *dot = newNode(NODE_DOT_ACCESS, ln); + dot->a = cur; + dot->sval = strDup(gTok.sval); + dot->ival2 = curUdt; + UdtField *uf = &gUdts[curUdt].fields[fi]; + dot->dataType = uf->dataType; + if (uf->dataType == TYPE_STR && uf->strLen > 0) + dot->ival = uf->strLen; + cur = dot; + curUdt = (uf->dataType == TYPE_UDT) ? 
uf->udtIndex : -1; + nextToken(); + } + tokExpect(TOK_EQ); + Node *val = parseExpr(); + Node *n = newNode(NODE_ASSIGN, ln); + n->a = cur; + n->b = val; + return n; + } + } + + // Simple variable assignment: name = expr + if (tokAccept(TOK_EQ)) { + Node *val = parseExpr(); + Node *target = newNode(NODE_IDENT, ln); + target->sval = strDup(name); + target->dataType = inferVarType(name); + + Node *n = newNode(NODE_ASSIGN, ln); + n->a = target; + n->b = val; + return n; + } + + // Implicit sub call without CALL keyword: name arg1, arg2, ... + Node *n = newNode(NODE_CALL, ln); + n->sval = strDup(name); + Node *args = NULL, *atail = NULL; + if (!tokIs(TOK_NEWLINE) && !tokIs(TOK_COLON) && !tokIs(TOK_EOF)) { + Node *arg = parseExpr(); + args = atail = arg; + while (tokAccept(TOK_COMMA)) { + arg = parseExpr(); + atail->next = arg; + atail = arg; + } + } + n->a = args; + return n; + } + + fatal(ln, "Unexpected token '%s' (type %d)", gTok.sval, gTok.type); + return NULL; +} + + +// Parse a block of statements until one of the terminator tokens is seen. +// Returns a NODE_BLOCK containing the linked list of statements. 
+static Node *parseBlock(TokenType end1, TokenType end2, TokenType end3) {
+    // Statements are collected into a singly linked list stored in ->a of a
+    // fresh NODE_BLOCK.  Parsing stops, without consuming the terminator,
+    // when the current token is end1/end2/end3 or EOF, when END is followed
+    // by IF/SUB/FUNCTION, or when parseStatement() returns NULL.
+    Node *block = newNode(NODE_BLOCK, gTok.line);
+    Node *first = NULL;
+    Node *last = NULL;
+
+    for (;;) {
+        if (tokIs(TOK_EOF)) break;
+        skipEol();
+        if (tokIs(end1) || tokIs(end2) || tokIs(end3) || tokIs(TOK_EOF))
+            break;
+
+        if (tokIs(TOK_END)) {
+            // One-token lookahead: snapshot the lexer state, peek at the
+            // token after END, then always rewind.  END IF / END SUB /
+            // END FUNCTION belong to the caller; anything else is left
+            // for parseStatement() to deal with.
+            const int posBefore = gSrcPos;
+            const int lineBefore = gLine;
+            const Token tokBefore = gTok;
+            nextToken();
+            const int closesBlock =
+                tokIs(TOK_IF) || tokIs(TOK_SUB) || tokIs(TOK_FUNCTION);
+            gSrcPos = posBefore;
+            gLine = lineBefore;
+            gTok = tokBefore;
+            if (closesBlock) break;
+        }
+
+        Node *stmt = parseStatement();
+        if (stmt == NULL) break;    // parseStatement saw a block terminator
+
+        // A statement may arrive as a ->next chain (e.g. label + stmt):
+        // link the whole chain in and move 'last' to its final node.
+        if (first == NULL) first = stmt;
+        else last->next = stmt;
+        for (last = stmt; last->next != NULL; last = last->next) {
+        }
+    }
+
+    block->a = first;
+    return block;
+}
+
+
+// Parse the whole translation unit: prime the lexer, parse statements up to
+// EOF, and wrap the resulting block in a NODE_PROGRAM node.
+static Node *parseProgram(void) {
+    Node *body;
+    Node *program;
+
+    nextToken();    // load the first token before parsing starts
+    body = parseBlock(TOK_EOF, TOK_EOF, TOK_EOF);
+    program = newNode(NODE_PROGRAM, 1);
+    program->a = body;
+    return program;
+}
+
+
+// -----------------------------------------------------------------------
+// Section 8: Code Generator
+//
+// Walks the AST and emits C source code.
The output includes: +// - A runtime library for strings, file I/O, and dynamic arrays +// (debug or release variant based on --release flag) +// - Packed struct definitions for user-defined types +// - A static DATA pool for DATA/READ/RESTORE +// - Forward declarations for SUBs and FUNCTIONs +// - SUB/FUNCTION implementations as C functions +// - A main() function containing global code +// ----------------------------------------------------------------------- + +// Forward declarations +static void genExpr(Node *n); +static void genStmt(Node *n); +static void genArrayFlatIndex(const char *name, Node *indices); +static void genBlock(Node *blk); + +// Return the C type string for a BASIC data type +static const char *cTypeStr(DataType dt) { + switch (dt) { + case TYPE_BYTE: return "uint8_t"; + case TYPE_INT: return "int16_t"; + case TYPE_LONG: return "int32_t"; + case TYPE_FLOAT: return "float"; + case TYPE_DBL: return "double"; + case TYPE_STR: return "char*"; + default: return "void"; + } +} + + +// Return the C struct type string for a UDT (uses rotating buffer) +static const char *cUdtTypeStr(int udtIndex) { + static char bufs[4][MAX_IDENT + 16]; + static int bi = 0; + char *buf = bufs[bi++ & 3]; + if (udtIndex >= 0 && udtIndex < gUdtCount) + snprintf(buf, MAX_IDENT + 16, "struct _b_%s", cleanName(gUdts[udtIndex].name)); + else + snprintf(buf, MAX_IDENT + 16, "void"); + return buf; +} + + +// Return a C default-value expression for a data type +static const char *cDefaultVal(DataType dt) { + switch (dt) { + case TYPE_BYTE: return "0"; + case TYPE_INT: return "0"; + case TYPE_LONG: return "0"; + case TYPE_FLOAT: return "0.0f"; + case TYPE_DBL: return "0.0"; + case TYPE_STR: return "_bstr(\"\")"; + default: return "0"; + } +} + + +// Return a printf format specifier for a data type. +// int16_t is promoted to int in varargs so %d is safe. +// int32_t is int on all modern platforms so %d works. 
+static const char *cFmt(DataType dt) {
+    // Any type without an explicit case falls back to "%d".
+    switch (dt) {
+        case TYPE_BYTE:  return "%u";
+        case TYPE_INT:   return "%d";
+        case TYPE_LONG:  return "%d";
+        case TYPE_FLOAT: return "%g";
+        case TYPE_DBL:   return "%g";
+        case TYPE_STR:   return "%s";
+        default:         return "%d";
+    }
+}
+
+
+// Return a scanf format specifier for a data type.  There is no string
+// case: genInput reads strings with fgets instead of scanf.
+static const char *cScanfFmt(DataType dt) {
+    switch (dt) {
+        case TYPE_BYTE:  return "%hhu"; // uint8_t
+        case TYPE_INT:   return "%hd";  // int16_t needs short format
+        case TYPE_LONG:  return "%d";   // int32_t
+        case TYPE_FLOAT: return "%f";
+        case TYPE_DBL:   return "%lf";
+        default:         return "%hd";
+    }
+}
+
+
+// Return 1 if the expression node is typed as a string, 0 otherwise
+// (including when n is NULL).
+static int isStringExpr(Node *n) {
+    if (!n) return 0;
+    return n->dataType == TYPE_STR;
+}
+
+
+// Return non-zero if 'name' is a built-in function that returns a string.
+// Lookup order: external function definitions, then compile-time builtin
+// definitions (both decided by their declared return type), then the
+// string functions that genBuiltinCall handles specially.
+static int isBuiltinStrFunc(const char *name) {
+    // Check external functions first
+    ExternFunc *ef = externFuncLookup(name);
+    if (ef) return ef->returnType == TYPE_STR;
+
+    // Check compile-time builtins
+    const BuiltinDef *bd = builtinDefLookup(name);
+    if (bd) return bd->returnType == TYPE_STR;
+
+    // Functions with special handling in genBuiltinCall
+    return (strIcmp(name, "MID$") == 0 ||
+            strIcmp(name, "LEFT$") == 0 ||
+            strIcmp(name, "RIGHT$") == 0 ||
+            strIcmp(name, "STRING$") == 0);
+}
+
+
+// Return non-zero if 'name' is any built-in function: an external
+// definition, a compile-time builtin, or one of the names that
+// genBuiltinCall handles specially.
+static int isBuiltinFunc(const char *name) {
+    // Check external functions
+    if (externFuncLookup(name)) return 1;
+
+    // Check compile-time builtins
+    if (builtinDefLookup(name)) return 1;
+
+    // Functions with special handling in genBuiltinCall
+    return (isBuiltinStrFunc(name) ||
+            strIcmp(name, "LEN") == 0 ||
+            strIcmp(name, "VAL") == 0 ||
+            strIcmp(name, "ASC") == 0 ||
+            strIcmp(name, "INT") == 0 ||
+            strIcmp(name, "ABS") == 0 ||
+            strIcmp(name, "INSTR") == 0 ||
+            strIcmp(name, "EOF") == 0 ||
+            strIcmp(name, "LOF") == 0 ||
+            strIcmp(name, "FREEFILE") == 0 ||
+            strIcmp(name, "LBOUND") == 0 ||
+            strIcmp(name, "UBOUND") == 0);
+}
+
+
+// Count the number of nodes in a ->next linked list (0 for NULL)
+static int countList(Node *n) {
+    int c = 0;
+    while (n) { c++; n = n->next; }
+    return c;
+}
+
+
+// Generate code for a built-in function call.
+//   name - BASIC function name (matched case-insensitively)
+//   args - linked list of argument expressions, may be NULL
+// Names handled here directly are emitted as fixed C snippets; anything
+// else is expanded from an external/builtin template, or emitted as a
+// plain C call when no template exists.
+static void genBuiltinCall(const char *name, Node *args) {
+    int argc = countList(args);
+
+    // Functions with special handling (validation, multiple args, etc.)
+    if (strIcmp(name, "LEN") == 0) {
+        // Without an argument genExpr(NULL) would emit strlen(0).
+        if (argc < 1) fatal(0, "LEN requires 1 argument");
+        emitRaw("((int)strlen(");
+        genExpr(args);
+        emitRaw("))");
+    } else if (strIcmp(name, "VAL") == 0) {
+        if (argc < 1) fatal(0, "VAL requires 1 argument");
+        emitRaw("atof(");
+        genExpr(args);
+        emitRaw(")");
+    } else if (strIcmp(name, "ASC") == 0) {
+        if (argc < 1) fatal(0, "ASC requires 1 argument");
+        emitRaw("((int)(unsigned char)(");
+        genExpr(args);
+        emitRaw(")[0])");
+    } else if (strIcmp(name, "INT") == 0) {
+        // NOTE(review): a C cast truncates toward zero, whereas BASIC INT()
+        // conventionally rounds down (INT(-2.5) = -3).  Confirm the
+        // intended semantics for negative arguments.
+        emitRaw("((int)(");
+        genExpr(args);
+        emitRaw("))");
+    } else if (strIcmp(name, "ABS") == 0) {
+        emitRaw("_babs(");
+        genExpr(args);
+        emitRaw(")");
+    } else if (strIcmp(name, "MID$") == 0) {
+        if (argc < 2) fatal(0, "MID$ requires at least 2 arguments");
+        emitRaw("_bmid(");
+        genExpr(args);
+        emitRaw(", ");
+        genExpr(args->next);
+        if (argc >= 3 && args->next->next) {
+            emitRaw(", ");
+            genExpr(args->next->next);
+        } else {
+            emitRaw(", -1");    // no length given: pass -1 to the runtime
+        }
+        emitRaw(")");
+    } else if (strIcmp(name, "LEFT$") == 0) {
+        if (argc < 2) fatal(0, "LEFT$ requires 2 arguments");
+        emitRaw("_bleft(");
+        genExpr(args);
+        emitRaw(", ");
+        genExpr(args->next);
+        emitRaw(")");
+    } else if (strIcmp(name, "RIGHT$") == 0) {
+        if (argc < 2) fatal(0, "RIGHT$ requires 2 arguments");
+        emitRaw("_bright(");
+        genExpr(args);
+        emitRaw(", ");
+        genExpr(args->next);
+        emitRaw(")");
+    } else if (strIcmp(name, "INSTR") == 0) {
+        if (argc < 2) fatal(0, "INSTR requires 2 arguments");
+        emitRaw("_binstr(");
+        genExpr(args);
+        emitRaw(", ");
+        genExpr(args->next);
+        emitRaw(")");
+    } else if (strIcmp(name, "STRING$") == 0) {
+        if (argc < 2) fatal(0, "STRING$ requires 2 arguments");
+        emitRaw("_bstring_rep(");
+        genExpr(args);
+        emitRaw(", ");
+        genExpr(args->next);
+        emitRaw(")");
+    } else if (strIcmp(name, "EOF") == 0) {
+        emitRaw("_beof(");
+        genExpr(args);
+        emitRaw(")");
+    } else if (strIcmp(name, "LOF") == 0) {
+        emitRaw("_blof(");
+        genExpr(args);
+        emitRaw(")");
+    } else if (strIcmp(name, "FREEFILE") == 0) {
+        emitRaw("_bfreefile()");
+    // --- Array functions (need special codegen) ---
+    } else if (strIcmp(name, "LBOUND") == 0) {
+        emitRaw("0");
+    } else if (strIcmp(name, "UBOUND") == 0) {
+        if (args && args->type == NODE_IDENT) {
+            emitRaw("(%s_size - 1)", cleanName(args->sval));
+        } else {
+            fatal(0, "UBOUND requires an array name");
+        }
+    } else {
+        // Check external function definitions and compile-time builtins
+        const char *tmpl = NULL;
+        ExternFunc *ef = externFuncLookup(name);
+        if (ef) {
+            tmpl = ef->cCode;
+        } else {
+            const BuiltinDef *bd = builtinDefLookup(name);
+            if (bd) tmpl = bd->cCode;
+        }
+
+        if (tmpl) {
+            // Expand template: % = first arg, %1 %2 etc = numbered args
+            const char *t = tmpl;
+            while (*t) {
+                if (*t == '%') {
+                    t++;
+                    int argNum = 0;
+                    if (*t >= '1' && *t <= '9') {
+                        argNum = *t - '1';
+                        t++;
+                    }
+                    // Find the nth argument
+                    Node *arg = args;
+                    for (int i = 0; i < argNum && arg; i++)
+                        arg = arg->next;
+                    if (arg) genExpr(arg);
+                    else emitRaw("0");  // missing arg
+                } else {
+                    // Copy the template character through "%s" so that it
+                    // is never interpreted as a format directive.
+                    char c[2] = {*t, '\0'};
+                    emitRaw("%s", c);
+                    t++;
+                }
+            }
+        } else {
+            // Unknown builtin – just emit as-is
+            emitRaw("%s(", cleanName(name));
+            for (Node *a = args; a; a = a->next) {
+                if (a != args) emitRaw(", ");
+                genExpr(a);
+            }
+            emitRaw(")");
+        }
+    }
+}
+
+
+// Generate code for an expression node.  A NULL node emits the literal 0.
+// Only the given node is emitted; ->next siblings are the caller's job.
+static void genExpr(Node *n) {
+    if (!n) { emitRaw("0"); return; }
+
+    switch (n->type) {
+        case NODE_INT_LIT:
+            emitRaw("%d", n->ival);
+            break;
+
+        case NODE_DBL_LIT: {
+            // 17 significant digits round-trip any double; plain "%g" keeps
+            // only 6 and would silently change the constant's value.
+            char num[64];
+            snprintf(num, sizeof(num), "%.17g", n->dval);
+            emitRaw("%s", num);
+            // "%g"-style output drops the fraction of whole values ("2"),
+            // which C would read as an int literal.  Keep the literal a
+            // double so it matches the %g format chosen for TYPE_DBL.
+            // ('n' covers "inf"/"nan", which are left untouched.)
+            if (!strpbrk(num, ".eEn")) emitRaw(".0");
+            break;
+        }
+
+        case NODE_STR_LIT:
+            // Emit as a C string literal
+            emitRaw("\"");
+            for (const char *p = n->sval; p && *p; p++) {
+                if (*p == '"')       emitRaw("\\\"");
+                else if (*p == '\\') emitRaw("\\\\");
+                else if (*p == '\n') emitRaw("\\n");
+                else if (*p == '\r') emitRaw("\\r");   // raw CR would split the C literal
+                else if (*p == '\t') emitRaw("\\t");
+                else                 emitRaw("%c", *p);
+            }
+            emitRaw("\"");
+            break;
+
+        case NODE_IDENT: {
+            const char *cn = cleanName(n->sval);
+            // Inside a function, check if this is the function name (return var)
+            if (gInFunc && gFuncName && strIcmp(n->sval, gFuncName) == 0) {
+                emitRaw("%s_ret", cn);
+            } else if (isByrefParam(n->sval)) {
+                // BYREF parameter: dereference the pointer
+                emitRaw("(*%s)", cn);
+            } else {
+                emitRaw("%s", cn);
+            }
+            break;
+        }
+
+        case NODE_ARRAY_REF:
+            emitRaw("%s[", cleanName(n->sval));
+            genArrayFlatIndex(n->sval, n->a);
+            emitRaw("]");
+            break;
+
+        case NODE_DOT_ACCESS:
+            // base.field — base is in n->a, field name in n->sval
+            genExpr(n->a);
+            emitRaw(".%s", cleanName(n->sval));
+            break;
+
+        case NODE_UNOP:
+            if (n->ival == TOK_MINUS) {
+                emitRaw("(-(");
+                genExpr(n->a);
+                emitRaw("))");
+            } else if (n->ival == TOK_NOT) {
+                // If operand is a comparison, use logical NOT for cleaner code
+                int isCmp = (n->a->type == NODE_BINOP &&
+                    (n->a->ival == TOK_EQ || n->a->ival == TOK_NE ||
+                     n->a->ival == TOK_LT || n->a->ival == TOK_GT ||
+                     n->a->ival == TOK_LE || n->a->ival == TOK_GE ||
+                     n->a->ival == TOK_AND || n->a->ival == TOK_OR));
+                if (isCmp || n->a->type == NODE_UNOP) {
+                    emitRaw("(!(");
+                    genExpr(n->a);
+                    emitRaw("))");
+                } else {
+                    // Any other operand: bitwise complement of its int value
+                    emitRaw("(~(int)(");
+                    genExpr(n->a);
+                    emitRaw("))");
+                }
+            }
+            break;
+
+        case NODE_BINOP: {
+            int op = n->ival;
+            // String concatenation
+            if (n->dataType == TYPE_STR && (op == TOK_PLUS || op == TOK_AMP)) {
+                emitRaw("_bconcat(");
+                genExpr(n->a);
+                emitRaw(", ");
+                genExpr(n->b);
+                emitRaw(")");
+                break;
+            }
+            // String comparison: strcmp(a, b) <op> 0
+            if (isStringExpr(n->a) && isStringExpr(n->b)) {
+                const char *cmpOp;
+                switch (op) {
+                    case TOK_EQ: cmpOp = "==0"; break;
+                    case TOK_NE: cmpOp = "!=0"; break;
+                    case TOK_LT: cmpOp = "<0";  break;
+                    case TOK_GT: cmpOp = ">0";  break;
+                    case TOK_LE: cmpOp = "<=0"; break;
+                    case TOK_GE: cmpOp = ">=0"; break;
+                    default:     cmpOp = "==0"; break;
+                }
+                emitRaw("(strcmp(");
+                genExpr(n->a);
+                emitRaw(", ");
+                genExpr(n->b);
+                emitRaw(")%s)", cmpOp);
+                break;
+            }
+            // Power operator: emit as pow() call
+            if (op == TOK_CARET) {
+                emitRaw("pow(");
+                genExpr(n->a);
+                emitRaw(", ");
+                genExpr(n->b);
+                emitRaw(")");
+                break;
+            }
+            // Integer division: cast operands to int
+            if (op == TOK_BSLASH) {
+                emitRaw("((int)(");
+                genExpr(n->a);
+                emitRaw(") / (int)(");
+                genExpr(n->b);
+                emitRaw("))");
+                break;
+            }
+            // Float division: BASIC '/' always produces a floating-point result
+            if (op == TOK_SLASH) {
+                emitRaw("((double)(");
+                genExpr(n->a);
+                emitRaw(") / (double)(");
+                genExpr(n->b);
+                emitRaw("))");
+                break;
+            }
+            // All other numeric and logical binary operators
+            emitRaw("(");
+            genExpr(n->a);
+            switch (op) {
+                case TOK_PLUS:  emitRaw(" + ");  break;
+                case TOK_MINUS: emitRaw(" - ");  break;
+                case TOK_STAR:  emitRaw(" * ");  break;
+                case TOK_MOD:   emitRaw(" %% "); break;
+                case TOK_EQ:    emitRaw(" == "); break;
+                case TOK_NE:    emitRaw(" != "); break;
+                case TOK_LT:    emitRaw(" < ");  break;
+                case TOK_GT:    emitRaw(" > ");  break;
+                case TOK_LE:    emitRaw(" <= "); break;
+                case TOK_GE:    emitRaw(" >= "); break;
+                case TOK_AND:   emitRaw(" & ");  break;
+                case TOK_OR:    emitRaw(" | ");  break;
+                case TOK_XOR:   emitRaw(" ^ ");  break;
+                default:        emitRaw(" ? ");  break;
+            }
+            genExpr(n->b);
+            emitRaw(")");
+            break;
+        }
+
+        case NODE_FUNC_CALL:
+            // SIZEOF(TypeName) — emit sizeof(struct _b_TypeName)
+            if (strIcmp(n->sval, "SIZEOF") == 0 && n->a &&
+                n->a->type == NODE_IDENT) {
+                int ui = udtLookup(n->a->sval);
+                if (ui >= 0) {
+                    emitRaw("(long)sizeof(%s)", cUdtTypeStr(ui));
+                    break;
+                }
+            }
+            if (isBuiltinFunc(n->sval)) {
+                genBuiltinCall(n->sval, n->a);
+            } else {
+                emitRaw("%s(", cleanName(n->sval));
+                // Generate arguments, applying BYREF (&) where needed
+                Symbol *fsym = symLookup(n->sval);
+                int pi = 0;
+                for (Node *a = n->a; a; a = a->next, pi++) {
+                    if (a != n->a) emitRaw(", ");
+                    int needRef = (fsym && pi < fsym->paramCount &&
+                                   fsym->paramModes[pi] == PASS_BYREF);
+                    if (needRef && a->type == NODE_IDENT) {
+                        if (isByrefParam(a->sval)) {
+                            // Already a pointer in the generated C: forward
+                            // it as-is ("&p" would be a pointer-to-pointer).
+                            emitRaw("%s", cleanName(a->sval));
+                        } else if (gInFunc && gFuncName &&
+                                   strIcmp(a->sval, gFuncName) == 0) {
+                            // The enclosing FUNCTION's return variable
+                            emitRaw("&%s_ret", cleanName(a->sval));
+                        } else {
+                            emitRaw("&%s", cleanName(a->sval));
+                        }
+                    } else if (needRef && a->type == NODE_ARRAY_REF) {
+                        emitRaw("&%s[", cleanName(a->sval));
+                        genArrayFlatIndex(a->sval, a->a);
+                        emitRaw("]");
+                    } else {
+                        genExpr(a);
+                    }
+                }
+                emitRaw(")");
+            }
+            break;
+
+        default:
+            emitRaw("/* unknown expr node %d */0", n->type);
+            break;
+    }
+}
+
+
+// Generate a variable declaration in C, initialised to the type's default
+// value.  isStatic selects a C 'static' declaration.
+static void genVarDecl(const char *name, DataType dt, int isStatic) {
+    const char *cn = cleanName(name);
+    // emit() starts the output line; emitRaw() below continues it.
+    if (isStatic) emit("static ");
+    else          emit("");
+
+    if (dt == TYPE_STR)
+        emitRaw("char *%s _BUNUSED = _bstr(\"\");\n", cn);
+    else
+        emitRaw("%s %s _BUNUSED = %s;\n", cTypeStr(dt), cn, cDefaultVal(dt));
+}
+
+
+// Emit a row-major flattened index for multidimensional array access.
+// For 1D, just emits the single index expression (backward compatible).
+// For nD, emits: ((i0) * nameDim1 + (i1)) * nameDim2 + (i2) ...
+static void genArrayFlatIndex(const char *name, Node *indices) {
+    // Private copy of the cleaned name, kept across the nested genExpr()
+    // calls below.
+    char cn[MAX_IDENT];
+    strncpy(cn, cleanName(name), MAX_IDENT - 1);
+    cn[MAX_IDENT - 1] = '\0';
+
+    // Count dimensions
+    int ndims = 0;
+    for (Node *p = indices; p; p = p->next) ndims++;
+
+    if (ndims <= 1) {
+        genExpr(indices);
+    } else {
+        // Row-major: fold left: acc = idx[0], for k=1..n-1: acc = acc * dimK + idx[k]
+        // For 3D: (((i) * dim1 + (j)) * dim2 + (k))
+        Node *idx = indices;
+        // Emit opening parens for nesting: need (ndims-1) wrapping levels
+        for (int i = 1; i < ndims; i++) emitRaw("(");
+        emitRaw("(");
+        genExpr(idx);
+        emitRaw(")");
+        idx = idx->next;
+        int dimIdx = 1;
+        while (idx) {
+            emitRaw(" * %s_dim%d + (", cn, dimIdx);
+            genExpr(idx);
+            emitRaw("))");  // closes this index and one wrapping level
+            idx = idx->next;
+            dimIdx++;
+        }
+    }
+}
+
+
+// Generate a DIM array declaration (supports multidimensional).
+//   name     - BASIC array name
+//   dt       - element type
+//   sizeList - linked list of upper-bound expressions, one per dimension
+//   ndims    - number of dimensions
+// Each dimension is allocated with (bound + 1) elements; string arrays are
+// filled with empty runtime strings after allocation.
+static void genDimArray(const char *name, DataType dt, Node *sizeList,
+                        int ndims) {
+    char cn[MAX_IDENT];
+    strncpy(cn, cleanName(name), MAX_IDENT - 1);
+    cn[MAX_IDENT - 1] = '\0';
+
+    emit("%s *%s _BUNUSED = NULL;\n", cTypeStr(dt), cn);
+
+    if (ndims <= 1) {
+        // 1D: backward-compatible
+        emit("int %s_size _BUNUSED = 0;\n", cn);
+        if (sizeList) {
+            emit("%s_size = (", cn);
+            genExpr(sizeList);
+            emitRaw(") + 1;\n");
+            emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
+                 cn, cTypeStr(dt), cn, cTypeStr(dt));
+            if (dt == TYPE_STR) {
+                emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
+                     cn, cn);
+            }
+        }
+    } else {
+        // Multi-dimensional: one <name>_dimK variable per dimension
+        for (int i = 0; i < ndims; i++) {
+            emit("int %s_dim%d _BUNUSED = 0;\n", cn, i);
+        }
+        emit("int %s_size _BUNUSED = 0;\n", cn);
+
+        Node *dim = sizeList;
+        for (int i = 0; i < ndims; i++) {
+            emit("%s_dim%d = (", cn, i);
+            genExpr(dim);   // emits 0 if the list is shorter than ndims
+            emitRaw(") + 1;\n");
+            if (dim) dim = dim->next;   // never walk past the end of the list
+        }
+
+        emit("%s_size = ", cn);
+        for (int i = 0; i < ndims; i++) {
+            if (i > 0) emitRaw(" * ");
+            emitRaw("%s_dim%d", cn, i);
+        }
+        emitRaw(";\n");
+
+        emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n",
+             cn, cTypeStr(dt), cn, cTypeStr(dt));
+        if (dt == TYPE_STR) {
+            emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n",
+                 cn, cn);
+        }
+    }
+}
+
+
+// Generate code for a PRINT statement as a single printf call.
+// Each item carries its expression in ->a and its trailing separator in
+// ->ival (1 = semicolon, 2 = comma).
+static void genPrint(Node *pr) {
+    Node *item = pr->a;
+
+    // Empty PRINT: just a newline
+    if (!item) {
+        emit("printf(\"\\n\");\n");
+        return;
+    }
+
+    // Build printf call with format string and arguments
+    emit("printf(\"");
+
+    // First pass: build format string
+    for (Node *it = item; it; it = it->next) {
+        if (it->a) {
+            emitRaw("%s", cFmt(it->a->dataType));
+        }
+        // Separator
+        if (it->ival == 1) {
+            // semicolon: no separator
+        } else if (it->ival == 2) {
+            emitRaw("\\t");  // comma: tab
+        } else if (!it->next) {
+            // Last item with no trailing separator: add newline
+            emitRaw("\\n");
+        }
+    }
+    emitRaw("\"");
+
+    // Second pass: arguments
+    for (Node *it = item; it; it = it->next) {
+        if (it->a) {
+            emitRaw(", ");
+            genExpr(it->a);
+        }
+    }
+    emitRaw(");\n");
+
+    // Free temporary strings created during expression evaluation
+    emit("_bfree_temps();\n");
+}
+
+
+// Generate code for a PRINT USING statement: format string in pu->a,
+// linked list of values in pu->b.
+static void genPrintUsing(Node *pu) {
+    // Initialize format parser with format string
+    emit("_busing_init(");
+    genExpr(pu->a);
+    emitRaw(");\n");
+
+    // Format and print each value
+    for (Node *val = pu->b; val; val = val->next) {
+        if (val->dataType == TYPE_STR) {
+            emit("_busing_str(");
+        } else {
+            emit("_busing_num(");
+        }
+        genExpr(val);
+        emitRaw(");\n");
+    }
+
+    // Print newline and cleanup
+    emit("_busing_end();\n");
+    emit("_bfree_temps();\n");
+}
+
+
+// Generate code for an INPUT statement: optional prompt in inp->sval
+// ("? " when absent), linked list of target variables in inp->a.
+static void genInput(Node *inp) {
+    // Print prompt if any
+    if (inp->sval) {
+        // The prompt becomes a C string literal, so it is escaped the same
+        // way genExpr escapes NODE_STR_LIT; a raw '"' or '\' would
+        // otherwise break the generated code.
+        emit("printf(\"%%s\", \"");
+        for (const char *p = inp->sval; *p; p++) {
+            if (*p == '"')       emitRaw("\\\"");
+            else if (*p == '\\') emitRaw("\\\\");
+            else if (*p == '\n') emitRaw("\\n");
+            else if (*p == '\r') emitRaw("\\r");
+            else if (*p == '\t') emitRaw("\\t");
+            else                 emitRaw("%c", *p);
+        }
+        emitRaw("\");\n");
+    } else {
+        emit("printf(\"? \");\n");
+    }
+    emit("fflush(stdout);\n");
+
+    // Read each variable
+    for (Node *v = inp->a; v; v = v->next) {
+        // A BYREF parameter is already a pointer in the generated C, so it
+        // is passed without '&' (same rule genAssign applies).
+        const char *addr =
+            (v->type == NODE_IDENT && isByrefParam(v->sval)) ? "" : "&";
+        if (v->dataType == TYPE_STR) {
+            emit("{ char _buf[1024]; if(fgets(_buf, sizeof(_buf), stdin)) {\n");
+            gIndent++;
+            emit("_buf[strcspn(_buf, \"\\n\")] = 0;\n");
+            emit("_bstr_assign(%s%s, _buf);\n", addr, cleanName(v->sval));
+            gIndent--;
+            emit("} }\n");
+        } else {
+            emit("scanf(\"%s\", %s%s);\n",
+                 cScanfFmt(v->dataType), addr, cleanName(v->sval));
+        }
+    }
+}
+
+
+// Generate code for an assignment statement (target in n->a, value in
+// n->b).  Every path ends by emitting a _bfree_temps() call.
+static void genAssign(Node *n) {
+    Node *target = n->a;
+    Node *value = n->b;
+
+    // Check if we're assigning to the function return variable
+    if (gInFunc && gFuncName && target->type == NODE_IDENT &&
+        strIcmp(target->sval, gFuncName) == 0) {
+        const char *cn = cleanName(target->sval);
+        if (gFuncRet == TYPE_STR) {
+            emit("_bstr_assign(&%s_ret, ", cn);
+            genExpr(value);
+            emitRaw(");\n");
+        } else {
+            emit("%s_ret = ", cn);
+            genExpr(value);
+            emitRaw(";\n");
+        }
+        emit("_bfree_temps();\n");
+        return;
+    }
+
+    // Dot-access assignment: var.field = expr
+    if (target->type == NODE_DOT_ACCESS) {
+        int strLen = target->ival;  // >0 for fixed-length STRING * N
+        if (target->dataType == TYPE_STR && strLen > 0) {
+            // Fixed-length string: strncpy + null terminate
+            emit("strncpy(");
+            genExpr(target->a);
+            emitRaw(".%s, ", cleanName(target->sval));
+            genExpr(value);
+            emitRaw(", %d);\n", strLen);
+            emit("");
+            genExpr(target->a);
+            emitRaw(".%s[%d] = '\\0';\n", cleanName(target->sval), strLen);
+        } else if (target->dataType == TYPE_STR) {
+            // Dynamic string in struct — unusual but handle it
+            emit("_bstr_assign(&(");
+            genExpr(target->a);
+            emitRaw(".%s), ", cleanName(target->sval));
+            genExpr(value);
+            emitRaw(");\n");
+        } else {
+            // Numeric field
+            emit("");
+            genExpr(target->a);
+            emitRaw(".%s = ", cleanName(target->sval));
+            genExpr(value);
+            emitRaw(";\n");
+        }
+        emit("_bfree_temps();\n");
+        return;
+    }
+
+    // Check if target is a BYREF parameter (needs pointer dereference)
+    int byref = (target->type == NODE_IDENT && isByrefParam(target->sval));
+
+    // String assignment uses _bstr_assign
+    if (target->dataType == TYPE_STR || isStringExpr(value)) {
+        if (target->type == NODE_ARRAY_REF) {
+            emit("_bstr_assign(&%s[", cleanName(target->sval));
+            genArrayFlatIndex(target->sval, target->a);
+            emitRaw("], ");
+        } else if (byref) {
+            emit("_bstr_assign(%s, ", cleanName(target->sval));
+        } else {
+            emit("_bstr_assign(&%s, ", cleanName(target->sval));
+        }
+        genExpr(value);
+        emitRaw(");\n");
+    } else {
+        // Numeric assignment
+        if (target->type == NODE_ARRAY_REF) {
+            emit("%s[", cleanName(target->sval));
+            genArrayFlatIndex(target->sval, target->a);
+            emitRaw("] = ");
+        } else if (byref) {
+            emit("(*%s) = ", cleanName(target->sval));
+        } else {
+            emit("%s = ", cleanName(target->sval));
+        }
+        genExpr(value);
+        emitRaw(";\n");
+    }
+    emit("_bfree_temps();\n");
+}
+
+
+// Generate a SUB or FUNCTION definition as a C function.
+// Parameters are in n->a (->ival holds the passing mode), the body block
+// in n->b.  A FUNCTION additionally gets a local "<name>_ret" variable
+// that is returned at the end of the body.
+static void genFuncDef(Node *n) {
+    int isFunc = (n->type == NODE_FUNC);
+    // Store a permanent copy of the clean function name so it survives
+    // additional cleanName() calls during parameter/body emission
+    char fname[MAX_IDENT];
+    strncpy(fname, cleanName(n->sval), MAX_IDENT - 1);
+    fname[MAX_IDENT - 1] = '\0';
+    DataType ret = isFunc ? n->dataType : TYPE_VOID;
+
+    // Save and set function context
+    int prevInFunc = gInFunc;
+    const char *prevFuncName = gFuncName;
+    DataType prevFuncRet = gFuncRet;
+    gInFunc = 1;
+    gFuncName = n->sval;
+    gFuncRet = ret;
+
+    // Function signature
+    emitRaw("%s %s(", cTypeStr(ret), fname);
+    int first = 1;
+    for (Node *p = n->a; p; p = p->next) {
+        if (!first) emitRaw(", ");
+        first = 0;
+        if (p->ival == PASS_BYREF) {
+            // BYREF: pass a pointer to the caller's variable
+            emitRaw("%s *%s", cTypeStr(p->dataType), cleanName(p->sval));
+        } else {
+            if (p->dataType == TYPE_STR)
+                emitRaw("const char *%s", cleanName(p->sval));
+            else
+                emitRaw("%s %s", cTypeStr(p->dataType), cleanName(p->sval));
+        }
+    }
+    if (first) emitRaw("void");  // no params
+    emitRaw(") {\n");
+    gIndent++;
+
+    // For FUNCTION: declare the return variable (named <fname>_ret)
+    if (isFunc) {
+        emit("%s %s_ret = %s;\n", cTypeStr(ret), fname, cDefaultVal(ret));
+    }
+
+    // Generate body
+    if (n->b) genBlock(n->b);
+
+    // Return statement for FUNCTION
+    if (isFunc) {
+        emit("return %s_ret;\n", fname);
+    }
+
+    gIndent--;
+    emitRaw("}\n\n");
+
+    // Restore context
+    gInFunc = prevInFunc;
+    gFuncName = prevFuncName;
+    gFuncRet = prevFuncRet;
+}
+
+
+// Generate code for a single statement
+static void genStmt(Node *n) {
+    if (!n) return;
+
+    switch (n->type) {
+        case NODE_LABEL:
+            // Only emit C labels that are actually targeted by GOTO/GOSUB,
+            // to avoid -Wunused-label warnings.
+ if (n->sval) { + // Named label + if (isGotoStrTarget(n->sval)) + emitRaw("%s: ;\n", cleanName(n->sval)); + } else if (isGotoTarget(n->ival)) { + emitRaw("L%d: ;\n", n->ival); + } + break; + + case NODE_TYPE_DEF: + // TYPE definitions are emitted globally in generate(), not here + break; + + case NODE_DIM: + if (n->dataType == TYPE_UDT && n->ival == 0) { + // UDT scalar: struct _b_Name var; memset(&var, 0, sizeof(var)); + const char *uts = cUdtTypeStr(n->ival2); + char cn[MAX_IDENT]; + strncpy(cn, cleanName(n->sval), MAX_IDENT - 1); + cn[MAX_IDENT - 1] = '\0'; + emit("%s %s _BUNUSED;\n", uts, cn); + emit("memset(&%s, 0, sizeof(%s));\n", cn, cn); + } else if (n->dataType == TYPE_UDT && n->ival > 0) { + // UDT array + const char *uts = cUdtTypeStr(n->ival2); + char cn[MAX_IDENT]; + strncpy(cn, cleanName(n->sval), MAX_IDENT - 1); + cn[MAX_IDENT - 1] = '\0'; + emit("%s *%s _BUNUSED = NULL;\n", uts, cn); + emit("int %s_size _BUNUSED = 0;\n", cn); + // Compute size and allocate + if (n->a) { + if (n->ival <= 1) { + emit("%s_size = (", cn); + genExpr(n->a); + emitRaw(") + 1;\n"); + } else { + Node *dim = n->a; + for (int i = 0; i < n->ival; i++, dim = dim->next) { + emit("int %s_dim%d _BUNUSED = (", cn, i); + genExpr(dim); + emitRaw(") + 1;\n"); + } + emit("%s_size = ", cn); + for (int i = 0; i < n->ival; i++) { + if (i > 0) emitRaw(" * "); + emitRaw("%s_dim%d", cn, i); + } + emitRaw(";\n"); + } + emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n", + cn, uts, cn, uts); + } + } else if (n->ival) { // array -- n->ival is ndims + genDimArray(n->sval, n->dataType, n->a, n->ival); + } else { // scalar + genVarDecl(n->sval, n->dataType, 0); + } + break; + + case NODE_REDIM: { + char rcn[MAX_IDENT]; + strncpy(rcn, cleanName(n->sval), MAX_IDENT - 1); + rcn[MAX_IDENT - 1] = '\0'; + int ndims = n->ival; + + if (ndims <= 1) { + // 1D REDIM: backward-compatible realloc + emit("{ int _old_sz = %s_size;\n", rcn); + gIndent++; + emit("%s_size = (", rcn); + genExpr(n->a); + emitRaw(") + 
1;\n"); + emit("%s = (%s*)realloc(%s, %s_size * sizeof(%s));\n", + rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType)); + emit("if (%s_size > _old_sz)\n", rcn); + gIndent++; + emit("memset(%s + _old_sz, 0, (%s_size - _old_sz) * sizeof(%s));\n", + rcn, rcn, cTypeStr(n->dataType)); + gIndent--; + gIndent--; + emit("}\n"); + } else { + // Multi-dim REDIM: recompute dims, free + calloc + emit("{\n"); + gIndent++; + Node *dim = n->a; + for (int i = 0; i < ndims; i++, dim = dim->next) { + emit("%s_dim%d = (", rcn, i); + genExpr(dim); + emitRaw(") + 1;\n"); + } + emit("%s_size = ", rcn); + for (int i = 0; i < ndims; i++) { + if (i > 0) emitRaw(" * "); + emitRaw("%s_dim%d", rcn, i); + } + emitRaw(";\n"); + emit("free(%s);\n", rcn); + emit("%s = (%s*)calloc(%s_size, sizeof(%s));\n", + rcn, cTypeStr(n->dataType), rcn, rcn, cTypeStr(n->dataType)); + if (n->dataType == TYPE_STR) { + emit("for (int _i = 0; _i < %s_size; _i++) %s[_i] = _bstr(\"\");\n", + rcn, rcn); + } + gIndent--; + emit("}\n"); + } + break; + } + + case NODE_LOCAL: + genVarDecl(n->sval, n->dataType, 0); + break; + + case NODE_STATIC: + genVarDecl(n->sval, n->dataType, 1); + break; + + case NODE_ASSIGN: + genAssign(n); + break; + + case NODE_PRINT: + genPrint(n); + break; + + case NODE_PRINT_USING: + genPrintUsing(n); + break; + + case NODE_INPUT: + genInput(n); + break; + + case NODE_IF: + emit("if ("); + genExpr(n->a); + emitRaw(") {\n"); + gIndent++; + if (n->b) genBlock(n->b); + gIndent--; + if (n->c) { + if (n->c->type == NODE_IF) { + // ELSEIF: emit as "} else if (...)" + emit("} else "); + // Don't indent the nested if + genStmt(n->c); + return; // the nested if handles its own closing + } else { + emit("} else {\n"); + gIndent++; + genBlock(n->c); + gIndent--; + } + } + emit("}\n"); + break; + + case NODE_FOR: { + const char *vn = cleanName(n->sval); + // Determine the C type for the loop variable + const char *vtype = cTypeStr(inferVarType(n->sval)); + // When inside a function, the loop 
variable may not be declared + // locally. Wrap in a block and declare the variable to be safe. + // Any prior LOCAL/DIM of the same name has _BUNUSED to suppress + // shadowing warnings. + int needDecl = gInFunc; + if (n->c) { + // FOR with STEP: use a block with a step variable so the + // step expression is evaluated once, and the comparison + // direction adapts to the sign of the step at runtime. + emit("{ /* FOR %s with STEP */\n", vn); + gIndent++; + if (needDecl) emit("%s %s;\n", vtype, vn); + emit("%s _step_%s = ", vtype, vn); + genExpr(n->c); + emitRaw(";\n"); + emit("for (%s = ", vn); + genExpr(n->a); + emitRaw("; _step_%s > 0 ? %s <= ", vn, vn); + genExpr(n->b); + emitRaw(" : %s >= ", vn); + genExpr(n->b); + emitRaw("; %s += _step_%s) {\n", vn, vn); + gIndent++; + if (n->d) genBlock(n->d); + gIndent--; + emit("}\n"); + gIndent--; + emit("}\n"); + } else { + // Default step = 1: simple ascending loop + if (needDecl) { + emit("{ %s %s;\n", vtype, vn); + gIndent++; + } + emit("for (%s = ", vn); + genExpr(n->a); + emitRaw("; %s <= ", vn); + genExpr(n->b); + emitRaw("; %s++) {\n", vn); + gIndent++; + if (n->d) genBlock(n->d); + gIndent--; + emit("}\n"); + if (needDecl) { + gIndent--; + emit("}\n"); + } + } + break; + } + + case NODE_WHILE: + emit("while ("); + genExpr(n->a); + emitRaw(") {\n"); + gIndent++; + if (n->b) genBlock(n->b); + gIndent--; + emit("}\n"); + break; + + case NODE_DO_LOOP: { + int isUntil = n->ival & 1; + int atBottom = n->ival & 2; + + if (!n->a) { + // Infinite loop: DO ... 
LOOP + emit("for (;;) {\n"); + } else if (atBottom) { + emit("do {\n"); + } else { + // Condition at top + emit("while ("); + if (isUntil) emitRaw("!("); + genExpr(n->a); + if (isUntil) emitRaw(")"); + emitRaw(") {\n"); + } + gIndent++; + if (n->b) genBlock(n->b); + gIndent--; + if (atBottom && n->a) { + emit("} while ("); + if (isUntil) emitRaw("!("); + genExpr(n->a); + if (isUntil) emitRaw(")"); + emitRaw(");\n"); + } else { + emit("}\n"); + } + break; + } + + case NODE_GOTO: + if (n->sval) { + emit("goto %s;\n", cleanName(n->sval)); + } else if (n->ival) { + emit("goto L%d;\n", n->ival); + } + break; + + case NODE_GOSUB: + if (!gRelease) + emit("if (_gosub_sp >= _GOSUB_MAX) { fprintf(stderr, \"GOSUB stack overflow\\n\"); exit(1); }\n"); + emit("_gosub_stack[_gosub_sp++] = %d;\n", n->ival2); + if (n->sval) + emit("goto %s;\n", cleanName(n->sval)); + else + emit("goto L%d;\n", n->ival); + emitRaw("_gr%d: ;\n", n->ival2); + break; + + case NODE_RETURN: + if (gInFunc) { + // Return from FUNCTION + if (n->a) { + if (gFuncRet == TYPE_STR) { + emit("_bstr_assign(&%s_ret, ", + cleanName(gFuncName)); + genExpr(n->a); + emitRaw(");\n"); + } else { + emit("%s_ret = ", cleanName(gFuncName)); + genExpr(n->a); + emitRaw(";\n"); + } + } + emit("return %s_ret;\n", cleanName(gFuncName)); + } else { + // RETURN from GOSUB: dispatch back using the stack + if (!gRelease) + emit("if (_gosub_sp <= 0) { fprintf(stderr, \"RETURN without GOSUB\\n\"); exit(1); }\n"); + emit("switch (_gosub_stack[--_gosub_sp]) {\n"); + for (int i = 0; i < gGosubCount; i++) { + emit(" case %d: goto _gr%d;\n", i, i); + } + emit("}\n"); + } + break; + + case NODE_EXIT: + if (n->ival == TOK_FOR || n->ival == TOK_WHILE || n->ival == TOK_DO) + emit("break;\n"); + else if (n->ival == TOK_SUB) + emit("return;\n"); + else if (n->ival == TOK_FUNCTION && gInFunc && gFuncName) + emit("return %s_ret;\n", cleanName(gFuncName)); + break; + + case NODE_CONTINUE: + emit("continue;\n"); + break; + + case NODE_CALL: { + 
const char *cn = cleanName(n->sval); + emit("%s(", cn); + Symbol *fsym = symLookup(n->sval); + int pi = 0; + for (Node *a = n->a; a; a = a->next, pi++) { + if (a != n->a) emitRaw(", "); + int needRef = (fsym && pi < fsym->paramCount && + fsym->paramModes[pi] == PASS_BYREF); + if (needRef && a->type == NODE_IDENT) { + emitRaw("&%s", cleanName(a->sval)); + } else if (needRef && a->type == NODE_ARRAY_REF) { + emitRaw("&%s[", cleanName(a->sval)); + genExpr(a->a); + emitRaw("]"); + } else { + genExpr(a); + } + } + emitRaw(");\n"); + emit("_bfree_temps();\n"); + break; + } + + case NODE_SUB: + case NODE_FUNC: + // These are generated separately before main() + break; + + case NODE_END: + emit("exit(0);\n"); + break; + + case NODE_OPEN: { + if (n->ival == 4) { + // RANDOM mode + emit("_bfile_open_random("); + genExpr(n->b); + emitRaw(", "); + genExpr(n->a); + emitRaw(", "); + if (n->c) { + genExpr(n->c); + } else { + emitRaw("0"); + } + emitRaw(");\n"); + } else { + const char *modes[] = {"r", "w", "a", "rb"}; + emit("_bfile_open("); + genExpr(n->b); + emitRaw(", "); + genExpr(n->a); + emitRaw(", \"%s\");\n", modes[n->ival]); + } + break; + } + + case NODE_GET: + // GET #filenum, record, variable + emit("fseek(_bfile_get("); + genExpr(n->a); + emitRaw("), ("); + genExpr(n->b); + emitRaw(" - 1) * _bfile_reclen["); + genExpr(n->a); + emitRaw("], SEEK_SET);\n"); + emit("fread(&%s, _bfile_reclen[", cleanName(n->c->sval)); + genExpr(n->a); + emitRaw("], 1, _bfile_get("); + genExpr(n->a); + emitRaw("));\n"); + break; + + case NODE_PUT: + // PUT #filenum, record, variable + emit("fseek(_bfile_get("); + genExpr(n->a); + emitRaw("), ("); + genExpr(n->b); + emitRaw(" - 1) * _bfile_reclen["); + genExpr(n->a); + emitRaw("], SEEK_SET);\n"); + emit("fwrite(&%s, _bfile_reclen[", cleanName(n->c->sval)); + genExpr(n->a); + emitRaw("], 1, _bfile_get("); + genExpr(n->a); + emitRaw("));\n"); + break; + + case NODE_CLOSE: + emit("_bfile_close("); + genExpr(n->b); + emitRaw(");\n"); + break; + 
+ case NODE_FILE_PRINT: { + Node *item = n->a; + if (!item) { + // PRINT #n, alone = write newline + emit("fprintf(_bfile_get("); + genExpr(n->b); + emitRaw("), \"\\n\");\n"); + break; + } + // Build fprintf with format string and arguments + emit("fprintf(_bfile_get("); + genExpr(n->b); + emitRaw("), \""); + for (Node *it = item; it; it = it->next) { + if (it->a) emitRaw("%s", cFmt(it->a->dataType)); + if (it->ival == 1) { + // semicolon: no separator + } else if (it->ival == 2) { emitRaw("\\t"); } + else if (!it->next) { emitRaw("\\n"); } + } + emitRaw("\""); + for (Node *it = item; it; it = it->next) { + if (it->a) { emitRaw(", "); genExpr(it->a); } + } + emitRaw(");\n"); + emit("_bfree_temps();\n"); + break; + } + + case NODE_FILE_INPUT: + for (Node *v = n->a; v; v = v->next) { + if (v->dataType == TYPE_STR) { + emit("_bline_input("); + genExpr(n->b); + emitRaw(", &%s);\n", cleanName(v->sval)); + } else { + emit("fscanf(_bfile_get("); + genExpr(n->b); + emitRaw("), \"%s\", &%s);\n", + cScanfFmt(v->dataType), cleanName(v->sval)); + } + } + break; + + case NODE_LINE_INPUT: + emit("_bline_input("); + genExpr(n->b); + emitRaw(", &%s);\n", cleanName(n->a->sval)); + break; + + case NODE_FILE_WRITE: { + // WRITE # outputs CSV-style: strings quoted, comma-separated, newline + int first = 1; + for (Node *e = n->a; e; e = e->next) { + if (!first) { + emit("fprintf(_bfile_get("); + genExpr(n->b); + emitRaw("), \",\");\n"); + } + first = 0; + if (e->dataType == TYPE_STR) { + emit("fprintf(_bfile_get("); + genExpr(n->b); + emitRaw("), \"\\\"%%s\\\"\", "); + genExpr(e); + emitRaw(");\n"); + } else { + emit("fprintf(_bfile_get("); + genExpr(n->b); + emitRaw("), \"%s\", ", cFmt(e->dataType)); + genExpr(e); + emitRaw(");\n"); + } + } + emit("fprintf(_bfile_get("); + genExpr(n->b); + emitRaw("), \"\\n\");\n"); + emit("_bfree_temps();\n"); + break; + } + + case NODE_DATA: + // No-op: DATA items are collected and emitted as a static array + break; + + case NODE_READ: + for (Node 
*v = n->a; v; v = v->next) { + if (v->dataType == TYPE_STR) { + emit("_bstr_assign(&%s, _bdata[_bdata_pos].str);\n", + cleanName(v->sval)); + } else { + emit("%s = (%s)_bdata[_bdata_pos].num;\n", + cleanName(v->sval), cTypeStr(v->dataType)); + } + emit("_bdata_pos++;\n"); + } + break; + + case NODE_RESTORE: + if (n->sval) { + emit("_bdata_pos = %d;\n", dataIndexForLabel(n->sval)); + } else if (n->ival != 0) { + emit("_bdata_pos = %d;\n", dataIndexForLine(n->ival)); + } else { + emit("_bdata_pos = 0;\n"); + } + break; + + case NODE_CONST_DECL: + // No runtime code for constants — they're substituted at parse time + break; + + case NODE_SWAP: { + // Determine the type from the left operand + DataType swapType = n->a->dataType; + const char *ctype = "double"; + if (swapType == TYPE_BYTE) ctype = "uint8_t"; + else if (swapType == TYPE_INT) ctype = "int16_t"; + else if (swapType == TYPE_LONG) ctype = "int32_t"; + else if (swapType == TYPE_FLOAT) ctype = "float"; + else if (swapType == TYPE_DBL) ctype = "double"; + + if (swapType == TYPE_STR) { + // String swap: just swap the pointers + emit("{ char *_swap_tmp = "); + genExpr(n->a); + emitRaw("; "); + genExpr(n->a); + emitRaw(" = "); + genExpr(n->b); + emitRaw("; "); + genExpr(n->b); + emitRaw(" = _swap_tmp; }\n"); + } else { + emit("{ %s _swap_tmp = ", ctype); + genExpr(n->a); + emitRaw("; "); + genExpr(n->a); + emitRaw(" = "); + genExpr(n->b); + emitRaw("; "); + genExpr(n->b); + emitRaw(" = _swap_tmp; }\n"); + } + break; + } + + case NODE_RANDOMIZE: + if (n->a) { + emit("srand((unsigned)("); + genExpr(n->a); + emitRaw("));\n"); + } else { + emit("srand((unsigned)time(NULL));\n"); + } + break; + + case NODE_SELECT: { + // Emit test expression into a temp variable + static int selectId = 0; + int sid = selectId++; + DataType stype = n->a->dataType; + if (stype == TYPE_STR) { + emit("{ const char *_sel%d = ", sid); + genExpr(n->a); + emitRaw(";\n"); + } else { + emit("{ double _sel%d = ", sid); + genExpr(n->a); + 
emitRaw(";\n"); + } + // Emit CASE blocks as if/else if chain + int first = 1; + for (Node *c = n->b; c; c = c->next) { + if (c->ival == 1) { + // CASE ELSE + if (!first) emit("} else {\n"); + else emit("{\n"); + } else { + if (!first) emit("} else if ("); + else emit("if ("); + // Emit condition for each value, joined with || + int firstVal = 1; + for (Node *v = c->a; v; v = v->next) { + if (!firstVal) emitRaw(" || "); + if (v->ival2 == 1) { + // IS comparison: v->ival is the comparison op, v->b is the value + emitRaw("(_sel%d ", sid); + switch (v->ival) { + case TOK_EQ: emitRaw("== "); break; + case TOK_NE: emitRaw("!= "); break; + case TOK_LT: emitRaw("< "); break; + case TOK_GT: emitRaw("> "); break; + case TOK_LE: emitRaw("<= "); break; + case TOK_GE: emitRaw(">= "); break; + } + genExpr(v->b); + emitRaw(")"); + } else if (v->ival2 == 2) { + // Range: v->a TO v->b + emitRaw("(_sel%d >= ", sid); + genExpr(v->a); + emitRaw(" && _sel%d <= ", sid); + genExpr(v->b); + emitRaw(")"); + } else { + // Single value + if (stype == TYPE_STR) { + emitRaw("(strcmp(_sel%d, ", sid); + genExpr(v); + emitRaw(") == 0)"); + } else { + emitRaw("(_sel%d == ", sid); + genExpr(v); + emitRaw(")"); + } + } + firstVal = 0; + } + emitRaw(") {\n"); + } + gIndent++; + for (Node *s = c->b; s; s = s->next) + genStmt(s); + gIndent--; + first = 0; + } + if (!first) emit("}\n"); + emit("}\n"); + break; + } + + case NODE_ON_GOTO: + emit("switch ((int)("); + genExpr(n->a); + emitRaw(")) {\n"); + { + int idx = 1; + for (Node *lab = n->b; lab; lab = lab->next, idx++) { + if (lab->type == NODE_INT_LIT) { + emit(" case %d: goto L%d; break;\n", idx, lab->ival); + } else { + emit(" case %d: goto %s; break;\n", idx, cleanName(lab->sval)); + } + } + } + emit("}\n"); + break; + + case NODE_ON_GOSUB: + emit("switch ((int)("); + genExpr(n->a); + emitRaw(")) {\n"); + { + int idx = 1; + int rpid = n->ival2; // first return-point id + for (Node *lab = n->b; lab; lab = lab->next, idx++, rpid++) { + if 
(lab->type == NODE_INT_LIT) { + emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto L%d; break;\n", + idx, rpid, lab->ival); + } else { + emit(" case %d: _gosub_stack[_gosub_sp++] = %d; goto %s; break;\n", + idx, rpid, cleanName(lab->sval)); + } + } + } + emit("}\n"); + // Emit return labels + { + int rpid = n->ival2; + for (Node *lab = n->b; lab; lab = lab->next, rpid++) { + emitRaw("_gr%d: ;\n", rpid); + } + } + break; + + case NODE_MID_ASSIGN: + emit("_bmid_assign(&"); + genExpr(n->a); + emitRaw(", "); + genExpr(n->b); + emitRaw(", "); + genExpr(n->c); + emitRaw(", "); + genExpr(n->d); + emitRaw(");\n"); + break; + + default: + emit("/* unhandled node type %d */\n", n->type); + break; + } +} + + +// Generate code for a block (linked list of statements) +static void genBlock(Node *blk) { + if (!blk) return; + Node *s = (blk->type == NODE_BLOCK) ? blk->a : blk; + while (s) { + genStmt(s); + s = s->next; + } +} + + +// Collect all SUB/FUNCTION nodes from the AST into an array +static void collectFuncs(Node *n, Node **funcs, int *count, int max) { + if (!n) return; + if (n->type == NODE_SUB || n->type == NODE_FUNC) { + if (*count >= max) + fatal(n->line, "Too many SUB/FUNCTION definitions (max %d)", max); + funcs[(*count)++] = n; + } + if (n->type == NODE_BLOCK || n->type == NODE_PROGRAM) { + Node *s = n->a; + while (s) { + collectFuncs(s, funcs, count, max); + s = s->next; + } + } +} + + +// Recursively collect all NODE_DATA nodes from the AST +static void collectData(Node *n, Node **data, int *count, int max) { + if (!n) return; + if (n->type == NODE_DATA) { + if (*count >= max) + fatal(n->line, "Too many DATA statements (max %d)", max); + data[(*count)++] = n; + } + // When a NODE_LABEL is followed by NODE_DATA via ->next, tag the + // DATA node with the BASIC line number (stored in ival) or named + // label (stored in sval) so that RESTORE can find it. 
+ if (n->type == NODE_LABEL && n->next && n->next->type == NODE_DATA) { + if (n->sval) + n->next->sval = n->sval; + else + n->next->ival = n->ival; + } + // Walk into blocks, programs, and sub/function bodies + if (n->type == NODE_BLOCK || n->type == NODE_PROGRAM) { + Node *s = n->a; + while (s) { + collectData(s, data, count, max); + s = s->next; + } + } + // Also collect from SUB/FUNCTION bodies (DATA is global in BASIC) + if (n->type == NODE_SUB || n->type == NODE_FUNC) { + collectData(n->b, data, count, max); + } + // Walk if/else branches + if (n->type == NODE_IF) { + collectData(n->b, data, count, max); + collectData(n->c, data, count, max); + } + // Walk loop bodies + if (n->type == NODE_FOR || n->type == NODE_WHILE || n->type == NODE_DO_LOOP) { + Node *body = (n->type == NODE_FOR) ? n->d : n->b; + collectData(body, data, count, max); + } + // Walk SELECT CASE bodies + if (n->type == NODE_SELECT) { + for (Node *c = n->b; c; c = c->next) { + for (Node *s = c->b; s; s = s->next) + collectData(s, data, count, max); + } + } +} + + +// Global storage for RESTORE line-number-to-data-index mapping +#define MAX_DATA_LINES 512 +static int gDataLineNums[MAX_DATA_LINES]; +static int gDataLineIdxs[MAX_DATA_LINES]; +static int gDataLineCount = 0; + +// Global storage for RESTORE named-label-to-data-index mapping +static char *gDataLabelNames[MAX_DATA_LINES]; +static int gDataLabelIdxs[MAX_DATA_LINES]; +static int gDataLabelCount = 0; + +// Look up the data index for a RESTORE target line number +static int dataIndexForLine(int lnum) { + for (int i = 0; i < gDataLineCount; i++) + if (gDataLineNums[i] == lnum) return gDataLineIdxs[i]; + return 0; // fallback to beginning +} + + +// Look up the data index for a RESTORE target named label +static int dataIndexForLabel(const char *name) { + for (int i = 0; i < gDataLabelCount; i++) + if (strIcmp(gDataLabelNames[i], name) == 0) + return gDataLabelIdxs[i]; + return 0; // fallback to beginning +} + + +// Emit the runtime library 
(debug or release variant). Provides string +// operations, temp management, file I/O, and dynamic array support. +static void emitRuntime(void) { + // Common headers and defines — same in both modes + fprintf(gOut, + "/* ---- BASIC Runtime Library (%s) ---- */\n" + "#include \n" + "#include \n" + "#include \n" + "#include \n" + "#include \n" + "#include \n" + "#include \n\n" + "#ifdef __GNUC__\n" + "#define _BUNUSED __attribute__((unused))\n" + "#else\n" + "#define _BUNUSED\n" + "#endif\n\n" + + "/* Temporary string pool: collects intermediate strings for cleanup */\n" + "#define _BMAX_TEMPS 256\n" + "static char *_btemps[_BMAX_TEMPS] _BUNUSED;\n" + "static int _btmp_count _BUNUSED = 0;\n\n" + + "/* Register a heap string as temporary (will be freed by _bfree_temps) */\n" + "static _BUNUSED char *_btmp(char *s) {\n" + " if (_btmp_count < _BMAX_TEMPS) _btemps[_btmp_count++] = s;\n" + " return s;\n" + "}\n\n" + + "/* Free all registered temporary strings */\n" + "static _BUNUSED void _bfree_temps(void) {\n" + " for (int i = 0; i < _btmp_count; i++) free(_btemps[i]);\n" + " _btmp_count = 0;\n" + "}\n\n", + gRelease ? 
"release" : "debug" + ); + + // String functions — debug vs release + if (gRelease) { + fprintf(gOut, + "static _BUNUSED char *_bstr(const char *s) {\n" + " char *d = (char*)malloc(strlen(s) + 1);\n" + " strcpy(d, s);\n" + " return d;\n" + "}\n\n" + + "static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n" + " if (*dest) free(*dest);\n" + " *dest = _bstr(src);\n" + "}\n\n" + + "static _BUNUSED char *_bconcat(const char *a, const char *b) {\n" + " size_t la = strlen(a), lb = strlen(b);\n" + " char *r = (char*)malloc(la + lb + 1);\n" + " memcpy(r, a, la);\n" + " memcpy(r + la, b, lb);\n" + " r[la + lb] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bchr(int code) {\n" + " char *r = (char*)malloc(2);\n" + " r[0] = (char)code; r[1] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bstr_of_int(double val) {\n" + " char *r = (char*)malloc(64);\n" + " if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n" + " else sprintf(r, \"%%g\", val);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bmid(const char *s, int start, int len) {\n" + " int slen = (int)strlen(s);\n" + " start--;\n" + " if (start < 0) start = 0;\n" + " if (start >= slen) return _btmp(_bstr(\"\"));\n" + " if (len < 0 || start + len > slen) len = slen - start;\n" + " char *r = (char*)malloc(len + 1);\n" + " memcpy(r, s + start, len);\n" + " r[len] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n" + + "static _BUNUSED char *_bright(const char *s, int n) {\n" + " int slen = (int)strlen(s);\n" + " if (n >= slen) return _btmp(_bstr(s));\n" + " return _btmp(_bstr(s + slen - n));\n" + "}\n\n" + + "static _BUNUSED char *_bucase(const char *s) {\n" + " char *r = _bstr(s);\n" + " for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_blcase(const char *s) {\n" + " char *r = _bstr(s);\n" 
+ " for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n" + " const char *p = strstr(haystack, needle);\n" + " return p ? (int)(p - haystack) + 1 : 0;\n" + "}\n\n" + + "static _BUNUSED double _babs(double x) { return x < 0 ? -x : x; }\n\n" + ); + } else { + fprintf(gOut, + "static _BUNUSED char *_bstr(const char *s) {\n" + " if (!s) s = \"\";\n" + " char *d = (char*)malloc(strlen(s) + 1);\n" + " if (!d) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" + " strcpy(d, s);\n" + " return d;\n" + "}\n\n" + + "static _BUNUSED void _bstr_assign(char **dest, const char *src) {\n" + " if (*dest) free(*dest);\n" + " *dest = _bstr(src ? src : \"\");\n" + "}\n\n" + + "static _BUNUSED char *_bconcat(const char *a, const char *b) {\n" + " if (!a) a = \"\";\n" + " if (!b) b = \"\";\n" + " size_t la = strlen(a), lb = strlen(b);\n" + " char *r = (char*)malloc(la + lb + 1);\n" + " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" + " memcpy(r, a, la);\n" + " memcpy(r + la, b, lb);\n" + " r[la + lb] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bchr(int code) {\n" + " char *r = (char*)malloc(2);\n" + " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" + " r[0] = (char)code; r[1] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bstr_of_int(double val) {\n" + " char *r = (char*)malloc(64);\n" + " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" + " if (val == (int)val) sprintf(r, \"%%d\", (int)val);\n" + " else sprintf(r, \"%%g\", val);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bmid(const char *s, int start, int len) {\n" + " if (!s) return _btmp(_bstr(\"\"));\n" + " int slen = (int)strlen(s);\n" + " start--;\n" + " if (start < 0) start = 0;\n" + " if (start >= slen) return _btmp(_bstr(\"\"));\n" + " if (len < 0 || start + len > slen) len = slen 
- start;\n" + " char *r = (char*)malloc(len + 1);\n" + " if (!r) { fprintf(stderr, \"Out of memory\\n\"); exit(1); }\n" + " memcpy(r, s + start, len);\n" + " r[len] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bleft(const char *s, int n) { return _bmid(s, 1, n); }\n\n" + + "static _BUNUSED char *_bright(const char *s, int n) {\n" + " if (!s) return _btmp(_bstr(\"\"));\n" + " int slen = (int)strlen(s);\n" + " if (n >= slen) return _btmp(_bstr(s));\n" + " return _btmp(_bstr(s + slen - n));\n" + "}\n\n" + + "static _BUNUSED char *_bucase(const char *s) {\n" + " if (!s) return _btmp(_bstr(\"\"));\n" + " char *r = _bstr(s);\n" + " for (char *p = r; *p; p++) *p = toupper((unsigned char)*p);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_blcase(const char *s) {\n" + " if (!s) return _btmp(_bstr(\"\"));\n" + " char *r = _bstr(s);\n" + " for (char *p = r; *p; p++) *p = tolower((unsigned char)*p);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED int _binstr(const char *haystack, const char *needle) {\n" + " if (!haystack || !needle) return 0;\n" + " const char *p = strstr(haystack, needle);\n" + " return p ? (int)(p - haystack) + 1 : 0;\n" + "}\n\n" + + "static _BUNUSED double _babs(double x) { return x < 0 ? 
-x : x; }\n\n" + ); + } + + // Additional string runtime functions (same in both modes) + fprintf(gOut, + "static _BUNUSED char *_bltrim(const char *s) {\n" + " while (*s == ' ') s++;\n" + " return _btmp(_bstr(s));\n" + "}\n\n" + + "static _BUNUSED char *_brtrim(const char *s) {\n" + " char *r = _bstr(s);\n" + " int len = (int)strlen(r);\n" + " while (len > 0 && r[len-1] == ' ') len--;\n" + " r[len] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_btrim(const char *s) {\n" + " while (*s == ' ') s++;\n" + " char *r = _bstr(s);\n" + " int len = (int)strlen(r);\n" + " while (len > 0 && r[len-1] == ' ') len--;\n" + " r[len] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bspace(int n) {\n" + " if (n < 0) n = 0;\n" + " char *r = (char*)malloc(n + 1);\n" + " memset(r, ' ', n);\n" + " r[n] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_btab(int col) {\n" + " if (col < 1) col = 1;\n" + " char *r = (char*)malloc(col);\n" + " memset(r, ' ', col - 1);\n" + " r[col - 1] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bhex(int n) {\n" + " char *r = (char*)malloc(20);\n" + " sprintf(r, \"%%X\", (unsigned)n);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_boct(int n) {\n" + " char *r = (char*)malloc(24);\n" + " sprintf(r, \"%%o\", (unsigned)n);\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bstring_rep(int n, const char *ch) {\n" + " if (n < 0) n = 0;\n" + " char *r = (char*)malloc(n + 1);\n" + " memset(r, ch[0], n);\n" + " r[n] = '\\0';\n" + " return _btmp(r);\n" + "}\n\n" + + "static _BUNUSED char *_bgetenv(const char *name) {\n" + " const char *val = getenv(name);\n" + " return _btmp(_bstr(val ? 
val : \"\"));\n"
+        "}\n\n"
+
+        // MID$ statement helper: overwrites characters in place and never
+        // changes the length of *dest.  A negative len means "to the end of
+        // the string" (the convention _bmid uses) instead of reaching
+        // memcpy() as a huge size_t; NULL arguments are ignored.
+        "static _BUNUSED void _bmid_assign(char **dest, int start, int len, const char *repl) {\n"
+        " if (!dest || !*dest || !repl) return;\n"
+        " int dlen = (int)strlen(*dest);\n"
+        " int rlen = (int)strlen(repl);\n"
+        " start--;\n"
+        " if (start < 0 || start >= dlen) return;\n"
+        " if (len < 0 || len > dlen - start) len = dlen - start;\n"
+        " if (rlen < len) len = rlen;\n"
+        " memcpy(*dest + start, repl, len);\n"
+        "}\n\n"
+
+        "/* PRINT USING support */\n"
+        "static const char *_busing_fmt _BUNUSED;\n"
+        "static const char *_busing_pos _BUNUSED;\n\n"
+
+        "static _BUNUSED void _busing_init(const char *fmt) {\n"
+        " _busing_fmt = _busing_pos = fmt ? fmt : \"\";\n"
+        "}\n\n"
+
+        // _busing_num: echo literal text up to the next field, parse the
+        // numeric field spec, then print val (continues below).
+        "static _BUNUSED void _busing_num(double val) {\n"
+        " const char *p = _busing_pos;\n"
+        " int width = 0, decimals = -1, dollar = 0, plus = 0, aster = 0, tminus = 0;\n"
+        " /* Skip literal chars until we find a numeric format start */\n"
+        " while (*p) {\n"
+        " if (*p == '#') break;\n"
+        " if (*p == '*' && p[1] == '*') break;\n"
+        " if (*p == '$' && p[1] == '$') break;\n"
+        " if (*p == '+' && (p[1] == '#' || p[1] == '$' || p[1] == '*')) break;\n"
+        " if (*p == '!' || *p == '&' || *p == '\\\\') break;\n"
+        " putchar(*p++);\n"
+        " }\n"
+        " if (!*p) { _busing_pos = _busing_fmt; return; }\n"
+        " /* Parse numeric format */\n"
+        " if (*p == '+') { plus = 1; p++; }\n"
+        " while (*p == '*') { aster++; width++; p++; }\n"
+        " while (*p == '$') { dollar++; p++; if (dollar > 1) width++; }\n"
+        " while (*p == '#' || *p == ',') { if (*p == '#') width++; p++; }\n"
+        " if (*p == '.') { p++; decimals = 0; while (*p == '#') { decimals++; p++; } }\n"
+        " if (*p == '-') { tminus = 1; p++; }\n"
+        " _busing_pos = p;\n"
+        " /* Format the number */\n"
+        " char buf[64];\n"
+        " double absval = val < 0 ? 
-val : val;\n" + " int neg = (val < 0);\n" + " if (decimals >= 0) {\n" + " snprintf(buf, sizeof(buf), \"%%.*f\", decimals, absval);\n" + " } else {\n" + " snprintf(buf, sizeof(buf), \"%%.0f\", absval);\n" + " }\n" + " int totalw = width + (decimals >= 0 ? decimals + 1 : 0);\n" + " int len = (int)strlen(buf);\n" + " int signw = (plus || neg) ? 1 : 0;\n" + " int dollarw = dollar ? 1 : 0;\n" + " int pad = totalw - len - signw - dollarw;\n" + " if (pad < 0) pad = 0;\n" + " for (int i = 0; i < pad; i++) putchar(aster >= 2 ? '*' : ' ');\n" + " if (plus) putchar(neg ? '-' : '+');\n" + " else if (neg && !tminus) putchar('-');\n" + " if (dollar) putchar('$');\n" + " printf(\"%%s\", buf);\n" + " if (tminus && neg) putchar('-');\n" + "}\n\n" + + "static _BUNUSED void _busing_str(const char *val) {\n" + " const char *p = _busing_pos;\n" + " if (!val) val = \"\";\n" + " /* Skip literal chars, print them */\n" + " while (*p && *p != '!' && *p != '&' && *p != '\\\\' && *p != '#') {\n" + " putchar(*p++);\n" + " }\n" + " if (!*p) { _busing_pos = _busing_fmt; return; }\n" + " if (*p == '!') {\n" + " /* First character only */\n" + " putchar(val[0] ? val[0] : ' ');\n" + " _busing_pos = p + 1;\n" + " } else if (*p == '&') {\n" + " /* Entire string */\n" + " printf(\"%%s\", val);\n" + " _busing_pos = p + 1;\n" + " } else if (*p == '\\\\') {\n" + " /* Fixed width: count chars between backslashes */\n" + " p++;\n" + " int width = 2;\n" + " while (*p && *p != '\\\\') { width++; p++; }\n" + " if (*p == '\\\\') p++;\n" + " _busing_pos = p;\n" + " int len = (int)strlen(val);\n" + " for (int i = 0; i < width; i++)\n" + " putchar(i < len ? val[i] : ' ');\n" + " } else {\n" + " _busing_pos = p;\n" + " }\n" + "}\n\n" + + "static _BUNUSED void _busing_end(void) {\n" + " putchar('\\n');\n" + " _busing_pos = _busing_fmt;\n" + "}\n\n" + ); + + // Only emit GOSUB stack if there are GOSUB sites, to avoid + // -Wunused-variable warnings. 
+ if (gGosubCount > 0) { + fprintf(gOut, + "/* GOSUB return stack */\n" + "#define _GOSUB_MAX %d\n" + "static int _gosub_stack[_GOSUB_MAX];\n" + "static int _gosub_sp = 0;\n\n", + MAX_GOSUB_SITES + ); + } + + // File I/O runtime — debug vs release + fprintf(gOut, + "/* File I/O support */\n" + "#define _BMAX_FILES 16\n" + "static FILE *_bfiles[_BMAX_FILES] _BUNUSED = {0};\n" + "static long _bfile_reclen[_BMAX_FILES] _BUNUSED = {0};\n\n" + ); + + if (gRelease) { + fprintf(gOut, + "static _BUNUSED FILE *_bfile_get(int fnum) {\n" + " return _bfiles[fnum];\n" + "}\n\n" + + "static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n" + " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" + " _bfiles[fnum] = fopen(fname, mode);\n" + "}\n\n" + + "static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n" + " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" + " _bfiles[fnum] = fopen(fname, \"r+b\");\n" + " if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n" + " _bfile_reclen[fnum] = reclen;\n" + "}\n\n" + + "static _BUNUSED void _bfile_close(int fnum) {\n" + " if (_bfiles[fnum]) { fclose(_bfiles[fnum]); _bfiles[fnum] = NULL; }\n" + "}\n\n" + + "static _BUNUSED int _beof(int fnum) {\n" + " if (!_bfiles[fnum]) return -1;\n" + " int c = fgetc(_bfiles[fnum]);\n" + " if (c == EOF) return -1;\n" + " ungetc(c, _bfiles[fnum]);\n" + " return 0;\n" + "}\n\n" + + "static _BUNUSED long _blof(int fnum) {\n" + " if (!_bfiles[fnum]) return 0;\n" + " long cur = ftell(_bfiles[fnum]);\n" + " fseek(_bfiles[fnum], 0, SEEK_END);\n" + " long sz = ftell(_bfiles[fnum]);\n" + " fseek(_bfiles[fnum], cur, SEEK_SET);\n" + " return sz;\n" + "}\n\n" + + "static _BUNUSED int _bfreefile(void) {\n" + " for (int i = 1; i < _BMAX_FILES; i++)\n" + " if (!_bfiles[i]) return i;\n" + " return 0;\n" + "}\n\n" + + "static _BUNUSED void _bline_input(int fnum, char **dest) {\n" + " if (!_bfiles[fnum]) return;\n" + " char _buf[4096];\n" + " if (fgets(_buf, 
sizeof(_buf), _bfiles[fnum])) {\n" + " _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n" + " _bstr_assign(dest, _buf);\n" + " }\n" + "}\n\n" + ); + } else { + fprintf(gOut, + "static _BUNUSED FILE *_bfile_get(int fnum) {\n" + " if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) {\n" + " fprintf(stderr, \"Bad file number %%d\\n\", fnum);\n" + " exit(1);\n" + " }\n" + " return _bfiles[fnum];\n" + "}\n\n" + + "static _BUNUSED void _bfile_open(int fnum, const char *fname, const char *mode) {\n" + " if (fnum < 1 || fnum >= _BMAX_FILES) {\n" + " fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n" + " exit(1);\n" + " }\n" + " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" + " _bfiles[fnum] = fopen(fname, mode);\n" + " if (!_bfiles[fnum]) {\n" + " fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n" + " exit(1);\n" + " }\n" + "}\n\n" + + "static _BUNUSED void _bfile_open_random(int fnum, const char *fname, long reclen) {\n" + " if (fnum < 1 || fnum >= _BMAX_FILES) {\n" + " fprintf(stderr, \"File number %%d out of range\\n\", fnum);\n" + " exit(1);\n" + " }\n" + " if (_bfiles[fnum]) fclose(_bfiles[fnum]);\n" + " _bfiles[fnum] = fopen(fname, \"r+b\");\n" + " if (!_bfiles[fnum]) _bfiles[fnum] = fopen(fname, \"w+b\");\n" + " if (!_bfiles[fnum]) {\n" + " fprintf(stderr, \"Cannot open '%%s'\\n\", fname);\n" + " exit(1);\n" + " }\n" + " _bfile_reclen[fnum] = reclen;\n" + "}\n\n" + + "static _BUNUSED void _bfile_close(int fnum) {\n" + " if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum]) {\n" + " fclose(_bfiles[fnum]);\n" + " _bfiles[fnum] = NULL;\n" + " }\n" + "}\n\n" + + "static _BUNUSED int _beof(int fnum) {\n" + " if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return -1;\n" + " int c = fgetc(_bfiles[fnum]);\n" + " if (c == EOF) return -1;\n" + " ungetc(c, _bfiles[fnum]);\n" + " return 0;\n" + "}\n\n" + + "static _BUNUSED long _blof(int fnum) {\n" + " if (fnum < 1 || fnum >= _BMAX_FILES || !_bfiles[fnum]) return 0;\n" + " long cur = ftell(_bfiles[fnum]);\n" + " 
fseek(_bfiles[fnum], 0, SEEK_END);\n" + " long sz = ftell(_bfiles[fnum]);\n" + " fseek(_bfiles[fnum], cur, SEEK_SET);\n" + " return sz;\n" + "}\n\n" + + "static _BUNUSED int _bfreefile(void) {\n" + " for (int i = 1; i < _BMAX_FILES; i++)\n" + " if (!_bfiles[i]) return i;\n" + " return 0;\n" + "}\n\n" + + "static _BUNUSED void _bline_input(int fnum, char **dest) {\n" + " char _buf[4096];\n" + " if (fnum >= 1 && fnum < _BMAX_FILES && _bfiles[fnum] &&\n" + " fgets(_buf, sizeof(_buf), _bfiles[fnum])) {\n" + " _buf[strcspn(_buf, \"\\r\\n\")] = 0;\n" + " _bstr_assign(dest, _buf);\n" + " }\n" + "}\n\n" + ); + } + + // DATA/READ support — same in both modes + fprintf(gOut, + "/* DATA/READ support */\n" + "typedef struct { int is_str; double num; const char *str; } _BDataItem;\n\n" + ); +}
+
+
+// Main code generation: emit the full C source file from the AST.
+//
+// Writes to gOut, in this order: the runtime library, packed struct
+// definitions for user-defined types, the DATA pool, forward declarations
+// and bodies of every SUB/FUNCTION, and finally main() holding the
+// top-level statements.
+//
+// Side effects: resets and refills the RESTORE lookup tables
+// (gDataLineNums/gDataLineIdxs, gDataLabelNames/gDataLabelIdxs) while the
+// DATA pool is written, and drives gIndent while main() is emitted.
+//
+//   prog - root of the parsed program; either a NODE_PROGRAM whose 'a'
+//          child is the top-level block, or the block/statement list itself
+static void generate(Node *prog) {
+    // Emit the runtime library
+    emitRuntime();
+
+    // Emit UDT struct definitions (packed for binary I/O compatibility)
+    if (gUdtCount > 0) {
+        fprintf(gOut, "/* User-defined types */\n");
+        fprintf(gOut, "#pragma pack(push, 1)\n");
+        for (int i = 0; i < gUdtCount; i++) {
+            UdtDef *u = &gUdts[i];
+            fprintf(gOut, "struct _b_%s {\n", cleanName(u->name));
+            for (int j = 0; j < u->fieldCount; j++) {
+                UdtField *f = &u->fields[j];
+                if (f->dataType == TYPE_STR && f->strLen > 0) {
+                    // Fixed-length string field: inline char array with
+                    // room for the terminating NUL
+                    fprintf(gOut, " char %s[%d];\n",
+                            cleanName(f->name), f->strLen + 1);
+                } else if (f->dataType == TYPE_UDT) {
+                    fprintf(gOut, " %s %s;\n",
+                            cUdtTypeStr(f->udtIndex), cleanName(f->name));
+                } else {
+                    fprintf(gOut, " %s %s;\n",
+                            cTypeStr(f->dataType), cleanName(f->name));
+                }
+            }
+            fprintf(gOut, "};\n");
+        }
+        fprintf(gOut, "#pragma pack(pop)\n\n");
+    }
+
+    // Collect all DATA nodes and emit the data pool
+    Node *dataNodes[4096];
+    int dataNodeCount = 0;
+    collectData(prog, dataNodes, &dataNodeCount, 4096);
+
+    if (dataNodeCount > 0) {
+        // Emit the data pool array. It carries _BUNUSED like the placeholder
+        // below so a program with DATA but no READ compiles without an
+        // unused-variable warning.
+        fprintf(gOut, "/* DATA pool */\n");
+        fprintf(gOut, "static _BDataItem _bdata[] _BUNUSED = {\n");
+        int totalItems = 0;
+        gDataLineCount = 0;
+        gDataLabelCount = 0;
+        for (int di = 0; di < dataNodeCount; di++) {
+            Node *dn = dataNodes[di];
+            // Record BASIC-line-number-to-index mapping for RESTORE.
+            // dn->ival is set by collectData when DATA follows a numeric label.
+            if (dn->ival != 0 && gDataLineCount < MAX_DATA_LINES) {
+                gDataLineNums[gDataLineCount] = dn->ival;
+                gDataLineIdxs[gDataLineCount] = totalItems;
+                gDataLineCount++;
+            }
+            // Record named-label-to-index mapping for RESTORE.
+            // dn->sval is set by collectData when DATA follows a named label.
+            if (dn->sval && gDataLabelCount < MAX_DATA_LINES) {
+                gDataLabelNames[gDataLabelCount] = dn->sval;
+                gDataLabelIdxs[gDataLabelCount] = totalItems;
+                gDataLabelCount++;
+            }
+            for (Node *item = dn->a; item; item = item->next) {
+                if (item->dataType == TYPE_STR) {
+                    // Escape the string for C output
+                    fprintf(gOut, " {1, 0, \"");
+                    for (const char *p = item->sval; *p; p++) {
+                        if (*p == '"') fprintf(gOut, "\\\"");
+                        else if (*p == '\\') fprintf(gOut, "\\\\");
+                        else fputc(*p, gOut);
+                    }
+                    fprintf(gOut, "\"},\n");
+                } else if (item->dataType == TYPE_DBL) {
+                    // 17 significant digits are enough to round-trip any
+                    // IEEE-754 double; plain %g keeps only 6 and would
+                    // silently truncate constants such as 3.14159265358979.
+                    fprintf(gOut, " {0, %.17g, NULL},\n", item->dval);
+                } else {
+                    fprintf(gOut, " {0, %d, NULL},\n", item->ival);
+                }
+                totalItems++;
+            }
+        }
+        if (totalItems == 0) {
+            // NOTE(review): presumably the parser never yields a DATA node
+            // without items; if it does, keep the initializer non-empty
+            // (an empty one is not valid before C23). _bdata_count stays 0.
+            fprintf(gOut, " {0, 0, NULL},\n");
+        }
+        fprintf(gOut, "};\n");
+        fprintf(gOut, "static int _bdata_count _BUNUSED = %d;\n", totalItems);
+        fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n");
+    } else {
+        // No DATA statements — emit empty placeholder
+        fprintf(gOut, "static _BDataItem _bdata[] _BUNUSED = {{0,0,NULL}};\n");
+        fprintf(gOut, "static int _bdata_count _BUNUSED = 0;\n");
+        fprintf(gOut, "static int _bdata_pos _BUNUSED = 0;\n\n");
+    }
+
+    // Collect all SUB/FUNCTION definitions
+    Node *funcs[256];
+    int funcCount = 0;
+    collectFuncs(prog, funcs, &funcCount, 256);
+
+    // Emit forward declarations for SUBs and FUNCTIONs
+    if (funcCount > 0) {
+        fprintf(gOut, "/* Forward declarations */\n");
+        for (int i = 0; i < funcCount; i++) {
+            Node *f = funcs[i];
+            int isFunc = (f->type == NODE_FUNC);
+            // A SUB has no return value
+            DataType ret = isFunc ? f->dataType : TYPE_VOID;
+            fprintf(gOut, "%s %s(", cTypeStr(ret), cleanName(f->sval));
+            int first = 1;
+            for (Node *p = f->a; p; p = p->next) {
+                if (!first) fprintf(gOut, ", ");
+                first = 0;
+                if (p->ival == PASS_BYREF)
+                    fprintf(gOut, "%s*", cTypeStr(p->dataType));
+                else if (p->dataType == TYPE_STR)
+                    fprintf(gOut, "const char*");
+                else
+                    fprintf(gOut, "%s", cTypeStr(p->dataType));
+            }
+            // No parameters at all: spell it (void) for a proper prototype
+            if (first) fprintf(gOut, "void");
+            fprintf(gOut, ");\n");
+        }
+        fprintf(gOut, "\n");
+    }
+
+    // Emit SUB/FUNCTION implementations
+    for (int i = 0; i < funcCount; i++) {
+        genFuncDef(funcs[i]);
+    }
+
+    // Emit main() with global (non-function) statements
+    fprintf(gOut, "/* Main program */\n");
+    fprintf(gOut, "int main(void) {\n");
+    gIndent = 1;
+
+    // Walk the top-level block and emit non-function statements
+    Node *blk = (prog->type == NODE_PROGRAM) ? prog->a : prog;
+    Node *s = (blk && blk->type == NODE_BLOCK) ? blk->a : blk;
+    while (s) {
+        // Skip SUB/FUNCTION definitions (already emitted above)
+        if (s->type != NODE_SUB && s->type != NODE_FUNC) {
+            genStmt(s);
+        }
+        s = s->next;
+    }
+
+    emit("return 0;\n");
+    gIndent = 0;
+    fprintf(gOut, "}\n");
+}
+
+
+// -----------------------------------------------------------------------
+// Section 9: Main Entry Point
+// -----------------------------------------------------------------------
+
+// Read an entire file into a malloc'd buffer. Returns NULL on failure.
+static char *readFile(const char *path) {
+    // The returned buffer is NUL-terminated; the caller releases it with free().
+    FILE *f = fopen(path, "rb");
+    if (!f) return NULL;
+    if (fseek(f, 0, SEEK_END) != 0) { fclose(f); return NULL; }
+    long len = ftell(f);
+    if (len < 0) { fclose(f); return NULL; }
+    if (len > (long)((unsigned)-1 >> 1)) {
+        // File too large for int-based gSrcLen
+        fclose(f);
+        return NULL;
+    }
+    rewind(f);
+    char *buf = (char *)malloc((size_t)len + 1);
+    if (!buf) { fclose(f); return NULL; }
+    size_t nread = fread(buf, 1, (size_t)len, f);
+    // A short read caused by an I/O error must not be passed off as a
+    // complete (silently truncated) source file.
+    if (nread != (size_t)len && ferror(f)) {
+        free(buf);
+        fclose(f);
+        return NULL;
+    }
+    buf[nread] = '\0';
+    fclose(f);
+    return buf;
+}
+
+
+// -----------------------------------------------------------------------
+// $INCLUDE preprocessor
+// -----------------------------------------------------------------------
+
+// Pass-through guard for malloc/realloc/strdup results: returns p unchanged,
+// or prints an error and exits if the allocation failed (p is NULL).
+static void *ppCheckAlloc(void *p) {
+    if (!p) {
+        fprintf(stderr, "Error: Out of memory\n");
+        exit(1);
+    }
+    return p;
+}
+
+// Extract directory part of a file path (returns malloc'd string).
+// A path without any '/' yields "."; otherwise the result is everything
+// before the last '/'.
+static char *dirName(const char *path) {
+    const char *last = strrchr(path, '/');
+    if (!last) return (char *)ppCheckAlloc(strdup("."));
+    size_t len = (size_t)(last - path);
+    char *dir = (char *)ppCheckAlloc(malloc(len + 1));
+    memcpy(dir, path, len);
+    dir[len] = '\0';
+    return dir;
+}
+
+// Join directory and filename (returns malloc'd string)
+static char *pathJoin(const char *dir, const char *file) {
+    // If file is absolute, return copy of file
+    if (file[0] == '/') return (char *)ppCheckAlloc(strdup(file));
+    size_t dlen = strlen(dir);
+    size_t flen = strlen(file);
+    // dir + '/' + file + NUL
+    char *result = (char *)ppCheckAlloc(malloc(dlen + 1 + flen + 1));
+    memcpy(result, dir, dlen);
+    result[dlen] = '/';
+    memcpy(result + dlen + 1, file, flen);
+    result[dlen + 1 + flen] = '\0';
+    return result;
+}
+
+// Growing buffer for source assembly
+typedef struct {
+    char *data;   // NUL-terminated contents
+    size_t len;   // bytes stored, not counting the terminating NUL
+    size_t cap;   // bytes allocated for data
+} SourceBuf;
+
+// Start an empty buffer with an initial 4096-byte allocation.
+static void sbInit(SourceBuf *sb) {
+    sb->cap = 4096;
+    sb->data = (char *)ppCheckAlloc(malloc(sb->cap));
+    sb->len = 0;
+    sb->data[0] = '\0';
+}
+
+// Append n bytes from s, doubling the capacity until they fit, and keep
+// the buffer NUL-terminated.
+static void sbAppend(SourceBuf *sb, const char *s, size_t n) {
+    if (sb->len + n + 1 > sb->cap) {
+        // Work out the final capacity first so the buffer is reallocated
+        // at most once per append.
+        size_t newCap = sb->cap;
+        while (sb->len + n + 1 > newCap) newCap *= 2;
+        sb->data = (char *)ppCheckAlloc(realloc(sb->data, newCap));
+        sb->cap = newCap;
+    }
+    memcpy(sb->data + sb->len, s, n);
+    sb->len += n;
+    sb->data[sb->len] = '\0';
+}
+
+// Case-insensitive prefix check.
+// Compares at most n characters; returns 0 when they match (or both strings
+// end together earlier), otherwise the difference of the first mismatching
+// upper-cased characters.
+static int strNIcmp(const char *a, const char *b, size_t n) {
+    for (size_t i = 0; i < n; i++) {
+        int ca = toupper((unsigned char)a[i]);
+        int cb = toupper((unsigned char)b[i]);
+        if (ca != cb) return ca - cb;
+        if (ca == 0) return 0;
+    }
+    return 0;
+}
+
+// Process a source file, expanding $INCLUDE directives.
+// Appends to the SourceBuf and gLineMap.
+//
+//   filePath     - file to read; included paths are resolved relative to
+//                  the directory of the file that names them
+//   sb           - output buffer receiving every non-directive line
+//   includeStack - paths of the files currently being expanded
+//   includeDepth - number of valid entries in includeStack (0 for the
+//                  top-level file)
+//
+// Exits the process with an error message if the nesting limit is hit, a
+// file includes itself (directly or indirectly, compared by path string),
+// or a file cannot be read.
+static void preprocessFile(const char *filePath, SourceBuf *sb,
+                           const char **includeStack, int includeDepth) {
+    // Check depth
+    if (includeDepth >= MAX_INCLUDE_DEPTH) {
+        fprintf(stderr, "Error: $INCLUDE nested too deeply (max %d) at '%s'\n",
+                MAX_INCLUDE_DEPTH, filePath);
+        exit(1);
+    }
+
+    // Check circular includes
+    for (int i = 0; i < includeDepth; i++) {
+        if (strcmp(includeStack[i], filePath) == 0) {
+            fprintf(stderr, "Error: Circular $INCLUDE detected: '%s'\n", filePath);
+            exit(1);
+        }
+    }
+
+    // Read file
+    char *text = readFile(filePath);
+    if (!text) {
+        fprintf(stderr, "Error: Cannot open '%s'", filePath);
+        if (includeDepth > 0)
+            fprintf(stderr, " (included from '%s')", includeStack[includeDepth - 1]);
+        fprintf(stderr, "\n");
+        exit(1);
+    }
+
+    const char *fname = internFileName(filePath);
+    char *baseDir = dirName(filePath);
+
+    // Push onto include stack
+    includeStack[includeDepth] = filePath;
+
+    // Process line by line
+    const char *p = text;
+    int origLine = 0;
+    while (*p) {
+        origLine++;
+
+        // Find end of line
+        const char *lineStart = p;
+        while (*p && *p != '\n') p++;
+        size_t lineLen = (size_t)(p - lineStart);
+        if (*p == '\n') p++; // consume newline
+
+        // Check for '$INCLUDE: directive
+        // Format: '$INCLUDE: 'filename'
+        // Leading spaces are allowed before the '
+        const char *s = lineStart;
+        while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++;
+
+        int isInclude = 0;
+        char incFile[MAX_TOKEN_LEN] = {0};
+
+        // Check for ' (comment start) followed by $INCLUDE:
+        if (s < lineStart + lineLen && *s == '\'') {
+            s++; // skip '
+            // Skip optional spaces between ' and $
+            while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++;
+            if (s + 9 <= lineStart + lineLen && strNIcmp(s, "$INCLUDE:", 9) == 0) {
+                s += 9;
+                // Skip spaces
+                while (s < lineStart + lineLen && (*s == ' ' || *s == '\t')) s++;
+                // Extract filename between single quotes
+                if (s < lineStart + lineLen && *s == '\'') {
+                    s++;
+                    const char *fnStart = s;
+                    while (s < lineStart + lineLen && *s != '\'') s++;
+                    // Require a non-empty name and a closing quote on this line
+                    if (s > fnStart && s < lineStart + lineLen) {
+                        size_t fnLen = (size_t)(s - fnStart);
+                        if (fnLen < MAX_TOKEN_LEN) {
+                            memcpy(incFile, fnStart, fnLen);
+                            incFile[fnLen] = '\0';
+                            isInclude = 1;
+                        }
+                    }
+                }
+            }
+        }
+
+        if (isInclude) {
+            // Resolve path relative to current file's directory
+            char *resolvedPath = pathJoin(baseDir, incFile);
+            preprocessFile(resolvedPath, sb, includeStack, includeDepth + 1);
+            free(resolvedPath);
+        } else {
+            // Record line map entry
+            if (gLineMapCount < MAX_SOURCE_LINES) {
+                gLineMap[gLineMapCount].fileName = fname;
+                gLineMap[gLineMapCount].origLine = origLine;
+                gLineMapCount++;
+            }
+            // Append line (with newline)
+            sbAppend(sb, lineStart, lineLen);
+            sbAppend(sb, "\n", 1);
+        }
+    }
+
+    free(baseDir);
+    free(text);
+}
+
+// Top-level preprocessor entry point.
+// Returns the fully expanded source as a malloc'd, NUL-terminated string.
+static char *preprocessSource(const char *filePath) {
+    SourceBuf sb;
+    sbInit(&sb);
+    const char *includeStack[MAX_INCLUDE_DEPTH];
+    preprocessFile(filePath, &sb, includeStack, 0);
+    return sb.data;
+}
+
+
+// Load functions.def from the directory part of 'path'.
+// Returns 1 if 'path' contains a '/' (the load is attempted next to it),
+// or 0 if it has no directory part, in which case nothing is loaded.
+static int loadExternFuncsBeside(const char *path) {
+    const char *lastSlash = strrchr(path, '/');
+    if (!lastSlash) return 0;
+    size_t dirLen = (size_t)(lastSlash - path) + 1; // keep the trailing '/'
+    char *defPath = (char *)ppCheckAlloc(malloc(dirLen + 14)); // "functions.def" + null
+    memcpy(defPath, path, dirLen);
+    strcpy(defPath + dirLen, "functions.def");
+    loadExternFuncs(defPath);
+    free(defPath);
+    return 1;
+}
+
+
+// Command-line driver: basic2c [--release|-r] input.bas [output.c]
+// Returns 0 on success, 1 on a usage error, an oversized source, an output
+// file that cannot be created, or a failure while writing the output.
+int main(int argc, char **argv) {
+    // Check for --release / -r flag
+    int argi = 1;
+    if (argc > 1 && (strcmp(argv[1], "--release") == 0 ||
+                     strcmp(argv[1], "-r") == 0)) {
+        gRelease = 1;
+        argi++;
+    }
+
+    if (argi >= argc) {
+        fprintf(stderr, "Usage: basic2c [--release|-r] input.bas [output.c]\n");
+        fprintf(stderr, "External functions can be defined in functions.def\n");
+        return 1;
+    }
+
+    // Load external function definitions from functions.def in binary's
+    // directory. Binary in current directory or bare name - try current
+    // directory.
+    int loadedCwd = 0;
+    if (!loadExternFuncsBeside(argv[0])) {
+        loadExternFuncs("functions.def");
+        loadedCwd = 1;
+    }
+
+    // Also load from input file's directory (may add more or override).
+    // An input path without a directory part lives in the current directory,
+    // which must still be consulted unless it was already loaded above.
+    if (!loadExternFuncsBeside(argv[argi]) && !loadedCwd) {
+        loadExternFuncs("functions.def");
+    }
+
+    // Read and preprocess source file (expands $INCLUDE directives)
+    char *source = preprocessSource(argv[argi]);
+
+    // Reject oversized input before touching the output file, so a failed
+    // run neither truncates an existing output nor leaks its handle.
+    size_t slen = strlen(source);
+    if (slen > (size_t)((unsigned)-1 >> 1)) {
+        fprintf(stderr, "Error: Source file too large (%zu bytes)\n", slen);
+        free(source);
+        return 1;
+    }
+
+    // Open output file (or stdout)
+    if (argi + 1 < argc) {
+        gOut = fopen(argv[argi + 1], "w");
+        if (!gOut) {
+            fprintf(stderr, "Error: Cannot create '%s'\n", argv[argi + 1]);
+            free(source);
+            return 1;
+        }
+    } else {
+        gOut = stdout;
+    }
+
+    // Initialize lexer state
+    gSrc = source;
+    gSrcPos = 0;
+    gSrcLen = (int)slen;
+    gLine = 1;
+
+    // Parse the BASIC source into an AST
+    Node *program = parseProgram();
+
+    // Generate C code from the AST
+    generate(program);
+
+    // Cleanup. Report write failures (e.g. a full disk) instead of exiting
+    // successfully with a truncated output.
+    int writeFailed = ferror(gOut);
+    if (gOut != stdout) {
+        if (fclose(gOut) != 0) writeFailed = 1;
+    } else if (fflush(stdout) != 0) {
+        writeFailed = 1;
+    }
+    free(source);
+
+    if (writeFailed) {
+        fprintf(stderr, "Error: Failed to write output\n");
+        return 1;
+    }
+    return 0;
+}
diff --git a/builtins.def b/builtins.def new file mode 100644 index 0000000..ab85f9b --- /dev/null +++ b/builtins.def @@ -0,0 +1,45 @@ +// Built-in function definitions for basic2c +// Format: BUILTIN(name, return_type,
c_template) +// This file is #included into basic2c.c with BUILTIN macro defined +// +// Template placeholders: % or %1 = first arg, %2 = second arg, etc. +// Return types: TYPE_BYTE, TYPE_INT, TYPE_LONG, TYPE_FLOAT, TYPE_DBL, TYPE_STR + +// Standard math functions +BUILTIN("SQR", TYPE_DBL, "sqrt(%)") +BUILTIN("SIN", TYPE_DBL, "sin(%)") +BUILTIN("COS", TYPE_DBL, "cos(%)") +BUILTIN("TAN", TYPE_DBL, "tan(%)") +BUILTIN("ATN", TYPE_DBL, "atan(%)") +BUILTIN("LOG", TYPE_DBL, "log(%)") +BUILTIN("EXP", TYPE_DBL, "exp(%)") +BUILTIN("SGN", TYPE_INT, "((%) > 0 ? 1 : ((%) < 0 ? -1 : 0))") +BUILTIN("RND", TYPE_DBL, "((double)rand() / (double)RAND_MAX)") + +// Extended math functions +BUILTIN("CEIL", TYPE_DBL, "ceil(%)") +BUILTIN("FLOOR", TYPE_DBL, "floor(%)") +BUILTIN("ROUND", TYPE_DBL, "round(%)") +BUILTIN("FRAC", TYPE_DBL, "((%) - floor(%))") +BUILTIN("FIX", TYPE_DBL, "trunc(%)") +BUILTIN("HYPOT", TYPE_DBL, "hypot(%, %2)") +BUILTIN("MAX", TYPE_DBL, "((double)((%) > (%2) ? (%) : (%2)))") +BUILTIN("MIN", TYPE_DBL, "((double)((%) < (%2) ? 
(%) : (%2)))") + +// System/Time +BUILTIN("TIMER", TYPE_DBL, "((double)clock() / CLOCKS_PER_SEC)") + +// String functions +BUILTIN("CHR$", TYPE_STR, "_bchr((int)(%))") +BUILTIN("STR$", TYPE_STR, "_bstr_of_int(%)") +BUILTIN("UCASE$", TYPE_STR, "_bucase(%)") +BUILTIN("LCASE$", TYPE_STR, "_blcase(%)") +BUILTIN("LTRIM$", TYPE_STR, "_bltrim(%)") +BUILTIN("RTRIM$", TYPE_STR, "_brtrim(%)") +BUILTIN("TRIM$", TYPE_STR, "_btrim(%)") +BUILTIN("SPACE$", TYPE_STR, "_bspace((int)(%))") +BUILTIN("HEX$", TYPE_STR, "_bhex((int)(%))") +BUILTIN("OCT$", TYPE_STR, "_boct((int)(%))") +BUILTIN("TAB", TYPE_STR, "_btab((int)(%))") +BUILTIN("SPC", TYPE_STR, "_bspace((int)(%))") +BUILTIN("ENVIRON$", TYPE_STR, "_bgetenv(%)") diff --git a/functions.def b/functions.def new file mode 100644 index 0000000..3ad4a7e --- /dev/null +++ b/functions.def @@ -0,0 +1,12 @@ +# External function definitions for basic2c +# Format: name : type : c_template +# Types: byte, integer, long, float, double, string +# Template: % or %1 = first arg, %2 = second arg, etc. +# +# Note: Common functions like CEIL, FLOOR, MAX, MIN, TIMER, ENVIRON$ +# are now built into basic2c via builtins.def. This file is for +# user-defined extensions that supplement or override the built-ins. +# +# Example custom functions: +# SQUARE : double : ((%) * (%)) +# CUBE : double : ((%) * (%) * (%)) diff --git a/test.bas b/test.bas new file mode 100644 index 0000000..c843291 --- /dev/null +++ b/test.bas @@ -0,0 +1,202 @@ +' ============================================ +' Test program for the basic2c transpiler +' Tests all major features +' ============================================ + +' --- Variable declarations --- +DIM x AS INTEGER +DIM y AS INTEGER +DIM pi AS DOUBLE +DIM greeting AS STRING +DIM name$ AS STRING + +' --- Assignments --- +x = 10 +y = 20 +pi = 3.14159 +greeting = "Hello, World!" 
+name$ = "BASIC" + +' --- PRINT with various separators --- +PRINT greeting +PRINT "x = "; x; " y = "; y +PRINT "PI is approximately "; pi +PRINT "Name: "; name$ + +' --- Arithmetic expressions --- +DIM result AS INTEGER +result = x + y * 2 - 5 +PRINT "x + y * 2 - 5 = "; result + +DIM quotient AS DOUBLE +quotient = x / 3 +PRINT "x / 3 = "; quotient + +DIM remainder AS INTEGER +remainder = y MOD 3 +PRINT "y MOD 3 = "; remainder + +' --- String operations --- +DIM full$ AS STRING +full$ = greeting + " from " + name$ +PRINT full$ +PRINT "Length of greeting: "; LEN(greeting) +PRINT "First 5 chars: "; LEFT$(greeting, 5) +PRINT "Last 6 chars: "; RIGHT$(greeting, 6) +PRINT "Middle: "; MID$(greeting, 3, 5) +PRINT "Upper: "; UCASE$(name$) +PRINT "Lower: "; LCASE$(greeting) + +' --- IF / ELSEIF / ELSE --- +IF x > 15 THEN + PRINT "x is greater than 15" +ELSEIF x > 5 THEN + PRINT "x is between 6 and 15" +ELSE + PRINT "x is 5 or less" +END IF + +' --- Single-line IF --- +IF y = 20 THEN PRINT "y is twenty" + +' --- FOR loop --- +PRINT "Counting 1 to 5:" +DIM i AS INTEGER +FOR i = 1 TO 5 + PRINT i; +NEXT i +PRINT "" + +' --- WHILE loop --- +DIM count AS INTEGER +count = 5 +PRINT "Countdown:" +WHILE count > 0 + PRINT count; + count = count - 1 +WEND +PRINT " Go!" 
+ +' --- DO LOOP WHILE (bottom test) --- +DIM n AS INTEGER +n = 1 +PRINT "DO LOOP WHILE:" +DO + PRINT n; + n = n + 1 +LOOP WHILE n <= 5 +PRINT "" + +' --- DO WHILE LOOP (top test) --- +n = 10 +PRINT "DO WHILE LOOP:" +DO WHILE n > 5 + PRINT n; + n = n - 1 +LOOP +PRINT "" + +' --- DO UNTIL --- +n = 1 +PRINT "DO UNTIL:" +DO UNTIL n > 5 + PRINT n; + n = n + 1 +LOOP +PRINT "" + +' --- Dynamic arrays --- +DIM arr(10) AS INTEGER +FOR i = 0 TO 10 + arr(i) = i * i +NEXT i +PRINT "Array squares:" +FOR i = 0 TO 10 + PRINT arr(i); +NEXT i +PRINT "" + +' --- FUNCTION with BYVAL --- +FUNCTION Square(BYVAL n AS INTEGER) AS INTEGER + Square = n * n +END FUNCTION + +FUNCTION Factorial(BYVAL n AS INTEGER) AS INTEGER + IF n <= 1 THEN + Factorial = 1 + ELSE + Factorial = n * Factorial(n - 1) + END IF +END FUNCTION + +PRINT "Square(7) = "; Square(7) +PRINT "Factorial(6) = "; Factorial(6) + +' --- FUNCTION returning DOUBLE --- +FUNCTION CircleArea(BYVAL radius AS DOUBLE) AS DOUBLE + CircleArea = 3.14159 * radius * radius +END FUNCTION + +PRINT "Area of circle r=5: "; CircleArea(5.0) + +' --- SUB with BYREF (modifies caller's variables) --- +SUB Swap(BYREF a AS INTEGER, BYREF b AS INTEGER) + LOCAL temp AS INTEGER + temp = a + a = b + b = temp +END SUB + +DIM p AS INTEGER +DIM q AS INTEGER +p = 100 +q = 200 +PRINT "Before swap: p="; p; " q="; q +CALL Swap(p, q) +PRINT "After swap: p="; p; " q="; q + +' --- SUB with BYVAL --- +SUB ShowMessage(BYVAL msg AS STRING) + PRINT ">>> "; msg; " <<<" +END SUB + +CALL ShowMessage("This is a test message") + +' --- STATIC variable in a function --- +FUNCTION Counter() AS INTEGER + STATIC c AS INTEGER + c = c + 1 + Counter = c +END FUNCTION + +PRINT "Counter: "; Counter() +PRINT "Counter: "; Counter() +PRINT "Counter: "; Counter() + +' --- Nested IF --- +DIM score AS INTEGER +score = 85 +IF score >= 90 THEN + PRINT "Grade: A" +ELSEIF score >= 80 THEN + PRINT "Grade: B" +ELSEIF score >= 70 THEN + PRINT "Grade: C" +ELSE + PRINT "Grade: F" +END IF + +' 
--- Boolean expressions --- +IF x > 5 AND y < 30 THEN + PRINT "x>5 AND y<30 is TRUE" +END IF + +IF x > 100 OR y = 20 THEN + PRINT "x>100 OR y=20 is TRUE" +END IF + +IF NOT (x = 5) THEN + PRINT "NOT (x=5) is TRUE" +END IF + +PRINT "Done!" diff --git a/test_big.bas b/test_big.bas new file mode 100644 index 0000000..c71b25f --- /dev/null +++ b/test_big.bas @@ -0,0 +1,1049 @@ +' ============================================================ +' Comprehensive BASIC Test Program +' Tests all features of the basic2c transpiler extensively +' ============================================================ + +' ---- Global variable declarations ---- +DIM i AS INTEGER +DIM j AS INTEGER +DIM k AS INTEGER +DIM n AS INTEGER +DIM temp AS INTEGER +DIM total AS INTEGER +DIM found AS INTEGER +DIM flag AS INTEGER + +DIM x AS DOUBLE +DIM y AS DOUBLE +DIM z AS DOUBLE +DIM avg AS DOUBLE +DIM pi AS DOUBLE +DIM area AS DOUBLE + +DIM s$ AS STRING +DIM t$ AS STRING +DIM u$ AS STRING +DIM line$ AS STRING +DIM result$ AS STRING + +' ============================================================ +' PART 1: Arithmetic and operator precedence +' ============================================================ +PRINT "==== PART 1: Arithmetic ====" + +i = 2 + 3 * 4 +PRINT "2 + 3 * 4 = "; i + +i = (2 + 3) * 4 +PRINT "(2 + 3) * 4 = "; i + +x = 10.0 / 3.0 +PRINT "10.0 / 3.0 = "; x + +i = 17 \ 5 +PRINT "17 \\ 5 = "; i + +i = 17 MOD 5 +PRINT "17 MOD 5 = "; i + +x = 2.0 ^ 10 +PRINT "2 ^ 10 = "; x + +x = -5.5 + 3.2 +PRINT "-5.5 + 3.2 = "; x + +i = 100 - 30 - 20 - 10 +PRINT "100 - 30 - 20 - 10 = "; i + +x = 1.0 + 2.0 * 3.0 - 4.0 / 2.0 +PRINT "1 + 2*3 - 4/2 = "; x + +' Nested parentheses +i = ((((5 + 3) * 2) - 1) * 3) +PRINT "((((5+3)*2)-1)*3) = "; i + +' Integer division chain +i = 100 \ 3 \ 2 +PRINT "100 \\ 3 \\ 2 = "; i + +' MOD chain +i = 100 MOD 17 MOD 5 +PRINT "100 MOD 17 MOD 5 = "; i + +PRINT "" + +' ============================================================ +' PART 2: String operations +' 
============================================================ +PRINT "==== PART 2: Strings ====" + +s$ = "Hello, World!" +PRINT "Original: "; s$ +PRINT "Length: "; LEN(s$) +PRINT "Left 5: "; LEFT$(s$, 5) +PRINT "Right 6: "; RIGHT$(s$, 6) +PRINT "Mid(8,5): "; MID$(s$, 8, 5) +PRINT "Upper: "; UCASE$(s$) +PRINT "Lower: "; LCASE$(s$) + +' String concatenation +t$ = "foo" +u$ = "bar" +result$ = t$ + u$ +PRINT "foo + bar = "; result$ + +' Multi-concat +result$ = "A" + "B" + "C" + "D" + "E" +PRINT "A+B+C+D+E = "; result$ + +' String with & operator +result$ = "Hello" & " " & "World" +PRINT "Hello & World = "; result$ + +' String comparison +s$ = "apple" +t$ = "banana" +IF s$ < t$ THEN + PRINT "apple < banana: TRUE" +ELSE + PRINT "apple < banana: FALSE" +END IF + +IF s$ = "apple" THEN + PRINT "s$ equals apple: TRUE" +END IF + +IF s$ <> "orange" THEN + PRINT "s$ <> orange: TRUE" +END IF + +' INSTR +s$ = "Hello World Hello" +PRINT "INSTR(Hello World Hello, World) = "; INSTR(s$, "World") +PRINT "INSTR(Hello World Hello, xyz) = "; INSTR(s$, "xyz") + +' CHR$ and ASC +PRINT "CHR$(65) = "; CHR$(65) +PRINT "ASC(A) = "; ASC("A") + +' STR$ and VAL +PRINT "STR$(42) = ["; STR$(42); "]" +PRINT "VAL(3.14) = "; VAL("3.14") + +' MID$ without length (to end of string) +s$ = "ABCDEFGHIJ" +PRINT "MID$(ABCDEFGHIJ, 4) = "; MID$(s$, 4) + +' Edge cases +PRINT "LEFT$ empty: ["; LEFT$("hello", 0); "]" +PRINT "RIGHT$ more than len: "; RIGHT$("hi", 10) +PRINT "LEN empty: "; LEN("") + +PRINT "" + +' ============================================================ +' PART 3: Control flow - IF/ELSEIF/ELSE +' ============================================================ +PRINT "==== PART 3: IF/ELSEIF/ELSE ====" + +' Nested IF +i = 42 +IF i > 0 THEN + IF i > 10 THEN + IF i > 100 THEN + PRINT "i > 100" + ELSE + PRINT "10 < i <= 100: "; i + END IF + ELSE + PRINT "0 < i <= 10" + END IF +ELSE + PRINT "i <= 0" +END IF + +' Long ELSEIF chain +n = 7 +IF n = 1 THEN + PRINT "one" +ELSEIF n = 2 THEN + PRINT "two" 
+ELSEIF n = 3 THEN + PRINT "three" +ELSEIF n = 4 THEN + PRINT "four" +ELSEIF n = 5 THEN + PRINT "five" +ELSEIF n = 6 THEN + PRINT "six" +ELSEIF n = 7 THEN + PRINT "seven" +ELSEIF n = 8 THEN + PRINT "eight" +ELSE + PRINT "other" +END IF + +' Single-line IF variations +IF 1 = 1 THEN PRINT "single-line IF works" +IF 0 = 1 THEN PRINT "THIS SHOULD NOT PRINT" + +' Boolean operators in conditions +i = 15 +IF i > 10 AND i < 20 THEN + PRINT "15 is between 10 and 20" +END IF + +IF i < 5 OR i > 10 THEN + PRINT "15 is outside 5..10" +END IF + +IF NOT (i = 0) THEN + PRINT "i is not zero" +END IF + +' Combined boolean +IF (i > 10 AND i < 20) OR i = 0 THEN + PRINT "complex boolean: TRUE" +END IF + +PRINT "" + +' ============================================================ +' PART 4: Loops +' ============================================================ +PRINT "==== PART 4: Loops ====" + +' FOR loop basic +PRINT "FOR 1 to 10: "; +FOR i = 1 TO 10 + PRINT i; +NEXT i +PRINT "" + +' FOR with STEP +PRINT "FOR 0 to 20 STEP 3: "; +FOR i = 0 TO 20 STEP 3 + PRINT i; +NEXT i +PRINT "" + +' FOR negative step +PRINT "FOR 10 to 1 STEP -1: "; +FOR i = 10 TO 1 STEP -1 + PRINT i; +NEXT i +PRINT "" + +' FOR step 2 +PRINT "FOR 1 to 10 STEP 2: "; +FOR i = 1 TO 10 STEP 2 + PRINT i; +NEXT i +PRINT "" + +' WHILE loop +PRINT "WHILE countdown: "; +i = 10 +WHILE i > 0 + PRINT i; + i = i - 2 +WEND +PRINT "" + +' DO WHILE at top +PRINT "DO WHILE top: "; +i = 1 +DO WHILE i <= 5 + PRINT i; + i = i + 1 +LOOP +PRINT "" + +' DO UNTIL at top +PRINT "DO UNTIL top: "; +i = 1 +DO UNTIL i > 5 + PRINT i; + i = i + 1 +LOOP +PRINT "" + +' DO LOOP WHILE at bottom +PRINT "DO LOOP WHILE bottom: "; +i = 1 +DO + PRINT i; + i = i + 1 +LOOP WHILE i <= 5 +PRINT "" + +' DO LOOP UNTIL at bottom +PRINT "DO LOOP UNTIL bottom: "; +i = 10 +DO + PRINT i; + i = i - 3 +LOOP UNTIL i <= 0 +PRINT "" + +' Infinite DO with EXIT +PRINT "DO with EXIT: "; +i = 0 +DO + i = i + 1 + IF i > 5 THEN EXIT DO + PRINT i; +LOOP +PRINT "" + +' Nested FOR 
loops +PRINT "Multiplication table 1-4:" +FOR i = 1 TO 4 + FOR j = 1 TO 4 + k = i * j + PRINT k; " "; + NEXT j + PRINT "" +NEXT i + +' EXIT FOR +PRINT "EXIT FOR test: "; +FOR i = 1 TO 100 + IF i > 5 THEN EXIT FOR + PRINT i; +NEXT i +PRINT "" + +' Nested loops with EXIT +PRINT "Nested EXIT: "; +FOR i = 1 TO 3 + FOR j = 1 TO 10 + IF j > 3 THEN EXIT FOR + PRINT i * 10 + j; + NEXT j +NEXT i +PRINT "" + +PRINT "" + +' ============================================================ +' PART 5: Arrays +' ============================================================ +PRINT "==== PART 5: Arrays ====" + +' Integer array +DIM nums(20) AS INTEGER +FOR i = 0 TO 20 + nums(i) = i * i +NEXT i +PRINT "Squares 0-10: "; +FOR i = 0 TO 10 + PRINT nums(i); +NEXT i +PRINT "" + +' Double array +DIM reals(5) AS DOUBLE +reals(0) = 1.1 +reals(1) = 2.2 +reals(2) = 3.3 +reals(3) = 4.4 +reals(4) = 5.5 +reals(5) = 6.6 +PRINT "Doubles: "; +FOR i = 0 TO 5 + PRINT reals(i); +NEXT i +PRINT "" + +' String array +DIM words(4) AS STRING +words(0) = "the" +words(1) = "quick" +words(2) = "brown" +words(3) = "fox" +words(4) = "jumps" +PRINT "Words: "; +FOR i = 0 TO 4 + PRINT words(i); " "; +NEXT i +PRINT "" + +' Array element operations +DIM data(10) AS INTEGER +FOR i = 0 TO 10 + data(i) = (i + 1) * 5 +NEXT i + +' Sum array +total = 0 +FOR i = 0 TO 10 + total = total + data(i) +NEXT i +PRINT "Sum of data: "; total + +' Find max +temp = data(0) +FOR i = 1 TO 10 + IF data(i) > temp THEN temp = data(i) +NEXT i +PRINT "Max of data: "; temp + +' Find min +temp = data(0) +FOR i = 1 TO 10 + IF data(i) < temp THEN temp = data(i) +NEXT i +PRINT "Min of data: "; temp + +' Average +avg = 0.0 +FOR i = 0 TO 10 + avg = avg + data(i) +NEXT i +avg = avg / 11 +PRINT "Average: "; avg + +' REDIM - grow array +DIM growArr(5) AS INTEGER +FOR i = 0 TO 5 + growArr(i) = i * 100 +NEXT i +PRINT "Before REDIM: "; +FOR i = 0 TO 5 + PRINT growArr(i); +NEXT i +PRINT "" + +REDIM growArr(10) AS INTEGER +growArr(7) = 777 +growArr(10) = 1000 
+PRINT "After REDIM: "; +FOR i = 0 TO 10 + PRINT growArr(i); +NEXT i +PRINT "" + +' Bubble sort on an array +DIM sortArr(9) AS INTEGER +sortArr(0) = 64 +sortArr(1) = 34 +sortArr(2) = 25 +sortArr(3) = 12 +sortArr(4) = 22 +sortArr(5) = 11 +sortArr(6) = 90 +sortArr(7) = 1 +sortArr(8) = 45 +sortArr(9) = 78 + +PRINT "Before sort: "; +FOR i = 0 TO 9 + PRINT sortArr(i); " "; +NEXT i +PRINT "" + +' Bubble sort +FOR i = 0 TO 8 + FOR j = 0 TO 8 - i + IF sortArr(j) > sortArr(j + 1) THEN + temp = sortArr(j) + sortArr(j) = sortArr(j + 1) + sortArr(j + 1) = temp + END IF + NEXT j +NEXT i + +PRINT "After sort: "; +FOR i = 0 TO 9 + PRINT sortArr(i); " "; +NEXT i +PRINT "" + +PRINT "" + +' ============================================================ +' PART 6: Functions (BYVAL) +' ============================================================ +PRINT "==== PART 6: Functions ====" + +FUNCTION Max(BYVAL a AS INTEGER, BYVAL b AS INTEGER) AS INTEGER + IF a > b THEN + Max = a + ELSE + Max = b + END IF +END FUNCTION + +FUNCTION Min(BYVAL a AS INTEGER, BYVAL b AS INTEGER) AS INTEGER + IF a < b THEN + Min = a + ELSE + Min = b + END IF +END FUNCTION + +FUNCTION Clamp(BYVAL val AS INTEGER, BYVAL lo AS INTEGER, BYVAL hi AS INTEGER) AS INTEGER + IF val < lo THEN + Clamp = lo + ELSEIF val > hi THEN + Clamp = hi + ELSE + Clamp = val + END IF +END FUNCTION + +FUNCTION Abs2(BYVAL n AS INTEGER) AS INTEGER + IF n < 0 THEN + Abs2 = -n + ELSE + Abs2 = n + END IF +END FUNCTION + +FUNCTION Factorial(BYVAL n AS INTEGER) AS INTEGER + IF n <= 1 THEN + Factorial = 1 + ELSE + Factorial = n * Factorial(n - 1) + END IF +END FUNCTION + +FUNCTION Fibonacci(BYVAL n AS INTEGER) AS INTEGER + IF n <= 0 THEN + Fibonacci = 0 + ELSEIF n = 1 THEN + Fibonacci = 1 + ELSE + Fibonacci = Fibonacci(n - 1) + Fibonacci(n - 2) + END IF +END FUNCTION + +FUNCTION IsPrime(BYVAL n AS INTEGER) AS INTEGER + LOCAL d AS INTEGER + IF n < 2 THEN + IsPrime = 0 + EXIT FUNCTION + END IF + IF n = 2 THEN + IsPrime = 1 + EXIT FUNCTION + END IF + 
IF n MOD 2 = 0 THEN + IsPrime = 0 + EXIT FUNCTION + END IF + d = 3 + WHILE d * d <= n + IF n MOD d = 0 THEN + IsPrime = 0 + EXIT FUNCTION + END IF + d = d + 2 + WEND + IsPrime = 1 +END FUNCTION + +FUNCTION GCD(BYVAL a AS INTEGER, BYVAL b AS INTEGER) AS INTEGER + WHILE b <> 0 + LOCAL t AS INTEGER + t = b + b = a MOD b + a = t + WEND + GCD = a +END FUNCTION + +FUNCTION Power(BYVAL base AS DOUBLE, BYVAL exp AS INTEGER) AS DOUBLE + LOCAL result AS DOUBLE + result = 1.0 + IF exp < 0 THEN + Power = 1.0 / Power(base, -exp) + EXIT FUNCTION + END IF + FOR i = 1 TO exp + result = result * base + NEXT i + Power = result +END FUNCTION + +PRINT "Max(10, 20) = "; Max(10, 20) +PRINT "Min(10, 20) = "; Min(10, 20) +PRINT "Clamp(50, 0, 100) = "; Clamp(50, 0, 100) +PRINT "Clamp(-5, 0, 100) = "; Clamp(-5, 0, 100) +PRINT "Clamp(150, 0, 100) = "; Clamp(150, 0, 100) +PRINT "Abs2(-42) = "; Abs2(-42) +PRINT "Abs2(42) = "; Abs2(42) + +PRINT "Factorials: "; +FOR i = 1 TO 10 + PRINT Factorial(i); +NEXT i +PRINT "" + +PRINT "Fibonacci 0-12: "; +FOR i = 0 TO 12 + PRINT Fibonacci(i); +NEXT i +PRINT "" + +PRINT "Primes up to 50: "; +FOR i = 2 TO 50 + IF IsPrime(i) THEN PRINT i; +NEXT i +PRINT "" + +PRINT "GCD(48, 18) = "; GCD(48, 18) +PRINT "GCD(100, 75) = "; GCD(100, 75) +PRINT "GCD(17, 13) = "; GCD(17, 13) + +PRINT "Power(2, 10) = "; Power(2.0, 10) +PRINT "Power(3, 5) = "; Power(3.0, 5) +PRINT "Power(10, 0) = "; Power(10.0, 0) + +PRINT "" + +' ============================================================ +' PART 7: Functions returning DOUBLE +' ============================================================ +PRINT "==== PART 7: Double Functions ====" + +FUNCTION Distance(BYVAL x1 AS DOUBLE, BYVAL y1 AS DOUBLE, BYVAL x2 AS DOUBLE, BYVAL y2 AS DOUBLE) AS DOUBLE + LOCAL dx AS DOUBLE + LOCAL dy AS DOUBLE + dx = x2 - x1 + dy = y2 - y1 + Distance = SQR(dx * dx + dy * dy) +END FUNCTION + +FUNCTION DegToRad(BYVAL deg AS DOUBLE) AS DOUBLE + DegToRad = deg * 3.14159265358979 / 180.0 +END FUNCTION + +FUNCTION 
CircleArea(BYVAL r AS DOUBLE) AS DOUBLE + CircleArea = 3.14159265358979 * r * r +END FUNCTION + +FUNCTION CelsiusToFahr(BYVAL c AS DOUBLE) AS DOUBLE + CelsiusToFahr = c * 9.0 / 5.0 + 32.0 +END FUNCTION + +FUNCTION FahrToCelsius(BYVAL f AS DOUBLE) AS DOUBLE + FahrToCelsius = (f - 32.0) * 5.0 / 9.0 +END FUNCTION + +PRINT "Distance (0,0)-(3,4) = "; Distance(0.0, 0.0, 3.0, 4.0) +PRINT "Distance (1,1)-(4,5) = "; Distance(1.0, 1.0, 4.0, 5.0) +PRINT "90 degrees in radians = "; DegToRad(90.0) +PRINT "Circle area r=10 = "; CircleArea(10.0) +PRINT "100C in F = "; CelsiusToFahr(100.0) +PRINT "212F in C = "; FahrToCelsius(212.0) +PRINT "0C in F = "; CelsiusToFahr(0.0) +PRINT "32F in C = "; FahrToCelsius(32.0) + +PRINT "" + +' ============================================================ +' PART 8: Functions returning STRING +' ============================================================ +PRINT "==== PART 8: String Functions ====" + +FUNCTION Repeat$(BYVAL s AS STRING, BYVAL count AS INTEGER) AS STRING + LOCAL result$ AS STRING + LOCAL idx AS INTEGER + result$ = "" + FOR idx = 1 TO count + result$ = result$ + s + NEXT idx + Repeat$ = result$ +END FUNCTION + +FUNCTION Reverse$(BYVAL s AS STRING) AS STRING + LOCAL result$ AS STRING + LOCAL idx AS INTEGER + LOCAL ln AS INTEGER + result$ = "" + ln = LEN(s) + FOR idx = ln TO 1 STEP -1 + result$ = result$ + MID$(s, idx, 1) + NEXT idx + Reverse$ = result$ +END FUNCTION + +FUNCTION PadRight$(BYVAL s AS STRING, BYVAL width AS INTEGER) AS STRING + LOCAL result$ AS STRING + LOCAL idx AS INTEGER + result$ = s + FOR idx = LEN(s) TO width - 1 + result$ = result$ + " " + NEXT idx + PadRight$ = result$ +END FUNCTION + +FUNCTION ReplaceAll$(BYVAL src AS STRING, BYVAL find AS STRING, BYVAL repl AS STRING) AS STRING + LOCAL result$ AS STRING + LOCAL pos AS INTEGER + LOCAL findLen AS INTEGER + result$ = src + findLen = LEN(find) + pos = INSTR(result$, find) + WHILE pos > 0 + result$ = LEFT$(result$, pos - 1) + repl + MID$(result$, pos + findLen) + 
pos = INSTR(result$, find) + WEND + ReplaceAll$ = result$ +END FUNCTION + +PRINT "Repeat(abc, 4) = "; Repeat$("abc", 4) +PRINT "Repeat(XY, 5) = "; Repeat$("XY", 5) +PRINT "Reverse(Hello) = "; Reverse$("Hello") +PRINT "Reverse(abcdef) = "; Reverse$("abcdef") +PRINT "PadRight(hi, 10) = ["; PadRight$("hi", 10); "]" +PRINT "Replace(aabbcc, bb, XX) = "; ReplaceAll$("aabbcc", "bb", "XX") +PRINT "Replace(abcabc, abc, x) = "; ReplaceAll$("abcabc", "abc", "x") + +PRINT "" + +' ============================================================ +' PART 9: Subroutines with BYREF +' ============================================================ +PRINT "==== PART 9: BYREF ====" + +SUB Swap(BYREF a AS INTEGER, BYREF b AS INTEGER) + LOCAL t AS INTEGER + t = a + a = b + b = t +END SUB + +SUB Increment(BYREF val AS INTEGER, BYVAL amount AS INTEGER) + val = val + amount +END SUB + +SUB Triple(BYREF a AS INTEGER, BYREF b AS INTEGER, BYREF c AS INTEGER) + a = a * 3 + b = b * 3 + c = c * 3 +END SUB + +SUB MinMax(BYVAL a AS INTEGER, BYVAL b AS INTEGER, BYREF outMin AS INTEGER, BYREF outMax AS INTEGER) + IF a < b THEN + outMin = a + outMax = b + ELSE + outMin = b + outMax = a + END IF +END SUB + +DIM p AS INTEGER +DIM q AS INTEGER +DIM r AS INTEGER +DIM mn AS INTEGER +DIM mx AS INTEGER + +p = 100 +q = 200 +PRINT "Before Swap: p="; p; " q="; q +CALL Swap(p, q) +PRINT "After Swap: p="; p; " q="; q + +p = 10 +CALL Increment(p, 5) +PRINT "After Increment(10, 5): "; p +CALL Increment(p, 100) +PRINT "After Increment(15, 100): "; p + +p = 2 +q = 3 +r = 4 +PRINT "Before Triple: "; p; " "; q; " "; r +CALL Triple(p, q, r) +PRINT "After Triple: "; p; " "; q; " "; r + +CALL MinMax(42, 17, mn, mx) +PRINT "MinMax(42, 17): min="; mn; " max="; mx + +CALL MinMax(3, 99, mn, mx) +PRINT "MinMax(3, 99): min="; mn; " max="; mx + +PRINT "" + +' ============================================================ +' PART 10: STATIC variables +' ============================================================ +PRINT "==== PART 10: 
STATIC ====" + +FUNCTION Counter() AS INTEGER + STATIC c AS INTEGER + c = c + 1 + Counter = c +END FUNCTION + +FUNCTION Accumulator(BYVAL val AS DOUBLE) AS DOUBLE + STATIC sum AS DOUBLE + sum = sum + val + Accumulator = sum +END FUNCTION + +FUNCTION CallTracker(BYVAL name AS STRING) AS INTEGER + STATIC calls AS INTEGER + calls = calls + 1 + PRINT " CallTracker("; name; ") call #"; calls + CallTracker = calls +END FUNCTION + +PRINT "Counter sequence: "; +FOR i = 1 TO 8 + PRINT Counter(); +NEXT i +PRINT "" + +PRINT "Accumulator: "; +x = Accumulator(10.0) +PRINT x; +x = Accumulator(20.0) +PRINT x; +x = Accumulator(30.0) +PRINT x; +x = Accumulator(5.5) +PRINT x; +PRINT "" + +i = CallTracker("first") +i = CallTracker("second") +i = CallTracker("third") + +PRINT "" + +' ============================================================ +' PART 11: Classic BASIC with line numbers +' ============================================================ +PRINT "==== PART 11: Line Numbers ====" + +' Mix of modern and classic +DIM classicVar AS INTEGER +classicVar = 0 + +1000 classicVar = classicVar + 1 +1010 IF classicVar >= 5 THEN GOTO 1030 +1020 GOTO 1000 +1030 PRINT "Classic loop result: "; classicVar + +' GOSUB/RETURN +DIM gosubResult AS INTEGER +gosubResult = 0 + +1100 PRINT "Before GOSUB" +1110 GOSUB 1200 +1120 PRINT "gosubResult = "; gosubResult +1130 GOSUB 1200 +1140 PRINT "gosubResult = "; gosubResult +1150 GOTO 1300 + +1200 gosubResult = gosubResult + 100 +1210 PRINT " In GOSUB, gosubResult = "; gosubResult +1220 RETURN + +1300 PRINT "After all GOSUBs" + +PRINT "" + +' ============================================================ +' PART 12: Complex nested structures +' ============================================================ +PRINT "==== PART 12: Nested Structures ====" + +' Nested loops with conditions +DIM count AS INTEGER +count = 0 +FOR i = 1 TO 10 + FOR j = 1 TO 10 + IF i + j = 10 THEN + PRINT i; "+"; j; "=10 "; + count = count + 1 + END IF + NEXT j +NEXT i +PRINT "" 
+PRINT "Found "; count; " pairs" + +' FizzBuzz +PRINT "FizzBuzz 1-30:" +FOR i = 1 TO 30 + IF i MOD 15 = 0 THEN + PRINT "FizzBuzz"; + ELSEIF i MOD 3 = 0 THEN + PRINT "Fizz"; + ELSEIF i MOD 5 = 0 THEN + PRINT "Buzz"; + ELSE + PRINT i; + END IF + IF i < 30 THEN PRINT ","; +NEXT i +PRINT "" + +' Pascal's triangle +PRINT "Pascal's triangle (6 rows):" +DIM pascal(10) AS INTEGER +DIM newPascal(10) AS INTEGER +pascal(0) = 1 + +FOR i = 0 TO 5 + ' Print leading spaces + FOR j = 0 TO 4 - i + PRINT " "; + NEXT j + ' Print values + FOR j = 0 TO i + PRINT pascal(j); " "; + NEXT j + PRINT "" + ' Compute next row + newPascal(0) = 1 + FOR j = 1 TO i + newPascal(j) = pascal(j - 1) + pascal(j) + NEXT j + newPascal(i + 1) = 1 + FOR j = 0 TO i + 1 + pascal(j) = newPascal(j) + NEXT j +NEXT i + +' Sieve of Eratosthenes +DIM sieve(100) AS INTEGER +FOR i = 0 TO 100 + sieve(i) = 1 +NEXT i +sieve(0) = 0 +sieve(1) = 0 + +FOR i = 2 TO 10 + IF sieve(i) = 1 THEN + j = i * i + WHILE j <= 100 + sieve(j) = 0 + j = j + i + WEND + END IF +NEXT i + +PRINT "Primes to 100 (sieve): "; +count = 0 +FOR i = 2 TO 100 + IF sieve(i) = 1 THEN + PRINT i; + count = count + 1 + END IF +NEXT i +PRINT "" +PRINT "Count: "; count + +' Collatz sequence +FUNCTION Collatz(BYVAL n AS INTEGER) AS INTEGER + LOCAL steps AS INTEGER + steps = 0 + WHILE n <> 1 + IF n MOD 2 = 0 THEN + n = n / 2 + ELSE + n = n * 3 + 1 + END IF + steps = steps + 1 + WEND + Collatz = steps +END FUNCTION + +PRINT "" +PRINT "Collatz steps:" +FOR i = 1 TO 20 + PRINT " n="; i; " steps="; Collatz(i) +NEXT i + +PRINT "" + +' ============================================================ +' PART 13: SUB with no parameters +' ============================================================ +PRINT "==== PART 13: Parameterless SUB ====" + +SUB PrintSeparator() + PRINT "--------------------" +END SUB + +SUB PrintBanner() + CALL PrintSeparator() + PRINT " BASIC 2 C" + CALL PrintSeparator() +END SUB + +CALL PrintBanner() + +PRINT "" + +' 
============================================================ +' PART 14: Edge cases and stress tests +' ============================================================ +PRINT "==== PART 14: Edge Cases ====" + +' Zero iterations +PRINT "Zero-iteration FOR: ["; +FOR i = 10 TO 5 + PRINT "SHOULD NOT PRINT"; +NEXT i +PRINT "]" + +' Single iteration +PRINT "Single-iteration FOR: ["; +FOR i = 5 TO 5 + PRINT i; +NEXT i +PRINT "]" + +' Deeply nested IF +n = 5 +IF n > 0 THEN + IF n > 1 THEN + IF n > 2 THEN + IF n > 3 THEN + IF n > 4 THEN + PRINT "n > 4 (deeply nested)" + END IF + END IF + END IF + END IF +END IF + +' Long expression +x = 1.0 + 2.0 + 3.0 + 4.0 + 5.0 + 6.0 + 7.0 + 8.0 + 9.0 + 10.0 +PRINT "Sum 1..10 (doubles) = "; x + +' Large product: 32000 * 2 = 64000, which is above 32767 (assumes i is wider than 16 bits here -- TODO confirm) +i = 32000 +i = i * 2 +PRINT "32000 * 2 = "; i + +' Negative number operations +i = -10 +PRINT "Abs(-10) = "; Abs2(-10) +PRINT "-(-10) = "; -i + +' Multiple function calls in one expression +i = Max(Min(100, 50), Max(20, 30)) +PRINT "Max(Min(100,50), Max(20,30)) = "; i + +' Function as argument to function +PRINT "Factorial(Max(3, 4)) = "; Factorial(Max(3, 4)) + +' String function chaining +PRINT "UCASE$(LEFT$(hello world, 5)) = "; UCASE$(LEFT$("hello world", 5)) + +PRINT "" + +' ============================================================ +' FINAL SUMMARY +' ============================================================ +PRINT "==== ALL TESTS COMPLETE ====" +PRINT "If you see this, all features work!" 
diff --git a/test_classic.bas b/test_classic.bas new file mode 100644 index 0000000..6e76e44 --- /dev/null +++ b/test_classic.bas @@ -0,0 +1,17 @@ +' Classic BASIC with line numbers +' Tests GOTO, GOSUB/RETURN, and line-numbered code + +10 DIM x AS INTEGER +20 x = 1 +30 PRINT "Start" +40 GOSUB 100 +50 PRINT "Back from first gosub, x="; x +60 GOSUB 100 +70 PRINT "Back from second gosub, x="; x +80 GOTO 200 +90 PRINT "This should not print" +100 x = x + 10 +110 PRINT "In subroutine, x="; x +120 RETURN +200 PRINT "After GOTO, x="; x +210 PRINT "Classic BASIC done!" diff --git a/test_continue.bas b/test_continue.bas new file mode 100644 index 0000000..e88f7c4 --- /dev/null +++ b/test_continue.bas @@ -0,0 +1,46 @@ +' Test CONTINUE statement +PRINT "==== CONTINUE Test ====" + +' CONTINUE FOR — skip even numbers +PRINT "Odd numbers 1-10:" +DIM i AS INTEGER +FOR i = 1 TO 10 + IF i MOD 2 = 0 THEN CONTINUE FOR + PRINT i; " "; +NEXT i +PRINT "" + +' CONTINUE WHILE — skip multiples of 3 +PRINT "Non-multiples of 3 (1-12):" +DIM w AS INTEGER +w = 0 +WHILE w < 12 + w = w + 1 + IF w MOD 3 = 0 THEN CONTINUE WHILE + PRINT w; " "; +WEND +PRINT "" + +' CONTINUE DO — skip value 5 +PRINT "1-8 without 5:" +DIM d AS INTEGER +d = 0 +DO + d = d + 1 + IF d = 5 THEN CONTINUE DO + PRINT d; " "; +LOOP UNTIL d >= 8 +PRINT "" + +' CONTINUE DO with WHILE form +PRINT "DO WHILE skip 3:" +DIM e AS INTEGER +e = 0 +DO WHILE e < 6 + e = e + 1 + IF e = 3 THEN CONTINUE DO + PRINT e; " "; +LOOP +PRINT "" + +PRINT "CONTINUE test complete!" diff --git a/test_data.bas b/test_data.bas new file mode 100644 index 0000000..b403c1e --- /dev/null +++ b/test_data.bas @@ -0,0 +1,173 @@ +' ============================================================ +' Test program for DATA/READ/RESTORE statements +' All DATA items form a single global pool, read sequentially. 
+' ============================================================ + +' All DATA statements (pool order matters) +DATA 10, 20, 30 +DATA "Hello", "World", "BASIC" +DATA 42, "Alice", 3.14, "Bob", 100 +DATA -5, -10, -3.14 +DATA 1.5, 2.7, 3.9, 4.1 + +PRINT "==== DATA/READ/RESTORE Tests ====" + +' ---- Test 1: Simple integer DATA/READ ---- +PRINT "" +PRINT "---- Test 1: Integer DATA/READ ----" +DIM a AS INTEGER +DIM b AS INTEGER +DIM c AS INTEGER +READ a, b, c +PRINT "Read: "; a; " "; b; " "; c + +' ---- Test 2: String DATA/READ ---- +PRINT "" +PRINT "---- Test 2: String DATA/READ ----" +DIM s1$ AS STRING +DIM s2$ AS STRING +DIM s3$ AS STRING +READ s1$, s2$, s3$ +PRINT "Read: "; s1$; " "; s2$; " "; s3$ + +' ---- Test 3: Mixed types ---- +PRINT "" +PRINT "---- Test 3: Mixed types ----" +DIM n AS INTEGER +DIM name$ AS STRING +DIM pi AS DOUBLE +DIM name2$ AS STRING +DIM m AS INTEGER +READ n, name$, pi, name2$, m +PRINT "Int: "; n +PRINT "Str: "; name$ +PRINT "Dbl: "; pi +PRINT "Str: "; name2$ +PRINT "Int: "; m + +' ---- Test 4: Negative numbers ---- +PRINT "" +PRINT "---- Test 4: Negative numbers ----" +DIM neg1 AS INTEGER +DIM neg2 AS INTEGER +DIM neg3 AS DOUBLE +READ neg1, neg2, neg3 +PRINT "Negatives: "; neg1; " "; neg2; " "; neg3 + +' ---- Test 5: Double values ---- +PRINT "" +PRINT "---- Test 5: Double values ----" +DIM f1 AS DOUBLE +DIM f2 AS DOUBLE +DIM f3 AS DOUBLE +DIM f4 AS DOUBLE +READ f1, f2, f3, f4 +PRINT "Doubles: "; f1; " "; f2; " "; f3; " "; f4 + +' ---- Test 6: RESTORE (reset to beginning) ---- +PRINT "" +PRINT "---- Test 6: RESTORE ----" +RESTORE +DIM ra AS INTEGER +DIM rb AS INTEGER +DIM rc AS INTEGER +READ ra, rb, rc +PRINT "After RESTORE: "; ra; " "; rb; " "; rc + +' ---- Test 7: READ in a loop ---- +PRINT "" +PRINT "---- Test 7: READ in loop ----" +RESTORE +DIM val AS INTEGER +DIM i AS INTEGER +PRINT "First 3 via loop: "; +FOR i = 1 TO 3 + READ val + PRINT val; " "; +NEXT i +PRINT "" + +' ---- Test 8: RESTORE then skip with multiple READs ---- +PRINT "" 
+PRINT "---- Test 8: Skip and read ----" +RESTORE +' Skip first 3 integers +DIM skip AS INTEGER +READ skip, skip, skip +' Now read the 3 strings +DIM rs1$ AS STRING +DIM rs2$ AS STRING +DIM rs3$ AS STRING +READ rs1$, rs2$, rs3$ +PRINT "Skipped to strings: "; rs1$; " "; rs2$; " "; rs3$ + +' ---- Test 9: RESTORE with line number ---- +PRINT "" +PRINT "---- Test 9: RESTORE with line number ----" +9000 DATA 111, 222, 333 +9010 DATA 444, 555, 666 +RESTORE 9010 +DIM r1 AS INTEGER +DIM r2 AS INTEGER +DIM r3 AS INTEGER +READ r1, r2, r3 +PRINT "RESTORE 9010: "; r1; " "; r2; " "; r3 + +RESTORE 9000 +READ r1, r2, r3 +PRINT "RESTORE 9000: "; r1; " "; r2; " "; r3 + +' ---- Test 10: Typed variables ---- +PRINT "" +PRINT "---- Test 10: Typed variables ----" +DATA 200, 1000, 100000, 1.5 +RESTORE +' The bare RESTORE above only rewinds to the start of the pool; RESTORE 9020 below supersedes it. +' The pool up to this point is: 10,20,30, Hello,World,BASIC, 42,Alice,3.14,Bob,100, +' -5,-10,-3.14, 1.5,2.7,3.9,4.1, 111,222,333, 444,555,666, 200,1000,100000,1.5 +' so items 25-28 (1-based) are 200,1000,100000,1.5. +' Rather than skipping the first 24 items, use a known restore point: +' a line-numbered DATA statement selected with a targeted RESTORE. +9020 DATA 255, 32000, 100000, 2.5 +RESTORE 9020 +DIM bval AS BYTE +DIM ival AS INTEGER +DIM lval AS LONG +DIM fval AS FLOAT +READ bval, ival, lval, fval +PRINT "BYTE: "; bval +PRINT "INTEGER: "; ival +PRINT "LONG: "; lval +PRINT "FLOAT: "; fval + +' ---- Test 11: Re-read same data ---- +PRINT "" +PRINT "---- Test 11: Re-read same data ----" +RESTORE 9020 +READ bval, ival, lval, fval +PRINT "Re-read BYTE: "; bval +PRINT "Re-read INTEGER: "; ival + +' ---- Test 12: RESTORE with named label ---- +PRINT "" +PRINT "---- Test 12: RESTORE with named label ----" +myData: DATA 777, 888, 999 +moreData: DATA 50, 60, 70 +RESTORE myData +DIM nd1 AS INTEGER +DIM nd2 AS INTEGER +DIM nd3 AS INTEGER +READ nd1, nd2, nd3 +PRINT "RESTORE myData: "; nd1; " "; nd2; " "; nd3 + +RESTORE moreData +READ nd1, nd2, nd3 +PRINT "RESTORE moreData: "; nd1; " "; nd2; 
" "; nd3 + +' Re-read from named label again +RESTORE myData +READ nd1 +PRINT "Re-read first from myData: "; nd1 + +PRINT "" +PRINT "==== ALL DATA/READ/RESTORE TESTS COMPLETE ====" diff --git a/test_fileio.bas b/test_fileio.bas new file mode 100644 index 0000000..a453fb1 --- /dev/null +++ b/test_fileio.bas @@ -0,0 +1,190 @@ +' ============================================================ +' Test program for File I/O features +' OPEN, CLOSE, PRINT #, INPUT #, LINE INPUT #, WRITE #, +' EOF(), LOF(), FREEFILE() +' ============================================================ + +PRINT "==== File I/O Tests ====" + +' ---- Test 1: Basic OPEN/PRINT #/CLOSE (write a file) ---- +PRINT "" +PRINT "---- Test 1: Write file with PRINT # ----" +OPEN "/tmp/basic_test1.txt" FOR OUTPUT AS #1 +PRINT #1, "Hello, File World!" +PRINT #1, "Line two" +PRINT #1, "Line three" +CLOSE #1 +PRINT "Wrote 3 lines to /tmp/basic_test1.txt" + +' ---- Test 2: Read file back with LINE INPUT # ---- +PRINT "" +PRINT "---- Test 2: Read file with LINE INPUT # ----" +DIM line$ AS STRING +OPEN "/tmp/basic_test1.txt" FOR INPUT AS #1 +DIM lineCount AS INTEGER +lineCount = 0 +DO WHILE NOT EOF(1) + LINE INPUT #1, line$ + lineCount = lineCount + 1 + PRINT "Line "; lineCount; ": "; line$ +LOOP +CLOSE #1 +PRINT "Read "; lineCount; " lines" + +' ---- Test 3: FREEFILE ---- +PRINT "" +PRINT "---- Test 3: FREEFILE ----" +DIM f AS INTEGER +f = FREEFILE() +PRINT "First free file number: "; f +OPEN "/tmp/basic_test2.txt" FOR OUTPUT AS #f +PRINT #f, "Written using FREEFILE" +CLOSE #f +PRINT "Wrote using file #"; f + +' Read it back to verify +OPEN "/tmp/basic_test2.txt" FOR INPUT AS #1 +LINE INPUT #1, line$ +CLOSE #1 +PRINT "Read back: "; line$ + +' ---- Test 4: Numeric data with PRINT # and INPUT # ---- +PRINT "" +PRINT "---- Test 4: Numeric I/O ----" +DIM x AS DOUBLE +DIM y AS DOUBLE +DIM n AS INTEGER +x = 3.14159 +y = 2.71828 +n = 42 + +OPEN "/tmp/basic_test3.txt" FOR OUTPUT AS #1 +PRINT #1, x +PRINT #1, y +PRINT #1, n 
+CLOSE #1 + +DIM rx AS DOUBLE +DIM ry AS DOUBLE +DIM rn AS INTEGER +OPEN "/tmp/basic_test3.txt" FOR INPUT AS #1 +INPUT #1, rx +INPUT #1, ry +INPUT #1, rn +CLOSE #1 +PRINT "Read x = "; rx +PRINT "Read y = "; ry +PRINT "Read n = "; rn + +' ---- Test 5: WRITE # (CSV-style output) ---- +PRINT "" +PRINT "---- Test 5: WRITE # (CSV) ----" +DIM name$ AS STRING +DIM age AS INTEGER +DIM score AS DOUBLE +name$ = "Alice" +age = 30 +score = 95.5 + +OPEN "/tmp/basic_test4.csv" FOR OUTPUT AS #1 +WRITE #1, name$, age, score +name$ = "Bob" +age = 25 +score = 88.3 +WRITE #1, name$, age, score +CLOSE #1 + +' Read back and display +OPEN "/tmp/basic_test4.csv" FOR INPUT AS #1 +DO WHILE NOT EOF(1) + LINE INPUT #1, line$ + PRINT "CSV: "; line$ +LOOP +CLOSE #1 + +' ---- Test 6: LOF (file length) ---- +PRINT "" +PRINT "---- Test 6: LOF ----" +OPEN "/tmp/basic_test1.txt" FOR INPUT AS #1 +DIM fsize AS LONG +fsize = LOF(1) +PRINT "File size of test1.txt: "; fsize; " bytes" +CLOSE #1 + +' ---- Test 7: APPEND mode ---- +PRINT "" +PRINT "---- Test 7: APPEND mode ----" +OPEN "/tmp/basic_test1.txt" FOR APPEND AS #1 +PRINT #1, "Appended line four" +PRINT #1, "Appended line five" +CLOSE #1 + +' Read all lines back +OPEN "/tmp/basic_test1.txt" FOR INPUT AS #1 +lineCount = 0 +DO WHILE NOT EOF(1) + LINE INPUT #1, line$ + lineCount = lineCount + 1 + PRINT "Line "; lineCount; ": "; line$ +LOOP +CLOSE #1 +PRINT "Total lines after append: "; lineCount + +' ---- Test 8: PRINT # with semicolons (no newline) ---- +PRINT "" +PRINT "---- Test 8: PRINT # with separators ----" +OPEN "/tmp/basic_test5.txt" FOR OUTPUT AS #1 +PRINT #1, "A"; "B"; "C" +PRINT #1, 10; " "; 20; " "; 30 +CLOSE #1 + +OPEN "/tmp/basic_test5.txt" FOR INPUT AS #1 +DO WHILE NOT EOF(1) + LINE INPUT #1, line$ + PRINT " "; line$ +LOOP +CLOSE #1 + +' ---- Test 9: Multiple files open simultaneously ---- +PRINT "" +PRINT "---- Test 9: Multiple files ----" +OPEN "/tmp/basic_multi1.txt" FOR OUTPUT AS #1 +OPEN "/tmp/basic_multi2.txt" FOR OUTPUT AS #2 
+PRINT #1, "File one content" +PRINT #2, "File two content" +CLOSE #2 +CLOSE #1 + +OPEN "/tmp/basic_multi1.txt" FOR INPUT AS #1 +LINE INPUT #1, line$ +PRINT "File 1: "; line$ +CLOSE #1 + +OPEN "/tmp/basic_multi2.txt" FOR INPUT AS #2 +LINE INPUT #2, line$ +PRINT "File 2: "; line$ +CLOSE #2 + +' ---- Test 10: File I/O in a FUNCTION ---- +PRINT "" +PRINT "---- Test 10: File I/O in FUNCTION ----" + +FUNCTION CountLines(BYVAL fname AS STRING) AS INTEGER + LOCAL count AS INTEGER + LOCAL buf$ AS STRING + count = 0 + OPEN fname FOR INPUT AS #3 + DO WHILE NOT EOF(3) + LINE INPUT #3, buf$ + count = count + 1 + LOOP + CLOSE #3 + CountLines = count +END FUNCTION + +DIM cnt AS INTEGER +cnt = CountLines("/tmp/basic_test1.txt") +PRINT "CountLines result: "; cnt + +PRINT "" +PRINT "==== ALL FILE I/O TESTS COMPLETE ====" diff --git a/test_inc_b.bas b/test_inc_b.bas new file mode 100644 index 0000000..015feb2 --- /dev/null +++ b/test_inc_b.bas @@ -0,0 +1,6 @@ +' Level B - includes C +'$INCLUDE: 'test_inc_c.bas' + +SUB FromB() + PRINT "Hello from file B (depth 2)" +END SUB diff --git a/test_inc_c.bas b/test_inc_c.bas new file mode 100644 index 0000000..ca0f842 --- /dev/null +++ b/test_inc_c.bas @@ -0,0 +1,6 @@ +' Level C - deepest include +CONST DEPTH_C = 3 + +SUB FromC() + PRINT "Hello from file C (depth 3)" +END SUB diff --git a/test_include.bas b/test_include.bas new file mode 100644 index 0000000..c5d01f1 --- /dev/null +++ b/test_include.bas @@ -0,0 +1,23 @@ +' Test $INCLUDE metacommand +PRINT "==== $INCLUDE Test ====" + +' Include a library file +'$INCLUDE: 'test_include_lib.bas' + +PRINT "Library version: "; LIB_VERSION + +' Call functions from included file +LibGreet "World" +PRINT "3 + 4 = "; LibAdd(3, 4) + +' Include a file that itself has no further includes +'$INCLUDE: 'test_include_nested.bas' +NestedGreet + +' Test nested include chain: this includes B which includes C +'$INCLUDE: 'test_inc_b.bas' +FromB +FromC +PRINT "Depth C constant: "; DEPTH_C + +PRINT "Include test 
complete!" diff --git a/test_include_lib.bas b/test_include_lib.bas new file mode 100644 index 0000000..b2fe160 --- /dev/null +++ b/test_include_lib.bas @@ -0,0 +1,10 @@ +' Library file for $INCLUDE testing +CONST LIB_VERSION = 1 + +SUB LibGreet(name AS STRING) + PRINT "Hello from library, "; name; "!" +END SUB + +FUNCTION LibAdd(a AS DOUBLE, b AS DOUBLE) AS DOUBLE + LibAdd = a + b +END FUNCTION diff --git a/test_include_nested.bas b/test_include_nested.bas new file mode 100644 index 0000000..e07b25f --- /dev/null +++ b/test_include_nested.bas @@ -0,0 +1,4 @@ +' Nested include file (no further includes) +SUB NestedGreet() + PRINT "Hello from nested include!" +END SUB diff --git a/test_labels.bas b/test_labels.bas new file mode 100644 index 0000000..7b4c1fa --- /dev/null +++ b/test_labels.bas @@ -0,0 +1,93 @@ +' ============================================================ +' Test program for named labels with GOTO and GOSUB +' ============================================================ + +PRINT "==== Named Label Tests ====" + +' ---- Test 1: Simple GOTO with named label ---- +PRINT "" +PRINT "---- Test 1: GOTO named label ----" +GOTO skipSection +PRINT "ERROR: This should be skipped" +skipSection: +PRINT "Jumped to skipSection" + +' ---- Test 2: GOSUB with named label ---- +PRINT "" +PRINT "---- Test 2: GOSUB named label ----" +GOSUB mySubroutine +PRINT "Returned from GOSUB" +GOTO afterSub + +mySubroutine: +PRINT "Inside mySubroutine" +RETURN + +afterSub: + +' ---- Test 3: Labels with underscores ---- +PRINT "" +PRINT "---- Test 3: Labels with underscores ----" +GOTO my_label_here +PRINT "ERROR: should not print" +my_label_here: +PRINT "Reached my_label_here" + +' ---- Test 4: Mixed numeric and named labels ---- +PRINT "" +PRINT "---- Test 4: Mixed numeric and named ----" +GOTO 100 +PRINT "ERROR: should not print" +100 PRINT "At line 100" +GOTO namedAfter100 +PRINT "ERROR: should not print" +namedAfter100: +PRINT "At namedAfter100" + +' ---- Test 5: Label with 
statement on same line ---- +PRINT "" +PRINT "---- Test 5: Label with trailing statement ----" +GOTO inlineLabel +PRINT "ERROR: should not print" +inlineLabel: PRINT "Statement on same line as label" + +' ---- Test 6: GOSUB to named label, then GOTO past RETURN ---- +PRINT "" +PRINT "---- Test 6: GOSUB named + flow control ----" +DIM count AS INTEGER +count = 0 +loopStart: +count = count + 1 +IF count <= 3 THEN + GOSUB doWork + GOTO loopStart +END IF +PRINT "Loop done, count = "; count +GOTO afterWork + +doWork: +PRINT " Working, count = "; count +RETURN + +afterWork: + +' ---- Test 7: Multiple GOSUBs to different named labels ---- +PRINT "" +PRINT "---- Test 7: Multiple named GOSUBs ----" +GOSUB subA +GOSUB subB +GOSUB subA +GOTO afterSubs + +subA: +PRINT "In subA" +RETURN + +subB: +PRINT "In subB" +RETURN + +afterSubs: + +PRINT "" +PRINT "==== ALL NAMED LABEL TESTS COMPLETE ====" diff --git a/test_multidim.bas b/test_multidim.bas new file mode 100644 index 0000000..649ff2b --- /dev/null +++ b/test_multidim.bas @@ -0,0 +1,176 @@ +' ============================================================ +' Test program for multidimensional arrays +' ============================================================ + +PRINT "==== Multidimensional Array Tests ====" + +' ---- Test 1: 2D integer array ---- +PRINT "" +PRINT "---- Test 1: 2D integer array ----" +DIM matrix(3, 4) AS INTEGER +DIM i AS INTEGER +DIM j AS INTEGER + +' Fill with i*10 + j +FOR i = 0 TO 3 + FOR j = 0 TO 4 + matrix(i, j) = i * 10 + j + NEXT j +NEXT i + +' Read back specific elements +PRINT "matrix(0,0) = "; matrix(0, 0) +PRINT "matrix(1,2) = "; matrix(1, 2) +PRINT "matrix(3,4) = "; matrix(3, 4) +PRINT "matrix(2,3) = "; matrix(2, 3) + +' ---- Test 2: 2D string array ---- +PRINT "" +PRINT "---- Test 2: 2D string array ----" +DIM grid$(2, 2) AS STRING +grid$(0, 0) = "NW" +grid$(0, 1) = "N" +grid$(0, 2) = "NE" +grid$(1, 0) = "W" +grid$(1, 1) = "C" +grid$(1, 2) = "E" +grid$(2, 0) = "SW" +grid$(2, 1) = "S" +grid$(2, 
2) = "SE" + +FOR i = 0 TO 2 + FOR j = 0 TO 2 + PRINT grid$(i, j); " "; + NEXT j + PRINT "" +NEXT i + +' ---- Test 3: 3D array ---- +PRINT "" +PRINT "---- Test 3: 3D array ----" +DIM cube(2, 3, 4) AS INTEGER +DIM k AS INTEGER + +' Fill with i*100 + j*10 + k +FOR i = 0 TO 2 + FOR j = 0 TO 3 + FOR k = 0 TO 4 + cube(i, j, k) = i * 100 + j * 10 + k + NEXT k + NEXT j +NEXT i + +PRINT "cube(0,0,0) = "; cube(0, 0, 0) +PRINT "cube(1,2,3) = "; cube(1, 2, 3) +PRINT "cube(2,3,4) = "; cube(2, 3, 4) +PRINT "cube(2,0,1) = "; cube(2, 0, 1) + +' ---- Test 4: 2D with expressions ---- +PRINT "" +PRINT "---- Test 4: 2D with expressions ----" +DIM r AS INTEGER +DIM c AS INTEGER +r = 5 +c = 3 +DIM table(r, c) AS DOUBLE +table(1, 1) = 3.14 +table(4, 2) = 2.71 +table(r, c) = 9.99 +PRINT "table(1,1) = "; table(1, 1) +PRINT "table(4,2) = "; table(4, 2) +PRINT "table(5,3) = "; table(r, c) + +' ---- Test 5: Loop over 2D array ---- +PRINT "" +PRINT "---- Test 5: Loop over 2D ----" +DIM small(2, 3) AS INTEGER +DIM sum AS INTEGER +sum = 0 +FOR i = 0 TO 2 + FOR j = 0 TO 3 + small(i, j) = (i + 1) * (j + 1) + sum = sum + small(i, j) + NEXT j +NEXT i +PRINT "Sum of multiplication table: "; sum + +' Print the table +FOR i = 0 TO 2 + FOR j = 0 TO 3 + PRINT small(i, j); " "; + NEXT j + PRINT "" +NEXT i + +' ---- Test 6: REDIM 2D ---- +PRINT "" +PRINT "---- Test 6: REDIM 2D ----" +DIM resizable(2, 2) AS INTEGER +resizable(0, 0) = 1 +resizable(1, 1) = 5 +resizable(2, 2) = 9 +PRINT "Before REDIM: "; resizable(1, 1) +REDIM resizable(4, 4) AS INTEGER +PRINT "After REDIM (0,0): "; resizable(0, 0) +resizable(3, 3) = 42 +PRINT "After REDIM (3,3): "; resizable(3, 3) + +' ---- Test 7: Mixed 1D and 2D ---- +PRINT "" +PRINT "---- Test 7: Mixed 1D and 2D ----" +DIM arr1d(5) AS INTEGER +DIM arr2d(3, 3) AS INTEGER +FOR i = 0 TO 5 + arr1d(i) = i * 2 +NEXT i +FOR i = 0 TO 3 + FOR j = 0 TO 3 + arr2d(i, j) = arr1d(i) + arr1d(j) + NEXT j +NEXT i +PRINT "arr1d(3) = "; arr1d(3) +PRINT "arr2d(2,3) = "; arr2d(2, 3) +PRINT 
"arr2d(1,2) = "; arr2d(1, 2) + +' ---- Test 8: GOSUB next to a 2D array (no SUB or BYREF call is made here) ---- +PRINT "" +PRINT "---- Test 8: 2D array element BYREF ----" +DIM val AS INTEGER +val = 100 +DIM testArr(3, 3) AS INTEGER +testArr(1, 2) = 50 +GOSUB addTen +PRINT "val after GOSUB: "; val +GOTO afterAddTen + +addTen: +val = val + 10 +RETURN + +afterAddTen: + +' ---- Test 9: 2D DOUBLE array ---- +PRINT "" +PRINT "---- Test 9: 2D DOUBLE array ----" +DIM dbl2d(2, 2) AS DOUBLE +dbl2d(0, 0) = 1.1 +dbl2d(0, 1) = 2.2 +dbl2d(0, 2) = 3.3 +dbl2d(1, 0) = 4.4 +dbl2d(1, 1) = 5.5 +dbl2d(1, 2) = 6.6 +dbl2d(2, 0) = 7.7 +dbl2d(2, 1) = 8.8 +dbl2d(2, 2) = 9.9 + +DIM dsum AS DOUBLE +dsum = 0 +FOR i = 0 TO 2 + FOR j = 0 TO 2 + dsum = dsum + dbl2d(i, j) + NEXT j +NEXT i +PRINT "Sum of doubles: "; dsum + +PRINT "" +PRINT "==== ALL MULTIDIMENSIONAL TESTS COMPLETE ====" diff --git a/test_newfeatures.bas b/test_newfeatures.bas new file mode 100644 index 0000000..d64508f --- /dev/null +++ b/test_newfeatures.bas @@ -0,0 +1,192 @@ +' Test file for all new features +' ============================================================ + +' === CONST === +PRINT "==== CONST ====" +CONST PI = 3.14159 +CONST MAX_SIZE = 100 +CONST GREETING$ = "Hello" +PRINT "PI = "; PI +PRINT "MAX_SIZE = "; MAX_SIZE +PRINT "GREETING$ = "; GREETING$ + +' === New Math Functions === +PRINT "" +PRINT "==== Math Functions ====" +PRINT "TAN(0.7854) = "; TAN(0.7854) +PRINT "ATN(1) = "; ATN(1) +PRINT "LOG(2.71828) = "; LOG(2.71828) +PRINT "EXP(1) = "; EXP(1) +PRINT "SGN(-5) = "; SGN(-5) +PRINT "SGN(0) = "; SGN(0) +PRINT "SGN(42) = "; SGN(42) + +' === RND / RANDOMIZE === +PRINT "" +PRINT "==== RND / RANDOMIZE ====" +RANDOMIZE 12345 +DIM r AS DOUBLE +r = RND +PRINT "RND (with seed 12345): "; r +r = RND() +PRINT "RND() again: "; r +RANDOMIZE +PRINT "RANDOMIZE (time-based) done" + +' === New String Functions === +PRINT "" +PRINT "==== String Functions ====" +PRINT "LTRIM$(' hello') = '"; LTRIM$(" hello"); "'" +PRINT "RTRIM$('hello ') = '"; RTRIM$("hello "); "'" 
+PRINT "TRIM$(' hello ') = '"; TRIM$(" hello "); "'" +PRINT "SPACE$(5) = '"; SPACE$(5); "'" +PRINT "HEX$(255) = "; HEX$(255) +PRINT "HEX$(16) = "; HEX$(16) +PRINT "OCT$(8) = "; OCT$(8) +PRINT "OCT$(255) = "; OCT$(255) +PRINT "STRING$(5, '*') = "; STRING$(5, "*") +PRINT "STRING$(3, 'AB') = "; STRING$(3, "AB") + +' === SWAP === +PRINT "" +PRINT "==== SWAP ====" +DIM a AS INTEGER +DIM b AS INTEGER +a = 10 +b = 20 +PRINT "Before: a="; a; " b="; b +SWAP a, b +PRINT "After: a="; a; " b="; b + +DIM s1 AS STRING +DIM s2 AS STRING +s1 = "first" +s2 = "second" +PRINT "Before: s1="; s1; " s2="; s2 +SWAP s1, s2 +PRINT "After: s1="; s1; " s2="; s2 + +' === LBOUND / UBOUND === +PRINT "" +PRINT "==== LBOUND / UBOUND ====" +DIM arr(10) AS INTEGER +PRINT "LBOUND(arr) = "; LBOUND(arr) +PRINT "UBOUND(arr) = "; UBOUND(arr) + +' === Bitwise Operators === +PRINT "" +PRINT "==== Bitwise Operators ====" +DIM x AS INTEGER +DIM y AS INTEGER +x = 15 +y = 9 +PRINT "15 AND 9 = "; x AND y +PRINT "15 OR 9 = "; x OR y +PRINT "15 XOR 9 = "; x XOR y +PRINT "NOT 0 = "; NOT 0 +PRINT "NOT -1 = "; NOT -1 + +' Logical-style usage (with comparisons) +DIM c AS INTEGER +c = 5 +IF c > 3 AND c < 10 THEN + PRINT "5 is between 3 and 10" +END IF +IF c > 10 OR c < 6 THEN + PRINT "5 > 10 OR 5 < 6 is TRUE" +END IF +IF NOT (c = 10) THEN + PRINT "NOT (5 = 10) is TRUE" +END IF + +' === SELECT CASE === +PRINT "" +PRINT "==== SELECT CASE ====" + +DIM grade AS INTEGER +grade = 85 +SELECT CASE grade + CASE 90 TO 100 + PRINT "Grade A" + CASE 80 TO 89 + PRINT "Grade B" + CASE 70 TO 79 + PRINT "Grade C" + CASE ELSE + PRINT "Grade F" +END SELECT + +DIM val AS INTEGER +val = 3 +SELECT CASE val + CASE 1 + PRINT "One" + CASE 2, 3 + PRINT "Two or Three" + CASE IS > 5 + PRINT "Greater than 5" + CASE ELSE + PRINT "Something else" +END SELECT + +' String SELECT CASE +DIM color AS STRING +color = "red" +SELECT CASE color + CASE "red" + PRINT "Color is RED" + CASE "blue" + PRINT "Color is BLUE" + CASE ELSE + PRINT "Unknown color" 
+END SELECT + +' === ON GOTO === +PRINT "" +PRINT "==== ON GOTO ====" +DIM choice AS INTEGER +choice = 2 +ON choice GOTO opt1, opt2, opt3 +GOTO skipOpts +opt1: +PRINT "Option 1" +GOTO skipOpts +opt2: +PRINT "Option 2" +GOTO skipOpts +opt3: +PRINT "Option 3" +skipOpts: + +' === ON GOSUB === +PRINT "" +PRINT "==== ON GOSUB ====" +choice = 1 +ON choice GOSUB sub1, sub2, sub3 +choice = 3 +ON choice GOSUB sub1, sub2, sub3 +GOTO skipSubs +sub1: +PRINT "Subroutine 1" +RETURN +sub2: +PRINT "Subroutine 2" +RETURN +sub3: +PRINT "Subroutine 3" +RETURN +skipSubs: + +' === MID$ Assignment === +PRINT "" +PRINT "==== MID$ Assignment ====" +DIM msg AS STRING +msg = "Hello World" +PRINT "Before: "; msg +MID$(msg, 7, 5) = "BASIC" +PRINT "After: "; msg +MID$(msg, 1, 5) = "Howdy" +PRINT "After2: "; msg + +PRINT "" +PRINT "All new feature tests complete!" diff --git a/test_redim.bas b/test_redim.bas new file mode 100644 index 0000000..fa6cf6c --- /dev/null +++ b/test_redim.bas @@ -0,0 +1,70 @@ +' Test dynamic arrays and REDIM +DIM arr(5) AS INTEGER +DIM i AS INTEGER + +' Fill initial array +FOR i = 0 TO 5 + arr(i) = i * 10 +NEXT i + +PRINT "Before REDIM:" +FOR i = 0 TO 5 + PRINT arr(i); +NEXT i +PRINT "" + +' Resize array larger +REDIM arr(10) AS INTEGER +arr(8) = 88 +arr(10) = 100 + +PRINT "After REDIM:" +FOR i = 0 TO 10 + PRINT arr(i); +NEXT i +PRINT "" + +' Test string array +DIM names(3) AS STRING +names(0) = "Alice" +names(1) = "Bob" +names(2) = "Charlie" +names(3) = "Diana" + +PRINT "Names:" +FOR i = 0 TO 3 + PRINT names(i) +NEXT i + +' Test FOR with STEP +PRINT "Even numbers 0 to 10:" +FOR i = 0 TO 10 STEP 2 + PRINT i; +NEXT i +PRINT "" + +' Test negative step +PRINT "Countdown by 2:" +FOR i = 10 TO 0 STEP -2 + PRINT i; +NEXT i +PRINT "" + +' Test integer division and MOD +DIM a AS INTEGER +DIM b AS INTEGER +a = 17 +b = 5 +PRINT "17 \ 5 = "; a \ b +PRINT "17 MOD 5 = "; a MOD b + +' Test nested loops with EXIT +PRINT "Nested loop with EXIT FOR:" +DIM j AS INTEGER +FOR i = 1 TO 5 
+    FOR j = 1 TO 5
+        IF j = 3 THEN EXIT FOR
+        PRINT i * 10 + j;
+    NEXT j
+    PRINT "";
+NEXT i
diff --git a/test_types.bas b/test_types.bas
new file mode 100644
index 0000000..0de96fb
--- /dev/null
+++ b/test_types.bas
@@ -0,0 +1,338 @@
+' ============================================================
+' Test program for extended data types
+' BYTE -> uint8_t, INTEGER -> int16_t, LONG -> int32_t,
+' FLOAT -> float, DOUBLE -> double
+' ============================================================
+
+' ---- BYTE (uint8_t): range 0 to 255 ----
+PRINT "==== BYTE (uint8_t) ===="
+DIM b1 AS BYTE
+DIM b2 AS BYTE
+
+b1 = 100
+b2 = 200
+PRINT "100 + 200 (byte) = "; b1 + b2
+
+b1 = 255
+PRINT "Max uint8_t = "; b1
+b2 = b1 + 1
+PRINT "255 + 1 overflows to "; b2
+
+' BYTE arithmetic: the difference (20) and product (150) both stay within 0..255
+b1 = 50
+b2 = 30
+PRINT "50 - 30 = "; b1 - b2
+PRINT "50 * 3 = "; b1 * 3
+
+' BYTE in array: store bi * 10 in elements 0..3, then print them on one line
+DIM byteArr(3) AS BYTE
+DIM bi AS BYTE
+FOR bi = 0 TO 3
+    byteArr(bi) = bi * 10
+NEXT bi
+PRINT "BYTE array: ";
+FOR bi = 0 TO 3
+    PRINT byteArr(bi); " ";
+NEXT bi
+PRINT ""
+
+' BYTE + INTEGER promotion: the arithmetic sum (1100) does not fit in a BYTE
+DIM bv AS BYTE
+DIM iv AS INTEGER
+bv = 100
+iv = 1000
+PRINT "byte + int16: "; bv + iv
+
+' BYTE function: BYVAL BYTE parameters and a BYTE return value
+FUNCTION ByteMax(BYVAL x AS BYTE, BYVAL y AS BYTE) AS BYTE
+    IF x > y THEN
+        ByteMax = x
+    ELSE
+        ByteMax = y
+    END IF
+END FUNCTION
+
+PRINT "ByteMax(100, 200) = "; ByteMax(100, 200)
+
+' ---- INTEGER (int16_t): range -32768 to 32767 ----
+PRINT ""
+PRINT "==== INTEGER (int16_t) ===="
+DIM i AS INTEGER
+DIM j AS INTEGER
+
+i = 100
+j = 200
+PRINT "100 + 200 = "; i + j
+
+i = 32000
+PRINT "32000 as INTEGER = "; i
+
+' Demonstrate int16_t overflow wrapping: 32767 + 1 is stored back into an INTEGER
+i = 32767
+PRINT "Max int16_t = "; i
+j = i + 1
+PRINT "32767 + 1 overflows to "; j
+
+' Integer suffix %: the variable name itself carries the % type suffix
+DIM count% AS INTEGER
+count% = 42
+PRINT "count% = "; count%
+
+' ---- LONG (int32_t): range -2147483648 to 2147483647 ----
+PRINT ""
+PRINT "==== LONG (int32_t) ===="
+DIM a AS LONG
+DIM b AS LONG
+DIM big AS LONG
+
+a = 100000
+b = 200000
+PRINT "100000 + 200000 = "; a + b
+
+big = 1000000
+big = big * 1000
+PRINT "1000000 * 1000 = "; big
+
+' LONG can hold values far beyond int16_t range (32767 * 100 = 3276700)
+big = 32767
+big = big * 100
+PRINT "32767 * 100 = "; big
+
+' Arithmetic with LONGs: every operand and result here is larger than 32767
+a = 123456
+b = 789012
+PRINT "123456 + 789012 = "; a + b
+PRINT "789012 - 123456 = "; b - a
+PRINT "123456 * 2 = "; a * 2
+
+' LONG division and modulo: the \ and MOD operators applied to LONG operands
+a = 1000000
+b = 7
+PRINT "1000000 \\ 7 = "; a \ b
+PRINT "1000000 MOD 7 = "; a MOD b
+
+' ---- FLOAT (float: ~7 digits precision) ----
+PRINT ""
+PRINT "==== FLOAT ===="
+DIM f1 AS FLOAT
+DIM f2 AS FLOAT
+DIM fResult AS FLOAT
+
+f1 = 3.14
+f2 = 2.71
+PRINT "3.14 + 2.71 = "; f1 + f2
+PRINT "3.14 * 2.71 = "; f1 * f2
+PRINT "3.14 / 2.71 = "; f1 / f2
+
+' Float suffix !: the variable name itself carries the ! type suffix
+DIM pi! AS FLOAT
+pi! = 3.14159
+PRINT "pi! = "; pi!
+
+' Float precision test: values that need more than ~7 significant digits
+f1 = 1.0
+f2 = 3.0
+fResult = f1 / f2
+PRINT "1.0 / 3.0 (float) = "; fResult
+
+f1 = 123456.789
+PRINT "123456.789 as float = "; f1
+
+' ---- DOUBLE (double: ~15 digits precision) ----
+PRINT ""
+PRINT "==== DOUBLE ===="
+DIM d1 AS DOUBLE
+DIM d2 AS DOUBLE
+DIM dResult AS DOUBLE
+
+d1 = 3.14159265358979
+d2 = 2.71828182845904
+PRINT "pi = "; d1
+PRINT "e = "; d2
+PRINT "pi + e = "; d1 + d2
+PRINT "pi * e = "; d1 * d2
+
+' Double suffix #: the variable name itself carries the # type suffix
+DIM exact# AS DOUBLE
+exact# = 1.0 / 3.0
+PRINT "1/3 (double) = "; exact#
+
+' ---- Type promotion in expressions ----
+PRINT ""
+PRINT "==== Type Promotion ===="
+
+' INTEGER + LONG -> LONG
+DIM si AS INTEGER
+DIM sl AS LONG
+si = 100
+sl = 100000
+PRINT "int16 + int32: "; si + sl
+
+' INTEGER + FLOAT -> FLOAT
+DIM sf AS FLOAT
+sf = 3.14
+PRINT "int16 + float: "; si + sf
+
+' INTEGER + DOUBLE -> DOUBLE
+DIM sd AS DOUBLE
+sd = 3.14159265358979
+PRINT "int16 + double: "; si + sd
+
+' LONG + FLOAT -> FLOAT
+PRINT "int32 + float: "; sl + sf
+
+' LONG + DOUBLE -> DOUBLE
+PRINT "int32 + double: "; sl + sd
+
+' FLOAT + DOUBLE -> DOUBLE
+PRINT "float + double: "; sf + sd
+
+' ---- Mixed type functions ----
+PRINT ""
+PRINT "==== Functions with New Types ===="
+
+FUNCTION AddLongs(BYVAL x AS LONG, BYVAL y AS LONG) AS LONG
+    AddLongs = x + y
+END FUNCTION
+
+FUNCTION MultiplyFloat(BYVAL x AS FLOAT, BYVAL y AS FLOAT) AS FLOAT
+    MultiplyFloat = x * y
+END FUNCTION
+
+FUNCTION LongFactorial(BYVAL n AS LONG) AS LONG
+    IF n <= 1 THEN
+        LongFactorial = 1
+    ELSE
+        LongFactorial = n * LongFactorial(n - 1)
+    END IF
+END FUNCTION
+
+FUNCTION FloatSqrt(BYVAL x AS FLOAT) AS FLOAT
+    FloatSqrt = SQR(x)
+END FUNCTION
+
+PRINT "AddLongs(100000, 200000) = "; AddLongs(100000, 200000)
+PRINT "MultiplyFloat(3.14, 2.0) = "; MultiplyFloat(3.14, 2.0)
+PRINT "LongFactorial(12) = "; LongFactorial(12)
+PRINT "FloatSqrt(2.0) = "; FloatSqrt(2.0)
+
+' ---- BYREF with new types (SUBs that assign to their BYREF LONG/FLOAT parameters) ----
+PRINT ""
+PRINT "==== BYREF with New Types ===="
+
+SUB SwapLongs(BYREF x AS LONG, BYREF y AS LONG)
+    LOCAL t AS LONG
+    t = x
+    x = y
+    y = t
+END SUB
+
+SUB ScaleFloat(BYREF val AS FLOAT, BYVAL factor AS FLOAT)
+    val = val * factor
+END SUB
+
+DIM lx AS LONG
+DIM ly AS LONG
+lx = 100000
+ly = 999999
+PRINT "Before SwapLongs: "; lx; " "; ly
+CALL SwapLongs(lx, ly)
+PRINT "After SwapLongs: "; lx; " "; ly
+
+DIM fv AS FLOAT
+fv = 2.5
+PRINT "Before ScaleFloat: "; fv
+CALL ScaleFloat(fv, 4.0)
+PRINT "After ScaleFloat(2.5, 4.0): "; fv
+
+' ---- Arrays with new types (LONG and FLOAT arrays, indices 0..5) ----
+PRINT ""
+PRINT "==== Arrays with New Types ===="
+
+DIM longArr(5) AS LONG
+DIM fltArr(5) AS FLOAT
+DIM idx AS INTEGER
+
+FOR idx = 0 TO 5
+    longArr(idx) = (idx + 1) * 100000
+    fltArr(idx) = (idx + 1) * 1.5
+NEXT idx
+
+PRINT "LONG array: ";
+FOR idx = 0 TO 5
+    PRINT longArr(idx); " ";
+NEXT idx
+PRINT ""
+
+PRINT "FLOAT array: ";
+FOR idx = 0 TO 5
+    PRINT fltArr(idx); " ";
+NEXT idx
+PRINT ""
+
+' ---- STATIC with new types (LONG and FLOAT STATIC locals inside functions) ----
+PRINT ""
+PRINT "==== STATIC with New Types ===="
+
+FUNCTION LongCounter() AS LONG
+    STATIC lc AS LONG
+    lc = lc + 100000
+    LongCounter = lc
+END FUNCTION
+
+FUNCTION FloatAccum(BYVAL v AS FLOAT) AS FLOAT
+    STATIC fs AS FLOAT
+    fs = fs
+ v
+    FloatAccum = fs
+END FUNCTION
+
+PRINT "LongCounter: ";
+FOR idx = 1 TO 5
+    PRINT LongCounter(); " ";
+NEXT idx
+PRINT ""
+
+PRINT "FloatAccum: ";
+DIM ftemp AS FLOAT
+ftemp = FloatAccum(1.1)
+PRINT ftemp; " ";
+ftemp = FloatAccum(2.2)
+PRINT ftemp; " ";
+ftemp = FloatAccum(3.3)
+PRINT ftemp; " ";
+PRINT ""
+
+' ---- Mixing classic line numbers with new types (GOTO loop adds 50000 until counter >= 250000) ----
+PRINT ""
+PRINT "==== Classic Lines with New Types ===="
+
+DIM counter AS LONG
+counter = 0
+
+2000 counter = counter + 50000
+2010 IF counter >= 250000 THEN GOTO 2030
+2020 GOTO 2000
+2030 PRINT "LONG counter reached: "; counter
+
+' ---- Range demonstration (largest INTEGER and LONG values) ----
+PRINT ""
+PRINT "==== Type Ranges ===="
+DIM maxInt AS INTEGER
+DIM maxLong AS LONG
+maxInt = 32767
+maxLong = 2147483647
+PRINT "Max INTEGER (int16_t): "; maxInt
+PRINT "Max LONG (int32_t): "; maxLong
+
+' Show that LONG can handle what overflows INTEGER: 200 * 200 = 40000 > 32767
+DIM intVal AS INTEGER
+DIM longVal AS LONG
+intVal = 200
+longVal = 200
+intVal = intVal * intVal
+longVal = longVal * longVal
+PRINT "200 * 200 as INTEGER: "; intVal
+PRINT "200 * 200 as LONG: "; longVal
+
+PRINT ""
+PRINT "==== ALL TYPE TESTS COMPLETE ===="
diff --git a/test_udt.bas b/test_udt.bas
new file mode 100644
index 0000000..6e0c819
--- /dev/null
+++ b/test_udt.bas
@@ -0,0 +1,361 @@
+' ============================================================
+' Test program for user-defined types and random-access file I/O
+' ============================================================
+
+PRINT "==== User-Defined Type Tests ===="
+
+' ---- Test 1: Basic TYPE definition and field access ----
+PRINT ""
+PRINT "---- Test 1: Basic TYPE and field access ----"
+
+TYPE PersonRecord
+    firstName AS STRING * 20
+    lastName AS STRING * 30
+    age AS INTEGER
+    salary AS DOUBLE
+END TYPE
+
+DIM person AS PersonRecord
+person.firstName = "John"
+person.lastName = "Doe"
+person.age = 30
+person.salary = 55000.50
+
+PRINT "Name: "; person.firstName; " "; person.lastName
+PRINT "Age: "; person.age
+PRINT "Salary: "; person.salary
+
+' ---- Test 2: Multiple TYPE definitions ----
+PRINT ""
+PRINT "---- Test 2: Multiple TYPE definitions ----"
+
+TYPE Point2D
+    x AS DOUBLE
+    y AS DOUBLE
+END TYPE
+
+TYPE Rectangle
+    width AS DOUBLE
+    height AS DOUBLE
+END TYPE
+
+DIM pt AS Point2D
+pt.x = 3.0
+pt.y = 4.0
+PRINT "Point: ("; pt.x; ", "; pt.y; ")"
+
+DIM rect AS Rectangle
+rect.width = 10.5
+rect.height = 20.3
+PRINT "Rect: "; rect.width; " x "; rect.height
+
+' ---- Test 3: UDT with integer fields ----
+PRINT ""
+PRINT "---- Test 3: UDT integer fields ----"
+
+TYPE Color
+    r AS INTEGER
+    g AS INTEGER
+    b AS INTEGER
+END TYPE
+
+DIM c AS Color
+c.r = 255
+c.g = 128
+c.b = 64
+PRINT "Color: ("; c.r; ", "; c.g; ", "; c.b; ")"
+
+' ---- Test 4: Array of UDTs (fields written and read through an indexed element) ----
+PRINT ""
+PRINT "---- Test 4: Array of UDTs ----"
+
+DIM points(3) AS Point2D
+DIM i AS INTEGER
+
+FOR i = 0 TO 3
+    points(i).x = i * 1.5
+    points(i).y = i * 2.5
+NEXT i
+
+FOR i = 0 TO 3
+    PRINT "points("; i; "): ("; points(i).x; ", "; points(i).y; ")"
+NEXT i
+
+' ---- Test 5: UDT field expressions (distance between p1 and p2: a 3-4-5 triangle) ----
+PRINT ""
+PRINT "---- Test 5: UDT field expressions ----"
+
+DIM p1 AS Point2D
+DIM p2 AS Point2D
+p1.x = 1.0
+p1.y = 2.0
+p2.x = 4.0
+p2.y = 6.0
+
+DIM dist AS DOUBLE
+dist = SQR((p2.x - p1.x) * (p2.x - p1.x) + (p2.y - p1.y) * (p2.y - p1.y))
+PRINT "Distance: "; dist
+
+' ---- Test 6: SIZEOF (applied to TYPE names, result stored in a LONG) ----
+PRINT ""
+PRINT "---- Test 6: SIZEOF ----"
+
+DIM szPerson AS LONG
+szPerson = SIZEOF(PersonRecord)
+PRINT "SIZEOF(PersonRecord) = "; szPerson
+
+DIM szPoint AS LONG
+szPoint = SIZEOF(Point2D)
+PRINT "SIZEOF(Point2D) = "; szPoint
+
+DIM szColor AS LONG
+szColor = SIZEOF(Color)
+PRINT "SIZEOF(Color) = "; szColor
+
+' ---- Test 7: Random-access file I/O ----
+PRINT ""
+PRINT "---- Test 7: Random-access file I/O ----"
+
+TYPE Item
+    name AS STRING * 16
+    price AS DOUBLE
+    qty AS INTEGER
+END TYPE
+
+DIM item1 AS Item
+DIM item2 AS Item
+DIM item3 AS Item
+
+item1.name = "Widget"
+item1.price = 9.99
+item1.qty = 100
+
+item2.name = "Gadget"
+item2.price = 24.95
+item2.qty = 50
+
+item3.name = "Doohickey"
+item3.price = 4.50
+item3.qty = 200
+
+' Write records 1..3, one Item per record (record length = SIZEOF(Item))
+OPEN "/tmp/test_items.dat" FOR RANDOM AS #1 LEN = SIZEOF(Item)
+PUT #1, 1, item1
+PUT #1, 2, item2
+PUT #1, 3, item3
+CLOSE #1
+
+' Read records back in a different order (2, 1, 3) than they were written
+DIM loaded AS Item
+OPEN "/tmp/test_items.dat" FOR RANDOM AS #1 LEN = SIZEOF(Item)
+
+GET #1, 2, loaded
+PRINT "Record 2: "; loaded.name; " $"; loaded.price; " qty="; loaded.qty
+
+GET #1, 1, loaded
+PRINT "Record 1: "; loaded.name; " $"; loaded.price; " qty="; loaded.qty
+
+GET #1, 3, loaded
+PRINT "Record 3: "; loaded.name; " $"; loaded.price; " qty="; loaded.qty
+
+CLOSE #1
+
+' ---- Test 8: Overwrite and re-read record (PUT then GET on the same open file) ----
+PRINT ""
+PRINT "---- Test 8: Overwrite and re-read ----"
+
+DIM updated AS Item
+updated.name = "NewWidget"
+updated.price = 19.99
+updated.qty = 75
+
+OPEN "/tmp/test_items.dat" FOR RANDOM AS #1 LEN = SIZEOF(Item)
+PUT #1, 1, updated
+GET #1, 1, loaded
+PRINT "Updated record 1: "; loaded.name; " $"; loaded.price; " qty="; loaded.qty
+
+' Verify record 3 is unchanged by the overwrite of record 1
+GET #1, 3, loaded
+PRINT "Record 3 unchanged: "; loaded.name; " $"; loaded.price; " qty="; loaded.qty
+CLOSE #1
+
+' ---- Test 9: UDT with mixed field types (every field is assigned, then printed) ----
+PRINT ""
+PRINT "---- Test 9: Mixed field types ----"
+
+TYPE MixedRecord
+    label AS STRING * 10
+    byteVal AS BYTE
+    intVal AS INTEGER
+    longVal AS LONG
+    floatVal AS FLOAT
+    dblVal AS DOUBLE
+END TYPE
+
+DIM m AS MixedRecord
+m.label = "TestMixed"
+m.byteVal = 42
+m.intVal = -1000
+m.longVal = 100000
+m.floatVal = 3.14
+m.dblVal = 2.71828
+PRINT "Label: "; m.label
+PRINT "Byte: "; m.byteVal
+PRINT "Int: "; m.intVal
+PRINT "Long: "; m.longVal
+PRINT "Float: "; m.floatVal
+PRINT "Double: "; m.dblVal
+
+' ---- Test 10: Nested UDTs ----
+PRINT ""
+PRINT "---- Test 10: Nested UDTs ----"
+
+TYPE Vec2
+    x AS DOUBLE
+    y AS DOUBLE
+END TYPE
+
+TYPE Circle
+    center AS Vec2
+    radius AS DOUBLE
+END TYPE
+
+DIM circ AS Circle
+circ.center.x =
10.0
+circ.center.y = 20.0
+circ.radius = 5.5
+PRINT "Circle center: ("; circ.center.x; ", "; circ.center.y; ")"
+PRINT "Circle radius: "; circ.radius
+
+' ---- Test 11: Nested UDT read in expressions (differences between circ2 and circ centers) ----
+PRINT ""
+PRINT "---- Test 11: Nested UDT expressions ----"
+
+DIM circ2 AS Circle
+circ2.center.x = 30.0
+circ2.center.y = 40.0
+circ2.radius = 2.0
+
+DIM dx AS DOUBLE
+DIM dy AS DOUBLE
+dx = circ2.center.x - circ.center.x
+dy = circ2.center.y - circ.center.y
+PRINT "dx = "; dx
+PRINT "dy = "; dy
+
+' ---- Test 12: UDT whole-struct copy (one Vec2 variable assigned to another) ----
+PRINT ""
+PRINT "---- Test 12: UDT copy ----"
+
+DIM orig AS Vec2
+orig.x = 42.0
+orig.y = 99.0
+
+DIM copy AS Vec2
+copy = orig
+PRINT "copy.x = "; copy.x
+PRINT "copy.y = "; copy.y
+
+' Modify original, verify copy is independent (copy.x was 42 before orig.x is cleared)
+orig.x = 0.0
+PRINT "After modify orig, copy.x = "; copy.x
+
+' ---- Test 13: Nested UDT sub-struct copy ----
+PRINT ""
+PRINT "---- Test 13: Nested UDT sub-struct copy ----"
+
+DIM savedCenter AS Vec2
+savedCenter = circ.center
+PRINT "savedCenter: ("; savedCenter.x; ", "; savedCenter.y; ")"
+
+' Assign sub-struct to nested field: a whole Vec2 is stored into circ3.center
+DIM circ3 AS Circle
+DIM newCenter AS Vec2
+newCenter.x = 77.0
+newCenter.y = 88.0
+circ3.center = newCenter
+circ3.radius = 3.0
+PRINT "circ3 center: ("; circ3.center.x; ", "; circ3.center.y; ")"
+PRINT "circ3 radius: "; circ3.radius
+
+' ---- Test 14: Array of UDTs with nested types (Circle elements 0..2) ----
+PRINT ""
+PRINT "---- Test 14: Array of nested UDTs ----"
+
+DIM circles(2) AS Circle
+circles(0).center.x = 1.0
+circles(0).center.y = 2.0
+circles(0).radius = 10.0
+circles(1).center.x = 3.0
+circles(1).center.y = 4.0
+circles(1).radius = 20.0
+circles(2).center.x = 5.0
+circles(2).center.y = 6.0
+circles(2).radius = 30.0
+
+DIM ci AS INTEGER
+FOR ci = 0 TO 2
+    PRINT "circles("; ci; "): ("; circles(ci).center.x; ", "; circles(ci).center.y; ") r="; circles(ci).radius
+NEXT ci
+
+' ---- Test 15: Copy between array elements ----
+PRINT ""
+PRINT "---- Test 15: Array element copy
----"
+
+circles(0) = circles(2)
+PRINT "After copy, circles(0).center.x = "; circles(0).center.x
+PRINT "After copy, circles(0).radius = "; circles(0).radius
+
+' ---- Test 16: Three-level nesting (Shape -> LineSegment -> Vec2) ----
+PRINT ""
+PRINT "---- Test 16: Three-level nesting ----"
+
+TYPE LineSegment
+    start AS Vec2
+    finish AS Vec2
+END TYPE
+
+TYPE Shape
+    outline AS LineSegment
+    label AS STRING * 16
+END TYPE
+
+DIM shape AS Shape
+shape.outline.start.x = 1.0
+shape.outline.start.y = 2.0
+shape.outline.finish.x = 10.0
+shape.outline.finish.y = 20.0
+shape.label = "MyLine"
+
+PRINT "Shape: "; shape.label
+PRINT " from ("; shape.outline.start.x; ", "; shape.outline.start.y; ")"
+PRINT " to ("; shape.outline.finish.x; ", "; shape.outline.finish.y; ")"
+
+' ---- Test 17: Nested UDT file I/O (two Circle records written, then read back as 2, 1) ----
+PRINT ""
+PRINT "---- Test 17: Nested UDT file I/O ----"
+
+DIM c1 AS Circle
+DIM c2 AS Circle
+c1.center.x = 111.0
+c1.center.y = 222.0
+c1.radius = 333.0
+c2.center.x = 444.0
+c2.center.y = 555.0
+c2.radius = 666.0
+
+OPEN "/tmp/test_circles.dat" FOR RANDOM AS #1 LEN = SIZEOF(Circle)
+PUT #1, 1, c1
+PUT #1, 2, c2
+CLOSE #1
+
+DIM ld AS Circle
+OPEN "/tmp/test_circles.dat" FOR RANDOM AS #1 LEN = SIZEOF(Circle)
+GET #1, 2, ld
+PRINT "Loaded circle 2: ("; ld.center.x; ", "; ld.center.y; ") r="; ld.radius
+GET #1, 1, ld
+PRINT "Loaded circle 1: ("; ld.center.x; ", "; ld.center.y; ") r="; ld.radius
+CLOSE #1
+
+PRINT ""
+PRINT "==== ALL UDT TESTS COMPLETE ===="