joeydev/src/vecparse.c

775 lines
23 KiB
C

/*
* JoeyDev
* Copyright (C) 2018-2023 Scott Duensing <scott@kangaroopunch.com>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
#pragma clang diagnostic push
#pragma ide diagnostic ignored "EmptyDeclOrStmt"
#include <errno.h>
#include "common.h"
#include "vecparse.h"
#include "utils.h"
// TRUE when the encoded 16-bit value is a literal number, FALSE when its
// type bit (0x8000) marks it as a variable reference. The argument is
// parenthesized so compound expressions can be passed safely.
#define IS_NUMBER(n) (((n) & 0x8000) ? FALSE : TRUE)
// One entry of the keyword table: the keyword text as it appears in a
// program and the parser ID it maps to.
typedef struct KeywordsS {
char *command; // Keyword text; matched case-insensitively. NULL ends the table.
ParserKeywordT id; // ID selected when the keyword matches.
} KeywordsT;
// One entry of the math operator table: the operator text as it appears
// in a program and the math ID written to the bytecode for it.
typedef struct OperatorsS {
char *operator; // Operator text; matched case-insensitively. NULL ends the table.
ParserMathT id; // ID emitted when the operator matches.
} OperatorsT;
// A label record. Used both as the element of the defined-labels hashmap
// and, heap allocated, as an entry in the list of unresolved references.
typedef struct LabelS {
char *key; // Label name.
int value; // Defined label: bytecode offset of the label. Unresolved reference: bytecode offset of the word to patch.
int line; // Source line of the reference; only set for unresolved references.
} LabelT;
// Forward declarations of the file-local helpers defined below.
static int labelGetValue(int lineNumber, VecByteCodeT *bytecode, LabelT *labels, LabelT ***unresolved, char *label);
static void outputByte(VecByteCodeT *bytecode, unsigned short word);
static void outputWord(VecByteCodeT *bytecode, unsigned short word);
static char *parserGetNextLine(char *str, char **savePtr);
static gboolean parserGetNextValue(char *token, char **valueEnd, char ***variables, int *x);
static int parserGetWord(char *wordList, char **tokenEnd);
static gboolean parserGetX(char **tokenEnd, char ***variables, int *x);
static gboolean parserGetXY(char **tokenEnd, char ***variables, int *x, int *y);
static gboolean parserGetXYZ(char **tokenEnd, char ***variables, int *x, int *y, int *z);
static gboolean variableCollect(char *value, char ***variables, int *result);
// Look up a label by name (case-insensitive).
//
// Returns the value stored for the label when it is already defined.
// Otherwise the use is treated as a forward reference: a new LabelT holding
// a copy of the name, the current bytecode length and the source line is
// appended to *unresolved, and a negative placeholder is returned.
static int labelGetValue(int lineNumber, VecByteCodeT *bytecode, LabelT *labels, LabelT ***unresolved, char *label) {
    int     slot;
    LabelT *pending;

    // Manual scan rather than a hash lookup so case is ignored.
    for (slot = 0; slot < hmlen(labels); slot++) {
        if (strcasecmp(label, labels[slot].key) == 0) {
            return labels[slot].value;
        }
    }

    // Unknown so far: remember where it was used so it can be resolved later.
    pending        = NEW(LabelT);
    pending->key   = strdup(label);
    pending->value = bytecode->length;
    pending->line  = lineNumber;
    arrput(*unresolved, pending);

    return 0 - ((int)arrlen(*unresolved) + 1);
}
// Append the low byte of 'word' to the bytecode buffer, growing the buffer
// first. When 'word' carries the variable flag (0x8000), the flag is
// carried over into the emitted byte as its MSb (0x80).
static void outputByte(VecByteCodeT *bytecode, unsigned short word) {
    unsigned char low = (unsigned char)word;

    utilEnsureBufferSize(&bytecode->bytes, &bytecode->bufferSize, bytecode->length + 1);

    bytecode->bytes[bytecode->length++] = (word & 0x8000) ? (unsigned char)(low | 0x80) : low;
}
// Append 'word' to the bytecode buffer as two bytes, high byte first,
// growing the buffer first.
static void outputWord(VecByteCodeT *bytecode, unsigned short word) {
    utilEnsureBufferSize(&bytecode->bytes, &bytecode->bufferSize, bytecode->length + 2);

    bytecode->bytes[bytecode->length++] = word >> 8;
    bytecode->bytes[bytecode->length++] = word & 0xFF;
}
// Line splitter with a strtok_r-like calling convention.
//
// strtok_r itself is unsuitable here because it collapses runs of
// delimiters into one, which would silently swallow blank lines.
//
// Pass the buffer in 'str' on the first call and NULL afterwards; '*savePtr'
// carries the position between calls. The buffer is modified in place: each
// newline is overwritten with a terminator. Leading spaces and tabs of the
// returned line are skipped. Returns NULL once the input is exhausted.
static char *parserGetNextLine(char *str, char **savePtr) {
    char *lineStart;
    char *newline;

    if (str != NULL) {
        *savePtr = str;
    }

    lineStart = *savePtr;
    if (lineStart == NULL) {
        return NULL;
    }

    // Terminate this line and remember where the next one begins.
    newline = strchr(lineStart, '\n');
    if (newline != NULL) {
        *newline = '\0';
        *savePtr = newline + 1;
    } else {
        *savePtr = NULL;
    }

    // Skip indentation.
    while (*lineStart == ' ' || *lineStart == '\t') {
        lineStart++;
    }

    return lineStart;
}
// Fetch the next item of a comma separated list and encode it.
//
// 'token' is the list on the first call and NULL on later calls, with
// '*valueEnd' holding the strtok_r state in between. Returns FALSE when the
// list has no more items, otherwise whatever variableCollect() reports.
static gboolean parserGetNextValue(char *token, char **valueEnd, char ***variables, int *x) {
    char *item = strtok_r(token, ",", valueEnd);

    if (item != NULL) {
        return variableCollect(item, variables, x);
    }
    return FALSE;
}
// Consume the next space separated token of the current line and look it up
// (case-insensitively) in 'wordList', a space separated list of words.
//
// Returns the zero-based position of the matching word in the list, or -1
// when the line has no more tokens, the token is not in the list, or the
// working copy of the list cannot be allocated. The token is consumed from
// the line in every case where one exists.
static int parserGetWord(char *wordList, char **tokenEnd) {
    char *token;
    char *word;
    char *wordEnd  = NULL;
    char *mutable;
    int   position = 0;
    int   index    = -1;

    token = strtok_r(NULL, " ", tokenEnd);
    if (token == NULL) return -1;

    // strtok_r writes into its input, so walk a private copy of the list.
    mutable = strdup(wordList);
    if (mutable == NULL) return -1;

    word = strtok_r(mutable, " ", &wordEnd);
    while (word != NULL) {
        if (strcasecmp(token, word) == 0) {
            // Only a real match sets the result; running off the end of
            // the list must leave it at -1.
            index = position;
            break;
        }
        position++;
        word = strtok_r(NULL, " ", &wordEnd);
    }

    DEL(mutable);

    return index;
}
// Consume the next space separated token of the current line and encode it
// as a single value in '*x'. Returns FALSE when the line has no more tokens
// or the token cannot be encoded.
static gboolean parserGetX(char **tokenEnd, char ***variables, int *x) {
    char *single = strtok_r(NULL, " ", tokenEnd);

    return (single != NULL) ? variableCollect(single, variables, x) : FALSE;
}
// Consume the next space separated token of the current line and decode it
// as an "x,y" pair. Returns FALSE when the token is missing or either half
// cannot be read.
static gboolean parserGetXY(char **tokenEnd, char ***variables, int *x, int *y) {
    char *pair = strtok_r(NULL, " ", tokenEnd);
    char *cursor;

    if (pair == NULL) {
        return FALSE;
    }

    // Second half is only attempted when the first one was read.
    return parserGetNextValue(pair, &cursor, variables, x)
        && parserGetNextValue(NULL, &cursor, variables, y);
}
// Consume the next space separated token of the current line and decode it
// as an "x,y,z" triple. Returns FALSE when the token is missing or any of
// the three parts cannot be read.
static gboolean parserGetXYZ(char **tokenEnd, char ***variables, int *x, int *y, int *z) {
    int  *targets[3];
    char *triple = strtok_r(NULL, " ", tokenEnd);
    char *cursor;
    int   part;

    if (triple == NULL) {
        return FALSE;
    }

    targets[0] = x;
    targets[1] = y;
    targets[2] = z;

    // The first read starts the comma scan; the others continue it.
    for (part = 0; part < 3; part++) {
        if (!parserGetNextValue(part == 0 ? triple : NULL, &cursor, variables, targets[part])) {
            return FALSE;
        }
    }

    return TRUE;
}
// Encode one textual argument into the 16-bit value format used by the
// bytecode and store it in '*result'.
//
//  - "%name" (more than one character) is a variable. It is looked up
//    case-insensitively in '*variables' and appended when unknown. The
//    stored pointer is 'value' itself (no copy is made), so the text must
//    outlive the list. The result is the variable's index with bit 0x8000
//    set.
//  - Anything else must be a complete base-10 integer from -16383 to 16383.
//    Negative numbers are stored as their magnitude with bit 0x4000 set;
//    bit 0x8000 is always clear for numbers.
//
// Returns TRUE on success, FALSE when the text is not a valid number or is
// out of range.
static gboolean variableCollect(char *value, char ***variables, int *result) {
    const long limit = 0x3fff; // Largest magnitude that fits in the 14 value bits.
    int        index;
    int        found;
    long       number;
    char      *endPtr = NULL;

    // Is it a variable or number?
    if (strlen(value) > 1 && value[0] == '%') {
        // It's a variable.  Do we know it?
        found = -1;
        for (index = 0; index < arrlen(*variables); index++) {
            if (strcasecmp((*variables)[index], value) == 0) {
                found = index;
                break;
            }
        }
        if (found < 0) {
            // Not yet known.  Append through the caller's pointer: arrput
            // may move the array, and growing a local copy would lose the
            // new entry and leave the caller with a stale pointer.
            found = (int)arrlen(*variables);
            arrput(*variables, value);
        }
        // Set MSb to indicate it's a variable.
        found |= 0x8000;
    } else {
        // It's a number.
        errno  = 0;
        number = strtol(value, &endPtr, 10);
        // Reject conversion errors, text with no digits, and trailing junk.
        if (errno != 0 || endPtr == value || *endPtr != 0) return FALSE;
        // Reject anything the encoding can't hold instead of letting the
        // masking below silently turn it into a different value.
        if (number < -limit || number > limit) return FALSE;
        found = (int)number;
        // Negative values are stored as magnitude plus a sign bit.
        if (found < 0) {
            found = -found;
            found |= 0x4000;
        }
        // Ensure MSb is cleared, so we know it's a number.
        found &= 0x7fff;
    }

    *result = found;

    return TRUE;
}
/*
 * Compile a program into bytecode.
 *
 * programIn is tokenized in place (line ends and separators are overwritten
 * with terminators), so it must be writable. Variable and label names are
 * referenced inside that buffer while parsing. Output is appended to
 * 'bytecode' through outputByte()/outputWord().
 *
 * Every line is one of:
 *   - blank
 *   - math:     %variable (operator) (value)
 *   - label:    name:
 *   - keyword:  one of the entries of commands[] followed by its arguments
 * Tokens are separated by single spaces.
 *
 * Labels may be used before they are defined. Such uses are recorded while
 * parsing and patched into the bytecode once the whole program is read.
 *
 * Returns -1 on success. On failure, returns the zero-based number of the
 * first line that could not be parsed or, for a label that is never
 * defined, of the line that referenced it. Parsing stops at the first bad
 * line; bytecode emitted up to that point is left in 'bytecode'.
 *
 * Value encoding
 * --------------
 * (value) is a 16-bit integer. Since we only need a fraction of the
 * possible values provided by this, we steal a couple bits for our
 * own use. All values are stored without messing with 2's complement.
 *
 *   bit 15      t  Type:     0 = literal number, 1 = variable reference
 *   bit 14      n  Negative: 1 = the number is negative
 *   bits 13-0   v  Value:    magnitude, or index in the variable table
 *
 *   tnvvvvvvvvvvvvvv
 *
 * So with this scheme we can store values from -16383 to 16383 (yes,
 * zero is represented twice).
 *
 * (short) is a simplified version used for colors. It is always positive
 * and has a range from 0 to 127 with Type being the MSb. This effectively
 * limits the number of available variables to 128.
 */
int vecparser(char *programIn, VecByteCodeT *bytecode) {
    int         keyword;
    char       *line;
    char       *lineEnd    = NULL;  // Initialized so a NULL program reads as "no lines".
    gboolean    lineOkay   = TRUE;  // Stays TRUE when there is nothing to parse.
    gboolean    isOkay;
    char       *token;
    char       *tokenEnd;
    int         lineNumber;
    int         x1;
    int         y1;
    int         x2;
    int         y2;
    PointT      p1;
    PointT      p2;
    PointT     *points     = NULL;  // Used to collect points for LINE
    char      **variables  = NULL;  // Array of known variables and their IDs
    LabelT     *labels     = NULL;  // Known labels and their byte offsets
    LabelT     *label      = NULL;  // Temp label for array management.
    LabelT    **unresolved = NULL;  // List of unresolved label uses and their byte offsets
    int         result     = -1;    // Returns -1 on success or line number of first error.
    KeywordsT   commands[] = {
        { "BOX",       PARSE_BOX       },
        { "CALL",      PARSE_CALL      },
        { "CIRCLE",    PARSE_CIRCLE    },
        { "CLEAR",     PARSE_CLEAR     },
        { "COLOR",     PARSE_COLOR     },
        { "//",        PARSE_COMMENT   },
        { "ELLIPSE",   PARSE_ELLIPSE   },
        { "GOTO",      PARSE_GOTO      },
        { "IF",        PARSE_IF        },
        { "FILL",      PARSE_FILL      },
        { "LINE",      PARSE_LINE      },
        { "PALETTE",   PARSE_PALETTE   },
        { "PLOT",      PARSE_PLOT      },
        { "RECTANGLE", PARSE_RECTANGLE },
        { "RETURN",    PARSE_RETURN    },
        { NULL,        PARSE_NONE      }
    };
    OperatorsT  math[] = {
        { "=",    MATH_ASSIGN   },
        { "+",    MATH_ADD      },
        { "-",    MATH_SUBTRACT },
        { "*",    MATH_MULTIPLY },
        { "/",    MATH_DIVIDE   },
        { "RND",  MATH_RND      },
        { "MOD",  MATH_MOD      },
        { "POW",  MATH_POW      },
        { "SQRT", MATH_SQRT     },
        { "ABS",  MATH_ABS      },
        { "COS",  MATH_COS      },
        { "SIN",  MATH_SIN      },
        { "TAN",  MATH_TAN      },
        { NULL,   MATH_NONE     }
    };

    // Parse code.
    lineNumber = 0;
    line = parserGetNextLine(programIn, &lineEnd);
    while (line != NULL) {

        // Get the first token on the line.
        token = strtok_r(line, " ", &tokenEnd);

        // Is this something we care about?  It'll be math, a label, or a keyword.
        lineOkay = FALSE;

        if (token == NULL) {
            // Blank line with no tokens - just move on.
            lineOkay = TRUE;
        } else {
            debug("[%s]\n", token);

            if (strlen(token) > 1 && token[0] == '%') {

                // We're doing some kind of math.
                // Look up variable index in table, or add it if needed.
                // Variable index is stored in y1 for later.
                y1 = -1;
                for (y2 = 0; y2 < arrlen(variables); y2++) {
                    if (strcasecmp(variables[y2], token) == 0) {
                        y1 = y2;  // Found.
                        break;
                    }
                }
                if (y1 < 0) {
                    // Variable is not yet known.  Add it to list.
                    y1 = arrlen(variables);
                    arrput(variables, token);
                }
                // Mark as variable (byte form of the type bit).
                y1 |= 0x80;
                // Find the operator.
                token = strtok_r(NULL, " ", &tokenEnd);
                if (token != NULL) {
                    keyword = 0;
                    while (math[keyword].operator) {
                        if (strcasecmp(math[keyword].operator, token) == 0) {
                            // Yep!  Gather arguments and generate bytecode.
                            if (!parserGetX(&tokenEnd, &variables, &x1)) break;
                            outputByte(bytecode, PARSE_MATH);
                            outputByte(bytecode, y1);
                            outputByte(bytecode, math[keyword].id);
                            outputWord(bytecode, x1);
                            lineOkay = TRUE;
                            break;
                        }
                        keyword++;
                    }
                }

            } else if (strlen(token) > 1 && token[strlen(token) - 1] == ':') {

                // It's a label.  Remove trailing colon.
                token[strlen(token) - 1] = 0;
                // Have we already used this label?
                // We search ourselves so it's case-insensitive.
                x2 = -1;
                for (x1 = 0; x1 < shlen(labels); x1++) {
                    if (strcasecmp(token, labels[x1].key) == 0) {
                        x2 = x1;
                        break;
                    }
                }
                if (x2 < 0) {
                    // New label.  Add to hashmap.
                    shput(labels, token, bytecode->length);
                    lineOkay = TRUE;
                }
                // A duplicate label leaves lineOkay FALSE and is an error.

            } else {

                // It's a keyword.
                keyword = 0;
                while (commands[keyword].command) {
                    if (strcasecmp(commands[keyword].command, token) == 0) {
                        // Yep!  Gather arguments and generate bytecode.
                        // Any early 'break' below leaves lineOkay FALSE.
                        switch (commands[keyword].id) {

                            case PARSE_NONE:
                            case PARSE_LABEL:
                            case PARSE_MATH:
                                // Won't happen, but silences an error.
                                break;

                            case PARSE_BOX:
                            case PARSE_ELLIPSE:
                            case PARSE_RECTANGLE:
                                // Box|Ellipse|Rectangle (value),(value) to (value),(value)
                                if (!parserGetXY(&tokenEnd, &variables, &x1, &y1)) break;
                                if (parserGetWord("TO", &tokenEnd) < 0) break;
                                if (!parserGetXY(&tokenEnd, &variables, &x2, &y2)) break;
                                if (IS_NUMBER(x1) && IS_NUMBER(x2)) if (x2 < x1) break;
                                if (IS_NUMBER(x1)) if (x1 < 0 || x1 > 319) break;
                                if (IS_NUMBER(x2)) if (x2 < 0 || x2 > 319) break;
                                if (IS_NUMBER(y1) && IS_NUMBER(y2)) if (y2 < y1) break;
                                if (IS_NUMBER(y1)) if (y1 < 0 || y1 > 199) break;
                                if (IS_NUMBER(y2)) if (y2 < 0 || y2 > 199) break;
                                outputByte(bytecode, commands[keyword].id);
                                outputWord(bytecode, x1);
                                outputWord(bytecode, y1);
                                outputWord(bytecode, x2);
                                outputWord(bytecode, y2);
                                lineOkay = TRUE;
                                break;

                            case PARSE_CALL:
                            case PARSE_GOTO:
                                // Call (label) / Goto (label)
                                // Take the label as a proper token so extra
                                // spaces around it don't become part of the name.
                                token = strtok_r(NULL, " ", &tokenEnd);
                                if (token == NULL) break;
                                outputByte(bytecode, commands[keyword].id);
                                // Must follow the opcode: a forward reference
                                // records the current length as its patch offset.
                                x1 = labelGetValue(lineNumber, bytecode, labels, &unresolved, token);
                                outputWord(bytecode, x1);
                                lineOkay = TRUE;
                                break;

                            case PARSE_CIRCLE:
                                // Circle (value),(value) radius (value)
                                if (!parserGetXY(&tokenEnd, &variables, &x1, &y1)) break;
                                if (parserGetWord("RADIUS", &tokenEnd) < 0) break;
                                if (!parserGetX(&tokenEnd, &variables, &y2)) break;
                                if (IS_NUMBER(x1)) if (x1 < 0 || x1 > 319) break;
                                if (IS_NUMBER(y1)) if (y1 < 0 || y1 > 199) break;
                                outputByte(bytecode, PARSE_CIRCLE);
                                outputWord(bytecode, y2);
                                outputWord(bytecode, x1);
                                outputWord(bytecode, y1);
                                lineOkay = TRUE;
                                break;

                            case PARSE_CLEAR:
                                outputByte(bytecode, PARSE_CLEAR);
                                lineOkay = TRUE;
                                break;

                            case PARSE_COLOR:
                                // Color (short)
                                if (!parserGetX(&tokenEnd, &variables, &x1)) break;
                                if (IS_NUMBER(x1)) if (x1 < 0 || x1 > 15) break;
                                outputByte(bytecode, PARSE_COLOR);
                                outputByte(bytecode, x1);
                                lineOkay = TRUE;
                                break;

                            case PARSE_COMMENT:
                                // Eat the rest of the line.
                                while (token != NULL) {
                                    token = strtok_r(NULL, " ", &tokenEnd);
                                }
                                lineOkay = TRUE;
                                break;

                            case PARSE_FILL:
                                // Fill (value),(value) [to (short)]
                                if (!parserGetXY(&tokenEnd, &variables, &x1, &y1)) break;
                                if (IS_NUMBER(x1)) if (x1 < 0 || x1 > 319) break;
                                if (IS_NUMBER(y1)) if (y1 < 0 || y1 > 199) break;
                                // Do they want to fill to a certain color?  Or over the current color?
                                x2 = 16;  // 16 == Fill, otherwise FillTo
                                if (parserGetWord("TO", &tokenEnd) >= 0) {
                                    if (!parserGetX(&tokenEnd, &variables, &x2)) break;
                                    if (IS_NUMBER(x2)) if (x2 < 0 || x2 > 15) break;
                                }
                                outputByte(bytecode, PARSE_FILL);
                                outputWord(bytecode, x1);
                                outputWord(bytecode, y1);
                                outputByte(bytecode, x2);
                                lineOkay = TRUE;
                                break;

                            case PARSE_IF:
                                // If (value) (compare) (value) (goto|call) (label)
                                if (!parserGetX(&tokenEnd, &variables, &x1)) break;
                                y1 = parserGetWord("== != < > <= >=", &tokenEnd);
                                if (y1 < 0) break;
                                if (!parserGetX(&tokenEnd, &variables, &x2)) break;
                                y2 = parserGetWord("goto call", &tokenEnd);
                                if (y2 < 0) break;
                                token = strtok_r(NULL, " ", &tokenEnd);
                                if (token == NULL) break;
                                outputByte(bytecode, PARSE_IF);
                                outputWord(bytecode, x1);
                                outputByte(bytecode, y1);
                                outputWord(bytecode, x2);
                                outputByte(bytecode, y2);
                                // Must come right before the label word is
                                // written; see CALL/GOTO.
                                x1 = labelGetValue(lineNumber, bytecode, labels, &unresolved, token);
                                outputWord(bytecode, x1);
                                lineOkay = TRUE;
                                break;

                            case PARSE_LINE:
                                // Line (value),(value) to (value),(value) [to ...]
                                points = NULL;
                                if (!parserGetXY(&tokenEnd, &variables, &p1.x, &p1.y)) break;
                                if (parserGetWord("TO", &tokenEnd) < 0) break;
                                if (!parserGetXY(&tokenEnd, &variables, &p2.x, &p2.y)) break;
                                if (IS_NUMBER(p1.x)) if (p1.x < 0 || p1.x > 319) break;
                                if (IS_NUMBER(p1.y)) if (p1.y < 0 || p1.y > 199) break;
                                if (IS_NUMBER(p2.x)) if (p2.x < 0 || p2.x > 319) break;
                                if (IS_NUMBER(p2.y)) if (p2.y < 0 || p2.y > 199) break;
                                arrput(points, p1);
                                arrput(points, p2);
                                // Collect any further "to x,y" segments.
                                isOkay = TRUE;
                                while (parserGetWord("TO", &tokenEnd) >= 0) {
                                    if (!parserGetXY(&tokenEnd, &variables, &p1.x, &p1.y)) {
                                        // Error.
                                        isOkay = FALSE;
                                        break;
                                    }
                                    if (IS_NUMBER(p1.x)) if (p1.x < 0 || p1.x > 319) { isOkay = FALSE; break; }
                                    if (IS_NUMBER(p1.y)) if (p1.y < 0 || p1.y > 199) { isOkay = FALSE; break; }
                                    arrput(points, p1);
                                }
                                if (isOkay) {
                                    outputByte(bytecode, PARSE_LINE);
                                    outputWord(bytecode, arrlen(points));
                                    for (x1 = 0; x1 < arrlen(points); x1++) {
                                        outputWord(bytecode, points[x1].x);
                                        outputWord(bytecode, points[x1].y);
                                    }
                                    lineOkay = TRUE;
                                }
                                break;

                            case PARSE_PALETTE:
                                // Palette (short) AS (short),(short),(short)
                                if (!parserGetX(&tokenEnd, &variables, &x1)) break;
                                if (parserGetWord("AS", &tokenEnd) < 0) break;
                                if (!parserGetXYZ(&tokenEnd, &variables, &x2, &y1, &y2)) break;
                                if (IS_NUMBER(x1)) if (x1 < 0 || x1 > 15) break;
                                if (IS_NUMBER(x2)) if (x2 < 0 || x2 > 15) break;
                                if (IS_NUMBER(y1)) if (y1 < 0 || y1 > 15) break;
                                if (IS_NUMBER(y2)) if (y2 < 0 || y2 > 15) break;
                                outputByte(bytecode, PARSE_PALETTE);
                                outputByte(bytecode, x1);
                                outputByte(bytecode, x2);
                                outputByte(bytecode, y1);
                                outputByte(bytecode, y2);
                                lineOkay = TRUE;
                                break;

                            case PARSE_PLOT:
                                // Plot (value),(value)
                                if (!parserGetXY(&tokenEnd, &variables, &x1, &y1)) break;
                                if (IS_NUMBER(x1)) if (x1 < 0 || x1 > 319) break;
                                if (IS_NUMBER(y1)) if (y1 < 0 || y1 > 199) break;
                                outputByte(bytecode, PARSE_PLOT);
                                outputWord(bytecode, x1);
                                outputWord(bytecode, y1);
                                lineOkay = TRUE;
                                break;

                            case PARSE_RETURN:
                                // Return
                                outputByte(bytecode, PARSE_RETURN);
                                lineOkay = TRUE;
                                break;

                        } // switch

                        // Release the LINE point list.  arrfree() frees the
                        // allocation itself (emptying the array does not)
                        // and resets the pointer to NULL.
                        arrfree(points);

                        // Stop looking for this keyword - we handled it.
                        break;

                    } else {
                        // Keep looking until we find this keyword or run out of commands.
                        keyword++;
                    } // keyword match
                } // loop over commands

            } // math, label, or keyword
        } // blank line

        // Is everything still okay?
        if (!lineOkay) {
            // Nope - error.
            result = lineNumber;
            break;
        }

        // Move to next line.
        line = parserGetNextLine(NULL, &lineEnd);
        lineNumber++;

    } // read program line

    // Resolve forward label declarations and patch bytecode.
    if (lineOkay) {
#ifdef DEBUG_MODE
        for (y1 = 0; y1 < shlen(labels); y1++) {
            debug("Resolved - %s\n", labels[y1].key);
        }
        for (y1 = 0; y1 < arrlen(unresolved); y1++) {
            debug("Unresolved - %s\n", unresolved[y1]->key);
        }
#endif
        for (y1 = 0; y1 < arrlen(unresolved); y1++) {
            // Find offset of this unresolved label.  We search ourselves so it's case-insensitive.
            x2 = -1;
            for (x1 = 0; x1 < shlen(labels); x1++) {
                debug("Checking label %d of %d - %s == %s\n", y1, (int)arrlen(unresolved), unresolved[y1]->key,
                      labels[x1].key);
                if (strcasecmp(unresolved[y1]->key, labels[x1].key) == 0) {
                    x2 = x1;
                    break;
                }
            }
            if (x2 < 0) {
                // Label not found!  Error!
                result = unresolved[y1]->line;
                break;
            } else {
                // Overwrite the placeholder word (high byte first) with the label offset.
                x1 = unresolved[y1]->value;
                bytecode->bytes[x1]     = (labels[x2].value & 0xFF00) >> 8;
                bytecode->bytes[x1 + 1] = labels[x2].value & 0x00FF;
            }
        }
    }

    // Unwind variables array if needed.
    ARRFREE(variables);

    // Release every unresolved record, then the list itself.
    for (y1 = 0; y1 < arrlen(unresolved); y1++) {
        label = unresolved[y1];
        DEL(label->key);
        DEL(label);
    }
    arrfree(unresolved);

    // Unwind labels hashmap if needed.
    if (labels != NULL) {
        while (shlen(labels) > 0) {
            shdel(labels, labels[0].key);
        }
        shfree(labels);
    }

    return result;
}
#pragma clang diagnostic pop