// compact.c -- Release build bytecode compaction
//
// Walks the module's bytecode, removes OP_LINE instructions (3 bytes
// each), and rewrites all code-address references so control flow
// still lands on the correct instructions.
//
// Address references:
//   - BasProcEntryT::codeAddr       (absolute)
//   - BasFormVarInfoT::initCodeAddr (absolute; a negative value means
//     "no init code" and is skipped; initCodeLen is recomputed to match)
//   - the module's entryPoint       (absolute; remapped only when in range)
//   - OP_CALL operand               (absolute uint16)
//   - OP_JMP / OP_JMP_TRUE / OP_JMP_FALSE operand (relative int16)
//   - OP_FOR_NEXT loopTop operand   (relative int16)
//   - OP_ON_ERROR handler operand   (relative int16; 0 = disable, not remapped)
//   - GOSUB return address (emitted as OP_PUSH_INT32 followed by OP_JMP,
//     where the pushed value equals the PC immediately after the JMP)
//
// Safety: if any opcode cannot be sized, any jump overflows int16, or
// the walk doesn't reach codeLen exactly, the module is left untouched.
|
|
|
|
#include "compact.h"
|
|
#include "opcodes.h"
|
|
|
|
#include <stdint.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
|
|
// ============================================================
|
|
// Opcode operand size table
|
|
// ============================================================
|
|
// Returns operand byte count (excluding the 1-byte opcode), or -1 if unknown.
|
|
|
|
static int32_t opOperandSize(uint8_t op) {
|
|
switch (op) {
|
|
// No operand bytes
|
|
case OP_NOP:
|
|
case OP_PUSH_TRUE: case OP_PUSH_FALSE:
|
|
case OP_POP: case OP_DUP:
|
|
case OP_LOAD_REF: case OP_STORE_REF:
|
|
case OP_ADD_INT: case OP_SUB_INT: case OP_MUL_INT:
|
|
case OP_IDIV_INT: case OP_MOD_INT: case OP_NEG_INT:
|
|
case OP_ADD_FLT: case OP_SUB_FLT: case OP_MUL_FLT:
|
|
case OP_DIV_FLT: case OP_NEG_FLT: case OP_POW:
|
|
case OP_STR_CONCAT: case OP_STR_LEFT: case OP_STR_RIGHT:
|
|
case OP_STR_MID: case OP_STR_MID2: case OP_STR_LEN:
|
|
case OP_STR_INSTR: case OP_STR_INSTR3:
|
|
case OP_STR_UCASE: case OP_STR_LCASE:
|
|
case OP_STR_TRIM: case OP_STR_LTRIM: case OP_STR_RTRIM:
|
|
case OP_STR_CHR: case OP_STR_ASC: case OP_STR_SPACE:
|
|
case OP_CMP_EQ: case OP_CMP_NE: case OP_CMP_LT:
|
|
case OP_CMP_GT: case OP_CMP_LE: case OP_CMP_GE:
|
|
case OP_AND: case OP_OR: case OP_NOT:
|
|
case OP_XOR: case OP_EQV: case OP_IMP:
|
|
case OP_GOSUB_RET: case OP_RET: case OP_RET_VAL:
|
|
case OP_FOR_POP:
|
|
case OP_CONV_INT_FLT: case OP_CONV_FLT_INT:
|
|
case OP_CONV_INT_STR: case OP_CONV_STR_INT:
|
|
case OP_CONV_FLT_STR: case OP_CONV_STR_FLT:
|
|
case OP_CONV_INT_LONG: case OP_CONV_LONG_INT:
|
|
case OP_PRINT: case OP_PRINT_NL: case OP_PRINT_TAB:
|
|
case OP_INPUT:
|
|
case OP_FILE_CLOSE: case OP_FILE_PRINT: case OP_FILE_INPUT:
|
|
case OP_FILE_EOF: case OP_FILE_LINE_INPUT:
|
|
case OP_LOAD_PROP: case OP_STORE_PROP:
|
|
case OP_LOAD_FORM: case OP_UNLOAD_FORM:
|
|
case OP_HIDE_FORM: case OP_DO_EVENTS:
|
|
case OP_MSGBOX: case OP_INPUTBOX: case OP_ME_REF:
|
|
case OP_CREATE_CTRL: case OP_FIND_CTRL: case OP_FIND_CTRL_IDX:
|
|
case OP_CREATE_CTRL_EX:
|
|
case OP_ERASE:
|
|
case OP_RESUME: case OP_RESUME_NEXT:
|
|
case OP_RAISE_ERR: case OP_ERR_NUM: case OP_ERR_CLEAR:
|
|
case OP_MATH_ABS: case OP_MATH_INT: case OP_MATH_FIX:
|
|
case OP_MATH_SGN: case OP_MATH_SQR: case OP_MATH_SIN:
|
|
case OP_MATH_COS: case OP_MATH_TAN: case OP_MATH_ATN:
|
|
case OP_MATH_LOG: case OP_MATH_EXP: case OP_MATH_RND:
|
|
case OP_MATH_RANDOMIZE:
|
|
case OP_RGB:
|
|
case OP_GET_RED: case OP_GET_GREEN: case OP_GET_BLUE:
|
|
case OP_STR_VAL: case OP_STR_STRF: case OP_STR_HEX:
|
|
case OP_STR_STRING:
|
|
case OP_MATH_TIMER: case OP_DATE_STR: case OP_TIME_STR:
|
|
case OP_SLEEP: case OP_ENVIRON:
|
|
case OP_READ_DATA: case OP_RESTORE:
|
|
case OP_FILE_WRITE: case OP_FILE_WRITE_SEP: case OP_FILE_WRITE_NL:
|
|
case OP_FILE_GET: case OP_FILE_PUT: case OP_FILE_SEEK:
|
|
case OP_FILE_LOF: case OP_FILE_LOC: case OP_FILE_FREEFILE:
|
|
case OP_FILE_INPUT_N:
|
|
case OP_STR_MID_ASGN: case OP_PRINT_USING:
|
|
case OP_PRINT_TAB_N: case OP_PRINT_SPC_N:
|
|
case OP_FORMAT: case OP_SHELL:
|
|
case OP_APP_PATH: case OP_APP_CONFIG: case OP_APP_DATA:
|
|
case OP_INI_READ: case OP_INI_WRITE:
|
|
case OP_FS_KILL: case OP_FS_NAME: case OP_FS_FILECOPY:
|
|
case OP_FS_MKDIR: case OP_FS_RMDIR: case OP_FS_CHDIR:
|
|
case OP_FS_CHDRIVE: case OP_FS_CURDIR: case OP_FS_DIR:
|
|
case OP_FS_DIR_NEXT: case OP_FS_FILELEN:
|
|
case OP_FS_GETATTR: case OP_FS_SETATTR:
|
|
case OP_CREATE_FORM: case OP_SET_EVENT: case OP_REMOVE_CTRL:
|
|
case OP_END: case OP_HALT:
|
|
return 0;
|
|
|
|
case OP_LOAD_ARRAY: case OP_STORE_ARRAY:
|
|
case OP_PRINT_SPC: case OP_FILE_OPEN:
|
|
case OP_CALL_METHOD: case OP_SHOW_FORM:
|
|
case OP_LBOUND: case OP_UBOUND:
|
|
case OP_COMPARE_MODE:
|
|
return 1;
|
|
|
|
case OP_PUSH_INT16: case OP_PUSH_STR:
|
|
case OP_LOAD_LOCAL: case OP_STORE_LOCAL:
|
|
case OP_LOAD_GLOBAL: case OP_STORE_GLOBAL:
|
|
case OP_LOAD_FIELD: case OP_STORE_FIELD:
|
|
case OP_PUSH_LOCAL_ADDR: case OP_PUSH_GLOBAL_ADDR:
|
|
case OP_JMP: case OP_JMP_TRUE: case OP_JMP_FALSE:
|
|
case OP_CTRL_REF:
|
|
case OP_LOAD_FORM_VAR: case OP_STORE_FORM_VAR:
|
|
case OP_PUSH_FORM_ADDR:
|
|
case OP_DIM_ARRAY: case OP_REDIM:
|
|
case OP_ON_ERROR:
|
|
case OP_STR_FIXLEN:
|
|
case OP_LINE:
|
|
return 2;
|
|
|
|
case OP_STORE_ARRAY_FIELD:
|
|
case OP_FOR_INIT:
|
|
return 3;
|
|
|
|
case OP_PUSH_INT32: case OP_PUSH_FLT32:
|
|
case OP_CALL:
|
|
return 4;
|
|
|
|
case OP_FOR_NEXT:
|
|
return 5;
|
|
|
|
case OP_CALL_EXTERN:
|
|
return 6;
|
|
|
|
case OP_PUSH_FLT64:
|
|
return 8;
|
|
|
|
default:
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
// Little-endian helpers (bytecode is always LE regardless of host)
// ============================================================

// Reads two bytes at p (low byte first) and returns them as a signed
// 16-bit value.
static int16_t readI16LE(const uint8_t *p) {
    const uint16_t lo = p[0];
    const uint16_t hi = p[1];

    return (int16_t)(uint16_t)(lo | (hi << 8));
}
|
|
|
|
|
|
// Reads two bytes at p (low byte first) and returns them as an unsigned
// 16-bit value.
static uint16_t readU16LE(const uint8_t *p) {
    uint16_t value = p[1];

    value = (uint16_t)((value << 8) | p[0]);
    return value;
}
|
|
|
|
|
|
// Reads four bytes at p (least significant byte first) and returns them
// as a signed 32-bit value.
static int32_t readI32LE(const uint8_t *p) {
    uint32_t bits = 0;

    // Fold from the most significant byte (p[3]) down to p[0].
    for (int i = 3; i >= 0; i--) {
        bits = (bits << 8) | p[i];
    }

    return (int32_t)bits;
}
|
|
|
|
|
|
// Stores the signed 16-bit value v at p as two bytes, low byte first.
static void writeI16LE(uint8_t *p, int16_t v) {
    const uint16_t bits = (uint16_t)v;

    // The uint8_t casts keep only the low 8 bits of each expression.
    p[0] = (uint8_t)bits;
    p[1] = (uint8_t)(bits >> 8);
}
|
|
|
|
|
|
// Stores the unsigned 16-bit value v at p as two bytes, low byte first.
static void writeU16LE(uint8_t *p, uint16_t v) {
    const uint8_t lo = (uint8_t)(v & 0xFFu);
    const uint8_t hi = (uint8_t)(v >> 8);

    p[0] = lo;
    p[1] = hi;
}
|
|
|
|
|
|
// Stores the signed 32-bit value v at p as four bytes, least significant
// byte first.
static void writeI32LE(uint8_t *p, int32_t v) {
    uint32_t bits = (uint32_t)v;

    // Emit the low byte, then shift the next one into position.
    for (int i = 0; i < 4; i++) {
        p[i] = (uint8_t)(bits & 0xFFu);
        bits >>= 8;
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// Walk bytecode, build remap
|
|
// ============================================================
|
|
//
|
|
// remap[oldPos] = newPos for every byte position in [0, oldCodeLen].
|
|
// For OP_LINE bytes (removed): remap points at where the NEXT instruction
|
|
// starts in the new code.
|
|
// Final entry remap[oldCodeLen] = newCodeLen.
|
|
//
|
|
// Returns malloc'd array of size (oldCodeLen + 1), or NULL on failure.
|
|
|
|
static int32_t *buildRemap(const uint8_t *code, int32_t codeLen, int32_t *outNewLen) {
|
|
int32_t *remap = (int32_t *)malloc((codeLen + 1) * sizeof(int32_t));
|
|
|
|
if (!remap) {
|
|
return NULL;
|
|
}
|
|
|
|
int32_t oldPc = 0;
|
|
int32_t newPc = 0;
|
|
|
|
while (oldPc < codeLen) {
|
|
uint8_t op = code[oldPc];
|
|
int32_t operand = opOperandSize(op);
|
|
|
|
if (operand < 0) {
|
|
free(remap);
|
|
return NULL;
|
|
}
|
|
|
|
int32_t instSize = 1 + operand;
|
|
|
|
if (oldPc + instSize > codeLen) {
|
|
free(remap);
|
|
return NULL;
|
|
}
|
|
|
|
if (op == OP_LINE) {
|
|
// These bytes are removed; they map to where the next instruction starts.
|
|
for (int32_t i = 0; i < instSize; i++) {
|
|
remap[oldPc + i] = newPc;
|
|
}
|
|
} else {
|
|
for (int32_t i = 0; i < instSize; i++) {
|
|
remap[oldPc + i] = newPc + i;
|
|
}
|
|
|
|
newPc += instSize;
|
|
}
|
|
|
|
oldPc += instSize;
|
|
}
|
|
|
|
if (oldPc != codeLen) {
|
|
free(remap);
|
|
return NULL;
|
|
}
|
|
|
|
remap[codeLen] = newPc;
|
|
*outNewLen = newPc;
|
|
return remap;
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// GOSUB pattern detection
|
|
// ============================================================
|
|
//
|
|
// GOSUB emits:
|
|
// oldPc: OP_PUSH_INT32 (1 byte)
|
|
// oldPc+1: int32 value V (4 bytes)
|
|
// oldPc+5: OP_JMP (1 byte)
|
|
// oldPc+6: int16 offset (2 bytes)
|
|
// oldPc+8: <next instruction>
|
|
// The invariant is V == oldPc + 8 (the pushed return address).
|
|
//
|
|
// Returns true if the given position is the start of such a pattern.
|
|
|
|
static bool isGosubPush(const uint8_t *code, int32_t codeLen, int32_t pos) {
|
|
if (pos + 8 > codeLen) {
|
|
return false;
|
|
}
|
|
|
|
if (code[pos] != OP_PUSH_INT32) {
|
|
return false;
|
|
}
|
|
|
|
if (code[pos + 5] != OP_JMP) {
|
|
return false;
|
|
}
|
|
|
|
int32_t value = readI32LE(code + pos + 1);
|
|
return value == pos + 8;
|
|
}
|
|
|
|
|
|
// ============================================================
// Apply remap to a single instruction's operand
// ============================================================
//
// Rewrites an absolute uint16 code address.
//   newCode        destination buffer for the compacted code
//   newOpPos       position of the opcode in newCode
//   operandOffset  byte offset from the opcode to the uint16 operand
//   oldAddr        the address as stored in the old code
//   remap          old-position -> new-position table
//   newCodeLen     length of the compacted code
//
// Returns true after storing the translated address (little-endian).
// Returns false, writing nothing, if the translated address is negative,
// lies beyond newCodeLen, or does not fit in 16 bits.
//
// NOTE: oldAddr is used to index remap[] directly and this function has
// no way to validate it (the old code length is not passed in), so the
// caller is responsible for ensuring oldAddr is a valid remap index.

static bool remapAbsU16(uint8_t *newCode, int32_t newOpPos, int32_t operandOffset, uint16_t oldAddr, const int32_t *remap, int32_t newCodeLen) {
    const int32_t translated  = remap[oldAddr];
    const bool    insideCode  = (translated >= 0) && (translated <= newCodeLen);
    const bool    fitsOperand = (translated <= 0xFFFF);

    if (!insideCode || !fitsOperand) {
        return false;
    }

    writeU16LE(&newCode[newOpPos + operandOffset], (uint16_t)translated);
    return true;
}
|
|
|
|
|
|
// Rewrites a relative int16 jump offset.
//   newCode        destination buffer for the compacted code
//   newOpPos       position of the opcode in newCode
//   operandOffset  byte offset from the opcode to the int16 operand
//   oldOffset      the offset as stored in the old code
//   oldPcAfter     old PC the offset is relative to (after the operand)
//   newPcAfter     the same PC in the compacted code
//   remap          old-position -> new-position table
//   codeLen        length of the old code
//   newCodeLen     length of the compacted code
//   allowZero      when true, an oldOffset of 0 is a sentinel and is
//                  written back as 0 without remapping (ON_ERROR uses
//                  0 to mean "disable")
//
// Returns true after storing the new offset. Returns false, writing
// nothing, if the old target lies outside [0, codeLen], the translated
// target lies outside [0, newCodeLen], or the new offset does not fit
// in an int16.

static bool remapRelI16(uint8_t *newCode, int32_t newOpPos, int32_t operandOffset, int16_t oldOffset, int32_t oldPcAfter, int32_t newPcAfter, const int32_t *remap, int32_t codeLen, int32_t newCodeLen, bool allowZero) {
    int32_t rel = 0;

    const bool keepSentinel = allowZero && oldOffset == 0;

    if (!keepSentinel) {
        const int32_t srcTarget = oldPcAfter + oldOffset;

        // Validate before indexing remap[].
        if (srcTarget < 0 || srcTarget > codeLen) {
            return false;
        }

        const int32_t dstTarget = remap[srcTarget];

        if (dstTarget < 0 || dstTarget > newCodeLen) {
            return false;
        }

        rel = dstTarget - newPcAfter;

        if (rel < INT16_MIN || rel > INT16_MAX) {
            return false;
        }
    }

    writeI16LE(&newCode[newOpPos + operandOffset], (int16_t)rel);
    return true;
}
|
|
|
|
|
|
// ============================================================
|
|
// basCompactBytecode
|
|
// ============================================================
|
|
|
|
int32_t basCompactBytecode(BasModuleT *mod) {
|
|
if (!mod || !mod->code || mod->codeLen <= 0) {
|
|
return 0;
|
|
}
|
|
|
|
const uint8_t *oldCode = mod->code;
|
|
int32_t oldCodeLen = mod->codeLen;
|
|
|
|
// Count OP_LINE occurrences. If none, nothing to do.
|
|
int32_t lineCount = 0;
|
|
{
|
|
int32_t pc = 0;
|
|
|
|
while (pc < oldCodeLen) {
|
|
uint8_t op = oldCode[pc];
|
|
int32_t operand = opOperandSize(op);
|
|
|
|
if (operand < 0 || pc + 1 + operand > oldCodeLen) {
|
|
return 0; // unknown opcode -- skip compaction
|
|
}
|
|
|
|
if (op == OP_LINE) {
|
|
lineCount++;
|
|
}
|
|
|
|
pc += 1 + operand;
|
|
}
|
|
|
|
if (pc != oldCodeLen) {
|
|
return 0;
|
|
}
|
|
|
|
if (lineCount == 0) {
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
int32_t newCodeLen = 0;
|
|
int32_t *remap = buildRemap(oldCode, oldCodeLen, &newCodeLen);
|
|
|
|
if (!remap) {
|
|
return 0;
|
|
}
|
|
|
|
uint8_t *newCode = (uint8_t *)malloc(newCodeLen > 0 ? newCodeLen : 1);
|
|
|
|
if (!newCode) {
|
|
free(remap);
|
|
return 0;
|
|
}
|
|
|
|
// Copy bytes (skipping OP_LINE) and rewrite address operands.
|
|
bool ok = true;
|
|
int32_t oldPc = 0;
|
|
|
|
while (oldPc < oldCodeLen && ok) {
|
|
uint8_t op = oldCode[oldPc];
|
|
int32_t operand = opOperandSize(op);
|
|
int32_t instSize = 1 + operand;
|
|
|
|
if (op == OP_LINE) {
|
|
oldPc += instSize;
|
|
continue;
|
|
}
|
|
|
|
int32_t newPc = remap[oldPc];
|
|
|
|
// Copy the instruction verbatim first; we'll overwrite operands that
|
|
// need remapping below.
|
|
memcpy(newCode + newPc, oldCode + oldPc, instSize);
|
|
|
|
switch (op) {
|
|
case OP_CALL: {
|
|
uint16_t oldAddr = readU16LE(oldCode + oldPc + 1);
|
|
|
|
if (!remapAbsU16(newCode, newPc, 1, oldAddr, remap, newCodeLen)) {
|
|
ok = false;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case OP_JMP:
|
|
case OP_JMP_TRUE:
|
|
case OP_JMP_FALSE: {
|
|
int16_t oldOff = readI16LE(oldCode + oldPc + 1);
|
|
|
|
if (!remapRelI16(newCode, newPc, 1, oldOff,
|
|
oldPc + 3, newPc + 3,
|
|
remap, oldCodeLen, newCodeLen, false)) {
|
|
ok = false;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case OP_FOR_NEXT: {
|
|
int16_t oldOff = readI16LE(oldCode + oldPc + 4);
|
|
|
|
if (!remapRelI16(newCode, newPc, 4, oldOff,
|
|
oldPc + 6, newPc + 6,
|
|
remap, oldCodeLen, newCodeLen, false)) {
|
|
ok = false;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case OP_ON_ERROR: {
|
|
int16_t oldOff = readI16LE(oldCode + oldPc + 1);
|
|
|
|
if (!remapRelI16(newCode, newPc, 1, oldOff,
|
|
oldPc + 3, newPc + 3,
|
|
remap, oldCodeLen, newCodeLen, true)) {
|
|
ok = false;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
case OP_PUSH_INT32: {
|
|
// Detect GOSUB return-address push and remap the absolute address.
|
|
if (isGosubPush(oldCode, oldCodeLen, oldPc)) {
|
|
int32_t oldAddr = readI32LE(oldCode + oldPc + 1);
|
|
|
|
if (oldAddr < 0 || oldAddr > oldCodeLen) {
|
|
ok = false;
|
|
break;
|
|
}
|
|
|
|
int32_t newAddr = remap[oldAddr];
|
|
|
|
if (newAddr < 0 || newAddr > newCodeLen) {
|
|
ok = false;
|
|
break;
|
|
}
|
|
|
|
writeI32LE(newCode + newPc + 1, newAddr);
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
oldPc += instSize;
|
|
}
|
|
|
|
if (!ok) {
|
|
free(newCode);
|
|
free(remap);
|
|
return 0;
|
|
}
|
|
|
|
// Rewrite proc entry points
|
|
for (int32_t i = 0; i < mod->procCount; i++) {
|
|
int32_t oldAddr = mod->procs[i].codeAddr;
|
|
|
|
if (oldAddr < 0 || oldAddr > oldCodeLen) {
|
|
free(newCode);
|
|
free(remap);
|
|
return 0;
|
|
}
|
|
|
|
mod->procs[i].codeAddr = remap[oldAddr];
|
|
}
|
|
|
|
// Rewrite form-var init code addresses. Negative means "no init code".
|
|
for (int32_t i = 0; i < mod->formVarInfoCount; i++) {
|
|
int32_t oldAddr = mod->formVarInfo[i].initCodeAddr;
|
|
|
|
if (oldAddr < 0) {
|
|
continue;
|
|
}
|
|
|
|
if (oldAddr > oldCodeLen) {
|
|
free(newCode);
|
|
free(remap);
|
|
return 0;
|
|
}
|
|
|
|
int32_t oldLen = mod->formVarInfo[i].initCodeLen;
|
|
int32_t oldEnd = oldAddr + oldLen;
|
|
|
|
if (oldEnd > oldCodeLen) {
|
|
oldEnd = oldCodeLen;
|
|
}
|
|
|
|
int32_t newAddr = remap[oldAddr];
|
|
int32_t newEnd = remap[oldEnd];
|
|
|
|
mod->formVarInfo[i].initCodeAddr = newAddr;
|
|
mod->formVarInfo[i].initCodeLen = newEnd - newAddr;
|
|
}
|
|
|
|
// Rewrite entry point
|
|
if (mod->entryPoint >= 0 && mod->entryPoint <= oldCodeLen) {
|
|
mod->entryPoint = remap[mod->entryPoint];
|
|
}
|
|
|
|
// Swap in the new code
|
|
free(mod->code);
|
|
mod->code = newCode;
|
|
mod->codeLen = newCodeLen;
|
|
|
|
int32_t removed = oldCodeLen - newCodeLen;
|
|
|
|
free(remap);
|
|
return removed;
|
|
}
|