// compact.c -- Release build bytecode compaction
//
// Walks the module's bytecode, removes OP_LINE instructions (3 bytes
// each), and rewrites all code-address references so control flow
// still lands on the correct instructions.
//
// Address references:
//   - BasProcEntryT::codeAddr        (absolute)
//   - BasFormVarInfoT::initCodeAddr  (absolute; negative = no init, skipped)
//   - BasModuleT::entryPoint         (absolute)
//   - OP_CALL operand                (absolute uint16)
//   - OP_JMP / OP_JMP_TRUE / OP_JMP_FALSE operand (relative int16)
//   - OP_FOR_NEXT loopTop operand    (relative int16)
//   - OP_ON_ERROR handler operand    (relative int16; 0 = disable, not remapped)
//   - GOSUB return address (emitted as OP_PUSH_INT32 followed by OP_JMP,
//     where the pushed value equals the PC immediately after the JMP)
//
// Safety: if any opcode cannot be sized, any jump overflows int16, or
// the walk doesn't reach codeLen exactly, the module is left untouched.

#include "compact.h"
#include "opcodes.h"

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>

// ============================================================
// Opcode operand size table
// ============================================================

// Returns operand byte count (excluding the 1-byte opcode), or -1 if unknown.
static int32_t opOperandSize(uint8_t op) { switch (op) { // No operand bytes case OP_NOP: case OP_PUSH_TRUE: case OP_PUSH_FALSE: case OP_POP: case OP_DUP: case OP_LOAD_REF: case OP_STORE_REF: case OP_ADD_INT: case OP_SUB_INT: case OP_MUL_INT: case OP_IDIV_INT: case OP_MOD_INT: case OP_NEG_INT: case OP_ADD_FLT: case OP_SUB_FLT: case OP_MUL_FLT: case OP_DIV_FLT: case OP_NEG_FLT: case OP_POW: case OP_STR_CONCAT: case OP_STR_LEFT: case OP_STR_RIGHT: case OP_STR_MID: case OP_STR_MID2: case OP_STR_LEN: case OP_STR_INSTR: case OP_STR_INSTR3: case OP_STR_UCASE: case OP_STR_LCASE: case OP_STR_TRIM: case OP_STR_LTRIM: case OP_STR_RTRIM: case OP_STR_CHR: case OP_STR_ASC: case OP_STR_SPACE: case OP_CMP_EQ: case OP_CMP_NE: case OP_CMP_LT: case OP_CMP_GT: case OP_CMP_LE: case OP_CMP_GE: case OP_AND: case OP_OR: case OP_NOT: case OP_XOR: case OP_EQV: case OP_IMP: case OP_GOSUB_RET: case OP_RET: case OP_RET_VAL: case OP_FOR_POP: case OP_CONV_INT_FLT: case OP_CONV_FLT_INT: case OP_CONV_INT_STR: case OP_CONV_STR_INT: case OP_CONV_FLT_STR: case OP_CONV_STR_FLT: case OP_CONV_INT_LONG: case OP_CONV_LONG_INT: case OP_PRINT: case OP_PRINT_NL: case OP_PRINT_TAB: case OP_INPUT: case OP_FILE_CLOSE: case OP_FILE_PRINT: case OP_FILE_INPUT: case OP_FILE_EOF: case OP_FILE_LINE_INPUT: case OP_LOAD_PROP: case OP_STORE_PROP: case OP_LOAD_FORM: case OP_UNLOAD_FORM: case OP_HIDE_FORM: case OP_DO_EVENTS: case OP_MSGBOX: case OP_INPUTBOX: case OP_ME_REF: case OP_CREATE_CTRL: case OP_FIND_CTRL: case OP_FIND_CTRL_IDX: case OP_CREATE_CTRL_EX: case OP_ERASE: case OP_RESUME: case OP_RESUME_NEXT: case OP_RAISE_ERR: case OP_ERR_NUM: case OP_ERR_CLEAR: case OP_MATH_ABS: case OP_MATH_INT: case OP_MATH_FIX: case OP_MATH_SGN: case OP_MATH_SQR: case OP_MATH_SIN: case OP_MATH_COS: case OP_MATH_TAN: case OP_MATH_ATN: case OP_MATH_LOG: case OP_MATH_EXP: case OP_MATH_RND: case OP_MATH_RANDOMIZE: case OP_RGB: case OP_GET_RED: case OP_GET_GREEN: case OP_GET_BLUE: case OP_STR_VAL: case OP_STR_STRF: case OP_STR_HEX: case 
OP_STR_STRING: case OP_MATH_TIMER: case OP_DATE_STR: case OP_TIME_STR: case OP_SLEEP: case OP_ENVIRON: case OP_READ_DATA: case OP_RESTORE: case OP_FILE_WRITE: case OP_FILE_WRITE_SEP: case OP_FILE_WRITE_NL: case OP_FILE_GET: case OP_FILE_PUT: case OP_FILE_SEEK: case OP_FILE_LOF: case OP_FILE_LOC: case OP_FILE_FREEFILE: case OP_FILE_INPUT_N: case OP_STR_MID_ASGN: case OP_PRINT_USING: case OP_PRINT_TAB_N: case OP_PRINT_SPC_N: case OP_FORMAT: case OP_SHELL: case OP_APP_PATH: case OP_APP_CONFIG: case OP_APP_DATA: case OP_INI_READ: case OP_INI_WRITE: case OP_FS_KILL: case OP_FS_NAME: case OP_FS_FILECOPY: case OP_FS_MKDIR: case OP_FS_RMDIR: case OP_FS_CHDIR: case OP_FS_CHDRIVE: case OP_FS_CURDIR: case OP_FS_DIR: case OP_FS_DIR_NEXT: case OP_FS_FILELEN: case OP_FS_GETATTR: case OP_FS_SETATTR: case OP_CREATE_FORM: case OP_SET_EVENT: case OP_REMOVE_CTRL: case OP_END: case OP_HALT: return 0; case OP_LOAD_ARRAY: case OP_STORE_ARRAY: case OP_PRINT_SPC: case OP_FILE_OPEN: case OP_CALL_METHOD: case OP_SHOW_FORM: case OP_LBOUND: case OP_UBOUND: case OP_COMPARE_MODE: return 1; case OP_PUSH_INT16: case OP_PUSH_STR: case OP_LOAD_LOCAL: case OP_STORE_LOCAL: case OP_LOAD_GLOBAL: case OP_STORE_GLOBAL: case OP_LOAD_FIELD: case OP_STORE_FIELD: case OP_PUSH_LOCAL_ADDR: case OP_PUSH_GLOBAL_ADDR: case OP_JMP: case OP_JMP_TRUE: case OP_JMP_FALSE: case OP_CTRL_REF: case OP_LOAD_FORM_VAR: case OP_STORE_FORM_VAR: case OP_PUSH_FORM_ADDR: case OP_DIM_ARRAY: case OP_REDIM: case OP_ON_ERROR: case OP_STR_FIXLEN: case OP_LINE: return 2; case OP_STORE_ARRAY_FIELD: case OP_FOR_INIT: return 3; case OP_PUSH_INT32: case OP_PUSH_FLT32: case OP_CALL: return 4; case OP_FOR_NEXT: return 5; case OP_CALL_EXTERN: return 6; case OP_PUSH_FLT64: return 8; default: return -1; } } // ============================================================ // Little-endian helpers (bytecode is always LE regardless of host) // ============================================================ static int16_t readI16LE(const uint8_t *p) { 
return (int16_t)((uint16_t)p[0] | ((uint16_t)p[1] << 8)); } static uint16_t readU16LE(const uint8_t *p) { return (uint16_t)p[0] | ((uint16_t)p[1] << 8); } static int32_t readI32LE(const uint8_t *p) { return (int32_t)((uint32_t)p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24)); } static void writeI16LE(uint8_t *p, int16_t v) { uint16_t u = (uint16_t)v; p[0] = (uint8_t)(u & 0xFF); p[1] = (uint8_t)((u >> 8) & 0xFF); } static void writeU16LE(uint8_t *p, uint16_t v) { p[0] = (uint8_t)(v & 0xFF); p[1] = (uint8_t)((v >> 8) & 0xFF); } static void writeI32LE(uint8_t *p, int32_t v) { uint32_t u = (uint32_t)v; p[0] = (uint8_t)(u & 0xFF); p[1] = (uint8_t)((u >> 8) & 0xFF); p[2] = (uint8_t)((u >> 16) & 0xFF); p[3] = (uint8_t)((u >> 24) & 0xFF); } // ============================================================ // Walk bytecode, build remap // ============================================================ // // remap[oldPos] = newPos for every byte position in [0, oldCodeLen]. // For OP_LINE bytes (removed): remap points at where the NEXT instruction // starts in the new code. // Final entry remap[oldCodeLen] = newCodeLen. // // Returns malloc'd array of size (oldCodeLen + 1), or NULL on failure. static int32_t *buildRemap(const uint8_t *code, int32_t codeLen, int32_t *outNewLen) { int32_t *remap = (int32_t *)malloc((codeLen + 1) * sizeof(int32_t)); if (!remap) { return NULL; } int32_t oldPc = 0; int32_t newPc = 0; while (oldPc < codeLen) { uint8_t op = code[oldPc]; int32_t operand = opOperandSize(op); if (operand < 0) { free(remap); return NULL; } int32_t instSize = 1 + operand; if (oldPc + instSize > codeLen) { free(remap); return NULL; } if (op == OP_LINE) { // These bytes are removed; they map to where the next instruction starts. 
for (int32_t i = 0; i < instSize; i++) { remap[oldPc + i] = newPc; } } else { for (int32_t i = 0; i < instSize; i++) { remap[oldPc + i] = newPc + i; } newPc += instSize; } oldPc += instSize; } if (oldPc != codeLen) { free(remap); return NULL; } remap[codeLen] = newPc; *outNewLen = newPc; return remap; } // ============================================================ // GOSUB pattern detection // ============================================================ // // GOSUB emits: // oldPc: OP_PUSH_INT32 (1 byte) // oldPc+1: int32 value V (4 bytes) // oldPc+5: OP_JMP (1 byte) // oldPc+6: int16 offset (2 bytes) // oldPc+8: // The invariant is V == oldPc + 8 (the pushed return address). // // Returns true if the given position is the start of such a pattern. static bool isGosubPush(const uint8_t *code, int32_t codeLen, int32_t pos) { if (pos + 8 > codeLen) { return false; } if (code[pos] != OP_PUSH_INT32) { return false; } if (code[pos + 5] != OP_JMP) { return false; } int32_t value = readI32LE(code + pos + 1); return value == pos + 8; } // ============================================================ // Apply remap to a single instruction's operand // ============================================================ // // Returns true on success, false if an offset overflows int16 or a // target doesn't land on a valid instruction in the old code. static bool remapAbsU16(uint8_t *newCode, int32_t newOpPos, int32_t operandOffset, uint16_t oldAddr, const int32_t *remap, int32_t newCodeLen) { int32_t newAddr = remap[oldAddr]; if (newAddr < 0 || newAddr > newCodeLen) { return false; } if (newAddr > 0xFFFF) { return false; } writeU16LE(newCode + newOpPos + operandOffset, (uint16_t)newAddr); return true; } // Rewrite a relative int16 offset. 
// oldOpPos, oldPcAfter: position of opcode and PC after reading offset // newOpPos, newPcAfter: same in the new code // operandOffset: byte offset from opcode to the int16 operand // oldOffset: the offset as stored in the old code // // Handles ON_ERROR's special case (offset == 0 means "disable"). // For ON_ERROR the caller passes allowZero=true; the zero is preserved as-is. static bool remapRelI16(uint8_t *newCode, int32_t newOpPos, int32_t operandOffset, int16_t oldOffset, int32_t oldPcAfter, int32_t newPcAfter, const int32_t *remap, int32_t codeLen, int32_t newCodeLen, bool allowZero) { if (allowZero && oldOffset == 0) { writeI16LE(newCode + newOpPos + operandOffset, 0); return true; } int32_t oldTarget = oldPcAfter + oldOffset; if (oldTarget < 0 || oldTarget > codeLen) { return false; } int32_t newTarget = remap[oldTarget]; if (newTarget < 0 || newTarget > newCodeLen) { return false; } int32_t newOffset = newTarget - newPcAfter; if (newOffset < -32768 || newOffset > 32767) { return false; } writeI16LE(newCode + newOpPos + operandOffset, (int16_t)newOffset); return true; } // ============================================================ // basCompactBytecode // ============================================================ int32_t basCompactBytecode(BasModuleT *mod) { if (!mod || !mod->code || mod->codeLen <= 0) { return 0; } const uint8_t *oldCode = mod->code; int32_t oldCodeLen = mod->codeLen; // Count OP_LINE occurrences. If none, nothing to do. 
int32_t lineCount = 0; { int32_t pc = 0; while (pc < oldCodeLen) { uint8_t op = oldCode[pc]; int32_t operand = opOperandSize(op); if (operand < 0 || pc + 1 + operand > oldCodeLen) { return 0; // unknown opcode -- skip compaction } if (op == OP_LINE) { lineCount++; } pc += 1 + operand; } if (pc != oldCodeLen) { return 0; } if (lineCount == 0) { return 0; } } int32_t newCodeLen = 0; int32_t *remap = buildRemap(oldCode, oldCodeLen, &newCodeLen); if (!remap) { return 0; } uint8_t *newCode = (uint8_t *)malloc(newCodeLen > 0 ? newCodeLen : 1); if (!newCode) { free(remap); return 0; } // Copy bytes (skipping OP_LINE) and rewrite address operands. bool ok = true; int32_t oldPc = 0; while (oldPc < oldCodeLen && ok) { uint8_t op = oldCode[oldPc]; int32_t operand = opOperandSize(op); int32_t instSize = 1 + operand; if (op == OP_LINE) { oldPc += instSize; continue; } int32_t newPc = remap[oldPc]; // Copy the instruction verbatim first; we'll overwrite operands that // need remapping below. memcpy(newCode + newPc, oldCode + oldPc, instSize); switch (op) { case OP_CALL: { uint16_t oldAddr = readU16LE(oldCode + oldPc + 1); if (!remapAbsU16(newCode, newPc, 1, oldAddr, remap, newCodeLen)) { ok = false; } break; } case OP_JMP: case OP_JMP_TRUE: case OP_JMP_FALSE: { int16_t oldOff = readI16LE(oldCode + oldPc + 1); if (!remapRelI16(newCode, newPc, 1, oldOff, oldPc + 3, newPc + 3, remap, oldCodeLen, newCodeLen, false)) { ok = false; } break; } case OP_FOR_NEXT: { int16_t oldOff = readI16LE(oldCode + oldPc + 4); if (!remapRelI16(newCode, newPc, 4, oldOff, oldPc + 6, newPc + 6, remap, oldCodeLen, newCodeLen, false)) { ok = false; } break; } case OP_ON_ERROR: { int16_t oldOff = readI16LE(oldCode + oldPc + 1); if (!remapRelI16(newCode, newPc, 1, oldOff, oldPc + 3, newPc + 3, remap, oldCodeLen, newCodeLen, true)) { ok = false; } break; } case OP_PUSH_INT32: { // Detect GOSUB return-address push and remap the absolute address. 
if (isGosubPush(oldCode, oldCodeLen, oldPc)) { int32_t oldAddr = readI32LE(oldCode + oldPc + 1); if (oldAddr < 0 || oldAddr > oldCodeLen) { ok = false; break; } int32_t newAddr = remap[oldAddr]; if (newAddr < 0 || newAddr > newCodeLen) { ok = false; break; } writeI32LE(newCode + newPc + 1, newAddr); } break; } default: break; } oldPc += instSize; } if (!ok) { free(newCode); free(remap); return 0; } // Rewrite proc entry points for (int32_t i = 0; i < mod->procCount; i++) { int32_t oldAddr = mod->procs[i].codeAddr; if (oldAddr < 0 || oldAddr > oldCodeLen) { free(newCode); free(remap); return 0; } mod->procs[i].codeAddr = remap[oldAddr]; } // Rewrite form-var init code addresses. Negative means "no init code". for (int32_t i = 0; i < mod->formVarInfoCount; i++) { int32_t oldAddr = mod->formVarInfo[i].initCodeAddr; if (oldAddr < 0) { continue; } if (oldAddr > oldCodeLen) { free(newCode); free(remap); return 0; } int32_t oldLen = mod->formVarInfo[i].initCodeLen; int32_t oldEnd = oldAddr + oldLen; if (oldEnd > oldCodeLen) { oldEnd = oldCodeLen; } int32_t newAddr = remap[oldAddr]; int32_t newEnd = remap[oldEnd]; mod->formVarInfo[i].initCodeAddr = newAddr; mod->formVarInfo[i].initCodeLen = newEnd - newAddr; } // Rewrite entry point if (mod->entryPoint >= 0 && mod->entryPoint <= oldCodeLen) { mod->entryPoint = remap[mod->entryPoint]; } // Swap in the new code free(mod->code); mod->code = newCode; mod->codeLen = newCodeLen; int32_t removed = oldCodeLen - newCodeLen; free(remap); return removed; }