// FS2 sector-read tracer.
//
// Boots a synthetic Apple //e environment with the FS2 chunks loaded
// into RAM, hooks the SmartPort entry point at $C70D, then trampolines
// into the LoadSceneryFile* entry points and watches every block read.
//
// This is NOT a full Apple //e emulator. It only implements enough of
// the 6502 instruction set + zero-page / RAM model to run the FS2
// loader code path. The Disk II / SmartPort layer is replaced with a
// single hook that:
//   - logs the block number (read from RBBlockNumber at chunk4)
//   - copies 512 bytes from the .dsk image at file_offset = block * 512
//   - returns "success" via CLC/RTS
//
// Usage:
//   fs2trace [entry]
//   entry is one of File0..File4 (default File0). Output is one
//   "BLOCK $XXXX" line per read.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MEM_SIZE 65536
#define BLOCK_SIZE 512
#define SECTOR_SIZE 256
#define MAX_TRACE_BLKS 4096

// Emulated 64K address space and CPU state. Flags are stored one per
// variable as 0/1 and packed/unpacked by getP()/setP().
static uint8_t mem[MEM_SIZE];
static uint8_t pc_low;
static uint16_t pc;
static uint8_t reg_a, reg_x, reg_y, reg_s;
static uint8_t flag_n, flag_v, flag_d, flag_i, flag_z, flag_c;
// Incremented once per executed instruction by step() (an instruction
// count, not a true machine-cycle count).
static int cycles;
static int stop;

// Set by FS2TRACE_DUMP_AT_BLOCK once the requested block count is
// reached. The main step loop checks this and exits cleanly without
// the noisy "last 16 PCs" trail that `stop` triggers.
static int dumpRequested;

// Disk image: scenery disk for the trace.
static uint8_t *diskImage;
static size_t diskSize;

// Port of chunk3 `SceneryNibbleDecode` ($D52D). Despite the name in
// the disassembly, this is really an ENCODER -- it takes raw input
// bytes and produces an Apple-disk-style nibble format where every
// output byte has bits 7,5,3,1 forced to 1 (the `$AA` pattern). Each
// input byte expands to two output bytes:
//
//   out[0] = (in >> 1) | $AA
//   out[1] =  in       | $AA
//
// A running EOR checksum is fed by the input stream, then emitted as
// two trailing bytes (`(cksum>>1)|$AA` then `cksum|$AA`). The original
// fills RAM from $3B60 forward and stops once the destination high
// byte hits $3E -- so the loop produces `0x3E00 - 0x3B60` = 672 output
// bytes from `672 / 2` = 336 input bytes.
//
// Parameters:
//   src         - at least 336 readable input bytes.
//   dst         - at least 672 writable output bytes.
//   checksumOut - if non-NULL, receives the two encoded checksum
//                 bytes; they are NOT appended to dst.
//
// Returns the number of input bytes consumed (always 336).
static int nibbleDecode(const uint8_t *src, uint8_t *dst, uint8_t *checksumOut) {
    uint8_t cksum = 0;
    int written = 0;
    int target = 0x3E00 - 0x3B60; // output bytes (672)
    int srcPos = 0;
    while (written + 2 <= target) {
        uint8_t b = src[srcPos];
        cksum ^= b;
        dst[written++] = (uint8_t)((b >> 1) | 0xAA);
        dst[written++] = (uint8_t)( b       | 0xAA);
        srcPos++;
    }
    if (checksumOut != NULL) {
        checksumOut[0] = (uint8_t)((cksum >> 1) | 0xAA);
        checksumOut[1] = (uint8_t)( cksum       | 0xAA);
    }
    return srcPos;
}

// Trace buffer.
static int tracedBlocks[MAX_TRACE_BLKS];
static int traceCount;

// Forward declarations.
static uint8_t rd(uint16_t addr);
static void wr(uint16_t addr, uint8_t v);
static void push(uint8_t v);
static uint8_t pop(void);
static void push16(uint16_t v);
static uint16_t pop16(void);
static uint8_t fetch(void);
static uint16_t fetch16(void);
static void setNZ(uint8_t v);
static uint8_t getP(void);
static void setP(uint8_t v);
static void step(void);
static void hookSmartPort(void);
static int loadChunk(const char *path, uint16_t addr, size_t maxLen);

// Plain memory read: no softswitch or I/O side effects are modelled.
static uint8_t rd(uint16_t addr) {
    return mem[addr];
}

// Apple II LCBANK split: $D000-$FFFF can be either ROM, LCBANK1, or
// LCBANK2 depending on softswitches at $C080-$C08F. fs2trace treats
// the entire $0000-$FFFF as plain RAM, but FS2 boot relies on hires
// page writes NOT corrupting the chunk binaries that live at
// $D300-$F3FF (chunk3) and $F600-$FBFF (chunk2). Ignore writes to
// $D000-$FFFF so chunk binaries stay intact. This breaks Apply64K-
// PatchTable if it ever targets a high address (it doesn't -- all
// patch table addresses are in $6000-$AE00, chunk5 main).
// // Without this guard the hires drawing loop at $1C07 (sta // (hires_ptr1),y) corrupts $FA67 and $FA71-3 in chunk2 (the wind // code), causing fs2trace to halt at $FA73 on undocumented opcode // $1B before more loader iterations can run. static void wr(uint16_t addr, uint8_t v) { const char *watchEnv = getenv("FS2TRACE_WATCH"); if (watchEnv != NULL && mem[addr] != v) { static int watchHits = 0; static uint16_t watchLo = 0, watchHi = 0; static int watchSetUp = 0; if (!watchSetUp) { unsigned long lo = strtoul(watchEnv, NULL, 0); char *dash = strchr(watchEnv, '-'); unsigned long hi = (dash != NULL) ? strtoul(dash + 1, NULL, 0) : lo; watchLo = (uint16_t)lo; watchHi = (uint16_t)hi; watchSetUp = 1; } int watchCap = 50; const char *capEnv = getenv("FS2TRACE_WATCH_CAP"); if (capEnv != NULL) { watchCap = (int)strtol(capEnv, NULL, 0); } if (addr >= watchLo && addr <= watchHi && watchHits < watchCap) { fprintf(stderr, " watch: $%04X = $%02X (was $%02X) at PC $%04X cycles=%d\n", addr, v, mem[addr], pc, cycles); watchHits++; } } // Protect chunk binaries from stray hires-page writes. If the // emulator ever needs LCBANK semantics for real (e.g. a patch // that targets $D000-$FFFF), this needs to grow into proper // softswitch tracking. if (addr >= 0xD000 && getenv("FS2TRACE_NO_LC_GUARD") == NULL) { return; } mem[addr] = v; } // Zero-page-wrapping 16-bit read. For (zp),Y and (zp,X) addressing // modes, the high byte of the pointer must come from `(zp + 1) & $FF` // -- staying inside zero page even when zp == $FF. Without this wrap // we'd read from $0100 (the stack) which corrupts the return address // in subtle ways during scenery loader runs. 
static uint16_t rd16zp(uint8_t zp) { uint8_t lo = mem[zp]; uint8_t hi = mem[(uint8_t)(zp + 1)]; return (uint16_t)lo | ((uint16_t)hi << 8); } static void push(uint8_t v) { mem[0x0100 + reg_s] = v; reg_s--; } static uint8_t pop(void) { reg_s++; return mem[0x0100 + reg_s]; } static void push16(uint16_t v) { push((uint8_t)(v >> 8)); push((uint8_t)(v & 0xFF)); } static uint16_t pop16(void) { uint8_t lo = pop(); uint8_t hi = pop(); return (uint16_t)lo | ((uint16_t)hi << 8); } static uint8_t fetch(void) { return mem[pc++]; } static uint16_t fetch16(void) { uint16_t lo = fetch(); uint16_t hi = fetch(); return lo | (hi << 8); } static void setNZ(uint8_t v) { flag_n = (v & 0x80) ? 1 : 0; flag_z = (v == 0) ? 1 : 0; } static uint8_t getP(void) { return (uint8_t)((flag_n << 7) | (flag_v << 6) | 0x20 | (flag_d << 3) | (flag_i << 2) | (flag_z << 1) | flag_c); } static void setP(uint8_t v) { flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; flag_d = (v & 0x08) ? 1 : 0; flag_i = (v & 0x04) ? 1 : 0; flag_z = (v & 0x02) ? 1 : 0; flag_c = (v & 0x01) ? 1 : 0; } // Stub for chunk5 `PromptColorOrBW` ($AC3A). The real routine clears // the viewport, draws the intro banner, and waits for the user to // press 'A' (colour) or 'B' (black-and-white). Either choice copies // 22 bytes from ColorModePatch ($AB65) or BWModePatch ($AB7B) into // ColorOrBWModePatch ($0800), then returns. // // fs2trace can't render or read keys, so we simulate "user pressed // A" inline: copy ColorModePatch -> $0800 and RTS to the caller. // This unblocks the boot path so MainGameEntry can run through to // the main loop where the patched scenery slots fire. static void hookPromptColorOrBW(void) { for (int i = 0; i < 22; i++) { mem[0x0800 + i] = mem[0xAB65 + i]; } // Standard RTS: pop return address, add 1, set PC. uint8_t lo = pop(); uint8_t hi = pop(); pc = (uint16_t)(lo | (hi << 8)) + 1; } // SmartPort hook: runs when PC reaches $C70D. 
Reads RBBlockNumber // (3 bytes at chunk4-defined location, but we'll read it dynamically // from the call params) and copies the requested block from the disk // image into the SmartPort's data buffer. The caller's return is via // the standard SmartPort calling convention: after `jsr $C70D` the // command byte and parameter pointer are inline; we skip past them // before returning. static void hookSmartPort(void) { // Apple SmartPort calling convention: // jsr $C70D ; or whatever entry the firmware uses // .byte command ; here, $01 = ReadBlock // .word param_block_addr // The return address pushed by JSR points to the inline // command byte. We need to read the params, do the read, then // bump the return address past the inline data. uint16_t retLo = pop(); uint16_t retHi = pop(); uint16_t ret = retLo | (retHi << 8); // points one before inline cmd // 6502 jsr pushes (retAddr - 1). ret++; uint8_t command = mem[ret]; uint16_t paramAddr = (uint16_t)(mem[ret + 1] | (mem[ret + 2] << 8)); ret += 3; if (command == 0x01) { // ReadBlock // ParamBlock layout (chunk4 RBParams): // byte 0: parameter count ($03) // byte 1: unit number // bytes 2-3: data buffer addr // bytes 4-6: block number (3 bytes, 24-bit) uint16_t bufAddr = (uint16_t)(mem[paramAddr + 2] | (mem[paramAddr + 3] << 8)); uint32_t blockNum = (uint32_t)mem[paramAddr + 4] | ((uint32_t)mem[paramAddr + 5] << 8) | ((uint32_t)mem[paramAddr + 6] << 16); if (traceCount < MAX_TRACE_BLKS) { tracedBlocks[traceCount++] = (int)blockNum; } // FS2TRACE_DUMP_AT_BLOCK: snapshot RAM right after the // Nth block has been read and copied. The SD3 boot // sequence loads 16 blocks ($0360-$036F) into $2600+ // staging via SCRU0 -> $A7E0+ before the per-frame // PatchSlot_FrameSync starts overwriting the dispatcher // area. Stopping at block 16 captures the freshly-built // dispatcher + per-section geometry at $A800-$AAFF // before frames 1+ blow it away. 
const char *stopAtEnv = getenv("FS2TRACE_DUMP_AT_BLOCK"); if (stopAtEnv != NULL) { int stopAt = (int)strtol(stopAtEnv, NULL, 0); if (stopAt > 0 && traceCount == stopAt) { dumpRequested = 1; } } size_t off = (size_t)blockNum * BLOCK_SIZE; // Suppress reads that target our pre-loaded // ReadBlockDataBuffer at $D575. FS2's boot reads block // 0 (the .po boot block) into $D575, expecting it to // contain the scenery block list -- but the san-inc // pack .po has a standard ProDOS boot block there // instead. Stomping $D575 with boot-block content // breaks every subsequent block lookup. Our pre-fill // already has the right .blocks data; preserve it. bool skipWrite = (bufAddr >= 0xD575 && bufAddr < 0xD575 + 1024); if (skipWrite) { if (getenv("FS2TRACE_VERBOSE") != NULL) { fprintf(stderr, " read block $%04X -> $%04X (skipped: protect block list)\n", blockNum, bufAddr); } } else if (off + BLOCK_SIZE <= diskSize) { memcpy(&mem[bufAddr], &diskImage[off], BLOCK_SIZE); } else { memset(&mem[bufAddr], 0, BLOCK_SIZE); } if (!skipWrite && getenv("FS2TRACE_VERBOSE") != NULL) { fprintf(stderr, " read block $%04X -> $%04X (first byte: $%02X)\n", blockNum, bufAddr, mem[bufAddr]); } flag_c = 0; // success reg_a = 0; } else { fprintf(stderr, "unsupported SmartPort command $%02X\n", command); flag_c = 1; } // Return past the inline command + param pointer. push((uint8_t)((ret - 1) >> 8)); push((uint8_t)((ret - 1) & 0xFF)); // Standard RTS path: pop and add 1. retLo = pop(); retHi = pop(); pc = (uint16_t)((retHi << 8) | retLo) + 1; } // 6502 instruction step. Implements the documented opcodes used by // FS2's loader path. Anything else trips the unknown-opcode path and // stops the emulator with an error. static void step(void) { if (pc == 0xC70D || pc == 0xC700 || pc == 0xC709) { hookSmartPort(); return; } if (pc == 0xAC3A) { hookPromptColorOrBW(); return; } uint8_t op = fetch(); cycles++; switch (op) { // BRK -- treat as halt with status. 
Allow caller to // disable the halt via FS2TRACE_NO_BRK_HALT (= treat // BRK as RTS so we can chase past zero-padded chunk4 // areas without aborting the dispatcher). case 0x00: { if (getenv("FS2TRACE_NO_BRK_HALT") != NULL) { uint8_t rl = pop(); uint8_t rh = pop(); pc = (uint16_t)((rh << 8) | rl) + 1; break; } fprintf(stderr, "BRK at $%04X cycles=%d\n", (uint16_t)(pc - 1), cycles); fflush(stderr); stop = 1; return; } // NOP variants. case 0xEA: break; // CLC / SEC / CLD / SED / CLI / SEI / CLV case 0x18: flag_c = 0; break; case 0x38: flag_c = 1; break; case 0xD8: flag_d = 0; break; case 0xF8: flag_d = 1; break; case 0x58: flag_i = 0; break; case 0x78: flag_i = 1; break; case 0xB8: flag_v = 0; break; // Transfers. case 0xAA: reg_x = reg_a; setNZ(reg_x); break; // TAX case 0xA8: reg_y = reg_a; setNZ(reg_y); break; // TAY case 0x8A: reg_a = reg_x; setNZ(reg_a); break; // TXA case 0x98: reg_a = reg_y; setNZ(reg_a); break; // TYA case 0xBA: reg_x = reg_s; setNZ(reg_x); break; // TSX case 0x9A: reg_s = reg_x; break; // TXS // Stack. 
case 0x48: push(reg_a); break; // PHA case 0x68: reg_a = pop(); setNZ(reg_a); break; // PLA case 0x08: push((uint8_t)(getP() | 0x10)); break; // PHP case 0x28: setP(pop()); break; // PLP // INC/DEC X/Y case 0xE8: reg_x++; setNZ(reg_x); break; // INX case 0xC8: reg_y++; setNZ(reg_y); break; // INY case 0xCA: reg_x--; setNZ(reg_x); break; // DEX case 0x88: reg_y--; setNZ(reg_y); break; // DEY // LDA case 0xA9: reg_a = fetch(); setNZ(reg_a); break; // LDA imm case 0xA5: reg_a = rd(fetch()); setNZ(reg_a); break; // LDA zp case 0xB5: reg_a = rd((uint8_t)(fetch() + reg_x)); setNZ(reg_a); break; // LDA zp,X case 0xAD: reg_a = rd(fetch16()); setNZ(reg_a); break; // LDA abs case 0xBD: { uint16_t a = fetch16(); reg_a = rd((uint16_t)(a + reg_x)); setNZ(reg_a); break; } // LDA abs,X case 0xB9: { uint16_t a = fetch16(); reg_a = rd((uint16_t)(a + reg_y)); setNZ(reg_a); break; } // LDA abs,Y case 0xA1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); reg_a = rd(a); setNZ(reg_a); break; } // LDA (zp,X) case 0xB1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); reg_a = rd(a); setNZ(reg_a); break; } // LDA (zp),Y // LDX case 0xA2: reg_x = fetch(); setNZ(reg_x); break; // LDX imm case 0xA6: reg_x = rd(fetch()); setNZ(reg_x); break; // LDX zp case 0xB6: reg_x = rd((uint8_t)(fetch() + reg_y)); setNZ(reg_x); break; // LDX zp,Y case 0xAE: reg_x = rd(fetch16()); setNZ(reg_x); break; // LDX abs case 0xBE: { uint16_t a = fetch16(); reg_x = rd((uint16_t)(a + reg_y)); setNZ(reg_x); break; } // LDX abs,Y // LDY case 0xA0: reg_y = fetch(); setNZ(reg_y); break; // LDY imm case 0xA4: reg_y = rd(fetch()); setNZ(reg_y); break; // LDY zp case 0xB4: reg_y = rd((uint8_t)(fetch() + reg_x)); setNZ(reg_y); break; // LDY zp,X case 0xAC: reg_y = rd(fetch16()); setNZ(reg_y); break; // LDY abs case 0xBC: { uint16_t a = fetch16(); reg_y = rd((uint16_t)(a + reg_x)); setNZ(reg_y); break; } // LDY abs,X // STA case 0x85: wr(fetch(), reg_a); break; // STA zp case 0x95: 
wr((uint8_t)(fetch() + reg_x), reg_a); break; // STA zp,X case 0x8D: wr(fetch16(), reg_a); break; // STA abs case 0x9D: { uint16_t a = fetch16(); wr((uint16_t)(a + reg_x), reg_a); break; } // STA abs,X case 0x99: { uint16_t a = fetch16(); wr((uint16_t)(a + reg_y), reg_a); break; } // STA abs,Y case 0x81: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); wr(a, reg_a); break; } // STA (zp,X) case 0x91: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); wr(a, reg_a); break; } // STA (zp),Y // STX case 0x86: wr(fetch(), reg_x); break; // STX zp case 0x96: wr((uint8_t)(fetch() + reg_y), reg_x); break; case 0x8E: wr(fetch16(), reg_x); break; // STY case 0x84: wr(fetch(), reg_y); break; case 0x94: wr((uint8_t)(fetch() + reg_x), reg_y); break; case 0x8C: wr(fetch16(), reg_y); break; // INC zp / abs case 0xE6: { uint8_t a = fetch(); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; } case 0xF6: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; } case 0xEE: { uint16_t a = fetch16(); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; } case 0xFE: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; } // DEC case 0xC6: { uint8_t a = fetch(); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; } case 0xD6: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; } case 0xCE: { uint16_t a = fetch16(); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; } case 0xDE: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; } // Logical / arithmetic helpers (define lambdas inline). #define DO_ADC(v) do { uint16_t s = (uint16_t)reg_a + (uint16_t)(v) + (uint16_t)flag_c; \ flag_c = (s > 0xFF) ? 1 : 0; \ flag_v = ((reg_a ^ (v)) & 0x80) ? 0 : (((reg_a ^ s) & 0x80) ? 
1 : 0); \ reg_a = (uint8_t)s; setNZ(reg_a); } while (0) #define DO_SBC(v) do { uint8_t vv = (uint8_t)~(v); \ uint16_t s = (uint16_t)reg_a + (uint16_t)vv + (uint16_t)flag_c; \ flag_c = (s > 0xFF) ? 1 : 0; \ flag_v = ((reg_a ^ vv) & 0x80) ? 0 : (((reg_a ^ s) & 0x80) ? 1 : 0); \ reg_a = (uint8_t)s; setNZ(reg_a); } while (0) case 0x69: { uint8_t v = fetch(); DO_ADC(v); break; } case 0x65: { uint8_t v = rd(fetch()); DO_ADC(v); break; } case 0x75: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_ADC(v); break; } case 0x6D: { uint8_t v = rd(fetch16()); DO_ADC(v); break; } case 0x7D: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_ADC(v); break; } case 0x79: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_ADC(v); break; } case 0x71: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_ADC(v); break; } case 0x61: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_ADC(v); break; } case 0xE9: { uint8_t v = fetch(); DO_SBC(v); break; } case 0xE5: { uint8_t v = rd(fetch()); DO_SBC(v); break; } case 0xF5: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_SBC(v); break; } case 0xED: { uint8_t v = rd(fetch16()); DO_SBC(v); break; } case 0xFD: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_SBC(v); break; } case 0xF9: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_SBC(v); break; } case 0xF1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_SBC(v); break; } case 0xE1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_SBC(v); break; } // CMP / CPX / CPY #define DO_CMP(reg, v) do { uint16_t r = (uint16_t)(reg) + 0x100 - (uint16_t)(v); \ flag_c = ((reg) >= (v)) ? 
1 : 0; setNZ((uint8_t)(r & 0xFF)); } while (0) case 0xC9: { uint8_t v = fetch(); DO_CMP(reg_a, v); break; } case 0xC5: { uint8_t v = rd(fetch()); DO_CMP(reg_a, v); break; } case 0xD5: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_CMP(reg_a, v); break; } case 0xCD: { uint8_t v = rd(fetch16()); DO_CMP(reg_a, v); break; } case 0xDD: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_CMP(reg_a, v); break; } case 0xD9: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_CMP(reg_a, v); break; } case 0xD1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_CMP(reg_a, v); break; } case 0xC1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_CMP(reg_a, v); break; } case 0xE0: { uint8_t v = fetch(); DO_CMP(reg_x, v); break; } case 0xE4: { uint8_t v = rd(fetch()); DO_CMP(reg_x, v); break; } case 0xEC: { uint8_t v = rd(fetch16()); DO_CMP(reg_x, v); break; } case 0xC0: { uint8_t v = fetch(); DO_CMP(reg_y, v); break; } case 0xC4: { uint8_t v = rd(fetch()); DO_CMP(reg_y, v); break; } case 0xCC: { uint8_t v = rd(fetch16()); DO_CMP(reg_y, v); break; } // AND / ORA / EOR #define DO_AND(v) do { reg_a &= (v); setNZ(reg_a); } while (0) #define DO_ORA(v) do { reg_a |= (v); setNZ(reg_a); } while (0) #define DO_EOR(v) do { reg_a ^= (v); setNZ(reg_a); } while (0) case 0x29: { uint8_t v = fetch(); DO_AND(v); break; } case 0x25: { uint8_t v = rd(fetch()); DO_AND(v); break; } case 0x35: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_AND(v); break; } case 0x2D: { uint8_t v = rd(fetch16()); DO_AND(v); break; } case 0x3D: { uint16_t a = fetch16(); DO_AND(rd((uint16_t)(a + reg_x))); break; } case 0x39: { uint16_t a = fetch16(); DO_AND(rd((uint16_t)(a + reg_y))); break; } case 0x31: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_AND(rd(a)); break; } case 0x21: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_AND(rd(a)); break; } case 0x09: { 
uint8_t v = fetch(); DO_ORA(v); break; } case 0x05: { uint8_t v = rd(fetch()); DO_ORA(v); break; } case 0x15: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_ORA(v); break; } case 0x0D: { uint8_t v = rd(fetch16()); DO_ORA(v); break; } case 0x1D: { uint16_t a = fetch16(); DO_ORA(rd((uint16_t)(a + reg_x))); break; } case 0x19: { uint16_t a = fetch16(); DO_ORA(rd((uint16_t)(a + reg_y))); break; } case 0x11: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_ORA(rd(a)); break; } case 0x01: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_ORA(rd(a)); break; } case 0x49: { uint8_t v = fetch(); DO_EOR(v); break; } case 0x45: { uint8_t v = rd(fetch()); DO_EOR(v); break; } case 0x55: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_EOR(v); break; } case 0x4D: { uint8_t v = rd(fetch16()); DO_EOR(v); break; } case 0x5D: { uint16_t a = fetch16(); DO_EOR(rd((uint16_t)(a + reg_x))); break; } case 0x59: { uint16_t a = fetch16(); DO_EOR(rd((uint16_t)(a + reg_y))); break; } case 0x51: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_EOR(rd(a)); break; } case 0x41: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_EOR(rd(a)); break; } // BIT case 0x24: { uint8_t v = rd(fetch()); flag_z = (reg_a & v) == 0 ? 1 : 0; flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; break; } case 0x2C: { uint8_t v = rd(fetch16()); flag_z = (reg_a & v) == 0 ? 1 : 0; flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; break; } // ASL / LSR / ROL / ROR (accumulator + memory variants) #define ASL(v) do { flag_c = ((v) & 0x80) ? 1 : 0; (v) = (uint8_t)((v) << 1); setNZ(v); } while (0) #define LSR(v) do { flag_c = (v) & 1; (v) = (uint8_t)((v) >> 1); setNZ(v); } while (0) #define ROL(v) do { uint8_t c = flag_c; flag_c = ((v) & 0x80) ? 
1 : 0; (v) = (uint8_t)(((v) << 1) | c); setNZ(v); } while (0) #define ROR(v) do { uint8_t c = flag_c; flag_c = (v) & 1; (v) = (uint8_t)(((v) >> 1) | (c << 7)); setNZ(v); } while (0) case 0x0A: ASL(reg_a); break; case 0x06: { uint8_t a = fetch(); uint8_t v = rd(a); ASL(v); wr(a, v); break; } case 0x16: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ASL(v); wr(a, v); break; } case 0x0E: { uint16_t a = fetch16(); uint8_t v = rd(a); ASL(v); wr(a, v); break; } case 0x1E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ASL(v); wr(a, v); break; } case 0x4A: LSR(reg_a); break; case 0x46: { uint8_t a = fetch(); uint8_t v = rd(a); LSR(v); wr(a, v); break; } case 0x56: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); LSR(v); wr(a, v); break; } case 0x4E: { uint16_t a = fetch16(); uint8_t v = rd(a); LSR(v); wr(a, v); break; } case 0x5E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); LSR(v); wr(a, v); break; } case 0x2A: ROL(reg_a); break; case 0x26: { uint8_t a = fetch(); uint8_t v = rd(a); ROL(v); wr(a, v); break; } case 0x36: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ROL(v); wr(a, v); break; } case 0x2E: { uint16_t a = fetch16(); uint8_t v = rd(a); ROL(v); wr(a, v); break; } case 0x3E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ROL(v); wr(a, v); break; } case 0x6A: ROR(reg_a); break; case 0x66: { uint8_t a = fetch(); uint8_t v = rd(a); ROR(v); wr(a, v); break; } case 0x76: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ROR(v); wr(a, v); break; } case 0x6E: { uint16_t a = fetch16(); uint8_t v = rd(a); ROR(v); wr(a, v); break; } case 0x7E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ROR(v); wr(a, v); break; } // Branches. 
#define BRANCH(cond) do { int8_t off = (int8_t)fetch(); if (cond) pc = (uint16_t)(pc + off); } while (0) case 0x10: BRANCH(!flag_n); break; // BPL case 0x30: BRANCH( flag_n); break; // BMI case 0x50: BRANCH(!flag_v); break; // BVC case 0x70: BRANCH( flag_v); break; // BVS case 0x90: BRANCH(!flag_c); break; // BCC case 0xB0: BRANCH( flag_c); break; // BCS case 0xD0: BRANCH(!flag_z); break; // BNE case 0xF0: BRANCH( flag_z); break; // BEQ // Jumps / subroutine. case 0x4C: pc = fetch16(); break; // JMP abs case 0x6C: { uint16_t a = fetch16(); // JMP (ind) // 6502 page-boundary bug uint16_t lo = mem[a]; uint16_t hi = mem[(a & 0xFF00) | ((a + 1) & 0xFF)]; pc = (uint16_t)(lo | (hi << 8)); break; } case 0x20: { // JSR abs uint16_t target = fetch16(); uint16_t retAddr = (uint16_t)(pc - 1); push16(retAddr); if (getenv("FS2TRACE_JSR") != NULL && (retAddr & 0xFF00) == 0x8000) { fprintf(stderr, "JSR pushes $%04X (target $%04X)\n", retAddr, target); } if (target == 0x78E0 && getenv("FS2TRACE_JSR_78E0") != NULL) { fprintf(stderr, " JSR $78E0 from PC $%04X A=$%02X $24=$%02X $B1=$%02X $0876=$%02X cycles=%d\n", retAddr, reg_a, mem[0x24], mem[0xB1], mem[0x0876], cycles); } // FS2TRACE_PERSP=1: log every PerspectiveDivide // call ($7BFD in MAME RAM). Inputs: A=num_hi, // Y=num_lo, $C4/$C5=denominator. Output is in // A on return. The self-modified table address // lives at $7D47/$7D48 (= MAME's L7D76+1/+2, // not source's $7D77/$7D78). if (target == 0x7BFD && getenv("FS2TRACE_PERSP") != NULL) { int16_t num = (int16_t)((uint16_t)reg_y | ((uint16_t)reg_a << 8)); int16_t den = (int16_t)((uint16_t)mem[0xC4] | ((uint16_t)mem[0xC5] << 8)); // MAME's L7D76 (= LDA abs,X) is at $7D48; the // self-modified table address bytes are at // $7D49 (lo) and $7D4A (hi). 
fprintf(stderr, " JSR PerspDiv from PC $%04X num=%6d den=%6d table=$%02X%02X\n", retAddr, num, den, mem[0x7D4A], mem[0x7D49]); } pc = target; break; } case 0x60: { // RTS // PC was advanced by fetch(); the RTS // instruction itself was at pc-1. uint16_t rtsAddr = (uint16_t)(pc - 1); pc = (uint16_t)(pop16() + 1); if (getenv("FS2TRACE_RTS") != NULL && rtsAddr == 0xD458) { fprintf(stderr, "RTS@$D458 -> $%04X (S=$%02X)\n", pc, reg_s); for (int s = 0; s < 8; s++) { fprintf(stderr, " stack[$%02X] = $%02X\n", (uint8_t)(reg_s - s), mem[0x100 + (uint8_t)(reg_s - s)]); } } // Capture PerspectiveDivide return value. // MAME's PerspectiveDivide RTS is at $7D51 // (= source's L7D7F equivalent). if (rtsAddr == 0x7D51 && getenv("FS2TRACE_PERSP") != NULL) { fprintf(stderr, " PerspDiv returns A=$%02X (signed=%d)\n", reg_a, (int)(int8_t)reg_a); } break; } case 0x40: setP(pop()); pc = pop16(); break; // RTI // 65C02 extensions used by chunk3. case 0x14: { // TRB zp uint8_t zp = fetch(); uint8_t m = mem[zp]; flag_z = ((reg_a & m) == 0); mem[zp] = (uint8_t)(m & ~reg_a); break; } case 0x1C: { // TRB abs uint16_t a = fetch16(); uint8_t m = rd(a); flag_z = ((reg_a & m) == 0); wr(a, (uint8_t)(m & ~reg_a)); break; } case 0x04: { // TSB zp uint8_t zp = fetch(); uint8_t m = mem[zp]; flag_z = ((reg_a & m) == 0); mem[zp] = (uint8_t)(m | reg_a); break; } case 0x0C: { // TSB abs uint16_t a = fetch16(); uint8_t m = rd(a); flag_z = ((reg_a & m) == 0); wr(a, (uint8_t)(m | reg_a)); break; } case 0x80: { // BRA rel int8_t off = (int8_t)fetch(); pc = (uint16_t)(pc + off); break; } case 0x12: { // ORA (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); reg_a = (uint8_t)(reg_a | rd(a)); setNZ(reg_a); break; } case 0x32: { // AND (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); reg_a = (uint8_t)(reg_a & rd(a)); setNZ(reg_a); break; } case 0x52: { // EOR (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] 
| ((uint16_t)mem[(uint8_t)(zp+1)] << 8); reg_a = (uint8_t)(reg_a ^ rd(a)); setNZ(reg_a); break; } case 0x72: { // ADC (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); uint8_t m = rd(a); uint16_t r = (uint16_t)reg_a + (uint16_t)m + (uint16_t)flag_c; flag_v = (((reg_a ^ m) & 0x80) == 0) && (((reg_a ^ (uint8_t)r) & 0x80) != 0); flag_c = r > 0xFF; reg_a = (uint8_t)r; setNZ(reg_a); break; } case 0x92: { // STA (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); wr(a, reg_a); break; } case 0xB2: { // LDA (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); reg_a = rd(a); setNZ(reg_a); break; } case 0xD2: { // CMP (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); uint8_t m = rd(a); flag_c = reg_a >= m; setNZ((uint8_t)(reg_a - m)); break; } case 0xF2: { // SBC (zp) uint8_t zp = fetch(); uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8); uint8_t m = rd(a); uint16_t r = (uint16_t)reg_a + (uint16_t)((uint8_t)~m) + (uint16_t)flag_c; flag_v = (((reg_a ^ m) & 0x80) != 0) && (((reg_a ^ (uint8_t)r) & 0x80) != 0); flag_c = r > 0xFF; reg_a = (uint8_t)r; setNZ(reg_a); break; } case 0x64: { // STZ zp uint8_t zp = fetch(); mem[zp] = 0; break; } case 0x74: { // STZ zp,X uint8_t zp = fetch(); mem[(uint8_t)(zp + reg_x)] = 0; break; } case 0x9C: { // STZ abs uint16_t a = fetch16(); wr(a, 0); break; } case 0x9E: { // STZ abs,X uint16_t a = fetch16(); wr((uint16_t)(a + reg_x), 0); break; } case 0x5A: push(reg_y); break; // PHY case 0x7A: { reg_y = pop(); setNZ(reg_y); break; } // PLY case 0xDA: push(reg_x); break; // PHX case 0xFA: { reg_x = pop(); setNZ(reg_x); break; } // PLX case 0x3A: reg_a = (uint8_t)(reg_a - 1); setNZ(reg_a); break; // DEC A case 0x1A: reg_a = (uint8_t)(reg_a + 1); setNZ(reg_a); break; // INC A // 65C02 RMB/SMB ops: reset/set memory bit N of zp. 
case 0x07: { uint8_t zp = fetch(); mem[zp] &= ~0x01; break; } // RMB0 case 0x17: { uint8_t zp = fetch(); mem[zp] &= ~0x02; break; } // RMB1 case 0x27: { uint8_t zp = fetch(); mem[zp] &= ~0x04; break; } // RMB2 case 0x37: { uint8_t zp = fetch(); mem[zp] &= ~0x08; break; } // RMB3 case 0x47: { uint8_t zp = fetch(); mem[zp] &= ~0x10; break; } // RMB4 case 0x57: { uint8_t zp = fetch(); mem[zp] &= ~0x20; break; } // RMB5 case 0x67: { uint8_t zp = fetch(); mem[zp] &= ~0x40; break; } // RMB6 case 0x77: { uint8_t zp = fetch(); mem[zp] &= ~0x80; break; } // RMB7 case 0x87: { uint8_t zp = fetch(); mem[zp] |= 0x01; break; } // SMB0 case 0x97: { uint8_t zp = fetch(); mem[zp] |= 0x02; break; } // SMB1 case 0xA7: { uint8_t zp = fetch(); mem[zp] |= 0x04; break; } // SMB2 case 0xB7: { uint8_t zp = fetch(); mem[zp] |= 0x08; break; } // SMB3 case 0xC7: { uint8_t zp = fetch(); mem[zp] |= 0x10; break; } // SMB4 case 0xD7: { uint8_t zp = fetch(); mem[zp] |= 0x20; break; } // SMB5 case 0xE7: { uint8_t zp = fetch(); mem[zp] |= 0x40; break; } // SMB6 case 0xF7: { uint8_t zp = fetch(); mem[zp] |= 0x80; break; } // SMB7 // BBR/BBS rel: branch on bit reset/set in zp. 
case 0x0F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x01)) pc = (uint16_t)(pc + off); break; } // BBR0 case 0x1F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x02)) pc = (uint16_t)(pc + off); break; } // BBR1 case 0x2F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x04)) pc = (uint16_t)(pc + off); break; } // BBR2 case 0x3F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x08)) pc = (uint16_t)(pc + off); break; } // BBR3 case 0x4F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x10)) pc = (uint16_t)(pc + off); break; } // BBR4 case 0x5F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x20)) pc = (uint16_t)(pc + off); break; } // BBR5 case 0x6F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x40)) pc = (uint16_t)(pc + off); break; } // BBR6 case 0x7F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x80)) pc = (uint16_t)(pc + off); break; } // BBR7 case 0x8F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x01) pc = (uint16_t)(pc + off); break; } // BBS0 case 0x9F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x02) pc = (uint16_t)(pc + off); break; } // BBS1 case 0xAF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x04) pc = (uint16_t)(pc + off); break; } // BBS2 case 0xBF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x08) pc = (uint16_t)(pc + off); break; } // BBS3 case 0xCF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x10) pc = (uint16_t)(pc + off); break; } // BBS4 case 0xDF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x20) pc = (uint16_t)(pc + off); break; } // BBS5 case 0xEF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x40) pc = (uint16_t)(pc + off); break; } // BBS6 case 0xFF: { uint8_t zp = fetch(); 
int8_t off = (int8_t)fetch(); if (mem[zp] & 0x80) pc = (uint16_t)(pc + off); break; } // BBS7 default: fflush(stdout); fprintf(stderr, "UNIMPL opcode $%02X at PC $%04X (cycles=%llu)\n", op, (uint16_t)(pc - 1), (unsigned long long)cycles); fflush(stderr); stop = 1; break; } (void)pc_low; } static int loadChunk(const char *path, uint16_t addr, size_t maxLen) { FILE *f = fopen(path, "rb"); if (f == NULL) { fprintf(stderr, "cannot open %s\n", path); return 0; } size_t n = fread(&mem[addr], 1, maxLen, f); fclose(f); fprintf(stderr, "loaded %s -> $%04X..%04X (%zu bytes)\n", path, addr, (uint16_t)(addr + n - 1), n); return (int)n; } // --matrix mode: run chunk5 SetupViewProjection ($6000) with the // supplied attitude / view inputs and dump the resulting $78..$89 // matrix. Uses a captured MAME RAM image as the initial state so all // ZP slots, chunk2-5 binaries, and demo wiring are already in place. // // Usage: // fs2trace --matrix [ram.bin] // // Default ram.bin = tmp/capture_boot.bin (chunk5Oracle replacement). static void loadOriginalChunks(void); // defined below static int matrixMode(int argc, char **argv) { if (argc < 6 || argc > 7) { fprintf(stderr, "usage: %s --matrix [ram.bin]\n", argv[0]); return 2; } long yaw = strtol(argv[2], NULL, 0); long pitch = strtol(argv[3], NULL, 0); long bank = strtol(argv[4], NULL, 0); long vd = strtol(argv[5], NULL, 0); const char *ramPath = (argc > 6) ? argv[6] : "/home/scott/claude/flight/tmp/capture_boot.bin"; // FS2TRACE_USE_ORIG=1: load chunk4/chunk5 from out/ instead // of from a captured RAM image. The captured chunk5 in the // boot dump is HEAVILY patched by Apply64KPatchTable -- the // SetupViewProjection control flow is rewritten there and no // longer matches the source. Using the unpatched binaries // gives source-faithful matrix output (matching the chunk5.s // listing), which is what we want for validating the C // transliteration in chunk5Setup.c. 
if (getenv("FS2TRACE_USE_ORIG") != NULL) { loadOriginalChunks(); // ZP isn't initialised by the binaries; explicit zero // is fine for SetupViewProjection (no read-before-write // outside the inputs we poke below). } else { FILE *rf = fopen(ramPath, "rb"); if (rf == NULL) { fprintf(stderr, "cannot open RAM image %s\n", ramPath); return 1; } size_t got = fread(mem, 1, MEM_SIZE, rf); fclose(rf); if (got != MEM_SIZE) { fprintf(stderr, "RAM image %s short read (%zu bytes)\n", ramPath, got); return 1; } } // Poke inputs over whatever the captured ZP held. uint16_t y = (uint16_t)((yaw < 0) ? (yaw + 0x10000) : yaw); uint16_t p = (uint16_t)((pitch < 0) ? (pitch + 0x10000) : pitch); uint16_t b = (uint16_t)((bank < 0) ? (bank + 0x10000) : bank); mem[0x6C] = (uint8_t)( y & 0xFF); mem[0x6D] = (uint8_t)((y >> 8) & 0xFF); mem[0x6E] = (uint8_t)( p & 0xFF); mem[0x6F] = (uint8_t)((p >> 8) & 0xFF); mem[0x70] = (uint8_t)( b & 0xFF); mem[0x71] = (uint8_t)((b >> 8) & 0xFF); mem[0x0A70] = (uint8_t)(vd & 0xFF); // Set up CPU and call $6000 = SetupViewProjection. Push a // sentinel return so the routine's RTS lands at $FFFF (the // SmartPort hook patches $C70D, but $FFFF isn't touched -- a // BRK at $0000 halts the loop cleanly via stop=1 below). reg_a = reg_x = reg_y = 0; reg_s = 0xFF; flag_n = flag_v = flag_d = flag_z = flag_c = 0; flag_i = 1; stop = 0; pc = 0x6000; push16(0xFFFE); // FS2TRACE_MATRIX_BREAK=$XXXX prints state every time PC // matches that address. Used to inspect intermediate state // (e.g., set to $177A to see inputs to shifted L1778). const char *brkEnv = getenv("FS2TRACE_MATRIX_BREAK"); uint16_t brkPC = (brkEnv != NULL) ? 
(uint16_t)strtol(brkEnv, NULL, 0) : 0xFFFE; for (cycles = 0; cycles < 5000000 && !stop; ) { if (pc == 0xFFFF || pc == 0x0000) { break; } if (pc == brkPC) { fprintf(stderr, "BRK pc=$%04X A=$%02X X=$%02X Y=$%02X " "$72/$73=$%02X%02X $74/$75=$%02X%02X " "$76/$77=$%02X%02X $CB=$%02X%02X\n", pc, reg_a, reg_x, reg_y, mem[0x73], mem[0x72], mem[0x75], mem[0x74], mem[0x77], mem[0x76], mem[0xCC], mem[0xCB]); } step(); } printf("inputs: yaw=%ld pitch=%ld bank=%ld VD=$%02X\n", yaw, pitch, bank, (uint8_t)(vd & 0xFF)); printf("matrix at $78..$89 (post-L6301 col shifts):\n"); for (int row = 0; row < 3; row++) { int rb = 0x78 + row * 6; int v0 = (int16_t)(mem[rb] | (mem[rb + 1] << 8)); int v1 = (int16_t)(mem[rb + 2] | (mem[rb + 3] << 8)); int v2 = (int16_t)(mem[rb + 4] | (mem[rb + 5] << 8)); printf(" row %d: %6d %6d %6d\n", row, v0, v1, v2); } // Cascade intermediates -- last values left after the routine // returned. $72/$74/$76 are the rotated-angle inputs; // $CB/$CD/$CF and $18/$D4/$D6 are the cos/sin lookups feeding // the matrix construction. if (getenv("FS2TRACE_MATRIX_DUMP") != NULL) { #define R16(addr) ((int16_t)(mem[addr] | (mem[(addr)+1] << 8))) printf("intermediates:\n"); printf(" $72/$73 = %d ($%02X%02X)\n", R16(0x72), mem[0x73], mem[0x72]); printf(" $74/$75 = %d ($%02X%02X)\n", R16(0x74), mem[0x75], mem[0x74]); printf(" $76/$77 = %d ($%02X%02X)\n", R16(0x76), mem[0x77], mem[0x76]); printf(" $CB/$CC = %d (sin of $72)\n", R16(0xCB)); printf(" $CD/$CE = %d (sin of $74)\n", R16(0xCD)); printf(" $CF/$D0 = %d (sin of $76)\n", R16(0xCF)); printf(" $18/$19 = %d (cos of $72)\n", R16(0x18)); printf(" $D4/$D5 = %d (cos of $74)\n", R16(0xD4)); printf(" $D6/$D7 = %d (cos of $76)\n", R16(0xD6)); printf(" $BA/$BB = %d (cos of VD<<4)\n", R16(0xBA)); printf(" $BE/$BF = %d (sin of VD<<4)\n", R16(0xBE)); #undef R16 } return 0; } // loadOriginalChunks: place chunk4 (at $0200) and chunk5 (at $6000) // into mem[] from out/*-built. 
Used by the --zpscale and --l177b // probes which target chunk4 routines in isolation -- the boot RAM // dump shifts chunk4 code by 2 bytes due to Apply64KPatchTable, so // L177B / ScaleC2ByC4 land at different addresses there. The .built // binaries are unpatched. static void loadOriginalChunks(void) { memset(mem, 0, MEM_SIZE); loadChunk("/home/scott/claude/flight/out/4_0200-25ff", 0x0200, 0x2400); loadChunk("/home/scott/claude/flight/out/5_6000-b3df", 0x6000, 0x53E0); } // --scenery [ramfile]: runs the captured chunk5 ProcessScenery // against a RAM image and counts how many DrawColorSpan calls fire. // This tells us "how much MAME-equivalent scenery would draw if we // ran the actual interpreter against this RAM state". Compare to // the port's `SCENERY_STATS=1 draws=N` to see where the port // diverges. Default RAM image is tmp/capture_boot.bin. // // The capture is patched by Apply64KPatchTable at runtime, so chunk5 // addresses differ from the source-listing values. The jump table at // $6000-$6020 provides indirection: $6006 jumps to ProcessScenery, // $601B (DrawColorSpanRelay) jumps to DrawColorSpan. We use $6006 // as the entry and watch for PC entering DrawColorSpan via the // $601B relay's target. static int sceneryMode(int argc, char **argv) { const char *ramPath = (argc > 2) ? argv[2] : "/home/scott/claude/flight/tmp/capture_boot.bin"; FILE *rf = fopen(ramPath, "rb"); if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; } if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) { fprintf(stderr, "RAM image short read\n"); fclose(rf); return 1; } fclose(rf); // Resolve DrawColorSpan via the $601B jump table slot // (DrawColorSpanRelay). if (mem[0x601B] != 0x4C) { fprintf(stderr, "expected JMP at $601B, got $%02X\n", mem[0x601B]); return 1; } uint16_t drawColorSpanPC = (uint16_t)(mem[0x601C] | (mem[0x601D] << 8)); // Reset the scenery cursor to the dispatcher entry (LA7E0 in // the source = mem[$A7E0/$A7E1]). 
The captured cursor at // $8B/$8C is the END-OF-FRAME position; without resetting // we'd walk past the dispatcher into chunk2 territory. uint16_t dispatcherEntry = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8)); mem[0x8B] = (uint8_t)( dispatcherEntry & 0xFF); mem[0x8C] = (uint8_t)((dispatcherEntry >> 8) & 0xFF); // Clear scenery in-progress flags so ProcessScenery starts // fresh (chunk5.s lines 1053-1062). mem[0x08F3] = 0; mem[0x090A] = 0; mem[0x08A9] = 0; mem[0x08C4] = 0; mem[0x008A] = 0; // Invalidate HEADER section cache so demand-loads fire. mem[0x08EA] = mem[0x08EB] = mem[0x08EC] = mem[0x08ED] = 0; fprintf(stderr, "scenery: reset cursor to LA7E0 = $%04X, DrawColorSpan at $%04X\n", dispatcherEntry, drawColorSpanPC); // EmitClippedLine isn't in the jump table; locate via byte // pattern would be ideal, but counting DrawColorSpan suffices // -- every line eventually goes through it. reg_s = 0xFF; flag_n = flag_v = flag_d = flag_z = flag_c = 0; flag_i = 1; stop = 0; long drawCount = 0; long opCount = 0; long emitOpCount = 0; // count cursor-driven scenery opcodes // Address of SceneryInterpreterStep / SceneryDispatch in capture // (boot patches relocate it). Find by searching for the byte // pattern after `lda ($8B),y; bmi; cmp #$46; bmi`. // The fetch is `B1 8B 30 ?? C9 46 30 ??` (lda($8B),y; bmi ; cmp #$46; bmi ). 
uint16_t fetchPC = 0; for (int i = 0x6000; i < 0xB400; i++) { if (mem[i] == 0xB1 && mem[i+1] == 0x8B && mem[i+2] == 0x30 && mem[i+4] == 0xC9 && mem[i+5] == 0x46 && mem[i+6] == 0x30) { fetchPC = (uint16_t)i; break; } } fprintf(stderr, " scenery fetch at $%04X (lda ($8B),y)\n", fetchPC); // Sequence in chunk5 MainLoop (chunk5.s line 5403+): // SetupViewProjection ($6000) // ShowSimpleCrashMessage / HandleCrashOrSplash (skipped here) // FlipPagesFillViewportRelay ($6003) -- runs a SECOND scenery // interpreter pass on the data at $8B/$8C derived from $77 // (= the boot pre-render that draws horizon-line water/sky // features in violet/blue, leaving STALE bytes in the hires // page that the main scenery pass doesn't overwrite). // ProcessScenery ($6006) const uint16_t calls[3] = { 0x6000, 0x6003, 0x6006 }; const char *callName[3] = { "SetupViewProjection", "FlipPagesFillViewport", "ProcessScenery" }; for (int callIdx = 0; callIdx < 3; callIdx++) { reg_a = reg_x = reg_y = 0; push16(0xFFFE); pc = calls[callIdx]; fprintf(stderr, " -- %s ($%04X) --\n", callName[callIdx], pc); long startOps = opCount; long startDraws = drawCount; long startFetches = emitOpCount; uint16_t lastPC = 0; long opsSinceFetch = 0; long maxOpsSinceFetch = 0; for (cycles = 0; cycles < 20000000 && !stop; ) { if (pc == 0xFFFF || pc == 0x0000) { fprintf(stderr, " EXIT pc=$%04X after %ld ops, ops-since-last-fetch=%ld\n", pc, opCount - startOps, opsSinceFetch); break; } if (pc == drawColorSpanPC) { drawCount++; } if (fetchPC != 0 && pc == fetchPC) { emitOpCount++; uint16_t cur = (uint16_t)(mem[0x8B] | (mem[0x8C] << 8)); if (getenv("FS2TRACE_BASE") != NULL) { // Print $4A..$52 BEFORE this op runs // so we can pair "before $24" with // "after $24" for diff. 
fprintf(stderr, " fetch #%ld cursor=$%04X opcode=$%02X base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] cam=[%02X%02X,%02X%02X,%02X%02X]\n", emitOpCount - startFetches, cur, mem[cur], mem[0x4A], mem[0x4B], mem[0x4C], mem[0x4D], mem[0x4E], mem[0x4F], mem[0x50], mem[0x51], mem[0x52], mem[0x67], mem[0x66], mem[0x69], mem[0x68], mem[0x6B], mem[0x6A]); } else { fprintf(stderr, " fetch #%ld cursor=$%04X opcode=$%02X (S=$%02X)\n", emitOpCount - startFetches, cur, mem[cur], reg_s); } if (opsSinceFetch > maxOpsSinceFetch) { maxOpsSinceFetch = opsSinceFetch; } opsSinceFetch = 0; } lastPC = pc; opsSinceFetch++; opCount++; step(); } fprintf(stderr, " last pc=$%04X reg_s=$%02X stop=%d max_ops_between_fetches=%ld\n", lastPC, reg_s, stop, maxOpsSinceFetch); fprintf(stderr, " %ld ops, %ld scenery-fetches, %ld draws\n", opCount - startOps, emitOpCount - startFetches, drawCount - startDraws); if (callIdx == 0) { // After SetupViewProjection, dump the matrix so // we know the projection cascade succeeded. fprintf(stderr, " matrix $78..$89: " "[%d %d %d / %d %d %d / %d %d %d]\n", (int16_t)(mem[0x78] | (mem[0x79]<<8)), (int16_t)(mem[0x7A] | (mem[0x7B]<<8)), (int16_t)(mem[0x7C] | (mem[0x7D]<<8)), (int16_t)(mem[0x7E] | (mem[0x7F]<<8)), (int16_t)(mem[0x80] | (mem[0x81]<<8)), (int16_t)(mem[0x82] | (mem[0x83]<<8)), (int16_t)(mem[0x84] | (mem[0x85]<<8)), (int16_t)(mem[0x86] | (mem[0x87]<<8)), (int16_t)(mem[0x88] | (mem[0x89]<<8))); } } fprintf(stderr, "Total: %ld ops, %ld scenery-fetches, %ld DrawColorSpan calls\n", opCount, emitOpCount, drawCount); return 0; } // --draws: run the FULL chunk5 dispatcher (SetupViewProjection + // ProcessScenery) against an unpatched chunk5 binary, watching the // DrawColorLine entry trampoline at $6009 and printing every line // drawn. This produces a bit-exact reference list of polygons // chunk5 would emit given the supplied input state -- the answer // to "what should port draw to match the original FS2 binary." 
//
// Usage:
//   fs2trace --draws [ram.bin]
//   FS2TRACE_PORT_STATE=1 sets up port-equivalent ZP for Meigs boot
//   (camera $5C/$5D=287, $64/$65=804, $6C/$6D=-109, etc.). The
//   matrix at $78..$89 is then computed by running
//   SetupViewProjection ($6000) on those inputs.
//
// Other environment switches read by this mode:
//   FS2TRACE_DRAWS_TRACE  log every dispatcher fetch and chunk3 PCs
//   FS2TRACE_FRAME_DUMP   log state after each $24/$07 op
//   FS2TRACE_VTX_DUMP     log V1/V2 after each $40/$41/$42 op
//   FS2TRACE_LOG_COLORS   log every colour-setter entry
//
// Output: one "draw" line per DrawColorLine entry with screen
// X1/Y1/X2/Y2 and the V1/V2 3D coordinates from $CB..$D0 +
// $D4..$D9 at the moment of the call, plus one "span" line per
// DrawColorSpan entry. Diagnostics go to stderr.
//
// Returns 0 on success, 1 if the RAM image cannot be read or the
// DrawColorLine signature is not found.
static int drawsMode(int argc, char **argv) {
    const char *ramPath = (argc > 2) ? argv[2] : "/home/scott/claude/flight/port/sceneryRam_FS2.1.bin";
    FILE *rf = fopen(ramPath, "rb");
    if (rf == NULL) {
        fprintf(stderr, "cannot open %s\n", ramPath);
        return 1;
    }
    if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
        fprintf(stderr, "RAM image short read\n");
        fclose(rf);
        return 1;
    }
    fclose(rf);

    // The captured RAM is the MAME-patched chunk5 (Apply64KPatchTable
    // installed at boot). Don't overlay the unpatched source --
    // that would clobber the runtime patches AND replace the
    // dispatcher bytecode at $A800+ (which only exists in the
    // patched RAM; source has dispatcher data at $B000).
    // The patched chunk5 has dispatcher entry at $A800 = LA7E0.
    //
    // chunk3 ($D300-$F3FF) holds the 64K callbacks the patched
    // chunk5 jumps into for $05/$1D/$1E station records, ADF
    // input, magneto state, etc. (LookupADFStation and friends).
    // The boot RAM dump captures the ROM bank at $D300+ (= zeros)
    // instead of the LC-RAM-resident chunk3, so we have to overlay
    // the source binary; routine addresses in source aren't quite
    // the same as MAME's runtime layout but the in-record advance
    // values are unaffected. chunk2 ($F600-$FBFF) holds a few
    // helper routines. Both overlays are best-effort: a missing
    // file is silently skipped.
    FILE *cf = fopen("/home/scott/claude/flight/out/3_d300-f3ff", "rb");
    if (cf != NULL) {
        size_t n = fread(mem + 0xD300, 1, 0xF400 - 0xD300, cf);
        fclose(cf);
        fprintf(stderr, "loaded chunk3 (%zu bytes)\n", n);
    }
    cf = fopen("/home/scott/claude/flight/out/2_f600-fbff", "rb");
    if (cf != NULL) {
        (void)fread(mem + 0xF600, 1, 0xFC00 - 0xF600, cf);
        fclose(cf);
    }

    // Override the SceneryOpADFRecord / NAVRecord / COMRecord
    // patches: source-binary chunk3 has these routines at
    // different addresses than MAME's runtime, so the patched JMP
    // targets in port_ram point at random source bytes. Restore
    // the unpatched 48K behaviour (= just advance past the record)
    // so the dispatcher doesn't crash entering chunk3. Station
    // records don't draw anything anyway.
    // SceneryOpADFRecord at $6021: source = "lda #$09; jmp $67FD"
    // (= advance 9, continue). The patched JMP $DB3F would call
    // chunk3 LookupADFStation but that maps to a different routine
    // in source, so undo the patch.
    // SceneryOpAdvanceAndContinue: in source chunk5 the entry
    // trampoline at $6018 is `JMP SceneryOpAdvanceAndContinue`. The
    // patched RAM preserves that trampoline, so we just read the
    // target out of $6019/$601A.
    uint16_t advanceAndContinue = 0;
    if (mem[0x6018] == 0x4C) {
        advanceAndContinue = (uint16_t)(mem[0x6019] | (mem[0x601A] << 8));
    }
    fprintf(stderr, " SceneryOpAdvanceAndContinue at $%04X\n", advanceAndContinue);
    if (mem[0x6021] == 0x4C && advanceAndContinue != 0) { // patched JMP -> chunk3
        mem[0x6021] = 0xA9; // lda
        mem[0x6022] = 0x09; // #$09
        mem[0x6023] = 0x4C; // jmp
        mem[0x6024] = (uint8_t)( advanceAndContinue & 0xFF);
        mem[0x6025] = (uint8_t)((advanceAndContinue >> 8) & 0xFF);
        fprintf(stderr, " unpatched SceneryOpADFRecord ($6021) -> $%04X\n", advanceAndContinue);
    }

    // NOTE: only the ADF record is un-patched above. The NAVRecord
    // ($1D, 11-byte per chunk5.s) and COMRecord ($1E, variable
    // length) handlers are NOT touched by this function. The idea
    // sketched for them -- locate SceneryOpcodeTable, read the
    // $1D/$1E entries, and rewrite any `JMP` into chunk3 back to
    // advance-and-continue -- has not been implemented here.

    // DrawColorLine entry: in the patched binary, $6009 = JMP (relayed),
    // and $601B = JMP DrawColorSpan. DrawColorLine itself is the
    // function called from EmitClippedLine. Find its entry by
    // scanning for the byte signature `lda $E9; sec; sbc $EB`
    // (= chunk5.s line 3556).
    uint16_t drawColorLinePC = 0;
    for (int i = 0x6000; i < 0xB400; i++) {
        if (mem[i] == 0xA5 && mem[i+1] == 0xE9
            && mem[i+2] == 0x38
            && mem[i+3] == 0xE5 && mem[i+4] == 0xEB) {
            drawColorLinePC = (uint16_t)i;
            break;
        }
    }
    if (drawColorLinePC == 0) {
        fprintf(stderr, "could not locate DrawColorLine\n");
        return 1;
    }
    fprintf(stderr, "DrawColorLine entry: $%04X\n", drawColorLinePC);

    // DrawColorSpan entry: chunk5 trampoline at $601B is `JMP DrawColorSpan`.
    // Left at 0 (span tracking disabled) when $601B is not a JMP.
    uint16_t drawColorSpanPC = 0;
    if (mem[0x601B] == 0x4C) {
        drawColorSpanPC = (uint16_t)(mem[0x601C] | (mem[0x601D] << 8));
    }
    fprintf(stderr, "DrawColorSpan entry: $%04X\n", drawColorSpanPC);

    // SetEvenAndOddColorsAndPrepRowRoutine: scan for the byte signature
    // `lda ColorTableEven,x; sta ColorByteEven` (= chunk5.s lines
    // 3902-3903). The operands are not matched, only the opcode
    // skeleton: BD ?? ?? 8D ?? ?? BD ?? ?? 8D ?? ?? 8A. Left at 0
    // (tracking disabled) when no match is found.
    uint16_t setEvenAndOddPC = 0;
    for (int i = 0x7000; i < 0xB400; i++) {
        if (mem[i] == 0xBD // LDA abs,x
            && mem[i+3] == 0x8D // STA abs
            && mem[i+6] == 0xBD // LDA abs,x
            && mem[i+9] == 0x8D // STA abs
            && mem[i+12] == 0x8A) { // TXA
            setEvenAndOddPC = (uint16_t)i;
            break;
        }
    }
    fprintf(stderr, "SetEvenAndOddColorsAndPrepRowRoutine entry: $%04X\n", setEvenAndOddPC);

    // Set up port-equivalent ZP state. The values mirror what
    // port's runScreenshot + sceneryAttachCamera produce.
    if (getenv("FS2TRACE_PORT_STATE") != NULL) {
        // Camera position in scenery units ($5C=287, $64=804).
        mem[0x5C] = 0x1F; mem[0x5D] = 0x01;
        mem[0x64] = 0x24; mem[0x65] = 0x03;
        // Altitude pair ($5E/$5F = 768 from boot; $60/$61 = 0).
        mem[0x5E] = 0x00; mem[0x5F] = 0x03;
        mem[0x60] = 0x00; mem[0x61] = 0x00;
        // Rotation inputs ($6C/$6D = -109 yaw/X-axis;
        // $6E/$6F = 0 pitch/Z-axis; $70/$71 = 0 bank/Y-axis).
        mem[0x6C] = 0x93; mem[0x6D] = 0xFF;
        mem[0x6E] = 0x00; mem[0x6F] = 0x00;
        mem[0x70] = 0x00; mem[0x71] = 0x00;
        mem[0x0A70] = 0x00; // ViewDirection
        // Camera-section deltas. Port's sceneryAttachCamera
        // sets $66/$67=0, $68/$69 = wyUnits (= cam.worldY=25
        // metres for Meigs boot, no scaling), $6A/$6B=0. Reset
        // to match.
        mem[0x66] = 0x00; mem[0x67] = 0x00;
        mem[0x68] = 0x19; mem[0x69] = 0x00; // 25
        mem[0x6A] = 0x00; mem[0x6B] = 0x00;
        fprintf(stderr, "FS2TRACE_PORT_STATE: ZP set for Meigs boot\n");
    }

    // Run SetupViewProjection ($6000) to compute the matrix at
    // $78..$89 from the just-set $6C/$6E/$70 inputs. $FFFE is the
    // sentinel return address: RTS resumes at $FFFF and the loop
    // exits.
    reg_a = reg_x = reg_y = 0;
    reg_s = 0xFF;
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    stop = 0;
    pc = 0x6000;
    push16(0xFFFE);
    for (cycles = 0; cycles < 5000000 && !stop; ) {
        if (pc == 0xFFFF || pc == 0x0000) break;
        step();
    }
    fprintf(stderr, " matrix: row0=(%d,%d,%d) row1=(%d,%d,%d) row2=(%d,%d,%d)\n",
            (int16_t)(mem[0x78] | (mem[0x79]<<8)),
            (int16_t)(mem[0x7A] | (mem[0x7B]<<8)),
            (int16_t)(mem[0x7C] | (mem[0x7D]<<8)),
            (int16_t)(mem[0x7E] | (mem[0x7F]<<8)),
            (int16_t)(mem[0x80] | (mem[0x81]<<8)),
            (int16_t)(mem[0x82] | (mem[0x83]<<8)),
            (int16_t)(mem[0x84] | (mem[0x85]<<8)),
            (int16_t)(mem[0x86] | (mem[0x87]<<8)),
            (int16_t)(mem[0x88] | (mem[0x89]<<8)));

    // Reset cursor to LA7E0 = $A800 (= clean dispatch start).
    uint16_t dispatcherEntry = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
    mem[0x8B] = (uint8_t)( dispatcherEntry & 0xFF);
    mem[0x8C] = (uint8_t)((dispatcherEntry >> 8) & 0xFF);
    // Clear in-progress flags so ProcessScenery starts fresh.
    mem[0x08F3] = 0;
    mem[0x090A] = 0;
    mem[0x08A9] = 0;
    mem[0x08C4] = 0;
    mem[0x008A] = 0;
    // Invalidate HEADER section cache so demand-loads fire (= match
    // port's sceneryCacheInvalidated path).
    mem[0x08EA] = mem[0x08EB] = mem[0x08EC] = mem[0x08ED] = 0;
    // Reset $35/$36 (L631D base cache) so first $07/$24 forces
    // a full base recompute.
    mem[0x35] = 0;
    mem[0x36] = 0;
    fprintf(stderr, " cursor LA7E0 = $%04X\n", dispatcherEntry);

    // Find dispatcher fetch instruction (= the LDA ($8B),Y in
    // SceneryInterpreterStep). Same heuristic as sceneryMode:
    // B1 8B 30 ?? C9 46 30. fetchPC == 0 disables fetch tracking.
    uint16_t fetchPC = 0;
    for (int i = 0x6000; i < 0xB400; i++) {
        if (mem[i] == 0xB1 && mem[i+1] == 0x8B
            && mem[i+2] == 0x30
            && mem[i+4] == 0xC9 && mem[i+5] == 0x46
            && mem[i+6] == 0x30) {
            fetchPC = (uint16_t)i;
            break;
        }
    }
    fprintf(stderr, " scenery fetch instruction at $%04X\n", fetchPC);

    // Run the chunk5 main-loop sequence: SetupViewProjection ($6000),
    // FlipPagesFillViewport ($6003), ProcessScenery ($6006). The
    // FlipPages pass is a SECOND scenery interpreter run on $0A78
    // data (= boot pre-render), which can draw water/horizon polygons
    // in HIRES_VIOLET that ProcessScenery never touches.
    reg_s = 0xFF;
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    long drawCount = 0;
    long opCount = 0;
    long fetchCount = 0;
    // Edge detector: 1 while the previous op was NOT at the
    // DrawColorLine entry, so each entry is logged once.
    int prevPCWasNotDraw = 1;
    printf("# fs2trace --draws: chunk5 DrawColorLine sequence (3-pass)\n");
    const char *trace = getenv("FS2TRACE_DRAWS_TRACE");
    const uint16_t passes[3] = { 0x6000, 0x6003, 0x6006 };
    const char *passName[3] = { "SetupViewProjection", "FlipPagesFillViewport", "ProcessScenery" };
    for (int passIdx = 0; passIdx < 3; passIdx++) {
        stop = 0;
        reg_a = reg_x = reg_y = 0;
        pc = passes[passIdx];
        push16(0xFFFE);
        fprintf(stderr, " ===== %s ($%04X) =====\n", passName[passIdx], pc);
        long passStartOps = opCount;
        long passStartDraws = drawCount;
        uint16_t lastPC = 0;
        for (cycles = 0; cycles < 1000000000 && !stop; ) {
            if (pc == 0xFFFF || pc == 0x0000) {
                fprintf(stderr, " hit sentinel pc=$%04X (last pc=$%04X) at op %ld\n", pc, lastPC, opCount);
                break;
            }
            if (fetchPC != 0 && pc == fetchPC) {
                fetchCount++;
                if (trace != NULL) {
                    uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
                    fprintf(stderr, " fetch #%ld cur=$%04X op=$%02X $29=$%02X\n",
                            fetchCount, cur, mem[cur], mem[0x29]);
                }
                // FS2TRACE_FRAME_DUMP: when the PREVIOUS fetched op
                // was $24 or $07, log the state it left behind, so
                // port-vs-MAME frame-setup divergence can be located
                // precisely. The statics persist across passes (and
                // across calls to this function).
                if (getenv("FS2TRACE_FRAME_DUMP") != NULL) {
                    static uint8_t prevFrameOp = 0;
                    static uint16_t prevFrameCur = 0;
                    if (prevFrameOp == 0x24 || prevFrameOp == 0x07) {
                        fprintf(stderr, " POST-$%02X(var=$%02X) cam=(%d,%d,%d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] $19=$%02X $1C=$%02X $1F=$%02X $35=$%02X $36=$%02X (was cur=$%04X)\n",
                                prevFrameOp, mem[prevFrameCur + 1],
                                (int16_t)(mem[0x66] | (mem[0x67] << 8)),
                                (int16_t)(mem[0x68] | (mem[0x69] << 8)),
                                (int16_t)(mem[0x6A] | (mem[0x6B] << 8)),
                                mem[0x4A], mem[0x4B], mem[0x4C],
                                mem[0x4D], mem[0x4E], mem[0x4F],
                                mem[0x50], mem[0x51], mem[0x52],
                                mem[0x19], mem[0x1C], mem[0x1F],
                                mem[0x35], mem[0x36],
                                prevFrameCur);
                    }
                    uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
                    if (mem[cur] == 0x24 || mem[cur] == 0x07) {
                        prevFrameOp = mem[cur];
                        prevFrameCur = cur;
                    } else {
                        prevFrameOp = 0;
                    }
                }
                // FS2TRACE_VTX_DUMP: when the PREVIOUS fetched op
                // was $40, $41 or $42, log V1/V2 (= mem[$CB..$D0] /
                // mem[$D4..$D9]) so the port's transform output can
                // be diffed against this trace per-vertex.
                if (getenv("FS2TRACE_VTX_DUMP") != NULL) {
                    static uint8_t prevOp = 0;
                    if (prevOp == 0x40 || prevOp == 0x41 || prevOp == 0x42) {
                        int16_t v1x = (int16_t)(mem[0xCB] | (mem[0xCC] << 8));
                        int16_t v1y = (int16_t)(mem[0xCD] | (mem[0xCE] << 8));
                        int16_t v1z = (int16_t)(mem[0xCF] | (mem[0xD0] << 8));
                        int16_t v2x = (int16_t)(mem[0xD4] | (mem[0xD5] << 8));
                        int16_t v2y = (int16_t)(mem[0xD6] | (mem[0xD7] << 8));
                        int16_t v2z = (int16_t)(mem[0xD8] | (mem[0xD9] << 8));
                        fprintf(stderr, " POST-$%02X V1=(%6d,%6d,%6d) V2=(%6d,%6d,%6d) cam=(%d,%d,%d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X]\n",
                                prevOp, v1x, v1y, v1z, v2x, v2y, v2z,
                                (int16_t)(mem[0x66] | (mem[0x67] << 8)),
                                (int16_t)(mem[0x68] | (mem[0x69] << 8)),
                                (int16_t)(mem[0x6A] | (mem[0x6B] << 8)),
                                mem[0x4A], mem[0x4B], mem[0x4C],
                                mem[0x4D], mem[0x4E], mem[0x4F],
                                mem[0x50], mem[0x51], mem[0x52]);
                    }
                    uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
                    prevOp = mem[cur];
                }
            }
            // Trace flow inside chunk3 (= 64K patched callbacks).
            // Only after the first 2400 ops, to skip early noise.
            if (trace != NULL && pc >= 0xD300 && pc < 0xF400 && opCount > 2400) {
                fprintf(stderr, " chunk3 pc=$%04X A=$%02X X=$%02X Y=$%02X SP=$%02X\n",
                        pc, reg_a, reg_x, reg_y, reg_s);
            }
            // Detect tight 6502 loops -- same PC for many consecutive
            // ops. If we sit at a single PC for 50k ops, log + bail
            // out of this pass.
            static uint16_t stuckPC = 0;
            static long stuckCount = 0;
            if (pc == stuckPC) {
                stuckCount++;
                if (stuckCount == 50000) {
                    fprintf(stderr, " STUCK at pc=$%04X for 50k ops at op=%ld (cur=$%04X)\n",
                            pc, opCount, mem[0x8B] | (mem[0x8C] << 8));
                    break;
                }
            } else {
                stuckPC = pc;
                stuckCount = 1;
            }
            lastPC = pc;
            // Track active hires color (= last A passed to SetPixelDrawMode).
            // SetPixelDrawMode is at the JMP target stored at $6010-$6011
            // (= chunk5 trampoline `JMP SetPixelDrawMode` at $600F).
            // $FFFF doubles as the "not resolved yet" marker.
            static uint16_t setPixelDrawModePC = 0xFFFF;
            static uint8_t curHiresColor = 0xFF;
            if (setPixelDrawModePC == 0xFFFF) {
                // Resolve once: chunk5 has `4C lo hi` at $600F.
                if (mem[0x600F] == 0x4C) {
                    setPixelDrawModePC = (uint16_t)(mem[0x6010] | (mem[0x6011] << 8));
                    fprintf(stderr, " SetPixelDrawMode resolved at $%04X\n", setPixelDrawModePC);
                }
            }
            if (setPixelDrawModePC != 0xFFFF && pc == setPixelDrawModePC) {
                curHiresColor = reg_a;
                if (getenv("FS2TRACE_LOG_COLORS") != NULL) {
                    fprintf(stderr, " SetPixelDrawMode A=$%02X (hires color %d) at op=%ld\n",
                            reg_a, reg_a & 0x07, opCount);
                }
            }
            // Track SetEvenAndOddColorsAndPrepRowRoutine -- the SPAN
            // fill color setter. On entry X = hires color code.
            if (setEvenAndOddPC != 0 && pc == setEvenAndOddPC) {
                curHiresColor = reg_x;
                if (getenv("FS2TRACE_LOG_COLORS") != NULL) {
                    fprintf(stderr, " SetEvenAndOddColors X=$%02X (hires color %d) at op=%ld\n",
                            reg_x, reg_x & 0x07, opCount);
                }
            }
            // Track DrawColorSpan calls (= horizontal span fill) so we can
            // see polygon FILLS in addition to line draws. ZP $E9 = start col,
            // A on entry = end col; the row is read from $B1. The
            // span line carries the current DrawColorLine count, not
            // a separate span counter.
            static int prevPCWasNotSpan = 1;
            if (drawColorSpanPC != 0 && pc == drawColorSpanPC && prevPCWasNotSpan) {
                uint8_t startCol = mem[0xE9];
                uint8_t endCol = reg_a;
                uint8_t row = mem[0xB1];
                printf("span %4ld col=$%02X: row=%3d startCol=%3d endCol=%3d\n",
                       drawCount, curHiresColor, row, startCol, endCol);
                prevPCWasNotSpan = 0;
            } else if (pc != drawColorSpanPC) {
                prevPCWasNotSpan = 1;
            }
            if (pc == drawColorLinePC && prevPCWasNotDraw) {
                // First instruction of DrawColorLine. Capture
                // the line endpoints + V1/V2.
                uint8_t x1 = mem[0xE9];
                uint8_t y1 = mem[0xEA];
                uint8_t x2 = mem[0xEB];
                uint8_t y2 = mem[0xEC];
                int16_t v1x = (int16_t)(mem[0xCB] | (mem[0xCC] << 8));
                int16_t v1y = (int16_t)(mem[0xCD] | (mem[0xCE] << 8));
                int16_t v1z = (int16_t)(mem[0xCF] | (mem[0xD0] << 8));
                int16_t v2x = (int16_t)(mem[0xD4] | (mem[0xD5] << 8));
                int16_t v2y = (int16_t)(mem[0xD6] | (mem[0xD7] << 8));
                int16_t v2z = (int16_t)(mem[0xD8] | (mem[0xD9] << 8));
                drawCount++;
                uint16_t curAt = mem[0x8B] | (mem[0x8C] << 8);
                printf("draw %4ld cur=$%04X col=$%02X: (%3d,%3d)-(%3d,%3d) V1=(%6d,%6d,%6d) V2=(%6d,%6d,%6d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] cam=[%02X%02X,%02X%02X,%02X%02X]\n",
                       drawCount, curAt, curHiresColor,
                       x1, y1, x2, y2,
                       v1x, v1y, v1z, v2x, v2y, v2z,
                       mem[0x4A], mem[0x4B], mem[0x4C],
                       mem[0x4D], mem[0x4E], mem[0x4F],
                       mem[0x50], mem[0x51], mem[0x52],
                       mem[0x67], mem[0x66],
                       mem[0x69], mem[0x68],
                       mem[0x6B], mem[0x6A]);
                prevPCWasNotDraw = 0;
            } else if (pc != drawColorLinePC) {
                prevPCWasNotDraw = 1;
            }
            opCount++;
            uint16_t prevPC = pc;
            step();
            if (stop) {
                fprintf(stderr, " step() set stop=1 prevPC=$%04X newPC=$%04X opCount=%ld\n",
                        prevPC, pc, opCount);
                break;
            }
        }
        fprintf(stderr, " pass-%s: %ld ops, %ld draws\n",
                passName[passIdx], opCount - passStartOps, drawCount - passStartDraws);
    }
    fprintf(stderr, "Total: %ld 6502 ops, %ld dispatch fetches, %ld DrawColorLine calls (final pc=$%04X cur=$%04X)\n",
            opCount, fetchCount, drawCount, pc, mem[0x8B] | (mem[0x8C] << 8));
    return 0;
}

// --xform: run
chunk5 TransformVertex7EBC ($7EBC) on the original // binary (= source-faithful, not the captured patched chunk5). Lets // us validate the C transliteration in chunk5Transform.c against the // asm for arbitrary inputs. // // Usage: // fs2trace --xform [state_overrides...] // // Inputs encode the 4 vertex bytes that follow the opcode in the // scenery stream. The routine reads them via ($8B),y and computes // transformed XYZ at $D4..$D9 (or $CB..$D0). static int xformMode(int argc, char **argv) { if (argc < 3) { fprintf(stderr, "usage: %s --xform [ram.bin]\n", argv[0]); fprintf(stderr, " Loads RAM image (default tmp/capture_boot.bin) for state\n" " (matrix/base/camera), then overlays the ORIGINAL chunk5\n" " binary at $6000-$B3DF (so $7EBC has source bytes).\n" " Sets cursor $8B/$8C to stream_addr-1 (so opcode is at\n" " ($8B),0 -- the typical layout when the dispatcher would\n" " invoke a vertex-emit handler at that opcode), and calls\n" " TransformVertex7EBC with Y=9 (V2 destination).\n" " Prints V2 (=$D4..$D9).\n"); return 2; } long streamAddr = strtol(argv[2], NULL, 0); const char *ramPath = (argc > 3) ? argv[3] : "/home/scott/claude/flight/tmp/capture_boot.bin"; FILE *rf = fopen(ramPath, "rb"); if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; } if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) { fprintf(stderr, "RAM image short read\n"); fclose(rf); return 1; } fclose(rf); // Overlay ONLY the chunk5 code regions that don't overlap // loaded scenery sections. At boot Meigs, the loaded section // sits at ~$B280+, so we overlay $6000-$B27F (= source-faithful // chunk5 code) and keep $B280+ as the captured section data. // Also overlay all of chunk4 ($0200-$25FF) which holds the // ZPScale / cos table -- the captured RAM has it shifted // and we need the source addresses. 
FILE *cf = fopen("/home/scott/claude/flight/out/4_0200-25ff", "rb"); if (cf != NULL) { fread(mem + 0x0200, 1, 0x2400, cf); fclose(cf); } cf = fopen("/home/scott/claude/flight/out/5_6000-b3df", "rb"); if (cf != NULL) { fread(mem + 0x6000, 1, 0xB280 - 0x6000, cf); fclose(cf); } // Cursor: opcode at streamAddr, vertex bytes at streamAddr+1. // $8B/$8C = streamAddr (= the opcode address; ($8B),Y=0 reads // the opcode, Y=1.. reads vertex bytes -- chunk5's normal layout). mem[0x008B] = (uint8_t)( streamAddr & 0xFF); mem[0x008C] = (uint8_t)((streamAddr >> 8) & 0xFF); fprintf(stderr, " state: $8B/$8C=$%04X matrix=[%d,%d,%d/%d,%d,%d/%d,%d,%d]\n", (int)streamAddr, (int)(int16_t)(mem[0x78] | (mem[0x79] << 8)), (int)(int16_t)(mem[0x7A] | (mem[0x7B] << 8)), (int)(int16_t)(mem[0x7C] | (mem[0x7D] << 8)), (int)(int16_t)(mem[0x7E] | (mem[0x7F] << 8)), (int)(int16_t)(mem[0x80] | (mem[0x81] << 8)), (int)(int16_t)(mem[0x82] | (mem[0x83] << 8)), (int)(int16_t)(mem[0x84] | (mem[0x85] << 8)), (int)(int16_t)(mem[0x86] | (mem[0x87] << 8)), (int)(int16_t)(mem[0x88] | (mem[0x89] << 8))); fprintf(stderr, " base: ($4A..$4C)=%02X%02X%02X ($4D..$4F)=%02X%02X%02X ($50..$52)=%02X%02X%02X\n", mem[0x4A], mem[0x4B], mem[0x4C], mem[0x4D], mem[0x4E], mem[0x4F], mem[0x50], mem[0x51], mem[0x52]); fprintf(stderr, " cam ($66..$6B): %02X %02X %02X %02X %02X %02X\n", mem[0x66], mem[0x67], mem[0x68], mem[0x69], mem[0x6A], mem[0x6B]); fprintf(stderr, " vertex bytes at $%04X: %02X %02X %02X %02X %02X\n", (int)streamAddr, mem[streamAddr+0], mem[streamAddr+1], mem[streamAddr+2], mem[streamAddr+3], mem[streamAddr+4]); // The asm's TransformVertex7EBC reads destSlot from $E5 (Y // on entry). ProcessVertex2 sets Y = $D4 before JSR // (= absolute address of V2 slot), and the routine stores // output via `sta $00,x` with X loaded from $E5. So Y on // entry = $D4 (V2) or $CB (V1). 
reg_y = 0xD4; reg_a = reg_x = 0; reg_s = 0xFF; flag_n = flag_v = flag_d = flag_z = flag_c = 0; flag_i = 1; stop = 0; pc = 0x7EBC; push16(0xFFFE); const char *traceXform = getenv("FS2TRACE_XFORM_TRACE"); for (cycles = 0; cycles < 5000000 && !stop; ) { if (pc == 0xFFFF || pc == 0x0000) { break; } if (traceXform != NULL && pc >= 0x7EBC && pc <= 0x80AF) { // Inside TransformVertex7EBC: print state. fprintf(stderr, "pc=$%04X A=$%02X X=$%02X Y=$%02X NVZC=%d%d%d%d " "$18-$1A=%02X%02X%02X $1B-$1D=%02X%02X%02X $1E-$20=%02X%02X%02X " "$9E$9F=%02X%02X $A2$A3=%02X%02X $2F=%02X\n", pc, reg_a, reg_x, reg_y, flag_n, flag_v, flag_z, flag_c, mem[0x18], mem[0x19], mem[0x1A], mem[0x1B], mem[0x1C], mem[0x1D], mem[0x1E], mem[0x1F], mem[0x20], mem[0x9E], mem[0x9F], mem[0xA2], mem[0xA3], mem[0x2F]); } step(); } int16_t v2x = (int16_t)((uint16_t)mem[0xD4] | ((uint16_t)mem[0xD5] << 8)); int16_t v2y = (int16_t)((uint16_t)mem[0xD6] | ((uint16_t)mem[0xD7] << 8)); int16_t v2z = (int16_t)((uint16_t)mem[0xD8] | ((uint16_t)mem[0xD9] << 8)); printf("xform stream@$%04X -> V2=(%d,%d,%d)\n", (int)streamAddr, (int)v2x, (int)v2y, (int)v2z); return 0; } // --zpscale a b: runs ScaleC2ByC4 at $1569 in chunk4 with the // supplied 16-bit signed inputs in $C2/$C3 and $C4/$C5; prints the // result. Loads chunk4 freshly from out/4_0200-25ff so the address // matches the binary (vs the captured RAM which is patched). static int zpscaleMode(int argc, char **argv) { if (argc < 4) { fprintf(stderr, "usage: %s --zpscale \n", argv[0]); return 2; } long a = strtol(argv[2], NULL, 0); long b = strtol(argv[3], NULL, 0); loadOriginalChunks(); uint16_t ua = (uint16_t)((a < 0) ? (a + 0x10000) : a); uint16_t ub = (uint16_t)((b < 0) ? 
(b + 0x10000) : b); mem[0xC2] = (uint8_t)( ua & 0xFF); mem[0xC3] = (uint8_t)((ua >> 8) & 0xFF); mem[0xC4] = (uint8_t)( ub & 0xFF); mem[0xC5] = (uint8_t)((ub >> 8) & 0xFF); // ScaleC2ByC4 lives at $1565 (chunk4.s line 1565); ZPScale's // wrapper at $1544 handles the $C2/$C4 marshalling for an // arbitrary output address. We've already populated $C2..$C5 // directly so we call ScaleC2ByC4 ($1565) and pull the // result out of A:X via the trampoline below. reg_a = reg_x = reg_y = 0; reg_s = 0xFF; flag_n = flag_v = flag_d = flag_z = flag_c = 0; flag_i = 1; stop = 0; pc = 0x1569; push16(0xFFFE); const char *traceEnv = getenv("FS2TRACE_ZPSCALE_TRACE"); for (cycles = 0; cycles < 1000000 && !stop; ) { if (pc == 0xFFFF || pc == 0x0000) { break; } if (traceEnv != NULL) { fprintf(stderr, "pc=$%04X A=$%02X X=$%02X Y=$%02X C=%d " "C2=$%02X C3=$%02X C4=$%02X C5=$%02X " "A7=$%02X A8=$%02X\n", pc, reg_a, reg_x, reg_y, flag_c, mem[0xC2], mem[0xC3], mem[0xC4], mem[0xC5], mem[0xA7], mem[0xA8]); } step(); } // Result: A is low byte, X is high byte. int16_t result = (int16_t)((uint16_t)reg_a | ((uint16_t)reg_x << 8)); printf("ScaleC2ByC4(%ld, %ld) = %d\n", a, b, result); return 0; } // --l177b A X [ramfile]: probe chunk4 L177B (cos lookup with sub-byte // interpolation) in isolation. Returns the int16 result (A:X reg). 
static int l177bMode(int argc, char **argv) { if (argc < 4) { fprintf(stderr, "usage: %s --l177b [ram.bin]\n", argv[0]); return 2; } long aIn = strtol(argv[2], NULL, 0); long xIn = strtol(argv[3], NULL, 0); if (argc > 4) { FILE *rf = fopen(argv[4], "rb"); if (rf == NULL) { fprintf(stderr, "cannot open %s\n", argv[4]); return 1; } if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) { fprintf(stderr, "RAM image short read\n"); fclose(rf); return 1; } fclose(rf); } else { loadOriginalChunks(); } reg_a = (uint8_t)(aIn & 0xFF); reg_x = (uint8_t)(xIn & 0xFF); reg_y = 0; reg_s = 0xFF; flag_n = flag_v = flag_d = flag_z = flag_c = 0; flag_i = 1; stop = 0; // L177B at $177B; L1778 (sin entry) is 3 bytes earlier. // When running against a capture, FS2's boot patches shifted // chunk4 code by 2 bytes, so the entry points are at $177D / // $177A respectively. FS2TRACE_PC overrides explicitly. const char *l1778Env = getenv("FS2TRACE_USE_L1778"); const char *pcEnv = getenv("FS2TRACE_PC"); if (pcEnv != NULL) { pc = (uint16_t)strtol(pcEnv, NULL, 0); } else { pc = (l1778Env != NULL) ? 0x1778 : 0x177B; } push16(0xFFFE); const char *traceEnv2 = getenv("FS2TRACE_L177B_TRACE"); for (cycles = 0; cycles < 1000000 && !stop; ) { if (pc == 0xFFFF || pc == 0x0000) { break; } if (traceEnv2 != NULL) { fprintf(stderr, "pc=$%04X A=$%02X X=$%02X Y=$%02X N=%d Z=%d C=%d\n", pc, reg_a, reg_x, reg_y, flag_n, flag_z, flag_c); } step(); } int16_t result = (int16_t)((uint16_t)reg_a | ((uint16_t)reg_x << 8)); printf("L177B(%ld, %ld) = %d\n", aIn, xIn, result); return 0; } int main(int argc, char **argv) { // --matrix mode: chunk5 SetupViewProjection oracle. if (argc >= 2 && strcmp(argv[1], "--matrix") == 0) { return matrixMode(argc, argv); } // --zpscale: probe chunk4 ScaleC2ByC4 in isolation. if (argc >= 2 && strcmp(argv[1], "--zpscale") == 0) { return zpscaleMode(argc, argv); } // --l177b: probe chunk4 cos lookup. 
if (argc >= 2 && strcmp(argv[1], "--l177b") == 0) { return l177bMode(argc, argv); } // --scenery: count chunk5 ProcessScenery DrawColorSpan calls. if (argc >= 2 && strcmp(argv[1], "--scenery") == 0) { return sceneryMode(argc, argv); } // --xform: chunk5 TransformVertex7EBC oracle. if (argc >= 2 && strcmp(argv[1], "--xform") == 0) { return xformMode(argc, argv); } // --draws: full chunk5 DrawColorLine trace. if (argc >= 2 && strcmp(argv[1], "--draws") == 0) { return drawsMode(argc, argv); } // --nibble in.bin out.bin: run the chunk3 nibble decoder on // raw input bytes, emit encoded output. Used to verify the C // port matches what the 6502 emulator produces. if (argc == 4 && strcmp(argv[1], "--nibble") == 0) { FILE *in = fopen(argv[2], "rb"); FILE *out = fopen(argv[3], "wb"); if (in == NULL || out == NULL) { fprintf(stderr, "could not open files\n"); return 1; } fseek(in, 0, SEEK_END); size_t sz = (size_t)ftell(in); fseek(in, 0, SEEK_SET); uint8_t *buf = malloc(sz); if (fread(buf, 1, sz, in) != sz) { fprintf(stderr, "short read\n"); return 1; } fclose(in); uint8_t output[0x3E00 - 0x3B60]; uint8_t cksum[2]; int consumed = nibbleDecode(buf, output, cksum); fwrite(output, 1, sizeof(output), out); fwrite(cksum, 1, 2, out); fclose(out); fprintf(stderr, "consumed %d input bytes, wrote %zu output bytes\n", consumed, sizeof(output) + 2); free(buf); return 0; } const char *chunk4Path = "/home/scott/claude/flight/out/4_0200-25ff"; const char *chunk5Path = "/home/scott/claude/flight/out/5_6000-b3df"; const char *chunk3Path = "/home/scott/claude/flight/out/3_d300-f3ff"; const char *chunk2Path = "/home/scott/claude/flight/out/2_f600-fbff"; const char *diskPath = (argc > 1) ? argv[1] : "/home/scott/claude/flight/orig/flight simulator 2 with scenery PRODOS (san inc pack).po"; const char *blocksPath = (argc > 2) ? argv[2] : "/home/scott/claude/flight/downloads/scenery/extracted/A2.SDS1.blocks"; // Default entry: LoadSceneryFile1 (descriptor $0625, 6 sectors from // sector $25). 
LoadSceneryFile0 ($A66B) reads sector $22 first and on // four disks (SDS1, SD1, SD3, SD7A) the resulting LA7E0 word is a // low-memory address (e.g. $0003) so the bootstrap copy stomps the // stack page and the run halts. LoadSceneryFile1 sources LA7E0 from // a different file region and lands every disk on a sane chunk5 // address, so all 13 traces complete to the $FFFF sentinel. uint16_t entryPC = (argc > 3) ? (uint16_t)strtol(argv[3], NULL, 0) : 0xA674; memset(mem, 0, MEM_SIZE); if (!loadChunk(chunk4Path, 0x0200, 0x2400)) return 1; if (!loadChunk(chunk5Path, 0x6000, 0x53E0)) return 1; if (!loadChunk(chunk3Path, 0xD300, 0x2100)) return 1; if (!loadChunk(chunk2Path, 0xF600, 0x0600)) return 1; // Disk image. FILE *df = fopen(diskPath, "rb"); if (df == NULL) { fprintf(stderr, "cannot open %s\n", diskPath); return 1; } fseek(df, 0, SEEK_END); diskSize = (size_t)ftell(df); fseek(df, 0, SEEK_SET); diskImage = malloc(diskSize); if (diskImage == NULL || fread(diskImage, 1, diskSize, df) != diskSize) { fprintf(stderr, "cannot read %s\n", diskPath); fclose(df); return 1; } fclose(df); fprintf(stderr, "disk: %s (%zu bytes)\n", diskPath, diskSize); // Patch the SmartPort entry: when the FS2 code calls $C70D it // would normally land in the absent firmware; we trap the read // by hooking pc==$C70D inside step(). // Provide a sentinel BRK so any unintended fall-through halts. mem[0xC70D] = 0x00; // 64K mode patch: chunk4 has six L1EAD..L1EC1 thunks that the // 64K patch table rewrites to JMP into chunk3's // SceneryLoaderEntry1..7. We replicate those JMP targets // directly so the loader path actually reaches the chunk3 // implementations (Apply64KPatchTable would otherwise need to // be run too). 
struct { uint16_t thunk; uint16_t entry; } patches[] = { { 0x1EAD, 0xD3D0 }, // SceneryLoaderEntry1 { 0x1EB0, 0xD3D3 }, // SceneryLoaderEntry2 { 0x1EB3, 0xD3D6 }, // SceneryLoaderEntry3 { 0x1EB6, 0xD3D9 }, // SceneryLoaderEntry4 { 0x1EB9, 0xD3DC }, // SceneryLoaderEntry5 { 0x1EBC, 0xD3DF }, // SceneryLoaderEntry6 { 0x1EC1, 0xD3E2 }, // SceneryLoaderEntry7 }; for (size_t i = 0; i < sizeof(patches) / sizeof(patches[0]); i++) { uint16_t t = patches[i].thunk; uint16_t e = patches[i].entry; mem[t] = 0x4C; // JMP abs mem[t + 1] = (uint8_t)(e & 0xFF); mem[t + 2] = (uint8_t)((e >> 8) & 0xFF); } // Also flag 64K mode (chunk4 L1E07). mem[0x1E07] = 0x01; // Populate ReadBlockDataBuffer (chunk3 $D575) from the .blocks // file produced by prodosextract. The list is a sequence of // 16-bit little-endian ProDOS block numbers, one per logical // file block. ReadBlockDataBuffer wants block-low bytes at // offset 0..255 and block-high bytes at offset 256..511, so // we split the entries on load. // LD5C8 self-extension stub. The real FS2 boot decodes a // chunk of code into LD5C8 = ReadBlockDataBuffer + 83 via the // protected-disk loader path (SceneryReadDecoded -> nibble // decode -> JSR into the decoded code). Without simulating // that whole flow, JSR LD5C8 from chunk3 L416 / L514 lands on // zero (BRK) and halts the emulator. We patch LD5C8 with a // CLC; RTS so those calls are harmless no-ops -- enough to // let the rest of the loader proceed end-to-end. mem[0xD5C8] = 0x18; // CLC mem[0xD5C9] = 0x60; // RTS // Block-list cap: chunk3's ReadBlockDataBuffer ($D575) holds // 256 low bytes + 256 high bytes = 512 bytes total ($D575 + // $D675). FS2 sector counters can index well past the boot's // first 16 blocks (e.g. HEADER's section $76 -> entry 240), so // we populate the FULL 256 entries. LD5C8 (= buf+83) is // overwritten at runtime by the FS2 boot loader anyway. 
FILE *bf = fopen(blocksPath, "rb"); if (bf == NULL) { fprintf(stderr, "warning: cannot open %s; falling back to identity map\n", blocksPath); for (int i = 0; i < 256; i++) { mem[0xD575 + i] = (uint8_t)(i & 0xFF); } } else { uint8_t buf[2]; int i = 0; while (i < 256 && fread(buf, 1, 2, bf) == 2) { mem[0xD575 + i] = buf[0]; mem[0xD575 + 256 + i] = buf[1]; i++; } fclose(bf); fprintf(stderr, "loaded %d block-list entries from %s\n", i, blocksPath); } // Run one or more entry points. Default is a single entry // (back-compat with previous fs2trace usage). With --chain, // run File1+File2+File3+File4 in sequence: each one's RAM // changes accumulate, so LA7E0 ends up pointing at the full // loaded scenery instead of just the partial File1 result. // With --boot, start at MainGameEntry ($ABBA) so // Apply64KPatchTable rewrites the PatchSlot_* dispatch slots // before any scenery loader runs. PromptColorOrBW is stubbed // (default = colour) so the trace doesn't block on a key. bool chainMode = (getenv("FS2TRACE_CHAIN") != NULL); bool bootMode = (getenv("FS2TRACE_BOOT") != NULL); // FS2TRACE_CITY=N: load city scenery file N then run MainLoop // so its dispatcher's $0D HEADER demand-loads fire. Cities are: // N=0 -> LoadSceneryFile0 ($A66B) // N=1 -> LoadSceneryFile1 ($A674) (Chicago) // N=2 -> LoadSceneryFile2 ($A67D) (LA) // N=3 -> LoadSceneryFile3 ($A686) (Seattle) // N=4 -> LoadSceneryFile4 ($A68F) (NY) const char *cityEnv = getenv("FS2TRACE_CITY"); bool cityMode = (cityEnv != NULL); uint16_t entries[8]; int numEntries = 0; if (cityMode) { int n = (int)strtol(cityEnv, NULL, 0); static const uint16_t cityEntry[5] = { 0xA66B, 0xA674, 0xA67D, 0xA686, 0xA68F }; if (n >= 0 && n <= 4) { // Sequence: full game init → load city's scenery // → init dispatcher pointer → run interpreter // once. 
We bypass MainLoop because its // PatchSlot_FrameSync resets LA7E0 back to the // WW1 dispatcher; instead we directly invoke // LoadDispatcherPointer + L6006 (jmp // ProcessScenery), so HEADER demand-loads fire // against the city dispatcher we just loaded. entries[numEntries++] = 0xABBA; // MainGameEntry init entries[numEntries++] = cityEntry[n]; // Load city's dispatcher into LA7E0+ entries[numEntries++] = 0xA61B; // LoadDispatcherPointer ($8B = LA7E0) entries[numEntries++] = 0x6006; // jmp ProcessScenery } else { fprintf(stderr, "FS2TRACE_CITY: invalid index %d (must be 0-4)\n", n); return 1; } } else if (bootMode) { entries[numEntries++] = 0xABBA; // MainGameEntry } else if (chainMode) { entries[numEntries++] = 0xA674; // LoadSceneryFile1 entries[numEntries++] = 0xA67D; // LoadSceneryFile2 entries[numEntries++] = 0xA686; // LoadSceneryFile3 entries[numEntries++] = 0xA68F; // LoadSceneryFile4 } else { entries[numEntries++] = entryPC; } FILE *trace = (getenv("FS2TRACE_PCS") != NULL) ? fopen("tmp/fs2trace.pcs", "w") : NULL; int totalCycles = 0; for (int e = 0; e < numEntries; e++) { // Initialise machine state for this entry. Memory is // preserved across entries (the whole point of chain // mode) but registers and stack are reset. reg_a = reg_x = reg_y = 0; reg_s = 0xFF; flag_n = flag_v = flag_d = flag_z = flag_c = 0; flag_i = 1; stop = 0; pc = entries[e]; // Push a sentinel return address ($FFFF). When the // entry RTSes, the next fetch lands on $0000 (BRK) // which halts cleanly. push16(0xFFFE); uint16_t lastPCs[16] = { 0 }; int lastIdx = 0; int sawSmartPort = 0; int sawFetchSector = 0; int blocksBefore = traceCount; int entryStart = totalCycles; // Boot mode runs the full chunk5 main loop, which // never returns -- we need a generous cycle budget so // demand-driven scenery loads (triggered by HEADER // opcodes during ProcessScenery) get a chance to fire. // Override with FS2TRACE_CYCLES if needed. 
const char *cycleEnv = getenv("FS2TRACE_CYCLES"); int cycleLimit = bootMode ? 200000000 : 5000000; if (cycleEnv != NULL) { cycleLimit = (int)strtol(cycleEnv, NULL, 0); } // FS2TRACE_INIT_X / FS2TRACE_INIT_Z: pre-seed the // aircraft scenery position (upper-16 of zero-page // $5A-$65) before MainGameEntry runs. This forces the // scenery dispatcher to fire HEADER demand-loads for the // section containing those coords, so we can capture // city-specific RAM dumps. Values are 16-bit unsigned // (the upper word of the 24-bit scenery coordinate). const char *initXEnv = getenv("FS2TRACE_INIT_X"); const char *initZEnv = getenv("FS2TRACE_INIT_Z"); if (initXEnv != NULL) { uint16_t x = (uint16_t)strtol(initXEnv, NULL, 0); mem[0x5C] = (uint8_t)(x & 0xFF); mem[0x5D] = (uint8_t)((x >> 8) & 0xFF); fprintf(stderr, "FS2TRACE_INIT_X: $5C/$5D = $%04X\n", x); } if (initZEnv != NULL) { uint16_t z = (uint16_t)strtol(initZEnv, NULL, 0); mem[0x64] = (uint8_t)(z & 0xFF); mem[0x65] = (uint8_t)((z >> 8) & 0xFF); fprintf(stderr, "FS2TRACE_INIT_Z: $64/$65 = $%04X\n", z); } // Re-apply the position patch every frame at the // dispatcher entry point. Hook at $A61B // (LoadDispatcherPointer) -- this runs AFTER chunk5's // per-frame IntegratePhysicsStep ($87A2) but BEFORE the // dispatcher reads $5C/$5D for the cull check. Hooking // earlier (e.g. $877F MainLoop) doesn't work because // IntegratePhysicsStep normalises position cells. uint16_t patchX = (initXEnv != NULL) ? (uint16_t)strtol(initXEnv, NULL, 0) : 0; uint16_t patchZ = (initZEnv != NULL) ? 
(uint16_t)strtol(initZEnv, NULL, 0) : 0; for (cycles = 0; cycles < cycleLimit && !stop; ) { if (pc == 0xC70D) { sawSmartPort++; } if (pc == 0x1EC6) { sawFetchSector++; } if (pc == 0xA61B) { if (initXEnv != NULL) { mem[0x5C] = (uint8_t)(patchX & 0xFF); mem[0x5D] = (uint8_t)((patchX >> 8) & 0xFF); } if (initZEnv != NULL) { mem[0x64] = (uint8_t)(patchZ & 0xFF); mem[0x65] = (uint8_t)((patchZ >> 8) & 0xFF); } } // FS2TRACE_FORCE_INBOUNDS: short-circuit the // "beyond bounds" cull-redirect path at $6E6F so // every $13/$14 and $20/$21/$22 cull falls // through. Used to force every section's $0D // HEADER to fire during boot, so the resulting // RAM dump contains every reachable scenery // section at its dispatcher-expected dest -- a // single comprehensive dump per region without // needing to fly the camera there. if (pc == 0x6E6F && getenv("FS2TRACE_FORCE_INBOUNDS") != NULL) { // TestSceneryRangeReject does pla*4 + // jmp L00A5 to take the BEYOND path. // Replace with TestSceneryRangeOk's RTS // so the cull returns "in bounds" // instead -- caller advances past the // cull record. 
pc = 0x6E6E; } if (pc == 0x6006) { static int psHits = 0; psHits++; if (psHits <= 5) { fprintf(stderr, "ProcessScenery (L6006) hit #%d: $5C/$5D=$%02X%02X $64/$65=$%02X%02X $8B/$8C=$%02X%02X\n", psHits, mem[0x5D], mem[0x5C], mem[0x65], mem[0x64], mem[0x8C], mem[0x8B]); } } if (pc == 0xA63A) { static int hdrHits = 0; hdrHits++; if (hdrHits <= 20) { fprintf(stderr, "SceneryHeaderLoadIfMiss hit #%d: sectionId=$%02X count=$%02X dest=$%02X%02X cacheIdx=$%02X cache=$%02X%02X%02X%02X\n", hdrHits, mem[0x08E5], mem[0x08E6], mem[0x08E8], mem[0x08E7], mem[0x08E9], mem[0x08EA], mem[0x08EB], mem[0x08EC], mem[0x08ED]); } } if (pc == 0xA6CD) { static int runHits = 0; runHits++; if (runHits <= 20) { fprintf(stderr, "SceneryHeaderRunSection hit #%d: $9E=$%02X $9F=$%02X L1E01=$%02X L1E03=$%02X%02X L1E07=$%02X L1E09=$%02X\n", runHits, mem[0x9E], mem[0x9F], mem[0x1E01], mem[0x1E04], mem[0x1E03], mem[0x1E07], mem[0x1E09]); } } if (pc == 0xA6DF) { static int loopHits = 0; loopHits++; if (loopHits <= 5) { fprintf(stderr, "SceneryHeaderRunSectionLoop (jsr L1EAD): $9F=$%02X L1E01=$%02X L1E07=$%02X L1E09=$%02X\n", mem[0x9F], mem[0x1E01], mem[0x1E07], mem[0x1E09]); } } if (pc == 0xA6F3) { fprintf(stderr, "SceneryHeaderRunSectionFail: L1E01=$%02X carry-set\n", mem[0x1E01]); } lastPCs[lastIdx] = pc; lastIdx = (lastIdx + 1) % 16; if (trace != NULL) { fprintf(trace, "$%04X\n", pc); } step(); if (pc == 0xFFFF) { break; } if (dumpRequested) { fprintf(stderr, "FS2TRACE_DUMP_AT_BLOCK reached after block %d\n", traceCount); break; } } totalCycles += cycles; fprintf(stderr, "entry $%04X: %d cycles, smartport=%d, fetch=%d, blocks=%d, final PC=$%04X stop=%d\n", entries[e], cycles, sawSmartPort, sawFetchSector, traceCount - blocksBefore, pc, stop); if (stop) { fprintf(stderr, " last 16 PCs: "); for (int i = 0; i < 16; i++) { int idx = (lastIdx + 15 - i) % 16; fprintf(stderr, "$%04X ", lastPCs[idx]); } fprintf(stderr, "\n"); break; } (void)entryStart; } if (trace != NULL) { fclose(trace); } // Optional: 
dump key memory regions where the loader deposits // data, so the caller can grep for the scenery bytecode entry. if (getenv("FS2TRACE_DUMP") != NULL) { FILE *dump = fopen("tmp/fs2trace.ram", "wb"); if (dump != NULL) { fwrite(mem, 1, MEM_SIZE, dump); fclose(dump); fprintf(stderr, "wrote tmp/fs2trace.ram (full 64K)\n"); } } // Show LA7E0 (the scenery entry pointer chunk5's // LoadDispatcherPointer reads). uint16_t la7e0 = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8)); fprintf(stderr, "LA7E0 = $%04X (scenery entry pointer)\n", la7e0); fprintf(stderr, "\n%d cycles total, %d block reads.\n", totalCycles, traceCount); for (int i = 0; i < traceCount; i++) { printf(" %d: BLOCK $%04X (%d)\n", i, tracedBlocks[i], tracedBlocks[i]); } free(diskImage); return 0; }