// fs2port/port/tools/fs2trace.c
// 2026-05-13 21:32:05 -05:00
//
// 2234 lines
// 117 KiB
// C

// FS2 sector-read tracer.
//
// Boots a synthetic Apple //e environment with the FS2 chunks loaded
// into RAM, hooks the SmartPort entry point at $C70D, then trampolines
// into the LoadSceneryFile* entry points and watches every block read.
//
// This is NOT a full Apple //e emulator. It only implements enough of
// the 6502 instruction set + zero-page / RAM model to run the FS2
// loader code path. The Disk II / SmartPort layer is replaced with a
// single hook that:
// - logs the block number (read from RBBlockNumber at chunk4)
// - copies 512 bytes from the .dsk image at file_offset = block * 512
// - returns "success" via CLC/RTS
//
// Usage:
// fs2trace <fs2.dsk> <scenery.dsk> [entry]
// entry is one of File0..File4 (default File0). Output is one
// "BLOCK $XXXX" line per read.
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Size of the emulated address space: the full 16-bit range, backed
// by the flat mem[] array below.
#define MEM_SIZE 65536
// Bytes per disk block; the SmartPort hook reads the image at
// file offset = block number * BLOCK_SIZE.
#define BLOCK_SIZE 512
// NOTE(review): not referenced in the visible part of the file --
// presumably the 256-byte Disk II sector size; confirm usage.
#define SECTOR_SIZE 256
// Capacity of the tracedBlocks[] log; reads beyond this many are
// still serviced but no longer recorded.
#define MAX_TRACE_BLKS 4096
// Emulated memory, indexed directly by 16-bit guest address.
static uint8_t mem[MEM_SIZE];
// NOTE(review): not referenced in the visible part of the file --
// confirm whether this is still needed.
static uint8_t pc_low;
// Program counter of the emulated CPU.
static uint16_t pc;
// Accumulator, index registers, and stack pointer. reg_s is an
// offset into page $01 (see push/pop).
static uint8_t reg_a, reg_x, reg_y, reg_s;
// Processor status bits, each held separately as 0 or 1; getP/setP
// pack and unpack them into the 6502 P byte layout.
static uint8_t flag_n, flag_v, flag_d, flag_i, flag_z, flag_c;
// Incremented once per opcode fetched by step(), so this counts
// executed instructions rather than clock cycles.
static int cycles;
// Set to 1 when the emulator must halt (e.g. on an un-suppressed BRK).
static int stop;
// Set by FS2TRACE_DUMP_AT_BLOCK once the requested block count is
// reached. The main step loop checks this and exits cleanly without
// the noisy "last 16 PCs" trail that `stop` triggers.
static int dumpRequested;
// Disk image: scenery disk for the trace.
// diskImage points at the raw image bytes and diskSize is its length
// in bytes; reads that fall outside it are zero-filled by the hook.
static uint8_t *diskImage;
static size_t diskSize;
// Port of chunk3 `SceneryNibbleDecode` ($D52D). Despite the name in
// the disassembly, this is really an ENCODER -- it takes raw input
// bytes and produces an Apple-disk-style nibble format where every
// output byte has bits 7,5,3,1 forced to 1 (the `$AA` pattern). Each
// input byte expands to two output bytes:
//
//     out[0] = (in >> 1) | $AA
//     out[1] =  in       | $AA
//
// A running EOR checksum over the input bytes is stored through
// `checksumOut` (when it is non-NULL) in the same two-byte form:
// `(cksum>>1)|$AA` then `cksum|$AA`. It is not appended to `dst`.
//
// The original fills RAM from $3B60 forward and stops once the
// destination high byte hits $3E, i.e. an output window of
// $3E00 - $3B60 = $2A0 = 672 bytes, which is 336 input bytes.
//
// NOTE(review): this count was previously documented as 720, which
// does not match the window above (720 input bytes would need a
// 1440-byte window, e.g. one starting at $3860). Confirm the start
// address against the disassembly.
//
// `src` must hold at least 336 readable bytes and `dst` at least 672
// writable bytes. Returns the number of input bytes consumed (336
// with the current window).
static int nibbleDecode(const uint8_t *src, uint8_t *dst, uint8_t *checksumOut) {
    const int outBytes = 0x3E00 - 0x3B60;   // size of the output window
    const int inBytes = outBytes / 2;       // two output bytes per input byte
    uint8_t running = 0;

    for (int i = 0; i < inBytes; i++) {
        const uint8_t in = src[i];
        running ^= in;
        dst[2 * i]     = (uint8_t)((in >> 1) | 0xAA);
        dst[2 * i + 1] = (uint8_t)(in | 0xAA);
    }

    if (checksumOut != NULL) {
        checksumOut[0] = (uint8_t)((running >> 1) | 0xAA);
        checksumOut[1] = (uint8_t)(running | 0xAA);
    }
    return inBytes;
}
// Trace buffer.
// Block numbers recorded by the SmartPort hook, in the order they
// were requested; traceCount is the number of valid entries and
// never exceeds MAX_TRACE_BLKS.
static int tracedBlocks[MAX_TRACE_BLKS];
static int traceCount;
// Forward declarations.
// Memory access: rd/wr go through the emulated address space.
static uint8_t rd(uint16_t addr);
static void wr(uint16_t addr, uint8_t v);
// Stack helpers operating on page $01 via reg_s.
static void push(uint8_t v);
static uint8_t pop(void);
static void push16(uint16_t v);
static uint16_t pop16(void);
// Instruction-stream helpers: read at pc and advance it.
static uint8_t fetch(void);
static uint16_t fetch16(void);
// Status-flag helpers.
static void setNZ(uint8_t v);
static uint8_t getP(void);
static void setP(uint8_t v);
// Execute one instruction (or one hook) at the current pc.
static void step(void);
// Host-side replacement for the SmartPort firmware entry point.
static void hookSmartPort(void);
// NOTE(review): defined outside the visible part of the file --
// presumably loads up to maxLen bytes of `path` at `addr`; confirm.
static int loadChunk(const char *path, uint16_t addr, size_t maxLen);
// Read one byte of emulated memory. Every address maps straight onto
// the flat mem[] array, so a read has no side effects.
static uint8_t rd(uint16_t addr) {
    const uint8_t value = mem[addr];
    return value;
}
// Apple II LCBANK split: $D000-$FFFF can be either ROM, LCBANK1, or
// LCBANK2 depending on softswitches at $C080-$C08F. fs2trace treats
// the entire $0000-$FFFF as plain RAM, but FS2 boot relies on hires
// page writes NOT corrupting the chunk binaries that live at
// $D300-$F3FF (chunk3) and $F600-$FBFF (chunk2). Ignore writes to
// $D000-$FFFF so chunk binaries stay intact. This breaks Apply64K-
// PatchTable if it ever targets a high address (it doesn't -- all
// patch table addresses are in $6000-$AE00, chunk5 main).
//
// Without this guard the hires drawing loop at $1C07 (sta
// (hires_ptr1),y) corrupts $FA67 and $FA71-3 in chunk2 (the wind
// code), causing fs2trace to halt at $FA73 on undocumented opcode
// $1B before more loader iterations can run.
static void wr(uint16_t addr, uint8_t v) {
    // Watchpoint state. The range is parsed from FS2TRACE_WATCH
    // ("lo" or "lo-hi", any strtoul base-0 syntax) the first time a
    // value-changing write is seen while the variable is set.
    static int rangeParsed = 0;
    static uint16_t rangeFirst = 0, rangeLast = 0;
    static int reported = 0;

    const char *rangeSpec = getenv("FS2TRACE_WATCH");
    if (rangeSpec != NULL && mem[addr] != v) {
        if (!rangeParsed) {
            unsigned long first = strtoul(rangeSpec, NULL, 0);
            const char *sep = strchr(rangeSpec, '-');
            unsigned long last = (sep == NULL) ? first : strtoul(sep + 1, NULL, 0);
            rangeFirst = (uint16_t)first;
            rangeLast = (uint16_t)last;
            rangeParsed = 1;
        }

        // At most `limit` hits are printed (default 50); the limit is
        // re-read from FS2TRACE_WATCH_CAP on every changing write.
        const char *limitSpec = getenv("FS2TRACE_WATCH_CAP");
        int limit = (limitSpec == NULL) ? 50 : (int)strtol(limitSpec, NULL, 0);

        bool inRange = (addr >= rangeFirst) && (addr <= rangeLast);
        if (inRange && reported < limit) {
            fprintf(stderr, " watch: $%04X = $%02X (was $%02X) at PC $%04X cycles=%d\n",
                    addr, v, mem[addr], pc, cycles);
            reported++;
        }
    }

    // Protect chunk binaries from stray hires-page writes: stores to
    // $D000-$FFFF are dropped unless FS2TRACE_NO_LC_GUARD is set. If
    // the emulator ever needs LCBANK semantics for real (e.g. a patch
    // that targets $D000-$FFFF), this needs to grow into proper
    // softswitch tracking. Note the watch line above is printed even
    // for a store that is dropped here.
    if (addr < 0xD000 || getenv("FS2TRACE_NO_LC_GUARD") != NULL) {
        mem[addr] = v;
    }
}
// Zero-page-wrapping 16-bit read. For (zp),Y and (zp,X) addressing
// modes, the high byte of the pointer must come from `(zp + 1) & $FF`
// -- staying inside zero page even when zp == $FF. Without this wrap
// we'd read from $0100 (the stack) which corrupts the return address
// in subtle ways during scenery loader runs.
static uint16_t rd16zp(uint8_t zp) {
    // The cast keeps the second byte inside zero page: $FF + 1 -> $00.
    const uint8_t next = (uint8_t)(zp + 1);
    return (uint16_t)(mem[zp] | (mem[next] << 8));
}
// Push one byte onto the stack in page $01: store at $0100 + S, then
// decrement S (wrapping within the page).
static void push(uint8_t v) {
    const uint16_t slot = (uint16_t)(0x0100 + reg_s);
    mem[slot] = v;
    reg_s = (uint8_t)(reg_s - 1);
}
// Pull one byte from the stack in page $01: increment S (wrapping
// within the page), then read $0100 + S.
static uint8_t pop(void) {
    reg_s = (uint8_t)(reg_s + 1);
    const uint16_t slot = (uint16_t)(0x0100 + reg_s);
    return mem[slot];
}
// Push a 16-bit value, high byte first, so the low byte ends up on
// top of the stack.
static void push16(uint16_t v) {
    const uint8_t high = (uint8_t)(v >> 8);
    const uint8_t low = (uint8_t)v;
    push(high);
    push(low);
}
// Pull a 16-bit value pushed by push16: low byte first, then high.
static uint16_t pop16(void) {
    uint16_t value = pop();
    value = (uint16_t)(value | (pop() << 8));
    return value;
}
// Read the byte at pc and advance pc by one (wrapping at 64 KiB).
static uint8_t fetch(void) {
    const uint8_t byte = mem[pc];
    pc = (uint16_t)(pc + 1);
    return byte;
}
// Read a little-endian 16-bit operand at pc and advance pc by two.
static uint16_t fetch16(void) {
    const uint8_t low = fetch();
    const uint8_t high = fetch();
    return (uint16_t)((high << 8) | low);
}
// Update N and Z from a result byte: N mirrors bit 7, Z is set when
// the byte is zero.
static void setNZ(uint8_t v) {
    flag_n = (uint8_t)(v >> 7);
    flag_z = (uint8_t)(v == 0);
}
// Pack the individual flags into a status byte (NV-xDIZC layout).
// Bit 5 is always set; bit 4 (B) is left clear here and is OR-ed in
// by the caller where needed (PHP does so).
static uint8_t getP(void) {
    int packed = 0x20;
    packed |= flag_n << 7;
    packed |= flag_v << 6;
    packed |= flag_d << 3;
    packed |= flag_i << 2;
    packed |= flag_z << 1;
    packed |= flag_c;
    return (uint8_t)packed;
}
// Unpack a status byte into the individual flags. Bits 4 and 5 have
// no backing flag and are ignored.
static void setP(uint8_t v) {
    flag_c = (uint8_t)(v & 1);
    flag_z = (uint8_t)((v >> 1) & 1);
    flag_i = (uint8_t)((v >> 2) & 1);
    flag_d = (uint8_t)((v >> 3) & 1);
    flag_v = (uint8_t)((v >> 6) & 1);
    flag_n = (uint8_t)((v >> 7) & 1);
}
// Stub for chunk5 `PromptColorOrBW` ($AC3A). The real routine clears
// the viewport, draws the intro banner, and waits for the user to
// press 'A' (colour) or 'B' (black-and-white). Either choice copies
// 22 bytes from ColorModePatch ($AB65) or BWModePatch ($AB7B) into
// ColorOrBWModePatch ($0800), then returns.
//
// fs2trace can't render or read keys, so we simulate "user pressed
// A" inline: copy ColorModePatch -> $0800 and RTS to the caller.
// This unblocks the boot path so MainGameEntry can run through to
// the main loop where the patched scenery slots fire.
static void hookPromptColorOrBW(void) {
    // Simulate the "colour" choice: copy the 22-byte patch at $AB65
    // down to $0800. The two regions do not overlap.
    memcpy(&mem[0x0800], &mem[0xAB65], 22);

    // Return to the caller exactly as RTS would: pull the return
    // address (low byte first) and resume one byte past it.
    pc = (uint16_t)(pop16() + 1);
}
// SmartPort hook: runs when PC reaches $C70D. Reads RBBlockNumber
// (3 bytes at chunk4-defined location, but we'll read it dynamically
// from the call params) and copies the requested block from the disk
// image into the SmartPort's data buffer. The caller's return is via
// the standard SmartPort calling convention: after `jsr $C70D` the
// command byte and parameter pointer are inline; we skip past them
// before returning.
static void hookSmartPort(void) {
    // Apple SmartPort calling convention:
    //     jsr $C70D              ; or whatever entry the firmware uses
    //     .byte command          ; here, $01 = ReadBlock
    //     .word param_block_addr
    // JSR pushes the address of its own last byte, so the inline
    // command byte sits one past the popped value. We read the
    // params, do the read, then resume after the inline data.
    //
    // Every guest-supplied address is wrapped to 16 bits before it
    // indexes mem[]: a return address, parameter block, or data
    // buffer near $FFFF must never read or write past the array.
    uint16_t retLo = pop();
    uint16_t retHi = pop();
    uint16_t ret = (uint16_t)(retLo | (retHi << 8)); // points one before inline cmd
    ret++;
    uint8_t command = mem[ret];
    uint16_t paramAddr = (uint16_t)(mem[(uint16_t)(ret + 1)]
                                    | (mem[(uint16_t)(ret + 2)] << 8));
    ret += 3;

    if (command == 0x01) { // ReadBlock
        // ParamBlock layout (chunk4 RBParams):
        //   byte 0:    parameter count ($03)
        //   byte 1:    unit number
        //   bytes 2-3: data buffer addr
        //   bytes 4-6: block number (3 bytes, 24-bit)
        uint16_t bufAddr = (uint16_t)(mem[(uint16_t)(paramAddr + 2)]
                                      | (mem[(uint16_t)(paramAddr + 3)] << 8));
        uint32_t blockNum = (uint32_t)mem[(uint16_t)(paramAddr + 4)]
                          | ((uint32_t)mem[(uint16_t)(paramAddr + 5)] << 8)
                          | ((uint32_t)mem[(uint16_t)(paramAddr + 6)] << 16);
        if (traceCount < MAX_TRACE_BLKS) {
            tracedBlocks[traceCount++] = (int)blockNum;
        }

        // FS2TRACE_DUMP_AT_BLOCK: snapshot RAM right after the
        // Nth block has been read and copied. The SD3 boot
        // sequence loads 16 blocks ($0360-$036F) into $2600+
        // staging via SCRU0 -> $A7E0+ before the per-frame
        // PatchSlot_FrameSync starts overwriting the dispatcher
        // area. Stopping at block 16 captures the freshly-built
        // dispatcher + per-section geometry at $A800-$AAFF
        // before frames 1+ blow it away.
        const char *stopAtEnv = getenv("FS2TRACE_DUMP_AT_BLOCK");
        if (stopAtEnv != NULL) {
            int stopAt = (int)strtol(stopAtEnv, NULL, 0);
            if (stopAt > 0 && traceCount == stopAt) {
                dumpRequested = 1;
            }
        }

        size_t off = (size_t)blockNum * BLOCK_SIZE;
        bool verbose = (getenv("FS2TRACE_VERBOSE") != NULL);

        // Suppress reads that target our pre-loaded
        // ReadBlockDataBuffer at $D575. FS2's boot reads block
        // 0 (the .po boot block) into $D575, expecting it to
        // contain the scenery block list -- but the san-inc
        // pack .po has a standard ProDOS boot block there
        // instead. Stomping $D575 with boot-block content
        // breaks every subsequent block lookup. Our pre-fill
        // already has the right .blocks data; preserve it.
        bool skipWrite = (bufAddr >= 0xD575 && bufAddr < 0xD575 + 1024);
        if (skipWrite) {
            if (verbose) {
                fprintf(stderr, " read block $%04X -> $%04X (skipped: protect block list)\n",
                        (unsigned)blockNum, (unsigned)bufAddr);
            }
        } else {
            // Split the transfer at the top of memory. `head` bytes
            // fit between bufAddr and $FFFF; any remaining `tail`
            // bytes wrap around to $0000, the way a 16-bit address
            // would, instead of running off the end of mem[].
            size_t head = sizeof mem - bufAddr;
            if (head > BLOCK_SIZE) {
                head = BLOCK_SIZE;
            }
            size_t tail = BLOCK_SIZE - head;

            if (off + BLOCK_SIZE <= diskSize) {
                memcpy(&mem[bufAddr], &diskImage[off], head);
                memcpy(&mem[0], &diskImage[off + head], tail);
            } else {
                // Block lies outside the image: hand back zeros.
                memset(&mem[bufAddr], 0, head);
                memset(&mem[0], 0, tail);
            }
            if (verbose) {
                fprintf(stderr, " read block $%04X -> $%04X (first byte: $%02X)\n",
                        (unsigned)blockNum, (unsigned)bufAddr, (unsigned)mem[bufAddr]);
            }
        }
        flag_c = 0; // success
        reg_a = 0;
    } else {
        fprintf(stderr, "unsupported SmartPort command $%02X\n", (unsigned)command);
        flag_c = 1;
    }

    // Return past the inline command + param pointer. The adjusted
    // return address is pushed and pulled again so the stack slots
    // end up holding it, exactly as if the guest had executed RTS.
    push16((uint16_t)(ret - 1));
    pc = (uint16_t)(pop16() + 1);
}
// 6502 instruction step. Implements the documented opcodes used by
// FS2's loader path. Anything else trips the unknown-opcode path and
// stops the emulator with an error.
static void step(void) {
if (pc == 0xC70D || pc == 0xC700 || pc == 0xC709) {
hookSmartPort();
return;
}
if (pc == 0xAC3A) {
hookPromptColorOrBW();
return;
}
uint8_t op = fetch();
cycles++;
switch (op) {
// BRK -- treat as halt with status. Allow caller to
// disable the halt via FS2TRACE_NO_BRK_HALT (= treat
// BRK as RTS so we can chase past zero-padded chunk4
// areas without aborting the dispatcher).
case 0x00: {
if (getenv("FS2TRACE_NO_BRK_HALT") != NULL) {
uint8_t rl = pop();
uint8_t rh = pop();
pc = (uint16_t)((rh << 8) | rl) + 1;
break;
}
fprintf(stderr, "BRK at $%04X cycles=%d\n", (uint16_t)(pc - 1), cycles);
fflush(stderr);
stop = 1;
return;
}
// NOP variants.
case 0xEA: break;
// CLC / SEC / CLD / SED / CLI / SEI / CLV
case 0x18: flag_c = 0; break;
case 0x38: flag_c = 1; break;
case 0xD8: flag_d = 0; break;
case 0xF8: flag_d = 1; break;
case 0x58: flag_i = 0; break;
case 0x78: flag_i = 1; break;
case 0xB8: flag_v = 0; break;
// Transfers.
case 0xAA: reg_x = reg_a; setNZ(reg_x); break; // TAX
case 0xA8: reg_y = reg_a; setNZ(reg_y); break; // TAY
case 0x8A: reg_a = reg_x; setNZ(reg_a); break; // TXA
case 0x98: reg_a = reg_y; setNZ(reg_a); break; // TYA
case 0xBA: reg_x = reg_s; setNZ(reg_x); break; // TSX
case 0x9A: reg_s = reg_x; break; // TXS
// Stack.
case 0x48: push(reg_a); break; // PHA
case 0x68: reg_a = pop(); setNZ(reg_a); break; // PLA
case 0x08: push((uint8_t)(getP() | 0x10)); break; // PHP
case 0x28: setP(pop()); break; // PLP
// INC/DEC X/Y
case 0xE8: reg_x++; setNZ(reg_x); break; // INX
case 0xC8: reg_y++; setNZ(reg_y); break; // INY
case 0xCA: reg_x--; setNZ(reg_x); break; // DEX
case 0x88: reg_y--; setNZ(reg_y); break; // DEY
// LDA
case 0xA9: reg_a = fetch(); setNZ(reg_a); break; // LDA imm
case 0xA5: reg_a = rd(fetch()); setNZ(reg_a); break; // LDA zp
case 0xB5: reg_a = rd((uint8_t)(fetch() + reg_x)); setNZ(reg_a); break; // LDA zp,X
case 0xAD: reg_a = rd(fetch16()); setNZ(reg_a); break; // LDA abs
case 0xBD: { uint16_t a = fetch16(); reg_a = rd((uint16_t)(a + reg_x)); setNZ(reg_a); break; } // LDA abs,X
case 0xB9: { uint16_t a = fetch16(); reg_a = rd((uint16_t)(a + reg_y)); setNZ(reg_a); break; } // LDA abs,Y
case 0xA1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); reg_a = rd(a); setNZ(reg_a); break; } // LDA (zp,X)
case 0xB1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); reg_a = rd(a); setNZ(reg_a); break; } // LDA (zp),Y
// LDX
case 0xA2: reg_x = fetch(); setNZ(reg_x); break; // LDX imm
case 0xA6: reg_x = rd(fetch()); setNZ(reg_x); break; // LDX zp
case 0xB6: reg_x = rd((uint8_t)(fetch() + reg_y)); setNZ(reg_x); break; // LDX zp,Y
case 0xAE: reg_x = rd(fetch16()); setNZ(reg_x); break; // LDX abs
case 0xBE: { uint16_t a = fetch16(); reg_x = rd((uint16_t)(a + reg_y)); setNZ(reg_x); break; } // LDX abs,Y
// LDY
case 0xA0: reg_y = fetch(); setNZ(reg_y); break; // LDY imm
case 0xA4: reg_y = rd(fetch()); setNZ(reg_y); break; // LDY zp
case 0xB4: reg_y = rd((uint8_t)(fetch() + reg_x)); setNZ(reg_y); break; // LDY zp,X
case 0xAC: reg_y = rd(fetch16()); setNZ(reg_y); break; // LDY abs
case 0xBC: { uint16_t a = fetch16(); reg_y = rd((uint16_t)(a + reg_x)); setNZ(reg_y); break; } // LDY abs,X
// STA
case 0x85: wr(fetch(), reg_a); break; // STA zp
case 0x95: wr((uint8_t)(fetch() + reg_x), reg_a); break; // STA zp,X
case 0x8D: wr(fetch16(), reg_a); break; // STA abs
case 0x9D: { uint16_t a = fetch16(); wr((uint16_t)(a + reg_x), reg_a); break; } // STA abs,X
case 0x99: { uint16_t a = fetch16(); wr((uint16_t)(a + reg_y), reg_a); break; } // STA abs,Y
case 0x81: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); wr(a, reg_a); break; } // STA (zp,X)
case 0x91: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); wr(a, reg_a); break; } // STA (zp),Y
// STX
case 0x86: wr(fetch(), reg_x); break; // STX zp
case 0x96: wr((uint8_t)(fetch() + reg_y), reg_x); break;
case 0x8E: wr(fetch16(), reg_x); break;
// STY
case 0x84: wr(fetch(), reg_y); break;
case 0x94: wr((uint8_t)(fetch() + reg_x), reg_y); break;
case 0x8C: wr(fetch16(), reg_y); break;
// INC zp / abs
case 0xE6: { uint8_t a = fetch(); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
case 0xF6: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
case 0xEE: { uint16_t a = fetch16(); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
case 0xFE: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
// DEC
case 0xC6: { uint8_t a = fetch(); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
case 0xD6: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
case 0xCE: { uint16_t a = fetch16(); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
case 0xDE: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
// Logical / arithmetic helpers (define lambdas inline).
#define DO_ADC(v) do { uint16_t s = (uint16_t)reg_a + (uint16_t)(v) + (uint16_t)flag_c; \
flag_c = (s > 0xFF) ? 1 : 0; \
flag_v = ((reg_a ^ (v)) & 0x80) ? 0 : (((reg_a ^ s) & 0x80) ? 1 : 0); \
reg_a = (uint8_t)s; setNZ(reg_a); } while (0)
#define DO_SBC(v) do { uint8_t vv = (uint8_t)~(v); \
uint16_t s = (uint16_t)reg_a + (uint16_t)vv + (uint16_t)flag_c; \
flag_c = (s > 0xFF) ? 1 : 0; \
flag_v = ((reg_a ^ vv) & 0x80) ? 0 : (((reg_a ^ s) & 0x80) ? 1 : 0); \
reg_a = (uint8_t)s; setNZ(reg_a); } while (0)
case 0x69: { uint8_t v = fetch(); DO_ADC(v); break; }
case 0x65: { uint8_t v = rd(fetch()); DO_ADC(v); break; }
case 0x75: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_ADC(v); break; }
case 0x6D: { uint8_t v = rd(fetch16()); DO_ADC(v); break; }
case 0x7D: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_ADC(v); break; }
case 0x79: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_ADC(v); break; }
case 0x71: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_ADC(v); break; }
case 0x61: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_ADC(v); break; }
case 0xE9: { uint8_t v = fetch(); DO_SBC(v); break; }
case 0xE5: { uint8_t v = rd(fetch()); DO_SBC(v); break; }
case 0xF5: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_SBC(v); break; }
case 0xED: { uint8_t v = rd(fetch16()); DO_SBC(v); break; }
case 0xFD: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_SBC(v); break; }
case 0xF9: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_SBC(v); break; }
case 0xF1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_SBC(v); break; }
case 0xE1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_SBC(v); break; }
// CMP / CPX / CPY
#define DO_CMP(reg, v) do { uint16_t r = (uint16_t)(reg) + 0x100 - (uint16_t)(v); \
flag_c = ((reg) >= (v)) ? 1 : 0; setNZ((uint8_t)(r & 0xFF)); } while (0)
case 0xC9: { uint8_t v = fetch(); DO_CMP(reg_a, v); break; }
case 0xC5: { uint8_t v = rd(fetch()); DO_CMP(reg_a, v); break; }
case 0xD5: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_CMP(reg_a, v); break; }
case 0xCD: { uint8_t v = rd(fetch16()); DO_CMP(reg_a, v); break; }
case 0xDD: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_CMP(reg_a, v); break; }
case 0xD9: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_CMP(reg_a, v); break; }
case 0xD1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_CMP(reg_a, v); break; }
case 0xC1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_CMP(reg_a, v); break; }
case 0xE0: { uint8_t v = fetch(); DO_CMP(reg_x, v); break; }
case 0xE4: { uint8_t v = rd(fetch()); DO_CMP(reg_x, v); break; }
case 0xEC: { uint8_t v = rd(fetch16()); DO_CMP(reg_x, v); break; }
case 0xC0: { uint8_t v = fetch(); DO_CMP(reg_y, v); break; }
case 0xC4: { uint8_t v = rd(fetch()); DO_CMP(reg_y, v); break; }
case 0xCC: { uint8_t v = rd(fetch16()); DO_CMP(reg_y, v); break; }
// AND / ORA / EOR
#define DO_AND(v) do { reg_a &= (v); setNZ(reg_a); } while (0)
#define DO_ORA(v) do { reg_a |= (v); setNZ(reg_a); } while (0)
#define DO_EOR(v) do { reg_a ^= (v); setNZ(reg_a); } while (0)
case 0x29: { uint8_t v = fetch(); DO_AND(v); break; }
case 0x25: { uint8_t v = rd(fetch()); DO_AND(v); break; }
case 0x35: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_AND(v); break; }
case 0x2D: { uint8_t v = rd(fetch16()); DO_AND(v); break; }
case 0x3D: { uint16_t a = fetch16(); DO_AND(rd((uint16_t)(a + reg_x))); break; }
case 0x39: { uint16_t a = fetch16(); DO_AND(rd((uint16_t)(a + reg_y))); break; }
case 0x31: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_AND(rd(a)); break; }
case 0x21: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_AND(rd(a)); break; }
case 0x09: { uint8_t v = fetch(); DO_ORA(v); break; }
case 0x05: { uint8_t v = rd(fetch()); DO_ORA(v); break; }
case 0x15: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_ORA(v); break; }
case 0x0D: { uint8_t v = rd(fetch16()); DO_ORA(v); break; }
case 0x1D: { uint16_t a = fetch16(); DO_ORA(rd((uint16_t)(a + reg_x))); break; }
case 0x19: { uint16_t a = fetch16(); DO_ORA(rd((uint16_t)(a + reg_y))); break; }
case 0x11: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_ORA(rd(a)); break; }
case 0x01: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_ORA(rd(a)); break; }
case 0x49: { uint8_t v = fetch(); DO_EOR(v); break; }
case 0x45: { uint8_t v = rd(fetch()); DO_EOR(v); break; }
case 0x55: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_EOR(v); break; }
case 0x4D: { uint8_t v = rd(fetch16()); DO_EOR(v); break; }
case 0x5D: { uint16_t a = fetch16(); DO_EOR(rd((uint16_t)(a + reg_x))); break; }
case 0x59: { uint16_t a = fetch16(); DO_EOR(rd((uint16_t)(a + reg_y))); break; }
case 0x51: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_EOR(rd(a)); break; }
case 0x41: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_EOR(rd(a)); break; }
// BIT
case 0x24: { uint8_t v = rd(fetch()); flag_z = (reg_a & v) == 0 ? 1 : 0; flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; break; }
case 0x2C: { uint8_t v = rd(fetch16()); flag_z = (reg_a & v) == 0 ? 1 : 0; flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; break; }
// ASL / LSR / ROL / ROR (accumulator + memory variants)
#define ASL(v) do { flag_c = ((v) & 0x80) ? 1 : 0; (v) = (uint8_t)((v) << 1); setNZ(v); } while (0)
#define LSR(v) do { flag_c = (v) & 1; (v) = (uint8_t)((v) >> 1); setNZ(v); } while (0)
#define ROL(v) do { uint8_t c = flag_c; flag_c = ((v) & 0x80) ? 1 : 0; (v) = (uint8_t)(((v) << 1) | c); setNZ(v); } while (0)
#define ROR(v) do { uint8_t c = flag_c; flag_c = (v) & 1; (v) = (uint8_t)(((v) >> 1) | (c << 7)); setNZ(v); } while (0)
case 0x0A: ASL(reg_a); break;
case 0x06: { uint8_t a = fetch(); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
case 0x16: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
case 0x0E: { uint16_t a = fetch16(); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
case 0x1E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
case 0x4A: LSR(reg_a); break;
case 0x46: { uint8_t a = fetch(); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
case 0x56: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
case 0x4E: { uint16_t a = fetch16(); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
case 0x5E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
case 0x2A: ROL(reg_a); break;
case 0x26: { uint8_t a = fetch(); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
case 0x36: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
case 0x2E: { uint16_t a = fetch16(); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
case 0x3E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
case 0x6A: ROR(reg_a); break;
case 0x66: { uint8_t a = fetch(); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
case 0x76: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
case 0x6E: { uint16_t a = fetch16(); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
case 0x7E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
// Branches.
#define BRANCH(cond) do { int8_t off = (int8_t)fetch(); if (cond) pc = (uint16_t)(pc + off); } while (0)
case 0x10: BRANCH(!flag_n); break; // BPL
case 0x30: BRANCH( flag_n); break; // BMI
case 0x50: BRANCH(!flag_v); break; // BVC
case 0x70: BRANCH( flag_v); break; // BVS
case 0x90: BRANCH(!flag_c); break; // BCC
case 0xB0: BRANCH( flag_c); break; // BCS
case 0xD0: BRANCH(!flag_z); break; // BNE
case 0xF0: BRANCH( flag_z); break; // BEQ
// Jumps / subroutine.
case 0x4C: pc = fetch16(); break; // JMP abs
case 0x6C: { uint16_t a = fetch16(); // JMP (ind)
// 6502 page-boundary bug
uint16_t lo = mem[a];
uint16_t hi = mem[(a & 0xFF00) | ((a + 1) & 0xFF)];
pc = (uint16_t)(lo | (hi << 8));
break; }
case 0x20: { // JSR abs
uint16_t target = fetch16();
uint16_t retAddr = (uint16_t)(pc - 1);
push16(retAddr);
if (getenv("FS2TRACE_JSR") != NULL && (retAddr & 0xFF00) == 0x8000) {
fprintf(stderr, "JSR pushes $%04X (target $%04X)\n", retAddr, target);
}
if (target == 0x78E0 && getenv("FS2TRACE_JSR_78E0") != NULL) {
fprintf(stderr, " JSR $78E0 from PC $%04X A=$%02X $24=$%02X $B1=$%02X $0876=$%02X cycles=%d\n",
retAddr, reg_a, mem[0x24], mem[0xB1], mem[0x0876], cycles);
}
// FS2TRACE_PERSP=1: log every PerspectiveDivide
// call ($7BFD in MAME RAM). Inputs: A=num_hi,
// Y=num_lo, $C4/$C5=denominator. Output is in
// A on return. The self-modified table address
// lives at $7D47/$7D48 (= MAME's L7D76+1/+2,
// not source's $7D77/$7D78).
if (target == 0x7BFD && getenv("FS2TRACE_PERSP") != NULL) {
int16_t num = (int16_t)((uint16_t)reg_y | ((uint16_t)reg_a << 8));
int16_t den = (int16_t)((uint16_t)mem[0xC4] | ((uint16_t)mem[0xC5] << 8));
// MAME's L7D76 (= LDA abs,X) is at $7D48; the
// self-modified table address bytes are at
// $7D49 (lo) and $7D4A (hi).
fprintf(stderr,
" JSR PerspDiv from PC $%04X num=%6d den=%6d table=$%02X%02X\n",
retAddr, num, den, mem[0x7D4A], mem[0x7D49]);
}
pc = target;
break; }
case 0x60: { // RTS
// PC was advanced by fetch(); the RTS
// instruction itself was at pc-1.
uint16_t rtsAddr = (uint16_t)(pc - 1);
pc = (uint16_t)(pop16() + 1);
if (getenv("FS2TRACE_RTS") != NULL && rtsAddr == 0xD458) {
fprintf(stderr, "RTS@$D458 -> $%04X (S=$%02X)\n", pc, reg_s);
for (int s = 0; s < 8; s++) {
fprintf(stderr, " stack[$%02X] = $%02X\n",
(uint8_t)(reg_s - s),
mem[0x100 + (uint8_t)(reg_s - s)]);
}
}
// Capture PerspectiveDivide return value.
// MAME's PerspectiveDivide RTS is at $7D51
// (= source's L7D7F equivalent).
if (rtsAddr == 0x7D51 && getenv("FS2TRACE_PERSP") != NULL) {
fprintf(stderr, " PerspDiv returns A=$%02X (signed=%d)\n",
reg_a, (int)(int8_t)reg_a);
}
break; }
case 0x40: setP(pop()); pc = pop16(); break; // RTI
// 65C02 extensions used by chunk3.
case 0x14: { // TRB zp
uint8_t zp = fetch();
uint8_t m = mem[zp];
flag_z = ((reg_a & m) == 0);
mem[zp] = (uint8_t)(m & ~reg_a);
break; }
case 0x1C: { // TRB abs
uint16_t a = fetch16();
uint8_t m = rd(a);
flag_z = ((reg_a & m) == 0);
wr(a, (uint8_t)(m & ~reg_a));
break; }
case 0x04: { // TSB zp
uint8_t zp = fetch();
uint8_t m = mem[zp];
flag_z = ((reg_a & m) == 0);
mem[zp] = (uint8_t)(m | reg_a);
break; }
case 0x0C: { // TSB abs
uint16_t a = fetch16();
uint8_t m = rd(a);
flag_z = ((reg_a & m) == 0);
wr(a, (uint8_t)(m | reg_a));
break; }
case 0x80: { // BRA rel
int8_t off = (int8_t)fetch();
pc = (uint16_t)(pc + off);
break; }
case 0x12: { // ORA (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
reg_a = (uint8_t)(reg_a | rd(a));
setNZ(reg_a);
break; }
case 0x32: { // AND (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
reg_a = (uint8_t)(reg_a & rd(a));
setNZ(reg_a);
break; }
case 0x52: { // EOR (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
reg_a = (uint8_t)(reg_a ^ rd(a));
setNZ(reg_a);
break; }
case 0x72: { // ADC (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
uint8_t m = rd(a);
uint16_t r = (uint16_t)reg_a + (uint16_t)m + (uint16_t)flag_c;
flag_v = (((reg_a ^ m) & 0x80) == 0)
&& (((reg_a ^ (uint8_t)r) & 0x80) != 0);
flag_c = r > 0xFF;
reg_a = (uint8_t)r;
setNZ(reg_a);
break; }
case 0x92: { // STA (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
wr(a, reg_a);
break; }
case 0xB2: { // LDA (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
reg_a = rd(a);
setNZ(reg_a);
break; }
case 0xD2: { // CMP (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
uint8_t m = rd(a);
flag_c = reg_a >= m;
setNZ((uint8_t)(reg_a - m));
break; }
case 0xF2: { // SBC (zp)
uint8_t zp = fetch();
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
uint8_t m = rd(a);
uint16_t r = (uint16_t)reg_a + (uint16_t)((uint8_t)~m) + (uint16_t)flag_c;
flag_v = (((reg_a ^ m) & 0x80) != 0)
&& (((reg_a ^ (uint8_t)r) & 0x80) != 0);
flag_c = r > 0xFF;
reg_a = (uint8_t)r;
setNZ(reg_a);
break; }
case 0x64: { // STZ zp
uint8_t zp = fetch();
mem[zp] = 0;
break; }
case 0x74: { // STZ zp,X
uint8_t zp = fetch();
mem[(uint8_t)(zp + reg_x)] = 0;
break; }
case 0x9C: { // STZ abs
uint16_t a = fetch16();
wr(a, 0);
break; }
case 0x9E: { // STZ abs,X
uint16_t a = fetch16();
wr((uint16_t)(a + reg_x), 0);
break; }
case 0x5A: push(reg_y); break; // PHY
case 0x7A: { reg_y = pop(); setNZ(reg_y); break; } // PLY
case 0xDA: push(reg_x); break; // PHX
case 0xFA: { reg_x = pop(); setNZ(reg_x); break; } // PLX
case 0x3A: reg_a = (uint8_t)(reg_a - 1); setNZ(reg_a); break; // DEC A
case 0x1A: reg_a = (uint8_t)(reg_a + 1); setNZ(reg_a); break; // INC A
// 65C02 RMB/SMB ops: reset/set memory bit N of zp.
case 0x07: { uint8_t zp = fetch(); mem[zp] &= ~0x01; break; } // RMB0
case 0x17: { uint8_t zp = fetch(); mem[zp] &= ~0x02; break; } // RMB1
case 0x27: { uint8_t zp = fetch(); mem[zp] &= ~0x04; break; } // RMB2
case 0x37: { uint8_t zp = fetch(); mem[zp] &= ~0x08; break; } // RMB3
case 0x47: { uint8_t zp = fetch(); mem[zp] &= ~0x10; break; } // RMB4
case 0x57: { uint8_t zp = fetch(); mem[zp] &= ~0x20; break; } // RMB5
case 0x67: { uint8_t zp = fetch(); mem[zp] &= ~0x40; break; } // RMB6
case 0x77: { uint8_t zp = fetch(); mem[zp] &= ~0x80; break; } // RMB7
case 0x87: { uint8_t zp = fetch(); mem[zp] |= 0x01; break; } // SMB0
case 0x97: { uint8_t zp = fetch(); mem[zp] |= 0x02; break; } // SMB1
case 0xA7: { uint8_t zp = fetch(); mem[zp] |= 0x04; break; } // SMB2
case 0xB7: { uint8_t zp = fetch(); mem[zp] |= 0x08; break; } // SMB3
case 0xC7: { uint8_t zp = fetch(); mem[zp] |= 0x10; break; } // SMB4
case 0xD7: { uint8_t zp = fetch(); mem[zp] |= 0x20; break; } // SMB5
case 0xE7: { uint8_t zp = fetch(); mem[zp] |= 0x40; break; } // SMB6
case 0xF7: { uint8_t zp = fetch(); mem[zp] |= 0x80; break; } // SMB7
// BBR/BBS rel: branch on bit reset/set in zp.
case 0x0F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x01)) pc = (uint16_t)(pc + off); break; } // BBR0
case 0x1F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x02)) pc = (uint16_t)(pc + off); break; } // BBR1
case 0x2F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x04)) pc = (uint16_t)(pc + off); break; } // BBR2
case 0x3F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x08)) pc = (uint16_t)(pc + off); break; } // BBR3
case 0x4F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x10)) pc = (uint16_t)(pc + off); break; } // BBR4
case 0x5F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x20)) pc = (uint16_t)(pc + off); break; } // BBR5
case 0x6F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x40)) pc = (uint16_t)(pc + off); break; } // BBR6
case 0x7F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x80)) pc = (uint16_t)(pc + off); break; } // BBR7
case 0x8F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x01) pc = (uint16_t)(pc + off); break; } // BBS0
case 0x9F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x02) pc = (uint16_t)(pc + off); break; } // BBS1
case 0xAF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x04) pc = (uint16_t)(pc + off); break; } // BBS2
case 0xBF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x08) pc = (uint16_t)(pc + off); break; } // BBS3
case 0xCF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x10) pc = (uint16_t)(pc + off); break; } // BBS4
case 0xDF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x20) pc = (uint16_t)(pc + off); break; } // BBS5
case 0xEF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x40) pc = (uint16_t)(pc + off); break; } // BBS6
case 0xFF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x80) pc = (uint16_t)(pc + off); break; } // BBS7
default:
fflush(stdout);
fprintf(stderr, "UNIMPL opcode $%02X at PC $%04X (cycles=%llu)\n",
op, (uint16_t)(pc - 1), (unsigned long long)cycles);
fflush(stderr);
stop = 1;
break;
}
(void)pc_low;
}
// loadChunk: read up to maxLen bytes of the file at `path` into mem[]
// starting at `addr`, and log the loaded range to stderr.
//
// The request is clamped to the space left between `addr` and the end
// of mem[], so an oversized maxLen can never write past the array.
//
// Returns the number of bytes read. 0 means the file could not be
// opened (reported on stderr) or nothing could be read from it.
static int loadChunk(const char *path, uint16_t addr, size_t maxLen) {
    FILE *f = fopen(path, "rb");
    if (f == NULL) {
        fprintf(stderr, "cannot open %s\n", path);
        return 0;
    }
    size_t room = sizeof mem - (size_t)addr;
    if (maxLen > room) {
        maxLen = room;
    }
    size_t n = fread(&mem[addr], 1, maxLen, f);
    fclose(f);
    if (n == 0) {
        // Avoid printing a wrapped "$addr..addr-1" range for an
        // empty read.
        fprintf(stderr, "loaded %s -> $%04X (0 bytes)\n", path, addr);
        return 0;
    }
    fprintf(stderr, "loaded %s -> $%04X..%04X (%zu bytes)\n",
            path, addr, (uint16_t)(addr + n - 1), n);
    return (int)n;
}
// --matrix mode: run chunk5 SetupViewProjection ($6000) with the
// supplied attitude / view inputs and dump the resulting $78..$89
// matrix. Uses a captured MAME RAM image as the initial state so all
// ZP slots, chunk2-5 binaries, and demo wiring are already in place.
//
// Usage:
// fs2trace --matrix <yaw_i16> <pitch_i16> <bank_i16> <vd_byte> [ram.bin]
//
// Default ram.bin = tmp/capture_boot.bin (chunk5Oracle replacement).
static void loadOriginalChunks(void); // defined below
// matrixMode: implementation of `fs2trace --matrix`.
//
// argv[2..5] are yaw, pitch, bank and view direction, parsed with
// strtol base 0 (decimal, 0x-hex or octal). Optional argv[6] names a
// RAM image of exactly MEM_SIZE bytes to start from; it is ignored when
// FS2TRACE_USE_ORIG is set.
//
// Returns 0 after printing the matrix, 1 if the RAM image cannot be
// read in full, and 2 on a usage error or a non-numeric argument.
static int matrixMode(int argc, char **argv) {
    if (argc < 6 || argc > 7) {
        fprintf(stderr,
                "usage: %s --matrix <yaw> <pitch> <bank> <vd> [ram.bin]\n",
                argv[0]);
        return 2;
    }
    // Parse the four numeric inputs, rejecting anything strtol does
    // not consume completely (previously garbage silently became 0).
    long inputs[4];
    for (int i = 0; i < 4; i++) {
        const char *text = argv[2 + i];
        char *end = NULL;
        inputs[i] = strtol(text, &end, 0);
        if (end == text || *end != '\0') {
            fprintf(stderr, "%s: bad numeric argument '%s'\n",
                    argv[0], text);
            return 2;
        }
    }
    long yaw   = inputs[0];
    long pitch = inputs[1];
    long bank  = inputs[2];
    long vd    = inputs[3];
    const char *ramPath = (argc > 6)
        ? argv[6]
        : "/home/scott/claude/flight/tmp/capture_boot.bin";
    // FS2TRACE_USE_ORIG=1: load chunk4/chunk5 from out/ instead
    // of from a captured RAM image. The captured chunk5 in the
    // boot dump is HEAVILY patched by Apply64KPatchTable -- the
    // SetupViewProjection control flow is rewritten there and no
    // longer matches the source. Using the unpatched binaries
    // gives source-faithful matrix output (matching the chunk5.s
    // listing), which is what we want for validating the C
    // transliteration in chunk5Setup.c.
    if (getenv("FS2TRACE_USE_ORIG") != NULL) {
        loadOriginalChunks();
        // ZP isn't initialised by the binaries; explicit zero
        // is fine for SetupViewProjection (no read-before-write
        // outside the inputs we poke below).
    } else {
        FILE *rf = fopen(ramPath, "rb");
        if (rf == NULL) {
            fprintf(stderr, "cannot open RAM image %s\n", ramPath);
            return 1;
        }
        size_t got = fread(mem, 1, MEM_SIZE, rf);
        fclose(rf);
        if (got != MEM_SIZE) {
            fprintf(stderr, "RAM image %s short read (%zu bytes)\n", ramPath, got);
            return 1;
        }
    }
    // Poke inputs over whatever the captured ZP held. Conversion to
    // uint16_t reduces modulo 2^16, so negative angles become their
    // two's-complement encoding.
    uint16_t y = (uint16_t)yaw;
    uint16_t p = (uint16_t)pitch;
    uint16_t b = (uint16_t)bank;
    mem[0x6C] = (uint8_t)( y       & 0xFF);
    mem[0x6D] = (uint8_t)((y >> 8) & 0xFF);
    mem[0x6E] = (uint8_t)( p       & 0xFF);
    mem[0x6F] = (uint8_t)((p >> 8) & 0xFF);
    mem[0x70] = (uint8_t)( b       & 0xFF);
    mem[0x71] = (uint8_t)((b >> 8) & 0xFF);
    mem[0x0A70] = (uint8_t)(vd & 0xFF);
    // Set up CPU and call $6000 = SetupViewProjection. Push a
    // sentinel return address ($FFFE) so the routine's final RTS
    // lands at $FFFF, which the loop below treats as "returned".
    // PC reaching $0000 is treated the same way.
    reg_a = reg_x = reg_y = 0;
    reg_s = 0xFF;
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    stop = 0;
    pc = 0x6000;
    push16(0xFFFE);
    // FS2TRACE_MATRIX_BREAK=$XXXX prints state every time PC
    // matches that address. Used to inspect intermediate state
    // (e.g., set to $177A to see inputs to shifted L1778).
    const char *brkEnv = getenv("FS2TRACE_MATRIX_BREAK");
    uint16_t brkPC = (brkEnv != NULL)
        ? (uint16_t)strtol(brkEnv, NULL, 0) : 0xFFFE;
    for (cycles = 0; cycles < 5000000 && !stop; ) {
        if (pc == 0xFFFF || pc == 0x0000) {
            break;
        }
        if (pc == brkPC) {
            fprintf(stderr,
                    "BRK pc=$%04X A=$%02X X=$%02X Y=$%02X "
                    "$72/$73=$%02X%02X $74/$75=$%02X%02X "
                    "$76/$77=$%02X%02X $CB=$%02X%02X\n",
                    pc, reg_a, reg_x, reg_y,
                    mem[0x73], mem[0x72],
                    mem[0x75], mem[0x74],
                    mem[0x77], mem[0x76],
                    mem[0xCC], mem[0xCB]);
        }
        step();
    }
    // The loop also ends when step() raises `stop` or the cycle
    // budget runs out. In either case the routine never returned and
    // the matrix printed below is not trustworthy, so say so.
    if (pc != 0xFFFF && pc != 0x0000) {
        fprintf(stderr,
                "warning: SetupViewProjection did not return "
                "(pc=$%04X stop=%d cycles=%d); matrix may be incomplete\n",
                pc, stop, cycles);
    }
    printf("inputs: yaw=%ld pitch=%ld bank=%ld VD=$%02X\n",
           yaw, pitch, bank, (uint8_t)(vd & 0xFF));
    printf("matrix at $78..$89 (post-L6301 col shifts):\n");
    for (int row = 0; row < 3; row++) {
        // Each row is three little-endian signed 16-bit words.
        int rb = 0x78 + row * 6;
        int v0 = (int16_t)(mem[rb]     | (mem[rb + 1] << 8));
        int v1 = (int16_t)(mem[rb + 2] | (mem[rb + 3] << 8));
        int v2 = (int16_t)(mem[rb + 4] | (mem[rb + 5] << 8));
        printf(" row %d: %6d %6d %6d\n", row, v0, v1, v2);
    }
    // Cascade intermediates -- last values left after the routine
    // returned. $72/$74/$76 are the rotated-angle inputs;
    // $CB/$CD/$CF and $18/$D4/$D6 are the cos/sin lookups feeding
    // the matrix construction.
    if (getenv("FS2TRACE_MATRIX_DUMP") != NULL) {
#define R16(addr) ((int16_t)(mem[(addr)] | (mem[(addr) + 1] << 8)))
        printf("intermediates:\n");
        printf(" $72/$73 = %d ($%02X%02X)\n", R16(0x72), mem[0x73], mem[0x72]);
        printf(" $74/$75 = %d ($%02X%02X)\n", R16(0x74), mem[0x75], mem[0x74]);
        printf(" $76/$77 = %d ($%02X%02X)\n", R16(0x76), mem[0x77], mem[0x76]);
        printf(" $CB/$CC = %d (sin of $72)\n", R16(0xCB));
        printf(" $CD/$CE = %d (sin of $74)\n", R16(0xCD));
        printf(" $CF/$D0 = %d (sin of $76)\n", R16(0xCF));
        printf(" $18/$19 = %d (cos of $72)\n", R16(0x18));
        printf(" $D4/$D5 = %d (cos of $74)\n", R16(0xD4));
        printf(" $D6/$D7 = %d (cos of $76)\n", R16(0xD6));
        printf(" $BA/$BB = %d (cos of VD<<4)\n", R16(0xBA));
        printf(" $BE/$BF = %d (sin of VD<<4)\n", R16(0xBE));
#undef R16
    }
    return 0;
}
// loadOriginalChunks: place chunk4 (at $0200) and chunk5 (at $6000)
// into mem[] from out/*-built. Used by the --zpscale and --l177b
// probes which target chunk4 routines in isolation -- the boot RAM
// dump shifts chunk4 code by 2 bytes due to Apply64KPatchTable, so
// L177B / ScaleC2ByC4 land at different addresses there. The .built
// binaries are unpatched.
// Wipe mem[] and then place each unpatched chunk image at its load
// address. loadChunk() reports unreadable files on stderr; its return
// value is not examined here.
static void loadOriginalChunks(void) {
    static const struct {
        const char *path;
        uint16_t    base;
        size_t      span;
    } images[] = {
        { "/home/scott/claude/flight/out/4_0200-25ff", 0x0200, 0x2400 },
        { "/home/scott/claude/flight/out/5_6000-b3df", 0x6000, 0x53E0 },
    };
    memset(mem, 0, MEM_SIZE);
    for (size_t k = 0; k < sizeof images / sizeof images[0]; k++) {
        loadChunk(images[k].path, images[k].base, images[k].span);
    }
}
// --scenery [ramfile]: runs the captured chunk5 ProcessScenery
// against a RAM image and counts how many DrawColorSpan calls fire.
// This tells us "how much MAME-equivalent scenery would draw if we
// ran the actual interpreter against this RAM state". Compare to
// the port's `SCENERY_STATS=1 draws=N` to see where the port
// diverges. Default RAM image is tmp/capture_boot.bin.
//
// The capture is patched by Apply64KPatchTable at runtime, so chunk5
// addresses differ from the source-listing values. The jump table at
// $6000-$6020 provides indirection: $6006 jumps to ProcessScenery,
// $601B (DrawColorSpanRelay) jumps to DrawColorSpan. We use $6006
// as the entry and watch for PC entering DrawColorSpan via the
// $601B relay's target.
// Returns 0 after all three calls have been attempted, or 1 when the
// RAM image cannot be read in full or $601B does not hold a JMP opcode.
// All diagnostics go to stderr.
static int sceneryMode(int argc, char **argv) {
    const char *ramPath = (argc > 2)
        ? argv[2]
        : "/home/scott/claude/flight/tmp/capture_boot.bin";
    FILE *rf = fopen(ramPath, "rb");
    if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; }
    if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
        fprintf(stderr, "RAM image short read\n");
        fclose(rf);
        return 1;
    }
    fclose(rf);
    // Resolve DrawColorSpan via the $601B jump table slot
    // (DrawColorSpanRelay).
    if (mem[0x601B] != 0x4C) {
        fprintf(stderr, "expected JMP at $601B, got $%02X\n", mem[0x601B]);
        return 1;
    }
    uint16_t drawColorSpanPC = (uint16_t)(mem[0x601C] | (mem[0x601D] << 8));
    // Reset the scenery cursor to the dispatcher entry (LA7E0 in
    // the source = mem[$A7E0/$A7E1]). The captured cursor at
    // $8B/$8C is the END-OF-FRAME position; without resetting
    // we'd walk past the dispatcher into chunk2 territory.
    uint16_t dispatcherEntry = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
    mem[0x8B] = (uint8_t)( dispatcherEntry       & 0xFF);
    mem[0x8C] = (uint8_t)((dispatcherEntry >> 8) & 0xFF);
    // Clear scenery in-progress flags so ProcessScenery starts
    // fresh (chunk5.s lines 1053-1062).
    mem[0x08F3] = 0;
    mem[0x090A] = 0;
    mem[0x08A9] = 0;
    mem[0x08C4] = 0;
    mem[0x008A] = 0;
    // Invalidate HEADER section cache so demand-loads fire.
    mem[0x08EA] = mem[0x08EB] = mem[0x08EC] = mem[0x08ED] = 0;
    fprintf(stderr, "scenery: reset cursor to LA7E0 = $%04X, DrawColorSpan at $%04X\n",
            dispatcherEntry, drawColorSpanPC);
    // EmitClippedLine isn't in the jump table; locate via byte
    // pattern would be ideal, but counting DrawColorSpan suffices
    // -- every line eventually goes through it.
    reg_s = 0xFF;
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    stop = 0;
    long drawCount = 0;
    long opCount = 0;
    long emitOpCount = 0; // count cursor-driven scenery opcodes
    // Address of SceneryInterpreterStep / SceneryDispatch in capture
    // (boot patches relocate it). Find by searching for the byte
    // pattern `B1 8B 30 ?? C9 46 30 ??`
    // (lda ($8B),y; bmi <off>; cmp #$46; bmi <off>).
    // fetchPC stays 0 when the pattern is absent, which disables the
    // per-fetch logging below.
    uint16_t fetchPC = 0;
    for (int i = 0x6000; i < 0xB400; i++) {
        if (mem[i] == 0xB1 && mem[i+1] == 0x8B && mem[i+2] == 0x30
            && mem[i+4] == 0xC9 && mem[i+5] == 0x46 && mem[i+6] == 0x30) {
            fetchPC = (uint16_t)i;
            break;
        }
    }
    fprintf(stderr, " scenery fetch at $%04X (lda ($8B),y)\n", fetchPC);
    // The environment does not change while we run, so look the
    // logging switch up once instead of on every scenery fetch.
    const bool dumpBase = (getenv("FS2TRACE_BASE") != NULL);
    // Sequence in chunk5 MainLoop (chunk5.s line 5403+):
    //   SetupViewProjection ($6000)
    //   ShowSimpleCrashMessage / HandleCrashOrSplash (skipped here)
    //   FlipPagesFillViewportRelay ($6003) -- runs a SECOND scenery
    //     interpreter pass on the data at $8B/$8C derived from $77
    //     (= the boot pre-render that draws horizon-line water/sky
    //     features in violet/blue, leaving STALE bytes in the hires
    //     page that the main scenery pass doesn't overwrite).
    //   ProcessScenery ($6006)
    const uint16_t calls[3] = { 0x6000, 0x6003, 0x6006 };
    const char *callName[3] = { "SetupViewProjection", "FlipPagesFillViewport", "ProcessScenery" };
    for (int callIdx = 0; callIdx < 3; callIdx++) {
        reg_a = reg_x = reg_y = 0;
        // Sentinel return address: the routine's final RTS lands at
        // $FFFF, which the loop below treats as "returned".
        push16(0xFFFE);
        pc = calls[callIdx];
        fprintf(stderr, " -- %s ($%04X) --\n", callName[callIdx], pc);
        long startOps = opCount;
        long startDraws = drawCount;
        long startFetches = emitOpCount;
        uint16_t lastPC = 0;
        long opsSinceFetch = 0;
        long maxOpsSinceFetch = 0;
        for (cycles = 0; cycles < 20000000 && !stop; ) {
            if (pc == 0xFFFF || pc == 0x0000) {
                fprintf(stderr, " EXIT pc=$%04X after %ld ops, ops-since-last-fetch=%ld\n",
                        pc, opCount - startOps, opsSinceFetch);
                break;
            }
            if (pc == drawColorSpanPC) {
                drawCount++;
            }
            if (fetchPC != 0 && pc == fetchPC) {
                emitOpCount++;
                uint16_t cur = (uint16_t)(mem[0x8B] | (mem[0x8C] << 8));
                if (dumpBase) {
                    // Print $4A..$52 BEFORE this op runs
                    // so we can pair "before $24" with
                    // "after $24" for diff.
                    fprintf(stderr,
                            " fetch #%ld cursor=$%04X opcode=$%02X base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] cam=[%02X%02X,%02X%02X,%02X%02X]\n",
                            emitOpCount - startFetches,
                            cur, mem[cur],
                            mem[0x4A], mem[0x4B], mem[0x4C],
                            mem[0x4D], mem[0x4E], mem[0x4F],
                            mem[0x50], mem[0x51], mem[0x52],
                            mem[0x67], mem[0x66],
                            mem[0x69], mem[0x68],
                            mem[0x6B], mem[0x6A]);
                } else {
                    fprintf(stderr,
                            " fetch #%ld cursor=$%04X opcode=$%02X (S=$%02X)\n",
                            emitOpCount - startFetches,
                            cur, mem[cur], reg_s);
                }
                if (opsSinceFetch > maxOpsSinceFetch) {
                    maxOpsSinceFetch = opsSinceFetch;
                }
                opsSinceFetch = 0;
            }
            lastPC = pc;
            opsSinceFetch++;
            opCount++;
            step();
        }
        fprintf(stderr,
                " last pc=$%04X reg_s=$%02X stop=%d max_ops_between_fetches=%ld\n",
                lastPC, reg_s, stop, maxOpsSinceFetch);
        fprintf(stderr, " %ld ops, %ld scenery-fetches, %ld draws\n",
                opCount - startOps, emitOpCount - startFetches,
                drawCount - startDraws);
        if (callIdx == 0) {
            // After SetupViewProjection, dump the matrix so
            // we know the projection cascade succeeded.
            fprintf(stderr,
                    " matrix $78..$89: "
                    "[%d %d %d / %d %d %d / %d %d %d]\n",
                    (int16_t)(mem[0x78] | (mem[0x79]<<8)),
                    (int16_t)(mem[0x7A] | (mem[0x7B]<<8)),
                    (int16_t)(mem[0x7C] | (mem[0x7D]<<8)),
                    (int16_t)(mem[0x7E] | (mem[0x7F]<<8)),
                    (int16_t)(mem[0x80] | (mem[0x81]<<8)),
                    (int16_t)(mem[0x82] | (mem[0x83]<<8)),
                    (int16_t)(mem[0x84] | (mem[0x85]<<8)),
                    (int16_t)(mem[0x86] | (mem[0x87]<<8)),
                    (int16_t)(mem[0x88] | (mem[0x89]<<8)));
        }
    }
    fprintf(stderr, "Total: %ld ops, %ld scenery-fetches, %ld DrawColorSpan calls\n",
            opCount, emitOpCount, drawCount);
    return 0;
}
// --draws: run the chunk5 main-loop sequence (SetupViewProjection,
// FlipPagesFillViewport, ProcessScenery) against a captured RAM image
// that already holds the patched chunk5, watching the DrawColorLine
// entry (located by byte signature) and printing every line drawn.
// This produces a bit-exact reference list of polygons chunk5 would
// emit given the supplied input state -- the answer to "what should
// port draw to match the original FS2 binary."
//
// Usage:
// fs2trace --draws [ram.bin]
// FS2TRACE_PORT_STATE=1 sets up port-equivalent ZP for Meigs boot
// (camera $5C/$5D=287, $64/$65=804, $6C/$6D=-109, etc.) and
// overrides the matrix to MAME's runtime values exactly.
//
// Output: one line per draw with screen X1/Y1/X2/Y2 and the V1/V2
// 3D coordinates from $CB..$D0 + $D4..$D9 at the moment of the call.
// Returns 0 after all three passes have been attempted, or 1 when the
// RAM image cannot be read in full or DrawColorLine cannot be located.
// "draw" / "span" records go to stdout; diagnostics go to stderr.
static int drawsMode(int argc, char **argv) {
    const char *ramPath = (argc > 2)
        ? argv[2]
        : "/home/scott/claude/flight/port/sceneryRam_FS2.1.bin";
    FILE *rf = fopen(ramPath, "rb");
    if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; }
    if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
        fprintf(stderr, "RAM image short read\n");
        fclose(rf);
        return 1;
    }
    fclose(rf);
    // The captured RAM is the MAME-patched chunk5 (Apply64KPatchTable
    // installed at boot). Don't overlay the unpatched source --
    // that would clobber the runtime patches AND replace the
    // dispatcher bytecode at $A800+ (which only exists in the
    // patched RAM; source has dispatcher data at $B000).
    // The patched chunk5 has dispatcher entry at $A800 = LA7E0.
    //
    // chunk3 ($D300-$F3FF) holds the 64K callbacks (LookupADFStation
    // etc.) the patched chunk5 jumps into for $05/$1D/$1E station
    // records, ADF input, magneto state, etc. The boot RAM dump
    // captures the ROM bank at $D300+ (= zeros) instead of the
    // LC-RAM-resident chunk3, so we have to overlay the source binary;
    // routine addresses in source aren't quite the same as MAME's
    // runtime layout but the in-record advance values are unaffected.
    // chunk2 ($F600-$FBFF) holds a few helper routines.
    // Both overlays are best-effort: a missing file is skipped.
    FILE *cf = fopen("/home/scott/claude/flight/out/3_d300-f3ff", "rb");
    if (cf != NULL) {
        size_t n = fread(mem + 0xD300, 1, 0xF400 - 0xD300, cf);
        fclose(cf);
        fprintf(stderr, "loaded chunk3 (%zu bytes)\n", n);
    }
    cf = fopen("/home/scott/claude/flight/out/2_f600-fbff", "rb");
    if (cf != NULL) { (void)fread(mem + 0xF600, 1, 0xFC00 - 0xF600, cf); fclose(cf); }
    // Override the SceneryOpADFRecord / NAVRecord / COMRecord
    // patches: source-binary chunk3 has these routines at
    // different addresses than MAME's runtime, so the patched JMP
    // targets in port_ram point at random source bytes. Restore
    // the unpatched 48K behaviour (= just advance past the record)
    // so the dispatcher doesn't crash entering chunk3. Station
    // records don't draw anything anyway.
    // SceneryOpADFRecord at $6021: source = "lda #$09; jmp $67FD"
    // (= advance 9, continue). The patched JMP $DB3F would call
    // chunk3 LookupADFStation but that maps to a different routine
    // in source, so undo the patch.
    // SceneryOpAdvanceAndContinue: in source chunk5 the entry
    // trampoline at $6018 is `JMP SceneryOpAdvanceAndContinue`. The
    // patched RAM preserves that trampoline, so we just read the
    // target out of $6019/$601A.
    uint16_t advanceAndContinue = 0;
    if (mem[0x6018] == 0x4C) {
        advanceAndContinue = (uint16_t)(mem[0x6019] | (mem[0x601A] << 8));
    }
    fprintf(stderr, " SceneryOpAdvanceAndContinue at $%04X\n", advanceAndContinue);
    if (mem[0x6021] == 0x4C && advanceAndContinue != 0) { // patched JMP -> chunk3
        mem[0x6021] = 0xA9; // lda
        mem[0x6022] = 0x09; // #$09
        mem[0x6023] = 0x4C; // jmp
        mem[0x6024] = (uint8_t)( advanceAndContinue       & 0xFF);
        mem[0x6025] = (uint8_t)((advanceAndContinue >> 8) & 0xFF);
        fprintf(stderr, " unpatched SceneryOpADFRecord ($6021) -> $%04X\n", advanceAndContinue);
    }
    // Same for NAVRecord and COMRecord -- locate by SceneryOpcodeTable
    // entries for $1D and $1E.
    // Find SceneryOpcodeTable: `cmp #$46; bmi <target>` then the
    // target is SceneryDispatch which loads from the table. Easier
    // to search the table itself: the first entry should point
    // at SceneryOpEmitV1XformAndPlot; the $1D entry is at
    // table+$1D*2 and points at SceneryOpNAVRecord. We can find
    // the table by looking for a known entry sequence.
    // For now, scan the dispatcher area for any byte sequence that
    // looks like `JMP <chunk3>` (= $4C $xx $D[XYZ]) and patch
    // back to advance-and-continue with appropriate length.
    // chunk5.s says: $1D = NAVRecord (11-byte), $1E = COMRecord
    // (variable-length).
    // SceneryOpNAVRecord at... actually NAVRecord might or might
    // not be patched the same way. Be defensive: scan the
    // SceneryOpcodeTable for the $1D handler address and check
    // for a JMP-pattern there.
    // NOTE(review): none of the NAV/COM handling described above is
    // implemented in this function; only the ADF record is unpatched.
    //
    // DrawColorLine entry: in the patched binary, $6009 = JMP (relayed),
    // and $601B = JMP DrawColorSpan. DrawColorLine itself is the
    // function called from EmitClippedLine. Find its entry by
    // scanning for the byte signature `lda $E9; sec; sbc $EB`
    // (= chunk5.s line 3556).
    uint16_t drawColorLinePC = 0;
    for (int i = 0x6000; i < 0xB400; i++) {
        if (mem[i] == 0xA5 && mem[i+1] == 0xE9
            && mem[i+2] == 0x38
            && mem[i+3] == 0xE5 && mem[i+4] == 0xEB) {
            drawColorLinePC = (uint16_t)i;
            break;
        }
    }
    if (drawColorLinePC == 0) {
        fprintf(stderr, "could not locate DrawColorLine\n");
        return 1;
    }
    fprintf(stderr, "DrawColorLine entry: $%04X\n", drawColorLinePC);
    // DrawColorSpan entry: chunk5 trampoline at $601B is `JMP DrawColorSpan`.
    // Left at 0 (= span tracking disabled) if $601B is not a JMP.
    uint16_t drawColorSpanPC = 0;
    if (mem[0x601B] == 0x4C) {
        drawColorSpanPC = (uint16_t)(mem[0x601C] | (mem[0x601D] << 8));
    }
    fprintf(stderr, "DrawColorSpan entry: $%04X\n", drawColorSpanPC);
    // SetEvenAndOddColorsAndPrepRowRoutine: scan for the byte signature
    // `lda ColorTableEven,x; sta ColorByteEven` (= chunk5.s lines
    // 3902-3903 = `BD ?? ?? 8D ?? ??`). The ColorTableEven address is
    // at $7A00ish in MAME RAM, and ColorByteEven is some self-modified
    // operand. The pattern matched below is
    // `BD ?? ?? 8D ?? ?? BD ?? ?? 8D ?? ?? 8A`; the result stays 0
    // (= tracking disabled) when it is absent.
    uint16_t setEvenAndOddPC = 0;
    for (int i = 0x7000; i < 0xB400; i++) {
        if (mem[i] == 0xBD          // LDA abs,x
            && mem[i+3] == 0x8D     // STA abs
            && mem[i+6] == 0xBD     // LDA abs,x
            && mem[i+9] == 0x8D     // STA abs
            && mem[i+12] == 0x8A) { // TXA
            setEvenAndOddPC = (uint16_t)i;
            break;
        }
    }
    fprintf(stderr, "SetEvenAndOddColorsAndPrepRowRoutine entry: $%04X\n", setEvenAndOddPC);
    // Set up port-equivalent ZP state. The values mirror what
    // port's runScreenshot + sceneryAttachCamera produce.
    if (getenv("FS2TRACE_PORT_STATE") != NULL) {
        // Camera position in scenery units ($5C=287, $64=804).
        mem[0x5C] = 0x1F; mem[0x5D] = 0x01;
        mem[0x64] = 0x24; mem[0x65] = 0x03;
        // Altitude pair ($5E/$5F = 768 from boot; $60/$61 = 0).
        mem[0x5E] = 0x00; mem[0x5F] = 0x03;
        mem[0x60] = 0x00; mem[0x61] = 0x00;
        // Rotation inputs ($6C/$6D = -109 yaw/X-axis;
        // $6E/$6F = 0 pitch/Z-axis; $70/$71 = 0 bank/Y-axis).
        mem[0x6C] = 0x93; mem[0x6D] = 0xFF;
        mem[0x6E] = 0x00; mem[0x6F] = 0x00;
        mem[0x70] = 0x00; mem[0x71] = 0x00;
        mem[0x0A70] = 0x00; // ViewDirection
        // Camera-section deltas. Port's sceneryAttachCamera
        // sets $66/$67=0, $68/$69 = wyUnits (= cam.worldY=25
        // metres for Meigs boot, no scaling), $6A/$6B=0. Reset
        // to match.
        mem[0x66] = 0x00; mem[0x67] = 0x00;
        mem[0x68] = 0x19; mem[0x69] = 0x00; // 25
        mem[0x6A] = 0x00; mem[0x6B] = 0x00;
        fprintf(stderr, "FS2TRACE_PORT_STATE: ZP set for Meigs boot\n");
    }
    // Run SetupViewProjection ($6000) to compute the matrix at
    // $78..$89 from the just-set $6C/$6E/$70 inputs. The pushed
    // $FFFE is a sentinel return address: the final RTS lands at
    // $FFFF, which ends the loop.
    reg_a = reg_x = reg_y = 0;
    reg_s = 0xFF;
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    stop = 0;
    pc = 0x6000;
    push16(0xFFFE);
    for (cycles = 0; cycles < 5000000 && !stop; ) {
        if (pc == 0xFFFF || pc == 0x0000) break;
        step();
    }
    fprintf(stderr,
            " matrix: row0=(%d,%d,%d) row1=(%d,%d,%d) row2=(%d,%d,%d)\n",
            (int16_t)(mem[0x78] | (mem[0x79]<<8)),
            (int16_t)(mem[0x7A] | (mem[0x7B]<<8)),
            (int16_t)(mem[0x7C] | (mem[0x7D]<<8)),
            (int16_t)(mem[0x7E] | (mem[0x7F]<<8)),
            (int16_t)(mem[0x80] | (mem[0x81]<<8)),
            (int16_t)(mem[0x82] | (mem[0x83]<<8)),
            (int16_t)(mem[0x84] | (mem[0x85]<<8)),
            (int16_t)(mem[0x86] | (mem[0x87]<<8)),
            (int16_t)(mem[0x88] | (mem[0x89]<<8)));
    // Reset cursor to LA7E0 = $A800 (= clean dispatch start).
    uint16_t dispatcherEntry = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
    mem[0x8B] = (uint8_t)( dispatcherEntry       & 0xFF);
    mem[0x8C] = (uint8_t)((dispatcherEntry >> 8) & 0xFF);
    // Clear in-progress flags so ProcessScenery starts fresh.
    mem[0x08F3] = 0;
    mem[0x090A] = 0;
    mem[0x08A9] = 0;
    mem[0x08C4] = 0;
    mem[0x008A] = 0;
    // Invalidate HEADER section cache so demand-loads fire (= match
    // port's sceneryCacheInvalidated path).
    mem[0x08EA] = mem[0x08EB] = mem[0x08EC] = mem[0x08ED] = 0;
    // Reset $35/$36 (L631D base cache) so first $07/$24 forces
    // a full base recompute.
    mem[0x35] = 0;
    mem[0x36] = 0;
    fprintf(stderr, " cursor LA7E0 = $%04X\n", dispatcherEntry);
    // Find dispatcher fetch instruction (= the LDA ($8B),Y in
    // SceneryInterpreterStep). Same heuristic as sceneryMode.
    uint16_t fetchPC = 0;
    for (int i = 0x6000; i < 0xB400; i++) {
        if (mem[i] == 0xB1 && mem[i+1] == 0x8B && mem[i+2] == 0x30
            && mem[i+4] == 0xC9 && mem[i+5] == 0x46 && mem[i+6] == 0x30) {
            fetchPC = (uint16_t)i;
            break;
        }
    }
    fprintf(stderr, " scenery fetch instruction at $%04X\n", fetchPC);
    // Run the chunk5 main-loop sequence: SetupViewProjection ($6000),
    // FlipPagesFillViewport ($6003), ProcessScenery ($6006). The
    // FlipPages pass is a SECOND scenery interpreter run on $0A78
    // data (= boot pre-render), which can draw water/horizon polygons
    // in HIRES_VIOLET that ProcessScenery never touches.
    reg_s = 0xFF;
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    long drawCount = 0;
    long opCount = 0;
    long fetchCount = 0;
    int prevPCWasNotDraw = 1;
    printf("# fs2trace --draws: chunk5 DrawColorLine sequence (3-pass)\n");
    // Logging switches. The environment does not change while we run,
    // so each is looked up once here rather than inside the
    // per-instruction loop.
    const char *trace = getenv("FS2TRACE_DRAWS_TRACE");
    const bool frameDump = (getenv("FS2TRACE_FRAME_DUMP") != NULL);
    const bool vtxDump   = (getenv("FS2TRACE_VTX_DUMP") != NULL);
    const bool logColors = (getenv("FS2TRACE_LOG_COLORS") != NULL);
    const uint16_t passes[3] = { 0x6000, 0x6003, 0x6006 };
    const char *passName[3] = { "SetupViewProjection", "FlipPagesFillViewport", "ProcessScenery" };
    for (int passIdx = 0; passIdx < 3; passIdx++) {
        stop = 0;
        reg_a = reg_x = reg_y = 0;
        pc = passes[passIdx];
        push16(0xFFFE);
        fprintf(stderr, " ===== %s ($%04X) =====\n", passName[passIdx], pc);
        long passStartOps = opCount;
        long passStartDraws = drawCount;
        uint16_t lastPC = 0;
        for (cycles = 0; cycles < 1000000000 && !stop; ) {
            if (pc == 0xFFFF || pc == 0x0000) {
                fprintf(stderr, " hit sentinel pc=$%04X (last pc=$%04X) at op %ld\n",
                        pc, lastPC, opCount);
                break;
            }
            if (fetchPC != 0 && pc == fetchPC) {
                fetchCount++;
                if (trace != NULL) {
                    uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
                    fprintf(stderr,
                            " fetch #%ld cur=$%04X op=$%02X $29=$%02X\n",
                            fetchCount, cur, mem[cur], mem[0x29]);
                }
                // FS2TRACE_FRAME_DUMP: log state at every $24/$07
                // op fetch + after it executes, so port-vs-MAME
                // frame-setup divergence can be located precisely.
                if (frameDump) {
                    static uint8_t prevFrameOp = 0;
                    static uint16_t prevFrameCur = 0;
                    if (prevFrameOp == 0x24 || prevFrameOp == 0x07) {
                        // The operand index wraps to 16 bits so a
                        // cursor of $FFFF cannot read past mem[].
                        fprintf(stderr,
                                " POST-$%02X(var=$%02X) cam=(%d,%d,%d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] $19=$%02X $1C=$%02X $1F=$%02X $35=$%02X $36=$%02X (was cur=$%04X)\n",
                                prevFrameOp,
                                mem[(uint16_t)(prevFrameCur + 1)],
                                (int16_t)(mem[0x66] | (mem[0x67] << 8)),
                                (int16_t)(mem[0x68] | (mem[0x69] << 8)),
                                (int16_t)(mem[0x6A] | (mem[0x6B] << 8)),
                                mem[0x4A], mem[0x4B], mem[0x4C],
                                mem[0x4D], mem[0x4E], mem[0x4F],
                                mem[0x50], mem[0x51], mem[0x52],
                                mem[0x19], mem[0x1C], mem[0x1F],
                                mem[0x35], mem[0x36],
                                prevFrameCur);
                    }
                    uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
                    if (mem[cur] == 0x24 || mem[cur] == 0x07) {
                        prevFrameOp = mem[cur];
                        prevFrameCur = cur;
                    } else {
                        prevFrameOp = 0;
                    }
                }
                // FS2TRACE_VTX_DUMP: log V1/V2 (= mem[$CB..$D0] /
                // mem[$D4..$D9]) on the fetch that follows a
                // $40/$41/$42 op, so we can diff our port's
                // transform output against the authentic FS2 trace
                // per-vertex.
                if (vtxDump) {
                    static uint8_t prevOp = 0;
                    if (prevOp == 0x40 || prevOp == 0x41 || prevOp == 0x42) {
                        int16_t v1x = (int16_t)(mem[0xCB] | (mem[0xCC] << 8));
                        int16_t v1y = (int16_t)(mem[0xCD] | (mem[0xCE] << 8));
                        int16_t v1z = (int16_t)(mem[0xCF] | (mem[0xD0] << 8));
                        int16_t v2x = (int16_t)(mem[0xD4] | (mem[0xD5] << 8));
                        int16_t v2y = (int16_t)(mem[0xD6] | (mem[0xD7] << 8));
                        int16_t v2z = (int16_t)(mem[0xD8] | (mem[0xD9] << 8));
                        fprintf(stderr,
                                " POST-$%02X V1=(%6d,%6d,%6d) V2=(%6d,%6d,%6d) cam=(%d,%d,%d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X]\n",
                                prevOp, v1x, v1y, v1z, v2x, v2y, v2z,
                                (int16_t)(mem[0x66] | (mem[0x67] << 8)),
                                (int16_t)(mem[0x68] | (mem[0x69] << 8)),
                                (int16_t)(mem[0x6A] | (mem[0x6B] << 8)),
                                mem[0x4A], mem[0x4B], mem[0x4C],
                                mem[0x4D], mem[0x4E], mem[0x4F],
                                mem[0x50], mem[0x51], mem[0x52]);
                    }
                    uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
                    prevOp = mem[cur];
                }
            }
            // Trace flow inside chunk3 (= 64K patched callbacks).
            if (trace != NULL && pc >= 0xD300 && pc < 0xF400 && opCount > 2400) {
                fprintf(stderr, " chunk3 pc=$%04X A=$%02X X=$%02X Y=$%02X SP=$%02X\n",
                        pc, reg_a, reg_x, reg_y, reg_s);
            }
            // Detect tight 6502 loops — same PC for many consecutive
            // ops. Once a single PC has been seen 50000 times in a
            // row, log and abandon this pass.
            static uint16_t stuckPC = 0;
            static long stuckCount = 0;
            if (pc == stuckPC) {
                stuckCount++;
                if (stuckCount == 50000) {
                    fprintf(stderr,
                            " STUCK at pc=$%04X for 50k ops at op=%ld (cur=$%04X)\n",
                            pc, opCount, mem[0x8B] | (mem[0x8C] << 8));
                    break;
                }
            } else {
                stuckPC = pc;
                stuckCount = 1;
            }
            lastPC = pc;
            // Track active hires color (= last A passed to SetPixelDrawMode).
            // SetPixelDrawMode is at the JMP target stored at $6010-$6011
            // (= chunk5 trampoline `JMP SetPixelDrawMode` at $600F).
            static uint16_t setPixelDrawModePC = 0xFFFF;
            static uint8_t curHiresColor = 0xFF;
            if (setPixelDrawModePC == 0xFFFF) {
                // Resolve once: chunk5 has `4C lo hi` at $600F.
                if (mem[0x600F] == 0x4C) {
                    setPixelDrawModePC = (uint16_t)(mem[0x6010] | (mem[0x6011] << 8));
                    fprintf(stderr, " SetPixelDrawMode resolved at $%04X\n", setPixelDrawModePC);
                }
            }
            if (setPixelDrawModePC != 0xFFFF && pc == setPixelDrawModePC) {
                curHiresColor = reg_a;
                if (logColors) {
                    fprintf(stderr, " SetPixelDrawMode A=$%02X (hires color %d) at op=%ld\n",
                            reg_a, reg_a & 0x07, opCount);
                }
            }
            // Track SetEvenAndOddColorsAndPrepRowRoutine -- the SPAN
            // fill color setter. On entry X = hires color code.
            if (setEvenAndOddPC != 0 && pc == setEvenAndOddPC) {
                curHiresColor = reg_x;
                if (logColors) {
                    fprintf(stderr, " SetEvenAndOddColors X=$%02X (hires color %d) at op=%ld\n",
                            reg_x, reg_x & 0x07, opCount);
                }
            }
            // Track DrawColorSpan calls (= horizontal span fill) so we can
            // see polygon FILLS in addition to line draws. ZP $E9 = start col,
            // A on entry = end col, $27 = right edge. The edge flag makes
            // sure one entry is logged once, not once per op spent at
            // the entry address.
            static int prevPCWasNotSpan = 1;
            if (drawColorSpanPC != 0 && pc == drawColorSpanPC && prevPCWasNotSpan) {
                uint8_t startCol = mem[0xE9];
                uint8_t endCol = reg_a;
                uint8_t row = mem[0xB1];
                printf("span %4ld col=$%02X: row=%3d startCol=%3d endCol=%3d\n",
                       drawCount, curHiresColor, row, startCol, endCol);
                prevPCWasNotSpan = 0;
            } else if (pc != drawColorSpanPC) {
                prevPCWasNotSpan = 1;
            }
            if (pc == drawColorLinePC && prevPCWasNotDraw) {
                // First instruction of DrawColorLine. Capture
                // the line endpoints + V1/V2.
                uint8_t x1 = mem[0xE9];
                uint8_t y1 = mem[0xEA];
                uint8_t x2 = mem[0xEB];
                uint8_t y2 = mem[0xEC];
                int16_t v1x = (int16_t)(mem[0xCB] | (mem[0xCC] << 8));
                int16_t v1y = (int16_t)(mem[0xCD] | (mem[0xCE] << 8));
                int16_t v1z = (int16_t)(mem[0xCF] | (mem[0xD0] << 8));
                int16_t v2x = (int16_t)(mem[0xD4] | (mem[0xD5] << 8));
                int16_t v2y = (int16_t)(mem[0xD6] | (mem[0xD7] << 8));
                int16_t v2z = (int16_t)(mem[0xD8] | (mem[0xD9] << 8));
                drawCount++;
                uint16_t curAt = mem[0x8B] | (mem[0x8C] << 8);
                printf("draw %4ld cur=$%04X col=$%02X: (%3d,%3d)-(%3d,%3d) V1=(%6d,%6d,%6d) V2=(%6d,%6d,%6d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] cam=[%02X%02X,%02X%02X,%02X%02X]\n",
                       drawCount, curAt, curHiresColor, x1, y1, x2, y2,
                       v1x, v1y, v1z, v2x, v2y, v2z,
                       mem[0x4A], mem[0x4B], mem[0x4C],
                       mem[0x4D], mem[0x4E], mem[0x4F],
                       mem[0x50], mem[0x51], mem[0x52],
                       mem[0x67], mem[0x66],
                       mem[0x69], mem[0x68],
                       mem[0x6B], mem[0x6A]);
                prevPCWasNotDraw = 0;
            } else if (pc != drawColorLinePC) {
                prevPCWasNotDraw = 1;
            }
            opCount++;
            uint16_t prevPC = pc;
            step();
            if (stop) {
                fprintf(stderr, " step() set stop=1 prevPC=$%04X newPC=$%04X opCount=%ld\n",
                        prevPC, pc, opCount);
                break;
            }
        }
        fprintf(stderr, " pass-%s: %ld ops, %ld draws\n",
                passName[passIdx], opCount - passStartOps, drawCount - passStartDraws);
    }
    fprintf(stderr, "Total: %ld 6502 ops, %ld dispatch fetches, %ld DrawColorLine calls (final pc=$%04X cur=$%04X)\n",
            opCount, fetchCount, drawCount, pc, mem[0x8B] | (mem[0x8C] << 8));
    return 0;
}
// --xform: run chunk5 TransformVertex7EBC ($7EBC) on the original
// binary (= source-faithful, not the captured patched chunk5). Lets
// us validate the C transliteration in chunk5Transform.c against the
// asm for arbitrary inputs.
//
// Usage:
// fs2trace --xform <stream_addr> [ram.bin]
//
// stream_addr is the address of the opcode in the scenery stream; the
// vertex bytes follow it. The routine reads them via ($8B),y and
// computes transformed XYZ at $D4..$D9 (or $CB..$D0). ram.bin supplies
// the matrix/base/camera state (default tmp/capture_boot.bin).
static int xformMode(int argc, char **argv) {
if (argc < 3) {
fprintf(stderr,
"usage: %s --xform <stream_addr_hex> [ram.bin]\n",
argv[0]);
fprintf(stderr,
" Loads RAM image (default tmp/capture_boot.bin) for state\n"
" (matrix/base/camera), then overlays the ORIGINAL chunk5\n"
" binary at $6000-$B3DF (so $7EBC has source bytes).\n"
" Sets cursor $8B/$8C to stream_addr-1 (so opcode is at\n"
" ($8B),0 -- the typical layout when the dispatcher would\n"
" invoke a vertex-emit handler at that opcode), and calls\n"
" TransformVertex7EBC with Y=9 (V2 destination).\n"
" Prints V2 (=$D4..$D9).\n");
return 2;
}
long streamAddr = strtol(argv[2], NULL, 0);
const char *ramPath = (argc > 3)
? argv[3]
: "/home/scott/claude/flight/tmp/capture_boot.bin";
FILE *rf = fopen(ramPath, "rb");
if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; }
if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
fprintf(stderr, "RAM image short read\n");
fclose(rf);
return 1;
}
fclose(rf);
// Overlay ONLY the chunk5 code regions that don't overlap
// loaded scenery sections. At boot Meigs, the loaded section
// sits at ~$B280+, so we overlay $6000-$B27F (= source-faithful
// chunk5 code) and keep $B280+ as the captured section data.
// Also overlay all of chunk4 ($0200-$25FF) which holds the
// ZPScale / cos table -- the captured RAM has it shifted
// and we need the source addresses.
FILE *cf = fopen("/home/scott/claude/flight/out/4_0200-25ff", "rb");
if (cf != NULL) {
fread(mem + 0x0200, 1, 0x2400, cf);
fclose(cf);
}
cf = fopen("/home/scott/claude/flight/out/5_6000-b3df", "rb");
if (cf != NULL) {
fread(mem + 0x6000, 1, 0xB280 - 0x6000, cf);
fclose(cf);
}
// Cursor: opcode at streamAddr, vertex bytes at streamAddr+1.
// $8B/$8C = streamAddr (= the opcode address; ($8B),Y=0 reads
// the opcode, Y=1.. reads vertex bytes -- chunk5's normal layout).
mem[0x008B] = (uint8_t)( streamAddr & 0xFF);
mem[0x008C] = (uint8_t)((streamAddr >> 8) & 0xFF);
fprintf(stderr,
" state: $8B/$8C=$%04X matrix=[%d,%d,%d/%d,%d,%d/%d,%d,%d]\n",
(int)streamAddr,
(int)(int16_t)(mem[0x78] | (mem[0x79] << 8)),
(int)(int16_t)(mem[0x7A] | (mem[0x7B] << 8)),
(int)(int16_t)(mem[0x7C] | (mem[0x7D] << 8)),
(int)(int16_t)(mem[0x7E] | (mem[0x7F] << 8)),
(int)(int16_t)(mem[0x80] | (mem[0x81] << 8)),
(int)(int16_t)(mem[0x82] | (mem[0x83] << 8)),
(int)(int16_t)(mem[0x84] | (mem[0x85] << 8)),
(int)(int16_t)(mem[0x86] | (mem[0x87] << 8)),
(int)(int16_t)(mem[0x88] | (mem[0x89] << 8)));
fprintf(stderr,
" base: ($4A..$4C)=%02X%02X%02X ($4D..$4F)=%02X%02X%02X ($50..$52)=%02X%02X%02X\n",
mem[0x4A], mem[0x4B], mem[0x4C],
mem[0x4D], mem[0x4E], mem[0x4F],
mem[0x50], mem[0x51], mem[0x52]);
fprintf(stderr,
" cam ($66..$6B): %02X %02X %02X %02X %02X %02X\n",
mem[0x66], mem[0x67], mem[0x68], mem[0x69], mem[0x6A], mem[0x6B]);
fprintf(stderr,
" vertex bytes at $%04X: %02X %02X %02X %02X %02X\n",
(int)streamAddr,
mem[streamAddr+0], mem[streamAddr+1], mem[streamAddr+2],
mem[streamAddr+3], mem[streamAddr+4]);
// The asm's TransformVertex7EBC reads destSlot from $E5 (Y
// on entry). ProcessVertex2 sets Y = $D4 before JSR
// (= absolute address of V2 slot), and the routine stores
// output via `sta $00,x` with X loaded from $E5. So Y on
// entry = $D4 (V2) or $CB (V1).
reg_y = 0xD4;
reg_a = reg_x = 0;
reg_s = 0xFF;
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
flag_i = 1;
stop = 0;
pc = 0x7EBC;
push16(0xFFFE);
const char *traceXform = getenv("FS2TRACE_XFORM_TRACE");
for (cycles = 0; cycles < 5000000 && !stop; ) {
if (pc == 0xFFFF || pc == 0x0000) {
break;
}
if (traceXform != NULL && pc >= 0x7EBC && pc <= 0x80AF) {
// Inside TransformVertex7EBC: print state.
fprintf(stderr,
"pc=$%04X A=$%02X X=$%02X Y=$%02X NVZC=%d%d%d%d "
"$18-$1A=%02X%02X%02X $1B-$1D=%02X%02X%02X $1E-$20=%02X%02X%02X "
"$9E$9F=%02X%02X $A2$A3=%02X%02X $2F=%02X\n",
pc, reg_a, reg_x, reg_y,
flag_n, flag_v, flag_z, flag_c,
mem[0x18], mem[0x19], mem[0x1A],
mem[0x1B], mem[0x1C], mem[0x1D],
mem[0x1E], mem[0x1F], mem[0x20],
mem[0x9E], mem[0x9F], mem[0xA2], mem[0xA3],
mem[0x2F]);
}
step();
}
int16_t v2x = (int16_t)((uint16_t)mem[0xD4] | ((uint16_t)mem[0xD5] << 8));
int16_t v2y = (int16_t)((uint16_t)mem[0xD6] | ((uint16_t)mem[0xD7] << 8));
int16_t v2z = (int16_t)((uint16_t)mem[0xD8] | ((uint16_t)mem[0xD9] << 8));
printf("xform stream@$%04X -> V2=(%d,%d,%d)\n",
(int)streamAddr, (int)v2x, (int)v2y, (int)v2z);
return 0;
}
// --zpscale <a16> <b16>: probe chunk4's ScaleC2ByC4 in isolation.
//
// Both arguments are parsed with strtol base 0 and truncated to 16
// bits (negative values wrap to their two's-complement form), then
// stored little-endian in $C2/$C3 and $C4/$C5. chunk4 is reloaded via
// loadOriginalChunks() so the entry address matches the original
// binary rather than the patched capture. Execution starts at $1569
// and runs until the sentinel return address is reached, step() sets
// `stop`, or the one-million-cycle budget is spent. The 16-bit result
// is read back from the registers (A = low byte, X = high byte).
//
// Set FS2TRACE_ZPSCALE_TRACE to log registers and the $C2..$C5 /
// $A7/$A8 cells before every instruction.
//
// Returns 0 after printing the result, 2 on a usage error.
static int zpscaleMode(int argc, char **argv) {
    if (argc < 4) {
        fprintf(stderr, "usage: %s --zpscale <a16> <b16>\n", argv[0]);
        return 2;
    }

    const long a = strtol(argv[2], NULL, 0);
    const long b = strtol(argv[3], NULL, 0);

    loadOriginalChunks();

    // Converting to uint16_t reduces modulo 65536, which maps a
    // negative operand onto its 16-bit two's-complement encoding.
    const uint16_t operands[2] = { (uint16_t)a, (uint16_t)b };
    for (int k = 0; k < 2; k++) {
        const int cell = 0xC2 + 2 * k;      // $C2/$C3, then $C4/$C5
        mem[cell]     = (uint8_t)(operands[k] & 0xFF);
        mem[cell + 1] = (uint8_t)(operands[k] >> 8);
    }

    // Enter the multiply directly at $1569. $C2..$C5 are already
    // populated, so the ZPScale wrapper at $1544 (which marshals
    // operands for an arbitrary output address) is not needed.
    // NOTE(review): an earlier comment here cited $1565; the code and
    // the header both use $1569 -- confirm against chunk4.s.
    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    reg_a = reg_x = reg_y = 0;
    reg_s = 0xFF;
    stop = 0;
    pc = 0x1569;
    push16(0xFFFE);     // sentinel return address

    const bool tracing = (getenv("FS2TRACE_ZPSCALE_TRACE") != NULL);
    cycles = 0;
    while (cycles < 1000000 && !stop && pc != 0xFFFF && pc != 0x0000) {
        if (tracing) {
            fprintf(stderr,
                    "pc=$%04X A=$%02X X=$%02X Y=$%02X C=%d "
                    "C2=$%02X C3=$%02X C4=$%02X C5=$%02X "
                    "A7=$%02X A8=$%02X\n",
                    pc, reg_a, reg_x, reg_y, flag_c,
                    mem[0xC2], mem[0xC3], mem[0xC4], mem[0xC5],
                    mem[0xA7], mem[0xA8]);
        }
        step();
    }

    // Result: A is low byte, X is high byte.
    const uint16_t raw = (uint16_t)((uint16_t)reg_a | ((uint16_t)reg_x << 8));
    int16_t result = (int16_t)raw;
    printf("ScaleC2ByC4(%ld, %ld) = %d\n", a, b, result);
    return 0;
}
// --l177b <a_byte> <x_byte> [ram.bin]: probe chunk4 L177B (cos lookup
// with sub-byte interpolation) in isolation.
//
// A and X are loaded from the two arguments (low 8 bits of each).
// Memory comes from the optional 64K RAM image when one is given,
// otherwise from loadOriginalChunks(). The routine runs until the
// sentinel return address is reached, step() sets `stop`, or the
// one-million-cycle budget is spent; the int16 result is then read
// from A (low byte) and X (high byte).
//
// Environment:
//   FS2TRACE_USE_L1778    enter at $1778 (sin entry) instead of $177B
//   FS2TRACE_PC           explicit entry PC; wins over the above
//   FS2TRACE_L177B_TRACE  log registers/flags before each instruction
//
// Returns 0 after printing the result, 1 if the RAM image cannot be
// loaded, 2 on a usage error.
static int l177bMode(int argc, char **argv) {
    if (argc < 4) {
        fprintf(stderr, "usage: %s --l177b <a_byte> <x_byte> [ram.bin]\n", argv[0]);
        return 2;
    }

    const long aIn = strtol(argv[2], NULL, 0);
    const long xIn = strtol(argv[3], NULL, 0);

    if (argc <= 4) {
        loadOriginalChunks();
    } else {
        const char *ramPath = argv[4];
        FILE *rf = fopen(ramPath, "rb");
        if (rf == NULL) {
            fprintf(stderr, "cannot open %s\n", ramPath);
            return 1;
        }
        const size_t got = fread(mem, 1, MEM_SIZE, rf);
        fclose(rf);
        if (got != MEM_SIZE) {
            fprintf(stderr, "RAM image short read\n");
            return 1;
        }
    }

    // L177B at $177B; L1778 (sin entry) is 3 bytes earlier.
    // When running against a capture, FS2's boot patches shifted
    // chunk4 code by 2 bytes, so the entry points are at $177D /
    // $177A respectively. That shift is NOT applied automatically:
    // pass it through FS2TRACE_PC, which overrides explicitly.
    uint16_t entry = (getenv("FS2TRACE_USE_L1778") != NULL) ? 0x1778 : 0x177B;
    const char *pcOverride = getenv("FS2TRACE_PC");
    if (pcOverride != NULL) {
        entry = (uint16_t)strtol(pcOverride, NULL, 0);
    }

    flag_n = flag_v = flag_d = flag_z = flag_c = 0;
    flag_i = 1;
    reg_a = (uint8_t)(aIn & 0xFF);
    reg_x = (uint8_t)(xIn & 0xFF);
    reg_y = 0;
    reg_s = 0xFF;
    stop = 0;
    pc = entry;
    push16(0xFFFE);     // sentinel return address

    const bool tracing = (getenv("FS2TRACE_L177B_TRACE") != NULL);
    cycles = 0;
    while (cycles < 1000000 && !stop && pc != 0xFFFF && pc != 0x0000) {
        if (tracing) {
            fprintf(stderr,
                    "pc=$%04X A=$%02X X=$%02X Y=$%02X N=%d Z=%d C=%d\n",
                    pc, reg_a, reg_x, reg_y, flag_n, flag_z, flag_c);
        }
        step();
    }

    const uint16_t raw = (uint16_t)((uint16_t)reg_a | ((uint16_t)reg_x << 8));
    int16_t result = (int16_t)raw;
    printf("L177B(%ld, %ld) = %d\n", aIn, xIn, result);
    return 0;
}
// Entry point.
//
// With a recognised --mode flag as argv[1], control is handed to that
// mode's helper (or, for --nibble, handled inline) and its exit code
// is returned. Otherwise this is the sector-read trace:
//
//   fs2trace [disk-image] [blocks-file] [entry-pc]
//
// It loads the four FS2 chunks and the disk image, patches the 64K
// thunks, fills ReadBlockDataBuffer from the .blocks list, runs one
// or more 6502 entry points and finally prints every traced block.
// Entry selection comes from FS2TRACE_CITY / FS2TRACE_BOOT /
// FS2TRACE_CHAIN, falling back to entry-pc (default $A674).
//
// Returns 0 on a completed run, 1 on an I/O or argument error.
int main(int argc, char **argv) {
    if (argc >= 2) {
        // --matrix mode: chunk5 SetupViewProjection oracle.
        if (strcmp(argv[1], "--matrix") == 0) {
            return matrixMode(argc, argv);
        }
        // --zpscale: probe chunk4 ScaleC2ByC4 in isolation.
        if (strcmp(argv[1], "--zpscale") == 0) {
            return zpscaleMode(argc, argv);
        }
        // --l177b: probe chunk4 cos lookup.
        if (strcmp(argv[1], "--l177b") == 0) {
            return l177bMode(argc, argv);
        }
        // --scenery: count chunk5 ProcessScenery DrawColorSpan calls.
        if (strcmp(argv[1], "--scenery") == 0) {
            return sceneryMode(argc, argv);
        }
        // --xform: chunk5 TransformVertex7EBC oracle.
        if (strcmp(argv[1], "--xform") == 0) {
            return xformMode(argc, argv);
        }
        // --draws: full chunk5 DrawColorLine trace.
        if (strcmp(argv[1], "--draws") == 0) {
            return drawsMode(argc, argv);
        }
    }

    // --nibble in.bin out.bin: run the chunk3 nibble decoder on
    // raw input bytes, emit encoded output. Used to verify the C
    // port matches what the 6502 emulator produces.
    if (argc == 4 && strcmp(argv[1], "--nibble") == 0) {
        // Read and validate the input completely BEFORE creating
        // the output, so a bad input path cannot truncate an
        // existing output file.
        FILE *in = fopen(argv[2], "rb");
        if (in == NULL) {
            fprintf(stderr, "could not open files\n");
            return 1;
        }
        long inLen = -1;
        if (fseek(in, 0, SEEK_END) == 0) {
            inLen = ftell(in);
        }
        if (inLen <= 0 || fseek(in, 0, SEEK_SET) != 0) {
            fprintf(stderr, "cannot size %s (empty or unseekable)\n", argv[2]);
            fclose(in);
            return 1;
        }
        size_t sz = (size_t)inLen;
        uint8_t *buf = malloc(sz);
        bool readOk = (buf != NULL) && (fread(buf, 1, sz, in) == sz);
        fclose(in);
        if (!readOk) {
            fprintf(stderr, "short read\n");
            free(buf);
            return 1;
        }
        FILE *out = fopen(argv[3], "wb");
        if (out == NULL) {
            fprintf(stderr, "could not open files\n");
            free(buf);
            return 1;
        }
        // NOTE(review): nibbleDecode takes no input length, so it
        // presumably expects `buf` to hold at least as many bytes as
        // it consumes -- confirm against its definition.
        uint8_t output[0x3E00 - 0x3B60] = { 0 };
        uint8_t cksum[2] = { 0 };
        int consumed = nibbleDecode(buf, output, cksum);
        free(buf);
        bool wrote = fwrite(output, 1, sizeof(output), out) == sizeof(output)
                  && fwrite(cksum, 1, 2, out) == 2;
        if (fclose(out) != 0) {
            wrote = false;
        }
        if (!wrote) {
            fprintf(stderr, "could not write %s\n", argv[3]);
            return 1;
        }
        fprintf(stderr, "consumed %d input bytes, wrote %zu output bytes\n",
                consumed, sizeof(output) + 2);
        return 0;
    }

    const char *chunk4Path = "/home/scott/claude/flight/out/4_0200-25ff";
    const char *chunk5Path = "/home/scott/claude/flight/out/5_6000-b3df";
    const char *chunk3Path = "/home/scott/claude/flight/out/3_d300-f3ff";
    const char *chunk2Path = "/home/scott/claude/flight/out/2_f600-fbff";
    const char *diskPath   = (argc > 1) ? argv[1] : "/home/scott/claude/flight/orig/flight simulator 2 with scenery PRODOS (san inc pack).po";
    const char *blocksPath = (argc > 2) ? argv[2] : "/home/scott/claude/flight/downloads/scenery/extracted/A2.SDS1.blocks";
    // Default entry: LoadSceneryFile1 (descriptor $0625, 6 sectors from
    // sector $25). LoadSceneryFile0 ($A66B) reads sector $22 first and on
    // four disks (SDS1, SD1, SD3, SD7A) the resulting LA7E0 word is a
    // low-memory address (e.g. $0003) so the bootstrap copy stomps the
    // stack page and the run halts. LoadSceneryFile1 sources LA7E0 from
    // a different file region and lands every disk on a sane chunk5
    // address, so all 13 traces complete to the $FFFF sentinel.
    uint16_t entryPC = (argc > 3) ? (uint16_t)strtol(argv[3], NULL, 0) : 0xA674;

    memset(mem, 0, MEM_SIZE);
    if (!loadChunk(chunk4Path, 0x0200, 0x2400)) return 1;
    if (!loadChunk(chunk5Path, 0x6000, 0x53E0)) return 1;
    if (!loadChunk(chunk3Path, 0xD300, 0x2100)) return 1;
    if (!loadChunk(chunk2Path, 0xF600, 0x0600)) return 1;

    // Disk image.
    FILE *df = fopen(diskPath, "rb");
    if (df == NULL) { fprintf(stderr, "cannot open %s\n", diskPath); return 1; }
    long diskLen = -1;
    if (fseek(df, 0, SEEK_END) == 0) {
        diskLen = ftell(df);
    }
    if (diskLen < 0 || fseek(df, 0, SEEK_SET) != 0) {
        fprintf(stderr, "cannot read %s\n", diskPath);
        fclose(df);
        return 1;
    }
    diskSize = (size_t)diskLen;
    diskImage = malloc(diskSize);
    if (diskImage == NULL || fread(diskImage, 1, diskSize, df) != diskSize) {
        fprintf(stderr, "cannot read %s\n", diskPath);
        fclose(df);
        free(diskImage);
        diskImage = NULL;
        return 1;
    }
    fclose(df);
    fprintf(stderr, "disk: %s (%zu bytes)\n", diskPath, diskSize);

    // Patch the SmartPort entry: when the FS2 code calls $C70D it
    // would normally land in the absent firmware; we trap the read
    // by hooking pc==$C70D inside step().
    // Provide a sentinel BRK so any unintended fall-through halts.
    mem[0xC70D] = 0x00;

    // 64K mode patch: chunk4 has six L1EAD..L1EC1 thunks that the
    // 64K patch table rewrites to JMP into chunk3's
    // SceneryLoaderEntry1..7. We replicate those JMP targets
    // directly so the loader path actually reaches the chunk3
    // implementations (Apply64KPatchTable would otherwise need to
    // be run too).
    struct { uint16_t thunk; uint16_t entry; } patches[] = {
        { 0x1EAD, 0xD3D0 },  // SceneryLoaderEntry1
        { 0x1EB0, 0xD3D3 },  // SceneryLoaderEntry2
        { 0x1EB3, 0xD3D6 },  // SceneryLoaderEntry3
        { 0x1EB6, 0xD3D9 },  // SceneryLoaderEntry4
        { 0x1EB9, 0xD3DC },  // SceneryLoaderEntry5
        { 0x1EBC, 0xD3DF },  // SceneryLoaderEntry6
        { 0x1EC1, 0xD3E2 },  // SceneryLoaderEntry7
    };
    for (size_t i = 0; i < sizeof(patches) / sizeof(patches[0]); i++) {
        uint16_t t = patches[i].thunk;
        uint16_t e = patches[i].entry;
        mem[t]     = 0x4C;  // JMP abs
        mem[t + 1] = (uint8_t)(e & 0xFF);
        mem[t + 2] = (uint8_t)((e >> 8) & 0xFF);
    }
    // Also flag 64K mode (chunk4 L1E07).
    mem[0x1E07] = 0x01;

    // LD5C8 self-extension stub. The real FS2 boot decodes a
    // chunk of code into LD5C8 = ReadBlockDataBuffer + 83 via the
    // protected-disk loader path (SceneryReadDecoded -> nibble
    // decode -> JSR into the decoded code). Without simulating
    // that whole flow, JSR LD5C8 from chunk3 L416 / L514 lands on
    // zero (BRK) and halts the emulator. We patch LD5C8 with a
    // CLC; RTS so those calls are harmless no-ops -- enough to
    // let the rest of the loader proceed end-to-end.
    mem[0xD5C8] = 0x18;  // CLC
    mem[0xD5C9] = 0x60;  // RTS

    // Populate ReadBlockDataBuffer (chunk3 $D575) from the .blocks
    // file produced by prodosextract. The list is a sequence of
    // 16-bit little-endian ProDOS block numbers, one per logical
    // file block. ReadBlockDataBuffer wants block-low bytes at
    // offset 0..255 and block-high bytes at offset 256..511, so
    // we split the entries on load.
    //
    // Block-list cap: chunk3's ReadBlockDataBuffer ($D575) holds
    // 256 low bytes + 256 high bytes = 512 bytes total ($D575 +
    // $D675). FS2 sector counters can index well past the boot's
    // first 16 blocks (e.g. HEADER's section $76 -> entry 240), so
    // we populate the FULL 256 entries. LD5C8 (= buf+83) is
    // overwritten at runtime by the FS2 boot loader anyway.
    // Consequence: low-byte entries 83 and 84 alias $D5C8/$D5C9, so
    // any list longer than 83 entries replaces the CLC/RTS stub
    // written just above.
    FILE *bf = fopen(blocksPath, "rb");
    if (bf == NULL) {
        fprintf(stderr, "warning: cannot open %s; falling back to identity map\n", blocksPath);
        for (int i = 0; i < 256; i++) {
            mem[0xD575 + i]       = (uint8_t)(i & 0xFF);
            // Identity means block i -> i, so the high byte must be
            // zero; otherwise whatever chunk3 bytes were loaded at
            // $D675+ would leak into the block numbers.
            mem[0xD575 + 256 + i] = 0x00;
        }
    } else {
        uint8_t buf[2];
        int i = 0;
        while (i < 256 && fread(buf, 1, 2, bf) == 2) {
            mem[0xD575 + i]       = buf[0];
            mem[0xD575 + 256 + i] = buf[1];
            i++;
        }
        fclose(bf);
        fprintf(stderr, "loaded %d block-list entries from %s\n", i, blocksPath);
    }

    // Run one or more entry points. Default is a single entry
    // (back-compat with previous fs2trace usage). With --chain,
    // run File1+File2+File3+File4 in sequence: each one's RAM
    // changes accumulate, so LA7E0 ends up pointing at the full
    // loaded scenery instead of just the partial File1 result.
    // With --boot, start at MainGameEntry ($ABBA) so
    // Apply64KPatchTable rewrites the PatchSlot_* dispatch slots
    // before any scenery loader runs. PromptColorOrBW is stubbed
    // (default = colour) so the trace doesn't block on a key.
    // (Both switches are read from the environment: FS2TRACE_CHAIN
    // and FS2TRACE_BOOT.)
    bool chainMode = (getenv("FS2TRACE_CHAIN") != NULL);
    bool bootMode  = (getenv("FS2TRACE_BOOT") != NULL);
    // FS2TRACE_CITY=N: load city scenery file N then run MainLoop
    // so its dispatcher's $0D HEADER demand-loads fire. Cities are:
    //   N=0 -> LoadSceneryFile0 ($A66B)
    //   N=1 -> LoadSceneryFile1 ($A674) (Chicago)
    //   N=2 -> LoadSceneryFile2 ($A67D) (LA)
    //   N=3 -> LoadSceneryFile3 ($A686) (Seattle)
    //   N=4 -> LoadSceneryFile4 ($A68F) (NY)
    const char *cityEnv = getenv("FS2TRACE_CITY");
    bool cityMode = (cityEnv != NULL);
    uint16_t entries[8];
    int numEntries = 0;
    if (cityMode) {
        int n = (int)strtol(cityEnv, NULL, 0);
        static const uint16_t cityEntry[5] = {
            0xA66B, 0xA674, 0xA67D, 0xA686, 0xA68F
        };
        if (n >= 0 && n <= 4) {
            // Sequence: full game init → load city's scenery
            // → init dispatcher pointer → run interpreter
            // once. We bypass MainLoop because its
            // PatchSlot_FrameSync resets LA7E0 back to the
            // WW1 dispatcher; instead we directly invoke
            // LoadDispatcherPointer + L6006 (jmp
            // ProcessScenery), so HEADER demand-loads fire
            // against the city dispatcher we just loaded.
            entries[numEntries++] = 0xABBA;         // MainGameEntry init
            entries[numEntries++] = cityEntry[n];   // Load city's dispatcher into LA7E0+
            entries[numEntries++] = 0xA61B;         // LoadDispatcherPointer ($8B = LA7E0)
            entries[numEntries++] = 0x6006;         // jmp ProcessScenery
        } else {
            fprintf(stderr, "FS2TRACE_CITY: invalid index %d (must be 0-4)\n", n);
            free(diskImage);
            diskImage = NULL;
            return 1;
        }
    } else if (bootMode) {
        entries[numEntries++] = 0xABBA;  // MainGameEntry
    } else if (chainMode) {
        entries[numEntries++] = 0xA674;  // LoadSceneryFile1
        entries[numEntries++] = 0xA67D;  // LoadSceneryFile2
        entries[numEntries++] = 0xA686;  // LoadSceneryFile3
        entries[numEntries++] = 0xA68F;  // LoadSceneryFile4
    } else {
        entries[numEntries++] = entryPC;
    }

    FILE *trace = NULL;
    if (getenv("FS2TRACE_PCS") != NULL) {
        trace = fopen("tmp/fs2trace.pcs", "w");
        if (trace == NULL) {
            fprintf(stderr, "warning: cannot open tmp/fs2trace.pcs; PC trace disabled\n");
        }
    }

    // The environment does not change during the run, so read the
    // per-run knobs once instead of once per entry.
    //
    // Boot mode runs the full chunk5 main loop, which
    // never returns -- we need a generous cycle budget so
    // demand-driven scenery loads (triggered by HEADER
    // opcodes during ProcessScenery) get a chance to fire.
    // Override with FS2TRACE_CYCLES if needed.
    const char *cycleEnv = getenv("FS2TRACE_CYCLES");
    int cycleLimit = bootMode ? 200000000 : 5000000;
    if (cycleEnv != NULL) {
        cycleLimit = (int)strtol(cycleEnv, NULL, 0);
    }
    // FS2TRACE_INIT_X / FS2TRACE_INIT_Z: pre-seed the
    // aircraft scenery position (upper-16 of zero-page
    // $5A-$65) before MainGameEntry runs. This forces the
    // scenery dispatcher to fire HEADER demand-loads for the
    // section containing those coords, so we can capture
    // city-specific RAM dumps. Values are 16-bit unsigned
    // (the upper word of the 24-bit scenery coordinate).
    const char *initXEnv = getenv("FS2TRACE_INIT_X");
    const char *initZEnv = getenv("FS2TRACE_INIT_Z");
    const uint16_t patchX = (initXEnv != NULL)
        ? (uint16_t)strtol(initXEnv, NULL, 0) : 0;
    const uint16_t patchZ = (initZEnv != NULL)
        ? (uint16_t)strtol(initZEnv, NULL, 0) : 0;
    const bool forceInBounds = (getenv("FS2TRACE_FORCE_INBOUNDS") != NULL);

    int totalCycles = 0;
    for (int e = 0; e < numEntries; e++) {
        // Initialise machine state for this entry. Memory is
        // preserved across entries (the whole point of chain
        // mode) but registers and stack are reset.
        reg_a = reg_x = reg_y = 0;
        reg_s = 0xFF;
        flag_n = flag_v = flag_d = flag_z = flag_c = 0;
        flag_i = 1;
        stop = 0;
        pc = entries[e];
        // Push a sentinel return address. A 6502 RTS resumes at
        // the pulled address + 1, so when the entry finally RTSes
        // pc becomes $FFFF, which the step loop below treats as
        // clean completion.
        push16(0xFFFE);
        uint16_t lastPCs[16] = { 0 };
        int lastIdx = 0;
        int sawSmartPort = 0;
        int sawFetchSector = 0;
        int blocksBefore = traceCount;

        if (initXEnv != NULL) {
            mem[0x5C] = (uint8_t)(patchX & 0xFF);
            mem[0x5D] = (uint8_t)((patchX >> 8) & 0xFF);
            fprintf(stderr, "FS2TRACE_INIT_X: $5C/$5D = $%04X\n", patchX);
        }
        if (initZEnv != NULL) {
            mem[0x64] = (uint8_t)(patchZ & 0xFF);
            mem[0x65] = (uint8_t)((patchZ >> 8) & 0xFF);
            fprintf(stderr, "FS2TRACE_INIT_Z: $64/$65 = $%04X\n", patchZ);
        }

        for (cycles = 0; cycles < cycleLimit && !stop; ) {
            if (pc == 0xC70D) {
                sawSmartPort++;
            }
            if (pc == 0x1EC6) {
                sawFetchSector++;
            }
            // Re-apply the position patch every frame at the
            // dispatcher entry point. Hook at $A61B
            // (LoadDispatcherPointer) -- this runs AFTER chunk5's
            // per-frame IntegratePhysicsStep ($87A2) but BEFORE the
            // dispatcher reads $5C/$5D for the cull check. Hooking
            // earlier (e.g. $877F MainLoop) doesn't work because
            // IntegratePhysicsStep normalises position cells.
            if (pc == 0xA61B) {
                if (initXEnv != NULL) {
                    mem[0x5C] = (uint8_t)(patchX & 0xFF);
                    mem[0x5D] = (uint8_t)((patchX >> 8) & 0xFF);
                }
                if (initZEnv != NULL) {
                    mem[0x64] = (uint8_t)(patchZ & 0xFF);
                    mem[0x65] = (uint8_t)((patchZ >> 8) & 0xFF);
                }
            }
            // FS2TRACE_FORCE_INBOUNDS: short-circuit the
            // "beyond bounds" cull-redirect path at $6E6F so
            // every $13/$14 and $20/$21/$22 cull falls
            // through. Used to force every section's $0D
            // HEADER to fire during boot, so the resulting
            // RAM dump contains every reachable scenery
            // section at its dispatcher-expected dest -- a
            // single comprehensive dump per region without
            // needing to fly the camera there.
            if (pc == 0x6E6F && forceInBounds) {
                // TestSceneryRangeReject does pla*4 +
                // jmp L00A5 to take the BEYOND path.
                // Replace with TestSceneryRangeOk's RTS
                // so the cull returns "in bounds"
                // instead -- caller advances past the
                // cull record.
                pc = 0x6E6E;
            }
            if (pc == 0x6006) {
                static int psHits = 0;
                psHits++;
                if (psHits <= 5) {
                    fprintf(stderr, "ProcessScenery (L6006) hit #%d: $5C/$5D=$%02X%02X $64/$65=$%02X%02X $8B/$8C=$%02X%02X\n",
                            psHits, mem[0x5D], mem[0x5C], mem[0x65], mem[0x64],
                            mem[0x8C], mem[0x8B]);
                }
            }
            if (pc == 0xA63A) {
                static int hdrHits = 0;
                hdrHits++;
                if (hdrHits <= 20) {
                    fprintf(stderr, "SceneryHeaderLoadIfMiss hit #%d: sectionId=$%02X count=$%02X dest=$%02X%02X cacheIdx=$%02X cache=$%02X%02X%02X%02X\n",
                            hdrHits, mem[0x08E5], mem[0x08E6],
                            mem[0x08E8], mem[0x08E7], mem[0x08E9],
                            mem[0x08EA], mem[0x08EB], mem[0x08EC], mem[0x08ED]);
                }
            }
            if (pc == 0xA6CD) {
                static int runHits = 0;
                runHits++;
                if (runHits <= 20) {
                    fprintf(stderr, "SceneryHeaderRunSection hit #%d: $9E=$%02X $9F=$%02X L1E01=$%02X L1E03=$%02X%02X L1E07=$%02X L1E09=$%02X\n",
                            runHits, mem[0x9E], mem[0x9F], mem[0x1E01],
                            mem[0x1E04], mem[0x1E03], mem[0x1E07], mem[0x1E09]);
                }
            }
            if (pc == 0xA6DF) {
                static int loopHits = 0;
                loopHits++;
                if (loopHits <= 5) {
                    fprintf(stderr, "SceneryHeaderRunSectionLoop (jsr L1EAD): $9F=$%02X L1E01=$%02X L1E07=$%02X L1E09=$%02X\n",
                            mem[0x9F], mem[0x1E01], mem[0x1E07], mem[0x1E09]);
                }
            }
            if (pc == 0xA6F3) {
                fprintf(stderr, "SceneryHeaderRunSectionFail: L1E01=$%02X carry-set\n", mem[0x1E01]);
            }
            // Ring buffer of the 16 most recent PCs, dumped below
            // if step() raises `stop`.
            lastPCs[lastIdx] = pc;
            lastIdx = (lastIdx + 1) % 16;
            if (trace != NULL) {
                fprintf(trace, "$%04X\n", pc);
            }
            step();
            if (pc == 0xFFFF) {
                break;
            }
            if (dumpRequested) {
                fprintf(stderr, "FS2TRACE_DUMP_AT_BLOCK reached after block %d\n",
                        traceCount);
                break;
            }
        }
        totalCycles += cycles;
        fprintf(stderr, "entry $%04X: %d cycles, smartport=%d, fetch=%d, blocks=%d, final PC=$%04X stop=%d\n",
                entries[e], cycles, sawSmartPort, sawFetchSector,
                traceCount - blocksBefore, pc, stop);
        if (stop) {
            fprintf(stderr, "  last 16 PCs: ");
            for (int i = 0; i < 16; i++) {
                int idx = (lastIdx + 15 - i) % 16;
                fprintf(stderr, "$%04X ", lastPCs[idx]);
            }
            fprintf(stderr, "\n");
            break;
        }
        if (dumpRequested) {
            // The requested snapshot point has been reached. Do not
            // start the remaining entries: each would push a fresh
            // sentinel and execute one instruction before noticing
            // the flag, disturbing the RAM that is about to be dumped.
            break;
        }
    }
    if (trace != NULL) {
        fclose(trace);
    }

    // Optional: dump key memory regions where the loader deposits
    // data, so the caller can grep for the scenery bytecode entry.
    if (getenv("FS2TRACE_DUMP") != NULL) {
        FILE *dump = fopen("tmp/fs2trace.ram", "wb");
        if (dump != NULL) {
            fwrite(mem, 1, MEM_SIZE, dump);
            fclose(dump);
            fprintf(stderr, "wrote tmp/fs2trace.ram (full 64K)\n");
        } else {
            fprintf(stderr, "warning: cannot open tmp/fs2trace.ram; RAM dump skipped\n");
        }
    }

    // Show LA7E0 (the scenery entry pointer chunk5's
    // LoadDispatcherPointer reads).
    uint16_t la7e0 = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
    fprintf(stderr, "LA7E0 = $%04X (scenery entry pointer)\n", la7e0);
    fprintf(stderr, "\n%d cycles total, %d block reads.\n", totalCycles, traceCount);
    for (int i = 0; i < traceCount; i++) {
        printf("  %d: BLOCK $%04X (%d)\n", i, tracedBlocks[i], tracedBlocks[i]);
    }
    free(diskImage);
    return 0;
}