// FS2 sector-read tracer.
//
// Boots a synthetic Apple //e environment with the FS2 chunks loaded
// into RAM, hooks the SmartPort entry point at $C70D, then trampolines
// into the LoadSceneryFile* entry points and watches every block read.
//
// This is NOT a full Apple //e emulator. It only implements enough of
// the 6502 instruction set + zero-page / RAM model to run the FS2
// loader code path. The Disk II / SmartPort layer is replaced with a
// single hook that:
//   - logs the block number (read from RBBlockNumber at chunk4)
//   - copies 512 bytes from the .dsk image at file_offset = block * 512
//   - returns "success" via CLC/RTS
//
// Usage:
//   fs2trace <fs2.dsk> <scenery.dsk> [entry]
//   entry is one of File0..File4 (default File0). Output is one
//   "BLOCK $XXXX" line per read.

#include <stdbool.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
// Sizes used throughout the tracer.
#define MEM_SIZE 65536          // bytes of emulated address space
#define BLOCK_SIZE 512          // bytes per block on the disk image
#define SECTOR_SIZE 256
#define MAX_TRACE_BLKS 4096     // capacity of the block trace buffer

// Emulated memory: one flat array covering $0000-$FFFF.
static uint8_t mem[MEM_SIZE];

// CPU state.
static uint8_t pc_low;
static uint16_t pc;             // program counter
static uint8_t reg_a;           // accumulator
static uint8_t reg_x;           // X index
static uint8_t reg_y;           // Y index
static uint8_t reg_s;           // stack pointer (offset into page $01)

// Status flags, one byte each.
static uint8_t flag_n;
static uint8_t flag_v;
static uint8_t flag_d;
static uint8_t flag_i;
static uint8_t flag_z;
static uint8_t flag_c;

// Bumped once per opcode fetched by step().
static int cycles;

// Non-zero once the emulator has been told to halt (e.g. on BRK).
static int stop;

// Set by FS2TRACE_DUMP_AT_BLOCK once the requested block count is
// reached. The main step loop checks this and exits cleanly without
// the noisy "last 16 PCs" trail that `stop` triggers.
static int dumpRequested;

// Disk image: scenery disk for the trace.
static uint8_t *diskImage;
static size_t diskSize;

// Port of chunk3 `SceneryNibbleDecode` ($D52D). Despite the name in
// the disassembly, this is really an ENCODER -- it takes raw input
// bytes and produces an Apple-disk-style nibble format where every
// output byte has bits 7,5,3,1 forced to 1 (the `$AA` pattern). Each
// input byte expands to two output bytes:
//
//   out[0] = (in >> 1) | $AA
//   out[1] =  in       | $AA
//
// A running EOR checksum is fed by the input stream, then emitted as
// two trailing bytes (`(cksum>>1)|$AA` then `cksum|$AA`). The original
// fills RAM from $3B60 forward and stops once the destination high
// byte hits $3E -- so the output window is $3E00 - $3B60 = $02A0 = 672
// bytes, i.e. exactly 672 / 2 = 336 input bytes are consumed.
//
// Parameters:
//   src         - input bytes; at least 336 must be readable.
//   dst         - output buffer; at least 672 bytes are written.
//   checksumOut - optional (may be NULL); receives the two encoded
//                 checksum bytes.
//
// Returns the number of input bytes consumed (always 336).
static int nibbleDecode(const uint8_t *src, uint8_t *dst, uint8_t *checksumOut) {
    enum { NIBBLE_DST_START = 0x3B60, NIBBLE_DST_END = 0x3E00 };
    const int target = NIBBLE_DST_END - NIBBLE_DST_START;  // output bytes

    uint8_t cksum = 0;
    int written = 0;
    int srcPos = 0;

    // Two output bytes per input byte until the window is full.
    while (written + 2 <= target) {
        uint8_t b = src[srcPos];
        cksum ^= b;
        dst[written++] = (uint8_t)((b >> 1) | 0xAA);
        dst[written++] = (uint8_t)( b       | 0xAA);
        srcPos++;
    }

    // The checksum uses the same two-byte encoding as the payload.
    if (checksumOut != NULL) {
        checksumOut[0] = (uint8_t)((cksum >> 1) | 0xAA);
        checksumOut[1] = (uint8_t)( cksum       | 0xAA);
    }
    return srcPos;
}

// Trace buffer.
|
|
static int tracedBlocks[MAX_TRACE_BLKS];
|
|
static int traceCount;
|
|
|
|
|
|
// Forward declarations.

// Memory access.
static uint8_t rd(uint16_t addr);
static void wr(uint16_t addr, uint8_t v);

// Stack helpers (page $01, indexed by reg_s).
static void push(uint8_t v);
static uint8_t pop(void);
static void push16(uint16_t v);
static uint16_t pop16(void);

// Instruction-stream fetch at pc.
static uint8_t fetch(void);
static uint16_t fetch16(void);

// Status-flag helpers.
static void setNZ(uint8_t v);
static uint8_t getP(void);
static void setP(uint8_t v);

// Execution and hooks.
static void step(void);
static void hookSmartPort(void);

// Chunk loading.
static int loadChunk(const char *path, uint16_t addr, size_t maxLen);

static uint8_t rd(uint16_t addr) {
|
|
return mem[addr];
|
|
}
|
|
|
|
|
|
// Apple II LCBANK split: $D000-$FFFF can be either ROM, LCBANK1, or
|
|
// LCBANK2 depending on softswitches at $C080-$C08F. fs2trace treats
|
|
// the entire $0000-$FFFF as plain RAM, but FS2 boot relies on hires
|
|
// page writes NOT corrupting the chunk binaries that live at
|
|
// $D300-$F3FF (chunk3) and $F600-$FBFF (chunk2). Ignore writes to
|
|
// $D000-$FFFF so chunk binaries stay intact. This breaks Apply64K-
|
|
// PatchTable if it ever targets a high address (it doesn't -- all
|
|
// patch table addresses are in $6000-$AE00, chunk5 main).
|
|
//
|
|
// Without this guard the hires drawing loop at $1C07 (sta
|
|
// (hires_ptr1),y) corrupts $FA67 and $FA71-3 in chunk2 (the wind
|
|
// code), causing fs2trace to halt at $FA73 on undocumented opcode
|
|
// $1B before more loader iterations can run.
|
|
static void wr(uint16_t addr, uint8_t v) {
|
|
const char *watchEnv = getenv("FS2TRACE_WATCH");
|
|
if (watchEnv != NULL && mem[addr] != v) {
|
|
static int watchHits = 0;
|
|
static uint16_t watchLo = 0, watchHi = 0;
|
|
static int watchSetUp = 0;
|
|
if (!watchSetUp) {
|
|
unsigned long lo = strtoul(watchEnv, NULL, 0);
|
|
char *dash = strchr(watchEnv, '-');
|
|
unsigned long hi = (dash != NULL) ? strtoul(dash + 1, NULL, 0) : lo;
|
|
watchLo = (uint16_t)lo;
|
|
watchHi = (uint16_t)hi;
|
|
watchSetUp = 1;
|
|
}
|
|
int watchCap = 50;
|
|
const char *capEnv = getenv("FS2TRACE_WATCH_CAP");
|
|
if (capEnv != NULL) {
|
|
watchCap = (int)strtol(capEnv, NULL, 0);
|
|
}
|
|
if (addr >= watchLo && addr <= watchHi && watchHits < watchCap) {
|
|
fprintf(stderr, " watch: $%04X = $%02X (was $%02X) at PC $%04X cycles=%d\n",
|
|
addr, v, mem[addr], pc, cycles);
|
|
watchHits++;
|
|
}
|
|
}
|
|
// Protect chunk binaries from stray hires-page writes. If the
|
|
// emulator ever needs LCBANK semantics for real (e.g. a patch
|
|
// that targets $D000-$FFFF), this needs to grow into proper
|
|
// softswitch tracking.
|
|
if (addr >= 0xD000 && getenv("FS2TRACE_NO_LC_GUARD") == NULL) {
|
|
return;
|
|
}
|
|
mem[addr] = v;
|
|
}
|
|
|
|
|
|
// Zero-page-wrapping 16-bit read. For (zp),Y and (zp,X) addressing
|
|
// modes, the high byte of the pointer must come from `(zp + 1) & $FF`
|
|
// -- staying inside zero page even when zp == $FF. Without this wrap
|
|
// we'd read from $0100 (the stack) which corrupts the return address
|
|
// in subtle ways during scenery loader runs.
|
|
static uint16_t rd16zp(uint8_t zp) {
|
|
uint8_t lo = mem[zp];
|
|
uint8_t hi = mem[(uint8_t)(zp + 1)];
|
|
return (uint16_t)lo | ((uint16_t)hi << 8);
|
|
}
|
|
|
|
|
|
static void push(uint8_t v) {
|
|
mem[0x0100 + reg_s] = v;
|
|
reg_s--;
|
|
}
|
|
|
|
|
|
static uint8_t pop(void) {
|
|
reg_s++;
|
|
return mem[0x0100 + reg_s];
|
|
}
|
|
|
|
|
|
// Push a 16-bit value, high byte first, so the low byte ends up at
// the lower stack address.
static void push16(uint16_t v) {
    const uint8_t hi = (uint8_t)(v >> 8);
    const uint8_t lo = (uint8_t)v;
    push(hi);
    push(lo);
}

// Pop a 16-bit value: low byte first, then high byte (the inverse of
// push16).
static uint16_t pop16(void) {
    uint16_t value = pop();
    value |= (uint16_t)(pop() << 8);
    return value;
}

static uint8_t fetch(void) {
|
|
return mem[pc++];
|
|
}
|
|
|
|
|
|
// Fetch a little-endian 16-bit operand at pc (low byte first) and
// advance pc by two.
static uint16_t fetch16(void) {
    const uint8_t lo = fetch();
    const uint8_t hi = fetch();
    return (uint16_t)(lo | (hi << 8));
}

static void setNZ(uint8_t v) {
|
|
flag_n = (v & 0x80) ? 1 : 0;
|
|
flag_z = (v == 0) ? 1 : 0;
|
|
}
|
|
|
|
|
|
static uint8_t getP(void) {
|
|
return (uint8_t)((flag_n << 7) | (flag_v << 6) | 0x20 | (flag_d << 3) |
|
|
(flag_i << 2) | (flag_z << 1) | flag_c);
|
|
}
|
|
|
|
|
|
static void setP(uint8_t v) {
|
|
flag_n = (v & 0x80) ? 1 : 0;
|
|
flag_v = (v & 0x40) ? 1 : 0;
|
|
flag_d = (v & 0x08) ? 1 : 0;
|
|
flag_i = (v & 0x04) ? 1 : 0;
|
|
flag_z = (v & 0x02) ? 1 : 0;
|
|
flag_c = (v & 0x01) ? 1 : 0;
|
|
}
|
|
|
|
|
|
// Stub for chunk5 `PromptColorOrBW` ($AC3A). The real routine clears
|
|
// the viewport, draws the intro banner, and waits for the user to
|
|
// press 'A' (colour) or 'B' (black-and-white). Either choice copies
|
|
// 22 bytes from ColorModePatch ($AB65) or BWModePatch ($AB7B) into
|
|
// ColorOrBWModePatch ($0800), then returns.
|
|
//
|
|
// fs2trace can't render or read keys, so we simulate "user pressed
|
|
// A" inline: copy ColorModePatch -> $0800 and RTS to the caller.
|
|
// This unblocks the boot path so MainGameEntry can run through to
|
|
// the main loop where the patched scenery slots fire.
|
|
static void hookPromptColorOrBW(void) {
|
|
for (int i = 0; i < 22; i++) {
|
|
mem[0x0800 + i] = mem[0xAB65 + i];
|
|
}
|
|
// Standard RTS: pop return address, add 1, set PC.
|
|
uint8_t lo = pop();
|
|
uint8_t hi = pop();
|
|
pc = (uint16_t)(lo | (hi << 8)) + 1;
|
|
}
|
|
|
|
|
|
// SmartPort hook: runs when PC reaches $C70D. Reads RBBlockNumber
|
|
// (3 bytes at chunk4-defined location, but we'll read it dynamically
|
|
// from the call params) and copies the requested block from the disk
|
|
// image into the SmartPort's data buffer. The caller's return is via
|
|
// the standard SmartPort calling convention: after `jsr $C70D` the
|
|
// command byte and parameter pointer are inline; we skip past them
|
|
// before returning.
|
|
static void hookSmartPort(void) {
|
|
// Apple SmartPort calling convention:
|
|
// jsr $C70D ; or whatever entry the firmware uses
|
|
// .byte command ; here, $01 = ReadBlock
|
|
// .word param_block_addr
|
|
// The return address pushed by JSR points to the inline
|
|
// command byte. We need to read the params, do the read, then
|
|
// bump the return address past the inline data.
|
|
uint16_t retLo = pop();
|
|
uint16_t retHi = pop();
|
|
uint16_t ret = retLo | (retHi << 8); // points one before inline cmd
|
|
// 6502 jsr pushes (retAddr - 1).
|
|
ret++;
|
|
|
|
uint8_t command = mem[ret];
|
|
uint16_t paramAddr = (uint16_t)(mem[ret + 1] | (mem[ret + 2] << 8));
|
|
ret += 3;
|
|
|
|
if (command == 0x01) { // ReadBlock
|
|
// ParamBlock layout (chunk4 RBParams):
|
|
// byte 0: parameter count ($03)
|
|
// byte 1: unit number
|
|
// bytes 2-3: data buffer addr
|
|
// bytes 4-6: block number (3 bytes, 24-bit)
|
|
uint16_t bufAddr = (uint16_t)(mem[paramAddr + 2] | (mem[paramAddr + 3] << 8));
|
|
uint32_t blockNum = (uint32_t)mem[paramAddr + 4]
|
|
| ((uint32_t)mem[paramAddr + 5] << 8)
|
|
| ((uint32_t)mem[paramAddr + 6] << 16);
|
|
|
|
if (traceCount < MAX_TRACE_BLKS) {
|
|
tracedBlocks[traceCount++] = (int)blockNum;
|
|
}
|
|
|
|
// FS2TRACE_DUMP_AT_BLOCK: snapshot RAM right after the
|
|
// Nth block has been read and copied. The SD3 boot
|
|
// sequence loads 16 blocks ($0360-$036F) into $2600+
|
|
// staging via SCRU0 -> $A7E0+ before the per-frame
|
|
// PatchSlot_FrameSync starts overwriting the dispatcher
|
|
// area. Stopping at block 16 captures the freshly-built
|
|
// dispatcher + per-section geometry at $A800-$AAFF
|
|
// before frames 1+ blow it away.
|
|
const char *stopAtEnv = getenv("FS2TRACE_DUMP_AT_BLOCK");
|
|
if (stopAtEnv != NULL) {
|
|
int stopAt = (int)strtol(stopAtEnv, NULL, 0);
|
|
if (stopAt > 0 && traceCount == stopAt) {
|
|
dumpRequested = 1;
|
|
}
|
|
}
|
|
|
|
size_t off = (size_t)blockNum * BLOCK_SIZE;
|
|
// Suppress reads that target our pre-loaded
|
|
// ReadBlockDataBuffer at $D575. FS2's boot reads block
|
|
// 0 (the .po boot block) into $D575, expecting it to
|
|
// contain the scenery block list -- but the san-inc
|
|
// pack .po has a standard ProDOS boot block there
|
|
// instead. Stomping $D575 with boot-block content
|
|
// breaks every subsequent block lookup. Our pre-fill
|
|
// already has the right .blocks data; preserve it.
|
|
bool skipWrite = (bufAddr >= 0xD575 && bufAddr < 0xD575 + 1024);
|
|
if (skipWrite) {
|
|
if (getenv("FS2TRACE_VERBOSE") != NULL) {
|
|
fprintf(stderr, " read block $%04X -> $%04X (skipped: protect block list)\n",
|
|
blockNum, bufAddr);
|
|
}
|
|
} else if (off + BLOCK_SIZE <= diskSize) {
|
|
memcpy(&mem[bufAddr], &diskImage[off], BLOCK_SIZE);
|
|
} else {
|
|
memset(&mem[bufAddr], 0, BLOCK_SIZE);
|
|
}
|
|
if (!skipWrite && getenv("FS2TRACE_VERBOSE") != NULL) {
|
|
fprintf(stderr, " read block $%04X -> $%04X (first byte: $%02X)\n",
|
|
blockNum, bufAddr, mem[bufAddr]);
|
|
}
|
|
flag_c = 0; // success
|
|
reg_a = 0;
|
|
} else {
|
|
fprintf(stderr, "unsupported SmartPort command $%02X\n", command);
|
|
flag_c = 1;
|
|
}
|
|
|
|
// Return past the inline command + param pointer.
|
|
push((uint8_t)((ret - 1) >> 8));
|
|
push((uint8_t)((ret - 1) & 0xFF));
|
|
// Standard RTS path: pop and add 1.
|
|
retLo = pop();
|
|
retHi = pop();
|
|
pc = (uint16_t)((retHi << 8) | retLo) + 1;
|
|
}
|
|
|
|
|
|
// 6502 instruction step. Implements the documented opcodes used by
|
|
// FS2's loader path. Anything else trips the unknown-opcode path and
|
|
// stops the emulator with an error.
|
|
static void step(void) {
|
|
if (pc == 0xC70D || pc == 0xC700 || pc == 0xC709) {
|
|
hookSmartPort();
|
|
return;
|
|
}
|
|
if (pc == 0xAC3A) {
|
|
hookPromptColorOrBW();
|
|
return;
|
|
}
|
|
|
|
uint8_t op = fetch();
|
|
cycles++;
|
|
switch (op) {
|
|
// BRK -- treat as halt with status. Allow caller to
|
|
// disable the halt via FS2TRACE_NO_BRK_HALT (= treat
|
|
// BRK as RTS so we can chase past zero-padded chunk4
|
|
// areas without aborting the dispatcher).
|
|
case 0x00: {
|
|
if (getenv("FS2TRACE_NO_BRK_HALT") != NULL) {
|
|
uint8_t rl = pop();
|
|
uint8_t rh = pop();
|
|
pc = (uint16_t)((rh << 8) | rl) + 1;
|
|
break;
|
|
}
|
|
fprintf(stderr, "BRK at $%04X cycles=%d\n", (uint16_t)(pc - 1), cycles);
|
|
fflush(stderr);
|
|
stop = 1;
|
|
return;
|
|
}
|
|
// NOP variants.
|
|
case 0xEA: break;
|
|
// CLC / SEC / CLD / SED / CLI / SEI / CLV
|
|
case 0x18: flag_c = 0; break;
|
|
case 0x38: flag_c = 1; break;
|
|
case 0xD8: flag_d = 0; break;
|
|
case 0xF8: flag_d = 1; break;
|
|
case 0x58: flag_i = 0; break;
|
|
case 0x78: flag_i = 1; break;
|
|
case 0xB8: flag_v = 0; break;
|
|
// Transfers.
|
|
case 0xAA: reg_x = reg_a; setNZ(reg_x); break; // TAX
|
|
case 0xA8: reg_y = reg_a; setNZ(reg_y); break; // TAY
|
|
case 0x8A: reg_a = reg_x; setNZ(reg_a); break; // TXA
|
|
case 0x98: reg_a = reg_y; setNZ(reg_a); break; // TYA
|
|
case 0xBA: reg_x = reg_s; setNZ(reg_x); break; // TSX
|
|
case 0x9A: reg_s = reg_x; break; // TXS
|
|
// Stack.
|
|
case 0x48: push(reg_a); break; // PHA
|
|
case 0x68: reg_a = pop(); setNZ(reg_a); break; // PLA
|
|
case 0x08: push((uint8_t)(getP() | 0x10)); break; // PHP
|
|
case 0x28: setP(pop()); break; // PLP
|
|
// INC/DEC X/Y
|
|
case 0xE8: reg_x++; setNZ(reg_x); break; // INX
|
|
case 0xC8: reg_y++; setNZ(reg_y); break; // INY
|
|
case 0xCA: reg_x--; setNZ(reg_x); break; // DEX
|
|
case 0x88: reg_y--; setNZ(reg_y); break; // DEY
|
|
|
|
// LDA
|
|
case 0xA9: reg_a = fetch(); setNZ(reg_a); break; // LDA imm
|
|
case 0xA5: reg_a = rd(fetch()); setNZ(reg_a); break; // LDA zp
|
|
case 0xB5: reg_a = rd((uint8_t)(fetch() + reg_x)); setNZ(reg_a); break; // LDA zp,X
|
|
case 0xAD: reg_a = rd(fetch16()); setNZ(reg_a); break; // LDA abs
|
|
case 0xBD: { uint16_t a = fetch16(); reg_a = rd((uint16_t)(a + reg_x)); setNZ(reg_a); break; } // LDA abs,X
|
|
case 0xB9: { uint16_t a = fetch16(); reg_a = rd((uint16_t)(a + reg_y)); setNZ(reg_a); break; } // LDA abs,Y
|
|
case 0xA1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); reg_a = rd(a); setNZ(reg_a); break; } // LDA (zp,X)
|
|
case 0xB1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); reg_a = rd(a); setNZ(reg_a); break; } // LDA (zp),Y
|
|
|
|
// LDX
|
|
case 0xA2: reg_x = fetch(); setNZ(reg_x); break; // LDX imm
|
|
case 0xA6: reg_x = rd(fetch()); setNZ(reg_x); break; // LDX zp
|
|
case 0xB6: reg_x = rd((uint8_t)(fetch() + reg_y)); setNZ(reg_x); break; // LDX zp,Y
|
|
case 0xAE: reg_x = rd(fetch16()); setNZ(reg_x); break; // LDX abs
|
|
case 0xBE: { uint16_t a = fetch16(); reg_x = rd((uint16_t)(a + reg_y)); setNZ(reg_x); break; } // LDX abs,Y
|
|
|
|
// LDY
|
|
case 0xA0: reg_y = fetch(); setNZ(reg_y); break; // LDY imm
|
|
case 0xA4: reg_y = rd(fetch()); setNZ(reg_y); break; // LDY zp
|
|
case 0xB4: reg_y = rd((uint8_t)(fetch() + reg_x)); setNZ(reg_y); break; // LDY zp,X
|
|
case 0xAC: reg_y = rd(fetch16()); setNZ(reg_y); break; // LDY abs
|
|
case 0xBC: { uint16_t a = fetch16(); reg_y = rd((uint16_t)(a + reg_x)); setNZ(reg_y); break; } // LDY abs,X
|
|
|
|
// STA
|
|
case 0x85: wr(fetch(), reg_a); break; // STA zp
|
|
case 0x95: wr((uint8_t)(fetch() + reg_x), reg_a); break; // STA zp,X
|
|
case 0x8D: wr(fetch16(), reg_a); break; // STA abs
|
|
case 0x9D: { uint16_t a = fetch16(); wr((uint16_t)(a + reg_x), reg_a); break; } // STA abs,X
|
|
case 0x99: { uint16_t a = fetch16(); wr((uint16_t)(a + reg_y), reg_a); break; } // STA abs,Y
|
|
case 0x81: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); wr(a, reg_a); break; } // STA (zp,X)
|
|
case 0x91: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); wr(a, reg_a); break; } // STA (zp),Y
|
|
|
|
// STX
|
|
case 0x86: wr(fetch(), reg_x); break; // STX zp
|
|
case 0x96: wr((uint8_t)(fetch() + reg_y), reg_x); break;
|
|
case 0x8E: wr(fetch16(), reg_x); break;
|
|
// STY
|
|
case 0x84: wr(fetch(), reg_y); break;
|
|
case 0x94: wr((uint8_t)(fetch() + reg_x), reg_y); break;
|
|
case 0x8C: wr(fetch16(), reg_y); break;
|
|
|
|
// INC zp / abs
|
|
case 0xE6: { uint8_t a = fetch(); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
|
|
case 0xF6: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
|
|
case 0xEE: { uint16_t a = fetch16(); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
|
|
case 0xFE: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = (uint8_t)(rd(a) + 1); wr(a, v); setNZ(v); break; }
|
|
// DEC
|
|
case 0xC6: { uint8_t a = fetch(); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
|
|
case 0xD6: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
|
|
case 0xCE: { uint16_t a = fetch16(); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
|
|
case 0xDE: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = (uint8_t)(rd(a) - 1); wr(a, v); setNZ(v); break; }
|
|
|
|
// Logical / arithmetic helpers (define lambdas inline).
|
|
#define DO_ADC(v) do { uint16_t s = (uint16_t)reg_a + (uint16_t)(v) + (uint16_t)flag_c; \
|
|
flag_c = (s > 0xFF) ? 1 : 0; \
|
|
flag_v = ((reg_a ^ (v)) & 0x80) ? 0 : (((reg_a ^ s) & 0x80) ? 1 : 0); \
|
|
reg_a = (uint8_t)s; setNZ(reg_a); } while (0)
|
|
#define DO_SBC(v) do { uint8_t vv = (uint8_t)~(v); \
|
|
uint16_t s = (uint16_t)reg_a + (uint16_t)vv + (uint16_t)flag_c; \
|
|
flag_c = (s > 0xFF) ? 1 : 0; \
|
|
flag_v = ((reg_a ^ vv) & 0x80) ? 0 : (((reg_a ^ s) & 0x80) ? 1 : 0); \
|
|
reg_a = (uint8_t)s; setNZ(reg_a); } while (0)
|
|
|
|
case 0x69: { uint8_t v = fetch(); DO_ADC(v); break; }
|
|
case 0x65: { uint8_t v = rd(fetch()); DO_ADC(v); break; }
|
|
case 0x75: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_ADC(v); break; }
|
|
case 0x6D: { uint8_t v = rd(fetch16()); DO_ADC(v); break; }
|
|
case 0x7D: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_ADC(v); break; }
|
|
case 0x79: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_ADC(v); break; }
|
|
case 0x71: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_ADC(v); break; }
|
|
case 0x61: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_ADC(v); break; }
|
|
|
|
case 0xE9: { uint8_t v = fetch(); DO_SBC(v); break; }
|
|
case 0xE5: { uint8_t v = rd(fetch()); DO_SBC(v); break; }
|
|
case 0xF5: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_SBC(v); break; }
|
|
case 0xED: { uint8_t v = rd(fetch16()); DO_SBC(v); break; }
|
|
case 0xFD: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_SBC(v); break; }
|
|
case 0xF9: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_SBC(v); break; }
|
|
case 0xF1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_SBC(v); break; }
|
|
case 0xE1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_SBC(v); break; }
|
|
|
|
// CMP / CPX / CPY
|
|
#define DO_CMP(reg, v) do { uint16_t r = (uint16_t)(reg) + 0x100 - (uint16_t)(v); \
|
|
flag_c = ((reg) >= (v)) ? 1 : 0; setNZ((uint8_t)(r & 0xFF)); } while (0)
|
|
case 0xC9: { uint8_t v = fetch(); DO_CMP(reg_a, v); break; }
|
|
case 0xC5: { uint8_t v = rd(fetch()); DO_CMP(reg_a, v); break; }
|
|
case 0xD5: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_CMP(reg_a, v); break; }
|
|
case 0xCD: { uint8_t v = rd(fetch16()); DO_CMP(reg_a, v); break; }
|
|
case 0xDD: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_x)); DO_CMP(reg_a, v); break; }
|
|
case 0xD9: { uint16_t a = fetch16(); uint8_t v = rd((uint16_t)(a + reg_y)); DO_CMP(reg_a, v); break; }
|
|
case 0xD1: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); uint8_t v = rd(a); DO_CMP(reg_a, v); break; }
|
|
case 0xC1: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); uint8_t v = rd(a); DO_CMP(reg_a, v); break; }
|
|
case 0xE0: { uint8_t v = fetch(); DO_CMP(reg_x, v); break; }
|
|
case 0xE4: { uint8_t v = rd(fetch()); DO_CMP(reg_x, v); break; }
|
|
case 0xEC: { uint8_t v = rd(fetch16()); DO_CMP(reg_x, v); break; }
|
|
case 0xC0: { uint8_t v = fetch(); DO_CMP(reg_y, v); break; }
|
|
case 0xC4: { uint8_t v = rd(fetch()); DO_CMP(reg_y, v); break; }
|
|
case 0xCC: { uint8_t v = rd(fetch16()); DO_CMP(reg_y, v); break; }
|
|
|
|
// AND / ORA / EOR
|
|
#define DO_AND(v) do { reg_a &= (v); setNZ(reg_a); } while (0)
|
|
#define DO_ORA(v) do { reg_a |= (v); setNZ(reg_a); } while (0)
|
|
#define DO_EOR(v) do { reg_a ^= (v); setNZ(reg_a); } while (0)
|
|
case 0x29: { uint8_t v = fetch(); DO_AND(v); break; }
|
|
case 0x25: { uint8_t v = rd(fetch()); DO_AND(v); break; }
|
|
case 0x35: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_AND(v); break; }
|
|
case 0x2D: { uint8_t v = rd(fetch16()); DO_AND(v); break; }
|
|
case 0x3D: { uint16_t a = fetch16(); DO_AND(rd((uint16_t)(a + reg_x))); break; }
|
|
case 0x39: { uint16_t a = fetch16(); DO_AND(rd((uint16_t)(a + reg_y))); break; }
|
|
case 0x31: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_AND(rd(a)); break; }
|
|
case 0x21: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_AND(rd(a)); break; }
|
|
case 0x09: { uint8_t v = fetch(); DO_ORA(v); break; }
|
|
case 0x05: { uint8_t v = rd(fetch()); DO_ORA(v); break; }
|
|
case 0x15: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_ORA(v); break; }
|
|
case 0x0D: { uint8_t v = rd(fetch16()); DO_ORA(v); break; }
|
|
case 0x1D: { uint16_t a = fetch16(); DO_ORA(rd((uint16_t)(a + reg_x))); break; }
|
|
case 0x19: { uint16_t a = fetch16(); DO_ORA(rd((uint16_t)(a + reg_y))); break; }
|
|
case 0x11: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_ORA(rd(a)); break; }
|
|
case 0x01: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_ORA(rd(a)); break; }
|
|
case 0x49: { uint8_t v = fetch(); DO_EOR(v); break; }
|
|
case 0x45: { uint8_t v = rd(fetch()); DO_EOR(v); break; }
|
|
case 0x55: { uint8_t v = rd((uint8_t)(fetch() + reg_x)); DO_EOR(v); break; }
|
|
case 0x4D: { uint8_t v = rd(fetch16()); DO_EOR(v); break; }
|
|
case 0x5D: { uint16_t a = fetch16(); DO_EOR(rd((uint16_t)(a + reg_x))); break; }
|
|
case 0x59: { uint16_t a = fetch16(); DO_EOR(rd((uint16_t)(a + reg_y))); break; }
|
|
case 0x51: { uint8_t z = fetch(); uint16_t a = (uint16_t)(rd16zp(z) + reg_y); DO_EOR(rd(a)); break; }
|
|
case 0x41: { uint8_t z = (uint8_t)(fetch() + reg_x); uint16_t a = rd16zp(z); DO_EOR(rd(a)); break; }
|
|
|
|
// BIT
|
|
case 0x24: { uint8_t v = rd(fetch()); flag_z = (reg_a & v) == 0 ? 1 : 0; flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; break; }
|
|
case 0x2C: { uint8_t v = rd(fetch16()); flag_z = (reg_a & v) == 0 ? 1 : 0; flag_n = (v & 0x80) ? 1 : 0; flag_v = (v & 0x40) ? 1 : 0; break; }
|
|
|
|
// ASL / LSR / ROL / ROR (accumulator + memory variants)
|
|
#define ASL(v) do { flag_c = ((v) & 0x80) ? 1 : 0; (v) = (uint8_t)((v) << 1); setNZ(v); } while (0)
|
|
#define LSR(v) do { flag_c = (v) & 1; (v) = (uint8_t)((v) >> 1); setNZ(v); } while (0)
|
|
#define ROL(v) do { uint8_t c = flag_c; flag_c = ((v) & 0x80) ? 1 : 0; (v) = (uint8_t)(((v) << 1) | c); setNZ(v); } while (0)
|
|
#define ROR(v) do { uint8_t c = flag_c; flag_c = (v) & 1; (v) = (uint8_t)(((v) >> 1) | (c << 7)); setNZ(v); } while (0)
|
|
case 0x0A: ASL(reg_a); break;
|
|
case 0x06: { uint8_t a = fetch(); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
|
|
case 0x16: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
|
|
case 0x0E: { uint16_t a = fetch16(); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
|
|
case 0x1E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ASL(v); wr(a, v); break; }
|
|
case 0x4A: LSR(reg_a); break;
|
|
case 0x46: { uint8_t a = fetch(); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
|
|
case 0x56: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
|
|
case 0x4E: { uint16_t a = fetch16(); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
|
|
case 0x5E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); LSR(v); wr(a, v); break; }
|
|
case 0x2A: ROL(reg_a); break;
|
|
case 0x26: { uint8_t a = fetch(); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
|
|
case 0x36: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
|
|
case 0x2E: { uint16_t a = fetch16(); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
|
|
case 0x3E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ROL(v); wr(a, v); break; }
|
|
case 0x6A: ROR(reg_a); break;
|
|
case 0x66: { uint8_t a = fetch(); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
|
|
case 0x76: { uint8_t a = (uint8_t)(fetch() + reg_x); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
|
|
case 0x6E: { uint16_t a = fetch16(); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
|
|
case 0x7E: { uint16_t a = (uint16_t)(fetch16() + reg_x); uint8_t v = rd(a); ROR(v); wr(a, v); break; }
|
|
|
|
// Branches.
|
|
#define BRANCH(cond) do { int8_t off = (int8_t)fetch(); if (cond) pc = (uint16_t)(pc + off); } while (0)
|
|
case 0x10: BRANCH(!flag_n); break; // BPL
|
|
case 0x30: BRANCH( flag_n); break; // BMI
|
|
case 0x50: BRANCH(!flag_v); break; // BVC
|
|
case 0x70: BRANCH( flag_v); break; // BVS
|
|
case 0x90: BRANCH(!flag_c); break; // BCC
|
|
case 0xB0: BRANCH( flag_c); break; // BCS
|
|
case 0xD0: BRANCH(!flag_z); break; // BNE
|
|
case 0xF0: BRANCH( flag_z); break; // BEQ
|
|
|
|
// Jumps / subroutine.
|
|
case 0x4C: pc = fetch16(); break; // JMP abs
|
|
case 0x6C: { uint16_t a = fetch16(); // JMP (ind)
|
|
// 6502 page-boundary bug
|
|
uint16_t lo = mem[a];
|
|
uint16_t hi = mem[(a & 0xFF00) | ((a + 1) & 0xFF)];
|
|
pc = (uint16_t)(lo | (hi << 8));
|
|
break; }
|
|
case 0x20: { // JSR abs
|
|
uint16_t target = fetch16();
|
|
uint16_t retAddr = (uint16_t)(pc - 1);
|
|
push16(retAddr);
|
|
if (getenv("FS2TRACE_JSR") != NULL && (retAddr & 0xFF00) == 0x8000) {
|
|
fprintf(stderr, "JSR pushes $%04X (target $%04X)\n", retAddr, target);
|
|
}
|
|
if (target == 0x78E0 && getenv("FS2TRACE_JSR_78E0") != NULL) {
|
|
fprintf(stderr, " JSR $78E0 from PC $%04X A=$%02X $24=$%02X $B1=$%02X $0876=$%02X cycles=%d\n",
|
|
retAddr, reg_a, mem[0x24], mem[0xB1], mem[0x0876], cycles);
|
|
}
|
|
// FS2TRACE_PERSP=1: log every PerspectiveDivide
|
|
// call ($7BFD in MAME RAM). Inputs: A=num_hi,
|
|
// Y=num_lo, $C4/$C5=denominator. Output is in
|
|
// A on return. The self-modified table address
|
|
// lives at $7D47/$7D48 (= MAME's L7D76+1/+2,
|
|
// not source's $7D77/$7D78).
|
|
if (target == 0x7BFD && getenv("FS2TRACE_PERSP") != NULL) {
|
|
int16_t num = (int16_t)((uint16_t)reg_y | ((uint16_t)reg_a << 8));
|
|
int16_t den = (int16_t)((uint16_t)mem[0xC4] | ((uint16_t)mem[0xC5] << 8));
|
|
// MAME's L7D76 (= LDA abs,X) is at $7D48; the
|
|
// self-modified table address bytes are at
|
|
// $7D49 (lo) and $7D4A (hi).
|
|
fprintf(stderr,
|
|
" JSR PerspDiv from PC $%04X num=%6d den=%6d table=$%02X%02X\n",
|
|
retAddr, num, den, mem[0x7D4A], mem[0x7D49]);
|
|
}
|
|
pc = target;
|
|
break; }
|
|
case 0x60: { // RTS
|
|
// PC was advanced by fetch(); the RTS
|
|
// instruction itself was at pc-1.
|
|
uint16_t rtsAddr = (uint16_t)(pc - 1);
|
|
pc = (uint16_t)(pop16() + 1);
|
|
if (getenv("FS2TRACE_RTS") != NULL && rtsAddr == 0xD458) {
|
|
fprintf(stderr, "RTS@$D458 -> $%04X (S=$%02X)\n", pc, reg_s);
|
|
for (int s = 0; s < 8; s++) {
|
|
fprintf(stderr, " stack[$%02X] = $%02X\n",
|
|
(uint8_t)(reg_s - s),
|
|
mem[0x100 + (uint8_t)(reg_s - s)]);
|
|
}
|
|
}
|
|
// Capture PerspectiveDivide return value.
|
|
// MAME's PerspectiveDivide RTS is at $7D51
|
|
// (= source's L7D7F equivalent).
|
|
if (rtsAddr == 0x7D51 && getenv("FS2TRACE_PERSP") != NULL) {
|
|
fprintf(stderr, " PerspDiv returns A=$%02X (signed=%d)\n",
|
|
reg_a, (int)(int8_t)reg_a);
|
|
}
|
|
break; }
|
|
case 0x40: setP(pop()); pc = pop16(); break; // RTI
|
|
|
|
// 65C02 extensions used by chunk3.
|
|
case 0x14: { // TRB zp
|
|
uint8_t zp = fetch();
|
|
uint8_t m = mem[zp];
|
|
flag_z = ((reg_a & m) == 0);
|
|
mem[zp] = (uint8_t)(m & ~reg_a);
|
|
break; }
|
|
case 0x1C: { // TRB abs
|
|
uint16_t a = fetch16();
|
|
uint8_t m = rd(a);
|
|
flag_z = ((reg_a & m) == 0);
|
|
wr(a, (uint8_t)(m & ~reg_a));
|
|
break; }
|
|
case 0x04: { // TSB zp
|
|
uint8_t zp = fetch();
|
|
uint8_t m = mem[zp];
|
|
flag_z = ((reg_a & m) == 0);
|
|
mem[zp] = (uint8_t)(m | reg_a);
|
|
break; }
|
|
case 0x0C: { // TSB abs
|
|
uint16_t a = fetch16();
|
|
uint8_t m = rd(a);
|
|
flag_z = ((reg_a & m) == 0);
|
|
wr(a, (uint8_t)(m | reg_a));
|
|
break; }
|
|
case 0x80: { // BRA rel
|
|
int8_t off = (int8_t)fetch();
|
|
pc = (uint16_t)(pc + off);
|
|
break; }
|
|
case 0x12: { // ORA (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
reg_a = (uint8_t)(reg_a | rd(a));
|
|
setNZ(reg_a);
|
|
break; }
|
|
case 0x32: { // AND (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
reg_a = (uint8_t)(reg_a & rd(a));
|
|
setNZ(reg_a);
|
|
break; }
|
|
case 0x52: { // EOR (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
reg_a = (uint8_t)(reg_a ^ rd(a));
|
|
setNZ(reg_a);
|
|
break; }
|
|
case 0x72: { // ADC (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
uint8_t m = rd(a);
|
|
uint16_t r = (uint16_t)reg_a + (uint16_t)m + (uint16_t)flag_c;
|
|
flag_v = (((reg_a ^ m) & 0x80) == 0)
|
|
&& (((reg_a ^ (uint8_t)r) & 0x80) != 0);
|
|
flag_c = r > 0xFF;
|
|
reg_a = (uint8_t)r;
|
|
setNZ(reg_a);
|
|
break; }
|
|
case 0x92: { // STA (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
wr(a, reg_a);
|
|
break; }
|
|
case 0xB2: { // LDA (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
reg_a = rd(a);
|
|
setNZ(reg_a);
|
|
break; }
|
|
case 0xD2: { // CMP (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
uint8_t m = rd(a);
|
|
flag_c = reg_a >= m;
|
|
setNZ((uint8_t)(reg_a - m));
|
|
break; }
|
|
case 0xF2: { // SBC (zp)
|
|
uint8_t zp = fetch();
|
|
uint16_t a = (uint16_t)mem[zp] | ((uint16_t)mem[(uint8_t)(zp+1)] << 8);
|
|
uint8_t m = rd(a);
|
|
uint16_t r = (uint16_t)reg_a + (uint16_t)((uint8_t)~m) + (uint16_t)flag_c;
|
|
flag_v = (((reg_a ^ m) & 0x80) != 0)
|
|
&& (((reg_a ^ (uint8_t)r) & 0x80) != 0);
|
|
flag_c = r > 0xFF;
|
|
reg_a = (uint8_t)r;
|
|
setNZ(reg_a);
|
|
break; }
|
|
case 0x64: { // STZ zp
|
|
uint8_t zp = fetch();
|
|
mem[zp] = 0;
|
|
break; }
|
|
case 0x74: { // STZ zp,X
|
|
uint8_t zp = fetch();
|
|
mem[(uint8_t)(zp + reg_x)] = 0;
|
|
break; }
|
|
case 0x9C: { // STZ abs
|
|
uint16_t a = fetch16();
|
|
wr(a, 0);
|
|
break; }
|
|
case 0x9E: { // STZ abs,X
|
|
uint16_t a = fetch16();
|
|
wr((uint16_t)(a + reg_x), 0);
|
|
break; }
|
|
case 0x5A: push(reg_y); break; // PHY
|
|
case 0x7A: { reg_y = pop(); setNZ(reg_y); break; } // PLY
|
|
case 0xDA: push(reg_x); break; // PHX
|
|
case 0xFA: { reg_x = pop(); setNZ(reg_x); break; } // PLX
|
|
case 0x3A: reg_a = (uint8_t)(reg_a - 1); setNZ(reg_a); break; // DEC A
|
|
case 0x1A: reg_a = (uint8_t)(reg_a + 1); setNZ(reg_a); break; // INC A
|
|
|
|
// 65C02 RMB/SMB ops: reset/set memory bit N of zp.
|
|
case 0x07: { uint8_t zp = fetch(); mem[zp] &= ~0x01; break; } // RMB0
|
|
case 0x17: { uint8_t zp = fetch(); mem[zp] &= ~0x02; break; } // RMB1
|
|
case 0x27: { uint8_t zp = fetch(); mem[zp] &= ~0x04; break; } // RMB2
|
|
case 0x37: { uint8_t zp = fetch(); mem[zp] &= ~0x08; break; } // RMB3
|
|
case 0x47: { uint8_t zp = fetch(); mem[zp] &= ~0x10; break; } // RMB4
|
|
case 0x57: { uint8_t zp = fetch(); mem[zp] &= ~0x20; break; } // RMB5
|
|
case 0x67: { uint8_t zp = fetch(); mem[zp] &= ~0x40; break; } // RMB6
|
|
case 0x77: { uint8_t zp = fetch(); mem[zp] &= ~0x80; break; } // RMB7
|
|
case 0x87: { uint8_t zp = fetch(); mem[zp] |= 0x01; break; } // SMB0
|
|
case 0x97: { uint8_t zp = fetch(); mem[zp] |= 0x02; break; } // SMB1
|
|
case 0xA7: { uint8_t zp = fetch(); mem[zp] |= 0x04; break; } // SMB2
|
|
case 0xB7: { uint8_t zp = fetch(); mem[zp] |= 0x08; break; } // SMB3
|
|
case 0xC7: { uint8_t zp = fetch(); mem[zp] |= 0x10; break; } // SMB4
|
|
case 0xD7: { uint8_t zp = fetch(); mem[zp] |= 0x20; break; } // SMB5
|
|
case 0xE7: { uint8_t zp = fetch(); mem[zp] |= 0x40; break; } // SMB6
|
|
case 0xF7: { uint8_t zp = fetch(); mem[zp] |= 0x80; break; } // SMB7
|
|
|
|
// BBR/BBS rel: branch on bit reset/set in zp.
|
|
case 0x0F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x01)) pc = (uint16_t)(pc + off); break; } // BBR0
|
|
case 0x1F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x02)) pc = (uint16_t)(pc + off); break; } // BBR1
|
|
case 0x2F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x04)) pc = (uint16_t)(pc + off); break; } // BBR2
|
|
case 0x3F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x08)) pc = (uint16_t)(pc + off); break; } // BBR3
|
|
case 0x4F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x10)) pc = (uint16_t)(pc + off); break; } // BBR4
|
|
case 0x5F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x20)) pc = (uint16_t)(pc + off); break; } // BBR5
|
|
case 0x6F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x40)) pc = (uint16_t)(pc + off); break; } // BBR6
|
|
case 0x7F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (!(mem[zp] & 0x80)) pc = (uint16_t)(pc + off); break; } // BBR7
|
|
case 0x8F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x01) pc = (uint16_t)(pc + off); break; } // BBS0
|
|
case 0x9F: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x02) pc = (uint16_t)(pc + off); break; } // BBS1
|
|
case 0xAF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x04) pc = (uint16_t)(pc + off); break; } // BBS2
|
|
case 0xBF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x08) pc = (uint16_t)(pc + off); break; } // BBS3
|
|
case 0xCF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x10) pc = (uint16_t)(pc + off); break; } // BBS4
|
|
case 0xDF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x20) pc = (uint16_t)(pc + off); break; } // BBS5
|
|
case 0xEF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x40) pc = (uint16_t)(pc + off); break; } // BBS6
|
|
case 0xFF: { uint8_t zp = fetch(); int8_t off = (int8_t)fetch(); if (mem[zp] & 0x80) pc = (uint16_t)(pc + off); break; } // BBS7
|
|
|
|
default:
|
|
fflush(stdout);
|
|
fprintf(stderr, "UNIMPL opcode $%02X at PC $%04X (cycles=%llu)\n",
|
|
op, (uint16_t)(pc - 1), (unsigned long long)cycles);
|
|
fflush(stderr);
|
|
stop = 1;
|
|
break;
|
|
}
|
|
(void)pc_low;
|
|
}
|
|
|
|
|
|
static int loadChunk(const char *path, uint16_t addr, size_t maxLen) {
|
|
FILE *f = fopen(path, "rb");
|
|
if (f == NULL) {
|
|
fprintf(stderr, "cannot open %s\n", path);
|
|
return 0;
|
|
}
|
|
size_t n = fread(&mem[addr], 1, maxLen, f);
|
|
fclose(f);
|
|
fprintf(stderr, "loaded %s -> $%04X..%04X (%zu bytes)\n",
|
|
path, addr, (uint16_t)(addr + n - 1), n);
|
|
return (int)n;
|
|
}
|
|
|
|
|
|
// --matrix mode: run chunk5 SetupViewProjection ($6000) with the
|
|
// supplied attitude / view inputs and dump the resulting $78..$89
|
|
// matrix. Uses a captured MAME RAM image as the initial state so all
|
|
// ZP slots, chunk2-5 binaries, and demo wiring are already in place.
|
|
//
|
|
// Usage:
|
|
// fs2trace --matrix <yaw_i16> <pitch_i16> <bank_i16> <vd_byte> [ram.bin]
|
|
//
|
|
// Default ram.bin = tmp/capture_boot.bin (chunk5Oracle replacement).
|
|
static void loadOriginalChunks(void); // defined below
|
|
|
|
static int matrixMode(int argc, char **argv) {
|
|
if (argc < 6 || argc > 7) {
|
|
fprintf(stderr,
|
|
"usage: %s --matrix <yaw> <pitch> <bank> <vd> [ram.bin]\n",
|
|
argv[0]);
|
|
return 2;
|
|
}
|
|
long yaw = strtol(argv[2], NULL, 0);
|
|
long pitch = strtol(argv[3], NULL, 0);
|
|
long bank = strtol(argv[4], NULL, 0);
|
|
long vd = strtol(argv[5], NULL, 0);
|
|
const char *ramPath = (argc > 6)
|
|
? argv[6]
|
|
: "/home/scott/claude/flight/tmp/capture_boot.bin";
|
|
|
|
// FS2TRACE_USE_ORIG=1: load chunk4/chunk5 from out/ instead
|
|
// of from a captured RAM image. The captured chunk5 in the
|
|
// boot dump is HEAVILY patched by Apply64KPatchTable -- the
|
|
// SetupViewProjection control flow is rewritten there and no
|
|
// longer matches the source. Using the unpatched binaries
|
|
// gives source-faithful matrix output (matching the chunk5.s
|
|
// listing), which is what we want for validating the C
|
|
// transliteration in chunk5Setup.c.
|
|
if (getenv("FS2TRACE_USE_ORIG") != NULL) {
|
|
loadOriginalChunks();
|
|
// ZP isn't initialised by the binaries; explicit zero
|
|
// is fine for SetupViewProjection (no read-before-write
|
|
// outside the inputs we poke below).
|
|
} else {
|
|
FILE *rf = fopen(ramPath, "rb");
|
|
if (rf == NULL) {
|
|
fprintf(stderr, "cannot open RAM image %s\n", ramPath);
|
|
return 1;
|
|
}
|
|
size_t got = fread(mem, 1, MEM_SIZE, rf);
|
|
fclose(rf);
|
|
if (got != MEM_SIZE) {
|
|
fprintf(stderr, "RAM image %s short read (%zu bytes)\n", ramPath, got);
|
|
return 1;
|
|
}
|
|
}
|
|
|
|
// Poke inputs over whatever the captured ZP held.
|
|
uint16_t y = (uint16_t)((yaw < 0) ? (yaw + 0x10000) : yaw);
|
|
uint16_t p = (uint16_t)((pitch < 0) ? (pitch + 0x10000) : pitch);
|
|
uint16_t b = (uint16_t)((bank < 0) ? (bank + 0x10000) : bank);
|
|
mem[0x6C] = (uint8_t)( y & 0xFF);
|
|
mem[0x6D] = (uint8_t)((y >> 8) & 0xFF);
|
|
mem[0x6E] = (uint8_t)( p & 0xFF);
|
|
mem[0x6F] = (uint8_t)((p >> 8) & 0xFF);
|
|
mem[0x70] = (uint8_t)( b & 0xFF);
|
|
mem[0x71] = (uint8_t)((b >> 8) & 0xFF);
|
|
mem[0x0A70] = (uint8_t)(vd & 0xFF);
|
|
|
|
// Set up CPU and call $6000 = SetupViewProjection. Push a
|
|
// sentinel return so the routine's RTS lands at $FFFF (the
|
|
// SmartPort hook patches $C70D, but $FFFF isn't touched -- a
|
|
// BRK at $0000 halts the loop cleanly via stop=1 below).
|
|
reg_a = reg_x = reg_y = 0;
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
pc = 0x6000;
|
|
push16(0xFFFE);
|
|
|
|
// FS2TRACE_MATRIX_BREAK=$XXXX prints state every time PC
|
|
// matches that address. Used to inspect intermediate state
|
|
// (e.g., set to $177A to see inputs to shifted L1778).
|
|
const char *brkEnv = getenv("FS2TRACE_MATRIX_BREAK");
|
|
uint16_t brkPC = (brkEnv != NULL)
|
|
? (uint16_t)strtol(brkEnv, NULL, 0) : 0xFFFE;
|
|
for (cycles = 0; cycles < 5000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) {
|
|
break;
|
|
}
|
|
if (pc == brkPC) {
|
|
fprintf(stderr,
|
|
"BRK pc=$%04X A=$%02X X=$%02X Y=$%02X "
|
|
"$72/$73=$%02X%02X $74/$75=$%02X%02X "
|
|
"$76/$77=$%02X%02X $CB=$%02X%02X\n",
|
|
pc, reg_a, reg_x, reg_y,
|
|
mem[0x73], mem[0x72],
|
|
mem[0x75], mem[0x74],
|
|
mem[0x77], mem[0x76],
|
|
mem[0xCC], mem[0xCB]);
|
|
}
|
|
step();
|
|
}
|
|
|
|
printf("inputs: yaw=%ld pitch=%ld bank=%ld VD=$%02X\n",
|
|
yaw, pitch, bank, (uint8_t)(vd & 0xFF));
|
|
printf("matrix at $78..$89 (post-L6301 col shifts):\n");
|
|
for (int row = 0; row < 3; row++) {
|
|
int rb = 0x78 + row * 6;
|
|
int v0 = (int16_t)(mem[rb] | (mem[rb + 1] << 8));
|
|
int v1 = (int16_t)(mem[rb + 2] | (mem[rb + 3] << 8));
|
|
int v2 = (int16_t)(mem[rb + 4] | (mem[rb + 5] << 8));
|
|
printf(" row %d: %6d %6d %6d\n", row, v0, v1, v2);
|
|
}
|
|
// Cascade intermediates -- last values left after the routine
|
|
// returned. $72/$74/$76 are the rotated-angle inputs;
|
|
// $CB/$CD/$CF and $18/$D4/$D6 are the cos/sin lookups feeding
|
|
// the matrix construction.
|
|
if (getenv("FS2TRACE_MATRIX_DUMP") != NULL) {
|
|
#define R16(addr) ((int16_t)(mem[addr] | (mem[(addr)+1] << 8)))
|
|
printf("intermediates:\n");
|
|
printf(" $72/$73 = %d ($%02X%02X)\n", R16(0x72), mem[0x73], mem[0x72]);
|
|
printf(" $74/$75 = %d ($%02X%02X)\n", R16(0x74), mem[0x75], mem[0x74]);
|
|
printf(" $76/$77 = %d ($%02X%02X)\n", R16(0x76), mem[0x77], mem[0x76]);
|
|
printf(" $CB/$CC = %d (sin of $72)\n", R16(0xCB));
|
|
printf(" $CD/$CE = %d (sin of $74)\n", R16(0xCD));
|
|
printf(" $CF/$D0 = %d (sin of $76)\n", R16(0xCF));
|
|
printf(" $18/$19 = %d (cos of $72)\n", R16(0x18));
|
|
printf(" $D4/$D5 = %d (cos of $74)\n", R16(0xD4));
|
|
printf(" $D6/$D7 = %d (cos of $76)\n", R16(0xD6));
|
|
printf(" $BA/$BB = %d (cos of VD<<4)\n", R16(0xBA));
|
|
printf(" $BE/$BF = %d (sin of VD<<4)\n", R16(0xBE));
|
|
#undef R16
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
// loadOriginalChunks: place chunk4 (at $0200) and chunk5 (at $6000)
|
|
// into mem[] from out/*-built. Used by the --zpscale and --l177b
|
|
// probes which target chunk4 routines in isolation -- the boot RAM
|
|
// dump shifts chunk4 code by 2 bytes due to Apply64KPatchTable, so
|
|
// L177B / ScaleC2ByC4 land at different addresses there. The .built
|
|
// binaries are unpatched.
|
|
static void loadOriginalChunks(void) {
|
|
memset(mem, 0, MEM_SIZE);
|
|
loadChunk("/home/scott/claude/flight/out/4_0200-25ff", 0x0200, 0x2400);
|
|
loadChunk("/home/scott/claude/flight/out/5_6000-b3df", 0x6000, 0x53E0);
|
|
}
|
|
|
|
|
|
// --scenery [ramfile]: runs the captured chunk5 ProcessScenery
|
|
// against a RAM image and counts how many DrawColorSpan calls fire.
|
|
// This tells us "how much MAME-equivalent scenery would draw if we
|
|
// ran the actual interpreter against this RAM state". Compare to
|
|
// the port's `SCENERY_STATS=1 draws=N` to see where the port
|
|
// diverges. Default RAM image is tmp/capture_boot.bin.
|
|
//
|
|
// The capture is patched by Apply64KPatchTable at runtime, so chunk5
|
|
// addresses differ from the source-listing values. The jump table at
|
|
// $6000-$6020 provides indirection: $6006 jumps to ProcessScenery,
|
|
// $601B (DrawColorSpanRelay) jumps to DrawColorSpan. We use $6006
|
|
// as the entry and watch for PC entering DrawColorSpan via the
|
|
// $601B relay's target.
|
|
static int sceneryMode(int argc, char **argv) {
|
|
const char *ramPath = (argc > 2)
|
|
? argv[2]
|
|
: "/home/scott/claude/flight/tmp/capture_boot.bin";
|
|
FILE *rf = fopen(ramPath, "rb");
|
|
if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; }
|
|
if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
|
|
fprintf(stderr, "RAM image short read\n");
|
|
fclose(rf);
|
|
return 1;
|
|
}
|
|
fclose(rf);
|
|
|
|
// Resolve DrawColorSpan via the $601B jump table slot
|
|
// (DrawColorSpanRelay).
|
|
if (mem[0x601B] != 0x4C) {
|
|
fprintf(stderr, "expected JMP at $601B, got $%02X\n", mem[0x601B]);
|
|
return 1;
|
|
}
|
|
uint16_t drawColorSpanPC = (uint16_t)(mem[0x601C] | (mem[0x601D] << 8));
|
|
|
|
// Reset the scenery cursor to the dispatcher entry (LA7E0 in
|
|
// the source = mem[$A7E0/$A7E1]). The captured cursor at
|
|
// $8B/$8C is the END-OF-FRAME position; without resetting
|
|
// we'd walk past the dispatcher into chunk2 territory.
|
|
uint16_t dispatcherEntry = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
|
|
mem[0x8B] = (uint8_t)( dispatcherEntry & 0xFF);
|
|
mem[0x8C] = (uint8_t)((dispatcherEntry >> 8) & 0xFF);
|
|
// Clear scenery in-progress flags so ProcessScenery starts
|
|
// fresh (chunk5.s lines 1053-1062).
|
|
mem[0x08F3] = 0;
|
|
mem[0x090A] = 0;
|
|
mem[0x08A9] = 0;
|
|
mem[0x08C4] = 0;
|
|
mem[0x008A] = 0;
|
|
// Invalidate HEADER section cache so demand-loads fire.
|
|
mem[0x08EA] = mem[0x08EB] = mem[0x08EC] = mem[0x08ED] = 0;
|
|
fprintf(stderr, "scenery: reset cursor to LA7E0 = $%04X, DrawColorSpan at $%04X\n",
|
|
dispatcherEntry, drawColorSpanPC);
|
|
// EmitClippedLine isn't in the jump table; locate via byte
|
|
// pattern would be ideal, but counting DrawColorSpan suffices
|
|
// -- every line eventually goes through it.
|
|
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
long drawCount = 0;
|
|
long opCount = 0;
|
|
long emitOpCount = 0; // count cursor-driven scenery opcodes
|
|
|
|
// Address of SceneryInterpreterStep / SceneryDispatch in capture
|
|
// (boot patches relocate it). Find by searching for the byte
|
|
// pattern after `lda ($8B),y; bmi; cmp #$46; bmi`.
|
|
// The fetch is `B1 8B 30 ?? C9 46 30 ??` (lda($8B),y; bmi <off>; cmp #$46; bmi <off>).
|
|
uint16_t fetchPC = 0;
|
|
for (int i = 0x6000; i < 0xB400; i++) {
|
|
if (mem[i] == 0xB1 && mem[i+1] == 0x8B && mem[i+2] == 0x30
|
|
&& mem[i+4] == 0xC9 && mem[i+5] == 0x46 && mem[i+6] == 0x30) {
|
|
fetchPC = (uint16_t)i;
|
|
break;
|
|
}
|
|
}
|
|
fprintf(stderr, " scenery fetch at $%04X (lda ($8B),y)\n", fetchPC);
|
|
|
|
// Sequence in chunk5 MainLoop (chunk5.s line 5403+):
|
|
// SetupViewProjection ($6000)
|
|
// ShowSimpleCrashMessage / HandleCrashOrSplash (skipped here)
|
|
// FlipPagesFillViewportRelay ($6003) -- runs a SECOND scenery
|
|
// interpreter pass on the data at $8B/$8C derived from $77
|
|
// (= the boot pre-render that draws horizon-line water/sky
|
|
// features in violet/blue, leaving STALE bytes in the hires
|
|
// page that the main scenery pass doesn't overwrite).
|
|
// ProcessScenery ($6006)
|
|
const uint16_t calls[3] = { 0x6000, 0x6003, 0x6006 };
|
|
const char *callName[3] = { "SetupViewProjection", "FlipPagesFillViewport", "ProcessScenery" };
|
|
for (int callIdx = 0; callIdx < 3; callIdx++) {
|
|
reg_a = reg_x = reg_y = 0;
|
|
push16(0xFFFE);
|
|
pc = calls[callIdx];
|
|
fprintf(stderr, " -- %s ($%04X) --\n", callName[callIdx], pc);
|
|
long startOps = opCount;
|
|
long startDraws = drawCount;
|
|
long startFetches = emitOpCount;
|
|
uint16_t lastPC = 0;
|
|
long opsSinceFetch = 0;
|
|
long maxOpsSinceFetch = 0;
|
|
for (cycles = 0; cycles < 20000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) {
|
|
fprintf(stderr, " EXIT pc=$%04X after %ld ops, ops-since-last-fetch=%ld\n",
|
|
pc, opCount - startOps, opsSinceFetch);
|
|
break;
|
|
}
|
|
if (pc == drawColorSpanPC) {
|
|
drawCount++;
|
|
}
|
|
if (fetchPC != 0 && pc == fetchPC) {
|
|
emitOpCount++;
|
|
uint16_t cur = (uint16_t)(mem[0x8B] | (mem[0x8C] << 8));
|
|
if (getenv("FS2TRACE_BASE") != NULL) {
|
|
// Print $4A..$52 BEFORE this op runs
|
|
// so we can pair "before $24" with
|
|
// "after $24" for diff.
|
|
fprintf(stderr,
|
|
" fetch #%ld cursor=$%04X opcode=$%02X base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] cam=[%02X%02X,%02X%02X,%02X%02X]\n",
|
|
emitOpCount - startFetches,
|
|
cur, mem[cur],
|
|
mem[0x4A], mem[0x4B], mem[0x4C],
|
|
mem[0x4D], mem[0x4E], mem[0x4F],
|
|
mem[0x50], mem[0x51], mem[0x52],
|
|
mem[0x67], mem[0x66],
|
|
mem[0x69], mem[0x68],
|
|
mem[0x6B], mem[0x6A]);
|
|
} else {
|
|
fprintf(stderr,
|
|
" fetch #%ld cursor=$%04X opcode=$%02X (S=$%02X)\n",
|
|
emitOpCount - startFetches,
|
|
cur, mem[cur], reg_s);
|
|
}
|
|
if (opsSinceFetch > maxOpsSinceFetch) {
|
|
maxOpsSinceFetch = opsSinceFetch;
|
|
}
|
|
opsSinceFetch = 0;
|
|
}
|
|
lastPC = pc;
|
|
opsSinceFetch++;
|
|
opCount++;
|
|
step();
|
|
}
|
|
fprintf(stderr,
|
|
" last pc=$%04X reg_s=$%02X stop=%d max_ops_between_fetches=%ld\n",
|
|
lastPC, reg_s, stop, maxOpsSinceFetch);
|
|
fprintf(stderr, " %ld ops, %ld scenery-fetches, %ld draws\n",
|
|
opCount - startOps, emitOpCount - startFetches,
|
|
drawCount - startDraws);
|
|
if (callIdx == 0) {
|
|
// After SetupViewProjection, dump the matrix so
|
|
// we know the projection cascade succeeded.
|
|
fprintf(stderr,
|
|
" matrix $78..$89: "
|
|
"[%d %d %d / %d %d %d / %d %d %d]\n",
|
|
(int16_t)(mem[0x78] | (mem[0x79]<<8)),
|
|
(int16_t)(mem[0x7A] | (mem[0x7B]<<8)),
|
|
(int16_t)(mem[0x7C] | (mem[0x7D]<<8)),
|
|
(int16_t)(mem[0x7E] | (mem[0x7F]<<8)),
|
|
(int16_t)(mem[0x80] | (mem[0x81]<<8)),
|
|
(int16_t)(mem[0x82] | (mem[0x83]<<8)),
|
|
(int16_t)(mem[0x84] | (mem[0x85]<<8)),
|
|
(int16_t)(mem[0x86] | (mem[0x87]<<8)),
|
|
(int16_t)(mem[0x88] | (mem[0x89]<<8)));
|
|
}
|
|
}
|
|
fprintf(stderr, "Total: %ld ops, %ld scenery-fetches, %ld DrawColorSpan calls\n",
|
|
opCount, emitOpCount, drawCount);
|
|
return 0;
|
|
}
|
|
|
|
|
|
// --draws: run the FULL chunk5 dispatcher (SetupViewProjection +
|
|
// ProcessScenery) against an unpatched chunk5 binary, watching the
|
|
// DrawColorLine entry trampoline at $6009 and printing every line
|
|
// drawn. This produces a bit-exact reference list of polygons
|
|
// chunk5 would emit given the supplied input state -- the answer
|
|
// to "what should port draw to match the original FS2 binary."
|
|
//
|
|
// Usage:
|
|
// fs2trace --draws [ram.bin]
|
|
// FS2TRACE_PORT_STATE=1 sets up port-equivalent ZP for Meigs boot
|
|
// (camera $5C/$5D=287, $64/$65=804, $6C/$6D=-109, etc.) and
|
|
// overrides the matrix to MAME's runtime values exactly.
|
|
//
|
|
// Output: one line per draw with screen X1/Y1/X2/Y2 and the V1/V2
|
|
// 3D coordinates from $CB..$D0 + $D4..$D9 at the moment of the call.
|
|
static int drawsMode(int argc, char **argv) {
|
|
const char *ramPath = (argc > 2)
|
|
? argv[2]
|
|
: "/home/scott/claude/flight/port/sceneryRam_FS2.1.bin";
|
|
FILE *rf = fopen(ramPath, "rb");
|
|
if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; }
|
|
if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
|
|
fprintf(stderr, "RAM image short read\n");
|
|
fclose(rf);
|
|
return 1;
|
|
}
|
|
fclose(rf);
|
|
// The captured RAM is the MAME-patched chunk5 (Apply64KPatchTable
|
|
// installed at boot). Don't overlay the unpatched source --
|
|
// that would clobber the runtime patches AND replace the
|
|
// dispatcher bytecode at $A800+ (which only exists in the
|
|
// patched RAM; source has dispatcher data at $B000).
|
|
// The patched chunk5 has dispatcher entry at $A800 = LA7E0.
|
|
// chunk3 ($D300-$F3FF) holds the 64K callbacks (LookupADFStation
|
|
// etc.) the patched chunk5 jumps into; load that too so those
|
|
// calls don't hit unmapped memory. chunk2 ($F600-$FBFF) holds
|
|
// a few helper routines.
|
|
// chunk3 ($D300-$F3FF) holds the 64K callbacks the patched
|
|
// chunk5 jumps into for $05/$1D/$1E station records, ADF
|
|
// input, magneto state, etc. The boot RAM dump captures the
|
|
// ROM bank at $D300+ (= zeros) instead of the LC-RAM-resident
|
|
// chunk3, so we have to overlay the source binary; routine
|
|
// addresses in source aren't quite the same as MAME's runtime
|
|
// layout but the in-record advance values are unaffected.
|
|
FILE *cf = fopen("/home/scott/claude/flight/out/3_d300-f3ff", "rb");
|
|
if (cf != NULL) {
|
|
size_t n = fread(mem + 0xD300, 1, 0xF400 - 0xD300, cf);
|
|
fclose(cf);
|
|
fprintf(stderr, "loaded chunk3 (%zu bytes)\n", n);
|
|
}
|
|
cf = fopen("/home/scott/claude/flight/out/2_f600-fbff", "rb");
|
|
if (cf != NULL) { (void)fread(mem + 0xF600, 1, 0xFC00 - 0xF600, cf); fclose(cf); }
|
|
|
|
// Override the SceneryOpADFRecord / NAVRecord / COMRecord
|
|
// patches: source-binary chunk3 has these routines at
|
|
// different addresses than MAME's runtime, so the patched JMP
|
|
// targets in port_ram point at random source bytes. Restore
|
|
// the unpatched 48K behaviour (= just advance past the record)
|
|
// so the dispatcher doesn't crash entering chunk3. Station
|
|
// records don't draw anything anyway.
|
|
// SceneryOpADFRecord at $6021: source = "lda #$09; jmp $67FD"
|
|
// (= advance 9, continue). The patched JMP $DB3F would call
|
|
// chunk3 LookupADFStation but that maps to a different routine
|
|
// in source, so undo the patch.
|
|
// SceneryOpAdvanceAndContinue: in source chunk5 the entry
|
|
// trampoline at $6018 is `JMP SceneryOpAdvanceAndContinue`. The
|
|
// patched RAM preserves that trampoline, so we just read the
|
|
// target out of $6019/$601A.
|
|
uint16_t advanceAndContinue = 0;
|
|
if (mem[0x6018] == 0x4C) {
|
|
advanceAndContinue = (uint16_t)(mem[0x6019] | (mem[0x601A] << 8));
|
|
}
|
|
fprintf(stderr, " SceneryOpAdvanceAndContinue at $%04X\n", advanceAndContinue);
|
|
if (mem[0x6021] == 0x4C && advanceAndContinue != 0) { // patched JMP -> chunk3
|
|
mem[0x6021] = 0xA9; // lda
|
|
mem[0x6022] = 0x09; // #$09
|
|
mem[0x6023] = 0x4C; // jmp
|
|
mem[0x6024] = (uint8_t)( advanceAndContinue & 0xFF);
|
|
mem[0x6025] = (uint8_t)((advanceAndContinue >> 8) & 0xFF);
|
|
fprintf(stderr, " unpatched SceneryOpADFRecord ($6021) -> $%04X\n", advanceAndContinue);
|
|
}
|
|
// Same for NAVRecord and COMRecord -- locate by SceneryOpcodeTable
|
|
// entries for $1D and $1E.
|
|
// Find SceneryOpcodeTable: `cmp #$46; bmi <target>` then the
|
|
// target is SceneryDispatch which loads from the table. Easier
|
|
// to search the table itself: the first entry should point
|
|
// at SceneryOpEmitV1XformAndPlot; the $1D entry is at
|
|
// table+$1D*2 and points at SceneryOpNAVRecord. We can find
|
|
// the table by looking for a known entry sequence.
|
|
// For now, scan the dispatcher area for any byte sequence that
|
|
// looks like `JMP <chunk3>` (= $4C $xx $D[XYZ]) and patch
|
|
// back to advance-and-continue with appropriate length.
|
|
// chunk5.s says: $1D = NAVRecord (11-byte), $1E = COMRecord
|
|
// (variable-length).
|
|
// SceneryOpNAVRecord at... actually NAVRecord might or might
|
|
// not be patched the same way. Be defensive: scan the
|
|
// SceneryOpcodeTable for the $1D handler address and check
|
|
// for a JMP-pattern there.
|
|
|
|
// DrawColorLine entry: in the patched binary, $6009 = JMP (relayed),
|
|
// and $601B = JMP DrawColorSpan. DrawColorLine itself is the
|
|
// function called from EmitClippedLine. Find its entry by
|
|
// scanning for the byte signature `lda $E9; sec; sbc $EB`
|
|
// (= chunk5.s line 3556).
|
|
uint16_t drawColorLinePC = 0;
|
|
for (int i = 0x6000; i < 0xB400; i++) {
|
|
if (mem[i] == 0xA5 && mem[i+1] == 0xE9
|
|
&& mem[i+2] == 0x38
|
|
&& mem[i+3] == 0xE5 && mem[i+4] == 0xEB) {
|
|
drawColorLinePC = (uint16_t)i;
|
|
break;
|
|
}
|
|
}
|
|
if (drawColorLinePC == 0) {
|
|
fprintf(stderr, "could not locate DrawColorLine\n");
|
|
return 1;
|
|
}
|
|
fprintf(stderr, "DrawColorLine entry: $%04X\n", drawColorLinePC);
|
|
|
|
// DrawColorSpan entry: chunk5 trampoline at $601B is `JMP DrawColorSpan`.
|
|
uint16_t drawColorSpanPC = 0;
|
|
if (mem[0x601B] == 0x4C) {
|
|
drawColorSpanPC = (uint16_t)(mem[0x601C] | (mem[0x601D] << 8));
|
|
}
|
|
fprintf(stderr, "DrawColorSpan entry: $%04X\n", drawColorSpanPC);
|
|
|
|
// SetEvenAndOddColorsAndPrepRowRoutine: scan for the byte signature
|
|
// `lda ColorTableEven,x; sta ColorByteEven` (= chunk5.s lines
|
|
// 3902-3903 = `BD ?? ?? 8D ?? ??`). The ColorTableEven address is
|
|
// at $7A00ish in MAME RAM, and ColorByteEven is some self-modified
|
|
// operand. Find the routine by looking for: BD ?? ?? 8D ?? ?? BD ?? ??.
|
|
// For now we just track its calls via the trampoline at $6024 if
|
|
// it's installed (= the older patched binary did this; check first).
|
|
uint16_t setEvenAndOddPC = 0;
|
|
// Try via the SceneryOpSetColor handler at the JMP that calls
|
|
// it (or via SetPixelDrawMode which we already have).
|
|
// Easier: scan for the ColorTableEven access pattern.
|
|
for (int i = 0x7000; i < 0xB400; i++) {
|
|
if (mem[i] == 0xBD // LDA abs,x
|
|
&& mem[i+3] == 0x8D // STA abs
|
|
&& mem[i+6] == 0xBD // LDA abs,x
|
|
&& mem[i+9] == 0x8D // STA abs
|
|
&& mem[i+12] == 0x8A) { // TXA
|
|
setEvenAndOddPC = (uint16_t)i;
|
|
break;
|
|
}
|
|
}
|
|
fprintf(stderr, "SetEvenAndOddColorsAndPrepRowRoutine entry: $%04X\n", setEvenAndOddPC);
|
|
|
|
// Set up port-equivalent ZP state. The values mirror what
|
|
// port's runScreenshot + sceneryAttachCamera produce.
|
|
if (getenv("FS2TRACE_PORT_STATE") != NULL) {
|
|
// Camera position in scenery units ($5C=287, $64=804).
|
|
mem[0x5C] = 0x1F; mem[0x5D] = 0x01;
|
|
mem[0x64] = 0x24; mem[0x65] = 0x03;
|
|
// Altitude pair ($5E/$5F = 768 from boot; $60/$61 = 0).
|
|
mem[0x5E] = 0x00; mem[0x5F] = 0x03;
|
|
mem[0x60] = 0x00; mem[0x61] = 0x00;
|
|
// Rotation inputs ($6C/$6D = -109 yaw/X-axis;
|
|
// $6E/$6F = 0 pitch/Z-axis; $70/$71 = 0 bank/Y-axis).
|
|
mem[0x6C] = 0x93; mem[0x6D] = 0xFF;
|
|
mem[0x6E] = 0x00; mem[0x6F] = 0x00;
|
|
mem[0x70] = 0x00; mem[0x71] = 0x00;
|
|
mem[0x0A70] = 0x00; // ViewDirection
|
|
// Camera-section deltas. Port's sceneryAttachCamera
|
|
// sets $66/$67=0, $68/$69 = wyUnits (= cam.worldY=25
|
|
// metres for Meigs boot, no scaling), $6A/$6B=0. Reset
|
|
// to match.
|
|
mem[0x66] = 0x00; mem[0x67] = 0x00;
|
|
mem[0x68] = 0x19; mem[0x69] = 0x00; // 25
|
|
mem[0x6A] = 0x00; mem[0x6B] = 0x00;
|
|
fprintf(stderr, "FS2TRACE_PORT_STATE: ZP set for Meigs boot\n");
|
|
}
|
|
|
|
// Run SetupViewProjection ($6000) to compute the matrix at
|
|
// $78..$89 from the just-set $6C/$6E/$70 inputs.
|
|
reg_a = reg_x = reg_y = 0;
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
pc = 0x6000;
|
|
push16(0xFFFE);
|
|
for (cycles = 0; cycles < 5000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) break;
|
|
step();
|
|
}
|
|
fprintf(stderr,
|
|
" matrix: row0=(%d,%d,%d) row1=(%d,%d,%d) row2=(%d,%d,%d)\n",
|
|
(int16_t)(mem[0x78] | (mem[0x79]<<8)),
|
|
(int16_t)(mem[0x7A] | (mem[0x7B]<<8)),
|
|
(int16_t)(mem[0x7C] | (mem[0x7D]<<8)),
|
|
(int16_t)(mem[0x7E] | (mem[0x7F]<<8)),
|
|
(int16_t)(mem[0x80] | (mem[0x81]<<8)),
|
|
(int16_t)(mem[0x82] | (mem[0x83]<<8)),
|
|
(int16_t)(mem[0x84] | (mem[0x85]<<8)),
|
|
(int16_t)(mem[0x86] | (mem[0x87]<<8)),
|
|
(int16_t)(mem[0x88] | (mem[0x89]<<8)));
|
|
|
|
// Reset cursor to LA7E0 = $A800 (= clean dispatch start).
|
|
uint16_t dispatcherEntry = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
|
|
mem[0x8B] = (uint8_t)( dispatcherEntry & 0xFF);
|
|
mem[0x8C] = (uint8_t)((dispatcherEntry >> 8) & 0xFF);
|
|
// Clear in-progress flags so ProcessScenery starts fresh.
|
|
mem[0x08F3] = 0;
|
|
mem[0x090A] = 0;
|
|
mem[0x08A9] = 0;
|
|
mem[0x08C4] = 0;
|
|
mem[0x008A] = 0;
|
|
// Invalidate HEADER section cache so demand-loads fire (= match
|
|
// port's sceneryCacheInvalidated path).
|
|
mem[0x08EA] = mem[0x08EB] = mem[0x08EC] = mem[0x08ED] = 0;
|
|
// Reset $35/$36 (L631D base cache) so first $07/$24 forces
|
|
// a full base recompute.
|
|
mem[0x35] = 0;
|
|
mem[0x36] = 0;
|
|
fprintf(stderr, " cursor LA7E0 = $%04X\n", dispatcherEntry);
|
|
|
|
// Find dispatcher fetch instruction (= the LDA ($8B),Y in
|
|
// SceneryInterpreterStep). Same heuristic as sceneryMode.
|
|
uint16_t fetchPC = 0;
|
|
for (int i = 0x6000; i < 0xB400; i++) {
|
|
if (mem[i] == 0xB1 && mem[i+1] == 0x8B && mem[i+2] == 0x30
|
|
&& mem[i+4] == 0xC9 && mem[i+5] == 0x46 && mem[i+6] == 0x30) {
|
|
fetchPC = (uint16_t)i;
|
|
break;
|
|
}
|
|
}
|
|
fprintf(stderr, " scenery fetch instruction at $%04X\n", fetchPC);
|
|
|
|
// Run the chunk5 main-loop sequence: SetupViewProjection ($6000),
|
|
// FlipPagesFillViewport ($6003), ProcessScenery ($6006). The
|
|
// FlipPages pass is a SECOND scenery interpreter run on $0A78
|
|
// data (= boot pre-render), which can draw water/horizon polygons
|
|
// in HIRES_VIOLET that ProcessScenery never touches.
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
long drawCount = 0;
|
|
long opCount = 0;
|
|
long fetchCount = 0;
|
|
int prevPCWasNotDraw = 1;
|
|
printf("# fs2trace --draws: chunk5 DrawColorLine sequence (3-pass)\n");
|
|
const char *trace = getenv("FS2TRACE_DRAWS_TRACE");
|
|
const uint16_t passes[3] = { 0x6000, 0x6003, 0x6006 };
|
|
const char *passName[3] = { "SetupViewProjection", "FlipPagesFillViewport", "ProcessScenery" };
|
|
for (int passIdx = 0; passIdx < 3; passIdx++) {
|
|
stop = 0;
|
|
reg_a = reg_x = reg_y = 0;
|
|
pc = passes[passIdx];
|
|
push16(0xFFFE);
|
|
fprintf(stderr, " ===== %s ($%04X) =====\n", passName[passIdx], pc);
|
|
long passStartOps = opCount;
|
|
long passStartDraws = drawCount;
|
|
uint16_t lastPC = 0;
|
|
for (cycles = 0; cycles < 1000000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) {
|
|
fprintf(stderr, " hit sentinel pc=$%04X (last pc=$%04X) at op %ld\n",
|
|
pc, lastPC, opCount);
|
|
break;
|
|
}
|
|
if (fetchPC != 0 && pc == fetchPC) {
|
|
fetchCount++;
|
|
if (trace != NULL) {
|
|
uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
|
|
fprintf(stderr,
|
|
" fetch #%ld cur=$%04X op=$%02X $29=$%02X\n",
|
|
fetchCount, cur, mem[cur], mem[0x29]);
|
|
}
|
|
// FS2TRACE_VTX_DUMP: log V1/V2 (= mem[$CB..$D0] /
|
|
// mem[$D4..$D9]) on every fetch after a $40/$41
|
|
// emit, so we can diff our port's transform output
|
|
// against the authentic FS2 trace per-vertex.
|
|
// FS2TRACE_FRAME_DUMP: log state at every $24/$07
|
|
// op fetch + after it executes, so port-vs-MAME
|
|
// frame-setup divergence can be located precisely.
|
|
if (getenv("FS2TRACE_FRAME_DUMP") != NULL) {
|
|
static uint8_t prevFrameOp = 0;
|
|
static uint16_t prevFrameCur = 0;
|
|
if (prevFrameOp == 0x24 || prevFrameOp == 0x07) {
|
|
fprintf(stderr,
|
|
" POST-$%02X(var=$%02X) cam=(%d,%d,%d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] $19=$%02X $1C=$%02X $1F=$%02X $35=$%02X $36=$%02X (was cur=$%04X)\n",
|
|
prevFrameOp,
|
|
mem[prevFrameCur + 1],
|
|
(int16_t)(mem[0x66] | (mem[0x67] << 8)),
|
|
(int16_t)(mem[0x68] | (mem[0x69] << 8)),
|
|
(int16_t)(mem[0x6A] | (mem[0x6B] << 8)),
|
|
mem[0x4A], mem[0x4B], mem[0x4C],
|
|
mem[0x4D], mem[0x4E], mem[0x4F],
|
|
mem[0x50], mem[0x51], mem[0x52],
|
|
mem[0x19], mem[0x1C], mem[0x1F],
|
|
mem[0x35], mem[0x36],
|
|
prevFrameCur);
|
|
}
|
|
uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
|
|
if (mem[cur] == 0x24 || mem[cur] == 0x07) {
|
|
prevFrameOp = mem[cur];
|
|
prevFrameCur = cur;
|
|
} else {
|
|
prevFrameOp = 0;
|
|
}
|
|
}
|
|
if (getenv("FS2TRACE_VTX_DUMP") != NULL) {
|
|
static uint8_t prevOp = 0;
|
|
if (prevOp == 0x40 || prevOp == 0x41 || prevOp == 0x42) {
|
|
int16_t v1x = (int16_t)(mem[0xCB] | (mem[0xCC] << 8));
|
|
int16_t v1y = (int16_t)(mem[0xCD] | (mem[0xCE] << 8));
|
|
int16_t v1z = (int16_t)(mem[0xCF] | (mem[0xD0] << 8));
|
|
int16_t v2x = (int16_t)(mem[0xD4] | (mem[0xD5] << 8));
|
|
int16_t v2y = (int16_t)(mem[0xD6] | (mem[0xD7] << 8));
|
|
int16_t v2z = (int16_t)(mem[0xD8] | (mem[0xD9] << 8));
|
|
fprintf(stderr,
|
|
" POST-$%02X V1=(%6d,%6d,%6d) V2=(%6d,%6d,%6d) cam=(%d,%d,%d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X]\n",
|
|
prevOp, v1x, v1y, v1z, v2x, v2y, v2z,
|
|
(int16_t)(mem[0x66] | (mem[0x67] << 8)),
|
|
(int16_t)(mem[0x68] | (mem[0x69] << 8)),
|
|
(int16_t)(mem[0x6A] | (mem[0x6B] << 8)),
|
|
mem[0x4A], mem[0x4B], mem[0x4C],
|
|
mem[0x4D], mem[0x4E], mem[0x4F],
|
|
mem[0x50], mem[0x51], mem[0x52]);
|
|
}
|
|
uint16_t cur = mem[0x8B] | (mem[0x8C] << 8);
|
|
prevOp = mem[cur];
|
|
}
|
|
}
|
|
// Trace flow inside chunk3 (= 64K patched callbacks).
|
|
if (trace != NULL && pc >= 0xD300 && pc < 0xF400 && opCount > 2400) {
|
|
fprintf(stderr, " chunk3 pc=$%04X A=$%02X X=$%02X Y=$%02X SP=$%02X\n",
|
|
pc, reg_a, reg_x, reg_y, reg_s);
|
|
}
|
|
// Detect tight 6502 loops — same PC for many consecutive
|
|
// ops. If we sit at a single PC for >50k ops, log + bail.
|
|
static uint16_t stuckPC = 0;
|
|
static long stuckCount = 0;
|
|
if (pc == stuckPC) {
|
|
stuckCount++;
|
|
if (stuckCount == 50000) {
|
|
fprintf(stderr,
|
|
" STUCK at pc=$%04X for 50k ops at op=%ld (cur=$%04X)\n",
|
|
pc, opCount, mem[0x8B] | (mem[0x8C] << 8));
|
|
break;
|
|
}
|
|
} else {
|
|
stuckPC = pc;
|
|
stuckCount = 1;
|
|
}
|
|
lastPC = pc;
|
|
// Track active hires color (= last A passed to SetPixelDrawMode).
|
|
// SetPixelDrawMode is at the JMP target stored at $6010-$6011
|
|
// (= chunk5 trampoline `JMP SetPixelDrawMode` at $600F).
|
|
static uint16_t setPixelDrawModePC = 0xFFFF;
|
|
static uint8_t curHiresColor = 0xFF;
|
|
if (setPixelDrawModePC == 0xFFFF) {
|
|
// Resolve once: chunk5 has `4C lo hi` at $600F.
|
|
if (mem[0x600F] == 0x4C) {
|
|
setPixelDrawModePC = (uint16_t)(mem[0x6010] | (mem[0x6011] << 8));
|
|
fprintf(stderr, " SetPixelDrawMode resolved at $%04X\n", setPixelDrawModePC);
|
|
}
|
|
}
|
|
if (setPixelDrawModePC != 0xFFFF && pc == setPixelDrawModePC) {
|
|
curHiresColor = reg_a;
|
|
if (getenv("FS2TRACE_LOG_COLORS") != NULL) {
|
|
fprintf(stderr, " SetPixelDrawMode A=$%02X (hires color %d) at op=%ld\n",
|
|
reg_a, reg_a & 0x07, opCount);
|
|
}
|
|
}
|
|
// Track SetEvenAndOddColorsAndPrepRowRoutine -- the SPAN
|
|
// fill color setter. On entry X = hires color code.
|
|
if (setEvenAndOddPC != 0 && pc == setEvenAndOddPC) {
|
|
curHiresColor = reg_x;
|
|
if (getenv("FS2TRACE_LOG_COLORS") != NULL) {
|
|
fprintf(stderr, " SetEvenAndOddColors X=$%02X (hires color %d) at op=%ld\n",
|
|
reg_x, reg_x & 0x07, opCount);
|
|
}
|
|
}
|
|
// Track DrawColorSpan calls (= horizontal span fill) so we can
|
|
// see polygon FILLS in addition to line draws. ZP $E9 = start col,
|
|
// A on entry = end col, $27 = right edge.
|
|
static int prevPCWasNotSpan = 1;
|
|
if (drawColorSpanPC != 0 && pc == drawColorSpanPC && prevPCWasNotSpan) {
|
|
uint8_t startCol = mem[0xE9];
|
|
uint8_t endCol = reg_a;
|
|
uint8_t row = mem[0xB1];
|
|
printf("span %4ld col=$%02X: row=%3d startCol=%3d endCol=%3d\n",
|
|
drawCount, curHiresColor, row, startCol, endCol);
|
|
prevPCWasNotSpan = 0;
|
|
} else if (pc != drawColorSpanPC) {
|
|
prevPCWasNotSpan = 1;
|
|
}
|
|
if (pc == drawColorLinePC && prevPCWasNotDraw) {
|
|
// First instruction of DrawColorLine. Capture
|
|
// the line endpoints + V1/V2.
|
|
uint8_t x1 = mem[0xE9];
|
|
uint8_t y1 = mem[0xEA];
|
|
uint8_t x2 = mem[0xEB];
|
|
uint8_t y2 = mem[0xEC];
|
|
int16_t v1x = (int16_t)(mem[0xCB] | (mem[0xCC] << 8));
|
|
int16_t v1y = (int16_t)(mem[0xCD] | (mem[0xCE] << 8));
|
|
int16_t v1z = (int16_t)(mem[0xCF] | (mem[0xD0] << 8));
|
|
int16_t v2x = (int16_t)(mem[0xD4] | (mem[0xD5] << 8));
|
|
int16_t v2y = (int16_t)(mem[0xD6] | (mem[0xD7] << 8));
|
|
int16_t v2z = (int16_t)(mem[0xD8] | (mem[0xD9] << 8));
|
|
drawCount++;
|
|
uint16_t curAt = mem[0x8B] | (mem[0x8C] << 8);
|
|
printf("draw %4ld cur=$%04X col=$%02X: (%3d,%3d)-(%3d,%3d) V1=(%6d,%6d,%6d) V2=(%6d,%6d,%6d) base=[%02X%02X%02X,%02X%02X%02X,%02X%02X%02X] cam=[%02X%02X,%02X%02X,%02X%02X]\n",
|
|
drawCount, curAt, curHiresColor, x1, y1, x2, y2,
|
|
v1x, v1y, v1z, v2x, v2y, v2z,
|
|
mem[0x4A], mem[0x4B], mem[0x4C],
|
|
mem[0x4D], mem[0x4E], mem[0x4F],
|
|
mem[0x50], mem[0x51], mem[0x52],
|
|
mem[0x67], mem[0x66],
|
|
mem[0x69], mem[0x68],
|
|
mem[0x6B], mem[0x6A]);
|
|
prevPCWasNotDraw = 0;
|
|
} else if (pc != drawColorLinePC) {
|
|
prevPCWasNotDraw = 1;
|
|
}
|
|
opCount++;
|
|
uint16_t prevPC = pc;
|
|
step();
|
|
if (stop) {
|
|
fprintf(stderr, " step() set stop=1 prevPC=$%04X newPC=$%04X opCount=%ld\n",
|
|
prevPC, pc, opCount);
|
|
break;
|
|
}
|
|
}
|
|
fprintf(stderr, " pass-%s: %ld ops, %ld draws\n",
|
|
passName[passIdx], opCount - passStartOps, drawCount - passStartDraws);
|
|
}
|
|
fprintf(stderr, "Total: %ld 6502 ops, %ld dispatch fetches, %ld DrawColorLine calls (final pc=$%04X cur=$%04X)\n",
|
|
opCount, fetchCount, drawCount, pc, mem[0x8B] | (mem[0x8C] << 8));
|
|
return 0;
|
|
}
|
|
|
|
|
|
// --xform: run chunk5 TransformVertex7EBC ($7EBC) on the original
|
|
// binary (= source-faithful, not the captured patched chunk5). Lets
|
|
// us validate the C transliteration in chunk5Transform.c against the
|
|
// asm for arbitrary inputs.
|
|
//
|
|
// Usage:
|
|
// fs2trace --xform <vx_lo> <vx_hi> <vy_lo> <vy_hi> [state_overrides...]
|
|
//
|
|
// Inputs encode the 4 vertex bytes that follow the opcode in the
|
|
// scenery stream. The routine reads them via ($8B),y and computes
|
|
// transformed XYZ at $D4..$D9 (or $CB..$D0).
|
|
static int xformMode(int argc, char **argv) {
|
|
if (argc < 3) {
|
|
fprintf(stderr,
|
|
"usage: %s --xform <stream_addr_hex> [ram.bin]\n",
|
|
argv[0]);
|
|
fprintf(stderr,
|
|
" Loads RAM image (default tmp/capture_boot.bin) for state\n"
|
|
" (matrix/base/camera), then overlays the ORIGINAL chunk5\n"
|
|
" binary at $6000-$B3DF (so $7EBC has source bytes).\n"
|
|
" Sets cursor $8B/$8C to stream_addr-1 (so opcode is at\n"
|
|
" ($8B),0 -- the typical layout when the dispatcher would\n"
|
|
" invoke a vertex-emit handler at that opcode), and calls\n"
|
|
" TransformVertex7EBC with Y=9 (V2 destination).\n"
|
|
" Prints V2 (=$D4..$D9).\n");
|
|
return 2;
|
|
}
|
|
long streamAddr = strtol(argv[2], NULL, 0);
|
|
const char *ramPath = (argc > 3)
|
|
? argv[3]
|
|
: "/home/scott/claude/flight/tmp/capture_boot.bin";
|
|
|
|
FILE *rf = fopen(ramPath, "rb");
|
|
if (rf == NULL) { fprintf(stderr, "cannot open %s\n", ramPath); return 1; }
|
|
if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
|
|
fprintf(stderr, "RAM image short read\n");
|
|
fclose(rf);
|
|
return 1;
|
|
}
|
|
fclose(rf);
|
|
// Overlay ONLY the chunk5 code regions that don't overlap
|
|
// loaded scenery sections. At boot Meigs, the loaded section
|
|
// sits at ~$B280+, so we overlay $6000-$B27F (= source-faithful
|
|
// chunk5 code) and keep $B280+ as the captured section data.
|
|
// Also overlay all of chunk4 ($0200-$25FF) which holds the
|
|
// ZPScale / cos table -- the captured RAM has it shifted
|
|
// and we need the source addresses.
|
|
FILE *cf = fopen("/home/scott/claude/flight/out/4_0200-25ff", "rb");
|
|
if (cf != NULL) {
|
|
fread(mem + 0x0200, 1, 0x2400, cf);
|
|
fclose(cf);
|
|
}
|
|
cf = fopen("/home/scott/claude/flight/out/5_6000-b3df", "rb");
|
|
if (cf != NULL) {
|
|
fread(mem + 0x6000, 1, 0xB280 - 0x6000, cf);
|
|
fclose(cf);
|
|
}
|
|
|
|
// Cursor: opcode at streamAddr, vertex bytes at streamAddr+1.
|
|
// $8B/$8C = streamAddr (= the opcode address; ($8B),Y=0 reads
|
|
// the opcode, Y=1.. reads vertex bytes -- chunk5's normal layout).
|
|
mem[0x008B] = (uint8_t)( streamAddr & 0xFF);
|
|
mem[0x008C] = (uint8_t)((streamAddr >> 8) & 0xFF);
|
|
fprintf(stderr,
|
|
" state: $8B/$8C=$%04X matrix=[%d,%d,%d/%d,%d,%d/%d,%d,%d]\n",
|
|
(int)streamAddr,
|
|
(int)(int16_t)(mem[0x78] | (mem[0x79] << 8)),
|
|
(int)(int16_t)(mem[0x7A] | (mem[0x7B] << 8)),
|
|
(int)(int16_t)(mem[0x7C] | (mem[0x7D] << 8)),
|
|
(int)(int16_t)(mem[0x7E] | (mem[0x7F] << 8)),
|
|
(int)(int16_t)(mem[0x80] | (mem[0x81] << 8)),
|
|
(int)(int16_t)(mem[0x82] | (mem[0x83] << 8)),
|
|
(int)(int16_t)(mem[0x84] | (mem[0x85] << 8)),
|
|
(int)(int16_t)(mem[0x86] | (mem[0x87] << 8)),
|
|
(int)(int16_t)(mem[0x88] | (mem[0x89] << 8)));
|
|
fprintf(stderr,
|
|
" base: ($4A..$4C)=%02X%02X%02X ($4D..$4F)=%02X%02X%02X ($50..$52)=%02X%02X%02X\n",
|
|
mem[0x4A], mem[0x4B], mem[0x4C],
|
|
mem[0x4D], mem[0x4E], mem[0x4F],
|
|
mem[0x50], mem[0x51], mem[0x52]);
|
|
fprintf(stderr,
|
|
" cam ($66..$6B): %02X %02X %02X %02X %02X %02X\n",
|
|
mem[0x66], mem[0x67], mem[0x68], mem[0x69], mem[0x6A], mem[0x6B]);
|
|
fprintf(stderr,
|
|
" vertex bytes at $%04X: %02X %02X %02X %02X %02X\n",
|
|
(int)streamAddr,
|
|
mem[streamAddr+0], mem[streamAddr+1], mem[streamAddr+2],
|
|
mem[streamAddr+3], mem[streamAddr+4]);
|
|
|
|
// The asm's TransformVertex7EBC reads destSlot from $E5 (Y
|
|
// on entry). ProcessVertex2 sets Y = $D4 before JSR
|
|
// (= absolute address of V2 slot), and the routine stores
|
|
// output via `sta $00,x` with X loaded from $E5. So Y on
|
|
// entry = $D4 (V2) or $CB (V1).
|
|
reg_y = 0xD4;
|
|
reg_a = reg_x = 0;
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
pc = 0x7EBC;
|
|
push16(0xFFFE);
|
|
const char *traceXform = getenv("FS2TRACE_XFORM_TRACE");
|
|
for (cycles = 0; cycles < 5000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) {
|
|
break;
|
|
}
|
|
if (traceXform != NULL && pc >= 0x7EBC && pc <= 0x80AF) {
|
|
// Inside TransformVertex7EBC: print state.
|
|
fprintf(stderr,
|
|
"pc=$%04X A=$%02X X=$%02X Y=$%02X NVZC=%d%d%d%d "
|
|
"$18-$1A=%02X%02X%02X $1B-$1D=%02X%02X%02X $1E-$20=%02X%02X%02X "
|
|
"$9E$9F=%02X%02X $A2$A3=%02X%02X $2F=%02X\n",
|
|
pc, reg_a, reg_x, reg_y,
|
|
flag_n, flag_v, flag_z, flag_c,
|
|
mem[0x18], mem[0x19], mem[0x1A],
|
|
mem[0x1B], mem[0x1C], mem[0x1D],
|
|
mem[0x1E], mem[0x1F], mem[0x20],
|
|
mem[0x9E], mem[0x9F], mem[0xA2], mem[0xA3],
|
|
mem[0x2F]);
|
|
}
|
|
step();
|
|
}
|
|
|
|
int16_t v2x = (int16_t)((uint16_t)mem[0xD4] | ((uint16_t)mem[0xD5] << 8));
|
|
int16_t v2y = (int16_t)((uint16_t)mem[0xD6] | ((uint16_t)mem[0xD7] << 8));
|
|
int16_t v2z = (int16_t)((uint16_t)mem[0xD8] | ((uint16_t)mem[0xD9] << 8));
|
|
printf("xform stream@$%04X -> V2=(%d,%d,%d)\n",
|
|
(int)streamAddr, (int)v2x, (int)v2y, (int)v2z);
|
|
return 0;
|
|
}
|
|
|
|
|
|
// --zpscale a b: runs ScaleC2ByC4 at $1569 in chunk4 with the
|
|
// supplied 16-bit signed inputs in $C2/$C3 and $C4/$C5; prints the
|
|
// result. Loads chunk4 freshly from out/4_0200-25ff so the address
|
|
// matches the binary (vs the captured RAM which is patched).
|
|
static int zpscaleMode(int argc, char **argv) {
|
|
if (argc < 4) {
|
|
fprintf(stderr, "usage: %s --zpscale <a16> <b16>\n", argv[0]);
|
|
return 2;
|
|
}
|
|
long a = strtol(argv[2], NULL, 0);
|
|
long b = strtol(argv[3], NULL, 0);
|
|
loadOriginalChunks();
|
|
|
|
uint16_t ua = (uint16_t)((a < 0) ? (a + 0x10000) : a);
|
|
uint16_t ub = (uint16_t)((b < 0) ? (b + 0x10000) : b);
|
|
mem[0xC2] = (uint8_t)( ua & 0xFF);
|
|
mem[0xC3] = (uint8_t)((ua >> 8) & 0xFF);
|
|
mem[0xC4] = (uint8_t)( ub & 0xFF);
|
|
mem[0xC5] = (uint8_t)((ub >> 8) & 0xFF);
|
|
|
|
// ScaleC2ByC4 lives at $1565 (chunk4.s line 1565); ZPScale's
|
|
// wrapper at $1544 handles the $C2/$C4 marshalling for an
|
|
// arbitrary output address. We've already populated $C2..$C5
|
|
// directly so we call ScaleC2ByC4 ($1565) and pull the
|
|
// result out of A:X via the trampoline below.
|
|
reg_a = reg_x = reg_y = 0;
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
pc = 0x1569;
|
|
push16(0xFFFE);
|
|
|
|
const char *traceEnv = getenv("FS2TRACE_ZPSCALE_TRACE");
|
|
for (cycles = 0; cycles < 1000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) {
|
|
break;
|
|
}
|
|
if (traceEnv != NULL) {
|
|
fprintf(stderr,
|
|
"pc=$%04X A=$%02X X=$%02X Y=$%02X C=%d "
|
|
"C2=$%02X C3=$%02X C4=$%02X C5=$%02X "
|
|
"A7=$%02X A8=$%02X\n",
|
|
pc, reg_a, reg_x, reg_y, flag_c,
|
|
mem[0xC2], mem[0xC3], mem[0xC4], mem[0xC5],
|
|
mem[0xA7], mem[0xA8]);
|
|
}
|
|
step();
|
|
}
|
|
// Result: A is low byte, X is high byte.
|
|
int16_t result = (int16_t)((uint16_t)reg_a | ((uint16_t)reg_x << 8));
|
|
printf("ScaleC2ByC4(%ld, %ld) = %d\n", a, b, result);
|
|
return 0;
|
|
}
|
|
|
|
|
|
// --l177b A X [ramfile]: probe chunk4 L177B (cos lookup with sub-byte
|
|
// interpolation) in isolation. Returns the int16 result (A:X reg).
|
|
static int l177bMode(int argc, char **argv) {
|
|
if (argc < 4) {
|
|
fprintf(stderr, "usage: %s --l177b <a_byte> <x_byte> [ram.bin]\n", argv[0]);
|
|
return 2;
|
|
}
|
|
long aIn = strtol(argv[2], NULL, 0);
|
|
long xIn = strtol(argv[3], NULL, 0);
|
|
if (argc > 4) {
|
|
FILE *rf = fopen(argv[4], "rb");
|
|
if (rf == NULL) { fprintf(stderr, "cannot open %s\n", argv[4]); return 1; }
|
|
if (fread(mem, 1, MEM_SIZE, rf) != MEM_SIZE) {
|
|
fprintf(stderr, "RAM image short read\n");
|
|
fclose(rf);
|
|
return 1;
|
|
}
|
|
fclose(rf);
|
|
} else {
|
|
loadOriginalChunks();
|
|
}
|
|
reg_a = (uint8_t)(aIn & 0xFF);
|
|
reg_x = (uint8_t)(xIn & 0xFF);
|
|
reg_y = 0;
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
// L177B at $177B; L1778 (sin entry) is 3 bytes earlier.
|
|
// When running against a capture, FS2's boot patches shifted
|
|
// chunk4 code by 2 bytes, so the entry points are at $177D /
|
|
// $177A respectively. FS2TRACE_PC overrides explicitly.
|
|
const char *l1778Env = getenv("FS2TRACE_USE_L1778");
|
|
const char *pcEnv = getenv("FS2TRACE_PC");
|
|
if (pcEnv != NULL) {
|
|
pc = (uint16_t)strtol(pcEnv, NULL, 0);
|
|
} else {
|
|
pc = (l1778Env != NULL) ? 0x1778 : 0x177B;
|
|
}
|
|
push16(0xFFFE);
|
|
const char *traceEnv2 = getenv("FS2TRACE_L177B_TRACE");
|
|
for (cycles = 0; cycles < 1000000 && !stop; ) {
|
|
if (pc == 0xFFFF || pc == 0x0000) {
|
|
break;
|
|
}
|
|
if (traceEnv2 != NULL) {
|
|
fprintf(stderr,
|
|
"pc=$%04X A=$%02X X=$%02X Y=$%02X N=%d Z=%d C=%d\n",
|
|
pc, reg_a, reg_x, reg_y, flag_n, flag_z, flag_c);
|
|
}
|
|
step();
|
|
}
|
|
int16_t result = (int16_t)((uint16_t)reg_a | ((uint16_t)reg_x << 8));
|
|
printf("L177B(%ld, %ld) = %d\n", aIn, xIn, result);
|
|
return 0;
|
|
}
|
|
|
|
|
|
int main(int argc, char **argv) {
|
|
// --matrix mode: chunk5 SetupViewProjection oracle.
|
|
if (argc >= 2 && strcmp(argv[1], "--matrix") == 0) {
|
|
return matrixMode(argc, argv);
|
|
}
|
|
// --zpscale: probe chunk4 ScaleC2ByC4 in isolation.
|
|
if (argc >= 2 && strcmp(argv[1], "--zpscale") == 0) {
|
|
return zpscaleMode(argc, argv);
|
|
}
|
|
// --l177b: probe chunk4 cos lookup.
|
|
if (argc >= 2 && strcmp(argv[1], "--l177b") == 0) {
|
|
return l177bMode(argc, argv);
|
|
}
|
|
// --scenery: count chunk5 ProcessScenery DrawColorSpan calls.
|
|
if (argc >= 2 && strcmp(argv[1], "--scenery") == 0) {
|
|
return sceneryMode(argc, argv);
|
|
}
|
|
// --xform: chunk5 TransformVertex7EBC oracle.
|
|
if (argc >= 2 && strcmp(argv[1], "--xform") == 0) {
|
|
return xformMode(argc, argv);
|
|
}
|
|
// --draws: full chunk5 DrawColorLine trace.
|
|
if (argc >= 2 && strcmp(argv[1], "--draws") == 0) {
|
|
return drawsMode(argc, argv);
|
|
}
|
|
|
|
// --nibble in.bin out.bin: run the chunk3 nibble decoder on
|
|
// raw input bytes, emit encoded output. Used to verify the C
|
|
// port matches what the 6502 emulator produces.
|
|
if (argc == 4 && strcmp(argv[1], "--nibble") == 0) {
|
|
FILE *in = fopen(argv[2], "rb");
|
|
FILE *out = fopen(argv[3], "wb");
|
|
if (in == NULL || out == NULL) {
|
|
fprintf(stderr, "could not open files\n");
|
|
return 1;
|
|
}
|
|
fseek(in, 0, SEEK_END);
|
|
size_t sz = (size_t)ftell(in);
|
|
fseek(in, 0, SEEK_SET);
|
|
uint8_t *buf = malloc(sz);
|
|
if (fread(buf, 1, sz, in) != sz) {
|
|
fprintf(stderr, "short read\n");
|
|
return 1;
|
|
}
|
|
fclose(in);
|
|
uint8_t output[0x3E00 - 0x3B60];
|
|
uint8_t cksum[2];
|
|
int consumed = nibbleDecode(buf, output, cksum);
|
|
fwrite(output, 1, sizeof(output), out);
|
|
fwrite(cksum, 1, 2, out);
|
|
fclose(out);
|
|
fprintf(stderr, "consumed %d input bytes, wrote %zu output bytes\n",
|
|
consumed, sizeof(output) + 2);
|
|
free(buf);
|
|
return 0;
|
|
}
|
|
|
|
const char *chunk4Path = "/home/scott/claude/flight/out/4_0200-25ff";
|
|
const char *chunk5Path = "/home/scott/claude/flight/out/5_6000-b3df";
|
|
const char *chunk3Path = "/home/scott/claude/flight/out/3_d300-f3ff";
|
|
const char *chunk2Path = "/home/scott/claude/flight/out/2_f600-fbff";
|
|
const char *diskPath = (argc > 1) ? argv[1] : "/home/scott/claude/flight/orig/flight simulator 2 with scenery PRODOS (san inc pack).po";
|
|
const char *blocksPath = (argc > 2) ? argv[2] : "/home/scott/claude/flight/downloads/scenery/extracted/A2.SDS1.blocks";
|
|
// Default entry: LoadSceneryFile1 (descriptor $0625, 6 sectors from
|
|
// sector $25). LoadSceneryFile0 ($A66B) reads sector $22 first and on
|
|
// four disks (SDS1, SD1, SD3, SD7A) the resulting LA7E0 word is a
|
|
// low-memory address (e.g. $0003) so the bootstrap copy stomps the
|
|
// stack page and the run halts. LoadSceneryFile1 sources LA7E0 from
|
|
// a different file region and lands every disk on a sane chunk5
|
|
// address, so all 13 traces complete to the $FFFF sentinel.
|
|
uint16_t entryPC = (argc > 3) ? (uint16_t)strtol(argv[3], NULL, 0) : 0xA674;
|
|
|
|
memset(mem, 0, MEM_SIZE);
|
|
|
|
if (!loadChunk(chunk4Path, 0x0200, 0x2400)) return 1;
|
|
if (!loadChunk(chunk5Path, 0x6000, 0x53E0)) return 1;
|
|
if (!loadChunk(chunk3Path, 0xD300, 0x2100)) return 1;
|
|
if (!loadChunk(chunk2Path, 0xF600, 0x0600)) return 1;
|
|
|
|
// Disk image.
|
|
FILE *df = fopen(diskPath, "rb");
|
|
if (df == NULL) { fprintf(stderr, "cannot open %s\n", diskPath); return 1; }
|
|
fseek(df, 0, SEEK_END);
|
|
diskSize = (size_t)ftell(df);
|
|
fseek(df, 0, SEEK_SET);
|
|
diskImage = malloc(diskSize);
|
|
if (diskImage == NULL || fread(diskImage, 1, diskSize, df) != diskSize) {
|
|
fprintf(stderr, "cannot read %s\n", diskPath);
|
|
fclose(df);
|
|
return 1;
|
|
}
|
|
fclose(df);
|
|
fprintf(stderr, "disk: %s (%zu bytes)\n", diskPath, diskSize);
|
|
|
|
// Patch the SmartPort entry: when the FS2 code calls $C70D it
|
|
// would normally land in the absent firmware; we trap the read
|
|
// by hooking pc==$C70D inside step().
|
|
// Provide a sentinel BRK so any unintended fall-through halts.
|
|
mem[0xC70D] = 0x00;
|
|
|
|
// 64K mode patch: chunk4 has six L1EAD..L1EC1 thunks that the
|
|
// 64K patch table rewrites to JMP into chunk3's
|
|
// SceneryLoaderEntry1..7. We replicate those JMP targets
|
|
// directly so the loader path actually reaches the chunk3
|
|
// implementations (Apply64KPatchTable would otherwise need to
|
|
// be run too).
|
|
struct { uint16_t thunk; uint16_t entry; } patches[] = {
|
|
{ 0x1EAD, 0xD3D0 }, // SceneryLoaderEntry1
|
|
{ 0x1EB0, 0xD3D3 }, // SceneryLoaderEntry2
|
|
{ 0x1EB3, 0xD3D6 }, // SceneryLoaderEntry3
|
|
{ 0x1EB6, 0xD3D9 }, // SceneryLoaderEntry4
|
|
{ 0x1EB9, 0xD3DC }, // SceneryLoaderEntry5
|
|
{ 0x1EBC, 0xD3DF }, // SceneryLoaderEntry6
|
|
{ 0x1EC1, 0xD3E2 }, // SceneryLoaderEntry7
|
|
};
|
|
for (size_t i = 0; i < sizeof(patches) / sizeof(patches[0]); i++) {
|
|
uint16_t t = patches[i].thunk;
|
|
uint16_t e = patches[i].entry;
|
|
mem[t] = 0x4C; // JMP abs
|
|
mem[t + 1] = (uint8_t)(e & 0xFF);
|
|
mem[t + 2] = (uint8_t)((e >> 8) & 0xFF);
|
|
}
|
|
// Also flag 64K mode (chunk4 L1E07).
|
|
mem[0x1E07] = 0x01;
|
|
|
|
// Populate ReadBlockDataBuffer (chunk3 $D575) from the .blocks
|
|
// file produced by prodosextract. The list is a sequence of
|
|
// 16-bit little-endian ProDOS block numbers, one per logical
|
|
// file block. ReadBlockDataBuffer wants block-low bytes at
|
|
// offset 0..255 and block-high bytes at offset 256..511, so
|
|
// we split the entries on load.
|
|
// LD5C8 self-extension stub. The real FS2 boot decodes a
|
|
// chunk of code into LD5C8 = ReadBlockDataBuffer + 83 via the
|
|
// protected-disk loader path (SceneryReadDecoded -> nibble
|
|
// decode -> JSR into the decoded code). Without simulating
|
|
// that whole flow, JSR LD5C8 from chunk3 L416 / L514 lands on
|
|
// zero (BRK) and halts the emulator. We patch LD5C8 with a
|
|
// CLC; RTS so those calls are harmless no-ops -- enough to
|
|
// let the rest of the loader proceed end-to-end.
|
|
mem[0xD5C8] = 0x18; // CLC
|
|
mem[0xD5C9] = 0x60; // RTS
|
|
|
|
// Block-list cap: chunk3's ReadBlockDataBuffer ($D575) holds
|
|
// 256 low bytes + 256 high bytes = 512 bytes total ($D575 +
|
|
// $D675). FS2 sector counters can index well past the boot's
|
|
// first 16 blocks (e.g. HEADER's section $76 -> entry 240), so
|
|
// we populate the FULL 256 entries. LD5C8 (= buf+83) is
|
|
// overwritten at runtime by the FS2 boot loader anyway.
|
|
FILE *bf = fopen(blocksPath, "rb");
|
|
if (bf == NULL) {
|
|
fprintf(stderr, "warning: cannot open %s; falling back to identity map\n", blocksPath);
|
|
for (int i = 0; i < 256; i++) {
|
|
mem[0xD575 + i] = (uint8_t)(i & 0xFF);
|
|
}
|
|
} else {
|
|
uint8_t buf[2];
|
|
int i = 0;
|
|
while (i < 256 && fread(buf, 1, 2, bf) == 2) {
|
|
mem[0xD575 + i] = buf[0];
|
|
mem[0xD575 + 256 + i] = buf[1];
|
|
i++;
|
|
}
|
|
fclose(bf);
|
|
fprintf(stderr, "loaded %d block-list entries from %s\n", i, blocksPath);
|
|
}
|
|
|
|
// Run one or more entry points. Default is a single entry
|
|
// (back-compat with previous fs2trace usage). With --chain,
|
|
// run File1+File2+File3+File4 in sequence: each one's RAM
|
|
// changes accumulate, so LA7E0 ends up pointing at the full
|
|
// loaded scenery instead of just the partial File1 result.
|
|
// With --boot, start at MainGameEntry ($ABBA) so
|
|
// Apply64KPatchTable rewrites the PatchSlot_* dispatch slots
|
|
// before any scenery loader runs. PromptColorOrBW is stubbed
|
|
// (default = colour) so the trace doesn't block on a key.
|
|
bool chainMode = (getenv("FS2TRACE_CHAIN") != NULL);
|
|
bool bootMode = (getenv("FS2TRACE_BOOT") != NULL);
|
|
// FS2TRACE_CITY=N: load city scenery file N then run MainLoop
|
|
// so its dispatcher's $0D HEADER demand-loads fire. Cities are:
|
|
// N=0 -> LoadSceneryFile0 ($A66B)
|
|
// N=1 -> LoadSceneryFile1 ($A674) (Chicago)
|
|
// N=2 -> LoadSceneryFile2 ($A67D) (LA)
|
|
// N=3 -> LoadSceneryFile3 ($A686) (Seattle)
|
|
// N=4 -> LoadSceneryFile4 ($A68F) (NY)
|
|
const char *cityEnv = getenv("FS2TRACE_CITY");
|
|
bool cityMode = (cityEnv != NULL);
|
|
uint16_t entries[8];
|
|
int numEntries = 0;
|
|
if (cityMode) {
|
|
int n = (int)strtol(cityEnv, NULL, 0);
|
|
static const uint16_t cityEntry[5] = {
|
|
0xA66B, 0xA674, 0xA67D, 0xA686, 0xA68F
|
|
};
|
|
if (n >= 0 && n <= 4) {
|
|
// Sequence: full game init → load city's scenery
|
|
// → init dispatcher pointer → run interpreter
|
|
// once. We bypass MainLoop because its
|
|
// PatchSlot_FrameSync resets LA7E0 back to the
|
|
// WW1 dispatcher; instead we directly invoke
|
|
// LoadDispatcherPointer + L6006 (jmp
|
|
// ProcessScenery), so HEADER demand-loads fire
|
|
// against the city dispatcher we just loaded.
|
|
entries[numEntries++] = 0xABBA; // MainGameEntry init
|
|
entries[numEntries++] = cityEntry[n]; // Load city's dispatcher into LA7E0+
|
|
entries[numEntries++] = 0xA61B; // LoadDispatcherPointer ($8B = LA7E0)
|
|
entries[numEntries++] = 0x6006; // jmp ProcessScenery
|
|
} else {
|
|
fprintf(stderr, "FS2TRACE_CITY: invalid index %d (must be 0-4)\n", n);
|
|
return 1;
|
|
}
|
|
} else if (bootMode) {
|
|
entries[numEntries++] = 0xABBA; // MainGameEntry
|
|
} else if (chainMode) {
|
|
entries[numEntries++] = 0xA674; // LoadSceneryFile1
|
|
entries[numEntries++] = 0xA67D; // LoadSceneryFile2
|
|
entries[numEntries++] = 0xA686; // LoadSceneryFile3
|
|
entries[numEntries++] = 0xA68F; // LoadSceneryFile4
|
|
} else {
|
|
entries[numEntries++] = entryPC;
|
|
}
|
|
|
|
FILE *trace = (getenv("FS2TRACE_PCS") != NULL) ? fopen("tmp/fs2trace.pcs", "w") : NULL;
|
|
int totalCycles = 0;
|
|
|
|
for (int e = 0; e < numEntries; e++) {
|
|
// Initialise machine state for this entry. Memory is
|
|
// preserved across entries (the whole point of chain
|
|
// mode) but registers and stack are reset.
|
|
reg_a = reg_x = reg_y = 0;
|
|
reg_s = 0xFF;
|
|
flag_n = flag_v = flag_d = flag_z = flag_c = 0;
|
|
flag_i = 1;
|
|
stop = 0;
|
|
pc = entries[e];
|
|
|
|
// Push a sentinel return address ($FFFF). When the
|
|
// entry RTSes, the next fetch lands on $0000 (BRK)
|
|
// which halts cleanly.
|
|
push16(0xFFFE);
|
|
|
|
uint16_t lastPCs[16] = { 0 };
|
|
int lastIdx = 0;
|
|
int sawSmartPort = 0;
|
|
int sawFetchSector = 0;
|
|
int blocksBefore = traceCount;
|
|
int entryStart = totalCycles;
|
|
|
|
// Boot mode runs the full chunk5 main loop, which
|
|
// never returns -- we need a generous cycle budget so
|
|
// demand-driven scenery loads (triggered by HEADER
|
|
// opcodes during ProcessScenery) get a chance to fire.
|
|
// Override with FS2TRACE_CYCLES if needed.
|
|
const char *cycleEnv = getenv("FS2TRACE_CYCLES");
|
|
int cycleLimit = bootMode ? 200000000 : 5000000;
|
|
if (cycleEnv != NULL) {
|
|
cycleLimit = (int)strtol(cycleEnv, NULL, 0);
|
|
}
|
|
// FS2TRACE_INIT_X / FS2TRACE_INIT_Z: pre-seed the
|
|
// aircraft scenery position (upper-16 of zero-page
|
|
// $5A-$65) before MainGameEntry runs. This forces the
|
|
// scenery dispatcher to fire HEADER demand-loads for the
|
|
// section containing those coords, so we can capture
|
|
// city-specific RAM dumps. Values are 16-bit unsigned
|
|
// (the upper word of the 24-bit scenery coordinate).
|
|
const char *initXEnv = getenv("FS2TRACE_INIT_X");
|
|
const char *initZEnv = getenv("FS2TRACE_INIT_Z");
|
|
if (initXEnv != NULL) {
|
|
uint16_t x = (uint16_t)strtol(initXEnv, NULL, 0);
|
|
mem[0x5C] = (uint8_t)(x & 0xFF);
|
|
mem[0x5D] = (uint8_t)((x >> 8) & 0xFF);
|
|
fprintf(stderr, "FS2TRACE_INIT_X: $5C/$5D = $%04X\n", x);
|
|
}
|
|
if (initZEnv != NULL) {
|
|
uint16_t z = (uint16_t)strtol(initZEnv, NULL, 0);
|
|
mem[0x64] = (uint8_t)(z & 0xFF);
|
|
mem[0x65] = (uint8_t)((z >> 8) & 0xFF);
|
|
fprintf(stderr, "FS2TRACE_INIT_Z: $64/$65 = $%04X\n", z);
|
|
}
|
|
// Re-apply the position patch every frame at the
|
|
// dispatcher entry point. Hook at $A61B
|
|
// (LoadDispatcherPointer) -- this runs AFTER chunk5's
|
|
// per-frame IntegratePhysicsStep ($87A2) but BEFORE the
|
|
// dispatcher reads $5C/$5D for the cull check. Hooking
|
|
// earlier (e.g. $877F MainLoop) doesn't work because
|
|
// IntegratePhysicsStep normalises position cells.
|
|
uint16_t patchX = (initXEnv != NULL)
|
|
? (uint16_t)strtol(initXEnv, NULL, 0) : 0;
|
|
uint16_t patchZ = (initZEnv != NULL)
|
|
? (uint16_t)strtol(initZEnv, NULL, 0) : 0;
|
|
for (cycles = 0; cycles < cycleLimit && !stop; ) {
|
|
if (pc == 0xC70D) {
|
|
sawSmartPort++;
|
|
}
|
|
if (pc == 0x1EC6) {
|
|
sawFetchSector++;
|
|
}
|
|
if (pc == 0xA61B) {
|
|
if (initXEnv != NULL) {
|
|
mem[0x5C] = (uint8_t)(patchX & 0xFF);
|
|
mem[0x5D] = (uint8_t)((patchX >> 8) & 0xFF);
|
|
}
|
|
if (initZEnv != NULL) {
|
|
mem[0x64] = (uint8_t)(patchZ & 0xFF);
|
|
mem[0x65] = (uint8_t)((patchZ >> 8) & 0xFF);
|
|
}
|
|
}
|
|
// FS2TRACE_FORCE_INBOUNDS: short-circuit the
|
|
// "beyond bounds" cull-redirect path at $6E6F so
|
|
// every $13/$14 and $20/$21/$22 cull falls
|
|
// through. Used to force every section's $0D
|
|
// HEADER to fire during boot, so the resulting
|
|
// RAM dump contains every reachable scenery
|
|
// section at its dispatcher-expected dest -- a
|
|
// single comprehensive dump per region without
|
|
// needing to fly the camera there.
|
|
if (pc == 0x6E6F && getenv("FS2TRACE_FORCE_INBOUNDS") != NULL) {
|
|
// TestSceneryRangeReject does pla*4 +
|
|
// jmp L00A5 to take the BEYOND path.
|
|
// Replace with TestSceneryRangeOk's RTS
|
|
// so the cull returns "in bounds"
|
|
// instead -- caller advances past the
|
|
// cull record.
|
|
pc = 0x6E6E;
|
|
}
|
|
if (pc == 0x6006) {
|
|
static int psHits = 0;
|
|
psHits++;
|
|
if (psHits <= 5) {
|
|
fprintf(stderr, "ProcessScenery (L6006) hit #%d: $5C/$5D=$%02X%02X $64/$65=$%02X%02X $8B/$8C=$%02X%02X\n",
|
|
psHits, mem[0x5D], mem[0x5C], mem[0x65], mem[0x64],
|
|
mem[0x8C], mem[0x8B]);
|
|
}
|
|
}
|
|
if (pc == 0xA63A) {
|
|
static int hdrHits = 0;
|
|
hdrHits++;
|
|
if (hdrHits <= 20) {
|
|
fprintf(stderr, "SceneryHeaderLoadIfMiss hit #%d: sectionId=$%02X count=$%02X dest=$%02X%02X cacheIdx=$%02X cache=$%02X%02X%02X%02X\n",
|
|
hdrHits, mem[0x08E5], mem[0x08E6],
|
|
mem[0x08E8], mem[0x08E7], mem[0x08E9],
|
|
mem[0x08EA], mem[0x08EB], mem[0x08EC], mem[0x08ED]);
|
|
}
|
|
}
|
|
if (pc == 0xA6CD) {
|
|
static int runHits = 0;
|
|
runHits++;
|
|
if (runHits <= 20) {
|
|
fprintf(stderr, "SceneryHeaderRunSection hit #%d: $9E=$%02X $9F=$%02X L1E01=$%02X L1E03=$%02X%02X L1E07=$%02X L1E09=$%02X\n",
|
|
runHits, mem[0x9E], mem[0x9F], mem[0x1E01],
|
|
mem[0x1E04], mem[0x1E03], mem[0x1E07], mem[0x1E09]);
|
|
}
|
|
}
|
|
if (pc == 0xA6DF) {
|
|
static int loopHits = 0;
|
|
loopHits++;
|
|
if (loopHits <= 5) {
|
|
fprintf(stderr, "SceneryHeaderRunSectionLoop (jsr L1EAD): $9F=$%02X L1E01=$%02X L1E07=$%02X L1E09=$%02X\n",
|
|
mem[0x9F], mem[0x1E01], mem[0x1E07], mem[0x1E09]);
|
|
}
|
|
}
|
|
if (pc == 0xA6F3) {
|
|
fprintf(stderr, "SceneryHeaderRunSectionFail: L1E01=$%02X carry-set\n", mem[0x1E01]);
|
|
}
|
|
lastPCs[lastIdx] = pc;
|
|
lastIdx = (lastIdx + 1) % 16;
|
|
if (trace != NULL) {
|
|
fprintf(trace, "$%04X\n", pc);
|
|
}
|
|
step();
|
|
if (pc == 0xFFFF) {
|
|
break;
|
|
}
|
|
if (dumpRequested) {
|
|
fprintf(stderr, "FS2TRACE_DUMP_AT_BLOCK reached after block %d\n",
|
|
traceCount);
|
|
break;
|
|
}
|
|
}
|
|
totalCycles += cycles;
|
|
|
|
fprintf(stderr, "entry $%04X: %d cycles, smartport=%d, fetch=%d, blocks=%d, final PC=$%04X stop=%d\n",
|
|
entries[e], cycles, sawSmartPort, sawFetchSector,
|
|
traceCount - blocksBefore, pc, stop);
|
|
if (stop) {
|
|
fprintf(stderr, " last 16 PCs: ");
|
|
for (int i = 0; i < 16; i++) {
|
|
int idx = (lastIdx + 15 - i) % 16;
|
|
fprintf(stderr, "$%04X ", lastPCs[idx]);
|
|
}
|
|
fprintf(stderr, "\n");
|
|
break;
|
|
}
|
|
(void)entryStart;
|
|
}
|
|
|
|
if (trace != NULL) {
|
|
fclose(trace);
|
|
}
|
|
|
|
// Optional: dump key memory regions where the loader deposits
|
|
// data, so the caller can grep for the scenery bytecode entry.
|
|
if (getenv("FS2TRACE_DUMP") != NULL) {
|
|
FILE *dump = fopen("tmp/fs2trace.ram", "wb");
|
|
if (dump != NULL) {
|
|
fwrite(mem, 1, MEM_SIZE, dump);
|
|
fclose(dump);
|
|
fprintf(stderr, "wrote tmp/fs2trace.ram (full 64K)\n");
|
|
}
|
|
}
|
|
|
|
// Show LA7E0 (the scenery entry pointer chunk5's
|
|
// LoadDispatcherPointer reads).
|
|
uint16_t la7e0 = (uint16_t)(mem[0xA7E0] | (mem[0xA7E1] << 8));
|
|
fprintf(stderr, "LA7E0 = $%04X (scenery entry pointer)\n", la7e0);
|
|
|
|
fprintf(stderr, "\n%d cycles total, %d block reads.\n", totalCycles, traceCount);
|
|
for (int i = 0; i < traceCount; i++) {
|
|
printf(" %d: BLOCK $%04X (%d)\n", i, tracedBlocks[i], tracedBlocks[i]);
|
|
}
|
|
|
|
free(diskImage);
|
|
return 0;
|
|
}
|