fs2port/port/tools/extractstations.c
2026-05-13 21:32:05 -05:00

519 lines
24 KiB
C

// FS2 station extractor.
//
// Walks the scenery bytecode in each extracted A2.SD* file and pulls
// out NAV ($1D), ADF ($05), and COM ($1E) station records. With the
// loader correctly identifying the bytecode entry as file offset
// 0x9000 (verified via fs2trace), this gives us the per-disk station
// database that drives the VOR1/VOR2 + ADF + COM panel readouts.
//
// Each disk's bytecode ends in a RETURN ($19) or terminator (>= $46
// or bit 7 set). A single walk from the entry point misses records
// that are nested inside the payload of another record, so main()
// starts a walk at every byte offset from 0x2000 onward; the walker
// itself follows JMP, sub-invoke, and cull targets. The visited map,
// the invalid-opcode table, and frequency validation discard the
// misaligned reads this brute-force approach produces.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define BYTECODE_ENTRY 0x9000
#define MAX_STATIONS 4096
// One radio-station record decoded from the scenery bytecode.
// scanBytecode() fills every field; main() sorts, de-duplicates, and
// emits the resulting table.
typedef struct StationT {
uint16_t freq; // raw 16-bit frequency word, little-endian in the record; validated as BCD per record type
int32_t x; // sign-extended 24-bit position (X / east)
int32_t y; // sign-extended 24-bit position (Y / north)
int32_t z; // NAV only (ADF/COM store 0). NOTE(review): the decoder builds it from bytes that overlap y's high byte -- treat as unreliable
char name[16]; // NUL-terminated; populated for COM records only, "" for ADF/NAV
char type; // 'A'=ADF, 'N'=NAV, 'C'=COM
} StationT;
// Per-opcode advance table, filled in by main() before any scan:
//   > 0  number of bytes the record occupies (opcode included)
//   -1   opcode the walker treats as a stream terminator
//    0   opcode absent from the table (walker steps one byte)
static int adv[256];
// Number of valid entries in stations[].
static int stationCount;
// Accepted station records. scanBytecode() appends; main() later sorts
// and de-duplicates the array in place.
static StationT stations[MAX_STATIONS];
// Forward declarations for the helpers defined below.
static int bcdNibblesValid(uint8_t b);
static int comFreqValid(uint16_t freq);
static int adfFreqValid(uint16_t freq);
static int navFreqValid(uint16_t freq);
static int32_t signed24(uint8_t lo, uint8_t mid, uint8_t hi);
static int scanBytecode(const uint8_t *bytes, int length, int entry, int regionIdx);
// Station frequencies are stored as packed BCD (chunk5
// DecodeBCDFreqString, chunk3 LookupADFStation). The per-type
// validators reject any candidate record whose frequency bytes carry a
// non-decimal nibble or decode outside the legal radio band; such hits
// are almost always vertex / polygon data that the brute-force scan
// has misread as a station record.
//
// Returns 1 when both nibbles of `b` are decimal digits (0..9), else 0.
static int bcdNibblesValid(uint8_t b) {
    unsigned units = b & 0x0Fu;
    unsigned tens = (unsigned)b >> 4;

    if (units > 9u) {
        return 0;
    }
    return tens <= 9u;
}
// ADF frequency check.
//
// The low byte of `freq` is ADFFreqLowPacked (a BCD pair holding the
// tens and units digits); the high byte is ADFFreqHighDigit, a single
// hundreds digit whose upper nibble must be zero. chunk3 packs e.g.
// $34, $02 for a displayed "234" kHz, so the representable range is
// 200..999 kHz (NDB beacons; without a fourth digit FS2 cannot express
// the upper 1605..1750 kHz portion).
//
// Returns 1 when `freq` decodes to 200..999 kHz, else 0.
static int adfFreqValid(uint16_t freq) {
    uint8_t packedLow = (uint8_t)(freq & 0xFF);
    uint8_t hundreds = (uint8_t)(freq >> 8);

    // A byte above 9 has either a non-zero upper nibble or a
    // non-decimal lower nibble; both disqualify the hundreds digit.
    if (hundreds > 0x09) {
        return 0;
    }
    if (!bcdNibblesValid(packedLow)) {
        return 0;
    }

    int tens = packedLow >> 4;
    int units = packedLow & 0x0F;
    int khz = hundreds * 100 + tens * 10 + units;
    if (khz < 200) {
        return 0;
    }
    return khz <= 999;
}
// NAV (VOR) frequency check.
//
// Both bytes are BCD pairs as produced by DecodeBCDFreqString: the low
// byte of `freq` holds the lower two digits, the high byte the upper
// two. The legal civil VOR band is 108.00..117.95 MHz, which leaves
// two admissible high bytes: $10 with a low byte of at least $80
// (108.0..109.95) and $11 with a low byte of at most $79
// (110.0..117.95).
//
// Returns 1 when `freq` is well-formed BCD inside that band, else 0.
static int navFreqValid(uint16_t freq) {
    uint8_t lowerPair = (uint8_t)(freq & 0xFF);
    uint8_t upperPair = (uint8_t)(freq >> 8);

    int wellFormed = bcdNibblesValid(lowerPair) && bcdNibblesValid(upperPair);
    if (!wellFormed) {
        return 0;
    }

    switch (upperPair) {
    case 0x10:
        return lowerPair >= 0x80;
    case 0x11:
        return lowerPair <= 0x79;
    default:
        return 0;
    }
}
// COM (airport tower / ATIS) frequency check.
//
// Uses the same BCD packing as NAV. Apple II FS2 uses the civil
// aviation 118..137 MHz band, so the high byte must be $11 (low byte
// $80 and up), $12 (any low byte), or $13 (low byte up to $69).
//
// Returns 1 when `freq` is well-formed BCD inside that band, else 0.
static int comFreqValid(uint16_t freq) {
    uint8_t lowerPair = (uint8_t)(freq & 0xFF);
    uint8_t upperPair = (uint8_t)(freq >> 8);

    int wellFormed = bcdNibblesValid(lowerPair) && bcdNibblesValid(upperPair);
    if (!wellFormed) {
        return 0;
    }

    switch (upperPair) {
    case 0x11:
        return lowerPair >= 0x80;
    case 0x12:
        return 1;
    case 0x13:
        return lowerPair <= 0x69;
    default:
        return 0;
    }
}
// Assembles three little-endian bytes into a 24-bit two's-complement
// value and sign-extends it to 32 bits.
static int32_t signed24(uint8_t lo, uint8_t mid, uint8_t hi) {
    int32_t raw = (int32_t)lo | ((int32_t)mid << 8) | ((int32_t)hi << 16);

    // Flip bit 23 and subtract its weight: values with the sign bit
    // set land in -8388608..-1, the rest are unchanged.
    return (raw ^ 0x800000) - 0x800000;
}
// `visited[p]` becomes non-zero once a walk has decoded the opcode at
// file offset p during the current file pass (main() clears the array
// before each file). It breaks cycles introduced by JMP / SUB_INVOKE
// chains and stops overlapping walks from re-decoding the same bytes.
// Sized to a typical scenery file (143 KB); offsets at or beyond
// sizeof(visited) are never walked.
static uint8_t visited[200000];

// Nesting limit for scanBytecode(). Branch targets come straight from
// file bytes, so without a cap a long chain of branches could exhaust
// the stack. A target refused because of the cap is left unvisited so
// a later top-level walk can still start there.
enum { SCAN_MAX_DEPTH = 2048 };
static int scanDepth;

// Walks scenery bytecode from file offset `entry`, appending every
// plausible ADF ($05), NAV ($1D), and COM ($1E) record to stations[].
//
//   bytes     file contents
//   length    number of valid bytes in `bytes`
//   entry     offset to start decoding at; out-of-range values are ignored
//   regionIdx unused; kept so existing callers need no change
//
// The walk ends at an already-visited offset, at a terminator opcode
// (bit 7 set or >= $46), at an opcode marked -1 in adv[], at the end
// of the buffer or of visited[], or after 100000 opcodes.
//
// Returns the number of opcodes stepped over by this call (nested
// calls made for branch targets are not included).
static int scanBytecode(const uint8_t *bytes, int length, int entry, int regionIdx) {
    (void)regionIdx;
    int pos = entry;
    int ops = 0;

    if (pos < 0 || pos >= length || pos >= (int)sizeof(visited)) {
        return 0;
    }
    if (scanDepth >= SCAN_MAX_DEPTH) {
        return 0;
    }
    scanDepth++;

    while (pos < length && pos < (int)sizeof(visited)) {
        if (visited[pos]) {
            break;
        }
        visited[pos] = 1;

        uint8_t op = bytes[pos];
        if ((op & 0x80) || op >= 0x46) {
            break;
        }
        int a = adv[op];

        // Follow $0B (JMP relative). Operand is a signed 16-bit
        // offset from the current position. The jump is
        // unconditional, so this branch of the walk ends here.
        if (op == 0x0B && pos + 3 <= length) {
            int16_t off = (int16_t)(bytes[pos + 1] | (bytes[pos + 2] << 8));
            scanBytecode(bytes, length, pos + off, regionIdx);
            break;
        }

        // Follow $18 (sub-invoke). Operand is a signed 16-bit
        // relative offset to the subroutine; afterwards we continue
        // past the 3-byte sub-invoke record.
        if (op == 0x18 && pos + 3 <= length) {
            int16_t off = (int16_t)(bytes[pos + 1] | (bytes[pos + 2] << 8));
            scanBytecode(bytes, length, pos + off, regionIdx);
        }

        // Follow cull-on-outside jump targets ($20/$21/$22). The
        // first 2 bytes after the opcode are a signed 16-bit
        // relative target that the FS2 dispatcher takes when the
        // cull range test fails. Camera state is unknown at extract
        // time, so BOTH branches are walked: the target here, the
        // inside path (9 / 15 / 21 bytes) by falling through.
        if ((op == 0x20 || op == 0x21 || op == 0x22) && pos + 3 <= length) {
            int16_t off = (int16_t)(bytes[pos + 1] | (bytes[pos + 2] << 8));
            scanBytecode(bytes, length, pos + off, regionIdx);
        }

        // ADF: opcode, freq (2), x (3), y (3) = 9 bytes.
        if (op == 0x05 && pos + 9 <= length && stationCount < MAX_STATIONS) {
            uint16_t freq = (uint16_t)(bytes[pos + 1] | (bytes[pos + 2] << 8));
            if (adfFreqValid(freq)) {
                StationT *s = &stations[stationCount++];
                s->type = 'A';
                s->freq = freq;
                s->x = signed24(bytes[pos + 3], bytes[pos + 4], bytes[pos + 5]);
                s->y = signed24(bytes[pos + 6], bytes[pos + 7], bytes[pos + 8]);
                s->z = 0;
                s->name[0] = '\0';
            }
        }

        // NAV: 11-byte record.
        if (op == 0x1D && pos + 11 <= length && stationCount < MAX_STATIONS) {
            uint16_t freq = (uint16_t)(bytes[pos + 1] | (bytes[pos + 2] << 8));
            if (navFreqValid(freq)) {
                StationT *s = &stations[stationCount++];
                s->type = 'N';
                s->freq = freq;
                s->x = signed24(bytes[pos + 3], bytes[pos + 4], bytes[pos + 5]);
                s->y = signed24(bytes[pos + 6], bytes[pos + 7], bytes[pos + 8]);
                // NOTE(review): z starts at pos + 8, which is also
                // y's high byte; an 11-byte record has only two
                // bytes left after y. Left unchanged because the
                // real field layout is not confirmed -- verify
                // against the chunk5 disassembly.
                s->z = signed24(bytes[pos + 8], bytes[pos + 9], bytes[pos + 10]);
                s->name[0] = '\0';
            }
        }

        // COM: opcode, length byte, then `recLen` payload bytes.
        if (op == 0x1E) {
            if (pos + 1 >= length) {
                break;
            }
            int recLen = bytes[pos + 1];
            // The fixed fields occupy payload bytes pos+2..pos+9, so
            // the payload must be at least 8 bytes long; otherwise
            // the reads below would run past the record and
            // possibly past the end of the buffer.
            if (recLen >= 8 && pos + recLen + 2 <= length && stationCount < MAX_STATIONS) {
                uint16_t freq = (uint16_t)(bytes[pos + 2] | (bytes[pos + 3] << 8));
                if (comFreqValid(freq)) {
                    StationT *s = &stations[stationCount++];
                    s->type = 'C';
                    s->freq = freq;
                    s->x = signed24(bytes[pos + 4], bytes[pos + 5], bytes[pos + 6]);
                    s->y = signed24(bytes[pos + 7], bytes[pos + 8], bytes[pos + 9]);
                    s->z = 0;
                    int nameStart = pos + 13;
                    int nameLen = recLen - 11;
                    if (nameLen > 0 && nameLen < (int)sizeof(s->name) - 1) {
                        // Apple II text storage sets bit 7 on
                        // "normal" characters. Mask it off before
                        // validating; bytes that are still
                        // non-printable after masking become '?'.
                        for (int i = 0; i < nameLen; i++) {
                            uint8_t c = bytes[nameStart + i] & 0x7F;
                            s->name[i] = (c >= 32 && c < 127) ? (char)c : '?';
                        }
                        s->name[nameLen] = '\0';
                    } else {
                        s->name[0] = '\0';
                    }
                }
            }
            // Variable-length record: opcode + length byte + payload.
            a = recLen + 2;
        }

        if (a < 0) {
            // SceneryOpInvalid in chunk5 -- the dispatcher would
            // reset state and bail. Treat as a stream terminator
            // rather than guessing.
            break;
        }
        if (a == 0) {
            // Opcode not in our table at all (not an explicit
            // Invalid either). Advance 1 byte and try again --
            // cycle detection bounds the damage and we may still
            // hit stations downstream.
            a = 1;
        }
        pos += a;
        ops++;
        if (ops > 100000) {
            break;
        }
    }

    scanDepth--;
    return ops;
}
// Region tag for each station, indexed in parallel with stations[]:
// stationRegion[i] describes stations[i]. main() fills it with an
// ASCII digit taken from the source file name ('?' when the name has
// no digit) and writes it into the C-emit output, where it helps when
// debugging which disk a station came from.
static char stationRegion[MAX_STATIONS];
// Identity test used for de-duplication: two records describe the same
// station when type, freq, x, and y all match. z is left out on
// purpose -- the extractor occasionally reads z=0 vs z=junk depending
// on whether the trailing bytes of an 11-byte NAV record line up with
// surrounding scenery, whereas type/freq/x/y are reliable.
//
// Returns 1 when the keys match, else 0.
static int stationKeyEq(const StationT *a, const StationT *b) {
    if (a->type != b->type) {
        return 0;
    }
    if (a->freq != b->freq) {
        return 0;
    }
    if (a->x != b->x) {
        return 0;
    }
    return a->y == b->y;
}
// qsort() comparator over StationT: orders by type, then freq, then x,
// then y. Records that agree on all four compare equal, so duplicates
// end up adjacent, and the embedded array can be scanned linearly with
// cache-friendly behaviour.
//
// Returns -1, 0, or 1.
static int stationCmp(const void *pa, const void *pb) {
    const StationT *lhs = (const StationT *)pa;
    const StationT *rhs = (const StationT *)pb;

    // (l > r) - (l < r) yields -1 / 0 / 1 without overflow.
    int order = (lhs->type > rhs->type) - (lhs->type < rhs->type);
    if (order == 0) {
        order = (lhs->freq > rhs->freq) - (lhs->freq < rhs->freq);
    }
    if (order == 0) {
        order = (lhs->x > rhs->x) - (lhs->x < rhs->x);
    }
    if (order == 0) {
        order = (lhs->y > rhs->y) - (lhs->y < rhs->y);
    }
    return order;
}
int main(int argc, char **argv) {
const char *cOutput = NULL;
int argi = 1;
while (argi < argc && argv[argi][0] == '-') {
if (strcmp(argv[argi], "--c-output") == 0 && argi + 1 < argc) {
cOutput = argv[++argi];
argi++;
continue;
}
fprintf(stderr, "unknown option %s\n", argv[argi]);
return 1;
}
if (argi >= argc) {
fprintf(stderr, "usage: %s [--c-output FILE] file.dsk [file.dsk ...]\n", argv[0]);
return 1;
}
// Advance counts pulled from chunk5 dispatcher (scanning every
// `lda #$N; jmp SceneryOpAdvanceAndContinue` / `jmp AddTo8B`
// exit). Most opcodes fall through their main path; a few
// (vertex-emit family) take different advances depending on
// L7EBC's inner branch -- I use the most common 5-byte path.
int t[][2] = {
{ 0x00, 5 }, { 0x01, 5 }, { 0x02, 5 }, // L69C8/L6888/L689F: L7EBC reads 4 stream bytes then `lda #$05; jmp AddTo8B` (chunk5 line 4356)
{ 0x03, 6 }, { 0x04, 2 }, { 0x05, 9 }, // Call64K_2 + skip + ADF station
{ 0x06, 5 }, { 0x07, 14 }, // DrawLine + L6BA0
{ 0x09, 3 }, { 0x0A, 3 }, { 0x0B, 3 }, // Skip3 + JMP relative
{ 0x0D, 6 }, { 0x0E, 1 }, // Header + Call64K (no-op fall through)
{ 0x11, 1 }, { 0x12, 2 }, // Skip1 + SetColor
{ 0x13, 10 }, { 0x14, 10 }, // L6E17-driven (advance 9 + 1 from caller)
{ 0x18, 3 }, { 0x19, 1 }, // SubInvoke + Return
{ 0x1A, 5 }, { 0x1B, 1 }, { 0x1C, 1 }, // L6D66 + ModeWhite + DayOnly
{ 0x1D, 11 }, // NAV station
{ 0x20, 9 }, { 0x21, 15 }, { 0x22, 21 }, // 1/2/3-axis cull
{ 0x23, 7 }, { 0x24, 8 }, { 0x25, 5 }, // L6EF1 + L6B64 + L6D8E
{ 0x28, 8 }, { 0x29, 1 }, { 0x2B, 5 }, // L6F1D + CopyToD2 + L69E6 (guess)
{ 0x2F, 1 },
{ 0x31, 2 }, { 0x32, 2 }, { 0x33, 2 }, { 0x35, 2 }, // L6987-family: 1 opcode + 1 vertex-index byte
{ 0x40, 5 }, { 0x41, 5 }, { 0x42, 5 } // L688F/L68A6/L694E: same L7EBC tail as $00-$02
};
for (size_t i = 0; i < sizeof(t) / sizeof(t[0]); i++) {
adv[t[i][0]] = t[i][1];
}
// Opcodes that dispatch to SceneryOpInvalid in chunk5 -- treat
// them as stream terminators so the walker doesn't desync by
// guessing a 1-byte advance over what's almost certainly data
// misaligned as bytecode. Marked with -1 (sentinel for "halt").
int invalid[] = {
0x08, 0x0C, 0x0F, 0x10,
0x15, 0x16, 0x17, 0x1F,
0x26, 0x27, 0x2A, 0x2C,
0x2D, 0x2E, 0x30, 0x34,
0x36, 0x37, 0x38, 0x39,
0x3A, 0x3B, 0x3C, 0x3D,
0x3E, 0x3F, 0x43, 0x44, 0x45
};
for (size_t i = 0; i < sizeof(invalid) / sizeof(invalid[0]); i++) {
adv[invalid[i]] = -1;
}
int totalAdf = 0;
int totalNav = 0;
int totalCom = 0;
for (; argi < argc; argi++) {
FILE *f = fopen(argv[argi], "rb");
if (f == NULL) {
fprintf(stderr, "cannot open %s\n", argv[argi]);
continue;
}
fseek(f, 0, SEEK_END);
long sz = ftell(f);
fseek(f, 0, SEEK_SET);
uint8_t *buf = malloc((size_t)sz);
if (buf == NULL || fread(buf, 1, (size_t)sz, f) != (size_t)sz) {
fclose(f);
free(buf);
continue;
}
fclose(f);
int beforeAdf = 0, beforeNav = 0, beforeCom = 0;
for (int i = 0; i < stationCount; i++) {
if (stations[i].type == 'A') beforeAdf++;
if (stations[i].type == 'N') beforeNav++;
if (stations[i].type == 'C') beforeCom++;
}
int countBefore = stationCount;
// Reset visited tracker for this file.
memset(visited, 0, sizeof(visited));
// Walk from every byte in the FS2 content region.
// Real scenery records are nested inside outer opcodes
// (e.g. a NAV record at file offset $01452 sits in the
// payload of the $13 record at $01449), so 256-byte
// sector starts miss most of them. The `visited` guard
// makes the walk O(file_size) overall: once a position
// is visited any later entry that reaches it bails
// immediately. SceneryOpInvalid + freq validation prune
// the false-record paths.
for (int entry = 0x2000; entry < (int)sz; entry++) {
scanBytecode(buf, (int)sz, entry, argi);
}
int gotAdf = 0, gotNav = 0, gotCom = 0;
for (int i = countBefore; i < stationCount; i++) {
if (stations[i].type == 'A') gotAdf++;
if (stations[i].type == 'N') gotNav++;
if (stations[i].type == 'C') gotCom++;
// Tag with region label (last path component
// after the last '/' or '\\') for the C-emit
// comment.
const char *base = argv[argi];
for (const char *p = argv[argi]; *p != '\0'; p++) {
if (*p == '/' || *p == '\\') {
base = p + 1;
}
}
// Pack first letter of region tail (e.g. "A2.SD3" -> '3').
char tag = '?';
for (const char *p = base; *p != '\0'; p++) {
if (*p >= '0' && *p <= '9') { tag = *p; break; }
}
stationRegion[i] = tag;
}
fprintf(stderr, "%-40s ADF=%d NAV=%d COM=%d\n", argv[argi], gotAdf, gotNav, gotCom);
(void)beforeAdf; (void)beforeNav; (void)beforeCom;
totalAdf += gotAdf;
totalNav += gotNav;
totalCom += gotCom;
free(buf);
}
// Dedupe (cross-region): same physical station appears in
// every overlapping disk, plus the brute-force walker hits
// each in-disk record from multiple aliasing entry points.
qsort(stations, (size_t)stationCount, sizeof(stations[0]), stationCmp);
int unique = 0;
for (int i = 0; i < stationCount; i++) {
if (unique > 0 && stationKeyEq(&stations[i], &stations[unique - 1])) {
// Prefer the entry that has a name attached
// (COM records carry airport names).
if (stations[unique - 1].name[0] == '\0' && stations[i].name[0] != '\0') {
stations[unique - 1] = stations[i];
stationRegion[unique - 1] = stationRegion[i];
}
continue;
}
stations[unique] = stations[i];
stationRegion[unique] = stationRegion[i];
unique++;
}
int beforeUnique = stationCount;
stationCount = unique;
int uniqueAdf = 0, uniqueNav = 0, uniqueCom = 0;
for (int i = 0; i < stationCount; i++) {
if (stations[i].type == 'A') uniqueAdf++;
if (stations[i].type == 'N') uniqueNav++;
if (stations[i].type == 'C') uniqueCom++;
}
fprintf(stderr, "\nraw: %d ADF, %d NAV, %d COM (%d total)\n", totalAdf, totalNav, totalCom, beforeUnique);
fprintf(stderr, "unique: %d ADF, %d NAV, %d COM (%d total)\n", uniqueAdf, uniqueNav, uniqueCom, stationCount);
if (cOutput != NULL) {
FILE *out = fopen(cOutput, "w");
if (out == NULL) {
fprintf(stderr, "cannot write %s\n", cOutput);
return 1;
}
fprintf(out, "// Generated by port/tools/extractstations --c-output. Do not edit.\n");
fprintf(out, "// Source: A2.SD* scenery files. %d unique stations.\n\n", stationCount);
fprintf(out, "#ifndef SCENERY_STATIONS_DATA_H\n");
fprintf(out, "#define SCENERY_STATIONS_DATA_H\n\n");
fprintf(out, "#include <stdint.h>\n\n");
fprintf(out, "typedef struct StationDataT {\n");
fprintf(out, " char type; // 'A'=ADF, 'N'=NAV, 'C'=COM\n");
fprintf(out, " uint16_t freq; // BCD-packed (NAV/COM) or BCD pair + high digit (ADF)\n");
fprintf(out, " int32_t x; // FS2 scenery units, +X = east\n");
fprintf(out, " int32_t y; // +Y = north\n");
fprintf(out, " int32_t z; // NAV only (altitude); 0 for ADF/COM\n");
fprintf(out, " char name[16]; // COM airport name; \"\" otherwise\n");
fprintf(out, " char region; // ASCII digit of source A2.SD* file\n");
fprintf(out, "} StationDataT;\n\n");
fprintf(out, "static const StationDataT kSceneryStations[] = {\n");
for (int i = 0; i < stationCount; i++) {
StationT *s = &stations[i];
fprintf(out, " { '%c', 0x%04X, %9d, %9d, %9d, \"",
s->type, s->freq, s->x, s->y, s->z);
for (int j = 0; j < (int)sizeof(s->name) && s->name[j] != '\0'; j++) {
char c = s->name[j];
if (c == '"' || c == '\\') {
fputc('\\', out);
}
fputc(c, out);
}
fprintf(out, "\", '%c' },\n", stationRegion[i]);
}
fprintf(out, "};\n\n");
fprintf(out, "#define SCENERY_STATIONS_COUNT ((int)(sizeof(kSceneryStations) / sizeof(kSceneryStations[0])))\n\n");
fprintf(out, "#endif\n");
fclose(out);
fprintf(stderr, "wrote %s (%d entries)\n", cOutput, stationCount);
return 0;
}
printf("\ntotal raw: %d ADF, %d NAV, %d COM\n", totalAdf, totalNav, totalCom);
printf("unique: %d ADF, %d NAV, %d COM\n", uniqueAdf, uniqueNav, uniqueCom);
for (int i = 0; i < stationCount; i++) {
StationT *s = &stations[i];
printf(" %c freq=$%04X x=%d y=%d z=%d %s\n",
s->type, s->freq, s->x, s->y, s->z, s->name);
}
return 0;
}