// Atari ST HAL for M2 + M2.5. // // M2 scope: // * XBIOS Setscreen to ST low-res (320x200x16, mode 0). // * Chunky 4bpp to word-interleaved ST planar c2p at present time. // // M2.5 scope (per-band palette / SCB emulation): // * halPresent scans the SurfaceT's SCB array and builds a compact // transitions table: each entry is (start_line, palette_index) // for a new palette region. For pattern.c's 8 uniform bands this // is 8 entries; in the worst case it is 200 (one per scanline). // * VBL ISR pre-loads the first band's palette, then programs // MFP Timer B (event-count mode, TBDR = HBL delta to first // transition) to fire at the END of the last scanline before // the next band starts. // * Timer B ISR writes the current band's palette, advances the // transition index, and (stop/reload TBDR/restart) reprograms // Timer B to fire at the next transition. With 8 transitions per // frame the ISR runs 8 times instead of 313 -- well under the // ~147-HBL-fires-per-frame cap Hatari's MFP emulation imposes on // event-count mode, and ~0.2% CPU overhead vs ~60% for per-HBL. // * gLinePalettes is a flat pre-quantized (line, color)->$0RGB // table built in halPresent by flattenScbPalettes; the ISR uses // its first row per band as the source of 16 shifter writes. // // Deferred: // * Takeover mode (direct shifter programming without TOS). // * STE's extended palette bits (we drop to STF 9-bit for now). #include #include #include #include #include "hal.h" #include "surfaceInternal.h" // ----- Constants ----- // Word-interleaved ST planar uses the same 160 bytes/scanline as our // chunky source, but organized as 20 groups of 4 words per scanline, // with each word holding the 16 one-bit samples for one bitplane. #define ST_BYTES_PER_ROW 160 #define ST_GROUPS_PER_ROW 20 #define ST_SCREEN_ALIGN 256 // Shifter palette registers: 16 words at $FFFF8240..$FFFF825F. #define ST_PALETTE_REGS ((volatile uint16_t *)0xFFFF8240L) // MFP hardware addresses. 
#define ST_MFP_TBCR ((volatile uint8_t *)0xFFFFFA1BL) // Timer B control #define ST_MFP_TBDR ((volatile uint8_t *)0xFFFFFA21L) // Timer B data #define ST_MFP_ISRA ((volatile uint8_t *)0xFFFFFA0FL) // In-service A #define MFP_TBCR_STOP 0x00 #define MFP_TBCR_EVENT 0x08 #define MFP_TB_CLEAR 0xFE // clear bit 0 of ISRA (Timer B) // Exception-vector numbers passed to Setexc (= vector offset / 4). #define VEC_VBL (0x70 / 4) // 68k autovector IRQ 4 #define VEC_MFP_TB (0x120 / 4) // MFP Timer B #define INT_TIMER_B 8 // ----- Prototypes ----- static uint16_t quantizeColorToSt(uint16_t orgb); static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd); static void flattenScbPalettes(const SurfaceT *src); static void initC2pLut(void); static void writeDiagnostics(void); static long writePrevPaletteRegs(void); // Provided by src/port/atarist/c2p.s. extern void chunkyToPlanarRowSt(const uint8_t *src, uint16_t *dst, uint16_t groupStart, uint16_t groupEnd, const uint8_t *lut); static __attribute__((interrupt_handler)) void timerBIsr(void); static __attribute__((interrupt_handler)) void vblIsr(void); static void buildTransitions(const SurfaceT *src); static bool paletteOrScbChanged(const SurfaceT *src); static void refreshPaletteStateIfNeeded(const SurfaceT *src); // ----- Module state ----- // Screen buffer: enough for 320x200x4bpp planar plus padding for // runtime 256-byte alignment. TOS .PRG format only supports 2-byte // object-file alignment, so we overallocate and align the pointer // manually in halInit. static uint8_t gScreenBuffer[SURFACE_PIXELS_SIZE + ST_SCREEN_ALIGN]; static uint8_t *gScreenBase = NULL; static void *gPrevPhysbase = NULL; static void *gPrevLogbase = NULL; static int16_t gPrevRez = 0; static uint16_t gPrevPalette[SURFACE_COLORS_PER_PALETTE]; static bool gModeSet = false; // Per-scanline pre-quantized palette table. 
// Indexed by display line; each row is a 16-word palette ready to be
// copied straight into the shifter registers. Filled by
// flattenScbPalettes; vblIsr and timerBIsr copy the row found at a
// band's start line, so the ISRs do no quantization of their own.
static uint16_t gLinePalettes[SURFACE_HEIGHT][SURFACE_COLORS_PER_PALETTE];

// Band-transition table. Each entry is one palette change: at
// display line gBandStart[i], load palette indexed by gBandPalIdx[i].
// Rebuilt by buildTransitions from the SurfaceT's SCB array whenever
// refreshPaletteStateIfNeeded sees the SCB or palette change.
#define MAX_BANDS SURFACE_HEIGHT
static uint16_t gBandStart [MAX_BANDS];
static uint8_t gBandPalIdx[MAX_BANDS];
static uint16_t gBandCount = 0;

// Index of the band whose palette was loaded most recently. vblIsr
// resets it to 0 after staging band 0; every timerBIsr fire
// increments it and, if that band exists, loads its palette.
static volatile uint16_t gCurrentBand = 0;

// Diagnostic captures. gFrameCount is bumped once per vblIsr run and
// is also what halWaitVBL spins on; gLastBandCount mirrors gBandCount
// as of the last buildTransitions call.
static volatile int16_t gFrameCount = 0;
static volatile uint16_t gLastBandCount = 0;

// Saved exception vectors for restore on shutdown.
static void (*gOldVblVec)(void) = NULL;
static void (*gOldTimerBVec)(void) = NULL;

// Cached SCB + palette from the last present. flattenScbPalettes runs
// 200 * 16 quantize conversions and buildTransitions rescans the full
// SCB; neither is cheap on a 7 MHz 68000. In the typical game loop
// (and every frame of the keys demo after the initial paint) SCB and
// palette never change, so caching and skipping those passes keeps
// rect presents down to just the c2p work. gCacheValid stays false
// until the first refresh has filled both caches.
static uint8_t gCachedScb [SURFACE_HEIGHT];
static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
static bool gCacheValid = false;

// 4 KB chunky-to-planar lookup table consumed by chunkyToPlanarRowSt
// (src/port/atarist/c2p.s).
Layout: gC2pLut[src*16 + pos*4 + plane] // = the 2-bit plane-byte contribution for source byte `src` at // byte-position `pos` (0..3 within a 4-byte chunk) going to plane // `plane`. Bit positions inside the byte are (7-2*pos, 6-2*pos), so // the same table feeds both halves of an ST plane word: positions // 0..3 land in the high byte, 4..7 (re-indexed mod 4) in the low // byte. Built once by initC2pLut on the first halPresent call. static uint8_t gC2pLut[4 * 1024]; static bool gC2pLutReady = false; // ----- Internal helpers (alphabetical) ----- static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd) { int16_t y; const uint8_t *srcLine; uint16_t *dstLine; if (!gC2pLutReady) { initC2pLut(); } for (y = y0; y < y1; y++) { srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW]; dstLine = (uint16_t *)&gScreenBase[y * ST_BYTES_PER_ROW]; chunkyToPlanarRowSt(srcLine, dstLine, groupStart, groupEnd, gC2pLut); } } // Scan the surface's SCB and record one transition entry for each // run of the same palette index. gBandCount is the number of // distinct bands; gBandStart[i] is the display line where band i // begins; gBandPalIdx[i] is the palette index that band uses. static void buildTransitions(const SurfaceT *src) { uint16_t line; uint8_t idx; uint8_t prev; gBandCount = 0; prev = 0xFF; for (line = 0; line < SURFACE_HEIGHT; line++) { idx = src->scb[line]; if (idx >= SURFACE_PALETTE_COUNT) { idx = 0; } if (idx != prev) { if (gBandCount < MAX_BANDS) { gBandStart [gBandCount] = line; gBandPalIdx[gBandCount] = idx; gBandCount++; } prev = idx; } } gLastBandCount = gBandCount; } // Pre-quantize every palette row indexed by scanline through the SCB // into gLinePalettes, so the Timer B ISR can do a flat indexed copy // without any surface-level lookups. Called once per halPresent. 
static void flattenScbPalettes(const SurfaceT *src) { uint16_t line; uint16_t col; uint8_t idx; for (line = 0; line < SURFACE_HEIGHT; line++) { idx = src->scb[line]; if (idx >= SURFACE_PALETTE_COUNT) { idx = 0; } for (col = 0; col < SURFACE_COLORS_PER_PALETTE; col++) { gLinePalettes[line][col] = quantizeColorToSt(src->palette[idx][col]); } } } // Returns true if SCB or palette values differ from the last present. static bool paletteOrScbChanged(const SurfaceT *src) { if (!gCacheValid) { return true; } if (memcmp(gCachedScb, src->scb, sizeof(gCachedScb)) != 0) { return true; } if (memcmp(gCachedPalette, src->palette, sizeof(gCachedPalette)) != 0) { return true; } return false; } // Rebuild the per-line palette table and band-transition table only // when the SCB/palette state has actually changed. Both are hot -- the // flatten pass runs 3200 palette entries through quantization -- so // skipping them on clean frames dominates rect-present timing. static void refreshPaletteStateIfNeeded(const SurfaceT *src) { if (!paletteOrScbChanged(src)) { return; } flattenScbPalettes(src); buildTransitions(src); memcpy(gCachedScb, src->scb, sizeof(gCachedScb)); memcpy(gCachedPalette, src->palette, sizeof(gCachedPalette)); gCacheValid = true; } // Build the 4 KB chunky-to-planar lookup table consumed by // chunkyToPlanarRowSt. Same layout/contents as the Amiga c2p LUT; // see src/port/atarist/c2p.s for the addressing math. 
static void initC2pLut(void) {
    uint8_t  *out;
    uint16_t  value;
    uint16_t  slot;
    uint16_t  bitplane;
    uint8_t   hiNibbleBit;
    uint8_t   loNibbleBit;

    if (gC2pLutReady) {
        return;
    }

    // Entries are laid out as [value][slot][bitplane], which is exactly
    // sequential order, so a running pointer replaces the index math.
    out = gC2pLut;
    for (value = 0; value < 256; value++) {
        for (slot = 0; slot < 4; slot++) {
            for (bitplane = 0; bitplane < 4; bitplane++) {
                // High nibble = left pixel, low nibble = right pixel;
                // each contributes its `bitplane` bit to adjacent
                // output positions 7-2*slot and 6-2*slot.
                hiNibbleBit = (uint8_t)((value >> (4 + bitplane)) & 1);
                loNibbleBit = (uint8_t)((value >> bitplane) & 1);
                *out++ = (uint8_t)((hiNibbleBit << (7 - 2 * slot)) |
                                   (loNibbleBit << (6 - 2 * slot)));
            }
        }
    }

    gC2pLutReady = true;
}


// 12-bit $0RGB to STF 9-bit palette register. Each 4-bit channel
// loses its low bit and keeps its nibble position, which is the same
// as shifting the whole word right by one and masking every nibble
// down to three bits.
static uint16_t quantizeColorToSt(uint16_t orgb) {
    return (uint16_t)((orgb >> 1) & 0x0777);
}


// Timer B interrupt handler. Fires once at each band transition;
// writes the band's palette to the shifter and lets Timer B's
// auto-reload keep counting toward the next fire. The timer is
// deliberately NOT stopped, reloaded and restarted in here: doing so
// would cost 1-2 HBL edges per fire, and those losses add up across
// 7+ transitions into a visible "last band short" drift. TBDR is
// instead updated in place while the timer runs. Such a write only
// takes effect on the fire AFTER next, which is fine when adjacent
// bands have similar lengths; uniform bands (like pattern.c) never
// need a different TBDR value and so stay perfectly aligned with no
// drift.
static void timerBIsr(void) { uint16_t band; uint8_t palIdx; const uint16_t *src; volatile uint16_t *dst; uint16_t nextDelta; band = gCurrentBand + 1; gCurrentBand = band; if (band < gBandCount) { palIdx = gBandPalIdx[band]; if (palIdx >= SURFACE_PALETTE_COUNT) { palIdx = 0; } src = &gLinePalettes[gBandStart[band]][0]; dst = ST_PALETTE_REGS; dst[ 0] = src[ 0]; dst[ 1] = src[ 1]; dst[ 2] = src[ 2]; dst[ 3] = src[ 3]; dst[ 4] = src[ 4]; dst[ 5] = src[ 5]; dst[ 6] = src[ 6]; dst[ 7] = src[ 7]; dst[ 8] = src[ 8]; dst[ 9] = src[ 9]; dst[10] = src[10]; dst[11] = src[11]; dst[12] = src[12]; dst[13] = src[13]; dst[14] = src[14]; dst[15] = src[15]; if (band + 1 < gBandCount) { // Update TBDR for the fire-after-next (auto-reload at // the NEXT fire still uses the old value). Don't stop // the timer. nextDelta = gBandStart[band + 1] - gBandStart[band]; if (nextDelta == 0 || nextDelta > 255) { nextDelta = 1; } *ST_MFP_TBDR = (uint8_t)nextDelta; *ST_MFP_ISRA = MFP_TB_CLEAR; return; } } // No further transitions this frame; stopping Timer B here only // affects the (never-used) next fire, not timing of any band // we've already scheduled. *ST_MFP_TBCR = MFP_TBCR_STOP; *ST_MFP_ISRA = MFP_TB_CLEAR; } // Vertical blank handler. Pre-loads band 0's palette so the first // visible scanline is correct, then programs Timer B to fire at // the HBL delta to the next band transition (if any). static void vblIsr(void) { uint16_t delta; const uint16_t *src; volatile uint16_t *dst; gFrameCount = gFrameCount + 1; gCurrentBand = 0; if (gBandCount == 0) { return; } // Stage band 0's palette into the shifter registers. 
src = &gLinePalettes[gBandStart[0]][0]; dst = ST_PALETTE_REGS; dst[ 0] = src[ 0]; dst[ 1] = src[ 1]; dst[ 2] = src[ 2]; dst[ 3] = src[ 3]; dst[ 4] = src[ 4]; dst[ 5] = src[ 5]; dst[ 6] = src[ 6]; dst[ 7] = src[ 7]; dst[ 8] = src[ 8]; dst[ 9] = src[ 9]; dst[10] = src[10]; dst[11] = src[11]; dst[12] = src[12]; dst[13] = src[13]; dst[14] = src[14]; dst[15] = src[15]; // Program Timer B for the next band transition. if (gBandCount > 1) { delta = gBandStart[1] - gBandStart[0]; if (delta == 0 || delta > 255) { delta = 1; } *ST_MFP_TBCR = MFP_TBCR_STOP; *ST_MFP_TBDR = (uint8_t)delta; *ST_MFP_ISRA = MFP_TB_CLEAR; *ST_MFP_TBCR = MFP_TBCR_EVENT; } else { *ST_MFP_TBCR = MFP_TBCR_STOP; *ST_MFP_ISRA = MFP_TB_CLEAR; } } static long writePrevPaletteRegs(void) { uint16_t i; for (i = 0; i < SURFACE_COLORS_PER_PALETTE; i++) { ST_PALETTE_REGS[i] = gPrevPalette[i]; } return 0; } static void writeDiagnostics(void) { FILE *fp; uint16_t i; fp = fopen("diag.txt", "w"); if (fp == NULL) { return; } fprintf(fp, "frames observed: %d\n", (int)gFrameCount); fprintf(fp, "band count: %d\n", (int)gLastBandCount); for (i = 0; i < gLastBandCount && i < 16; i++) { fprintf(fp, " band %2d: start line %3d, palIdx %d\n", (int)i, (int)gBandStart[i], (int)gBandPalIdx[i]); } fclose(fp); } // ----- HAL API (alphabetical) ----- bool halInit(const JoeyConfigT *config) { uintptr_t addr; (void)config; // Align screen buffer to 256 bytes inside the static storage. addr = (uintptr_t)gScreenBuffer; addr = (addr + (ST_SCREEN_ALIGN - 1)) & ~((uintptr_t)ST_SCREEN_ALIGN - 1); gScreenBase = (uint8_t *)addr; memset(gScreenBase, 0, SURFACE_PIXELS_SIZE); gPrevPhysbase = Physbase(); gPrevLogbase = Logbase(); gPrevRez = Getrez(); // Capture current palette so we can restore exactly on shutdown. { uint16_t i; for (i = 0; i < SURFACE_COLORS_PER_PALETTE; i++) { gPrevPalette[i] = (uint16_t)Setcolor((int16_t)i, -1); } } // Switch to ST low-res: 320x200x16, mode 0. 
Setscreen((long)gScreenBase, (long)gScreenBase, 0); gModeSet = true; // Force hardware palette entry 0 to black so the overscan border // (which the ST shows in palette[0]) stays black until the app's // first refreshPaletteStateIfNeeded uploads its own palette. Setcolor(0, 0x000); // Save previous VBL + Timer B vectors, install ours. Timer B // is at MFP vector $120; vector installed by Xbtimer below. gOldVblVec = (void (*)(void))Setexc(VEC_VBL, -1L); gOldTimerBVec = (void (*)(void))Setexc(VEC_MFP_TB, -1L); (void)Setexc(VEC_VBL, (long)vblIsr); // Program MFP Timer B: event-count (HBL) mode, initial TBDR=1 // (a placeholder -- VBL ISR reprograms it for the first real // transition per frame), vector=timerBIsr, then enable in IMRA. Xbtimer(1, MFP_TBCR_EVENT, 1, timerBIsr); Jenabint(INT_TIMER_B); return true; } const char *halLastError(void) { return NULL; } void halPresent(const SurfaceT *src) { if (src == NULL || !gModeSet) { return; } refreshPaletteStateIfNeeded(src); c2pRange(src, 0, SURFACE_HEIGHT, 0, ST_GROUPS_PER_ROW); } void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h) { uint16_t groupStart; uint16_t groupEnd; if (src == NULL || !gModeSet) { return; } refreshPaletteStateIfNeeded(src); // Each c2p group covers 16 horizontal pixels. Round dirty pixel // range to the enclosing group range to keep the planar word // alignment without missing edge pixels. groupStart = (uint16_t)(x >> 4); groupEnd = (uint16_t)(((uint16_t)x + w + 15) >> 4); if (groupEnd > ST_GROUPS_PER_ROW) { groupEnd = ST_GROUPS_PER_ROW; } c2pRange(src, y, y + (int16_t)h, groupStart, groupEnd); } // Vsync() is XBIOS opcode 37; mintlib exposes it directly. It blocks // until the next 50 Hz (PAL) or 60 Hz (NTSC) vertical blank. void halWaitVBL(void) { int16_t before; // Can't use Vsync(): TOS's Vsync increments _vblsem inside its // own VBL ISR, which we replaced (Setexc(VEC_VBL, vblIsr)) with // our SCB-emulating ISR that doesn't chain to the original. 
// Spin on gFrameCount instead -- it's volatile and bumped every // VBL by our ISR. before = gFrameCount; while (gFrameCount == before) { // wait } } void halShutdown(void) { if (!gModeSet) { return; } // Disable MFP Timer B and restore the exception vectors before // changing the screen -- a late ISR firing mid-Setscreen would // write palette into whatever buffer TOS remapped. Jdisint(INT_TIMER_B); if (gOldTimerBVec != NULL) { (void)Setexc(VEC_MFP_TB, (long)gOldTimerBVec); } if (gOldVblVec != NULL) { (void)Setexc(VEC_VBL, (long)gOldVblVec); } Setscreen((long)gPrevLogbase, (long)gPrevPhysbase, gPrevRez); Supexec(writePrevPaletteRegs); writeDiagnostics(); gModeSet = false; }