815 lines
26 KiB
C
815 lines
26 KiB
C
// Commodore Amiga HAL for M2 + M2.5.
|
|
//
|
|
// M2 scope:
|
|
// * OpenScreen (Intuition) for a CUSTOMSCREEN at 320x200x4 bitplanes.
|
|
// * Chunky 4bpp to 4 separate bitplanes c2p at present time.
|
|
// * Partial-rect present covers only the dirty scanlines.
|
|
//
|
|
// M2.5 scope (per-scanline palette / SCB emulation):
|
|
// * Build a user copper list that WAITs for each display scanline
|
|
// and MOVEs the 16 color registers with that line's palette.
|
|
// * Install it via ViewPort.UCopIns + MakeScreen + MrgCop + LoadView.
|
|
// * Rebuild only when SCB or palette state differs from the last
|
|
// presented frame (cached in gCachedScb / gCachedPalette). On
|
|
// clean frames (typical game loop, where only pixel bytes change)
|
|
// we skip AllocMem + MrgCop + LoadView + WaitTOF entirely.
|
|
//
|
|
// Deferred:
|
|
// * Blitter-assisted c2p for speed on A500.
|
|
// * Takeover mode (LoadView(NULL) + OwnBlitter + direct hardware).
|
|
|
|
#include <stddef.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include <exec/types.h>
|
|
#include <exec/interrupts.h>
|
|
#include <hardware/intbits.h>
|
|
#include <intuition/intuition.h>
|
|
#include <intuition/screens.h>
|
|
#include <graphics/copper.h>
|
|
#include <graphics/gfxbase.h>
|
|
#include <graphics/gfxmacros.h>
|
|
#include <graphics/displayinfo.h>
|
|
#include <graphics/modeid.h>
|
|
#include <graphics/rastport.h>
|
|
#include <graphics/view.h>
|
|
|
|
#include <hardware/custom.h>
|
|
|
|
#include <proto/exec.h>
|
|
#include <proto/intuition.h>
|
|
#include <proto/graphics.h>
|
|
|
|
#include "hal.h"
|
|
#include "surfaceInternal.h"
|
|
#include "draw68k_inline.h"
|
|
|
|
extern struct Custom custom;
|
|
|
|
|
|
// Frame-counter VBL server lives at end of file; forward-declare so
|
|
// halInit / halShutdown can install / remove it without C inferring
|
|
// implicit non-static linkage at the call sites.
|
|
static void installVblServer(void);
|
|
static void removeVblServer(void);
|
|
|
|
// ----- Constants -----
|
|
|
|
#define AMIGA_BITPLANES 4
|
|
#define AMIGA_BYTES_PER_ROW 40
|
|
|
|
// ----- Prototypes -----
|
|
|
|
static void buildCopperList(const SurfaceT *src);
|
|
static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t byteStart, uint16_t byteEnd);
|
|
static void dumpCopperList(void);
|
|
static void installCopperList(void);
|
|
static void uploadFirstBandPalette(const SurfaceT *src);
|
|
static void updateCopperIfNeeded(const SurfaceT *src);
|
|
|
|
// ----- Module state -----
|
|
|
|
struct Screen *gScreen = NULL; // shared with input.c
|
|
static struct BitMap *gBitMap = NULL;
|
|
static UBYTE *gPlanes[AMIGA_BITPLANES];
|
|
static struct UCopList *gNewUCL = NULL; // built but not yet installed
|
|
|
|
// Cached SCB + palettes from the last present. halPresent* only needs
|
|
// to rebuild/install the copper list when SCB assignments or palette
|
|
// RGB values differ from what is already on screen; pure pixel updates
|
|
// (which dominate a typical game loop and every frame of the keys
|
|
// demo after the initial paint) leave both alone. MrgCop + LoadView +
|
|
// WaitTOF is hundreds of milliseconds on a 7 MHz 68000, so skipping
|
|
// them on clean frames is a major win.
|
|
static uint8_t gCachedScb [SURFACE_HEIGHT];
|
|
static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
|
|
static bool gCacheValid = false;
|
|
|
|
// 4 KB chunky-to-planar lookup table consumed by chunkyToPlanarRow
|
|
// (src/port/amiga/c2p.s). Layout: gC2pLut[src*16 + pos*4 + plane] =
|
|
// the plane-byte bit contribution that source byte `src` makes to
|
|
// plane `plane` when it sits at byte-position `pos` within a 4-byte
|
|
// (8-pixel) planar group. The src-major layout lets the asm inner
|
|
// loop reach all 16 (pos, plane) entries for a single src byte via
|
|
// 8-bit displacements off (a5, d4.w) without any LEA between reads.
|
|
static uint8_t gC2pLut[4 * 1024];
|
|
static bool gC2pLutReady = false;
|
|
|
|
static bool paletteOrScbChanged(const SurfaceT *src);
|
|
static void initC2pLut(void);
|
|
|
|
// Provided by src/port/amiga/c2p.s.
|
|
extern void chunkyToPlanarRow(const uint8_t *src,
|
|
uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3,
|
|
uint16_t numPlanarBytes,
|
|
const uint8_t *lut);
|
|
|
|
// ----- Internal helpers (alphabetical) -----
|
|
|
|
// Build the 4 KB chunky-to-planar lookup table consumed by
|
|
// chunkyToPlanarRow. For each (pos, plane, src) tuple, store the
|
|
// bit contribution that source byte `src` makes to plane `plane`
|
|
// when it sits at byte-position `pos` (0..3) within a 4-byte
|
|
// (8-pixel) planar group:
|
|
//
|
|
// - src high nibble = leftmost pixel -> plane bit (7 - 2*pos)
|
|
// - src low nibble = rightmost pixel -> plane bit (6 - 2*pos)
|
|
static void initC2pLut(void) {
|
|
uint16_t pos;
|
|
uint16_t plane;
|
|
uint16_t src;
|
|
uint8_t highShift;
|
|
uint8_t lowShift;
|
|
uint8_t highBit;
|
|
uint8_t lowBit;
|
|
|
|
if (gC2pLutReady) {
|
|
return;
|
|
}
|
|
for (src = 0; src < 256; src++) {
|
|
for (pos = 0; pos < 4; pos++) {
|
|
highShift = (uint8_t)(7 - 2 * pos);
|
|
lowShift = (uint8_t)(6 - 2 * pos);
|
|
for (plane = 0; plane < 4; plane++) {
|
|
highBit = (uint8_t)(((src >> 4) >> plane) & 1);
|
|
lowBit = (uint8_t)(((src & 0x0F) >> plane) & 1);
|
|
gC2pLut[src * 16 + pos * 4 + plane] =
|
|
(uint8_t)((highBit << highShift) | (lowBit << lowShift));
|
|
}
|
|
}
|
|
}
|
|
gC2pLutReady = true;
|
|
}
|
|
|
|
|
|
// Convert a range of chunky scanlines [y0, y1) to Amiga planar over
|
|
// planar-byte columns [byteStart, byteEnd). Per row the work is dropped
|
|
// into chunkyToPlanarRow (src/port/amiga/c2p.s) which is ~5x faster
|
|
// than the old per-pixel C inner loop GCC emits for m68k.
|
|
//
|
|
// Each planar byte corresponds to 8 horizontal pixels = 4 source bytes
|
|
// at 4bpp packed; partial-rect callers should round byteStart down and
|
|
// byteEnd up to keep the 8-pixel alignment.
|
|
static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t byteStart, uint16_t byteEnd) {
|
|
const uint8_t *srcLine;
|
|
UBYTE *p0;
|
|
UBYTE *p1;
|
|
UBYTE *p2;
|
|
UBYTE *p3;
|
|
int16_t y;
|
|
uint16_t numBytes;
|
|
|
|
if (byteStart >= byteEnd) {
|
|
return;
|
|
}
|
|
if (!gC2pLutReady) {
|
|
initC2pLut();
|
|
}
|
|
numBytes = (uint16_t)(byteEnd - byteStart);
|
|
|
|
for (y = y0; y < y1; y++) {
|
|
// 4 source bytes per planar byte: source-byte offset =
|
|
// byteStart * 4 within the chunky row.
|
|
srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW + byteStart * 4];
|
|
p0 = &gPlanes[0][y * AMIGA_BYTES_PER_ROW + byteStart];
|
|
p1 = &gPlanes[1][y * AMIGA_BYTES_PER_ROW + byteStart];
|
|
p2 = &gPlanes[2][y * AMIGA_BYTES_PER_ROW + byteStart];
|
|
p3 = &gPlanes[3][y * AMIGA_BYTES_PER_ROW + byteStart];
|
|
chunkyToPlanarRow(srcLine, p0, p1, p2, p3, numBytes, gC2pLut);
|
|
}
|
|
}
|
|
|
|
|
|
// Build a user copper list for per-scanline palette (SCB emulation).
|
|
// One WAIT + 16 MOVEs per displayed scanline + one CEND. The list is
|
|
// stored in gNewUCL until installCopperList swaps it onto the screen.
|
|
// DyOffset tells us where display line 0 sits in hardware coordinates
|
|
// so the WAITs line up with the real visible region regardless of
|
|
// PAL/NTSC or any overscan the user may have configured.
|
|
static void buildCopperList(const SurfaceT *src) {
|
|
struct UCopList *ucl;
|
|
UWORD line;
|
|
UWORD col;
|
|
UBYTE palIdx;
|
|
UWORD prevPalIdx;
|
|
UWORD vpos;
|
|
UWORD topBorder;
|
|
|
|
ucl = (struct UCopList *)AllocMem(sizeof(struct UCopList),
|
|
MEMF_PUBLIC | MEMF_CLEAR);
|
|
if (ucl == NULL) {
|
|
gNewUCL = NULL;
|
|
return;
|
|
}
|
|
|
|
// Worst-case reservation is one band-change per scanline (16 MOVEs
|
|
// + 1 WAIT per change), plus the terminal wait. For realistic SCB
|
|
// tables the actual count is far smaller, but CINIT only takes a
|
|
// single number so we size for the cap.
|
|
CINIT(ucl, (SURFACE_HEIGHT * 17) + 1);
|
|
|
|
// Hardware scanline where display line 0 lives. 0x2C is the
|
|
// standard top border for a PAL screen at TopEdge=0; we hardcode
|
|
// rather than reading ViewPort.DyOffset because DyOffset is a
|
|
// signed +/- adjustment around the standard value, not the
|
|
// absolute hardware line.
|
|
// User-copper vpos values are DISPLAY-RELATIVE -- graphics.lib
|
|
// MrgCop adds the active View's DyOffset to each WAIT before
|
|
// emitting, so a vp=0 user WAIT lands at beam line DyOffset,
|
|
// which is where Intuition places display line 0. Emitting at
|
|
// vpos=line keeps merged vpos under 256 even for the last band
|
|
// (175 + 44 = 219 < 256), avoiding MrgCop's destructive wrap-
|
|
// handling path that would otherwise disable bitplane DMA at
|
|
// the viewport end.
|
|
topBorder = 0;
|
|
prevPalIdx = 0xFFFF;
|
|
|
|
for (line = 0; line < SURFACE_HEIGHT; line++) {
|
|
palIdx = src->scb[line];
|
|
if (palIdx >= SURFACE_PALETTE_COUNT) {
|
|
palIdx = 0;
|
|
}
|
|
if ((UWORD)palIdx == prevPalIdx) {
|
|
continue;
|
|
}
|
|
|
|
vpos = (UWORD)(line + topBorder);
|
|
CWAIT(ucl, vpos, 0);
|
|
for (col = 0; col < SURFACE_COLORS_PER_PALETTE; col++) {
|
|
CMOVE(ucl, custom.color[col], src->palette[palIdx][col]);
|
|
}
|
|
prevPalIdx = (UWORD)palIdx;
|
|
}
|
|
CEND(ucl);
|
|
|
|
gNewUCL = ucl;
|
|
}
|
|
|
|
|
|
// Swap the freshly built user copper list onto the screen's ViewPort
|
|
// and force a full graphics-library recomputation of the hardware
|
|
// copper list. MakeScreen regenerates the viewport copper to include
|
|
// our UCopIns; MrgCop merges every viewport's copper into one hardware
|
|
// list; LoadView swaps the live copper pointers. Calling the graphics
|
|
// primitives directly (rather than only Intuition's RethinkDisplay /
|
|
// RemakeDisplay) was observed here to be the step that actually makes
|
|
// the user copper list visible -- Intuition's wrappers sometimes
|
|
// skipped the merge.
|
|
static void installCopperList(void) {
|
|
struct View *view;
|
|
|
|
if (gNewUCL == NULL || gScreen == NULL) {
|
|
return;
|
|
}
|
|
Forbid();
|
|
if (gScreen->ViewPort.UCopIns != NULL) {
|
|
FreeVPortCopLists(&gScreen->ViewPort);
|
|
}
|
|
gScreen->ViewPort.UCopIns = gNewUCL;
|
|
gNewUCL = NULL;
|
|
Permit();
|
|
|
|
MakeScreen(gScreen);
|
|
|
|
view = ViewAddress();
|
|
Forbid();
|
|
MrgCop(view);
|
|
LoadView(view);
|
|
Permit();
|
|
WaitTOF();
|
|
}
|
|
|
|
|
|
// Diagnostic: dump the merged hardware copper list (LOFCprList) to a
|
|
// text file on the current volume. Written once per halPresent after
|
|
// MrgCop, so the host can inspect exactly what the copper is being
|
|
// asked to execute. Each line is either MOVE (destination offset +
|
|
// data) or WAIT (vp, hp, mask). The dump stops at the first "end of
|
|
// copper" marker (0xFFFF + any mask with bit15 clear would be a wait
|
|
// past frame end).
|
|
static void dumpCopperList(void) {
|
|
FILE *fp;
|
|
struct View *view;
|
|
struct cprlist *cl;
|
|
UWORD *p;
|
|
WORD i;
|
|
WORD count;
|
|
UWORD w1;
|
|
UWORD w2;
|
|
|
|
fp = fopen("copper.txt", "w");
|
|
if (fp == NULL) {
|
|
return;
|
|
}
|
|
|
|
view = ViewAddress();
|
|
if (view == NULL) {
|
|
fprintf(fp, "view is NULL\n");
|
|
fclose(fp);
|
|
return;
|
|
}
|
|
|
|
cl = view->LOFCprList;
|
|
if (cl == NULL) {
|
|
fprintf(fp, "LOFCprList is NULL\n");
|
|
fclose(fp);
|
|
return;
|
|
}
|
|
|
|
p = cl->start;
|
|
count = cl->MaxCount;
|
|
fprintf(fp, "LOFCprList.start=0x%08lx MaxCount=%d\n",
|
|
(unsigned long)p, (int)count);
|
|
fprintf(fp, "vp.DyOffset=%d vp.DxOffset=%d\n",
|
|
(int)gScreen->ViewPort.DyOffset,
|
|
(int)gScreen->ViewPort.DxOffset);
|
|
fprintf(fp, "view.DyOffset=%d view.DxOffset=%d\n",
|
|
(int)view->DyOffset, (int)view->DxOffset);
|
|
fprintf(fp, "--\n");
|
|
|
|
if (p == NULL) {
|
|
fclose(fp);
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < count; i++) {
|
|
w1 = p[i * 2];
|
|
w2 = p[i * 2 + 1];
|
|
if (w1 == 0xFFFF && w2 == 0xFFFE) {
|
|
fprintf(fp, "%4d: END %04x %04x\n", (int)i, w1, w2);
|
|
break;
|
|
}
|
|
if (w1 & 1) {
|
|
fprintf(fp, "%4d: %s vp=%3d hp=%3d mask=%04x\n",
|
|
(int)i,
|
|
(w2 & 0x8000) ? "SKIP" : "WAIT",
|
|
(int)(w1 >> 8),
|
|
(int)(w1 & 0xFE),
|
|
(unsigned)w2);
|
|
} else {
|
|
fprintf(fp, "%4d: MOVE dst=%03x data=%04x\n",
|
|
(int)i,
|
|
(unsigned)(w1 & 0x1FE),
|
|
(unsigned)w2);
|
|
}
|
|
}
|
|
|
|
fclose(fp);
|
|
}
|
|
|
|
|
|
// Returns true if the SCB table or palette RGB values differ from the
|
|
// last presented frame, or if no frame has been presented yet.
|
|
static bool paletteOrScbChanged(const SurfaceT *src) {
|
|
if (!gCacheValid) {
|
|
return true;
|
|
}
|
|
if (memcmp(gCachedScb, src->scb, sizeof(gCachedScb)) != 0) {
|
|
return true;
|
|
}
|
|
if (memcmp(gCachedPalette, src->palette, sizeof(gCachedPalette)) != 0) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
// Rebuild and install the user copper list only if the palette/SCB
|
|
// state visible to the display differs from what the surface carries
|
|
// now. On clean frames we skip the AllocMem + MrgCop + LoadView +
|
|
// WaitTOF chain entirely.
|
|
static void updateCopperIfNeeded(const SurfaceT *src) {
|
|
if (!paletteOrScbChanged(src)) {
|
|
return;
|
|
}
|
|
uploadFirstBandPalette(src);
|
|
buildCopperList(src);
|
|
installCopperList();
|
|
memcpy(gCachedScb, src->scb, sizeof(gCachedScb));
|
|
memcpy(gCachedPalette, src->palette, sizeof(gCachedPalette));
|
|
gCacheValid = true;
|
|
}
|
|
|
|
|
|
// Load the first band's palette into the screen's ColorMap so the
|
|
// Intuition-generated frame-start copper writes those values on each
|
|
// frame. This acts as a safety net: even if our user copper list does
|
|
// not fire (or fires late) for the very first band, the top of the
|
|
// display still shows the correct colors because Intuition's own
|
|
// COLORxx loads happen before any user copper instruction.
|
|
static void uploadFirstBandPalette(const SurfaceT *src) {
|
|
UWORD aPalette[SURFACE_COLORS_PER_PALETTE];
|
|
UWORD i;
|
|
UBYTE palIdx;
|
|
|
|
palIdx = src->scb[0];
|
|
if (palIdx >= SURFACE_PALETTE_COUNT) {
|
|
palIdx = 0;
|
|
}
|
|
for (i = 0; i < SURFACE_COLORS_PER_PALETTE; i++) {
|
|
aPalette[i] = (UWORD)src->palette[palIdx][i];
|
|
}
|
|
LoadRGB4(&gScreen->ViewPort, aPalette, SURFACE_COLORS_PER_PALETTE);
|
|
}
|
|
|
|
|
|
// ----- HAL API (alphabetical) -----
|
|
|
|
bool halInit(const JoeyConfigT *config) {
|
|
uint16_t i;
|
|
|
|
(void)config;
|
|
|
|
// SA_DisplayID pins us to OCS PAL low-res so Intuition opens a
|
|
// real planar screen rather than an RTG substitute.
|
|
gScreen = OpenScreenTags(NULL,
|
|
(ULONG)SA_Width, (ULONG)SURFACE_WIDTH,
|
|
(ULONG)SA_Height, (ULONG)SURFACE_HEIGHT,
|
|
(ULONG)SA_Depth, (ULONG)AMIGA_BITPLANES,
|
|
(ULONG)SA_DisplayID, (ULONG)(PAL_MONITOR_ID | LORES_KEY),
|
|
(ULONG)SA_DetailPen, (ULONG)0,
|
|
(ULONG)SA_BlockPen, (ULONG)1,
|
|
(ULONG)SA_Title, (ULONG)"JoeyLib",
|
|
(ULONG)SA_Type, (ULONG)CUSTOMSCREEN,
|
|
(ULONG)SA_Quiet, (ULONG)TRUE,
|
|
TAG_DONE);
|
|
|
|
if (gScreen == NULL) {
|
|
return false;
|
|
}
|
|
gBitMap = gScreen->RastPort.BitMap;
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
gPlanes[i] = gBitMap->Planes[i];
|
|
if (gPlanes[i] == NULL) {
|
|
CloseScreen(gScreen);
|
|
gScreen = NULL;
|
|
return false;
|
|
}
|
|
}
|
|
// Force COLOR00 to black so the overscan/border region around the
|
|
// 320x200 display is black until the app's palette load takes over
|
|
// on the first stagePresent. Apps that paint a non-black bg need
|
|
// do nothing -- their palette[0] writes the same COLOR00 once the
|
|
// first LoadRGB4 fires from uploadScbAndPalette.
|
|
SetRGB4(&gScreen->ViewPort, 0, 0, 0, 0);
|
|
installVblServer();
|
|
return true;
|
|
}
|
|
|
|
|
|
// This port records no error text; the result is always NULL.
const char *halLastError(void) {
    return (const char *)NULL;
}
|
|
|
|
|
|
void halPresent(const SurfaceT *src) {
|
|
int16_t y;
|
|
uint8_t minWord;
|
|
uint8_t maxWord;
|
|
uint16_t byteStart;
|
|
uint16_t byteEnd;
|
|
|
|
if (src == NULL || gScreen == NULL) {
|
|
return;
|
|
}
|
|
updateCopperIfNeeded(src);
|
|
|
|
// Walk per-row dirty bands: each planar byte covers 8 px = 2 chunky
|
|
// words, so byteStart = minWord/2 and byteEnd = maxWord/2 + 1
|
|
// converts dirty-word units to the planar-byte units c2pRange wants.
|
|
for (y = 0; y < SURFACE_HEIGHT; y++) {
|
|
minWord = gStageMinWord[y];
|
|
maxWord = gStageMaxWord[y];
|
|
if (minWord > maxWord) {
|
|
continue;
|
|
}
|
|
byteStart = (uint16_t)(minWord >> 1);
|
|
byteEnd = (uint16_t)((maxWord >> 1) + 1);
|
|
c2pRange(src, y, (int16_t)(y + 1), byteStart, byteEnd);
|
|
}
|
|
}
|
|
|
|
|
|
void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h) {
|
|
uint16_t byteStart;
|
|
uint16_t byteEnd;
|
|
|
|
if (src == NULL || gScreen == NULL) {
|
|
return;
|
|
}
|
|
updateCopperIfNeeded(src);
|
|
// Each planar byte covers 8 horizontal pixels. Round dirty pixel
|
|
// range to the enclosing planar-byte range so we never miss an
|
|
// edge pixel while still honoring the rect width.
|
|
byteStart = (uint16_t)(x >> 3);
|
|
byteEnd = (uint16_t)(((uint16_t)x + w + 7) >> 3);
|
|
if (byteEnd > AMIGA_BYTES_PER_ROW) {
|
|
byteEnd = AMIGA_BYTES_PER_ROW;
|
|
}
|
|
c2pRange(src, y, y + (int16_t)h, byteStart, byteEnd);
|
|
}
|
|
|
|
|
|
// Block the calling task until the next vertical blank by delegating
// to graphics.library's WaitTOF() ("top of frame": 50 Hz on PAL,
// 60 Hz on NTSC). graphics.library is auto-opened by libnix, so no
// extra setup is required here.
void halWaitVBL(void) {
    WaitTOF();
}
|
|
|
|
|
|
// Frame counter via a VBL interrupt server (AddIntServer on
|
|
// INTB_VERTB). Polling VPOSR/VHPOSR is unreliable across chipsets
|
|
// and OS versions -- the bit positions vary and the polling rate has
|
|
// to win against the high-bit window per frame, which it doesn't
|
|
// when the caller's loop body is long. The interrupt server fires
|
|
// exactly once per VBlank regardless of caller cadence.
|
|
//
|
|
// halFrameCount just reads the volatile counter -- no edge detection
|
|
// needed in the polling path.
|
|
|
|
static volatile uint16_t gFrameCount = 0;
|
|
static struct Interrupt gVblIntServer;
|
|
static bool gVblInstalled = false;
|
|
|
|
// Server protocol: called by the interrupt dispatcher with A1 =
|
|
// is_Data, A6 = ExecBase. __saveds gives us the libnix data base in
|
|
// A4 so we can reference gFrameCount through the small-data ABI.
|
|
// Return Z=0 (non-zero result) to keep the chain going so other
|
|
// VBL servers further down the priority list still fire.
|
|
static __saveds ULONG vblServer(void) {
|
|
gFrameCount = (uint16_t)(gFrameCount + 1u);
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void installVblServer(void) {
|
|
if (gVblInstalled) {
|
|
return;
|
|
}
|
|
gVblIntServer.is_Node.ln_Type = NT_INTERRUPT;
|
|
gVblIntServer.is_Node.ln_Pri = -60;
|
|
gVblIntServer.is_Node.ln_Name = (char *)"joeyFrameCount";
|
|
gVblIntServer.is_Data = NULL;
|
|
gVblIntServer.is_Code = (void (*)())vblServer;
|
|
AddIntServer(INTB_VERTB, &gVblIntServer);
|
|
gVblInstalled = true;
|
|
}
|
|
|
|
|
|
static void removeVblServer(void) {
|
|
if (!gVblInstalled) {
|
|
return;
|
|
}
|
|
RemIntServer(INTB_VERTB, &gVblIntServer);
|
|
gVblInstalled = false;
|
|
}
|
|
|
|
|
|
uint16_t halFrameCount(void) {
|
|
return gFrameCount;
|
|
}
|
|
|
|
|
|
// Nominal frame rate in Hz. Fixed at the PAL rate: the display mode is
// not switched at runtime. If NTSC is ever exposed this would have to
// return 60 for that mode.
uint16_t halFrameHz(void) {
    enum { PAL_FRAME_HZ = 50 };

    return (uint16_t)PAL_FRAME_HZ;
}
|
|
|
|
|
|
void halShutdown(void) {
|
|
// Tear down the VBL server before closing the screen so the
|
|
// interrupt chain is clean if anything else is watching.
|
|
removeVblServer();
|
|
if (gScreen != NULL) {
|
|
// CloseScreen should free attached UCopList, but be explicit
|
|
// to catch any case where the screen close path skips it.
|
|
Forbid();
|
|
if (gScreen->ViewPort.UCopIns != NULL) {
|
|
FreeVPortCopLists(&gScreen->ViewPort);
|
|
}
|
|
Permit();
|
|
CloseScreen(gScreen);
|
|
gScreen = NULL;
|
|
gBitMap = NULL;
|
|
}
|
|
if (gNewUCL != NULL) {
|
|
FreeMem(gNewUCL, sizeof(struct UCopList));
|
|
gNewUCL = NULL;
|
|
}
|
|
}
|
|
|
|
|
|
// Shared 68k fast paths for the chunky surface buffer (src/shared68k/
|
|
// surface68k.s). Same primitives used by the Atari ST port -- the
|
|
// stage / surfaces are an identical 4bpp packed layout on both.
|
|
extern void surface68kClearLong(uint8_t *pixels, uint16_t fillByte);
|
|
extern void surface68kFillRectFull(uint8_t *pixels, int16_t y, uint16_t h, uint16_t fillByte);
|
|
extern void surface68kFillRectByteAligned(uint8_t *rowFirst, uint16_t midBytes, uint16_t h, uint16_t fillByte);
|
|
|
|
|
|
// Fast clear of the stage surface via surface68kClearLong, filling
// with the byte `doubled`. Only the surface returned by stageGet() is
// handled; for any other surface returns false so the caller can use
// its generic path.
bool halFastSurfaceClear(SurfaceT *s, uint8_t doubled) {
    const bool isStage = (s == stageGet());

    if (isStage) {
        surface68kClearLong(s->pixels, (uint16_t)doubled);
    }
    return isStage;
}
|
|
|
|
|
|
// Fast path bands:
|
|
// - x == 0 && w == SURFACE_WIDTH (full row): one move.l-stream per
|
|
// row via surface68kFillRectFull. No nibble fixups needed -- both
|
|
// nibbles in every byte get the same value, and rowFirst is the
|
|
// surface base which is always word-aligned by calloc.
|
|
// - x % 4 == 0 && w even (byte-aligned AND word-aligned): inner
|
|
// bytes via the asm. The (x % 4 == 0) part is the 68000 alignment
|
|
// requirement for the move.l writes inside the asm -- byte index
|
|
// = x/2, so x must be a multiple of 4 for the byte index to be
|
|
// even.
|
|
// - everything else: fall through to C's fillRectClipped, which
|
|
// does per-byte writes (no alignment needed) and handles the
|
|
// leading / trailing nibble RMW correctly.
|
|
bool halFastFillRect(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex) {
|
|
uint8_t doubled;
|
|
|
|
if (s != stageGet()) {
|
|
return false;
|
|
}
|
|
if (h == 0u || w == 0u) {
|
|
return true; /* clipped-out: nothing to do, but we "handled" it */
|
|
}
|
|
doubled = (uint8_t)(((colorIndex & 0x0Fu) << 4) | (colorIndex & 0x0Fu));
|
|
|
|
if (x == 0 && w == (uint16_t)SURFACE_WIDTH) {
|
|
surface68kFillRectFull(s->pixels, y, h, (uint16_t)doubled);
|
|
return true;
|
|
}
|
|
if (((x & 3) == 0) && ((w & 1u) == 0u)) {
|
|
uint8_t *rowFirst = &s->pixels[(uint16_t)y * (uint16_t)SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
|
|
surface68kFillRectByteAligned(rowFirst, w >> 1, h, (uint16_t)doubled);
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
|
|
// No accelerated tile copy on this port: the arguments are ignored and
// false is returned so the caller uses its generic path.
bool halFastTileCopy(uint8_t *dstRow0, const uint8_t *srcRow0) {
    (void)srcRow0;
    (void)dstRow0;

    return false;
}
|
|
|
|
|
|
// No accelerated masked tile copy on this port: the arguments are
// ignored and false is returned so the caller uses its generic path.
bool halFastTileCopyMasked(uint8_t *dstRow0, const uint8_t *srcRow0, uint8_t transparent) {
    (void)transparent;
    (void)srcRow0;
    (void)dstRow0;

    return false;
}
|
|
|
|
|
|
// No accelerated tile paste on this port: the arguments are ignored
// and false is returned so the caller uses its generic path.
bool halFastTilePaste(uint8_t *dstRow0, const uint8_t *srcTilePixels) {
    (void)srcTilePixels;
    (void)dstRow0;

    return false;
}
|
|
|
|
|
|
// No accelerated tile snapshot on this port: the arguments are ignored
// and false is returned so the caller uses its generic path.
bool halFastTileSnap(uint8_t *dstTilePixels, const uint8_t *srcRow0) {
    (void)srcRow0;
    (void)dstTilePixels;

    return false;
}
|
|
|
|
|
|
// Plot one pixel on the stage surface through draw68kPlotPixel, which
// is given the color both as a low nibble and pre-shifted into the
// high nibble. Returns false without drawing for any surface other
// than the one stageGet() returns.
bool halFastDrawPixel(SurfaceT *s, uint16_t x, uint16_t y, uint8_t colorIndex) {
    const uint8_t lowNibble  = (uint8_t)(colorIndex & 0x0Fu);
    const uint8_t highNibble = (uint8_t)(lowNibble << 4);

    if (s != stageGet()) {
        return false;
    }
    draw68kPlotPixel(s->pixels, (int16_t)x, (int16_t)y, lowNibble, highNibble);
    return true;
}
|
|
|
|
|
|
// Draw a line on the stage surface through draw68kLine. Returns false
// without drawing for any surface other than the one stageGet()
// returns.
bool halFastDrawLine(SurfaceT *s, int16_t x0, int16_t y0, int16_t x1, int16_t y1, uint8_t colorIndex) {
    const bool isStage = (s == stageGet());

    if (isStage) {
        draw68kLine(s->pixels, x0, y0, x1, y1, colorIndex);
    }
    return isStage;
}
|
|
|
|
|
|
// Draw a circle outline on the stage surface through
// draw68kCircleOutline. Returns false without drawing for any surface
// other than the one stageGet() returns.
bool halFastDrawCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIndex) {
    const bool isStage = (s == stageGet());

    if (isStage) {
        draw68kCircleOutline(s->pixels, cx, cy, r, colorIndex);
    }
    return isStage;
}
|
|
|
|
|
|
// Draw a filled circle on the stage surface through draw68kCircleFill.
// Returns false without drawing for any surface other than the one
// stageGet() returns.
bool halFastFillCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIndex) {
    const bool isStage = (s == stageGet());

    if (isStage) {
        draw68kCircleFill(s->pixels, cx, cy, r, colorIndex);
    }
    return isStage;
}
|
|
|
|
|
|
// No accelerated flood-fill walk on this port: every argument is
// ignored, no output parameter is written, and false is returned so
// the caller uses its generic path.
bool halFastFloodWalk(uint8_t *row, int16_t startX, uint8_t matchColor, uint8_t newColor, bool matchEqual, bool *seedMatched, int16_t *leftXOut, int16_t *rightXOut) {
    (void)rightXOut;
    (void)leftXOut;
    (void)seedMatched;
    (void)matchEqual;
    (void)newColor;
    (void)matchColor;
    (void)startX;
    (void)row;

    return false;
}
|
|
|
|
|
|
// No accelerated flood-fill row scan on this port: every argument is
// ignored and false is returned so the caller uses its generic path.
bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
    (void)markBuf;
    (void)matchEqual;
    (void)newColor;
    (void)matchColor;
    (void)rightX;
    (void)leftX;
    (void)row;

    return false;
}
|
|
|
|
|
|
// No accelerated rectangle blit on this port: every argument is
// ignored and false is returned so the caller uses its generic path.
bool halFastBlitRect(uint8_t *dstRow0, int16_t dstX, const uint8_t *srcRow0, int16_t srcX, int16_t copyW, int16_t copyH, int16_t srcRowBytes, uint16_t transparent) {
    (void)transparent;
    (void)srcRowBytes;
    (void)copyH;
    (void)copyW;
    (void)srcX;
    (void)srcRow0;
    (void)dstX;
    (void)dstRow0;

    return false;
}
|
|
|
|
|
|
// No accelerated flood-fill scan-and-push on this port: every argument
// is ignored, the stack and its pointer are left untouched, and false
// is returned so the caller uses its generic path.
bool halFastFloodScanAndPush(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, int16_t scanY, int16_t *stackX, int16_t *stackY, int16_t *spInOut, int16_t maxSp) {
    (void)maxSp;
    (void)spInOut;
    (void)stackY;
    (void)stackX;
    (void)scanY;
    (void)matchEqual;
    (void)newColor;
    (void)matchColor;
    (void)rightX;
    (void)leftX;
    (void)row;

    return false;
}
|
|
|
|
|
|
// No accelerated combined flood-fill walk + scans on this port: every
// argument is ignored, no output parameter is written, and false is
// returned so the caller uses its generic path.
bool halFastFloodWalkAndScans(uint8_t *pixels, int16_t x, int16_t y, uint8_t matchColor, uint8_t newColor, bool matchEqual, int16_t *stackX, int16_t *stackY, int16_t *spInOut, int16_t maxSp, bool *seedMatched, int16_t *leftXOut, int16_t *rightXOut) {
    (void)rightXOut;
    (void)leftXOut;
    (void)seedMatched;
    (void)maxSp;
    (void)spInOut;
    (void)stackY;
    (void)stackX;
    (void)matchEqual;
    (void)newColor;
    (void)matchColor;
    (void)y;
    (void)x;
    (void)pixels;

    return false;
}
|
|
|
|
|
|
// No accelerated tile fill on this port: every argument is ignored and
// false is returned so the caller uses its generic path.
bool halFastTileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint16_t fillWord) {
    (void)fillWord;
    (void)by;
    (void)bx;
    (void)s;

    return false;
}
|
|
|
|
|
|
uint8_t *halStageAllocPixels(void) {
|
|
return (uint8_t *)malloc(SURFACE_PIXELS_SIZE);
|
|
}
|
|
|
|
|
|
// Return a buffer obtained from halStageAllocPixels to the C heap.
// Passing NULL is harmless, as with free().
void halStageFreePixels(uint8_t *pixels) {
    void *block = pixels;

    free(block);
}
|