Amiga parity with IIgs!
This commit is contained in:
parent
6c03d93e88
commit
b1e24b4650
37 changed files with 4312 additions and 493 deletions
|
|
@ -171,11 +171,11 @@ int main(void) {
|
||||||
|
|
||||||
if (flashFrames > 0) {
|
if (flashFrames > 0) {
|
||||||
fillRect(screen, BAR_X, BAR_Y, BAR_W, BAR_H, COLOR_BAR);
|
fillRect(screen, BAR_X, BAR_Y, BAR_W, BAR_H, COLOR_BAR);
|
||||||
stagePresentRect(BAR_X, BAR_Y, BAR_W, BAR_H);
|
stagePresent();
|
||||||
flashFrames--;
|
flashFrames--;
|
||||||
if (flashFrames == 0) {
|
if (flashFrames == 0) {
|
||||||
fillRect(screen, BAR_X, BAR_Y, BAR_W, BAR_H, COLOR_HINT);
|
fillRect(screen, BAR_X, BAR_Y, BAR_W, BAR_H, COLOR_HINT);
|
||||||
stagePresentRect(BAR_X, BAR_Y, BAR_W, BAR_H);
|
stagePresent();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -80,8 +80,10 @@ static void buildPalette(SurfaceT *screen) {
|
||||||
|
|
||||||
|
|
||||||
static void drawAndPresent(SurfaceT *screen, int16_t x, int16_t y, int16_t w, int16_t h, uint8_t color) {
|
static void drawAndPresent(SurfaceT *screen, int16_t x, int16_t y, int16_t w, int16_t h, uint8_t color) {
|
||||||
|
/* fillRect marks the rect dirty; stagePresent flushes only that
|
||||||
|
* dirty band. */
|
||||||
fillRect(screen, x, y, (uint16_t)w, (uint16_t)h, color);
|
fillRect(screen, x, y, (uint16_t)w, (uint16_t)h, color);
|
||||||
stagePresentRect(x, y, (uint16_t)w, (uint16_t)h);
|
stagePresent();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -158,8 +158,6 @@ static void presentChangedCells(SurfaceT *screen, int16_t cursorCol, int16_t cur
|
||||||
int16_t row;
|
int16_t row;
|
||||||
JoeyKeyE key;
|
JoeyKeyE key;
|
||||||
bool lit;
|
bool lit;
|
||||||
int16_t x;
|
|
||||||
int16_t y;
|
|
||||||
|
|
||||||
for (row = 0; row < GRID_ROWS; row++) {
|
for (row = 0; row < GRID_ROWS; row++) {
|
||||||
for (col = 0; col < GRID_COLS; col++) {
|
for (col = 0; col < GRID_COLS; col++) {
|
||||||
|
|
@ -171,10 +169,10 @@ static void presentChangedCells(SurfaceT *screen, int16_t cursorCol, int16_t cur
|
||||||
if (lit == gCellLit[row][col]) {
|
if (lit == gCellLit[row][col]) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
/* drawCell marks the cell's rect dirty; stagePresent
|
||||||
|
* flushes that one band. */
|
||||||
drawCell(screen, col, row, lit);
|
drawCell(screen, col, row, lit);
|
||||||
x = (int16_t)(MARGIN_X + col * (CELL_W + GAP));
|
stagePresent();
|
||||||
y = (int16_t)(MARGIN_Y + row * (CELL_H + GAP));
|
|
||||||
stagePresentRect(x, y, CELL_W, CELL_H);
|
|
||||||
gCellLit[row][col] = lit;
|
gCellLit[row][col] = lit;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -195,19 +193,16 @@ static void updateCursor(SurfaceT *screen, int16_t cursorCol, int16_t cursorRow)
|
||||||
if (gLastCursorX != mouseX || gLastCursorY != mouseY) {
|
if (gLastCursorX != mouseX || gLastCursorY != mouseY) {
|
||||||
if (gLastCursorCol != CELL_NONE) {
|
if (gLastCursorCol != CELL_NONE) {
|
||||||
drawCell(screen, gLastCursorCol, gLastCursorRow, gCellLit[gLastCursorRow][gLastCursorCol]);
|
drawCell(screen, gLastCursorCol, gLastCursorRow, gCellLit[gLastCursorRow][gLastCursorCol]);
|
||||||
stagePresentRect(
|
|
||||||
(int16_t)(MARGIN_X + gLastCursorCol * (CELL_W + GAP)),
|
|
||||||
(int16_t)(MARGIN_Y + gLastCursorRow * (CELL_H + GAP)),
|
|
||||||
CELL_W, CELL_H);
|
|
||||||
} else if (gLastCursorX >= 0 && gLastCursorY >= 0) {
|
} else if (gLastCursorX >= 0 && gLastCursorY >= 0) {
|
||||||
// Old cursor was in a gap region. Stamp background over it.
|
// Old cursor was in a gap region. Stamp background over it.
|
||||||
fillRect(screen, gLastCursorX, gLastCursorY, CURSOR_W, CURSOR_H, COLOR_BACKGROUND);
|
fillRect(screen, gLastCursorX, gLastCursorY, CURSOR_W, CURSOR_H, COLOR_BACKGROUND);
|
||||||
stagePresentRect(gLastCursorX, gLastCursorY, CURSOR_W, CURSOR_H);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
drawCursor(screen, mouseX, mouseY);
|
drawCursor(screen, mouseX, mouseY);
|
||||||
stagePresentRect(mouseX, mouseY, CURSOR_W, CURSOR_H);
|
/* All draw calls above marked their rects dirty; one stagePresent
|
||||||
|
* flushes the union (cursor erase + cursor draw). */
|
||||||
|
stagePresent();
|
||||||
|
|
||||||
gLastCursorX = mouseX;
|
gLastCursorX = mouseX;
|
||||||
gLastCursorY = mouseY;
|
gLastCursorY = mouseY;
|
||||||
|
|
|
||||||
|
|
@ -15,11 +15,11 @@
|
||||||
#define BALL_TILES_Y (BALL_H / 8)
|
#define BALL_TILES_Y (BALL_H / 8)
|
||||||
|
|
||||||
#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * TILE_BYTES)
|
#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * TILE_BYTES)
|
||||||
// SaveUnder must store rounded-up byte boundaries: x rounded down to
|
// SaveUnder rounds x down to the platform's storage alignment: 2 px
|
||||||
// even, width rounded up to even. Worst case for BALL_W=16 (already
|
// for chunky 4bpp (1 extra byte/row worst case), 8 px for planar
|
||||||
// even) is 8 bytes per row + alignment slack of 1 byte; size for the
|
// 4-plane (4 extra bytes/row worst case -- one per plane). The +4
|
||||||
// pessimistic case so the buffer never overflows.
|
// covers the planar case; on chunky it is just 3 bytes/row of unused slack.
|
||||||
#define BALL_BACKUP_BYTES (((BALL_W + 2) >> 1) * BALL_H)
|
#define BALL_BACKUP_BYTES (((BALL_W >> 1) + 4) * BALL_H)
|
||||||
|
|
||||||
#define BALL_PALETTE_IDX 0
|
#define BALL_PALETTE_IDX 0
|
||||||
|
|
||||||
|
|
@ -100,18 +100,14 @@ int main(void) {
|
||||||
int16_t y;
|
int16_t y;
|
||||||
int16_t vx;
|
int16_t vx;
|
||||||
int16_t vy;
|
int16_t vy;
|
||||||
int16_t oldX;
|
|
||||||
int16_t oldY;
|
|
||||||
uint16_t oldW;
|
|
||||||
uint16_t oldH;
|
|
||||||
int16_t unionX;
|
|
||||||
int16_t unionY;
|
|
||||||
int16_t unionRight;
|
|
||||||
int16_t unionBottom;
|
|
||||||
bool haveBackup;
|
bool haveBackup;
|
||||||
|
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
/* Amiga planar emits 8 pre-shifted DRAW variants per sprite (one
|
||||||
|
* per x % 8 alignment) so the codegen arena needs roughly 8x what
|
||||||
|
* the chunky two-shift case asks for. 32 KB fits a 16x16 ball
|
||||||
|
* with all variants. */
|
||||||
|
config.codegenBytes = 32UL * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64UL * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128UL * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
@ -155,7 +151,7 @@ int main(void) {
|
||||||
haveBackup = false;
|
haveBackup = false;
|
||||||
|
|
||||||
spriteSaveAndDraw(screen, ball, x, y, &backup);
|
spriteSaveAndDraw(screen, ball, x, y, &backup);
|
||||||
stagePresentRect(backup.x, backup.y, backup.width, backup.height);
|
stagePresent();
|
||||||
haveBackup = true;
|
haveBackup = true;
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
|
@ -164,19 +160,15 @@ int main(void) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Stash the prior ball's region before restoring the bytes
|
// Do all off-screen work first (restore + move + draw), then
|
||||||
// under it. Do all off-screen work (restore + move + draw)
|
// ONE stagePresent flushes the union of dirty bands set by
|
||||||
// first, then waitVBL + ONE stagePresentRect covering both
|
// restoreUnder + draw. For tear-free motion, add a joeyWaitVBL() before the present
|
||||||
// old and new regions. Putting waitVBL immediately before the
|
// to land it inside the VBL window so the CRT never sees a
|
||||||
// present lets the present land inside the VBL window so the
|
// half-updated framebuffer (matters most on single-buffered
|
||||||
// CRT never sees a half-updated framebuffer (matters most on
|
// chunky targets like IIgs SHR; on planar c2p platforms it
|
||||||
// single-buffered chunky targets like IIgs SHR; on planar
|
// also avoids c2p racing the raster). VBL wait is omitted
|
||||||
// c2p platforms it also avoids c2p racing the raster).
|
// here so the demo runs at the sprite pipeline's native
|
||||||
oldX = backup.x;
|
// throughput -- expect tearing on the ball.
|
||||||
oldY = backup.y;
|
|
||||||
oldW = backup.width;
|
|
||||||
oldH = backup.height;
|
|
||||||
|
|
||||||
if (haveBackup) {
|
if (haveBackup) {
|
||||||
spriteRestoreUnder(screen, &backup);
|
spriteRestoreUnder(screen, &backup);
|
||||||
}
|
}
|
||||||
|
|
@ -190,27 +182,7 @@ int main(void) {
|
||||||
|
|
||||||
spriteSaveAndDraw(screen, ball, x, y, &backup);
|
spriteSaveAndDraw(screen, ball, x, y, &backup);
|
||||||
|
|
||||||
// Bounding box of (old rect) U (new rect). For typical
|
stagePresent();
|
||||||
// small-step motion the rects overlap heavily so the union
|
|
||||||
// is barely larger than one ball.
|
|
||||||
unionX = (oldX < backup.x) ? oldX : backup.x;
|
|
||||||
unionY = (oldY < backup.y) ? oldY : backup.y;
|
|
||||||
unionRight = (int16_t)((oldX + oldW > backup.x + backup.width)
|
|
||||||
? (oldX + oldW)
|
|
||||||
: (backup.x + backup.width));
|
|
||||||
unionBottom = (int16_t)((oldY + oldH > backup.y + backup.height)
|
|
||||||
? (oldY + oldH)
|
|
||||||
: (backup.y + backup.height));
|
|
||||||
|
|
||||||
// VBL wait removed -- the demo runs at the native compute speed
|
|
||||||
// of save+restore+draw+presentRect so we can SEE the sprite
|
|
||||||
// pipeline's actual throughput. Expect tearing on the ball
|
|
||||||
// since the present can land mid-scan; that's the cost of
|
|
||||||
// showing real frame rate. Add joeyWaitVBL() back here for
|
|
||||||
// tear-free 60 Hz motion.
|
|
||||||
stagePresentRect(unionX, unionY,
|
|
||||||
(uint16_t)(unionRight - unionX),
|
|
||||||
(uint16_t)(unionBottom - unionY));
|
|
||||||
haveBackup = true;
|
haveBackup = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -28,7 +28,16 @@
|
||||||
|
|
||||||
// 4-frame measurement window. Long enough that loop overhead doesn't
|
// UBER_FRAMES-frame measurement window. Long enough that loop overhead doesn't
|
||||||
// dominate; short enough to keep the full demo run under ~10 sec.
|
// dominate; short enough to keep the full demo run to roughly 80 sec.
|
||||||
#define UBER_FRAMES 4u
|
/* 16 frames per timed op gives 4x the iter-count resolution of the
|
||||||
|
* earlier 4-frame budget. Exposes the actual per-op cost on slow
|
||||||
|
* ops where 4 frames produced the same iter count on different
|
||||||
|
* framerates -- e.g. drawCircle r=80 read as "4 iters / 4 frames"
|
||||||
|
* on both 60 Hz IIgs (16.7 ms/frame, 67 ms window) and 50 Hz Amiga
|
||||||
|
* (20 ms/frame, 80 ms window) even though per-op cost was equal,
|
||||||
|
* just because 4 ops at 16-17 ms happen to fit both windows. The
|
||||||
|
* 16-frame budget extends the windows to 267 ms / 320 ms; quantum
|
||||||
|
* gap shrinks to ~6%. Total run time scales 4x (~80 sec each). */
|
||||||
|
#define UBER_FRAMES 16u
|
||||||
|
|
||||||
|
|
||||||
typedef void (*OpFn)(void);
|
typedef void (*OpFn)(void);
|
||||||
|
|
@ -44,9 +53,10 @@ static TileT gTileScratch;
|
||||||
|
|
||||||
// Run `op` in a tight loop until `targetFrames` joeyFrameCount ticks
|
// Run `op` in a tight loop until `targetFrames` joeyFrameCount ticks
|
||||||
// have elapsed. Returns iterations completed.
|
// have elapsed. Returns iterations completed.
|
||||||
static unsigned long runForFrames(OpFn op, unsigned int targetFrames) {
|
static unsigned long runForFrames(OpFn op, unsigned int targetFrames, uint16_t *actualFramesOut) {
|
||||||
unsigned long count;
|
unsigned long count;
|
||||||
uint16_t startFrame;
|
uint16_t startFrame;
|
||||||
|
uint16_t endFrame;
|
||||||
|
|
||||||
count = 0UL;
|
count = 0UL;
|
||||||
|
|
||||||
|
|
@ -57,29 +67,50 @@ static unsigned long runForFrames(OpFn op, unsigned int targetFrames) {
|
||||||
op();
|
op();
|
||||||
count++;
|
count++;
|
||||||
}
|
}
|
||||||
|
/* Capture the actual elapsed frames -- the last iter typically
|
||||||
|
* overruns the target. Using actual instead of target as the
|
||||||
|
* ops/sec divisor stays honest for ops slower than 1 frame
|
||||||
|
* (where count is forced low while real time stretches well
|
||||||
|
* past targetFrames). */
|
||||||
|
endFrame = joeyFrameCount();
|
||||||
|
*actualFramesOut = (uint16_t)(endFrame - startFrame);
|
||||||
|
if (*actualFramesOut == 0u) {
|
||||||
|
*actualFramesOut = 1u; /* defensive: avoid div-by-zero */
|
||||||
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Time and log one op. Reports iters / N frames AND the derived
|
// Time and log one op. Reports iters / N frames AND the derived
|
||||||
// ops/sec so per-port results are directly comparable against IIgs
|
// ops/sec so per-port results are directly comparable against IIgs
|
||||||
// regardless of CPU speed or display refresh rate.
|
// regardless of CPU speed or display refresh rate. Also logs an
|
||||||
|
// FNV-1a hash of the surface state after timing -- this is the
|
||||||
|
// pixel-perfect comparison input for the cross-port validation
|
||||||
|
// harness (tools/diff-uber-hashes.py). Captured against IIgs as the
|
||||||
|
// golden reference; planar 68k rewrites validate by matching it.
|
||||||
static void timeOp(const char *name, OpFn op) {
|
static void timeOp(const char *name, OpFn op) {
|
||||||
unsigned long iters;
|
unsigned long iters;
|
||||||
unsigned long opsPerSec;
|
unsigned long opsPerSec;
|
||||||
|
uint16_t actualFrames;
|
||||||
|
uint32_t hash;
|
||||||
|
|
||||||
gCurName = name;
|
gCurName = name;
|
||||||
|
|
||||||
iters = runForFrames(op, UBER_FRAMES);
|
iters = runForFrames(op, UBER_FRAMES, &actualFrames);
|
||||||
|
|
||||||
if (iters == 0UL) {
|
if (iters == 0UL) {
|
||||||
joeyLogF("UBER: %s: 0 iters (op too slow?)\n", name);
|
joeyLogF("UBER: %s: 0 iters (op too slow?)\n", name);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
opsPerSec = (iters * (unsigned long)joeyFrameHz()) / (unsigned long)UBER_FRAMES;
|
/* Divide by ACTUAL elapsed frames, not the target. For sub-frame
|
||||||
joeyLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec\n",
|
* ops actualFrames ~= UBER_FRAMES so the answer is unchanged;
|
||||||
name, iters, UBER_FRAMES, opsPerSec);
|
* for ops that overrun (slow stagePresent etc.), this stops
|
||||||
|
* inflating ops/sec. */
|
||||||
|
opsPerSec = (iters * (unsigned long)joeyFrameHz()) / (unsigned long)actualFrames;
|
||||||
|
hash = surfaceHash(gStage);
|
||||||
|
joeyLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec | hash=%08lX\n",
|
||||||
|
name, iters, actualFrames, opsPerSec, (unsigned long)hash);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -125,8 +156,6 @@ static void op_spriteRestore (void) { spriteRestoreUnder(gStage, &gBackup);
|
||||||
static void op_spriteSaveAndDraw (void) { spriteSaveAndDraw (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); }
|
static void op_spriteSaveAndDraw (void) { spriteSaveAndDraw (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); }
|
||||||
|
|
||||||
static void op_stagePresent (void) { stagePresent(); }
|
static void op_stagePresent (void) { stagePresent(); }
|
||||||
static void op_stagePresentRect8(void) { stagePresentRect( 40, 30, 16, 16); }
|
|
||||||
static void op_stagePresentRectF(void) { stagePresentRect( 0, 0, 320, 200); }
|
|
||||||
|
|
||||||
static void op_inputPoll (void) { joeyInputPoll(); }
|
static void op_inputPoll (void) { joeyInputPoll(); }
|
||||||
static void op_keyDown (void) { (void)joeyKeyDown(KEY_A); }
|
static void op_keyDown (void) { (void)joeyKeyDown(KEY_A); }
|
||||||
|
|
@ -229,10 +258,14 @@ static void runAllTests(void) {
|
||||||
timeOp("spriteRestoreUnder", op_spriteRestore);
|
timeOp("spriteRestoreUnder", op_spriteRestore);
|
||||||
timeOp("spriteSaveAndDraw", op_spriteSaveAndDraw);
|
timeOp("spriteSaveAndDraw", op_spriteSaveAndDraw);
|
||||||
|
|
||||||
// Present.
|
// Present. One warm-up call before each timed loop primes any
|
||||||
|
// per-port one-time setup (Amiga: copper list rebuild after the
|
||||||
|
// paletteSet / scbSetRange tests dirty the cache; without warm-up
|
||||||
|
// the rebuild's MakeScreen + MrgCop + WaitTOF chain consumes the
|
||||||
|
// entire measurement window) so we measure steady-state
|
||||||
|
// throughput rather than first-call penalty.
|
||||||
|
stagePresent();
|
||||||
timeOp("stagePresent full", op_stagePresent);
|
timeOp("stagePresent full", op_stagePresent);
|
||||||
timeOp("stagePresentRect 8b",op_stagePresentRect8);
|
|
||||||
timeOp("stagePresentRect F", op_stagePresentRectF);
|
|
||||||
|
|
||||||
// Input.
|
// Input.
|
||||||
timeOp("joeyInputPoll", op_inputPoll);
|
timeOp("joeyInputPoll", op_inputPoll);
|
||||||
|
|
@ -253,12 +286,19 @@ static void runAllTests(void) {
|
||||||
|
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
JoeyConfigT config;
|
JoeyConfigT config;
|
||||||
uint16_t pal[16];
|
uint16_t pal[16];
|
||||||
int i;
|
int i;
|
||||||
|
uint16_t startFrame;
|
||||||
|
uint16_t endFrame;
|
||||||
|
uint16_t elapsedFrames;
|
||||||
|
unsigned long elapsedMs;
|
||||||
|
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
/* 32 KB fits the 8 pre-shifted DRAW variants the Amiga planar
|
||||||
|
* compiled sprite emitter generates. UL on the multiply because
|
||||||
|
* ORCA-C's 16-bit int overflows on 32 * 1024. */
|
||||||
|
config.codegenBytes = 32UL * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64UL * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128UL * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
@ -266,6 +306,11 @@ int main(void) {
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
/* joeyFrameCount is VBL-driven, so it only ticks after halInit
|
||||||
|
* installed its VBL ISR -- captured here is "everything from now
|
||||||
|
* to press-any-key". Pre-init setup time is small and not the
|
||||||
|
* cost the user is chasing; runAllTests dominates. */
|
||||||
|
startFrame = joeyFrameCount();
|
||||||
|
|
||||||
gStage = stageGet();
|
gStage = stageGet();
|
||||||
if (gStage == NULL) {
|
if (gStage == NULL) {
|
||||||
|
|
@ -337,6 +382,12 @@ int main(void) {
|
||||||
|
|
||||||
runAllTests();
|
runAllTests();
|
||||||
|
|
||||||
|
endFrame = joeyFrameCount();
|
||||||
|
elapsedFrames = (uint16_t)(endFrame - startFrame);
|
||||||
|
elapsedMs = ((unsigned long)elapsedFrames * 1000UL) / (unsigned long)joeyFrameHz();
|
||||||
|
joeyLogF("UBER: total wall time: %lu ms (%u frames @ %u Hz)\n",
|
||||||
|
elapsedMs, elapsedFrames, (unsigned)joeyFrameHz());
|
||||||
|
|
||||||
// Done. Green screen + waitForKey.
|
// Done. Green screen + waitForKey.
|
||||||
surfaceClear(gStage, 2);
|
surfaceClear(gStage, 2);
|
||||||
stagePresent();
|
stagePresent();
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@
|
||||||
|
|
||||||
void joeyLog (const char *msg);
|
void joeyLog (const char *msg);
|
||||||
void joeyLogF (const char *fmt, ...);
|
void joeyLogF (const char *fmt, ...);
|
||||||
|
void joeyLogFlush(void);
|
||||||
void joeyLogReset(void);
|
void joeyLogReset(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -15,14 +15,14 @@
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
|
||||||
// Flip the dirty regions of the stage to the display, then clear the
|
// Flip the dirty regions of the stage to the display, then clear the
|
||||||
// dirty state. Cheap when nothing has changed since the last call.
|
// dirty state. Cheap when nothing has changed since the last call
|
||||||
|
// (gStageAnyDirty short-circuit). Drawing primitives mark dirty as
|
||||||
|
// a side effect, so callers only need to call stagePresent at the
|
||||||
|
// end of a frame -- everything they drew shows up.
|
||||||
|
//
|
||||||
|
// To present a region you didn't draw with the standard primitives
|
||||||
|
// (e.g. direct framebuffer poking), call surfaceMarkDirtyRect on
|
||||||
|
// the same rect first, then stagePresent.
|
||||||
void stagePresent(void);
|
void stagePresent(void);
|
||||||
|
|
||||||
// Flip a specific rectangular region of the stage to the display,
|
|
||||||
// regardless of dirty state. Coordinates are clipped to the surface;
|
|
||||||
// negative or zero dimensions are no-ops. Does not consult or modify
|
|
||||||
// the dirty arrays -- callers mixing stagePresentRect with stagePresent
|
|
||||||
// in the same frame may see redundant work on the next stagePresent.
|
|
||||||
void stagePresentRect(int16_t x, int16_t y, uint16_t w, uint16_t h);
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -27,13 +27,16 @@
|
||||||
#include "surface.h"
|
#include "surface.h"
|
||||||
#include "types.h"
|
#include "types.h"
|
||||||
|
|
||||||
// Sprites always write to a 4bpp packed SurfaceT, never to display
|
// Sprite codegen emits per-shift variants. Chunky 4bpp ports (DOS,
|
||||||
// memory directly (halPresent owns that path). The codegen emits 2
|
// IIgs, Atari ST) only need 2 shifts -- pixel offset 0 (sprite/dest
|
||||||
// shift variants on every platform: shift 0 for even x (sprite byte
|
// byte boundaries align) and offset 1 (every dest byte combines two
|
||||||
// boundaries match destination byte boundaries) and shift 1 for odd
|
// sprite bytes' nibbles). Planar ports (Amiga -- 8 px per plane byte)
|
||||||
// x (each destination byte combines two adjacent sprite bytes'
|
// need 8 shifts: one for each x % 8 alignment, so smooth horizontal
|
||||||
// nibbles).
|
// motion at any pixel position uses pre-shifted source bytes without
|
||||||
#define JOEY_SPRITE_SHIFT_COUNT 2
|
// runtime bit-shifting. Allocate the max so routineOffsets[] has
|
||||||
|
// slots for every variant; chunky ports leave shifts 2..7 as
|
||||||
|
// SPRITE_NOT_COMPILED, planar ports use all 8.
|
||||||
|
#define JOEY_SPRITE_SHIFT_COUNT 8
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
SPRITE_FLAGS_NONE = 0
|
SPRITE_FLAGS_NONE = 0
|
||||||
|
|
|
||||||
|
|
@ -58,4 +58,13 @@ bool surfaceSaveFile(const SurfaceT *src, const char *path);
|
||||||
// identity (no reallocation).
|
// identity (no reallocation).
|
||||||
bool surfaceLoadFile(SurfaceT *dst, const char *path);
|
bool surfaceLoadFile(SurfaceT *dst, const char *path);
|
||||||
|
|
||||||
|
// FNV-1a 32-bit hash of the surface's logical pixel content (color
|
||||||
|
// indices in row-major order, 0..15 per pixel). Same logical pixels
|
||||||
|
// produce the same hash on every port regardless of internal storage
|
||||||
|
// format -- so a hash captured on IIgs (chunky) compares directly
|
||||||
|
// against the same op's output on Amiga (planar) once the planar
|
||||||
|
// rewrite is done. Used by the UBER validation harness to
|
||||||
|
// pixel-compare ports against an IIgs golden reference.
|
||||||
|
uint32_t surfaceHash(const SurfaceT *s);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ BINDIR := $(BUILD)/bin
|
||||||
# independently. -I on $(SRC_PORT)/amiga lets ptplayer.h resolve
|
# independently. -I on $(SRC_PORT)/amiga lets ptplayer.h resolve
|
||||||
# <SDI_compiler.h> from the port-local shim alongside our HAL code.
|
# <SDI_compiler.h> from the port-local shim alongside our HAL code.
|
||||||
PTPLAYER_DIR := $(REPO_DIR)/toolchains/amiga/ptplayer
|
PTPLAYER_DIR := $(REPO_DIR)/toolchains/amiga/ptplayer
|
||||||
CFLAGS := $(COMMON_CFLAGS) -DJOEYLIB_PLATFORM_AMIGA -m68000 -fomit-frame-pointer -noixemul -D__OSCOMPAT -I$(SRC_PORT)/amiga -I$(SRC_68K) -I$(PTPLAYER_DIR)
|
CFLAGS := $(COMMON_CFLAGS) -DJOEYLIB_PLATFORM_AMIGA -m68000 -fomit-frame-pointer -noixemul -D__OSCOMPAT -I$(SRC_PORT)/amiga -I$(SRC_68K) -I$(PTPLAYER_DIR) -MMD -MP $(CFLAGS_EXTRA)
|
||||||
# OSCOMPAT=1 selects PTPlayer's audio.device-friendly variant (uses
|
# OSCOMPAT=1 selects PTPlayer's audio.device-friendly variant (uses
|
||||||
# CIA-B + audio.device interrupts via the OS rather than taking over
|
# CIA-B + audio.device interrupts via the OS rather than taking over
|
||||||
# Paula directly), matching the way our HAL cooperates with Intuition.
|
# Paula directly), matching the way our HAL cooperates with Intuition.
|
||||||
|
|
@ -52,6 +52,7 @@ LIB_OBJS := \
|
||||||
$(patsubst $(SRC_68K)/%.s,$(BUILD)/obj/68k/%.o,$(SHARED_S)) \
|
$(patsubst $(SRC_68K)/%.s,$(BUILD)/obj/68k/%.o,$(SHARED_S)) \
|
||||||
$(BUILD)/obj/port/ptplayer.o \
|
$(BUILD)/obj/port/ptplayer.o \
|
||||||
$(BUILD)/obj/codegen/spriteEmit68k.o \
|
$(BUILD)/obj/codegen/spriteEmit68k.o \
|
||||||
|
$(BUILD)/obj/codegen/spriteEmitPlanar68k.o \
|
||||||
$(BUILD)/obj/codegen/spriteCompile.o
|
$(BUILD)/obj/codegen/spriteCompile.o
|
||||||
|
|
||||||
LIB := $(LIBDIR)/libjoey.a
|
LIB := $(LIBDIR)/libjoey.a
|
||||||
|
|
@ -156,3 +157,9 @@ $(DATA_DIR)/test.sfx: $(REPO_DIR)/assets/test.sfx
|
||||||
|
|
||||||
clean-amiga:
|
clean-amiga:
|
||||||
rm -rf $(BUILD)
|
rm -rf $(BUILD)
|
||||||
|
|
||||||
|
# Pull in per-object header-dependency files generated by gcc -MMD/-MP.
|
||||||
|
# Without this, editing a header (e.g. surfaceInternal.h) doesn't rebuild
|
||||||
|
# the .c files that include it, leaving a frankenstein binary where
|
||||||
|
# different TUs see different struct layouts.
|
||||||
|
-include $(LIB_OBJS:.o=.d)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ BUILD := $(REPO_DIR)/build/$(PLATFORM)
|
||||||
LIBDIR := $(BUILD)/lib
|
LIBDIR := $(BUILD)/lib
|
||||||
BINDIR := $(BUILD)/bin
|
BINDIR := $(BUILD)/bin
|
||||||
|
|
||||||
CFLAGS := $(COMMON_CFLAGS) -DJOEYLIB_PLATFORM_ATARIST -m68000 -fomit-frame-pointer -I$(REPO_DIR)/toolchains/audio/libxmp-lite/include -I$(REPO_DIR)/toolchains/atarist/include-shim -I$(SRC_68K)
|
CFLAGS := $(COMMON_CFLAGS) -DJOEYLIB_PLATFORM_ATARIST -m68000 -fomit-frame-pointer -I$(REPO_DIR)/toolchains/audio/libxmp-lite/include -I$(REPO_DIR)/toolchains/atarist/include-shim -I$(SRC_68K) -MMD -MP
|
||||||
LDFLAGS :=
|
LDFLAGS :=
|
||||||
|
|
||||||
# libxmp-lite shared with the DOS port. Built as a static archive that
|
# libxmp-lite shared with the DOS port. Built as a static archive that
|
||||||
|
|
@ -148,3 +148,9 @@ $(DATA_DIR)/test.sfx: $(REPO_DIR)/assets/test.sfx
|
||||||
|
|
||||||
clean-atarist:
|
clean-atarist:
|
||||||
rm -rf $(BUILD)
|
rm -rf $(BUILD)
|
||||||
|
|
||||||
|
# Pull in per-object header-dependency files generated by gcc -MMD/-MP.
|
||||||
|
# Without this, editing a header (e.g. surfaceInternal.h) doesn't rebuild
|
||||||
|
# the .c files that include it, leaving a frankenstein binary where
|
||||||
|
# different TUs see different struct layouts.
|
||||||
|
-include $(LIB_OBJS:.o=.d)
|
||||||
|
|
|
||||||
|
|
@ -7,7 +7,7 @@ BUILD := $(REPO_DIR)/build/$(PLATFORM)
|
||||||
LIBDIR := $(BUILD)/lib
|
LIBDIR := $(BUILD)/lib
|
||||||
BINDIR := $(BUILD)/bin
|
BINDIR := $(BUILD)/bin
|
||||||
|
|
||||||
CFLAGS := $(COMMON_CFLAGS) -DJOEYLIB_PLATFORM_DOS -march=i386 -m32 -I$(REPO_DIR)/toolchains/audio/libxmp-lite/include
|
CFLAGS := $(COMMON_CFLAGS) -DJOEYLIB_PLATFORM_DOS -march=i386 -m32 -I$(REPO_DIR)/toolchains/audio/libxmp-lite/include -MMD -MP
|
||||||
ASFLAGS := -f coff
|
ASFLAGS := -f coff
|
||||||
LDFLAGS :=
|
LDFLAGS :=
|
||||||
|
|
||||||
|
|
@ -138,3 +138,9 @@ $(DATA_DIR)/test.sfx: $(REPO_DIR)/assets/test.sfx
|
||||||
|
|
||||||
clean-dos:
|
clean-dos:
|
||||||
rm -rf $(BUILD)
|
rm -rf $(BUILD)
|
||||||
|
|
||||||
|
# Pull in per-object header-dependency files generated by gcc -MMD/-MP.
|
||||||
|
# Without this, editing a header (e.g. surfaceInternal.h) doesn't rebuild
|
||||||
|
# the .c files that include it, leaving a frankenstein binary where
|
||||||
|
# different TUs see different struct layouts.
|
||||||
|
-include $(LIB_OBJS:.o=.d)
|
||||||
|
|
|
||||||
64
make/iigs.mk
64
make/iigs.mk
|
|
@ -51,11 +51,11 @@ IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32
|
||||||
|
|
||||||
LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM) $(CODEGEN_SRCS)
|
LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM) $(CODEGEN_SRCS)
|
||||||
|
|
||||||
# HELLO and PATTERN are intentionally omitted from this list. The UBER
|
# HELLO is omitted from the disk because UBER exercises everything it
|
||||||
# demo (below) exercises every public API, including what those two
|
# does and the disk was tight. PATTERN is included as the SCB / palette
|
||||||
# small examples covered, and the IIgs disk image was running out of
|
# golden-reference for cross-port debugging.
|
||||||
# room. Source for HELLO/PATTERN is still in examples/{hello,pattern}/
|
PATTERN_SRC := $(EXAMPLES)/pattern/pattern.c
|
||||||
# for reference and for other ports that want them.
|
PATTERN_BIN := $(BINDIR)/PATTERN
|
||||||
DRAW_SRC := $(EXAMPLES)/draw/draw.c
|
DRAW_SRC := $(EXAMPLES)/draw/draw.c
|
||||||
DRAW_BIN := $(BINDIR)/DRAW
|
DRAW_BIN := $(BINDIR)/DRAW
|
||||||
KEYS_SRC := $(EXAMPLES)/keys/keys.c
|
KEYS_SRC := $(EXAMPLES)/keys/keys.c
|
||||||
|
|
@ -120,24 +120,44 @@ $(NTP_ASM): $(NTP_BIN) $(REPO_DIR)/toolchains/iigs/bin-to-asm.sh
|
||||||
# everywhere, so library asm can take SurfaceT* args via one
|
# everywhere, so library asm can take SurfaceT* args via one
|
||||||
# consistent ABI (small-mm 16-bit pointers truncated bank bytes,
|
# consistent ABI (small-mm 16-bit pointers truncated bank bytes,
|
||||||
# which broke any asm that wanted to address bank-1 stage memory).
|
# which broke any asm that wanted to address bank-1 stage memory).
|
||||||
|
# Per-binary header dependency files. iix-build.sh -M emits one .d
|
||||||
|
# alongside each binary covering every header transitively included
|
||||||
|
# by the C sources in that binary's build. Pulled in via -include at
|
||||||
|
# the bottom of this file so editing a shared header (e.g.
|
||||||
|
# surfaceInternal.h) triggers a rebuild of every IIgs binary that
|
||||||
|
# transitively depends on it.
|
||||||
|
DEP_DIR := $(BUILD)/dep
|
||||||
|
PATTERN_DEP := $(DEP_DIR)/PATTERN.d
|
||||||
|
DRAW_DEP := $(DEP_DIR)/DRAW.d
|
||||||
|
KEYS_DEP := $(DEP_DIR)/KEYS.d
|
||||||
|
JOY_DEP := $(DEP_DIR)/JOY.d
|
||||||
|
SPRITE_DEP := $(DEP_DIR)/SPRITE.d
|
||||||
|
UBER_DEP := $(DEP_DIR)/UBER.d
|
||||||
|
AUDIO_DEP := $(DEP_DIR)/AUDIO.d
|
||||||
|
|
||||||
|
$(PATTERN_BIN): $(PATTERN_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
|
$(IIGS_BUILD) -b -M $(PATTERN_DEP) $(IIX_INCLUDES) -o $@ $(PATTERN_SRC) $(LIB_SRCS)
|
||||||
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
$(DRAW_BIN): $(DRAW_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(DRAW_BIN): $(DRAW_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(DRAW_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b -M $(DRAW_DEP) $(IIX_INCLUDES) -o $@ $(DRAW_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
$(KEYS_BIN): $(KEYS_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(KEYS_BIN): $(KEYS_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(KEYS_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b -M $(KEYS_DEP) $(IIX_INCLUDES) -o $@ $(KEYS_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
$(JOY_BIN): $(JOY_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(JOY_BIN): $(JOY_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(JOY_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b -M $(JOY_DEP) $(IIX_INCLUDES) -o $@ $(JOY_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
$(SPRITE_BIN): $(SPRITE_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(SPRITE_BIN): $(SPRITE_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b -M $(SPRITE_DEP) $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
# UBER bumps user stack to 16 KB. ORCA-C's default user stack is small
|
# UBER bumps user stack to 16 KB. ORCA-C's default user stack is small
|
||||||
|
|
@ -147,8 +167,8 @@ $(SPRITE_BIN): $(SPRITE_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
# decimal formatter in uber.c also uses larger stack-local buffers
|
# decimal formatter in uber.c also uses larger stack-local buffers
|
||||||
# (line[96], num[16]) than typical demos. 16 KB is plenty of headroom.
|
# (line[96], num[16]) than typical demos. 16 KB is plenty of headroom.
|
||||||
$(UBER_BIN): $(UBER_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(UBER_BIN): $(UBER_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
$(IIGS_BUILD) -b -s 16384 $(IIX_INCLUDES) -o $@ $(UBER_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b -s 16384 -M $(UBER_DEP) $(IIX_INCLUDES) -o $@ $(UBER_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
# Convert the cross-platform .MOD asset to NinjaTrackerPlus runtime
|
# Convert the cross-platform .MOD asset to NinjaTrackerPlus runtime
|
||||||
|
|
@ -170,17 +190,23 @@ AUDIO_DATA_FILES := $(AUDIO_SFX)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
$(AUDIO_BIN): $(AUDIO_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(AUDIO_BIN): $(AUDIO_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@) $(DEP_DIR)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -I $(EXAMPLES)/audio -o $@ $(AUDIO_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b -M $(AUDIO_DEP) $(IIX_INCLUDES) -I $(EXAMPLES)/audio -o $@ $(AUDIO_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
# Assemble a ProDOS 2img containing the examples, ready to mount in
|
# Assemble a ProDOS 2img containing the examples, ready to mount in
|
||||||
# GSplus alongside a GS/OS boot volume.
|
# GSplus alongside a GS/OS boot volume.
|
||||||
iigs-disk: $(DISK_IMG)
|
iigs-disk: $(DISK_IMG)
|
||||||
|
|
||||||
$(DISK_IMG): $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) $(AUDIO_DATA_FILES) $(IIGS_PACKAGE)
|
$(DISK_IMG): $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) $(AUDIO_DATA_FILES) $(IIGS_PACKAGE)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
$(IIGS_PACKAGE) $@ $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) -- $(AUDIO_DATA_FILES)
|
$(IIGS_PACKAGE) $@ $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) -- $(AUDIO_DATA_FILES)
|
||||||
|
|
||||||
clean-iigs:
|
clean-iigs:
|
||||||
rm -rf $(BUILD)
|
rm -rf $(BUILD)
|
||||||
|
|
||||||
|
# Pull in per-binary header-dependency files generated by iix-build.sh -M.
|
||||||
|
# Without this, editing a header (e.g. surfaceInternal.h) doesn't rebuild
|
||||||
|
# IIgs binaries that include it -- the IIgs's iix toolchain has no native
|
||||||
|
# -MMD analog, so iix-build.sh shells out to host gcc for the scan.
|
||||||
|
-include $(PATTERN_DEP) $(DRAW_DEP) $(KEYS_DEP) $(JOY_DEP) $(SPRITE_DEP) $(UBER_DEP) $(AUDIO_DEP)
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,7 @@
|
||||||
#include "joey/sprite.h"
|
#include "joey/sprite.h"
|
||||||
#include "joey/surface.h"
|
#include "joey/surface.h"
|
||||||
#include "codegenArenaInternal.h"
|
#include "codegenArenaInternal.h"
|
||||||
|
#include "hal.h"
|
||||||
#include "spriteEmitter.h"
|
#include "spriteEmitter.h"
|
||||||
#include "spriteInternal.h"
|
#include "spriteInternal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
@ -33,7 +34,9 @@
|
||||||
static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
#if defined(JOEYLIB_PLATFORM_DOS)
|
#if defined(JOEYLIB_PLATFORM_DOS)
|
||||||
return spriteEmitDrawX86(out, sp, shift);
|
return spriteEmitDrawX86(out, sp, shift);
|
||||||
#elif defined(JOEYLIB_PLATFORM_AMIGA) || defined(JOEYLIB_PLATFORM_ATARIST)
|
#elif defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
return spriteEmitDrawPlanar68k(out, sp, shift);
|
||||||
|
#elif defined(JOEYLIB_PLATFORM_ATARIST)
|
||||||
return spriteEmitDraw68k(out, sp, shift);
|
return spriteEmitDraw68k(out, sp, shift);
|
||||||
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
return spriteEmitDrawIigs(out, sp, shift);
|
return spriteEmitDrawIigs(out, sp, shift);
|
||||||
|
|
@ -51,7 +54,9 @@ static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift
|
||||||
static uint16_t emitSaveForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
static uint16_t emitSaveForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
#if defined(JOEYLIB_PLATFORM_DOS)
|
#if defined(JOEYLIB_PLATFORM_DOS)
|
||||||
return spriteEmitSaveX86(out, sp, shift);
|
return spriteEmitSaveX86(out, sp, shift);
|
||||||
#elif defined(JOEYLIB_PLATFORM_AMIGA) || defined(JOEYLIB_PLATFORM_ATARIST)
|
#elif defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
return spriteEmitSavePlanar68k(out, sp, shift);
|
||||||
|
#elif defined(JOEYLIB_PLATFORM_ATARIST)
|
||||||
return spriteEmitSave68k(out, sp, shift);
|
return spriteEmitSave68k(out, sp, shift);
|
||||||
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
return spriteEmitSaveIigs(out, sp, shift);
|
return spriteEmitSaveIigs(out, sp, shift);
|
||||||
|
|
@ -65,7 +70,9 @@ static uint16_t emitSaveForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift
|
||||||
static uint16_t emitRestoreForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
static uint16_t emitRestoreForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
#if defined(JOEYLIB_PLATFORM_DOS)
|
#if defined(JOEYLIB_PLATFORM_DOS)
|
||||||
return spriteEmitRestoreX86(out, sp, shift);
|
return spriteEmitRestoreX86(out, sp, shift);
|
||||||
#elif defined(JOEYLIB_PLATFORM_AMIGA) || defined(JOEYLIB_PLATFORM_ATARIST)
|
#elif defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
return spriteEmitRestorePlanar68k(out, sp, shift);
|
||||||
|
#elif defined(JOEYLIB_PLATFORM_ATARIST)
|
||||||
return spriteEmitRestore68k(out, sp, shift);
|
return spriteEmitRestore68k(out, sp, shift);
|
||||||
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
return spriteEmitRestoreIigs(out, sp, shift);
|
return spriteEmitRestoreIigs(out, sp, shift);
|
||||||
|
|
@ -114,6 +121,13 @@ bool spriteCompile(SpriteT *sp) {
|
||||||
if (sp->tileData == NULL) {
|
if (sp->tileData == NULL) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
/* Amiga (post-Phase 9) uses spriteEmitPlanar68k.c which writes
|
||||||
|
* directly to bitplanes. DRAW emits a unique pre-shifted variant
|
||||||
|
* per shift in 0..7 (smooth horizontal motion at any pixel x);
|
||||||
|
* SAVE/RESTORE emit only shift 0 and shift 1 since shifted variants
|
||||||
|
* 1..7 share identical bytes (plain memcpy of widthTiles+1 plane
|
||||||
|
* bytes per row). The post-emit pass below aliases slots 2..7
|
||||||
|
* for save/restore to slot 1's bytes. */
|
||||||
|
|
||||||
scratch = (uint8_t *)malloc(SPRITE_EMIT_SCRATCH_BYTES);
|
scratch = (uint8_t *)malloc(SPRITE_EMIT_SCRATCH_BYTES);
|
||||||
if (scratch == NULL) {
|
if (scratch == NULL) {
|
||||||
|
|
@ -150,6 +164,16 @@ bool spriteCompile(SpriteT *sp) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#if defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
/* Save/restore bytes for any non-zero shift are identical (plain
|
||||||
|
* memcpy of widthTiles+1 plane bytes per row). The emitter emits
|
||||||
|
* them once at slot 1; alias slots 2..7 here so the dispatcher
|
||||||
|
* gate (sprite.c) sees them as compiled. */
|
||||||
|
for (shift = 2; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
|
||||||
|
sp->routineOffsets[shift][SPRITE_OP_SAVE] = sp->routineOffsets[1][SPRITE_OP_SAVE];
|
||||||
|
sp->routineOffsets[shift][SPRITE_OP_RESTORE] = sp->routineOffsets[1][SPRITE_OP_RESTORE];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
sp->slot = slot;
|
sp->slot = slot;
|
||||||
free(scratch);
|
free(scratch);
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -554,6 +578,112 @@ void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#elif defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
|
||||||
|
/* Amiga planar dispatchers. spriteEmitPlanar68k.c emits routines with
|
||||||
|
* cdecl(p0, p1, p2, p3[, backup]) signatures that write directly to
|
||||||
|
* bitplanes. Compute byteOff = y*40 + x/8 and pass plane[i]+byteOff
|
||||||
|
* as the 4 plane args. shift = x % 8 selects the variant (the planar
|
||||||
|
* emitter pre-bakes DRAW for shifts 0..7); callers should already have
|
||||||
|
* gated on routineOffsets[shift][op] != SPRITE_NOT_COMPILED.
|
||||||
|
*
|
||||||
|
* For any shift whose routine is not compiled, the dispatcher in
|
||||||
|
* src/core/sprite.c (spriteDraw / spriteSaveUnder / spriteRestoreUnder)
|
||||||
|
* sees SPRITE_NOT_COMPILED for the shift and falls back to the
|
||||||
|
* interpreter, which handles arbitrary x via halSpriteDrawPlanes /
|
||||||
|
* halSpriteSavePlanes / halSpriteRestorePlanes. */
|
||||||
|
|
||||||
|
#define AMIGA_BYTES_PER_ROW_LOCAL 40
|
||||||
|
|
||||||
|
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
|
||||||
|
typedef void (*DrawFn)(uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3);
|
||||||
|
uint8_t shift;
|
||||||
|
uint16_t byteOff;
|
||||||
|
uint8_t *p0;
|
||||||
|
uint8_t *p1;
|
||||||
|
uint8_t *p2;
|
||||||
|
uint8_t *p3;
|
||||||
|
DrawFn fn;
|
||||||
|
|
||||||
|
shift = (uint8_t)(x & 7);
|
||||||
|
byteOff = (uint16_t)((uint16_t)y * AMIGA_BYTES_PER_ROW_LOCAL + ((uint16_t)x >> 3));
|
||||||
|
p0 = halSurfacePlanePtr(dst, 0); if (p0 == NULL) return;
|
||||||
|
p1 = halSurfacePlanePtr(dst, 1);
|
||||||
|
p2 = halSurfacePlanePtr(dst, 2);
|
||||||
|
p3 = halSurfacePlanePtr(dst, 3);
|
||||||
|
fn = (DrawFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_DRAW]);
|
||||||
|
fn(p0 + byteOff, p1 + byteOff, p2 + byteOff, p3 + byteOff);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
|
||||||
|
typedef void (*SaveFn)(uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, uint8_t *backup);
|
||||||
|
uint8_t shift;
|
||||||
|
int16_t clippedX;
|
||||||
|
uint16_t widthPx;
|
||||||
|
uint16_t heightPx;
|
||||||
|
uint16_t byteOff;
|
||||||
|
uint8_t *p0;
|
||||||
|
uint8_t *p1;
|
||||||
|
uint8_t *p2;
|
||||||
|
uint8_t *p3;
|
||||||
|
SaveFn fn;
|
||||||
|
|
||||||
|
shift = (uint8_t)(x & 7);
|
||||||
|
clippedX = (int16_t)(x & ~7);
|
||||||
|
widthPx = (uint16_t)(sp->widthTiles * 8);
|
||||||
|
heightPx = (uint16_t)(sp->heightTiles * 8);
|
||||||
|
/* Shifts 1..7 spill into one extra plane byte per row (= +8 px). */
|
||||||
|
if (shift != 0u) {
|
||||||
|
widthPx = (uint16_t)(widthPx + 8u);
|
||||||
|
}
|
||||||
|
byteOff = (uint16_t)((uint16_t)y * AMIGA_BYTES_PER_ROW_LOCAL + ((uint16_t)clippedX >> 3));
|
||||||
|
|
||||||
|
backup->sprite = sp;
|
||||||
|
backup->x = clippedX;
|
||||||
|
backup->y = y;
|
||||||
|
backup->width = widthPx;
|
||||||
|
backup->height = heightPx;
|
||||||
|
/* 4 planes * h * (widthPx/8) bytes = h * widthPx/2. */
|
||||||
|
backup->sizeBytes = (uint16_t)((uint16_t)heightPx * (widthPx >> 1));
|
||||||
|
|
||||||
|
p0 = halSurfacePlanePtr(src, 0); if (p0 == NULL) return;
|
||||||
|
p1 = halSurfacePlanePtr(src, 1);
|
||||||
|
p2 = halSurfacePlanePtr(src, 2);
|
||||||
|
p3 = halSurfacePlanePtr(src, 3);
|
||||||
|
fn = (SaveFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_SAVE]);
|
||||||
|
fn(p0 + byteOff, p1 + byteOff, p2 + byteOff, p3 + byteOff, backup->bytes);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
||||||
|
typedef void (*RestoreFn)(uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, const uint8_t *backup);
|
||||||
|
SpriteT *sp;
|
||||||
|
uint8_t shift;
|
||||||
|
uint16_t byteOff;
|
||||||
|
uint8_t *p0;
|
||||||
|
uint8_t *p1;
|
||||||
|
uint8_t *p2;
|
||||||
|
uint8_t *p3;
|
||||||
|
RestoreFn fn;
|
||||||
|
|
||||||
|
sp = backup->sprite;
|
||||||
|
/* backup->x is 8-px aligned (clippedX from save), so x & 7 is
|
||||||
|
* useless for picking the original shift. Encode it via
|
||||||
|
* backup->width: == widthTiles*8 means shift 0; > means shifted.
|
||||||
|
* Shifted slots 1..7 all alias to the same restore bytes, so
|
||||||
|
* slot 1 stands in for any non-zero shift. */
|
||||||
|
shift = (uint8_t)(backup->width > (uint16_t)(sp->widthTiles * 8) ? 1u : 0u);
|
||||||
|
byteOff = (uint16_t)((uint16_t)backup->y * AMIGA_BYTES_PER_ROW_LOCAL + ((uint16_t)backup->x >> 3));
|
||||||
|
|
||||||
|
p0 = halSurfacePlanePtr(dst, 0); if (p0 == NULL) return;
|
||||||
|
p1 = halSurfacePlanePtr(dst, 1);
|
||||||
|
p2 = halSurfacePlanePtr(dst, 2);
|
||||||
|
p3 = halSurfacePlanePtr(dst, 3);
|
||||||
|
fn = (RestoreFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_RESTORE]);
|
||||||
|
fn(p0 + byteOff, p1 + byteOff, p2 + byteOff, p3 + byteOff, backup->bytes);
|
||||||
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
|
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
|
||||||
|
|
|
||||||
|
|
@ -166,6 +166,13 @@ uint16_t spriteEmitDraw68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint8_t value;
|
uint8_t value;
|
||||||
uint8_t opaqueMask;
|
uint8_t opaqueMask;
|
||||||
|
|
||||||
|
// Chunky 4bpp has only two nibble-alignment positions; the
|
||||||
|
// dispatcher uses x & 1 so shifts 2..7 are unreachable. Bail
|
||||||
|
// early so the arena slot stays SPRITE_NOT_COMPILED.
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
||||||
|
|
@ -225,6 +232,10 @@ uint16_t spriteEmitRestore68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint16_t heightPx;
|
uint16_t heightPx;
|
||||||
uint16_t copyBytes;
|
uint16_t copyBytes;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
||||||
|
|
@ -248,6 +259,10 @@ uint16_t spriteEmitSave68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint16_t heightPx;
|
uint16_t heightPx;
|
||||||
uint16_t copyBytes;
|
uint16_t copyBytes;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
||||||
|
|
|
||||||
|
|
@ -189,6 +189,10 @@ uint16_t spriteEmitSaveIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint16_t spriteBytesPerRow;
|
uint16_t spriteBytesPerRow;
|
||||||
uint16_t copyBytes;
|
uint16_t copyBytes;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
||||||
copyBytes = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
|
copyBytes = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
|
||||||
|
|
@ -205,6 +209,10 @@ uint16_t spriteEmitRestoreIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint16_t spriteBytesPerRow;
|
uint16_t spriteBytesPerRow;
|
||||||
uint16_t copyBytes;
|
uint16_t copyBytes;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
||||||
copyBytes = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
|
copyBytes = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
|
||||||
|
|
@ -258,6 +266,10 @@ uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint8_t nextOpaqueMask;
|
uint8_t nextOpaqueMask;
|
||||||
bool wide;
|
bool wide;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
||||||
|
|
|
||||||
505
src/codegen/spriteEmitPlanar68k.c
Normal file
505
src/codegen/spriteEmitPlanar68k.c
Normal file
|
|
@ -0,0 +1,505 @@
|
||||||
|
// Planar 68k sprite codegen for Amiga (post-Phase 9, no chunky shadow).
|
||||||
|
//
|
||||||
|
// Emits PIC routines that write directly to the four bitplanes via 4
|
||||||
|
// address-register pointers (a0..a3 = plane[0..3] base + byteOff,
|
||||||
|
// where byteOff = y*40 + x/8 -- the dispatcher pre-computes this).
|
||||||
|
//
|
||||||
|
// Calling convention (cdecl on m68k-amigaos-gcc):
|
||||||
|
// draw(p0, p1, p2, p3):
|
||||||
|
// args at 4(sp), 8(sp), 12(sp), 16(sp) -- one ULONG per plane.
|
||||||
|
// loaded into a0..a3 by the prologue.
|
||||||
|
// save(p0, p1, p2, p3, backup):
|
||||||
|
// 5 args; backup at 20(sp), loaded into a4.
|
||||||
|
// restore(p0, p1, p2, p3, backup):
|
||||||
|
// same as save but reads backup, writes planes.
|
||||||
|
//
|
||||||
|
// Per-byte plane write encoding decisions:
|
||||||
|
// - all-transparent (mask=0): skip the byte entirely
|
||||||
|
// - all-opaque (mask=0xFF): move.b #imm, d16(an) (6 bytes)
|
||||||
|
// - mixed (0<mask<0xFF): move.b d16(an), d0;
|
||||||
|
// andi.b #~mask, d0;
|
||||||
|
// ori.b #imm, d0;
|
||||||
|
// move.b d0, d16(an) (4+4+4+4 = 16 bytes)
|
||||||
|
//
|
||||||
|
// Per row advance: 4 plane pointers each get adda.w #SURFACE_WIDTH/8
|
||||||
|
// = adda.w #40, an (4 bytes encoded each, 16 bytes total per row).
|
||||||
|
// We omit the advance after the last row.
|
||||||
|
//
|
||||||
|
// Shift handling: shifts 0..7 are pre-baked. The dispatcher selects
|
||||||
|
// the variant via x % 8 and pre-computes byteOff = y*40 + (x & ~7)/8
|
||||||
|
// (i.e. round x DOWN to 8-pixel boundary). The variant for shift s
|
||||||
|
// then emits to (widthTiles + 1) plane bytes per row when s != 0
|
||||||
|
// (the rightmost shift bits spill into one extra plane byte) and to
|
||||||
|
// widthTiles plane bytes per row when s == 0.
|
||||||
|
//
|
||||||
|
// The emitter assumes sprite width is a multiple of 8 (= a multiple
|
||||||
|
// of one tile = a multiple of 8 pixels) so plane bytes per row are
|
||||||
|
// integer. JoeyLib sprites are always tile-multiple by API contract.
|
||||||
|
|
||||||
|
#include "joey/sprite.h"
|
||||||
|
#include "joey/surface.h"
|
||||||
|
#include "spriteEmitter.h"
|
||||||
|
#include "spriteInternal.h"
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Constants -----
|
||||||
|
|
||||||
|
#define TILE_PIXELS 8
|
||||||
|
#define TILE_BYTES 32
|
||||||
|
#define TILE_BYTES_PER_ROW 4
|
||||||
|
#define TRANSPARENT_NIBBLE 0
|
||||||
|
#define AMIGA_BITPLANES 4
|
||||||
|
#define AMIGA_BYTES_PER_ROW 40
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Instruction encoding helpers -----
|
||||||
|
|
||||||
|
static uint16_t writeBE16(uint8_t *out, uint16_t value) {
|
||||||
|
out[0] = (uint8_t)((value >> 8) & 0xFFu);
|
||||||
|
out[1] = (uint8_t)(value & 0xFFu);
|
||||||
|
return 2u;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// movea.l <d16,SP>, an -- load arg at SP+disp into An.
|
||||||
|
// Encoding: 0010 nnn 001 101 111 + disp16 (src mode 101 = d16(An), src reg 111 = SP)
|
||||||
|
// = 0x206F + (n << 9), where n is dst An.
|
||||||
|
// a0: 0x206F, a1: 0x226F, a2: 0x246F, a3: 0x266F, a4: 0x286F.
|
||||||
|
static const uint16_t kMoveaSpToAn[] = {
|
||||||
|
0x206Fu, 0x226Fu, 0x246Fu, 0x266Fu, 0x286Fu, 0x2A6Fu, 0x2C6Fu, 0x2E6Fu
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// adda.w #imm, an -- adds 16-bit signed imm to An (sign-extended).
|
||||||
|
// Encoding: 1101 nnn 011 111 100 + imm
|
||||||
|
// = 0xD0FC + (n << 9).
|
||||||
|
static const uint16_t kAddaWImmToAn[] = {
|
||||||
|
0xD0FCu, 0xD2FCu, 0xD4FCu, 0xD6FCu, 0xD8FCu, 0xDAFCu, 0xDCFCu, 0xDEFCu
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// ANDI.B #imm, D0 -- 4 bytes (opcode word + imm word, byte in low half).
|
||||||
|
// Opcode: 0000 0010 00 000 000 (size=byte, mode=Dn, reg=D0)
|
||||||
|
#define ANDI_B_IMM_D0 0x0200u
|
||||||
|
|
||||||
|
// ORI.B #imm, D0 -- 4 bytes (opcode word + imm word, byte in low half).
|
||||||
|
// Opcode: 0000 0000 00 000 000
|
||||||
|
#define ORI_B_IMM_D0 0x0000u
|
||||||
|
|
||||||
|
|
||||||
|
// MOVE.B d16(An), D0 -- 4 bytes (opcode + disp).
|
||||||
|
// Encoding: 0001 000 000 mode reg
|
||||||
|
// = size=01 (byte), dst reg=000 (D0), dst mode=000 (Dn),
|
||||||
|
// src mode=101 (d16,An), src reg=An.
|
||||||
|
// = 0001 000 000 101 nnn = 0x1028 + An.
|
||||||
|
static const uint16_t kMoveBD16AnToD0[] = {
|
||||||
|
0x1028u, 0x1029u, 0x102Au, 0x102Bu
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// MOVE.B D0, d16(An) -- 4 bytes (opcode + disp).
|
||||||
|
// Encoding: 0001 nnn 101 000 000 = 0x1140 + (An << 9).
|
||||||
|
static const uint16_t kMoveBD0ToD16An[] = {
|
||||||
|
0x1140u, 0x1340u, 0x1540u, 0x1740u
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// MOVE.B #imm, d16(An) -- 6 bytes (opcode + imm + disp).
|
||||||
|
// Encoding: 0001 nnn 101 111 100 = 0x117C + (An << 9).
|
||||||
|
// (Was 0x113C earlier -- that's mode=100=predec; mode=101=d16(An)
|
||||||
|
// is the bit difference. Predec emits a 4-byte instruction with no
|
||||||
|
// disp word, so the byte stream went out of sync and every
|
||||||
|
// subsequent instruction decoded into garbage.)
|
||||||
|
static const uint16_t kMoveBImmToD16An[] = {
|
||||||
|
0x117Cu, 0x137Cu, 0x157Cu, 0x177Cu
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// MOVE.B (a4)+, d16(An) -- 4 bytes (opcode + disp). -- used by restore (backup in a4 -> planes)
|
||||||
|
// Encoding: 0001 nnn 101 011 100 = 0x115C + (An << 9).
|
||||||
|
static const uint16_t kMoveBA4PostincToD16An[] = {
|
||||||
|
0x115Cu, 0x135Cu, 0x155Cu, 0x175Cu
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// MOVE.B d16(An), (a4)+ -- 4 bytes (opcode + disp). -- used by save (planes -> backup)
|
||||||
|
// Encoding: 0001 100 011 mode reg
|
||||||
|
// For MOVE.B src,(a4)+ : dst mode = 011 ((An)+), dst reg = 100 (A4),
|
||||||
|
// so dst reg=100, dst mode=011 -> opcode high = 0001 100 011 ...
|
||||||
|
// = 0001 100 011 mode reg = 0x18C0 | (mode << 3) | reg.
|
||||||
|
// With src mode = 101 (d16,An): 0001 100 011 101 nnn = 0x18E8 + An.
|
||||||
|
static const uint16_t kMoveBD16AnToA4Postinc[] = {
|
||||||
|
0x18E8u, 0x18E9u, 0x18EAu, 0x18EBu
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// MOVEM.L reglist, -(SP) -- 4 bytes (opcode + reglist mask).
|
||||||
|
// Opcode 0x48E7. Predec mask is REVERSED vs all other modes:
|
||||||
|
// bit 15 = D0, ..., bit 8 = D7, bit 7 = A0, bit 6 = A1, bit 5 = A2,
|
||||||
|
// bit 4 = A3, bit 3 = A4, bit 2 = A5, bit 1 = A6, bit 0 = A7.
|
||||||
|
#define MOVEM_L_PUSH_OPCODE 0x48E7u
|
||||||
|
#define MOVEM_L_MASK_A2_A3 0x0030u /* bits 5,4 = A2,A3 (predec order) */
|
||||||
|
#define MOVEM_L_MASK_A2_A3_A4 0x0038u /* bits 5,4,3 = A2,A3,A4 */
|
||||||
|
|
||||||
|
// MOVEM.L (SP)+, reglist -- 4 bytes (opcode + reglist mask).
|
||||||
|
// Opcode 0x4CDF. Postinc mask follows the standard layout:
|
||||||
|
// bit 0 = D0, ..., bit 7 = D7, bit 8 = A0, ..., bit 15 = A7.
|
||||||
|
#define MOVEM_L_POP_OPCODE 0x4CDFu
|
||||||
|
#define MOVEM_L_MASK_POP_A2_A3 0x0C00u /* bits 11,10 = A3,A2 */
|
||||||
|
#define MOVEM_L_MASK_POP_A2_A3_A4 0x1C00u /* bits 12,11,10 = A4,A3,A2 */
|
||||||
|
|
||||||
|
// RTS opcode.
|
||||||
|
#define OPCODE_RTS 0x4E75u
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Emit helpers -----
|
||||||
|
|
||||||
|
// For shift 0 (byte-aligned x), the sprite's chunky tile data converts
|
||||||
|
// directly to plane bytes without any sub-byte shifting. For each
|
||||||
|
// (row, col-byte, plane) we extract the 8 plane bits from 4 chunky
|
||||||
|
// bytes (= 8 pixels) and produce one plane byte; we also produce a
|
||||||
|
// mask byte indicating which pixel positions are non-transparent
|
||||||
|
// (any plane bit != 0 in the source means non-transparent if
|
||||||
|
// transparent index is 0, the JoeyLib convention).
|
||||||
|
//
|
||||||
|
// Sprite layout: tileData = wTiles x hTiles tiles, each tile = 8 rows
|
||||||
|
// x 4 chunky bytes (32 bytes). Tiles laid out row-major within the
|
||||||
|
// sprite. For plane-byte column `c` of row `r`:
|
||||||
|
// tileX = c (since each plane byte covers exactly one tile column)
|
||||||
|
// tileY = r / 8
|
||||||
|
// inTileY = r % 8
|
||||||
|
// chunky bytes = tileData + (tileY*wTiles + tileX)*32 + inTileY*4 + 0..3
|
||||||
|
//
|
||||||
|
// `col` must be in [0, widthTiles); callers handle out-of-range cols
|
||||||
|
// (used when computing shifted variants that span widthTiles+1 output
|
||||||
|
// bytes per row) by passing a sentinel and checking against widthTiles
|
||||||
|
// before invoking this helper.
|
||||||
|
static void planeByteAndMaskAt(const SpriteT *sp, uint16_t row, uint16_t col,
|
||||||
|
uint8_t *planeBytes /*[4]*/, uint8_t *maskByte)
|
||||||
|
{
|
||||||
|
uint16_t tileX;
|
||||||
|
uint16_t tileY;
|
||||||
|
uint16_t inTileY;
|
||||||
|
const uint8_t *tile;
|
||||||
|
const uint8_t *chunky;
|
||||||
|
uint8_t nibbles[8];
|
||||||
|
uint8_t b0, b1, b2, b3;
|
||||||
|
uint16_t p;
|
||||||
|
uint8_t bitMask;
|
||||||
|
uint8_t pix;
|
||||||
|
|
||||||
|
tileX = col;
|
||||||
|
tileY = row >> 3;
|
||||||
|
inTileY = row & 7u;
|
||||||
|
|
||||||
|
tile = sp->tileData + (uint32_t)((tileY * sp->widthTiles + tileX) * 32u);
|
||||||
|
chunky = tile + inTileY * 4u;
|
||||||
|
|
||||||
|
nibbles[0] = (uint8_t)(chunky[0] >> 4);
|
||||||
|
nibbles[1] = (uint8_t)(chunky[0] & 0x0Fu);
|
||||||
|
nibbles[2] = (uint8_t)(chunky[1] >> 4);
|
||||||
|
nibbles[3] = (uint8_t)(chunky[1] & 0x0Fu);
|
||||||
|
nibbles[4] = (uint8_t)(chunky[2] >> 4);
|
||||||
|
nibbles[5] = (uint8_t)(chunky[2] & 0x0Fu);
|
||||||
|
nibbles[6] = (uint8_t)(chunky[3] >> 4);
|
||||||
|
nibbles[7] = (uint8_t)(chunky[3] & 0x0Fu);
|
||||||
|
|
||||||
|
b0 = 0u; b1 = 0u; b2 = 0u; b3 = 0u;
|
||||||
|
*maskByte = 0u;
|
||||||
|
for (p = 0; p < 8u; p++) {
|
||||||
|
pix = nibbles[p];
|
||||||
|
if (pix == TRANSPARENT_NIBBLE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
bitMask = (uint8_t)(0x80u >> p);
|
||||||
|
*maskByte = (uint8_t)(*maskByte | bitMask);
|
||||||
|
if (pix & 1u) b0 = (uint8_t)(b0 | bitMask);
|
||||||
|
if (pix & 2u) b1 = (uint8_t)(b1 | bitMask);
|
||||||
|
if (pix & 4u) b2 = (uint8_t)(b2 | bitMask);
|
||||||
|
if (pix & 8u) b3 = (uint8_t)(b3 | bitMask);
|
||||||
|
}
|
||||||
|
planeBytes[0] = b0;
|
||||||
|
planeBytes[1] = b1;
|
||||||
|
planeBytes[2] = b2;
|
||||||
|
planeBytes[3] = b3;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Shifted variant: produces 4 plane bytes and 1 mask byte for output
|
||||||
|
// column `outCol` (0..widthTiles inclusive) of row `row` when the
|
||||||
|
// sprite is shifted right by `shift` pixels (1..7). For shift 0,
|
||||||
|
// callers should use planeByteAndMaskAt directly (faster, no spill).
|
||||||
|
//
|
||||||
|
// Each output byte is composed of bits drawn from up to two source
|
||||||
|
// plane bytes:
|
||||||
|
// leftPart = src[outCol-1] << (8 - shift) (high (shift) bits)
|
||||||
|
// rightPart = src[outCol] >> shift (low (8-shift) bits)
|
||||||
|
// with src[-1] and src[widthTiles] treated as 0/transparent. The
|
||||||
|
// resulting plane byte is leftPart | rightPart; the mask byte is the
|
||||||
|
// shifted union of the per-byte source masks.
|
||||||
|
static void planeByteAndMaskShifted(const SpriteT *sp, uint16_t row, uint16_t outCol,
|
||||||
|
uint8_t shift, uint16_t widthTiles,
|
||||||
|
uint8_t *planeBytes /*[4]*/, uint8_t *maskByte)
|
||||||
|
{
|
||||||
|
uint8_t leftPlanes[AMIGA_BITPLANES];
|
||||||
|
uint8_t leftMask;
|
||||||
|
uint8_t rightPlanes[AMIGA_BITPLANES];
|
||||||
|
uint8_t rightMask;
|
||||||
|
uint8_t i;
|
||||||
|
|
||||||
|
leftMask = 0u;
|
||||||
|
rightMask = 0u;
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
leftPlanes[i] = 0u;
|
||||||
|
rightPlanes[i] = 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (outCol > 0u && (uint16_t)(outCol - 1u) < widthTiles) {
|
||||||
|
planeByteAndMaskAt(sp, row, (uint16_t)(outCol - 1u), leftPlanes, &leftMask);
|
||||||
|
}
|
||||||
|
if (outCol < widthTiles) {
|
||||||
|
planeByteAndMaskAt(sp, row, outCol, rightPlanes, &rightMask);
|
||||||
|
}
|
||||||
|
|
||||||
|
*maskByte = (uint8_t)(((leftMask << (8u - shift)) & 0xFFu) |
|
||||||
|
((rightMask >> shift) & 0xFFu));
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
planeBytes[i] = (uint8_t)(((leftPlanes[i] << (8u - shift)) & 0xFFu) |
|
||||||
|
((rightPlanes[i] >> shift) & 0xFFu));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Emit code that merges one plane byte into d16(an) where d16 is the
|
||||||
|
// row-relative byte offset (the column index; each row is re-based by adda.w).
|
||||||
|
// The choice of all-opaque vs mixed encoding cuts code size when many
|
||||||
|
// pixels are opaque (typical for sprite interiors).
|
||||||
|
static uint16_t emitMergeByteToD16An(uint8_t *out, uint16_t cursor,
|
||||||
|
uint8_t an, uint8_t disp,
|
||||||
|
uint8_t maskByte, uint8_t srcByte)
|
||||||
|
{
|
||||||
|
if (maskByte == 0u) {
|
||||||
|
return cursor; /* nothing to write */
|
||||||
|
}
|
||||||
|
if (maskByte == 0xFFu) {
|
||||||
|
/* All-opaque shortcut: move.b #src, d16(an). */
|
||||||
|
cursor += writeBE16(out + cursor, kMoveBImmToD16An[an]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)srcByte);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)disp);
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
/* Mixed: load existing, clear mask bits, OR in src, write back. */
|
||||||
|
cursor += writeBE16(out + cursor, kMoveBD16AnToD0[an]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)disp);
|
||||||
|
cursor += writeBE16(out + cursor, ANDI_B_IMM_D0);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)((~maskByte) & 0xFFu));
|
||||||
|
cursor += writeBE16(out + cursor, ORI_B_IMM_D0);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)srcByte);
|
||||||
|
cursor += writeBE16(out + cursor, kMoveBD0ToD16An[an]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)disp);
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Public API -----
|
||||||
|
|
||||||
|
uint16_t spriteEmitDrawPlanar68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
|
uint16_t cursor;
|
||||||
|
uint16_t row;
|
||||||
|
uint16_t col;
|
||||||
|
uint16_t heightPx;
|
||||||
|
uint16_t widthTiles;
|
||||||
|
uint16_t bytesPerRow; /* per plane, per row */
|
||||||
|
uint8_t planeBytes[AMIGA_BITPLANES];
|
||||||
|
uint8_t maskByte;
|
||||||
|
uint8_t i;
|
||||||
|
|
||||||
|
if (shift > 7u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor = 0;
|
||||||
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
|
widthTiles = (uint16_t)sp->widthTiles;
|
||||||
|
bytesPerRow = (uint16_t)(widthTiles + (shift == 0u ? 0u : 1u));
|
||||||
|
|
||||||
|
/* Prologue: m68k cdecl callee-saves a2-a6; we clobber a2 and a3
|
||||||
|
* loading plane pointers, so push them first. After the push, all
|
||||||
|
* stack arg displacements shift by +8 (two longs). */
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_PUSH_OPCODE);
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_A2_A3);
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(8u + 4u + i * 4u));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (row = 0; row < heightPx; row++) {
|
||||||
|
for (col = 0; col < bytesPerRow; col++) {
|
||||||
|
if (shift == 0u) {
|
||||||
|
planeByteAndMaskAt(sp, row, col, planeBytes, &maskByte);
|
||||||
|
} else {
|
||||||
|
planeByteAndMaskShifted(sp, row, col, shift, widthTiles, planeBytes, &maskByte);
|
||||||
|
}
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
cursor = emitMergeByteToD16An(out, cursor, i, (uint8_t)col,
|
||||||
|
maskByte, planeBytes[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (row + 1u < heightPx) {
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)AMIGA_BYTES_PER_ROW);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Epilogue: restore a2-a3, rts. */
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_POP_OPCODE);
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_POP_A2_A3);
|
||||||
|
cursor += writeBE16(out + cursor, OPCODE_RTS);
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// SAVE: planes -> backup. backup is one contiguous 4*H*W/8 byte buffer
|
||||||
|
// laid out as 4 plane stripes, matching halSpriteSavePlanes format
|
||||||
|
// (so cross-platform save buffer is interchangeable).
|
||||||
|
//
|
||||||
|
// Per row: for each plane, copy bytesPerRow bytes from d16(an) to
|
||||||
|
// (a4)+. After the row's reads, the planes need to advance by 40,
|
||||||
|
// while a4 advances naturally via post-increment.
|
||||||
|
//
|
||||||
|
// Plane stripes are sequential in backup. We could either (a) do all
|
||||||
|
// rows of plane 0, then plane 1, etc. (matches halSpriteSavePlanes
|
||||||
|
// layout), or (b) interleave rows of all 4 planes (different layout).
|
||||||
|
// halSpriteSavePlanes does (a) -- 4 separate plane stripes. The
|
||||||
|
// emitted code below matches that layout for compat.
|
||||||
|
uint16_t spriteEmitSavePlanar68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
|
uint16_t cursor;
|
||||||
|
uint16_t row;
|
||||||
|
uint16_t col;
|
||||||
|
uint16_t heightPx;
|
||||||
|
uint16_t bytesPerRow;
|
||||||
|
uint8_t i;
|
||||||
|
|
||||||
|
/* Shifts 2..7 reuse shift 1's bytes (identical memcpy). The
|
||||||
|
* spriteCompile post-emit pass aliases their routineOffsets to
|
||||||
|
* slot 1 so this routine is emitted once. */
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor = 0;
|
||||||
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
|
bytesPerRow = (uint16_t)(sp->widthTiles + (shift == 0u ? 0u : 1u));
|
||||||
|
|
||||||
|
/* Prologue: callee-save a2/a3/a4 (m68k cdecl), then load 4 plane
|
||||||
|
* pointers + backup pointer. After the push, all stack arg disps
|
||||||
|
* shift by +12 (three longs). */
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_PUSH_OPCODE);
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_A2_A3_A4);
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + i * 4u));
|
||||||
|
}
|
||||||
|
/* a4 = backup. */
|
||||||
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[4]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + 4u * 4u));
|
||||||
|
|
||||||
|
/* Plane-major: for each plane, walk all rows. After this routine,
|
||||||
|
* each An has advanced by H*40 (one frame full); we don't need to
|
||||||
|
* unwind because the function returns. We DO need to reset An
|
||||||
|
* back to start before walking the NEXT plane though.
|
||||||
|
*
|
||||||
|
* Simpler alternative: row-major (interleaved). Per row, copy
|
||||||
|
* bytesPerRow bytes from each plane to (a4)+, then advance all
|
||||||
|
* 4 planes by 40. Net: a4 advances by 4*H*bytesPerRow; planes
|
||||||
|
* advance by H*40. Backup layout becomes interleaved (plane0_row0,
|
||||||
|
* plane1_row0, plane2_row0, plane3_row0, plane0_row1, ...).
|
||||||
|
*
|
||||||
|
* That doesn't match halSpriteSavePlanes' plane-major layout. Need
|
||||||
|
* to either (a) match it -- emit per-plane outer loop with a4
|
||||||
|
* stride between planes -- or (b) change halSpriteSavePlanes to
|
||||||
|
* interleaved. Picking (b) is simpler in emitted code, but ALSO
|
||||||
|
* requires updating halSpriteRestorePlanes and halSpriteRestoreUnder
|
||||||
|
* fallback math.
|
||||||
|
*
|
||||||
|
* For now: use plane-major matching halSpriteSavePlanes. Per
|
||||||
|
* plane: walk rows, copy bytes from d16(an) to (a4)+, advance an
|
||||||
|
* by 40 after each row except the last; reset an back to start
|
||||||
|
* before next plane. */
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
for (row = 0; row < heightPx; row++) {
|
||||||
|
for (col = 0; col < bytesPerRow; col++) {
|
||||||
|
cursor += writeBE16(out + cursor, kMoveBD16AnToA4Postinc[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)col);
|
||||||
|
}
|
||||||
|
if (row + 1u < heightPx) {
|
||||||
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)AMIGA_BYTES_PER_ROW);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Reset An back to the plane base for next iteration. The
|
||||||
|
* total advance was (heightPx - 1) * 40. Subtract that. */
|
||||||
|
if (i + 1u < AMIGA_BITPLANES) {
|
||||||
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(0u - ((heightPx - 1u) * AMIGA_BYTES_PER_ROW)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_POP_OPCODE);
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_POP_A2_A3_A4);
|
||||||
|
cursor += writeBE16(out + cursor, OPCODE_RTS);
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// RESTORE: backup -> planes. Mirror of save. Uses MOVE.B (a4)+, d16(an).
|
||||||
|
uint16_t spriteEmitRestorePlanar68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
|
uint16_t cursor;
|
||||||
|
uint16_t row;
|
||||||
|
uint16_t col;
|
||||||
|
uint16_t heightPx;
|
||||||
|
uint16_t bytesPerRow;
|
||||||
|
uint8_t i;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor = 0;
|
||||||
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
|
bytesPerRow = (uint16_t)(sp->widthTiles + (shift == 0u ? 0u : 1u));
|
||||||
|
|
||||||
|
/* Callee-save a2/a3/a4; arg disps shift by +12. */
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_PUSH_OPCODE);
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_A2_A3_A4);
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + i * 4u));
|
||||||
|
}
|
||||||
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[4]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + 4u * 4u));
|
||||||
|
|
||||||
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
||||||
|
for (row = 0; row < heightPx; row++) {
|
||||||
|
for (col = 0; col < bytesPerRow; col++) {
|
||||||
|
cursor += writeBE16(out + cursor, kMoveBA4PostincToD16An[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)col);
|
||||||
|
}
|
||||||
|
if (row + 1u < heightPx) {
|
||||||
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)AMIGA_BYTES_PER_ROW);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i + 1u < AMIGA_BITPLANES) {
|
||||||
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
||||||
|
cursor += writeBE16(out + cursor, (uint16_t)(0u - ((heightPx - 1u) * AMIGA_BYTES_PER_ROW)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_POP_OPCODE);
|
||||||
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_POP_A2_A3_A4);
|
||||||
|
cursor += writeBE16(out + cursor, OPCODE_RTS);
|
||||||
|
return cursor;
|
||||||
|
}
|
||||||
|
|
@ -200,6 +200,10 @@ uint16_t spriteEmitDrawX86(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint8_t v3;
|
uint8_t v3;
|
||||||
uint8_t m;
|
uint8_t m;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
||||||
|
|
@ -313,6 +317,10 @@ uint16_t spriteEmitRestoreX86(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint16_t heightPx;
|
uint16_t heightPx;
|
||||||
uint16_t copyBytes;
|
uint16_t copyBytes;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
||||||
|
|
@ -339,6 +347,10 @@ uint16_t spriteEmitSaveX86(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
uint16_t heightPx;
|
uint16_t heightPx;
|
||||||
uint16_t copyBytes;
|
uint16_t copyBytes;
|
||||||
|
|
||||||
|
if (shift > 1u) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));
|
||||||
|
|
|
||||||
|
|
@ -42,4 +42,19 @@ uint16_t spriteEmitRestoreX86 (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
||||||
uint16_t spriteEmitSave68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
uint16_t spriteEmitSave68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
||||||
uint16_t spriteEmitRestore68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
uint16_t spriteEmitRestore68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
||||||
|
|
||||||
|
// Planar 68k emitters (Amiga). Distinct from the chunky 68k emitters
|
||||||
|
// above because the destination addressing is across 4 separate
|
||||||
|
// bitplane buffers, not a single packed-pixel surface. Calling
|
||||||
|
// convention for the emitted bytes (cdecl):
|
||||||
|
// void draw (uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3);
|
||||||
|
// void save (uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, uint8_t *backup);
|
||||||
|
// void restore (uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, const uint8_t *backup);
|
||||||
|
// Each pi is plane_base + byteOff (= y*40 + x/8 already added by the
|
||||||
|
// dispatcher). Returns 0 when no routine is emitted for the shift:
|
||||||
|
// draw emits shifts 0..7; save/restore emit only shifts 0 and 1
|
||||||
|
// (shifts 2..7 alias the shift-1 routine in spriteCompile).
|
||||||
|
uint16_t spriteEmitDrawPlanar68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
||||||
|
uint16_t spriteEmitSavePlanar68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
||||||
|
uint16_t spriteEmitRestorePlanar68k (uint8_t *out, const SpriteT *sp, uint8_t shift);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,18 @@
|
||||||
// Cross-platform "where did it hang?" logger. Each call opens
|
// Cross-platform "where did it hang?" logger. Holds joeylog.txt open
|
||||||
// joeylog.txt, appends a line, fflushes, closes. Slow but durable
|
// across calls; libc's stdio buffer absorbs writes (~4 KB) and the
|
||||||
// -- the last line in the file is guaranteed to be on disk before
|
// final fclose at program exit (via atexit) gets the buffer to disk.
|
||||||
// any subsequent operation that might hang.
|
|
||||||
//
|
//
|
||||||
// Build only as needed for diagnostics; remove the calls when the
|
// Earlier rev opened+closed per call for crash durability ("last line
|
||||||
// bug is fixed. The hang on ST kept us looking at the wrong layer
|
// guaranteed on disk if we hang"); that cost ~1 second per call
|
||||||
// without this kind of trace.
|
// through GoldenGate's ProDOS FST emulation -- a 50-line UBER run
|
||||||
|
// burned ~5 minutes in IO. Even per-line fflush is too expensive
|
||||||
|
// because every fflush forces an FST WRITE, and host-OS file IO time
|
||||||
|
// isn't tracked by the IIgs VBL counter so wall-time logs underreport.
|
||||||
|
//
|
||||||
|
// Tradeoff: if the program crashes mid-run, buffered log lines may
|
||||||
|
// not reach disk. For UBER and similar batch demos that's acceptable;
|
||||||
|
// for hang-debugging where durability matters, call joeyLogFlush()
|
||||||
|
// at the suspected hang points.
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
@ -15,6 +22,27 @@
|
||||||
|
|
||||||
|
|
||||||
static const char *kLogPath = "joeylog.txt";
|
static const char *kLogPath = "joeylog.txt";
|
||||||
|
static FILE *gLogFp = NULL;
|
||||||
|
/* 16 KB is enough for UBER's full log (~5 KB) plus generous headroom,
|
||||||
|
* so the file never auto-flushes mid-run. ORCA-C / libnix default
|
||||||
|
* buffers are only ~512 bytes; with that, a 50-line log triggers ~10
|
||||||
|
* ProDOS / AmigaDOS WRITEs through the host FST, each of which is
|
||||||
|
* untracked-host-time (seconds). Buffer the whole thing in memory and
|
||||||
|
* let the atexit fclose flush once. */
|
||||||
|
#define JOEY_LOG_BUF_BYTES 16384
|
||||||
|
static char gLogBuf[JOEY_LOG_BUF_BYTES];
|
||||||
|
|
||||||
|
|
||||||
|
/* Lazy-open. Returns NULL if the open failed (silently disable). */
|
||||||
|
static FILE *logFile(void) {
|
||||||
|
if (gLogFp == NULL) {
|
||||||
|
gLogFp = fopen(kLogPath, "a");
|
||||||
|
if (gLogFp != NULL) {
|
||||||
|
(void)setvbuf(gLogFp, gLogBuf, _IOFBF, sizeof(gLogBuf));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return gLogFp;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void joeyLog(const char *msg) {
|
void joeyLog(const char *msg) {
|
||||||
|
|
@ -22,13 +50,12 @@ void joeyLog(const char *msg) {
|
||||||
if (msg == NULL) {
|
if (msg == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fp = fopen(kLogPath, "a");
|
fp = logFile();
|
||||||
if (fp == NULL) {
|
if (fp == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fputs(msg, fp);
|
fputs(msg, fp);
|
||||||
fputc('\n', fp);
|
fputc('\n', fp);
|
||||||
fclose(fp);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -38,7 +65,7 @@ void joeyLogF(const char *fmt, ...) {
|
||||||
if (fmt == NULL) {
|
if (fmt == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
fp = fopen(kLogPath, "a");
|
fp = logFile();
|
||||||
if (fp == NULL) {
|
if (fp == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -46,14 +73,27 @@ void joeyLogF(const char *fmt, ...) {
|
||||||
vfprintf(fp, fmt, args);
|
vfprintf(fp, fmt, args);
|
||||||
va_end(args);
|
va_end(args);
|
||||||
fputc('\n', fp);
|
fputc('\n', fp);
|
||||||
fclose(fp);
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void joeyLogFlush(void) {
|
||||||
|
if (gLogFp != NULL) {
|
||||||
|
fflush(gLogFp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void joeyLogReset(void) {
|
void joeyLogReset(void) {
|
||||||
FILE *fp;
|
if (gLogFp != NULL) {
|
||||||
fp = fopen(kLogPath, "w");
|
fclose(gLogFp);
|
||||||
if (fp != NULL) {
|
gLogFp = NULL;
|
||||||
fclose(fp);
|
}
|
||||||
|
/* Truncate by opening for write then closing; subsequent
|
||||||
|
* joeyLog* will reopen for append. */
|
||||||
|
{
|
||||||
|
FILE *fp = fopen(kLogPath, "w");
|
||||||
|
if (fp != NULL) {
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
100
src/core/draw.c
100
src/core/draw.c
|
|
@ -186,13 +186,17 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Highest-tier asm fast path: seed-test + walk-left + walk-right
|
/* Phase 9: planar ports have NULL s->pixels and the asm fast
|
||||||
// + 1-row fill + scan-above + scan-below + push, all in one
|
* paths take a chunky-row pointer. Skip them on planar; the C
|
||||||
// cross-segment call. The asm caches row addr / match decoder
|
* fallback below uses halSamplePixel which works on both
|
||||||
// across every sub-operation. C just pops and dispatches; this
|
* storage layouts. */
|
||||||
// path completes the entire per-seed work and computes the row
|
if (s->pixels != NULL) {
|
||||||
// address itself, so we don't pay y*160 in C unless we fall back.
|
// Highest-tier asm fast path: seed-test + walk-left + walk-right
|
||||||
{
|
// + 1-row fill + scan-above + scan-below + push, all in one
|
||||||
|
// cross-segment call. The asm caches row addr / match decoder
|
||||||
|
// across every sub-operation. C just pops and dispatches; this
|
||||||
|
// path completes the entire per-seed work and computes the row
|
||||||
|
// address itself, so we don't pay y*160 in C unless we fall back.
|
||||||
bool seedMatched;
|
bool seedMatched;
|
||||||
if (halFastFloodWalkAndScans(s->pixels, x, y,
|
if (halFastFloodWalkAndScans(s->pixels, x, y,
|
||||||
matchColor, newNibble, matchEqual,
|
matchColor, newNibble, matchEqual,
|
||||||
|
|
@ -203,22 +207,27 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback path needs row; compute it here so the asm path
|
/* Fallback path: compute row only if chunky; halFastFloodWalk
|
||||||
// above doesn't pay for an unused y*160 multiply on every iter.
|
* needs it but isn't implemented on Amiga. */
|
||||||
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
row = (s->pixels != NULL) ? &s->pixels[SURFACE_ROW_OFFSET(y)] : NULL;
|
||||||
|
|
||||||
// Tier-2 asm fast path: combined seed test + walk-left +
|
// Tier-2 asm fast path: combined seed test + walk-left +
|
||||||
// walk-right in one cross-segment call. Falls back to the
|
// walk-right in one cross-segment call. Falls back to the
|
||||||
// pure-C walks below on ports without an asm implementation.
|
// pure-C walks below on ports without an asm implementation.
|
||||||
{
|
{
|
||||||
bool seedMatched;
|
bool seedMatched;
|
||||||
if (halFastFloodWalk(row, x, matchColor, newNibble, matchEqual,
|
if (row != NULL && halFastFloodWalk(row, x, matchColor, newNibble, matchEqual,
|
||||||
&seedMatched, &leftX, &rightX)) {
|
&seedMatched, &leftX, &rightX)) {
|
||||||
|
if (!seedMatched) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
} else if (halFloodWalkPlanes(s, x, y, matchColor, newNibble, matchEqual,
|
||||||
|
&seedMatched, &leftX, &rightX)) {
|
||||||
if (!seedMatched) {
|
if (!seedMatched) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
pix = srcPixel(row, x);
|
pix = halSamplePixel(s, x, y);
|
||||||
pixMatch = (pix == matchColor);
|
pixMatch = (pix == matchColor);
|
||||||
if (matchEqual) {
|
if (matchEqual) {
|
||||||
if (!pixMatch) {
|
if (!pixMatch) {
|
||||||
|
|
@ -233,7 +242,7 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
// Walk left to find the start of the matching run.
|
// Walk left to find the start of the matching run.
|
||||||
leftX = x;
|
leftX = x;
|
||||||
while (leftX > 0) {
|
while (leftX > 0) {
|
||||||
pix = srcPixel(row, (int16_t)(leftX - 1));
|
pix = halSamplePixel(s, (int16_t)(leftX - 1), y);
|
||||||
pixMatch = (pix == matchColor);
|
pixMatch = (pix == matchColor);
|
||||||
if (matchEqual ? !pixMatch : (pixMatch || pix == newNibble)) {
|
if (matchEqual ? !pixMatch : (pixMatch || pix == newNibble)) {
|
||||||
break;
|
break;
|
||||||
|
|
@ -244,7 +253,7 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
// Walk right to find the end.
|
// Walk right to find the end.
|
||||||
rightX = x;
|
rightX = x;
|
||||||
while (rightX < SURFACE_WIDTH - 1) {
|
while (rightX < SURFACE_WIDTH - 1) {
|
||||||
pix = srcPixel(row, (int16_t)(rightX + 1));
|
pix = halSamplePixel(s, (int16_t)(rightX + 1), y);
|
||||||
pixMatch = (pix == matchColor);
|
pixMatch = (pix == matchColor);
|
||||||
if (matchEqual ? !pixMatch : (pixMatch || pix == newNibble)) {
|
if (matchEqual ? !pixMatch : (pixMatch || pix == newNibble)) {
|
||||||
break;
|
break;
|
||||||
|
|
@ -256,12 +265,18 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
|
|
||||||
// Fill the span. Bypass fillRect's clipping wrapper: walk-out
|
// Fill the span. Bypass fillRect's clipping wrapper: walk-out
|
||||||
// already guaranteed leftX/rightX are in [0..SURFACE_WIDTH-1]
|
// already guaranteed leftX/rightX are in [0..SURFACE_WIDTH-1]
|
||||||
// and the seed-pop bounds check did the same for y.
|
// and the seed-pop bounds check did the same for y. We DO
|
||||||
|
// need the planar dual-write (which fillRect's wrapper would
|
||||||
|
// call), so invoke halFillRectPlanes explicitly after the
|
||||||
|
// chunky span fill -- otherwise PLANAR_PRESENT builds (and,
|
||||||
|
// post-Phase-9, every build) display flood-filled regions
|
||||||
|
// as the unfilled background.
|
||||||
{
|
{
|
||||||
int16_t spanW = (int16_t)(rightX - leftX + 1);
|
int16_t spanW = (int16_t)(rightX - leftX + 1);
|
||||||
if (!halFastFillRect(s, leftX, y, (uint16_t)spanW, 1, newNibble)) {
|
if (!halFastFillRect(s, leftX, y, (uint16_t)spanW, 1, newNibble)) {
|
||||||
fillRectClipped(s, leftX, y, spanW, 1, newNibble);
|
fillRectClipped(s, leftX, y, spanW, 1, newNibble);
|
||||||
}
|
}
|
||||||
|
halFillRectPlanes(s, leftX, y, (uint16_t)spanW, 1, newNibble);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Scan rows above and below for run boundaries. The hot
|
// Scan rows above and below for run boundaries. The hot
|
||||||
|
|
@ -291,19 +306,26 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
}
|
}
|
||||||
scanY = (int16_t)(y + 1);
|
scanY = (int16_t)(y + 1);
|
||||||
}
|
}
|
||||||
scanRow = &s->pixels[SURFACE_ROW_OFFSET(scanY)];
|
scanRow = (s->pixels != NULL) ? &s->pixels[SURFACE_ROW_OFFSET(scanY)] : NULL;
|
||||||
// Prefer the combined scan+push asm path (one call per
|
// Prefer the combined scan+push asm path (one call per
|
||||||
// scan, no markBuf and no per-pixel C edge walk).
|
// scan, no markBuf and no per-pixel C edge walk). Skip
|
||||||
if (!halFastFloodScanAndPush(scanRow, leftX, rightX,
|
// the asm tiers if we don't have a chunky row pointer
|
||||||
|
// (Phase 9 planar ports).
|
||||||
|
if (scanRow == NULL ||
|
||||||
|
!halFastFloodScanAndPush(scanRow, leftX, rightX,
|
||||||
matchColor, newNibble, matchEqual,
|
matchColor, newNibble, matchEqual,
|
||||||
scanY, stackX, stackY,
|
scanY, stackX, stackY,
|
||||||
&sp, FLOOD_STACK_SIZE)) {
|
&sp, FLOOD_STACK_SIZE)) {
|
||||||
if (!halFastFloodScanRow(scanRow, leftX, rightX,
|
if ((scanRow == NULL ||
|
||||||
matchColor, newNibble, matchEqual,
|
!halFastFloodScanRow(scanRow, leftX, rightX,
|
||||||
floodMarkBuf)) {
|
matchColor, newNibble, matchEqual,
|
||||||
|
floodMarkBuf)) &&
|
||||||
|
!halFloodScanRowPlanes(s, leftX, rightX, scanY,
|
||||||
|
matchColor, newNibble, matchEqual,
|
||||||
|
floodMarkBuf)) {
|
||||||
// C fallback: fill markBuf the slow way.
|
// C fallback: fill markBuf the slow way.
|
||||||
for (i = 0; i < spanLen; i++) {
|
for (i = 0; i < spanLen; i++) {
|
||||||
pix = srcPixel(scanRow, (int16_t)(leftX + i));
|
pix = halSamplePixel(s, (int16_t)(leftX + i), scanY);
|
||||||
pixMatch = (pix == matchColor);
|
pixMatch = (pix == matchColor);
|
||||||
floodMarkBuf[i] = (uint8_t)(matchEqual
|
floodMarkBuf[i] = (uint8_t)(matchEqual
|
||||||
? (pixMatch ? 1 : 0)
|
? (pixMatch ? 1 : 0)
|
||||||
|
|
@ -621,12 +643,12 @@ void fillRect(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t
|
||||||
if (!halFastFillRect(s, sx, sy, (uint16_t)sw, (uint16_t)sh, colorIndex)) {
|
if (!halFastFillRect(s, sx, sy, (uint16_t)sw, (uint16_t)sh, colorIndex)) {
|
||||||
fillRectClipped(s, sx, sy, sw, sh, colorIndex);
|
fillRectClipped(s, sx, sy, sw, sh, colorIndex);
|
||||||
}
|
}
|
||||||
|
halFillRectPlanes(s, sx, sy, (uint16_t)sw, (uint16_t)sh, colorIndex);
|
||||||
surfaceMarkDirtyRect(s, sx, sy, sw, sh);
|
surfaceMarkDirtyRect(s, sx, sy, sw, sh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void floodFill(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor) {
|
void floodFill(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor) {
|
||||||
uint8_t *row;
|
|
||||||
uint8_t seedColor;
|
uint8_t seedColor;
|
||||||
|
|
||||||
if (s == NULL) {
|
if (s == NULL) {
|
||||||
|
|
@ -635,8 +657,9 @@ void floodFill(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor) {
|
||||||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
/* halSamplePixel reads from whichever storage the port uses --
|
||||||
seedColor = srcPixel(row, x);
|
* works on both chunky (s->pixels) and planar (s->portData) ports. */
|
||||||
|
seedColor = halSamplePixel(s, x, y);
|
||||||
if ((seedColor & 0x0F) == (newColor & 0x0F)) {
|
if ((seedColor & 0x0F) == (newColor & 0x0F)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -645,7 +668,6 @@ void floodFill(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor) {
|
||||||
|
|
||||||
|
|
||||||
void floodFillBounded(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor, uint8_t boundaryColor) {
|
void floodFillBounded(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor, uint8_t boundaryColor) {
|
||||||
uint8_t *row;
|
|
||||||
uint8_t pix;
|
uint8_t pix;
|
||||||
|
|
||||||
if (s == NULL) {
|
if (s == NULL) {
|
||||||
|
|
@ -654,8 +676,7 @@ void floodFillBounded(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor, uint8
|
||||||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
pix = halSamplePixel(s, x, y);
|
||||||
pix = srcPixel(row, x);
|
|
||||||
// Starting on a boundary pixel or already-filled pixel: nothing
|
// Starting on a boundary pixel or already-filled pixel: nothing
|
||||||
// to do.
|
// to do.
|
||||||
if ((pix & 0x0F) == (boundaryColor & 0x0F)) {
|
if ((pix & 0x0F) == (boundaryColor & 0x0F)) {
|
||||||
|
|
@ -669,25 +690,16 @@ void floodFillBounded(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor, uint8
|
||||||
|
|
||||||
|
|
||||||
uint8_t samplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
uint8_t samplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
||||||
uint8_t byte;
|
|
||||||
|
|
||||||
if (s == NULL) {
|
if (s == NULL) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
/* halSamplePixel reads from whichever storage the port uses --
|
||||||
/* Cast to uint16_t before shift -- already validated x >= 0,
|
* chunky ports return a nibble extracted from s->pixels; planar
|
||||||
* unsigned semantics match. Avoids ~SSHIFTRIGHT helper. */
|
* ports read 4 plane bits and assemble the nibble. */
|
||||||
byte = s->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
return halSamplePixel(s, x, y);
|
||||||
if (x & 1) {
|
|
||||||
return (uint8_t)(byte & 0x0F);
|
|
||||||
}
|
|
||||||
/* `byte >> 4` is uint8_t but ORCA-C promotes to int (signed 16-bit)
|
|
||||||
* for the shift, then narrows -- triggers ~SSHIFTRIGHT. The
|
|
||||||
* mask-then-shift sidesteps the promotion path. */
|
|
||||||
return (uint8_t)((byte & 0xF0u) >> 4);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -725,6 +737,8 @@ void surfaceBlit(SurfaceT *dst, const JoeyAssetT *src, int16_t x, int16_t y) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halBlitRectPlanes(dst, x, y, src->pixels, srcX0, srcY0,
|
||||||
|
copyW, copyH, srcRowBytes, 0xFFFFu);
|
||||||
surfaceMarkDirtyRect(dst, x, y, copyW, copyH);
|
surfaceMarkDirtyRect(dst, x, y, copyW, copyH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -768,6 +782,8 @@ void surfaceBlitMasked(SurfaceT *dst, const JoeyAssetT *src, int16_t x, int16_t
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halBlitRectPlanes(dst, x, y, src->pixels, srcX0, srcY0,
|
||||||
|
copyW, copyH, srcRowBytes, (uint16_t)transparent);
|
||||||
surfaceMarkDirtyRect(dst, x, y, copyW, copyH);
|
surfaceMarkDirtyRect(dst, x, y, copyW, copyH);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
172
src/core/hal.h
172
src/core/hal.h
|
|
@ -9,8 +9,11 @@
|
||||||
#ifndef JOEYLIB_HAL_H
|
#ifndef JOEYLIB_HAL_H
|
||||||
#define JOEYLIB_HAL_H
|
#define JOEYLIB_HAL_H
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
#include "joey/core.h"
|
#include "joey/core.h"
|
||||||
#include "joey/input.h"
|
#include "joey/input.h"
|
||||||
|
#include "joey/sprite.h"
|
||||||
#include "joey/surface.h"
|
#include "joey/surface.h"
|
||||||
|
|
||||||
// Per-port one-shot initialization. Called from joeyInit after config
|
// Per-port one-shot initialization. Called from joeyInit after config
|
||||||
|
|
@ -27,17 +30,131 @@ void halShutdown(void);
|
||||||
// backs the library-owned stage surface. Ports that have a
|
// backs the library-owned stage surface. Ports that have a
|
||||||
// hardware-friendly pin location for the back buffer (IIgs $01/2000
|
// hardware-friendly pin location for the back buffer (IIgs $01/2000
|
||||||
// with SHR shadow inhibited) return that address here; ports with no
|
// with SHR shadow inhibited) return that address here; ports with no
|
||||||
// such constraint just malloc/free.
|
// such constraint just malloc/free. Planar 68k ports may return NULL
|
||||||
|
// if the surface is planar-only and has no chunky shadow.
|
||||||
uint8_t *halStageAllocPixels(void);
|
uint8_t *halStageAllocPixels(void);
|
||||||
void halStageFreePixels(uint8_t *pixels);
|
void halStageFreePixels(uint8_t *pixels);
|
||||||
|
|
||||||
// Present the entire source surface to the display.
|
// Allocate / release the per-surface portData blob (see SurfaceT in
|
||||||
void halPresent(const SurfaceT *src);
|
// surfaceInternal.h). Chunky ports return NULL from Init -- they keep
|
||||||
|
// portData unused and operate on the chunky `pixels` buffer. Planar
|
||||||
|
// 68k ports allocate a per-surface struct here describing the
|
||||||
|
// bitplane storage (Amiga: 4 separate plane buffers + stride; ST: one
|
||||||
|
// interleaved buffer + stride). Called by surfaceCreate / stageAlloc
|
||||||
|
// after pixels is allocated; freed by surfaceDestroy / stageFree
|
||||||
|
// before pixels is freed. `isStage` lets the port short-circuit for
|
||||||
|
// the stage if its planes are display-owned (e.g. Amiga's BitMap
|
||||||
|
// planes from OpenScreen) rather than allocated per surface.
|
||||||
|
void *halSurfaceAllocPortData(SurfaceT *s, bool isStage);
|
||||||
|
void halSurfaceFreePortData(SurfaceT *s, bool isStage, void *portData);
|
||||||
|
|
||||||
// Present a rectangular region of the source surface. The caller has
|
// Phase 3 planar dual-write: called from cross-platform fillRect AFTER
|
||||||
// already validated and clipped the rect to be fully inside the
|
// the chunky shadow has been written, with the same already-clipped
|
||||||
// surface bounds and to have positive extents.
|
// (x, y, w, h) and the raw color index 0..15. Planar ports update
|
||||||
void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h);
|
// the bitplanes with the rect's bit pattern (per-plane bit value =
|
||||||
|
// (color >> plane) & 1). Chunky ports (DOS, IIgs) provide a no-op
|
||||||
|
// stub. Called unconditionally so cross-platform code doesn't have
|
||||||
|
// to know the port is planar; the per-port stub is the cheapest
|
||||||
|
// possible thing on chunky ports.
|
||||||
|
void halFillRectPlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex);
|
||||||
|
|
||||||
|
// Phase 3 planar dual-write for surfaceCopy: called from cross-platform
|
||||||
|
// surfaceCopy AFTER the chunky pixel buffer is memcpy'd. Planar ports
|
||||||
|
// also memcpy the bitplanes from src to dst so JOEYLIB_PLANAR_PRESENT
|
||||||
|
// builds see correct planes. dst and src are non-NULL and distinct
|
||||||
|
// (caller's no-op guards already passed).
|
||||||
|
void halSurfaceCopyPlanes(SurfaceT *dst, const SurfaceT *src);
|
||||||
|
|
||||||
|
// Phase 5 planar dual-write for tile ops. Called from cross-platform
|
||||||
|
// tile.c AFTER the chunky path completes. (bx, by) are tile-grid
|
||||||
|
// coords (0..39 horiz, 0..24 vert; surface is 40x25 tiles).
|
||||||
|
// transparentIndex for tileCopyMasked: pixel value to skip. tilePaste
|
||||||
|
// reads from a packed 32-byte chunky TileT (4 bytes/row x 8 rows).
|
||||||
|
// All Amiga impls operate on the off-screen shadow planes via
|
||||||
|
// AmigaPlanarT; chunky-port stubs are no-ops. tileSnap is read-only
|
||||||
|
// so has no planar dual-write hook.
|
||||||
|
void halTileFillPlanes(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex);
|
||||||
|
void halTileCopyPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy);
|
||||||
|
void halTileCopyMaskedPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy, uint8_t transparentIndex);
|
||||||
|
void halTilePastePlanes(SurfaceT *dst, uint8_t bx, uint8_t by, const uint8_t *chunkyTile);
|
||||||
|
|
||||||
|
// tileSnap: cross-platform code reads s->pixels chunky bytes into a
|
||||||
|
// 32-byte TileT. On planar ports (s->pixels NULL) the chunky read
|
||||||
|
// crashes -- this hook is the planar derivation: reads bitplane bits
|
||||||
|
// for the tile rect and assembles 32 chunky bytes (4 bytes/row x 8
|
||||||
|
// rows) into chunkyTileOut. Chunky ports (s->pixels valid) implement
|
||||||
|
// this as a no-op since the cross-platform fallback already filled
|
||||||
|
// chunkyTileOut from s->pixels.
|
||||||
|
void halTileSnapPlanes(const SurfaceT *src, uint8_t bx, uint8_t by, uint8_t *chunkyTileOut);
|
||||||
|
|
||||||
|
// Phase 6 planar dual-write for spriteDraw. Called from cross-platform
|
||||||
|
// sprite.c AFTER spriteCompiledDraw or spriteDrawInterpreted has
|
||||||
|
// updated the chunky shadow. (x, y) is the destination top-left in
|
||||||
|
// surface pixels (may be partially off-surface; the hook does its own
|
||||||
|
// clipping). Walks the sprite's chunky tile data and updates dst
|
||||||
|
// surface planes for every non-transparent pixel (nibble != 0).
|
||||||
|
// Save/restore are not handled by this hook: halSpriteSavePlanes /
|
||||||
|
// halSpriteRestorePlanes (declared below) back up and restore the
|
||||||
|
// plane bytes under the sprite. Without them, after spriteSaveUnder
|
||||||
|
// + spriteDraw + spriteRestoreUnder the planes would still show the
|
||||||
|
// sprite (chunky restored, planes unchanged).
|
||||||
|
void halSpriteDrawPlanes(SurfaceT *s, const SpriteT *sp, int16_t x, int16_t y);
|
||||||
|
|
||||||
|
// Phase 8 planar dual-write for asset blits and full surface loads.
|
||||||
|
// halBlitRectPlanes is called from surfaceBlit / surfaceBlitMasked
|
||||||
|
// AFTER the chunky path. transparent == 0xFFFF means opaque blit; any
|
||||||
|
// other value is a nibble (0..15) to skip. srcBytes is the asset's
|
||||||
|
// raw chunky pixel buffer; srcRowBytes is its stride. (x, y) is the
|
||||||
|
// already-clipped destination top-left in dst surface pixels;
|
||||||
|
// srcX0/srcY0 is where in the asset the visible region starts after
|
||||||
|
// clip; copyW/copyH is the visible region size in pixels.
|
||||||
|
//
|
||||||
|
void halBlitRectPlanes(SurfaceT *dst, int16_t x, int16_t y, const uint8_t *srcBytes, int16_t srcX0, int16_t srcY0, int16_t copyW, int16_t copyH, int16_t srcRowBytes, uint16_t transparent);
|
||||||
|
|
||||||
|
// Phase 9 sprite save/restore plane data. Chunky ports already hold
|
||||||
|
// pixel data in backup->bytes via the cross-platform memcpy. Planar
|
||||||
|
// ports (Amiga) have a NULL chunky buffer, so backup->bytes is unused by
|
||||||
|
// the chunky path -- we repurpose it to hold per-plane bytes. Layout:
|
||||||
|
// 4 plane stripes of (h * bytesPerPlaneRow) bytes each, where
|
||||||
|
// bytesPerPlaneRow = w/8 (sprite x and w are guaranteed 2-pixel
|
||||||
|
// aligned by spriteSaveUnder; planar requires further 8-pixel
|
||||||
|
// rounding -- see Amiga impl notes). Total bytes:
|
||||||
|
// 4 * h * w/8 = h * w/2 = same as chunky. backup->sizeBytes capacity
|
||||||
|
// works on both ports. Chunky-port impls are no-ops; Amiga writes /
|
||||||
|
// reads plane bytes via AmigaPlanarT.
|
||||||
|
void halSpriteSavePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t *dstPlaneBytes);
|
||||||
|
void halSpriteRestorePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, const uint8_t *srcPlaneBytes);
|
||||||
|
|
||||||
|
// Phase 9 reader hooks. Cross-platform code calls these instead of
|
||||||
|
// reading from s->pixels directly so it works regardless of whether
|
||||||
|
// the port stores chunky or planar as the source of truth. Chunky
|
||||||
|
// ports (DOS, IIgs) implement these reading from s->pixels (cheap);
|
||||||
|
// Amiga reads from the bitplanes in AmigaPlanarT. (x, y) bounds are
|
||||||
|
// already validated by the caller.
|
||||||
|
//
|
||||||
|
// halSamplePixel: returns the 0..15 nibble at (x, y).
|
||||||
|
// halSurfaceHash: returns the FNV-style hash of pixel + scb + palette
|
||||||
|
// that surfaceHash currently computes by walking s->pixels. Allows
|
||||||
|
// ports to use their native pixel storage instead.
|
||||||
|
// halSurfaceCopyChunky: cross-platform surfaceCopy used to memcpy
|
||||||
|
// s->pixels src->dst; on planar ports there is no chunky to copy
|
||||||
|
// (planes already covered by halSurfaceCopyPlanes). Chunky ports
|
||||||
|
// do the memcpy here; Amiga is a no-op.
|
||||||
|
// halSurfaceLoadFileChunky / halSurfaceSaveFileChunky wrap fread /
|
||||||
|
// fwrite of the pixel data. Chunky ports stream directly to/from
|
||||||
|
// s->pixels; Amiga uses a scratch buffer + c2p (load) or
|
||||||
|
// plane->chunky derivation (save).
|
||||||
|
uint8_t halSamplePixel(const SurfaceT *s, int16_t x, int16_t y);
|
||||||
|
uint32_t halSurfaceHash(const SurfaceT *s);
|
||||||
|
void halSurfaceCopyChunky(SurfaceT *dst, const SurfaceT *src);
|
||||||
|
bool halSurfaceLoadFileChunky(SurfaceT *dst, FILE *fp);
|
||||||
|
bool halSurfaceSaveFileChunky(const SurfaceT *src, FILE *fp);
|
||||||
|
|
||||||
|
// Present the dirty regions of the source surface to the display.
|
||||||
|
// The cross-platform stagePresent walks the dirty arrays before
|
||||||
|
// calling this; ports may use the dirty arrays themselves to skip
|
||||||
|
// untouched rows.
|
||||||
|
void halPresent(const SurfaceT *src);
|
||||||
|
|
||||||
// Optional: returns a port-specific error message string for the last
|
// Optional: returns a port-specific error message string for the last
|
||||||
// HAL failure, or NULL if none. Ports may return NULL always.
|
// HAL failure, or NULL if none. Ports may return NULL always.
|
||||||
|
|
@ -73,9 +190,23 @@ uint16_t halFrameHz(void);
|
||||||
|
|
||||||
// Audio: per-port engine setup, module + SFX playback, teardown.
|
// Audio: per-port engine setup, module + SFX playback, teardown.
|
||||||
// halAudioInit returns true if the platform has a working engine.
|
// halAudioInit returns true if the platform has a working engine.
|
||||||
// All entry points are safe to call when init failed -- they become
|
// Per-surface chunky pixel allocation. Chunky ports (DOS, IIgs, ST
|
||||||
// no-ops. See joey/audio.h for the public API contract that wraps
|
// while still chunky) allocate SURFACE_PIXELS_SIZE bytes (calloc-
|
||||||
// these.
|
// style, zero-filled). Pure-planar Amiga returns NULL -- there's no
|
||||||
|
// chunky shadow; cross-platform code that previously read s->pixels
|
||||||
|
// goes through halSamplePixel / halSurfaceCopyChunky / etc. instead.
|
||||||
|
// halSurfaceFreePixels mirrors free(); NULL is a valid input on
|
||||||
|
// planar ports.
|
||||||
|
uint8_t *halSurfaceAllocPixels(void);
|
||||||
|
void halSurfaceFreePixels(uint8_t *pixels);
|
||||||
|
|
||||||
|
// Get a pointer to the start of bitplane `planeIdx` (0..3) for surface
|
||||||
|
// `s`. Returns NULL on chunky ports (no planes). On Amiga returns
|
||||||
|
// pd->planes[planeIdx] from the AmigaPlanarT struct in portData.
|
||||||
|
// Used by the planar sprite codegen dispatcher to compute the 4
|
||||||
|
// plane addresses to hand the emitted asm.
|
||||||
|
uint8_t *halSurfacePlanePtr(const SurfaceT *s, uint8_t planeIdx);
|
||||||
|
|
||||||
bool halAudioInit(void);
|
bool halAudioInit(void);
|
||||||
void halAudioShutdown(void);
|
void halAudioShutdown(void);
|
||||||
void halAudioPlayMod(const uint8_t *data, uint32_t length, bool loop);
|
void halAudioPlayMod(const uint8_t *data, uint32_t length, bool loop);
|
||||||
|
|
@ -185,6 +316,21 @@ bool halFastFloodWalkAndScans(uint8_t *pixels, int16_t x, int16_t y,
|
||||||
bool *seedMatched,
|
bool *seedMatched,
|
||||||
int16_t *leftXOut, int16_t *rightXOut);
|
int16_t *leftXOut, int16_t *rightXOut);
|
||||||
|
|
||||||
|
// Planar variants of halFastFloodWalk / halFastFloodScanRow. Take a
|
||||||
|
// SurfaceT* instead of a chunky-row pointer so they work on planar
|
||||||
|
// ports (Amiga post-Phase 9) where s->pixels is NULL. Same semantics;
|
||||||
|
// chunky ports return false (the chunky variants above are faster
|
||||||
|
// when a chunky row is available). These replace the per-pixel
|
||||||
|
// halSamplePixel walk on planar ports.
|
||||||
|
bool halFloodWalkPlanes(const SurfaceT *s, int16_t startX, int16_t y,
|
||||||
|
uint8_t matchColor, uint8_t newColor, bool matchEqual,
|
||||||
|
bool *seedMatched,
|
||||||
|
int16_t *leftXOut, int16_t *rightXOut);
|
||||||
|
|
||||||
|
bool halFloodScanRowPlanes(const SurfaceT *s, int16_t leftX, int16_t rightX, int16_t scanY,
|
||||||
|
uint8_t matchColor, uint8_t newColor, bool matchEqual,
|
||||||
|
uint8_t *markBuf);
|
||||||
|
|
||||||
// surfaceBlit / surfaceBlitMasked rect-copy helper. Caller has done
|
// surfaceBlit / surfaceBlitMasked rect-copy helper. Caller has done
|
||||||
// the clip math: dstRow0 / srcRow0 point at row 0 of the source/dest
|
// the clip math: dstRow0 / srcRow0 point at row 0 of the source/dest
|
||||||
// regions, dstX / srcX are intra-row pixel offsets, copyW/copyH are
|
// regions, dstX / srcX are intra-row pixel offsets, copyW/copyH are
|
||||||
|
|
@ -333,6 +479,12 @@ extern uint16_t gFloodRightX;
|
||||||
#undef halFastFloodScanAndPush
|
#undef halFastFloodScanAndPush
|
||||||
#define halFastFloodScanAndPush(_row, _lx, _rx, _mc, _nc, _me, _sy, _sx, _syA, _sp, _ms) (false)
|
#define halFastFloodScanAndPush(_row, _lx, _rx, _mc, _nc, _me, _sy, _sx, _syA, _sp, _ms) (false)
|
||||||
|
|
||||||
|
// IIgs is chunky; the planar flood hooks are never reachable.
|
||||||
|
#undef halFloodWalkPlanes
|
||||||
|
#define halFloodWalkPlanes(_s, _sx, _y, _mc, _nc, _me, _sm, _lx, _rx) (false)
|
||||||
|
#undef halFloodScanRowPlanes
|
||||||
|
#define halFloodScanRowPlanes(_s, _lx, _rx, _sy, _mc, _nc, _me, _mb) (false)
|
||||||
|
|
||||||
// Tier-1 flood: multi-output. Asm sets gFloodSeedMatch / gFloodLeftX /
|
// Tier-1 flood: multi-output. Asm sets gFloodSeedMatch / gFloodLeftX /
|
||||||
// gFloodRightX; macro reads those into the caller's out-ptrs.
|
// gFloodRightX; macro reads those into the caller's out-ptrs.
|
||||||
#undef halFastFloodWalkAndScans
|
#undef halFastFloodWalkAndScans
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,7 @@
|
||||||
//
|
//
|
||||||
// stagePresent walks the per-row dirty bands set by drawing primitives
|
// stagePresent walks the per-row dirty bands set by drawing primitives
|
||||||
// and asks the port HAL to flip just those rows to the display, then
|
// and asks the port HAL to flip just those rows to the display, then
|
||||||
// resets the dirty state. stagePresentRect bypasses dirty tracking
|
// resets the dirty state.
|
||||||
// entirely and slams a caller-specified rectangle (after clipping).
|
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
|
||||||
|
|
@ -25,48 +24,3 @@ void stagePresent(void) {
|
||||||
halPresent(stage);
|
halPresent(stage);
|
||||||
stageDirtyClearAll();
|
stageDirtyClearAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void stagePresentRect(int16_t x, int16_t y, uint16_t w, uint16_t h) {
|
|
||||||
SurfaceT *stage;
|
|
||||||
int16_t sx;
|
|
||||||
int16_t sy;
|
|
||||||
int16_t sw;
|
|
||||||
int16_t sh;
|
|
||||||
|
|
||||||
stage = stageGet();
|
|
||||||
if (stage == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
sx = x;
|
|
||||||
sy = y;
|
|
||||||
sw = (int16_t)w;
|
|
||||||
sh = (int16_t)h;
|
|
||||||
|
|
||||||
if (sw <= 0 || sh <= 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (sx < 0) {
|
|
||||||
sw += sx;
|
|
||||||
sx = 0;
|
|
||||||
}
|
|
||||||
if (sy < 0) {
|
|
||||||
sh += sy;
|
|
||||||
sy = 0;
|
|
||||||
}
|
|
||||||
if (sx >= SURFACE_WIDTH || sy >= SURFACE_HEIGHT) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (sx + sw > SURFACE_WIDTH) {
|
|
||||||
sw = SURFACE_WIDTH - sx;
|
|
||||||
}
|
|
||||||
if (sy + sh > SURFACE_HEIGHT) {
|
|
||||||
sh = SURFACE_HEIGHT - sy;
|
|
||||||
}
|
|
||||||
if (sw <= 0 || sh <= 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
halPresentRect(stage, sx, sy, (uint16_t)sw, (uint16_t)sh);
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@
|
||||||
|
|
||||||
#include "joey/sprite.h"
|
#include "joey/sprite.h"
|
||||||
#include "codegenArenaInternal.h"
|
#include "codegenArenaInternal.h"
|
||||||
|
#include "hal.h"
|
||||||
#include "spriteInternal.h"
|
#include "spriteInternal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
|
@ -22,6 +23,20 @@
|
||||||
// Color 0 is always transparent for sprites (DESIGN.md contract).
|
// Color 0 is always transparent for sprites (DESIGN.md contract).
|
||||||
#define TRANSPARENT_NIBBLE 0
|
#define TRANSPARENT_NIBBLE 0
|
||||||
|
|
||||||
|
// On Amiga (post-Phase 9 / Phase 6 redux) the compiled sprite emitter
|
||||||
|
// writes directly to the bitplanes, so the halSprite*Planes hooks are
|
||||||
|
// pure duplicate work after a compiled call. On other ports the
|
||||||
|
// hooks are either no-op stubs (chunky-only IIgs/DOS) or the only
|
||||||
|
// thing writing planes (ST: chunky-shadow + planes). Slow / interpreter
|
||||||
|
// paths still need the hooks unconditionally on every platform -- the
|
||||||
|
// chunky interpreter is a no-op on Amiga (s->pixels NULL) so the hook
|
||||||
|
// is the only draw.
|
||||||
|
#if defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
#define COMPILED_SPRITE_WRITES_PLANES 1
|
||||||
|
#else
|
||||||
|
#define COMPILED_SPRITE_WRITES_PLANES 0
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
// ----- Prototypes -----
|
// ----- Prototypes -----
|
||||||
|
|
||||||
|
|
@ -144,14 +159,20 @@ static void spriteDrawInterpreted(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (row = 0; row < h; row++) {
|
/* Skip the chunky write loop on planar ports (s->pixels == NULL).
|
||||||
dstRow = &s->pixels[(dy + row) * SURFACE_BYTES_PER_ROW];
|
* halSpriteDrawPlanes is called by the spriteDraw caller and does
|
||||||
for (col = 0; col < w; col++) {
|
* its own clip + plane write, so the dirty mark + planar update
|
||||||
nibble = srcNibble(sp, (int16_t)(sx + col), (int16_t)(sy + row));
|
* happen there. Phase 9 dropped the chunky shadow on Amiga. */
|
||||||
if (nibble == TRANSPARENT_NIBBLE) {
|
if (s->pixels != NULL) {
|
||||||
continue;
|
for (row = 0; row < h; row++) {
|
||||||
|
dstRow = &s->pixels[(dy + row) * SURFACE_BYTES_PER_ROW];
|
||||||
|
for (col = 0; col < w; col++) {
|
||||||
|
nibble = srcNibble(sp, (int16_t)(sx + col), (int16_t)(sy + row));
|
||||||
|
if (nibble == TRANSPARENT_NIBBLE) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
writeDstNibble(dstRow, (int16_t)(dx + col), nibble);
|
||||||
}
|
}
|
||||||
writeDstNibble(dstRow, (int16_t)(dx + col), nibble);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
surfaceMarkDirtyRect(s, dx, dy, w, h);
|
surfaceMarkDirtyRect(s, dx, dy, w, h);
|
||||||
|
|
@ -200,6 +221,13 @@ SpriteT *spriteCreateFromSurface(const SurfaceT *src, int16_t x, int16_t y,
|
||||||
if (src == NULL || widthTiles == 0 || heightTiles == 0) {
|
if (src == NULL || widthTiles == 0 || heightTiles == 0) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
/* Phase 9: planar ports have NULL src->pixels. Capturing a sprite
|
||||||
|
* from such a surface needs a planar-to-chunky derivation hook;
|
||||||
|
* not implemented yet, so refuse the call. Apps targeting Amiga
|
||||||
|
* should ship sprites as static tile data instead. */
|
||||||
|
if (src->pixels == NULL) {
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
// Source x/y must be on a tile boundary so each captured tile lands
|
// Source x/y must be on a tile boundary so each captured tile lands
|
||||||
// on whole bytes -- mid-byte snapshots would lose half a pixel at
|
// on whole bytes -- mid-byte snapshots would lose half a pixel at
|
||||||
// the left edge.
|
// the left edge.
|
||||||
|
|
@ -284,10 +312,14 @@ void spriteDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y) {
|
||||||
// need clip math (they walk fixed offsets).
|
// need clip math (they walk fixed offsets).
|
||||||
if (sp->slot != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) {
|
if (sp->slot != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) {
|
||||||
spriteCompiledDraw(s, sp, x, y);
|
spriteCompiledDraw(s, sp, x, y);
|
||||||
|
if (!COMPILED_SPRITE_WRITES_PLANES) {
|
||||||
|
halSpriteDrawPlanes(s, sp, x, y);
|
||||||
|
}
|
||||||
surfaceMarkDirtyRect(s, x, y, (int16_t)widthPx, (int16_t)heightPx);
|
surfaceMarkDirtyRect(s, x, y, (int16_t)widthPx, (int16_t)heightPx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
spriteDrawInterpreted(s, sp, x, y);
|
spriteDrawInterpreted(s, sp, x, y);
|
||||||
|
halSpriteDrawPlanes(s, sp, x, y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -332,7 +364,7 @@ void spriteSaveAndDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBac
|
||||||
uint16_t saveIdx;
|
uint16_t saveIdx;
|
||||||
uint16_t drawIdx;
|
uint16_t drawIdx;
|
||||||
uint8_t *offsetsBase;
|
uint8_t *offsetsBase;
|
||||||
shift = (uint8_t)(x & 1);
|
shift = SPRITE_SHIFT_INDEX(x);
|
||||||
saveIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
saveIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
||||||
drawIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_DRAW);
|
drawIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_DRAW);
|
||||||
offsetsBase = (uint8_t *)sp->routineOffsets;
|
offsetsBase = (uint8_t *)sp->routineOffsets;
|
||||||
|
|
@ -340,6 +372,10 @@ void spriteSaveAndDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBac
|
||||||
*(uint16_t *)(offsetsBase + (drawIdx << 1)) != SPRITE_NOT_COMPILED) {
|
*(uint16_t *)(offsetsBase + (drawIdx << 1)) != SPRITE_NOT_COMPILED) {
|
||||||
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
||||||
spriteCompiledDraw (s, sp, x, y);
|
spriteCompiledDraw (s, sp, x, y);
|
||||||
|
if (!COMPILED_SPRITE_WRITES_PLANES) {
|
||||||
|
halSpriteSavePlanes(s, backup->x, backup->y, backup->width, backup->height, backup->bytes);
|
||||||
|
halSpriteDrawPlanes(s, sp, x, y);
|
||||||
|
}
|
||||||
surfaceMarkDirtyRect (s, x, y, (int16_t)widthPx, (int16_t)heightPx);
|
surfaceMarkDirtyRect (s, x, y, (int16_t)widthPx, (int16_t)heightPx);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -630,13 +666,18 @@ void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) {
|
||||||
routeOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (routeIdx << 1));
|
routeOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (routeIdx << 1));
|
||||||
if (routeOffset != SPRITE_NOT_COMPILED) {
|
if (routeOffset != SPRITE_NOT_COMPILED) {
|
||||||
spriteCompiledRestoreUnder(s, backup);
|
spriteCompiledRestoreUnder(s, backup);
|
||||||
|
if (!COMPILED_SPRITE_WRITES_PLANES) {
|
||||||
|
halSpriteRestorePlanes(s, bx, by, bw, bh, backup->bytes);
|
||||||
|
}
|
||||||
surfaceMarkDirtyRect(s, bx, by, (int16_t)bw, (int16_t)bh);
|
surfaceMarkDirtyRect(s, bx, by, (int16_t)bw, (int16_t)bh);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Slow / interpreted memcpy fallback. */
|
/* Slow / interpreted memcpy fallback. Skip the chunky memcpy if
|
||||||
{
|
* the port has no chunky shadow (Phase 9 Amiga: s->pixels NULL);
|
||||||
|
* halSpriteRestorePlanes below does the planar restore. */
|
||||||
|
if (s->pixels != NULL) {
|
||||||
int16_t row;
|
int16_t row;
|
||||||
int16_t byteStart;
|
int16_t byteStart;
|
||||||
uint8_t *dstRow;
|
uint8_t *dstRow;
|
||||||
|
|
@ -650,6 +691,7 @@ void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) {
|
||||||
(size_t)copyBytes);
|
(size_t)copyBytes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halSpriteRestorePlanes(s, bx, by, bw, bh, backup->bytes);
|
||||||
surfaceMarkDirtyRect(s, bx, by, (int16_t)bw, (int16_t)bh);
|
surfaceMarkDirtyRect(s, bx, by, (int16_t)bw, (int16_t)bh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -684,11 +726,14 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit
|
||||||
if (backup->bytes != NULL && slot != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) {
|
if (backup->bytes != NULL && slot != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) {
|
||||||
uint16_t routeIdx;
|
uint16_t routeIdx;
|
||||||
uint16_t routeOffset;
|
uint16_t routeOffset;
|
||||||
shift = (uint8_t)(x & 1);
|
shift = SPRITE_SHIFT_INDEX(x);
|
||||||
routeIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
routeIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
||||||
routeOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (routeIdx << 1));
|
routeOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (routeIdx << 1));
|
||||||
if (routeOffset != SPRITE_NOT_COMPILED) {
|
if (routeOffset != SPRITE_NOT_COMPILED) {
|
||||||
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
||||||
|
if (!COMPILED_SPRITE_WRITES_PLANES) {
|
||||||
|
halSpriteSavePlanes(s, backup->x, backup->y, backup->width, backup->height, backup->bytes);
|
||||||
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -744,11 +789,16 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit
|
||||||
// backup with bytes==NULL.
|
// backup with bytes==NULL.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (row = 0; row < h; row++) {
|
/* Chunky save path: skip on planar ports (s->pixels NULL).
|
||||||
srcRow = &s->pixels[(dy + row) * SURFACE_BYTES_PER_ROW];
|
* halSpriteSavePlanes below covers the planar case. */
|
||||||
memcpy(&backup->bytes[(uint16_t)row * (uint16_t)copyBytes],
|
if (s->pixels != NULL) {
|
||||||
&srcRow[byteStart],
|
for (row = 0; row < h; row++) {
|
||||||
(size_t)copyBytes);
|
srcRow = &s->pixels[(dy + row) * SURFACE_BYTES_PER_ROW];
|
||||||
|
memcpy(&backup->bytes[(uint16_t)row * (uint16_t)copyBytes],
|
||||||
|
&srcRow[byteStart],
|
||||||
|
(size_t)copyBytes);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
halSpriteSavePlanes(s, clippedX, dy, (uint16_t)clippedW, (uint16_t)h, backup->bytes);
|
||||||
} /* end slow path */
|
} /* end slow path */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,16 @@
|
||||||
#define SPRITE_OP_RESTORE 2
|
#define SPRITE_OP_RESTORE 2
|
||||||
#define SPRITE_OP_COUNT 3
|
#define SPRITE_OP_COUNT 3
|
||||||
|
|
||||||
|
// Per-platform shift index used by the dispatcher. Chunky 4bpp ports
|
||||||
|
// store one byte per pixel pair (one nibble per pixel) so the only sub-byte alignment is
|
||||||
|
// x % 2. Amiga planar packs 8 pixels per plane byte so all 8
|
||||||
|
// alignments matter.
|
||||||
|
#if defined(JOEYLIB_PLATFORM_AMIGA)
|
||||||
|
#define SPRITE_SHIFT_INDEX(x) ((uint8_t)((x) & 7))
|
||||||
|
#else
|
||||||
|
#define SPRITE_SHIFT_INDEX(x) ((uint8_t)((x) & 1))
|
||||||
|
#endif
|
||||||
|
|
||||||
// Sentinel stored in routineOffsets[shift][op] when that op's emitter
|
// Sentinel stored in routineOffsets[shift][op] when that op's emitter
|
||||||
// returned 0 bytes (i.e., the platform doesn't implement compiled
|
// returned 0 bytes (i.e., the platform doesn't implement compiled
|
||||||
// codegen for that op yet). Distinct from a real offset of 0, which
|
// codegen for that op yet). Distinct from a real offset of 0, which
|
||||||
|
|
|
||||||
|
|
@ -65,9 +65,10 @@ void surfaceCopy(SurfaceT *dst, const SurfaceT *src) {
|
||||||
if (dst == NULL || src == NULL || dst == src) {
|
if (dst == NULL || src == NULL || dst == src) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
memcpy(dst->pixels, src->pixels, SURFACE_PIXELS_SIZE);
|
halSurfaceCopyChunky(dst, src); /* memcpy on chunky ports; no-op on planar */
|
||||||
memcpy(dst->scb, src->scb, sizeof(src->scb));
|
memcpy(dst->scb, src->scb, sizeof(src->scb));
|
||||||
memcpy(dst->palette, src->palette, sizeof(src->palette));
|
memcpy(dst->palette, src->palette, sizeof(src->palette));
|
||||||
|
halSurfaceCopyPlanes(dst, src); /* 4 plane memcpys on planar ports; no-op on chunky */
|
||||||
surfaceMarkDirtyAll(dst);
|
surfaceMarkDirtyAll(dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -79,11 +80,10 @@ SurfaceT *surfaceCreate(void) {
|
||||||
if (s == NULL) {
|
if (s == NULL) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
s->pixels = (uint8_t *)calloc(1, SURFACE_PIXELS_SIZE);
|
/* halSurfaceAllocPixels returns NULL on planar ports (Amiga); the
|
||||||
if (s->pixels == NULL) {
|
* primary storage is the port-allocated planes via portData below. */
|
||||||
free(s);
|
s->pixels = halSurfaceAllocPixels();
|
||||||
return NULL;
|
s->portData = halSurfaceAllocPortData(s, false);
|
||||||
}
|
|
||||||
paletteInitDefault(s);
|
paletteInitDefault(s);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
@ -96,11 +96,44 @@ void surfaceDestroy(SurfaceT *s) {
|
||||||
if (s == gStage) {
|
if (s == gStage) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
free(s->pixels);
|
halSurfaceFreePortData(s, false, s->portData);
|
||||||
|
halSurfaceFreePixels(s->pixels);
|
||||||
free(s);
|
free(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Cheapest deterministic hash that still detects per-byte changes:
|
||||||
|
// (hash << 1) ^ byte, a single 16-bit accumulator. ORCA-C / 65816
|
||||||
|
// compiles to ASL + EOR -- about 35 cyc per byte. A 32-bit multiply
|
||||||
|
// FNV-style hash takes ~200 cyc per byte via ~UMUL4, which adds
|
||||||
|
// 80+ seconds to a UBER run on IIgs. Discrimination is weaker than
|
||||||
|
// FNV but plenty for cross-port validation: we only need "did the
|
||||||
|
// same logical-pixel sequence produce the same hash?" -- not
|
||||||
|
// crypto-grade collision resistance over arbitrary inputs.
|
||||||
|
//
|
||||||
|
// Walks the chunky pixel buffer byte-by-byte, the same logical-pixel
|
||||||
|
// ordering on every chunky-format port (IIgs, DOS, Amiga and ST
|
||||||
|
// while still chunky). Ports that have dropped s->pixels (planar
|
||||||
|
// Amiga) read their planes natively inside halSurfaceHash while
|
||||||
|
// producing the same logical hash.
|
||||||
|
/* Cross-port FNV-style hash of pixels + SCB + palette. The hash logic
|
||||||
|
* (multiplier streams, byte ordering for palette) is identical across
|
||||||
|
* ports, but the pixel READS go through the port HAL so chunky ports
|
||||||
|
* walk s->pixels and planar ports walk plane bits and assemble nibble
|
||||||
|
* pairs into chunky bytes for the hash. Both produce the same logical-
|
||||||
|
* pixel hash because they hash the same logical pixel sequence in the
|
||||||
|
* same chunky byte order. SCB and palette are still hashed inline
|
||||||
|
* here because they live in the SurfaceT struct on every port (no
|
||||||
|
* port-specific storage) and the byte/value-with-explicit-byte-order
|
||||||
|
* walks are already endian-independent. */
|
||||||
|
uint32_t surfaceHash(const SurfaceT *s) {
|
||||||
|
if (s == NULL) {
|
||||||
|
return 0u;
|
||||||
|
}
|
||||||
|
return halSurfaceHash(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool surfaceLoadFile(SurfaceT *dst, const char *path) {
|
bool surfaceLoadFile(SurfaceT *dst, const char *path) {
|
||||||
FILE *fp;
|
FILE *fp;
|
||||||
long fileSize;
|
long fileSize;
|
||||||
|
|
@ -125,7 +158,7 @@ bool surfaceLoadFile(SurfaceT *dst, const char *path) {
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (fread(dst->pixels, 1, SURFACE_PIXELS_SIZE, fp) != SURFACE_PIXELS_SIZE) {
|
if (!halSurfaceLoadFileChunky(dst, fp)) {
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -153,7 +186,7 @@ bool surfaceSaveFile(const SurfaceT *src, const char *path) {
|
||||||
if (fp == NULL) {
|
if (fp == NULL) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (fwrite(src->pixels, 1, SURFACE_PIXELS_SIZE, fp) != SURFACE_PIXELS_SIZE) {
|
if (!halSurfaceSaveFileChunky(src, fp)) {
|
||||||
fclose(fp);
|
fclose(fp);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
@ -228,13 +261,14 @@ bool stageAlloc(void) {
|
||||||
if (gStage == NULL) {
|
if (gStage == NULL) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
/* halStageAllocPixels returns NULL on planar ports (Amiga) where
|
||||||
|
* the chunky shadow doesn't exist; the planes from portData are
|
||||||
|
* the source of truth. NULL pixels is no longer a failure. */
|
||||||
gStage->pixels = halStageAllocPixels();
|
gStage->pixels = halStageAllocPixels();
|
||||||
if (gStage->pixels == NULL) {
|
if (gStage->pixels != NULL) {
|
||||||
free(gStage);
|
memset(gStage->pixels, 0, SURFACE_PIXELS_SIZE);
|
||||||
gStage = NULL;
|
|
||||||
return false;
|
|
||||||
}
|
}
|
||||||
memset(gStage->pixels, 0, SURFACE_PIXELS_SIZE);
|
gStage->portData = halSurfaceAllocPortData(gStage, true);
|
||||||
stageDirtyClearAll();
|
stageDirtyClearAll();
|
||||||
paletteInitDefault(gStage);
|
paletteInitDefault(gStage);
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -255,6 +289,7 @@ void stageFree(void) {
|
||||||
if (gStage == NULL) {
|
if (gStage == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
halSurfaceFreePortData(gStage, true, gStage->portData);
|
||||||
halStageFreePixels(gStage->pixels);
|
halStageFreePixels(gStage->pixels);
|
||||||
free(gStage);
|
free(gStage);
|
||||||
gStage = NULL;
|
gStage = NULL;
|
||||||
|
|
|
||||||
|
|
@ -14,8 +14,17 @@
|
||||||
// auto-mirroring to $E1). Caller-side `s->pixels[i]` syntax is
|
// auto-mirroring to $E1). Caller-side `s->pixels[i]` syntax is
|
||||||
// unchanged; only allocation/copy paths in surface.c shift to a
|
// unchanged; only allocation/copy paths in surface.c shift to a
|
||||||
// two-buffer model.
|
// two-buffer model.
|
||||||
|
//
|
||||||
|
// portData is per-port opaque storage. On chunky ports (IIgs, DOS) it
|
||||||
|
// stays NULL -- pixels is the source of truth. On planar ports
|
||||||
|
// (Amiga, Atari ST) it points to a port-private struct describing the
|
||||||
|
// 4 bitplanes (Amiga: 4 separate plane buffers + stride; ST: single
|
||||||
|
// interleaved buffer + stride). Cross-platform code never touches it
|
||||||
|
// directly -- all primitive access goes through halFast* on planar
|
||||||
|
// ports. See project_planar_68k_plan.md for the full architecture.
|
||||||
struct SurfaceT {
|
struct SurfaceT {
|
||||||
uint8_t *pixels;
|
uint8_t *pixels;
|
||||||
|
void *portData;
|
||||||
uint8_t scb[SURFACE_HEIGHT];
|
uint8_t scb[SURFACE_HEIGHT];
|
||||||
uint16_t palette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
|
uint16_t palette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
|
||||||
};
|
};
|
||||||
|
|
@ -38,6 +47,18 @@ struct SurfaceT {
|
||||||
extern uint8_t gStageMinWord[SURFACE_HEIGHT];
|
extern uint8_t gStageMinWord[SURFACE_HEIGHT];
|
||||||
extern uint8_t gStageMaxWord[SURFACE_HEIGHT];
|
extern uint8_t gStageMaxWord[SURFACE_HEIGHT];
|
||||||
|
|
||||||
|
// Per-byte mixer for surfaceHash. Two-stream: lo = lo * 31 + b, hi = hi * 251 + b.
|
||||||
|
// Strength-reduced to shifts so ORCA-C doesn't emit `~UMUL2` (~150 cyc
|
||||||
|
// per call); 32 KB hashed twice -> ~5 minutes per UBER run. The
|
||||||
|
// shift form is 16-bit-equivalent (mod 2^16) so hash values are
|
||||||
|
// identical to the original `* 31u` / `* 251u` form.
|
||||||
|
// lo * 31 == (lo << 5) - lo
|
||||||
|
// hi * 251 == (hi << 8) - (hi << 2) - hi
|
||||||
|
#define SURFACE_HASH_MIX_BYTE(lo_, hi_, b_) do { \
|
||||||
|
(lo_) = (uint16_t)(((((lo_) << 5) - (lo_)) + (b_))); \
|
||||||
|
(hi_) = (uint16_t)((((hi_) << 8) - ((hi_) << 2) - (hi_)) + (b_)); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
// Stage SCB / palette dirty flags. scbSet* and paletteSet set them
|
// Stage SCB / palette dirty flags. scbSet* and paletteSet set them
|
||||||
// true when the stage's data is modified; the per-port present code
|
// true when the stage's data is modified; the per-port present code
|
||||||
// checks the flags and clears after upload. Replaces a per-frame
|
// checks the flags and clears after upload. Replaces a per-frame
|
||||||
|
|
@ -50,6 +71,15 @@ extern bool gStagePaletteDirty;
|
||||||
// bands are widened to cover the rect. If `s` is any other surface,
|
// bands are widened to cover the rect. If `s` is any other surface,
|
||||||
// the call is a no-op -- non-stage surfaces never get presented, so
|
// the call is a no-op -- non-stage surfaces never get presented, so
|
||||||
// they don't carry dirty state.
|
// they don't carry dirty state.
|
||||||
|
//
|
||||||
|
// Planar ports rely on the chunky shadow + c2p path through Phase 8.
|
||||||
|
// Planar-native primitives (Phases 3+) dual-write: they update both
|
||||||
|
// the chunky pixels and the bitplanes in the same call, so c2p at
|
||||||
|
// present time always derives correct planes from up-to-date chunky.
|
||||||
|
// Phase 9 deletes the chunky shadow + c2p; only at that point will
|
||||||
|
// per-row planar-vs-chunky tracking even be a possible question, and
|
||||||
|
// the plan is to avoid it entirely there too (planes become the only
|
||||||
|
// source of truth).
|
||||||
void surfaceMarkDirtyRect(const SurfaceT *s, int16_t x, int16_t y, int16_t w, int16_t h);
|
void surfaceMarkDirtyRect(const SurfaceT *s, int16_t x, int16_t y, int16_t w, int16_t h);
|
||||||
|
|
||||||
// Shorthand for "every row, full width" -- used by surfaceClear and
|
// Shorthand for "every row, full width" -- used by surfaceClear and
|
||||||
|
|
|
||||||
|
|
@ -147,6 +147,7 @@ void tileCopy(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src,
|
||||||
if (!halFastTileCopy(dstRow0, srcRow0)) {
|
if (!halFastTileCopy(dstRow0, srcRow0)) {
|
||||||
copyTileOpaque(dstRow0, srcRow0);
|
copyTileOpaque(dstRow0, srcRow0);
|
||||||
}
|
}
|
||||||
|
halTileCopyPlanes(dst, dstBx, dstBy, src, srcBx, srcBy);
|
||||||
surfaceMarkDirtyRect(dst, (int16_t)dstPixelX, (int16_t)dstPixelY,
|
surfaceMarkDirtyRect(dst, (int16_t)dstPixelX, (int16_t)dstPixelY,
|
||||||
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
||||||
}
|
}
|
||||||
|
|
@ -178,6 +179,7 @@ void tileCopyMasked(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT
|
||||||
if (!halFastTileCopyMasked(dstRow0, srcRow0, transparentIndex)) {
|
if (!halFastTileCopyMasked(dstRow0, srcRow0, transparentIndex)) {
|
||||||
copyTileMasked(dstRow0, srcRow0, transparentIndex);
|
copyTileMasked(dstRow0, srcRow0, transparentIndex);
|
||||||
}
|
}
|
||||||
|
halTileCopyMaskedPlanes(dst, dstBx, dstBy, src, srcBx, srcBy, transparentIndex);
|
||||||
surfaceMarkDirtyRect(dst, (int16_t)dstPixelX, (int16_t)dstPixelY,
|
surfaceMarkDirtyRect(dst, (int16_t)dstPixelX, (int16_t)dstPixelY,
|
||||||
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
||||||
}
|
}
|
||||||
|
|
@ -209,6 +211,7 @@ void tileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex) {
|
||||||
row += SURFACE_BYTES_PER_ROW;
|
row += SURFACE_BYTES_PER_ROW;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halTileFillPlanes(s, bx, by, colorIndex);
|
||||||
surfaceMarkDirtyRect(s, (int16_t)pixelX, (int16_t)pixelY,
|
surfaceMarkDirtyRect(s, (int16_t)pixelX, (int16_t)pixelY,
|
||||||
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
||||||
}
|
}
|
||||||
|
|
@ -241,6 +244,7 @@ void tilePaste(SurfaceT *dst, uint8_t bx, uint8_t by, const TileT *in) {
|
||||||
src += TILE_BYTES_PER_ROW;
|
src += TILE_BYTES_PER_ROW;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halTilePastePlanes(dst, bx, by, &in->pixels[0]);
|
||||||
surfaceMarkDirtyRect(dst, (int16_t)pixelX, (int16_t)pixelY,
|
surfaceMarkDirtyRect(dst, (int16_t)pixelX, (int16_t)pixelY,
|
||||||
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
TILE_PIXELS_PER_SIDE, TILE_PIXELS_PER_SIDE);
|
||||||
}
|
}
|
||||||
|
|
@ -261,9 +265,12 @@ void tileSnap(const SurfaceT *src, uint8_t bx, uint8_t by, TileT *out) {
|
||||||
}
|
}
|
||||||
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
||||||
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
||||||
srcRow = &src->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
|
||||||
dst = &out->pixels[0];
|
dst = &out->pixels[0];
|
||||||
if (!halFastTileSnap(dst, srcRow)) {
|
/* On planar ports (s->pixels NULL) the chunky read path is
|
||||||
|
* skipped; halTileSnapPlanes below derives the tile bytes from
|
||||||
|
* the bitplanes. */
|
||||||
|
if (src->pixels != NULL && !halFastTileSnap(dst, &src->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)])) {
|
||||||
|
srcRow = &src->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||||
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
||||||
dst[0] = srcRow[0];
|
dst[0] = srcRow[0];
|
||||||
dst[1] = srcRow[1];
|
dst[1] = srcRow[1];
|
||||||
|
|
@ -273,4 +280,5 @@ void tileSnap(const SurfaceT *src, uint8_t bx, uint8_t by, TileT *out) {
|
||||||
dst += TILE_BYTES_PER_ROW;
|
dst += TILE_BYTES_PER_ROW;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
halTileSnapPlanes(src, bx, by, &out->pixels[0]);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
270
src/port/amiga/circle.s
Normal file
270
src/port/amiga/circle.s
Normal file
|
|
@ -0,0 +1,270 @@
|
||||||
|
| Amiga planar circle outline V4 -- 16-way color-specialized.
|
||||||
|
|
|
||||||
|
| Per Bresenham iter:
|
||||||
|
| 1. Precompute 4 xp records (xp_byte_w + bitMask_b + notMask_b) for
|
||||||
|
| cx +/- bx and cx +/- by, stored at sp+0..15 (4 records x 4 bytes).
|
||||||
|
| 2. Precompute 4 yp40 words for cy +/- by and cy +/- bx, stored at
|
||||||
|
| sp+16..23 (4 words x 2 bytes).
|
||||||
|
| 3. Plot 8 octant pixels with hardcoded color: each pixel does 4
|
||||||
|
| branchless plane RMW ops (or.b for set bits, and.b for clear
|
||||||
|
| bits) -- no btst, no per-plane branch.
|
||||||
|
| 4. Bresenham step.
|
||||||
|
|
|
||||||
|
| At function entry the color is masked to 4 bits and used as the index
|
||||||
|
| into a 16-entry jump table that selects the matching main loop.
|
||||||
|
| Each main loop has the color hardcoded into the per-plane RMW ops.
|
||||||
|
|
|
||||||
|
| The branchless plot saves ~20-28 cyc per plane vs V3's btst+branch
|
||||||
|
| pattern -- ~640-900 cyc per Bresenham iter.
|
||||||
|
|
|
||||||
|
| ABI: cdecl. d2-d7/a2-a6 callee-save.
|
||||||
|
|
|
||||||
|
| void surface68kAmigaCircleOutline(uint8_t *p0, uint8_t *p1,
|
||||||
|
| uint8_t *p2, uint8_t *p3,
|
||||||
|
| uint16_t cx, uint16_t cy,
|
||||||
|
| uint16_t r, uint8_t color);
|
||||||
|
|
|
||||||
|
| Register allocation across the iter loop:
|
||||||
|
| d2.w = bx (Bresenham)
|
||||||
|
| d3.w = by (Bresenham)
|
||||||
|
| d4.w = err (Bresenham)
|
||||||
|
| d5.w = cx (cached)
|
||||||
|
| a4 = cy (cached, sign-extended)
|
||||||
|
| a0..a3 = plane bases
|
||||||
|
| a5 = bitMaskLut
|
||||||
|
| d0,d1,d6,d7 = scratch in precompute / plot
|
||||||
|
|
|
||||||
|
| Scratch block (24 bytes) at sp+0..23:
|
||||||
|
| sp+0..3: xp1 record [xp_byte_w, bitMask_b, notMask_b] for cx+bx
|
||||||
|
| sp+4..7: xp2 record for cx-bx
|
||||||
|
| sp+8..11: xp3 record for cx+by
|
||||||
|
| sp+12..15: xp4 record for cx-by
|
||||||
|
| sp+16..17: yp1 word (cy+by) * 40
|
||||||
|
| sp+18..19: yp2 word (cy-by) * 40
|
||||||
|
| sp+20..21: yp3 word (cy+bx) * 40
|
||||||
|
| sp+22..23: yp4 word (cy-bx) * 40
|
||||||
|
|
||||||
|
.text
|
||||||
|
|
||||||
|
|
||||||
|
| ---- XP_REC: build xp record at sp+slot for xp = cx <signOp> <xreg> ----
|
||||||
|
| signOp: add or sub
|
||||||
|
| xreg: %d2 (bx) or %d3 (by)
|
||||||
|
| slot: 0, 4, 8, or 12
|
||||||
|
| Trashes: d1, d6, d7
|
||||||
|
|
||||||
|
.macro XP_REC slot, signOp, xreg
|
||||||
|
move.w %d5,%d6
|
||||||
|
\signOp\().w \xreg,%d6 | d6 = xp
|
||||||
|
move.w %d6,%d7
|
||||||
|
lsr.w #3,%d7 | d7 = xp >> 3 (xp_byte)
|
||||||
|
and.w #7,%d6 | d6 = xp & 7
|
||||||
|
move.b (%a5,%d6.w),%d6 | d6 = bitMask
|
||||||
|
move.b %d6,%d1
|
||||||
|
not.b %d1 | d1 = notMask
|
||||||
|
move.w %d7,\slot(%sp) | xp_byte word
|
||||||
|
move.b %d6,\slot+2(%sp) | bitMask byte
|
||||||
|
move.b %d1,\slot+3(%sp) | notMask byte
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
| ---- YP_REC: build yp40 word at sp+slot for yp = cy <signOp> <yreg> ----
|
||||||
|
|
||||||
|
.macro YP_REC slot, signOp, yreg
|
||||||
|
move.l %a4,%d6
|
||||||
|
\signOp\().w \yreg,%d6 | d6.w = yp
|
||||||
|
move.w %d6,%d0
|
||||||
|
lsl.w #3,%d6 | d6 = yp << 3
|
||||||
|
lsl.w #5,%d0 | d0 = yp << 5
|
||||||
|
add.w %d6,%d0 | d0 = yp * 40
|
||||||
|
move.w %d0,\slot(%sp)
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
| ---- PLOT_FIXED: plot one pixel with hardcoded 4-bit color ----
|
||||||
|
| slotYp: 16, 18, 20, or 22 (yp40 word slot)
|
||||||
|
| slotXp: 0, 4, 8, or 12 (xp record slot)
|
||||||
|
| color: literal 0..15
|
||||||
|
| Trashes: d0, d1, d7
|
||||||
|
|
||||||
|
.macro PLOT_FIXED slotYp, slotXp, color
|
||||||
|
move.w \slotYp(%sp),%d0 | d0 = yp40
|
||||||
|
add.w \slotXp(%sp),%d0 | d0 += xp_byte
|
||||||
|
move.b \slotXp+2(%sp),%d1 | d1.b = bitMask
|
||||||
|
move.b \slotXp+3(%sp),%d7 | d7.b = notMask
|
||||||
|
.if ((\color) & 1)
|
||||||
|
or.b %d1,(%a0,%d0.w)
|
||||||
|
.else
|
||||||
|
and.b %d7,(%a0,%d0.w)
|
||||||
|
.endif
|
||||||
|
.if ((\color) & 2)
|
||||||
|
or.b %d1,(%a1,%d0.w)
|
||||||
|
.else
|
||||||
|
and.b %d7,(%a1,%d0.w)
|
||||||
|
.endif
|
||||||
|
.if ((\color) & 4)
|
||||||
|
or.b %d1,(%a2,%d0.w)
|
||||||
|
.else
|
||||||
|
and.b %d7,(%a2,%d0.w)
|
||||||
|
.endif
|
||||||
|
.if ((\color) & 8)
|
||||||
|
or.b %d1,(%a3,%d0.w)
|
||||||
|
.else
|
||||||
|
and.b %d7,(%a3,%d0.w)
|
||||||
|
.endif
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
| ---- PLOT_8: plot all 8 octant pixels for a given hardcoded color ----
|
||||||
|
|
||||||
|
.macro PLOT_8 color
|
||||||
|
PLOT_FIXED 16, 0, \color | (cx+bx, cy+by)
|
||||||
|
PLOT_FIXED 16, 4, \color | (cx-bx, cy+by)
|
||||||
|
PLOT_FIXED 18, 0, \color | (cx+bx, cy-by)
|
||||||
|
PLOT_FIXED 18, 4, \color | (cx-bx, cy-by)
|
||||||
|
PLOT_FIXED 20, 8, \color | (cx+by, cy+bx)
|
||||||
|
PLOT_FIXED 20, 12, \color | (cx-by, cy+bx)
|
||||||
|
PLOT_FIXED 22, 8, \color | (cx+by, cy-bx)
|
||||||
|
PLOT_FIXED 22, 12, \color | (cx-by, cy-bx)
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
| ---- CO_BODY: full Bresenham loop body for a hardcoded color ----
|
||||||
|
| Generates the per-iter precompute, branchless plot, and Bresenham
|
||||||
|
| step. Uses unique labels via \color suffix.
|
||||||
|
|
||||||
|
.macro CO_BODY color
|
||||||
|
XP_REC 0, add, %d2 | xp1 = cx+bx
|
||||||
|
XP_REC 4, sub, %d2 | xp2 = cx-bx
|
||||||
|
XP_REC 8, add, %d3 | xp3 = cx+by
|
||||||
|
XP_REC 12, sub, %d3 | xp4 = cx-by
|
||||||
|
YP_REC 16, add, %d3 | yp1 = cy+by
|
||||||
|
YP_REC 18, sub, %d3 | yp2 = cy-by
|
||||||
|
YP_REC 20, add, %d2 | yp3 = cy+bx
|
||||||
|
YP_REC 22, sub, %d2 | yp4 = cy-bx
|
||||||
|
|
||||||
|
PLOT_8 \color
|
||||||
|
|
||||||
|
addq.w #1,%d3
|
||||||
|
tst.w %d4
|
||||||
|
bgt .LcoDecX_\color
|
||||||
|
add.w %d3,%d4
|
||||||
|
add.w %d3,%d4
|
||||||
|
addq.w #1,%d4
|
||||||
|
bra.w .LcoLoop_\color
|
||||||
|
.LcoDecX_\color:
|
||||||
|
subq.w #1,%d2
|
||||||
|
add.w %d3,%d4
|
||||||
|
add.w %d3,%d4
|
||||||
|
sub.w %d2,%d4
|
||||||
|
sub.w %d2,%d4
|
||||||
|
addq.w #1,%d4
|
||||||
|
bra.w .LcoLoop_\color
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
| ---- CO_LOOP_HDR: emit a labelled loop header for a color ----
|
||||||
|
|
||||||
|
.macro CO_LOOP_HDR color
|
||||||
|
.LcoLoop_\color:
|
||||||
|
cmp.w %d3,%d2
|
||||||
|
bcs.w .LcoDone
|
||||||
|
CO_BODY \color
|
||||||
|
.endm
|
||||||
|
|
||||||
|
|
||||||
|
| ---- Function entry ----
|
||||||
|
|
||||||
|
.equ SP_SAVED, 44
|
||||||
|
.equ SP_LOCAL, 24
|
||||||
|
|
||||||
|
.equ SP_OFF, (SP_SAVED + 4 + SP_LOCAL)
|
||||||
|
|
||||||
|
.equ SP_P0, SP_OFF + 0
|
||||||
|
.equ SP_P1, SP_OFF + 4
|
||||||
|
.equ SP_P2, SP_OFF + 8
|
||||||
|
.equ SP_P3, SP_OFF + 12
|
||||||
|
.equ SP_CX, SP_OFF + 16 + 2
|
||||||
|
.equ SP_CY, SP_OFF + 20 + 2
|
||||||
|
.equ SP_R, SP_OFF + 24 + 2
|
||||||
|
.equ SP_COLOR, SP_OFF + 28 + 3
|
||||||
|
|
||||||
|
.globl _surface68kAmigaCircleOutline
|
||||||
|
|
||||||
|
_surface68kAmigaCircleOutline:
|
||||||
|
movem.l %d2-%d7/%a2-%a6,-(%sp)
|
||||||
|
lea -SP_LOCAL(%sp),%sp
|
||||||
|
|
||||||
|
| Plane bases.
|
||||||
|
move.l SP_P0(%sp),%a0
|
||||||
|
move.l SP_P1(%sp),%a1
|
||||||
|
move.l SP_P2(%sp),%a2
|
||||||
|
move.l SP_P3(%sp),%a3
|
||||||
|
lea bitMaskLut(%pc),%a5
|
||||||
|
|
||||||
|
| Cache cx in d5, cy (sign-extended) in a4.
|
||||||
|
move.w SP_CX(%sp),%d5
|
||||||
|
move.w SP_CY(%sp),%d6
|
||||||
|
ext.l %d6
|
||||||
|
movea.l %d6,%a4
|
||||||
|
|
||||||
|
| Bresenham init.
|
||||||
|
move.w SP_R(%sp),%d2 | bx = r
|
||||||
|
moveq #0,%d3 | by = 0
|
||||||
|
moveq #1,%d4
|
||||||
|
sub.w %d2,%d4 | err = 1 - bx
|
||||||
|
|
||||||
|
| Dispatch on color (low 4 bits) -> one of 16 main loops.
|
||||||
|
| Each table entry is a bra.w (4 bytes), so index *= 4.
|
||||||
|
moveq #0,%d6
|
||||||
|
move.b SP_COLOR(%sp),%d6
|
||||||
|
and.w #0x0F,%d6
|
||||||
|
add.w %d6,%d6
|
||||||
|
add.w %d6,%d6
|
||||||
|
lea .LcoTable(%pc),%a6
|
||||||
|
jmp 0(%a6,%d6.w)
|
||||||
|
|
||||||
|
.LcoTable:
|
||||||
|
bra.w .LcoLoop_0
|
||||||
|
bra.w .LcoLoop_1
|
||||||
|
bra.w .LcoLoop_2
|
||||||
|
bra.w .LcoLoop_3
|
||||||
|
bra.w .LcoLoop_4
|
||||||
|
bra.w .LcoLoop_5
|
||||||
|
bra.w .LcoLoop_6
|
||||||
|
bra.w .LcoLoop_7
|
||||||
|
bra.w .LcoLoop_8
|
||||||
|
bra.w .LcoLoop_9
|
||||||
|
bra.w .LcoLoop_10
|
||||||
|
bra.w .LcoLoop_11
|
||||||
|
bra.w .LcoLoop_12
|
||||||
|
bra.w .LcoLoop_13
|
||||||
|
bra.w .LcoLoop_14
|
||||||
|
bra.w .LcoLoop_15
|
||||||
|
|
||||||
|
CO_LOOP_HDR 0
|
||||||
|
CO_LOOP_HDR 1
|
||||||
|
CO_LOOP_HDR 2
|
||||||
|
CO_LOOP_HDR 3
|
||||||
|
CO_LOOP_HDR 4
|
||||||
|
CO_LOOP_HDR 5
|
||||||
|
CO_LOOP_HDR 6
|
||||||
|
CO_LOOP_HDR 7
|
||||||
|
CO_LOOP_HDR 8
|
||||||
|
CO_LOOP_HDR 9
|
||||||
|
CO_LOOP_HDR 10
|
||||||
|
CO_LOOP_HDR 11
|
||||||
|
CO_LOOP_HDR 12
|
||||||
|
CO_LOOP_HDR 13
|
||||||
|
CO_LOOP_HDR 14
|
||||||
|
CO_LOOP_HDR 15
|
||||||
|
|
||||||
|
.LcoDone:
|
||||||
|
lea SP_LOCAL(%sp),%sp
|
||||||
|
movem.l (%sp)+,%d2-%d7/%a2-%a6
|
||||||
|
rts
|
||||||
|
|
||||||
|
|
||||||
|
.align 2
|
||||||
|
bitMaskLut:
|
||||||
|
.byte 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01
|
||||||
1857
src/port/amiga/hal.c
1857
src/port/amiga/hal.c
File diff suppressed because it is too large
Load diff
|
|
@ -526,26 +526,6 @@ void halPresent(const SurfaceT *src) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h) {
|
|
||||||
uint16_t groupStart;
|
|
||||||
uint16_t groupEnd;
|
|
||||||
|
|
||||||
if (src == NULL || !gModeSet) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
refreshPaletteStateIfNeeded(src);
|
|
||||||
// Each c2p group covers 16 horizontal pixels. Round dirty pixel
|
|
||||||
// range to the enclosing group range to keep the planar word
|
|
||||||
// alignment without missing edge pixels.
|
|
||||||
groupStart = (uint16_t)(x >> 4);
|
|
||||||
groupEnd = (uint16_t)(((uint16_t)x + w + 15) >> 4);
|
|
||||||
if (groupEnd > ST_GROUPS_PER_ROW) {
|
|
||||||
groupEnd = ST_GROUPS_PER_ROW;
|
|
||||||
}
|
|
||||||
c2pRange(src, y, y + (int16_t)h, groupStart, groupEnd);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Vsync() is XBIOS opcode 37; mintlib exposes it directly. It blocks
|
// Vsync() is XBIOS opcode 37; mintlib exposes it directly. It blocks
|
||||||
// until the next 50 Hz (PAL) or 60 Hz (NTSC) vertical blank.
|
// until the next 50 Hz (PAL) or 60 Hz (NTSC) vertical blank.
|
||||||
void halWaitVBL(void) {
|
void halWaitVBL(void) {
|
||||||
|
|
@ -730,6 +710,20 @@ bool halFastFloodWalk(uint8_t *row, int16_t startX, uint8_t matchColor, uint8_t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halFloodWalkPlanes(const SurfaceT *s, int16_t startX, int16_t y, uint8_t matchColor, uint8_t newColor, bool matchEqual, bool *seedMatched, int16_t *leftXOut, int16_t *rightXOut) {
|
||||||
|
(void)s; (void)startX; (void)y; (void)matchColor; (void)newColor; (void)matchEqual;
|
||||||
|
(void)seedMatched; (void)leftXOut; (void)rightXOut;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halFloodScanRowPlanes(const SurfaceT *s, int16_t leftX, int16_t rightX, int16_t scanY, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
|
||||||
|
(void)s; (void)leftX; (void)rightX; (void)scanY; (void)matchColor; (void)newColor; (void)matchEqual;
|
||||||
|
(void)markBuf;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
|
bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
|
||||||
(void)row;
|
(void)row;
|
||||||
(void)leftX;
|
(void)leftX;
|
||||||
|
|
@ -798,6 +792,146 @@ bool halFastTileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint16_t fillWord) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Phase-1 planar plumbing: portData hooks declared and exported, but
|
||||||
|
// returning NULL keeps the ST port operating in the legacy
|
||||||
|
// chunky-with-c2p model. Phase 4 replaces this with an interleaved
|
||||||
|
// planar buffer + stride blob, and rewrites every halFast* primitive
|
||||||
|
// to read/write planes directly.
|
||||||
|
void *halSurfaceAllocPortData(SurfaceT *s, bool isStage) {
|
||||||
|
(void)s;
|
||||||
|
(void)isStage;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halSurfaceFreePortData(SurfaceT *s, bool isStage, void *portData) {
|
||||||
|
(void)s;
|
||||||
|
(void)isStage;
|
||||||
|
(void)portData;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ST planar dual-write isn't implemented yet (interleaved word-planar
|
||||||
|
// layout needs a different code path than Amiga's separate plane
|
||||||
|
// buffers). Stub for now; chunky shadow + c2p still drives display.
|
||||||
|
void halFillRectPlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex) {
|
||||||
|
(void)s;
|
||||||
|
(void)x;
|
||||||
|
(void)y;
|
||||||
|
(void)w;
|
||||||
|
(void)h;
|
||||||
|
(void)colorIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halSurfaceCopyPlanes(SurfaceT *dst, const SurfaceT *src) {
|
||||||
|
(void)dst;
|
||||||
|
(void)src;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halTileFillPlanes(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex) {
|
||||||
|
(void)s; (void)bx; (void)by; (void)colorIndex;
|
||||||
|
}
|
||||||
|
void halTileCopyPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy) {
|
||||||
|
(void)dst; (void)dstBx; (void)dstBy; (void)src; (void)srcBx; (void)srcBy;
|
||||||
|
}
|
||||||
|
void halTileCopyMaskedPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy, uint8_t transparentIndex) {
|
||||||
|
(void)dst; (void)dstBx; (void)dstBy; (void)src; (void)srcBx; (void)srcBy; (void)transparentIndex;
|
||||||
|
}
|
||||||
|
void halTilePastePlanes(SurfaceT *dst, uint8_t bx, uint8_t by, const uint8_t *chunkyTile) {
|
||||||
|
(void)dst; (void)bx; (void)by; (void)chunkyTile;
|
||||||
|
}
|
||||||
|
void halTileSnapPlanes(const SurfaceT *src, uint8_t bx, uint8_t by, uint8_t *chunkyTileOut) {
|
||||||
|
(void)src; (void)bx; (void)by; (void)chunkyTileOut;
|
||||||
|
}
|
||||||
|
void halSpriteDrawPlanes(SurfaceT *s, const SpriteT *sp, int16_t x, int16_t y) {
|
||||||
|
(void)s; (void)sp; (void)x; (void)y;
|
||||||
|
}
|
||||||
|
void halBlitRectPlanes(SurfaceT *dst, int16_t x, int16_t y, const uint8_t *srcBytes, int16_t srcX0, int16_t srcY0, int16_t copyW, int16_t copyH, int16_t srcRowBytes, uint16_t transparent) {
|
||||||
|
(void)dst; (void)x; (void)y; (void)srcBytes; (void)srcX0; (void)srcY0;
|
||||||
|
(void)copyW; (void)copyH; (void)srcRowBytes; (void)transparent;
|
||||||
|
}
|
||||||
|
void halSpriteSavePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t *dstPlaneBytes) {
|
||||||
|
(void)s; (void)x; (void)y; (void)w; (void)h; (void)dstPlaneBytes;
|
||||||
|
}
|
||||||
|
void halSpriteRestorePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, const uint8_t *srcPlaneBytes) {
|
||||||
|
(void)s; (void)x; (void)y; (void)w; (void)h; (void)srcPlaneBytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Phase 9 chunky reader hooks -- ST is still chunky-shadow + c2p,
|
||||||
|
* so reads come from s->pixels just like DOS / IIgs. */
|
||||||
|
uint8_t halSamplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
||||||
|
uint8_t byte = s->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
|
||||||
|
if (x & 1) return (uint8_t)(byte & 0x0Fu);
|
||||||
|
return (uint8_t)((byte & 0xF0u) >> 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t halSurfaceHash(const SurfaceT *s) {
|
||||||
|
uint16_t lo = 0xACE1u, hi = 0x1357u, blocks, n, v;
|
||||||
|
const uint8_t *p;
|
||||||
|
const uint16_t *w;
|
||||||
|
uint8_t b;
|
||||||
|
p = s->pixels;
|
||||||
|
blocks = (uint16_t)(SURFACE_PIXELS_SIZE / 8);
|
||||||
|
do {
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
blocks--;
|
||||||
|
} while (blocks > 0u);
|
||||||
|
p = s->scb;
|
||||||
|
for (n = 0; n < (uint16_t)SURFACE_HEIGHT; n++) {
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
}
|
||||||
|
w = &s->palette[0][0];
|
||||||
|
for (n = 0; n < (uint16_t)SURFACE_PALETTE_ENTRIES; n++) {
|
||||||
|
v = *w++;
|
||||||
|
b = (uint8_t)((v >> 8) & 0xFFu); SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = (uint8_t)(v & 0xFFu); SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
}
|
||||||
|
return ((uint32_t)hi << 16) | (uint32_t)lo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halSurfaceCopyChunky(SurfaceT *dst, const SurfaceT *src) {
|
||||||
|
memcpy(dst->pixels, src->pixels, SURFACE_PIXELS_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halSurfaceLoadFileChunky(SurfaceT *dst, FILE *fp) {
|
||||||
|
return fread(dst->pixels, 1, SURFACE_PIXELS_SIZE, fp) == SURFACE_PIXELS_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halSurfaceSaveFileChunky(const SurfaceT *src, FILE *fp) {
|
||||||
|
return fwrite(src->pixels, 1, SURFACE_PIXELS_SIZE, fp) == SURFACE_PIXELS_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint8_t *halSurfaceAllocPixels(void) {
|
||||||
|
return (uint8_t *)calloc(1, SURFACE_PIXELS_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halSurfaceFreePixels(uint8_t *pixels) {
|
||||||
|
free(pixels);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint8_t *halSurfacePlanePtr(const SurfaceT *s, uint8_t planeIdx) {
|
||||||
|
(void)s; (void)planeIdx;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
uint8_t *halStageAllocPixels(void) {
|
uint8_t *halStageAllocPixels(void) {
|
||||||
return (uint8_t *)malloc(SURFACE_PIXELS_SIZE);
|
return (uint8_t *)malloc(SURFACE_PIXELS_SIZE);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -244,21 +244,6 @@ void halPresent(const SurfaceT *src) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h) {
|
|
||||||
int16_t py;
|
|
||||||
int16_t yEnd;
|
|
||||||
|
|
||||||
if (src == NULL || gVgaMem == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
uploadPaletteIfNeeded(src);
|
|
||||||
yEnd = y + (int16_t)h;
|
|
||||||
for (py = y; py < yEnd; py++) {
|
|
||||||
expandAndWriteLine(src, py, x, w, &gVgaMem[py * VGA_STRIDE]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// VGA mode 13h vertical refresh on a real CRT runs at ~70 Hz. We
|
// VGA mode 13h vertical refresh on a real CRT runs at ~70 Hz. We
|
||||||
// detect the start of vertical retrace by polling input status
|
// detect the start of vertical retrace by polling input status
|
||||||
// register 1 ($3DA) bit 3: 1 = currently in vretrace. To get a
|
// register 1 ($3DA) bit 3: 1 = currently in vretrace. To get a
|
||||||
|
|
@ -423,6 +408,20 @@ bool halFastFloodWalk(uint8_t *row, int16_t startX, uint8_t matchColor, uint8_t
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halFloodWalkPlanes(const SurfaceT *s, int16_t startX, int16_t y, uint8_t matchColor, uint8_t newColor, bool matchEqual, bool *seedMatched, int16_t *leftXOut, int16_t *rightXOut) {
|
||||||
|
(void)s; (void)startX; (void)y; (void)matchColor; (void)newColor; (void)matchEqual;
|
||||||
|
(void)seedMatched; (void)leftXOut; (void)rightXOut;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halFloodScanRowPlanes(const SurfaceT *s, int16_t leftX, int16_t rightX, int16_t scanY, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
|
||||||
|
(void)s; (void)leftX; (void)rightX; (void)scanY; (void)matchColor; (void)newColor; (void)matchEqual;
|
||||||
|
(void)markBuf;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
|
bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf) {
|
||||||
(void)row;
|
(void)row;
|
||||||
(void)leftX;
|
(void)leftX;
|
||||||
|
|
@ -499,3 +498,143 @@ uint8_t *halStageAllocPixels(void) {
|
||||||
void halStageFreePixels(uint8_t *pixels) {
|
void halStageFreePixels(uint8_t *pixels) {
|
||||||
free(pixels);
|
free(pixels);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// DOS / VGA mode 13h is chunky-native (8bpp linear). portData is
|
||||||
|
// unused; the chunky `pixels` buffer feeds the present-time
|
||||||
|
// nearest-neighbor copy to VGA RAM.
|
||||||
|
void *halSurfaceAllocPortData(SurfaceT *s, bool isStage) {
|
||||||
|
(void)s;
|
||||||
|
(void)isStage;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halSurfaceFreePortData(SurfaceT *s, bool isStage, void *portData) {
|
||||||
|
(void)s;
|
||||||
|
(void)isStage;
|
||||||
|
(void)portData;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// DOS has no bitplanes -- chunky pixels are the source of truth and
|
||||||
|
// expandAndWriteLine derives the VGA DAC indices straight from them.
|
||||||
|
// This hook is a stub here; the cross-platform fillRect calls it
|
||||||
|
// unconditionally.
|
||||||
|
void halFillRectPlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex) {
|
||||||
|
(void)s;
|
||||||
|
(void)x;
|
||||||
|
(void)y;
|
||||||
|
(void)w;
|
||||||
|
(void)h;
|
||||||
|
(void)colorIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halSurfaceCopyPlanes(SurfaceT *dst, const SurfaceT *src) {
|
||||||
|
(void)dst;
|
||||||
|
(void)src;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void halTileFillPlanes(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex) {
|
||||||
|
(void)s; (void)bx; (void)by; (void)colorIndex;
|
||||||
|
}
|
||||||
|
void halTileCopyPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy) {
|
||||||
|
(void)dst; (void)dstBx; (void)dstBy; (void)src; (void)srcBx; (void)srcBy;
|
||||||
|
}
|
||||||
|
// No-op stub: every argument, including transparentIndex, is ignored.
void halTileCopyMaskedPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy, uint8_t transparentIndex) {
    (void)dst;
    (void)dstBx;
    (void)dstBy;
    (void)src;
    (void)srcBx;
    (void)srcBy;
    (void)transparentIndex;
}
|
||||||
|
// No-op stub: chunkyTile is never read and dst is never written.
void halTilePastePlanes(SurfaceT *dst, uint8_t bx, uint8_t by, const uint8_t *chunkyTile) {
    (void)dst;
    (void)bx;
    (void)by;
    (void)chunkyTile;
}
|
||||||
|
// No-op stub: chunkyTileOut is left untouched.
void halTileSnapPlanes(const SurfaceT *src, uint8_t bx, uint8_t by, uint8_t *chunkyTileOut) {
    (void)src;
    (void)bx;
    (void)by;
    (void)chunkyTileOut;
}
|
||||||
|
// No-op stub: every argument is ignored.
void halSpriteDrawPlanes(SurfaceT *s, const SpriteT *sp, int16_t x, int16_t y) {
    (void)s;
    (void)sp;
    (void)x;
    (void)y;
}
|
||||||
|
// No-op stub: the destination, the source bytes and all geometry
// arguments are ignored.
void halBlitRectPlanes(SurfaceT *dst, int16_t x, int16_t y, const uint8_t *srcBytes, int16_t srcX0, int16_t srcY0, int16_t copyW, int16_t copyH, int16_t srcRowBytes, uint16_t transparent) {
    (void)dst;
    (void)x;
    (void)y;
    (void)srcBytes;
    (void)srcX0;
    (void)srcY0;
    (void)copyW;
    (void)copyH;
    (void)srcRowBytes;
    (void)transparent;
}
|
||||||
|
// No-op stub: dstPlaneBytes is left untouched.
void halSpriteSavePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t *dstPlaneBytes) {
    (void)s;
    (void)x;
    (void)y;
    (void)w;
    (void)h;
    (void)dstPlaneBytes;
}
|
||||||
|
// No-op stub: srcPlaneBytes is never read.
void halSpriteRestorePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, const uint8_t *srcPlaneBytes) {
    (void)s;
    (void)x;
    (void)y;
    (void)w;
    (void)h;
    (void)srcPlaneBytes;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Phase 9 reader hooks: chunky ports use the original s->pixels-based
|
||||||
|
* paths. */
|
||||||
|
|
||||||
|
uint8_t halSamplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
||||||
|
uint8_t byte = s->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
|
||||||
|
if (x & 1) return (uint8_t)(byte & 0x0Fu);
|
||||||
|
return (uint8_t)((byte & 0xF0u) >> 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t halSurfaceHash(const SurfaceT *s) {
|
||||||
|
uint16_t lo = 0xACE1u, hi = 0x1357u, blocks, n, v;
|
||||||
|
const uint8_t *p;
|
||||||
|
const uint16_t *w;
|
||||||
|
uint8_t b;
|
||||||
|
p = s->pixels;
|
||||||
|
blocks = (uint16_t)(SURFACE_PIXELS_SIZE / 8);
|
||||||
|
do {
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
blocks--;
|
||||||
|
} while (blocks > 0u);
|
||||||
|
p = s->scb;
|
||||||
|
for (n = 0; n < (uint16_t)SURFACE_HEIGHT; n++) {
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
}
|
||||||
|
w = &s->palette[0][0];
|
||||||
|
for (n = 0; n < (uint16_t)SURFACE_PALETTE_ENTRIES; n++) {
|
||||||
|
v = *w++;
|
||||||
|
b = (uint8_t)((v >> 8) & 0xFFu); SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = (uint8_t)(v & 0xFFu); SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
}
|
||||||
|
return ((uint32_t)hi << 16) | (uint32_t)lo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Copy the SURFACE_PIXELS_SIZE bytes of src->pixels into dst->pixels.
 * Only the pixel buffer is touched. */
void halSurfaceCopyChunky(SurfaceT *dst, const SurfaceT *src) {
    const uint8_t *from = src->pixels;
    uint8_t       *to   = dst->pixels;

    memcpy(to, from, SURFACE_PIXELS_SIZE);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Read SURFACE_PIXELS_SIZE bytes from fp into dst->pixels.
 * Returns true only when the full count was read. */
bool halSurfaceLoadFileChunky(SurfaceT *dst, FILE *fp) {
    size_t got;

    got = fread(dst->pixels, 1, SURFACE_PIXELS_SIZE, fp);
    return got == SURFACE_PIXELS_SIZE;
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halSurfaceSaveFileChunky(const SurfaceT *src, FILE *fp) {
|
||||||
|
return fwrite(src->pixels, 1, SURFACE_PIXELS_SIZE, fp) == SURFACE_PIXELS_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint8_t *halSurfaceAllocPixels(void) {
|
||||||
|
return (uint8_t *)calloc(1, SURFACE_PIXELS_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Hand a pixel buffer back to the C heap via free(). */
void halSurfaceFreePixels(uint8_t *pixels) {
    void *block = pixels;

    free(block);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Always returns NULL; both the surface and planeIdx are ignored. */
uint8_t *halSurfacePlanePtr(const SurfaceT *s, uint8_t planeIdx) {
    (void)s;
    (void)planeIdx;
    return NULL;
}
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,25 @@
|
||||||
// crowd up against the 64 KB-per-bank limit).
|
// crowd up against the 64 KB-per-bank limit).
|
||||||
|
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
#include "joey/debug.h"
|
#include "joey/debug.h"
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
/* GetTick wrapper in peislam.asm: invokes the Misc Toolset GetTick
|
||||||
|
* ($2503) and returns the low 16 bits of the system's tick counter
|
||||||
|
* (firmware VBL ISR-driven). Polling $C019 from C user code missed
|
||||||
|
* transitions for any op over ~1 ms; the system's tick counter is
|
||||||
|
* updated by the actual interrupt handler so it stays accurate
|
||||||
|
* regardless of caller polling rate. Tick rate matches the video
|
||||||
|
* field rate -- 60 Hz on NTSC, 50 Hz on PAL. */
|
||||||
|
extern uint16_t iigsGetTickWord(void);
|
||||||
|
/* Reads battery RAM hrtz50or60: 0 = NTSC, 1 = PAL. */
|
||||||
|
extern uint16_t iigsReadHzParam(void);
|
||||||
|
static uint16_t gFrameHz = 60u;
|
||||||
|
|
||||||
// hal.c is the single TU that calls into joeyDraw.asm. Cross-
|
// hal.c is the single TU that calls into joeyDraw.asm. Cross-
|
||||||
// platform draw.c / tile.c / etc. dispatch through halFast*
|
// platform draw.c / tile.c / etc. dispatch through halFast*
|
||||||
// functions defined here; they never reference the asm symbols
|
// functions defined here; they never reference the asm symbols
|
||||||
|
|
@ -210,6 +223,7 @@ bool halInit(const JoeyConfigT *config) {
|
||||||
// is unreliable from halInit's calling context, so we don't try
|
// is unreliable from halInit's calling context, so we don't try
|
||||||
// it here -- the first present will set up SCB to 320 mode.
|
// it here -- the first present will set up SCB to 320 mode.
|
||||||
iigsInitRowLut();
|
iigsInitRowLut();
|
||||||
|
gFrameHz = (iigsReadHzParam() == 1u) ? 50u : 60u;
|
||||||
gModeSet = true;
|
gModeSet = true;
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
@ -234,40 +248,6 @@ void halPresent(const SurfaceT *src) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h) {
|
|
||||||
uint16_t copyBytes;
|
|
||||||
int16_t byteStart;
|
|
||||||
uint16_t srcOffset;
|
|
||||||
|
|
||||||
if (src == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
uploadScbAndPaletteIfNeeded(src);
|
|
||||||
|
|
||||||
// Pixel copy: byte-aligned runs per scanline. x is always >= 0
|
|
||||||
// after API-level clipping. Use unsigned shifts to avoid
|
|
||||||
// ~SSHIFTRIGHT helper for `x >> 1` on signed int16_t.
|
|
||||||
byteStart = (int16_t)((uint16_t)x >> 1);
|
|
||||||
copyBytes = (uint16_t)((((uint16_t)x + w + 1u) >> 1) - (uint16_t)byteStart);
|
|
||||||
|
|
||||||
if (copyBytes == 0 || h == 0) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pixel copy: prefer the PEI-slam variant when the rect satisfies
|
|
||||||
// its contract (copyBytes even, 2..80). Sprite-rect presents
|
|
||||||
// (typical 8 bytes wide) hit this ~3x faster than MVN. Wider or
|
|
||||||
// odd-byte rects fall back to MVN, which has no width cap.
|
|
||||||
srcOffset = (uint16_t)(0x2000 + SURFACE_ROW_OFFSET(y) + byteStart);
|
|
||||||
if ((copyBytes & 1) == 0 && copyBytes >= 2 && copyBytes <= 80) {
|
|
||||||
iigsBlitRectStageToShrPEI(srcOffset, copyBytes, h);
|
|
||||||
} else {
|
|
||||||
iigsBlitRectStageToShr(srcOffset, copyBytes, h);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void halShutdown(void) {
|
void halShutdown(void) {
|
||||||
if (gModeSet) {
|
if (gModeSet) {
|
||||||
*IIGS_NEWVIDEO_REG = gPreviousNewVideo;
|
*IIGS_NEWVIDEO_REG = gPreviousNewVideo;
|
||||||
|
|
@ -305,6 +285,142 @@ void halStageFreePixels(uint8_t *pixels) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// IIgs is chunky-native: portData is unused. The chunky `pixels`
|
||||||
|
// buffer at $01:2000 is the stage's pixel storage and the source for
|
||||||
|
// stagePresent's PEI-slam to $E1.
|
||||||
|
void *halSurfaceAllocPortData(SurfaceT *s, bool isStage) {
|
||||||
|
(void)s;
|
||||||
|
(void)isStage;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// No-op on this port: all three arguments are ignored and nothing is
// released.
void halSurfaceFreePortData(SurfaceT *s, bool isStage, void *portData) {
    (void)s; (void)isStage; (void)portData;
}
|
||||||
|
|
||||||
|
|
||||||
|
// IIgs SHR is chunky-native; no bitplanes to update.
|
||||||
|
void halFillRectPlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex) {
|
||||||
|
(void)s;
|
||||||
|
(void)x;
|
||||||
|
(void)y;
|
||||||
|
(void)w;
|
||||||
|
(void)h;
|
||||||
|
(void)colorIndex;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// No-op stub: both surfaces are ignored.
void halSurfaceCopyPlanes(SurfaceT *dst, const SurfaceT *src) {
    (void)dst; (void)src;
}
|
||||||
|
|
||||||
|
|
||||||
|
// No-op stub: every argument is ignored.
void halTileFillPlanes(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex) {
    (void)s;
    (void)bx;
    (void)by;
    (void)colorIndex;
}
|
||||||
|
// No-op stub: every argument is ignored.
void halTileCopyPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy) {
    (void)dst;
    (void)dstBx;
    (void)dstBy;
    (void)src;
    (void)srcBx;
    (void)srcBy;
}
|
||||||
|
// No-op stub: every argument, including transparentIndex, is ignored.
void halTileCopyMaskedPlanes(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src, uint8_t srcBx, uint8_t srcBy, uint8_t transparentIndex) {
    (void)dst;
    (void)dstBx;
    (void)dstBy;
    (void)src;
    (void)srcBx;
    (void)srcBy;
    (void)transparentIndex;
}
|
||||||
|
// No-op stub: chunkyTile is never read and dst is never written.
void halTilePastePlanes(SurfaceT *dst, uint8_t bx, uint8_t by, const uint8_t *chunkyTile) {
    (void)dst;
    (void)bx;
    (void)by;
    (void)chunkyTile;
}
|
||||||
|
// No-op stub: chunkyTileOut is left untouched.
void halTileSnapPlanes(const SurfaceT *src, uint8_t bx, uint8_t by, uint8_t *chunkyTileOut) {
    (void)src;
    (void)bx;
    (void)by;
    (void)chunkyTileOut;
}
|
||||||
|
// No-op stub: every argument is ignored.
void halSpriteDrawPlanes(SurfaceT *s, const SpriteT *sp, int16_t x, int16_t y) {
    (void)s;
    (void)sp;
    (void)x;
    (void)y;
}
|
||||||
|
// No-op stub: the destination, the source bytes and all geometry
// arguments are ignored.
void halBlitRectPlanes(SurfaceT *dst, int16_t x, int16_t y, const uint8_t *srcBytes, int16_t srcX0, int16_t srcY0, int16_t copyW, int16_t copyH, int16_t srcRowBytes, uint16_t transparent) {
    (void)dst;
    (void)x;
    (void)y;
    (void)srcBytes;
    (void)srcX0;
    (void)srcY0;
    (void)copyW;
    (void)copyH;
    (void)srcRowBytes;
    (void)transparent;
}
|
||||||
|
// No-op stub: dstPlaneBytes is left untouched.
void halSpriteSavePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t *dstPlaneBytes) {
    (void)s;
    (void)x;
    (void)y;
    (void)w;
    (void)h;
    (void)dstPlaneBytes;
}
|
||||||
|
// No-op stub: srcPlaneBytes is never read.
void halSpriteRestorePlanes(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, const uint8_t *srcPlaneBytes) {
    (void)s;
    (void)x;
    (void)y;
    (void)w;
    (void)h;
    (void)srcPlaneBytes;
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Phase 9 chunky reader hooks: IIgs reads from s->pixels just like
|
||||||
|
* the legacy paths did. Same logic as the DOS port. */
|
||||||
|
uint8_t halSamplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
||||||
|
uint8_t byte = s->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
|
||||||
|
if (x & 1) return (uint8_t)(byte & 0x0Fu);
|
||||||
|
return (uint8_t)((byte & 0xF0u) >> 4);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t halSurfaceHash(const SurfaceT *s) {
|
||||||
|
uint16_t lo = 0xACE1u, hi = 0x1357u, blocks, n, v;
|
||||||
|
const uint8_t *p;
|
||||||
|
const uint16_t *w;
|
||||||
|
uint8_t b;
|
||||||
|
p = s->pixels;
|
||||||
|
blocks = (uint16_t)(SURFACE_PIXELS_SIZE / 8);
|
||||||
|
do {
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
blocks--;
|
||||||
|
} while (blocks > 0u);
|
||||||
|
p = s->scb;
|
||||||
|
for (n = 0; n < (uint16_t)SURFACE_HEIGHT; n++) {
|
||||||
|
b = *p++; SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
}
|
||||||
|
w = &s->palette[0][0];
|
||||||
|
for (n = 0; n < (uint16_t)SURFACE_PALETTE_ENTRIES; n++) {
|
||||||
|
v = *w++;
|
||||||
|
b = (uint8_t)((v >> 8) & 0xFFu); SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
b = (uint8_t)(v & 0xFFu); SURFACE_HASH_MIX_BYTE(lo, hi, b);
|
||||||
|
}
|
||||||
|
return ((uint32_t)hi << 16) | (uint32_t)lo;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Copy the SURFACE_PIXELS_SIZE bytes of src->pixels into dst->pixels.
 * Only the pixel buffer is touched. */
void halSurfaceCopyChunky(SurfaceT *dst, const SurfaceT *src) {
    const uint8_t *from = src->pixels;
    uint8_t       *to   = dst->pixels;

    memcpy(to, from, SURFACE_PIXELS_SIZE);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Read SURFACE_PIXELS_SIZE bytes from fp into dst->pixels.
 * Returns true only when the full count was read. */
bool halSurfaceLoadFileChunky(SurfaceT *dst, FILE *fp) {
    size_t got;

    got = fread(dst->pixels, 1, SURFACE_PIXELS_SIZE, fp);
    return got == SURFACE_PIXELS_SIZE;
}
|
||||||
|
|
||||||
|
|
||||||
|
bool halSurfaceSaveFileChunky(const SurfaceT *src, FILE *fp) {
|
||||||
|
return fwrite(src->pixels, 1, SURFACE_PIXELS_SIZE, fp) == SURFACE_PIXELS_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint8_t *halSurfaceAllocPixels(void) {
|
||||||
|
return (uint8_t *)calloc(1, SURFACE_PIXELS_SIZE);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Hand a pixel buffer back to the C heap via free(). */
void halSurfaceFreePixels(uint8_t *pixels) {
    void *block = pixels;

    free(block);
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Always returns NULL; both the surface and planeIdx are ignored. */
uint8_t *halSurfacePlanePtr(const SurfaceT *s, uint8_t planeIdx) {
    (void)s;
    (void)planeIdx;
    return NULL;
}
|
||||||
|
|
||||||
|
|
||||||
// $C019 RDVBLBAR: bit 7 = 0 during vertical blank, 1 during active
|
// $C019 RDVBLBAR: bit 7 = 0 during vertical blank, 1 during active
|
||||||
// scan. To produce a rising-edge wait (one VBL per call), first spin
|
// scan. To produce a rising-edge wait (one VBL per call), first spin
|
||||||
// while VBL is currently active (bit 7 = 0), then spin until VBL
|
// while VBL is currently active (bit 7 = 0), then spin until VBL
|
||||||
|
|
@ -333,24 +449,11 @@ void halWaitVBL(void) {
|
||||||
// byte and the counter never advances. The explicit lda > / sta >
|
// byte and the counter never advances. The explicit lda > / sta >
|
||||||
// pattern uses long-mode addressing throughout, which is
|
// pattern uses long-mode addressing throughout, which is
|
||||||
// DBR-independent.
|
// DBR-independent.
|
||||||
static uint16_t gFrameCount = 0;
|
|
||||||
static uint8_t gPrevInVbl = 0;
|
|
||||||
|
|
||||||
uint16_t halFrameCount(void) {
|
uint16_t halFrameCount(void) {
|
||||||
uint8_t now;
|
return iigsGetTickWord();
|
||||||
uint16_t cnt;
|
|
||||||
|
|
||||||
now = (*IIGS_VBL_STATUS & VBL_BAR_BIT) == 0;
|
|
||||||
if (now && !gPrevInVbl) {
|
|
||||||
cnt = gFrameCount;
|
|
||||||
cnt = (uint16_t)(cnt + 1u);
|
|
||||||
gFrameCount = cnt;
|
|
||||||
}
|
|
||||||
gPrevInVbl = now;
|
|
||||||
return gFrameCount;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint16_t halFrameHz(void) {
|
uint16_t halFrameHz(void) {
|
||||||
return 60u;
|
return gFrameHz;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,66 @@
|
||||||
* peislam.asm - placeholder.
|
* peislam.asm - originally a PEI-slam helper, now hosts the GetTick
|
||||||
*
|
* and ReadBParam trampolines. The PEI-slam logic was rolled into
|
||||||
* The original PEI-slam-per-row helper was removed; its functionality
|
* iigsBlitStageToShr in joeyDraw.asm.
|
||||||
* was rolled into iigsBlitStageToShr in joeyDraw.asm (full PEI-slam
|
|
||||||
* with per-row dirty skip). This stub remains so the build's
|
|
||||||
* PORT_ASM_SRCS_ALL wildcard pulls in a file with a recognized load
|
|
||||||
* segment and the linker keeps the same segment-bank layout it had
|
|
||||||
* when peislam.asm was a real translation unit.
|
|
||||||
|
|
||||||
keep PEISLAM
|
keep PEISLAM
|
||||||
case on
|
case on
|
||||||
|
|
||||||
|
|
||||||
|
* Stub kept so the PEISLAM load segment stays present (the build's
|
||||||
|
* PORT_ASM_SRCS_ALL wildcard pulls in this file by name).
|
||||||
peislamStub start IIGSASM
|
peislamStub start IIGSASM
|
||||||
rtl
|
rtl
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
****************************************************************
|
||||||
|
* uint16_t iigsGetTickWord(void)
|
||||||
|
*
|
||||||
|
* Calls Misc Toolset GetTick ($2503) and returns the low 16 bits of
|
||||||
|
* the 32-bit tick counter. The system increments this counter from
|
||||||
|
* the actual VBL hardware interrupt, so it stays accurate regardless
|
||||||
|
* of caller polling rate -- C-side polling of $C019 missed transitions
|
||||||
|
* for any op over ~1 ms.
|
||||||
|
*
|
||||||
|
* GetTick output convention: caller pushes 4 bytes of output space,
|
||||||
|
* tool dispatcher writes the LongWord into them. We pull the low 16
|
||||||
|
* bits into A (ORCA-C Word return convention -- A holds the result,
|
||||||
|
* not Y; verified against jIIgs.asm asmGetVbl) and discard the high
|
||||||
|
* 16 into X.
|
||||||
|
*
|
||||||
|
* ORCA-C cdecl ABI: caller has M=I=16. Word return in A.
|
||||||
|
****************************************************************
|
||||||
|
|
||||||
|
iigsGetTickWord start IIGSASM
|
||||||
|
pha ; output space high word (value pushed is a placeholder)
|
||||||
|
pha ; output space low word (value pushed is a placeholder)
|
||||||
|
ldx #$2503 ; _GetTick
|
||||||
|
jsl $E10000 ; dispatch the tool call selected by X
|
||||||
|
|
||||||
|
pla ; A = low 16 bits (return value)
|
||||||
|
plx ; discard high 16 bits
|
||||||
|
rtl ; long return to the C caller, result in A
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
****************************************************************
|
||||||
|
* uint16_t iigsReadHzParam(void)
|
||||||
|
*
|
||||||
|
* Reads battery RAM parameter hrtz50or60 ($1D) via _ReadBParam ($0C03)
|
||||||
|
* and returns the raw value: 0 = NTSC (60 Hz), 1 = PAL (50 Hz).
|
||||||
|
*
|
||||||
|
* GetTick fires from the hardware VBL ISR, so its rate matches the
|
||||||
|
* video field rate -- 60 Hz on NTSC, 50 Hz on PAL. halFrameHz must
|
||||||
|
* report whichever this machine actually runs so wall-clock math
|
||||||
|
* (frames * 1000 / halFrameHz) is correct on both.
|
||||||
|
****************************************************************
|
||||||
|
|
||||||
|
iigsReadHzParam start IIGSASM
|
||||||
|
pha ; output space (Word; value pushed is a placeholder)
|
||||||
|
pea $001D ; hrtz50or60 parameter ID
|
||||||
|
ldx #$0C03 ; _ReadBParam
|
||||||
|
jsl $E10000 ; dispatch the tool call selected by X
|
||||||
|
|
||||||
|
pla ; A = result (ORCA-C Word return)
|
||||||
|
rtl ; long return to the C caller, result in A
|
||||||
|
end
|
||||||
|
|
|
||||||
|
|
@ -253,3 +253,253 @@ _surface68kFillRectByteAligned:
|
||||||
.Lfrb_done:
|
.Lfrb_done:
|
||||||
movem.l (%sp)+,%d2-%d6
|
movem.l (%sp)+,%d2-%d6
|
||||||
rts
|
rts
|
||||||
|
|
||||||
|
|
||||||
|
| ----------------------------------------------------------------
|
||||||
|
| void surface68kFillSpan4Planes(uint8_t *p0, uint8_t *p1,
|
||||||
|
| uint8_t *p2, uint8_t *p3,
|
||||||
|
| uint16_t numMid,
|
||||||
|
| uint8_t leftMask, uint8_t rightMask,
|
||||||
|
| uint8_t fb0, uint8_t fb1,
|
||||||
|
| uint8_t fb2, uint8_t fb3);
|
||||||
|
|
|
||||||
|
| Fill ONE planar row across 4 planes -- the per-row body of
|
||||||
|
| halFillRectPlanes lifted into asm. Each pN points at the leading
|
||||||
|
| byte (already advanced by planeBase + y*40 + byteFirst on the C
|
||||||
|
| side). leftMask and rightMask are the partial-byte masks for the
|
||||||
|
| left/right edges; numMid is the count of full bytes between them.
|
||||||
|
| fbN is 0x00 or 0xFF, the per-plane fill byte (caller pre-classifies
|
||||||
|
| (colorIndex >> N) & 1 -> 0xFF or 0x00).
|
||||||
|
|
|
||||||
|
| Used by Amiga halFastFillCircle (one call per scanline span) and
|
||||||
|
| Amiga halFillRectPlanes (one call per row of the rect). Replaces
|
||||||
|
| the C inner loop whose ~13 cyc/byte was the gating cost on
|
||||||
|
| fillCircle r=40 even after C-side inlining.
|
||||||
|
|
|
||||||
|
| Mask convention is uniform for all planes:
|
||||||
|
| leading byte := (*p & ~leftMask) | (fbN & leftMask)
|
||||||
|
| middle bytes := fbN
|
||||||
|
| trailing byte := (*p & ~rightMask) | (fbN & rightMask)
|
||||||
|
| -- branchless: the same arithmetic produces "set" or "clear" based
|
||||||
|
| on whether fbN is 0xFF or 0x00.
|
||||||
|
|
|
||||||
|
| ABI: m68k cdecl. d2-d7/a2-a6 callee-save (movem'd here).
|
||||||
|
| Stack offset to first arg after MOVEM: 11 regs * 4 = 44 bytes saved
|
||||||
|
| + 4 ret PC = 48.
|
||||||
|
| ----------------------------------------------------------------
|
||||||
|
.globl _surface68kFillSpan4Planes
|
||||||
|
|
||||||
|
.equ SP_SAVED, 44
|
||||||
|
.equ SP_RPC, 4
|
||||||
|
.equ SP_OFF, (SP_SAVED + SP_RPC)
|
||||||
|
|
||||||
|
.equ SP_P0, SP_OFF + 0
|
||||||
|
.equ SP_P1, SP_OFF + 4
|
||||||
|
.equ SP_P2, SP_OFF + 8
|
||||||
|
.equ SP_P3, SP_OFF + 12
|
||||||
|
.equ SP_NMID, SP_OFF + 16 + 2 | int -> low word at +2
|
||||||
|
.equ SP_LMASK, SP_OFF + 20 + 3 | int -> low byte at +3
|
||||||
|
.equ SP_RMASK, SP_OFF + 24 + 3
|
||||||
|
.equ SP_FB0, SP_OFF + 28 + 3
|
||||||
|
.equ SP_FB1, SP_OFF + 32 + 3
|
||||||
|
.equ SP_FB2, SP_OFF + 36 + 3
|
||||||
|
.equ SP_FB3, SP_OFF + 40 + 3
|
||||||
|
|
||||||
|
| Per-plane work, expressed as assembler macros so that all four
| planes stay fully inlined (no bsr/rts per plane, no per-plane re-test
| of numMid). Macro arguments:
|   an    = the address register holding this plane's pointer.
|   fboff = the stack offset of this plane's fill byte.
|
| Register roles while the macros run:
|   d1 = leftMask     d2 = ~leftMask
|   d3 = rightMask    d4 = ~rightMask
|   d0 = numMid-1     (only meaningful on the with-middle path)
|   d5 = this plane's fill byte, loaded once by fsLeadByte
|   d6, d7 = scratch
|
| The middle-run loop uses a numeric local label (1: / 1b), so the
| four expansions cannot collide.

| Leading byte := (*p & ~leftMask) | (fb & leftMask); advances p.
	.macro	fsLeadByte an, fboff
	move.b	(\an),%d6
	and.b	%d2,%d6
	move.b	\fboff(%sp),%d5
	move.b	%d5,%d7
	and.b	%d1,%d7
	or.b	%d7,%d6
	move.b	%d6,(\an)+
	.endm

| Middle bytes := fb, numMid times (d0 already holds numMid-1).
	.macro	fsMidRun an
	move.w	%d0,%d7
1:	move.b	%d5,(\an)+
	dbra	%d7,1b
	.endm

| Trailing byte := (*p & ~rightMask) | (fb & rightMask); p not advanced.
	.macro	fsTrailByte an
	move.b	(\an),%d6
	and.b	%d4,%d6
	move.b	%d5,%d7
	and.b	%d3,%d7
	or.b	%d7,%d6
	move.b	%d6,(\an)
	.endm

| One whole plane, numMid > 0.
	.macro	fsPlaneWithMid an, fboff
	fsLeadByte	\an, \fboff
	fsMidRun	\an
	fsTrailByte	\an
	.endm

| One whole plane, numMid == 0 (leading byte then trailing byte).
	.macro	fsPlaneNoMid an, fboff
	fsLeadByte	\an, \fboff
	fsTrailByte	\an
	.endm

_surface68kFillSpan4Planes:
	movem.l	%d2-%d7/%a2-%a6,-(%sp)

	| Edge masks and their complements.
	move.b	SP_LMASK(%sp),%d1
	move.b	%d1,%d2
	not.b	%d2
	move.b	SP_RMASK(%sp),%d3
	move.b	%d3,%d4
	not.b	%d4

	| Plane pointers.
	move.l	SP_P0(%sp),%a0
	move.l	SP_P1(%sp),%a1
	move.l	SP_P2(%sp),%a2
	move.l	SP_P3(%sp),%a3

	| numMid is tested once. Zero selects the no-middle path;
	| otherwise it is pre-decremented for dbra and all four
	| planes run the with-middle body.
	move.w	SP_NMID(%sp),%d0
	beq	.LfsNoMid
	subq.w	#1,%d0

	| ---- WITH-MIDDLE PATH ----
	fsPlaneWithMid	%a0, SP_FB0
	fsPlaneWithMid	%a1, SP_FB1
	fsPlaneWithMid	%a2, SP_FB2
	fsPlaneWithMid	%a3, SP_FB3

	movem.l	(%sp)+,%d2-%d7/%a2-%a6
	rts

.LfsNoMid:
	| ---- NO-MIDDLE PATH (just leading + trailing) ----
	fsPlaneNoMid	%a0, SP_FB0
	fsPlaneNoMid	%a1, SP_FB1
	fsPlaneNoMid	%a2, SP_FB2
	fsPlaneNoMid	%a3, SP_FB3

	movem.l	(%sp)+,%d2-%d7/%a2-%a6
	rts
|
||||||
|
|
|
||||||
93
tools/diff-uber-hashes
Executable file
93
tools/diff-uber-hashes
Executable file
|
|
@ -0,0 +1,93 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Compare two UBER joeylog.txt files by per-op surface hash.
|
||||||
|
|
||||||
|
Used by the planar 68k rewrite (project_planar_68k_plan.md): IIgs
|
||||||
|
captures the golden reference, each 68k port re-runs UBER after a
|
||||||
|
primitive conversion, and this tool tells you which ops produced
|
||||||
|
different pixels. Without this, "looks right visually" misses the
|
||||||
|
subtle mismatches that cascade into hard-to-debug corruption.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
tools/diff-uber-hashes <reference-log> <test-log>
|
||||||
|
|
||||||
|
Exit code:
|
||||||
|
0 = all hashes match
|
||||||
|
1 = at least one mismatch
|
||||||
|
2 = usage error or missing file
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Matches one UBER result line, e.g.:
#   UBER: drawCircle r=80: 56 iters / 4 frames = 840 ops/sec | hash=A1B2C3D4
LINE_RE = re.compile(
    r"UBER:\s+(?P<op>[^:]+):\s+\d+\s+iters\s+/\s+\d+\s+frames\s+=\s+\d+\s+ops/sec\s+\|\s+hash=(?P<hash>[0-9A-Fa-f]+)"
)


def parse_log(path):
    """Return an ordered dict {op_name: hash} from a UBER log file.

    Op names are stripped of surrounding whitespace and hashes are
    upper-cased. Lines that do not match LINE_RE are skipped.

    Multiple runs may be concatenated in the same log (joeyLog appends)
    -- in that case the LAST hash for each op wins, matching the most
    recent run.

    The file is decoded as UTF-8 with undecodable bytes replaced, so a
    stray non-UTF-8 byte in a log cannot raise UnicodeDecodeError (which
    the caller's OSError handler would not catch). Raises OSError when
    the file cannot be opened or read.
    """
    hashes = {}
    with open(path, encoding="utf-8", errors="replace") as f:
        for line in f:
            m = LINE_RE.search(line)
            if m:
                hashes[m.group("op").strip()] = m.group("hash").upper()
    return hashes
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv):
    """Compare per-op hashes of two UBER logs and report differences.

    argv is [program, reference-log, test-log]. Prints one line per op
    that is missing from the test log, mismatched, or present only in
    the test log, followed by a summary line.

    Returns 0 when every op matches and the test log has no extra ops,
    1 otherwise, and 2 on a usage error, an unreadable file, or a log
    without any UBER hash lines.
    """
    if len(argv) != 3:
        sys.stderr.write("usage: diff-uber-hashes <reference-log> <test-log>\n")
        return 2
    ref_path, test_path = argv[1], argv[2]

    try:
        ref = parse_log(ref_path)
        test = parse_log(test_path)
    except OSError as e:
        sys.stderr.write(f"error: {e}\n")
        return 2

    # An empty parse means the file held no recognisable result lines.
    for log_path, parsed in ((ref_path, ref), (test_path, test)):
        if not parsed:
            sys.stderr.write(f"error: no UBER hash lines found in {log_path}\n")
            return 2

    matches = 0
    mismatches = 0
    for op in ref:
        ref_hash = ref[op]
        if op not in test:
            print(f" MISSING in test: {op} (ref={ref_hash})")
            mismatches += 1
            continue
        test_hash = test[op]
        if test_hash == ref_hash:
            matches += 1
        else:
            print(f" MISMATCH {op}: ref={ref_hash} test={test_hash}")
            mismatches += 1

    extras = [op for op in test if op not in ref]
    for op in extras:
        print(f" EXTRA in test: {op} (test={test[op]})")

    total = len(ref) + len(extras)
    print()
    if mismatches or extras:
        print(f"FAIL: {matches} match, {mismatches} mismatch, {len(extras)} extras")
        return 1
    print(f"OK: {matches}/{total} ops match")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
sys.exit(main(sys.argv))
|
||||||
132
tools/diff-uber-perf
Executable file
132
tools/diff-uber-perf
Executable file
|
|
@ -0,0 +1,132 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
"""Compare two UBER joeylog.txt files by per-op ops/sec.
|
||||||
|
|
||||||
|
Sibling of diff-uber-hashes (which compares pixel correctness). This
|
||||||
|
tool drives Phase 10 of project_planar_68k_plan.md: pick the
|
||||||
|
biggest perf gaps vs the IIgs reference and target asm/algorithmic
|
||||||
|
optimization at those.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
tools/diff-uber-perf <reference-log> <test-log> [--threshold 1.0]
|
||||||
|
|
||||||
|
Output is sorted by speed ratio (test/ref) ascending, so the worst
|
||||||
|
gaps print first. Ops missing from either log are flagged. The
|
||||||
|
threshold flag (default 1.0) marks ops below that ratio as FAIL --
|
||||||
|
project_perf_directive.md says "IIgs is the perf floor; every
|
||||||
|
other target must match or beat it", so parity = 1.0x. Use
|
||||||
|
--threshold 0.8 for the project_planar_68k_plan looser acceptance.
|
||||||
|
|
||||||
|
Exit code:
|
||||||
|
0 = all common ops at >= threshold
|
||||||
|
1 = at least one op below threshold (or missing)
|
||||||
|
2 = usage error or missing file
|
||||||
|
"""
|
||||||
|
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Match e.g.:
#   UBER: drawCircle r=80: 56 iters / 4 frames = 840 ops/sec | hash=A1B2C3D4
# "op" captures everything between "UBER:" and the next colon (so it may
# contain spaces, e.g. "drawCircle r=80"); "ops" captures the integer
# ops/sec figure. Anything after "ops/sec" (such as the hash) is ignored.
LINE_RE = re.compile(
    r"UBER:\s+(?P<op>[^:]+):\s+\d+\s+iters\s+/\s+\d+\s+frames\s+=\s+(?P<ops>\d+)\s+ops/sec"
)


def parse_log(path):
    """Return ordered dict {op_name: ops_per_sec} from a UBER log file.

    Multiple runs may be concatenated (joeyLog appends); last value
    for each op wins, matching the most recent run. Keys keep the order
    in which each op name was first seen.

    Args:
        path: Path of the log file to read.

    Returns:
        dict mapping the stripped op name (str) to its ops/sec (int).
        Empty if no line matches LINE_RE.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    perf = {}
    # Decode explicitly and leniently. With the locale-default strict
    # decoding, a single stray non-UTF-8 byte anywhere in the log raises
    # UnicodeDecodeError (a ValueError, not an OSError), which callers that
    # only handle OSError would not catch. Undecodable bytes are replaced
    # with U+FFFD; lines that still match LINE_RE are parsed as usual.
    with open(path, encoding="utf-8", errors="replace") as f:
        for line in f:
            m = LINE_RE.search(line)
            if m:
                perf[m.group("op").strip()] = int(m.group("ops"))
    return perf
|
||||||
|
|
||||||
|
|
||||||
|
def main(argv):
    """Compare per-op ops/sec between a reference and a test UBER log.

    Args:
        argv: Full argument vector; argv[0] (the program name) is skipped.
            Expects exactly two positional arguments, <reference-log> and
            <test-log>, plus an optional "--threshold VALUE" pair that may
            appear anywhere among them (default threshold 1.0).

    Returns:
        0 if every op present in the reference log is also in the test log
          with ratio (test/ref) >= threshold;
        1 if at least one reference op is below the threshold or missing
          from the test log;
        2 on usage error, a bad (non-numeric or non-finite) threshold, an
          unreadable file, or a log that contains no UBER lines.

    Ops that appear only in the test log are listed as EXTRA but do not
    affect the exit code.
    """
    import math  # local import: only needed to validate --threshold

    threshold = 1.0
    args = []
    i = 1
    while i < len(argv):
        if argv[i] == "--threshold" and i + 1 < len(argv):
            try:
                threshold = float(argv[i + 1])
                # float() accepts "nan" and "inf". A NaN threshold makes
                # every "ratio >= threshold" test false, so all ops would
                # be reported as FAIL; treat non-finite values as bad input.
                if not math.isfinite(threshold):
                    raise ValueError(argv[i + 1])
            except ValueError:
                sys.stderr.write(f"error: bad threshold {argv[i+1]}\n")
                return 2
            i += 2
        else:
            # Everything else (including a trailing "--threshold" with no
            # value) is collected as a positional and caught by the count
            # check below.
            args.append(argv[i])
            i += 1

    if len(args) != 2:
        sys.stderr.write(
            "usage: diff-uber-perf <reference-log> <test-log> [--threshold 1.0]\n"
        )
        return 2

    try:
        ref = parse_log(args[0])
        test = parse_log(args[1])
    except OSError as e:
        sys.stderr.write(f"error: {e}\n")
        return 2

    if not ref:
        sys.stderr.write(f"error: no UBER lines found in {args[0]}\n")
        return 2
    if not test:
        sys.stderr.write(f"error: no UBER lines found in {args[1]}\n")
        return 2

    # One row per reference op: (op, ref_ops, test_ops, ratio, status).
    rows = []
    for op, ref_ops in ref.items():
        test_ops = test.get(op)
        if test_ops is None:
            rows.append((op, ref_ops, None, None, "MISSING"))
            continue
        if ref_ops == 0:
            # Avoid dividing by zero: any non-zero test rate counts as
            # infinitely faster, and 0 vs 0 counts as parity.
            ratio = float("inf") if test_ops > 0 else 1.0
        else:
            ratio = test_ops / ref_ops
        status = "ok" if ratio >= threshold else "FAIL"
        rows.append((op, ref_ops, test_ops, ratio, status))

    # Ops only present in the test log; printed after the sorted rows, in
    # the order they appear in the test log.
    extras = [(op, None, test[op], None, "EXTRA") for op in test if op not in ref]

    # Sort: missing first (by name), then FAIL ascending by ratio (worst
    # gap first), then ok ascending by ratio. Only rows built from the
    # reference log are sorted, so the status is MISSING, FAIL or ok.
    def sort_key(row):
        op, _, _, ratio, status = row
        if status == "MISSING":
            return (0, 0.0, op)
        return (1 if status == "FAIL" else 2, ratio, op)

    rows.sort(key=sort_key)

    # Width of the op column: the longest op name in either log, and never
    # narrower than the "op" header. Both dicts are non-empty here.
    op_w = max(len(name) for name in [*ref, *test, "op"])

    print(f"{'op':<{op_w}} {'ref':>10} {'test':>10} {'ratio':>7} status")
    print(f"{'-'*op_w} {'-'*10} {'-'*10} {'-'*7} ------")
    fails = 0
    for op, refv, testv, ratio, status in rows + extras:
        refs = "" if refv is None else str(refv)
        tests = "" if testv is None else str(testv)
        rats = "" if ratio is None else f"{ratio:.2f}x"
        print(f"{op:<{op_w}} {refs:>10} {tests:>10} {rats:>7} {status}")
        # MISSING ops count as failures alongside FAIL; EXTRA ops do not.
        if status in ("FAIL", "MISSING"):
            fails += 1

    print()
    print(f"threshold: {threshold:.2f}x ({len(rows)} ops compared, {fails} below threshold)")
    return 1 if fails > 0 else 0
|
||||||
|
|
||||||
|
|
||||||
|
# Script entry point: run the comparison on the process arguments and exit
# with the status code that main() returns.
if __name__ == "__main__":
    raise SystemExit(main(sys.argv))
|
||||||
Loading…
Add table
Reference in a new issue