Major IIgs improvements. Preparing to benchmark all ports.
This commit is contained in:
parent
20cbccaca5
commit
91fcd49f6f
44 changed files with 1482 additions and 455 deletions
|
|
@ -120,8 +120,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
|
||||||
|
|
@ -246,8 +246,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_OS;
|
config.hostMode = HOST_MODE_OS;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
|
||||||
|
|
@ -218,8 +218,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
|
||||||
|
|
@ -225,8 +225,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
|
||||||
|
|
@ -108,8 +108,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
|
||||||
|
|
@ -113,8 +113,8 @@ int main(void) {
|
||||||
config.hostMode = HOST_MODE_TAKEOVER;
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
config.codegenBytes = 8 * 1024;
|
config.codegenBytes = 8 * 1024;
|
||||||
config.maxSurfaces = 4;
|
config.maxSurfaces = 4;
|
||||||
config.audioBytes = 64 * 1024;
|
config.audioBytes = 64UL * 1024;
|
||||||
config.assetBytes = 128 * 1024;
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
if (!joeyInit(&config)) {
|
if (!joeyInit(&config)) {
|
||||||
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
fprintf(stderr, "joeyInit failed: %s\n", joeyLastError());
|
||||||
|
|
@ -154,8 +154,7 @@ int main(void) {
|
||||||
vy = 1;
|
vy = 1;
|
||||||
haveBackup = false;
|
haveBackup = false;
|
||||||
|
|
||||||
spriteSaveUnder(screen, ball, x, y, &backup);
|
spriteSaveAndDraw(screen, ball, x, y, &backup);
|
||||||
spriteDraw(screen, ball, x, y);
|
|
||||||
stagePresentRect(backup.x, backup.y, backup.width, backup.height);
|
stagePresentRect(backup.x, backup.y, backup.width, backup.height);
|
||||||
haveBackup = true;
|
haveBackup = true;
|
||||||
|
|
||||||
|
|
@ -189,8 +188,7 @@ int main(void) {
|
||||||
if (y <= 0) { y = 0; vy = (int16_t)-vy; }
|
if (y <= 0) { y = 0; vy = (int16_t)-vy; }
|
||||||
if (y >= SURFACE_HEIGHT - BALL_H) { y = SURFACE_HEIGHT - BALL_H; vy = (int16_t)-vy; }
|
if (y >= SURFACE_HEIGHT - BALL_H) { y = SURFACE_HEIGHT - BALL_H; vy = (int16_t)-vy; }
|
||||||
|
|
||||||
spriteSaveUnder(screen, ball, x, y, &backup);
|
spriteSaveAndDraw(screen, ball, x, y, &backup);
|
||||||
spriteDraw(screen, ball, x, y);
|
|
||||||
|
|
||||||
// Bounding box of (old rect) U (new rect). For typical
|
// Bounding box of (old rect) U (new rect). For typical
|
||||||
// small-step motion the rects overlap heavily so the union
|
// small-step motion the rects overlap heavily so the union
|
||||||
|
|
|
||||||
350
examples/uber/uber.c
Normal file
350
examples/uber/uber.c
Normal file
|
|
@ -0,0 +1,350 @@
|
||||||
|
// Uber demo: exercise every JoeyLib public API and measure throughput
|
||||||
|
// of the per-frame-hot ones. Results are written to joeylog.txt via
|
||||||
|
// joeyLogF. A green screen on exit means the run completed.
|
||||||
|
//
|
||||||
|
// Timing model: each test aligns to a VBL boundary via joeyWaitVBL,
|
||||||
|
// records the starting joeyFrameCount, then runs the op in a tight
|
||||||
|
// loop polling joeyFrameCount until UBER_FRAMES frames have elapsed.
|
||||||
|
// Reported metric is ops/sec, computed as iters * joeyFrameHz() /
|
||||||
|
// UBER_FRAMES so results are directly comparable across ports
|
||||||
|
// regardless of CPU speed or VBL rate.
|
||||||
|
//
|
||||||
|
// joeyFrameCount is wall-clock-based per port; the per-iter poll
|
||||||
|
// adds ~10-30 cyc per op which shows up as noise on the very
|
||||||
|
// fastest ops but is below ~5% even for ~500 cyc/op work.
|
||||||
|
//
|
||||||
|
// One-shot ops (spriteCompile) get one call each, timed by frame
|
||||||
|
// delta -- coarser but representative.
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#include <joey/joey.h>
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Timing primitives -----
|
||||||
|
|
||||||
|
// 4-frame measurement window. Long enough that loop overhead doesn't
|
||||||
|
// dominate; short enough to keep the full demo run under ~10 sec.
|
||||||
|
#define UBER_FRAMES 4u
|
||||||
|
|
||||||
|
|
||||||
|
typedef void (*OpFn)(void);
|
||||||
|
|
||||||
|
static const char *gCurName = "(none)";
|
||||||
|
static SurfaceT *gStage = NULL;
|
||||||
|
static SpriteT *gSprite = NULL;
|
||||||
|
static SpriteBackupT gBackup;
|
||||||
|
static unsigned char gBackupBytes[256];
|
||||||
|
|
||||||
|
static TileT gTileScratch;
|
||||||
|
|
||||||
|
|
||||||
|
// Run `op` in a tight loop until `targetFrames` joeyFrameCount ticks
|
||||||
|
// have elapsed. Returns iterations completed.
|
||||||
|
static unsigned long runForFrames(OpFn op, unsigned int targetFrames) {
|
||||||
|
unsigned long count;
|
||||||
|
uint16_t startFrame;
|
||||||
|
|
||||||
|
count = 0UL;
|
||||||
|
|
||||||
|
joeyWaitVBL();
|
||||||
|
startFrame = joeyFrameCount();
|
||||||
|
|
||||||
|
while ((uint16_t)(joeyFrameCount() - startFrame) < targetFrames) {
|
||||||
|
op();
|
||||||
|
count++;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Time and log one op. Reports iters / N frames AND the derived
|
||||||
|
// ops/sec so per-port results are directly comparable against IIgs
|
||||||
|
// regardless of CPU speed or display refresh rate.
|
||||||
|
static void timeOp(const char *name, OpFn op) {
|
||||||
|
unsigned long iters;
|
||||||
|
unsigned long opsPerSec;
|
||||||
|
|
||||||
|
gCurName = name;
|
||||||
|
|
||||||
|
iters = runForFrames(op, UBER_FRAMES);
|
||||||
|
|
||||||
|
if (iters == 0UL) {
|
||||||
|
joeyLogF("UBER: %s: 0 iters (op too slow?)\n", name);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
opsPerSec = (iters * (unsigned long)joeyFrameHz()) / (unsigned long)UBER_FRAMES;
|
||||||
|
joeyLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec\n",
|
||||||
|
name, iters, UBER_FRAMES, opsPerSec);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Test ops -----
|
||||||
|
|
||||||
|
// Drawing / surface ops. Each wrapper issues exactly one library call
// with fixed arguments against gStage so it fits the OpFn signature.

static void op_drawPixel(void) {
    drawPixel(gStage, 100, 100, 5);
}

static void op_drawLineH(void) {
    drawLine(gStage, 0, 50, 319, 50, 5);
}

static void op_drawLineV(void) {
    drawLine(gStage, 50, 0, 50, 199, 5);
}

static void op_drawLineDiag(void) {
    drawLine(gStage, 0, 0, 319, 199, 5);
}

static void op_drawRect(void) {
    drawRect(gStage, 10, 10, 100, 100, 5);
}

static void op_drawCircleSmall(void) {
    drawCircle(gStage, 160, 100, 16, 5);
}

static void op_drawCircleLarge(void) {
    drawCircle(gStage, 160, 100, 80, 5);
}

static void op_fillRectSmall(void) {
    fillRect(gStage, 20, 20, 16, 16, 7);
}

static void op_fillRectMid(void) {
    fillRect(gStage, 20, 20, 80, 80, 7);
}

static void op_fillRectFull(void) {
    fillRect(gStage, 0, 0, 320, 200, 7);
}

static void op_fillCircle(void) {
    fillCircle(gStage, 160, 100, 40, 7);
}

static void op_samplePixel(void) {
    // Result is deliberately discarded; only the call is being timed.
    (void)samplePixel(gStage, 100, 100);
}

static void op_surfaceClear(void) {
    surfaceClear(gStage, 0);
}
|
||||||
|
|
||||||
|
static void op_paletteSet(void) {
|
||||||
|
static uint16_t colors[16] = {
|
||||||
|
0x000, 0xF00, 0x0F0, 0x00F, 0xFF0, 0xF0F, 0x0FF, 0xFFF,
|
||||||
|
0x800, 0x080, 0x008, 0x880, 0x808, 0x088, 0x888, 0x444
|
||||||
|
};
|
||||||
|
paletteSet(gStage, 0, colors);
|
||||||
|
}
|
||||||
|
static void op_scbSetRange (void) { scbSetRange (gStage, 0, 199, 0); }
|
||||||
|
|
||||||
|
static void op_tileFill (void) { tileFill (gStage, 5, 5, 7); }
|
||||||
|
static void op_tileCopy (void) { tileCopy (gStage, 6, 6, gStage, 5, 5); }
|
||||||
|
static void op_tileCopyMasked (void) { tileCopyMasked (gStage, 7, 7, gStage, 5, 5, 0); }
|
||||||
|
static void op_tilePaste (void) { tilePaste (gStage, 8, 8, &gTileScratch); }
|
||||||
|
static void op_tileSnap (void) { tileSnap (gStage, 5, 5, &gTileScratch); }
|
||||||
|
|
||||||
|
static int16_t gSpriteX = 40;
|
||||||
|
static int16_t gSpriteY = 30;
|
||||||
|
|
||||||
|
static void op_spriteSave (void) { spriteSaveUnder (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); }
|
||||||
|
static void op_spriteDraw (void) { spriteDraw (gStage, gSprite, gSpriteX, gSpriteY); }
|
||||||
|
static void op_spriteRestore (void) { spriteRestoreUnder(gStage, &gBackup); }
|
||||||
|
static void op_spriteSaveAndDraw (void) { spriteSaveAndDraw (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); }
|
||||||
|
|
||||||
|
// Present ops: the whole stage, a 16x16 rect at (40, 30), and a
// 320x200 rect at the origin.

static void op_stagePresent(void) {
    stagePresent();
}

static void op_stagePresentRect8(void) {
    stagePresentRect(40, 30, 16, 16);
}

static void op_stagePresentRectF(void) {
    stagePresentRect(0, 0, 320, 200);
}
|
||||||
|
|
||||||
|
static void op_inputPoll (void) { joeyInputPoll(); }
|
||||||
|
static void op_keyDown (void) { (void)joeyKeyDown(KEY_A); }
|
||||||
|
static void op_keyPressed (void) { (void)joeyKeyPressed(KEY_A); }
|
||||||
|
static void op_mouseX (void) { (void)joeyMouseX(); }
|
||||||
|
static void op_joyConnected (void) { (void)joeyJoystickConnected(JOYSTICK_1); }
|
||||||
|
|
||||||
|
// Audio ops: the per-frame tick and the "is a module playing" query.

static void op_audioFrameTick(void) {
    joeyAudioFrameTick();
}

static void op_audioIsPlaying(void) {
    (void)joeyAudioIsPlayingMod();
}
|
||||||
|
|
||||||
|
static void op_surfaceMarkDirty(void) { /* drawPixel already marks; use fill instead */
|
||||||
|
fillRect(gStage, 0, 0, 32, 32, 0); }
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Build the ball sprite procedurally -----
|
||||||
|
|
||||||
|
// The ball is BALL_TILES_X by BALL_TILES_Y tiles, each tile occupying
// 32 bytes in the sprite's tile buffer.
#define BALL_TILES_X 2
#define BALL_TILES_Y 2
#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * 32u)

// Ball artwork as authored: 16 rows of 8 bytes, stored row by row
// across the full width of the ball.
static const uint8_t gBallAuthored[16 * 8] = {
    0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00,
    0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00,
    0x02, 0x22, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x02, 0x23, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x22, 0x33, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00,
    0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x22, 0x22, 0x00, 0x00, 0x00
};

// Same artwork rearranged tile by tile; filled in by buildBallSprite.
static uint8_t gBallTiles[BALL_TILE_BYTES];

// Rearrange gBallAuthored (whole-bitmap rows) into gBallTiles (tile
// after tile, tiles ordered left-to-right then top-to-bottom, each
// tile stored as 8 rows of 4 bytes).
//
// The authored bitmap is walked one row at a time; every row is cut
// into 4-byte pieces and each piece is dropped into the matching row
// of the tile it belongs to.
static void buildBallSprite(void) {
    enum {
        kRowsPerTile    = 8,   // rows in one tile
        kBytesPerRow    = 4,   // bytes in one tile row
        kBytesPerTile   = 32,  // kRowsPerTile * kBytesPerRow
        kAuthoredStride = 8    // bytes per row of gBallAuthored
    };
    uint16_t       srcRow;
    uint16_t       tileCol;
    uint16_t       tileRow;
    uint16_t       rowInTile;
    uint16_t       i;
    const uint8_t *src;
    uint8_t       *dst;

    for (srcRow = 0; srcRow < BALL_TILES_Y * kRowsPerTile; srcRow++) {
        tileRow   = (uint16_t)(srcRow / kRowsPerTile);
        rowInTile = (uint16_t)(srcRow % kRowsPerTile);

        for (tileCol = 0; tileCol < BALL_TILES_X; tileCol++) {
            src = &gBallAuthored[srcRow * kAuthoredStride + tileCol * kBytesPerRow];
            dst = &gBallTiles[(tileRow * BALL_TILES_X + tileCol) * kBytesPerTile
                              + rowInTile * kBytesPerRow];

            for (i = 0; i < kBytesPerRow; i++) {
                dst[i] = src[i];
            }
        }
    }
}
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Main -----
|
||||||
|
|
||||||
|
static void runAllTests(void) {
|
||||||
|
joeyLogF("UBER: ----- begin -----\n");
|
||||||
|
|
||||||
|
// Surface / palette / SCB.
|
||||||
|
timeOp("surfaceClear", op_surfaceClear);
|
||||||
|
timeOp("paletteSet", op_paletteSet);
|
||||||
|
timeOp("scbSetRange", op_scbSetRange);
|
||||||
|
|
||||||
|
// Drawing primitives.
|
||||||
|
timeOp("drawPixel", op_drawPixel);
|
||||||
|
timeOp("drawLine H", op_drawLineH);
|
||||||
|
timeOp("drawLine V", op_drawLineV);
|
||||||
|
timeOp("drawLine diag", op_drawLineDiag);
|
||||||
|
timeOp("drawRect 100x100", op_drawRect);
|
||||||
|
timeOp("drawCircle r=16", op_drawCircleSmall);
|
||||||
|
timeOp("drawCircle r=80", op_drawCircleLarge);
|
||||||
|
timeOp("fillRect 16x16", op_fillRectSmall);
|
||||||
|
timeOp("fillRect 80x80", op_fillRectMid);
|
||||||
|
timeOp("fillRect 320x200", op_fillRectFull);
|
||||||
|
timeOp("fillCircle r=40", op_fillCircle);
|
||||||
|
timeOp("samplePixel", op_samplePixel);
|
||||||
|
|
||||||
|
// Tiles. Seed scratch tile + dest cells with non-zero pixels first.
|
||||||
|
fillRect(gStage, 0, 0, 320, 64, 7);
|
||||||
|
tileSnap(gStage, 5, 5, &gTileScratch);
|
||||||
|
timeOp("tileFill", op_tileFill);
|
||||||
|
timeOp("tileCopy", op_tileCopy);
|
||||||
|
timeOp("tileCopyMasked", op_tileCopyMasked);
|
||||||
|
timeOp("tilePaste", op_tilePaste);
|
||||||
|
timeOp("tileSnap", op_tileSnap);
|
||||||
|
|
||||||
|
// Sprites. Background must be non-empty so save-under has work
|
||||||
|
// to do (otherwise it's a 4 KB memset of zeros, atypical).
|
||||||
|
surfaceClear(gStage, 4);
|
||||||
|
timeOp("spriteSaveUnder", op_spriteSave);
|
||||||
|
timeOp("spriteDraw", op_spriteDraw);
|
||||||
|
timeOp("spriteRestoreUnder", op_spriteRestore);
|
||||||
|
timeOp("spriteSaveAndDraw", op_spriteSaveAndDraw);
|
||||||
|
|
||||||
|
// Present.
|
||||||
|
timeOp("stagePresent full", op_stagePresent);
|
||||||
|
timeOp("stagePresentRect 8b",op_stagePresentRect8);
|
||||||
|
timeOp("stagePresentRect F", op_stagePresentRectF);
|
||||||
|
|
||||||
|
// Input.
|
||||||
|
timeOp("joeyInputPoll", op_inputPoll);
|
||||||
|
timeOp("joeyKeyDown", op_keyDown);
|
||||||
|
timeOp("joeyKeyPressed", op_keyPressed);
|
||||||
|
timeOp("joeyMouseX", op_mouseX);
|
||||||
|
timeOp("joeyJoyConnected", op_joyConnected);
|
||||||
|
|
||||||
|
// Audio.
|
||||||
|
timeOp("joeyAudioFrameTick", op_audioFrameTick);
|
||||||
|
timeOp("joeyAudioIsPlayingMod", op_audioIsPlaying);
|
||||||
|
|
||||||
|
// Surface mark dirty (via fillRect's mark step).
|
||||||
|
timeOp("surfaceMarkDirtyRect (via fillRect 32x32)", op_surfaceMarkDirty);
|
||||||
|
|
||||||
|
joeyLogF("UBER: ----- end -----\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
JoeyConfigT config;
|
||||||
|
uint16_t pal[16];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
config.hostMode = HOST_MODE_TAKEOVER;
|
||||||
|
config.codegenBytes = 8 * 1024;
|
||||||
|
config.maxSurfaces = 4;
|
||||||
|
config.audioBytes = 64UL * 1024;
|
||||||
|
config.assetBytes = 128UL * 1024;
|
||||||
|
|
||||||
|
if (!joeyInit(&config)) {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
gStage = stageGet();
|
||||||
|
if (gStage == NULL) {
|
||||||
|
joeyShutdown();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// A simple visible palette so users see SOMETHING during the run.
|
||||||
|
for (i = 0; i < 16; i++) {
|
||||||
|
pal[i] = (uint16_t)((i << 8) | (i << 4) | i); // grey ramp
|
||||||
|
}
|
||||||
|
pal[ 0] = 0x000;
|
||||||
|
pal[ 1] = 0x800; // dark red (running)
|
||||||
|
pal[ 2] = 0x080; // green (done)
|
||||||
|
pal[ 3] = 0x008; // blue
|
||||||
|
pal[ 5] = 0xFF0; // yellow (test pixels)
|
||||||
|
pal[ 7] = 0xFFF; // white (fills)
|
||||||
|
pal[15] = 0xF00; // red
|
||||||
|
paletteSet(gStage, 0, pal);
|
||||||
|
scbSetRange(gStage, 0, 199, 0);
|
||||||
|
|
||||||
|
// Indicate "running": red bar at top of screen.
|
||||||
|
surfaceClear(gStage, 0);
|
||||||
|
fillRect(gStage, 0, 0, 320, 8, 1);
|
||||||
|
stagePresent();
|
||||||
|
|
||||||
|
buildBallSprite();
|
||||||
|
gSprite = spriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y, SPRITE_FLAGS_NONE);
|
||||||
|
if (gSprite == NULL) {
|
||||||
|
joeyLog("UBER: spriteCreate failed");
|
||||||
|
joeyShutdown();
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
// spriteCompile is a one-shot. Time at frame resolution.
|
||||||
|
{
|
||||||
|
uint16_t before;
|
||||||
|
|
||||||
|
joeyWaitVBL();
|
||||||
|
before = joeyFrameCount();
|
||||||
|
if (!spriteCompile(gSprite)) {
|
||||||
|
joeyLog("UBER: spriteCompile failed");
|
||||||
|
}
|
||||||
|
while (joeyFrameCount() == before) {
|
||||||
|
/* wait for next VBL edge */
|
||||||
|
}
|
||||||
|
joeyLogF("UBER: spriteCompile: 1 call in <= 1 frame\n");
|
||||||
|
}
|
||||||
|
gBackup.bytes = gBackupBytes;
|
||||||
|
|
||||||
|
// Audio: only init/shutdown is exercised. Triggering joeyAudioPlaySfx
|
||||||
|
// without first calling joeyAudioPlayMod leaves NTP's engine in a
|
||||||
|
// half-initialized state -- NTPstreamsound is designed to OVERLAY on
|
||||||
|
// an already-running module. Without NTPprepare/NTPplay first, the
|
||||||
|
// streamer oscillator is fired but no music tick ever advances or
|
||||||
|
// silences it, and you get a stuck high-pitched scream. UBER doesn't
|
||||||
|
// ship a MOD asset, so we skip the SFX exercise. The frame-tick and
|
||||||
|
// isPlayingMod calls below still get timed (both are no-op fast
|
||||||
|
// paths on IIgs).
|
||||||
|
if (joeyAudioInit()) {
|
||||||
|
joeyLogF("UBER: audioInit OK\n");
|
||||||
|
} else {
|
||||||
|
joeyLogF("UBER: audioInit failed (skipping audio)\n");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset stage + run all per-frame timed tests.
|
||||||
|
surfaceClear(gStage, 0);
|
||||||
|
fillRect(gStage, 0, 0, 320, 8, 1);
|
||||||
|
stagePresent();
|
||||||
|
|
||||||
|
runAllTests();
|
||||||
|
|
||||||
|
// Done. Green screen + waitForKey.
|
||||||
|
surfaceClear(gStage, 2);
|
||||||
|
stagePresent();
|
||||||
|
|
||||||
|
joeyLogF("UBER: press any key to exit\n");
|
||||||
|
joeyWaitForAnyKey();
|
||||||
|
|
||||||
|
spriteDestroy(gSprite);
|
||||||
|
joeyShutdown();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
@ -37,4 +37,17 @@ const char *joeyVersionString(void);
|
||||||
// always a hardware-level wait, not a software timer.
|
// always a hardware-level wait, not a software timer.
|
||||||
void joeyWaitVBL(void);
|
void joeyWaitVBL(void);
|
||||||
|
|
||||||
|
// Monotonic 16-bit frame counter. Polled by callers; ports detect
|
||||||
|
// the rising edge inside this call (IIgs $C019, DOS $3DA, Amiga
|
||||||
|
// VPOSR) or expose a counter maintained by a VBL ISR (Atari ST).
|
||||||
|
// Caller must poll faster than 2 * joeyFrameHz() so no edge is
|
||||||
|
// missed. Used by benchmarks and frame-rate-independent animation.
|
||||||
|
uint16_t joeyFrameCount(void);
|
||||||
|
|
||||||
|
// Nominal display frame rate in Hz: 50 (Amiga PAL), 60 (IIgs / ST
|
||||||
|
// NTSC default), 70 (VGA mode 13h). The actual VBL cadence may
|
||||||
|
// drift slightly; the value reported here is what benchmarks divide
|
||||||
|
// by to convert iters-per-N-frames to ops/sec.
|
||||||
|
uint16_t joeyFrameHz(void);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -103,6 +103,20 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit
|
||||||
// by other writes that overlapped its captured region.
|
// by other writes that overlapped its captured region.
|
||||||
void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup);
|
void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup);
|
||||||
|
|
||||||
|
// Combined save-then-draw entry point. The common animation pattern
|
||||||
|
// captures the destination bytes about to be overwritten, then draws
|
||||||
|
// the sprite. Both ops share validation, the destination ptr is
|
||||||
|
// computed once, and a single dirty-rect mark covers both. Saves
|
||||||
|
// roughly one full dispatcher chain (~150 cyc on IIgs ORCA-C) per
|
||||||
|
// frame versus calling spriteSaveUnder + spriteDraw separately.
|
||||||
|
//
|
||||||
|
// Identical semantics to:
|
||||||
|
// spriteSaveUnder(s, sp, x, y, backup);
|
||||||
|
// spriteDraw(s, sp, x, y);
|
||||||
|
// modulo: the dirty rect is marked once for the union (which here is
|
||||||
|
// just the draw rect, since save doesn't write).
|
||||||
|
void spriteSaveAndDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup);
|
||||||
|
|
||||||
// Snapshot an 8x8-aligned region of a SurfaceT into a new SpriteT.
|
// Snapshot an 8x8-aligned region of a SurfaceT into a new SpriteT.
|
||||||
// The captured pixel data is copied into a sprite-owned buffer so
|
// The captured pixel data is copied into a sprite-owned buffer so
|
||||||
// the source surface can be modified afterwards. Width and height
|
// the source surface can be modified afterwards. Width and height
|
||||||
|
|
|
||||||
|
|
@ -70,6 +70,8 @@ SPRITE_SRC := $(EXAMPLES)/sprite/sprite.c
|
||||||
SPRITE_BIN := $(BINDIR)/Sprite
|
SPRITE_BIN := $(BINDIR)/Sprite
|
||||||
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
||||||
AUDIO_BIN := $(BINDIR)/Audio
|
AUDIO_BIN := $(BINDIR)/Audio
|
||||||
|
UBER_SRC := $(EXAMPLES)/uber/uber.c
|
||||||
|
UBER_BIN := $(BINDIR)/Uber
|
||||||
|
|
||||||
# Game data lives under bin/DATA/, ready to be copied into the
|
# Game data lives under bin/DATA/, ready to be copied into the
|
||||||
# scratch JOEYLIB hard-drive dir staged by scripts/run-amiga.sh.
|
# scratch JOEYLIB hard-drive dir staged by scripts/run-amiga.sh.
|
||||||
|
|
@ -78,7 +80,7 @@ DATA_DIR := $(BINDIR)/DATA
|
||||||
DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx
|
DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx
|
||||||
|
|
||||||
.PHONY: all amiga clean-amiga
|
.PHONY: all amiga clean-amiga
|
||||||
all amiga: $(LIB) $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(DATA_FILES)
|
all amiga: $(LIB) $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) $(DATA_FILES)
|
||||||
|
|
||||||
$(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c
|
$(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
|
|
@ -140,6 +142,10 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
$(AMIGA_CC) $(CFLAGS) $< $(LIB) -o $@ $(LDFLAGS)
|
$(AMIGA_CC) $(CFLAGS) $< $(LIB) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
$(UBER_BIN): $(UBER_SRC) $(LIB)
|
||||||
|
@mkdir -p $(dir $@)
|
||||||
|
$(AMIGA_CC) $(CFLAGS) $< $(LIB) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod
|
$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod
|
||||||
@mkdir -p $(DATA_DIR)
|
@mkdir -p $(DATA_DIR)
|
||||||
cp $< $@
|
cp $< $@
|
||||||
|
|
|
||||||
|
|
@ -55,6 +55,8 @@ SPRITE_SRC := $(EXAMPLES)/sprite/sprite.c
|
||||||
SPRITE_BIN := $(BINDIR)/SPRITE.PRG
|
SPRITE_BIN := $(BINDIR)/SPRITE.PRG
|
||||||
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
||||||
AUDIO_BIN := $(BINDIR)/AUDIO.PRG
|
AUDIO_BIN := $(BINDIR)/AUDIO.PRG
|
||||||
|
UBER_SRC := $(EXAMPLES)/uber/uber.c
|
||||||
|
UBER_BIN := $(BINDIR)/UBER.PRG
|
||||||
|
|
||||||
# Game data lives under bin/DATA/, alongside the binaries Hatari picks
|
# Game data lives under bin/DATA/, alongside the binaries Hatari picks
|
||||||
# up when bin/ is mounted as the GEMDOS C: drive. audio.c fopens
|
# up when bin/ is mounted as the GEMDOS C: drive. audio.c fopens
|
||||||
|
|
@ -63,7 +65,7 @@ DATA_DIR := $(BINDIR)/DATA
|
||||||
DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx
|
DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx
|
||||||
|
|
||||||
.PHONY: all atarist clean-atarist
|
.PHONY: all atarist clean-atarist
|
||||||
all atarist: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(DATA_FILES)
|
all atarist: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) $(DATA_FILES)
|
||||||
|
|
||||||
$(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c
|
$(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
|
|
@ -132,6 +134,10 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
$(ST_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@ $(LDFLAGS)
|
$(ST_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
|
$(UBER_BIN): $(UBER_SRC) $(LIB)
|
||||||
|
@mkdir -p $(dir $@)
|
||||||
|
$(ST_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@ $(LDFLAGS)
|
||||||
|
|
||||||
$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod
|
$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod
|
||||||
@mkdir -p $(DATA_DIR)
|
@mkdir -p $(DATA_DIR)
|
||||||
cp $< $@
|
cp $< $@
|
||||||
|
|
|
||||||
|
|
@ -49,6 +49,8 @@ SPRITE_SRC := $(EXAMPLES)/sprite/sprite.c
|
||||||
SPRITE_BIN := $(BINDIR)/SPRITE.EXE
|
SPRITE_BIN := $(BINDIR)/SPRITE.EXE
|
||||||
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
||||||
AUDIO_BIN := $(BINDIR)/AUDIO.EXE
|
AUDIO_BIN := $(BINDIR)/AUDIO.EXE
|
||||||
|
UBER_SRC := $(EXAMPLES)/uber/uber.c
|
||||||
|
UBER_BIN := $(BINDIR)/UBER.EXE
|
||||||
|
|
||||||
# Game data lives under bin/DATA/, alongside the binaries DOSBox picks
|
# Game data lives under bin/DATA/, alongside the binaries DOSBox picks
|
||||||
# up when bin/ is mounted as C:. audio.c fopens "DATA/test.mod" etc.
|
# up when bin/ is mounted as C:. audio.c fopens "DATA/test.mod" etc.
|
||||||
|
|
@ -56,7 +58,7 @@ DATA_DIR := $(BINDIR)/DATA
|
||||||
DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx
|
DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx
|
||||||
|
|
||||||
.PHONY: all dos clean-dos
|
.PHONY: all dos clean-dos
|
||||||
all dos: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(DATA_FILES)
|
all dos: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) $(DATA_FILES)
|
||||||
|
|
||||||
$(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c
|
$(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
|
|
@ -121,6 +123,11 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB)
|
||||||
$(DOS_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@
|
$(DOS_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@
|
||||||
$(DOS_EMBED_DPMI) $@
|
$(DOS_EMBED_DPMI) $@
|
||||||
|
|
||||||
|
$(UBER_BIN): $(UBER_SRC) $(LIB)
|
||||||
|
@mkdir -p $(dir $@)
|
||||||
|
$(DOS_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@
|
||||||
|
$(DOS_EMBED_DPMI) $@
|
||||||
|
|
||||||
$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod
|
$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod
|
||||||
@mkdir -p $(DATA_DIR)
|
@mkdir -p $(DATA_DIR)
|
||||||
cp $< $@
|
cp $< $@
|
||||||
|
|
|
||||||
53
make/iigs.mk
53
make/iigs.mk
|
|
@ -49,23 +49,13 @@ NTP_BIN := $(BUILD)/audio/ntpplayer.bin
|
||||||
NTP_ASM := $(BUILD)/audio/ntpdata.asm
|
NTP_ASM := $(BUILD)/audio/ntpdata.asm
|
||||||
IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32
|
IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32
|
||||||
|
|
||||||
# IMPORTANT: CODEGEN_SRCS (specifically spriteEmitIigs.c) MUST be the
|
LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM) $(CODEGEN_SRCS)
|
||||||
# first entry after the main object in the link order. ORCA-Linker's
|
|
||||||
# bank assignment is order-sensitive: when spriteEmitIigs.c lands at
|
|
||||||
# any later position, the linker assigns SPRITECG to a bank where its
|
|
||||||
# intra-OMF-segment static-symbol relocations (emitMvnCopyRoutine,
|
|
||||||
# shiftedByteAt, writeLE16) can't be encoded -- you get cryptic
|
|
||||||
# "Addressing error" / "Unresolved reference Label: ..." failures
|
|
||||||
# whose root cause is bank packing, not source. Putting CODEGEN_SRCS
|
|
||||||
# first gives SPRITECG prime placement and the relocations resolve.
|
|
||||||
# This was the underlying cause of feedback_orca_link_segment_count
|
|
||||||
# cases 2-5 (we'd been working around it by managing _ROOT mass).
|
|
||||||
LIB_SRCS := $(CODEGEN_SRCS) $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM)
|
|
||||||
|
|
||||||
HELLO_SRC := $(EXAMPLES)/hello/hello.c
|
# HELLO and PATTERN are intentionally omitted from this list. The UBER
|
||||||
HELLO_BIN := $(BINDIR)/HELLO
|
# demo (below) exercises every public API, including what those two
|
||||||
PATTERN_SRC := $(EXAMPLES)/pattern/pattern.c
|
# small examples covered, and the IIgs disk image was running out of
|
||||||
PATTERN_BIN := $(BINDIR)/PATTERN
|
# room. Source for HELLO/PATTERN is still in examples/{hello,pattern}/
|
||||||
|
# for reference and for other ports that want them.
|
||||||
DRAW_SRC := $(EXAMPLES)/draw/draw.c
|
DRAW_SRC := $(EXAMPLES)/draw/draw.c
|
||||||
DRAW_BIN := $(BINDIR)/DRAW
|
DRAW_BIN := $(BINDIR)/DRAW
|
||||||
KEYS_SRC := $(EXAMPLES)/keys/keys.c
|
KEYS_SRC := $(EXAMPLES)/keys/keys.c
|
||||||
|
|
@ -74,6 +64,8 @@ JOY_SRC := $(EXAMPLES)/joy/joy.c
|
||||||
JOY_BIN := $(BINDIR)/JOY
|
JOY_BIN := $(BINDIR)/JOY
|
||||||
SPRITE_SRC := $(EXAMPLES)/sprite/sprite.c
|
SPRITE_SRC := $(EXAMPLES)/sprite/sprite.c
|
||||||
SPRITE_BIN := $(BINDIR)/SPRITE
|
SPRITE_BIN := $(BINDIR)/SPRITE
|
||||||
|
UBER_SRC := $(EXAMPLES)/uber/uber.c
|
||||||
|
UBER_BIN := $(BINDIR)/UBER
|
||||||
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
AUDIO_SRC := $(EXAMPLES)/audio/audio.c
|
||||||
AUDIO_BIN := $(BINDIR)/AUDIO
|
AUDIO_BIN := $(BINDIR)/AUDIO
|
||||||
AUDIO_MOD := $(REPO_DIR)/assets/test.mod
|
AUDIO_MOD := $(REPO_DIR)/assets/test.mod
|
||||||
|
|
@ -128,16 +120,6 @@ $(NTP_ASM): $(NTP_BIN) $(REPO_DIR)/toolchains/iigs/bin-to-asm.sh
|
||||||
# everywhere, so library asm can take SurfaceT* args via one
|
# everywhere, so library asm can take SurfaceT* args via one
|
||||||
# consistent ABI (small-mm 16-bit pointers truncated bank bytes,
|
# consistent ABI (small-mm 16-bit pointers truncated bank bytes,
|
||||||
# which broke any asm that wanted to address bank-1 stage memory).
|
# which broke any asm that wanted to address bank-1 stage memory).
|
||||||
$(HELLO_BIN): $(HELLO_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
|
||||||
@mkdir -p $(dir $@)
|
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(HELLO_SRC) $(LIB_SRCS)
|
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
|
||||||
|
|
||||||
$(PATTERN_BIN): $(PATTERN_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
|
||||||
@mkdir -p $(dir $@)
|
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(PATTERN_SRC) $(LIB_SRCS)
|
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
|
||||||
|
|
||||||
$(DRAW_BIN): $(DRAW_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
$(DRAW_BIN): $(DRAW_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(DRAW_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(DRAW_SRC) $(LIB_SRCS)
|
||||||
|
|
@ -158,6 +140,17 @@ $(SPRITE_BIN): $(SPRITE_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
|
# UBER bumps user stack to 16 KB. ORCA-C's default user stack is small
|
||||||
|
# (~1 KB) and vfprintf's parsing buffer + the demo's own stack-local
|
||||||
|
# format buffers were spilling past it -- the symptom was a crash to
|
||||||
|
# monitor on the second varargs-style joeyLogF call. The hand-rolled
|
||||||
|
# decimal formatter in uber.c also uses larger stack-local buffers
|
||||||
|
# (line[96], num[16]) than typical demos. 16 KB is plenty of headroom.
|
||||||
|
$(UBER_BIN): $(UBER_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
|
@mkdir -p $(dir $@)
|
||||||
|
$(IIGS_BUILD) -b -s 16384 $(IIX_INCLUDES) -o $@ $(UBER_SRC) $(LIB_SRCS)
|
||||||
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
# Convert the cross-platform .MOD asset to NinjaTrackerPlus runtime
|
# Convert the cross-platform .MOD asset to NinjaTrackerPlus runtime
|
||||||
# format via joeymod (which shells out to ntpconverter.php). Without
|
# format via joeymod (which shells out to ntpconverter.php). Without
|
||||||
# php-cli the conversion is skipped; in that case the IIgs disk just
|
# php-cli the conversion is skipped; in that case the IIgs disk just
|
||||||
|
|
@ -181,13 +174,13 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB_SRCS) $(NTP_ASM) $(IIGS_BUILD)
|
||||||
$(IIGS_BUILD) -b $(IIX_INCLUDES) -I $(EXAMPLES)/audio -o $@ $(AUDIO_SRC) $(LIB_SRCS)
|
$(IIGS_BUILD) -b $(IIX_INCLUDES) -I $(EXAMPLES)/audio -o $@ $(AUDIO_SRC) $(LIB_SRCS)
|
||||||
$(IIGS_IIX) chtyp -t S16 $@
|
$(IIGS_IIX) chtyp -t S16 $@
|
||||||
|
|
||||||
# Assemble an 800KB ProDOS 2img containing the examples, ready to
|
# Assemble a ProDOS 2img containing the examples, ready to mount in
|
||||||
# mount in GSplus alongside a GS/OS boot volume.
|
# GSplus alongside a GS/OS boot volume.
|
||||||
iigs-disk: $(DISK_IMG)
|
iigs-disk: $(DISK_IMG)
|
||||||
|
|
||||||
$(DISK_IMG): $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(AUDIO_DATA_FILES) $(IIGS_PACKAGE)
|
$(DISK_IMG): $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) $(AUDIO_DATA_FILES) $(IIGS_PACKAGE)
|
||||||
@mkdir -p $(dir $@)
|
@mkdir -p $(dir $@)
|
||||||
$(IIGS_PACKAGE) $@ $(HELLO_BIN) $(PATTERN_BIN) $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) -- $(AUDIO_DATA_FILES)
|
$(IIGS_PACKAGE) $@ $(DRAW_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(UBER_BIN) -- $(AUDIO_DATA_FILES)
|
||||||
|
|
||||||
clean-iigs:
|
clean-iigs:
|
||||||
rm -rf $(BUILD)
|
rm -rf $(BUILD)
|
||||||
|
|
|
||||||
|
|
@ -1,29 +1,24 @@
|
||||||
#!/usr/bin/env bash
|
#!/usr/bin/env bash
|
||||||
# Launch the built Apple IIgs examples in GSplus. GSplus is booted from
|
# Launch the built Apple IIgs examples in GSplus. GSplus is booted from
|
||||||
# a GS/OS 6.0.4 System disk (toolchains/emulators/support/gsos-system.po)
|
# a GS/OS 6.0.4 System disk (toolchains/emulators/support/gsos-system.po)
|
||||||
# with joey.2mg mounted as the data disk on slot 5 drive 2. The user
|
# with joey.2mg mounted as the data disk on slot 5 drive 2. GS/OS drops
|
||||||
# navigates to the JOEYLIB volume in Finder and double-clicks the
|
# to Finder; the user navigates to the JOEYLIB volume and double-clicks
|
||||||
# example to run it.
|
# whichever example they want to run.
|
||||||
#
|
#
|
||||||
# Unlike the other emulators, GS/OS does not auto-run on boot -- it
|
# No argument: GSplus has no way to dispatch a specific binary on boot
|
||||||
# drops to Finder. The argument just prints a reminder of which
|
# (Finder is interactive), so this script just stages the disk and
|
||||||
# example to launch.
|
# launches the emulator. The post-run trap below extracts joeylog.txt
|
||||||
#
|
# from the data disk so demos that left a breadcrumb file are visible
|
||||||
# scripts/run-iigs.sh # boots (Pattern hint)
|
# from the host shell after the emulator exits.
|
||||||
# scripts/run-iigs.sh hello # boots, hints HELLO
|
|
||||||
# scripts/run-iigs.sh draw # boots, hints DRAW
|
|
||||||
#
|
|
||||||
# Argument is any built example name (case-insensitive); upper-case
|
|
||||||
# it for the Finder hint and existence-check.
|
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
|
|
||||||
if [[ $# -gt 1 ]]; then
|
if [[ $# -ne 0 ]]; then
|
||||||
echo "usage: $0 [example-name]" >&2
|
echo "usage: $0" >&2
|
||||||
|
echo " (no arguments -- launch GSplus, pick the demo in Finder)" >&2
|
||||||
exit 2
|
exit 2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
prog=${1:-pattern}
|
|
||||||
repo=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
|
repo=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)
|
||||||
|
|
||||||
# profuse looks up its FST helpers under $GOLDEN_GATE / $ORCA_ROOT and
|
# profuse looks up its FST helpers under $GOLDEN_GATE / $ORCA_ROOT and
|
||||||
|
|
@ -38,18 +33,6 @@ sys_disk=$repo/toolchains/emulators/support/gsos-system.po
|
||||||
data_disk=$repo/build/iigs/bin/joey.2mg
|
data_disk=$repo/build/iigs/bin/joey.2mg
|
||||||
null_c600=$repo/toolchains/emulators/support/iigs-null-c600.rom
|
null_c600=$repo/toolchains/emulators/support/iigs-null-c600.rom
|
||||||
|
|
||||||
target=${prog^^}
|
|
||||||
bin_dir=$repo/build/iigs/bin
|
|
||||||
if [[ ! -f "$bin_dir/$target" ]]; then
|
|
||||||
echo "$bin_dir/$target not built. Run 'make iigs' first." >&2
|
|
||||||
if compgen -G "$bin_dir/*" > /dev/null; then
|
|
||||||
echo "available examples in $bin_dir:" >&2
|
|
||||||
find "$bin_dir" -maxdepth 1 -type f -printf '%f\n' \
|
|
||||||
| grep -vE '\.2mg$|\.txt$' >&2 || true
|
|
||||||
fi
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
for f in "$gsplus" "$rom" "$sys_disk" "$data_disk" "$null_c600"; do
|
for f in "$gsplus" "$rom" "$sys_disk" "$data_disk" "$null_c600"; do
|
||||||
if [[ ! -f $f ]]; then
|
if [[ ! -f $f ]]; then
|
||||||
echo "missing: $f" >&2
|
echo "missing: $f" >&2
|
||||||
|
|
@ -123,7 +106,7 @@ cat <<EOF
|
||||||
GSplus launching GS/OS 6.0.4.
|
GSplus launching GS/OS 6.0.4.
|
||||||
Once Finder is up:
|
Once Finder is up:
|
||||||
1. Open the JOEYLIB disk on the desktop.
|
1. Open the JOEYLIB disk on the desktop.
|
||||||
2. Double-click $target to run.
|
2. Double-click whichever demo you want to run.
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# GSplus auto-creates config.kegs in its cwd on first run. cd into
|
# GSplus auto-creates config.kegs in its cwd on first run. cd into
|
||||||
|
|
|
||||||
|
|
@ -209,13 +209,9 @@ void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y)
|
||||||
|
|
||||||
{
|
{
|
||||||
uint8_t *destPtr;
|
uint8_t *destPtr;
|
||||||
uint8_t destBytes[4];
|
|
||||||
shift = (uint8_t)(x & 1);
|
shift = (uint8_t)(x & 1);
|
||||||
destPtr = &dst->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
destPtr = &dst->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
||||||
memcpy(destBytes, &destPtr, 4);
|
destAddr = (uint32_t)destPtr;
|
||||||
destAddr = (uint32_t)destBytes[0]
|
|
||||||
| ((uint32_t)destBytes[1] << 8)
|
|
||||||
| ((uint32_t)destBytes[2] << 16);
|
|
||||||
destOffset = (uint16_t)(destAddr & 0xFFFFu);
|
destOffset = (uint16_t)(destAddr & 0xFFFFu);
|
||||||
destBank = (uint8_t)((destAddr >> 16) & 0xFFu);
|
destBank = (uint8_t)((destAddr >> 16) & 0xFFu);
|
||||||
fnAddr = codegenArenaBaseAddr()
|
fnAddr = codegenArenaBaseAddr()
|
||||||
|
|
@ -248,9 +244,10 @@ void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y)
|
||||||
|
|
||||||
// fnAddr changes only on shift parity flips or sprite swaps.
|
// fnAddr changes only on shift parity flips or sprite swaps.
|
||||||
if (fnAddr != gDrawStubLastFnAddr) {
|
if (fnAddr != gDrawStubLastFnAddr) {
|
||||||
gSpriteCallStub[ 9] = (unsigned char)(fnAddr & 0xFFu);
|
const uint8_t *fnB_ = (const uint8_t *)&fnAddr;
|
||||||
gSpriteCallStub[10] = (unsigned char)((fnAddr >> 8) & 0xFFu);
|
gSpriteCallStub[ 9] = fnB_[0];
|
||||||
gSpriteCallStub[11] = (unsigned char)((fnAddr >> 16) & 0xFFu);
|
gSpriteCallStub[10] = fnB_[1];
|
||||||
|
gSpriteCallStub[11] = fnB_[2];
|
||||||
gDrawStubLastFnAddr = fnAddr;
|
gDrawStubLastFnAddr = fnAddr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -329,20 +326,41 @@ static void patchMvnBanks(uint8_t *routine, uint16_t heightPx, uint8_t dstBank,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Common helper: dump a 24-bit pointer's raw bytes via memcpy
|
|
||||||
// (avoiding ORCA-C's lossy (uint32_t) pointer cast under memorymodel
|
|
||||||
// 1) and split into low 16 bits + bank.
|
|
||||||
static void splitPointer(const void *ptr, uint16_t *outLo, uint8_t *outBank) {
|
|
||||||
uint8_t bytes[4];
|
|
||||||
uint32_t addr;
|
|
||||||
|
|
||||||
memcpy(bytes, &ptr, 4);
|
// Split a 24-bit pointer into its low 16 bits + bank byte. The
|
||||||
addr = (uint32_t)bytes[0]
|
// (uint32_t) cast works correctly in ORCA/C 2.2.1 (the 2.1.0 lossy-
|
||||||
| ((uint32_t)bytes[1] << 8)
|
// bank-byte bug is fixed). To avoid invoking the ~LSHR4 32-bit-shift
|
||||||
| ((uint32_t)bytes[2] << 16);
|
// helper for the `>> 16` to extract the bank byte, we cast to
|
||||||
*outLo = (uint16_t)(addr & 0xFFFFu);
|
// uint32_t and then byte-alias the storage -- gets the same bytes
|
||||||
*outBank = (uint8_t)((addr >> 16) & 0xFFu);
|
// with simple loads.
|
||||||
}
|
#define SPLIT_POINTER(_ptr, _outLo, _outBank) \
|
||||||
|
do { \
|
||||||
|
uint32_t spAddr_ = (uint32_t)(_ptr); \
|
||||||
|
const uint8_t *spB_ = (const uint8_t *)&spAddr_; \
|
||||||
|
*(_outLo) = (uint16_t)(spB_[0] | ((uint16_t)spB_[1] << 8)); \
|
||||||
|
*(_outBank) = spB_[2]; \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
// Backup-buffer pointer split cache. backup->bytes is a user-supplied
|
||||||
|
// buffer (e.g. a static array) and effectively never changes after
|
||||||
|
// the first call -- caching its split spares both Save and Restore the
|
||||||
|
// macro expansion per frame.
|
||||||
|
static const void *gLastBackupBytes = (const void *)0;
|
||||||
|
static uint16_t gLastBackupBytesLo = 0;
|
||||||
|
static uint8_t gLastBackupBytesBank = 0;
|
||||||
|
|
||||||
|
#define SPLIT_BACKUP_CACHED(_bytes, _outLo, _outBank) \
|
||||||
|
do { \
|
||||||
|
if ((const void *)(_bytes) == gLastBackupBytes) { \
|
||||||
|
*(_outLo) = gLastBackupBytesLo; \
|
||||||
|
*(_outBank) = gLastBackupBytesBank; \
|
||||||
|
} else { \
|
||||||
|
SPLIT_POINTER((_bytes), (_outLo), (_outBank)); \
|
||||||
|
gLastBackupBytes = (const void *)(_bytes); \
|
||||||
|
gLastBackupBytesLo = *(_outLo); \
|
||||||
|
gLastBackupBytesBank = *(_outBank); \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
|
||||||
void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
|
void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
|
||||||
|
|
@ -358,6 +376,10 @@ void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_
|
||||||
uint32_t fnAddr;
|
uint32_t fnAddr;
|
||||||
uint8_t *routine;
|
uint8_t *routine;
|
||||||
uint8_t *screenPtr;
|
uint8_t *screenPtr;
|
||||||
|
uint16_t cacheIdx; /* shift * SPRITE_OP_COUNT + SPRITE_OP_SAVE, computed once */
|
||||||
|
uint8_t *cachedDst; /* &sp->cachedDstBank[0][0] + cacheIdx */
|
||||||
|
uint8_t *cachedSrc; /* &sp->cachedSrcBank[0][0] + cacheIdx */
|
||||||
|
uint16_t routineOffset; /* sp->routineOffsets[shift][SPRITE_OP_SAVE], computed once */
|
||||||
|
|
||||||
shift = (uint8_t)(x & 1);
|
shift = (uint8_t)(x & 1);
|
||||||
clippedX = (int16_t)(x & ~1);
|
clippedX = (int16_t)(x & ~1);
|
||||||
|
|
@ -366,19 +388,39 @@ void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_
|
||||||
copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
|
copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
|
||||||
|
|
||||||
screenPtr = (uint8_t *)&src->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)clippedX >> 1)];
|
screenPtr = (uint8_t *)&src->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)clippedX >> 1)];
|
||||||
splitPointer(screenPtr, &screenLo, &screenBank);
|
SPLIT_POINTER(screenPtr, &screenLo, &screenBank);
|
||||||
splitPointer(backup->bytes, &backupLo, &backupBank);
|
SPLIT_BACKUP_CACHED(backup->bytes, &backupLo, &backupBank);
|
||||||
|
|
||||||
backup->sprite = sp;
|
backup->sprite = sp;
|
||||||
backup->x = clippedX;
|
backup->x = clippedX;
|
||||||
backup->y = y;
|
backup->y = y;
|
||||||
backup->width = (uint16_t)(copyBytes << 1);
|
backup->width = (uint16_t)(copyBytes << 1);
|
||||||
backup->height = heightPx;
|
backup->height = heightPx;
|
||||||
backup->sizeBytes = (uint16_t)(copyBytes * heightPx);
|
/* sizeBytes is constant per (sprite, shift); cache to dodge the
|
||||||
|
* per-call ~CUMUL2 (uint16_t * uint16_t) helper. The byte-pointer
|
||||||
|
* arithmetic avoids reintroducing ~MUL4 for the uint16_t array
|
||||||
|
* indexing. */
|
||||||
|
{
|
||||||
|
uint16_t *sizeCachePtr = (uint16_t *)((uint8_t *)sp->cachedSizeBytes + ((uint16_t)shift << 1));
|
||||||
|
if (*sizeCachePtr == 0) {
|
||||||
|
*sizeCachePtr = (uint16_t)(copyBytes * heightPx);
|
||||||
|
}
|
||||||
|
backup->sizeBytes = *sizeCachePtr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Compute the 1D index into the cached* / routineOffsets 2D arrays
|
||||||
|
* once. ORCA-C 2.2.1 lowers `shift * SPRITE_OP_COUNT` (where
|
||||||
|
* SPRITE_OP_COUNT==3) to a ~MUL4 helper call; (shift<<1)+shift
|
||||||
|
* compiles to two ASLs and an ADC, no helper. */
|
||||||
|
cacheIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
||||||
|
cachedDst = (uint8_t *)sp->cachedDstBank + cacheIdx;
|
||||||
|
cachedSrc = (uint8_t *)sp->cachedSrcBank + cacheIdx;
|
||||||
|
/* Same byte-pointer trick as SURFACE_ROW_OFFSET to dodge ~MUL4. */
|
||||||
|
routineOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (cacheIdx << 1));
|
||||||
|
|
||||||
fnAddr = codegenArenaBaseAddr()
|
fnAddr = codegenArenaBaseAddr()
|
||||||
+ sp->slot->offset
|
+ sp->slot->offset
|
||||||
+ (uint32_t)sp->routineOffsets[shift][SPRITE_OP_SAVE];
|
+ (uint32_t)routineOffset;
|
||||||
|
|
||||||
// Stub: X = screen (source), Y = backup (destination).
|
// Stub: X = screen (source), Y = backup (destination).
|
||||||
if (!gSaveStubInited) {
|
if (!gSaveStubInited) {
|
||||||
|
|
@ -401,22 +443,22 @@ void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_
|
||||||
gSaveStubLastYLo = backupLo;
|
gSaveStubLastYLo = backupLo;
|
||||||
}
|
}
|
||||||
if (fnAddr != gSaveStubLastFnAddr) {
|
if (fnAddr != gSaveStubLastFnAddr) {
|
||||||
gSpriteSaveStub[ 8] = (unsigned char)(fnAddr & 0xFFu);
|
/* Byte-alias the uint32_t to grab the 3 lo/hi/bank bytes
|
||||||
gSpriteSaveStub[ 9] = (unsigned char)((fnAddr >> 8) & 0xFFu);
|
* without invoking ~LSHR4 for the >>16. */
|
||||||
gSpriteSaveStub[10] = (unsigned char)((fnAddr >> 16) & 0xFFu);
|
const uint8_t *fnB_ = (const uint8_t *)&fnAddr;
|
||||||
|
gSpriteSaveStub[ 8] = fnB_[0];
|
||||||
|
gSpriteSaveStub[ 9] = fnB_[1];
|
||||||
|
gSpriteSaveStub[10] = fnB_[2];
|
||||||
gSaveStubLastFnAddr = fnAddr;
|
gSaveStubLastFnAddr = fnAddr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip the 16+ MVN-bank rewrites if the dst/src bank pair is the
|
// Skip the 16+ MVN-bank rewrites if the dst/src bank pair is the
|
||||||
// same as last call. Screen and backup buffer banks are stable
|
// same as last call.
|
||||||
// for essentially every frame past the first, so this short-
|
if (*cachedDst != backupBank || *cachedSrc != screenBank) {
|
||||||
// circuits ~5000 cyc/frame on the ball demo.
|
routine = codegenArenaBase() + sp->slot->offset + routineOffset;
|
||||||
if (sp->cachedDstBank[shift][SPRITE_OP_SAVE] != backupBank ||
|
|
||||||
sp->cachedSrcBank[shift][SPRITE_OP_SAVE] != screenBank) {
|
|
||||||
routine = codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_SAVE];
|
|
||||||
patchMvnBanks(routine, heightPx, /*dst*/backupBank, /*src*/screenBank);
|
patchMvnBanks(routine, heightPx, /*dst*/backupBank, /*src*/screenBank);
|
||||||
sp->cachedDstBank[shift][SPRITE_OP_SAVE] = backupBank;
|
*cachedDst = backupBank;
|
||||||
sp->cachedSrcBank[shift][SPRITE_OP_SAVE] = screenBank;
|
*cachedSrc = screenBank;
|
||||||
}
|
}
|
||||||
|
|
||||||
// MVN-based routine: needs M=16 / X=16; restore M=16 on exit
|
// MVN-based routine: needs M=16 / X=16; restore M=16 on exit
|
||||||
|
|
@ -442,6 +484,10 @@ void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
||||||
uint8_t *routine;
|
uint8_t *routine;
|
||||||
uint8_t *screenPtr;
|
uint8_t *screenPtr;
|
||||||
SpriteT *sp;
|
SpriteT *sp;
|
||||||
|
uint16_t cacheIdx; /* shift * SPRITE_OP_COUNT + SPRITE_OP_RESTORE, computed once */
|
||||||
|
uint8_t *cachedDst;
|
||||||
|
uint8_t *cachedSrc;
|
||||||
|
uint16_t routineOffset;
|
||||||
|
|
||||||
sp = backup->sprite;
|
sp = backup->sprite;
|
||||||
heightPx = backup->height;
|
heightPx = backup->height;
|
||||||
|
|
@ -450,12 +496,19 @@ void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
||||||
shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
|
shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
|
||||||
|
|
||||||
screenPtr = (uint8_t *)&dst->pixels[SURFACE_ROW_OFFSET(backup->y) + ((uint16_t)backup->x >> 1)];
|
screenPtr = (uint8_t *)&dst->pixels[SURFACE_ROW_OFFSET(backup->y) + ((uint16_t)backup->x >> 1)];
|
||||||
splitPointer(screenPtr, &screenLo, &screenBank);
|
SPLIT_POINTER(screenPtr, &screenLo, &screenBank);
|
||||||
splitPointer(backup->bytes, &backupLo, &backupBank);
|
SPLIT_BACKUP_CACHED(backup->bytes, &backupLo, &backupBank);
|
||||||
|
|
||||||
|
/* Hoist 2D-array indexing -- see save-side comment. */
|
||||||
|
cacheIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_RESTORE);
|
||||||
|
cachedDst = (uint8_t *)sp->cachedDstBank + cacheIdx;
|
||||||
|
cachedSrc = (uint8_t *)sp->cachedSrcBank + cacheIdx;
|
||||||
|
/* Same byte-pointer trick as SURFACE_ROW_OFFSET to dodge ~MUL4. */
|
||||||
|
routineOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (cacheIdx << 1));
|
||||||
|
|
||||||
fnAddr = codegenArenaBaseAddr()
|
fnAddr = codegenArenaBaseAddr()
|
||||||
+ sp->slot->offset
|
+ sp->slot->offset
|
||||||
+ (uint32_t)sp->routineOffsets[shift][SPRITE_OP_RESTORE];
|
+ (uint32_t)routineOffset;
|
||||||
|
|
||||||
// Stub: X = backup (source), Y = screen (destination).
|
// Stub: X = backup (source), Y = screen (destination).
|
||||||
if (!gRestoreStubInited) {
|
if (!gRestoreStubInited) {
|
||||||
|
|
@ -478,20 +531,20 @@ void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
||||||
gRestoreStubLastYLo = screenLo;
|
gRestoreStubLastYLo = screenLo;
|
||||||
}
|
}
|
||||||
if (fnAddr != gRestoreStubLastFnAddr) {
|
if (fnAddr != gRestoreStubLastFnAddr) {
|
||||||
gSpriteRestoreStub[ 8] = (unsigned char)(fnAddr & 0xFFu);
|
const uint8_t *fnB_ = (const uint8_t *)&fnAddr;
|
||||||
gSpriteRestoreStub[ 9] = (unsigned char)((fnAddr >> 8) & 0xFFu);
|
gSpriteRestoreStub[ 8] = fnB_[0];
|
||||||
gSpriteRestoreStub[10] = (unsigned char)((fnAddr >> 16) & 0xFFu);
|
gSpriteRestoreStub[ 9] = fnB_[1];
|
||||||
|
gSpriteRestoreStub[10] = fnB_[2];
|
||||||
gRestoreStubLastFnAddr = fnAddr;
|
gRestoreStubLastFnAddr = fnAddr;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Same short-circuit as save: only re-stamp the bank operands if
|
// Same short-circuit as save: only re-stamp the bank operands if
|
||||||
// they actually changed since last call.
|
// they actually changed since last call.
|
||||||
if (sp->cachedDstBank[shift][SPRITE_OP_RESTORE] != screenBank ||
|
if (*cachedDst != screenBank || *cachedSrc != backupBank) {
|
||||||
sp->cachedSrcBank[shift][SPRITE_OP_RESTORE] != backupBank) {
|
routine = codegenArenaBase() + sp->slot->offset + routineOffset;
|
||||||
routine = codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_RESTORE];
|
|
||||||
patchMvnBanks(routine, heightPx, /*dst*/screenBank, /*src*/backupBank);
|
patchMvnBanks(routine, heightPx, /*dst*/screenBank, /*src*/backupBank);
|
||||||
sp->cachedDstBank[shift][SPRITE_OP_RESTORE] = screenBank;
|
*cachedDst = screenBank;
|
||||||
sp->cachedSrcBank[shift][SPRITE_OP_RESTORE] = backupBank;
|
*cachedSrc = backupBank;
|
||||||
}
|
}
|
||||||
|
|
||||||
asm {
|
asm {
|
||||||
|
|
|
||||||
|
|
@ -31,16 +31,6 @@
|
||||||
#include "spriteEmitter.h"
|
#include "spriteEmitter.h"
|
||||||
#include "spriteInternal.h"
|
#include "spriteInternal.h"
|
||||||
|
|
||||||
// Pin the IIgs sprite codegen statics into their own load segment
|
|
||||||
// instead of letting them ride in _ROOT. _ROOT also collects every
|
|
||||||
// other unsegmented .c (init.c, sprite.c, present.c, the example
|
|
||||||
// main, ...), so growth in any of those can shift the linker's
|
|
||||||
// per-bank packing and orphan intra-file static refs (we hit this
|
|
||||||
// when DRAWPRIMS grew with the chunked PEI-slam: PATTERN's link
|
|
||||||
// reported "Unresolved reference: emitMvnCopyRoutine" purely from
|
|
||||||
// _ROOT crowding). A dedicated load segment isolates this file.
|
|
||||||
JOEYLIB_SEGMENT("SPRITECG")
|
|
||||||
|
|
||||||
|
|
||||||
// ----- Constants -----
|
// ----- Constants -----
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,6 @@
|
||||||
#include "joey/asset.h"
|
#include "joey/asset.h"
|
||||||
#include "joey/palette.h"
|
#include "joey/palette.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
#define JAS_HEADER_SIZE 44
|
#define JAS_HEADER_SIZE 44
|
||||||
#define JAS_PIXELS_OFFSET JAS_HEADER_SIZE
|
#define JAS_PIXELS_OFFSET JAS_HEADER_SIZE
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,6 @@
|
||||||
#include "joey/audio.h"
|
#include "joey/audio.h"
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
static bool gAudioReady = false;
|
static bool gAudioReady = false;
|
||||||
|
|
||||||
|
|
@ -79,5 +77,10 @@ void joeyAudioFrameTick(void) {
|
||||||
if (!gAudioReady) {
|
if (!gAudioReady) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
#ifndef JOEYLIB_PLATFORM_IIGS
|
||||||
|
// IIgs: NTPstreamsound is fully DOC-IRQ-driven, halAudioFrameTick
|
||||||
|
// is an empty no-op there. Skip the wrapper JSL entirely on IIgs
|
||||||
|
// so per-frame audio cost stays at the gAudioReady branch above.
|
||||||
halAudioFrameTick();
|
halAudioFrameTick();
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -23,22 +23,27 @@
|
||||||
|
|
||||||
#include "codegenArenaInternal.h"
|
#include "codegenArenaInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
|
|
||||||
// ----- Module state -----
|
// ----- Module state -----
|
||||||
|
|
||||||
static uint8_t *gBase = NULL;
|
// gCodegenArenaBase / gCodegenArenaBaseAddr are non-static so spriteCompile.c can read them
|
||||||
// gBaseAddr mirrors gBase as a 24-bit absolute address. ORCA-C's
|
// directly via extern instead of paying a JSL/RTL per access through
|
||||||
// (uint32_t)pointer cast on the IIgs zeros the bank byte for some
|
// the codegenArenaBase() / codegenArenaBaseAddr() wrappers. Both are
|
||||||
// pointer expressions, so JSL targets read this field directly.
|
// set once at codegenArenaInit and never moved (the underlying
|
||||||
static uint32_t gBaseAddr = 0;
|
// Memory Manager handle is locked-in-place on IIgs). Callers MUST
|
||||||
|
// treat them as read-only.
|
||||||
|
uint8_t *gCodegenArenaBase = NULL;
|
||||||
|
// gCodegenArenaBaseAddr mirrors gCodegenArenaBase as a 24-bit
|
||||||
|
// absolute address. ORCA-C's (uint32_t)pointer cast on the IIgs
|
||||||
|
// zeros the bank byte for some pointer expressions, so JSL targets
|
||||||
|
// read this field directly.
|
||||||
|
uint32_t gCodegenArenaBaseAddr = 0;
|
||||||
static uint32_t gTotalBytes = 0;
|
static uint32_t gTotalBytes = 0;
|
||||||
static uint32_t gUsedBytes = 0;
|
static uint32_t gUsedBytes = 0;
|
||||||
static ArenaSlotT *gFirstSlot = NULL;
|
static ArenaSlotT *gFirstSlot = NULL;
|
||||||
#if defined(JOEYLIB_PLATFORM_IIGS)
|
#if defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
static Handle gBaseHandle = NULL;
|
static Handle gCodegenArenaBaseHandle = NULL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -93,7 +98,7 @@ ArenaSlotT *codegenArenaAlloc(uint32_t bytes) {
|
||||||
ArenaSlotT *slot;
|
ArenaSlotT *slot;
|
||||||
ArenaSlotT *remainder;
|
ArenaSlotT *remainder;
|
||||||
|
|
||||||
if (gBase == NULL || bytes == 0) {
|
if (gCodegenArenaBase == NULL || bytes == 0) {
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
for (slot = gFirstSlot; slot != NULL; slot = slot->next) {
|
for (slot = gFirstSlot; slot != NULL; slot = slot->next) {
|
||||||
|
|
@ -123,14 +128,11 @@ ArenaSlotT *codegenArenaAlloc(uint32_t bytes) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint8_t *codegenArenaBase(void) {
|
// codegenArenaBase() / codegenArenaBaseAddr() are now header-only
|
||||||
return gBase;
|
// macros that read gCodegenArenaBase / gCodegenArenaBaseAddr
|
||||||
}
|
// directly, so the C function bodies that used to live here are
|
||||||
|
// gone. The wrappers cost ~30 cyc per call on IIgs and were hit
|
||||||
|
// 3x per sprite frame.
|
||||||
uint32_t codegenArenaBaseAddr(void) {
|
|
||||||
return gBaseAddr;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
uint32_t codegenArenaBytesTotal(void) {
|
uint32_t codegenArenaBytesTotal(void) {
|
||||||
|
|
@ -149,7 +151,7 @@ void codegenArenaCompact(void) {
|
||||||
ArenaSlotT *trailing;
|
ArenaSlotT *trailing;
|
||||||
uint32_t cursor;
|
uint32_t cursor;
|
||||||
|
|
||||||
if (gBase == NULL) {
|
if (gCodegenArenaBase == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
cursor = 0;
|
cursor = 0;
|
||||||
|
|
@ -158,7 +160,7 @@ void codegenArenaCompact(void) {
|
||||||
next = slot->next;
|
next = slot->next;
|
||||||
if (slot->used) {
|
if (slot->used) {
|
||||||
if (slot->offset != cursor) {
|
if (slot->offset != cursor) {
|
||||||
memmove(gBase + cursor, gBase + slot->offset, slot->size);
|
memmove(gCodegenArenaBase + cursor, gCodegenArenaBase + slot->offset, slot->size);
|
||||||
slot->offset = cursor;
|
slot->offset = cursor;
|
||||||
}
|
}
|
||||||
cursor += slot->size;
|
cursor += slot->size;
|
||||||
|
|
@ -200,7 +202,7 @@ void codegenArenaCompact(void) {
|
||||||
|
|
||||||
|
|
||||||
void codegenArenaFree(ArenaSlotT *slot) {
|
void codegenArenaFree(ArenaSlotT *slot) {
|
||||||
if (slot == NULL || gBase == NULL) {
|
if (slot == NULL || gCodegenArenaBase == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (!slot->used) {
|
if (!slot->used) {
|
||||||
|
|
@ -215,21 +217,21 @@ void codegenArenaFree(ArenaSlotT *slot) {
|
||||||
|
|
||||||
|
|
||||||
bool codegenArenaInit(uint32_t totalBytes) {
|
bool codegenArenaInit(uint32_t totalBytes) {
|
||||||
if (gBase != NULL) {
|
if (gCodegenArenaBase != NULL) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
if (totalBytes == 0) {
|
if (totalBytes == 0) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#if defined(JOEYLIB_PLATFORM_IIGS)
|
#if defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
gBaseHandle = NewHandle(totalBytes, _ownerid,
|
gCodegenArenaBaseHandle = NewHandle(totalBytes, _ownerid,
|
||||||
attrFixed | attrLocked | attrPage | attrNoCross,
|
attrFixed | attrLocked | attrPage | attrNoCross,
|
||||||
NULL);
|
NULL);
|
||||||
if (gBaseHandle == NULL || _toolErr != 0) {
|
if (gCodegenArenaBaseHandle == NULL || _toolErr != 0) {
|
||||||
gBaseHandle = NULL;
|
gCodegenArenaBaseHandle = NULL;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
HLock(gBaseHandle);
|
HLock(gCodegenArenaBaseHandle);
|
||||||
// Capture the 24-bit absolute address by copying the Pointer's
|
// Capture the 24-bit absolute address by copying the Pointer's
|
||||||
// raw bytes -- (uint32_t)pointer through a chain of expressions
|
// raw bytes -- (uint32_t)pointer through a chain of expressions
|
||||||
// has been observed to drop the bank byte under ORCA-C's
|
// has been observed to drop the bank byte under ORCA-C's
|
||||||
|
|
@ -238,35 +240,35 @@ bool codegenArenaInit(uint32_t totalBytes) {
|
||||||
{
|
{
|
||||||
Pointer p;
|
Pointer p;
|
||||||
uint8_t bytes[4];
|
uint8_t bytes[4];
|
||||||
p = *gBaseHandle;
|
p = *gCodegenArenaBaseHandle;
|
||||||
gBase = (uint8_t *)p;
|
gCodegenArenaBase = (uint8_t *)p;
|
||||||
memcpy(bytes, &p, 4);
|
memcpy(bytes, &p, 4);
|
||||||
gBaseAddr = (uint32_t)bytes[0]
|
gCodegenArenaBaseAddr = (uint32_t)bytes[0]
|
||||||
| ((uint32_t)bytes[1] << 8)
|
| ((uint32_t)bytes[1] << 8)
|
||||||
| ((uint32_t)bytes[2] << 16);
|
| ((uint32_t)bytes[2] << 16);
|
||||||
}
|
}
|
||||||
if (gBase == NULL) {
|
if (gCodegenArenaBase == NULL) {
|
||||||
DisposeHandle(gBaseHandle);
|
DisposeHandle(gCodegenArenaBaseHandle);
|
||||||
gBaseHandle = NULL;
|
gCodegenArenaBaseHandle = NULL;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
gBase = (uint8_t *)malloc(totalBytes);
|
gCodegenArenaBase = (uint8_t *)malloc(totalBytes);
|
||||||
if (gBase == NULL) {
|
if (gCodegenArenaBase == NULL) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
gBaseAddr = (uint32_t)gBase;
|
gCodegenArenaBaseAddr = (uint32_t)gCodegenArenaBase;
|
||||||
#endif
|
#endif
|
||||||
gFirstSlot = newSlot(0, totalBytes, false);
|
gFirstSlot = newSlot(0, totalBytes, false);
|
||||||
if (gFirstSlot == NULL) {
|
if (gFirstSlot == NULL) {
|
||||||
#if defined(JOEYLIB_PLATFORM_IIGS)
|
#if defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
DisposeHandle(gBaseHandle);
|
DisposeHandle(gCodegenArenaBaseHandle);
|
||||||
gBaseHandle = NULL;
|
gCodegenArenaBaseHandle = NULL;
|
||||||
#else
|
#else
|
||||||
free(gBase);
|
free(gCodegenArenaBase);
|
||||||
#endif
|
#endif
|
||||||
gBase = NULL;
|
gCodegenArenaBase = NULL;
|
||||||
gBaseAddr = 0;
|
gCodegenArenaBaseAddr = 0;
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
gTotalBytes = totalBytes;
|
gTotalBytes = totalBytes;
|
||||||
|
|
@ -279,7 +281,7 @@ void codegenArenaShutdown(void) {
|
||||||
ArenaSlotT *slot;
|
ArenaSlotT *slot;
|
||||||
ArenaSlotT *next;
|
ArenaSlotT *next;
|
||||||
|
|
||||||
if (gBase == NULL) {
|
if (gCodegenArenaBase == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
for (slot = gFirstSlot; slot != NULL; slot = next) {
|
for (slot = gFirstSlot; slot != NULL; slot = next) {
|
||||||
|
|
@ -287,13 +289,13 @@ void codegenArenaShutdown(void) {
|
||||||
free(slot);
|
free(slot);
|
||||||
}
|
}
|
||||||
#if defined(JOEYLIB_PLATFORM_IIGS)
|
#if defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
DisposeHandle(gBaseHandle);
|
DisposeHandle(gCodegenArenaBaseHandle);
|
||||||
gBaseHandle = NULL;
|
gCodegenArenaBaseHandle = NULL;
|
||||||
#else
|
#else
|
||||||
free(gBase);
|
free(gCodegenArenaBase);
|
||||||
#endif
|
#endif
|
||||||
gBase = NULL;
|
gCodegenArenaBase = NULL;
|
||||||
gBaseAddr = 0;
|
gCodegenArenaBaseAddr = 0;
|
||||||
gFirstSlot = NULL;
|
gFirstSlot = NULL;
|
||||||
gTotalBytes = 0;
|
gTotalBytes = 0;
|
||||||
gUsedBytes = 0;
|
gUsedBytes = 0;
|
||||||
|
|
|
||||||
|
|
@ -58,14 +58,15 @@ void codegenArenaCompact(void);
|
||||||
|
|
||||||
// Used for spriteDraw's address computation. The base pointer is
|
// Used for spriteDraw's address computation. The base pointer is
|
||||||
// stable for the lifetime of the arena; only slot->offset moves.
|
// stable for the lifetime of the arena; only slot->offset moves.
|
||||||
uint8_t *codegenArenaBase(void);
|
//
|
||||||
|
// Direct extern access (instead of a getter function) so per-frame
|
||||||
// Same address as codegenArenaBase() but returned as an integer. The
|
// hot paths in spriteCompile.c skip the JSL/PHB/RTL/PLB the wrapper
|
||||||
// IIgs JSL trampoline needs the 24-bit absolute address as a number
|
// would impose. Both globals are read-only after codegenArenaInit;
|
||||||
// it can split into bank/offset bytes; ORCA-C's pointer-to-uint32_t
|
// the function-form getters below are kept as a back-compat shim.
|
||||||
// cast has dropped the bank byte in some expressions, so we expose
|
extern uint8_t *gCodegenArenaBase;
|
||||||
// the integer view directly.
|
extern uint32_t gCodegenArenaBaseAddr;
|
||||||
uint32_t codegenArenaBaseAddr(void);
|
#define codegenArenaBase() ((uint8_t *)gCodegenArenaBase)
|
||||||
|
#define codegenArenaBaseAddr() ((uint32_t)gCodegenArenaBaseAddr)
|
||||||
|
|
||||||
// Public-API support: sum of live slot sizes, total arena size.
|
// Public-API support: sum of live slot sizes, total arena size.
|
||||||
// Difference is free space (which may be fragmented across holes
|
// Difference is free space (which may be fragmented across holes
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,6 @@
|
||||||
#include "joey/platform.h"
|
#include "joey/platform.h"
|
||||||
#include "joey/debug.h"
|
#include "joey/debug.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
static const char *kLogPath = "joeylog.txt";
|
static const char *kLogPath = "joeylog.txt";
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,12 +12,6 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// On IIgs, hoist all primitive functions out of _ROOT into a named
|
|
||||||
// DRAWPRIMS load segment. drawLine/drawCircle/fillCircle/floodFill/
|
|
||||||
// floodFillBounded together push past the 64 KB-per-bank budget for
|
|
||||||
// the simpler binaries (PATTERN was the first to fail). On other
|
|
||||||
// ports this macro vanishes.
|
|
||||||
JOEYLIB_SEGMENT("DRAWPRIMS")
|
|
||||||
|
|
||||||
// ----- Constants -----
|
// ----- Constants -----
|
||||||
|
|
||||||
|
|
@ -107,25 +101,28 @@ static void fillRectClipped(SurfaceT *s, int16_t x, int16_t y, int16_t w, int16_
|
||||||
uint8_t nibble = colorIndex & 0x0F;
|
uint8_t nibble = colorIndex & 0x0F;
|
||||||
uint8_t doubled = (uint8_t)((nibble << 4) | nibble);
|
uint8_t doubled = (uint8_t)((nibble << 4) | nibble);
|
||||||
int16_t row;
|
int16_t row;
|
||||||
int16_t pxStart;
|
uint16_t pxStart;
|
||||||
int16_t pxEnd;
|
uint16_t pxEnd;
|
||||||
int16_t midBytes;
|
uint16_t midBytes;
|
||||||
uint8_t *line;
|
uint8_t *line;
|
||||||
|
|
||||||
|
/* px* and midBytes are uint16_t (clipped values are non-negative)
|
||||||
|
* so `>>1` lowers to a single LSR instead of ORCA-C's
|
||||||
|
* ~SSHIFTRIGHT helper. Same with `<<1` for midBytes. */
|
||||||
for (row = 0; row < h; row++) {
|
for (row = 0; row < h; row++) {
|
||||||
line = &s->pixels[SURFACE_ROW_OFFSET(y + row)];
|
line = &s->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||||
pxStart = x;
|
pxStart = (uint16_t)x;
|
||||||
pxEnd = x + w;
|
pxEnd = (uint16_t)(x + w);
|
||||||
|
|
||||||
if (pxStart & 1) {
|
if (pxStart & 1u) {
|
||||||
line[pxStart >> 1] = (uint8_t)((line[pxStart >> 1] & 0xF0) | nibble);
|
line[pxStart >> 1] = (uint8_t)((line[pxStart >> 1] & 0xF0) | nibble);
|
||||||
pxStart++;
|
pxStart++;
|
||||||
}
|
}
|
||||||
|
|
||||||
midBytes = (pxEnd - pxStart) >> 1;
|
midBytes = (uint16_t)((pxEnd - pxStart) >> 1);
|
||||||
if (midBytes > 0) {
|
if (midBytes > 0u) {
|
||||||
memset(&line[pxStart >> 1], doubled, (size_t)midBytes);
|
memset(&line[pxStart >> 1], doubled, (size_t)midBytes);
|
||||||
pxStart += midBytes << 1;
|
pxStart = (uint16_t)(pxStart + (midBytes << 1));
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pxStart < pxEnd) {
|
if (pxStart < pxEnd) {
|
||||||
|
|
@ -343,7 +340,10 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
static void dstPixel(uint8_t *row, int16_t x, uint8_t nibble) {
|
static void dstPixel(uint8_t *row, int16_t x, uint8_t nibble) {
|
||||||
uint8_t *byte;
|
uint8_t *byte;
|
||||||
|
|
||||||
byte = &row[x >> 1];
|
/* `(uint16_t)x >> 1` instead of `x >> 1` -- caller has already
|
||||||
|
* range-checked x non-negative, and unsigned shift dodges the
|
||||||
|
* ~SSHIFTRIGHT helper ORCA-C emits for signed `>>`. */
|
||||||
|
byte = &row[(uint16_t)x >> 1];
|
||||||
if (x & 1) {
|
if (x & 1) {
|
||||||
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -355,7 +355,7 @@ static void dstPixel(uint8_t *row, int16_t x, uint8_t nibble) {
|
||||||
static uint8_t srcPixel(const uint8_t *row, int16_t x) {
|
static uint8_t srcPixel(const uint8_t *row, int16_t x) {
|
||||||
uint8_t byte;
|
uint8_t byte;
|
||||||
|
|
||||||
byte = row[x >> 1];
|
byte = row[(uint16_t)x >> 1];
|
||||||
if (x & 1) {
|
if (x & 1) {
|
||||||
return (uint8_t)(byte & 0x0F);
|
return (uint8_t)(byte & 0x0F);
|
||||||
}
|
}
|
||||||
|
|
@ -407,11 +407,13 @@ void drawCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIn
|
||||||
drawPixel(s, (int16_t)(cx + y), (int16_t)(cy - x), colorIndex);
|
drawPixel(s, (int16_t)(cx + y), (int16_t)(cy - x), colorIndex);
|
||||||
drawPixel(s, (int16_t)(cx - y), (int16_t)(cy - x), colorIndex);
|
drawPixel(s, (int16_t)(cx - y), (int16_t)(cy - x), colorIndex);
|
||||||
y++;
|
y++;
|
||||||
|
/* Use `y + y + 1` instead of `2 * y + 1` so ORCA-C never emits
|
||||||
|
* the ~SMUL2 helper -- two ADDs are unconditionally cheaper. */
|
||||||
if (err <= 0) {
|
if (err <= 0) {
|
||||||
err = (int16_t)(err + 2 * y + 1);
|
err = (int16_t)(err + y + y + 1);
|
||||||
} else {
|
} else {
|
||||||
x--;
|
x--;
|
||||||
err = (int16_t)(err + 2 * (y - x) + 1);
|
err = (int16_t)(err + y + y - x - x + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -502,7 +504,9 @@ void drawPixel(SurfaceT *s, int16_t x, int16_t y, uint8_t colorIndex) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!halFastDrawPixel(s, (uint16_t)x, (uint16_t)y, colorIndex)) {
|
if (!halFastDrawPixel(s, (uint16_t)x, (uint16_t)y, colorIndex)) {
|
||||||
byte = &s->pixels[SURFACE_ROW_OFFSET(y) + (x >> 1)];
|
/* Cast to uint16_t before shift -- already validated x >= 0,
|
||||||
|
* so unsigned semantics match. Avoids ~SSHIFTRIGHT helper. */
|
||||||
|
byte = &s->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
||||||
nibble = colorIndex & 0x0F;
|
nibble = colorIndex & 0x0F;
|
||||||
if (x & 1) {
|
if (x & 1) {
|
||||||
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
||||||
|
|
@ -571,20 +575,26 @@ void fillCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIn
|
||||||
// (y+1)^2 = y^2 + 2y + 1; (x-1)^2 = x^2 - 2x + 1. r is uint16_t
|
// (y+1)^2 = y^2 + 2y + 1; (x-1)^2 = x^2 - 2x + 1. r is uint16_t
|
||||||
// so xx, yy, r2 fit in uint16_t for any r where x*x+y*y can equal
|
// so xx, yy, r2 fit in uint16_t for any r where x*x+y*y can equal
|
||||||
// r2 (i.e. r <= 255 -> r2 <= 65025).
|
// r2 (i.e. r <= 255 -> r2 <= 65025).
|
||||||
|
/* Same `+ +` pattern as drawCircle so ORCA-C doesn't emit ~SMUL2 /
|
||||||
|
* ~CUMUL2 helpers for the `2 * ...` constants. spanWidth is hoisted
|
||||||
|
* because both fillRect calls in the body need it. */
|
||||||
xx = (uint16_t)(r * r);
|
xx = (uint16_t)(r * r);
|
||||||
r2 = xx;
|
r2 = xx;
|
||||||
yy = 0;
|
yy = 0;
|
||||||
x = (int16_t)r;
|
x = (int16_t)r;
|
||||||
for (y = 0; y <= (int16_t)r; y++) {
|
for (y = 0; y <= (int16_t)r; y++) {
|
||||||
|
uint16_t spanWidth;
|
||||||
|
|
||||||
while (xx + yy > r2) {
|
while (xx + yy > r2) {
|
||||||
xx = (uint16_t)(xx - (uint16_t)(2 * x - 1));
|
xx = (uint16_t)(xx - (uint16_t)((uint16_t)x + (uint16_t)x - 1u));
|
||||||
x--;
|
x--;
|
||||||
}
|
}
|
||||||
fillRect(s, (int16_t)(cx - x), (int16_t)(cy + y), (uint16_t)(2 * x + 1), 1, colorIndex);
|
spanWidth = (uint16_t)((uint16_t)x + (uint16_t)x + 1u);
|
||||||
|
fillRect(s, (int16_t)(cx - x), (int16_t)(cy + y), spanWidth, 1, colorIndex);
|
||||||
if (y > 0) {
|
if (y > 0) {
|
||||||
fillRect(s, (int16_t)(cx - x), (int16_t)(cy - y), (uint16_t)(2 * x + 1), 1, colorIndex);
|
fillRect(s, (int16_t)(cx - x), (int16_t)(cy - y), spanWidth, 1, colorIndex);
|
||||||
}
|
}
|
||||||
yy = (uint16_t)(yy + (uint16_t)(2 * y + 1));
|
yy = (uint16_t)(yy + (uint16_t)((uint16_t)y + (uint16_t)y + 1u));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -668,11 +678,16 @@ uint8_t samplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
byte = s->pixels[SURFACE_ROW_OFFSET(y) + (x >> 1)];
|
/* Cast to uint16_t before shift -- already validated x >= 0,
|
||||||
|
* unsigned semantics match. Avoids ~SSHIFTRIGHT helper. */
|
||||||
|
byte = s->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
||||||
if (x & 1) {
|
if (x & 1) {
|
||||||
return (uint8_t)(byte & 0x0F);
|
return (uint8_t)(byte & 0x0F);
|
||||||
}
|
}
|
||||||
return (uint8_t)(byte >> 4);
|
/* `byte >> 4` is uint8_t but ORCA-C promotes to int (signed 16-bit)
|
||||||
|
* for the shift, then narrows -- triggers ~SSHIFTRIGHT. The
|
||||||
|
* mask-then-shift sidesteps the promotion path. */
|
||||||
|
return (uint8_t)((byte & 0xF0u) >> 4);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -58,6 +58,19 @@ void halInputPoll(void);
|
||||||
// graphics.library WaitTOF, XBIOS Vsync, $C019 polling).
|
// graphics.library WaitTOF, XBIOS Vsync, $C019 polling).
|
||||||
void halWaitVBL(void);
|
void halWaitVBL(void);
|
||||||
|
|
||||||
|
// Monotonic 16-bit frame counter. Caller polls; ports either detect
|
||||||
|
// the rising edge inside this call (IIgs $C019 / DOS $3DA / Amiga
|
||||||
|
// VPOSR) or return a counter maintained by a VBL ISR (ST). Required
|
||||||
|
// caller invariant: poll faster than 2 * halFrameHz() so no edge is
|
||||||
|
// missed. Used by benchmarks; cheap enough for animation cadence too.
|
||||||
|
uint16_t halFrameCount(void);
|
||||||
|
|
||||||
|
// Nominal display frame rate in Hz (50 PAL Amiga, 60 NTSC IIgs / ST,
|
||||||
|
// ~70 VGA mode 13h). Reported only -- no API contract that VBLs
|
||||||
|
// arrive at exactly this rate. Benchmarks divide by it to convert
|
||||||
|
// iters-per-N-frames to ops/sec.
|
||||||
|
uint16_t halFrameHz(void);
|
||||||
|
|
||||||
// Audio: per-port engine setup, module + SFX playback, teardown.
|
// Audio: per-port engine setup, module + SFX playback, teardown.
|
||||||
// halAudioInit returns true if the platform has a working engine.
|
// halAudioInit returns true if the platform has a working engine.
|
||||||
// All entry points are safe to call when init failed -- they become
|
// All entry points are safe to call when init failed -- they become
|
||||||
|
|
@ -278,11 +291,12 @@ extern uint16_t gFloodRightX;
|
||||||
|
|
||||||
// Tile primitives operate on caller-computed row pointers; just
|
// Tile primitives operate on caller-computed row pointers; just
|
||||||
// forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte
|
// forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte
|
||||||
// offset within the surface.
|
// offset within the surface. Use SURFACE_ROW_OFFSET (LUT lookup) to
|
||||||
|
// dodge ORCA-C 2.2.1's ~CUMUL2 helper for the *160 multiply.
|
||||||
#undef halFastTileFill
|
#undef halFastTileFill
|
||||||
#define halFastTileFill(_s, _bx, _by, _fw) \
|
#define halFastTileFill(_s, _bx, _by, _fw) \
|
||||||
(iigsTileFillInner(&(_s)->pixels[(uint16_t)(_by) * 8 * SURFACE_BYTES_PER_ROW \
|
(iigsTileFillInner(&(_s)->pixels[SURFACE_ROW_OFFSET((uint16_t)(_by) << 3) \
|
||||||
+ (uint16_t)(_bx) * 4], \
|
+ ((uint16_t)(_bx) << 2)], \
|
||||||
(_fw)), \
|
(_fw)), \
|
||||||
true)
|
true)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,6 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
// 8 KB fits the largest typical sprite working set (~3-4 KB per
|
// 8 KB fits the largest typical sprite working set (~3-4 KB per
|
||||||
// 32x32 sprite at all opaque) and keeps malloc requests small enough
|
// 32x32 sprite at all opaque) and keeps malloc requests small enough
|
||||||
|
|
@ -121,3 +119,13 @@ const char *joeyVersionString(void) {
|
||||||
void joeyWaitVBL(void) {
|
void joeyWaitVBL(void) {
|
||||||
halWaitVBL();
|
halWaitVBL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint16_t joeyFrameCount(void) {
|
||||||
|
return halFrameCount();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint16_t joeyFrameHz(void) {
|
||||||
|
return halFrameHz();
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -15,34 +15,39 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "inputInternal.h"
|
#include "inputInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
bool gKeyState [KEY_COUNT];
|
// See inputInternal.h for why these are uint8_t and not bool.
|
||||||
bool gKeyPrev [KEY_COUNT];
|
uint8_t gKeyState [KEY_COUNT];
|
||||||
|
uint8_t gKeyPrev [KEY_COUNT];
|
||||||
|
|
||||||
int16_t gMouseX = 0;
|
int16_t gMouseX = 0;
|
||||||
int16_t gMouseY = 0;
|
int16_t gMouseY = 0;
|
||||||
bool gMouseButtonState[MOUSE_BUTTON_COUNT];
|
uint8_t gMouseButtonState[MOUSE_BUTTON_COUNT];
|
||||||
bool gMouseButtonPrev [MOUSE_BUTTON_COUNT];
|
uint8_t gMouseButtonPrev [MOUSE_BUTTON_COUNT];
|
||||||
|
|
||||||
bool gJoyConnected [JOYSTICK_COUNT];
|
uint8_t gJoyConnected [JOYSTICK_COUNT];
|
||||||
int8_t gJoyAxisX [JOYSTICK_COUNT];
|
int8_t gJoyAxisX [JOYSTICK_COUNT];
|
||||||
int8_t gJoyAxisY [JOYSTICK_COUNT];
|
int8_t gJoyAxisY [JOYSTICK_COUNT];
|
||||||
bool gJoyButtonState[JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
uint8_t gJoyButtonState[JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
||||||
bool gJoyButtonPrev [JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
uint8_t gJoyButtonPrev [JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
||||||
uint8_t gJoyDeadZone [JOYSTICK_COUNT];
|
uint8_t gJoyDeadZone [JOYSTICK_COUNT];
|
||||||
|
|
||||||
|
|
||||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||||
extern void iigsInputSnapshot(void);
|
extern void iigsInputSnapshot(void);
|
||||||
// Build-time check: iigsInputSnapshot's asm hard-codes KEY_COUNT=60
|
// Build-time checks: iigsInputSnapshot's asm hard-codes KEY_COUNT=60
|
||||||
// and the small button counts. If a future change adds/removes keys
|
// and the small button counts, and walks every array one byte per
|
||||||
// or buttons the asm must be updated; this declares a zero-size
|
// element. If a future change adds/removes keys or buttons the asm
|
||||||
// array if the math no longer matches, which is a compile error.
|
// must be updated; if anyone re-types the arrays back to bool the
|
||||||
|
// per-element size grows to ORCA-C's 2-byte _Bool and the asm reads
|
||||||
|
// the wrong bytes. Either condition declares a negative-size array
|
||||||
|
// below, which is a compile error.
|
||||||
typedef int joey_keycount_check [(KEY_COUNT == 60) ? 1 : -1];
|
typedef int joey_keycount_check [(KEY_COUNT == 60) ? 1 : -1];
|
||||||
typedef int joey_mousebtn_check [(MOUSE_BUTTON_COUNT == 4) ? 1 : -1];
|
typedef int joey_mousebtn_check [(MOUSE_BUTTON_COUNT == 4) ? 1 : -1];
|
||||||
typedef int joey_joybtn_check [(JOYSTICK_COUNT * JOY_BUTTON_COUNT == 4) ? 1 : -1];
|
typedef int joey_joybtn_check [(JOYSTICK_COUNT * JOY_BUTTON_COUNT == 4) ? 1 : -1];
|
||||||
|
typedef int joey_keystate_size_check [(sizeof(gKeyState) == KEY_COUNT) ? 1 : -1];
|
||||||
|
typedef int joey_mousebtn_size_check [(sizeof(gMouseButtonState) == MOUSE_BUTTON_COUNT) ? 1 : -1];
|
||||||
|
typedef int joey_joybtn_size_check [(sizeof(gJoyButtonState) == JOYSTICK_COUNT * JOY_BUTTON_COUNT) ? 1 : -1];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void joeyInputPoll(void) {
|
void joeyInputPoll(void) {
|
||||||
|
|
@ -79,8 +84,14 @@ void joeyWaitForAnyKey(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* All six key/mouse predicates fold the lower-bound check (`<= NONE`)
|
||||||
|
* and upper-bound check (`>= COUNT`) into a single unsigned compare.
|
||||||
|
* Index 0 (KEY_NONE / MOUSE_BUTTON_NONE) is a sentinel that no HAL
|
||||||
|
* ever writes, so reading gKeyState[0] / gMouseButtonState[0] is
|
||||||
|
* always 0 -- the predicate result is unchanged but ORCA-C drops the
|
||||||
|
* compound `||` into one branch each. */
|
||||||
bool joeyKeyDown(JoeyKeyE key) {
|
bool joeyKeyDown(JoeyKeyE key) {
|
||||||
if (key <= KEY_NONE || key >= KEY_COUNT) {
|
if ((uint16_t)key >= (uint16_t)KEY_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gKeyState[key];
|
return gKeyState[key];
|
||||||
|
|
@ -88,7 +99,7 @@ bool joeyKeyDown(JoeyKeyE key) {
|
||||||
|
|
||||||
|
|
||||||
bool joeyKeyPressed(JoeyKeyE key) {
|
bool joeyKeyPressed(JoeyKeyE key) {
|
||||||
if (key <= KEY_NONE || key >= KEY_COUNT) {
|
if ((uint16_t)key >= (uint16_t)KEY_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gKeyState[key] && !gKeyPrev[key];
|
return gKeyState[key] && !gKeyPrev[key];
|
||||||
|
|
@ -96,7 +107,7 @@ bool joeyKeyPressed(JoeyKeyE key) {
|
||||||
|
|
||||||
|
|
||||||
bool joeyKeyReleased(JoeyKeyE key) {
|
bool joeyKeyReleased(JoeyKeyE key) {
|
||||||
if (key <= KEY_NONE || key >= KEY_COUNT) {
|
if ((uint16_t)key >= (uint16_t)KEY_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return !gKeyState[key] && gKeyPrev[key];
|
return !gKeyState[key] && gKeyPrev[key];
|
||||||
|
|
@ -104,7 +115,7 @@ bool joeyKeyReleased(JoeyKeyE key) {
|
||||||
|
|
||||||
|
|
||||||
bool joeyMouseDown(JoeyMouseButtonE button) {
|
bool joeyMouseDown(JoeyMouseButtonE button) {
|
||||||
if (button <= MOUSE_BUTTON_NONE || button >= MOUSE_BUTTON_COUNT) {
|
if ((uint16_t)button >= (uint16_t)MOUSE_BUTTON_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gMouseButtonState[button];
|
return gMouseButtonState[button];
|
||||||
|
|
@ -112,7 +123,7 @@ bool joeyMouseDown(JoeyMouseButtonE button) {
|
||||||
|
|
||||||
|
|
||||||
bool joeyMousePressed(JoeyMouseButtonE button) {
|
bool joeyMousePressed(JoeyMouseButtonE button) {
|
||||||
if (button <= MOUSE_BUTTON_NONE || button >= MOUSE_BUTTON_COUNT) {
|
if ((uint16_t)button >= (uint16_t)MOUSE_BUTTON_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gMouseButtonState[button] && !gMouseButtonPrev[button];
|
return gMouseButtonState[button] && !gMouseButtonPrev[button];
|
||||||
|
|
@ -120,7 +131,7 @@ bool joeyMousePressed(JoeyMouseButtonE button) {
|
||||||
|
|
||||||
|
|
||||||
bool joeyMouseReleased(JoeyMouseButtonE button) {
|
bool joeyMouseReleased(JoeyMouseButtonE button) {
|
||||||
if (button <= MOUSE_BUTTON_NONE || button >= MOUSE_BUTTON_COUNT) {
|
if ((uint16_t)button >= (uint16_t)MOUSE_BUTTON_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return !gMouseButtonState[button] && gMouseButtonPrev[button];
|
return !gMouseButtonState[button] && gMouseButtonPrev[button];
|
||||||
|
|
@ -138,7 +149,7 @@ int16_t joeyMouseY(void) {
|
||||||
|
|
||||||
|
|
||||||
bool joeyJoystickConnected(JoeyJoystickE js) {
|
bool joeyJoystickConnected(JoeyJoystickE js) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gJoyConnected[js];
|
return gJoyConnected[js];
|
||||||
|
|
@ -146,7 +157,7 @@ bool joeyJoystickConnected(JoeyJoystickE js) {
|
||||||
|
|
||||||
|
|
||||||
int8_t joeyJoystickX(JoeyJoystickE js) {
|
int8_t joeyJoystickX(JoeyJoystickE js) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return gJoyAxisX[js];
|
return gJoyAxisX[js];
|
||||||
|
|
@ -154,48 +165,59 @@ int8_t joeyJoystickX(JoeyJoystickE js) {
|
||||||
|
|
||||||
|
|
||||||
int8_t joeyJoystickY(JoeyJoystickE js) {
|
int8_t joeyJoystickY(JoeyJoystickE js) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
return gJoyAxisY[js];
|
return gJoyAxisY[js];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Joystick button predicates: ORCA-C 2.2.1 lowers `gJoyButtonState[js][button]`
|
||||||
|
* to a ~MUL4 helper per access. Compute the 1D byte index once and read
|
||||||
|
* via an explicit (uint8_t *) cast -- no helpers. */
|
||||||
bool joeyJoyDown(JoeyJoystickE js, JoeyJoyButtonE button) {
|
bool joeyJoyDown(JoeyJoystickE js, JoeyJoyButtonE button) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
uint16_t idx;
|
||||||
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if ((int)button < 0 || (int)button >= JOY_BUTTON_COUNT) {
|
if ((uint16_t)button >= (uint16_t)JOY_BUTTON_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gJoyButtonState[js][button];
|
idx = (uint16_t)((uint16_t)js * JOY_BUTTON_COUNT + (uint16_t)button);
|
||||||
|
return ((const uint8_t *)gJoyButtonState)[idx] != 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool joeyJoyPressed(JoeyJoystickE js, JoeyJoyButtonE button) {
|
bool joeyJoyPressed(JoeyJoystickE js, JoeyJoyButtonE button) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
uint16_t idx;
|
||||||
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if ((int)button < 0 || (int)button >= JOY_BUTTON_COUNT) {
|
if ((uint16_t)button >= (uint16_t)JOY_BUTTON_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return gJoyButtonState[js][button] && !gJoyButtonPrev[js][button];
|
idx = (uint16_t)((uint16_t)js * JOY_BUTTON_COUNT + (uint16_t)button);
|
||||||
|
return (((const uint8_t *)gJoyButtonState)[idx] != 0) &&
|
||||||
|
(((const uint8_t *)gJoyButtonPrev) [idx] == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool joeyJoyReleased(JoeyJoystickE js, JoeyJoyButtonE button) {
|
bool joeyJoyReleased(JoeyJoystickE js, JoeyJoyButtonE button) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
uint16_t idx;
|
||||||
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if ((int)button < 0 || (int)button >= JOY_BUTTON_COUNT) {
|
if ((uint16_t)button >= (uint16_t)JOY_BUTTON_COUNT) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
return !gJoyButtonState[js][button] && gJoyButtonPrev[js][button];
|
idx = (uint16_t)((uint16_t)js * JOY_BUTTON_COUNT + (uint16_t)button);
|
||||||
|
return (((const uint8_t *)gJoyButtonState)[idx] == 0) &&
|
||||||
|
(((const uint8_t *)gJoyButtonPrev) [idx] != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void joeyJoystickReset(JoeyJoystickE js, uint8_t deadZone) {
|
void joeyJoystickReset(JoeyJoystickE js, uint8_t deadZone) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
gJoyDeadZone[js] = deadZone;
|
gJoyDeadZone[js] = deadZone;
|
||||||
|
|
|
||||||
|
|
@ -12,19 +12,26 @@
|
||||||
#include "joey/input.h"
|
#include "joey/input.h"
|
||||||
#include "joey/types.h"
|
#include "joey/types.h"
|
||||||
|
|
||||||
extern bool gKeyState[KEY_COUNT];
|
// Stored as uint8_t (not bool) because ORCA-C compiles _Bool as a
|
||||||
extern bool gKeyPrev [KEY_COUNT];
|
// 2-byte word (Symbol.pas: size := cgWordSize). The IIgs asm fast
|
||||||
|
// path (iigsInputSnapshot) walks these arrays one byte per element;
|
||||||
|
// a 2-byte bool would put element k at byte offset 2*k and the asm's
|
||||||
|
// per-byte clear would never reach the live half. uint8_t pins the
|
||||||
|
// storage to one byte per element on every port. Public predicates
|
||||||
|
// still return bool via implicit coercion.
|
||||||
|
extern uint8_t gKeyState[KEY_COUNT];
|
||||||
|
extern uint8_t gKeyPrev [KEY_COUNT];
|
||||||
|
|
||||||
extern int16_t gMouseX;
|
extern int16_t gMouseX;
|
||||||
extern int16_t gMouseY;
|
extern int16_t gMouseY;
|
||||||
extern bool gMouseButtonState[MOUSE_BUTTON_COUNT];
|
extern uint8_t gMouseButtonState[MOUSE_BUTTON_COUNT];
|
||||||
extern bool gMouseButtonPrev [MOUSE_BUTTON_COUNT];
|
extern uint8_t gMouseButtonPrev [MOUSE_BUTTON_COUNT];
|
||||||
|
|
||||||
extern bool gJoyConnected[JOYSTICK_COUNT];
|
extern uint8_t gJoyConnected[JOYSTICK_COUNT];
|
||||||
extern int8_t gJoyAxisX [JOYSTICK_COUNT];
|
extern int8_t gJoyAxisX [JOYSTICK_COUNT];
|
||||||
extern int8_t gJoyAxisY [JOYSTICK_COUNT];
|
extern int8_t gJoyAxisY [JOYSTICK_COUNT];
|
||||||
extern bool gJoyButtonState[JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
extern uint8_t gJoyButtonState[JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
||||||
extern bool gJoyButtonPrev [JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
extern uint8_t gJoyButtonPrev [JOYSTICK_COUNT][JOY_BUTTON_COUNT];
|
||||||
|
|
||||||
// Per-stick analog calibration. Set by joeyJoystickReset on platforms
|
// Per-stick analog calibration. Set by joeyJoystickReset on platforms
|
||||||
// with analog paddles (IIgs); ignored on digital-stick platforms.
|
// with analog paddles (IIgs); ignored on digital-stick platforms.
|
||||||
|
|
|
||||||
|
|
@ -10,24 +10,69 @@
|
||||||
#include "joey/palette.h"
|
#include "joey/palette.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
// Standard 16-color EGA palette in IIgs $0RGB format. Used as the
|
||||||
|
// per-surface default at allocation time (paletteInitDefault) so a
|
||||||
|
// program that draws without first calling paletteSet still gets
|
||||||
|
// recognizable colors instead of an all-black palette. EGA index 6
|
||||||
|
// is the canonical "brown" hack ($0A50, half-green) so CGA monitors
|
||||||
|
// rendered the third primary as brown rather than dark yellow.
|
||||||
|
static const uint16_t kDefaultPaletteEga[SURFACE_COLORS_PER_PALETTE] = {
|
||||||
|
0x0000, // 0: Black
|
||||||
|
0x000A, // 1: Blue
|
||||||
|
0x00A0, // 2: Green
|
||||||
|
0x00AA, // 3: Cyan
|
||||||
|
0x0A00, // 4: Red
|
||||||
|
0x0A0A, // 5: Magenta
|
||||||
|
0x0A50, // 6: Brown
|
||||||
|
0x0AAA, // 7: Light Gray
|
||||||
|
0x0555, // 8: Dark Gray
|
||||||
|
0x055F, // 9: Light Blue
|
||||||
|
0x05F5, // 10: Light Green
|
||||||
|
0x05FF, // 11: Light Cyan
|
||||||
|
0x0F55, // 12: Light Red
|
||||||
|
0x0F5F, // 13: Light Magenta
|
||||||
|
0x0FF5, // 14: Yellow
|
||||||
|
0x0FFF // 15: White
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// ----- Internal API -----
|
||||||
|
|
||||||
|
void paletteInitDefault(SurfaceT *s) {
|
||||||
|
uint8_t i;
|
||||||
|
|
||||||
|
if (s == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
for (i = 0; i < SURFACE_PALETTE_COUNT; i++) {
|
||||||
|
paletteSet(s, i, kDefaultPaletteEga);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// ----- Public API (alphabetical) -----
|
// ----- Public API (alphabetical) -----
|
||||||
|
|
||||||
void paletteGet(const SurfaceT *s, uint8_t paletteIndex, uint16_t *out16) {
|
void paletteGet(const SurfaceT *s, uint8_t paletteIndex, uint16_t *out16) {
|
||||||
|
const uint16_t *row;
|
||||||
|
|
||||||
if (s == NULL || out16 == NULL) {
|
if (s == NULL || out16 == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (paletteIndex >= SURFACE_PALETTE_COUNT) {
|
if (paletteIndex >= SURFACE_PALETTE_COUNT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
memcpy(out16, s->palette[paletteIndex], SURFACE_COLORS_PER_PALETTE * sizeof(uint16_t));
|
/* Byte-pointer math + shift to skip the ~MUL4 helper -- see
|
||||||
|
* paletteSet for the reasoning. */
|
||||||
|
row = (const uint16_t *)((const uint8_t *)s->palette + ((uint16_t)paletteIndex << 5));
|
||||||
|
memcpy(out16, row, SURFACE_COLORS_PER_PALETTE * sizeof(uint16_t));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void paletteSet(SurfaceT *s, uint8_t paletteIndex, const uint16_t *colors16) {
|
void paletteSet(SurfaceT *s, uint8_t paletteIndex, const uint16_t *colors16) {
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
|
uint16_t *row;
|
||||||
|
const uint16_t *src;
|
||||||
|
|
||||||
if (s == NULL || colors16 == NULL) {
|
if (s == NULL || colors16 == NULL) {
|
||||||
return;
|
return;
|
||||||
|
|
@ -36,9 +81,18 @@ void paletteSet(SurfaceT *s, uint8_t paletteIndex, const uint16_t *colors16) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
s->palette[paletteIndex][0] = 0x0000;
|
/* Compute the row pointer via byte-pointer math + a single shift
|
||||||
|
* (16 entries * 2 bytes = 32 = 1 << 5) so ORCA-C doesn't emit a
|
||||||
|
* ~MUL4 helper for the 2D-array indexing. Then walk both arrays
|
||||||
|
* with post-increment pointers so the inner loop avoids ~MUL4
|
||||||
|
* for every `row[i]` / `colors16[i]` index multiply too. */
|
||||||
|
row = (uint16_t *)((uint8_t *)s->palette + ((uint16_t)paletteIndex << 5));
|
||||||
|
src = colors16;
|
||||||
|
|
||||||
|
*row++ = 0x0000;
|
||||||
|
src++;
|
||||||
for (i = 1; i < SURFACE_COLORS_PER_PALETTE; i++) {
|
for (i = 1; i < SURFACE_COLORS_PER_PALETTE; i++) {
|
||||||
s->palette[paletteIndex][i] = colors16[i] & 0x0FFF;
|
*row++ = (uint16_t)(*src++ & 0x0FFF);
|
||||||
}
|
}
|
||||||
if (s == stageGet()) {
|
if (s == stageGet()) {
|
||||||
gStagePaletteDirty = true;
|
gStagePaletteDirty = true;
|
||||||
|
|
|
||||||
|
|
@ -12,8 +12,6 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
// ----- Public API (alphabetical) -----
|
// ----- Public API (alphabetical) -----
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,8 +9,6 @@
|
||||||
#include "joey/palette.h"
|
#include "joey/palette.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
// ----- Public API (alphabetical) -----
|
// ----- Public API (alphabetical) -----
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,6 @@
|
||||||
#include "spriteInternal.h"
|
#include "spriteInternal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
// 8x8 tiles, 4bpp packed = 4 bytes/row * 8 rows = 32 bytes/tile.
|
// 8x8 tiles, 4bpp packed = 4 bytes/row * 8 rows = 32 bytes/tile.
|
||||||
#define TILE_BYTES 32
|
#define TILE_BYTES 32
|
||||||
|
|
@ -180,6 +178,7 @@ SpriteT *spriteCreate(const uint8_t *tileData, uint8_t widthTiles, uint8_t heigh
|
||||||
memset(sp->routineOffsets, 0, sizeof(sp->routineOffsets));
|
memset(sp->routineOffsets, 0, sizeof(sp->routineOffsets));
|
||||||
memset(sp->cachedDstBank, 0xFF, sizeof(sp->cachedDstBank));
|
memset(sp->cachedDstBank, 0xFF, sizeof(sp->cachedDstBank));
|
||||||
memset(sp->cachedSrcBank, 0xFF, sizeof(sp->cachedSrcBank));
|
memset(sp->cachedSrcBank, 0xFF, sizeof(sp->cachedSrcBank));
|
||||||
|
memset(sp->cachedSizeBytes, 0, sizeof(sp->cachedSizeBytes));
|
||||||
sp->flags = flags;
|
sp->flags = flags;
|
||||||
return sp;
|
return sp;
|
||||||
}
|
}
|
||||||
|
|
@ -249,6 +248,7 @@ SpriteT *spriteCreateFromSurface(const SurfaceT *src, int16_t x, int16_t y,
|
||||||
memset(sp->routineOffsets, 0, sizeof(sp->routineOffsets));
|
memset(sp->routineOffsets, 0, sizeof(sp->routineOffsets));
|
||||||
memset(sp->cachedDstBank, 0xFF, sizeof(sp->cachedDstBank));
|
memset(sp->cachedDstBank, 0xFF, sizeof(sp->cachedDstBank));
|
||||||
memset(sp->cachedSrcBank, 0xFF, sizeof(sp->cachedSrcBank));
|
memset(sp->cachedSrcBank, 0xFF, sizeof(sp->cachedSrcBank));
|
||||||
|
memset(sp->cachedSizeBytes, 0, sizeof(sp->cachedSizeBytes));
|
||||||
sp->flags = flags;
|
sp->flags = flags;
|
||||||
return sp;
|
return sp;
|
||||||
}
|
}
|
||||||
|
|
@ -296,6 +296,63 @@ void spritePrewarm(SpriteT *sp) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Combined save-then-draw fast path. Routes both ops through the
|
||||||
|
// compiled save/draw entry points after a single shared validation
|
||||||
|
// pass. Falls back to calling the public spriteSaveUnder + spriteDraw
|
||||||
|
// when the fast path isn't applicable -- semantically identical, just
|
||||||
|
// pays the dispatcher overhead twice.
|
||||||
|
void spriteSaveAndDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
|
||||||
|
uint16_t widthPx;
|
||||||
|
uint16_t heightPx;
|
||||||
|
uint8_t wTiles;
|
||||||
|
uint8_t hTiles;
|
||||||
|
ArenaSlotT *slot;
|
||||||
|
uint8_t shift;
|
||||||
|
|
||||||
|
if (s == NULL || sp == NULL || backup == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
backup->sprite = sp;
|
||||||
|
backup->sizeBytes = 0;
|
||||||
|
|
||||||
|
wTiles = sp->widthTiles;
|
||||||
|
hTiles = sp->heightTiles;
|
||||||
|
slot = sp->slot;
|
||||||
|
|
||||||
|
widthPx = (uint16_t)(wTiles * TILE_PIXELS);
|
||||||
|
heightPx = (uint16_t)(hTiles * TILE_PIXELS);
|
||||||
|
|
||||||
|
// Fast path: compiled bytes available, fully on surface, backup
|
||||||
|
// buffer supplied. Save fills out backup->{x,y,width,height,
|
||||||
|
// sizeBytes}; draw reuses (x,y,widthPx,heightPx) for the dirty
|
||||||
|
// mark. One mark instead of two (save doesn't dirty -- it's a
|
||||||
|
// read; only draw dirties).
|
||||||
|
if (slot != NULL && backup->bytes != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) {
|
||||||
|
/* Byte-pointer arithmetic dodges ~MUL4 for 2D-array indexing. */
|
||||||
|
uint16_t saveIdx;
|
||||||
|
uint16_t drawIdx;
|
||||||
|
uint8_t *offsetsBase;
|
||||||
|
shift = (uint8_t)(x & 1);
|
||||||
|
saveIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
||||||
|
drawIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_DRAW);
|
||||||
|
offsetsBase = (uint8_t *)sp->routineOffsets;
|
||||||
|
if (*(uint16_t *)(offsetsBase + (saveIdx << 1)) != SPRITE_NOT_COMPILED &&
|
||||||
|
*(uint16_t *)(offsetsBase + (drawIdx << 1)) != SPRITE_NOT_COMPILED) {
|
||||||
|
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
||||||
|
spriteCompiledDraw (s, sp, x, y);
|
||||||
|
surfaceMarkDirtyRect (s, x, y, (int16_t)widthPx, (int16_t)heightPx);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fall back to the slow paths through the public API. These
|
||||||
|
// pay the full dispatcher chain twice but handle every edge
|
||||||
|
// case (interpreter, partial clip, no-backup-buffer modes).
|
||||||
|
spriteSaveUnder(s, sp, x, y, backup);
|
||||||
|
spriteDraw (s, sp, x, y);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// .spr file format:
|
// .spr file format:
|
||||||
// offset bytes field
|
// offset bytes field
|
||||||
// ------ ----- --------------------------------------------
|
// ------ ----- --------------------------------------------
|
||||||
|
|
@ -394,6 +451,7 @@ SpriteT *spriteFromCompiledMem(const uint8_t *data, uint32_t length, SpriteFlags
|
||||||
sp->flags = flags;
|
sp->flags = flags;
|
||||||
memset(sp->cachedDstBank, 0xFF, sizeof(sp->cachedDstBank));
|
memset(sp->cachedDstBank, 0xFF, sizeof(sp->cachedDstBank));
|
||||||
memset(sp->cachedSrcBank, 0xFF, sizeof(sp->cachedSrcBank));
|
memset(sp->cachedSrcBank, 0xFF, sizeof(sp->cachedSrcBank));
|
||||||
|
memset(sp->cachedSizeBytes, 0, sizeof(sp->cachedSizeBytes));
|
||||||
return sp;
|
return sp;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -528,65 +586,115 @@ uint32_t spriteCodegenBytesUsed(void) {
|
||||||
|
|
||||||
|
|
||||||
void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) {
|
void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) {
|
||||||
int16_t row;
|
/* Fast-path locals only. Slow-path uses an inner block. */
|
||||||
int16_t byteStart;
|
int16_t bx;
|
||||||
int16_t copyBytes;
|
int16_t by;
|
||||||
uint16_t spriteBytesPerRow;
|
uint16_t bw;
|
||||||
uint8_t shift;
|
uint16_t bh;
|
||||||
uint8_t *dstRow;
|
|
||||||
SpriteT *sp;
|
SpriteT *sp;
|
||||||
|
uint16_t spriteBytesPerRow;
|
||||||
|
int16_t copyBytes;
|
||||||
|
uint8_t shift;
|
||||||
|
|
||||||
if (s == NULL || backup == NULL || backup->bytes == NULL) {
|
if (s == NULL || backup == NULL) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (backup->width == 0 || backup->height == 0) {
|
bx = backup->x;
|
||||||
return;
|
by = backup->y;
|
||||||
}
|
bw = backup->width;
|
||||||
if (backup->x < 0 || backup->y < 0) {
|
bh = backup->height;
|
||||||
return;
|
|
||||||
}
|
/* Validate. Note: SURFACE_WIDTH - bx and SURFACE_HEIGHT - by stay
|
||||||
if (backup->x >= SURFACE_WIDTH || backup->y >= SURFACE_HEIGHT) {
|
* in uint16_t range once bx >= 0 / by >= 0 has been checked, so
|
||||||
return;
|
* the right-edge / bottom-edge tests don't need 32-bit arithmetic
|
||||||
}
|
* (which would invoke ORCA-C's ~GRTL helper, ~50 cyc per call). */
|
||||||
if (backup->x + backup->width > SURFACE_WIDTH) {
|
if (backup->bytes == NULL ||
|
||||||
return;
|
bw == 0 || bh == 0 ||
|
||||||
}
|
bx < 0 || by < 0 ||
|
||||||
if (backup->y + backup->height > SURFACE_HEIGHT) {
|
bx >= SURFACE_WIDTH || by >= SURFACE_HEIGHT ||
|
||||||
return;
|
bw > (uint16_t)(SURFACE_WIDTH - bx) ||
|
||||||
}
|
bh > (uint16_t)(SURFACE_HEIGHT - by) ||
|
||||||
// Saved region is byte-aligned; sub-byte boundaries can't be
|
(bx & 1) || (bw & 1)) {
|
||||||
// represented without losing the neighboring pixel under the byte.
|
|
||||||
if ((backup->x & 1) || (backup->width & 1)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
sp = backup->sprite;
|
sp = backup->sprite;
|
||||||
if (sp != NULL && sp->slot != NULL && backup->height == sp->heightTiles * TILE_PIXELS) {
|
if (sp != NULL && sp->slot != NULL && bh == sp->heightTiles * TILE_PIXELS) {
|
||||||
|
uint16_t routeIdx;
|
||||||
|
uint16_t routeOffset;
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
|
||||||
copyBytes = (int16_t)(backup->width >> 1);
|
copyBytes = (int16_t)(bw >> 1);
|
||||||
shift = (copyBytes == (int16_t)spriteBytesPerRow) ? 0 : 1;
|
shift = (copyBytes == (int16_t)spriteBytesPerRow) ? 0 : 1;
|
||||||
if (sp->routineOffsets[shift][SPRITE_OP_RESTORE] != SPRITE_NOT_COMPILED) {
|
/* Byte-pointer arithmetic dodges ~MUL4 for 2D-array indexing. */
|
||||||
|
routeIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_RESTORE);
|
||||||
|
routeOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (routeIdx << 1));
|
||||||
|
if (routeOffset != SPRITE_NOT_COMPILED) {
|
||||||
spriteCompiledRestoreUnder(s, backup);
|
spriteCompiledRestoreUnder(s, backup);
|
||||||
surfaceMarkDirtyRect(s, backup->x, backup->y,
|
surfaceMarkDirtyRect(s, bx, by, (int16_t)bw, (int16_t)bh);
|
||||||
(int16_t)backup->width, (int16_t)backup->height);
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
byteStart = (int16_t)(backup->x >> 1);
|
/* Slow / interpreted memcpy fallback. */
|
||||||
copyBytes = (int16_t)(backup->width >> 1);
|
{
|
||||||
for (row = 0; row < backup->height; row++) {
|
int16_t row;
|
||||||
dstRow = &s->pixels[(backup->y + row) * SURFACE_BYTES_PER_ROW];
|
int16_t byteStart;
|
||||||
|
uint8_t *dstRow;
|
||||||
|
|
||||||
|
byteStart = (int16_t)(bx >> 1);
|
||||||
|
copyBytes = (int16_t)(bw >> 1);
|
||||||
|
for (row = 0; row < (int16_t)bh; row++) {
|
||||||
|
dstRow = &s->pixels[(by + row) * SURFACE_BYTES_PER_ROW];
|
||||||
memcpy(&dstRow[byteStart],
|
memcpy(&dstRow[byteStart],
|
||||||
&backup->bytes[(uint16_t)row * (uint16_t)copyBytes],
|
&backup->bytes[(uint16_t)row * (uint16_t)copyBytes],
|
||||||
(size_t)copyBytes);
|
(size_t)copyBytes);
|
||||||
}
|
}
|
||||||
surfaceMarkDirtyRect(s, backup->x, backup->y,
|
}
|
||||||
(int16_t)backup->width, (int16_t)backup->height);
|
surfaceMarkDirtyRect(s, bx, by, (int16_t)bw, (int16_t)bh);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
|
void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
|
||||||
|
/* Only fast-path locals here. Slow-path declarations live inside
|
||||||
|
* the slow-path block below so ORCA-C with -b doesn't reserve
|
||||||
|
* stack frame for them on every fast-path call. */
|
||||||
|
uint16_t widthPx;
|
||||||
|
uint16_t heightPx;
|
||||||
|
ArenaSlotT *slot;
|
||||||
|
uint8_t shift;
|
||||||
|
|
||||||
|
if (s == NULL || sp == NULL || backup == NULL) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
backup->sprite = sp;
|
||||||
|
backup->sizeBytes = 0;
|
||||||
|
|
||||||
|
slot = sp->slot;
|
||||||
|
widthPx = (uint16_t)(sp->widthTiles * TILE_PIXELS);
|
||||||
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
||||||
|
|
||||||
|
// Compiled fast path: fully on surface and the platform emitted
|
||||||
|
// bytes for SAVE at this shift. The compiled routine assumes a
|
||||||
|
// full-size, unclipped rectangle, so anything off-edge falls
|
||||||
|
// through to the interpreted memcpy loop below.
|
||||||
|
//
|
||||||
|
// The routineOffsets[shift][SPRITE_OP_SAVE] access is rewritten as
|
||||||
|
// explicit byte-pointer arithmetic to dodge ORCA-C 2.2.1's ~MUL4
|
||||||
|
// helper that gets emitted for `uint16_t arr[N][M]` indexing.
|
||||||
|
if (backup->bytes != NULL && slot != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) {
|
||||||
|
uint16_t routeIdx;
|
||||||
|
uint16_t routeOffset;
|
||||||
|
shift = (uint8_t)(x & 1);
|
||||||
|
routeIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
|
||||||
|
routeOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (routeIdx << 1));
|
||||||
|
if (routeOffset != SPRITE_NOT_COMPILED) {
|
||||||
|
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Slow / fallback path: clipping + interpreted memcpy. */
|
||||||
|
{
|
||||||
int16_t dx;
|
int16_t dx;
|
||||||
int16_t dy;
|
int16_t dy;
|
||||||
int16_t sx;
|
int16_t sx;
|
||||||
|
|
@ -598,31 +706,12 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit
|
||||||
int16_t copyBytes;
|
int16_t copyBytes;
|
||||||
int16_t clippedX;
|
int16_t clippedX;
|
||||||
int16_t clippedW;
|
int16_t clippedW;
|
||||||
uint8_t shift;
|
|
||||||
const uint8_t *srcRow;
|
const uint8_t *srcRow;
|
||||||
|
|
||||||
if (s == NULL || sp == NULL || backup == NULL) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
backup->sprite = sp;
|
|
||||||
backup->sizeBytes = 0;
|
|
||||||
|
|
||||||
dx = x;
|
dx = x;
|
||||||
dy = y;
|
dy = y;
|
||||||
w = (int16_t)(sp->widthTiles * TILE_PIXELS);
|
w = (int16_t)widthPx;
|
||||||
h = (int16_t)(sp->heightTiles * TILE_PIXELS);
|
h = (int16_t)heightPx;
|
||||||
|
|
||||||
// Compiled fast path: fully on surface and the platform emitted
|
|
||||||
// bytes for SAVE at this shift. The compiled routine assumes a
|
|
||||||
// full-size, unclipped rectangle, so anything off-edge falls
|
|
||||||
// through to the interpreted memcpy loop below.
|
|
||||||
if (backup->bytes != NULL && sp->slot != NULL && isFullyOnSurface(x, y, (uint16_t)w, (uint16_t)h)) {
|
|
||||||
shift = (uint8_t)(x & 1);
|
|
||||||
if (sp->routineOffsets[shift][SPRITE_OP_SAVE] != SPRITE_NOT_COMPILED) {
|
|
||||||
spriteCompiledSaveUnder(s, sp, x, y, backup);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!clipRect(&dx, &dy, &sx, &sy, &w, &h)) {
|
if (!clipRect(&dx, &dy, &sx, &sy, &w, &h)) {
|
||||||
backup->x = 0;
|
backup->x = 0;
|
||||||
|
|
@ -661,4 +750,5 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit
|
||||||
&srcRow[byteStart],
|
&srcRow[byteStart],
|
||||||
(size_t)copyBytes);
|
(size_t)copyBytes);
|
||||||
}
|
}
|
||||||
|
} /* end slow path */
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -45,6 +45,12 @@ struct SpriteT {
|
||||||
// 12 bytes per sprite. Unused on non-IIgs.
|
// 12 bytes per sprite. Unused on non-IIgs.
|
||||||
uint8_t cachedDstBank[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];
|
uint8_t cachedDstBank[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];
|
||||||
uint8_t cachedSrcBank[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];
|
uint8_t cachedSrcBank[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];
|
||||||
|
|
||||||
|
// Cached `copyBytes * heightPx` per shift for spriteCompiledSaveUnder's
|
||||||
|
// `backup->sizeBytes` field. uint16_t * uint16_t goes through ORCA-C
|
||||||
|
// 2.2.1's ~CUMUL2 helper (~30-50 cyc); cache hit dodges it. Filled
|
||||||
|
// lazily on first call (0 sentinel = uncached).
|
||||||
|
uint16_t cachedSizeBytes[JOEY_SPRITE_SHIFT_COUNT];
|
||||||
};
|
};
|
||||||
|
|
||||||
// Compiled entry points. Implemented alongside spriteCompile in
|
// Compiled entry points. Implemented alongside spriteCompile in
|
||||||
|
|
|
||||||
|
|
@ -10,13 +10,6 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
// Hoist into a CORESYS load segment alongside the other small core
|
|
||||||
// files. Keeps _ROOT thin and stable so it stops reacting to per-file
|
|
||||||
// source changes -- _ROOT size flux was tripping ORCA-Linker bank
|
|
||||||
// packing in spriteEmitIigs.c (see feedback_orca_link_segment_count
|
|
||||||
// cases 2-4).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||||
extern void iigsMarkDirtyRowsInner(uint16_t yStart, uint16_t yEnd, uint16_t minWord, uint16_t maxWord);
|
extern void iigsMarkDirtyRowsInner(uint16_t yStart, uint16_t yEnd, uint16_t minWord, uint16_t maxWord);
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -91,6 +84,7 @@ SurfaceT *surfaceCreate(void) {
|
||||||
free(s);
|
free(s);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
paletteInitDefault(s);
|
||||||
return s;
|
return s;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -207,8 +201,11 @@ void surfaceMarkDirtyRect(const SurfaceT *s, int16_t x, int16_t y, int16_t w, in
|
||||||
if (w <= 0 || h <= 0) {
|
if (w <= 0 || h <= 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
minWord = (uint8_t)(x >> 2);
|
/* Clipped x/w are non-negative; cast to uint16_t before `>> 2` so
|
||||||
maxWord = (uint8_t)((x + w - 1) >> 2);
|
* ORCA-C lowers to a pair of LSRs instead of the ~SSHIFTRIGHT
|
||||||
|
* helper signed shifts emit. */
|
||||||
|
minWord = (uint8_t)((uint16_t)x >> 2);
|
||||||
|
maxWord = (uint8_t)((uint16_t)(x + w - 1) >> 2);
|
||||||
yEnd = y + h;
|
yEnd = y + h;
|
||||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||||
iigsMarkDirtyRowsInner((uint16_t)y, (uint16_t)yEnd,
|
iigsMarkDirtyRowsInner((uint16_t)y, (uint16_t)yEnd,
|
||||||
|
|
@ -239,6 +236,7 @@ bool stageAlloc(void) {
|
||||||
}
|
}
|
||||||
memset(gStage->pixels, 0, SURFACE_PIXELS_SIZE);
|
memset(gStage->pixels, 0, SURFACE_PIXELS_SIZE);
|
||||||
stageDirtyClearAll();
|
stageDirtyClearAll();
|
||||||
|
paletteInitDefault(gStage);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -62,13 +62,19 @@ void stageDirtyClearAll(void);
|
||||||
|
|
||||||
// y -> byte offset of row y in a SURFACE_BYTES_PER_ROW-strided buffer.
|
// y -> byte offset of row y in a SURFACE_BYTES_PER_ROW-strided buffer.
|
||||||
// On IIgs this expands to a single indexed long-mode read against
|
// On IIgs this expands to a single indexed long-mode read against
|
||||||
// gRowOffsetLut (built once at halInit). On other ports it's the
|
// gRowOffsetLut (built once at halInit).
|
||||||
// straight multiply -- those compilers (gcc, OpenWatcom) optimize the
|
//
|
||||||
// constant 160 to a shift+add chain that's already cheap. The point
|
// The explicit (y << 1) byte-pointer arithmetic dodges ORCA-C 2.2.1's
|
||||||
// is to dodge ORCA-C's __mul16 JSL on every per-row pointer compute.
|
// `~MUL4` helper that gets emitted for `uint16_t arr[N]` indexing
|
||||||
|
// (the implicit *sizeof(uint16_t)). With the byte-cast + shift, the
|
||||||
|
// compiler emits a single ASL + indexed long-mode read.
|
||||||
|
//
|
||||||
|
// Other ports get the straight multiply -- gcc / OpenWatcom optimize
|
||||||
|
// the constant 160 to a shift+add chain.
|
||||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||||
extern const uint16_t gRowOffsetLut[200];
|
extern const uint16_t gRowOffsetLut[200];
|
||||||
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)gRowOffsetLut[(uint16_t)(_y)])
|
#define SURFACE_ROW_OFFSET(_y) \
|
||||||
|
(*((const uint16_t *)((const uint8_t *)gRowOffsetLut + ((uint16_t)(_y) << 1))))
|
||||||
#else
|
#else
|
||||||
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)((uint16_t)(_y) * SURFACE_BYTES_PER_ROW))
|
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)((uint16_t)(_y) * SURFACE_BYTES_PER_ROW))
|
||||||
#endif
|
#endif
|
||||||
|
|
@ -80,4 +86,10 @@ extern const uint16_t gRowOffsetLut[200];
|
||||||
bool stageAlloc(void);
|
bool stageAlloc(void);
|
||||||
void stageFree(void);
|
void stageFree(void);
|
||||||
|
|
||||||
|
// Fill all 16 of `s`'s palettes with the standard 16-color EGA
|
||||||
|
// palette. Called by stageAlloc and surfaceCreate so a program that
|
||||||
|
// draws without first calling paletteSet still gets recognizable
|
||||||
|
// colors instead of an all-black palette.
|
||||||
|
void paletteInitDefault(SurfaceT *s);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -20,10 +20,6 @@
|
||||||
// without that the ORCA Linker hits "Expression too complex" on
|
// without that the ORCA Linker hits "Expression too complex" on
|
||||||
// the small-binary builds.)
|
// the small-binary builds.)
|
||||||
|
|
||||||
// Hoist tile primitives into the DRAWPRIMS load segment. Asm
|
|
||||||
// dispatches go through halFast* hooks in src/port/iigs/hal.c so
|
|
||||||
// only one TU references the asm symbols (avoids the cumulative
|
|
||||||
// "Expression too complex" link failure).
|
|
||||||
JOEYLIB_SEGMENT("DRAWPRIMS")
|
JOEYLIB_SEGMENT("DRAWPRIMS")
|
||||||
|
|
||||||
// ----- Prototypes -----
|
// ----- Prototypes -----
|
||||||
|
|
|
||||||
|
|
@ -509,6 +509,37 @@ void halWaitVBL(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// VPOSR ($DFF004), read as a 16-bit word:
//   bit 15      LOF (long-frame flag)
//   bits 14..8  Agnus/Alice chip identification (constant at runtime)
//   bits 2..0   vertical beam position bits V10..V8 (V8 only on OCS)
// V8 is therefore bit 0 of the word, NOT bit 8. It is set while the
// beam is on line 256 or below and drops back to 0 when the beam wraps
// to line 0, so a 1 -> 0 transition marks a fresh frame. Edge-detected
// per call so the caller (UBER, etc.) just polls; no IRQ server needed.
#define AMIGA_VPOSR    ((volatile uint16_t *)0xDFF004UL)
#define AMIGA_VPOSR_V8 0x0001u

static uint16_t gFrameCount = 0;
static uint8_t  gPrevVbHi   = 0;

// Returns the number of frame wraps observed so far (wraps at 65535).
// The count only advances inside this call, so the caller has to poll
// often enough to see V8 high at least once per frame (the tail of the
// frame, from line 256 to the last line) and low at least once.
uint16_t halFrameCount(void) {
    uint8_t now;

    /* PAL frame is ~313 lines, NTSC ~263 -- both set V8 once per frame
     * and clear it on wrap, which is the "frame edge" signal we want.
     * Shifting the word right by 8 first (as this code used to) lands
     * on the chip-ID field, which never changes, so no edge was ever
     * seen and the counter stayed at 0. */
    now = (uint8_t)(*AMIGA_VPOSR & AMIGA_VPOSR_V8);
    if (gPrevVbHi && !now) {
        gFrameCount++;
    }
    gPrevVbHi = now;
    return gFrameCount;
}
|
||||||
|
|
||||||
|
|
||||||
|
uint16_t halFrameHz(void) {
    /* Nominal display refresh rate reported to callers. This port
     * reports the PAL rate unconditionally; nothing here detects or
     * switches to NTSC, which would be 60. */
    const uint16_t palRefreshHz = 50u;

    return palRefreshHz;
}
|
||||||
|
|
||||||
|
|
||||||
void halShutdown(void) {
|
void halShutdown(void) {
|
||||||
if (gScreen != NULL) {
|
if (gScreen != NULL) {
|
||||||
// CloseScreen should free attached UCopList, but be explicit
|
// CloseScreen should free attached UCopList, but be explicit
|
||||||
|
|
|
||||||
|
|
@ -562,6 +562,21 @@ void halWaitVBL(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// gFrameCount is already maintained by our VBL ISR; just narrow to
|
||||||
|
// uint16_t for the cross-port HAL contract.
|
||||||
|
uint16_t halFrameCount(void) {
|
||||||
|
return (uint16_t)gFrameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint16_t halFrameHz(void) {
    /* Nominal refresh rate reported to callers. A PAL ST runs at
     * 50 Hz; NTSC STs and the SM124 mono monitor run at roughly 60 and
     * 70. The PAL figure is reported as a fixed baseline, which is
     * close enough for ops/sec scaling -- the real frame rate still
     * shows up in iteration counts. */
    const uint16_t baselineHz = 50u;

    return baselineHz;
}
|
||||||
|
|
||||||
|
|
||||||
void halShutdown(void) {
|
void halShutdown(void) {
|
||||||
if (!gModeSet) {
|
if (!gModeSet) {
|
||||||
return;
|
return;
|
||||||
|
|
|
||||||
|
|
@ -150,7 +150,9 @@ static volatile uint8_t gPacketRemaining = 0;
|
||||||
static volatile uint8_t gPacketKind = PKT_KIND_NONE;
|
static volatile uint8_t gPacketKind = PKT_KIND_NONE;
|
||||||
static volatile uint8_t gMousePacketByte = 0; // bytes consumed in current packet
|
static volatile uint8_t gMousePacketByte = 0; // bytes consumed in current packet
|
||||||
static bool gHooked = false;
|
static bool gHooked = false;
|
||||||
static volatile bool gIsrState[KEY_COUNT];
|
// uint8_t (not bool) so element size matches gKeyState's. See
|
||||||
|
// src/core/inputInternal.h for the full rationale.
|
||||||
|
static volatile uint8_t gIsrState[KEY_COUNT];
|
||||||
|
|
||||||
// Mouse delta accumulator. Each ACIA mouse packet adds dx/dy here; the
|
// Mouse delta accumulator. Each ACIA mouse packet adds dx/dy here; the
|
||||||
// poll routine clamps the running absolute position into the surface
|
// poll routine clamps the running absolute position into the surface
|
||||||
|
|
|
||||||
|
|
@ -275,6 +275,32 @@ void halWaitVBL(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Frame counter via $3DA bit 3 polling; rising edge marks the start
|
||||||
|
// of vertical retrace. Caller polls fast enough that no edge is
|
||||||
|
// missed (UBER's hot loop is far below 70 Hz period even on a 386).
|
||||||
|
static uint16_t gFrameCount = 0;
|
||||||
|
static uint8_t gPrevInVret = 0;
|
||||||
|
|
||||||
|
uint16_t halFrameCount(void) {
|
||||||
|
uint8_t now;
|
||||||
|
|
||||||
|
now = (uint8_t)(inportb(VGA_INPUT_STAT_1) & VGA_VRETRACE_BIT);
|
||||||
|
if (now && !gPrevInVret) {
|
||||||
|
gFrameCount++;
|
||||||
|
}
|
||||||
|
gPrevInVret = now;
|
||||||
|
return gFrameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint16_t halFrameHz(void) {
    /* Nominal refresh rate reported to callers. VGA mode 13h refreshes
     * at about 70 Hz on a real CRT (70.086 Hz), so rounding to 70
     * keeps ops/sec scaling accurate to within roughly 0.1%. */
    const uint16_t mode13hRefreshHz = 70u;

    return mode13hRefreshHz;
}
|
||||||
|
|
||||||
|
|
||||||
void halShutdown(void) {
|
void halShutdown(void) {
|
||||||
__dpmi_regs regs;
|
__dpmi_regs regs;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -152,7 +152,9 @@ static const uint8_t gScanToKey[SCAN_TABLE_SIZE] = {
|
||||||
static _go32_dpmi_seginfo gOldHandler;
|
static _go32_dpmi_seginfo gOldHandler;
|
||||||
static _go32_dpmi_seginfo gNewHandler;
|
static _go32_dpmi_seginfo gNewHandler;
|
||||||
static bool gHooked = false;
|
static bool gHooked = false;
|
||||||
static volatile bool gIsrState[KEY_COUNT];
|
// uint8_t (not bool) so element size matches gKeyState's. See
|
||||||
|
// src/core/inputInternal.h for the full rationale.
|
||||||
|
static volatile uint8_t gIsrState[KEY_COUNT];
|
||||||
|
|
||||||
static bool gMousePresent = false;
|
static bool gMousePresent = false;
|
||||||
static bool gJoystickPresent = false;
|
static bool gJoystickPresent = false;
|
||||||
|
|
|
||||||
|
|
@ -29,9 +29,7 @@
|
||||||
// _ROOT in every binary that includes this TU. (See ORCA/C ch. 30
|
// _ROOT in every binary that includes this TU. (See ORCA/C ch. 30
|
||||||
// "segment statement". Reusing the same segment as draw.c / tile.c
|
// "segment statement". Reusing the same segment as draw.c / tile.c
|
||||||
// rather than picking a unique name keeps the linker's symbol-
|
// rather than picking a unique name keeps the linker's symbol-
|
||||||
// resolution expressions flat -- per-name extras nest the
|
// resolution expressions flat.)
|
||||||
// expression and trip the "too complex" threshold on small
|
|
||||||
// binaries.)
|
|
||||||
//
|
//
|
||||||
// The 34 KB NTP replayer bytes are NOT in this segment -- ORCA/C's
|
// The 34 KB NTP replayer bytes are NOT in this segment -- ORCA/C's
|
||||||
// `segment` statement only relocates functions, not data. They live
|
// `segment` statement only relocates functions, not data. They live
|
||||||
|
|
@ -99,6 +97,17 @@ static uint32_t gSfxBase = 0;
|
||||||
static bool gNTPReady = false;
|
static bool gNTPReady = false;
|
||||||
static bool gNTPPlaying = false;
|
static bool gNTPPlaying = false;
|
||||||
|
|
||||||
|
// Per-slot config cache. halAudioPlaySfx's biggest cost is the
|
||||||
|
// per-byte XOR-with-$80 loop over the entire sample (signed -> DOC's
|
||||||
|
// unsigned format), which on a 4 KB sample is ~120 k cyc / ~43 ms in
|
||||||
|
// ORCA-C. Most callers play the same SFX repeatedly into the same
|
||||||
|
// slot. Cache (sample ptr, length, rate) per slot; on cache hit
|
||||||
|
// (same sample re-triggered) skip the byte copy AND the struct
|
||||||
|
// rebuild, just re-fire NTPstreamsound.
|
||||||
|
static const uint8_t *gSfxSlotSample[JOEY_AUDIO_SFX_SLOTS] = { 0 };
|
||||||
|
static uint32_t gSfxSlotLength[JOEY_AUDIO_SFX_SLOTS] = { 0 };
|
||||||
|
static uint16_t gSfxSlotRateHz[JOEY_AUDIO_SFX_SLOTS] = { 0 };
|
||||||
|
|
||||||
// SFX handle layout: stream structure first, sample bytes after.
|
// SFX handle layout: stream structure first, sample bytes after.
|
||||||
// Both end up at known 24-bit addresses, side-stepping the small
|
// Both end up at known 24-bit addresses, side-stepping the small
|
||||||
// memory model's 16-bit pointer issue.
|
// memory model's 16-bit pointer issue.
|
||||||
|
|
@ -244,6 +253,21 @@ void halAudioShutdown(void) {
|
||||||
if (gNTPPlaying) {
|
if (gNTPPlaying) {
|
||||||
halAudioStopMod();
|
halAudioStopMod();
|
||||||
}
|
}
|
||||||
|
// Silence every SFX slot before disposing the handles. NTP's DOC
|
||||||
|
// IRQ vector points into the buffer we are about to free; if any
|
||||||
|
// oscillator finishes its sample after the dispose, the wave-done
|
||||||
|
// interrupt fires into freed memory and the IIgs reports
|
||||||
|
// "Unclaimed Sound Interrupt" plus a stuck high-pitched whine
|
||||||
|
// (whatever sample byte was last loaded into the DOC).
|
||||||
|
{
|
||||||
|
uint8_t i;
|
||||||
|
for (i = 0; i < JOEY_AUDIO_SFX_SLOTS; i++) {
|
||||||
|
halAudioStopSfx(i);
|
||||||
|
gSfxSlotSample[i] = (const uint8_t *)0;
|
||||||
|
gSfxSlotLength[i] = 0;
|
||||||
|
gSfxSlotRateHz[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (gSfxHandle != NULL) {
|
if (gSfxHandle != NULL) {
|
||||||
DisposeHandle(gSfxHandle);
|
DisposeHandle(gSfxHandle);
|
||||||
gSfxHandle = NULL;
|
gSfxHandle = NULL;
|
||||||
|
|
@ -325,9 +349,19 @@ void halAudioPlaySfx(uint8_t slot, const uint8_t *sample, uint32_t length, uint1
|
||||||
structAddr = slotBase;
|
structAddr = slotBase;
|
||||||
sampleAddr = slotBase + SFX_SAMPLE_OFFSET;
|
sampleAddr = slotBase + SFX_SAMPLE_OFFSET;
|
||||||
|
|
||||||
// Copy the sample into this slot's fixed-bank region, converting
|
// Cache check: same sample, length, and rate as the prior trigger
|
||||||
// signed 8-bit (public API contract) to unsigned 8-bit (DOC RAM
|
// for this slot? Then the slot's DOC sample bytes and stream
|
||||||
// format) by flipping the sign bit.
|
// struct are already correct -- skip the 4 KB byte-XOR loop and
|
||||||
|
// the 15-byte struct rebuild, both of which together can run
|
||||||
|
// ~50 ms per call in ORCA-C.
|
||||||
|
if (sample == gSfxSlotSample[slot] &&
|
||||||
|
length == gSfxSlotLength[slot] &&
|
||||||
|
rateHz == gSfxSlotRateHz[slot]) {
|
||||||
|
// Cache hit -- jump straight to the NTPstreamsound trigger.
|
||||||
|
} else {
|
||||||
|
// Cache miss: copy the sample into this slot's fixed-bank
|
||||||
|
// region, converting signed 8-bit (public API contract) to
|
||||||
|
// unsigned 8-bit (DOC RAM format) by flipping the sign bit.
|
||||||
{
|
{
|
||||||
unsigned char *dst;
|
unsigned char *dst;
|
||||||
uint32_t i;
|
uint32_t i;
|
||||||
|
|
@ -356,6 +390,11 @@ void halAudioPlaySfx(uint8_t slot, const uint8_t *sample, uint32_t length, uint1
|
||||||
sfx[13] = SFX_VOLUME;
|
sfx[13] = SFX_VOLUME;
|
||||||
sfx[14] = SFX_CHANNEL_LEFT;
|
sfx[14] = SFX_CHANNEL_LEFT;
|
||||||
|
|
||||||
|
gSfxSlotSample[slot] = sample;
|
||||||
|
gSfxSlotLength[slot] = length;
|
||||||
|
gSfxSlotRateHz[slot] = rateHz;
|
||||||
|
}
|
||||||
|
|
||||||
// NTPstreamsound(structPtr in X/Y). Same 24-bit address packing
|
// NTPstreamsound(structPtr in X/Y). Same 24-bit address packing
|
||||||
// pattern as NTPprepare: low 16 in X, bank in Y.
|
// pattern as NTPprepare: low 16 in X, bank in Y.
|
||||||
buildCallStub(gNTPBase + 24,
|
buildCallStub(gNTPBase + 24,
|
||||||
|
|
|
||||||
|
|
@ -95,6 +95,11 @@ extern void iigsInitRowLut(void);
|
||||||
// subsequent rows are at srcOffset + 160, etc. ~9 cyc/byte vs
|
// subsequent rows are at srcOffset + 160, etc. ~9 cyc/byte vs
|
||||||
// ORCA-C memcpy's ~30 cyc/byte.
|
// ORCA-C memcpy's ~30 cyc/byte.
|
||||||
extern void iigsBlitRectStageToShr(uint16_t srcOffset, uint16_t copyBytes, uint16_t rowsLeft);
|
extern void iigsBlitRectStageToShr(uint16_t srcOffset, uint16_t copyBytes, uint16_t rowsLeft);
|
||||||
|
// PEI-slam variant of the per-row rect blit. ~3 cyc/byte vs MVN's
|
||||||
|
// ~9 cyc/byte. Constraints: copyBytes must be even and 2..80
|
||||||
|
// (caller / dispatcher checks). For sprite-rect presents (typical
|
||||||
|
// 8 bytes wide x 16 rows) saves ~600 cyc/frame vs the MVN form.
|
||||||
|
extern void iigsBlitRectStageToShrPEI(uint16_t srcOffset, uint16_t copyBytes, uint16_t rowsLeft);
|
||||||
// Filled circle, scanline-style. fillWord low byte is the doubled
|
// Filled circle, scanline-style. fillWord low byte is the doubled
|
||||||
// nibble (e.g., 0x33 for nibble 3).
|
// nibble (e.g., 0x33 for nibble 3).
|
||||||
extern void iigsFillCircleInner(uint8_t *pixels, uint16_t cx, uint16_t cy, uint16_t r, uint16_t fillWord);
|
extern void iigsFillCircleInner(uint8_t *pixels, uint16_t cx, uint16_t cy, uint16_t r, uint16_t fillWord);
|
||||||
|
|
@ -240,22 +245,27 @@ void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint1
|
||||||
|
|
||||||
uploadScbAndPaletteIfNeeded(src);
|
uploadScbAndPaletteIfNeeded(src);
|
||||||
|
|
||||||
// Pixel copy: byte-aligned runs per scanline. x is always even
|
// Pixel copy: byte-aligned runs per scanline. x is always >= 0
|
||||||
// after API-level clipping for 4bpp packed if caller aligned it;
|
// after API-level clipping. Use unsigned shifts to avoid
|
||||||
// otherwise we include the byte containing the leftmost pixel.
|
// ~SSHIFTRIGHT helper for `x >> 1` on signed int16_t.
|
||||||
byteStart = x >> 1;
|
byteStart = (int16_t)((uint16_t)x >> 1);
|
||||||
copyBytes = (uint16_t)(((x + (int16_t)w + 1) >> 1) - byteStart);
|
copyBytes = (uint16_t)((((uint16_t)x + w + 1u) >> 1) - (uint16_t)byteStart);
|
||||||
|
|
||||||
if (copyBytes == 0 || h == 0) {
|
if (copyBytes == 0 || h == 0) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Asm per-row MVN blit. Stage pixels live at $01:2000; SHR display
|
// Pixel copy: prefer the PEI-slam variant when the rect satisfies
|
||||||
// at $E1:2000 (same offset within their banks). srcOffset is the
|
// its contract (copyBytes even, 2..80). Sprite-rect presents
|
||||||
// byte offset of the first byte to copy on the first row.
|
// (typical 8 bytes wide) hit this ~3x faster than MVN. Wider or
|
||||||
|
// odd-byte rects fall back to MVN, which has no width cap.
|
||||||
srcOffset = (uint16_t)(0x2000 + SURFACE_ROW_OFFSET(y) + byteStart);
|
srcOffset = (uint16_t)(0x2000 + SURFACE_ROW_OFFSET(y) + byteStart);
|
||||||
|
if ((copyBytes & 1) == 0 && copyBytes >= 2 && copyBytes <= 80) {
|
||||||
|
iigsBlitRectStageToShrPEI(srcOffset, copyBytes, h);
|
||||||
|
} else {
|
||||||
iigsBlitRectStageToShr(srcOffset, copyBytes, h);
|
iigsBlitRectStageToShr(srcOffset, copyBytes, h);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void halShutdown(void) {
|
void halShutdown(void) {
|
||||||
|
|
@ -307,3 +317,27 @@ void halWaitVBL(void) {
|
||||||
/* scanning: wait for next VBL */;
|
/* scanning: wait for next VBL */;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Frame counter via $C019 polling. Edge-detected on each call, so
|
||||||
|
// callers (UBER, animation loops) must poll at least once per frame
|
||||||
|
// or a VBL transition is missed. No IRQ involvement; safe in the S16
|
||||||
|
// takeover context where ToolBox interrupt setup would be intrusive.
|
||||||
|
static uint16_t gFrameCount = 0;
|
||||||
|
static uint8_t gPrevInVbl = 0;
|
||||||
|
|
||||||
|
uint16_t halFrameCount(void) {
|
||||||
|
uint8_t now;
|
||||||
|
|
||||||
|
now = (*IIGS_VBL_STATUS & VBL_BAR_BIT) == 0;
|
||||||
|
if (now && !gPrevInVbl) {
|
||||||
|
gFrameCount++;
|
||||||
|
}
|
||||||
|
gPrevInVbl = now;
|
||||||
|
return gFrameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint16_t halFrameHz(void) {
|
||||||
|
return 60u;
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -14,10 +14,15 @@
|
||||||
// it is enough for feature parity with the other platforms on typical
|
// it is enough for feature parity with the other platforms on typical
|
||||||
// "press a key, act on it" flows.
|
// "press a key, act on it" flows.
|
||||||
//
|
//
|
||||||
// Held-key state is synthesized via a TTL counter: a fresh strobe on
|
// Release detection uses the IIe-inherited "any key currently down"
|
||||||
// $C000 refreshes the TTL; each halInputPoll decays it; when TTL hits
|
// live flag at $C010 bit 7 (set by the keyboard scanner independently
|
||||||
// zero we assume the key was released. KEY_TTL is sized to cover the
|
// of the strobe). Each halInputPoll drains pending strobe events to
|
||||||
// typematic initial delay so that a held key does not flicker.
|
// pick up presses, then samples $C010: bit 7 == 0 means no
|
||||||
|
// non-modifier key is physically held, and we wholesale-clear
|
||||||
|
// gKeyState. readModifierKeys then re-asserts the modifiers from
|
||||||
|
// $C025's live state, so shift/ctrl/option stay accurate. Avoids
|
||||||
|
// the inferred-release lag the old TTL-decay scheme had, and works
|
||||||
|
// on every IIgs (real or stealth) without ToolBox / ADB Tool init.
|
||||||
//
|
//
|
||||||
// Mouse: $C024 (delta data) and $C027 (status). Each $C024 read
|
// Mouse: $C024 (delta data) and $C027 (status). Each $C024 read
|
||||||
// returns one signed 7-bit delta; $C027 bit 1 indicates whether the
|
// returns one signed 7-bit delta; $C027 bit 1 indicates whether the
|
||||||
|
|
@ -37,8 +42,6 @@
|
||||||
#include "inputInternal.h"
|
#include "inputInternal.h"
|
||||||
#include "joey/surface.h"
|
#include "joey/surface.h"
|
||||||
|
|
||||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
|
||||||
JOEYLIB_SEGMENT("CORESYS")
|
|
||||||
|
|
||||||
// ----- Hardware registers -----
|
// ----- Hardware registers -----
|
||||||
|
|
||||||
|
|
@ -63,6 +66,18 @@ JOEYLIB_SEGMENT("CORESYS")
|
||||||
#define KBD_STROBE_BIT 0x80
|
#define KBD_STROBE_BIT 0x80
|
||||||
#define KBD_ASCII_MASK 0x7F
|
#define KBD_ASCII_MASK 0x7F
|
||||||
|
|
||||||
|
// $C010 RDKBDSTRB: reading clears the keyboard strobe at $C000 and
|
||||||
|
// returns the live "any key currently held" flag in bit 7 (set by
|
||||||
|
// the keyboard scanner / ADB MCU independently of the strobe). Used
|
||||||
|
// to drive immediate release detection without an inferred-release
|
||||||
|
// TTL counter.
|
||||||
|
#define KBD_ANY_KEY_DOWN_BIT 0x80
|
||||||
|
|
||||||
|
// Cap on the per-poll keyboard-FIFO drain. The IIgs ADB queue is
|
||||||
|
// small in practice; this is purely a defensive bound so a stuck
|
||||||
|
// strobe can't spin halInputPoll forever.
|
||||||
|
#define KBD_DRAIN_GUARD 32u
|
||||||
|
|
||||||
// $C025 layout (IIgs Hardware Reference): bit 0 = shift, bit 1 = ctrl,
|
// $C025 layout (IIgs Hardware Reference): bit 0 = shift, bit 1 = ctrl,
|
||||||
// bit 6 = option (Closed-Apple), bit 7 = command (Open-Apple).
|
// bit 6 = option (Closed-Apple), bit 7 = command (Open-Apple).
|
||||||
#define MOD_SHIFT 0x01
|
#define MOD_SHIFT 0x01
|
||||||
|
|
@ -79,11 +94,6 @@ JOEYLIB_SEGMENT("CORESYS")
|
||||||
#define MOUSE_DELTA_SIGN_BIT 0x40
|
#define MOUSE_DELTA_SIGN_BIT 0x40
|
||||||
#define MOUSE_BUTTON_INV 0x80
|
#define MOUSE_BUTTON_INV 0x80
|
||||||
|
|
||||||
// Polls a key stays "down" after the last observed strobe. Covers the
|
|
||||||
// typematic initial delay so a held key does not flicker off/on between
|
|
||||||
// repeats.
|
|
||||||
#define KEY_TTL 45
|
|
||||||
|
|
||||||
#define ASCII_TABLE_SIZE 128
|
#define ASCII_TABLE_SIZE 128
|
||||||
|
|
||||||
// Apple II arrow-key ASCII conventions.
|
// Apple II arrow-key ASCII conventions.
|
||||||
|
|
@ -113,11 +123,6 @@ static int8_t thresholdPaddle(uint8_t v);
|
||||||
// O(1) instead of a 40-plus-case switch.
|
// O(1) instead of a 40-plus-case switch.
|
||||||
static uint8_t gAsciiToKey[ASCII_TABLE_SIZE];
|
static uint8_t gAsciiToKey[ASCII_TABLE_SIZE];
|
||||||
|
|
||||||
// Non-static so iigsInputSnapshot (joeyDraw.asm) can reference it via
|
|
||||||
// long-mode addressing through the linker. The C TTL-decrement loop
|
|
||||||
// that used to live in halInputPoll moved to that asm helper.
|
|
||||||
uint8_t gKeyTtl [KEY_COUNT];
|
|
||||||
|
|
||||||
static int16_t gMouseAbsX = SURFACE_WIDTH / 2;
|
static int16_t gMouseAbsX = SURFACE_WIDTH / 2;
|
||||||
static int16_t gMouseAbsY = SURFACE_HEIGHT / 2;
|
static int16_t gMouseAbsY = SURFACE_HEIGHT / 2;
|
||||||
|
|
||||||
|
|
@ -246,14 +251,18 @@ static bool gJoyDisconnectLatched = false;
|
||||||
// to the digital threshold mapping. gJoyRecalibrate is set by
|
// to the digital threshold mapping. gJoyRecalibrate is set by
|
||||||
// halJoystickReset and cleared on the next successful poll, which
|
// halJoystickReset and cleared on the next successful poll, which
|
||||||
// captures the new center.
|
// captures the new center.
|
||||||
|
// uint8_t (not bool) so the per-element stride is 1 byte. ORCA-C's
|
||||||
|
// _Bool is 2 bytes, which forces a ~MUL4 helper for every index
|
||||||
|
// multiply -- even when the index is a constant the compiler doesn't
|
||||||
|
// fold. Storage is still 0 or 1 either way.
|
||||||
static uint8_t gJoyCenterX [JOYSTICK_COUNT];
|
static uint8_t gJoyCenterX [JOYSTICK_COUNT];
|
||||||
static uint8_t gJoyCenterY [JOYSTICK_COUNT];
|
static uint8_t gJoyCenterY [JOYSTICK_COUNT];
|
||||||
static bool gJoyCenterValid [JOYSTICK_COUNT];
|
static uint8_t gJoyCenterValid [JOYSTICK_COUNT];
|
||||||
static bool gJoyRecalibrate [JOYSTICK_COUNT];
|
static uint8_t gJoyRecalibrate [JOYSTICK_COUNT];
|
||||||
|
|
||||||
|
|
||||||
void halJoystickReset(JoeyJoystickE js) {
|
void halJoystickReset(JoeyJoystickE js) {
|
||||||
if ((int)js < 0 || (int)js >= JOYSTICK_COUNT) {
|
if ((uint16_t)js >= (uint16_t)JOYSTICK_COUNT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Re-enable polling and arm a fresh center capture for the next
|
// Re-enable polling and arm a fresh center capture for the next
|
||||||
|
|
@ -281,8 +290,14 @@ static void pollJoystick(void) {
|
||||||
bool yResolved;
|
bool yResolved;
|
||||||
|
|
||||||
// Buttons are I/O reads -- always cheap, do them every frame.
|
// Buttons are I/O reads -- always cheap, do them every frame.
|
||||||
gJoyButtonState[JOYSTICK_0][JOY_BUTTON_0] = (*IIGS_BTN0 & IIGS_BUTTON_BIT) != 0;
|
// ORCA-C 2.2.1 doesn't constant-fold the row-stride multiply for
|
||||||
gJoyButtonState[JOYSTICK_0][JOY_BUTTON_1] = (*IIGS_BTN1 & IIGS_BUTTON_BIT) != 0;
|
// 2D arrays even when both indices are constants, so each
|
||||||
|
// gJoyButtonState[i][j] write emits a ~MUL4 helper. Indexing
|
||||||
|
// through a (uint8_t *) cast collapses to a literal byte offset.
|
||||||
|
((uint8_t *)gJoyButtonState)[JOYSTICK_0 * JOY_BUTTON_COUNT + JOY_BUTTON_0]
|
||||||
|
= (*IIGS_BTN0 & IIGS_BUTTON_BIT) != 0;
|
||||||
|
((uint8_t *)gJoyButtonState)[JOYSTICK_0 * JOY_BUTTON_COUNT + JOY_BUTTON_1]
|
||||||
|
= (*IIGS_BTN1 & IIGS_BUTTON_BIT) != 0;
|
||||||
gJoyConnected[JOYSTICK_1] = false;
|
gJoyConnected[JOYSTICK_1] = false;
|
||||||
|
|
||||||
// Once the stick has been latched as disconnected, only buttons
|
// Once the stick has been latched as disconnected, only buttons
|
||||||
|
|
@ -394,7 +409,6 @@ static void pollMouse(void) {
|
||||||
void halInputInit(void) {
|
void halInputInit(void) {
|
||||||
memset(gKeyState, 0, sizeof(gKeyState));
|
memset(gKeyState, 0, sizeof(gKeyState));
|
||||||
memset(gKeyPrev, 0, sizeof(gKeyPrev));
|
memset(gKeyPrev, 0, sizeof(gKeyPrev));
|
||||||
memset(gKeyTtl, 0, sizeof(gKeyTtl));
|
|
||||||
buildAsciiTable();
|
buildAsciiTable();
|
||||||
|
|
||||||
gMouseAbsX = SURFACE_WIDTH / 2;
|
gMouseAbsX = SURFACE_WIDTH / 2;
|
||||||
|
|
@ -411,23 +425,50 @@ void halInputPoll(void) {
|
||||||
uint8_t kbd;
|
uint8_t kbd;
|
||||||
uint8_t ascii;
|
uint8_t ascii;
|
||||||
uint8_t key;
|
uint8_t key;
|
||||||
|
uint8_t kbdStrb;
|
||||||
|
uint16_t drainGuard;
|
||||||
|
bool strobeObserved;
|
||||||
|
|
||||||
// The KEY_COUNT TTL-decrement loop and the gKeyState/gKeyPrev/
|
// The gKeyState/gKeyPrev/gMouseButtonPrev/gJoyButtonPrev snapshots
|
||||||
// gMouseButtonPrev/gJoyButtonPrev snapshots all happen earlier in
|
// all happen earlier in joeyInputPoll's call to iigsInputSnapshot
|
||||||
// joeyInputPoll's call to iigsInputSnapshot (asm). We just read
|
// (asm). We just read the live hardware state here.
|
||||||
// the live hardware state here.
|
|
||||||
|
|
||||||
|
// Drain the keyboard FIFO, not just the head. The IIgs ADB MCU
|
||||||
|
// queues press + autorepeat events; consuming only one per poll
|
||||||
|
// would leave queued events waiting to refresh state on later
|
||||||
|
// polls. KBD_DRAIN_GUARD bounds the loop in case a stuck strobe
|
||||||
|
// ever fails to clear.
|
||||||
|
strobeObserved = false;
|
||||||
|
for (drainGuard = 0; drainGuard < KBD_DRAIN_GUARD; drainGuard++) {
|
||||||
kbd = *IIGS_KBD;
|
kbd = *IIGS_KBD;
|
||||||
if (kbd & KBD_STROBE_BIT) {
|
if ((kbd & KBD_STROBE_BIT) == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
strobeObserved = true;
|
||||||
ascii = (uint8_t)(kbd & KBD_ASCII_MASK);
|
ascii = (uint8_t)(kbd & KBD_ASCII_MASK);
|
||||||
key = gAsciiToKey[ascii];
|
key = gAsciiToKey[ascii];
|
||||||
if (key != KEY_NONE) {
|
if (key != KEY_NONE) {
|
||||||
gKeyState[key] = true;
|
gKeyState[key] = true;
|
||||||
gKeyTtl[key] = KEY_TTL;
|
|
||||||
}
|
}
|
||||||
(void)*IIGS_KBDSTRB;
|
(void)*IIGS_KBDSTRB;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// $C010 bit 7 is the live "any non-modifier key currently held"
|
||||||
|
// flag (IIe-inherited; updated by the keyboard scanner / ADB MCU
|
||||||
|
// independently of the strobe). When 0 we know all non-modifier
|
||||||
|
// keys are physically released, so wholesale-clear gKeyState and
|
||||||
|
// let readModifierKeys re-assert the modifiers from $C025 below.
|
||||||
|
//
|
||||||
|
// strobeObserved guard: a press that arrived AND was released
|
||||||
|
// between two polls would otherwise be set-then-cleared in a
|
||||||
|
// single poll, losing the rising edge that joeyKeyPressed needs.
|
||||||
|
// Holding the press for one poll preserves it; the next poll's
|
||||||
|
// bit-7 read will clear normally.
|
||||||
|
kbdStrb = *IIGS_KBDSTRB;
|
||||||
|
if (!strobeObserved && (kbdStrb & KBD_ANY_KEY_DOWN_BIT) == 0) {
|
||||||
|
memset(gKeyState, 0, sizeof(gKeyState));
|
||||||
|
}
|
||||||
|
|
||||||
readModifierKeys();
|
readModifierKeys();
|
||||||
pollMouse();
|
pollMouse();
|
||||||
pollJoystick();
|
pollJoystick();
|
||||||
|
|
|
||||||
|
|
@ -2740,6 +2740,221 @@ brsBytesM1 data DRAWPRIMS
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
****************************************************************
|
||||||
|
* iigsBlitRectStageToShrPEI(srcOffset, copyBytes, rowsLeft)
|
||||||
|
*
|
||||||
|
* PEI-slam variant of iigsBlitRectStageToShr for partial-rect
|
||||||
|
* presents. Uses the SHR shadow trick + AUXWRITE/RAMRD stack hijack
|
||||||
|
* to push pixel words from $01:row to $E1:row at ~3 cyc/byte instead
|
||||||
|
* of MVN's ~9 cyc/byte (against $E1 wait states). For a 16x16 sprite
|
||||||
|
* present (16 rows x 8 bytes) that's ~640 cyc vs MVN's ~1300 cyc.
|
||||||
|
*
|
||||||
|
* Caller contract:
|
||||||
|
* - copyBytes must be even and >= 2 and <= 80. Caller (C wrapper)
|
||||||
|
* verifies; this asm assumes the contract holds.
|
||||||
|
* - srcOffset is the byte offset within bank $01 of the FIRST byte
|
||||||
|
* of the FIRST row to copy. Rows advance by 160.
|
||||||
|
*
|
||||||
|
* SEI window for the duration: copyBytes/2 PEIs * rowsLeft + setup
|
||||||
|
* per row. For a 16x16 sprite that's ~700 cyc = ~0.25 ms; safe for
|
||||||
|
* DOC IRQ. The C wrapper falls back to MVN only for odd or >80-byte
|
||||||
|
* widths; it does not cap rowsLeft, so tall rects lengthen the window.
|
||||||
|
*
|
||||||
|
* Args after PHP+PHB+PHD (TCD = SP+8):
|
||||||
|
* srcOffset at D+0..1
|
||||||
|
* copyBytes at D+2..3
|
||||||
|
* rowsLeft at D+4..5
|
||||||
|
****************************************************************
|
||||||
|
|
||||||
|
iigsBlitRectStageToShrPEI start RECTPEI
|
||||||
|
brpOff equ 0
|
||||||
|
brpBytes equ 2
|
||||||
|
brpRows equ 4
|
||||||
|
|
||||||
|
php
|
||||||
|
phb
|
||||||
|
phd
|
||||||
|
rep #$30
|
||||||
|
LONGA ON
|
||||||
|
LONGI ON
|
||||||
|
tsc
|
||||||
|
clc
|
||||||
|
adc #8
|
||||||
|
tcd
|
||||||
|
|
||||||
|
* Save SP and shadow state for teardown.
|
||||||
|
tsc
|
||||||
|
sta >brpOrigSp
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda >$00C035
|
||||||
|
sta >brpOrigShadow
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Stash inputs into long-mode globals so they survive TCD changes.
|
||||||
|
* Per-row code does TCD = rowBase, which means D-relative reads no
|
||||||
|
* longer reach the original args at D+0..5. Everything we still need
|
||||||
|
* per-row goes into a long-mode global below.
|
||||||
|
lda brpOff
|
||||||
|
sta >brpRowBase
|
||||||
|
lda brpRows
|
||||||
|
sta >brpRowsRem
|
||||||
|
lda brpBytes
|
||||||
|
dec a
|
||||||
|
sta >brpBytesM1Saved ; copyBytes - 1, for TCS = base + bytes - 1
|
||||||
|
|
||||||
|
* Compute jump entry into the unrolled PEI sequence.
|
||||||
|
* words = copyBytes / 2
|
||||||
|
* entry = peiSeqEnd - words * 2 (each PEI dp is 2 bytes; sequence
|
||||||
|
* ends at peiSeqEnd with PEI $00 as
|
||||||
|
* the LAST entry; offsets descend so
|
||||||
|
* jumping `words*2` bytes BEFORE the
|
||||||
|
* end starts at PEI $(2*(words-1)).
|
||||||
|
lda brpBytes
|
||||||
|
lsr a ; A = words
|
||||||
|
asl a ; A = words * 2 (bytes of PEI to execute)
|
||||||
|
sta >brpJmpDelta
|
||||||
|
lda #peiSeqEnd
|
||||||
|
sec
|
||||||
|
sbc >brpJmpDelta
|
||||||
|
sta >brpJmpTarget+1 ; patch JMP abs operand
|
||||||
|
|
||||||
|
sei
|
||||||
|
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda >brpOrigShadow
|
||||||
|
and #$F1 ; SHR shadow ON (clear bits 1,2,3)
|
||||||
|
sta >$00C035
|
||||||
|
lda #0
|
||||||
|
sta >$00C005 ; AUXWRITE on
|
||||||
|
sta >$00C003 ; RAMRD on
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
brpRowLoop anop
|
||||||
|
lda >brpRowsRem
|
||||||
|
bne brpDoRow
|
||||||
|
brl brpExit
|
||||||
|
brpDoRow anop
|
||||||
|
|
||||||
|
* Per-row: set DP = row base (so PEI dp pulls from the source row),
|
||||||
|
* set SP = row base + copyBytes - 1 (so PEIs decrement-push into the
|
||||||
|
* row in-place; bytes mirror to $E1 via SHR shadow).
|
||||||
|
* NB: brpBytes is at original D+2 -- after TCD = rowBase that read
|
||||||
|
* would land in pixel data. Use the long-mode brpBytesM1Saved instead.
|
||||||
|
lda >brpRowBase
|
||||||
|
clc
|
||||||
|
adc >brpBytesM1Saved
|
||||||
|
tcs ; SP = row base + copyBytes - 1
|
||||||
|
lda >brpRowBase
|
||||||
|
tcd ; D = row base
|
||||||
|
|
||||||
|
* Jump into the unrolled PEI sequence at the right offset. The full
|
||||||
|
* 16-bit operand was patched above (16-bit sta >brpJmpTarget+1).
|
||||||
|
brpJmpTarget anop
|
||||||
|
jmp peiSeqEnd ; 16-bit operand is patched per call
|
||||||
|
|
||||||
|
* ----- Unrolled PEI sequence: 40 PEIs, walking DP offsets DOWN from
|
||||||
|
* $4E to $00 in 2-byte steps. JMP target lands at the right offset
|
||||||
|
* so only `words` PEIs execute. Each PEI: 6 cyc, pushes 2 bytes to
|
||||||
|
* SP (which mirrors to $E1 via shadow). Falls through to row
|
||||||
|
* advance after PEI $00.
|
||||||
|
pei $4E
|
||||||
|
pei $4C
|
||||||
|
pei $4A
|
||||||
|
pei $48
|
||||||
|
pei $46
|
||||||
|
pei $44
|
||||||
|
pei $42
|
||||||
|
pei $40
|
||||||
|
pei $3E
|
||||||
|
pei $3C
|
||||||
|
pei $3A
|
||||||
|
pei $38
|
||||||
|
pei $36
|
||||||
|
pei $34
|
||||||
|
pei $32
|
||||||
|
pei $30
|
||||||
|
pei $2E
|
||||||
|
pei $2C
|
||||||
|
pei $2A
|
||||||
|
pei $28
|
||||||
|
pei $26
|
||||||
|
pei $24
|
||||||
|
pei $22
|
||||||
|
pei $20
|
||||||
|
pei $1E
|
||||||
|
pei $1C
|
||||||
|
pei $1A
|
||||||
|
pei $18
|
||||||
|
pei $16
|
||||||
|
pei $14
|
||||||
|
pei $12
|
||||||
|
pei $10
|
||||||
|
pei $0E
|
||||||
|
pei $0C
|
||||||
|
pei $0A
|
||||||
|
pei $08
|
||||||
|
pei $06
|
||||||
|
pei $04
|
||||||
|
pei $02
|
||||||
|
pei $00
|
||||||
|
peiSeqEnd anop
|
||||||
|
|
||||||
|
* Advance row base by 160 and decrement rows-remaining.
|
||||||
|
lda >brpRowBase
|
||||||
|
clc
|
||||||
|
adc #160
|
||||||
|
sta >brpRowBase
|
||||||
|
lda >brpRowsRem
|
||||||
|
dec a
|
||||||
|
sta >brpRowsRem
|
||||||
|
brl brpRowLoop
|
||||||
|
|
||||||
|
brpExit anop
|
||||||
|
lda >brpOrigSp
|
||||||
|
tcs
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda >brpOrigShadow
|
||||||
|
sta >$00C035
|
||||||
|
lda #0
|
||||||
|
sta >$00C004 ; AUXWRITE off
|
||||||
|
sta >$00C002 ; RAMRD off
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
LONGA OFF
|
||||||
|
LONGI OFF
|
||||||
|
pld
|
||||||
|
plb
|
||||||
|
plp ; restores I (pre-SEI value)
|
||||||
|
rtl
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
brpOrigSp data RECTPEI
|
||||||
|
ds 2
|
||||||
|
end
|
||||||
|
brpOrigShadow data RECTPEI
|
||||||
|
ds 1
|
||||||
|
end
|
||||||
|
brpRowBase data RECTPEI
|
||||||
|
ds 2
|
||||||
|
end
|
||||||
|
brpRowsRem data RECTPEI
|
||||||
|
ds 2
|
||||||
|
end
|
||||||
|
brpJmpDelta data RECTPEI
|
||||||
|
ds 2
|
||||||
|
end
|
||||||
|
brpBytesM1Saved data RECTPEI
|
||||||
|
ds 2
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
****************************************************************
|
****************************************************************
|
||||||
* iigsMarkDirtyRowsInner(yStart, yEnd, minWord, maxWord)
|
* iigsMarkDirtyRowsInner(yStart, yEnd, minWord, maxWord)
|
||||||
*
|
*
|
||||||
|
|
@ -2969,16 +3184,18 @@ gJoyOrigSpeed data DRAWPRIMS
|
||||||
* iigsInputSnapshot(void)
|
* iigsInputSnapshot(void)
|
||||||
*
|
*
|
||||||
* Per-frame input bookkeeping done in one tight asm pass instead of
|
* Per-frame input bookkeeping done in one tight asm pass instead of
|
||||||
* the three C memcpys + C TTL loop that joeyInputPoll used to do.
|
* three C memcpys. Saves ~0.5 ms per frame in animated demos.
|
||||||
* Saves ~0.6 ms per frame in animated demos.
|
|
||||||
*
|
*
|
||||||
* Three combined operations:
|
* Two combined operations:
|
||||||
* 1. Decrement gKeyTtl[i] for every key; on transition to zero,
|
* 1. Snapshot gKeyState -> gKeyPrev (KEY_COUNT bytes via long-mode
|
||||||
* clear gKeyState[i] (key is now "released").
|
|
||||||
* 2. Snapshot gKeyState -> gKeyPrev (KEY_COUNT bytes via long-mode
|
|
||||||
* lda/sta loop, ~15 cyc/byte).
|
* lda/sta loop, ~15 cyc/byte).
|
||||||
* 3. Snapshot gMouseButtonState/gJoyButtonState (4 bytes each)
|
* 2. Snapshot gMouseButtonState/gJoyButtonState (4 bytes each) via
|
||||||
* via 4 inline lda/sta pairs.
|
* 4 inline lda/sta pairs.
|
||||||
|
*
|
||||||
|
* The TTL-decay loop this used to run has been removed: the IIgs
|
||||||
|
* port now derives release directly from $C010 bit 7 in halInputPoll
|
||||||
|
* (the live "any key currently held" flag), so the inferred-release
|
||||||
|
* TTL mechanism is no longer needed.
|
||||||
*
|
*
|
||||||
* IMPORTANT: KEY_COUNT is hard-coded at 60 below. If you add or
|
* IMPORTANT: KEY_COUNT is hard-coded at 60 below. If you add or
|
||||||
* remove a key in joey/input.h, bump the constant or the loop bounds
|
* remove a key in joey/input.h, bump the constant or the loop bounds
|
||||||
|
|
@ -2994,19 +3211,6 @@ iigsInputSnapshot start IIGSASM
|
||||||
sep #$20
|
sep #$20
|
||||||
LONGA OFF
|
LONGA OFF
|
||||||
|
|
||||||
* TTL decrement + key-released detection. ~12 cyc / iter fast path.
|
|
||||||
ldx #59 ; KEY_COUNT - 1
|
|
||||||
isnTtlLoop anop
|
|
||||||
lda >gKeyTtl,x
|
|
||||||
beq isnTtlNext ; ttl==0, nothing to do
|
|
||||||
dec a
|
|
||||||
sta >gKeyTtl,x
|
|
||||||
bne isnTtlNext ; not yet zero
|
|
||||||
sta >gKeyState,x ; A==0 -> mark released
|
|
||||||
isnTtlNext anop
|
|
||||||
dex
|
|
||||||
bpl isnTtlLoop
|
|
||||||
|
|
||||||
* Snapshot gKeyState -> gKeyPrev (60 bytes), long-mode loop.
|
* Snapshot gKeyState -> gKeyPrev (60 bytes), long-mode loop.
|
||||||
ldx #59
|
ldx #59
|
||||||
isnKeyLoop anop
|
isnKeyLoop anop
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue