joeylib2/examples/uber/uber.c
2026-05-04 11:23:28 -05:00

401 lines
15 KiB
C

// Uber demo: exercise every JoeyLib public API and measure throughput
// of the per-frame-hot ones. Results are written to joeylog.txt via
// joeyLogF. A green screen on exit means the run completed.
//
// Timing model: each test aligns to a VBL boundary via joeyWaitVBL,
// records the starting joeyFrameCount, then runs the op in a tight
// loop polling joeyFrameCount until UBER_FRAMES frames have elapsed.
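// Reported metric is ops/sec, computed as iters * joeyFrameHz() /
// actualFrames, where actualFrames is the number of frames that
// really elapsed (it can exceed UBER_FRAMES when the last iteration
// overruns the window), so results are directly comparable across
// ports regardless of CPU speed or VBL rate.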
//
// joeyFrameCount is wall-clock-based per port; the per-iter poll
// adds ~10-30 cyc per op which shows up as noise on the very
// fastest ops but is below ~5% even for ~500 cyc/op work.
//
// One-shot ops (spriteCompile) get one call each, timed by frame
// delta -- coarser but representative.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <joey/joey.h>
// ----- Timing primitives -----
// Measurement window: long enough that loop overhead doesn't
// dominate; short enough to keep the full demo run reasonably brief.
// (The window was originally 4 frames, which kept the run under ~10 sec.)
/* 16 frames per timed op gives 4x the iter-count resolution of the
* earlier 4-frame budget. Exposes the actual per-op cost on slow
* ops where 4 frames produced the same iter count on different
* framerates -- e.g. drawCircle r=80 read as "4 iters / 4 frames"
* on both 60 Hz IIgs (16.7 ms/frame, 67 ms window) and 50 Hz Amiga
* (20 ms/frame, 80 ms window) even though per-op cost was equal,
* just because 4 ops at 16-17 ms happen to fit both windows. The
* 16-frame budget extends the windows to 267 ms / 320 ms; quantum
* gap shrinks to ~6%. Total run time scales 4x (~80 sec each). */
// Length of each timed measurement window, in joeyFrameCount ticks.
#define UBER_FRAMES 16u
// Signature shared by every timed test op: no arguments, no result.
typedef void (*OpFn)(void);
// Name of the op most recently handed to timeOp(). It is written there
// and not read in the code visible here.
// NOTE(review): presumably a debugging aid -- confirm before removing.
static const char *gCurName = "(none)";
// Surface every test op draws to; assigned from stageGet() in main().
static SurfaceT *gStage = NULL;
// Sprite used by the sprite ops; assigned from spriteCreate() in main().
static SpriteT *gSprite = NULL;
// Save-under record passed to the sprite save / restore ops.
static SpriteBackupT gBackup;
// Storage that main() attaches to gBackup.bytes before the tests run.
static unsigned char gBackupBytes[256];
// Scratch tile read by the tilePaste op and written by the tileSnap op.
static TileT gTileScratch;
// Call `op` back-to-back until at least `targetFrames` ticks of
// joeyFrameCount have gone by since the VBL wait at the start.
//
//   op              - operation to repeat.
//   targetFrames    - minimum length of the measurement window, in frames.
//   actualFramesOut - receives the number of frames that really elapsed
//                     (never 0, so it is always safe as a divisor).
//
// Returns the number of completed calls to `op`.
static unsigned long runForFrames(OpFn op, unsigned int targetFrames, uint16_t *actualFramesOut) {
    unsigned long iterations = 0UL;
    uint16_t      firstFrame;
    uint16_t      elapsed;

    joeyWaitVBL();
    firstFrame = joeyFrameCount();

    /* The 16-bit subtraction keeps the elapsed count correct even if
     * the frame counter wraps during the window. */
    for (;;) {
        if ((uint16_t)(joeyFrameCount() - firstFrame) >= targetFrames) {
            break;
        }
        op();
        iterations++;
    }

    /* The final iteration usually runs past the target, so report what
     * really elapsed. Callers that divide by this value then stay
     * honest for ops slower than one frame, where the iteration count
     * is tiny but real time stretches well beyond targetFrames. */
    elapsed = (uint16_t)(joeyFrameCount() - firstFrame);
    *actualFramesOut = (elapsed != 0u) ? elapsed : (uint16_t)1u; /* never hand back a 0 divisor */

    return iterations;
}
// Time one op and write a single result line to the log.
//
//   name - label printed in the log line (also stored in gCurName).
//   op   - operation to repeat for a UBER_FRAMES-long window.
//
// The line reports iterations, the frames that actually elapsed, and
// the derived ops/sec so per-port results are directly comparable
// against IIgs regardless of CPU speed or display refresh rate. It
// also carries a hash of the stage surface taken after timing (FNV-1a
// per the original author's note). That hash is the pixel-perfect
// comparison input for the cross-port validation harness
// (tools/diff-uber-hashes.py): captured against IIgs as the golden
// reference, planar 68k rewrites validate by matching it.
static void timeOp(const char *name, OpFn op) {
    unsigned long iters;
    unsigned long opsPerSec;
    uint16_t      actualFrames;
    uint32_t      hash;

    gCurName = name;
    iters = runForFrames(op, UBER_FRAMES, &actualFrames);
    if (iters == 0UL) {
        joeyLogF("UBER: %s: 0 iters (op too slow?)\n", name);
        return;
    }

    /* Divide by ACTUAL elapsed frames, not the target. For sub-frame
     * ops actualFrames ~= UBER_FRAMES so the answer is unchanged;
     * for ops that overrun (slow stagePresent etc.), this stops
     * inflating ops/sec. */
    opsPerSec = (iters * (unsigned long)joeyFrameHz()) / (unsigned long)actualFrames;
    hash = surfaceHash(gStage);

    /* Each variadic argument is cast to the exact type its conversion
     * expects: a bare uint16_t is promoted to int where int is wider
     * than 16 bits, which does not match %u. */
    joeyLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec | hash=%08lX\n",
             name, iters, (unsigned)actualFrames, opsPerSec, (unsigned long)hash);
}
// ----- Test ops -----
// Drawing and surface ops. Each wrapper issues exactly one library
// call against gStage with fixed arguments so it fits the OpFn shape.

// Timed as "drawPixel".
static void op_drawPixel(void) {
    drawPixel(gStage, 100, 100, 5);
}

// Timed as "drawLine H".
static void op_drawLineH(void) {
    drawLine(gStage, 0, 50, 319, 50, 5);
}

// Timed as "drawLine V".
static void op_drawLineV(void) {
    drawLine(gStage, 50, 0, 50, 199, 5);
}

// Timed as "drawLine diag".
static void op_drawLineDiag(void) {
    drawLine(gStage, 0, 0, 319, 199, 5);
}

// Timed as "drawRect 100x100".
static void op_drawRect(void) {
    drawRect(gStage, 10, 10, 100, 100, 5);
}

// Timed as "drawCircle r=16".
static void op_drawCircleSmall(void) {
    drawCircle(gStage, 160, 100, 16, 5);
}

// Timed as "drawCircle r=80".
static void op_drawCircleLarge(void) {
    drawCircle(gStage, 160, 100, 80, 5);
}

// Timed as "fillRect 16x16".
static void op_fillRectSmall(void) {
    fillRect(gStage, 20, 20, 16, 16, 7);
}

// Timed as "fillRect 80x80".
static void op_fillRectMid(void) {
    fillRect(gStage, 20, 20, 80, 80, 7);
}

// Timed as "fillRect 320x200".
static void op_fillRectFull(void) {
    fillRect(gStage, 0, 0, 320, 200, 7);
}

// Timed as "fillCircle r=40".
static void op_fillCircle(void) {
    fillCircle(gStage, 160, 100, 40, 7);
}

// Timed as "samplePixel"; the sampled value is deliberately discarded.
static void op_samplePixel(void) {
    (void)samplePixel(gStage, 100, 100);
}

// Timed as "surfaceClear".
static void op_surfaceClear(void) {
    surfaceClear(gStage, 0);
}
// Timed as "paletteSet": loads a fixed 16-entry table into gStage with
// second argument 0. The table is static, so it is set up once rather
// than rebuilt on every timed call.
static void op_paletteSet(void) {
    static uint16_t sTestPalette[16] = {
        0x000, 0xF00, 0x0F0, 0x00F,
        0xFF0, 0xF0F, 0x0FF, 0xFFF,
        0x800, 0x080, 0x008, 0x880,
        0x808, 0x088, 0x888, 0x444
    };

    paletteSet(gStage, 0, sTestPalette);
}
// SCB and tile ops. The tile ops share gTileScratch: tileSnap writes
// it and tilePaste reads it.

// Timed as "scbSetRange".
static void op_scbSetRange(void) {
    scbSetRange(gStage, 0, 199, 0);
}

// Timed as "tileFill".
static void op_tileFill(void) {
    tileFill(gStage, 5, 5, 7);
}

// Timed as "tileCopy"; gStage is passed as both surface arguments.
static void op_tileCopy(void) {
    tileCopy(gStage, 6, 6, gStage, 5, 5);
}

// Timed as "tileCopyMasked"; gStage is passed as both surface arguments.
static void op_tileCopyMasked(void) {
    tileCopyMasked(gStage, 7, 7, gStage, 5, 5, 0);
}

// Timed as "tilePaste".
static void op_tilePaste(void) {
    tilePaste(gStage, 8, 8, &gTileScratch);
}

// Timed as "tileSnap".
static void op_tileSnap(void) {
    tileSnap(gStage, 5, 5, &gTileScratch);
}
// Fixed position handed to every sprite op.
static int16_t gSpriteX = 40;
static int16_t gSpriteY = 30;

// Timed as "spriteSaveUnder"; fills gBackup.
static void op_spriteSave(void) {
    spriteSaveUnder(gStage, gSprite, gSpriteX, gSpriteY, &gBackup);
}

// Timed as "spriteDraw".
static void op_spriteDraw(void) {
    spriteDraw(gStage, gSprite, gSpriteX, gSpriteY);
}

// Timed as "spriteRestoreUnder"; replays whatever gBackup currently holds.
static void op_spriteRestore(void) {
    spriteRestoreUnder(gStage, &gBackup);
}

// Timed as "spriteSaveAndDraw".
static void op_spriteSaveAndDraw(void) {
    spriteSaveAndDraw(gStage, gSprite, gSpriteX, gSpriteY, &gBackup);
}
// Present, input and audio ops. Query results are discarded on
// purpose: only the cost of the call is of interest.

// Timed as "stagePresent full".
static void op_stagePresent(void) {
    stagePresent();
}

// Timed as "joeyInputPoll".
static void op_inputPoll(void) {
    joeyInputPoll();
}

// Timed as "joeyKeyDown".
static void op_keyDown(void) {
    (void)joeyKeyDown(KEY_A);
}

// Timed as "joeyKeyPressed".
static void op_keyPressed(void) {
    (void)joeyKeyPressed(KEY_A);
}

// Timed as "joeyMouseX".
static void op_mouseX(void) {
    (void)joeyMouseX();
}

// Timed as "joeyJoyConnected".
static void op_joyConnected(void) {
    (void)joeyJoystickConnected(JOYSTICK_1);
}

// Timed as "joeyAudioFrameTick".
static void op_audioFrameTick(void) {
    joeyAudioFrameTick();
}

// Timed as "joeyAudioIsPlayingMod".
static void op_audioIsPlaying(void) {
    (void)joeyAudioIsPlayingMod();
}

// Timed as "surfaceMarkDirtyRect (via fillRect 32x32)". Per the
// original author's note, drawPixel already marks dirty, so a small
// fill is used to exercise the mark step instead.
static void op_surfaceMarkDirty(void) {
    fillRect(gStage, 0, 0, 32, 32, 0);
}
// ----- Build the ball sprite procedurally -----
// Sprite size in tiles, and the total size of its tile data (each
// tile occupies 32 bytes: 8 rows of 4 bytes).
#define BALL_TILES_X 2
#define BALL_TILES_Y 2
#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * 32u)

// Ball image as authored: 16 rows of 8 bytes, stored row after row
// across the full sprite width.
static const uint8_t gBallAuthored[16 * 8] = {
    0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00,
    0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00,
    0x02, 0x22, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x02, 0x23, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x22, 0x33, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00,
    0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x22, 0x22, 0x00, 0x00, 0x00
};

// The same image regrouped tile by tile; filled by buildBallSprite().
static uint8_t gBallTiles[BALL_TILE_BYTES];

// Regroup gBallAuthored into gBallTiles. Tiles are stored one after
// another in row-major tile order (left to right, then top to bottom);
// inside a tile the 8 rows of 4 bytes are stored consecutively.
static void buildBallSprite(void) {
    uint16_t srcRow;     /* row within the authored image          */
    uint16_t srcCol;     /* byte column within that row            */
    uint16_t tileIndex;  /* which tile the byte lands in           */
    uint16_t tileOffset; /* byte position inside that tile         */

    /* Walk the authored image in storage order and scatter each byte
     * to its place in the tile-ordered buffer. */
    for (srcRow = 0; srcRow < BALL_TILES_Y * 8; srcRow++) {
        for (srcCol = 0; srcCol < BALL_TILES_X * 4; srcCol++) {
            tileIndex  = (uint16_t)((srcRow / 8) * BALL_TILES_X + (srcCol / 4));
            tileOffset = (uint16_t)((srcRow % 8) * 4 + (srcCol % 4));
            gBallTiles[tileIndex * 32u + tileOffset] = gBallAuthored[srcRow * 8 + srcCol];
        }
    }
}
// ----- Main -----
static void runAllTests(void) {
joeyLogF("UBER: ----- begin -----\n");
// Surface / palette / SCB.
timeOp("surfaceClear", op_surfaceClear);
timeOp("paletteSet", op_paletteSet);
timeOp("scbSetRange", op_scbSetRange);
// Drawing primitives.
timeOp("drawPixel", op_drawPixel);
timeOp("drawLine H", op_drawLineH);
timeOp("drawLine V", op_drawLineV);
timeOp("drawLine diag", op_drawLineDiag);
timeOp("drawRect 100x100", op_drawRect);
timeOp("drawCircle r=16", op_drawCircleSmall);
timeOp("drawCircle r=80", op_drawCircleLarge);
timeOp("fillRect 16x16", op_fillRectSmall);
timeOp("fillRect 80x80", op_fillRectMid);
timeOp("fillRect 320x200", op_fillRectFull);
timeOp("fillCircle r=40", op_fillCircle);
timeOp("samplePixel", op_samplePixel);
// Tiles. Seed scratch tile + dest cells with non-zero pixels first.
fillRect(gStage, 0, 0, 320, 64, 7);
tileSnap(gStage, 5, 5, &gTileScratch);
timeOp("tileFill", op_tileFill);
timeOp("tileCopy", op_tileCopy);
timeOp("tileCopyMasked", op_tileCopyMasked);
timeOp("tilePaste", op_tilePaste);
timeOp("tileSnap", op_tileSnap);
// Sprites. Background must be non-empty so save-under has work
// to do (otherwise it's a 4 KB memset of zeros, atypical).
surfaceClear(gStage, 4);
timeOp("spriteSaveUnder", op_spriteSave);
timeOp("spriteDraw", op_spriteDraw);
timeOp("spriteRestoreUnder", op_spriteRestore);
timeOp("spriteSaveAndDraw", op_spriteSaveAndDraw);
// Present. One warm-up call before each timed loop primes any
// per-port one-time setup (Amiga: copper list rebuild after the
// paletteSet / scbSetRange tests dirty the cache; without warm-up
// the rebuild's MakeScreen + MrgCop + WaitTOF chain consumes the
// entire 4-frame measurement window) so we measure steady-state
// throughput rather than first-call penalty.
stagePresent();
timeOp("stagePresent full", op_stagePresent);
// Input.
timeOp("joeyInputPoll", op_inputPoll);
timeOp("joeyKeyDown", op_keyDown);
timeOp("joeyKeyPressed", op_keyPressed);
timeOp("joeyMouseX", op_mouseX);
timeOp("joeyJoyConnected", op_joyConnected);
// Audio.
timeOp("joeyAudioFrameTick", op_audioFrameTick);
timeOp("joeyAudioIsPlayingMod", op_audioIsPlaying);
// Surface mark dirty (via fillRect's mark step).
timeOp("surfaceMarkDirtyRect (via fillRect 32x32)", op_surfaceMarkDirty);
joeyLogF("UBER: ----- end -----\n");
}
// Entry point: bring JoeyLib up, show a "running" bar, build and
// compile the ball sprite, run the timed suite, log the total wall
// time, then show a green screen and wait for a key.
//
// Returns 0 after a completed run, 1 if joeyInit, stageGet or
// spriteCreate fails.
int main(void) {
    JoeyConfigT   config;
    uint16_t      pal[16];
    int           i;
    uint16_t      startFrame;
    uint16_t      endFrame;
    uint16_t      elapsedFrames;
    unsigned long elapsedMs;

    /* Zero the whole config first so any JoeyConfigT field that is not
     * assigned below reaches joeyInit with a known value instead of
     * whatever happened to be on the stack. */
    memset(&config, 0, sizeof(config));
    config.hostMode = HOST_MODE_TAKEOVER;
    /* 32 KB fits the 8 pre-shifted DRAW variants the Amiga planar
     * compiled sprite emitter generates. UL on the multiply because
     * ORCA-C's 16-bit int overflows on 32 * 1024. */
    config.codegenBytes = 32UL * 1024;
    config.maxSurfaces = 4;
    config.audioBytes = 64UL * 1024;
    config.assetBytes = 128UL * 1024;
    if (!joeyInit(&config)) {
        return 1;
    }

    /* joeyFrameCount is VBL-driven, so it only ticks after halInit
     * installed its VBL ISR -- captured here is "everything from now
     * to press-any-key". Pre-init setup time is small and not the
     * cost the user is chasing; runAllTests dominates. */
    startFrame = joeyFrameCount();

    gStage = stageGet();
    if (gStage == NULL) {
        joeyShutdown();
        return 1;
    }

    // A simple visible palette so users see SOMETHING during the run.
    for (i = 0; i < 16; i++) {
        pal[i] = (uint16_t)((i << 8) | (i << 4) | i); // grey ramp
    }
    pal[ 0] = 0x000;
    pal[ 1] = 0x800; // dark red (running)
    pal[ 2] = 0x080; // green (done)
    pal[ 3] = 0x008; // blue
    pal[ 5] = 0xFF0; // yellow (test pixels)
    pal[ 7] = 0xFFF; // white (fills)
    pal[15] = 0xF00; // red
    paletteSet(gStage, 0, pal);
    scbSetRange(gStage, 0, 199, 0);

    // Indicate "running": red bar at top of screen.
    surfaceClear(gStage, 0);
    fillRect(gStage, 0, 0, 320, 8, 1);
    stagePresent();

    buildBallSprite();
    gSprite = spriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y);
    if (gSprite == NULL) {
        joeyLog("UBER: spriteCreate failed");
        joeyShutdown();
        return 1;
    }

    // spriteCompile is a one-shot. Time at frame resolution.
    {
        uint16_t before;
        uint16_t compileFrames;

        joeyWaitVBL();
        before = joeyFrameCount();
        if (!spriteCompile(gSprite)) {
            joeyLog("UBER: spriteCompile failed");
        }
        /* Number of VBL edges crossed while compiling. The call began
         * right after a VBL wait, so it took less than
         * compileFrames + 1 frames. */
        compileFrames = (uint16_t)(joeyFrameCount() - before);
        if (compileFrames == 0u) {
            while (joeyFrameCount() == before) {
                /* wait for next VBL edge */
            }
            joeyLogF("UBER: spriteCompile: 1 call in <= 1 frame\n");
        } else {
            /* A slow compile reports its measured bound rather than
             * claiming it fit in a single frame. */
            joeyLogF("UBER: spriteCompile: 1 call in <= %u frames\n",
                     (unsigned)compileFrames + 1u);
        }
    }

    gBackup.bytes = gBackupBytes;

    // Audio: only init/shutdown is exercised. Triggering joeyAudioPlaySfx
    // without first calling joeyAudioPlayMod leaves NTP's engine in a
    // half-initialized state -- NTPstreamsound is designed to OVERLAY on
    // an already-running module. Without NTPprepare/NTPplay first, the
    // streamer oscillator is fired but no music tick ever advances or
    // silences it, and you get a stuck high-pitched scream. UBER doesn't
    // ship a MOD asset, so we skip the SFX exercise. The frame-tick and
    // isPlayingMod calls below still get timed (both are no-op fast
    // paths on IIgs).
    if (joeyAudioInit()) {
        joeyLogF("UBER: audioInit OK\n");
    } else {
        joeyLogF("UBER: audioInit failed (skipping audio)\n");
    }

    // Reset stage + run all per-frame timed tests.
    surfaceClear(gStage, 0);
    fillRect(gStage, 0, 0, 320, 8, 1);
    stagePresent();
    runAllTests();

    endFrame = joeyFrameCount();
    elapsedFrames = (uint16_t)(endFrame - startFrame);
    elapsedMs = ((unsigned long)elapsedFrames * 1000UL) / (unsigned long)joeyFrameHz();
    /* Both %u arguments are cast: a bare uint16_t is promoted to int
     * where int is wider than 16 bits. */
    joeyLogF("UBER: total wall time: %lu ms (%u frames @ %u Hz)\n",
             elapsedMs, (unsigned)elapsedFrames, (unsigned)joeyFrameHz());

    // Done. Green screen + waitForKey.
    surfaceClear(gStage, 2);
    stagePresent();
    joeyLogF("UBER: press any key to exit\n");
    joeyWaitForAnyKey();

    spriteDestroy(gSprite);
    joeyShutdown();
    return 0;
}