// Uber demo: exercise every JoeyLib public API and measure throughput // of the per-frame-hot ones. Results are written to joeylog.txt via // joeyLogF. A green screen on exit means the run completed. // // Timing model: each test aligns to a VBL boundary via joeyWaitVBL, // records the starting joeyFrameCount, then runs the op in a tight // loop polling joeyFrameCount until UBER_FRAMES frames have elapsed. // Reported metric is ops/sec, computed as iters * joeyFrameHz() / // UBER_FRAMES so results are directly comparable across ports // regardless of CPU speed or VBL rate. // // joeyFrameCount is wall-clock-based per port; the per-iter poll // adds ~10-30 cyc per op which shows up as noise on the very // fastest ops but is below ~5% even for ~500 cyc/op work. // // One-shot ops (spriteCompile) get one call each, timed by frame // delta -- coarser but representative. #include #include #include #include #include // ----- Timing primitives ----- // 4-frame measurement window. Long enough that loop overhead doesn't // dominate; short enough to keep the full demo run under ~10 sec. /* 16 frames per timed op gives 4x the iter-count resolution of the * earlier 4-frame budget. Exposes the actual per-op cost on slow * ops where 4 frames produced the same iter count on different * framerates -- e.g. drawCircle r=80 read as "4 iters / 4 frames" * on both 60 Hz IIgs (16.7 ms/frame, 67 ms window) and 50 Hz Amiga * (20 ms/frame, 80 ms window) even though per-op cost was equal, * just because 4 ops at 16-17 ms happen to fit both windows. The * 16-frame budget extends the windows to 267 ms / 320 ms; quantum * gap shrinks to ~6%. Total run time scales 4x (~80 sec each). */ #define UBER_FRAMES 16u typedef void (*OpFn)(void); static const char *gCurName = "(none)"; static SurfaceT *gStage = NULL; static SpriteT *gSprite = NULL; static SpriteBackupT gBackup; static unsigned char gBackupBytes[256]; static TileT gTileScratch; // Run `op` in a tight loop until `targetFrames` joeyFrameCount ticks // have elapsed. Returns iterations completed. static unsigned long runForFrames(OpFn op, unsigned int targetFrames, uint16_t *actualFramesOut) { unsigned long count; uint16_t startFrame; uint16_t endFrame; count = 0UL; joeyWaitVBL(); startFrame = joeyFrameCount(); while ((uint16_t)(joeyFrameCount() - startFrame) < targetFrames) { op(); count++; } /* Capture the actual elapsed frames -- the last iter typically * overruns the target. Using actual instead of target as the * ops/sec divisor stays honest for ops slower than 1 frame * (where count is forced low while real time stretches well * past targetFrames). */ endFrame = joeyFrameCount(); *actualFramesOut = (uint16_t)(endFrame - startFrame); if (*actualFramesOut == 0u) { *actualFramesOut = 1u; /* defensive: avoid div-by-zero */ } return count; } // Time and log one op. Reports iters / N frames AND the derived // ops/sec so per-port results are directly comparable against IIgs // regardless of CPU speed or display refresh rate. Also logs an // FNV-1a hash of the surface state after timing -- this is the // pixel-perfect comparison input for the cross-port validation // harness (tools/diff-uber-hashes.py). Captured against IIgs as the // golden reference; planar 68k rewrites validate by matching it. static void timeOp(const char *name, OpFn op) { unsigned long iters; unsigned long opsPerSec; uint16_t actualFrames; uint32_t hash; gCurName = name; iters = runForFrames(op, UBER_FRAMES, &actualFrames); if (iters == 0UL) { joeyLogF("UBER: %s: 0 iters (op too slow?)\n", name); return; } /* Divide by ACTUAL elapsed frames, not the target. For sub-frame * ops actualFrames ~= UBER_FRAMES so the answer is unchanged; * for ops that overrun (slow stagePresent etc.), this stops * inflating ops/sec. */ opsPerSec = (iters * (unsigned long)joeyFrameHz()) / (unsigned long)actualFrames; hash = surfaceHash(gStage); joeyLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec | hash=%08lX\n", name, iters, actualFrames, opsPerSec, (unsigned long)hash); } // ----- Test ops ----- static void op_drawPixel (void) { drawPixel (gStage, 100, 100, 5); } static void op_drawLineH (void) { drawLine (gStage, 0, 50, 319, 50, 5); } static void op_drawLineV (void) { drawLine (gStage, 50, 0, 50, 199, 5); } static void op_drawLineDiag (void) { drawLine (gStage, 0, 0, 319, 199, 5); } static void op_drawRect (void) { drawRect (gStage, 10, 10, 100, 100, 5); } static void op_drawCircleSmall (void) { drawCircle (gStage, 160, 100, 16, 5); } static void op_drawCircleLarge (void) { drawCircle (gStage, 160, 100, 80, 5); } static void op_fillRectSmall (void) { fillRect (gStage, 20, 20, 16, 16, 7); } static void op_fillRectMid (void) { fillRect (gStage, 20, 20, 80, 80, 7); } static void op_fillRectFull (void) { fillRect (gStage, 0, 0, 320, 200, 7); } static void op_fillCircle (void) { fillCircle (gStage, 160, 100, 40, 7); } static void op_samplePixel (void) { (void)samplePixel(gStage, 100, 100); } static void op_surfaceClear (void) { surfaceClear (gStage, 0); } static void op_paletteSet(void) { static uint16_t colors[16] = { 0x000, 0xF00, 0x0F0, 0x00F, 0xFF0, 0xF0F, 0x0FF, 0xFFF, 0x800, 0x080, 0x008, 0x880, 0x808, 0x088, 0x888, 0x444 }; paletteSet(gStage, 0, colors); } static void op_scbSetRange (void) { scbSetRange (gStage, 0, 199, 0); } static void op_tileFill (void) { tileFill (gStage, 5, 5, 7); } static void op_tileCopy (void) { tileCopy (gStage, 6, 6, gStage, 5, 5); } static void op_tileCopyMasked (void) { tileCopyMasked (gStage, 7, 7, gStage, 5, 5, 0); } static void op_tilePaste (void) { tilePaste (gStage, 8, 8, &gTileScratch); } static void op_tileSnap (void) { tileSnap (gStage, 5, 5, &gTileScratch); } static int16_t gSpriteX = 40; static int16_t gSpriteY = 30; static void op_spriteSave (void) { spriteSaveUnder (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); } static void op_spriteDraw (void) { spriteDraw (gStage, gSprite, gSpriteX, gSpriteY); } static void op_spriteRestore (void) { spriteRestoreUnder(gStage, &gBackup); } static void op_spriteSaveAndDraw (void) { spriteSaveAndDraw (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); } static void op_stagePresent (void) { stagePresent(); } static void op_inputPoll (void) { joeyInputPoll(); } static void op_keyDown (void) { (void)joeyKeyDown(KEY_A); } static void op_keyPressed (void) { (void)joeyKeyPressed(KEY_A); } static void op_mouseX (void) { (void)joeyMouseX(); } static void op_joyConnected (void) { (void)joeyJoystickConnected(JOYSTICK_1); } static void op_audioFrameTick (void) { joeyAudioFrameTick(); } static void op_audioIsPlaying (void) { (void)joeyAudioIsPlayingMod(); } static void op_surfaceMarkDirty(void) { /* drawPixel already marks; use fill instead */ fillRect(gStage, 0, 0, 32, 32, 0); } // ----- Build the ball sprite procedurally ----- #define BALL_TILES_X 2 #define BALL_TILES_Y 2 #define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * 32u) static const uint8_t gBallAuthored[16 * 8] = { 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00, 0x02, 0x22, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20, 0x02, 0x23, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20, 0x22, 0x33, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20, 0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20, 0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x22, 0x00, 0x00, 0x00 }; static uint8_t gBallTiles[BALL_TILE_BYTES]; static void buildBallSprite(void) { uint16_t tx; uint16_t ty; uint16_t row; uint16_t b; uint8_t *dst; for (ty = 0; ty < BALL_TILES_Y; ty++) { for (tx = 0; tx < BALL_TILES_X; tx++) { dst = &gBallTiles[(ty * BALL_TILES_X + tx) * 32u]; for (row = 0; row < 8; row++) { for (b = 0; b < 4; b++) { dst[row * 4 + b] = gBallAuthored[((ty * 8) + row) * 8 + (tx * 4) + b]; } } } } } // ----- Main ----- static void runAllTests(void) { joeyLogF("UBER: ----- begin -----\n"); // Surface / palette / SCB. timeOp("surfaceClear", op_surfaceClear); timeOp("paletteSet", op_paletteSet); timeOp("scbSetRange", op_scbSetRange); // Drawing primitives. timeOp("drawPixel", op_drawPixel); timeOp("drawLine H", op_drawLineH); timeOp("drawLine V", op_drawLineV); timeOp("drawLine diag", op_drawLineDiag); timeOp("drawRect 100x100", op_drawRect); timeOp("drawCircle r=16", op_drawCircleSmall); timeOp("drawCircle r=80", op_drawCircleLarge); timeOp("fillRect 16x16", op_fillRectSmall); timeOp("fillRect 80x80", op_fillRectMid); timeOp("fillRect 320x200", op_fillRectFull); timeOp("fillCircle r=40", op_fillCircle); timeOp("samplePixel", op_samplePixel); // Tiles. Seed scratch tile + dest cells with non-zero pixels first. fillRect(gStage, 0, 0, 320, 64, 7); tileSnap(gStage, 5, 5, &gTileScratch); timeOp("tileFill", op_tileFill); timeOp("tileCopy", op_tileCopy); timeOp("tileCopyMasked", op_tileCopyMasked); timeOp("tilePaste", op_tilePaste); timeOp("tileSnap", op_tileSnap); // Sprites. Background must be non-empty so save-under has work // to do (otherwise it's a 4 KB memset of zeros, atypical). surfaceClear(gStage, 4); timeOp("spriteSaveUnder", op_spriteSave); timeOp("spriteDraw", op_spriteDraw); timeOp("spriteRestoreUnder", op_spriteRestore); timeOp("spriteSaveAndDraw", op_spriteSaveAndDraw); // Present. One warm-up call before each timed loop primes any // per-port one-time setup (Amiga: copper list rebuild after the // paletteSet / scbSetRange tests dirty the cache; without warm-up // the rebuild's MakeScreen + MrgCop + WaitTOF chain consumes the // entire 4-frame measurement window) so we measure steady-state // throughput rather than first-call penalty. stagePresent(); timeOp("stagePresent full", op_stagePresent); // Input. timeOp("joeyInputPoll", op_inputPoll); timeOp("joeyKeyDown", op_keyDown); timeOp("joeyKeyPressed", op_keyPressed); timeOp("joeyMouseX", op_mouseX); timeOp("joeyJoyConnected", op_joyConnected); // Audio. timeOp("joeyAudioFrameTick", op_audioFrameTick); timeOp("joeyAudioIsPlayingMod", op_audioIsPlaying); // Surface mark dirty (via fillRect's mark step). timeOp("surfaceMarkDirtyRect (via fillRect 32x32)", op_surfaceMarkDirty); joeyLogF("UBER: ----- end -----\n"); } int main(void) { JoeyConfigT config; uint16_t pal[16]; int i; uint16_t startFrame; uint16_t endFrame; uint16_t elapsedFrames; unsigned long elapsedMs; config.hostMode = HOST_MODE_TAKEOVER; /* 32 KB fits the 8 pre-shifted DRAW variants the Amiga planar * compiled sprite emitter generates. UL on the multiply because * ORCA-C's 16-bit int overflows on 32 * 1024. */ config.codegenBytes = 32UL * 1024; config.maxSurfaces = 4; config.audioBytes = 64UL * 1024; config.assetBytes = 128UL * 1024; if (!joeyInit(&config)) { return 1; } /* joeyFrameCount is VBL-driven, so it only ticks after halInit * installed its VBL ISR -- captured here is "everything from now * to press-any-key". Pre-init setup time is small and not the * cost the user is chasing; runAllTests dominates. */ startFrame = joeyFrameCount(); gStage = stageGet(); if (gStage == NULL) { joeyShutdown(); return 1; } // A simple visible palette so users see SOMETHING during the run. for (i = 0; i < 16; i++) { pal[i] = (uint16_t)((i << 8) | (i << 4) | i); // grey ramp } pal[ 0] = 0x000; pal[ 1] = 0x800; // dark red (running) pal[ 2] = 0x080; // green (done) pal[ 3] = 0x008; // blue pal[ 5] = 0xFF0; // yellow (test pixels) pal[ 7] = 0xFFF; // white (fills) pal[15] = 0xF00; // red paletteSet(gStage, 0, pal); scbSetRange(gStage, 0, 199, 0); // Indicate "running": red bar at top of screen. surfaceClear(gStage, 0); fillRect(gStage, 0, 0, 320, 8, 1); stagePresent(); buildBallSprite(); gSprite = spriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y, SPRITE_FLAGS_NONE); if (gSprite == NULL) { joeyLog("UBER: spriteCreate failed"); joeyShutdown(); return 1; } // spriteCompile is a one-shot. Time at frame resolution. { uint16_t before; joeyWaitVBL(); before = joeyFrameCount(); if (!spriteCompile(gSprite)) { joeyLog("UBER: spriteCompile failed"); } while (joeyFrameCount() == before) { /* wait for next VBL edge */ } joeyLogF("UBER: spriteCompile: 1 call in <= 1 frame\n"); } gBackup.bytes = gBackupBytes; // Audio: only init/shutdown is exercised. Triggering joeyAudioPlaySfx // without first calling joeyAudioPlayMod leaves NTP's engine in a // half-initialized state -- NTPstreamsound is designed to OVERLAY on // an already-running module. Without NTPprepare/NTPplay first, the // streamer oscillator is fired but no music tick ever advances or // silences it, and you get a stuck high-pitched scream. UBER doesn't // ship a MOD asset, so we skip the SFX exercise. The frame-tick and // isPlayingMod calls below still get timed (both are no-op fast // paths on IIgs). if (joeyAudioInit()) { joeyLogF("UBER: audioInit OK\n"); } else { joeyLogF("UBER: audioInit failed (skipping audio)\n"); } // Reset stage + run all per-frame timed tests. surfaceClear(gStage, 0); fillRect(gStage, 0, 0, 320, 8, 1); stagePresent(); runAllTests(); endFrame = joeyFrameCount(); elapsedFrames = (uint16_t)(endFrame - startFrame); elapsedMs = ((unsigned long)elapsedFrames * 1000UL) / (unsigned long)joeyFrameHz(); joeyLogF("UBER: total wall time: %lu ms (%u frames @ %u Hz)\n", elapsedMs, elapsedFrames, (unsigned)joeyFrameHz()); // Done. Green screen + waitForKey. surfaceClear(gStage, 2); stagePresent(); joeyLogF("UBER: press any key to exit\n"); joeyWaitForAnyKey(); spriteDestroy(gSprite); joeyShutdown(); return 0; }