// Uber demo: exercise every JoeyLib public API and measure throughput // of the per-frame-hot ones. Results are written to joeylog.txt via // jlLogF. A green screen on exit means the run completed. // // Timing model: each test aligns to a VBL boundary via jlWaitVBL, // records the starting jlFrameCount, then runs the op in a tight // loop polling jlFrameCount until UBER_FRAMES frames have elapsed. // Reported metric is ops/sec, computed as iters * jlFrameHz() / // actual elapsed frames (measured; >= UBER_FRAMES) so results are // directly comparable across ports regardless of CPU speed or VBL rate. // // jlFrameCount is wall-clock-based per port; the per-iter poll // adds ~10-30 cyc per op which shows up as noise on the very // fastest ops but is below ~5% even for ~500 cyc/op work. // // One-shot ops (jlSpriteCompile) get one call each, timed by frame // delta -- coarser but representative. #include #include #include #include #include // ----- Timing primitives ----- // Measurement window (originally 4 frames, now UBER_FRAMES; see below). // Long enough that loop overhead doesn't dominate; short enough to keep // the full demo run bounded (under ~10 sec at the original 4 frames). /* 16 frames per timed op gives 4x the iter-count resolution of the * earlier 4-frame budget. Exposes the actual per-op cost on slow * ops where 4 frames produced the same iter count on different * framerates -- e.g. jlDrawCircle r=80 read as "4 iters / 4 frames" * on both 60 Hz IIgs (16.7 ms/frame, 67 ms window) and 50 Hz Amiga * (20 ms/frame, 80 ms window) even though per-op cost was equal, * just because 4 ops at 16-17 ms happen to fit both windows. The * 16-frame budget extends the windows to 267 ms / 320 ms; quantum * gap shrinks to ~6%. Total run time scales 4x (~80 sec each). 
*/ #define UBER_FRAMES 16u typedef void (*OpFn)(void); static const char *gCurName = "(none)"; static jlSurfaceT *gStage = NULL; static jlSpriteT *gSprite = NULL; static jlSpriteBackupT gBackup; static unsigned char gBackupBytes[256]; static jlTileT gTileScratch; // Run `op` in a tight loop until `targetFrames` jlFrameCount ticks // have elapsed. Returns iterations completed. static unsigned long runForFrames(OpFn op, unsigned int targetFrames, uint16_t *actualFramesOut) { unsigned long count; uint16_t startFrame; uint16_t endFrame; count = 0UL; jlWaitVBL(); startFrame = jlFrameCount(); while ((uint16_t)(jlFrameCount() - startFrame) < targetFrames) { op(); count++; } /* Capture the actual elapsed frames -- the last iter typically * overruns the target. Using actual instead of target as the * ops/sec divisor stays honest for ops slower than 1 frame * (where count is forced low while real time stretches well * past targetFrames). */ endFrame = jlFrameCount(); *actualFramesOut = (uint16_t)(endFrame - startFrame); if (*actualFramesOut == 0u) { *actualFramesOut = 1u; /* defensive: avoid div-by-zero */ } return count; } // Time and log one op. Reports iters / N frames AND the derived // ops/sec so per-port results are directly comparable against IIgs // regardless of CPU speed or display refresh rate. Also logs an // FNV-1a hash of the surface state after timing -- this is the // pixel-perfect comparison input for the cross-port validation // harness (tools/diff-uber-hashes.py). Captured against IIgs as the // golden reference; planar 68k rewrites validate by matching it. static void timeOp(const char *name, OpFn op) { unsigned long iters; unsigned long opsPerSec; uint16_t actualFrames; uint32_t hash; gCurName = name; iters = runForFrames(op, UBER_FRAMES, &actualFrames); if (iters == 0UL) { jlLogF("UBER: %s: 0 iters (op too slow?)\n", name); return; } /* Divide by ACTUAL elapsed frames, not the target. 
For sub-frame * ops actualFrames ~= UBER_FRAMES so the answer is unchanged; * for ops that overrun (slow jlStagePresent etc.), this stops * inflating ops/sec. */ opsPerSec = (iters * (unsigned long)jlFrameHz()) / (unsigned long)actualFrames; hash = jlSurfaceHash(gStage); jlLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec | hash=%08lX\n", name, iters, actualFrames, opsPerSec, (unsigned long)hash); } // ----- Test ops ----- static void op_drawPixel (void) { jlDrawPixel (gStage, 100, 100, 5); } static void op_drawLineH (void) { jlDrawLine (gStage, 0, 50, 319, 50, 5); } static void op_drawLineV (void) { jlDrawLine (gStage, 50, 0, 50, 199, 5); } static void op_drawLineDiag (void) { jlDrawLine (gStage, 0, 0, 319, 199, 5); } static void op_drawRect (void) { jlDrawRect (gStage, 10, 10, 100, 100, 5); } static void op_drawCircleSmall (void) { jlDrawCircle (gStage, 160, 100, 16, 5); } static void op_drawCircleLarge (void) { jlDrawCircle (gStage, 160, 100, 80, 5); } static void op_fillRectSmall (void) { jlFillRect (gStage, 20, 20, 16, 16, 7); } static void op_fillRectMid (void) { jlFillRect (gStage, 20, 20, 80, 80, 7); } static void op_fillRectFull (void) { jlFillRect (gStage, 0, 0, 320, 200, 7); } static void op_fillCircle (void) { jlFillCircle (gStage, 160, 100, 40, 7); } static void op_samplePixel (void) { (void)jlSamplePixel(gStage, 100, 100); } static void op_surfaceClear (void) { jlSurfaceClear (gStage, 0); } static void op_paletteSet(void) { static uint16_t colors[16] = { 0x000, 0xF00, 0x0F0, 0x00F, 0xFF0, 0xF0F, 0x0FF, 0xFFF, 0x800, 0x080, 0x008, 0x880, 0x808, 0x088, 0x888, 0x444 }; jlPaletteSet(gStage, 0, colors); } static void op_scbSetRange (void) { jlScbSetRange (gStage, 0, 199, 0); } static void op_tileFill (void) { jlTileFill (gStage, 5, 5, 7); } static void op_tileCopy (void) { jlTileCopy (gStage, 6, 6, gStage, 5, 5); } static void op_tileCopyMasked (void) { jlTileCopyMasked (gStage, 7, 7, gStage, 5, 5, 0); } static void op_tilePaste (void) { jlTilePaste 
(gStage, 8, 8, &gTileScratch); } static void op_tileSnap (void) { jlTileSnap (gStage, 5, 5, &gTileScratch); } static int16_t gSpriteX = 40; static int16_t gSpriteY = 30; static void op_spriteSave (void) { jlSpriteSaveUnder (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); } static void op_spriteDraw (void) { jlSpriteDraw (gStage, gSprite, gSpriteX, gSpriteY); } static void op_spriteRestore (void) { jlSpriteRestoreUnder(gStage, &gBackup); } static void op_spriteSaveAndDraw (void) { jlSpriteSaveAndDraw (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); } static void op_stagePresent (void) { jlStagePresent(); } static void op_inputPoll (void) { jlInputPoll(); } static void op_keyDown (void) { (void)jlKeyDown(KEY_A); } static void op_keyPressed (void) { (void)jlKeyPressed(KEY_A); } static void op_mouseX (void) { (void)jlMouseX(); } static void op_joyConnected (void) { (void)jlJoystickConnected(JOYSTICK_1); } static void op_audioFrameTick (void) { jlAudioFrameTick(); } static void op_audioIsPlaying (void) { (void)jlAudioIsPlayingMod(); } static void op_surfaceMarkDirty(void) { /* jlDrawPixel already marks; use fill instead */ jlFillRect(gStage, 0, 0, 32, 32, 0); } // ----- Build the ball sprite procedurally ----- #define BALL_TILES_X 2 #define BALL_TILES_Y 2 #define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * 32u) static const uint8_t gBallAuthored[16 * 8] = { 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00, 0x02, 0x22, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20, 0x02, 0x23, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20, 0x22, 0x33, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20, 0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 
0x20, 0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, 0x00, 0x00, 0x00, 0x22, 0x22, 0x00, 0x00, 0x00 }; static uint8_t gBallTiles[BALL_TILE_BYTES]; static void buildBallSprite(void) { uint16_t tx; uint16_t ty; uint16_t row; uint16_t b; uint8_t *dst; for (ty = 0; ty < BALL_TILES_Y; ty++) { for (tx = 0; tx < BALL_TILES_X; tx++) { dst = &gBallTiles[(ty * BALL_TILES_X + tx) * 32u]; for (row = 0; row < 8; row++) { for (b = 0; b < 4; b++) { dst[row * 4 + b] = gBallAuthored[((ty * 8) + row) * 8 + (tx * 4) + b]; } } } } } // ----- Main ----- static void runAllTests(void) { jlLogF("UBER: ----- begin -----\n"); // Surface / palette / SCB. timeOp("jlSurfaceClear", op_surfaceClear); timeOp("jlPaletteSet", op_paletteSet); timeOp("jlScbSetRange", op_scbSetRange); // Drawing primitives. timeOp("jlDrawPixel", op_drawPixel); timeOp("jlDrawLine H", op_drawLineH); timeOp("jlDrawLine V", op_drawLineV); timeOp("jlDrawLine diag", op_drawLineDiag); timeOp("jlDrawRect 100x100", op_drawRect); timeOp("jlDrawCircle r=16", op_drawCircleSmall); timeOp("jlDrawCircle r=80", op_drawCircleLarge); timeOp("jlFillRect 16x16", op_fillRectSmall); timeOp("jlFillRect 80x80", op_fillRectMid); timeOp("jlFillRect 320x200", op_fillRectFull); timeOp("jlFillCircle r=40", op_fillCircle); timeOp("jlSamplePixel", op_samplePixel); // Tiles. Seed scratch tile + dest cells with non-zero pixels first. jlFillRect(gStage, 0, 0, 320, 64, 7); jlTileSnap(gStage, 5, 5, &gTileScratch); timeOp("jlTileFill", op_tileFill); timeOp("jlTileCopy", op_tileCopy); timeOp("jlTileCopyMasked", op_tileCopyMasked); timeOp("jlTilePaste", op_tilePaste); timeOp("jlTileSnap", op_tileSnap); // Sprites. Background must be non-empty so save-under has work // to do (otherwise it's a 4 KB memset of zeros, atypical). 
jlSurfaceClear(gStage, 4); timeOp("jlSpriteSaveUnder", op_spriteSave); timeOp("jlSpriteDraw", op_spriteDraw); timeOp("jlSpriteRestoreUnder", op_spriteRestore); timeOp("jlSpriteSaveAndDraw", op_spriteSaveAndDraw); // Present. One warm-up call before each timed loop primes any // per-port one-time setup (Amiga: copper list rebuild after the // jlPaletteSet / jlScbSetRange tests dirty the cache; without warm-up // the rebuild's MakeScreen + MrgCop + WaitTOF chain consumes the // entire 4-frame measurement window) so we measure steady-state // throughput rather than first-call penalty. jlStagePresent(); timeOp("jlStagePresent full", op_stagePresent); // Input. timeOp("jlInputPoll", op_inputPoll); timeOp("jlKeyDown", op_keyDown); timeOp("jlKeyPressed", op_keyPressed); timeOp("jlMouseX", op_mouseX); timeOp("joeyJoyConnected", op_joyConnected); // Audio. timeOp("jlAudioFrameTick", op_audioFrameTick); timeOp("jlAudioIsPlayingMod", op_audioIsPlaying); // Surface mark dirty (via jlFillRect's mark step). timeOp("surfaceMarkDirtyRect (via jlFillRect 32x32)", op_surfaceMarkDirty); jlLogF("UBER: ----- end -----\n"); } int main(void) { jlConfigT config; uint16_t pal[16]; int i; uint16_t startFrame; uint16_t endFrame; uint16_t elapsedFrames; unsigned long elapsedMs; /* 32 KB fits the 8 pre-shifted DRAW variants the Amiga planar * compiled sprite emitter generates. UL on the multiply because * ORCA-C's 16-bit int overflows on 32 * 1024. */ config.codegenBytes = 32UL * 1024; config.maxSurfaces = 4; config.audioBytes = 64UL * 1024; if (!jlInit(&config)) { return 1; } /* jlFrameCount is VBL-driven, so it only ticks after halInit * installed its VBL ISR -- captured here is "everything from now * to press-any-key". Pre-init setup time is small and not the * cost the user is chasing; runAllTests dominates. */ startFrame = jlFrameCount(); gStage = jlStageGet(); if (gStage == NULL) { jlShutdown(); return 1; } // A simple visible palette so users see SOMETHING during the run. 
for (i = 0; i < 16; i++) { pal[i] = (uint16_t)((i << 8) | (i << 4) | i); // grey ramp } pal[ 0] = 0x000; pal[ 1] = 0x800; // dark red (running) pal[ 2] = 0x080; // green (done) pal[ 3] = 0x008; // blue pal[ 5] = 0xFF0; // yellow (test pixels) pal[ 7] = 0xFFF; // white (fills) pal[15] = 0xF00; // red jlPaletteSet(gStage, 0, pal); jlScbSetRange(gStage, 0, 199, 0); // Indicate "running": red bar at top of screen. jlSurfaceClear(gStage, 0); jlFillRect(gStage, 0, 0, 320, 8, 1); jlStagePresent(); buildBallSprite(); gSprite = jlSpriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y); if (gSprite == NULL) { jlLog("UBER: jlSpriteCreate failed"); jlShutdown(); return 1; } // jlSpriteCompile is a one-shot. Time at frame resolution. { uint16_t before; jlWaitVBL(); before = jlFrameCount(); if (!jlSpriteCompile(gSprite)) { jlLog("UBER: jlSpriteCompile failed"); } while (jlFrameCount() == before) { /* wait for next VBL edge */ } jlLogF("UBER: jlSpriteCompile: 1 call in <= 1 frame\n"); } gBackup.bytes = gBackupBytes; // Audio: only init/shutdown is exercised. Triggering jlAudioPlaySfx // without first calling jlAudioPlayMod leaves NTP's engine in a // half-initialized state -- NTPstreamsound is designed to OVERLAY on // an already-running module. Without NTPprepare/NTPplay first, the // streamer oscillator is fired but no music tick ever advances or // silences it, and you get a stuck high-pitched scream. UBER doesn't // ship a MOD asset, so we skip the SFX exercise. The frame-tick and // isPlayingMod calls below still get timed (both are no-op fast // paths on IIgs). if (jlAudioInit()) { jlLogF("UBER: audioInit OK\n"); } else { jlLogF("UBER: audioInit failed (skipping audio)\n"); } // Reset stage + run all per-frame timed tests. 
jlSurfaceClear(gStage, 0); jlFillRect(gStage, 0, 0, 320, 8, 1); jlStagePresent(); runAllTests(); endFrame = jlFrameCount(); elapsedFrames = (uint16_t)(endFrame - startFrame); elapsedMs = ((unsigned long)elapsedFrames * 1000UL) / (unsigned long)jlFrameHz(); jlLogF("UBER: total wall time: %lu ms (%u frames @ %u Hz)\n", elapsedMs, elapsedFrames, (unsigned)jlFrameHz()); // Done. Green screen + waitForKey. jlSurfaceClear(gStage, 2); jlStagePresent(); jlLogF("UBER: press any key to exit\n"); jlWaitForAnyKey(); jlSpriteDestroy(gSprite); jlShutdown(); return 0; }