joeylib2/examples/uber/uber.c

399 lines
15 KiB
C

// Uber demo: exercise every JoeyLib public API and measure throughput
// of the per-frame-hot ones. Results are written to joeylog.txt via
// jlLogF. A green screen on exit means the run completed.
//
// Timing model: each test aligns to a VBL boundary via jlWaitVBL,
// records the starting jlFrameCount, then runs the op in a tight
// loop polling jlFrameCount until UBER_FRAMES frames have elapsed.
// Reported metric is ops/sec, computed as iters * jlFrameHz() divided
// by the number of frames that actually elapsed (at least UBER_FRAMES),
// so results are directly comparable across ports regardless of CPU
// speed or VBL rate.
//
// jlFrameCount is wall-clock-based per port; the per-iter poll
// adds ~10-30 cyc per op which shows up as noise on the very
// fastest ops but is below ~5% even for ~500 cyc/op work.
//
// One-shot ops (jlSpriteCompile) get one call each, timed by frame
// delta -- coarser but representative.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <joey/joey.h>
// ----- Timing primitives -----
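// Measurement window, in frames. The original budget was 4 frames:
// long enough that loop overhead doesn't dominate, short enough to
// keep the full demo run under ~10 sec. It has since been raised to
// 16 for the reasons given below.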
/* 16 frames per timed op gives 4x the iter-count resolution of the
* earlier 4-frame budget. Exposes the actual per-op cost on slow
* ops where 4 frames produced the same iter count on different
* framerates -- e.g. jlDrawCircle r=80 read as "4 iters / 4 frames"
* on both 60 Hz IIgs (16.7 ms/frame, 67 ms window) and 50 Hz Amiga
* (20 ms/frame, 80 ms window) even though per-op cost was equal,
* just because 4 ops at 16-17 ms happen to fit both windows. The
* 16-frame budget extends the windows to 267 ms / 320 ms; quantum
* gap shrinks to ~6%. Total run time scales 4x (~80 sec each). */
#define UBER_FRAMES 16u
// Signature shared by every benchmarked wrapper: no arguments, no result.
typedef void (*OpFn)(void);
// Label of the op most recently handed to timeOp(); "(none)" until the
// first timed op starts.
static const char *gCurName = "(none)";
// Surface every op draws to / reads from; assigned from jlStageGet() in main.
static jlSurfaceT *gStage = NULL;
// Ball sprite used by the sprite ops; assigned from jlSpriteCreate() in main.
static jlSpriteT *gSprite = NULL;
// Save-under record shared by the sprite save / restore ops. main points
// its `bytes` member at gBackupBytes before the tests run.
static jlSpriteBackupT gBackup;
// Backing storage for gBackup.
static unsigned char gBackupBytes[256];
// Tile buffer written by jlTileSnap and read by jlTilePaste.
static jlTileT gTileScratch;
// Call `op` back-to-back, starting right after jlWaitVBL returns, until
// jlFrameCount has advanced by at least `targetFrames` ticks.
//
//   op              - wrapper to invoke on every pass.
//   targetFrames    - minimum number of frame ticks to keep looping for.
//   actualFramesOut - receives the ticks that really went by (never 0).
//
// Returns the number of times `op` was invoked.
static unsigned long runForFrames(OpFn op, unsigned int targetFrames, uint16_t *actualFramesOut) {
    unsigned long passes = 0UL;
    uint16_t      firstFrame;
    uint16_t      ticks;

    jlWaitVBL();
    firstFrame = jlFrameCount();

    for (;;) {
        /* The uint16_t cast keeps the delta correct when the frame
         * counter wraps between samples. */
        if ((uint16_t)(jlFrameCount() - firstFrame) >= targetFrames) {
            break;
        }
        op();
        passes++;
    }

    /* Report what actually elapsed rather than the target: the final
     * pass usually runs past the deadline, and for ops slower than one
     * frame the overshoot is large. Dividing by this value keeps the
     * derived ops/sec honest. */
    ticks = (uint16_t)(jlFrameCount() - firstFrame);

    /* Clamp to 1 so callers can divide by the result unconditionally. */
    *actualFramesOut = (ticks != 0u) ? ticks : 1u;

    return passes;
}
// Time one op and write a single result line to the log.
//
//   name - label used in the log line (also stored in gCurName).
//   op   - wrapper to benchmark.
//
// The line reports iterations, frames actually elapsed, and the derived
// ops/sec so per-port results are directly comparable against IIgs
// regardless of CPU speed or display refresh rate. It also carries an
// FNV-1a hash of the surface state after timing -- this is the
// pixel-perfect comparison input for the cross-port validation
// harness (tools/diff-uber-hashes.py). Captured against IIgs as the
// golden reference; planar 68k rewrites validate by matching it.
static void timeOp(const char *name, OpFn op) {
    unsigned long iters;
    unsigned long opsPerSec;
    uint16_t      actualFrames;
    uint32_t      hash;

    gCurName = name;
    iters = runForFrames(op, UBER_FRAMES, &actualFrames);
    if (iters == 0UL) {
        jlLogF("UBER: %s: 0 iters (op too slow?)\n", name);
        return;
    }

    /* Divide by ACTUAL elapsed frames, not the target. For sub-frame
     * ops actualFrames ~= UBER_FRAMES so the answer is unchanged;
     * for ops that overrun (slow jlStagePresent etc.), this stops
     * inflating ops/sec. runForFrames never reports 0 frames, so the
     * division is safe. */
    opsPerSec = (iters * (unsigned long)jlFrameHz()) / (unsigned long)actualFrames;
    hash = jlSurfaceHash(gStage);

    /* Cast the uint16_t explicitly: where int is wider than 16 bits it
     * would otherwise be promoted to signed int, which is not the type
     * %u expects. */
    jlLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec | hash=%08lX\n",
           name, iters, (unsigned)actualFrames, opsPerSec, (unsigned long)hash);
}
// ----- Test ops -----
/* Arguments shared by several of the primitive wrappers below. */
enum {
    UBER_OP_INK      = 5,    /* color passed to the outline primitives */
    UBER_OP_FILL     = 7,    /* color passed to the fill primitives */
    UBER_OP_CENTER_X = 160,  /* circle center, x */
    UBER_OP_CENTER_Y = 100   /* circle center, y */
};

/* Single pixel at (100, 100). */
static void op_drawPixel(void) {
    jlDrawPixel(gStage, 100, 100, UBER_OP_INK);
}

/* Horizontal line from (0, 50) to (319, 50). */
static void op_drawLineH(void) {
    jlDrawLine(gStage, 0, 50, 319, 50, UBER_OP_INK);
}

/* Vertical line from (50, 0) to (50, 199). */
static void op_drawLineV(void) {
    jlDrawLine(gStage, 50, 0, 50, 199, UBER_OP_INK);
}

/* Diagonal line from (0, 0) to (319, 199). */
static void op_drawLineDiag(void) {
    jlDrawLine(gStage, 0, 0, 319, 199, UBER_OP_INK);
}

/* Rectangle outline, arguments (10, 10, 100, 100). */
static void op_drawRect(void) {
    jlDrawRect(gStage, 10, 10, 100, 100, UBER_OP_INK);
}

/* Circle outline, radius 16. */
static void op_drawCircleSmall(void) {
    jlDrawCircle(gStage, UBER_OP_CENTER_X, UBER_OP_CENTER_Y, 16, UBER_OP_INK);
}

/* Circle outline, radius 80. */
static void op_drawCircleLarge(void) {
    jlDrawCircle(gStage, UBER_OP_CENTER_X, UBER_OP_CENTER_Y, 80, UBER_OP_INK);
}

/* 16x16 filled rectangle at (20, 20). */
static void op_fillRectSmall(void) {
    jlFillRect(gStage, 20, 20, 16, 16, UBER_OP_FILL);
}

/* 80x80 filled rectangle at (20, 20). */
static void op_fillRectMid(void) {
    jlFillRect(gStage, 20, 20, 80, 80, UBER_OP_FILL);
}

/* 320x200 filled rectangle at (0, 0). */
static void op_fillRectFull(void) {
    jlFillRect(gStage, 0, 0, 320, 200, UBER_OP_FILL);
}

/* Filled circle, radius 40. */
static void op_fillCircle(void) {
    jlFillCircle(gStage, UBER_OP_CENTER_X, UBER_OP_CENTER_Y, 40, UBER_OP_FILL);
}

/* Read back the pixel at (100, 100); the value is discarded. */
static void op_samplePixel(void) {
    (void)jlSamplePixel(gStage, 100, 100);
}

/* Clear the whole stage to color 0. */
static void op_surfaceClear(void) {
    jlSurfaceClear(gStage, 0);
}
/* Load a fixed 16-entry palette into the stage, passing 0 as the
 * second argument. The table is static so every call hands
 * jlPaletteSet the same storage. */
static void op_paletteSet(void) {
    static uint16_t benchPalette[16] = {
        0x000, 0xF00, 0x0F0, 0x00F,
        0xFF0, 0xF0F, 0x0FF, 0xFFF,
        0x800, 0x080, 0x008, 0x880,
        0x808, 0x088, 0x888, 0x444
    };

    jlPaletteSet(gStage, 0, benchPalette);
}
/* jlScbSetRange over 0..199 with value 0. */
static void op_scbSetRange(void) {
    jlScbSetRange(gStage, 0, 199, 0);
}

/* Fill tile cell (5, 5) with color 7. */
static void op_tileFill(void) {
    jlTileFill(gStage, 5, 5, 7);
}

/* Stage-to-stage tile copy between cells (6, 6) and (5, 5). */
static void op_tileCopy(void) {
    jlTileCopy(gStage, 6, 6, gStage, 5, 5);
}

/* Stage-to-stage masked tile copy between cells (7, 7) and (5, 5),
 * with 0 as the final argument. */
static void op_tileCopyMasked(void) {
    jlTileCopyMasked(gStage, 7, 7, gStage, 5, 5, 0);
}

/* Paste gTileScratch into cell (8, 8). */
static void op_tilePaste(void) {
    jlTilePaste(gStage, 8, 8, &gTileScratch);
}

/* Snapshot cell (5, 5) into gTileScratch. */
static void op_tileSnap(void) {
    jlTileSnap(gStage, 5, 5, &gTileScratch);
}
/* Position handed to every sprite op. */
static int16_t gSpriteX = 40;
static int16_t gSpriteY = 30;

/* Save the area under the sprite into gBackup. */
static void op_spriteSave(void) {
    jlSpriteSaveUnder(gStage, gSprite, gSpriteX, gSpriteY, &gBackup);
}

/* Draw the sprite at (gSpriteX, gSpriteY). */
static void op_spriteDraw(void) {
    jlSpriteDraw(gStage, gSprite, gSpriteX, gSpriteY);
}

/* Restore the stage from gBackup. */
static void op_spriteRestore(void) {
    jlSpriteRestoreUnder(gStage, &gBackup);
}

/* Combined save-under + draw in a single call. */
static void op_spriteSaveAndDraw(void) {
    jlSpriteSaveAndDraw(gStage, gSprite, gSpriteX, gSpriteY, &gBackup);
}
/* Present the stage. */
static void op_stagePresent(void) {
    jlStagePresent();
}

/* Poll input devices. */
static void op_inputPoll(void) {
    jlInputPoll();
}

/* Query KEY_A held state; result discarded. */
static void op_keyDown(void) {
    (void)jlKeyDown(KEY_A);
}

/* Query KEY_A pressed state; result discarded. */
static void op_keyPressed(void) {
    (void)jlKeyPressed(KEY_A);
}

/* Read the mouse X coordinate; result discarded. */
static void op_mouseX(void) {
    (void)jlMouseX();
}

/* Query JOYSTICK_1 presence; result discarded. */
static void op_joyConnected(void) {
    (void)jlJoystickConnected(JOYSTICK_1);
}

/* Per-frame audio tick. */
static void op_audioFrameTick(void) {
    jlAudioFrameTick();
}

/* Query whether a MOD is playing; result discarded. */
static void op_audioIsPlaying(void) {
    (void)jlAudioIsPlayingMod();
}

/* Dirty-marking cost, measured through a 32x32 fill.
 * jlDrawPixel already marks; use fill instead. */
static void op_surfaceMarkDirty(void) {
    jlFillRect(gStage, 0, 0, 32, 32, 0);
}
// ----- Build the ball sprite procedurally -----
/* Sprite size in tiles, and the total byte count of its tile data
 * (32 bytes per tile). */
#define BALL_TILES_X 2
#define BALL_TILES_Y 2
#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * 32u)

/* Geometry used when slicing the authored bitmap into tiles. */
#define BALL_SRC_ROW_BYTES  8u   /* bytes per row of gBallAuthored */
#define BALL_TILE_ROWS      8u   /* rows per tile */
#define BALL_TILE_ROW_BYTES 4u   /* bytes per tile row */

/* The ball as authored: 16 rows of 8 bytes, stored row-major across
 * the full sprite width. (Presumably two 4-bit pixels per byte --
 * confirm against the JoeyLib tile format.) */
static const uint8_t gBallAuthored[16 * 8] = {
    0x00, 0x00, 0x22, 0x22,   0x22, 0x22, 0x00, 0x00,
    0x00, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x00,
    0x02, 0x22, 0x32, 0x22,   0x22, 0x22, 0x22, 0x20,
    0x02, 0x23, 0x32, 0x22,   0x22, 0x22, 0x22, 0x20,
    0x22, 0x33, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,

    0x22, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x22,
    0x02, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x20,
    0x02, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x20,
    0x00, 0x22, 0x22, 0x22,   0x22, 0x22, 0x22, 0x00,
    0x00, 0x00, 0x22, 0x22,   0x22, 0x22, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x22,   0x22, 0x00, 0x00, 0x00
};

/* Tile-ordered copy of gBallAuthored, filled in by buildBallSprite(). */
static uint8_t gBallTiles[BALL_TILE_BYTES];

/* Re-slice gBallAuthored into gBallTiles. Tiles are emitted left to
 * right, then top to bottom; within a tile the rows are stored
 * consecutively, so the output is written strictly front to back. */
static void buildBallSprite(void) {
    uint8_t  *out;
    uint16_t  tileRow;
    uint16_t  tileCol;
    uint16_t  line;

    out = gBallTiles;
    for (tileRow = 0; tileRow < BALL_TILES_Y; tileRow++) {
        for (tileCol = 0; tileCol < BALL_TILES_X; tileCol++) {
            for (line = 0; line < BALL_TILE_ROWS; line++) {
                /* Source row = this tile's first row + line; source
                 * column = this tile's byte offset within that row. */
                memcpy(out,
                       &gBallAuthored[(tileRow * BALL_TILE_ROWS + line) * BALL_SRC_ROW_BYTES
                                      + tileCol * BALL_TILE_ROW_BYTES],
                       BALL_TILE_ROW_BYTES);
                out += BALL_TILE_ROW_BYTES;
            }
        }
    }
}
// ----- Main -----
static void runAllTests(void) {
jlLogF("UBER: ----- begin -----\n");
// Surface / palette / SCB.
timeOp("jlSurfaceClear", op_surfaceClear);
timeOp("jlPaletteSet", op_paletteSet);
timeOp("jlScbSetRange", op_scbSetRange);
// Drawing primitives.
timeOp("jlDrawPixel", op_drawPixel);
timeOp("jlDrawLine H", op_drawLineH);
timeOp("jlDrawLine V", op_drawLineV);
timeOp("jlDrawLine diag", op_drawLineDiag);
timeOp("jlDrawRect 100x100", op_drawRect);
timeOp("jlDrawCircle r=16", op_drawCircleSmall);
timeOp("jlDrawCircle r=80", op_drawCircleLarge);
timeOp("jlFillRect 16x16", op_fillRectSmall);
timeOp("jlFillRect 80x80", op_fillRectMid);
timeOp("jlFillRect 320x200", op_fillRectFull);
timeOp("jlFillCircle r=40", op_fillCircle);
timeOp("jlSamplePixel", op_samplePixel);
// Tiles. Seed scratch tile + dest cells with non-zero pixels first.
jlFillRect(gStage, 0, 0, 320, 64, 7);
jlTileSnap(gStage, 5, 5, &gTileScratch);
timeOp("jlTileFill", op_tileFill);
timeOp("jlTileCopy", op_tileCopy);
timeOp("jlTileCopyMasked", op_tileCopyMasked);
timeOp("jlTilePaste", op_tilePaste);
timeOp("jlTileSnap", op_tileSnap);
// Sprites. Background must be non-empty so save-under has work
// to do (otherwise it's a 4 KB memset of zeros, atypical).
jlSurfaceClear(gStage, 4);
timeOp("jlSpriteSaveUnder", op_spriteSave);
timeOp("jlSpriteDraw", op_spriteDraw);
timeOp("jlSpriteRestoreUnder", op_spriteRestore);
timeOp("jlSpriteSaveAndDraw", op_spriteSaveAndDraw);
// Present. One warm-up call before each timed loop primes any
// per-port one-time setup (Amiga: copper list rebuild after the
// jlPaletteSet / jlScbSetRange tests dirty the cache; without warm-up
// the rebuild's MakeScreen + MrgCop + WaitTOF chain consumes the
// entire 4-frame measurement window) so we measure steady-state
// throughput rather than first-call penalty.
jlStagePresent();
timeOp("jlStagePresent full", op_stagePresent);
// Input.
timeOp("jlInputPoll", op_inputPoll);
timeOp("jlKeyDown", op_keyDown);
timeOp("jlKeyPressed", op_keyPressed);
timeOp("jlMouseX", op_mouseX);
timeOp("joeyJoyConnected", op_joyConnected);
// Audio.
timeOp("jlAudioFrameTick", op_audioFrameTick);
timeOp("jlAudioIsPlayingMod", op_audioIsPlaying);
// Surface mark dirty (via jlFillRect's mark step).
timeOp("surfaceMarkDirtyRect (via jlFillRect 32x32)", op_surfaceMarkDirty);
jlLogF("UBER: ----- end -----\n");
}
// Program entry. Brings up JoeyLib, shows a red "running" bar, builds
// and compiles the ball sprite, runs every timed test, logs the total
// wall time, then shows a green screen and waits for a key.
//
// Returns 0 after a completed run, 1 if jlInit, jlStageGet or
// jlSpriteCreate fails.
int main(void) {
    jlConfigT     config;
    uint16_t      pal[16];
    int           i;
    uint16_t      startFrame;
    uint16_t      endFrame;
    uint16_t      elapsedFrames;
    unsigned long elapsedMs;

    /* Start from all-zero so any jlConfigT member not assigned below
     * has a defined value rather than whatever was on the stack.
     * NOTE(review): jlConfigT's full member list isn't visible from
     * this file -- confirm zero is an acceptable default for the rest. */
    memset(&config, 0, sizeof config);

    /* 32 KB fits the 8 pre-shifted DRAW variants the Amiga planar
     * compiled sprite emitter generates. UL on the multiply because
     * ORCA-C's 16-bit int overflows on 32 * 1024. */
    config.codegenBytes = 32UL * 1024;
    config.maxSurfaces = 4;
    config.audioBytes = 64UL * 1024;
    if (!jlInit(&config)) {
        return 1;
    }

    /* jlFrameCount is VBL-driven, so it only ticks after halInit
     * installed its VBL ISR -- captured here is "everything from now
     * to press-any-key". Pre-init setup time is small and not the
     * cost the user is chasing; runAllTests dominates. */
    startFrame = jlFrameCount();

    gStage = jlStageGet();
    if (gStage == NULL) {
        jlShutdown();
        return 1;
    }

    // A simple visible palette so users see SOMETHING during the run.
    for (i = 0; i < 16; i++) {
        pal[i] = (uint16_t)((i << 8) | (i << 4) | i);  // grey ramp
    }
    pal[ 0] = 0x000;
    pal[ 1] = 0x800;  // dark red (running)
    pal[ 2] = 0x080;  // green (done)
    pal[ 3] = 0x008;  // blue
    pal[ 5] = 0xFF0;  // yellow (test pixels)
    pal[ 7] = 0xFFF;  // white (fills)
    pal[15] = 0xF00;  // red
    jlPaletteSet(gStage, 0, pal);
    jlScbSetRange(gStage, 0, 199, 0);

    // Indicate "running": red bar at top of screen.
    jlSurfaceClear(gStage, 0);
    jlFillRect(gStage, 0, 0, 320, 8, 1);
    jlStagePresent();

    buildBallSprite();
    gSprite = jlSpriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y);
    if (gSprite == NULL) {
        jlLog("UBER: jlSpriteCreate failed");
        jlShutdown();
        return 1;
    }

    // jlSpriteCompile is a one-shot. Time at frame resolution.
    {
        uint16_t before;
        uint16_t compileFrames;

        jlWaitVBL();
        before = jlFrameCount();
        if (!jlSpriteCompile(gSprite)) {
            jlLog("UBER: jlSpriteCompile failed");
        }
        /* The call started right after jlWaitVBL returned, so seeing N
         * counter ticks afterwards means it finished within N + 1
         * frame periods. Log that bound instead of a fixed
         * "<= 1 frame", which was wrong whenever the compile ran
         * longer than a frame. The output is unchanged for a compile
         * that does finish inside one frame. */
        compileFrames = (uint16_t)((uint16_t)(jlFrameCount() - before) + 1u);
        jlLogF("UBER: jlSpriteCompile: 1 call in <= %u frame%s\n",
               (unsigned)compileFrames, (compileFrames == 1u) ? "" : "s");
    }

    gBackup.bytes = gBackupBytes;

    // Audio: only init is exercised. Triggering jlAudioPlaySfx
    // without first calling jlAudioPlayMod leaves NTP's engine in a
    // half-initialized state -- NTPstreamsound is designed to OVERLAY on
    // an already-running module. Without NTPprepare/NTPplay first, the
    // streamer oscillator is fired but no music tick ever advances or
    // silences it, and you get a stuck high-pitched scream. UBER doesn't
    // ship a MOD asset, so we skip the SFX exercise. The frame-tick and
    // isPlayingMod calls below still get timed (both are no-op fast
    // paths on IIgs).
    // NOTE(review): no explicit audio shutdown call is made here;
    // presumably jlShutdown covers it -- confirm.
    if (jlAudioInit()) {
        jlLogF("UBER: audioInit OK\n");
    } else {
        jlLogF("UBER: audioInit failed (skipping audio)\n");
    }

    // Reset stage + run all per-frame timed tests.
    jlSurfaceClear(gStage, 0);
    jlFillRect(gStage, 0, 0, 320, 8, 1);
    jlStagePresent();
    runAllTests();

    /* uint16_t subtraction keeps the delta correct across one wrap of
     * the frame counter. */
    endFrame = jlFrameCount();
    elapsedFrames = (uint16_t)(endFrame - startFrame);
    elapsedMs = ((unsigned long)elapsedFrames * 1000UL) / (unsigned long)jlFrameHz();
    /* Both %u arguments are cast so they have exactly the type the
     * conversion expects, whatever the port's int width. */
    jlLogF("UBER: total wall time: %lu ms (%u frames @ %u Hz)\n",
           elapsedMs, (unsigned)elapsedFrames, (unsigned)jlFrameHz());

    // Done. Green screen + waitForKey.
    jlSurfaceClear(gStage, 2);
    jlStagePresent();
    jlLogF("UBER: press any key to exit\n");
    jlWaitForAnyKey();

    jlSpriteDestroy(gSprite);
    jlShutdown();
    return 0;
}