400 lines
15 KiB
C
400 lines
15 KiB
C
// Uber demo: exercise every JoeyLib public API and measure throughput
|
|
// of the per-frame-hot ones. Results are written to joeylog.txt via
|
|
// joeyLogF. A green screen on exit means the run completed.
|
|
//
|
|
// Timing model: each test aligns to a VBL boundary via joeyWaitVBL,
|
|
// records the starting joeyFrameCount, then runs the op in a tight
|
|
// loop polling joeyFrameCount until UBER_FRAMES frames have elapsed.
|
|
// Reported metric is ops/sec, computed as iters * joeyFrameHz() /
// (frames actually elapsed, which is at least UBER_FRAMES), so results
// are directly comparable across ports regardless of CPU speed or
// VBL rate.
|
|
//
|
|
// joeyFrameCount is wall-clock-based per port; the per-iter poll
|
|
// adds ~10-30 cyc per op which shows up as noise on the very
|
|
// fastest ops but is below ~5% even for ~500 cyc/op work.
|
|
//
|
|
// One-shot ops (spriteCompile) get one call each, timed by frame
|
|
// delta -- coarser but representative.
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stddef.h>
|
|
|
|
#include <joey/joey.h>
|
|
|
|
|
|
// ----- Timing primitives -----
|
|
|
|
// Measurement window. The original budget was 4 frames: long enough
// that loop overhead doesn't dominate, short enough to keep the full
// demo run under ~10 sec. It has since been widened to 16 frames for
// the reasons given below.
|
|
/* 16 frames per timed op gives 4x the iter-count resolution of the
|
|
* earlier 4-frame budget. Exposes the actual per-op cost on slow
|
|
* ops where 4 frames produced the same iter count on different
|
|
* framerates -- e.g. drawCircle r=80 read as "4 iters / 4 frames"
|
|
* on both 60 Hz IIgs (16.7 ms/frame, 67 ms window) and 50 Hz Amiga
|
|
* (20 ms/frame, 80 ms window) even though per-op cost was equal,
|
|
* just because 4 ops at 16-17 ms happen to fit both windows. The
|
|
* 16-frame budget extends the windows to 267 ms / 320 ms; quantum
|
|
* gap shrinks to ~6%. Total run time scales 4x (~80 sec each). */
|
|
#define UBER_FRAMES 16u
|
|
|
|
|
|
typedef void (*OpFn)(void);
|
|
|
|
static const char *gCurName = "(none)";
|
|
static SurfaceT *gStage = NULL;
|
|
static SpriteT *gSprite = NULL;
|
|
static SpriteBackupT gBackup;
|
|
static unsigned char gBackupBytes[256];
|
|
|
|
static TileT gTileScratch;
|
|
|
|
|
|
// Run `op` in a tight loop until `targetFrames` joeyFrameCount ticks
|
|
// have elapsed. Returns iterations completed.
|
|
static unsigned long runForFrames(OpFn op, unsigned int targetFrames, uint16_t *actualFramesOut) {
|
|
unsigned long count;
|
|
uint16_t startFrame;
|
|
uint16_t endFrame;
|
|
|
|
count = 0UL;
|
|
|
|
joeyWaitVBL();
|
|
startFrame = joeyFrameCount();
|
|
|
|
while ((uint16_t)(joeyFrameCount() - startFrame) < targetFrames) {
|
|
op();
|
|
count++;
|
|
}
|
|
/* Capture the actual elapsed frames -- the last iter typically
|
|
* overruns the target. Using actual instead of target as the
|
|
* ops/sec divisor stays honest for ops slower than 1 frame
|
|
* (where count is forced low while real time stretches well
|
|
* past targetFrames). */
|
|
endFrame = joeyFrameCount();
|
|
*actualFramesOut = (uint16_t)(endFrame - startFrame);
|
|
if (*actualFramesOut == 0u) {
|
|
*actualFramesOut = 1u; /* defensive: avoid div-by-zero */
|
|
}
|
|
return count;
|
|
}
|
|
|
|
|
|
// Time and log one op. Reports iters / N frames AND the derived
|
|
// ops/sec so per-port results are directly comparable against IIgs
|
|
// regardless of CPU speed or display refresh rate. Also logs an
|
|
// FNV-1a hash of the surface state after timing -- this is the
|
|
// pixel-perfect comparison input for the cross-port validation
|
|
// harness (tools/diff-uber-hashes.py). Captured against IIgs as the
|
|
// golden reference; planar 68k rewrites validate by matching it.
|
|
static void timeOp(const char *name, OpFn op) {
|
|
unsigned long iters;
|
|
unsigned long opsPerSec;
|
|
uint16_t actualFrames;
|
|
uint32_t hash;
|
|
|
|
gCurName = name;
|
|
|
|
iters = runForFrames(op, UBER_FRAMES, &actualFrames);
|
|
|
|
if (iters == 0UL) {
|
|
joeyLogF("UBER: %s: 0 iters (op too slow?)\n", name);
|
|
return;
|
|
}
|
|
|
|
/* Divide by ACTUAL elapsed frames, not the target. For sub-frame
|
|
* ops actualFrames ~= UBER_FRAMES so the answer is unchanged;
|
|
* for ops that overrun (slow stagePresent etc.), this stops
|
|
* inflating ops/sec. */
|
|
opsPerSec = (iters * (unsigned long)joeyFrameHz()) / (unsigned long)actualFrames;
|
|
hash = surfaceHash(gStage);
|
|
joeyLogF("UBER: %s: %lu iters / %u frames = %lu ops/sec | hash=%08lX\n",
|
|
name, iters, actualFrames, opsPerSec, (unsigned long)hash);
|
|
}
|
|
|
|
|
|
|
|
|
|
// ----- Test ops -----
|
|
|
|
/* Drawing and surface benchmarks. Each wrapper makes one fixed call
 * against gStage so that it matches the OpFn signature timeOp expects. */

static void op_drawPixel(void) {
    drawPixel(gStage, 100, 100, 5);
}

static void op_drawLineH(void) {
    drawLine(gStage, 0, 50, 319, 50, 5);
}

static void op_drawLineV(void) {
    drawLine(gStage, 50, 0, 50, 199, 5);
}

static void op_drawLineDiag(void) {
    drawLine(gStage, 0, 0, 319, 199, 5);
}

static void op_drawRect(void) {
    drawRect(gStage, 10, 10, 100, 100, 5);
}

static void op_drawCircleSmall(void) {
    drawCircle(gStage, 160, 100, 16, 5);
}

static void op_drawCircleLarge(void) {
    drawCircle(gStage, 160, 100, 80, 5);
}

static void op_fillRectSmall(void) {
    fillRect(gStage, 20, 20, 16, 16, 7);
}

static void op_fillRectMid(void) {
    fillRect(gStage, 20, 20, 80, 80, 7);
}

static void op_fillRectFull(void) {
    fillRect(gStage, 0, 0, 320, 200, 7);
}

static void op_fillCircle(void) {
    fillCircle(gStage, 160, 100, 40, 7);
}

/* The sampled value is deliberately discarded; only the call is timed. */
static void op_samplePixel(void) {
    (void)samplePixel(gStage, 100, 100);
}

static void op_surfaceClear(void) {
    surfaceClear(gStage, 0);
}
|
|
|
|
static void op_paletteSet(void) {
|
|
static uint16_t colors[16] = {
|
|
0x000, 0xF00, 0x0F0, 0x00F, 0xFF0, 0xF0F, 0x0FF, 0xFFF,
|
|
0x800, 0x080, 0x008, 0x880, 0x808, 0x088, 0x888, 0x444
|
|
};
|
|
paletteSet(gStage, 0, colors);
|
|
}
|
|
static void op_scbSetRange (void) { scbSetRange (gStage, 0, 199, 0); }
|
|
|
|
static void op_tileFill (void) { tileFill (gStage, 5, 5, 7); }
|
|
static void op_tileCopy (void) { tileCopy (gStage, 6, 6, gStage, 5, 5); }
|
|
static void op_tileCopyMasked (void) { tileCopyMasked (gStage, 7, 7, gStage, 5, 5, 0); }
|
|
static void op_tilePaste (void) { tilePaste (gStage, 8, 8, &gTileScratch); }
|
|
static void op_tileSnap (void) { tileSnap (gStage, 5, 5, &gTileScratch); }
|
|
|
|
static int16_t gSpriteX = 40;
|
|
static int16_t gSpriteY = 30;
|
|
|
|
static void op_spriteSave (void) { spriteSaveUnder (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); }
|
|
static void op_spriteDraw (void) { spriteDraw (gStage, gSprite, gSpriteX, gSpriteY); }
|
|
static void op_spriteRestore (void) { spriteRestoreUnder(gStage, &gBackup); }
|
|
static void op_spriteSaveAndDraw (void) { spriteSaveAndDraw (gStage, gSprite, gSpriteX, gSpriteY, &gBackup); }
|
|
|
|
static void op_stagePresent (void) { stagePresent(); }
|
|
|
|
static void op_inputPoll (void) { joeyInputPoll(); }
|
|
static void op_keyDown (void) { (void)joeyKeyDown(KEY_A); }
|
|
static void op_keyPressed (void) { (void)joeyKeyPressed(KEY_A); }
|
|
static void op_mouseX (void) { (void)joeyMouseX(); }
|
|
static void op_joyConnected (void) { (void)joeyJoystickConnected(JOYSTICK_1); }
|
|
|
|
static void op_audioFrameTick (void) { joeyAudioFrameTick(); }
|
|
static void op_audioIsPlaying (void) { (void)joeyAudioIsPlayingMod(); }
|
|
|
|
static void op_surfaceMarkDirty(void) { /* drawPixel already marks; use fill instead */
|
|
fillRect(gStage, 0, 0, 32, 32, 0); }
|
|
|
|
|
|
// ----- Build the ball sprite procedurally -----
|
|
|
|
/* Sprite size in tiles, and the total bytes of tile data (32 per tile). */
#define BALL_TILES_X    2
#define BALL_TILES_Y    2
#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * 32u)

/* Ball artwork as authored: 16 rows of 8 bytes, stored row by row
 * across the full sprite width. */
static const uint8_t gBallAuthored[16 * 8] = {
    0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00,
    0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00,
    0x02, 0x22, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x02, 0x23, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x22, 0x33, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22,
    0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x02, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x20,
    0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00,
    0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00,
    0x00, 0x00, 0x00, 0x22, 0x22, 0x00, 0x00, 0x00
};

/* The same artwork regrouped tile by tile; filled by buildBallSprite. */
static uint8_t gBallTiles[BALL_TILE_BYTES];

/* Regroup gBallAuthored (whole-sprite rows) into gBallTiles (one
 * 32-byte tile after another, tiles ordered left-to-right then
 * top-to-bottom, each tile holding 8 rows of 4 bytes).
 *
 * The source is walked linearly and each byte is scattered to its
 * destination slot. */
static void buildBallSprite(void) {
    const uint8_t *src = gBallAuthored;
    uint16_t       srcRow;
    uint16_t       srcCol;
    uint16_t       tile;

    for (srcRow = 0; srcRow < BALL_TILES_Y * 8; srcRow++) {
        for (srcCol = 0; srcCol < BALL_TILES_X * 4; srcCol++) {
            /* Tile containing this byte: 8 rows and 4 bytes per tile. */
            tile = (uint16_t)((srcRow >> 3) * BALL_TILES_X + (srcCol >> 2));

            /* Offset within the tile: row-in-tile * 4 + byte-in-row. */
            gBallTiles[tile * 32u + (srcRow & 7u) * 4u + (srcCol & 3u)] = *src++;
        }
    }
}
|
|
|
|
|
|
// ----- Main -----
|
|
|
|
static void runAllTests(void) {
|
|
joeyLogF("UBER: ----- begin -----\n");
|
|
|
|
// Surface / palette / SCB.
|
|
timeOp("surfaceClear", op_surfaceClear);
|
|
timeOp("paletteSet", op_paletteSet);
|
|
timeOp("scbSetRange", op_scbSetRange);
|
|
|
|
// Drawing primitives.
|
|
timeOp("drawPixel", op_drawPixel);
|
|
timeOp("drawLine H", op_drawLineH);
|
|
timeOp("drawLine V", op_drawLineV);
|
|
timeOp("drawLine diag", op_drawLineDiag);
|
|
timeOp("drawRect 100x100", op_drawRect);
|
|
timeOp("drawCircle r=16", op_drawCircleSmall);
|
|
timeOp("drawCircle r=80", op_drawCircleLarge);
|
|
timeOp("fillRect 16x16", op_fillRectSmall);
|
|
timeOp("fillRect 80x80", op_fillRectMid);
|
|
timeOp("fillRect 320x200", op_fillRectFull);
|
|
timeOp("fillCircle r=40", op_fillCircle);
|
|
timeOp("samplePixel", op_samplePixel);
|
|
|
|
// Tiles. Seed scratch tile + dest cells with non-zero pixels first.
|
|
fillRect(gStage, 0, 0, 320, 64, 7);
|
|
tileSnap(gStage, 5, 5, &gTileScratch);
|
|
timeOp("tileFill", op_tileFill);
|
|
timeOp("tileCopy", op_tileCopy);
|
|
timeOp("tileCopyMasked", op_tileCopyMasked);
|
|
timeOp("tilePaste", op_tilePaste);
|
|
timeOp("tileSnap", op_tileSnap);
|
|
|
|
// Sprites. Background must be non-empty so save-under has work
|
|
// to do (otherwise it's a 4 KB memset of zeros, atypical).
|
|
surfaceClear(gStage, 4);
|
|
timeOp("spriteSaveUnder", op_spriteSave);
|
|
timeOp("spriteDraw", op_spriteDraw);
|
|
timeOp("spriteRestoreUnder", op_spriteRestore);
|
|
timeOp("spriteSaveAndDraw", op_spriteSaveAndDraw);
|
|
|
|
// Present. One warm-up call before each timed loop primes any
|
|
// per-port one-time setup (Amiga: copper list rebuild after the
|
|
// paletteSet / scbSetRange tests dirty the cache; without warm-up
|
|
// the rebuild's MakeScreen + MrgCop + WaitTOF chain consumes the
|
|
// entire 4-frame measurement window) so we measure steady-state
|
|
// throughput rather than first-call penalty.
|
|
stagePresent();
|
|
timeOp("stagePresent full", op_stagePresent);
|
|
|
|
// Input.
|
|
timeOp("joeyInputPoll", op_inputPoll);
|
|
timeOp("joeyKeyDown", op_keyDown);
|
|
timeOp("joeyKeyPressed", op_keyPressed);
|
|
timeOp("joeyMouseX", op_mouseX);
|
|
timeOp("joeyJoyConnected", op_joyConnected);
|
|
|
|
// Audio.
|
|
timeOp("joeyAudioFrameTick", op_audioFrameTick);
|
|
timeOp("joeyAudioIsPlayingMod", op_audioIsPlaying);
|
|
|
|
// Surface mark dirty (via fillRect's mark step).
|
|
timeOp("surfaceMarkDirtyRect (via fillRect 32x32)", op_surfaceMarkDirty);
|
|
|
|
joeyLogF("UBER: ----- end -----\n");
|
|
}
|
|
|
|
|
|
int main(void) {
|
|
JoeyConfigT config;
|
|
uint16_t pal[16];
|
|
int i;
|
|
uint16_t startFrame;
|
|
uint16_t endFrame;
|
|
uint16_t elapsedFrames;
|
|
unsigned long elapsedMs;
|
|
|
|
/* 32 KB fits the 8 pre-shifted DRAW variants the Amiga planar
|
|
* compiled sprite emitter generates. UL on the multiply because
|
|
* ORCA-C's 16-bit int overflows on 32 * 1024. */
|
|
config.codegenBytes = 32UL * 1024;
|
|
config.maxSurfaces = 4;
|
|
config.audioBytes = 64UL * 1024;
|
|
config.assetBytes = 128UL * 1024;
|
|
|
|
if (!joeyInit(&config)) {
|
|
return 1;
|
|
}
|
|
/* joeyFrameCount is VBL-driven, so it only ticks after halInit
|
|
* installed its VBL ISR -- captured here is "everything from now
|
|
* to press-any-key". Pre-init setup time is small and not the
|
|
* cost the user is chasing; runAllTests dominates. */
|
|
startFrame = joeyFrameCount();
|
|
|
|
gStage = stageGet();
|
|
if (gStage == NULL) {
|
|
joeyShutdown();
|
|
return 1;
|
|
}
|
|
|
|
// A simple visible palette so users see SOMETHING during the run.
|
|
for (i = 0; i < 16; i++) {
|
|
pal[i] = (uint16_t)((i << 8) | (i << 4) | i); // grey ramp
|
|
}
|
|
pal[ 0] = 0x000;
|
|
pal[ 1] = 0x800; // dark red (running)
|
|
pal[ 2] = 0x080; // green (done)
|
|
pal[ 3] = 0x008; // blue
|
|
pal[ 5] = 0xFF0; // yellow (test pixels)
|
|
pal[ 7] = 0xFFF; // white (fills)
|
|
pal[15] = 0xF00; // red
|
|
paletteSet(gStage, 0, pal);
|
|
scbSetRange(gStage, 0, 199, 0);
|
|
|
|
// Indicate "running": red bar at top of screen.
|
|
surfaceClear(gStage, 0);
|
|
fillRect(gStage, 0, 0, 320, 8, 1);
|
|
stagePresent();
|
|
|
|
buildBallSprite();
|
|
gSprite = spriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y);
|
|
if (gSprite == NULL) {
|
|
joeyLog("UBER: spriteCreate failed");
|
|
joeyShutdown();
|
|
return 1;
|
|
}
|
|
// spriteCompile is a one-shot. Time at frame resolution.
|
|
{
|
|
uint16_t before;
|
|
|
|
joeyWaitVBL();
|
|
before = joeyFrameCount();
|
|
if (!spriteCompile(gSprite)) {
|
|
joeyLog("UBER: spriteCompile failed");
|
|
}
|
|
while (joeyFrameCount() == before) {
|
|
/* wait for next VBL edge */
|
|
}
|
|
joeyLogF("UBER: spriteCompile: 1 call in <= 1 frame\n");
|
|
}
|
|
gBackup.bytes = gBackupBytes;
|
|
|
|
// Audio: only init/shutdown is exercised. Triggering joeyAudioPlaySfx
|
|
// without first calling joeyAudioPlayMod leaves NTP's engine in a
|
|
// half-initialized state -- NTPstreamsound is designed to OVERLAY on
|
|
// an already-running module. Without NTPprepare/NTPplay first, the
|
|
// streamer oscillator is fired but no music tick ever advances or
|
|
// silences it, and you get a stuck high-pitched scream. UBER doesn't
|
|
// ship a MOD asset, so we skip the SFX exercise. The frame-tick and
|
|
// isPlayingMod calls below still get timed (both are no-op fast
|
|
// paths on IIgs).
|
|
if (joeyAudioInit()) {
|
|
joeyLogF("UBER: audioInit OK\n");
|
|
} else {
|
|
joeyLogF("UBER: audioInit failed (skipping audio)\n");
|
|
}
|
|
|
|
// Reset stage + run all per-frame timed tests.
|
|
surfaceClear(gStage, 0);
|
|
fillRect(gStage, 0, 0, 320, 8, 1);
|
|
stagePresent();
|
|
|
|
runAllTests();
|
|
|
|
endFrame = joeyFrameCount();
|
|
elapsedFrames = (uint16_t)(endFrame - startFrame);
|
|
elapsedMs = ((unsigned long)elapsedFrames * 1000UL) / (unsigned long)joeyFrameHz();
|
|
joeyLogF("UBER: total wall time: %lu ms (%u frames @ %u Hz)\n",
|
|
elapsedMs, elapsedFrames, (unsigned)joeyFrameHz());
|
|
|
|
// Done. Green screen + waitForKey.
|
|
surfaceClear(gStage, 2);
|
|
stagePresent();
|
|
|
|
joeyLogF("UBER: press any key to exit\n");
|
|
joeyWaitForAnyKey();
|
|
|
|
spriteDestroy(gSprite);
|
|
joeyShutdown();
|
|
return 0;
|
|
}
|