From d023d76ebe103f7fe74520af3cf88038c7c66711 Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Sun, 26 Apr 2026 20:41:31 -0500 Subject: [PATCH] Compiled sprite back/restore working. --- examples/audio/audio.c | 98 ++++++++++-- examples/audio/test_assets.h | 169 --------------------- examples/sprite/sprite.c | 43 +++++- include/joey/sprite.h | 19 ++- make/amiga.mk | 16 +- make/atarist.mk | 16 +- make/dos.mk | 15 +- make/iigs.mk | 61 ++++---- scripts/run-amiga.sh | 5 + src/codegen/spriteCompile.c | 275 +++++++++++++++++++++++++++++++--- src/codegen/spriteEmitIigs.c | 95 +++++++++++- src/codegen/spriteEmitter.h | 11 ++ src/core/sprite.c | 87 ++++++++++- src/core/spriteInternal.h | 23 ++- src/port/amiga/c2p.s | 125 ++++++++++++++++ src/port/amiga/hal.c | 127 ++++++++++------ src/port/atarist/hal.c | 5 + src/port/dos/hal.c | 10 ++ src/port/iigs/audio_full.c | 46 +++--- src/port/iigs/hal.c | 11 ++ tools/joeysprite/joeysprite.c | 39 +++-- 21 files changed, 953 insertions(+), 343 deletions(-) delete mode 100644 examples/audio/test_assets.h create mode 100644 src/port/amiga/c2p.s diff --git a/examples/audio/audio.c b/examples/audio/audio.c index b1815ed..1f90f3e 100644 --- a/examples/audio/audio.c +++ b/examples/audio/audio.c @@ -1,19 +1,29 @@ -// Audio demo: starts an embedded .MOD on entry, triggers a short -// digital SFX every time SPACE is tapped, and exits on ESC. The MOD -// and SFX bytes live in test_assets.h, generated from assets/test.mod -// and assets/test.sfx by the audio-asset build pipeline. +// Audio demo: starts a .MOD on entry, triggers a short digital SFX +// every time SPACE is tapped, and exits on ESC. The MOD and SFX bytes +// are NOT embedded in the binary -- they're loaded at runtime from +// the DATA folder shipped on the disk image (see make/<platform>.mk for +// each platform's packaging step). 
// -// On platforms where the audio HAL is still a stub (DOS / ST / IIgs -// at the moment), joeyAudioInit returns false, every audio call is a -// quiet no-op, and the demo runs as a silent input loop -- you can -// confirm the build links and the input plumbing still works without -// hearing anything. +// On platforms where the audio HAL is still a stub, joeyAudioInit +// returns false, every audio call is a quiet no-op, and the demo runs +// as a silent input loop -- you can confirm the build links and the +// input plumbing still works without hearing anything. #include +#include #include -#include "test_assets.h" +// Each platform encodes the MOD differently. The IIgs build converts +// test.mod -> test.ntp via joeymod at build time and ships the .NTP; +// every other platform ships the raw .MOD. test.sfx is the same raw +// PCM blob everywhere. +#if defined(JOEYLIB_PLATFORM_IIGS) +# define TEST_MOD_PATH "DATA/TEST.NTP" +#else +# define TEST_MOD_PATH "DATA/test.mod" +#endif +#define TEST_SFX_PATH "DATA/test.sfx" #define SFX_SLOT 0 #define SFX_RATE_HZ 8000 @@ -28,6 +38,49 @@ #define BAR_H 16 +// Read an entire file into a freshly allocated buffer. Caller must +// free(*outBytes) once the audio engine has consumed the data. +// Returns false if the file is missing, empty, or larger than maxLen. 
+static bool loadFile(const char *path, uint32_t maxLen, uint8_t **outBytes, uint32_t *outLen) { + FILE *fp; + long fileSize; + uint8_t *buf; + size_t readBytes; + + fp = fopen(path, "rb"); + if (fp == NULL) { + return false; + } + if (fseek(fp, 0L, SEEK_END) != 0) { + fclose(fp); + return false; + } + fileSize = ftell(fp); + if (fileSize <= 0 || (uint32_t)fileSize > maxLen) { + fclose(fp); + return false; + } + if (fseek(fp, 0L, SEEK_SET) != 0) { + fclose(fp); + return false; + } + buf = (uint8_t *)malloc((size_t)fileSize); + if (buf == NULL) { + fclose(fp); + return false; + } + readBytes = fread(buf, 1, (size_t)fileSize, fp); + fclose(fp); + if (readBytes != (size_t)fileSize) { + free(buf); + return false; + } + *outBytes = buf; + *outLen = (uint32_t)fileSize; + return true; +} + + static void buildPalette(SurfaceT *screen) { uint16_t colors[SURFACE_COLORS_PER_PALETTE]; uint16_t i; @@ -59,6 +112,10 @@ int main(void) { SurfaceT *screen; bool audioOk; int16_t flashFrames; + uint8_t *modBytes; + uint32_t modLen; + uint8_t *sfxBytes; + uint32_t sfxLen; config.hostMode = HOST_MODE_TAKEOVER; config.codegenBytes = 8 * 1024; @@ -78,9 +135,21 @@ int main(void) { return 1; } + modBytes = NULL; + sfxBytes = NULL; + modLen = 0; + sfxLen = 0; + audioOk = joeyAudioInit(); if (audioOk) { - joeyAudioPlayMod(gTestMod, gTestMod_len, true); + if (loadFile(TEST_MOD_PATH, 64UL * 1024UL, &modBytes, &modLen)) { + joeyAudioPlayMod(modBytes, modLen, true); + // joeyAudioPlayMod copies the bytes into the engine's own + // buffer; safe to release ours immediately. 
+ free(modBytes); + modBytes = NULL; + } + (void)loadFile(TEST_SFX_PATH, 64UL * 1024UL, &sfxBytes, &sfxLen); } buildPalette(screen); @@ -95,8 +164,8 @@ int main(void) { if (joeyKeyPressed(KEY_ESCAPE)) { break; } - if (joeyKeyPressed(KEY_SPACE)) { - joeyAudioPlaySfx(SFX_SLOT, gTestSfx, gTestSfx_len, SFX_RATE_HZ); + if (joeyKeyPressed(KEY_SPACE) && sfxBytes != NULL) { + joeyAudioPlaySfx(SFX_SLOT, sfxBytes, sfxLen, SFX_RATE_HZ); flashFrames = 8; } @@ -115,6 +184,9 @@ int main(void) { joeyAudioStopMod(); joeyAudioShutdown(); } + if (sfxBytes != NULL) { + free(sfxBytes); + } joeyShutdown(); return 0; } diff --git a/examples/audio/test_assets.h b/examples/audio/test_assets.h deleted file mode 100644 index ab09d16..0000000 --- a/examples/audio/test_assets.h +++ /dev/null @@ -1,169 +0,0 @@ -// Generated by examples/audio/build (do not edit by hand). -// Source assets: assets/test.mod, assets/test.sfx. -// -// .MOD is consumed by Amiga/DOS/ST audio HALs directly. The IIgs -// HAL takes a .NTP blob -- a separate header for the IIgs build -// will be generated by joeymod once the IIgs port is wired up. 
- -#ifndef JOEY_AUDIO_TEST_ASSETS_H -#define JOEY_AUDIO_TEST_ASSETS_H - -static const unsigned char gTestMod[] = { - 0x4a, 0x6f, 0x65, 0x79, 0x41, 0x75, 0x64, 0x69, 0x6f, 0x54, 0x65, 0x73, 0x74, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x73, 0x71, 0x75, 0x61, 0x72, 0x65, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x00, 0x40, 0x00, 0x00, - 0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x7f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4d, 0x2e, 0x4b, 0x2e, 0x03, 0x58, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfa, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xa6, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03, 0x58, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xa6, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0x80, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xfa, 0x10, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x7f, 0x7f, 0x7f, - 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x80, 0x80, 0x80, 0x80, - 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, -}; -static const unsigned int gTestMod_len = 2140; - -static const unsigned char gTestSfx[] = { - 0x00, 0x31, 0x5a, 0x74, 0x7c, 0x71, 0x53, 0x29, 0xf9, 0xca, 0xa4, 0x8d, 0x88, 0x96, 0xb5, 0xdf, - 0x0d, 0x3a, 0x5d, 0x71, 0x73, 0x63, 0x43, 0x19, 0xec, 0xc2, 0xa2, 0x91, 0x92, 0xa4, 0xc4, 0xee, - 0x19, 0x41, 0x5e, 0x6c, 0x69, 0x55, 0x34, 0x0b, 0xe2, 0xbc, 0xa2, 0x97, 0x9d, 0xb2, 0xd3, 0xfb, - 0x23, 0x46, 0x5d, 0x65, 0x5d, 0x47, 0x26, 0x00, 0xd9, 0xb9, 0xa5, 0x9f, 0xa8, 0xc0, 0xe1, 0x06, - 0x2a, 0x48, 0x59, 0x5d, 0x52, 0x39, 0x18, 0xf5, 0xd3, 0xb8, 0xa9, 0xa8, 0xb5, 0xcd, 0xee, 0x10, - 0x30, 0x48, 0x54, 0x53, 0x45, 0x2c, 0x0d, 0xec, 0xcf, 0xb9, 0xaf, 0xb2, 0xc1, 0xda, 0xf9, 0x17, - 0x33, 0x46, 0x4e, 0x49, 0x39, 0x20, 0x03, 0xe6, 0xcd, 0xbc, 0xb6, 
0xbc, 0xcd, 0xe5, 0x01, 0x1d, - 0x34, 0x42, 0x46, 0x3f, 0x2d, 0x15, 0xfb, 0xe1, 0xcd, 0xc0, 0xbe, 0xc7, 0xd8, 0xef, 0x08, 0x20, - 0x33, 0x3d, 0x3d, 0x34, 0x23, 0x0c, 0xf5, 0xdf, 0xcf, 0xc6, 0xc7, 0xd1, 0xe2, 0xf8, 0x0e, 0x21, - 0x30, 0x36, 0x34, 0x2a, 0x19, 0x04, 0xf0, 0xdf, 0xd2, 0xcd, 0xd1, 0xdb, 0xec, 0xff, 0x11, 0x21, - 0x2b, 0x2f, 0x2a, 0x20, 0x10, 0xff, 0xee, 0xe0, 0xd7, 0xd5, 0xda, 0xe5, 0xf4, 0x03, 0x12, 0x1f, - 0x26, 0x26, 0x21, 0x17, 0x09, 0xfb, 0xee, 0xe3, 0xde, 0xde, 0xe3, 0xed, 0xfa, 0x06, 0x12, 0x1b, - 0x1f, 0x1e, 0x18, 0x0f, 0x04, 0xf9, 0xef, 0xe8, 0xe5, 0xe6, 0xec, 0xf4, 0xfe, 0x07, 0x10, 0x15, - 0x17, 0x15, 0x10, 0x09, 0x00, 0xf9, 0xf2, 0xee, 0xed, 0xef, 0xf3, 0xfa, 0x00, 0x07, 0x0c, 0x0f, - 0x0f, 0x0d, 0x09, 0x04, 0xff, 0xfa, 0xf6, 0xf5, 0xf5, 0xf7, 0xfa, 0xfe, 0x01, 0x04, 0x07, 0x08, - 0x07, 0x06, 0x03, 0x01, 0xff, 0xfd, 0xfc, 0xfc, 0xfd, 0xfe, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, -}; -static const unsigned int gTestSfx_len = 256; - -#endif diff --git a/examples/sprite/sprite.c b/examples/sprite/sprite.c index bb1d37b..18d8e14 100644 --- a/examples/sprite/sprite.c +++ b/examples/sprite/sprite.c @@ -101,6 +101,14 @@ int main(void) { int16_t y; int16_t vx; int16_t vy; + int16_t oldX; + int16_t oldY; + uint16_t oldW; + uint16_t oldH; + int16_t unionX; + int16_t unionY; + int16_t unionRight; + int16_t unionBottom; bool haveBackup; config.hostMode = HOST_MODE_TAKEOVER; @@ -153,17 +161,26 @@ int main(void) { haveBackup = true; for (;;) { - joeyWaitVBL(); joeyInputPoll(); if (joeyKeyPressed(KEY_ESCAPE)) { break; } - // Restore the bytes that lived under the previous-frame ball, - // then move + redraw + present the new region. + // Stash the prior ball's region before restoring the bytes + // under it. Do all off-screen work (restore + move + draw) + // first, then waitVBL + ONE surfacePresentRect covering both + // old and new regions. 
Putting waitVBL immediately before the + // present lets the present land inside the VBL window so the + // CRT never sees a half-updated framebuffer (matters most on + // single-buffered chunky targets like IIgs SHR; on planar + // c2p platforms it also avoids c2p racing the raster). + oldX = backup.x; + oldY = backup.y; + oldW = backup.width; + oldH = backup.height; + if (haveBackup) { spriteRestoreUnder(screen, &backup); - surfacePresentRect(screen, backup.x, backup.y, backup.width, backup.height); } x = (int16_t)(x + vx); @@ -175,7 +192,23 @@ int main(void) { spriteSaveUnder(screen, ball, x, y, &backup); spriteDraw(screen, ball, x, y); - surfacePresentRect(screen, backup.x, backup.y, backup.width, backup.height); + + // Bounding box of (old rect) U (new rect). For typical + // small-step motion the rects overlap heavily so the union + // is barely larger than one ball. + unionX = (oldX < backup.x) ? oldX : backup.x; + unionY = (oldY < backup.y) ? oldY : backup.y; + unionRight = (int16_t)((oldX + oldW > backup.x + backup.width) + ? (oldX + oldW) + : (backup.x + backup.width)); + unionBottom = (int16_t)((oldY + oldH > backup.y + backup.height) + ? (oldY + oldH) + : (backup.y + backup.height)); + + joeyWaitVBL(); + surfacePresentRect(screen, unionX, unionY, + (uint16_t)(unionRight - unionX), + (uint16_t)(unionBottom - unionY)); haveBackup = true; } diff --git a/include/joey/sprite.h b/include/joey/sprite.h index 04e519f..32ed7bb 100644 --- a/include/joey/sprite.h +++ b/include/joey/sprite.h @@ -113,12 +113,19 @@ SpriteT *spriteCreateFromSurface(const SurfaceT *src, int16_t x, int16_t y, uint8_t widthTiles, uint8_t heightTiles, SpriteFlagsE flags); // Load a sprite from a `.spr` file produced by the host-side -// joeysprite tool or by spriteSaveFile. 
Format is target-native: -// 4-byte header (widthTiles, heightTiles, codeSize), then a fixed- -// size offsets table (JOEY_SPRITE_SHIFT_COUNT * 3 * uint16_t), then -// codeSize bytes of position-independent machine code. The runtime -// copies the code into the codegen arena, so the file's bytes can be -// freed once this returns. +// joeysprite tool or by spriteSaveFile. Format is target-native for +// the compiled-code section: +// byte 0 widthTiles +// byte 1 heightTiles +// bytes 2-3 codeSize (LE16) +// bytes 4-5 tileBytes (LE16) = widthTiles*heightTiles*32 +// ... offsets table (JOEY_SPRITE_SHIFT_COUNT * +// SPRITE_OP_COUNT * uint16_t LE) +// ... compiled code (codeSize bytes) +// ... raw tile data (tileBytes bytes; tile-major 4bpp) +// The runtime keeps both the compiled bytes (fast-path draws) and +// the tile data (interpreter clip path), so loaded sprites work for +// partially-off-surface draws without crashing. SpriteT *spriteLoadFile(const char *path, SpriteFlagsE flags); // Same as spriteLoadFile but parses bytes already in memory. diff --git a/make/amiga.mk b/make/amiga.mk index d14928b..9bd4a1a 100644 --- a/make/amiga.mk +++ b/make/amiga.mk @@ -65,8 +65,14 @@ SPRITE_BIN := $(BINDIR)/Sprite AUDIO_SRC := $(EXAMPLES)/audio/audio.c AUDIO_BIN := $(BINDIR)/Audio +# Game data lives under bin/DATA/, ready to be copied into the +# scratch JOEYLIB hard-drive dir staged by scripts/run-amiga.sh. +# audio.c fopens "DATA/test.mod" etc. relative to the boot volume. 
+DATA_DIR := $(BINDIR)/DATA +DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx + .PHONY: all amiga clean-amiga -all amiga: $(LIB) $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) +all amiga: $(LIB) $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(DATA_FILES) $(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c @mkdir -p $(dir $@) @@ -120,5 +126,13 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB) @mkdir -p $(dir $@) $(AMIGA_CC) $(CFLAGS) $< $(LIB) -o $@ $(LDFLAGS) +$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod + @mkdir -p $(DATA_DIR) + cp $< $@ + +$(DATA_DIR)/test.sfx: $(REPO_DIR)/assets/test.sfx + @mkdir -p $(DATA_DIR) + cp $< $@ + clean-amiga: rm -rf $(BUILD) diff --git a/make/atarist.mk b/make/atarist.mk index 6f8f32b..9e77bca 100644 --- a/make/atarist.mk +++ b/make/atarist.mk @@ -58,8 +58,14 @@ SPRITE_BIN := $(BINDIR)/SPRITE.PRG AUDIO_SRC := $(EXAMPLES)/audio/audio.c AUDIO_BIN := $(BINDIR)/AUDIO.PRG +# Game data lives under bin/DATA/, alongside the binaries Hatari picks +# up when bin/ is mounted as the GEMDOS C: drive. audio.c fopens +# "DATA/test.mod" etc. 
+DATA_DIR := $(BINDIR)/DATA +DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx + .PHONY: all atarist clean-atarist -all atarist: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) +all atarist: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(DATA_FILES) $(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c @mkdir -p $(dir $@) @@ -121,5 +127,13 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB) @mkdir -p $(dir $@) $(ST_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@ $(LDFLAGS) +$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod + @mkdir -p $(DATA_DIR) + cp $< $@ + +$(DATA_DIR)/test.sfx: $(REPO_DIR)/assets/test.sfx + @mkdir -p $(DATA_DIR) + cp $< $@ + clean-atarist: rm -rf $(BUILD) diff --git a/make/dos.mk b/make/dos.mk index 0ffa004..a1b808c 100644 --- a/make/dos.mk +++ b/make/dos.mk @@ -48,8 +48,13 @@ SPRITE_BIN := $(BINDIR)/SPRITE.EXE AUDIO_SRC := $(EXAMPLES)/audio/audio.c AUDIO_BIN := $(BINDIR)/AUDIO.EXE +# Game data lives under bin/DATA/, alongside the binaries DOSBox picks +# up when bin/ is mounted as C:. audio.c fopens "DATA/test.mod" etc. 
+DATA_DIR := $(BINDIR)/DATA +DATA_FILES := $(DATA_DIR)/test.mod $(DATA_DIR)/test.sfx + .PHONY: all dos clean-dos -all dos: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) +all dos: $(LIB) $(LIBXMP_AR) $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(DATA_FILES) $(BUILD)/obj/core/%.o: $(SRC_CORE)/%.c @mkdir -p $(dir $@) @@ -109,5 +114,13 @@ $(AUDIO_BIN): $(AUDIO_SRC) $(LIB) $(DOS_CC) $(CFLAGS) $< $(LIB) $(LIBXMP_AR) -o $@ $(DOS_EMBED_DPMI) $@ +$(DATA_DIR)/test.mod: $(REPO_DIR)/assets/test.mod + @mkdir -p $(DATA_DIR) + cp $< $@ + +$(DATA_DIR)/test.sfx: $(REPO_DIR)/assets/test.sfx + @mkdir -p $(DATA_DIR) + cp $< $@ + clean-dos: rm -rf $(BUILD) diff --git a/make/iigs.mk b/make/iigs.mk index 8cc1214..4315b4c 100644 --- a/make/iigs.mk +++ b/make/iigs.mk @@ -45,6 +45,7 @@ LIB_SRCS_AUDIO := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS_AUDIO) $(CODEGEN_SRCS) # load address even though it was assembled with `org $0F0000`. NTP_SRC := $(REPO_DIR)/toolchains/iigs/ntp/ninjatrackerplus.s NTP_BIN := $(BUILD)/audio/ntpplayer.bin +NTP_HEADER := $(BUILD)/audio/ntpplayer_data.h IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32 HELLO_SRC := $(EXAMPLES)/hello/hello.c @@ -76,7 +77,7 @@ IIX_INCLUDES := \ -I $(REPO_DIR)/src/codegen .PHONY: all iigs iigs-disk clean-iigs -all iigs: $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(NTP_BIN) +all iigs: $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(NTP_BIN): $(NTP_SRC) $(IIGS_MERLIN) @mkdir -p $(dir $@) @@ -84,6 +85,21 @@ $(NTP_BIN): $(NTP_SRC) $(IIGS_MERLIN) cd $(BUILD)/audio && $(IIGS_MERLIN) . ninjatrackerplus.s mv $(BUILD)/audio/ntpplayer $@ +# Bake the NTP replayer bytes into a C header so audio_full.c can link +# the player into the AUDIO binary instead of fopen'ing a separate +# NTPPLAYER.BIN at runtime. 
NTP is bank-internal / PIC, so the linked +# bytes still BlockMove cleanly into the Memory Manager handle the HAL +# allocates. Same xxd -i pattern the old test_assets.h generator used. +$(NTP_HEADER): $(NTP_BIN) + @mkdir -p $(dir $@) + @echo "// Generated by make/iigs.mk -- NinjaTrackerPlus replayer bytes." > $@ + @echo "#ifndef JOEYLIB_NTPPLAYER_DATA_H" >> $@ + @echo "#define JOEYLIB_NTPPLAYER_DATA_H" >> $@ + @printf "static const unsigned char gNtpPlayerBytes[] = {\n" >> $@ + @xxd -i < $(NTP_BIN) >> $@ + @printf "};\nstatic const unsigned int gNtpPlayerBytes_len = %d;\n" $$(wc -c < $(NTP_BIN)) >> $@ + @echo "#endif" >> $@ + # iix-build.sh takes MAIN.c first, then EXTRA sources (compiled with # #pragma noroot). The example source supplies main(); libjoey sources # are the extras. The chtyp post-step tags the output as GS/OS S16 @@ -118,15 +134,11 @@ $(SPRITE_BIN): $(SPRITE_SRC) $(LIB_SRCS) $(IIGS_BUILD) $(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS) $(IIGS_IIX) chtyp -t S16 $@ -# IIgs override of test_assets.h: gTestMod[] holds .NTP-converted bytes -# (NinjaTrackerPlus runtime format) instead of raw .MOD. The SFX bytes -# are platform-independent so we just embed the same test.sfx. -# -# .NTP conversion runs joeymod (which shells out to ntpconverter.php). -# If PHP is missing we skip the override entirely and the regular .MOD -# bytes get linked instead -- the IIgs binary still builds and the -# trampoline still calls into NTP, but NTPprepare will reject the MOD -# magic at runtime. Install php-cli to get real audio playback. +# Convert the cross-platform .MOD asset to NinjaTrackerPlus runtime +# format via joeymod (which shells out to ntpconverter.php). Without +# php-cli the conversion is skipped; in that case the IIgs disk just +# won't carry a TEST.NTP and the audio demo will silently skip the +# music at runtime instead of crashing on bad MOD magic. 
HAVE_PHP := $(shell command -v php >/dev/null 2>&1 && echo 1) ifeq ($(HAVE_PHP),1) @@ -134,39 +146,24 @@ $(AUDIO_NTP): $(AUDIO_MOD) $(JOEYMOD) @mkdir -p $(dir $@) $(JOEYMOD) $< $@ -$(AUDIO_HEADER): $(AUDIO_NTP) $(AUDIO_SFX) - @mkdir -p $(dir $@) - @echo "// Generated by make/iigs.mk -- gTestMod is .NTP, gTestSfx is raw PCM." > $@ - @echo "#ifndef JOEY_AUDIO_TEST_ASSETS_H" >> $@ - @echo "#define JOEY_AUDIO_TEST_ASSETS_H" >> $@ - @printf "static const unsigned char gTestMod[] = {\n" >> $@ - @xxd -i < $(AUDIO_NTP) >> $@ - @printf "};\nstatic const unsigned int gTestMod_len = %d;\n" $$(wc -c < $(AUDIO_NTP)) >> $@ - @printf "static const unsigned char gTestSfx[] = {\n" >> $@ - @xxd -i < $(AUDIO_SFX) >> $@ - @printf "};\nstatic const unsigned int gTestSfx_len = %d;\n" $$(wc -c < $(AUDIO_SFX)) >> $@ - @echo "#endif" >> $@ - -AUDIO_HEADER_DEP := $(AUDIO_HEADER) -AUDIO_HEADER_FLAGS := -I $(dir $(AUDIO_HEADER)) +AUDIO_DATA_FILES := $(AUDIO_NTP) $(AUDIO_SFX) else -$(info iigs: php-cli not installed -- AUDIO demo will embed raw .MOD bytes; install php-cli for real IIgs audio playback) -AUDIO_HEADER_DEP := -AUDIO_HEADER_FLAGS := +$(info iigs: php-cli not installed -- AUDIO demo will ship without TEST.NTP; install php-cli for real IIgs audio playback) +AUDIO_DATA_FILES := $(AUDIO_SFX) endif -$(AUDIO_BIN): $(AUDIO_SRC) $(LIB_SRCS_AUDIO) $(AUDIO_HEADER_DEP) $(IIGS_BUILD) +$(AUDIO_BIN): $(AUDIO_SRC) $(LIB_SRCS_AUDIO) $(NTP_HEADER) $(IIGS_BUILD) @mkdir -p $(dir $@) - $(IIGS_BUILD) -b $(IIX_INCLUDES) $(AUDIO_HEADER_FLAGS) -I $(EXAMPLES)/audio -o $@ $(AUDIO_SRC) $(LIB_SRCS_AUDIO) + $(IIGS_BUILD) -b $(IIX_INCLUDES) -I $(dir $(NTP_HEADER)) -I $(EXAMPLES)/audio -o $@ $(AUDIO_SRC) $(LIB_SRCS_AUDIO) $(IIGS_IIX) chtyp -t S16 $@ # Assemble an 800KB ProDOS 2img containing the examples, ready to # mount in GSplus alongside a GS/OS boot volume. 
iigs-disk: $(DISK_IMG) -$(DISK_IMG): $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(NTP_BIN) $(IIGS_PACKAGE) +$(DISK_IMG): $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(AUDIO_DATA_FILES) $(IIGS_PACKAGE) @mkdir -p $(dir $@) - $(IIGS_PACKAGE) $@ $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(NTP_BIN) + $(IIGS_PACKAGE) $@ $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) -- $(AUDIO_DATA_FILES) clean-iigs: rm -rf $(BUILD) diff --git a/scripts/run-amiga.sh b/scripts/run-amiga.sh index 63e343c..a289743 100755 --- a/scripts/run-amiga.sh +++ b/scripts/run-amiga.sh @@ -62,6 +62,11 @@ cp "$bin_dir/Keys" "$work/" 2>/dev/null || true cp "$bin_dir/Joy" "$work/" 2>/dev/null || true cp "$bin_dir/Sprite" "$work/" 2>/dev/null || true cp "$bin_dir/Audio" "$work/" 2>/dev/null || true +# Stage the DATA folder (test.mod, test.sfx) the audio demo loads from +# the boot volume at runtime. +if [[ -d "$bin_dir/DATA" ]]; then + cp -r "$bin_dir/DATA" "$work/" +fi # ':' prefix anchors to the root of the current volume; otherwise # AmigaDOS looks in C: and the command is not found. echo ":$file" > "$work/s/startup-sequence" diff --git a/src/codegen/spriteCompile.c b/src/codegen/spriteCompile.c index 44ab902..d87bf2a 100644 --- a/src/codegen/spriteCompile.c +++ b/src/codegen/spriteCompile.c @@ -42,10 +42,53 @@ static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift } +// Save-under and restore-under emitters are IIgs-only at the moment; +// other CPUs return 0, the runtime treats that as "not compiled" and +// falls back to spriteSaveUnderInterpreted / spriteRestoreUnderInterpreted. 
+static uint16_t emitSaveForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) { +#if defined(JOEYLIB_PLATFORM_IIGS) + return spriteEmitSaveIigs(out, sp, shift); +#else + (void)out; (void)sp; (void)shift; + return 0; +#endif +} + + +static uint16_t emitRestoreForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) { +#if defined(JOEYLIB_PLATFORM_IIGS) + return spriteEmitRestoreIigs(out, sp, shift); +#else + (void)out; (void)sp; (void)shift; + return 0; +#endif +} + + +// Sizing pass: returns total bytes the emitters will produce for +// this sprite's DRAW (per shift) + SAVE (per shift) + RESTORE (per +// shift). Emitters that aren't implemented for the current platform +// return 0 here, so totalSize tracks only the ops that will actually +// land in the arena. +static uint32_t emitTotalSize(uint8_t *scratch, const SpriteT *sp) { + uint32_t total; + uint8_t shift; + + total = 0; + for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { + total += emitDrawForTarget(scratch, sp, shift); + total += emitSaveForTarget(scratch, sp, shift); + total += emitRestoreForTarget(scratch, sp, shift); + } + return total; +} + + bool spriteCompile(SpriteT *sp) { uint8_t *scratch; uint32_t totalSize; uint8_t shift; + uint8_t op; ArenaSlotT *slot; uint8_t *dst; uint16_t written; @@ -66,12 +109,7 @@ bool spriteCompile(SpriteT *sp) { return false; } - totalSize = 0; - for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { - written = emitDrawForTarget(scratch, sp, shift); - totalSize += written; - } - + totalSize = emitTotalSize(scratch, sp); if (totalSize > 0xFFFFu) { free(scratch); return false; @@ -86,11 +124,20 @@ bool spriteCompile(SpriteT *sp) { dst = codegenArenaBase() + slot->offset; offset = 0; for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { - written = emitDrawForTarget(dst + offset, sp, shift); - sp->routineOffsets[shift][SPRITE_OP_DRAW] = offset; - sp->routineOffsets[shift][SPRITE_OP_SAVE] = 0; - sp->routineOffsets[shift][SPRITE_OP_RESTORE] = 0; - 
offset = (uint16_t)(offset + written); + for (op = 0; op < SPRITE_OP_COUNT; op++) { + switch (op) { + case SPRITE_OP_DRAW: written = emitDrawForTarget (dst + offset, sp, shift); break; + case SPRITE_OP_SAVE: written = emitSaveForTarget (dst + offset, sp, shift); break; + case SPRITE_OP_RESTORE: written = emitRestoreForTarget(dst + offset, sp, shift); break; + default: written = 0; break; + } + if (written == 0) { + sp->routineOffsets[shift][op] = SPRITE_NOT_COMPILED; + } else { + sp->routineOffsets[shift][op] = offset; + offset = (uint16_t)(offset + written); + } + } } sp->slot = slot; free(scratch); @@ -166,20 +213,191 @@ void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) gSpriteCallStub[12] = 0xAB; gSpriteCallStub[13] = 0x6B; - // ORCA-C compiles this function under `longa on` (M=16) and emits - // the function epilogue assuming M=16 at exit -- the deallocation - // ADC takes a 2-byte immediate. The byte writes to gSpriteCallStub - // above leave M=8, so PHP captured M=8 and PLP would restore M=8. - // That mode mismatch caused the epilogue's `ADC #imm; TCS` bytes - // to be re-decoded as a wider ADC swallowing the TCS, S never - // adjusted, RTL popped the wrong bytes, control fell into BSS and - // BRK'd. Use REP/SEP without PHP/PLP and explicitly restore M=16 - // before returning to compiled C. + // ORCA-C compiles this function under `longa on / longi on` + // (M=16, X=16) and emits the function epilogue assuming those + // widths at exit -- the deallocation ADC takes a 2-byte immediate + // and any LDX/LDY use 2-byte immediates. The byte writes to + // gSpriteCallStub above leave M=8, and an earlier PHP/PLP-only + // wrapper let the asm block exit in the wrong M state. The + // epilogue's `ADC #imm; TCS` then decoded as a wider ADC that + // swallowed the TCS, S was never adjusted, RTL popped wrong + // bytes, control fell into BSS, and the IIgs hit BRK on a zero + // byte. Force M=16/X=16 before returning to compiled C. 
asm { rep #0x30 sep #0x20 jsl gSpriteCallStub - rep #0x20 + rep #0x30 + } +} + + +// Save/Restore call stub. The compiled MVN-row routines need +// M=16 / X=16 and expect index registers preset to source/dest +// offsets within their respective banks. MVN's own bank operands +// are in the routine bytes (patched per call), so this stub doesn't +// need to load DBR -- it just sets X and Y, JSLs, and restores DBR +// (MVN itself sets DBR to its destination bank as a side effect). +// +// Stub layout (13 bytes): +// 00: 8B PHB ; save caller DBR +// 01: A2 lo hi LDX #srcOffset +// 04: A0 lo hi LDY #dstOffset +// 07: 22 lo mid bk JSL routine +// 0B: AB PLB ; restore caller DBR +// 0C: 6B RTL +// +// For SAVE: X = screen lo, Y = backup lo +// For RESTORE: X = backup lo, Y = screen lo +static unsigned char gSpriteCopyStub[13]; + + +// patchMvnBanks stamps the destination and source bank operand bytes +// into each MVN inside an emitted save/restore routine. Layout from +// spriteEmitIigs.c::emitMvnCopyRoutine: +// row 0 (6 bytes): A9 lo hi 54 db sb +// row R (12 bytes, R>=1): 8A/98 18 69 lo hi AA/A8 A9 lo hi 54 db sb +// end (1 byte): 6B +// MVN dstbk is at offset (12*R + 4); srcbk at (12*R + 5). +static void patchMvnBanks(uint8_t *routine, uint16_t heightPx, uint8_t dstBank, uint8_t srcBank) { + uint16_t r; + + for (r = 0; r < heightPx; r++) { + routine[12u * r + 4u] = dstBank; + routine[12u * r + 5u] = srcBank; + } +} + + +// Common helper: dump a 24-bit pointer's raw bytes via memcpy +// (avoiding ORCA-C's lossy (uint32_t) pointer cast under memorymodel +// 1) and split into low 16 bits + bank. 
+static void splitPointer(const void *ptr, uint16_t *outLo, uint8_t *outBank) { + uint8_t bytes[4]; + uint32_t addr; + + memcpy(bytes, &ptr, 4); + addr = (uint32_t)bytes[0] + | ((uint32_t)bytes[1] << 8) + | ((uint32_t)bytes[2] << 16); + *outLo = (uint16_t)(addr & 0xFFFFu); + *outBank = (uint8_t)((addr >> 16) & 0xFFu); +} + + +void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) { + uint8_t shift; + int16_t clippedX; + uint16_t widthPx; + uint16_t heightPx; + uint16_t copyBytes; + uint16_t screenLo; + uint16_t backupLo; + uint8_t screenBank; + uint8_t backupBank; + uint32_t fnAddr; + uint8_t *routine; + uint8_t *screenPtr; + + shift = (uint8_t)(x & 1); + clippedX = (int16_t)(x & ~1); + widthPx = (uint16_t)(sp->widthTiles * 8); + heightPx = (uint16_t)(sp->heightTiles * 8); + copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0)); + + screenPtr = (uint8_t *)&src->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)clippedX >> 1)]; + splitPointer(screenPtr, &screenLo, &screenBank); + splitPointer(backup->bytes, &backupLo, &backupBank); + + backup->sprite = sp; + backup->x = clippedX; + backup->y = y; + backup->width = (uint16_t)(copyBytes << 1); + backup->height = heightPx; + backup->sizeBytes = (uint16_t)(copyBytes * heightPx); + + fnAddr = codegenArenaBaseAddr() + + sp->slot->offset + + (uint32_t)sp->routineOffsets[shift][SPRITE_OP_SAVE]; + + // Stub: X = screen (source), Y = backup (destination). 
+ gSpriteCopyStub[ 0] = 0x8B; + gSpriteCopyStub[ 1] = 0xA2; + gSpriteCopyStub[ 2] = (unsigned char)(screenLo & 0xFFu); + gSpriteCopyStub[ 3] = (unsigned char)((screenLo >> 8) & 0xFFu); + gSpriteCopyStub[ 4] = 0xA0; + gSpriteCopyStub[ 5] = (unsigned char)(backupLo & 0xFFu); + gSpriteCopyStub[ 6] = (unsigned char)((backupLo >> 8) & 0xFFu); + gSpriteCopyStub[ 7] = 0x22; + gSpriteCopyStub[ 8] = (unsigned char)(fnAddr & 0xFFu); + gSpriteCopyStub[ 9] = (unsigned char)((fnAddr >> 8) & 0xFFu); + gSpriteCopyStub[10] = (unsigned char)((fnAddr >> 16) & 0xFFu); + gSpriteCopyStub[11] = 0xAB; + gSpriteCopyStub[12] = 0x6B; + + routine = codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_SAVE]; + patchMvnBanks(routine, heightPx, /*dst*/backupBank, /*src*/screenBank); + + // MVN-based routine: needs M=16 / X=16; restore M=16 on exit + // matches ORCA-C `longa on` epilogue expectations. + asm { + rep #0x30 + jsl gSpriteCopyStub + rep #0x30 + } +} + + +void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) { + uint8_t shift; + uint16_t heightPx; + uint16_t copyBytes; + uint16_t spriteBytesPerRow; + uint16_t screenLo; + uint16_t backupLo; + uint8_t screenBank; + uint8_t backupBank; + uint32_t fnAddr; + uint8_t *routine; + uint8_t *screenPtr; + SpriteT *sp; + + sp = backup->sprite; + heightPx = backup->height; + copyBytes = (uint16_t)(backup->width >> 1); + spriteBytesPerRow = (uint16_t)(sp->widthTiles * 4); + shift = (copyBytes == spriteBytesPerRow) ? 0 : 1; + + screenPtr = (uint8_t *)&dst->pixels[(uint16_t)backup->y * SURFACE_BYTES_PER_ROW + ((uint16_t)backup->x >> 1)]; + splitPointer(screenPtr, &screenLo, &screenBank); + splitPointer(backup->bytes, &backupLo, &backupBank); + + fnAddr = codegenArenaBaseAddr() + + sp->slot->offset + + (uint32_t)sp->routineOffsets[shift][SPRITE_OP_RESTORE]; + + // Stub: X = backup (source), Y = screen (destination). 
+ gSpriteCopyStub[ 0] = 0x8B; + gSpriteCopyStub[ 1] = 0xA2; + gSpriteCopyStub[ 2] = (unsigned char)(backupLo & 0xFFu); + gSpriteCopyStub[ 3] = (unsigned char)((backupLo >> 8) & 0xFFu); + gSpriteCopyStub[ 4] = 0xA0; + gSpriteCopyStub[ 5] = (unsigned char)(screenLo & 0xFFu); + gSpriteCopyStub[ 6] = (unsigned char)((screenLo >> 8) & 0xFFu); + gSpriteCopyStub[ 7] = 0x22; + gSpriteCopyStub[ 8] = (unsigned char)(fnAddr & 0xFFu); + gSpriteCopyStub[ 9] = (unsigned char)((fnAddr >> 8) & 0xFFu); + gSpriteCopyStub[10] = (unsigned char)((fnAddr >> 16) & 0xFFu); + gSpriteCopyStub[11] = 0xAB; + gSpriteCopyStub[12] = 0x6B; + + routine = codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_RESTORE]; + patchMvnBanks(routine, heightPx, /*dst*/screenBank, /*src*/backupBank); + + asm { + rep #0x30 + jsl gSpriteCopyStub + rep #0x30 } } @@ -197,4 +415,19 @@ void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) fn(destRow); } + +// Non-IIgs platforms have no compiled save/restore yet. The dispatch +// in src/core/sprite.c gates on routineOffsets[shift][SPRITE_OP_*] != +// SPRITE_NOT_COMPILED, so these stubs should never actually run on +// those platforms; they exist so spriteInternal.h's prototypes stay +// resolved at link time. 
+void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) { + (void)src; (void)sp; (void)x; (void)y; (void)backup; +} + + +void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) { + (void)dst; (void)backup; +} + #endif diff --git a/src/codegen/spriteEmitIigs.c b/src/codegen/spriteEmitIigs.c index adce9f0..6b1d93f 100644 --- a/src/codegen/spriteEmitIigs.c +++ b/src/codegen/spriteEmitIigs.c @@ -44,8 +44,9 @@ // ----- Prototypes ----- -static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col); +static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t copyBytes, bool advanceX); static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask); +static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col); static uint16_t writeLE16(uint8_t *out, uint16_t value); @@ -123,6 +124,98 @@ static uint16_t writeLE16(uint8_t *out, uint16_t value) { } +// Common backbone for save and restore. Both ops copy a byte-aligned +// rectangle row-by-row using MVN; only the operand banks (which buffer +// is source vs destination) and which index register (X or Y) needs +// per-row advance differ. +// +// The IIgs supplies a single MVN instruction that copies (count + 1) +// bytes from srcBank:X to dstBank:Y, advancing X and Y as it goes. +// We unroll the row loop: +// +// row 0: row 1..H-1: +// LDA #(copyBytes-1) T_A (TXA for save / TYA for restore) +// MVN dstbk,srcbk CLC +// ADC #(rowStride - copyBytes) +// TA_ (TAX for save / TAY for restore) +// LDA #(copyBytes-1) +// MVN dstbk,srcbk +// end: +// RTL +// +// The bank operand bytes (offset 4 and 5 within each MVN row) are +// patched per call by spriteCompiledSaveUnder / spriteCompiledRestoreUnder +// so the routine works regardless of where the surface and backup +// happen to live in memory. 
+// +// Layout: +// row 0 (6 bytes): A9 lo hi 54 db sb +// row R (12 bytes, R>=1): 8A/98 18 69 lo hi AA/A8 A9 lo hi 54 db sb +// end (1 byte): 6B +// +// The MVN at row R has its dstbk at routine offset (12*R + 4) and +// srcbk at (12*R + 5). +static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t copyBytes, bool advanceX) { + uint16_t cursor; + uint16_t advance; + uint16_t row; + + cursor = 0; + advance = (uint16_t)(SURFACE_BYTES_PER_ROW - copyBytes); + + for (row = 0; row < heightPx; row++) { + if (row > 0) { + out[cursor++] = advanceX ? 0x8A : 0x98; // TXA / TYA + out[cursor++] = 0x18; // CLC + out[cursor++] = 0x69; // ADC #imm (M=16) + cursor += writeLE16(out + cursor, advance); + out[cursor++] = advanceX ? 0xAA : 0xA8; // TAX / TAY + } + out[cursor++] = 0xA9; // LDA #imm (M=16) + cursor += writeLE16(out + cursor, (uint16_t)(copyBytes - 1)); + out[cursor++] = 0x54; // MVN + out[cursor++] = 0x00; // dstbk -- patched per call + out[cursor++] = 0x00; // srcbk -- patched per call + } + out[cursor++] = 0x6B; // RTL + return cursor; +} + + +// SAVE (screen -> backup). Stub passes X = screen low offset (the +// source), Y = backup low offset (the destination); MVN advances both +// by copyBytes per row. Backup rows are contiguous in memory so Y is +// already correct for the next row; screen rows are SURFACE_BYTES_PER_ROW +// apart so X needs an explicit ADC between rows. +uint16_t spriteEmitSaveIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) { + uint16_t heightPx; + uint16_t spriteBytesPerRow; + uint16_t copyBytes; + + heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS); + spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW); + copyBytes = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0)); + + return emitMvnCopyRoutine(out, heightPx, copyBytes, /*advanceX*/true); +} + + +// RESTORE (backup -> screen). Stub passes X = backup low offset, +// Y = screen low offset. 
Backup is contiguous so X advances correctly +// via MVN; screen needs explicit advance, so Y is the one we ADC. +uint16_t spriteEmitRestoreIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) { + uint16_t heightPx; + uint16_t spriteBytesPerRow; + uint16_t copyBytes; + + heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS); + spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW); + copyBytes = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0)); + + return emitMvnCopyRoutine(out, heightPx, copyBytes, /*advanceX*/false); +} + + // 65816 draw emit. Returns bytes written. uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) { uint16_t cursor; diff --git a/src/codegen/spriteEmitter.h b/src/codegen/spriteEmitter.h index 940295b..8a8955f 100644 --- a/src/codegen/spriteEmitter.h +++ b/src/codegen/spriteEmitter.h @@ -23,4 +23,15 @@ uint16_t spriteEmitDrawX86 (uint8_t *out, const SpriteT *sp, uint8_t shift); uint16_t spriteEmitDraw68k (uint8_t *out, const SpriteT *sp, uint8_t shift); uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift); +// Save-under and restore-under emitters. Both copy a byte-aligned +// rectangle between the destination surface and a backup buffer. The +// rectangle's width and start position depend on the shift: for +// shift=0 (even x) it covers exactly the sprite's bytes per row; +// for shift=1 (odd x) it starts at the even pixel x-1 and covers one +// extra byte per row. Per-CPU emitters return 0 to mean "not implemented" -- +// the runtime dispatch falls back to the interpreted path in that +// case. 
+uint16_t spriteEmitSaveIigs (uint8_t *out, const SpriteT *sp, uint8_t shift); +uint16_t spriteEmitRestoreIigs(uint8_t *out, const SpriteT *sp, uint8_t shift); + #endif diff --git a/src/core/sprite.c b/src/core/sprite.c index 0d3a1b2..fdd1257 100644 --- a/src/core/sprite.c +++ b/src/core/sprite.c @@ -287,8 +287,27 @@ void spritePrewarm(SpriteT *sp) { } -// .spr header is 4 bytes: widthTiles, heightTiles, codeSize lo/hi. -#define SPR_HEADER_SIZE 4 +// .spr file format: +// offset bytes field +// ------ ----- -------------------------------------------- +// 0 1 widthTiles +// 1 1 heightTiles +// 2 2 codeSize (LE16) +// 4 2 tileBytes (LE16) = widthTiles*heightTiles*32 +// 6 ... offsets table (JOEY_SPRITE_SHIFT_COUNT * +// SPRITE_OP_COUNT * uint16_t) +// ... codeSize compiled code +// ... tileBytes raw tile data (4bpp packed, tile-major) +// +// The tile data tail lets spriteDrawInterpreted handle partial-clip +// draws without dereferencing a NULL tileData. Without it, +// spriteLoadFile / spriteFromCompiledMem could only produce sprites +// that survive on the fully-on-surface fast path. +// +// File-extension-typed: callers are responsible for opening only +// `.spr` files. The format carries no magic on purpose -- every byte +// counts on retro targets. 
+#define SPR_HEADER_SIZE 6 #define SPR_OFFSETS_SIZE (JOEY_SPRITE_SHIFT_COUNT * SPRITE_OP_COUNT * (uint32_t)sizeof(uint16_t)) @@ -298,8 +317,12 @@ SpriteT *spriteFromCompiledMem(const uint8_t *data, uint32_t length, SpriteFlags uint8_t widthTiles; uint8_t heightTiles; uint16_t codeSize; + uint16_t tileBytes; + uint32_t expectedBytes; const uint8_t *offsetTable; const uint8_t *code; + const uint8_t *tiles; + uint8_t *tileBuf; uint16_t shift; uint16_t op; uint16_t o; @@ -310,27 +333,41 @@ SpriteT *spriteFromCompiledMem(const uint8_t *data, uint32_t length, SpriteFlags widthTiles = data[0]; heightTiles = data[1]; codeSize = (uint16_t)(data[2] | ((uint16_t)data[3] << 8)); + tileBytes = (uint16_t)(data[4] | ((uint16_t)data[5] << 8)); if (widthTiles == 0 || heightTiles == 0 || codeSize == 0) { return NULL; } - if (length < SPR_HEADER_SIZE + SPR_OFFSETS_SIZE + (uint32_t)codeSize) { + if (tileBytes != (uint16_t)(widthTiles * heightTiles * 32u)) { + return NULL; + } + expectedBytes = SPR_HEADER_SIZE + SPR_OFFSETS_SIZE + (uint32_t)codeSize + (uint32_t)tileBytes; + if (length < expectedBytes) { return NULL; } offsetTable = data + SPR_HEADER_SIZE; code = data + SPR_HEADER_SIZE + SPR_OFFSETS_SIZE; + tiles = code + codeSize; slot = codegenArenaAlloc((uint32_t)codeSize); if (slot == NULL) { return NULL; } + tileBuf = (uint8_t *)malloc((size_t)tileBytes); + if (tileBuf == NULL) { + codegenArenaFree(slot); + return NULL; + } + sp = (SpriteT *)malloc(sizeof(SpriteT)); if (sp == NULL) { + free(tileBuf); codegenArenaFree(slot); return NULL; } memcpy(codegenArenaBase() + slot->offset, code, codeSize); + memcpy(tileBuf, tiles, (size_t)tileBytes); for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { for (op = 0; op < SPRITE_OP_COUNT; op++) { @@ -340,10 +377,10 @@ SpriteT *spriteFromCompiledMem(const uint8_t *data, uint32_t length, SpriteFlags } } - sp->tileData = NULL; + sp->tileData = tileBuf; sp->widthTiles = widthTiles; sp->heightTiles = heightTiles; - sp->ownsTileData = false; 
+ sp->ownsTileData = true; sp->slot = slot; sp->flags = flags; return sp; @@ -402,10 +439,11 @@ bool spriteSaveFile(SpriteT *sp, const char *path) { uint16_t op; uint16_t value; uint32_t codeSize; + uint16_t tileBytes; uint8_t *codeStart; - if (sp == NULL || path == NULL) { - return false; + if (sp == NULL || path == NULL || sp->tileData == NULL) { + return false; // tile data required to round-trip the sprite } if (sp->slot == NULL) { // Force-compile so the saved file is self-contained. @@ -418,6 +456,7 @@ bool spriteSaveFile(SpriteT *sp, const char *path) { codeSize = sp->slot->size; codeStart = codegenArenaBase() + sp->slot->offset; + tileBytes = (uint16_t)(sp->widthTiles * sp->heightTiles * 32u); if (codeSize > 0xFFFFu) { return false; // codeSize doesn't fit in the 16-bit header field @@ -427,6 +466,8 @@ bool spriteSaveFile(SpriteT *sp, const char *path) { header[1] = sp->heightTiles; header[2] = (uint8_t)(codeSize & 0xFFu); header[3] = (uint8_t)((codeSize >> 8) & 0xFFu); + header[4] = (uint8_t)(tileBytes & 0xFFu); + header[5] = (uint8_t)((tileBytes >> 8) & 0xFFu); fp = fopen(path, "wb"); if (fp == NULL) { @@ -451,6 +492,10 @@ bool spriteSaveFile(SpriteT *sp, const char *path) { fclose(fp); return false; } + if (fwrite(sp->tileData, 1, (size_t)tileBytes, fp) != (size_t)tileBytes) { + fclose(fp); + return false; + } fclose(fp); return true; } @@ -475,7 +520,10 @@ void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) { int16_t row; int16_t byteStart; int16_t copyBytes; + uint16_t spriteBytesPerRow; + uint8_t shift; uint8_t *dstRow; + SpriteT *sp; if (s == NULL || backup == NULL || backup->bytes == NULL) { return; @@ -501,6 +549,17 @@ void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) { return; } + sp = backup->sprite; + if (sp != NULL && sp->slot != NULL && backup->height == sp->heightTiles * TILE_PIXELS) { + spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW); + copyBytes = (int16_t)(backup->width >> 1); + shift = 
(copyBytes == (int16_t)spriteBytesPerRow) ? 0 : 1; + if (sp->routineOffsets[shift][SPRITE_OP_RESTORE] != SPRITE_NOT_COMPILED) { + spriteCompiledRestoreUnder(s, backup); + return; + } + } + byteStart = (int16_t)(backup->x >> 1); copyBytes = (int16_t)(backup->width >> 1); for (row = 0; row < backup->height; row++) { @@ -524,6 +583,7 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit int16_t copyBytes; int16_t clippedX; int16_t clippedW; + uint8_t shift; const uint8_t *srcRow; if (s == NULL || sp == NULL || backup == NULL) { @@ -536,6 +596,19 @@ void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, Sprit dy = y; w = (int16_t)(sp->widthTiles * TILE_PIXELS); h = (int16_t)(sp->heightTiles * TILE_PIXELS); + + // Compiled fast path: fully on surface and the platform emitted + // bytes for SAVE at this shift. The compiled routine assumes a + // full-size, unclipped rectangle, so anything off-edge falls + // through to the interpreted memcpy loop below. + if (backup->bytes != NULL && sp->slot != NULL && isFullyOnSurface(x, y, (uint16_t)w, (uint16_t)h)) { + shift = (uint8_t)(x & 1); + if (sp->routineOffsets[shift][SPRITE_OP_SAVE] != SPRITE_NOT_COMPILED) { + spriteCompiledSaveUnder(s, sp, x, y, backup); + return; + } + } + if (!clipRect(&dx, &dy, &sx, &sy, &w, &h)) { backup->x = 0; backup->y = 0; diff --git a/src/core/spriteInternal.h b/src/core/spriteInternal.h index f8585b3..d9971b7 100644 --- a/src/core/spriteInternal.h +++ b/src/core/spriteInternal.h @@ -13,6 +13,12 @@ #define SPRITE_OP_RESTORE 2 #define SPRITE_OP_COUNT 3 +// Sentinel stored in routineOffsets[shift][op] when that op's emitter +// returned 0 bytes (i.e., the platform doesn't implement compiled +// codegen for that op yet). Distinct from a real offset of 0, which +// is valid for the first emitted op (typically DRAW shift 0). 
+#define SPRITE_NOT_COMPILED 0xFFFFu + struct SpriteT { const uint8_t *tileData; // wTiles * hTiles * 32 bytes; NULL for loaded sprites uint8_t widthTiles; @@ -31,12 +37,15 @@ struct SpriteT { SpriteFlagsE flags; }; -// Compiled draw entry point. Implemented alongside spriteCompile in -// the per-CPU emitter file (src/codegen/spriteEmit*.c). Handles the -// calling convention the emitted bytes use (cdecl on x86, stack -// args on 68k, ORCA on IIgs). The dispatcher in src/core/sprite.c -// calls this when sp->slot is non-NULL and the draw is fully -// on-surface. spriteCompile itself is in the public API. -void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y); +// Compiled entry points. Implemented alongside spriteCompile in +// src/codegen/spriteCompile.c. Each handles the per-platform calling +// convention the emitted bytes use (cdecl on x86, stack args on 68k, +// inline asm + self-modifying stub on IIgs). The dispatchers in +// src/core/sprite.c call these when sp->slot is non-NULL, the +// matching routineOffsets entry is not SPRITE_NOT_COMPILED, and the +// draw/save/restore is fully on-surface. +void spriteCompiledDraw (SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y); +void spriteCompiledSaveUnder (const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup); +void spriteCompiledRestoreUnder (SurfaceT *dst, const SpriteBackupT *backup); #endif diff --git a/src/port/amiga/c2p.s b/src/port/amiga/c2p.s new file mode 100644 index 0000000..dec46e2 --- /dev/null +++ b/src/port/amiga/c2p.s @@ -0,0 +1,125 @@ +; Amiga chunky-to-planar conversion -- 68000 hand-rolled. +; +; Drop-in replacement for hal.c's old c2pRange C inner loop. The C +; version walked every pixel and OR'd individual bits into 4 plane +; accumulators -- ~1.5 s for a full 320x200 frame on a 7 MHz 68000 +; (the GCC m68k codegen is poor for tight bit-twiddling). 
This rewrite +; uses a 4 KB lookup table built once before first use: each (sourceByte, +; bytePosition, plane) tuple maps to the plane-byte-bit contribution +; that source byte makes when it sits at that position within a +; 4-byte (= 8-pixel) planar group. +; +; Calling convention: m68k-amigaos-gcc cdecl. +; Args on stack at 4(sp), 8(sp), ... +; d2-d7, a2-a6 are callee-save. +; No return value. +; +; void chunkyToPlanarRow(const uint8_t *src, ; 4(sp) - 4bpp packed source row +; uint8_t *p0, ; 8(sp) - plane 0 dest row +; uint8_t *p1, ; 12(sp) - plane 1 dest row +; uint8_t *p2, ; 16(sp) - plane 2 dest row +; uint8_t *p3, ; 20(sp) - plane 3 dest row +; uint16_t n, ; 24(sp) - planar byte count (low word) +; const uint8_t *lut); ; 28(sp) - 4 KB LUT base +; +; LUT layout: lut[pos*1024 + plane*256 + src] = 1-byte plane contribution +; for source byte `src` sitting at byte-position `pos` within its +; 4-byte planar group, going to plane `plane`. Byte-position 0 is the +; leftmost (its two pixels land in plane-byte bits 7 and 6); position +; 3 is the rightmost (bits 1 and 0). Built once by initC2pLut +; (in hal.c) on the first c2pRange call. + + xdef _chunkyToPlanarRow + + section .text,code + +; Stack frame size of MOVEM.L block: d2-d7 (6) + a2-a6 (5) = 11 regs +; * 4 bytes = 44 bytes. Args therefore start at the original sp+4 +; offset PLUS 44. +SAVED_REGS_SIZE equ 44 + + +_chunkyToPlanarRow: + movem.l d2-d7/a2-a6,-(sp) + + move.l 4+SAVED_REGS_SIZE(sp),a0 ; src + move.l 8+SAVED_REGS_SIZE(sp),a1 ; p0 + move.l 12+SAVED_REGS_SIZE(sp),a2 ; p1 + move.l 16+SAVED_REGS_SIZE(sp),a3 ; p2 + move.l 20+SAVED_REGS_SIZE(sp),a4 ; p3 + ; n is a uint16_t but GCC promotes to int and pushes a + ; full 4 bytes -- the low word lives at +2 in big-endian + ; layout. 
+ move.w 24+SAVED_REGS_SIZE+2(sp),d7 ; planar byte count + move.l 28+SAVED_REGS_SIZE(sp),a5 ; LUT base + + subq.w #1,d7 ; DBRA: count-1 + bmi .done ; nothing to do + +.byteLoop: + moveq #0,d0 ; plane 0 acc + moveq #0,d1 ; plane 1 acc + moveq #0,d2 ; plane 2 acc + moveq #0,d3 ; plane 3 acc + + ; ----- Source byte position 0 ----- + ; a5 points to start of LUT. Plane 0/1/2/3 sub-tables + ; for position 0 are at offsets 0/256/512/768. + moveq #0,d4 + move.b (a0)+,d4 ; src[0] + move.l a5,a6 + or.b (a6,d4.w),d0 ; +0 = pos0 plane 0 + lea 256(a6),a6 + or.b (a6,d4.w),d1 ; +256 = pos0 plane 1 + lea 256(a6),a6 + or.b (a6,d4.w),d2 ; +512 = pos0 plane 2 + lea 256(a6),a6 + or.b (a6,d4.w),d3 ; +768 = pos0 plane 3 + + ; ----- Source byte position 1 ----- + lea 256(a6),a6 ; advance to pos1 plane 0 + moveq #0,d4 + move.b (a0)+,d4 + or.b (a6,d4.w),d0 + lea 256(a6),a6 + or.b (a6,d4.w),d1 + lea 256(a6),a6 + or.b (a6,d4.w),d2 + lea 256(a6),a6 + or.b (a6,d4.w),d3 + + ; ----- Source byte position 2 ----- + lea 256(a6),a6 + moveq #0,d4 + move.b (a0)+,d4 + or.b (a6,d4.w),d0 + lea 256(a6),a6 + or.b (a6,d4.w),d1 + lea 256(a6),a6 + or.b (a6,d4.w),d2 + lea 256(a6),a6 + or.b (a6,d4.w),d3 + + ; ----- Source byte position 3 ----- + lea 256(a6),a6 + moveq #0,d4 + move.b (a0)+,d4 + or.b (a6,d4.w),d0 + lea 256(a6),a6 + or.b (a6,d4.w),d1 + lea 256(a6),a6 + or.b (a6,d4.w),d2 + lea 256(a6),a6 + or.b (a6,d4.w),d3 + + ; ----- Store plane bytes ----- + move.b d0,(a1)+ + move.b d1,(a2)+ + move.b d2,(a3)+ + move.b d3,(a4)+ + + dbra d7,.byteLoop + +.done: + movem.l (sp)+,d2-d7/a2-a6 + rts diff --git a/src/port/amiga/hal.c b/src/port/amiga/hal.c index a9c33b8..174cdee 100644 --- a/src/port/amiga/hal.c +++ b/src/port/amiga/hal.c @@ -76,18 +76,70 @@ static uint8_t gCachedScb [SURFACE_HEIGHT]; static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE]; static bool gCacheValid = false; +// 4 KB chunky-to-planar lookup table consumed by chunkyToPlanarRow +// (src/port/amiga/c2p.s). 
Layout: gC2pLut[pos*1024 + plane*256 + src] +// = the plane-byte bit contribution that source byte `src` makes when +// it sits at byte-position `pos` within a 4-byte (8-pixel) planar +// group, going to plane `plane`. Built once by initC2pLut on the +// first halPresent call. +static uint8_t gC2pLut[4 * 1024]; +static bool gC2pLutReady = false; + static bool paletteOrScbChanged(const SurfaceT *src); +static void initC2pLut(void); + +// Provided by src/port/amiga/c2p.s. +extern void chunkyToPlanarRow(const uint8_t *src, + uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, + uint16_t numPlanarBytes, + const uint8_t *lut); // ----- Internal helpers (alphabetical) ----- -// Convert a range of chunky scanlines [y0, y1) to Amiga planar. -// Each plane scanline is 40 bytes (1 bit per pixel x 320 pixels). -// For each destination byte, 8 pixels' worth of 4bpp chunky source is -// read and split into one bit per plane. -// c2p over rows y0..y1 and planar-byte columns byteStart..byteEnd. -// Each planar byte corresponds to 8 horizontal pixels = 4 source -// bytes; partial-rect callers should round byteStart down and byteEnd -// up to keep the 8-pixel alignment. +// Build the 4 KB chunky-to-planar lookup table consumed by +// chunkyToPlanarRow. 
For each (pos, plane, src) tuple, store the +// bit contribution that source byte `src` makes to plane `plane` +// when it sits at byte-position `pos` (0..3) within a 4-byte +// (8-pixel) planar group: +// +// - src high nibble = leftmost pixel -> plane bit (7 - 2*pos) +// - src low nibble = rightmost pixel -> plane bit (6 - 2*pos) +static void initC2pLut(void) { + uint16_t pos; + uint16_t plane; + uint16_t src; + uint8_t highShift; + uint8_t lowShift; + uint8_t highBit; + uint8_t lowBit; + + if (gC2pLutReady) { + return; + } + for (pos = 0; pos < 4; pos++) { + highShift = (uint8_t)(7 - 2 * pos); + lowShift = (uint8_t)(6 - 2 * pos); + for (plane = 0; plane < 4; plane++) { + for (src = 0; src < 256; src++) { + highBit = (uint8_t)(((src >> 4) >> plane) & 1); + lowBit = (uint8_t)(((src & 0x0F) >> plane) & 1); + gC2pLut[pos * 1024 + plane * 256 + src] = + (uint8_t)((highBit << highShift) | (lowBit << lowShift)); + } + } + } + gC2pLutReady = true; +} + + +// Convert a range of chunky scanlines [y0, y1) to Amiga planar over +// planar-byte columns [byteStart, byteEnd). Per row the work is dropped +// into chunkyToPlanarRow (src/port/amiga/c2p.s) which is ~5x faster +// than the old per-pixel C inner loop GCC emits for m68k. +// +// Each planar byte corresponds to 8 horizontal pixels = 4 source bytes +// at 4bpp packed; partial-rect callers should round byteStart down and +// byteEnd up to keep the 8-pixel alignment. 
static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t byteStart, uint16_t byteEnd) { const uint8_t *srcLine; UBYTE *p0; @@ -95,44 +147,25 @@ static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t byteS UBYTE *p2; UBYTE *p3; int16_t y; - uint16_t planarByte; - uint16_t px; - uint16_t pixel; - uint8_t srcByte; - uint8_t nibble; - uint8_t bit; - uint8_t b0; - uint8_t b1; - uint8_t b2; - uint8_t b3; + uint16_t numBytes; + + if (byteStart >= byteEnd) { + return; + } + if (!gC2pLutReady) { + initC2pLut(); + } + numBytes = (uint16_t)(byteEnd - byteStart); for (y = y0; y < y1; y++) { - srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW]; - p0 = &gPlanes[0][y * AMIGA_BYTES_PER_ROW]; - p1 = &gPlanes[1][y * AMIGA_BYTES_PER_ROW]; - p2 = &gPlanes[2][y * AMIGA_BYTES_PER_ROW]; - p3 = &gPlanes[3][y * AMIGA_BYTES_PER_ROW]; - - for (planarByte = byteStart; planarByte < byteEnd; planarByte++) { - b0 = 0; - b1 = 0; - b2 = 0; - b3 = 0; - for (px = 0; px < 8; px++) { - pixel = (uint16_t)(planarByte * 8 + px); - srcByte = srcLine[pixel >> 1]; - nibble = (uint8_t)((pixel & 1) ? (srcByte & 0x0F) : (srcByte >> 4)); - bit = (uint8_t)(7 - px); - b0 = (uint8_t)(b0 | (((nibble >> 0) & 1) << bit)); - b1 = (uint8_t)(b1 | (((nibble >> 1) & 1) << bit)); - b2 = (uint8_t)(b2 | (((nibble >> 2) & 1) << bit)); - b3 = (uint8_t)(b3 | (((nibble >> 3) & 1) << bit)); - } - p0[planarByte] = b0; - p1[planarByte] = b1; - p2[planarByte] = b2; - p3[planarByte] = b3; - } + // 4 source bytes per planar byte: source-byte offset = + // byteStart * 4 within the chunky row. 
+ srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW + byteStart * 4]; + p0 = &gPlanes[0][y * AMIGA_BYTES_PER_ROW + byteStart]; + p1 = &gPlanes[1][y * AMIGA_BYTES_PER_ROW + byteStart]; + p2 = &gPlanes[2][y * AMIGA_BYTES_PER_ROW + byteStart]; + p3 = &gPlanes[3][y * AMIGA_BYTES_PER_ROW + byteStart]; + chunkyToPlanarRow(srcLine, p0, p1, p2, p3, numBytes, gC2pLut); } } @@ -403,6 +436,12 @@ bool halInit(const JoeyConfigT *config) { return false; } } + // Force COLOR00 to black so the overscan/border region around the + // 320x200 display is black until the app's palette load takes over + // on the first surfacePresent. Apps that paint a non-black bg need + // do nothing -- their palette[0] writes the same COLOR00 once the + // first LoadRGB4 fires from uploadScbAndPalette. + SetRGB4(&gScreen->ViewPort, 0, 0, 0, 0); return true; } diff --git a/src/port/atarist/hal.c b/src/port/atarist/hal.c index 7554d67..add8e1a 100644 --- a/src/port/atarist/hal.c +++ b/src/port/atarist/hal.c @@ -456,6 +456,11 @@ bool halInit(const JoeyConfigT *config) { Setscreen((long)gScreenBase, (long)gScreenBase, 0); gModeSet = true; + // Force hardware palette entry 0 to black so the overscan border + // (which the ST shows in palette[0]) stays black until the app's + // first refreshPaletteStateIfNeeded uploads its own palette. + Setcolor(0, 0x000); + // Save previous VBL + Timer B vectors, install ours. Timer B // is at MFP vector $120; vector installed by Xbtimer below. gOldVblVec = (void (*)(void))Setexc(VEC_VBL, -1L); diff --git a/src/port/dos/hal.c b/src/port/dos/hal.c index 58f7f79..f37da87 100644 --- a/src/port/dos/hal.c +++ b/src/port/dos/hal.c @@ -191,6 +191,16 @@ bool halInit(const JoeyConfigT *config) { regs.x.ax = 0x0013; __dpmi_int(0x10, &regs); + // BIOS int 10h fn 10h/01h: set the VGA overscan ("border") color. + // Force it to palette index 0 (black) so the bezel area outside + // mode 13h's 320x200 doesn't show whatever the prior text mode + // left in the attribute controller.
+ memset(&regs, 0, sizeof(regs)); + regs.h.ah = 0x10; + regs.h.al = 0x01; + regs.h.bh = 0x00; + __dpmi_int(0x10, &regs); + if (!__djgpp_nearptr_enable()) { return false; } diff --git a/src/port/iigs/audio_full.c b/src/port/iigs/audio_full.c index f8bdf82..ea3c566 100644 --- a/src/port/iigs/audio_full.c +++ b/src/port/iigs/audio_full.c @@ -3,14 +3,13 @@ // stub for every other demo so the monolithic IIgs link budget stays // safe. // -// Stage 1 (this file's first cut): load ntpplayer.bin (the Merlin32- -// assembled NinjaTrackerPlus replayer staged by the iigs.mk Merlin -// rule and bundled on the disk image) into a Memory Manager handle. -// halAudioInit reports true if the load succeeds. PlayMod / PlaySfx / -// StopMod still no-op until the JSL trampoline lands -- that's stage -// 2 and gets its own file once the inline-asm syntax is nailed down. +// The NinjaTrackerPlus replayer is Merlin32-assembled at build time +// to ntpplayer.bin and baked into this TU as gNtpPlayerBytes via the +// iigs.mk xxd-i header rule. halAudioInit BlockMoves those bytes into +// a fixed-bank Memory Manager handle and JSLs into them through the +// self-modifying call stub below; NTP is bank-internal / position- +// independent so it runs at whatever address Memory Manager picked. -#include #include #include @@ -18,14 +17,11 @@ #include "hal.h" #include "joey/audio.h" +#include "ntpplayer_data.h" // ----- Constants ----- -#define NTP_FILENAME "NTPPLAYER.BIN" #define NTP_BUFFER_BYTES (64L * 1024L) -// Sanity check: NTP source assembles to ~34 KB; reject reads that -// come back too short to plausibly be the replayer. -#define NTP_MIN_BYTES 32000L // Ensoniq 5503 (DOC) I/O at $E1C03C..$E1C03E. NTP saves its preferred // sound_control byte at $E100CA so callers that want DOC-register @@ -97,7 +93,15 @@ static bool gNTPPlaying = false; // Self-modifying call stub.
Bakes the X/Y/A register loads AND the // JSL target into the buffer, so the C-side inline asm only needs -// `jsl gCallStub` -- no global-operand references in the asm block. +// `jsl gCallStub` followed by `rep #0x30` -- no global-operand +// references in the asm block. +// +// The trailing REP is required to satisfy ORCA-C's `longa on / +// longi on` exit contract: the byte writes the call sites do to +// patch this stub leave M=8, and NTP's RTL leaves M/X in whatever +// state NTP chose. Without REP, the C epilogue's ADC/LDX widths +// decode wrong and the function returns to garbage. See +// project_iigs_inline_asm_mode.md. // // (We tried the obvious `lda gAsmGlobal / jsl gJslStub` shape first; // ORCA's inline assembler accepts the first absolute-global operand @@ -138,8 +142,6 @@ static void buildCallStub(uint32_t target, uint16_t x, uint16_t y, uint16_t a) { static bool loadNTP(void) { Handle h; Pointer p; - FILE *fp; - size_t bytesRead; h = NewHandle(NTP_BUFFER_BYTES, _ownerid, attrFixed | attrLocked | attrPage | attrNoCross, @@ -154,17 +156,7 @@ static bool loadNTP(void) { return false; } - fp = fopen(NTP_FILENAME, "rb"); - if (fp == NULL) { - DisposeHandle(h); - return false; - } - bytesRead = fread(p, 1, NTP_BUFFER_BYTES, fp); - fclose(fp); - if (bytesRead < NTP_MIN_BYTES) { - DisposeHandle(h); - return false; - } + BlockMove((Pointer)gNtpPlayerBytes, p, gNtpPlayerBytes_len); gNTPHandle = h; gNTPBase = (uint32_t)p; @@ -274,12 +266,14 @@ void halAudioPlayMod(const uint8_t *data, uint32_t length, bool loop) { 0); asm { jsl gCallStub + rep #0x30 } // NTPplay(loopFlag in A). 0 = loop forever, 1 = play once. buildCallStub(gNTPBase + 3, 0, 0, loop ? 
0 : 1); asm { jsl gCallStub + rep #0x30 } gNTPPlaying = true; } @@ -346,6 +340,7 @@ void halAudioPlaySfx(uint8_t slot, const uint8_t *sample, uint32_t length, uint1 0); asm { jsl gCallStub + rep #0x30 } } @@ -357,6 +352,7 @@ void halAudioStopMod(void) { buildCallStub(gNTPBase + 6, 0, 0, 0); asm { jsl gCallStub + rep #0x30 } gNTPPlaying = false; } diff --git a/src/port/iigs/hal.c b/src/port/iigs/hal.c index 99ecffb..f05e13b 100644 --- a/src/port/iigs/hal.c +++ b/src/port/iigs/hal.c @@ -24,6 +24,7 @@ // ----- Hardware addresses (24-bit / long pointers) ----- #define IIGS_NEWVIDEO_REG ((volatile uint8_t *)0x00C029L) +#define IIGS_BORDER_REG ((volatile uint8_t *)0x00C034L) #define IIGS_VBL_STATUS ((volatile uint8_t *)0x00C019L) #define IIGS_SHR_PIXELS ((uint8_t *)0xE12000L) #define IIGS_SHR_SCB ((uint8_t *)0xE19D00L) @@ -40,9 +41,16 @@ // handler) and bumps its "Code: RED" status. Always include this bit. #define NEWVIDEO_RESERVED_BIT 0x01 +// $C034 BORDER register: high nibble = beep/IRQ enables (preserve), +// low nibble = border color index 0..15. Color 0 is the all-zero +// palette entry by IIgs convention; we force the low nibble to 0 +// in halInit so the visible bezel matches the cleared SHR background. +#define BORDER_COLOR_MASK 0xF0 + // ----- Module state ----- static uint8_t gPreviousNewVideo = 0; +static uint8_t gPreviousBorder = 0; static bool gModeSet = false; // Last-uploaded SCB and palette. 
Both registers live in bank $E1; on a @@ -77,7 +85,9 @@ static void uploadScbAndPaletteIfNeeded(const SurfaceT *src) { bool halInit(const JoeyConfigT *config) { (void)config; gPreviousNewVideo = *IIGS_NEWVIDEO_REG; + gPreviousBorder = *IIGS_BORDER_REG; *IIGS_NEWVIDEO_REG = (uint8_t)(NEWVIDEO_SHR_ON | NEWVIDEO_LINEARIZE | NEWVIDEO_RESERVED_BIT); + *IIGS_BORDER_REG = (uint8_t)(gPreviousBorder & BORDER_COLOR_MASK); gModeSet = true; return true; } @@ -141,6 +151,7 @@ void halWaitVBL(void) { void halShutdown(void) { if (gModeSet) { *IIGS_NEWVIDEO_REG = gPreviousNewVideo; + *IIGS_BORDER_REG = gPreviousBorder; gModeSet = false; } } diff --git a/tools/joeysprite/joeysprite.c b/tools/joeysprite/joeysprite.c index 6da65ec..886cd09 100644 --- a/tools/joeysprite/joeysprite.c +++ b/tools/joeysprite/joeysprite.c @@ -13,14 +13,21 @@ // top-to-bottom and each row is 4 bytes (8 pixels at 4bpp packed, // high nibble = left pixel). // -// Output `.spr` format (target-native byte order, see DESIGN.md -// §12 for details): -// header (4 bytes): widthTiles, heightTiles, codeSize lo/hi -// offsets (JOEY_SPRITE_SHIFT_COUNT * 3 * uint16_t): -// [draw_s0, save_s0, restore_s0, draw_s1, save_s1, restore_s1] -// Save and restore offsets are written as 0 (uniform memcpy on -// load; never compiled). -// code (codeSize bytes): emitted machine code per shift, in order. +// Output `.spr` format (target-native byte order for code; see +// DESIGN.md §12). Mirrors src/core/sprite.c's reader: +// byte 0 widthTiles +// byte 1 heightTiles +// bytes 2-3 codeSize (LE16) +// bytes 4-5 tileBytes (LE16) = widthTiles*heightTiles*32 +// ... offsets (JOEY_SPRITE_SHIFT_COUNT * SPRITE_OP_COUNT * +// uint16_t LE): [draw_s0, save_s0, restore_s0, +// draw_s1, save_s1, restore_s1]. Save/restore offsets +// are 0 here -- the runtime keeps the memcpy-based +// interpreter for those ops. +// ... compiled code (codeSize bytes) +// ... 
raw tile data (tileBytes bytes; same layout as the +// input file, lets the runtime interpreter handle +// clipped draws without decoding the compiled bytes). #include #include @@ -44,7 +51,7 @@ typedef enum { // ----- Constants ----- #define MAX_SCRATCH_BYTES (16u * 1024u) -#define SPR_HEADER_SIZE 4 +#define SPR_HEADER_SIZE 6 // Save/restore offsets are reserved (0) for now -- the runtime // memcpy interpreter handles them. #define SHIFT_OPS 3 @@ -118,8 +125,9 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) rc = 0; if (fputc(sp->widthTiles, fp) == EOF) rc = 2; - if (fputc(sp->heightTiles, fp) == EOF) rc = 2; + if (rc == 0 && fputc(sp->heightTiles, fp) == EOF) rc = 2; if (rc == 0 && writeLE16(fp, (uint16_t)totalCodeSize) != 0) rc = 2; + if (rc == 0 && writeLE16(fp, (uint16_t)(sp->widthTiles * sp->heightTiles * 32u)) != 0) rc = 2; // Offset table: cumulative draw offsets + zeros for save/restore. offset = 0; @@ -144,6 +152,17 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) rc = 2; } } + if (rc == 0) { + // Append the raw tile data so the runtime interpreter has it + // available for clipped draws. + uint32_t tileBytes = (uint32_t)sp->widthTiles * (uint32_t)sp->heightTiles * 32u; + if (sp->tileData == NULL) { + fprintf(stderr, "joeysprite: sprite missing tile data, cannot save\n"); + rc = 2; + } else if (fwrite(sp->tileData, 1, tileBytes, fp) != tileBytes) { + rc = 2; + } + } fclose(fp); free(codeBuf); free(scratch);