From 855d043f9c0832fb4344c0eb924411ef7107128e Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Sun, 26 Apr 2026 16:57:33 -0500 Subject: [PATCH] Compiled sprites are working. --- examples/audio/audio.c | 2 +- examples/hello/hello.c | 2 +- examples/joy/joy.c | 2 +- examples/keys/keys.c | 2 +- examples/pattern/pattern.c | 2 +- examples/sprite/sprite.c | 181 ++++++---- include/joey/debug.h | 10 + include/joey/joey.h | 2 + include/joey/sprite.h | 151 +++++++++ include/joey/surface.h | 13 + make/amiga.mk | 11 +- make/atarist.mk | 11 +- make/dos.mk | 9 +- make/iigs.mk | 17 +- make/tools.mk | 23 +- scripts/run-iigs-mame.sh | 203 +++++++++++ scripts/run-iigs.sh | 48 ++- src/codegen/spriteCompile.c | 200 +++++++++++ src/codegen/spriteEmit68k.c | 186 +++++++++++ src/codegen/spriteEmitIigs.c | 182 ++++++++++ src/codegen/spriteEmitStub.c | 25 ++ src/codegen/spriteEmitX86.c | 191 +++++++++++ src/codegen/spriteEmitter.h | 26 ++ src/core/codegenArena.c | 297 ++++++++++++++++ src/core/codegenArenaInternal.h | 76 +++++ src/core/debug.c | 57 ++++ src/core/init.c | 15 + src/core/sprite.c | 576 ++++++++++++++++++++++++++++++++ src/core/spriteInternal.h | 42 +++ src/core/surface.c | 75 ++++- src/port/atarist/hal.c | 12 +- tools/joeysprite/joeysprite.c | 323 ++++++++++++++++++ 32 files changed, 2884 insertions(+), 88 deletions(-) create mode 100644 include/joey/debug.h create mode 100644 include/joey/sprite.h create mode 100755 scripts/run-iigs-mame.sh create mode 100644 src/codegen/spriteCompile.c create mode 100644 src/codegen/spriteEmit68k.c create mode 100644 src/codegen/spriteEmitIigs.c create mode 100644 src/codegen/spriteEmitStub.c create mode 100644 src/codegen/spriteEmitX86.c create mode 100644 src/codegen/spriteEmitter.h create mode 100644 src/core/codegenArena.c create mode 100644 src/core/codegenArenaInternal.h create mode 100644 src/core/debug.c create mode 100644 src/core/sprite.c create mode 100644 src/core/spriteInternal.h create mode 100644 
tools/joeysprite/joeysprite.c diff --git a/examples/audio/audio.c b/examples/audio/audio.c index 0743c8e..b1815ed 100644 --- a/examples/audio/audio.c +++ b/examples/audio/audio.c @@ -61,7 +61,7 @@ int main(void) { int16_t flashFrames; config.hostMode = HOST_MODE_TAKEOVER; - config.codegenBytes = 32 * 1024; + config.codegenBytes = 8 * 1024; config.maxSurfaces = 4; config.audioBytes = 64 * 1024; config.assetBytes = 128 * 1024; diff --git a/examples/hello/hello.c b/examples/hello/hello.c index 9d847d6..10a89bb 100644 --- a/examples/hello/hello.c +++ b/examples/hello/hello.c @@ -10,7 +10,7 @@ int main(void) { JoeyConfigT config; config.hostMode = HOST_MODE_OS; - config.codegenBytes = 32 * 1024; + config.codegenBytes = 8 * 1024; config.maxSurfaces = 4; config.audioBytes = 64 * 1024; config.assetBytes = 128 * 1024; diff --git a/examples/joy/joy.c b/examples/joy/joy.c index a6235f4..165b170 100644 --- a/examples/joy/joy.c +++ b/examples/joy/joy.c @@ -216,7 +216,7 @@ int main(void) { SurfaceT *screen; config.hostMode = HOST_MODE_TAKEOVER; - config.codegenBytes = 32 * 1024; + config.codegenBytes = 8 * 1024; config.maxSurfaces = 4; config.audioBytes = 64 * 1024; config.assetBytes = 128 * 1024; diff --git a/examples/keys/keys.c b/examples/keys/keys.c index 5d1d29f..7da35f0 100644 --- a/examples/keys/keys.c +++ b/examples/keys/keys.c @@ -223,7 +223,7 @@ int main(void) { int16_t cursorRow; config.hostMode = HOST_MODE_TAKEOVER; - config.codegenBytes = 32 * 1024; + config.codegenBytes = 8 * 1024; config.maxSurfaces = 4; config.audioBytes = 64 * 1024; config.assetBytes = 128 * 1024; diff --git a/examples/pattern/pattern.c b/examples/pattern/pattern.c index 8bdb3fb..36ec6f8 100644 --- a/examples/pattern/pattern.c +++ b/examples/pattern/pattern.c @@ -118,7 +118,7 @@ int main(void) { SurfaceT *screen; config.hostMode = HOST_MODE_TAKEOVER; - config.codegenBytes = 32 * 1024; + config.codegenBytes = 8 * 1024; config.maxSurfaces = 4; config.audioBytes = 64 * 1024; config.assetBytes = 128 
* 1024; diff --git a/examples/sprite/sprite.c b/examples/sprite/sprite.c index 0b68434..bb1d37b 100644 --- a/examples/sprite/sprite.c +++ b/examples/sprite/sprite.c @@ -1,31 +1,37 @@ -// Sprite demo: bounces a 16x16 ball sprite around the screen using -// surfaceBlitMasked. The ball is embedded as a `const JoeyAssetT` -// directly in this file, so no .jas file or runtime allocation is -// involved -- this exercises the static / embedded path. Press ESC -// to quit. -// -// Each frame we redraw only the ball's old and new bounding boxes -// (and present those two small rects), so the cost stays small even -// with the slow 68000-class c2p in the ST and Amiga ports. +// Sprite demo: bounces a 16x16 ball using the SpriteT API. The ball +// art is authored as a 16x16 4bpp packed image in this file (one row +// per source line) and converted at startup into the 2x2 tile layout +// SpriteT expects. spriteDraw handles transparent-color-0; we use +// spriteSaveUnder/RestoreUnder to undraw the previous frame's ball +// without redrawing the whole screen. #include #include -#define BALL_W 16 -#define BALL_H 16 +#define BALL_W 16 +#define BALL_H 16 +#define BALL_TILES_X (BALL_W / 8) +#define BALL_TILES_Y (BALL_H / 8) -#define BALL_PALETTE_IDX 0 +#define TILE_BYTES 32 +#define BALL_TILE_BYTES (BALL_TILES_X * BALL_TILES_Y * TILE_BYTES) +// SaveUnder must store rounded-up byte boundaries: x rounded down to +// even, width rounded up to even. Worst case for BALL_W=16 (already +// even) is 8 bytes per row + alignment slack of 1 byte; size for the +// pessimistic case so the buffer never overflows. +#define BALL_BACKUP_BYTES (((BALL_W + 2) >> 1) * BALL_H) -#define COLOR_BG 0 -#define COLOR_TRANSPARENT 0 // first palette slot doubles as mask +#define BALL_PALETTE_IDX 0 -// 16x16 ball sprite, 4bpp packed (8 bytes per row): -// 0 = transparent (mask) +#define COLOR_BG 0 + +// Authored layout: 16 pixels wide x 16 rows = 8 bytes/row x 16 rows. 
+// 0 = transparent // 2 = ball body (yellow) // 3 = highlight (white) // High nibble of each byte is the LEFT pixel. -static const uint8_t gBallPixels[BALL_H * 8] = { +static const uint8_t gBallAuthored[BALL_H * (BALL_W / 2)] = { 0x00, 0x00, 0x22, 0x22, 0x22, 0x22, 0x00, 0x00, // row 0 0x00, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x00, // row 1 0x02, 0x22, 0x32, 0x22, 0x22, 0x22, 0x22, 0x20, // row 2 @@ -44,46 +50,61 @@ static const uint8_t gBallPixels[BALL_H * 8] = { 0x00, 0x00, 0x00, 0x22, 0x22, 0x00, 0x00, 0x00 // row 15 }; -// Build the embedded ball asset at runtime. We could declare a -// `static const JoeyAssetT` with this same data, but ORCA/C 2.1 -// does not handle a file-scope static struct whose pointer field is -// initialized from another static array's address (linker complains -// of unresolved references). Building the struct in main() with a -// local sidesteps the quirk and costs only a handful of stores. -static void buildBallAsset(JoeyAssetT *ball) { - uint16_t i; +static uint8_t gBallTiles[BALL_TILE_BYTES]; +static uint8_t gBallBackup[BALL_BACKUP_BYTES]; - ball->width = BALL_W; - ball->height = BALL_H; - ball->hasPalette = true; - ball->pixels = gBallPixels; - for (i = 0; i < 16; i++) { - ball->palette[i] = 0x0000; + +// Repack from authored "wide row" layout to tile-major SpriteT +// layout: tile (0,0), tile (1,0), tile (0,1), tile (1,1), each tile +// internally stored row-by-row at 4 bytes per row. 
+static void repackBallTiles(void) { + uint16_t tx; + uint16_t ty; + uint16_t row; + uint16_t b; + uint8_t *dst; + + for (ty = 0; ty < BALL_TILES_Y; ty++) { + for (tx = 0; tx < BALL_TILES_X; tx++) { + dst = &gBallTiles[(ty * BALL_TILES_X + tx) * TILE_BYTES]; + for (row = 0; row < 8; row++) { + for (b = 0; b < 4; b++) { + dst[row * 4 + b] = + gBallAuthored[((ty * 8) + row) * (BALL_W / 2) + + (tx * 4) + b]; + } + } + } } - ball->palette[2] = 0x0FF0; // yellow body - ball->palette[3] = 0x0FFF; // white highlight } -static void initialPaint(SurfaceT *screen) { - surfaceClear(screen, COLOR_BG); - surfacePresent(screen); +static void buildPalette(SurfaceT *screen) { + uint16_t colors[16]; + uint16_t i; + + for (i = 0; i < 16; i++) { + colors[i] = 0x0000; + } + colors[2] = 0x0FF0; // yellow body + colors[3] = 0x0FFF; // white highlight + paletteSet(screen, BALL_PALETTE_IDX, colors); } int main(void) { - JoeyConfigT config; - SurfaceT *screen; - JoeyAssetT ball; - int16_t x; - int16_t y; - int16_t vx; - int16_t vy; - int16_t lastX; - int16_t lastY; + JoeyConfigT config; + SurfaceT *screen; + SpriteT *ball; + SpriteBackupT backup; + int16_t x; + int16_t y; + int16_t vx; + int16_t vy; + bool haveBackup; config.hostMode = HOST_MODE_TAKEOVER; - config.codegenBytes = 32 * 1024; + config.codegenBytes = 8 * 1024; config.maxSurfaces = 4; config.audioBytes = 64 * 1024; config.assetBytes = 128 * 1024; @@ -100,20 +121,36 @@ int main(void) { return 1; } - buildBallAsset(&ball); - joeyAssetApplyPalette(screen, BALL_PALETTE_IDX, &ball); + repackBallTiles(); + ball = spriteCreate(gBallTiles, BALL_TILES_X, BALL_TILES_Y, SPRITE_FLAGS_NONE); + if (ball == NULL) { + fprintf(stderr, "spriteCreate failed\n"); + joeyShutdown(); + return 1; + } + // Compile draw routines into the codegen arena. Returns false on + // platforms that don't have a real emitter or if the arena is + // full -- either way the demo still runs via the interpreter + // path in spriteDraw. 
+ (void)spriteCompile(ball); + + buildPalette(screen); scbSetRange(screen, 0, SURFACE_HEIGHT - 1, BALL_PALETTE_IDX); - initialPaint(screen); + surfaceClear(screen, COLOR_BG); + surfacePresent(screen); - x = 40; - y = 30; - vx = 2; - vy = 1; - lastX = x; - lastY = y; + backup.bytes = gBallBackup; - surfaceBlitMasked(screen, &ball, x, y, COLOR_TRANSPARENT); - surfacePresentRect(screen, x, y, BALL_W, BALL_H); + x = 40; + y = 30; + vx = 2; + vy = 1; + haveBackup = false; + + spriteSaveUnder(screen, ball, x, y, &backup); + spriteDraw(screen, ball, x, y); + surfacePresentRect(screen, backup.x, backup.y, backup.width, backup.height); + haveBackup = true; for (;;) { joeyWaitVBL(); @@ -122,27 +159,27 @@ int main(void) { break; } - // Erase old ball position by clearing its bounding rect. Any - // pixel outside the ball that we wrote (we wrote no pixels - // outside, since blitMasked respects transparency) stays as - // background already. - fillRect(screen, lastX, lastY, BALL_W, BALL_H, COLOR_BG); - surfacePresentRect(screen, lastX, lastY, BALL_W, BALL_H); + // Restore the bytes that lived under the previous-frame ball, + // then move + redraw + present the new region. 
+ if (haveBackup) { + spriteRestoreUnder(screen, &backup); + surfacePresentRect(screen, backup.x, backup.y, backup.width, backup.height); + } x = (int16_t)(x + vx); y = (int16_t)(y + vy); - if (x <= 0) { x = 0; vx = (int16_t)-vx; } - if (x >= SURFACE_WIDTH - BALL_W) { x = SURFACE_WIDTH - BALL_W; vx = (int16_t)-vx; } - if (y <= 0) { y = 0; vy = (int16_t)-vy; } - if (y >= SURFACE_HEIGHT - BALL_H) { y = SURFACE_HEIGHT - BALL_H; vy = (int16_t)-vy; } + if (x <= 0) { x = 0; vx = (int16_t)-vx; } + if (x >= SURFACE_WIDTH - BALL_W) { x = SURFACE_WIDTH - BALL_W; vx = (int16_t)-vx; } + if (y <= 0) { y = 0; vy = (int16_t)-vy; } + if (y >= SURFACE_HEIGHT - BALL_H) { y = SURFACE_HEIGHT - BALL_H; vy = (int16_t)-vy; } - surfaceBlitMasked(screen, &ball, x, y, COLOR_TRANSPARENT); - surfacePresentRect(screen, x, y, BALL_W, BALL_H); - - lastX = x; - lastY = y; + spriteSaveUnder(screen, ball, x, y, &backup); + spriteDraw(screen, ball, x, y); + surfacePresentRect(screen, backup.x, backup.y, backup.width, backup.height); + haveBackup = true; } + spriteDestroy(ball); joeyShutdown(); return 0; } diff --git a/include/joey/debug.h b/include/joey/debug.h new file mode 100644 index 0000000..161fdef --- /dev/null +++ b/include/joey/debug.h @@ -0,0 +1,10 @@ +// Crash-tracing logger. See src/core/debug.c. + +#ifndef JOEYLIB_DEBUG_H +#define JOEYLIB_DEBUG_H + +void joeyLog (const char *msg); +void joeyLogF (const char *fmt, ...); +void joeyLogReset(void); + +#endif diff --git a/include/joey/joey.h b/include/joey/joey.h index f84072d..65300ea 100644 --- a/include/joey/joey.h +++ b/include/joey/joey.h @@ -16,5 +16,7 @@ #include "present.h" #include "input.h" #include "audio.h" +#include "sprite.h" +#include "debug.h" #endif diff --git a/include/joey/sprite.h b/include/joey/sprite.h new file mode 100644 index 0000000..04e519f --- /dev/null +++ b/include/joey/sprite.h @@ -0,0 +1,151 @@ +// Sprites: rectangles of 8x8 tiles drawn at arbitrary pixel positions +// with color-0 transparency. 
+// +// A sprite's pixel data is `widthTiles * heightTiles * 32` bytes, +// 4bpp packed, laid out as a flat blob of 8x8 tiles. Tile order is +// row-major (tile (0,0), tile (1,0), ..., tile (widthTiles-1,0), +// tile (0,1), ...). Within each tile, rows are top-to-bottom and +// each row is 4 bytes (8 pixels at 4bpp packed; high nibble = left +// pixel). +// +// Color 0 is always transparent on draw (DESIGN.md contract). Use a +// tile-block draw if you need an opaque rectangle. +// +// Performance contract: spriteDraw should be at least as fast as the +// IIgs reference (the DESIGN.md spec calls for runtime-compiled draw +// code per CPU). An interpreted fallback gives correct output +// everywhere; spriteCompile adds a compiled fast path on platforms +// whose build links a per-CPU emitter. spritePrewarm is a hint that +// the application is about to draw the sprite repeatedly -- it +// forwards to spriteCompile so the shift variants exist before the +// first draw. If compilation fails, draws use the interpreter. + +#ifndef JOEYLIB_SPRITE_H +#define JOEYLIB_SPRITE_H + +#include "platform.h" +#include "surface.h" +#include "types.h" + +// Sprites always write to a 4bpp packed SurfaceT, never to display +// memory directly (halPresent owns that path). The codegen emits 2 +// shift variants on every platform: shift 0 for even x (sprite byte +// boundaries match destination byte boundaries) and shift 1 for odd +// x (each destination byte combines two adjacent sprite bytes' +// nibbles). +#define JOEY_SPRITE_SHIFT_COUNT 2 + +typedef enum { + SPRITE_FLAGS_NONE = 0 +} SpriteFlagsE; + +typedef struct SpriteT SpriteT; + +// SpriteBackupT holds the destination bytes that lived under a sprite +// before it was drawn, so the application can restore them after the +// sprite moves. Sized for the largest backup the app will need; a +// stack-allocated SpriteBackupT plus a caller-owned byte buffer keeps +// the runtime allocation-free. 
+typedef struct { + SpriteT *sprite; + int16_t x; + int16_t y; + uint16_t width; // pixels + uint16_t height; // pixels + uint8_t *bytes; // caller-owned, capacity >= sizeBytes + uint16_t sizeBytes; +} SpriteBackupT; + +// Wrap a tile-data blob in a SpriteT. The tile data must outlive the +// SpriteT; we do not copy it. Returns NULL if widthTiles or +// heightTiles is 0, or if the codegen arena cannot fit a placeholder +// entry for this sprite. +SpriteT *spriteCreate(const uint8_t *tileData, uint8_t widthTiles, uint8_t heightTiles, SpriteFlagsE flags); + +// Release a SpriteT and any codegen entries cached for it. The tile +// data the sprite was constructed from is NOT freed -- the caller +// owns that buffer. +void spriteDestroy(SpriteT *sp); + +// Compile the sprite's draw routines into the codegen arena. After +// this returns true, spriteDraw uses the compiled fast path on +// platforms whose build links an emitter (x86, 68k, 65816). Returns +// false if the arena is full (caller may run spriteCompact and +// retry), the platform doesn't have a real emitter yet, or the +// sprite has no source tile data (e.g., it was loaded already +// compiled via spriteLoadFile). +// +// Idempotent: calling on a sprite that's already compiled is a +// no-op and returns true. +bool spriteCompile(SpriteT *sp); + +// Hint that this sprite will be drawn soon. Currently a wrapper +// around spriteCompile that ignores the return value, kept for API +// symmetry with the rest of the library and for callers that don't +// care about compile success. +void spritePrewarm(SpriteT *sp); + +// Draw the sprite at pixel (x,y) on the destination surface. Pixels +// equal to color 0 in the sprite source are skipped (transparent). +// Off-surface portions are clipped. +void spriteDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y); + +// Capture the destination region a subsequent spriteDraw at the same +// (x,y) would write to. 
backup->bytes must have at least +// (widthTiles*4 + 1) * (heightTiles*8) bytes of capacity: at odd +// x each row spans one extra byte. For clipped draws only the visible bytes are +// stored. The captured region's exact size is reported in +// backup->sizeBytes. +void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup); + +// Repaint the destination region from a SpriteBackupT captured by a +// prior spriteSaveUnder. The backup must not have been invalidated +// by other writes that overlapped its captured region. +void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup); + +// Snapshot an 8x8-aligned region of a SurfaceT into a new SpriteT. +// The captured pixel data is copied into a sprite-owned buffer so +// the source surface can be modified afterwards. Width and height +// are in TILES (each tile = 8x8 pixels). x and y are in pixels and +// must be aligned to a tile boundary (multiple of 8) on the source +// surface; misaligned coordinates return NULL. +SpriteT *spriteCreateFromSurface(const SurfaceT *src, int16_t x, int16_t y, + uint8_t widthTiles, uint8_t heightTiles, SpriteFlagsE flags); + +// Load a sprite from a `.spr` file produced by the host-side +// joeysprite tool or by spriteSaveFile. Format is target-native: +// 4-byte header (widthTiles, heightTiles, codeSize), then a fixed- +// size offsets table (JOEY_SPRITE_SHIFT_COUNT * 3 * uint16_t), then +// codeSize bytes of position-independent machine code. The runtime +// copies the code into the codegen arena, so the file's bytes can be +// freed once this returns. +SpriteT *spriteLoadFile(const char *path, SpriteFlagsE flags); + +// Same as spriteLoadFile but parses bytes already in memory. +SpriteT *spriteFromCompiledMem(const uint8_t *data, uint32_t length, SpriteFlagsE flags); + +// Persist a sprite to disk in `.spr` format. 
Sprites created via +// spriteCreate / spriteCreateFromSurface that have not been +// compiled yet are force-compiled here (so the resulting file can +// always be loaded back via spriteLoadFile). Returns false if the +// codegen arena is full or the platform's emitter is not yet +// implemented. +bool spriteSaveFile(SpriteT *sp, const char *path); + +// Defragment the codegen arena. Walks live sprite slots and +// memmoves them down to consolidate free space; any holes left by +// destroyed sprites are reclaimed. Costs O(arena_used_bytes); call +// between levels rather than per frame. SpriteT pointers held by +// the application are NOT invalidated -- internal indirection +// through the slot record means draw calls automatically pick up +// the new code address on the next call. +void spriteCompact(void); + +// Arena introspection. Used to gauge whether spriteCompact is +// worth running, or whether the codegenBytes budget needs to grow. +// Free space is (Total - Used), but it may be fragmented across +// holes until spriteCompact runs. +uint32_t spriteCodegenBytesUsed(void); +uint32_t spriteCodegenBytesTotal(void); + +#endif diff --git a/include/joey/surface.h b/include/joey/surface.h index 2b1db07..c046a63 100644 --- a/include/joey/surface.h +++ b/include/joey/surface.h @@ -43,4 +43,17 @@ SurfaceT *surfaceGetScreen(void); // surfaces. void surfaceCopy(SurfaceT *dst, const SurfaceT *src); +// Persist a surface to disk in target-native form: 32000 bytes of +// pixels followed by 200 bytes of SCB followed by 512 bytes of $0RGB +// palette table (16 palettes x 16 colors, native uint16_t). Returns +// false if the file can't be opened or the write fails. The same +// platform that wrote the file must read it -- no endianness +// conversion is performed. +bool surfaceSaveFile(const SurfaceT *src, const char *path); + +// Read a surface from disk, overwriting dst. Returns false if the +// file is missing, the wrong size, or the read fails. 
dst keeps its +// identity (no reallocation). +bool surfaceLoadFile(SurfaceT *dst, const char *path); + #endif diff --git a/make/amiga.mk b/make/amiga.mk index 1498832..d14928b 100644 --- a/make/amiga.mk +++ b/make/amiga.mk @@ -38,12 +38,17 @@ SHARED_S := $(wildcard $(SRC_68K)/*.s) # code out of every Amiga binary. CORE_C_SRCS_AMIGA := $(filter-out %/audioSfxMix.c, $(CORE_C_SRCS)) +# Sprite codegen: 68k emitter shared with the ST port. +CODEGEN_DIR := $(REPO_DIR)/src/codegen + LIB_OBJS := \ $(patsubst $(SRC_CORE)/%.c,$(BUILD)/obj/core/%.o,$(CORE_C_SRCS_AMIGA)) \ $(patsubst $(SRC_PORT)/amiga/%.c,$(BUILD)/obj/port/%.o,$(PORT_C_SRCS)) \ $(patsubst $(SRC_PORT)/amiga/%.s,$(BUILD)/obj/port/%.o,$(PORT_S_SRCS)) \ $(patsubst $(SRC_68K)/%.s,$(BUILD)/obj/68k/%.o,$(SHARED_S)) \ - $(BUILD)/obj/port/ptplayer.o + $(BUILD)/obj/port/ptplayer.o \ + $(BUILD)/obj/codegen/spriteEmit68k.o \ + $(BUILD)/obj/codegen/spriteCompile.o LIB := $(LIBDIR)/libjoey.a @@ -79,6 +84,10 @@ $(BUILD)/obj/port/ptplayer.o: $(PTPLAYER_DIR)/ptplayer.asm @mkdir -p $(dir $@) $(AMIGA_AS) $(ASFLAGS) $< -o $@ +$(BUILD)/obj/codegen/%.o: $(CODEGEN_DIR)/%.c + @mkdir -p $(dir $@) + $(AMIGA_CC) $(CFLAGS) -I$(CODEGEN_DIR) -c $< -o $@ + $(BUILD)/obj/68k/%.o: $(SRC_68K)/%.s @mkdir -p $(dir $@) $(AMIGA_AS) $(ASFLAGS) $< -o $@ diff --git a/make/atarist.mk b/make/atarist.mk index 354cb37..6f8f32b 100644 --- a/make/atarist.mk +++ b/make/atarist.mk @@ -32,11 +32,16 @@ PORT_C_SRCS := $(wildcard $(SRC_PORT)/atarist/*.c) PORT_S_SRCS := $(wildcard $(SRC_PORT)/atarist/*.s) SHARED_S := $(wildcard $(SRC_68K)/*.s) +# Sprite codegen: 68k emitter shared with the Amiga port. 
+CODEGEN_DIR := $(REPO_DIR)/src/codegen + LIB_OBJS := \ $(patsubst $(SRC_CORE)/%.c,$(BUILD)/obj/core/%.o,$(CORE_C_SRCS)) \ $(patsubst $(SRC_PORT)/atarist/%.c,$(BUILD)/obj/port/%.o,$(PORT_C_SRCS)) \ $(patsubst $(SRC_PORT)/atarist/%.s,$(BUILD)/obj/port/%.o,$(PORT_S_SRCS)) \ - $(patsubst $(SRC_68K)/%.s,$(BUILD)/obj/68k/%.o,$(SHARED_S)) + $(patsubst $(SRC_68K)/%.s,$(BUILD)/obj/68k/%.o,$(SHARED_S)) \ + $(BUILD)/obj/codegen/spriteEmit68k.o \ + $(BUILD)/obj/codegen/spriteCompile.o LIB := $(LIBDIR)/libjoey.a @@ -72,6 +77,10 @@ $(BUILD)/obj/68k/%.o: $(SRC_68K)/%.s @mkdir -p $(dir $@) $(ST_AS) $(ASFLAGS) $< -o $@ +$(BUILD)/obj/codegen/%.o: $(CODEGEN_DIR)/%.c + @mkdir -p $(dir $@) + $(ST_CC) $(CFLAGS) -I$(CODEGEN_DIR) -c $< -o $@ + $(LIB): $(LIB_OBJS) @mkdir -p $(dir $@) $(ST_AR) rcs $@ $^ diff --git a/make/dos.mk b/make/dos.mk index 662ede6..0ffa004 100644 --- a/make/dos.mk +++ b/make/dos.mk @@ -24,11 +24,14 @@ LIBXMP_CFLAGS := -DJOEY_LIBXMP_LITE -DLIBXMP_CORE_PLAYER -DHAVE_FNMATCH=0 -I$(L PORT_C_SRCS := $(wildcard $(SRC_PORT)/dos/*.c) PORT_S_SRCS := $(wildcard $(SRC_PORT)/dos/*.asm) +CODEGEN_DIR := $(REPO_DIR)/src/codegen LIB_OBJS := \ $(patsubst $(SRC_CORE)/%.c,$(BUILD)/obj/core/%.o,$(CORE_C_SRCS)) \ $(patsubst $(SRC_PORT)/dos/%.c,$(BUILD)/obj/port/%.o,$(PORT_C_SRCS)) \ - $(patsubst $(SRC_PORT)/dos/%.asm,$(BUILD)/obj/port/%.o,$(PORT_S_SRCS)) + $(patsubst $(SRC_PORT)/dos/%.asm,$(BUILD)/obj/port/%.o,$(PORT_S_SRCS)) \ + $(BUILD)/obj/codegen/spriteEmitX86.o \ + $(BUILD)/obj/codegen/spriteCompile.o LIB := $(LIBDIR)/libjoey.a @@ -60,6 +63,10 @@ $(BUILD)/obj/port/%.o: $(SRC_PORT)/dos/%.asm @mkdir -p $(dir $@) $(DOS_AS) $(ASFLAGS) $< -o $@ +$(BUILD)/obj/codegen/%.o: $(CODEGEN_DIR)/%.c + @mkdir -p $(dir $@) + $(DOS_CC) $(CFLAGS) -I$(CODEGEN_DIR) -c $< -o $@ + $(LIB): $(LIB_OBJS) @mkdir -p $(dir $@) $(DOS_AR) rcs $@ $^ diff --git a/make/iigs.mk b/make/iigs.mk index aef37be..8cc1214 100644 --- a/make/iigs.mk +++ b/make/iigs.mk @@ -30,8 +30,12 @@ PORT_C_SRCS_AUDIO := $(filter-out 
%/audio.c, $(PORT_C_SRCS_ALL)) # IIgs binary (the monolithic-link budget is tight). CORE_C_SRCS_IIGS := $(filter-out %/audioSfxMix.c, $(CORE_C_SRCS)) -LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) -LIB_SRCS_AUDIO := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS_AUDIO) +# Sprite codegen: 65816 emitter + cross-platform compile dispatch. +CODEGEN_SRCS := $(REPO_DIR)/src/codegen/spriteEmitIigs.c \ + $(REPO_DIR)/src/codegen/spriteCompile.c + +LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(CODEGEN_SRCS) +LIB_SRCS_AUDIO := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS_AUDIO) $(CODEGEN_SRCS) # NinjaTrackerPlus replayer. Assembled with Merlin32 from the staged # source at toolchains/iigs/ntp/ninjatrackerplus.s. Output is a 34 KB @@ -68,7 +72,8 @@ IIX_INCLUDES := \ -I $(IIGS_INCLUDE_SHIM) \ -I $(INCLUDE_DIR) \ -I $(INCLUDE_DIR)/joey \ - -I $(SRC_CORE) + -I $(SRC_CORE) \ + -I $(REPO_DIR)/src/codegen .PHONY: all iigs iigs-disk clean-iigs all iigs: $(HELLO_BIN) $(PATTERN_BIN) $(KEYS_BIN) $(JOY_BIN) $(SPRITE_BIN) $(AUDIO_BIN) $(NTP_BIN) @@ -104,9 +109,13 @@ $(JOY_BIN): $(JOY_SRC) $(LIB_SRCS) $(IIGS_BUILD) $(IIGS_BUILD) $(IIX_INCLUDES) -o $@ $(JOY_SRC) $(LIB_SRCS) $(IIGS_IIX) chtyp -t S16 $@ +# Sprite demo uses ORCA-C large memory model (-b) so pointers are +# 32-bit and the codegen-arena JSL stub can call cross-bank into the +# arena. Without -b, ORCA-C's 16-bit pointers would lose the bank +# byte and the stub would JSL into bank 0 (system memory) -> crash. 
$(SPRITE_BIN): $(SPRITE_SRC) $(LIB_SRCS) $(IIGS_BUILD) @mkdir -p $(dir $@) - $(IIGS_BUILD) $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS) + $(IIGS_BUILD) -b $(IIX_INCLUDES) -o $@ $(SPRITE_SRC) $(LIB_SRCS) $(IIGS_IIX) chtyp -t S16 $@ # IIgs override of test_assets.h: gTestMod[] holds .NTP-converted bytes diff --git a/make/tools.mk b/make/tools.mk index cee55bd..9dcaf3f 100644 --- a/make/tools.mk +++ b/make/tools.mk @@ -20,8 +20,25 @@ JOEYASSET_BIN := $(BUILD_DIR)/joeyasset MODLOOPEND_SRC := $(TOOLS_DIR)/modloopend/modloopend.c MODLOOPEND_BIN := $(BUILD_DIR)/modloopend +# joeysprite: pre-compiles tile data into a target-native .spr file +# at build time. Links all three per-CPU emitters and dispatches by +# --target argument. Same emitter source files the runtime uses, so +# the output bytes are identical to runtime spriteCompile output. +JOEYSPRITE_SRCS := \ + $(TOOLS_DIR)/joeysprite/joeysprite.c \ + $(REPO_DIR)/src/codegen/spriteEmitX86.c \ + $(REPO_DIR)/src/codegen/spriteEmit68k.c \ + $(REPO_DIR)/src/codegen/spriteEmitIigs.c +JOEYSPRITE_BIN := $(BUILD_DIR)/joeysprite +JOEYSPRITE_INC := -I$(REPO_DIR)/include -I$(REPO_DIR)/src/core -I$(REPO_DIR)/src/codegen +# The host tool doesn't have a real "target" -- it dispatches on +# --target at runtime. Define one of the platforms just to satisfy +# joey/platform.h's "exactly one defined" check; the choice doesn't +# affect output (JOEY_SPRITE_SHIFT_COUNT is 2 everywhere). 
+JOEYSPRITE_DEFS := -DJOEYLIB_PLATFORM_DOS + .PHONY: all tools clean-tools -all tools: $(JOEYASSET_BIN) $(MODLOOPEND_BIN) +all tools: $(JOEYASSET_BIN) $(MODLOOPEND_BIN) $(JOEYSPRITE_BIN) $(JOEYASSET_BIN): $(JOEYASSET_SRC) @mkdir -p $(dir $@) @@ -31,5 +48,9 @@ $(MODLOOPEND_BIN): $(MODLOOPEND_SRC) @mkdir -p $(dir $@) $(HOST_CC) $(HOST_CFLAGS) $< -o $@ +$(JOEYSPRITE_BIN): $(JOEYSPRITE_SRCS) + @mkdir -p $(dir $@) + $(HOST_CC) $(HOST_CFLAGS) $(JOEYSPRITE_INC) $(JOEYSPRITE_DEFS) $^ -o $@ + clean-tools: rm -rf $(BUILD_DIR) diff --git a/scripts/run-iigs-mame.sh b/scripts/run-iigs-mame.sh new file mode 100755 index 0000000..0daba86 --- /dev/null +++ b/scripts/run-iigs-mame.sh @@ -0,0 +1,203 @@ +#!/usr/bin/env bash +# Launch the IIgs sprite demo (or other example) under MAME's apple2gs +# driver instead of GSplus. We get: +# - The MAME debugger window (always visible with -debug) +# - debug.log file written to the working directory +# - A Lua hook that, when the CPU halts (e.g., BRK), dumps registers +# and the surrounding code bytes to /tmp/mame-iigs/crash.txt +# +# The boot path is the same as run-iigs.sh: gsos-system.po as flop3, our +# joey.2mg as flop4. Once Finder is up, navigate to JOEYLIB and double- +# click the example. +# +# Outputs of a run land in /tmp/mame-iigs/: +# - debug.log MAME's debugger console output (-debuglog) +# - crash.txt Lua-hook crash dump (if anything halts the CPU) +# - joeylog.txt Extracted from the post-run disk image + +set -euo pipefail + +prog=${1:-pattern} +repo=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) + +case $prog in + hello|pattern|keys|joy|sprite|audio) ;; + *) echo "usage: $0 [hello|pattern|keys|joy|sprite|audio]" >&2; exit 2 ;; +esac + +sys_disk=$repo/toolchains/emulators/support/gsos-system.po +data_disk=$repo/build/iigs/bin/joey.2mg + +for f in "$sys_disk" "$data_disk"; do + if [[ ! 
-f $f ]]; then + echo "missing: $f" >&2 + exit 1 + fi +done + +work=$(mktemp -d -t joeylib-mame.XXXXXX) +out=/tmp/mame-iigs +mkdir -p "$out" + +cp "$sys_disk" "$work/boot.po" +cp "$data_disk" "$work/joey.2mg" + +# Lua script: on every CPU stop (BRK, breakpoint, watchpoint, manual +# halt), append a state snapshot to crash.txt. This way we don't need +# the user to type anything at the debugger window -- whatever halts +# the CPU lands a record in crash.txt. +cat > "$work/crash-hook.lua" <<'LUA' +-- Crash diagnostics for IIgs demos. Auto-resumes the initial debug +-- pause so the user doesn't need to type "go". On any subsequent halt +-- (BRK, watchpoint, breakpoint) outside ROM, dumps registers + bytes +-- around PC to crash.txt. ROM halts (PB == 0xFE/0xFF) are skipped so +-- we don't fill the file with normal IIgs ROM stack walking. +local cpu = manager.machine.devices[":maincpu"] +local prog = cpu.spaces["program"] +local outpath = "/tmp/mame-iigs/crash.txt" + +local function in_rom(pb) + return pb == 0xFE or pb == 0xFF +end + +local function dump(label) + local f = io.open(outpath, "a") + if f == nil then return end + f:write(string.format("=== %s @ %s ===\n", label, os.date("%H:%M:%S"))) + for k, v in pairs(cpu.state) do + f:write(string.format(" %s = %X\n", k, v.value)) + end + local pc = cpu.state["CURPC"].value + local lo = (pc - 16) & 0xFFFFFF + f:write(string.format(" bytes %06X..%06X:", lo, lo + 32)) + for i = 0, 32 do + local b = prog:read_u8(lo + i) + f:write(string.format(" %02X", b)) + end + f:write("\n") + f:close() +end + +-- Lua can't reliably terminate MAME from this version's API; instead +-- write a marker file and let the bash launcher poll for it and kill +-- the process. "done" file = launcher should shut down. 
+local done_marker = "/tmp/mame-iigs/.done" +local started = false +local crashed = false +local boot_frames = 0 +local function signal_done(reason) + local f = io.open(done_marker, "w") + if f ~= nil then + f:write(reason) + f:close() + end +end + +emu.register_periodic(function() + local dbg = manager.machine.debugger + if dbg == nil then return end + if dbg.execution_state == "stop" then + if not started then + -- First halt is the -debug startup pause; auto-resume so + -- emulation begins without manual input. + started = true + dbg.execution_state = "run" + return + end + if not crashed then + crashed = true + dump("halt") + signal_done("halt") + end + else + boot_frames = boot_frames + 1 + -- Watchdog: ~30 wall-sec at 60 Hz. If nothing crashes by + -- then, dump current state (likely the demo running fine) + -- and tell the launcher to shut down so we can grab joeylog. + if boot_frames > 1800 and not crashed then + crashed = true + dump("watchdog") + signal_done("watchdog") + end + end +end) +LUA + +# Wipe prior crash.txt so we don't confuse runs. +: > "$out/crash.txt" + +cat </dev/null; then + for name in JOEYLOG.TXT joeylog.txt; do + if [[ -f $mnt/$name ]]; then + cp "$mnt/$name" "$out/joeylog.txt" + echo "extracted joeylog.txt -> $out/joeylog.txt" >&2 + break + fi + done + fusermount -u "$mnt" 2>/dev/null || true + fi + fi + rm -rf "$work" +} +trap cleanup EXIT + +# Headless by default (-video none). Set MAME_WINDOW=1 to get a real +# emulator window for interactive use. +video_arg="-video none" +if [[ "${MAME_WINDOW:-0}" = "1" ]]; then + video_arg="-window" +fi + +# Clear the done-marker the Lua hook uses to signal shutdown. +rm -f "$out/.done" + +cd "$work" +mame apple2gs \ + -flop3 "$work/boot.po" \ + -flop4 "$work/joey.2mg" \ + $video_arg -sound none \ + -debug -debuglog \ + -autoboot_script "$work/crash-hook.lua" & +mame_pid=$! + +# Poll for the done-marker. Kill MAME once Lua signals it. 
Cap total +# wall-clock at 60 s in case MAME never writes the marker. +deadline=$((SECONDS + 60)) +while kill -0 "$mame_pid" 2>/dev/null; do + if [[ -f $out/.done ]]; then + kill "$mame_pid" 2>/dev/null + break + fi + if (( SECONDS > deadline )); then + kill "$mame_pid" 2>/dev/null + break + fi + sleep 0.5 +done +wait "$mame_pid" 2>/dev/null || true diff --git a/scripts/run-iigs.sh b/scripts/run-iigs.sh index 5f749bf..423548e 100755 --- a/scripts/run-iigs.sh +++ b/scripts/run-iigs.sh @@ -21,6 +21,12 @@ set -euo pipefail prog=${1:-pattern} repo=$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd) +# profuse looks up its FST helpers under $GOLDEN_GATE / $ORCA_ROOT and +# falls back to /usr/share/orca which we don't install. Point both at +# our staged GoldenGate tree. +export GOLDEN_GATE="$repo/toolchains/iigs/goldengate" +export ORCA_ROOT="$GOLDEN_GATE" + gsplus=$repo/toolchains/emulators/gsplus/bin/gsplus rom=$repo/toolchains/emulators/support/apple-iigs.rom sys_disk=$repo/toolchains/emulators/support/gsos-system.po @@ -47,7 +53,47 @@ done # GSplus writes back to disk images during the session; stage writable # copies so repeated runs do not mutate the originals. work=$(mktemp -d -t joeylib-iigs.XXXXXX) -trap 'rm -rf "$work"' EXIT + +# After GSplus exits, mount the work copy of joey.2mg via profuse and +# pull joeylog.txt out (if the demo wrote one) before tearing down the +# scratch dir. This is a debug aid: the demos use joeyLog* to leave a +# breadcrumb file on the JOEYLIB volume, but the volume only lives +# inside the scratch dir while GSplus is running. +log_out=$repo/build/iigs/bin/joeylog.txt +extract_log() { + local profuse=$repo/toolchains/iigs/gg-tools/bin/profuse + local mnt=$work/_log_mnt + # Stash the post-run disk image for manual inspection. + if [[ -f $work/joey.2mg ]]; then + cp "$work/joey.2mg" "$repo/build/iigs/bin/joey-after-run.2mg" + echo "run-iigs: saved post-run disk -> $repo/build/iigs/bin/joey-after-run.2mg" >&2 + fi + if [[ ! 
-x $profuse ]]; then + echo "run-iigs: profuse not found at $profuse" >&2 + elif [[ ! -f $work/joey.2mg ]]; then + echo "run-iigs: $work/joey.2mg missing" >&2 + else + mkdir -p "$mnt" + if "$profuse" -oro "$work/joey.2mg" "$mnt"; then + echo "run-iigs: mounted $mnt; contents:" >&2 + ls -la "$mnt" >&2 || true + if [[ -f $mnt/JOEYLOG.TXT ]]; then + cp "$mnt/JOEYLOG.TXT" "$log_out" + echo "run-iigs: extracted joeylog.txt -> $log_out" >&2 + elif [[ -f $mnt/joeylog.txt ]]; then + cp "$mnt/joeylog.txt" "$log_out" + echo "run-iigs: extracted joeylog.txt -> $log_out" >&2 + else + echo "run-iigs: no JOEYLOG.TXT on disk" >&2 + fi + fusermount -u "$mnt" 2>/dev/null || true + else + echo "run-iigs: profuse mount FAILED" >&2 + fi + fi + rm -rf "$work" +} +trap extract_log EXIT cp "$sys_disk" "$work/boot.po" cp "$data_disk" "$work/joey.2mg" diff --git a/src/codegen/spriteCompile.c b/src/codegen/spriteCompile.c new file mode 100644 index 0000000..44ab902 --- /dev/null +++ b/src/codegen/spriteCompile.c @@ -0,0 +1,200 @@ +// Cross-platform sprite codegen runtime: spriteCompile uses the +// per-CPU emit function selected at compile time, allocates a slot +// in the codegen arena, copies the emitted bytes in, and populates +// sp->slot + sp->routineOffsets. spriteCompiledDraw casts the slot +// address to a function pointer and calls it through cdecl. +// +// Each per-CPU emitter (src/codegen/spriteEmit{X86,68k,Iigs}.c) +// just produces bytes; this file is the only consumer of the +// codegen arena from the sprite side. + +#include +#include + +#include "joey/sprite.h" +#include "joey/surface.h" +#include "codegenArenaInternal.h" +#include "spriteEmitter.h" +#include "spriteInternal.h" +#include "surfaceInternal.h" + +// Largest scratch buffer needed for any single emit call. 16 KB +// covers a 32x32 sprite even on 68k (the biggest mixed-RMW byte- +// emit at 16 bytes/byte * (16*17 dest bytes per shift) ~= 4.5 KB, +// times shift count 2). Round up generously. 
+#define SPRITE_EMIT_SCRATCH_BYTES (16u * 1024u)
+
+
+// Compile-time selection of the per-CPU emitter. One src/codegen/
+// spriteEmit*.c file is built per platform, but the dispatch lives
+// in this file so spriteCompile + spriteCompiledDraw aren't
+// duplicated three times.
+static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
+#if defined(JOEYLIB_PLATFORM_DOS)
+    return spriteEmitDrawX86(out, sp, shift);
+#elif defined(JOEYLIB_PLATFORM_AMIGA) || defined(JOEYLIB_PLATFORM_ATARIST)
+    return spriteEmitDraw68k(out, sp, shift);
+#elif defined(JOEYLIB_PLATFORM_IIGS)
+    return spriteEmitDrawIigs(out, sp, shift);
+#else
+# error "spriteCompile: no emitter selected for this platform"
+#endif
+}
+
+
+// Upper bound on the bytes a single emit call can produce for `sp`,
+// valid for every per-CPU emitter: at most 16 bytes per destination
+// byte (the 68k mixed read-modify-write sequence), at most 6 bytes of
+// per-row pointer advance (x86 add esi, imm32) and at most 8 bytes of
+// prologue + epilogue. Row and column counts are derived exactly the
+// way the emitters derive them (including the uint16_t truncation),
+// and the destination row is one byte wider than the sprite to cover
+// the odd-pixel shift variant. Saturates at 0xFFFFFFFF on overflow.
+static uint32_t emitWorstCaseBytes(const SpriteT *sp) {
+    uint32_t heightPx;
+    uint32_t destBytesPerRow;
+    uint32_t perRow;
+
+    heightPx        = (uint32_t)(uint16_t)(sp->heightTiles * 8);
+    destBytesPerRow = (uint32_t)(uint16_t)(sp->widthTiles * 4) + 1u;
+    perRow          = destBytesPerRow * 16u + 6u;
+    if (heightPx != 0u && perRow > (0xFFFFFFFFu - 8u) / heightPx) {
+        return 0xFFFFFFFFu;
+    }
+    return heightPx * perRow + 8u;
+}
+
+
+// Compile `sp` into native draw routines, one per shift variant,
+// stored back to back in a single codegen-arena slot.
+//
+// Returns true when the sprite is compiled (or already was), false
+// when sp is NULL, has no tile data, is too large to emit safely, or
+// when the scratch buffer / arena slot cannot be allocated. On false
+// the sprite is left untouched (sp->slot stays NULL).
+//
+// The routines are emitted twice: once into a scratch buffer purely
+// to learn their sizes, then again straight into the arena slot.
+bool spriteCompile(SpriteT *sp) {
+    uint8_t    *scratch;
+    uint32_t    worstCase;
+    uint32_t    scratchBytes;
+    uint32_t    totalSize;
+    uint8_t     shift;
+    ArenaSlotT *slot;
+    uint8_t    *dst;
+    uint16_t    written;
+    uint16_t    offset;
+
+    if (sp == NULL) {
+        return false;
+    }
+    if (sp->slot != NULL) {
+        return true;
+    }
+    if (sp->tileData == NULL) {
+        return false;
+    }
+
+    // The emitters take no capacity argument and count output with a
+    // uint16_t cursor. Bound the output up front: anything that could
+    // exceed 64 KB might wrap the cursor and report a bogus size, so
+    // refuse it; anything else gets a scratch buffer that is
+    // guaranteed to hold the largest possible routine.
+    worstCase = emitWorstCaseBytes(sp);
+    if (worstCase > 0xFFFFu) {
+        return false;
+    }
+    scratchBytes = (worstCase > SPRITE_EMIT_SCRATCH_BYTES) ? worstCase : SPRITE_EMIT_SCRATCH_BYTES;
+
+    scratch = (uint8_t *)malloc((size_t)scratchBytes);
+    if (scratch == NULL) {
+        return false;
+    }
+
+    // Pass 1: measure. Each shift variant overwrites the scratch
+    // buffer; only the byte counts are kept.
+    totalSize = 0;
+    for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
+        written = emitDrawForTarget(scratch, sp, shift);
+        totalSize += written;
+    }
+
+    // routineOffsets entries are 16-bit, so the combined routines
+    // must fit in 64 KB.
+    if (totalSize > 0xFFFFu) {
+        free(scratch);
+        return false;
+    }
+
+    slot = codegenArenaAlloc(totalSize);
+    if (slot == NULL) {
+        free(scratch);
+        return false;
+    }
+
+    // Pass 2: emit for real, directly into the arena slot.
+    dst = codegenArenaBase() + slot->offset;
+    offset = 0;
+    for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
+        written = emitDrawForTarget(dst + offset, sp, shift);
+        sp->routineOffsets[shift][SPRITE_OP_DRAW] = offset;
+        sp->routineOffsets[shift][SPRITE_OP_SAVE] = 0;
+        sp->routineOffsets[shift][SPRITE_OP_RESTORE] = 0;
+        offset = (uint16_t)(offset + written);
+    }
+    sp->slot = slot;
+    free(scratch);
+    return true;
+}
+
+
+#if defined(JOEYLIB_PLATFORM_IIGS)
+
+// IIgs uses inline asm + a self-modifying call stub instead of a C
+// function-pointer cast. The build uses ORCA-C large memory model
+// (-b for sprite demos) so pointers are 24-bit and JSL works
+// cross-bank.
+//
+// `sta abs,Y` on 65816 uses the data bank register (DBR) for the
+// high byte of the effective address, so we need DBR = dst's bank
+// during the body. malloc under -b can return memory in any bank,
+// so we don't trust DBR to already match -- the stub explicitly
+// sets DBR from the dst pointer's bank byte and restores it before
+// returning to C.
+//
+// Stub layout (14 bytes):
+//   00: 8B            PHB               ; save caller DBR
+//   01: A9 bk         LDA #destBank     ; A = dst bank (8-bit M)
+//   03: 48            PHA
+//   04: AB            PLB               ; DBR = dst bank
+//   05: A0 lo hi      LDY #destOffset   ; Y = low 16 of dst (X=16)
+//   08: 22 lo mid bk  JSL routine
+//   0C: AB            PLB               ; restore caller DBR
+//   0D: 6B            RTL
+//
+// Patched per call: byte 2 (destBank), bytes 6-7 (destOffset16),
+// bytes 9-11 (target 24-bit). The compiled routine assumes
+// M=8 / X=16 / Y=destOffset on entry; the stub arranges that.
+// Backing store for the 14-byte call stub whose layout is documented
+// in the comment preceding this declaration. Every byte is rewritten
+// on each spriteCompiledDraw call.
+static unsigned char gSpriteCallStub[14];
+
+// IIgs variant. Draws the compiled sprite `sp` with its top-left pixel
+// at (x, y) on `dst`: selects the routine for x's parity, patches the
+// call stub with the destination bank / 16-bit offset and the
+// routine's 24-bit address, then JSLs into the stub.
+//
+// No NULL or bounds checks are made here. NOTE(review): presumably the
+// dispatcher in src/core/sprite.c has already verified sp->slot and
+// clipped x/y to the surface -- confirm against the caller.
+void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
+    uint8_t shift;
+    uint32_t destAddr;
+    uint16_t destOffset;
+    uint8_t destBank;
+    uint32_t fnAddr;
+
+    {
+        uint8_t *destPtr;
+        uint8_t destBytes[4];
+        // Odd x selects the routine compiled for a one-nibble shift.
+        shift = (uint8_t)(x & 1);
+        // Two 4bpp pixels per byte, hence x >> 1.
+        destPtr = &dst->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
+        // Read the pointer's raw bytes instead of casting it to an
+        // integer; only the low three bytes (offset lo/hi, bank) are
+        // used.
+        memcpy(destBytes, &destPtr, 4);
+        destAddr = (uint32_t)destBytes[0]
+                 | ((uint32_t)destBytes[1] << 8)
+                 | ((uint32_t)destBytes[2] << 16);
+        destOffset = (uint16_t)(destAddr & 0xFFFFu);
+        destBank = (uint8_t)((destAddr >> 16) & 0xFFu);
+        // Absolute 24-bit address of the routine for this shift.
+        fnAddr = codegenArenaBaseAddr()
+               + sp->slot->offset
+               + (uint32_t)sp->routineOffsets[shift][SPRITE_OP_DRAW];
+    }
+    (void)destAddr;
+
+    // Rebuild the stub; see the layout table above the declaration of
+    // gSpriteCallStub for what each byte encodes.
+    gSpriteCallStub[ 0] = 0x8B;
+    gSpriteCallStub[ 1] = 0xA9;
+    gSpriteCallStub[ 2] = destBank;
+    gSpriteCallStub[ 3] = 0x48;
+    gSpriteCallStub[ 4] = 0xAB;
+    gSpriteCallStub[ 5] = 0xA0;
+    gSpriteCallStub[ 6] = (unsigned char)(destOffset & 0xFFu);
+    gSpriteCallStub[ 7] = (unsigned char)((destOffset >> 8) & 0xFFu);
+    gSpriteCallStub[ 8] = 0x22;
+    gSpriteCallStub[ 9] = (unsigned char)(fnAddr & 0xFFu);
+    gSpriteCallStub[10] = (unsigned char)((fnAddr >> 8) & 0xFFu);
+    gSpriteCallStub[11] = (unsigned char)((fnAddr >> 16) & 0xFFu);
+    gSpriteCallStub[12] = 0xAB;
+    gSpriteCallStub[13] = 0x6B;
+
+    // ORCA-C compiles this function under `longa on` (M=16) and emits
+    // the function epilogue assuming M=16 at exit -- the deallocation
+    // ADC takes a 2-byte immediate. The byte writes to gSpriteCallStub
+    // above leave M=8, so PHP captured M=8 and PLP would restore M=8.
+    // That mode mismatch caused the epilogue's `ADC #imm; TCS` bytes
+    // to be re-decoded as a wider ADC swallowing the TCS, S never
+    // adjusted, RTL popped the wrong bytes, control fell into BSS and
+    // BRK'd. Use REP/SEP without PHP/PLP and explicitly restore M=16
+    // before returning to compiled C.
+    //
+    // Sequence below: REP #$30 clears M and X (16-bit A and index),
+    // SEP #$20 sets M (8-bit A) to give the routine its M=8 / X=16
+    // entry state, and the final REP #$20 puts A back to 16-bit.
+    asm {
+        rep #0x30
+        sep #0x20
+        jsl gSpriteCallStub
+        rep #0x20
+    }
+}
+
+#else
+
+// Generic variant (DOS, Amiga, Atari ST). The emitted routine follows
+// the platform's C calling convention, so it is invoked through a
+// function pointer with the address of the sprite's first destination
+// byte. As with the IIgs variant, no NULL or bounds checks are made
+// here.
+void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
+    typedef void (*DrawFn)(uint8_t *destRow);
+    uint8_t shift;
+    uint8_t *destRow;
+    DrawFn fn;
+
+    // Odd x selects the routine compiled for a one-nibble shift; two
+    // 4bpp pixels share each destination byte, hence x >> 1.
+    shift = (uint8_t)(x & 1);
+    destRow = &dst->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
+    fn = (DrawFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_DRAW]);
+    fn(destRow);
+}
+
+#endif
diff --git a/src/codegen/spriteEmit68k.c b/src/codegen/spriteEmit68k.c
new file mode 100644
index 0000000..5147cec
--- /dev/null
+++ b/src/codegen/spriteEmit68k.c
@@ -0,0 +1,186 @@
+// 68k sprite codegen (Amiga + Atari ST). Emits SysV-ish cdecl-
+// callable PIC draw routines that write 4bpp packed surface bytes
+// via d16(a0) chains. Same shape as the x86 emitter; only the
+// instruction encoding differs.
+//
+// Calling convention (m68k gcc / mintlib):
+//   void draw(uint8_t *dst);   -- arg in 4(sp); a0/a1/d0/d1 caller-saved.
+//
+// Per-byte emit (no run coalescing yet):
+//   - all-transparent: skip
+//   - all-opaque: move.b #imm, d16(a0)                (6 bytes encoded)
+//   - mixed: move.b d16(a0),d0; andi.b #~mask,d0;
+//            ori.b #val,d0; move.b d0,d16(a0)         (4*4 = 16 bytes)
+// Per row (after first): adda.w #SURFACE_BYTES_PER_ROW, a0
+//                                                     (4 bytes encoded)
+// Prologue: movea.l 4(sp), a0                          (4 bytes)
+// Epilogue: rts                                        (2 bytes)
+//
+// All multi-byte instruction fields are big-endian; the emit writes
+// high-byte-first into the output stream so the target reads them
+// in native order.
+
+#include "joey/sprite.h"
+#include "joey/surface.h"
+#include "spriteEmitter.h"
+#include "spriteInternal.h"
+
+
+// ----- Constants -----
+
+// Tile geometry: 8 pixel rows of 4 bytes (two 4bpp pixels per byte),
+// 32 bytes per tile. Nibble value 0 is the transparent colour.
+#define TILE_PIXELS 8
+#define TILE_BYTES 32
+#define TILE_BYTES_PER_ROW 4
+#define TRANSPARENT_NIBBLE 0
+
+// NOTE(review): not referenced anywhere in this file; the emitter
+// itself does no bounds checking against it.
+#define MAX_ROUTINE_BYTES 16384
+
+
+// ----- Prototypes -----
+
+static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
+static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
+static uint16_t writeBE16(uint8_t *out, uint16_t value);
+
+
+// ----- Emit helpers (alphabetical) -----
+
+// Same logic as the x86 shiftedByteAt -- per-byte transparency
+// decomposition for shift in {0,1}. opaqueMask high nibble 0xF0 if
+// dest high nibble is opaque, 0x0F if low is opaque.
+//
+// `col` is a destination-byte column. For shift 0 it maps 1:1 onto the
+// sprite byte at the same column. For shift 1 the destination byte
+// takes its high nibble from the low nibble of sprite byte col-1 and
+// its low nibble from the high nibble of sprite byte col; columns
+// outside the sprite contribute nothing. Transparent nibbles are left
+// as 0 in *outValue.
+static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask) {
+    uint8_t srcByte;
+    uint8_t hi;
+    uint8_t lo;
+    bool hasLeft;
+    bool hasRight;
+
+    *outValue = 0;
+    *outOpaqueMask = 0;
+
+    if (shift == 0) {
+        if (col >= spriteBytesPerRow) {
+            return;
+        }
+        srcByte = spriteSourceByte(sp, row, col);
+        hi = (uint8_t)((srcByte >> 4) & 0x0Fu);
+        lo = (uint8_t)(srcByte & 0x0Fu);
+        if (hi != TRANSPARENT_NIBBLE) {
+            *outValue |= (uint8_t)(hi << 4);
+            *outOpaqueMask |= 0xF0u;
+        }
+        if (lo != TRANSPARENT_NIBBLE) {
+            *outValue |= lo;
+            *outOpaqueMask |= 0x0Fu;
+        }
+        return;
+    }
+    // shift == 1: straddle sprite bytes col-1 and col.
+    hasLeft = (col >= 1) && ((uint16_t)(col - 1) < spriteBytesPerRow);
+    hasRight = (col < spriteBytesPerRow);
+    if (hasLeft) {
+        srcByte = spriteSourceByte(sp, row, (uint16_t)(col - 1));
+        hi = (uint8_t)(srcByte & 0x0Fu);
+        if (hi != TRANSPARENT_NIBBLE) {
+            *outValue |= (uint8_t)(hi << 4);
+            *outOpaqueMask |= 0xF0u;
+        }
+    }
+    if (hasRight) {
+        srcByte = spriteSourceByte(sp, row, col);
+        lo = (uint8_t)((srcByte >> 4) & 0x0Fu);
+        if (lo != TRANSPARENT_NIBBLE) {
+            *outValue |= lo;
+            *outOpaqueMask |= 0x0Fu;
+        }
+    }
+}
+
+
+// Fetch the sprite byte at pixel row `row`, byte column `col`. Tile
+// data is stored tile by tile in row-major tile order
+// (tileY * widthTiles + tileX), 32 bytes per tile, 4 bytes per tile
+// row. The caller must keep row/col inside the sprite; nothing is
+// range-checked here.
+static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) {
+    uint16_t tileX;
+    uint16_t tileY;
+    uint16_t inTileX;
+    uint16_t inTileY;
+    const uint8_t *tile;
+
+    tileX = (uint16_t)(col / TILE_BYTES_PER_ROW);
+    tileY = (uint16_t)(row / TILE_PIXELS);
+    inTileX = (uint16_t)(col & (TILE_BYTES_PER_ROW - 1));
+    inTileY = (uint16_t)(row & (TILE_PIXELS - 1));
+    tile = sp->tileData + ((uint32_t)(tileY * sp->widthTiles + tileX)) * TILE_BYTES;
+    return tile[inTileY * TILE_BYTES_PER_ROW + inTileX];
+}
+
+
+// Emit a 16-bit big-endian value into the output stream. Returns 2.
+static uint16_t writeBE16(uint8_t *out, uint16_t value) {
+    out[0] = (uint8_t)((value >> 8) & 0xFFu);
+    out[1] = (uint8_t)(value & 0xFFu);
+    return 2;
+}
+
+
+// 68k draw emit. Returns bytes written.
+//
+//   out   - destination for the machine code; no capacity is passed,
+//           so the caller must supply a buffer large enough for the
+//           whole routine.
+//   sp    - sprite whose tileData / widthTiles / heightTiles are read.
+//   shift - 0 for even destination x, 1 for odd (one-nibble shift).
+//
+// The byte count is tracked in a uint16_t, so it wraps if a routine
+// would exceed 64 KB.
+uint16_t spriteEmitDraw68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
+    uint16_t cursor;
+    uint16_t row;
+    uint16_t col;
+    uint16_t heightPx;
+    uint16_t spriteBytesPerRow;
+    uint16_t destBytesPerRow;
+    uint8_t value;
+    uint8_t opaqueMask;
+
+    cursor = 0;
+    heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
+    spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
+    // A shifted sprite spills one nibble into an extra destination byte.
+    destBytesPerRow = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
+
+    // Prologue: movea.l 4(sp), a0
+    cursor += writeBE16(out + cursor, 0x206Fu);
+    cursor += writeBE16(out + cursor, 0x0004u);
+
+    for (row = 0; row < heightPx; row++) {
+        if (row > 0) {
+            // adda.w #SURFACE_BYTES_PER_ROW, a0
+            cursor += writeBE16(out + cursor, 0xD0FCu);
+            cursor += writeBE16(out + cursor, (uint16_t)SURFACE_BYTES_PER_ROW);
+        }
+        for (col = 0; col < destBytesPerRow; col++) {
+            shiftedByteAt(sp, row, col, shift, spriteBytesPerRow, &value, &opaqueMask);
+            if (opaqueMask == 0x00) {
+                continue;
+            }
+            if (opaqueMask == 0xFFu) {
+                // move.b #imm, d16(a0)
+                // Opcode 0x117C: bits 11-9 = dst reg (0=a0), bits 8-6 =
+                // dst mode (101 = an+d16), bits 5-3 = src mode (111),
+                // bits 2-0 = src reg (100 = immediate). Source
+                // extension (imm word, byte in low half) comes BEFORE
+                // dest extension (d16) in the instruction stream.
+                cursor += writeBE16(out + cursor, 0x117Cu);
+                cursor += writeBE16(out + cursor, (uint16_t)value);
+                cursor += writeBE16(out + cursor, col);
+            } else {
+                // Mixed byte: keep the destination's transparent
+                // nibble, replace only the opaque one.
+                // move.b d16(a0), d0
+                cursor += writeBE16(out + cursor, 0x1028u);
+                cursor += writeBE16(out + cursor, col);
+                // andi.b #~opaqueMask, d0
+                cursor += writeBE16(out + cursor, 0x0200u);
+                cursor += writeBE16(out + cursor, (uint16_t)(~opaqueMask & 0xFFu));
+                // ori.b #value, d0
+                cursor += writeBE16(out + cursor, 0x0000u);
+                cursor += writeBE16(out + cursor, (uint16_t)value);
+                // move.b d0, d16(a0)
+                cursor += writeBE16(out + cursor, 0x1140u);
+                cursor += writeBE16(out + cursor, col);
+            }
+        }
+    }
+
+    // Epilogue: rts
+    cursor += writeBE16(out + cursor, 0x4E75u);
+    return cursor;
+}
+
+
diff --git a/src/codegen/spriteEmitIigs.c b/src/codegen/spriteEmitIigs.c
new file mode 100644
index 0000000..adce9f0
--- /dev/null
+++ b/src/codegen/spriteEmitIigs.c
@@ -0,0 +1,182 @@
+// IIgs (65816) sprite codegen. Emits PIC draw routines that write
+// 4bpp packed surface bytes via abs,Y indexed addressing.
+//
+// CALLING CONVENTION: NOT the ORCA-C fn-pointer convention.
+// The runtime never calls these routines via a C cast -- instead,
+// spriteCompiledDraw (in spriteCompile.c, gated on
+// JOEYLIB_PLATFORM_IIGS) builds a self-modifying JSL stub that
+// loads Y with destRow then JSLs the routine. The routine assumes:
+//   - M = 8-bit  (set by stub before JSL)
+//   - X = 16-bit (set by stub)
+//   - Y = destRow (loaded by stub from immediate)
+//   - DBR = bank of the destination surface (the stub loads it from
+//     the dst pointer's bank byte and restores the caller's DBR
+//     after the routine returns)
+// No stack arg, no prologue. Body executes directly.
+//
+// Routine shape (per-byte emit, no PEA optimization yet):
+//   ... per byte:
+//     lda #imm        ; A = pixel-pair byte (opaque)
+//     sta abs,Y       ; write to dst[abs]
+//   ... mixed:
+//     lda abs,Y; and #~mask; ora #val; sta abs,Y
+//   rtl
+//
+// Position-independent: only abs constants are dest-byte offsets
+// (small, baked at emit time); Y holds the runtime dst pointer.
+//
+// All multi-byte operands are little-endian, written low byte first
+// into the output stream.
+
+#include "joey/sprite.h"
+#include "joey/surface.h"
+#include "spriteEmitter.h"
+#include "spriteInternal.h"
+
+
+// ----- Constants -----
+
+// Tile geometry: 8 pixel rows of 4 bytes (two 4bpp pixels per byte),
+// 32 bytes per tile. Nibble value 0 is the transparent colour.
+#define TILE_PIXELS 8
+#define TILE_BYTES 32
+#define TILE_BYTES_PER_ROW 4
+#define TRANSPARENT_NIBBLE 0
+
+// NOTE(review): not referenced anywhere in this file; the emitter
+// itself does no bounds checking against it.
+#define MAX_ROUTINE_BYTES 8192
+
+
+// ----- Prototypes -----
+
+static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
+static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
+static uint16_t writeLE16(uint8_t *out, uint16_t value);
+
+
+// ----- Emit helpers (alphabetical) -----
+
+// Work out what the sprite contributes to destination byte `col` of
+// `row` for shift 0 or 1. *outValue receives the pixel nibbles
+// (transparent nibbles left as 0); *outOpaqueMask gets 0xF0 and/or
+// 0x0F for each opaque nibble, or 0x00 when both are transparent.
+// For shift 1 the destination byte takes its high nibble from the low
+// nibble of sprite byte col-1 and its low nibble from the high nibble
+// of sprite byte col; columns outside the sprite contribute nothing.
+static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask) {
+    uint8_t srcByte;
+    uint8_t hi;
+    uint8_t lo;
+    bool hasLeft;
+    bool hasRight;
+
+    *outValue = 0;
+    *outOpaqueMask = 0;
+
+    if (shift == 0) {
+        if (col >= spriteBytesPerRow) {
+            return;
+        }
+        srcByte = spriteSourceByte(sp, row, col);
+        hi = (uint8_t)((srcByte >> 4) & 0x0Fu);
+        lo = (uint8_t)(srcByte & 0x0Fu);
+        if (hi != TRANSPARENT_NIBBLE) {
+            *outValue |= (uint8_t)(hi << 4);
+            *outOpaqueMask |= 0xF0u;
+        }
+        if (lo != TRANSPARENT_NIBBLE) {
+            *outValue |= lo;
+            *outOpaqueMask |= 0x0Fu;
+        }
+        return;
+    }
+    // shift == 1: straddle sprite bytes col-1 and col.
+    hasLeft = (col >= 1) && ((uint16_t)(col - 1) < spriteBytesPerRow);
+    hasRight = (col < spriteBytesPerRow);
+    if (hasLeft) {
+        srcByte = spriteSourceByte(sp, row, (uint16_t)(col - 1));
+        hi = (uint8_t)(srcByte & 0x0Fu);
+        if (hi != TRANSPARENT_NIBBLE) {
+            *outValue |= (uint8_t)(hi << 4);
+            *outOpaqueMask |= 0xF0u;
+        }
+    }
+    if (hasRight) {
+        srcByte = spriteSourceByte(sp, row, col);
+        lo = (uint8_t)((srcByte >> 4) & 0x0Fu);
+        if (lo != TRANSPARENT_NIBBLE) {
+            *outValue |= lo;
+            *outOpaqueMask |= 0x0Fu;
+        }
+    }
+}
+
+
+// Fetch the sprite byte at pixel row `row`, byte column `col`. Tile
+// data is stored tile by tile in row-major tile order
+// (tileY * widthTiles + tileX), 32 bytes per tile, 4 bytes per tile
+// row. The caller must keep row/col inside the sprite; nothing is
+// range-checked here.
+static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) {
+    uint16_t tileX;
+    uint16_t tileY;
+    uint16_t inTileX;
+    uint16_t inTileY;
+    const uint8_t *tile;
+
+    tileX = (uint16_t)(col / TILE_BYTES_PER_ROW);
+    tileY = (uint16_t)(row / TILE_PIXELS);
+    inTileX = (uint16_t)(col & (TILE_BYTES_PER_ROW - 1));
+    inTileY = (uint16_t)(row & (TILE_PIXELS - 1));
+    tile = sp->tileData + ((uint32_t)(tileY * sp->widthTiles + tileX)) * TILE_BYTES;
+    return tile[inTileY * TILE_BYTES_PER_ROW + inTileX];
+}
+
+
+// 65816 is little-endian; write low byte first. Returns 2.
+static uint16_t writeLE16(uint8_t *out, uint16_t value) {
+    out[0] = (uint8_t)(value & 0xFFu);
+    out[1] = (uint8_t)((value >> 8) & 0xFFu);
+    return 2;
+}
+
+
+// 65816 draw emit. Returns bytes written.
+//
+//   out   - destination for the machine code; no capacity is passed,
+//           so the caller must supply a buffer large enough for the
+//           whole routine.
+//   sp    - sprite whose tileData / widthTiles / heightTiles are read.
+//   shift - 0 for even destination x, 1 for odd (one-nibble shift).
+//
+// Unlike the x86 / 68k emitters there is no per-row pointer advance:
+// each store bakes the full row*SURFACE_BYTES_PER_ROW + col offset
+// into its 16-bit absolute operand. The byte count is tracked in a
+// uint16_t, so it wraps if a routine would exceed 64 KB.
+uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
+    uint16_t cursor;
+    uint16_t row;
+    uint16_t col;
+    uint16_t heightPx;
+    uint16_t spriteBytesPerRow;
+    uint16_t destBytesPerRow;
+    uint16_t absOffset;
+    uint8_t value;
+    uint8_t opaqueMask;
+
+    cursor = 0;
+    heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
+    spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
+    // A shifted sprite spills one nibble into an extra destination byte.
+    destBytesPerRow = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
+
+    // No prologue: caller (the inline-asm stub in spriteCompile.c)
+    // sets M=8/X=16/Y=destRow before JSL'ing here.
+
+    for (row = 0; row < heightPx; row++) {
+        for (col = 0; col < destBytesPerRow; col++) {
+            shiftedByteAt(sp, row, col, shift, spriteBytesPerRow, &value, &opaqueMask);
+            if (opaqueMask == 0x00) {
+                continue;
+            }
+            // Offset of this byte from the sprite's first destination
+            // byte; the runtime base arrives in Y.
+            absOffset = (uint16_t)(row * SURFACE_BYTES_PER_ROW + col);
+            if (opaqueMask == 0xFFu) {
+                // lda #imm        A9 ii
+                // sta abs,Y       99 lo hi
+                out[cursor++] = 0xA9;
+                out[cursor++] = value;
+                out[cursor++] = 0x99;
+                cursor += writeLE16(out + cursor, absOffset);
+            } else {
+                // Mixed byte: keep the destination's transparent
+                // nibble, replace only the opaque one.
+                // lda abs,Y       B9 lo hi
+                // and #mask       29 mm
+                // ora #val        09 vv
+                // sta abs,Y       99 lo hi
+                out[cursor++] = 0xB9;
+                cursor += writeLE16(out + cursor, absOffset);
+                out[cursor++] = 0x29;
+                out[cursor++] = (uint8_t)(~opaqueMask & 0xFFu);
+                out[cursor++] = 0x09;
+                out[cursor++] = value;
+                out[cursor++] = 0x99;
+                cursor += writeLE16(out + cursor, absOffset);
+            }
+        }
+    }
+
+    // Epilogue: rtl (large memory model -b uses JSL/RTL).
+    out[cursor++] = 0x6B;
+    return cursor;
+}
+
+
diff --git a/src/codegen/spriteEmitStub.c b/src/codegen/spriteEmitStub.c
new file mode 100644
index 0000000..db6206f
--- /dev/null
+++ b/src/codegen/spriteEmitStub.c
@@ -0,0 +1,25 @@
+// Stub sprite codegen. Used by ports that don't yet have a per-CPU
+// emitter so the link still resolves.
+// spriteCompile always returns false (sprite stays interpreter-only);
+// spriteCompiledDraw is unreachable because the dispatcher in
+// src/core/sprite.c gates the call on sp->slot != NULL.
+
+#include "joey/sprite.h"
+#include "joey/surface.h"
+#include "spriteInternal.h"
+
+
+// Stub: never compiles anything. Always reports failure and leaves
+// `sp` untouched, so sp->slot is never set by this build.
+bool spriteCompile(SpriteT *sp) {
+    (void)sp;
+    return false;
+}
+
+
+// Stub: deliberate no-op that ignores all of its arguments.
+void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
+    (void)dst;
+    (void)sp;
+    (void)x;
+    (void)y;
+    // unreachable: spriteDraw guards on sp->slot != NULL, which the
+    // stub never sets. Body is here only to satisfy the linker.
+}
diff --git a/src/codegen/spriteEmitX86.c b/src/codegen/spriteEmitX86.c
new file mode 100644
index 0000000..8247811
--- /dev/null
+++ b/src/codegen/spriteEmitX86.c
@@ -0,0 +1,191 @@
+// x86 sprite codegen (DOS port). Emits 32-bit cdecl-callable PIC
+// draw routines that write 4bpp packed surface bytes via
+// [esi+disp8] chains. The C side calls them through a function
+// pointer cast.
+//
+// Calling convention:
+//   draw(uint8_t *dst)        -- esi advances row by row
+//
+// Save and restore are not compiled -- they're uniform memcpy-
+// shaped operations and the C interpreter handles them at memcpy
+// speed via the standard library.
+//
+// Per-byte emit (no run coalescing yet):
+//   - all-transparent (both nibbles 0): skip, no instruction
+//   - all-opaque: mov byte [esi+col], imm8           (4 bytes encoded)
+//   - mixed: mov al,[esi+col]; and al,mask; or al,val; mov [esi+col],al
+//            (3 + 2 + 2 + 3 = 10 bytes)
+//   Byte counts above are for col <= 127, where the 8-bit displacement
+//   form applies. disp8 is sign-extended by the CPU, so larger columns
+//   use the [esi+disp32] form, which adds 3 bytes per memory operand
+//   (7 bytes opaque, 16 bytes mixed).
+// Per row:
+//   add esi, SURFACE_BYTES_PER_ROW                    (6 bytes encoded)
+// Prologue:
+//   push esi; mov esi, [esp+8]                        (1 + 4 = 5 bytes)
+// Epilogue:
+//   pop esi; ret                                      (1 + 1 = 2 bytes)
+
+#include "joey/sprite.h"
+#include "joey/surface.h"
+#include "spriteEmitter.h"
+#include "spriteInternal.h"
+
+
+// ----- Constants -----
+
+// Tile geometry: 8 pixel rows of 4 bytes (two 4bpp pixels per byte),
+// 32 bytes per tile. Nibble value 0 is the transparent colour.
+#define TILE_PIXELS 8
+#define TILE_BYTES 32
+#define TILE_BYTES_PER_ROW 4
+#define TRANSPARENT_NIBBLE 0
+
+// Worst-case bytes per emitted routine for a 32x32 sprite: 32 rows *
+// (16 dest bytes + 1 for shift1) = 544 dest-byte slots, each up to
+// 10 bytes mixed = 5440; plus per-row pointer advances 31*6=186;
+// plus prologue/epilogue 7. Round up generously.
+// NOTE(review): not referenced anywhere in this file; the emitter
+// itself does no bounds checking against it.
+#define MAX_ROUTINE_BYTES 8192
+
+
+// ----- Prototypes -----
+
+static uint16_t emitEsiDisp(uint8_t *out, uint8_t opcode, uint16_t disp);
+static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
+static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
+
+
+// ----- Emit helpers (alphabetical) -----
+
+// Emit `opcode` followed by a ModRM byte addressing [esi+disp] with a
+// reg field of 0 (AL for 8A / 88, the /0 extension for C6), then the
+// displacement. An 8-bit displacement is sign-extended by the CPU, so
+// only 0..127 may use the short form; anything larger is emitted with
+// a 32-bit displacement, otherwise the access would land *before*
+// esi. Returns the number of bytes written (3 or 6).
+static uint16_t emitEsiDisp(uint8_t *out, uint8_t opcode, uint16_t disp) {
+    out[0] = opcode;
+    if (disp <= 0x7Fu) {
+        out[1] = 0x46;                              // mod=01 reg=000 rm=110 (esi), disp8
+        out[2] = (uint8_t)disp;
+        return 3;
+    }
+    out[1] = 0x86;                                  // mod=10 reg=000 rm=110 (esi), disp32
+    out[2] = (uint8_t)(disp & 0xFFu);
+    out[3] = (uint8_t)((disp >> 8) & 0xFFu);
+    out[4] = 0x00;
+    out[5] = 0x00;
+    return 6;
+}
+
+
+// Decompose a destination byte's contribution from the sprite into
+// (value, opaqueMask) for shift in {0, 1}. opaqueMask high nibble
+// 0xF0 means high dest nibble is opaque; 0x0F means low is opaque;
+// 0x00 means both transparent. value's transparent nibbles are 0.
+static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask) {
+    uint8_t srcByte;
+    uint8_t hi;
+    uint8_t lo;
+    bool hasLeft;
+    bool hasRight;
+
+    *outValue = 0;
+    *outOpaqueMask = 0;
+
+    if (shift == 0) {
+        if (col >= spriteBytesPerRow) {
+            return;
+        }
+        srcByte = spriteSourceByte(sp, row, col);
+        hi = (uint8_t)((srcByte >> 4) & 0x0Fu);
+        lo = (uint8_t)(srcByte & 0x0Fu);
+        if (hi != TRANSPARENT_NIBBLE) {
+            *outValue |= (uint8_t)(hi << 4);
+            *outOpaqueMask |= 0xF0u;
+        }
+        if (lo != TRANSPARENT_NIBBLE) {
+            *outValue |= lo;
+            *outOpaqueMask |= 0x0Fu;
+        }
+        return;
+    }
+    // shift = 1
+    hasLeft = (col >= 1) && ((uint16_t)(col - 1) < spriteBytesPerRow);
+    hasRight = (col < spriteBytesPerRow);
+    if (hasLeft) {
+        srcByte = spriteSourceByte(sp, row, (uint16_t)(col - 1));
+        hi = (uint8_t)(srcByte & 0x0Fu);            // sprite byte's LOW nibble
+        if (hi != TRANSPARENT_NIBBLE) {
+            *outValue |= (uint8_t)(hi << 4);
+            *outOpaqueMask |= 0xF0u;
+        }
+    }
+    if (hasRight) {
+        srcByte = spriteSourceByte(sp, row, col);
+        lo = (uint8_t)((srcByte >> 4) & 0x0Fu);     // sprite byte's HIGH nibble
+        if (lo != TRANSPARENT_NIBBLE) {
+            *outValue |= lo;
+            *outOpaqueMask |= 0x0Fu;
+        }
+    }
+}
+
+
+// Sample a sprite tile-data byte at (row, col) where col is in
+// sprite-byte coordinates (0..spriteBytesPerRow-1).
+static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) {
+    uint16_t tileX;
+    uint16_t tileY;
+    uint16_t inTileX;
+    uint16_t inTileY;
+    const uint8_t *tile;
+
+    tileX = (uint16_t)(col / TILE_BYTES_PER_ROW);
+    tileY = (uint16_t)(row / TILE_PIXELS);
+    inTileX = (uint16_t)(col & (TILE_BYTES_PER_ROW - 1));
+    inTileY = (uint16_t)(row & (TILE_PIXELS - 1));
+    tile = sp->tileData + ((uint32_t)(tileY * sp->widthTiles + tileX)) * TILE_BYTES;
+    return tile[inTileY * TILE_BYTES_PER_ROW + inTileX];
+}
+
+
+// Emit a draw routine for one shift variant. Returns bytes written.
+// Routine signature: void f(uint8_t *dst).
+//
+//   out   - destination for the machine code; no capacity is passed,
+//           so the caller must supply a buffer large enough for the
+//           whole routine.
+//   sp    - sprite whose tileData / widthTiles / heightTiles are read.
+//   shift - 0 for even destination x, 1 for odd (one-nibble shift).
+//
+// The byte count is tracked in a uint16_t, so it wraps if a routine
+// would exceed 64 KB.
+uint16_t spriteEmitDrawX86(uint8_t *out, const SpriteT *sp, uint8_t shift) {
+    uint16_t cursor;
+    uint16_t row;
+    uint16_t col;
+    uint16_t heightPx;
+    uint16_t spriteBytesPerRow;
+    uint16_t destBytesPerRow;
+    uint8_t value;
+    uint8_t opaqueMask;
+
+    cursor = 0;
+    heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
+    spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
+    // A shifted sprite spills one nibble into an extra destination byte.
+    destBytesPerRow = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0));
+
+    // Prologue: push esi; mov esi, [esp+8]
+    out[cursor++] = 0x56;                           // push esi
+    out[cursor++] = 0x8B; out[cursor++] = 0x74;
+    out[cursor++] = 0x24; out[cursor++] = 0x08;
+
+    // Body: per row, per dest byte.
+    for (row = 0; row < heightPx; row++) {
+        if (row > 0) {
+            // add esi, SURFACE_BYTES_PER_ROW   (32-bit imm)
+            out[cursor++] = 0x81; out[cursor++] = 0xC6;
+            out[cursor++] = (uint8_t)(SURFACE_BYTES_PER_ROW & 0xFFu);
+            out[cursor++] = (uint8_t)((SURFACE_BYTES_PER_ROW >> 8) & 0xFFu);
+            out[cursor++] = 0x00;
+            out[cursor++] = 0x00;
+        }
+        for (col = 0; col < destBytesPerRow; col++) {
+            shiftedByteAt(sp, row, col, shift, spriteBytesPerRow, &value, &opaqueMask);
+            if (opaqueMask == 0x00) {
+                continue;                           // both nibbles transparent
+            }
+            if (opaqueMask == 0xFFu) {
+                // mov byte [esi+col], imm8    (C6 /0 disp ii)
+                cursor = (uint16_t)(cursor + emitEsiDisp(out + cursor, 0xC6, col));
+                out[cursor++] = value;
+            } else {
+                // Mixed: read-modify-write.
+                //   mov al, [esi+col]         (8A /r disp)
+                //   and al, ~opaqueMask       (24 mm)
+                //   or  al, value             (0C vv)
+                //   mov [esi+col], al         (88 /r disp)
+                cursor = (uint16_t)(cursor + emitEsiDisp(out + cursor, 0x8A, col));
+                out[cursor++] = 0x24;
+                out[cursor++] = (uint8_t)(~opaqueMask & 0xFFu);
+                out[cursor++] = 0x0C;
+                out[cursor++] = value;
+                cursor = (uint16_t)(cursor + emitEsiDisp(out + cursor, 0x88, col));
+            }
+        }
+    }
+
+    // Epilogue: pop esi; ret
+    out[cursor++] = 0x5E;
+    out[cursor++] = 0xC3;
+    return cursor;
+}
+
+
diff --git a/src/codegen/spriteEmitter.h b/src/codegen/spriteEmitter.h
new file mode 100644
index 0000000..940295b
--- /dev/null
+++ b/src/codegen/spriteEmitter.h
@@ -0,0 +1,26 @@
+// Internal interface for per-CPU sprite emitters.
+//
+// Each src/codegen/spriteEmit.c file implements its own emit
+// function. spriteCompile.c picks the right one at compile time
+// (via #ifdef on JOEYLIB_PLATFORM_*) for the runtime build; the
+// host-side joeysprite tool links all of them and dispatches by
+// --target argument.
+//
+// Each emit function takes the sprite + shift variant and writes
+// position-independent draw-routine bytes into `out`. Returns the
+// number of bytes written. The output bytes follow the target CPU's
+// calling convention as documented in the per-CPU file's header
+// comment, so the bytes can be called via a C function pointer cast
+// once placed in executable memory.
+ +#ifndef JOEYLIB_SPRITE_EMITTER_H +#define JOEYLIB_SPRITE_EMITTER_H + +#include "joey/sprite.h" +#include "spriteInternal.h" + +uint16_t spriteEmitDrawX86 (uint8_t *out, const SpriteT *sp, uint8_t shift); +uint16_t spriteEmitDraw68k (uint8_t *out, const SpriteT *sp, uint8_t shift); +uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift); + +#endif diff --git a/src/core/codegenArena.c b/src/core/codegenArena.c new file mode 100644 index 0000000..46d3487 --- /dev/null +++ b/src/core/codegenArena.c @@ -0,0 +1,297 @@ +// Codegen arena: free-list allocator with adjacent-hole coalescing +// and manual compaction. See codegenArenaInternal.h for the contract. + +#include "joey/platform.h" + +#if defined(JOEYLIB_PLATFORM_IIGS) +// On the IIgs the arena holds 65816 machine code that callers JSL +// into. ORCA-C's malloc returns memory from the C heap (often bank 0, +// which is system RAM) so JSL'ing into it lands on whatever happens +// to live there -> instant crash. Memory Manager NewHandle with +// attrFixed | attrLocked | attrPage | attrNoCross gives us a fixed +// page-aligned region in a single bank we can safely jump into. +// +// types.h must be included before our stdbool shim because ORCA's +// types.h defines true/false as #define ... without #ifndef guards +// and would re-#define our shim's macros. +#include +#include +#endif + +#include +#include + +#include "codegenArenaInternal.h" + + +// ----- Module state ----- + +static uint8_t *gBase = NULL; +// gBaseAddr mirrors gBase as a 24-bit absolute address. ORCA-C's +// (uint32_t)pointer cast on the IIgs zeros the bank byte for some +// pointer expressions, so JSL targets read this field directly. 
+static uint32_t gBaseAddr = 0;
+static uint32_t gTotalBytes = 0;
+static uint32_t gUsedBytes = 0;
+static ArenaSlotT *gFirstSlot = NULL;
+#if defined(JOEYLIB_PLATFORM_IIGS)
+static Handle gBaseHandle = NULL;
+#endif
+
+
+// ----- Prototypes -----
+
+static ArenaSlotT *newSlot(uint32_t offset, uint32_t size, bool used);
+static void coalesceWithNext(ArenaSlotT *slot);
+
+
+// ----- Internal helpers (alphabetical) -----
+
+// If `slot` is free and slot->next is also free, merge them into a
+// single free slot. Caller must already have ensured slot is free.
+// The absorbed record (slot->next) is released, so any pointer to it
+// is dangling afterwards. A NULL slot or a missing neighbour is a
+// no-op.
+static void coalesceWithNext(ArenaSlotT *slot) {
+    ArenaSlotT *victim;
+
+    if (slot == NULL || slot->next == NULL) {
+        return;
+    }
+    if (slot->used || slot->next->used) {
+        return;
+    }
+    victim = slot->next;
+    slot->size += victim->size;
+    slot->next = victim->next;
+    if (slot->next != NULL) {
+        slot->next->prev = slot;
+    }
+    free(victim);
+}
+
+
+// Allocate and initialise an unlinked slot record (next/prev NULL).
+// Returns NULL if the C heap is exhausted.
+static ArenaSlotT *newSlot(uint32_t offset, uint32_t size, bool used) {
+    ArenaSlotT *s;
+
+    s = (ArenaSlotT *)malloc(sizeof(ArenaSlotT));
+    if (s == NULL) {
+        return NULL;
+    }
+    s->offset = offset;
+    s->size = size;
+    s->used = used;
+    s->next = NULL;
+    s->prev = NULL;
+    return s;
+}
+
+
+// ----- Public-internal API (alphabetical) -----
+
+// First-fit allocation of `bytes` from the arena. Returns the slot
+// describing the region, or NULL when the arena is not initialised,
+// bytes is 0, no free slot is large enough, or the bookkeeping record
+// for a split remainder cannot be allocated.
+ArenaSlotT *codegenArenaAlloc(uint32_t bytes) {
+    ArenaSlotT *slot;
+    ArenaSlotT *remainder;
+
+    if (gBase == NULL || bytes == 0) {
+        return NULL;
+    }
+    for (slot = gFirstSlot; slot != NULL; slot = slot->next) {
+        if (slot->used || slot->size < bytes) {
+            continue;
+        }
+        // First fit. If there's slack, split: shrink this slot to
+        // exactly `bytes` and insert a new free slot for the rest.
+        if (slot->size > bytes) {
+            remainder = newSlot(slot->offset + bytes, slot->size - bytes, false);
+            if (remainder == NULL) {
+                return NULL;
+            }
+            remainder->prev = slot;
+            remainder->next = slot->next;
+            if (slot->next != NULL) {
+                slot->next->prev = remainder;
+            }
+            slot->next = remainder;
+            slot->size = bytes;
+        }
+        slot->used = true;
+        gUsedBytes += bytes;
+        return slot;
+    }
+    return NULL;
+}
+
+
+// Start of the arena's memory, or NULL before initialisation.
+uint8_t *codegenArenaBase(void) {
+    return gBase;
+}
+
+
+// The arena base as an absolute integer address (see the note on
+// gBaseAddr in the module state).
+uint32_t codegenArenaBaseAddr(void) {
+    return gBaseAddr;
+}
+
+
+// Total capacity of the arena in bytes.
+uint32_t codegenArenaBytesTotal(void) {
+    return gTotalBytes;
+}
+
+
+// Bytes currently held by used slots.
+uint32_t codegenArenaBytesUsed(void) {
+    return gUsedBytes;
+}
+
+
+// Slide every used slot down to the lowest free offset (updating its
+// `offset` in place, so slot pointers held by callers stay valid) and
+// collapse all free space into one trailing free slot. The bytes of a
+// used slot move, so callers must recompute any address derived from
+// base + offset afterwards.
+//
+// One of the unlinked free-slot records is recycled as the trailing
+// slot, so compaction cannot lose the free region to a failed malloc.
+void codegenArenaCompact(void) {
+    ArenaSlotT *slot;
+    ArenaSlotT *next;
+    ArenaSlotT *last;
+    ArenaSlotT *spare;
+    uint32_t    cursor;
+
+    if (gBase == NULL) {
+        return;
+    }
+    cursor = 0;
+    last   = NULL;      // last used slot kept in the list so far
+    spare  = NULL;      // one unlinked free record kept for reuse
+    slot   = gFirstSlot;
+    while (slot != NULL) {
+        next = slot->next;
+        if (slot->used) {
+            if (slot->offset != cursor) {
+                // Regions can overlap when sliding down: memmove.
+                memmove(gBase + cursor, gBase + slot->offset, slot->size);
+                slot->offset = cursor;
+            }
+            cursor += slot->size;
+            last = slot;
+            slot = next;
+            continue;
+        }
+        // Free slot: drop from the list. The caller-side ArenaSlotT*
+        // for any free slot was already invalidated when it was
+        // freed (coalesce released the struct); nothing live points
+        // at it.
+        if (slot->prev != NULL) {
+            slot->prev->next = next;
+        } else {
+            gFirstSlot = next;
+        }
+        if (next != NULL) {
+            next->prev = slot->prev;
+        }
+        if (spare == NULL) {
+            spare = slot;
+        } else {
+            free(slot);
+        }
+        slot = next;
+    }
+
+    if (cursor >= gTotalBytes) {
+        // Arena is completely full: no trailing free slot needed.
+        free(spare);
+        return;
+    }
+    if (spare == NULL) {
+        // No free record to recycle (the list did not cover the tail
+        // of the arena); fall back to allocating one.
+        spare = newSlot(cursor, gTotalBytes - cursor, false);
+        if (spare == NULL) {
+            return;     // Compaction succeeded; just skip the free-slot record.
+        }
+    }
+    spare->offset = cursor;
+    spare->size   = gTotalBytes - cursor;
+    spare->used   = false;
+    spare->next   = NULL;
+    spare->prev   = last;
+    if (last == NULL) {
+        gFirstSlot = spare;
+    } else {
+        last->next = spare;
+    }
+}
+
+
+// Return `slot` to the arena and merge it with free neighbours. NULL
+// slots, an uninitialised arena and double frees are ignored. The
+// slot record itself may be released by the merge, so the caller must
+// not use `slot` again after this returns.
+void codegenArenaFree(ArenaSlotT *slot) {
+    if (slot == NULL || gBase == NULL) {
+        return;
+    }
+    if (!slot->used) {
+        return;     // double-free; ignore
+    }
+    slot->used = false;
+    gUsedBytes -= slot->size;
+
+    // Absorb the following free slot first, then let a free
+    // predecessor absorb this one.
+    coalesceWithNext(slot);
+    coalesceWithNext(slot->prev);
+}
+
+
+bool codegenArenaInit(uint32_t totalBytes) {
+    if (gBase != NULL) {
+        return true;
+    }
+    if (totalBytes == 0) {
+        return false;
+    }
+#if defined(JOEYLIB_PLATFORM_IIGS)
+    gBaseHandle = NewHandle(totalBytes, _ownerid,
+                            attrFixed | attrLocked | attrPage | attrNoCross,
+                            NULL);
+    if (gBaseHandle == NULL || _toolErr != 0) {
+        gBaseHandle = NULL;
+        return false;
+    }
+    HLock(gBaseHandle);
+    // Capture the 24-bit absolute address by copying the Pointer's
+    // raw bytes -- (uint32_t)pointer through a chain of expressions
+    // has been observed to drop the bank byte under ORCA-C's
+    // memorymodel 1, but a memcpy of the underlying 4 bytes is
+    // reliable. The high byte (bytes[3]) is undefined and masked off.
+ { + Pointer p; + uint8_t bytes[4]; + p = *gBaseHandle; + gBase = (uint8_t *)p; + memcpy(bytes, &p, 4); + gBaseAddr = (uint32_t)bytes[0] + | ((uint32_t)bytes[1] << 8) + | ((uint32_t)bytes[2] << 16); + } + if (gBase == NULL) { + DisposeHandle(gBaseHandle); + gBaseHandle = NULL; + return false; + } +#else + gBase = (uint8_t *)malloc(totalBytes); + if (gBase == NULL) { + return false; + } + gBaseAddr = (uint32_t)gBase; +#endif + gFirstSlot = newSlot(0, totalBytes, false); + if (gFirstSlot == NULL) { +#if defined(JOEYLIB_PLATFORM_IIGS) + DisposeHandle(gBaseHandle); + gBaseHandle = NULL; +#else + free(gBase); +#endif + gBase = NULL; + gBaseAddr = 0; + return false; + } + gTotalBytes = totalBytes; + gUsedBytes = 0; + return true; +} + + +void codegenArenaShutdown(void) { + ArenaSlotT *slot; + ArenaSlotT *next; + + if (gBase == NULL) { + return; + } + for (slot = gFirstSlot; slot != NULL; slot = next) { + next = slot->next; + free(slot); + } +#if defined(JOEYLIB_PLATFORM_IIGS) + DisposeHandle(gBaseHandle); + gBaseHandle = NULL; +#else + free(gBase); +#endif + gBase = NULL; + gBaseAddr = 0; + gFirstSlot = NULL; + gTotalBytes = 0; + gUsedBytes = 0; +} diff --git a/src/core/codegenArenaInternal.h b/src/core/codegenArenaInternal.h new file mode 100644 index 0000000..1e28347 --- /dev/null +++ b/src/core/codegenArenaInternal.h @@ -0,0 +1,76 @@ +// Codegen arena: a pool of executable memory that holds compiled +// sprite routines. Allocator is first-fit with adjacent-hole +// coalescing on free; manual compaction (codegenArenaCompact) +// memmoves live slots down to consolidate free space. +// +// SpriteT references slots via ArenaSlotT*, never raw function +// pointers. spriteDraw computes the call address each invocation as +// `gArenaBase + slot->offset + routineOffsets[shift][op]` so a +// compaction that moves a slot's bytes is transparent to callers. 
+// +// Allocations go through plain malloc; on every supported port +// (IIgs ORCA, Amiga libnix, ST mintlib, DJGPP+CWSDPMI) the heap is +// readable, writable, AND executable. We are not running in a W^X +// environment. +// +// The arena is a process-singleton: codegenArenaInit creates it from +// JoeyConfigT.codegenBytes during joeyInit; codegenArenaShutdown +// frees it on joeyShutdown. Most callers should go through the +// sprite API rather than touching the arena directly. + +#ifndef JOEYLIB_CODEGEN_ARENA_INTERNAL_H +#define JOEYLIB_CODEGEN_ARENA_INTERNAL_H + +#include "joey/types.h" + +typedef struct ArenaSlotT { + uint32_t offset; // byte offset within the arena base + uint32_t size; // bytes occupied by this slot + bool used; // true = held by a sprite; false = free + struct ArenaSlotT *next; // linked-list, sorted by offset + struct ArenaSlotT *prev; +} ArenaSlotT; + + +// Lifecycle. Returns false if totalBytes is 0 or the underlying +// allocation fails. Idempotent: calling Init twice without an +// intervening Shutdown is a no-op. +bool codegenArenaInit(uint32_t totalBytes); +void codegenArenaShutdown(void); + +// First-fit allocate `bytes` of executable memory. Returns NULL if +// no free slot is large enough -- the caller should run +// codegenArenaCompact and retry, or surface the failure. +ArenaSlotT *codegenArenaAlloc(uint32_t bytes); + +// Mark the slot free and merge with adjacent free neighbors. The +// caller must drop its ArenaSlotT* immediately; the struct may be +// freed (if it coalesced into a neighbor). +void codegenArenaFree(ArenaSlotT *slot); + +// Walk live slots in offset order, memmove each down to fill any +// preceding hole, drop all free slots from the list, and finish with +// one trailing free slot covering the remaining space. Per-slot +// `offset` fields are updated atomically with the memmove so any +// callers indexing through `gArenaBase + slot->offset` see the new +// value on their next read. 
+void codegenArenaCompact(void); + +// Used for spriteDraw's address computation. The base pointer is +// stable for the lifetime of the arena; only slot->offset moves. +uint8_t *codegenArenaBase(void); + +// Same address as codegenArenaBase() but returned as an integer. The +// IIgs JSL trampoline needs the 24-bit absolute address as a number +// it can split into bank/offset bytes; ORCA-C's pointer-to-uint32_t +// cast has dropped the bank byte in some expressions, so we expose +// the integer view directly. +uint32_t codegenArenaBaseAddr(void); + +// Public-API support: sum of live slot sizes, total arena size. +// Difference is free space (which may be fragmented across holes +// until codegenArenaCompact runs). +uint32_t codegenArenaBytesUsed(void); +uint32_t codegenArenaBytesTotal(void); + +#endif diff --git a/src/core/debug.c b/src/core/debug.c new file mode 100644 index 0000000..5c57137 --- /dev/null +++ b/src/core/debug.c @@ -0,0 +1,57 @@ +// Cross-platform "where did it hang?" logger. Each call opens +// joeylog.txt, appends a line, fflushes, closes. Slow but durable +// -- the last line in the file is guaranteed to be on disk before +// any subsequent operation that might hang. +// +// Build only as needed for diagnostics; remove the calls when the +// bug is fixed. The hang on ST kept us looking at the wrong layer +// without this kind of trace. + +#include +#include + +#include "joey/debug.h" + +static const char *kLogPath = "joeylog.txt"; + + +void joeyLog(const char *msg) { + FILE *fp; + if (msg == NULL) { + return; + } + fp = fopen(kLogPath, "a"); + if (fp == NULL) { + return; + } + fputs(msg, fp); + fputc('\n', fp); + fclose(fp); +} + + +void joeyLogF(const char *fmt, ...) 
{ + FILE *fp; + va_list args; + if (fmt == NULL) { + return; + } + fp = fopen(kLogPath, "a"); + if (fp == NULL) { + return; + } + va_start(args, fmt); + vfprintf(fp, fmt, args); + va_end(args); + fputc('\n', fp); + fclose(fp); +} + + +void joeyLogReset(void) { + FILE *fp; + fp = fopen(kLogPath, "w"); + if (fp != NULL) { + fclose(fp); + } +} diff --git a/src/core/init.c b/src/core/init.c index b9b22aa..1e2c5b7 100644 --- a/src/core/init.c +++ b/src/core/init.c @@ -8,9 +8,15 @@ #include #include "joey/core.h" +#include "codegenArenaInternal.h" #include "hal.h" #include "surfaceInternal.h" +// 8 KB fits the largest typical sprite working set (~3-4 KB per +// 32x32 sprite at all opaque) and keeps malloc requests small enough +// for IIgs ORCA-C's small-memory-model heap to satisfy them. +#define DEFAULT_CODEGEN_BYTES (8u * 1024u) + // ----- Prototypes ----- static void clearError(void); @@ -56,9 +62,17 @@ bool joeyInit(const JoeyConfigT *config) { return false; } + if (!codegenArenaInit(gConfig.codegenBytes != 0 ? gConfig.codegenBytes + : DEFAULT_CODEGEN_BYTES)) { + setError("failed to allocate codegen arena"); + surfaceFreeScreen(); + return false; + } + if (!halInit(&gConfig)) { const char *halMsg = halLastError(); setError(halMsg != NULL ? halMsg : "halInit failed"); + codegenArenaShutdown(); surfaceFreeScreen(); return false; } @@ -86,6 +100,7 @@ void joeyShutdown(void) { } halInputShutdown(); halShutdown(); + codegenArenaShutdown(); surfaceFreeScreen(); gInitialized = false; clearError(); diff --git a/src/core/sprite.c b/src/core/sprite.c new file mode 100644 index 0000000..0d3a1b2 --- /dev/null +++ b/src/core/sprite.c @@ -0,0 +1,576 @@ +// Sprite system: create, destroy, draw, save/restore-under. +// +// This file is the interpreted fallback path. It is correct on every +// platform and serves as the reference implementation for the +// runtime-compiled emitters that will land per-CPU later. 
+ +#include +#include +#include + +#include "joey/sprite.h" +#include "codegenArenaInternal.h" +#include "spriteInternal.h" +#include "surfaceInternal.h" + +// 8x8 tiles, 4bpp packed = 4 bytes/row * 8 rows = 32 bytes/tile. +#define TILE_BYTES 32 +#define TILE_PIXELS 8 +#define TILE_BYTES_PER_ROW 4 + +// Color 0 is always transparent for sprites (DESIGN.md contract). +#define TRANSPARENT_NIBBLE 0 + + +// ----- Prototypes ----- + +static uint8_t srcNibble(const SpriteT *sp, int16_t pxX, int16_t pxY); +static void writeDstNibble(uint8_t *row, int16_t x, uint8_t nibble); +static bool clipRect(int16_t *dstX, int16_t *dstY, int16_t *srcX, int16_t *srcY, int16_t *w, int16_t *h); +static bool isFullyOnSurface(int16_t x, int16_t y, uint16_t widthPx, uint16_t heightPx); +static void spriteDrawInterpreted(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y); + + +// ----- Internal helpers (alphabetical) ----- + +// Clip a draw at (dstX,dstY) of size (w,h) against the destination +// surface. Updates srcX/srcY to the offset into the sprite that the +// remaining visible region starts at. Returns false if entirely +// off-surface. +static bool clipRect(int16_t *dstX, int16_t *dstY, int16_t *srcX, int16_t *srcY, int16_t *w, int16_t *h) { + *srcX = 0; + *srcY = 0; + if (*w <= 0 || *h <= 0) { + return false; + } + if (*dstX < 0) { + *srcX = -(*dstX); + *w -= *srcX; + *dstX = 0; + } + if (*dstY < 0) { + *srcY = -(*dstY); + *h -= *srcY; + *dstY = 0; + } + if (*dstX >= SURFACE_WIDTH || *dstY >= SURFACE_HEIGHT) { + return false; + } + if (*dstX + *w > SURFACE_WIDTH) { + *w = SURFACE_WIDTH - *dstX; + } + if (*dstY + *h > SURFACE_HEIGHT) { + *h = SURFACE_HEIGHT - *dstY; + } + return (*w > 0 && *h > 0); +} + + +// Sample one source pixel from the sprite at (pxX, pxY) where (0,0) +// is the sprite's top-left tile. The tile data is laid out tile by +// tile in row-major order, each tile internally row-major with 4 +// bytes per row. High nibble = left pixel within each byte. 
+static uint8_t srcNibble(const SpriteT *sp, int16_t pxX, int16_t pxY) { + int16_t tileX; + int16_t tileY; + int16_t inTileX; + int16_t inTileY; + uint32_t byteOff; + uint8_t byte; + const uint8_t *tile; + + tileX = (int16_t)(pxX / TILE_PIXELS); + tileY = (int16_t)(pxY / TILE_PIXELS); + inTileX = (int16_t)(pxX & (TILE_PIXELS - 1)); + inTileY = (int16_t)(pxY & (TILE_PIXELS - 1)); + + tile = sp->tileData + ((uint32_t)(tileY * sp->widthTiles + tileX)) * TILE_BYTES; + byteOff = (uint32_t)(inTileY * TILE_BYTES_PER_ROW + (inTileX >> 1)); + byte = tile[byteOff]; + if (inTileX & 1) { + return (uint8_t)(byte & 0x0F); + } + return (uint8_t)(byte >> 4); +} + + +static void writeDstNibble(uint8_t *row, int16_t x, uint8_t nibble) { + uint8_t *byte; + + byte = &row[x >> 1]; + if (x & 1) { + *byte = (uint8_t)((*byte & 0xF0) | (nibble & 0x0F)); + } else { + *byte = (uint8_t)((*byte & 0x0F) | ((nibble & 0x0F) << 4)); + } +} + + +static bool isFullyOnSurface(int16_t x, int16_t y, uint16_t widthPx, uint16_t heightPx) { + if (x < 0 || y < 0) { + return false; + } + if ((int32_t)x + widthPx > SURFACE_WIDTH) { + return false; + } + if ((int32_t)y + heightPx > SURFACE_HEIGHT) { + return false; + } + return true; +} + + +// Pixel-by-pixel draw. Used directly when the sprite has no compiled +// slot, and as the clip-edge fallback when a compiled draw would +// extend off the surface. 
+static void spriteDrawInterpreted(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y) { + int16_t dx; + int16_t dy; + int16_t sx; + int16_t sy; + int16_t w; + int16_t h; + int16_t row; + int16_t col; + uint8_t nibble; + uint8_t *dstRow; + + dx = x; + dy = y; + w = (int16_t)(sp->widthTiles * TILE_PIXELS); + h = (int16_t)(sp->heightTiles * TILE_PIXELS); + if (!clipRect(&dx, &dy, &sx, &sy, &w, &h)) { + return; + } + + for (row = 0; row < h; row++) { + dstRow = &s->pixels[(dy + row) * SURFACE_BYTES_PER_ROW]; + for (col = 0; col < w; col++) { + nibble = srcNibble(sp, (int16_t)(sx + col), (int16_t)(sy + row)); + if (nibble == TRANSPARENT_NIBBLE) { + continue; + } + writeDstNibble(dstRow, (int16_t)(dx + col), nibble); + } + } +} + + +// ----- Public API (alphabetical) ----- + +SpriteT *spriteCreate(const uint8_t *tileData, uint8_t widthTiles, uint8_t heightTiles, SpriteFlagsE flags) { + SpriteT *sp; + + if (tileData == NULL || widthTiles == 0 || heightTiles == 0) { + return NULL; + } + sp = (SpriteT *)malloc(sizeof(SpriteT)); + if (sp == NULL) { + return NULL; + } + sp->tileData = tileData; + sp->widthTiles = widthTiles; + sp->heightTiles = heightTiles; + sp->ownsTileData = false; + sp->slot = NULL; + memset(sp->routineOffsets, 0, sizeof(sp->routineOffsets)); + sp->flags = flags; + return sp; +} + + +SpriteT *spriteCreateFromSurface(const SurfaceT *src, int16_t x, int16_t y, + uint8_t widthTiles, uint8_t heightTiles, SpriteFlagsE flags) { + SpriteT *sp; + uint8_t *buf; + uint16_t tx; + uint16_t ty; + uint16_t row; + uint16_t widthPx; + uint16_t heightPx; + uint32_t tileBytes; + const uint8_t *srcRow; + uint8_t *dstTile; + + if (src == NULL || widthTiles == 0 || heightTiles == 0) { + return NULL; + } + // Source x/y must be on a tile boundary so each captured tile lands + // on whole bytes -- mid-byte snapshots would lose half a pixel at + // the left edge. 
+ if ((x & (TILE_PIXELS - 1)) != 0 || (y & (TILE_PIXELS - 1)) != 0) { + return NULL; + } + widthPx = (uint16_t)(widthTiles * TILE_PIXELS); + heightPx = (uint16_t)(heightTiles * TILE_PIXELS); + if (x < 0 || y < 0 || + x + (int16_t)widthPx > SURFACE_WIDTH || + y + (int16_t)heightPx > SURFACE_HEIGHT) { + return NULL; + } + + tileBytes = (uint32_t)widthTiles * heightTiles * TILE_BYTES; + buf = (uint8_t *)malloc(tileBytes); + if (buf == NULL) { + return NULL; + } + + // Pack src->pixels (full-row 4bpp packed) into tile-major layout. + // Each tile reads 4 bytes per row from src at column offset (x/2 + + // tx*4) and writes them contiguously into the tile slot. + for (ty = 0; ty < heightTiles; ty++) { + for (tx = 0; tx < widthTiles; tx++) { + dstTile = &buf[(ty * widthTiles + tx) * TILE_BYTES]; + for (row = 0; row < TILE_PIXELS; row++) { + srcRow = &src->pixels[((uint16_t)y + ty * TILE_PIXELS + row) * SURFACE_BYTES_PER_ROW]; + memcpy(&dstTile[row * TILE_BYTES_PER_ROW], + &srcRow[((uint16_t)x >> 1) + tx * TILE_BYTES_PER_ROW], + TILE_BYTES_PER_ROW); + } + } + } + + sp = (SpriteT *)malloc(sizeof(SpriteT)); + if (sp == NULL) { + free(buf); + return NULL; + } + sp->tileData = buf; + sp->widthTiles = widthTiles; + sp->heightTiles = heightTiles; + sp->ownsTileData = true; + sp->slot = NULL; + memset(sp->routineOffsets, 0, sizeof(sp->routineOffsets)); + sp->flags = flags; + return sp; +} + + +void spriteDestroy(SpriteT *sp) { + if (sp == NULL) { + return; + } + if (sp->slot != NULL) { + codegenArenaFree(sp->slot); + sp->slot = NULL; + } + if (sp->ownsTileData) { + free((void *)sp->tileData); + } + free(sp); +} + + +void spriteDraw(SurfaceT *s, SpriteT *sp, int16_t x, int16_t y) { + uint16_t widthPx; + uint16_t heightPx; + + if (s == NULL || sp == NULL) { + return; + } + widthPx = (uint16_t)(sp->widthTiles * TILE_PIXELS); + heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS); + + // Fast path: compiled bytes + fully on surface. 
Off-surface draws + // fall back to the interpreter so the compiled routines never + // need clip math (they walk fixed offsets). + if (sp->slot != NULL && isFullyOnSurface(x, y, widthPx, heightPx)) { + spriteCompiledDraw(s, sp, x, y); + return; + } + spriteDrawInterpreted(s, sp, x, y); +} + + +void spritePrewarm(SpriteT *sp) { + (void)spriteCompile(sp); +} + + +// .spr header is 4 bytes: widthTiles, heightTiles, codeSize lo/hi. +#define SPR_HEADER_SIZE 4 +#define SPR_OFFSETS_SIZE (JOEY_SPRITE_SHIFT_COUNT * SPRITE_OP_COUNT * (uint32_t)sizeof(uint16_t)) + + +SpriteT *spriteFromCompiledMem(const uint8_t *data, uint32_t length, SpriteFlagsE flags) { + SpriteT *sp; + ArenaSlotT *slot; + uint8_t widthTiles; + uint8_t heightTiles; + uint16_t codeSize; + const uint8_t *offsetTable; + const uint8_t *code; + uint16_t shift; + uint16_t op; + uint16_t o; + + if (data == NULL || length < SPR_HEADER_SIZE + SPR_OFFSETS_SIZE) { + return NULL; + } + widthTiles = data[0]; + heightTiles = data[1]; + codeSize = (uint16_t)(data[2] | ((uint16_t)data[3] << 8)); + if (widthTiles == 0 || heightTiles == 0 || codeSize == 0) { + return NULL; + } + if (length < SPR_HEADER_SIZE + SPR_OFFSETS_SIZE + (uint32_t)codeSize) { + return NULL; + } + offsetTable = data + SPR_HEADER_SIZE; + code = data + SPR_HEADER_SIZE + SPR_OFFSETS_SIZE; + + slot = codegenArenaAlloc((uint32_t)codeSize); + if (slot == NULL) { + return NULL; + } + + sp = (SpriteT *)malloc(sizeof(SpriteT)); + if (sp == NULL) { + codegenArenaFree(slot); + return NULL; + } + + memcpy(codegenArenaBase() + slot->offset, code, codeSize); + + for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { + for (op = 0; op < SPRITE_OP_COUNT; op++) { + o = (uint16_t)((shift * SPRITE_OP_COUNT + op) * sizeof(uint16_t)); + sp->routineOffsets[shift][op] = + (uint16_t)(offsetTable[o] | ((uint16_t)offsetTable[o + 1] << 8)); + } + } + + sp->tileData = NULL; + sp->widthTiles = widthTiles; + sp->heightTiles = heightTiles; + sp->ownsTileData = false; + 
sp->slot = slot; + sp->flags = flags; + return sp; +} + + +SpriteT *spriteLoadFile(const char *path, SpriteFlagsE flags) { + FILE *fp; + long fileSize; + uint8_t *buf; + SpriteT *sp; + size_t readBytes; + + if (path == NULL) { + return NULL; + } + fp = fopen(path, "rb"); + if (fp == NULL) { + return NULL; + } + if (fseek(fp, 0L, SEEK_END) != 0) { + fclose(fp); + return NULL; + } + fileSize = ftell(fp); + if (fileSize <= 0) { + fclose(fp); + return NULL; + } + if (fseek(fp, 0L, SEEK_SET) != 0) { + fclose(fp); + return NULL; + } + buf = (uint8_t *)malloc((size_t)fileSize); + if (buf == NULL) { + fclose(fp); + return NULL; + } + readBytes = fread(buf, 1, (size_t)fileSize, fp); + fclose(fp); + if (readBytes != (size_t)fileSize) { + free(buf); + return NULL; + } + sp = spriteFromCompiledMem(buf, (uint32_t)fileSize, flags); + free(buf); + return sp; +} + + +bool spriteSaveFile(SpriteT *sp, const char *path) { + FILE *fp; + uint8_t header[SPR_HEADER_SIZE]; + uint8_t offsetBytes[2]; + uint16_t shift; + uint16_t op; + uint16_t value; + uint32_t codeSize; + uint8_t *codeStart; + + if (sp == NULL || path == NULL) { + return false; + } + if (sp->slot == NULL) { + // Force-compile so the saved file is self-contained. + // Returns false if the platform's emitter is a stub or the + // arena is full. 
+ if (!spriteCompile(sp)) { + return false; + } + } + + codeSize = sp->slot->size; + codeStart = codegenArenaBase() + sp->slot->offset; + + if (codeSize > 0xFFFFu) { + return false; // codeSize doesn't fit in the 16-bit header field + } + + header[0] = sp->widthTiles; + header[1] = sp->heightTiles; + header[2] = (uint8_t)(codeSize & 0xFFu); + header[3] = (uint8_t)((codeSize >> 8) & 0xFFu); + + fp = fopen(path, "wb"); + if (fp == NULL) { + return false; + } + if (fwrite(header, 1, SPR_HEADER_SIZE, fp) != SPR_HEADER_SIZE) { + fclose(fp); + return false; + } + for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { + for (op = 0; op < SPRITE_OP_COUNT; op++) { + value = sp->routineOffsets[shift][op]; + offsetBytes[0] = (uint8_t)(value & 0xFFu); + offsetBytes[1] = (uint8_t)((value >> 8) & 0xFFu); + if (fwrite(offsetBytes, 1, 2, fp) != 2) { + fclose(fp); + return false; + } + } + } + if (fwrite(codeStart, 1, codeSize, fp) != codeSize) { + fclose(fp); + return false; + } + fclose(fp); + return true; +} + + +void spriteCompact(void) { + codegenArenaCompact(); +} + + +uint32_t spriteCodegenBytesTotal(void) { + return codegenArenaBytesTotal(); +} + + +uint32_t spriteCodegenBytesUsed(void) { + return codegenArenaBytesUsed(); +} + + +void spriteRestoreUnder(SurfaceT *s, const SpriteBackupT *backup) { + int16_t row; + int16_t byteStart; + int16_t copyBytes; + uint8_t *dstRow; + + if (s == NULL || backup == NULL || backup->bytes == NULL) { + return; + } + if (backup->width == 0 || backup->height == 0) { + return; + } + if (backup->x < 0 || backup->y < 0) { + return; + } + if (backup->x >= SURFACE_WIDTH || backup->y >= SURFACE_HEIGHT) { + return; + } + if (backup->x + backup->width > SURFACE_WIDTH) { + return; + } + if (backup->y + backup->height > SURFACE_HEIGHT) { + return; + } + // Saved region is byte-aligned; sub-byte boundaries can't be + // represented without losing the neighboring pixel under the byte. 
+ if ((backup->x & 1) || (backup->width & 1)) { + return; + } + + byteStart = (int16_t)(backup->x >> 1); + copyBytes = (int16_t)(backup->width >> 1); + for (row = 0; row < backup->height; row++) { + dstRow = &s->pixels[(backup->y + row) * SURFACE_BYTES_PER_ROW]; + memcpy(&dstRow[byteStart], + &backup->bytes[(uint16_t)row * (uint16_t)copyBytes], + (size_t)copyBytes); + } +} + + +void spriteSaveUnder(const SurfaceT *s, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) { + int16_t dx; + int16_t dy; + int16_t sx; + int16_t sy; + int16_t w; + int16_t h; + int16_t row; + int16_t byteStart; + int16_t copyBytes; + int16_t clippedX; + int16_t clippedW; + const uint8_t *srcRow; + + if (s == NULL || sp == NULL || backup == NULL) { + return; + } + backup->sprite = sp; + backup->sizeBytes = 0; + + dx = x; + dy = y; + w = (int16_t)(sp->widthTiles * TILE_PIXELS); + h = (int16_t)(sp->heightTiles * TILE_PIXELS); + if (!clipRect(&dx, &dy, &sx, &sy, &w, &h)) { + backup->x = 0; + backup->y = 0; + backup->width = 0; + backup->height = 0; + return; + } + // Round x DOWN and width UP to byte boundaries so we always grab + // entire 4bpp-packed bytes. RestoreUnder requires byte alignment. + clippedX = (int16_t)(dx & ~1); + clippedW = (int16_t)(((dx + w) - clippedX + 1) & ~1); + if (clippedX + clippedW > SURFACE_WIDTH) { + clippedW = SURFACE_WIDTH - clippedX; + } + + backup->x = clippedX; + backup->y = dy; + backup->width = (uint16_t)clippedW; + backup->height = (uint16_t)h; + + byteStart = (int16_t)(clippedX >> 1); + copyBytes = (int16_t)(clippedW >> 1); + backup->sizeBytes = (uint16_t)(copyBytes * h); + + if (backup->bytes == NULL) { + // Caller didn't supply a byte buffer; we just record the + // metadata so they can size their buffer for the next + // SaveUnder call. RestoreUnder will refuse to operate on a + // backup with bytes==NULL. 
+ return; + } + for (row = 0; row < h; row++) { + srcRow = &s->pixels[(dy + row) * SURFACE_BYTES_PER_ROW]; + memcpy(&backup->bytes[(uint16_t)row * (uint16_t)copyBytes], + &srcRow[byteStart], + (size_t)copyBytes); + } +} diff --git a/src/core/spriteInternal.h b/src/core/spriteInternal.h new file mode 100644 index 0000000..f8585b3 --- /dev/null +++ b/src/core/spriteInternal.h @@ -0,0 +1,42 @@ +// Internal sprite definitions shared between sprite.c and the +// per-platform codegen emitters. Public API users include +// joey/sprite.h instead. + +#ifndef JOEYLIB_SPRITE_INTERNAL_H +#define JOEYLIB_SPRITE_INTERNAL_H + +#include "codegenArenaInternal.h" +#include "joey/sprite.h" + +#define SPRITE_OP_DRAW 0 +#define SPRITE_OP_SAVE 1 +#define SPRITE_OP_RESTORE 2 +#define SPRITE_OP_COUNT 3 + +struct SpriteT { + const uint8_t *tileData; // wTiles * hTiles * 32 bytes; NULL for loaded sprites + uint8_t widthTiles; + uint8_t heightTiles; + bool ownsTileData; // true if spriteDestroy must free tileData + + // Compiled-path state. slot==NULL means not yet compiled (or + // compile failed); spriteDraw falls back to the interpreter. + // The fn-call address for (shift, op) is computed at draw time: + // (codegenArenaBase() + slot->offset + routineOffsets[shift][op]) + // so a codegenArenaCompact that moves the slot's bytes is + // transparent to the caller. + ArenaSlotT *slot; + uint16_t routineOffsets[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT]; + + SpriteFlagsE flags; +}; + +// Compiled draw entry point. Implemented alongside spriteCompile in +// the per-CPU emitter file (src/codegen/spriteEmit*.c). Handles the +// calling convention the emitted bytes use (cdecl on x86, stack +// args on 68k, ORCA on IIgs). The dispatcher in src/core/sprite.c +// calls this when sp->slot is non-NULL and the draw is fully +// on-surface. spriteCompile itself is in the public API. 
+void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y); + +#endif diff --git a/src/core/surface.c b/src/core/surface.c index 5171a53..493fc22 100644 --- a/src/core/surface.c +++ b/src/core/surface.c @@ -1,12 +1,17 @@ -// Surface allocation, destruction, and the library-owned screen surface. +// Surface allocation, destruction, persistence, and the library-owned +// screen surface. #include +#include #include #include #include "joey/surface.h" #include "surfaceInternal.h" +#define SURFACE_PALETTE_BYTES (SURFACE_PALETTE_ENTRIES * (uint32_t)sizeof(uint16_t)) +#define SURFACE_FILE_BYTES (SURFACE_PIXELS_SIZE + SURFACE_HEIGHT + SURFACE_PALETTE_BYTES) + // ----- Prototypes ----- // (public API declared in joey/surface.h) @@ -48,6 +53,74 @@ SurfaceT *surfaceGetScreen(void) { } +bool surfaceLoadFile(SurfaceT *dst, const char *path) { + FILE *fp; + long fileSize; + + if (dst == NULL || path == NULL) { + return false; + } + fp = fopen(path, "rb"); + if (fp == NULL) { + return false; + } + if (fseek(fp, 0L, SEEK_END) != 0) { + fclose(fp); + return false; + } + fileSize = ftell(fp); + if (fileSize != (long)SURFACE_FILE_BYTES) { + fclose(fp); + return false; + } + if (fseek(fp, 0L, SEEK_SET) != 0) { + fclose(fp); + return false; + } + if (fread(dst->pixels, 1, SURFACE_PIXELS_SIZE, fp) != SURFACE_PIXELS_SIZE) { + fclose(fp); + return false; + } + if (fread(dst->scb, 1, SURFACE_HEIGHT, fp) != SURFACE_HEIGHT) { + fclose(fp); + return false; + } + if (fread(dst->palette, 1, SURFACE_PALETTE_BYTES, fp) != SURFACE_PALETTE_BYTES) { + fclose(fp); + return false; + } + fclose(fp); + return true; +} + + +bool surfaceSaveFile(const SurfaceT *src, const char *path) { + FILE *fp; + + if (src == NULL || path == NULL) { + return false; + } + fp = fopen(path, "wb"); + if (fp == NULL) { + return false; + } + if (fwrite(src->pixels, 1, SURFACE_PIXELS_SIZE, fp) != SURFACE_PIXELS_SIZE) { + fclose(fp); + return false; + } + if (fwrite(src->scb, 1, SURFACE_HEIGHT, fp) != 
SURFACE_HEIGHT) { + fclose(fp); + return false; + } + if (fwrite(src->palette, 1, SURFACE_PALETTE_BYTES, fp) != SURFACE_PALETTE_BYTES) { + fclose(fp); + return false; + } + fclose(fp); + return true; +} + + // ----- Internal (alphabetical) ----- bool surfaceAllocScreen(void) { diff --git a/src/port/atarist/hal.c b/src/port/atarist/hal.c index f9d0526..7554d67 100644 --- a/src/port/atarist/hal.c +++ b/src/port/atarist/hal.c @@ -509,7 +509,17 @@ void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint1 // Vsync() is XBIOS opcode 37; mintlib exposes it directly. It blocks // until the next 50 Hz (PAL) or 60 Hz (NTSC) vertical blank. void halWaitVBL(void) { - Vsync(); + int16_t before; + + // Can't use Vsync(): TOS's Vsync increments _vblsem inside its + // own VBL ISR, which we replaced (Setexc(VEC_VBL, vblIsr)) with + // our SCB-emulating ISR that doesn't chain to the original. + // Spin on gFrameCount instead -- it's volatile and bumped every + // VBL by our ISR. + before = gFrameCount; + while (gFrameCount == before) { + // wait + } } diff --git a/tools/joeysprite/joeysprite.c b/tools/joeysprite/joeysprite.c new file mode 100644 index 0000000..6da65ec --- /dev/null +++ b/tools/joeysprite/joeysprite.c @@ -0,0 +1,323 @@ +// joeysprite: host-side compiler that turns raw tile data into a +// `.spr` file ready to be loaded at runtime by spriteLoadFile. +// +// Usage: +// joeysprite --target {iigs,amiga,atarist,dos} +// --width-tiles N --height-tiles M +// input.tiles output.spr +// +// `input.tiles` is widthTiles * heightTiles * 32 bytes, laid out +// tile-major as the runtime SpriteT.tileData expects: tile (0,0) +// first 32 bytes, tile (1,0) next 32, ... tile (widthTiles-1, 0), +// then tile (0,1), and so on. Inside each tile, rows are stored +// top-to-bottom and each row is 4 bytes (8 pixels at 4bpp packed, +// high nibble = left pixel). 
+// +// Output `.spr` format (target-native byte order, see DESIGN.md +// ยง12 for details): +// header (4 bytes): widthTiles, heightTiles, codeSize lo/hi +// offsets (JOEY_SPRITE_SHIFT_COUNT * 3 * uint16_t): +// [draw_s0, save_s0, restore_s0, draw_s1, save_s1, restore_s1] +// Save and restore offsets are written as 0 (uniform memcpy on +// load; never compiled). +// code (codeSize bytes): emitted machine code per shift, in order. + +#include +#include +#include +#include + +#include "joey/sprite.h" +#include "spriteEmitter.h" +#include "spriteInternal.h" + + +typedef enum { + TARGET_IIGS, + TARGET_AMIGA, + TARGET_ATARIST, + TARGET_DOS, + TARGET_INVALID +} TargetE; + + +// ----- Constants ----- + +#define MAX_SCRATCH_BYTES (16u * 1024u) +#define SPR_HEADER_SIZE 4 +// Save/restore offsets are reserved (0) for now -- the runtime +// memcpy interpreter handles them. +#define SHIFT_OPS 3 +#define OFFSET_TABLE_BYTES (JOEY_SPRITE_SHIFT_COUNT * SHIFT_OPS * 2u) + + +// ----- Prototypes ----- + +static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath); +static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, TargetE target); +static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize); +static TargetE parseTarget(const char *name); +static int usage(const char *prog); +static int writeLE16(FILE *fp, uint16_t v); + + +// ----- Internal helpers (alphabetical) ----- + +static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) { + uint8_t *scratch; + uint8_t *codeBuf; + uint16_t shiftLengths[JOEY_SPRITE_SHIFT_COUNT]; + uint32_t totalCodeSize; + uint8_t shift; + uint8_t op; + uint16_t written; + uint16_t cursor; + uint16_t offset; + FILE *fp; + int rc; + + scratch = (uint8_t *)malloc(MAX_SCRATCH_BYTES); + if (scratch == NULL) { + fprintf(stderr, "joeysprite: out of memory\n"); + return 2; + } + + totalCodeSize = 0; + for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { + written = 
emitForTarget(scratch, sp, shift, target); + shiftLengths[shift] = written; + totalCodeSize += written; + } + if (totalCodeSize > 0xFFFFu) { + fprintf(stderr, "joeysprite: emitted %u code bytes; max is 65535\n", + (unsigned)totalCodeSize); + free(scratch); + return 2; + } + + codeBuf = (uint8_t *)malloc(totalCodeSize); + if (codeBuf == NULL) { + fprintf(stderr, "joeysprite: out of memory for code buffer\n"); + free(scratch); + return 2; + } + + cursor = 0; + for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { + written = emitForTarget(codeBuf + cursor, sp, shift, target); + cursor = (uint16_t)(cursor + written); + } + + fp = fopen(outPath, "wb"); + if (fp == NULL) { + fprintf(stderr, "joeysprite: cannot open %s for writing\n", outPath); + free(codeBuf); + free(scratch); + return 2; + } + + rc = 0; + if (fputc(sp->widthTiles, fp) == EOF) rc = 2; + if (fputc(sp->heightTiles, fp) == EOF) rc = 2; + if (rc == 0 && writeLE16(fp, (uint16_t)totalCodeSize) != 0) rc = 2; + + // Offset table: cumulative draw offsets + zeros for save/restore. + offset = 0; + for (shift = 0; rc == 0 && shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { + for (op = 0; op < SHIFT_OPS; op++) { + uint16_t value; + if (op == SPRITE_OP_DRAW) { + value = offset; + } else { + value = 0; + } + if (writeLE16(fp, value) != 0) { + rc = 2; + break; + } + } + offset = (uint16_t)(offset + shiftLengths[shift]); + } + + if (rc == 0) { + if (fwrite(codeBuf, 1, totalCodeSize, fp) != totalCodeSize) { + rc = 2; + } + } + fclose(fp); + free(codeBuf); + free(scratch); + + if (rc == 0) { + printf("joeysprite: %u code bytes -> %s (target=%s, %ux%u tiles)\n", + (unsigned)totalCodeSize, outPath, + target == TARGET_IIGS ? "iigs" : + target == TARGET_AMIGA ? "amiga" : + target == TARGET_ATARIST ? 
"atarist" : "dos", + sp->widthTiles, sp->heightTiles); + } + return rc; +} + + +static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, TargetE target) { + switch (target) { + case TARGET_DOS: + return spriteEmitDrawX86(out, sp, shift); + case TARGET_AMIGA: + case TARGET_ATARIST: + return spriteEmitDraw68k(out, sp, shift); + case TARGET_IIGS: + return spriteEmitDrawIigs(out, sp, shift); + default: + return 0; + } +} + + +static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize) { + FILE *fp; + long fileSize; + uint8_t *buf; + size_t read; + + fp = fopen(path, "rb"); + if (fp == NULL) { + fprintf(stderr, "joeysprite: cannot open %s\n", path); + return 2; + } + if (fseek(fp, 0L, SEEK_END) != 0) { + fclose(fp); + return 2; + } + fileSize = ftell(fp); + if (fileSize <= 0) { + fprintf(stderr, "joeysprite: %s is empty\n", path); + fclose(fp); + return 2; + } + if (fseek(fp, 0L, SEEK_SET) != 0) { + fclose(fp); + return 2; + } + buf = (uint8_t *)malloc((size_t)fileSize); + if (buf == NULL) { + fclose(fp); + return 2; + } + read = fread(buf, 1, (size_t)fileSize, fp); + fclose(fp); + if (read != (size_t)fileSize) { + free(buf); + return 2; + } + *outBytes = buf; + *outSize = (uint32_t)fileSize; + return 0; +} + + +static TargetE parseTarget(const char *name) { + if (strcmp(name, "iigs") == 0) return TARGET_IIGS; + if (strcmp(name, "amiga") == 0) return TARGET_AMIGA; + if (strcmp(name, "atarist") == 0) return TARGET_ATARIST; + if (strcmp(name, "dos") == 0) return TARGET_DOS; + return TARGET_INVALID; +} + + +static int usage(const char *prog) { + fprintf(stderr, + "usage: %s --target {iigs,amiga,atarist,dos} \\\n" + " --width-tiles N --height-tiles M \\\n" + " input.tiles output.spr\n", prog); + return 2; +} + + +// 65816 / x86 / 68k all expect target-native byte order in the .spr +// header offsets, but the file format is little-endian (matches the +// runtime spriteFromCompiledMem parser, which reads byte-by-byte). 
+static int writeLE16(FILE *fp, uint16_t v) { + if (fputc((int)(v & 0xFFu), fp) == EOF) return -1; + if (fputc((int)((v >> 8) & 0xFFu), fp) == EOF) return -1; + return 0; +} + + +// ----- main ----- + +int main(int argc, char **argv) { + const char *targetName; + const char *inPath; + const char *outPath; + long widthTiles; + long heightTiles; + int i; + TargetE target; + uint8_t *tileBytes; + uint32_t tileSize; + uint32_t expectedTileSize; + SpriteT sp; + int rc; + + targetName = NULL; + widthTiles = 0; + heightTiles = 0; + inPath = NULL; + outPath = NULL; + + for (i = 1; i < argc; i++) { + if (strcmp(argv[i], "--target") == 0 && i + 1 < argc) { + targetName = argv[++i]; + } else if (strcmp(argv[i], "--width-tiles") == 0 && i + 1 < argc) { + widthTiles = strtol(argv[++i], NULL, 10); + } else if (strcmp(argv[i], "--height-tiles") == 0 && i + 1 < argc) { + heightTiles = strtol(argv[++i], NULL, 10); + } else if (inPath == NULL) { + inPath = argv[i]; + } else if (outPath == NULL) { + outPath = argv[i]; + } else { + return usage(argv[0]); + } + } + if (targetName == NULL || widthTiles <= 0 || widthTiles > 255 || + heightTiles <= 0 || heightTiles > 255 || + inPath == NULL || outPath == NULL) { + return usage(argv[0]); + } + + target = parseTarget(targetName); + if (target == TARGET_INVALID) { + fprintf(stderr, "joeysprite: unknown --target %s\n", targetName); + return usage(argv[0]); + } + + rc = loadTileData(inPath, &tileBytes, &tileSize); + if (rc != 0) { + return rc; + } + + expectedTileSize = (uint32_t)(widthTiles * heightTiles * 32); + if (tileSize != expectedTileSize) { + fprintf(stderr, + "joeysprite: %s is %u bytes; expected %u (%ld * %ld tiles * 32 bytes)\n", + inPath, (unsigned)tileSize, (unsigned)expectedTileSize, + widthTiles, heightTiles); + free(tileBytes); + return 2; + } + + sp.tileData = tileBytes; + sp.widthTiles = (uint8_t)widthTiles; + sp.heightTiles = (uint8_t)heightTiles; + sp.ownsTileData = false; + sp.slot = NULL; + memset(sp.routineOffsets, 0, 
sizeof(sp.routineOffsets)); + sp.flags = SPRITE_FLAGS_NONE; + + rc = compileToSpr(&sp, target, outPath); + free(tileBytes); + return rc; +}