diff --git a/src/port/atarist/c2p.s b/src/port/atarist/c2p.s
new file mode 100644
index 0000000..c4a2df6
--- /dev/null
+++ b/src/port/atarist/c2p.s
@@ -0,0 +1,132 @@
+| Atari ST chunky-to-planar conversion -- 68000 hand-rolled.
+|
+| Drop-in replacement for hal.c's old c2pRow C inner loop. The C
+| version walked every pixel and built each plane word with a
+| run-time variable bit shift (`1 << bit`), which costs ~6+2*bit
+| cycles on 68000 -- roughly 100+ cycles per pixel after GCC's m68k
+| codegen overhead. This rewrite uses a 4 KB lookup table that hal.c
+| builds once (initC2pLut): same layout as the Amiga c2p LUT, so the
+| (sourceByte, position, plane) -> 2-bit contribution mapping is
+| identical, but the routine packs results into ST word-interleaved
+| planar (4 plane words per 16-pixel group) instead of 4 separate
+| plane bytes.
+|
+| Each ST group is 8 source bytes -> 4 plane words. Source byte
+| positions 0..3 contribute to the HIGH byte of each plane word
+| (bits 15..8); positions 4..7 contribute to the LOW byte (bits
+| 7..0). Within a byte, the LUT for (src, bp%4, plane) already
+| places bits at (7-2*(bp%4), 6-2*(bp%4)), so we use the SAME LUT
+| entries for both halves -- we just shift d0..d3 left by 8 between
+| the halves to move the high-half bits up before the low half ORs
+| into the now-empty low byte.
+|
+| Calling convention: m68k-atari-mint-gcc cdecl.
+|   Args on stack at 4(sp), 8(sp), ...
+|   d2-d7, a2-a6 are callee-save.
+|   No return value.
+|
+| void chunkyToPlanarRowSt(const uint8_t *src,   ;  4(sp) - 4bpp packed source row
+|                          uint16_t *dst,        ;  8(sp) - planar dest row (uint16_t*)
+|                          uint16_t groupStart,  ; 12(sp) - first group index (low word)
+|                          uint16_t groupEnd,    ; 16(sp) - one-past-last group index (low word)
+|                          const uint8_t *lut);  ; 20(sp) - 4 KB LUT base
+|
+| LUT layout: lut[src*16 + pos*4 + plane] (uint8) = the 2-bit plane
+| contribution for source byte `src` at byte-position `pos` (0..3
+| within a 4-byte chunk) going to plane `plane` (0..3). All 16
+| (pos, plane) entries for one src byte are contiguous, so the inner
+| loop reaches every entry off (a5, d4.w) with an 8-bit displacement
+| (0..15) without LEA between reads.
+|
+| Register use inside the routine:
+|   a0 = source read pointer      a1 = destination write pointer
+|   a5 = LUT base                 d0-d3 = plane 0..3 accumulators
+|   d4 = scratch (src * 16)       d6 = groupStart, d7 = DBRA counter
+|
+| GAS-syntax (binutils m68k); assembled by m68k-atari-mint-as via
+| the gcc driver.
+
+        .text
+        .globl  _chunkyToPlanarRowSt
+
+| MOVEM frame: only the callee-saved registers this routine clobbers
+| are stacked -- d2-d4, d6-d7 (5) + a5 (1) = 6 regs * 4 bytes = 24
+| bytes. d5, a2-a4 and a6 are never written, so saving them would
+| only cost cycles on every row.
+        .equ    SAVED_REGS_SIZE, 24
+
+| C2P_BYTE col -- fold one packed source byte (two pixels) into the
+| four plane accumulators. `col` is the first LUT column for the
+| byte's position within its 4-byte half: 0, 4, 8 or 12 (= pos*4).
+| Reads (a0)+; clobbers d4; ORs into the low bytes of d0-d3.
+        .macro  C2P_BYTE col
+        moveq   #0,%d4
+        move.b  (%a0)+,%d4
+        lsl.w   #4,%d4                  | d4 = src * 16 (max 4080)
+        or.b    \col+0(%a5,%d4.w),%d0
+        or.b    \col+1(%a5,%d4.w),%d1
+        or.b    \col+2(%a5,%d4.w),%d2
+        or.b    \col+3(%a5,%d4.w),%d3
+        .endm
+
+
+_chunkyToPlanarRowSt:
+        movem.l %d2-%d4/%d6-%d7/%a5,-(%sp)
+
+        move.l  4+SAVED_REGS_SIZE(%sp),%a0      | src row base
+        move.l  8+SAVED_REGS_SIZE(%sp),%a1      | dst (uint16_t*)
+        | Both groupStart and groupEnd are uint16_t but GCC
+        | promotes them to int and pushes 4 bytes each; the
+        | low word lives at +2 in big-endian layout.
+        move.w  12+SAVED_REGS_SIZE+2(%sp),%d6   | groupStart
+        move.w  16+SAVED_REGS_SIZE+2(%sp),%d7   | groupEnd
+        move.l  20+SAVED_REGS_SIZE(%sp),%a5     | LUT base
+
+        | Advance src and dst to the first group's data.
+        | Each group consumes 8 source bytes and produces 4
+        | dest words (8 bytes), so both pointers advance by
+        | groupStart * 8.
+        move.w  %d6,%d4
+        lsl.w   #3,%d4
+        add.w   %d4,%a0
+        add.w   %d4,%a1
+
+        sub.w   %d6,%d7                 | groupCount = end - start
+        subq.w  #1,%d7                  | DBRA bias
+        bmi     .Ldone                  | empty (or inverted) range: nothing to do
+
+.LgroupLoop:
+        moveq   #0,%d0                  | plane 0 acc
+        moveq   #0,%d1                  | plane 1 acc
+        moveq   #0,%d2                  | plane 2 acc
+        moveq   #0,%d3                  | plane 3 acc
+
+        | ===== Source bytes 0..3 -> high byte of each plane word =====
+        C2P_BYTE 0
+        C2P_BYTE 4
+        C2P_BYTE 8
+        C2P_BYTE 12
+
+        | Move accumulated bits into the HIGH byte of each word.
+        lsl.w   #8,%d0
+        lsl.w   #8,%d1
+        lsl.w   #8,%d2
+        lsl.w   #8,%d3
+
+        | ===== Source bytes 4..7 -> low byte of each plane word =====
+        C2P_BYTE 0
+        C2P_BYTE 4
+        C2P_BYTE 8
+        C2P_BYTE 12
+
+        | Store 4 plane words.
+        move.w  %d0,(%a1)+
+        move.w  %d1,(%a1)+
+        move.w  %d2,(%a1)+
+        move.w  %d3,(%a1)+
+
+        dbra    %d7,.LgroupLoop
+
+.Ldone:
+        movem.l (%sp)+,%d2-%d4/%d6-%d7/%a5
+        rts
diff --git a/src/port/atarist/hal.c b/src/port/atarist/hal.c
index add8e1a..57d4ced 100644
--- a/src/port/atarist/hal.c
+++ b/src/port/atarist/hal.c
@@ -64,12 +64,19 @@
 // ----- Prototypes -----
 
 static uint16_t quantizeColorToSt(uint16_t orgb);
-static void c2pRow(const uint8_t *src, uint16_t *dst, uint16_t groupStart, uint16_t groupEnd);
 static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd);
 static void flattenScbPalettes(const SurfaceT *src);
+static void initC2pLut(void);
 static void writeDiagnostics(void);
 static long writePrevPaletteRegs(void);
 
+// Provided by src/port/atarist/c2p.s.
+extern void chunkyToPlanarRowSt(const uint8_t *src,
+                                uint16_t *dst,
+                                uint16_t groupStart,
+                                uint16_t groupEnd,
+                                const uint8_t *lut);
+
 static __attribute__((interrupt_handler)) void timerBIsr(void);
 static __attribute__((interrupt_handler)) void vblIsr(void);
 static void buildTransitions(const SurfaceT *src);
@@ -129,55 +136,31 @@ static uint8_t gCachedScb [SURFACE_HEIGHT];
 static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
 static bool gCacheValid = false;
 
+// 4 KB chunky-to-planar lookup table consumed by chunkyToPlanarRowSt
+// (src/port/atarist/c2p.s). Layout: gC2pLut[src*16 + pos*4 + plane]
+// = the 2-bit plane-byte contribution for source byte `src` at
+// byte-position `pos` (0..3 within a 4-byte chunk) going to plane
+// `plane`. Bit positions inside the byte are (7-2*pos, 6-2*pos), so
+// the same table feeds both halves of an ST plane word: positions
+// 0..3 land in the high byte, 4..7 (re-indexed mod 4) in the low
+// byte. Built once by initC2pLut, lazily, on the first c2pRange call.
+static uint8_t gC2pLut[4 * 1024];
+static bool gC2pLutReady = false;
+
 // ----- Internal helpers (alphabetical) -----
 
-// Convert 16 chunky pixels (8 bytes 4bpp packed) to 4 ST planar words
-// per group. groupStart..groupEnd selects a horizontal sub-range so
-// halPresentRect can avoid touching unchanged groups.
-static void c2pRow(const uint8_t *src, uint16_t *dst, uint16_t groupStart, uint16_t groupEnd) {
-    uint16_t group;
-    uint16_t px;
-    uint16_t plane0;
-    uint16_t plane1;
-    uint16_t plane2;
-    uint16_t plane3;
-    uint8_t byte;
-    uint8_t nibble;
-    uint16_t bit;
-
-    for (group = groupStart; group < groupEnd; group++) {
-        plane0 = 0;
-        plane1 = 0;
-        plane2 = 0;
-        plane3 = 0;
-
-        for (px = 0; px < 16; px++) {
-            byte = src[(group * 8) + (px >> 1)];
-            nibble = (uint8_t)((px & 1) ? (byte & 0x0F) : (byte >> 4));
-            bit = (uint16_t)(15 - px);
-            plane0 = (uint16_t)(plane0 | (((nibble >> 0) & 1) << bit));
-            plane1 = (uint16_t)(plane1 | (((nibble >> 1) & 1) << bit));
-            plane2 = (uint16_t)(plane2 | (((nibble >> 2) & 1) << bit));
-            plane3 = (uint16_t)(plane3 | (((nibble >> 3) & 1) << bit));
-        }
-
-        dst[(group * 4) + 0] = plane0;
-        dst[(group * 4) + 1] = plane1;
-        dst[(group * 4) + 2] = plane2;
-        dst[(group * 4) + 3] = plane3;
-    }
-}
-
-
 static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd) {
     int16_t y;
     const uint8_t *srcLine;
     uint16_t *dstLine;
 
+    if (!gC2pLutReady) {  // lazy one-time build: the asm row routine only reads the table
+        initC2pLut();
+    }
     for (y = y0; y < y1; y++) {
         srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW];
         dstLine = (uint16_t *)&gScreenBase[y * ST_BYTES_PER_ROW];
-        c2pRow(srcLine, dstLine, groupStart, groupEnd);
+        chunkyToPlanarRowSt(srcLine, dstLine, groupStart, groupEnd, gC2pLut);  // converts groups [groupStart, groupEnd) of this row
     }
 }
 
@@ -263,6 +246,37 @@ static void refreshPaletteStateIfNeeded(const SurfaceT *src) {
 }
 
 
+// Build the 4 KB chunky-to-planar lookup table consumed by
+// chunkyToPlanarRowSt. Same layout/contents as the Amiga c2p LUT;
+// see src/port/atarist/c2p.s for the addressing math.
+static void initC2pLut(void) {
+    uint16_t pos;
+    uint16_t plane;
+    uint16_t src;
+    uint8_t highShift;
+    uint8_t lowShift;
+    uint8_t highBit;
+    uint8_t lowBit;
+
+    if (gC2pLutReady) {  // idempotent: a second call is a no-op
+        return;
+    }
+    for (src = 0; src < 256; src++) {  // every possible packed byte (two 4-bit pixels)
+        for (pos = 0; pos < 4; pos++) {
+            highShift = (uint8_t)(7 - 2 * pos);  // bit slot of the high-nibble (left) pixel
+            lowShift = (uint8_t)(6 - 2 * pos);   // bit slot of the low-nibble (right) pixel
+            for (plane = 0; plane < 4; plane++) {
+                highBit = (uint8_t)(((src >> 4) >> plane) & 1);    // this plane's bit of the left pixel
+                lowBit = (uint8_t)(((src & 0x0F) >> plane) & 1);   // this plane's bit of the right pixel
+                gC2pLut[src * 16 + pos * 4 + plane] =
+                    (uint8_t)((highBit << highShift) | (lowBit << lowShift));
+            }
+        }
+    }
+    gC2pLutReady = true;
+}
+
+
 // 12-bit $0RGB to STF 9-bit palette register (drops the low bit of
 // each 4-bit channel).
 static uint16_t quantizeColorToSt(uint16_t orgb) {
diff --git a/tools/joeysprite/joeysprite.c b/tools/joeysprite/joeysprite.c
index 886cd09..996751a 100644
--- a/tools/joeysprite/joeysprite.c
+++ b/tools/joeysprite/joeysprite.c
@@ -1,17 +1,36 @@
-// joeysprite: host-side compiler that turns raw tile data into a
-// `.spr` file ready to be loaded at runtime by spriteLoadFile.
+// joeysprite: host-side compiler that turns sprite art into a `.spr`
+// file ready to be loaded at runtime by spriteLoadFile.
 //
 // Usage:
 //   joeysprite --target {iigs,amiga,atarist,dos}
-//              --width-tiles N --height-tiles M
-//              input.tiles output.spr
+//              [--width-tiles N --height-tiles M]
+//              INPUT OUTPUT.spr
 //
-// `input.tiles` is widthTiles * heightTiles * 32 bytes, laid out
-// tile-major as the runtime SpriteT.tileData expects: tile (0,0)
-// first 32 bytes, tile (1,0) next 32, ... tile (widthTiles-1, 0),
-// then tile (0,1), and so on. Inside each tile, rows are stored
-// top-to-bottom and each row is 4 bytes (8 pixels at 4bpp packed,
-// high nibble = left pixel).
+// Two input formats are accepted; the first 2 bytes select the path:
+//
+// PPM (P6) -- 8-bit-per-channel raster from any pixel-art tool that
+//   exports PPM (GIMP, ImageMagick `convert`, paint.net, etc.). Image
+//   dimensions must be multiples of 8 in both axes; widthTiles /
+//   heightTiles are auto-derived as W/8 and H/8 (CLI overrides are
+//   optional and must match). Each input RGB is reduced to a 12-bit
+//   $0RGB color (high nibble of each channel); the input must use
+//   no more than 16 distinct $0RGB colors after that reduction. The
+//   FIRST color encountered (always the top-left pixel) is bound
+//   to palette index 0, which the runtime treats as transparent --
+//   so paint your sprite background with that pixel's color.
+//
+// Raw `.tiles` -- widthTiles * heightTiles * 32 bytes, laid out
+//   tile-major as the runtime SpriteT.tileData expects: tile (0,0)
+//   first 32 bytes, tile (1,0) next 32, ... tile (widthTiles-1, 0),
+//   then tile (0,1), and so on. Inside each tile, rows are stored
+//   top-to-bottom and each row is 4 bytes (8 pixels at 4bpp packed,
+//   high nibble = left pixel). --width-tiles / --height-tiles are
+//   required for this path since the file carries no header.
+//
+// The .spr output carries indices only -- the palette mapping is the
+// application's responsibility (typical pattern: ship a separate
+// .jas built from the same PPM via joeyasset, or hand-author the
+// palette in code).
 //
 // Output `.spr` format (target-native byte order for code; see
 // DESIGN.md §12). Mirrors src/core/sprite.c's reader:
@@ -21,14 +40,19 @@
 //   bytes 4-5  tileBytes (LE16) = widthTiles*heightTiles*32
 //   ...        offsets (JOEY_SPRITE_SHIFT_COUNT * SPRITE_OP_COUNT *
 //              uint16_t LE): [draw_s0, save_s0, restore_s0,
-//              draw_s1, save_s1, restore_s1]. Save/restore offsets
-//              are 0 here -- the runtime keeps the memcpy-based
-//              interpreter for those ops.
+//              draw_s1, save_s1, restore_s1]. Each entry is the
+//              byte offset of that routine within the compiled-code
+//              region, or 0xFFFF (SPRITE_NOT_COMPILED) if the per-CPU
+//              emitter returned 0 bytes for that op -- the runtime
+//              then falls back to the interpreted memcpy/RMW path.
 //   ...        compiled code (codeSize bytes)
 //   ...        raw tile data (tileBytes bytes; same layout as the
 //              input file, lets the runtime interpreter handle
 //              clipped draws without decoding the compiled bytes).
 
+#include <ctype.h>
+#include <errno.h>
+#include <stdbool.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -51,18 +75,26 @@ typedef enum {
 // ----- Constants -----
 
 #define MAX_SCRATCH_BYTES (16u * 1024u)
-#define SPR_HEADER_SIZE 6
-// Save/restore offsets are reserved (0) for now -- the runtime
-// memcpy interpreter handles them.
-#define SHIFT_OPS 3
-#define OFFSET_TABLE_BYTES (JOEY_SPRITE_SHIFT_COUNT * SHIFT_OPS * 2u)
+// Pixel art conventions for sprite work.
+#define TILE_PIXELS 8
+#define TILE_BYTES 32
+#define TILE_BYTES_PER_ROW 4
+#define MAX_PALETTE_ENTRIES 16
+
+#define PPM_TOKEN_MAX 64
 
 
 // ----- Prototypes -----
 
+static int buildPalette(const uint8_t *rgb, int width, int height, uint8_t *outIndices);
 static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath);
-static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, TargetE target);
+static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, uint8_t op, TargetE target);
+static bool fileIsPpm(const char *path);
+static int loadPpm(const char *path, int *outWidth, int *outHeight, uint8_t **outPixels);
+static int loadPpmAsTiles(const char *path, long *widthTiles, long *heightTiles, uint8_t **outTiles, uint32_t *outSize);
 static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize);
+static void packIndicesToTiles(const uint8_t *indices, int width, int height, uint8_t *outTiles);
+static int parsePpmToken(FILE *fp, char *out, int outLen);
 static TargetE parseTarget(const char *name);
 static int usage(const char *prog);
 static int writeLE16(FILE *fp, uint16_t v);
@@ -70,16 +102,68 @@ static int writeLE16(FILE *fp, uint16_t v);
 
 // ----- Internal helpers (alphabetical) -----
 
+// Reduce every input RGB triple to a 12-bit $0RGB color and assign
+// palette indices in encounter order: top-left pixel = index 0,
+// next-encountered = index 1, etc. The runtime treats index 0 as
+// transparent, so the top-left pixel must be the sprite's background
+// color. `rgb` holds width*height 8-bit RGB triples; one index per
+// pixel is written to `outIndices`. Returns the number of distinct
+// colors found, or -1 if the image needs more than 16 entries after
+// $0RGB quantization (outIndices is then only partially filled).
+// Pixel offsets use size_t so a huge raster cannot overflow `int`.
+// Mirrors joeyasset's buildPalette, minus the palette output.
+static int buildPalette(const uint8_t *rgb, int width, int height, uint8_t *outIndices) {
+    uint16_t palette[MAX_PALETTE_ENTRIES];
+    int      paletteCount;
+    size_t   total;
+    size_t   i;
+    int      j;
+    uint8_t  r;
+    uint8_t  g;
+    uint8_t  b;
+    uint16_t color;
+
+    total        = (size_t)width * (size_t)height;
+    paletteCount = 0;
+    for (i = 0; i < total; i++) {
+        r     = (uint8_t)(rgb[i * 3 + 0] >> 4);
+        g     = (uint8_t)(rgb[i * 3 + 1] >> 4);
+        b     = (uint8_t)(rgb[i * 3 + 2] >> 4);
+        color = (uint16_t)((r << 8) | (g << 4) | b);
+        for (j = 0; j < paletteCount; j++) {
+            if (palette[j] == color) {
+                break;
+            }
+        }
+        if (j == paletteCount) {
+            if (paletteCount >= MAX_PALETTE_ENTRIES) {
+                return -1;
+            }
+            palette[paletteCount] = color;
+            paletteCount++;
+        }
+        outIndices[i] = (uint8_t)j;
+    }
+    return paletteCount;
+}
+
+
+// Two-pass: pass 1 sizes every (shift, op) routine into shiftOpSizes;
+// pass 2 stamps them into the code buffer at their cumulative offsets.
+// Routines that return 0 bytes (the per-CPU emitter doesn't implement
+// that op) get SPRITE_NOT_COMPILED in their offset slot so the runtime
+// dispatch falls back to the interpreted path.
 static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) {
     uint8_t *scratch;
     uint8_t *codeBuf;
-    uint16_t shiftLengths[JOEY_SPRITE_SHIFT_COUNT];
+    uint16_t routineSizes[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];    // pass-1 byte count per (shift, op)
+    uint16_t routineOffsets[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];  // offset-table value per (shift, op)
     uint32_t totalCodeSize;
     uint8_t shift;
     uint8_t op;
     uint16_t written;
     uint16_t cursor;
-    uint16_t offset;
+    uint16_t value;
     FILE *fp;
     int rc;
 
@@ -91,9 +175,16 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
 
     totalCodeSize = 0;
     for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
-        written = emitForTarget(scratch, sp, shift, target);
-        shiftLengths[shift] = written;
-        totalCodeSize += written;
+        for (op = 0; op < SPRITE_OP_COUNT; op++) {
+            written = emitForTarget(scratch, sp, shift, op, target);  // pass 1: emit into scratch just to learn the size
+            routineSizes[shift][op] = written;
+            if (written == 0) {
+                routineOffsets[shift][op] = SPRITE_NOT_COMPILED;
+            } else {
+                routineOffsets[shift][op] = (uint16_t)totalCodeSize;  // running total = this routine's start offset
+                totalCodeSize += written;
+            }
+        }
     }
     if (totalCodeSize > 0xFFFFu) {
         fprintf(stderr, "joeysprite: emitted %u code bytes; max is 65535\n",
@@ -102,7 +193,7 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
         return 2;
     }
 
-    codeBuf = (uint8_t *)malloc(totalCodeSize);
+    codeBuf = (uint8_t *)malloc(totalCodeSize > 0 ? totalCodeSize : 1);  // malloc(0) may return NULL; never request 0 bytes
     if (codeBuf == NULL) {
         fprintf(stderr, "joeysprite: out of memory for code buffer\n");
         free(scratch);
@@ -111,8 +202,13 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
 
     cursor = 0;
     for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
-        written = emitForTarget(codeBuf + cursor, sp, shift, target);
-        cursor = (uint16_t)(cursor + written);
+        for (op = 0; op < SPRITE_OP_COUNT; op++) {
+            if (routineSizes[shift][op] == 0) {  // pass 1 emitted nothing for this op
+                continue;
+            }
+            written = emitForTarget(codeBuf + cursor, sp, shift, op, target);  // pass 2: same visit order as pass 1
+            cursor = (uint16_t)(cursor + written);
+        }
     }
 
     fp = fopen(outPath, "wb");
@@ -129,25 +225,17 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
     if (rc == 0 && writeLE16(fp, (uint16_t)totalCodeSize) != 0) rc = 2;
     if (rc == 0 && writeLE16(fp, (uint16_t)(sp->widthTiles * sp->heightTiles * 32u)) != 0) rc = 2;
 
-    // Offset table: cumulative draw offsets + zeros for save/restore.
-    offset = 0;
     for (shift = 0; rc == 0 && shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
-        for (op = 0; op < SHIFT_OPS; op++) {
-            uint16_t value;
-            if (op == SPRITE_OP_DRAW) {
-                value = offset;
-            } else {
-                value = 0;
-            }
+        for (op = 0; op < SPRITE_OP_COUNT; op++) {
+            value = routineOffsets[shift][op];
             if (writeLE16(fp, value) != 0) {
                 rc = 2;
                 break;
             }
         }
-        offset = (uint16_t)(offset + shiftLengths[shift]);
     }
 
-    if (rc == 0) {
+    if (rc == 0 && totalCodeSize > 0) {  // skip the write entirely when no routine was compiled
         if (fwrite(codeBuf, 1, totalCodeSize, fp) != totalCodeSize) {
             rc = 2;
         }
@@ -179,21 +267,207 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) {
 }
 
 
-static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, TargetE target) {
+static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, uint8_t op, TargetE target) {
     switch (target) {
         case TARGET_DOS:
-            return spriteEmitDrawX86(out, sp, shift);
+            switch (op) {
+                case SPRITE_OP_DRAW:    return spriteEmitDrawX86   (out, sp, shift);
+                case SPRITE_OP_SAVE:    return spriteEmitSaveX86   (out, sp, shift);
+                case SPRITE_OP_RESTORE: return spriteEmitRestoreX86(out, sp, shift);
+                default:                return 0;  // unknown op: 0 bytes
+            }
         case TARGET_AMIGA:
         case TARGET_ATARIST:
-            return spriteEmitDraw68k(out, sp, shift);
+            switch (op) {
+                case SPRITE_OP_DRAW:    return spriteEmitDraw68k   (out, sp, shift);
+                case SPRITE_OP_SAVE:    return spriteEmitSave68k   (out, sp, shift);
+                case SPRITE_OP_RESTORE: return spriteEmitRestore68k(out, sp, shift);
+                default:                return 0;
+            }
         case TARGET_IIGS:
-            return spriteEmitDrawIigs(out, sp, shift);
+            switch (op) {
+                case SPRITE_OP_DRAW:    return spriteEmitDrawIigs   (out, sp, shift);
+                case SPRITE_OP_SAVE:    return spriteEmitSaveIigs   (out, sp, shift);
+                case SPRITE_OP_RESTORE: return spriteEmitRestoreIigs(out, sp, shift);
+                default:                return 0;
+            }
         default:
             return 0;
     }
 }
+
+
+// Sniff the first 2 bytes for the PPM magic. Errors return false (the
+// caller will fall through to the .tiles loader, which surfaces a
+// clear error if the bytes aren't valid tile data either).
+static bool fileIsPpm(const char *path) {
+    FILE *fp;
+    int c0;
+    int c1;
+
+    fp = fopen(path, "rb");
+    if (fp == NULL) {
+        return false;
+    }
+    c0 = fgetc(fp);  // EOF on a short file simply fails the comparison below
+    c1 = fgetc(fp);
+    fclose(fp);
+    return (c0 == 'P' && c1 == '6');
+}
+
+
+// Read a PPM (P6) raster into a freshly allocated 8-bit RGB buffer.
+// Mirrors joeyasset's loadPpm. Caller frees *outPixels.
+// Returns 0 on success. On any failure a diagnostic is printed to
+// stderr, the out-parameters are left untouched, and 2 is returned.
+static int loadPpm(const char *path, int *outWidth, int *outHeight, uint8_t **outPixels) {
+    // Largest header number accepted: 65535 is the biggest maxval the
+    // PPM format defines and far beyond any sprite, and it keeps every
+    // field comfortably inside `int`.
+    enum { PPM_FIELD_LIMIT = 65535 };
+    static const char *const fieldNames[3] = { "width", "height", "maxval" };
+    FILE    *fp;
+    char     tok[PPM_TOKEN_MAX];
+    char    *end;
+    long     fields[3];
+    long     value;
+    int      f;
+    int      width;
+    int      height;
+    int      maxval;
+    size_t   pixelBytes;
+    uint8_t *buf;
+    size_t   got;
+
+    fp = fopen(path, "rb");
+    if (fp == NULL) {
+        fprintf(stderr, "joeysprite: cannot open %s: %s\n", path, strerror(errno));
+        return 2;
+    }
+    if (parsePpmToken(fp, tok, sizeof(tok)) != 0 || strcmp(tok, "P6") != 0) {
+        fprintf(stderr, "joeysprite: %s is not a PPM (P6) file\n", path);
+        fclose(fp);
+        return 2;
+    }
+    // Header fields, in file order: width, height, maxval.
+    for (f = 0; f < 3; f++) {
+        if (parsePpmToken(fp, tok, sizeof(tok)) != 0) {
+            fprintf(stderr, "joeysprite: %s: PPM header ends before %s\n", path, fieldNames[f]);
+            fclose(fp);
+            return 2;
+        }
+        // strtol + end pointer instead of atoi: trailing junk such as
+        // "12abc" is rejected, and out-of-range input is reported
+        // rather than being undefined behaviour.
+        errno = 0;
+        value = strtol(tok, &end, 10);
+        if (end == tok || *end != '\0' || errno == ERANGE ||
+            value < -PPM_FIELD_LIMIT || value > PPM_FIELD_LIMIT) {
+            fprintf(stderr, "joeysprite: %s: invalid PPM %s '%s'\n", path, fieldNames[f], tok);
+            fclose(fp);
+            return 2;
+        }
+        fields[f] = value;
+    }
+    width  = (int)fields[0];
+    height = (int)fields[1];
+    maxval = (int)fields[2];
+    if (width <= 0 || height <= 0) {
+        fprintf(stderr, "joeysprite: %s has non-positive dimensions\n", path);
+        fclose(fp);
+        return 2;
+    }
+    if (maxval != 255) {
+        fprintf(stderr, "joeysprite: %s maxval %d unsupported (must be 255)\n", path, maxval);
+        fclose(fp);
+        return 2;
+    }
+    // width * height * 3 must be representable before it is used as an
+    // allocation size (matters where size_t is 32 bits).
+    if ((size_t)height > SIZE_MAX / 3u / (size_t)width) {
+        fprintf(stderr, "joeysprite: %s is too large (%dx%d)\n", path, width, height);
+        fclose(fp);
+        return 2;
+    }
+    pixelBytes = (size_t)width * (size_t)height * 3u;
+    buf = (uint8_t *)malloc(pixelBytes);
+    if (buf == NULL) {
+        fprintf(stderr, "joeysprite: out of memory (%zu bytes)\n", pixelBytes);
+        fclose(fp);
+        return 2;
+    }
+    got = fread(buf, 1, pixelBytes, fp);
+    fclose(fp);
+    if (got != pixelBytes) {
+        fprintf(stderr, "joeysprite: short raster in %s (got %zu, need %zu)\n",
+                path, got, pixelBytes);
+        free(buf);
+        return 2;
+    }
+    *outWidth = width;
+    *outHeight = height;
+    *outPixels = buf;
+    return 0;
+}
+
+
+// End-to-end PPM -> tile-major 4bpp packed. On entry, *widthTiles /
+// *heightTiles are 0 if the user didn't pass --width-tiles /
+// --height-tiles, or the user-provided values otherwise; we fill in
+// the auto-derived values when the user left them at 0, and validate
+// against the image when they didn't.
+// Returns 0 on success, or 2 after printing a diagnostic. On success
+// *outTiles is a malloc'd buffer of *outSize bytes that this function
+// does not free (the caller is expected to).
+static int loadPpmAsTiles(const char *path, long *widthTiles, long *heightTiles, uint8_t **outTiles, uint32_t *outSize) {
+    // Per-axis tile limit; the same bound main() applies to
+    // --width-tiles / --height-tiles.
+    enum { MAX_TILES_PER_AXIS = 255 };
+    uint8_t *rgb;
+    uint8_t *indices;
+    uint8_t *tiles;
+    int width;
+    int height;
+    long wTiles;
+    long hTiles;
+    uint32_t tileBytes;
+    int paletteCount;
+    int rc;
+
+    rc = loadPpm(path, &width, &height, &rgb);
+    if (rc != 0) {
+        return rc;
+    }
+    if ((width % TILE_PIXELS) != 0 || (height % TILE_PIXELS) != 0) {
+        fprintf(stderr,
+                "joeysprite: %s is %dx%d -- both dimensions must be multiples of %d\n",
+                path, width, height, TILE_PIXELS);
+        free(rgb);
+        return 2;
+    }
+    wTiles = width / TILE_PIXELS;
+    hTiles = height / TILE_PIXELS;
+    // main() range-checks only the CLI values, and does so before the
+    // image is loaded; auto-derived counts must be held to the same
+    // limit here or an oversized PPM would slip through unchecked.
+    if (wTiles > MAX_TILES_PER_AXIS || hTiles > MAX_TILES_PER_AXIS) {
+        fprintf(stderr,
+                "joeysprite: %s is %dx%d -- at most %d tiles (%d pixels) per axis are supported\n",
+                path, width, height, MAX_TILES_PER_AXIS, MAX_TILES_PER_AXIS * TILE_PIXELS);
+        free(rgb);
+        return 2;
+    }
+    if (*widthTiles == 0) {
+        *widthTiles = wTiles;
+    } else if (*widthTiles != wTiles) {
+        fprintf(stderr,
+                "joeysprite: --width-tiles %ld disagrees with image width %d (%ld tiles)\n",
+                *widthTiles, width, wTiles);
+        free(rgb);
+        return 2;
+    }
+    if (*heightTiles == 0) {
+        *heightTiles = hTiles;
+    } else if (*heightTiles != hTiles) {
+        fprintf(stderr,
+                "joeysprite: --height-tiles %ld disagrees with image height %d (%ld tiles)\n",
+                *heightTiles, height, hTiles);
+        free(rgb);
+        return 2;
+    }
+
+    indices = (uint8_t *)malloc((size_t)width * (size_t)height);
+    if (indices == NULL) {
+        fprintf(stderr, "joeysprite: out of memory for index buffer\n");
+        free(rgb);
+        return 2;
+    }
+    paletteCount = buildPalette(rgb, width, height, indices);
+    free(rgb);  // only the indices are needed from here on
+    if (paletteCount < 0) {
+        fprintf(stderr,
+                "joeysprite: %s has more than 16 distinct $0RGB colors after\n"
+                " 4-bit-per-channel quantization. Reduce the input palette and\n"
+                " retry (e.g. pngquant --nofs 16, or GIMP -> Image -> Mode ->\n"
+                " Indexed... with 16 colors and no dithering).\n", path);
+        free(indices);
+        return 2;
+    }
+
+    tileBytes = (uint32_t)wTiles * (uint32_t)hTiles * TILE_BYTES;
+    tiles = (uint8_t *)malloc(tileBytes);
+    if (tiles == NULL) {
+        fprintf(stderr, "joeysprite: out of memory for tile buffer\n");
+        free(indices);
+        return 2;
+    }
+    packIndicesToTiles(indices, width, height, tiles);
+    free(indices);
+
+    *outTiles = tiles;
+    *outSize = tileBytes;
+    return 0;
+}
+
+
 static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize) {
     FILE *fp;
     long fileSize;
@@ -236,6 +510,76 @@ static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize)
 }
 
 
+// Reshuffle row-major palette indices into the tile-major 4bpp packed
+// layout the runtime SpriteT.tileData expects: tile (tx,ty)'s 32 bytes
+// land contiguously at outTiles[(ty*widthTiles + tx) * 32], with each
+// row inside the tile as 4 packed bytes (high nibble = left pixel).
+static void packIndicesToTiles(const uint8_t *indices, int width, int height, uint8_t *outTiles) {
+    int widthTiles;
+    int heightTiles;
+    int tx;
+    int ty;
+    int row;
+    int col;
+    int pxX;
+    int pxY;
+    uint8_t hi;
+    uint8_t lo;
+    uint8_t *tile;
+
+    widthTiles = width / TILE_PIXELS;
+    heightTiles = height / TILE_PIXELS;
+    for (ty = 0; ty < heightTiles; ty++) {
+        for (tx = 0; tx < widthTiles; tx++) {
+            tile = &outTiles[(ty * widthTiles + tx) * TILE_BYTES];
+            for (row = 0; row < TILE_PIXELS; row++) {
+                pxY = ty * TILE_PIXELS + row;
+                for (col = 0; col < TILE_BYTES_PER_ROW; col++) {
+                    pxX = tx * TILE_PIXELS + col * 2;
+                    hi = (uint8_t)(indices[pxY * width + pxX] & 0x0Fu);
+                    lo = (uint8_t)(indices[pxY * width + pxX + 1] & 0x0Fu);
+                    tile[row * TILE_BYTES_PER_ROW + col] = (uint8_t)((hi << 4) | lo);
+                }
+            }
+        }
+    }
+}
+
+
+// Reads a single whitespace-separated token from a PPM header,
+// skipping `#` comments to end-of-line. Mirrors joeyasset.
+// The token is NUL-terminated in `out` and silently truncated to
+// outLen-1 characters. Returns 0 on success, -1 if EOF is reached
+// before a token starts or if `out`/`outLen` cannot hold even the
+// terminator. Exactly one delimiter is consumed after the token, so
+// after the final header field the stream sits on the first raster byte.
+static int parsePpmToken(FILE *fp, char *out, int outLen) {
+    int c;
+    int pos;
+
+    if (out == NULL || outLen <= 0) {
+        return -1;
+    }
+    pos = 0;
+    // Skip leading whitespace and whole comments.
+    do {
+        c = fgetc(fp);
+        if (c == '#') {
+            // Discard through end of line; the newline itself then
+            // counts as whitespace in the loop condition.
+            while ((c = fgetc(fp)) != EOF && c != '\n') {
+                /* skip */;
+            }
+        }
+    } while (c != EOF && isspace(c));
+    if (c == EOF) {
+        return -1;
+    }
+    while (c != EOF && !isspace(c) && c != '#') {
+        if (pos < outLen - 1) {
+            out[pos++] = (char)c;
+        }
+        c = fgetc(fp);
+    }
+    if (c == '#') {
+        // A comment may butt straight up against a token ("16#note").
+        // Push the '#' back so the next call skips the comment instead
+        // of returning its text as header fields.
+        ungetc(c, fp);
+    }
+    out[pos] = 0;
+    return 0;
+}
+
+
 static TargetE parseTarget(const char *name) {
     if (strcmp(name, "iigs") == 0) return TARGET_IIGS;
     if (strcmp(name, "amiga") == 0) return TARGET_AMIGA;
@@ -248,8 +592,11 @@ static TargetE parseTarget(const char *name) {
 static int usage(const char *prog) {
     fprintf(stderr,
             "usage: %s --target {iigs,amiga,atarist,dos} \\\n"
-            " --width-tiles N --height-tiles M \\\n"
-            " input.tiles output.spr\n", prog);
+            " [--width-tiles N --height-tiles M] \\\n"
+            " INPUT OUTPUT.spr\n"
+            " INPUT is a PPM (P6) file (auto-derives tile dims from W/8, H/8)\n"
+            " or a raw .tiles byte stream (requires --width-tiles/--height-tiles).\n",
+            prog);
     return 2;
 }
 
@@ -301,9 +648,11 @@ int main(int argc, char **argv) {
             return usage(argv[0]);
         }
     }
-    if (targetName == NULL || widthTiles <= 0 || widthTiles > 255 ||
-        heightTiles <= 0 || heightTiles > 255 ||
-        inPath == NULL || outPath == NULL) {
+    if (targetName == NULL || inPath == NULL || outPath == NULL) {
+        return usage(argv[0]);
+    }
+    if (widthTiles < 0 || widthTiles > 255 ||
+        heightTiles < 0 || heightTiles > 255) {
         return usage(argv[0]);
     }
 
@@ -313,9 +662,24 @@ int main(int argc, char **argv) {
         return usage(argv[0]);
     }
 
-    rc = loadTileData(inPath, &tileBytes, &tileSize);
-    if (rc != 0) {
-        return rc;
+    if (fileIsPpm(inPath)) {
+        // PPM path: tile dims auto-derive (or validate against CLI).
+        rc = loadPpmAsTiles(inPath, &widthTiles, &heightTiles, &tileBytes, &tileSize);
+        if (rc != 0) {
+            return rc;
+        }
+    } else {
+        // Raw .tiles path: tile dims required.
+        if (widthTiles <= 0 || heightTiles <= 0) {
+            fprintf(stderr,
+                    "joeysprite: %s is not a PPM; --width-tiles and --height-tiles are required\n",
+                    inPath);
+            return usage(argv[0]);
+        }
+        rc = loadTileData(inPath, &tileBytes, &tileSize);
+        if (rc != 0) {
+            return rc;
+        }
     }
 
     expectedTileSize = (uint32_t)(widthTiles * heightTiles * 32);