joeysprite tool working. Amiga and ST c2p moved to ASM.

This commit is contained in:
Scott Duensing 2026-04-26 22:09:49 -05:00
parent 6dd2266f13
commit ea1e853d5d
3 changed files with 656 additions and 90 deletions

188
src/port/atarist/c2p.s Normal file
View file

@ -0,0 +1,188 @@
| Atari ST chunky-to-planar conversion -- 68000 hand-rolled.
|
| Drop-in replacement for hal.c's old c2pRow C inner loop. The C
| version walked every pixel and built each plane word with a
| run-time variable bit shift (`1 << bit`), which costs ~6+2*bit
| cycles on 68000 -- roughly 100+ cycles per pixel after GCC's m68k
| codegen overhead. This rewrite uses a 4 KB lookup table built once
| by the C side (initC2pLut, run lazily before the first row is
| converted): same layout as the Amiga c2p LUT, so the
| (sourceByte, position, plane) -> 2-bit contribution mapping is
| identical, but the routine packs results into ST word-interleaved
| planar (4 plane words per 16-pixel group) instead of 4 separate
| plane bytes.
|
| Each ST group is 8 source bytes -> 4 plane words. Source byte
| positions 0..3 contribute to the HIGH byte of each plane word
| (bits 15..8); positions 4..7 contribute to the LOW byte (bits
| 7..0). Within a byte, the LUT for (src, bp%4, plane) already
| places bits at (7-2*(bp%4), 6-2*(bp%4)), so we use the SAME LUT
| entries for both halves -- we just shift d0..d3 left by 8 between
| the halves to move the high-half bits up before the low half ORs
| into the now-empty low byte.
|
| Calling convention: m68k-atari-mint-gcc cdecl.
| Args on stack at 4(sp), 8(sp), ...
| d2-d7, a2-a6 are callee-save.
| No return value.
|
| void chunkyToPlanarRowSt(const uint8_t *src, ; 4(sp) - 4bpp packed source row
| uint16_t *dst, ; 8(sp) - planar dest row (uint16_t*)
| uint16_t groupStart, ; 12(sp) - first group index (low word)
| uint16_t groupEnd, ; 16(sp) - one-past-last group index (low word)
| const uint8_t *lut); ; 20(sp) - 4 KB LUT base
|
| LUT layout: lut[src*16 + pos*4 + plane] (uint8) = the 2-bit plane
| contribution for source byte `src` at byte-position `pos` (0..3
| within a 4-byte chunk) going to plane `plane` (0..3). All 16
| (pos, plane) entries for one src byte are contiguous, so the inner
| loop reaches every entry off (a5, d4.w) with an 8-bit displacement
| (0..15) without LEA between reads.
|
| GAS-syntax (binutils m68k); assembled by m68k-atari-mint-as via
| the gcc driver.
| ----- Code -----
        .text
        .globl  _chunkyToPlanarRowSt

| Register save set. The calling convention noted in the file header
| lists d2-d7/a2-a6 as callee-save; of those, this routine writes only
| d2, d3, d4, d6, d7 and a5, so only those six are pushed. d5, a2-a4
| and a6 are never touched here and need no save/restore.
|
| MOVEM frame: d2-d4 (3) + d6-d7 (2) + a5 (1) = 6 regs * 4 bytes = 24 bytes.
| Every stack-argument offset below is written relative to
| SAVED_REGS_SIZE, so the list and this constant must change together.
        .equ    SAVED_REGS_SIZE, 24

_chunkyToPlanarRowSt:
        movem.l %d2-%d4/%d6-%d7/%a5,-(%sp)

        move.l  4+SAVED_REGS_SIZE(%sp),%a0      | src row base
        move.l  8+SAVED_REGS_SIZE(%sp),%a1      | dst (uint16_t*)
        | Both groupStart and groupEnd are uint16_t but GCC
        | promotes them to int and pushes 4 bytes each; the
        | low word lives at +2 in big-endian layout.
        move.w  12+SAVED_REGS_SIZE+2(%sp),%d6   | groupStart
        move.w  16+SAVED_REGS_SIZE+2(%sp),%d7   | groupEnd
        move.l  20+SAVED_REGS_SIZE(%sp),%a5     | LUT base

        | Advance src and dst to the first group's data.
        | Each group consumes 8 source bytes and produces 4
        | dest words (8 bytes), so both pointers advance by
        | groupStart * 8.
        move.w  %d6,%d4
        lsl.w   #3,%d4
        add.w   %d4,%a0
        add.w   %d4,%a1

        sub.w   %d6,%d7                         | groupCount = end - start
        subq.w  #1,%d7                          | DBRA bias
        bmi     .Ldone                          | empty (or inverted) range: nothing to do

.LgroupLoop:
        moveq   #0,%d0                          | plane 0 acc
        moveq   #0,%d1                          | plane 1 acc
        moveq   #0,%d2                          | plane 2 acc
        moveq   #0,%d3                          | plane 3 acc

        | ===== Source bytes 0..3 -> high byte of each plane word =====
        | Each step: zero-extend one source byte into d4, scale it by
        | 16 (four doublings) to index that byte's 16-entry LUT row,
        | then OR the four per-plane contributions for this position
        | into the low byte of d0..d3.
        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4                         | d4 = src * 16
        or.b    0(%a5,%d4.w),%d0
        or.b    1(%a5,%d4.w),%d1
        or.b    2(%a5,%d4.w),%d2
        or.b    3(%a5,%d4.w),%d3

        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    4(%a5,%d4.w),%d0
        or.b    5(%a5,%d4.w),%d1
        or.b    6(%a5,%d4.w),%d2
        or.b    7(%a5,%d4.w),%d3

        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    8(%a5,%d4.w),%d0
        or.b    9(%a5,%d4.w),%d1
        or.b    10(%a5,%d4.w),%d2
        or.b    11(%a5,%d4.w),%d3

        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    12(%a5,%d4.w),%d0
        or.b    13(%a5,%d4.w),%d1
        or.b    14(%a5,%d4.w),%d2
        or.b    15(%a5,%d4.w),%d3

        | Move accumulated bits into the HIGH byte of each word.
        | This also clears the low byte, so the or.b steps below
        | start from an empty low half.
        lsl.w   #8,%d0
        lsl.w   #8,%d1
        lsl.w   #8,%d2
        lsl.w   #8,%d3

        | ===== Source bytes 4..7 -> low byte of each plane word =====
        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    0(%a5,%d4.w),%d0
        or.b    1(%a5,%d4.w),%d1
        or.b    2(%a5,%d4.w),%d2
        or.b    3(%a5,%d4.w),%d3

        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    4(%a5,%d4.w),%d0
        or.b    5(%a5,%d4.w),%d1
        or.b    6(%a5,%d4.w),%d2
        or.b    7(%a5,%d4.w),%d3

        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    8(%a5,%d4.w),%d0
        or.b    9(%a5,%d4.w),%d1
        or.b    10(%a5,%d4.w),%d2
        or.b    11(%a5,%d4.w),%d3

        moveq   #0,%d4
        move.b  (%a0)+,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        add.w   %d4,%d4
        or.b    12(%a5,%d4.w),%d0
        or.b    13(%a5,%d4.w),%d1
        or.b    14(%a5,%d4.w),%d2
        or.b    15(%a5,%d4.w),%d3

        | Store 4 plane words.
        move.w  %d0,(%a1)+
        move.w  %d1,(%a1)+
        move.w  %d2,(%a1)+
        move.w  %d3,(%a1)+

        dbra    %d7,.LgroupLoop

.Ldone:
        movem.l (%sp)+,%d2-%d4/%d6-%d7/%a5
        rts

View file

@ -64,12 +64,19 @@
// ----- Prototypes ----- // ----- Prototypes -----
static uint16_t quantizeColorToSt(uint16_t orgb); static uint16_t quantizeColorToSt(uint16_t orgb);
static void c2pRow(const uint8_t *src, uint16_t *dst, uint16_t groupStart, uint16_t groupEnd);
static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd); static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd);
static void flattenScbPalettes(const SurfaceT *src); static void flattenScbPalettes(const SurfaceT *src);
static void initC2pLut(void);
static void writeDiagnostics(void); static void writeDiagnostics(void);
static long writePrevPaletteRegs(void); static long writePrevPaletteRegs(void);
// Provided by src/port/atarist/c2p.s.
extern void chunkyToPlanarRowSt(const uint8_t *src,
uint16_t *dst,
uint16_t groupStart,
uint16_t groupEnd,
const uint8_t *lut);
static __attribute__((interrupt_handler)) void timerBIsr(void); static __attribute__((interrupt_handler)) void timerBIsr(void);
static __attribute__((interrupt_handler)) void vblIsr(void); static __attribute__((interrupt_handler)) void vblIsr(void);
static void buildTransitions(const SurfaceT *src); static void buildTransitions(const SurfaceT *src);
@ -129,55 +136,31 @@ static uint8_t gCachedScb [SURFACE_HEIGHT];
static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE]; static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
static bool gCacheValid = false; static bool gCacheValid = false;
// 4 KB chunky-to-planar lookup table consumed by chunkyToPlanarRowSt
// (src/port/atarist/c2p.s). Layout: gC2pLut[src*16 + pos*4 + plane]
// = the 2-bit plane-byte contribution for source byte `src` at
// byte-position `pos` (0..3 within a 4-byte chunk) going to plane
// `plane`. Bit positions inside the byte are (7-2*pos, 6-2*pos), so
// the same table feeds both halves of an ST plane word: positions
// 0..3 land in the high byte, 4..7 (re-indexed mod 4) in the low
// byte. Built once by initC2pLut, lazily, the first time c2pRange runs.
static uint8_t gC2pLut[4 * 1024];
static bool gC2pLutReady = false;
// ----- Internal helpers (alphabetical) ----- // ----- Internal helpers (alphabetical) -----
// Chunky-to-planar for one scanline in ST word-interleaved layout.
// Every group is 8 packed source bytes (16 pixels, high nibble = left
// pixel) and becomes 4 consecutive plane words, leftmost pixel in bit
// 15. Only groups in [groupStart, groupEnd) are read and written, so a
// caller can refresh a horizontal sub-range and leave the rest of the
// row untouched.
static void c2pRow(const uint8_t *src, uint16_t *dst, uint16_t groupStart, uint16_t groupEnd) {
    uint16_t g;
    uint16_t byteIdx;
    uint16_t p;
    uint16_t mask;
    uint16_t acc[4];
    uint8_t  left;
    uint8_t  right;

    for (g = groupStart; g < groupEnd; g++) {
        const uint8_t *in  = &src[g * 8];
        uint16_t      *out = &dst[g * 4];

        acc[0] = 0;
        acc[1] = 0;
        acc[2] = 0;
        acc[3] = 0;

        // `mask` marks the left pixel of the current byte; the right
        // pixel sits one bit below it. Two pixels per byte, so the
        // mask drops two bits per step: 0x8000, 0x2000, ... 0x0002.
        mask = 0x8000u;
        for (byteIdx = 0; byteIdx < 8; byteIdx++) {
            left  = (uint8_t)(in[byteIdx] >> 4);
            right = (uint8_t)(in[byteIdx] & 0x0F);
            for (p = 0; p < 4; p++) {
                if ((left >> p) & 1) {
                    acc[p] = (uint16_t)(acc[p] | mask);
                }
                if ((right >> p) & 1) {
                    acc[p] = (uint16_t)(acc[p] | (mask >> 1));
                }
            }
            mask = (uint16_t)(mask >> 2);
        }

        out[0] = acc[0];
        out[1] = acc[1];
        out[2] = acc[2];
        out[3] = acc[3];
    }
}
static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd) { static void c2pRange(const SurfaceT *src, int16_t y0, int16_t y1, uint16_t groupStart, uint16_t groupEnd) {
int16_t y; int16_t y;
const uint8_t *srcLine; const uint8_t *srcLine;
uint16_t *dstLine; uint16_t *dstLine;
if (!gC2pLutReady) {
initC2pLut();
}
for (y = y0; y < y1; y++) { for (y = y0; y < y1; y++) {
srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW]; srcLine = &src->pixels[y * SURFACE_BYTES_PER_ROW];
dstLine = (uint16_t *)&gScreenBase[y * ST_BYTES_PER_ROW]; dstLine = (uint16_t *)&gScreenBase[y * ST_BYTES_PER_ROW];
c2pRow(srcLine, dstLine, groupStart, groupEnd); chunkyToPlanarRowSt(srcLine, dstLine, groupStart, groupEnd, gC2pLut);
} }
} }
@ -263,6 +246,37 @@ static void refreshPaletteStateIfNeeded(const SurfaceT *src) {
} }
// Populate gC2pLut, the table chunkyToPlanarRowSt indexes as
// lut[src*16 + pos*4 + plane]. For a packed source byte `src` sitting
// at byte-position `pos` (0..3), the entry holds bit `plane` of the
// left (high-nibble) pixel at bit 7-2*pos and bit `plane` of the right
// (low-nibble) pixel at bit 6-2*pos. Idempotent: returns immediately
// once gC2pLutReady is set.
static void initC2pLut(void) {
    uint8_t  *entry;
    uint16_t  srcByte;
    uint16_t  slot;
    uint16_t  bitplane;
    uint8_t   leftNibble;
    uint8_t   rightNibble;
    uint8_t   leftShift;
    uint8_t   rightShift;

    if (!gC2pLutReady) {
        // The three loops nest in the same order as the index formula
        // (src, then pos, then plane), so a running pointer visits the
        // entries in exactly table order.
        entry = gC2pLut;
        for (srcByte = 0; srcByte < 256; srcByte++) {
            leftNibble  = (uint8_t)(srcByte >> 4);
            rightNibble = (uint8_t)(srcByte & 0x0F);
            for (slot = 0; slot < 4; slot++) {
                leftShift  = (uint8_t)(7 - 2 * slot);
                rightShift = (uint8_t)(6 - 2 * slot);
                for (bitplane = 0; bitplane < 4; bitplane++) {
                    *entry = (uint8_t)((((leftNibble  >> bitplane) & 1) << leftShift) |
                                       (((rightNibble >> bitplane) & 1) << rightShift));
                    entry++;
                }
            }
        }
        gC2pLutReady = true;
    }
}
// 12-bit $0RGB to STF 9-bit palette register (drops the low bit of // 12-bit $0RGB to STF 9-bit palette register (drops the low bit of
// each 4-bit channel). // each 4-bit channel).
static uint16_t quantizeColorToSt(uint16_t orgb) { static uint16_t quantizeColorToSt(uint16_t orgb) {

View file

@ -1,17 +1,36 @@
// joeysprite: host-side compiler that turns raw tile data into a // joeysprite: host-side compiler that turns sprite art into a `.spr`
// `.spr` file ready to be loaded at runtime by spriteLoadFile. // file ready to be loaded at runtime by spriteLoadFile.
// //
// Usage: // Usage:
// joeysprite --target {iigs,amiga,atarist,dos} // joeysprite --target {iigs,amiga,atarist,dos}
// --width-tiles N --height-tiles M // [--width-tiles N --height-tiles M]
// input.tiles output.spr // INPUT OUTPUT.spr
// //
// `input.tiles` is widthTiles * heightTiles * 32 bytes, laid out // Two input formats are accepted; the first 2 bytes select the path:
// tile-major as the runtime SpriteT.tileData expects: tile (0,0) //
// first 32 bytes, tile (1,0) next 32, ... tile (widthTiles-1, 0), // PPM (P6) -- 8-bit-per-channel raster from any pixel-art tool that
// then tile (0,1), and so on. Inside each tile, rows are stored // exports PPM (GIMP, ImageMagick `convert`, paint.net, etc.). Image
// top-to-bottom and each row is 4 bytes (8 pixels at 4bpp packed, // dimensions must be multiples of 8 in both axes; widthTiles /
// high nibble = left pixel). // heightTiles are auto-derived as W/8 and H/8 (CLI overrides are
// optional and must match). Each input RGB is reduced to a 12-bit
// $0RGB color (high nibble of each channel); the input must use
// no more than 16 distinct $0RGB colors after that reduction. The
// FIRST color encountered (typically the top-left pixel) is bound
// to palette index 0, which the runtime treats as transparent --
// so paint your sprite background with that pixel's color.
//
// Raw `.tiles` -- widthTiles * heightTiles * 32 bytes, laid out
// tile-major as the runtime SpriteT.tileData expects: tile (0,0)
// first 32 bytes, tile (1,0) next 32, ... tile (widthTiles-1, 0),
// then tile (0,1), and so on. Inside each tile, rows are stored
// top-to-bottom and each row is 4 bytes (8 pixels at 4bpp packed,
// high nibble = left pixel). --width-tiles / --height-tiles are
// required for this path since the file carries no header.
//
// The .spr output carries indices only -- the palette mapping is the
// application's responsibility (typical pattern: ship a separate
// .jas built from the same PPM via joeyasset, or hand-author the
// palette in code).
// //
// Output `.spr` format (target-native byte order for code; see // Output `.spr` format (target-native byte order for code; see
// DESIGN.md §12). Mirrors src/core/sprite.c's reader: // DESIGN.md §12). Mirrors src/core/sprite.c's reader:
@ -21,14 +40,19 @@
// bytes 4-5 tileBytes (LE16) = widthTiles*heightTiles*32 // bytes 4-5 tileBytes (LE16) = widthTiles*heightTiles*32
// ... offsets (JOEY_SPRITE_SHIFT_COUNT * SPRITE_OP_COUNT * // ... offsets (JOEY_SPRITE_SHIFT_COUNT * SPRITE_OP_COUNT *
// uint16_t LE): [draw_s0, save_s0, restore_s0, // uint16_t LE): [draw_s0, save_s0, restore_s0,
// draw_s1, save_s1, restore_s1]. Save/restore offsets // draw_s1, save_s1, restore_s1]. Each entry is the
// are 0 here -- the runtime keeps the memcpy-based // byte offset of that routine within the compiled-code
// interpreter for those ops. // region, or 0xFFFF (SPRITE_NOT_COMPILED) if the per-CPU
// emitter returned 0 bytes for that op -- the runtime
// then falls back to the interpreted memcpy/RMW path.
// ... compiled code (codeSize bytes) // ... compiled code (codeSize bytes)
// ... raw tile data (tileBytes bytes; same layout as the // ... raw tile data (tileBytes bytes; same layout as the
// input file, lets the runtime interpreter handle // input file, lets the runtime interpreter handle
// clipped draws without decoding the compiled bytes). // clipped draws without decoding the compiled bytes).
#include <ctype.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h> #include <stdint.h>
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
@ -51,18 +75,26 @@ typedef enum {
// ----- Constants ----- // ----- Constants -----
#define MAX_SCRATCH_BYTES (16u * 1024u) #define MAX_SCRATCH_BYTES (16u * 1024u)
#define SPR_HEADER_SIZE 6 // Pixel art conventions for sprite work.
// Save/restore offsets are reserved (0) for now -- the runtime #define TILE_PIXELS 8
// memcpy interpreter handles them. #define TILE_BYTES 32
#define SHIFT_OPS 3 #define TILE_BYTES_PER_ROW 4
#define OFFSET_TABLE_BYTES (JOEY_SPRITE_SHIFT_COUNT * SHIFT_OPS * 2u) #define MAX_PALETTE_ENTRIES 16
#define PPM_TOKEN_MAX 64
// ----- Prototypes ----- // ----- Prototypes -----
static int buildPalette(const uint8_t *rgb, int width, int height, uint8_t *outIndices);
static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath); static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath);
static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, TargetE target); static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, uint8_t op, TargetE target);
static bool fileIsPpm(const char *path);
static int loadPpm(const char *path, int *outWidth, int *outHeight, uint8_t **outPixels);
static int loadPpmAsTiles(const char *path, long *widthTiles, long *heightTiles, uint8_t **outTiles, uint32_t *outSize);
static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize); static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize);
static void packIndicesToTiles(const uint8_t *indices, int width, int height, uint8_t *outTiles);
static int parsePpmToken(FILE *fp, char *out, int outLen);
static TargetE parseTarget(const char *name); static TargetE parseTarget(const char *name);
static int usage(const char *prog); static int usage(const char *prog);
static int writeLE16(FILE *fp, uint16_t v); static int writeLE16(FILE *fp, uint16_t v);
@ -70,16 +102,68 @@ static int writeLE16(FILE *fp, uint16_t v);
// ----- Internal helpers (alphabetical) ----- // ----- Internal helpers (alphabetical) -----
// Map each RGB triple of a width*height raster to a palette index.
// A pixel's key is its 12-bit $0RGB color (top nibble of each 8-bit
// channel). Keys get indices in order of first appearance, so the
// first pixel's color is always index 0 -- the index the runtime
// treats as transparent. One index byte per pixel is written to
// outIndices.
//
// Returns the number of distinct keys seen, or -1 as soon as a pixel
// would need a slot beyond MAX_PALETTE_ENTRIES (outIndices is then
// only partially filled).
//
// Mirrors joeyasset's buildPalette but keeps only the index array;
// the .spr format carries indices alone, so the colors are dropped.
static int buildPalette(const uint8_t *rgb, int width, int height, uint8_t *outIndices) {
    uint16_t       seen[MAX_PALETTE_ENTRIES];
    int            seenCount  = 0;
    int            pixelCount = width * height;
    const uint8_t *px         = rgb;
    int            n;
    int            slot;
    uint16_t       key;

    for (n = 0; n < pixelCount; n++, px += 3) {
        key = (uint16_t)(((px[0] >> 4) << 8) | ((px[1] >> 4) << 4) | (px[2] >> 4));

        // Linear probe: the table never holds more than 16 entries.
        slot = 0;
        while (slot < seenCount && seen[slot] != key) {
            slot++;
        }

        if (slot == seenCount) {
            // Unseen color: claim the next slot, if one is left.
            if (seenCount >= MAX_PALETTE_ENTRIES) {
                return -1;
            }
            seen[seenCount] = key;
            seenCount++;
        }
        outIndices[n] = (uint8_t)slot;
    }
    return seenCount;
}
// Two-pass: pass 1 sizes every (shift, op) routine into shiftOpSizes;
// pass 2 stamps them into the code buffer at their cumulative offsets.
// Routines that return 0 bytes (the per-CPU emitter doesn't implement
// that op) get SPRITE_NOT_COMPILED in their offset slot so the runtime
// dispatch falls back to the interpreted path.
static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) { static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath) {
uint8_t *scratch; uint8_t *scratch;
uint8_t *codeBuf; uint8_t *codeBuf;
uint16_t shiftLengths[JOEY_SPRITE_SHIFT_COUNT]; uint16_t routineSizes[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];
uint16_t routineOffsets[JOEY_SPRITE_SHIFT_COUNT][SPRITE_OP_COUNT];
uint32_t totalCodeSize; uint32_t totalCodeSize;
uint8_t shift; uint8_t shift;
uint8_t op; uint8_t op;
uint16_t written; uint16_t written;
uint16_t cursor; uint16_t cursor;
uint16_t offset; uint16_t value;
FILE *fp; FILE *fp;
int rc; int rc;
@ -91,9 +175,16 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
totalCodeSize = 0; totalCodeSize = 0;
for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
written = emitForTarget(scratch, sp, shift, target); for (op = 0; op < SPRITE_OP_COUNT; op++) {
shiftLengths[shift] = written; written = emitForTarget(scratch, sp, shift, op, target);
totalCodeSize += written; routineSizes[shift][op] = written;
if (written == 0) {
routineOffsets[shift][op] = SPRITE_NOT_COMPILED;
} else {
routineOffsets[shift][op] = (uint16_t)totalCodeSize;
totalCodeSize += written;
}
}
} }
if (totalCodeSize > 0xFFFFu) { if (totalCodeSize > 0xFFFFu) {
fprintf(stderr, "joeysprite: emitted %u code bytes; max is 65535\n", fprintf(stderr, "joeysprite: emitted %u code bytes; max is 65535\n",
@ -102,7 +193,7 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
return 2; return 2;
} }
codeBuf = (uint8_t *)malloc(totalCodeSize); codeBuf = (uint8_t *)malloc(totalCodeSize > 0 ? totalCodeSize : 1);
if (codeBuf == NULL) { if (codeBuf == NULL) {
fprintf(stderr, "joeysprite: out of memory for code buffer\n"); fprintf(stderr, "joeysprite: out of memory for code buffer\n");
free(scratch); free(scratch);
@ -111,8 +202,13 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
cursor = 0; cursor = 0;
for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
written = emitForTarget(codeBuf + cursor, sp, shift, target); for (op = 0; op < SPRITE_OP_COUNT; op++) {
cursor = (uint16_t)(cursor + written); if (routineSizes[shift][op] == 0) {
continue;
}
written = emitForTarget(codeBuf + cursor, sp, shift, op, target);
cursor = (uint16_t)(cursor + written);
}
} }
fp = fopen(outPath, "wb"); fp = fopen(outPath, "wb");
@ -129,25 +225,17 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
if (rc == 0 && writeLE16(fp, (uint16_t)totalCodeSize) != 0) rc = 2; if (rc == 0 && writeLE16(fp, (uint16_t)totalCodeSize) != 0) rc = 2;
if (rc == 0 && writeLE16(fp, (uint16_t)(sp->widthTiles * sp->heightTiles * 32u)) != 0) rc = 2; if (rc == 0 && writeLE16(fp, (uint16_t)(sp->widthTiles * sp->heightTiles * 32u)) != 0) rc = 2;
// Offset table: cumulative draw offsets + zeros for save/restore.
offset = 0;
for (shift = 0; rc == 0 && shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { for (shift = 0; rc == 0 && shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
for (op = 0; op < SHIFT_OPS; op++) { for (op = 0; op < SPRITE_OP_COUNT; op++) {
uint16_t value; value = routineOffsets[shift][op];
if (op == SPRITE_OP_DRAW) {
value = offset;
} else {
value = 0;
}
if (writeLE16(fp, value) != 0) { if (writeLE16(fp, value) != 0) {
rc = 2; rc = 2;
break; break;
} }
} }
offset = (uint16_t)(offset + shiftLengths[shift]);
} }
if (rc == 0) { if (rc == 0 && totalCodeSize > 0) {
if (fwrite(codeBuf, 1, totalCodeSize, fp) != totalCodeSize) { if (fwrite(codeBuf, 1, totalCodeSize, fp) != totalCodeSize) {
rc = 2; rc = 2;
} }
@ -179,21 +267,207 @@ static int compileToSpr(const SpriteT *sp, TargetE target, const char *outPath)
} }
static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, TargetE target) { static uint16_t emitForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift, uint8_t op, TargetE target) {
switch (target) { switch (target) {
case TARGET_DOS: case TARGET_DOS:
return spriteEmitDrawX86(out, sp, shift); switch (op) {
case SPRITE_OP_DRAW: return spriteEmitDrawX86 (out, sp, shift);
case SPRITE_OP_SAVE: return spriteEmitSaveX86 (out, sp, shift);
case SPRITE_OP_RESTORE: return spriteEmitRestoreX86(out, sp, shift);
default: return 0;
}
case TARGET_AMIGA: case TARGET_AMIGA:
case TARGET_ATARIST: case TARGET_ATARIST:
return spriteEmitDraw68k(out, sp, shift); switch (op) {
case SPRITE_OP_DRAW: return spriteEmitDraw68k (out, sp, shift);
case SPRITE_OP_SAVE: return spriteEmitSave68k (out, sp, shift);
case SPRITE_OP_RESTORE: return spriteEmitRestore68k(out, sp, shift);
default: return 0;
}
case TARGET_IIGS: case TARGET_IIGS:
return spriteEmitDrawIigs(out, sp, shift); switch (op) {
case SPRITE_OP_DRAW: return spriteEmitDrawIigs (out, sp, shift);
case SPRITE_OP_SAVE: return spriteEmitSaveIigs (out, sp, shift);
case SPRITE_OP_RESTORE: return spriteEmitRestoreIigs(out, sp, shift);
default: return 0;
}
default: default:
return 0; return 0;
} }
} }
// Report whether `path` begins with the two-byte PPM magic "P6".
// Any failure (file cannot be opened, fewer than two bytes available)
// yields false; the caller then falls through to the .tiles loader,
// which reports its own error if the bytes aren't valid tile data.
static bool fileIsPpm(const char *path) {
    unsigned char magic[2];
    size_t        got;
    FILE         *fp;

    fp = fopen(path, "rb");
    if (fp == NULL) {
        return false;
    }
    got = fread(magic, 1, sizeof(magic), fp);
    fclose(fp);

    if (got != sizeof(magic)) {
        return false;
    }
    return (magic[0] == 'P' && magic[1] == '6');
}
// Read one decimal integer field (`field` names it for diagnostics)
// from a PPM header. Unlike atoi, this rejects empty, non-numeric,
// trailing-garbage, and out-of-range tokens instead of silently
// turning them into 0 or a wrapped value. Prints a diagnostic and
// returns 2 on failure; returns 0 with *outValue set on success.
// Private helper of loadPpm, defined ahead of it so no separate
// prototype is needed.
static int loadPpmHeaderInt(FILE *fp, const char *path, const char *field, int *outValue) {
    char  tok[PPM_TOKEN_MAX];
    char *end;
    long  value;

    if (parsePpmToken(fp, tok, sizeof(tok)) != 0) {
        fprintf(stderr, "joeysprite: %s: PPM header ends before %s\n", path, field);
        return 2;
    }
    errno = 0;
    value = strtol(tok, &end, 10);
    // The int32 bounds keep the narrowing to int below exact (assumes
    // the build host's int is at least 32 bits).
    if (end == tok || *end != '\0' || errno == ERANGE ||
        value < INT32_MIN || value > INT32_MAX) {
        fprintf(stderr, "joeysprite: %s: PPM header %s \"%s\" is not a valid integer\n",
                path, field, tok);
        return 2;
    }
    *outValue = (int)value;
    return 0;
}


// Read a PPM (P6) raster into a freshly allocated 8-bit RGB buffer
// (3 bytes per pixel, row-major, exactly as stored in the file).
// Mirrors joeyasset's loadPpm.
//
// path       file to read
// outWidth   receives the image width in pixels
// outHeight  receives the image height in pixels
// outPixels  receives the malloc'd raster; caller frees it
//
// Returns 0 on success. On any failure returns 2 after printing a
// diagnostic to stderr, and leaves the out parameters untouched.
// Only maxval 255 is accepted.
static int loadPpm(const char *path, int *outWidth, int *outHeight, uint8_t **outPixels) {
    FILE    *fp;
    char     tok[PPM_TOKEN_MAX];
    int      width;
    int      height;
    int      maxval;
    size_t   pixelBytes;
    uint8_t *buf;
    size_t   got;

    fp = fopen(path, "rb");
    if (fp == NULL) {
        fprintf(stderr, "joeysprite: cannot open %s: %s\n", path, strerror(errno));
        return 2;
    }

    if (parsePpmToken(fp, tok, sizeof(tok)) != 0 || strcmp(tok, "P6") != 0) {
        fprintf(stderr, "joeysprite: %s is not a PPM (P6) file\n", path);
        fclose(fp);
        return 2;
    }

    // Header fields in file order. The helper reports its own error,
    // so a truncated or malformed header no longer fails silently.
    if (loadPpmHeaderInt(fp, path, "width", &width) != 0 ||
        loadPpmHeaderInt(fp, path, "height", &height) != 0 ||
        loadPpmHeaderInt(fp, path, "maxval", &maxval) != 0) {
        fclose(fp);
        return 2;
    }

    if (width <= 0 || height <= 0) {
        fprintf(stderr, "joeysprite: %s has non-positive dimensions\n", path);
        fclose(fp);
        return 2;
    }
    if (maxval != 255) {
        fprintf(stderr, "joeysprite: %s maxval %d unsupported (must be 255)\n", path, maxval);
        fclose(fp);
        return 2;
    }

    // Refuse sizes whose byte count would wrap size_t; a wrapped
    // count would under-allocate the raster buffer.
    if ((size_t)width > (SIZE_MAX / 3u) / (size_t)height) {
        fprintf(stderr, "joeysprite: %s is too large (%dx%d)\n", path, width, height);
        fclose(fp);
        return 2;
    }

    pixelBytes = (size_t)width * (size_t)height * 3u;
    buf = (uint8_t *)malloc(pixelBytes);
    if (buf == NULL) {
        fprintf(stderr, "joeysprite: out of memory (%zu bytes)\n", pixelBytes);
        fclose(fp);
        return 2;
    }

    // parsePpmToken consumed the single whitespace byte after maxval,
    // so the stream is positioned at the first raster byte.
    got = fread(buf, 1, pixelBytes, fp);
    fclose(fp);
    if (got != pixelBytes) {
        fprintf(stderr, "joeysprite: short raster in %s (got %zu, need %zu)\n",
                path, got, pixelBytes);
        free(buf);
        return 2;
    }

    *outWidth  = width;
    *outHeight = height;
    *outPixels = buf;
    return 0;
}
// End-to-end PPM -> tile-major 4bpp packed tile data.
//
// path         PPM (P6) file to convert
// widthTiles   in/out: 0 means "derive from the image"; a non-zero
// heightTiles  value is the user's --width-tiles / --height-tiles and
//              must match the image exactly. On success both hold the
//              image's tile counts.
// outTiles     receives the malloc'd tile buffer; caller frees it
// outSize      receives the buffer size in bytes
//
// Returns 0 on success, 2 (after a diagnostic on stderr) when the
// image cannot be read, is not a multiple of the tile size, disagrees
// with the CLI values, exceeds 255 tiles on either axis, needs more
// than 16 colors, or memory runs out.
static int loadPpmAsTiles(const char *path, long *widthTiles, long *heightTiles, uint8_t **outTiles, uint32_t *outSize) {
    uint8_t  *rgb;
    uint8_t  *indices;
    uint8_t  *tiles;
    int       width;
    int       height;
    long      wTiles;
    long      hTiles;
    uint32_t  tileBytes;
    int       paletteCount;
    int       rc;

    rc = loadPpm(path, &width, &height, &rgb);
    if (rc != 0) {
        return rc;
    }

    if ((width % TILE_PIXELS) != 0 || (height % TILE_PIXELS) != 0) {
        fprintf(stderr,
                "joeysprite: %s is %dx%d -- both dimensions must be multiples of %d\n",
                path, width, height, TILE_PIXELS);
        free(rgb);
        return 2;
    }
    wTiles = width  / TILE_PIXELS;
    hTiles = height / TILE_PIXELS;

    if (*widthTiles != 0 && *widthTiles != wTiles) {
        fprintf(stderr,
                "joeysprite: --width-tiles %ld disagrees with image width %d (%ld tiles)\n",
                *widthTiles, width, wTiles);
        free(rgb);
        return 2;
    }
    if (*heightTiles != 0 && *heightTiles != hTiles) {
        fprintf(stderr,
                "joeysprite: --height-tiles %ld disagrees with image height %d (%ld tiles)\n",
                *heightTiles, height, hTiles);
        free(rgb);
        return 2;
    }

    // main() range-checks the CLI tile counts (0..255) before calling
    // here, so an auto-derived count never passes through that check.
    // Apply the same 255-tile ceiling to the image itself.
    if (wTiles > 255 || hTiles > 255) {
        fprintf(stderr,
                "joeysprite: %s is %dx%d -- sprites are limited to 255x255 tiles (%dx%d pixels)\n",
                path, width, height, 255 * TILE_PIXELS, 255 * TILE_PIXELS);
        free(rgb);
        return 2;
    }

    indices = (uint8_t *)malloc((size_t)width * (size_t)height);
    if (indices == NULL) {
        fprintf(stderr, "joeysprite: out of memory for index buffer\n");
        free(rgb);
        return 2;
    }

    // The RGB raster is only needed to assign indices.
    paletteCount = buildPalette(rgb, width, height, indices);
    free(rgb);
    if (paletteCount < 0) {
        fprintf(stderr,
                "joeysprite: %s has more than 16 distinct $0RGB colors after\n"
                "  4-bit-per-channel quantization. Reduce the input palette and\n"
                "  retry (e.g. pngquant --nofs 16, or GIMP -> Image -> Mode ->\n"
                "  Indexed... with 16 colors and no dithering).\n", path);
        free(indices);
        return 2;
    }

    tileBytes = (uint32_t)wTiles * (uint32_t)hTiles * TILE_BYTES;
    tiles = (uint8_t *)malloc(tileBytes);
    if (tiles == NULL) {
        fprintf(stderr, "joeysprite: out of memory for tile buffer\n");
        free(indices);
        return 2;
    }
    packIndicesToTiles(indices, width, height, tiles);
    free(indices);

    // Publish results only once nothing can fail any more, so the
    // caller's tile counts stay as passed in on every error path.
    *widthTiles  = wTiles;
    *heightTiles = hTiles;
    *outTiles    = tiles;
    *outSize     = tileBytes;
    return 0;
}
static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize) { static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize) {
FILE *fp; FILE *fp;
long fileSize; long fileSize;
@ -236,6 +510,76 @@ static int loadTileData(const char *path, uint8_t **outBytes, uint32_t *outSize)
} }
// Convert a row-major array of palette indices (one byte per pixel,
// `width` pixels per row) into tile-major 4bpp packed data. Tile
// (tx,ty) occupies TILE_BYTES contiguous bytes starting at
// outTiles[(ty*widthTiles + tx) * TILE_BYTES]; each tile row is
// TILE_BYTES_PER_ROW bytes, two pixels per byte with the left pixel in
// the high nibble. Only whole tiles are converted: trailing pixels
// beyond a multiple of TILE_PIXELS on either axis are ignored.
static void packIndicesToTiles(const uint8_t *indices, int width, int height, uint8_t *outTiles) {
    const int tilesAcross  = width / TILE_PIXELS;
    const int tilesDown    = height / TILE_PIXELS;
    const int usedRows     = tilesDown * TILE_PIXELS;
    const int packedPerRow = tilesAcross * TILE_BYTES_PER_ROW;
    int       y;
    int       b;

    // Walk the image in scanline order and scatter each packed byte
    // to its slot; every output byte is written exactly once, so the
    // result does not depend on visiting order.
    for (y = 0; y < usedRows; y++) {
        const int tileRow   = y / TILE_PIXELS;
        const int rowInTile = y % TILE_PIXELS;

        for (b = 0; b < packedPerRow; b++) {
            const int     tileCol   = b / TILE_BYTES_PER_ROW;
            const int     colInTile = b % TILE_BYTES_PER_ROW;
            const int     leftX     = tileCol * TILE_PIXELS + colInTile * 2;
            const uint8_t left      = (uint8_t)(indices[y * width + leftX] & 0x0Fu);
            const uint8_t right     = (uint8_t)(indices[y * width + leftX + 1] & 0x0Fu);
            uint8_t      *tile      = &outTiles[(tileRow * tilesAcross + tileCol) * TILE_BYTES];

            tile[rowInTile * TILE_BYTES_PER_ROW + colInTile] = (uint8_t)((left << 4) | right);
        }
    }
}
// Read one whitespace-delimited token from a PPM header into `out`
// (capacity outLen, always NUL-terminated when outLen > 0; tokens
// longer than outLen-1 characters are truncated, the excess is read
// and discarded). Leading whitespace and `#` comments (through end of
// line) are skipped. Mirrors joeyasset.
//
// Returns 0 when a token was read, -1 if EOF is hit before any token
// character.
//
// On return the stream has consumed exactly one delimiter after the
// token when that delimiter is whitespace -- callers rely on this to
// be positioned at the raster right after the maxval field. When the
// token is cut short by a `#`, the `#` is pushed back so the next call
// skips the comment instead of parsing its text as header tokens.
static int parsePpmToken(FILE *fp, char *out, int outLen) {
    int c;
    int pos;

    // Phase 1: find the first token character.
    for (;;) {
        c = fgetc(fp);
        if (c == EOF) {
            return -1;
        }
        if (isspace(c)) {
            continue;
        }
        if (c != '#') {
            break;
        }
        // Comment: discard through end of line (or EOF, which the next
        // fgetc at the top of the loop will report).
        do {
            c = fgetc(fp);
        } while (c != EOF && c != '\n');
    }

    // Phase 2: collect the token.
    pos = 0;
    while (c != EOF && !isspace(c) && c != '#') {
        if (pos < outLen - 1) {
            out[pos++] = (char)c;
        }
        c = fgetc(fp);
    }

    if (c == '#') {
        // A comment begins directly after the token; leave it for the
        // next call's comment skipper.
        ungetc(c, fp);
    }
    if (outLen > 0) {
        out[pos] = 0;
    }
    return 0;
}
static TargetE parseTarget(const char *name) { static TargetE parseTarget(const char *name) {
if (strcmp(name, "iigs") == 0) return TARGET_IIGS; if (strcmp(name, "iigs") == 0) return TARGET_IIGS;
if (strcmp(name, "amiga") == 0) return TARGET_AMIGA; if (strcmp(name, "amiga") == 0) return TARGET_AMIGA;
@ -248,8 +592,11 @@ static TargetE parseTarget(const char *name) {
static int usage(const char *prog) { static int usage(const char *prog) {
fprintf(stderr, fprintf(stderr,
"usage: %s --target {iigs,amiga,atarist,dos} \\\n" "usage: %s --target {iigs,amiga,atarist,dos} \\\n"
" --width-tiles N --height-tiles M \\\n" " [--width-tiles N --height-tiles M] \\\n"
" input.tiles output.spr\n", prog); " INPUT OUTPUT.spr\n"
" INPUT is a PPM (P6) file (auto-derives tile dims from W/8, H/8)\n"
" or a raw .tiles byte stream (requires --width-tiles/--height-tiles).\n",
prog);
return 2; return 2;
} }
@ -301,9 +648,11 @@ int main(int argc, char **argv) {
return usage(argv[0]); return usage(argv[0]);
} }
} }
if (targetName == NULL || widthTiles <= 0 || widthTiles > 255 || if (targetName == NULL || inPath == NULL || outPath == NULL) {
heightTiles <= 0 || heightTiles > 255 || return usage(argv[0]);
inPath == NULL || outPath == NULL) { }
if (widthTiles < 0 || widthTiles > 255 ||
heightTiles < 0 || heightTiles > 255) {
return usage(argv[0]); return usage(argv[0]);
} }
@ -313,9 +662,24 @@ int main(int argc, char **argv) {
return usage(argv[0]); return usage(argv[0]);
} }
rc = loadTileData(inPath, &tileBytes, &tileSize); if (fileIsPpm(inPath)) {
if (rc != 0) { // PPM path: tile dims auto-derive (or validate against CLI).
return rc; rc = loadPpmAsTiles(inPath, &widthTiles, &heightTiles, &tileBytes, &tileSize);
if (rc != 0) {
return rc;
}
} else {
// Raw .tiles path: tile dims required.
if (widthTiles <= 0 || heightTiles <= 0) {
fprintf(stderr,
"joeysprite: %s is not a PPM; --width-tiles and --height-tiles are required\n",
inPath);
return usage(argv[0]);
}
rc = loadTileData(inPath, &tileBytes, &tileSize);
if (rc != 0) {
return rc;
}
} }
expectedTileSize = (uint32_t)(widthTiles * heightTiles * 32); expectedTileSize = (uint32_t)(widthTiles * heightTiles * 32);