// 68k sprite codegen (Amiga + Atari ST). Emits SysV-ish cdecl- // callable PIC draw / save / restore routines that read or write // 4bpp packed surface bytes via d16(a0) chains. Same shape as the // x86 emitter; only the instruction encoding differs. // // Calling convention (m68k gcc / mintlib): // void draw(uint8_t *dst); -- arg in 4(sp) // void save/restore(const uint8_t *src, uint8_t *dst); -- args in 4(sp)/8(sp) // a0/a1/d0/d1 are caller-saved. // // Per-byte emit (no run coalescing yet): // - all-transparent: skip // - all-opaque: move.b #imm, d16(a0) (6 bytes encoded) // - mixed: move.b d16(a0),d0; andi.b #~mask,d0; // ori.b #val,d0; move.b d0,d16(a0) (4*4 = 16 bytes) // Per row (after first): adda.w #SURFACE_BYTES_PER_ROW, a0 // (4 bytes encoded) // Prologue: movea.l 4(sp), a0 (4 bytes) // Epilogue: rts (2 bytes) // // All multi-byte instruction fields are big-endian; the emit writes // high-byte-first into the output stream so the target reads them // in native order. #include "joey/sprite.h" #include "joey/surface.h" #include "spriteEmitter.h" #include "spriteInternal.h" // ----- Constants ----- #define TILE_PIXELS 8 #define TILE_BYTES 32 #define TILE_BYTES_PER_ROW 4 #define TRANSPARENT_NIBBLE 0 #define MAX_ROUTINE_BYTES 16384 // ----- Prototypes ----- static uint16_t emitCopyBody68k(uint8_t *out, uint16_t cursor, uint16_t heightPx, uint16_t copyBytes, bool strideOnSrc); static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask); static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col); static uint16_t writeBE16(uint8_t *out, uint16_t value); // ----- Emit helpers (alphabetical) ----- // Shared body for save/restore. Walks heightPx rows of copyBytes // using `move.b (a0)+, (a1)+` byte-wise (safe regardless of pointer // alignment, since the screen-side x can land on an odd byte). After // each row except the last, advances either a0 (SAVE: src=screen) or // a1 (RESTORE: dst=screen) by (SURFACE_BYTES_PER_ROW - copyBytes) so // the strided side lines up with the next scanline; the contiguous // side advances naturally via the post-increment. // // strideOnSrc=true -> source has the screen stride (SAVE) // strideOnSrc=false -> destination has the screen stride (RESTORE) static uint16_t emitCopyBody68k(uint8_t *out, uint16_t cursor, uint16_t heightPx, uint16_t copyBytes, bool strideOnSrc) { uint16_t row; uint16_t col; uint16_t advance; advance = (uint16_t)(SURFACE_BYTES_PER_ROW - copyBytes); for (row = 0; row < heightPx; row++) { // Unrolled: move.b (a0)+, (a1)+ -- 0x12D8. for (col = 0; col < copyBytes; col++) { cursor += writeBE16(out + cursor, 0x12D8u); } if (row + 1u < heightPx) { // adda.w #advance, a0 (0xD0FC) for SAVE // adda.w #advance, a1 (0xD2FC) for RESTORE cursor += writeBE16(out + cursor, strideOnSrc ? 0xD0FCu : 0xD2FCu); cursor += writeBE16(out + cursor, advance); } } return cursor; } // Same logic as the x86 shiftedByteAt -- per-byte transparency // decomposition for shift in {0,1}. opaqueMask high nibble 0xF0 if // dest high nibble is opaque, 0x0F if low is opaque. static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask) { uint8_t srcByte; uint8_t hi; uint8_t lo; bool hasLeft; bool hasRight; *outValue = 0; *outOpaqueMask = 0; if (shift == 0) { if (col >= spriteBytesPerRow) { return; } srcByte = spriteSourceByte(sp, row, col); hi = (uint8_t)((srcByte >> 4) & 0x0Fu); lo = (uint8_t)(srcByte & 0x0Fu); if (hi != TRANSPARENT_NIBBLE) { *outValue |= (uint8_t)(hi << 4); *outOpaqueMask |= 0xF0u; } if (lo != TRANSPARENT_NIBBLE) { *outValue |= lo; *outOpaqueMask |= 0x0Fu; } return; } hasLeft = (col >= 1) && ((uint16_t)(col - 1) < spriteBytesPerRow); hasRight = (col < spriteBytesPerRow); if (hasLeft) { srcByte = spriteSourceByte(sp, row, (uint16_t)(col - 1)); hi = (uint8_t)(srcByte & 0x0Fu); if (hi != TRANSPARENT_NIBBLE) { *outValue |= (uint8_t)(hi << 4); *outOpaqueMask |= 0xF0u; } } if (hasRight) { srcByte = spriteSourceByte(sp, row, col); lo = (uint8_t)((srcByte >> 4) & 0x0Fu); if (lo != TRANSPARENT_NIBBLE) { *outValue |= lo; *outOpaqueMask |= 0x0Fu; } } } static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) { uint16_t tileX; uint16_t tileY; uint16_t inTileX; uint16_t inTileY; const uint8_t *tile; tileX = (uint16_t)(col / TILE_BYTES_PER_ROW); tileY = (uint16_t)(row / TILE_PIXELS); inTileX = (uint16_t)(col & (TILE_BYTES_PER_ROW - 1)); inTileY = (uint16_t)(row & (TILE_PIXELS - 1)); tile = sp->tileData + ((uint32_t)(tileY * sp->widthTiles + tileX)) * TILE_BYTES; return tile[inTileY * TILE_BYTES_PER_ROW + inTileX]; } // Emit a 16-bit big-endian value into the output stream. Returns 2. static uint16_t writeBE16(uint8_t *out, uint16_t value) { out[0] = (uint8_t)((value >> 8) & 0xFFu); out[1] = (uint8_t)(value & 0xFFu); return 2; } // 68k draw emit. Returns bytes written. uint16_t spriteEmitDraw68k(uint8_t *out, const SpriteT *sp, uint8_t shift) { uint16_t cursor; uint16_t row; uint16_t col; uint16_t heightPx; uint16_t spriteBytesPerRow; uint16_t destBytesPerRow; uint8_t value; uint8_t opaqueMask; // Chunky 4bpp has only two nibble-alignment positions; the // dispatcher uses x & 1 so shifts 2..7 are unreachable. Bail // early so the arena slot stays SPRITE_NOT_COMPILED. if (shift > 1u) { return 0u; } cursor = 0; heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS); spriteBytesPerRow = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW); destBytesPerRow = (uint16_t)(spriteBytesPerRow + (shift == 1 ? 1 : 0)); // Prologue: movea.l 4(sp), a0 cursor += writeBE16(out + cursor, 0x206Fu); cursor += writeBE16(out + cursor, 0x0004u); for (row = 0; row < heightPx; row++) { if (row > 0) { // adda.w #SURFACE_BYTES_PER_ROW, a0 cursor += writeBE16(out + cursor, 0xD0FCu); cursor += writeBE16(out + cursor, (uint16_t)SURFACE_BYTES_PER_ROW); } for (col = 0; col < destBytesPerRow; col++) { shiftedByteAt(sp, row, col, shift, spriteBytesPerRow, &value, &opaqueMask); if (opaqueMask == 0x00) { continue; } if (opaqueMask == 0xFFu) { // move.b #imm, d16(a0) // Opcode 0x117C: bits 11-9 = dst reg (0=a0), bits 8-6 = // dst mode (101 = an+d16), bits 5-3 = src mode (111), // bits 2-0 = src reg (100 = immediate). Source // extension (imm word, byte in low half) comes BEFORE // dest extension (d16) in the instruction stream. cursor += writeBE16(out + cursor, 0x117Cu); cursor += writeBE16(out + cursor, (uint16_t)value); cursor += writeBE16(out + cursor, col); } else { // move.b d16(a0), d0 cursor += writeBE16(out + cursor, 0x1028u); cursor += writeBE16(out + cursor, col); // andi.b #~opaqueMask, d0 cursor += writeBE16(out + cursor, 0x0200u); cursor += writeBE16(out + cursor, (uint16_t)(~opaqueMask & 0xFFu)); // ori.b #value, d0 cursor += writeBE16(out + cursor, 0x0000u); cursor += writeBE16(out + cursor, (uint16_t)value); // move.b d0, d16(a0) cursor += writeBE16(out + cursor, 0x1140u); cursor += writeBE16(out + cursor, col); } } } // Epilogue: rts cursor += writeBE16(out + cursor, 0x4E75u); return cursor; } // RESTORE: copy backup -> screen. Destination has the screen stride. uint16_t spriteEmitRestore68k(uint8_t *out, const SpriteT *sp, uint8_t shift) { uint16_t cursor; uint16_t heightPx; uint16_t copyBytes; if (shift > 1u) { return 0u; } cursor = 0; heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS); copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u)); // Prologue: movea.l 4(sp), a0 (src); movea.l 8(sp), a1 (dst). cursor += writeBE16(out + cursor, 0x206Fu); cursor += writeBE16(out + cursor, 0x0004u); cursor += writeBE16(out + cursor, 0x226Fu); cursor += writeBE16(out + cursor, 0x0008u); cursor = emitCopyBody68k(out, cursor, heightPx, copyBytes, false); cursor += writeBE16(out + cursor, 0x4E75u); return cursor; } // SAVE: copy screen -> backup. Source has the screen stride. uint16_t spriteEmitSave68k(uint8_t *out, const SpriteT *sp, uint8_t shift) { uint16_t cursor; uint16_t heightPx; uint16_t copyBytes; if (shift > 1u) { return 0u; } cursor = 0; heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS); copyBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u)); cursor += writeBE16(out + cursor, 0x206Fu); cursor += writeBE16(out + cursor, 0x0004u); cursor += writeBE16(out + cursor, 0x226Fu); cursor += writeBE16(out + cursor, 0x0008u); cursor = emitCopyBody68k(out, cursor, heightPx, copyBytes, true); cursor += writeBE16(out + cursor, 0x4E75u); return cursor; }