joeylib2/src/codegen/spriteEmit68k.c

279 lines
10 KiB
C

// 68k sprite codegen (Amiga + Atari ST). Emits SysV-ish cdecl-
// callable PIC draw / save / restore routines for 4bpp packed
// surfaces. Draw writes surface bytes via d16(a0) chains; save and
// restore copy them with move.b (a0)+,(a1)+. Same shape as the
// x86 emitter; only the instruction encoding differs.
//
// Calling convention (m68k gcc / mintlib):
// void draw(uint8_t *dst); -- arg in 4(sp)
// void save/restore(const uint8_t *src, uint8_t *dst); -- args in 4(sp)/8(sp)
// a0/a1/d0/d1 are caller-saved.
//
// Draw routine, per-byte emit (no run coalescing yet):
// - all-transparent: skip
// - all-opaque: move.b #imm, d16(a0) (6 bytes encoded)
// - mixed: move.b d16(a0),d0; andi.b #~mask,d0;
//          ori.b #val,d0; move.b d0,d16(a0) (4*4 = 16 bytes)
// Draw routine, per row (after first):
//   adda.w #SURFACE_BYTES_PER_ROW, a0 (4 bytes encoded)
// Draw prologue: movea.l 4(sp), a0 (4 bytes)
// Epilogue (all routines): rts (2 bytes)
//
// All multi-byte instruction fields are big-endian; the emit writes
// high-byte-first into the output stream so the target reads them
// in native order.
#include "joey/sprite.h"
#include "joey/surface.h"
#include "spriteEmitter.h"
#include "spriteInternal.h"
// ----- Constants -----
// Rows per tile: sprite height in pixels is heightTiles * TILE_PIXELS,
// and a row index is split into tile row / in-tile row with it.
#define TILE_PIXELS 8
// Distance in bytes between consecutive tiles in SpriteT::tileData
// (TILE_BYTES_PER_ROW * TILE_PIXELS).
#define TILE_BYTES 32
// Bytes in one row of one tile (two 4bpp pixels per byte).
#define TILE_BYTES_PER_ROW 4
// Nibble value that marks a pixel as transparent (never drawn).
#define TRANSPARENT_NIBBLE 0
// NOTE(review): not referenced by any code visible in this file;
// presumably the upper bound on one emitted routine -- confirm that
// the caller's output buffer is sized against it.
#define MAX_ROUTINE_BYTES 16384
// ----- Prototypes -----
// Forward declarations for the file-local helpers defined below.
// Emits the unrolled row-copy body shared by the save and restore routines.
static uint16_t emitCopyBody68k(uint8_t *out, uint16_t cursor, uint16_t heightPx, uint16_t copyBytes, bool strideOnSrc);
// Computes one destination byte (pixel value + opaque-nibble mask) for a given shift.
static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
// Fetches one packed source byte from the sprite's tiled pixel data.
static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
// Stores a 16-bit value high byte first; returns the byte count written (2).
static uint16_t writeBE16(uint8_t *out, uint16_t value);
// ----- Emit helpers (alphabetical) -----
// Row-copy body shared by the save and restore emitters. For each of
// heightPx rows it emits copyBytes unrolled `move.b (a0)+, (a1)+`
// instructions; byte moves are used so the copy works regardless of
// pointer alignment (the screen-side x can land on an odd byte).
// Between rows (never after the last one) it emits an `adda.w` that
// bumps the screen-side pointer by SURFACE_BYTES_PER_ROW - copyBytes
// so it lands on the next scanline; the backup-side pointer is
// contiguous and needs nothing beyond the post-increment.
//
//   strideOnSrc = true  -> a0 (source) is the screen      (SAVE)
//   strideOnSrc = false -> a1 (destination) is the screen (RESTORE)
//
// Emission starts at out + cursor; the updated cursor is returned.
static uint16_t emitCopyBody68k(uint8_t *out, uint16_t cursor, uint16_t heightPx, uint16_t copyBytes, bool strideOnSrc) {
    // adda.w #imm, a0 is 0xD0FC; adda.w #imm, a1 is 0xD2FC.
    const uint16_t addaOpcode = strideOnSrc ? 0xD0FCu : 0xD2FCu;
    const uint16_t rowGap     = (uint16_t)(SURFACE_BYTES_PER_ROW - copyBytes);
    uint16_t       rowsLeft;
    uint16_t       bytesLeft;

    for (rowsLeft = heightPx; rowsLeft > 0; rowsLeft--) {
        // One move.b (a0)+, (a1)+ (0x12D8) per byte of the row.
        for (bytesLeft = copyBytes; bytesLeft > 0; bytesLeft--) {
            cursor += writeBE16(out + cursor, 0x12D8u);
        }

        // The final row needs no pointer fix-up.
        if (rowsLeft == 1u) {
            break;
        }

        cursor += writeBE16(out + cursor, addaOpcode);
        cursor += writeBE16(out + cursor, rowGap);
    }

    return cursor;
}
// Per-byte transparency decomposition (same logic as the x86
// shiftedByteAt). Works out what destination byte `col` of sprite
// row `row` looks like once the sprite is shifted right by `shift`
// pixels (0 = byte aligned; any other value is handled as a
// one-pixel shift).
//
//   *outValue      - the opaque nibbles, already in their dest position
//   *outOpaqueMask - 0xF0 set if the dest high nibble is opaque,
//                    0x0F set if the dest low nibble is opaque
//
// Columns outside [0, spriteBytesPerRow) contribute no pixels, so a
// fully out-of-range request yields value 0 / mask 0.
static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask) {
    uint8_t packed;
    uint8_t upperNibble = 0;
    uint8_t lowerNibble = 0;
    bool    haveUpper   = false;
    bool    haveLower   = false;
    uint8_t pixels      = 0;
    uint8_t coverage    = 0;

    if (shift == 0) {
        // Aligned: both dest nibbles come straight from source byte col.
        if (col < spriteBytesPerRow) {
            packed      = spriteSourceByte(sp, row, col);
            upperNibble = (uint8_t)((packed >> 4) & 0x0Fu);
            lowerNibble = (uint8_t)(packed & 0x0Fu);
            haveUpper   = true;
            haveLower   = true;
        }
    } else {
        // Shifted: the dest high nibble is the low nibble of the byte
        // to the left; the dest low nibble is the high nibble of the
        // byte at col.
        if ((col >= 1) && ((uint16_t)(col - 1) < spriteBytesPerRow)) {
            packed      = spriteSourceByte(sp, row, (uint16_t)(col - 1));
            upperNibble = (uint8_t)(packed & 0x0Fu);
            haveUpper   = true;
        }
        if (col < spriteBytesPerRow) {
            packed      = spriteSourceByte(sp, row, col);
            lowerNibble = (uint8_t)((packed >> 4) & 0x0Fu);
            haveLower   = true;
        }
    }

    if (haveUpper && (upperNibble != TRANSPARENT_NIBBLE)) {
        pixels   |= (uint8_t)(upperNibble << 4);
        coverage |= 0xF0u;
    }
    if (haveLower && (lowerNibble != TRANSPARENT_NIBBLE)) {
        pixels   |= lowerNibble;
        coverage |= 0x0Fu;
    }

    *outValue      = pixels;
    *outOpaqueMask = coverage;
}
// Returns the packed 4bpp byte at byte-column `col` of pixel row
// `row` in the sprite. Pixel data is stored tile by tile (row-major
// over tiles, sp->widthTiles tiles per tile row, TILE_BYTES bytes per
// tile), so the lookup first picks the tile and then the byte inside
// it. No range checking is done on row / col.
static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) {
    const uint16_t tileCol   = (uint16_t)(col / TILE_BYTES_PER_ROW);
    const uint16_t tileRow   = (uint16_t)(row / TILE_PIXELS);
    const uint16_t byteInRow = (uint16_t)(col % TILE_BYTES_PER_ROW);
    const uint16_t rowInTile = (uint16_t)(row % TILE_PIXELS);
    const uint32_t tileIndex = (uint32_t)(tileRow * sp->widthTiles + tileCol);
    const uint8_t *tileBase  = sp->tileData + tileIndex * TILE_BYTES;

    return tileBase[rowInTile * TILE_BYTES_PER_ROW + byteInRow];
}
// Stores `value` at out[0..1] in big-endian order (most significant
// byte first), which is the order 68k instruction words are read in.
// Always returns 2, the number of bytes written, so callers can add
// the result straight onto their output cursor.
static uint16_t writeBE16(uint8_t *out, uint16_t value) {
    uint8_t *dst = out;

    *dst++ = (uint8_t)(value >> 8);
    *dst   = (uint8_t)value;
    return (uint16_t)2u;
}
// 68k draw emit. Writes a `void draw(uint8_t *dst)` routine for `sp`
// at nibble shift `shift` into `out` and returns the number of bytes
// written, or 0 when the shift is unsupported.
//
// Generated code: load dst from 4(sp) into a0, then for every sprite
// row touch only the destination bytes that contain opaque pixels,
// stepping a0 down one scanline between rows, and finish with rts.
//
// NOTE(review): nothing here bounds the amount of code written and
// the cursor is 16 bits wide -- the caller's buffer must be large
// enough for the sprite being compiled; confirm against the caller.
uint16_t spriteEmitDraw68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
    uint16_t pos;
    uint16_t y;
    uint16_t x;
    uint16_t rows;
    uint16_t srcRowBytes;
    uint16_t dstRowBytes;
    uint8_t  pixels;
    uint8_t  coverage;

    // Chunky 4bpp has only two nibble alignments and the dispatcher
    // selects with x & 1, so shifts 2..7 never occur. Emitting nothing
    // leaves the arena slot SPRITE_NOT_COMPILED.
    if (shift >= 2u) {
        return 0u;
    }

    rows        = (uint16_t)(sp->heightTiles * TILE_PIXELS);
    srcRowBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
    // A one-pixel shift spills into one extra destination byte.
    dstRowBytes = (uint16_t)(srcRowBytes + shift);

    // Prologue: movea.l 4(sp), a0
    pos  = writeBE16(out, 0x206Fu);
    pos += writeBE16(out + pos, 0x0004u);

    for (y = 0; y < rows; y++) {
        if (y != 0) {
            // adda.w #SURFACE_BYTES_PER_ROW, a0 -- next scanline.
            pos += writeBE16(out + pos, 0xD0FCu);
            pos += writeBE16(out + pos, (uint16_t)SURFACE_BYTES_PER_ROW);
        }

        for (x = 0; x < dstRowBytes; x++) {
            shiftedByteAt(sp, y, x, shift, srcRowBytes, &pixels, &coverage);

            switch (coverage) {
                case 0x00u:
                    // Both nibbles transparent: nothing to emit.
                    break;

                case 0xFFu:
                    // Both nibbles opaque: move.b #imm, d16(a0).
                    // Opcode 0x117C = dst reg a0 (bits 11-9 = 000),
                    // dst mode d16(An) (bits 8-6 = 101), src mode 111 /
                    // src reg 100 (immediate). The immediate word (byte
                    // in its low half) precedes the d16 displacement.
                    pos += writeBE16(out + pos, 0x117Cu);
                    pos += writeBE16(out + pos, (uint16_t)pixels);
                    pos += writeBE16(out + pos, x);
                    break;

                default:
                    // One nibble opaque: read-modify-write through d0.
                    // move.b d16(a0), d0
                    pos += writeBE16(out + pos, 0x1028u);
                    pos += writeBE16(out + pos, x);
                    // andi.b #~coverage, d0 -- clear the opaque nibble
                    pos += writeBE16(out + pos, 0x0200u);
                    pos += writeBE16(out + pos, (uint16_t)(~coverage & 0xFFu));
                    // ori.b #pixels, d0 -- drop in the sprite nibble
                    pos += writeBE16(out + pos, 0x0000u);
                    pos += writeBE16(out + pos, (uint16_t)pixels);
                    // move.b d0, d16(a0)
                    pos += writeBE16(out + pos, 0x1140u);
                    pos += writeBE16(out + pos, x);
                    break;
            }
        }
    }

    // Epilogue: rts
    pos += writeBE16(out + pos, 0x4E75u);
    return pos;
}
// RESTORE emit: generates a routine that copies the saved backup
// (src, contiguous) back onto the screen (dst, screen stride).
// Returns the number of bytes written to `out`, or 0 for an
// unsupported shift.
uint16_t spriteEmitRestore68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
    // Prologue words: movea.l 4(sp), a0 (src); movea.l 8(sp), a1 (dst).
    static const uint16_t prologue[4] = { 0x206Fu, 0x0004u, 0x226Fu, 0x0008u };
    uint16_t pos = 0;
    uint16_t rows;
    uint16_t rowBytes;
    uint16_t i;

    if (shift >= 2u) {
        return 0u;
    }

    rows = (uint16_t)(sp->heightTiles * TILE_PIXELS);
    // A one-pixel shift makes the covered span one byte wider.
    rowBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW + (shift == 1u ? 1u : 0u));

    for (i = 0; i < 4u; i++) {
        pos += writeBE16(out + pos, prologue[i]);
    }

    // Destination (a1) is the screen, so the stride goes on dst.
    pos = emitCopyBody68k(out, pos, rows, rowBytes, false);

    // Epilogue: rts
    pos += writeBE16(out + pos, 0x4E75u);
    return pos;
}
// SAVE emit: generates a routine that copies the screen area under
// the sprite (src, screen stride) into the backup buffer (dst,
// contiguous). Returns the number of bytes written to `out`, or 0
// for an unsupported shift.
uint16_t spriteEmitSave68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
    uint16_t pos;
    uint16_t rows;
    uint16_t rowBytes;

    if (shift >= 2u) {
        return 0u;
    }

    rows     = (uint16_t)(sp->heightTiles * TILE_PIXELS);
    rowBytes = (uint16_t)(sp->widthTiles * TILE_BYTES_PER_ROW);
    if (shift == 1u) {
        // A one-pixel shift makes the covered span one byte wider.
        rowBytes++;
    }

    // Prologue: movea.l 4(sp), a0 (src)
    pos  = writeBE16(out, 0x206Fu);
    pos += writeBE16(out + pos, 0x0004u);
    // Prologue: movea.l 8(sp), a1 (dst)
    pos += writeBE16(out + pos, 0x226Fu);
    pos += writeBE16(out + pos, 0x0008u);

    // Source (a0) is the screen, so the stride goes on src.
    pos = emitCopyBody68k(out, pos, rows, rowBytes, true);

    // Epilogue: rts
    pos += writeBE16(out + pos, 0x4E75u);
    return pos;
}