505 lines
20 KiB
C
505 lines
20 KiB
C
// Planar 68k sprite codegen for Amiga (post-Phase 9, no chunky shadow).
|
|
//
|
|
// Emits PIC routines that write directly to the four bitplanes via 4
|
|
// address-register pointers (a0..a3 = plane[0..3] base + byteOff,
|
|
// where byteOff = y*40 + x/8 -- the dispatcher pre-computes this).
|
|
//
|
|
// Calling convention (cdecl on m68k-amigaos-gcc):
|
|
// draw(p0, p1, p2, p3):
|
|
// args at 4(sp), 8(sp), 12(sp), 16(sp) -- one ULONG per plane.
|
|
// loaded into a0..a3 by the prologue.
|
|
// save(p0, p1, p2, p3, backup):
|
|
// 5 args; backup at 20(sp), loaded into a4.
|
|
// restore(p0, p1, p2, p3, backup):
|
|
// same as save but reads backup, writes planes.
|
|
//
|
|
// Per-byte plane write encoding decisions:
|
|
// - all-transparent (mask=0): skip the byte entirely
|
|
// - all-opaque (mask=0xFF): move.b #imm, d16(an) (6 bytes)
|
|
// - mixed (0<mask<0xFF): move.b d16(an), d0;
|
|
// andi.b #~mask, d0;
|
|
// ori.b #imm, d0;
|
|
// move.b d0, d16(an) (4+6+6+4 = 20 bytes)
|
|
//
|
|
// Per row advance: 4 plane pointers each get adda.w #SURFACE_WIDTH/8
|
|
// = adda.w #40, an (4 bytes encoded each, 16 bytes total per row).
|
|
// We omit the advance after the last row.
|
|
//
|
|
// Shift handling: shifts 0..7 are pre-baked. The dispatcher selects
|
|
// the variant via x % 8 and pre-computes byteOff = y*40 + (x & ~7)/8
|
|
// (i.e. round x DOWN to 8-pixel boundary). The variant for shift s
|
|
// then emits to (widthTiles + 1) plane bytes per row when s != 0
|
|
// (the rightmost shift bits spill into one extra plane byte) and to
|
|
// widthTiles plane bytes per row when s == 0.
|
|
//
|
|
// The emitter assumes sprite width is a multiple of 8 (= a multiple
|
|
// of one tile = a multiple of 8 pixels) so plane bytes per row are
|
|
// integer. JoeyLib sprites are always tile-multiple by API contract.
|
|
|
|
#include "joey/sprite.h"
|
|
#include "joey/surface.h"
|
|
#include "spriteEmitter.h"
|
|
#include "spriteInternal.h"
|
|
|
|
|
|
// ----- Constants -----

#define TILE_PIXELS 8           /* pixels per tile edge */
#define TILE_BYTES 32           /* chunky bytes per tile (8 rows x 4 bytes) */
#define TILE_BYTES_PER_ROW 4    /* chunky bytes per tile row (2 pixels/byte) */
#define TRANSPARENT_NIBBLE 0    /* JoeyLib transparent color index */
#define AMIGA_BITPLANES 4       /* 16-color screen = 4 bitplanes */
#define AMIGA_BYTES_PER_ROW 40  /* plane row stride: SURFACE_WIDTH / 8 */
|
|
|
|
|
|
// ----- Instruction encoding helpers -----
|
|
|
|
// Store `value` at `out` in big-endian (68k) byte order.
// Returns the number of bytes written (always 2), so callers can
// advance their emit cursor with `cursor += writeBE16(...)`.
static uint16_t writeBE16(uint8_t *out, uint16_t value) {
    out[0] = (uint8_t)(value >> 8);
    out[1] = (uint8_t)value;
    return 2u;
}
|
|
|
|
|
|
// movea.l d16(SP), an -- load stack arg at SP+disp into An.
// Encoding: 0010 nnn 001 101 111 + disp16
// = 0x206F + (n << 9): size=10 (long), dst mode=001 (An, i.e. movea),
// src mode=101 (d16,An), src reg=111 (SP); n is the destination An.
// a0: 0x206F, a1: 0x226F, a2: 0x246F, a3: 0x266F, a4: 0x286F.
static const uint16_t kMoveaSpToAn[] = {
    0x206Fu, 0x226Fu, 0x246Fu, 0x266Fu, 0x286Fu, 0x2A6Fu, 0x2C6Fu, 0x2E6Fu
};
|
|
|
|
|
|
// adda.w #imm, an -- adds 16-bit imm to An, sign-extended to 32 bits
// (so a negative immediate rewinds the pointer -- used by save/restore
// to reset a plane pointer between planes).
// Encoding: 1101 nnn 011 111 100 + imm = 0xD0FC + (n << 9).
static const uint16_t kAddaWImmToAn[] = {
    0xD0FCu, 0xD2FCu, 0xD4FCu, 0xD6FCu, 0xD8FCu, 0xDAFCu, 0xDCFCu, 0xDEFCu
};
|
|
|
|
|
|
// ANDI.B #imm, D0 -- 4 bytes: opcode word + imm word (byte immediate
// sits in the low half of the extension word).
// Opcode: 0000 0010 00 000 000 (size=byte, mode=Dn, reg=D0)
#define ANDI_B_IMM_D0 0x0200u

// ORI.B #imm, D0 -- 4 bytes: opcode word + imm word (byte immediate
// in the low half, same layout as ANDI).
// Opcode: 0000 0000 00 000 000
#define ORI_B_IMM_D0 0x0000u
|
|
|
|
|
|
// MOVE.B d16(An), D0 -- 4 bytes (opcode + disp).
// Encoding: size=01 (byte), dst reg=000 (D0), dst mode=000 (Dn),
// src mode=101 (d16,An), src reg=An
// = 0001 000 000 101 nnn = 0x1028 + An.
static const uint16_t kMoveBD16AnToD0[] = {
    0x1028u, 0x1029u, 0x102Au, 0x102Bu
};

// MOVE.B D0, d16(An) -- 4 bytes (opcode + disp).
// Encoding: 0001 nnn 101 000 000 = 0x1140 + (An << 9).
static const uint16_t kMoveBD0ToD16An[] = {
    0x1140u, 0x1340u, 0x1540u, 0x1740u
};
|
|
|
|
|
|
// MOVE.B #imm, d16(An) -- 6 bytes (opcode + imm + disp).
// Encoding: 0001 nnn 101 111 100 = 0x117C + (An << 9).
// (Was 0x113C earlier -- that's dst mode=100 = predecrement, a
// 4-byte instruction with no disp word, so the emitted byte stream
// went out of sync and every subsequent instruction decoded into
// garbage.)
static const uint16_t kMoveBImmToD16An[] = {
    0x117Cu, 0x137Cu, 0x157Cu, 0x177Cu
};

// MOVE.B (a4)+, d16(An) -- 4 bytes (opcode + disp); used by restore
// (backup -> planes; the backup pointer lives in a4).
// Encoding: 0001 nnn 101 011 100 = 0x115C + (An << 9).
static const uint16_t kMoveBA4PostincToD16An[] = {
    0x115Cu, 0x135Cu, 0x155Cu, 0x175Cu
};
|
|
|
|
|
|
// MOVE.B d16(An), (a4)+ -- 4 bytes (opcode + disp); used by save
// (planes -> backup; the backup pointer lives in a4).
// Encoding: size=01 (byte), dst reg=100 (A4), dst mode=011 (postinc),
// src mode=101 (d16,An), src reg=An
// = 0001 100 011 101 nnn = 0x18E8 + An.
static const uint16_t kMoveBD16AnToA4Postinc[] = {
    0x18E8u, 0x18E9u, 0x18EAu, 0x18EBu
};
|
|
|
|
|
|
// MOVEM.L reglist, -(SP) -- 4 bytes (opcode + reglist mask).
// Opcode 0x48E7. Predec mask is REVERSED vs all other modes:
// bit 15 = D0, ..., bit 8 = D7, bit 7 = A0, bit 6 = A1, bit 5 = A2,
// bit 4 = A3, bit 3 = A4, bit 2 = A5, bit 1 = A6, bit 0 = A7.
#define MOVEM_L_PUSH_OPCODE 0x48E7u
#define MOVEM_L_MASK_A2_A3 0x0030u /* bits 5,4 = A2,A3 (predec order) */
#define MOVEM_L_MASK_A2_A3_A4 0x0038u /* bits 5,4,3 = A2,A3,A4 */

// MOVEM.L (SP)+, reglist -- 4 bytes (opcode + reglist mask).
// Opcode 0x4CDF. Postinc mask follows the standard layout:
// bit 0 = D0, ..., bit 7 = D7, bit 8 = A0, ..., bit 15 = A7.
// Each pop mask below must name the same registers as its push
// counterpart above, just in the other bit order.
#define MOVEM_L_POP_OPCODE 0x4CDFu
#define MOVEM_L_MASK_POP_A2_A3 0x0C00u /* bits 11,10 = A3,A2 */
#define MOVEM_L_MASK_POP_A2_A3_A4 0x1C00u /* bits 12,11,10 = A4,A3,A2 */

// RTS opcode.
#define OPCODE_RTS 0x4E75u
|
|
|
|
|
|
// ----- Emit helpers -----
|
|
|
|
// For shift 0 (byte-aligned x), the sprite's chunky tile data converts
|
|
// directly to plane bytes without any sub-byte shifting. For each
|
|
// (row, col-byte, plane) we extract the 8 plane bits from 4 chunky
|
|
// bytes (= 8 pixels) and produce one plane byte; we also produce a
|
|
// mask byte indicating which pixel positions are non-transparent
|
|
// (any plane bit != 0 in the source means non-transparent if
|
|
// transparent index is 0, the JoeyLib convention).
|
|
//
|
|
// Sprite layout: tileData = wTiles x hTiles tiles, each tile = 8 rows
|
|
// x 4 chunky bytes (32 bytes). Tiles laid out row-major within the
|
|
// sprite. For plane-byte column `c` of row `r`:
|
|
// tileX = c (since each plane byte covers exactly one tile column)
|
|
// tileY = r / 8
|
|
// inTileY = r % 8
|
|
// chunky bytes = tileData + (tileY*wTiles + tileX)*32 + inTileY*4 + 0..3
|
|
//
|
|
// `col` must be in [0, widthTiles); callers handle out-of-range cols
|
|
// (used when computing shifted variants that span widthTiles+1 output
|
|
// bytes per row) by passing a sentinel and checking against widthTiles
|
|
// before invoking this helper.
|
|
static void planeByteAndMaskAt(const SpriteT *sp, uint16_t row, uint16_t col,
|
|
uint8_t *planeBytes /*[4]*/, uint8_t *maskByte)
|
|
{
|
|
uint16_t tileX;
|
|
uint16_t tileY;
|
|
uint16_t inTileY;
|
|
const uint8_t *tile;
|
|
const uint8_t *chunky;
|
|
uint8_t nibbles[8];
|
|
uint8_t b0, b1, b2, b3;
|
|
uint16_t p;
|
|
uint8_t bitMask;
|
|
uint8_t pix;
|
|
|
|
tileX = col;
|
|
tileY = row >> 3;
|
|
inTileY = row & 7u;
|
|
|
|
tile = sp->tileData + (uint32_t)((tileY * sp->widthTiles + tileX) * 32u);
|
|
chunky = tile + inTileY * 4u;
|
|
|
|
nibbles[0] = (uint8_t)(chunky[0] >> 4);
|
|
nibbles[1] = (uint8_t)(chunky[0] & 0x0Fu);
|
|
nibbles[2] = (uint8_t)(chunky[1] >> 4);
|
|
nibbles[3] = (uint8_t)(chunky[1] & 0x0Fu);
|
|
nibbles[4] = (uint8_t)(chunky[2] >> 4);
|
|
nibbles[5] = (uint8_t)(chunky[2] & 0x0Fu);
|
|
nibbles[6] = (uint8_t)(chunky[3] >> 4);
|
|
nibbles[7] = (uint8_t)(chunky[3] & 0x0Fu);
|
|
|
|
b0 = 0u; b1 = 0u; b2 = 0u; b3 = 0u;
|
|
*maskByte = 0u;
|
|
for (p = 0; p < 8u; p++) {
|
|
pix = nibbles[p];
|
|
if (pix == TRANSPARENT_NIBBLE) {
|
|
continue;
|
|
}
|
|
bitMask = (uint8_t)(0x80u >> p);
|
|
*maskByte = (uint8_t)(*maskByte | bitMask);
|
|
if (pix & 1u) b0 = (uint8_t)(b0 | bitMask);
|
|
if (pix & 2u) b1 = (uint8_t)(b1 | bitMask);
|
|
if (pix & 4u) b2 = (uint8_t)(b2 | bitMask);
|
|
if (pix & 8u) b3 = (uint8_t)(b3 | bitMask);
|
|
}
|
|
planeBytes[0] = b0;
|
|
planeBytes[1] = b1;
|
|
planeBytes[2] = b2;
|
|
planeBytes[3] = b3;
|
|
}
|
|
|
|
|
|
// Shifted variant: produces 4 plane bytes and 1 mask byte for output
|
|
// column `outCol` (0..widthTiles inclusive) of row `row` when the
|
|
// sprite is shifted right by `shift` pixels (1..7). For shift 0,
|
|
// callers should use planeByteAndMaskAt directly (faster, no spill).
|
|
//
|
|
// Each output byte is composed of bits drawn from up to two source
|
|
// plane bytes:
|
|
// leftPart = src[outCol-1] << (8 - shift) (high (shift) bits)
|
|
// rightPart = src[outCol] >> shift (low (8-shift) bits)
|
|
// with src[-1] and src[widthTiles] treated as 0/transparent. The
|
|
// resulting plane byte is leftPart | rightPart; the mask byte is the
|
|
// shifted union of the per-byte source masks.
|
|
static void planeByteAndMaskShifted(const SpriteT *sp, uint16_t row, uint16_t outCol,
|
|
uint8_t shift, uint16_t widthTiles,
|
|
uint8_t *planeBytes /*[4]*/, uint8_t *maskByte)
|
|
{
|
|
uint8_t leftPlanes[AMIGA_BITPLANES];
|
|
uint8_t leftMask;
|
|
uint8_t rightPlanes[AMIGA_BITPLANES];
|
|
uint8_t rightMask;
|
|
uint8_t i;
|
|
|
|
leftMask = 0u;
|
|
rightMask = 0u;
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
leftPlanes[i] = 0u;
|
|
rightPlanes[i] = 0u;
|
|
}
|
|
|
|
if (outCol > 0u && (uint16_t)(outCol - 1u) < widthTiles) {
|
|
planeByteAndMaskAt(sp, row, (uint16_t)(outCol - 1u), leftPlanes, &leftMask);
|
|
}
|
|
if (outCol < widthTiles) {
|
|
planeByteAndMaskAt(sp, row, outCol, rightPlanes, &rightMask);
|
|
}
|
|
|
|
*maskByte = (uint8_t)(((leftMask << (8u - shift)) & 0xFFu) |
|
|
((rightMask >> shift) & 0xFFu));
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
planeBytes[i] = (uint8_t)(((leftPlanes[i] << (8u - shift)) & 0xFFu) |
|
|
((rightPlanes[i] >> shift) & 0xFFu));
|
|
}
|
|
}
|
|
|
|
|
|
// Emit code that merges one plane byte into d16(an) where d16 is the
|
|
// row-relative byte offset (0 since we re-base each row by adda.w).
|
|
// The choice of all-opaque vs mixed encoding cuts code size when many
|
|
// pixels are opaque (typical for sprite interiors).
|
|
static uint16_t emitMergeByteToD16An(uint8_t *out, uint16_t cursor,
|
|
uint8_t an, uint8_t disp,
|
|
uint8_t maskByte, uint8_t srcByte)
|
|
{
|
|
if (maskByte == 0u) {
|
|
return cursor; /* nothing to write */
|
|
}
|
|
if (maskByte == 0xFFu) {
|
|
/* All-opaque shortcut: move.b #src, d16(an). */
|
|
cursor += writeBE16(out + cursor, kMoveBImmToD16An[an]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)srcByte);
|
|
cursor += writeBE16(out + cursor, (uint16_t)disp);
|
|
return cursor;
|
|
}
|
|
/* Mixed: load existing, clear mask bits, OR in src, write back. */
|
|
cursor += writeBE16(out + cursor, kMoveBD16AnToD0[an]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)disp);
|
|
cursor += writeBE16(out + cursor, ANDI_B_IMM_D0);
|
|
cursor += writeBE16(out + cursor, (uint16_t)((~maskByte) & 0xFFu));
|
|
cursor += writeBE16(out + cursor, ORI_B_IMM_D0);
|
|
cursor += writeBE16(out + cursor, (uint16_t)srcByte);
|
|
cursor += writeBE16(out + cursor, kMoveBD0ToD16An[an]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)disp);
|
|
return cursor;
|
|
}
|
|
|
|
|
|
// ----- Public API -----
|
|
|
|
uint16_t spriteEmitDrawPlanar68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
|
uint16_t cursor;
|
|
uint16_t row;
|
|
uint16_t col;
|
|
uint16_t heightPx;
|
|
uint16_t widthTiles;
|
|
uint16_t bytesPerRow; /* per plane, per row */
|
|
uint8_t planeBytes[AMIGA_BITPLANES];
|
|
uint8_t maskByte;
|
|
uint8_t i;
|
|
|
|
if (shift > 7u) {
|
|
return 0u;
|
|
}
|
|
|
|
cursor = 0;
|
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
|
widthTiles = (uint16_t)sp->widthTiles;
|
|
bytesPerRow = (uint16_t)(widthTiles + (shift == 0u ? 0u : 1u));
|
|
|
|
/* Prologue: m68k cdecl callee-saves a2-a6; we clobber a2 and a3
|
|
* loading plane pointers, so push them first. After the push, all
|
|
* stack arg displacements shift by +8 (two longs). */
|
|
cursor += writeBE16(out + cursor, MOVEM_L_PUSH_OPCODE);
|
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_A2_A3);
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[i]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)(8u + 4u + i * 4u));
|
|
}
|
|
|
|
for (row = 0; row < heightPx; row++) {
|
|
for (col = 0; col < bytesPerRow; col++) {
|
|
if (shift == 0u) {
|
|
planeByteAndMaskAt(sp, row, col, planeBytes, &maskByte);
|
|
} else {
|
|
planeByteAndMaskShifted(sp, row, col, shift, widthTiles, planeBytes, &maskByte);
|
|
}
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
cursor = emitMergeByteToD16An(out, cursor, i, (uint8_t)col,
|
|
maskByte, planeBytes[i]);
|
|
}
|
|
}
|
|
if (row + 1u < heightPx) {
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)AMIGA_BYTES_PER_ROW);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Epilogue: restore a2-a3, rts. */
|
|
cursor += writeBE16(out + cursor, MOVEM_L_POP_OPCODE);
|
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_POP_A2_A3);
|
|
cursor += writeBE16(out + cursor, OPCODE_RTS);
|
|
return cursor;
|
|
}
|
|
|
|
|
|
// SAVE: planes -> backup. `backup` is one contiguous buffer laid out
// as 4 sequential plane stripes (all rows of plane 0, then plane 1,
// ...), matching the halSpriteSavePlanes format so the save buffer
// stays interchangeable with the cross-platform fallback.
//
// Emitted code, per plane: for each row, copy bytesPerRow bytes from
// d16(an) to (a4)+; advance an by 40 after every row except the last;
// then rewind an by (heightPx-1)*40 before starting the next plane.
// a4 advances naturally through the backup via post-increment, so it
// never needs resetting.
uint16_t spriteEmitSavePlanar68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
    uint16_t cursor;
    uint16_t row;
    uint16_t col;
    uint16_t heightPx;
    uint16_t bytesPerRow;   /* plane bytes per row, per plane */
    uint8_t i;

    /* Shifts 2..7 reuse shift 1's bytes (identical memcpy). The
     * spriteCompile post-emit pass aliases their routineOffsets to
     * slot 1 so this routine is emitted once. */
    if (shift > 1u) {
        return 0u;
    }

    cursor = 0;
    heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
    /* Non-zero shift spills into one extra plane byte per row. */
    bytesPerRow = (uint16_t)(sp->widthTiles + (shift == 0u ? 0u : 1u));

    /* Prologue: callee-save a2/a3/a4 (m68k cdecl), then load 4 plane
     * pointers + backup pointer. After the push, all stack arg disps
     * shift by +12 (three longs). */
    cursor += writeBE16(out + cursor, MOVEM_L_PUSH_OPCODE);
    cursor += writeBE16(out + cursor, MOVEM_L_MASK_A2_A3_A4);
    for (i = 0; i < AMIGA_BITPLANES; i++) {
        cursor += writeBE16(out + cursor, kMoveaSpToAn[i]);
        cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + i * 4u));
    }
    /* a4 = backup (5th argument). */
    cursor += writeBE16(out + cursor, kMoveaSpToAn[4]);
    cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + 4u * 4u));

    /* Plane-major walk to match halSpriteSavePlanes' stripe layout.
     * (A row-interleaved walk would emit slightly simpler code but
     * change the backup layout, which would also require updating the
     * HAL save/restore fallbacks.) */
    for (i = 0; i < AMIGA_BITPLANES; i++) {
        for (row = 0; row < heightPx; row++) {
            for (col = 0; col < bytesPerRow; col++) {
                cursor += writeBE16(out + cursor, kMoveBD16AnToA4Postinc[i]);
                cursor += writeBE16(out + cursor, (uint16_t)col);
            }
            /* Next screen row; omitted after the last row. */
            if (row + 1u < heightPx) {
                cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
                cursor += writeBE16(out + cursor, (uint16_t)AMIGA_BYTES_PER_ROW);
            }
        }
        /* Reset An back to the plane base for the next plane. The
         * total advance was (heightPx - 1) * 40; emit its two's-
         * complement (adda.w sign-extends the 16-bit immediate). */
        if (i + 1u < AMIGA_BITPLANES) {
            cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
            cursor += writeBE16(out + cursor, (uint16_t)(0u - ((heightPx - 1u) * AMIGA_BYTES_PER_ROW)));
        }
    }

    /* Epilogue: restore a2/a3/a4, rts. */
    cursor += writeBE16(out + cursor, MOVEM_L_POP_OPCODE);
    cursor += writeBE16(out + cursor, MOVEM_L_MASK_POP_A2_A3_A4);
    cursor += writeBE16(out + cursor, OPCODE_RTS);
    return cursor;
}
|
|
|
|
|
|
// RESTORE: backup -> planes. Mirror of save. Uses MOVE.B (a4)+, d16(an).
|
|
uint16_t spriteEmitRestorePlanar68k(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
|
uint16_t cursor;
|
|
uint16_t row;
|
|
uint16_t col;
|
|
uint16_t heightPx;
|
|
uint16_t bytesPerRow;
|
|
uint8_t i;
|
|
|
|
if (shift > 1u) {
|
|
return 0u;
|
|
}
|
|
|
|
cursor = 0;
|
|
heightPx = (uint16_t)(sp->heightTiles * TILE_PIXELS);
|
|
bytesPerRow = (uint16_t)(sp->widthTiles + (shift == 0u ? 0u : 1u));
|
|
|
|
/* Callee-save a2/a3/a4; arg disps shift by +12. */
|
|
cursor += writeBE16(out + cursor, MOVEM_L_PUSH_OPCODE);
|
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_A2_A3_A4);
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[i]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + i * 4u));
|
|
}
|
|
cursor += writeBE16(out + cursor, kMoveaSpToAn[4]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)(12u + 4u + 4u * 4u));
|
|
|
|
for (i = 0; i < AMIGA_BITPLANES; i++) {
|
|
for (row = 0; row < heightPx; row++) {
|
|
for (col = 0; col < bytesPerRow; col++) {
|
|
cursor += writeBE16(out + cursor, kMoveBA4PostincToD16An[i]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)col);
|
|
}
|
|
if (row + 1u < heightPx) {
|
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)AMIGA_BYTES_PER_ROW);
|
|
}
|
|
}
|
|
if (i + 1u < AMIGA_BITPLANES) {
|
|
cursor += writeBE16(out + cursor, kAddaWImmToAn[i]);
|
|
cursor += writeBE16(out + cursor, (uint16_t)(0u - ((heightPx - 1u) * AMIGA_BYTES_PER_ROW)));
|
|
}
|
|
}
|
|
|
|
cursor += writeBE16(out + cursor, MOVEM_L_POP_OPCODE);
|
|
cursor += writeBE16(out + cursor, MOVEM_L_MASK_POP_A2_A3_A4);
|
|
cursor += writeBE16(out + cursor, OPCODE_RTS);
|
|
return cursor;
|
|
}
|