joeylib2/src/codegen/spriteCompile.c

200 lines
6.8 KiB
C

// Cross-platform sprite codegen runtime: spriteCompile uses the
// per-CPU emit function selected at compile time to measure each
// routine in a scratch buffer, allocates a slot in the codegen
// arena, re-emits the routines directly into that slot, and
// populates sp->slot + sp->routineOffsets. spriteCompiledDraw
// turns the slot address into a call target: a C function pointer
// called through cdecl on most platforms, a patched JSL stub on
// the IIgs.
//
// Each per-CPU emitter (src/codegen/spriteEmit{X86,68k,Iigs}.c)
// just produces bytes; this file is the only consumer of the
// codegen arena from the sprite side.
#include <stdlib.h>
#include <string.h>
#include "joey/sprite.h"
#include "joey/surface.h"
#include "codegenArenaInternal.h"
#include "spriteEmitter.h"
#include "spriteInternal.h"
#include "surfaceInternal.h"
// Largest scratch buffer needed for any single emit call. 16 KB
// covers a 32x32 sprite even on 68k (the biggest mixed-RMW byte-
// emit at 16 bytes/byte * (16*17 dest bytes per shift) ~= 4.5 KB,
// times shift count 2). Round up generously.
#define SPRITE_EMIT_SCRATCH_BYTES (16u * 1024u)
// Single dispatch point to the per-CPU emitter. Exactly one
// src/codegen/spriteEmit*.c is built per platform; routing the call
// through this helper keeps spriteCompile + spriteCompiledDraw in one
// copy instead of one per CPU.
//
// out   - destination buffer the emitter writes machine code into
// sp    - sprite being compiled
// shift - which shift variant to emit
//
// Returns whatever the selected emitter returns (spriteCompile treats
// it as the number of bytes written to `out`). Building without a
// recognised JOEYLIB_PLATFORM_* macro is a hard compile error.
static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
    uint16_t emittedBytes;

#if defined(JOEYLIB_PLATFORM_DOS)
    emittedBytes = spriteEmitDrawX86(out, sp, shift);
#elif defined(JOEYLIB_PLATFORM_AMIGA) || defined(JOEYLIB_PLATFORM_ATARIST)
    emittedBytes = spriteEmitDraw68k(out, sp, shift);
#elif defined(JOEYLIB_PLATFORM_IIGS)
    emittedBytes = spriteEmitDrawIigs(out, sp, shift);
#else
#  error "spriteCompile: no emitter selected for this platform"
#endif

    return emittedBytes;
}
// Compile `sp` into native draw routines, one per shift, packed
// back-to-back in a single codegen-arena slot.
//
// Works in two passes: every shift is first emitted into a heap
// scratch buffer purely to learn its size, then -- once a slot of the
// summed size has been allocated -- emitted again straight into the
// arena. The second pass is assumed to produce exactly the byte
// counts the first pass measured (the emitters are expected to be
// deterministic for a given sprite + shift).
//
// Returns true if the sprite is compiled on return (including the
// case where it already was), false if:
//   - sp is NULL or has no tileData,
//   - the scratch buffer cannot be allocated,
//   - any shift emits zero bytes,
//   - the combined routines exceed 0xFFFF bytes, or
//   - the arena has no room for the slot.
// On failure sp->slot is left NULL and sp->routineOffsets untouched.
//
// NOTE(review): the emitters are handed no buffer length, so nothing
// here can stop one from writing past SPRITE_EMIT_SCRATCH_BYTES; that
// bound has to be guaranteed by the emitters themselves.
bool spriteCompile(SpriteT *sp) {
    uint8_t    *scratch;
    uint32_t    totalSize;
    uint8_t     shift;
    ArenaSlotT *slot;
    uint8_t    *dst;
    uint16_t    written;
    uint16_t    offset;

    if (sp == NULL) {
        return false;
    }
    if (sp->slot != NULL) {
        // Already compiled; nothing to do.
        return true;
    }
    if (sp->tileData == NULL) {
        return false;
    }

    // Pass 1: measure. The scratch contents are never read back; only
    // the byte counts matter.
    scratch = malloc(SPRITE_EMIT_SCRATCH_BYTES);
    if (scratch == NULL) {
        return false;
    }
    totalSize = 0;
    for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
        written = emitDrawForTarget(scratch, sp, shift);
        if (written == 0) {
            // A callable routine needs at least a return instruction,
            // so zero bytes means the emitter produced nothing usable.
            break;
        }
        totalSize += written;
    }
    // Scratch is only needed for measuring: release it now so the
    // memory is available again before the arena allocation and so
    // the failure paths below need no cleanup.
    free(scratch);

    if (shift < JOEY_SPRITE_SHIFT_COUNT) {
        // Loop exited early: some shift emitted an empty routine.
        // Accepting it would make that shift's offset alias the next
        // routine (or the end of the slot).
        return false;
    }
    if (totalSize > 0xFFFFu) {
        // routineOffsets entries are 16-bit.
        return false;
    }

    slot = codegenArenaAlloc(totalSize);
    if (slot == NULL) {
        return false;
    }

    // Pass 2: emit for real, directly into the arena slot, recording
    // where each shift's draw routine begins.
    dst    = codegenArenaBase() + slot->offset;
    offset = 0;
    for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
        written = emitDrawForTarget(dst + offset, sp, shift);
        sp->routineOffsets[shift][SPRITE_OP_DRAW]    = offset;
        // No save/restore routines are generated here.
        sp->routineOffsets[shift][SPRITE_OP_SAVE]    = 0;
        sp->routineOffsets[shift][SPRITE_OP_RESTORE] = 0;
        offset = (uint16_t)(offset + written);
    }

    // Publishing the slot is what marks the sprite as compiled.
    sp->slot = slot;
    return true;
}
#if defined(JOEYLIB_PLATFORM_IIGS)
// IIgs uses inline asm + a self-modifying call stub instead of a C
// function-pointer cast. The build uses ORCA-C large memory model
// (-b for sprite demos) so pointers are 24-bit and JSL works
// cross-bank.
//
// `sta abs,Y` on 65816 uses the data bank register (DBR) for the
// high byte of the effective address, so we need DBR = dst's bank
// during the body. malloc under -b can return memory in any bank,
// so we don't trust DBR to already match -- the stub explicitly
// sets DBR from the dst pointer's bank byte and restores it before
// returning to C.
//
// Stub layout (14 bytes):
// 00: 8B PHB ; save caller DBR
// 01: A9 bk LDA #destBank ; A = dst bank (8-bit M)
// 03: 48 PHA
// 04: AB PLB ; DBR = dst bank
// 05: A0 lo hi LDY #destOffset ; Y = low 16 of dst (X=16)
// 08: 22 lo mid bk JSL routine
// 0C: AB PLB ; restore caller DBR
// 0D: 6B RTL
//
// Patched per call: byte 2 (destBank), bytes 6-7 (destOffset16),
// bytes 9-11 (target 24-bit). The compiled routine assumes
// M=8 / X=16 / Y=destOffset on entry; the stub arranges that.
// Backing store for the 14-byte call stub. spriteCompiledDraw rewrites
// all 14 bytes on every call and then JSLs into this array, so it
// holds code, not data. NOTE(review): being a single shared buffer,
// nested or concurrent calls would overwrite each other's patched
// values -- confirm callers never overlap.
static unsigned char gSpriteCallStub[14];
// IIgs implementation: draw the compiled sprite `sp` onto `dst` at
// (x, y) by patching gSpriteCallStub and calling it.
//
// dst - target surface; dst->pixels is indexed directly
// sp  - sprite whose sp->slot and sp->routineOffsets locate the code
// x,y - position; converted to uint16_t with no clipping or range
//       check, so negative or off-surface values are the caller's
//       responsibility
//
// dst, sp and sp->slot are dereferenced without NULL checks; the
// sprite must already have a slot (i.e. be compiled).
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
uint8_t shift;
uint32_t destAddr;
uint16_t destOffset;
uint8_t destBank;
uint32_t fnAddr;
{
uint8_t *destPtr;
uint8_t destBytes[4];
// Low bit of x picks which shift variant of the routine to run.
shift = (uint8_t)(x & 1);
// Byte address of the first destination byte: row y, column x/2.
destPtr = &dst->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
// Read the pointer's raw bytes instead of casting it to an integer.
// This copies 4 bytes, so it relies on pointers being 4 bytes wide
// in this build; only the low three (little-endian) are used.
memcpy(destBytes, &destPtr, 4);
destAddr = (uint32_t)destBytes[0]
| ((uint32_t)destBytes[1] << 8)
| ((uint32_t)destBytes[2] << 16);
// Split the 24-bit address into the pieces the stub needs:
// 16-bit offset for Y, bank byte for DBR.
destOffset = (uint16_t)(destAddr & 0xFFFFu);
destBank = (uint8_t)((destAddr >> 16) & 0xFFu);
// Entry point of the draw routine: arena base address, plus this
// sprite's slot, plus the per-shift routine offset.
fnAddr = codegenArenaBaseAddr()
+ sp->slot->offset
+ (uint32_t)sp->routineOffsets[shift][SPRITE_OP_DRAW];
}
// destAddr has already been consumed above; this cast has no effect
// on behavior (presumably kept to quiet a compiler diagnostic).
(void)destAddr;
// Assemble the stub. Constant opcodes are rewritten each call along
// with the patched operands.
gSpriteCallStub[ 0] = 0x8B; // PHB
gSpriteCallStub[ 1] = 0xA9; // LDA #imm8
gSpriteCallStub[ 2] = destBank; //   operand: destination bank
gSpriteCallStub[ 3] = 0x48; // PHA
gSpriteCallStub[ 4] = 0xAB; // PLB
gSpriteCallStub[ 5] = 0xA0; // LDY #imm16
gSpriteCallStub[ 6] = (unsigned char)(destOffset & 0xFFu); //   operand: offset low
gSpriteCallStub[ 7] = (unsigned char)((destOffset >> 8) & 0xFFu); //   operand: offset high
gSpriteCallStub[ 8] = 0x22; // JSL long
gSpriteCallStub[ 9] = (unsigned char)(fnAddr & 0xFFu); //   operand: routine addr low
gSpriteCallStub[10] = (unsigned char)((fnAddr >> 8) & 0xFFu); //   operand: routine addr mid
gSpriteCallStub[11] = (unsigned char)((fnAddr >> 16) & 0xFFu); //   operand: routine bank
gSpriteCallStub[12] = 0xAB; // PLB
gSpriteCallStub[13] = 0x6B; // RTL
// ORCA-C compiles this function under `longa on` (M=16) and emits
// the function epilogue assuming M=16 at exit -- the deallocation
// ADC takes a 2-byte immediate. The byte writes to gSpriteCallStub
// above leave M=8, so PHP captured M=8 and PLP would restore M=8.
// That mode mismatch caused the epilogue's `ADC #imm; TCS` bytes
// to be re-decoded as a wider ADC swallowing the TCS, S never
// adjusted, RTL popped the wrong bytes, control fell into BSS and
// BRK'd. Use REP/SEP without PHP/PLP and explicitly restore M=16
// before returning to compiled C.
//
// Sequence below: REP #$30 clears M and X (16-bit accumulator and
// index), SEP #$20 sets M again (8-bit accumulator), giving the
// M=8 / X=16 state the stub and routine expect; after the JSL
// returns, REP #$20 puts the accumulator back to 16-bit.
asm {
rep #0x30
sep #0x20
jsl gSpriteCallStub
rep #0x20
}
}
#else
// Function-pointer implementation (every platform except the IIgs):
// run the compiled draw routine for `sp` against `dst` at (x, y).
//
// The low bit of x selects which shift variant to run; the routine is
// handed a pointer to the destination byte at row y, column x/2.
// x and y are converted to uint16_t with no clipping, and dst, sp and
// sp->slot are dereferenced unchecked -- the sprite must already have
// a slot and the position must already be valid.
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
    typedef void (*CompiledDrawFn)(uint8_t *destRow);
    uint8_t        variant;
    uint8_t       *target;
    uint8_t       *code;
    CompiledDrawFn entry;

    variant = (uint8_t)(x & 1);

    // First destination byte the routine will touch.
    target = dst->pixels + ((uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1));

    // Arena base -> this sprite's slot -> the chosen variant's routine.
    code  = codegenArenaBase() + sp->slot->offset;
    code += sp->routineOffsets[variant][SPRITE_OP_DRAW];

    // Treat the emitted bytes as a C-callable function and invoke it.
    entry = (CompiledDrawFn)code;
    entry(target);
}
#endif