// Cross-platform sprite codegen runtime: spriteCompile uses the
// per-CPU emit function selected at compile time, sizes the routines
// with a scratch pass, allocates a slot in the codegen arena, emits
// the routines into it, and populates sp->slot + sp->routineOffsets.
// spriteCompiledDraw casts the slot address to a function pointer and
// calls it through cdecl (the IIgs build goes through a small call
// stub instead of a function-pointer cast).
//
// Each per-CPU emitter (src/codegen/spriteEmit{X86,68k,Iigs}.c)
// just produces bytes; this file is the only consumer of the
// codegen arena from the sprite side.
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "joey/sprite.h"
|
|
#include "joey/surface.h"
|
|
#include "codegenArenaInternal.h"
|
|
#include "spriteEmitter.h"
|
|
#include "spriteInternal.h"
|
|
#include "surfaceInternal.h"
|
|
|
|
// Largest scratch buffer needed for any single emit call. 16 KB
|
|
// covers a 32x32 sprite even on 68k (the biggest mixed-RMW byte-
|
|
// emit at 16 bytes/byte * (16*17 dest bytes per shift) ~= 4.5 KB,
|
|
// times shift count 2). Round up generously.
|
|
#define SPRITE_EMIT_SCRATCH_BYTES (16u * 1024u)
|
|
|
|
|
|
// Compile-time selection of the per-CPU emitter. One src/codegen/
|
|
// spriteEmit*.c file is built per platform, but the dispatch lives
|
|
// in this file so spriteCompile + spriteCompiledDraw aren't
|
|
// duplicated three times.
|
|
static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
|
#if defined(JOEYLIB_PLATFORM_DOS)
|
|
return spriteEmitDrawX86(out, sp, shift);
|
|
#elif defined(JOEYLIB_PLATFORM_AMIGA) || defined(JOEYLIB_PLATFORM_ATARIST)
|
|
return spriteEmitDraw68k(out, sp, shift);
|
|
#elif defined(JOEYLIB_PLATFORM_IIGS)
|
|
return spriteEmitDrawIigs(out, sp, shift);
|
|
#else
|
|
# error "spriteCompile: no emitter selected for this platform"
|
|
#endif
|
|
}
|
|
|
|
|
|
// Compile the draw routine for every pixel shift of `sp` into the
// codegen arena.
//
// Pass 1 emits each shift into a heap scratch buffer purely to learn
// its size. Pass 2 re-emits straight into the arena slot and records
// where each routine starts in sp->routineOffsets[shift][SPRITE_OP_DRAW].
// The SAVE and RESTORE offsets are written as 0 for every shift.
//
// Returns true when `sp` already owns a slot or was compiled
// successfully. Returns false when:
//   - sp is NULL, or sp->tileData is NULL on a not-yet-compiled sprite;
//   - the scratch buffer or the arena slot cannot be allocated;
//   - any shift emits zero bytes (a zero-length routine cannot be
//     called, and its offset would alias the following routine);
//   - the combined size does not fit the 16-bit routine offsets.
// On failure sp->slot and sp->routineOffsets are left untouched.
//
// NOTE(review): emitDrawForTarget takes no capacity argument, so
// SPRITE_EMIT_SCRATCH_BYTES is a contract with the emitters rather
// than something this function can enforce.
bool spriteCompile(SpriteT *sp) {
    uint8_t    *scratch;
    uint32_t    totalSize;
    uint8_t     shift;
    ArenaSlotT *slot;
    uint8_t    *dst;
    uint16_t    written;
    uint16_t    offset;
    bool        sizedOk;

    if (sp == NULL) {
        return false;
    }
    if (sp->slot != NULL) {
        // Already compiled: nothing to do.
        return true;
    }
    if (sp->tileData == NULL) {
        return false;
    }

    scratch = (uint8_t *)malloc(SPRITE_EMIT_SCRATCH_BYTES);
    if (scratch == NULL) {
        return false;
    }

    // Pass 1: sizing only. The bytes written to scratch are discarded.
    totalSize = 0;
    sizedOk   = true;
    for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
        written = emitDrawForTarget(scratch, sp, shift);
        if (written == 0) {
            sizedOk = false;
            break;
        }
        totalSize += written;
    }

    // Scratch is only needed for sizing; release it before asking the
    // arena for memory so every later exit path is leak-free.
    free(scratch);

    if (!sizedOk || totalSize == 0 || totalSize > 0xFFFFu) {
        return false;
    }

    slot = codegenArenaAlloc(totalSize);
    if (slot == NULL) {
        return false;
    }

    // Pass 2: emit each routine at its final address inside the slot.
    dst    = codegenArenaBase() + slot->offset;
    offset = 0;
    for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) {
        written = emitDrawForTarget(dst + offset, sp, shift);
        sp->routineOffsets[shift][SPRITE_OP_DRAW]    = offset;
        sp->routineOffsets[shift][SPRITE_OP_SAVE]    = 0;
        sp->routineOffsets[shift][SPRITE_OP_RESTORE] = 0;
        offset = (uint16_t)(offset + written);
    }

    sp->slot = slot;
    return true;
}
|
|
|
|
|
|
#if defined(JOEYLIB_PLATFORM_IIGS)
|
|
|
|
// IIgs uses inline asm + a self-modifying call stub instead of a C
|
|
// function-pointer cast. The build uses ORCA-C large memory model
|
|
// (-b for sprite demos) so pointers are 24-bit and JSL works
|
|
// cross-bank.
|
|
//
|
|
// `sta abs,Y` on 65816 uses the data bank register (DBR) for the
|
|
// high byte of the effective address, so we need DBR = dst's bank
|
|
// during the body. malloc under -b can return memory in any bank,
|
|
// so we don't trust DBR to already match -- the stub explicitly
|
|
// sets DBR from the dst pointer's bank byte and restores it before
|
|
// returning to C.
|
|
//
|
|
// Stub layout (14 bytes):
|
|
// 00: 8B PHB ; save caller DBR
|
|
// 01: A9 bk LDA #destBank ; A = dst bank (8-bit M)
|
|
// 03: 48 PHA
|
|
// 04: AB PLB ; DBR = dst bank
|
|
// 05: A0 lo hi LDY #destOffset ; Y = low 16 of dst (X=16)
|
|
// 08: 22 lo mid bk JSL routine
|
|
// 0C: AB PLB ; restore caller DBR
|
|
// 0D: 6B RTL
|
|
//
|
|
// Patched per call: byte 2 (destBank), bytes 6-7 (destOffset16),
|
|
// bytes 9-11 (target 24-bit). The compiled routine assumes
|
|
// M=8 / X=16 / Y=destOffset on entry; the stub arranges that.
|
|
static unsigned char gSpriteCallStub[14];
|
|
|
|
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
|
|
uint8_t shift;
|
|
uint32_t destAddr;
|
|
uint16_t destOffset;
|
|
uint8_t destBank;
|
|
uint32_t fnAddr;
|
|
|
|
{
|
|
uint8_t *destPtr;
|
|
uint8_t destBytes[4];
|
|
shift = (uint8_t)(x & 1);
|
|
destPtr = &dst->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
|
|
memcpy(destBytes, &destPtr, 4);
|
|
destAddr = (uint32_t)destBytes[0]
|
|
| ((uint32_t)destBytes[1] << 8)
|
|
| ((uint32_t)destBytes[2] << 16);
|
|
destOffset = (uint16_t)(destAddr & 0xFFFFu);
|
|
destBank = (uint8_t)((destAddr >> 16) & 0xFFu);
|
|
fnAddr = codegenArenaBaseAddr()
|
|
+ sp->slot->offset
|
|
+ (uint32_t)sp->routineOffsets[shift][SPRITE_OP_DRAW];
|
|
}
|
|
(void)destAddr;
|
|
|
|
gSpriteCallStub[ 0] = 0x8B;
|
|
gSpriteCallStub[ 1] = 0xA9;
|
|
gSpriteCallStub[ 2] = destBank;
|
|
gSpriteCallStub[ 3] = 0x48;
|
|
gSpriteCallStub[ 4] = 0xAB;
|
|
gSpriteCallStub[ 5] = 0xA0;
|
|
gSpriteCallStub[ 6] = (unsigned char)(destOffset & 0xFFu);
|
|
gSpriteCallStub[ 7] = (unsigned char)((destOffset >> 8) & 0xFFu);
|
|
gSpriteCallStub[ 8] = 0x22;
|
|
gSpriteCallStub[ 9] = (unsigned char)(fnAddr & 0xFFu);
|
|
gSpriteCallStub[10] = (unsigned char)((fnAddr >> 8) & 0xFFu);
|
|
gSpriteCallStub[11] = (unsigned char)((fnAddr >> 16) & 0xFFu);
|
|
gSpriteCallStub[12] = 0xAB;
|
|
gSpriteCallStub[13] = 0x6B;
|
|
|
|
// ORCA-C compiles this function under `longa on` (M=16) and emits
|
|
// the function epilogue assuming M=16 at exit -- the deallocation
|
|
// ADC takes a 2-byte immediate. The byte writes to gSpriteCallStub
|
|
// above leave M=8, so PHP captured M=8 and PLP would restore M=8.
|
|
// That mode mismatch caused the epilogue's `ADC #imm; TCS` bytes
|
|
// to be re-decoded as a wider ADC swallowing the TCS, S never
|
|
// adjusted, RTL popped the wrong bytes, control fell into BSS and
|
|
// BRK'd. Use REP/SEP without PHP/PLP and explicitly restore M=16
|
|
// before returning to compiled C.
|
|
asm {
|
|
rep #0x30
|
|
sep #0x20
|
|
jsl gSpriteCallStub
|
|
rep #0x20
|
|
}
|
|
}
|
|
|
|
#else
|
|
|
|
// Draw a compiled sprite by calling its generated routine directly.
//
//   dst  - surface whose pixel buffer receives the sprite
//   sp   - compiled sprite (sp->slot and sp->routineOffsets are read)
//   x, y - position; bit 0 of x selects the shift variant, x >> 1 is
//          the byte column within the row
//
// The routine is entered as a C function that takes the address of
// the first destination byte. No argument validation is performed.
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
    typedef void (*DrawFn)(uint8_t *destRow);
    uint8_t  phase;
    uint8_t *firstByte;
    uint8_t *entry;
    DrawFn   routine;

    phase     = (uint8_t)(x & 1);
    firstByte = dst->pixels + ((uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1));

    // Arena base, then this sprite's slot, then the routine for this
    // shift variant within the slot.
    entry  = codegenArenaBase();
    entry += sp->slot->offset;
    entry += sp->routineOffsets[phase][SPRITE_OP_DRAW];

    routine = (DrawFn)entry;
    routine(firstByte);
}
|
|
|
|
#endif
|