// Cross-platform sprite codegen runtime: spriteCompile uses the // per-CPU emit function selected at compile time, allocates a slot // in the codegen arena, copies the emitted bytes in, and populates // sp->slot + sp->routineOffsets. spriteCompiledDraw casts the slot // address to a function pointer and calls it through cdecl. // // Each per-CPU emitter (src/codegen/spriteEmit{X86,68k,Iigs}.c) // just produces bytes; this file is the only consumer of the // codegen arena from the sprite side. #include #include #include "joey/sprite.h" #include "joey/surface.h" #include "codegenArenaInternal.h" #include "hal.h" #include "spriteEmitter.h" #include "spriteInternal.h" #include "surfaceInternal.h" // Largest scratch buffer needed for any single emit call. 16 KB // covers a 32x32 sprite even on 68k (the biggest mixed-RMW byte- // emit at 16 bytes/byte * (16*17 dest bytes per shift) ~= 4.5 KB, // times shift count 2). Round up generously. #define SPRITE_EMIT_SCRATCH_BYTES (16u * 1024u) // Compile-time selection of the per-CPU emitter. One src/codegen/ // spriteEmit*.c file is built per platform, but the dispatch lives // in this file so spriteCompile + spriteCompiledDraw aren't // duplicated three times. static uint16_t emitDrawForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) { #if defined(JOEYLIB_PLATFORM_DOS) return spriteEmitDrawX86(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_AMIGA) return spriteEmitDrawPlanar68k(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_ATARIST) return spriteEmitDraw68k(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_IIGS) return spriteEmitDrawIigs(out, sp, shift); #else # error "spriteCompile: no emitter selected for this platform" #endif } // Save-under and restore-under emit dispatch. Each per-CPU pair // produces row-by-row copy bytes; the runtime dispatch in // src/core/sprite.c gates on routineOffsets[shift][SPRITE_OP_SAVE] // != SPRITE_NOT_COMPILED and falls back to the interpreted memcpy // path otherwise. 
static uint16_t emitSaveForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) { #if defined(JOEYLIB_PLATFORM_DOS) return spriteEmitSaveX86(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_AMIGA) return spriteEmitSavePlanar68k(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_ATARIST) return spriteEmitSave68k(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_IIGS) return spriteEmitSaveIigs(out, sp, shift); #else (void)out; (void)sp; (void)shift; return 0; #endif } static uint16_t emitRestoreForTarget(uint8_t *out, const SpriteT *sp, uint8_t shift) { #if defined(JOEYLIB_PLATFORM_DOS) return spriteEmitRestoreX86(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_AMIGA) return spriteEmitRestorePlanar68k(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_ATARIST) return spriteEmitRestore68k(out, sp, shift); #elif defined(JOEYLIB_PLATFORM_IIGS) return spriteEmitRestoreIigs(out, sp, shift); #else (void)out; (void)sp; (void)shift; return 0; #endif } // Sizing pass: returns total bytes the emitters will produce for // this sprite's DRAW (per shift) + SAVE (per shift) + RESTORE (per // shift). Emitters that aren't implemented for the current platform // return 0 here, so totalSize tracks only the ops that will actually // land in the arena. static uint32_t emitTotalSize(uint8_t *scratch, const SpriteT *sp) { uint32_t total; uint8_t shift; total = 0; for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { total += emitDrawForTarget(scratch, sp, shift); total += emitSaveForTarget(scratch, sp, shift); total += emitRestoreForTarget(scratch, sp, shift); } return total; } bool spriteCompile(SpriteT *sp) { uint8_t *scratch; uint32_t totalSize; uint8_t shift; uint8_t op; ArenaSlotT *slot; uint8_t *dst; uint16_t written; uint16_t offset; if (sp == NULL) { return false; } if (sp->slot != NULL) { return true; } if (sp->tileData == NULL) { return false; } /* Amiga (post-Phase 9) uses spriteEmitPlanar68k.c which writes * directly to bitplanes. 
DRAW emits a unique pre-shifted variant * per shift in 0..7 (smooth horizontal motion at any pixel x); * SAVE/RESTORE emit only shift 0 and shift 1 since shifted variants * 1..7 share identical bytes (plain memcpy of widthTiles+1 plane * bytes per row). The post-emit pass below aliases slots 2..7 * for save/restore to slot 1's bytes. */ scratch = (uint8_t *)malloc(SPRITE_EMIT_SCRATCH_BYTES); if (scratch == NULL) { return false; } totalSize = emitTotalSize(scratch, sp); if (totalSize > 0xFFFFu) { free(scratch); return false; } slot = codegenArenaAlloc(totalSize); if (slot == NULL) { free(scratch); return false; } dst = codegenArenaBase() + slot->offset; offset = 0; for (shift = 0; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { for (op = 0; op < SPRITE_OP_COUNT; op++) { switch (op) { case SPRITE_OP_DRAW: written = emitDrawForTarget (dst + offset, sp, shift); break; case SPRITE_OP_SAVE: written = emitSaveForTarget (dst + offset, sp, shift); break; case SPRITE_OP_RESTORE: written = emitRestoreForTarget(dst + offset, sp, shift); break; default: written = 0; break; } if (written == 0) { sp->routineOffsets[shift][op] = SPRITE_NOT_COMPILED; } else { sp->routineOffsets[shift][op] = offset; offset = (uint16_t)(offset + written); } } } #if defined(JOEYLIB_PLATFORM_AMIGA) /* Save/restore bytes for any non-zero shift are identical (plain * memcpy of widthTiles+1 plane bytes per row). The emitter emits * them once at slot 1; alias slots 2..7 here so the dispatcher * gate (sprite.c) sees them as compiled. */ for (shift = 2; shift < JOEY_SPRITE_SHIFT_COUNT; shift++) { sp->routineOffsets[shift][SPRITE_OP_SAVE] = sp->routineOffsets[1][SPRITE_OP_SAVE]; sp->routineOffsets[shift][SPRITE_OP_RESTORE] = sp->routineOffsets[1][SPRITE_OP_RESTORE]; } #endif sp->slot = slot; free(scratch); return true; } #if defined(JOEYLIB_PLATFORM_IIGS) // SURFACE_ROW_OFFSET dispatches to the gRowOffsetLut lookup on IIgs; // declared in surfaceInternal.h. 
Replaces ORCA-C's __mul16 JSL with a // single indexed long-mode read. // IIgs uses inline asm + a self-modifying call stub instead of a C // function-pointer cast. The build uses ORCA-C large memory model // (-b for sprite demos) so pointers are 24-bit and JSL works // cross-bank. // // `sta abs,Y` on 65816 uses the data bank register (DBR) for the // high byte of the effective address, so we need DBR = dst's bank // during the body. malloc under -b can return memory in any bank, // so we don't trust DBR to already match -- the stub explicitly // sets DBR from the dst pointer's bank byte and restores it before // returning to C. // // Stub layout (14 bytes): // 00: 8B PHB ; save caller DBR // 01: A9 bk LDA #destBank ; A = dst bank (8-bit M) // 03: 48 PHA // 04: AB PLB ; DBR = dst bank // 05: A0 lo hi LDY #destOffset ; Y = low 16 of dst (X=16) // 08: 22 lo mid bk JSL routine // 0C: AB PLB ; restore caller DBR // 0D: 6B RTL // // Patched per call: byte 2 (destBank), bytes 6-7 (destOffset16), // bytes 9-11 (target 24-bit). The compiled routine assumes // M=8 / X=16 / Y=destOffset on entry; the stub arranges that. // // Stub bytes are split into two phases: // 1. The 8 opcode bytes are written ONCE on first call (gDrawStubInited). // 2. Of the 6 operand bytes, only those that actually changed since // the previous call get re-stamped: destBank and fnAddr are cached // and rarely change (per-shift / per-bank). destOffset is the only // one that changes every call as the sprite moves. Net per-frame // patching for the typical case drops from 14 stores to 2. 
// Draw call stub storage plus the operand cache used to skip
// re-stamping bytes that did not change since the previous call.
static unsigned char gSpriteCallStub[14];
static bool gDrawStubInited = false;                  // opcode bytes written yet?
static uint8_t gDrawStubLastBank = 0xFF;              // bank byte currently stamped at stub[2]
static uint32_t gDrawStubLastFnAddr = 0xFFFFFFFFul;   // routine address currently stamped at stub[9..11]

// IIgs compiled draw. Computes the destination byte address for
// (x, y) in dst->pixels, selects the routine by x parity
// (shift = x & 1), patches the call stub operands, and JSLs the stub.
//
// Performs no NULL or bounds checks and does not test
// routineOffsets[shift][SPRITE_OP_DRAW] against SPRITE_NOT_COMPILED.
// NOTE(review): presumably the caller in src/core/sprite.c gates on
// that before dispatching here -- confirm.
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
    uint8_t shift;
    uint32_t destAddr;
    uint16_t destOffset;
    uint8_t destBank;
    uint32_t fnAddr;

    {
        uint8_t *destPtr;
        shift = (uint8_t)(x & 1);
        // Two pixels per byte: x >> 1 is the byte column within the row.
        destPtr = &dst->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
        // Split the pointer into bank byte + 16-bit in-bank offset.
        destAddr = (uint32_t)destPtr;
        destOffset = (uint16_t)(destAddr & 0xFFFFu);
        destBank = (uint8_t)((destAddr >> 16) & 0xFFu);
        // Absolute address of the compiled DRAW routine for this shift.
        fnAddr = codegenArenaBaseAddr() + sp->slot->offset + (uint32_t)sp->routineOffsets[shift][SPRITE_OP_DRAW];
    }
    (void)destAddr;

    // One-time write of the fixed opcode bytes (see stub layout above
    // this function's statics); operand bytes are stamped below.
    if (!gDrawStubInited) {
        gSpriteCallStub[ 0] = 0x8B;
        gSpriteCallStub[ 1] = 0xA9;
        gSpriteCallStub[ 3] = 0x48;
        gSpriteCallStub[ 4] = 0xAB;
        gSpriteCallStub[ 5] = 0xA0;
        gSpriteCallStub[ 8] = 0x22;
        gSpriteCallStub[12] = 0xAB;
        gSpriteCallStub[13] = 0x6B;
        gDrawStubInited = true;
    }

    // destOffset always changes (sprite moves every frame).
    gSpriteCallStub[ 6] = (unsigned char)(destOffset & 0xFFu);
    gSpriteCallStub[ 7] = (unsigned char)((destOffset >> 8) & 0xFFu);

    // destBank only changes if the dst surface migrates banks (~never).
    if (destBank != gDrawStubLastBank) {
        gSpriteCallStub[ 2] = destBank;
        gDrawStubLastBank = destBank;
    }

    // fnAddr changes only on shift parity flips or sprite swaps.
    if (fnAddr != gDrawStubLastFnAddr) {
        // Byte-alias fnAddr to copy its three low-order bytes
        // (lo, mid, bank) into the JSL operand.
        const uint8_t *fnB_ = (const uint8_t *)&fnAddr;
        gSpriteCallStub[ 9] = fnB_[0];
        gSpriteCallStub[10] = fnB_[1];
        gSpriteCallStub[11] = fnB_[2];
        gDrawStubLastFnAddr = fnAddr;
    }

    // ORCA-C compiles this function under `longa on / longi on`
    // (M=16, X=16) and emits the function epilogue assuming those
    // widths at exit -- the deallocation ADC takes a 2-byte immediate
    // and any LDX/LDY use 2-byte immediates. The byte writes to
    // gSpriteCallStub above leave M=8, and an earlier PHP/PLP-only
    // wrapper let the asm block exit in the wrong M state. The
    // epilogue's `ADC #imm; TCS` then decoded as a wider ADC that
    // swallowed the TCS, S was never adjusted, RTL popped wrong
    // bytes, control fell into BSS, and the IIgs hit BRK on a zero
    // byte. Force M=16/X=16 before returning to compiled C.
    //
    // Sequence: force M=16/X=16, drop to M=8 for the stub's
    // `LDA #destBank`, call the stub, then force M=16/X=16 again.
    asm {
        rep #0x30
        sep #0x20
        jsl gSpriteCallStub
        rep #0x30
    }
}

// Save/Restore call stub. The compiled MVN-row routines need
// M=16 / X=16 and expect index registers preset to source/dest
// offsets within their respective banks. MVN's own bank operands
// are in the routine bytes (patched per call), so this stub doesn't
// need to load DBR -- it just sets X and Y, JSLs, and restores DBR
// (MVN itself sets DBR to its destination bank as a side effect).
//
// Stub layout (13 bytes):
//   00: 8B             PHB                 ; save caller DBR
//   01: A2 lo hi       LDX #srcOffset
//   04: A0 lo hi       LDY #dstOffset
//   07: 22 lo mid bk   JSL routine
//   0B: AB             PLB                 ; restore caller DBR
//   0C: 6B             RTL
//
// For SAVE:    X = screen lo, Y = backup lo
// For RESTORE: X = backup lo, Y = screen lo
//
// Two distinct stubs (one per op) instead of a shared one. Save and
// restore alternate every frame and they swap the X/Y meanings, so a
// shared stub forced a full re-stamp on every call. Per-op stubs let
// us cache: only the bytes that genuinely change frame-to-frame
// (typically just one of screenLo/backupLo as the sprite moves) get
// rewritten. Cuts per-call patching from 13 stores to 2 in the typical
// case (static backup buffer, stable shift parity).
// Per-op stub storage and operand caches. The *Last* values mirror
// what is currently stamped into the matching stub so unchanged
// operands are not rewritten.
// NOTE(review): the 0xFFFF "nothing stamped yet" sentinel is also a
// representable 16-bit offset; a first call whose offset is exactly
// 0xFFFF would skip the stamp. Looks unreachable for real surfaces --
// confirm.
static unsigned char gSpriteSaveStub[13];
static unsigned char gSpriteRestoreStub[13];
static bool          gSaveStubInited        = false;
static uint16_t      gSaveStubLastXLo       = 0xFFFFu;
static uint16_t      gSaveStubLastYLo       = 0xFFFFu;
static uint32_t      gSaveStubLastFnAddr    = 0xFFFFFFFFul;
static bool          gRestoreStubInited     = false;
static uint16_t      gRestoreStubLastXLo    = 0xFFFFu;
static uint16_t      gRestoreStubLastYLo    = 0xFFFFu;
static uint32_t      gRestoreStubLastFnAddr = 0xFFFFFFFFul;

// patchMvnBanks stamps the destination and source bank operand bytes
// into each MVN inside an emitted save/restore routine. Layout from
// spriteEmitIigs.c::emitMvnCopyRoutine:
//   row 0      (6 bytes):  A9 lo hi 54 db sb
//   row R     (12 bytes, R>=1):  8A/98 18 69 lo hi AA/A8 A9 lo hi 54 db sb
//   end        (1 byte):   6B
// MVN dstbk is at offset (12*R + 4); srcbk at (12*R + 5).
//
// routine:  first byte of the emitted routine.
// heightPx: number of rows (= number of MVN instructions) to patch;
//           0 patches nothing.
// dstBank / srcBank: bank bytes written into each MVN's operands.
//
// The operand offset is carried as a running value stepped by the
// 12-byte row stride rather than recomputed as 12*R per row, in line
// with the multiply-helper avoidance used elsewhere in this file.
static void patchMvnBanks(uint8_t *routine, uint16_t heightPx, uint8_t dstBank, uint8_t srcBank) {
    uint16_t operandOffset;   // offset of the current row's MVN dstbk byte
    uint16_t row;

    operandOffset = 4u;
    for (row = 0; row < heightPx; row++) {
        routine[operandOffset]      = dstBank;
        routine[operandOffset + 1u] = srcBank;
        operandOffset = (uint16_t)(operandOffset + 12u);
    }
}

// Split a 24-bit pointer into its low 16 bits + bank byte. The
// (uint32_t) cast works correctly in ORCA/C 2.2.1 (the 2.1.0 lossy-
// bank-byte bug is fixed). To avoid invoking the ~LSHR4 32-bit-shift
// helper for the `>> 16` to extract the bank byte, we cast to
// uint32_t and then byte-alias the storage -- gets the same bytes
// with simple loads.
//
// _outLo is a uint16_t *, _outBank a uint8_t *. The byte aliasing
// reads the least-significant byte first, so it relies on a
// little-endian uint32_t layout.
#define SPLIT_POINTER(_ptr, _outLo, _outBank) \
    do { \
        uint32_t spAddr_ = (uint32_t)(_ptr); \
        const uint8_t *spB_ = (const uint8_t *)&spAddr_; \
        *(_outLo)   = (uint16_t)(spB_[0] | ((uint16_t)spB_[1] << 8)); \
        *(_outBank) = spB_[2]; \
    } while (0)

// Backup-buffer pointer split cache. backup->bytes is a user-supplied
// buffer (e.g. a static array) and effectively never changes after
// the first call -- caching its split saves both Save and Restore the
// macro expansion per frame.
// Last backup pointer seen by SPLIT_BACKUP_CACHED and its split halves.
static const void *gLastBackupBytes = (const void *)0;
static uint16_t gLastBackupBytesLo = 0;
static uint8_t gLastBackupBytesBank = 0;

// Like SPLIT_POINTER, but reuses the previous result when _bytes is
// the same pointer as on the last expansion. _bytes is evaluated more
// than once, so pass a side-effect-free expression.
#define SPLIT_BACKUP_CACHED(_bytes, _outLo, _outBank) \
    do { \
        if ((const void *)(_bytes) == gLastBackupBytes) { \
            *(_outLo)   = gLastBackupBytesLo; \
            *(_outBank) = gLastBackupBytesBank; \
        } else { \
            SPLIT_POINTER((_bytes), (_outLo), (_outBank)); \
            gLastBackupBytes     = (const void *)(_bytes); \
            gLastBackupBytesLo   = *(_outLo); \
            gLastBackupBytesBank = *(_outBank); \
        } \
    } while (0)

// IIgs compiled save-under. Fills in `backup` (sprite, x rounded down
// to an even pixel, y, width in pixels, height, sizeBytes) and runs
// the compiled SAVE routine for this shift through gSpriteSaveStub to
// copy the screen region into backup->bytes.
//
// Side effects beyond `backup`: may populate sp->cachedSizeBytes and
// update sp->cachedDstBank / sp->cachedSrcBank, and re-patches the MVN
// bank operands inside the routine when the bank pair changed.
// No NULL/bounds checks are performed here.
void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
    uint8_t shift;
    int16_t clippedX;
    uint16_t widthPx;
    uint16_t heightPx;
    uint16_t copyBytes;
    uint16_t screenLo;
    uint16_t backupLo;
    uint8_t screenBank;
    uint8_t backupBank;
    uint32_t fnAddr;
    uint8_t *routine;
    uint8_t *screenPtr;
    uint16_t cacheIdx; /* shift * SPRITE_OP_COUNT + SPRITE_OP_SAVE, computed once */
    uint8_t *cachedDst; /* &sp->cachedDstBank[0][0] + cacheIdx */
    uint8_t *cachedSrc; /* &sp->cachedSrcBank[0][0] + cacheIdx */
    uint16_t routineOffset; /* sp->routineOffsets[shift][SPRITE_OP_SAVE], computed once */

    shift = (uint8_t)(x & 1);
    clippedX = (int16_t)(x & ~1);
    widthPx = (uint16_t)(sp->widthTiles * 8);
    heightPx = (uint16_t)(sp->heightTiles * 8);
    // Two pixels per byte; an odd x spills into one extra byte per row.
    copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
    screenPtr = (uint8_t *)&src->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)clippedX >> 1)];
    SPLIT_POINTER(screenPtr, &screenLo, &screenBank);
    SPLIT_BACKUP_CACHED(backup->bytes, &backupLo, &backupBank);

    backup->sprite = sp;
    backup->x = clippedX;
    backup->y = y;
    // Width is stored in pixels; the restore side halves it to recover
    // copyBytes and from that the shift.
    backup->width = (uint16_t)(copyBytes << 1);
    backup->height = heightPx;
    /* sizeBytes is constant per (sprite, shift); cache to dodge the
     * per-call ~CUMUL2 (uint16_t * uint16_t) helper. The byte-pointer
     * arithmetic avoids reintroducing ~MUL4 for the uint16_t array
     * indexing. */
    {
        uint16_t *sizeCachePtr = (uint16_t *)((uint8_t *)sp->cachedSizeBytes + ((uint16_t)shift << 1));
        // A cached value of 0 is treated as "not computed yet".
        if (*sizeCachePtr == 0) {
            *sizeCachePtr = (uint16_t)(copyBytes * heightPx);
        }
        backup->sizeBytes = *sizeCachePtr;
    }

    /* Compute the 1D index into the cached* / routineOffsets 2D arrays
     * once. ORCA-C 2.2.1 lowers `shift * SPRITE_OP_COUNT` (where
     * SPRITE_OP_COUNT==3) to a ~MUL4 helper call; (shift<<1)+shift
     * compiles to two ASLs and an ADC, no helper. */
    cacheIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_SAVE);
    cachedDst = (uint8_t *)sp->cachedDstBank + cacheIdx;
    cachedSrc = (uint8_t *)sp->cachedSrcBank + cacheIdx;
    /* Same byte-pointer trick as SURFACE_ROW_OFFSET to dodge ~MUL4. */
    routineOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (cacheIdx << 1));
    fnAddr = codegenArenaBaseAddr() + sp->slot->offset + (uint32_t)routineOffset;

    // Stub: X = screen (source), Y = backup (destination).
    if (!gSaveStubInited) {
        gSpriteSaveStub[ 0] = 0x8B;
        gSpriteSaveStub[ 1] = 0xA2;
        gSpriteSaveStub[ 4] = 0xA0;
        gSpriteSaveStub[ 7] = 0x22;
        gSpriteSaveStub[11] = 0xAB;
        gSpriteSaveStub[12] = 0x6B;
        gSaveStubInited = true;
    }
    if (screenLo != gSaveStubLastXLo) {
        gSpriteSaveStub[ 2] = (unsigned char)(screenLo & 0xFFu);
        gSpriteSaveStub[ 3] = (unsigned char)((screenLo >> 8) & 0xFFu);
        gSaveStubLastXLo = screenLo;
    }
    if (backupLo != gSaveStubLastYLo) {
        gSpriteSaveStub[ 5] = (unsigned char)(backupLo & 0xFFu);
        gSpriteSaveStub[ 6] = (unsigned char)((backupLo >> 8) & 0xFFu);
        gSaveStubLastYLo = backupLo;
    }
    if (fnAddr != gSaveStubLastFnAddr) {
        /* Byte-alias the uint32_t to grab the 3 bank/lo/hi bytes
         * without invoking ~LSHR4 for the >>16. */
        const uint8_t *fnB_ = (const uint8_t *)&fnAddr;
        gSpriteSaveStub[ 8] = fnB_[0];
        gSpriteSaveStub[ 9] = fnB_[1];
        gSpriteSaveStub[10] = fnB_[2];
        gSaveStubLastFnAddr = fnAddr;
    }

    // Skip the 16+ MVN-bank rewrites if the dst/src bank pair is the
    // same as last call.
    if (*cachedDst != backupBank || *cachedSrc != screenBank) {
        routine = codegenArenaBase() + sp->slot->offset + routineOffset;
        patchMvnBanks(routine, heightPx, /*dst*/backupBank, /*src*/screenBank);
        *cachedDst = backupBank;
        *cachedSrc = screenBank;
    }

    // MVN-based routine: needs M=16 / X=16; restore M=16 on exit
    // matches ORCA-C `longa on` epilogue expectations.
    asm {
        rep #0x30
        jsl gSpriteSaveStub
        rep #0x30
    }
}

// IIgs compiled restore-under. Mirror of the save path: reads the
// region recorded in `backup` and runs the compiled RESTORE routine
// through gSpriteRestoreStub to copy backup->bytes back into
// dst->pixels. The shift is recovered from backup->width rather than
// from x, since backup->x was stored rounded down to an even pixel.
void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
    uint8_t shift;
    uint16_t heightPx;
    uint16_t copyBytes;
    uint16_t spriteBytesPerRow;
    uint16_t screenLo;
    uint16_t backupLo;
    uint8_t screenBank;
    uint8_t backupBank;
    uint32_t fnAddr;
    uint8_t *routine;
    uint8_t *screenPtr;
    SpriteT *sp;
    uint16_t cacheIdx; /* shift * SPRITE_OP_COUNT + SPRITE_OP_RESTORE, computed once */
    uint8_t *cachedDst;
    uint8_t *cachedSrc;
    uint16_t routineOffset;

    sp = backup->sprite;
    heightPx = backup->height;
    copyBytes = (uint16_t)(backup->width >> 1);
    spriteBytesPerRow = (uint16_t)(sp->widthTiles * 4);
    // Save stored one extra byte per row for shift 1, so a row wider
    // than the unshifted sprite means the save was shifted.
    shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
    screenPtr = (uint8_t *)&dst->pixels[SURFACE_ROW_OFFSET(backup->y) + ((uint16_t)backup->x >> 1)];
    SPLIT_POINTER(screenPtr, &screenLo, &screenBank);
    SPLIT_BACKUP_CACHED(backup->bytes, &backupLo, &backupBank);

    /* Hoist 2D-array indexing -- see save-side comment. */
    cacheIdx = (uint16_t)(((uint16_t)shift << 1) + (uint16_t)shift + SPRITE_OP_RESTORE);
    cachedDst = (uint8_t *)sp->cachedDstBank + cacheIdx;
    cachedSrc = (uint8_t *)sp->cachedSrcBank + cacheIdx;
    /* Same byte-pointer trick as SURFACE_ROW_OFFSET to dodge ~MUL4. */
    routineOffset = *(uint16_t *)((uint8_t *)sp->routineOffsets + (cacheIdx << 1));
    fnAddr = codegenArenaBaseAddr() + sp->slot->offset + (uint32_t)routineOffset;

    // Stub: X = backup (source), Y = screen (destination).
    if (!gRestoreStubInited) {
        gSpriteRestoreStub[ 0] = 0x8B;
        gSpriteRestoreStub[ 1] = 0xA2;
        gSpriteRestoreStub[ 4] = 0xA0;
        gSpriteRestoreStub[ 7] = 0x22;
        gSpriteRestoreStub[11] = 0xAB;
        gSpriteRestoreStub[12] = 0x6B;
        gRestoreStubInited = true;
    }
    if (backupLo != gRestoreStubLastXLo) {
        gSpriteRestoreStub[ 2] = (unsigned char)(backupLo & 0xFFu);
        gSpriteRestoreStub[ 3] = (unsigned char)((backupLo >> 8) & 0xFFu);
        gRestoreStubLastXLo = backupLo;
    }
    if (screenLo != gRestoreStubLastYLo) {
        gSpriteRestoreStub[ 5] = (unsigned char)(screenLo & 0xFFu);
        gSpriteRestoreStub[ 6] = (unsigned char)((screenLo >> 8) & 0xFFu);
        gRestoreStubLastYLo = screenLo;
    }
    if (fnAddr != gRestoreStubLastFnAddr) {
        const uint8_t *fnB_ = (const uint8_t *)&fnAddr;
        gSpriteRestoreStub[ 8] = fnB_[0];
        gSpriteRestoreStub[ 9] = fnB_[1];
        gSpriteRestoreStub[10] = fnB_[2];
        gRestoreStubLastFnAddr = fnAddr;
    }

    // Same short-circuit as save: only re-stamp the bank operands if
    // they actually changed since last call.
    if (*cachedDst != screenBank || *cachedSrc != backupBank) {
        routine = codegenArenaBase() + sp->slot->offset + routineOffset;
        patchMvnBanks(routine, heightPx, /*dst*/screenBank, /*src*/backupBank);
        *cachedDst = screenBank;
        *cachedSrc = backupBank;
    }

    // Same register-width bracketing as the save path: M=16 / X=16
    // going in, and forced again before returning to compiled C.
    asm {
        rep #0x30
        jsl gSpriteRestoreStub
        rep #0x30
    }
}

#elif defined(JOEYLIB_PLATFORM_AMIGA)

/* Amiga planar dispatchers. spriteEmitPlanar68k.c emits routines with
 * cdecl(p0, p1, p2, p3[, backup]) signatures that write directly to
 * bitplanes. Compute byteOff = y*40 + x/8 and pass plane[i]+byteOff
 * as the 4 plane args. shift = x % 8 selects the variant; today only
 * shift 0 emits non-zero bytes, so callers should already have
 * gated on routineOffsets[shift][op] != SPRITE_NOT_COMPILED.
 *
 * For non-zero shifts (x not 8-px-aligned), the dispatcher in
 * src/core/sprite.c (spriteDraw / spriteSaveUnder / spriteRestoreUnder)
 * sees SPRITE_NOT_COMPILED for the shift and falls back to the
 * interpreter, which handles arbitrary x via halSpriteDrawPlanes /
 * halSpriteSavePlanes / halSpriteRestorePlanes.
 *
 * NOTE(review): the "only shift 0 emits" statement above disagrees
 * with the comment in spriteCompile (DRAW emitted per shift 0..7,
 * SAVE/RESTORE emitted for shifts 0 and 1 with slots 2..7 aliased to
 * slot 1) and with the shift handling in the save/restore dispatchers
 * below. One of the two is stale -- confirm against
 * spriteEmitPlanar68k.c. Either way these dispatchers do not re-check
 * SPRITE_NOT_COMPILED themselves. */

/* Bytes per bitplane row used for byteOff below. */
#define AMIGA_BYTES_PER_ROW_LOCAL 40

/* Amiga compiled draw: calls the DRAW routine for shift = x & 7 with
 * the four plane pointers advanced to the byte containing (x, y).
 * Returns without drawing if plane 0 is NULL; planes 1..3 are not
 * checked. */
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
    typedef void (*DrawFn)(uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3);
    uint8_t shift;
    uint16_t byteOff;
    uint8_t *p0;
    uint8_t *p1;
    uint8_t *p2;
    uint8_t *p3;
    DrawFn fn;

    shift = (uint8_t)(x & 7);
    /* 8 pixels per plane byte. */
    byteOff = (uint16_t)((uint16_t)y * AMIGA_BYTES_PER_ROW_LOCAL + ((uint16_t)x >> 3));
    p0 = halSurfacePlanePtr(dst, 0);
    if (p0 == NULL) return;
    p1 = halSurfacePlanePtr(dst, 1);
    p2 = halSurfacePlanePtr(dst, 2);
    p3 = halSurfacePlanePtr(dst, 3);
    fn = (DrawFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_DRAW]);
    fn(p0 + byteOff, p1 + byteOff, p2 + byteOff, p3 + byteOff);
}

/* Amiga compiled save-under: records the saved region in `backup`
 * (x rounded down to a multiple of 8) and calls the SAVE routine for
 * shift = x & 7 to copy the plane bytes into backup->bytes.
 * The backup metadata is written before the plane-0 NULL check, so on
 * the early return `backup` describes a region whose bytes were not
 * captured. */
void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
    typedef void (*SaveFn)(uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, uint8_t *backup);
    uint8_t shift;
    int16_t clippedX;
    uint16_t widthPx;
    uint16_t heightPx;
    uint16_t byteOff;
    uint8_t *p0;
    uint8_t *p1;
    uint8_t *p2;
    uint8_t *p3;
    SaveFn fn;

    shift = (uint8_t)(x & 7);
    clippedX = (int16_t)(x & ~7);
    widthPx = (uint16_t)(sp->widthTiles * 8);
    heightPx = (uint16_t)(sp->heightTiles * 8);
    /* Shifts 1..7 spill into one extra plane byte per row (= +8 px). */
    if (shift != 0u) {
        widthPx = (uint16_t)(widthPx + 8u);
    }
    byteOff = (uint16_t)((uint16_t)y * AMIGA_BYTES_PER_ROW_LOCAL + ((uint16_t)clippedX >> 3));

    backup->sprite = sp;
    backup->x = clippedX;
    backup->y = y;
    backup->width = widthPx;
    backup->height = heightPx;
    /* 4 planes * h * (widthPx/8) bytes = h * widthPx/2. */
    backup->sizeBytes = (uint16_t)((uint16_t)heightPx * (widthPx >> 1));

    p0 = halSurfacePlanePtr(src, 0);
    if (p0 == NULL) return;
    p1 = halSurfacePlanePtr(src, 1);
    p2 = halSurfacePlanePtr(src, 2);
    p3 = halSurfacePlanePtr(src, 3);
    fn = (SaveFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_SAVE]);
    fn(p0 + byteOff, p1 + byteOff, p2 + byteOff, p3 + byteOff, backup->bytes);
}

/* Amiga compiled restore-under: copies backup->bytes back to the four
 * planes at the position recorded in `backup`. Returns without
 * restoring if plane 0 is NULL. */
void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
    typedef void (*RestoreFn)(uint8_t *p0, uint8_t *p1, uint8_t *p2, uint8_t *p3, const uint8_t *backup);
    SpriteT *sp;
    uint8_t shift;
    uint16_t byteOff;
    uint8_t *p0;
    uint8_t *p1;
    uint8_t *p2;
    uint8_t *p3;
    RestoreFn fn;

    sp = backup->sprite;
    /* backup->x is 8-px aligned (clippedX from save), so x & 7 is
     * useless for picking the original shift. Encode it via
     * backup->width: == widthTiles*8 means shift 0; > means shifted.
     * Shifted slots 1..7 all alias to the same restore bytes, so
     * slot 1 stands in for any non-zero shift. */
    shift = (uint8_t)(backup->width > (uint16_t)(sp->widthTiles * 8) ? 1u : 0u);
    byteOff = (uint16_t)((uint16_t)backup->y * AMIGA_BYTES_PER_ROW_LOCAL + ((uint16_t)backup->x >> 3));
    p0 = halSurfacePlanePtr(dst, 0);
    if (p0 == NULL) return;
    p1 = halSurfacePlanePtr(dst, 1);
    p2 = halSurfacePlanePtr(dst, 2);
    p3 = halSurfacePlanePtr(dst, 3);
    fn = (RestoreFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_RESTORE]);
    fn(p0 + byteOff, p1 + byteOff, p2 + byteOff, p3 + byteOff, backup->bytes);
}

#else

// x86 / 68k compiled draw: the emitted bytes are called as a cdecl
//   void draw(uint8_t *destRow)
// with destRow pointing at the byte holding pixel (x, y). The routine
// is selected by x parity (shift = x & 1). No NULL/bounds checks and
// no SPRITE_NOT_COMPILED check are done here.
void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y) {
    typedef void (*DrawFn)(uint8_t *destRow);
    uint8_t shift;
    uint8_t *destRow;
    DrawFn fn;

    shift = (uint8_t)(x & 1);
    // Two pixels per byte: x >> 1 is the byte column within the row.
    destRow = &dst->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)x >> 1)];
    fn = (DrawFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_DRAW]);
    fn(destRow);
}

// x86 / 68k compiled save: bytes are a cdecl
//   void copy(const uint8_t *src, uint8_t *dst)
// that walks heightPx rows of copyBytes from screen (stride
// SURFACE_BYTES_PER_ROW) into the contiguous backup buffer.
//
// Also fills in `backup` (sprite, x rounded down to even, y, width in
// pixels, height, sizeBytes) so the restore side can find the region
// and recover the shift.
void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_t y, SpriteBackupT *backup) {
    typedef void (*CopyFn)(const uint8_t *src, uint8_t *dst);
    uint8_t shift;
    int16_t clippedX;
    uint16_t widthPx;
    uint16_t heightPx;
    uint16_t copyBytes;
    uint8_t *screenPtr;
    CopyFn fn;

    shift = (uint8_t)(x & 1);
    clippedX = (int16_t)(x & ~1);
    widthPx = (uint16_t)(sp->widthTiles * 8);
    heightPx = (uint16_t)(sp->heightTiles * 8);
    // An odd x spills into one extra byte per row.
    copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
    screenPtr = (uint8_t *)&src->pixels[(uint16_t)y * SURFACE_BYTES_PER_ROW + ((uint16_t)clippedX >> 1)];

    backup->sprite = sp;
    backup->x = clippedX;
    backup->y = y;
    backup->width = (uint16_t)(copyBytes << 1);
    backup->height = heightPx;
    backup->sizeBytes = (uint16_t)(copyBytes * heightPx);

    fn = (CopyFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_SAVE]);
    fn(screenPtr, backup->bytes);
}

// Mirror of save: caller swaps arg order so the same emitted shape
// drives backup -> screen. The screen-side stride lives inside the
// emitted bytes, so RESTORE has its own routine bytes (stride is
// applied to dst instead of src).
//
// The shift is recovered from backup->width: a row equal to the
// unshifted sprite width (widthTiles * 4 bytes) means shift 0,
// anything else means shift 1.
void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
    typedef void (*CopyFn)(const uint8_t *src, uint8_t *dst);
    SpriteT *sp;
    uint8_t shift;
    uint16_t copyBytes;
    uint16_t spriteBytesPerRow;
    uint8_t *screenPtr;
    CopyFn fn;

    sp = backup->sprite;
    copyBytes = (uint16_t)(backup->width >> 1);
    spriteBytesPerRow = (uint16_t)(sp->widthTiles * 4);
    shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
    screenPtr = (uint8_t *)&dst->pixels[(uint16_t)backup->y * SURFACE_BYTES_PER_ROW + ((uint16_t)backup->x >> 1)];

    fn = (CopyFn)(codegenArenaBase() + sp->slot->offset + sp->routineOffsets[shift][SPRITE_OP_RESTORE]);
    fn(backup->bytes, screenPtr);
}

#endif