More speed!
This commit is contained in:
parent
04a9550421
commit
20cbccaca5
20 changed files with 130 additions and 71 deletions
13
make/iigs.mk
13
make/iigs.mk
|
|
@ -49,7 +49,18 @@ NTP_BIN := $(BUILD)/audio/ntpplayer.bin
|
|||
NTP_ASM := $(BUILD)/audio/ntpdata.asm
|
||||
IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32
|
||||
|
||||
LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM) $(CODEGEN_SRCS)
|
||||
# IMPORTANT: CODEGEN_SRCS (specifically spriteEmitIigs.c) MUST be the
|
||||
# first entry after the main object in the link order. ORCA-Linker's
|
||||
# bank assignment is order-sensitive: when spriteEmitIigs.c lands at
|
||||
# any later position, the linker assigns SPRITECG to a bank where its
|
||||
# intra-OMF-segment static-symbol relocations (emitMvnCopyRoutine,
|
||||
# shiftedByteAt, writeLE16) can't be encoded -- you get cryptic
|
||||
# "Addressing error" / "Unresolved reference Label: ..." failures
|
||||
# whose root cause is bank packing, not source. Putting CODEGEN_SRCS
|
||||
# first gives SPRITECG prime placement and the relocations resolve.
|
||||
# This was the underlying cause of feedback_orca_link_segment_count
|
||||
# cases 2-5 (we'd been working around it by managing _ROOT mass).
|
||||
LIB_SRCS := $(CODEGEN_SRCS) $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM)
|
||||
|
||||
HELLO_SRC := $(EXAMPLES)/hello/hello.c
|
||||
HELLO_BIN := $(BINDIR)/HELLO
|
||||
|
|
|
|||
|
|
@ -158,10 +158,9 @@ bool spriteCompile(SpriteT *sp) {
|
|||
|
||||
#if defined(JOEYLIB_PLATFORM_IIGS)
|
||||
|
||||
// y*160 lookup. gRowOffsetLut is the 200-entry uint16_t table built
|
||||
// once by iigsInitRowLut at halInit. Replaces ORCA-C's runtime
|
||||
// multiply (a JSL into __mul16) with a single indexed long-mode read.
|
||||
extern const uint16_t gRowOffsetLut[200];
|
||||
// SURFACE_ROW_OFFSET dispatches to the gRowOffsetLut lookup on IIgs;
|
||||
// declared in surfaceInternal.h. Replaces ORCA-C's __mul16 JSL with a
|
||||
// single indexed long-mode read.
|
||||
|
||||
// IIgs uses inline asm + a self-modifying call stub instead of a C
|
||||
// function-pointer cast. The build uses ORCA-C large memory model
|
||||
|
|
@ -212,7 +211,7 @@ void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y)
|
|||
uint8_t *destPtr;
|
||||
uint8_t destBytes[4];
|
||||
shift = (uint8_t)(x & 1);
|
||||
destPtr = &dst->pixels[gRowOffsetLut[(uint16_t)y] + ((uint16_t)x >> 1)];
|
||||
destPtr = &dst->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
||||
memcpy(destBytes, &destPtr, 4);
|
||||
destAddr = (uint32_t)destBytes[0]
|
||||
| ((uint32_t)destBytes[1] << 8)
|
||||
|
|
@ -366,7 +365,7 @@ void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_
|
|||
heightPx = (uint16_t)(sp->heightTiles * 8);
|
||||
copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
|
||||
|
||||
screenPtr = (uint8_t *)&src->pixels[gRowOffsetLut[(uint16_t)y] + ((uint16_t)clippedX >> 1)];
|
||||
screenPtr = (uint8_t *)&src->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)clippedX >> 1)];
|
||||
splitPointer(screenPtr, &screenLo, &screenBank);
|
||||
splitPointer(backup->bytes, &backupLo, &backupBank);
|
||||
|
||||
|
|
@ -450,7 +449,7 @@ void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
|||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * 4);
|
||||
shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
|
||||
|
||||
screenPtr = (uint8_t *)&dst->pixels[gRowOffsetLut[(uint16_t)backup->y] + ((uint16_t)backup->x >> 1)];
|
||||
screenPtr = (uint8_t *)&dst->pixels[SURFACE_ROW_OFFSET(backup->y) + ((uint16_t)backup->x >> 1)];
|
||||
splitPointer(screenPtr, &screenLo, &screenBank);
|
||||
splitPointer(backup->bytes, &backupLo, &backupBank);
|
||||
|
||||
|
|
|
|||
|
|
@ -57,7 +57,6 @@ JOEYLIB_SEGMENT("SPRITECG")
|
|||
static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t copyBytes, bool advanceX);
|
||||
static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
|
||||
static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
|
||||
static uint16_t writeLE16(uint8_t *out, uint16_t value);
|
||||
|
||||
|
||||
// ----- Emit helpers (alphabetical) -----
|
||||
|
|
@ -126,13 +125,9 @@ static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) {
|
|||
}
|
||||
|
||||
|
||||
// 65816 is little-endian; write low byte first.
|
||||
// Stores `value` into out[0] and out[1] with the low byte first and
// returns the number of bytes written (always 2), so a caller can add
// the result to its output cursor.
static uint16_t writeLE16(uint8_t *out, uint16_t value) {
|
||||
out[0] = (uint8_t)(value & 0xFFu);
|
||||
out[1] = (uint8_t)((value >> 8) & 0xFFu);
|
||||
return 2;
|
||||
}
|
||||
|
||||
// writeLE16 was inlined at every call site. Inlining cuts a JSL/RTL
|
||||
// per emitted 16-bit immediate (4 instructions per byte * 12 sites)
|
||||
// and avoids ORCA-Linker bank-fragility around tiny-helper resolution.
|
||||
|
||||
// Common backbone for save and restore. Both ops copy a byte-aligned
|
||||
// rectangle row-by-row using MVN; only the operand banks (which buffer
|
||||
|
|
@ -178,11 +173,13 @@ static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t cop
|
|||
out[cursor++] = advanceX ? 0x8A : 0x98; // TXA / TYA
|
||||
out[cursor++] = 0x18; // CLC
|
||||
out[cursor++] = 0x69; // ADC #imm (M=16)
|
||||
cursor += writeLE16(out + cursor, advance);
|
||||
out[cursor++] = (uint8_t)(advance & 0xFFu);
|
||||
out[cursor++] = (uint8_t)((advance >> 8) & 0xFFu);
|
||||
out[cursor++] = advanceX ? 0xAA : 0xA8; // TAX / TAY
|
||||
}
|
||||
out[cursor++] = 0xA9; // LDA #imm (M=16)
|
||||
cursor += writeLE16(out + cursor, (uint16_t)(copyBytes - 1));
|
||||
out[cursor++] = (uint8_t)((copyBytes - 1) & 0xFFu);
|
||||
out[cursor++] = (uint8_t)(((copyBytes - 1) >> 8) & 0xFFu);
|
||||
out[cursor++] = 0x54; // MVN
|
||||
out[cursor++] = 0x00; // dstbk -- patched per call
|
||||
out[cursor++] = 0x00; // srcbk -- patched per call
|
||||
|
|
@ -299,11 +296,15 @@ uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
|||
out[cursor++] = 0x20;
|
||||
wide = true;
|
||||
}
|
||||
out[cursor++] = 0xA9; // LDA #imm16
|
||||
cursor += writeLE16(out + cursor,
|
||||
(uint16_t)(((uint16_t)nextValue << 8) | value));
|
||||
out[cursor++] = 0x99; // STA abs,Y
|
||||
cursor += writeLE16(out + cursor, absOffset);
|
||||
{
|
||||
uint16_t pair = (uint16_t)(((uint16_t)nextValue << 8) | value);
|
||||
out[cursor++] = 0xA9; // LDA #imm16
|
||||
out[cursor++] = (uint8_t)(pair & 0xFFu);
|
||||
out[cursor++] = (uint8_t)((pair >> 8) & 0xFFu);
|
||||
out[cursor++] = 0x99; // STA abs,Y
|
||||
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||
}
|
||||
col++; // consumed col+1
|
||||
continue;
|
||||
}
|
||||
|
|
@ -321,16 +322,19 @@ uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
|||
out[cursor++] = 0xA9;
|
||||
out[cursor++] = value;
|
||||
out[cursor++] = 0x99;
|
||||
cursor += writeLE16(out + cursor, absOffset);
|
||||
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||
} else {
|
||||
out[cursor++] = 0xB9;
|
||||
cursor += writeLE16(out + cursor, absOffset);
|
||||
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||
out[cursor++] = 0x29;
|
||||
out[cursor++] = (uint8_t)(~opaqueMask & 0xFFu);
|
||||
out[cursor++] = 0x09;
|
||||
out[cursor++] = value;
|
||||
out[cursor++] = 0x99;
|
||||
cursor += writeLE16(out + cursor, absOffset);
|
||||
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@
|
|||
#include "joey/asset.h"
|
||||
#include "joey/palette.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
#define JAS_HEADER_SIZE 44
|
||||
#define JAS_PIXELS_OFFSET JAS_HEADER_SIZE
|
||||
#define JAS_PALETTE_OFFSET 12
|
||||
|
|
|
|||
|
|
@ -8,6 +8,9 @@
|
|||
#include "joey/audio.h"
|
||||
#include "hal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
static bool gAudioReady = false;
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@
|
|||
|
||||
#include "codegenArenaInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
|
||||
// ----- Module state -----
|
||||
|
||||
|
|
|
|||
|
|
@ -10,8 +10,12 @@
|
|||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
#include "joey/platform.h"
|
||||
#include "joey/debug.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
static const char *kLogPath = "joeylog.txt";
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ static void fillRectClipped(SurfaceT *s, int16_t x, int16_t y, int16_t w, int16_
|
|||
uint8_t *line;
|
||||
|
||||
for (row = 0; row < h; row++) {
|
||||
line = &s->pixels[(y + row) * SURFACE_BYTES_PER_ROW];
|
||||
line = &s->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||
pxStart = x;
|
||||
pxEnd = x + w;
|
||||
|
||||
|
|
@ -208,7 +208,7 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
|||
|
||||
// Fallback path needs row; compute it here so the asm path
|
||||
// above doesn't pay for an unused row-offset computation on every iter.
|
||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
||||
|
||||
// Tier-2 asm fast path: combined seed test + walk-left +
|
||||
// walk-right in one cross-segment call. Falls back to the
|
||||
|
|
@ -294,7 +294,7 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
|||
}
|
||||
scanY = (int16_t)(y + 1);
|
||||
}
|
||||
scanRow = &s->pixels[scanY * SURFACE_BYTES_PER_ROW];
|
||||
scanRow = &s->pixels[SURFACE_ROW_OFFSET(scanY)];
|
||||
// Prefer the combined scan+push asm path (one call per
|
||||
// scan, no markBuf and no per-pixel C edge walk).
|
||||
if (!halFastFloodScanAndPush(scanRow, leftX, rightX,
|
||||
|
|
@ -502,7 +502,7 @@ void drawPixel(SurfaceT *s, int16_t x, int16_t y, uint8_t colorIndex) {
|
|||
}
|
||||
|
||||
if (!halFastDrawPixel(s, (uint16_t)x, (uint16_t)y, colorIndex)) {
|
||||
byte = &s->pixels[y * SURFACE_BYTES_PER_ROW + (x >> 1)];
|
||||
byte = &s->pixels[SURFACE_ROW_OFFSET(y) + (x >> 1)];
|
||||
nibble = colorIndex & 0x0F;
|
||||
if (x & 1) {
|
||||
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
||||
|
|
@ -625,7 +625,7 @@ void floodFill(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor) {
|
|||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||
return;
|
||||
}
|
||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
||||
seedColor = srcPixel(row, x);
|
||||
if ((seedColor & 0x0F) == (newColor & 0x0F)) {
|
||||
return;
|
||||
|
|
@ -644,7 +644,7 @@ void floodFillBounded(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor, uint8
|
|||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||
return;
|
||||
}
|
||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
||||
pix = srcPixel(row, x);
|
||||
// Starting on a boundary pixel or already-filled pixel: nothing
|
||||
// to do.
|
||||
|
|
@ -668,7 +668,7 @@ uint8_t samplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
|||
return 0;
|
||||
}
|
||||
|
||||
byte = s->pixels[y * SURFACE_BYTES_PER_ROW + (x >> 1)];
|
||||
byte = s->pixels[SURFACE_ROW_OFFSET(y) + (x >> 1)];
|
||||
if (x & 1) {
|
||||
return (uint8_t)(byte & 0x0F);
|
||||
}
|
||||
|
|
@ -698,12 +698,12 @@ void surfaceBlit(SurfaceT *dst, const JoeyAssetT *src, int16_t x, int16_t y) {
|
|||
|
||||
srcRowBytes = (int16_t)((src->width + 1) >> 1);
|
||||
srcRow = &src->pixels[srcY0 * srcRowBytes];
|
||||
dstRow = &dst->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y)];
|
||||
if (!halFastBlitRect(dstRow, x, srcRow, srcX0,
|
||||
copyW, copyH, srcRowBytes, 0xFFFFu)) {
|
||||
for (row = 0; row < copyH; row++) {
|
||||
srcRow = &src->pixels[(srcY0 + row) * srcRowBytes];
|
||||
dstRow = &dst->pixels[(y + row) * SURFACE_BYTES_PER_ROW];
|
||||
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||
for (col = 0; col < copyW; col++) {
|
||||
nibble = srcPixel(srcRow, srcX0 + col);
|
||||
dstPixel(dstRow, x + col, nibble);
|
||||
|
|
@ -738,12 +738,12 @@ void surfaceBlitMasked(SurfaceT *dst, const JoeyAssetT *src, int16_t x, int16_t
|
|||
transparent = (uint8_t)(transparentIndex & 0x0F);
|
||||
srcRowBytes = (int16_t)((src->width + 1) >> 1);
|
||||
srcRow = &src->pixels[srcY0 * srcRowBytes];
|
||||
dstRow = &dst->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y)];
|
||||
if (!halFastBlitRect(dstRow, x, srcRow, srcX0,
|
||||
copyW, copyH, srcRowBytes, (uint16_t)transparent)) {
|
||||
for (row = 0; row < copyH; row++) {
|
||||
srcRow = &src->pixels[(srcY0 + row) * srcRowBytes];
|
||||
dstRow = &dst->pixels[(y + row) * SURFACE_BYTES_PER_ROW];
|
||||
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||
for (col = 0; col < copyW; col++) {
|
||||
nibble = srcPixel(srcRow, srcX0 + col);
|
||||
if (nibble == transparent) {
|
||||
|
|
|
|||
|
|
@ -262,15 +262,19 @@ extern uint16_t gFloodRightX;
|
|||
true) \
|
||||
: false)
|
||||
|
||||
// halFastFillRect stays as a real C wrapper -- removing it triggered
|
||||
// an unrelated ORCA linker bank-placement failure (same mode as the
|
||||
// peislam.asm deletion: `Unresolved reference Label:
|
||||
// emitMvnCopyRoutine` in sprite codegen). The wrapper now just
|
||||
// forwards to iigsFillRectInner (asm does partial+middle); we lose
|
||||
// the call-site macro inlining for fillRect specifically but keep
|
||||
// the rest of the macros AND the new asm helper. Per-call wrapper
|
||||
// overhead for halFastFillRect is back (~80 cyc) but at least the
|
||||
// per-row partial-byte logic happens in asm now.
|
||||
// halFastFillRect: macro form, same shape as the others. Builds
|
||||
// clean now that _ROOT has been thinned out via the CORESYS load
|
||||
// segment migration -- previous attempts shrank _ROOT enough to
|
||||
// retrip the bank-packing fragility, but with most core .c files
|
||||
// out of _ROOT, _ROOT no longer reacts to such changes. Saves ~80 cyc/call.
|
||||
// Evaluates to true when the fill was forwarded to iigsFillRectInner,
// false when _s is NULL or is not the surface returned by stageGet().
// The NULL test comes first and short-circuits, so a NULL surface can
// never reach the (_s)->pixels dereference even if stageGet() itself
// returns NULL. The color is masked to its low nibble before the call.
// NOTE: _s is evaluated more than once; do not pass an expression with
// side effects.
#undef halFastFillRect
#define halFastFillRect(_s, _x, _y, _w, _h, _c) \
    (((_s) && (_s) == stageGet()) \
        ? (iigsFillRectInner((_s)->pixels, (uint16_t)(_x), (uint16_t)(_y), \
                             (uint16_t)(_w), (uint16_t)(_h), \
                             (uint16_t)((_c) & 0x0F)), \
           true) \
        : false)
|
||||
|
||||
// Tile primitives operate on caller-computed row pointers; just
|
||||
// forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@
|
|||
#include "hal.h"
|
||||
#include "surfaceInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
// 8 KB fits the largest typical sprite working set (~3-4 KB per
|
||||
// 32x32 sprite at all opaque) and keeps malloc requests small enough
|
||||
// for IIgs ORCA-C's small-memory-model heap to satisfy them.
|
||||
|
|
|
|||
|
|
@ -15,6 +15,9 @@
|
|||
#include "hal.h"
|
||||
#include "inputInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
bool gKeyState [KEY_COUNT];
|
||||
bool gKeyPrev [KEY_COUNT];
|
||||
|
||||
|
|
|
|||
|
|
@ -10,6 +10,9 @@
|
|||
#include "joey/palette.h"
|
||||
#include "surfaceInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
// ----- Public API (alphabetical) -----
|
||||
|
||||
void paletteGet(const SurfaceT *s, uint8_t paletteIndex, uint16_t *out16) {
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@
|
|||
#include "hal.h"
|
||||
#include "surfaceInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
// ----- Public API (alphabetical) -----
|
||||
|
||||
void stagePresent(void) {
|
||||
|
|
|
|||
|
|
@ -9,6 +9,9 @@
|
|||
#include "joey/palette.h"
|
||||
#include "surfaceInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
// ----- Public API (alphabetical) -----
|
||||
|
||||
uint8_t scbGet(const SurfaceT *s, uint16_t line) {
|
||||
|
|
|
|||
|
|
@ -13,6 +13,9 @@
|
|||
#include "spriteInternal.h"
|
||||
#include "surfaceInternal.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
// 8x8 tiles, 4bpp packed = 4 bytes/row * 8 rows = 32 bytes/tile.
|
||||
#define TILE_BYTES 32
|
||||
#define TILE_PIXELS 8
|
||||
|
|
|
|||
|
|
@ -10,6 +10,13 @@
|
|||
#include "hal.h"
|
||||
#include "surfaceInternal.h"
|
||||
|
||||
// Hoist into a CORESYS load segment alongside the other small core
|
||||
// files. Keeps _ROOT thin and stable so it stops reacting to per-file
|
||||
// source changes -- _ROOT size flux was tripping ORCA-Linker bank
|
||||
// packing in spriteEmitIigs.c (see feedback_orca_link_segment_count
|
||||
// cases 2-4).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||
extern void iigsMarkDirtyRowsInner(uint16_t yStart, uint16_t yEnd, uint16_t minWord, uint16_t maxWord);
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -60,6 +60,19 @@ void surfaceMarkDirtyAll(const SurfaceT *s);
|
|||
// Reset every row to CLEAN. Called by stagePresent after the slam.
|
||||
void stageDirtyClearAll(void);
|
||||
|
||||
// y -> byte offset of row y in a SURFACE_BYTES_PER_ROW-strided buffer.
|
||||
// On IIgs this expands to a single indexed long-mode read against
|
||||
// gRowOffsetLut (built once at halInit). On other ports it's the
|
||||
// straight multiply -- those compilers (gcc, OpenWatcom) optimize the
|
||||
// constant 160 to a shift+add chain that's already cheap. The point
|
||||
// is to dodge ORCA-C's __mul16 JSL on every per-row pointer compute.
|
||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||
// The table is declared with exactly 200 entries and the lookup below is
// not bounds-checked, so _y must already be within 0..199 when it
// reaches this macro (a negative _y becomes a large index after the
// uint16_t cast).
extern const uint16_t gRowOffsetLut[200];
|
||||
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)gRowOffsetLut[(uint16_t)(_y)])
|
||||
#else
|
||||
// Portable form: _y is converted to uint16_t before the multiply and the
// product is truncated back to uint16_t.
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)((uint16_t)(_y) * SURFACE_BYTES_PER_ROW))
|
||||
#endif
|
||||
|
||||
// Allocate and free the library-owned stage (the back-buffer surface
|
||||
// that stagePresent flips to the display). Called from init.c during
|
||||
// joeyInit / joeyShutdown. The stage's pixel storage is supplied by
|
||||
|
|
|
|||
|
|
@ -145,8 +145,8 @@ void tileCopy(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src,
|
|||
srcPixelX = (uint16_t)((uint16_t)srcBx * TILE_PIXELS_PER_SIDE);
|
||||
srcPixelY = (uint16_t)((uint16_t)srcBy * TILE_PIXELS_PER_SIDE);
|
||||
|
||||
dstRow0 = &dst->pixels[dstPixelY * SURFACE_BYTES_PER_ROW + (dstPixelX >> 1)];
|
||||
srcRow0 = &src->pixels[srcPixelY * SURFACE_BYTES_PER_ROW + (srcPixelX >> 1)];
|
||||
dstRow0 = &dst->pixels[SURFACE_ROW_OFFSET(dstPixelY) + (dstPixelX >> 1)];
|
||||
srcRow0 = &src->pixels[SURFACE_ROW_OFFSET(srcPixelY) + (srcPixelX >> 1)];
|
||||
|
||||
if (!halFastTileCopy(dstRow0, srcRow0)) {
|
||||
copyTileOpaque(dstRow0, srcRow0);
|
||||
|
|
@ -176,8 +176,8 @@ void tileCopyMasked(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT
|
|||
srcPixelX = (uint16_t)((uint16_t)srcBx * TILE_PIXELS_PER_SIDE);
|
||||
srcPixelY = (uint16_t)((uint16_t)srcBy * TILE_PIXELS_PER_SIDE);
|
||||
|
||||
dstRow0 = &dst->pixels[dstPixelY * SURFACE_BYTES_PER_ROW + (dstPixelX >> 1)];
|
||||
srcRow0 = &src->pixels[srcPixelY * SURFACE_BYTES_PER_ROW + (srcPixelX >> 1)];
|
||||
dstRow0 = &dst->pixels[SURFACE_ROW_OFFSET(dstPixelY) + (dstPixelX >> 1)];
|
||||
srcRow0 = &src->pixels[SURFACE_ROW_OFFSET(srcPixelY) + (srcPixelX >> 1)];
|
||||
|
||||
if (!halFastTileCopyMasked(dstRow0, srcRow0, transparentIndex)) {
|
||||
copyTileMasked(dstRow0, srcRow0, transparentIndex);
|
||||
|
|
@ -203,7 +203,7 @@ void tileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex) {
|
|||
doubled = (uint8_t)(((colorIndex & 0x0F) << 4) | (colorIndex & 0x0F));
|
||||
if (!halFastTileFill(s, bx, by,
|
||||
(uint16_t)((uint16_t)doubled | ((uint16_t)doubled << 8)))) {
|
||||
uint8_t *row = &s->pixels[pixelY * SURFACE_BYTES_PER_ROW + (pixelX >> 1)];
|
||||
uint8_t *row = &s->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||
uint8_t i;
|
||||
for (i = 0; i < TILE_PIXELS_PER_SIDE; i++) {
|
||||
row[0] = doubled;
|
||||
|
|
@ -233,7 +233,7 @@ void tilePaste(SurfaceT *dst, uint8_t bx, uint8_t by, const TileT *in) {
|
|||
}
|
||||
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
||||
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
||||
dstRow = &dst->pixels[pixelY * SURFACE_BYTES_PER_ROW + (pixelX >> 1)];
|
||||
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||
src = &in->pixels[0];
|
||||
if (!halFastTilePaste(dstRow, src)) {
|
||||
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
||||
|
|
@ -265,7 +265,7 @@ void tileSnap(const SurfaceT *src, uint8_t bx, uint8_t by, TileT *out) {
|
|||
}
|
||||
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
||||
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
||||
srcRow = &src->pixels[pixelY * SURFACE_BYTES_PER_ROW + (pixelX >> 1)];
|
||||
srcRow = &src->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||
dst = &out->pixels[0];
|
||||
if (!halFastTileSnap(dst, srcRow)) {
|
||||
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
||||
|
|
|
|||
|
|
@ -253,7 +253,7 @@ void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint1
|
|||
// Asm per-row MVN blit. Stage pixels live at $01:2000; SHR display
|
||||
// at $E1:2000 (same offset within their banks). srcOffset is the
|
||||
// byte offset of the first byte to copy on the first row.
|
||||
srcOffset = (uint16_t)(0x2000 + y * SURFACE_BYTES_PER_ROW + byteStart);
|
||||
srcOffset = (uint16_t)(0x2000 + SURFACE_ROW_OFFSET(y) + byteStart);
|
||||
iigsBlitRectStageToShr(srcOffset, copyBytes, h);
|
||||
}
|
||||
|
||||
|
|
@ -278,23 +278,10 @@ void halShutdown(void) {
|
|||
// partial-byte (nibble-edge) handling is too gnarly for a macro.
|
||||
|
||||
|
||||
// halFastFillRect: thin wrapper around iigsFillRectInner. The asm
|
||||
// helper now handles the partial-byte (nibble-edge) logic that used
|
||||
// to live here, so this function is just a stage-check + forward.
|
||||
// (It's not macro-dispatched like the others because removing it
|
||||
// from the C side triggers an unrelated ORCA-linker bank-placement
|
||||
// failure -- the binary needs enough mass in _ROOT to keep sprite
|
||||
// codegen's static symbols at addresses the linker can resolve.)
|
||||
// Returns true when the fill was handed to iigsFillRectInner, false when
// `s` is NULL or is not the surface returned by stageGet().
bool halFastFillRect(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex) {
|
||||
// Only the stage surface takes the asm path; anything else reports false.
if (s == NULL || s != stageGet()) {
|
||||
return false;
|
||||
}
|
||||
// Coordinates and size are cast to uint16_t and the color is masked to
// its low nibble before being passed to the asm helper.
iigsFillRectInner(s->pixels,
|
||||
(uint16_t)x, (uint16_t)y,
|
||||
(uint16_t)w, (uint16_t)h,
|
||||
(uint16_t)(colorIndex & 0x0F));
|
||||
return true;
|
||||
}
|
||||
// halFastFillRect: macro-dispatched in core/hal.h, same as the other
|
||||
// halFast* primitives. The C wrapper that used to live here was kept
|
||||
// as load-bearing _ROOT mass to defeat ORCA-Linker bank fragility;
|
||||
// since the CORESYS migration drained _ROOT, the macro form is safe.
|
||||
|
||||
|
||||
uint8_t *halStageAllocPixels(void) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,9 @@
|
|||
#include "inputInternal.h"
|
||||
#include "joey/surface.h"
|
||||
|
||||
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||
JOEYLIB_SEGMENT("CORESYS")
|
||||
|
||||
// ----- Hardware registers -----
|
||||
|
||||
#define IIGS_KBD ((volatile uint8_t *)0x00C000L)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue