More speed!
This commit is contained in:
parent
04a9550421
commit
20cbccaca5
20 changed files with 130 additions and 71 deletions
13
make/iigs.mk
13
make/iigs.mk
|
|
@ -49,7 +49,18 @@ NTP_BIN := $(BUILD)/audio/ntpplayer.bin
|
||||||
NTP_ASM := $(BUILD)/audio/ntpdata.asm
|
NTP_ASM := $(BUILD)/audio/ntpdata.asm
|
||||||
IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32
|
IIGS_MERLIN := $(REPO_DIR)/toolchains/iigs/merlin32/bin/merlin32
|
||||||
|
|
||||||
LIB_SRCS := $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM) $(CODEGEN_SRCS)
|
# IMPORTANT: CODEGEN_SRCS (specifically spriteEmitIigs.c) MUST be the
|
||||||
|
# first entry after the main object in the link order. ORCA-Linker's
|
||||||
|
# bank assignment is order-sensitive: when spriteEmitIigs.c lands at
|
||||||
|
# any later position, the linker assigns SPRITECG to a bank where its
|
||||||
|
# intra-OMF-segment static-symbol relocations (emitMvnCopyRoutine,
|
||||||
|
# shiftedByteAt, writeLE16) can't be encoded -- you get cryptic
|
||||||
|
# "Addressing error" / "Unresolved reference Label: ..." failures
|
||||||
|
# whose root cause is bank packing, not source. Putting CODEGEN_SRCS
|
||||||
|
# first gives SPRITECG prime placement and the relocations resolve.
|
||||||
|
# This was the underlying cause of feedback_orca_link_segment_count
|
||||||
|
# cases 2-5 (we'd been working around it by managing _ROOT mass).
|
||||||
|
LIB_SRCS := $(CODEGEN_SRCS) $(CORE_C_SRCS_IIGS) $(PORT_C_SRCS) $(PORT_ASM_SRCS_ALL) $(NTP_ASM)
|
||||||
|
|
||||||
HELLO_SRC := $(EXAMPLES)/hello/hello.c
|
HELLO_SRC := $(EXAMPLES)/hello/hello.c
|
||||||
HELLO_BIN := $(BINDIR)/HELLO
|
HELLO_BIN := $(BINDIR)/HELLO
|
||||||
|
|
|
||||||
|
|
@ -158,10 +158,9 @@ bool spriteCompile(SpriteT *sp) {
|
||||||
|
|
||||||
#if defined(JOEYLIB_PLATFORM_IIGS)
|
#if defined(JOEYLIB_PLATFORM_IIGS)
|
||||||
|
|
||||||
// y*160 lookup. gRowOffsetLut is the 200-entry uint16_t table built
|
// SURFACE_ROW_OFFSET dispatches to the gRowOffsetLut lookup on IIgs;
|
||||||
// once by iigsInitRowLut at halInit. Replaces ORCA-C's runtime
|
// declared in surfaceInternal.h. Replaces ORCA-C's __mul16 JSL with a
|
||||||
// multiply (a JSL into __mul16) with a single indexed long-mode read.
|
// single indexed long-mode read.
|
||||||
extern const uint16_t gRowOffsetLut[200];
|
|
||||||
|
|
||||||
// IIgs uses inline asm + a self-modifying call stub instead of a C
|
// IIgs uses inline asm + a self-modifying call stub instead of a C
|
||||||
// function-pointer cast. The build uses ORCA-C large memory model
|
// function-pointer cast. The build uses ORCA-C large memory model
|
||||||
|
|
@ -212,7 +211,7 @@ void spriteCompiledDraw(SurfaceT *dst, const SpriteT *sp, int16_t x, int16_t y)
|
||||||
uint8_t *destPtr;
|
uint8_t *destPtr;
|
||||||
uint8_t destBytes[4];
|
uint8_t destBytes[4];
|
||||||
shift = (uint8_t)(x & 1);
|
shift = (uint8_t)(x & 1);
|
||||||
destPtr = &dst->pixels[gRowOffsetLut[(uint16_t)y] + ((uint16_t)x >> 1)];
|
destPtr = &dst->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)x >> 1)];
|
||||||
memcpy(destBytes, &destPtr, 4);
|
memcpy(destBytes, &destPtr, 4);
|
||||||
destAddr = (uint32_t)destBytes[0]
|
destAddr = (uint32_t)destBytes[0]
|
||||||
| ((uint32_t)destBytes[1] << 8)
|
| ((uint32_t)destBytes[1] << 8)
|
||||||
|
|
@ -366,7 +365,7 @@ void spriteCompiledSaveUnder(const SurfaceT *src, SpriteT *sp, int16_t x, int16_
|
||||||
heightPx = (uint16_t)(sp->heightTiles * 8);
|
heightPx = (uint16_t)(sp->heightTiles * 8);
|
||||||
copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
|
copyBytes = (uint16_t)((widthPx >> 1) + (shift == 1 ? 1 : 0));
|
||||||
|
|
||||||
screenPtr = (uint8_t *)&src->pixels[gRowOffsetLut[(uint16_t)y] + ((uint16_t)clippedX >> 1)];
|
screenPtr = (uint8_t *)&src->pixels[SURFACE_ROW_OFFSET(y) + ((uint16_t)clippedX >> 1)];
|
||||||
splitPointer(screenPtr, &screenLo, &screenBank);
|
splitPointer(screenPtr, &screenLo, &screenBank);
|
||||||
splitPointer(backup->bytes, &backupLo, &backupBank);
|
splitPointer(backup->bytes, &backupLo, &backupBank);
|
||||||
|
|
||||||
|
|
@ -450,7 +449,7 @@ void spriteCompiledRestoreUnder(SurfaceT *dst, const SpriteBackupT *backup) {
|
||||||
spriteBytesPerRow = (uint16_t)(sp->widthTiles * 4);
|
spriteBytesPerRow = (uint16_t)(sp->widthTiles * 4);
|
||||||
shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
|
shift = (copyBytes == spriteBytesPerRow) ? 0 : 1;
|
||||||
|
|
||||||
screenPtr = (uint8_t *)&dst->pixels[gRowOffsetLut[(uint16_t)backup->y] + ((uint16_t)backup->x >> 1)];
|
screenPtr = (uint8_t *)&dst->pixels[SURFACE_ROW_OFFSET(backup->y) + ((uint16_t)backup->x >> 1)];
|
||||||
splitPointer(screenPtr, &screenLo, &screenBank);
|
splitPointer(screenPtr, &screenLo, &screenBank);
|
||||||
splitPointer(backup->bytes, &backupLo, &backupBank);
|
splitPointer(backup->bytes, &backupLo, &backupBank);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -57,7 +57,6 @@ JOEYLIB_SEGMENT("SPRITECG")
|
||||||
static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t copyBytes, bool advanceX);
|
static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t copyBytes, bool advanceX);
|
||||||
static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
|
static void shiftedByteAt(const SpriteT *sp, uint16_t row, uint16_t col, uint8_t shift, uint16_t spriteBytesPerRow, uint8_t *outValue, uint8_t *outOpaqueMask);
|
||||||
static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
|
static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col);
|
||||||
static uint16_t writeLE16(uint8_t *out, uint16_t value);
|
|
||||||
|
|
||||||
|
|
||||||
// ----- Emit helpers (alphabetical) -----
|
// ----- Emit helpers (alphabetical) -----
|
||||||
|
|
@ -126,13 +125,9 @@ static uint8_t spriteSourceByte(const SpriteT *sp, uint16_t row, uint16_t col) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// 65816 is little-endian; write low byte first.
|
// writeLE16 was inlined at every call site. Inlining cuts a JSL/RTL
|
||||||
static uint16_t writeLE16(uint8_t *out, uint16_t value) {
|
// per emitted 16-bit immediate (4 instructions per byte * 12 sites)
|
||||||
out[0] = (uint8_t)(value & 0xFFu);
|
// and avoids ORCA-Linker bank-fragility around tiny-helper resolution.
|
||||||
out[1] = (uint8_t)((value >> 8) & 0xFFu);
|
|
||||||
return 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Common backbone for save and restore. Both ops copy a byte-aligned
|
// Common backbone for save and restore. Both ops copy a byte-aligned
|
||||||
// rectangle row-by-row using MVN; only the operand banks (which buffer
|
// rectangle row-by-row using MVN; only the operand banks (which buffer
|
||||||
|
|
@ -178,11 +173,13 @@ static uint16_t emitMvnCopyRoutine(uint8_t *out, uint16_t heightPx, uint16_t cop
|
||||||
out[cursor++] = advanceX ? 0x8A : 0x98; // TXA / TYA
|
out[cursor++] = advanceX ? 0x8A : 0x98; // TXA / TYA
|
||||||
out[cursor++] = 0x18; // CLC
|
out[cursor++] = 0x18; // CLC
|
||||||
out[cursor++] = 0x69; // ADC #imm (M=16)
|
out[cursor++] = 0x69; // ADC #imm (M=16)
|
||||||
cursor += writeLE16(out + cursor, advance);
|
out[cursor++] = (uint8_t)(advance & 0xFFu);
|
||||||
|
out[cursor++] = (uint8_t)((advance >> 8) & 0xFFu);
|
||||||
out[cursor++] = advanceX ? 0xAA : 0xA8; // TAX / TAY
|
out[cursor++] = advanceX ? 0xAA : 0xA8; // TAX / TAY
|
||||||
}
|
}
|
||||||
out[cursor++] = 0xA9; // LDA #imm (M=16)
|
out[cursor++] = 0xA9; // LDA #imm (M=16)
|
||||||
cursor += writeLE16(out + cursor, (uint16_t)(copyBytes - 1));
|
out[cursor++] = (uint8_t)((copyBytes - 1) & 0xFFu);
|
||||||
|
out[cursor++] = (uint8_t)(((copyBytes - 1) >> 8) & 0xFFu);
|
||||||
out[cursor++] = 0x54; // MVN
|
out[cursor++] = 0x54; // MVN
|
||||||
out[cursor++] = 0x00; // dstbk -- patched per call
|
out[cursor++] = 0x00; // dstbk -- patched per call
|
||||||
out[cursor++] = 0x00; // srcbk -- patched per call
|
out[cursor++] = 0x00; // srcbk -- patched per call
|
||||||
|
|
@ -299,11 +296,15 @@ uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
out[cursor++] = 0x20;
|
out[cursor++] = 0x20;
|
||||||
wide = true;
|
wide = true;
|
||||||
}
|
}
|
||||||
|
{
|
||||||
|
uint16_t pair = (uint16_t)(((uint16_t)nextValue << 8) | value);
|
||||||
out[cursor++] = 0xA9; // LDA #imm16
|
out[cursor++] = 0xA9; // LDA #imm16
|
||||||
cursor += writeLE16(out + cursor,
|
out[cursor++] = (uint8_t)(pair & 0xFFu);
|
||||||
(uint16_t)(((uint16_t)nextValue << 8) | value));
|
out[cursor++] = (uint8_t)((pair >> 8) & 0xFFu);
|
||||||
out[cursor++] = 0x99; // STA abs,Y
|
out[cursor++] = 0x99; // STA abs,Y
|
||||||
cursor += writeLE16(out + cursor, absOffset);
|
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||||
|
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||||
|
}
|
||||||
col++; // consumed col+1
|
col++; // consumed col+1
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
@ -321,16 +322,19 @@ uint16_t spriteEmitDrawIigs(uint8_t *out, const SpriteT *sp, uint8_t shift) {
|
||||||
out[cursor++] = 0xA9;
|
out[cursor++] = 0xA9;
|
||||||
out[cursor++] = value;
|
out[cursor++] = value;
|
||||||
out[cursor++] = 0x99;
|
out[cursor++] = 0x99;
|
||||||
cursor += writeLE16(out + cursor, absOffset);
|
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||||
|
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||||
} else {
|
} else {
|
||||||
out[cursor++] = 0xB9;
|
out[cursor++] = 0xB9;
|
||||||
cursor += writeLE16(out + cursor, absOffset);
|
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||||
|
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||||
out[cursor++] = 0x29;
|
out[cursor++] = 0x29;
|
||||||
out[cursor++] = (uint8_t)(~opaqueMask & 0xFFu);
|
out[cursor++] = (uint8_t)(~opaqueMask & 0xFFu);
|
||||||
out[cursor++] = 0x09;
|
out[cursor++] = 0x09;
|
||||||
out[cursor++] = value;
|
out[cursor++] = value;
|
||||||
out[cursor++] = 0x99;
|
out[cursor++] = 0x99;
|
||||||
cursor += writeLE16(out + cursor, absOffset);
|
out[cursor++] = (uint8_t)(absOffset & 0xFFu);
|
||||||
|
out[cursor++] = (uint8_t)((absOffset >> 8) & 0xFFu);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,9 @@
|
||||||
#include "joey/asset.h"
|
#include "joey/asset.h"
|
||||||
#include "joey/palette.h"
|
#include "joey/palette.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
#define JAS_HEADER_SIZE 44
|
#define JAS_HEADER_SIZE 44
|
||||||
#define JAS_PIXELS_OFFSET JAS_HEADER_SIZE
|
#define JAS_PIXELS_OFFSET JAS_HEADER_SIZE
|
||||||
#define JAS_PALETTE_OFFSET 12
|
#define JAS_PALETTE_OFFSET 12
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,9 @@
|
||||||
#include "joey/audio.h"
|
#include "joey/audio.h"
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
static bool gAudioReady = false;
|
static bool gAudioReady = false;
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,9 @@
|
||||||
|
|
||||||
#include "codegenArenaInternal.h"
|
#include "codegenArenaInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
|
|
||||||
// ----- Module state -----
|
// ----- Module state -----
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,12 @@
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <stdarg.h>
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
#include "joey/platform.h"
|
||||||
#include "joey/debug.h"
|
#include "joey/debug.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
static const char *kLogPath = "joeylog.txt";
|
static const char *kLogPath = "joeylog.txt";
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -113,7 +113,7 @@ static void fillRectClipped(SurfaceT *s, int16_t x, int16_t y, int16_t w, int16_
|
||||||
uint8_t *line;
|
uint8_t *line;
|
||||||
|
|
||||||
for (row = 0; row < h; row++) {
|
for (row = 0; row < h; row++) {
|
||||||
line = &s->pixels[(y + row) * SURFACE_BYTES_PER_ROW];
|
line = &s->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||||
pxStart = x;
|
pxStart = x;
|
||||||
pxEnd = x + w;
|
pxEnd = x + w;
|
||||||
|
|
||||||
|
|
@ -208,7 +208,7 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
|
|
||||||
// Fallback path needs row; compute it here so the asm path
|
// Fallback path needs row; compute it here so the asm path
|
||||||
// above doesn't pay for an unused y*160 multiply on every iter.
|
// above doesn't pay for an unused row-offset computation on every iter.
|
||||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
||||||
|
|
||||||
// Tier-2 asm fast path: combined seed test + walk-left +
|
// Tier-2 asm fast path: combined seed test + walk-left +
|
||||||
// walk-right in one cross-segment call. Falls back to the
|
// walk-right in one cross-segment call. Falls back to the
|
||||||
|
|
@ -294,7 +294,7 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
}
|
}
|
||||||
scanY = (int16_t)(y + 1);
|
scanY = (int16_t)(y + 1);
|
||||||
}
|
}
|
||||||
scanRow = &s->pixels[scanY * SURFACE_BYTES_PER_ROW];
|
scanRow = &s->pixels[SURFACE_ROW_OFFSET(scanY)];
|
||||||
// Prefer the combined scan+push asm path (one call per
|
// Prefer the combined scan+push asm path (one call per
|
||||||
// scan, no markBuf and no per-pixel C edge walk).
|
// scan, no markBuf and no per-pixel C edge walk).
|
||||||
if (!halFastFloodScanAndPush(scanRow, leftX, rightX,
|
if (!halFastFloodScanAndPush(scanRow, leftX, rightX,
|
||||||
|
|
@ -502,7 +502,7 @@ void drawPixel(SurfaceT *s, int16_t x, int16_t y, uint8_t colorIndex) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!halFastDrawPixel(s, (uint16_t)x, (uint16_t)y, colorIndex)) {
|
if (!halFastDrawPixel(s, (uint16_t)x, (uint16_t)y, colorIndex)) {
|
||||||
byte = &s->pixels[y * SURFACE_BYTES_PER_ROW + (x >> 1)];
|
byte = &s->pixels[SURFACE_ROW_OFFSET(y) + (x >> 1)];
|
||||||
nibble = colorIndex & 0x0F;
|
nibble = colorIndex & 0x0F;
|
||||||
if (x & 1) {
|
if (x & 1) {
|
||||||
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
*byte = (uint8_t)((*byte & 0xF0) | nibble);
|
||||||
|
|
@ -625,7 +625,7 @@ void floodFill(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor) {
|
||||||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
||||||
seedColor = srcPixel(row, x);
|
seedColor = srcPixel(row, x);
|
||||||
if ((seedColor & 0x0F) == (newColor & 0x0F)) {
|
if ((seedColor & 0x0F) == (newColor & 0x0F)) {
|
||||||
return;
|
return;
|
||||||
|
|
@ -644,7 +644,7 @@ void floodFillBounded(SurfaceT *s, int16_t x, int16_t y, uint8_t newColor, uint8
|
||||||
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
if (x < 0 || x >= SURFACE_WIDTH || y < 0 || y >= SURFACE_HEIGHT) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
row = &s->pixels[SURFACE_ROW_OFFSET(y)];
|
||||||
pix = srcPixel(row, x);
|
pix = srcPixel(row, x);
|
||||||
// Starting on a boundary pixel or already-filled pixel: nothing
|
// Starting on a boundary pixel or already-filled pixel: nothing
|
||||||
// to do.
|
// to do.
|
||||||
|
|
@ -668,7 +668,7 @@ uint8_t samplePixel(const SurfaceT *s, int16_t x, int16_t y) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
byte = s->pixels[y * SURFACE_BYTES_PER_ROW + (x >> 1)];
|
byte = s->pixels[SURFACE_ROW_OFFSET(y) + (x >> 1)];
|
||||||
if (x & 1) {
|
if (x & 1) {
|
||||||
return (uint8_t)(byte & 0x0F);
|
return (uint8_t)(byte & 0x0F);
|
||||||
}
|
}
|
||||||
|
|
@ -698,12 +698,12 @@ void surfaceBlit(SurfaceT *dst, const JoeyAssetT *src, int16_t x, int16_t y) {
|
||||||
|
|
||||||
srcRowBytes = (int16_t)((src->width + 1) >> 1);
|
srcRowBytes = (int16_t)((src->width + 1) >> 1);
|
||||||
srcRow = &src->pixels[srcY0 * srcRowBytes];
|
srcRow = &src->pixels[srcY0 * srcRowBytes];
|
||||||
dstRow = &dst->pixels[y * SURFACE_BYTES_PER_ROW];
|
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y)];
|
||||||
if (!halFastBlitRect(dstRow, x, srcRow, srcX0,
|
if (!halFastBlitRect(dstRow, x, srcRow, srcX0,
|
||||||
copyW, copyH, srcRowBytes, 0xFFFFu)) {
|
copyW, copyH, srcRowBytes, 0xFFFFu)) {
|
||||||
for (row = 0; row < copyH; row++) {
|
for (row = 0; row < copyH; row++) {
|
||||||
srcRow = &src->pixels[(srcY0 + row) * srcRowBytes];
|
srcRow = &src->pixels[(srcY0 + row) * srcRowBytes];
|
||||||
dstRow = &dst->pixels[(y + row) * SURFACE_BYTES_PER_ROW];
|
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||||
for (col = 0; col < copyW; col++) {
|
for (col = 0; col < copyW; col++) {
|
||||||
nibble = srcPixel(srcRow, srcX0 + col);
|
nibble = srcPixel(srcRow, srcX0 + col);
|
||||||
dstPixel(dstRow, x + col, nibble);
|
dstPixel(dstRow, x + col, nibble);
|
||||||
|
|
@ -738,12 +738,12 @@ void surfaceBlitMasked(SurfaceT *dst, const JoeyAssetT *src, int16_t x, int16_t
|
||||||
transparent = (uint8_t)(transparentIndex & 0x0F);
|
transparent = (uint8_t)(transparentIndex & 0x0F);
|
||||||
srcRowBytes = (int16_t)((src->width + 1) >> 1);
|
srcRowBytes = (int16_t)((src->width + 1) >> 1);
|
||||||
srcRow = &src->pixels[srcY0 * srcRowBytes];
|
srcRow = &src->pixels[srcY0 * srcRowBytes];
|
||||||
dstRow = &dst->pixels[y * SURFACE_BYTES_PER_ROW];
|
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y)];
|
||||||
if (!halFastBlitRect(dstRow, x, srcRow, srcX0,
|
if (!halFastBlitRect(dstRow, x, srcRow, srcX0,
|
||||||
copyW, copyH, srcRowBytes, (uint16_t)transparent)) {
|
copyW, copyH, srcRowBytes, (uint16_t)transparent)) {
|
||||||
for (row = 0; row < copyH; row++) {
|
for (row = 0; row < copyH; row++) {
|
||||||
srcRow = &src->pixels[(srcY0 + row) * srcRowBytes];
|
srcRow = &src->pixels[(srcY0 + row) * srcRowBytes];
|
||||||
dstRow = &dst->pixels[(y + row) * SURFACE_BYTES_PER_ROW];
|
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(y + row)];
|
||||||
for (col = 0; col < copyW; col++) {
|
for (col = 0; col < copyW; col++) {
|
||||||
nibble = srcPixel(srcRow, srcX0 + col);
|
nibble = srcPixel(srcRow, srcX0 + col);
|
||||||
if (nibble == transparent) {
|
if (nibble == transparent) {
|
||||||
|
|
|
||||||
|
|
@ -262,15 +262,19 @@ extern uint16_t gFloodRightX;
|
||||||
true) \
|
true) \
|
||||||
: false)
|
: false)
|
||||||
|
|
||||||
// halFastFillRect stays as a real C wrapper -- removing it triggered
|
// halFastFillRect: macro form, same shape as the others. Builds
|
||||||
// an unrelated ORCA linker bank-placement failure (same mode as the
|
// clean now that _ROOT has been thinned out via the CORESYS load
|
||||||
// peislam.asm deletion: `Unresolved reference Label:
|
// segment migration -- previous attempts shrank _ROOT enough to
|
||||||
// emitMvnCopyRoutine` in sprite codegen). The wrapper now just
|
// retrip the bank-packing fragility, but with most core .c files
|
||||||
// forwards to iigsFillRectInner (asm does partial+middle); we lose
|
// out of _ROOT the link no longer reacts to it. Saves ~80 cycles/call.
|
||||||
// the call-site macro inlining for fillRect specifically but keep
|
#undef halFastFillRect
|
||||||
// the rest of the macros AND the new asm helper. Per-call wrapper
|
#define halFastFillRect(_s, _x, _y, _w, _h, _c) \
|
||||||
// overhead for halFastFillRect is back (~80 cyc) but at least the
|
((_s) == stageGet() \
|
||||||
// per-row partial-byte logic happens in asm now.
|
? (iigsFillRectInner((_s)->pixels, (uint16_t)(_x), (uint16_t)(_y), \
|
||||||
|
(uint16_t)(_w), (uint16_t)(_h), \
|
||||||
|
(uint16_t)((_c) & 0x0F)), \
|
||||||
|
true) \
|
||||||
|
: false)
|
||||||
|
|
||||||
// Tile primitives operate on caller-computed row pointers; just
|
// Tile primitives operate on caller-computed row pointers; just
|
||||||
// forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte
|
// forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,9 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
// 8 KB fits the largest typical sprite working set (~3-4 KB per
|
// 8 KB fits the largest typical sprite working set (~3-4 KB per
|
||||||
// 32x32 sprite at all opaque) and keeps malloc requests small enough
|
// 32x32 sprite at all opaque) and keeps malloc requests small enough
|
||||||
// for IIgs ORCA-C's small-memory-model heap to satisfy them.
|
// for IIgs ORCA-C's small-memory-model heap to satisfy them.
|
||||||
|
|
|
||||||
|
|
@ -15,6 +15,9 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "inputInternal.h"
|
#include "inputInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
bool gKeyState [KEY_COUNT];
|
bool gKeyState [KEY_COUNT];
|
||||||
bool gKeyPrev [KEY_COUNT];
|
bool gKeyPrev [KEY_COUNT];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,9 @@
|
||||||
#include "joey/palette.h"
|
#include "joey/palette.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
// ----- Public API (alphabetical) -----
|
// ----- Public API (alphabetical) -----
|
||||||
|
|
||||||
void paletteGet(const SurfaceT *s, uint8_t paletteIndex, uint16_t *out16) {
|
void paletteGet(const SurfaceT *s, uint8_t paletteIndex, uint16_t *out16) {
|
||||||
|
|
|
||||||
|
|
@ -12,6 +12,9 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
// ----- Public API (alphabetical) -----
|
// ----- Public API (alphabetical) -----
|
||||||
|
|
||||||
void stagePresent(void) {
|
void stagePresent(void) {
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,9 @@
|
||||||
#include "joey/palette.h"
|
#include "joey/palette.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
// ----- Public API (alphabetical) -----
|
// ----- Public API (alphabetical) -----
|
||||||
|
|
||||||
uint8_t scbGet(const SurfaceT *s, uint16_t line) {
|
uint8_t scbGet(const SurfaceT *s, uint16_t line) {
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,9 @@
|
||||||
#include "spriteInternal.h"
|
#include "spriteInternal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
// 8x8 tiles, 4bpp packed = 4 bytes/row * 8 rows = 32 bytes/tile.
|
// 8x8 tiles, 4bpp packed = 4 bytes/row * 8 rows = 32 bytes/tile.
|
||||||
#define TILE_BYTES 32
|
#define TILE_BYTES 32
|
||||||
#define TILE_PIXELS 8
|
#define TILE_PIXELS 8
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,13 @@
|
||||||
#include "hal.h"
|
#include "hal.h"
|
||||||
#include "surfaceInternal.h"
|
#include "surfaceInternal.h"
|
||||||
|
|
||||||
|
// Hoist into a CORESYS load segment alongside the other small core
|
||||||
|
// files. Keeps _ROOT thin and stable so it stops reacting to per-file
|
||||||
|
// source changes -- _ROOT size flux was tripping ORCA-Linker bank
|
||||||
|
// packing in spriteEmitIigs.c (see feedback_orca_link_segment_count
|
||||||
|
// cases 2-4).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
#ifdef JOEYLIB_PLATFORM_IIGS
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||||
extern void iigsMarkDirtyRowsInner(uint16_t yStart, uint16_t yEnd, uint16_t minWord, uint16_t maxWord);
|
extern void iigsMarkDirtyRowsInner(uint16_t yStart, uint16_t yEnd, uint16_t minWord, uint16_t maxWord);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -60,6 +60,19 @@ void surfaceMarkDirtyAll(const SurfaceT *s);
|
||||||
// Reset every row to CLEAN. Called by stagePresent after the slam.
|
// Reset every row to CLEAN. Called by stagePresent after the slam.
|
||||||
void stageDirtyClearAll(void);
|
void stageDirtyClearAll(void);
|
||||||
|
|
||||||
|
// y -> byte offset of row y in a SURFACE_BYTES_PER_ROW-strided buffer.
|
||||||
|
// On IIgs this expands to a single indexed long-mode read against
|
||||||
|
// gRowOffsetLut (built once at halInit). On other ports it's the
|
||||||
|
// straight multiply -- those compilers (gcc, OpenWatcom) optimize the
|
||||||
|
// constant 160 to a shift+add chain that's already cheap. The point
|
||||||
|
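// is to dodge ORCA-C's __mul16 JSL on every per-row pointer computation.
|
||||||
|
// NOTE: the IIgs form indexes the 200-entry table with no bounds check,
|
||||||
|
// so y must already be clipped to 0..199 by the caller.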
|
||||||
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
||||||
|
extern const uint16_t gRowOffsetLut[200];
|
||||||
|
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)gRowOffsetLut[(uint16_t)(_y)])
|
||||||
|
#else
|
||||||
|
#define SURFACE_ROW_OFFSET(_y) ((uint16_t)((uint16_t)(_y) * SURFACE_BYTES_PER_ROW))
|
||||||
|
#endif
|
||||||
|
|
||||||
// Allocate and free the library-owned stage (the back-buffer surface
|
// Allocate and free the library-owned stage (the back-buffer surface
|
||||||
// that stagePresent flips to the display). Called from init.c during
|
// that stagePresent flips to the display). Called from init.c during
|
||||||
// joeyInit / joeyShutdown. The stage's pixel storage is supplied by
|
// joeyInit / joeyShutdown. The stage's pixel storage is supplied by
|
||||||
|
|
|
||||||
|
|
@ -145,8 +145,8 @@ void tileCopy(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT *src,
|
||||||
srcPixelX = (uint16_t)((uint16_t)srcBx * TILE_PIXELS_PER_SIDE);
|
srcPixelX = (uint16_t)((uint16_t)srcBx * TILE_PIXELS_PER_SIDE);
|
||||||
srcPixelY = (uint16_t)((uint16_t)srcBy * TILE_PIXELS_PER_SIDE);
|
srcPixelY = (uint16_t)((uint16_t)srcBy * TILE_PIXELS_PER_SIDE);
|
||||||
|
|
||||||
dstRow0 = &dst->pixels[dstPixelY * SURFACE_BYTES_PER_ROW + (dstPixelX >> 1)];
|
dstRow0 = &dst->pixels[SURFACE_ROW_OFFSET(dstPixelY) + (dstPixelX >> 1)];
|
||||||
srcRow0 = &src->pixels[srcPixelY * SURFACE_BYTES_PER_ROW + (srcPixelX >> 1)];
|
srcRow0 = &src->pixels[SURFACE_ROW_OFFSET(srcPixelY) + (srcPixelX >> 1)];
|
||||||
|
|
||||||
if (!halFastTileCopy(dstRow0, srcRow0)) {
|
if (!halFastTileCopy(dstRow0, srcRow0)) {
|
||||||
copyTileOpaque(dstRow0, srcRow0);
|
copyTileOpaque(dstRow0, srcRow0);
|
||||||
|
|
@ -176,8 +176,8 @@ void tileCopyMasked(SurfaceT *dst, uint8_t dstBx, uint8_t dstBy, const SurfaceT
|
||||||
srcPixelX = (uint16_t)((uint16_t)srcBx * TILE_PIXELS_PER_SIDE);
|
srcPixelX = (uint16_t)((uint16_t)srcBx * TILE_PIXELS_PER_SIDE);
|
||||||
srcPixelY = (uint16_t)((uint16_t)srcBy * TILE_PIXELS_PER_SIDE);
|
srcPixelY = (uint16_t)((uint16_t)srcBy * TILE_PIXELS_PER_SIDE);
|
||||||
|
|
||||||
dstRow0 = &dst->pixels[dstPixelY * SURFACE_BYTES_PER_ROW + (dstPixelX >> 1)];
|
dstRow0 = &dst->pixels[SURFACE_ROW_OFFSET(dstPixelY) + (dstPixelX >> 1)];
|
||||||
srcRow0 = &src->pixels[srcPixelY * SURFACE_BYTES_PER_ROW + (srcPixelX >> 1)];
|
srcRow0 = &src->pixels[SURFACE_ROW_OFFSET(srcPixelY) + (srcPixelX >> 1)];
|
||||||
|
|
||||||
if (!halFastTileCopyMasked(dstRow0, srcRow0, transparentIndex)) {
|
if (!halFastTileCopyMasked(dstRow0, srcRow0, transparentIndex)) {
|
||||||
copyTileMasked(dstRow0, srcRow0, transparentIndex);
|
copyTileMasked(dstRow0, srcRow0, transparentIndex);
|
||||||
|
|
@ -203,7 +203,7 @@ void tileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint8_t colorIndex) {
|
||||||
doubled = (uint8_t)(((colorIndex & 0x0F) << 4) | (colorIndex & 0x0F));
|
doubled = (uint8_t)(((colorIndex & 0x0F) << 4) | (colorIndex & 0x0F));
|
||||||
if (!halFastTileFill(s, bx, by,
|
if (!halFastTileFill(s, bx, by,
|
||||||
(uint16_t)((uint16_t)doubled | ((uint16_t)doubled << 8)))) {
|
(uint16_t)((uint16_t)doubled | ((uint16_t)doubled << 8)))) {
|
||||||
uint8_t *row = &s->pixels[pixelY * SURFACE_BYTES_PER_ROW + (pixelX >> 1)];
|
uint8_t *row = &s->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||||
uint8_t i;
|
uint8_t i;
|
||||||
for (i = 0; i < TILE_PIXELS_PER_SIDE; i++) {
|
for (i = 0; i < TILE_PIXELS_PER_SIDE; i++) {
|
||||||
row[0] = doubled;
|
row[0] = doubled;
|
||||||
|
|
@ -233,7 +233,7 @@ void tilePaste(SurfaceT *dst, uint8_t bx, uint8_t by, const TileT *in) {
|
||||||
}
|
}
|
||||||
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
||||||
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
||||||
dstRow = &dst->pixels[pixelY * SURFACE_BYTES_PER_ROW + (pixelX >> 1)];
|
dstRow = &dst->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||||
src = &in->pixels[0];
|
src = &in->pixels[0];
|
||||||
if (!halFastTilePaste(dstRow, src)) {
|
if (!halFastTilePaste(dstRow, src)) {
|
||||||
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
||||||
|
|
@ -265,7 +265,7 @@ void tileSnap(const SurfaceT *src, uint8_t bx, uint8_t by, TileT *out) {
|
||||||
}
|
}
|
||||||
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
pixelX = (uint16_t)((uint16_t)bx * TILE_PIXELS_PER_SIDE);
|
||||||
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
pixelY = (uint16_t)((uint16_t)by * TILE_PIXELS_PER_SIDE);
|
||||||
srcRow = &src->pixels[pixelY * SURFACE_BYTES_PER_ROW + (pixelX >> 1)];
|
srcRow = &src->pixels[SURFACE_ROW_OFFSET(pixelY) + (pixelX >> 1)];
|
||||||
dst = &out->pixels[0];
|
dst = &out->pixels[0];
|
||||||
if (!halFastTileSnap(dst, srcRow)) {
|
if (!halFastTileSnap(dst, srcRow)) {
|
||||||
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
for (row = 0; row < TILE_PIXELS_PER_SIDE; row++) {
|
||||||
|
|
|
||||||
|
|
@ -253,7 +253,7 @@ void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint1
|
||||||
// Asm per-row MVN blit. Stage pixels live at $01:2000; SHR display
|
// Asm per-row MVN blit. Stage pixels live at $01:2000; SHR display
|
||||||
// at $E1:2000 (same offset within their banks). srcOffset is the
|
// at $E1:2000 (same offset within their banks). srcOffset is the
|
||||||
// byte offset of the first byte to copy on the first row.
|
// byte offset of the first byte to copy on the first row.
|
||||||
srcOffset = (uint16_t)(0x2000 + y * SURFACE_BYTES_PER_ROW + byteStart);
|
srcOffset = (uint16_t)(0x2000 + SURFACE_ROW_OFFSET(y) + byteStart);
|
||||||
iigsBlitRectStageToShr(srcOffset, copyBytes, h);
|
iigsBlitRectStageToShr(srcOffset, copyBytes, h);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -278,23 +278,10 @@ void halShutdown(void) {
|
||||||
// partial-byte (nibble-edge) handling is too gnarly for a macro.
|
// partial-byte (nibble-edge) handling is too gnarly for a macro.
|
||||||
|
|
||||||
|
|
||||||
// halFastFillRect: thin wrapper around iigsFillRectInner. The asm
|
// halFastFillRect: macro-dispatched in core/hal.h, same as the other
|
||||||
// helper now handles the partial-byte (nibble-edge) logic that used
|
// halFast* primitives. The C wrapper that used to live here was kept
|
||||||
// to live here, so this function is just a stage-check + forward.
|
// as load-bearing _ROOT mass to defeat ORCA-Linker bank fragility;
|
||||||
// (It's not macro-dispatched like the others because removing it
|
// since the CORESYS migration drained _ROOT, the macro form is safe.
|
||||||
// from the C side triggers an unrelated ORCA-linker bank-placement
|
|
||||||
// failure -- the binary needs enough mass in _ROOT to keep sprite
|
|
||||||
// codegen's static symbols at addresses the linker can resolve.)
|
|
||||||
bool halFastFillRect(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex) {
|
|
||||||
if (s == NULL || s != stageGet()) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
iigsFillRectInner(s->pixels,
|
|
||||||
(uint16_t)x, (uint16_t)y,
|
|
||||||
(uint16_t)w, (uint16_t)h,
|
|
||||||
(uint16_t)(colorIndex & 0x0F));
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
uint8_t *halStageAllocPixels(void) {
|
uint8_t *halStageAllocPixels(void) {
|
||||||
|
|
|
||||||
|
|
@ -37,6 +37,9 @@
|
||||||
#include "inputInternal.h"
|
#include "inputInternal.h"
|
||||||
#include "joey/surface.h"
|
#include "joey/surface.h"
|
||||||
|
|
||||||
|
// CORESYS: hoisted out of _ROOT (see surface.c for rationale).
|
||||||
|
JOEYLIB_SEGMENT("CORESYS")
|
||||||
|
|
||||||
// ----- Hardware registers -----
|
// ----- Hardware registers -----
|
||||||
|
|
||||||
#define IIGS_KBD ((volatile uint8_t *)0x00C000L)
|
#define IIGS_KBD ((volatile uint8_t *)0x00C000L)
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue