336 lines
17 KiB
C
336 lines
17 KiB
C
// Internal HAL (hardware abstraction layer) interface.
|
|
//
|
|
// This header is included by src/core/*.c and by per-port source under
|
|
// src/port/<platform>/. It is NOT part of the public API and must not
|
|
// be installed or exposed to game code.
|
|
//
|
|
// Each port must implement every function declared here.
|
|
|
|
#ifndef JOEYLIB_HAL_H
|
|
#define JOEYLIB_HAL_H
|
|
|
|
#include "joey/core.h"
|
|
#include "joey/input.h"
|
|
#include "joey/surface.h"
|
|
|
|
// Per-port one-shot initialization. Called from joeyInit after config
|
|
// has been stored but before any surfaces are created. The port sets up
|
|
// the display mode, allocates any HW-adjacent buffers (chip RAM on
|
|
// Amiga, VGA mode on DOS, SHR on IIgs), and prepares for presents.
|
|
// Returns true on success. On failure, halLastError may be set.
|
|
bool halInit(const JoeyConfigT *config);
|
|
|
|
// Port-specific teardown: restores the display mode and frees the
// HW-adjacent buffers.
void halShutdown(void);
|
|
|
|
// The library-owned stage surface is backed by a pixel buffer of
// SURFACE_PIXELS_SIZE bytes; these two calls allocate and release it.
// A port that has a hardware-friendly pin location for the back buffer
// (IIgs $01/2000 with SHR shadow inhibited) returns that address from
// the allocator; a port with no such constraint just uses malloc/free.
uint8_t *halStageAllocPixels(void);
void     halStageFreePixels(uint8_t *pixels);
|
|
|
|
// Present the entire source surface to the display.
|
|
void halPresent(const SurfaceT *src);
|
|
|
|
// Present a rectangular region of the source surface. The caller has
|
|
// already validated and clipped the rect to be fully inside the
|
|
// surface bounds and to have positive extents.
|
|
void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h);
|
|
|
|
// Optional. Returns a port-specific message string describing the most
// recent HAL failure, or NULL when there is none. A port is allowed to
// return NULL unconditionally.
const char *halLastError(void);
|
|
|
|
// Input: port-specific keyboard setup, polling, and teardown.
//   halInputInit     - called at the end of joeyInit.
//   halInputShutdown - called from joeyShutdown, before halShutdown.
//   halInputPoll     - refreshes the core-owned key-state array
//                      (declared in core/inputInternal.h): the port
//                      writes true into gKeyState[key] for each held
//                      key. Keys the port does not recognize simply
//                      stay zero.
void halInputInit(void);
void halInputShutdown(void);
void halInputPoll(void);
|
|
|
|
// Blocks until the display's next vertical blank. Each port uses
// whatever native wait its hardware offers (VGA $3DA,
// graphics.library WaitTOF, XBIOS Vsync, $C019 polling).
void halWaitVBL(void);
|
|
|
|
// Audio: port-specific engine setup, module + SFX playback, teardown.
// halAudioInit returns true when the platform has a working engine.
// Every entry point remains safe to call after a failed init -- in
// that case the calls are no-ops. The public API contract that wraps
// these functions is described in joey/audio.h.
bool halAudioInit(void);
void halAudioShutdown(void);

void halAudioPlayMod(const uint8_t *data, uint32_t length, bool loop);
void halAudioStopMod(void);
bool halAudioIsPlayingMod(void);

void halAudioPlaySfx(uint8_t slot, const uint8_t *sample,
                     uint32_t length, uint16_t rateHz);
void halAudioStopSfx(uint8_t slot);

void halAudioFrameTick(void);
|
|
|
|
// Optional fast-path hooks. Each returns true if the port handled the
|
|
// operation in a port-specific accelerated path; false means the
|
|
// caller should fall back to the platform-agnostic C implementation.
|
|
//
|
|
// Funneling all asm dispatches through hal.c (one TU per port) avoids
|
|
// the cumulative ORCA Linker "Expression too complex" failure that
|
|
// hits when multiple cross-platform TUs each call into a named load
|
|
// segment full of asm primitives. Cross-platform code in src/core/
|
|
// only ever calls into HAL, so the link-time expression cost is paid
|
|
// once per binary -- not once per TU that wants speed.
|
|
//
|
|
// Each port must provide all of these; ports without an accelerated
|
|
// path simply return false from every hook.
|
|
// Clear an entire surface. `doubled` is the fill byte; the IIgs
// dispatch macro for this hook repeats it into both bytes of a 16-bit
// fill word.
bool halFastSurfaceClear(SurfaceT *s, uint8_t doubled);

// Fill the rectangle described by x, y, w, h with colorIndex.
bool halFastFillRect(SurfaceT *s,
                     int16_t x, int16_t y,
                     uint16_t w, uint16_t h,
                     uint8_t colorIndex);

// Fill one tile with fillWord. (bx, by) are tile coordinates rather
// than pixel coordinates.
bool halFastTileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint16_t fillWord);
|
|
// Tile primitives. The C wrapper computes the row-0 pointers before
// calling in: dstRow0 / srcRow0 address the first byte of the 8x8
// region inside their respective surfaces (stride 160). For
// tilePaste / tileSnap the TileT side is a packed 32-byte buffer
// (stride 4), and the matching pointer addresses byte 0 of that
// buffer.
bool halFastTileCopy(uint8_t *dstRow0, const uint8_t *srcRow0);
bool halFastTileCopyMasked(uint8_t *dstRow0, const uint8_t *srcRow0,
                           uint8_t transparent);
bool halFastTilePaste(uint8_t *dstRow0, const uint8_t *srcTilePixels);
bool halFastTileSnap(uint8_t *dstTilePixels, const uint8_t *srcRow0);
|
|
|
|
// drawPixel inner: caller has already done NULL + bounds checks.
|
|
// (x, y) are guaranteed in [0..SURFACE_WIDTH-1] x [0..SURFACE_HEIGHT-1].
|
|
// colorIndex is the 0..15 nibble. Surface dirty marking happens in
|
|
// the C wrapper after this returns.
|
|
bool halFastDrawPixel(SurfaceT *s, uint16_t x, uint16_t y, uint8_t colorIndex);
|
|
|
|
// drawLine inner for the diagonal case. Caller ensures both endpoints
|
|
// are inside the surface bounds, so the inner loop runs without
|
|
// per-pixel clip checks. The C wrapper still routes pure horizontal
|
|
// and vertical lines through fillRect (which has its own fast path).
|
|
bool halFastDrawLine(SurfaceT *s, int16_t x0, int16_t y0, int16_t x1, int16_t y1, uint8_t colorIndex);
|
|
|
|
// drawCircle / fillCircle inner. Caller has already validated that
|
|
// the entire bounding circle (cx-r .. cx+r, cy-r .. cy+r) fits inside
|
|
// the surface bounds, so the inner loop plots every octant pixel
|
|
// unconditionally. r is guaranteed > 0; the cx == 0 / r == 0 cases
|
|
// stay in the C wrapper.
|
|
bool halFastDrawCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIndex);
|
|
bool halFastFillCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIndex);
|
|
|
|
// floodFill helper covering one row: seed test, walk-left and
// walk-right combined in a single call.
//
// A true return means the port handled it (asm path taken). In that
// case *seedMatched tells the caller whether the seed pixel satisfied
// the match criterion: if it did not, the caller skips this pop; if it
// did, *leftXOut / *rightXOut hold the run boundaries.
// A false return means there is no asm path and the caller falls back
// to the C walks.
bool halFastFloodWalk(uint8_t *row, int16_t startX,
                      uint8_t matchColor, uint8_t newColor, bool matchEqual,
                      bool *seedMatched,
                      int16_t *leftXOut, int16_t *rightXOut);
|
|
|
|
// floodFill helper for the run-detection scans of the row above and
// the row below. Visits pixels leftX..rightX (inclusive) of `row` and
// writes one byte per pixel into markBuf: 1 when the pixel qualifies
// for the flood, 0 when it does not. The C side then walks markBuf
// looking for run-edge transitions, which replaces the per-pixel
// srcPixel + match check inside its inner loop.
// Returns true if the port handled it; false to fall back to C.
bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX,
                         uint8_t matchColor, uint8_t newColor, bool matchEqual,
                         uint8_t *markBuf);
|
|
|
|
// Per-pixel scan, run-edge walk and seed push in one call. This sits
// one level above halFastFloodScanRow: it replaces both the markBuf
// fill AND the C loop that walks markBuf for falling edges.
// *spInOut is read on entry and holds the new top-of-stack on return.
// Returns true if the port handled it (the caller then skips the C
// run-edge walk entirely); false to fall back to
// halFastFloodScanRow + C walk.
bool halFastFloodScanAndPush(uint8_t *row, int16_t leftX, int16_t rightX,
                             uint8_t matchColor, uint8_t newColor, bool matchEqual,
                             int16_t scanY,
                             int16_t *stackX, int16_t *stackY,
                             int16_t *spInOut, int16_t maxSp);
|
|
|
|
// Highest-level flood helper. For ONE popped seed it performs the seed
// test, walk-left, walk-right, scan-above, scan-below and the pushes,
// so a dispatch-loop iteration makes one cross-segment HAL call
// instead of three (halFastFloodWalk + 2x halFastFloodScanAndPush).
// Internally the asm caches row addr / matchByte / nibble decoder
// across all three sub-operations.
//
// `pixels` is the surface base (s->pixels). On return *leftXOut /
// *rightXOut hold the matching-run boundaries; they are only valid
// when *seedMatched is true. The caller then performs the 1-row
// halFastFillRect using those bounds. *spInOut is updated to account
// for any new seeds the asm pushed for the row above / below.
//
// Returns true if the port handled it; false to fall back to
// halFastFloodWalk + the per-side halFastFloodScanAndPush calls.
bool halFastFloodWalkAndScans(uint8_t *pixels, int16_t x, int16_t y,
                              uint8_t matchColor, uint8_t newColor, bool matchEqual,
                              int16_t *stackX, int16_t *stackY,
                              int16_t *spInOut, int16_t maxSp,
                              bool *seedMatched,
                              int16_t *leftXOut, int16_t *rightXOut);
|
|
|
|
// Rect-copy helper behind surfaceBlit / surfaceBlitMasked. All clip
// math is done by the caller before the call:
//   dstRow0 / srcRow0 - row 0 of the destination / source regions
//   dstX / srcX       - intra-row pixel offsets
//   copyW / copyH     - the clipped extents
// The dst stride is hardcoded to SURFACE_BYTES_PER_ROW.
// transparent == $FFFF means opaque (always copy); any value 0..15
// means src nibbles equal to that index are skipped.
// Returns true if the port handled it; false to fall back to C.
bool halFastBlitRect(uint8_t *dstRow0, int16_t dstX,
                     const uint8_t *srcRow0, int16_t srcX,
                     int16_t copyW, int16_t copyH, int16_t srcRowBytes,
                     uint16_t transparent);
|
|
|
|
|
|
#ifdef JOEYLIB_PLATFORM_IIGS
|
|
// =====================================================================
|
|
// IIgs direct-dispatch macros.
|
|
//
|
|
// The halFast* function declarations above are the cross-platform API.
|
|
// On IIgs, those wrappers were ~60-80 cyc/call of pure plumbing on top
|
|
// of the asm itself: wrapper prologue (PHB/PHD/TCD), redundant arg
|
|
// re-push for the inner JSL, then wrapper epilogue. The macros below
|
|
// take effect at preprocess time and inline the asm call at the call
|
|
// site, eliminating the wrapper layer entirely.
|
|
//
|
|
// Cross-platform code in src/core/*.c is unchanged -- it still calls
|
|
// halFastDrawPixel(...) etc. On IIgs the preprocessor swaps that for
|
|
// the macro expansion before ORCA-C compiles the file. The matching
|
|
// halFast* C definitions in src/port/iigs/hal.c are deleted, since
|
|
// nothing references them once the macros take effect.
|
|
//
|
|
// Macros use comma-expression form so they evaluate to a `bool` value
|
|
// (most halFast* return true on IIgs since the asm always succeeds).
|
|
// =====================================================================
|
|
|
|
// Asm entry points behind the IIgs fast paths. Most are reached
// through the dispatch macros in this header; iigsFillRectInner is
// reached through the halFastFillRect C wrapper. Every scalar
// argument is declared as a 16-bit value.

// Drawing primitives.
extern void iigsDrawPixelInner(uint8_t *pixels,
                               uint16_t x, uint16_t y,
                               uint16_t nibble);
extern void iigsDrawLineInner(uint8_t *pixels,
                              uint16_t x0, uint16_t y0,
                              uint16_t x1, uint16_t y1,
                              uint16_t nibble);
extern void iigsDrawCircleInner(uint8_t *pixels,
                                uint16_t cx, uint16_t cy, uint16_t r,
                                uint16_t nibble);
extern void iigsFillCircleInner(uint8_t *pixels,
                                uint16_t cx, uint16_t cy, uint16_t r,
                                uint16_t fillWord);
extern void iigsSurfaceClearInner(uint8_t *pixels, uint16_t fillWord);

// Tile primitives.
extern void iigsTileFillInner(uint8_t *dstRow0, uint16_t fillWord);
extern void iigsTileCopyInner(uint8_t *dstRow0, const uint8_t *srcRow0);
extern void iigsTileCopyMaskedInner(uint8_t *dstRow0, const uint8_t *srcRow0,
                                    uint16_t transparent);
extern void iigsTilePasteInner(uint8_t *dstRow0, const uint8_t *srcTilePixels);
extern void iigsTileSnapInner(uint8_t *dstTilePixels, const uint8_t *srcRow0);

// Rect copy / rect fill.
extern void iigsBlitRectInner(uint8_t *dstRow0, uint16_t dstX,
                              const uint8_t *srcRow0, uint16_t srcX,
                              uint16_t copyW, uint16_t copyH,
                              uint16_t srcRowBytes,
                              uint16_t transparent);
extern void iigsFillRectInner(uint8_t *pixels,
                              uint16_t x, uint16_t y,
                              uint16_t w, uint16_t h,
                              uint16_t nibble);

// Flood fill. The results of a call come back through the three
// gFlood* globals declared after it, not through out-pointers.
extern void iigsFloodWalkAndScansInner(uint8_t *pixels,
                                       uint16_t x, uint16_t y,
                                       uint16_t matchColor, uint16_t newColor,
                                       uint16_t matchEqual,
                                       int16_t *stackX, int16_t *stackY,
                                       uint16_t *spInOut, uint16_t maxSp);
extern uint16_t gFloodSeedMatch;
extern uint16_t gFloodLeftX;
extern uint16_t gFloodRightX;
|
|
|
|
// IIgs: plot one pixel by calling the asm primitive at the call site.
// Only the low nibble of the color is forwarded. Each argument is
// evaluated exactly once; the expression always yields true.
#undef halFastDrawPixel
#define halFastDrawPixel(_surf, _px, _py, _color)          \
    (iigsDrawPixelInner((_surf)->pixels,                   \
                        (uint16_t)(_px),                   \
                        (uint16_t)(_py),                   \
                        (uint16_t)((_color) & 0x0F)),      \
     true)
|
|
|
|
// IIgs: draw a line by calling the asm primitive at the call site.
// Endpoint coordinates are converted to uint16_t and only the low
// nibble of the color is forwarded. Each argument is evaluated exactly
// once; the expression always yields true.
#undef halFastDrawLine
#define halFastDrawLine(_surf, _ax, _ay, _bx, _by, _color)  \
    (iigsDrawLineInner((_surf)->pixels,                     \
                       (uint16_t)(_ax),                     \
                       (uint16_t)(_ay),                     \
                       (uint16_t)(_bx),                     \
                       (uint16_t)(_by),                     \
                       (uint16_t)((_color) & 0x0F)),        \
     true)
|
|
|
|
// IIgs: circle outline by calling the asm primitive at the call site.
// The center is converted to uint16_t, the radius is passed through
// as given, and only the low nibble of the color is forwarded. Each
// argument is evaluated exactly once; the expression always yields
// true.
#undef halFastDrawCircle
#define halFastDrawCircle(_surf, _ox, _oy, _radius, _color)  \
    (iigsDrawCircleInner((_surf)->pixels,                    \
                         (uint16_t)(_ox),                    \
                         (uint16_t)(_oy),                    \
                         (_radius),                          \
                         (uint16_t)((_color) & 0x0F)),       \
     true)
|
|
|
|
// IIgs: filled circle via the asm primitive. The asm path is taken
// only when _s compares equal to stageGet(); for any other surface the
// expression yields false so the caller falls back to C.
//
// fillWord = doubled byte * $0101 = (nib*$11) * $101 = nib * $1111.
// Compile-time arithmetic when caller passes a constant; at most a
// single multiply when the nibble is variable (still cheaper than
// the wrapper's three sequential ORs / shifts).
//
// The multiply is done in unsigned arithmetic on purpose: with a
// 16-bit int, nib * 0x1111 exceeds INT_MAX for nib >= 8, which would
// be signed overflow. The unsigned product tops out at 0xFFFF.
//
// Note: _s is evaluated twice (once in the test, once for ->pixels),
// so it must not have side effects. The other arguments are evaluated
// at most once.
#undef halFastFillCircle
#define halFastFillCircle(_s, _cx, _cy, _r, _c)                              \
    ((_s) == stageGet()                                                      \
        ? (iigsFillCircleInner((_s)->pixels,                                 \
                               (uint16_t)(_cx), (uint16_t)(_cy),             \
                               (_r),                                         \
                               (uint16_t)((uint16_t)((_c) & 0x0F) * 0x1111u)), \
           true)                                                             \
        : false)
|
|
|
|
// IIgs: whole-surface clear via the asm primitive. The asm path is
// taken only when the surface compares equal to stageGet(); otherwise
// the expression yields false. The 16-bit fill word is the byte
// argument placed in both the low and the high byte.
// Note: both macro arguments appear twice in the expansion, so neither
// may have side effects.
#undef halFastSurfaceClear
#define halFastSurfaceClear(_surf, _fillByte)                              \
    ((_surf) == stageGet()                                                 \
        ? (iigsSurfaceClearInner(                                          \
               (_surf)->pixels,                                            \
               (uint16_t)((uint16_t)(_fillByte)                            \
                          | ((uint16_t)(_fillByte) << 8))),                \
           true)                                                           \
        : false)
|
|
|
|
// halFastFillRect stays as a real C wrapper -- removing it triggered
|
|
// an unrelated ORCA linker bank-placement failure (same mode as the
|
|
// peislam.asm deletion: `Unresolved reference Label:
|
|
// emitMvnCopyRoutine` in sprite codegen). The wrapper now just
|
|
// forwards to iigsFillRectInner (asm does partial+middle); we lose
|
|
// the call-site macro inlining for fillRect specifically but keep
|
|
// the rest of the macros AND the new asm helper. Per-call wrapper
|
|
// overhead for halFastFillRect is back (~80 cyc) but at least the
|
|
// per-row partial-byte logic happens in asm now.
|
|
|
|
// Tile primitives operate on caller-computed row pointers; just
|
|
// forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte
|
|
// offset within the surface.
|
|
// IIgs: fill one tile via the asm primitive. The tile's first byte is
// located inside the surface at by * 8 rows * SURFACE_BYTES_PER_ROW
// plus bx * 4 bytes; the fill word is forwarded untouched. Each
// argument is evaluated exactly once; the expression always yields
// true.
#undef halFastTileFill
#define halFastTileFill(_surf, _tx, _ty, _word)                               \
    (iigsTileFillInner(                                                       \
         &(_surf)->pixels[(uint16_t)(_ty) * 8 * SURFACE_BYTES_PER_ROW         \
                          + (uint16_t)(_tx) * 4],                             \
         (_word)),                                                            \
     true)
|
|
|
|
// IIgs: tile copy; both row pointers are forwarded unchanged to the
// asm primitive. Always yields true.
#undef halFastTileCopy
#define halFastTileCopy(_dst, _src) \
    (iigsTileCopyInner((_dst), (_src)), true)
|
|
|
|
// IIgs: masked tile copy; the row pointers are forwarded unchanged and
// the transparent value is converted to uint16_t for the asm
// primitive. Always yields true.
#undef halFastTileCopyMasked
#define halFastTileCopyMasked(_dst, _src, _skip)                  \
    (iigsTileCopyMaskedInner((_dst), (_src), (uint16_t)(_skip)),  \
     true)
|
|
|
|
// IIgs: tile paste; both pointers are forwarded unchanged to the asm
// primitive. Always yields true.
#undef halFastTilePaste
#define halFastTilePaste(_dst, _tile) \
    (iigsTilePasteInner((_dst), (_tile)), true)
|
|
|
|
// IIgs: tile snap; both pointers are forwarded unchanged to the asm
// primitive. Always yields true.
#undef halFastTileSnap
#define halFastTileSnap(_tile, _src) \
    (iigsTileSnapInner((_tile), (_src)), true)
|
|
|
|
// IIgs: rect copy via the asm primitive. Offsets, extents and the
// source stride are converted to uint16_t; the two row pointers and
// the transparent value are forwarded as given. Each argument is
// evaluated exactly once; the expression always yields true.
#undef halFastBlitRect
#define halFastBlitRect(_dstRow, _dstOff, _srcRow, _srcOff,       \
                        _cols, _rows, _stride, _skip)             \
    (iigsBlitRectInner((_dstRow), (uint16_t)(_dstOff),            \
                       (_srcRow), (uint16_t)(_srcOff),            \
                       (uint16_t)(_cols),                         \
                       (uint16_t)(_rows),                         \
                       (uint16_t)(_stride),                       \
                       (_skip)),                                  \
     true)
|
|
|
|
// Tier 2/3 flood fallbacks always returned false on IIgs (the asm
|
|
// impls were deleted as unreachable). Macros to constant false so
|
|
// ORCA-C dead-code-eliminates the never-taken fallback branches in
|
|
// floodFillInternal.
|
|
#undef halFastFloodWalk
|
|
// Constant false on IIgs; none of the arguments are evaluated.
#define halFastFloodWalk(_rowPtr, _startX, _match, _fill, _eq, _hitOut, _leftOut, _rightOut) (false)
|
|
|
|
#undef halFastFloodScanRow
|
|
// Constant false on IIgs; none of the arguments are evaluated.
#define halFastFloodScanRow(_rowPtr, _left, _right, _match, _fill, _eq, _marks) (false)
|
|
|
|
#undef halFastFloodScanAndPush
|
|
// Constant false on IIgs; none of the arguments are evaluated.
#define halFastFloodScanAndPush(_rowPtr, _left, _right, _match, _fill, _eq, _scanY, _stkX, _stkY, _spPtr, _spMax) (false)
|
|
|
|
// Tier-1 flood, multi-output. The asm routine reports its results in
// gFloodSeedMatch / gFloodLeftX / gFloodRightX; after the call the
// macro copies those into the caller's three out-pointers, in that
// order. Colors are masked to their low nibble, matchEqual is
// normalized to 0 / 1, and the stack-pointer argument is passed as a
// uint16_t pointer. Each argument is evaluated exactly once; the
// expression always yields true.
#undef halFastFloodWalkAndScans
#define halFastFloodWalkAndScans(_base, _seedX, _seedY, _match, _fill, _eq,   \
                                 _stkX, _stkY, _spPtr, _spMax,                \
                                 _hitOut, _leftOut, _rightOut)                \
    (iigsFloodWalkAndScansInner((_base),                                      \
                                (uint16_t)(_seedX),                           \
                                (uint16_t)(_seedY),                           \
                                (uint16_t)((_match) & 0x0F),                  \
                                (uint16_t)((_fill) & 0x0F),                   \
                                (uint16_t)((_eq) ? 1 : 0),                    \
                                (_stkX),                                      \
                                (_stkY),                                      \
                                (uint16_t *)(_spPtr),                         \
                                (uint16_t)(_spMax)),                          \
     *(_hitOut)   = (gFloodSeedMatch != 0),                                   \
     *(_leftOut)  = (int16_t)gFloodLeftX,                                     \
     *(_rightOut) = (int16_t)gFloodRightX,                                    \
     true)
|
|
|
|
#endif /* JOEYLIB_PLATFORM_IIGS */
|
|
|
|
#endif
|