// Internal HAL (hardware abstraction layer) interface. // // This header is included by src/core/*.c and by per-port source under // src/port//. It is NOT part of the public API and must not // be installed or exposed to game code. // // Each port must implement every function declared here. #ifndef JOEYLIB_HAL_H #define JOEYLIB_HAL_H #include "joey/core.h" #include "joey/input.h" #include "joey/surface.h" // Per-port one-shot initialization. Called from joeyInit after config // has been stored but before any surfaces are created. The port sets up // the display mode, allocates any HW-adjacent buffers (chip RAM on // Amiga, VGA mode on DOS, SHR on IIgs), and prepares for presents. // Returns true on success. On failure, halLastError may be set. bool halInit(const JoeyConfigT *config); // Per-port teardown. Restores display mode, frees HW-adjacent buffers. void halShutdown(void); // Allocate / release the SURFACE_PIXELS_SIZE-byte pixel buffer that // backs the library-owned stage surface. Ports that have a // hardware-friendly pin location for the back buffer (IIgs $01/2000 // with SHR shadow inhibited) return that address here; ports with no // such constraint just malloc/free. uint8_t *halStageAllocPixels(void); void halStageFreePixels(uint8_t *pixels); // Present the entire source surface to the display. void halPresent(const SurfaceT *src); // Present a rectangular region of the source surface. The caller has // already validated and clipped the rect to be fully inside the // surface bounds and to have positive extents. void halPresentRect(const SurfaceT *src, int16_t x, int16_t y, uint16_t w, uint16_t h); // Optional: returns a port-specific error message string for the last // HAL failure, or NULL if none. Ports may return NULL always. const char *halLastError(void); // Input: per-port keyboard setup, polling, and teardown. // halInputInit is called at the end of joeyInit; halInputShutdown // from joeyShutdown before halShutdown. halInputPoll refreshes the // core-owned key-state array (declared in core/inputInternal.h) -- // the port writes true into gKeyState[key] for held keys. Keys the // port does not recognize simply stay zero. void halInputInit(void); void halInputShutdown(void); void halInputPoll(void); // Block until the next display vertical blank. Each port implements // this with whatever native wait the hardware provides (VGA $3DA, // graphics.library WaitTOF, XBIOS Vsync, $C019 polling). void halWaitVBL(void); // Monotonic 16-bit frame counter. Caller polls; ports either detect // the rising edge inside this call (IIgs $C019 / DOS $3DA / Amiga // VPOSR) or return a counter maintained by a VBL ISR (ST). Required // caller invariant: poll faster than 2 * halFrameHz() so no edge is // missed. Used by benchmarks; cheap enough for animation cadence too. uint16_t halFrameCount(void); // Nominal display frame rate in Hz (50 PAL Amiga, 60 NTSC IIgs / ST, // ~70 VGA mode 13h). Reported only -- no API contract that VBLs // arrive at exactly this rate. Benchmarks divide by it to convert // iters-per-N-frames to ops/sec. uint16_t halFrameHz(void); // Audio: per-port engine setup, module + SFX playback, teardown. // halAudioInit returns true if the platform has a working engine. // All entry points are safe to call when init failed -- they become // no-ops. See joey/audio.h for the public API contract that wraps // these. bool halAudioInit(void); void halAudioShutdown(void); void halAudioPlayMod(const uint8_t *data, uint32_t length, bool loop); void halAudioStopMod(void); bool halAudioIsPlayingMod(void); void halAudioPlaySfx(uint8_t slot, const uint8_t *sample, uint32_t length, uint16_t rateHz); void halAudioStopSfx(uint8_t slot); void halAudioFrameTick(void); // Optional fast-path hooks. Each returns true if the port handled the // operation in a port-specific accelerated path; false means the // caller should fall back to the platform-agnostic C implementation. // // Funneling all asm dispatches through hal.c (one TU per port) avoids // the cumulative ORCA Linker "Expression too complex" failure that // hits when multiple cross-platform TUs each call into a named load // segment full of asm primitives. Cross-platform code in src/core/ // only ever calls into HAL, so the link-time expression cost is paid // once per binary -- not once per TU that wants speed. // // Each port must provide all of these; ports without an accelerated // path simply return false from every hook. bool halFastSurfaceClear(SurfaceT *s, uint8_t doubled); bool halFastFillRect(SurfaceT *s, int16_t x, int16_t y, uint16_t w, uint16_t h, uint8_t colorIndex); bool halFastTileFill(SurfaceT *s, uint8_t bx, uint8_t by, uint16_t fillWord); // Tile primitives operate on already-computed row-0 pointers from // the C wrapper. dstRow0 / srcRow0 point at the first byte of the // 8x8 region within their respective surfaces (stride 160). For // tilePaste / tileSnap the TileT side is a packed 32-byte buffer // (stride 4); the corresponding pointer points at byte 0 of that // buffer. bool halFastTileCopy(uint8_t *dstRow0, const uint8_t *srcRow0); bool halFastTileCopyMasked(uint8_t *dstRow0, const uint8_t *srcRow0, uint8_t transparent); bool halFastTilePaste(uint8_t *dstRow0, const uint8_t *srcTilePixels); bool halFastTileSnap(uint8_t *dstTilePixels, const uint8_t *srcRow0); // drawPixel inner: caller has already done NULL + bounds checks. // (x, y) are guaranteed in [0..SURFACE_WIDTH-1] x [0..SURFACE_HEIGHT-1]. // colorIndex is the 0..15 nibble. Surface dirty marking happens in // the C wrapper after this returns. bool halFastDrawPixel(SurfaceT *s, uint16_t x, uint16_t y, uint8_t colorIndex); // drawLine inner for the diagonal case. Caller ensures both endpoints // are inside the surface bounds, so the inner loop runs without // per-pixel clip checks. The C wrapper still routes pure horizontal // and vertical lines through fillRect (which has its own fast path). bool halFastDrawLine(SurfaceT *s, int16_t x0, int16_t y0, int16_t x1, int16_t y1, uint8_t colorIndex); // drawCircle / fillCircle inner. Caller has already validated that // the entire bounding circle (cx-r .. cx+r, cy-r .. cy+r) fits inside // the surface bounds, so the inner loop plots every octant pixel // unconditionally. r is guaranteed > 0; the cx == 0 / r == 0 cases // stay in the C wrapper. bool halFastDrawCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIndex); bool halFastFillCircle(SurfaceT *s, int16_t cx, int16_t cy, uint16_t r, uint8_t colorIndex); // floodFill helper: combined seed test + walk-left + walk-right for // one row. Returns true if the port handled it (asm path taken). The // out-param seedMatched tells the caller whether the seed pixel // satisfied the match criterion -- if false, caller skips this pop; // if true, leftXOut/rightXOut hold the run boundaries. // Returns false if no asm path; caller falls back to C walks. bool halFastFloodWalk(uint8_t *row, int16_t startX, uint8_t matchColor, uint8_t newColor, bool matchEqual, bool *seedMatched, int16_t *leftXOut, int16_t *rightXOut); // floodFill helper for the row-above / row-below run-detection scans. // Walks pixels [leftX..rightX] inclusive of `row`, writing 1 byte per // pixel into markBuf (1 = qualifies for flood, 0 = does not). The C // side then walks markBuf for run-edge transitions, replacing the // per-pixel srcPixel + match check inside the inner loop. // Returns true if the port handled it; false to fall back to C. bool halFastFloodScanRow(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, uint8_t *markBuf); // Combined per-pixel scan + run-edge walk + seed push. Higher-level // than halFastFloodScanRow: replaces both the markBuf fill AND the C // loop that walks markBuf for falling edges. *spInOut is read on entry // and updated with the new top-of-stack on return. Returns true if // the port handled it (caller skips the C run-edge walk entirely); // false to fall back to halFastFloodScanRow + C walk. bool halFastFloodScanAndPush(uint8_t *row, int16_t leftX, int16_t rightX, uint8_t matchColor, uint8_t newColor, bool matchEqual, int16_t scanY, int16_t *stackX, int16_t *stackY, int16_t *spInOut, int16_t maxSp); // Highest-level flood helper: combined seed-test + walk-left + walk-right // + scan-above + scan-below + push for ONE popped seed. Replaces three // cross-segment HAL calls (halFastFloodWalk + 2x halFastFloodScanAndPush) // per dispatch loop iteration with one. The asm internally caches row // addr / matchByte / nibble decoder across all three sub-operations. // // pixels is the surface base (s->pixels). On return, leftXOut / rightXOut // hold the matching-run boundaries (only valid if seedMatched != 0); the // caller does the 1-row halFastFillRect using those bounds. *spInOut is // updated with any new seeds the asm pushed for the row above/below. // // Returns true if the port handled it; false to fall back to // halFastFloodWalk + the per-side halFastFloodScanAndPush calls. bool halFastFloodWalkAndScans(uint8_t *pixels, int16_t x, int16_t y, uint8_t matchColor, uint8_t newColor, bool matchEqual, int16_t *stackX, int16_t *stackY, int16_t *spInOut, int16_t maxSp, bool *seedMatched, int16_t *leftXOut, int16_t *rightXOut); // surfaceBlit / surfaceBlitMasked rect-copy helper. Caller has done // the clip math: dstRow0 / srcRow0 point at row 0 of the source/dest // regions, dstX / srcX are intra-row pixel offsets, copyW/copyH are // the clipped extents. dst stride is hardcoded SURFACE_BYTES_PER_ROW. // transparent == $FFFF means opaque (always copy); any 0..15 value // means src nibbles equal to that index are skipped. // Returns true if the port handled it; false to fall back to C. bool halFastBlitRect(uint8_t *dstRow0, int16_t dstX, const uint8_t *srcRow0, int16_t srcX, int16_t copyW, int16_t copyH, int16_t srcRowBytes, uint16_t transparent); #ifdef JOEYLIB_PLATFORM_IIGS // ===================================================================== // IIgs direct-dispatch macros. // // The halFast* function declarations above are the cross-platform API. // On IIgs, those wrappers were ~60-80 cyc/call of pure plumbing on top // of the asm itself: wrapper prologue (PHB/PHD/TCD), redundant arg // re-push for the inner JSL, then wrapper epilogue. The macros below // take effect at preprocess time and inline the asm call at the call // site, eliminating the wrapper layer entirely. // // Cross-platform code in src/core/*.c is unchanged -- it still calls // halFastDrawPixel(...) etc. On IIgs the preprocessor swaps that for // the macro expansion before ORCA-C compiles the file. The matching // halFast* C definitions in src/port/iigs/hal.c are deleted, since // nothing references them once the macros take effect. // // Macros use comma-expression form so they evaluate to a `bool` value // (most halFast* return true on IIgs since the asm always succeeds). // ===================================================================== extern void iigsDrawPixelInner (uint8_t *pixels, uint16_t x, uint16_t y, uint16_t nibble); extern void iigsDrawLineInner (uint8_t *pixels, uint16_t x0, uint16_t y0, uint16_t x1, uint16_t y1, uint16_t nibble); extern void iigsDrawCircleInner (uint8_t *pixels, uint16_t cx, uint16_t cy, uint16_t r, uint16_t nibble); extern void iigsFillCircleInner (uint8_t *pixels, uint16_t cx, uint16_t cy, uint16_t r, uint16_t fillWord); extern void iigsSurfaceClearInner(uint8_t *pixels, uint16_t fillWord); extern void iigsTileFillInner (uint8_t *dstRow0, uint16_t fillWord); extern void iigsTileCopyInner (uint8_t *dstRow0, const uint8_t *srcRow0); extern void iigsTileCopyMaskedInner(uint8_t *dstRow0, const uint8_t *srcRow0, uint16_t transparent); extern void iigsTilePasteInner (uint8_t *dstRow0, const uint8_t *srcTilePixels); extern void iigsTileSnapInner (uint8_t *dstTilePixels, const uint8_t *srcRow0); extern void iigsBlitRectInner (uint8_t *dstRow0, uint16_t dstX, const uint8_t *srcRow0, uint16_t srcX, uint16_t copyW, uint16_t copyH, uint16_t srcRowBytes, uint16_t transparent); extern void iigsFillRectInner (uint8_t *pixels, uint16_t x, uint16_t y, uint16_t w, uint16_t h, uint16_t nibble); extern void iigsFloodWalkAndScansInner(uint8_t *pixels, uint16_t x, uint16_t y, uint16_t matchColor, uint16_t newColor, uint16_t matchEqual, int16_t *stackX, int16_t *stackY, uint16_t *spInOut, uint16_t maxSp); extern uint16_t gFloodSeedMatch; extern uint16_t gFloodLeftX; extern uint16_t gFloodRightX; #undef halFastDrawPixel #define halFastDrawPixel(_s, _x, _y, _c) \ (iigsDrawPixelInner((_s)->pixels, (uint16_t)(_x), (uint16_t)(_y), \ (uint16_t)((_c) & 0x0F)), \ true) #undef halFastDrawLine #define halFastDrawLine(_s, _x0, _y0, _x1, _y1, _c) \ (iigsDrawLineInner((_s)->pixels, (uint16_t)(_x0), (uint16_t)(_y0), \ (uint16_t)(_x1), (uint16_t)(_y1), \ (uint16_t)((_c) & 0x0F)), \ true) #undef halFastDrawCircle #define halFastDrawCircle(_s, _cx, _cy, _r, _c) \ (iigsDrawCircleInner((_s)->pixels, (uint16_t)(_cx), (uint16_t)(_cy), \ (_r), (uint16_t)((_c) & 0x0F)), \ true) // fillWord = doubled byte * $0101 = (nib*$11) * $101 = nib * $1111. // Compile-time arithmetic when caller passes a constant; at most a // single multiply when the nibble is variable (still cheaper than // the wrapper's three sequential ORs / shifts). #undef halFastFillCircle #define halFastFillCircle(_s, _cx, _cy, _r, _c) \ ((_s) == stageGet() \ ? (iigsFillCircleInner((_s)->pixels, (uint16_t)(_cx), (uint16_t)(_cy), \ (_r), (uint16_t)(((_c) & 0x0F) * 0x1111)), \ true) \ : false) #undef halFastSurfaceClear #define halFastSurfaceClear(_s, _d) \ ((_s) == stageGet() \ ? (iigsSurfaceClearInner((_s)->pixels, \ (uint16_t)((uint16_t)(_d) | ((uint16_t)(_d) << 8))), \ true) \ : false) // halFastFillRect: macro form, same shape as the others. Builds // clean now that _ROOT has been thinned out via the CORESYS load // segment migration -- previous attempts shrank _ROOT enough to // retrip the bank-packing fragility, but with most core .c files // out of _ROOT that's no longer reactive. Saves ~80 cyc/call. #undef halFastFillRect #define halFastFillRect(_s, _x, _y, _w, _h, _c) \ ((_s) == stageGet() \ ? (iigsFillRectInner((_s)->pixels, (uint16_t)(_x), (uint16_t)(_y), \ (uint16_t)(_w), (uint16_t)(_h), \ (uint16_t)((_c) & 0x0F)), \ true) \ : false) // Tile primitives operate on caller-computed row pointers; just // forward the args. by/bx are tile coords -> bx*4 + by*8*160 byte // offset within the surface. Use SURFACE_ROW_OFFSET (LUT lookup) to // dodge ORCA-C 2.2.1's ~CUMUL2 helper for the *160 multiply. #undef halFastTileFill #define halFastTileFill(_s, _bx, _by, _fw) \ (iigsTileFillInner(&(_s)->pixels[SURFACE_ROW_OFFSET((uint16_t)(_by) << 3) \ + ((uint16_t)(_bx) << 2)], \ (_fw)), \ true) #undef halFastTileCopy #define halFastTileCopy(_d, _s) (iigsTileCopyInner((_d), (_s)), true) #undef halFastTileCopyMasked #define halFastTileCopyMasked(_d, _s, _t) \ (iigsTileCopyMaskedInner((_d), (_s), (uint16_t)(_t)), true) #undef halFastTilePaste #define halFastTilePaste(_d, _s) (iigsTilePasteInner((_d), (_s)), true) #undef halFastTileSnap #define halFastTileSnap(_d, _s) (iigsTileSnapInner((_d), (_s)), true) #undef halFastBlitRect #define halFastBlitRect(_dr, _dx, _sr, _sx, _w, _h, _ss, _t) \ (iigsBlitRectInner((_dr), (uint16_t)(_dx), (_sr), (uint16_t)(_sx), \ (uint16_t)(_w), (uint16_t)(_h), \ (uint16_t)(_ss), (_t)), \ true) // Tier 2/3 flood fallbacks always returned false on IIgs (the asm // impls were deleted as unreachable). Macros to constant false so // ORCA-C dead-code-eliminates the never-taken fallback branches in // floodFillInternal. #undef halFastFloodWalk #define halFastFloodWalk(_row, _sx, _mc, _nc, _me, _sm, _lx, _rx) (false) #undef halFastFloodScanRow #define halFastFloodScanRow(_row, _lx, _rx, _mc, _nc, _me, _mb) (false) #undef halFastFloodScanAndPush #define halFastFloodScanAndPush(_row, _lx, _rx, _mc, _nc, _me, _sy, _sx, _syA, _sp, _ms) (false) // Tier-1 flood: multi-output. Asm sets gFloodSeedMatch / gFloodLeftX / // gFloodRightX; macro reads those into the caller's out-ptrs. #undef halFastFloodWalkAndScans #define halFastFloodWalkAndScans(_pix, _x, _y, _mc, _nc, _me, _sx, _sy, _sp, _ms, _smOut, _lxOut, _rxOut) \ (iigsFloodWalkAndScansInner((_pix), (uint16_t)(_x), (uint16_t)(_y), \ (uint16_t)((_mc) & 0x0F), \ (uint16_t)((_nc) & 0x0F), \ (uint16_t)((_me) ? 1 : 0), \ (_sx), (_sy), \ (uint16_t *)(_sp), \ (uint16_t)(_ms)), \ *(_smOut) = (gFloodSeedMatch != 0), \ *(_lxOut) = (int16_t)gFloodLeftX, \ *(_rxOut) = (int16_t)gFloodRightX, \ true) #endif /* JOEYLIB_PLATFORM_IIGS */ #endif