DVX_GUI/dvx/dvxComp.c

266 lines
10 KiB
C

// dvxComp.c — Layer 3: Dirty rectangle compositor for DVX GUI (optimized)
//
// This layer implements dirty rectangle tracking and merging. The compositor
// avoids full-screen redraws, which would be prohibitively expensive on the
// target 486/Pentium hardware over ISA bus VESA LFB. A full 640x480x16bpp
// framebuffer is ~600KB — at ISA's ~8MB/s theoretical peak, a blind full
// flush costs ~75ms (~4.5 frames at 60Hz). By tracking which rectangles have
// actually changed and flushing only those regions from the system RAM
// backbuffer to the LFB, the bandwidth consumed per frame scales with the
// amount of visual change rather than the screen resolution.
//
// The compositing loop lives in dvxApp.c (compositeAndFlush). For each dirty
// rect, it repaints the desktop, then walks the window stack bottom-to-top
// painting chrome, content, scrollbars, popup menus, and the cursor — all
// clipped to the dirty rect. Only then is the dirty rect flushed to the LFB.
// This means each pixel in a dirty region is written to system RAM potentially
// multiple times (painter's algorithm), but the expensive LFB write happens
// exactly once per pixel per frame.
#include "dvxComp.h"
#include "platform/dvxPlatform.h"
#include <string.h>
// Rects within this many pixels of each other get merged even if they don't
// overlap. A small gap tolerance absorbs jitter from mouse movement and
// closely-spaced UI invalidations (e.g. title bar + content during a drag)
// without bloating merged rects excessively. The value 4 was chosen to match
// the chrome border width — adjacent chrome/content invalidations merge
// naturally.
#define DIRTY_MERGE_GAP 4
// ============================================================
// Prototypes
// ============================================================
// Gap-tolerant overlap test used by dirtyListMerge; defined later in this file.
static inline bool rectsOverlapOrAdjacent(const RectT *a, const RectT *b, int32_t gap);
// Bounding-box union used by dirtyListAdd and dirtyListMerge; defined later in
// this file.
static inline void rectUnion(const RectT *a, const RectT *b, RectT *result);
// ============================================================
// dirtyListAdd
// ============================================================
//
// Appends a dirty rect to the list. Uses a fixed-size array (MAX_DIRTY_RECTS
// = 128) rather than a dynamic allocation — this is called on every UI
// mutation (drag, repaint, focus change) so allocation overhead must be zero.
//
// When the list fills up, an eager merge pass tries to consolidate rects.
// If the list is STILL full after merging (pathological scatter), the
// nuclear option collapses everything into one bounding box. This guarantees
// the list never overflows, at the cost of potentially over-painting a large
// rect. In practice the merge pass almost always frees enough slots because
// GUI mutations tend to cluster spatially.
void dirtyListAdd(DirtyListT *dl, int32_t x, int32_t y, int32_t w, int32_t h) {
    // A rect with no area contributes nothing; hinted as the rare case.
    if (__builtin_expect(w <= 0 || h <= 0, 0)) {
        return;
    }

    // Slow path (hinted unlikely): the array is full, so make room first.
    if (__builtin_expect(dl->count >= MAX_DIRTY_RECTS, 0)) {
        dirtyListMerge(dl);

        if (dl->count >= MAX_DIRTY_RECTS) {
            // Merging freed no slot. Fold every stored rect, and then the
            // incoming one, into a single bounding box kept in slot 0 so
            // that no dirty area is dropped.
            RectT bounds = dl->rects[0];
            int32_t idx = 1;
            while (idx < dl->count) {
                rectUnion(&bounds, &dl->rects[idx], &bounds);
                idx++;
            }

            RectT incoming = {x, y, w, h};
            rectUnion(&bounds, &incoming, &bounds);

            dl->rects[0] = bounds;
            dl->count = 1;
            return;
        }
    }

    // Normal path: write the rect into the first free slot and claim it.
    RectT *slot = &dl->rects[dl->count];
    slot->x = x;
    slot->y = y;
    slot->w = w;
    slot->h = h;
    dl->count++;
}
// ============================================================
// dirtyListClear
// ============================================================
//
// Empties the list by resetting the rect count to zero. The rects array
// itself is left untouched; stale entries are simply no longer counted.
void dirtyListClear(DirtyListT *dl) {
dl->count = 0;
}
// ============================================================
// dirtyListInit
// ============================================================
//
// Puts a list into its initial empty state by setting the rect count to
// zero. The rects array is not cleared. At present this does the same work
// as dirtyListClear.
void dirtyListInit(DirtyListT *dl) {
dl->count = 0;
}
// ============================================================
// dirtyListMerge
// ============================================================
//
// Coalesces overlapping or nearby dirty rects to reduce the number of
// composite+flush passes. The trade-off: merging two rects into their
// bounding box may add "clean" pixels that get needlessly repainted, but
// this is far cheaper than the per-rect overhead of an extra composite
// pass (clip setup, window-stack walk, LFB flush). On 486/Pentium ISA,
// the LFB write latency per-rect dominates, so fewer larger rects win.
//
// Algorithm: O(N^2) pairwise sweep with bounded restarts. For each rect i,
// scan all rects j>i and merge any that overlap or are within DIRTY_MERGE_GAP
// pixels. When a merge happens, rect i grows and may now overlap rects that
// it previously missed, so the inner scan restarts — but restarts are capped
// at 3 per slot to prevent O(N^3) cascading in pathological layouts (e.g.
// a diagonal scatter of tiny rects). The cap of 3 was chosen empirically:
// typical GUI operations produce clustered invalidations that converge in
// 1-2 passes; 3 gives a safety margin without measurable overhead.
//
// Merged-away rects are removed by swap-with-last (O(1) removal from an
// unordered list), which is why the rects array is not kept sorted.
void dirtyListMerge(DirtyListT *dl) {
    // Fewer than two rects: there is no pair to merge.
    if (dl->count < 2) {
        return;
    }

    for (int32_t keep = 0; keep < dl->count; keep++) {
        RectT *anchor = &dl->rects[keep];

        // At most three sweeps per anchor; stop as soon as a sweep absorbs
        // nothing, because a further sweep could not find anything new.
        for (int32_t sweep = 0; sweep < 3; sweep++) {
            bool absorbed = false;
            int32_t probe = keep + 1;

            while (probe < dl->count) {
                RectT *candidate = &dl->rects[probe];

                if (!rectsOverlapOrAdjacent(anchor, candidate, DIRTY_MERGE_GAP)) {
                    probe++;
                    continue;
                }

                // Grow the anchor to cover the candidate, then fill the
                // vacated slot with the last entry. probe is not advanced,
                // so the rect that was moved in gets tested next.
                rectUnion(anchor, candidate, anchor);
                dl->count--;
                *candidate = dl->rects[dl->count];
                absorbed = true;
            }

            if (!absorbed) {
                break;
            }
        }
    }
}
// ============================================================
// flushRect
// ============================================================
//
// Copies one dirty rect from the system RAM backbuffer to the VESA LFB.
// This is the single most bandwidth-sensitive operation in the entire GUI:
// the LFB lives behind the ISA/PCI bus, so every byte written here is a
// bus transaction. The platform layer (platformFlushRect) uses rep movsd
// on 486+ to move aligned 32-bit words, maximizing bus utilization.
//
// Crucially, we flush per dirty rect AFTER all painting for that rect is
// complete. This avoids visible tearing — the LFB is never in a half-painted
// state for any given region.
void flushRect(DisplayT *d, const RectT *r) {
// Pure delegation: platformFlushRect does the copy. No clipping or
// validation of r is performed at this level.
platformFlushRect(d, r);
}
// ============================================================
// rectIntersect
// ============================================================
//
// Used heavily in the compositing loop to test whether a window overlaps
// a dirty rect before painting it. The branch hint marks the non-overlapping
// case as unlikely because the compositing loop already does a coarse AABB
// check before calling this — when we get here, intersection is expected.
// The min/max formulation avoids branches in the hot path.
//
// Returns true and stores the overlap in *result when a and b share a region
// of positive width and height; returns false and leaves *result untouched
// otherwise. All edges are computed before *result is written.
bool rectIntersect(const RectT *a, const RectT *b, RectT *result) {
    const int32_t aRight  = a->x + a->w;
    const int32_t bRight  = b->x + b->w;
    const int32_t aBottom = a->y + a->h;
    const int32_t bBottom = b->y + b->h;

    // Overlap spans from the larger of the two origins to the smaller of
    // the two far edges, on each axis.
    const int32_t left   = (a->x > b->x) ? a->x : b->x;
    const int32_t top    = (a->y > b->y) ? a->y : b->y;
    const int32_t right  = (aRight < bRight) ? aRight : bRight;
    const int32_t bottom = (aBottom < bBottom) ? aBottom : bBottom;

    // Hinted as the expected outcome: the rects overlap.
    if (__builtin_expect(left < right && top < bottom, 1)) {
        result->x = left;
        result->y = top;
        result->w = right - left;
        result->h = bottom - top;
        return true;
    }

    return false;
}
// ============================================================
// rectIsEmpty
// ============================================================
//
// A rect is empty unless both its width and its height are positive.
bool rectIsEmpty(const RectT *r) {
    return !(r->w > 0 && r->h > 0);
}
// ============================================================
// rectsOverlapOrAdjacent
// ============================================================
//
// Separating-axis test with a gap tolerance. Two rects merge if they
// overlap OR if the gap between them is <= DIRTY_MERGE_GAP pixels.
// The gap tolerance is the key tuning parameter for the merge algorithm:
// too small and you get many tiny rects (expensive per-rect flush overhead);
// too large and you merge distant rects into one huge bounding box
// (wasted repaint of clean pixels). The early-out on each axis makes this
// very cheap for non-overlapping rects, which is the common case during
// the inner loop of dirtyListMerge.
static inline bool rectsOverlapOrAdjacent(const RectT *a, const RectT *b, int32_t gap) {
    // The rects qualify only if, on both axes, neither one lies more than
    // `gap` pixels beyond the far edge of the other. The && chain stops at
    // the first separated side, checked in the order: a left of b, b left
    // of a, a above b, b above a.
    return a->x + a->w + gap >= b->x
        && b->x + b->w + gap >= a->x
        && a->y + a->h + gap >= b->y
        && b->y + b->h + gap >= a->y;
}
// ============================================================
// rectUnion
// ============================================================
//
// Axis-aligned bounding box of two rects. Supports in-place operation
// (result == a) for the merge loop. Note that this may grow the rect
// substantially if the two inputs are far apart — this is the inherent
// cost of bounding-box merging vs. maintaining a true region (list of
// non-overlapping rects). Bounding-box was chosen because the merge
// list is bounded to 128 entries and the extra repaint cost of a few
// clean pixels is negligible compared to the complexity of a proper
// region algebra on 486-class hardware.
static inline void rectUnion(const RectT *a, const RectT *b, RectT *result) {
    // Start from a's edges and widen them wherever b reaches further.
    int32_t left   = a->x;
    int32_t top    = a->y;
    int32_t right  = a->x + a->w;
    int32_t bottom = a->y + a->h;

    const int32_t bRight  = b->x + b->w;
    const int32_t bBottom = b->y + b->h;

    if (b->x < left) {
        left = b->x;
    }
    if (b->y < top) {
        top = b->y;
    }
    if (bRight > right) {
        right = bRight;
    }
    if (bBottom > bottom) {
        bottom = bBottom;
    }

    // Every edge is final before the first store, so result may alias a or b.
    result->x = left;
    result->y = top;
    result->w = right - left;
    result->h = bottom - top;
}