// dvx_comp.c — Layer 3: Dirty rectangle compositor for DVX GUI (optimized)
//
// This layer implements dirty rectangle tracking and merging. The compositor
// avoids full-screen redraws, which would be prohibitively expensive on the
// target 486/Pentium hardware over ISA bus VESA LFB. A full 640x480x16bpp
// framebuffer is ~600KB — at ISA's ~8MB/s theoretical peak, a blind full
// flush costs ~75ms (>1 frame at 60Hz). By tracking which rectangles have
// actually changed and flushing only those regions from the system RAM
// backbuffer to the LFB, the bandwidth consumed per frame scales with the
// amount of visual change rather than the screen resolution.
//
// The compositing loop lives in dvxApp.c (compositeAndFlush). For each dirty
// rect, it repaints the desktop, then walks the window stack bottom-to-top
// painting chrome, content, scrollbars, popup menus, and the cursor — all
// clipped to the dirty rect. Only then is the dirty rect flushed to the LFB.
// This means each pixel in a dirty region is written to system RAM potentially
// multiple times (painter's algorithm), but the expensive LFB write happens
// exactly once per pixel per frame.
|
|
|
|
#include "dvxComp.h"
|
|
#include "platform/dvxPlatform.h"
|
|
|
|
#include <string.h>
|
|
|
|
// Rects within this many pixels of each other get merged even if they don't
// overlap. A small gap tolerance absorbs jitter from mouse movement and
// closely-spaced UI invalidations (e.g. title bar + content during a drag)
// without bloating merged rects excessively. The value 4 was chosen to match
// the chrome border width — adjacent chrome/content invalidations merge
// naturally.
|
|
#define DIRTY_MERGE_GAP 4  // in pixels; passed as the `gap` argument by dirtyListMerge
|
|
|
|
// ============================================================
|
|
// Prototypes
|
|
// ============================================================
|
|
|
|
// Forward declarations of the file-local geometry helpers; both are defined
// further down in this file.
static inline bool rectsOverlapOrAdjacent(const RectT *a, const RectT *b, int32_t gap);
static inline void rectUnion(const RectT *a, const RectT *b, RectT *result);
|
|
|
|
|
|
// ============================================================
// dirtyListAdd
// ============================================================
//
// Appends a dirty rect to the list. Uses a fixed-size array (MAX_DIRTY_RECTS
// = 128) rather than a dynamic allocation — this is called on every UI
// mutation (drag, repaint, focus change) so allocation overhead must be zero.
//
// When the list fills up, an eager merge pass tries to consolidate rects.
// If the list is STILL full after merging (pathological scatter), the
// nuclear option collapses everything into one bounding box. This guarantees
// the list never overflows, at the cost of potentially over-painting a large
// rect. In practice the merge pass almost always frees enough slots because
// GUI mutations tend to cluster spatially.
|
|
|
|
void dirtyListAdd(DirtyListT *dl, int32_t x, int32_t y, int32_t w, int32_t h) {
    // Records the rect (x, y, w, h) in dl.
    //   - Rects with w <= 0 or h <= 0 are silently dropped.
    //   - A list that already holds MAX_DIRTY_RECTS entries is first compacted
    //     with dirtyListMerge; if that frees no slot, the whole list plus the
    //     new rect is replaced by one bounding rect.

    // Degenerate input is hinted as the unlikely case.
    if (__builtin_expect(w <= 0 || h <= 0, 0)) {
        return;
    }

    // Full list (hinted unlikely): try to win back slots by merging.
    if (__builtin_expect(dl->count >= MAX_DIRTY_RECTS, 0)) {
        dirtyListMerge(dl);
    }

    // Normal case: there is a free slot, so store the rect at the tail.
    if (dl->count < MAX_DIRTY_RECTS) {
        RectT *slot = &dl->rects[dl->count];

        slot->x = x;
        slot->y = y;
        slot->w = w;
        slot->h = h;
        dl->count++;
        return;
    }

    // Last resort: merging freed nothing. Fold every stored rect and the new
    // one into a single bounding box so no dirty area is lost and the array
    // is never written past its end.
    RectT bounds = dl->rects[0];
    int32_t idx = 1;

    while (idx < dl->count) {
        rectUnion(&bounds, &dl->rects[idx], &bounds);
        idx++;
    }

    RectT incoming = {x, y, w, h};
    rectUnion(&bounds, &incoming, &bounds);

    dl->rects[0] = bounds;
    dl->count = 1;
}
|
|
|
|
|
|
// ============================================================
|
|
// dirtyListClear
|
|
// ============================================================
|
|
|
|
void dirtyListClear(DirtyListT *dl) {
    // Drops all pending rects by resetting the count; the rects array itself
    // is not touched.
    dl->count = 0;
}
|
|
|
|
|
|
// ============================================================
|
|
// dirtyListInit
|
|
// ============================================================
|
|
|
|
void dirtyListInit(DirtyListT *dl) {
    // Puts the list into its empty state. Only the count is written; the
    // rects array is left as-is.
    dl->count = 0;
}
|
|
|
|
|
|
// ============================================================
// dirtyListMerge
// ============================================================
//
// Coalesces overlapping or nearby dirty rects to reduce the number of
// composite+flush passes. The trade-off: merging two rects into their
// bounding box may add "clean" pixels that get needlessly repainted, but
// this is far cheaper than the per-rect overhead of an extra composite
// pass (clip setup, window-stack walk, LFB flush). On 486/Pentium ISA,
// the LFB write latency per-rect dominates, so fewer larger rects win.
//
// Algorithm: O(N^2) pairwise sweep with bounded restarts. For each rect i,
// scan all rects j>i and merge any that overlap or are within DIRTY_MERGE_GAP
// pixels. When a merge happens, rect i grows and may now overlap rects that
// it previously missed, so the inner scan restarts — but restarts are capped
// at 3 per slot to prevent O(N^3) cascading in pathological layouts (e.g.
// a diagonal scatter of tiny rects). The cap of 3 was chosen empirically:
// typical GUI operations produce clustered invalidations that converge in
// 1-2 passes; 3 gives a safety margin without measurable overhead.
//
// Merged-away rects are removed by swap-with-last (O(1) removal from an
// unordered list), which is why the rects array is not kept sorted.
|
|
|
|
void dirtyListMerge(DirtyListT *dl) {
    // Folds rects that overlap, or lie within DIRTY_MERGE_GAP of each other,
    // into their bounding box, shrinking dl->count as it goes. Rect order is
    // not preserved.

    // Upper bound on how many times the tail is scanned for one anchor rect.
    enum { MAX_SCANS_PER_ANCHOR = 3 };

    if (dl->count <= 1) {
        return;
    }

    for (int32_t anchor = 0; anchor < dl->count; anchor++) {
        RectT *keep = &dl->rects[anchor];

        for (int32_t scan = 0; scan < MAX_SCANS_PER_ANCHOR; scan++) {
            bool absorbed = false;
            int32_t probe = anchor + 1;

            while (probe < dl->count) {
                RectT *other = &dl->rects[probe];

                if (!rectsOverlapOrAdjacent(keep, other, DIRTY_MERGE_GAP)) {
                    probe++;
                    continue;
                }

                // Grow the anchor, then fill the hole with the last rect.
                // probe is NOT advanced so the swapped-in rect is tested next.
                rectUnion(keep, other, keep);
                *other = dl->rects[dl->count - 1];
                dl->count--;
                absorbed = true;
            }

            // A scan that merged nothing means the anchor cannot grow further.
            if (!absorbed) {
                break;
            }
        }
    }
}
|
|
|
|
|
|
// ============================================================
// flushRect
// ============================================================
//
// Copies one dirty rect from the system RAM backbuffer to the VESA LFB.
// This is the single most bandwidth-sensitive operation in the entire GUI:
// the LFB lives behind the ISA/PCI bus, so every byte written here is a
// bus transaction. The platform layer (platformFlushRect) uses rep movsd
// on 486+ to move aligned 32-bit words, maximizing bus utilization.
//
// Crucially, we flush per dirty rect AFTER all painting for that rect is
// complete. This avoids visible tearing — the LFB is never in a half-painted
// state for any given region.
|
|
|
|
void flushRect(DisplayT *d, const RectT *r) {
    // Pure pass-through: both arguments are handed unchanged to the platform
    // layer, which does the actual work.
    platformFlushRect(d, r);
}
|
|
|
|
|
|
// ============================================================
// rectIntersect
// ============================================================
//
// Used heavily in the compositing loop to test whether a window overlaps
// a dirty rect before painting it. The branch hint marks the non-overlapping
// case as unlikely because the compositing loop already does a coarse AABB
// check before calling this — when we get here, intersection is expected.
// The min/max formulation avoids branches in the hot path.
|
|
|
|
bool rectIntersect(const RectT *a, const RectT *b, RectT *result) {
    // Computes the overlap of a and b.
    //   Returns true and fills *result when the overlap has positive width
    //   and height; returns false and leaves *result untouched otherwise.
    //   All inputs are read before *result is written.

    // Exclusive right/bottom edges of each input.
    const int32_t aRight  = a->x + a->w;
    const int32_t aBottom = a->y + a->h;
    const int32_t bRight  = b->x + b->w;
    const int32_t bBottom = b->y + b->h;

    // Overlap spans from the larger origin to the smaller far edge.
    const int32_t left   = (a->x > b->x) ? a->x : b->x;
    const int32_t top    = (a->y > b->y) ? a->y : b->y;
    const int32_t right  = (aRight < bRight) ? aRight : bRight;
    const int32_t bottom = (aBottom < bBottom) ? aBottom : bBottom;

    const bool overlaps = (left < right) && (top < bottom);

    // A miss is hinted as the unlikely outcome.
    if (__builtin_expect(!overlaps, 0)) {
        return false;
    }

    result->x = left;
    result->y = top;
    result->w = right - left;
    result->h = bottom - top;

    return true;
}
|
|
|
|
|
|
// ============================================================
|
|
// rectIsEmpty
|
|
// ============================================================
|
|
|
|
bool rectIsEmpty(const RectT *r) {
|
|
return (r->w <= 0 || r->h <= 0);
|
|
}
|
|
|
|
|
|
// ============================================================
// rectsOverlapOrAdjacent
// ============================================================
//
// Separating-axis test with a gap tolerance. Two rects merge if they
// overlap OR if the gap between them is <= DIRTY_MERGE_GAP pixels.
// The gap tolerance is the key tuning parameter for the merge algorithm:
// too small and you get many tiny rects (expensive per-rect flush overhead);
// too large and you merge distant rects into one huge bounding box
// (wasted repaint of clean pixels). The early-out on each axis makes this
// very cheap for non-overlapping rects, which is the common case during
// the inner loop of dirtyListMerge.
|
|
|
|
static inline bool rectsOverlapOrAdjacent(const RectT *a, const RectT *b, int32_t gap) {
|
|
if (a->x + a->w + gap < b->x) { return false; }
|
|
if (b->x + b->w + gap < a->x) { return false; }
|
|
if (a->y + a->h + gap < b->y) { return false; }
|
|
if (b->y + b->h + gap < a->y) { return false; }
|
|
return true;
|
|
}
|
|
|
|
|
|
// ============================================================
// rectUnion
// ============================================================
//
// Axis-aligned bounding box of two rects. Supports in-place operation
// (result == a) for the merge loop. Note that this may grow the rect
// substantially if the two inputs are far apart — this is the inherent
// cost of bounding-box merging vs. maintaining a true region (list of
// non-overlapping rects). Bounding-box was chosen because the merge
// list is bounded to 128 entries and the extra repaint cost of a few
// clean pixels is negligible compared to the complexity of a proper
// region algebra on 486-class hardware.
|
|
|
|
static inline void rectUnion(const RectT *a, const RectT *b, RectT *result) {
    // Writes the smallest rect enclosing both a and b into *result.
    // Every input field is read before *result is written, so result may
    // point at a or b.

    // Exclusive right/bottom edges of each input.
    const int32_t aRight  = a->x + a->w;
    const int32_t aBottom = a->y + a->h;
    const int32_t bRight  = b->x + b->w;
    const int32_t bBottom = b->y + b->h;

    // Start from a's extents and widen them wherever b reaches further.
    int32_t left   = a->x;
    int32_t top    = a->y;
    int32_t right  = aRight;
    int32_t bottom = aBottom;

    if (b->x < left) {
        left = b->x;
    }
    if (b->y < top) {
        top = b->y;
    }
    if (bRight > right) {
        right = bRight;
    }
    if (bBottom > bottom) {
        bottom = bBottom;
    }

    result->x = left;
    result->y = top;
    result->w = right - left;
    result->h = bottom - top;
}
|