// dvx_comp.c -- Layer 3: Dirty rectangle compositor for DVX GUI (optimized)
//
// This layer implements dirty rectangle tracking and merging. The compositor
// avoids full-screen redraws, which would be prohibitively expensive on the
// target 486/Pentium hardware over ISA bus VESA LFB. A full 640x480x16bpp
// framebuffer is ~600KB -- at ISA's ~8MB/s theoretical peak, a blind full
// flush costs ~75ms (>1 frame at 60Hz). By tracking which rectangles have
// actually changed and flushing only those regions from the system RAM
// backbuffer to the LFB, the bandwidth consumed per frame scales with the
// amount of visual change rather than the screen resolution.
//
// The compositing loop lives in dvxApp.c (compositeAndFlush). For each dirty
// rect, it repaints the desktop, then walks the window stack bottom-to-top
// painting chrome, content, scrollbars, popup menus, and the cursor -- all
// clipped to the dirty rect. Only then is the dirty rect flushed to the LFB.
// This means each pixel in a dirty region is written to system RAM potentially
// multiple times (painter's algorithm), but the expensive LFB write happens
// exactly once per pixel per frame.

#include "dvxComp.h"
#include "platform/dvxPlatform.h"

// NOTE(review): the header name of the third #include was lost when this file
// was extracted; <string.h> is assumed here -- confirm against the original.
#include <string.h>

// Rects within this many pixels of each other get merged even if they don't
// overlap. A small gap tolerance absorbs jitter from mouse movement and
// closely-spaced UI invalidations (e.g. title bar + content during a drag)
// without bloating merged rects excessively. The value 4 was chosen to match
// the chrome border width -- adjacent chrome/content invalidations merge
// naturally.
#define DIRTY_MERGE_GAP 4 // ============================================================ // Prototypes // ============================================================ static inline bool rectsOverlapOrAdjacent(const RectT *a, const RectT *b, int32_t gap); static inline void rectUnion(const RectT *a, const RectT *b, RectT *result); // ============================================================ // dirtyListAdd // ============================================================ // // Appends a dirty rect to the list. Uses a fixed-size array (MAX_DIRTY_RECTS // = 128) rather than a dynamic allocation -- this is called on every UI // mutation (drag, repaint, focus change) so allocation overhead must be zero. // // When the list fills up, an eager merge pass tries to consolidate rects. // If the list is STILL full after merging (pathological scatter), the // nuclear option collapses everything into one bounding box. This guarantees // the list never overflows, at the cost of potentially over-painting a large // rect. In practice the merge pass almost always frees enough slots because // GUI mutations tend to cluster spatially. void dirtyListAdd(DirtyListT *dl, int32_t x, int32_t y, int32_t w, int32_t h) { // Branch hint: degenerate rects are rare -- callers usually validate first if (__builtin_expect(w <= 0 || h <= 0, 0)) { return; } // Overflow path: try merging, then fall back to a single bounding rect if (__builtin_expect(dl->count >= MAX_DIRTY_RECTS, 0)) { dirtyListMerge(dl); if (dl->count >= MAX_DIRTY_RECTS) { // Still full -- collapse the entire list plus the new rect into one // bounding box. This is a last resort; it means the next flush will // repaint a potentially large region, but at least we won't lose // dirty information or crash. 
RectT merged = dl->rects[0]; for (int32_t i = 1; i < dl->count; i++) { rectUnion(&merged, &dl->rects[i], &merged); } RectT newRect = {x, y, w, h}; rectUnion(&merged, &newRect, &merged); dl->rects[0] = merged; dl->count = 1; return; } } dl->rects[dl->count].x = x; dl->rects[dl->count].y = y; dl->rects[dl->count].w = w; dl->rects[dl->count].h = h; dl->count++; } // ============================================================ // dirtyListClear // ============================================================ void dirtyListClear(DirtyListT *dl) { dl->count = 0; } // ============================================================ // dirtyListInit // ============================================================ void dirtyListInit(DirtyListT *dl) { dl->count = 0; } // ============================================================ // dirtyListMerge // ============================================================ // // Coalesces overlapping or nearby dirty rects to reduce the number of // composite+flush passes. The trade-off: merging two rects into their // bounding box may add "clean" pixels that get needlessly repainted, but // this is far cheaper than the per-rect overhead of an extra composite // pass (clip setup, window-stack walk, LFB flush). On 486/Pentium ISA, // the LFB write latency per-rect dominates, so fewer larger rects win. // // Algorithm: O(N^2) pairwise sweep with bounded restarts. For each rect i, // scan all rects j>i and merge any that overlap or are within DIRTY_MERGE_GAP // pixels. When a merge happens, rect i grows and may now overlap rects that // it previously missed, so the inner scan restarts -- but restarts are capped // at 3 per slot to prevent O(N^3) cascading in pathological layouts (e.g. // a diagonal scatter of tiny rects). The cap of 3 was chosen empirically: // typical GUI operations produce clustered invalidations that converge in // 1-2 passes; 3 gives a safety margin without measurable overhead. 
// // Merged-away rects are removed by swap-with-last (O(1) removal from an // unordered list), which is why the rects array is not kept sorted. void dirtyListMerge(DirtyListT *dl) { if (dl->count <= 1) { return; } for (int32_t i = 0; i < dl->count; i++) { int32_t restarts = 0; bool merged = true; while (merged && restarts < 3) { merged = false; for (int32_t j = i + 1; j < dl->count; j++) { if (rectsOverlapOrAdjacent(&dl->rects[i], &dl->rects[j], DIRTY_MERGE_GAP)) { rectUnion(&dl->rects[i], &dl->rects[j], &dl->rects[i]); // Swap-with-last removal: order doesn't matter for merging dl->rects[j] = dl->rects[dl->count - 1]; dl->count--; j--; merged = true; } } restarts++; } } } // ============================================================ // flushRect // ============================================================ // // Copies one dirty rect from the system RAM backbuffer to the VESA LFB. // This is the single most bandwidth-sensitive operation in the entire GUI: // the LFB lives behind the ISA/PCI bus, so every byte written here is a // bus transaction. The platform layer (platformFlushRect) uses rep movsd // on 486+ to move aligned 32-bit words, maximizing bus utilization. // // Crucially, we flush per dirty rect AFTER all painting for that rect is // complete. This avoids visible tearing -- the LFB is never in a half-painted // state for any given region. void flushRect(DisplayT *d, const RectT *r) { platformFlushRect(d, r); } // ============================================================ // rectIntersect // ============================================================ // // Used heavily in the compositing loop to test whether a window overlaps // a dirty rect before painting it. The branch hint marks the non-overlapping // case as unlikely because the compositing loop already does a coarse AABB // check before calling this -- when we get here, intersection is expected. // The min/max formulation avoids branches in the hot path. 
bool rectIntersect(const RectT *a, const RectT *b, RectT *result) { int32_t ix1 = a->x > b->x ? a->x : b->x; int32_t iy1 = a->y > b->y ? a->y : b->y; int32_t ix2 = (a->x + a->w) < (b->x + b->w) ? (a->x + a->w) : (b->x + b->w); int32_t iy2 = (a->y + a->h) < (b->y + b->h) ? (a->y + a->h) : (b->y + b->h); if (__builtin_expect(ix1 >= ix2 || iy1 >= iy2, 0)) { return false; } result->x = ix1; result->y = iy1; result->w = ix2 - ix1; result->h = iy2 - iy1; return true; } // ============================================================ // rectIsEmpty // ============================================================ bool rectIsEmpty(const RectT *r) { return (r->w <= 0 || r->h <= 0); } // ============================================================ // rectsOverlapOrAdjacent // ============================================================ // // Separating-axis test with a gap tolerance. Two rects merge if they // overlap OR if the gap between them is <= DIRTY_MERGE_GAP pixels. // The gap tolerance is the key tuning parameter for the merge algorithm: // too small and you get many tiny rects (expensive per-rect flush overhead); // too large and you merge distant rects into one huge bounding box // (wasted repaint of clean pixels). The early-out on each axis makes this // very cheap for non-overlapping rects, which is the common case during // the inner loop of dirtyListMerge. static inline bool rectsOverlapOrAdjacent(const RectT *a, const RectT *b, int32_t gap) { if (a->x + a->w + gap < b->x) { return false; } if (b->x + b->w + gap < a->x) { return false; } if (a->y + a->h + gap < b->y) { return false; } if (b->y + b->h + gap < a->y) { return false; } return true; } // ============================================================ // rectUnion // ============================================================ // // Axis-aligned bounding box of two rects. Supports in-place operation // (result == a) for the merge loop. 
Note that this may grow the rect // substantially if the two inputs are far apart -- this is the inherent // cost of bounding-box merging vs. maintaining a true region (list of // non-overlapping rects). Bounding-box was chosen because the merge // list is bounded to 128 entries and the extra repaint cost of a few // clean pixels is negligible compared to the complexity of a proper // region algebra on 486-class hardware. static inline void rectUnion(const RectT *a, const RectT *b, RectT *result) { int32_t x1 = a->x < b->x ? a->x : b->x; int32_t y1 = a->y < b->y ? a->y : b->y; int32_t x2 = (a->x + a->w) > (b->x + b->w) ? (a->x + a->w) : (b->x + b->w); int32_t y2 = (a->y + a->h) > (b->y + b->h) ? (a->y + a->h) : (b->y + b->h); result->x = x1; result->y = y1; result->w = x2 - x1; result->h = y2 - y1; }