1358 lines
49 KiB
C
1358 lines
49 KiB
C
// dvx_draw.c -- Layer 2: Drawing primitives for DVX GUI (optimized)
|
|
//
|
|
// This is the second layer of the DVX compositor stack, sitting on top
|
|
// of dvxVideo (layer 1) and below dvxComp (layer 3). It provides all
|
|
// rasterization primitives: filled rects, buffer copies, beveled
|
|
// frames, bitmap font text, masked bitmaps (cursors/icons), and
|
|
// single-pixel operations.
|
|
//
|
|
// Every function here draws into the system-RAM backbuffer (d->backBuf),
|
|
// never directly to the LFB. The compositor layer is responsible for
|
|
// flushing changed regions to the hardware framebuffer via rep movsd.
|
|
// This separation means draw operations benefit from CPU cache (the
|
|
// backbuffer lives in cacheable system RAM) while LFB writes are
|
|
// batched into large sequential bursts.
|
|
//
|
|
// Performance strategy overview:
|
|
//
|
|
// The core tension on 486/Pentium is between generality and speed.
|
|
// The draw layer resolves this with a two-tier approach:
|
|
//
|
|
// 1) Span operations (spanFill/spanCopy) are dispatched through
|
|
// function pointers in BlitOpsT, set once at init based on bpp.
|
|
// The platform implementations use rep stosl/rep movsd inline asm
|
|
// for maximum throughput (the 486 executes rep stosl at 1 dword
|
|
// per clock after startup; the Pentium pairs it in the U-pipe).
|
|
// Using function pointers here costs one indirect call per span
|
|
// but avoids a bpp switch in the inner loop of rectFill, which
|
|
// would otherwise be a branch per scanline.
|
|
//
|
|
// 2) Character rendering (drawChar, drawTextN, drawTermRow) uses
|
|
// explicit if/else chains on bpp rather than function pointers.
|
|
// This is deliberate: the per-pixel work inside glyph rendering
|
|
// is a tight bit-test loop where an indirect call per pixel would
|
|
// be catastrophic, and the bpp branch is taken once per glyph row
|
|
// (hoisted out of the pixel loop). The compiler can also inline
|
|
// the pixel store when the bpp is a compile-time constant within
|
|
// each branch.
|
|
//
|
|
// 3) For the most critical glyph paths (unclipped 32bpp and 16bpp),
|
|
// the pixel loops are fully unrolled into 8 direct array stores
|
|
// with literal bit masks. This eliminates the sGlyphBit[] table
|
|
// lookup, the loop counter, and the loop branch -- saving ~3 cycles
|
|
// per pixel on a 486. The clipped path falls back to the table.
|
|
//
|
|
// Clip rectangle handling: All draw functions clip against
|
|
// d->clipX/Y/W/H (set by setClipRect in layer 1). The clipRect()
|
|
// helper is marked static inline so it compiles to straight-line
|
|
// compare-and-clamp code at each call site with no function call
|
|
// overhead. __builtin_expect hints mark the clipping branches as
|
|
// unlikely, helping the branch predictor on Pentium and later.
|
|
|
|
#include "dvxDraw.h"
|
|
#include "platform/dvxPlatform.h"
|
|
|
|
#include <string.h>
|
|
|
|
// ============================================================
|
|
// Prototypes
|
|
// ============================================================
|
|
|
|
char accelParse(const char *text);
|
|
static inline void clipRect(const DisplayT *d, int32_t *x, int32_t *y, int32_t *w, int32_t *h);
|
|
static inline void putPixel(uint8_t *dst, uint32_t color, int32_t bpp);
|
|
|
|
// Bit lookup tables for glyph and mask rendering. On a 486, a variable
|
|
// shift (1 << (7 - col)) costs 4 cycles per bit position; a table
|
|
// lookup is a fixed 1-cycle load from L1. The 8-entry sGlyphBit table
|
|
// maps column index 0..7 to the corresponding bit mask in a 1bpp glyph
|
|
// byte (MSB-first, matching standard VGA/bitmap font layout). The
|
|
// 16-entry sMaskBit table does the same for 16-pixel-wide cursor/icon
|
|
// masks.
|
|
static const uint8_t sGlyphBit[8] = {0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01};
|
|
static const uint16_t sMaskBit[16] = {0x8000, 0x4000, 0x2000, 0x1000, 0x0800, 0x0400, 0x0200, 0x0100, 0x0080, 0x0040, 0x0020, 0x0010, 0x0008, 0x0004, 0x0002, 0x0001};
|
|
|
|
|
|
// ============================================================
|
|
// accelParse
|
|
// ============================================================
|
|
//
|
|
// Scans a menu/button label for the & accelerator marker and returns
|
|
// the character after it (lowercased). Follows the Windows/Motif
|
|
// convention: "&File" means Alt+F activates it, "&&" is a literal &.
|
|
// Returns 0 if no accelerator is found. The result is always
|
|
// lowercased so the WM can do a single case-insensitive compare
|
|
// against incoming Alt+key events.
|
|
|
|
// Returns the accelerator key encoded in a label: the character that
// follows the first unescaped '&'. ASCII 'A'..'Z' is folded to lowercase;
// any other character is returned unchanged. "&&" is an escaped literal
// ampersand and is skipped. Returns 0 for a NULL label, a label with no
// marker, or a label that ends in a lone '&'.
char accelParse(const char *text) {
    if (text == NULL) {
        return 0;
    }

    for (const char *p = text; *p != '\0'; p++) {
        if (*p != '&') {
            continue;
        }

        const char next = p[1];

        if (next == '&') {
            // Escaped "&&": step onto the second '&' here; the loop
            // increment then moves past it.
            p++;
            continue;
        }

        if (next == '\0') {
            // Lone '&' at the end of the label -- nothing to bind.
            return 0;
        }

        if (next >= 'A' && next <= 'Z') {
            return (char)(next + ('a' - 'A'));
        }

        return next;
    }

    return 0;
}
|
|
|
|
|
|
// ============================================================
|
|
// clipRect
|
|
// ============================================================
|
|
//
|
|
// Intersects a rectangle with the display's current clip rect,
|
|
// modifying the rect in place. If the rect is fully outside the
|
|
// clip region, w or h will be <= 0 and callers bail out.
|
|
//
|
|
// Marked static inline because this is called on every rectFill,
|
|
// rectCopy, and indirectly on every glyph -- it must compile to
|
|
// straight-line clamp instructions with zero call overhead.
|
|
// __builtin_expect(..., 0) marks clipping as unlikely; in the
|
|
// common case windows are fully within the clip rect and all
|
|
// four branches fall through untaken. On Pentium this keeps the
|
|
// branch predictor happy (static not-taken prediction for forward
|
|
// branches), and on 486 it at least avoids the taken-branch penalty.
|
|
|
|
static inline void clipRect(const DisplayT *d, int32_t *x, int32_t *y, int32_t *w, int32_t *h) {
|
|
int32_t cx2 = d->clipX + d->clipW;
|
|
int32_t cy2 = d->clipY + d->clipH;
|
|
|
|
int32_t rx1 = *x;
|
|
int32_t ry1 = *y;
|
|
int32_t rx2 = rx1 + *w;
|
|
int32_t ry2 = ry1 + *h;
|
|
|
|
if (__builtin_expect(rx1 < d->clipX, 0)) { rx1 = d->clipX; }
|
|
if (__builtin_expect(ry1 < d->clipY, 0)) { ry1 = d->clipY; }
|
|
if (__builtin_expect(rx2 > cx2, 0)) { rx2 = cx2; }
|
|
if (__builtin_expect(ry2 > cy2, 0)) { ry2 = cy2; }
|
|
|
|
*x = rx1;
|
|
*y = ry1;
|
|
*w = rx2 - rx1;
|
|
*h = ry2 - ry1;
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// drawBevel
|
|
// ============================================================
|
|
//
|
|
// Draws a Motif/DESQview-style beveled rectangular frame. The bevel
|
|
// creates the illusion of a raised or sunken 3D surface by drawing
|
|
// lighter "highlight" edges on the top and left, and darker "shadow"
|
|
// edges on the bottom and right. Swapping highlight and shadow gives
|
|
// a sunken appearance (see BEVEL_RAISED/BEVEL_SUNKEN macros in
|
|
// dvxTypes.h).
|
|
//
|
|
// BevelStyleT.width controls the border thickness. DV/X uses 2px
|
|
// bevels for most window chrome (matching the original DESQview/X
|
|
// and Motif look), 1px for inner borders and scrollbar elements.
|
|
//
|
|
// The implementation has special-cased fast paths for bw==2 and bw==1
|
|
// that emit exact spans via rectFill rather than looping. This
|
|
// matters because drawBevel is called for every window frame, button,
|
|
// menu, and scrollbar element on every repaint -- the loop overhead
|
|
// and extra rectFill calls in the general case add up. Each rectFill
|
|
// call already handles clipping internally, so the bevels clip
|
|
// correctly even when a window is partially off-screen.
|
|
//
|
|
// face==0 means "don't fill the interior", which is used for frame-only
|
|
// bevels where the content area is painted separately by a callback.
|
|
|
|
// Draws a bevelled frame of style->width pixels around the rectangle
// (x, y, w, h): highlight colour on the top and left edges, shadow colour
// on the bottom and right edges. When style->face is non-zero the interior
// (inset by the bevel width) is filled with it first. All output goes
// through rectFill; widths 1 and 2 are written out as explicit calls,
// any other width uses the generic per-ring loops.
void drawBevel(DisplayT *d, const BlitOpsT *ops, int32_t x, int32_t y, int32_t w, int32_t h, const BevelStyleT *style) {
    const int32_t  bw     = style->width;
    const uint32_t light  = style->highlight;
    const uint32_t dark   = style->shadow;
    const int32_t  right  = x + w - 1;   // last column of the frame
    const int32_t  bottom = y + h - 1;   // last row of the frame

    // Optional interior fill
    if (style->face != 0) {
        rectFill(d, ops, x + bw, y + bw, w - 2 * bw, h - 2 * bw, style->face);
    }

    switch (bw) {
        case 1:
            rectFill(d, ops, x,     y,      w, 1,     light);   // top
            rectFill(d, ops, x,     y + 1,  1, h - 1, light);   // left
            rectFill(d, ops, x,     bottom, w, 1,     dark);    // bottom
            rectFill(d, ops, right, y + 1,  1, h - 2, dark);    // right
            break;

        case 2:
            // Top: outer then inner row
            rectFill(d, ops, x,     y,     w,     1, light);
            rectFill(d, ops, x + 1, y + 1, w - 2, 1, light);
            // Left: outer then inner column
            rectFill(d, ops, x,     y + 1, 1, h - 1, light);
            rectFill(d, ops, x + 1, y + 2, 1, h - 3, light);
            // Bottom: outer then inner row
            rectFill(d, ops, x,     bottom,     w,     1, dark);
            rectFill(d, ops, x + 1, bottom - 1, w - 2, 1, dark);
            // Right: outer then inner column
            rectFill(d, ops, right,     y + 1, 1, h - 2, dark);
            rectFill(d, ops, right - 1, y + 2, 1, h - 4, dark);
            break;

        default:
            // Generic width: one ring per iteration, edge by edge.
            for (int32_t ring = 0; ring < bw; ring++) {
                rectFill(d, ops, x + ring, y + ring, w - 2 * ring, 1, light);
            }
            for (int32_t ring = 0; ring < bw; ring++) {
                rectFill(d, ops, x + ring, y + ring + 1, 1, h - 2 * ring - 1, light);
            }
            for (int32_t ring = 0; ring < bw; ring++) {
                rectFill(d, ops, x + ring, bottom - ring, w - 2 * ring, 1, dark);
            }
            for (int32_t ring = 0; ring < bw; ring++) {
                rectFill(d, ops, right - ring, y + ring + 1, 1, h - 2 * ring - 2, dark);
            }
            break;
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// drawChar
|
|
// ============================================================
|
|
//
|
|
// Renders a single fixed-width bitmap font character into the
|
|
// backbuffer. Returns the character advance width (always
|
|
// font->charWidth) so callers can accumulate cursor position.
|
|
//
|
|
// Font format: each glyph is charHeight bytes of 1bpp data, MSB-first
|
|
// (bit 7 = leftmost pixel). This is the standard VGA/PC BIOS font
|
|
// format. We use 8-pixel-wide glyphs exclusively because 8 bits fit
|
|
// in one byte per scanline, making the inner loop a single byte load
|
|
// plus 8 bit tests -- no multi-byte glyph row assembly needed.
|
|
//
|
|
// The function has six specialized code paths (3 bpp x 2 modes),
|
|
// chosen with if/else chains rather than function pointers. On 486
|
|
// and Pentium, an indirect call through a function pointer stalls the
|
|
// pipeline (no branch target buffer for indirect calls on 486, and
|
|
// a mandatory bubble on Pentium). The if/else chain resolves at the
|
|
// outer loop level (once per glyph, not per pixel), so the per-pixel
|
|
// inner code is branch-free within each path.
|
|
//
|
|
// Opaque vs transparent mode:
|
|
// opaque=true: Fills the entire character cell (bg then fg). Used
|
|
// for normal text where the background must overwrite
|
|
// whatever was previously in the cell.
|
|
// opaque=false: Only writes foreground pixels; background shows
|
|
// through. Used for overlay text on existing content.
|
|
//
|
|
// The "unclipped fast path" (colStart==0, colEnd==cw) avoids the
|
|
// sGlyphBit[] table lookup by testing literal bit masks directly.
|
|
// This matters because the table lookup involves an indexed load
|
|
// (base + index * element_size), while the literal mask is an
|
|
// immediate operand in the compare instruction. At 8 pixels per row
|
|
// and 14-16 rows per glyph, saving even 1 cycle per pixel adds up
|
|
// across a full screen of text (~6400 characters at 80x80).
|
|
|
|
int32_t drawChar(DisplayT *d, const BlitOpsT *ops, const BitmapFontT *font, int32_t x, int32_t y, char ch, uint32_t fg, uint32_t bg, bool opaque) {
|
|
int32_t cw = font->charWidth;
|
|
int32_t chh = font->charHeight;
|
|
|
|
// Quick reject: entirely outside clip rect
|
|
if (__builtin_expect(x + cw <= d->clipX || x >= d->clipX + d->clipW || y + chh <= d->clipY || y >= d->clipY + d->clipH, 0)) {
|
|
return cw;
|
|
}
|
|
|
|
int32_t idx = (uint8_t)ch - font->firstChar;
|
|
if (__builtin_expect(idx < 0 || idx >= font->numChars, 0)) {
|
|
if (opaque) {
|
|
rectFill(d, ops, x, y, cw, chh, bg);
|
|
}
|
|
return cw;
|
|
}
|
|
|
|
const uint8_t *glyph = font->glyphData + idx * chh;
|
|
int32_t bpp = ops->bytesPerPixel;
|
|
int32_t pitch = d->pitch;
|
|
|
|
// Calculate clipped row/col bounds once
|
|
int32_t clipX1 = d->clipX;
|
|
int32_t clipX2 = d->clipX + d->clipW;
|
|
int32_t clipY1 = d->clipY;
|
|
int32_t clipY2 = d->clipY + d->clipH;
|
|
|
|
int32_t rowStart = 0;
|
|
int32_t rowEnd = chh;
|
|
if (y < clipY1) { rowStart = clipY1 - y; }
|
|
if (y + chh > clipY2) { rowEnd = clipY2 - y; }
|
|
|
|
int32_t colStart = 0;
|
|
int32_t colEnd = cw;
|
|
if (x < clipX1) { colStart = clipX1 - x; }
|
|
if (x + cw > clipX2) { colEnd = clipX2 - x; }
|
|
|
|
// Unclipped fast path: when the character cell is fully within the
|
|
// clip rect we can skip per-pixel clip checks and use the fully
|
|
// unrolled 8-store sequences below. This is the hot path for all
|
|
// text that isn't at the edge of a window.
|
|
bool unclipped = (colStart == 0 && colEnd == cw);
|
|
|
|
if (opaque) {
|
|
// Opaque mode: every pixel in the cell gets written (fg or bg).
|
|
// The unclipped 32bpp and 16bpp paths use branchless ternary
|
|
// stores -- the compiler emits cmov or conditional-set sequences
|
|
// that avoid branch misprediction penalties. Each row is 8
|
|
// direct array stores with no loop, no table lookup.
|
|
if (unclipped && bpp == 4) {
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint32_t *dst32 = (uint32_t *)(d->backBuf + (y + row) * pitch + x * 4);
|
|
uint8_t bits = glyph[row];
|
|
|
|
dst32[0] = (bits & 0x80) ? fg : bg;
|
|
dst32[1] = (bits & 0x40) ? fg : bg;
|
|
dst32[2] = (bits & 0x20) ? fg : bg;
|
|
dst32[3] = (bits & 0x10) ? fg : bg;
|
|
dst32[4] = (bits & 0x08) ? fg : bg;
|
|
dst32[5] = (bits & 0x04) ? fg : bg;
|
|
dst32[6] = (bits & 0x02) ? fg : bg;
|
|
dst32[7] = (bits & 0x01) ? fg : bg;
|
|
}
|
|
} else if (unclipped && bpp == 2) {
|
|
uint16_t fg16 = (uint16_t)fg;
|
|
uint16_t bg16 = (uint16_t)bg;
|
|
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint16_t *dst16 = (uint16_t *)(d->backBuf + (y + row) * pitch + x * 2);
|
|
uint8_t bits = glyph[row];
|
|
|
|
dst16[0] = (bits & 0x80) ? fg16 : bg16;
|
|
dst16[1] = (bits & 0x40) ? fg16 : bg16;
|
|
dst16[2] = (bits & 0x20) ? fg16 : bg16;
|
|
dst16[3] = (bits & 0x10) ? fg16 : bg16;
|
|
dst16[4] = (bits & 0x08) ? fg16 : bg16;
|
|
dst16[5] = (bits & 0x04) ? fg16 : bg16;
|
|
dst16[6] = (bits & 0x02) ? fg16 : bg16;
|
|
dst16[7] = (bits & 0x01) ? fg16 : bg16;
|
|
}
|
|
} else {
|
|
// Clipped path or 8bpp: use spanFill for bg (leveraging
|
|
// rep stosl), then iterate visible columns with sGlyphBit[]
|
|
// table for fg. 8bpp always takes this path because 8-bit
|
|
// stores can't be branchlessly ternary'd as efficiently --
|
|
// the compiler can't cmov into a byte store.
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
int32_t py = y + row;
|
|
uint8_t *dst = d->backBuf + py * pitch + (x + colStart) * bpp;
|
|
|
|
ops->spanFill(dst, bg, colEnd - colStart);
|
|
|
|
uint8_t bits = glyph[row];
|
|
if (bits == 0) {
|
|
continue;
|
|
}
|
|
|
|
dst = d->backBuf + py * pitch + x * bpp;
|
|
|
|
if (bpp == 2) {
|
|
uint16_t fg16 = (uint16_t)fg;
|
|
for (int32_t col = colStart; col < colEnd; col++) {
|
|
if (bits & sGlyphBit[col]) {
|
|
*(uint16_t *)(dst + col * 2) = fg16;
|
|
}
|
|
}
|
|
} else if (bpp == 4) {
|
|
for (int32_t col = colStart; col < colEnd; col++) {
|
|
if (bits & sGlyphBit[col]) {
|
|
*(uint32_t *)(dst + col * 4) = fg;
|
|
}
|
|
}
|
|
} else {
|
|
uint8_t fg8 = (uint8_t)fg;
|
|
for (int32_t col = colStart; col < colEnd; col++) {
|
|
if (bits & sGlyphBit[col]) {
|
|
dst[col] = fg8;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
} else {
|
|
// Transparent mode: only fg pixels are written; bg is untouched.
|
|
// The "bits == 0" early-out per row is important here: blank
|
|
// rows in the glyph (common in the top/bottom padding of most
|
|
// characters) skip all pixel work entirely. In opaque mode
|
|
// blank rows still need the bg fill so we can't skip them.
|
|
if (unclipped && bpp == 4) {
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint8_t bits = glyph[row];
|
|
if (bits == 0) {
|
|
continue;
|
|
}
|
|
|
|
uint32_t *dst32 = (uint32_t *)(d->backBuf + (y + row) * pitch + x * 4);
|
|
|
|
if (bits & 0x80) { dst32[0] = fg; }
|
|
if (bits & 0x40) { dst32[1] = fg; }
|
|
if (bits & 0x20) { dst32[2] = fg; }
|
|
if (bits & 0x10) { dst32[3] = fg; }
|
|
if (bits & 0x08) { dst32[4] = fg; }
|
|
if (bits & 0x04) { dst32[5] = fg; }
|
|
if (bits & 0x02) { dst32[6] = fg; }
|
|
if (bits & 0x01) { dst32[7] = fg; }
|
|
}
|
|
} else if (unclipped && bpp == 2) {
|
|
uint16_t fg16 = (uint16_t)fg;
|
|
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint8_t bits = glyph[row];
|
|
if (bits == 0) {
|
|
continue;
|
|
}
|
|
|
|
uint16_t *dst16 = (uint16_t *)(d->backBuf + (y + row) * pitch + x * 2);
|
|
|
|
if (bits & 0x80) { dst16[0] = fg16; }
|
|
if (bits & 0x40) { dst16[1] = fg16; }
|
|
if (bits & 0x20) { dst16[2] = fg16; }
|
|
if (bits & 0x10) { dst16[3] = fg16; }
|
|
if (bits & 0x08) { dst16[4] = fg16; }
|
|
if (bits & 0x04) { dst16[5] = fg16; }
|
|
if (bits & 0x02) { dst16[6] = fg16; }
|
|
if (bits & 0x01) { dst16[7] = fg16; }
|
|
}
|
|
} else {
|
|
// Clipped path or 8bpp
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint8_t bits = glyph[row];
|
|
if (bits == 0) {
|
|
continue;
|
|
}
|
|
|
|
int32_t py = y + row;
|
|
uint8_t *dst = d->backBuf + py * pitch + x * bpp;
|
|
|
|
if (bpp == 2) {
|
|
uint16_t fg16 = (uint16_t)fg;
|
|
for (int32_t col = colStart; col < colEnd; col++) {
|
|
if (bits & sGlyphBit[col]) {
|
|
*(uint16_t *)(dst + col * 2) = fg16;
|
|
}
|
|
}
|
|
} else if (bpp == 4) {
|
|
for (int32_t col = colStart; col < colEnd; col++) {
|
|
if (bits & sGlyphBit[col]) {
|
|
*(uint32_t *)(dst + col * 4) = fg;
|
|
}
|
|
}
|
|
} else {
|
|
uint8_t fg8 = (uint8_t)fg;
|
|
for (int32_t col = colStart; col < colEnd; col++) {
|
|
if (bits & sGlyphBit[col]) {
|
|
dst[col] = fg8;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return cw;
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// drawTextN
|
|
// ============================================================
|
|
//
|
|
// Renders exactly 'count' characters from a buffer in one pass.
|
|
// Same idea as drawTermRow but for uniform fg/bg text runs.
|
|
// Avoids per-character function call overhead, redundant clip
|
|
// calculation, and spanFill startup costs.
|
|
//
|
|
// The key optimization over calling drawChar() in a loop is the
|
|
// bg fill strategy: in opaque mode, instead of calling spanFill
|
|
// once per character cell per row (count * charHeight spanFill
|
|
// calls), we fill the entire visible span's background in one
|
|
// spanFill per scanline (just charHeight calls total). Then we
|
|
// overlay only the fg glyph pixels. For an 80-column line this
|
|
// reduces spanFill calls from 80*16=1280 to just 16. Each
|
|
// spanFill maps to a single rep stosl, so we're also getting
|
|
// better write-combine utilization from the larger sequential
|
|
// stores.
|
|
//
|
|
// Horizontal clipping is done at the character level (firstChar/
|
|
// lastChar) to avoid iterating invisible characters, with per-pixel
|
|
// edge clipping only for the partially visible first and last chars.
|
|
|
|
// Draws exactly `count` characters of `text` starting at (x, y) with one
// fg/bg pair. Characters that fall wholly outside the clip rect are never
// visited; the first and last visible cells are clipped per pixel. In
// opaque mode the background of the whole visible run is span-filled once
// per scanline before any glyph is drawn, and only fg pixels are written
// afterwards. Characters with no glyph in the font are skipped (in opaque
// mode their cell keeps the bg fill).
void drawTextN(DisplayT *d, const BlitOpsT *ops, const BitmapFontT *font, int32_t x, int32_t y, const char *text, int32_t count, uint32_t fg, uint32_t bg, bool opaque) {
    if (count <= 0) {
        return;
    }

    const int32_t cellW = font->charWidth;
    const int32_t cellH = font->charHeight;
    const int32_t bpp   = ops->bytesPerPixel;
    const int32_t pitch = d->pitch;

    const int32_t clipL = d->clipX;
    const int32_t clipT = d->clipY;
    const int32_t clipR = d->clipX + d->clipW;
    const int32_t clipB = d->clipY + d->clipH;

    // Trivial rejects: run entirely above/below, then left/right of clip.
    if (y + cellH <= clipT || y >= clipB) {
        return;
    }

    const int32_t runW = count * cellW;

    if (x + runW <= clipL || x >= clipR) {
        return;
    }

    // Visible glyph scanlines (rowEnd exclusive).
    const int32_t rowStart = (y < clipT) ? clipT - y : 0;
    const int32_t rowEnd   = (y + cellH > clipB) ? clipB - y : cellH;

    // Visible character range [firstVis, endVis).
    const int32_t firstVis = (x < clipL) ? (clipL - x) / cellW : 0;
    int32_t       endVis   = count;

    if (x + runW > clipR) {
        endVis = (clipR - x + cellW - 1) / cellW;
        if (endVis > count) {
            endVis = count;
        }
    }

    // Pixel columns of the first visible cell hidden behind the left clip edge.
    const int32_t firstCellX = x + firstVis * cellW;
    const int32_t leadSkip   = (firstCellX < clipL) ? clipL - firstCellX : 0;

    if (opaque) {
        // One bg span per scanline covering every visible cell.
        int32_t bgL = firstCellX;
        int32_t bgR = x + endVis * cellW;

        if (bgL < clipL) {
            bgL = clipL;
        }
        if (bgR > clipR) {
            bgR = clipR;
        }

        const int32_t bgW = bgR - bgL;

        if (bgW > 0) {
            uint8_t *line = d->backBuf + (y + rowStart) * pitch + bgL * bpp;

            for (int32_t r = rowStart; r < rowEnd; r++, line += pitch) {
                ops->spanFill(line, bg, bgW);
            }
        }
    }

    // Foreground pass: one cell at a time.
    for (int32_t ci = firstVis; ci < endVis; ci++) {
        const int32_t glyphIdx = (uint8_t)text[ci] - font->firstChar;

        if (glyphIdx < 0 || glyphIdx >= font->numChars) {
            continue;
        }

        const uint8_t *glyph    = font->glyphData + glyphIdx * cellH;
        const int32_t  cellX    = x + ci * cellW;
        const int32_t  colStart = (ci == firstVis) ? leadSkip : 0;
        const int32_t  colEnd   = (cellX + cellW > clipR) ? clipR - cellX : cellW;

        switch (bpp) {
            case 2: {
                const uint16_t fg16 = (uint16_t)fg;
                uint8_t       *line = d->backBuf + (y + rowStart) * pitch + cellX * 2;

                for (int32_t r = rowStart; r < rowEnd; r++, line += pitch) {
                    const uint8_t bits = glyph[r];
                    if (bits == 0) {
                        continue;
                    }

                    uint16_t *px = (uint16_t *)line;

                    for (int32_t c = colStart; c < colEnd; c++) {
                        if (bits & sGlyphBit[c]) {
                            px[c] = fg16;
                        }
                    }
                }
                break;
            }

            case 4: {
                uint8_t *line = d->backBuf + (y + rowStart) * pitch + cellX * 4;

                for (int32_t r = rowStart; r < rowEnd; r++, line += pitch) {
                    const uint8_t bits = glyph[r];
                    if (bits == 0) {
                        continue;
                    }

                    uint32_t *px = (uint32_t *)line;

                    for (int32_t c = colStart; c < colEnd; c++) {
                        if (bits & sGlyphBit[c]) {
                            px[c] = fg;
                        }
                    }
                }
                break;
            }

            default: {
                // Every other bpp is addressed as one byte per pixel.
                const uint8_t fg8  = (uint8_t)fg;
                uint8_t      *line = d->backBuf + (y + rowStart) * pitch + cellX;

                for (int32_t r = rowStart; r < rowEnd; r++, line += pitch) {
                    const uint8_t bits = glyph[r];
                    if (bits == 0) {
                        continue;
                    }

                    for (int32_t c = colStart; c < colEnd; c++) {
                        if (bits & sGlyphBit[c]) {
                            line[c] = fg8;
                        }
                    }
                }
                break;
            }
        }
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// drawFocusRect
|
|
// ============================================================
|
|
//
|
|
// Draws a dotted (every-other-pixel) rectangle to indicate keyboard
|
|
// focus, matching the Windows/Motif convention. Uses putPixel per
|
|
// dot rather than spanFill because the alternating pattern can't be
|
|
// expressed as a span fill (which writes uniform color).
|
|
//
|
|
// The parity calculations on the bottom and right edges ensure the
|
|
// dot pattern is visually continuous around corners -- the starting
|
|
// pixel of each edge is offset so dots don't double up or gap at
|
|
// the corner where two edges meet.
|
|
//
|
|
// This is not performance-critical; focus rects are drawn at most
|
|
// once per focused widget per repaint.
|
|
|
|
// Draws a dotted (every-other-pixel) outline of the rectangle (x, y, w, h)
// in `color`, one putPixel per dot, each dot individually tested against
// the clip rect.
//
// The dots follow a checkerboard anchored at the top-left corner: the
// bottom edge and the right edge start on the parity of the rectangle's
// height/width so the pattern stays continuous around the corners, and
// the vertical edges leave the corner pixels to the horizontal edges.
//
// An empty rectangle (w <= 0 or h <= 0) draws nothing. Without this guard
// the derived far edges (x + w - 1, y + h - 1) land before the near edges
// and would be drawn as stray lines outside the requested area.
void drawFocusRect(DisplayT *d, const BlitOpsT *ops, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
    if (w <= 0 || h <= 0) {
        return;
    }

    int32_t bpp   = ops->bytesPerPixel;
    int32_t pitch = d->pitch;

    int32_t clipX1 = d->clipX;
    int32_t clipX2 = d->clipX + d->clipW;
    int32_t clipY1 = d->clipY;
    int32_t clipY2 = d->clipY + d->clipH;

    // Inclusive far edges
    int32_t x2 = x + w - 1;
    int32_t y2 = y + h - 1;

    // Top edge
    if (y >= clipY1 && y < clipY2) {
        for (int32_t px = x; px <= x2; px += 2) {
            if (px >= clipX1 && px < clipX2) {
                putPixel(d->backBuf + y * pitch + px * bpp, color, bpp);
            }
        }
    }

    // Bottom edge (skipped for a 1-pixel-tall rect, where it is the top edge)
    if (y2 >= clipY1 && y2 < clipY2 && y2 != y) {
        int32_t parity = (y2 - y) & 1;

        for (int32_t px = x + parity; px <= x2; px += 2) {
            if (px >= clipX1 && px < clipX2) {
                putPixel(d->backBuf + y2 * pitch + px * bpp, color, bpp);
            }
        }
    }

    // Left edge (corner rows belong to the top/bottom edges)
    if (x >= clipX1 && x < clipX2) {
        for (int32_t py = y + 2; py < y2; py += 2) {
            if (py >= clipY1 && py < clipY2) {
                putPixel(d->backBuf + py * pitch + x * bpp, color, bpp);
            }
        }
    }

    // Right edge (skipped for a 1-pixel-wide rect, where it is the left edge)
    if (x2 >= clipX1 && x2 < clipX2 && x2 != x) {
        int32_t parity = (x2 - x) & 1;

        for (int32_t py = y + 2 - parity; py < y2; py += 2) {
            if (py >= clipY1 && py < clipY2) {
                putPixel(d->backBuf + py * pitch + x2 * bpp, color, bpp);
            }
        }
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// drawHLine
|
|
// ============================================================
|
|
//
|
|
// Thin convenience wrapper -- a horizontal line is just a 1px-tall rect.
|
|
// Delegates to rectFill which handles clipping and uses spanFill (rep
|
|
// stosl) for the actual write.
|
|
|
|
// Draws a horizontal line of `w` pixels starting at (x, y) by filling a
// rectangle one pixel tall; rectFill does all of the work.
void drawHLine(DisplayT *d, const BlitOpsT *ops, int32_t x, int32_t y, int32_t w, uint32_t color) {
    const int32_t lineHeight = 1;

    rectFill(d, ops, x, y, w, lineHeight, color);
}
|
|
|
|
|
|
// ============================================================
|
|
// drawInit
|
|
// ============================================================
|
|
//
|
|
// Wires up the BlitOpsT function pointers to the correct
|
|
// platform-specific span operations for the active pixel format.
|
|
// Called once during startup after videoInit determines the bpp.
|
|
//
|
|
// The span ops are the only place where function pointers are used
|
|
// in the draw layer. This is a deliberate performance tradeoff:
|
|
// spanFill and spanCopy are called per-scanline (not per-pixel),
|
|
// so the indirect call overhead (~5 cycles on Pentium for the
|
|
// mispredicted first call, then predicted afterward) is amortized
|
|
// over an entire row of pixels. The alternative -- a switch inside
|
|
// rectFill's inner loop -- would branch every scanline for no gain.
|
|
//
|
|
// The platform implementations (dvxPlatformDos.c) use inline asm:
|
|
// spanFill8/16/32 -> rep stosl (fills 4 bytes per clock)
|
|
// spanCopy8/16/32 -> rep movsd (copies 4 bytes per clock)
|
|
// These are the fastest bulk memory operations available on 486/
|
|
// Pentium without SSE. The 8-bit and 16-bit variants handle
|
|
// alignment preambles to get to dword boundaries, then use
|
|
// rep stosl/movsd for the bulk.
|
|
|
|
// Fills in a BlitOpsT from the display's pixel format: copies
// bytesPerPixel and pitch, then selects the platform span fill/copy pair
// for that pixel size. 2 and 4 bytes per pixel get the 16- and 32-bit
// routines; 1 byte per pixel and any unrecognised size get the 8-bit ones.
void drawInit(BlitOpsT *ops, const DisplayT *d) {
    ops->bytesPerPixel = d->format.bytesPerPixel;
    ops->pitch         = d->pitch;

    if (d->format.bytesPerPixel == 2) {
        ops->spanFill = platformSpanFill16;
        ops->spanCopy = platformSpanCopy16;
    } else if (d->format.bytesPerPixel == 4) {
        ops->spanFill = platformSpanFill32;
        ops->spanCopy = platformSpanCopy32;
    } else {
        // 1 byte per pixel, and the fallback for anything unexpected
        ops->spanFill = platformSpanFill8;
        ops->spanCopy = platformSpanCopy8;
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// drawMaskedBitmap
|
|
// ============================================================
|
|
//
|
|
// Renders a 1-bit masked bitmap (used for mouse cursors and icons).
|
|
// The two-plane format mirrors the hardware cursor format used by
|
|
// VGA and early SVGA cards:
|
|
//
|
|
// andMask bit=1, xorData bit=X -> transparent (pixel unchanged)
|
|
// andMask bit=0, xorData bit=0 -> bgColor
|
|
// andMask bit=0, xorData bit=1 -> fgColor
|
|
//
|
|
// Each row is a uint16_t (supporting up to 16 pixels wide), stored
|
|
// MSB-first. This is sufficient for standard 16x16 mouse cursors.
|
|
//
|
|
// The colMask optimization pre-computes which bits in each row fall
|
|
// within the visible (clipped) columns. For fully transparent rows
|
|
// (all visible bits have andMask=1), the entire row is skipped with
|
|
// a single bitwise AND + compare -- no per-pixel iteration needed.
|
|
|
|
// Draws a two-plane, 1-bit-per-pixel masked bitmap at (x, y). Row `r` of
// the image is described by andMask[r] and xorData[r], MSB = leftmost
// pixel:
//
//   andMask bit set                     -> pixel left untouched
//   andMask bit clear, xorData bit clear -> bgColor
//   andMask bit clear, xorData bit set   -> fgColor
//
// Rows and columns are clipped against the display clip rect up front.
// A row whose visible columns are all masked out is skipped without
// touching the backbuffer.
//
// Each row holds one uint16_t of mask data and sMaskBit has one entry per
// bit, so only the first 16 columns can carry pixels. `w` is clamped to
// that limit; a wider request previously indexed past the end of sMaskBit.
void drawMaskedBitmap(DisplayT *d, const BlitOpsT *ops, int32_t x, int32_t y, int32_t w, int32_t h, const uint16_t *andMask, const uint16_t *xorData, uint32_t fgColor, uint32_t bgColor) {
    int32_t bpp   = ops->bytesPerPixel;
    int32_t pitch = d->pitch;

    // Never address more columns than a mask row can describe.
    const int32_t maxCols = (int32_t)(sizeof sMaskBit / sizeof sMaskBit[0]);
    if (w > maxCols) {
        w = maxCols;
    }

    // Pre-clip row/col bounds
    int32_t clipX1 = d->clipX;
    int32_t clipX2 = d->clipX + d->clipW;
    int32_t clipY1 = d->clipY;
    int32_t clipY2 = d->clipY + d->clipH;

    int32_t rowStart = 0;
    int32_t rowEnd   = h;
    if (y < clipY1) { rowStart = clipY1 - y; }
    if (y + h > clipY2) { rowEnd = clipY2 - y; }

    int32_t colStart = 0;
    int32_t colEnd   = w;
    if (x < clipX1) { colStart = clipX1 - x; }
    if (x + w > clipX2) { colEnd = clipX2 - x; }

    // Nothing visible (also covers w <= 0 and h <= 0)
    if (colStart >= colEnd || rowStart >= rowEnd) {
        return;
    }

    // Bits of a mask row that correspond to visible columns (loop-invariant)
    uint16_t colMask = 0;
    for (int32_t col = colStart; col < colEnd; col++) {
        colMask |= sMaskBit[col];
    }

    for (int32_t row = rowStart; row < rowEnd; row++) {
        uint16_t mask = andMask[row];
        uint16_t data = xorData[row];

        // Every visible pixel of this row is transparent
        if ((mask & colMask) == colMask) {
            continue;
        }

        int32_t  py  = y + row;
        uint8_t *dst = d->backBuf + py * pitch + x * bpp;

        if (bpp == 2) {
            uint16_t fg16 = (uint16_t)fgColor;
            uint16_t bg16 = (uint16_t)bgColor;

            for (int32_t col = colStart; col < colEnd; col++) {
                uint16_t bit = sMaskBit[col];
                if (!(mask & bit)) {
                    *(uint16_t *)(dst + col * 2) = (data & bit) ? fg16 : bg16;
                }
            }
        } else if (bpp == 4) {
            for (int32_t col = colStart; col < colEnd; col++) {
                uint16_t bit = sMaskBit[col];
                if (!(mask & bit)) {
                    *(uint32_t *)(dst + col * 4) = (data & bit) ? fgColor : bgColor;
                }
            }
        } else {
            uint8_t fg8 = (uint8_t)fgColor;
            uint8_t bg8 = (uint8_t)bgColor;

            for (int32_t col = colStart; col < colEnd; col++) {
                uint16_t bit = sMaskBit[col];
                if (!(mask & bit)) {
                    dst[col] = (data & bit) ? fg8 : bg8;
                }
            }
        }
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// drawTermRow
|
|
// ============================================================
|
|
//
|
|
// Renders an entire row of terminal character cells in one pass.
|
|
// lineData points to (ch, attr) pairs. palette is a 16-entry
|
|
// packed-color table. This avoids per-character function call
|
|
// overhead, redundant clip calculation, and spanFill startup
|
|
// costs that make drawChar expensive when called 80x per row.
|
|
//
|
|
// This is the primary rendering function for the terminal emulator.
|
|
// The attribute byte uses the standard CGA/VGA format:
|
|
// bits 0-3: foreground color (0-15)
|
|
// bits 4-6: background color (0-7)
|
|
// bit 7: blink flag
|
|
//
|
|
// Unlike drawTextN (which handles uniform fg/bg), every cell here
|
|
// can have a different fg/bg pair, so the bg can't be filled in a
|
|
// single bulk pass. Instead each cell is rendered individually,
|
|
// always in opaque mode (every pixel gets a write). The bpp branch
|
|
// is still hoisted outside the per-pixel loop -- the outer loop
|
|
// selects the bpp path once, then iterates cells within it.
|
|
//
|
|
// blinkVisible controls the blink phase: when false, fg is replaced
|
|
// with bg for characters that have bit 7 set, effectively hiding them.
|
|
// cursorCol specifies which cell (if any) should be drawn with
|
|
// inverted fg/bg to show the text cursor.
|
|
|
|
void drawTermRow(DisplayT *d, const BlitOpsT *ops, const BitmapFontT *font, int32_t x, int32_t y, int32_t cols, const uint8_t *lineData, const uint32_t *palette, bool blinkVisible, int32_t cursorCol) {
|
|
int32_t cw = font->charWidth;
|
|
int32_t ch = font->charHeight;
|
|
int32_t bpp = ops->bytesPerPixel;
|
|
int32_t pitch = d->pitch;
|
|
|
|
// Row-level clip: reject if entirely outside vertically
|
|
int32_t clipX1 = d->clipX;
|
|
int32_t clipX2 = d->clipX + d->clipW;
|
|
int32_t clipY1 = d->clipY;
|
|
int32_t clipY2 = d->clipY + d->clipH;
|
|
|
|
if (y + ch <= clipY1 || y >= clipY2) {
|
|
return;
|
|
}
|
|
|
|
// Vertical clip for glyph scanlines
|
|
int32_t rowStart = 0;
|
|
int32_t rowEnd = ch;
|
|
if (y < clipY1) { rowStart = clipY1 - y; }
|
|
if (y + ch > clipY2) { rowEnd = clipY2 - y; }
|
|
|
|
// Horizontal clip: find first and last visible column
|
|
int32_t rowW = cols * cw;
|
|
int32_t firstCol = 0;
|
|
int32_t lastCol = cols;
|
|
|
|
if (x + rowW <= clipX1 || x >= clipX2) {
|
|
return;
|
|
}
|
|
|
|
if (x < clipX1) {
|
|
firstCol = (clipX1 - x) / cw;
|
|
}
|
|
|
|
if (x + rowW > clipX2) {
|
|
lastCol = (clipX2 - x + cw - 1) / cw;
|
|
if (lastCol > cols) { lastCol = cols; }
|
|
}
|
|
|
|
// Per-column clip for partially visible edge cells
|
|
int32_t edgeColStart = 0;
|
|
|
|
if (x + firstCol * cw < clipX1) {
|
|
edgeColStart = clipX1 - (x + firstCol * cw);
|
|
}
|
|
|
|
// Render each visible cell
|
|
for (int32_t col = firstCol; col < lastCol; col++) {
|
|
uint8_t gch = lineData[col * 2];
|
|
uint8_t attr = lineData[col * 2 + 1];
|
|
uint32_t fg = palette[attr & 0x0F];
|
|
uint32_t bg = palette[(attr >> 4) & 0x07];
|
|
|
|
// Blink: hide text during off phase
|
|
if ((attr & 0x80) && !blinkVisible) {
|
|
fg = bg;
|
|
}
|
|
|
|
// Cursor: invert colors
|
|
if (col == cursorCol) {
|
|
uint32_t tmp = fg;
|
|
fg = bg;
|
|
bg = tmp;
|
|
}
|
|
|
|
int32_t cx = x + col * cw;
|
|
|
|
// Determine per-cell horizontal clip
|
|
int32_t cStart = 0;
|
|
int32_t cEnd = cw;
|
|
|
|
if (col == firstCol) {
|
|
cStart = edgeColStart;
|
|
}
|
|
|
|
if (cx + cw > clipX2) {
|
|
cEnd = clipX2 - cx;
|
|
}
|
|
|
|
// Look up glyph data
|
|
int32_t idx = (uint8_t)gch - font->firstChar;
|
|
const uint8_t *glyph = NULL;
|
|
|
|
if (idx >= 0 && idx < font->numChars) {
|
|
glyph = font->glyphData + idx * ch;
|
|
}
|
|
|
|
// Render scanlines
|
|
if (bpp == 2) {
|
|
uint16_t fg16 = (uint16_t)fg;
|
|
uint16_t bg16 = (uint16_t)bg;
|
|
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint16_t *dst = (uint16_t *)(d->backBuf + (y + row) * pitch + cx * 2);
|
|
uint8_t bits = glyph ? glyph[row] : 0;
|
|
|
|
for (int32_t p = cStart; p < cEnd; p++) {
|
|
dst[p] = (bits & sGlyphBit[p]) ? fg16 : bg16;
|
|
}
|
|
}
|
|
} else if (bpp == 4) {
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint32_t *dst = (uint32_t *)(d->backBuf + (y + row) * pitch + cx * 4);
|
|
uint8_t bits = glyph ? glyph[row] : 0;
|
|
|
|
for (int32_t p = cStart; p < cEnd; p++) {
|
|
dst[p] = (bits & sGlyphBit[p]) ? fg : bg;
|
|
}
|
|
}
|
|
} else {
|
|
uint8_t fg8 = (uint8_t)fg;
|
|
uint8_t bg8 = (uint8_t)bg;
|
|
|
|
for (int32_t row = rowStart; row < rowEnd; row++) {
|
|
uint8_t *dst = d->backBuf + (y + row) * pitch + cx;
|
|
uint8_t bits = glyph ? glyph[row] : 0;
|
|
|
|
for (int32_t p = cStart; p < cEnd; p++) {
|
|
dst[p] = (bits & sGlyphBit[p]) ? fg8 : bg8;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// drawText
|
|
// ============================================================
|
|
//
|
|
// Renders a null-terminated string by calling drawChar per character.
|
|
// Simpler than drawTextN but slower for long runs because each
|
|
// drawChar call independently clips, computes row bounds, and
|
|
// dispatches on bpp. Used for short labels and ad-hoc text where
|
|
// the call overhead doesn't matter; drawTextN is preferred for
|
|
// bulk text (editor buffers, list views, etc.).
|
|
//
|
|
// The left-of-clip skip avoids calling drawChar for characters that
|
|
// are entirely to the left of the visible area. The right-of-clip
|
|
// early-out breaks the loop as soon as we've passed the right edge.
|
|
// These are both marked unlikely (__builtin_expect) because the
|
|
// common case is text fully within the clip rect.
|
|
|
|
void drawText(DisplayT *d, const BlitOpsT *ops, const BitmapFontT *font, int32_t x, int32_t y, const char *text, uint32_t fg, uint32_t bg, bool opaque) {
|
|
int32_t cw = font->charWidth;
|
|
int32_t clipX2 = d->clipX + d->clipW;
|
|
|
|
while (*text) {
|
|
// Early out if we've moved past the right clip edge
|
|
if (__builtin_expect(x >= clipX2, 0)) {
|
|
break;
|
|
}
|
|
|
|
// Skip characters entirely to the left of clip
|
|
if (__builtin_expect(x + cw <= d->clipX, 0)) {
|
|
x += cw;
|
|
text++;
|
|
continue;
|
|
}
|
|
|
|
x += drawChar(d, ops, font, x, y, *text, fg, bg, opaque);
|
|
text++;
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// drawTextAccel
|
|
// ============================================================
|
|
//
|
|
// Like drawText but interprets & markers in the string: the character
|
|
// following & is drawn with an underline to indicate it's the keyboard
|
|
// accelerator (e.g. "&File" draws "File" with F underlined). "&&"
|
|
// draws a literal &. This matches the Windows/Motif convention for
|
|
// menu and button labels.
|
|
//
|
|
// The underline is drawn as a 1px horizontal line at the bottom of
|
|
// the character cell (y + charHeight - 1), which is the standard
|
|
// placement for accelerator underlines.
|
|
|
|
void drawTextAccel(DisplayT *d, const BlitOpsT *ops, const BitmapFontT *font, int32_t x, int32_t y, const char *text, uint32_t fg, uint32_t bg, bool opaque) {
|
|
int32_t cw = font->charWidth;
|
|
int32_t clipX2 = d->clipX + d->clipW;
|
|
|
|
while (*text) {
|
|
if (__builtin_expect(x >= clipX2, 0)) {
|
|
break;
|
|
}
|
|
|
|
if (*text == '&') {
|
|
text++;
|
|
|
|
if (*text == '&') {
|
|
// Escaped && -- draw literal &
|
|
if (x + cw > d->clipX) {
|
|
drawChar(d, ops, font, x, y, '&', fg, bg, opaque);
|
|
}
|
|
|
|
x += cw;
|
|
text++;
|
|
continue;
|
|
}
|
|
|
|
if (*text) {
|
|
// Accelerator character -- draw it then underline
|
|
if (x + cw > d->clipX) {
|
|
drawChar(d, ops, font, x, y, *text, fg, bg, opaque);
|
|
drawHLine(d, ops, x, y + font->charHeight - 1, cw, fg);
|
|
}
|
|
|
|
x += cw;
|
|
text++;
|
|
continue;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
if (x + cw > d->clipX) {
|
|
drawChar(d, ops, font, x, y, *text, fg, bg, opaque);
|
|
}
|
|
|
|
x += cw;
|
|
text++;
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// drawVLine
|
|
// ============================================================
|
|
//
|
|
// Draws a vertical line pixel-by-pixel. Unlike drawHLine (which
|
|
// delegates to rectFill -> spanFill for a single-row span), a
|
|
// vertical line can't use spanFill because each pixel is on a
|
|
// different scanline. Instead we advance by d->pitch per pixel
|
|
// and write directly, branching on bpp once at the top.
|
|
//
|
|
// The ops parameter is unused (suppressed with (void)ops) because
|
|
// spanFill operates on contiguous horizontal runs and is useless
|
|
// for vertical lines. We keep the parameter for API consistency
|
|
// with the rest of the draw layer.
|
|
|
|
void drawVLine(DisplayT *d, const BlitOpsT *ops, int32_t x, int32_t y, int32_t h, uint32_t color) {
|
|
(void)ops;
|
|
|
|
// Inline single-pixel-wide fill to avoid rectFill overhead for narrow lines
|
|
if (__builtin_expect(x < d->clipX || x >= d->clipX + d->clipW, 0)) {
|
|
return;
|
|
}
|
|
|
|
int32_t y1 = y;
|
|
int32_t y2 = y + h;
|
|
if (y1 < d->clipY) { y1 = d->clipY; }
|
|
if (y2 > d->clipY + d->clipH) { y2 = d->clipY + d->clipH; }
|
|
if (y1 >= y2) { return; }
|
|
|
|
int32_t bpp = d->format.bytesPerPixel;
|
|
uint8_t *dst = d->backBuf + y1 * d->pitch + x * bpp;
|
|
int32_t pitch = d->pitch;
|
|
|
|
if (bpp == 2) {
|
|
uint16_t c16 = (uint16_t)color;
|
|
for (int32_t i = y1; i < y2; i++) {
|
|
*(uint16_t *)dst = c16;
|
|
dst += pitch;
|
|
}
|
|
} else if (bpp == 4) {
|
|
for (int32_t i = y1; i < y2; i++) {
|
|
*(uint32_t *)dst = color;
|
|
dst += pitch;
|
|
}
|
|
} else {
|
|
uint8_t c8 = (uint8_t)color;
|
|
for (int32_t i = y1; i < y2; i++) {
|
|
*dst = c8;
|
|
dst += pitch;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
// putPixel
// ============================================================
//
// Stores one pixel at an address the caller has already computed.
// bpp selects the store width: 2 writes the low 16 bits of color,
// 4 writes all 32 bits, and any other value writes the low 8 bits.
//
// Only used by drawFocusRect for its alternating dot pattern.
// Declared static inline so the store can be emitted at the call
// site without call overhead; a per-pixel bpp dispatch is acceptable
// here because focus rect drawing is infrequent.

static inline void putPixel(uint8_t *dst, uint32_t color, int32_t bpp) {
    switch (bpp) {
        case 2:
            *(uint16_t *)dst = (uint16_t)color;
            break;

        case 4:
            *(uint32_t *)dst = color;
            break;

        default:
            *dst = (uint8_t)color;
            break;
    }
}
|
|
|
|
|
|
// ============================================================
|
|
// rectCopy
|
|
// ============================================================
|
|
//
|
|
// Copies a rectangular region from an arbitrary source buffer into
|
|
// the display backbuffer. Used by the compositor to blit per-window
|
|
// content buffers (win->contentBuf) into the shared backbuffer during
|
|
// the composite pass.
|
|
//
|
|
// Clipping adjusts both the destination and source positions by the
|
|
// same delta so the visible portion maps to the correct source pixels.
|
|
// When the source and destination pitches match and equal the row byte
|
|
// count, the entire block is copied in a single memcpy (which the
|
|
// compiler/libc can optimize to rep movsd). Otherwise it falls back
|
|
// to per-row memcpy.
|
|
//
|
|
// This function does NOT handle overlapping source and destination
|
|
// regions (no memmove). That's fine because the source is always a
|
|
// per-window content buffer and the destination is the shared
|
|
// backbuffer -- they never overlap.
|
|
|
|
void rectCopy(DisplayT *d, const BlitOpsT *ops, int32_t dstX, int32_t dstY, const uint8_t *srcBuf, int32_t srcPitch, int32_t srcX, int32_t srcY, int32_t w, int32_t h) {
|
|
int32_t bpp = ops->bytesPerPixel;
|
|
|
|
// Clip to display clip rect
|
|
int32_t origDstX = dstX;
|
|
int32_t origDstY = dstY;
|
|
|
|
clipRect(d, &dstX, &dstY, &w, &h);
|
|
|
|
if (__builtin_expect(w <= 0 || h <= 0, 0)) {
|
|
return;
|
|
}
|
|
|
|
// Adjust source position by the amount we clipped
|
|
srcX += dstX - origDstX;
|
|
srcY += dstY - origDstY;
|
|
|
|
const uint8_t *srcRow = srcBuf + srcY * srcPitch + srcX * bpp;
|
|
uint8_t *dstRow = d->backBuf + dstY * d->pitch + dstX * bpp;
|
|
int32_t rowBytes = w * bpp;
|
|
int32_t dstPitch = d->pitch;
|
|
|
|
// For full-width copies aligned to pitch, use memcpy (may optimize to rep movsd)
|
|
if (rowBytes == dstPitch && rowBytes == srcPitch) {
|
|
memcpy(dstRow, srcRow, rowBytes * h);
|
|
} else {
|
|
for (int32_t i = 0; i < h; i++) {
|
|
memcpy(dstRow, srcRow, rowBytes);
|
|
srcRow += srcPitch;
|
|
dstRow += dstPitch;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// rectFill
|
|
// ============================================================
|
|
//
|
|
// The workhorse fill primitive. Clips to the display clip rect,
|
|
// then fills one scanline at a time via the spanFill function
|
|
// pointer (which routes to rep stosl on DOS). This is the most
|
|
// frequently called function in the draw layer -- it backs rectFill
|
|
// directly, plus drawHLine, drawBevel interior fills, and the bg
|
|
// fill in opaque text rendering.
|
|
//
|
|
// The clip + early-out pattern (clipRect then check w/h <= 0) is
|
|
// the same in every draw function. The __builtin_expect marks the
|
|
// zero-size case as unlikely to avoid a taken-branch penalty in the
|
|
// common case where the rect is visible after clipping.
|
|
|
|
void rectFill(DisplayT *d, const BlitOpsT *ops, int32_t x, int32_t y, int32_t w, int32_t h, uint32_t color) {
|
|
clipRect(d, &x, &y, &w, &h);
|
|
|
|
if (__builtin_expect(w <= 0 || h <= 0, 0)) {
|
|
return;
|
|
}
|
|
|
|
uint8_t *row = d->backBuf + y * d->pitch + x * d->format.bytesPerPixel;
|
|
int32_t pitch = d->pitch;
|
|
|
|
for (int32_t i = 0; i < h; i++) {
|
|
ops->spanFill(row, color, w);
|
|
row += pitch;
|
|
}
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// textWidth
|
|
// ============================================================
|
|
//
|
|
// Returns the pixel width of a null-terminated string. Because all
|
|
// fonts are fixed-width, this is just strlen * charWidth -- but we
|
|
// iterate manually rather than calling strlen to avoid a second pass
|
|
// over the string. This is used heavily for layout calculations
|
|
// (centering text in buttons, sizing menu popups, etc.).
|
|
|
|
int32_t textWidth(const BitmapFontT *font, const char *text) {
|
|
int32_t w = 0;
|
|
|
|
while (*text) {
|
|
w += font->charWidth;
|
|
text++;
|
|
}
|
|
|
|
return w;
|
|
}
|
|
|
|
|
|
// ============================================================
|
|
// textWidthAccel
|
|
// ============================================================
|
|
//
|
|
// Like textWidth but accounts for & accelerator markers: a single &
|
|
// is not rendered (it just marks the next character as the accelerator),
|
|
// so it doesn't contribute to width. "&&" renders as one "&" character.
|
|
// Used to compute the correct pixel width for menu items and button
|
|
// labels that contain accelerator markers.
|
|
|
|
int32_t textWidthAccel(const BitmapFontT *font, const char *text) {
|
|
int32_t w = 0;
|
|
|
|
while (*text) {
|
|
if (*text == '&') {
|
|
text++;
|
|
|
|
if (*text == '&') {
|
|
// Escaped && -- counts as one character
|
|
w += font->charWidth;
|
|
text++;
|
|
continue;
|
|
}
|
|
|
|
if (*text) {
|
|
// Accelerator character -- counts as one character, & is skipped
|
|
w += font->charWidth;
|
|
text++;
|
|
continue;
|
|
}
|
|
|
|
break;
|
|
}
|
|
|
|
w += font->charWidth;
|
|
text++;
|
|
}
|
|
|
|
return w;
|
|
}
|