Replace DIB pixel rendering with ExtTextOut + memory DC BitBlt

The 8bpp DIB pipeline (font atlas, nibble lookup table, inline ASM glyph
expansion, SetDIBitsToDevice) is replaced with GDI text output: ExtTextOut
per color run into a memory DC, then BitBlt per row to the screen.

The memory bitmap is in native device format, so BitBlt is a raw copy with
no 8bpp-to-device color conversion.  ExtTextOut goes through the display
driver's optimized text path instead of software pixel expansion.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Scott Duensing 2026-03-02 19:00:58 -06:00
parent 40dabea161
commit a7780c8030

View file

@ -7,11 +7,11 @@ unit KPAnsi;
{ Renders incoming data using standard ANSI/VT100 escape sequences for } { Renders incoming data using standard ANSI/VT100 escape sequences for }
{ cursor positioning, color attributes, and screen manipulation. } { cursor positioning, color attributes, and screen manipulation. }
{ } { }
{ Rendering uses a font atlas with a nibble lookup table and inline ASM } { Rendering uses ExtTextOut into a memory DC with color-run batching, then }
{ to expand glyph bitmaps directly into a reusable 8bpp DIB pixel buffer. } { BitBlt to the screen. The memory bitmap is in native device format, so }
{ Constant mini-frame values are hoisted outside the column loop to reduce } { BitBlt is a raw copy with no color conversion. Smart blink tracking }
{ per-cell overhead. Smart blink tracking dirties only cursor and blink } { dirties only cursor and blink rows instead of the entire screen, }
{ rows instead of the entire screen, eliminating wasteful full repaints. } { eliminating wasteful full repaints. }
{ } { }
{ Installs to the "KP" palette tab alongside TKPComm. } { Installs to the "KP" palette tab alongside TKPComm. }
@ -38,11 +38,6 @@ type
Cells: array[0..255] of TTermCell; Cells: array[0..255] of TTermCell;
end; end;
TDibInfo = record
bmiHeader: TBitmapInfoHeader;
bmiColors: array[0..15] of TRGBQuad;
end;
TKPAnsi = class(TCustomControl) TKPAnsi = class(TCustomControl)
private private
{ Terminal buffer state } { Terminal buffer state }
@ -108,28 +103,18 @@ type
FScrollbarDirty: Boolean; { True = scrollbar range/position needs update } FScrollbarDirty: Boolean; { True = scrollbar range/position needs update }
FTextBlinkOn: Boolean; { Text blink phase: True=visible, False=hidden } FTextBlinkOn: Boolean; { Text blink phase: True=visible, False=hidden }
{ Font atlas: glyph bitmaps + nibble lookup table (GlobalAlloc) } { Off-screen memory DC for ExtTextOut rendering }
FGlyphBufH: THandle; { GlobalAlloc handle for glyph block (8256 bytes) } FMemDC: HDC; { Compatible memory DC for off-screen ExtTextOut }
FGlyphBuf: Pointer; { Far ptr: offset 0..63 = nibble table, 64+ = glyphs } FMemBmp: HBitmap; { Compatible bitmap, one row tall, native format }
FOldBmp: HBitmap; { Previous bitmap in FMemDC for cleanup }
{ Row pixel buffer: reusable 8bpp DIB for one terminal row }
FRowBufH: THandle; { GlobalAlloc handle for pixel buffer }
FRowBuf: Pointer; { Far ptr to pixel data (cols*cellW*cellH bytes) }
FDibInfo: TDibInfo; { BITMAPINFO with 16-color ANSI palette }
FRowBufSize: Integer; { Pixel buffer size in bytes }
{ Nibble table color cache: avoids rebuild when colors unchanged }
FNibbleFG: Byte; { FG index currently in nibble table (255=invalid) }
FNibbleBG: Byte; { BG index currently in nibble table (255=invalid) }
procedure AllocLine(Line: PTermLine); procedure AllocLine(Line: PTermLine);
procedure BuildAtlas;
procedure ClearLine(Line: PTermLine); procedure ClearLine(Line: PTermLine);
procedure CMFontChanged(var Msg: TMessage); message cm_FontChanged; procedure CMFontChanged(var Msg: TMessage); message cm_FontChanged;
procedure CreateMemDC;
procedure CreatePaintFont; procedure CreatePaintFont;
procedure CreateRowBuffers;
procedure DeleteChars(N: Integer); procedure DeleteChars(N: Integer);
procedure DeleteLines(N: Integer); procedure DeleteLines(N: Integer);
procedure DestroyRowBuffers; procedure DestroyMemDC;
procedure DirtyAll; procedure DirtyAll;
procedure DirtyBlinkRows; procedure DirtyBlinkRows;
procedure DirtyRow(Row: Integer); procedure DirtyRow(Row: Integer);
@ -142,7 +127,6 @@ type
procedure FreeLineList(List: TList); procedure FreeLineList(List: TList);
function GetCursorCol: Integer; function GetCursorCol: Integer;
function GetCursorRow: Integer; function GetCursorRow: Integer;
procedure InitDibInfo;
procedure InsertChars(N: Integer); procedure InsertChars(N: Integer);
procedure InsertLines(N: Integer); procedure InsertLines(N: Integer);
procedure ParseData(const S: string); procedure ParseData(const S: string);
@ -221,17 +205,16 @@ const
{ OUT_RASTER_PRECIS may not be defined in Delphi 1.0 WinTypes } { OUT_RASTER_PRECIS may not be defined in Delphi 1.0 WinTypes }
OutRasterPrecis = 6; OutRasterPrecis = 6;
{ ExtTextOut option flags (may not be in Delphi 1.0 WinTypes) }
ETO_OPAQUE = $0002;
ETO_CLIPPED = $0004;
{ ANSI music note frequencies (octave 0, multiply by 2^octave) } { ANSI music note frequencies (octave 0, multiply by 2^octave) }
{ C, C#, D, D#, E, F, F#, G, G#, A, A#, B } { C, C#, D, D#, E, F, F#, G, G#, A, A#, B }
BaseNoteFreq: array[0..11] of Word = ( BaseNoteFreq: array[0..11] of Word = (
262, 277, 294, 311, 330, 349, 370, 392, 415, 440, 466, 494 262, 277, 294, 311, 330, 349, 370, 392, 415, 440, 466, 494
); );
type
{ Typed pointer for direct byte access to 8bpp row pixel buffers }
TPixelBuf = array[0..65519] of Byte;
PPixelBuf = ^TPixelBuf;
{ ----------------------------------------------------------------------- } { ----------------------------------------------------------------------- }
{ Helper: parse semicolon-delimited parameters from char buffer } { Helper: parse semicolon-delimited parameters from char buffer }
@ -300,112 +283,6 @@ begin
end; end;
procedure TKPAnsi.BuildAtlas;
{ Render all 256 CP437 characters into a monochrome bitmap, then extract }
{ per-glyph pixel masks into the glyph block at offset 64. Each glyph }
{ byte is an 8-bit mask: MSB = leftmost pixel, 1 = FG, 0 = BG. The }
{ nibble lookup table at offset 0..63 is built at render time. This is a }
{ one-time GDI cost at startup; after extraction, the bitmap and DC are }
{ deleted and never needed again. }
var
AtlasBmp: HBitmap;
AtlasDC: HDC;
OldBmp: HBitmap;
OldFont: HFont;
I: Integer;
Row: Integer;
RawH: THandle;
RawPtr: PPixelBuf;
GlyphPtr: PPixelBuf;
Stride: Longint;
BmpSize: Longint;
Ch: Char;
begin
{ Free old glyph block }
if FGlyphBufH <> 0 then
begin
GlobalUnlock(FGlyphBufH);
GlobalFree(FGlyphBufH);
FGlyphBufH := 0;
FGlyphBuf := nil;
end;
if FPaintFont = 0 then
Exit;
if (FCellWidth < 1) or (FCellHeight < 1) or (FCellHeight > 32) then
Exit;
{ Allocate glyph block: 64 bytes nibble table + 256*32 glyph data }
FGlyphBufH := GlobalAlloc(GMEM_FIXED or GMEM_ZEROINIT, 8256);
if FGlyphBufH = 0 then
Exit;
FGlyphBuf := GlobalLock(FGlyphBufH);
if FGlyphBuf = nil then
begin
GlobalFree(FGlyphBufH);
FGlyphBufH := 0;
Exit;
end;
GlyphPtr := PPixelBuf(FGlyphBuf);
{ Create monochrome bitmap: 256 chars side-by-side, FCellHeight tall }
AtlasBmp := CreateBitmap(256 * FCellWidth, FCellHeight, 1, 1, nil);
if AtlasBmp = 0 then
Exit;
AtlasDC := CreateCompatibleDC(0);
if AtlasDC = 0 then
begin
DeleteObject(AtlasBmp);
Exit;
end;
OldBmp := SelectObject(AtlasDC, AtlasBmp);
OldFont := SelectObject(AtlasDC, FPaintFont);
SetTextColor(AtlasDC, RGB(255, 255, 255));
SetBkColor(AtlasDC, RGB(0, 0, 0));
SetBkMode(AtlasDC, OPAQUE);
{ Render all 256 CP437 characters }
for I := 0 to 255 do
begin
Ch := Char(I);
WinProcs.TextOut(AtlasDC, I * FCellWidth, 0, @Ch, 1);
end;
{ Extract raw monochrome bitmap data }
Stride := ((Longint(256) * FCellWidth + 15) div 16) * 2;
BmpSize := Stride * FCellHeight;
RawH := GlobalAlloc(GMEM_FIXED, BmpSize);
if RawH <> 0 then
begin
RawPtr := GlobalLock(RawH);
if RawPtr <> nil then
begin
GetBitmapBits(AtlasBmp, BmpSize, RawPtr);
{ Extract per-glyph bytes into glyph block at offset 64. }
{ For 8-pixel-wide fonts each glyph is exactly one byte per scan }
{ line, aligned to byte boundaries. }
for I := 0 to 255 do
begin
for Row := 0 to FCellHeight - 1 do
begin
GlyphPtr^[64 + I * 32 + Row] := RawPtr^[Row * Stride + I];
end;
end;
GlobalUnlock(RawH);
end;
GlobalFree(RawH);
end;
{ Clean up -- atlas DC and bitmap are never needed again }
SelectObject(AtlasDC, OldFont);
SelectObject(AtlasDC, OldBmp);
DeleteDC(AtlasDC);
DeleteObject(AtlasBmp);
end;
procedure TKPAnsi.Clear; procedure TKPAnsi.Clear;
var var
I: Integer; I: Integer;
@ -503,13 +380,9 @@ begin
FAllDirty := True; FAllDirty := True;
FScrollbarDirty := False; FScrollbarDirty := False;
FTextBlinkOn := True; FTextBlinkOn := True;
FRowBufSize := 0; FMemDC := 0;
FGlyphBufH := 0; FMemBmp := 0;
FGlyphBuf := nil; FOldBmp := 0;
FRowBufH := 0;
FRowBuf := nil;
FNibbleFG := 255;
FNibbleBG := 255;
{ Set a monospace font -- OEM charset selected in CreatePaintFont } { Set a monospace font -- OEM charset selected in CreatePaintFont }
Font.Name := 'Terminal'; Font.Name := 'Terminal';
@ -526,6 +399,28 @@ begin
end; end;
procedure TKPAnsi.CreateMemDC;
var
ScrDC: HDC;
begin
DestroyMemDC;
if (FCellWidth < 1) or (FCellHeight < 1) then
Exit;
ScrDC := GetDC(0);
FMemDC := CreateCompatibleDC(ScrDC);
FMemBmp := CreateCompatibleBitmap(ScrDC, FCols * FCellWidth, FCellHeight);
ReleaseDC(0, ScrDC);
if (FMemDC = 0) or (FMemBmp = 0) then
begin
DestroyMemDC;
Exit;
end;
FOldBmp := SelectObject(FMemDC, FMemBmp);
SelectObject(FMemDC, FPaintFont);
SetBkMode(FMemDC, OPAQUE);
end;
procedure TKPAnsi.CreatePaintFont; procedure TKPAnsi.CreatePaintFont;
var var
LF: TLogFont; LF: TLogFont;
@ -582,30 +477,6 @@ begin
end; end;
procedure TKPAnsi.CreateRowBuffers;
begin
{ Free old row buffer (glyph block is managed by BuildAtlas) }
if FRowBufH <> 0 then
begin
GlobalUnlock(FRowBufH);
GlobalFree(FRowBufH);
FRowBufH := 0;
FRowBuf := nil;
end;
FRowBufSize := FCols * FCellWidth * FCellHeight;
if FRowBufSize < 1 then
Exit;
{ Single reusable buffer for one terminal row }
FRowBufH := GlobalAlloc(GMEM_FIXED or GMEM_ZEROINIT, FRowBufSize);
if FRowBufH <> 0 then
FRowBuf := GlobalLock(FRowBufH)
else
FRowBuf := nil;
end;
procedure TKPAnsi.DeleteChars(N: Integer); procedure TKPAnsi.DeleteChars(N: Integer);
var var
Line: PTermLine; Line: PTermLine;
@ -658,7 +529,7 @@ end;
destructor TKPAnsi.Destroy; destructor TKPAnsi.Destroy;
begin begin
DestroyRowBuffers; DestroyMemDC;
if (FPaintFont <> 0) and not FStockFont then if (FPaintFont <> 0) and not FStockFont then
begin begin
DeleteObject(FPaintFont); DeleteObject(FPaintFont);
@ -672,21 +543,20 @@ begin
end; end;
procedure TKPAnsi.DestroyRowBuffers; procedure TKPAnsi.DestroyMemDC;
begin begin
if FRowBufH <> 0 then if FMemDC <> 0 then
begin begin
GlobalUnlock(FRowBufH); if FOldBmp <> 0 then
GlobalFree(FRowBufH); SelectObject(FMemDC, FOldBmp);
FRowBufH := 0; DeleteDC(FMemDC);
FRowBuf := nil; FMemDC := 0;
FOldBmp := 0;
end; end;
if FGlyphBufH <> 0 then if FMemBmp <> 0 then
begin begin
GlobalUnlock(FGlyphBufH); DeleteObject(FMemBmp);
GlobalFree(FGlyphBufH); FMemBmp := 0;
FGlyphBufH := 0;
FGlyphBuf := nil;
end; end;
end; end;
@ -699,10 +569,10 @@ end;
procedure TKPAnsi.DirtyBlinkRows; procedure TKPAnsi.DirtyBlinkRows;
{ Targeted dirty marking for blink toggle. Instead of DirtyAll (which } { Targeted dirty marking for blink toggle. Instead of DirtyAll (which }
{ forces a full 25-row re-render and 25 SetDIBitsToDevice calls), only } { forces a full 25-row re-render and 25 BitBlt calls), only dirty the }
{ dirty the cursor row (cursor blink) and rows containing blink cells } { cursor row (cursor blink) and rows containing blink cells (text blink). }
{ (text blink). Typical BBS content has 0-3 blink rows, so this reduces } { Typical BBS content has 0-3 blink rows, so this reduces blink overhead }
{ blink overhead from ~63ms to ~3ms on a 486. } { from ~63ms to ~3ms on a 486. }
var var
I: Integer; I: Integer;
J: Integer; J: Integer;
@ -1226,11 +1096,11 @@ end;
procedure TKPAnsi.FlipToScreen; procedure TKPAnsi.FlipToScreen;
{ Render dirty rows into the shared 8bpp DIB buffer, blasting each to the } { Render dirty rows via ExtTextOut into the memory DC, then BitBlt each }
{ screen via SetDIBitsToDevice immediately after rendering. One GDI call } { to the screen (device format to device format = raw copy). Coalesced }
{ per dirty row, zero for the pixel expansion itself. Coalesced ScrollDC } { ScrollDC shifts on-screen pixels to match FScreen after scrolling, }
{ shifts on-screen pixels to match FScreen after scrolling, reducing the } { reducing the per-scroll GDI cost from 25 rows to just the newly exposed }
{ per-scroll GDI cost from 25 rows to just the newly exposed rows. } { rows. }
var var
DC: HDC; DC: HDC;
Row: Integer; Row: Integer;
@ -1242,9 +1112,9 @@ var
begin begin
if not HandleAllocated then if not HandleAllocated then
Exit; Exit;
if FRowBuf = nil then if FMemDC = 0 then
RecalcCellSize; RecalcCellSize;
if FRowBuf = nil then if FMemDC = 0 then
Exit; Exit;
{ Scrollback view: force full redraw (line mapping changes) } { Scrollback view: force full redraw (line mapping changes) }
@ -1320,21 +1190,16 @@ begin
Exit; Exit;
end; end;
{ Interleaved render + blast: single buffer is reused per row } { Interleaved render + blast: memory DC is reused per row }
DC := GetDC(Handle); DC := GetDC(Handle);
for Row := 0 to FRows - 1 do for Row := 0 to FRows - 1 do
begin begin
if FAllDirty or FDirtyRow[Row] then if FAllDirty or FDirtyRow[Row] then
begin begin
RenderRow(Row); RenderRow(Row);
SetDIBitsToDevice(DC, BitBlt(DC, 0, Row * FCellHeight,
0, Row * FCellHeight,
FCols * FCellWidth, FCellHeight, FCols * FCellWidth, FCellHeight,
0, 0, FMemDC, 0, 0, SRCCOPY);
0, FCellHeight,
FRowBuf,
PBitmapInfo(@FDibInfo)^,
0); { DIB_RGB_COLORS }
FDirtyRow[Row] := False; FDirtyRow[Row] := False;
end; end;
end; end;
@ -1367,34 +1232,6 @@ begin
end; end;
procedure TKPAnsi.InitDibInfo;
{ Initialize the BITMAPINFOHEADER and 16-color palette for the 8bpp DIB }
{ row buffers. The palette maps indices 0-15 to the ANSI color table. }
var
I: Integer;
C: TColor;
begin
FillChar(FDibInfo, SizeOf(FDibInfo), 0);
with FDibInfo.bmiHeader do
begin
biSize := SizeOf(TBitmapInfoHeader);
biWidth := FCols * FCellWidth;
biHeight := FCellHeight;
biPlanes := 1;
biBitCount := 8;
biCompression := 0; { BI_RGB }
end;
for I := 0 to 15 do
begin
C := AnsiColors[I];
FDibInfo.bmiColors[I].rgbRed := C and $FF;
FDibInfo.bmiColors[I].rgbGreen := (C shr 8) and $FF;
FDibInfo.bmiColors[I].rgbBlue := (C shr 16) and $FF;
FDibInfo.bmiColors[I].rgbReserved := 0;
end;
end;
procedure TKPAnsi.InsertChars(N: Integer); procedure TKPAnsi.InsertChars(N: Integer);
var var
Line: PTermLine; Line: PTermLine;
@ -1529,25 +1366,20 @@ procedure TKPAnsi.Paint;
var var
Row: Integer; Row: Integer;
begin begin
if FRowBuf = nil then if FMemDC = 0 then
RecalcCellSize; RecalcCellSize;
if FRowBuf = nil then if FMemDC = 0 then
Exit; Exit;
{ Full repaint: render each row into the shared buffer and blast it } { Full repaint: render each row into the memory DC and blast it }
FAllDirty := True; FAllDirty := True;
for Row := 0 to FRows - 1 do for Row := 0 to FRows - 1 do
begin begin
RenderRow(Row); RenderRow(Row);
SetDIBitsToDevice(Canvas.Handle, BitBlt(Canvas.Handle, 0, Row * FCellHeight,
0, Row * FCellHeight,
FCols * FCellWidth, FCellHeight, FCols * FCellWidth, FCellHeight,
0, 0, FMemDC, 0, 0, SRCCOPY);
0, FCellHeight,
FRowBuf,
PBitmapInfo(@FDibInfo)^,
0); { DIB_RGB_COLORS }
FDirtyRow[Row] := False; FDirtyRow[Row] := False;
end; end;
FAllDirty := False; FAllDirty := False;
@ -2077,10 +1909,8 @@ begin
Width := FCols * FCellWidth + GetSystemMetrics(sm_CxVScroll); Width := FCols * FCellWidth + GetSystemMetrics(sm_CxVScroll);
Height := FRows * FCellHeight; Height := FRows * FCellHeight;
{ Build font atlas, initialize DIB palette, allocate row buffers } { Create off-screen memory DC for ExtTextOut rendering }
BuildAtlas; CreateMemDC;
InitDibInfo;
CreateRowBuffers;
FAllDirty := True; FAllDirty := True;
Invalidate; Invalidate;
@ -2088,60 +1918,28 @@ end;
procedure TKPAnsi.RenderRow(Row: Integer); procedure TKPAnsi.RenderRow(Row: Integer);
{ Core atlas renderer with nibble lookup table and inline ASM. For each } { Render one terminal row into FMemDC using ExtTextOut with ETO_OPAQUE. }
{ cell in the row, the Pascal outer loop resolves colors and rebuilds the } { Scans cells for color runs (consecutive cells with same effective FG+BG) }
{ 16-entry nibble table on color change. The inline ASM inner loop } { and emits one ExtTextOut call per run. Typical BBS content = 2-5 runs }
{ expands one glyph (all scanlines) by splitting each glyph byte into } { per row. Uniform color content (e.g. LORD II spaces) = 1 run per row. }
{ high and low nibbles, looking up 4 pre-resolved pixels per nibble, and }
{ writing them as word stores. Zero branching in the inner loop. }
{ }
{ Register allocation in ASM block: }
{ DS = glyph block segment (table at 0..63, glyph data at 64+) }
{ SI = glyph data offset (increments through scanlines) }
{ ES = pixel buffer segment }
{ DI = pixel buffer offset (decrements by Stride for bottom-up DIB) }
{ BX = table index (BH=0, BL = nibble * 4) }
{ CX = scanline counter }
{ AX/DX = temporaries }
{ }
{ Critical: Delphi 1.0 may allocate local variables to SI/DI as register }
{ variables. The ASM block clobbers SI/DI for its own purposes, so ALL }
{ local variable values are pushed to an explicit mini-frame (via PUSH) }
{ BEFORE any register clobber, then accessed via BP-relative offsets. }
{ BP-relative addressing defaults to SS segment, safe after DS change. }
var var
Line: PTermLine; Line: PTermLine;
Col: Integer; Col: Integer;
CurCol: Integer; { Cursor column on this row, or -1 if no cursor } CurCol: Integer;
FGIdx: Byte; FGIdx: Byte;
BGIdx: Byte; BGIdx: Byte;
TmpIdx: Byte; TmpIdx: Byte;
CharCode: Integer; SbkCount: Integer;
SbkCount: Integer; VisRow: Integer;
VisRow: Integer; RunStart: Integer;
TabPtr: PPixelBuf; RunFG: Byte;
I: Integer; RunBG: Byte;
Ofs: Integer; RunBuf: array[0..255] of Char;
GlyphSeg: Word; RunLen: Integer;
GlyphBase: Word; { Offset component of FGlyphBuf (nibble table at +0) } R: TRect;
PixSeg: Word;
GlyphOfs: Word;
PixOfs: Word;
Stride: Word;
CellH: Word;
begin begin
if FRowBuf = nil then if FMemDC = 0 then
Exit; Exit;
if FGlyphBuf = nil then
Exit;
Stride := Word(FCols) * Word(FCellWidth);
CellH := FCellHeight;
{ Extract segments/offsets from far pointers -- pure Pascal, no register clobber }
GlyphSeg := Seg(PPixelBuf(FGlyphBuf)^);
GlyphBase := 0; { GMEM_FIXED: offset is always 0 }
PixSeg := Seg(PPixelBuf(FRowBuf)^);
{ Determine which line to render (handles scrollback view) } { Determine which line to render (handles scrollback view) }
if FScrollPos <> 0 then if FScrollPos <> 0 then
@ -2167,49 +1965,38 @@ begin
if Line = nil then if Line = nil then
begin begin
{ Blank row: fill with background color 0 (black) } { Blank row: fill with black background }
FillChar(PPixelBuf(FRowBuf)^, FRowBufSize, 0); SetBkColor(FMemDC, AnsiColors[0]);
R.Left := 0;
R.Top := 0;
R.Right := FCols * FCellWidth;
R.Bottom := FCellHeight;
ExtTextOut(FMemDC, 0, 0, ETO_OPAQUE, @R, nil, 0, nil);
Exit; Exit;
end; end;
{ Determine cursor column for this row (-1 if cursor not on this row). } { Determine cursor column for this row (-1 if no cursor) }
{ The cursor swap is integrated into the main column loop, eliminating }
{ the separate cursor overlay pass (saves nibble rebuild + ASM per cell). }
if FCursorVisible and FBlinkOn and (FScrollPos = 0) and if FCursorVisible and FBlinkOn and (FScrollPos = 0) and
(Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then (Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then
CurCol := FCursorCol CurCol := FCursorCol
else else
CurCol := -1; CurCol := -1;
{ Force nibble table rebuild on first cell } RunStart := 0;
FNibbleFG := 255; RunLen := 0;
FNibbleBG := 255; RunFG := 255;
RunBG := 255;
{ Push constant mini-frame values ONCE before the column loop. }
{ These 4 values (Stride, CellH, PixSeg, GlyphSeg) don't change }
{ across cells. Only per-cell values (GlyphOfs, PixOfs) are pushed }
{ inside the loop. This saves 320 push instructions per row (4 pushes }
{ x 80 cells). SP is 8 bytes below Delphi's expectation until the }
{ matching ADD SP,8 at the end, but local variable access uses BP, }
{ not SP, so this is safe. }
asm
push Stride
push CellH
push PixSeg
push GlyphSeg
end;
for Col := 0 to FCols - 1 do for Col := 0 to FCols - 1 do
begin begin
{ Determine effective colors } { Determine effective colors }
if Line^.Cells[Col].Blink and not FTextBlinkOn then if Line^.Cells[Col].Blink and not FTextBlinkOn then
FGIdx := Line^.Cells[Col].BG { hide blinking text } FGIdx := Line^.Cells[Col].BG
else else
FGIdx := Line^.Cells[Col].FG; FGIdx := Line^.Cells[Col].FG;
BGIdx := Line^.Cells[Col].BG; BGIdx := Line^.Cells[Col].BG;
CharCode := Ord(Line^.Cells[Col].Ch);
{ Cursor: swap FG/BG inline -- no separate overlay pass needed } { Cursor: swap FG/BG }
if Col = CurCol then if Col = CurCol then
begin begin
TmpIdx := FGIdx; TmpIdx := FGIdx;
@ -2217,184 +2004,41 @@ begin
BGIdx := TmpIdx; BGIdx := TmpIdx;
end; end;
if CharCode = 32 then { If colors changed, flush current run }
if (FGIdx <> RunFG) or (BGIdx <> RunBG) then
begin begin
{ Space fast path: solid background fill, no glyph expansion. } if RunLen > 0 then
{ Skips nibble table rebuild and ASM glyph loop entirely. }
{ 4 word stores per scanline vs full nibble lookup + expansion. }
PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8;
asm
push di
mov es, PixSeg
mov di, PixOfs
mov al, BGIdx
mov ah, al { AX = BGIdx:BGIdx }
mov cx, CellH
@spfill:
mov es:[di], ax
mov es:[di+2], ax
mov es:[di+4], ax
mov es:[di+6], ax
sub di, Stride
dec cx
jnz @spfill
pop di
end;
end
else
begin
{ Rebuild nibble table on color change: 16 entries x 4 bytes. }
{ Pre-compute 4 word values (BGBG, BGFG, FGBG, FGFG) in AX/BX/CX/DX }
{ and write all 32 words directly. Replaces 64 branch+store Pascal }
{ operations with 32 straight-line MOV instructions. }
if (FGIdx <> FNibbleFG) or (BGIdx <> FNibbleBG) then
begin begin
asm SetTextColor(FMemDC, AnsiColors[RunFG]);
push di SetBkColor(FMemDC, AnsiColors[RunBG]);
push bx R.Left := RunStart * FCellWidth;
push es R.Top := 0;
mov es, GlyphSeg R.Right := (RunStart + RunLen) * FCellWidth;
mov di, GlyphBase R.Bottom := FCellHeight;
mov al, BGIdx ExtTextOut(FMemDC, RunStart * FCellWidth, 0, ETO_OPAQUE, @R,
mov ah, al { AX = BG:BG } @RunBuf[0], RunLen, nil);
mov dl, FGIdx
mov dh, dl { DX = FG:FG }
mov bl, al
mov bh, dl { BX = BG:FG (lo=BG, hi=FG) }
mov cl, dl
mov ch, al { CX = FG:BG (lo=FG, hi=BG) }
{ Entry 0 (0000): BG BG BG BG }
mov es:[di+ 0], ax
mov es:[di+ 2], ax
{ Entry 1 (0001): BG BG BG FG }
mov es:[di+ 4], ax
mov es:[di+ 6], bx
{ Entry 2 (0010): BG BG FG BG }
mov es:[di+ 8], ax
mov es:[di+10], cx
{ Entry 3 (0011): BG BG FG FG }
mov es:[di+12], ax
mov es:[di+14], dx
{ Entry 4 (0100): BG FG BG BG }
mov es:[di+16], bx
mov es:[di+18], ax
{ Entry 5 (0101): BG FG BG FG }
mov es:[di+20], bx
mov es:[di+22], bx
{ Entry 6 (0110): BG FG FG BG }
mov es:[di+24], bx
mov es:[di+26], cx
{ Entry 7 (0111): BG FG FG FG }
mov es:[di+28], bx
mov es:[di+30], dx
{ Entry 8 (1000): FG BG BG BG }
mov es:[di+32], cx
mov es:[di+34], ax
{ Entry 9 (1001): FG BG BG FG }
mov es:[di+36], cx
mov es:[di+38], bx
{ Entry 10 (1010): FG BG FG BG }
mov es:[di+40], cx
mov es:[di+42], cx
{ Entry 11 (1011): FG BG FG FG }
mov es:[di+44], cx
mov es:[di+46], dx
{ Entry 12 (1100): FG FG BG BG }
mov es:[di+48], dx
mov es:[di+50], ax
{ Entry 13 (1101): FG FG BG FG }
mov es:[di+52], dx
mov es:[di+54], bx
{ Entry 14 (1110): FG FG FG BG }
mov es:[di+56], dx
mov es:[di+58], cx
{ Entry 15 (1111): FG FG FG FG }
mov es:[di+60], dx
mov es:[di+62], dx
pop es
pop bx
pop di
end;
FNibbleFG := FGIdx;
FNibbleBG := BGIdx;
end;
{ Compute offsets -- all 16-bit, no Longint }
GlyphOfs := 64 + Word(CharCode) shl 5;
PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8;
asm
{ Push only per-cell values. Constants already on stack above. }
push PixOfs
push GlyphOfs
push bp
mov bp, sp
{ Mini-frame layout (same offsets as before): }
{ [bp] = saved original BP }
{ [bp+2] = GlyphOfs (pushed this cell) }
{ [bp+4] = PixOfs (pushed this cell) }
{ [bp+6] = GlyphSeg (pushed once before loop) }
{ [bp+8] = PixSeg (pushed once before loop) }
{ [bp+10] = CellH (pushed once before loop) }
{ [bp+12] = Stride (pushed once before loop) }
push ds
push bx
push si
push di
mov si, [bp+2]
mov es, [bp+8]
mov di, [bp+4]
mov cx, [bp+10]
xor bh, bh
mov ds, [bp+6]
@rowloop:
mov al, [si] { load glyph byte from DS:SI }
inc si
mov ah, al { save copy }
{ High nibble -> 4 pixels }
and al, $F0
shr al, 1
shr al, 1 { AL = high_nibble * 4 }
mov bl, al
mov dx, [bx] { 2 table bytes (DS:BX, table at offset 0) }
mov es:[di], dx
mov dx, [bx+2] { 2 more table bytes }
mov es:[di+2], dx
{ Low nibble -> 4 pixels }
mov al, ah
and al, $0F
shl al, 1
shl al, 1 { AL = low_nibble * 4 }
mov bl, al
mov dx, [bx]
mov es:[di+4], dx
mov dx, [bx+2]
mov es:[di+6], dx
sub di, [bp+12] { Stride via SS:[BP+12] -- safe after DS change }
dec cx
jnz @rowloop
pop di
pop si
pop bx
pop ds
pop bp
add sp, 4 { remove per-cell GlyphOfs + PixOfs only }
end; end;
RunStart := Col;
RunLen := 0;
RunFG := FGIdx;
RunBG := BGIdx;
end; end;
RunBuf[RunLen] := Line^.Cells[Col].Ch;
Inc(RunLen);
end; end;
{ Remove constant mini-frame words pushed before the column loop } { Flush final run }
asm if RunLen > 0 then
add sp, 8 begin
SetTextColor(FMemDC, AnsiColors[RunFG]);
SetBkColor(FMemDC, AnsiColors[RunBG]);
R.Left := RunStart * FCellWidth;
R.Top := 0;
R.Right := (RunStart + RunLen) * FCellWidth;
R.Bottom := FCellHeight;
ExtTextOut(FMemDC, RunStart * FCellWidth, 0, ETO_OPAQUE, @R,
@RunBuf[0], RunLen, nil);
end; end;
end; end;
@ -2536,7 +2180,7 @@ end;
procedure TKPAnsi.WMEraseBkgnd(var Msg: TWMEraseBkgnd); procedure TKPAnsi.WMEraseBkgnd(var Msg: TWMEraseBkgnd);
begin begin
{ Suppress background erase -- SetDIBitsToDevice covers everything } { Suppress background erase -- BitBlt covers everything }
Msg.Result := 1; Msg.Result := 1;
end; end;