diff --git a/delphi/KPANSI.PAS b/delphi/KPANSI.PAS index aaa18ea..5fbee4f 100644 --- a/delphi/KPANSI.PAS +++ b/delphi/KPANSI.PAS @@ -7,10 +7,10 @@ unit KPAnsi; { Renders incoming data using standard ANSI/VT100 escape sequences for } { cursor positioning, color attributes, and screen manipulation. } { } -{ Rendering uses a font atlas with direct pixel writes into 8bpp DIB row } -{ buffers, minimizing GDI calls to a single SetDIBitsToDevice per dirty } -{ row. This eliminates the per-call overhead of TextOut/SetTextColor/ } -{ SetBkColor that dominates rendering time on Win16. } +{ Rendering uses a font atlas with a nibble lookup table and inline ASM } +{ to expand glyph bitmaps directly into a reusable 8bpp DIB pixel buffer. } +{ This eliminates per-pixel branching and 32-bit arithmetic from the inner } +{ loop, with one SetDIBitsToDevice call per dirty row. } { } { Installs to the "KP" palette tab alongside TKPComm. } @@ -78,11 +78,14 @@ type FDirtyRow: array[0..255] of Boolean; FAllDirty: Boolean; FTextBlinkOn: Boolean; - FGlyphBits: array[0..255, 0..31] of Byte; - FRowBufH: array[0..255] of THandle; - FRowBuf: array[0..255] of Pointer; + FGlyphBufH: THandle; + FGlyphBuf: Pointer; + FRowBufH: THandle; + FRowBuf: Pointer; FDibInfo: TDibInfo; FRowBufSize: Integer; + FNibbleFG: Byte; + FNibbleBG: Byte; procedure AllocLine(Line: PTermLine); procedure BuildAtlas; procedure ClearLine(Line: PTermLine); @@ -266,10 +269,11 @@ end; procedure TKPAnsi.BuildAtlas; { Render all 256 CP437 characters into a monochrome bitmap, then extract } -{ per-glyph pixel masks into FGlyphBits. Each entry FGlyphBits[ch, row] } -{ is an 8-bit mask: MSB = leftmost pixel, 1 = foreground, 0 = background. } -{ This is a one-time GDI cost at startup; after extraction, the bitmap } -{ and DC are deleted and never needed again. } +{ per-glyph pixel masks into the glyph block at offset 64. Each glyph } +{ byte is an 8-bit mask: MSB = leftmost pixel, 1 = FG, 0 = BG. The } +{ nibble lookup table at offset 0..63 is built at render time. This is a } +{ one-time GDI cost at startup; after extraction, the bitmap and DC are } +{ deleted and never needed again. } var AtlasBmp: HBitmap; AtlasDC: HDC; @@ -279,17 +283,38 @@ var Row: Integer; RawH: THandle; RawPtr: PPixelBuf; + GlyphPtr: PPixelBuf; Stride: Longint; BmpSize: Longint; Ch: Char; begin - FillChar(FGlyphBits, SizeOf(FGlyphBits), 0); + { Free old glyph block } + if FGlyphBufH <> 0 then + begin + GlobalUnlock(FGlyphBufH); + GlobalFree(FGlyphBufH); + FGlyphBufH := 0; + FGlyphBuf := nil; + end; if FPaintFont = 0 then Exit; if (FCellWidth < 1) or (FCellHeight < 1) or (FCellHeight > 32) then Exit; + { Allocate glyph block: 64 bytes nibble table + 256*32 glyph data } + FGlyphBufH := GlobalAlloc(GMEM_FIXED or GMEM_ZEROINIT, 8256); + if FGlyphBufH = 0 then + Exit; + FGlyphBuf := GlobalLock(FGlyphBufH); + if FGlyphBuf = nil then + begin + GlobalFree(FGlyphBufH); + FGlyphBufH := 0; + Exit; + end; + GlyphPtr := PPixelBuf(FGlyphBuf); + { Create monochrome bitmap: 256 chars side-by-side, FCellHeight tall } AtlasBmp := CreateBitmap(256 * FCellWidth, FCellHeight, 1, 1, nil); if AtlasBmp = 0 then @@ -325,13 +350,14 @@ begin if RawPtr <> nil then begin GetBitmapBits(AtlasBmp, BmpSize, RawPtr); - { Extract per-glyph bytes. For 8-pixel-wide fonts each glyph is } - { exactly one byte per scan line, aligned to byte boundaries. } + { Extract per-glyph bytes into glyph block at offset 64. } + { For 8-pixel-wide fonts each glyph is exactly one byte per scan } + { line, aligned to byte boundaries. } for I := 0 to 255 do begin for Row := 0 to FCellHeight - 1 do begin - FGlyphBits[I, Row] := RawPtr^[Row * Stride + I]; + GlyphPtr^[64 + I * 32 + Row] := RawPtr^[Row * Stride + I]; end; end; GlobalUnlock(RawH); @@ -443,6 +469,12 @@ begin FAllDirty := True; FTextBlinkOn := True; FRowBufSize := 0; + FGlyphBufH := 0; + FGlyphBuf := nil; + FRowBufH := 0; + FRowBuf := nil; + FNibbleFG := 255; + FNibbleBG := 255; { Set a monospace font -- OEM charset selected in CreatePaintFont } Font.Name := 'Terminal'; @@ -516,21 +548,26 @@ end; procedure TKPAnsi.CreateRowBuffers; -var - I: Integer; begin - DestroyRowBuffers; + { Free old row buffer (glyph block is managed by BuildAtlas) } + if FRowBufH <> 0 then + begin + GlobalUnlock(FRowBufH); + GlobalFree(FRowBufH); + FRowBufH := 0; + FRowBuf := nil; + end; + FRowBufSize := FCols * FCellWidth * FCellHeight; if FRowBufSize < 1 then Exit; - for I := 0 to FRows - 1 do - begin - FRowBufH[I] := GlobalAlloc(GMEM_FIXED or GMEM_ZEROINIT, FRowBufSize); - if FRowBufH[I] <> 0 then - FRowBuf[I] := GlobalLock(FRowBufH[I]) - else - FRowBuf[I] := nil; - end; + + { Single reusable buffer for one terminal row } + FRowBufH := GlobalAlloc(GMEM_FIXED or GMEM_ZEROINIT, FRowBufSize); + if FRowBufH <> 0 then + FRowBuf := GlobalLock(FRowBufH) + else + FRowBuf := nil; end; @@ -606,18 +643,20 @@ end; procedure TKPAnsi.DestroyRowBuffers; -var - I: Integer; begin - for I := 0 to 255 do + if FRowBufH <> 0 then begin - if FRowBufH[I] <> 0 then - begin - GlobalUnlock(FRowBufH[I]); - GlobalFree(FRowBufH[I]); - FRowBufH[I] := 0; - FRowBuf[I] := nil; - end; + GlobalUnlock(FRowBufH); + GlobalFree(FRowBufH); + FRowBufH := 0; + FRowBuf := nil; + end; + if FGlyphBufH <> 0 then + begin + GlobalUnlock(FGlyphBufH); + GlobalFree(FGlyphBufH); + FGlyphBufH := 0; + FGlyphBuf := nil; end; end; @@ -1157,8 +1196,9 @@ end; procedure TKPAnsi.FlipToScreen; -{ Render dirty rows into 8bpp DIB buffers, then blast to screen via } -{ SetDIBitsToDevice. One GDI call per dirty row, zero for rendering. } +{ Render dirty rows into the shared 8bpp DIB buffer, blasting each to the } +{ screen via SetDIBitsToDevice immediately after rendering. One GDI call } +{ per dirty row, zero for the pixel expansion itself. } var DC: HDC; Row: Integer; @@ -1167,9 +1207,9 @@ var begin if not HandleAllocated then Exit; - if FRowBuf[0] = nil then + if FRowBuf = nil then RecalcCellSize; - if FRowBuf[0] = nil then + if FRowBuf = nil then Exit; { Scrollback view: force full redraw, ignore pending scroll } @@ -1179,8 +1219,7 @@ begin FPendingScroll := 0; end; - { Deferred scroll: shift existing screen pixels up. Row buffers are } - { per-row so they don't need scrolling; only the screen DC is shifted. } + { Deferred scroll: shift existing screen pixels up } if (FPendingScroll > 0) and not FAllDirty then begin R.Left := 0; @@ -1194,32 +1233,21 @@ begin end; FPendingScroll := 0; - { Render dirty rows into per-row pixel buffers (pure memory, zero GDI) } - for Row := 0 to FRows - 1 do - begin - if (FAllDirty or FDirtyRow[Row]) and (FRowBuf[Row] <> nil) then - begin - RenderRow(Row); - end; - end; - - { Blast dirty rows to screen } + { Interleaved render + blast: single buffer is reused per row } DC := GetDC(Handle); for Row := 0 to FRows - 1 do begin if FAllDirty or FDirtyRow[Row] then begin - if FRowBuf[Row] <> nil then - begin - SetDIBitsToDevice(DC, - 0, Row * FCellHeight, - FCols * FCellWidth, FCellHeight, - 0, 0, - 0, FCellHeight, - FRowBuf[Row], - PBitmapInfo(@FDibInfo)^, - 0); { DIB_RGB_COLORS } - end; + RenderRow(Row); + SetDIBitsToDevice(DC, + 0, Row * FCellHeight, + FCols * FCellWidth, FCellHeight, + 0, 0, + 0, FCellHeight, + FRowBuf, + PBitmapInfo(@FDibInfo)^, + 0); { DIB_RGB_COLORS } FDirtyRow[Row] := False; end; end; @@ -1414,36 +1442,26 @@ procedure TKPAnsi.Paint; var Row: Integer; begin - if FRowBuf[0] = nil then + if FRowBuf = nil then RecalcCellSize; - if FRowBuf[0] = nil then + if FRowBuf = nil then Exit; - { Full repaint: render all rows into buffers, then blast to canvas } + { Full repaint: render each row into the shared buffer and blast it } FPendingScroll := 0; FAllDirty := True; for Row := 0 to FRows - 1 do begin - if FRowBuf[Row] <> nil then - begin - RenderRow(Row); - end; - end; - - for Row := 0 to FRows - 1 do - begin - if FRowBuf[Row] <> nil then - begin - SetDIBitsToDevice(Canvas.Handle, - 0, Row * FCellHeight, - FCols * FCellWidth, FCellHeight, - 0, 0, - 0, FCellHeight, - FRowBuf[Row], - PBitmapInfo(@FDibInfo)^, - 0); { DIB_RGB_COLORS } - end; + RenderRow(Row); + SetDIBitsToDevice(Canvas.Handle, + 0, Row * FCellHeight, + FCols * FCellWidth, FCellHeight, + 0, 0, + 0, FCellHeight, + FRowBuf, + PBitmapInfo(@FDibInfo)^, + 0); { DIB_RGB_COLORS } FDirtyRow[Row] := False; end; FAllDirty := False; @@ -1775,31 +1793,56 @@ end; procedure TKPAnsi.RenderRow(Row: Integer); -{ Core atlas renderer. For each cell in the row, look up the glyph in } -{ the monochrome atlas and write FG/BG palette indices directly into the } -{ 8bpp row buffer. Zero GDI calls. Cursor overlay is also rendered here } -{ by swapping FG/BG for the cursor cell. } +{ Core atlas renderer with nibble lookup table and inline ASM. For each } +{ cell in the row, the Pascal outer loop resolves colors and rebuilds the } +{ 16-entry nibble table on color change. The inline ASM inner loop } +{ expands one glyph (all scanlines) by splitting each glyph byte into } +{ high and low nibbles, looking up 4 pre-resolved pixels per nibble, and } +{ writing them as word stores. Zero branching in the inner loop. } +{ } +{ Register allocation in ASM block: } +{ DS = glyph block segment (table at 0..63, glyph data at 64+) } +{ SI = glyph data offset (increments through scanlines) } +{ ES = pixel buffer segment } +{ DI = pixel buffer offset (decrements by Stride for bottom-up DIB) } +{ BX = table index (BH=0, BL = nibble * 4) } +{ CX = scanline counter } +{ AX/DX = temporaries } +{ } +{ Critical: Delphi 1.0 may allocate local variables to SI/DI as register } +{ variables. The ASM block clobbers SI/DI for its own purposes, so ALL } +{ local variable values are pushed to an explicit mini-frame (via PUSH) } +{ BEFORE any register clobber, then accessed via BP-relative offsets. } +{ BP-relative addressing defaults to SS segment, safe after DS change. } var - Line: PTermLine; - Pix: PPixelBuf; - Stride: Longint; - Col: Integer; - GlyphRow: Integer; - BufScanline: Integer; - Bits: Byte; - Offset: Longint; - Pixel: Integer; - FGIdx: Byte; - BGIdx: Byte; - CharCode: Integer; - SbkCount: Integer; - VisRow: Integer; + Line: PTermLine; + Col: Integer; + FGIdx: Byte; + BGIdx: Byte; + CharCode: Integer; + SbkCount: Integer; + VisRow: Integer; + TabPtr: PPixelBuf; + I: Integer; + Ofs: Integer; + GlyphSeg: Word; + PixSeg: Word; + GlyphOfs: Word; + PixOfs: Word; + Stride: Word; + CellH: Word; begin - if FRowBuf[Row] = nil then + if FRowBuf = nil then + Exit; + if FGlyphBuf = nil then Exit; - Pix := FRowBuf[Row]; - Stride := Longint(FCols) * FCellWidth; + Stride := Word(FCols) * Word(FCellWidth); + CellH := FCellHeight; + + { Extract segments from far pointers -- pure Pascal, no register clobber } + GlyphSeg := Seg(PPixelBuf(FGlyphBuf)^); + PixSeg := Seg(PPixelBuf(FRowBuf)^); { Determine which line to render (handles scrollback view) } if FScrollPos <> 0 then @@ -1826,10 +1869,14 @@ begin if Line = nil then begin { Blank row: fill with background color 0 (black) } - FillChar(Pix^, FRowBufSize, 0); + FillChar(PPixelBuf(FRowBuf)^, FRowBufSize, 0); Exit; end; + { Force nibble table rebuild on first cell } + FNibbleFG := 255; + FNibbleBG := 255; + for Col := 0 to FCols - 1 do begin { Determine effective colors } @@ -1840,44 +1887,189 @@ begin BGIdx := Line^.Cells[Col].BG; CharCode := Ord(Line^.Cells[Col].Ch); - { Render glyph into buffer -- bottom-up for DIB format } - for GlyphRow := 0 to FCellHeight - 1 do + { Rebuild nibble table on color change: 16 entries x 4 bytes } + if (FGIdx <> FNibbleFG) or (BGIdx <> FNibbleBG) then begin - BufScanline := FCellHeight - 1 - GlyphRow; - Bits := FGlyphBits[CharCode, GlyphRow]; - Offset := Longint(BufScanline) * Stride + Longint(Col) * FCellWidth; - for Pixel := 0 to FCellWidth - 1 do + TabPtr := PPixelBuf(FGlyphBuf); + for I := 0 to 15 do begin - if (Bits and $80) <> 0 then - Pix^[Offset + Pixel] := FGIdx - else - Pix^[Offset + Pixel] := BGIdx; - Bits := Bits shl 1; + Ofs := I * 4; + if (I and 8) <> 0 then TabPtr^[Ofs] := FGIdx + else TabPtr^[Ofs] := BGIdx; + if (I and 4) <> 0 then TabPtr^[Ofs + 1] := FGIdx + else TabPtr^[Ofs + 1] := BGIdx; + if (I and 2) <> 0 then TabPtr^[Ofs + 2] := FGIdx + else TabPtr^[Ofs + 2] := BGIdx; + if (I and 1) <> 0 then TabPtr^[Ofs + 3] := FGIdx + else TabPtr^[Ofs + 3] := BGIdx; end; + FNibbleFG := FGIdx; + FNibbleBG := BGIdx; + end; + + { Compute offsets -- all 16-bit, no Longint } + GlyphOfs := 64 + Word(CharCode) shl 5; + PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8; + + asm + { Push all values to explicit mini-frame BEFORE any register } + { clobber. BASM reads register variables from SI/DI correctly } + { here since nothing has been overwritten yet. } + push Stride + push CellH + push PixSeg + push GlyphSeg + push PixOfs + push GlyphOfs + + push bp + mov bp, sp + { Mini-frame layout (all accessed via SS:[BP+n]): } + { [bp] = saved original BP } + { [bp+2] = GlyphOfs } + { [bp+4] = PixOfs } + { [bp+6] = GlyphSeg } + { [bp+8] = PixSeg } + { [bp+10] = CellH } + { [bp+12] = Stride } + + push ds + push bx + push si + push di + + mov si, [bp+2] + mov es, [bp+8] + mov di, [bp+4] + mov cx, [bp+10] + xor bh, bh + mov ds, [bp+6] + + @rowloop: + mov al, [si] { load glyph byte from DS:SI } + inc si + mov ah, al { save copy } + + { High nibble -> 4 pixels } + and al, $F0 + shr al, 1 + shr al, 1 { AL = high_nibble * 4 } + mov bl, al + mov dx, [bx] { 2 table bytes (DS:BX, table at offset 0) } + mov es:[di], dx + mov dx, [bx+2] { 2 more table bytes } + mov es:[di+2], dx + + { Low nibble -> 4 pixels } + mov al, ah + and al, $0F + shl al, 1 + shl al, 1 { AL = low_nibble * 4 } + mov bl, al + mov dx, [bx] + mov es:[di+4], dx + mov dx, [bx+2] + mov es:[di+6], dx + + sub di, [bp+12] { Stride via SS:[BP+12] -- safe after DS change } + dec cx + jnz @rowloop + + pop di + pop si + pop bx + pop ds + pop bp + add sp, 12 { remove 6 mini-frame words } end; end; { Cursor overlay: if cursor is on this row and visible, re-render the } - { cursor cell with swapped FG/BG using the same atlas lookup. } + { cursor cell with swapped FG/BG using the same ASM inner loop. } if FCursorVisible and FBlinkOn and (FScrollPos = 0) and (Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then begin FGIdx := Line^.Cells[FCursorCol].BG; BGIdx := Line^.Cells[FCursorCol].FG; CharCode := Ord(Line^.Cells[FCursorCol].Ch); - for GlyphRow := 0 to FCellHeight - 1 do + + { Rebuild nibble table for cursor colors } + TabPtr := PPixelBuf(FGlyphBuf); + for I := 0 to 15 do begin - BufScanline := FCellHeight - 1 - GlyphRow; - Bits := FGlyphBits[CharCode, GlyphRow]; - Offset := Longint(BufScanline) * Stride + Longint(FCursorCol) * FCellWidth; - for Pixel := 0 to FCellWidth - 1 do - begin - if (Bits and $80) <> 0 then - Pix^[Offset + Pixel] := FGIdx - else - Pix^[Offset + Pixel] := BGIdx; - Bits := Bits shl 1; - end; + Ofs := I * 4; + if (I and 8) <> 0 then TabPtr^[Ofs] := FGIdx + else TabPtr^[Ofs] := BGIdx; + if (I and 4) <> 0 then TabPtr^[Ofs + 1] := FGIdx + else TabPtr^[Ofs + 1] := BGIdx; + if (I and 2) <> 0 then TabPtr^[Ofs + 2] := FGIdx + else TabPtr^[Ofs + 2] := BGIdx; + if (I and 1) <> 0 then TabPtr^[Ofs + 3] := FGIdx + else TabPtr^[Ofs + 3] := BGIdx; + end; + FNibbleFG := FGIdx; + FNibbleBG := BGIdx; + + GlyphOfs := 64 + Word(CharCode) shl 5; + PixOfs := Word(CellH - 1) * Stride + Word(FCursorCol) * 8; + + asm + push Stride + push CellH + push PixSeg + push GlyphSeg + push PixOfs + push GlyphOfs + + push bp + mov bp, sp + + push ds + push bx + push si + push di + + mov si, [bp+2] + mov es, [bp+8] + mov di, [bp+4] + mov cx, [bp+10] + xor bh, bh + mov ds, [bp+6] + + @curloop: + mov al, [si] + inc si + mov ah, al + + and al, $F0 + shr al, 1 + shr al, 1 + mov bl, al + mov dx, [bx] + mov es:[di], dx + mov dx, [bx+2] + mov es:[di+2], dx + + mov al, ah + and al, $0F + shl al, 1 + shl al, 1 + mov bl, al + mov dx, [bx] + mov es:[di+4], dx + mov dx, [bx+2] + mov es:[di+6], dx + + sub di, [bp+12] + dec cx + jnz @curloop + + pop di + pop si + pop bx + pop ds + pop bp + add sp, 12 end; end; end;