Inline CSI parsing, ASM nibble table, cursor dedup, reccom block copy
Four performance optimizations targeting the hottest paths:
- Parse CSI params (P1/P2) as integers during the scan-ahead loop, eliminating the ParseParamBuf call from ExecuteCSI (~200 cycles/seq)
- Replace the 16-iteration Pascal nibble table rebuild (64 branch+store operations) with 32 straight-line word MOVs using precomputed BGBG/BGFG/FGBG/FGFG values
- Integrate the cursor FG/BG swap into the main RenderRow column loop, removing the duplicate nibble rebuild + ASM glyph expansion overlay pass
- Replace the byte-at-a-time reccom loop with an _fmemcpy block copy split at the ring buffer wrap point, reducing far pointer overhead from O(n) to O(1)

Also includes the previously uncommitted space fast path in RenderRow and inlined escape sequence handling in ParseDataBuf.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
8e3bad86e3
commit
c378abc9e5
2 changed files with 429 additions and 271 deletions
|
|
@ -66,6 +66,9 @@ type
|
|||
FParseState: TParseState; { Current parser state machine position }
|
||||
FParamBuf: array[0..31] of Char; { CSI parameter digits/semicolons }
|
||||
FParamLen: Integer; { Current length of FParamBuf }
|
||||
FCSIParam1: Integer; { First CSI param, parsed inline during scan }
|
||||
FCSIParam2: Integer; { Second CSI param, parsed inline during scan }
|
||||
FCSIParamIdx: Integer; { Which param we're accumulating (0=P1, 1=P2) }
|
||||
FMusicStr: string; { Accumulated ANSI music string (ESC[M..^N) }
|
||||
|
||||
{ Font metrics (measured from OEM charset paint font) }
|
||||
|
|
@ -484,6 +487,9 @@ begin
|
|||
FAttrReverse := False;
|
||||
FParseState := psNormal;
|
||||
FParamLen := 0;
|
||||
FCSIParam1 := 0;
|
||||
FCSIParam2 := 0;
|
||||
FCSIParamIdx := 0;
|
||||
FMusicStr := '';
|
||||
FCellWidth := 8;
|
||||
FCellHeight := 16;
|
||||
|
|
@ -892,22 +898,15 @@ end;
|
|||
|
||||
|
||||
procedure TKPAnsi.ExecuteCSI(FinalCh: Char);
|
||||
{ Uses FCSIParam1/FCSIParam2 parsed inline during CSI scan-ahead. }
|
||||
{ No ParseParamBuf call needed -- saves ~200 cycles per CSI sequence. }
|
||||
{ ParseSGR still uses FParamBuf for variable-count parameters. }
|
||||
var
|
||||
Params: array[0..15] of Integer;
|
||||
Count: Integer;
|
||||
P1: Integer;
|
||||
P2: Integer;
|
||||
P1: Integer;
|
||||
P2: Integer;
|
||||
begin
|
||||
ParseParamBuf(@FParamBuf[0], FParamLen, Params, Count);
|
||||
|
||||
if Count > 0 then
|
||||
P1 := Params[0]
|
||||
else
|
||||
P1 := 0;
|
||||
if Count > 1 then
|
||||
P2 := Params[1]
|
||||
else
|
||||
P2 := 0;
|
||||
P1 := FCSIParam1;
|
||||
P2 := FCSIParam2;
|
||||
|
||||
case FinalCh of
|
||||
'A': { CUU - Cursor Up }
|
||||
|
|
@ -1565,14 +1564,19 @@ end;
|
|||
|
||||
procedure TKPAnsi.ParseDataBuf(Buf: PChar; Len: Integer);
|
||||
{ Process incoming data from a PChar buffer (no string allocation needed). }
|
||||
{ Fast path batches runs of printable characters: colors are computed once }
|
||||
{ per run, and cells are filled in a tight loop without per-character state }
|
||||
{ checks. Run length is bounded by end of input, end of current row, or }
|
||||
{ next non-printable character -- whichever comes first. }
|
||||
{ }
|
||||
{ Three inlined fast paths eliminate ProcessChar method call overhead: }
|
||||
{ 1. Printable text runs: batch fill cells, one color computation per run }
|
||||
{ 2. CSI parameter accumulation: scan-ahead loop for digits/semicolons }
|
||||
{ 3. Common control chars: ESC, CR, LF handled inline }
|
||||
{ }
|
||||
{ Uncommon states (psCSIQuestion, psMusic) and rare control chars (TAB, }
|
||||
{ BS, BEL, ENQ) still delegate to ProcessChar. }
|
||||
{ }
|
||||
{ Does NOT call FlipToScreen -- the caller handles rendering. }
|
||||
var
|
||||
I: Integer;
|
||||
Ch: Char;
|
||||
Line: PTermLine;
|
||||
FGIdx: Byte;
|
||||
BGIdx: Byte;
|
||||
|
|
@ -1584,79 +1588,182 @@ begin
|
|||
|
||||
while I < Len do
|
||||
begin
|
||||
{ Fast path: printable character in normal state }
|
||||
if (FParseState = psNormal) and (Buf[I] >= ' ') then
|
||||
begin
|
||||
{ Handle wrap at right margin }
|
||||
if FCursorCol >= FCols then
|
||||
begin
|
||||
if FWrapMode then
|
||||
case FParseState of
|
||||
psNormal:
|
||||
begin
|
||||
FCursorCol := 0;
|
||||
Inc(FCursorRow);
|
||||
if FCursorRow >= FRows then
|
||||
if Buf[I] >= ' ' then
|
||||
begin
|
||||
FCursorRow := FRows - 1;
|
||||
DoScrollUp;
|
||||
{ Fast path: batch printable characters }
|
||||
if FCursorCol >= FCols then
|
||||
begin
|
||||
if FWrapMode then
|
||||
begin
|
||||
FCursorCol := 0;
|
||||
Inc(FCursorRow);
|
||||
if FCursorRow >= FRows then
|
||||
begin
|
||||
FCursorRow := FRows - 1;
|
||||
DoScrollUp;
|
||||
end;
|
||||
Line := nil;
|
||||
end
|
||||
else
|
||||
FCursorCol := FCols - 1;
|
||||
end;
|
||||
|
||||
if Line = nil then
|
||||
Line := FScreen[FCursorRow];
|
||||
|
||||
{ Compute colors once for the entire run }
|
||||
if FAttrBold then
|
||||
FGIdx := FAttrFG + 8
|
||||
else
|
||||
FGIdx := FAttrFG;
|
||||
BGIdx := FAttrBG;
|
||||
|
||||
{ Find run end: stop at control char, end of input, or end of row }
|
||||
Remaining := FCols - FCursorCol;
|
||||
RunEnd := I;
|
||||
while (RunEnd < Len) and (Buf[RunEnd] >= ' ') and
|
||||
(RunEnd - I < Remaining) do
|
||||
Inc(RunEnd);
|
||||
|
||||
{ Fill cells in tight loop }
|
||||
if FAttrReverse then
|
||||
begin
|
||||
while I < RunEnd do
|
||||
begin
|
||||
Line^.Cells[FCursorCol].Ch := Buf[I];
|
||||
Line^.Cells[FCursorCol].FG := BGIdx;
|
||||
Line^.Cells[FCursorCol].BG := FGIdx;
|
||||
Line^.Cells[FCursorCol].Bold := FAttrBold;
|
||||
Line^.Cells[FCursorCol].Blink := FAttrBlink;
|
||||
Inc(FCursorCol);
|
||||
Inc(I);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
while I < RunEnd do
|
||||
begin
|
||||
Line^.Cells[FCursorCol].Ch := Buf[I];
|
||||
Line^.Cells[FCursorCol].FG := FGIdx;
|
||||
Line^.Cells[FCursorCol].BG := BGIdx;
|
||||
Line^.Cells[FCursorCol].Bold := FAttrBold;
|
||||
Line^.Cells[FCursorCol].Blink := FAttrBlink;
|
||||
Inc(FCursorCol);
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
FDirtyRow[FCursorRow] := True;
|
||||
end
|
||||
else if Buf[I] = #27 then
|
||||
begin
|
||||
{ ESC: start escape sequence }
|
||||
FParseState := psEscape;
|
||||
Line := nil;
|
||||
Inc(I);
|
||||
end
|
||||
else if Buf[I] = #10 then
|
||||
begin
|
||||
{ LF: line feed }
|
||||
Inc(FCursorRow);
|
||||
if FCursorRow >= FRows then
|
||||
begin
|
||||
FCursorRow := FRows - 1;
|
||||
DoScrollUp;
|
||||
end;
|
||||
Line := nil;
|
||||
Inc(I);
|
||||
end
|
||||
else if Buf[I] = #13 then
|
||||
begin
|
||||
{ CR: carriage return }
|
||||
FCursorCol := 0;
|
||||
Inc(I);
|
||||
end
|
||||
else
|
||||
begin
|
||||
{ Uncommon control chars: BS, TAB, BEL, ENQ }
|
||||
Line := nil;
|
||||
ProcessChar(Buf[I]);
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
|
||||
psEscape:
|
||||
begin
|
||||
if Buf[I] = '[' then
|
||||
begin
|
||||
FParamLen := 0;
|
||||
FCSIParam1 := 0;
|
||||
FCSIParam2 := 0;
|
||||
FCSIParamIdx := 0;
|
||||
FParseState := psCSI;
|
||||
end
|
||||
else
|
||||
FParseState := psNormal;
|
||||
Inc(I);
|
||||
end;
|
||||
|
||||
psCSI:
|
||||
begin
|
||||
{ Scan ahead: parse integers inline while accumulating FParamBuf. }
|
||||
{ FCSIParam1/FCSIParam2 are built digit-by-digit during the scan }
|
||||
{ so ExecuteCSI can use them directly without ParseParamBuf. }
|
||||
{ FParamBuf is still maintained for ParseSGR (variable param count). }
|
||||
while (I < Len) and
|
||||
((Buf[I] >= '0') and (Buf[I] <= '9') or (Buf[I] = ';')) do
|
||||
begin
|
||||
if Buf[I] = ';' then
|
||||
begin
|
||||
Inc(FCSIParamIdx);
|
||||
end
|
||||
else if FCSIParamIdx = 0 then
|
||||
begin
|
||||
FCSIParam1 := FCSIParam1 * 10 + (Ord(Buf[I]) - 48);
|
||||
end
|
||||
else if FCSIParamIdx = 1 then
|
||||
begin
|
||||
FCSIParam2 := FCSIParam2 * 10 + (Ord(Buf[I]) - 48);
|
||||
end;
|
||||
if FParamLen < 32 then
|
||||
begin
|
||||
FParamBuf[FParamLen] := Buf[I];
|
||||
Inc(FParamLen);
|
||||
end;
|
||||
Inc(I);
|
||||
end;
|
||||
|
||||
{ Process final command byte if available }
|
||||
if I < Len then
|
||||
begin
|
||||
Ch := Buf[I];
|
||||
if Ch = '?' then
|
||||
begin
|
||||
FParseState := psCSIQuestion;
|
||||
end
|
||||
else if (Ch = 'M') and (FParamLen = 0) then
|
||||
begin
|
||||
FMusicStr := '';
|
||||
FParseState := psMusic;
|
||||
end
|
||||
else
|
||||
begin
|
||||
ExecuteCSI(Ch);
|
||||
FParseState := psNormal;
|
||||
end;
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
|
||||
else
|
||||
begin
|
||||
{ psCSIQuestion, psMusic: delegate to ProcessChar }
|
||||
Line := nil;
|
||||
end
|
||||
else
|
||||
FCursorCol := FCols - 1;
|
||||
end;
|
||||
|
||||
if Line = nil then
|
||||
Line := FScreen[FCursorRow];
|
||||
|
||||
{ Compute colors once for the entire run }
|
||||
if FAttrBold then
|
||||
FGIdx := FAttrFG + 8
|
||||
else
|
||||
FGIdx := FAttrFG;
|
||||
BGIdx := FAttrBG;
|
||||
|
||||
{ Find run end: stop at control char, end of input, or end of row }
|
||||
Remaining := FCols - FCursorCol;
|
||||
RunEnd := I;
|
||||
while (RunEnd < Len) and (Buf[RunEnd] >= ' ') and
|
||||
(RunEnd - I < Remaining) do
|
||||
Inc(RunEnd);
|
||||
|
||||
{ Fill cells in tight loop -- no per-character state/wrap checks }
|
||||
if FAttrReverse then
|
||||
begin
|
||||
while I < RunEnd do
|
||||
begin
|
||||
Line^.Cells[FCursorCol].Ch := Buf[I];
|
||||
Line^.Cells[FCursorCol].FG := BGIdx;
|
||||
Line^.Cells[FCursorCol].BG := FGIdx;
|
||||
Line^.Cells[FCursorCol].Bold := FAttrBold;
|
||||
Line^.Cells[FCursorCol].Blink := FAttrBlink;
|
||||
Inc(FCursorCol);
|
||||
ProcessChar(Buf[I]);
|
||||
Inc(I);
|
||||
end;
|
||||
end
|
||||
else
|
||||
begin
|
||||
while I < RunEnd do
|
||||
begin
|
||||
Line^.Cells[FCursorCol].Ch := Buf[I];
|
||||
Line^.Cells[FCursorCol].FG := FGIdx;
|
||||
Line^.Cells[FCursorCol].BG := BGIdx;
|
||||
Line^.Cells[FCursorCol].Bold := FAttrBold;
|
||||
Line^.Cells[FCursorCol].Blink := FAttrBlink;
|
||||
Inc(FCursorCol);
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
FDirtyRow[FCursorRow] := True;
|
||||
end
|
||||
else
|
||||
begin
|
||||
{ Slow path: control chars, escape sequences }
|
||||
Line := nil;
|
||||
ProcessChar(Buf[I]);
|
||||
Inc(I);
|
||||
end;
|
||||
end;
|
||||
|
||||
|
|
@ -1828,8 +1935,11 @@ begin
|
|||
case Ch of
|
||||
'[':
|
||||
begin
|
||||
FParamLen := 0;
|
||||
FParseState := psCSI;
|
||||
FParamLen := 0;
|
||||
FCSIParam1 := 0;
|
||||
FCSIParam2 := 0;
|
||||
FCSIParamIdx := 0;
|
||||
FParseState := psCSI;
|
||||
end;
|
||||
else
|
||||
begin
|
||||
|
|
@ -1844,6 +1954,12 @@ begin
|
|||
case Ch of
|
||||
'0'..'9', ';':
|
||||
begin
|
||||
if Ch = ';' then
|
||||
Inc(FCSIParamIdx)
|
||||
else if FCSIParamIdx = 0 then
|
||||
FCSIParam1 := FCSIParam1 * 10 + (Ord(Ch) - 48)
|
||||
else if FCSIParamIdx = 1 then
|
||||
FCSIParam2 := FCSIParam2 * 10 + (Ord(Ch) - 48);
|
||||
if FParamLen < 32 then
|
||||
begin
|
||||
FParamBuf[FParamLen] := Ch;
|
||||
|
|
@ -1994,22 +2110,24 @@ procedure TKPAnsi.RenderRow(Row: Integer);
|
|||
{ BEFORE any register clobber, then accessed via BP-relative offsets. }
|
||||
{ BP-relative addressing defaults to SS segment, safe after DS change. }
|
||||
var
|
||||
Line: PTermLine;
|
||||
Col: Integer;
|
||||
FGIdx: Byte;
|
||||
BGIdx: Byte;
|
||||
CharCode: Integer;
|
||||
SbkCount: Integer;
|
||||
VisRow: Integer;
|
||||
TabPtr: PPixelBuf;
|
||||
I: Integer;
|
||||
Ofs: Integer;
|
||||
GlyphSeg: Word;
|
||||
PixSeg: Word;
|
||||
GlyphOfs: Word;
|
||||
PixOfs: Word;
|
||||
Stride: Word;
|
||||
CellH: Word;
|
||||
Line: PTermLine;
|
||||
Col: Integer;
|
||||
CurCol: Integer; { Cursor column on this row, or -1 if no cursor }
|
||||
FGIdx: Byte;
|
||||
BGIdx: Byte;
|
||||
TmpIdx: Byte;
|
||||
CharCode: Integer;
|
||||
SbkCount: Integer;
|
||||
VisRow: Integer;
|
||||
TabPtr: PPixelBuf;
|
||||
I: Integer;
|
||||
Ofs: Integer;
|
||||
GlyphSeg: Word;
|
||||
PixSeg: Word;
|
||||
GlyphOfs: Word;
|
||||
PixOfs: Word;
|
||||
Stride: Word;
|
||||
CellH: Word;
|
||||
begin
|
||||
if FRowBuf = nil then
|
||||
Exit;
|
||||
|
|
@ -2052,6 +2170,15 @@ begin
|
|||
Exit;
|
||||
end;
|
||||
|
||||
{ Determine cursor column for this row (-1 if cursor not on this row). }
|
||||
{ The cursor swap is integrated into the main column loop, eliminating }
|
||||
{ the separate cursor overlay pass (saves nibble rebuild + ASM per cell). }
|
||||
if FCursorVisible and FBlinkOn and (FScrollPos = 0) and
|
||||
(Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then
|
||||
CurCol := FCursorCol
|
||||
else
|
||||
CurCol := -1;
|
||||
|
||||
{ Force nibble table rebuild on first cell }
|
||||
FNibbleFG := 255;
|
||||
FNibbleBG := 255;
|
||||
|
|
@ -2080,180 +2207,185 @@ begin
|
|||
BGIdx := Line^.Cells[Col].BG;
|
||||
CharCode := Ord(Line^.Cells[Col].Ch);
|
||||
|
||||
{ Rebuild nibble table on color change: 16 entries x 4 bytes }
|
||||
if (FGIdx <> FNibbleFG) or (BGIdx <> FNibbleBG) then
|
||||
{ Cursor: swap FG/BG inline -- no separate overlay pass needed }
|
||||
if Col = CurCol then
|
||||
begin
|
||||
TabPtr := PPixelBuf(FGlyphBuf);
|
||||
for I := 0 to 15 do
|
||||
begin
|
||||
Ofs := I * 4;
|
||||
if (I and 8) <> 0 then TabPtr^[Ofs] := FGIdx
|
||||
else TabPtr^[Ofs] := BGIdx;
|
||||
if (I and 4) <> 0 then TabPtr^[Ofs + 1] := FGIdx
|
||||
else TabPtr^[Ofs + 1] := BGIdx;
|
||||
if (I and 2) <> 0 then TabPtr^[Ofs + 2] := FGIdx
|
||||
else TabPtr^[Ofs + 2] := BGIdx;
|
||||
if (I and 1) <> 0 then TabPtr^[Ofs + 3] := FGIdx
|
||||
else TabPtr^[Ofs + 3] := BGIdx;
|
||||
TmpIdx := FGIdx;
|
||||
FGIdx := BGIdx;
|
||||
BGIdx := TmpIdx;
|
||||
end;
|
||||
|
||||
if CharCode = 32 then
|
||||
begin
|
||||
{ Space fast path: solid background fill, no glyph expansion. }
|
||||
{ Skips nibble table rebuild and ASM glyph loop entirely. }
|
||||
{ 4 word stores per scanline vs full nibble lookup + expansion. }
|
||||
PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8;
|
||||
asm
|
||||
push di
|
||||
mov es, PixSeg
|
||||
mov di, PixOfs
|
||||
mov al, BGIdx
|
||||
mov ah, al { AX = BGIdx:BGIdx }
|
||||
mov cx, CellH
|
||||
@spfill:
|
||||
mov es:[di], ax
|
||||
mov es:[di+2], ax
|
||||
mov es:[di+4], ax
|
||||
mov es:[di+6], ax
|
||||
sub di, Stride
|
||||
dec cx
|
||||
jnz @spfill
|
||||
pop di
|
||||
end;
|
||||
FNibbleFG := FGIdx;
|
||||
FNibbleBG := BGIdx;
|
||||
end;
|
||||
|
||||
{ Compute offsets -- all 16-bit, no Longint }
|
||||
GlyphOfs := 64 + Word(CharCode) shl 5;
|
||||
PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8;
|
||||
|
||||
asm
|
||||
{ Push only per-cell values. Constants already on stack above. }
|
||||
push PixOfs
|
||||
push GlyphOfs
|
||||
|
||||
push bp
|
||||
mov bp, sp
|
||||
{ Mini-frame layout (same offsets as before): }
|
||||
{ [bp] = saved original BP }
|
||||
{ [bp+2] = GlyphOfs (pushed this cell) }
|
||||
{ [bp+4] = PixOfs (pushed this cell) }
|
||||
{ [bp+6] = GlyphSeg (pushed once before loop) }
|
||||
{ [bp+8] = PixSeg (pushed once before loop) }
|
||||
{ [bp+10] = CellH (pushed once before loop) }
|
||||
{ [bp+12] = Stride (pushed once before loop) }
|
||||
|
||||
push ds
|
||||
push bx
|
||||
push si
|
||||
push di
|
||||
|
||||
mov si, [bp+2]
|
||||
mov es, [bp+8]
|
||||
mov di, [bp+4]
|
||||
mov cx, [bp+10]
|
||||
xor bh, bh
|
||||
mov ds, [bp+6]
|
||||
|
||||
@rowloop:
|
||||
mov al, [si] { load glyph byte from DS:SI }
|
||||
inc si
|
||||
mov ah, al { save copy }
|
||||
|
||||
{ High nibble -> 4 pixels }
|
||||
and al, $F0
|
||||
shr al, 1
|
||||
shr al, 1 { AL = high_nibble * 4 }
|
||||
mov bl, al
|
||||
mov dx, [bx] { 2 table bytes (DS:BX, table at offset 0) }
|
||||
mov es:[di], dx
|
||||
mov dx, [bx+2] { 2 more table bytes }
|
||||
mov es:[di+2], dx
|
||||
|
||||
{ Low nibble -> 4 pixels }
|
||||
mov al, ah
|
||||
and al, $0F
|
||||
shl al, 1
|
||||
shl al, 1 { AL = low_nibble * 4 }
|
||||
mov bl, al
|
||||
mov dx, [bx]
|
||||
mov es:[di+4], dx
|
||||
mov dx, [bx+2]
|
||||
mov es:[di+6], dx
|
||||
|
||||
sub di, [bp+12] { Stride via SS:[BP+12] -- safe after DS change }
|
||||
dec cx
|
||||
jnz @rowloop
|
||||
|
||||
pop di
|
||||
pop si
|
||||
pop bx
|
||||
pop ds
|
||||
pop bp
|
||||
add sp, 4 { remove per-cell GlyphOfs + PixOfs only }
|
||||
end;
|
||||
end;
|
||||
|
||||
{ Cursor overlay: if cursor is on this row and visible, re-render the }
|
||||
{ cursor cell with swapped FG/BG using the same ASM inner loop. }
|
||||
{ Constants are still on the stack from above -- reused here. }
|
||||
if FCursorVisible and FBlinkOn and (FScrollPos = 0) and
|
||||
(Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then
|
||||
begin
|
||||
FGIdx := Line^.Cells[FCursorCol].BG;
|
||||
BGIdx := Line^.Cells[FCursorCol].FG;
|
||||
CharCode := Ord(Line^.Cells[FCursorCol].Ch);
|
||||
|
||||
{ Rebuild nibble table for cursor colors }
|
||||
TabPtr := PPixelBuf(FGlyphBuf);
|
||||
for I := 0 to 15 do
|
||||
end
|
||||
else
|
||||
begin
|
||||
Ofs := I * 4;
|
||||
if (I and 8) <> 0 then TabPtr^[Ofs] := FGIdx
|
||||
else TabPtr^[Ofs] := BGIdx;
|
||||
if (I and 4) <> 0 then TabPtr^[Ofs + 1] := FGIdx
|
||||
else TabPtr^[Ofs + 1] := BGIdx;
|
||||
if (I and 2) <> 0 then TabPtr^[Ofs + 2] := FGIdx
|
||||
else TabPtr^[Ofs + 2] := BGIdx;
|
||||
if (I and 1) <> 0 then TabPtr^[Ofs + 3] := FGIdx
|
||||
else TabPtr^[Ofs + 3] := BGIdx;
|
||||
end;
|
||||
FNibbleFG := FGIdx;
|
||||
FNibbleBG := BGIdx;
|
||||
{ Rebuild nibble table on color change: 16 entries x 4 bytes. }
|
||||
{ Pre-compute 4 word values (BGBG, BGFG, FGBG, FGFG) in AX/BX/CX/DX }
|
||||
{ and write all 32 words directly. Replaces 64 branch+store Pascal }
|
||||
{ operations with 32 straight-line MOV instructions. }
|
||||
if (FGIdx <> FNibbleFG) or (BGIdx <> FNibbleBG) then
|
||||
begin
|
||||
asm
|
||||
push di
|
||||
push bx
|
||||
push es
|
||||
les di, FGlyphBuf
|
||||
mov al, BGIdx
|
||||
mov ah, al { AX = BG:BG }
|
||||
mov dl, FGIdx
|
||||
mov dh, dl { DX = FG:FG }
|
||||
mov bl, al
|
||||
mov bh, dl { BX = BG:FG (lo=BG, hi=FG) }
|
||||
mov cl, dl
|
||||
mov ch, al { CX = FG:BG (lo=FG, hi=BG) }
|
||||
|
||||
GlyphOfs := 64 + Word(CharCode) shl 5;
|
||||
PixOfs := Word(CellH - 1) * Stride + Word(FCursorCol) * 8;
|
||||
{ Entry 0 (0000): BG BG BG BG }
|
||||
mov es:[di+ 0], ax
|
||||
mov es:[di+ 2], ax
|
||||
{ Entry 1 (0001): BG BG BG FG }
|
||||
mov es:[di+ 4], ax
|
||||
mov es:[di+ 6], bx
|
||||
{ Entry 2 (0010): BG BG FG BG }
|
||||
mov es:[di+ 8], ax
|
||||
mov es:[di+10], cx
|
||||
{ Entry 3 (0011): BG BG FG FG }
|
||||
mov es:[di+12], ax
|
||||
mov es:[di+14], dx
|
||||
{ Entry 4 (0100): BG FG BG BG }
|
||||
mov es:[di+16], bx
|
||||
mov es:[di+18], ax
|
||||
{ Entry 5 (0101): BG FG BG FG }
|
||||
mov es:[di+20], bx
|
||||
mov es:[di+22], bx
|
||||
{ Entry 6 (0110): BG FG FG BG }
|
||||
mov es:[di+24], bx
|
||||
mov es:[di+26], cx
|
||||
{ Entry 7 (0111): BG FG FG FG }
|
||||
mov es:[di+28], bx
|
||||
mov es:[di+30], dx
|
||||
{ Entry 8 (1000): FG BG BG BG }
|
||||
mov es:[di+32], cx
|
||||
mov es:[di+34], ax
|
||||
{ Entry 9 (1001): FG BG BG FG }
|
||||
mov es:[di+36], cx
|
||||
mov es:[di+38], bx
|
||||
{ Entry 10 (1010): FG BG FG BG }
|
||||
mov es:[di+40], cx
|
||||
mov es:[di+42], cx
|
||||
{ Entry 11 (1011): FG BG FG FG }
|
||||
mov es:[di+44], cx
|
||||
mov es:[di+46], dx
|
||||
{ Entry 12 (1100): FG FG BG BG }
|
||||
mov es:[di+48], dx
|
||||
mov es:[di+50], ax
|
||||
{ Entry 13 (1101): FG FG BG FG }
|
||||
mov es:[di+52], dx
|
||||
mov es:[di+54], bx
|
||||
{ Entry 14 (1110): FG FG FG BG }
|
||||
mov es:[di+56], dx
|
||||
mov es:[di+58], cx
|
||||
{ Entry 15 (1111): FG FG FG FG }
|
||||
mov es:[di+60], dx
|
||||
mov es:[di+62], dx
|
||||
pop es
|
||||
pop bx
|
||||
pop di
|
||||
end;
|
||||
FNibbleFG := FGIdx;
|
||||
FNibbleBG := BGIdx;
|
||||
end;
|
||||
|
||||
asm
|
||||
push PixOfs
|
||||
push GlyphOfs
|
||||
{ Compute offsets -- all 16-bit, no Longint }
|
||||
GlyphOfs := 64 + Word(CharCode) shl 5;
|
||||
PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8;
|
||||
|
||||
push bp
|
||||
mov bp, sp
|
||||
asm
|
||||
{ Push only per-cell values. Constants already on stack above. }
|
||||
push PixOfs
|
||||
push GlyphOfs
|
||||
|
||||
push ds
|
||||
push bx
|
||||
push si
|
||||
push di
|
||||
push bp
|
||||
mov bp, sp
|
||||
{ Mini-frame layout (same offsets as before): }
|
||||
{ [bp] = saved original BP }
|
||||
{ [bp+2] = GlyphOfs (pushed this cell) }
|
||||
{ [bp+4] = PixOfs (pushed this cell) }
|
||||
{ [bp+6] = GlyphSeg (pushed once before loop) }
|
||||
{ [bp+8] = PixSeg (pushed once before loop) }
|
||||
{ [bp+10] = CellH (pushed once before loop) }
|
||||
{ [bp+12] = Stride (pushed once before loop) }
|
||||
|
||||
mov si, [bp+2]
|
||||
mov es, [bp+8]
|
||||
mov di, [bp+4]
|
||||
mov cx, [bp+10]
|
||||
xor bh, bh
|
||||
mov ds, [bp+6]
|
||||
push ds
|
||||
push bx
|
||||
push si
|
||||
push di
|
||||
|
||||
@curloop:
|
||||
mov al, [si]
|
||||
inc si
|
||||
mov ah, al
|
||||
mov si, [bp+2]
|
||||
mov es, [bp+8]
|
||||
mov di, [bp+4]
|
||||
mov cx, [bp+10]
|
||||
xor bh, bh
|
||||
mov ds, [bp+6]
|
||||
|
||||
and al, $F0
|
||||
shr al, 1
|
||||
shr al, 1
|
||||
mov bl, al
|
||||
mov dx, [bx]
|
||||
mov es:[di], dx
|
||||
mov dx, [bx+2]
|
||||
mov es:[di+2], dx
|
||||
@rowloop:
|
||||
mov al, [si] { load glyph byte from DS:SI }
|
||||
inc si
|
||||
mov ah, al { save copy }
|
||||
|
||||
mov al, ah
|
||||
and al, $0F
|
||||
shl al, 1
|
||||
shl al, 1
|
||||
mov bl, al
|
||||
mov dx, [bx]
|
||||
mov es:[di+4], dx
|
||||
mov dx, [bx+2]
|
||||
mov es:[di+6], dx
|
||||
{ High nibble -> 4 pixels }
|
||||
and al, $F0
|
||||
shr al, 1
|
||||
shr al, 1 { AL = high_nibble * 4 }
|
||||
mov bl, al
|
||||
mov dx, [bx] { 2 table bytes (DS:BX, table at offset 0) }
|
||||
mov es:[di], dx
|
||||
mov dx, [bx+2] { 2 more table bytes }
|
||||
mov es:[di+2], dx
|
||||
|
||||
sub di, [bp+12]
|
||||
dec cx
|
||||
jnz @curloop
|
||||
{ Low nibble -> 4 pixels }
|
||||
mov al, ah
|
||||
and al, $0F
|
||||
shl al, 1
|
||||
shl al, 1 { AL = low_nibble * 4 }
|
||||
mov bl, al
|
||||
mov dx, [bx]
|
||||
mov es:[di+4], dx
|
||||
mov dx, [bx+2]
|
||||
mov es:[di+6], dx
|
||||
|
||||
pop di
|
||||
pop si
|
||||
pop bx
|
||||
pop ds
|
||||
pop bp
|
||||
add sp, 4
|
||||
sub di, [bp+12] { Stride via SS:[BP+12] -- safe after DS change }
|
||||
dec cx
|
||||
jnz @rowloop
|
||||
|
||||
pop di
|
||||
pop si
|
||||
pop bx
|
||||
pop ds
|
||||
pop bp
|
||||
add sp, 4 { remove per-cell GlyphOfs + PixOfs only }
|
||||
end;
|
||||
end;
|
||||
end;
|
||||
|
||||
|
|
|
|||
|
|
@ -1256,15 +1256,41 @@ int16_t FAR PASCAL _export reccom(int16_t commId, void FAR *buf, int16_t len)
|
|||
dst = (uint8_t FAR *)buf;
|
||||
bytesRead = 0;
|
||||
|
||||
// Block copy from ring buffer, splitting at wrap point.
|
||||
// Two _fmemcpy calls replace per-byte loop with far pointer overhead.
|
||||
_disable();
|
||||
while (bytesRead < len && port->rxCount > 0) {
|
||||
*dst++ = port->rxBuf[port->rxTail];
|
||||
port->rxTail++;
|
||||
if (port->rxTail >= port->rxSize) {
|
||||
port->rxTail = 0;
|
||||
{
|
||||
uint16_t avail;
|
||||
uint16_t chunk;
|
||||
|
||||
avail = port->rxCount;
|
||||
if (avail > (uint16_t)len) {
|
||||
avail = (uint16_t)len;
|
||||
}
|
||||
|
||||
if (avail > 0) {
|
||||
// First chunk: tail to end of buffer (or avail, whichever is smaller)
|
||||
chunk = port->rxSize - port->rxTail;
|
||||
if (chunk > avail) {
|
||||
chunk = avail;
|
||||
}
|
||||
_fmemcpy(dst, port->rxBuf + port->rxTail, chunk);
|
||||
dst += chunk;
|
||||
bytesRead = chunk;
|
||||
|
||||
// Second chunk: wrap around to start of buffer
|
||||
if (bytesRead < avail) {
|
||||
chunk = avail - bytesRead;
|
||||
_fmemcpy(dst, port->rxBuf, chunk);
|
||||
bytesRead += chunk;
|
||||
}
|
||||
|
||||
port->rxTail += bytesRead;
|
||||
if (port->rxTail >= port->rxSize) {
|
||||
port->rxTail -= port->rxSize;
|
||||
}
|
||||
port->rxCount -= bytesRead;
|
||||
}
|
||||
port->rxCount--;
|
||||
bytesRead++;
|
||||
}
|
||||
_enable();
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue