From c378abc9e556fa59f7e2d0d434abe709ad365ef3 Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Mon, 2 Mar 2026 18:20:51 -0600 Subject: [PATCH] Inline CSI parsing, ASM nibble table, cursor dedup, reccom block copy Four performance optimizations targeting the hottest paths: - Parse CSI params (P1/P2) as integers during scan-ahead loop, eliminating the ParseParamBuf call from ExecuteCSI (~200 cycles/seq) - Replace 16-iteration Pascal nibble table rebuild (64 branch+store) with 32 straight-line word MOVs using precomputed BGBG/BGFG/FGBG/FGFG values - Integrate cursor FG/BG swap into main RenderRow column loop, removing duplicate nibble rebuild + ASM glyph expansion overlay pass - Replace byte-at-a-time reccom loop with _fmemcpy block copy split at ring buffer wrap point, reducing far pointer overhead from O(n) to O(1) Also includes previously uncommitted space fast-path in RenderRow and inlined escape sequence handling in ParseDataBuf. Co-Authored-By: Claude Opus 4.6 --- delphi/KPANSI.PAS | 660 +++++++++++++++++++++++++++------------------- drv/commdrv.c | 40 ++- 2 files changed, 429 insertions(+), 271 deletions(-) diff --git a/delphi/KPANSI.PAS b/delphi/KPANSI.PAS index 7f4cde1..17e7c68 100644 --- a/delphi/KPANSI.PAS +++ b/delphi/KPANSI.PAS @@ -66,6 +66,9 @@ type FParseState: TParseState; { Current parser state machine position } FParamBuf: array[0..31] of Char; { CSI parameter digits/semicolons } FParamLen: Integer; { Current length of FParamBuf } + FCSIParam1: Integer; { First CSI param, parsed inline during scan } + FCSIParam2: Integer; { Second CSI param, parsed inline during scan } + FCSIParamIdx: Integer; { Which param we're accumulating (0=P1, 1=P2) } FMusicStr: string; { Accumulated ANSI music string (ESC[M..^N) } { Font metrics (measured from OEM charset paint font) } @@ -484,6 +487,9 @@ begin FAttrReverse := False; FParseState := psNormal; FParamLen := 0; + FCSIParam1 := 0; + FCSIParam2 := 0; + FCSIParamIdx := 0; FMusicStr := ''; FCellWidth := 8; FCellHeight := 16; 
@@ -892,22 +898,15 @@ end; procedure TKPAnsi.ExecuteCSI(FinalCh: Char); +{ Uses FCSIParam1/FCSIParam2 parsed inline during CSI scan-ahead. } +{ No ParseParamBuf call needed -- saves ~200 cycles per CSI sequence. } +{ ParseSGR still uses FParamBuf for variable-count parameters. } var - Params: array[0..15] of Integer; - Count: Integer; - P1: Integer; - P2: Integer; + P1: Integer; + P2: Integer; begin - ParseParamBuf(@FParamBuf[0], FParamLen, Params, Count); - - if Count > 0 then - P1 := Params[0] - else - P1 := 0; - if Count > 1 then - P2 := Params[1] - else - P2 := 0; + P1 := FCSIParam1; + P2 := FCSIParam2; case FinalCh of 'A': { CUU - Cursor Up } @@ -1565,14 +1564,19 @@ end; procedure TKPAnsi.ParseDataBuf(Buf: PChar; Len: Integer); { Process incoming data from a PChar buffer (no string allocation needed). } -{ Fast path batches runs of printable characters: colors are computed once } -{ per run, and cells are filled in a tight loop without per-character state } -{ checks. Run length is bounded by end of input, end of current row, or } -{ next non-printable character -- whichever comes first. } +{ } +{ Three inlined fast paths eliminate ProcessChar method call overhead: } +{ 1. Printable text runs: batch fill cells, one color computation per run } +{ 2. CSI parameter accumulation: scan-ahead loop for digits/semicolons } +{ 3. Common control chars: ESC, CR, LF handled inline } +{ } +{ Uncommon states (psCSIQuestion, psMusic) and rare control chars (TAB, } +{ BS, BEL, ENQ) still delegate to ProcessChar. } { } { Does NOT call FlipToScreen -- the caller handles rendering. 
} var I: Integer; + Ch: Char; Line: PTermLine; FGIdx: Byte; BGIdx: Byte; @@ -1584,79 +1588,182 @@ begin while I < Len do begin - { Fast path: printable character in normal state } - if (FParseState = psNormal) and (Buf[I] >= ' ') then - begin - { Handle wrap at right margin } - if FCursorCol >= FCols then - begin - if FWrapMode then + case FParseState of + psNormal: begin - FCursorCol := 0; - Inc(FCursorRow); - if FCursorRow >= FRows then + if Buf[I] >= ' ' then begin - FCursorRow := FRows - 1; - DoScrollUp; + { Fast path: batch printable characters } + if FCursorCol >= FCols then + begin + if FWrapMode then + begin + FCursorCol := 0; + Inc(FCursorRow); + if FCursorRow >= FRows then + begin + FCursorRow := FRows - 1; + DoScrollUp; + end; + Line := nil; + end + else + FCursorCol := FCols - 1; + end; + + if Line = nil then + Line := FScreen[FCursorRow]; + + { Compute colors once for the entire run } + if FAttrBold then + FGIdx := FAttrFG + 8 + else + FGIdx := FAttrFG; + BGIdx := FAttrBG; + + { Find run end: stop at control char, end of input, or end of row } + Remaining := FCols - FCursorCol; + RunEnd := I; + while (RunEnd < Len) and (Buf[RunEnd] >= ' ') and + (RunEnd - I < Remaining) do + Inc(RunEnd); + + { Fill cells in tight loop } + if FAttrReverse then + begin + while I < RunEnd do + begin + Line^.Cells[FCursorCol].Ch := Buf[I]; + Line^.Cells[FCursorCol].FG := BGIdx; + Line^.Cells[FCursorCol].BG := FGIdx; + Line^.Cells[FCursorCol].Bold := FAttrBold; + Line^.Cells[FCursorCol].Blink := FAttrBlink; + Inc(FCursorCol); + Inc(I); + end; + end + else + begin + while I < RunEnd do + begin + Line^.Cells[FCursorCol].Ch := Buf[I]; + Line^.Cells[FCursorCol].FG := FGIdx; + Line^.Cells[FCursorCol].BG := BGIdx; + Line^.Cells[FCursorCol].Bold := FAttrBold; + Line^.Cells[FCursorCol].Blink := FAttrBlink; + Inc(FCursorCol); + Inc(I); + end; + end; + FDirtyRow[FCursorRow] := True; + end + else if Buf[I] = #27 then + begin + { ESC: start escape sequence } + FParseState := psEscape; 
+ Line := nil; + Inc(I); + end + else if Buf[I] = #10 then + begin + { LF: line feed } + Inc(FCursorRow); + if FCursorRow >= FRows then + begin + FCursorRow := FRows - 1; + DoScrollUp; + end; + Line := nil; + Inc(I); + end + else if Buf[I] = #13 then + begin + { CR: carriage return } + FCursorCol := 0; + Inc(I); + end + else + begin + { Uncommon control chars: BS, TAB, BEL, ENQ } + Line := nil; + ProcessChar(Buf[I]); + Inc(I); end; + end; + + psEscape: + begin + if Buf[I] = '[' then + begin + FParamLen := 0; + FCSIParam1 := 0; + FCSIParam2 := 0; + FCSIParamIdx := 0; + FParseState := psCSI; + end + else + FParseState := psNormal; + Inc(I); + end; + + psCSI: + begin + { Scan ahead: parse integers inline while accumulating FParamBuf. } + { FCSIParam1/FCSIParam2 are built digit-by-digit during the scan } + { so ExecuteCSI can use them directly without ParseParamBuf. } + { FParamBuf is still maintained for ParseSGR (variable param count). } + while (I < Len) and + ((Buf[I] >= '0') and (Buf[I] <= '9') or (Buf[I] = ';')) do + begin + if Buf[I] = ';' then + begin + Inc(FCSIParamIdx); + end + else if FCSIParamIdx = 0 then + begin + FCSIParam1 := FCSIParam1 * 10 + (Ord(Buf[I]) - 48); + end + else if FCSIParamIdx = 1 then + begin + FCSIParam2 := FCSIParam2 * 10 + (Ord(Buf[I]) - 48); + end; + if FParamLen < 32 then + begin + FParamBuf[FParamLen] := Buf[I]; + Inc(FParamLen); + end; + Inc(I); + end; + + { Process final command byte if available } + if I < Len then + begin + Ch := Buf[I]; + if Ch = '?' 
then + begin + FParseState := psCSIQuestion; + end + else if (Ch = 'M') and (FParamLen = 0) then + begin + FMusicStr := ''; + FParseState := psMusic; + end + else + begin + ExecuteCSI(Ch); + FParseState := psNormal; + end; + Inc(I); + end; + end; + + else + begin + { psCSIQuestion, psMusic: delegate to ProcessChar } Line := nil; - end - else - FCursorCol := FCols - 1; - end; - - if Line = nil then - Line := FScreen[FCursorRow]; - - { Compute colors once for the entire run } - if FAttrBold then - FGIdx := FAttrFG + 8 - else - FGIdx := FAttrFG; - BGIdx := FAttrBG; - - { Find run end: stop at control char, end of input, or end of row } - Remaining := FCols - FCursorCol; - RunEnd := I; - while (RunEnd < Len) and (Buf[RunEnd] >= ' ') and - (RunEnd - I < Remaining) do - Inc(RunEnd); - - { Fill cells in tight loop -- no per-character state/wrap checks } - if FAttrReverse then - begin - while I < RunEnd do - begin - Line^.Cells[FCursorCol].Ch := Buf[I]; - Line^.Cells[FCursorCol].FG := BGIdx; - Line^.Cells[FCursorCol].BG := FGIdx; - Line^.Cells[FCursorCol].Bold := FAttrBold; - Line^.Cells[FCursorCol].Blink := FAttrBlink; - Inc(FCursorCol); + ProcessChar(Buf[I]); Inc(I); end; - end - else - begin - while I < RunEnd do - begin - Line^.Cells[FCursorCol].Ch := Buf[I]; - Line^.Cells[FCursorCol].FG := FGIdx; - Line^.Cells[FCursorCol].BG := BGIdx; - Line^.Cells[FCursorCol].Bold := FAttrBold; - Line^.Cells[FCursorCol].Blink := FAttrBlink; - Inc(FCursorCol); - Inc(I); - end; - end; - FDirtyRow[FCursorRow] := True; - end - else - begin - { Slow path: control chars, escape sequences } - Line := nil; - ProcessChar(Buf[I]); - Inc(I); end; end; @@ -1828,8 +1935,11 @@ begin case Ch of '[': begin - FParamLen := 0; - FParseState := psCSI; + FParamLen := 0; + FCSIParam1 := 0; + FCSIParam2 := 0; + FCSIParamIdx := 0; + FParseState := psCSI; end; else begin @@ -1844,6 +1954,12 @@ begin case Ch of '0'..'9', ';': begin + if Ch = ';' then + Inc(FCSIParamIdx) + else if FCSIParamIdx = 0 then + 
FCSIParam1 := FCSIParam1 * 10 + (Ord(Ch) - 48) + else if FCSIParamIdx = 1 then + FCSIParam2 := FCSIParam2 * 10 + (Ord(Ch) - 48); if FParamLen < 32 then begin FParamBuf[FParamLen] := Ch; @@ -1994,22 +2110,24 @@ procedure TKPAnsi.RenderRow(Row: Integer); { BEFORE any register clobber, then accessed via BP-relative offsets. } { BP-relative addressing defaults to SS segment, safe after DS change. } var - Line: PTermLine; - Col: Integer; - FGIdx: Byte; - BGIdx: Byte; - CharCode: Integer; - SbkCount: Integer; - VisRow: Integer; - TabPtr: PPixelBuf; - I: Integer; - Ofs: Integer; - GlyphSeg: Word; - PixSeg: Word; - GlyphOfs: Word; - PixOfs: Word; - Stride: Word; - CellH: Word; + Line: PTermLine; + Col: Integer; + CurCol: Integer; { Cursor column on this row, or -1 if no cursor } + FGIdx: Byte; + BGIdx: Byte; + TmpIdx: Byte; + CharCode: Integer; + SbkCount: Integer; + VisRow: Integer; + TabPtr: PPixelBuf; + I: Integer; + Ofs: Integer; + GlyphSeg: Word; + PixSeg: Word; + GlyphOfs: Word; + PixOfs: Word; + Stride: Word; + CellH: Word; begin if FRowBuf = nil then Exit; @@ -2052,6 +2170,15 @@ begin Exit; end; + { Determine cursor column for this row (-1 if cursor not on this row). } + { The cursor swap is integrated into the main column loop, eliminating } + { the separate cursor overlay pass (saves nibble rebuild + ASM per cell). 
} + if FCursorVisible and FBlinkOn and (FScrollPos = 0) and + (Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then + CurCol := FCursorCol + else + CurCol := -1; + { Force nibble table rebuild on first cell } FNibbleFG := 255; FNibbleBG := 255; @@ -2080,180 +2207,185 @@ begin BGIdx := Line^.Cells[Col].BG; CharCode := Ord(Line^.Cells[Col].Ch); - { Rebuild nibble table on color change: 16 entries x 4 bytes } - if (FGIdx <> FNibbleFG) or (BGIdx <> FNibbleBG) then + { Cursor: swap FG/BG inline -- no separate overlay pass needed } + if Col = CurCol then begin - TabPtr := PPixelBuf(FGlyphBuf); - for I := 0 to 15 do - begin - Ofs := I * 4; - if (I and 8) <> 0 then TabPtr^[Ofs] := FGIdx - else TabPtr^[Ofs] := BGIdx; - if (I and 4) <> 0 then TabPtr^[Ofs + 1] := FGIdx - else TabPtr^[Ofs + 1] := BGIdx; - if (I and 2) <> 0 then TabPtr^[Ofs + 2] := FGIdx - else TabPtr^[Ofs + 2] := BGIdx; - if (I and 1) <> 0 then TabPtr^[Ofs + 3] := FGIdx - else TabPtr^[Ofs + 3] := BGIdx; + TmpIdx := FGIdx; + FGIdx := BGIdx; + BGIdx := TmpIdx; + end; + + if CharCode = 32 then + begin + { Space fast path: solid background fill, no glyph expansion. } + { Skips nibble table rebuild and ASM glyph loop entirely. } + { 4 word stores per scanline vs full nibble lookup + expansion. } + PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8; + asm + push di + mov es, PixSeg + mov di, PixOfs + mov al, BGIdx + mov ah, al { AX = BGIdx:BGIdx } + mov cx, CellH + @spfill: + mov es:[di], ax + mov es:[di+2], ax + mov es:[di+4], ax + mov es:[di+6], ax + sub di, Stride + dec cx + jnz @spfill + pop di end; - FNibbleFG := FGIdx; - FNibbleBG := BGIdx; - end; - - { Compute offsets -- all 16-bit, no Longint } - GlyphOfs := 64 + Word(CharCode) shl 5; - PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8; - - asm - { Push only per-cell values. Constants already on stack above. 
} - push PixOfs - push GlyphOfs - - push bp - mov bp, sp - { Mini-frame layout (same offsets as before): } - { [bp] = saved original BP } - { [bp+2] = GlyphOfs (pushed this cell) } - { [bp+4] = PixOfs (pushed this cell) } - { [bp+6] = GlyphSeg (pushed once before loop) } - { [bp+8] = PixSeg (pushed once before loop) } - { [bp+10] = CellH (pushed once before loop) } - { [bp+12] = Stride (pushed once before loop) } - - push ds - push bx - push si - push di - - mov si, [bp+2] - mov es, [bp+8] - mov di, [bp+4] - mov cx, [bp+10] - xor bh, bh - mov ds, [bp+6] - - @rowloop: - mov al, [si] { load glyph byte from DS:SI } - inc si - mov ah, al { save copy } - - { High nibble -> 4 pixels } - and al, $F0 - shr al, 1 - shr al, 1 { AL = high_nibble * 4 } - mov bl, al - mov dx, [bx] { 2 table bytes (DS:BX, table at offset 0) } - mov es:[di], dx - mov dx, [bx+2] { 2 more table bytes } - mov es:[di+2], dx - - { Low nibble -> 4 pixels } - mov al, ah - and al, $0F - shl al, 1 - shl al, 1 { AL = low_nibble * 4 } - mov bl, al - mov dx, [bx] - mov es:[di+4], dx - mov dx, [bx+2] - mov es:[di+6], dx - - sub di, [bp+12] { Stride via SS:[BP+12] -- safe after DS change } - dec cx - jnz @rowloop - - pop di - pop si - pop bx - pop ds - pop bp - add sp, 4 { remove per-cell GlyphOfs + PixOfs only } - end; - end; - - { Cursor overlay: if cursor is on this row and visible, re-render the } - { cursor cell with swapped FG/BG using the same ASM inner loop. } - { Constants are still on the stack from above -- reused here. 
} - if FCursorVisible and FBlinkOn and (FScrollPos = 0) and - (Row = FCursorRow) and (FCursorCol >= 0) and (FCursorCol < FCols) then - begin - FGIdx := Line^.Cells[FCursorCol].BG; - BGIdx := Line^.Cells[FCursorCol].FG; - CharCode := Ord(Line^.Cells[FCursorCol].Ch); - - { Rebuild nibble table for cursor colors } - TabPtr := PPixelBuf(FGlyphBuf); - for I := 0 to 15 do + end + else begin - Ofs := I * 4; - if (I and 8) <> 0 then TabPtr^[Ofs] := FGIdx - else TabPtr^[Ofs] := BGIdx; - if (I and 4) <> 0 then TabPtr^[Ofs + 1] := FGIdx - else TabPtr^[Ofs + 1] := BGIdx; - if (I and 2) <> 0 then TabPtr^[Ofs + 2] := FGIdx - else TabPtr^[Ofs + 2] := BGIdx; - if (I and 1) <> 0 then TabPtr^[Ofs + 3] := FGIdx - else TabPtr^[Ofs + 3] := BGIdx; - end; - FNibbleFG := FGIdx; - FNibbleBG := BGIdx; + { Rebuild nibble table on color change: 16 entries x 4 bytes. } + { Pre-compute 4 word values (BGBG, BGFG, FGBG, FGFG) in AX/BX/CX/DX } + { and write all 32 words directly. Replaces 64 branch+store Pascal } + { operations with 32 straight-line MOV instructions. 
} + if (FGIdx <> FNibbleFG) or (BGIdx <> FNibbleBG) then + begin + asm + push di + push bx + push es + les di, FGlyphBuf + mov al, BGIdx + mov ah, al { AX = BG:BG } + mov dl, FGIdx + mov dh, dl { DX = FG:FG } + mov bl, al + mov bh, dl { BX = BG:FG (lo=BG, hi=FG) } + mov cl, dl + mov ch, al { CX = FG:BG (lo=FG, hi=BG) } - GlyphOfs := 64 + Word(CharCode) shl 5; - PixOfs := Word(CellH - 1) * Stride + Word(FCursorCol) * 8; + { Entry 0 (0000): BG BG BG BG } + mov es:[di+ 0], ax + mov es:[di+ 2], ax + { Entry 1 (0001): BG BG BG FG } + mov es:[di+ 4], ax + mov es:[di+ 6], bx + { Entry 2 (0010): BG BG FG BG } + mov es:[di+ 8], ax + mov es:[di+10], cx + { Entry 3 (0011): BG BG FG FG } + mov es:[di+12], ax + mov es:[di+14], dx + { Entry 4 (0100): BG FG BG BG } + mov es:[di+16], bx + mov es:[di+18], ax + { Entry 5 (0101): BG FG BG FG } + mov es:[di+20], bx + mov es:[di+22], bx + { Entry 6 (0110): BG FG FG BG } + mov es:[di+24], bx + mov es:[di+26], cx + { Entry 7 (0111): BG FG FG FG } + mov es:[di+28], bx + mov es:[di+30], dx + { Entry 8 (1000): FG BG BG BG } + mov es:[di+32], cx + mov es:[di+34], ax + { Entry 9 (1001): FG BG BG FG } + mov es:[di+36], cx + mov es:[di+38], bx + { Entry 10 (1010): FG BG FG BG } + mov es:[di+40], cx + mov es:[di+42], cx + { Entry 11 (1011): FG BG FG FG } + mov es:[di+44], cx + mov es:[di+46], dx + { Entry 12 (1100): FG FG BG BG } + mov es:[di+48], dx + mov es:[di+50], ax + { Entry 13 (1101): FG FG BG FG } + mov es:[di+52], dx + mov es:[di+54], bx + { Entry 14 (1110): FG FG FG BG } + mov es:[di+56], dx + mov es:[di+58], cx + { Entry 15 (1111): FG FG FG FG } + mov es:[di+60], dx + mov es:[di+62], dx + pop es + pop bx + pop di + end; + FNibbleFG := FGIdx; + FNibbleBG := BGIdx; + end; - asm - push PixOfs - push GlyphOfs + { Compute offsets -- all 16-bit, no Longint } + GlyphOfs := 64 + Word(CharCode) shl 5; + PixOfs := Word(CellH - 1) * Stride + Word(Col) * 8; - push bp - mov bp, sp + asm + { Push only per-cell values. 
Constants already on stack above. } + push PixOfs + push GlyphOfs - push ds - push bx - push si - push di + push bp + mov bp, sp + { Mini-frame layout (same offsets as before): } + { [bp] = saved original BP } + { [bp+2] = GlyphOfs (pushed this cell) } + { [bp+4] = PixOfs (pushed this cell) } + { [bp+6] = GlyphSeg (pushed once before loop) } + { [bp+8] = PixSeg (pushed once before loop) } + { [bp+10] = CellH (pushed once before loop) } + { [bp+12] = Stride (pushed once before loop) } - mov si, [bp+2] - mov es, [bp+8] - mov di, [bp+4] - mov cx, [bp+10] - xor bh, bh - mov ds, [bp+6] + push ds + push bx + push si + push di - @curloop: - mov al, [si] - inc si - mov ah, al + mov si, [bp+2] + mov es, [bp+8] + mov di, [bp+4] + mov cx, [bp+10] + xor bh, bh + mov ds, [bp+6] - and al, $F0 - shr al, 1 - shr al, 1 - mov bl, al - mov dx, [bx] - mov es:[di], dx - mov dx, [bx+2] - mov es:[di+2], dx + @rowloop: + mov al, [si] { load glyph byte from DS:SI } + inc si + mov ah, al { save copy } - mov al, ah - and al, $0F - shl al, 1 - shl al, 1 - mov bl, al - mov dx, [bx] - mov es:[di+4], dx - mov dx, [bx+2] - mov es:[di+6], dx + { High nibble -> 4 pixels } + and al, $F0 + shr al, 1 + shr al, 1 { AL = high_nibble * 4 } + mov bl, al + mov dx, [bx] { 2 table bytes (DS:BX, table at offset 0) } + mov es:[di], dx + mov dx, [bx+2] { 2 more table bytes } + mov es:[di+2], dx - sub di, [bp+12] - dec cx - jnz @curloop + { Low nibble -> 4 pixels } + mov al, ah + and al, $0F + shl al, 1 + shl al, 1 { AL = low_nibble * 4 } + mov bl, al + mov dx, [bx] + mov es:[di+4], dx + mov dx, [bx+2] + mov es:[di+6], dx - pop di - pop si - pop bx - pop ds - pop bp - add sp, 4 + sub di, [bp+12] { Stride via SS:[BP+12] -- safe after DS change } + dec cx + jnz @rowloop + + pop di + pop si + pop bx + pop ds + pop bp + add sp, 4 { remove per-cell GlyphOfs + PixOfs only } + end; end; end; diff --git a/drv/commdrv.c b/drv/commdrv.c index fca2538..1e0901a 100644 --- a/drv/commdrv.c +++ b/drv/commdrv.c @@ -1256,15 
+1256,41 @@ int16_t FAR PASCAL _export reccom(int16_t commId, void FAR *buf, int16_t len) dst = (uint8_t FAR *)buf; bytesRead = 0; + // Block copy from ring buffer, splitting at wrap point. + // Two _fmemcpy calls replace per-byte loop with far pointer overhead. _disable(); - while (bytesRead < len && port->rxCount > 0) { - *dst++ = port->rxBuf[port->rxTail]; - port->rxTail++; - if (port->rxTail >= port->rxSize) { - port->rxTail = 0; + { + uint16_t avail; + uint16_t chunk; + + avail = port->rxCount; + if (avail > (uint16_t)len) { + avail = (uint16_t)len; + } + + if (avail > 0) { + // First chunk: tail to end of buffer (or avail, whichever is smaller) + chunk = port->rxSize - port->rxTail; + if (chunk > avail) { + chunk = avail; + } + _fmemcpy(dst, port->rxBuf + port->rxTail, chunk); + dst += chunk; + bytesRead = chunk; + + // Second chunk: wrap around to start of buffer + if (bytesRead < avail) { + chunk = avail - bytesRead; + _fmemcpy(dst, port->rxBuf, chunk); + bytesRead += chunk; + } + + port->rxTail += bytesRead; + if (port->rxTail >= port->rxSize) { + port->rxTail -= port->rxSize; + } + port->rxCount -= bytesRead; } - port->rxCount--; - bytesRead++; } _enable();