PEI slam and dirty tracking!
This commit is contained in:
parent
af366e7e81
commit
065be89bff
3 changed files with 654 additions and 204 deletions
|
|
@ -188,13 +188,13 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
|||
if (y < 0 || y >= SURFACE_HEIGHT || x < 0 || x >= SURFACE_WIDTH) {
|
||||
continue;
|
||||
}
|
||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
|
||||
// Highest-tier asm fast path: seed-test + walk-left + walk-right
|
||||
// + 1-row fill + scan-above + scan-below + push, all in one
|
||||
// cross-segment call. The asm caches row addr / match decoder
|
||||
// across every sub-operation. C just pops and dispatches; this
|
||||
// path completes the entire per-seed work.
|
||||
// path completes the entire per-seed work and computes the row
|
||||
// address itself, so we don't pay y*160 in C unless we fall back.
|
||||
{
|
||||
bool seedMatched;
|
||||
if (halFastFloodWalkAndScans(s->pixels, x, y,
|
||||
|
|
@ -206,6 +206,10 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
|||
}
|
||||
}
|
||||
|
||||
// Fallback path needs row; compute it here so the asm path
|
||||
// above doesn't pay for an unused y*160 multiply on every iter.
|
||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||
|
||||
// Tier-2 asm fast path: combined seed test + walk-left +
|
||||
// walk-right in one cross-segment call. Falls back to the
|
||||
// pure-C walks below on ports without an asm implementation.
|
||||
|
|
|
|||
|
|
@ -164,13 +164,13 @@ static uint8_t gCachedScb [SURFACE_HEIGHT];
|
|||
static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
|
||||
static bool gCacheValid = false;
|
||||
|
||||
// PEI slam scratch shared with src/port/iigs/peislam.asm. File-scope
|
||||
// non-static so the asm can `ext` them; all accesses inside the slam
|
||||
// use long-mode addressing so they bypass the //e RAMRD redirect the
|
||||
// slam turns on for the duration of the run.
|
||||
// PEI slam scratch. File-scope non-static so the asm can `ext` them;
|
||||
// all accesses inside the slam use long-mode `>` addressing so they
|
||||
// bypass the //e RAMRD redirect the slam turns on for its duration.
|
||||
volatile uint16_t gPeiOrigSp;
|
||||
volatile uint8_t gPeiOrigShadow;
|
||||
volatile uint16_t gPeiTempRowBase;
|
||||
volatile uint16_t gPeiCurRow; // row counter saved across slam (stack is hijacked)
|
||||
|
||||
// Defined in src/port/iigs/peislam.asm, in its own load segment
|
||||
// (DRAWPRIMS) so the GS/OS loader places it in a different bank from
|
||||
|
|
|
|||
|
|
@ -1362,82 +1362,243 @@ dcLoopBody anop
|
|||
lda >gRowOffsetLut,x ; A = y*160
|
||||
sta >dcRowXN
|
||||
|
||||
* 8 octant plots. dcPlotPx wants A=col, X=rowBase. LDX has no long-
|
||||
* absolute mode, so for each plot we stash col, load row via LDA/TAX,
|
||||
* then reload col into A.
|
||||
* Octants 1-4 use the y-row pair (cx +/- x, cy +/- y).
|
||||
* 8 octant plots, fully inlined. Each plot:
|
||||
* 1. col = (acx +/- dcX|dcY) -> A
|
||||
* 2. save col -> dcSavedCol (for parity test)
|
||||
* 3. byteIdx = col >> 1; byte addr = byteIdx + rowBase -> Y
|
||||
* 4. test col & 1; do high or low nibble RMW
|
||||
* Skips the JSR/RTS to dcPlotPx (~12 cyc) and the load-row-via-X
|
||||
* dance (lda row + tax, then txa in dcPlotPx). Per plot: ~60 cyc vs ~80
|
||||
* cyc with JSR. ~20 cyc/plot x ~2560 plots in the demo's 4 circles
|
||||
* = ~18 ms.
|
||||
*
|
||||
* Each plot has its own dcOddN / dcDoneN labels (ORCA-M needs unique).
|
||||
|
||||
* Octants 1-4: y-row pair (cx +/- x, cy +/- y).
|
||||
* Plot 1: (cx+x, cy+y)
|
||||
lda acx
|
||||
clc
|
||||
adc >dcX
|
||||
sta >dcSavedCol
|
||||
lda >dcRowYP
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowYP
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx+x, cy+y)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd1
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone1
|
||||
dcOdd1 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone1 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Plot 2: (cx-x, cy+y)
|
||||
lda acx
|
||||
sec
|
||||
sbc >dcX
|
||||
sta >dcSavedCol
|
||||
lda >dcRowYP
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowYP
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx-x, cy+y)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd2
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone2
|
||||
dcOdd2 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone2 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Plot 3: (cx+x, cy-y)
|
||||
lda acx
|
||||
clc
|
||||
adc >dcX
|
||||
sta >dcSavedCol
|
||||
lda >dcRowYN
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowYN
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx+x, cy-y)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd3
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone3
|
||||
dcOdd3 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone3 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Plot 4: (cx-x, cy-y)
|
||||
lda acx
|
||||
sec
|
||||
sbc >dcX
|
||||
sta >dcSavedCol
|
||||
lda >dcRowYN
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowYN
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx-x, cy-y)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd4
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone4
|
||||
dcOdd4 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone4 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Octants 5-8 use the x-row pair (cx +/- y, cy +/- x).
|
||||
* Octants 5-8: x-row pair (cx +/- y, cy +/- x).
|
||||
* Plot 5: (cx+y, cy+x)
|
||||
lda acx
|
||||
clc
|
||||
adc >dcY
|
||||
sta >dcSavedCol
|
||||
lda >dcRowXP
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowXP
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx+y, cy+x)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd5
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone5
|
||||
dcOdd5 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone5 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Plot 6: (cx-y, cy+x)
|
||||
lda acx
|
||||
sec
|
||||
sbc >dcY
|
||||
sta >dcSavedCol
|
||||
lda >dcRowXP
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowXP
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx-y, cy+x)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd6
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone6
|
||||
dcOdd6 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone6 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Plot 7: (cx+y, cy-x)
|
||||
lda acx
|
||||
clc
|
||||
adc >dcY
|
||||
sta >dcSavedCol
|
||||
lda >dcRowXN
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowXN
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx+y, cy-x)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd7
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone7
|
||||
dcOdd7 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone7 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Plot 8: (cx-y, cy-x)
|
||||
lda acx
|
||||
sec
|
||||
sbc >dcY
|
||||
sta >dcSavedCol
|
||||
lda >dcRowXN
|
||||
tax
|
||||
lsr a
|
||||
clc
|
||||
adc >dcRowXN
|
||||
tay
|
||||
lda >dcSavedCol
|
||||
jsr dcPlotPx ; (cx-y, cy-x)
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
and #1
|
||||
bne dcOdd8
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
bra dcDone8
|
||||
dcOdd8 anop
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
dcDone8 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* Update Bresenham: y++; if err <= 0: err += 2y+1; else x--; err += 2(y-x)+1.
|
||||
lda >dcY
|
||||
|
|
@ -1483,44 +1644,9 @@ dcExit anop
|
|||
|
||||
* dcMul160 deleted -- callers now expand the y160lut macro inline.
|
||||
|
||||
****************************************************************
|
||||
* dcPlotPx: plot a pixel at column A, with row-base offset in X.
|
||||
* M=16, X=16 on entry. Trashes A, X, Y, P. D and B preserved.
|
||||
* Switches to M=8 for the byte RMW then back to M=16 for caller.
|
||||
****************************************************************
|
||||
dcPlotPx anop
|
||||
lsr a ; A = col>>1, C = col & 1
|
||||
bcs dcPlotOdd
|
||||
* Even column: high nibble.
|
||||
sta >dcMulTmp
|
||||
txa
|
||||
clc
|
||||
adc >dcMulTmp
|
||||
tay
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [pix],y
|
||||
and #$0F
|
||||
ora >dcNibHi
|
||||
sta [pix],y
|
||||
rep #$20
|
||||
LONGA ON
|
||||
rts
|
||||
dcPlotOdd anop
|
||||
sta >dcMulTmp
|
||||
txa
|
||||
clc
|
||||
adc >dcMulTmp
|
||||
tay
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [pix],y
|
||||
and #$F0
|
||||
ora >dcNibLo
|
||||
sta [pix],y
|
||||
rep #$20
|
||||
LONGA ON
|
||||
rts
|
||||
* dcPlotPx deleted -- the 8 octant plot sites now expand the plot
|
||||
* logic inline (no JSR/RTS, no row-base-in-X / dcMulTmp / TXA
|
||||
* round-trip per plot; col is still stashed in dcSavedCol).
|
||||
end
|
||||
|
||||
|
||||
|
|
@ -2224,39 +2350,205 @@ bpal equ 4
|
|||
adc #8
|
||||
tcd
|
||||
|
||||
* 1. Pixel blit (DBR ends up = $E1 after MVN).
|
||||
ldx #$2000
|
||||
ldy #$2000
|
||||
lda #31999
|
||||
mvn $010000,$E10000
|
||||
|
||||
* 2. SCB upload (200 bytes). DBR = $E1, so sta abs,Y -> $E1:abs+Y.
|
||||
ldy #0
|
||||
* 1. SCB upload (200 bytes) via MVN. Done BEFORE the PEI-slam so the
|
||||
* SEI window only spans the slam itself (~38 ms). Source bank is
|
||||
* runtime-patched into the MVN instruction (encoding: $54 dst src,
|
||||
* so byte +2 is src).
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
bscbLoop anop
|
||||
cpy #200
|
||||
beq bscbDone
|
||||
lda [bscb],y
|
||||
sta $9D00,y
|
||||
iny
|
||||
bra bscbLoop
|
||||
bscbDone anop
|
||||
lda bscb+2
|
||||
sta >mvnScbInst+2
|
||||
rep #$20
|
||||
LONGA ON
|
||||
lda bscb
|
||||
tax
|
||||
ldy #$9D00
|
||||
lda #199
|
||||
mvnScbInst mvn $000000,$E10000
|
||||
|
||||
* 2. Palette upload (512 bytes) via MVN. Same trick.
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda bpal+2
|
||||
sta >mvnPalInst+2
|
||||
rep #$20
|
||||
LONGA ON
|
||||
lda bpal
|
||||
tax
|
||||
ldy #$9E00
|
||||
lda #511
|
||||
mvnPalInst mvn $000000,$E10000
|
||||
|
||||
* 3. Pixel blit via PEI-slam, with per-row dirty skip.
|
||||
* PEI-slam: SP hijacked into the SHR shadow region of bank $01, AUXWRITE
|
||||
* + RAMRD remap bank-$00 stack pushes to bank $01, SHR shadow mirrors
|
||||
* bank-$01 writes to $E1. Result: PEI dp pushes from DP=$01:row_start
|
||||
* land at $E1:row_start (160 bytes / row at ~6 cyc per 2 bytes).
|
||||
* ~480 cyc/row vs MVN's ~1120 cyc/row -- 2.3x faster per row.
|
||||
* SEI for the duration: soft-switch state and stack hijack would
|
||||
* corrupt any IRQ handler that touches bank-0 globals. ~38 ms SEI
|
||||
* total for a full 200-row slam; chunk later if audio glitches.
|
||||
* Dirty skip: rows where gStageMinWord[y] > gStageMaxWord[y] are
|
||||
* clean and not slammed. Saves big on sparse-update demos; for
|
||||
* full-screen presents (DRAW), every row slams.
|
||||
|
||||
tsc
|
||||
sta >gPeiOrigSp
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda >$00C035
|
||||
sta >gPeiOrigShadow
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
* 3. Palette upload (512 bytes).
|
||||
ldy #0
|
||||
sei
|
||||
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
bpalLoop anop
|
||||
cpy #512
|
||||
beq bpalDone
|
||||
lda [bpal],y
|
||||
sta $9E00,y
|
||||
iny
|
||||
bra bpalLoop
|
||||
bpalDone anop
|
||||
lda >gPeiOrigShadow
|
||||
and #$F1 ; clear bits 1,2,3 -> SHR shadow ON
|
||||
sta >$00C035
|
||||
lda #0
|
||||
sta >$00C005 ; AUXWRITE on
|
||||
sta >$00C003 ; RAMRD on
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
ldx #0 ; X = row counter (need X because
|
||||
* long-abs,Y doesn't exist on 65816 --
|
||||
* only long-abs,X does, so the dirty-
|
||||
* check `lda >gStageMinWord,x` works.)
|
||||
peiRowLoop anop
|
||||
cpx #200
|
||||
bcc peiRowCheck
|
||||
brl peiRowsDone
|
||||
peiRowCheck anop
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda >gStageMinWord,x
|
||||
cmp >gStageMaxWord,x
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bcc peiSlamRow ; min < max -> dirty
|
||||
beq peiSlamRow ; min == max -> 1-word dirty
|
||||
inx ; clean row, skip
|
||||
brl peiRowLoop
|
||||
|
||||
peiSlamRow anop
|
||||
* Save X into long-mode scratch (stack is hijacked into the bank-$01 SHR shadow region, can't PHX).
|
||||
txa
|
||||
sta >gPeiCurRow
|
||||
asl a ; A = y*2 (LUT byte offset)
|
||||
tax
|
||||
lda >gRowOffsetLut,x ; A = y*160
|
||||
clc
|
||||
adc #$2000 ; A = row_start
|
||||
tcd ; D = row_start (PEI dp base)
|
||||
clc
|
||||
adc #159
|
||||
tcs ; SP = row_start + 159
|
||||
|
||||
* 80 PEIs from DP+$9E down to DP+$00. Each pushes 2 bytes; SP decreases
|
||||
* by 2 each PEI. Final SP = row_start - 1. Bytes land at $E1:row_start
|
||||
* through $E1:row_start+159 (in correct memory order because we walk
|
||||
* DP offsets backwards).
|
||||
pei $9E
|
||||
pei $9C
|
||||
pei $9A
|
||||
pei $98
|
||||
pei $96
|
||||
pei $94
|
||||
pei $92
|
||||
pei $90
|
||||
pei $8E
|
||||
pei $8C
|
||||
pei $8A
|
||||
pei $88
|
||||
pei $86
|
||||
pei $84
|
||||
pei $82
|
||||
pei $80
|
||||
pei $7E
|
||||
pei $7C
|
||||
pei $7A
|
||||
pei $78
|
||||
pei $76
|
||||
pei $74
|
||||
pei $72
|
||||
pei $70
|
||||
pei $6E
|
||||
pei $6C
|
||||
pei $6A
|
||||
pei $68
|
||||
pei $66
|
||||
pei $64
|
||||
pei $62
|
||||
pei $60
|
||||
pei $5E
|
||||
pei $5C
|
||||
pei $5A
|
||||
pei $58
|
||||
pei $56
|
||||
pei $54
|
||||
pei $52
|
||||
pei $50
|
||||
pei $4E
|
||||
pei $4C
|
||||
pei $4A
|
||||
pei $48
|
||||
pei $46
|
||||
pei $44
|
||||
pei $42
|
||||
pei $40
|
||||
pei $3E
|
||||
pei $3C
|
||||
pei $3A
|
||||
pei $38
|
||||
pei $36
|
||||
pei $34
|
||||
pei $32
|
||||
pei $30
|
||||
pei $2E
|
||||
pei $2C
|
||||
pei $2A
|
||||
pei $28
|
||||
pei $26
|
||||
pei $24
|
||||
pei $22
|
||||
pei $20
|
||||
pei $1E
|
||||
pei $1C
|
||||
pei $1A
|
||||
pei $18
|
||||
pei $16
|
||||
pei $14
|
||||
pei $12
|
||||
pei $10
|
||||
pei $0E
|
||||
pei $0C
|
||||
pei $0A
|
||||
pei $08
|
||||
pei $06
|
||||
pei $04
|
||||
pei $02
|
||||
pei $00
|
||||
|
||||
lda >gPeiCurRow
|
||||
tax
|
||||
inx
|
||||
brl peiRowLoop
|
||||
|
||||
peiRowsDone anop
|
||||
* Restore SP, soft-switches.
|
||||
lda >gPeiOrigSp
|
||||
tcs
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda >gPeiOrigShadow
|
||||
sta >$00C035
|
||||
lda #0
|
||||
sta >$00C004 ; AUXWRITE off
|
||||
sta >$00C002 ; RAMRD off
|
||||
rep #$20
|
||||
LONGA ON
|
||||
|
||||
|
|
@ -2264,7 +2556,7 @@ bpalDone anop
|
|||
LONGI OFF
|
||||
pld
|
||||
plb
|
||||
plp
|
||||
plp ; restores I (pre-SEI value)
|
||||
rtl
|
||||
end
|
||||
|
||||
|
|
@ -2889,47 +3181,148 @@ wsScanCurHit equ 27 ; alias wsMaxSp.hi, 8-bit
|
|||
brl wsWalkBndEntry
|
||||
|
||||
***** EQUAL MODE WALK *****
|
||||
* Seed + walk-left + walk-right with the pixel test inlined (kills
|
||||
* JSR/RTS overhead per pixel) and per-iter long-mode access to
|
||||
* gFloodLeftX/RightX replaced by DP-relative <wsScanCurX. Pattern at
|
||||
* every test site:
|
||||
* lsr a ; byteIdx + parity
|
||||
* tay
|
||||
* sep #$20 / lda [wsRow],y / nibble extract / cmp >wsMatchByte / rep #$20
|
||||
wsWalkEqEntry anop
|
||||
* Seed test at wsX (inline eq).
|
||||
* --- SEED TEST EQ (inline) ---
|
||||
lda wsX
|
||||
jsr wsTestEq ; A = 1 if pix == matchByte
|
||||
lsr a
|
||||
tay
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
bcs wsSeedEqOdd
|
||||
lda [wsRow],y
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
bra wsSeedEqHave
|
||||
wsSeedEqOdd anop
|
||||
lda [wsRow],y
|
||||
and #$0F
|
||||
wsSeedEqHave anop
|
||||
cmp >wsMatchByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bne wsSeedEqMiss
|
||||
lda #1
|
||||
sta >gFloodSeedMatch
|
||||
bra wsWalkEqSeedOk
|
||||
wsSeedEqMiss anop
|
||||
lda #0
|
||||
sta >gFloodSeedMatch
|
||||
cmp #0
|
||||
bne wsWalkEqSeedOk
|
||||
brl wsExit
|
||||
wsWalkEqSeedOk anop
|
||||
|
||||
* Walk left: leftX = wsX; while leftX > 0 and pixel(leftX-1) matches,
|
||||
* leftX--. Holds leftX in <wsScanCurX (DP) for the loop.
|
||||
* --- WALK LEFT EQ (byte-cached: 1 byte read per 2 walked pixels) ---
|
||||
* Splits the loop into evenEntry / oddEntry paths.
|
||||
* evenEntry: currentX even -> test column C-1 (odd, low nib of byte Y-1).
|
||||
* Read NEW byte at Y-1; cache; test low nib.
|
||||
* oddEntry: currentX odd -> test column C-1 (even, high nib of byte Y).
|
||||
* Reuse CACHED byte from previous iter; extract high nib.
|
||||
* Initial: peel parity once to seed the cache and pick entry point.
|
||||
lda wsX
|
||||
sta <wsScanCurX
|
||||
wsLeftEqLoop anop
|
||||
lsr a
|
||||
tay ; Y = byteIdx
|
||||
bcc wsLEqEvenEntry ; parity 0: enter even path
|
||||
* parity 1 (odd): seed cache then drop into odd path.
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bra wsLEqOddEntry
|
||||
|
||||
wsLEqEvenEntry anop
|
||||
lda <wsScanCurX
|
||||
beq wsLeftEqDone
|
||||
dec a
|
||||
jsr wsTestEq
|
||||
cmp #0
|
||||
beq wsLeftEqDone
|
||||
dey
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
and #$0F
|
||||
cmp >wsMatchByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bne wsLeftEqDone
|
||||
dec <wsScanCurX
|
||||
brl wsLeftEqLoop
|
||||
wsLEqOddEntry anop
|
||||
lda <wsScanCurX
|
||||
beq wsLeftEqDone
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda <wsScanByte
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
cmp >wsMatchByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bne wsLeftEqDone
|
||||
dec <wsScanCurX
|
||||
bra wsLEqEvenEntry
|
||||
wsLeftEqDone anop
|
||||
lda <wsScanCurX
|
||||
sta >gFloodLeftX
|
||||
|
||||
* Walk right: rightX = wsX; while rightX < 319 and pixel(rightX+1)
|
||||
* matches, rightX++. Holds rightX in <wsScanCurX.
|
||||
* --- WALK RIGHT EQ (byte-cached) ---
|
||||
* evenEntry: currentX even -> test C+1 (odd, low nib of CACHED byte at Y).
|
||||
* oddEntry: currentX odd -> test C+1 (even, high nib of byte Y+1).
|
||||
* Inc Y, read NEW byte, cache, test high nib.
|
||||
lda wsX
|
||||
sta <wsScanCurX
|
||||
wsRightEqLoop anop
|
||||
lsr a
|
||||
tay
|
||||
bcs wsREqOddEntry
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bra wsREqEvenEntry
|
||||
|
||||
wsREqEvenEntry anop
|
||||
lda <wsScanCurX
|
||||
cmp #319
|
||||
bcs wsRightEqDone
|
||||
inc a
|
||||
jsr wsTestEq
|
||||
cmp #0
|
||||
beq wsRightEqDone
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda <wsScanByte
|
||||
and #$0F
|
||||
cmp >wsMatchByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bne wsRightEqDone
|
||||
inc <wsScanCurX
|
||||
brl wsRightEqLoop
|
||||
wsREqOddEntry anop
|
||||
lda <wsScanCurX
|
||||
cmp #319
|
||||
bcs wsRightEqDone
|
||||
iny
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
cmp >wsMatchByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bne wsRightEqDone
|
||||
inc <wsScanCurX
|
||||
bra wsREqEvenEntry
|
||||
wsRightEqDone anop
|
||||
lda <wsScanCurX
|
||||
sta >gFloodRightX
|
||||
|
|
@ -2937,41 +3330,160 @@ wsRightEqDone anop
|
|||
|
||||
***** BOUNDARY MODE WALK *****
|
||||
wsWalkBndEntry anop
|
||||
* --- SEED TEST BND (inline) ---
|
||||
lda wsX
|
||||
jsr wsTestBnd
|
||||
lsr a
|
||||
tay
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
bcs wsSeedBndOdd
|
||||
lda [wsRow],y
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
bra wsSeedBndHave
|
||||
wsSeedBndOdd anop
|
||||
lda [wsRow],y
|
||||
and #$0F
|
||||
wsSeedBndHave anop
|
||||
cmp >wsMatchByte
|
||||
beq wsSeedBndMiss
|
||||
cmp >wsNewByte
|
||||
beq wsSeedBndMiss
|
||||
rep #$20
|
||||
LONGA ON
|
||||
lda #1
|
||||
sta >gFloodSeedMatch
|
||||
bra wsWalkBndSeedOk
|
||||
wsSeedBndMiss anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
lda #0
|
||||
sta >gFloodSeedMatch
|
||||
cmp #0
|
||||
bne wsWalkBndSeedOk
|
||||
brl wsExit
|
||||
wsWalkBndSeedOk anop
|
||||
|
||||
* --- WALK LEFT BND (byte-cached) ---
|
||||
lda wsX
|
||||
sta <wsScanCurX
|
||||
wsLeftBndLoop anop
|
||||
lsr a
|
||||
tay
|
||||
bcc wsLBndEvenEntry
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bra wsLBndOddEntry
|
||||
|
||||
wsLBndEvenEntry anop
|
||||
lda <wsScanCurX
|
||||
beq wsLeftBndDone
|
||||
dec a
|
||||
jsr wsTestBnd
|
||||
cmp #0
|
||||
beq wsLeftBndDone
|
||||
dey
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
and #$0F
|
||||
cmp >wsMatchByte
|
||||
beq wsLBndStop
|
||||
cmp >wsNewByte
|
||||
beq wsLBndStop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
dec <wsScanCurX
|
||||
brl wsLeftBndLoop
|
||||
bra wsLBndOddEntry2
|
||||
wsLBndStop anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bra wsLeftBndDone
|
||||
wsLBndOddEntry2 anop
|
||||
wsLBndOddEntry anop
|
||||
lda <wsScanCurX
|
||||
beq wsLeftBndDone
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda <wsScanByte
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
cmp >wsMatchByte
|
||||
beq wsLBndStop2
|
||||
cmp >wsNewByte
|
||||
beq wsLBndStop2
|
||||
rep #$20
|
||||
LONGA ON
|
||||
dec <wsScanCurX
|
||||
bra wsLBndEvenEntry
|
||||
wsLBndStop2 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
wsLeftBndDone anop
|
||||
lda <wsScanCurX
|
||||
sta >gFloodLeftX
|
||||
|
||||
* --- WALK RIGHT BND (byte-cached) ---
|
||||
lda wsX
|
||||
sta <wsScanCurX
|
||||
wsRightBndLoop anop
|
||||
lsr a
|
||||
tay
|
||||
bcs wsRBndOddEntry
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bra wsRBndEvenEntry
|
||||
|
||||
wsRBndEvenEntry anop
|
||||
lda <wsScanCurX
|
||||
cmp #319
|
||||
bcs wsRightBndDone
|
||||
inc a
|
||||
jsr wsTestBnd
|
||||
cmp #0
|
||||
beq wsRightBndDone
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda <wsScanByte
|
||||
and #$0F
|
||||
cmp >wsMatchByte
|
||||
beq wsRBndStop
|
||||
cmp >wsNewByte
|
||||
beq wsRBndStop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
inc <wsScanCurX
|
||||
brl wsRightBndLoop
|
||||
bra wsRBndOddEntry2
|
||||
wsRBndStop anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bra wsRightBndDone
|
||||
wsRBndOddEntry2 anop
|
||||
wsRBndOddEntry anop
|
||||
lda <wsScanCurX
|
||||
cmp #319
|
||||
bcs wsRightBndDone
|
||||
iny
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
sta <wsScanByte
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
cmp >wsMatchByte
|
||||
beq wsRBndStop2
|
||||
cmp >wsNewByte
|
||||
beq wsRBndStop2
|
||||
rep #$20
|
||||
LONGA ON
|
||||
inc <wsScanCurX
|
||||
bra wsRBndEvenEntry
|
||||
wsRBndStop2 anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
wsRightBndDone anop
|
||||
lda <wsScanCurX
|
||||
sta >gFloodRightX
|
||||
|
|
@ -3108,75 +3620,9 @@ wsExit anop
|
|||
plp
|
||||
rtl
|
||||
|
||||
* wsTestEq: test pixel at column A against matchByte (eq mode).
|
||||
* In: A = column (M=16). Reads from wsRow.
|
||||
* Out: A = 1 if pix == matchByte, 0 otherwise. M=16 on exit.
|
||||
* Trashes A, Y, P. Preserves X, D, B.
|
||||
wsTestEq anop
|
||||
lsr a ; A = byteIdx; C = column & 1
|
||||
tay
|
||||
bcs wsTeqOdd
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
bra wsTeqHave
|
||||
wsTeqOdd anop
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
and #$0F
|
||||
wsTeqHave anop
|
||||
cmp >wsMatchByte
|
||||
rep #$20
|
||||
LONGA ON
|
||||
bne wsTeqNo
|
||||
lda #1
|
||||
rts
|
||||
wsTeqNo anop
|
||||
lda #0
|
||||
rts
|
||||
|
||||
|
||||
* wsTestBnd: test pixel at column A against (matchByte, newByte) for
|
||||
* boundary-mode flood: returns 1 iff pix != matchByte AND pix != newByte.
|
||||
* In: A = column (M=16). Reads from wsRow.
|
||||
* Out: A = 1 if qualifies, 0 otherwise. M=16 on exit.
|
||||
* Trashes A, Y, P. Preserves X, D, B.
|
||||
wsTestBnd anop
|
||||
lsr a
|
||||
tay
|
||||
bcs wsTbnOdd
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
lsr a
|
||||
bra wsTbnHave
|
||||
wsTbnOdd anop
|
||||
sep #$20
|
||||
LONGA OFF
|
||||
lda [wsRow],y
|
||||
and #$0F
|
||||
wsTbnHave anop
|
||||
cmp >wsMatchByte
|
||||
beq wsTbnNo
|
||||
cmp >wsNewByte
|
||||
beq wsTbnNo
|
||||
rep #$20
|
||||
LONGA ON
|
||||
lda #1
|
||||
rts
|
||||
wsTbnNo anop
|
||||
rep #$20
|
||||
LONGA ON
|
||||
lda #0
|
||||
rts
|
||||
* wsTestEq / wsTestBnd helpers deleted -- the seed test and walk-out
|
||||
* loops now expand the test inline (no JSR/RTS overhead per walked
|
||||
* pixel; saves ~12 cyc/pixel x ~3600 walked pixels in the demo).
|
||||
|
||||
|
||||
* wsScanAndPush: walk wsScanRow[wsLeftX..wsRightX] for run-edge
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue