PEI slam and dirty tracking!
This commit is contained in:
parent
af366e7e81
commit
065be89bff
3 changed files with 654 additions and 204 deletions
|
|
@ -188,13 +188,13 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
if (y < 0 || y >= SURFACE_HEIGHT || x < 0 || x >= SURFACE_WIDTH) {
|
if (y < 0 || y >= SURFACE_HEIGHT || x < 0 || x >= SURFACE_WIDTH) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
|
||||||
|
|
||||||
// Highest-tier asm fast path: seed-test + walk-left + walk-right
|
// Highest-tier asm fast path: seed-test + walk-left + walk-right
|
||||||
// + 1-row fill + scan-above + scan-below + push, all in one
|
// + 1-row fill + scan-above + scan-below + push, all in one
|
||||||
// cross-segment call. The asm caches row addr / match decoder
|
// cross-segment call. The asm caches row addr / match decoder
|
||||||
// across every sub-operation. C just pops and dispatches; this
|
// across every sub-operation. C just pops and dispatches; this
|
||||||
// path completes the entire per-seed work.
|
// path completes the entire per-seed work and computes the row
|
||||||
|
// address itself, so we don't pay y*160 in C unless we fall back.
|
||||||
{
|
{
|
||||||
bool seedMatched;
|
bool seedMatched;
|
||||||
if (halFastFloodWalkAndScans(s->pixels, x, y,
|
if (halFastFloodWalkAndScans(s->pixels, x, y,
|
||||||
|
|
@ -206,6 +206,10 @@ static void floodFillInternal(SurfaceT *s, int16_t startX, int16_t startY, uint8
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Fallback path needs row; compute it here so the asm path
|
||||||
|
// above doesn't pay for an unused y*160 multiply on every iter.
|
||||||
|
row = &s->pixels[y * SURFACE_BYTES_PER_ROW];
|
||||||
|
|
||||||
// Tier-2 asm fast path: combined seed test + walk-left +
|
// Tier-2 asm fast path: combined seed test + walk-left +
|
||||||
// walk-right in one cross-segment call. Falls back to the
|
// walk-right in one cross-segment call. Falls back to the
|
||||||
// pure-C walks below on ports without an asm implementation.
|
// pure-C walks below on ports without an asm implementation.
|
||||||
|
|
|
||||||
|
|
@ -164,13 +164,13 @@ static uint8_t gCachedScb [SURFACE_HEIGHT];
|
||||||
static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
|
static uint16_t gCachedPalette[SURFACE_PALETTE_COUNT][SURFACE_COLORS_PER_PALETTE];
|
||||||
static bool gCacheValid = false;
|
static bool gCacheValid = false;
|
||||||
|
|
||||||
// PEI slam scratch shared with src/port/iigs/peislam.asm. File-scope
|
// PEI slam scratch. File-scope non-static so the asm can `ext` them;
|
||||||
// non-static so the asm can `ext` them; all accesses inside the slam
|
// all accesses inside the slam use long-mode `>` addressing so they
|
||||||
// use long-mode addressing so they bypass the //e RAMRD redirect the
|
// bypass the //e RAMRD redirect the slam turns on for its duration.
|
||||||
// slam turns on for the duration of the run.
|
|
||||||
volatile uint16_t gPeiOrigSp;
|
volatile uint16_t gPeiOrigSp;
|
||||||
volatile uint8_t gPeiOrigShadow;
|
volatile uint8_t gPeiOrigShadow;
|
||||||
volatile uint16_t gPeiTempRowBase;
|
volatile uint16_t gPeiTempRowBase;
|
||||||
|
volatile uint16_t gPeiCurRow; // row counter saved across slam (stack is hijacked)
|
||||||
|
|
||||||
// Defined in src/port/iigs/peislam.asm, in its own load segment
|
// Defined in src/port/iigs/peislam.asm, in its own load segment
|
||||||
// (DRAWPRIMS) so the GS/OS loader places it in a different bank from
|
// (DRAWPRIMS) so the GS/OS loader places it in a different bank from
|
||||||
|
|
|
||||||
|
|
@ -1362,82 +1362,243 @@ dcLoopBody anop
|
||||||
lda >gRowOffsetLut,x ; A = y*160
|
lda >gRowOffsetLut,x ; A = y*160
|
||||||
sta >dcRowXN
|
sta >dcRowXN
|
||||||
|
|
||||||
* 8 octant plots. dcPlotPx wants A=col, X=rowBase. LDX has no long-
|
* 8 octant plots, fully inlined. Each plot:
|
||||||
* absolute mode, so for each plot we stash col, load row via LDA/TAX,
|
* 1. col = (acx +/- dcX|dcY) -> A
|
||||||
* then reload col into A.
|
* 2. save col -> dcSavedCol (for parity test)
|
||||||
* Octants 1-4 use the y-row pair (cx +/- x, cy +/- y).
|
* 3. byteIdx = col >> 1; byte addr = byteIdx + rowBase -> Y
|
||||||
|
* 4. test col & 1; do high or low nibble RMW
|
||||||
|
* Skips the JSR/RTS to dcPlotPx (~12 cyc) and the load-row-via-X
|
||||||
|
* dance (lda row + tax; col is still parked in dcSavedCol). Per plot: ~60 cyc vs ~80
|
||||||
|
* cyc with JSR. ~20 cyc/plot x ~2560 plots in the demo's 4 circles
|
||||||
|
* = ~18 ms.
|
||||||
|
*
|
||||||
|
* Each plot has its own dcOddN / dcDoneN labels (ORCA-M needs unique).
|
||||||
|
|
||||||
|
* Octants 1-4: y-row pair (cx +/- x, cy +/- y).
|
||||||
|
* Plot 1: (cx+x, cy+y)
|
||||||
lda acx
|
lda acx
|
||||||
clc
|
clc
|
||||||
adc >dcX
|
adc >dcX
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowYP
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowYP
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx+x, cy+y)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd1
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone1
|
||||||
|
dcOdd1 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone1 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Plot 2: (cx-x, cy+y)
|
||||||
lda acx
|
lda acx
|
||||||
sec
|
sec
|
||||||
sbc >dcX
|
sbc >dcX
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowYP
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowYP
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx-x, cy+y)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd2
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone2
|
||||||
|
dcOdd2 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone2 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Plot 3: (cx+x, cy-y)
|
||||||
lda acx
|
lda acx
|
||||||
clc
|
clc
|
||||||
adc >dcX
|
adc >dcX
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowYN
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowYN
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx+x, cy-y)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd3
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone3
|
||||||
|
dcOdd3 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone3 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Plot 4: (cx-x, cy-y)
|
||||||
lda acx
|
lda acx
|
||||||
sec
|
sec
|
||||||
sbc >dcX
|
sbc >dcX
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowYN
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowYN
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx-x, cy-y)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd4
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone4
|
||||||
|
dcOdd4 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone4 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
* Octants 5-8 use the x-row pair (cx +/- y, cy +/- x).
|
* Octants 5-8: x-row pair (cx +/- y, cy +/- x).
|
||||||
|
* Plot 5: (cx+y, cy+x)
|
||||||
lda acx
|
lda acx
|
||||||
clc
|
clc
|
||||||
adc >dcY
|
adc >dcY
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowXP
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowXP
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx+y, cy+x)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd5
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone5
|
||||||
|
dcOdd5 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone5 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Plot 6: (cx-y, cy+x)
|
||||||
lda acx
|
lda acx
|
||||||
sec
|
sec
|
||||||
sbc >dcY
|
sbc >dcY
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowXP
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowXP
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx-y, cy+x)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd6
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone6
|
||||||
|
dcOdd6 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone6 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Plot 7: (cx+y, cy-x)
|
||||||
lda acx
|
lda acx
|
||||||
clc
|
clc
|
||||||
adc >dcY
|
adc >dcY
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowXN
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowXN
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx+y, cy-x)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd7
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone7
|
||||||
|
dcOdd7 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone7 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
|
* Plot 8: (cx-y, cy-x)
|
||||||
lda acx
|
lda acx
|
||||||
sec
|
sec
|
||||||
sbc >dcY
|
sbc >dcY
|
||||||
sta >dcSavedCol
|
sta >dcSavedCol
|
||||||
lda >dcRowXN
|
lsr a
|
||||||
tax
|
clc
|
||||||
|
adc >dcRowXN
|
||||||
|
tay
|
||||||
lda >dcSavedCol
|
lda >dcSavedCol
|
||||||
jsr dcPlotPx ; (cx-y, cy-x)
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
and #1
|
||||||
|
bne dcOdd8
|
||||||
|
lda [pix],y
|
||||||
|
and #$0F
|
||||||
|
ora >dcNibHi
|
||||||
|
sta [pix],y
|
||||||
|
bra dcDone8
|
||||||
|
dcOdd8 anop
|
||||||
|
lda [pix],y
|
||||||
|
and #$F0
|
||||||
|
ora >dcNibLo
|
||||||
|
sta [pix],y
|
||||||
|
dcDone8 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
|
||||||
* Update Bresenham: y++; if err <= 0: err += 2y+1; else x--; err += 2(y-x)+1.
|
* Update Bresenham: y++; if err <= 0: err += 2y+1; else x--; err += 2(y-x)+1.
|
||||||
lda >dcY
|
lda >dcY
|
||||||
|
|
@ -1483,44 +1644,9 @@ dcExit anop
|
||||||
|
|
||||||
* dcMul160 deleted -- callers now expand the y160lut macro inline.
|
* dcMul160 deleted -- callers now expand the y160lut macro inline.
|
||||||
|
|
||||||
****************************************************************
|
* dcPlotPx deleted -- the 8 octant plot sites now expand the plot
|
||||||
* dcPlotPx: plot a pixel at column A, with row-base offset in X.
|
* logic inline (no JSR/RTS, no LDA row / TAX / dcMulTmp add
|
||||||
* M=16, X=16 on entry. Trashes A, X, Y, P. D and B preserved.
|
* round-trip per plot).
|
||||||
* Switches to M=8 for the byte RMW then back to M=16 for caller.
|
|
||||||
****************************************************************
|
|
||||||
dcPlotPx anop
|
|
||||||
lsr a ; A = col>>1, C = col & 1
|
|
||||||
bcs dcPlotOdd
|
|
||||||
* Even column: high nibble.
|
|
||||||
sta >dcMulTmp
|
|
||||||
txa
|
|
||||||
clc
|
|
||||||
adc >dcMulTmp
|
|
||||||
tay
|
|
||||||
sep #$20
|
|
||||||
LONGA OFF
|
|
||||||
lda [pix],y
|
|
||||||
and #$0F
|
|
||||||
ora >dcNibHi
|
|
||||||
sta [pix],y
|
|
||||||
rep #$20
|
|
||||||
LONGA ON
|
|
||||||
rts
|
|
||||||
dcPlotOdd anop
|
|
||||||
sta >dcMulTmp
|
|
||||||
txa
|
|
||||||
clc
|
|
||||||
adc >dcMulTmp
|
|
||||||
tay
|
|
||||||
sep #$20
|
|
||||||
LONGA OFF
|
|
||||||
lda [pix],y
|
|
||||||
and #$F0
|
|
||||||
ora >dcNibLo
|
|
||||||
sta [pix],y
|
|
||||||
rep #$20
|
|
||||||
LONGA ON
|
|
||||||
rts
|
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2224,39 +2350,205 @@ bpal equ 4
|
||||||
adc #8
|
adc #8
|
||||||
tcd
|
tcd
|
||||||
|
|
||||||
* 1. Pixel blit (DBR ends up = $E1 after MVN).
|
* 1. SCB upload (200 bytes) via MVN. Done BEFORE the PEI-slam so the
|
||||||
ldx #$2000
|
* SEI window only spans the slam itself (~38 ms). Source bank is
|
||||||
ldy #$2000
|
* runtime-patched into the MVN instruction (encoding: $54 dst src,
|
||||||
lda #31999
|
* so byte +2 is src).
|
||||||
mvn $010000,$E10000
|
|
||||||
|
|
||||||
* 2. SCB upload (200 bytes). DBR = $E1, so sta abs,Y -> $E1:abs+Y.
|
|
||||||
ldy #0
|
|
||||||
sep #$20
|
sep #$20
|
||||||
LONGA OFF
|
LONGA OFF
|
||||||
bscbLoop anop
|
lda bscb+2
|
||||||
cpy #200
|
sta >mvnScbInst+2
|
||||||
beq bscbDone
|
rep #$20
|
||||||
lda [bscb],y
|
LONGA ON
|
||||||
sta $9D00,y
|
lda bscb
|
||||||
iny
|
tax
|
||||||
bra bscbLoop
|
ldy #$9D00
|
||||||
bscbDone anop
|
lda #199
|
||||||
|
mvnScbInst mvn $000000,$E10000
|
||||||
|
|
||||||
|
* 2. Palette upload (512 bytes) via MVN. Same trick.
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda bpal+2
|
||||||
|
sta >mvnPalInst+2
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
lda bpal
|
||||||
|
tax
|
||||||
|
ldy #$9E00
|
||||||
|
lda #511
|
||||||
|
mvnPalInst mvn $000000,$E10000
|
||||||
|
|
||||||
|
* 3. Pixel blit via PEI-slam, with per-row dirty skip.
|
||||||
|
* PEI-slam: SP hijacked into the SHR shadow region of bank $01, AUXWRITE
|
||||||
|
* + RAMRD remap bank-$00 stack pushes to bank $01, SHR shadow mirrors
|
||||||
|
* bank-$01 writes to $E1. Result: PEI dp pushes from DP=$01:row_start
|
||||||
|
* land at $E1:row_start (160 bytes / row at ~6 cyc per 2 bytes).
|
||||||
|
* ~480 cyc/row vs MVN's ~1120 cyc/row -- 2.3x faster per row.
|
||||||
|
* SEI for the duration: soft-switch state and stack hijack would
|
||||||
|
* corrupt any IRQ handler that touches bank-0 globals. ~38 ms SEI
|
||||||
|
* total for a full 200-row slam; chunk later if audio glitches.
|
||||||
|
* Dirty skip: rows where gStageMinWord[y] > gStageMaxWord[y] are
|
||||||
|
* clean and not slammed. Saves big on sparse-update demos; for
|
||||||
|
* full-screen presents (DRAW), every row slams.
|
||||||
|
|
||||||
|
tsc
|
||||||
|
sta >gPeiOrigSp
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda >$00C035
|
||||||
|
sta >gPeiOrigShadow
|
||||||
rep #$20
|
rep #$20
|
||||||
LONGA ON
|
LONGA ON
|
||||||
|
|
||||||
* 3. Palette upload (512 bytes).
|
sei
|
||||||
ldy #0
|
|
||||||
sep #$20
|
sep #$20
|
||||||
LONGA OFF
|
LONGA OFF
|
||||||
bpalLoop anop
|
lda >gPeiOrigShadow
|
||||||
cpy #512
|
and #$F1 ; clear bits 1,2,3 -> SHR shadow ON
|
||||||
beq bpalDone
|
sta >$00C035
|
||||||
lda [bpal],y
|
lda #0
|
||||||
sta $9E00,y
|
sta >$00C005 ; AUXWRITE on
|
||||||
iny
|
sta >$00C003 ; RAMRD on
|
||||||
bra bpalLoop
|
rep #$20
|
||||||
bpalDone anop
|
LONGA ON
|
||||||
|
|
||||||
|
ldx #0 ; X = row counter (need X because
|
||||||
|
* long-abs,Y doesn't exist on 65816 --
|
||||||
|
* only long-abs,X does, so the dirty-
|
||||||
|
* check `lda >gStageMinWord,x` works.)
|
||||||
|
peiRowLoop anop
|
||||||
|
cpx #200
|
||||||
|
bcc peiRowCheck
|
||||||
|
brl peiRowsDone
|
||||||
|
peiRowCheck anop
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda >gStageMinWord,x
|
||||||
|
cmp >gStageMaxWord,x
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bcc peiSlamRow ; min < max -> dirty
|
||||||
|
beq peiSlamRow ; min == max -> 1-word dirty
|
||||||
|
inx ; clean row, skip
|
||||||
|
brl peiRowLoop
|
||||||
|
|
||||||
|
peiSlamRow anop
|
||||||
|
* Save X into long-mode scratch (SP is hijacked into the bank-$01 SHR shadow region, mirrored to $E1, so PHX would land in pixel data).
|
||||||
|
txa
|
||||||
|
sta >gPeiCurRow
|
||||||
|
asl a ; A = y*2 (LUT byte offset)
|
||||||
|
tax
|
||||||
|
lda >gRowOffsetLut,x ; A = y*160
|
||||||
|
clc
|
||||||
|
adc #$2000 ; A = row_start
|
||||||
|
tcd ; D = row_start (PEI dp base)
|
||||||
|
clc
|
||||||
|
adc #159
|
||||||
|
tcs ; SP = row_start + 159
|
||||||
|
|
||||||
|
* 80 PEIs from DP+$9E down to DP+$00. Each pushes 2 bytes; SP decreases
|
||||||
|
* by 2 each PEI. Final SP = row_start - 1. Bytes land at $E1:row_start
|
||||||
|
* through $E1:row_start+159 (in correct memory order because we walk
|
||||||
|
* DP offsets backwards).
|
||||||
|
pei $9E
|
||||||
|
pei $9C
|
||||||
|
pei $9A
|
||||||
|
pei $98
|
||||||
|
pei $96
|
||||||
|
pei $94
|
||||||
|
pei $92
|
||||||
|
pei $90
|
||||||
|
pei $8E
|
||||||
|
pei $8C
|
||||||
|
pei $8A
|
||||||
|
pei $88
|
||||||
|
pei $86
|
||||||
|
pei $84
|
||||||
|
pei $82
|
||||||
|
pei $80
|
||||||
|
pei $7E
|
||||||
|
pei $7C
|
||||||
|
pei $7A
|
||||||
|
pei $78
|
||||||
|
pei $76
|
||||||
|
pei $74
|
||||||
|
pei $72
|
||||||
|
pei $70
|
||||||
|
pei $6E
|
||||||
|
pei $6C
|
||||||
|
pei $6A
|
||||||
|
pei $68
|
||||||
|
pei $66
|
||||||
|
pei $64
|
||||||
|
pei $62
|
||||||
|
pei $60
|
||||||
|
pei $5E
|
||||||
|
pei $5C
|
||||||
|
pei $5A
|
||||||
|
pei $58
|
||||||
|
pei $56
|
||||||
|
pei $54
|
||||||
|
pei $52
|
||||||
|
pei $50
|
||||||
|
pei $4E
|
||||||
|
pei $4C
|
||||||
|
pei $4A
|
||||||
|
pei $48
|
||||||
|
pei $46
|
||||||
|
pei $44
|
||||||
|
pei $42
|
||||||
|
pei $40
|
||||||
|
pei $3E
|
||||||
|
pei $3C
|
||||||
|
pei $3A
|
||||||
|
pei $38
|
||||||
|
pei $36
|
||||||
|
pei $34
|
||||||
|
pei $32
|
||||||
|
pei $30
|
||||||
|
pei $2E
|
||||||
|
pei $2C
|
||||||
|
pei $2A
|
||||||
|
pei $28
|
||||||
|
pei $26
|
||||||
|
pei $24
|
||||||
|
pei $22
|
||||||
|
pei $20
|
||||||
|
pei $1E
|
||||||
|
pei $1C
|
||||||
|
pei $1A
|
||||||
|
pei $18
|
||||||
|
pei $16
|
||||||
|
pei $14
|
||||||
|
pei $12
|
||||||
|
pei $10
|
||||||
|
pei $0E
|
||||||
|
pei $0C
|
||||||
|
pei $0A
|
||||||
|
pei $08
|
||||||
|
pei $06
|
||||||
|
pei $04
|
||||||
|
pei $02
|
||||||
|
pei $00
|
||||||
|
|
||||||
|
lda >gPeiCurRow
|
||||||
|
tax
|
||||||
|
inx
|
||||||
|
brl peiRowLoop
|
||||||
|
|
||||||
|
peiRowsDone anop
|
||||||
|
* Restore SP, soft-switches.
|
||||||
|
lda >gPeiOrigSp
|
||||||
|
tcs
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda >gPeiOrigShadow
|
||||||
|
sta >$00C035
|
||||||
|
lda #0
|
||||||
|
sta >$00C004 ; AUXWRITE off
|
||||||
|
sta >$00C002 ; RAMRD off
|
||||||
rep #$20
|
rep #$20
|
||||||
LONGA ON
|
LONGA ON
|
||||||
|
|
||||||
|
|
@ -2264,7 +2556,7 @@ bpalDone anop
|
||||||
LONGI OFF
|
LONGI OFF
|
||||||
pld
|
pld
|
||||||
plb
|
plb
|
||||||
plp
|
plp ; restores I (pre-SEI value)
|
||||||
rtl
|
rtl
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|
@ -2889,47 +3181,148 @@ wsScanCurHit equ 27 ; alias wsMaxSp.hi, 8-bit
|
||||||
brl wsWalkBndEntry
|
brl wsWalkBndEntry
|
||||||
|
|
||||||
***** EQUAL MODE WALK *****
|
***** EQUAL MODE WALK *****
|
||||||
|
* Seed + walk-left + walk-right with the pixel test inlined (kills
|
||||||
|
* JSR/RTS overhead per pixel) and per-iter long-mode access to
|
||||||
|
* gFloodLeftX/RightX replaced by DP-relative <wsScanCurX. Pattern at
|
||||||
|
* the seed test (the walk loops below test a cached byte instead):
|
||||||
|
* lsr a ; byteIdx + parity
|
||||||
|
* tay
|
||||||
|
* sep #$20 / lda [wsRow],y / nibble extract / cmp >wsMatchByte / rep #$20
|
||||||
wsWalkEqEntry anop
|
wsWalkEqEntry anop
|
||||||
* Seed test at wsX (inline eq).
|
* --- SEED TEST EQ (inline) ---
|
||||||
lda wsX
|
lda wsX
|
||||||
jsr wsTestEq ; A = 1 if pix == matchByte
|
lsr a
|
||||||
|
tay
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
bcs wsSeedEqOdd
|
||||||
|
lda [wsRow],y
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
bra wsSeedEqHave
|
||||||
|
wsSeedEqOdd anop
|
||||||
|
lda [wsRow],y
|
||||||
|
and #$0F
|
||||||
|
wsSeedEqHave anop
|
||||||
|
cmp >wsMatchByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bne wsSeedEqMiss
|
||||||
|
lda #1
|
||||||
|
sta >gFloodSeedMatch
|
||||||
|
bra wsWalkEqSeedOk
|
||||||
|
wsSeedEqMiss anop
|
||||||
|
lda #0
|
||||||
sta >gFloodSeedMatch
|
sta >gFloodSeedMatch
|
||||||
cmp #0
|
|
||||||
bne wsWalkEqSeedOk
|
|
||||||
brl wsExit
|
brl wsExit
|
||||||
wsWalkEqSeedOk anop
|
wsWalkEqSeedOk anop
|
||||||
|
|
||||||
* Walk left: leftX = wsX; while leftX > 0 and pixel(leftX-1) matches,
|
* --- WALK LEFT EQ (byte-cached: 1 byte read per 2 walked pixels) ---
|
||||||
* leftX--. Holds leftX in <wsScanCurX (DP) for the loop.
|
* Splits the loop into evenEntry / oddEntry paths.
|
||||||
|
* evenEntry: currentX even -> test column C-1 (odd, low nib of byte Y-1).
|
||||||
|
* Read NEW byte at Y-1; cache; test low nib.
|
||||||
|
* oddEntry: currentX odd -> test column C-1 (even, high nib of byte Y).
|
||||||
|
* Reuse CACHED byte from previous iter; extract high nib.
|
||||||
|
* Initial: peel parity once to seed the cache and pick entry point.
|
||||||
lda wsX
|
lda wsX
|
||||||
sta <wsScanCurX
|
sta <wsScanCurX
|
||||||
wsLeftEqLoop anop
|
lsr a
|
||||||
|
tay ; Y = byteIdx
|
||||||
|
bcc wsLEqEvenEntry ; parity 0: enter even path
|
||||||
|
* parity 1 (odd): seed cache then drop into odd path.
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bra wsLEqOddEntry
|
||||||
|
|
||||||
|
wsLEqEvenEntry anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
beq wsLeftEqDone
|
beq wsLeftEqDone
|
||||||
dec a
|
dey
|
||||||
jsr wsTestEq
|
sep #$20
|
||||||
cmp #0
|
LONGA OFF
|
||||||
beq wsLeftEqDone
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
and #$0F
|
||||||
|
cmp >wsMatchByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bne wsLeftEqDone
|
||||||
dec <wsScanCurX
|
dec <wsScanCurX
|
||||||
brl wsLeftEqLoop
|
wsLEqOddEntry anop
|
||||||
|
lda <wsScanCurX
|
||||||
|
beq wsLeftEqDone
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda <wsScanByte
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
cmp >wsMatchByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bne wsLeftEqDone
|
||||||
|
dec <wsScanCurX
|
||||||
|
bra wsLEqEvenEntry
|
||||||
wsLeftEqDone anop
|
wsLeftEqDone anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
sta >gFloodLeftX
|
sta >gFloodLeftX
|
||||||
|
|
||||||
* Walk right: rightX = wsX; while rightX < 319 and pixel(rightX+1)
|
* --- WALK RIGHT EQ (byte-cached) ---
|
||||||
* matches, rightX++. Holds rightX in <wsScanCurX.
|
* evenEntry: currentX even -> test C+1 (odd, low nib of CACHED byte at Y).
|
||||||
|
* oddEntry: currentX odd -> test C+1 (even, high nib of byte Y+1).
|
||||||
|
* Inc Y, read NEW byte, cache, test high nib.
|
||||||
lda wsX
|
lda wsX
|
||||||
sta <wsScanCurX
|
sta <wsScanCurX
|
||||||
wsRightEqLoop anop
|
lsr a
|
||||||
|
tay
|
||||||
|
bcs wsREqOddEntry
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bra wsREqEvenEntry
|
||||||
|
|
||||||
|
wsREqEvenEntry anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
cmp #319
|
cmp #319
|
||||||
bcs wsRightEqDone
|
bcs wsRightEqDone
|
||||||
inc a
|
sep #$20
|
||||||
jsr wsTestEq
|
LONGA OFF
|
||||||
cmp #0
|
lda <wsScanByte
|
||||||
beq wsRightEqDone
|
and #$0F
|
||||||
|
cmp >wsMatchByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bne wsRightEqDone
|
||||||
inc <wsScanCurX
|
inc <wsScanCurX
|
||||||
brl wsRightEqLoop
|
wsREqOddEntry anop
|
||||||
|
lda <wsScanCurX
|
||||||
|
cmp #319
|
||||||
|
bcs wsRightEqDone
|
||||||
|
iny
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
cmp >wsMatchByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bne wsRightEqDone
|
||||||
|
inc <wsScanCurX
|
||||||
|
bra wsREqEvenEntry
|
||||||
wsRightEqDone anop
|
wsRightEqDone anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
sta >gFloodRightX
|
sta >gFloodRightX
|
||||||
|
|
@ -2937,41 +3330,160 @@ wsRightEqDone anop
|
||||||
|
|
||||||
***** BOUNDARY MODE WALK *****
|
***** BOUNDARY MODE WALK *****
|
||||||
wsWalkBndEntry anop
|
wsWalkBndEntry anop
|
||||||
|
* --- SEED TEST BND (inline) ---
|
||||||
lda wsX
|
lda wsX
|
||||||
jsr wsTestBnd
|
lsr a
|
||||||
|
tay
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
bcs wsSeedBndOdd
|
||||||
|
lda [wsRow],y
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
bra wsSeedBndHave
|
||||||
|
wsSeedBndOdd anop
|
||||||
|
lda [wsRow],y
|
||||||
|
and #$0F
|
||||||
|
wsSeedBndHave anop
|
||||||
|
cmp >wsMatchByte
|
||||||
|
beq wsSeedBndMiss
|
||||||
|
cmp >wsNewByte
|
||||||
|
beq wsSeedBndMiss
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
lda #1
|
||||||
|
sta >gFloodSeedMatch
|
||||||
|
bra wsWalkBndSeedOk
|
||||||
|
wsSeedBndMiss anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
lda #0
|
||||||
sta >gFloodSeedMatch
|
sta >gFloodSeedMatch
|
||||||
cmp #0
|
|
||||||
bne wsWalkBndSeedOk
|
|
||||||
brl wsExit
|
brl wsExit
|
||||||
wsWalkBndSeedOk anop
|
wsWalkBndSeedOk anop
|
||||||
|
|
||||||
|
* --- WALK LEFT BND (byte-cached) ---
|
||||||
lda wsX
|
lda wsX
|
||||||
sta <wsScanCurX
|
sta <wsScanCurX
|
||||||
wsLeftBndLoop anop
|
lsr a
|
||||||
|
tay
|
||||||
|
bcc wsLBndEvenEntry
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bra wsLBndOddEntry
|
||||||
|
|
||||||
|
wsLBndEvenEntry anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
beq wsLeftBndDone
|
beq wsLeftBndDone
|
||||||
dec a
|
dey
|
||||||
jsr wsTestBnd
|
sep #$20
|
||||||
cmp #0
|
LONGA OFF
|
||||||
beq wsLeftBndDone
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
and #$0F
|
||||||
|
cmp >wsMatchByte
|
||||||
|
beq wsLBndStop
|
||||||
|
cmp >wsNewByte
|
||||||
|
beq wsLBndStop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
dec <wsScanCurX
|
dec <wsScanCurX
|
||||||
brl wsLeftBndLoop
|
bra wsLBndOddEntry2
|
||||||
|
wsLBndStop anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bra wsLeftBndDone
|
||||||
|
wsLBndOddEntry2 anop
|
||||||
|
wsLBndOddEntry anop
|
||||||
|
lda <wsScanCurX
|
||||||
|
beq wsLeftBndDone
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda <wsScanByte
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
cmp >wsMatchByte
|
||||||
|
beq wsLBndStop2
|
||||||
|
cmp >wsNewByte
|
||||||
|
beq wsLBndStop2
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
dec <wsScanCurX
|
||||||
|
bra wsLBndEvenEntry
|
||||||
|
wsLBndStop2 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
wsLeftBndDone anop
|
wsLeftBndDone anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
sta >gFloodLeftX
|
sta >gFloodLeftX
|
||||||
|
|
||||||
|
* --- WALK RIGHT BND (byte-cached) ---
|
||||||
lda wsX
|
lda wsX
|
||||||
sta <wsScanCurX
|
sta <wsScanCurX
|
||||||
wsRightBndLoop anop
|
lsr a
|
||||||
|
tay
|
||||||
|
bcs wsRBndOddEntry
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bra wsRBndEvenEntry
|
||||||
|
|
||||||
|
wsRBndEvenEntry anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
cmp #319
|
cmp #319
|
||||||
bcs wsRightBndDone
|
bcs wsRightBndDone
|
||||||
inc a
|
sep #$20
|
||||||
jsr wsTestBnd
|
LONGA OFF
|
||||||
cmp #0
|
lda <wsScanByte
|
||||||
beq wsRightBndDone
|
and #$0F
|
||||||
|
cmp >wsMatchByte
|
||||||
|
beq wsRBndStop
|
||||||
|
cmp >wsNewByte
|
||||||
|
beq wsRBndStop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
inc <wsScanCurX
|
inc <wsScanCurX
|
||||||
brl wsRightBndLoop
|
bra wsRBndOddEntry2
|
||||||
|
wsRBndStop anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
bra wsRightBndDone
|
||||||
|
wsRBndOddEntry2 anop
|
||||||
|
wsRBndOddEntry anop
|
||||||
|
lda <wsScanCurX
|
||||||
|
cmp #319
|
||||||
|
bcs wsRightBndDone
|
||||||
|
iny
|
||||||
|
sep #$20
|
||||||
|
LONGA OFF
|
||||||
|
lda [wsRow],y
|
||||||
|
sta <wsScanByte
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
lsr a
|
||||||
|
cmp >wsMatchByte
|
||||||
|
beq wsRBndStop2
|
||||||
|
cmp >wsNewByte
|
||||||
|
beq wsRBndStop2
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
|
inc <wsScanCurX
|
||||||
|
bra wsRBndEvenEntry
|
||||||
|
wsRBndStop2 anop
|
||||||
|
rep #$20
|
||||||
|
LONGA ON
|
||||||
wsRightBndDone anop
|
wsRightBndDone anop
|
||||||
lda <wsScanCurX
|
lda <wsScanCurX
|
||||||
sta >gFloodRightX
|
sta >gFloodRightX
|
||||||
|
|
@ -3108,75 +3620,9 @@ wsExit anop
|
||||||
plp
|
plp
|
||||||
rtl
|
rtl
|
||||||
|
|
||||||
* wsTestEq: test pixel at column A against matchByte (eq mode).
|
* wsTestEq / wsTestBnd helpers deleted -- the seed test and walk-out
|
||||||
* In: A = column (M=16). Reads from wsRow.
|
* loops now expand the test inline (no JSR/RTS overhead per walked
|
||||||
* Out: A = 1 if pix == matchByte, 0 otherwise. M=16 on exit.
|
* pixel; saves ~12 cyc/pixel x ~3600 walked pixels in the demo).
|
||||||
* Trashes A, Y, P. Preserves X, D, B.
|
|
||||||
wsTestEq anop
|
|
||||||
lsr a ; A = byteIdx; C = column & 1
|
|
||||||
tay
|
|
||||||
bcs wsTeqOdd
|
|
||||||
sep #$20
|
|
||||||
LONGA OFF
|
|
||||||
lda [wsRow],y
|
|
||||||
lsr a
|
|
||||||
lsr a
|
|
||||||
lsr a
|
|
||||||
lsr a
|
|
||||||
bra wsTeqHave
|
|
||||||
wsTeqOdd anop
|
|
||||||
sep #$20
|
|
||||||
LONGA OFF
|
|
||||||
lda [wsRow],y
|
|
||||||
and #$0F
|
|
||||||
wsTeqHave anop
|
|
||||||
cmp >wsMatchByte
|
|
||||||
rep #$20
|
|
||||||
LONGA ON
|
|
||||||
bne wsTeqNo
|
|
||||||
lda #1
|
|
||||||
rts
|
|
||||||
wsTeqNo anop
|
|
||||||
lda #0
|
|
||||||
rts
|
|
||||||
|
|
||||||
|
|
||||||
* wsTestBnd: test pixel at column A against (matchByte, newByte) for
|
|
||||||
* boundary-mode flood: returns 1 iff pix != matchByte AND pix != newByte.
|
|
||||||
* In: A = column (M=16). Reads from wsRow.
|
|
||||||
* Out: A = 1 if qualifies, 0 otherwise. M=16 on exit.
|
|
||||||
* Trashes A, Y, P. Preserves X, D, B.
|
|
||||||
wsTestBnd anop
|
|
||||||
lsr a
|
|
||||||
tay
|
|
||||||
bcs wsTbnOdd
|
|
||||||
sep #$20
|
|
||||||
LONGA OFF
|
|
||||||
lda [wsRow],y
|
|
||||||
lsr a
|
|
||||||
lsr a
|
|
||||||
lsr a
|
|
||||||
lsr a
|
|
||||||
bra wsTbnHave
|
|
||||||
wsTbnOdd anop
|
|
||||||
sep #$20
|
|
||||||
LONGA OFF
|
|
||||||
lda [wsRow],y
|
|
||||||
and #$0F
|
|
||||||
wsTbnHave anop
|
|
||||||
cmp >wsMatchByte
|
|
||||||
beq wsTbnNo
|
|
||||||
cmp >wsNewByte
|
|
||||||
beq wsTbnNo
|
|
||||||
rep #$20
|
|
||||||
LONGA ON
|
|
||||||
lda #1
|
|
||||||
rts
|
|
||||||
wsTbnNo anop
|
|
||||||
rep #$20
|
|
||||||
LONGA ON
|
|
||||||
lda #0
|
|
||||||
rts
|
|
||||||
|
|
||||||
|
|
||||||
* wsScanAndPush: walk wsScanRow[wsLeftX..wsRightX] for run-edge
|
* wsScanAndPush: walk wsScanRow[wsLeftX..wsRightX] for run-edge
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue