213 lines
6.4 KiB
ArmAsm
213 lines
6.4 KiB
ArmAsm
; crt0 — C runtime startup for the W65816 backend.
|
|
;
|
|
; Entry point invoked by the loader (or the OMF dispatcher). Sets up
|
|
; the processor mode the rest of the runtime expects, zeroes BSS,
|
|
; calls main, and halts on return.
|
|
;
|
|
; Conventions:
|
|
; - Native mode (E=0), 16-bit M and X (REP #$30) on entry to main.
|
|
; - DP=0, DBR=0 — assumed by the C runtime.
|
|
; - Linker-emitted symbols: __bss_seg{0..3}_lo16 / _bank / _size (BSS
;   segments to clear) and __init_array_start / __init_array_end.
|
|
|
|
.text
|
|
|
|
; ---------------------------------------------------------------------
; __start — entry point invoked by the loader (or the OMF dispatcher).
;
; Stages, in order:
;   1. mask IRQs, switch to native mode with 16-bit A and X/Y
;   2. quiesce the IIgs interrupt sources at the chip level
;   3. move the stack to $0FFF
;   4. enable Language Card RAM ($D000-$FFFF) for read+write
;   5. record the program bank in DP $BE
;   6. zero up to four linker-described BSS segments
;   7. run the .init_array constructors
;   8. JSL main, then halt with BRK
;
; Width invariant: every stage begins and ends with M=16 and X=16.
; Byte-wide accesses are bracketed by SEP #0x20 / REP #0x20.
;
; External symbols used: __bss_seg{0..3}_size / _bank / _lo16,
; __init_array_start, __init_array_end, __indirTarget, __jsl_indir, main.
; ---------------------------------------------------------------------
        .globl  __start
__start:
        ; ---- 1. CPU mode --------------------------------------------
        ; Disable IRQ first — the IIgs ROM hands a vsync IRQ on every
        ; frame, and its handler runs in 8-bit M/X mode, corrupting our
        ; state if we leave I clear. SEI is valid in both emulation and
        ; native mode.
        sei
        clc
        xce                             ; C=0 -> E: enter native mode
        rep     #0x30                   ; M=16, X=16

        ; ---- 2. interrupt sources -----------------------------------
        ; SEI alone leaves the hardware lines asserted, and the IRQ trap
        ; in ROM keeps re-firing if the source isn't quiesced. STZ
        ; stores zero without going through A; with M=8 it writes a
        ; single byte, so no LDA #0 prelude is needed.
        sep     #0x20                   ; M=8 for byte-wide I/O writes
        stz     0xc041                  ; INTEN = 0 (AN3/mouse/0.25s/VBL/mouse-IRQ enables)
        stz     0xc023                  ; VGCINT = 0 (external/1-sec/scan-line IRQ enables)
        stz     0xc032                  ; SCANINT clear
        rep     #0x20                   ; back to M=16

        ; ---- 3. stack -----------------------------------------------
        ; Top-of-stack at $0FFF. Native-mode S is 16-bit, so we don't
        ; need to stay in page 1. Soft-double frames can be ~170 bytes
        ; plus the usual call-chain overhead — at $01FF stack growth
        ; wraps into the direct page ($0000-$00FF), which holds the
        ; libcall scratch and IMG slots, corrupting them. $0FFF gives
        ; ~3.5 KB of headroom and stays below the text base ($1000).
        ; NOTE(review): this file quotes the scratch range as both
        ; $E0-$F4 and $E0..$FF, and the IMG slot range as both $D0-$DE
        ; and $C0..$DE — reconcile against libgcc.s.
        lda     #0x0fff
        tcs

        ; ---- 4. Language Card RAM -----------------------------------
        ; Enable LC RAM at $D000-$FFFF for read+write: 4 KB at
        ; $D000-$DFFF plus the 8 KB common area at $E000-$FFFF. The
        ; area defaults to ROM-mapped; two consecutive reads of $C083
        ; enable RAM read AND write. link816 may auto-relocate BSS /
        ; heap into this area when text+rodata grows past $BFFF —
        ; without this enable, writes drop on the floor and reads
        ; return ROM bytes.
        ; The reads must be 8-bit: a 16-bit read at $C083 would also
        ; touch $C084 (a different soft switch), undoing the enable.
        ; NOTE(review): the previous comment called the $D000 window
        ; selected here "LC bank 1"; Apple's soft-switch tables list
        ; $C083 as $D000 bank 2 ($C08B is bank 1) — confirm which bank
        ; link816 expects.
        sep     #0x20
        lda     0xc083
        lda     0xc083
        rep     #0x20

        ; ---- 5. persistent data-bank byte ---------------------------
        ; DP $BE holds the "current data bank". The LDAptr/STAptr/
        ; STBptr inserters load it into $E2 before each [dp],Y deref so
        ; pointer derefs land in the same bank as DBR-relative absolute
        ; stores. Under MAME (no Loader), DBR=0 and PBR=0 here, so
        ; $BE=0 — equivalent to the prior STZ $E2 behavior. Under the
        ; GS/OS Loader, crt0Gsos.s sets it to PBR.
        ; $BE sits outside both the libcall scratch range and the IMG
        ; slot range. PHK pushes one byte, so PLA must run with M=8 to
        ; pull exactly one byte.
        sep     #0x20
        phk
        pla                             ; A low byte = current PBR
        sta     0xbe                    ; persistent data bank
        rep     #0x20

        ; ---- 6. zero BSS --------------------------------------------
        ; Up to 4 segments, described by linker symbols
        ; __bss_seg{0..3}_lo16 / _bank / _size. A segment whose size is
        ; 0 is skipped. For each non-empty segment DBR is pointed at
        ; the segment's bank and the bytes are cleared with
        ; DBR-relative STZ abs,X.
        ;
        ; Loop shape: X starts at the size and counts down; DEX sets Z
        ; and STZ leaves flags untouched, so BNE tests the DEX result.
        ; Offsets size-1 .. 0 are written, i.e. exactly `size` bytes.
        ; M stays 8-bit for the whole loop (DEX/LDX depend only on the
        ; X flag), so there is no per-byte SEP/REP.
        ;
        ; The LDA #bank is hand-encoded as opcode $A9 + one byte so the
        ; immediate is exactly 8 bits wide while M=8 (presumably the
        ; assembler would otherwise emit a 16-bit immediate — confirm).
        rep     #0x30                   ; defensive: M=16, X=16
        phb                             ; save caller's DBR

        ; segment 0
        ldx     #__bss_seg0_size
        beq     .Lbss_seg1              ; empty -> skip
        sep     #0x20
        .byte   0xA9                    ; LDA #imm8
        .byte   __bss_seg0_bank
        pha
        plb                             ; DBR = segment bank
.Lbss_loop0:
        dex
        stz     __bss_seg0_lo16, x
        bne     .Lbss_loop0
        rep     #0x20
.Lbss_seg1:

        ; segment 1
        ldx     #__bss_seg1_size
        beq     .Lbss_seg2
        sep     #0x20
        .byte   0xA9                    ; LDA #imm8
        .byte   __bss_seg1_bank
        pha
        plb
.Lbss_loop1:
        dex
        stz     __bss_seg1_lo16, x
        bne     .Lbss_loop1
        rep     #0x20
.Lbss_seg2:

        ; segment 2
        ldx     #__bss_seg2_size
        beq     .Lbss_seg3
        sep     #0x20
        .byte   0xA9                    ; LDA #imm8
        .byte   __bss_seg2_bank
        pha
        plb
.Lbss_loop2:
        dex
        stz     __bss_seg2_lo16, x
        bne     .Lbss_loop2
        rep     #0x20
.Lbss_seg3:

        ; segment 3
        ldx     #__bss_seg3_size
        beq     .Lbss_done
        sep     #0x20
        .byte   0xA9                    ; LDA #imm8
        .byte   __bss_seg3_bank
        pha
        plb
.Lbss_loop3:
        dex
        stz     __bss_seg3_lo16, x
        bne     .Lbss_loop3
        rep     #0x20
.Lbss_done:
        plb                             ; restore caller's DBR

        ; ---- 7. static constructors ---------------------------------
        ; The linker emits __init_array_start / __init_array_end around
        ; the .init_array section; under p:32:16 each entry is a 32-bit
        ; function pointer (low 16 bits = function offset, high 16 bits
        ; = bank, 0 for our single-bank programs). Walk the table in
        ; 4-byte strides and call each entry through __jsl_indir using
        ; only the low half.
        ;
        ; __jsl_indir does `JMP (__indirTarget)`, so __indirTarget must
        ; hold the function pointer itself, NOT the address of the
        ; init_array slot — hence the dereference through DP $E0.
        rep     #0x30                   ; M=16, X=16
        ldx     #__init_array_start     ; X = address of current entry
.Linit_loop:
        cpx     #__init_array_end
        bcs     .Linit_done             ; unsigned X >= end -> finished
        stx     0xe0                    ; entry address -> DP scratch
        ldy     #0
        lda     (0xe0), y               ; A = low 16 bits of the entry (DP-indirect-Y, opcode 0xb1)
        sta     __indirTarget
        phx                             ; keep the cursor across the call
        jsl     __jsl_indir
        plx
        inx                             ; advance 4 bytes: sizeof(void*) under p:32:16
        inx
        inx
        inx
        bra     .Linit_loop
.Linit_done:

        ; Note: the IIgs Tool Locator (JSL $E10000 dispatch) is NOT
        ; initialised here. Wiring TLStartUp into crt0 crashed MAME in
        ; our test harness — the dispatcher appears to want some
        ; pre-setup we're missing. Programs that need toolbox calls
        ; should call `iigsToolboxInit()` from the runtime (declared in
        ; iigs/toolbox.h), which performs the sequence in a controlled
        ; context. time()/clock() check an in-process flag and return 0
        ; if init hasn't been done.

        ; ---- 8. main + halt -----------------------------------------
        ; Standard W65816 ABI: i16 first arg in A; we pass nothing.
        ; After return, A holds the exit code.
        jsl     main

        ; Halt via BRK $00. MAME / debuggers catch this as a clean
        ; program termination.
        .byte   0x00, 0x00              ; BRK opcode + signature byte
        ; If a BRK handler ever returns, execution resumes here. Spin
        ; instead of running into whatever bytes follow __start.
.Lhalt:
        bra     .Lhalt

        .size   __start, . - __start
|