65816-llvm-mos/runtime/src/crt0.s
Scott Duensing e65fedc8e1 Checkpoint
2026-05-13 15:48:34 -05:00

213 lines
6.4 KiB
ArmAsm

; crt0 — C runtime startup for the W65816 backend.
;
; Entry point invoked by the loader (or the OMF dispatcher). Sets up
; the processor mode the rest of the runtime expects, zeroes BSS,
; runs the static constructors listed in .init_array, calls main,
; and halts with BRK when main returns.
;
; Conventions:
; - Native mode (E=0), 16-bit M and X (REP #$30) on entry to main.
; - DP=0, DBR=0 — assumed by the C runtime.
; - Linker-emitted symbols: __bss_seg{0..3}_lo16 / _bank / _size
;   (up to four BSS segments) and __init_array_start /
;   __init_array_end.
.text
.globl __start
; __start — first phase of startup. In order: mask IRQs, switch the
; CPU to native mode with 16-bit A/X/Y, clear the IIgs interrupt
; enable registers, move the stack to $0FFF, enable Language Card
; RAM, and record the program bank at DP $BE.
;
; Width discipline: REP #$30 below makes M and X 16-bit. Every
; SEP #$20 ... REP #$20 pair in this phase brackets a stretch that
; must run with an 8-bit accumulator / 8-bit memory accesses; the X
; flag is never changed, so X/Y stay 16-bit. The phase ends with
; M=16.
__start:
; Disable IRQ first — the IIgs ROM hands a vsync IRQ on every frame,
; and its handler runs in 8-bit M/X mode, corrupting our state if
; we leave I clear. SEI is fine in either emulation or native
; mode and is always 1 byte / 2 cycles.
sei
; Native mode + 16-bit registers. CLC/XCE moves the cleared carry
; into E (E=0 = native); REP #$30 then clears the M and X flags.
clc
xce
rep #0x30
; Disable IIgs peripheral interrupt sources at the chip level —
; SEI alone leaves the hardware lines asserted, and the IRQ trap
; in ROM keeps re-firing if the source isn't quiesced. STZ
; stores zero without going through A; in M=8 it stores 1 byte
; (matching the 8-bit registers), so no LDA #0 prelude is needed.
; These are absolute stores, so they reach the I/O page only if
; DBR selects a bank where $C0xx is I/O (DBR=0 is assumed here).
sep #0x20
stz 0xc041 ; INTEN = 0 (clear AN3/mouse/0.25s/VBL/mouse-IRQ enables)
stz 0xc023 ; VGCINT = 0 (clear external/1-sec/scan-line IRQ enables)
stz 0xc032 ; SCANINT clear
rep #0x20
; Top-of-stack at $0FFF. Native-mode S is 16-bit, so we don't need
; to stay in page 1. Soft-double frames can be ~170 bytes plus the
; usual call-chain overhead — at $01FF stack growth wraps into the
; direct page ($0000-$00FF) which holds our libcall scratch
; ($E0-$F4) and IMG slots ($D0-$DE), corrupting them. $0FFF gives
; ~3.5 KB of headroom and stays below the text base ($1000).
; NOTE(review): the $BE comment further down gives these ranges as
; $E0..$FF and $C0..$DE — the two descriptions disagree; confirm
; which is current.
lda #0x0fff
tcs
; Enable Language Card RAM at $D000-$FFFF for read+write. This
; is 12KB (4KB at $D000-$DFFF in LC bank 1, plus 8KB at
; $E000-$FFFF common LC area). The IIgs LC area defaults to
; ROM-mapped; two reads of $C083 enable bank-1 RAM read AND
; write for the whole $D000-$FFFF range. link816 may auto-
; relocate BSS / heap into this area when text+rodata grows
; past $BFFF — without this enable, writes drop on the floor
; and reads return ROM bytes. The reads must be 8-bit (one
; byte at a time) — a 16-bit M read at $C083 would also touch
; $C084 (a different soft switch), wiping the LC enable.
; NOTE(review): Apple's hardware references list $C080-$C083 as
; the $D000 "bank 2" switches and $C088-$C08B as "bank 1", so the
; 4KB window selected here is probably the one Apple calls bank 2.
; The code is unaffected (either 4KB bank works as long as nothing
; else switches it), but confirm the naming before relying on it.
sep #0x20
lda 0xc083 ; first read
lda 0xc083 ; second read (write enable needs both)
rep #0x20
; Persistent "current data bank" byte at DP $BE. The LDAptr/
; STAptr/STBptr inserters load this into $E2 before each [dp],Y
; deref so pointer-deref lands in the user's bank, matching where
; DBR-relative absolute stores go. Under MAME (no Loader), DBR=0
; and PBR=0 here, so $BE=0 — equivalent to the prior STZ $E2
; behavior. Under GS/OS Loader, crt0Gsos.s sets it to PBR.
;
; $BE chosen because it's outside both the libcall scratch range
; ($E0..$FF used by libgcc.s for i64 ops) and the IMG slot range
; ($C0..$DE). PHK pushes 1 byte; PLA in M=8 to pull just 1 byte.
sep #0x20
phk
pla ; A's low byte = current PBR
sta 0xbe ; persistent data bank
rep #0x20
; Zero BSS. Up to 4 segments — the linker emits __bss_seg{0..3}_lo16
; / _bank / _size symbols. A segment whose size is 0 is skipped
; without touching DBR. Every other segment is cleared one byte at
; a time with DBR-relative STZ abs,X after DBR has been pointed at
; the segment's bank. The DBR saved by PHB is restored at the end.
;
; Register widths: X is 16-bit for the whole section, and the
; widths of LDX / CPX / INX depend only on the X flag. M is switched
; to 8-bit once per non-empty segment (so PHA pushes one byte and
; STZ stores one byte) and simply stays 8-bit across the clear loop
; — there is no need to toggle it for every byte. Nothing executed
; between segments depends on M; it is put back to 16-bit at
; .Lbss_done, so the section exits with M=16 on every path.
rep #0x10 ; ensure X is 16-bit
phb ; save current DBR
; ---- segment 0 ----
ldx #__bss_seg0_size
beq .Lbss_seg1 ; size 0: skip, DBR untouched
sep #0x20 ; M=8 for the bank load and byte stores
.byte 0xA9 ; LDA #imm8 opcode, hand-encoded so the operand
.byte __bss_seg0_bank ; is exactly this one byte
pha
plb ; DBR = segment 0 bank
ldx #0 ; X = byte offset into the segment
.Lbss_loop0:
cpx #__bss_seg0_size
bcs .Lbss_seg1 ; offset >= size: segment done
stz __bss_seg0_lo16, x ; clear one byte at DBR:lo16+X
inx
bra .Lbss_loop0
.Lbss_seg1:
; ---- segment 1 ----
ldx #__bss_seg1_size
beq .Lbss_seg2 ; size 0: skip, DBR untouched
sep #0x20 ; M=8 for the bank load and byte stores
.byte 0xA9 ; LDA #imm8 (hand-encoded)
.byte __bss_seg1_bank
pha
plb ; DBR = segment 1 bank
ldx #0
.Lbss_loop1:
cpx #__bss_seg1_size
bcs .Lbss_seg2 ; offset >= size: segment done
stz __bss_seg1_lo16, x
inx
bra .Lbss_loop1
.Lbss_seg2:
; ---- segment 2 ----
ldx #__bss_seg2_size
beq .Lbss_seg3 ; size 0: skip, DBR untouched
sep #0x20 ; M=8 for the bank load and byte stores
.byte 0xA9 ; LDA #imm8 (hand-encoded)
.byte __bss_seg2_bank
pha
plb ; DBR = segment 2 bank
ldx #0
.Lbss_loop2:
cpx #__bss_seg2_size
bcs .Lbss_seg3 ; offset >= size: segment done
stz __bss_seg2_lo16, x
inx
bra .Lbss_loop2
.Lbss_seg3:
; ---- segment 3 ----
ldx #__bss_seg3_size
beq .Lbss_done ; size 0: skip, DBR untouched
sep #0x20 ; M=8 for the bank load and byte stores
.byte 0xA9 ; LDA #imm8 (hand-encoded)
.byte __bss_seg3_bank
pha
plb ; DBR = segment 3 bank
ldx #0
.Lbss_loop3:
cpx #__bss_seg3_size
bcs .Lbss_done ; offset >= size: segment done
stz __bss_seg3_lo16, x
inx
bra .Lbss_loop3
.Lbss_done:
rep #0x20 ; back to M=16 regardless of which path got here
plb ; restore caller's DBR
; Static constructors. The linker brackets the .init_array section
; with __init_array_start / __init_array_end. Under p:32:16 every
; entry is a 32-bit function pointer (low 16 bits = function
; offset, high 16 bits = bank, 0 for our single-bank programs), so
; the cursor in X advances 4 bytes per entry and only the low half
; of each entry is used.
;
; The loop is bottom-tested: X is compared against the end symbol
; before the first call (an empty array calls nothing) and again
; after each entry.
rep #0x30 ; native, 16-bit M and X
ldx #__init_array_start ; X = address of the current entry
bra .Linit_check
.Linit_loop:
; __jsl_indir does `JMP (__indirTarget)`, i.e. it jumps to the
; 16-bit pointer stored IN __indirTarget. The slot therefore has
; to be dereferenced here: park its address in DP scratch $E0 and
; fetch the pointer through it.
ldy #0
stx 0xe0 ; $E0/$E1 = address of the entry
lda (0xe0), y ; A = low 16 bits of the entry (DP-indirect-Y, opcode 0xb1)
sta __indirTarget ; hand the function pointer to __jsl_indir
phx ; the callee may clobber X — keep the cursor on the stack
jsl __jsl_indir
plx
; Advance to the next entry: sizeof(void*) is 4 under p:32:16.
inx
inx
inx
inx
.Linit_check:
cpx #__init_array_end
bcc .Linit_loop ; cursor < end: another entry to run
.Linit_done:
; Note: the IIgs Tool Locator (JSL $E10000 dispatch) is NOT
; initialised here. We tried wiring TLStartUp into crt0 and
; MAME segfaulted in our specific test harness — the dispatcher
; appears to want some pre-setup we're missing. Programs that
; need toolbox calls should call `iigsToolboxInit()` from the
; runtime (declared in iigs/toolbox.h), which performs the
; sequence in a controlled context. time()/clock() check an
; in-process flag and return 0 if init hasn't been done.
; Call main. Standard W65816 ABI: i16 first arg in A; we pass
; nothing. After return, A holds the exit code.
jsl main
; Halt via BRK $00 (opcode byte + signature byte). MAME / debuggers
; catch this as a clean program termination.
; NOTE(review): the native-mode BRK vector lives at $00FFE6, inside
; the $E000-$FFFF window that the startup code switched to Language
; Card RAM — presumably the harness intercepts the BRK before the
; vector is fetched; confirm for any target that actually vectors.
.byte 0x00, 0x00
; Safety net: if a BRK handler ever returns, execution resumes
; right here. Spin instead of running into whatever the linker
; placed after __start. BRA does not depend on M/X or on any
; register, so it is safe in whatever state main left behind.
.Lstart_hang:
bra .Lstart_hang
.size __start, . - __start