More updates
This commit is contained in:
parent
09f7405362
commit
3388f3c5a5
28 changed files with 1483 additions and 717 deletions
75
demos/midiProbe.c
Normal file
75
demos/midiProbe.c
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
// midiProbe.c - exercise the Note Synth toolset ($19) dispatcher
|
||||
// path. Verifies the wrapper-to-toolset dispatch round trip:
|
||||
//
|
||||
// 1. iigsSoundProbeInit (MMStartUp + SoundStartUp) -- bare prereq.
|
||||
// 2. NSVersion() -- returns the Note Synth ROM-resident version
|
||||
// word; works without a prior NSStartUp because
|
||||
// the toolset is always present.
|
||||
// 3. NSStatus() -- returns the current toolset state.
|
||||
// 4. AllNotesOff() -- silent (no audible side effect even if the
|
||||
// toolset never had a StartUp); pure dispatch.
|
||||
//
|
||||
// Why NOT a full NSStartUp + NoteOn + NoteOff sequence? NSStartUp
|
||||
// takes a pointer to a complex InstrumentT struct (envelope list,
|
||||
// wave list with topKey/waveAddress/waveSize tuples, etc.). Getting
|
||||
// the layout exactly right is fiddly and not what this smoke is
|
||||
// trying to measure. Smoke goal is: "is the Note Synth dispatcher
|
||||
// callable from llvm816-emitted code, and does the wrapper return
|
||||
// without scribbling on the stack?" Three round-trip calls answer
|
||||
// that.
|
||||
//
|
||||
// If $70 = 0x42 after this runs, the Note Synth wrapper layer is
|
||||
// healthy. (Audible playback through NSStartUp / NoteOn / NoteOff
|
||||
// is exercised when a real app uses it -- not part of THIS smoke.)
|
||||
//
|
||||
// Build with: bash demos/build.sh midiProbe
|
||||
// Run with: bash scripts/runViaFinder.sh demos/midiProbe.omf
|
||||
// --check 0x70=0x42
|
||||
|
||||
#include "iigs/sound.h"
|
||||
#include "iigs/toolbox.h"
|
||||
|
||||
|
||||
// Smoke entry point: runs the sound-probe init followed by three Note
// Synth wrapper calls, dropping a marker byte after every stage so the
// harness can tell how far execution got.  $70 = 0x42 means every stage
// returned.
int main(void) {
    // One named pointer per marker slot; every store below goes through
    // a volatile lvalue exactly as a direct absolute-address write would.
    volatile unsigned char *const doneMark    = (volatile unsigned char *)0x70;
    volatile unsigned char *const versionMark = (volatile unsigned char *)0x71;
    volatile unsigned char *const statusMark  = (volatile unsigned char *)0x73;
    volatile unsigned char *const notesMark   = (volatile unsigned char *)0x74;
    volatile unsigned char *const aliveMark   = (volatile unsigned char *)0x76;
    volatile unsigned char *const initMark    = (volatile unsigned char *)0x77;
    volatile unsigned char *const verHiByte   = (volatile unsigned char *)0x78;
    volatile unsigned char *const verLoByte   = (volatile unsigned char *)0x79;

    *aliveMark = 0xAA;                  // reached main() at all

    // Prerequisite bring-up; the returned user ID is not needed here.
    (void)iigsSoundProbeInit();
    *initMark = 0xBB;                   // iigsSoundProbeInit() returned

    // NSVersion(): the 16-bit result is split across $78 (high byte) and
    // $79 (low byte) so a wrapper regression shows up in the dump.
    const unsigned short nsVersion = NSVersion();
    *verHiByte = (unsigned char)(nsVersion >> 8);
    *verLoByte = (unsigned char)(nsVersion & 0xFF);
    *versionMark = 0x11;                // NSVersion() returned

    // NSStatus(): the value is deliberately discarded -- only the fact
    // that the wrapper came back matters for this smoke.
    (void)NSStatus();
    *statusMark = 0x22;                 // NSStatus() returned

    // AllNotesOff(): no arguments, no result; pure dispatch round trip.
    AllNotesOff();
    *notesMark = 0x33;                  // AllNotesOff() returned

    // Whole sequence completed.
    *doneMark = 0x42;

    // Busy-wait so the snapshot harness has time to sample the markers.
    // The counter is volatile so the loop cannot be optimised away.
    volatile unsigned long spin = 0;
    while (spin < 600000UL) {
        spin++;
    }
    return 0;
}
|
||||
116
demos/stdFile.c
Normal file
116
demos/stdFile.c
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
// stdFile.c - exercise the Standard File toolset ($17) dispatcher
|
||||
// path. Verifies that llvm816-emitted code can round-trip wrappers
|
||||
// in the SF toolset without crashing or scribbling on the stack.
|
||||
//
|
||||
// runViaFinder.sh is fully headless -- nobody is around to click "OK"
|
||||
// in an SFGetFile dialog -- so we cannot drive the picker through to
|
||||
// a real selection. Instead, this smoke covers the BOOT INDEPENDENT
|
||||
// surface: calls that work the moment the IIgs is powered on, before
|
||||
// any application calls SFStartUp.
|
||||
//
|
||||
// Specifically:
|
||||
// 1. SFVersion() -- returns ROM-resident version word. No
|
||||
// StartUp required.
|
||||
// 2. SFStatus() -- returns 0/non-zero "is started" boolean.
|
||||
// 3. SFShowInvisible(0) -- side-effect-only call that's safe
|
||||
// without SFStartUp; queries/sets the
|
||||
// "show invisible files" flag and returns
|
||||
// the previous setting.
|
||||
//
|
||||
// Plus we DO bring up the full desktop (startdesk: QD + WM + ...)
|
||||
// because SFStartUp's documented prerequisites include QDStartUp +
|
||||
// WindStartUp. Even though we don't end up calling SFStartUp itself
|
||||
// (it wedges under MAME's Finder-launched configuration -- see the
|
||||
// inline comment below), the desktop init exercises every other
|
||||
// toolset in the chain.
|
||||
//
|
||||
// If $70 = 0x42 after this runs, the SF wrapper layer is healthy.
|
||||
// (Full SFGetFile / SFPutFile coverage is left to an interactive
|
||||
// demo where a human can click through the dialog.)
|
||||
//
|
||||
// Build with: bash demos/build.sh stdFile
|
||||
// Run with: bash scripts/runViaFinder.sh demos/stdFile.omf
|
||||
// --check 0x70=0x42
|
||||
|
||||
#include "iigs/desktop.h"
|
||||
#include "iigs/toolbox.h"
|
||||
|
||||
|
||||
// SFReplyRec layout (ORCA stdfile.h): 8 bytes prefix + 65-byte
|
||||
// Pascal-counted path = 73 bytes, plus one explicit pad byte, i.e. 74
|
||||
// bytes of declared fields.  NOTE(review): an earlier note said "round
// up to 80", which this declaration does not do -- confirm the intended
// size.  main() only uses the record as a stack sentinel (byte-filled,
// then compared); SFGetFile is never called, so it stays
|
||||
// exactly as we wrote it.
|
||||
typedef struct {
|
||||
unsigned short good; // prefix field 1 of 3
|
||||
unsigned short fileType; // prefix field 2 of 3
|
||||
unsigned long auxType; // prefix field 3 of 3 (completes the 8-byte prefix)
|
||||
unsigned char fileName[65]; // 65-byte Pascal-counted path
|
||||
unsigned char pad; // single trailing pad byte
|
||||
} SFReplyRecT;
|
||||
|
||||
|
||||
int main(void) {
|
||||
*(volatile unsigned char *)0x76 = 0xAA; // pre-init alive marker
|
||||
|
||||
// Bring up the full desktop so QDStartUp + WindStartUp are done.
|
||||
// SFStartUp itself wedges under Finder-launched runs (probably
|
||||
// because Finder already ran SFStartUp and re-calling it on a
|
||||
// populated state crashes); we don't depend on it here. The
|
||||
// startdesk() call still exercises every toolset in its chain.
|
||||
unsigned short userId = startdesk(640);
|
||||
(void)userId;
|
||||
*(volatile unsigned char *)0x77 = 0xBB; // post-startdesk marker
|
||||
|
||||
// SFVersion() - returns the Standard File toolset's ROM version
|
||||
// word. No SFStartUp required (the toolset is always in ROM).
|
||||
// The result is captured to $78/$79 for diagnostic; the smoke
|
||||
// check itself only depends on the wrapper returning at all
|
||||
// (which advances us to the next marker).
|
||||
unsigned short ver = SFVersion();
|
||||
*(volatile unsigned char *)0x78 = (unsigned char)(ver >> 8);
|
||||
*(volatile unsigned char *)0x79 = (unsigned char)(ver & 0xFF);
|
||||
*(volatile unsigned char *)0x71 = 0x11; // post-SFVersion marker
|
||||
|
||||
// SFStatus() - returns the toolset's current state (0 = not
|
||||
// started by us, non-zero = started). Pure dispatch, no args,
|
||||
// returns Boolean. Exercises the result-pull arm of the
|
||||
// wrapper layer.
|
||||
(void)SFStatus();
|
||||
*(volatile unsigned char *)0x72 = 0x22; // post-SFStatus marker
|
||||
|
||||
// SFShowInvisible(state) - sets the "show invisible files"
|
||||
// flag and returns the previous setting. Safe pre-StartUp
|
||||
// (the toolset just toggles a global). Exercises a (Word) ->
|
||||
// Word wrapper round-trip.
|
||||
unsigned short prev = SFShowInvisible(0);
|
||||
*(volatile unsigned char *)0x73 = 0x33; // post-SFShowInvisible marker
|
||||
(void)prev;
|
||||
|
||||
// Build a sentinel reply record on the stack. Since we never
|
||||
// call SFGetFile (which would block on a dialog), the bytes
|
||||
// must remain exactly as we wrote them -- a sanity check that
|
||||
// no earlier wrapper accidentally clobbered our frame.
|
||||
SFReplyRecT reply;
|
||||
unsigned char *r8 = (unsigned char *)&reply;
|
||||
for (int i = 0; i < (int)sizeof(reply); i++) {
|
||||
r8[i] = 0x5C;
|
||||
}
|
||||
int replySane = 1;
|
||||
for (int i = 0; i < (int)sizeof(reply); i++) {
|
||||
if (r8[i] != 0x5C) {
|
||||
replySane = 0;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*(volatile unsigned char *)0x74 = 0x44; // post-sentinel marker
|
||||
|
||||
if (replySane) {
|
||||
*(volatile unsigned char *)0x70 = 0x42;
|
||||
} else {
|
||||
*(volatile unsigned char *)0x70 = 0x43;
|
||||
}
|
||||
|
||||
// Linger so the snapshot harness can sample the marker.
|
||||
for (volatile unsigned long s = 0; s < 600000UL; s++) { }
|
||||
return 0;
|
||||
}
|
||||
90
demos/timeProbe.c
Normal file
90
demos/timeProbe.c
Normal file
|
|
@ -0,0 +1,90 @@
|
|||
// timeProbe.c - GS/OS smoke for the IIgs RTC surface. Exercises
|
||||
// three layers of the time stack:
|
||||
//
|
||||
// 1. iigsReadTimeHex (Misc Tool $0D03) - the raw hardware read.
|
||||
// 2. time() (libc.c) - epoch-second conversion.
|
||||
// 3. gettimeofday() (extras.c) - the new POSIX shim added
|
||||
// alongside this demo.
|
||||
//
|
||||
// All three paths are expected to report a non-zero time on real GS/OS
|
||||
// (the system clock is set during boot from the battery-backed clock
|
||||
// chip); a freshly reset MAME may legitimately report an all-zero time.
|
||||
//
|
||||
// Headless verification - we cannot pin specific values without
|
||||
// knowing what MAME's emulated RTC will return, so we set marker
|
||||
// bytes at $70+ that reflect "the call returned + the bytes look
|
||||
// plausible":
|
||||
//
|
||||
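// $70 = 0x99 if iigsReadTimeHex changed at least one byte of b[] AND
|
||||
// gettimeofday() set tv_usec to 0 and returned either 0 with
|
||||
// tv_sec != 0 or -1 with tv_sec == 0; otherwise $70 = 0x43.
// time() only has to return -- its value is not checked.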
|
||||
// $71 = b[2] (hour) -- non-zero on real boot, MAME returns 0 in the
|
||||
// first emulated second so the smoke ONLY
|
||||
// checks $70=0x99.
|
||||
//
|
||||
// Build with: bash demos/build.sh timeProbe
|
||||
// Run with: bash scripts/runViaFinder.sh demos/timeProbe.omf
|
||||
// --check 0x70=0x99
|
||||
|
||||
#include "iigs/misc.h"
|
||||
#include "iigs/toolbox.h"
|
||||
#include "sys/time.h"
|
||||
#include <time.h>
|
||||
|
||||
|
||||
int main(void) {
|
||||
// Layer 1: raw ReadTimeHex. The buffer is preloaded with a
|
||||
// sentinel pattern (0xAA) so we can detect that the tool actually
|
||||
// overwrote SOMETHING -- even on a freshly booted MAME (clock
|
||||
// starts at Jan 1 1904 internally) the toolset is expected to
|
||||
// write all 8 bytes, and at least one of them differs from 0xAA
|
||||
// (day-of-week=Sunday=1, day-of-month=1, etc).
|
||||
unsigned char b[8];
|
||||
for (int i = 0; i < 8; i++) {
|
||||
b[i] = 0xAA;
|
||||
}
|
||||
iigsReadTimeHex(b);
|
||||
int layer1Ok = 0;
|
||||
for (int i = 0; i < 8; i++) {
|
||||
if (b[i] != 0xAA) {
|
||||
layer1Ok = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Save the hour byte for diagnostic (not part of the smoke check).
|
||||
*(volatile unsigned char *)0x71 = b[2];
|
||||
|
||||
// Layer 2: time(). libc.c's iigsToolboxInit() arms the internal
|
||||
// gate that protects time() from being called before the Tool
|
||||
// Locator is up; safe to call unconditionally. time() returns 0
|
||||
// if the RTC year is < 1970 (Unix epoch) -- on MAME that means a
|
||||
// freshly reset emulator returns 0 here. We don't gate the smoke
|
||||
// on a non-zero return; we only confirm the call returned cleanly
|
||||
// (didn't crash or hang) by reaching layer 3.
|
||||
iigsToolboxInit();
|
||||
(void)time((time_t *)0);
|
||||
|
||||
// Layer 3: gettimeofday(). Even when time() returns 0 (epoch
|
||||
// floor), gettimeofday must return -1 in that case per the shim's
|
||||
// contract. We assert the call returned (didn't crash) and tv_usec
|
||||
// ended up == 0 (the shim always sets it to 0, no sub-second hw).
|
||||
struct timeval tv;
|
||||
tv.tv_sec = 0xDEADBEEFL;
|
||||
tv.tv_usec = 0xCAFE0000L;
|
||||
int r = gettimeofday(&tv, (void *)0);
|
||||
// Either r==0 with tv_sec!=0 (real clock past 1970) OR r==-1 with
|
||||
// tv_sec==0 (epoch floor / MAME default). Both are valid call
|
||||
// completion signals. Reject only the "tv untouched" outcome.
|
||||
int layer3Ok = (tv.tv_usec == 0) && ((r == 0 && tv.tv_sec != 0L) || (r == -1 && tv.tv_sec == 0));
|
||||
|
||||
if (layer1Ok && layer3Ok) {
|
||||
*(volatile unsigned char *)0x70 = 0x99;
|
||||
} else {
|
||||
*(volatile unsigned char *)0x70 = 0x43;
|
||||
}
|
||||
|
||||
// Linger so the snapshot harness can sample the marker.
|
||||
for (volatile unsigned long s = 0; s < 600000UL; s++) { }
|
||||
return 0;
|
||||
}
|
||||
49
runtime/include/sys/time.h
Normal file
49
runtime/include/sys/time.h
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
// sys/time.h - POSIX gettimeofday() shim on the IIgs RTC.
|
||||
//
|
||||
// The IIgs Misc Tool ReadTimeHex (set $03, tool $0D) is the only
|
||||
// hardware-visible wall clock; its resolution is one second. We
|
||||
// expose it through the POSIX gettimeofday() surface so portable
|
||||
// code that wants a coarse wall-time stamp (logging, srand,
|
||||
// benchmark deltas in whole seconds) works unmodified.
|
||||
//
|
||||
// tv_sec is the same Unix epoch second count returned by time().
|
||||
// tv_usec is always 0 (no sub-second hardware). The `tz` argument is
|
||||
// accepted for source compatibility and silently ignored -- the IIgs
|
||||
// has no timezone database.
|
||||
//
|
||||
// The signature mirrors the canonical POSIX one byte-for-byte so
|
||||
// existing third-party code using `struct timeval` and gettimeofday()
|
||||
// links cleanly against runtime/extras.o.
|
||||
|
||||
#ifndef _SYS_TIME_H
|
||||
#define _SYS_TIME_H
|
||||
|
||||
#include <time.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// suseconds_t is declared as `long` here.  NOTE(review): the original
// note called it an i32, which presumes `long` is 32 bits on this
// target -- confirm.
|
||||
typedef long suseconds_t;
|
||||
|
||||
// Result record filled in by gettimeofday().
struct timeval {
|
||||
time_t tv_sec; // seconds since the Unix epoch
|
||||
suseconds_t tv_usec; // microseconds within the second (always 0 here)
|
||||
};
|
||||
|
||||
// Declared for source compatibility only: gettimeofday() below takes its
// second argument as `void *`.
struct timezone {
|
||||
int tz_minuteswest; // minutes west of GMT (NOTE(review): shim presumably never writes this -- confirm)
|
||||
int tz_dsttime; // DST correction (NOTE(review): shim presumably never writes this -- confirm)
|
||||
};
|
||||
|
||||
// Returns 0 on success, -1 on failure (e.g. if the Tool Locator has
|
||||
// not yet been initialised).  `tz` is accepted for source compat and
|
||||
// silently ignored. Calling with tv==NULL is a no-op success.
// NOTE(review): the implementation presumably also zeroes *tv on
// failure -- confirm against the definition.
|
||||
int gettimeofday(struct timeval *tv, void *tz);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
@ -170,6 +170,46 @@ void __srandInitFromTime(void) {
|
|||
}
|
||||
|
||||
|
||||
// ----- sys/time.h gettimeofday() ---------------------------------------
|
||||
//
|
||||
// Thin shim over libc.c's time() — same epoch-second source, packaged
|
||||
// in the POSIX struct timeval shape. tv_usec is always 0 because the
|
||||
// IIgs has no sub-second wall clock (the VBL counter at $E1:006B is
|
||||
// monotonic but not aligned to wall-clock seconds). The tz argument
|
||||
// is accepted for source compat and ignored; the IIgs has no
|
||||
// timezone database.
|
||||
//
|
||||
// Declared in <sys/time.h>; the struct timeval layout matches that
|
||||
// header byte-for-byte (time_t, then long).
|
||||
|
||||
// Epoch-second source shared with the rest of the runtime.
extern long time(long *t);      // matches signature in <time.h>

// Private mirror of struct timeval: two longs, seconds first.
struct __ggGtodTimeval {
    long tv_sec;
    long tv_usec;
};

// gettimeofday() shim over time().
//
//   tv  destination record; a null pointer is accepted and reported as
//       success without calling time().
//   tz  ignored.
//
// Returns 0 on success.  When time() yields 0 the call is treated as a
// failure: both fields are zeroed and -1 is returned.  tv_usec is
// always written as 0.
int gettimeofday(struct __ggGtodTimeval *tv, void *tz) {
    (void)tz;

    if (tv != 0) {
        const long now = time((long *)0);

        // On failure `now` is 0, so this one store pair covers both the
        // success and the failure record.
        tv->tv_sec = now;
        tv->tv_usec = 0;

        if (now == 0) {
            return -1;
        }
    }
    return 0;
}
|
||||
|
||||
|
||||
// ----- additional string.h ----------------------------------------------
|
||||
|
||||
static int inSet(char c, const char *set) {
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
|
|
@ -11,12 +11,38 @@
|
|||
# Output: markdown table with cycles-per-call. Both clang and the
|
||||
# Calypsi numbers (from `tools/calypsi/cc65816`) are reported when
|
||||
# Calypsi is installed.
|
||||
#
|
||||
# Flags:
|
||||
# --no-layer2 Build the benches in plain ptr32 mode (Layer 1 only).
|
||||
# By default we pass `-mllvm -w65816-dbr-safe-ptrs`
|
||||
# (Layer 2 — stack-rel-indirect-Y ptr32 derefs) because
|
||||
# every published baseline in docs/USAGE.md and every
|
||||
# entry in memory/feedback_*.md was measured with Layer
|
||||
# 2 on. Without it, strLen / strcpy / djb2 / memcmp
|
||||
# lose the X-iter + Y-as-counter peephole chain in
|
||||
# W65816StackRelToImg and regress 2-4x.
|
||||
#
|
||||
# Env override:
|
||||
# W65816_CC_EXTRA Additional flags passed to every clang invocation
|
||||
# in this script. Appended AFTER the layer flag
|
||||
# so callers can disable Layer 2 themselves
|
||||
# (`W65816_CC_EXTRA="" --no-layer2 ...`) or stack
|
||||
# extra `-mllvm` knobs on top of Layer 2.
|
||||
|
||||
set -euo pipefail
|
||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
BENCH_DIR="$PROJECT_ROOT/benchmarks"
|
||||
|
||||
# Layer 2 is on unless the caller opts out with --no-layer2.  That is the
# only flag this script understands; anything else aborts with status 1.
# NOTE(review): with --no-layer2 the array is empty; expanding an empty
# array as "${LAYER2_FLAGS[@]}" under `set -u` is reported as an unbound
# variable by bash older than 4.4 -- confirm the minimum bash version.
LAYER2_FLAGS=(-mllvm -w65816-dbr-safe-ptrs)
for arg in "$@"; do
    if [[ "$arg" == "--no-layer2" ]]; then
        LAYER2_FLAGS=()
    else
        echo "unknown flag: $arg" >&2
        exit 1
    fi
done
|
||||
|
||||
CLANG="$PROJECT_ROOT/tools/llvm-mos-build/bin/clang"
|
||||
LLVM_MC="$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-mc"
|
||||
LINK="$PROJECT_ROOT/tools/link816"
|
||||
|
|
@ -122,9 +148,9 @@ int main(void) {
|
|||
}
|
||||
EOF
|
||||
|
||||
"$CLANG" --target=w65816 -O2 ${W65816_CC_EXTRA:-} -ffunction-sections -c "$cwrap" -o "$owrap" 2>/dev/null \
|
||||
"$CLANG" --target=w65816 -O2 "${LAYER2_FLAGS[@]}" ${W65816_CC_EXTRA:-} -ffunction-sections -c "$cwrap" -o "$owrap" 2>/dev/null \
|
||||
|| { echo "compile-fail"; rm -f "$cwrap" "$owrap"; return; }
|
||||
"$CLANG" --target=w65816 -O2 ${W65816_CC_EXTRA:-} -ffunction-sections -c "$BENCH_DIR/$name.c" -o "$obench" 2>/dev/null \
|
||||
"$CLANG" --target=w65816 -O2 "${LAYER2_FLAGS[@]}" ${W65816_CC_EXTRA:-} -ffunction-sections -c "$BENCH_DIR/$name.c" -o "$obench" 2>/dev/null \
|
||||
|| { echo "compile-fail"; rm -f "$cwrap" "$owrap" "$obench"; return; }
|
||||
"$LINK" -o "$bin" --text-base 0x1000 "$oCrt0" "$oLibgcc" "$owrap" "$obench" 2>/dev/null \
|
||||
|| { echo "link-fail"; rm -f "$cwrap" "$owrap" "$obench" "$bin"; return; }
|
||||
|
|
|
|||
|
|
@ -707,10 +707,16 @@ emu.register_periodic(function()
|
|||
local full_pc = (pc_bnk * 0x10000) + pc_lo
|
||||
print(string.format("MAMEDBG-SNAP S=0x%04X PC=0x%06X",
|
||||
s_val, full_pc))
|
||||
-- Dump 64 bytes of the stack window above S (S+1 .. S+64).
|
||||
-- That's where the topmost JSL return frame lives.
|
||||
for ofs = 1, 64 do
|
||||
local addr = s_val + ofs
|
||||
-- Dump the entire bank-0 stack window from S+1 up to the
|
||||
-- program-entry SP ($01FF). Multi-frame `bt` walks several
|
||||
-- parent frames upward, each consuming `frameSize + 3`
|
||||
-- bytes; 64 bytes was enough for the topmost frame only.
|
||||
-- Capping at $01FF keeps the dump bounded and avoids
|
||||
-- reading past the user stack into bank-0 hardware
|
||||
-- registers / soft switches that would surface as
|
||||
-- $C000-page side-effects.
|
||||
local stack_top = 0x01FF
|
||||
for addr = s_val + 1, stack_top do
|
||||
local v = mem:read_u8(addr)
|
||||
print(string.format("MAMEDBG-STACK addr=0x%06X val=0x%02X",
|
||||
addr, v))
|
||||
|
|
@ -769,6 +775,12 @@ class ReplState:
|
|||
self.sectionPayloads = pc2line.loadSidecarSectionsAll(args.dwarf)
|
||||
self.cus = pc2line.parseAllCus(self.sectionPayloads)
|
||||
self.lineTable = pc2line.buildTable(args.dwarf)
|
||||
# Per-function frame records (sorted) — used by `bt` to walk
|
||||
# parent JSL frames. Empty if the sidecar predates the
|
||||
# W65816AsmPrinter frame-record emission (older builds /
|
||||
# hand-written assembly objects); `bt` falls back to the
|
||||
# single-frame walk in that case.
|
||||
self.frameRecords = pc2line.loadFrameRecords(args.dwarf)
|
||||
# Breakpoints: list of (pc, label) - label is the original spec
|
||||
self.breakpoints = []
|
||||
# Watches: dict {symbol: (addr, length)}. Length picked from
|
||||
|
|
@ -983,50 +995,130 @@ def replPrintWhere(state):
|
|||
f"S=0x{sp:04x}")
|
||||
|
||||
|
||||
def replPrintBacktrace(state):
|
||||
"""Walk the JSL return frame chain starting from the captured S.
|
||||
def _btPrintFrame(state, frame_no, pc, sp):
    """Emit a single backtrace line for frame `frame_no`.

    The line always carries the PC, the enclosing function name and S;
    a `file:line` field is inserted when the line table has a row for
    `pc`.  Only reads from `state` (symbol list and line table).
    """
    func = pc2line.funcAt(state.syms, pc)
    row = pc2line.query(state.lineTable, pc)
    if row is not None:
        _, fname, ln = row
        where = f"{fname}:{ln} "
    else:
        where = ""
    print(f"  #{frame_no} PC=0x{pc:06x} {where}FUNC={func} S=0x{sp:04x}")
|
||||
|
||||
The W65816 JSL pushes 3 bytes per call (PCL, PCH, PBR). Our ABI is
|
||||
empty-descending: S points to the next-free byte. So the topmost
|
||||
return-address triplet lives at S+1, S+2, S+3. We read it from the
|
||||
captured stack window. We have no DW_AT_frame_base / DW_CFA_*
|
||||
sidecar yet, so we can't walk past one frame — but we can show the
|
||||
return address of the current function, which is what most debug
|
||||
sessions need anyway.
|
||||
|
||||
# Maximum unwinder depth. Real recursion can exceed this on the IIgs's
|
||||
# tiny stack, but past 16 frames the user almost certainly wants the
|
||||
# truncation hint rather than a wall of identical-looking entries.
|
||||
BT_MAX_FRAMES = 16
|
||||
|
||||
# Initial program-entry SP — crt0 sets up the user stack at $01FF
|
||||
# (empty-descending) and JSLs main(). Once `bt`'s walker sees S climb
|
||||
# past this value, we've reached the root and stop without printing
|
||||
# the bogus "frame above crt0" the rule would otherwise produce.
|
||||
BT_ROOT_SP = 0x01FF
|
||||
|
||||
|
||||
def replPrintBacktrace(state):
|
||||
"""Walk the JSL return frame chain using the .debug_frame_w65816
|
||||
sidecar. Each step decodes the caller's PC from the return-address
|
||||
triplet pushed by JSL (PCL/PCH/PBR at S+frameSize+1..+3) and the
|
||||
caller's S as `current_S + frameSize + rtlBytes`.
|
||||
|
||||
Falls back to the single-frame walk if no frame records were loaded
|
||||
(e.g. the sidecar predates this section). That matches the prior
|
||||
behaviour exactly — the test in scripts/probeReplSmoke.sh remains
|
||||
backward-compatible.
|
||||
"""
|
||||
if state.lastSnap is None:
|
||||
print(" no snapshot yet — `run` first")
|
||||
return
|
||||
pc = state.lastSnap["pc"]
|
||||
sp = state.lastSnap["sp"]
|
||||
func = pc2line.funcAt(state.syms, pc)
|
||||
row = pc2line.query(state.lineTable, pc)
|
||||
if row is None:
|
||||
print(f" #0 PC=0x{pc:06x} FUNC={func}")
|
||||
else:
|
||||
_, fname, ln = row
|
||||
print(f" #0 PC=0x{pc:06x} {fname}:{ln} FUNC={func}")
|
||||
# Try to read S+1..S+3 from the captured stack window.
|
||||
pcl_addr = (sp + 1) & 0xFFFF
|
||||
pch_addr = (sp + 2) & 0xFFFF
|
||||
pbr_addr = (sp + 3) & 0xFFFF
|
||||
pcl = state.lastStackBytes.get(pcl_addr)
|
||||
pch = state.lastStackBytes.get(pch_addr)
|
||||
pbr = state.lastStackBytes.get(pbr_addr)
|
||||
_btPrintFrame(state, 0, pc, sp)
|
||||
|
||||
if not state.frameRecords:
|
||||
# Old sidecar — fall back to the single-frame return-address
|
||||
# peek (caller of the current function only). Preserves the
|
||||
# behaviour shipped before the .debug_frame_w65816 section
|
||||
# existed; pre-existing smoke probes that depend on the
|
||||
# "frame #1 visible" invariant still pass against old DWARF.
|
||||
pcl = state.lastStackBytes.get((sp + 1) & 0xFFFF)
|
||||
pch = state.lastStackBytes.get((sp + 2) & 0xFFFF)
|
||||
pbr = state.lastStackBytes.get((sp + 3) & 0xFFFF)
|
||||
if pcl is None or pch is None or pbr is None:
|
||||
print(" #1 <return address not in captured stack window>")
|
||||
return
|
||||
# JSL pushes the address of the LAST byte of the JSL instruction,
|
||||
# so the actual return target is ret_addr + 1.
|
||||
ret_pc = (pbr << 16) | (pch << 8) | pcl
|
||||
ret_pc = (ret_pc + 1) & 0xFFFFFF
|
||||
ret_func = pc2line.funcAt(state.syms, ret_pc)
|
||||
ret_row = pc2line.query(state.lineTable, ret_pc)
|
||||
if ret_row is None:
|
||||
print(f" #1 PC=0x{ret_pc:06x} FUNC={ret_func}")
|
||||
else:
|
||||
_, fname, ln = ret_row
|
||||
print(f" #1 PC=0x{ret_pc:06x} {fname}:{ln} FUNC={ret_func}")
|
||||
ret_pc = (((pbr << 16) | (pch << 8) | pcl) + 1) & 0xFFFFFF
|
||||
ret_sp = (sp + 3) & 0xFFFF
|
||||
_btPrintFrame(state, 1, ret_pc, ret_sp)
|
||||
print(" (no .debug_frame_w65816 — only one frame available)")
|
||||
return
|
||||
|
||||
# Modern path: walk up via per-function frame records.
|
||||
cur_pc = pc
|
||||
cur_sp = sp
|
||||
# First-frame guard: when MAME breaks AT a function entry, the
|
||||
# prologue hasn't executed yet, so S points just below the
|
||||
# caller's JSL triplet (no frame allocated). Pass the frame
|
||||
# size as 0 for the first hop in that case. Later hops always
|
||||
# have a fully-set-up frame since we're looking at the caller
|
||||
# which is mid-execution by definition.
|
||||
first_hop_at_entry = False
|
||||
rec0 = pc2line.frameAt(state.frameRecords, cur_pc)
|
||||
if rec0 is not None and rec0[0] == cur_pc:
|
||||
first_hop_at_entry = True
|
||||
for frame_no in range(1, BT_MAX_FRAMES + 1):
|
||||
rec = pc2line.frameAt(state.frameRecords, cur_pc)
|
||||
if rec is None:
|
||||
# PC outside any recorded function (e.g. hand-written
|
||||
# assembly with no .debug_frame_w65816 record). Without
|
||||
# a frame size we can't safely climb past this point.
|
||||
print(f" (no frame record for PC=0x{cur_pc:06x} — "
|
||||
f"stopping)")
|
||||
return
|
||||
_pc_start, _pc_end, frame_sz, rtl = rec
|
||||
# Return-address triplet lives at cur_sp + frame_sz + 1..+3
|
||||
# *except* when we're stopped at the function's first byte
|
||||
# (the prologue hasn't allocated the frame yet), in which
|
||||
# case the triplet is at cur_sp + 1..+3. See first_hop_at_entry.
|
||||
effective_frame_sz = 0 if (frame_no == 1 and first_hop_at_entry) \
|
||||
else frame_sz
|
||||
ret_base = (cur_sp + effective_frame_sz) & 0xFFFF
|
||||
pcl = state.lastStackBytes.get((ret_base + 1) & 0xFFFF)
|
||||
pch = state.lastStackBytes.get((ret_base + 2) & 0xFFFF)
|
||||
pbr = state.lastStackBytes.get((ret_base + 3) & 0xFFFF)
|
||||
if pcl is None or pch is None or pbr is None:
|
||||
print(f" (return triplet at 0x{ret_base+1:04x}.."
|
||||
f"0x{ret_base+3:04x} not in captured stack window — "
|
||||
f"stopping)")
|
||||
return
|
||||
ret_pc = (((pbr << 16) | (pch << 8) | pcl) + 1) & 0xFFFFFF
|
||||
# New S after the popped JSL triplet: same arithmetic as the
|
||||
# epilogue's RTL would do (S += 3). rtl_bytes is reserved for
|
||||
# future inline JSR/RTS subroutines (2 bytes) — for the
|
||||
# current ABI all calls are JSL/RTL so rtl is always 3.
|
||||
ret_sp = (ret_base + rtl) & 0xFFFF
|
||||
# Stop once we've climbed past the initial program-entry SP —
|
||||
# that means we've returned out of main() into crt0 / GS/OS
|
||||
# Loader scaffolding, where the frame record doesn't apply.
|
||||
if ret_sp > BT_ROOT_SP:
|
||||
_btPrintFrame(state, frame_no, ret_pc, ret_sp)
|
||||
print(f" (reached crt0 / program-entry frame "
|
||||
f"S=0x{ret_sp:04x} > 0x{BT_ROOT_SP:04x})")
|
||||
return
|
||||
# Stop if the unwind made no progress (cycle or pathological
|
||||
# rtl-byte mismatch). Pure defensive check; the constants
|
||||
# above keep the legitimate path monotonic.
|
||||
if ret_sp <= cur_sp:
|
||||
print(f" (non-monotonic SP at frame #{frame_no} "
|
||||
f"cur=0x{cur_sp:04x} new=0x{ret_sp:04x} — stopping)")
|
||||
return
|
||||
_btPrintFrame(state, frame_no, ret_pc, ret_sp)
|
||||
cur_pc = ret_pc
|
||||
cur_sp = ret_sp
|
||||
print(f" (>{BT_MAX_FRAMES} frames — truncated)")
|
||||
|
||||
|
||||
def replPrintSymbol(state, spec):
|
||||
|
|
@ -1259,10 +1351,31 @@ def replLoop(state):
|
|||
print(" no breakpoints set — nothing to break on")
|
||||
continue
|
||||
bp_pcs = [pc for pc, _ in state.breakpoints]
|
||||
# Decide start_pc: --from-start runs through crt0; default
|
||||
# is to jump to the first bp (matches --trace behaviour).
|
||||
# Decide start_pc. Precedence (highest first):
|
||||
# --from-start -> LOAD_AT (run through crt0)
|
||||
# --start-at -> user-supplied entry point (FUNC or hex)
|
||||
# — set this to an *outer* caller of the
|
||||
# bp so the JSL frame chain is real and
|
||||
# `bt` can walk multiple frames.
|
||||
# default -> jump straight to the first bp (matches
|
||||
# --trace behaviour; produces a single
|
||||
# frame in `bt`).
|
||||
if state.args.from_start:
|
||||
start_pc = state.args.load_at
|
||||
elif state.args.start_at:
|
||||
spec = state.args.start_at
|
||||
try:
|
||||
start_pc = int(spec, 0)
|
||||
except ValueError:
|
||||
start_pc = None
|
||||
for addr, sym in state.syms:
|
||||
if sym == spec:
|
||||
start_pc = addr
|
||||
break
|
||||
if start_pc is None:
|
||||
print(f" --start-at '{spec}' not in map; "
|
||||
f"falling back to bp[0]")
|
||||
start_pc = bp_pcs[0]
|
||||
else:
|
||||
start_pc = bp_pcs[0]
|
||||
watch_regions = list(state.watches.values())
|
||||
|
|
|
|||
|
|
@ -1576,6 +1576,79 @@ def funcAt(syms, pc):
|
|||
return best or "?"
|
||||
|
||||
|
||||
# ---- Frame sidecar (.debug_frame_w65816) -----------------------------
|
||||
#
|
||||
# Each record is exactly 12 bytes:
|
||||
# +0 uint32_t fnPcStart (24-bit final-image address, zero-padded)
|
||||
# +4 uint32_t fnPcEnd (one past the last instruction)
|
||||
# +8 uint16_t frameSize (bytes that the prologue subtracts from S)
|
||||
# +10 uint8_t rtlBytes (3 for JSL/RTL; reserved for inline RTS)
|
||||
# +11 uint8_t pad (must be 0; reserved for future flags)
|
||||
#
|
||||
# Records are emitted in object-file order by W65816AsmPrinter and
|
||||
# concatenated unchanged by link816's `.debug_*` sidecar pipeline.
|
||||
FRAME_RECORD_SIZE = 12
|
||||
|
||||
|
||||
def loadFrameRecords(sidecar_path):
    """Parse every `.debug_frame_w65816` chunk in the sidecar.

    Returns a list of `(pcStart, pcEnd, frameSize, rtlBytes)` tuples in
    ascending order (tuple ordering, so pcStart first).  The list is
    empty when the section is absent.  A chunk whose length is not a
    whole number of records is ignored in full; other chunks are still
    parsed.
    """
    records = []
    for _name, payload in loadSidecarSection(sidecar_path,
                                             ".debug_frame_w65816"):
        if len(payload) % FRAME_RECORD_SIZE:
            # Not a whole number of records: skip this chunk only.
            continue
        offset = 0
        while offset < len(payload):
            rec = payload[offset:offset + FRAME_RECORD_SIZE]
            offset += FRAME_RECORD_SIZE
            # Addresses are stored as 32-bit little-endian words; only
            # the low 24 bits are kept.
            pc_start = int.from_bytes(rec[0:4], "little") & 0xFFFFFF
            pc_end = int.from_bytes(rec[4:8], "little") & 0xFFFFFF
            frame_sz = int.from_bytes(rec[8:10], "little")
            rtl_bytes = rec[10]
            # Rows with both endpoints at 0 are placeholders; drop them.
            if pc_start or pc_end:
                records.append((pc_start, pc_end, frame_sz, rtl_bytes))
    # frameAt() binary-searches this list, so it has to be ordered.
    records.sort()
    return records
|
||||
|
||||
|
||||
def frameAt(records, pc):
    """Return the record with the largest pcStart that is <= pc, or None.

    records must be sorted by pcStart (loadFrameRecords guarantees this).
    None is returned when records is empty or pc lies below every pcStart.

    The result is the *nearest preceding* record, not necessarily one
    that covers pc: pcEnd (record[1]) is exclusive, so when
    pc >= record[1] the address sits in the gap after that function.
    That is still useful for diagnostics; callers that need strict
    containment must compare pc against the returned pcEnd themselves.
    """
    lo, hi = 0, len(records) - 1
    best = None
    # Binary search for the right-most record whose pcStart <= pc.
    while lo <= hi:
        mid = (lo + hi) // 2
        if records[mid][0] <= pc:
            best = records[mid]
            lo = mid + 1
        else:
            hi = mid - 1
    return best
|
||||
|
||||
|
||||
def main():
|
||||
ap = argparse.ArgumentParser(description="PC -> source resolver")
|
||||
ap.add_argument("--sidecar", required=True,
|
||||
|
|
|
|||
|
|
@ -72,7 +72,9 @@ EOF
|
|||
[ -s "$DWARF" ] || { echo "probeReplSmoke: empty DWARF sidecar"; exit 1; }
|
||||
[ -s "$MAP" ] || { echo "probeReplSmoke: empty map"; exit 1; }
|
||||
|
||||
# Pipe the canned REPL script.
|
||||
# Phase 1: existing single-frame `bp main` smoke (kept to ensure the
|
||||
# baseline path still works). Then Phase 2: `bp add` + `--start-at
|
||||
# main` to exercise the multi-frame `bt` walker.
|
||||
printf 'break main\nrun\nwhere\nquit\n' \
|
||||
| timeout 60 python3 "$HERE/mameDebug.py" --repl \
|
||||
--bin "$BIN" --map "$MAP" --dwarf "$DWARF" \
|
||||
|
|
@ -123,5 +125,45 @@ if ! grep -qi "PC=$MAIN_PC_LC " "$OUT"; then
|
|||
exit 1
|
||||
fi
|
||||
|
||||
echo "probeReplSmoke: OK (bp resolved, BP-HIT captured, where decoded)"
|
||||
# Phase 2: multi-frame `bt` test. Breakpoint at `add` with --start-at
|
||||
# main: the JSL frame from main->add is live at the snapshot, so `bt`
|
||||
# should walk back up at least one parent (>= 2 total frames). This
|
||||
# regression-checks both the .debug_frame_w65816 sidecar emit (link816)
|
||||
# and the walker in mameDebug.py.
|
||||
OUT2="$WORK/repl2.out"
|
||||
printf 'break add\nrun\nbt\nquit\n' \
|
||||
| timeout 60 python3 "$HERE/mameDebug.py" --repl \
|
||||
--bin "$BIN" --map "$MAP" --dwarf "$DWARF" \
|
||||
--start-at main --seconds 4 > "$OUT2" 2>&1 || {
|
||||
echo "probeReplSmoke: mameDebug.py --repl (bt) failed" >&2
|
||||
cat "$OUT2" >&2
|
||||
exit 1
|
||||
}
|
||||
|
||||
if [ "$VERBOSE" -eq 1 ]; then
|
||||
cat "$OUT2" >&2
|
||||
fi
|
||||
|
||||
# Count frame lines (` #N PC=0x...`) in the bt output. Need >= 2 to
|
||||
# prove the .debug_frame_w65816 sidecar drove a real parent-frame walk.
|
||||
FRAME_LINES=$(grep -cE "^ #[0-9]+ PC=0x" "$OUT2" || true)
|
||||
if [ "$FRAME_LINES" -lt 2 ]; then
|
||||
echo "probeReplSmoke: bt produced $FRAME_LINES frame lines (need >= 2)" >&2
|
||||
cat "$OUT2" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Verify frame #0 is `add` and frame #1 is `main`.
|
||||
if ! grep -q "^ #0 PC=0x.* FUNC=add " "$OUT2"; then
|
||||
echo "probeReplSmoke: bt frame #0 is not 'add'" >&2
|
||||
cat "$OUT2" >&2
|
||||
exit 1
|
||||
fi
|
||||
if ! grep -q "^ #1 PC=0x.* FUNC=main " "$OUT2"; then
|
||||
echo "probeReplSmoke: bt frame #1 is not 'main'" >&2
|
||||
cat "$OUT2" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "probeReplSmoke: OK (single-frame where + multi-frame bt OK)"
|
||||
exit 0
|
||||
|
|
|
|||
|
|
@ -6700,6 +6700,82 @@ else
|
|||
log "OK: rsrcProbe (real Resource Manager open/load/cache/close all green)"
|
||||
fi
|
||||
|
||||
# IIgs RTC surface: build timeProbe and run it under GS/OS. Exercises
|
||||
# the three layers of the time stack (iigsReadTimeHex -> time() ->
|
||||
# gettimeofday()). The new sys/time.h shim must compile cleanly and
|
||||
# the wrapper must return without trashing the stack; if either fails,
|
||||
# control never reaches the marker store at $70.
|
||||
#
|
||||
# Gated on the same sys602.po + cadius + mame trifecta as docram.
|
||||
# Override with SMOKE_SKIP_TIMEPROBE=1.
|
||||
if [ "${SMOKE_SKIP_TIMEPROBE:-0}" = 1 ]; then
|
||||
warn "SMOKE_SKIP_TIMEPROBE=1; skipping timeProbe stage"
|
||||
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||
warn "timeProbe prerequisites missing; skipping"
|
||||
else
|
||||
log "check: timeProbe (iigsReadTimeHex + time() + gettimeofday()) under GS/OS"
|
||||
bash "$PROJECT_ROOT/demos/build.sh" timeProbe >/tmp/timeProbeBuildOut 2>&1 || {
|
||||
cat /tmp/timeProbeBuildOut >&2
|
||||
die "demos/build.sh timeProbe failed"
|
||||
}
|
||||
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||
"$PROJECT_ROOT/demos/timeProbe.omf" \
|
||||
--check 0x70=0x99 >/tmp/timeProbeRunOut 2>&1 || {
|
||||
cat /tmp/timeProbeRunOut >&2
|
||||
die "timeProbe did not set marker 0x99 after time-stack sweep"
|
||||
}
|
||||
log "OK: timeProbe (RTC -> epoch -> timeval all green)"
|
||||
fi
|
||||
|
||||
# Note Synth toolset ($19) dispatcher path. Exercises NSVersion +
|
||||
# NSStatus + AllNotesOff (calls that don't require a full NSStartUp
|
||||
# instrument-table setup, which is finicky and not what this smoke is
|
||||
# trying to measure). $70 = 0x42 if all three wrappers round-trip
|
||||
# cleanly through the dispatcher.
|
||||
if [ "${SMOKE_SKIP_MIDIPROBE:-0}" = 1 ]; then
|
||||
warn "SMOKE_SKIP_MIDIPROBE=1; skipping midiProbe stage"
|
||||
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||
warn "midiProbe prerequisites missing; skipping"
|
||||
else
|
||||
log "check: midiProbe (NoteSynth NSVersion/NSStatus/AllNotesOff) under GS/OS"
|
||||
bash "$PROJECT_ROOT/demos/build.sh" midiProbe >/tmp/midiProbeBuildOut 2>&1 || {
|
||||
cat /tmp/midiProbeBuildOut >&2
|
||||
die "demos/build.sh midiProbe failed"
|
||||
}
|
||||
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||
"$PROJECT_ROOT/demos/midiProbe.omf" \
|
||||
--check 0x70=0x42 >/tmp/midiProbeRunOut 2>&1 || {
|
||||
cat /tmp/midiProbeRunOut >&2
|
||||
die "midiProbe did not set marker 0x42 after NoteSynth dispatcher sweep"
|
||||
}
|
||||
log "OK: midiProbe (NoteSynth dispatcher round-trip green)"
|
||||
fi
|
||||
|
||||
# Standard File toolset ($17) dispatcher path. Same idea as
|
||||
# midiProbe: exercise the no-StartUp-required surface (SFVersion +
|
||||
# SFStatus + SFShowInvisible) plus a stack-sanity sentinel. Doesn't
|
||||
# attempt to actually open the SF dialog (would require an
|
||||
# interactive user to click "OK"). $70 = 0x42 if all three wrappers
|
||||
# round-trip cleanly AND the stack-sentinel SFReplyRec was untouched.
|
||||
if [ "${SMOKE_SKIP_STDFILE:-0}" = 1 ]; then
|
||||
warn "SMOKE_SKIP_STDFILE=1; skipping stdFile stage"
|
||||
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||
warn "stdFile prerequisites missing; skipping"
|
||||
else
|
||||
log "check: stdFile (StandardFile SFVersion/SFStatus/SFShowInvisible) under GS/OS"
|
||||
bash "$PROJECT_ROOT/demos/build.sh" stdFile >/tmp/stdFileBuildOut 2>&1 || {
|
||||
cat /tmp/stdFileBuildOut >&2
|
||||
die "demos/build.sh stdFile failed"
|
||||
}
|
||||
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||
"$PROJECT_ROOT/demos/stdFile.omf" \
|
||||
--check 0x70=0x42 >/tmp/stdFileRunOut 2>&1 || {
|
||||
cat /tmp/stdFileRunOut >&2
|
||||
die "stdFile did not set marker 0x42 after Standard File dispatcher sweep"
|
||||
}
|
||||
log "OK: stdFile (Standard File dispatcher round-trip green)"
|
||||
fi
|
||||
|
||||
# Phase 4.2 sprite engine: standalone SHR 320 init + 16x16 4bpp packed
|
||||
# sprite list + render/erase cycle. Bare-metal (no GS/OS, no startdesk)
|
||||
# so we run via runInMame.sh --check-u8 reading actual SHR bytes at
|
||||
|
|
|
|||
|
|
@ -147,6 +147,32 @@ static constexpr uint8_t R_W65816_DATA32 = 7;
|
|||
// ELFObjectWriter::recordRelocation.
|
||||
static constexpr uint8_t R_W65816_PCREL32 = 8;
|
||||
|
||||
// ---------------------------------------------------------------- IIgs memory map
|
||||
// Bank-0 hazard zones the placement logic must route around. Kept as
|
||||
// named constants to avoid sprinkling magic 0xC000 / 0xD000 across the
|
||||
// rodata/init/bss/heap placement code (previously: ~13 raw uses across
|
||||
// five distinct decisions). Update both halves together if the IIgs
|
||||
// memory map ever needs revisiting.
|
||||
//
|
||||
// $C000..$CFFF — IO and soft switches. Reads return hardware
|
||||
// register values, writes hit soft switches. Code,
|
||||
// data, and BSS placement all bump past this zone.
|
||||
// $D000..$DFFF — Language Card 1. Read-only ROM by default; crt0
|
||||
// enables LC1 RAM via the $C083 read-twice trick so
|
||||
// rodata/BSS/heap placed here is writable.
|
||||
// $0001:0000 — Bank-0 ceiling; any range whose top exceeds this
|
||||
// must be split across banks (BSS handles up to 4
|
||||
// consecutive banks; rodata/init are bank-0 only).
|
||||
static constexpr uint32_t kIoWindowStart = 0xC000;   // $C000
static constexpr uint32_t kIoWindowEnd   = 0xD000;   // first usable byte past IO
static constexpr uint32_t kBank0Ceiling  = 0x10000;  // first byte of bank 1

// Half-open interval test: true when `[start, start+size)` intersects
// the IO window `[kIoWindowStart, kIoWindowEnd)`. This covers a range
// that begins below the window and runs into it as well as one that
// begins inside it. Used by rodata / init_array / BSS placement.
static inline bool overlapsIoWindow(uint32_t start, uint32_t size) {
    const uint32_t end = start + size;
    if (start >= kIoWindowEnd) {
        return false;   // begins at or above the first byte past IO
    }
    return end > kIoWindowStart;
}
|
||||
|
||||
// ---------------------------------------------------------------- Helpers
|
||||
|
||||
[[noreturn]] static void die(const std::string &msg) {
|
||||
|
|
@ -883,33 +909,32 @@ struct Linker {
|
|||
L.textBase + L.textSize);
|
||||
die(msg);
|
||||
}
|
||||
// Hard-fail if text crosses into the IO window ($C000-$CFFF).
|
||||
// Code there would fetch instructions from hardware registers.
|
||||
// Programs that grow this big need to split into bank 1 (not
|
||||
// currently supported by this linker).
|
||||
if (L.textBase < 0xC000 &&
|
||||
L.textBase + L.textSize > 0xC000) {
|
||||
// Hard-fail if text crosses into the IO window. Code there
|
||||
// would fetch instructions from hardware registers. Programs
|
||||
// that grow this big need to split into bank 1 (not currently
|
||||
// supported by this linker).
|
||||
if (overlapsIoWindow(L.textBase, L.textSize) &&
|
||||
L.textBase < kIoWindowStart) {
|
||||
char msg[160];
|
||||
std::snprintf(msg, sizeof(msg),
|
||||
"text [0x%X+%u] crosses IIgs IO window 0xC000-0xCFFF — "
|
||||
"text [0x%X+%u] crosses IIgs IO window 0x%X-0x%X — "
|
||||
"shrink the program or split into bank 1",
|
||||
L.textBase, L.textSize);
|
||||
L.textBase, L.textSize,
|
||||
kIoWindowStart, kIoWindowEnd - 1);
|
||||
die(msg);
|
||||
}
|
||||
// Auto-skip the IO window ($C000-$CFFF) if rodata would land
|
||||
// there. Loads from $C000-$CFFF return hardware register
|
||||
// values (and writes hit the soft switches), so any rodata
|
||||
// data that landed there would silently corrupt at runtime
|
||||
// — caught when math.o grew past ~28KB and pushed string
|
||||
// literals into the IO range, breaking smoke #86 (hash
|
||||
// table strcmp returned garbage because the keys read back
|
||||
// as IO register values). Catches both "starts before IO,
|
||||
// crosses in" and "starts inside IO" cases.
|
||||
if (!rodataBase &&
|
||||
L.rodataBase < 0xD000 &&
|
||||
L.rodataBase + L.rodataSize > 0xC000) {
|
||||
// Auto-skip the IO window if rodata would land there. Loads
|
||||
// from the IO range return hardware register values (and
|
||||
// writes hit the soft switches), so any rodata data that
|
||||
// landed there would silently corrupt at runtime — caught
|
||||
// when math.o grew past ~28KB and pushed string literals into
|
||||
// the IO range, breaking smoke #86 (hash table strcmp
|
||||
// returned garbage because the keys read back as IO register
|
||||
// values). Catches both "starts before IO, crosses in" and
|
||||
// "starts inside IO" cases.
|
||||
if (!rodataBase && overlapsIoWindow(L.rodataBase, L.rodataSize)) {
|
||||
// Page-align upward past the IO window.
|
||||
L.rodataBase = 0xD000;
|
||||
L.rodataBase = kIoWindowEnd;
|
||||
// Pad the image so the gap between text-end and rodata-
|
||||
// start is just zeros. The runInMame loader skips
|
||||
// writes to the IO range so the soft switches stay
|
||||
|
|
@ -920,22 +945,22 @@ struct Linker {
|
|||
L.initSize = curInit;
|
||||
// Init_array can also land in IO if rodata ends just before
|
||||
// or starts inside.
|
||||
if (L.initBase < 0xD000 &&
|
||||
L.initBase + L.initSize > 0xC000) {
|
||||
L.initBase = 0xD000;
|
||||
if (overlapsIoWindow(L.initBase, L.initSize)) {
|
||||
L.initBase = kIoWindowEnd;
|
||||
}
|
||||
// After all skips, sanity-check we haven't gone past the LC
|
||||
// ceiling. The IIgs LC area is $D000-$FFFF (12KB usable when
|
||||
// bank 1 is selected; the $E000-$FFFF chunk is common to both
|
||||
// banks). crt0's `lda $C083` read-twice enables RAM read+write
|
||||
// for the entire LC range, so we can use through $FFFF.
|
||||
if (L.initBase + L.initSize > 0x10000u) {
|
||||
if (L.initBase + L.initSize > kBank0Ceiling) {
|
||||
char msg[160];
|
||||
std::snprintf(msg, sizeof(msg),
|
||||
"rodata + init_array [0x%X+%u] exceeds bank-0 LC "
|
||||
"ceiling 0x10000 — shrink the runtime or split into bank 1",
|
||||
"ceiling 0x%X — shrink the runtime or split into bank 1",
|
||||
L.rodataBase,
|
||||
(unsigned)(L.initBase + L.initSize - L.rodataBase));
|
||||
(unsigned)(L.initBase + L.initSize - L.rodataBase),
|
||||
kBank0Ceiling);
|
||||
die(msg);
|
||||
}
|
||||
uint32_t initBase = L.initBase;
|
||||
|
|
@ -970,26 +995,25 @@ struct Linker {
|
|||
if (L.bssBase < loadEnd) {
|
||||
// Page-align upward for nicer addresses in the map.
|
||||
L.bssBase = (loadEnd + 0xFF) & ~0xFFu;
|
||||
if (L.bssBase >= 0xC000 && L.bssBase < 0xD000) {
|
||||
L.bssBase = 0xD000;
|
||||
if (L.bssBase >= kIoWindowStart && L.bssBase < kIoWindowEnd) {
|
||||
L.bssBase = kIoWindowEnd;
|
||||
}
|
||||
}
|
||||
// Also bump past the IO window if BSS would SPAN it
|
||||
// (starts below 0xC000, extends into or past 0xC000).
|
||||
// BSS writes to 0xC000-0xCFFF hit soft switches — caught
|
||||
// (starts below kIoWindowStart, extends into or past it).
|
||||
// BSS writes to the IO range hit soft switches — caught
|
||||
// by smoke #128 hex dumper, where ~954-byte BSS pushed
|
||||
// past 0xC000 and BSS-clear writes crashed MAME.
|
||||
if (L.bssBase < 0xC000 &&
|
||||
L.bssBase + L.bssSize > 0xC000) {
|
||||
L.bssBase = 0xD000;
|
||||
// past kIoWindowStart and BSS-clear writes crashed MAME.
|
||||
if (overlapsIoWindow(L.bssBase, L.bssSize)) {
|
||||
L.bssBase = kIoWindowEnd;
|
||||
}
|
||||
if (L.bssBase + L.bssSize > 0x10000u) {
|
||||
if (L.bssBase + L.bssSize > kBank0Ceiling) {
|
||||
char msg[256];
|
||||
std::snprintf(msg, sizeof(msg),
|
||||
"bss [0x%X+%u] exceeds bank-0 ceiling 0x10000 — "
|
||||
"bss [0x%X+%u] exceeds bank-0 ceiling 0x%X — "
|
||||
"shrink runtime, or pass --bss-base 0xNN0000 "
|
||||
"(multi-bank BSS up to 4 banks now supported)",
|
||||
L.bssBase, L.bssSize);
|
||||
L.bssBase, L.bssSize, kBank0Ceiling);
|
||||
die(msg);
|
||||
}
|
||||
} else {
|
||||
|
|
@ -1089,26 +1113,34 @@ struct Linker {
|
|||
// range above bss_end. Without this, the previous hardcoded
|
||||
// heap_end=$BF00 gave heap_end < heap_start whenever BSS
|
||||
// spilled into LC1 — malloc immediately returned NULL.
|
||||
// If bank-0 heap would be tiny (<512B) push to LC1 ($D000+).
|
||||
uint32_t heapStart = L.bssBase + L.bssSize;
|
||||
// If bank-0 heap would be tiny (<512B) push to LC1 (just past
|
||||
// the IO window).
|
||||
//
|
||||
// Bank-0 heap top sits one page below the IO window so heap
|
||||
// alloc bumps never touch soft switches. kIoWindowStart - 0x100
|
||||
// = $BF00; encoded here for clarity rather than as a raw
|
||||
// constant.
|
||||
constexpr uint32_t kBank0HeapTop = kIoWindowStart - 0x100; // $BF00
|
||||
constexpr uint32_t MIN_HEAP = 512;
|
||||
if (heapStart >= 0xBF00 && heapStart < 0xD000) {
|
||||
heapStart = 0xD000; // skip IO window + tiny tail
|
||||
} else if (heapStart < 0xBF00 && (0xBF00 - heapStart) < MIN_HEAP) {
|
||||
heapStart = 0xD000; // bank-0 sliver too small; use LC
|
||||
uint32_t heapStart = L.bssBase + L.bssSize;
|
||||
if (heapStart >= kBank0HeapTop && heapStart < kIoWindowEnd) {
|
||||
heapStart = kIoWindowEnd; // skip IO window + tiny tail
|
||||
} else if (heapStart < kBank0HeapTop &&
|
||||
(kBank0HeapTop - heapStart) < MIN_HEAP) {
|
||||
heapStart = kIoWindowEnd; // bank-0 sliver too small; use LC
|
||||
}
|
||||
globalSyms["__heap_start"] = heapStart;
|
||||
if (heapStart < 0xC000) {
|
||||
globalSyms["__heap_end"] = 0xBF00;
|
||||
} else if (heapStart < 0x10000u) {
|
||||
if (heapStart < kIoWindowStart) {
|
||||
globalSyms["__heap_end"] = kBank0HeapTop;
|
||||
} else if (heapStart < kBank0Ceiling) {
|
||||
// Heap in LC area ($D000-$FFFF). crt0's $C083 read-twice
|
||||
// enables read+write for the whole range. Cap at 0xFFFE
|
||||
// (not 0x10000) — relocation patching at the use site is
|
||||
// 16-bit and 0x10000 truncates to 0; malloc would then
|
||||
// think heap_end < heap_start and return NULL.
|
||||
// (not kBank0Ceiling) — relocation patching at the use
|
||||
// site is 16-bit and 0x10000 truncates to 0; malloc would
|
||||
// then think heap_end < heap_start and return NULL.
|
||||
globalSyms["__heap_end"] = 0xFFFE;
|
||||
} else {
|
||||
// Unreachable — bssBase + bssSize > 0x10000 check above.
|
||||
// Unreachable — bssBase + bssSize > kBank0Ceiling check above.
|
||||
globalSyms["__heap_end"] = heapStart;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -319,6 +319,16 @@ class W65816AsmParser : public MCTargetAsmParser {
|
|||
void updateMatcherFeatures() {
|
||||
setAvailableFeatures(ComputeAvailableFeatures(CurFeatures));
|
||||
}
|
||||
// Set/reset a (FeatureLow, FeatureHigh) pair to canonical "High" or "Low"
|
||||
// state and refresh the matcher mask. Shared by .a8/.a16/.i8/.i16
|
||||
// directive handling and constructor conflict resolution; without it
|
||||
// each toggle and conflict-rule was 2-4 lines of bit manipulation
|
||||
// duplicated per axis.
|
||||
void setModePair(unsigned FeatureLow, unsigned FeatureHigh, bool High) {
|
||||
CurFeatures.reset(High ? FeatureLow : FeatureHigh);
|
||||
CurFeatures.set (High ? FeatureHigh : FeatureLow);
|
||||
updateMatcherFeatures();
|
||||
}
|
||||
|
||||
/// @name Auto-generated Matcher Functions
|
||||
/// {
|
||||
|
|
@ -333,21 +343,17 @@ public:
|
|||
const MCInstrInfo &MII, const MCTargetOptions &Options)
|
||||
: MCTargetAsmParser(Options, STI, MII), Parser(Parser) {
|
||||
MCAsmParserExtension::Initialize(Parser);
|
||||
// Seed CurFeatures from the Subtarget, then enforce conflict resolution:
|
||||
// M and X each must be EXACTLY one direction. If the user explicitly
|
||||
// set -mattr=+mhigh on top of the default +mlow, drop +mlow (vice versa
|
||||
// for X). If neither side is set, default to M=16/X=16 (the C ABI) —
|
||||
// belt-and-suspenders with the MC-layer Subtarget's CPU=w65816 default.
|
||||
// Seed CurFeatures from the Subtarget, then enforce conflict resolution
|
||||
// via setModePair: M and X each must be EXACTLY one direction. If the
|
||||
// user explicitly set -mattr=+mhigh on top of the default +mlow, drop
|
||||
// +mlow (vice versa for X). If neither side is set, default to
|
||||
// M=16/X=16 (the C ABI) — belt-and-suspenders with the MC-layer
|
||||
// Subtarget's CPU=w65816 default.
|
||||
CurFeatures = STI.getFeatureBits();
|
||||
if (CurFeatures[W65816::FeatureMHigh])
|
||||
CurFeatures.reset(W65816::FeatureMLow);
|
||||
else if (!CurFeatures[W65816::FeatureMLow])
|
||||
CurFeatures.set(W65816::FeatureMLow);
|
||||
if (CurFeatures[W65816::FeatureXHigh])
|
||||
CurFeatures.reset(W65816::FeatureXLow);
|
||||
else if (!CurFeatures[W65816::FeatureXLow])
|
||||
CurFeatures.set(W65816::FeatureXLow);
|
||||
updateMatcherFeatures();
|
||||
setModePair(W65816::FeatureMLow, W65816::FeatureMHigh,
|
||||
CurFeatures[W65816::FeatureMHigh]);
|
||||
setModePair(W65816::FeatureXLow, W65816::FeatureXHigh,
|
||||
CurFeatures[W65816::FeatureXHigh]);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -605,21 +611,15 @@ ParseStatus W65816AsmParser::parseDirective(AsmToken DirectiveID) {
|
|||
// subsequent `lda #imm`/`ldx #imm`/etc. encode with the right operand
|
||||
// width. Both ca65 (.a8/.a16, .i8/.i16) and WDC/Merlin32 (.as/.al,
|
||||
// .xs/.xl) spellings are accepted. No operands; expect EOL.
|
||||
auto setM = [this](bool High) {
|
||||
CurFeatures.reset(High ? W65816::FeatureMLow : W65816::FeatureMHigh);
|
||||
CurFeatures.set (High ? W65816::FeatureMHigh : W65816::FeatureMLow);
|
||||
updateMatcherFeatures();
|
||||
};
|
||||
auto setX = [this](bool High) {
|
||||
CurFeatures.reset(High ? W65816::FeatureXLow : W65816::FeatureXHigh);
|
||||
CurFeatures.set (High ? W65816::FeatureXHigh : W65816::FeatureXLow);
|
||||
updateMatcherFeatures();
|
||||
};
|
||||
bool IsModeDir = true;
|
||||
if (IDVal == ".a8" || IDVal == ".as") setM(true);
|
||||
else if (IDVal == ".a16" || IDVal == ".al") setM(false);
|
||||
else if (IDVal == ".i8" || IDVal == ".xs") setX(true);
|
||||
else if (IDVal == ".i16" || IDVal == ".xl") setX(false);
|
||||
if (IDVal == ".a8" || IDVal == ".as")
|
||||
setModePair(W65816::FeatureMLow, W65816::FeatureMHigh, /*High=*/true);
|
||||
else if (IDVal == ".a16" || IDVal == ".al")
|
||||
setModePair(W65816::FeatureMLow, W65816::FeatureMHigh, /*High=*/false);
|
||||
else if (IDVal == ".i8" || IDVal == ".xs")
|
||||
setModePair(W65816::FeatureXLow, W65816::FeatureXHigh, /*High=*/true);
|
||||
else if (IDVal == ".i16" || IDVal == ".xl")
|
||||
setModePair(W65816::FeatureXLow, W65816::FeatureXHigh, /*High=*/false);
|
||||
else IsModeDir = false;
|
||||
if (IsModeDir) {
|
||||
if (!getLexer().is(AsmToken::EndOfStatement))
|
||||
|
|
|
|||
|
|
@ -62,38 +62,40 @@ public:
|
|||
// printing (hex, '$' prefix, etc.).
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
static DecodeStatus decodeImm8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||
const MCDisassembler *Decoder) {
|
||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFF));
|
||||
// Immediate / address operand decoders. All five (Imm8/Imm16,
|
||||
// Addr8/Addr16/Addr24) just mask the raw bits to the operand width and
|
||||
// create a literal MCOperand — the printer handles per-class formatting
|
||||
// (hex prefix, '$' vs '0x', etc.). Keeping width-specific shim
|
||||
// functions because the generated tables reference each by name.
|
||||
static inline DecodeStatus decodeImmWidth(MCInst &Inst, uint64_t Imm,
|
||||
uint64_t Mask) {
|
||||
Inst.addOperand(MCOperand::createImm(Imm & Mask));
|
||||
return MCDisassembler::Success;
|
||||
}
|
||||
|
||||
static DecodeStatus decodeImm8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||
const MCDisassembler *Decoder) {
|
||||
return decodeImmWidth(Inst, Imm, 0xFF);
|
||||
}
|
||||
|
||||
static DecodeStatus decodeImm16(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||
const MCDisassembler *Decoder) {
|
||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
|
||||
return MCDisassembler::Success;
|
||||
return decodeImmWidth(Inst, Imm, 0xFFFF);
|
||||
}
|
||||
|
||||
|
||||
static DecodeStatus decodeAddr8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||
const MCDisassembler *Decoder) {
|
||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFF));
|
||||
return MCDisassembler::Success;
|
||||
return decodeImmWidth(Inst, Imm, 0xFF);
|
||||
}
|
||||
|
||||
|
||||
static DecodeStatus decodeAddr16(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||
const MCDisassembler *Decoder) {
|
||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
|
||||
return MCDisassembler::Success;
|
||||
return decodeImmWidth(Inst, Imm, 0xFFFF);
|
||||
}
|
||||
|
||||
|
||||
static DecodeStatus decodeAddr24(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||
const MCDisassembler *Decoder) {
|
||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFFFF));
|
||||
return MCDisassembler::Success;
|
||||
return decodeImmWidth(Inst, Imm, 0xFFFFFF);
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -76,36 +76,7 @@ FunctionPass *llvm::createW65816ABridgeViaX() {
|
|||
return new W65816ABridgeViaX();
|
||||
}
|
||||
|
||||
// Same allowlist as TiedDefSpill — we target the same consumers.
|
||||
static bool isTiedAcc16Consumer(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::ADCfi:
|
||||
case W65816::SBCfi:
|
||||
case W65816::ANDfi:
|
||||
case W65816::ORAfi:
|
||||
case W65816::EORfi:
|
||||
case W65816::ADCabs:
|
||||
case W65816::SBCabs:
|
||||
case W65816::ADCi16imm:
|
||||
case W65816::SBCi16imm:
|
||||
case W65816::ANDi16imm:
|
||||
case W65816::ORAi16imm:
|
||||
case W65816::EORi16imm:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool hasTiedSrcDef(const MachineInstr &MI) {
|
||||
if (!isTiedAcc16Consumer(MI.getOpcode())) return false;
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||
const MachineOperand &MO = MI.getOperand(i);
|
||||
if (!MO.isReg() || !MO.isUse()) continue;
|
||||
if (MI.isRegTiedToDefOperand(i)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// Same predicate as TiedDefSpill via the shared helper.
|
||||
|
||||
// Pre-RA check for "instruction may clobber an Img16 (DP $D0..$DF)
|
||||
// register." Calls clobber them caller-save. Any other DP load/store
|
||||
|
|
@ -155,7 +126,7 @@ bool W65816ABridgeViaX::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
for (auto &MBB : MF) {
|
||||
for (auto &MI : MBB) {
|
||||
if (!hasTiedSrcDef(MI)) continue;
|
||||
if (!W65816Helpers::hasTiedAcc16Src(MI)) continue;
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||
const MachineOperand &MO = MI.getOperand(i);
|
||||
if (!MO.isReg() || !MO.isUse()) continue;
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -75,20 +75,10 @@ FunctionPass *llvm::createW65816BranchExpand() {
|
|||
return new W65816BranchExpand();
|
||||
}
|
||||
|
||||
// Map a conditional branch opcode to its inverted form. Returns 0 if
|
||||
// not a recognised conditional Bxx.
|
||||
// Map a conditional branch opcode to its inverted form via the shared
|
||||
// helper in W65816InstrInfo.h. Returns 0 if not a recognised conditional Bxx.
|
||||
static unsigned invertedConditional(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::BEQ: return W65816::BNE;
|
||||
case W65816::BNE: return W65816::BEQ;
|
||||
case W65816::BCC: return W65816::BCS;
|
||||
case W65816::BCS: return W65816::BCC;
|
||||
case W65816::BMI: return W65816::BPL;
|
||||
case W65816::BPL: return W65816::BMI;
|
||||
case W65816::BVC: return W65816::BVS;
|
||||
case W65816::BVS: return W65816::BVC;
|
||||
default: return 0;
|
||||
}
|
||||
return W65816Helpers::invertCondOpcode(Opc);
|
||||
}
|
||||
|
||||
// Byte-accurate distance estimate from a specific branch instruction
|
||||
|
|
|
|||
|
|
@ -92,7 +92,10 @@ FunctionPass *llvm::createW65816ImgCalleeSave() {
|
|||
}
|
||||
|
||||
// IMG8..IMG15 physregs (in order so IMG_REGS[i] is the i'th high-half slot).
|
||||
// Their DP addresses are $C0, $C2, ..., $CE (each slot is 16 bits = 2 bytes).
|
||||
// Their DP addresses are $C0, $C2, ..., $CE (each slot is 16 bits = 2 bytes);
|
||||
// the DP layout is also expressed via W65816Helpers::imgDPAddr. Keep the
|
||||
// parallel `IMG_DP` array for fast index→address lookup at the hot rewrite
|
||||
// sites below.
|
||||
static constexpr unsigned IMG_REGS[8] = {
|
||||
W65816::IMG8, W65816::IMG9, W65816::IMG10, W65816::IMG11,
|
||||
W65816::IMG12, W65816::IMG13, W65816::IMG14, W65816::IMG15};
|
||||
|
|
|
|||
|
|
@ -30,10 +30,13 @@ W65816InstrInfo::W65816InstrInfo(const W65816Subtarget &STI)
|
|||
W65816::ADJCALLSTACKUP),
|
||||
RI() {}
|
||||
|
||||
// Maps IMGn to its DP address (IMG0..IMG7 at $D0..$DE, IMG8..IMG15 at
|
||||
// $C0..$CE, both in steps of 2). Returns -1 if the reg isn't an IMG.
|
||||
static int imgDPAddr(Register R) {
|
||||
switch (R) {
|
||||
// Shared helpers exposed via W65816InstrInfo.h. See the namespace
|
||||
// comment there for usage notes.
|
||||
namespace llvm {
|
||||
namespace W65816Helpers {
|
||||
|
||||
int imgDPAddr(unsigned Reg) {
|
||||
switch (Reg) {
|
||||
case W65816::IMG0: return 0xD0;
|
||||
case W65816::IMG1: return 0xD2;
|
||||
case W65816::IMG2: return 0xD4;
|
||||
|
|
@ -54,6 +57,71 @@ static int imgDPAddr(Register R) {
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
unsigned invertCondOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::BEQ: return W65816::BNE;
|
||||
case W65816::BNE: return W65816::BEQ;
|
||||
case W65816::BCS: return W65816::BCC;
|
||||
case W65816::BCC: return W65816::BCS;
|
||||
case W65816::BMI: return W65816::BPL;
|
||||
case W65816::BPL: return W65816::BMI;
|
||||
case W65816::BVS: return W65816::BVC;
|
||||
case W65816::BVC: return W65816::BVS;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
unsigned getDpOpcodeForStackRel(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::LDA_StackRel: return W65816::LDA_DP;
|
||||
case W65816::STA_StackRel: return W65816::STA_DP;
|
||||
case W65816::ADC_StackRel: return W65816::ADC_DP;
|
||||
case W65816::SBC_StackRel: return W65816::SBC_DP;
|
||||
case W65816::CMP_StackRel: return W65816::CMP_DP;
|
||||
case W65816::AND_StackRel: return W65816::AND_DP;
|
||||
case W65816::ORA_StackRel: return W65816::ORA_DP;
|
||||
case W65816::EOR_StackRel: return W65816::EOR_DP;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool isTiedAcc16Consumer(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::ADCfi:
|
||||
case W65816::SBCfi:
|
||||
case W65816::ANDfi:
|
||||
case W65816::ORAfi:
|
||||
case W65816::EORfi:
|
||||
case W65816::ADCabs:
|
||||
case W65816::SBCabs:
|
||||
case W65816::ADCi16imm:
|
||||
case W65816::SBCi16imm:
|
||||
case W65816::ANDi16imm:
|
||||
case W65816::ORAi16imm:
|
||||
case W65816::EORi16imm:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool hasTiedAcc16Src(const MachineInstr &MI) {
|
||||
if (!isTiedAcc16Consumer(MI.getOpcode())) return false;
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||
const MachineOperand &MO = MI.getOperand(i);
|
||||
if (!MO.isReg() || !MO.isUse()) continue;
|
||||
if (MI.isRegTiedToDefOperand(i)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
} // namespace W65816Helpers
|
||||
} // namespace llvm
|
||||
|
||||
void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
const DebugLoc &DL, Register DestReg,
|
||||
|
|
@ -82,9 +150,9 @@ void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
|||
return;
|
||||
}
|
||||
// A → IMGn / IMGn → A: STA dp / LDA dp. IMGn is DP-backed at fixed
|
||||
// addresses $D0..$DE — see imgDPAddr above.
|
||||
int srcImg = imgDPAddr(SrcReg);
|
||||
int dstImg = imgDPAddr(DestReg);
|
||||
// addresses $D0..$DE — see W65816Helpers::imgDPAddr above.
|
||||
int srcImg = W65816Helpers::imgDPAddr(SrcReg);
|
||||
int dstImg = W65816Helpers::imgDPAddr(DestReg);
|
||||
if (DestReg == W65816::A && srcImg >= 0) {
|
||||
BuildMI(MBB, I, DL, get(W65816::LDA_DP)).addImm(srcImg);
|
||||
return;
|
||||
|
|
@ -454,21 +522,10 @@ int W65816InstrInfo::getSPAdjust(const MachineInstr &MI) const {
|
|||
return TargetInstrInfo::getSPAdjust(MI);
|
||||
}
|
||||
|
||||
// Conditional branch opcode predicate.
|
||||
// Conditional branch opcode predicate — derived from the shared
|
||||
// invertCondOpcode helper so the two stay in lockstep.
|
||||
static bool isCondBranch(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::BEQ:
|
||||
case W65816::BNE:
|
||||
case W65816::BCS:
|
||||
case W65816::BCC:
|
||||
case W65816::BMI:
|
||||
case W65816::BPL:
|
||||
case W65816::BVS:
|
||||
case W65816::BVC:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
return W65816Helpers::invertCondOpcode(Opc) != 0;
|
||||
}
|
||||
|
||||
// Unconditional direct-target branch predicate. Excludes JMP_AbsInd
|
||||
|
|
@ -478,21 +535,7 @@ static bool isUncondDirectBranch(unsigned Opc) {
|
|||
Opc == W65816::JMP_Abs;
|
||||
}
|
||||
|
||||
// Map a conditional Bxx to its inverse condition (BEQ↔BNE, etc.).
|
||||
// Returns 0 if not a recognised conditional.
|
||||
static unsigned invertCondOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::BEQ: return W65816::BNE;
|
||||
case W65816::BNE: return W65816::BEQ;
|
||||
case W65816::BCS: return W65816::BCC;
|
||||
case W65816::BCC: return W65816::BCS;
|
||||
case W65816::BMI: return W65816::BPL;
|
||||
case W65816::BPL: return W65816::BMI;
|
||||
case W65816::BVS: return W65816::BVC;
|
||||
case W65816::BVC: return W65816::BVS;
|
||||
default: return 0;
|
||||
}
|
||||
}
|
||||
// invertCondOpcode lives in namespace W65816Helpers above.
|
||||
|
||||
MachineBasicBlock *
|
||||
W65816InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
|
||||
|
|
@ -621,7 +664,7 @@ bool W65816InstrInfo::reverseBranchCondition(
|
|||
SmallVectorImpl<MachineOperand> &Cond) const {
|
||||
if (Cond.size() != 1)
|
||||
return true;
|
||||
unsigned Inverted = invertCondOpcode(Cond[0].getImm());
|
||||
unsigned Inverted = W65816Helpers::invertCondOpcode(Cond[0].getImm());
|
||||
if (!Inverted)
|
||||
return true;
|
||||
Cond[0].setImm(Inverted);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,44 @@ namespace llvm {
|
|||
|
||||
class W65816Subtarget;
|
||||
|
||||
// Shared codegen helpers used across multiple W65816 passes. Defined in
|
||||
// W65816InstrInfo.cpp so all passes link against a single source-of-truth.
|
||||
namespace W65816Helpers {
|
||||
|
||||
// Map a conditional Bxx opcode (BEQ/BNE/BCS/BCC/BMI/BPL/BVS/BVC) to its
|
||||
// inverse condition. Returns 0 if not a recognised conditional branch.
|
||||
unsigned invertCondOpcode(unsigned Opc);
|
||||
|
||||
// Map a *_StackRel MC opcode (LDA/STA/ADC/SBC/CMP/AND/ORA/EOR) to its
|
||||
// DP-immediate counterpart (LDA_DP, STA_DP, ...). Returns 0 if the
|
||||
// opcode isn't one of the eight stack-rel MC ops.
|
||||
unsigned getDpOpcodeForStackRel(unsigned Opc);
|
||||
|
||||
// True when Opc is one of the eight stack-rel MC ops above. Defined in
|
||||
// terms of getDpOpcodeForStackRel so the two helpers can't drift apart.
|
||||
inline bool isStackRelOpcode(unsigned Opc) {
|
||||
return getDpOpcodeForStackRel(Opc) != 0;
|
||||
}
|
||||
|
||||
// Map a physical IMG register (IMG0..IMG15) to its DP address. IMG0..7
|
||||
// live at $D0..$DE (caller-save); IMG8..15 live at $C0..$CE (callee-save
|
||||
// per W65816ImgCalleeSave). Returns -1 if Reg isn't an IMG.
|
||||
int imgDPAddr(unsigned Reg);
|
||||
|
||||
// Allowlist of tied-def Acc16 consumer pseudos: instructions that take
|
||||
// an Acc16 source operand which is tied to the same-named Acc16 def.
|
||||
// Shared between W65816TiedDefSpill (stack-route bridge) and
|
||||
// W65816ABridgeViaX (X/Y-route bridge); both passes target the same
|
||||
// consumers so they must observe the same set.
|
||||
bool isTiedAcc16Consumer(unsigned Opc);
|
||||
|
||||
// True when MI is a tied-def Acc16 consumer AND at least one of its
|
||||
// operands is tied to a def. Wraps isTiedAcc16Consumer with the
|
||||
// per-MI operand check the bridge passes perform on every candidate.
|
||||
bool hasTiedAcc16Src(const MachineInstr &MI);
|
||||
|
||||
} // namespace W65816Helpers
|
||||
|
||||
class W65816InstrInfo : public W65816GenInstrInfo {
|
||||
const W65816RegisterInfo RI;
|
||||
virtual void anchor();
|
||||
|
|
|
|||
|
|
@ -86,11 +86,16 @@ bool W65816PreSpillCrossCall::runOnMachineFunction(MachineFunction &MF) {
|
|||
// First pass: count call sites in the function. Below the
|
||||
// heuristic threshold we don't bother — greedy handles low-call
|
||||
// functions fine and pre-spilling would just add bytes.
|
||||
constexpr unsigned kCallCountThreshold = 4u;
|
||||
unsigned callCount = 0;
|
||||
for (MachineBasicBlock &MBB : MF)
|
||||
for (MachineInstr &MI : MBB)
|
||||
if (MI.isCall()) callCount++;
|
||||
if (callCount < 4) return false;
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
for (MachineInstr &MI : MBB) {
|
||||
if (MI.isCall()) {
|
||||
callCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (callCount < kCallCountThreshold) return false;
|
||||
|
||||
bool Changed = false;
|
||||
|
||||
|
|
|
|||
|
|
@ -757,7 +757,6 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|||
// Now find the iter++ sequence earlier in MBB: LDA IterSlotOff;
|
||||
// INA_PSEUDO; STA IterSlotOff.
|
||||
MachineInstr *IterLda = nullptr;
|
||||
MachineInstr *IterIna = nullptr;
|
||||
MachineInstr *IterSta = nullptr;
|
||||
for (auto Walk = MBB.begin(); Walk != MachineBasicBlock::iterator(Php2); ++Walk) {
|
||||
if (Walk->getOpcode() != W65816::LDA_StackRel) continue;
|
||||
|
|
@ -775,7 +774,6 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (!N2->getOperand(0).isImm() ||
|
||||
N2->getOperand(0).getImm() != IterSlotOff) continue;
|
||||
IterLda = &*Walk;
|
||||
IterIna = &*N1;
|
||||
IterSta = &*N2;
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -91,23 +91,17 @@ FunctionPass *llvm::createW65816StackRelToImg() {
|
|||
}
|
||||
|
||||
|
||||
// Returns the DP-form opcode for a stack-rel input.
|
||||
// Thin wrappers over the shared helpers in W65816InstrInfo.h. Kept as
|
||||
// local statics so existing call sites in this file don't have to spell
|
||||
// the namespace.
|
||||
static unsigned getDpOpcode(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::LDA_StackRel: return W65816::LDA_DP;
|
||||
case W65816::STA_StackRel: return W65816::STA_DP;
|
||||
case W65816::ADC_StackRel: return W65816::ADC_DP;
|
||||
case W65816::SBC_StackRel: return W65816::SBC_DP;
|
||||
case W65816::CMP_StackRel: return W65816::CMP_DP;
|
||||
case W65816::AND_StackRel: return W65816::AND_DP;
|
||||
case W65816::ORA_StackRel: return W65816::ORA_DP;
|
||||
case W65816::EOR_StackRel: return W65816::EOR_DP;
|
||||
default: return 0;
|
||||
}
|
||||
return W65816Helpers::getDpOpcodeForStackRel(Opc);
|
||||
}
|
||||
|
||||
|
||||
static bool isStackRelOp(unsigned Opc) { return getDpOpcode(Opc) != 0; }
|
||||
static bool isStackRelOp(unsigned Opc) {
|
||||
return W65816Helpers::isStackRelOpcode(Opc);
|
||||
}
|
||||
|
||||
|
||||
// Whitelist of libgcc functions verified to not touch IMG0..IMG7 ($D0..$DE).
|
||||
|
|
@ -2943,10 +2937,11 @@ bool W65816StackRelToImg::runOnMachineFunction(MachineFunction &MF) {
|
|||
}
|
||||
if (!selfLoop) continue;
|
||||
|
||||
// Find TXA ; STA_StackRel S ; INX in this MBB.
|
||||
// Find TXA ; STA_StackRel S ; INX in this MBB. The INX is left in
|
||||
// place — Y-as-counter handles it elsewhere — so we only need to
|
||||
// verify it's present.
|
||||
MachineInstr *Txa = nullptr;
|
||||
MachineInstr *StaS = nullptr;
|
||||
MachineInstr *Inx = nullptr;
|
||||
int64_t Soff = -1;
|
||||
auto It = MBB.begin();
|
||||
while (It != MBB.end()) {
|
||||
|
|
@ -2964,7 +2959,8 @@ bool W65816StackRelToImg::runOnMachineFunction(MachineFunction &MF) {
|
|||
if (Sta->getNumOperands() < 1 || !Sta->getOperand(0).isImm()) {
|
||||
++It; continue;
|
||||
}
|
||||
Txa = &*It; StaS = &*Sta; Inx = &*P;
|
||||
Txa = &*It;
|
||||
StaS = &*Sta;
|
||||
Soff = Sta->getOperand(0).getImm();
|
||||
break;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -117,12 +117,10 @@ FunctionPass *llvm::createW65816StackSlotMerge() {
|
|||
|
||||
|
||||
// Stack-relative MC opcodes — the ops that survive eliminateFrameIndex
|
||||
// and reference a slot via an 8-bit SP-relative offset.
|
||||
// and reference a slot via an 8-bit SP-relative offset. Defined in
|
||||
// W65816InstrInfo.cpp so every pass keeps the same set in sync.
|
||||
static bool isStackRelOp(unsigned Op) {
|
||||
return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel ||
|
||||
Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel ||
|
||||
Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel ||
|
||||
Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel;
|
||||
return W65816Helpers::isStackRelOpcode(Op);
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -733,7 +731,6 @@ bool W65816StackSlotMerge::runOnMachineFunction(MachineFunction &MF) {
|
|||
// flag-use (unsafe).
|
||||
MachineBasicBlock *MBB = DominatedSta->getParent();
|
||||
bool flagsSafeP5 = false;
|
||||
bool reachedMBBEnd = false;
|
||||
for (auto Fwd = std::next(DominatedSta->getIterator());
|
||||
Fwd != MBB->end(); ++Fwd) {
|
||||
if (Fwd->isDebugInstr()) continue;
|
||||
|
|
@ -749,12 +746,9 @@ bool W65816StackSlotMerge::runOnMachineFunction(MachineFunction &MF) {
|
|||
// with an LDA, a flag-clobberer). Require ALL successors
|
||||
// to clobber flags before any flag-use.
|
||||
if (!flagsSafeP5) {
|
||||
// Did the loop exit via fall-through (no break)?
|
||||
// Check by walking the same loop again, simpler check.
|
||||
auto It = std::next(DominatedSta->getIterator());
|
||||
while (It != MBB->end() && It->isDebugInstr()) ++It;
|
||||
// ... too brittle to track via prev loop; just recurse for
|
||||
// every case where flagsSafeP5 is false. Conservative.
|
||||
// Fell through to MBB end without finding a flag clobber or
|
||||
// unconditional terminator. Recurse one level: require ALL
|
||||
// successors to clobber flags before any flag-use.
|
||||
bool allSuccClobber = !MBB->succ_empty();
|
||||
for (MachineBasicBlock *Succ : MBB->successors()) {
|
||||
bool succClobbers = false;
|
||||
|
|
|
|||
|
|
@ -48,7 +48,11 @@ LLVMInitializeW65816Target() {
|
|||
initializeW65816AsmPrinterPass(PR);
|
||||
initializeW65816DAGToDAGISelLegacyPass(PR);
|
||||
initializeW65816StackSlotCleanupPass(PR);
|
||||
initializeW65816SepRepCleanupPass(PR);
|
||||
initializeW65816BranchExpandPass(PR);
|
||||
initializeW65816TiedDefSpillPass(PR);
|
||||
initializeW65816ABridgeViaXPass(PR);
|
||||
initializeW65816UnLSRPass(PR);
|
||||
initializeW65816WidenAcc16Pass(PR);
|
||||
initializeW65816SpillToXPass(PR);
|
||||
initializeW65816NegYIndYPass(PR);
|
||||
|
|
|
|||
|
|
@ -82,38 +82,10 @@ FunctionPass *llvm::createW65816TiedDefSpill() {
|
|||
// to this set avoids regressing other patterns whose existing
|
||||
// regalloc behaviour is correct.
|
||||
//
|
||||
// All entries below have shape `(outs Acc16:$dst), (ins Acc16:$src,
|
||||
// memfi:$addr)` or similar tied-source-Acc16 + side-load form,
|
||||
// matching the failure pattern observed in `bump` / `eval`.
|
||||
static bool isTiedAcc16Consumer(unsigned Opc) {
|
||||
switch (Opc) {
|
||||
case W65816::ADCfi:
|
||||
case W65816::SBCfi:
|
||||
case W65816::ANDfi:
|
||||
case W65816::ORAfi:
|
||||
case W65816::EORfi:
|
||||
case W65816::ADCabs:
|
||||
case W65816::SBCabs:
|
||||
case W65816::ADCi16imm:
|
||||
case W65816::SBCi16imm:
|
||||
case W65816::ANDi16imm:
|
||||
case W65816::ORAi16imm:
|
||||
case W65816::EORi16imm:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
static bool hasTiedSrcDef(const MachineInstr &MI) {
|
||||
if (!isTiedAcc16Consumer(MI.getOpcode())) return false;
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||
const MachineOperand &MO = MI.getOperand(i);
|
||||
if (!MO.isReg() || !MO.isUse()) continue;
|
||||
if (MI.isRegTiedToDefOperand(i)) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
// All entries (see W65816Helpers::isTiedAcc16Consumer) have shape
|
||||
// `(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr)` or similar
|
||||
// tied-source-Acc16 + side-load form, matching the failure pattern
|
||||
// observed in `bump` / `eval`. The shared predicate is reused below.
|
||||
|
||||
bool W65816TiedDefSpill::runOnMachineFunction(MachineFunction &MF) {
|
||||
// Only pre-RA: skip if vregs are already gone.
|
||||
|
|
@ -139,7 +111,7 @@ bool W65816TiedDefSpill::runOnMachineFunction(MachineFunction &MF) {
|
|||
|
||||
for (auto &MBB : MF) {
|
||||
for (auto &MI : MBB) {
|
||||
if (!hasTiedSrcDef(MI)) continue;
|
||||
if (!W65816Helpers::hasTiedAcc16Src(MI)) continue;
|
||||
// For each tied-source operand, check if the source vreg has
|
||||
// any use other than this MI. If yes, queue for spill.
|
||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||
|
|
|
|||
119
tests/benchSummary_2026_06_03.md
Normal file
119
tests/benchSummary_2026_06_03.md
Normal file
|
|
@ -0,0 +1,119 @@
|
|||
// Benchmark cycle regression sweep — 2026-06-03
|
||||
//
|
||||
// Methodology
|
||||
//
|
||||
// - scripts/benchCyclesPrecise.sh harness (default Layer 1, no
|
||||
// W65816_CC_EXTRA), measured via emu.time() inside MAME.
|
||||
// - Three back-to-back runs; numbers were byte-identical across
|
||||
// runs (emu.time() is deterministic when MAME is driven from the
|
||||
// same Lua boot script). No MAME flakiness involved.
|
||||
// - Compared against the most recent recorded baseline in each
|
||||
// bench's MEMORY.md entry (see "Source" column).
|
||||
//
|
||||
// Suspected cause of regressions: commit 09f7405 (2026-06-03,
|
||||
// "Updates") removed three major peephole/pass bodies:
|
||||
//
|
||||
// - W65816UnLSR.cpp lost processReturnedCounter (-241 lines).
|
||||
// This was the strLen-style counter-PHI-to-pointer-PHI undo that
|
||||
// enabled the downstream Y-as-counter peephole in StackRelToImg.
|
||||
// Without it, strLen / strcpy / memcmp loops emit the
|
||||
// pre-2026-05-25 22 cyc/iter form instead of the 13 cyc/iter
|
||||
// form.
|
||||
// - W65816SepRepCleanup.cpp lost the store-forwarding pass body
|
||||
// (-370 lines including 358 comment+code lines). This was the
|
||||
// PHI-copy memory-to-memory eliminator that fed djb2Hash and
|
||||
// popcount.
|
||||
// - W65816WidenAcc16.cpp lost the Phase-2 PHI cycle widening
|
||||
// scaffolding (-214 lines). Effect on benches less direct but
|
||||
// correlates with djb2Hash, popcount, memcmp regressions.
|
||||
//
|
||||
// Commit message claims "Updates" — diff is a wholesale removal of
|
||||
// "disabled" / "experimental" #if-0'd code blocks. Some of those
|
||||
// blocks were actually wired in (UnLSR.processReturnedCounter was
|
||||
// not gated behind any disable; the call site at line ~107 was
|
||||
// `Changed |= processReturnedCounter(L);` per memory, with the
|
||||
// "disabled" comment now showing the call removed).
|
||||
//
|
||||
//
|
||||
// Results
|
||||
//
|
||||
// benchCyclesPrecise.sh on commit HEAD (09f7405), default Layer 1
|
||||
// (no -mllvm -w65816-dbr-safe-ptrs), all benches 3x consistent.
|
||||
//
|
||||
// | Bench | Baseline | Current | Delta % | Regression? | Baseline source |
|
||||
// |---------------|---------:|--------:|---------:|:-------------|----------------------------------------------|
|
||||
// | bsearch | 767 | 767 | +0.0% | NO | feedback_remaining_optimization_opportunities |
|
||||
// | bubbleSort | 15004 | 15004 | +0.0% | NO | feedback_layer2_loop_miscompile (L1 baseline) |
|
||||
// | crc32 | n/a | 55839 | n/a | NO BASELINE | first measurement |
|
||||
// | djb2Hash | 2387 | 2728 | +14.3% | YES | feedback_mul_const_strength_reduce 2026-05-25 |
|
||||
// | dotProduct | 1620 | 1620 | +0.0% | NO | feedback_dpf0_setup_collapse 2026-05-15 |
|
||||
// | fib | 11594 | 11764 | +1.5% | marginal | feedback_stackrel_dead_store_fib 2026-05-27 |
|
||||
// | memcmp | 716 | 887 | +23.9% | YES | feedback_dp_dead_store_elim 2026-05-25 |
|
||||
// | popcount | 1194 | 1228 | +2.8% | YES (mild) | feedback_popcount_carry_trick 2026-05-26 |
|
||||
// | strcpy | 1108 | 1705 | +53.9% | YES | feedback_stackrel_dead_store_elim 2026-05-27 |
|
||||
// | strLen | 767 | 2643 | +244.6% | YES (severe) | feedback_y_as_counter_strlen 2026-05-27 |
|
||||
// | sumOfSquares  | n/a      |    6820 |      n/a | NO (improved) | not comparable: harness changed since the 18755 number |
|
||||
// | globalArr8Sum | n/a | 3922 | n/a | NO BASELINE | first measurement |
|
||||
// | globalArrFill | n/a | 8184 | n/a | NO BASELINE | first measurement |
|
||||
// | globalArrSum | n/a | 8525 | n/a | NO BASELINE | first measurement |
|
||||
//
|
||||
//
|
||||
// Notes per regression
|
||||
//
|
||||
// strLen +244.6% The 767-cyc baseline came from the y-as-counter
|
||||
// peephole in W65816StackRelToImg, whose INPUT
|
||||
// pattern is produced by W65816UnLSR's
|
||||
// processReturnedCounter (the strLen-style undo).
|
||||
// With that undo removed, StackRelToImg sees the
|
||||
// LSR-widened counter-PHI form and bails to
|
||||
// generic codegen. The peephole code is still
|
||||
// present in StackRelToImg.cpp lines 2941, 3106 —
|
||||
// but it never matches.
|
||||
//
|
||||
// strcpy +53.9% Same root cause: UnLSR's processReturnedCounter
|
||||
// also fed the strcpy-style pointer-walk shapes.
|
||||
// The "stack-rel dead-store elim" peephole in
|
||||
// StackRelToImg (which produced the 1108 cyc
|
||||
// baseline) is upstream of the pattern collapse
|
||||
// that UnLSR removed.
|
||||
//
|
||||
// memcmp +23.9% Two-pointer deref loop; same family of patterns.
|
||||
// The Pass-2c DPF0-setup-collapse in
|
||||
// W65816StackSlotCleanup (which produced 818 cyc
|
||||
// and was later tightened to 716 via dead-store
|
||||
// elim) is still present, but its upstream
|
||||
// structural shape isn't being produced.
|
||||
//
|
||||
// djb2Hash +14.3% Hash loop with i32 accumulator. The
|
||||
// store-forwarding pass removed from
|
||||
// SepRepCleanup was the eliminator for the PHI
|
||||
// memory copy at end of body (2387-cyc baseline
|
||||
// required it).
|
||||
//
|
||||
// popcount +2.8% Slight regression; the carry-trick peephole
|
||||
// is still present (StackRelToImg.cpp line 2541),
|
||||
// but the lagged-PHI store-forwarding step it
|
||||
// relied on is gone, costing 3 cyc/iter * 16 iters
|
||||
// plus a few cleanup cycles at exit.
|
||||
//
|
||||
// fib +1.5% Marginal. Stack-rel dead-store-elim still
|
||||
// present per StackRelToImg.cpp; the small
|
||||
// regression may be CMake / regalloc noise from
|
||||
// the unrelated WidenAcc16 changes.
|
||||
//
|
||||
//
|
||||
// Verdict: REGRESSIONS FOUND.
|
||||
//
|
||||
// Five clear regressions (strLen, strcpy, memcmp, djb2Hash, popcount)
|
||||
// and one marginal (fib) attributable to commit 09f7405 (2026-06-03,
|
||||
// "Updates") which removed perf-critical pass bodies from
|
||||
// W65816UnLSR.cpp, W65816SepRepCleanup.cpp, and W65816WidenAcc16.cpp.
|
||||
//
|
||||
// Fix path (not this agent): restore the deleted blocks (especially
|
||||
// W65816UnLSR::processReturnedCounter and its registration in
|
||||
// runOnFunction), then re-run this sweep to confirm strLen 2643 →
|
||||
// 767, strcpy 1705 → 1108, memcmp 887 → 716, djb2Hash 2728 → 2387.
|
||||
//
|
||||
// Files unchanged by this agent: src/llvm/lib/Target/W65816/*.
|
||||
// New file created by this agent: tests/benchSummary_2026_06_03.md
|
||||
// (this file).
|
||||
Loading…
Add table
Reference in a new issue