More updates
This commit is contained in:
parent
09f7405362
commit
3388f3c5a5
28 changed files with 1483 additions and 717 deletions
75
demos/midiProbe.c
Normal file
75
demos/midiProbe.c
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
// midiProbe.c - exercise the Note Synth toolset ($19) dispatcher
|
||||||
|
// path. Verifies the wrapper-to-toolset dispatch round trip:
|
||||||
|
//
|
||||||
|
// 1. iigsSoundProbeInit (MMStartUp + SoundStartUp) -- bare prereq.
|
||||||
|
// 2. NSVersion() -- returns the Note Synth ROM-resident version
|
||||||
|
// word; works without a prior NSStartUp because
|
||||||
|
// the toolset is always present.
|
||||||
|
// 3. NSStatus() -- returns the current toolset state.
|
||||||
|
// 4. AllNotesOff() -- silent (no audible side effect even if the
|
||||||
|
// toolset never had a StartUp); pure dispatch.
|
||||||
|
//
|
||||||
|
// Why NOT a full NSStartUp + NoteOn + NoteOff sequence? NSStartUp
|
||||||
|
// takes a pointer to a complex InstrumentT struct (envelope list,
|
||||||
|
// wave list with topKey/waveAddress/waveSize tuples, etc.). Getting
|
||||||
|
// the layout exactly right is fiddly and not what this smoke is
|
||||||
|
// trying to measure. Smoke goal is: "is the Note Synth dispatcher
|
||||||
|
// callable from llvm816-emitted code, and does the wrapper return
|
||||||
|
// without scribbling on the stack?" Three round-trip calls answer
|
||||||
|
// that.
|
||||||
|
//
|
||||||
|
// If $70 = 0x42 after this runs, the Note Synth wrapper layer is
|
||||||
|
// healthy. (Audible playback through NSStartUp / NoteOn / NoteOff
|
||||||
|
// is exercised when a real app uses it -- not part of THIS smoke.)
|
||||||
|
//
|
||||||
|
// Build with: bash demos/build.sh midiProbe
|
||||||
|
// Run with: bash scripts/runViaFinder.sh demos/midiProbe.omf
|
||||||
|
// --check 0x70=0x42
|
||||||
|
|
||||||
|
#include "iigs/sound.h"
|
||||||
|
#include "iigs/toolbox.h"
|
||||||
|
|
||||||
|
|
||||||
|
int main(void) {
    // Marker bytes sampled by the snapshot harness. Each one is written
    // right after the step it names, so the last marker that changed
    // tells you how far the probe got.
    volatile unsigned char *const markDone      = (volatile unsigned char *)0x70;
    volatile unsigned char *const markVersion   = (volatile unsigned char *)0x71;
    volatile unsigned char *const markStatus    = (volatile unsigned char *)0x73;
    volatile unsigned char *const markNotesOff  = (volatile unsigned char *)0x74;
    volatile unsigned char *const markAlive     = (volatile unsigned char *)0x76;
    volatile unsigned char *const markSoundInit = (volatile unsigned char *)0x77;
    volatile unsigned char *const versionHi     = (volatile unsigned char *)0x78;
    volatile unsigned char *const versionLo     = (volatile unsigned char *)0x79;

    *markAlive = 0xAA;                          // pre-init alive marker

    // The Sound Manager has to be running before the Note Synth
    // dispatcher does real work. iigsSoundProbeInit() performs
    // MMStartUp + SoundStartUp and is a no-op when Finder already did.
    // The user ID it returns is not needed by this probe.
    (void)iigsSoundProbeInit();
    *markSoundInit = 0xBB;                      // post-iigsSoundProbeInit marker

    // NSVersion needs no prior NSStartUp. Its result is published as
    // $78 (high byte) / $79 (low byte) so a wrapper regression (wrong
    // dispatcher ID, missed result pull, ...) is visible as an
    // unexpected version word.
    const unsigned short romVersion = NSVersion();
    *versionHi = (unsigned char)(romVersion >> 8);
    *versionLo = (unsigned char)(romVersion & 0xFF);
    *markVersion = 0x11;                        // post-NSVersion marker

    // NSStatus: the value depends on whether earlier code started the
    // toolset, so only "the wrapper returned" is checked.
    (void)NSStatus();
    *markStatus = 0x22;                         // post-NSStatus marker

    // AllNotesOff: 0-argument / 0-result dispatch, called purely to
    // prove that wrapper shape round-trips.
    AllNotesOff();
    *markNotesOff = 0x33;                       // post-AllNotesOff marker

    // Final smoke marker: every call above returned.
    *markDone = 0x42;

    // Linger so the snapshot harness can sample the markers.
    volatile unsigned long spin = 0;
    while (spin < 600000UL) {
        spin++;
    }
    return 0;
}
|
||||||
116
demos/stdFile.c
Normal file
116
demos/stdFile.c
Normal file
|
|
@ -0,0 +1,116 @@
|
||||||
|
// stdFile.c - exercise the Standard File toolset ($17) dispatcher
|
||||||
|
// path. Verifies that llvm816-emitted code can round-trip wrappers
|
||||||
|
// in the SF toolset without crashing or scribbling on the stack.
|
||||||
|
//
|
||||||
|
// runViaFinder.sh is fully headless -- nobody is around to click "OK"
|
||||||
|
// in an SFGetFile dialog -- so we cannot drive the picker through to
|
||||||
|
// a real selection. Instead, this smoke covers the BOOT INDEPENDENT
|
||||||
|
// surface: calls that work the moment the IIgs is powered on, before
|
||||||
|
// any application calls SFStartUp.
|
||||||
|
//
|
||||||
|
// Specifically:
|
||||||
|
// 1. SFVersion() -- returns ROM-resident version word. No
|
||||||
|
// StartUp required.
|
||||||
|
// 2. SFStatus() -- returns 0/non-zero "is started" boolean.
|
||||||
|
// 3. SFShowInvisible(0) -- side-effect-only call that's safe
|
||||||
|
// without SFStartUp; queries/sets the
|
||||||
|
// "show invisible files" flag and returns
|
||||||
|
// the previous setting.
|
||||||
|
//
|
||||||
|
// Plus we DO bring up the full desktop (startdesk: QD + WM + ...)
|
||||||
|
// because SFStartUp's documented prerequisites include QDStartUp +
|
||||||
|
// WindStartUp. Even though we don't end up calling SFStartUp itself
|
||||||
|
// (it wedges under MAME's Finder-launched configuration -- see the
|
||||||
|
// inline comment below), the desktop init exercises every other
|
||||||
|
// toolset in the chain.
|
||||||
|
//
|
||||||
|
// If $70 = 0x42 after this runs, the SF wrapper layer is healthy.
|
||||||
|
// (Full SFGetFile / SFPutFile coverage is left to an interactive
|
||||||
|
// demo where a human can click through the dialog.)
|
||||||
|
//
|
||||||
|
// Build with: bash demos/build.sh stdFile
|
||||||
|
// Run with: bash scripts/runViaFinder.sh demos/stdFile.omf
|
||||||
|
// --check 0x70=0x42
|
||||||
|
|
||||||
|
#include "iigs/desktop.h"
|
||||||
|
#include "iigs/toolbox.h"
|
||||||
|
|
||||||
|
|
||||||
|
// SFReplyRec layout (ORCA stdfile.h): 8 bytes prefix + 65-byte
|
||||||
|
// Pascal-counted path = 73 bytes, plus one pad byte = 74 (even size).
|
||||||
|
// Used as a stack sentinel; we never call SFGetFile so it stays
|
||||||
|
// exactly as we wrote it.
|
||||||
|
typedef struct {
    // Fixed prefix (good + fileType + auxType).
    unsigned short good;
    unsigned short fileType;
    unsigned long  auxType;
    // Pascal-counted path buffer.
    unsigned char  fileName[65];
    // Single trailing filler byte after the path buffer.
    unsigned char  pad;
} SFReplyRecT;
|
||||||
|
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
*(volatile unsigned char *)0x76 = 0xAA; // pre-init alive marker
|
||||||
|
|
||||||
|
// Bring up the full desktop so QDStartUp + WindStartUp are done.
|
||||||
|
// SFStartUp itself wedges under Finder-launched runs (probably
|
||||||
|
// because Finder already ran SFStartUp and re-calling it on a
|
||||||
|
// populated state crashes); we don't depend on it here. The
|
||||||
|
// startdesk() call still exercises every toolset in its chain.
|
||||||
|
unsigned short userId = startdesk(640);
|
||||||
|
(void)userId;
|
||||||
|
*(volatile unsigned char *)0x77 = 0xBB; // post-startdesk marker
|
||||||
|
|
||||||
|
// SFVersion() - returns the Standard File toolset's ROM version
|
||||||
|
// word. No SFStartUp required (the toolset is always in ROM).
|
||||||
|
// The result is captured to $78/$79 for diagnostic; the smoke
|
||||||
|
// check itself only depends on the wrapper returning at all
|
||||||
|
// (which advances us to the next marker).
|
||||||
|
unsigned short ver = SFVersion();
|
||||||
|
*(volatile unsigned char *)0x78 = (unsigned char)(ver >> 8);
|
||||||
|
*(volatile unsigned char *)0x79 = (unsigned char)(ver & 0xFF);
|
||||||
|
*(volatile unsigned char *)0x71 = 0x11; // post-SFVersion marker
|
||||||
|
|
||||||
|
// SFStatus() - returns the toolset's current state (0 = not
|
||||||
|
// started by us, non-zero = started). Pure dispatch, no args,
|
||||||
|
// returns Boolean. Exercises the result-pull arm of the
|
||||||
|
// wrapper layer.
|
||||||
|
(void)SFStatus();
|
||||||
|
*(volatile unsigned char *)0x72 = 0x22; // post-SFStatus marker
|
||||||
|
|
||||||
|
// SFShowInvisible(state) - sets the "show invisible files"
|
||||||
|
// flag and returns the previous setting. Safe pre-StartUp
|
||||||
|
// (the toolset just toggles a global). Exercises a (Word) ->
|
||||||
|
// Word wrapper round-trip.
|
||||||
|
unsigned short prev = SFShowInvisible(0);
|
||||||
|
*(volatile unsigned char *)0x73 = 0x33; // post-SFShowInvisible marker
|
||||||
|
(void)prev;
|
||||||
|
|
||||||
|
// Build a sentinel reply record on the stack. Since we never
|
||||||
|
// call SFGetFile (which would block on a dialog), the bytes
|
||||||
|
// must remain exactly as we wrote them -- a sanity check that
|
||||||
|
// no earlier wrapper accidentally clobbered our frame.
|
||||||
|
SFReplyRecT reply;
|
||||||
|
unsigned char *r8 = (unsigned char *)&reply;
|
||||||
|
for (int i = 0; i < (int)sizeof(reply); i++) {
|
||||||
|
r8[i] = 0x5C;
|
||||||
|
}
|
||||||
|
int replySane = 1;
|
||||||
|
for (int i = 0; i < (int)sizeof(reply); i++) {
|
||||||
|
if (r8[i] != 0x5C) {
|
||||||
|
replySane = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*(volatile unsigned char *)0x74 = 0x44; // post-sentinel marker
|
||||||
|
|
||||||
|
if (replySane) {
|
||||||
|
*(volatile unsigned char *)0x70 = 0x42;
|
||||||
|
} else {
|
||||||
|
*(volatile unsigned char *)0x70 = 0x43;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linger so the snapshot harness can sample the marker.
|
||||||
|
for (volatile unsigned long s = 0; s < 600000UL; s++) { }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
90
demos/timeProbe.c
Normal file
90
demos/timeProbe.c
Normal file
|
|
@ -0,0 +1,90 @@
|
||||||
|
// timeProbe.c - GS/OS smoke for the IIgs RTC surface. Exercises
|
||||||
|
// three layers of the time stack:
|
||||||
|
//
|
||||||
|
// 1. iigsReadTimeHex (Misc Tool $0D03) - the raw hardware read.
|
||||||
|
// 2. time() (libc.c) - epoch-second conversion.
|
||||||
|
// 3. gettimeofday() (extras.c) - the new POSIX shim added
|
||||||
|
// alongside this demo.
|
||||||
|
//
|
||||||
|
// All three paths must return non-zero on real GS/OS (the system
|
||||||
|
// clock is set during boot from the battery-backed clock chip; sec
|
||||||
|
// is always non-deterministic, hour/year are usually non-zero).
|
||||||
|
//
|
||||||
|
// Headless verification - we cannot pin specific values without
|
||||||
|
// knowing what MAME's emulated RTC will return, so we set marker
|
||||||
|
// bytes at $70+ that reflect "the call returned + the bytes look
|
||||||
|
// plausible":
|
||||||
|
//
|
||||||
|
// $70 = 0x99 if iigsReadTimeHex changed at least one byte of b[] AND
|
||||||
|
// gettimeofday() left tv_usec == 0 with either (r == 0, tv_sec != 0)
|
||||||
|
// or (r == -1, tv_sec == 0). time() only has to return. Else 0x43.
|
||||||
|
// $71 = b[2] (hour) -- non-zero on real boot, MAME returns 0 in the
|
||||||
|
// first emulated second so the smoke ONLY
|
||||||
|
// checks $70=0x99.
|
||||||
|
//
|
||||||
|
// Build with: bash demos/build.sh timeProbe
|
||||||
|
// Run with: bash scripts/runViaFinder.sh demos/timeProbe.omf
|
||||||
|
// --check 0x70=0x99
|
||||||
|
|
||||||
|
#include "iigs/misc.h"
|
||||||
|
#include "iigs/toolbox.h"
|
||||||
|
#include "sys/time.h"
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
|
||||||
|
int main(void) {
|
||||||
|
// Layer 1: raw ReadTimeHex. The buffer is preloaded with a
|
||||||
|
// sentinel pattern (0xAA) so we can detect that the tool actually
|
||||||
|
// overwrote SOMETHING -- even on a freshly booted MAME (clock
|
||||||
|
// starts at Jan 1 1904 internally) the toolset is expected to
|
||||||
|
// write all 8 bytes, and at least one of them differs from 0xAA
|
||||||
|
// (day-of-week=Sunday=1, day-of-month=1, etc).
|
||||||
|
unsigned char b[8];
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
b[i] = 0xAA;
|
||||||
|
}
|
||||||
|
iigsReadTimeHex(b);
|
||||||
|
int layer1Ok = 0;
|
||||||
|
for (int i = 0; i < 8; i++) {
|
||||||
|
if (b[i] != 0xAA) {
|
||||||
|
layer1Ok = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Save the hour byte for diagnostic (not part of the smoke check).
|
||||||
|
*(volatile unsigned char *)0x71 = b[2];
|
||||||
|
|
||||||
|
// Layer 2: time(). libc.c's iigsToolboxInit() arms the internal
|
||||||
|
// gate that protects time() from being called before the Tool
|
||||||
|
// Locator is up; safe to call unconditionally. time() returns 0
|
||||||
|
// if the RTC year is < 1970 (Unix epoch) -- on MAME that means a
|
||||||
|
// freshly reset emulator returns 0 here. We don't gate the smoke
|
||||||
|
// on a non-zero return; we only confirm the call returned cleanly
|
||||||
|
// (didn't crash or hang) by reaching layer 3.
|
||||||
|
iigsToolboxInit();
|
||||||
|
(void)time((time_t *)0);
|
||||||
|
|
||||||
|
// Layer 3: gettimeofday(). Even when time() returns 0 (epoch
|
||||||
|
// floor), gettimeofday must return -1 in that case per the shim's
|
||||||
|
// contract. We assert the call returned (didn't crash) and tv_usec
|
||||||
|
// ended up == 0 (the shim always sets it to 0, no sub-second hw).
|
||||||
|
struct timeval tv;
|
||||||
|
tv.tv_sec = 0xDEADBEEFL;
|
||||||
|
tv.tv_usec = 0xCAFE0000L;
|
||||||
|
int r = gettimeofday(&tv, (void *)0);
|
||||||
|
// Either r==0 with tv_sec!=0 (real clock past 1970) OR r==-1 with
|
||||||
|
// tv_sec==0 (epoch floor / MAME default). Both are valid call
|
||||||
|
// completion signals. Reject only the "tv untouched" outcome.
|
||||||
|
int layer3Ok = (tv.tv_usec == 0) && ((r == 0 && tv.tv_sec != 0L) || (r == -1 && tv.tv_sec == 0));
|
||||||
|
|
||||||
|
if (layer1Ok && layer3Ok) {
|
||||||
|
*(volatile unsigned char *)0x70 = 0x99;
|
||||||
|
} else {
|
||||||
|
*(volatile unsigned char *)0x70 = 0x43;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Linger so the snapshot harness can sample the marker.
|
||||||
|
for (volatile unsigned long s = 0; s < 600000UL; s++) { }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
49
runtime/include/sys/time.h
Normal file
49
runtime/include/sys/time.h
Normal file
|
|
@ -0,0 +1,49 @@
|
||||||
|
// sys/time.h - POSIX gettimeofday() shim on the IIgs RTC.
|
||||||
|
//
|
||||||
|
// The IIgs Misc Tool ReadTimeHex (set $03, tool $0D) is the only
|
||||||
|
// hardware-visible wall clock; its resolution is one second. We
|
||||||
|
// expose it through the POSIX gettimeofday() surface so portable
|
||||||
|
// code that wants a coarse wall-time stamp (logging, srand,
|
||||||
|
// benchmark deltas in whole seconds) works unmodified.
|
||||||
|
//
|
||||||
|
// tv_sec is the same Unix epoch second count returned by time().
|
||||||
|
// tv_usec is always 0 (no sub-second hardware). The `tz` argument is
|
||||||
|
// accepted for source compatibility and silently ignored -- the IIgs
|
||||||
|
// has no timezone database.
|
||||||
|
//
|
||||||
|
// The signature mirrors the canonical POSIX one byte-for-byte so
|
||||||
|
// existing third-party code using `struct timeval` and gettimeofday()
|
||||||
|
// links cleanly against runtime/extras.o.
|
||||||
|
|
||||||
|
#ifndef _SYS_TIME_H
|
||||||
|
#define _SYS_TIME_H
|
||||||
|
|
||||||
|
#include <time.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// POSIX only requires suseconds_t to be a signed integer type able to hold [-1, 1000000]; `long` satisfies that.
|
||||||
|
typedef long suseconds_t;

// Timestamp filled in by gettimeofday().
struct timeval {
    time_t      tv_sec;     // seconds since the Unix epoch
    suseconds_t tv_usec;    // microseconds within the second (always 0 here)
};

// Declared for source compatibility with code that names the type.
struct timezone {
    int tz_minuteswest;     // minutes west of GMT (always 0)
    int tz_dsttime;         // DST correction (always 0)
};

// gettimeofday - fetch the current wall-clock time.
//
//   tv  receives the timestamp; passing NULL is a no-op success.
//   tz  accepted for source compatibility and silently ignored.
//
// Returns 0 on success, -1 on failure (e.g. if the Tool Locator has
// not yet been initialised).
int gettimeofday(struct timeval *tv, void *tz);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
@ -170,6 +170,46 @@ void __srandInitFromTime(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// ----- sys/time.h gettimeofday() ---------------------------------------
|
||||||
|
//
|
||||||
|
// Thin shim over libc.c's time() — same epoch-second source, packaged
|
||||||
|
// in the POSIX struct timeval shape. tv_usec is always 0 because the
|
||||||
|
// IIgs has no sub-second wall clock (the VBL counter at $E1:006B is
|
||||||
|
// monotonic but not aligned to wall-clock seconds). The tz argument
|
||||||
|
// is accepted for source compat and ignored; the IIgs has no
|
||||||
|
// timezone database.
|
||||||
|
//
|
||||||
|
// Declared in <sys/time.h>; the struct timeval layout matches that
|
||||||
|
// header byte-for-byte (time_t, then long).
|
||||||
|
|
||||||
|
extern long time(long *t);      // matches signature in <time.h>

// Local mirror of <sys/time.h>'s struct timeval (two longs, seconds
// first), so this file does not need to include that header.
struct __ggGtodTimeval {
    long tv_sec;
    long tv_usec;
};

// gettimeofday - store the current epoch second in *tv.
//
//   tv  destination; a null pointer is accepted and reported as success.
//   tz  ignored.
//
// tv_sec receives whatever time() returned and tv_usec is always 0.
// A zero from time() is reported as failure (-1), with both fields
// left at 0; any other value returns 0.
int gettimeofday(struct __ggGtodTimeval *tv, void *tz) {
    (void)tz;
    if (tv) {
        const long now = time((long *)0);
        tv->tv_sec = now;       // 0 on the failure path, by construction
        tv->tv_usec = 0;
        if (now == 0) {
            return -1;
        }
    }
    return 0;
}
|
||||||
|
|
||||||
|
|
||||||
// ----- additional string.h ----------------------------------------------
|
// ----- additional string.h ----------------------------------------------
|
||||||
|
|
||||||
static int inSet(char c, const char *set) {
|
static int inSet(char c, const char *set) {
|
||||||
|
|
|
||||||
Binary file not shown.
Binary file not shown.
|
|
@ -11,12 +11,38 @@
|
||||||
# Output: markdown table with cycles-per-call. Both clang and the
|
# Output: markdown table with cycles-per-call. Both clang and the
|
||||||
# Calypsi numbers (from `tools/calypsi/cc65816`) are reported when
|
# Calypsi numbers (from `tools/calypsi/cc65816`) are reported when
|
||||||
# Calypsi is installed.
|
# Calypsi is installed.
|
||||||
|
#
|
||||||
|
# Flags:
|
||||||
|
# --no-layer2 Build the benches in plain ptr32 mode (Layer 1 only).
|
||||||
|
# By default we pass `-mllvm -w65816-dbr-safe-ptrs`
|
||||||
|
# (Layer 2 — stack-rel-indirect-Y ptr32 derefs) because
|
||||||
|
# every published baseline in docs/USAGE.md and every
|
||||||
|
# entry in memory/feedback_*.md was measured with Layer
|
||||||
|
# 2 on. Without it, strLen / strcpy / djb2 / memcmp
|
||||||
|
# lose the X-iter + Y-as-counter peephole chain in
|
||||||
|
# W65816StackRelToImg and regress 2-4x.
|
||||||
|
#
|
||||||
|
# Env override:
|
||||||
|
# W65816_CC_EXTRA Additional flags passed to every clang invocation
|
||||||
|
# in this script. Appended AFTER the layer flag
|
||||||
|
# so callers can disable Layer 2 themselves
|
||||||
|
# (`W65816_CC_EXTRA="" --no-layer2 ...`) or stack
|
||||||
|
# extra `-mllvm` knobs on top of Layer 2.
|
||||||
|
|
||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
||||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||||
BENCH_DIR="$PROJECT_ROOT/benchmarks"
|
BENCH_DIR="$PROJECT_ROOT/benchmarks"
|
||||||
|
|
||||||
|
# Layer 2 is the published baseline. Use --no-layer2 to opt out.
|
||||||
|
# Layer 2 flags are on by default; --no-layer2 is the only argument
# this script accepts, and anything else aborts with exit status 1.
LAYER2_FLAGS=(-mllvm -w65816-dbr-safe-ptrs)
for arg; do
    if [[ "$arg" == "--no-layer2" ]]; then
        LAYER2_FLAGS=()
    else
        echo "unknown flag: $arg" >&2
        exit 1
    fi
done
|
||||||
|
|
||||||
CLANG="$PROJECT_ROOT/tools/llvm-mos-build/bin/clang"
|
CLANG="$PROJECT_ROOT/tools/llvm-mos-build/bin/clang"
|
||||||
LLVM_MC="$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-mc"
|
LLVM_MC="$PROJECT_ROOT/tools/llvm-mos-build/bin/llvm-mc"
|
||||||
LINK="$PROJECT_ROOT/tools/link816"
|
LINK="$PROJECT_ROOT/tools/link816"
|
||||||
|
|
@ -122,9 +148,9 @@ int main(void) {
|
||||||
}
|
}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
"$CLANG" --target=w65816 -O2 ${W65816_CC_EXTRA:-} -ffunction-sections -c "$cwrap" -o "$owrap" 2>/dev/null \
|
"$CLANG" --target=w65816 -O2 "${LAYER2_FLAGS[@]}" ${W65816_CC_EXTRA:-} -ffunction-sections -c "$cwrap" -o "$owrap" 2>/dev/null \
|
||||||
|| { echo "compile-fail"; rm -f "$cwrap" "$owrap"; return; }
|
|| { echo "compile-fail"; rm -f "$cwrap" "$owrap"; return; }
|
||||||
"$CLANG" --target=w65816 -O2 ${W65816_CC_EXTRA:-} -ffunction-sections -c "$BENCH_DIR/$name.c" -o "$obench" 2>/dev/null \
|
"$CLANG" --target=w65816 -O2 "${LAYER2_FLAGS[@]}" ${W65816_CC_EXTRA:-} -ffunction-sections -c "$BENCH_DIR/$name.c" -o "$obench" 2>/dev/null \
|
||||||
|| { echo "compile-fail"; rm -f "$cwrap" "$owrap" "$obench"; return; }
|
|| { echo "compile-fail"; rm -f "$cwrap" "$owrap" "$obench"; return; }
|
||||||
"$LINK" -o "$bin" --text-base 0x1000 "$oCrt0" "$oLibgcc" "$owrap" "$obench" 2>/dev/null \
|
"$LINK" -o "$bin" --text-base 0x1000 "$oCrt0" "$oLibgcc" "$owrap" "$obench" 2>/dev/null \
|
||||||
|| { echo "link-fail"; rm -f "$cwrap" "$owrap" "$obench" "$bin"; return; }
|
|| { echo "link-fail"; rm -f "$cwrap" "$owrap" "$obench" "$bin"; return; }
|
||||||
|
|
|
||||||
|
|
@ -707,10 +707,16 @@ emu.register_periodic(function()
|
||||||
local full_pc = (pc_bnk * 0x10000) + pc_lo
|
local full_pc = (pc_bnk * 0x10000) + pc_lo
|
||||||
print(string.format("MAMEDBG-SNAP S=0x%04X PC=0x%06X",
|
print(string.format("MAMEDBG-SNAP S=0x%04X PC=0x%06X",
|
||||||
s_val, full_pc))
|
s_val, full_pc))
|
||||||
-- Dump 64 bytes of the stack window above S (S+1 .. S+64).
|
-- Dump the entire bank-0 stack window from S+1 up to the
|
||||||
-- That's where the topmost JSL return frame lives.
|
-- program-entry SP ($01FF). Multi-frame `bt` walks several
|
||||||
for ofs = 1, 64 do
|
-- parent frames upward, each consuming `frameSize + 3`
|
||||||
local addr = s_val + ofs
|
-- bytes; 64 bytes was enough for the topmost frame only.
|
||||||
|
-- Capping at $01FF keeps the dump bounded and avoids
|
||||||
|
-- reading past the user stack into bank-0 hardware
|
||||||
|
-- registers / soft switches that would surface as
|
||||||
|
-- $C000-page side-effects.
|
||||||
|
local stack_top = 0x01FF
|
||||||
|
for addr = s_val + 1, stack_top do
|
||||||
local v = mem:read_u8(addr)
|
local v = mem:read_u8(addr)
|
||||||
print(string.format("MAMEDBG-STACK addr=0x%06X val=0x%02X",
|
print(string.format("MAMEDBG-STACK addr=0x%06X val=0x%02X",
|
||||||
addr, v))
|
addr, v))
|
||||||
|
|
@ -769,6 +775,12 @@ class ReplState:
|
||||||
self.sectionPayloads = pc2line.loadSidecarSectionsAll(args.dwarf)
|
self.sectionPayloads = pc2line.loadSidecarSectionsAll(args.dwarf)
|
||||||
self.cus = pc2line.parseAllCus(self.sectionPayloads)
|
self.cus = pc2line.parseAllCus(self.sectionPayloads)
|
||||||
self.lineTable = pc2line.buildTable(args.dwarf)
|
self.lineTable = pc2line.buildTable(args.dwarf)
|
||||||
|
# Per-function frame records (sorted) — used by `bt` to walk
|
||||||
|
# parent JSL frames. Empty if the sidecar predates the
|
||||||
|
# W65816AsmPrinter frame-record emission (older builds /
|
||||||
|
# hand-written assembly objects); `bt` falls back to the
|
||||||
|
# single-frame walk in that case.
|
||||||
|
self.frameRecords = pc2line.loadFrameRecords(args.dwarf)
|
||||||
# Breakpoints: list of (pc, label) - label is the original spec
|
# Breakpoints: list of (pc, label) - label is the original spec
|
||||||
self.breakpoints = []
|
self.breakpoints = []
|
||||||
# Watches: dict {symbol: (addr, length)}. Length picked from
|
# Watches: dict {symbol: (addr, length)}. Length picked from
|
||||||
|
|
@ -983,50 +995,130 @@ def replPrintWhere(state):
|
||||||
f"S=0x{sp:04x}")
|
f"S=0x{sp:04x}")
|
||||||
|
|
||||||
|
|
||||||
def replPrintBacktrace(state):
|
def _btPrintFrame(state, frame_no, pc, sp):
    """Print a single backtrace line for frame `frame_no`.

    The line always carries the frame number, PC, enclosing function
    name and S. A `file:line` field is inserted after the PC when the
    line table has a row for `pc`. Nothing on `state` is modified.
    """
    func = pc2line.funcAt(state.syms, pc)
    row = pc2line.query(state.lineTable, pc)
    location = ""
    if row is not None:
        _, src_file, src_line = row
        location = f"{src_file}:{src_line} "
    print(f" #{frame_no} PC=0x{pc:06x} {location}FUNC={func} S=0x{sp:04x}")
|
||||||
|
|
||||||
The W65816 JSL pushes 3 bytes per call (PCL, PCH, PBR). Our ABI is
|
|
||||||
empty-descending: S points to the next-free byte. So the topmost
|
# Maximum unwinder depth. Real recursion can exceed this on the IIgs's
|
||||||
return-address triplet lives at S+1, S+2, S+3. We read it from the
|
# tiny stack, but past 16 frames the user almost certainly wants the
|
||||||
captured stack window. We have no DW_AT_frame_base / DW_CFA_*
|
# truncation hint rather than a wall of identical-looking entries.
|
||||||
sidecar yet, so we can't walk past one frame — but we can show the
|
BT_MAX_FRAMES = 16
|
||||||
return address of the current function, which is what most debug
|
|
||||||
sessions need anyway.
|
# Initial program-entry SP — crt0 sets up the user stack at $01FF
|
||||||
|
# (empty-descending) and JSLs main(). Once `bt`'s walker sees S climb
|
||||||
|
# past this value, we've reached the root and stop without printing
|
||||||
|
# the bogus "frame above crt0" the rule would otherwise produce.
|
||||||
|
BT_ROOT_SP = 0x01FF
|
||||||
|
|
||||||
|
|
||||||
|
def _btReadReturnPc(state, base):
    """Decode the JSL return target stored in the three bytes above `base`.

    Reads PCL/PCH/PBR from the captured stack bytes at base+1..base+3
    (wrapped to 16 bits). Returns the 24-bit return PC (pushed address
    plus one), or None when any of the three bytes was not captured.
    """
    stack = state.lastStackBytes
    pcl = stack.get((base + 1) & 0xFFFF)
    pch = stack.get((base + 2) & 0xFFFF)
    pbr = stack.get((base + 3) & 0xFFFF)
    if pcl is None or pch is None or pbr is None:
        return None
    return (((pbr << 16) | (pch << 8) | pcl) + 1) & 0xFFFFFF


def replPrintBacktrace(state):
    """Print a backtrace for the last snapshot.

    Frame #0 is the snapshot's own PC/S. Each further frame is found by
    looking up the frame record covering the current PC, reading the
    return-address triplet at S + frameSize + 1..+3, and advancing S by
    frameSize + rtlBytes.

    When no frame records were loaded, only the immediate caller is
    shown, read from S+1..S+3.

    The walk stops when: there is no frame record for the current PC,
    the return triplet is outside the captured stack bytes, S climbs
    past BT_ROOT_SP, S fails to increase, or BT_MAX_FRAMES parent frames
    have been printed.
    """
    if state.lastSnap is None:
        print(" no snapshot yet — `run` first")
        return
    pc = state.lastSnap["pc"]
    sp = state.lastSnap["sp"]
    _btPrintFrame(state, 0, pc, sp)

    if not state.frameRecords:
        # No frame records: show the caller of the current function only.
        ret_pc = _btReadReturnPc(state, sp)
        if ret_pc is None:
            print(" #1 <return address not in captured stack window>")
            return
        _btPrintFrame(state, 1, ret_pc, (sp + 3) & 0xFFFF)
        print(" (no .debug_frame_w65816 — only one frame available)")
        return

    cur_pc = pc
    cur_sp = sp
    # If the snapshot PC is exactly a function's first byte, its
    # prologue has not run and no frame has been allocated yet, so the
    # first hop must treat the frame size as 0. Later hops look at
    # callers that are mid-execution and use the recorded size.
    rec0 = pc2line.frameAt(state.frameRecords, cur_pc)
    first_hop_at_entry = rec0 is not None and rec0[0] == cur_pc

    for frame_no in range(1, BT_MAX_FRAMES + 1):
        rec = pc2line.frameAt(state.frameRecords, cur_pc)
        if rec is None:
            # Without a frame size there is no safe way to climb further.
            print(f" (no frame record for PC=0x{cur_pc:06x} — "
                  f"stopping)")
            return
        _pc_start, _pc_end, frame_sz, rtl = rec
        if frame_no == 1 and first_hop_at_entry:
            effective_frame_sz = 0
        else:
            effective_frame_sz = frame_sz
        ret_base = (cur_sp + effective_frame_sz) & 0xFFFF

        ret_pc = _btReadReturnPc(state, ret_base)
        if ret_pc is None:
            # Report the same 16-bit-wrapped addresses the lookup used.
            lo = (ret_base + 1) & 0xFFFF
            hi = (ret_base + 3) & 0xFFFF
            print(f" (return triplet at 0x{lo:04x}.."
                  f"0x{hi:04x} not in captured stack window — "
                  f"stopping)")
            return

        # S after the return address is popped; `rtl` is the number of
        # return-address bytes taken from the frame record.
        ret_sp = (ret_base + rtl) & 0xFFFF

        # Past the program-entry SP: print this last frame and stop.
        if ret_sp > BT_ROOT_SP:
            _btPrintFrame(state, frame_no, ret_pc, ret_sp)
            print(f" (reached crt0 / program-entry frame "
                  f"S=0x{ret_sp:04x} > 0x{BT_ROOT_SP:04x})")
            return

        # Defensive: S must strictly increase on every hop.
        if ret_sp <= cur_sp:
            print(f" (non-monotonic SP at frame #{frame_no} "
                  f"cur=0x{cur_sp:04x} new=0x{ret_sp:04x} — stopping)")
            return

        _btPrintFrame(state, frame_no, ret_pc, ret_sp)
        cur_pc = ret_pc
        cur_sp = ret_sp

    print(f" (>{BT_MAX_FRAMES} frames — truncated)")
|
||||||
|
|
||||||
|
|
||||||
def replPrintSymbol(state, spec):
|
def replPrintSymbol(state, spec):
|
||||||
|
|
@ -1259,10 +1351,31 @@ def replLoop(state):
|
||||||
print(" no breakpoints set — nothing to break on")
|
print(" no breakpoints set — nothing to break on")
|
||||||
continue
|
continue
|
||||||
bp_pcs = [pc for pc, _ in state.breakpoints]
|
bp_pcs = [pc for pc, _ in state.breakpoints]
|
||||||
# Decide start_pc: --from-start runs through crt0; default
|
# Decide start_pc. Precedence (highest first):
|
||||||
# is to jump to the first bp (matches --trace behaviour).
|
# --from-start -> LOAD_AT (run through crt0)
|
||||||
|
# --start-at -> user-supplied entry point (FUNC or hex)
|
||||||
|
# — set this to an *outer* caller of the
|
||||||
|
# bp so the JSL frame chain is real and
|
||||||
|
# `bt` can walk multiple frames.
|
||||||
|
# default -> jump straight to the first bp (matches
|
||||||
|
# --trace behaviour; produces a single
|
||||||
|
# frame in `bt`).
|
||||||
if state.args.from_start:
|
if state.args.from_start:
|
||||||
start_pc = state.args.load_at
|
start_pc = state.args.load_at
|
||||||
|
elif state.args.start_at:
|
||||||
|
spec = state.args.start_at
|
||||||
|
try:
|
||||||
|
start_pc = int(spec, 0)
|
||||||
|
except ValueError:
|
||||||
|
start_pc = None
|
||||||
|
for addr, sym in state.syms:
|
||||||
|
if sym == spec:
|
||||||
|
start_pc = addr
|
||||||
|
break
|
||||||
|
if start_pc is None:
|
||||||
|
print(f" --start-at '{spec}' not in map; "
|
||||||
|
f"falling back to bp[0]")
|
||||||
|
start_pc = bp_pcs[0]
|
||||||
else:
|
else:
|
||||||
start_pc = bp_pcs[0]
|
start_pc = bp_pcs[0]
|
||||||
watch_regions = list(state.watches.values())
|
watch_regions = list(state.watches.values())
|
||||||
|
|
|
||||||
|
|
@ -1576,6 +1576,79 @@ def funcAt(syms, pc):
|
||||||
return best or "?"
|
return best or "?"
|
||||||
|
|
||||||
|
|
||||||
|
# ---- Frame sidecar (.debug_frame_w65816) -----------------------------
|
||||||
|
#
|
||||||
|
# Each record is exactly 12 bytes:
|
||||||
|
# +0 uint32_t fnPcStart (24-bit final-image address, zero-padded)
|
||||||
|
# +4 uint32_t fnPcEnd (one past the last instruction)
|
||||||
|
# +8 uint16_t frameSize (bytes that the prologue subtracts from S)
|
||||||
|
# +10 uint8_t rtlBytes (3 for JSL/RTL; reserved for inline RTS)
|
||||||
|
# +11 uint8_t pad (must be 0; reserved for future flags)
|
||||||
|
#
|
||||||
|
# Records are emitted in object-file order by W65816AsmPrinter and
|
||||||
|
# concatenated unchanged by link816's `.debug_*` sidecar pipeline.
|
||||||
|
FRAME_RECORD_SIZE = 12
|
||||||
|
|
||||||
|
|
||||||
|
def loadFrameRecords(sidecar_path):
    """Parse `.debug_frame_w65816` out of a link816 sidecar file.

    Args:
        sidecar_path: path handed straight to loadSidecarSection().

    Returns:
        A list of (pcStart, pcEnd, frameSize, rtlBytes) tuples, sorted
        ascending (pcStart is the leading sort key).  The list is empty
        when loadSidecarSection() yields no chunks for the section
        (presumably older sidecars / hand-written .s objects with no
        frame records) or when every chunk was rejected.
    """
    records = []
    for _chunk_name, blob in loadSidecarSection(sidecar_path,
                                                ".debug_frame_w65816"):
        whole, leftover = divmod(len(blob), FRAME_RECORD_SIZE)
        if leftover:
            # Length is not a whole number of records: treat the chunk
            # as truncated / corrupt and drop it entirely, but keep
            # going so one bad input object doesn't disable bt for the
            # rest of the sidecar.
            continue
        for idx in range(whole):
            base = idx * FRAME_RECORD_SIZE
            # Addresses are stored as little-endian uint32 but only the
            # low 24 bits are meaningful.
            start = int.from_bytes(blob[base:base + 4], "little") & 0xFFFFFF
            end = int.from_bytes(blob[base + 4:base + 8], "little") & 0xFFFFFF
            if start == 0 and end == 0:
                # Placeholder row (both endpoints zero) — nothing to
                # unwind through, so it is not recorded.
                continue
            size = int.from_bytes(blob[base + 8:base + 10], "little")
            # Byte +10 is rtlBytes; byte +11 (pad) is ignored.
            records.append((start, end, size, blob[base + 10]))
    # frameAt() binary-searches on pcStart, so hand back a sorted list.
    records.sort()
    return records
|
||||||
|
|
||||||
|
|
||||||
|
def frameAt(records, pc):
    """Return the record with the largest pcStart <= pc, or None.

    Args:
        records: (pcStart, pcEnd, frameSize, rtlBytes) tuples sorted
            ascending by pcStart (loadFrameRecords guarantees this).
        pc: address to look up.

    Returns:
        The nearest record starting at or before pc.  None is returned
        only when records is empty or pc lies below the first pcStart.

    Note:
        The result is NOT guaranteed to contain pc.  pcEnd is exclusive
        (one past the last function instruction); when pc falls in the
        gap after a function, the preceding record is still returned
        because that is useful for diagnostics.  Callers that need
        strict containment must check `pc < record[1]` themselves.
    """
    # Upper-bound binary search: on exit, lo is the index of the first
    # record whose pcStart is > pc, so lo - 1 is the nearest preceding one.
    lo, hi = 0, len(records)
    while lo < hi:
        mid = (lo + hi) // 2
        if records[mid][0] <= pc:
            lo = mid + 1
        else:
            hi = mid
    return records[lo - 1] if lo else None
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
ap = argparse.ArgumentParser(description="PC -> source resolver")
|
ap = argparse.ArgumentParser(description="PC -> source resolver")
|
||||||
ap.add_argument("--sidecar", required=True,
|
ap.add_argument("--sidecar", required=True,
|
||||||
|
|
|
||||||
|
|
@ -72,7 +72,9 @@ EOF
|
||||||
[ -s "$DWARF" ] || { echo "probeReplSmoke: empty DWARF sidecar"; exit 1; }
|
[ -s "$DWARF" ] || { echo "probeReplSmoke: empty DWARF sidecar"; exit 1; }
|
||||||
[ -s "$MAP" ] || { echo "probeReplSmoke: empty map"; exit 1; }
|
[ -s "$MAP" ] || { echo "probeReplSmoke: empty map"; exit 1; }
|
||||||
|
|
||||||
# Pipe the canned REPL script.
|
# Phase 1: existing single-frame `bp main` smoke (kept to ensure the
|
||||||
|
# baseline path still works). Then Phase 2: `bp add` + `--start-at
|
||||||
|
# main` to exercise the multi-frame `bt` walker.
|
||||||
printf 'break main\nrun\nwhere\nquit\n' \
|
printf 'break main\nrun\nwhere\nquit\n' \
|
||||||
| timeout 60 python3 "$HERE/mameDebug.py" --repl \
|
| timeout 60 python3 "$HERE/mameDebug.py" --repl \
|
||||||
--bin "$BIN" --map "$MAP" --dwarf "$DWARF" \
|
--bin "$BIN" --map "$MAP" --dwarf "$DWARF" \
|
||||||
|
|
@ -123,5 +125,45 @@ if ! grep -qi "PC=$MAIN_PC_LC " "$OUT"; then
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "probeReplSmoke: OK (bp resolved, BP-HIT captured, where decoded)"
|
# Phase 2: multi-frame `bt` test. Breakpoint at `add` with --start-at
|
||||||
|
# main: the JSL frame from main->add is live at the snapshot, so `bt`
|
||||||
|
# should walk back up at least one parent (>= 2 total frames). This
|
||||||
|
# regression-checks both the .debug_frame_w65816 sidecar emit (link816)
|
||||||
|
# and the walker in mameDebug.py.
|
||||||
|
OUT2="$WORK/repl2.out"
|
||||||
|
printf 'break add\nrun\nbt\nquit\n' \
|
||||||
|
| timeout 60 python3 "$HERE/mameDebug.py" --repl \
|
||||||
|
--bin "$BIN" --map "$MAP" --dwarf "$DWARF" \
|
||||||
|
--start-at main --seconds 4 > "$OUT2" 2>&1 || {
|
||||||
|
echo "probeReplSmoke: mameDebug.py --repl (bt) failed" >&2
|
||||||
|
cat "$OUT2" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ "$VERBOSE" -eq 1 ]; then
|
||||||
|
cat "$OUT2" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Count frame lines (` #N PC=0x...`) in the bt output. Need >= 2 to
|
||||||
|
# prove the .debug_frame_w65816 sidecar drove a real parent-frame walk.
|
||||||
|
FRAME_LINES=$(grep -cE "^ #[0-9]+ PC=0x" "$OUT2" || true)
|
||||||
|
if [ "$FRAME_LINES" -lt 2 ]; then
|
||||||
|
echo "probeReplSmoke: bt produced $FRAME_LINES frame lines (need >= 2)" >&2
|
||||||
|
cat "$OUT2" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Verify frame #0 is `add` and frame #1 is `main`.
|
||||||
|
if ! grep -q "^ #0 PC=0x.* FUNC=add " "$OUT2"; then
|
||||||
|
echo "probeReplSmoke: bt frame #0 is not 'add'" >&2
|
||||||
|
cat "$OUT2" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if ! grep -q "^ #1 PC=0x.* FUNC=main " "$OUT2"; then
|
||||||
|
echo "probeReplSmoke: bt frame #1 is not 'main'" >&2
|
||||||
|
cat "$OUT2" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "probeReplSmoke: OK (single-frame where + multi-frame bt OK)"
|
||||||
exit 0
|
exit 0
|
||||||
|
|
|
||||||
|
|
@ -6700,6 +6700,82 @@ else
|
||||||
log "OK: rsrcProbe (real Resource Manager open/load/cache/close all green)"
|
log "OK: rsrcProbe (real Resource Manager open/load/cache/close all green)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# IIgs RTC surface: build timeProbe and run it under GS/OS. Exercises
|
||||||
|
# the three layers of the time stack (iigsReadTimeHex -> time() ->
|
||||||
|
# gettimeofday()). The new sys/time.h shim must compile cleanly and
|
||||||
|
# the wrapper must return without trashing the stack; if either fails,
|
||||||
|
# control never reaches the marker store at $70.
|
||||||
|
#
|
||||||
|
# Gated on the same sys602.po + cadius + mame trifecta as docram.
|
||||||
|
# Override with SMOKE_SKIP_TIMEPROBE=1.
|
||||||
|
if [ "${SMOKE_SKIP_TIMEPROBE:-0}" = 1 ]; then
|
||||||
|
warn "SMOKE_SKIP_TIMEPROBE=1; skipping timeProbe stage"
|
||||||
|
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||||
|
warn "timeProbe prerequisites missing; skipping"
|
||||||
|
else
|
||||||
|
log "check: timeProbe (iigsReadTimeHex + time() + gettimeofday()) under GS/OS"
|
||||||
|
bash "$PROJECT_ROOT/demos/build.sh" timeProbe >/tmp/timeProbeBuildOut 2>&1 || {
|
||||||
|
cat /tmp/timeProbeBuildOut >&2
|
||||||
|
die "demos/build.sh timeProbe failed"
|
||||||
|
}
|
||||||
|
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||||
|
"$PROJECT_ROOT/demos/timeProbe.omf" \
|
||||||
|
--check 0x70=0x99 >/tmp/timeProbeRunOut 2>&1 || {
|
||||||
|
cat /tmp/timeProbeRunOut >&2
|
||||||
|
die "timeProbe did not set marker 0x99 after time-stack sweep"
|
||||||
|
}
|
||||||
|
log "OK: timeProbe (RTC -> epoch -> timeval all green)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Note Synth toolset ($19) dispatcher path. Exercises NSVersion +
|
||||||
|
# NSStatus + AllNotesOff (calls that don't require a full NSStartUp
|
||||||
|
# instrument-table setup, which is finicky and not what this smoke is
|
||||||
|
# trying to measure). $70 = 0x42 if all three wrappers round-trip
|
||||||
|
# cleanly through the dispatcher.
|
||||||
|
if [ "${SMOKE_SKIP_MIDIPROBE:-0}" = 1 ]; then
|
||||||
|
warn "SMOKE_SKIP_MIDIPROBE=1; skipping midiProbe stage"
|
||||||
|
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||||
|
warn "midiProbe prerequisites missing; skipping"
|
||||||
|
else
|
||||||
|
log "check: midiProbe (NoteSynth NSVersion/NSStatus/AllNotesOff) under GS/OS"
|
||||||
|
bash "$PROJECT_ROOT/demos/build.sh" midiProbe >/tmp/midiProbeBuildOut 2>&1 || {
|
||||||
|
cat /tmp/midiProbeBuildOut >&2
|
||||||
|
die "demos/build.sh midiProbe failed"
|
||||||
|
}
|
||||||
|
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||||
|
"$PROJECT_ROOT/demos/midiProbe.omf" \
|
||||||
|
--check 0x70=0x42 >/tmp/midiProbeRunOut 2>&1 || {
|
||||||
|
cat /tmp/midiProbeRunOut >&2
|
||||||
|
die "midiProbe did not set marker 0x42 after NoteSynth dispatcher sweep"
|
||||||
|
}
|
||||||
|
log "OK: midiProbe (NoteSynth dispatcher round-trip green)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Standard File toolset ($17) dispatcher path. Same idea as
|
||||||
|
# midiProbe: exercise the no-StartUp-required surface (SFVersion +
|
||||||
|
# SFStatus + SFShowInvisible) plus a stack-sanity sentinel. Doesn't
|
||||||
|
# attempt to actually open the SF dialog (would require an
|
||||||
|
# interactive user to click "OK"). $70 = 0x42 if all three wrappers
|
||||||
|
# round-trip cleanly AND the stack-sentinel SFReplyRec was untouched.
|
||||||
|
if [ "${SMOKE_SKIP_STDFILE:-0}" = 1 ]; then
|
||||||
|
warn "SMOKE_SKIP_STDFILE=1; skipping stdFile stage"
|
||||||
|
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||||
|
warn "stdFile prerequisites missing; skipping"
|
||||||
|
else
|
||||||
|
log "check: stdFile (StandardFile SFVersion/SFStatus/SFShowInvisible) under GS/OS"
|
||||||
|
bash "$PROJECT_ROOT/demos/build.sh" stdFile >/tmp/stdFileBuildOut 2>&1 || {
|
||||||
|
cat /tmp/stdFileBuildOut >&2
|
||||||
|
die "demos/build.sh stdFile failed"
|
||||||
|
}
|
||||||
|
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||||
|
"$PROJECT_ROOT/demos/stdFile.omf" \
|
||||||
|
--check 0x70=0x42 >/tmp/stdFileRunOut 2>&1 || {
|
||||||
|
cat /tmp/stdFileRunOut >&2
|
||||||
|
die "stdFile did not set marker 0x42 after Standard File dispatcher sweep"
|
||||||
|
}
|
||||||
|
log "OK: stdFile (Standard File dispatcher round-trip green)"
|
||||||
|
fi
|
||||||
|
|
||||||
# Phase 4.2 sprite engine: standalone SHR 320 init + 16x16 4bpp packed
|
# Phase 4.2 sprite engine: standalone SHR 320 init + 16x16 4bpp packed
|
||||||
# sprite list + render/erase cycle. Bare-metal (no GS/OS, no startdesk)
|
# sprite list + render/erase cycle. Bare-metal (no GS/OS, no startdesk)
|
||||||
# so we run via runInMame.sh --check-u8 reading actual SHR bytes at
|
# so we run via runInMame.sh --check-u8 reading actual SHR bytes at
|
||||||
|
|
|
||||||
|
|
@ -147,6 +147,32 @@ static constexpr uint8_t R_W65816_DATA32 = 7;
|
||||||
// ELFObjectWriter::recordRelocation.
|
// ELFObjectWriter::recordRelocation.
|
||||||
static constexpr uint8_t R_W65816_PCREL32 = 8;
|
static constexpr uint8_t R_W65816_PCREL32 = 8;
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------- IIgs memory map
|
||||||
|
// Bank-0 hazard zones the placement logic must route around. Kept as
|
||||||
|
// named constants to avoid sprinkling magic 0xC000 / 0xD000 across the
|
||||||
|
// rodata/init/bss/heap placement code (previously: ~13 raw uses across
|
||||||
|
// five distinct decisions). Update both halves together if the IIgs
|
||||||
|
// memory map ever needs revisiting.
|
||||||
|
//
|
||||||
|
// $C000..$CFFF — IO and soft switches. Reads return hardware
|
||||||
|
// register values, writes hit soft switches. Code,
|
||||||
|
// data, and BSS placement all bump past this zone.
|
||||||
|
// $D000..$DFFF — Language Card 1. Read-only ROM by default; crt0
|
||||||
|
// enables LC1 RAM via the $C083 read-twice trick so
|
||||||
|
// rodata/BSS/heap placed here is writable.
|
||||||
|
// $0001:0000 — Bank-0 ceiling; any range whose top exceeds this
|
||||||
|
// must be split across banks (BSS handles up to 4
|
||||||
|
// consecutive banks; rodata/init are bank-0 only).
|
||||||
|
static constexpr uint32_t kIoWindowStart = 0xC000;   // $C000: first byte of the IO window
static constexpr uint32_t kIoWindowEnd   = 0xD000;   // first usable byte past IO
static constexpr uint32_t kBank0Ceiling  = 0x10000;  // first byte of bank 1

// Returns true iff the half-open range `[start, start+size)` intersects
// the IO window `[kIoWindowStart, kIoWindowEnd)`.  That single test
// covers both "starts below the window and crosses in" and "starts
// inside the window".  Used by rodata / init_array / BSS placement.
//
// The end address is computed in 64 bits so an oversized `size` cannot
// wrap around 2^32 and make a range that really crosses the window look
// as if it ended below it.
//
// A zero-length range is reported as overlapping only when it starts
// strictly inside the window (start > kIoWindowStart).
static inline bool overlapsIoWindow(uint32_t start, uint32_t size) {
    const uint64_t end = static_cast<uint64_t>(start) + size;
    return start < kIoWindowEnd && end > kIoWindowStart;
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------- Helpers
|
// ---------------------------------------------------------------- Helpers
|
||||||
|
|
||||||
[[noreturn]] static void die(const std::string &msg) {
|
[[noreturn]] static void die(const std::string &msg) {
|
||||||
|
|
@ -883,33 +909,32 @@ struct Linker {
|
||||||
L.textBase + L.textSize);
|
L.textBase + L.textSize);
|
||||||
die(msg);
|
die(msg);
|
||||||
}
|
}
|
||||||
// Hard-fail if text crosses into the IO window ($C000-$CFFF).
|
// Hard-fail if text crosses into the IO window. Code there
|
||||||
// Code there would fetch instructions from hardware registers.
|
// would fetch instructions from hardware registers. Programs
|
||||||
// Programs that grow this big need to split into bank 1 (not
|
// that grow this big need to split into bank 1 (not currently
|
||||||
// currently supported by this linker).
|
// supported by this linker).
|
||||||
if (L.textBase < 0xC000 &&
|
if (overlapsIoWindow(L.textBase, L.textSize) &&
|
||||||
L.textBase + L.textSize > 0xC000) {
|
L.textBase < kIoWindowStart) {
|
||||||
char msg[160];
|
char msg[160];
|
||||||
std::snprintf(msg, sizeof(msg),
|
std::snprintf(msg, sizeof(msg),
|
||||||
"text [0x%X+%u] crosses IIgs IO window 0xC000-0xCFFF — "
|
"text [0x%X+%u] crosses IIgs IO window 0x%X-0x%X — "
|
||||||
"shrink the program or split into bank 1",
|
"shrink the program or split into bank 1",
|
||||||
L.textBase, L.textSize);
|
L.textBase, L.textSize,
|
||||||
|
kIoWindowStart, kIoWindowEnd - 1);
|
||||||
die(msg);
|
die(msg);
|
||||||
}
|
}
|
||||||
// Auto-skip the IO window ($C000-$CFFF) if rodata would land
|
// Auto-skip the IO window if rodata would land there. Loads
|
||||||
// there. Loads from $C000-$CFFF return hardware register
|
// from the IO range return hardware register values (and
|
||||||
// values (and writes hit the soft switches), so any rodata
|
// writes hit the soft switches), so any rodata data that
|
||||||
// data that landed there would silently corrupt at runtime
|
// landed there would silently corrupt at runtime — caught
|
||||||
// — caught when math.o grew past ~28KB and pushed string
|
// when math.o grew past ~28KB and pushed string literals into
|
||||||
// literals into the IO range, breaking smoke #86 (hash
|
// the IO range, breaking smoke #86 (hash table strcmp
|
||||||
// table strcmp returned garbage because the keys read back
|
// returned garbage because the keys read back as IO register
|
||||||
// as IO register values). Catches both "starts before IO,
|
// values). Catches both "starts before IO, crosses in" and
|
||||||
// crosses in" and "starts inside IO" cases.
|
// "starts inside IO" cases.
|
||||||
if (!rodataBase &&
|
if (!rodataBase && overlapsIoWindow(L.rodataBase, L.rodataSize)) {
|
||||||
L.rodataBase < 0xD000 &&
|
|
||||||
L.rodataBase + L.rodataSize > 0xC000) {
|
|
||||||
// Page-align upward past the IO window.
|
// Page-align upward past the IO window.
|
||||||
L.rodataBase = 0xD000;
|
L.rodataBase = kIoWindowEnd;
|
||||||
// Pad the image so the gap between text-end and rodata-
|
// Pad the image so the gap between text-end and rodata-
|
||||||
// start is just zeros. The runInMame loader skips
|
// start is just zeros. The runInMame loader skips
|
||||||
// writes to the IO range so the soft switches stay
|
// writes to the IO range so the soft switches stay
|
||||||
|
|
@ -920,22 +945,22 @@ struct Linker {
|
||||||
L.initSize = curInit;
|
L.initSize = curInit;
|
||||||
// Init_array can also land in IO if rodata ends just before
|
// Init_array can also land in IO if rodata ends just before
|
||||||
// or starts inside.
|
// or starts inside.
|
||||||
if (L.initBase < 0xD000 &&
|
if (overlapsIoWindow(L.initBase, L.initSize)) {
|
||||||
L.initBase + L.initSize > 0xC000) {
|
L.initBase = kIoWindowEnd;
|
||||||
L.initBase = 0xD000;
|
|
||||||
}
|
}
|
||||||
// After all skips, sanity-check we haven't gone past the LC
|
// After all skips, sanity-check we haven't gone past the LC
|
||||||
// ceiling. The IIgs LC area is $D000-$FFFF (12KB usable when
|
// ceiling. The IIgs LC area is $D000-$FFFF (12KB usable when
|
||||||
// bank 1 is selected; the $E000-$FFFF chunk is common to both
|
// bank 1 is selected; the $E000-$FFFF chunk is common to both
|
||||||
// banks). crt0's `lda $C083` read-twice enables RAM read+write
|
// banks). crt0's `lda $C083` read-twice enables RAM read+write
|
||||||
// for the entire LC range, so we can use through $FFFF.
|
// for the entire LC range, so we can use through $FFFF.
|
||||||
if (L.initBase + L.initSize > 0x10000u) {
|
if (L.initBase + L.initSize > kBank0Ceiling) {
|
||||||
char msg[160];
|
char msg[160];
|
||||||
std::snprintf(msg, sizeof(msg),
|
std::snprintf(msg, sizeof(msg),
|
||||||
"rodata + init_array [0x%X+%u] exceeds bank-0 LC "
|
"rodata + init_array [0x%X+%u] exceeds bank-0 LC "
|
||||||
"ceiling 0x10000 — shrink the runtime or split into bank 1",
|
"ceiling 0x%X — shrink the runtime or split into bank 1",
|
||||||
L.rodataBase,
|
L.rodataBase,
|
||||||
(unsigned)(L.initBase + L.initSize - L.rodataBase));
|
(unsigned)(L.initBase + L.initSize - L.rodataBase),
|
||||||
|
kBank0Ceiling);
|
||||||
die(msg);
|
die(msg);
|
||||||
}
|
}
|
||||||
uint32_t initBase = L.initBase;
|
uint32_t initBase = L.initBase;
|
||||||
|
|
@ -970,26 +995,25 @@ struct Linker {
|
||||||
if (L.bssBase < loadEnd) {
|
if (L.bssBase < loadEnd) {
|
||||||
// Page-align upward for nicer addresses in the map.
|
// Page-align upward for nicer addresses in the map.
|
||||||
L.bssBase = (loadEnd + 0xFF) & ~0xFFu;
|
L.bssBase = (loadEnd + 0xFF) & ~0xFFu;
|
||||||
if (L.bssBase >= 0xC000 && L.bssBase < 0xD000) {
|
if (L.bssBase >= kIoWindowStart && L.bssBase < kIoWindowEnd) {
|
||||||
L.bssBase = 0xD000;
|
L.bssBase = kIoWindowEnd;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// Also bump past the IO window if BSS would SPAN it
|
// Also bump past the IO window if BSS would SPAN it
|
||||||
// (starts below 0xC000, extends into or past 0xC000).
|
// (starts below kIoWindowStart, extends into or past it).
|
||||||
// BSS writes to 0xC000-0xCFFF hit soft switches — caught
|
// BSS writes to the IO range hit soft switches — caught
|
||||||
// by smoke #128 hex dumper, where ~954-byte BSS pushed
|
// by smoke #128 hex dumper, where ~954-byte BSS pushed
|
||||||
// past 0xC000 and BSS-clear writes crashed MAME.
|
// past kIoWindowStart and BSS-clear writes crashed MAME.
|
||||||
if (L.bssBase < 0xC000 &&
|
if (overlapsIoWindow(L.bssBase, L.bssSize)) {
|
||||||
L.bssBase + L.bssSize > 0xC000) {
|
L.bssBase = kIoWindowEnd;
|
||||||
L.bssBase = 0xD000;
|
|
||||||
}
|
}
|
||||||
if (L.bssBase + L.bssSize > 0x10000u) {
|
if (L.bssBase + L.bssSize > kBank0Ceiling) {
|
||||||
char msg[256];
|
char msg[256];
|
||||||
std::snprintf(msg, sizeof(msg),
|
std::snprintf(msg, sizeof(msg),
|
||||||
"bss [0x%X+%u] exceeds bank-0 ceiling 0x10000 — "
|
"bss [0x%X+%u] exceeds bank-0 ceiling 0x%X — "
|
||||||
"shrink runtime, or pass --bss-base 0xNN0000 "
|
"shrink runtime, or pass --bss-base 0xNN0000 "
|
||||||
"(multi-bank BSS up to 4 banks now supported)",
|
"(multi-bank BSS up to 4 banks now supported)",
|
||||||
L.bssBase, L.bssSize);
|
L.bssBase, L.bssSize, kBank0Ceiling);
|
||||||
die(msg);
|
die(msg);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -1089,26 +1113,34 @@ struct Linker {
|
||||||
// range above bss_end. Without this, the previous hardcoded
|
// range above bss_end. Without this, the previous hardcoded
|
||||||
// heap_end=$BF00 gave heap_end < heap_start whenever BSS
|
// heap_end=$BF00 gave heap_end < heap_start whenever BSS
|
||||||
// spilled into LC1 — malloc immediately returned NULL.
|
// spilled into LC1 — malloc immediately returned NULL.
|
||||||
// If bank-0 heap would be tiny (<512B) push to LC1 ($D000+).
|
// If bank-0 heap would be tiny (<512B) push to LC1 (just past
|
||||||
uint32_t heapStart = L.bssBase + L.bssSize;
|
// the IO window).
|
||||||
|
//
|
||||||
|
// Bank-0 heap top sits one page below the IO window so heap
|
||||||
|
// alloc bumps never touch soft switches. kIoWindowStart - 0x100
|
||||||
|
// = $BF00; encoded here for clarity rather than as a raw
|
||||||
|
// constant.
|
||||||
|
constexpr uint32_t kBank0HeapTop = kIoWindowStart - 0x100; // $BF00
|
||||||
constexpr uint32_t MIN_HEAP = 512;
|
constexpr uint32_t MIN_HEAP = 512;
|
||||||
if (heapStart >= 0xBF00 && heapStart < 0xD000) {
|
uint32_t heapStart = L.bssBase + L.bssSize;
|
||||||
heapStart = 0xD000; // skip IO window + tiny tail
|
if (heapStart >= kBank0HeapTop && heapStart < kIoWindowEnd) {
|
||||||
} else if (heapStart < 0xBF00 && (0xBF00 - heapStart) < MIN_HEAP) {
|
heapStart = kIoWindowEnd; // skip IO window + tiny tail
|
||||||
heapStart = 0xD000; // bank-0 sliver too small; use LC
|
} else if (heapStart < kBank0HeapTop &&
|
||||||
|
(kBank0HeapTop - heapStart) < MIN_HEAP) {
|
||||||
|
heapStart = kIoWindowEnd; // bank-0 sliver too small; use LC
|
||||||
}
|
}
|
||||||
globalSyms["__heap_start"] = heapStart;
|
globalSyms["__heap_start"] = heapStart;
|
||||||
if (heapStart < 0xC000) {
|
if (heapStart < kIoWindowStart) {
|
||||||
globalSyms["__heap_end"] = 0xBF00;
|
globalSyms["__heap_end"] = kBank0HeapTop;
|
||||||
} else if (heapStart < 0x10000u) {
|
} else if (heapStart < kBank0Ceiling) {
|
||||||
// Heap in LC area ($D000-$FFFF). crt0's $C083 read-twice
|
// Heap in LC area ($D000-$FFFF). crt0's $C083 read-twice
|
||||||
// enables read+write for the whole range. Cap at 0xFFFE
|
// enables read+write for the whole range. Cap at 0xFFFE
|
||||||
// (not 0x10000) — relocation patching at the use site is
|
// (not kBank0Ceiling) — relocation patching at the use
|
||||||
// 16-bit and 0x10000 truncates to 0; malloc would then
|
// site is 16-bit and 0x10000 truncates to 0; malloc would
|
||||||
// think heap_end < heap_start and return NULL.
|
// then think heap_end < heap_start and return NULL.
|
||||||
globalSyms["__heap_end"] = 0xFFFE;
|
globalSyms["__heap_end"] = 0xFFFE;
|
||||||
} else {
|
} else {
|
||||||
// Unreachable — bssBase + bssSize > 0x10000 check above.
|
// Unreachable — bssBase + bssSize > kBank0Ceiling check above.
|
||||||
globalSyms["__heap_end"] = heapStart;
|
globalSyms["__heap_end"] = heapStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -319,6 +319,16 @@ class W65816AsmParser : public MCTargetAsmParser {
|
||||||
void updateMatcherFeatures() {
|
void updateMatcherFeatures() {
|
||||||
setAvailableFeatures(ComputeAvailableFeatures(CurFeatures));
|
setAvailableFeatures(ComputeAvailableFeatures(CurFeatures));
|
||||||
}
|
}
|
||||||
|
// Set/reset a (FeatureLow, FeatureHigh) pair to canonical "High" or "Low"
|
||||||
|
// state and refresh the matcher mask. Shared by .a8/.a16/.i8/.i16
|
||||||
|
// directive handling and constructor conflict resolution; without it
|
||||||
|
// each toggle and conflict-rule was 2-4 lines of bit manipulation
|
||||||
|
// duplicated per axis.
|
||||||
|
void setModePair(unsigned FeatureLow, unsigned FeatureHigh, bool High) {
|
||||||
|
CurFeatures.reset(High ? FeatureLow : FeatureHigh);
|
||||||
|
CurFeatures.set (High ? FeatureHigh : FeatureLow);
|
||||||
|
updateMatcherFeatures();
|
||||||
|
}
|
||||||
|
|
||||||
/// @name Auto-generated Matcher Functions
|
/// @name Auto-generated Matcher Functions
|
||||||
/// {
|
/// {
|
||||||
|
|
@ -333,21 +343,17 @@ public:
|
||||||
const MCInstrInfo &MII, const MCTargetOptions &Options)
|
const MCInstrInfo &MII, const MCTargetOptions &Options)
|
||||||
: MCTargetAsmParser(Options, STI, MII), Parser(Parser) {
|
: MCTargetAsmParser(Options, STI, MII), Parser(Parser) {
|
||||||
MCAsmParserExtension::Initialize(Parser);
|
MCAsmParserExtension::Initialize(Parser);
|
||||||
// Seed CurFeatures from the Subtarget, then enforce conflict resolution:
|
// Seed CurFeatures from the Subtarget, then enforce conflict resolution
|
||||||
// M and X each must be EXACTLY one direction. If the user explicitly
|
// via setModePair: M and X each must be EXACTLY one direction. If the
|
||||||
// set -mattr=+mhigh on top of the default +mlow, drop +mlow (vice versa
|
// user explicitly set -mattr=+mhigh on top of the default +mlow, drop
|
||||||
// for X). If neither side is set, default to M=16/X=16 (the C ABI) —
|
// +mlow (vice versa for X). If neither side is set, default to
|
||||||
// belt-and-suspenders with the MC-layer Subtarget's CPU=w65816 default.
|
// M=16/X=16 (the C ABI) — belt-and-suspenders with the MC-layer
|
||||||
|
// Subtarget's CPU=w65816 default.
|
||||||
CurFeatures = STI.getFeatureBits();
|
CurFeatures = STI.getFeatureBits();
|
||||||
if (CurFeatures[W65816::FeatureMHigh])
|
setModePair(W65816::FeatureMLow, W65816::FeatureMHigh,
|
||||||
CurFeatures.reset(W65816::FeatureMLow);
|
CurFeatures[W65816::FeatureMHigh]);
|
||||||
else if (!CurFeatures[W65816::FeatureMLow])
|
setModePair(W65816::FeatureXLow, W65816::FeatureXHigh,
|
||||||
CurFeatures.set(W65816::FeatureMLow);
|
CurFeatures[W65816::FeatureXHigh]);
|
||||||
if (CurFeatures[W65816::FeatureXHigh])
|
|
||||||
CurFeatures.reset(W65816::FeatureXLow);
|
|
||||||
else if (!CurFeatures[W65816::FeatureXLow])
|
|
||||||
CurFeatures.set(W65816::FeatureXLow);
|
|
||||||
updateMatcherFeatures();
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
@ -605,21 +611,15 @@ ParseStatus W65816AsmParser::parseDirective(AsmToken DirectiveID) {
|
||||||
// subsequent `lda #imm`/`ldx #imm`/etc. encode with the right operand
|
// subsequent `lda #imm`/`ldx #imm`/etc. encode with the right operand
|
||||||
// width. Both ca65 (.a8/.a16, .i8/.i16) and WDC/Merlin32 (.as/.al,
|
// width. Both ca65 (.a8/.a16, .i8/.i16) and WDC/Merlin32 (.as/.al,
|
||||||
// .xs/.xl) spellings are accepted. No operands; expect EOL.
|
// .xs/.xl) spellings are accepted. No operands; expect EOL.
|
||||||
auto setM = [this](bool High) {
|
|
||||||
CurFeatures.reset(High ? W65816::FeatureMLow : W65816::FeatureMHigh);
|
|
||||||
CurFeatures.set (High ? W65816::FeatureMHigh : W65816::FeatureMLow);
|
|
||||||
updateMatcherFeatures();
|
|
||||||
};
|
|
||||||
auto setX = [this](bool High) {
|
|
||||||
CurFeatures.reset(High ? W65816::FeatureXLow : W65816::FeatureXHigh);
|
|
||||||
CurFeatures.set (High ? W65816::FeatureXHigh : W65816::FeatureXLow);
|
|
||||||
updateMatcherFeatures();
|
|
||||||
};
|
|
||||||
bool IsModeDir = true;
|
bool IsModeDir = true;
|
||||||
if (IDVal == ".a8" || IDVal == ".as") setM(true);
|
if (IDVal == ".a8" || IDVal == ".as")
|
||||||
else if (IDVal == ".a16" || IDVal == ".al") setM(false);
|
setModePair(W65816::FeatureMLow, W65816::FeatureMHigh, /*High=*/true);
|
||||||
else if (IDVal == ".i8" || IDVal == ".xs") setX(true);
|
else if (IDVal == ".a16" || IDVal == ".al")
|
||||||
else if (IDVal == ".i16" || IDVal == ".xl") setX(false);
|
setModePair(W65816::FeatureMLow, W65816::FeatureMHigh, /*High=*/false);
|
||||||
|
else if (IDVal == ".i8" || IDVal == ".xs")
|
||||||
|
setModePair(W65816::FeatureXLow, W65816::FeatureXHigh, /*High=*/true);
|
||||||
|
else if (IDVal == ".i16" || IDVal == ".xl")
|
||||||
|
setModePair(W65816::FeatureXLow, W65816::FeatureXHigh, /*High=*/false);
|
||||||
else IsModeDir = false;
|
else IsModeDir = false;
|
||||||
if (IsModeDir) {
|
if (IsModeDir) {
|
||||||
if (!getLexer().is(AsmToken::EndOfStatement))
|
if (!getLexer().is(AsmToken::EndOfStatement))
|
||||||
|
|
|
||||||
|
|
@ -62,38 +62,40 @@ public:
|
||||||
// printing (hex, '$' prefix, etc.).
|
// printing (hex, '$' prefix, etc.).
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
static DecodeStatus decodeImm8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
// Immediate / address operand decoders. All five (Imm8/Imm16,
|
||||||
const MCDisassembler *Decoder) {
|
// Addr8/Addr16/Addr24) just mask the raw bits to the operand width and
|
||||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFF));
|
// create a literal MCOperand — the printer handles per-class formatting
|
||||||
|
// (hex prefix, '$' vs '0x', etc.). Keeping width-specific shim
|
||||||
|
// functions because the generated tables reference each by name.
|
||||||
|
static inline DecodeStatus decodeImmWidth(MCInst &Inst, uint64_t Imm,
|
||||||
|
uint64_t Mask) {
|
||||||
|
Inst.addOperand(MCOperand::createImm(Imm & Mask));
|
||||||
return MCDisassembler::Success;
|
return MCDisassembler::Success;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static DecodeStatus decodeImm8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||||
|
const MCDisassembler *Decoder) {
|
||||||
|
return decodeImmWidth(Inst, Imm, 0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
static DecodeStatus decodeImm16(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
static DecodeStatus decodeImm16(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
|
return decodeImmWidth(Inst, Imm, 0xFFFF);
|
||||||
return MCDisassembler::Success;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static DecodeStatus decodeAddr8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
static DecodeStatus decodeAddr8(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFF));
|
return decodeImmWidth(Inst, Imm, 0xFF);
|
||||||
return MCDisassembler::Success;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static DecodeStatus decodeAddr16(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
static DecodeStatus decodeAddr16(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFF));
|
return decodeImmWidth(Inst, Imm, 0xFFFF);
|
||||||
return MCDisassembler::Success;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static DecodeStatus decodeAddr24(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
static DecodeStatus decodeAddr24(MCInst &Inst, uint64_t Imm, uint64_t Address,
|
||||||
const MCDisassembler *Decoder) {
|
const MCDisassembler *Decoder) {
|
||||||
Inst.addOperand(MCOperand::createImm(Imm & 0xFFFFFF));
|
return decodeImmWidth(Inst, Imm, 0xFFFFFF);
|
||||||
return MCDisassembler::Success;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -76,36 +76,7 @@ FunctionPass *llvm::createW65816ABridgeViaX() {
|
||||||
return new W65816ABridgeViaX();
|
return new W65816ABridgeViaX();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Same allowlist as TiedDefSpill — we target the same consumers.
|
// Same predicate as TiedDefSpill via the shared helper.
|
||||||
static bool isTiedAcc16Consumer(unsigned Opc) {
|
|
||||||
switch (Opc) {
|
|
||||||
case W65816::ADCfi:
|
|
||||||
case W65816::SBCfi:
|
|
||||||
case W65816::ANDfi:
|
|
||||||
case W65816::ORAfi:
|
|
||||||
case W65816::EORfi:
|
|
||||||
case W65816::ADCabs:
|
|
||||||
case W65816::SBCabs:
|
|
||||||
case W65816::ADCi16imm:
|
|
||||||
case W65816::SBCi16imm:
|
|
||||||
case W65816::ANDi16imm:
|
|
||||||
case W65816::ORAi16imm:
|
|
||||||
case W65816::EORi16imm:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool hasTiedSrcDef(const MachineInstr &MI) {
|
|
||||||
if (!isTiedAcc16Consumer(MI.getOpcode())) return false;
|
|
||||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
|
||||||
const MachineOperand &MO = MI.getOperand(i);
|
|
||||||
if (!MO.isReg() || !MO.isUse()) continue;
|
|
||||||
if (MI.isRegTiedToDefOperand(i)) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pre-RA check for "instruction may clobber an Img16 (DP $D0..$DF)
|
// Pre-RA check for "instruction may clobber an Img16 (DP $D0..$DF)
|
||||||
// register." Calls clobber them caller-save. Any other DP load/store
|
// register." Calls clobber them caller-save. Any other DP load/store
|
||||||
|
|
@ -155,7 +126,7 @@ bool W65816ABridgeViaX::runOnMachineFunction(MachineFunction &MF) {
|
||||||
|
|
||||||
for (auto &MBB : MF) {
|
for (auto &MBB : MF) {
|
||||||
for (auto &MI : MBB) {
|
for (auto &MI : MBB) {
|
||||||
if (!hasTiedSrcDef(MI)) continue;
|
if (!W65816Helpers::hasTiedAcc16Src(MI)) continue;
|
||||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||||
const MachineOperand &MO = MI.getOperand(i);
|
const MachineOperand &MO = MI.getOperand(i);
|
||||||
if (!MO.isReg() || !MO.isUse()) continue;
|
if (!MO.isReg() || !MO.isUse()) continue;
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load diff
|
|
@ -75,20 +75,10 @@ FunctionPass *llvm::createW65816BranchExpand() {
|
||||||
return new W65816BranchExpand();
|
return new W65816BranchExpand();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map a conditional branch opcode to its inverted form. Returns 0 if
|
// Map a conditional branch opcode to its inverted form via the shared
|
||||||
// not a recognised conditional Bxx.
|
// helper in W65816InstrInfo.h. Returns 0 if not a recognised conditional Bxx.
|
||||||
static unsigned invertedConditional(unsigned Opc) {
|
static unsigned invertedConditional(unsigned Opc) {
|
||||||
switch (Opc) {
|
return W65816Helpers::invertCondOpcode(Opc);
|
||||||
case W65816::BEQ: return W65816::BNE;
|
|
||||||
case W65816::BNE: return W65816::BEQ;
|
|
||||||
case W65816::BCC: return W65816::BCS;
|
|
||||||
case W65816::BCS: return W65816::BCC;
|
|
||||||
case W65816::BMI: return W65816::BPL;
|
|
||||||
case W65816::BPL: return W65816::BMI;
|
|
||||||
case W65816::BVC: return W65816::BVS;
|
|
||||||
case W65816::BVS: return W65816::BVC;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Byte-accurate distance estimate from a specific branch instruction
|
// Byte-accurate distance estimate from a specific branch instruction
|
||||||
|
|
|
||||||
|
|
@ -92,7 +92,10 @@ FunctionPass *llvm::createW65816ImgCalleeSave() {
|
||||||
}
|
}
|
||||||
|
|
||||||
// IMG8..IMG15 physregs (in order so IMG_REGS[i] is the i'th high-half slot).
|
// IMG8..IMG15 physregs (in order so IMG_REGS[i] is the i'th high-half slot).
|
||||||
// Their DP addresses are $C0, $C2, ..., $CE (each slot is 16 bits = 2 bytes).
|
// Their DP addresses are $C0, $C2, ..., $CE (each slot is 16 bits = 2 bytes);
|
||||||
|
// the DP layout is also expressed via W65816Helpers::imgDPAddr. Keep the
|
||||||
|
// parallel `IMG_DP` array for fast index→address lookup at the hot rewrite
|
||||||
|
// sites below.
|
||||||
static constexpr unsigned IMG_REGS[8] = {
|
static constexpr unsigned IMG_REGS[8] = {
|
||||||
W65816::IMG8, W65816::IMG9, W65816::IMG10, W65816::IMG11,
|
W65816::IMG8, W65816::IMG9, W65816::IMG10, W65816::IMG11,
|
||||||
W65816::IMG12, W65816::IMG13, W65816::IMG14, W65816::IMG15};
|
W65816::IMG12, W65816::IMG13, W65816::IMG14, W65816::IMG15};
|
||||||
|
|
|
||||||
|
|
@ -30,10 +30,13 @@ W65816InstrInfo::W65816InstrInfo(const W65816Subtarget &STI)
|
||||||
W65816::ADJCALLSTACKUP),
|
W65816::ADJCALLSTACKUP),
|
||||||
RI() {}
|
RI() {}
|
||||||
|
|
||||||
// Maps IMGn to its DP address (IMG0..IMG7 at $D0..$DE, IMG8..IMG15 at
|
// Shared helpers exposed via W65816InstrInfo.h. See the namespace
|
||||||
// $C0..$CE, both in steps of 2). Returns -1 if the reg isn't an IMG.
|
// comment there for usage notes.
|
||||||
static int imgDPAddr(Register R) {
|
namespace llvm {
|
||||||
switch (R) {
|
namespace W65816Helpers {
|
||||||
|
|
||||||
|
int imgDPAddr(unsigned Reg) {
|
||||||
|
switch (Reg) {
|
||||||
case W65816::IMG0: return 0xD0;
|
case W65816::IMG0: return 0xD0;
|
||||||
case W65816::IMG1: return 0xD2;
|
case W65816::IMG1: return 0xD2;
|
||||||
case W65816::IMG2: return 0xD4;
|
case W65816::IMG2: return 0xD4;
|
||||||
|
|
@ -54,6 +57,71 @@ static int imgDPAddr(Register R) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
unsigned invertCondOpcode(unsigned Opc) {
|
||||||
|
switch (Opc) {
|
||||||
|
case W65816::BEQ: return W65816::BNE;
|
||||||
|
case W65816::BNE: return W65816::BEQ;
|
||||||
|
case W65816::BCS: return W65816::BCC;
|
||||||
|
case W65816::BCC: return W65816::BCS;
|
||||||
|
case W65816::BMI: return W65816::BPL;
|
||||||
|
case W65816::BPL: return W65816::BMI;
|
||||||
|
case W65816::BVS: return W65816::BVC;
|
||||||
|
case W65816::BVC: return W65816::BVS;
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
unsigned getDpOpcodeForStackRel(unsigned Opc) {
|
||||||
|
switch (Opc) {
|
||||||
|
case W65816::LDA_StackRel: return W65816::LDA_DP;
|
||||||
|
case W65816::STA_StackRel: return W65816::STA_DP;
|
||||||
|
case W65816::ADC_StackRel: return W65816::ADC_DP;
|
||||||
|
case W65816::SBC_StackRel: return W65816::SBC_DP;
|
||||||
|
case W65816::CMP_StackRel: return W65816::CMP_DP;
|
||||||
|
case W65816::AND_StackRel: return W65816::AND_DP;
|
||||||
|
case W65816::ORA_StackRel: return W65816::ORA_DP;
|
||||||
|
case W65816::EOR_StackRel: return W65816::EOR_DP;
|
||||||
|
default: return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool isTiedAcc16Consumer(unsigned Opc) {
|
||||||
|
switch (Opc) {
|
||||||
|
case W65816::ADCfi:
|
||||||
|
case W65816::SBCfi:
|
||||||
|
case W65816::ANDfi:
|
||||||
|
case W65816::ORAfi:
|
||||||
|
case W65816::EORfi:
|
||||||
|
case W65816::ADCabs:
|
||||||
|
case W65816::SBCabs:
|
||||||
|
case W65816::ADCi16imm:
|
||||||
|
case W65816::SBCi16imm:
|
||||||
|
case W65816::ANDi16imm:
|
||||||
|
case W65816::ORAi16imm:
|
||||||
|
case W65816::EORi16imm:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
bool hasTiedAcc16Src(const MachineInstr &MI) {
|
||||||
|
if (!isTiedAcc16Consumer(MI.getOpcode())) return false;
|
||||||
|
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||||
|
const MachineOperand &MO = MI.getOperand(i);
|
||||||
|
if (!MO.isReg() || !MO.isUse()) continue;
|
||||||
|
if (MI.isRegTiedToDefOperand(i)) return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace W65816Helpers
|
||||||
|
} // namespace llvm
|
||||||
|
|
||||||
void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||||
MachineBasicBlock::iterator I,
|
MachineBasicBlock::iterator I,
|
||||||
const DebugLoc &DL, Register DestReg,
|
const DebugLoc &DL, Register DestReg,
|
||||||
|
|
@ -82,9 +150,9 @@ void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// A → IMGn / IMGn → A: STA dp / LDA dp. IMGn is DP-backed at fixed
|
// A → IMGn / IMGn → A: STA dp / LDA dp. IMGn is DP-backed at fixed
|
||||||
// addresses $D0..$DE — see imgDPAddr above.
|
// addresses ($D0..$DE for IMG0..7, $C0..$CE for IMG8..15) — see W65816Helpers::imgDPAddr above.
|
||||||
int srcImg = imgDPAddr(SrcReg);
|
int srcImg = W65816Helpers::imgDPAddr(SrcReg);
|
||||||
int dstImg = imgDPAddr(DestReg);
|
int dstImg = W65816Helpers::imgDPAddr(DestReg);
|
||||||
if (DestReg == W65816::A && srcImg >= 0) {
|
if (DestReg == W65816::A && srcImg >= 0) {
|
||||||
BuildMI(MBB, I, DL, get(W65816::LDA_DP)).addImm(srcImg);
|
BuildMI(MBB, I, DL, get(W65816::LDA_DP)).addImm(srcImg);
|
||||||
return;
|
return;
|
||||||
|
|
@ -454,21 +522,10 @@ int W65816InstrInfo::getSPAdjust(const MachineInstr &MI) const {
|
||||||
return TargetInstrInfo::getSPAdjust(MI);
|
return TargetInstrInfo::getSPAdjust(MI);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conditional branch opcode predicate.
|
// Conditional branch opcode predicate — derived from the shared
|
||||||
|
// invertCondOpcode helper so the two stay in lockstep.
|
||||||
static bool isCondBranch(unsigned Opc) {
|
static bool isCondBranch(unsigned Opc) {
|
||||||
switch (Opc) {
|
return W65816Helpers::invertCondOpcode(Opc) != 0;
|
||||||
case W65816::BEQ:
|
|
||||||
case W65816::BNE:
|
|
||||||
case W65816::BCS:
|
|
||||||
case W65816::BCC:
|
|
||||||
case W65816::BMI:
|
|
||||||
case W65816::BPL:
|
|
||||||
case W65816::BVS:
|
|
||||||
case W65816::BVC:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Unconditional direct-target branch predicate. Excludes JMP_AbsInd
|
// Unconditional direct-target branch predicate. Excludes JMP_AbsInd
|
||||||
|
|
@ -478,21 +535,7 @@ static bool isUncondDirectBranch(unsigned Opc) {
|
||||||
Opc == W65816::JMP_Abs;
|
Opc == W65816::JMP_Abs;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Map a conditional Bxx to its inverse condition (BEQ↔BNE, etc.).
|
// invertCondOpcode lives in namespace W65816Helpers above.
|
||||||
// Returns 0 if not a recognised conditional.
|
|
||||||
static unsigned invertCondOpcode(unsigned Opc) {
|
|
||||||
switch (Opc) {
|
|
||||||
case W65816::BEQ: return W65816::BNE;
|
|
||||||
case W65816::BNE: return W65816::BEQ;
|
|
||||||
case W65816::BCS: return W65816::BCC;
|
|
||||||
case W65816::BCC: return W65816::BCS;
|
|
||||||
case W65816::BMI: return W65816::BPL;
|
|
||||||
case W65816::BPL: return W65816::BMI;
|
|
||||||
case W65816::BVS: return W65816::BVC;
|
|
||||||
case W65816::BVC: return W65816::BVS;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
MachineBasicBlock *
|
MachineBasicBlock *
|
||||||
W65816InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
|
W65816InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
|
||||||
|
|
@ -621,7 +664,7 @@ bool W65816InstrInfo::reverseBranchCondition(
|
||||||
SmallVectorImpl<MachineOperand> &Cond) const {
|
SmallVectorImpl<MachineOperand> &Cond) const {
|
||||||
if (Cond.size() != 1)
|
if (Cond.size() != 1)
|
||||||
return true;
|
return true;
|
||||||
unsigned Inverted = invertCondOpcode(Cond[0].getImm());
|
unsigned Inverted = W65816Helpers::invertCondOpcode(Cond[0].getImm());
|
||||||
if (!Inverted)
|
if (!Inverted)
|
||||||
return true;
|
return true;
|
||||||
Cond[0].setImm(Inverted);
|
Cond[0].setImm(Inverted);
|
||||||
|
|
|
||||||
|
|
@ -23,6 +23,44 @@ namespace llvm {
|
||||||
|
|
||||||
class W65816Subtarget;
|
class W65816Subtarget;
|
||||||
|
|
||||||
|
// Shared codegen helpers used across multiple W65816 passes. Defined in
|
||||||
|
// W65816InstrInfo.cpp so all passes link against a single source-of-truth.
|
||||||
|
namespace W65816Helpers {
|
||||||
|
|
||||||
|
// Map a conditional Bxx opcode (BEQ/BNE/BCS/BCC/BMI/BPL/BVS/BVC) to its
|
||||||
|
// inverse condition. Returns 0 if not a recognised conditional branch.
|
||||||
|
unsigned invertCondOpcode(unsigned Opc);
|
||||||
|
|
||||||
|
// Map a *_StackRel MC opcode (LDA/STA/ADC/SBC/CMP/AND/ORA/EOR) to its
|
||||||
|
// direct-page (DP) counterpart (LDA_DP, STA_DP, ...). Returns 0 if the
|
||||||
|
// opcode isn't one of the eight stack-rel MC ops.
|
||||||
|
unsigned getDpOpcodeForStackRel(unsigned Opc);
|
||||||
|
|
||||||
|
// True when Opc is one of the eight stack-rel MC ops above. Defined in
|
||||||
|
// terms of getDpOpcodeForStackRel so the two helpers can't drift apart.
|
||||||
|
inline bool isStackRelOpcode(unsigned Opc) {
|
||||||
|
return getDpOpcodeForStackRel(Opc) != 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Map a physical IMG register (IMG0..IMG15) to its DP address. IMG0..7
|
||||||
|
// live at $D0..$DE (caller-save); IMG8..15 live at $C0..$CE (callee-save
|
||||||
|
// per W65816ImgCalleeSave). Returns -1 if Reg isn't an IMG.
|
||||||
|
int imgDPAddr(unsigned Reg);
|
||||||
|
|
||||||
|
// Allowlist of tied-def Acc16 consumer pseudos: instructions that take
|
||||||
|
// an Acc16 source operand which is tied to the same-named Acc16 def.
|
||||||
|
// Shared between W65816TiedDefSpill (stack-route bridge) and
|
||||||
|
// W65816ABridgeViaX (X/Y-route bridge); both passes target the same
|
||||||
|
// consumers so they must observe the same set.
|
||||||
|
bool isTiedAcc16Consumer(unsigned Opc);
|
||||||
|
|
||||||
|
// True when MI is a tied-def Acc16 consumer AND at least one of its
|
||||||
|
// register-use operands is tied to a def. Wraps isTiedAcc16Consumer with the
|
||||||
|
// per-MI operand check the bridge passes perform on every candidate.
|
||||||
|
bool hasTiedAcc16Src(const MachineInstr &MI);
|
||||||
|
|
||||||
|
} // namespace W65816Helpers
|
||||||
|
|
||||||
class W65816InstrInfo : public W65816GenInstrInfo {
|
class W65816InstrInfo : public W65816GenInstrInfo {
|
||||||
const W65816RegisterInfo RI;
|
const W65816RegisterInfo RI;
|
||||||
virtual void anchor();
|
virtual void anchor();
|
||||||
|
|
|
||||||
|
|
@ -86,11 +86,16 @@ bool W65816PreSpillCrossCall::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// First pass: count call sites in the function. Below the
|
// First pass: count call sites in the function. Below the
|
||||||
// heuristic threshold we don't bother — greedy handles low-call
|
// heuristic threshold we don't bother — greedy handles low-call
|
||||||
// functions fine and pre-spilling would just add bytes.
|
// functions fine and pre-spilling would just add bytes.
|
||||||
|
constexpr unsigned kCallCountThreshold = 4u;
|
||||||
unsigned callCount = 0;
|
unsigned callCount = 0;
|
||||||
for (MachineBasicBlock &MBB : MF)
|
for (MachineBasicBlock &MBB : MF) {
|
||||||
for (MachineInstr &MI : MBB)
|
for (MachineInstr &MI : MBB) {
|
||||||
if (MI.isCall()) callCount++;
|
if (MI.isCall()) {
|
||||||
if (callCount < 4) return false;
|
callCount++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (callCount < kCallCountThreshold) return false;
|
||||||
|
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -757,7 +757,6 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// Now find the iter++ sequence earlier in MBB: LDA IterSlotOff;
|
// Now find the iter++ sequence earlier in MBB: LDA IterSlotOff;
|
||||||
// INA_PSEUDO; STA IterSlotOff.
|
// INA_PSEUDO; STA IterSlotOff.
|
||||||
MachineInstr *IterLda = nullptr;
|
MachineInstr *IterLda = nullptr;
|
||||||
MachineInstr *IterIna = nullptr;
|
|
||||||
MachineInstr *IterSta = nullptr;
|
MachineInstr *IterSta = nullptr;
|
||||||
for (auto Walk = MBB.begin(); Walk != MachineBasicBlock::iterator(Php2); ++Walk) {
|
for (auto Walk = MBB.begin(); Walk != MachineBasicBlock::iterator(Php2); ++Walk) {
|
||||||
if (Walk->getOpcode() != W65816::LDA_StackRel) continue;
|
if (Walk->getOpcode() != W65816::LDA_StackRel) continue;
|
||||||
|
|
@ -775,7 +774,6 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (!N2->getOperand(0).isImm() ||
|
if (!N2->getOperand(0).isImm() ||
|
||||||
N2->getOperand(0).getImm() != IterSlotOff) continue;
|
N2->getOperand(0).getImm() != IterSlotOff) continue;
|
||||||
IterLda = &*Walk;
|
IterLda = &*Walk;
|
||||||
IterIna = &*N1;
|
|
||||||
IterSta = &*N2;
|
IterSta = &*N2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -91,23 +91,17 @@ FunctionPass *llvm::createW65816StackRelToImg() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Returns the DP-form opcode for a stack-rel input.
|
// Thin wrappers over the shared helpers in W65816InstrInfo.h. Kept as
|
||||||
|
// local statics so existing call sites in this file don't have to spell
|
||||||
|
// the namespace.
|
||||||
static unsigned getDpOpcode(unsigned Opc) {
|
static unsigned getDpOpcode(unsigned Opc) {
|
||||||
switch (Opc) {
|
return W65816Helpers::getDpOpcodeForStackRel(Opc);
|
||||||
case W65816::LDA_StackRel: return W65816::LDA_DP;
|
|
||||||
case W65816::STA_StackRel: return W65816::STA_DP;
|
|
||||||
case W65816::ADC_StackRel: return W65816::ADC_DP;
|
|
||||||
case W65816::SBC_StackRel: return W65816::SBC_DP;
|
|
||||||
case W65816::CMP_StackRel: return W65816::CMP_DP;
|
|
||||||
case W65816::AND_StackRel: return W65816::AND_DP;
|
|
||||||
case W65816::ORA_StackRel: return W65816::ORA_DP;
|
|
||||||
case W65816::EOR_StackRel: return W65816::EOR_DP;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
static bool isStackRelOp(unsigned Opc) { return getDpOpcode(Opc) != 0; }
|
static bool isStackRelOp(unsigned Opc) {
|
||||||
|
return W65816Helpers::isStackRelOpcode(Opc);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Whitelist of libgcc functions verified to not touch IMG0..IMG7 ($D0..$DE).
|
// Whitelist of libgcc functions verified to not touch IMG0..IMG7 ($D0..$DE).
|
||||||
|
|
@ -2943,10 +2937,11 @@ bool W65816StackRelToImg::runOnMachineFunction(MachineFunction &MF) {
|
||||||
}
|
}
|
||||||
if (!selfLoop) continue;
|
if (!selfLoop) continue;
|
||||||
|
|
||||||
// Find TXA ; STA_StackRel S ; INX in this MBB.
|
// Find TXA ; STA_StackRel S ; INX in this MBB. The INX is left in
|
||||||
|
// place — Y-as-counter handles it elsewhere — so we only need to
|
||||||
|
// verify it's present.
|
||||||
MachineInstr *Txa = nullptr;
|
MachineInstr *Txa = nullptr;
|
||||||
MachineInstr *StaS = nullptr;
|
MachineInstr *StaS = nullptr;
|
||||||
MachineInstr *Inx = nullptr;
|
|
||||||
int64_t Soff = -1;
|
int64_t Soff = -1;
|
||||||
auto It = MBB.begin();
|
auto It = MBB.begin();
|
||||||
while (It != MBB.end()) {
|
while (It != MBB.end()) {
|
||||||
|
|
@ -2964,7 +2959,8 @@ bool W65816StackRelToImg::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (Sta->getNumOperands() < 1 || !Sta->getOperand(0).isImm()) {
|
if (Sta->getNumOperands() < 1 || !Sta->getOperand(0).isImm()) {
|
||||||
++It; continue;
|
++It; continue;
|
||||||
}
|
}
|
||||||
Txa = &*It; StaS = &*Sta; Inx = &*P;
|
Txa = &*It;
|
||||||
|
StaS = &*Sta;
|
||||||
Soff = Sta->getOperand(0).getImm();
|
Soff = Sta->getOperand(0).getImm();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -117,12 +117,10 @@ FunctionPass *llvm::createW65816StackSlotMerge() {
|
||||||
|
|
||||||
|
|
||||||
// Stack-relative MC opcodes — the ops that survive eliminateFrameIndex
|
// Stack-relative MC opcodes — the ops that survive eliminateFrameIndex
|
||||||
// and reference a slot via an 8-bit SP-relative offset.
|
// and reference a slot via an 8-bit SP-relative offset. Defined in
|
||||||
|
// W65816InstrInfo.cpp so every pass keeps the same set in sync.
|
||||||
static bool isStackRelOp(unsigned Op) {
|
static bool isStackRelOp(unsigned Op) {
|
||||||
return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel ||
|
return W65816Helpers::isStackRelOpcode(Op);
|
||||||
Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel ||
|
|
||||||
Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel ||
|
|
||||||
Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -733,7 +731,6 @@ bool W65816StackSlotMerge::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// flag-use (unsafe).
|
// flag-use (unsafe).
|
||||||
MachineBasicBlock *MBB = DominatedSta->getParent();
|
MachineBasicBlock *MBB = DominatedSta->getParent();
|
||||||
bool flagsSafeP5 = false;
|
bool flagsSafeP5 = false;
|
||||||
bool reachedMBBEnd = false;
|
|
||||||
for (auto Fwd = std::next(DominatedSta->getIterator());
|
for (auto Fwd = std::next(DominatedSta->getIterator());
|
||||||
Fwd != MBB->end(); ++Fwd) {
|
Fwd != MBB->end(); ++Fwd) {
|
||||||
if (Fwd->isDebugInstr()) continue;
|
if (Fwd->isDebugInstr()) continue;
|
||||||
|
|
@ -749,12 +746,9 @@ bool W65816StackSlotMerge::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// with an LDA, a flag-clobberer). Require ALL successors
|
// with an LDA, a flag-clobberer). Require ALL successors
|
||||||
// to clobber flags before any flag-use.
|
// to clobber flags before any flag-use.
|
||||||
if (!flagsSafeP5) {
|
if (!flagsSafeP5) {
|
||||||
// Did the loop exit via fall-through (no break)?
|
// Fell through to MBB end without finding a flag clobber or
|
||||||
// Check by walking the same loop again, simpler check.
|
// unconditional terminator. Recurse one level: require ALL
|
||||||
auto It = std::next(DominatedSta->getIterator());
|
// successors to clobber flags before any flag-use.
|
||||||
while (It != MBB->end() && It->isDebugInstr()) ++It;
|
|
||||||
// ... too brittle to track via prev loop; just recurse for
|
|
||||||
// every case where flagsSafeP5 is false. Conservative.
|
|
||||||
bool allSuccClobber = !MBB->succ_empty();
|
bool allSuccClobber = !MBB->succ_empty();
|
||||||
for (MachineBasicBlock *Succ : MBB->successors()) {
|
for (MachineBasicBlock *Succ : MBB->successors()) {
|
||||||
bool succClobbers = false;
|
bool succClobbers = false;
|
||||||
|
|
|
||||||
|
|
@ -48,7 +48,11 @@ LLVMInitializeW65816Target() {
|
||||||
initializeW65816AsmPrinterPass(PR);
|
initializeW65816AsmPrinterPass(PR);
|
||||||
initializeW65816DAGToDAGISelLegacyPass(PR);
|
initializeW65816DAGToDAGISelLegacyPass(PR);
|
||||||
initializeW65816StackSlotCleanupPass(PR);
|
initializeW65816StackSlotCleanupPass(PR);
|
||||||
|
initializeW65816SepRepCleanupPass(PR);
|
||||||
|
initializeW65816BranchExpandPass(PR);
|
||||||
|
initializeW65816TiedDefSpillPass(PR);
|
||||||
initializeW65816ABridgeViaXPass(PR);
|
initializeW65816ABridgeViaXPass(PR);
|
||||||
|
initializeW65816UnLSRPass(PR);
|
||||||
initializeW65816WidenAcc16Pass(PR);
|
initializeW65816WidenAcc16Pass(PR);
|
||||||
initializeW65816SpillToXPass(PR);
|
initializeW65816SpillToXPass(PR);
|
||||||
initializeW65816NegYIndYPass(PR);
|
initializeW65816NegYIndYPass(PR);
|
||||||
|
|
|
||||||
|
|
@ -82,38 +82,10 @@ FunctionPass *llvm::createW65816TiedDefSpill() {
|
||||||
// to this set avoids regressing other patterns whose existing
|
// to this set avoids regressing other patterns whose existing
|
||||||
// regalloc behaviour is correct.
|
// regalloc behaviour is correct.
|
||||||
//
|
//
|
||||||
// All entries below have shape `(outs Acc16:$dst), (ins Acc16:$src,
|
// All entries (see W65816Helpers::isTiedAcc16Consumer) have shape
|
||||||
// memfi:$addr)` or similar tied-source-Acc16 + side-load form,
|
// `(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr)` or similar
|
||||||
// matching the failure pattern observed in `bump` / `eval`.
|
// tied-source-Acc16 + side-load form, matching the failure pattern
|
||||||
static bool isTiedAcc16Consumer(unsigned Opc) {
|
// observed in `bump` / `eval`. The shared predicate is reused below.
|
||||||
switch (Opc) {
|
|
||||||
case W65816::ADCfi:
|
|
||||||
case W65816::SBCfi:
|
|
||||||
case W65816::ANDfi:
|
|
||||||
case W65816::ORAfi:
|
|
||||||
case W65816::EORfi:
|
|
||||||
case W65816::ADCabs:
|
|
||||||
case W65816::SBCabs:
|
|
||||||
case W65816::ADCi16imm:
|
|
||||||
case W65816::SBCi16imm:
|
|
||||||
case W65816::ANDi16imm:
|
|
||||||
case W65816::ORAi16imm:
|
|
||||||
case W65816::EORi16imm:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static bool hasTiedSrcDef(const MachineInstr &MI) {
|
|
||||||
if (!isTiedAcc16Consumer(MI.getOpcode())) return false;
|
|
||||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
|
||||||
const MachineOperand &MO = MI.getOperand(i);
|
|
||||||
if (!MO.isReg() || !MO.isUse()) continue;
|
|
||||||
if (MI.isRegTiedToDefOperand(i)) return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool W65816TiedDefSpill::runOnMachineFunction(MachineFunction &MF) {
|
bool W65816TiedDefSpill::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// Only pre-RA: skip if vregs are already gone.
|
// Only pre-RA: skip if vregs are already gone.
|
||||||
|
|
@ -139,7 +111,7 @@ bool W65816TiedDefSpill::runOnMachineFunction(MachineFunction &MF) {
|
||||||
|
|
||||||
for (auto &MBB : MF) {
|
for (auto &MBB : MF) {
|
||||||
for (auto &MI : MBB) {
|
for (auto &MI : MBB) {
|
||||||
if (!hasTiedSrcDef(MI)) continue;
|
if (!W65816Helpers::hasTiedAcc16Src(MI)) continue;
|
||||||
// For each tied-source operand, check if the source vreg has
|
// For each tied-source operand, check if the source vreg has
|
||||||
// any use other than this MI. If yes, queue for spill.
|
// any use other than this MI. If yes, queue for spill.
|
||||||
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
|
||||||
|
|
|
||||||
119
tests/benchSummary_2026_06_03.md
Normal file
119
tests/benchSummary_2026_06_03.md
Normal file
|
|
@ -0,0 +1,119 @@
|
||||||
|
// Benchmark cycle regression sweep — 2026-06-03
|
||||||
|
//
|
||||||
|
// Methodology
|
||||||
|
//
|
||||||
|
// - scripts/benchCyclesPrecise.sh harness (default Layer 1, no
|
||||||
|
// W65816_CC_EXTRA), measured via emu.time() inside MAME.
|
||||||
|
// - Three back-to-back runs; numbers were byte-identical across
|
||||||
|
// runs (emu.time() is deterministic when MAME is driven from the
|
||||||
|
// same Lua boot script). No MAME flakiness involved.
|
||||||
|
// - Compared against the most recent recorded baseline in each
|
||||||
|
// bench's MEMORY.md entry (see "Source" column).
|
||||||
|
//
|
||||||
|
// Suspected cause of regressions: commit 09f7405 (2026-06-03,
|
||||||
|
// "Updates") removed three major peephole/pass bodies:
|
||||||
|
//
|
||||||
|
// - W65816UnLSR.cpp lost processReturnedCounter (-241 lines).
|
||||||
|
// This was the strLen-style counter-PHI-to-pointer-PHI undo that
|
||||||
|
// enabled the downstream Y-as-counter peephole in StackRelToImg.
|
||||||
|
// Without it, strLen / strcpy / memcmp loops emit the
|
||||||
|
// pre-2026-05-25 22 cyc/iter form instead of the 13 cyc/iter
|
||||||
|
// form.
|
||||||
|
// - W65816SepRepCleanup.cpp lost the store-forwarding pass body
|
||||||
|
// (-370 lines including 358 comment+code lines). This was the
|
||||||
|
// PHI-copy memory-to-memory eliminator that fed djb2Hash and
|
||||||
|
// popcount.
|
||||||
|
// - W65816WidenAcc16.cpp lost the Phase-2 PHI cycle widening
|
||||||
|
// scaffolding (-214 lines). Effect on benches less direct but
|
||||||
|
// correlates with djb2Hash, popcount, memcmp regressions.
|
||||||
|
//
|
||||||
|
// Commit message claims "Updates" — diff is a wholesale removal of
|
||||||
|
// "disabled" / "experimental" #if-0'd code blocks. Some of those
|
||||||
|
// blocks were actually wired in (UnLSR.processReturnedCounter was
|
||||||
|
// not gated behind any disable; the call site at line ~107 was
|
||||||
|
// `Changed |= processReturnedCounter(L);` per memory, with the
|
||||||
|
// "disabled" comment now showing the call removed).
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Results
|
||||||
|
//
|
||||||
|
// benchCyclesPrecise.sh on commit HEAD (09f7405), default Layer 1
|
||||||
|
// (no -mllvm -w65816-dbr-safe-ptrs), all benches 3x consistent.
|
||||||
|
//
|
||||||
|
// | Bench | Baseline | Current | Delta % | Regression? | Baseline source |
|
||||||
|
// |---------------|---------:|--------:|---------:|:-------------|----------------------------------------------|
|
||||||
|
// | bsearch | 767 | 767 | +0.0% | NO | feedback_remaining_optimization_opportunities |
|
||||||
|
// | bubbleSort | 15004 | 15004 | +0.0% | NO | feedback_layer2_loop_miscompile (L1 baseline) |
|
||||||
|
// | crc32 | n/a | 55839 | n/a | NO BASELINE | first measurement |
|
||||||
|
// | djb2Hash | 2387 | 2728 | +14.3% | YES | feedback_mul_const_strength_reduce 2026-05-25 |
|
||||||
|
// | dotProduct | 1620 | 1620 | +0.0% | NO | feedback_dpf0_setup_collapse 2026-05-15 |
|
||||||
|
// | fib | 11594 | 11764 | +1.5% | marginal | feedback_stackrel_dead_store_fib 2026-05-27 |
|
||||||
|
// | memcmp | 716 | 887 | +23.9% | YES | feedback_dp_dead_store_elim 2026-05-25 |
|
||||||
|
// | popcount | 1194 | 1228 | +2.8% | YES (mild) | feedback_popcount_carry_trick 2026-05-26 |
|
||||||
|
// | strcpy | 1108 | 1705 | +53.9% | YES | feedback_stackrel_dead_store_elim 2026-05-27 |
|
||||||
|
// | strLen | 767 | 2643 | +244.6% | YES (severe) | feedback_y_as_counter_strlen 2026-05-27 |
|
||||||
|
// | sumOfSquares | n/cmp | 6820 | n/a | NO (improved)| harness change since 18755 number |
|
||||||
|
// | globalArr8Sum | n/a | 3922 | n/a | NO BASELINE | first measurement |
|
||||||
|
// | globalArrFill | n/a | 8184 | n/a | NO BASELINE | first measurement |
|
||||||
|
// | globalArrSum | n/a | 8525 | n/a | NO BASELINE | first measurement |
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Notes per regression
|
||||||
|
//
|
||||||
|
// strLen +244.6% The 767-cyc baseline came from the y-as-counter
|
||||||
|
// peephole in W65816StackRelToImg, whose INPUT
|
||||||
|
// pattern is produced by W65816UnLSR's
|
||||||
|
// processReturnedCounter (the strLen-style undo).
|
||||||
|
// With that undo removed, StackRelToImg sees the
|
||||||
|
// LSR-widened counter-PHI form and bails to
|
||||||
|
// generic codegen. The peephole code is still
|
||||||
|
// present in StackRelToImg.cpp lines 2941, 3106 —
|
||||||
|
// but it never matches.
|
||||||
|
//
|
||||||
|
// strcpy +53.9% Same root cause: UnLSR's processReturnedCounter
|
||||||
|
// also fed the strcpy-style pointer-walk shapes.
|
||||||
|
// The "stack-rel dead-store elim" peephole in
|
||||||
|
// StackRelToImg (which produced the 1108 cyc
|
||||||
|
// baseline) depends on the pattern collapse
|
||||||
|
// that UnLSR used to perform before it was removed.
|
||||||
|
//
|
||||||
|
// memcmp +23.9% Two-pointer deref loop; same family of patterns.
|
||||||
|
// The Pass-2c DPF0-setup-collapse in
|
||||||
|
// W65816StackSlotCleanup (which produced 818 cyc
|
||||||
|
// and was later tightened to 716 via dead-store
|
||||||
|
// elim) is still present, but its upstream
|
||||||
|
// structural shape isn't being produced.
|
||||||
|
//
|
||||||
|
// djb2Hash +14.3% Hash loop with i32 accumulator. The
|
||||||
|
// store-forwarding pass removed from
|
||||||
|
// SepRepCleanup was the eliminator for the PHI
|
||||||
|
// memory copy at end of body (2387-cyc baseline
|
||||||
|
// required it).
|
||||||
|
//
|
||||||
|
// popcount +2.8% Slight regression; the carry-trick peephole
|
||||||
|
// is still present (StackRelToImg.cpp line 2541),
|
||||||
|
// but the lagged-PHI store-forwarding step it
|
||||||
|
// relied on is gone, costing 3 cyc/iter * 16 iters
|
||||||
|
// plus a few cleanup cycles at exit.
|
||||||
|
//
|
||||||
|
// fib +1.5% Marginal. Stack-rel dead-store-elim still
|
||||||
|
// present per StackRelToImg.cpp; the small
|
||||||
|
// regression may be build-configuration (CMake) or register-allocation noise from
|
||||||
|
// the unrelated WidenAcc16 changes.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// Verdict: REGRESSIONS FOUND.
|
||||||
|
//
|
||||||
|
// Five clear regressions (strLen, strcpy, memcmp, djb2Hash, popcount)
|
||||||
|
// and one marginal (fib) attributable to commit 09f7405 (2026-06-03,
|
||||||
|
// "Updates") which removed perf-critical pass bodies from
|
||||||
|
// W65816UnLSR.cpp, W65816SepRepCleanup.cpp, and W65816WidenAcc16.cpp.
|
||||||
|
//
|
||||||
|
// Fix path (not this agent): restore the deleted blocks (especially
|
||||||
|
// W65816UnLSR::processReturnedCounter and its registration in
|
||||||
|
// runOnFunction), then re-run this sweep to confirm strLen 2643 →
|
||||||
|
// 767, strcpy 1705 → 1108, memcmp 887 → 716, djb2Hash 2728 → 2387, and popcount 1228 → 1194.
|
||||||
|
//
|
||||||
|
// Files unchanged by this agent: src/llvm/lib/Target/W65816/*.
|
||||||
|
// New file created by this agent: tests/benchSummary_2026_06_03.md
|
||||||
|
// (this file).
|
||||||
Loading…
Add table
Reference in a new issue