Updates
This commit is contained in:
parent
da095402ec
commit
09f7405362
38 changed files with 2454 additions and 1599 deletions
7
.gitignore
vendored
7
.gitignore
vendored
|
|
@ -19,6 +19,13 @@ tools/
|
||||||
*.map
|
*.map
|
||||||
*.reloc
|
*.reloc
|
||||||
|
|
||||||
|
# Exception: demo .rsrc/ fixture directories ship TYPECODE_ID.bin files
|
||||||
|
# as source. Each rsrcBundle test reads them at build time and emits
|
||||||
|
# the AppleSingle + sidecar in the same dir; the .apl + sidecar are
|
||||||
|
# build artifacts (caught by *.bin above for the sidecar; .apl is
|
||||||
|
# tracked by name). We carve out the source fixtures here.
|
||||||
|
!demos/*.rsrc/*.bin
|
||||||
|
|
||||||
# Per-target build directories.
|
# Per-target build directories.
|
||||||
tests/coremark/build/
|
tests/coremark/build/
|
||||||
tests/lua/build/
|
tests/lua/build/
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -1,59 +1,134 @@
|
||||||
// rsrcProbe.c - Phase 3.4 stub-only Resource Manager smoke probe.
|
// rsrcProbe.c - Phase 3.4 real Resource Manager smoke probe.
|
||||||
//
|
//
|
||||||
// What this verifies right now:
|
// Replaces the stub-only probe. Builds a tiny in-memory .rsrc fixture,
|
||||||
// - resourceProbeInit() returns RES_ERR_BLOCKED (the stub-only path),
|
// registers it with mfsRegister, opens it via openResourceFile, loads
|
||||||
// - iigsLoadResource() returns NULL with err = RES_ERR_BLOCKED,
|
// a known rText resource, and verifies the bytes match the expected
|
||||||
// - iigsGetResourceSize() returns 0 with err = RES_ERR_BLOCKED,
|
// payload. This exercises the real parser path top-to-bottom without
|
||||||
// - the runtime resource.o links cleanly under -O2,
|
// needing a ProDOS resource fork.
|
||||||
// - the demo's OMF can be bundled with rsrcBundle.py (post-step in
|
|
||||||
// demos/build.sh when demos/rsrcProbe.rsrc/ is present).
|
|
||||||
//
|
|
||||||
// Marker discipline. Page-1 ($70..$73) per the cursorProbe.c
|
|
||||||
// convention - runViaFinder.sh samples direct-page bytes reliably
|
|
||||||
// across MAME timings, and full-24-bit BSS-style markers (0x025000)
|
|
||||||
// don't survive the Loader/Finder relocation games on GS/OS 6.0.2.
|
|
||||||
//
|
//
|
||||||
|
// Markers (page-1 direct page, per cursorProbe convention):
|
||||||
// $70 := 0x99 end-of-main success sentinel
|
// $70 := 0x99 end-of-main success sentinel
|
||||||
// $71 := initRc as int8 (expected 0xff = (uint8_t)RES_ERR_BLOCKED)
|
// $71 := 0x01 if openResourceFile succeeded (refnum != 0)
|
||||||
// $72 := loadErr (expected 0xff)
|
// $72 := 0x01 if loadResource returned a non-NULL handle whose
|
||||||
// $73 := 0x01 if resourceRuntimeEnabled()==0 (today's stub answer)
|
// bytes match "HELLO" and size is 5
|
||||||
|
// $73 := 0x01 if loadResource second call returned the SAME handle
|
||||||
|
// (cache hit) and closeResourceFile returned RES_OK
|
||||||
//
|
//
|
||||||
// Build: bash demos/build.sh rsrcProbe
|
// Build: bash demos/build.sh rsrcProbe
|
||||||
// Run: bash scripts/runViaFinder.sh demos/rsrcProbe.omf \
|
// Run: bash scripts/runViaFinder.sh demos/rsrcProbe.omf \
|
||||||
// --check 0x70=0x99
|
// --check 0x70=0x99 0x71=0x01 0x72=0x01 0x73=0x01
|
||||||
// runViaFinder LAUNCHES the OMF and samples at frame 6000; no keypress
|
|
||||||
// is required because we drop into while(1) immediately after writing
|
|
||||||
// the markers.
|
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
#include "iigs/resource.h"
|
#include "iigs/resource.h"
|
||||||
|
|
||||||
|
|
||||||
|
// rResourceMap fixture: header + 5-byte rText payload + one rIndex entry.
|
||||||
|
//
|
||||||
|
// Header (24 bytes, little-endian):
|
||||||
|
// rmVersion = 0x0000
|
||||||
|
// rmToIndex = 0x0000001D (29)
|
||||||
|
// rmFileNum = 0
|
||||||
|
// rmID = 0
|
||||||
|
// rmIndexSize = 0x00000014 (20 bytes = 1 entry)
|
||||||
|
// rmIndexUsed = 0x00000001
|
||||||
|
// rmFreeListSize = 0
|
||||||
|
// rmFreeListUsed = 0
|
||||||
|
// rmPad = 0
|
||||||
|
// Payload (5 bytes) at offset 24: "HELLO"
|
||||||
|
// rIndex entry (20 bytes) at offset 29:
|
||||||
|
// rType = 0x8014 (rText)
|
||||||
|
// rID = 0x00000001
|
||||||
|
// rOffset = 0x00000018 (24)
|
||||||
|
// rAttr = 0
|
||||||
|
// rSize = 0x00000005
|
||||||
|
// rHandle = 0
|
||||||
|
static const uint8_t kFixture[49] = {
|
||||||
|
// header
|
||||||
|
0x00, 0x00, // rmVersion
|
||||||
|
0x1D, 0x00, 0x00, 0x00, // rmToIndex = 29
|
||||||
|
0x00, 0x00, // rmFileNum
|
||||||
|
0x00, 0x00, // rmID
|
||||||
|
0x14, 0x00, 0x00, 0x00, // rmIndexSize = 20
|
||||||
|
0x01, 0x00, 0x00, 0x00, // rmIndexUsed = 1
|
||||||
|
0x00, 0x00, // rmFreeListSize
|
||||||
|
0x00, 0x00, // rmFreeListUsed
|
||||||
|
0x00, 0x00, // rmPad
|
||||||
|
// payload at offset 24: "HELLO"
|
||||||
|
0x48, 0x45, 0x4C, 0x4C, 0x4F,
|
||||||
|
// rIndex entry at offset 29
|
||||||
|
0x14, 0x80, // rType = 0x8014
|
||||||
|
0x01, 0x00, 0x00, 0x00, // rID = 1
|
||||||
|
0x18, 0x00, 0x00, 0x00, // rOffset = 24
|
||||||
|
0x00, 0x00, // rAttr
|
||||||
|
0x05, 0x00, 0x00, 0x00, // rSize = 5
|
||||||
|
0x00, 0x00, 0x00, 0x00 // rHandle
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
static const char kFixturePath[] = "rsrc.fixture";
|
||||||
|
static const char kExpectedText[] = "HELLO";
|
||||||
|
static const uint32_t kExpectedSize = 5;
|
||||||
|
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
volatile uint8_t *mark0 = (volatile uint8_t *)0x70;
|
volatile uint8_t *mark0 = (volatile uint8_t *)0x70;
|
||||||
volatile uint8_t *mark1 = (volatile uint8_t *)0x71;
|
volatile uint8_t *mark1 = (volatile uint8_t *)0x71;
|
||||||
volatile uint8_t *mark2 = (volatile uint8_t *)0x72;
|
volatile uint8_t *mark2 = (volatile uint8_t *)0x72;
|
||||||
volatile uint8_t *mark3 = (volatile uint8_t *)0x73;
|
volatile uint8_t *mark3 = (volatile uint8_t *)0x73;
|
||||||
|
|
||||||
*mark0 = 0x10; // entry sentinel: we did reach main()
|
*mark0 = 0x10;
|
||||||
int initRc = resourceProbeInit();
|
*mark1 = 0x00;
|
||||||
*mark1 = (uint8_t)initRc;
|
*mark2 = 0x00;
|
||||||
|
*mark3 = 0x00;
|
||||||
|
|
||||||
int loadErr = 0;
|
// Stage the fixture as a read-only memory-backed file. Cast away
|
||||||
void **h = iigsLoadResource(RES_TYPE_RTEXT, 1, &loadErr);
|
// const for the mfsRegister buffer pointer; the resource manager
|
||||||
(void)h;
|
// only ever reads.
|
||||||
*mark2 = (uint8_t)loadErr;
|
if (mfsRegister(kFixturePath, (void *)kFixture, sizeof(kFixture), sizeof(kFixture), 0) != 0) {
|
||||||
|
while (1) {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
int sizeErr = 0;
|
resourceProbeInit();
|
||||||
uint32_t sz = iigsGetResourceSize(RES_TYPE_RTEXT, 1, &sizeErr);
|
|
||||||
(void)sz;
|
|
||||||
|
|
||||||
*mark3 = (uint8_t)(resourceRuntimeEnabled() == 0 ? 0x01 : 0x00);
|
int rcOpen = 0;
|
||||||
|
ResourceRefNumT ref = openResourceFile(kFixturePath, 0, 0, &rcOpen);
|
||||||
|
if (ref != 0 && rcOpen == RES_OK) {
|
||||||
|
*mark1 = 0x01;
|
||||||
|
}
|
||||||
|
|
||||||
|
int rcLoad = 0;
|
||||||
|
void **h = loadResource(RES_TYPE_RTEXT, 1, &rcLoad);
|
||||||
|
if (h && rcLoad == RES_OK) {
|
||||||
|
const uint8_t *bytes = (const uint8_t *)*h;
|
||||||
|
uint32_t sz = getResourceSize(h);
|
||||||
|
int match = (sz == kExpectedSize);
|
||||||
|
if (match) {
|
||||||
|
for (uint32_t i = 0; i < kExpectedSize; i++) {
|
||||||
|
if (bytes[i] != (uint8_t)kExpectedText[i]) {
|
||||||
|
match = 0;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (match) {
|
||||||
|
*mark2 = 0x01;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Second load - cache hit must return the SAME handle. Then
|
||||||
|
// close the file, which must report RES_OK.
|
||||||
|
int rcLoad2 = 0;
|
||||||
|
void **h2 = loadResource(RES_TYPE_RTEXT, 1, &rcLoad2);
|
||||||
|
int sameHandle = (h2 == h && h2 != 0);
|
||||||
|
int rcClose = closeResourceFile(ref);
|
||||||
|
if (sameHandle && rcClose == RES_OK) {
|
||||||
|
*mark3 = 0x01;
|
||||||
|
}
|
||||||
|
|
||||||
// Success marker last - if any of the calls above trapped (which
|
|
||||||
// they shouldn't in stub-only mode), the harness will see $70
|
|
||||||
// != 0x99 and report failure.
|
|
||||||
*mark0 = 0x99;
|
*mark0 = 0x99;
|
||||||
|
|
||||||
while (1) {
|
while (1) {
|
||||||
|
|
|
||||||
1
demos/rsrcProbe.rsrc/8005_0001.bin
Normal file
1
demos/rsrcProbe.rsrc/8005_0001.bin
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
iconBytesPlaceholder
|
||||||
1
demos/rsrcProbe.rsrc/8014_0001.bin
Normal file
1
demos/rsrcProbe.rsrc/8014_0001.bin
Normal file
|
|
@ -0,0 +1 @@
|
||||||
|
HELLO
|
||||||
|
|
@ -1,34 +1,37 @@
|
||||||
// iigs/resource.h - typed-C facade over the IIgs Resource Manager.
|
// iigs/resource.h - typed-C facade over the IIgs Resource Manager.
|
||||||
//
|
//
|
||||||
// Phase 3.4 STUB-ONLY landing. The bundler + linker integration ship
|
// Phase 3.4 REAL implementation: parses .rsrc resource forks via the
|
||||||
// fully (see tools/rsrcBundle/), but the *runtime* path is blocked on
|
// stdio surface (fopen/fread/fseek/fclose) and serves resources from a
|
||||||
// Phase 1.1 (the GS/OS fopen hang). GS/OS 6.0.2 + ResourceStartUp +
|
// per-file cache. Read-only. No AddResource, no DetachResource, no
|
||||||
// OpenResourceFile reaches the same path that hangs in fopen today, so
|
// partial-load, no encryption - those are features we do not yet need.
|
||||||
// the LoadResource()/GetResourceSize() entry points below return error
|
|
||||||
// codes instead of calling the toolbox. When Phase 1.1 lands, flip
|
|
||||||
// IIGS_RESOURCE_RUNTIME_ENABLED to 1 (or define it at the compiler
|
|
||||||
// level) and rebuild the runtime - the same C surface stays.
|
|
||||||
//
|
//
|
||||||
// What you GET today:
|
// What you GET today:
|
||||||
// - resourceProbeInit() reports whether the runtime path is enabled.
|
// - openResourceFile(path, accessByte, fileType, err) -> refNum (>0) or
|
||||||
// - LoadResource() / GetResourceSize() return RES_ERR_BLOCKED unless
|
// 0 on failure (errno-style code lands in *err if provided).
|
||||||
// IIGS_RESOURCE_RUNTIME_ENABLED is set at compile time.
|
// - loadResource(type, id, err) -> Handle (void **) on success; cached so
|
||||||
|
// repeated calls return the same handle. *handle points at the
|
||||||
|
// resource bytes (already read from the file).
|
||||||
|
// - releaseResource(verb, handle) -> 0 on success. verb 0 just
|
||||||
|
// releases the current load; verb 1 also evicts the cache entry
|
||||||
|
// and frees the data.
|
||||||
|
// - closeResourceFile(refNum) -> 0 on success. Frees all cached
|
||||||
|
// handles owned by that file.
|
||||||
//
|
//
|
||||||
// HLock semantics (IMPORTANT for future Phase 1.1 unblock):
|
// On-disk format (Apple IIgs Toolbox Reference Vol 3, ch.42):
|
||||||
// The toolbox LoadResource() returns a HANDLE (void **) to a master
|
// File offset 0: rResourceMap header (24 bytes, little-endian fields
|
||||||
// pointer in MM-relocatable storage. The application MUST call
|
// because the 65816 is LE). Field rmToIndex is the file offset of
|
||||||
// HLock() before dereferencing if it intends to call ANY toolbox
|
// the rIndex table; rmIndexUsed is the number of valid entries; the
|
||||||
// routine that could trigger a heap compaction (most do). Without
|
// remaining header fields are bookkeeping/zero at build time.
|
||||||
// the HLock, the master pointer can be rewritten under you between
|
// Body bytes: resource payloads at the offsets recorded in rIndex.
|
||||||
// the LoadResource and the deref. The typed wrappers below DO NOT
|
// At rmToIndex: array of 20-byte rIndex entries, each:
|
||||||
// call HLock for you - that is a deliberate choice because over-
|
// uint16 rType, uint32 rID, uint32 rOffset, uint16 rAttr,
|
||||||
// locking is a memory-fragmentation footgun and the right scope is
|
// uint32 rSize, uint32 rHandle (zero on disk).
|
||||||
// workload-specific. Callers should:
|
//
|
||||||
// void **h = LoadResourceTyped(0x8014, 1);
|
// HLock semantics:
|
||||||
// HLock(h);
|
// The handles we return are NOT relocatable - they point straight at
|
||||||
// const RTextT *t = (const RTextT *)*h;
|
// a malloc'd payload buffer. That means HLock/HUnlock are no-ops
|
||||||
// ... use t ...
|
// here. The void ** indirection is preserved so that real Memory
|
||||||
// HUnlock(h);
|
// Manager handles can swap in later without changing callers.
|
||||||
|
|
||||||
#ifndef IIGS_RESOURCE_H
|
#ifndef IIGS_RESOURCE_H
|
||||||
#define IIGS_RESOURCE_H
|
#define IIGS_RESOURCE_H
|
||||||
|
|
@ -40,36 +43,39 @@ extern "C" {
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
// Flip to 1 (or pass -DIIGS_RESOURCE_RUNTIME_ENABLED=1 on the build line)
|
|
||||||
// once Phase 1.1 unblocks GS/OS fopen on 6.0.2. At that point the typed
|
|
||||||
// wrappers below dispatch into the live toolbox; until then they stub.
|
|
||||||
#ifndef IIGS_RESOURCE_RUNTIME_ENABLED
|
|
||||||
#define IIGS_RESOURCE_RUNTIME_ENABLED 0
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// Status codes returned by the typed wrappers. Mirror the runtime's
|
// Status codes returned by the typed wrappers. Mirror the runtime's
|
||||||
// existing errno-style convention (negative = error).
|
// existing errno-style convention (negative = error).
|
||||||
enum {
|
enum {
|
||||||
RES_OK = 0,
|
RES_OK = 0,
|
||||||
RES_ERR_BLOCKED = -1, // Phase 1.1 runtime path still blocked
|
RES_ERR_BLOCKED = -1, // legacy stub marker - kept for
|
||||||
RES_ERR_NOT_STARTED = -2, // resourceProbeInit() not called yet
|
// backwards compat with old probes
|
||||||
RES_ERR_NOT_FOUND = -3, // OpenResourceFile / LoadResource failed
|
RES_ERR_NOT_STARTED = -2, // openResourceFile not called yet
|
||||||
RES_ERR_TOOLBOX = -4 // Resource Manager returned non-zero
|
RES_ERR_NOT_FOUND = -3, // file open / resource lookup failed
|
||||||
|
RES_ERR_TOOLBOX = -4, // map header corrupt / IO failure
|
||||||
|
RES_ERR_NO_MEM = -5, // malloc failed
|
||||||
|
RES_ERR_BAD_HANDLE = -6 // release/close given an unknown ref
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
// Resource type codes we expect to bundle. See Apple IIgs Toolbox
|
// Resource type codes we expect to bundle. See Apple IIgs Toolbox
|
||||||
// Reference Vol 3 chapter 42 for the canonical list. Defined here as
|
// Reference Vol 3 chapter 42 for the canonical list.
|
||||||
// constants so callers don't have to use raw hex.
|
|
||||||
#define RES_TYPE_RICON 0x8005
|
#define RES_TYPE_RICON 0x8005
|
||||||
#define RES_TYPE_RTEXT 0x8014
|
#define RES_TYPE_RTEXT 0x8014
|
||||||
#define RES_TYPE_RPSTRING 0x8015
|
#define RES_TYPE_RPSTRING 0x8015
|
||||||
#define RES_TYPE_RCSTRING 0x8016
|
#define RES_TYPE_RCSTRING 0x8016
|
||||||
|
|
||||||
|
|
||||||
// Resource ID type matching the toolbox (32-bit on disk and in the
|
// Build-time tunables. These cap the per-process resource footprint.
|
||||||
// rIndex; the public API uses uint32_t).
|
#ifndef IIGS_RES_MAX_FILES
|
||||||
|
#define IIGS_RES_MAX_FILES 2
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef IIGS_RES_MAX_HANDLES
|
||||||
|
#define IIGS_RES_MAX_HANDLES 16
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Resource ID (32-bit on disk and in the rIndex).
|
||||||
typedef uint32_t IigsResIdT;
|
typedef uint32_t IigsResIdT;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -78,37 +84,87 @@ typedef uint32_t IigsResIdT;
|
||||||
typedef uint16_t IigsResTypeT;
|
typedef uint16_t IigsResTypeT;
|
||||||
|
|
||||||
|
|
||||||
// One-shot Resource Manager bring-up. Calls MMStartUp + TLStartUp +
|
// 24-byte resource map header at the start of every .rsrc file.
|
||||||
// ResourceStartUp + OpenResourceFile (on our own pathname) when the
|
typedef struct {
|
||||||
// runtime path is enabled. Always callable; safe to call more than
|
uint16_t rmVersion;
|
||||||
// once (subsequent calls are no-ops).
|
uint32_t rmToIndex;
|
||||||
//
|
uint16_t rmFileNum;
|
||||||
// Returns:
|
uint16_t rmID;
|
||||||
// RES_OK if the resource fork was opened (or the stub
|
uint32_t rmIndexSize;
|
||||||
// path "succeeded" with no-op behavior),
|
uint32_t rmIndexUsed;
|
||||||
// RES_ERR_BLOCKED if compiled with IIGS_RESOURCE_RUNTIME_ENABLED=0
|
uint16_t rmFreeListSize;
|
||||||
// (the default until Phase 1.1 lands),
|
uint16_t rmFreeListUsed;
|
||||||
// RES_ERR_TOOLBOX if any of the StartUp calls returned non-zero.
|
uint16_t rmPad;
|
||||||
|
} ResourceMapHeaderT;
|
||||||
|
|
||||||
|
|
||||||
|
// 20-byte rIndex entry.
|
||||||
|
typedef struct {
|
||||||
|
uint16_t rType;
|
||||||
|
uint32_t rID;
|
||||||
|
uint32_t rOffset;
|
||||||
|
uint16_t rAttr;
|
||||||
|
uint32_t rSize;
|
||||||
|
uint32_t rHandle;
|
||||||
|
} ResourceIndexEntryT;
|
||||||
|
|
||||||
|
|
||||||
|
// Refnum returned by openResourceFile. Zero means "no file"; valid
|
||||||
|
// refnums start at 1.
|
||||||
|
typedef uint16_t ResourceRefNumT;
|
||||||
|
|
||||||
|
|
||||||
|
// One-shot init. Returns RES_OK; safe to call more than once.
|
||||||
int resourceProbeInit(void);
|
int resourceProbeInit(void);
|
||||||
|
|
||||||
|
|
||||||
// Read whether the runtime path is live. Cheap; returns 1 iff a
|
// Reports whether the Resource Manager is alive. Always 1 after
|
||||||
// successful resourceProbeInit() has run AND the build enabled the
|
// resourceProbeInit() has run.
|
||||||
// runtime path. Returns 0 in the stub-only landing.
|
|
||||||
int resourceRuntimeEnabled(void);
|
int resourceRuntimeEnabled(void);
|
||||||
|
|
||||||
|
|
||||||
// LoadResource typed wrapper. Returns a HANDLE (void **) on success,
|
// Opens a resource fork at `path`. `accessByte` and `fileType` are
|
||||||
// or NULL on failure (and sets *err if non-NULL).
|
// accepted for API parity with the toolbox but ignored on read-only
|
||||||
|
// in-memory backends. Returns refnum (>0) on success, 0 on failure.
|
||||||
|
// If `err` is non-NULL it receives RES_OK or one of RES_ERR_*.
|
||||||
|
ResourceRefNumT openResourceFile(const char *path, uint8_t accessByte,
|
||||||
|
uint16_t fileType, int *err);
|
||||||
|
|
||||||
|
|
||||||
|
// Closes a resource fork and frees any handles cached for that file.
|
||||||
|
// Returns RES_OK or RES_ERR_BAD_HANDLE.
|
||||||
|
int closeResourceFile(ResourceRefNumT refNum);
|
||||||
|
|
||||||
|
|
||||||
|
// Loads a resource by (type, id). Searches all open resource files
|
||||||
|
// in open order and returns a cached handle if the same (type, id)
|
||||||
|
// was previously loaded from any open file. Returns NULL on failure.
|
||||||
//
|
//
|
||||||
// Caller is responsible for HLock/HUnlock pairing around any usage that
|
// The returned handle is `void **`; `*handle` is the resource bytes.
|
||||||
// crosses a toolbox call; see HLock semantics block at the top of this
|
void **loadResource(IigsResTypeT type, IigsResIdT id, int *err);
|
||||||
// file.
|
|
||||||
|
|
||||||
|
// Releases a previously-loaded resource.
|
||||||
|
// verb 0: keep the cached payload (cheap; the handle may be reused).
|
||||||
|
// verb 1: evict the cache entry and free the payload.
|
||||||
|
// Returns RES_OK on success.
|
||||||
|
int releaseResource(int verb, void **handle);
|
||||||
|
|
||||||
|
|
||||||
|
// Convenience: byte size of the resource pointed to by `handle`.
|
||||||
|
// Returns 0 if `handle` is not in the cache.
|
||||||
|
uint32_t getResourceSize(void **handle);
|
||||||
|
|
||||||
|
|
||||||
|
// ---- Legacy stub API kept for backwards compatibility ----
|
||||||
|
// The pre-Phase-3.4 stub exposed iigsLoadResource / iigsGetResourceSize
|
||||||
|
// for the rsrcProbe markers. Those now dispatch to the real
|
||||||
|
// implementation when at least one resource file is open. They report
|
||||||
|
// RES_ERR_NOT_STARTED when no file is open (instead of the old
|
||||||
|
// RES_ERR_BLOCKED), preserving the "did Phase 3.4 land?" signal.
|
||||||
void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err);
|
void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err);
|
||||||
|
|
||||||
|
|
||||||
// GetResourceSize typed wrapper. Returns the byte size of the resource
|
|
||||||
// or 0 on failure (and sets *err if non-NULL).
|
|
||||||
uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId,
|
uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId,
|
||||||
int *err);
|
int *err);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -384,102 +384,11 @@ typedef __builtin_va_list va_list;
|
||||||
#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
|
#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
|
||||||
#define va_end(ap) __builtin_va_end(ap)
|
#define va_end(ap) __builtin_va_end(ap)
|
||||||
|
|
||||||
static void writeUDec(unsigned int n) {
|
// vprintf / printf used to dispatch through their own small format
|
||||||
char buf[6]; // 16-bit: max 5 digits + null
|
// helpers (writeUDec/writeDec/writeULong/writeHex/writeStr/writeSignedLong/
|
||||||
int i = 0;
|
// writeDouble). Once vprintf was rewritten to route through vsnprintf
|
||||||
if (n == 0) { putchar('0'); return; }
|
// (so printf and snprintf share one format engine in snprintf.c), the
|
||||||
while (n > 0) { buf[i++] = '0' + (n % 10); n /= 10; }
|
// helpers became dead weight and were removed.
|
||||||
while (i > 0) putchar(buf[--i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void writeDec(int n) {
|
|
||||||
// For INT_MIN, `-n` overflows signed int (UB). Negate as unsigned
|
|
||||||
// — well-defined (two's-complement wrap), and the magnitude is
|
|
||||||
// identical for the print path.
|
|
||||||
if (n < 0) { putchar('-'); writeUDec((unsigned int)(0u - (unsigned int)n)); }
|
|
||||||
else writeUDec((unsigned int)n);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void writeULong(unsigned long n) {
|
|
||||||
char buf[11]; // 32-bit: max 10 digits + null
|
|
||||||
int i = 0;
|
|
||||||
if (n == 0) { putchar('0'); return; }
|
|
||||||
while (n > 0) { buf[i++] = '0' + (n % 10); n /= 10; }
|
|
||||||
while (i > 0) putchar(buf[--i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void writeHex(unsigned int n, int width) {
|
|
||||||
static const char digits[] = "0123456789abcdef";
|
|
||||||
// unsigned int is 16-bit on this target -> at most 4 hex digits.
|
|
||||||
// Cap width to that; without it `printf("%08x", ...)` blew past
|
|
||||||
// the buf[] tail and corrupted the stack.
|
|
||||||
char buf[4];
|
|
||||||
if (width > 4) width = 4;
|
|
||||||
int i = 0;
|
|
||||||
if (n == 0) { buf[i++] = '0'; }
|
|
||||||
while (n > 0 && i < 4) { buf[i++] = digits[n & 0xF]; n >>= 4; }
|
|
||||||
while (i < width) buf[i++] = '0';
|
|
||||||
while (i > 0) putchar(buf[--i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void writeStr(const char *s) {
|
|
||||||
if (!s) s = "(null)";
|
|
||||||
while (*s) { putchar(*s); s++; }
|
|
||||||
}
|
|
||||||
|
|
||||||
// Format-spec handlers used to be marked noinline to keep vprintf's
|
|
||||||
// main loop small for the long-branch limitation; now W65816BranchExpand
|
|
||||||
// reliably promotes Bxx to BRL when needed, so the inliner is free to
|
|
||||||
// merge them when it wants.
|
|
||||||
static void writeSignedLong(long n) {
|
|
||||||
// See writeDec: avoid the signed-overflow UB on LONG_MIN.
|
|
||||||
if (n < 0) { putchar('-'); writeULong(0ul - (unsigned long)n); }
|
|
||||||
else writeULong((unsigned long)n);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Minimal %f / %g support. Uses double soft-float; precision capped
|
|
||||||
// at 6 fractional digits (the C default). Doesn't handle Inf/NaN
|
|
||||||
// specially — prints the integer extraction, which will be 0 for
|
|
||||||
// non-finite values. Not IEEE-precise (intermediate truncation in
|
|
||||||
// the soft-double mul/div), but good enough for typical formatted
|
|
||||||
// numeric output.
|
|
||||||
static void writeDouble(double v, int prec) {
|
|
||||||
if (prec < 0) prec = 6;
|
|
||||||
if (prec > 9) prec = 9;
|
|
||||||
// Test the IEEE-754 sign bit (so -0.0 prints with the sign per
|
|
||||||
// C99) and avoid the soft-float __ltdf2 comparison, which has
|
|
||||||
// historically miscompiled for negative inputs (see snprintf.c
|
|
||||||
// banner for the same workaround).
|
|
||||||
unsigned long long vbits;
|
|
||||||
__builtin_memcpy(&vbits, &v, 8);
|
|
||||||
if (vbits & ((unsigned long long)1 << 63)) {
|
|
||||||
putchar('-');
|
|
||||||
vbits &= ~((unsigned long long)1 << 63);
|
|
||||||
__builtin_memcpy(&v, &vbits, 8);
|
|
||||||
}
|
|
||||||
long ipart = (long)v;
|
|
||||||
writeULong((unsigned long)ipart);
|
|
||||||
if (prec == 0) return;
|
|
||||||
putchar('.');
|
|
||||||
double frac = v - (double)ipart;
|
|
||||||
// Multiply fraction by 10^prec, then print as integer with leading zeros.
|
|
||||||
long mul = 1;
|
|
||||||
for (int i = 0; i < prec; i++) mul *= 10;
|
|
||||||
long fdigits = (long)(frac * (double)mul);
|
|
||||||
if (fdigits < 0) fdigits = -fdigits;
|
|
||||||
char buf[10];
|
|
||||||
int n = 0;
|
|
||||||
long scale = mul / 10;
|
|
||||||
while (n < prec) {
|
|
||||||
if (scale == 0) scale = 1;
|
|
||||||
long d = fdigits / scale;
|
|
||||||
buf[n++] = '0' + (char)(d % 10);
|
|
||||||
scale /= 10;
|
|
||||||
if (scale == 0) break;
|
|
||||||
}
|
|
||||||
while (n < prec) buf[n++] = '0';
|
|
||||||
for (int i = 0; i < n; i++) putchar(buf[i]);
|
|
||||||
}
|
|
||||||
|
|
||||||
extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap);
|
extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap);
|
||||||
|
|
||||||
|
|
@ -724,10 +633,11 @@ void free(void *p) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void *calloc(size_t nmemb, size_t size) {
|
void *calloc(size_t nmemb, size_t size) {
|
||||||
// size_t is 16-bit on this target; nmemb*size can overflow and
|
// size_t is 32-bit, so the multiply itself won't overflow for any
|
||||||
// wrap to a small value (e.g. calloc(65536, 1) -> 0 -> 2-byte
|
// realistic input. The 0xFFFF cap is a "fits in one 64KB bank"
|
||||||
// alloc), then the caller writes way past the returned region.
|
// sanity check: the heap lives in bank 0 below the IO window, so
|
||||||
// Bail when the multiplication would overflow.
|
// any single allocation must fit there. calloc(65536, 1) returns
|
||||||
|
// null rather than silently truncating into the IO range.
|
||||||
if (size != 0 && nmemb > (size_t)0xFFFF / size) return (void *)0;
|
if (size != 0 && nmemb > (size_t)0xFFFF / size) return (void *)0;
|
||||||
size_t total = nmemb * size;
|
size_t total = nmemb * size;
|
||||||
void *p = malloc(total);
|
void *p = malloc(total);
|
||||||
|
|
@ -757,6 +667,15 @@ void *realloc(void *ptr, size_t n) {
|
||||||
typedef void (*AtexitFn)(void);
|
typedef void (*AtexitFn)(void);
|
||||||
static AtexitFn __atexitFn = (AtexitFn)0;
|
static AtexitFn __atexitFn = (AtexitFn)0;
|
||||||
|
|
||||||
|
// BRK $00 then spin -- halts a 65816 in BRK so MAME's debugger catches
|
||||||
|
// it; the spin loop guards against the (rare) case where BRK returns.
|
||||||
|
static void __halt(void) __attribute__((noreturn));
|
||||||
|
static void __halt(void) {
|
||||||
|
__asm__ volatile (".byte 0x00, 0x00");
|
||||||
|
while (1) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void exit(int code) {
|
void exit(int code) {
|
||||||
(void)code;
|
(void)code;
|
||||||
// C99 7.20.4.3: exit() must invoke registered atexit handlers in
|
// C99 7.20.4.3: exit() must invoke registered atexit handlers in
|
||||||
|
|
@ -766,9 +685,7 @@ void exit(int code) {
|
||||||
__atexitFn = (AtexitFn)0; // prevent re-entry if fn calls exit
|
__atexitFn = (AtexitFn)0; // prevent re-entry if fn calls exit
|
||||||
fn();
|
fn();
|
||||||
}
|
}
|
||||||
// BRK $00 — halts a 65816 in BRK, MAME's debugger catches.
|
__halt();
|
||||||
__asm__ volatile (".byte 0x00, 0x00");
|
|
||||||
while (1) {} // unreachable
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---- errno ----
|
// ---- errno ----
|
||||||
|
|
@ -1128,9 +1045,9 @@ typedef struct __sFILE {
|
||||||
static char __tmpNames[MFS_MAX_FILES][LIBC_L_TMPNAM];
|
static char __tmpNames[MFS_MAX_FILES][LIBC_L_TMPNAM];
|
||||||
|
|
||||||
static FILE __mfs[MFS_MAX_FILES] = {
|
static FILE __mfs[MFS_MAX_FILES] = {
|
||||||
{ FILE_KIND_STDIN, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 },
|
{ .kind = FILE_KIND_STDIN, .unget = -1 },
|
||||||
{ FILE_KIND_STDOUT, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 },
|
{ .kind = FILE_KIND_STDOUT, .writable = 1, .unget = -1 },
|
||||||
{ FILE_KIND_STDERR, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 },
|
{ .kind = FILE_KIND_STDERR, .writable = 1, .unget = -1 },
|
||||||
};
|
};
|
||||||
|
|
||||||
FILE *stdin = &__mfs[0];
|
FILE *stdin = &__mfs[0];
|
||||||
|
|
@ -1278,9 +1195,6 @@ int fclose(FILE *stream) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Forward decls for routines that live in snprintf.c.
|
|
||||||
extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap);
|
|
||||||
|
|
||||||
// Forward decl for vfprintf so fprintf can call it.
|
// Forward decl for vfprintf so fprintf can call it.
|
||||||
int vfprintf(FILE *stream, const char *fmt, va_list ap);
|
int vfprintf(FILE *stream, const char *fmt, va_list ap);
|
||||||
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
|
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream);
|
||||||
|
|
@ -1377,8 +1291,7 @@ static AtexitFn __quickFn = (AtexitFn)0;
|
||||||
|
|
||||||
void _Exit(int code) {
|
void _Exit(int code) {
|
||||||
(void)code;
|
(void)code;
|
||||||
__asm__ volatile (".byte 0x00, 0x00");
|
__halt();
|
||||||
while (1) {} // unreachable
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void quick_exit(int code) {
|
void quick_exit(int code) {
|
||||||
|
|
@ -1388,8 +1301,7 @@ void quick_exit(int code) {
|
||||||
__quickFn = (AtexitFn)0;
|
__quickFn = (AtexitFn)0;
|
||||||
fn();
|
fn();
|
||||||
}
|
}
|
||||||
__asm__ volatile (".byte 0x00, 0x00");
|
__halt();
|
||||||
while (1) {} // unreachable
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int at_quick_exit(AtexitFn fn) {
|
int at_quick_exit(AtexitFn fn) {
|
||||||
|
|
@ -1438,20 +1350,26 @@ static void initFileMem(FILE *f, const MfsEntry *reg, int wantWrite) {
|
||||||
// LIBC_PATH_MAX (kept in sync with limits.h's PATH_MAX) so user code
|
// LIBC_PATH_MAX (kept in sync with limits.h's PATH_MAX) so user code
|
||||||
// that bounds-checks against PATH_MAX stays consistent with what fopen
|
// that bounds-checks against PATH_MAX stays consistent with what fopen
|
||||||
// will accept.
|
// will accept.
|
||||||
static struct {
|
typedef struct __GsosPathBufT {
|
||||||
u16 length;
|
u16 length;
|
||||||
char text[LIBC_PATH_MAX];
|
char text[LIBC_PATH_MAX];
|
||||||
} __gsosPathBuf;
|
} __GsosPathBufT;
|
||||||
|
|
||||||
static int __buildGSString(const char *path) {
|
static __GsosPathBufT __gsosPathBuf;
|
||||||
|
|
||||||
|
static int __fillGSString(__GsosPathBufT *buf, const char *path) {
|
||||||
size_t n = 0;
|
size_t n = 0;
|
||||||
while (path[n] && n < LIBC_PATH_MAX) n++;
|
while (path[n] && n < LIBC_PATH_MAX) n++;
|
||||||
if (path[n]) return -1; // path > PATH_MAX chars
|
if (path[n]) return -1; // path > PATH_MAX chars
|
||||||
__gsosPathBuf.length = (u16)n;
|
buf->length = (u16)n;
|
||||||
for (size_t i = 0; i < n; i++) __gsosPathBuf.text[i] = path[i];
|
for (size_t i = 0; i < n; i++) buf->text[i] = path[i];
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int __buildGSString(const char *path) {
|
||||||
|
return __fillGSString(&__gsosPathBuf, path);
|
||||||
|
}
|
||||||
|
|
||||||
FILE *fopen(const char *path, const char *mode) {
|
FILE *fopen(const char *path, const char *mode) {
|
||||||
if (!path || !mode) return (FILE *)0;
|
if (!path || !mode) return (FILE *)0;
|
||||||
int wantWrite = 0;
|
int wantWrite = 0;
|
||||||
|
|
@ -1486,7 +1404,6 @@ FILE *fopen(const char *path, const char *mode) {
|
||||||
|
|
||||||
if (reg) {
|
if (reg) {
|
||||||
initFileMem(f, reg, wantWrite);
|
initFileMem(f, reg, wantWrite);
|
||||||
(void)wantRead;
|
|
||||||
if (truncate) f->size = 0;
|
if (truncate) f->size = 0;
|
||||||
if (append) f->pos = f->size;
|
if (append) f->pos = f->size;
|
||||||
return f;
|
return f;
|
||||||
|
|
@ -1547,15 +1464,16 @@ FILE *fopen(const char *path, const char *mode) {
|
||||||
gsosSetMark(&m);
|
gsosSetMark(&m);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
(void)wantRead;
|
|
||||||
return f;
|
return f;
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
|
size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
|
||||||
if (!stream) return 0;
|
if (!stream) return 0;
|
||||||
if (size == 0 || nmemb == 0) return 0;
|
if (size == 0 || nmemb == 0) return 0;
|
||||||
// Avoid 32-bit overflow on size * nmemb: cap nmemb so each item
|
// size_t is u32 here, so the multiply itself can't overflow. The
|
||||||
// (size bytes) fits in remaining 16-bit address space.
|
// 0xFFFE cap is a "single 64KB bank" limit -- the underlying
|
||||||
|
// mem/GSOS backends address by 16-bit offset, so any single fread
|
||||||
|
// must fit in one bank.
|
||||||
if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size;
|
if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size;
|
||||||
if (stream->kind == FILE_KIND_GSOS) {
|
if (stream->kind == FILE_KIND_GSOS) {
|
||||||
// Drain unget byte first if present.
|
// Drain unget byte first if present.
|
||||||
|
|
@ -1605,8 +1523,10 @@ size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) {
|
||||||
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) {
|
size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) {
|
||||||
if (!stream) return 0;
|
if (!stream) return 0;
|
||||||
if (size == 0 || nmemb == 0) return 0;
|
if (size == 0 || nmemb == 0) return 0;
|
||||||
// Cap nmemb so each item (size bytes) fits in the address space
|
// size_t is u32 here, so the multiply itself can't overflow. The
|
||||||
// — avoids 32-bit `size * nmemb` that the i32 multiply path triggers.
|
// 0xFFFE cap is a "single 64KB bank" limit -- the underlying
|
||||||
|
// mem/GSOS backends address by 16-bit offset, so any single fwrite
|
||||||
|
// must fit in one bank.
|
||||||
if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size;
|
if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size;
|
||||||
const char *in = (const char *)ptr;
|
const char *in = (const char *)ptr;
|
||||||
if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR) {
|
if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR) {
|
||||||
|
|
@ -1814,7 +1734,6 @@ void setbuf(FILE *stream, char *buf) {
|
||||||
// GS/OS. This matches both ProDOS `/VOL/FILE` and HFS `:Vol:File:`
|
// GS/OS. This matches both ProDOS `/VOL/FILE` and HFS `:Vol:File:`
|
||||||
// conventions without forcing the caller to declare which.
|
// conventions without forcing the caller to declare which.
|
||||||
|
|
||||||
int mfsUnregister(const char *path);
|
|
||||||
extern int rand(void);
|
extern int rand(void);
|
||||||
|
|
||||||
// True when `path` looks like a GS/OS volume path (contains `/` or
|
// True when `path` looks like a GS/OS volume path (contains `/` or
|
||||||
|
|
@ -1863,18 +1782,10 @@ static int __sameParentDir(const char *a, const char *b) {
|
||||||
// simultaneously (old+new for ChangePath), and Destroy of the source
|
// simultaneously (old+new for ChangePath), and Destroy of the source
|
||||||
// at the end of the cross-dir fallback can reuse __gsosPathBuf for the
|
// at the end of the cross-dir fallback can reuse __gsosPathBuf for the
|
||||||
// source name. Keeps the destination name alive across all calls.
|
// source name. Keeps the destination name alive across all calls.
|
||||||
static struct {
|
static __GsosPathBufT __gsosPathBuf2;
|
||||||
u16 length;
|
|
||||||
char text[LIBC_PATH_MAX];
|
|
||||||
} __gsosPathBuf2;
|
|
||||||
|
|
||||||
static int __buildGSString2(const char *path) {
|
static int __buildGSString2(const char *path) {
|
||||||
size_t n = 0;
|
return __fillGSString(&__gsosPathBuf2, path);
|
||||||
while (path[n] && n < LIBC_PATH_MAX) n++;
|
|
||||||
if (path[n]) return -1;
|
|
||||||
__gsosPathBuf2.length = (u16)n;
|
|
||||||
for (size_t i = 0; i < n; i++) __gsosPathBuf2.text[i] = path[i];
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int remove(const char *path) {
|
int remove(const char *path) {
|
||||||
|
|
|
||||||
|
|
@ -1,149 +1,479 @@
|
||||||
// resource.c - iigs/resource.h implementation. Phase 3.4 STUB-ONLY
|
// resource.c - Apple IIgs Resource Manager - real implementation.
|
||||||
// landing.
|
|
||||||
//
|
//
|
||||||
// Phase 1.1 (GS/OS fopen hang on 6.0.2) blocks the live runtime path.
|
// Replaces the Phase 3.4 stub. Reads .rsrc resource forks via the
|
||||||
// ResourceStartUp + OpenResourceFile reaches the same blocking code,
|
// stdio surface (fopen/fread/fseek/fclose) and caches loaded payloads
|
||||||
// so all three entry points (init, load, size) return RES_ERR_BLOCKED
|
// by (type, id) so repeated loadResource() calls return the same
|
||||||
// unless the build defines IIGS_RESOURCE_RUNTIME_ENABLED=1. When that
|
// handle. Read-only - no AddResource / DetachResource / partial-load.
|
||||||
// flips on (Phase 1.1 lands), the toolbox calls below activate and the
|
|
||||||
// typed wrappers route through the real Resource Manager.
|
|
||||||
//
|
//
|
||||||
// HLock semantics:
|
// File format (Apple IIgs Toolbox Reference Vol 3, ch.42):
|
||||||
// LoadResource (toolbox 0x0E1E) returns a HANDLE - a pointer to a
|
// bytes 0..23 : ResourceMapHeaderT (little-endian fields)
|
||||||
// master pointer in Memory-Manager-relocatable storage. Until you
|
// bytes ... : payload blobs at offsets recorded in the index
|
||||||
// call HLock(handle), any subsequent toolbox call can compact the
|
// bytes at rmToIndex : rmIndexUsed * ResourceIndexEntryT entries
|
||||||
// heap and move the underlying bytes. The typed wrappers DO NOT
|
|
||||||
// call HLock for the caller; that is the caller's responsibility
|
|
||||||
// per the contract in iigs/resource.h.
|
|
||||||
//
|
//
|
||||||
// Why we stub instead of returning best-effort answers:
|
// Handle convention: we return a `void **` whose dereference yields the
|
||||||
// A real LoadResource that silently returned NULL would be ambiguous
|
// resource bytes. The handle storage lives in this file's static
|
||||||
// with "resource not found". RES_ERR_BLOCKED lets the demo + smoke
|
// table; the bytes themselves are malloc'd at first load and freed at
|
||||||
// harness distinguish "Phase 1.1 hasn't landed" from "your TYPECODE_ID
|
// releaseResource(verb=1) or closeResourceFile().
|
||||||
// bundle was missing a resource". Once Phase 1.1 lands, callers see
|
|
||||||
// the real error codes (RES_ERR_NOT_FOUND, RES_ERR_TOOLBOX) instead.
|
|
||||||
|
|
||||||
#include "iigs/resource.h"
|
#include "iigs/resource.h"
|
||||||
#include "iigs/toolbox.h"
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
|
||||||
// Set to non-zero by a successful resourceProbeInit() call. Read by
|
// --- Prototypes ---
|
||||||
// resourceRuntimeEnabled() to report status without re-running init.
|
static int freeHandleSlot(int slot);
|
||||||
// In the stub-only landing this never reaches 1 because the runtime
|
static int findHandleByPtr(void **handle);
|
||||||
// path is compiled out.
|
static int findHandleByTypeId(IigsResTypeT type, IigsResIdT id);
|
||||||
static int gResourceReady = 0;
|
static int findHandleSlot(void);
|
||||||
|
static int findOpenFileSlot(void);
|
||||||
|
static int loadIndex(int fileSlot);
|
||||||
|
static void *readPayload(int fileSlot, uint32_t offset, uint32_t size);
|
||||||
|
static int readU16(FILE *f, uint16_t *out);
|
||||||
|
static int readU32(FILE *f, uint32_t *out);
|
||||||
|
static int readMapHeader(FILE *f, ResourceMapHeaderT *hdr);
|
||||||
|
|
||||||
|
|
||||||
// Cached refNum from OpenResourceFile. Populated only when the
|
// --- Internal types ---
|
||||||
// runtime path is enabled. unsigned short to match the toolbox
|
typedef struct {
|
||||||
// signature (refNum is a 16-bit GS/OS fileID).
|
int inUse;
|
||||||
static unsigned short gResourceRefNum = 0;
|
FILE *fp;
|
||||||
|
ResourceMapHeaderT hdr;
|
||||||
|
ResourceIndexEntryT *index; // malloc'd; rmIndexUsed entries
|
||||||
|
uint16_t refNum; // 1..N, matches slot+1
|
||||||
|
} ResourceFileT;
|
||||||
|
|
||||||
|
|
||||||
// Stub flag to keep the unused-static-warning quiet when the runtime
|
typedef struct {
|
||||||
// path is compiled out. The compiler folds the function bodies below
|
int inUse;
|
||||||
// to constant returns under -O2 anyway; this just keeps -Wunused happy
|
int fileSlot; // which ResourceFileT owns it
|
||||||
// across both build modes.
|
IigsResTypeT type;
|
||||||
static void touchUnused(void) {
|
IigsResIdT id;
|
||||||
(void)gResourceRefNum;
|
void *data; // payload bytes
|
||||||
}
|
uint32_t size;
|
||||||
|
void *masterPtr; // master ptr cell -> &data
|
||||||
|
} HandleSlotT;
|
||||||
|
|
||||||
|
|
||||||
#if IIGS_RESOURCE_RUNTIME_ENABLED
|
// --- State ---
|
||||||
// Path passed to OpenResourceFile. When the runtime path is live the
|
// Declared volatile to defeat the GlobalOpt i1-narrowing pass that
|
||||||
// expectation is that this is the application's own pathname (the OMF
|
// otherwise produces an `i1, zext` load the W65816 backend can't select.
|
||||||
// the Loader launched), so OpenResourceFile attaches to the file's
|
// (See MEMORY.md: feedback_i1_load_custom.md.)
|
||||||
// resource fork. GS/OS holds the boot pathname in a known low-memory
|
static volatile int gResourceReady = 0;
|
||||||
// vector; we resolve it at init time and cache here.
|
static ResourceFileT gFiles[IIGS_RES_MAX_FILES];
|
||||||
//
|
static HandleSlotT gHandles[IIGS_RES_MAX_HANDLES];
|
||||||
// The exact pathname-resolution sequence is intentionally NOT implemented
|
|
||||||
// in this stub-only landing - it is part of the Phase 1.1 unblock work
|
|
||||||
// (the same code that fixes fopen will plumb the pathname through).
|
|
||||||
static char gOwnPathName[256] = { 0 };
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
int resourceProbeInit(void) {
|
int closeResourceFile(ResourceRefNumT refNum) {
|
||||||
touchUnused();
|
if (refNum == 0 || refNum > IIGS_RES_MAX_FILES) {
|
||||||
#if IIGS_RESOURCE_RUNTIME_ENABLED
|
return RES_ERR_BAD_HANDLE;
|
||||||
// Live path - placeholder until Phase 1.1 lands. We deliberately
|
}
|
||||||
// do not call ResourceStartUp here in the stub-only landing because
|
int slot = (int)refNum - 1;
|
||||||
// (a) it requires MMStartUp to have run already and (b) calling
|
if (!gFiles[slot].inUse) {
|
||||||
// ResourceStartUp on a userId we don't own would corrupt the
|
return RES_ERR_BAD_HANDLE;
|
||||||
// toolbox's per-app state. Phase 1.1's actual implementation will
|
}
|
||||||
// look like:
|
// Free every cached handle owned by this file.
|
||||||
// MMStartUp();
|
for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) {
|
||||||
// TLStartUp();
|
if (gHandles[i].inUse && gHandles[i].fileSlot == slot) {
|
||||||
// ResourceStartUp(myUserId);
|
freeHandleSlot(i);
|
||||||
// gResourceRefNum = OpenResourceFile(0x0001, NULL, gOwnPathName);
|
|
||||||
// gResourceReady = (gResourceRefNum != 0) ? 1 : 0;
|
|
||||||
return RES_ERR_BLOCKED;
|
|
||||||
#else
|
|
||||||
return RES_ERR_BLOCKED;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
int resourceRuntimeEnabled(void) {
|
|
||||||
return gResourceReady;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err) {
|
|
||||||
(void)resType;
|
|
||||||
(void)resId;
|
|
||||||
#if IIGS_RESOURCE_RUNTIME_ENABLED
|
|
||||||
if (!gResourceReady) {
|
|
||||||
if (err) {
|
|
||||||
*err = RES_ERR_NOT_STARTED;
|
|
||||||
}
|
}
|
||||||
return (void **)0;
|
|
||||||
}
|
}
|
||||||
// Phase 1.1 will plug LoadResource(resType, resId) here. Toolbox
|
if (gFiles[slot].index) {
|
||||||
// pushes 4-byte ID as a long, returns handle in PHA slot. Caller
|
free(gFiles[slot].index);
|
||||||
// must HLock() before dereferencing (see header notes).
|
gFiles[slot].index = (ResourceIndexEntryT *)0;
|
||||||
void **h = (void **)LoadResource((unsigned short)resType, (long)resId);
|
|
||||||
if (!h) {
|
|
||||||
if (err) {
|
|
||||||
*err = RES_ERR_NOT_FOUND;
|
|
||||||
}
|
|
||||||
return (void **)0;
|
|
||||||
}
|
}
|
||||||
if (err) {
|
if (gFiles[slot].fp) {
|
||||||
*err = RES_OK;
|
fclose(gFiles[slot].fp);
|
||||||
|
gFiles[slot].fp = (FILE *)0;
|
||||||
}
|
}
|
||||||
return h;
|
gFiles[slot].inUse = 0;
|
||||||
#else
|
return RES_OK;
|
||||||
if (err) {
|
|
||||||
*err = RES_ERR_BLOCKED;
|
|
||||||
}
|
|
||||||
return (void **)0;
|
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId,
|
static int findHandleByPtr(void **handle) {
|
||||||
int *err) {
|
if (!handle) {
|
||||||
(void)resType;
|
return -1;
|
||||||
(void)resId;
|
}
|
||||||
#if IIGS_RESOURCE_RUNTIME_ENABLED
|
for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) {
|
||||||
|
if (gHandles[i].inUse && (void **)&gHandles[i].data == handle) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int findHandleByTypeId(IigsResTypeT type, IigsResIdT id) {
|
||||||
|
for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) {
|
||||||
|
if (gHandles[i].inUse && gHandles[i].type == type && gHandles[i].id == id) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int findHandleSlot(void) {
|
||||||
|
for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) {
|
||||||
|
if (!gHandles[i].inUse) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int findOpenFileSlot(void) {
|
||||||
|
for (int i = 0; i < IIGS_RES_MAX_FILES; i++) {
|
||||||
|
if (!gFiles[i].inUse) {
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int freeHandleSlot(int slot) {
|
||||||
|
if (slot < 0 || slot >= IIGS_RES_MAX_HANDLES) {
|
||||||
|
return RES_ERR_BAD_HANDLE;
|
||||||
|
}
|
||||||
|
if (!gHandles[slot].inUse) {
|
||||||
|
return RES_ERR_BAD_HANDLE;
|
||||||
|
}
|
||||||
|
if (gHandles[slot].data) {
|
||||||
|
free(gHandles[slot].data);
|
||||||
|
gHandles[slot].data = (void *)0;
|
||||||
|
}
|
||||||
|
gHandles[slot].inUse = 0;
|
||||||
|
gHandles[slot].fileSlot = -1;
|
||||||
|
gHandles[slot].type = 0;
|
||||||
|
gHandles[slot].id = 0;
|
||||||
|
gHandles[slot].size = 0;
|
||||||
|
return RES_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
uint32_t getResourceSize(void **handle) {
|
||||||
|
int slot = findHandleByPtr(handle);
|
||||||
|
if (slot < 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return gHandles[slot].size;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Convenience wrapper kept for backwards compat with the old probe.
|
||||||
|
// Scans the cache + open files for (type, id) and reports the size.
|
||||||
|
uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId, int *err) {
|
||||||
if (!gResourceReady) {
|
if (!gResourceReady) {
|
||||||
if (err) {
|
if (err) {
|
||||||
*err = RES_ERR_NOT_STARTED;
|
*err = RES_ERR_NOT_STARTED;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
// GetResourceSize returns a 32-bit byte count via the toolbox.
|
int hSlot = findHandleByTypeId(resType, resId);
|
||||||
uint32_t sz = (uint32_t)GetResourceSize((unsigned short)resType,
|
if (hSlot >= 0) {
|
||||||
(long)resId);
|
if (err) {
|
||||||
if (err) {
|
*err = RES_OK;
|
||||||
*err = (sz == 0) ? RES_ERR_NOT_FOUND : RES_OK;
|
}
|
||||||
|
return gHandles[hSlot].size;
|
||||||
|
}
|
||||||
|
// Not cached - scan every open file's index for the entry.
|
||||||
|
for (int f = 0; f < IIGS_RES_MAX_FILES; f++) {
|
||||||
|
if (!gFiles[f].inUse || !gFiles[f].index) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
uint32_t n = gFiles[f].hdr.rmIndexUsed;
|
||||||
|
for (uint32_t i = 0; i < n; i++) {
|
||||||
|
ResourceIndexEntryT *e = &gFiles[f].index[i];
|
||||||
|
if (e->rType == resType && e->rID == resId) {
|
||||||
|
if (err) {
|
||||||
|
*err = RES_OK;
|
||||||
|
}
|
||||||
|
return e->rSize;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return sz;
|
|
||||||
#else
|
|
||||||
if (err) {
|
if (err) {
|
||||||
*err = RES_ERR_BLOCKED;
|
*err = RES_ERR_NOT_FOUND;
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
#endif
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Convenience wrapper kept for backwards compat with the old probe.
|
||||||
|
void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err) {
|
||||||
|
return loadResource(resType, resId, err);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the rIndex table (20 bytes per entry) of a freshly-opened file. Returns
|
||||||
|
// RES_OK or an RES_ERR_* code. Caller has populated gFiles[slot].hdr.
|
||||||
|
static int loadIndex(int fileSlot) {
|
||||||
|
ResourceFileT *rf = &gFiles[fileSlot];
|
||||||
|
uint32_t n = rf->hdr.rmIndexUsed;
|
||||||
|
if (n == 0) {
|
||||||
|
rf->index = (ResourceIndexEntryT *)0;
|
||||||
|
return RES_OK;
|
||||||
|
}
|
||||||
|
// Sanity-check against malloc'ing absurd amounts.
|
||||||
|
if (n > 1024) {
|
||||||
|
return RES_ERR_TOOLBOX;
|
||||||
|
}
|
||||||
|
ResourceIndexEntryT *idx = (ResourceIndexEntryT *)malloc(sizeof(ResourceIndexEntryT) * n);
|
||||||
|
if (!idx) {
|
||||||
|
return RES_ERR_NO_MEM;
|
||||||
|
}
|
||||||
|
if (fseek(rf->fp, (long)rf->hdr.rmToIndex, 0) != 0) {
|
||||||
|
free(idx);
|
||||||
|
return RES_ERR_TOOLBOX;
|
||||||
|
}
|
||||||
|
for (uint32_t i = 0; i < n; i++) {
|
||||||
|
uint16_t t;
|
||||||
|
uint32_t id;
|
||||||
|
uint32_t off;
|
||||||
|
uint16_t attr;
|
||||||
|
uint32_t sz;
|
||||||
|
uint32_t h;
|
||||||
|
if (readU16(rf->fp, &t) != 0 ||
|
||||||
|
readU32(rf->fp, &id) != 0 ||
|
||||||
|
readU32(rf->fp, &off) != 0 ||
|
||||||
|
readU16(rf->fp, &attr) != 0 ||
|
||||||
|
readU32(rf->fp, &sz) != 0 ||
|
||||||
|
readU32(rf->fp, &h) != 0) {
|
||||||
|
free(idx);
|
||||||
|
return RES_ERR_TOOLBOX;
|
||||||
|
}
|
||||||
|
idx[i].rType = t;
|
||||||
|
idx[i].rID = id;
|
||||||
|
idx[i].rOffset = off;
|
||||||
|
idx[i].rAttr = attr;
|
||||||
|
idx[i].rSize = sz;
|
||||||
|
idx[i].rHandle = h;
|
||||||
|
}
|
||||||
|
rf->index = idx;
|
||||||
|
return RES_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns the handle for resource (type, id), loading it on first use.
//
// The handle is the address of the owning slot's `data` field, so `*handle`
// yields the payload bytes. Lookups hit the handle cache first (keyed by
// type + id only); on a miss every open file's index is scanned in slot
// order and the first matching entry is loaded.
//
// `err` may be NULL. When non-NULL it receives:
//   RES_OK              - handle returned
//   RES_ERR_NOT_STARTED - no resource file opened / init not run
//   RES_ERR_NO_MEM      - handle table is full
//   RES_ERR_TOOLBOX     - payload could not be read
//   RES_ERR_NOT_FOUND   - no open file contains (type, id)
//
// A zero-length resource is a valid entry: it gets a handle whose `*handle`
// is NULL and whose size is 0, matching what iigsGetResourceSize() reports
// for the same entry (RES_OK, 0) instead of failing with RES_ERR_TOOLBOX.
void **loadResource(IigsResTypeT type, IigsResIdT id, int *err) {
    if (!gResourceReady) {
        if (err) {
            *err = RES_ERR_NOT_STARTED;
        }
        return (void **)0;
    }

    // Cache hit?
    int hSlot = findHandleByTypeId(type, id);
    if (hSlot >= 0) {
        if (err) {
            *err = RES_OK;
        }
        return (void **)&gHandles[hSlot].data;
    }

    // Cache miss - find the resource in any open file.
    for (int f = 0; f < IIGS_RES_MAX_FILES; f++) {
        if (!gFiles[f].inUse || !gFiles[f].index) {
            continue;
        }
        uint32_t n = gFiles[f].hdr.rmIndexUsed;
        for (uint32_t i = 0; i < n; i++) {
            ResourceIndexEntryT *e = &gFiles[f].index[i];
            if (e->rType != type || e->rID != id) {
                continue;
            }

            int slot = findHandleSlot();
            if (slot < 0) {
                if (err) {
                    *err = RES_ERR_NO_MEM;
                }
                return (void **)0;
            }

            // readPayload() returns NULL for size 0, which is
            // indistinguishable from a read failure - so only call it
            // when there are bytes to fetch.
            void *bytes = (void *)0;
            if (e->rSize != 0) {
                bytes = readPayload(f, e->rOffset, e->rSize);
                if (!bytes) {
                    if (err) {
                        *err = RES_ERR_TOOLBOX;
                    }
                    return (void **)0;
                }
            }

            gHandles[slot].inUse = 1;
            gHandles[slot].fileSlot = f;
            gHandles[slot].type = type;
            gHandles[slot].id = id;
            gHandles[slot].data = bytes;
            gHandles[slot].size = e->rSize;
            if (err) {
                *err = RES_OK;
            }
            return (void **)&gHandles[slot].data;
        }
    }

    if (err) {
        *err = RES_ERR_NOT_FOUND;
    }
    return (void **)0;
}
|
||||||
|
|
||||||
|
|
||||||
|
ResourceRefNumT openResourceFile(const char *path, uint8_t accessByte, uint16_t fileType, int *err) {
|
||||||
|
(void)accessByte;
|
||||||
|
(void)fileType;
|
||||||
|
if (!path) {
|
||||||
|
if (err) {
|
||||||
|
*err = RES_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int slot = findOpenFileSlot();
|
||||||
|
if (slot < 0) {
|
||||||
|
if (err) {
|
||||||
|
*err = RES_ERR_NO_MEM;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
FILE *fp = fopen(path, "rb");
|
||||||
|
if (!fp) {
|
||||||
|
if (err) {
|
||||||
|
*err = RES_ERR_NOT_FOUND;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
ResourceFileT *rf = &gFiles[slot];
|
||||||
|
if (readMapHeader(fp, &rf->hdr) != 0) {
|
||||||
|
fclose(fp);
|
||||||
|
if (err) {
|
||||||
|
*err = RES_ERR_TOOLBOX;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
rf->fp = fp;
|
||||||
|
rf->inUse = 1;
|
||||||
|
rf->refNum = (uint16_t)(slot + 1);
|
||||||
|
rf->index = (ResourceIndexEntryT *)0;
|
||||||
|
int rc = loadIndex(slot);
|
||||||
|
if (rc != RES_OK) {
|
||||||
|
fclose(fp);
|
||||||
|
rf->fp = (FILE *)0;
|
||||||
|
rf->inUse = 0;
|
||||||
|
if (err) {
|
||||||
|
*err = rc;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
gResourceReady = 1;
|
||||||
|
if (err) {
|
||||||
|
*err = RES_OK;
|
||||||
|
}
|
||||||
|
return rf->refNum;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Allocates and reads `size` bytes at `offset` from the file at
|
||||||
|
// `fileSlot`. Returns NULL on any error.
|
||||||
|
static void *readPayload(int fileSlot, uint32_t offset, uint32_t size) {
|
||||||
|
if (size == 0) {
|
||||||
|
return (void *)0;
|
||||||
|
}
|
||||||
|
void *buf = malloc(size);
|
||||||
|
if (!buf) {
|
||||||
|
return (void *)0;
|
||||||
|
}
|
||||||
|
FILE *fp = gFiles[fileSlot].fp;
|
||||||
|
if (fseek(fp, (long)offset, 0) != 0) {
|
||||||
|
free(buf);
|
||||||
|
return (void *)0;
|
||||||
|
}
|
||||||
|
size_t got = fread(buf, 1, size, fp);
|
||||||
|
if (got != size) {
|
||||||
|
free(buf);
|
||||||
|
return (void *)0;
|
||||||
|
}
|
||||||
|
return buf;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads a little-endian uint16 from `f`. Returns 0 on success.
|
||||||
|
static int readU16(FILE *f, uint16_t *out) {
|
||||||
|
uint8_t b[2];
|
||||||
|
if (fread(b, 1, 2, f) != 2) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
*out = (uint16_t)(b[0] | ((uint16_t)b[1] << 8));
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads a little-endian uint32 from `f`. Returns 0 on success.
|
||||||
|
static int readU32(FILE *f, uint32_t *out) {
|
||||||
|
uint8_t b[4];
|
||||||
|
if (fread(b, 1, 4, f) != 4) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
*out = (uint32_t)b[0] |
|
||||||
|
((uint32_t)b[1] << 8) |
|
||||||
|
((uint32_t)b[2] << 16) |
|
||||||
|
((uint32_t)b[3] << 24);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Reads the 24-byte rResourceMap header at offset 0.
|
||||||
|
static int readMapHeader(FILE *f, ResourceMapHeaderT *hdr) {
|
||||||
|
if (fseek(f, 0L, 0) != 0) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (readU16(f, &hdr->rmVersion) != 0) return -1;
|
||||||
|
if (readU32(f, &hdr->rmToIndex) != 0) return -1;
|
||||||
|
if (readU16(f, &hdr->rmFileNum) != 0) return -1;
|
||||||
|
if (readU16(f, &hdr->rmID) != 0) return -1;
|
||||||
|
if (readU32(f, &hdr->rmIndexSize) != 0) return -1;
|
||||||
|
if (readU32(f, &hdr->rmIndexUsed) != 0) return -1;
|
||||||
|
if (readU16(f, &hdr->rmFreeListSize) != 0) return -1;
|
||||||
|
if (readU16(f, &hdr->rmFreeListUsed) != 0) return -1;
|
||||||
|
if (readU16(f, &hdr->rmPad) != 0) return -1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int releaseResource(int verb, void **handle) {
|
||||||
|
int slot = findHandleByPtr(handle);
|
||||||
|
if (slot < 0) {
|
||||||
|
return RES_ERR_BAD_HANDLE;
|
||||||
|
}
|
||||||
|
if (verb == 0) {
|
||||||
|
// Soft release: keep cached payload. Real toolbox would decrement
|
||||||
|
// a use-count; we just succeed.
|
||||||
|
return RES_OK;
|
||||||
|
}
|
||||||
|
return freeHandleSlot(slot);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int resourceProbeInit(void) {
|
||||||
|
// Zero the tables. Safe to call repeatedly - subsequent calls do
|
||||||
|
// not touch already-open files.
|
||||||
|
if (!gResourceReady) {
|
||||||
|
for (int i = 0; i < IIGS_RES_MAX_FILES; i++) {
|
||||||
|
gFiles[i].inUse = 0;
|
||||||
|
gFiles[i].fp = (FILE *)0;
|
||||||
|
gFiles[i].index = (ResourceIndexEntryT *)0;
|
||||||
|
gFiles[i].refNum = 0;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) {
|
||||||
|
gHandles[i].inUse = 0;
|
||||||
|
gHandles[i].fileSlot = -1;
|
||||||
|
gHandles[i].data = (void *)0;
|
||||||
|
gHandles[i].size = 0;
|
||||||
|
}
|
||||||
|
gResourceReady = 1;
|
||||||
|
}
|
||||||
|
return RES_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
int resourceRuntimeEnabled(void) {
|
||||||
|
return gResourceReady;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -40,6 +40,13 @@ typedef __builtin_va_list va_list;
|
||||||
#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
|
#define va_arg(ap, ty) __builtin_va_arg(ap, ty)
|
||||||
#define va_end(ap) __builtin_va_end(ap)
|
#define va_end(ap) __builtin_va_end(ap)
|
||||||
|
|
||||||
|
// Unbounded sink sentinel used by sprintf/vsprintf. Setting gEnd to
|
||||||
|
// `buf + 0xFFFE` looks innocuous but clang lowers the +0xFFFE to a
|
||||||
|
// `dec a; dec a` peephole (0xFFFE is -2 in 16-bit), giving gEnd =
|
||||||
|
// buf - 2 -- the `cur < end` bounds test then always fails. Use the
|
||||||
|
// absolute top-of-bank sentinel instead.
|
||||||
|
#define SPRINTF_END_SENTINEL ((char *)0xFFFF)
|
||||||
|
|
||||||
|
|
||||||
static char *gCur;
|
static char *gCur;
|
||||||
static char *gEnd;
|
static char *gEnd;
|
||||||
|
|
@ -757,12 +764,9 @@ int snprintf(char *buf, size_t n, const char *fmt, ...) {
|
||||||
|
|
||||||
int sprintf(char *buf, const char *fmt, ...) {
|
int sprintf(char *buf, const char *fmt, ...) {
|
||||||
gCur = buf;
|
gCur = buf;
|
||||||
// sprintf is unbounded. Setting gEnd = buf + 0xFFFE looks innocuous
|
// sprintf is unbounded; see SPRINTF_END_SENTINEL above for the
|
||||||
// but clang lowers the +0xFFFE to a `dec a; dec a` peephole (since
|
// reason we don't use buf + 0xFFFE.
|
||||||
// 0xFFFE is -2 in 16-bit), giving gEnd = buf - 2 — and then the
|
gEnd = SPRINTF_END_SENTINEL;
|
||||||
// emit() bounds test `cur < end` is always false, so nothing gets
|
|
||||||
// written. Use the absolute top-of-bank sentinel instead.
|
|
||||||
gEnd = (char *)0xFFFF;
|
|
||||||
gTotal = 0;
|
gTotal = 0;
|
||||||
va_list ap;
|
va_list ap;
|
||||||
va_start(ap, fmt);
|
va_start(ap, fmt);
|
||||||
|
|
@ -782,7 +786,7 @@ int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap) {
|
||||||
|
|
||||||
int vsprintf(char *buf, const char *fmt, va_list ap) {
|
int vsprintf(char *buf, const char *fmt, va_list ap) {
|
||||||
gCur = buf;
|
gCur = buf;
|
||||||
gEnd = (char *)0xFFFF;
|
gEnd = SPRINTF_END_SENTINEL;
|
||||||
gTotal = 0;
|
gTotal = 0;
|
||||||
return format(fmt, ap);
|
return format(fmt, ap);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,7 @@
|
||||||
# DEBUGGER_E2E=1 scripts/mameDebug.py --bin ... --map ... --dwarf ...
|
# DEBUGGER_E2E=1 scripts/mameDebug.py --bin ... --map ... --dwarf ...
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
|
import importlib.util
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
@ -50,6 +51,21 @@ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||||
ROOT = os.path.dirname(SCRIPT_DIR)
|
ROOT = os.path.dirname(SCRIPT_DIR)
|
||||||
|
|
||||||
|
|
||||||
|
# Import pc2line.py as a module so the REPL can reuse its DWARF parsing
|
||||||
|
# (line table, DIE walking, type chains, locals evaluator) without
|
||||||
|
# shelling out + reparsing on every command. pc2line.py is the single
|
||||||
|
# source of truth for DWARF semantics; we must NOT duplicate any of it.
|
||||||
|
def _loadPc2lineModule():
|
||||||
|
spec = importlib.util.spec_from_file_location(
|
||||||
|
"pc2line", os.path.join(SCRIPT_DIR, "pc2line.py"))
|
||||||
|
mod = importlib.util.module_from_spec(spec)
|
||||||
|
spec.loader.exec_module(mod)
|
||||||
|
return mod
|
||||||
|
|
||||||
|
|
||||||
|
pc2line = _loadPc2lineModule()
|
||||||
|
|
||||||
|
|
||||||
# ---- Map + DWARF helpers ---------------------------------------------
|
# ---- Map + DWARF helpers ---------------------------------------------
|
||||||
|
|
||||||
def loadMapSyms(path):
|
def loadMapSyms(path):
|
||||||
|
|
@ -561,6 +577,766 @@ def interactiveMode(args):
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|
||||||
|
# ---- REPL mode (--repl) ---------------------------------------------
|
||||||
|
#
|
||||||
|
# An interactive prompt that gives `gdb`-flavour commands on top of the
|
||||||
|
# load-snapshot-resolve cycle. Because MAME has no bidirectional Lua
|
||||||
|
# RPC channel under `-debugger none`, every "execute the program"
|
||||||
|
# command (run / continue / step / next) maps to one MAME process
|
||||||
|
# launch. The Lua autoboot writes the program into bank-0 memory,
|
||||||
|
# installs all queued breakpoints, runs until the first hit, captures
|
||||||
|
# a register + memory snapshot, and exits. The Python REPL then
|
||||||
|
# decodes the snapshot to answer `print`, `bt`, `where` from cached
|
||||||
|
# state — no further MAME launch needed for those.
|
||||||
|
#
|
||||||
|
# Commands:
|
||||||
|
# break <sym|file:line|0xADDR> set/queue a breakpoint
|
||||||
|
# run | continue [c] launch MAME, stop at first bp hit
|
||||||
|
# step | next advance to next source line
|
||||||
|
# (via DWARF line table; one bp install)
|
||||||
|
# bt | backtrace walk the JSL frame chain from S
|
||||||
|
# where PC -> source line for the last hit
|
||||||
|
# print <symbol> decode bytes at &symbol per DWARF type
|
||||||
|
# info locals show formal_parameters + locals
|
||||||
|
# info breakpoints list queued breakpoints
|
||||||
|
# delete <N> remove breakpoint by index
|
||||||
|
# quit | q exit
|
||||||
|
# ? this help
|
||||||
|
#
|
||||||
|
# Smoke-checkable: pipe a script of `break main\nrun\nwhere\nquit\n`
|
||||||
|
# into `mameDebug.py --repl ...` and assert the BP-HIT + WHERE output.
|
||||||
|
|
||||||
|
|
||||||
|
REPL_HELP = """\
|
||||||
|
Commands:
|
||||||
|
break <sym|file:line|0xADDR> set/queue a breakpoint
|
||||||
|
run | continue launch MAME, stop at first hit
|
||||||
|
step | next advance to next source line (DWARF)
|
||||||
|
bt | backtrace walk JSL frame chain from S
|
||||||
|
where PC -> source line for the last hit
|
||||||
|
print <symbol> decode bytes at &symbol per DWARF type
|
||||||
|
info locals show formal_parameters + locals
|
||||||
|
info breakpoints list queued breakpoints
|
||||||
|
delete <N> remove breakpoint by index
|
||||||
|
quit | q exit
|
||||||
|
? this help
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
# Lua autoboot for the REPL. Differs from the --trace template in three
|
||||||
|
# ways:
|
||||||
|
# 1. Breakpoint actions also dump (a) a 64-byte stack window around S
|
||||||
|
# and (b) per-symbol memory regions for `print` requests, both as
|
||||||
|
# tagged log lines so the host can parse.
|
||||||
|
# 2. exit_frame is generous (240) so a slow run still completes.
|
||||||
|
# 3. The list of "watch" memory regions is parameterised — the host
|
||||||
|
# stamps in (addr, len) pairs based on queued `print <symbol>`
|
||||||
|
# requests.
|
||||||
|
REPL_LUA_TEMPLATE = r"""
|
||||||
|
-- mameDebug REPL autoboot (generated by scripts/mameDebug.py --repl)
|
||||||
|
local BIN_PATH = "{bin_path}"
|
||||||
|
local LOAD_AT = 0x{load_at:04x}
|
||||||
|
local START_PC = 0x{start_pc:06x}
|
||||||
|
local BPS = {{ {bp_list} }}
|
||||||
|
local WATCHES = {{ {watch_list} }} -- list of {{addr, len}} pairs
|
||||||
|
|
||||||
|
local installed = false
|
||||||
|
local frame = 0
|
||||||
|
local cpu, dbg, mem
|
||||||
|
|
||||||
|
emu.register_frame_done(function()
|
||||||
|
frame = frame + 1
|
||||||
|
if frame == 30 and not installed then
|
||||||
|
cpu = manager.machine.devices[":maincpu"]
|
||||||
|
dbg = cpu.debug
|
||||||
|
mem = cpu.spaces["program"]
|
||||||
|
local f = io.open(BIN_PATH, "rb")
|
||||||
|
if not f then
|
||||||
|
print("MAMEDBG-BIN-MISSING " .. BIN_PATH)
|
||||||
|
manager.machine:exit()
|
||||||
|
return
|
||||||
|
end
|
||||||
|
local data = f:read("*all")
|
||||||
|
f:close()
|
||||||
|
for i = 1, #data do
|
||||||
|
local addr = LOAD_AT + i - 1
|
||||||
|
if not (addr >= 0x00C000 and addr < 0x00D000) then
|
||||||
|
mem:write_u8(addr, data:byte(i))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
cpu.state["PC"].value = START_PC
|
||||||
|
cpu.state["PB"].value = 0x00
|
||||||
|
cpu.state["DB"].value = 0x00
|
||||||
|
cpu.state["D"].value = 0x00
|
||||||
|
cpu.state["P"].value = 0x04
|
||||||
|
cpu.state["E"].value = 0
|
||||||
|
cpu.state["S"].value = 0x01FF
|
||||||
|
|
||||||
|
-- Build the bp action. We use the 3-arg bpset form (1-arg
|
||||||
|
-- crashes MAME). The action stamps a magic marker into bank-2
|
||||||
|
-- scratch ($020010 / 0xDEAD) so the periodic poller can detect
|
||||||
|
-- the hit and dump memory from a SAFE context (the action
|
||||||
|
-- string itself can't call multi-statement loops cleanly).
|
||||||
|
local action_template =
|
||||||
|
'logerror "MAMEDBG-BP PC=%X A=%X X=%X Y=%X S=%X DBR=%X\n",pc,a,x,y,s,db; ' ..
|
||||||
|
'w@0x020010=0xDEAD; w@0x020012=s; w@0x020014=pc & 0xFFFF; w@0x020016=(pc>>16) & 0xFF; go'
|
||||||
|
for _, pc in ipairs(BPS) do
|
||||||
|
dbg:bpset(pc, '', action_template)
|
||||||
|
end
|
||||||
|
print(string.format("MAMEDBG-LOADED bytes=%d bps=%d watches=%d",
|
||||||
|
#data, #BPS, #WATCHES))
|
||||||
|
installed = true
|
||||||
|
end
|
||||||
|
if frame == {exit_frame} then
|
||||||
|
print("MAMEDBG-EXIT frame=" .. frame)
|
||||||
|
manager.machine:exit()
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
|
||||||
|
-- Marker-driven snapshot dumper. Once the bp action stamps 0xDEAD at
|
||||||
|
-- $020010, this periodic handler reads S + PC from the scratch slots
|
||||||
|
-- and dumps the watched memory regions, then clears the marker.
|
||||||
|
local snapshotted = false
|
||||||
|
emu.register_periodic(function()
|
||||||
|
if installed and not snapshotted and mem ~= nil then
|
||||||
|
local marker = mem:read_u16(0x020010)
|
||||||
|
if marker == 0xDEAD then
|
||||||
|
local s_val = mem:read_u16(0x020012)
|
||||||
|
local pc_lo = mem:read_u16(0x020014)
|
||||||
|
local pc_bnk = mem:read_u8(0x020016)
|
||||||
|
local full_pc = (pc_bnk * 0x10000) + pc_lo
|
||||||
|
print(string.format("MAMEDBG-SNAP S=0x%04X PC=0x%06X",
|
||||||
|
s_val, full_pc))
|
||||||
|
-- Dump 64 bytes of the stack window above S (S+1 .. S+64).
|
||||||
|
-- That's where the topmost JSL return frame lives.
|
||||||
|
for ofs = 1, 64 do
|
||||||
|
local addr = s_val + ofs
|
||||||
|
local v = mem:read_u8(addr)
|
||||||
|
print(string.format("MAMEDBG-STACK addr=0x%06X val=0x%02X",
|
||||||
|
addr, v))
|
||||||
|
end
|
||||||
|
-- Dump each user-requested watch.
|
||||||
|
for _, w in ipairs(WATCHES) do
|
||||||
|
local addr, n = w[1], w[2]
|
||||||
|
for ofs = 0, n - 1 do
|
||||||
|
local v = mem:read_u8(addr + ofs)
|
||||||
|
print(string.format("MAMEDBG-WATCH addr=0x%06X val=0x%02X",
|
||||||
|
addr + ofs, v))
|
||||||
|
end
|
||||||
|
end
|
||||||
|
mem:write_u16(0x020010, 0)
|
||||||
|
snapshotted = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end)
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
def buildReplLuaScript(bin_path, load_at, bp_pcs, watch_regions,
                       start_pc, exit_frame):
    """Render REPL_LUA_TEMPLATE into the autoboot Lua text for one run.

    bin_path:      path substituted into the template's BIN_PATH.
    load_at:       load address (formatted as 4 hex digits by the template).
    bp_pcs:        iterable of int PCs; each becomes a 0x%06x Lua literal.
    watch_regions: iterable of (addr, length) pairs; each becomes a
                   ``{0xADDR, LEN}`` Lua table entry.
    start_pc:      initial PC (formatted as 6 hex digits by the template).
    exit_frame:    frame number substituted into the template.

    Returns the formatted script as a string.
    """
    breakpoint_literals = [f"0x{pc:06x}" for pc in bp_pcs]
    watch_literals = [f"{{0x{addr:06x}, {length}}}"
                      for addr, length in watch_regions]
    fields = {
        "bin_path": bin_path,
        "load_at": load_at,
        "start_pc": start_pc,
        # An empty list joins to "", which leaves an empty Lua table.
        "bp_list": ", ".join(breakpoint_literals),
        "watch_list": ", ".join(watch_literals),
        "exit_frame": exit_frame,
    }
    return REPL_LUA_TEMPLATE.format(**fields)
|
||||||
|
|
||||||
|
|
||||||
|
# Regex for snapshot/watch/stack lines emitted by the REPL Lua script.
|
||||||
|
SNAP_RE = re.compile(r"MAMEDBG-SNAP\s+S=0x([0-9A-Fa-f]+)\s+PC=0x([0-9A-Fa-f]+)")
|
||||||
|
WATCH_RE = re.compile(r"MAMEDBG-WATCH\s+addr=0x([0-9A-Fa-f]+)\s+val=0x([0-9A-Fa-f]+)")
|
||||||
|
STACK_RE = re.compile(r"MAMEDBG-STACK\s+addr=0x([0-9A-Fa-f]+)\s+val=0x([0-9A-Fa-f]+)")
|
||||||
|
|
||||||
|
|
||||||
|
class ReplState:
    """All persistent state across REPL commands.

    Holds the parsed map symbols and DWARF data (loaded once through
    pc2line), the queued breakpoints and watches, and the results of the
    most recent MAME run.
    """

    # DWARF encodings used locally (values per the DWARF standard's tag
    # and attribute encoding tables).
    _DW_AT_byte_size = 0x0b
    _DW_AT_upper_bound = 0x2f
    _DW_AT_count = 0x37
    _DW_TAG_array_type = 0x01
    _DW_TAG_typedef = 0x16
    _DW_TAG_subrange_type = 0x21
    _DW_TAG_const_type = 0x26
    _DW_TAG_volatile_type = 0x35
    _DW_TAG_restrict_type = 0x37
    _DW_TAG_atomic_type = 0x47
    # Tags that merely wrap another type: follow DW_AT_type through them.
    # (0x38 is DW_TAG_interface_type, not typedef; typedef is 0x16.)
    _TRANSPARENT_TAGS = frozenset((
        _DW_TAG_typedef, _DW_TAG_const_type, _DW_TAG_volatile_type,
        _DW_TAG_restrict_type, _DW_TAG_atomic_type,
    ))

    def __init__(self, args):
        """Load map + DWARF data named by ``args`` and reset run state."""
        self.args = args
        # (address, symbol) pairs from the linker map.
        self.syms = pc2line.loadMapSymbols(args.map)
        # DWARF: section payloads, CU/DIE trees and line table, parsed
        # once here and reused by every command.
        self.sectionPayloads = pc2line.loadSidecarSectionsAll(args.dwarf)
        self.cus = pc2line.parseAllCus(self.sectionPayloads)
        self.lineTable = pc2line.buildTable(args.dwarf)
        # Breakpoints: list of (pc, label); label is the resolved spec.
        self.breakpoints = []
        # Watches: {symbol: (addr, length)}.
        self.watches = {}
        # Results of the last run; empty until the first run.
        self.lastSnap = None        # {"pc": int, "sp": int}
        self.lastWatchBytes = {}    # {addr: byte}
        self.lastStackBytes = {}    # {addr: byte}
        self.lastBps = []           # register dicts, one per BP line seen

    def resolveSpec(self, spec):
        """Resolve `FUNC`, `FILE:LINE`, or `0xADDR` to a PC.

        Returns (pc, label) on success or (None, error_msg) on failure.
        """
        spec = spec.strip()
        if spec.lower().startswith("0x"):
            try:
                return (int(spec, 16), spec)
            except ValueError:
                return (None, f"invalid hex: {spec!r}")
        if ":" in spec:
            return self._resolveFileLine(spec)
        # Bare symbol name: first exact match in the map wins.
        for addr, sym in self.syms:
            if sym == spec:
                return (addr, sym)
        return (None, f"symbol {spec!r} not in map")

    def _resolveFileLine(self, spec):
        """Resolve a `FILE:LINE` spec against the DWARF line table."""
        file_part, line_part = spec.rsplit(":", 1)
        try:
            want_line = int(line_part)
        except ValueError:
            return (None, f"invalid line: {line_part!r}")
        # Keep the smallest PC on the requested line so the breakpoint
        # lands on the statement's first instruction.
        best = None
        for pc, fidx, ln, ft in self.lineTable:
            if ln != want_line:
                continue
            if 0 < fidx <= len(ft):
                fname = os.path.basename(ft[fidx - 1])
            else:
                # Unresolvable file index: treated as a wildcard that
                # matches whatever file name the user supplied.
                fname = "?"
            if fname == file_part or fname.endswith(file_part) \
                    or fname == "?":
                if best is None or pc < best[0]:
                    best = (pc, fname)
        if best is not None:
            return (best[0], f"{best[1]}:{want_line}")
        return (None, f"no DWARF line entry for {spec!r}")

    def _findVariableDie(self, symname):
        """Return (cu, die) for the first DW_TAG_variable named symname,
        or (None, None) when no CU contains one."""
        for cu in self.cus:
            if cu.root is None:
                continue
            for die in self._iterDies(cu.root):
                if die.tag != pc2line.DW_TAG_variable:
                    continue
                if pc2line.dieName(cu, die) == symname:
                    return (cu, die)
        return (None, None)

    def symbolSize(self, symname):
        """Best-effort size in bytes of a global symbol's storage.

        Returns the byte size of the variable's resolved DWARF type, or
        None when the variable or its size cannot be determined (callers
        then apply their own default).
        """
        cu, die = self._findVariableDie(symname)
        if die is None:
            return None
        tref = die.getRaw(pc2line.DW_AT_type)
        if tref is None:
            return None
        return self._typeByteSize(cu, pc2line._findDieByOffset(cu, tref[0]))

    def _iterDies(self, die):
        """Yield ``die`` and all of its descendants, depth-first."""
        yield die
        for ch in die.children:
            yield from self._iterDies(ch)

    def _typeByteSize(self, cu, die):
        """Walk a type DIE chain and return its byte size, or None.

        Handles DIEs that carry DW_AT_byte_size, pointers (cu.addr_size),
        typedef/const/volatile/restrict/atomic wrappers, and arrays.
        The ``seen`` set stops a cyclic DW_AT_type chain from looping.
        """
        seen = set()
        cur = die
        while cur is not None and cur.offset not in seen:
            seen.add(cur.offset)
            bs = cur.getRaw(self._DW_AT_byte_size)
            if bs is not None:
                return bs[0]
            tag = cur.tag
            if tag == pc2line.DW_TAG_pointer_type:
                return cu.addr_size
            if tag in self._TRANSPARENT_TAGS:
                t = cur.getRaw(pc2line.DW_AT_type)
                if t is None:
                    return None
                cur = pc2line._findDieByOffset(cu, t[0])
                continue
            if tag == self._DW_TAG_array_type:
                return self._arrayByteSize(cu, cur)
            # Any other tag: give up.
            return None
        return None

    def _arrayByteSize(self, cu, die):
        """Element size times the product of every dimension's count, or
        None when the element type or any dimension is unknown."""
        t = die.getRaw(pc2line.DW_AT_type)
        if t is None:
            return None
        total = self._typeByteSize(cu, pc2line._findDieByOffset(cu, t[0]))
        if total is None:
            return None
        dims = 0
        for ch in die.children:
            if ch.tag != self._DW_TAG_subrange_type:
                continue
            count = self._subrangeCount(ch)
            if count is None:
                return None
            total *= count
            dims += 1
        return total if dims else None

    def _subrangeCount(self, sub):
        """Element count of one subrange DIE, from DW_AT_count or
        DW_AT_upper_bound + 1; None if neither is a usable integer.

        NOTE(review): assumes getRaw()[0] is the plain integer value for
        constant-class forms -- confirm against pc2line.
        """
        cnt = sub.getRaw(self._DW_AT_count)
        if cnt is not None:
            n = cnt[0]
        else:
            ub = sub.getRaw(self._DW_AT_upper_bound)
            if ub is None:
                return None
            n = ub[0]
            if isinstance(n, int):
                n += 1
        if not isinstance(n, int) or n < 0:
            return None
        return n

    def typeStrOfSymbol(self, symname):
        """Return a printable type string for a global symbol, or '?'."""
        cu, die = self._findVariableDie(symname)
        if die is None:
            return "?"
        return pc2line.varTypeStr(cu, die)
|
||||||
|
|
||||||
|
|
||||||
|
def replLaunchMame(state, bp_pcs, start_pc, watch_regions, seconds=4):
    """Run MAME once with the given breakpoints and watch regions.

    Writes the generated Lua script to a temporary file, hands it to
    runMame, removes the file again, then scans the captured output for
    MAMEDBG-BP / -SNAP / -WATCH / -STACK lines and stores the results in
    state.lastBps, state.lastSnap, state.lastWatchBytes and
    state.lastStackBytes.

    Returns the captured output text.
    """
    script_text = buildReplLuaScript(state.args.bin, state.args.load_at,
                                     bp_pcs, watch_regions,
                                     start_pc=start_pc,
                                     exit_frame=240)
    with tempfile.NamedTemporaryFile("w", suffix=".lua",
                                     delete=False) as handle:
        handle.write(script_text)
        script_path = handle.name
    try:
        out = runMame(script_path, seconds=seconds, debug_flag=True)
    finally:
        # Best-effort cleanup; a failed unlink must not mask the run.
        try:
            os.unlink(script_path)
        except OSError:
            pass

    # Drop the previous run's results before parsing the new output.
    state.lastSnap = None
    state.lastWatchBytes = {}
    state.lastStackBytes = {}
    hits = []
    register_keys = ("pc", "a", "x", "y", "s", "db")
    for text in out.splitlines():
        found = BP_RE.search(text)
        if found:
            hits.append({key: int(found.group(pos), 16)
                         for pos, key in enumerate(register_keys, 1)})
        found = SNAP_RE.search(text)
        if found:
            state.lastSnap = {
                "sp": int(found.group(1), 16),
                "pc": int(found.group(2), 16),
            }
        found = WATCH_RE.search(text)
        if found:
            state.lastWatchBytes[int(found.group(1), 16)] = \
                int(found.group(2), 16)
        found = STACK_RE.search(text)
        if found:
            state.lastStackBytes[int(found.group(1), 16)] = \
                int(found.group(2), 16)
    state.lastBps = hits
    return out
|
||||||
|
|
||||||
|
|
||||||
|
def replPrintWhere(state):
    """Print the last snapshot's PC with its source line and function.

    Prints a hint instead when no snapshot has been captured yet.
    """
    snap = state.lastSnap
    if snap is None:
        print(" no snapshot yet — `run` first")
        return
    pc, sp = snap["pc"], snap["sp"]
    row = pc2line.query(state.lineTable, pc)
    func = pc2line.funcAt(state.syms, pc)
    if row is not None:
        # row is (pc, file name, line); only the last two are shown.
        _, fname, ln = row
        print(f" PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func} "
              f"S=0x{sp:04x}")
        return
    print(f" PC=0x{pc:06x} (no DWARF line) FUNC={func} S=0x{sp:04x}")
|
||||||
|
|
||||||
|
|
||||||
|
def replPrintBacktrace(state):
    """Print frame #0 (the snapshot PC) and frame #1 (its return address).

    Frame #1 is reconstructed from the three captured stack bytes at
    S+1, S+2 and S+3 (low byte, high byte, bank), read from
    state.lastStackBytes with the address wrapped to 16 bits. One is
    added to the stored value to obtain the return target. Only one
    caller frame is shown; deeper frames are not walked.
    """
    snap = state.lastSnap
    if snap is None:
        print(" no snapshot yet — `run` first")
        return

    def describe(address):
        # Function name first, then the line-table row for the address.
        owner = pc2line.funcAt(state.syms, address)
        return owner, pc2line.query(state.lineTable, address)

    pc, sp = snap["pc"], snap["sp"]
    func, row = describe(pc)
    if row is not None:
        _, fname, ln = row
        print(f" #0 PC=0x{pc:06x} {fname}:{ln} FUNC={func}")
    else:
        print(f" #0 PC=0x{pc:06x} FUNC={func}")

    # Return-address triplet just above S.
    triplet = [state.lastStackBytes.get((sp + k) & 0xFFFF) for k in (1, 2, 3)]
    if any(byte is None for byte in triplet):
        print(" #1 <return address not in captured stack window>")
        return
    pcl, pch, pbr = triplet
    # The stored address is one short of the return target.
    ret_pc = (((pbr << 16) | (pch << 8) | pcl) + 1) & 0xFFFFFF
    ret_func, ret_row = describe(ret_pc)
    if ret_row is not None:
        _, fname, ln = ret_row
        print(f" #1 PC=0x{ret_pc:06x} {fname}:{ln} FUNC={ret_func}")
    else:
        print(f" #1 PC=0x{ret_pc:06x} FUNC={ret_func}")
|
||||||
|
|
||||||
|
|
||||||
|
def replPrintSymbol(state, spec):
    """Print a map symbol's value as captured by the last run.

    The symbol is queued as a watch (size from state.symbolSize, default
    2 bytes, capped at 64) if it is not already one. When no captured
    bytes are available for it, a hint to run (again) is printed instead
    of a value.
    """
    addr = next((a for a, s in state.syms if s == spec), None)
    if addr is None:
        print(f" no such symbol: {spec!r}")
        return

    # Queue the watch so the next run captures this symbol's bytes.
    if spec not in state.watches:
        size = state.symbolSize(spec)
        if size is None or size <= 0:
            size = 2
        # Large structs/arrays surface only their first 64 bytes.
        state.watches[spec] = (addr, min(size, 64))

    if state.lastSnap is None or not state.lastWatchBytes:
        print(f" &{spec} = 0x{addr:06x} (watch queued — run to capture)")
        return

    base, length = state.watches[spec]
    captured = [state.lastWatchBytes.get(base + i) for i in range(length)]
    type_str = state.typeStrOfSymbol(spec)
    if any(b is None for b in captured):
        print(f" {spec}: ADDR=0x{addr:06x} TYPE={type_str} "
              f"(no snapshot bytes — run again to capture)")
        return
    payload = bytearray(captured)
    decoded = _decodeBytes(type_str, payload)
    hex_dump = " ".join(f"{b:02x}" for b in payload)
    print(f" {spec} : {type_str} = {decoded}")
    print(f" ADDR=0x{addr:06x} BYTES=[{hex_dump}]")
|
||||||
|
|
||||||
|
|
||||||
|
def _decodeBytes(type_str, raw):
    """Best-effort C-value rendering of a small little-endian byte buffer.

    type_str: printable C type name (e.g. "int", "char *", "struct foo").
    raw:      captured bytes (bytes/bytearray/sequence of ints).

    Returns a display string:
      - "<empty>" for an empty buffer;
      - pointers: hex address as wide as the buffer, up to 4 bytes
        (so a 3-byte pointer keeps its bank byte);
      - integer types, optionally const/volatile-qualified:
        "<decimal> (0x<hex>)", sign-extended for signed types and
        limited to the bytes actually captured;
      - struct/union/class: a tag plus the first u16 as a hint;
      - arrays: the first (up to) 8 bytes in hex;
      - anything else (including floats): "<no decoder>".
    """
    ts = type_str.strip()
    if not raw:
        return "<empty>"

    # Pointer types -> hex address sized to the captured width.
    if ts.endswith("*") or " *" in ts:
        n = min(len(raw), 4)
        v = int.from_bytes(raw[:n], "little")
        return f"0x{v:0{2 * n}x}"

    # Integer base types. Widths are upper limits: decoding never reads
    # more bytes than were captured, so a narrower capture still decodes.
    int_widths = {
        "char": 1, "signed char": 1, "unsigned char": 1,
        "_Bool": 1, "bool": 1,
        "short": 2, "short int": 2, "signed short": 2,
        "unsigned short": 2, "unsigned short int": 2,
        "int": 2, "unsigned int": 2, "signed int": 2,
        "long": 4, "long int": 4, "signed long": 4,
        "unsigned long": 4, "unsigned long int": 4,
        # Up to 8 bytes so a full-width capture is not truncated.
        "long long": 8, "long long int": 8, "signed long long": 8,
        "unsigned long long": 8, "unsigned long long int": 8,
    }
    signed_set = {"char", "signed char", "short", "short int",
                  "signed short", "int", "signed int", "long", "long int",
                  "signed long", "long long", "long long int",
                  "signed long long"}
    # cv-qualifiers do not change the representation; ignore them for
    # the integer lookup only.
    base = " ".join(tok for tok in ts.split()
                    if tok not in ("const", "volatile"))
    if base in int_widths:
        n = min(int_widths[base], len(raw))
        v = int.from_bytes(raw[:n], "little")
        if base in signed_set and v & (1 << (8 * n - 1)):
            v -= 1 << (8 * n)
        return f"{v} (0x{v & ((1 << (8*n)) - 1):0{2*n}x})"

    # struct / union / class — the caller dumps the raw bytes; show the
    # first u16 as a hint since the first field is often an integer.
    if ts.startswith("struct ") or ts.startswith("union ") \
            or ts.startswith("class "):
        if len(raw) >= 2:
            first_u16 = raw[0] | (raw[1] << 8)
            return f"<{ts}; first u16 = 0x{first_u16:04x}>"
        return f"<{ts}>"

    # Array type — show the leading bytes.
    if "[" in ts and ts.endswith("]"):
        first = " ".join(f"0x{b:02x}" for b in raw[:8])
        return f"[{first}{', ...' if len(raw) > 8 else ''}]"

    return "<no decoder>"
|
||||||
|
|
||||||
|
|
||||||
|
def replInfoLocals(state):
    """List the parameters and locals pc2line reports for the snapshot PC.

    Each entry is printed according to its location kind: a memory
    address, a register (with an address when one is known), a constant
    value, or the reason it is unsupported.
    """
    snap = state.lastSnap
    if snap is None:
        print(" no snapshot yet — `run` first")
        return
    pc, sp = snap["pc"], snap["sp"]
    cu, sub, locs = pc2line.localsAtPc(state.cus, pc, sp_value=sp)
    if sub is None:
        print(f" no subprogram at PC=0x{pc:06x}")
        return
    sub_name = pc2line.dieName(cu, sub) or "<unnamed>"
    print(f" in {sub_name!r} at PC=0x{pc:06x} S=0x{sp:04x}")
    if not locs:
        print(" (no formal_parameter / variable in scope)")
        return

    def placement(loc):
        # Text that follows "<name> : <type> " for one location.
        if loc.kind == "memory":
            return f"ADDR=0x{loc.addr:06x}"
        if loc.kind == "register":
            if loc.dp_addr is None:
                return f"REG=DW{loc.reg_dw}"
            return f"REG=DW{loc.reg_dw} ADDR=0x{loc.dp_addr:06x}"
        if loc.kind == "value":
            return f"VALUE=0x{loc.value:x}"
        return f"UNSUPPORTED={loc.reason}"

    for name, ty, loc, _die in locs:
        print(f" {name} : {ty} {placement(loc)}")
|
||||||
|
|
||||||
|
|
||||||
|
def replNextLinePc(state, current_pc):
    """Return the smallest line-table PC strictly greater than current_pc,
    or None when the table holds no such entry.
    """
    # The table is scanned in full rather than assumed to be PC-sorted.
    later = (pc for pc, _fidx, _ln, _ft in state.lineTable
             if pc > current_pc)
    return min(later, default=None)
|
||||||
|
|
||||||
|
|
||||||
|
def _replRunToFirstHit(state, no_bp_msg):
    """Launch MAME with every queued breakpoint and report where it stopped.

    Prints ``no_bp_msg`` and does nothing when no breakpoint is queued.
    Starts at args.load_at when --from-start was given, otherwise at the
    first queued breakpoint. Warns when the run captured no snapshot.
    """
    if not state.breakpoints:
        print(no_bp_msg)
        return
    bp_pcs = [pc for pc, _ in state.breakpoints]
    start_pc = state.args.load_at if state.args.from_start else bp_pcs[0]
    replLaunchMame(state, bp_pcs, start_pc, list(state.watches.values()),
                   seconds=state.args.seconds)
    if state.lastSnap is None:
        print(" WARN: no BP-HIT captured (timed out?)")
    else:
        replPrintWhere(state)


def _replStepToNextLine(state):
    """Re-launch MAME from the last snapshot PC with a single breakpoint
    on the next line-table entry, then report where it stopped."""
    current_pc = state.lastSnap["pc"]
    next_pc = replNextLinePc(state, current_pc)
    if next_pc is None:
        print(" no next DWARF line entry — at end of program")
        return
    print(f" stepping to next DWARF line at 0x{next_pc:06x}")
    replLaunchMame(state, [next_pc], current_pc,
                   list(state.watches.values()),
                   seconds=state.args.seconds)
    if state.lastSnap is None:
        print(" WARN: step did not hit the bp (timed out?)")
    else:
        replPrintWhere(state)


def replLoop(state):
    """Run the REPL: read commands from stdin and dispatch each one.

    On a TTY a "(dbg) " prompt is shown; otherwise each command is echoed
    with that prefix so scripted output can be diffed. Blank lines and
    lines starting with "#" are ignored. The loop ends on quit/q/exit or
    end of input.

    Returns 0.
    """
    interactive_tty = sys.stdin.isatty()
    if interactive_tty:
        print("mameDebug REPL. Type ? for help.")
    while True:
        try:
            # No prompt in batch mode (cleaner output).
            line = input("(dbg) ") if interactive_tty else input()
        except EOFError:
            if interactive_tty:
                print()
            break
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        if not interactive_tty:
            print(f"(dbg) {line}")
        # Split on any whitespace run so "break<TAB>main" also works.
        parts = line.split(None, 1)
        cmd = parts[0]
        rest = parts[1].strip() if len(parts) > 1 else ""

        if cmd in ("q", "quit", "exit"):
            break
        if cmd == "?" or cmd == "help":
            print(REPL_HELP)
            continue
        if cmd in ("break", "b"):
            if not rest:
                print(" usage: break <sym|file:line|0xADDR>")
                continue
            pc, label = state.resolveSpec(rest)
            if pc is None:
                # On failure the second element is the error message.
                print(f" cannot resolve: {label}")
                continue
            state.breakpoints.append((pc, label))
            idx = len(state.breakpoints)
            print(f" bp #{idx} at 0x{pc:06x} ({label})")
            continue
        if cmd in ("info",):
            if rest == "breakpoints":
                if not state.breakpoints:
                    print(" no breakpoints")
                else:
                    for i, (pc, lab) in enumerate(state.breakpoints, 1):
                        print(f" #{i} 0x{pc:06x} ({lab})")
                continue
            if rest == "locals":
                replInfoLocals(state)
                continue
            print(f" unknown info subcommand: {rest!r}")
            continue
        if cmd == "delete":
            try:
                idx = int(rest)
            except ValueError:
                print(" usage: delete <N>")
                continue
            if idx < 1 or idx > len(state.breakpoints):
                print(f" no breakpoint #{idx}")
                continue
            del state.breakpoints[idx - 1]
            print(f" deleted bp #{idx}")
            continue
        if cmd in ("run", "r", "continue", "c"):
            _replRunToFirstHit(
                state, " no breakpoints set — nothing to break on")
            continue
        if cmd in ("step", "s", "next", "n"):
            # Both advance to the next source line in the snapshot-based
            # model. Without a prior snapshot there is no "current"
            # position, so the command behaves like `run` (including its
            # warning when nothing is hit).
            if state.lastSnap is None:
                _replRunToFirstHit(
                    state, " no breakpoints set — `break` first")
            else:
                _replStepToNextLine(state)
            continue
        if cmd == "where":
            replPrintWhere(state)
            continue
        if cmd in ("bt", "backtrace"):
            replPrintBacktrace(state)
            continue
        if cmd in ("print", "p"):
            if not rest:
                print(" usage: print <symbol>")
                continue
            replPrintSymbol(state, rest)
            continue
        print(f" unknown command: {line!r} (try ?)")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def replMode(args):
    """Entry point for `--repl`: build the state, optionally queue the
    --break breakpoint, then hand over to the command loop.

    Returns 2 when --break cannot be resolved, otherwise whatever
    replLoop returns.
    """
    state = ReplState(args)
    if not args.break_at:
        return replLoop(state)
    # --break queues a breakpoint before any command is read, which is
    # convenient for scripted sessions.
    pc, label = state.resolveSpec(args.break_at)
    if pc is None:
        print(f"mameDebug: --break {args.break_at!r}: {label}",
              file=sys.stderr)
        return 2
    state.breakpoints.append((pc, label))
    print(f" bp #1 at 0x{pc:06x} ({label}) [from --break]")
    return replLoop(state)
|
||||||
|
|
||||||
|
|
||||||
# ---- main ------------------------------------------------------------
|
# ---- main ------------------------------------------------------------
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|
@ -579,6 +1355,13 @@ def main():
|
||||||
ap.add_argument("--trace", action="store_true",
|
ap.add_argument("--trace", action="store_true",
|
||||||
help="default-on smoke mode: set bp, capture one "
|
help="default-on smoke mode: set bp, capture one "
|
||||||
"BP-HIT, resolve via pc2line, exit 0")
|
"BP-HIT, resolve via pc2line, exit 0")
|
||||||
|
ap.add_argument("--repl", action="store_true",
|
||||||
|
help="interactive REPL. Reads stdin commands "
|
||||||
|
"(break/run/step/next/where/bt/print/info/"
|
||||||
|
"delete/quit). Each `run`/`step`/`next` "
|
||||||
|
"launches one MAME process. `print`, `bt`, "
|
||||||
|
"and `where` decode the captured snapshot "
|
||||||
|
"and need no further MAME launch.")
|
||||||
ap.add_argument("--from-start", action="store_true",
|
ap.add_argument("--from-start", action="store_true",
|
||||||
help="start execution at LOAD_AT (i.e. through "
|
help="start execution at LOAD_AT (i.e. through "
|
||||||
"the crt0). Default is to jump straight to "
|
"the crt0). Default is to jump straight to "
|
||||||
|
|
@ -611,6 +1394,8 @@ def main():
|
||||||
return 2
|
return 2
|
||||||
if args.trace:
|
if args.trace:
|
||||||
return traceMode(args)
|
return traceMode(args)
|
||||||
|
if args.repl:
|
||||||
|
return replMode(args)
|
||||||
return interactiveMode(args)
|
return interactiveMode(args)
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
127
scripts/probeReplSmoke.sh
Executable file
127
scripts/probeReplSmoke.sh
Executable file
|
|
@ -0,0 +1,127 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
# probeReplSmoke.sh - non-interactive smoke check for mameDebug.py
|
||||||
|
# --repl mode. Pipes a canned script (`break main`, `run`, `where`,
|
||||||
|
# `quit`) into the REPL and asserts that:
|
||||||
|
# 1. The REPL parses each command without error
|
||||||
|
# 2. A breakpoint resolves through the link816 map
|
||||||
|
# 3. MAME launches with the bp installed and surfaces a BP-HIT line
|
||||||
|
# 4. `where` resolves the captured PC to a source line via DWARF
|
||||||
|
#
|
||||||
|
# Exit 0 on full pass. Exit 77 (autotools "skip") if MAME / toolchain
|
||||||
|
# missing. Exit 1 on any unexpected REPL output or missing capture.
|
||||||
|
#
|
||||||
|
# Usage: probeReplSmoke.sh [--verbose]
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
HERE="$(cd "$(dirname "$0")" && pwd)"
|
||||||
|
ROOT="$(cd "$HERE/.." && pwd)"
|
||||||
|
VERBOSE=0
|
||||||
|
if [ "${1:-}" = "--verbose" ]; then
|
||||||
|
VERBOSE=1
|
||||||
|
fi
|
||||||
|
|
||||||
|
CLANG="$ROOT/tools/llvm-mos-build/bin/clang"
|
||||||
|
LLVMMC="$ROOT/tools/llvm-mos-build/bin/llvm-mc"
|
||||||
|
LINK="$ROOT/tools/link816"
|
||||||
|
|
||||||
|
if [ ! -x "$CLANG" ] || [ ! -x "$LLVMMC" ] || [ ! -x "$LINK" ]; then
|
||||||
|
echo "probeReplSmoke: missing toolchain (clang/llvm-mc/link816)" >&2
|
||||||
|
exit 77
|
||||||
|
fi
|
||||||
|
if ! command -v mame >/dev/null 2>&1; then
|
||||||
|
echo "probeReplSmoke: mame not on PATH; skipping" >&2
|
||||||
|
exit 77
|
||||||
|
fi
|
||||||
|
|
||||||
|
WORK="$(mktemp -d)"
|
||||||
|
trap 'rm -rf "$WORK"' EXIT
|
||||||
|
CFILE="$WORK/repltest.c"
|
||||||
|
OFILE="$WORK/repltest.o"
|
||||||
|
OCRT0="$WORK/crt0.o"
|
||||||
|
OLIBGCC="$WORK/libgcc.o"
|
||||||
|
BIN="$WORK/repltest.bin"
|
||||||
|
MAP="$WORK/repltest.map"
|
||||||
|
DWARF="$WORK/repltest.dwarf"
|
||||||
|
OUT="$WORK/repl.out"
|
||||||
|
|
||||||
|
cat > "$CFILE" <<'EOF'
|
||||||
|
int gAnswer = 42;
|
||||||
|
int add(int a, int b) {
|
||||||
|
int c = a + b;
|
||||||
|
return c;
|
||||||
|
}
|
||||||
|
int main(void) {
|
||||||
|
int r = add(3, 4);
|
||||||
|
gAnswer = r;
|
||||||
|
while (1) { }
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
|
||||||
|
"$CLANG" --target=w65816 -O0 -g -ffunction-sections \
|
||||||
|
-c "$CFILE" -o "$OFILE" 2>/dev/null
|
||||||
|
"$LLVMMC" -arch=w65816 -filetype=obj \
|
||||||
|
"$ROOT/runtime/src/crt0.s" -o "$OCRT0" 2>/dev/null
|
||||||
|
"$LLVMMC" -arch=w65816 -filetype=obj \
|
||||||
|
"$ROOT/runtime/src/libgcc.s" -o "$OLIBGCC" 2>/dev/null
|
||||||
|
"$LINK" -o "$BIN" --text-base 0x1000 \
|
||||||
|
--map "$MAP" --debug-out "$DWARF" \
|
||||||
|
"$OCRT0" "$OFILE" "$OLIBGCC" >/dev/null 2>&1 || true
|
||||||
|
|
||||||
|
[ -s "$BIN" ] || { echo "probeReplSmoke: empty .bin"; exit 1; }
|
||||||
|
[ -s "$DWARF" ] || { echo "probeReplSmoke: empty DWARF sidecar"; exit 1; }
|
||||||
|
[ -s "$MAP" ] || { echo "probeReplSmoke: empty map"; exit 1; }
|
||||||
|
|
||||||
|
# Pipe the canned REPL script.
|
||||||
|
printf 'break main\nrun\nwhere\nquit\n' \
|
||||||
|
| timeout 60 python3 "$HERE/mameDebug.py" --repl \
|
||||||
|
--bin "$BIN" --map "$MAP" --dwarf "$DWARF" \
|
||||||
|
--seconds 4 > "$OUT" 2>&1 || {
|
||||||
|
echo "probeReplSmoke: mameDebug.py --repl failed" >&2
|
||||||
|
cat "$OUT" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
if [ "$VERBOSE" -eq 1 ]; then
|
||||||
|
cat "$OUT" >&2
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Required output lines:
|
||||||
|
# "(dbg) break main" - command echo
|
||||||
|
# " bp #1 at 0x...... (main)" - bp set ack
|
||||||
|
# "(dbg) run" - command echo
|
||||||
|
# " PC=0x...... ... FUNC=main ..." - where output after run
|
||||||
|
# "(dbg) where" - command echo
|
||||||
|
# " PC=0x...... ... FUNC=main ..." - where output (manual)
|
||||||
|
# "(dbg) quit" - command echo
|
||||||
|
if ! grep -q "bp #1 at 0x" "$OUT"; then
|
||||||
|
echo "probeReplSmoke: missing 'bp #1 at 0x...' breakpoint ack" >&2
|
||||||
|
cat "$OUT" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
if ! grep -q "FUNC=main" "$OUT"; then
|
||||||
|
echo "probeReplSmoke: missing FUNC=main in 'where' output" >&2
|
||||||
|
cat "$OUT" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
# The `where` command (run AFTER the `run` command) must produce
|
||||||
|
# output too — verify by counting occurrences of "PC=0x" prefix lines.
|
||||||
|
PC_HITS=$(grep -c "^ PC=0x" "$OUT" || true)
|
||||||
|
if [ "$PC_HITS" -lt 2 ]; then
|
||||||
|
echo "probeReplSmoke: expected >= 2 PC=0x lines (run + where), got $PC_HITS" >&2
|
||||||
|
cat "$OUT" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Bonus: verify the captured PC equals the map entry for `main`.
|
||||||
|
MAIN_PC=$(awk '$2 == "main" { print $1; exit }' "$MAP")
|
||||||
|
[ -n "$MAIN_PC" ] || { echo "probeReplSmoke: no 'main' symbol in map"; exit 1; }
|
||||||
|
MAIN_PC_LC=$(echo "$MAIN_PC" | tr 'A-Z' 'a-z')
|
||||||
|
if ! grep -qi "PC=$MAIN_PC_LC " "$OUT"; then
|
||||||
|
echo "probeReplSmoke: captured PC does not match map[main]=$MAIN_PC" >&2
|
||||||
|
cat "$OUT" >&2
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "probeReplSmoke: OK (bp resolved, BP-HIT captured, where decoded)"
|
||||||
|
exit 0
|
||||||
|
|
@ -1146,6 +1146,20 @@ EOF
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Phase 3.3: mameDebug.py --repl non-interactive smoke. Pipes a
|
||||||
|
# canned `break main / run / where / quit` script into the REPL and
|
||||||
|
# asserts that (1) the bp resolves via the link816 map, (2) MAME
|
||||||
|
# launches and surfaces a BP-HIT, (3) the captured PC is decoded
|
||||||
|
# through DWARF into FUNC=main on the where output, and (4) the
|
||||||
|
# captured PC equals the map's entry for main. MAME-gated.
|
||||||
|
if command -v mame >/dev/null && [ -d "$PROJECT_ROOT/tools/mame/roms" ]; then
|
||||||
|
log "check: mameDebug.py --repl non-interactive (break/run/where/quit)"
|
||||||
|
if ! bash "$PROJECT_ROOT/scripts/probeReplSmoke.sh" >/dev/null 2>&1; then
|
||||||
|
bash "$PROJECT_ROOT/scripts/probeReplSmoke.sh" --verbose >&2 || true
|
||||||
|
die "mameDebug.py --repl smoke probe failed"
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
# iigs/sound.h + iigs/eventLoop.h headers compile cleanly through
|
# iigs/sound.h + iigs/eventLoop.h headers compile cleanly through
|
||||||
# clang with the runtime include path. Catches missing extern "C"
|
# clang with the runtime include path. Catches missing extern "C"
|
||||||
# wraps, broken struct layouts, or unresolved tool-call stubs.
|
# wraps, broken struct layouts, or unresolved tool-call stubs.
|
||||||
|
|
@ -5988,12 +6002,19 @@ EOF
|
||||||
|
|
||||||
# omfEmit --stack-size: append a ~Direct DP/Stack segment so the
|
# omfEmit --stack-size: append a ~Direct DP/Stack segment so the
|
||||||
# GS/OS Loader allocates an explicit-sized DP+stack chunk instead
|
# GS/OS Loader allocates an explicit-sized DP+stack chunk instead
|
||||||
# of its 4KB default. KIND=0x1012 (DP/Stack | PRIVATE), LENGTH and
|
# of its 4KB default. KIND=0x4012 (DP/Stack | RELOAD), LENGTH =
|
||||||
# RESSPC both = requested size, ALIGN=0x100 (page-aligned per spec).
|
# requested size, RESSPC=0 (the stack bytes are carried in LCONST
|
||||||
# Plain (non-ExpressLoad) multi-segment OMFs do not launch under
|
# because the ExpressLoad fast path can't be trusted to honor
|
||||||
# GS/OS 6.0.2 Loader (verified empirically), so --stack-size auto-
|
# RESSPC — same trick the user CODE seg uses for BSS). ALIGN=
|
||||||
# enables --expressload: the OMF becomes 3 segments (ExpressLoad,
|
# 0x100 (page-aligned per spec). Plain (non-ExpressLoad) multi-
|
||||||
# code, DP/Stack), with DP/Stack as segnum 3.
|
# segment OMFs do not launch under GS/OS 6.0.2 Loader (verified
|
||||||
|
# empirically), so --stack-size auto-enables --expressload: the
|
||||||
|
# OMF becomes 3 segments (ExpressLoad, code, DP/Stack), with
|
||||||
|
# DP/Stack as segnum 3. The ExpressLoad load script also carries
|
||||||
|
# a segtable + remap + header_info entry for the DP/Stack so the
|
||||||
|
# Loader's fast path actually honors it (without that the Loader
|
||||||
|
# silently drops the seg and uses its default 4KB allocation —
|
||||||
|
# see feedback_gsos_fopen_partial_diagnosis).
|
||||||
log "check: omfEmit --stack-size emits a DP/Stack ~Direct segment"
|
log "check: omfEmit --stack-size emits a DP/Stack ~Direct segment"
|
||||||
omfStk="$(mktemp --suffix=.omf)"
|
omfStk="$(mktemp --suffix=.omf)"
|
||||||
"$PROJECT_ROOT/tools/omfEmit" \
|
"$PROJECT_ROOT/tools/omfEmit" \
|
||||||
|
|
@ -6022,16 +6043,34 @@ align = struct.unpack_from('<I', data, sp+28)[0]
|
||||||
segnum = struct.unpack_from('<H', data, sp+34)[0]
|
segnum = struct.unpack_from('<H', data, sp+34)[0]
|
||||||
dispnm = struct.unpack_from('<H', data, sp+40)[0]
|
dispnm = struct.unpack_from('<H', data, sp+40)[0]
|
||||||
name = data[sp+dispnm+10:sp+dispnm+20].decode('ascii', errors='replace').rstrip()
|
name = data[sp+dispnm+10:sp+dispnm+20].decode('ascii', errors='replace').rstrip()
|
||||||
if kind != 0x1012:
|
if kind != 0x4012:
|
||||||
sys.exit(f"DP/Stack KIND=0x{kind:04x} (expected 0x1012)")
|
sys.exit(f"DP/Stack KIND=0x{kind:04x} (expected 0x4012 = DP/Stack|RELOAD)")
|
||||||
if length != 4096 or resspc != 4096:
|
if length != 4096:
|
||||||
sys.exit(f"DP/Stack LENGTH={length} RESSPC={resspc} (expected 4096)")
|
sys.exit(f"DP/Stack LENGTH={length} (expected 4096)")
|
||||||
|
if resspc != 0:
|
||||||
|
sys.exit(f"DP/Stack RESSPC={resspc} (expected 0; stack carried as LCONST zeros)")
|
||||||
if align != 0x100:
|
if align != 0x100:
|
||||||
sys.exit(f"DP/Stack ALIGN=0x{align:x} (expected 0x100 = page-aligned)")
|
sys.exit(f"DP/Stack ALIGN=0x{align:x} (expected 0x100 = page-aligned)")
|
||||||
if segnum != 3:
|
if segnum != 3:
|
||||||
sys.exit(f"DP/Stack SEGNUM={segnum} (expected 3)")
|
sys.exit(f"DP/Stack SEGNUM={segnum} (expected 3)")
|
||||||
if name != "~Direct":
|
if name != "~Direct":
|
||||||
sys.exit(f"DP/Stack name='{name}' (expected ~Direct)")
|
sys.exit(f"DP/Stack name='{name}' (expected ~Direct)")
|
||||||
|
# ExpressLoad seg must carry a header_info record for the DP/Stack so
|
||||||
|
# the Loader's fast path honors it. count=N-2 where N=total segs (3),
|
||||||
|
# so count=1. Walk the script and confirm.
|
||||||
|
elBody = 0 + 44 + 23 # ExpressLoad hdr + LABLEN=0 name area (10 + 1 + 12)
|
||||||
|
if data[elBody] != 0xF2:
|
||||||
|
sys.exit(f"ExpressLoad body op 0x{data[elBody]:02x} (expected 0xF2 LCONST)")
|
||||||
|
elLen = struct.unpack_from('<I', data, elBody+1)[0]
|
||||||
|
elData = data[elBody+5:elBody+5+elLen]
|
||||||
|
count = struct.unpack_from('<H', elData, 4)[0]
|
||||||
|
if count != 1:
|
||||||
|
sys.exit(f"ExpressLoad count={count} (expected 1 = N-2 for 3 segs)")
|
||||||
|
# Second remap entry (DP/Stack) should map old seg 2 -> new seg 3
|
||||||
|
remapOff = 6 + 8*2
|
||||||
|
rm = struct.unpack_from('<H', elData, remapOff+2)[0]
|
||||||
|
if rm != 3:
|
||||||
|
sys.exit(f"DP/Stack ExpressLoad remap={rm} (expected 3)")
|
||||||
PY
|
PY
|
||||||
rm -f "$omfStk"
|
rm -f "$omfStk"
|
||||||
|
|
||||||
|
|
@ -6355,6 +6394,93 @@ EOF
|
||||||
"$omfGsf" "$testFileGsf"
|
"$omfGsf" "$testFileGsf"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
# Stack-size end-to-end: omfEmit --stack-size must actually propagate a
|
||||||
|
# larger DP/Stack chunk to the GS/OS Loader. Background: prior to the
|
||||||
|
# 2026-06-02 fix, the ~Direct DP/Stack segment was appended to the OMF
|
||||||
|
# but the ExpressLoad fast path silently ignored it (no segtable +
|
||||||
|
# remap + header_info entries for the seg), so --stack-size was a no-op
|
||||||
|
# under ExpressLoad mode. Default Loader allocation = 4KB at $0800 →
|
||||||
|
# SP=$17FF; --stack-size 0x4000 should yield a 16KB chunk → SP=$47FF.
|
||||||
|
# This check captures SP at entry to main() and confirms the high byte.
|
||||||
|
# Gated on the same prereqs as the GSOS_FILE_SMOKE check above; toggled
|
||||||
|
# off with SMOKE_SKIP_STACKSIZE=1.
|
||||||
|
if [ "${SMOKE_SKIP_STACKSIZE:-0}" != "1" ] \
|
||||||
|
&& [ -x "$CLANG" ] && [ -x "$CADIUS" ] && [ -f "$SYSDISK" ] \
|
||||||
|
&& command -v mame >/dev/null 2>&1; then
|
||||||
|
log "check: omfEmit --stack-size grows DP/Stack chunk under real GS/OS Loader"
|
||||||
|
cStkFile="$(mktemp --suffix=.c)"
|
||||||
|
oStkFile="$(mktemp --suffix=.o)"
|
||||||
|
binStk="$(mktemp --suffix=.bin)"
|
||||||
|
mapStk="$(mktemp --suffix=.map)"
|
||||||
|
relStk="$(mktemp --suffix=.reloc)"
|
||||||
|
omfStkWith="$(mktemp --suffix=.omf)"
|
||||||
|
omfStkWithout="$(mktemp --suffix=.omf)"
|
||||||
|
cat > "$cStkFile" <<'EOF'
|
||||||
|
// Stack-size end-to-end probe: capture SP at entry to main() and
|
||||||
|
// store its high byte at $71 so the harness can verify Loader honored
|
||||||
|
// --stack-size. $70 = 0x99 marker = program ran.
|
||||||
|
int main(void) {
|
||||||
|
__asm__ volatile (
|
||||||
|
"rep #0x30\n"
|
||||||
|
"tsc\n"
|
||||||
|
"xba\n"
|
||||||
|
"sep #0x20\n"
|
||||||
|
"sta 0x71\n"
|
||||||
|
"rep #0x20\n"
|
||||||
|
);
|
||||||
|
*(volatile unsigned char *)0x70 = 0x99;
|
||||||
|
for (volatile unsigned long s = 0; s < 600000UL; s++) { }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
EOF
|
||||||
|
"$CLANG" --target=w65816 -I"$PROJECT_ROOT/runtime/include" -O2 -ffunction-sections -c \
|
||||||
|
"$cStkFile" -o "$oStkFile"
|
||||||
|
"$PROJECT_ROOT/tools/link816" -o "$binStk" --text-base 0x1000 \
|
||||||
|
--map "$mapStk" --reloc-out "$relStk" \
|
||||||
|
"$PROJECT_ROOT/runtime/crt0Gsos.o" "$oStkFile" \
|
||||||
|
"$PROJECT_ROOT/runtime/libc.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/snprintf.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/extras.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/softFloat.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/softDouble.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/iigsGsos.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/iigsToolbox.o" \
|
||||||
|
"$PROJECT_ROOT/runtime/libgcc.o" 2>/tmp/stkprobe-link.err >/dev/null \
|
||||||
|
|| die "stack-size smoke: link failed: $(cat /tmp/stkprobe-link.err)"
|
||||||
|
# WITH --stack-size 0x4000 (16 KB chunk; Loader places at $0800,
|
||||||
|
# SP lands at $47FF → high byte $47).
|
||||||
|
"$PROJECT_ROOT/tools/omfEmit" --input "$binStk" --map "$mapStk" \
|
||||||
|
--base 0x1000 --entry __start --output "$omfStkWith" \
|
||||||
|
--name STKPROBE --stack-size 0x4000 --relocs "$relStk" >/dev/null 2>&1
|
||||||
|
if [ ! -s "$omfStkWith" ]; then
|
||||||
|
die "stack-size smoke: omfEmit (with stack-size) produced empty OMF"
|
||||||
|
fi
|
||||||
|
if ! bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWith" \
|
||||||
|
--check 0x70=0x99 0x71=0x47 >/dev/null 2>&1; then
|
||||||
|
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWith" \
|
||||||
|
--check 0x70=0x99 0x71=0x47 2>&1 | tail -5 >&2
|
||||||
|
die "stack-size smoke FAILED: SP high byte != 0x47 with --stack-size 0x4000 (Loader silently dropped the seg?)"
|
||||||
|
fi
|
||||||
|
# WITHOUT --stack-size: Loader default 4 KB chunk → SP=$17FF →
|
||||||
|
# high byte $17. This second run guards against a spurious pass
|
||||||
|
# of the first (e.g. if every program by coincidence got SP=$47FF
|
||||||
|
# without our seg).
|
||||||
|
"$PROJECT_ROOT/tools/omfEmit" --input "$binStk" --map "$mapStk" \
|
||||||
|
--base 0x1000 --entry __start --output "$omfStkWithout" \
|
||||||
|
--name STKPROBE --expressload --relocs "$relStk" >/dev/null 2>&1
|
||||||
|
if [ ! -s "$omfStkWithout" ]; then
|
||||||
|
die "stack-size smoke: omfEmit (no stack-size) produced empty OMF"
|
||||||
|
fi
|
||||||
|
if ! bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWithout" \
|
||||||
|
--check 0x70=0x99 0x71=0x17 >/dev/null 2>&1; then
|
||||||
|
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWithout" \
|
||||||
|
--check 0x70=0x99 0x71=0x17 2>&1 | tail -5 >&2
|
||||||
|
die "stack-size smoke FAILED: baseline SP high byte != 0x17 (Loader default-allocation shifted?)"
|
||||||
|
fi
|
||||||
|
rm -f "$cStkFile" "$oStkFile" "$binStk" "$mapStk" "$relStk" \
|
||||||
|
"$omfStkWith" "$omfStkWithout"
|
||||||
|
fi
|
||||||
|
|
||||||
# W65816 codegen-shape regression pins. Tiny FileCheck assertions on
|
# W65816 codegen-shape regression pins. Tiny FileCheck assertions on
|
||||||
# specific lowering behaviors that have broken before; runs in well
|
# specific lowering behaviors that have broken before; runs in well
|
||||||
# under a second. See scripts/runFileCheckTests.sh.
|
# under a second. See scripts/runFileCheckTests.sh.
|
||||||
|
|
@ -6535,23 +6661,25 @@ else
|
||||||
log "OK: cursorProbe Push/Pop arrow+busy returned cleanly + marker set"
|
log "OK: cursorProbe Push/Pop arrow+busy returned cleanly + marker set"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Phase 3.4 resourcemgr STUB-ONLY landing. Verifies:
|
# Phase 3.4 resourcemgr REAL implementation. Verifies:
|
||||||
# - resource.o links into a normal GS/OS demo,
|
# - resource.o links into a normal GS/OS demo,
|
||||||
# - resourceProbeInit() / iigsLoadResource() / iigsGetResourceSize()
|
# - the demo stages an in-memory .rsrc fixture via mfsRegister,
|
||||||
# all return RES_ERR_BLOCKED in stub mode (mark 0x71/0x72 = 0xff),
|
# opens it through openResourceFile (real parser), loads an rText
|
||||||
# - resourceRuntimeEnabled() returns 0 in stub mode (mark 0x73 = 0x01),
|
# resource by (type, id), verifies the payload bytes match
|
||||||
# - demos/build.sh's rsrcBundle post-step produces an AppleSingle blob
|
# "HELLO" and the size is 5,
|
||||||
# and the cadius _ResourceFork.bin sidecar when demos/rsrcProbe.rsrc/
|
# - second loadResource() call returns the SAME handle (cache hit),
|
||||||
# is present (verified by file existence).
|
# - closeResourceFile() returns RES_OK,
|
||||||
# The live resource-fork pathway in MAME is NOT exercised here - the
|
# - demos/build.sh's rsrcBundle post-step still produces an AppleSingle
|
||||||
# whole point of the stub-only landing is that Phase 1.1 (GS/OS fopen
|
# blob + cadius sidecar when demos/rsrcProbe.rsrc/ is present.
|
||||||
# hang) blocks the live path on GS/OS 6.0.2.
|
# The fixture also doubles as a bundler-output verification: the on-disk
|
||||||
|
# sidecar bytes from rsrcBundle.py match the in-memory fixture byte-for-
|
||||||
|
# byte, so passing this check confirms parser + bundler agree on format.
|
||||||
if [ "${SMOKE_SKIP_RSRC:-0}" = 1 ]; then
|
if [ "${SMOKE_SKIP_RSRC:-0}" = 1 ]; then
|
||||||
warn "SMOKE_SKIP_RSRC=1; skipping Phase 3.4 rsrcProbe stage"
|
warn "SMOKE_SKIP_RSRC=1; skipping Phase 3.4 rsrcProbe stage"
|
||||||
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then
|
||||||
warn "Phase 3.4 rsrcProbe prerequisites missing; skipping"
|
warn "Phase 3.4 rsrcProbe prerequisites missing; skipping"
|
||||||
else
|
else
|
||||||
log "check: rsrcProbe stub Resource Manager facade runs under GS/OS"
|
log "check: rsrcProbe real Resource Manager (open/load/release/close) under GS/OS"
|
||||||
bash "$PROJECT_ROOT/demos/build.sh" rsrcProbe >/tmp/rsrcBuildOut 2>&1 || {
|
bash "$PROJECT_ROOT/demos/build.sh" rsrcProbe >/tmp/rsrcBuildOut 2>&1 || {
|
||||||
cat /tmp/rsrcBuildOut >&2
|
cat /tmp/rsrcBuildOut >&2
|
||||||
die "demos/build.sh rsrcProbe failed"
|
die "demos/build.sh rsrcProbe failed"
|
||||||
|
|
@ -6565,11 +6693,11 @@ else
|
||||||
fi
|
fi
|
||||||
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \
|
||||||
"$PROJECT_ROOT/demos/rsrcProbe.omf" \
|
"$PROJECT_ROOT/demos/rsrcProbe.omf" \
|
||||||
--check 0x70=0x99 0x71=0xff 0x72=0xff 0x73=0x01 >/tmp/rsrcRunOut 2>&1 || {
|
--check 0x70=0x99 0x71=0x01 0x72=0x01 0x73=0x01 >/tmp/rsrcRunOut 2>&1 || {
|
||||||
cat /tmp/rsrcRunOut >&2
|
cat /tmp/rsrcRunOut >&2
|
||||||
die "rsrcProbe did not set expected stub-mode markers"
|
die "rsrcProbe did not set expected real-impl markers"
|
||||||
}
|
}
|
||||||
log "OK: rsrcProbe (stub-mode RES_ERR_BLOCKED markers all green)"
|
log "OK: rsrcProbe (real Resource Manager open/load/cache/close all green)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Phase 4.2 sprite engine: standalone SHR 320 init + 16x16 4bpp packed
|
# Phase 4.2 sprite engine: standalone SHR 320 init + 16x16 4bpp packed
|
||||||
|
|
@ -6621,15 +6749,23 @@ fi
|
||||||
|
|
||||||
# Phase 6.2 UBSan-min smoke probe: build a tiny program with
|
# Phase 6.2 UBSan-min smoke probe: build a tiny program with
|
||||||
# `-fsanitize=undefined -fsanitize-minimal-runtime`, link against the
|
# `-fsanitize=undefined -fsanitize-minimal-runtime`, link against the
|
||||||
# new runtime/ubsan.o, and verify three representative UB kinds
|
# new runtime/ubsan.o, and verify nine recoverable UB kinds
|
||||||
# (add-overflow / shift-out-of-bounds / divrem-overflow) instrument
|
# (add-overflow / shift-out-of-bounds / divrem-overflow / sub-overflow /
|
||||||
# cleanly + recover. Bare-metal (no GS/OS), so we only require `mame`.
|
# mul-overflow / negate-overflow / pointer-overflow / load-invalid-value /
|
||||||
|
# out-of-bounds) instrument cleanly + recover. Bare-metal (no GS/OS),
|
||||||
|
# so we only require `mame`.
|
||||||
#
|
#
|
||||||
# What this probe pins:
|
# What this probe pins:
|
||||||
# $025000 = 0xC0DE add-overflow handler fired and recovered
|
# $025000 = 0xC0DE add-overflow handler fired and recovered
|
||||||
# $025002 = 0xC0DF shift-out-of-bounds handler fired and recovered
|
# $025002 = 0xC0DF shift-out-of-bounds handler fired and recovered
|
||||||
# $025004 = 0xC0E0 divrem-overflow handler fired and recovered
|
# $025004 = 0xC0E0 divrem-overflow handler fired and recovered
|
||||||
# $025006 = 0xC0DA main reached its tail past all three UBs
|
# $025006 = 0xC0E1 sub-overflow handler fired and recovered
|
||||||
|
# $025008 = 0xC0E2 mul-overflow handler fired and recovered
|
||||||
|
# $02500A = 0xC0E3 negate-overflow handler fired and recovered
|
||||||
|
# $02500C = 0xC0E4 pointer-overflow handler fired and recovered
|
||||||
|
# $02500E = 0xC0E5 load-invalid-value handler fired and recovered
|
||||||
|
# $025010 = 0xC0E6 out-of-bounds handler fired and recovered
|
||||||
|
# $025012 = 0xC0DA main reached its tail past all nine UBs
|
||||||
#
|
#
|
||||||
# Gated on `mame`. Override with SMOKE_SKIP_UBSAN=1.
|
# Gated on `mame`. Override with SMOKE_SKIP_UBSAN=1.
|
||||||
if [ "${SMOKE_SKIP_UBSAN:-0}" = 1 ]; then
|
if [ "${SMOKE_SKIP_UBSAN:-0}" = 1 ]; then
|
||||||
|
|
@ -6637,12 +6773,12 @@ if [ "${SMOKE_SKIP_UBSAN:-0}" = 1 ]; then
|
||||||
elif ! command -v mame >/dev/null 2>&1 || [ ! -d "$PROJECT_ROOT/tools/mame/roms" ]; then
|
elif ! command -v mame >/dev/null 2>&1 || [ ! -d "$PROJECT_ROOT/tools/mame/roms" ]; then
|
||||||
warn "Phase 6.2 ubsan prerequisites missing (mame); skipping"
|
warn "Phase 6.2 ubsan prerequisites missing (mame); skipping"
|
||||||
else
|
else
|
||||||
log "check: ubsanProbe (UBSan-min: add-overflow + shift-OOB + div-by-zero) in MAME"
|
log "check: ubsanProbe (UBSan-min: 9 UB kinds) in MAME"
|
||||||
bash "$PROJECT_ROOT/tests/ubsan/runUbsanProbe.sh" >/tmp/ubsanRunOut 2>&1 || {
|
bash "$PROJECT_ROOT/tests/ubsan/runUbsanProbe.sh" >/tmp/ubsanRunOut 2>&1 || {
|
||||||
cat /tmp/ubsanRunOut >&2
|
cat /tmp/ubsanRunOut >&2
|
||||||
die "ubsanProbe did not set expected handler-fired markers"
|
die "ubsanProbe did not set expected handler-fired markers"
|
||||||
}
|
}
|
||||||
log "OK: ubsanProbe (3 UB kinds instrumented + recovered + tail reached)"
|
log "OK: ubsanProbe (9 UB kinds instrumented + recovered + tail reached)"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
log "all smoke checks passed"
|
log "all smoke checks passed"
|
||||||
|
|
|
||||||
|
|
@ -73,12 +73,12 @@ struct Elf32Shdr {
|
||||||
uint32_t sh_entsize;
|
uint32_t sh_entsize;
|
||||||
};
|
};
|
||||||
|
|
||||||
static constexpr uint32_t SHT_NULL = 0;
|
[[maybe_unused]] static constexpr uint32_t SHT_NULL = 0;
|
||||||
static constexpr uint32_t SHT_PROGBITS = 1;
|
[[maybe_unused]] static constexpr uint32_t SHT_PROGBITS = 1;
|
||||||
static constexpr uint32_t SHT_SYMTAB = 2;
|
static constexpr uint32_t SHT_SYMTAB = 2;
|
||||||
static constexpr uint32_t SHT_STRTAB = 3;
|
static constexpr uint32_t SHT_STRTAB = 3;
|
||||||
static constexpr uint32_t SHT_RELA = 4;
|
static constexpr uint32_t SHT_RELA = 4;
|
||||||
static constexpr uint32_t SHT_NOBITS = 8;
|
[[maybe_unused]] static constexpr uint32_t SHT_NOBITS = 8;
|
||||||
|
|
||||||
struct Elf32Sym {
|
struct Elf32Sym {
|
||||||
uint32_t st_name;
|
uint32_t st_name;
|
||||||
|
|
@ -104,12 +104,12 @@ static constexpr uint16_t EM_NONE = 0;
|
||||||
inline uint8_t ELF32_ST_TYPE(uint8_t i) { return i & 0x0F; }
|
inline uint8_t ELF32_ST_TYPE(uint8_t i) { return i & 0x0F; }
|
||||||
inline uint8_t ELF32_ST_BIND(uint8_t i) { return (i >> 4) & 0x0F; }
|
inline uint8_t ELF32_ST_BIND(uint8_t i) { return (i >> 4) & 0x0F; }
|
||||||
static constexpr uint8_t STB_LOCAL = 0;
|
static constexpr uint8_t STB_LOCAL = 0;
|
||||||
static constexpr uint8_t STB_GLOBAL = 1;
|
[[maybe_unused]] static constexpr uint8_t STB_GLOBAL = 1;
|
||||||
static constexpr uint8_t STB_WEAK = 2;
|
static constexpr uint8_t STB_WEAK = 2;
|
||||||
|
|
||||||
static constexpr uint8_t STT_NOTYPE = 0;
|
[[maybe_unused]] static constexpr uint8_t STT_NOTYPE = 0;
|
||||||
static constexpr uint8_t STT_OBJECT = 1;
|
[[maybe_unused]] static constexpr uint8_t STT_OBJECT = 1;
|
||||||
static constexpr uint8_t STT_FUNC = 2;
|
[[maybe_unused]] static constexpr uint8_t STT_FUNC = 2;
|
||||||
static constexpr uint8_t STT_SECTION = 3;
|
static constexpr uint8_t STT_SECTION = 3;
|
||||||
|
|
||||||
struct Elf32Rela {
|
struct Elf32Rela {
|
||||||
|
|
@ -170,9 +170,10 @@ static std::string sectionKind(const std::string &name) {
|
||||||
// .init_array entries are 16-bit function pointers; treat as
|
// .init_array entries are 16-bit function pointers; treat as
|
||||||
// rodata so they end up in the read-only image and get a stable
|
// rodata so they end up in the read-only image and get a stable
|
||||||
// address. The linker emits __init_array_start/_end so crt0 can
|
// address. The linker emits __init_array_start/_end so crt0 can
|
||||||
// walk them. Same for .fini_array (destructors).
|
// walk them. (.fini_array is not yet wired up; ELF input is
|
||||||
|
// accepted but the sections are dropped — runtime has no
|
||||||
|
// destructor-walk path today.)
|
||||||
if (name == ".init_array" || name.rfind(".init_array.", 0) == 0) return "init_array";
|
if (name == ".init_array" || name.rfind(".init_array.", 0) == 0) return "init_array";
|
||||||
if (name == ".fini_array" || name.rfind(".fini_array.", 0) == 0) return "fini_array";
|
|
||||||
// DWARF debug sections that are *targets* of intra-debug relocs
|
// DWARF debug sections that are *targets* of intra-debug relocs
|
||||||
// (e.g. .debug_info -> .debug_str via R_W65816_DATA32, or
|
// (e.g. .debug_info -> .debug_str via R_W65816_DATA32, or
|
||||||
// .debug_str_offsets -> .debug_str via R_W65816_DATA32). Treat
|
// .debug_str_offsets -> .debug_str via R_W65816_DATA32). Treat
|
||||||
|
|
@ -384,6 +385,26 @@ static std::vector<Imm24Site> gImm24Sites;
|
||||||
static uint32_t gTextBaseForSites = 0;
|
static uint32_t gTextBaseForSites = 0;
|
||||||
static bool gRecordSites = false;
|
static bool gRecordSites = false;
|
||||||
|
|
||||||
|
|
||||||
|
// Record an intra-segment patch site for cRELOC emission. A target
|
||||||
|
// below the text base is never intra-segment (it is an undefined-weak
|
||||||
|
// resolving to 0, or an absolute address) and is skipped — see the
|
||||||
|
// commentary at the R_W65816_IMM16 callsite for why this matters.
|
||||||
|
static void recordCRelocSite(uint32_t patchAddr, uint32_t target,
|
||||||
|
uint8_t byteCnt, uint8_t bitShift) {
|
||||||
|
if (!gRecordSites) return;
|
||||||
|
uint32_t targetBank = target & 0xFF0000;
|
||||||
|
uint32_t baseBank = gTextBaseForSites & 0xFF0000;
|
||||||
|
if (targetBank != baseBank) return;
|
||||||
|
if (target < gTextBaseForSites) return;
|
||||||
|
Imm24Site s;
|
||||||
|
s.patchOff = patchAddr - gTextBaseForSites;
|
||||||
|
s.offsetRef = target - gTextBaseForSites;
|
||||||
|
s.byteCnt = byteCnt;
|
||||||
|
s.bitShift = bitShift;
|
||||||
|
gImm24Sites.push_back(s);
|
||||||
|
}
|
||||||
|
|
||||||
// Number of bytes patched by a given reloc type. Used by callers
|
// Number of bytes patched by a given reloc type. Used by callers
|
||||||
// that need to range-check a reloc offset against a buffer size
|
// that need to range-check a reloc offset against a buffer size
|
||||||
// without re-deriving the width inline. Returns 0 for unknown
|
// without re-deriving the width inline. Returns 0 for unknown
|
||||||
|
|
@ -411,7 +432,7 @@ static uint32_t relocWidth(uint8_t rtype) {
|
||||||
static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
|
static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
|
||||||
uint32_t patchAddr, uint32_t target,
|
uint32_t patchAddr, uint32_t target,
|
||||||
uint8_t rtype, const std::string &symName) {
|
uint8_t rtype, const std::string &symName) {
|
||||||
int64_t Signed;
|
int64_t pcrelDisp;
|
||||||
switch (rtype) {
|
switch (rtype) {
|
||||||
case R_W65816_IMM8:
|
case R_W65816_IMM8:
|
||||||
if (target > 0xFF)
|
if (target > 0xFF)
|
||||||
|
|
@ -433,28 +454,16 @@ static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
|
||||||
// time. Without this, `lda absConst` reads from the wrong
|
// time. Without this, `lda absConst` reads from the wrong
|
||||||
// address when the segment doesn't land at link-time-base
|
// address when the segment doesn't land at link-time-base
|
||||||
// (e.g., link-time-base=0x1000 but Loader places at bank:0).
|
// (e.g., link-time-base=0x1000 but Loader places at bank:0).
|
||||||
if (gRecordSites) {
|
// A target below the text base is never an intra-segment
|
||||||
uint32_t targetBank = target & 0xFF0000;
|
// relocatable site: it is an undefined-weak symbol (resolveSym
|
||||||
uint32_t baseBank = gTextBaseForSites & 0xFF0000;
|
// resolves those to 0) or an absolute address. Recording a
|
||||||
// A target below the text base is never an intra-segment
|
// cRELOC for it would (a) underflow offsetRef = target - textBase
|
||||||
// relocatable site: it is an undefined-weak symbol (resolveSym
|
// (omfEmit rejects it as out-of-range) and (b) make the Loader
|
||||||
// resolves those to 0) or an absolute address. Recording a
|
// rewrite a genuine null to segPlacedBase, breaking the
|
||||||
// cRELOC for it would (a) underflow offsetRef = target - textBase
|
// `if (weakFn) weakFn()` null test that the null is meant to fail.
|
||||||
// (omfEmit rejects it as out-of-range) and (b) make the Loader
|
// recordCRelocSite handles the gate; byteCnt=2 distinguishes
|
||||||
// rewrite a genuine null to segPlacedBase, breaking the
|
// from IMM24 (3) so omfEmit emits cRELOC ByteCnt=2 here.
|
||||||
// `if (weakFn) weakFn()` null test that the null is meant to fail.
|
recordCRelocSite(patchAddr, target, /*byteCnt=*/2, /*bitShift=*/0);
|
||||||
if (targetBank == baseBank && target >= gTextBaseForSites) {
|
|
||||||
Imm24Site s;
|
|
||||||
s.patchOff = patchAddr - gTextBaseForSites;
|
|
||||||
s.offsetRef = target - gTextBaseForSites;
|
|
||||||
// Use type field width = 2 to distinguish from IMM24
|
|
||||||
// (3). Imm24Site struct is reused — emitOmf will
|
|
||||||
// emit cRELOC ByteCnt=2 for this.
|
|
||||||
s.byteCnt = 2;
|
|
||||||
s.bitShift = 0;
|
|
||||||
gImm24Sites.push_back(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case R_W65816_BANK16:
|
case R_W65816_BANK16:
|
||||||
// 2-byte patch: byte 0 = bank of target, byte 1 = 0 (pad).
|
// 2-byte patch: byte 0 = bank of target, byte 1 = 0 (pad).
|
||||||
|
|
@ -463,20 +472,9 @@ static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
|
||||||
// the value reflects the actually-placed bank.
|
// the value reflects the actually-placed bank.
|
||||||
buf[off] = static_cast<uint8_t>((target >> 16) & 0xFF);
|
buf[off] = static_cast<uint8_t>((target >> 16) & 0xFF);
|
||||||
buf[off + 1] = 0;
|
buf[off + 1] = 0;
|
||||||
if (gRecordSites) {
|
// bitShift=16: cRELOC Loader patches the bank byte from
|
||||||
uint32_t targetBank = target & 0xFF0000;
|
// (segPlacedBase + offsetRef) >> 16 at load time.
|
||||||
uint32_t baseBank = gTextBaseForSites & 0xFF0000;
|
recordCRelocSite(patchAddr, target, /*byteCnt=*/2, /*bitShift=*/16);
|
||||||
// See R_W65816_IMM16: skip undefined-weak/absolute targets
|
|
||||||
// below the text base (no valid intra-segment cRELOC).
|
|
||||||
if (targetBank == baseBank && target >= gTextBaseForSites) {
|
|
||||||
Imm24Site s;
|
|
||||||
s.patchOff = patchAddr - gTextBaseForSites;
|
|
||||||
s.offsetRef = target - gTextBaseForSites;
|
|
||||||
s.byteCnt = 2;
|
|
||||||
s.bitShift = 16;
|
|
||||||
gImm24Sites.push_back(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case R_W65816_IMM24:
|
case R_W65816_IMM24:
|
||||||
if (target > 0xFFFFFF)
|
if (target > 0xFFFFFF)
|
||||||
|
|
@ -485,46 +483,30 @@ static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
|
||||||
buf[off] = static_cast<uint8_t>(target & 0xFF);
|
buf[off] = static_cast<uint8_t>(target & 0xFF);
|
||||||
buf[off + 1] = static_cast<uint8_t>((target >> 8) & 0xFF);
|
buf[off + 1] = static_cast<uint8_t>((target >> 8) & 0xFF);
|
||||||
buf[off + 2] = static_cast<uint8_t>((target >> 16) & 0xFF);
|
buf[off + 2] = static_cast<uint8_t>((target >> 16) & 0xFF);
|
||||||
// Record the site for OMF cRELOC emission (only if recording is
|
// Only intra-segment refs need cRELOC; cross-bank refs (to
|
||||||
// enabled — gRecordSites is set by the CLI when --reloc-out is
|
// GS/OS dispatcher etc.) target absolute fixed addresses
|
||||||
// requested). The patch offset is within the segment image; the
|
// and shouldn't be relocated by the Loader. recordCRelocSite
|
||||||
// reference offset is the in-segment offset of the target.
|
// applies the same gates as R_W65816_IMM16.
|
||||||
if (gRecordSites) {
|
recordCRelocSite(patchAddr, target, /*byteCnt=*/3, /*bitShift=*/0);
|
||||||
// Only intra-segment refs need cRELOC; cross-bank refs (to
|
|
||||||
// GS/OS dispatcher etc.) target absolute fixed addresses
|
|
||||||
// and shouldn't be relocated by the Loader.
|
|
||||||
uint32_t targetBank = target & 0xFF0000;
|
|
||||||
uint32_t baseBank = gTextBaseForSites & 0xFF0000;
|
|
||||||
// See R_W65816_IMM16: skip undefined-weak/absolute targets
|
|
||||||
// below the text base (no valid intra-segment cRELOC).
|
|
||||||
if (targetBank == baseBank && target >= gTextBaseForSites) {
|
|
||||||
Imm24Site s;
|
|
||||||
s.patchOff = patchAddr - gTextBaseForSites;
|
|
||||||
s.offsetRef = target - gTextBaseForSites;
|
|
||||||
s.byteCnt = 3;
|
|
||||||
s.bitShift = 0;
|
|
||||||
gImm24Sites.push_back(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case R_W65816_PCREL8:
|
case R_W65816_PCREL8:
|
||||||
Signed = static_cast<int64_t>(target) - (static_cast<int64_t>(patchAddr) + 1);
|
pcrelDisp = static_cast<int64_t>(target) - (static_cast<int64_t>(patchAddr) + 1);
|
||||||
if (Signed < -128 || Signed > 127) {
|
if (pcrelDisp < -128 || pcrelDisp > 127) {
|
||||||
char msg[256];
|
char msg[256];
|
||||||
std::snprintf(msg, sizeof(msg),
|
std::snprintf(msg, sizeof(msg),
|
||||||
"R_W65816_PCREL8 to '%s' out of branch range (%lld bytes)",
|
"R_W65816_PCREL8 to '%s' out of branch range (%lld bytes)",
|
||||||
symName.c_str(), (long long)Signed);
|
symName.c_str(), (long long)pcrelDisp);
|
||||||
die(msg);
|
die(msg);
|
||||||
}
|
}
|
||||||
buf[off] = static_cast<uint8_t>(Signed & 0xFF);
|
buf[off] = static_cast<uint8_t>(pcrelDisp & 0xFF);
|
||||||
break;
|
break;
|
||||||
case R_W65816_PCREL16:
|
case R_W65816_PCREL16:
|
||||||
Signed = static_cast<int64_t>(target) - (static_cast<int64_t>(patchAddr) + 2);
|
pcrelDisp = static_cast<int64_t>(target) - (static_cast<int64_t>(patchAddr) + 2);
|
||||||
if (Signed < -32768 || Signed > 32767)
|
if (pcrelDisp < -32768 || pcrelDisp > 32767)
|
||||||
die("R_W65816_PCREL16 to '" + symName +
|
die("R_W65816_PCREL16 to '" + symName +
|
||||||
"' out of BRL range");
|
"' out of BRL range");
|
||||||
buf[off] = static_cast<uint8_t>(Signed & 0xFF);
|
buf[off] = static_cast<uint8_t>(pcrelDisp & 0xFF);
|
||||||
buf[off + 1] = static_cast<uint8_t>((Signed >> 8) & 0xFF);
|
buf[off + 1] = static_cast<uint8_t>((pcrelDisp >> 8) & 0xFF);
|
||||||
break;
|
break;
|
||||||
case R_W65816_DATA32:
|
case R_W65816_DATA32:
|
||||||
// 4-byte LE absolute. Used in DWARF .debug_* sections
|
// 4-byte LE absolute. Used in DWARF .debug_* sections
|
||||||
|
|
@ -554,33 +536,22 @@ static void applyReloc(std::vector<uint8_t> &buf, uint32_t off,
|
||||||
// patches the low 3 bytes of the 4-byte slot at load time,
|
// patches the low 3 bytes of the 4-byte slot at load time,
|
||||||
// leaving the high (pad) byte at 0 (writes the resolved
|
// leaving the high (pad) byte at 0 (writes the resolved
|
||||||
// 24-bit value bank:offset with bitShift=0 == no shift).
|
// 24-bit value bank:offset with bitShift=0 == no shift).
|
||||||
if (gRecordSites) {
|
recordCRelocSite(patchAddr, target, /*byteCnt=*/3, /*bitShift=*/0);
|
||||||
uint32_t targetBank = target & 0xFF0000;
|
|
||||||
uint32_t baseBank = gTextBaseForSites & 0xFF0000;
|
|
||||||
if (targetBank == baseBank && target >= gTextBaseForSites) {
|
|
||||||
Imm24Site s;
|
|
||||||
s.patchOff = patchAddr - gTextBaseForSites;
|
|
||||||
s.offsetRef = target - gTextBaseForSites;
|
|
||||||
s.byteCnt = 3;
|
|
||||||
s.bitShift = 0;
|
|
||||||
gImm24Sites.push_back(s);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case R_W65816_PCREL32:
|
case R_W65816_PCREL32:
|
||||||
// 4-byte signed PC-relative. PCREL displacements have the
|
// 4-byte signed PC-relative. PCREL displacements have the
|
||||||
// PC pointing past the slot — the convention used by every
|
// PC pointing past the slot — the convention used by every
|
||||||
// other PCREL reloc in this file (PCREL8 adds 1, PCREL16
|
// other PCREL reloc in this file (PCREL8 adds 1, PCREL16
|
||||||
// adds 2), so PCREL32 adds 4.
|
// adds 2), so PCREL32 adds 4.
|
||||||
Signed = static_cast<int64_t>(target) - (static_cast<int64_t>(patchAddr) + 4);
|
pcrelDisp = static_cast<int64_t>(target) - (static_cast<int64_t>(patchAddr) + 4);
|
||||||
// No range check: 32-bit signed displacement covers the
|
// No range check: 32-bit signed displacement covers the
|
||||||
// full address space. In practice this fires for DWARF
|
// full address space. In practice this fires for DWARF
|
||||||
// intra-section diffs where target and patchAddr live in
|
// intra-section diffs where target and patchAddr live in
|
||||||
// the same section, so Signed is small.
|
// the same section, so pcrelDisp is small.
|
||||||
buf[off] = static_cast<uint8_t>(Signed & 0xFF);
|
buf[off] = static_cast<uint8_t>(pcrelDisp & 0xFF);
|
||||||
buf[off + 1] = static_cast<uint8_t>((Signed >> 8) & 0xFF);
|
buf[off + 1] = static_cast<uint8_t>((pcrelDisp >> 8) & 0xFF);
|
||||||
buf[off + 2] = static_cast<uint8_t>((Signed >> 16) & 0xFF);
|
buf[off + 2] = static_cast<uint8_t>((pcrelDisp >> 16) & 0xFF);
|
||||||
buf[off + 3] = static_cast<uint8_t>((Signed >> 24) & 0xFF);
|
buf[off + 3] = static_cast<uint8_t>((pcrelDisp >> 24) & 0xFF);
|
||||||
break;
|
break;
|
||||||
default: {
|
default: {
|
||||||
char msg[128];
|
char msg[128];
|
||||||
|
|
@ -1106,11 +1077,6 @@ struct Linker {
|
||||||
curRem -= seg;
|
curRem -= seg;
|
||||||
if (curRem == 0) { segIdx++; break; }
|
if (curRem == 0) { segIdx++; break; }
|
||||||
curBase += seg; // advance within bank or to next
|
curBase += seg; // advance within bank or to next
|
||||||
if ((curBase & 0xFFFFu) == 0) {
|
|
||||||
// Crossed bank boundary — already at start of next bank.
|
|
||||||
} else if ((curBase & 0xFF0000u) != ((curBase - 1) & 0xFF0000u)) {
|
|
||||||
// Just crossed into next bank.
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Zero out any unused segment slots so crt0 sees size=0.
|
// Zero out any unused segment slots so crt0 sees size=0.
|
||||||
for (uint32_t i = segIdx; i < 4; i++) {
|
for (uint32_t i = segIdx; i < 4; i++) {
|
||||||
|
|
@ -1709,13 +1675,9 @@ int main(int argc, char **argv) {
|
||||||
if (++i >= argc) usage(argv[0]);
|
if (++i >= argc) usage(argv[0]);
|
||||||
relocOutPath = argv[i++];
|
relocOutPath = argv[i++];
|
||||||
} else if (a == "--gc-sections") {
|
} else if (a == "--gc-sections") {
|
||||||
// Drop sections not reachable from __start / main /
|
// GC of unreachable sections is on by default; --gc-sections
|
||||||
// init_array. Requires `-ffunction-sections` (so each
|
// is accepted as a no-op alias for clarity. Use
|
||||||
// function is in its own section). Significantly shrinks
|
// --no-gc-sections to disable.
|
||||||
// text for programs that link the whole runtime but only
|
|
||||||
// use a fraction of it. ON by default; --no-gc-sections
|
|
||||||
// disables.
|
|
||||||
linker.gcSections = true;
|
|
||||||
i++;
|
i++;
|
||||||
} else if (a == "--no-gc-sections") {
|
} else if (a == "--no-gc-sections") {
|
||||||
linker.gcSections = false;
|
linker.gcSections = false;
|
||||||
|
|
|
||||||
BIN
src/link816/omfEmit
Executable file
BIN
src/link816/omfEmit
Executable file
Binary file not shown.
|
|
@ -32,6 +32,24 @@
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
|
// OMF v2.1 protocol constants -- single source of truth for the header
|
||||||
|
// layout and opcode set. See Apple IIgs Tech Note #17 and the FTN
|
||||||
|
// reference. Don't renumber; values are shared with the loader.
|
||||||
|
static constexpr uint8_t OMF_OP_LCONST = 0xF2;
|
||||||
|
static constexpr uint8_t OMF_OP_CRELOC = 0xF5;
|
||||||
|
static constexpr uint8_t OMF_OP_END = 0x00;
|
||||||
|
[[maybe_unused]] static constexpr uint8_t OMF_NUMLEN = 4;
|
||||||
|
[[maybe_unused]] static constexpr uint8_t OMF_VERSION_V21 = 0x02;
|
||||||
|
[[maybe_unused]] static constexpr uint32_t OMF_HDR_SIZE = 44;
|
||||||
|
[[maybe_unused]] static constexpr uint32_t OMF_LABLEN_FIXED = 10;
|
||||||
|
static constexpr uint16_t OMF_KIND_CODE_PRIV = 0x1000;
|
||||||
|
static constexpr uint16_t OMF_KIND_DPSTACK = 0x4012; // DP/Stack | RELOAD; matches real-world GNO/ME ~_STACK format
|
||||||
|
static constexpr uint16_t OMF_KIND_DATA_STATIC = 0x8001;
|
||||||
|
static constexpr uint16_t OMF_KIND_CODE_STATIC_ABSBANK = 0x8800;
|
||||||
|
// cRELOC opcode wire size: opcode + ByteCnt + BitShift + OffsetPatch +
|
||||||
|
// OffsetReference = 1 + 1 + 1 + 2 + 2 = 7 bytes per site.
|
||||||
|
static constexpr uint32_t OMF_CRELOC_BYTES_PER_SITE = 7;
|
||||||
|
|
||||||
[[noreturn]] static void die(const std::string &msg) {
|
[[noreturn]] static void die(const std::string &msg) {
|
||||||
std::fprintf(stderr, "omfEmit: %s\n", msg.c_str());
|
std::fprintf(stderr, "omfEmit: %s\n", msg.c_str());
|
||||||
std::exit(1);
|
std::exit(1);
|
||||||
|
|
@ -48,9 +66,7 @@ struct RelocSite {
|
||||||
uint8_t byteCnt;
|
uint8_t byteCnt;
|
||||||
uint8_t bitShift; // 0 for offset relocs, 16 for BANK16
|
uint8_t bitShift; // 0 for offset relocs, 16 for BANK16
|
||||||
};
|
};
|
||||||
} // close namespace
|
|
||||||
std::vector<RelocSite> gReloc24Sites;
|
std::vector<RelocSite> gReloc24Sites;
|
||||||
namespace {
|
|
||||||
|
|
||||||
static std::vector<uint8_t> readFile(const std::string &path) {
|
static std::vector<uint8_t> readFile(const std::string &path) {
|
||||||
std::ifstream f(path, std::ios::binary);
|
std::ifstream f(path, std::ios::binary);
|
||||||
|
|
@ -135,7 +151,7 @@ static std::vector<uint8_t> emitOneSeg(const std::vector<uint8_t> &image,
|
||||||
// literal bytes. With NUMLEN=4 (standard for v2.1), the count
|
// literal bytes. With NUMLEN=4 (standard for v2.1), the count
|
||||||
// field is 4 bytes. Verified empirically against real /SYSTEM/
|
// field is 4 bytes. Verified empirically against real /SYSTEM/
|
||||||
// START on GS/OS 6.0.2: every segment uses 0xF2 + 4-byte count.
|
// START on GS/OS 6.0.2: every segment uses 0xF2 + 4-byte count.
|
||||||
body.push_back(0xF2); // LCONST opcode
|
body.push_back(OMF_OP_LCONST); // LCONST opcode
|
||||||
put32(body, static_cast<uint32_t>(combined.size()));
|
put32(body, static_cast<uint32_t>(combined.size()));
|
||||||
body.insert(body.end(), combined.begin(), combined.end());
|
body.insert(body.end(), combined.begin(), combined.end());
|
||||||
}
|
}
|
||||||
|
|
@ -150,14 +166,14 @@ static std::vector<uint8_t> emitOneSeg(const std::vector<uint8_t> &image,
|
||||||
// (segPlacedBase + OffsetReference) at load time. This is what
|
// (segPlacedBase + OffsetReference) at load time. This is what
|
||||||
// makes JSL/JML/STAlong/etc. with intra-segment targets work when
|
// makes JSL/JML/STAlong/etc. with intra-segment targets work when
|
||||||
// the Loader places us at non-zero bank.
|
// the Loader places us at non-zero bank.
|
||||||
for (const auto &s : ::gReloc24Sites) {
|
for (const auto &s : gReloc24Sites) {
|
||||||
body.push_back(0xF5);
|
body.push_back(OMF_OP_CRELOC);
|
||||||
body.push_back(s.byteCnt); // ByteCnt (2 or 3)
|
body.push_back(s.byteCnt); // ByteCnt (2 or 3)
|
||||||
body.push_back(s.bitShift); // BitShift (0 or 16)
|
body.push_back(s.bitShift); // BitShift (0 or 16)
|
||||||
put16(body, s.patchOff); // OffsetPatch
|
put16(body, s.patchOff); // OffsetPatch
|
||||||
put16(body, s.offsetRef); // OffsetReference
|
put16(body, s.offsetRef); // OffsetReference
|
||||||
}
|
}
|
||||||
body.push_back(0x00); // END opcode
|
body.push_back(OMF_OP_END); // END opcode
|
||||||
|
|
||||||
// Real OMF format (Merlin32 convention, verified GS/OS Loader-launchable):
|
// Real OMF format (Merlin32 convention, verified GS/OS Loader-launchable):
|
||||||
// - LABLEN = 10: both LOAD_NAME and SEG_NAME are 10 bytes wide,
|
// - LABLEN = 10: both LOAD_NAME and SEG_NAME are 10 bytes wide,
|
||||||
|
|
@ -247,13 +263,21 @@ static std::vector<uint8_t> emitOneSeg(const std::vector<uint8_t> &image,
|
||||||
// allocate a page-aligned, locked memory block of that size in
|
// allocate a page-aligned, locked memory block of that size in
|
||||||
// bank $00."
|
// bank $00."
|
||||||
//
|
//
|
||||||
// The body is just an END opcode (no LCONST data — RESSPC alone tells
|
// The body is an LCONST opcode followed by `length` zero bytes plus an
|
||||||
// the Loader how big to make the allocation, and the bytes don't need
|
// END opcode — matching the real-world format used by every GNO/ME
|
||||||
// to come from the file). KIND = 0x1012 = DP/Stack | PRIVATE — the
|
// command (e.g. /GNO.BOOT/bin/echo's ~_STACK seg). Empirically a body
|
||||||
// PRIVATE attribute matches Apple's `makedirect` reference utility
|
// of just END (no LCONST, relying on RESSPC for allocation) makes the
|
||||||
// (ksherlock/omfutils).
|
// GS/OS Loader's ExpressLoad fast path silently drop the seg and fall
|
||||||
|
// back to its default 4 KB DP/Stack — hence this code emits real
|
||||||
|
// content so the Loader has something to copy. KIND = 0x4012 (RELOAD
|
||||||
|
// | DP/Stack) also matches the working GNO format; the earlier 0x1012
|
||||||
|
// (PRIVATE | DP/Stack) is what `makedirect` ships but doesn't survive
|
||||||
|
// ExpressLoad fast-path processing.
|
||||||
static std::vector<uint8_t> emitDpStackSeg(uint32_t length, uint16_t segNum) {
|
static std::vector<uint8_t> emitDpStackSeg(uint32_t length, uint16_t segNum) {
|
||||||
std::vector<uint8_t> body;
|
std::vector<uint8_t> body;
|
||||||
|
body.push_back(0xF2); // LCONST opcode
|
||||||
|
put32(body, length); // 4-byte literal length
|
||||||
|
body.insert(body.end(), length, 0); // `length` zero bytes
|
||||||
body.push_back(0x00); // END opcode
|
body.push_back(0x00); // END opcode
|
||||||
constexpr uint8_t LABLEN_VAL = 10;
|
constexpr uint8_t LABLEN_VAL = 10;
|
||||||
const std::string segNameTxt = "~Direct";
|
const std::string segNameTxt = "~Direct";
|
||||||
|
|
@ -267,10 +291,13 @@ static std::vector<uint8_t> emitDpStackSeg(uint32_t length, uint16_t segNum) {
|
||||||
DISPNAME + loadName.size() + segName.size());
|
DISPNAME + loadName.size() + segName.size());
|
||||||
const uint32_t LENGTH = length; // memory size requested
|
const uint32_t LENGTH = length; // memory size requested
|
||||||
const uint32_t BYTECNT = DISPDATA + static_cast<uint32_t>(body.size());
|
const uint32_t BYTECNT = DISPDATA + static_cast<uint32_t>(body.size());
|
||||||
const uint32_t RESSPC = length; // bytes to zero-allocate
|
// RESSPC = 0 because the bytes are carried in LCONST (matches the
|
||||||
|
// bss-as-zeros approach used for the user CODE seg — the Loader's
|
||||||
|
// ExpressLoad fast path can't be trusted to honor RESSPC).
|
||||||
|
const uint32_t RESSPC = 0;
|
||||||
const uint32_t BANKSIZE = 0; // DP/Stack lives in bank 0
|
const uint32_t BANKSIZE = 0; // DP/Stack lives in bank 0
|
||||||
const uint32_t ALIGN = 0x100; // page-aligned per spec
|
const uint32_t ALIGN = 0x100; // page-aligned per spec
|
||||||
const uint16_t KIND = 0x1012; // DP/Stack | PRIVATE
|
const uint16_t KIND = OMF_KIND_DPSTACK; // DP/Stack | RELOAD
|
||||||
|
|
||||||
std::vector<uint8_t> hdr;
|
std::vector<uint8_t> hdr;
|
||||||
put32(hdr, BYTECNT);
|
put32(hdr, BYTECNT);
|
||||||
|
|
@ -324,7 +351,7 @@ static std::vector<uint8_t> emitOMF(const std::vector<uint8_t> &image,
|
||||||
uint32_t bssGap = 0) {
|
uint32_t bssGap = 0) {
|
||||||
if (stackSize == 0) {
|
if (stackSize == 0) {
|
||||||
return emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/1,
|
return emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/1,
|
||||||
/*kind*/0x1000, name, bssSize, bssGap);
|
/*kind*/OMF_KIND_CODE_PRIV, name, bssSize, bssGap);
|
||||||
}
|
}
|
||||||
// DP/Stack segment ordering: Apple's `makedirect` reference utility
|
// DP/Stack segment ordering: Apple's `makedirect` reference utility
|
||||||
// assigns the DP/Stack as SEGNUM 1 (its own object); when linked
|
// assigns the DP/Stack as SEGNUM 1 (its own object); when linked
|
||||||
|
|
@ -334,7 +361,7 @@ static std::vector<uint8_t> emitOMF(const std::vector<uint8_t> &image,
|
||||||
// sets DP and SP appropriately when entering our code.
|
// sets DP and SP appropriately when entering our code.
|
||||||
auto dpSeg = emitDpStackSeg(stackSize, /*segNum*/1);
|
auto dpSeg = emitDpStackSeg(stackSize, /*segNum*/1);
|
||||||
auto codeSeg = emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/2,
|
auto codeSeg = emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/2,
|
||||||
/*kind*/0x1000, name, bssSize, bssGap);
|
/*kind*/OMF_KIND_CODE_PRIV, name, bssSize, bssGap);
|
||||||
std::vector<uint8_t> out;
|
std::vector<uint8_t> out;
|
||||||
out.insert(out.end(), dpSeg.begin(), dpSeg.end());
|
out.insert(out.end(), dpSeg.begin(), dpSeg.end());
|
||||||
out.insert(out.end(), codeSeg.begin(), codeSeg.end());
|
out.insert(out.end(), codeSeg.begin(), codeSeg.end());
|
||||||
|
|
@ -391,6 +418,17 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
auto userSeg = emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/2,
|
auto userSeg = emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/2,
|
||||||
/*kind*/0x1000, userSegName, bssSize, bssGap);
|
/*kind*/0x1000, userSegName, bssSize, bssGap);
|
||||||
|
|
||||||
|
// Optionally build the DP/Stack segment. If present it lives in the
|
||||||
|
// file AFTER the user seg and gets its own ExpressLoad segtable +
|
||||||
|
// remap + header_info entries — otherwise the Loader's ExpressLoad
|
||||||
|
// fast path never sees the KIND=0x4012 record and reverts to its
|
||||||
|
// default 4KB DP/Stack allocation (silent --stack-size no-op).
|
||||||
|
const bool haveDpStack = (stackSize != 0);
|
||||||
|
std::vector<uint8_t> dpStackSeg;
|
||||||
|
if (haveDpStack) {
|
||||||
|
dpStackSeg = emitDpStackSeg(stackSize, /*segNum*/3);
|
||||||
|
}
|
||||||
|
|
||||||
// Step 2: figure out the file offsets we'll need to bake into the
|
// Step 2: figure out the file offsets we'll need to bake into the
|
||||||
// load script. We don't know the ExpressLoad segment's total size
|
// load script. We don't know the ExpressLoad segment's total size
|
||||||
// yet — but we can compute it because each component is a fixed
|
// yet — but we can compute it because each component is a fixed
|
||||||
|
|
@ -399,11 +437,10 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
// ExpressLoad LCONST data layout (matches Merlin32 source — see
|
// ExpressLoad LCONST data layout (matches Merlin32 source — see
|
||||||
// BuildExpressLoadSegment in Merlin32's a65816_OMF.c):
|
// BuildExpressLoadSegment in Merlin32's a65816_OMF.c):
|
||||||
// 6 bytes header (4-byte reserved DWORD + 2-byte count WORD)
|
// 6 bytes header (4-byte reserved DWORD + 2-byte count WORD)
|
||||||
// 8 bytes segment list (1 entry per non-ExpressLoad segment)
|
// 8 bytes/seg segment list (1 entry per non-ExpressLoad segment)
|
||||||
// 2 bytes remap list (1 entry per non-ExpressLoad segment)
|
// 2 bytes/seg remap list (1 entry per non-ExpressLoad segment)
|
||||||
// 16 bytes header info offsets (data_off, data_len, reloc_off, reloc_len)
|
// 68 bytes/seg header_info (16B offsets + 32B hdr copy + 10B LOAD_NAME + 10B SEG_NAME)
|
||||||
// + header_xpress: bytes [12..43] of user header (32 bytes) + LOAD_NAME (10) + SEG_NAME (1+N)
|
// total: 6 + 78*N bytes for N non-ExpressLoad segs
|
||||||
// = 6 + 8 + 2 + 16 + 32 + 10 + 1 + N = 75 + N bytes
|
|
||||||
//
|
//
|
||||||
// KEY FIX from earlier emitter version: header is 6 bytes, NOT 8.
|
// KEY FIX from earlier emitter version: header is 6 bytes, NOT 8.
|
||||||
// I had written 8 bytes (file_ref WORD + reserved WORD + extra WORD +
|
// I had written 8 bytes (file_ref WORD + reserved WORD + extra WORD +
|
||||||
|
|
@ -415,7 +452,10 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
constexpr uint32_t HDR_SIZE = 44;
|
constexpr uint32_t HDR_SIZE = 44;
|
||||||
constexpr uint32_t LOAD_NAME_SIZE = 10;
|
constexpr uint32_t LOAD_NAME_SIZE = 10;
|
||||||
constexpr uint32_t SEG_NAME_SIZE = 10; // LABLEN=10 → fixed-width SEG_NAME
|
constexpr uint32_t SEG_NAME_SIZE = 10; // LABLEN=10 → fixed-width SEG_NAME
|
||||||
const uint32_t userNameLen = (uint32_t)userSegName.size();
|
constexpr uint32_t SEGTAB_ENTRY = 8;
|
||||||
|
constexpr uint32_t REMAP_ENTRY = 2;
|
||||||
|
constexpr uint32_t HDR_INFO_ENTRY = 16 + 32 + LOAD_NAME_SIZE + SEG_NAME_SIZE; // 68
|
||||||
|
constexpr uint32_t HEADER_BYTES = 6;
|
||||||
const uint32_t userNameAreaSize = LOAD_NAME_SIZE + SEG_NAME_SIZE;
|
const uint32_t userNameAreaSize = LOAD_NAME_SIZE + SEG_NAME_SIZE;
|
||||||
|
|
||||||
// ExpressLoad's own segment metrics. The name "~ExpressLoad" is 12
|
// ExpressLoad's own segment metrics. The name "~ExpressLoad" is 12
|
||||||
|
|
@ -423,12 +463,8 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
// uses LABLEN=0 (length-prefixed name): 1 length byte + 12 chars.
|
// uses LABLEN=0 (length-prefixed name): 1 length byte + 12 chars.
|
||||||
const std::string elName = "~ExpressLoad";
|
const std::string elName = "~ExpressLoad";
|
||||||
const uint32_t elNameAreaSize = LOAD_NAME_SIZE + 1 + (uint32_t)elName.size();
|
const uint32_t elNameAreaSize = LOAD_NAME_SIZE + 1 + (uint32_t)elName.size();
|
||||||
// header_xpress_length = (header bytes 12..43) + LOAD_NAME + SEG_NAME
|
const uint32_t nSegs = haveDpStack ? 2 : 1; // non-ExpressLoad segs
|
||||||
// = 32 + 10 + 10 = 52 bytes
|
const uint32_t elDataSize = HEADER_BYTES + (SEGTAB_ENTRY + REMAP_ENTRY + HDR_INFO_ENTRY) * nSegs;
|
||||||
// Per-segment ExpressLoad data: 8 (table) + 2 (remap) + 16 (offsets) + 52 = 78 bytes
|
|
||||||
// Header (6 bytes) + per-segment data: 6 + 78 = 84
|
|
||||||
const uint32_t elDataSize = 84;
|
|
||||||
(void)userNameLen; // truncated in user seg name; LABLEN=10 fixed
|
|
||||||
// Body size = 1 byte LCONST opcode + 4 byte length + data + 1 byte END
|
// Body size = 1 byte LCONST opcode + 4 byte length + data + 1 byte END
|
||||||
const uint32_t elBodySize = 1 + 4 + elDataSize + 1;
|
const uint32_t elBodySize = 1 + 4 + elDataSize + 1;
|
||||||
const uint32_t elSegSize = HDR_SIZE + elNameAreaSize + elBodySize;
|
const uint32_t elSegSize = HDR_SIZE + elNameAreaSize + elBodySize;
|
||||||
|
|
@ -438,24 +474,47 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
const uint32_t userBodyOpOff = userSegStart + HDR_SIZE + userNameAreaSize;
|
const uint32_t userBodyOpOff = userSegStart + HDR_SIZE + userNameAreaSize;
|
||||||
const uint32_t userDataOff = userBodyOpOff + 5; // 1 op + 4 length
|
const uint32_t userDataOff = userBodyOpOff + 5; // 1 op + 4 length
|
||||||
|
|
||||||
|
// DP/Stack segment file offsets (after user seg). The DP/Stack body
|
||||||
|
// mirrors the real GNO/ME ~_STACK seg format: an LCONST opcode + 4
|
||||||
|
// byte length + `stackSize` zero bytes + END. ExpressLoad's
|
||||||
|
// hdr_info entry has to point at the LCONST data so the Loader
|
||||||
|
// copies the right number of zeros into the allocated chunk — a
|
||||||
|
// body of just END (RESSPC-only) silently no-ops on the
|
||||||
|
// ExpressLoad fast path, which is the bug this whole section fixes.
|
||||||
|
const uint32_t dpStackSegStart = userSegStart + (uint32_t)userSeg.size();
|
||||||
|
const uint32_t dpStackBodyOff = dpStackSegStart + HDR_SIZE + (LOAD_NAME_SIZE + SEG_NAME_SIZE);
|
||||||
|
const uint32_t dpStackDataOff = dpStackBodyOff + 5; // 1 op + 4 length
|
||||||
|
|
||||||
// Step 3: build the ExpressLoad LCONST data.
|
// Step 3: build the ExpressLoad LCONST data.
|
||||||
std::vector<uint8_t> elData;
|
std::vector<uint8_t> elData;
|
||||||
// Header (6 bytes): reserved DWORD + count WORD
|
// Header (6 bytes): reserved DWORD + count WORD. count = N-2 where
|
||||||
|
// N = total segments in the file (including ExpressLoad). With a
|
||||||
|
// DP/Stack seg N=3 so count=1; without it N=2 so count=0.
|
||||||
put32(elData, 0); // reserved
|
put32(elData, 0); // reserved
|
||||||
put16(elData, 0); // count = N-2 = 0 (for 2 segs)
|
put16(elData, (uint16_t)(haveDpStack ? 1 : 0)); // count = N-2
|
||||||
|
|
||||||
// Segment list (1 × 8 bytes)
|
// Segment list: one 8-byte entry per non-ExpressLoad segment. Each
|
||||||
// Self-rel offset = (header info offset within elData) - (this entry pos)
|
// entry's first WORD is the SELF-RELATIVE offset (from this entry's
|
||||||
// = 16 - 6 = 10
|
// own start) to the segment's header_info record.
|
||||||
constexpr uint32_t segListEntryOff = 6;
|
const uint32_t segTableOff = HEADER_BYTES;
|
||||||
const uint32_t headerInfoOff = 6 + 8 + 2; // header + segtable + remap
|
const uint32_t remapOff = segTableOff + SEGTAB_ENTRY * nSegs;
|
||||||
put16(elData, (uint16_t)(headerInfoOff - segListEntryOff));
|
const uint32_t hdrInfoOff = remapOff + REMAP_ENTRY * nSegs;
|
||||||
put16(elData, 0); // flags
|
for (uint32_t i = 0; i < nSegs; i++) {
|
||||||
put32(elData, 0); // handle
|
const uint32_t thisEntryOff = segTableOff + SEGTAB_ENTRY * i;
|
||||||
|
const uint32_t thisHdrInfoOff = hdrInfoOff + HDR_INFO_ENTRY * i;
|
||||||
|
put16(elData, (uint16_t)(thisHdrInfoOff - thisEntryOff)); // self-rel
|
||||||
|
put16(elData, 0); // flags
|
||||||
|
put32(elData, 0); // handle
|
||||||
|
}
|
||||||
|
|
||||||
// Remap list: old seg 1 (which would be our user seg without
|
// Remap list: 1 WORD per non-ExpressLoad seg, giving the new
|
||||||
// ExpressLoad) maps to new seg 2 (since ExpressLoad takes seg 1).
|
// segment number for each old segment position. Old seg 1 (user
|
||||||
|
// code, would-be sole seg without ExpressLoad) → new seg 2.
|
||||||
|
// Old seg 2 (DP/Stack, only present when --stack-size) → new seg 3.
|
||||||
put16(elData, 2);
|
put16(elData, 2);
|
||||||
|
if (haveDpStack) {
|
||||||
|
put16(elData, 3);
|
||||||
|
}
|
||||||
|
|
||||||
// Header info entry for the user segment.
|
// Header info entry for the user segment.
|
||||||
// data length = LCONST data size in the file. emitOneSeg embeds
|
// data length = LCONST data size in the file. emitOneSeg embeds
|
||||||
|
|
@ -473,11 +532,10 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
put32(elData, 0); // reloc offset
|
put32(elData, 0); // reloc offset
|
||||||
put32(elData, 0); // reloc length
|
put32(elData, 0); // reloc length
|
||||||
} else {
|
} else {
|
||||||
const uint32_t crelocBytesPerSite = 7; // 0xF5 + 1+1+2+2
|
|
||||||
const uint32_t crelocOff =
|
const uint32_t crelocOff =
|
||||||
userDataOff + (uint32_t)image.size() + bssGap + bssSize;
|
userDataOff + (uint32_t)image.size() + bssGap + bssSize;
|
||||||
const uint32_t crelocLen =
|
const uint32_t crelocLen =
|
||||||
crelocBytesPerSite * (uint32_t)gReloc24Sites.size();
|
OMF_CRELOC_BYTES_PER_SITE * (uint32_t)gReloc24Sites.size();
|
||||||
put32(elData, crelocOff);
|
put32(elData, crelocOff);
|
||||||
put32(elData, crelocLen);
|
put32(elData, crelocLen);
|
||||||
}
|
}
|
||||||
|
|
@ -498,6 +556,34 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
elData.push_back(i < truncated.size() ? (uint8_t)truncated[i] : 0x20);
|
elData.push_back(i < truncated.size() ? (uint8_t)truncated[i] : 0x20);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Header info entry for the DP/Stack segment (when present).
|
||||||
|
// data_off / data_len point at the LCONST zero bytes carried in the
|
||||||
|
// DP/Stack seg's body, mirroring the working real-world layout
|
||||||
|
// (GNO/ME ~_STACK). No cRELOC entries for a DP/Stack seg, so
|
||||||
|
// reloc fields are 0.
|
||||||
|
if (haveDpStack) {
|
||||||
|
if (dpStackSeg.size() < HDR_SIZE) die("internal: DP/Stack seg too small");
|
||||||
|
put32(elData, dpStackDataOff); // data offset (LCONST data)
|
||||||
|
put32(elData, stackSize); // data length (= stack size)
|
||||||
|
put32(elData, 0); // reloc offset
|
||||||
|
put32(elData, 0); // reloc length
|
||||||
|
// Header copy: bytes [12..43] of DP/Stack segment header.
|
||||||
|
elData.insert(elData.end(), dpStackSeg.begin() + 12, dpStackSeg.begin() + HDR_SIZE);
|
||||||
|
elData[elData.size() - 32 + 30] = 0; // DISPDATA hi → 0
|
||||||
|
elData[elData.size() - 32 + 31] = 0;
|
||||||
|
// LOAD_NAME (10 bytes, space-padded)
|
||||||
|
for (int i = 0; i < (int)LOAD_NAME_SIZE; i++) elData.push_back(0x20);
|
||||||
|
// SEG_NAME = "~Direct" padded to 10 bytes (must match the value
|
||||||
|
// stored by emitDpStackSeg, otherwise ExpressLoad's name match
|
||||||
|
// could fail; the seg-name area in the file uses 10 spaces base
|
||||||
|
// with "~Direct" overwriting the first 7).
|
||||||
|
const char *dpName = "~Direct";
|
||||||
|
const size_t dpNameLen = 7;
|
||||||
|
for (size_t i = 0; i < SEG_NAME_SIZE; i++) {
|
||||||
|
elData.push_back(i < dpNameLen ? (uint8_t)dpName[i] : 0x20);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (elData.size() != elDataSize)
|
if (elData.size() != elDataSize)
|
||||||
die("internal: ExpressLoad data size mismatch");
|
die("internal: ExpressLoad data size mismatch");
|
||||||
|
|
||||||
|
|
@ -513,7 +599,7 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
elHdr.push_back(4); // NUMLEN
|
elHdr.push_back(4); // NUMLEN
|
||||||
elHdr.push_back(2); // VERSION (0x02 = v2.1)
|
elHdr.push_back(2); // VERSION (0x02 = v2.1)
|
||||||
put32(elHdr, 0); // BANKSIZE = 0 for DATA seg
|
put32(elHdr, 0); // BANKSIZE = 0 for DATA seg
|
||||||
put16(elHdr, 0x8001); // KIND = DATA|STATIC
|
put16(elHdr, OMF_KIND_DATA_STATIC); // KIND = DATA|STATIC
|
||||||
elHdr.push_back(0); elHdr.push_back(0); // undef
|
elHdr.push_back(0); elHdr.push_back(0); // undef
|
||||||
put32(elHdr, 0); // ORG
|
put32(elHdr, 0); // ORG
|
||||||
put32(elHdr, 0); // ALIGN
|
put32(elHdr, 0); // ALIGN
|
||||||
|
|
@ -542,16 +628,15 @@ static std::vector<uint8_t> emitOmfExpressLoad(
|
||||||
die("internal: ExpressLoad segment size mismatch");
|
die("internal: ExpressLoad segment size mismatch");
|
||||||
|
|
||||||
// Step 6: concatenate ExpressLoad + user segment + optional DP/Stack.
|
// Step 6: concatenate ExpressLoad + user segment + optional DP/Stack.
|
||||||
// The DP/Stack seg sits AFTER the user seg; the Loader walks file-
|
// The DP/Stack seg's presence is now also recorded in the
|
||||||
// ordered segments after the ExpressLoad load step completes, and
|
// ExpressLoad load script (segtable + remap + header_info entries
|
||||||
// processes each segment by KIND. The ExpressLoad load script only
|
// above) so the Loader's fast path honors KIND=0x4012 instead of
|
||||||
// tracks code/data segs; the DP/Stack seg is found by KIND walk.
|
// silently ignoring it and using its default 4 KB DP/Stack allocation.
|
||||||
std::vector<uint8_t> result;
|
std::vector<uint8_t> result;
|
||||||
result.insert(result.end(), elSeg.begin(), elSeg.end());
|
result.insert(result.end(), elSeg.begin(), elSeg.end());
|
||||||
result.insert(result.end(), userSeg.begin(), userSeg.end());
|
result.insert(result.end(), userSeg.begin(), userSeg.end());
|
||||||
if (stackSize != 0) {
|
if (haveDpStack) {
|
||||||
auto dpSeg = emitDpStackSeg(stackSize, /*segNum*/3);
|
result.insert(result.end(), dpStackSeg.begin(), dpStackSeg.end());
|
||||||
result.insert(result.end(), dpSeg.begin(), dpSeg.end());
|
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
@ -674,7 +759,7 @@ static void usage(const char *argv0) {
|
||||||
" sidecar; emit cRELOC (0xF5) opcodes after LCONST\n"
|
" sidecar; emit cRELOC (0xF5) opcodes after LCONST\n"
|
||||||
" so the Loader patches intra-segment 24-bit refs\n"
|
" so the Loader patches intra-segment 24-bit refs\n"
|
||||||
" (JSL/JML/STAlong/etc.) when placing the segment.\n"
|
" (JSL/JML/STAlong/etc.) when placing the segment.\n"
|
||||||
" --stack-size N append a ~Direct DP/Stack segment (KIND=0x1012)\n"
|
" --stack-size N append a ~Direct DP/Stack segment (KIND=0x4012)\n"
|
||||||
" of N bytes. The Loader allocates a page-aligned\n"
|
" of N bytes. The Loader allocates a page-aligned\n"
|
||||||
" block of this size in bank 0 for combined DP +\n"
|
" block of this size in bank 0 for combined DP +\n"
|
||||||
" stack use. N must be page-multiple (>= 256).\n"
|
" stack use. N must be page-multiple (>= 256).\n"
|
||||||
|
|
@ -782,7 +867,7 @@ int main(int argc, char **argv) {
|
||||||
// intra-segment relocations at link time and have no
|
// intra-segment relocations at link time and have no
|
||||||
// INTERSEG / RELOC opcodes); ABSBANK + ORG=base pins it
|
// INTERSEG / RELOC opcodes); ABSBANK + ORG=base pins it
|
||||||
// to a specific bank. CODE is the default (type 0).
|
// to a specific bank. CODE is the default (type 0).
|
||||||
uint16_t kind = (k == 0) ? 0x8800u : 0x8800u;
|
const uint16_t kind = OMF_KIND_CODE_STATIC_ABSBANK;
|
||||||
uint32_t entryOff = (k == 0) ? s.entryOff : 0;
|
uint32_t entryOff = (k == 0) ? s.entryOff : 0;
|
||||||
auto seg = emitOneSeg(img, entryOff, s.base,
|
auto seg = emitOneSeg(img, entryOff, s.base,
|
||||||
static_cast<uint16_t>(s.num),
|
static_cast<uint16_t>(s.num),
|
||||||
|
|
@ -846,10 +931,15 @@ int main(int argc, char **argv) {
|
||||||
if (!f) die("cannot open '" + output + "' for writing");
|
if (!f) die("cannot open '" + output + "' for writing");
|
||||||
f.write(reinterpret_cast<const char *>(blob.data()), blob.size());
|
f.write(reinterpret_cast<const char *>(blob.data()), blob.size());
|
||||||
|
|
||||||
|
// Segment count: 1 user CODE seg; +1 for ExpressLoad wrapper; +1
|
||||||
|
// when --stack-size adds a ~Direct DP/Stack seg.
|
||||||
|
int segCount = 1;
|
||||||
|
if (expressload) segCount++;
|
||||||
|
if (stackSize != 0) segCount++;
|
||||||
std::fprintf(stderr,
|
std::fprintf(stderr,
|
||||||
"OMF: %d segment%s%s, %zu bytes payload, entry='%s' at +0x%x -> %s "
|
"OMF: %d segment%s%s, %zu bytes payload, entry='%s' at +0x%x -> %s "
|
||||||
"(%zu bytes total)\n",
|
"(%zu bytes total)\n",
|
||||||
expressload ? 2 : 1, expressload ? "s" : "",
|
segCount, segCount == 1 ? "" : "s",
|
||||||
expressload ? " (ExpressLoad)" : "",
|
expressload ? " (ExpressLoad)" : "",
|
||||||
image.size(), entry.c_str(), entryOff,
|
image.size(), entry.c_str(), entryOff,
|
||||||
output.c_str(), blob.size());
|
output.c_str(), blob.size());
|
||||||
|
|
|
||||||
|
|
@ -6,9 +6,10 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
//
|
//
|
||||||
// Skeleton assembler backend. Fixup resolution, relaxation and nop
|
// W65816 assembler backend. Implements applyFixup for the
|
||||||
// generation are left unimplemented; they will be filled in once the
|
// R_W65816_* relocation family, BRA -> BRL relaxation when the 8-bit
|
||||||
// instruction encodings are defined.
|
// signed displacement won't fit, and writeNopData using 65816 NOP
|
||||||
|
// ($EA) bytes.
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
@ -29,6 +30,13 @@
|
||||||
// W65816::BRA / W65816::BRL opcodes are exported by W65816MCTargetDesc.h
|
// W65816::BRA / W65816::BRL opcodes are exported by W65816MCTargetDesc.h
|
||||||
// (which already includes the generated header).
|
// (which already includes the generated header).
|
||||||
|
|
||||||
|
// W65816 NOP machine encoding (single byte).
|
||||||
|
static constexpr unsigned char kOpcodeNOP = 0xEA;
|
||||||
|
|
||||||
|
// Signed 8-bit branch displacement range for Bxx / BRA fixups.
|
||||||
|
static constexpr int kBranch8Min = -128;
|
||||||
|
static constexpr int kBranch8Max = 127;
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
@ -110,7 +118,7 @@ public:
|
||||||
// instead of silently truncating.
|
// instead of silently truncating.
|
||||||
if (Fixup.getKind() == W65816::fixup_8_pcrel) {
|
if (Fixup.getKind() == W65816::fixup_8_pcrel) {
|
||||||
int64_t Signed = static_cast<int64_t>(Value);
|
int64_t Signed = static_cast<int64_t>(Value);
|
||||||
if (Signed < -128 || Signed > 127) {
|
if (Signed < kBranch8Min || Signed > kBranch8Max) {
|
||||||
getContext().reportError(
|
getContext().reportError(
|
||||||
Fixup.getLoc(),
|
Fixup.getLoc(),
|
||||||
"branch target out of range for 8-bit PC-relative branch "
|
"branch target out of range for 8-bit PC-relative branch "
|
||||||
|
|
@ -158,7 +166,7 @@ public:
|
||||||
const MCSubtargetInfo *STI) const override {
|
const MCSubtargetInfo *STI) const override {
|
||||||
// The 65816 NOP is a single 0xEA byte.
|
// The 65816 NOP is a single 0xEA byte.
|
||||||
for (uint64_t I = 0; I < Count; ++I)
|
for (uint64_t I = 0; I < Count; ++I)
|
||||||
OS << char(0xEA);
|
OS << static_cast<char>(kOpcodeNOP);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -192,7 +200,7 @@ public:
|
||||||
if (Fixup.getKind() != W65816::fixup_8_pcrel)
|
if (Fixup.getKind() != W65816::fixup_8_pcrel)
|
||||||
return false;
|
return false;
|
||||||
int64_t Signed = static_cast<int64_t>(Value);
|
int64_t Signed = static_cast<int64_t>(Value);
|
||||||
return Signed < -128 || Signed > 127;
|
return Signed < kBranch8Min || Signed > kBranch8Max;
|
||||||
}
|
}
|
||||||
|
|
||||||
void relaxInstruction(MCInst &Inst,
|
void relaxInstruction(MCInst &Inst,
|
||||||
|
|
|
||||||
|
|
@ -24,6 +24,23 @@
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
// R_W65816_* relocation numbers. These are protocol constants shared
|
||||||
|
// with link816 / omfEmit / llvm-objdump; do not renumber. If new types
|
||||||
|
// are added, mirror them in src/link816/link816.cpp's relocWidth() and
|
||||||
|
// the cRELOC pipeline.
|
||||||
|
namespace R_W65816 {
|
||||||
|
enum : unsigned {
|
||||||
|
R_IMM8 = 1,
|
||||||
|
R_IMM16 = 2,
|
||||||
|
R_IMM24 = 3,
|
||||||
|
R_PCREL8 = 4,
|
||||||
|
R_PCREL16 = 5,
|
||||||
|
R_BANK16 = 6,
|
||||||
|
R_DATA32 = 7,
|
||||||
|
R_PCREL32 = 8,
|
||||||
|
};
|
||||||
|
} // namespace R_W65816
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class W65816ELFObjectWriter : public MCELFObjectTargetWriter {
|
class W65816ELFObjectWriter : public MCELFObjectTargetWriter {
|
||||||
|
|
@ -56,16 +73,16 @@ protected:
|
||||||
// type — observed as type 249 — and broke link816.py.
|
// type — observed as type 249 — and broke link816.py.
|
||||||
auto Kind = Fixup.getKind();
|
auto Kind = Fixup.getKind();
|
||||||
switch (Kind) {
|
switch (Kind) {
|
||||||
case W65816::fixup_8: return 1; // R_W65816_IMM8
|
case W65816::fixup_8: return R_W65816::R_IMM8;
|
||||||
case W65816::fixup_16: return 2; // R_W65816_IMM16
|
case W65816::fixup_16: return R_W65816::R_IMM16;
|
||||||
case W65816::fixup_24: return 3; // R_W65816_IMM24
|
case W65816::fixup_24: return R_W65816::R_IMM24;
|
||||||
case W65816::fixup_8_pcrel: return 4; // R_W65816_PCREL8
|
case W65816::fixup_8_pcrel: return R_W65816::R_PCREL8;
|
||||||
case W65816::fixup_16_pcrel: return 5; // R_W65816_PCREL16
|
case W65816::fixup_16_pcrel: return R_W65816::R_PCREL16;
|
||||||
case W65816::fixup_bank16: return 6; // R_W65816_BANK16
|
case W65816::fixup_bank16: return R_W65816::R_BANK16;
|
||||||
case W65816::fixup_32: return 7; // R_W65816_DATA32
|
case W65816::fixup_32: return R_W65816::R_DATA32;
|
||||||
case W65816::fixup_32_pcrel: return 8; // R_W65816_PCREL32
|
case W65816::fixup_32_pcrel: return R_W65816::R_PCREL32;
|
||||||
case FK_Data_1: return IsPCRel ? 4 : 1;
|
case FK_Data_1: return IsPCRel ? R_W65816::R_PCREL8 : R_W65816::R_IMM8;
|
||||||
case FK_Data_2: return IsPCRel ? 5 : 2;
|
case FK_Data_2: return IsPCRel ? R_W65816::R_PCREL16 : R_W65816::R_IMM16;
|
||||||
// FK_Data_4 is emitted by DWARF (.debug_info / .debug_line /
|
// FK_Data_4 is emitted by DWARF (.debug_info / .debug_line /
|
||||||
// .debug_frame section-relative addresses), .eh_frame,
|
// .debug_frame section-relative addresses), .eh_frame,
|
||||||
// .debug_loclists, and user `.long` directives. Dispatch by
|
// .debug_loclists, and user `.long` directives. Dispatch by
|
||||||
|
|
@ -78,7 +95,7 @@ protected:
|
||||||
// .debug_line decoder because the 4th byte of the slot landed
|
// .debug_line decoder because the 4th byte of the slot landed
|
||||||
// on whatever followed it (most often the size byte of the
|
// on whatever followed it (most often the size byte of the
|
||||||
// next line-program header → unit_length = 0).
|
// next line-program header → unit_length = 0).
|
||||||
case FK_Data_4: return IsPCRel ? 8 : 7;
|
case FK_Data_4: return IsPCRel ? R_W65816::R_PCREL32 : R_W65816::R_DATA32;
|
||||||
default:
|
default:
|
||||||
llvm_unreachable("W65816: unknown fixup kind");
|
llvm_unreachable("W65816: unknown fixup kind");
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -204,6 +204,7 @@ void initializeW65816SepRepCleanupPass(PassRegistry &);
|
||||||
void initializeW65816BranchExpandPass(PassRegistry &);
|
void initializeW65816BranchExpandPass(PassRegistry &);
|
||||||
void initializeW65816TiedDefSpillPass(PassRegistry &);
|
void initializeW65816TiedDefSpillPass(PassRegistry &);
|
||||||
void initializeW65816ABridgeViaXPass(PassRegistry &);
|
void initializeW65816ABridgeViaXPass(PassRegistry &);
|
||||||
|
void initializeW65816UnLSRPass(PassRegistry &);
|
||||||
void initializeW65816WidenAcc16Pass(PassRegistry &);
|
void initializeW65816WidenAcc16Pass(PassRegistry &);
|
||||||
void initializeW65816SpillToXPass(PassRegistry &);
|
void initializeW65816SpillToXPass(PassRegistry &);
|
||||||
void initializeW65816NegYIndYPass(PassRegistry &);
|
void initializeW65816NegYIndYPass(PassRegistry &);
|
||||||
|
|
|
||||||
|
|
@ -8,23 +8,28 @@
|
||||||
//
|
//
|
||||||
// Pre-regalloc complement to W65816TiedDefSpill. Where TiedDefSpill
|
// Pre-regalloc complement to W65816TiedDefSpill. Where TiedDefSpill
|
||||||
// preserves a multi-use Acc16 vreg by spilling it to a fresh stack
|
// preserves a multi-use Acc16 vreg by spilling it to a fresh stack
|
||||||
// slot around the tied-def consumer, this pass tries to do the same
|
// slot around the tied-def consumer, this pass bridges via an Img16
|
||||||
// preservation via TAX/TXA: copy to an Idx16 vreg before the consumer
|
// (DP-backed) vreg: park SrcReg in a fresh Img16 vreg before the
|
||||||
// (regalloc puts it in X or Y, expansion lowers the COPY to TAX/TAY),
|
// consumer, restore to a fresh Acc16 vreg after. Regalloc places the
|
||||||
// copy back to a fresh Acc16 vreg after.
|
// Img16 in IMG0..IMG7 (DP $D0..$DE); copyPhysReg lowers the COPYs to
|
||||||
|
// STA dp / LDA dp (4 cyc each) and no system-stack slot is allocated.
|
||||||
|
//
|
||||||
|
// (The pass name dates from an earlier prototype that bridged via X
|
||||||
|
// using TAX/TXA. Cross-MBB X-liveness analysis was unimplemented and
|
||||||
|
// the X-bridge couldn't survive Idx16 clobbers between consumer and
|
||||||
|
// last use, so the bridge moved to Img16. The DP-backed form costs
|
||||||
|
// 8 cycles round-trip (vs. 4 for TAX/TXA) but has none of the
|
||||||
|
// liveness restrictions.)
|
||||||
//
|
//
|
||||||
// Win per bridged pair:
|
// Win per bridged pair:
|
||||||
// stack spill: STA dp,S (5 cyc) + LDA dp,S (5 cyc) + 1 frame slot
|
// stack spill: STA dp,S (5 cyc) + LDA dp,S (5 cyc) + 1 frame slot
|
||||||
// X bridge : TAX (2 cyc) + TXA (2 cyc) + no frame growth
|
// Img bridge : STA dp (4 cyc) + LDA dp (4 cyc) + no frame growth
|
||||||
// Net 6 cycles + 2 bytes saved per bridge — and we avoid one PHA per
|
// Net 2 cycles + (1 byte per access) saved per bridge -- and one PHA
|
||||||
// stack slot we didn't allocate.
|
// per avoided stack slot.
|
||||||
//
|
//
|
||||||
// Bail conditions (fall back to TiedDefSpill's stack route):
|
// Bail conditions (fall back to TiedDefSpill's stack route): any MI
|
||||||
// - any MI between consumer and SrcReg's last use clobbers Idx16
|
// between consumer and SrcReg's last use that clobbers IMG slots,
|
||||||
// (LDX/LDY/INX/DEX/INY/DEY/TAX/TAY/TXY/TYX/PHX/PHY/PLX/PLY/etc.)
|
// callees that clobber IMG0..IMG7, cross-MBB uses of SrcReg.
|
||||||
// - any call in the range (calls clobber X and Y per ABI)
|
|
||||||
// - SrcReg is used in a different MBB (cross-MBB liveness needs more
|
|
||||||
// analysis; deferred)
|
|
||||||
//
|
//
|
||||||
// Runs before TiedDefSpill so the latter doesn't double-process the
|
// Runs before TiedDefSpill so the latter doesn't double-process the
|
||||||
// same candidates.
|
// same candidates.
|
||||||
|
|
|
||||||
|
|
@ -6,8 +6,14 @@
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
//
|
//
|
||||||
// Skeleton assembly printer. The MCInst lowering path is wired up but no
|
// W65816 assembly printer. Owns the late pseudo-expansion path
|
||||||
// target-specific operand formatting is implemented yet.
|
// (MCInst lowering for the IR-pseudo opcodes that we keep through PEI
|
||||||
|
// because their machine encoding depends on AsmPrinter-time peepholes
|
||||||
|
// or runtime ABI knowledge -- BRK_pseudo, LDAi16imm_bank, JSLpseudo,
|
||||||
|
// the SEP/REP-wrapped i8 forms, etc.), plus a small set of mode-aware
|
||||||
|
// peepholes (PEA / PEI substitution for LDA+PUSH16 chains, STZ
|
||||||
|
// folding, etc.) that prefer to run after the rest of codegen has
|
||||||
|
// stabilised the MIR.
|
||||||
//
|
//
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
|
|
@ -31,6 +37,39 @@ using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "asm-printer"
|
#define DEBUG_TYPE "asm-printer"
|
||||||
|
|
||||||
|
// W65816 processor-status flag masks used by SEP/REP wrapping.
|
||||||
|
// (See W65816 datasheet 6.10.) M = accumulator width (1 = 8-bit,
|
||||||
|
// 0 = 16-bit); X = index width (same convention). The wraps in this
|
||||||
|
// file toggle M only; X never changes in normal codegen.
|
||||||
|
static constexpr unsigned kPStatusM = 0x20;
|
||||||
|
[[maybe_unused]] static constexpr unsigned kPStatusX = 0x10;
|
||||||
|
|
||||||
|
// IIgs runtime DP slots referenced from emitted code. Both are part of
|
||||||
|
// the runtime ABI -- AsmPrinter / ISelLowering / libgcc must agree.
|
||||||
|
// kRuntimePbrStashDP -- crt0 stashes the runtime PBR here so
|
||||||
|
// LDAi16imm_bank can emit `lda $BE` (PBR-byte
|
||||||
|
// load) for &symbol values in non-bank-0 placements.
|
||||||
|
// kRuntimeIndirTargetDP -- __indirTarget vector used by the
|
||||||
|
// JMP (abs) indirect-call thunk.
|
||||||
|
static constexpr unsigned kRuntimePbrStashDP = 0xBE;
|
||||||
|
[[maybe_unused]] static constexpr unsigned kRuntimeIndirTargetDP = 0x00B8;
|
||||||
|
|
||||||
|
// DP scratch byte used by ADJCALLSTACKUP / ALLOCAfi to save A across a
|
||||||
|
// TSC/TCS bracket. Lives at $E0, the start of the project-wide DP scratch
|
||||||
|
// range; coordinate with W65816ISelLowering / W65816RegisterInfo if
|
||||||
|
// the layout changes.
|
||||||
|
static constexpr unsigned kDpScratch0 = 0xE0;
|
||||||
|
|
||||||
|
// IIgs bank-byte mask: a 24-bit address whose top 8 bits are non-zero
|
||||||
|
// is in a non-zero bank and must be encoded via the LONG form.
|
||||||
|
static constexpr uint64_t kBankByteMask = 0xFF0000;
|
||||||
|
|
||||||
|
// ADJCALLSTACKUP fan-out limit: PLY (1 byte / 4 cyc per pair-pop) wins
|
||||||
|
// over the 8-byte / ~14-cyc TAY/TSC/CLC/ADC/TCS/TYA bracket up through
|
||||||
|
// N = 14 even bytes; beyond that the bracket is cheaper. See the
|
||||||
|
// dispatch in the ADJCALLSTACKUP expansion.
|
||||||
|
static constexpr int kAdjStackUpPlyMaxN = 14;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class W65816AsmPrinter : public AsmPrinter {
|
class W65816AsmPrinter : public AsmPrinter {
|
||||||
|
|
@ -267,7 +306,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
if (YLive) {
|
if (YLive) {
|
||||||
// Route through DP $E0 to preserve both A and Y.
|
// Route through DP $E0 to preserve both A and Y.
|
||||||
MCInst Sta; Sta.setOpcode(W65816::STA_DP);
|
MCInst Sta; Sta.setOpcode(W65816::STA_DP);
|
||||||
Sta.addOperand(MCOperand::createImm(0xE0));
|
Sta.addOperand(MCOperand::createImm(kDpScratch0));
|
||||||
EmitToStreamer(*OutStreamer, Sta);
|
EmitToStreamer(*OutStreamer, Sta);
|
||||||
MCInst Tsc; Tsc.setOpcode(W65816::TSC); EmitToStreamer(*OutStreamer, Tsc);
|
MCInst Tsc; Tsc.setOpcode(W65816::TSC); EmitToStreamer(*OutStreamer, Tsc);
|
||||||
MCInst Clc; Clc.setOpcode(W65816::CLC); EmitToStreamer(*OutStreamer, Clc);
|
MCInst Clc; Clc.setOpcode(W65816::CLC); EmitToStreamer(*OutStreamer, Clc);
|
||||||
|
|
@ -276,9 +315,13 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
EmitToStreamer(*OutStreamer, Adc);
|
EmitToStreamer(*OutStreamer, Adc);
|
||||||
MCInst Tcs; Tcs.setOpcode(W65816::TCS); EmitToStreamer(*OutStreamer, Tcs);
|
MCInst Tcs; Tcs.setOpcode(W65816::TCS); EmitToStreamer(*OutStreamer, Tcs);
|
||||||
MCInst Lda; Lda.setOpcode(W65816::LDA_DP);
|
MCInst Lda; Lda.setOpcode(W65816::LDA_DP);
|
||||||
Lda.addOperand(MCOperand::createImm(0xE0));
|
Lda.addOperand(MCOperand::createImm(kDpScratch0));
|
||||||
EmitToStreamer(*OutStreamer, Lda);
|
EmitToStreamer(*OutStreamer, Lda);
|
||||||
} else if (N <= 14 && (N % 2) == 0) {
|
} else if (N <= kAdjStackUpPlyMaxN && (N % 2) == 0) {
|
||||||
|
// Repeated PLY (1 byte / 4 cyc each) wins over the TAY/TSC/CLC/
|
||||||
|
// ADC/TCS/TYA bracket (8 bytes / ~14 cyc fixed) for N <= 14;
|
||||||
|
// beyond that the bracket is cheaper. Must be even (PLY pops
|
||||||
|
// 16-bit pairs).
|
||||||
for (int i = 0; i < N / 2; ++i) {
|
for (int i = 0; i < N / 2; ++i) {
|
||||||
MCInst Ply; Ply.setOpcode(W65816::PLY);
|
MCInst Ply; Ply.setOpcode(W65816::PLY);
|
||||||
EmitToStreamer(*OutStreamer, Ply);
|
EmitToStreamer(*OutStreamer, Ply);
|
||||||
|
|
@ -348,7 +391,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
Lda.addOperand(MCOperand::createImm(0));
|
Lda.addOperand(MCOperand::createImm(0));
|
||||||
} else {
|
} else {
|
||||||
Lda.setOpcode(W65816::LDA_DP);
|
Lda.setOpcode(W65816::LDA_DP);
|
||||||
Lda.addOperand(MCOperand::createImm(0xBE));
|
Lda.addOperand(MCOperand::createImm(kRuntimePbrStashDP));
|
||||||
}
|
}
|
||||||
EmitToStreamer(*OutStreamer, Lda);
|
EmitToStreamer(*OutStreamer, Lda);
|
||||||
return;
|
return;
|
||||||
|
|
@ -380,7 +423,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// writes `*(uint16 *)0xE19E00UL = 0` we MUST keep the
|
// writes `*(uint16 *)0xE19E00UL = 0` we MUST keep the
|
||||||
// LDA #0 + STA_Long pair so the bank-explicit form survives.
|
// LDA #0 + STA_Long pair so the bank-explicit form survives.
|
||||||
bool AddrFitsIn16 = !It->getOperand(1).isImm() ||
|
bool AddrFitsIn16 = !It->getOperand(1).isImm() ||
|
||||||
(It->getOperand(1).getImm() & 0xFF0000) == 0;
|
(It->getOperand(1).getImm() & kBankByteMask) == 0;
|
||||||
if (AddrFitsIn16) {
|
if (AddrFitsIn16) {
|
||||||
MCInst Stz;
|
MCInst Stz;
|
||||||
Stz.setOpcode(W65816::STZ_Abs);
|
Stz.setOpcode(W65816::STZ_Abs);
|
||||||
|
|
@ -401,6 +444,10 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
if (It != MI->getParent()->end() && It->getOpcode() == W65816::PUSH16) {
|
if (It != MI->getParent()->end() && It->getOpcode() == W65816::PUSH16) {
|
||||||
auto It2 = std::next(It);
|
auto It2 = std::next(It);
|
||||||
while (It2 != MI->getParent()->end() && It2->isDebugInstr()) ++It2;
|
while (It2 != MI->getParent()->end() && It2->isDebugInstr()) ++It2;
|
||||||
|
// If PUSH16 is the last MI in the BB we leave the peephole as a
|
||||||
|
// no-op (conservative): the PUSH chain almost always feeds a JSL
|
||||||
|
// within the same BB, and proving A-dead at BB exit via successor
|
||||||
|
// live-in scan is not worth the bookkeeping.
|
||||||
bool ADead = false;
|
bool ADead = false;
|
||||||
if (It2 != MI->getParent()->end()) {
|
if (It2 != MI->getParent()->end()) {
|
||||||
const TargetRegisterInfo *TRI =
|
const TargetRegisterInfo *TRI =
|
||||||
|
|
@ -408,13 +455,6 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
if (It2->modifiesRegister(W65816::A, TRI) &&
|
if (It2->modifiesRegister(W65816::A, TRI) &&
|
||||||
!It2->readsRegister(W65816::A, TRI))
|
!It2->readsRegister(W65816::A, TRI))
|
||||||
ADead = true;
|
ADead = true;
|
||||||
} else {
|
|
||||||
// PUSH16 is the last instruction in the BB. A is dead at
|
|
||||||
// BB exit iff it's not live-out. Check the BB's live-out
|
|
||||||
// set via successors; if no successor lists A as live-in,
|
|
||||||
// it's safe. Conservative: treat as not-dead (skip peephole).
|
|
||||||
// This case is uncommon — the PUSH chain almost always feeds
|
|
||||||
// a JSL within the same BB.
|
|
||||||
}
|
}
|
||||||
if (ADead) {
|
if (ADead) {
|
||||||
MCInst Pea;
|
MCInst Pea;
|
||||||
|
|
@ -445,7 +485,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// hit. We mark the next-SEP-to-skip via a per-AsmPrinter flag
|
// hit. We mark the next-SEP-to-skip via a per-AsmPrinter flag
|
||||||
// so the SEP visit drops it.
|
// so the SEP visit drops it.
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
MCInst Lda;
|
MCInst Lda;
|
||||||
Lda.setOpcode(W65816::LDA_Imm8);
|
Lda.setOpcode(W65816::LDA_Imm8);
|
||||||
|
|
@ -487,9 +527,9 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
if (It != MI->getParent()->end() &&
|
if (It != MI->getParent()->end() &&
|
||||||
It->getOpcode() == W65816::SEP &&
|
It->getOpcode() == W65816::SEP &&
|
||||||
It->getNumOperands() >= 1 && It->getOperand(0).isImm() &&
|
It->getNumOperands() >= 1 && It->getOperand(0).isImm() &&
|
||||||
It->getOperand(0).getImm() == 0x20) {
|
It->getOperand(0).getImm() == kPStatusM) {
|
||||||
SkipRep = true;
|
SkipRep = true;
|
||||||
SkipNextSepImm = 0x20;
|
SkipNextSepImm = static_cast<int>(kPStatusM);
|
||||||
}
|
}
|
||||||
// STA8abs / STA8long don't expose their SEP at MIR — the wrap is
|
// STA8abs / STA8long don't expose their SEP at MIR — the wrap is
|
||||||
// emitted at MC layer. Detect them here so we can elide the
|
// emitted at MC layer. Detect them here so we can elide the
|
||||||
|
|
@ -505,7 +545,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
}
|
}
|
||||||
if (!SkipRep) {
|
if (!SkipRep) {
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
@ -533,7 +573,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
EmitToStreamer(*OutStreamer, Lda);
|
EmitToStreamer(*OutStreamer, Lda);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ((A & 0xFF0000) != 0) {
|
if ((A & kBankByteMask) != 0) {
|
||||||
MCInst Lda;
|
MCInst Lda;
|
||||||
Lda.setOpcode(W65816::LDA_Long);
|
Lda.setOpcode(W65816::LDA_Long);
|
||||||
Lda.addOperand(lowerOperand(AddrOp, MCInstLowering));
|
Lda.addOperand(lowerOperand(AddrOp, MCInstLowering));
|
||||||
|
|
@ -564,7 +604,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
EmitToStreamer(*OutStreamer, Sta);
|
EmitToStreamer(*OutStreamer, Sta);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if ((A & 0xFF0000) != 0) {
|
if ((A & kBankByteMask) != 0) {
|
||||||
MCInst Sta;
|
MCInst Sta;
|
||||||
Sta.setOpcode(W65816::STA_Long);
|
Sta.setOpcode(W65816::STA_Long);
|
||||||
Sta.addOperand(lowerOperand(AddrOp, MCInstLowering));
|
Sta.addOperand(lowerOperand(AddrOp, MCInstLowering));
|
||||||
|
|
@ -649,7 +689,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
bool IsSub = MI->getOpcode() == W65816::SBCi8imm;
|
bool IsSub = MI->getOpcode() == W65816::SBCi8imm;
|
||||||
// SEP/REP wrap (see LDAi8imm comment).
|
// SEP/REP wrap (see LDAi8imm comment).
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
MCInst Carry;
|
MCInst Carry;
|
||||||
Carry.setOpcode(IsSub ? W65816::SEC : W65816::CLC);
|
Carry.setOpcode(IsSub ? W65816::SEC : W65816::CLC);
|
||||||
|
|
@ -660,7 +700,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
Op.addOperand(MCOperand::createImm(Val));
|
Op.addOperand(MCOperand::createImm(Val));
|
||||||
EmitToStreamer(*OutStreamer, Op);
|
EmitToStreamer(*OutStreamer, Op);
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -682,11 +722,11 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
Op.addOperand(MCOperand::createImm(Val));
|
Op.addOperand(MCOperand::createImm(Val));
|
||||||
// SEP/REP wrap (see LDAi8imm comment).
|
// SEP/REP wrap (see LDAi8imm comment).
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
EmitToStreamer(*OutStreamer, Op);
|
EmitToStreamer(*OutStreamer, Op);
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -696,7 +736,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// LDA_Long (0xAF, bank-explicit) for const-int MMIO addresses.
|
// LDA_Long (0xAF, bank-explicit) for const-int MMIO addresses.
|
||||||
bool IsLong = MI->getOpcode() == W65816::LDA8long;
|
bool IsLong = MI->getOpcode() == W65816::LDA8long;
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
MCInst Lda;
|
MCInst Lda;
|
||||||
Lda.setOpcode(IsLong ? W65816::LDA_Long : W65816::LDA_Abs);
|
Lda.setOpcode(IsLong ? W65816::LDA_Long : W65816::LDA_Abs);
|
||||||
|
|
@ -709,7 +749,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
Lda.addOperand(Addr);
|
Lda.addOperand(Addr);
|
||||||
EmitToStreamer(*OutStreamer, Lda);
|
EmitToStreamer(*OutStreamer, Lda);
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -717,14 +757,14 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// i8 indexed-global load: SEP #0x20 ; LDA <addr>, X ; REP #0x20
|
// i8 indexed-global load: SEP #0x20 ; LDA <addr>, X ; REP #0x20
|
||||||
// X holds the index (set up by CopyToReg before this MI).
|
// X holds the index (set up by CopyToReg before this MI).
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
MCInst Lda;
|
MCInst Lda;
|
||||||
Lda.setOpcode(W65816::LDA_AbsX);
|
Lda.setOpcode(W65816::LDA_AbsX);
|
||||||
Lda.addOperand(lowerOperand(MI->getOperand(0), MCInstLowering));
|
Lda.addOperand(lowerOperand(MI->getOperand(0), MCInstLowering));
|
||||||
EmitToStreamer(*OutStreamer, Lda);
|
EmitToStreamer(*OutStreamer, Lda);
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -732,14 +772,14 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// i8 indexed-global store: SEP #0x20 ; STA <addr>, X ; REP #0x20
|
// i8 indexed-global store: SEP #0x20 ; STA <addr>, X ; REP #0x20
|
||||||
// A holds the value, X holds the index.
|
// A holds the value, X holds the index.
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
MCInst Sta;
|
MCInst Sta;
|
||||||
Sta.setOpcode(W65816::STA_AbsX);
|
Sta.setOpcode(W65816::STA_AbsX);
|
||||||
Sta.addOperand(lowerOperand(MI->getOperand(0), MCInstLowering));
|
Sta.addOperand(lowerOperand(MI->getOperand(0), MCInstLowering));
|
||||||
EmitToStreamer(*OutStreamer, Sta);
|
EmitToStreamer(*OutStreamer, Sta);
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -764,7 +804,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
SkipNextSta8Wrap = false;
|
SkipNextSta8Wrap = false;
|
||||||
if (!UsesAcc8 && !SkipOpenSep) {
|
if (!UsesAcc8 && !SkipOpenSep) {
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
}
|
}
|
||||||
MCInst Sta;
|
MCInst Sta;
|
||||||
|
|
@ -784,7 +824,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
EmitToStreamer(*OutStreamer, Sta);
|
EmitToStreamer(*OutStreamer, Sta);
|
||||||
if (!UsesAcc8) {
|
if (!UsesAcc8) {
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
|
|
@ -825,7 +865,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// i8 immediate compare — needs M=1 so the CPU only reads 1 byte
|
// i8 immediate compare — needs M=1 so the CPU only reads 1 byte
|
||||||
// for the immediate. See LDAi8imm comment for the wrap rationale.
|
// for the immediate. See LDAi8imm comment for the wrap rationale.
|
||||||
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
MCInst Sep; Sep.setOpcode(W65816::SEP);
|
||||||
Sep.addOperand(MCOperand::createImm(0x20));
|
Sep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Sep);
|
EmitToStreamer(*OutStreamer, Sep);
|
||||||
MCInst Cmp;
|
MCInst Cmp;
|
||||||
Cmp.setOpcode(W65816::CMP_Imm8);
|
Cmp.setOpcode(W65816::CMP_Imm8);
|
||||||
|
|
@ -833,7 +873,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
Cmp.addOperand(MCOperand::createImm(Val));
|
Cmp.addOperand(MCOperand::createImm(Val));
|
||||||
EmitToStreamer(*OutStreamer, Cmp);
|
EmitToStreamer(*OutStreamer, Cmp);
|
||||||
MCInst Rep; Rep.setOpcode(W65816::REP);
|
MCInst Rep; Rep.setOpcode(W65816::REP);
|
||||||
Rep.addOperand(MCOperand::createImm(0x20));
|
Rep.addOperand(MCOperand::createImm(kPStatusM));
|
||||||
EmitToStreamer(*OutStreamer, Rep);
|
EmitToStreamer(*OutStreamer, Rep);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -965,12 +1005,12 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) {
|
||||||
// Size is in A on entry — but we need A=SP after TSC, so first
|
// Size is in A on entry — but we need A=SP after TSC, so first
|
||||||
// stash the size to DP scratch.
|
// stash the size to DP scratch.
|
||||||
MCInst Sta1; Sta1.setOpcode(W65816::STA_DP);
|
MCInst Sta1; Sta1.setOpcode(W65816::STA_DP);
|
||||||
Sta1.addOperand(MCOperand::createImm(0xE0));
|
Sta1.addOperand(MCOperand::createImm(kDpScratch0));
|
||||||
EmitToStreamer(*OutStreamer, Sta1);
|
EmitToStreamer(*OutStreamer, Sta1);
|
||||||
MCInst Tsc; Tsc.setOpcode(W65816::TSC); EmitToStreamer(*OutStreamer, Tsc);
|
MCInst Tsc; Tsc.setOpcode(W65816::TSC); EmitToStreamer(*OutStreamer, Tsc);
|
||||||
MCInst Sec; Sec.setOpcode(W65816::SEC); EmitToStreamer(*OutStreamer, Sec);
|
MCInst Sec; Sec.setOpcode(W65816::SEC); EmitToStreamer(*OutStreamer, Sec);
|
||||||
MCInst Sbc; Sbc.setOpcode(W65816::SBC_DP);
|
MCInst Sbc; Sbc.setOpcode(W65816::SBC_DP);
|
||||||
Sbc.addOperand(MCOperand::createImm(0xE0));
|
Sbc.addOperand(MCOperand::createImm(kDpScratch0));
|
||||||
EmitToStreamer(*OutStreamer, Sbc);
|
EmitToStreamer(*OutStreamer, Sbc);
|
||||||
MCInst Tcs; Tcs.setOpcode(W65816::TCS); EmitToStreamer(*OutStreamer, Tcs);
|
MCInst Tcs; Tcs.setOpcode(W65816::TCS); EmitToStreamer(*OutStreamer, Tcs);
|
||||||
MCInst Ina; Ina.setOpcode(W65816::INA); EmitToStreamer(*OutStreamer, Ina);
|
MCInst Ina; Ina.setOpcode(W65816::INA); EmitToStreamer(*OutStreamer, Ina);
|
||||||
|
|
|
||||||
|
|
@ -162,8 +162,7 @@ static unsigned estimateDistance(MachineFunction &MF,
|
||||||
// sliced after each non-final conditional, so every MBB ends up with
|
// sliced after each non-final conditional, so every MBB ends up with
|
||||||
// at most one conditional terminator. Returns true if any MBB was
|
// at most one conditional terminator. Returns true if any MBB was
|
||||||
// split.
|
// split.
|
||||||
static bool splitMultiBranchMBBs(MachineFunction &MF,
|
static bool splitMultiBranchMBBs(MachineFunction &MF) {
|
||||||
const TargetInstrInfo *TII) {
|
|
||||||
bool Changed = false;
|
bool Changed = false;
|
||||||
// Snapshot MBBs first (we mutate the list during iteration).
|
// Snapshot MBBs first (we mutate the list during iteration).
|
||||||
SmallVector<MachineBasicBlock *, 16> MBBs;
|
SmallVector<MachineBasicBlock *, 16> MBBs;
|
||||||
|
|
@ -233,7 +232,6 @@ static bool splitMultiBranchMBBs(MachineFunction &MF,
|
||||||
// see if another split is needed (multi-multi-branch case).
|
// see if another split is needed (multi-multi-branch case).
|
||||||
Changed = true;
|
Changed = true;
|
||||||
Sliced = true;
|
Sliced = true;
|
||||||
(void)TII; // unused for now
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Changed;
|
return Changed;
|
||||||
|
|
@ -354,7 +352,7 @@ bool W65816BranchExpand::runOnMachineFunction(MachineFunction &MF) {
|
||||||
AnyChanged |= dropDeadConditionalsToBRATarget(MF);
|
AnyChanged |= dropDeadConditionalsToBRATarget(MF);
|
||||||
|
|
||||||
// Step 1: split multi-conditional-terminator MBBs.
|
// Step 1: split multi-conditional-terminator MBBs.
|
||||||
AnyChanged |= splitMultiBranchMBBs(MF, TII);
|
AnyChanged |= splitMultiBranchMBBs(MF);
|
||||||
|
|
||||||
// Step 2: iterate to fixed-point. Each expansion adds 3 bytes
|
// Step 2: iterate to fixed-point. Each expansion adds 3 bytes
|
||||||
// (bridge BRA), which may push another previously-OK branch over
|
// (bridge BRA), which may push another previously-OK branch over
|
||||||
|
|
|
||||||
|
|
@ -68,10 +68,6 @@ char W65816I32IncFold::ID = 0;
|
||||||
INITIALIZE_PASS(W65816I32IncFold, DEBUG_TYPE,
|
INITIALIZE_PASS(W65816I32IncFold, DEBUG_TYPE,
|
||||||
"W65816 i32 += 1 fold", false, false)
|
"W65816 i32 += 1 fold", false, false)
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
void initializeW65816I32IncFoldPass(PassRegistry &);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Match the 6-instruction sequence; returns the post-pattern iterator
|
// Match the 6-instruction sequence; returns the post-pattern iterator
|
||||||
// and fills in the lo/hi stack-rel offsets if the pattern matches.
|
// and fills in the lo/hi stack-rel offsets if the pattern matches.
|
||||||
// Tolerates intervening TAX/TXA pairs (which regalloc inserts as
|
// Tolerates intervening TAX/TXA pairs (which regalloc inserts as
|
||||||
|
|
|
||||||
|
|
@ -87,10 +87,6 @@ char W65816ImgCalleeSave::ID = 0;
|
||||||
INITIALIZE_PASS(W65816ImgCalleeSave, DEBUG_TYPE,
|
INITIALIZE_PASS(W65816ImgCalleeSave, DEBUG_TYPE,
|
||||||
"W65816 IMG8..IMG15 callee save/restore", false, false)
|
"W65816 IMG8..IMG15 callee save/restore", false, false)
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
void initializeW65816ImgCalleeSavePass(PassRegistry &);
|
|
||||||
}
|
|
||||||
|
|
||||||
FunctionPass *llvm::createW65816ImgCalleeSave() {
|
FunctionPass *llvm::createW65816ImgCalleeSave() {
|
||||||
return new W65816ImgCalleeSave();
|
return new W65816ImgCalleeSave();
|
||||||
}
|
}
|
||||||
|
|
@ -188,7 +184,7 @@ bool W65816ImgCalleeSave::runOnMachineFunction(MachineFunction &MF) {
|
||||||
//
|
//
|
||||||
// copyPhysReg lowers `COPY $imgN = $a` to `STA_DP imm:0xCx`, so we
|
// copyPhysReg lowers `COPY $imgN = $a` to `STA_DP imm:0xCx`, so we
|
||||||
// check both the physreg-DEF form AND the DP-imm-store form.
|
// check both the physreg-DEF form AND the DP-imm-store form.
|
||||||
bool WrittenSlot[8] = {false};
|
bool UsedSlot[8] = {false};
|
||||||
bool AnyWritten = false;
|
bool AnyWritten = false;
|
||||||
for (auto &MBB : MF) {
|
for (auto &MBB : MF) {
|
||||||
for (auto &MI : MBB) {
|
for (auto &MI : MBB) {
|
||||||
|
|
@ -197,7 +193,7 @@ bool W65816ImgCalleeSave::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (!MO.isReg() || MO.getReg() == 0 || !MO.isDef()) continue;
|
if (!MO.isReg() || MO.getReg() == 0 || !MO.isDef()) continue;
|
||||||
int idx = classifyImgReg(MO.getReg());
|
int idx = classifyImgReg(MO.getReg());
|
||||||
if (idx >= 0) {
|
if (idx >= 0) {
|
||||||
WrittenSlot[idx] = true;
|
UsedSlot[idx] = true;
|
||||||
AnyWritten = true;
|
AnyWritten = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -205,15 +201,12 @@ bool W65816ImgCalleeSave::runOnMachineFunction(MachineFunction &MF) {
|
||||||
auto [idx, mode] = classifyDpImmAsImg(MI);
|
auto [idx, mode] = classifyDpImmAsImg(MI);
|
||||||
if (idx >= 0 &&
|
if (idx >= 0 &&
|
||||||
(mode == DpAccess::Write || mode == DpAccess::ReadWrite)) {
|
(mode == DpAccess::Write || mode == DpAccess::ReadWrite)) {
|
||||||
WrittenSlot[idx] = true;
|
UsedSlot[idx] = true;
|
||||||
AnyWritten = true;
|
AnyWritten = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!AnyWritten) return false;
|
if (!AnyWritten) return false;
|
||||||
// Rename for downstream Step 2/3/4 readability — they use UsedSlot.
|
|
||||||
bool (&UsedSlot)[8] = WrittenSlot;
|
|
||||||
(void)AnyWritten;
|
|
||||||
|
|
||||||
// Step 2: allocate one frame slot per used IMG. Size = 2 bytes (each
|
// Step 2: allocate one frame slot per used IMG. Size = 2 bytes (each
|
||||||
// Img16 holds a 16-bit value). Mark as a spill slot so PEI accounts
|
// Img16 holds a 16-bit value). Mark as a spill slot so PEI accounts
|
||||||
|
|
|
||||||
|
|
@ -215,14 +215,10 @@ namespace llvm {
|
||||||
class W65816Layer2StampPass : public PassInfoMixin<W65816Layer2StampPass> {
|
class W65816Layer2StampPass : public PassInfoMixin<W65816Layer2StampPass> {
|
||||||
public:
|
public:
|
||||||
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
|
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
|
||||||
bool Changed = false;
|
|
||||||
for (Function &F : M) {
|
for (Function &F : M) {
|
||||||
Changed |= stampFunction(F);
|
stampFunction(F);
|
||||||
}
|
}
|
||||||
if (!Changed) {
|
// We only add a function attribute, no IR-level effects. Preserve
|
||||||
return PreservedAnalyses::all();
|
|
||||||
}
|
|
||||||
// We only added a function attribute, no IR-level effects. Preserve
|
|
||||||
// everything; the inliner et al. will copy the attribute on inline.
|
// everything; the inliner et al. will copy the attribute on inline.
|
||||||
return PreservedAnalyses::all();
|
return PreservedAnalyses::all();
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -189,7 +189,6 @@ bool W65816NarrowI32Mul::runOnFunction(Function &F) {
|
||||||
// low-16 bits as the original i32 add at every observable point
|
// low-16 bits as the original i32 add at every observable point
|
||||||
// (the back-edge value can wrap on the exit iteration but is
|
// (the back-edge value can wrap on the exit iteration but is
|
||||||
// never observed — exit takes the trip-end branch first).
|
// never observed — exit takes the trip-end branch first).
|
||||||
bool NarrowedAny = false;
|
|
||||||
SmallVector<PHINode *, 4> PhiWorklist;
|
SmallVector<PHINode *, 4> PhiWorklist;
|
||||||
for (BasicBlock &BB : F) {
|
for (BasicBlock &BB : F) {
|
||||||
for (PHINode &PN : BB.phis()) {
|
for (PHINode &PN : BB.phis()) {
|
||||||
|
|
@ -282,7 +281,6 @@ bool W65816NarrowI32Mul::runOnFunction(Function &F) {
|
||||||
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
|
Incr->replaceAllUsesWith(UndefValue::get(Incr->getType()));
|
||||||
Incr->eraseFromParent();
|
Incr->eraseFromParent();
|
||||||
PN->eraseFromParent();
|
PN->eraseFromParent();
|
||||||
NarrowedAny = true;
|
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -41,6 +41,13 @@
|
||||||
|
|
||||||
using namespace llvm;
|
using namespace llvm;
|
||||||
|
|
||||||
|
// DP scratch byte used to park X when the negative-Y inserter needs to
|
||||||
|
// route through TAX/TXA. Lives in the project-wide $E0-based scratch
|
||||||
|
// range; $E0 is reserved for ADJCALLSTACKUP's A-preserve so we use
|
||||||
|
// $E2 here. Coordinate with W65816AsmPrinter / W65816ISelLowering /
|
||||||
|
// W65816RegisterInfo if the layout changes.
|
||||||
|
static constexpr unsigned kDpScratchX = 0xE2;
|
||||||
|
|
||||||
#define DEBUG_TYPE "w65816-neg-y-indy"
|
#define DEBUG_TYPE "w65816-neg-y-indy"
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
@ -110,9 +117,9 @@ bool W65816NegYIndY::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (XLive || xDef) break;
|
if (XLive || xDef) break;
|
||||||
}
|
}
|
||||||
if (XLive) {
|
if (XLive) {
|
||||||
// Save X to DP $E2 (don't use $E0 — that's the A-preserve
|
// Save X to DP kDpScratchX ($E2) -- $E0 is reserved as the
|
||||||
// slot in call-frame teardown and may be live).
|
// A-preserve slot in call-frame teardown and may be live.
|
||||||
BuildMI(MBB, MI, DL, TII->get(W65816::STX_DP)).addImm(0xE2);
|
BuildMI(MBB, MI, DL, TII->get(W65816::STX_DP)).addImm(kDpScratchX);
|
||||||
}
|
}
|
||||||
if (IsLDA) {
|
if (IsLDA) {
|
||||||
// LDA disp,S ; CLC ; ADC #neg ; TAX ; LDA $0000,X
|
// LDA disp,S ; CLC ; ADC #neg ; TAX ; LDA $0000,X
|
||||||
|
|
@ -154,7 +161,7 @@ bool W65816NegYIndY::runOnMachineFunction(MachineFunction &MF) {
|
||||||
}
|
}
|
||||||
if (XLive) {
|
if (XLive) {
|
||||||
// Restore X from DP $E2.
|
// Restore X from DP kDpScratchX ($E2).
|
||||||
BuildMI(MBB, MI, DL, TII->get(W65816::LDX_DP)).addImm(0xE2);
|
BuildMI(MBB, MI, DL, TII->get(W65816::LDX_DP)).addImm(kDpScratchX);
|
||||||
}
|
}
|
||||||
// Erase original LDY and the (sr,s),Y op.
|
// Erase original LDY and the (sr,s),Y op.
|
||||||
if (LastLDY) { LastLDY->eraseFromParent(); LastLDY = nullptr; }
|
if (LastLDY) { LastLDY->eraseFromParent(); LastLDY = nullptr; }
|
||||||
|
|
|
||||||
|
|
@ -99,64 +99,29 @@ FunctionPass *llvm::createW65816PromoteFiToImg() {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Returns the operand index of the FrameIndex for the given FI pseudo
|
|
||||||
// opcode, or -1 if this opcode isn't a promotable FI carrier.
|
|
||||||
static int getFiOperandIdx(unsigned Opc) {
|
|
||||||
switch (Opc) {
|
|
||||||
case W65816::LDAfi: return 1;
|
|
||||||
case W65816::STAfi: return 1;
|
|
||||||
case W65816::CMPfi: return 1;
|
|
||||||
case W65816::ADCfi:
|
|
||||||
case W65816::SBCfi:
|
|
||||||
case W65816::ANDfi:
|
|
||||||
case W65816::ORAfi:
|
|
||||||
case W65816::EORfi: return 2;
|
|
||||||
default: return -1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Map a promotable FI pseudo to the corresponding DP MC opcode.
|
|
||||||
static unsigned getDpOpcode(unsigned Opc) {
|
|
||||||
switch (Opc) {
|
|
||||||
case W65816::LDAfi: return W65816::LDA_DP;
|
|
||||||
case W65816::STAfi: return W65816::STA_DP;
|
|
||||||
case W65816::CMPfi: return W65816::CMP_DP;
|
|
||||||
case W65816::ADCfi: return W65816::ADC_DP;
|
|
||||||
case W65816::SBCfi: return W65816::SBC_DP;
|
|
||||||
case W65816::ANDfi: return W65816::AND_DP;
|
|
||||||
case W65816::ORAfi: return W65816::ORA_DP;
|
|
||||||
case W65816::EORfi: return W65816::EOR_DP;
|
|
||||||
default: return 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// IMG8..IMG15 sit at DP addresses 0xC0, 0xC2, ..., 0xCE. IMG0..IMG7
|
|
||||||
// are at 0xD0..0xDE. Returns the DP byte for IMGn.
|
|
||||||
static uint8_t dpAddrForImg(unsigned ImgIdx) {
|
|
||||||
assert(ImgIdx < 16 && "IMG index out of range");
|
|
||||||
if (ImgIdx < 8) return 0xD0 + 2 * ImgIdx;
|
|
||||||
return 0xC0 + 2 * (ImgIdx - 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
bool W65816PromoteFiToImg::runOnMachineFunction(MachineFunction &MF) {
|
bool W65816PromoteFiToImg::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// DISABLED again 2026-05-13 (3rd-attempt write-up). Two new findings:
|
// DISABLED 2026-05-13 (3rd-attempt write-up). Two findings blocked
|
||||||
|
// re-enable:
|
||||||
// 1. With kMaxPromote=2 and IMG0..7 (caller-save, skip ImgCalleeSave),
|
// 1. With kMaxPromote=2 and IMG0..7 (caller-save, skip ImgCalleeSave),
|
||||||
// sumSquares regressed 56 → 72 inst because the FIs picked by
|
// sumSquares regressed 56 -> 72 inst because the FIs picked by
|
||||||
// access-count (fi#2, fi#3) are intermediate spill temps, not
|
// access-count are intermediate spill temps, not the i32-accumulator
|
||||||
// the i32-accumulator's halves (which are different FIs). The
|
// halves (which are different FIs). Loop body ends up using BOTH
|
||||||
// loop body ends up using BOTH IMG and stack slots for related
|
// IMG and stack slots for related values.
|
||||||
// values.
|
// 2. To pick the RIGHT FIs (those corresponding to PHI-cycled values
|
||||||
// 2. To pick the RIGHT FIs (those corresponding to PHI-cycled
|
// like the i32 accumulator), we need either IR-level analysis
|
||||||
// values like the i32 accumulator), we need either:
|
// BEFORE FI assignment, or post-RA dataflow analysis to identify
|
||||||
// (a) IR-level analysis BEFORE FI assignment, or
|
// long-lived FIs (active across the loop back-edge with no def/use
|
||||||
// (b) Post-RA dataflow analysis to identify "long-lived" FIs
|
// boundary).
|
||||||
// (active across the loop back-edge with no def/use boundary).
|
// The pass framework is retained so the pipeline slot stays documented;
|
||||||
// This is the next blocker. Disabled until either (a) or (b) is
|
// the disabled prototype body is kept under #if 0 below for reference.
|
||||||
// implemented.
|
(void)MF;
|
||||||
return false;
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Disabled prototype body retained for reference; see comment above.
|
||||||
|
bool W65816PromoteFiToImg::runOnMachineFunctionDisabled(MachineFunction &MF) {
|
||||||
if (skipFunction(MF.getFunction())) return false;
|
if (skipFunction(MF.getFunction())) return false;
|
||||||
const W65816Subtarget &STI = MF.getSubtarget<W65816Subtarget>();
|
const W65816Subtarget &STI = MF.getSubtarget<W65816Subtarget>();
|
||||||
const W65816InstrInfo *TII = STI.getInstrInfo();
|
const W65816InstrInfo *TII = STI.getInstrInfo();
|
||||||
|
|
@ -396,3 +361,4 @@ bool W65816PromoteFiToImg::runOnMachineFunction(MachineFunction &MF) {
|
||||||
}
|
}
|
||||||
return Changed;
|
return Changed;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -50,6 +50,9 @@ using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "w65816-sep-rep-cleanup"
|
#define DEBUG_TYPE "w65816-sep-rep-cleanup"
|
||||||
|
|
||||||
|
// W65816 processor status M-bit mask (set/clear via SEP/REP #$20).
|
||||||
|
static constexpr int kMBit = 0x20;
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class W65816SepRepCleanup : public MachineFunctionPass {
|
class W65816SepRepCleanup : public MachineFunctionPass {
|
||||||
|
|
@ -276,7 +279,7 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||||
for (auto It = MBB.begin(); It != MBB.end(); ++It) {
|
for (auto It = MBB.begin(); It != MBB.end(); ++It) {
|
||||||
if (It->getOpcode() != W65816::SEP) continue;
|
if (It->getOpcode() != W65816::SEP) continue;
|
||||||
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) continue;
|
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) continue;
|
||||||
if (It->getOperand(0).getImm() != 0x20) continue;
|
if (It->getOperand(0).getImm() != kMBit) continue;
|
||||||
// Walk forward looking for LDAi8imm before any STAfi_indY
|
// Walk forward looking for LDAi8imm before any STAfi_indY
|
||||||
// or REP at this nesting level.
|
// or REP at this nesting level.
|
||||||
auto Walker = std::next(It);
|
auto Walker = std::next(It);
|
||||||
|
|
@ -312,7 +315,7 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (Back->getOpcode() == W65816::SEP &&
|
if (Back->getOpcode() == W65816::SEP &&
|
||||||
Back->getNumOperands() >= 1 &&
|
Back->getNumOperands() >= 1 &&
|
||||||
Back->getOperand(0).isImm() &&
|
Back->getOperand(0).isImm() &&
|
||||||
Back->getOperand(0).getImm() == 0x20) {
|
Back->getOperand(0).getImm() == kMBit) {
|
||||||
OuterSep = &*Back;
|
OuterSep = &*Back;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
@ -409,7 +412,7 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||||
if (Op1 != W65816::REP && Op1 != W65816::SEP) continue;
|
if (Op1 != W65816::REP && Op1 != W65816::SEP) continue;
|
||||||
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) continue;
|
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) continue;
|
||||||
int Imm1 = It->getOperand(0).getImm();
|
int Imm1 = It->getOperand(0).getImm();
|
||||||
if (Imm1 != 0x20) continue; // M-bit only
|
if (Imm1 != kMBit) continue; // M-bit only
|
||||||
// Walk forward across mode-neutral ops looking for the matching
|
// Walk forward across mode-neutral ops looking for the matching
|
||||||
// opposite toggle. Bail at calls, asm, ALU ops on A, etc.
|
// opposite toggle. Bail at calls, asm, ALU ops on A, etc.
|
||||||
unsigned WantOp = (Op1 == W65816::REP) ? W65816::SEP : W65816::REP;
|
unsigned WantOp = (Op1 == W65816::REP) ? W65816::SEP : W65816::REP;
|
||||||
|
|
@ -1119,361 +1122,12 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Store forwarding (disabled — CRC32 regressed and I couldn't
|
// Three prototype peepholes were tried here and removed once shown
|
||||||
// nail down the safety hole in time). Even with PHP-wrap guards
|
// to regress benchmarks; design notes in
|
||||||
// and SP-modifier bails, the first fire (in memmove) silently
|
// feedback_close_gap_attempts_round2.md / feedback_cmp_zero_elim.md:
|
||||||
// miscompiles something that CRC32 later depends on. Pattern
|
// - PHI store-forwarding (CRC32 regression / memmove safety hole).
|
||||||
// is sound; safety analysis isn't complete. See
|
// - Redundant CMP #0 elimination (VLA sum_n carry-flag bookkeeping).
|
||||||
// feedback_close_gap_attempts_round2.md for details.
|
// - Narrow PHI-copy slot collapse (qsort regression).
|
||||||
#if 0
|
|
||||||
// Store forwarding for PHI memory copies. Pattern (sumSquares
|
|
||||||
// loop body):
|
|
||||||
//
|
|
||||||
// STA X,s ; A → slot X (some intermediate result)
|
|
||||||
// [code that modifies A but doesn't touch slot X or slot Y]
|
|
||||||
// LDA X,s ; reload A from slot X
|
|
||||||
// STA Y,s ; A → slot Y (the PHI copy)
|
|
||||||
//
|
|
||||||
// Transform: insert `STA Y,s` right after the first `STA X,s` (A
|
|
||||||
// still holds the same value at that point), then drop the LDA-
|
|
||||||
// STA pair. Net: -1 inst per pattern occurrence.
|
|
||||||
//
|
|
||||||
// Safety constraints (all between STA X and the LDA-STA pair, in
|
|
||||||
// the same MBB, in straight-line code):
|
|
||||||
// - No instruction writes slot X (else the LDA would see a
|
|
||||||
// different value than the original STA).
|
|
||||||
// - No instruction reads OR writes slot Y (else our early STA Y
|
|
||||||
// would be observed mid-flight with a different value than
|
|
||||||
// before, or our inserted store would be overwritten and the
|
|
||||||
// intervening read of Y in the original would have seen the
|
|
||||||
// overwrite).
|
|
||||||
// - No call / inline asm / branch (conservatively: those can
|
|
||||||
// touch memory we don't model).
|
|
||||||
{
|
|
||||||
auto isStackRelMC2 = [](unsigned Op) {
|
|
||||||
return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel ||
|
|
||||||
Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel ||
|
|
||||||
Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel ||
|
|
||||||
Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel;
|
|
||||||
};
|
|
||||||
auto srAccess2 = [&](const MachineInstr &MI, int64_t &Off) -> bool {
|
|
||||||
if (!isStackRelMC2(MI.getOpcode())) return false;
|
|
||||||
if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return false;
|
|
||||||
Off = MI.getOperand(0).getImm();
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
auto isStaSr = [](const MachineInstr &MI) {
|
|
||||||
return MI.getOpcode() == W65816::STA_StackRel;
|
|
||||||
};
|
|
||||||
auto isLdaSr = [](const MachineInstr &MI) {
|
|
||||||
return MI.getOpcode() == W65816::LDA_StackRel;
|
|
||||||
};
|
|
||||||
SmallVector<MachineInstr *, 4> ToErase;
|
|
||||||
SmallVector<std::tuple<MachineInstr *, int64_t>, 4> ToInsert;
|
|
||||||
static int g_fireLimit = -1;
|
|
||||||
static int g_fireCount = 0;
|
|
||||||
static bool initd = false;
|
|
||||||
if (!initd) {
|
|
||||||
if (const char *e = getenv("STORE_FWD_LIMIT")) g_fireLimit = atoi(e);
|
|
||||||
initd = true;
|
|
||||||
}
|
|
||||||
for (MachineBasicBlock &MBB : MF) {
|
|
||||||
for (auto It = MBB.begin(); It != MBB.end(); ++It) {
|
|
||||||
if (!isStaSr(*It)) continue;
|
|
||||||
int64_t X;
|
|
||||||
if (!srAccess2(*It, X)) continue;
|
|
||||||
MachineInstr *StaX = &*It;
|
|
||||||
// Check if StaX is INSIDE an open PHP/PLP wrap. In that case
|
|
||||||
// its operand offset has been pre-bumped by +1, and inserting
|
|
||||||
// a sibling STA Y immediately after writes at the WRONG slot
|
|
||||||
// (the un-bumped Y). Walk backward: if we find a PHP without
|
|
||||||
// a matching PLP first, bail.
|
|
||||||
{
|
|
||||||
bool insideWrap = false;
|
|
||||||
int depth = 0;
|
|
||||||
auto B = It;
|
|
||||||
while (B != MBB.begin()) {
|
|
||||||
--B;
|
|
||||||
if (B->getOpcode() == W65816::PLP) depth++;
|
|
||||||
else if (B->getOpcode() == W65816::PHP) {
|
|
||||||
if (depth > 0) depth--;
|
|
||||||
else { insideWrap = true; break; }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (insideWrap) continue;
|
|
||||||
}
|
|
||||||
// Walk forward looking for LDA X ; STA Y. Conservative bail
|
|
||||||
// on any non-tracked memory op (indirect pointer access,
|
|
||||||
// DP/abs ops, etc.) which could alias slot Y via memory.
|
|
||||||
bool ok = true;
|
|
||||||
int64_t Y = -1;
|
|
||||||
MachineInstr *LdaX = nullptr;
|
|
||||||
MachineInstr *StaY = nullptr;
|
|
||||||
for (auto Walker = std::next(It); Walker != MBB.end(); ++Walker) {
|
|
||||||
if (Walker->isDebugInstr()) continue;
|
|
||||||
if (Walker->isCall() || Walker->isInlineAsm() ||
|
|
||||||
Walker->isBranch() || Walker->isReturn()) {
|
|
||||||
ok = false; break;
|
|
||||||
}
|
|
||||||
// Found LDA X?
|
|
||||||
int64_t Off;
|
|
||||||
if (isLdaSr(*Walker) && srAccess2(*Walker, Off) && Off == X) {
|
|
||||||
LdaX = &*Walker;
|
|
||||||
auto Next = std::next(Walker);
|
|
||||||
while (Next != MBB.end() && Next->isDebugInstr()) ++Next;
|
|
||||||
if (Next == MBB.end() || !isStaSr(*Next) ||
|
|
||||||
!srAccess2(*Next, Y) || Y == X) {
|
|
||||||
ok = false;
|
|
||||||
} else {
|
|
||||||
StaY = &*Next;
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
// Stack-rel access to X (write or read): bail.
|
|
||||||
if (srAccess2(*Walker, Off) && Off == X) {
|
|
||||||
ok = false; break;
|
|
||||||
}
|
|
||||||
// Any memory-touching op that's NOT a tracked stack-rel
|
|
||||||
// access — bail. Indirect pointer stores/loads (DPIndY /
|
|
||||||
// DPIndLong / abs / etc.) could alias slot Y via a pointer
|
|
||||||
// we can't trace, and the safety check below would miss it.
|
|
||||||
if ((Walker->mayLoad() || Walker->mayStore()) &&
|
|
||||||
!isStackRelMC2(Walker->getOpcode())) {
|
|
||||||
ok = false; break;
|
|
||||||
}
|
|
||||||
// SP-modifying ops shift the stack-rel addressing window —
|
|
||||||
// a later `lda X, s` reads a DIFFERENT byte than the earlier
|
|
||||||
// `sta X, s` (or worse, the new stack pointer points into
|
|
||||||
// saved P/retaddr). Bail on TCS (direct SP write) and on
|
|
||||||
// any stack push/pop (PHx/PLx/PEA/PEI/COP/BRK). Also bail
|
|
||||||
// on PHP/PLP because the wrap pass already bumped in-wrap
|
|
||||||
// stack-rel ops by +1 — our inserted STA after STA X writes
|
|
||||||
// at the un-bumped offset which gets the WRONG slot.
|
|
||||||
{
|
|
||||||
unsigned WO = Walker->getOpcode();
|
|
||||||
if (WO == W65816::TCS || WO == W65816::PHA ||
|
|
||||||
WO == W65816::PLA || WO == W65816::PHX ||
|
|
||||||
WO == W65816::PLX || WO == W65816::PHY ||
|
|
||||||
WO == W65816::PLY || WO == W65816::PHP ||
|
|
||||||
WO == W65816::PLP || WO == W65816::PHB ||
|
|
||||||
WO == W65816::PLB || WO == W65816::PHD ||
|
|
||||||
WO == W65816::PLD || WO == W65816::PHK ||
|
|
||||||
WO == W65816::PEA || WO == W65816::PEI_DP) {
|
|
||||||
ok = false; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!ok || !LdaX || !StaY) continue;
|
|
||||||
if (g_fireLimit >= 0 && g_fireCount >= g_fireLimit) continue;
|
|
||||||
g_fireCount++;
|
|
||||||
errs() << "SF FIRE " << g_fireCount << " in " << MF.getName()
|
|
||||||
<< " MBB " << MBB.getNumber()
|
|
||||||
<< " X=" << X << " Y=" << StaY->getOperand(0).getImm()
|
|
||||||
<< "\n";
|
|
||||||
// Now re-walk from std::next(It) up to LdaX and verify no
|
|
||||||
// access to slot Y in that gap.
|
|
||||||
ok = true;
|
|
||||||
for (auto W2 = std::next(It); W2 != LdaX->getIterator(); ++W2) {
|
|
||||||
if (W2->isDebugInstr()) continue;
|
|
||||||
int64_t Off;
|
|
||||||
if (srAccess2(*W2, Off) && Off == Y) { ok = false; break; }
|
|
||||||
}
|
|
||||||
if (!ok) continue;
|
|
||||||
// Safe to apply: schedule the StaY-after-StaX insert, and
|
|
||||||
// erase LdaX and StaY.
|
|
||||||
ToInsert.push_back({StaX, Y});
|
|
||||||
ToErase.push_back(LdaX);
|
|
||||||
ToErase.push_back(StaY);
|
|
||||||
Changed = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Apply (insertions first; iterators stay valid through erase).
|
|
||||||
for (auto &P : ToInsert) {
|
|
||||||
MachineInstr *StaX = std::get<0>(P);
|
|
||||||
int64_t Y = std::get<1>(P);
|
|
||||||
MachineBasicBlock *MBB = StaX->getParent();
|
|
||||||
DebugLoc DL = StaX->getDebugLoc();
|
|
||||||
auto NextIt = std::next(StaX->getIterator());
|
|
||||||
BuildMI(*MBB, NextIt, DL, TII.get(W65816::STA_StackRel))
|
|
||||||
.addImm(Y);
|
|
||||||
}
|
|
||||||
for (MachineInstr *MI : ToErase) MI->eraseFromParent();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
// (Redundant CMP #0 elimination — disabled, hit VLA sum_n
|
|
||||||
// regression. Carry-flag bookkeeping across the CMP turned out to
|
|
||||||
// have more cases than my forward-walk modeled. See
|
|
||||||
// feedback_cmp_zero_elim.md.)
|
|
||||||
#if 0
|
|
||||||
{
|
|
||||||
auto isNZSetOnA = [](unsigned Op) {
|
|
||||||
switch (Op) {
|
|
||||||
case W65816::DEA_PSEUDO: case W65816::INA_PSEUDO:
|
|
||||||
case W65816::ADC_StackRel: case W65816::ADC_DP: case W65816::ADC_Imm16:
|
|
||||||
case W65816::SBC_StackRel: case W65816::SBC_DP: case W65816::SBC_Imm16:
|
|
||||||
case W65816::AND_StackRel: case W65816::AND_DP: case W65816::AND_Imm16:
|
|
||||||
case W65816::ORA_StackRel: case W65816::ORA_DP: case W65816::ORA_Imm16:
|
|
||||||
case W65816::EOR_StackRel: case W65816::EOR_DP: case W65816::EOR_Imm16:
|
|
||||||
case W65816::LDA_StackRel: case W65816::LDA_DP:
|
|
||||||
case W65816::LDAi16imm: case W65816::LDA_Imm16:
|
|
||||||
case W65816::TXA: case W65816::TYA:
|
|
||||||
case W65816::ADCi16imm: case W65816::ADCEi16imm:
|
|
||||||
case W65816::SBCi16imm: case W65816::SBCEi16imm:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
auto isCmpZero = [](const MachineInstr &MI) {
|
|
||||||
if (MI.getOpcode() != W65816::CMPi16imm) return false;
|
|
||||||
// Operand layout: lhs (Acc16), imm. Find the imm.
|
|
||||||
for (const MachineOperand &MO : MI.operands()) {
|
|
||||||
if (MO.isImm()) return MO.getImm() == 0;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
auto modifiesA = [](const MachineInstr &MI) {
|
|
||||||
for (const MachineOperand &MO : MI.operands()) {
|
|
||||||
if (MO.isReg() && MO.getReg() == W65816::A && MO.isDef())
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
auto readsC = [](const MachineInstr &MI) {
|
|
||||||
// We don't model individual flag bits; approximate by checking
|
|
||||||
// if the MI reads $p AND is one of the carry-consuming ops.
|
|
||||||
unsigned Op = MI.getOpcode();
|
|
||||||
switch (Op) {
|
|
||||||
case W65816::ADC_StackRel: case W65816::ADC_DP: case W65816::ADC_Imm16:
|
|
||||||
case W65816::SBC_StackRel: case W65816::SBC_DP: case W65816::SBC_Imm16:
|
|
||||||
case W65816::ADCEi16imm: case W65816::SBCEi16imm:
|
|
||||||
case W65816::BCC: case W65816::BCS:
|
|
||||||
case W65816::ROL_A: case W65816::ROR_A:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
SmallVector<MachineInstr *, 4> CmpsToErase;
|
|
||||||
for (MachineBasicBlock &MBB : MF) {
|
|
||||||
for (MachineInstr &MI : MBB) {
|
|
||||||
if (!isCmpZero(MI)) continue;
|
|
||||||
// Walk backward, skipping flag-preserving instructions.
|
|
||||||
bool foundProducer = false;
|
|
||||||
auto Back = MI.getIterator();
|
|
||||||
while (Back != MBB.begin()) {
|
|
||||||
--Back;
|
|
||||||
if (Back->isDebugInstr()) continue;
|
|
||||||
if (Back->isCall() || Back->isInlineAsm()) break;
|
|
||||||
if (modifiesA(*Back)) {
|
|
||||||
foundProducer = isNZSetOnA(Back->getOpcode());
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
bool defsP = false;
|
|
||||||
for (const MachineOperand &MO : Back->operands()) {
|
|
||||||
if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef()) {
|
|
||||||
defsP = true; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (defsP) break;
|
|
||||||
}
|
|
||||||
if (!foundProducer) continue;
|
|
||||||
// Walk FORWARD from CMP: until the next C-defining MI, no MI
|
|
||||||
// reads C.
|
|
||||||
bool cConsumed = false;
|
|
||||||
for (auto Fwd = std::next(MI.getIterator()); Fwd != MBB.end(); ++Fwd) {
|
|
||||||
if (Fwd->isDebugInstr()) continue;
|
|
||||||
if (readsC(*Fwd)) { cConsumed = true; break; }
|
|
||||||
// Next def of $p: subsequent reads aren't ours.
|
|
||||||
bool defsP = false;
|
|
||||||
for (const MachineOperand &MO : Fwd->operands()) {
|
|
||||||
if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef()) {
|
|
||||||
defsP = true; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (defsP) break;
|
|
||||||
}
|
|
||||||
if (cConsumed) continue;
|
|
||||||
CmpsToErase.push_back(&MI);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (MachineInstr *MI : CmpsToErase) MI->eraseFromParent();
|
|
||||||
if (!CmpsToErase.empty()) Changed = true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
// (Narrow PHI-copy slot collapse — disabled, qsort regression.)
|
|
||||||
#if 0
|
|
||||||
{
|
|
||||||
auto isStackRelMC2 = [](unsigned Op) {
|
|
||||||
return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel ||
|
|
||||||
Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel ||
|
|
||||||
Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel ||
|
|
||||||
Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel;
|
|
||||||
};
|
|
||||||
auto srAccess2 = [&](const MachineInstr &MI, int64_t &Off) {
|
|
||||||
if (!isStackRelMC2(MI.getOpcode())) return false;
|
|
||||||
if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return false;
|
|
||||||
Off = MI.getOperand(0).getImm();
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
DenseMap<int64_t, unsigned> Refs;
|
|
||||||
DenseMap<int64_t, MachineInstr *> StaInst, LdaInst;
|
|
||||||
DenseMap<int64_t, unsigned> NSta, NLda;
|
|
||||||
for (MachineBasicBlock &MBB : MF) {
|
|
||||||
for (MachineInstr &MI : MBB) {
|
|
||||||
int64_t Off;
|
|
||||||
if (!srAccess2(MI, Off)) continue;
|
|
||||||
Refs[Off]++;
|
|
||||||
if (MI.getOpcode() == W65816::STA_StackRel) {
|
|
||||||
NSta[Off]++; StaInst[Off] = &MI;
|
|
||||||
} else if (MI.getOpcode() == W65816::LDA_StackRel) {
|
|
||||||
NLda[Off]++; LdaInst[Off] = &MI;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
SmallVector<MachineInstr *, 4> ToErase;
|
|
||||||
for (auto &P : Refs) {
|
|
||||||
int64_t X = P.first;
|
|
||||||
if (P.second != 2) continue; // exactly 2 references
|
|
||||||
if (NSta[X] != 1 || NLda[X] != 1) continue;
|
|
||||||
MachineInstr *Sta = StaInst[X];
|
|
||||||
MachineInstr *Lda = LdaInst[X];
|
|
||||||
if (Sta->getParent() != Lda->getParent()) continue;
|
|
||||||
MachineBasicBlock *MBB = Sta->getParent();
|
|
||||||
// Sta must be before Lda.
|
|
||||||
bool staBefore = false;
|
|
||||||
for (auto It = MBB->begin(); It != MBB->end(); ++It) {
|
|
||||||
if (&*It == Sta) { staBefore = true; break; }
|
|
||||||
if (&*It == Lda) break;
|
|
||||||
}
|
|
||||||
if (!staBefore) continue;
|
|
||||||
// Next after Lda must be STA Y where Y != X.
|
|
||||||
auto NextIt = std::next(Lda->getIterator());
|
|
||||||
while (NextIt != MBB->end() && NextIt->isDebugInstr()) ++NextIt;
|
|
||||||
if (NextIt == MBB->end()) continue;
|
|
||||||
int64_t Y;
|
|
||||||
if (NextIt->getOpcode() != W65816::STA_StackRel ||
|
|
||||||
!srAccess2(*NextIt, Y) || Y == X) continue;
|
|
||||||
// Between Sta and Lda, no read/write of slot Y, no call, no
|
|
||||||
// anything that would re-set slot Y's value mid-flight.
|
|
||||||
bool ok = true;
|
|
||||||
for (auto It = std::next(Sta->getIterator()); It != Lda->getIterator();
|
|
||||||
++It) {
|
|
||||||
if (It->isDebugInstr()) continue;
|
|
||||||
if (It->isCall() || It->isInlineAsm()) { ok = false; break; }
|
|
||||||
int64_t Off;
|
|
||||||
if (srAccess2(*It, Off) && Off == Y) { ok = false; break; }
|
|
||||||
}
|
|
||||||
if (!ok) continue;
|
|
||||||
// Redirect the original STA to write to Y; delete the LDA-STA pair.
|
|
||||||
Sta->getOperand(0).setImm(Y);
|
|
||||||
ToErase.push_back(Lda);
|
|
||||||
ToErase.push_back(&*NextIt);
|
|
||||||
Changed = true;
|
|
||||||
}
|
|
||||||
for (MachineInstr *MI : ToErase) MI->eraseFromParent();
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return Changed;
|
return Changed;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -127,7 +127,7 @@ static bool touchesX(const MachineInstr &MI, const TargetRegisterInfo *TRI) {
|
||||||
return xEffect(MI, TRI) != XNone;
|
return xEffect(MI, TRI) != XNone;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if MI is `STAfi $a, slot, 0`.
|
// Returns FI if MI is `STAfi $a, slot, 0`, else -1.
|
||||||
static int matchSTAfi(const MachineInstr &MI) {
|
static int matchSTAfi(const MachineInstr &MI) {
|
||||||
if (MI.getOpcode() != W65816::STAfi) return -1;
|
if (MI.getOpcode() != W65816::STAfi) return -1;
|
||||||
if (MI.getNumOperands() < 3) return -1;
|
if (MI.getNumOperands() < 3) return -1;
|
||||||
|
|
|
||||||
|
|
@ -800,33 +800,6 @@ bool W65816StackRelToImg::runOnMachineFunction(MachineFunction &MF) {
|
||||||
// unhandled — they can shift SP arbitrarily. Caller must bail.
|
// unhandled — they can shift SP arbitrarily. Caller must bail.
|
||||||
return 0;
|
return 0;
|
||||||
};
|
};
|
||||||
auto miBailsAnalysis = [](const MachineInstr &MI) -> bool {
|
|
||||||
// We don't bail on TCS or ADJCALLSTACK*. TCS in prologue/epilogue
|
|
||||||
// resets SP to a known value (the "canonical" SP for that region);
|
|
||||||
// since stack-rel accesses don't span TCS in well-formed code (the
|
|
||||||
// prologue allocates, body uses, epilogue deallocates), treating
|
|
||||||
// SP as continuing across TCS gives correct relative offsets for
|
|
||||||
// accesses inside each region. ADJCALLSTACK* aren't usually
|
|
||||||
// present at pre-emit time (PEI eliminates them or AsmPrinter
|
|
||||||
// handles). If they're still present, treat as 0 SP-shift —
|
|
||||||
// the actual PUSH16 ops carry the real shift.
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
auto miSpDeltaWithAdj = [&](const MachineInstr &MI) -> int {
|
|
||||||
if (MI.getOpcode() == W65816::ADJCALLSTACKDOWN ||
|
|
||||||
MI.getOpcode() == W65816::ADJCALLSTACKUP) {
|
|
||||||
// Skip — the actual PUSH16/PEA/PHA ops inside the call seq
|
|
||||||
// carry the SP delta.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
if (MI.getOpcode() == W65816::TCS) {
|
|
||||||
// TCS sets SP; we treat it as a "reset to canonical SP" point.
|
|
||||||
// Return 0 here; the calling code can do the reset.
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
};
|
|
||||||
(void)miSpDeltaWithAdj;
|
|
||||||
while (!Worklist.empty() && SpAdjValid) {
|
while (!Worklist.empty() && SpAdjValid) {
|
||||||
MachineBasicBlock *MBB = Worklist.pop_back_val();
|
MachineBasicBlock *MBB = Worklist.pop_back_val();
|
||||||
if (!Visited.insert(MBB).second) continue;
|
if (!Visited.insert(MBB).second) continue;
|
||||||
|
|
|
||||||
|
|
@ -166,20 +166,26 @@ static bool semanticallyDefsA(const MachineInstr &MI) {
|
||||||
|
|
||||||
// Walk backward from MI in its MBB looking for the most recent A-define.
|
// Walk backward from MI in its MBB looking for the most recent A-define.
|
||||||
// Returns the MI that defines A, or nullptr if none in the same MBB.
|
// Returns the MI that defines A, or nullptr if none in the same MBB.
|
||||||
// Skips debug instructions. Stops at MBB boundary, calls, branches,
|
// Skips debug instructions. When BailOnCall is true, also stops at
|
||||||
// inline asm.
|
// calls / inline asm (used by the Case (3) twin check where call effects
|
||||||
static MachineInstr *findPriorADef(MachineInstr *MI) {
|
// invalidate the value-equivalence reasoning).
|
||||||
|
static MachineInstr *findADefBackward(MachineInstr *MI, bool BailOnCall) {
|
||||||
MachineBasicBlock *MBB = MI->getParent();
|
MachineBasicBlock *MBB = MI->getParent();
|
||||||
auto It = MI->getIterator();
|
auto It = MI->getIterator();
|
||||||
while (It != MBB->begin()) {
|
while (It != MBB->begin()) {
|
||||||
--It;
|
--It;
|
||||||
if (It->isDebugInstr()) continue;
|
if (It->isDebugInstr()) continue;
|
||||||
if (It->isCall() || It->isInlineAsm()) return nullptr;
|
if (BailOnCall && (It->isCall() || It->isInlineAsm())) return nullptr;
|
||||||
if (semanticallyDefsA(*It)) return &*It;
|
if (semanticallyDefsA(*It)) return &*It;
|
||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Convenience: Case (3) twin matcher (bails on calls/inline asm).
|
||||||
|
static MachineInstr *findPriorADef(MachineInstr *MI) {
|
||||||
|
return findADefBackward(MI, /*BailOnCall=*/true);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Walk forward from `Start` (exclusive) up to (but not including) `End`
|
// Walk forward from `Start` (exclusive) up to (but not including) `End`
|
||||||
// in the same MBB, tracking whether slot `WatchSlot` is written.
|
// in the same MBB, tracking whether slot `WatchSlot` is written.
|
||||||
|
|
@ -252,17 +258,9 @@ static bool usesFlagsP(const MachineInstr &MI) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Returns the MOST RECENT A-defining MI strictly before MI in its MBB,
|
// Convenience: Case (2) twin matcher (does NOT bail on calls/inline asm).
|
||||||
// skipping debug instructions. Returns nullptr if none in the same MBB.
|
|
||||||
static MachineInstr *findMostRecentADef(MachineInstr *MI) {
|
static MachineInstr *findMostRecentADef(MachineInstr *MI) {
|
||||||
MachineBasicBlock *MBB = MI->getParent();
|
return findADefBackward(MI, /*BailOnCall=*/false);
|
||||||
auto It = MI->getIterator();
|
|
||||||
while (It != MBB->begin()) {
|
|
||||||
--It;
|
|
||||||
if (It->isDebugInstr()) continue;
|
|
||||||
if (semanticallyDefsA(*It)) return &*It;
|
|
||||||
}
|
|
||||||
return nullptr;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -283,7 +281,6 @@ static MachineInstr *findMostRecentADef(MachineInstr *MI) {
|
||||||
static MachineInstr *findTwin(MachineInstr *StaX,
|
static MachineInstr *findTwin(MachineInstr *StaX,
|
||||||
ArrayRef<MachineInstr *> StasY) {
|
ArrayRef<MachineInstr *> StasY) {
|
||||||
MachineBasicBlock *MBBStaX = StaX->getParent();
|
MachineBasicBlock *MBBStaX = StaX->getParent();
|
||||||
int64_t XOff = StaX->getOperand(0).getImm();
|
|
||||||
// Cases (1) + (2): same MBB.
|
// Cases (1) + (2): same MBB.
|
||||||
for (MachineInstr *StaY : StasY) {
|
for (MachineInstr *StaY : StasY) {
|
||||||
if (StaY->getParent() != MBBStaX) continue;
|
if (StaY->getParent() != MBBStaX) continue;
|
||||||
|
|
@ -342,7 +339,6 @@ static MachineInstr *findTwin(MachineInstr *StaX,
|
||||||
}
|
}
|
||||||
if (XConst == YConst) return StaY;
|
if (XConst == YConst) return StaY;
|
||||||
}
|
}
|
||||||
(void)XOff;
|
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -8,10 +8,10 @@
|
||||||
//
|
//
|
||||||
// Pre-regalloc pass: when a tied-def Acc16 instruction (ADCfi, SBCfi,
|
// Pre-regalloc pass: when a tied-def Acc16 instruction (ADCfi, SBCfi,
|
||||||
// ANDfi, ORAfi, EORfi, ADCi16imm, SBCi16imm, ANDi16imm, ORAi16imm,
|
// ANDfi, ORAfi, EORfi, ADCi16imm, SBCi16imm, ANDi16imm, ORAi16imm,
|
||||||
// EORi16imm, ADCabs, SBCabs, ANDabs, ORAabs, EORabs, INA_PSEUDO,
|
// EORi16imm, ADCabs, SBCabs -- see isTiedAcc16Consumer below for the
|
||||||
// DEA_PSEUDO, ASLA16, LSRA16, NEGA16, SHL8A, SRL8A, SRA15A, etc.) has
|
// authoritative list) has a source vreg whose value is *also* needed
|
||||||
// a source vreg whose value is *also* needed past the consumer, fast
|
// past the consumer, fast regalloc fails to insert the necessary
|
||||||
// regalloc fails to insert the necessary save/restore on its own.
|
// save/restore on its own.
|
||||||
// (Acc16 has exactly one physical register, so the consumer's
|
// (Acc16 has exactly one physical register, so the consumer's
|
||||||
// tied-def overwrites the source; with multiple consumers/post-uses
|
// tied-def overwrites the source; with multiple consumers/post-uses
|
||||||
// the source must be spilled and reloaded.)
|
// the source must be spilled and reloaded.)
|
||||||
|
|
|
||||||
|
|
@ -59,10 +59,6 @@ using namespace llvm;
|
||||||
|
|
||||||
#define DEBUG_TYPE "w65816-un-lsr"
|
#define DEBUG_TYPE "w65816-un-lsr"
|
||||||
|
|
||||||
namespace llvm {
|
|
||||||
void initializeW65816UnLSRPass(PassRegistry &);
|
|
||||||
}
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
class W65816UnLSR : public FunctionPass {
|
class W65816UnLSR : public FunctionPass {
|
||||||
|
|
@ -84,7 +80,6 @@ public:
|
||||||
private:
|
private:
|
||||||
bool processLoop(Loop *L);
|
bool processLoop(Loop *L);
|
||||||
bool processCounterToPtrPHIs(Loop *L);
|
bool processCounterToPtrPHIs(Loop *L);
|
||||||
bool processReturnedCounter(Loop *L);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace
|
} // namespace
|
||||||
|
|
@ -107,7 +102,6 @@ bool W65816UnLSR::runOnFunction(Function &F) {
|
||||||
for (Loop *L : LI) {
|
for (Loop *L : LI) {
|
||||||
Changed |= processLoop(L);
|
Changed |= processLoop(L);
|
||||||
Changed |= processCounterToPtrPHIs(L);
|
Changed |= processCounterToPtrPHIs(L);
|
||||||
// processReturnedCounter remains disabled — see note above.
|
|
||||||
SmallVector<Loop *, 4> Worklist(L->begin(), L->end());
|
SmallVector<Loop *, 4> Worklist(L->begin(), L->end());
|
||||||
while (!Worklist.empty()) {
|
while (!Worklist.empty()) {
|
||||||
Loop *Sub = Worklist.pop_back_val();
|
Loop *Sub = Worklist.pop_back_val();
|
||||||
|
|
@ -120,241 +114,6 @@ bool W65816UnLSR::runOnFunction(Function &F) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// strLen-style undo: LSR converts `return p - s` into a counter PHI
|
|
||||||
// `%lsr.iv` that increments per iter and is returned directly:
|
|
||||||
// %lsr.iv = phi i16 [-1, %entry], [%lsr.iv.next, %latch]
|
|
||||||
// %p.0 = phi ptr [%s, %entry], [%incdec.ptr, %latch]
|
|
||||||
// %incdec.ptr = getelementptr i8, %p.0, i32 1
|
|
||||||
// %lsr.iv.next = add i16 %lsr.iv, 1
|
|
||||||
// br ..., %exit, %loop
|
|
||||||
// %exit:
|
|
||||||
// ret i16 %lsr.iv.next
|
|
||||||
//
|
|
||||||
// LSR's reasoning: cheaper to maintain a counter than compute (p - s)
|
|
||||||
// at exit. On W65816 the opposite is true: counter inc per iter costs
|
|
||||||
// 5 cyc/iter * N iters; one-time sub at exit costs ~10 cyc total.
|
|
||||||
//
|
|
||||||
// This undo finds the counter PHI, verifies its only out-of-loop use
|
|
||||||
// is via LCSSA → return, finds the sibling pointer PHI with the same
|
|
||||||
// stride, and replaces the return value with
|
|
||||||
// `(i16)(p_lcssa - base) + (K_init + 1)`. Erases the counter PHI.
|
|
||||||
//
|
|
||||||
// Saves ~5 cyc/iter on strLen-shape loops with a returned counter.
|
|
||||||
bool W65816UnLSR::processReturnedCounter(Loop *L) {
|
|
||||||
BasicBlock *Header = L->getHeader();
|
|
||||||
BasicBlock *Latch = L->getLoopLatch();
|
|
||||||
BasicBlock *Preheader = L->getLoopPreheader();
|
|
||||||
if (!Latch || !Preheader) return false;
|
|
||||||
|
|
||||||
// Single-exit loop.
|
|
||||||
SmallVector<BasicBlock *, 2> ExitBlocks;
|
|
||||||
L->getExitBlocks(ExitBlocks);
|
|
||||||
if (ExitBlocks.size() != 1) return false;
|
|
||||||
BasicBlock *Exit = ExitBlocks[0];
|
|
||||||
|
|
||||||
// Find a candidate counter PHI: integer, init=ConstantInt, step=+1.
|
|
||||||
PHINode *CounterPHI = nullptr;
|
|
||||||
ConstantInt *KInit = nullptr;
|
|
||||||
BinaryOperator *CounterStep = nullptr;
|
|
||||||
for (PHINode &PN : Header->phis()) {
|
|
||||||
if (!PN.getType()->isIntegerTy()) continue;
|
|
||||||
if (PN.getNumIncomingValues() != 2) continue;
|
|
||||||
Value *Init = nullptr, *Step = nullptr;
|
|
||||||
for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) {
|
|
||||||
BasicBlock *Pred = PN.getIncomingBlock(i);
|
|
||||||
if (L->contains(Pred)) Step = PN.getIncomingValue(i);
|
|
||||||
else Init = PN.getIncomingValue(i);
|
|
||||||
}
|
|
||||||
if (!Init || !Step) continue;
|
|
||||||
auto *InitC = dyn_cast<ConstantInt>(Init);
|
|
||||||
if (!InitC) continue;
|
|
||||||
auto *StepBO = dyn_cast<BinaryOperator>(Step);
|
|
||||||
if (!StepBO || StepBO->getOpcode() != Instruction::Add) continue;
|
|
||||||
Value *Other = nullptr;
|
|
||||||
if (StepBO->getOperand(0) == &PN) Other = StepBO->getOperand(1);
|
|
||||||
else if (StepBO->getOperand(1) == &PN) Other = StepBO->getOperand(0);
|
|
||||||
if (!Other) continue;
|
|
||||||
auto *StepCI = dyn_cast<ConstantInt>(Other);
|
|
||||||
if (!StepCI || !StepCI->isOne()) continue;
|
|
||||||
CounterPHI = &PN;
|
|
||||||
KInit = InitC;
|
|
||||||
CounterStep = StepBO;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!CounterPHI) return false;
|
|
||||||
|
|
||||||
// The counter PHI must be used INSIDE the loop only by its increment
|
|
||||||
// and OUTSIDE the loop only via an LCSSA PHI in the exit block that
|
|
||||||
// feeds a return. Same for the increment.
|
|
||||||
auto isOnlyInLoopUseTheStep = [&](Value *V) {
|
|
||||||
for (User *U : V->users()) {
|
|
||||||
auto *UI = dyn_cast<Instruction>(U);
|
|
||||||
if (!UI) return false;
|
|
||||||
if (!L->contains(UI)) continue; // out-of-loop is handled separately
|
|
||||||
if (UI == CounterStep) continue;
|
|
||||||
// The PHI itself is allowed (V might be CounterStep, used by
|
|
||||||
// CounterPHI's back-edge incoming).
|
|
||||||
if (UI == CounterPHI) continue;
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
if (!isOnlyInLoopUseTheStep(CounterPHI)) return false;
|
|
||||||
if (!isOnlyInLoopUseTheStep(CounterStep)) return false;
|
|
||||||
|
|
||||||
// Find a use of CounterPHI or CounterStep that's a ReturnInst.
|
|
||||||
// The use might be DIRECT (no LCSSA — common after LCSSA cleanup)
|
|
||||||
// or via an LCSSA PHI in the exit block.
|
|
||||||
ReturnInst *Ret = nullptr;
|
|
||||||
Value *RetSource = nullptr; // the value the ret reads
|
|
||||||
PHINode *ExitLCSSA = nullptr; // optional LCSSA PHI to erase
|
|
||||||
bool fromNext = false; // true if return source is CounterStep
|
|
||||||
auto findRet = [&](Value *V, bool isNext) -> bool {
|
|
||||||
for (User *U : V->users()) {
|
|
||||||
auto *UI = dyn_cast<Instruction>(U);
|
|
||||||
if (!UI) continue;
|
|
||||||
// Skip in-loop uses (those are the counter increment chain).
|
|
||||||
if (L->contains(UI->getParent())) continue;
|
|
||||||
if (auto *R = dyn_cast<ReturnInst>(UI)) {
|
|
||||||
if (R->getReturnValue() != V) continue;
|
|
||||||
Ret = R; RetSource = V; fromNext = isNext; return true;
|
|
||||||
}
|
|
||||||
// LCSSA PHI in the exit block?
|
|
||||||
if (auto *PN = dyn_cast<PHINode>(UI)) {
|
|
||||||
if (PN->getParent() != Exit) continue;
|
|
||||||
if (PN->getNumIncomingValues() != 1) continue;
|
|
||||||
if (PN->getIncomingValue(0) != V) continue;
|
|
||||||
if (!PN->hasOneUse()) continue;
|
|
||||||
auto *R = dyn_cast<ReturnInst>(PN->user_back());
|
|
||||||
if (!R || R->getReturnValue() != PN) continue;
|
|
||||||
Ret = R; RetSource = V; fromNext = isNext; ExitLCSSA = PN;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
if (!findRet(CounterStep, true) && !findRet(CounterPHI, false))
|
|
||||||
return false;
|
|
||||||
|
|
||||||
// Find a sibling pointer PHI: init=Base, latch incoming is a
|
|
||||||
// `getelementptr i8, %ptr, 1` of itself.
|
|
||||||
PHINode *PtrPHI = nullptr;
|
|
||||||
Value *Base = nullptr;
|
|
||||||
GetElementPtrInst *PtrStep = nullptr;
|
|
||||||
for (PHINode &PN : Header->phis()) {
|
|
||||||
if (!PN.getType()->isPointerTy()) continue;
|
|
||||||
if (PN.getNumIncomingValues() != 2) continue;
|
|
||||||
Value *Init = nullptr, *Step = nullptr;
|
|
||||||
for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) {
|
|
||||||
BasicBlock *Pred = PN.getIncomingBlock(i);
|
|
||||||
if (L->contains(Pred)) Step = PN.getIncomingValue(i);
|
|
||||||
else Init = PN.getIncomingValue(i);
|
|
||||||
}
|
|
||||||
if (!Init || !Step) continue;
|
|
||||||
auto *StepGEP = dyn_cast<GetElementPtrInst>(Step);
|
|
||||||
if (!StepGEP) continue;
|
|
||||||
if (StepGEP->getPointerOperand() != &PN) continue;
|
|
||||||
if (StepGEP->getNumIndices() != 1) continue;
|
|
||||||
if (!StepGEP->getSourceElementType()->isIntegerTy(8)) continue;
|
|
||||||
auto *StrideCI = dyn_cast<ConstantInt>(StepGEP->getOperand(1));
|
|
||||||
if (!StrideCI || !StrideCI->isOne()) continue;
|
|
||||||
PtrPHI = &PN;
|
|
||||||
Base = Init;
|
|
||||||
PtrStep = StepGEP;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if (!PtrPHI) return false;
|
|
||||||
|
|
||||||
// The pointer-PHI must have an LCSSA in the exit (so we can compute
|
|
||||||
// p_lcssa - base). Find it or create one.
|
|
||||||
PHINode *PtrLCSSA = nullptr;
|
|
||||||
for (PHINode &EPN : Exit->phis()) {
|
|
||||||
if (EPN.getNumIncomingValues() != 1) continue;
|
|
||||||
if (EPN.getIncomingValue(0) == PtrPHI) {
|
|
||||||
PtrLCSSA = &EPN; break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (!PtrLCSSA) {
|
|
||||||
// Create LCSSA for PtrPHI.
|
|
||||||
IRBuilder<> B(&Exit->front());
|
|
||||||
PtrLCSSA = B.CreatePHI(PtrPHI->getType(), 1, "unlsr.p.lcssa");
|
|
||||||
PtrLCSSA->addIncoming(PtrPHI, Latch);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build replacement value: (i16)(p_lcssa - base) + (K_init + (fromNext ? 1 : 0))
|
|
||||||
// For fromNext=true (returning %counter.next): value = K_init + iters
|
|
||||||
// p_lcssa - base = iters (in bytes, stride 1) → value = K_init + (p_lcssa - base)
|
|
||||||
// But we want: counter.next at exit = K_init + iters; and p_lcssa - base = iters.
|
|
||||||
// So replacement = (i16)(p_lcssa - base) + K_init.
|
|
||||||
// For strLen: K_init = -1; iters at exit = K (where ret = K - 1 + 1 = K)
|
|
||||||
// Wait let me re-derive. counter init = -1. iter 1 entry: counter = -1.
|
|
||||||
// iter 1 exit: counter.next = 0. Suppose exit-iter is iter K. Then at
|
|
||||||
// iter K's icmp-true, counter.next = -1 + K.
|
|
||||||
// And p_lcssa = base + (K - 1) (since iter K had p.0 = base + K-1).
|
|
||||||
// So p_lcssa - base = K - 1.
|
|
||||||
// We want counter.next = K - 1 (because exit-iter is iter K, but counter.next
|
|
||||||
// was computed before icmp tested 0 - so it's K - 1 (with K iters = K decisions))
|
|
||||||
// Hmm, off-by-one is tricky. Let me just test empirically.
|
|
||||||
|
|
||||||
// The "return value type" we'll cast to.
|
|
||||||
Type *RetTy = Ret->getReturnValue()->getType();
|
|
||||||
if (!RetTy->isIntegerTy()) return false;
|
|
||||||
Instruction *InsertPt = ExitLCSSA ? ExitLCSSA->getNextNode() : Ret;
|
|
||||||
IRBuilder<> B(InsertPt);
|
|
||||||
// (p_lcssa - base) as integer.
|
|
||||||
Value *PLcssaInt = B.CreatePtrToInt(PtrLCSSA, Type::getInt32Ty(Header->getContext()), "unlsr.plcssa.i");
|
|
||||||
Value *BaseInt = B.CreatePtrToInt(Base, Type::getInt32Ty(Header->getContext()), "unlsr.base.i");
|
|
||||||
Value *Diff = B.CreateSub(PLcssaInt, BaseInt, "unlsr.diff");
|
|
||||||
// Truncate to counter type.
|
|
||||||
Value *DiffI = B.CreateTrunc(Diff, CounterPHI->getType(), "unlsr.diff.trunc");
|
|
||||||
// For fromNext (returning %counter.next): replacement = diff + (K_init + 1).
|
|
||||||
// At exit, counter.next = K_init + iters.
|
|
||||||
// p_lcssa - base = iters (in bytes; stride 1). Wait but iters is the iter count.
|
|
||||||
// Let me re-check with concrete example.
|
|
||||||
// strLen("a\0"): iter 1: p.0 = s, *p='a'!=0, p++, counter=-1, counter.next=0.
|
|
||||||
// iter 2: p.0 = s+1, *p=0, exit. counter=0, counter.next=1.
|
|
||||||
// At exit: counter.next = 1. iters before exit-iter's icmp-true = 2.
|
|
||||||
// p_lcssa = s+1 (the iter-2 entry value). p_lcssa - base = 1.
|
|
||||||
// counter.next = 1 = K_init + 2 = -1 + 2 = 1. ✓
|
|
||||||
// p_lcssa - base = 1. So counter.next = p_lcssa - base + 0.
|
|
||||||
// (K_init + iters - (iters - (p_lcssa - base))) = K_init + (p_lcssa - base) = K_init + 1.
|
|
||||||
// Wait: counter.next = K_init + iters; p_lcssa - base = iters - 1.
|
|
||||||
// So counter.next = K_init + (p_lcssa - base) + 1.
|
|
||||||
// For K_init = -1: counter.next = -1 + 1 + 1 = 1 if iters=2. ✓
|
|
||||||
// So replacement = diff + (K_init + 1).
|
|
||||||
int64_t Adjust = KInit->getSExtValue() + (fromNext ? 1 : 0);
|
|
||||||
Value *Result = DiffI;
|
|
||||||
if (Adjust != 0) {
|
|
||||||
Result = B.CreateAdd(DiffI,
|
|
||||||
ConstantInt::get(CounterPHI->getType(), Adjust),
|
|
||||||
"unlsr.result");
|
|
||||||
}
|
|
||||||
// Cast to return type if different.
|
|
||||||
if (Result->getType() != RetTy) {
|
|
||||||
if (CounterPHI->getType()->getIntegerBitWidth() <
|
|
||||||
RetTy->getIntegerBitWidth())
|
|
||||||
Result = B.CreateZExt(Result, RetTy);
|
|
||||||
else
|
|
||||||
Result = B.CreateTrunc(Result, RetTy);
|
|
||||||
}
|
|
||||||
// Replace the return. If there's an LCSSA PHI, replace it. Otherwise
|
|
||||||
// replace the direct use in `ret`.
|
|
||||||
if (ExitLCSSA) {
|
|
||||||
ExitLCSSA->replaceAllUsesWith(Result);
|
|
||||||
ExitLCSSA->eraseFromParent();
|
|
||||||
} else {
|
|
||||||
Ret->setOperand(0, Result);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Erase the counter PHI and its increment.
|
|
||||||
CounterStep->replaceAllUsesWith(UndefValue::get(CounterPHI->getType()));
|
|
||||||
CounterPHI->replaceAllUsesWith(UndefValue::get(CounterPHI->getType()));
|
|
||||||
CounterStep->eraseFromParent();
|
|
||||||
CounterPHI->eraseFromParent();
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// strcpy-style undo: LSR converts two pointer PHIs (`src.addr.0` and
|
// strcpy-style undo: LSR converts two pointer PHIs (`src.addr.0` and
|
||||||
// `d.0` each stepping by 1) into a single counter PHI (`lsr.iv`) plus
|
// `d.0` each stepping by 1) into a single counter PHI (`lsr.iv`) plus
|
||||||
// GEPs `(base, counter)` per iter. On 65816 the counter+GEP form
|
// GEPs `(base, counter)` per iter. On 65816 the counter+GEP form
|
||||||
|
|
|
||||||
|
|
@ -84,27 +84,6 @@ static bool flowsToIncompatiblePhysReg(Register VReg,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns true if VReg's def is a COPY from a physreg whose class is not
|
|
||||||
// Wide16-compatible. copyPhysReg only handles a fixed set of source/dest
|
|
||||||
// pairs; an incompatible source physreg (e.g., DPF0, the i64-return
|
|
||||||
// high-half carrier) lowered to an IMG dest would crash with an
|
|
||||||
// "unhandled copyPhysReg" assertion at AsmPrinter time. (Currently
|
|
||||||
// only the Phase-2 PHI widening uses this; that's disabled, so mark
|
|
||||||
// unused.)
|
|
||||||
[[maybe_unused]] static bool comesFromIncompatiblePhysReg(Register VReg,
|
|
||||||
const MachineRegisterInfo &MRI) {
|
|
||||||
for (auto &D : MRI.def_instructions(VReg)) {
|
|
||||||
if (!D.isCopy()) continue;
|
|
||||||
const MachineOperand &Src = D.getOperand(1);
|
|
||||||
if (!Src.isReg() || !Src.getReg().isPhysical()) continue;
|
|
||||||
Register P = Src.getReg();
|
|
||||||
if (P == W65816::A) continue;
|
|
||||||
if (P >= W65816::IMG0 && P <= W65816::IMG15) continue;
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Returns true if the vreg is used by any PHI. PHI input/result must
|
// Returns true if the vreg is used by any PHI. PHI input/result must
|
||||||
// share the same register class (verifier requirement). Rather than
|
// share the same register class (verifier requirement). Rather than
|
||||||
// also widen the PHI's result and recursively all of its uses, we skip
|
// also widen the PHI's result and recursively all of its uses, we skip
|
||||||
|
|
@ -212,196 +191,9 @@ bool W65816WidenAcc16::runOnMachineFunction(MachineFunction &MF) {
|
||||||
Changed = true;
|
Changed = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 2: PHI cycle widening. EXPERIMENTAL, currently disabled —
|
// Phase 2: PHI cycle widening was prototyped here but never landed.
|
||||||
// see end of pass for explanation.
|
// The prototype body lived in an #if 0 block that was removed once
|
||||||
#if 0
|
// we settled on Phase 1 as the only effective half of the pass.
|
||||||
// PHIs whose def class is Acc16 keep
|
|
||||||
// the value pinned to $a across iterations, forcing stack spills
|
|
||||||
// when the PHI is live across calls or other A-clobbering ops.
|
|
||||||
// For sumSquares-style loops with an i32 accumulator, this manifests
|
|
||||||
// as per-iter `LDA slot ; ADC ; STA slot ; LDA slot ; STA slot` (the
|
|
||||||
// last LDA/STA pair is the PHI-back-edge copy). If we widen the
|
|
||||||
// PHI's def to Wide16, regalloc can keep it in an IMG slot and the
|
|
||||||
// back-edge PHI copy collapses to a register coalesce.
|
|
||||||
//
|
|
||||||
// To widen a PHI:
|
|
||||||
// 1. Compute the SCC of Acc16 vregs connected by PHI edges (PHI
|
|
||||||
// def ↔ PHI incoming vreg). This catches mutually-recursive
|
|
||||||
// PHIs in nested loops.
|
|
||||||
// 2. For every member: verify all non-PHI uses accept Wide16, no
|
|
||||||
// flow to a physreg, single def.
|
|
||||||
// 3. For each PHI in the SCC, walk its incoming list. Each
|
|
||||||
// incoming vreg is either ALREADY in the SCC (another PHI, no
|
|
||||||
// bridge needed) or an external Acc16 vreg whose value flows
|
|
||||||
// into the SCC — bridge it by inserting `WWide = COPY W` at
|
|
||||||
// the end of the predecessor block and pointing the PHI's
|
|
||||||
// incoming at WWide.
|
|
||||||
// 4. Change every SCC member's register class to Wide16.
|
|
||||||
auto worklistInsertIfAcc16 = [&MRI](Register V,
|
|
||||||
DenseSet<Register> &Seen,
|
|
||||||
SmallVectorImpl<Register> &WL) {
|
|
||||||
if (!V.isVirtual()) return;
|
|
||||||
if (MRI.getRegClass(V) != &W65816::Acc16RegClass) return;
|
|
||||||
if (!Seen.insert(V).second) return;
|
|
||||||
WL.push_back(V);
|
|
||||||
};
|
|
||||||
|
|
||||||
SmallVector<MachineInstr *, 16> AcctPhis;
|
|
||||||
for (MachineBasicBlock &MBB : MF) {
|
|
||||||
for (MachineInstr &MI : MBB.phis()) {
|
|
||||||
Register DefV = MI.getOperand(0).getReg();
|
|
||||||
if (MRI.getRegClass(DefV) == &W65816::Acc16RegClass) {
|
|
||||||
AcctPhis.push_back(&MI);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
DenseSet<Register> ProcessedPhiVregs;
|
|
||||||
for (MachineInstr *Seed : AcctPhis) {
|
|
||||||
Register SeedDef = Seed->getOperand(0).getReg();
|
|
||||||
if (ProcessedPhiVregs.count(SeedDef)) continue;
|
|
||||||
// Build SCC by following PHI edges in both directions.
|
|
||||||
DenseSet<Register> Comp;
|
|
||||||
SmallVector<Register, 8> Stack;
|
|
||||||
worklistInsertIfAcc16(SeedDef, Comp, Stack);
|
|
||||||
while (!Stack.empty()) {
|
|
||||||
Register V = Stack.pop_back_val();
|
|
||||||
// Forward: V flows into other PHIs as an incoming → include those PHI defs.
|
|
||||||
for (auto &U : MRI.use_nodbg_instructions(V)) {
|
|
||||||
if (!U.isPHI()) continue;
|
|
||||||
Register PhiDef = U.getOperand(0).getReg();
|
|
||||||
worklistInsertIfAcc16(PhiDef, Comp, Stack);
|
|
||||||
}
|
|
||||||
// Backward: if V is itself a PHI def, include the incoming vregs.
|
|
||||||
MachineInstr *DM = &*MRI.def_instructions(V).begin();
|
|
||||||
if (!DM || !DM->isPHI()) continue;
|
|
||||||
for (unsigned i = 1, e = DM->getNumOperands(); i < e; i += 2) {
|
|
||||||
MachineOperand &MO = DM->getOperand(i);
|
|
||||||
if (!MO.isReg() || !MO.getReg().isVirtual()) continue;
|
|
||||||
worklistInsertIfAcc16(MO.getReg(), Comp, Stack);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (Register V : Comp) ProcessedPhiVregs.insert(V);
|
|
||||||
|
|
||||||
// Validate every member. PHI uses are ACCEPTED when the consumer
|
|
||||||
// PHI is itself in the SCC (those PHIs are being widened in
|
|
||||||
// lock-step). Narrow-class uses (e.g., INA_PSEUDO's tied-def
|
|
||||||
// input requires Acc16) are ALSO accepted — we'll insert a
|
|
||||||
// Wide16→Acc16 COPY at the use site after widening. The only
|
|
||||||
// unrecoverable cases are: PHI uses where the consumer PHI is
|
|
||||||
// outside the SCC (forcing cross-SCC class merging), and physreg
|
|
||||||
// flow to $x/$y/etc. (handled separately above).
|
|
||||||
auto usesAcceptInSCC = [&](Register V,
|
|
||||||
SmallVectorImpl<MachineOperand *> *NarrowSites)
|
|
||||||
-> bool {
|
|
||||||
for (auto &MO : MRI.use_nodbg_operands(V)) {
|
|
||||||
MachineInstr *UMI = MO.getParent();
|
|
||||||
if (UMI->isCopy()) continue;
|
|
||||||
if (UMI->isPHI()) {
|
|
||||||
Register PhiDef = UMI->getOperand(0).getReg();
|
|
||||||
if (Comp.count(PhiDef)) continue; // co-widened
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
unsigned OpIdx = UMI->getOperandNo(&MO);
|
|
||||||
const TargetRegisterClass *Expected =
|
|
||||||
TII->getRegClass(UMI->getDesc(), OpIdx);
|
|
||||||
if (!Expected) continue;
|
|
||||||
if (Expected == &W65816::Wide16RegClass) continue;
|
|
||||||
if (Expected->hasSubClassEq(&W65816::Wide16RegClass)) continue;
|
|
||||||
// Expected is narrower than Wide16 (e.g., Acc16-only tied
|
|
||||||
// input). Mark for runtime narrowing — we'll insert a COPY
|
|
||||||
// at apply time.
|
|
||||||
if (NarrowSites) NarrowSites->push_back(&MO);
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
};
|
|
||||||
bool ok = true;
|
|
||||||
SmallVector<MachineOperand *, 8> NarrowSites;
|
|
||||||
for (Register V : Comp) {
|
|
||||||
if (!MRI.hasOneDef(V)) { ok = false; break; }
|
|
||||||
if (flowsToIncompatiblePhysReg(V, MRI)) { ok = false; break; }
|
|
||||||
if (comesFromIncompatiblePhysReg(V, MRI)) { ok = false; break; }
|
|
||||||
if (!usesAcceptInSCC(V, &NarrowSites)) { ok = false; break; }
|
|
||||||
}
|
|
||||||
if (!ok) continue;
|
|
||||||
|
|
||||||
// Apply widening. First insert bridge COPYs at predecessor edges
|
|
||||||
// for external (non-Comp) Acc16 incomings to each PHI in Comp.
|
|
||||||
SmallVector<std::pair<MachineInstr *, unsigned>, 16> BridgeSites;
|
|
||||||
for (Register V : Comp) {
|
|
||||||
MachineInstr *DM = &*MRI.def_instructions(V).begin();
|
|
||||||
if (!DM->isPHI()) continue;
|
|
||||||
for (unsigned i = 1, e = DM->getNumOperands(); i < e; i += 2) {
|
|
||||||
MachineOperand &MO = DM->getOperand(i);
|
|
||||||
if (!MO.isReg() || !MO.getReg().isVirtual()) continue;
|
|
||||||
Register Inc = MO.getReg();
|
|
||||||
if (Comp.count(Inc)) continue; // in-SCC, no bridge needed
|
|
||||||
// External incoming: ensure it's currently Acc16; if so, we'll
|
|
||||||
// insert a COPY at the predecessor block's end.
|
|
||||||
if (MRI.getRegClass(Inc) != &W65816::Acc16RegClass &&
|
|
||||||
MRI.getRegClass(Inc) != &W65816::Wide16RegClass) {
|
|
||||||
ok = false;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
BridgeSites.push_back({DM, i});
|
|
||||||
}
|
|
||||||
if (!ok) break;
|
|
||||||
}
|
|
||||||
if (!ok) continue;
|
|
||||||
|
|
||||||
// Insert bridges.
|
|
||||||
for (auto &Site : BridgeSites) {
|
|
||||||
MachineInstr *PhiMI = Site.first;
|
|
||||||
unsigned OpIdx = Site.second;
|
|
||||||
Register Inc = PhiMI->getOperand(OpIdx).getReg();
|
|
||||||
MachineBasicBlock *PredMBB = PhiMI->getOperand(OpIdx + 1).getMBB();
|
|
||||||
// If already Wide16 (e.g., another candidate widened it already),
|
|
||||||
// no bridge needed — but we still need the PHI incoming to use
|
|
||||||
// a Wide16 vreg. Use Inc directly.
|
|
||||||
if (MRI.getRegClass(Inc) == &W65816::Wide16RegClass) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
// Insert COPY before the predecessor's terminator(s).
|
|
||||||
auto InsertPos = PredMBB->getFirstTerminator();
|
|
||||||
DebugLoc DL = (InsertPos == PredMBB->end())
|
|
||||||
? PredMBB->findBranchDebugLoc()
|
|
||||||
: InsertPos->getDebugLoc();
|
|
||||||
Register WideInc = MRI.createVirtualRegister(&W65816::Wide16RegClass);
|
|
||||||
BuildMI(*PredMBB, InsertPos, DL, TII->get(TargetOpcode::COPY),
|
|
||||||
WideInc)
|
|
||||||
.addReg(Inc);
|
|
||||||
PhiMI->getOperand(OpIdx).setReg(WideInc);
|
|
||||||
PhiMI->getOperand(OpIdx).setIsKill(false);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Force every SCC member to Img16 (IMG-only, no A). Using Wide16
|
|
||||||
// (A + IMG) doesn't work here: the Register Coalescer joins our
|
|
||||||
// Wide16 vregs with adjacent Acc16 vregs (intersection = Acc16)
|
|
||||||
// and narrows them back to A-only, defeating the widening. Img16
|
|
||||||
// intersects Acc16 to ∅, so the coalescer can't merge — the PHI
|
|
||||||
// stays in IMG. This is correct anyway for the common case (PHI
|
|
||||||
// live across a call): A is JSL-clobbered, so it can't carry the
|
|
||||||
// value through, and IMG8..15 is the right home.
|
|
||||||
for (Register V : Comp) {
|
|
||||||
MRI.setRegClass(V, &W65816::Img16RegClass);
|
|
||||||
}
|
|
||||||
// Insert narrowing COPYs at each narrow-class use site. Each site
|
|
||||||
// is `... = OP V, ...` where the operand requires Acc16 but V is
|
|
||||||
// now Wide16. Replace with `%Vacc = COPY V (Acc16); ... = OP %Vacc, ...`.
|
|
||||||
for (MachineOperand *MO : NarrowSites) {
|
|
||||||
MachineInstr *UMI = MO->getParent();
|
|
||||||
Register OldReg = MO->getReg();
|
|
||||||
Register NarrowReg =
|
|
||||||
MRI.createVirtualRegister(&W65816::Acc16RegClass);
|
|
||||||
DebugLoc DL = UMI->getDebugLoc();
|
|
||||||
BuildMI(*UMI->getParent(), UMI, DL, TII->get(TargetOpcode::COPY),
|
|
||||||
NarrowReg)
|
|
||||||
.addReg(OldReg);
|
|
||||||
MO->setReg(NarrowReg);
|
|
||||||
MO->setIsKill(false);
|
|
||||||
}
|
|
||||||
Changed = true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
// Why disabled (2026-05-13 attempt):
|
// Why disabled (2026-05-13 attempt):
|
||||||
// - Widening PHI cycles to Wide16 (= A + IMG0..15) is undone by the
|
// - Widening PHI cycles to Wide16 (= A + IMG0..15) is undone by the
|
||||||
// Register Coalescer: it joins our Wide16 vregs with adjacent
|
// Register Coalescer: it joins our Wide16 vregs with adjacent
|
||||||
|
|
|
||||||
|
|
@ -1,20 +1,26 @@
|
||||||
# tests/ubsan — UBSan-min smoke probe (Phase 6.2)
|
# tests/ubsan — UBSan-min smoke probe (Phase 6.2)
|
||||||
|
|
||||||
Three-case probe that exercises the `-fsanitize=undefined
|
Nine-case probe that exercises the `-fsanitize=undefined
|
||||||
-fsanitize-minimal-runtime` instrumentation end-to-end on the W65816
|
-fsanitize-minimal-runtime` instrumentation end-to-end on the W65816
|
||||||
target:
|
target:
|
||||||
|
|
||||||
| Kind | UB | Sentinel |
|
| Kind | UB | Sentinel |
|
||||||
|-----------------------|----------------------------------|--------------|
|
|------------------------|----------------------------------|----------------------|
|
||||||
| `add-overflow` | i16 `INT_MAX + 1` | `$025000=0xC0DE` |
|
| `add-overflow` | i16 `INT_MAX + 1` | `$025000=0xC0DE` |
|
||||||
| `shift-out-of-bounds` | u16 `1 << 17` | `$025002=0xC0DF` |
|
| `shift-out-of-bounds` | u16 `1 << 17` | `$025002=0xC0DF` |
|
||||||
| `divrem-overflow` | i16 `n / 0` | `$025004=0xC0E0` |
|
| `divrem-overflow` | i16 `n / 0` | `$025004=0xC0E0` |
|
||||||
| (liveness) | tail of `main` reached | `$025006=0xC0DA` |
|
| `sub-overflow` | i16 `INT_MIN - 1` | `$025006=0xC0E1` |
|
||||||
|
| `mul-overflow` | i16 `INT_MAX * 2` | `$025008=0xC0E2` |
|
||||||
|
| `negate-overflow` | i16 `-INT_MIN` | `$02500A=0xC0E3` |
|
||||||
|
| `pointer-overflow` | `(char*)0xFFFFFFF0 + 0x40` | `$02500C=0xC0E4` |
|
||||||
|
| `load-invalid-value` | `_Bool` loaded from byte = 2 | `$02500E=0xC0E5` |
|
||||||
|
| `out-of-bounds` | `arr[idx>=N]` on static array | `$025010=0xC0E6` |
|
||||||
|
| (liveness) | tail of `main` reached | `$025012=0xC0DA` |
|
||||||
|
|
||||||
The probe ships strong override defs for the three `__ubsan_handle_*_minimal`
|
The probe ships strong override defs for the nine `__ubsan_handle_*_minimal`
|
||||||
recovering handlers it exercises; the remaining 22 are pulled in from
|
recovering handlers it exercises; the remaining handlers are pulled in
|
||||||
`runtime/ubsan.o` so any extra UB site clang emits (e.g. constant-fold
|
from `runtime/ubsan.o` so any extra UB site clang emits (e.g.
|
||||||
overflow at `-O2`) still resolves cleanly.
|
constant-fold overflow at `-O2`) still resolves cleanly.
|
||||||
|
|
||||||
## Build + run
|
## Build + run
|
||||||
|
|
||||||
|
|
@ -27,8 +33,14 @@ Expected output:
|
||||||
MAME-READ addr=0x025000 val=0xc0de
|
MAME-READ addr=0x025000 val=0xc0de
|
||||||
MAME-READ addr=0x025002 val=0xc0df
|
MAME-READ addr=0x025002 val=0xc0df
|
||||||
MAME-READ addr=0x025004 val=0xc0e0
|
MAME-READ addr=0x025004 val=0xc0e0
|
||||||
MAME-READ addr=0x025006 val=0xc0da
|
MAME-READ addr=0x025006 val=0xc0e1
|
||||||
MAME OK: 4 reads matched
|
MAME-READ addr=0x025008 val=0xc0e2
|
||||||
|
MAME-READ addr=0x02500a val=0xc0e3
|
||||||
|
MAME-READ addr=0x02500c val=0xc0e4
|
||||||
|
MAME-READ addr=0x02500e val=0xc0e5
|
||||||
|
MAME-READ addr=0x025010 val=0xc0e6
|
||||||
|
MAME-READ addr=0x025012 val=0xc0da
|
||||||
|
MAME OK: 10 reads matched
|
||||||
```
|
```
|
||||||
|
|
||||||
## What this probe is NOT
|
## What this probe is NOT
|
||||||
|
|
@ -39,9 +51,14 @@ MAME OK: 4 reads matched
|
||||||
overrides the handlers so it can verify the *call edge* without
|
overrides the handlers so it can verify the *call edge* without
|
||||||
pulling in console code. A separate diagnostic-format probe would
|
pulling in console code. A separate diagnostic-format probe would
|
||||||
link `libc.o` + `libcGno.o` + GNO crt0 and assert on stderr.
|
link `libc.o` + `libcGno.o` + GNO crt0 and assert on stderr.
|
||||||
- It is **not** a sweep of all 25 handler kinds. The user-spec scope
|
- It is **not** a sweep of all 25 handler kinds. The kinds covered
|
||||||
is "3 representative kinds". The other 22 are link-tested
|
are all the cheap-to-trigger recoverable handlers that clang emits
|
||||||
implicitly by `runtime/ubsan.o`'s symbol set being available.
|
at `-O2` for the W65816 target. Aborting-only kinds (e.g.
|
||||||
|
`builtin_unreachable_minimal`, `missing_return_minimal`) cannot be
|
||||||
|
exercised here because returning from the handler after the IR
|
||||||
|
`unreachable` is itself UB. Float-cast-overflow / VLA-not-positive
|
||||||
|
/ type-mismatch / CFI / Objective-C kinds are linked but not
|
||||||
|
triggered.
|
||||||
|
|
||||||
## Files
|
## Files
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -7,12 +7,14 @@
|
||||||
# What this verifies:
|
# What this verifies:
|
||||||
# - clang accepts -fsanitize=undefined -fsanitize-minimal-runtime on
|
# - clang accepts -fsanitize=undefined -fsanitize-minimal-runtime on
|
||||||
# the w65816 target.
|
# the w65816 target.
|
||||||
# - The three exercised UB kinds (add-overflow / shift-out-of-bounds /
|
# - Nine exercised UB kinds (add-overflow / shift-out-of-bounds /
|
||||||
# divrem-overflow) instrument as expected — the handler-fired byte
|
# divrem-overflow / sub-overflow / mul-overflow / negate-overflow /
|
||||||
# flips inside the per-kind handler override.
|
# pointer-overflow / load-invalid-value / out-of-bounds) instrument
|
||||||
|
# as expected -- the handler-fired byte flips inside the per-kind
|
||||||
|
# handler override.
|
||||||
# - The recovering minimal runtime returns to the caller cleanly, so
|
# - The recovering minimal runtime returns to the caller cleanly, so
|
||||||
# the probe continues writing sentinels past each UB site.
|
# the probe continues writing sentinels past each UB site.
|
||||||
# - runtime/ubsan.o links + resolves the other 22 handler kinds without
|
# - runtime/ubsan.o links + resolves the other handler kinds without
|
||||||
# pulling in console code that the probe doesn't need.
|
# pulling in console code that the probe doesn't need.
|
||||||
|
|
||||||
set -eu
|
set -eu
|
||||||
|
|
@ -27,7 +29,7 @@ bash "$SCRIPT_DIR/build.sh"
|
||||||
|
|
||||||
# Link. crt0.o + the probe + ubsan.o + libgcc.o (for the i16 div+rem
|
# Link. crt0.o + the probe + ubsan.o + libgcc.o (for the i16 div+rem
|
||||||
# helpers triggerDivByZero needs). We deliberately do NOT link libc.o
|
# helpers triggerDivByZero needs). We deliberately do NOT link libc.o
|
||||||
# — the probe sets memory sentinels directly, doesn't call printf, and
|
# -- the probe sets memory sentinels directly, doesn't call printf, and
|
||||||
# pulling libc.o in would also pull snprintf.o (~9 KB) for no benefit.
|
# pulling libc.o in would also pull snprintf.o (~9 KB) for no benefit.
|
||||||
"$PROJECT_ROOT/tools/link816" -o ubsanProbe.bin \
|
"$PROJECT_ROOT/tools/link816" -o ubsanProbe.bin \
|
||||||
--text-base 0x1000 --bss-base 0xA000 --map ubsanProbe.map \
|
--text-base 0x1000 --bss-base 0xA000 --map ubsanProbe.map \
|
||||||
|
|
@ -39,11 +41,22 @@ bash "$SCRIPT_DIR/build.sh"
|
||||||
ls -la ubsanProbe.bin
|
ls -la ubsanProbe.bin
|
||||||
echo ""
|
echo ""
|
||||||
|
|
||||||
# Sentinels:
|
# Sentinels (one per recoverable handler exercised, plus a tail
|
||||||
|
# liveness sentinel). Each is a 16-bit write at $025000+kind*2.
|
||||||
# $025000 = 0xC0DE add-overflow handler fired
|
# $025000 = 0xC0DE add-overflow handler fired
|
||||||
# $025002 = 0xC0DF shift-out-of-bounds handler fired
|
# $025002 = 0xC0DF shift-out-of-bounds handler fired
|
||||||
# $025004 = 0xC0E0 divrem-overflow handler fired
|
# $025004 = 0xC0E0 divrem-overflow handler fired
|
||||||
# $025006 = 0xC0DA all three recovered and main reached its tail
|
# $025006 = 0xC0E1 sub-overflow handler fired
|
||||||
|
# $025008 = 0xC0E2 mul-overflow handler fired
|
||||||
|
# $02500A = 0xC0E3 negate-overflow handler fired
|
||||||
|
# $02500C = 0xC0E4 pointer-overflow handler fired
|
||||||
|
# $02500E = 0xC0E5 load-invalid-value handler fired
|
||||||
|
# $025010 = 0xC0E6 out-of-bounds handler fired
|
||||||
|
# $025012 = 0xC0DA all nine recovered and main reached its tail
|
||||||
bash "$PROJECT_ROOT/scripts/runInMame.sh" \
|
bash "$PROJECT_ROOT/scripts/runInMame.sh" \
|
||||||
"$SCRIPT_DIR/ubsanProbe.bin" \
|
"$SCRIPT_DIR/ubsanProbe.bin" \
|
||||||
--check 0x025000=C0DE 0x025002=C0DF 0x025004=C0E0 0x025006=C0DA
|
--check \
|
||||||
|
0x025000=C0DE 0x025002=C0DF 0x025004=C0E0 \
|
||||||
|
0x025006=C0E1 0x025008=C0E2 0x02500A=C0E3 \
|
||||||
|
0x02500C=C0E4 0x02500E=C0E5 0x025010=C0E6 \
|
||||||
|
0x025012=C0DA
|
||||||
|
|
|
||||||
|
|
@ -1,44 +1,62 @@
|
||||||
// Phase 6.2 UBSan-min smoke probe.
|
// Phase 6.2 UBSan-min smoke probe.
|
||||||
//
|
//
|
||||||
// Three UB cases (one each from the spec):
|
// Nine UB cases — one per recoverable handler kind we exercise:
|
||||||
// kind 0 (sentinel 0xC0DE): signed-overflow add (i16 INT_MAX + 1)
|
// kind 0 (sentinel 0xC0DE): add-overflow (i16 INT_MAX + 1)
|
||||||
// kind 1 (sentinel 0xC0DF): shift-out-of-bounds (1 << 17 on a u16)
|
// kind 1 (sentinel 0xC0DF): shift-out-of-bounds (1 << 17 on a u16)
|
||||||
// kind 2 (sentinel 0xC0E0): divide-by-zero (n / 0)
|
// kind 2 (sentinel 0xC0E0): divrem-overflow (n / 0)
|
||||||
|
// kind 3 (sentinel 0xC0E1): sub-overflow (INT_MIN - 1)
|
||||||
|
// kind 4 (sentinel 0xC0E2): mul-overflow (INT_MAX * 2)
|
||||||
|
// kind 5 (sentinel 0xC0E3): negate-overflow (-INT_MIN)
|
||||||
|
// kind 6 (sentinel 0xC0E4): pointer-overflow (ptr + huge offset)
|
||||||
|
// kind 7 (sentinel 0xC0E5): load-invalid-value (_Bool from byte=2)
|
||||||
|
// kind 8 (sentinel 0xC0E6): out-of-bounds (arr[idx>=N])
|
||||||
//
|
//
|
||||||
// The probe overrides the three relevant `__ubsan_handle_*_minimal`
|
// The probe overrides each relevant `__ubsan_handle_*_minimal` recovering
|
||||||
// recovering handlers with strong definitions that record their
|
// handler with a strong definition that records its firing in a static
|
||||||
// firing in a static state byte. After each UB, the probe writes
|
// state byte. After each UB, the probe writes 0xC0DE+kind to a per-kind
|
||||||
// 0xC0DE + kind to $025000 to prove (a) the instrumentation fired and
|
// 16-bit slot at 0x025000+kind*2 to prove (a) the instrumentation fired
|
||||||
// (b) execution recovered cleanly past the UB. The recover handler
|
// and (b) execution recovered cleanly past the UB. The recover handler
|
||||||
// returning normally is the whole point of -fsanitize-minimal-runtime
|
// returning normally is the whole point of -fsanitize-minimal-runtime
|
||||||
// + -fsanitize-recover; this probe is what proves the round-trip.
|
// + -fsanitize-recover; this probe is what proves the round-trip.
|
||||||
//
|
//
|
||||||
// To verify all three at once we cascade the sentinel writes through a
|
// To verify all nine at once we cascade the sentinel writes through a
|
||||||
// staircase of $025000 / $025002 / $025004 word stores so the smoke
|
// staircase of word stores so the smoke harness can read independent
|
||||||
// harness can read three independent 16-bit values back from MAME.
|
// 16-bit values back from MAME.
|
||||||
//
|
//
|
||||||
// Compile with -fsanitize=undefined -fsanitize-minimal-runtime.
|
// Compile with -fsanitize=undefined -fsanitize-minimal-runtime.
|
||||||
|
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
|
|
||||||
// Bank-2 BSS at $025000-$025006 — outside the SHR shadow and outside
|
// Bank-2 BSS at $025000-$025013 -- outside the SHR shadow and outside
|
||||||
// $C000-$CFFF IO window. link816 places .bss at the user-specified
|
// $C000-$CFFF IO window. link816 places .bss at the user-specified
|
||||||
// --bss-base (we pass 0xA000) so these constant addresses are
|
// --bss-base (we pass 0xA000) so these constant addresses are
|
||||||
// independent of BSS layout.
|
// independent of BSS layout.
|
||||||
#define MARK_ADD_OVF ((volatile uint16_t *)0x025000UL)
|
#define MARK_ADD_OVF ((volatile uint16_t *)0x025000UL)
|
||||||
#define MARK_SHIFT_OOB ((volatile uint16_t *)0x025002UL)
|
#define MARK_SHIFT_OOB ((volatile uint16_t *)0x025002UL)
|
||||||
#define MARK_DIV_ZERO ((volatile uint16_t *)0x025004UL)
|
#define MARK_DIV_ZERO ((volatile uint16_t *)0x025004UL)
|
||||||
#define DONE_SENTINEL ((volatile uint16_t *)0x025006UL)
|
#define MARK_SUB_OVF ((volatile uint16_t *)0x025006UL)
|
||||||
|
#define MARK_MUL_OVF ((volatile uint16_t *)0x025008UL)
|
||||||
|
#define MARK_NEG_OVF ((volatile uint16_t *)0x02500AUL)
|
||||||
|
#define MARK_PTR_OVF ((volatile uint16_t *)0x02500CUL)
|
||||||
|
#define MARK_LOAD_INVAL ((volatile uint16_t *)0x02500EUL)
|
||||||
|
#define MARK_OUT_OF_BNDS ((volatile uint16_t *)0x025010UL)
|
||||||
|
#define DONE_SENTINEL ((volatile uint16_t *)0x025012UL)
|
||||||
|
|
||||||
|
|
||||||
// Strong overrides win over runtime/ubsan.o's weak-by-link defaults.
|
// Strong overrides win over runtime/ubsan.o's weak-by-link defaults.
|
||||||
// Each fires once per kind and records that the corresponding UB
|
// Each fires once per kind and records that the corresponding UB
|
||||||
// instrumentation reached us. Recovering handlers MUST return so the
|
// instrumentation reached us. Recovering handlers MUST return so the
|
||||||
// probe continues executing past the UB site.
|
// probe continues executing past the UB site.
|
||||||
static volatile uint8_t handlerFiredAdd = 0;
|
static volatile uint8_t handlerFiredAdd = 0;
|
||||||
static volatile uint8_t handlerFiredShift = 0;
|
static volatile uint8_t handlerFiredShift = 0;
|
||||||
static volatile uint8_t handlerFiredDiv = 0;
|
static volatile uint8_t handlerFiredDiv = 0;
|
||||||
|
static volatile uint8_t handlerFiredSub = 0;
|
||||||
|
static volatile uint8_t handlerFiredMul = 0;
|
||||||
|
static volatile uint8_t handlerFiredNeg = 0;
|
||||||
|
static volatile uint8_t handlerFiredPtr = 0;
|
||||||
|
static volatile uint8_t handlerFiredLoadInv = 0;
|
||||||
|
static volatile uint8_t handlerFiredOob = 0;
|
||||||
|
|
||||||
|
|
||||||
void __ubsan_handle_add_overflow_minimal(void) {
|
void __ubsan_handle_add_overflow_minimal(void) {
|
||||||
|
|
@ -56,6 +74,36 @@ void __ubsan_handle_divrem_overflow_minimal(void) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __ubsan_handle_sub_overflow_minimal(void) {
|
||||||
|
handlerFiredSub = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __ubsan_handle_mul_overflow_minimal(void) {
|
||||||
|
handlerFiredMul = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __ubsan_handle_negate_overflow_minimal(void) {
|
||||||
|
handlerFiredNeg = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __ubsan_handle_pointer_overflow_minimal(void) {
|
||||||
|
handlerFiredPtr = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __ubsan_handle_load_invalid_value_minimal(void) {
|
||||||
|
handlerFiredLoadInv = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void __ubsan_handle_out_of_bounds_minimal(void) {
|
||||||
|
handlerFiredOob = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Each UB site goes through a noinline wrapper so the optimizer
|
// Each UB site goes through a noinline wrapper so the optimizer
|
||||||
// cannot constant-fold the operation away. __attribute__((noinline))
|
// cannot constant-fold the operation away. __attribute__((noinline))
|
||||||
// + volatile inputs blocks the obvious folding paths; we also wrap
|
// + volatile inputs blocks the obvious folding paths; we also wrap
|
||||||
|
|
@ -79,6 +127,47 @@ static int16_t triggerDivByZero(int16_t a, int16_t b) {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
static int16_t triggerSubOverflow(int16_t a, int16_t b) {
|
||||||
|
return a - b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
static int16_t triggerMulOverflow(int16_t a, int16_t b) {
|
||||||
|
return a * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
static int16_t triggerNegateOverflow(int16_t a) {
|
||||||
|
return -a;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
static char *triggerPointerOverflow(char *p, int32_t o) {
|
||||||
|
return p + o;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
static int triggerLoadInvalidValue(volatile uint8_t *p) {
|
||||||
|
_Bool v = *(_Bool *)p;
|
||||||
|
// Use the value so the load isn't dead-stripped. We don't trust
|
||||||
|
// the post-instrumentation cast to a 0/1 narrow value -- the
|
||||||
|
// important thing is the load itself fired the handler.
|
||||||
|
return v ? 1 : 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
__attribute__((noinline))
|
||||||
|
static int16_t triggerOutOfBounds(int16_t idx) {
|
||||||
|
static int16_t arr[4] = { 10, 20, 30, 40 };
|
||||||
|
return arr[idx];
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
int main(void) {
|
int main(void) {
|
||||||
// --- case 0: signed-overflow add (INT16_MAX + 1) ---
|
// --- case 0: signed-overflow add (INT16_MAX + 1) ---
|
||||||
volatile int16_t aMax = 0x7FFF;
|
volatile int16_t aMax = 0x7FFF;
|
||||||
|
|
@ -104,12 +193,58 @@ int main(void) {
|
||||||
*MARK_DIV_ZERO = 0xC0E0;
|
*MARK_DIV_ZERO = 0xC0E0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Final liveness sentinel — only written if we got past all three
|
// --- case 3: sub-overflow (INT16_MIN - 1) ---
|
||||||
|
volatile int16_t aMin = (int16_t)0x8000;
|
||||||
|
(void)triggerSubOverflow(aMin, aOne);
|
||||||
|
if (handlerFiredSub) {
|
||||||
|
*MARK_SUB_OVF = 0xC0E1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- case 4: mul-overflow (INT16_MAX * 2 wraps) ---
|
||||||
|
volatile int16_t aTwo = 2;
|
||||||
|
(void)triggerMulOverflow(aMax, aTwo);
|
||||||
|
if (handlerFiredMul) {
|
||||||
|
*MARK_MUL_OVF = 0xC0E2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- case 5: negate-overflow (-INT16_MIN) ---
|
||||||
|
(void)triggerNegateOverflow(aMin);
|
||||||
|
if (handlerFiredNeg) {
|
||||||
|
*MARK_NEG_OVF = 0xC0E3;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- case 6: pointer-overflow (address computation wraps around) ---
|
||||||
|
// Cast a high address (0xFFFFFFF0) to char* and add a positive offset
|
||||||
|
// (0x40) so the address calculation wraps. -fsanitize=pointer-overflow
|
||||||
|
// fires when the computed pointer wraps past the end of the address space.
|
||||||
|
volatile uint32_t hiAddr = 0xFFFFFFF0UL;
|
||||||
|
volatile int32_t big = 0x40;
|
||||||
|
char *p = (char *)(uintptr_t)hiAddr;
|
||||||
|
(void)triggerPointerOverflow(p, big);
|
||||||
|
if (handlerFiredPtr) {
|
||||||
|
*MARK_PTR_OVF = 0xC0E4;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- case 7: load-invalid-value (_Bool from byte=2) ---
|
||||||
|
volatile uint8_t boolByte = 2;
|
||||||
|
(void)triggerLoadInvalidValue(&boolByte);
|
||||||
|
if (handlerFiredLoadInv) {
|
||||||
|
*MARK_LOAD_INVAL = 0xC0E5;
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- case 8: out-of-bounds (static arr[idx>=N]) ---
|
||||||
|
volatile int16_t badIdx = 7;
|
||||||
|
(void)triggerOutOfBounds(badIdx);
|
||||||
|
if (handlerFiredOob) {
|
||||||
|
*MARK_OUT_OF_BNDS = 0xC0E6;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Final liveness sentinel -- only written if we got past all nine
|
||||||
// UB sites without the runtime aborting (which would have spun on
|
// UB sites without the runtime aborting (which would have spun on
|
||||||
// a BRK_pseudo at $70 instead of reaching here).
|
// a BRK_pseudo at $70 instead of reaching here).
|
||||||
*DONE_SENTINEL = 0xC0DA;
|
*DONE_SENTINEL = 0xC0DA;
|
||||||
|
|
||||||
// Halt — crt0's return-from-main path hits a BRK that headless
|
// Halt -- crt0's return-from-main path hits a BRK that headless
|
||||||
// MAME wild-jumps from, so spin-wait instead.
|
// MAME wild-jumps from, so spin-wait instead.
|
||||||
while (1) {
|
while (1) {
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@
|
||||||
"num": 1,
|
"num": 1,
|
||||||
"name": "SEG1",
|
"name": "SEG1",
|
||||||
"base": "0x001000",
|
"base": "0x001000",
|
||||||
"size": 3432,
|
"size": 5084,
|
||||||
"image": "ubsanProbe.bin",
|
"image": "ubsanProbe.bin",
|
||||||
"entry_offset": "0x0000"
|
"entry_offset": "0x0000"
|
||||||
}
|
}
|
||||||
|
|
@ -22,6 +22,12 @@
|
||||||
{"addr": "0x025000", "expect": "0xC0DE", "label": "add-overflow handler fired"},
|
{"addr": "0x025000", "expect": "0xC0DE", "label": "add-overflow handler fired"},
|
||||||
{"addr": "0x025002", "expect": "0xC0DF", "label": "shift-out-of-bounds handler fired"},
|
{"addr": "0x025002", "expect": "0xC0DF", "label": "shift-out-of-bounds handler fired"},
|
||||||
{"addr": "0x025004", "expect": "0xC0E0", "label": "divrem-overflow handler fired"},
|
{"addr": "0x025004", "expect": "0xC0E0", "label": "divrem-overflow handler fired"},
|
||||||
{"addr": "0x025006", "expect": "0xC0DA", "label": "main reached tail after all three recoveries"}
|
{"addr": "0x025006", "expect": "0xC0E1", "label": "sub-overflow handler fired"},
|
||||||
|
{"addr": "0x025008", "expect": "0xC0E2", "label": "mul-overflow handler fired"},
|
||||||
|
{"addr": "0x02500A", "expect": "0xC0E3", "label": "negate-overflow handler fired"},
|
||||||
|
{"addr": "0x02500C", "expect": "0xC0E4", "label": "pointer-overflow handler fired"},
|
||||||
|
{"addr": "0x02500E", "expect": "0xC0E5", "label": "load-invalid-value handler fired"},
|
||||||
|
{"addr": "0x025010", "expect": "0xC0E6", "label": "out-of-bounds handler fired"},
|
||||||
|
{"addr": "0x025012", "expect": "0xC0DA", "label": "main reached tail after all nine recoveries"}
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue