65816-llvm-mos/src/llvm/lib/Target/W65816/W65816StackSlotMerge.cpp
Scott Duensing 42f0d16d07 Checkpoint
2026-05-13 20:54:28 -05:00

733 lines
28 KiB
C++

//===-- W65816StackSlotMerge.cpp - Merge value-equivalent stack slots ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// Pre-emit pass that runs after PEI (eliminateFrameIndex) and merges
// pairs of stack-rel slots that hold the same value at every observable
// program point — typically the PHI src/dst pair PHI-elim leaves at
// the back-edge of a loop body.
//
// LLVM's StackSlotColoring merges slots with non-overlapping liveness.
// It can't merge slots that are simultaneously live but happen to hold
// the same value (which is what a PHI memory-copy creates). This pass
// catches that case via a stricter "value equivalence" check.
//
// Canonical pattern (sumSquares loop body):
//
// .LBB0_4:
// LDA 0x7, s ; PHA ; JSL __umulhisi3 ; PLY
// CLC ; ADC 0x3, s ; STA 0xb, s ; new total.lo (write X)
// TXA ; ADC 0x1, s ; STA 0x9, s
// LDA 0x7, s ; INC A ; STA 0x7, s
// LDA 0xb, s ; STA 0x3, s ; PHI copy: load X, store Y
// LDA 0x9, s ; STA 0x1, s
// ...
//
// The pair (0xb, 0x3) is the lo-half PHI memory copy. Slots 0xb and
// 0x3 always hold the same value at every read site:
// - Function entry: both initialized to 0 (`lda #0; sta 0xb, s` in
// entry, `lda #0; sta 0x3, s` in preheader).
// - Loop iteration: the PHI copy moves the new total.lo from 0xb to
// 0x3 at the end of every iteration.
// - Exit: only 0xb is read (return value), but its value equals 0x3's.
//
// Rename 0xb → 0x3 function-wide; the now self-copy `lda 0x3; sta 0x3`
// is dead and we erase it. Saves 2 inst per PHI copy occurrence (the
// memory copy round-trip). sumSquares loop body shrinks from 21 to
// 17 inst per iter.
//
// Safety check (sufficient condition for value equivalence):
// 1. Both slots have ≥1 STA in the function (skips arg slots passed
// by the caller — those have only LDA reads, no STAs, and renaming
// would change where we read the arg from).
// 2. For every STA X in the function, find a "twin" STA Y at a
// program point where the values match. Matching = either:
// (a) Same MBB, same A-source value (no intervening A-define).
// Covers the loop-body iter-end pattern: STA X then later
// LDA X ; STA Y. Also covers entry's `lda #N ; sta X` if
// the same MBB also has `sta Y`.
// (b) Different MBBs, both preceded by `LDA #const` of the same
// constant. Covers entry-block STA X=0 paired with
// preheader STA Y=0.
// 3. Symmetric: for every STA Y, find a twin STA X.
// 4. No "orphan" STAs. If a STA X or STA Y has no twin, bail.
//
// When all checks pass, the rename function-wide preserves semantics:
// every read of slot X at program point P sees the same value that
// slot Y holds at P (and vice versa).
//
//===---------------------------------------------------------------------===//
#include "W65816.h"
#include "W65816InstrInfo.h"
#include "W65816Subtarget.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Debug.h"
#include <iterator>
#include <optional>
using namespace llvm;
#define DEBUG_TYPE "w65816-stack-slot-merge"
namespace {
/// Machine-function pass that merges pairs of stack-relative slots proven to
/// hold the same value, then removes the copies the merge makes redundant.
/// The transformation itself lives in runOnMachineFunction().
class W65816StackSlotMerge : public MachineFunctionPass {
public:
  static char ID;

  W65816StackSlotMerge() : MachineFunctionPass(ID) {}

  /// Entry point; returns true when the function was modified.
  bool runOnMachineFunction(MachineFunction &MF) override;

  /// The pass consults the machine dominator tree and never edits the CFG.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineDominatorTreeWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return "W65816 merge value-equivalent stack slots (PHI-copy collapse)";
  }
};
} // namespace
// Storage for the static pass ID declared in the class; its address is what
// the constructor hands to MachineFunctionPass.
char W65816StackSlotMerge::ID = 0;
// Pass registration under DEBUG_TYPE ("w65816-stack-slot-merge"). The
// dependency line matches the addRequired<> in getAnalysisUsage(): the
// machine dominator tree must be initialized before this pass.
INITIALIZE_PASS_BEGIN(W65816StackSlotMerge, DEBUG_TYPE,
"W65816 stack slot merge", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(W65816StackSlotMerge, DEBUG_TYPE,
"W65816 stack slot merge", false, false)
// Factory used by the target to obtain a fresh instance of the pass. The
// returned object is heap-allocated with `new`.
// NOTE(review): presumably the pass manager takes ownership - confirm at the
// call site.
FunctionPass *llvm::createW65816StackSlotMerge() {
return new W65816StackSlotMerge();
}
// Stack-relative MC opcodes — the ops that survive eliminateFrameIndex
// and reference a slot via an 8-bit SP-relative offset. Returns true when
// `Op` is one of the eight stack-relative accumulator opcodes this pass
// knows how to index and rename.
static bool isStackRelOp(unsigned Op) {
  switch (Op) {
  case W65816::LDA_StackRel:
  case W65816::STA_StackRel:
  case W65816::ADC_StackRel:
  case W65816::SBC_StackRel:
  case W65816::AND_StackRel:
  case W65816::ORA_StackRel:
  case W65816::EOR_StackRel:
  case W65816::CMP_StackRel:
    return true;
  default:
    return false;
  }
}
// Returns true if MI is a stack-rel op; out-param Off receives the slot
// offset (operand 0).
static bool srAccess(const MachineInstr &MI, int64_t &Off) {
if (!isStackRelOp(MI.getOpcode())) return false;
if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return false;
Off = MI.getOperand(0).getImm();
return true;
}
// True if the MI semantically defines A. Covers both the explicit
// case (operand has reg=A,isDef) AND the implicit case where the
// tablegen InstDP / InstAbs / etc. base classes omit the A-Def
// annotation despite LDA semantically writing A (a backend modelling
// gap — many `LDA_DP`, `LDA_Abs`, `LDA_LongX`, etc. are missing the
// implicit-def in the MIR even though they load into A). Opcode-
// based fallback catches all of them.
static bool semanticallyDefsA(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() == W65816::A && MO.isDef())
return true;
}
unsigned Op = MI.getOpcode();
switch (Op) {
case W65816::LDA_DP: case W65816::LDA_DPX:
case W65816::LDA_DPInd: case W65816::LDA_DPIndY:
case W65816::LDA_DPIndX:
case W65816::LDA_Abs: case W65816::LDA_AbsX:
case W65816::LDA_AbsY: case W65816::LDA_Long:
case W65816::LDA_LongX:
case W65816::PLA:
return true;
default:
return false;
}
}
// Walk backward from MI inside its MBB and return the nearest instruction
// for which semanticallyDefsA() holds. Debug instructions are skipped.
// Returns nullptr when the start of the block is reached first, or when a
// call or inline-asm instruction is met before any A-define (their effect on
// A is treated as unknown).
static MachineInstr *findPriorADef(MachineInstr *MI) {
  MachineBasicBlock *MBB = MI->getParent();
  for (auto It = MI->getIterator(); It != MBB->begin();) {
    MachineInstr &Cand = *--It;
    if (Cand.isDebugInstr())
      continue;
    if (Cand.isCall() || Cand.isInlineAsm())
      return nullptr;
    if (semanticallyDefsA(Cand))
      return &Cand;
  }
  return nullptr;
}
// Walk forward from `Start` (exclusive) up to (but not including) `End`
// in the same MBB, looking for a `STA_StackRel` to `WatchSlot`.
//
// Returns true only if `End` is actually reached and no such store was seen
// on the way. Returns false if a store to `WatchSlot` is found, or if the
// walk hits the end of the block without meeting `End` — i.e. `End` does not
// lie after `Start`. In that case the interval is ill-formed and nothing can
// be proven; previously the loop would have stepped past the block's end
// iterator and dereferenced it.
static bool slotNotWrittenBetween(MachineBasicBlock::iterator Start,
                                  MachineBasicBlock::iterator End,
                                  int64_t WatchSlot) {
  const MachineBasicBlock::iterator BlockEnd = Start->getParent()->end();
  auto It = std::next(Start);
  for (; It != End && It != BlockEnd; ++It) {
    if (It->isDebugInstr())
      continue;
    int64_t Off;
    if (It->getOpcode() == W65816::STA_StackRel && srAccess(*It, Off) &&
        Off == WatchSlot)
      return false;
  }
  // It == End covers both "stopped at End" and the legitimate End == block
  // end case; anything else means End was never ahead of Start.
  return It == End;
}
// Returns true if MI clobbers P (N/Z/C/V flags). Mirrors LLVM's
// operand-based check + an opcode whitelist for tablegen entries that
// omit `Defs = [P]` (InstImplied, InstStackRel, etc.).
static bool clobbersFlagsP(const MachineInstr &MI) {
for (const MachineOperand &MO : MI.operands()) {
if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef())
return true;
}
if (MI.isCall() || MI.isInlineAsm()) return true;
unsigned Op = MI.getOpcode();
switch (Op) {
case W65816::PLA: case W65816::PLY: case W65816::PLX:
case W65816::PLP:
case W65816::INA: case W65816::DEA:
case W65816::INX: case W65816::DEX:
case W65816::INY: case W65816::DEY:
case W65816::TAX: case W65816::TAY:
case W65816::TYA: case W65816::TXA:
case W65816::TYX: case W65816::TXY:
case W65816::LDA_StackRel: case W65816::LDA_DP:
case W65816::LDA_DPX: case W65816::LDA_DPInd:
case W65816::LDA_DPIndY: case W65816::LDA_DPIndX:
case W65816::LDA_Abs: case W65816::LDA_AbsX:
case W65816::LDA_AbsY: case W65816::LDA_Long:
case W65816::LDA_LongX:
case W65816::ADC_StackRel: case W65816::SBC_StackRel:
case W65816::CMP_StackRel: case W65816::AND_StackRel:
case W65816::ORA_StackRel: case W65816::EOR_StackRel:
case W65816::ADC_DP: case W65816::ADC_Abs:
case W65816::SBC_DP: case W65816::SBC_Abs:
case W65816::CMP_DP: case W65816::CMP_Abs:
case W65816::AND_DP: case W65816::AND_Abs:
case W65816::ORA_DP: case W65816::ORA_Abs:
case W65816::EOR_DP: case W65816::EOR_Abs:
return true;
default:
return false;
}
}
// Returns true if MI is known to read the processor status register P:
// either it is a conditional branch, or it carries a register operand on P
// that is a use and not a def. Instructions that read flags without
// declaring it in their operands are not detected here.
static bool usesFlagsP(const MachineInstr &MI) {
  if (MI.isConditionalBranch())
    return true;
  for (const MachineOperand &MO : MI.operands()) {
    const bool IsP = MO.isReg() && MO.getReg() == W65816::P;
    if (!IsP)
      continue;
    if (MO.isUse() && !MO.isDef())
      return true;
  }
  return false;
}
// Returns the closest instruction strictly before MI, in MI's own MBB, for
// which semanticallyDefsA() holds. Debug instructions are skipped. Unlike
// findPriorADef(), the walk does not give up at calls or inline asm.
// Returns nullptr when the start of the block is reached without a match.
static MachineInstr *findMostRecentADef(MachineInstr *MI) {
  MachineBasicBlock *Block = MI->getParent();
  auto Cursor = MI->getIterator();
  MachineInstr *Found = nullptr;
  while (!Found && Cursor != Block->begin()) {
    --Cursor;
    if (!Cursor->isDebugInstr() && semanticallyDefsA(*Cursor))
      Found = &*Cursor;
  }
  return Found;
}
// "Twin" check. Given a store `StaX` and the list `StasY` of all stores to
// the candidate partner slot, return one store from `StasY` that is value-
// equivalent to `StaX`, or nullptr if none qualifies.
//
// Source-value equivalence cases:
//   (1) Same-MBB twin store: no A-define between the earlier and the later
//       of the two stores, so both store the same A value.
//   (2) Same-MBB PHI-copy: the most recent A-define before the later store
//       is `LDA_StackRel <earlier store's slot>`, that load sits between the
//       two stores, and the earlier store's slot is not re-written between
//       the earlier store and that load.
//   (3) Different MBBs, both stores preceded (per findPriorADef) by the same
//       LDA_Imm16 / LDAi16imm opcode carrying the same immediate. Covers the
//       entry/preheader parallel-init pair.
//
// Two guards protect the cases above:
//   - Case (2) first checks that the reload really lies between the two
//     stores. Without that, a reload located before the earlier store would
//     hand slotNotWrittenBetween() a reversed range. Such a pair is still
//     accepted through case (1), since no A-define separates the stores.
//   - Case (3) requires an actual immediate operand on both loads. A load
//     whose operand is not a plain immediate is no longer treated as
//     "constant 0", which used to make two unrelated non-immediate loads
//     compare equal.
static MachineInstr *findTwin(MachineInstr *StaX,
                              ArrayRef<MachineInstr *> StasY) {
  MachineBasicBlock *MBBStaX = StaX->getParent();

  // True when Mid is one of the instructions strictly between Lo and Hi.
  // Precondition: Lo precedes Hi in the same block.
  auto liesStrictlyBetween = [](MachineInstr *Lo, MachineInstr *Mid,
                                MachineInstr *Hi) {
    for (auto It = std::next(Lo->getIterator()); It != Hi->getIterator();
         ++It) {
      if (&*It == Mid)
        return true;
    }
    return false;
  };

  // First immediate operand of MI, if it has one.
  auto immediateOf = [](const MachineInstr &MI) -> std::optional<int64_t> {
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isImm())
        return MO.getImm();
    }
    return std::nullopt;
  };

  // Cases (1) + (2): same MBB.
  for (MachineInstr *StaY : StasY) {
    if (StaY->getParent() != MBBStaX)
      continue;

    // Determine which of the two stores comes first in the block.
    MachineInstr *Earlier = nullptr;
    MachineInstr *Later = nullptr;
    for (MachineInstr &MI : *MBBStaX) {
      if (&MI == StaX) { Earlier = StaX; Later = StaY; break; }
      if (&MI == StaY) { Earlier = StaY; Later = StaX; break; }
    }
    if (!Earlier || !Later)
      continue;
    const int64_t EOff = Earlier->getOperand(0).getImm();

    // Case (2): Later is fed by a reload of Earlier's slot.
    if (MachineInstr *PriorOfLater = findMostRecentADef(Later)) {
      int64_t Off;
      if (liesStrictlyBetween(Earlier, PriorOfLater, Later) &&
          PriorOfLater->getOpcode() == W65816::LDA_StackRel &&
          srAccess(*PriorOfLater, Off) && Off == EOff &&
          slotNotWrittenBetween(Earlier->getIterator(),
                                PriorOfLater->getIterator(), EOff)) {
        return StaY;
      }
    }

    // Case (1): no A-define between Earlier and Later — same A value.
    bool NoADefs = true;
    for (auto It = std::next(Earlier->getIterator());
         It != Later->getIterator(); ++It) {
      if (It->isDebugInstr())
        continue;
      if (semanticallyDefsA(*It)) { NoADefs = false; break; }
    }
    if (NoADefs)
      return StaY;
  }

  // Case (3): different MBBs, both fed by the same immediate load.
  MachineInstr *PriorX = findPriorADef(StaX);
  if (!PriorX)
    return nullptr;
  const unsigned PriorXOp = PriorX->getOpcode();
  if (PriorXOp != W65816::LDA_Imm16 && PriorXOp != W65816::LDAi16imm)
    return nullptr;
  const std::optional<int64_t> XConst = immediateOf(*PriorX);
  if (!XConst)
    return nullptr; // Operand is not a plain constant; cannot compare.

  for (MachineInstr *StaY : StasY) {
    if (StaY->getParent() == MBBStaX)
      continue;
    MachineInstr *PriorY = findPriorADef(StaY);
    if (!PriorY || PriorY->getOpcode() != PriorXOp)
      continue;
    const std::optional<int64_t> YConst = immediateOf(*PriorY);
    if (YConst && *YConst == *XConst)
      return StaY;
  }
  return nullptr;
}
// Block-local clean-ups that do not depend on the slot-rename analysis and
// therefore run on every function. Returns true if any instruction was
// erased.
//
//   Phase 6a: in `STA Y, s` directly followed by `LDA Y, s` (debug
//             instructions aside) the reload is dropped when the forward
//             scan below proves that safe.
//   Phase 6:  a second `LDA #K` is dropped when A is already known to hold K
//             and only A-and-P-preserving instructions ran in between.
static bool runPerMBBPeepholes(MachineFunction &MF) {
  bool Modified = false;

  // ---- Phase 6a -------------------------------------------------------
  // Decides whether the reload at `Reload` may be erased by scanning the
  // rest of the block. The scan gives up (returns false) on a call, on any
  // instruction with a pure-use operand on A, on any reader of P, or when
  // the block ends undecided. It succeeds on the first non-conditional
  // terminator or known P-clobber.
  auto reloadIsRemovable = [](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator Reload) -> bool {
    for (auto Fwd = std::next(Reload); Fwd != MBB.end(); ++Fwd) {
      if (Fwd->isDebugInstr())
        continue;
      // Calls may take A as an argument; never look past them.
      if (Fwd->isCall())
        return false;
      // A declared as live-in to this instruction: leave the reload alone.
      for (const MachineOperand &MO : Fwd->operands()) {
        if (MO.isReg() && MO.getReg() == W65816::A && MO.isUse() &&
            !MO.isDef())
          return false;
      }
      if (usesFlagsP(*Fwd))
        return false;
      if (Fwd->isTerminator() && !Fwd->isConditionalBranch())
        return true;
      if (clobbersFlagsP(*Fwd))
        return true;
    }
    return false;
  };

  for (MachineBasicBlock &MBB : MF) {
    SmallVector<MachineInstr *, 4> DeadReloads;
    for (auto Store = MBB.begin(); Store != MBB.end(); ++Store) {
      if (Store->isDebugInstr() ||
          Store->getOpcode() != W65816::STA_StackRel)
        continue;
      int64_t StoredSlot;
      if (!srAccess(*Store, StoredSlot))
        continue;

      // Next non-debug instruction must be a reload of the very same slot.
      auto Reload = std::next(Store);
      while (Reload != MBB.end() && Reload->isDebugInstr())
        ++Reload;
      if (Reload == MBB.end() ||
          Reload->getOpcode() != W65816::LDA_StackRel)
        continue;
      int64_t LoadedSlot;
      if (!srAccess(*Reload, LoadedSlot) || LoadedSlot != StoredSlot)
        continue;

      if (reloadIsRemovable(MBB, Reload))
        DeadReloads.push_back(&*Reload);
    }
    for (MachineInstr *MI : DeadReloads) {
      MI->eraseFromParent();
      Modified = true;
    }
  }

  // ---- Phase 6 --------------------------------------------------------
  // True when MI is known to leave both A and P untouched: either one of
  // the listed store opcodes, or a store-only instruction that neither
  // declares a def of P nor defines A.
  auto keepsAAndP = [](const MachineInstr &MI) -> bool {
    switch (MI.getOpcode()) {
    case W65816::STA_StackRel:
    case W65816::STA_DP: case W65816::STA_DPX:
    case W65816::STA_DPInd: case W65816::STA_DPIndY:
    case W65816::STA_DPIndX:
    case W65816::STA_Abs: case W65816::STA_AbsX:
    case W65816::STA_AbsY: case W65816::STA_Long:
    case W65816::STA_LongX:
    case W65816::STX_DP: case W65816::STX_Abs:
    case W65816::STY_DP: case W65816::STY_Abs: case W65816::STY_DPX:
    case W65816::STZ_DP: case W65816::STZ_Abs:
    case W65816::STZ_DPX: case W65816::STZ_AbsX:
      return true;
    default:
      break;
    }
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef())
        return false;
    }
    return MI.mayStore() && !MI.mayLoad() && !semanticallyDefsA(MI);
  };

  // The constant loaded by an `LDA #K` (LDA_Imm16 / LDAi16imm), or nullopt
  // when MI is not such a load or carries no immediate operand.
  auto loadedImmediate =
      [](const MachineInstr &MI) -> std::optional<int64_t> {
    const unsigned Op = MI.getOpcode();
    if (Op != W65816::LDA_Imm16 && Op != W65816::LDAi16imm)
      return std::nullopt;
    for (const MachineOperand &MO : MI.operands()) {
      if (MO.isImm())
        return MO.getImm();
    }
    return std::nullopt;
  };

  for (MachineBasicBlock &MBB : MF) {
    std::optional<int64_t> AccConst; // Constant A is known to hold, if any.
    SmallVector<MachineInstr *, 4> RedundantLoads;
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      if (std::optional<int64_t> K = loadedImmediate(MI)) {
        if (AccConst && *AccConst == *K)
          RedundantLoads.push_back(&MI);
        else
          AccConst = *K;
        continue;
      }
      // Anything that might touch A or P invalidates the known constant.
      if (!keepsAAndP(MI))
        AccConst.reset();
    }
    for (MachineInstr *MI : RedundantLoads) {
      MI->eraseFromParent();
      Modified = true;
    }
  }

  return Modified;
}
// Pass driver. Returns true if the function was modified in any way.
//
//   Peepholes: block-local clean-ups (runPerMBBPeepholes), always run.
//   Phase 1:   index every STA_StackRel by slot offset.
//   Phase 2:   collect `LDA X ; STA Y` candidates in self-looping blocks.
//   Phase 3:   keep a candidate only if every STA X has a twin STA Y and
//              vice versa (findTwin).
//   Phase 4:   rewrite offset X to Y on every stack-relative op, then remove
//              the resulting self-copies.
//   Phase 5:   remove a constant-init `LDA #K ; STA Y` that is dominated by
//              an identical one with no intervening write to Y.
//
// Safety rules applied when erasing:
//   - A `STA` proven redundant is always removable: the slot already holds
//     the value, and the store affects neither A nor the flags.
//   - The `LDA` feeding it is removed only when the very next real
//     instruction reloads A without reading it. That is the one situation
//     where both A and the N/Z flags the load produced are provably dead
//     without relying on use/def annotations the target may be missing.
//     Otherwise the load is kept, so later code (including a following store
//     whose own `LDA #K` the earlier peephole already removed) still sees
//     the expected accumulator.
//
// NOTE(review): slot identity is taken to be the raw stack-relative offset.
// This presumes offsets are not shifted by pushes between accesses — confirm
// against how eliminateFrameIndex handles SP adjustments.
bool W65816StackSlotMerge::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction())) return false;
  if (MF.getFunction().hasOptNone()) return false;

  // Run per-MBB peepholes first — independent of rename logic.
  const bool peepChanged = runPerMBBPeepholes(MF);

  // First non-debug instruction after It, or MBB.end().
  auto nextRealInstr = [](MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator It) {
    auto Next = std::next(It);
    while (Next != MBB.end() && Next->isDebugInstr())
      ++Next;
    return Next;
  };

  // Opcodes that overwrite A (and N/Z) from memory, an immediate or the
  // stack, without reading the previous A.
  auto reloadsA = [](const MachineInstr &MI) {
    switch (MI.getOpcode()) {
    case W65816::LDA_Imm16: case W65816::LDAi16imm:
    case W65816::LDA_StackRel:
    case W65816::LDA_DP: case W65816::LDA_DPX:
    case W65816::LDA_DPInd: case W65816::LDA_DPIndY:
    case W65816::LDA_DPIndX:
    case W65816::LDA_Abs: case W65816::LDA_AbsX:
    case W65816::LDA_AbsY: case W65816::LDA_Long:
    case W65816::LDA_LongX:
    case W65816::PLA:
      return true;
    default:
      return false;
    }
  };

  // True when A and the N/Z flags are provably dead right after `Sta`.
  auto accumulatorDeadAfter = [&](MachineInstr &Sta) {
    MachineBasicBlock &MBB = *Sta.getParent();
    auto Next = nextRealInstr(MBB, Sta.getIterator());
    return Next != MBB.end() && reloadsA(*Next);
  };

  // Phase 1: index all stack-rel STAs grouped by slot offset.
  DenseMap<int64_t, SmallVector<MachineInstr *, 4>> Stas;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      int64_t Off;
      if (MI.getOpcode() == W65816::STA_StackRel && srAccess(MI, Off))
        Stas[Off].push_back(&MI);
    }
  }

  // Phase 2: find PHI-copy site candidates. Pattern: LDA X ; STA Y in a
  // LOOP BODY MBB (= the MBB has itself as a predecessor). Restricting to
  // loop bodies distinguishes genuine PHI-cycle copies from one-shot temp
  // transfers. The first Y seen for a given X wins.
  DenseMap<int64_t, int64_t> PhiCopyPair; // X -> Y
  for (MachineBasicBlock &MBB : MF) {
    bool SelfLoop = false;
    for (MachineBasicBlock *Pred : MBB.predecessors()) {
      if (Pred == &MBB) { SelfLoop = true; break; }
    }
    if (!SelfLoop) continue;
    for (auto It = MBB.begin(); It != MBB.end(); ++It) {
      if (It->getOpcode() != W65816::LDA_StackRel) continue;
      int64_t X;
      if (!srAccess(*It, X)) continue;
      auto NextIt = nextRealInstr(MBB, It);
      if (NextIt == MBB.end()) continue;
      if (NextIt->getOpcode() != W65816::STA_StackRel) continue;
      int64_t Y;
      if (!srAccess(*NextIt, Y) || Y == X) continue;
      PhiCopyPair.insert({X, Y}); // No-op if X already has a partner.
    }
  }

  // Phase 3: validate each pair; record the rename if safe.
  auto everyStoreHasTwin = [](ArrayRef<MachineInstr *> Stores,
                              ArrayRef<MachineInstr *> Partners) {
    for (MachineInstr *Store : Stores) {
      if (!findTwin(Store, Partners))
        return false;
    }
    return true;
  };
  DenseMap<int64_t, int64_t> Renames; // X -> Y
  for (const auto &P : PhiCopyPair) {
    const int64_t X = P.first, Y = P.second;
    // Don't re-merge a slot that is already being renamed away.
    if (Renames.count(X) || Renames.count(Y)) continue;
    // Arg-slot guard: skip slots with no STAs (caller-passed args).
    auto XStores = Stas.find(X);
    auto YStores = Stas.find(Y);
    if (XStores == Stas.end() || YStores == Stas.end()) continue;
    // Every STA X needs a twin STA Y, and symmetrically.
    if (!everyStoreHasTwin(XStores->second, YStores->second)) continue;
    if (!everyStoreHasTwin(YStores->second, XStores->second)) continue;
    LLVM_DEBUG(dbgs() << "StackSlotMerge: rename slot " << X
                      << " -> " << Y << " in " << MF.getName() << "\n");
    Renames[X] = Y;
  }
  // Nothing to rename: the peepholes may still have edited the function,
  // and that must be reported to the pass manager.
  if (Renames.empty()) return peepChanged;

  // Phase 4: apply rename.
  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      int64_t Off;
      if (!srAccess(MI, Off)) continue;
      auto It = Renames.find(Off);
      if (It == Renames.end()) continue;
      MI.getOperand(0).setImm(It->second);
      Changed = true;
    }
    // After rename, `LDA Y ; STA Y` is a self-copy. The store is always
    // dead; the load only when A and N/Z are provably dead afterwards.
    SmallVector<MachineInstr *, 4> ToErase;
    for (auto It = MBB.begin(); It != MBB.end(); ++It) {
      if (It->getOpcode() != W65816::LDA_StackRel) continue;
      int64_t LdaOff;
      if (!srAccess(*It, LdaOff)) continue;
      auto NextIt = nextRealInstr(MBB, It);
      if (NextIt == MBB.end()) continue;
      if (NextIt->getOpcode() != W65816::STA_StackRel) continue;
      int64_t StaOff;
      if (!srAccess(*NextIt, StaOff)) continue;
      if (LdaOff != StaOff) continue;
      ToErase.push_back(&*NextIt);
      if (accumulatorDeadAfter(*NextIt))
        ToErase.push_back(&*It);
    }
    for (MachineInstr *MI : ToErase) MI->eraseFromParent();
    if (!ToErase.empty()) Changed = true;
  }

  // Phase 5: redundant constant-init elimination. After rename, the
  // Case (3) twin pairings leave TWO sites writing the same constant to the
  // same slot. The dominated one is redundant when no write to the slot can
  // happen between the dominating write and it.
  {
    // The constant carried by an `LDA #K`, or nullopt when MI is not
    // LDA_Imm16 / LDAi16imm or its operand is not a plain immediate.
    auto ldaConstant =
        [](const MachineInstr &MI) -> std::optional<int64_t> {
      const unsigned Op = MI.getOpcode();
      if (Op != W65816::LDA_Imm16 && Op != W65816::LDAi16imm)
        return std::nullopt;
      for (const MachineOperand &MO : MI.operands()) {
        if (MO.isImm())
          return MO.getImm();
      }
      return std::nullopt;
    };

    // One adjacent `LDA #K ; STA_StackRel Y` pair.
    struct ConstInit {
      MachineInstr *Lda;
      MachineInstr *Sta;
      int64_t K;
    };
    DenseMap<int64_t, SmallVector<ConstInit, 4>> ConstStas; // keyed by Y
    for (MachineBasicBlock &MBB : MF) {
      for (auto It = MBB.begin(); It != MBB.end(); ++It) {
        const std::optional<int64_t> K = ldaConstant(*It);
        if (!K) continue;
        auto NextIt = nextRealInstr(MBB, It);
        if (NextIt == MBB.end()) continue;
        if (NextIt->getOpcode() != W65816::STA_StackRel) continue;
        int64_t Y;
        if (!srAccess(*NextIt, Y)) continue;
        ConstStas[Y].push_back({&*It, &*NextIt, *K});
      }
    }

    auto &MDT = getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();

    // Conservative "might write slot Y": a STA_StackRel to Y, or any call /
    // inline asm (which may write the stack in ways not modelled here).
    auto mayWriteY = [](const MachineInstr &MI, int64_t Y) {
      if (MI.isCall() || MI.isInlineAsm()) return true;
      int64_t Off;
      return MI.getOpcode() == W65816::STA_StackRel && srAccess(MI, Off) &&
             Off == Y;
    };

    // True when Block can reach itself through at least one CFG edge.
    auto liesOnCycle = [](MachineBasicBlock *Block) {
      SmallPtrSet<MachineBasicBlock *, 8> Seen;
      SmallVector<MachineBasicBlock *, 8> Work;
      for (auto *Succ : Block->successors()) Work.push_back(Succ);
      while (!Work.empty()) {
        auto *Cur = Work.pop_back_val();
        if (Cur == Block) return true;
        if (!Seen.insert(Cur).second) continue;
        for (auto *Succ : Cur->successors()) Work.push_back(Succ);
      }
      return false;
    };

    // Checks that nothing may write slot Y on any path from From to To.
    // Reads are fine: both sites write the same K. When From and To share a
    // block this returns false (the tail scan in (a) meets To itself).
    auto noSlotWriteOnPath = [&](MachineInstr *From, MachineInstr *To,
                                 int64_t Y) -> bool {
      MachineBasicBlock *FromMBB = From->getParent();
      MachineBasicBlock *ToMBB = To->getParent();
      // (a) After From in its MBB.
      for (auto It = std::next(From->getIterator()); It != FromMBB->end();
           ++It) {
        if (It->isDebugInstr()) continue;
        if (mayWriteY(*It, Y)) return false;
      }
      // (b) In ToMBB, before To.
      for (auto It = ToMBB->begin(); It != To->getIterator(); ++It) {
        if (It->isDebugInstr()) continue;
        if (mayWriteY(*It, Y)) return false;
      }
      // (c) Every block reachable from FromMBB's successors. Exploration
      // normally stops at ToMBB. If ToMBB sits on a cycle, though, control
      // can run past To, write Y, and come back to To without passing From
      // again — so in that case the tail of ToMBB and everything reachable
      // from it is scanned as well.
      const bool ToOnCycle = liesOnCycle(ToMBB);
      SmallPtrSet<MachineBasicBlock *, 8> Visited;
      SmallVector<MachineBasicBlock *, 8> Stack;
      for (auto *Succ : FromMBB->successors()) Stack.push_back(Succ);
      while (!Stack.empty()) {
        auto *MBB = Stack.pop_back_val();
        if (!Visited.insert(MBB).second) continue;
        if (MBB == ToMBB) {
          if (!ToOnCycle) continue; // prefix already checked in (b)
          for (auto It = std::next(To->getIterator()); It != ToMBB->end();
               ++It) {
            if (It->isDebugInstr()) continue;
            if (mayWriteY(*It, Y)) return false;
          }
        } else {
          for (auto &MI : *MBB) {
            if (MI.isDebugInstr()) continue;
            if (mayWriteY(MI, Y)) return false;
          }
        }
        for (auto *Succ : MBB->successors()) Stack.push_back(Succ);
      }
      return true;
    };

    SmallVector<MachineInstr *, 8> ToErase;
    LLVM_DEBUG({
      dbgs() << "Phase 5 in " << MF.getName() << ":\n";
      for (auto &P : ConstStas) {
        dbgs() << "  slot " << P.first << " has " << P.second.size()
               << " const STAs\n";
      }
    });
    for (auto &P : ConstStas) {
      const int64_t Y = P.first;
      auto &Inits = P.second;
      if (Inits.size() < 2) continue;
      // For each pair (i, j) where i dominates j with the same constant K:
      for (ConstInit &Dominated : Inits) {
        for (ConstInit &Dominator : Inits) {
          if (&Dominator == &Dominated) continue;
          if (Dominator.K != Dominated.K) continue; // different K
          if (!MDT.dominates(Dominator.Sta, Dominated.Sta)) continue;
          if (!noSlotWriteOnPath(Dominator.Sta, Dominated.Sta, Y)) continue;
          // The store is redundant. Its feeding `LDA #K` goes too only when
          // nothing downstream can still depend on A or on its N/Z flags.
          ToErase.push_back(Dominated.Sta);
          if (accumulatorDeadAfter(*Dominated.Sta))
            ToErase.push_back(Dominated.Lda);
          break;
        }
      }
    }
    // De-dup ToErase before erasing.
    SmallPtrSet<MachineInstr *, 8> ErasedSet;
    for (MachineInstr *MI : ToErase) {
      if (ErasedSet.insert(MI).second) {
        MI->eraseFromParent();
        Changed = true;
      }
    }
  }
  return Changed || peepChanged;
}