//===-- W65816StackSlotMerge.cpp - Merge value-equivalent stack slots ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===---------------------------------------------------------------------===//
//
// Pre-emit pass that runs after PEI (eliminateFrameIndex) and merges
// pairs of stack-rel slots that hold the same value at every observable
// program point — typically the PHI src/dst pair PHI-elim leaves at
// the back-edge of a loop body.
//
// LLVM's StackSlotColoring merges slots with non-overlapping liveness.
// It can't merge slots that are simultaneously live but happen to hold
// the same value (which is what a PHI memory-copy creates). This pass
// catches that case via a stricter "value equivalence" check.
//
// Canonical pattern (sumSquares loop body):
//
//   .LBB0_4:
//     LDA 0x7, s ; PHA ; JSL __umulhisi3 ; PLY
//     CLC ; ADC 0x3, s ; STA 0xb, s   ; new total.lo (write X)
//     TXA ; ADC 0x1, s ; STA 0x9, s
//     LDA 0x7, s ; INC A ; STA 0x7, s
//     LDA 0xb, s ; STA 0x3, s         ; PHI copy: load X, store Y
//     LDA 0x9, s ; STA 0x1, s
//     ...
//
// The pair (0xb, 0x3) is the lo-half PHI memory copy. Slots 0xb and
// 0x3 always hold the same value at every read site:
//   - Function entry: both initialized to 0 (`lda #0; sta 0xb, s` in
//     entry, `lda #0; sta 0x3, s` in preheader).
//   - Loop iteration: the PHI copy moves the new total.lo from 0xb to
//     0x3 at the end of every iteration.
//   - Exit: only 0xb is read (return value), but its value equals 0x3's.
//
// Rename 0xb → 0x3 function-wide; the now self-copy `lda 0x3; sta 0x3`
// is dead and we erase it. Saves 2 inst per PHI copy occurrence (the
// memory copy round-trip). sumSquares loop body shrinks from 21 to
// 17 inst per iter.
//
// Safety check (sufficient condition for value equivalence):
//   1.
Both slots have ≥1 STA in the function (skips arg slots passed // by the caller — those have only LDA reads, no STAs, and renaming // would change where we read the arg from). // 2. For every STA X in the function, find a "twin" STA Y at a // program point where the values match. Matching = either: // (a) Same MBB, same A-source value (no intervening A-define). // Covers the loop-body iter-end pattern: STA X then later // LDA X ; STA Y. Also covers entry's `lda #N ; sta X` if // the same MBB also has `sta Y`. // (b) Different MBBs, both preceded by `LDA #const` of the same // constant. Covers entry-block STA X=0 paired with // preheader STA Y=0. // 3. Symmetric: for every STA Y, find a twin STA X. // 4. No "orphan" STAs. If a STA X or STA Y has no twin, bail. // // When all checks pass, the rename function-wide preserves semantics: // every read of slot X at program point P sees the same value that // slot Y holds at P (and vice versa). // //===---------------------------------------------------------------------===// #include "W65816.h" #include "W65816InstrInfo.h" #include "W65816Subtarget.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "w65816-stack-slot-merge" namespace { class W65816StackSlotMerge : public MachineFunctionPass { public: static char ID; W65816StackSlotMerge() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "W65816 merge value-equivalent stack slots (PHI-copy collapse)"; } void getAnalysisUsage(AnalysisUsage &AU) const override { AU.addRequired(); AU.setPreservesCFG(); MachineFunctionPass::getAnalysisUsage(AU); } bool runOnMachineFunction(MachineFunction &MF) override; }; } // namespace char W65816StackSlotMerge::ID = 
0; INITIALIZE_PASS_BEGIN(W65816StackSlotMerge, DEBUG_TYPE, "W65816 stack slot merge", false, false) INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) INITIALIZE_PASS_END(W65816StackSlotMerge, DEBUG_TYPE, "W65816 stack slot merge", false, false) FunctionPass *llvm::createW65816StackSlotMerge() { return new W65816StackSlotMerge(); } // Stack-relative MC opcodes — the ops that survive eliminateFrameIndex // and reference a slot via an 8-bit SP-relative offset. static bool isStackRelOp(unsigned Op) { return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel || Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel || Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel || Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel; } // Returns true if MI is a stack-rel op; out-param Off receives the slot // offset (operand 0). static bool srAccess(const MachineInstr &MI, int64_t &Off) { if (!isStackRelOp(MI.getOpcode())) return false; if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return false; Off = MI.getOperand(0).getImm(); return true; } // True if the MI semantically defines A. Covers both the explicit // case (operand has reg=A,isDef) AND the implicit case where the // tablegen InstDP / InstAbs / etc. base classes omit the A-Def // annotation despite LDA semantically writing A (a backend modelling // gap — many `LDA_DP`, `LDA_Abs`, `LDA_LongX`, etc. are missing the // implicit-def in the MIR even though they load into A). Opcode- // based fallback catches all of them. 
static bool semanticallyDefsA(const MachineInstr &MI) { for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() == W65816::A && MO.isDef()) return true; } unsigned Op = MI.getOpcode(); switch (Op) { case W65816::LDA_DP: case W65816::LDA_DPX: case W65816::LDA_DPInd: case W65816::LDA_DPIndY: case W65816::LDA_DPIndX: case W65816::LDA_Abs: case W65816::LDA_AbsX: case W65816::LDA_AbsY: case W65816::LDA_Long: case W65816::LDA_LongX: case W65816::PLA: return true; default: return false; } } // Walk backward from MI in its MBB looking for the most recent A-define. // Returns the MI that defines A, or nullptr if none in the same MBB. // Skips debug instructions. Stops at MBB boundary, calls, branches, // inline asm. static MachineInstr *findPriorADef(MachineInstr *MI) { MachineBasicBlock *MBB = MI->getParent(); auto It = MI->getIterator(); while (It != MBB->begin()) { --It; if (It->isDebugInstr()) continue; if (It->isCall() || It->isInlineAsm()) return nullptr; if (semanticallyDefsA(*It)) return &*It; } return nullptr; } // Walk forward from `Start` (exclusive) up to (but not including) `End` // in the same MBB, tracking whether slot `WatchSlot` is written. // Returns true if slot `WatchSlot` is NOT written in the interval. static bool slotNotWrittenBetween(MachineBasicBlock::iterator Start, MachineBasicBlock::iterator End, int64_t WatchSlot) { for (auto It = std::next(Start); It != End; ++It) { if (It->isDebugInstr()) continue; int64_t Off; if (It->getOpcode() == W65816::STA_StackRel && srAccess(*It, Off) && Off == WatchSlot) { return false; } } return true; } // Returns true if MI clobbers P (N/Z/C/V flags). Mirrors LLVM's // operand-based check + an opcode whitelist for tablegen entries that // omit `Defs = [P]` (InstImplied, InstStackRel, etc.). 
static bool clobbersFlagsP(const MachineInstr &MI) { for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef()) return true; } if (MI.isCall() || MI.isInlineAsm()) return true; unsigned Op = MI.getOpcode(); switch (Op) { case W65816::PLA: case W65816::PLY: case W65816::PLX: case W65816::PLP: case W65816::INA: case W65816::DEA: case W65816::INX: case W65816::DEX: case W65816::INY: case W65816::DEY: case W65816::TAX: case W65816::TAY: case W65816::TYA: case W65816::TXA: case W65816::TYX: case W65816::TXY: case W65816::LDA_StackRel: case W65816::LDA_DP: case W65816::LDA_DPX: case W65816::LDA_DPInd: case W65816::LDA_DPIndY: case W65816::LDA_DPIndX: case W65816::LDA_Abs: case W65816::LDA_AbsX: case W65816::LDA_AbsY: case W65816::LDA_Long: case W65816::LDA_LongX: case W65816::ADC_StackRel: case W65816::SBC_StackRel: case W65816::CMP_StackRel: case W65816::AND_StackRel: case W65816::ORA_StackRel: case W65816::EOR_StackRel: case W65816::ADC_DP: case W65816::ADC_Abs: case W65816::SBC_DP: case W65816::SBC_Abs: case W65816::CMP_DP: case W65816::CMP_Abs: case W65816::AND_DP: case W65816::AND_Abs: case W65816::ORA_DP: case W65816::ORA_Abs: case W65816::EOR_DP: case W65816::EOR_Abs: return true; default: return false; } } // Returns true if MI reads P flags (conditional branches, PLP, etc.). static bool usesFlagsP(const MachineInstr &MI) { if (MI.isConditionalBranch()) return true; for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() == W65816::P && MO.isUse() && !MO.isDef()) return true; } return false; } // Returns the MOST RECENT A-defining MI strictly before MI in its MBB, // skipping debug instructions. Returns nullptr if none in the same MBB. 
static MachineInstr *findMostRecentADef(MachineInstr *MI) { MachineBasicBlock *MBB = MI->getParent(); auto It = MI->getIterator(); while (It != MBB->begin()) { --It; if (It->isDebugInstr()) continue; if (semanticallyDefsA(*It)) return &*It; } return nullptr; } // "Twin" check. Given a STA X at position StaX and a candidate slot Y, // scan the function's STA Y instances and return one that's value- // equivalent under the rules described in the header comment. // // Source-value equivalence cases: // (1) Same-MBB twin store: no A-define between StaX and the candidate // StaY → both store the same A value. Pure twin pattern. // (2) Same-MBB PHI-copy: the candidate StaY is preceded by // `LDA_StackRel slotX` (PHI-copy reload). Even if many A-defines // sit between StaX and StaY, the LDA X re-establishes A = // slot[X] = value StaX wrote (assuming slot X wasn't re-written // in the gap). // (3) Different MBBs, both preceded by LDA_Imm16 / LDAi16imm of the // same constant. Covers entry/preheader init parallel pair. static MachineInstr *findTwin(MachineInstr *StaX, ArrayRef StasY) { MachineBasicBlock *MBBStaX = StaX->getParent(); int64_t XOff = StaX->getOperand(0).getImm(); // Cases (1) + (2): same MBB. for (MachineInstr *StaY : StasY) { if (StaY->getParent() != MBBStaX) continue; // Determine ordering. MachineInstr *Earlier = nullptr; MachineInstr *Later = nullptr; for (auto It = MBBStaX->begin(); It != MBBStaX->end(); ++It) { if (&*It == StaX) { Earlier = StaX; Later = StaY; break; } if (&*It == StaY) { Earlier = StaY; Later = StaX; break; } } if (!Earlier || !Later) continue; int64_t EOff = Earlier->getOperand(0).getImm(); // Case (2): if Later is preceded by `LDA_StackRel ` // (the PHI-copy reload), it's a PHI twin. Also require slot // Earlier-slot wasn't re-written between Earlier and Later. 
MachineInstr *PriorOfLater = findMostRecentADef(Later); if (PriorOfLater) { int64_t Off; if (PriorOfLater->getOpcode() == W65816::LDA_StackRel && srAccess(*PriorOfLater, Off) && Off == EOff && slotNotWrittenBetween(Earlier->getIterator(), PriorOfLater->getIterator(), EOff)) { return StaY; } } // Case (1): no A-define between Earlier and Later — same A value. { bool noADefs = true; for (auto It = std::next(Earlier->getIterator()); It != Later->getIterator(); ++It) { if (It->isDebugInstr()) continue; if (semanticallyDefsA(*It)) { noADefs = false; break; } } if (noADefs) return StaY; } } // Case (3): different MBBs, both preceded by LDA_Imm16 / LDAi16imm // with the same constant. MachineInstr *PriorX = findPriorADef(StaX); if (!PriorX) return nullptr; unsigned PriorXOp = PriorX->getOpcode(); if (PriorXOp != W65816::LDA_Imm16 && PriorXOp != W65816::LDAi16imm) return nullptr; int64_t XConst = 0; for (const MachineOperand &MO : PriorX->operands()) { if (MO.isImm()) { XConst = MO.getImm(); break; } } for (MachineInstr *StaY : StasY) { if (StaY->getParent() == MBBStaX) continue; MachineInstr *PriorY = findPriorADef(StaY); if (!PriorY) continue; if (PriorY->getOpcode() != PriorXOp) continue; int64_t YConst = 0; for (const MachineOperand &MO : PriorY->operands()) { if (MO.isImm()) { YConst = MO.getImm(); break; } } if (XConst == YConst) return StaY; } (void)XOff; return nullptr; } // Run Phase 6a + Phase 6 (per-MBB peepholes) — independent of rename // logic, so they fire on every function. Returns true if anything // changed. static bool runPerMBBPeepholes(MachineFunction &MF) { bool Changed = false; // Phase 6a: redundant `STA Y, s` immediately followed by `LDA Y, s`. 
for (MachineBasicBlock &MBB : MF) { SmallVector Dead; for (auto It = MBB.begin(); It != MBB.end(); ++It) { if (It->isDebugInstr()) continue; if (It->getOpcode() != W65816::STA_StackRel) continue; int64_t StaSlot; if (!srAccess(*It, StaSlot)) continue; auto NextIt = std::next(It); while (NextIt != MBB.end() && NextIt->isDebugInstr()) ++NextIt; if (NextIt == MBB.end()) continue; if (NextIt->getOpcode() != W65816::LDA_StackRel) continue; int64_t LdaSlot; if (!srAccess(*NextIt, LdaSlot)) continue; if (StaSlot != LdaSlot) continue; bool flagsSafe = false; bool aIsUsedBeforeClobber = false; for (auto Fwd = std::next(NextIt); Fwd != MBB.end(); ++Fwd) { if (Fwd->isDebugInstr()) continue; // Calls/JSLs that take A as arg — even though clobbersFlagsP // returns true for them, the elimination could mis-track A's // live-in to the call. Bail. if (Fwd->isCall()) break; // Generic: any instr that has `implicit $a` as a USE — A is // live going in. Bail to avoid live-range trouble. for (const MachineOperand &MO : Fwd->operands()) { if (MO.isReg() && MO.getReg() == W65816::A && MO.isUse() && !MO.isDef()) { aIsUsedBeforeClobber = true; break; } } if (aIsUsedBeforeClobber) break; if (usesFlagsP(*Fwd)) break; if (Fwd->isTerminator() && !Fwd->isConditionalBranch()) { flagsSafe = true; break; } if (clobbersFlagsP(*Fwd)) { flagsSafe = true; break; } } if (!flagsSafe) continue; Dead.push_back(&*NextIt); } for (MachineInstr *MI : Dead) { MI->eraseFromParent(); Changed = true; } } // Phase 6: per-MBB redundant `LDA #K` elimination. 
auto isAandPPreserving = [](const MachineInstr &MI) -> bool { unsigned Op = MI.getOpcode(); switch (Op) { case W65816::STA_StackRel: case W65816::STA_DP: case W65816::STA_DPX: case W65816::STA_DPInd: case W65816::STA_DPIndY: case W65816::STA_DPIndX: case W65816::STA_Abs: case W65816::STA_AbsX: case W65816::STA_AbsY: case W65816::STA_Long: case W65816::STA_LongX: case W65816::STX_DP: case W65816::STX_Abs: case W65816::STY_DP: case W65816::STY_Abs: case W65816::STY_DPX: case W65816::STZ_DP: case W65816::STZ_Abs: case W65816::STZ_DPX: case W65816::STZ_AbsX: return true; default: break; } for (const MachineOperand &MO : MI.operands()) { if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef()) return false; } if (MI.mayStore() && !MI.mayLoad() && !semanticallyDefsA(MI)) return true; return false; }; auto isLdaImmK = [](const MachineInstr &MI, int64_t &K) -> bool { unsigned Op = MI.getOpcode(); if (Op != W65816::LDA_Imm16 && Op != W65816::LDAi16imm) return false; for (const MachineOperand &MO : MI.operands()) { if (MO.isImm()) { K = MO.getImm(); return true; } } return false; }; for (MachineBasicBlock &MBB : MF) { std::optional KnownK; SmallVector Dead; for (auto It = MBB.begin(); It != MBB.end(); ++It) { if (It->isDebugInstr()) continue; int64_t K; if (isLdaImmK(*It, K)) { if (KnownK && *KnownK == K) { Dead.push_back(&*It); continue; } KnownK = K; continue; } if (isAandPPreserving(*It)) continue; KnownK.reset(); } for (MachineInstr *MI : Dead) { MI->eraseFromParent(); Changed = true; } } return Changed; } bool W65816StackSlotMerge::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; if (MF.getFunction().hasOptNone()) return false; // Run per-MBB peepholes first — independent of rename logic. bool peepChanged = runPerMBBPeepholes(MF); // Phase 1: index all stack-rel STA/LDA grouped by slot offset. DenseMap> Stas; DenseMap> Ldas; DenseMap AllRefs; // STA + LDA + ADC + ... 
count for (MachineBasicBlock &MBB : MF) { for (MachineInstr &MI : MBB) { int64_t Off; if (!srAccess(MI, Off)) continue; AllRefs[Off]++; if (MI.getOpcode() == W65816::STA_StackRel) { Stas[Off].push_back(&MI); } else if (MI.getOpcode() == W65816::LDA_StackRel) { Ldas[Off].push_back(&MI); } } } // Phase 2: find PHI-copy site candidates. Pattern: LDA X ; STA Y // in a LOOP BODY MBB (= the MBB has itself as a predecessor, i.e. // a self-loop back-edge). Restricting to loop bodies distinguishes // genuine PHI-cycle copies from one-shot temp transfers (where // slot X is just a scratch register dropped on the way to slot Y // for an unrelated purpose, like qsortIter's pointer-construction // pattern `STA 5; ...; LDA 5; STA 39` followed by `LDA 39; STA dp`). DenseMap PhiCopyPair; // X -> Y for (MachineBasicBlock &MBB : MF) { // Self-loop check: MBB must have itself as a predecessor. bool selfLoop = false; for (MachineBasicBlock *Pred : MBB.predecessors()) { if (Pred == &MBB) { selfLoop = true; break; } } if (!selfLoop) continue; for (auto It = MBB.begin(); It != MBB.end(); ++It) { if (It->getOpcode() != W65816::LDA_StackRel) continue; int64_t X; if (!srAccess(*It, X)) continue; auto NextIt = std::next(It); while (NextIt != MBB.end() && NextIt->isDebugInstr()) ++NextIt; if (NextIt == MBB.end()) continue; if (NextIt->getOpcode() != W65816::STA_StackRel) continue; int64_t Y; if (!srAccess(*NextIt, Y) || Y == X) continue; if (PhiCopyPair.count(X)) continue; PhiCopyPair[X] = Y; } } // Phase 3: validate each pair and apply rename if safe. // Track which slots have already been merged so we don't double-merge. DenseMap Renames; // X -> Y for (auto &P : PhiCopyPair) { int64_t X = P.first, Y = P.second; // Don't re-merge an already-processed slot. if (Renames.count(X) || Renames.count(Y)) continue; // Arg-slot guard: skip slots with no STAs (caller-passed args). if (Stas[X].empty() || Stas[Y].empty()) continue; // Validate that every STA X has a twin STA Y. 
bool allPaired = true; for (MachineInstr *StaX : Stas[X]) { if (!findTwin(StaX, Stas[Y])) { allPaired = false; break; } } if (!allPaired) continue; // Symmetric: every STA Y must have a twin STA X. for (MachineInstr *StaY : Stas[Y]) { if (!findTwin(StaY, Stas[X])) { allPaired = false; break; } } if (!allPaired) continue; LLVM_DEBUG(dbgs() << "StackSlotMerge: rename slot " << X << " -> " << Y << " in " << MF.getName() << "\n"); Renames[X] = Y; } if (Renames.empty()) return false; // Phase 4: apply rename. bool Changed = false; for (MachineBasicBlock &MBB : MF) { SmallVector ToErase; for (MachineInstr &MI : MBB) { int64_t Off; if (!srAccess(MI, Off)) continue; auto It = Renames.find(Off); if (It == Renames.end()) continue; MI.getOperand(0).setImm(It->second); Changed = true; } // After rename, look for now-redundant LDA-STA pairs to the same // slot (the PHI-copy self-copy). Erase them. for (auto It = MBB.begin(); It != MBB.end(); ++It) { if (It->getOpcode() != W65816::LDA_StackRel) continue; int64_t LdaOff; if (!srAccess(*It, LdaOff)) continue; auto NextIt = std::next(It); while (NextIt != MBB.end() && NextIt->isDebugInstr()) ++NextIt; if (NextIt == MBB.end()) continue; if (NextIt->getOpcode() != W65816::STA_StackRel) continue; int64_t StaOff; if (!srAccess(*NextIt, StaOff)) continue; if (LdaOff != StaOff) continue; ToErase.push_back(&*It); ToErase.push_back(&*NextIt); } for (MachineInstr *MI : ToErase) MI->eraseFromParent(); if (!ToErase.empty()) Changed = true; } // Phase 5: redundant constant-init elimination. After rename, the // Case (3) twin pairings leave us with TWO sites writing the same // constant to the same slot (one renamed from X to Y, the other was // already targeting Y). The dominated one is redundant — its slot // already holds the constant from the dominating write. // // Generalize: scan post-rename for ALL `LDA_Imm16 K ; STA_StackRel Y` // pairs (or LDAi16imm K; STA Y). 
For each pair, look for another // such pair with the same (K, Y) where one DOMINATES the other AND // no slot-Y access exists on any path between them. Erase the // dominated STA + its preceding LDA (if A isn't otherwise consumed). { auto isLdaImm = [](const MachineInstr &MI) { unsigned Op = MI.getOpcode(); return Op == W65816::LDA_Imm16 || Op == W65816::LDAi16imm; }; auto immValue = [](const MachineInstr &MI) -> int64_t { for (const MachineOperand &MO : MI.operands()) { if (MO.isImm()) return MO.getImm(); } return 0; }; // Collect `LDA #K ; STA_StackRel Y` pairs, grouped by Y. DenseMap, 4>> ConstStas; for (MachineBasicBlock &MBB : MF) { for (auto It = MBB.begin(); It != MBB.end(); ++It) { if (!isLdaImm(*It)) continue; int64_t K = immValue(*It); auto NextIt = std::next(It); while (NextIt != MBB.end() && NextIt->isDebugInstr()) ++NextIt; if (NextIt == MBB.end()) continue; if (NextIt->getOpcode() != W65816::STA_StackRel) continue; int64_t Y; if (!srAccess(*NextIt, Y)) continue; ConstStas[Y].push_back({&*NextIt, K}); } } // For each slot Y with at least two const-init STAs, check for // dominator redundancy. auto &MDT = getAnalysis().getDomTree(); // Check that no instruction WRITES slot Y on any path between // From and To. Reads are fine because both From and To write // the same constant K — any intermediate read would see K either // way (since From dominates, From has already executed). Calls // are bailout conditions: a call might write to the stack via // address-taken locals or other side effects we don't model. auto noSlotWriteOnPath = [&](MachineInstr *From, MachineInstr *To, int64_t Y) -> bool { MachineBasicBlock *FromMBB = From->getParent(); MachineBasicBlock *ToMBB = To->getParent(); auto opWritesY = [&](MachineInstr &MI) { if (MI.isCall() || MI.isInlineAsm()) return true; int64_t Off; if (MI.getOpcode() == W65816::STA_StackRel && srAccess(MI, Off) && Off == Y) { return true; } return false; }; // (a) After From in its MBB. 
for (auto It = std::next(From->getIterator()); It != FromMBB->end(); ++It) { if (It->isDebugInstr()) continue; if (opWritesY(*It)) return false; } // (b) BFS forward from FromMBB's successors, stopping at ToMBB. SmallPtrSet Visited; SmallVector Stack; for (auto *Succ : FromMBB->successors()) Stack.push_back(Succ); while (!Stack.empty()) { auto *MBB = Stack.pop_back_val(); if (MBB == ToMBB) continue; // checked separately in (c) if (!Visited.insert(MBB).second) continue; for (auto &MI : *MBB) { if (MI.isDebugInstr()) continue; if (opWritesY(MI)) return false; } for (auto *Succ : MBB->successors()) Stack.push_back(Succ); } // (c) In ToMBB, before To, any write of Y? for (auto It = ToMBB->begin(); It != To->getIterator(); ++It) { if (It->isDebugInstr()) continue; if (opWritesY(*It)) return false; } return true; }; SmallVector ToErase; LLVM_DEBUG({ dbgs() << "Phase 5 in " << MF.getName() << ":\n"; for (auto &P : ConstStas) { dbgs() << " slot " << P.first << " has " << P.second.size() << " const STAs\n"; } }); for (auto &P : ConstStas) { int64_t Y = P.first; auto &stas = P.second; if (stas.size() < 2) continue; // For each pair (i, j) where i dominates j with same constant K: for (auto &Sj : stas) { MachineInstr *DominatedSta = Sj.first; int64_t Kj = Sj.second; for (auto &Si : stas) { if (&Si == &Sj) continue; if (Si.second != Kj) continue; // different K MachineInstr *DominatorSta = Si.first; if (!MDT.dominates(DominatorSta, DominatedSta)) continue; if (!noSlotWriteOnPath(DominatorSta, DominatedSta, Y)) continue; // Flag safety: erasing `LDA #K; STA Y` removes a flag-setting // op (the LDA). Walk forward from the STA looking for next // flag-clobber or unconditional terminator (safe) vs. // flag-use (unsafe). 
MachineBasicBlock *MBB = DominatedSta->getParent(); bool flagsSafeP5 = false; for (auto Fwd = std::next(DominatedSta->getIterator()); Fwd != MBB->end(); ++Fwd) { if (Fwd->isDebugInstr()) continue; if (usesFlagsP(*Fwd)) break; if (Fwd->isTerminator() && !Fwd->isConditionalBranch()) { flagsSafeP5 = true; break; } if (clobbersFlagsP(*Fwd)) { flagsSafeP5 = true; break; } } if (!flagsSafeP5) continue; // Erase DominatedSta and its preceding LDA #K. auto Prev = DominatedSta->getIterator(); while (Prev != MBB->begin()) { --Prev; if (!Prev->isDebugInstr()) break; } if (Prev != DominatedSta->getIterator() && isLdaImm(*Prev) && immValue(*Prev) == Kj) { // Verify A isn't consumed between LDA and STA — they're // adjacent so no consumers exist; safe. Erase both. ToErase.push_back(&*Prev); } ToErase.push_back(DominatedSta); break; } } } // De-dup ToErase before erasing. SmallPtrSet ErasedSet; for (MachineInstr *MI : ToErase) { if (ErasedSet.insert(MI).second) { MI->eraseFromParent(); Changed = true; } } } return Changed || peepChanged; }