//===-- W65816SepRepCleanup.cpp - Coalesce adjacent SEP/REP toggles -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Post-PEI peephole that drops adjacent `REP #$20 ; SEP #$20` (or vice
// versa) pairs that toggle the M-bit redundantly.
//
// The STA8fi expansion in W65816RegisterInfo::eliminateFrameIndex emits
// `SEP #$20 / STA d,S / REP #$20` so each i8 store runs with M=1.  When
// two STA8fi sit back-to-back in the MIR (no 16-bit ALU op between
// them), the post-PEI stream contains:
//
//   SEP #$20
//   STA d1, S
//   REP #$20      <-- toggle
//   SEP #$20      <-- toggle (cancels above)
//   STA d2, S
//   REP #$20
//
// The middle REP/SEP pair is a no-op: both stores can run in one M=1
// region.  We drop them to leave:
//
//   SEP #$20
//   STA d1, S
//   STA d2, S
//   REP #$20
//
// Saves 4 bytes / 6 cycles per coalesced pair (REP and SEP are each
// 2 bytes / 3 cycles).  Symmetric `SEP/REP` pairs (M=1 then M=0 with
// nothing in between) are also dropped — they can arise around
// inline-asm or hand-written assembly snippets.
//
// The pass also hosts two further pre-emit peepholes: folding
// ADCi16imm/SBCi16imm by ±1/±2 into INA/DEA chains when carry/overflow
// are dead, and removing an `LDY #imm` whose value Y already holds.
//
// Runs at addPreEmitPass (after PEI has expanded STA8fi).
// //===----------------------------------------------------------------------===// #include "W65816.h" #include "W65816InstrInfo.h" #include "W65816Subtarget.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" using namespace llvm; #define DEBUG_TYPE "w65816-sep-rep-cleanup" namespace { class W65816SepRepCleanup : public MachineFunctionPass { public: static char ID; W65816SepRepCleanup() : MachineFunctionPass(ID) {} StringRef getPassName() const override { return "W65816 SEP/REP toggle coalescing"; } bool runOnMachineFunction(MachineFunction &MF) override; }; } // namespace char W65816SepRepCleanup::ID = 0; INITIALIZE_PASS(W65816SepRepCleanup, DEBUG_TYPE, "W65816 SEP/REP toggle coalescing", false, false) FunctionPass *llvm::createW65816SepRepCleanup() { return new W65816SepRepCleanup(); } // Returns the immediate value of `op` if MI is a `SEP #imm` or `REP #imm`, // else -1. static int getSepRepImm(const MachineInstr &MI, unsigned Opc) { if (MI.getOpcode() != Opc) return -1; if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return -1; return MI.getOperand(0).getImm(); } // Returns true if MI may consume the carry or overflow flag — these // are the flags that ADC/SBC define but INA/DEA don't. Conservative: // any branch that reads C or V counts, plus the chained ADC/SBC ops // that wait for a prior carry-out. Anything else (CMP, CLC, SEC, // LDA, STA, AND, ORA, EOR, etc.) re-defines or doesn't read C/V. 
static bool readsCarryOrV(const MachineInstr &MI) {
  switch (MI.getOpcode()) {
  case W65816::BCS: // reads C
  case W65816::BCC: // reads C
  case W65816::BVS: // reads V
  case W65816::BVC: // reads V
  case W65816::ADC_StackRel: // reads C as carry-in
  case W65816::ADC_Imm16:
  case W65816::ADC_Imm8:
  case W65816::ADC_DP:
  case W65816::ADC_Abs:
  case W65816::SBC_StackRel:
  case W65816::SBC_Imm16:
  case W65816::SBC_Imm8:
  case W65816::SBC_DP:
  case W65816::SBC_Abs:
  case W65816::ROL_A: // rotates fold C in
  case W65816::ROR_A:
  case W65816::ROL_DP:
  case W65816::ROL_Abs:
  case W65816::ROR_DP:
  case W65816::ROR_Abs:
    return true;
  default:
    return false;
  }
}

// Returns true if `Op` is one of the flag-redefining opcodes (CLC, SEC,
// CMP*, CPX*, CPY*) — observing C/V before this is safe.
// Includes the pseudo CMP* variants (CMPi16imm etc.) since this peephole
// runs at pre-emit, BEFORE the AsmPrinter expands them.
//
// REP and SEP are deliberately NOT listed: they only clear/set the P bits
// selected by their immediate mask, so the common `SEP #$20` / `REP #$20`
// (M-bit only) leaves C and V untouched.  Treating them as redefiners
// would let a carry produced before an i8-store bracket be considered
// dead even though a later BCS/BCC still reads it.  Whether a particular
// REP/SEP kills C/V cannot be decided from the opcode alone, so the
// conservative answer here is "no".
static bool isFlagRedefiner(unsigned Op) {
  switch (Op) {
  case W65816::CLC:
  case W65816::SEC:
  case W65816::CMP_Imm8:
  case W65816::CMP_Imm16:
  case W65816::CMP_StackRel:
  case W65816::CMP_DP:
  case W65816::CMP_Abs:
  case W65816::CMPi16imm:
  case W65816::CMPi8imm:
  case W65816::CMPfi:
  case W65816::CMPabs:
  case W65816::CMP_RR:
  case W65816::CPX_Imm8:
  case W65816::CPX_Imm16:
  case W65816::CPX_DP:
  case W65816::CPX_Abs:
  case W65816::CPY_Imm8:
  case W65816::CPY_Imm16:
  case W65816::CPY_DP:
  case W65816::CPY_Abs:
    return true;
  default:
    return false;
  }
}

// Returns true if a subsequent MI in the same MBB observes the C/V
// flags before any flag-redefiner clears the dependency.  At MBB end,
// extends one step into each successor: if any successor's first
// (non-debug) MI reads C/V before redefining them, the flag is live
// across the edge — bail.  This is critical for loop bodies where
// the back-edge re-enters the same MBB at LDA/PHA (neither reads C/V),
// so a per-iteration `clc; adc #2` is foldable.
Cross-MBB carry chains // would normally use ADCEi16imm (not ADCi16imm), so this is safe. static bool carryFlagLiveAfter(MachineBasicBlock::iterator After, MachineBasicBlock &MBB) { // Phase 1: scan within this MBB. for (auto Probe = std::next(After); Probe != MBB.end(); ++Probe) { if (Probe->isDebugInstr()) continue; if (readsCarryOrV(*Probe)) return true; if (isFlagRedefiner(Probe->getOpcode())) return false; if (Probe->isCall()) return false; // callee resets flags } // Phase 2: peek into each successor's first few MIs. We BAIL only on // a positive C/V read; reaching MBB end or peek-cap without finding // one is treated as "carry dead" — ADCi16imm's carry-out is never // used in carry chains (those use ADCEi16imm), so a stray carry // floating into RTL or an unrelated arithmetic op causes no harm. const unsigned MaxPeek = 6; for (MachineBasicBlock *Succ : MBB.successors()) { unsigned Peeked = 0; for (auto &MI : *Succ) { if (MI.isDebugInstr()) continue; if (readsCarryOrV(MI)) return true; if (isFlagRedefiner(MI.getOpcode()) || MI.isCall()) break; if (++Peeked >= MaxPeek) break; } } return false; } // Convert `ADCi16imm dst, src, ±1`/`±2` and `SBCi16imm` similarly to // INA / INA;INA / DEA / DEA;DEA chains when C/V are dead. ADCi16imm // is a pseudo that expands to CLC+ADC_Imm16 (4B/5cyc). INA is 1B/2cyc. // Savings per ±1: 3B/3cyc; per ±2: 2B/1cyc. SBCi16imm is symmetric // (sub by N == add by -N), so SBC #1 → DEA, SBC #-1 → INA, etc. static bool foldImmAdcToInaDea(MachineBasicBlock &MBB, const W65816InstrInfo &TII) { bool Changed = false; auto It = MBB.begin(); while (It != MBB.end()) { unsigned Op = It->getOpcode(); bool isAdc = (Op == W65816::ADCi16imm); bool isSbc = (Op == W65816::SBCi16imm); if ((!isAdc && !isSbc) || It->getNumOperands() < 3 || !It->getOperand(2).isImm()) { ++It; continue; } int64_t Imm = (int16_t)It->getOperand(2).getImm(); // For SBC, negate: SBC by +N is "subtract N", same as ADC by -N. int64_t Effective = isSbc ? 
-Imm : Imm; if (Effective < -2 || Effective > 2 || Effective == 0) { ++It; continue; } if (carryFlagLiveAfter(It, MBB)) { ++It; continue; } DebugLoc DL = It->getDebugLoc(); unsigned NewOpc = (Effective > 0) ? W65816::INA : W65816::DEA; unsigned Count = (Effective > 0) ? Effective : -Effective; for (unsigned i = 0; i < Count; ++i) BuildMI(MBB, It, DL, TII.get(NewOpc)); auto NextIt = std::next(It); It->eraseFromParent(); It = NextIt; Changed = true; } return Changed; } bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; const auto &STI = MF.getSubtarget(); const auto &TII = *STI.getInstrInfo(); for (MachineBasicBlock &MBB : MF) { SmallVector Toggles; for (MachineInstr &MI : MBB) { unsigned Opc = MI.getOpcode(); if (Opc == W65816::REP || Opc == W65816::SEP) Toggles.push_back(&MI); } SmallPtrSet Erased; for (MachineInstr *First : Toggles) { if (Erased.count(First)) continue; // The next non-debug instruction must be the matching opposite // toggle with the same imm. auto It = std::next(First->getIterator()); while (It != MBB.end() && It->isDebugInstr()) ++It; if (It == MBB.end()) continue; MachineInstr &Next = *It; // Look for REP-then-SEP or SEP-then-REP with matching imm. unsigned FirstOpc = First->getOpcode(); unsigned WantOpc = (FirstOpc == W65816::REP) ? W65816::SEP : W65816::REP; int FirstImm = getSepRepImm(*First, FirstOpc); int NextImm = getSepRepImm(Next, WantOpc); if (FirstImm < 0 || NextImm < 0 || FirstImm != NextImm) continue; Erased.insert(First); Erased.insert(&Next); First->eraseFromParent(); Next.eraseFromParent(); Changed = true; } // Second peephole: collapse `ADCi16imm src, ±1/±2` (and SBCi16imm) // into INA/DEA chains when the carry flag they would set is unused. // ADCi16imm is a pseudo (expands to CLC+ADC_Imm16); we rewrite it // here BEFORE the AsmPrinter expansion runs. But this pass runs at // pre-emit, AFTER post-RA pseudo expansion. 
ADCi16imm survives // because its MCInst lowering is in W65816AsmPrinter (not in the // generic post-RA pseudo expander), so it's still in the MIR here. Changed |= foldImmAdcToInaDea(MBB, TII); // Third peephole: drop `LDY_Imm16 K` when Y already holds K from // an earlier LDY in the same MBB and no intervening MI clobbered // Y. Custom inserter emits LDY #0 before every LDAfi_indY/STAfi_indY, // even though Y already holds 0 from a previous emit — the // redundant LDYs survive MachineLICM because Y is a phys reg and // the inserter binds them tightly to each use. int yKnown = -1; // -1 means unknown; otherwise the immediate auto It2 = MBB.begin(); while (It2 != MBB.end()) { MachineInstr &MI = *It2; if (MI.isDebugInstr()) { ++It2; continue; } unsigned Op = MI.getOpcode(); if (Op == W65816::LDY_Imm16 && MI.getNumOperands() >= 1 && MI.getOperand(0).isImm()) { int K = MI.getOperand(0).getImm() & 0xFFFF; if (yKnown == K) { auto Erase = It2++; Erase->eraseFromParent(); Changed = true; continue; } yKnown = K; } else { // Conservatively invalidate yKnown on anything that touches Y // or on calls / inline asm / any instruction that doesn't have // a clean "no Y effect" guarantee. Cheaper to underclaim than // miscompile. switch (Op) { case W65816::LDAfi_indY: // reads Y, doesn't def it — keep yKnown case W65816::STAfi_indY: case W65816::LDA_StackRelIndY: case W65816::STA_StackRelIndY: break; case W65816::TAY: case W65816::TXY: case W65816::INY: case W65816::DEY: case W65816::PLY: case W65816::LDY_DP: case W65816::LDY_Abs: case W65816::LDY_DPX: case W65816::LDY_AbsX: yKnown = -1; break; default: if (MI.isCall()) yKnown = -1; break; } } ++It2; } } return Changed; }