//===-- W65816I32IncFold.cpp - Fold i32 += 1 into INC + conditional skip --===//
//
// Pre-emit peephole: detect the post-PEI 6-instruction sequence emitted
// for `i32 += 1` on a Wide32 vreg whose halves spilled to two stack-rel
// slots, and rewrite to a tighter form using INA + a conditional skip
// over the hi half.
//
// Original (after PEI, pseudos still un-expanded):
//   $a = LDA_StackRel imm_lo        ; load lo half
//   $a = ADCi16imm $a, 1            ; CLC + ADC #1 (5 cyc)
//   STA_StackRel $a, imm_lo         ; store lo
//   $a = LDA_StackRel imm_hi        ; load hi half
//   $a = ADCEi16imm $a, 0           ; ADC #0 (uses carry from lo)
//   STA_StackRel $a, imm_hi         ; store hi
//
// Cycle cost: 5 + 2 + 3 + 5 + 5 + 3 + 5 = 28 cyc
//
// Rewrite:
//   $a = LDA_StackRel imm_lo        ; load lo
//   $a = INA_PSEUDO $a, $a          ; lo + 1 — sets Z based on result
//   STA_StackRel $a, imm_lo         ; store lo (Z preserved)
//   INC_HI_IF_CARRY_StackRel imm_hi ; AsmPrinter expands to:
//                                   ;   bne L_skip
//                                   ;   lda imm_hi, s
//                                   ;   inc a
//                                   ;   sta imm_hi, s
//                                   ; L_skip:
//
// Cycle cost (no carry, common case):
//   5 + 2 + 5 + 3 (BNE taken) = 15 cyc — saves 13 cyc
// Cycle cost (with carry, rare case):
//   5 + 2 + 5 + 2 (BNE not-taken) + 5 + 2 + 5 = 26 cyc — saves 2 cyc
//
// Adding 1 to the lo half carries into the hi half exactly when the lo
// result wraps around to zero, so "Z set after INA" is equivalent to
// "carry out of the lo half" and no carry flag is needed.
//
// The Z flag from `INA` survives the intervening STA_StackRel because
// STA does not modify the processor status register.  The BNE in the
// expansion of INC_HI_IF_CARRY_StackRel reads that Z to decide whether
// the hi half needs to be touched.
//===----------------------------------------------------------------------===// #include "W65816.h" #include "W65816InstrInfo.h" #include "W65816Subtarget.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/Debug.h" using namespace llvm; #define DEBUG_TYPE "w65816-i32-inc-fold" namespace { class W65816I32IncFold : public MachineFunctionPass { public: static char ID; W65816I32IncFold() : MachineFunctionPass(ID) {} bool runOnMachineFunction(MachineFunction &MF) override; StringRef getPassName() const override { return "W65816 i32 += 1 → INC + conditional skip"; } void getAnalysisUsage(AnalysisUsage &AU) const override { MachineFunctionPass::getAnalysisUsage(AU); } }; } // namespace char W65816I32IncFold::ID = 0; INITIALIZE_PASS(W65816I32IncFold, DEBUG_TYPE, "W65816 i32 += 1 fold", false, false) namespace llvm { void initializeW65816I32IncFoldPass(PassRegistry &); } // Match the 6-instruction sequence; returns the post-pattern iterator // and fills in the lo/hi stack-rel offsets if the pattern matches. // Tolerates intervening TAX/TXA pairs (which regalloc inserts as // spurious A-save brackets around STAfi's conservative Defs=[A]). // They're collected into `KillMe` so the rewrite can erase them too. static bool matchI32AddOnePattern(MachineBasicBlock::iterator It, MachineBasicBlock::iterator End, int64_t &OffLo, int64_t &OffHi, MachineBasicBlock::iterator &PatEnd, SmallVectorImpl &KillMe) { auto skipDebug = [&]() { while (It != End && It->isDebugInstr()) ++It; }; auto skipTaxTxa = [&]() { while (It != End && (It->isDebugInstr() || It->getOpcode() == W65816::TAX || It->getOpcode() == W65816::TXA)) { if (It->getOpcode() == W65816::TAX || It->getOpcode() == W65816::TXA) { KillMe.push_back(&*It); } ++It; } }; skipDebug(); if (It == End) return false; // 1. 
LDA_StackRel imm_lo if (It->getOpcode() != W65816::LDA_StackRel) return false; if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false; OffLo = It->getOperand(0).getImm(); ++It; skipTaxTxa(); if (It == End) return false; // 2. ADCi16imm with imm == 1 if (It->getOpcode() != W65816::ADCi16imm) return false; if (It->getNumOperands() < 3 || !It->getOperand(2).isImm()) return false; if (It->getOperand(2).getImm() != 1) return false; ++It; skipDebug(); if (It == End) return false; // 3. STA_StackRel to same offset if (It->getOpcode() != W65816::STA_StackRel) return false; if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false; if (It->getOperand(0).getImm() != OffLo) return false; ++It; skipTaxTxa(); if (It == End) return false; // 4. LDA_StackRel imm_hi (different offset) if (It->getOpcode() != W65816::LDA_StackRel) return false; if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false; OffHi = It->getOperand(0).getImm(); if (OffHi == OffLo) return false; ++It; skipDebug(); if (It == End) return false; // 5. ADCEi16imm with imm == 0 if (It->getOpcode() != W65816::ADCEi16imm) return false; if (It->getNumOperands() < 3 || !It->getOperand(2).isImm()) return false; if (It->getOperand(2).getImm() != 0) return false; ++It; skipDebug(); if (It == End) return false; // 6. 
STA_StackRel to hi offset if (It->getOpcode() != W65816::STA_StackRel) return false; if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false; if (It->getOperand(0).getImm() != OffHi) return false; ++It; PatEnd = It; return true; } bool W65816I32IncFold::runOnMachineFunction(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; const auto &STI = MF.getSubtarget(); const auto *TII = STI.getInstrInfo(); bool Changed = false; for (MachineBasicBlock &MBB : MF) { auto It = MBB.begin(); while (It != MBB.end()) { int64_t OffLo = 0, OffHi = 0; MachineBasicBlock::iterator PatEnd; SmallVector KillMe; auto Start = It; if (!matchI32AddOnePattern(It, MBB.end(), OffLo, OffHi, PatEnd, KillMe)) { ++It; continue; } // Erase any spurious TAX/TXA pseudo-saves we tolerated inside // the pattern. These are dead because STAfi's Defs=[A] was // a conservative over-approximation; the A-source path preserves // A in the actual asm. for (MachineInstr *MI : KillMe) MI->eraseFromParent(); // Found the 6-instruction pattern, [Start, PatEnd). Rewrite // in-place: keep the LDA_StackRel for lo, replace ADCi16imm // with INA_PSEUDO, keep STA_StackRel for lo, then replace the // entire LDA-ADCE-STA hi-half triple with INC_HI_IF_CARRY_StackRel. DebugLoc DL = Start->getDebugLoc(); // Walk to the ADCi16imm (Start+1) and replace. Build a fresh // INA_PSEUDO with the same tied-def shape: dst=A, src=A. auto AdcIt = std::next(Start); while (AdcIt != PatEnd && AdcIt->isDebugInstr()) ++AdcIt; // INA_PSEUDO has constraint $src = $dst; emit with both as A. // Operand layout: (outs Acc16:$dst), (ins Acc16:$src) BuildMI(MBB, AdcIt, DL, TII->get(W65816::INA_PSEUDO), W65816::A) .addReg(W65816::A); auto Erased = AdcIt; ++AdcIt; Erased->eraseFromParent(); // Now find the start of the hi-half triple: it's at Start+3 (after // skipping debug). Walk past STA_StackRel (lo) which is now at // AdcIt's position. 
while (AdcIt != PatEnd && AdcIt->isDebugInstr()) ++AdcIt; // AdcIt should now point at STA_StackRel (lo). Skip it. ++AdcIt; while (AdcIt != PatEnd && AdcIt->isDebugInstr()) ++AdcIt; // AdcIt now points at LDA_StackRel (hi) — start of the hi triple. MachineBasicBlock::iterator HiStart = AdcIt; // Insert INC_HI_IF_CARRY_StackRel before the hi triple, then // erase all three hi instructions. BuildMI(MBB, HiStart, DL, TII->get(W65816::INC_HI_IF_CARRY_StackRel)) .addImm(OffHi); // Erase the 3 hi instructions: LDA_StackRel, ADCEi16imm, STA_StackRel. auto KillIt = HiStart; for (int i = 0; i < 3 && KillIt != PatEnd; ) { if (KillIt->isDebugInstr()) { ++KillIt; continue; } auto Next = std::next(KillIt); KillIt->eraseFromParent(); KillIt = Next; ++i; } Changed = true; It = PatEnd; } } return Changed; } FunctionPass *llvm::createW65816I32IncFold() { return new W65816I32IncFold(); }