65816-llvm-mos/src/llvm/lib/Target/W65816/W65816I32IncFold.cpp
Scott Duensing e65fedc8e1 Checkpoint
2026-05-13 15:48:34 -05:00

225 lines
8.4 KiB
C++

//===-- W65816I32IncFold.cpp - Fold i32 += 1 into INC + conditional skip --===//
//
// Pre-emit peephole: detect the post-PEI 6-instruction sequence emitted
// for `i32 += 1` on a Wide32 vreg whose halves spilled to two stack-rel
// slots, and rewrite to a tighter form using INA + a conditional skip
// over the hi half.
//
// Original (after PEI, pseudos still un-expanded):
// $a = LDA_StackRel imm_lo ; load lo half
// $a = ADCi16imm $a, 1 ; CLC + ADC #1 (5 cyc)
// STA_StackRel $a, imm_lo ; store lo
// $a = LDA_StackRel imm_hi ; load hi half
// $a = ADCEi16imm $a, 0 ; ADC #0 (uses carry from lo)
// STA_StackRel $a, imm_hi ; store hi
//
// Cycle cost: 5 + 2 + 3 + 5 + 5 + 3 + 5 = 28 cyc
//
// Rewrite:
// $a = LDA_StackRel imm_lo ; load lo
// $a = INA_PSEUDO $a ; lo + 1 — sets Z based on result (dst tied to src)
// STA_StackRel $a, imm_lo ; store lo (Z preserved)
// INC_HI_IF_CARRY_StackRel imm_hi ; AsmPrinter expands to:
// ; bne L_skip
// ; lda imm_hi, s
// ; inc a
// ; sta imm_hi, s
// ; L_skip:
//
// Cycle cost (no carry, common case):
// 5 + 2 + 5 + 3 (BNE taken) = 15 cyc — saves 13 cyc
// Cycle cost (with carry, rare case):
// 5 + 2 + 5 + 2 (BNE not-taken) + 5 + 2 + 5 = 26 cyc — saves 2 cyc
//
// The Z flag from `INA` survives the intervening STA_StackRel because
// STA does not modify the processor status register. The BNE in the
// expansion of INC_HI_IF_CARRY_StackRel reads that Z to decide whether
// the hi half needs to be touched.
//===----------------------------------------------------------------------===//
#include "W65816.h"
#include "W65816InstrInfo.h"
#include "W65816Subtarget.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
using namespace llvm;
#define DEBUG_TYPE "w65816-i32-inc-fold"
namespace {
/// Machine-function pass that looks for the load / add-1 / store,
/// load / add-carry-0 / store sequence on two stack-relative slots and
/// replaces it with an increment of the lo half plus a conditional
/// update of the hi half (see the file header for the exact shapes).
class W65816I32IncFold : public MachineFunctionPass {
public:
  /// Identification token handed to the MachineFunctionPass base.
  static char ID;

  W65816I32IncFold() : MachineFunctionPass(ID) {}

  /// Human-readable name of the pass.
  StringRef getPassName() const override {
    return "W65816 i32 += 1 → INC + conditional skip";
  }

  /// No extra analyses are requested; only the base-class defaults apply.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Scans every block of \p MF and rewrites each matched sequence.
  /// Returns true if any instruction was changed.
  bool runOnMachineFunction(MachineFunction &MF) override;
};
} // end anonymous namespace
// Storage for the pass's static ID member; the constructor passes it to
// the MachineFunctionPass base class.
char W65816I32IncFold::ID = 0;
// Register the pass through LLVM's INITIALIZE_PASS macro, using DEBUG_TYPE
// as its argument name and a short description string.
// NOTE(review): the two trailing `false` arguments are presumably the
// macro's "CFG-only" and "is-analysis" flags — confirm against
// llvm/PassSupport.h.
INITIALIZE_PASS(W65816I32IncFold, DEBUG_TYPE,
"W65816 i32 += 1 fold", false, false)
// Declaration of the registration hook for this pass.
// NOTE(review): INITIALIZE_PASS above presumably already defines this
// function (which would need an earlier declaration, likely in W65816.h),
// so this declaration may be redundant — confirm.
namespace llvm {
void initializeW65816I32IncFoldPass(PassRegistry &);
}
// Match the 6-instruction sequence; returns the post-pattern iterator
// and fills in the lo/hi stack-rel offsets if the pattern matches.
// Tolerates intervening TAX/TXA pairs (which regalloc inserts as
// spurious A-save brackets around STAfi's conservative Defs=[A]).
// They're collected into `KillMe` so the rewrite can erase them too.
// Try to match the six-instruction `i32 += 1` sequence beginning at `It`
// (leading debug instructions are skipped first):
//
//   LDA_StackRel lo ; ADCi16imm 1 ; STA_StackRel lo ;
//   LDA_StackRel hi ; ADCEi16imm 0 ; STA_StackRel hi      (hi != lo)
//
// On success returns true, with `OffLo`/`OffHi` holding the two slot
// offsets and `PatEnd` pointing just past the final store.
//
// Debug instructions are tolerated between every step.  TAX/TXA are
// additionally tolerated right after the lo load and right after the lo
// store; each one seen is appended to `KillMe` so the caller can erase it.
// `KillMe` (and `OffLo`/`OffHi`) may already have been written when the
// function returns false, so callers must only consume them on success.
//
// NOTE(review): the TAX/TXA tolerance does not check that the transfers
// come in save/restore pairs or that X is dead afterwards — confirm that
// only the spurious regalloc brackets can appear at these positions.
static bool matchI32AddOnePattern(MachineBasicBlock::iterator It,
                                  MachineBasicBlock::iterator End,
                                  int64_t &OffLo, int64_t &OffHi,
                                  MachineBasicBlock::iterator &PatEnd,
                                  SmallVectorImpl<MachineInstr *> &KillMe) {
  MachineBasicBlock::iterator Cur = It;

  // Move `Cur` forward over debug instructions.  When `CollectTransfers`
  // is set, TAX/TXA are stepped over as well and recorded in `KillMe`.
  auto advance = [&](bool CollectTransfers) {
    for (; Cur != End; ++Cur) {
      if (Cur->isDebugInstr())
        continue;
      if (!CollectTransfers)
        break;
      const unsigned Opc = Cur->getOpcode();
      if (Opc != W65816::TAX && Opc != W65816::TXA)
        break;
      KillMe.push_back(&*Cur);
    }
  };

  // True when `Cur` is a valid instruction with opcode `Opcode` whose
  // operand `OpIdx` exists and is an immediate; that immediate is then
  // stored into `Value`.  `Value` is left untouched on failure.
  auto readImm = [&](unsigned Opcode, unsigned OpIdx, int64_t &Value) {
    if (Cur == End || Cur->getOpcode() != Opcode)
      return false;
    if (Cur->getNumOperands() <= OpIdx)
      return false;
    const auto &MO = Cur->getOperand(OpIdx);
    if (!MO.isImm())
      return false;
    Value = MO.getImm();
    return true;
  };

  int64_t Imm = 0;

  // 1. Load of the lo half; remember its slot.
  advance(/*CollectTransfers=*/false);
  if (!readImm(W65816::LDA_StackRel, 0, OffLo))
    return false;

  // 2. Add-immediate of exactly 1.
  ++Cur;
  advance(/*CollectTransfers=*/true);
  if (!readImm(W65816::ADCi16imm, 2, Imm) || Imm != 1)
    return false;

  // 3. Store back to the same lo slot.
  ++Cur;
  advance(/*CollectTransfers=*/false);
  if (!readImm(W65816::STA_StackRel, 0, Imm) || Imm != OffLo)
    return false;

  // 4. Load of the hi half, which must live in a different slot.
  ++Cur;
  advance(/*CollectTransfers=*/true);
  if (!readImm(W65816::LDA_StackRel, 0, OffHi) || OffHi == OffLo)
    return false;

  // 5. Add-with-carry of exactly 0.
  ++Cur;
  advance(/*CollectTransfers=*/false);
  if (!readImm(W65816::ADCEi16imm, 2, Imm) || Imm != 0)
    return false;

  // 6. Store back to the hi slot.
  ++Cur;
  advance(/*CollectTransfers=*/false);
  if (!readImm(W65816::STA_StackRel, 0, Imm) || Imm != OffHi)
    return false;

  ++Cur;
  PatEnd = Cur;
  return true;
}
// Walk every basic block of `MF`; wherever matchI32AddOnePattern succeeds,
// rewrite the matched range in place:
//
//   LDA_StackRel lo            (kept)
//   ADCi16imm 1           ->   INA_PSEUDO
//   STA_StackRel lo            (kept)
//   LDA/ADCE/STA hi       ->   INC_HI_IF_CARRY_StackRel hi
//
// Returns true if at least one sequence was rewritten.
//
// NOTE(review): per the file header, the expansion of
// INC_HI_IF_CARRY_StackRel skips the hi load on the no-carry path, so A
// (and the carry flag) differ from what the original ADC sequence left
// behind.  No liveness check is performed here — confirm that nothing
// after the pattern reads A or the flags.
bool W65816I32IncFold::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction())) return false;
  const auto &STI = MF.getSubtarget<W65816Subtarget>();
  const auto *TII = STI.getInstrInfo();
  bool Changed = false;
  for (MachineBasicBlock &MBB : MF) {
    auto It = MBB.begin();
    while (It != MBB.end()) {
      int64_t OffLo = 0, OffHi = 0;
      MachineBasicBlock::iterator PatEnd;
      SmallVector<MachineInstr *, 4> KillMe;
      if (!matchI32AddOnePattern(It, MBB.end(), OffLo, OffHi, PatEnd, KillMe)) {
        ++It;
        continue;
      }

      // Step over debug instructions, never running past the pattern end.
      auto skipDebug = [&](MachineBasicBlock::iterator I) {
        while (I != PatEnd && I->isDebugInstr()) ++I;
        return I;
      };

      // The matcher skips leading debug instructions on its own by-value
      // copy of the iterator, so `It` may still sit on a DBG_* instruction
      // in front of the lo load.  Anchor the rewrite on the real
      // LDA_StackRel; otherwise every "next instruction" step below would
      // be off by one and the wrong instructions would be replaced.
      auto LoLoad = skipDebug(It);

      // Erase any spurious TAX/TXA pseudo-saves we tolerated inside
      // the pattern.  These are dead because STAfi's Defs=[A] was
      // a conservative over-approximation; the A-source path preserves
      // A in the actual asm.  None of the iterators held here point at
      // a TAX/TXA, so they stay valid.
      for (MachineInstr *MI : KillMe) MI->eraseFromParent();

      DebugLoc DL = LoLoad->getDebugLoc();

      // Replace ADCi16imm with INA_PSEUDO.  INA_PSEUDO has constraint
      // $src = $dst; emit with both as A.
      // Operand layout: (outs Acc16:$dst), (ins Acc16:$src)
      auto AdcIt = skipDebug(std::next(LoLoad));
      BuildMI(MBB, AdcIt, DL, TII->get(W65816::INA_PSEUDO), W65816::A)
          .addReg(W65816::A);
      // Locate the lo store before the ADC is erased and its iterator
      // becomes invalid.
      auto LoStore = skipDebug(std::next(AdcIt));
      AdcIt->eraseFromParent();

      // The hi-half triple starts at the first real instruction after
      // the lo store.
      MachineBasicBlock::iterator HiStart = skipDebug(std::next(LoStore));

      // Insert INC_HI_IF_CARRY_StackRel before the hi triple, then
      // erase the three hi instructions (LDA_StackRel, ADCEi16imm,
      // STA_StackRel), leaving interleaved debug instructions in place.
      BuildMI(MBB, HiStart, DL, TII->get(W65816::INC_HI_IF_CARRY_StackRel))
          .addImm(OffHi);
      auto KillIt = HiStart;
      for (int Erased = 0; Erased < 3 && KillIt != PatEnd; ) {
        if (KillIt->isDebugInstr()) { ++KillIt; continue; }
        auto Next = std::next(KillIt);
        KillIt->eraseFromParent();
        KillIt = Next;
        ++Erased;
      }

      Changed = true;
      // Resume scanning right after the rewritten sequence.
      It = PatEnd;
    }
  }
  return Changed;
}
// Factory for the i32 += 1 fold pass; the caller receives a freshly
// allocated pass instance.
FunctionPass *llvm::createW65816I32IncFold() {
  FunctionPass *Pass = new W65816I32IncFold();
  return Pass;
}