//===-- W65816I32IncFold.cpp - Fold i32 += 1 into INC + conditional skip --===//
//
// Pre-emit peephole: detect the post-PEI 6-instruction sequence emitted
// for `i32 += 1` on a Wide32 vreg whose halves spilled to two stack-rel
// slots, and rewrite it to a tighter form using INA + a conditional skip
// over the hi half.
//
// Original (after PEI, pseudos still un-expanded):
//   $a = LDA_StackRel imm_lo        ; load lo half
//   $a = ADCi16imm $a, 1            ; CLC + ADC #1 (5 cyc)
//   STA_StackRel $a, imm_lo         ; store lo
//   $a = LDA_StackRel imm_hi        ; load hi half
//   $a = ADCEi16imm $a, 0           ; ADC #0 (uses carry from lo)
//   STA_StackRel $a, imm_hi         ; store hi
//
// Cycle cost: 5 + 2 + 3 + 5 + 5 + 3 + 5 = 28 cyc
//
// Rewrite:
//   $a = LDA_StackRel imm_lo        ; load lo
//   $a = INA_PSEUDO $a              ; lo + 1 — sets Z based on result
//   STA_StackRel $a, imm_lo         ; store lo (Z preserved)
//   INC_HI_IF_CARRY_StackRel imm_hi ; AsmPrinter expands to:
//                                   ;   bne L_skip
//                                   ;   lda imm_hi, s
//                                   ;   inc a
//                                   ;   sta imm_hi, s
//                                   ; L_skip:
//
// Cycle cost (no carry, common case):
//   5 + 2 + 5 + 3 (BNE taken) = 15 cyc — saves 13 cyc
// Cycle cost (with carry, rare case):
//   5 + 2 + 5 + 2 (BNE not-taken) + 5 + 2 + 5 = 26 cyc — saves 2 cyc
//
// The Z flag from `INA` survives the intervening STA_StackRel because
// STA does not modify the processor status register. The BNE in the
// expansion of INC_HI_IF_CARRY_StackRel reads that Z to decide whether
// the hi half needs to be touched: Z is set exactly when the lo half
// wrapped to zero, i.e. when the original ADC would have produced a carry.
//===----------------------------------------------------------------------===//

#include "W65816.h"
|
|
#include "W65816InstrInfo.h"
|
|
#include "W65816Subtarget.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/Support/Debug.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "w65816-i32-inc-fold"
|
|
|
|
namespace {
|
|
class W65816I32IncFold : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
W65816I32IncFold() : MachineFunctionPass(ID) {}
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
StringRef getPassName() const override {
|
|
return "W65816 i32 += 1 → INC + conditional skip";
|
|
}
|
|
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
|
MachineFunctionPass::getAnalysisUsage(AU);
|
|
}
|
|
};
|
|
} // namespace
|
|
|
|
char W65816I32IncFold::ID = 0;
|
|
|
|
INITIALIZE_PASS(W65816I32IncFold, DEBUG_TYPE,
|
|
"W65816 i32 += 1 fold", false, false)
|
|
|
|
namespace llvm {
|
|
void initializeW65816I32IncFoldPass(PassRegistry &);
|
|
}
|
|
|
|
// Match the 6-instruction sequence; returns the post-pattern iterator
|
|
// and fills in the lo/hi stack-rel offsets if the pattern matches.
|
|
// Tolerates intervening TAX/TXA pairs (which regalloc inserts as
|
|
// spurious A-save brackets around STAfi's conservative Defs=[A]).
|
|
// They're collected into `KillMe` so the rewrite can erase them too.
|
|
static bool matchI32AddOnePattern(MachineBasicBlock::iterator It,
|
|
MachineBasicBlock::iterator End,
|
|
int64_t &OffLo, int64_t &OffHi,
|
|
MachineBasicBlock::iterator &PatEnd,
|
|
SmallVectorImpl<MachineInstr *> &KillMe) {
|
|
auto skipDebug = [&]() {
|
|
while (It != End && It->isDebugInstr()) ++It;
|
|
};
|
|
auto skipTaxTxa = [&]() {
|
|
while (It != End && (It->isDebugInstr() ||
|
|
It->getOpcode() == W65816::TAX ||
|
|
It->getOpcode() == W65816::TXA)) {
|
|
if (It->getOpcode() == W65816::TAX || It->getOpcode() == W65816::TXA) {
|
|
KillMe.push_back(&*It);
|
|
}
|
|
++It;
|
|
}
|
|
};
|
|
skipDebug();
|
|
if (It == End) return false;
|
|
|
|
// 1. LDA_StackRel imm_lo
|
|
if (It->getOpcode() != W65816::LDA_StackRel) return false;
|
|
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false;
|
|
OffLo = It->getOperand(0).getImm();
|
|
++It;
|
|
skipTaxTxa();
|
|
if (It == End) return false;
|
|
|
|
// 2. ADCi16imm with imm == 1
|
|
if (It->getOpcode() != W65816::ADCi16imm) return false;
|
|
if (It->getNumOperands() < 3 || !It->getOperand(2).isImm()) return false;
|
|
if (It->getOperand(2).getImm() != 1) return false;
|
|
++It;
|
|
skipDebug();
|
|
if (It == End) return false;
|
|
|
|
// 3. STA_StackRel to same offset
|
|
if (It->getOpcode() != W65816::STA_StackRel) return false;
|
|
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false;
|
|
if (It->getOperand(0).getImm() != OffLo) return false;
|
|
++It;
|
|
skipTaxTxa();
|
|
if (It == End) return false;
|
|
|
|
// 4. LDA_StackRel imm_hi (different offset)
|
|
if (It->getOpcode() != W65816::LDA_StackRel) return false;
|
|
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false;
|
|
OffHi = It->getOperand(0).getImm();
|
|
if (OffHi == OffLo) return false;
|
|
++It;
|
|
skipDebug();
|
|
if (It == End) return false;
|
|
|
|
// 5. ADCEi16imm with imm == 0
|
|
if (It->getOpcode() != W65816::ADCEi16imm) return false;
|
|
if (It->getNumOperands() < 3 || !It->getOperand(2).isImm()) return false;
|
|
if (It->getOperand(2).getImm() != 0) return false;
|
|
++It;
|
|
skipDebug();
|
|
if (It == End) return false;
|
|
|
|
// 6. STA_StackRel to hi offset
|
|
if (It->getOpcode() != W65816::STA_StackRel) return false;
|
|
if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) return false;
|
|
if (It->getOperand(0).getImm() != OffHi) return false;
|
|
++It;
|
|
PatEnd = It;
|
|
return true;
|
|
}
|
|
|
|
// Scan every basic block for the spilled `i32 += 1` sequence and rewrite
// each occurrence to
//
//   LDA_StackRel lo ; INA_PSEUDO ; STA_StackRel lo ;
//   INC_HI_IF_CARRY_StackRel hi
//
// Returns true when at least one sequence was rewritten.
//
// NOTE(review): after the original sequence A holds the new hi half and the
// flags reflect the hi add; after the rewrite they do not. Nothing here
// checks that A and the flags are dead past the pattern -- confirm that the
// surrounding codegen guarantees it.
bool W65816I32IncFold::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const auto &STI = MF.getSubtarget<W65816Subtarget>();
  const auto *TII = STI.getInstrInfo();
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator It = MBB.begin();
    while (It != MBB.end()) {
      // Never start a match on a debug instruction. The rewrite below is
      // positioned relative to `Start`, so `Start` has to be the lo-half
      // LDA_StackRel itself; otherwise the wrong instructions would be
      // replaced and the generated code would differ with debug info on.
      if (It->isDebugInstr()) {
        ++It;
        continue;
      }

      int64_t OffLo = 0, OffHi = 0;
      MachineBasicBlock::iterator PatEnd;
      SmallVector<MachineInstr *, 4> KillMe;
      const MachineBasicBlock::iterator Start = It;
      if (!matchI32AddOnePattern(It, MBB.end(), OffLo, OffHi, PatEnd,
                                 KillMe)) {
        ++It;
        continue;
      }

      // Erase the TAX/TXA pseudo-saves tolerated inside the pattern. They
      // are treated as dead because STAfi's Defs=[A] was a conservative
      // over-approximation; the A-source path preserves A in the actual
      // asm.
      // NOTE(review): erased without checking X liveness -- see the
      // matcher.
      for (MachineInstr *MI : KillMe)
        MI->eraseFromParent();

      // With TAX/TXA gone, the only non-debug instructions left in
      // [Start, PatEnd) are the six pattern instructions. Locate each one
      // up front so later erasures cannot shift what we operate on.
      auto nextReal = [&](MachineBasicBlock::iterator I) {
        while (I != PatEnd && I->isDebugInstr())
          ++I;
        return I;
      };
      const MachineBasicBlock::iterator AdcLo = nextReal(std::next(Start));
      const MachineBasicBlock::iterator StaLo = nextReal(std::next(AdcLo));
      const MachineBasicBlock::iterator LdaHi = nextReal(std::next(StaLo));
      const MachineBasicBlock::iterator AdcHi = nextReal(std::next(LdaHi));
      const MachineBasicBlock::iterator StaHi = nextReal(std::next(AdcHi));

      DebugLoc DL = Start->getDebugLoc();

      // Lo half: swap ADCi16imm for INA_PSEUDO. The pseudo has the
      // constraint $src = $dst, so both operands are A.
      // Operand layout: (outs Acc16:$dst), (ins Acc16:$src)
      BuildMI(MBB, AdcLo, DL, TII->get(W65816::INA_PSEUDO), W65816::A)
          .addReg(W65816::A);
      AdcLo->eraseFromParent();

      // Hi half: a single INC_HI_IF_CARRY_StackRel takes the place of the
      // whole LDA / ADCE / STA triple. Debug instructions interleaved with
      // the triple are left where they are.
      BuildMI(MBB, LdaHi, DL, TII->get(W65816::INC_HI_IF_CARRY_StackRel))
          .addImm(OffHi);
      LdaHi->eraseFromParent();
      AdcHi->eraseFromParent();
      StaHi->eraseFromParent();

      Changed = true;
      // PatEnd was never erased, so it is still a valid resume point.
      It = PatEnd;
    }
  }
  return Changed;
}
|
|
|
|
// Factory for the i32 += 1 fold pass. Returns a freshly heap-allocated
// W65816I32IncFold as a FunctionPass pointer.
FunctionPass *llvm::createW65816I32IncFold() {
  FunctionPass *const Pass = new W65816I32IncFold();
  return Pass;
}
|