301 lines
11 KiB
C++
301 lines
11 KiB
C++
//===-- W65816SepRepCleanup.cpp - Coalesce adjacent SEP/REP toggles -------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Post-PEI peephole that drops adjacent `REP #$20 ; SEP #$20` (or vice
|
|
// versa) pairs that toggle the M-bit redundantly.
|
|
//
|
|
// The STA8fi expansion in W65816RegisterInfo::eliminateFrameIndex emits
|
|
// `SEP #$20 / STA d,S / REP #$20` so each i8 store runs with M=1. When
|
|
// two STA8fi sit back-to-back in the MIR (no 16-bit ALU op between
|
|
// them), the post-PEI stream contains:
|
|
//
|
|
// SEP #$20
|
|
// STA d1, S
|
|
// REP #$20 <-- toggle
|
|
// SEP #$20 <-- toggle (cancels above)
|
|
// STA d2, S
|
|
// REP #$20
|
|
//
|
|
// The middle REP/SEP pair is a no-op: both stores can run in one M=1
|
|
// region. We drop them to leave:
|
|
//
|
|
// SEP #$20
|
|
// STA d1, S
|
|
// STA d2, S
|
|
// REP #$20
|
|
//
|
|
// Saves 4 bytes / 6 cycles per coalesced pair. Symmetric `SEP/REP`
|
|
// pairs (M=1 then M=0 with nothing in between) are also dropped — they
|
|
// can arise around inline-asm or hand-written assembly snippets.
|
|
//
|
|
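// Runs at addPreEmitPass (after PEI has expanded STA8fi).
//
// The same pass also hosts two further pre-emit peepholes: folding
// `ADCi16imm` / `SBCi16imm` by +-1 / +-2 into INA / DEA chains when the
// carry and overflow flags are dead, and dropping an `LDY_Imm16 K` when Y
// already holds K from an earlier load in the same block.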
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "W65816.h"
|
|
#include "W65816InstrInfo.h"
|
|
#include "W65816Subtarget.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineFunctionPass.h"
|
|
#include "llvm/CodeGen/MachineInstr.h"
|
|
|
|
using namespace llvm;
|
|
|
|
#define DEBUG_TYPE "w65816-sep-rep-cleanup"
|
|
|
|
namespace {
|
|
|
|
class W65816SepRepCleanup : public MachineFunctionPass {
|
|
public:
|
|
static char ID;
|
|
|
|
W65816SepRepCleanup() : MachineFunctionPass(ID) {}
|
|
|
|
StringRef getPassName() const override {
|
|
return "W65816 SEP/REP toggle coalescing";
|
|
}
|
|
|
|
bool runOnMachineFunction(MachineFunction &MF) override;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
char W65816SepRepCleanup::ID = 0;
|
|
|
|
INITIALIZE_PASS(W65816SepRepCleanup, DEBUG_TYPE,
|
|
"W65816 SEP/REP toggle coalescing", false, false)
|
|
|
|
// Factory used by the target's pass pipeline: returns a freshly allocated
// instance of the cleanup pass, owned by the caller.
FunctionPass *llvm::createW65816SepRepCleanup() {
  return new W65816SepRepCleanup();
}
|
|
|
|
// Returns the immediate value of `op` if MI is a `SEP #imm` or `REP #imm`,
|
|
// else -1.
|
|
static int getSepRepImm(const MachineInstr &MI, unsigned Opc) {
|
|
if (MI.getOpcode() != Opc)
|
|
return -1;
|
|
if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm())
|
|
return -1;
|
|
return MI.getOperand(0).getImm();
|
|
}
|
|
|
|
// Returns true if MI may consume the carry or overflow flag — these
|
|
// are the flags that ADC/SBC define but INA/DEA don't. Conservative:
|
|
// any branch that reads C or V counts, plus the chained ADC/SBC ops
|
|
// that wait for a prior carry-out. Anything else (CMP, CLC, SEC,
|
|
// LDA, STA, AND, ORA, EOR, etc.) re-defines or doesn't read C/V.
|
|
static bool readsCarryOrV(const MachineInstr &MI) {
|
|
switch (MI.getOpcode()) {
|
|
case W65816::BCS: // reads C
|
|
case W65816::BCC: // reads C
|
|
case W65816::BVS: // reads V
|
|
case W65816::BVC: // reads V
|
|
case W65816::ADC_StackRel: // reads C as carry-in
|
|
case W65816::ADC_Imm16:
|
|
case W65816::ADC_Imm8:
|
|
case W65816::ADC_DP:
|
|
case W65816::ADC_Abs:
|
|
case W65816::SBC_StackRel:
|
|
case W65816::SBC_Imm16:
|
|
case W65816::SBC_Imm8:
|
|
case W65816::SBC_DP:
|
|
case W65816::SBC_Abs:
|
|
case W65816::ROL_A: // rotates fold C in
|
|
case W65816::ROR_A:
|
|
case W65816::ROL_DP:
|
|
case W65816::ROL_Abs:
|
|
case W65816::ROR_DP:
|
|
case W65816::ROR_Abs:
|
|
return true;
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// Returns true if `Op` is one of the flag-redefining opcodes (CLC, SEC,
|
|
// CMP*, CPX*, CPY*, REP, SEP) — observing C/V before this is safe.
|
|
// Includes the pseudo CMP* variants (CMPi16imm etc.) since this peephole
|
|
// runs at pre-emit, BEFORE the AsmPrinter expands them.
|
|
static bool isFlagRedefiner(unsigned Op) {
|
|
switch (Op) {
|
|
case W65816::CLC:
|
|
case W65816::SEC:
|
|
case W65816::CMP_Imm8: case W65816::CMP_Imm16:
|
|
case W65816::CMP_StackRel: case W65816::CMP_DP: case W65816::CMP_Abs:
|
|
case W65816::CMPi16imm: case W65816::CMPi8imm:
|
|
case W65816::CMPfi: case W65816::CMPabs:
|
|
case W65816::CMP_RR:
|
|
case W65816::CPX_Imm8: case W65816::CPX_Imm16:
|
|
case W65816::CPX_DP: case W65816::CPX_Abs:
|
|
case W65816::CPY_Imm8: case W65816::CPY_Imm16:
|
|
case W65816::CPY_DP: case W65816::CPY_Abs:
|
|
case W65816::REP: case W65816::SEP:
|
|
return true;
|
|
default: return false;
|
|
}
|
|
}
|
|
|
|
// Returns true if a subsequent MI in the same MBB observes the C/V
|
|
// flags before any flag-redefiner clears the dependency. At MBB end,
|
|
// extends one step into each successor: if any successor's first
|
|
// (non-debug) MI reads C/V before redefining them, the flag is live
|
|
// across the edge — bail. This is critical for loop bodies where
|
|
// the back-edge re-enters the same MBB at LDA/PHA (neither reads C/V),
|
|
// so a per-iteration `clc; adc #2` is foldable. Cross-MBB carry chains
|
|
// would normally use ADCEi16imm (not ADCi16imm), so this is safe.
|
|
static bool carryFlagLiveAfter(MachineBasicBlock::iterator After,
|
|
MachineBasicBlock &MBB) {
|
|
// Phase 1: scan within this MBB.
|
|
for (auto Probe = std::next(After); Probe != MBB.end(); ++Probe) {
|
|
if (Probe->isDebugInstr()) continue;
|
|
if (readsCarryOrV(*Probe)) return true;
|
|
if (isFlagRedefiner(Probe->getOpcode())) return false;
|
|
if (Probe->isCall()) return false; // callee resets flags
|
|
}
|
|
// Phase 2: peek into each successor's first few MIs. We BAIL only on
|
|
// a positive C/V read; reaching MBB end or peek-cap without finding
|
|
// one is treated as "carry dead" — ADCi16imm's carry-out is never
|
|
// used in carry chains (those use ADCEi16imm), so a stray carry
|
|
// floating into RTL or an unrelated arithmetic op causes no harm.
|
|
const unsigned MaxPeek = 6;
|
|
for (MachineBasicBlock *Succ : MBB.successors()) {
|
|
unsigned Peeked = 0;
|
|
for (auto &MI : *Succ) {
|
|
if (MI.isDebugInstr()) continue;
|
|
if (readsCarryOrV(MI)) return true;
|
|
if (isFlagRedefiner(MI.getOpcode()) || MI.isCall()) break;
|
|
if (++Peeked >= MaxPeek) break;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
// Convert `ADCi16imm dst, src, ±1`/`±2` and `SBCi16imm` similarly to
|
|
// INA / INA;INA / DEA / DEA;DEA chains when C/V are dead. ADCi16imm
|
|
// is a pseudo that expands to CLC+ADC_Imm16 (4B/5cyc). INA is 1B/2cyc.
|
|
// Savings per ±1: 3B/3cyc; per ±2: 2B/1cyc. SBCi16imm is symmetric
|
|
// (sub by N == add by -N), so SBC #1 → DEA, SBC #-1 → INA, etc.
|
|
static bool foldImmAdcToInaDea(MachineBasicBlock &MBB,
|
|
const W65816InstrInfo &TII) {
|
|
bool Changed = false;
|
|
auto It = MBB.begin();
|
|
while (It != MBB.end()) {
|
|
unsigned Op = It->getOpcode();
|
|
bool isAdc = (Op == W65816::ADCi16imm);
|
|
bool isSbc = (Op == W65816::SBCi16imm);
|
|
if ((!isAdc && !isSbc) || It->getNumOperands() < 3 ||
|
|
!It->getOperand(2).isImm()) { ++It; continue; }
|
|
int64_t Imm = (int16_t)It->getOperand(2).getImm();
|
|
// For SBC, negate: SBC by +N is "subtract N", same as ADC by -N.
|
|
int64_t Effective = isSbc ? -Imm : Imm;
|
|
if (Effective < -2 || Effective > 2 || Effective == 0) { ++It; continue; }
|
|
if (carryFlagLiveAfter(It, MBB)) { ++It; continue; }
|
|
|
|
DebugLoc DL = It->getDebugLoc();
|
|
unsigned NewOpc = (Effective > 0) ? W65816::INA : W65816::DEA;
|
|
unsigned Count = (Effective > 0) ? Effective : -Effective;
|
|
for (unsigned i = 0; i < Count; ++i)
|
|
BuildMI(MBB, It, DL, TII.get(NewOpc));
|
|
auto NextIt = std::next(It);
|
|
It->eraseFromParent();
|
|
It = NextIt;
|
|
Changed = true;
|
|
}
|
|
return Changed;
|
|
}
|
|
|
|
bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
|
|
bool Changed = false;
|
|
const auto &STI = MF.getSubtarget<W65816Subtarget>();
|
|
const auto &TII = *STI.getInstrInfo();
|
|
for (MachineBasicBlock &MBB : MF) {
|
|
SmallVector<MachineInstr *, 8> Toggles;
|
|
for (MachineInstr &MI : MBB) {
|
|
unsigned Opc = MI.getOpcode();
|
|
if (Opc == W65816::REP || Opc == W65816::SEP)
|
|
Toggles.push_back(&MI);
|
|
}
|
|
SmallPtrSet<MachineInstr *, 8> Erased;
|
|
for (MachineInstr *First : Toggles) {
|
|
if (Erased.count(First)) continue;
|
|
// The next non-debug instruction must be the matching opposite
|
|
// toggle with the same imm.
|
|
auto It = std::next(First->getIterator());
|
|
while (It != MBB.end() && It->isDebugInstr()) ++It;
|
|
if (It == MBB.end()) continue;
|
|
MachineInstr &Next = *It;
|
|
// Look for REP-then-SEP or SEP-then-REP with matching imm.
|
|
unsigned FirstOpc = First->getOpcode();
|
|
unsigned WantOpc = (FirstOpc == W65816::REP) ? W65816::SEP : W65816::REP;
|
|
int FirstImm = getSepRepImm(*First, FirstOpc);
|
|
int NextImm = getSepRepImm(Next, WantOpc);
|
|
if (FirstImm < 0 || NextImm < 0 || FirstImm != NextImm) continue;
|
|
Erased.insert(First);
|
|
Erased.insert(&Next);
|
|
First->eraseFromParent();
|
|
Next.eraseFromParent();
|
|
Changed = true;
|
|
}
|
|
|
|
// Second peephole: collapse `ADCi16imm src, ±1/±2` (and SBCi16imm)
|
|
// into INA/DEA chains when the carry flag they would set is unused.
|
|
// ADCi16imm is a pseudo (expands to CLC+ADC_Imm16); we rewrite it
|
|
// here BEFORE the AsmPrinter expansion runs. But this pass runs at
|
|
// pre-emit, AFTER post-RA pseudo expansion. ADCi16imm survives
|
|
// because its MCInst lowering is in W65816AsmPrinter (not in the
|
|
// generic post-RA pseudo expander), so it's still in the MIR here.
|
|
Changed |= foldImmAdcToInaDea(MBB, TII);
|
|
|
|
// Third peephole: drop `LDY_Imm16 K` when Y already holds K from
|
|
// an earlier LDY in the same MBB and no intervening MI clobbered
|
|
// Y. Custom inserter emits LDY #0 before every LDAfi_indY/STAfi_indY,
|
|
// even though Y already holds 0 from a previous emit — the
|
|
// redundant LDYs survive MachineLICM because Y is a phys reg and
|
|
// the inserter binds them tightly to each use.
|
|
int yKnown = -1; // -1 means unknown; otherwise the immediate
|
|
auto It2 = MBB.begin();
|
|
while (It2 != MBB.end()) {
|
|
MachineInstr &MI = *It2;
|
|
if (MI.isDebugInstr()) { ++It2; continue; }
|
|
unsigned Op = MI.getOpcode();
|
|
if (Op == W65816::LDY_Imm16 && MI.getNumOperands() >= 1 &&
|
|
MI.getOperand(0).isImm()) {
|
|
int K = MI.getOperand(0).getImm() & 0xFFFF;
|
|
if (yKnown == K) {
|
|
auto Erase = It2++;
|
|
Erase->eraseFromParent();
|
|
Changed = true;
|
|
continue;
|
|
}
|
|
yKnown = K;
|
|
} else {
|
|
// Conservatively invalidate yKnown on anything that touches Y
|
|
// or on calls / inline asm / any instruction that doesn't have
|
|
// a clean "no Y effect" guarantee. Cheaper to underclaim than
|
|
// miscompile.
|
|
switch (Op) {
|
|
case W65816::LDAfi_indY: // reads Y, doesn't def it — keep yKnown
|
|
case W65816::STAfi_indY:
|
|
case W65816::LDA_StackRelIndY:
|
|
case W65816::STA_StackRelIndY:
|
|
break;
|
|
case W65816::TAY: case W65816::TXY:
|
|
case W65816::INY: case W65816::DEY:
|
|
case W65816::PLY: case W65816::LDY_DP: case W65816::LDY_Abs:
|
|
case W65816::LDY_DPX: case W65816::LDY_AbsX:
|
|
yKnown = -1; break;
|
|
default:
|
|
if (MI.isCall()) yKnown = -1;
|
|
break;
|
|
}
|
|
}
|
|
++It2;
|
|
}
|
|
}
|
|
return Changed;
|
|
}
|