65816-llvm-mos/src/llvm/lib/Target/W65816/W65816SepRepCleanup.cpp
Scott Duensing 6d7eae0356 Checkpoint.
2026-04-30 01:29:16 -05:00

301 lines
11 KiB
C++

//===-- W65816SepRepCleanup.cpp - Coalesce adjacent SEP/REP toggles -------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Post-PEI peephole that drops adjacent `REP #$20 ; SEP #$20` (or vice
// versa) pairs that toggle the M-bit redundantly.
//
// The STA8fi expansion in W65816RegisterInfo::eliminateFrameIndex emits
// `SEP #$20 / STA d,S / REP #$20` so each i8 store runs with M=1. When
// two STA8fi sit back-to-back in the MIR (no 16-bit ALU op between
// them), the post-PEI stream contains:
//
// SEP #$20
// STA d1, S
// REP #$20 <-- toggle
// SEP #$20 <-- toggle (cancels above)
// STA d2, S
// REP #$20
//
// The middle REP/SEP pair is a no-op: both stores can run in one M=1
// region. We drop them to leave:
//
// SEP #$20
// STA d1, S
// STA d2, S
// REP #$20
//
// Saves 4 bytes / 6 cycles per coalesced pair (REP #imm and SEP #imm are
// each 2 bytes / 3 cycles). Symmetric `SEP/REP` pairs (M=1 then M=0 with
// nothing in between) are also dropped — they can arise around inline-asm
// or hand-written assembly snippets.
//
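// Runs at addPreEmitPass (after PEI has expanded STA8fi).
//
// The same pass also hosts two further per-block peepholes: folding
// ADCi16imm/SBCi16imm by ±1/±2 into INA/DEA chains when the carry and
// overflow flags are not observed afterwards, and deleting an `LDY #K`
// when Y is already known to hold K.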
//
//===----------------------------------------------------------------------===//
#include "W65816.h"
#include "W65816InstrInfo.h"
#include "W65816Subtarget.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
using namespace llvm;
#define DEBUG_TYPE "w65816-sep-rep-cleanup"
namespace {

/// Pre-emit machine-function pass for the W65816 target.  All of the work
/// happens in runOnMachineFunction(); the class itself carries no state.
class W65816SepRepCleanup : public MachineFunctionPass {
public:
  /// Pass identifier handed to the MachineFunctionPass base class.
  static char ID;

  W65816SepRepCleanup() : MachineFunctionPass(ID) {}

  /// Applies the peepholes to every basic block of MF and reports whether
  /// any instruction was changed.
  bool runOnMachineFunction(MachineFunction &MF) override;

  /// Human-readable name of the pass.
  StringRef getPassName() const override {
    return "W65816 SEP/REP toggle coalescing";
  }
};

} // end anonymous namespace
// Definition of the pass identifier declared in the class; the constructor
// passes it to the MachineFunctionPass base.
char W65816SepRepCleanup::ID = 0;
// Registers the pass under the DEBUG_TYPE command-line name.  The two
// trailing `false` arguments are the macro's "CFG only" and "is analysis"
// flags: this pass is neither.
INITIALIZE_PASS(W65816SepRepCleanup, DEBUG_TYPE,
"W65816 SEP/REP toggle coalescing", false, false)
/// Factory for the cleanup pass.  Returns a freshly heap-allocated
/// instance; the caller receives the raw pointer.
FunctionPass *llvm::createW65816SepRepCleanup() {
  FunctionPass *Pass = new W65816SepRepCleanup();
  return Pass;
}
// Extracts the mask of a `SEP #imm` / `REP #imm`.
//
// MI  - instruction to inspect.
// Opc - the opcode MI is required to have (W65816::SEP or W65816::REP at
//       the call sites in this file).
//
// Returns operand 0's immediate when MI has opcode `Opc` and its first
// operand is a plain immediate; returns -1 in every other case.
static int getSepRepImm(const MachineInstr &MI, unsigned Opc) {
  const bool HasImmMask = MI.getOpcode() == Opc && MI.getNumOperands() >= 1 &&
                          MI.getOperand(0).isImm();
  return HasImmMask ? static_cast<int>(MI.getOperand(0).getImm()) : -1;
}
// Reports whether MI takes the carry (C) or overflow (V) flag as an input.
// Those are the two flags that ADC/SBC produce but INA/DEA do not, so a
// consumer of either one blocks the ADC->INA / SBC->DEA rewrite.
//
// Only the opcodes listed below are recognised; every other opcode is
// reported as "does not read C/V".
// NOTE(review): other addressing modes of ADC/SBC/ROL/ROR, PHP, and any
// carry-chain pseudos are not listed — confirm none of them can appear in
// the instruction stream this pass sees.
static bool readsCarryOrV(const MachineInstr &MI) {
  const unsigned Opc = MI.getOpcode();
  switch (Opc) {
  // Conditional branches on C.
  case W65816::BCC:
  case W65816::BCS:
  // Conditional branches on V.
  case W65816::BVC:
  case W65816::BVS:
  // Add-with-carry: C is the carry-in.
  case W65816::ADC_Imm8:
  case W65816::ADC_Imm16:
  case W65816::ADC_DP:
  case W65816::ADC_Abs:
  case W65816::ADC_StackRel:
  // Subtract-with-carry.
  case W65816::SBC_Imm8:
  case W65816::SBC_Imm16:
  case W65816::SBC_DP:
  case W65816::SBC_Abs:
  case W65816::SBC_StackRel:
  // Rotates shift C into the operand.
  case W65816::ROL_A:
  case W65816::ROL_DP:
  case W65816::ROL_Abs:
  case W65816::ROR_A:
  case W65816::ROR_DP:
  case W65816::ROR_Abs:
    return true;
  default:
    break;
  }
  return false;
}
// Returns true if `Op` is an opcode that always overwrites the carry flag
// (CLC, SEC, CMP*, CPX*, CPY*), so a carry value produced before it cannot
// be observed after it.  Includes the pseudo CMP* variants (CMPi16imm etc.)
// since this peephole runs at pre-emit, BEFORE the AsmPrinter expands them.
//
// REP and SEP are deliberately NOT treated as redefiners.  They only touch
// the status bits selected by their immediate mask, and this function sees
// the opcode alone.  The mode toggles this backend emits (`SEP #$20` /
// `REP #$20`) change only the M bit and leave C and V intact, so stopping a
// liveness scan at them could hide a later carry reader.  Returning false
// merely lets the caller keep scanning, which is always safe.
//
// NOTE(review): none of the opcodes below writes V; callers that also use
// this to end a V dependency rely on V never being consumed across a
// compare — confirm.
static bool isFlagRedefiner(unsigned Op) {
  switch (Op) {
  case W65816::CLC:
  case W65816::SEC:
  case W65816::CMP_Imm8:
  case W65816::CMP_Imm16:
  case W65816::CMP_StackRel:
  case W65816::CMP_DP:
  case W65816::CMP_Abs:
  case W65816::CMPi16imm:
  case W65816::CMPi8imm:
  case W65816::CMPfi:
  case W65816::CMPabs:
  case W65816::CMP_RR:
  case W65816::CPX_Imm8:
  case W65816::CPX_Imm16:
  case W65816::CPX_DP:
  case W65816::CPX_Abs:
  case W65816::CPY_Imm8:
  case W65816::CPY_Imm16:
  case W65816::CPY_DP:
  case W65816::CPY_Abs:
    return true;
  default:
    return false;
  }
}
// Decides whether the C/V flags produced by the instruction at `After` may
// still be observed.
//
// After - iterator to the flag-producing instruction inside MBB.
// MBB   - the block that contains `After`.
//
// The scan has two stages:
//   1. Walk the rest of MBB.  A C/V reader means "live" (true).  A flag
//      redefiner or a call means "dead" (false).
//   2. If the end of MBB is reached, look at up to six non-debug
//      instructions at the top of every successor.  A C/V reader there
//      means "live".  A redefiner, a call, the end of the successor, or
//      the peek limit simply ends the look at that successor.
//
// When no reader is found the flags are reported dead.  That default leans
// on ADCi16imm's carry-out not feeding carry chains (those are expected to
// use ADCEi16imm instead).  It is what lets a loop body whose back-edge
// target starts with non-flag-reading instructions be folded.
static bool carryFlagLiveAfter(MachineBasicBlock::iterator After,
                               MachineBasicBlock &MBB) {
  // Stage 1: remainder of the current block.
  for (MachineBasicBlock::iterator Cur = std::next(After), End = MBB.end();
       Cur != End; ++Cur) {
    const MachineInstr &MI = *Cur;
    if (MI.isDebugInstr())
      continue;
    if (readsCarryOrV(MI))
      return true;
    // Either a fresh flag definition or a call ends the dependency here.
    if (isFlagRedefiner(MI.getOpcode()) || MI.isCall())
      return false;
  }

  // Stage 2: bounded look into each successor.
  constexpr unsigned PeekLimit = 6;
  for (MachineBasicBlock *Succ : MBB.successors()) {
    unsigned Inspected = 0;
    for (const MachineInstr &MI : *Succ) {
      if (MI.isDebugInstr())
        continue;
      if (readsCarryOrV(MI))
        return true;
      const bool EndsLook = isFlagRedefiner(MI.getOpcode()) || MI.isCall();
      if (EndsLook)
        break;
      ++Inspected;
      if (Inspected >= PeekLimit)
        break;
    }
  }
  return false;
}
// Rewrites `ADCi16imm dst, src, N` and `SBCi16imm dst, src, N` into one or
// two INA / DEA instructions when the net change is ±1 or ±2 and the C/V
// flags the original would have produced are not observed afterwards.
//
// MBB - block to rewrite in place.
// TII - instruction info used to obtain the INA / DEA descriptors.
//
// Returns true if at least one instruction was replaced.
//
// ADCi16imm is a pseudo that expands to CLC+ADC_Imm16 (4B/5cyc) while INA
// is 1B/2cyc, so ±1 saves 3B/3cyc and ±2 saves 2B/1cyc.  Subtraction is
// handled by negating the immediate: SBC #1 becomes DEA, SBC #-1 becomes
// INA, and so on.
// NOTE(review): the register operands are not inspected — this presumes
// dst/src are always the accumulator that INA/DEA operate on; confirm.
static bool foldImmAdcToInaDea(MachineBasicBlock &MBB,
                               const W65816InstrInfo &TII) {
  bool Changed = false;
  for (auto It = MBB.begin(); It != MBB.end();) {
    // Step past the candidate first so erasing it cannot disturb the walk.
    const auto Cur = It++;
    MachineInstr &MI = *Cur;

    const unsigned Opc = MI.getOpcode();
    const bool IsSub = Opc == W65816::SBCi16imm;
    if (Opc != W65816::ADCi16imm && !IsSub)
      continue;
    if (MI.getNumOperands() < 3 || !MI.getOperand(2).isImm())
      continue;

    // Interpret the immediate as a signed 16-bit quantity.
    const int64_t Imm = static_cast<int16_t>(MI.getOperand(2).getImm());
    const int64_t Delta = IsSub ? -Imm : Imm;
    if (Delta == 0 || Delta < -2 || Delta > 2)
      continue;
    if (carryFlagLiveAfter(Cur, MBB))
      continue;

    const bool Increment = Delta > 0;
    const unsigned StepOpc = Increment ? W65816::INA : W65816::DEA;
    unsigned Steps = static_cast<unsigned>(Increment ? Delta : -Delta);
    DebugLoc DL = MI.getDebugLoc();
    // Emit the replacement chain in front of the pseudo, then drop it.
    while (Steps-- != 0)
      BuildMI(MBB, Cur, DL, TII.get(StepOpc));
    MI.eraseFromParent();
    Changed = true;
  }
  return Changed;
}
bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) {
bool Changed = false;
const auto &STI = MF.getSubtarget<W65816Subtarget>();
const auto &TII = *STI.getInstrInfo();
for (MachineBasicBlock &MBB : MF) {
SmallVector<MachineInstr *, 8> Toggles;
for (MachineInstr &MI : MBB) {
unsigned Opc = MI.getOpcode();
if (Opc == W65816::REP || Opc == W65816::SEP)
Toggles.push_back(&MI);
}
SmallPtrSet<MachineInstr *, 8> Erased;
for (MachineInstr *First : Toggles) {
if (Erased.count(First)) continue;
// The next non-debug instruction must be the matching opposite
// toggle with the same imm.
auto It = std::next(First->getIterator());
while (It != MBB.end() && It->isDebugInstr()) ++It;
if (It == MBB.end()) continue;
MachineInstr &Next = *It;
// Look for REP-then-SEP or SEP-then-REP with matching imm.
unsigned FirstOpc = First->getOpcode();
unsigned WantOpc = (FirstOpc == W65816::REP) ? W65816::SEP : W65816::REP;
int FirstImm = getSepRepImm(*First, FirstOpc);
int NextImm = getSepRepImm(Next, WantOpc);
if (FirstImm < 0 || NextImm < 0 || FirstImm != NextImm) continue;
Erased.insert(First);
Erased.insert(&Next);
First->eraseFromParent();
Next.eraseFromParent();
Changed = true;
}
// Second peephole: collapse `ADCi16imm src, ±1/±2` (and SBCi16imm)
// into INA/DEA chains when the carry flag they would set is unused.
// ADCi16imm is a pseudo (expands to CLC+ADC_Imm16); we rewrite it
// here BEFORE the AsmPrinter expansion runs. But this pass runs at
// pre-emit, AFTER post-RA pseudo expansion. ADCi16imm survives
// because its MCInst lowering is in W65816AsmPrinter (not in the
// generic post-RA pseudo expander), so it's still in the MIR here.
Changed |= foldImmAdcToInaDea(MBB, TII);
// Third peephole: drop `LDY_Imm16 K` when Y already holds K from
// an earlier LDY in the same MBB and no intervening MI clobbered
// Y. Custom inserter emits LDY #0 before every LDAfi_indY/STAfi_indY,
// even though Y already holds 0 from a previous emit — the
// redundant LDYs survive MachineLICM because Y is a phys reg and
// the inserter binds them tightly to each use.
int yKnown = -1; // -1 means unknown; otherwise the immediate
auto It2 = MBB.begin();
while (It2 != MBB.end()) {
MachineInstr &MI = *It2;
if (MI.isDebugInstr()) { ++It2; continue; }
unsigned Op = MI.getOpcode();
if (Op == W65816::LDY_Imm16 && MI.getNumOperands() >= 1 &&
MI.getOperand(0).isImm()) {
int K = MI.getOperand(0).getImm() & 0xFFFF;
if (yKnown == K) {
auto Erase = It2++;
Erase->eraseFromParent();
Changed = true;
continue;
}
yKnown = K;
} else {
// Conservatively invalidate yKnown on anything that touches Y
// or on calls / inline asm / any instruction that doesn't have
// a clean "no Y effect" guarantee. Cheaper to underclaim than
// miscompile.
switch (Op) {
case W65816::LDAfi_indY: // reads Y, doesn't def it — keep yKnown
case W65816::STAfi_indY:
case W65816::LDA_StackRelIndY:
case W65816::STA_StackRelIndY:
break;
case W65816::TAY: case W65816::TXY:
case W65816::INY: case W65816::DEY:
case W65816::PLY: case W65816::LDY_DP: case W65816::LDY_Abs:
case W65816::LDY_DPX: case W65816::LDY_AbsX:
yKnown = -1; break;
default:
if (MI.isCall()) yKnown = -1;
break;
}
}
++It2;
}
}
return Changed;
}