//===-- W65816InstrInfo.cpp - W65816 Instruction Information --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Skeleton instruction-info implementation. Real register copy and stack // spill/reload lowering will be added once the instruction set is described. // //===----------------------------------------------------------------------===// #include "W65816InstrInfo.h" #include "W65816.h" #include "W65816Subtarget.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; #define GET_INSTRINFO_CTOR_DTOR #include "W65816GenInstrInfo.inc" void W65816InstrInfo::anchor() {} W65816InstrInfo::W65816InstrInfo(const W65816Subtarget &STI) : W65816GenInstrInfo(STI, RI, W65816::ADJCALLSTACKDOWN, W65816::ADJCALLSTACKUP), RI() {} // Shared helpers exposed via W65816InstrInfo.h. See the namespace // comment there for usage notes. 
namespace llvm {
namespace W65816Helpers {

// Map an IMGn pseudo-register to the direct-page address used for it.
// IMG0..IMG7 map to $D0..$DE and IMG8..IMG15 map to $C0..$CE, two bytes
// apart. Returns -1 for any register that is not an IMGn register.
int imgDPAddr(unsigned Reg) {
  switch (Reg) {
  case W65816::IMG0:  return 0xD0;
  case W65816::IMG1:  return 0xD2;
  case W65816::IMG2:  return 0xD4;
  case W65816::IMG3:  return 0xD6;
  case W65816::IMG4:  return 0xD8;
  case W65816::IMG5:  return 0xDA;
  case W65816::IMG6:  return 0xDC;
  case W65816::IMG7:  return 0xDE;
  case W65816::IMG8:  return 0xC0;
  case W65816::IMG9:  return 0xC2;
  case W65816::IMG10: return 0xC4;
  case W65816::IMG11: return 0xC6;
  case W65816::IMG12: return 0xC8;
  case W65816::IMG13: return 0xCA;
  case W65816::IMG14: return 0xCC;
  case W65816::IMG15: return 0xCE;
  default:            return -1;
  }
}

// Return the conditional-branch opcode with the opposite condition, or 0
// when Opc is not one of the eight conditional branches listed here.
unsigned invertCondOpcode(unsigned Opc) {
  switch (Opc) {
  case W65816::BEQ: return W65816::BNE;
  case W65816::BNE: return W65816::BEQ;
  case W65816::BCS: return W65816::BCC;
  case W65816::BCC: return W65816::BCS;
  case W65816::BMI: return W65816::BPL;
  case W65816::BPL: return W65816::BMI;
  case W65816::BVS: return W65816::BVC;
  case W65816::BVC: return W65816::BVS;
  default:          return 0;
  }
}

// Return the _DP opcode paired with a _StackRel opcode, or 0 when Opc has
// no entry in this table.
unsigned getDpOpcodeForStackRel(unsigned Opc) {
  switch (Opc) {
  case W65816::LDA_StackRel: return W65816::LDA_DP;
  case W65816::STA_StackRel: return W65816::STA_DP;
  case W65816::ADC_StackRel: return W65816::ADC_DP;
  case W65816::SBC_StackRel: return W65816::SBC_DP;
  case W65816::CMP_StackRel: return W65816::CMP_DP;
  case W65816::AND_StackRel: return W65816::AND_DP;
  case W65816::ORA_StackRel: return W65816::ORA_DP;
  case W65816::EOR_StackRel: return W65816::EOR_DP;
  default:                   return 0;
  }
}

// True for the pseudo opcodes enumerated below. NOTE(review): going by the
// name these are the instructions whose Acc16 source is tied to the def —
// confirm against the tablegen definitions.
bool isTiedAcc16Consumer(unsigned Opc) {
  switch (Opc) {
  case W65816::ADCfi:
  case W65816::SBCfi:
  case W65816::ANDfi:
  case W65816::ORAfi:
  case W65816::EORfi:
  case W65816::ADCabs:
  case W65816::SBCabs:
  case W65816::ADCi16imm:
  case W65816::SBCi16imm:
  case W65816::ANDi16imm:
  case W65816::ORAi16imm:
  case W65816::EORi16imm:
    return true;
  default:
    return false;
  }
}

// True when MI is one of the isTiedAcc16Consumer opcodes and at least one of
// its register use operands is tied to a def operand.
bool hasTiedAcc16Src(const MachineInstr &MI) {
  if (!isTiedAcc16Consumer(MI.getOpcode()))
    return false;
  for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || !MO.isUse())
      continue;
    if (MI.isRegTiedToDefOperand(i))
      return true;
  }
  return false;
}

} // namespace W65816Helpers
} // namespace llvm

// Split a Wide32 register (AX32 or an IMG pair) into its {lo, hi} 16-bit
// halves. Returns a pair of invalid (default-constructed) Registers for
// anything that is not a known pair register.
//
// Use a hand-written dispatch instead of getSubReg because the
// MCRegisterInfo::getSubReg path crashes when called from
// TargetInstrInfo::lowerCopy on regs that are not pair regs (the table
// lookup walks past the end of the diff list).
//
// Shared by copyPhysReg, storeRegToStackSlot and loadRegFromStackSlot so the
// three users cannot drift out of sync.
static std::pair<Register, Register> wide32Halves(Register R) {
  switch (R) {
  case W65816::AX32:    return {W65816::A, W65816::X};
  case W65816::IMG01:   return {W65816::IMG0, W65816::IMG1};
  case W65816::IMG23:   return {W65816::IMG2, W65816::IMG3};
  case W65816::IMG45:   return {W65816::IMG4, W65816::IMG5};
  case W65816::IMG67:   return {W65816::IMG6, W65816::IMG7};
  case W65816::IMG89:   return {W65816::IMG8, W65816::IMG9};
  case W65816::IMG1011: return {W65816::IMG10, W65816::IMG11};
  case W65816::IMG1213: return {W65816::IMG12, W65816::IMG13};
  case W65816::IMG1415: return {W65816::IMG14, W65816::IMG15};
  default:              return {Register(), Register()};
  }
}

// Emit the instruction sequence that copies SrcReg into DestReg before I.
// Handles A/X/Y transfers, IMGn direct-page copies, SP transfers, the DPF0
// carrier, Wide32 pair copies (split per half, recursively), and falls back
// to a generic COPY when either register is still virtual. Any remaining
// combination is reported on errs() and hits llvm_unreachable.
void W65816InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator I,
                                  const DebugLoc &DL, Register DestReg,
                                  Register SrcReg, bool KillSrc,
                                  bool RenamableDest,
                                  bool RenamableSrc) const {
  if (DestReg == SrcReg)
    return;

  // A → X / X → A via TAX / TXA. Used by i32 return ABI (lo in A, hi
  // in X) and by callers reading split-i32 results. Both instructions
  // are 16-bit when M=0/X=0; that matches our default mode.
  if (DestReg == W65816::X && SrcReg == W65816::A) {
    BuildMI(MBB, I, DL, get(W65816::TAX));
    return;
  }
  if (DestReg == W65816::A && SrcReg == W65816::X) {
    BuildMI(MBB, I, DL, get(W65816::TXA));
    return;
  }

  // A → Y / Y → A via TAY / TYA. Same M/X width caveat.
  if (DestReg == W65816::Y && SrcReg == W65816::A) {
    BuildMI(MBB, I, DL, get(W65816::TAY));
    return;
  }
  if (DestReg == W65816::A && SrcReg == W65816::Y) {
    BuildMI(MBB, I, DL, get(W65816::TYA));
    return;
  }

  // A → IMGn / IMGn → A: STA dp / LDA dp. IMGn is DP-backed at fixed
  // addresses ($D0..$DE for IMG0..7, $C0..$CE for IMG8..15) — see
  // W65816Helpers::imgDPAddr above. Both values are -1 for non-IMG regs.
  int srcImg = W65816Helpers::imgDPAddr(SrcReg);
  int dstImg = W65816Helpers::imgDPAddr(DestReg);
  if (DestReg == W65816::A && srcImg >= 0) {
    BuildMI(MBB, I, DL, get(W65816::LDA_DP)).addImm(srcImg);
    return;
  }
  if (dstImg >= 0 && SrcReg == W65816::A) {
    BuildMI(MBB, I, DL, get(W65816::STA_DP)).addImm(dstImg);
    return;
  }

  // IMGn → IMGm: route through A, but PHA-bracket so A is preserved.
  // Without the bracket, regalloc could insert this COPY between a
  // def of A and the use of A (e.g. between `$a = COPY $img10` and
  // `STAfi $a, slot`, when both vregs are alive simultaneously and
  // the regalloc decides to shuffle img physregs in between). The
  // unbracketed lda/sta clobbers A and the subsequent STAfi spills
  // garbage. Observed under ptr32 + full IMG defs in the C++ try/
  // catch path: `*p = 42` after `__cxa_allocate_exception` stored
  // hi-half-of-ptr at lo-half-slot, breaking the indirect-long
  // address setup so 42 landed at the wrong place.
  //
  // PHA bracket cost: +PHA (3 cyc, 1 byte) + PLA (4 cyc, 1 byte) = +7
  // cyc, +2 bytes per IMG-IMG copy. These are rare (regalloc usually
  // can avoid them by picking the same physreg for COPY's src and
  // dst), so the cost is small.
  if (srcImg >= 0 && dstImg >= 0) {
    BuildMI(MBB, I, DL, get(W65816::PHA));
    BuildMI(MBB, I, DL, get(W65816::LDA_DP)).addImm(srcImg);
    BuildMI(MBB, I, DL, get(W65816::STA_DP)).addImm(dstImg);
    BuildMI(MBB, I, DL, get(W65816::PLA));
    return;
  }

  // SP -> A via TSC. Used by alloca / setjmp asm machinery.
  if (DestReg == W65816::A && SrcReg == W65816::SP) {
    BuildMI(MBB, I, DL, get(W65816::TSC));
    return;
  }
  // A -> SP via TCS.
  if (DestReg == W65816::SP && SrcReg == W65816::A) {
    BuildMI(MBB, I, DL, get(W65816::TCS));
    return;
  }

  // X <-> Y via A: 65816 has no direct X<->Y transfer; bridge through
  // A. Caller is responsible for ensuring A is dead at this program
  // point (regalloc arranges this). Used by greedy when an i16 vreg
  // forced into one Idx16 reg gets coalesced with a use in the other.
  if (DestReg == W65816::Y && SrcReg == W65816::X) {
    BuildMI(MBB, I, DL, get(W65816::TXA));
    BuildMI(MBB, I, DL, get(W65816::TAY));
    return;
  }
  if (DestReg == W65816::X && SrcReg == W65816::Y) {
    BuildMI(MBB, I, DL, get(W65816::TYA));
    BuildMI(MBB, I, DL, get(W65816::TAX));
    return;
  }

  // X → IMGn / IMGn → X: STX dp / LDX dp. Used by the i64-first-arg
  // entry COPY (LowerFormalArguments routes arg0_ml through Img16 to
  // dodge the TXA-bridge-clobbers-A spill bug for udivmod-shaped
  // signatures).
  if (dstImg >= 0 && SrcReg == W65816::X) {
    BuildMI(MBB, I, DL, get(W65816::STX_DP)).addImm(dstImg);
    return;
  }
  if (DestReg == W65816::X && srcImg >= 0) {
    BuildMI(MBB, I, DL, get(W65816::LDX_DP)).addImm(srcImg);
    return;
  }

  // Y -> IMGn / IMGn -> Y: STY dp / LDY dp. Symmetric with the X
  // case above. Used by the i32-first-arg ABI's hi half (in X) and
  // by Wide32 pair copies that have one half in Y after the per-half
  // routing — see the Wide32 dispatch below.
  if (dstImg >= 0 && SrcReg == W65816::Y) {
    BuildMI(MBB, I, DL, get(W65816::STY_DP)).addImm(dstImg);
    return;
  }
  if (DestReg == W65816::Y && srcImg >= 0) {
    BuildMI(MBB, I, DL, get(W65816::LDY_DP)).addImm(srcImg);
    return;
  }

  // DPF0 → A: emit `LDA $F0`. DPF0 is the pseudo-physreg carrier
  // for an i64-returning call's high 16 bits; LowerCall builds a
  // CopyFromReg(DPF0) glued to the call so the SDAG combiner /
  // scheduler can't merge or reorder reads across calls.
  if (DestReg == W65816::A && SrcReg == W65816::DPF0) {
    BuildMI(MBB, I, DL, get(W65816::LDA_DP)).addImm(0xF0);
    return;
  }
  // A → DPF0: emit `STA $F0`. Used by LowerReturn for the i64 high
  // half; using a true direct-page store is critical because plain
  // ISD::STORE with addr=0xF0 was lowering to `(d,s),y` indirect via
  // DBR — which silently broke under DBR != 0 (e.g. after a bank
  // switch). STA dp uses D + dp directly, ignoring DBR.
  if (DestReg == W65816::DPF0 && SrcReg == W65816::A) {
    BuildMI(MBB, I, DL, get(W65816::STA_DP)).addImm(0xF0);
    return;
  }

  // Wide32 (AX32 or IMG-pair) <-> Wide32 copy: split on sub_lo / sub_hi
  // and recurse. The halves come from the file-local wide32Halves table;
  // both halves are invalid when the register is not a pair register.
  auto [srcLo, srcHi] = wide32Halves(SrcReg);
  auto [dstLo, dstHi] = wide32Halves(DestReg);
  if (srcLo && srcHi && dstLo && dstHi) {
    // Wide32 -> Wide32. Lo-first order is correct in every direction:
    //   AX32 -> IMG_pair     : STA dstLo (A live), then STX dstHi
    //   IMG_pair -> AX32     : LDA srcLo, then LDX srcHi (independent halves)
    //   IMG_pair -> IMG_pair : LDA/STA chain twice (A is only per-half scratch)
    copyPhysReg(MBB, I, DL, dstLo, srcLo, KillSrc, RenamableDest,
                RenamableSrc);
    copyPhysReg(MBB, I, DL, dstHi, srcHi, KillSrc, RenamableDest,
                RenamableSrc);
    return;
  }

  // Wide32 -> i16: take sub_lo of source. Arises post-RA when an
  // EXTRACT_SUBREG was lowered as a parent-reg COPY (the SubRegIndex
  // is dropped by lowerCopy).
  if (srcLo && srcHi && !dstLo) {
    copyPhysReg(MBB, I, DL, DestReg, srcLo, KillSrc, RenamableDest,
                RenamableSrc);
    return;
  }

  // i16 -> Wide32: write sub_lo only (sub_hi left as caller had it,
  // matching INSERT_SUBREG semantics). Arises post-RA when REG_SEQUENCE
  // is expanded into per-half COPY pseudos, then a parent-reg COPY of
  // a sub-reg-only def appears.
  if (!srcLo && dstLo && dstHi) {
    copyPhysReg(MBB, I, DL, dstLo, SrcReg, KillSrc, RenamableDest,
                RenamableSrc);
    return;
  }

  // Virtual-register caller: this happens when the inline spiller
  // (called from Basic regalloc) rewrites uses of a spilled vreg and
  // asks us to copy through A before its physreg has been assigned.
  // Emit a generic COPY pseudo and let the regalloc rewriter / a later
  // ExpandPostRA pass resolve it once both regs are physical.
  if (SrcReg.isVirtual() || DestReg.isVirtual()) {
    BuildMI(MBB, I, DL, get(TargetOpcode::COPY), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc));
    return;
  }

  // Nothing matched: dump the offending pair before aborting so the
  // missing case can be identified from the log.
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  llvm::errs() << "W65816 copyPhysReg unhandled: src="
               << (SrcReg.isPhysical() ? TRI->getRegAsmName(SrcReg) : "")
               << " dst="
               << (DestReg.isPhysical() ? TRI->getRegAsmName(DestReg) : "")
               << " srcImg=" << srcImg << " dstImg=" << dstImg << "\n";
  llvm_unreachable("W65816: cross-class copyPhysReg not yet implemented");
}

// Spill SrcReg to stack slot FrameIdx, inserting before MI. All stores go
// through the STAfi pseudo; Wide32-class spills become two STAfi at offsets
// 0 and 2.
void W65816InstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, Register SrcReg,
    bool isKill, int FrameIdx, const TargetRegisterClass *RC, Register VReg,
    MachineInstr::MIFlag Flags) const {
  // STAfi gets eliminated by W65816RegisterInfo::eliminateFrameIndex into
  // a real STA d,S. Source is implicit A; emit the pseudo with the FI
  // and zero offset. When regalloc hands us a spill from X or Y, bridge
  // through A (TXA / TYA) — same rationale as loadRegFromStackSlot.
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();

  // Wide32 spill: split into 2 i16 stores at offsets 0 and 2 of the
  // 4-byte spill slot. Bridge each half through A using copyPhysReg.
  if (RC == &W65816::Wide32RegClass || RC == &W65816::Acc32RegClass ||
      RC == &W65816::AnyWide32RegClass) {
    auto [Lo, Hi] = wide32Halves(SrcReg);
    if (!Lo) {
      // Regalloc occasionally hands us an UNPAIRED single i16 physreg
      // (Acc16 / Img16 / Idx16) for a Wide32-class spill — happens when
      // only one sub-reg is live at the spill point and the regalloc
      // decides to spill it through the Wide32 path anyway. Treat as
      // a single i16 store of the lone half at offset 0; the matching
      // reload mirrors this (only the lo half is read back). The hi
      // half slot at offset 2 is left unwritten — the reload's hi load
      // reads zero-init stack memory which is fine because nothing
      // genuinely needed the hi value (otherwise the regalloc would
      // have allocated a real pair).
      if (SrcReg != W65816::A)
        copyPhysReg(MBB, MI, DL, W65816::A, SrcReg, false);
      BuildMI(MBB, MI, DL, get(W65816::STAfi))
          .addReg(W65816::A)
          .addFrameIndex(FrameIdx)
          .addImm(0);
      return;
    }

    // Bridge lo through A, store at offset 0; bridge hi through A,
    // store at offset 2. This is brittle in the face of regalloc
    // expectations — Wide32 spills are best avoided by keeping the
    // pair in registers if at all possible.
    if (Lo != W65816::A)
      copyPhysReg(MBB, MI, DL, W65816::A, Lo, false);
    BuildMI(MBB, MI, DL, get(W65816::STAfi))
        .addReg(W65816::A)
        .addFrameIndex(FrameIdx)
        .addImm(0);
    copyPhysReg(MBB, MI, DL, W65816::A, Hi, false);
    BuildMI(MBB, MI, DL, get(W65816::STAfi))
        .addReg(W65816::A)
        .addFrameIndex(FrameIdx)
        .addImm(2);
    return;
  }

  // X / Y spill: transfer into A first, then store A. From here on the
  // kill flag is attached to A, not to the original index register.
  if (SrcReg == W65816::X || SrcReg == W65816::Y) {
    unsigned XferOp = (SrcReg == W65816::X) ? W65816::TXA : W65816::TYA;
    BuildMI(MBB, MI, DL, get(XferOp));
    SrcReg = W65816::A;
  }

  // NOTE(review): any other SrcReg (e.g. an IMGn register spilled through
  // the non-Wide32 path) is passed to STAfi unchanged — confirm that such
  // registers cannot reach here or that STAfi's expansion handles them.
  BuildMI(MBB, MI, DL, get(W65816::STAfi))
      .addReg(SrcReg, getKillRegState(isKill))
      .addFrameIndex(FrameIdx)
      .addImm(0);
}

// Reload DestReg from stack slot FrameIdx, inserting before MI. All loads go
// through the LDAfi pseudo into A and are then transferred where needed;
// Wide32-class reloads become two LDAfi at offsets 0 and 2.
void W65816InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           Register DestReg, int FrameIdx,
                                           const TargetRegisterClass *RC,
                                           Register VReg, unsigned SubReg,
                                           MachineInstr::MIFlag Flags) const {
  // LDAfi only knows how to put the value in A. If regalloc asks for
  // a spill into X or Y, we have to bridge through A: LDA d,S then
  // TAX / TAY. Without this, the MIR has `$x = LDAfi` but the asm
  // printer emits just `LDA d,S` (which writes A, not X) — a silent
  // miscompile that surfaced as i64 subtract chains using stale X
  // values for the second word (caught by udivmod's `a - q*b` mod
  // computation).
  DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc();

  // Wide32 reload: 2 i16 loads at offsets 0 and 2 of the 4-byte slot.
  if (RC == &W65816::Wide32RegClass || RC == &W65816::Acc32RegClass ||
      RC == &W65816::AnyWide32RegClass) {
    auto [Lo, Hi] = wide32Halves(DestReg);
    if (!Lo) {
      // Mirror of the unpaired-spill case in storeRegToStackSlot:
      // regalloc handed us a single physreg for a Wide32 reload.
      // Just load the lo half from offset 0 into the dest.
      BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
          .addFrameIndex(FrameIdx)
          .addImm(0);
      if (DestReg != W65816::A)
        copyPhysReg(MBB, MI, DL, DestReg, W65816::A, false);
      return;
    }

    // Lo half: LDA from offset 0, transfer to Lo if needed.
    BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
        .addFrameIndex(FrameIdx)
        .addImm(0);
    if (Lo != W65816::A)
      copyPhysReg(MBB, MI, DL, Lo, W65816::A, false);
    // Hi half: LDA from offset 2, transfer to Hi.
    BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
        .addFrameIndex(FrameIdx)
        .addImm(2);
    if (Hi != W65816::A)
      copyPhysReg(MBB, MI, DL, Hi, W65816::A, false);
    return;
  }

  if (DestReg == W65816::A) {
    BuildMI(MBB, MI, DL, get(W65816::LDAfi), DestReg)
        .addFrameIndex(FrameIdx)
        .addImm(0);
    return;
  }

  if (DestReg == W65816::X || DestReg == W65816::Y) {
    // Load via A, then transfer. A is implicitly clobbered.
    BuildMI(MBB, MI, DL, get(W65816::LDAfi), W65816::A)
        .addFrameIndex(FrameIdx)
        .addImm(0);
    unsigned XferOp = (DestReg == W65816::X) ? W65816::TAX : W65816::TAY;
    BuildMI(MBB, MI, DL, get(XferOp));
    return;
  }

  // Fallback: assume A path (covers Acc16 / Wide16 vregs by class).
  // NOTE(review): any other physical DestReg (e.g. an IMGn register) also
  // lands here and becomes the LDAfi def, although the comment at the top
  // of this function says LDAfi only writes A — confirm such registers
  // cannot reach this path.
  BuildMI(MBB, MI, DL, get(W65816::LDAfi), DestReg)
      .addFrameIndex(FrameIdx)
      .addImm(0);
}

// Recognize `LDAfi <def>, <FI>, 0` as a direct stack-slot load. On a match
// sets FrameIndex and returns the defined register; otherwise returns the
// invalid register (0).
Register W65816InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
                                              int &FrameIndex) const {
  if (MI.getOpcode() != W65816::LDAfi)
    return Register();
  // memfi packs (FrameIndex, offset). Treat only offset==0 as a true
  // stack-slot load — non-zero offset means we're addressing within
  // the slot (e.g. the high half of an i32 spill), which the generic
  // peephole/CSE machinery doesn't model.
  if (MI.getNumOperands() < 3 || !MI.getOperand(1).isFI() ||
      !MI.getOperand(2).isImm() || MI.getOperand(2).getImm() != 0)
    return Register();
  FrameIndex = MI.getOperand(1).getIndex();
  return MI.getOperand(0).getReg();
}

// Recognize `STAfi <src>, <FI>, 0` as a direct stack-slot store. On a match
// sets FrameIndex and returns the stored register; otherwise returns the
// invalid register (0). Non-zero offsets are rejected for the same reason
// as in isLoadFromStackSlot.
Register W65816InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
                                             int &FrameIndex) const {
  if (MI.getOpcode() != W65816::STAfi)
    return Register();
  // STAfi: (ins Acc16:$src, memfi:$addr) — op0 is src reg, op1 is
  // FrameIndex, op2 is offset.
  if (MI.getNumOperands() < 3 || !MI.getOperand(1).isFI() ||
      !MI.getOperand(2).isImm() || MI.getOperand(2).getImm() != 0)
    return Register();
  FrameIndex = MI.getOperand(1).getIndex();
  return MI.getOperand(0).getReg();
}

// Confirm whether MI may be rematerialized. LDAfi is allowed only when it
// reads a fixed frame object; every other opcode defers to the base class.
bool W65816InstrInfo::isReMaterializableImpl(const MachineInstr &MI) const {
  // Only LDAfi is gated on this hook. We declare it
  // isReMaterializable=1 in tablegen so the framework will *consider*
  // re-emitting it instead of spilling, then call back here to confirm.
  // The instruction is safely rematerializable iff it loads from a
  // *fixed* (immutable) frame index — i.e. an arg slot. Loads from a
  // regular spill slot read a computed value that may not be available
  // at the rematerialization point.
  if (MI.getOpcode() != W65816::LDAfi)
    return TargetInstrInfo::isReMaterializableImpl(MI);
  // Operand 1 is the FrameIndex (operand 0 is the def).
  const MachineOperand &FIOp = MI.getOperand(1);
  if (!FIOp.isFI())
    return false;
  const MachineFrameInfo &MFI = MI.getMF()->getFrameInfo();
  return MFI.isFixedObjectIndex(FIOp.getIndex());
}

// Report how many bytes MI contributes to the running SP adjustment inside
// a call sequence.
int W65816InstrInfo::getSPAdjust(const MachineInstr &MI) const {
  unsigned Opc = MI.getOpcode();
  // ADJCALLSTACKDOWN returns 0 (we don't pre-shift SP — PUSH16 does
  // it incrementally). ADJCALLSTACKUP returns -N where N is the
  // first immediate (= total pushed bytes); this counterbalances
  // the +2 contributions accumulated from each PUSH16 so SPAdj
  // returns to 0 at the end of the call sequence.
  if (Opc == W65816::ADJCALLSTACKDOWN)
    return 0;
  if (Opc == W65816::ADJCALLSTACKUP) {
    // The immediate is the byte count.
    if (MI.getNumOperands() > 0 && MI.getOperand(0).isImm())
      return -static_cast<int>(MI.getOperand(0).getImm());
    return 0;
  }
  if (Opc == W65816::PUSH16 || Opc == W65816::PUSH16X)
    return 2;
  return TargetInstrInfo::getSPAdjust(MI);
}

// Conditional branch opcode predicate — derived from the shared
// invertCondOpcode helper so the two stay in lockstep.
static bool isCondBranch(unsigned Opc) {
  return W65816Helpers::invertCondOpcode(Opc) != 0;
}

// Unconditional direct-target branch predicate. Excludes JMP_AbsInd
// (indirect) and JML_Long (different operand kind).
static bool isUncondDirectBranch(unsigned Opc) {
  return Opc == W65816::BRA || Opc == W65816::BRL || Opc == W65816::JMP_Abs;
}

// invertCondOpcode lives in namespace W65816Helpers above.

// Return the target block of a direct branch, or nullptr when operand 0 is
// missing or is not a basic-block operand.
MachineBasicBlock *
W65816InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
  // All our direct branches encode the target MBB in operand 0.
  if (MI.getNumOperands() < 1 || !MI.getOperand(0).isMBB())
    return nullptr;
  return MI.getOperand(0).getMBB();
}

// Analyze the terminators of MBB. Returns false (analyzable) only for a
// pure fall-through block or a block ending in exactly one unconditional
// direct branch; returns true (not analyzable) for everything else.
bool W65816InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                    MachineBasicBlock *&TBB,
                                    MachineBasicBlock *&FBB,
                                    SmallVectorImpl<MachineOperand> &Cond,
                                    bool AllowModify) const {
  TBB = nullptr;
  FBB = nullptr;
  Cond.clear();

  // We deliberately keep conditional branches (BEQ/BNE/etc.) opaque to
  // BranchFolder. Their condition is encoded in the OPCODE and the
  // flag input is an implicit use of P set by a preceding CMP/etc.;
  // BranchFolder doesn't track that the CMP must stay adjacent, so
  // if it re-inserts the Bxx in a tail-merged block the flag input
  // becomes whatever earlier instruction last clobbered P. Caught by
  // the softDouble dadd smoke (1.5 + 2.5 != 4.0) once we tried to make
  // conditional branches analyzable.
  //
  // What we DO analyze:
  //   * Empty terminator sequence (pure fall-through) — return
  //     analyzable with no targets so MachineBlockPlacement's assert
  //     about fall-through blocks is satisfied trivially.
  //   * Single unconditional direct branch (BRA / BRL / JMP_Abs) —
  //     return analyzable with TBB set, no Cond. Safe to move because
  //     no flag dependency.
  // Everything else (Bxx in any position, indirect jumps, multiple
  // terminators, etc.) stays unanalyzable.
  MachineBasicBlock::iterator I = MBB.getFirstTerminator();
  while (I != MBB.end() && I->isDebugInstr())
    ++I;
  if (I == MBB.end())
    return false; // No terminators: pure fall-through.

  unsigned FirstOpc = I->getOpcode();
  if (!isUncondDirectBranch(FirstOpc))
    return true; // Conditional or unknown. Stay opaque.

  // Single unconditional direct branch — analyzable.
  TBB = getBranchDestBlock(*I);
  if (!TBB)
    return true;
  auto Next = std::next(I);
  while (Next != MBB.end() && Next->isDebugInstr())
    ++Next;
  if (Next != MBB.end())
    return true; // Extra terminators after unconditional.
  return false;
}

// Erase the trailing direct branches of MBB and return how many were
// removed. When BytesRemoved is non-null it receives the summed size
// estimate of the erased instructions.
unsigned W65816InstrInfo::removeBranch(MachineBasicBlock &MBB,
                                       int *BytesRemoved) const {
  if (BytesRemoved)
    *BytesRemoved = 0;
  unsigned NumRemoved = 0;

  // Walk from the end, removing trailing direct branches. Stop when
  // we hit a non-branch or a branch we can't analyze (e.g. JMP_AbsInd).
  // Debug instructions are stepped over (never deleted), however many of
  // them trail the block, so the result does not depend on debug info.
  MachineBasicBlock::iterator It = MBB.end();
  while (It != MBB.begin()) {
    --It;
    if (It->isDebugInstr())
      continue;
    unsigned Opc = It->getOpcode();
    if (!isCondBranch(Opc) && !isUncondDirectBranch(Opc))
      break;
    if (BytesRemoved)
      *BytesRemoved += getInstSizeInBytes(*It);
    It->eraseFromParent();
    // The erased iterator is dead; restart the scan from the block end.
    It = MBB.end();
    ++NumRemoved;
  }
  return NumRemoved;
}

// Append branch instructions to the end of MBB. With an empty Cond a single
// BRA to TBB is emitted. Otherwise Cond[0] holds the conditional-branch
// opcode as an immediate: emit it targeting TBB, followed by a BRA to FBB
// when FBB is non-null. Returns the number of instructions added and, when
// BytesAdded is non-null, their summed size estimate.
unsigned W65816InstrInfo::insertBranch(MachineBasicBlock &MBB,
                                       MachineBasicBlock *TBB,
                                       MachineBasicBlock *FBB,
                                       ArrayRef<MachineOperand> Cond,
                                       const DebugLoc &DL,
                                       int *BytesAdded) const {
  assert(TBB && "insertBranch requires a true target");
  assert((Cond.empty() || Cond.size() == 1) &&
         "W65816 branch conditions are single-operand (opcode)");
  if (BytesAdded)
    *BytesAdded = 0;
  unsigned NumAdded = 0;

  if (Cond.empty()) {
    // Unconditional branch. Use BRA — W65816AsmBackend auto-relaxes
    // to BRL when the displacement exceeds an 8-bit signed offset.
    auto MI = BuildMI(&MBB, DL, get(W65816::BRA)).addMBB(TBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(*MI);
    return 1;
  }

  // Conditional branch using the opcode stored in Cond[0].
  unsigned CondOpc = Cond[0].getImm();
  auto MIc = BuildMI(&MBB, DL, get(CondOpc)).addMBB(TBB);
  if (BytesAdded)
    *BytesAdded += getInstSizeInBytes(*MIc);
  ++NumAdded;

  // If there's also a false target, emit an unconditional branch to it.
  if (FBB) {
    auto MIu = BuildMI(&MBB, DL, get(W65816::BRA)).addMBB(FBB);
    if (BytesAdded)
      *BytesAdded += getInstSizeInBytes(*MIu);
    ++NumAdded;
  }
  return NumAdded;
}

// Invert the branch condition in Cond in place. Returns false on success
// and true when Cond is not a single operand or its opcode has no inverse
// in W65816Helpers::invertCondOpcode.
bool W65816InstrInfo::reverseBranchCondition(
    SmallVectorImpl<MachineOperand> &Cond) const {
  if (Cond.size() != 1)
    return true;
  unsigned Inverted = W65816Helpers::invertCondOpcode(Cond[0].getImm());
  if (!Inverted)
    return true;
  Cond[0].setImm(Inverted);
  return false;
}

// Estimate the encoded size of MI in bytes. Meta-instructions are 0, the
// pseudos listed below use hand-maintained estimates, real instructions use
// their tablegen Size, and anything else falls back to 4.
unsigned W65816InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
  // Meta-instructions emit nothing — PHI nodes get eliminated, COPY
  // gets lowered to TXA/TYA/TAY/TAX or LDA/STA, KILL/IMPLICIT_DEF/
  // BUNDLE/CFI_INSTRUCTION/DBG_VALUE leave no bytes. For COPY we
  // could be more precise (1 or 2 bytes depending on transfer) but
  // returning 0 is fine: the size estimate just needs to be a lower
  // bound for the BranchExpand pass's distance estimate.
  // NOTE(review): the comments further down treat underestimates as a
  // branch-range risk and the final fallback as an upper bound — confirm
  // which bound BranchExpand actually relies on.
  if (MI.isMetaInstruction())
    return 0;

  unsigned Opc = MI.getOpcode();

  // ADJCALLSTACKDOWN / ADJCALLSTACKUP get expanded to PLA loops or
  // TSC/CLC/ADC/TCS bracket; estimate ~8 bytes worst case.
  if (Opc == W65816::ADJCALLSTACKDOWN || Opc == W65816::ADJCALLSTACKUP)
    return 8;

  // Pseudo expansions handled by AsmPrinter that emit multiple
  // bytes need explicit estimates; a missing case underestimates
  // and risks branch-range errors. Rough byte counts below mirror
  // each pseudo's expansion in W65816AsmPrinter::emitInstruction.
  switch (Opc) {
  // i8 immediate ops wrap with SEP/REP: SEP(2) + op(2) + REP(2) = 6.
  // ADC / SBC get one extra byte (presumably the CLC / SEC prefix that
  // the i16 variants below also account for).
  case W65816::LDAi8imm:
  case W65816::ADCi8imm:
  case W65816::SBCi8imm:
  case W65816::ANDi8imm:
  case W65816::ORAi8imm:
  case W65816::EORi8imm:
  case W65816::CMPi8imm:
    return 6 + (Opc == W65816::ADCi8imm || Opc == W65816::SBCi8imm ? 1 : 0);
  // i8 abs load wraps: SEP(2) + LDA_Abs(3) + REP(2) = 7.
  case W65816::LDA8abs:
    return 7;
  // i8 abs store wraps: SEP(2) + STA_Abs(3) + REP(2) = 7.
  case W65816::STA8abs:
    return 7;
  // STA8fi: SEP(2) + STA d,S(2) + REP(2) = 6 (PEI expansion).
  case W65816::STA8fi:
    return 6;
  // i16 ADC/SBC pseudos prepend CLC/SEC: 1 + 3 = 4 bytes.
  case W65816::ADCi16imm:
  case W65816::SBCi16imm:
  case W65816::ADCabs:
  case W65816::SBCabs:
    return 4;
  // ADDframe: TSC + CLC + ADC #imm = 1 + 1 + 3 = 5.
  case W65816::ADDframe:
    return 5;
  // ALLOCAfi: STA dp + TSC + SEC + SBC dp + TCS + INC A = 2+1+1+2+1+1 = 8.
  case W65816::ALLOCAfi:
    return 8;
  // PUSH16 / PUSH16X: PHA / PHX = 1 byte.
  case W65816::PUSH16:
  case W65816::PUSH16X:
    return 1;
  // JSLpseudo: jsl is 4 bytes.
  case W65816::JSLpseudo:
  case W65816::JSLpseudo32:
    return 4;
  default:
    break;
  }

  // Real (non-pseudo) instruction: tablegen-defined Size.
  unsigned Size = MI.getDesc().getSize();
  if (Size != 0)
    return Size;

  // Fallback for any pseudo we forgot to enumerate: 4 bytes is a
  // pessimistic-but-safe upper bound on most W65816 instructions.
  return 4;
}