//===-- W65816RegisterInfo.cpp - W65816 Register Information --------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // Skeleton implementation of the W65816 register info. The callee-saved // register list, reserved set and frame-index elimination logic are // deliberately minimal; they will be fleshed out alongside frame lowering. // //===----------------------------------------------------------------------===// #include "W65816RegisterInfo.h" #include "W65816.h" #include "W65816FrameLowering.h" #include "W65816InstrInfo.h" #include "W65816MachineFunctionInfo.h" #include "W65816Subtarget.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; // IMG slot DP addresses for STAfi's IMG-source path. static int imgRegToDP(Register R) { switch (R) { case W65816::IMG0: return 0xD0; case W65816::IMG1: return 0xD2; case W65816::IMG2: return 0xD4; case W65816::IMG3: return 0xD6; case W65816::IMG4: return 0xD8; case W65816::IMG5: return 0xDA; case W65816::IMG6: return 0xDC; case W65816::IMG7: return 0xDE; case W65816::IMG8: return 0xC0; case W65816::IMG9: return 0xC2; case W65816::IMG10: return 0xC4; case W65816::IMG11: return 0xC6; case W65816::IMG12: return 0xC8; case W65816::IMG13: return 0xCA; case W65816::IMG14: return 0xCC; case W65816::IMG15: return 0xCE; default: return -1; } } // Far FI elim via DP frame-pointer ($F6/$F7). Called when an FI's // effective offset exceeds 0xFF and the function reserved an FP at // prologue time (StackSize > 200). Stack is always bank 0, so // `(F6),Y` (16-bit DP-indirect, Y-indexed, bank-0 result) is correct. // // Common skeleton (varies per opcode): // PHY; LDY #FPOff; ; PLY // PHY/PLY balance, so subsequent `,S` accesses stay accurate. PLY // preserves C (only N/Z), so multi-precision carry chains survive // the load-via-Y. static bool expandFarFI(MachineInstr &MI, int FPOff, const W65816InstrInfo &TII) { MachineBasicBlock &MBB = *MI.getParent(); MachineBasicBlock::iterator II = MI.getIterator(); DebugLoc DL = MI.getDebugLoc(); unsigned Opc = MI.getOpcode(); switch (Opc) { case W65816::LDAfi: { Register Dst = MI.getOperand(0).getReg(); BuildMI(MBB, II, DL, TII.get(W65816::PHY)) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)) .addImm(FPOff) .addReg(W65816::Y, RegState::ImplicitDefine); BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY)) .addImm(0xF6) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::PLY)) .addReg(W65816::Y, RegState::ImplicitDefine); if (Dst == W65816::X) BuildMI(MBB, II, DL, TII.get(W65816::TAX)); else if (Dst == W65816::Y) BuildMI(MBB, II, DL, TII.get(W65816::TAY)); return true; } case W65816::STAfi: { Register Src = MI.getOperand(0).getReg(); int srcDP = imgRegToDP(Src); if (srcDP >= 0) BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(srcDP); BuildMI(MBB, II, DL, TII.get(W65816::PHY)) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff); BuildMI(MBB, II, DL, TII.get(W65816::STA_DPIndY)) .addImm(0xF6) .addReg(W65816::A, RegState::Implicit) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::PLY)); return true; } case W65816::STA8fi: { BuildMI(MBB, II, DL, TII.get(W65816::SEP)).addImm(0x20) .addReg(W65816::P, RegState::ImplicitDefine); BuildMI(MBB, II, DL, TII.get(W65816::PHY)) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff); BuildMI(MBB, II, DL, TII.get(W65816::STA_DPIndY)) .addImm(0xF6) .addReg(W65816::A, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::PLY)); BuildMI(MBB, II, DL, TII.get(W65816::REP)).addImm(0x20) .addReg(W65816::P, RegState::ImplicitDefine); return true; } case W65816::ADCfi: case W65816::ADCEfi: case W65816::ANDfi: case W65816::ORAfi: case W65816::EORfi: { // Commutative (or chained): A op M. Save A to $E2, load M to A // via (F6),Y, then op against saved A. Order matters: PLY must // come BEFORE the final op so PLY's N/Z clobber doesn't hide the // op's flags from a downstream consumer. BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(0xE2) .addReg(W65816::A, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::PHY)) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff); BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY)).addImm(0xF6) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::PLY)) .addReg(W65816::Y, RegState::ImplicitDefine); unsigned OpDPOpc = 0; switch (Opc) { case W65816::ADCfi: case W65816::ADCEfi: OpDPOpc = W65816::ADC_DP; break; case W65816::ANDfi: OpDPOpc = W65816::AND_DP; break; case W65816::ORAfi: OpDPOpc = W65816::ORA_DP; break; case W65816::EORfi: OpDPOpc = W65816::EOR_DP; break; default: llvm_unreachable("unhandled commutative far-FI"); } auto B = BuildMI(MBB, II, DL, TII.get(OpDPOpc)).addImm(0xE2) .addReg(W65816::A, RegState::Implicit) .addReg(W65816::A, RegState::ImplicitDefine); if (OpDPOpc == W65816::ADC_DP) { B.addReg(W65816::P, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); } return true; } case W65816::SBCfi: case W65816::SBCEfi: case W65816::CMPfi: { // Non-commutative (A - M): we must load M into a scratch slot // without losing A. Sequence: // STA $E0 ; save original A // PHY // LDY #FPOff // LDA ($F6),Y ; A = M (lost saved A, but $E0 still has it) // STA $E2 ; $E2 = M // LDA $E0 ; A = original // PLY ; preserves C, clobbers N/Z (re-set by SBC/CMP) // SBC/CMP $E2 BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(0xE0) .addReg(W65816::A, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::PHY)) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff); BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY)).addImm(0xF6) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::Y, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(0xE2) .addReg(W65816::A, RegState::Implicit); BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(0xE0) .addReg(W65816::A, RegState::ImplicitDefine); BuildMI(MBB, II, DL, TII.get(W65816::PLY)) .addReg(W65816::Y, RegState::ImplicitDefine); if (Opc == W65816::CMPfi) { BuildMI(MBB, II, DL, TII.get(W65816::CMP_DP)).addImm(0xE2) .addReg(W65816::A, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); } else { BuildMI(MBB, II, DL, TII.get(W65816::SBC_DP)).addImm(0xE2) .addReg(W65816::A, RegState::Implicit) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::P, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); } return true; } case W65816::ADDframe: { // LEA into A: A = FP + FPOff. 16-bit add, no carry chain needed. BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(0xF6) .addReg(W65816::A, RegState::ImplicitDefine); BuildMI(MBB, II, DL, TII.get(W65816::CLC)) .addReg(W65816::P, RegState::ImplicitDefine); BuildMI(MBB, II, DL, TII.get(W65816::ADC_Imm16)).addImm(FPOff) .addReg(W65816::A, RegState::Implicit) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::P, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); return true; } default: return false; } } #define DEBUG_TYPE "w65816-reg-info" #define GET_REGINFO_TARGET_DESC #include "W65816GenRegisterInfo.inc" W65816RegisterInfo::W65816RegisterInfo() : W65816GenRegisterInfo(W65816::PC) {} const MCPhysReg * W65816RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { // The 65816 C calling convention preserves DP and DBR across calls. static const MCPhysReg CalleeSavedRegs[] = {W65816::DP, W65816::DBR, 0}; return CalleeSavedRegs; } BitVector W65816RegisterInfo::getReservedRegs(const MachineFunction &MF) const { BitVector Reserved(getNumRegs()); // SP, PC, P, PBR and DBR are all special-purpose registers the allocator // must never pick. DP is allocatable in principle but is treated as // reserved for the skeleton until direct-page management lands. Reserved.set(W65816::SP); Reserved.set(W65816::PC); Reserved.set(W65816::P); Reserved.set(W65816::PBR); Reserved.set(W65816::DBR); Reserved.set(W65816::DP); return Reserved; } const TargetRegisterClass * W65816RegisterInfo::getPointerRegClass(unsigned Kind) const { return &W65816::PtrRegsRegClass; } bool W65816RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const { MachineInstr &MI = *II; MachineFunction &MF = *MI.getParent()->getParent(); const MachineFrameInfo &MFI = MF.getFrameInfo(); const W65816InstrInfo &TII = *MF.getSubtarget().getInstrInfo(); unsigned Opc = MI.getOpcode(); unsigned NewOpc = 0; bool NeedsCarryPrefix = false; bool IsSub = false; switch (Opc) { case W65816::LDAfi: { // LDAfi targets A. If the regalloc parked the dest in X or Y // (which can happen via Idx16 vreg coalescing), bridge through A // by appending a TAX / TAY. Register Dst = MI.getOperand(0).getReg(); int FI = MI.getOperand(FIOperandNum).getIndex(); int FrameOffset = MFI.getObjectOffset(FI); int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm(); int Offset = FrameOffset + ImmOffset + (int)MFI.getStackSize() + SPAdj; if (FrameOffset < 0) Offset += 1; if (Offset < 0 || Offset > 0xFF) { // Far slot. Use FP if reserved. FP-relative offset excludes // SPAdj because $F6 captures S after prologue, before any // intermediate PUSH16 inside a call sequence. if (MF.getInfo()->getUsesDpFP()) { int FPOff = FrameOffset + ImmOffset + (int)MFI.getStackSize(); if (FrameOffset < 0) FPOff += 1; if (expandFarFI(MI, FPOff, TII)) { MI.eraseFromParent(); return true; } } report_fatal_error("W65816: frame offset out of stack-relative range"); } BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::LDA_StackRel)) .addImm(Offset) .addReg(W65816::A, RegState::ImplicitDefine); if (Dst == W65816::X) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::TAX)); } else if (Dst == W65816::Y) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::TAY)); } else { // LDAfi with Dst = IMGn: regalloc allocated a Wide16 vreg to an // IMG slot. LDA_StackRel only loaded into A; we must also store // A to the IMG DP slot so subsequent `COPY $x = $imgN` (which // expands to `ldx $D?`) reads the right value. Without this, the // IMG slot stays stale and downstream reads silently get garbage // — e.g., `dadd(1.5, 2.5) → 0x4010_0000_3000_3000` under full // IMG-clobber because every cross-call spill reload of an IMG- // assigned vreg fed stale prior data into the next computation. int dstDP = -1; switch (Dst) { case W65816::IMG0: dstDP = 0xD0; break; case W65816::IMG1: dstDP = 0xD2; break; case W65816::IMG2: dstDP = 0xD4; break; case W65816::IMG3: dstDP = 0xD6; break; case W65816::IMG4: dstDP = 0xD8; break; case W65816::IMG5: dstDP = 0xDA; break; case W65816::IMG6: dstDP = 0xDC; break; case W65816::IMG7: dstDP = 0xDE; break; case W65816::IMG8: dstDP = 0xC0; break; case W65816::IMG9: dstDP = 0xC2; break; case W65816::IMG10: dstDP = 0xC4; break; case W65816::IMG11: dstDP = 0xC6; break; case W65816::IMG12: dstDP = 0xC8; break; case W65816::IMG13: dstDP = 0xCA; break; case W65816::IMG14: dstDP = 0xCC; break; case W65816::IMG15: dstDP = 0xCE; break; default: break; } if (dstDP >= 0) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::STA_DP)).addImm(dstDP); } } MI.eraseFromParent(); return true; } case W65816::STAfi: { // Wide16-source STAfi: if the source ended up in IMGn (DP-backed), // prepend LDA dp so the value reaches A before the actual store. int FI = MI.getOperand(FIOperandNum).getIndex(); int FrameOffset = MFI.getObjectOffset(FI); int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm(); // +1 skew for locals: the 65816 SP points to next-FREE byte (empty // descending), but LLVM PEI assigns FrameOffset assuming SP points // to the first-USED byte (full descending). Without the +1, slot 0 // ends up at S+0 — exactly where the next JSL writes its return // address bank. Args have positive FrameOffset (caller pushed them // at S+1..S+N already, the JSL push naturally puts them at S+4+N // in callee), so they don't need the skew. int Offset = FrameOffset + ImmOffset + (int)MFI.getStackSize() + SPAdj; if (FrameOffset < 0) Offset += 1; if (Offset < 0 || Offset > 0xFF) { if (MF.getInfo()->getUsesDpFP()) { int FPOff = FrameOffset + ImmOffset + (int)MFI.getStackSize(); if (FrameOffset < 0) FPOff += 1; if (expandFarFI(MI, FPOff, TII)) { MI.eraseFromParent(); return true; } } report_fatal_error("W65816: frame offset out of stack-relative range"); } Register Src = MI.getOperand(0).getReg(); int srcDP = -1; switch (Src) { case W65816::IMG0: srcDP = 0xD0; break; case W65816::IMG1: srcDP = 0xD2; break; case W65816::IMG2: srcDP = 0xD4; break; case W65816::IMG3: srcDP = 0xD6; break; case W65816::IMG4: srcDP = 0xD8; break; case W65816::IMG5: srcDP = 0xDA; break; case W65816::IMG6: srcDP = 0xDC; break; case W65816::IMG7: srcDP = 0xDE; break; case W65816::IMG8: srcDP = 0xC0; break; case W65816::IMG9: srcDP = 0xC2; break; case W65816::IMG10: srcDP = 0xC4; break; case W65816::IMG11: srcDP = 0xC6; break; case W65816::IMG12: srcDP = 0xC8; break; case W65816::IMG13: srcDP = 0xCA; break; case W65816::IMG14: srcDP = 0xCC; break; case W65816::IMG15: srcDP = 0xCE; break; default: break; } if (srcDP >= 0 || Src == W65816::X || Src == W65816::Y) { // STAfi with non-A source: must clobber A to land the value in // A and then `sta d,s`. PHA-bracket so A's incoming value is // preserved across the spill — without this, a regalloc-emitted // sequence like `STAfi $img0 (=$x); $img0 = COPY $a; STAfi $img0` // overwrites $a's value at the first STAfi (via `lda 0xd0`), // making the second STAfi spill garbage. Observed under full // IMG-clobber as `dadd(1.5,2.5) → 0x4010_0000_3000_3000`. // // Sequence: pha (SP -= 2); load source into A; sta (d+2),s // (offset bumped to compensate for the PHA SP shift); pla // (SP += 2, A restored). Cost: +PHA (3 cyc, 1 byte) + PLA // (4 cyc, 1 byte) = +7 cyc, +2 bytes per IMG/X/Y-source STAfi. BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::PHA)); if (srcDP >= 0) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::LDA_DP)).addImm(srcDP); } else { unsigned XferOp = (Src == W65816::X) ? W65816::TXA : W65816::TYA; BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(XferOp)); } BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::STA_StackRel)) .addImm(Offset + 2) // PHA shifted SP by 2 .addReg(W65816::A, RegState::Implicit); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::PLA)); } else { // Direct A source: simple sta d,s — A is the source, A is fine // afterward (no implicit clobber). BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::STA_StackRel)) .addImm(Offset) .addReg(W65816::A, RegState::Implicit); } MI.eraseFromParent(); return true; } case W65816::ADCfi: NewOpc = W65816::ADC_StackRel; NeedsCarryPrefix = true; break; case W65816::SBCfi: NewOpc = W65816::SBC_StackRel; NeedsCarryPrefix = true; IsSub = true; break; // ADCEfi / SBCEfi are the chained-carry variants used as the hi half of a // multi-precision split. No CLC/SEC prefix — they read the carry left // in P by the previous addc/adde/subc/sube. case W65816::ADCEfi: NewOpc = W65816::ADC_StackRel; break; case W65816::SBCEfi: NewOpc = W65816::SBC_StackRel; break; case W65816::ANDfi: NewOpc = W65816::AND_StackRel; break; case W65816::ORAfi: NewOpc = W65816::ORA_StackRel; break; case W65816::EORfi: NewOpc = W65816::EOR_StackRel; break; case W65816::CMPfi: NewOpc = W65816::CMP_StackRel; break; case W65816::LDAfi_indY: NewOpc = W65816::LDA_StackRelIndY; break; case W65816::STAfi_indY: NewOpc = W65816::STA_StackRelIndY; break; case W65816::STA8fi: { // i8 truncating store via stack-rel. Wrap the store in // SEP #$20 / STA d,S / REP #$20 so only one byte is written. We // assume entry M=0 (16-bit accumulator) per the function prologue; // restoring REP #$20 after the STA preserves that invariant. int FI = MI.getOperand(FIOperandNum).getIndex(); int FrameOffset = MFI.getObjectOffset(FI); int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm(); int Offset = FrameOffset + ImmOffset + (int)MFI.getStackSize() + SPAdj; if (FrameOffset < 0) Offset += 1; // empty-descending SP skew (see STAfi) if (Offset < 0 || Offset > 0xFF) { if (MF.getInfo()->getUsesDpFP()) { int FPOff = FrameOffset + ImmOffset + (int)MFI.getStackSize(); if (FrameOffset < 0) FPOff += 1; if (expandFarFI(MI, FPOff, TII)) { MI.eraseFromParent(); return true; } } report_fatal_error("W65816: frame offset out of stack-relative range"); } BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::SEP)) .addImm(0x20) .addReg(W65816::P, RegState::ImplicitDefine); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::STA_StackRel)) .addImm(Offset) .addReg(W65816::A, RegState::Implicit); BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::REP)) .addImm(0x20) .addReg(W65816::P, RegState::ImplicitDefine); MI.eraseFromParent(); return true; } case W65816::ADDframe: { // LEA-equivalent: emit "TSC; CLC; ADC #disp" so A holds SP + disp, // i.e. the address of the stack slot. TSC has no carry side-effect // (it just transfers SP into A), so the CLC + ADC is needed for a // clean unsigned add. Disp uses the same FrameOffset+ImmOffset+ // StackSize formula as the load/store cases. int FI = MI.getOperand(FIOperandNum).getIndex(); int FrameOffset = MFI.getObjectOffset(FI); int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm(); int Disp = FrameOffset + ImmOffset + (int)MFI.getStackSize() + SPAdj; if (FrameOffset < 0) Disp += 1; // empty-descending SP skew (see STAfi) // ADDframe (LEA) routes through TSC + ADC. Always works for any // 16-bit Disp via TSC's full-width 16-bit transfer, so we don't // need a far-FI variant here even when usesDpFP is true. if (Disp < 0 || Disp > 0xFFFF) report_fatal_error("W65816: frame offset out of i16 LEA range"); // TSC: A = SP (implicit def of A, use of SP). BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::TSC)) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::SP, RegState::Implicit); // CLC: clears C. Models as P-def, P-use (preserves N/V/Z). BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::CLC)) .addReg(W65816::P, RegState::ImplicitDefine); // ADC #imm: reads A and P, writes A and P. BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::ADC_Imm16)) .addImm(Disp) .addReg(W65816::A, RegState::Implicit) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::P, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); MI.eraseFromParent(); return true; } default: llvm_unreachable("W65816: unhandled instruction in eliminateFrameIndex"); } int FI = MI.getOperand(FIOperandNum).getIndex(); int FrameOffset = MFI.getObjectOffset(FI); int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm(); // WDC stack-relative addressing: `LDA disp,S` computes effective // address S + disp. Both fixed objects (args) and local objects // are stored at addresses relative to entry-SP; my prologue has // shifted S down by StackSize. Plus, between ADJCALLSTACKDOWN and // ADJCALLSTACKUP, PUSH16/PHA shifts SP further by SPAdj. So: // address = entry_S + FrameOffset // S = entry_S - StackSize - SPAdj // disp = address - S // = FrameOffset + StackSize + SPAdj // PLUS a +1 skew for locals: the 65816 SP is empty-descending (points // to next-FREE byte), but LLVM PEI assigns FrameOffset assuming SP is // full-descending (points to first-USED byte). Without +1, slot 0 // ends up at S+0 — clobbered by the next JSL retaddr push. Args have // positive FrameOffset and don't need the skew. int Offset = FrameOffset + ImmOffset + (int)MFI.getStackSize() + SPAdj; if (FrameOffset < 0) Offset += 1; if (Offset < 0 || Offset > 0xFF) { if (MF.getInfo()->getUsesDpFP()) { int FPOff = FrameOffset + ImmOffset + (int)MFI.getStackSize(); if (FrameOffset < 0) FPOff += 1; // Emit the carry prefix (CLC/SEC) BEFORE the far-FI sequence — // expandFarFI's PHY/PLY pair preserves C, so the prefix's value // survives intact to the final ADC/SBC/CMP at the bottom of // the expansion. if (NeedsCarryPrefix) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(IsSub ? W65816::SEC : W65816::CLC)); } if (expandFarFI(MI, FPOff, TII)) { MI.eraseFromParent(); return true; } } report_fatal_error("W65816: frame offset out of stack-relative range"); } // (Prologue-PHA fold reverted — it was correct in isolation but // surfaced a separate compile-time hazard via the DAG combiner on // shift-by-1 i8. Saved 1 op per affected function but at the cost // of huge compile slowdowns. Re-enable once the DAG combiner // interaction is understood.) // Emit the carry-prep instruction first if the operation needs it. if (NeedsCarryPrefix) { BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(IsSub ? W65816::SEC : W65816::CLC)); } // The MC instructions (LDA_StackRel, STA_StackRel, ADC_StackRel, // ADC_Imm16, etc.) don't have explicit Defs/Uses on the accumulator // because that's an implicit hardware semantic of every 65816 // arithmetic/load/store. Without an explicit Def/Use, post-RA // passes (Machine Copy Propagation in particular) miss that an ADC // d,S between a TXA and a TAX redefines $a, and elide the TAX as // "redundant" — corrupting the return value. Add the implicit // operands here so dataflow tracking is correct. Match the // original pseudo's read/write semantics: LDA defs A only; STA uses // A only; ADC/SBC/AND/ORA/EOR/CMP read A and write A (CMP only // sets flags, but it still uses A — modelling it as Use is // sufficient since it doesn't change A). auto Builder = BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(NewOpc)).addImm(Offset); switch (NewOpc) { case W65816::LDA_StackRel: Builder.addReg(W65816::A, RegState::ImplicitDefine); break; case W65816::LDA_StackRelIndY: // Indirect-Y: A def + Y use. The Y use is critical — without it, // post-RA passes can reorder a Y-defining op past us, leaving the // load reading at (ptr + stale_Y). Caught when modelling the dep // for the (sr,s),Y bank-wrap workaround in W65816NegYIndY. Builder.addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::Y, RegState::Implicit); break; case W65816::STA_StackRel: Builder.addReg(W65816::A, RegState::Implicit); break; case W65816::STA_StackRelIndY: // Indirect-Y store: A use + Y use (same Y reasoning as above). Builder.addReg(W65816::A, RegState::Implicit) .addReg(W65816::Y, RegState::Implicit); break; case W65816::ADC_StackRel: case W65816::SBC_StackRel: Builder.addReg(W65816::A, RegState::Implicit) .addReg(W65816::A, RegState::ImplicitDefine) .addReg(W65816::P, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); break; case W65816::AND_StackRel: case W65816::ORA_StackRel: case W65816::EOR_StackRel: Builder.addReg(W65816::A, RegState::Implicit) .addReg(W65816::A, RegState::ImplicitDefine); break; case W65816::CMP_StackRel: Builder.addReg(W65816::A, RegState::Implicit) .addReg(W65816::P, RegState::ImplicitDefine); break; default: break; } MI.eraseFromParent(); return true; } Register W65816RegisterInfo::getFrameRegister(const MachineFunction &MF) const { return W65816::SP; }