65816-llvm-mos/src/llvm/lib/Target/W65816/W65816RegisterInfo.cpp
Scott Duensing e84492a449 Checkpoint
2026-05-07 20:44:53 -05:00

610 lines
26 KiB
C++

//===-- W65816RegisterInfo.cpp - W65816 Register Information --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Skeleton implementation of the W65816 register info. The callee-saved
// register list, reserved set and frame-index elimination logic are
// deliberately minimal; they will be fleshed out alongside frame lowering.
//
//===----------------------------------------------------------------------===//
#include "W65816RegisterInfo.h"
#include "W65816.h"
#include "W65816FrameLowering.h"
#include "W65816InstrInfo.h"
#include "W65816MachineFunctionInfo.h"
#include "W65816Subtarget.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
// Maps an IMGn imaginary register to the direct-page address of its backing
// slot: IMG0..IMG7 occupy $D0..$DE and IMG8..IMG15 occupy $C0..$CE, two
// bytes apart. Any register that is not an IMG slot yields -1.
static int imgRegToDP(Register R) {
  struct ImgSlot {
    unsigned Reg;
    int DPAddr;
  };
  static const ImgSlot Slots[] = {
      {W65816::IMG0, 0xD0},  {W65816::IMG1, 0xD2},  {W65816::IMG2, 0xD4},
      {W65816::IMG3, 0xD6},  {W65816::IMG4, 0xD8},  {W65816::IMG5, 0xDA},
      {W65816::IMG6, 0xDC},  {W65816::IMG7, 0xDE},  {W65816::IMG8, 0xC0},
      {W65816::IMG9, 0xC2},  {W65816::IMG10, 0xC4}, {W65816::IMG11, 0xC6},
      {W65816::IMG12, 0xC8}, {W65816::IMG13, 0xCA}, {W65816::IMG14, 0xCC},
      {W65816::IMG15, 0xCE},
  };
  const unsigned Id = R;
  for (const ImgSlot &Slot : Slots) {
    if (Slot.Reg == Id)
      return Slot.DPAddr;
  }
  return -1;
}
// Far frame-index elimination through the DP frame pointer at $F6/$F7.
// Called when an FI's effective offset does not fit the 8-bit stack-relative
// displacement and the function reserved an FP at prologue time
// (StackSize > 200). The stack is always in bank 0, so `($F6),Y` (16-bit
// DP-indirect, Y-indexed, bank-0 result) addresses the slot correctly.
//
// Common skeleton (varies per opcode):
//   PHY; LDY #FPOff; <op via ($F6),Y>; PLY
// PHY/PLY balance, so subsequent `,S` accesses stay accurate. PLY only
// touches N/Z and preserves C, so multi-precision carry chains survive the
// load-via-Y.
//
// MI    - the frame pseudo to expand; new instructions are inserted before
//         it. MI itself is NOT erased here; the caller does that on success.
// FPOff - slot offset relative to the value held in $F6 (no SPAdj component).
// TII   - instruction info used to look up the MC opcodes.
//
// Returns true when the opcode was expanded, false when this opcode has no
// far form (nothing is emitted in that case).
static bool expandFarFI(MachineInstr &MI, int FPOff,
                        const W65816InstrInfo &TII) {
  MachineBasicBlock &MBB = *MI.getParent();
  MachineBasicBlock::iterator II = MI.getIterator();
  DebugLoc DL = MI.getDebugLoc();
  const unsigned Opc = MI.getOpcode();

  // Building blocks shared by every expansion. Each one carries the full set
  // of implicit A/Y operands so post-RA dataflow sees the same dependencies
  // regardless of which pseudo is being expanded.
  auto EmitPHY = [&]() {
    BuildMI(MBB, II, DL, TII.get(W65816::PHY))
        .addReg(W65816::Y, RegState::Implicit);
  };
  auto EmitLDYOffset = [&]() {
    BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16))
        .addImm(FPOff)
        .addReg(W65816::Y, RegState::ImplicitDefine);
  };
  auto EmitPLY = [&]() {
    BuildMI(MBB, II, DL, TII.get(W65816::PLY))
        .addReg(W65816::Y, RegState::ImplicitDefine);
  };
  auto EmitLoadViaFP = [&]() {
    BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY))
        .addImm(0xF6)
        .addReg(W65816::A, RegState::ImplicitDefine)
        .addReg(W65816::Y, RegState::Implicit);
  };
  auto EmitStoreViaFP = [&]() {
    BuildMI(MBB, II, DL, TII.get(W65816::STA_DPIndY))
        .addImm(0xF6)
        .addReg(W65816::A, RegState::Implicit)
        .addReg(W65816::Y, RegState::Implicit);
  };
  auto EmitStoreAToDP = [&](int DPAddr) {
    BuildMI(MBB, II, DL, TII.get(W65816::STA_DP))
        .addImm(DPAddr)
        .addReg(W65816::A, RegState::Implicit);
  };

  switch (Opc) {
  case W65816::LDAfi: {
    const Register Dst = MI.getOperand(0).getReg();
    EmitPHY();
    EmitLDYOffset();
    EmitLoadViaFP();
    EmitPLY();
    // The load always lands in A. Forward it to wherever the allocator
    // actually placed the destination. TAY must come after PLY, otherwise
    // PLY would overwrite the freshly loaded value.
    if (Dst == W65816::X) {
      BuildMI(MBB, II, DL, TII.get(W65816::TAX));
    } else if (Dst == W65816::Y) {
      BuildMI(MBB, II, DL, TII.get(W65816::TAY));
    } else {
      // IMGn destination: also write A to the IMG DP slot, mirroring the
      // near (stack-relative) LDAfi expansion. Without this the slot stays
      // stale and later reads of the IMG register see old data.
      const int DstDP = imgRegToDP(Dst);
      if (DstDP >= 0)
        EmitStoreAToDP(DstDP);
    }
    return true;
  }
  case W65816::STAfi: {
    const Register Src = MI.getOperand(0).getReg();
    const int SrcDP = imgRegToDP(Src);
    // A non-A source (IMGn, X or Y) has to travel through A. Bracket the
    // sequence with PHA/PLA so A's incoming value survives, exactly as the
    // near STAfi expansion does. The slot is addressed through $F6 rather
    // than S, so the extra push needs no displacement adjustment.
    const bool BracketA =
        SrcDP >= 0 || Src == W65816::X || Src == W65816::Y;
    if (BracketA) {
      BuildMI(MBB, II, DL, TII.get(W65816::PHA));
      if (SrcDP >= 0) {
        BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(SrcDP);
      } else {
        // TYA must run before LDY #FPOff overwrites Y.
        const unsigned XferOp =
            (Src == W65816::X) ? W65816::TXA : W65816::TYA;
        BuildMI(MBB, II, DL, TII.get(XferOp));
      }
    }
    EmitPHY();
    EmitLDYOffset();
    EmitStoreViaFP();
    EmitPLY();
    if (BracketA)
      BuildMI(MBB, II, DL, TII.get(W65816::PLA));
    return true;
  }
  case W65816::STA8fi: {
    // One-byte store: SEP #$20 switches A to 8 bits for the STA, and
    // REP #$20 restores the 16-bit accumulator afterwards.
    BuildMI(MBB, II, DL, TII.get(W65816::SEP))
        .addImm(0x20)
        .addReg(W65816::P, RegState::ImplicitDefine);
    EmitPHY();
    EmitLDYOffset();
    EmitStoreViaFP();
    EmitPLY();
    BuildMI(MBB, II, DL, TII.get(W65816::REP))
        .addImm(0x20)
        .addReg(W65816::P, RegState::ImplicitDefine);
    return true;
  }
  case W65816::ADCfi:
  case W65816::ADCEfi:
  case W65816::ANDfi:
  case W65816::ORAfi:
  case W65816::EORfi: {
    // Commutative (or chained): A op M. Save A to $E2, load M to A via
    // ($F6),Y, then op against the saved A. Order matters: PLY must come
    // BEFORE the final op so PLY's N/Z clobber doesn't hide the op's flags
    // from a downstream consumer.
    EmitStoreAToDP(0xE2);
    EmitPHY();
    EmitLDYOffset();
    EmitLoadViaFP();
    EmitPLY();
    unsigned OpDPOpc = 0;
    switch (Opc) {
    case W65816::ADCfi:
    case W65816::ADCEfi: OpDPOpc = W65816::ADC_DP; break;
    case W65816::ANDfi:  OpDPOpc = W65816::AND_DP; break;
    case W65816::ORAfi:  OpDPOpc = W65816::ORA_DP; break;
    case W65816::EORfi:  OpDPOpc = W65816::EOR_DP; break;
    default: llvm_unreachable("unhandled commutative far-FI");
    }
    auto B = BuildMI(MBB, II, DL, TII.get(OpDPOpc))
                 .addImm(0xE2)
                 .addReg(W65816::A, RegState::Implicit)
                 .addReg(W65816::A, RegState::ImplicitDefine);
    // Only ADC is modelled as reading and writing P here (carry in/out).
    if (OpDPOpc == W65816::ADC_DP) {
      B.addReg(W65816::P, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
    }
    return true;
  }
  case W65816::SBCfi:
  case W65816::SBCEfi:
  case W65816::CMPfi: {
    // Non-commutative (A - M): M must reach a scratch slot without losing
    // A. Sequence:
    //   STA $E0        ; save original A
    //   PHY
    //   LDY #FPOff
    //   LDA ($F6),Y    ; A = M ($E0 still holds the original A)
    //   STA $E2        ; $E2 = M
    //   LDA $E0        ; A = original
    //   PLY            ; preserves C, clobbers N/Z (re-set by SBC/CMP)
    //   SBC/CMP $E2
    EmitStoreAToDP(0xE0);
    EmitPHY();
    EmitLDYOffset();
    EmitLoadViaFP();
    EmitStoreAToDP(0xE2);
    BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP))
        .addImm(0xE0)
        .addReg(W65816::A, RegState::ImplicitDefine);
    EmitPLY();
    if (Opc == W65816::CMPfi) {
      BuildMI(MBB, II, DL, TII.get(W65816::CMP_DP))
          .addImm(0xE2)
          .addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
    } else {
      BuildMI(MBB, II, DL, TII.get(W65816::SBC_DP))
          .addImm(0xE2)
          .addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::A, RegState::ImplicitDefine)
          .addReg(W65816::P, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
    }
    return true;
  }
  case W65816::ADDframe: {
    // LEA into A: A = FP + FPOff. 16-bit add, no carry chain needed.
    BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP))
        .addImm(0xF6)
        .addReg(W65816::A, RegState::ImplicitDefine);
    BuildMI(MBB, II, DL, TII.get(W65816::CLC))
        .addReg(W65816::P, RegState::ImplicitDefine);
    BuildMI(MBB, II, DL, TII.get(W65816::ADC_Imm16))
        .addImm(FPOff)
        .addReg(W65816::A, RegState::Implicit)
        .addReg(W65816::A, RegState::ImplicitDefine)
        .addReg(W65816::P, RegState::Implicit)
        .addReg(W65816::P, RegState::ImplicitDefine);
    return true;
  }
  default:
    return false;
  }
}
#define DEBUG_TYPE "w65816-reg-info"
#define GET_REGINFO_TARGET_DESC
#include "W65816GenRegisterInfo.inc"
// Constructs the register info by forwarding W65816::PC to the generated
// W65816GenRegisterInfo base class.
// NOTE(review): PC is presumably passed as the return-address register;
// confirm against the constructor in W65816GenRegisterInfo.inc.
W65816RegisterInfo::W65816RegisterInfo() : W65816GenRegisterInfo(W65816::PC) {}
// Returns the 0-terminated list of registers a callee must preserve. The
// list is the same for every function, so MF is not consulted.
const MCPhysReg *
W65816RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  // Under the 65816 C calling convention, DP and DBR survive calls.
  static const MCPhysReg PreservedAcrossCalls[] = {
      W65816::DP,
      W65816::DBR,
      0, // list terminator
  };
  return PreservedAcrossCalls;
}
// Builds the set of registers the allocator must never hand out. The set is
// identical for every function, so MF is not consulted.
BitVector W65816RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  // SP, PC, P, PBR and DBR are special-purpose registers. DP is allocatable
  // in principle but stays reserved in this skeleton until direct-page
  // management lands.
  static const unsigned AlwaysReserved[] = {
      W65816::SP,  W65816::PC,  W65816::P,
      W65816::PBR, W65816::DBR, W65816::DP,
  };
  BitVector Reserved(getNumRegs());
  for (unsigned Reg : AlwaysReserved)
    Reserved.set(Reg);
  return Reserved;
}
// Returns the register class used to hold pointer values. The Kind argument
// is ignored: every pointer kind maps to the PtrRegs class.
const TargetRegisterClass *
W65816RegisterInfo::getPointerRegClass(unsigned Kind) const {
return &W65816::PtrRegsRegClass;
}
// Replaces a frame pseudo (LDAfi, STAfi, STA8fi, ADDframe, the ALU *fi
// pseudos and the *_indY pseudos) with real instructions and erases the
// pseudo. The frame index is operand FIOperandNum and its immediate offset is
// operand FIOperandNum + 1.
//
// II           - iterator to the pseudo; new instructions go in front of it.
// SPAdj        - extra SP displacement accumulated inside a call sequence.
// FIOperandNum - index of the frame-index operand.
// RS           - unused.
//
// Always returns true (the pseudo has been removed). Aborts through
// report_fatal_error when the slot cannot be reached: displacement outside
// the 8-bit stack-relative range with no usable DP frame pointer, or an
// ADDframe displacement outside 0..0xFFFF.
//
// Displacement model. WDC stack-relative addressing (`LDA disp,S`) computes
// S + disp. Both fixed objects (args) and locals are stored at addresses
// relative to entry-SP; the prologue has shifted S down by StackSize, and
// between ADJCALLSTACKDOWN and ADJCALLSTACKUP PUSH16/PHA shifts SP further
// by SPAdj. So:
//   address = entry_S + FrameOffset
//   S       = entry_S - StackSize - SPAdj
//   disp    = address - S = FrameOffset + StackSize + SPAdj
// PLUS a +1 skew for locals: the 65816 SP is empty-descending (points to the
// next-FREE byte), but LLVM PEI assigns FrameOffset assuming SP is
// full-descending (points to the first-USED byte). Without the +1, slot 0
// ends up at S+0, which the next JSL return-address push clobbers. Args have
// positive FrameOffset and don't need the skew.
bool W65816RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const W65816InstrInfo &TII =
      *MF.getSubtarget<W65816Subtarget>().getInstrInfo();
  const DebugLoc DL = MI.getDebugLoc();
  const unsigned Opc = MI.getOpcode();

  // Classify the pseudo. LDAfi/STAfi/STA8fi/ADDframe have dedicated
  // expansions further down; everything else maps 1:1 onto a stack-relative
  // opcode, optionally preceded by CLC/SEC.
  unsigned NewOpc = 0;
  bool NeedsCarryPrefix = false;
  bool IsSub = false;
  switch (Opc) {
  case W65816::LDAfi:
  case W65816::STAfi:
  case W65816::STA8fi:
  case W65816::ADDframe:
    break;
  case W65816::ADCfi:
    NewOpc = W65816::ADC_StackRel;
    NeedsCarryPrefix = true;
    break;
  case W65816::SBCfi:
    NewOpc = W65816::SBC_StackRel;
    NeedsCarryPrefix = true;
    IsSub = true;
    break;
  // ADCEfi / SBCEfi are the chained-carry variants used as the hi half of a
  // multi-precision split. No CLC/SEC prefix: they read the carry left in P
  // by the previous addc/adde/subc/sube.
  case W65816::ADCEfi: NewOpc = W65816::ADC_StackRel; break;
  case W65816::SBCEfi: NewOpc = W65816::SBC_StackRel; break;
  case W65816::ANDfi: NewOpc = W65816::AND_StackRel; break;
  case W65816::ORAfi: NewOpc = W65816::ORA_StackRel; break;
  case W65816::EORfi: NewOpc = W65816::EOR_StackRel; break;
  case W65816::CMPfi: NewOpc = W65816::CMP_StackRel; break;
  case W65816::LDAfi_indY: NewOpc = W65816::LDA_StackRelIndY; break;
  case W65816::STAfi_indY: NewOpc = W65816::STA_StackRelIndY; break;
  default:
    llvm_unreachable("W65816: unhandled instruction in eliminateFrameIndex");
  }

  // FPOff is the slot offset relative to the DP frame pointer. It excludes
  // SPAdj because $F6 captures S after the prologue, before any intermediate
  // PUSH16 inside a call sequence. Offset is the S-relative displacement.
  const int FI = MI.getOperand(FIOperandNum).getIndex();
  const int FrameOffset = MFI.getObjectOffset(FI);
  const int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm();
  int FPOff = FrameOffset + ImmOffset + (int)MFI.getStackSize();
  if (FrameOffset < 0)
    FPOff += 1; // empty-descending SP skew for locals
  const int Offset = FPOff + SPAdj;

  // CLC before an add, SEC before a subtract.
  auto EmitCarryPrefix = [&]() {
    BuildMI(MBB, II, DL, TII.get(IsSub ? W65816::SEC : W65816::CLC));
  };

  if (Opc == W65816::ADDframe) {
    // LEA-equivalent: "TSC; CLC; ADC #disp" leaves SP + disp (the address of
    // the stack slot) in A. TSC only transfers SP into A, so CLC is needed
    // for a clean add. TSC is a full 16-bit transfer, so any 16-bit
    // displacement works and no far-FI variant is needed here even when the
    // function uses the DP frame pointer.
    if (Offset < 0 || Offset > 0xFFFF)
      report_fatal_error("W65816: frame offset out of i16 LEA range");
    // TSC: A = SP (implicit def of A, use of SP).
    BuildMI(MBB, II, DL, TII.get(W65816::TSC))
        .addReg(W65816::A, RegState::ImplicitDefine)
        .addReg(W65816::SP, RegState::Implicit);
    // CLC: clears C; modelled as a def of P.
    BuildMI(MBB, II, DL, TII.get(W65816::CLC))
        .addReg(W65816::P, RegState::ImplicitDefine);
    // ADC #imm: reads A and P, writes A and P.
    BuildMI(MBB, II, DL, TII.get(W65816::ADC_Imm16))
        .addImm(Offset)
        .addReg(W65816::A, RegState::Implicit)
        .addReg(W65816::A, RegState::ImplicitDefine)
        .addReg(W65816::P, RegState::Implicit)
        .addReg(W65816::P, RegState::ImplicitDefine);
    MI.eraseFromParent();
    return true;
  }

  // A STAfi whose source is not A (IMGn, X or Y) is expanded with a PHA/PLA
  // bracket, which moves S down by 2 and therefore stores at Offset + 2.
  // That adjusted displacement is what must fit in 8 bits, so the near range
  // for such stores ends two bytes earlier.
  int SrcDP = -1;
  bool BracketA = false;
  if (Opc == W65816::STAfi) {
    const Register Src = MI.getOperand(0).getReg();
    SrcDP = imgRegToDP(Src);
    BracketA = SrcDP >= 0 || Src == W65816::X || Src == W65816::Y;
  }
  const int MaxNearOffset = BracketA ? 0xFF - 2 : 0xFF;

  if (Offset < 0 || Offset > MaxNearOffset) {
    // Far slot: go through the DP frame pointer if the function reserved
    // one. The carry prefix (CLC/SEC) is emitted BEFORE the far sequence;
    // expandFarFI's PHY/PLY pair preserves C, so the prefix survives to the
    // final ADC/SBC at the bottom of the expansion.
    if (MF.getInfo<W65816MachineFunctionInfo>()->getUsesDpFP()) {
      if (NeedsCarryPrefix)
        EmitCarryPrefix();
      if (expandFarFI(MI, FPOff, TII)) {
        MI.eraseFromParent();
        return true;
      }
    }
    report_fatal_error("W65816: frame offset out of stack-relative range");
  }

  switch (Opc) {
  case W65816::LDAfi: {
    // LDAfi targets A. If the regalloc parked the dest in X or Y (which can
    // happen via Idx16 vreg coalescing), bridge through A by appending a
    // TAX / TAY.
    const Register Dst = MI.getOperand(0).getReg();
    BuildMI(MBB, II, DL, TII.get(W65816::LDA_StackRel))
        .addImm(Offset)
        .addReg(W65816::A, RegState::ImplicitDefine);
    if (Dst == W65816::X) {
      BuildMI(MBB, II, DL, TII.get(W65816::TAX));
    } else if (Dst == W65816::Y) {
      BuildMI(MBB, II, DL, TII.get(W65816::TAY));
    } else {
      // Dst = IMGn: regalloc allocated a Wide16 vreg to an IMG slot.
      // LDA_StackRel only loaded into A; A must also be stored to the IMG DP
      // slot so a subsequent `COPY $x = $imgN` (which expands to `ldx $D?`)
      // reads the right value. Without this, the IMG slot stays stale and
      // downstream reads silently get garbage, e.g.
      // `dadd(1.5, 2.5) -> 0x4010_0000_3000_3000` under full IMG-clobber,
      // because every cross-call spill reload of an IMG-assigned vreg fed
      // stale prior data into the next computation.
      const int DstDP = imgRegToDP(Dst);
      if (DstDP >= 0)
        BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(DstDP);
    }
    break;
  }
  case W65816::STAfi: {
    if (BracketA) {
      // Non-A source: the value has to land in A before `sta d,s`.
      // PHA-bracket so A's incoming value is preserved across the spill.
      // Without this, a regalloc-emitted sequence like
      // `STAfi $img0 (=$x); $img0 = COPY $a; STAfi $img0` overwrites $a at
      // the first STAfi (via `lda 0xd0`), making the second STAfi spill
      // garbage. Observed under full IMG-clobber as
      // `dadd(1.5,2.5) -> 0x4010_0000_3000_3000`.
      //
      // Sequence: pha (SP -= 2); load source into A; sta (d+2),s (offset
      // bumped to compensate for the PHA SP shift); pla (SP += 2, A
      // restored). Cost: +PHA (3 cyc, 1 byte) + PLA (4 cyc, 1 byte) =
      // +7 cyc, +2 bytes per IMG/X/Y-source STAfi.
      BuildMI(MBB, II, DL, TII.get(W65816::PHA));
      if (SrcDP >= 0) {
        BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(SrcDP);
      } else {
        const Register Src = MI.getOperand(0).getReg();
        const unsigned XferOp =
            (Src == W65816::X) ? W65816::TXA : W65816::TYA;
        BuildMI(MBB, II, DL, TII.get(XferOp));
      }
      BuildMI(MBB, II, DL, TII.get(W65816::STA_StackRel))
          .addImm(Offset + 2) // PHA shifted SP by 2; range-checked above
          .addReg(W65816::A, RegState::Implicit);
      BuildMI(MBB, II, DL, TII.get(W65816::PLA));
    } else {
      // Direct A source: simple sta d,s. A is the source and is left
      // untouched.
      BuildMI(MBB, II, DL, TII.get(W65816::STA_StackRel))
          .addImm(Offset)
          .addReg(W65816::A, RegState::Implicit);
    }
    break;
  }
  case W65816::STA8fi: {
    // i8 truncating store via stack-rel. Wrap the store in
    // SEP #$20 / STA d,S / REP #$20 so only one byte is written. Entry M=0
    // (16-bit accumulator) is assumed per the function prologue; the
    // REP #$20 after the STA restores that invariant.
    BuildMI(MBB, II, DL, TII.get(W65816::SEP))
        .addImm(0x20)
        .addReg(W65816::P, RegState::ImplicitDefine);
    BuildMI(MBB, II, DL, TII.get(W65816::STA_StackRel))
        .addImm(Offset)
        .addReg(W65816::A, RegState::Implicit);
    BuildMI(MBB, II, DL, TII.get(W65816::REP))
        .addImm(0x20)
        .addReg(W65816::P, RegState::ImplicitDefine);
    break;
  }
  default: {
    // (Prologue-PHA fold reverted: it was correct in isolation but surfaced
    // a separate compile-time hazard via the DAG combiner on shift-by-1 i8.
    // Re-enable once the DAG combiner interaction is understood.)
    //
    // Emit the carry-prep instruction first if the operation needs it.
    if (NeedsCarryPrefix)
      EmitCarryPrefix();
    // The MC instructions don't carry explicit Defs/Uses on the accumulator
    // because that's an implicit hardware semantic of every 65816
    // arithmetic/load/store. Without them, post-RA passes (Machine Copy
    // Propagation in particular) miss that an ADC d,S between a TXA and a
    // TAX redefines $a, and elide the TAX as "redundant", corrupting the
    // return value. Add the implicit operands here so dataflow tracking is
    // correct.
    auto Builder = BuildMI(MBB, II, DL, TII.get(NewOpc)).addImm(Offset);
    switch (NewOpc) {
    case W65816::LDA_StackRelIndY:
      // Indirect-Y load: A def + Y use. The Y use is critical: without it,
      // post-RA passes can reorder a Y-defining op past this instruction,
      // leaving the load reading at (ptr + stale_Y).
      Builder.addReg(W65816::A, RegState::ImplicitDefine)
          .addReg(W65816::Y, RegState::Implicit);
      break;
    case W65816::STA_StackRelIndY:
      // Indirect-Y store: A use + Y use (same Y reasoning as the load).
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::Y, RegState::Implicit);
      break;
    case W65816::ADC_StackRel:
    case W65816::SBC_StackRel:
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::A, RegState::ImplicitDefine)
          .addReg(W65816::P, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
      break;
    case W65816::AND_StackRel:
    case W65816::ORA_StackRel:
    case W65816::EOR_StackRel:
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::A, RegState::ImplicitDefine);
      break;
    case W65816::CMP_StackRel:
      // CMP only sets flags; it reads A but does not change it.
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
      break;
    default:
      break;
    }
    break;
  }
  }

  MI.eraseFromParent();
  return true;
}
// Reports SP as the frame register for every function; MF is not consulted.
Register W65816RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return W65816::SP;
}