610 lines
26 KiB
C++
610 lines
26 KiB
C++
//===-- W65816RegisterInfo.cpp - W65816 Register Information --------------===//
|
|
//
|
|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
|
|
// See https://llvm.org/LICENSE.txt for license information.
|
|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
//
|
|
// Skeleton implementation of the W65816 register info. The callee-saved
|
|
// register list, reserved set and frame-index elimination logic are
|
|
// deliberately minimal; they will be fleshed out alongside frame lowering.
|
|
//
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
#include "W65816RegisterInfo.h"
|
|
#include "W65816.h"
|
|
#include "W65816FrameLowering.h"
|
|
#include "W65816InstrInfo.h"
|
|
#include "W65816MachineFunctionInfo.h"
|
|
#include "W65816Subtarget.h"
|
|
#include "llvm/ADT/BitVector.h"
|
|
#include "llvm/CodeGen/MachineFrameInfo.h"
|
|
#include "llvm/CodeGen/MachineFunction.h"
|
|
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
|
#include "llvm/Support/ErrorHandling.h"
|
|
|
|
using namespace llvm;
|
|
|
|
// IMG slot DP addresses for STAfi's IMG-source path.
|
|
static int imgRegToDP(Register R) {
|
|
switch (R) {
|
|
case W65816::IMG0: return 0xD0;
|
|
case W65816::IMG1: return 0xD2;
|
|
case W65816::IMG2: return 0xD4;
|
|
case W65816::IMG3: return 0xD6;
|
|
case W65816::IMG4: return 0xD8;
|
|
case W65816::IMG5: return 0xDA;
|
|
case W65816::IMG6: return 0xDC;
|
|
case W65816::IMG7: return 0xDE;
|
|
case W65816::IMG8: return 0xC0;
|
|
case W65816::IMG9: return 0xC2;
|
|
case W65816::IMG10: return 0xC4;
|
|
case W65816::IMG11: return 0xC6;
|
|
case W65816::IMG12: return 0xC8;
|
|
case W65816::IMG13: return 0xCA;
|
|
case W65816::IMG14: return 0xCC;
|
|
case W65816::IMG15: return 0xCE;
|
|
default: return -1;
|
|
}
|
|
}
|
|
|
|
// Far FI elim via DP frame-pointer ($F6/$F7). Called when an FI's
|
|
// effective offset exceeds 0xFF and the function reserved an FP at
|
|
// prologue time (StackSize > 200). Stack is always bank 0, so
|
|
// `(F6),Y` (16-bit DP-indirect, Y-indexed, bank-0 result) is correct.
|
|
//
|
|
// Common skeleton (varies per opcode):
|
|
// PHY; LDY #FPOff; <op via ($F6),Y>; PLY
|
|
// PHY/PLY balance, so subsequent `,S` accesses stay accurate. PLY
|
|
// preserves C (only N/Z), so multi-precision carry chains survive
|
|
// the load-via-Y.
|
|
static bool expandFarFI(MachineInstr &MI, int FPOff,
|
|
const W65816InstrInfo &TII) {
|
|
MachineBasicBlock &MBB = *MI.getParent();
|
|
MachineBasicBlock::iterator II = MI.getIterator();
|
|
DebugLoc DL = MI.getDebugLoc();
|
|
unsigned Opc = MI.getOpcode();
|
|
|
|
switch (Opc) {
|
|
case W65816::LDAfi: {
|
|
Register Dst = MI.getOperand(0).getReg();
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PHY))
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16))
|
|
.addImm(FPOff)
|
|
.addReg(W65816::Y, RegState::ImplicitDefine);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY))
|
|
.addImm(0xF6)
|
|
.addReg(W65816::A, RegState::ImplicitDefine)
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PLY))
|
|
.addReg(W65816::Y, RegState::ImplicitDefine);
|
|
if (Dst == W65816::X)
|
|
BuildMI(MBB, II, DL, TII.get(W65816::TAX));
|
|
else if (Dst == W65816::Y)
|
|
BuildMI(MBB, II, DL, TII.get(W65816::TAY));
|
|
return true;
|
|
}
|
|
case W65816::STAfi: {
|
|
Register Src = MI.getOperand(0).getReg();
|
|
int srcDP = imgRegToDP(Src);
|
|
if (srcDP >= 0)
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(srcDP);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PHY))
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::STA_DPIndY))
|
|
.addImm(0xF6)
|
|
.addReg(W65816::A, RegState::Implicit)
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PLY));
|
|
return true;
|
|
}
|
|
case W65816::STA8fi: {
|
|
BuildMI(MBB, II, DL, TII.get(W65816::SEP)).addImm(0x20)
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PHY))
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::STA_DPIndY))
|
|
.addImm(0xF6)
|
|
.addReg(W65816::A, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PLY));
|
|
BuildMI(MBB, II, DL, TII.get(W65816::REP)).addImm(0x20)
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
return true;
|
|
}
|
|
case W65816::ADCfi:
|
|
case W65816::ADCEfi:
|
|
case W65816::ANDfi:
|
|
case W65816::ORAfi:
|
|
case W65816::EORfi: {
|
|
// Commutative (or chained): A op M. Save A to $E2, load M to A
|
|
// via (F6),Y, then op against saved A. Order matters: PLY must
|
|
// come BEFORE the final op so PLY's N/Z clobber doesn't hide the
|
|
// op's flags from a downstream consumer.
|
|
BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(0xE2)
|
|
.addReg(W65816::A, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PHY))
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY)).addImm(0xF6)
|
|
.addReg(W65816::A, RegState::ImplicitDefine)
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PLY))
|
|
.addReg(W65816::Y, RegState::ImplicitDefine);
|
|
unsigned OpDPOpc = 0;
|
|
switch (Opc) {
|
|
case W65816::ADCfi:
|
|
case W65816::ADCEfi: OpDPOpc = W65816::ADC_DP; break;
|
|
case W65816::ANDfi: OpDPOpc = W65816::AND_DP; break;
|
|
case W65816::ORAfi: OpDPOpc = W65816::ORA_DP; break;
|
|
case W65816::EORfi: OpDPOpc = W65816::EOR_DP; break;
|
|
default: llvm_unreachable("unhandled commutative far-FI");
|
|
}
|
|
auto B = BuildMI(MBB, II, DL, TII.get(OpDPOpc)).addImm(0xE2)
|
|
.addReg(W65816::A, RegState::Implicit)
|
|
.addReg(W65816::A, RegState::ImplicitDefine);
|
|
if (OpDPOpc == W65816::ADC_DP) {
|
|
B.addReg(W65816::P, RegState::Implicit)
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
}
|
|
return true;
|
|
}
|
|
case W65816::SBCfi:
|
|
case W65816::SBCEfi:
|
|
case W65816::CMPfi: {
|
|
// Non-commutative (A - M): we must load M into a scratch slot
|
|
// without losing A. Sequence:
|
|
// STA $E0 ; save original A
|
|
// PHY
|
|
// LDY #FPOff
|
|
// LDA ($F6),Y ; A = M (lost saved A, but $E0 still has it)
|
|
// STA $E2 ; $E2 = M
|
|
// LDA $E0 ; A = original
|
|
// PLY ; preserves C, clobbers N/Z (re-set by SBC/CMP)
|
|
// SBC/CMP $E2
|
|
BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(0xE0)
|
|
.addReg(W65816::A, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PHY))
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDY_Imm16)).addImm(FPOff);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDA_DPIndY)).addImm(0xF6)
|
|
.addReg(W65816::A, RegState::ImplicitDefine)
|
|
.addReg(W65816::Y, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::STA_DP)).addImm(0xE2)
|
|
.addReg(W65816::A, RegState::Implicit);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(0xE0)
|
|
.addReg(W65816::A, RegState::ImplicitDefine);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::PLY))
|
|
.addReg(W65816::Y, RegState::ImplicitDefine);
|
|
if (Opc == W65816::CMPfi) {
|
|
BuildMI(MBB, II, DL, TII.get(W65816::CMP_DP)).addImm(0xE2)
|
|
.addReg(W65816::A, RegState::Implicit)
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
} else {
|
|
BuildMI(MBB, II, DL, TII.get(W65816::SBC_DP)).addImm(0xE2)
|
|
.addReg(W65816::A, RegState::Implicit)
|
|
.addReg(W65816::A, RegState::ImplicitDefine)
|
|
.addReg(W65816::P, RegState::Implicit)
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
}
|
|
return true;
|
|
}
|
|
case W65816::ADDframe: {
|
|
// LEA into A: A = FP + FPOff. 16-bit add, no carry chain needed.
|
|
BuildMI(MBB, II, DL, TII.get(W65816::LDA_DP)).addImm(0xF6)
|
|
.addReg(W65816::A, RegState::ImplicitDefine);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::CLC))
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
BuildMI(MBB, II, DL, TII.get(W65816::ADC_Imm16)).addImm(FPOff)
|
|
.addReg(W65816::A, RegState::Implicit)
|
|
.addReg(W65816::A, RegState::ImplicitDefine)
|
|
.addReg(W65816::P, RegState::Implicit)
|
|
.addReg(W65816::P, RegState::ImplicitDefine);
|
|
return true;
|
|
}
|
|
default:
|
|
return false;
|
|
}
|
|
}
|
|
|
|
#define DEBUG_TYPE "w65816-reg-info"
|
|
|
|
#define GET_REGINFO_TARGET_DESC
|
|
#include "W65816GenRegisterInfo.inc"
|
|
|
|
// Default constructor: hands W65816::PC to the TableGen-generated base class.
// NOTE(review): presumably this argument is the return-address register
// expected by the generated constructor — confirm against
// W65816GenRegisterInfo.
W65816RegisterInfo::W65816RegisterInfo()
    : W65816GenRegisterInfo(W65816::PC) {
}
|
|
|
|
// Returns the zero-terminated callee-saved register list. The list is the
// same for every function (MF is not consulted): per the calling-convention
// note carried by this file, DP and DBR are preserved across calls.
const MCPhysReg *
W65816RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  static const MCPhysReg PreservedAcrossCalls[] = {
      W65816::DP,
      W65816::DBR,
      0, // terminator
  };
  return PreservedAcrossCalls;
}
|
|
|
|
// Builds the set of registers the allocator must never hand out. The set is
// identical for every function (MF is not consulted).
BitVector W65816RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  // SP, PC, P, PBR and DBR are special-purpose. DP could be allocatable in
  // principle, but the skeleton keeps it reserved until direct-page
  // management lands.
  static const MCPhysReg NeverAllocated[] = {
      W65816::SP,  W65816::PC,  W65816::P,
      W65816::PBR, W65816::DBR, W65816::DP,
  };

  BitVector Reserved(getNumRegs());
  for (const MCPhysReg Reg : NeverAllocated)
    Reserved.set(Reg);
  return Reserved;
}
|
|
|
|
// Returns the register class used for pointer values. Kind is ignored:
// every pointer kind maps to PtrRegs.
const TargetRegisterClass *
W65816RegisterInfo::getPointerRegClass(unsigned Kind) const {
  const TargetRegisterClass *const PointerRC = &W65816::PtrRegsRegClass;
  return PointerRC;
}
|
|
|
|
// Replaces a frame-index pseudo with real 65816 instructions.
//
// Parameters:
//   II           - iterator to the pseudo; replacements are inserted before
//                  it and the pseudo is erased.
//   SPAdj        - extra bytes currently pushed below the post-prologue SP
//                  (inside an ADJCALLSTACKDOWN/UP region).
//   FIOperandNum - index of the frame-index operand; the operand after it
//                  is an immediate byte offset into the object.
//   RS           - unused.
//
// Always returns true (the pseudo has been erased). Aborts via
// report_fatal_error when the slot cannot be addressed, and via
// llvm_unreachable for an opcode this function does not know.
//
// Displacement model (WDC stack-relative `op disp,S` addresses S + disp):
//     address = entry_S + FrameOffset
//     S       = entry_S - StackSize - SPAdj
//     disp    = FrameOffset + ImmOffset + StackSize + SPAdj
// plus a +1 skew for locals (FrameOffset < 0): the 65816 SP is
// empty-descending (points at the next FREE byte) while PEI lays out the
// frame as if SP were full-descending. Without the skew, slot 0 would sit at
// S+0, exactly where the next JSL pushes its return address. Arguments have
// positive FrameOffset and need no skew.
//
// If disp does not fit in 0..0xFF and the function reserved the DP frame
// pointer, the access is expanded through expandFarFI using the FP-relative
// offset. That offset leaves SPAdj out because $F6 captures S right after the
// prologue, before any pushes made inside a call sequence.
//
// About the implicit operands added below: per the original author's notes,
// the MC instructions carry no Defs/Uses for the accumulator, so without
// them post-RA passes (Machine Copy Propagation in particular) fail to see
// that e.g. an ADC d,S between a TXA and a TAX redefines $a, and delete the
// TAX as redundant. The (sr,S),Y forms additionally need the Y use so a
// Y-defining instruction is not moved across them.
bool W65816RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
                                             int SPAdj, unsigned FIOperandNum,
                                             RegScavenger *RS) const {
  MachineInstr &MI = *II;
  MachineBasicBlock &MBB = *MI.getParent();
  MachineFunction &MF = *MBB.getParent();
  const MachineFrameInfo &MFI = MF.getFrameInfo();
  const W65816InstrInfo &TII =
      *MF.getSubtarget<W65816Subtarget>().getInstrInfo();
  const DebugLoc DL = MI.getDebugLoc();
  const unsigned Opc = MI.getOpcode();

  // Inserts a new instruction in front of the pseudo.
  auto Emit = [&](unsigned Opcode) {
    return BuildMI(MBB, II, DL, TII.get(Opcode));
  };

  // --- 1. Classify the pseudo ---------------------------------------------
  // LDAfi/STAfi/STA8fi/ADDframe have dedicated expansions further down; the
  // rest map 1:1 onto a stack-relative opcode, optionally preceded by a
  // carry-prep instruction.
  unsigned NewOpc = 0;
  bool NeedsCarryPrefix = false;
  bool IsSub = false;
  switch (Opc) {
  case W65816::LDAfi:
  case W65816::STAfi:
  case W65816::STA8fi:
  case W65816::ADDframe:
    break;
  case W65816::ADCfi:
    NewOpc = W65816::ADC_StackRel;
    NeedsCarryPrefix = true;
    break;
  case W65816::SBCfi:
    NewOpc = W65816::SBC_StackRel;
    NeedsCarryPrefix = true;
    IsSub = true;
    break;
  // ADCEfi / SBCEfi are the chained-carry variants used for the high half
  // of a multi-precision operation: no CLC/SEC, they consume the carry left
  // in P by the preceding add/sub.
  case W65816::ADCEfi:
    NewOpc = W65816::ADC_StackRel;
    break;
  case W65816::SBCEfi:
    NewOpc = W65816::SBC_StackRel;
    break;
  case W65816::ANDfi:
    NewOpc = W65816::AND_StackRel;
    break;
  case W65816::ORAfi:
    NewOpc = W65816::ORA_StackRel;
    break;
  case W65816::EORfi:
    NewOpc = W65816::EOR_StackRel;
    break;
  case W65816::CMPfi:
    NewOpc = W65816::CMP_StackRel;
    break;
  case W65816::LDAfi_indY:
    NewOpc = W65816::LDA_StackRelIndY;
    break;
  case W65816::STAfi_indY:
    NewOpc = W65816::STA_StackRelIndY;
    break;
  default:
    llvm_unreachable("W65816: unhandled instruction in eliminateFrameIndex");
  }

  // CLC before ADC, SEC before SBC. The P def is modelled the same way as
  // the CLC emitted for ADDframe.
  auto EmitCarryPrefix = [&] {
    Emit(IsSub ? W65816::SEC : W65816::CLC)
        .addReg(W65816::P, RegState::ImplicitDefine);
  };

  // --- 2. Compute displacements (see the function comment) ----------------
  const int FI = MI.getOperand(FIOperandNum).getIndex();
  const int FrameOffset = MFI.getObjectOffset(FI);
  const int ImmOffset = MI.getOperand(FIOperandNum + 1).getImm();
  int FPOff = FrameOffset + ImmOffset + (int)MFI.getStackSize();
  if (FrameOffset < 0)
    FPOff += 1; // empty-descending SP skew for locals
  const int Offset = FPOff + SPAdj;

  // --- 3. ADDframe: address-of-slot into A --------------------------------
  // TSC ; CLC ; ADC #disp. TSC transfers all 16 bits of SP and does not
  // touch the carry, hence the explicit CLC. Any 16-bit displacement works,
  // so no far variant is needed even when the DP frame pointer exists.
  if (Opc == W65816::ADDframe) {
    if (Offset < 0 || Offset > 0xFFFF)
      report_fatal_error("W65816: frame offset out of i16 LEA range");
    Emit(W65816::TSC)
        .addReg(W65816::A, RegState::ImplicitDefine)
        .addReg(W65816::SP, RegState::Implicit);
    Emit(W65816::CLC).addReg(W65816::P, RegState::ImplicitDefine);
    Emit(W65816::ADC_Imm16)
        .addImm(Offset)
        .addReg(W65816::A, RegState::Implicit)
        .addReg(W65816::A, RegState::ImplicitDefine)
        .addReg(W65816::P, RegState::Implicit)
        .addReg(W65816::P, RegState::ImplicitDefine);
    MI.eraseFromParent();
    return true;
  }

  // --- 4. STAfi: does the store need the PHA/PLA bracket? -----------------
  // A source living in IMGn, X or Y must pass through A. The near expansion
  // brackets that with PHA/PLA, and the PHA moves S down by 2, so the
  // displacement actually encoded is Offset + 2. The range check has to use
  // that value: Offset 0xFE/0xFF would otherwise be emitted as 0x100/0x101,
  // which does not fit the 8-bit stack-relative displacement.
  int SrcDP = -1;
  bool NeedsABracket = false;
  if (Opc == W65816::STAfi) {
    const Register Src = MI.getOperand(0).getReg();
    SrcDP = imgRegToDP(Src);
    NeedsABracket = SrcDP >= 0 || Src == W65816::X || Src == W65816::Y;
  }
  const int EncodedOffset = NeedsABracket ? Offset + 2 : Offset;

  // --- 5. Out of stack-relative range: go through the DP frame pointer ----
  if (Offset < 0 || EncodedOffset > 0xFF) {
    if (MF.getInfo<W65816MachineFunctionInfo>()->getUsesDpFP()) {
      // The carry prefix goes BEFORE the far sequence: nothing in that
      // sequence ahead of the final ADC/SBC modifies C (PLY only touches
      // N/Z), so the prefix's value reaches the final op intact.
      if (NeedsCarryPrefix)
        EmitCarryPrefix();
      if (expandFarFI(MI, FPOff, TII)) {
        MI.eraseFromParent();
        return true;
      }
    }
    report_fatal_error("W65816: frame offset out of stack-relative range");
  }

  // (Prologue-PHA fold reverted — it was correct in isolation but surfaced
  // a separate compile-time hazard via the DAG combiner on shift-by-1 i8.
  // It saved one op per affected function at the cost of huge compile
  // slowdowns. Re-enable once the DAG combiner interaction is understood.)

  // --- 6. Near (stack-relative) expansions --------------------------------
  switch (Opc) {
  case W65816::LDAfi: {
    // The load itself always targets A. If the allocator parked the result
    // elsewhere (X/Y via Idx16 coalescing, or an IMG slot), forward it.
    const Register Dst = MI.getOperand(0).getReg();
    Emit(W65816::LDA_StackRel)
        .addImm(Offset)
        .addReg(W65816::A, RegState::ImplicitDefine);
    if (Dst == W65816::X) {
      Emit(W65816::TAX);
    } else if (Dst == W65816::Y) {
      Emit(W65816::TAY);
    } else {
      // IMGn destination: store A into the register's DP slot so a later
      // `COPY $x = $imgN` (expanded to `ldx $D?`) reads the reloaded value.
      // Without it the slot stays stale — observed as
      // dadd(1.5, 2.5) -> 0x4010_0000_3000_3000 under full IMG-clobber,
      // because every cross-call reload of an IMG-assigned vreg fed old
      // data into the next computation.
      const int DstDP = imgRegToDP(Dst);
      if (DstDP >= 0)
        Emit(W65816::STA_DP)
            .addImm(DstDP)
            .addReg(W65816::A, RegState::Implicit);
    }
    break;
  }

  case W65816::STAfi: {
    if (!NeedsABracket) {
      // Source is A: a plain `sta d,s`; A is left untouched.
      Emit(W65816::STA_StackRel)
          .addImm(Offset)
          .addReg(W65816::A, RegState::Implicit);
      break;
    }
    // Non-A source: the value must be moved into A, but A's incoming value
    // has to survive. Otherwise a sequence such as
    //     STAfi $img0 (=$x); $img0 = COPY $a; STAfi $img0
    // loses $a at the first store (via `lda $d0`) and the second store
    // spills garbage (same dadd symptom as described for LDAfi).
    //     pha ; <src -> A> ; sta (d+2),s ; pla
    // Cost per the original note: +7 cycles, +2 bytes.
    const Register Src = MI.getOperand(0).getReg();
    Emit(W65816::PHA);
    if (SrcDP >= 0)
      Emit(W65816::LDA_DP)
          .addImm(SrcDP)
          .addReg(W65816::A, RegState::ImplicitDefine);
    else
      Emit(Src == W65816::X ? W65816::TXA : W65816::TYA);
    Emit(W65816::STA_StackRel)
        .addImm(EncodedOffset) // PHA shifted SP by 2
        .addReg(W65816::A, RegState::Implicit);
    Emit(W65816::PLA);
    break;
  }

  case W65816::STA8fi: {
    // i8 truncating store: SEP #$20 / STA d,S / REP #$20 so only one byte
    // is written. Entry M=0 (16-bit accumulator) is assumed per the
    // function prologue; the trailing REP restores that invariant.
    Emit(W65816::SEP).addImm(0x20).addReg(W65816::P,
                                          RegState::ImplicitDefine);
    Emit(W65816::STA_StackRel)
        .addImm(Offset)
        .addReg(W65816::A, RegState::Implicit);
    Emit(W65816::REP).addImm(0x20).addReg(W65816::P,
                                          RegState::ImplicitDefine);
    break;
  }

  default: {
    // Table-driven pseudos: optional carry prep, then the stack-relative
    // instruction with operands mirroring the pseudo's read/write
    // behaviour.
    if (NeedsCarryPrefix)
      EmitCarryPrefix();
    auto Builder = Emit(NewOpc).addImm(Offset);
    switch (NewOpc) {
    case W65816::LDA_StackRelIndY:
      // A def + Y use. The Y use is critical: without it a Y-defining op
      // can be reordered past the load, which then reads (ptr + stale Y).
      Builder.addReg(W65816::A, RegState::ImplicitDefine)
          .addReg(W65816::Y, RegState::Implicit);
      break;
    case W65816::STA_StackRelIndY:
      // A use + Y use, same reasoning.
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::Y, RegState::Implicit);
      break;
    case W65816::ADC_StackRel:
    case W65816::SBC_StackRel:
      // Read A and carry, write A and flags.
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::A, RegState::ImplicitDefine)
          .addReg(W65816::P, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
      break;
    case W65816::AND_StackRel:
    case W65816::ORA_StackRel:
    case W65816::EOR_StackRel:
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::A, RegState::ImplicitDefine);
      break;
    case W65816::CMP_StackRel:
      // CMP leaves A unchanged; a use of A plus a def of P is sufficient.
      Builder.addReg(W65816::A, RegState::Implicit)
          .addReg(W65816::P, RegState::ImplicitDefine);
      break;
    default:
      break;
    }
    break;
  }
  }

  MI.eraseFromParent();
  return true;
}
|
|
|
|
// Reports the register used as the frame base. It is W65816::SP for every
// function (MF is not consulted).
Register W65816RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const Register FrameBase = W65816::SP;
  return FrameBase;
}
|