65816-llvm-mos/src/llvm/lib/Target/W65816/W65816InstrInfo.td
Scott Duensing 0210b06a5e Checkpoint
2026-05-06 17:42:52 -05:00

1665 lines
84 KiB
TableGen

//===-- W65816InstrInfo.td - W65816 Instruction defs -------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// W65816 instruction description. This file defines the MC-layer instruction
// encodings for the core 65816 instruction set. DAG-selection patterns will
// be added incrementally on top of these MC instructions.
//
//===----------------------------------------------------------------------===//
include "W65816InstrFormats.td"
//===----------------------------------------------------------------------===//
// Type Profiles
//===----------------------------------------------------------------------===//
// Call: no results, variadic operand list; operand 0 is pointer-typed.
def SDT_W65816Call
    : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;

// Call-sequence start / end markers: two i16 operands each.
def SDT_W65816CallSeqStart
    : SDCallSeqStart<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_W65816CallSeqEnd
    : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;

// Address wrapper: one pointer-typed result whose type equals the operand's.
def SDT_W65816Wrapper
    : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;

// Compare: two integer operands of identical type.  Both i16 and i8
// operands are accepted.
def SDT_W65816Cmp
    : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>]>;

// Conditional branch: operand 0 is OtherVT, operand 1 is an i8.
def SDT_W65816BrCC
    : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
//===----------------------------------------------------------------------===//
// W65816-specific SDNodes
//===----------------------------------------------------------------------===//
// Return node: chained, variadic, with optional incoming glue.
def W65816retglue
    : SDNode<"W65816ISD::RET_GLUE", SDTNone,
             [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

// Call node: chained, variadic, produces glue and optionally consumes it.
def W65816call
    : SDNode<"W65816ISD::CALL", SDT_W65816Call,
             [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;

// Target-typed views of the generic call-sequence markers.
def W65816callseq_start
    : SDNode<"ISD::CALLSEQ_START", SDT_W65816CallSeqStart,
             [SDNPHasChain, SDNPOutGlue]>;
def W65816callseq_end
    : SDNode<"ISD::CALLSEQ_END", SDT_W65816CallSeqEnd,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

// Address wrapper node (no extra properties).
def W65816Wrapper
    : SDNode<"W65816ISD::Wrapper", SDT_W65816Wrapper>;

// Comparison.  Its only product is a Glue value that carries the
// processor flags.
def W65816cmp
    : SDNode<"W65816ISD::CMP", SDT_W65816Cmp, [SDNPOutGlue]>;

// Conditional branch.  Operands are (Chain, Dest, CC, Glue from CMP).
def W65816brcc
    : SDNode<"W65816ISD::BR_CC", SDT_W65816BrCC,
             [SDNPHasChain, SDNPInGlue]>;
// PUSH: store A onto the stack.  LowerCall uses this node to pass extra
// args.  Inputs are Chain + Glue (A is pre-loaded through CopyToReg);
// outputs are Chain + Glue.  The node stores to memory and changes SP,
// which is why it is marked as a side-effecting store.  In 16-bit M mode
// it pushes 2 bytes and decrements SP by 2; those bytes are unwound by
// the call's ADJCALLSTACKUP pseudo via tsc;clc;adc #N;tcs once the JSL
// has returned.
def W65816push
    : SDNode<"W65816ISD::PUSH", SDTNone,
             [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
              SDNPSideEffect, SDNPMayStore]>;

// PUSH_X: store X onto the stack.  Shaped exactly like W65816push, except
// that the pushed value is glued from CopyToReg(X) rather than
// CopyToReg(A).
def W65816pushx
    : SDNode<"W65816ISD::PUSH_X", SDTNone,
             [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
              SDNPSideEffect, SDNPMayStore]>;
// SELECT_CC: operands are (TVal, FVal, CC) together with a glue value
// that carries the flags of a preceding W65816cmp.
// EmitInstrWithCustomInserter lowers it into a CMP (already present in
// the BB) + Bxx + diamond CFG + PHI.
//
// Profile: result, TVal and FVal all share one type; CC is an i8.
def SDT_W65816SelectCC
    : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
                           SDTCisSameAs<0, 2>,
                           SDTCisVT<3, i8>]>;

def W65816selectcc
    : SDNode<"W65816ISD::SELECT_CC", SDT_W65816SelectCC, [SDNPInGlue]>;
// Dynamic stack allocation.  Operands: (chain, size:i16); results:
// (ptr:i16, chain).  The AsmPrinter lowers it to
// TSC; SEC; SBC size; TCS; INC A.  See LowerDynamicStackalloc.
def SDT_W65816Alloca
    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;

def W65816alloca
    : SDNode<"W65816ISD::ALLOCA", SDT_W65816Alloca,
             [SDNPHasChain, SDNPSideEffect]>;
// ptr32 load / store.  These target-specific nodes take a 32-bit pointer
// (Wide32 = i32) and lower to [dp],Y indirect-long, with the bank byte
// coming from the pointer's hi-half.  They exist for ptr32 mode, where a
// generic (load i32-addr) needs explicit lowering: wrapping the access in
// a target node stops DAG combines from rewriting the load before isel.
//
// A load always materialises an i16 in A (16-bit LDA); byte zext / anyext
// patterns AND-mask afterwards, exactly as the existing LDAptr does.
// Stores come as two nodes: ST_PTR (full 16-bit STA) and STB_PTR
// (SEP/REP-wrapped 8-bit STA for truncating stores).
def SDT_W65816LdPtr
    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i32>]>;
def SDT_W65816StPtr
    : SDTypeProfile<0, 2, [SDTCisVT<0, i16>, SDTCisVT<1, i32>]>;

def W65816ldPtr
    : SDNode<"W65816ISD::LD_PTR", SDT_W65816LdPtr,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

// Stack-pointer dereference for va_arg: a bank-0-explicit load.  The
// 65816 stack is hardwired to bank 0 and va_arg's `ap` is always a stack
// pointer.  Under Loader, $BE points to OUR bank while va_arg needs
// bank 0, so LowerVAARG emits this opcode and the pattern routes to
// LDAptrBank0 (the bank-0-hardcoded variant of LDAptr).
def SDT_W65816VAArgLoad
    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;

def W65816vaargLoad
    : SDNode<"W65816ISD::VAARG_LOAD", SDT_W65816VAArgLoad,
             [SDNPHasChain, SDNPMayLoad]>;

// Store counterparts of W65816ldPtr (word store and byte store).
def W65816stPtr
    : SDNode<"W65816ISD::ST_PTR", SDT_W65816StPtr,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def W65816stbPtr
    : SDNode<"W65816ISD::STB_PTR", SDT_W65816StPtr,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
// Call-frame setup / teardown pseudos, selected from the callseq_start /
// callseq_end nodes.  Both read and write SP.
let Uses = [SP], Defs = [SP] in {
  def ADJCALLSTACKDOWN
      : W65816Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
                     "# ADJCALLSTACKDOWN $amt1 $amt2",
                     [(W65816callseq_start timm:$amt1, timm:$amt2)]>;

  def ADJCALLSTACKUP
      : W65816Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
                     "# ADJCALLSTACKUP $amt1 $amt2",
                     [(W65816callseq_end timm:$amt1, timm:$amt2)]>;
}
// ADDframe is the LEA equivalent: it computes the address
// (SP + frame_offset + offset) of a stack slot and leaves it in A.
// W65816DAGToDAGISel::Select picks it for a bare ISD::FrameIndex SDValue,
// and eliminateFrameIndex expands it into TSC + CLC + ADC #disp.  The
// output class is Acc16 because the address ends up in A; PtrRegs (which
// only contains SP) would be the wrong class.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    isReMaterializable = 1 in {
  def ADDframe
      : W65816Pseudo<(outs Acc16:$dst),
                     (ins i16imm:$base, i16imm:$offset),
                     "# ADDframe PSEUDO", []>;
}
// ALLOCAfi implements VLA / dynamic_stackalloc.  Input: a 16-bit byte
// count in A.  Output: the address of the allocated region, also in A.
// The AsmPrinter expands it to: TSC; SEC; SBC count; TCS; INC A.
// It has side effects (SP changes).  $dst and $size are both tied to A,
// and the explicit Defs/Uses on SP keep regalloc honest about the side
// effect.
let hasSideEffects = 1, Uses = [SP], Defs = [SP],
    Constraints = "$size = $dst" in {
  def ALLOCAfi
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$size),
                     "# ALLOCAfi $dst, $size",
                     [(set Acc16:$dst, (W65816alloca Acc16:$size))]>;
}
// The retglue node lowers directly to RTL (see Returns section below).
// No separate RET pseudo — the real MC instruction handles the pattern.
// PUSH16: push A onto the stack; the AsmPrinter expands it to MC `PHA`.
// LowerCall uses it to pass extra args.  The matching
// `tsc;clc;adc #N;tcs` SP unwind is produced by
// eliminateCallFramePseudoInstr for ADJCALLSTACKUP.
let hasSideEffects = 1, mayStore = 1, Uses = [A, SP], Defs = [SP] in
def PUSH16
    : W65816Pseudo<(outs), (ins), "# PUSH16", [(W65816push)]>;

// PUSH16X: push X onto the stack.  LowerCall uses it when an outgoing
// arg's SDValue is already in X (e.g. forwarding the hi half of the
// i32-first-arg-in-A:X).  This saves a TXA+spill round-trip.
// Expansion: PHX.
let hasSideEffects = 1, mayStore = 1, Uses = [X, SP], Defs = [SP] in
def PUSH16X
    : W65816Pseudo<(outs), (ins), "# PUSH16X", [(W65816pushx)]>;
// SELECT_CC16 implements
//   (set Acc16:$dst, (W65816selectcc tval, fval, cc))
// for the case where the flag-producing CMP has already been emitted
// (its glue is implicit via the P register).
// EmitInstrWithCustomInserter expands the pseudo into a Bxx + 2 BBs +
// PHI.  usesCustomInserter makes codegen call our hook; Uses = [P] makes
// MachineSched keep the CMP adjacent.
let Uses = [P], usesCustomInserter = 1, hasSideEffects = 1 in {
  def SELECT_CC16
      : W65816Pseudo<(outs Acc16:$dst),
                     (ins Acc16:$tval, Acc16:$fval, i8imm:$cc),
                     "# SELECT_CC16 $dst, $tval, $fval, $cc",
                     [(set Acc16:$dst,
                           (W65816selectcc Acc16:$tval, Acc16:$fval,
                                           timm:$cc))]>;

  // i8 mirror of SELECT_CC16.  Without it, `c ? a : b` patterns with an
  // i8 result (e.g. `unsigned char to_lower(char c)`) fail isel with
  // "Cannot Select" — a pre-existing bug.  EmitInstrWithCustomInserter
  // treats the i8 and i16 forms identically; only the register class of
  // the operands differs.
  def SELECT_CC8
      : W65816Pseudo<(outs Acc8:$dst),
                     (ins Acc8:$tval, Acc8:$fval, i8imm:$cc),
                     "# SELECT_CC8 $dst, $tval, $fval, $cc",
                     [(set Acc8:$dst,
                           (W65816selectcc Acc8:$tval, Acc8:$fval,
                                           timm:$cc))]>;
}
//===----------------------------------------------------------------------===//
// Codegen pseudos that expand to MC instructions in the AsmPrinter.
//
// These pseudos carry DAG patterns with explicit output operands so the
// generic code generator can allocate them; the MC-layer instructions they
// expand to have the opcode encoding but no virtual output (the result lives
// in the implicit A register). W65816AsmPrinter::emitInstruction maps each
// pseudo here to its real MC counterpart.
//===----------------------------------------------------------------------===//
// Immediate loads into A (16- and 8-bit) and X (16-bit).  All three are
// cheap, rematerialisable, and free of memory accesses / side effects.
//
// NOTE: LDA / LDX physically update N and Z, but we deliberately do
// NOT model that with `Defs = [P]`.  Adding `Defs = [P]` lets the
// scheduler legally place an LDA between CMP and Bxx (P just gets
// re-defined; the latest def is what Bxx tests) — same flag-corruption
// bug, different mechanism.  Two complementary fixes carry the load:
// the 4-block SELECT_CC inserter for SETCC patterns, and the post-RA
// PHP/PLP wrap pass (W65816StackSlotCleanup Pass -2.5) for BR_CC
// patterns (`while`/`for`/`if-goto`).  Both landed.
//
// LDXi16imm used to sit under its own nested `let` that repeated every
// field of the enclosing one; the enclosing `let` already applies to all
// three defs, so the duplicate has been dropped.
let isAsCheapAsAMove = 1, isReMaterializable = 1,
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def LDAi16imm : W65816Pseudo<(outs Acc16:$dst), (ins i16imm:$imm),
                               "# LDAi16imm $dst, $imm",
                               [(set Acc16:$dst, (i16 imm:$imm))]>;
  def LDXi16imm : W65816Pseudo<(outs Idx16:$dst), (ins i16imm:$imm),
                               "# LDXi16imm $dst, $imm",
                               [(set Idx16:$dst, (i16 imm:$imm))]>;
  def LDAi8imm  : W65816Pseudo<(outs Acc8:$dst), (ins i8imm:$imm),
                               "# LDAi8imm $dst, $imm",
                               [(set Acc8:$dst, (i8 imm:$imm))]>;
}
// A 16-bit address (global / external symbol) is materialised into A
// with the same pseudo used for an immediate constant: it expands to
// LDA_Imm16 with the symbol as its operand, and the MC encoder turns
// that into a fixup_16.
def : Pat<(i16 (W65816Wrapper tglobaladdr:$g)),
          (LDAi16imm tglobaladdr:$g)>;
def : Pat<(i16 (W65816Wrapper texternalsym:$s)),
          (LDAi16imm texternalsym:$s)>;
// 8-bit ALU operations against an immediate: add, sub, and, or, xor.
// The source is tied to the destination in every form.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    Constraints = "$src = $dst" in {
  def ADCi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# ADCi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (add Acc8:$src, imm:$imm))]>;

  def SBCi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# SBCi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (sub Acc8:$src, imm:$imm))]>;

  def ANDi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# ANDi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (and Acc8:$src, imm:$imm))]>;

  def ORAi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# ORAi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (or Acc8:$src, imm:$imm))]>;

  def EORi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# EORi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (xor Acc8:$src, imm:$imm))]>;
}
// 8-bit loads through a 16-bit absolute address.
let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in {
  def LDA8abs
      : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
                     "# LDA8abs $dst, $addr", []>;

  // LDA8long is the companion of STA8long: a bank-explicit i8 load via
  // LDA_Long (0xAF).  It serves `*(uint8*)0xC035` reads, because LDA_Abs
  // (0xAD) is DBR-relative and would land in the wrong bank under the
  // GS/OS Loader.  The pattern that ROUTES const-int loads here lives in
  // the ANDi16imm section (it must appear after ANDi16imm is defined).
  def LDA8long
      : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
                     "# LDA8long $dst, $addr", []>;
}

// 8-bit stores through a 16-bit absolute address.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
  def STA8abs
      : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
                     "# STA8abs $src, $addr", []>;

  // STA8long is the 8-bit absolute-long store.  Same pattern as STA8abs,
  // but the AsmPrinter emits STA_Long (0x8F) — a true 24-bit
  // bank-explicit store — rather than STA_Abs (0x8D, DBR-relative).  It
  // is used for MMIO through a constant integer address; the i32imm
  // carries the full 24-bit physical address.  See the
  // (store Acc8, (iPTR imm)) pattern.
  def STA8long
      : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
                     "# STA8long $src, $addr", []>;
}

// Byte load / store of a wrapped global or external symbol.
def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))),
          (LDA8abs tglobaladdr:$g)>;
def : Pat<(i8 (load (W65816Wrapper texternalsym:$s))),
          (LDA8abs texternalsym:$s)>;
def : Pat<(store Acc8:$src, (W65816Wrapper tglobaladdr:$g)),
          (STA8abs Acc8:$src, tglobaladdr:$g)>;
def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)),
          (STA8abs Acc8:$src, texternalsym:$s)>;
// Byte store through a constant-int address (MMIO-style:
// `*(volatile uint8 *)0x70 = v`).  Without these patterns the i8 store
// falls through to STBptr ([dp],Y), which is 16 B / 30 cyc.
//
// The store is routed through STA8long (sta abs-long, opcode 0x8F) and
// not STA8abs: a const-int address is a physical 24-bit pointer and must
// NOT track DBR.  Under the GS/OS Loader the data bank is non-zero, so a
// DBR-relative `sta abs` would land in the wrong bank.
//
// The `timm` forms match TargetConstantSDNode.  Under p:32:16, a pre-isel
// combine in W65816TargetLowering::PerformDAGCombine converts the
// ConstantSDNode ptr to a TargetConstantSDNode so that it survives
// LowerI32Constant intact.
def : Pat<(store Acc8:$src, (iPTR imm:$addr)),
          (STA8long Acc8:$src, (i32 imm:$addr))>;
def : Pat<(store Acc8:$src, (iPTR timm:$addr)),
          (STA8long Acc8:$src, (i32 timm:$addr))>;

// Truncating forms: the i16 source is re-classed to Acc8 first.
def : Pat<(truncstorei8 Acc16:$src, (iPTR imm:$addr)),
          (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8),
                    (i32 imm:$addr))>;
def : Pat<(truncstorei8 Acc16:$src, (iPTR timm:$addr)),
          (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8),
                    (i32 timm:$addr))>;
// 16-bit load through a 16-bit absolute address.  The pseudo itself has
// no pattern; the Pat records below attach loads from a Wrapper(global /
// external symbol) and from constant pointers.
let hasSideEffects = 0, mayStore = 0, mayLoad = 1 in
def LDAabs
    : W65816Pseudo<(outs Acc16:$dst), (ins i32imm:$addr),
                   "# LDAabs $dst, $addr", []>;

def : Pat<(i16 (load (W65816Wrapper tglobaladdr:$g))),
          (LDAabs tglobaladdr:$g)>;
def : Pat<(i16 (load (W65816Wrapper texternalsym:$s))),
          (LDAabs texternalsym:$s)>;

// i16 load from a const-int address, the companion of the STAabs
// (iPTR imm) / (iPTR timm) store patterns defined further down in this
// file.  `*(volatile uint16*)0x5000` → LDAabs (DBR-relative).  The
// combine in W65816TargetLowering returns a TargetConstant for the
// Wide32-zero-hi-Constant unwrap, hence the `timm` variant.
def : Pat<(i16 (load (iPTR imm:$addr))),
          (LDAabs (i32 imm:$addr))>;
def : Pat<(i16 (load (iPTR timm:$addr))),
          (LDAabs (i32 timm:$addr))>;
// 16-bit store to a 16-bit absolute address.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in
def STAabs
    : W65816Pseudo<(outs), (ins Acc16:$src, i32imm:$addr),
                   "# STAabs $src, $addr", []>;

def : Pat<(store Acc16:$src, (W65816Wrapper tglobaladdr:$g)),
          (STAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)),
          (STAabs Acc16:$src, texternalsym:$s)>;

// Store through a constant-int address
// (`*(volatile uint16 *)0x5000 = v`).  This lowers to STAabs (0x8D,
// DBR-relative) and is DELIBERATELY asymmetric with the i8 case
// (STA8long, bank-explicit).  Rationale: most 65816 MMIO is i8 (e.g.
// `*(uint8*)0xC035`), where users expect bank=0 always.  Const-int i16
// is mostly used as a DBR-relative idiom in test code that switches DBR
// and verifies that a write lands in the new bank.  Switching i16 to
// bank-explicit broke 10+ existing tests, with no real-world i16 MMIO
// use case to justify it.  Users who need a bank-explicit i16 store
// should declare a global or split the access into two i8 stores.
def : Pat<(store Acc16:$src, (iPTR imm:$addr)),
          (STAabs Acc16:$src, (i32 imm:$addr))>;

// Under ptr32 the i16/i32 const-addr stores emerge with TargetConstant
// pointers: the PerformDAGCombine on STORE rewrites the ConstantSDNode
// into a TargetConstant to bypass LowerI32Constant's REG_SEQUENCE
// expansion.  Matching `timm` lets STAabs fire for those as well.
def : Pat<(store Acc16:$src, (iPTR timm:$addr)),
          (STAabs Acc16:$src, (i32 timm:$addr))>;
// 16-bit ADD with an immediate: expands to CLC + ADC_Imm16.  The 65816
// ADC sums with the carry flag, so a clean add has to clear it first.
// The constraint ties source and dest to A.  There is only one Acc16
// register, so this is implicit, but spelling it out lets the register
// allocator coalesce without needing a COPY.
//
// Defs = [P] models the C-flag side-effect.  Tablegen needs it in order
// to connect these instructions to the SDNodes `addc` / `subc`
// (SDNPOutGlue), which the type legalizer emits as the lo half of a
// multi-precision add/sub when ADDC/SUBC is Legal (see the
// W65816ISelLowering ctor).
let Defs = [P], Constraints = "$src = $dst",
    mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
  def ADCi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ADCi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (add Acc16:$src, imm:$imm))]>;

  def SBCi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# SBCi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (sub Acc16:$src, imm:$imm))]>;
}

// addc / subc are the same as add / sub on this target (CLC then ADC,
// SEC then SBC); the difference is that the SDNode produces a Glue that
// carries the post-op carry into a subsequent adde/sube.  Tablegen wires
// that Glue to the P register because the instruction has Defs = [P].
def : Pat<(addc Acc16:$src, imm:$imm),
          (ADCi16imm Acc16:$src, imm:$imm)>;
def : Pat<(subc Acc16:$src, imm:$imm),
          (SBCi16imm Acc16:$src, imm:$imm)>;
// ADC / SBC with a 16-bit absolute-address operand.  The patterns below
// fold a load on the right-hand side of an add/sub into the
// carry-arithmetic op.
let Defs = [P], Constraints = "$src = $dst",
    mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
  def ADCabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# ADCabs $dst, $src, $addr", []>;

  def SBCabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# SBCabs $dst, $src, $addr", []>;
}

// add / sub with a loaded global or external symbol.
def : Pat<(add Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(add Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCabs Acc16:$src, texternalsym:$s)>;
def : Pat<(sub Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(sub Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCabs Acc16:$src, texternalsym:$s)>;

// addc / subc (carry-producing) variants select the same pseudos.
def : Pat<(addc Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(addc Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCabs Acc16:$src, texternalsym:$s)>;
def : Pat<(subc Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(subc Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCabs Acc16:$src, texternalsym:$s)>;
// adde / sube: the chained ADC / SBC used for the hi half of a
// multi-precision add/sub.  They read the C flag left by the previous
// addc/adde (Uses = [P]) and produce a fresh carry/borrow (Defs = [P]).
// The AsmPrinter expansion emits a bare ADC/SBC with no preceding
// CLC/SEC; eliminateFrameIndex for ADCEfi/SBCEfi skips the carry-prefix
// step that the standalone ADCfi/SBCfi rely on.

// Immediate forms.
let Uses = [P], Defs = [P], Constraints = "$src = $dst",
    mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
  def ADCEi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ADCEi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (adde Acc16:$src, imm:$imm))]>;

  def SBCEi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# SBCEi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (sube Acc16:$src, imm:$imm))]>;
}

// Absolute-address (load-folding) forms.
let Uses = [P], Defs = [P], Constraints = "$src = $dst",
    mayLoad = 1, mayStore = 0, hasSideEffects = 0 in {
  def ADCEabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# ADCEabs $dst, $src, $addr", []>;

  def SBCEabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# SBCEabs $dst, $src, $addr", []>;
}

def : Pat<(adde Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCEabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(adde Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCEabs Acc16:$src, texternalsym:$s)>;
def : Pat<(sube Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCEabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(sube Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCEabs Acc16:$src, texternalsym:$s)>;
// (add Acc16, Acc16): a value added to itself equals a 1-bit left shift.
// The input is tied to the output so that the result lands in A.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    Constraints = "$src = $dst" in
def ASLA16
    : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# ASLA16 $dst, $src",
                   [(set Acc16:$dst, (add Acc16:$src, Acc16:$src))]>;

// The explicit spelling of the same operation: shl x, 1.
def : Pat<(shl Acc16:$src, (i16 1)),
          (ASLA16 Acc16:$src)>;
// Single-bit accumulator shifts: 16-bit logical right (LSRA16), 8-bit
// left / logical right (ASLA8 / LSRA8) and 16-bit arithmetic right
// (ASRA16).  They are pseudos because the MC-level shift (e.g. LSR_A)
// has no virtual output operand.  Every def inherits the tied
// "$src = $dst" constraint from the enclosing `let`; ASRA16 used to
// repeat that same constraint in a body `let`, which has been removed.
let Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def LSRA16 : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                            "# LSRA16 $dst, $src",
                            [(set Acc16:$dst, (srl Acc16:$src, (i16 1)))]>;
  def ASLA8  : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                            "# ASLA8 $dst, $src",
                            [(set Acc8:$dst, (shl Acc8:$src, (i8 1)))]>;
  def LSRA8  : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                            "# LSRA8 $dst, $src",
                            [(set Acc8:$dst, (srl Acc8:$src, (i8 1)))]>;

  // Signed shift right by 1: copy A's high bit into carry, then ROR
  // to bring it back into A's high bit while halving the rest.  The
  // AsmPrinter expands this to the 4-instruction PHA;ASL;PLA;ROR
  // sequence.
  def ASRA16 : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                            "# ASRA16 $dst, $src",
                            [(set Acc16:$dst, (sra Acc16:$src, (i16 1)))]>;
}
// Shifts by small constants are unrolled into 2-4 single-bit shifts.
// Anything beyond 4 bits would benefit from a loop or a XBA-and-mask
// trick; that is left for a future peephole.
def : Pat<(shl Acc16:$src, (i16 2)),
          (ASLA16 (ASLA16 Acc16:$src))>;
def : Pat<(shl Acc16:$src, (i16 3)),
          (ASLA16 (ASLA16 (ASLA16 Acc16:$src)))>;
def : Pat<(shl Acc16:$src, (i16 4)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 Acc16:$src))))>;

def : Pat<(srl Acc16:$src, (i16 2)),
          (LSRA16 (LSRA16 Acc16:$src))>;
def : Pat<(srl Acc16:$src, (i16 3)),
          (LSRA16 (LSRA16 (LSRA16 Acc16:$src)))>;
def : Pat<(srl Acc16:$src, (i16 4)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 Acc16:$src))))>;

// Shift counts 5..7, again as chained single-bit shifts.  These were
// withheld earlier because the DAG combiner narrowed
// `(trunc (shl (zext X), N))` back to `(shl X, N)` on i8 and re-entered
// LowerShift in a loop.  The
// `isTypeDesirableForOp(SHL/SRL/SRA, i8) -> false` override in
// W65816TargetLowering now blocks that combine, so the patterns are
// safe.  They are cheaper than __ashlhi3/__lshrhi3 for these counts.
def : Pat<(shl Acc16:$src, (i16 5)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 Acc16:$src)))))>;
def : Pat<(shl Acc16:$src, (i16 6)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 (ASLA16 Acc16:$src))))))>;
def : Pat<(shl Acc16:$src, (i16 7)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 (ASLA16 (ASLA16 Acc16:$src)))))))>;

def : Pat<(srl Acc16:$src, (i16 5)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 Acc16:$src)))))>;
def : Pat<(srl Acc16:$src, (i16 6)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 (LSRA16 Acc16:$src))))))>;
def : Pat<(srl Acc16:$src, (i16 7)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 (LSRA16 (LSRA16 Acc16:$src)))))))>;
// Increment / decrement of A by one, in 16-bit and 8-bit flavours.  The
// patterns match `(add x, 1)` and `(add x, -1)` (LLVM canonicalises
// sub-by-1 to add-by-(-1)).
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    Constraints = "$src = $dst" in {
  // 16-bit forms.
  def INA_PSEUDO
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# INA_PSEUDO $dst, $src",
                     [(set Acc16:$dst, (add Acc16:$src, (i16 1)))]>;
  def DEA_PSEUDO
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# DEA_PSEUDO $dst, $src",
                     [(set Acc16:$dst, (add Acc16:$src, (i16 -1)))]>;

  // 8-bit forms.
  def INA_PSEUDO8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# INA_PSEUDO8 $dst, $src",
                     [(set Acc8:$dst, (add Acc8:$src, (i8 1)))]>;
  def DEA_PSEUDO8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# DEA_PSEUDO8 $dst, $src",
                     [(set Acc8:$dst, (add Acc8:$src, (i8 -1)))]>;
}
// Two's-complement negation: `0 - x` → `EOR #$FFFF; INC A` (bitwise-not,
// then add 1).  This catches (sub 0, x), which LLVM uses for `-x` and
// for the `abs` intrinsic.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    Constraints = "$src = $dst" in {
  def NEGA16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# NEGA16 $dst, $src",
                     [(set Acc16:$dst, (sub (i16 0), Acc16:$src))]>;

  // i8 mirror.  Without it, codegen falls into the generic SBC path:
  // `LDA #0; SEC; SBC slot` plus the 8-bit M-mode prologue and PHA/PLA
  // bracketing — ~12 insns for `-x`.  NEGA8 expands to `EOR #$FF; INA`
  // (2 insns in 8-bit M).
  def NEGA8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# NEGA8 $dst, $src",
                     [(set Acc8:$dst, (sub (i8 0), Acc8:$src))]>;
}
// Multi-precision negation: the lo and hi halves of `-x` for an i32 x.
// LLVM splits `0 - x` into `(subc 0, x_lo)` and `(sube 0, x_hi)`.  Both
// are implemented through the ADD chain `~x + carry`, since INC doesn't
// touch C; the C produced by `~x + 1` matches what `subc 0, x` would
// set (C=1 iff x was 0, i.e. no borrow).
//   NEGC16 matches subc → "EOR #$FFFF; CLC; ADC #1" (5 bytes)
//   NEGE16 matches sube → "EOR #$FFFF; ADC #0"      (4 bytes, uses C-in)

// Lo half: produces the carry.
let Defs = [P], Constraints = "$src = $dst",
    mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
def NEGC16
    : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# NEGC16 $dst, $src",
                   [(set Acc16:$dst, (subc (i16 0), Acc16:$src))]>;

// Hi half: consumes the incoming carry and produces a new one.
let Uses = [P], Defs = [P], Constraints = "$src = $dst",
    mayLoad = 0, mayStore = 0, hasSideEffects = 0 in
def NEGE16
    : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# NEGE16 $dst, $src",
                   [(set Acc16:$dst, (sube (i16 0), Acc16:$src))]>;
// (The bitwise NOT pattern has moved below the EORi16imm definition.)
//
// 16-bit bitwise ops — AND / OR / XOR — against an immediate or memory
// operand.  Same shape as ADCi16imm / ADCabs minus the carry prefix
// (these don't read/write the carry flag).  The immediate forms follow.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    Constraints = "$src = $dst" in {
  def ANDi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ANDi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (and Acc16:$src, imm:$imm))]>;

  def ORAi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ORAi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (or Acc16:$src, imm:$imm))]>;

  def EORi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# EORi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (xor Acc16:$src, imm:$imm))]>;
}
// Bank-explicit i8 loads from a constant-int address
// (`*(uint8*)0xC035`).  The default lowering goes through LDAptr ([dp],Y
// indirect-long) — 22 B / 35 cyc — because LDAptr's pattern
// `(load Wide16:$ptr)` matches once the matcher materialises the const
// into Wide16.  The patterns here shortcut to LDA8long (lda long, 0xAF,
// 6 B / 10 cyc) and run BEFORE that materialisation because the explicit
// imm leaf has higher AddedComplexity.  Only the `(zextloadi8 imm)` form
// actually appears in real IR (i8 loads are always i16-extended at SDAG
// time on this 16-bit target); the raw `(load imm)` form is kept too,
// for symmetry with the store side.
let AddedComplexity = 50 in {
  // Plain i8 load.
  def : Pat<(i8 (load (iPTR imm:$addr))),
            (LDA8long (i32 imm:$addr))>;
  def : Pat<(i8 (load (iPTR timm:$addr))),
            (LDA8long (i32 timm:$addr))>;

  // Zero-extending load: re-class to Acc16, then mask to the low byte.
  def : Pat<(i16 (zextloadi8 (iPTR imm:$addr))),
            (ANDi16imm
              (COPY_TO_REGCLASS (LDA8long (i32 imm:$addr)), Acc16),
              0xFF)>;
  def : Pat<(i16 (zextloadi8 (iPTR timm:$addr))),
            (ANDi16imm
              (COPY_TO_REGCLASS (LDA8long (i32 timm:$addr)), Acc16),
              0xFF)>;

  // Any-extending load: re-class only, no mask.
  def : Pat<(i16 (extloadi8 (iPTR imm:$addr))),
            (COPY_TO_REGCLASS (LDA8long (i32 imm:$addr)), Acc16)>;
  def : Pat<(i16 (extloadi8 (iPTR timm:$addr))),
            (COPY_TO_REGCLASS (LDA8long (i32 timm:$addr)), Acc16)>;
}
// 16-bit AND / ORA / EOR with a 16-bit absolute-address operand.  The
// source is tied to the destination; the pseudos read memory but neither
// store nor have other side effects.
let Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def ANDabs : W65816Pseudo<(outs Acc16:$dst),
                            (ins Acc16:$src, i32imm:$addr),
                            "# ANDabs $dst, $src, $addr", []>;
  def ORAabs : W65816Pseudo<(outs Acc16:$dst),
                            (ins Acc16:$src, i32imm:$addr),
                            "# ORAabs $dst, $src, $addr", []>;
  def EORabs : W65816Pseudo<(outs Acc16:$dst),
                            (ins Acc16:$src, i32imm:$addr),
                            "# EORabs $dst, $src, $addr", []>;
}

// Fold a 16-bit load of a wrapped symbol into the bitwise op.  Both
// symbol kinds are handled (global address and external symbol), the
// same pairing the ADCabs / SBCabs / LDAabs patterns use.  Previously
// only the global-address forms existed here.
def : Pat<(and Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ANDabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(and Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ANDabs Acc16:$src, texternalsym:$s)>;
def : Pat<(or Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ORAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(or Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ORAabs Acc16:$src, texternalsym:$s)>;
def : Pat<(xor Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (EORabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(xor Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (EORabs Acc16:$src, texternalsym:$s)>;
// Bitwise NOT, i.e. x ^ 0xFFFF.  LLVM lowers `~x` and i1 inversion
// through this form; it is emitted as a single EOR #$FFFF using the
// bitwise pseudo defined above.
def : Pat<(xor Acc16:$src, (i16 -1)),
          (EORi16imm Acc16:$src, 0xFFFF)>;
// (srl x, 15): extract bit 15 into bit 0 (result is 0 or 1).  The
// type-legalizer's SHL_PARTS expansion of `i32 << 1` needs this for the
// high-half "carry from low" slot, and routing it through the __lshrhi3
// libcall costs ~10 bytes per i32 shift-by-1.  It is inlined as
// `ASL A; LDA #0; ROL A` (3 bytes): ASL puts bit 15 into C and trashes
// A; LDA #0 doesn't touch C; ROL A folds C into bit 0.
//
// (shl x, 15): move bit 0 to bit 15 (result is 0 or 0x8000).  Used by
// the SRL_PARTS / SRA_PARTS expansion of `i32 >> 1` for the low-half
// "carry from hi" slot.  Mirror sequence: `LSR A; LDA #0; ROR A`.
let Defs = [P], Constraints = "$src = $dst",
    mayLoad = 0, mayStore = 0, hasSideEffects = 0 in {
  def SRL15A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SRL15A $dst, $src",
                     [(set Acc16:$dst, (srl Acc16:$src, (i16 15)))]>;

  def SHL15A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SHL15A $dst, $src",
                     [(set Acc16:$dst, (shl Acc16:$src, (i16 15)))]>;
}
// (srl x, 8): high byte moves to the low byte and the high byte becomes
// zero.  XBA swaps the two bytes of A (in 16-bit M); AND #$00FF clears
// the new high byte.  4 bytes total — much shorter than the __lshrhi3
// libcall path.  Used by the i32 shift-by-8 SHL_PARTS expansion for the
// cross-half slot.
//
// (shl x, 8): low byte moves to the high byte and the low byte becomes
// zero.  Mirror of the above.
let mayLoad = 0, mayStore = 0, hasSideEffects = 0,
    Constraints = "$src = $dst" in {
  def SRL8A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SRL8A $dst, $src",
                     [(set Acc16:$dst, (srl Acc16:$src, (i16 8)))]>;

  def SHL8A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SHL8A $dst, $src",
                     [(set Acc16:$dst, (shl Acc16:$src, (i16 8)))]>;
}
// Shift counts 9..14.  SHL starts from SHL8A (XBA + low-byte mask) and
// chains 1..6 ASLs after it; SRL mirrors that with SRL8A followed by an
// LSRA chain.  The isTypeDesirableForOp override prevents the i8-shift
// combine loop that kept these out of tablegen earlier.
def : Pat<(shl Acc16:$src, (i16 9)),
          (ASLA16
            (SHL8A Acc16:$src))>;
def : Pat<(shl Acc16:$src, (i16 10)),
          (ASLA16 (ASLA16
            (SHL8A Acc16:$src)))>;
def : Pat<(shl Acc16:$src, (i16 11)),
          (ASLA16 (ASLA16 (ASLA16
            (SHL8A Acc16:$src))))>;
def : Pat<(shl Acc16:$src, (i16 12)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (SHL8A Acc16:$src)))))>;
def : Pat<(shl Acc16:$src, (i16 13)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (SHL8A Acc16:$src))))))>;
def : Pat<(shl Acc16:$src, (i16 14)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (SHL8A Acc16:$src)))))))>;

def : Pat<(srl Acc16:$src, (i16 9)),
          (LSRA16
            (SRL8A Acc16:$src))>;
def : Pat<(srl Acc16:$src, (i16 10)),
          (LSRA16 (LSRA16
            (SRL8A Acc16:$src)))>;
def : Pat<(srl Acc16:$src, (i16 11)),
          (LSRA16 (LSRA16 (LSRA16
            (SRL8A Acc16:$src))))>;
def : Pat<(srl Acc16:$src, (i16 12)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (SRL8A Acc16:$src)))))>;
def : Pat<(srl Acc16:$src, (i16 13)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (SRL8A Acc16:$src))))))>;
def : Pat<(srl Acc16:$src, (i16 14)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (SRL8A Acc16:$src)))))))>;
// (sra x, 15): sign-fill -- $0000 when x is non-negative, $FFFF when it
// is negative.  The i32 sext-from-i16 type-legalization uses it for the
// hi half, which avoids the __ashrhi3 libcall path.
//
// Idea: ASL A moves the sign bit into C, then A is rebuilt as all-zeros
// or all-ones from C.  An earlier sketch of the sequence was
// `ASL A; LDA #0; SBC #0; EOR #-1`, but the exact 5-byte sequence is
// whatever the AsmPrinter expansion emits -- treat that as authoritative.
def SRA15A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                          "# SRA15A $dst, $src",
                          [(set Acc16:$dst, (sra Acc16:$src, (i16 15)))]> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let Defs = [P];
}
// sext_inreg from i1: broadcast bit 0 to every bit.  LLVM produces this
// for `(c & 1) ? -1 : 0` shapes (e.g. CRC inner loops).  The result is
// `-(x & 1)`: 0 when bit 0 was clear, 0xFFFF when it was set.  Selected
// as a mask to bit 0 (ANDi16imm) followed by a two's-complement negate
// (NEGA16); the original size estimate for the expansion is ~7 bytes.
def : Pat<(sext_inreg Acc16:$src, i1),
          (NEGA16
            (ANDi16imm Acc16:$src, 1))>;

// sext_inreg from i8: the branchless `((x & 0xFF) ^ 0x80) - 0x80` trick,
// the same sequence LowerSignExtend uses for ISD::SIGN_EXTEND i8->i16.
// LLVM produces this when expanding a sextload-i16-from-i8 (SEXTLOAD i8
// is set to Expand in the lowering ctor) and for explicit
// `(int)(signed char)` casts.
def : Pat<(sext_inreg Acc16:$src, i8),
          (SBCi16imm
            (EORi16imm
              (ANDi16imm Acc16:$src, 0x00FF),
              0x0080),
            0x0080)>;
// Frame-index memory operand.  A FrameIndex and an offset travel together
// as one operand (two sub-operands in MIOperandInfo); eliminateFrameIndex
// later expands the users into LDA / STA d,S with the offset baked in.
// LowerFormalArguments uses this to read stack-passed arguments, and
// storeRegToStackSlot uses it for spill/reload.
let MIOperandInfo = (ops i32imm, i32imm),
    PrintMethod = "printFrameMem" in
def memfi : Operand<i16>;
// 16-bit load from a frame slot.  Marked rematerializable: it qualifies
// when the FI is a fixed (immutable) argument slot -- see
// W65816InstrInfo::isReMaterializableImpl.  Without that, greedy regalloc
// spills every argument load to a fresh local slot and reloads it from
// there, which balloons every i32-arg function by 4-6 instructions.
def LDAfi : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
                         "# LDAfi $dst, $addr", []> {
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isReMaterializable = 1;
}
// 16-bit store to a frame slot.  The source is Wide16 so that greedy can
// park the value in an IMGn register instead of A.  In that case
// eliminateFrameIndex prepends an LDA dp, which clobbers A -- hence
// Defs = [A].
def STAfi : W65816Pseudo<(outs), (ins Wide16:$src, memfi:$addr),
                         "# STAfi $src, $addr", []> {
  let mayStore = 1;
  let mayLoad = 0;
  let hasSideEffects = 0;
  let Defs = [A];
}
// i8 truncating store to a frame slot.  eliminateFrameIndex wraps the
// store as SEP #$20 / STA d,S / REP #$20 so exactly one byte is written.
// A plain 16-bit STA would also write slot+1 and so corrupt the next
// stack slot (or the return address, for the last slot of an alloca).
// Defs = [P] because SEP/REP change the M bit.
def STA8fi : W65816Pseudo<(outs), (ins Acc16:$src, memfi:$addr),
                          "# STA8fi $src, $addr", []> {
  let mayStore = 1;
  let mayLoad = 0;
  let hasSideEffects = 1;
  let Defs = [P];
}
// ComplexPattern that turns a FrameIndex SDValue into the two memfi
// sub-operands.  The matcher is SelectFrameIndex in
// W65816ISelDAGToDAG.cpp.
def addr_fi : ComplexPattern<i16, 2, "SelectFrameIndex", [frameindex]>;

// Plain 16-bit load / store of a frame slot.
def : Pat<(i16 (load addr_fi:$addr)), (LDAfi addr_fi:$addr)>;
def : Pat<(store Acc16:$src, addr_fi:$addr),
          (STAfi Acc16:$src, addr_fi:$addr)>;
// i8 access to a frame slot.
//
// Loads fetch two bytes with a 16-bit LDA.  The extra high byte is
// harmless: extending loads mask or sign-extend it, and narrowing loads
// narrow back to Acc8 and discard it.
//
// Stores must touch exactly one byte.  i8 alloca arrays pack adjacent
// slots one byte apart, so a 16-bit STA to the last slot of the array
// would corrupt the return address.  Truncating stores therefore go
// through STA8fi, which wraps the STA in SEP #$20 / REP #$20.

// Loads.
def : Pat<(i8 (load addr_fi:$addr)),
          (COPY_TO_REGCLASS (LDAfi addr_fi:$addr), Acc8)>;
def : Pat<(i16 (zextloadi8 addr_fi:$addr)),
          (ANDi16imm (LDAfi addr_fi:$addr), 0xFF)>;
def : Pat<(i16 (extloadi8 addr_fi:$addr)),
          (LDAfi addr_fi:$addr)>;

// Stores.
def : Pat<(store Acc8:$src, addr_fi:$addr),
          (STA8fi (COPY_TO_REGCLASS Acc8:$src, Acc16), addr_fi:$addr)>;
def : Pat<(truncstorei8 Acc16:$src, addr_fi:$addr),
          (STA8fi Acc16:$src, addr_fi:$addr)>;
// Frame-index folding into ADC / SBC / AND / ORA / EOR.  Same shape as
// the *abs variants, except that the second operand is a stack slot.
// Every pseudo here is a tied two-address op (`$src = $dst`) that loads
// from the slot and never stores.
class W65816FIFoldPseudo<string mnemonic>
    : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                   !strconcat("# ", mnemonic, " $dst, $src, $addr"), []> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
}

// ADCfi / SBCfi additionally define P so that they can match `addc` /
// `subc`, the lo half of a multi-precision split (see the ADCi16imm
// comment earlier in the file).
def ADCfi : W65816FIFoldPseudo<"ADCfi"> { let Defs = [P]; }
def SBCfi : W65816FIFoldPseudo<"SBCfi"> { let Defs = [P]; }

// The bitwise folds carry no Defs.
def ANDfi : W65816FIFoldPseudo<"ANDfi">;
def ORAfi : W65816FIFoldPseudo<"ORAfi">;
def EORfi : W65816FIFoldPseudo<"EORfi">;
// ADCEfi / SBCEfi: chained ADC/SBC against a frame slot -- the hi half of
// a multi-precision split.  Uses = [P] reads the carry left by the
// previous addc/adde/subc/sube; Defs = [P] records the carry they leave.
def ADCEfi : W65816Pseudo<(outs Acc16:$dst),
                          (ins Acc16:$src, memfi:$addr),
                          "# ADCEfi $dst, $src, $addr", []> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
  let Uses = [P];
  let Defs = [P];
}
def SBCEfi : W65816Pseudo<(outs Acc16:$dst),
                          (ins Acc16:$src, memfi:$addr),
                          "# SBCEfi $dst, $src, $addr", []> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
  let Uses = [P];
  let Defs = [P];
}

// CMPfi: compare A against a frame slot.  It has no register result; the
// only thing it defines is P.
def CMPfi : W65816Pseudo<(outs), (ins Acc16:$lhs, memfi:$addr),
                         "# CMPfi $lhs, $addr", []> {
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
  let Defs = [P];
}
// Selection patterns that fold a 16-bit frame-slot load into the second
// operand of an ALU op or compare: `(op A, (load fi))` -> `(OPfi A, fi)`.
multiclass W65816FrameFoldPat<SDNode node, Instruction inst> {
  def : Pat<(node Acc16:$src, (i16 (load addr_fi:$addr))),
            (inst Acc16:$src, addr_fi:$addr)>;
}

// Plain add/sub and the carry-producing lo-half forms share ADCfi/SBCfi.
defm : W65816FrameFoldPat<add,  ADCfi>;
defm : W65816FrameFoldPat<sub,  SBCfi>;
defm : W65816FrameFoldPat<addc, ADCfi>;
defm : W65816FrameFoldPat<subc, SBCfi>;
// Carry-consuming hi-half forms.
defm : W65816FrameFoldPat<adde, ADCEfi>;
defm : W65816FrameFoldPat<sube, SBCEfi>;
// Bitwise ops.
defm : W65816FrameFoldPat<and,  ANDfi>;
defm : W65816FrameFoldPat<or,   ORAfi>;
defm : W65816FrameFoldPat<xor,  EORfi>;
// Compare.
defm : W65816FrameFoldPat<W65816cmp, CMPfi>;
// Zero-extending byte load from a global or external symbol.  A 16-bit
// LDA fetches the wanted byte plus the one after it, and AND #$00FF then
// clears the high byte.  This reads one byte past the source: fine for
// standalone bytes in the bank-0 data area, but the caller must make sure
// addr+1 is safe to read.  A future optimisation could use SEP/REP
// transitions to do a true 8-bit load instead.
def : Pat<(i16 (zextloadi8 (W65816Wrapper tglobaladdr:$g))),
          (ANDi16imm
            (LDAabs tglobaladdr:$g),
            0xFF)>;
def : Pat<(i16 (zextloadi8 (W65816Wrapper texternalsym:$s))),
          (ANDi16imm
            (LDAabs texternalsym:$s),
            0xFF)>;
// CMP / branches.  The W65816cmp SDNode sets the flags and produces glue;
// the W65816brcc node consumes that glue and picks the right Bxx
// instruction from the condition code.  These two pseudos compare A
// against an immediate, in 16-bit and 8-bit widths respectively.
def CMPi16imm : W65816Pseudo<(outs), (ins Acc16:$lhs, i16imm:$rhs),
                             "# CMPi16imm $lhs, $rhs",
                             [(W65816cmp Acc16:$lhs, (i16 imm:$rhs))]> {
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let Defs = [P];
}
def CMPi8imm : W65816Pseudo<(outs), (ins Acc8:$lhs, i8imm:$rhs),
                            "# CMPi8imm $lhs, $rhs",
                            [(W65816cmp Acc8:$lhs, (i8 imm:$rhs))]> {
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
  let Defs = [P];
}
// Compare A against a 16-bit value loaded from an absolute address.  The
// pseudo has no register result and defines only P.
def CMPabs : W65816Pseudo<(outs), (ins Acc16:$lhs, i32imm:$addr),
                          "# CMPabs $lhs, $addr", []> {
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
  let Defs = [P];
}

// Fold a load from a wrapped global / external symbol into the compare.
def : Pat<(W65816cmp Acc16:$lhs, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (CMPabs Acc16:$lhs, tglobaladdr:$g)>;
def : Pat<(W65816cmp Acc16:$lhs, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (CMPabs Acc16:$lhs, texternalsym:$s)>;
// 16-bit byte swap.  XBA exchanges the high and low bytes of A, which is
// exactly the (bswap Acc16) SDNode clang emits for byte-reverse loops.
def XBA16 : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                         "# XBA16 $dst, $src",
                         [(set Acc16:$dst, (bswap Acc16:$src))]> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
}
// Two-Acc16 binary ops.
//
// There is only one A register, so when both operands are computed
// values (neither is a foldable load / immediate / global) one of them
// has to go to a stack slot.  Each pseudo's custom inserter allocates a
// fresh slot and emits a STAfi + OPfi sequence; the register allocator
// takes care of the surrounding spills and reloads.
//
// hasSideEffects = 1 tells the validator that the pseudo may load/store
// without a matching SDNode pattern: the stores are added by the
// inserter and are not visible in the DAG pattern.
class W65816BinRRPseudo<string mnemonic, SDNode node>
    : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                   !strconcat("# ", mnemonic, " $dst, $src1, $src2"),
                   [(set Acc16:$dst, (node Acc16:$src1, Acc16:$src2))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
}

// Defs = [P] on ADD_RR / SUB_RR matches the C-flag side effect of the
// underlying ADC/SBC, so these two can serve `addc` / `subc` (the lo half
// of an i32 split) as well as plain `add` / `sub`.
def ADD_RR : W65816BinRRPseudo<"ADD_RR", add> { let Defs = [P]; }
def SUB_RR : W65816BinRRPseudo<"SUB_RR", sub> { let Defs = [P]; }

// Bitwise forms: no Defs.
def AND_RR : W65816BinRRPseudo<"AND_RR", and>;
def ORA_RR : W65816BinRRPseudo<"ORA_RR", or>;
def EOR_RR : W65816BinRRPseudo<"EOR_RR", xor>;

// Carry-producing lo-half add/sub reuse the same pseudos.
def : Pat<(addc Acc16:$src1, Acc16:$src2), (ADD_RR Acc16:$src1, Acc16:$src2)>;
def : Pat<(subc Acc16:$src1, Acc16:$src2), (SUB_RR Acc16:$src1, Acc16:$src2)>;
// Chained-carry two-Acc16 add/sub for the hi half of i32 splits.  The
// inserter mirrors ADD_RR (STAfi spill + ADCEfi load-fold) but emits the
// carry-chain pseudo, so the C flag from the previous addc/adde is
// consumed rather than overwritten by a CLC.  Uses + Defs = [P] express
// that carry chain.
def ADDE_RR : W65816Pseudo<(outs Acc16:$dst),
                           (ins Acc16:$src1, Acc16:$src2),
                           "# ADDE_RR $dst, $src1, $src2",
                           [(set Acc16:$dst,
                                 (adde Acc16:$src1, Acc16:$src2))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Uses = [P];
  let Defs = [P];
}
def SUBE_RR : W65816Pseudo<(outs Acc16:$dst),
                           (ins Acc16:$src1, Acc16:$src2),
                           "# SUBE_RR $dst, $src1, $src2",
                           [(set Acc16:$dst,
                                 (sube Acc16:$src1, Acc16:$src2))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Uses = [P];
  let Defs = [P];
}

// Register-register compare: no register result, defines P only, and is
// expanded by a custom inserter.
def CMP_RR : W65816Pseudo<(outs), (ins Acc16:$lhs, Acc16:$rhs),
                          "# CMP_RR $lhs, $rhs",
                          [(W65816cmp Acc16:$lhs, Acc16:$rhs)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Defs = [P];
}
// Pointer dereference (loads).
//
// The 65816 cannot dereference a pointer held in a register: all of its
// indirect addressing modes read the pointer from memory (DP or stack).
// LDAptr / STAptr / STBptr lower to [dp],Y indirect-long through the DP
// scratch bytes $E0..$E2 (see the inserter in W65816ISelLowering.cpp).
// The inserter uses A and Y plus that DP scratch; X is not touched.
// Defs = [Y, P]: Y is written by the LDY #0 and P by the flag updates of
// the emitted sequence.  hasSideEffects = 1 covers the extra store the
// inserter adds in addition to the dereference itself.
// NOTE(review): an older description has these pseudos spill the pointer
// to a fresh stack slot and emit LDA/STA (slot,S),Y -- confirm which
// lowering the inserter currently implements.
//
// $ptr is Wide16 (A or IMGn) so that, when bb.3-style pressure forces the
// pointer to share A with another live vreg, RA can park the pointer in
// an IMGn DP slot.  With Acc16:$ptr the pointer was silently coalesced
// with the loop-PHI accumulator: both wanted A at the end of the block,
// and PHI elimination dropped the COPY needed to refresh A with the
// pointer at the top of the loop.  With Wide16, the COPY $a = ptr lowers
// to a real LDA $dp.
def LDAptr : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr),
                          "# LDAptr $dst, $ptr",
                          [(set Acc16:$dst, (load Wide16:$ptr))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}

// Variant that hardcodes bank = 0 for the [dp],Y dereference.  LowerVAARG
// uses it: va_arg dereferences a stack pointer, and the 65816 stack is
// always in bank 0.  Under the GS/OS Loader the default $E2 source ($BE =
// our bank when LoaderBankDeref is on) would point the read at the wrong
// bank, so this variant always emits `STZ $E2` to make the dereference
// unambiguously bank-0.  Found via snprintf("%d", N) under the Loader
// returning constant garbage instead of N's decimal -- see
// feedback_loader_substantial_test.md.
def LDAptrBank0 : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr),
                               "# LDAptrBank0 $dst, $ptr",
                               [(set Acc16:$dst,
                                     (W65816vaargLoad Wide16:$ptr))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}
// 16-bit store through a pointer.  Expanded by a custom inserter; the
// pointer operand is Wide16 and the pseudo clobbers Y and P.
def STAptr : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr),
                          "# STAptr $val, $ptr",
                          [(store Acc16:$val, Wide16:$ptr)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
// i8 zero-extending load through a pointer: perform the 16-bit pointer
// load (LDAptr) and mask off the high byte.  This reads one byte past the
// source, which is fine for byte-array iteration as long as the buffer is
// at least 2 bytes long.  A future SEP/REP-aware mode pass could switch
// to a true 8-bit LDA.
def : Pat<(i16 (zextloadi8 Wide16:$ptr)),
          (ANDi16imm
            (LDAptr Wide16:$ptr),
            0xFF)>;

// Any-extending byte load through a pointer: the consumer ignores the
// high byte, so a bare 16-bit LDA is enough.  The same one-byte-past-the-
// buffer caveat as for zextloadi8 applies.
def : Pat<(i16 (extloadi8 Wide16:$ptr)), (LDAptr Wide16:$ptr)>;

// The absolute-address equivalent (byte loads via a global pointer) is
// already covered by the Wrapper(global) patterns earlier in the file;
// the patterns here catch the case where the pointer is materialised as
// a value.
// Intermediate pseudos used by the LDAptr / STAptr inserters.  Each takes
// a memfi that names the slot holding the pointer; eliminateFrameIndex
// resolves it to LDA_StackRelIndY / STA_StackRelIndY with the right
// d-byte.  Y must hold 0 at the point of issue (the inserter emits
// LDY #0 first), hence Uses = [Y].
def LDAfi_indY : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
                              "# LDAfi_indY $dst, $addr", []> {
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;
  let Uses = [Y];
}
def STAfi_indY : W65816Pseudo<(outs), (ins Acc16:$src, memfi:$addr),
                              "# STAfi_indY $src, $addr", []> {
  let mayStore = 1;
  let mayLoad = 0;
  let hasSideEffects = 0;
  let Uses = [Y];
}
// i8 truncating store through a pointer.  Same shape as STAptr, but the
// custom inserter wraps the actual STA in SEP/REP so that M is 8-bit
// across the store and only one byte is written; without the wrap the
// 16-bit STA would clobber the byte at ptr+1.  Two DAG shapes feed it:
// the natural truncstorei8 of an i16 value (common with argument
// promotion), matched here, and a true i8 store (Acc8) that arises from
// i8-typed IR, matched by a separate Pat.
def STBptr : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr),
                          "# STBptr $val, $ptr",
                          [(truncstorei8 Acc16:$val, Wide16:$ptr)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
// Pointer access with a constant offset.  `(load (add ptr, $off))` and
// `(store val, (add ptr, $off))` show up for struct field access and for
// array indexing with small constant offsets.  Without dedicated
// pseudos, the offset turns into an explicit ADC #imm that has to spill
// A and recompute the pointer on every access.  With them, the inserter
// simply loads Y with the offset (Y is 16-bit, so any i16 constant fits).
//
// LDAptrOff / STAptrOff / STBptrOff use the same [dp],Y lowering as the
// no-offset variants, with the offset folded into Y.  None carries a
// pattern of its own.
def LDAptrOff : W65816Pseudo<(outs Acc16:$dst),
                             (ins Wide16:$ptr, i16imm:$off),
                             "# LDAptrOff $dst, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}
def STAptrOff : W65816Pseudo<(outs),
                             (ins Acc16:$val, Wide16:$ptr, i16imm:$off),
                             "# STAptrOff $val, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
def STBptrOff : W65816Pseudo<(outs),
                             (ins Acc16:$val, Wide16:$ptr, i16imm:$off),
                             "# STBptrOff $val, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
// Selection patterns for pointer access.

// ptr + constant offset: 16-bit load, 16-bit store, truncating byte
// store, and true i8 store (the Acc8 value is re-classed to Acc16 first).
def : Pat<(i16 (load (add Wide16:$ptr, (i16 imm:$off)))),
          (LDAptrOff Wide16:$ptr, imm:$off)>;
def : Pat<(store Acc16:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STAptrOff Acc16:$val, Wide16:$ptr, imm:$off)>;
def : Pat<(truncstorei8 Acc16:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STBptrOff Acc16:$val, Wide16:$ptr, imm:$off)>;
def : Pat<(store Acc8:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STBptrOff
            (COPY_TO_REGCLASS Acc8:$val, Acc16),
            Wide16:$ptr, imm:$off)>;

// True i8 store through a bare pointer (no offset).
def : Pat<(store Acc8:$val, Wide16:$ptr),
          (STBptr
            (COPY_TO_REGCLASS Acc8:$val, Acc16),
            Wide16:$ptr)>;
// ---------------------------------------------------------------------
// ptr32 deref pseudos.
//
// Same shape and inserter as LDAptr / STAptr / STBptr, except that the
// pointer is a Wide32 (i32) value: sub_lo carries the low 16 bits of the
// address and sub_hi carries the bank byte in its low half.  The
// inserter stages the low 16 bits at $E0..$E1 and the bank byte at $E2,
// then emits LDA/STA [dp],Y exactly like the i16 path -- but with a bank
// taken from the pointer rather than a forced 0.
//
// Dead unless ptr32 mode is active: LowerLoad / LowerStore only emit
// W65816ldPtr / stPtr / stbPtr when the address is i32.
// ---------------------------------------------------------------------
def LDAptr32 : W65816Pseudo<(outs Acc16:$dst), (ins AnyWide32:$ptr),
                            "# LDAptr32 $dst, $ptr",
                            [(set Acc16:$dst,
                                  (W65816ldPtr AnyWide32:$ptr))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}
def STAptr32 : W65816Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr),
                            "# STAptr32 $val, $ptr",
                            [(W65816stPtr Acc16:$val, AnyWide32:$ptr)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
def STBptr32 : W65816Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr),
                            "# STBptr32 $val, $ptr",
                            [(W65816stbPtr Acc16:$val, AnyWide32:$ptr)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
// ptr32 pseudos with an extra i16 immediate offset operand.  They use a
// custom inserter, clobber Y and P, and carry no selection pattern of
// their own.
def LDAptr32Off : W65816Pseudo<(outs Acc16:$dst),
                               (ins AnyWide32:$ptr, i16imm:$off),
                               "# LDAptr32Off $dst, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}
def STAptr32Off : W65816Pseudo<(outs),
                               (ins Acc16:$val, AnyWide32:$ptr, i16imm:$off),
                               "# STAptr32Off $val, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
def STBptr32Off : W65816Pseudo<(outs),
                               (ins Acc16:$val, AnyWide32:$ptr, i16imm:$off),
                               "# STBptr32Off $val, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
// Direct ptr32 patterns over the generic ISD::LOAD / ISD::STORE nodes,
// for an address held in an i32 (AnyWide32) register.  They are
// unreachable while i32 is not a legal type (ptr16 mode); once ptr32 mode
// is activated they fire in place of the i16-pointer LDAptr / STAptr.

// 16-bit load and store.
def : Pat<(i16 (load AnyWide32:$ptr)), (LDAptr32 AnyWide32:$ptr)>;
def : Pat<(store Acc16:$val, AnyWide32:$ptr),
          (STAptr32 Acc16:$val, AnyWide32:$ptr)>;

// Truncating byte store of an i16 value.
def : Pat<(truncstorei8 Acc16:$val, AnyWide32:$ptr),
          (STBptr32 Acc16:$val, AnyWide32:$ptr)>;

// Byte loads extended to i16: zero-extension masks the high byte,
// any-extension keeps whatever the 16-bit load fetched.
def : Pat<(i16 (zextloadi8 AnyWide32:$ptr)),
          (ANDi16imm (LDAptr32 AnyWide32:$ptr), 0xFF)>;
def : Pat<(i16 (extloadi8 AnyWide32:$ptr)), (LDAptr32 AnyWide32:$ptr)>;

// True i8 load and store; the value is re-classed between Acc8 and Acc16.
def : Pat<(i8 (load AnyWide32:$ptr)),
          (COPY_TO_REGCLASS
            (ANDi16imm (LDAptr32 AnyWide32:$ptr), 0xFF),
            Acc8)>;
def : Pat<(store Acc8:$val, AnyWide32:$ptr),
          (STBptr32
            (COPY_TO_REGCLASS Acc8:$val, Acc16),
            AnyWide32:$ptr)>;

// Off variants: folded constant-offset add patterns are deferred until
// ptr32 mode is activated and real cases can be profiled.  The base
// LDAptr32 / STAptr32 pseudos already handle the general (add ptr, off)
// case correctly through a separate i32 ADD; the Off pseudos are only an
// optional optimization for small constant offsets.
// Split-pair variants.  Semantics match LDAptr32 / STAptr32 / STBptr32,
// but the pointer arrives as two separate i16 register operands (lo and
// hi) instead of one Wide32 register pair.  The W65816LowerWide32 pre-RA
// pass uses them to relieve register-pair allocation pressure: it walks
// REG_SEQUENCE + LDAptr32 chains, decomposes the Wide32 vregs into pairs
// of i16 vregs, and rewrites the LDAptr32 family to take the two halves
// directly.
def LDAptr32S : W65816Pseudo<(outs Acc16:$dst),
                             (ins Wide16:$ptrLo, Wide16:$ptrHi),
                             "# LDAptr32S $dst, $ptrLo, $ptrHi", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}
def STAptr32S : W65816Pseudo<(outs),
                             (ins Acc16:$val, Wide16:$ptrLo, Wide16:$ptrHi),
                             "# STAptr32S $val, $ptrLo, $ptrHi", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
def STBptr32S : W65816Pseudo<(outs),
                             (ins Acc16:$val, Wide16:$ptrLo, Wide16:$ptrHi),
                             "# STBptr32S $val, $ptrLo, $ptrHi", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
// i8 load through a pointer, producing a true i8 (Acc8) result.  It
// reuses the zextloadi8 path: load two bytes with the 16-bit LDA, mask
// the high byte, then narrow to Acc8.  The COPY_TO_REGCLASS to Acc8 is a
// no-op at MC level (same physical A).  This reads one byte past the
// source, which is fine for char-array iteration where the buffer is at
// least 2 bytes long.
def : Pat<(i8 (load Wide16:$ptr)),
          (COPY_TO_REGCLASS
            (ANDi16imm (LDAptr Wide16:$ptr), 0xFF),
            Acc8)>;
// Conversions between Acc8 and Acc16.  Both classes alias the physical A
// register, so COPY_TO_REGCLASS costs nothing at MC level.
//   anyext: re-class only; the high byte is left as-is because the
//           consumer does not care about it.
//   zext:   re-class, then mask the high byte (it holds B from before any
//           prior SEP).
//   trunc:  re-class only.
def : Pat<(i16 (anyext Acc8:$src)),
          (COPY_TO_REGCLASS Acc8:$src, Acc16)>;
def : Pat<(i16 (zext Acc8:$src)),
          (ANDi16imm
            (COPY_TO_REGCLASS Acc8:$src, Acc16),
            0xFF)>;
def : Pat<(i8 (trunc Acc16:$src)),
          (COPY_TO_REGCLASS Acc16:$src, Acc8)>;
// Acc8 register-register arithmetic and bitwise ops, expanded through the
// Acc16 _RR pseudos.  This is cheap because Acc8 and Acc16 alias the same
// physical A, so COPY_TO_REGCLASS is a no-op.  Only the low byte of the
// result matters: the high byte picks up unrelated bits, but the final
// narrow back to Acc8 discards them.  An i8 expression that legalization
// did not promote (e.g. an i8 XOR feeding only an i8 store) can thereby
// reuse the spill-and-OPfi inserter without dedicated Acc8 pseudos.
//
// Parameters: `node` is the i8 SDNode to match, `rr` the Acc16 _RR pseudo
// that implements it.
multiclass Acc8RR<SDNode node, Instruction rr> {
  def : Pat<(i8 (node Acc8:$lhs, Acc8:$rhs)),
            (COPY_TO_REGCLASS
              (rr (COPY_TO_REGCLASS Acc8:$lhs, Acc16),
                  (COPY_TO_REGCLASS Acc8:$rhs, Acc16)),
              Acc8)>;
}

defm : Acc8RR<add, ADD_RR>;
defm : Acc8RR<sub, SUB_RR>;
defm : Acc8RR<and, AND_RR>;
defm : Acc8RR<or,  ORA_RR>;
defm : Acc8RR<xor, EOR_RR>;
// (memory inc/dec patterns moved below INC_Abs/DEC_Abs defs.)
// (Branch patterns moved below the Real Instructions section since
// they reference instruction defs.)
//===----------------------------------------------------------------------===//
// Real Instructions
//
// Opcodes taken from the WDC W65C816S data sheet. Instructions whose size
// depends on the M or X bits exist in two variants (Imm8 / Imm16) and carry
// TSFlags bits indicating which processor mode they assume; the REP/SEP
// scheduling pass uses those to verify/insert mode transitions.
//
// Disassembler note: for every opcode that has both an _Imm8 and an _Imm16
// form (LDA/LDX/LDY/ADC/SBC/CMP/AND/ORA/EOR/BIT/CPX/CPY), the two forms share
// the same opcode byte but differ in operand width according to M/X mode.
// The scaffold disassembler only consults the default "W65816" decoder
// table, so we push the _Imm8 variants into namespaces "W65816MHigh" /
// "W65816XHigh". That keeps only one variant per opcode in the default
// table -- the 3-byte _Imm16 form, for both the M-dependent and the
// X-dependent insns -- so `llvm-objdump -d` always decodes
// these as 16-bit immediates until the mode-aware decoder lands.
//===----------------------------------------------------------------------===//
//---------------------------------------------------------------- CPU control
// None of the instructions in this first group touches memory, so the
// mayLoad/mayStore = 0 pair is hoisted over all of them.
let mayLoad = 0, mayStore = 0 in {
  // NOP is the only one explicitly marked free of side effects.
  let hasSideEffects = 0 in
  def NOP : InstImplied<0xEA, "nop">;

  // REP / SEP take an 8-bit immediate and are explicitly side-effecting.
  let hasSideEffects = 1 in {
    def REP : InstImm8<0xC2, "rep">;
    def SEP : InstImm8<0xE2, "sep">;
  }

  // Single-byte flag and register-exchange instructions.
  def CLC : InstImplied<0x18, "clc">;
  def SEC : InstImplied<0x38, "sec">;
  def CLI : InstImplied<0x58, "cli">;
  def SEI : InstImplied<0x78, "sei">;
  def CLD : InstImplied<0xD8, "cld">;
  def SED : InstImplied<0xF8, "sed">;
  def CLV : InstImplied<0xB8, "clv">;
  def XCE : InstImplied<0xFB, "xce">;
  def XBA : InstImplied<0xEB, "xba">;
}

// WAI / STP keep whatever flags the InstImplied format provides.
def WAI : InstImplied<0xCB, "wai">;
def STP : InstImplied<0xDB, "stp">;
// WDM (William D Mensch) — reserved 2-byte NOP-equivalent. Useful as
// a debugger / emulator hook: MAME's apple2gs CPU traps on WDM and a
// Lua plugin can dispatch on the operand byte. CPU-side, it acts as
// a 2-byte NOP. Operand syntax mirrors MVN: `wdm $ab` (no `#`).
// Defined with the direct-page format class (InstDP) so that it carries
// the operand in the same way as a DP instruction.
def WDM : InstDP<0x42, "wdm">;
// TRB / TSB — Test and Reset/Set memory Bits. Atomic bit clear/set
// on a byte (or 16-bit word per M flag) at the given DP or abs
// address. Z flag set per (M & A) where M is the memory operand.
// Useful for memory-mapped IO bit twiddling. No DP indexing form.
// Each mnemonic gets exactly two encodings here: direct page and absolute.
def TRB_DP : InstDP<0x14, "trb">;
def TRB_Abs : InstAbs<0x1C, "trb">;
def TSB_DP : InstDP<0x04, "tsb">;
def TSB_Abs : InstAbs<0x0C, "tsb">;
// PEI — Push Effective Indirect. Reads a 16-bit value from DP and
// pushes it. Useful for indirect parameter passing without going
// through A first. Only the direct-page form is defined.
def PEI_DP : InstDP<0xD4, "pei">;
//---------------------------------------------------------------- LDA (load A)
// The `_Imm8` forms of the mode-dependent load/arith/compare ops are
// marked isCodeGenOnly so the asm matcher never picks them — our
// AsmParser has no way to know the current M/X bits, so it always
// reaches for the _Imm16 form. Codegen can still select _Imm8
// explicitly once we have 8-bit patterns.
//
// Both immediate forms share opcode 0xA9. The 8-bit one is tagged MHigh
// and placed in the "W65816MHigh" decoder namespace; the 16-bit one is
// tagged MLow and stays in the default table. Both declare Defs = [A].
def LDA_Imm8 : InstImm8<0xA9, "lda"> { let MHigh = 1; let DecoderNamespace = "W65816MHigh"; let isCodeGenOnly = 1; let Defs = [A]; }
def LDA_Imm16 : InstImm16<0xA9, "lda"> { let MLow = 1; let Defs = [A]; }
// Memory forms, one per addressing-mode format class.
// NOTE(review): apart from the two indirect-long forms below, none of
// these declares Defs = [A] (or a Uses for its index register) in this
// file — confirm whether the format classes supply them or whether
// these opcodes are only ever emitted where liveness no longer matters.
def LDA_DP : InstDP<0xA5, "lda">;
def LDA_Abs : InstAbs<0xAD, "lda">;
def LDA_Long : InstAbsLong<0xAF, "lda">;
def LDA_DPX : InstDPX<0xB5, "lda">;
def LDA_AbsX : InstAbsX<0xBD, "lda">;
def LDA_AbsY : InstAbsY<0xB9, "lda">;
def LDA_DPInd : InstDPInd <0xB2, "lda">;
def LDA_DPIndY : InstDPIndY<0xB1, "lda">;
def LDA_DPIndX : InstDPIndX<0xA1, "lda">;
// LDA [dp]: defines A.
def LDA_DPIndLong : InstDPIndLong <0xA7, "lda"> { let Defs = [A]; }
// LDA [dp],Y: reads Y to compute the indexed address, defines A.
// Without these, regalloc thought A was unaffected by the load and
// dead-code-eliminated COPYs that were supposed to materialise the
// next pointer in A — silent miscompile in mySwap-style helpers.
def LDA_DPIndLongY : InstDPIndLongY<0xB7, "lda"> { let Defs = [A]; let Uses = [Y]; }
def LDA_LongX : InstAbsLongX<0xBF, "lda">;
//---------------------------------------------------------------- STA (store A)
// One definition per addressing-mode format class; there is no immediate
// form. Only the two indirect-long forms declare their register reads.
// NOTE(review): the other forms declare no Uses = [A] in this file —
// confirm whether the format classes provide it.
def STA_DP : InstDP<0x85, "sta">;
def STA_Abs : InstAbs<0x8D, "sta">;
def STA_Long : InstAbsLong<0x8F, "sta">;
def STA_DPX : InstDPX<0x95, "sta">;
def STA_AbsX : InstAbsX<0x9D, "sta">;
def STA_AbsY : InstAbsY<0x99, "sta">;
def STA_DPInd : InstDPInd <0x92, "sta">;
def STA_DPIndY : InstDPIndY<0x91, "sta">;
def STA_DPIndX : InstDPIndX<0x81, "sta">;
// STA [dp]: reads A (the value to store).
def STA_DPIndLong : InstDPIndLong <0x87, "sta"> { let Uses = [A]; }
// STA [dp],Y: reads A (the value to store) and Y (the index). Mark
// both so regalloc keeps A's value live across this instruction.
def STA_DPIndLongY : InstDPIndLongY<0x97, "sta"> { let Uses = [A, Y]; }
def STA_LongX : InstAbsLongX<0x9F, "sta">;
//---------------------------------------------------------------- LDX (load X)
// Immediate width follows the X flag: the 8-bit form is tagged XHigh,
// lives in the "W65816XHigh" decoder namespace and is isCodeGenOnly; the
// 16-bit form is tagged XLow. Both declare Defs = [X].
def LDX_Imm8 : InstImm8<0xA2, "ldx"> { let XHigh = 1; let DecoderNamespace = "W65816XHigh"; let isCodeGenOnly = 1; let Defs = [X]; }
def LDX_Imm16 : InstImm16<0xA2, "ldx"> { let XLow = 1; let Defs = [X]; }
def LDX_DP : InstDP<0xA6, "ldx">;
def LDX_Abs : InstAbs<0xAE, "ldx">;
def LDX_DPY : InstDPY<0xB6, "ldx">;
def LDX_AbsY : InstAbsY<0xBE, "ldx">;
//---------------------------------------------------------------- STX (store X)
// Direct page, absolute and DP-indexed-by-Y forms.
def STX_DP : InstDP<0x86, "stx">;
def STX_Abs : InstAbs<0x8E, "stx">;
def STX_DPY : InstDPY<0x96, "stx">;
//---------------------------------------------------------------- LDY (load Y)
// Mirrors LDX: same XHigh/XLow split for the immediates, Defs = [Y], and
// X-indexed instead of Y-indexed memory forms.
def LDY_Imm8 : InstImm8<0xA0, "ldy"> { let XHigh = 1; let DecoderNamespace = "W65816XHigh"; let isCodeGenOnly = 1; let Defs = [Y]; }
def LDY_Imm16 : InstImm16<0xA0, "ldy"> { let XLow = 1; let Defs = [Y]; }
def LDY_DP : InstDP<0xA4, "ldy">;
def LDY_Abs : InstAbs<0xAC, "ldy">;
def LDY_DPX : InstDPX<0xB4, "ldy">;
def LDY_AbsX : InstAbsX<0xBC, "ldy">;
//---------------------------------------------------------------- STY (store Y)
// Direct page, absolute and DP-indexed-by-X forms.
def STY_DP : InstDP<0x84, "sty">;
def STY_Abs : InstAbs<0x8C, "sty">;
def STY_DPX : InstDPX<0x94, "sty">;
//---------------------------------------------------------------- STZ (store zero)
// Width follows M flag — same as STA. Useful for zeroing DP scratch
// without burning A. Saves 1 byte vs `LDA #0; STA dp` per zero.
// Four forms are defined: direct page, absolute, and their X-indexed
// counterparts.
def STZ_DP : InstDP<0x64, "stz">;
def STZ_Abs : InstAbs<0x9C, "stz">;
def STZ_DPX : InstDPX<0x74, "stz">;
def STZ_AbsX : InstAbsX<0x9E, "stz">;
//------------------------------------------------------------------------- ADC
// Add with carry.  The immediate forms share opcode 0x69 and are split by
// M width (MHigh -> 8-bit, "W65816MHigh" decoder namespace, codegen-only;
// MLow -> 16-bit).  Memory-access flags follow the same convention as the
// CMP / AND / ORA / EOR groups: immediates touch no memory, and the
// memory forms read their operand but never store.
def ADC_Imm8 : InstImm8<0x69, "adc"> { let MHigh = 1; let mayLoad=0; let mayStore=0; let DecoderNamespace = "W65816MHigh"; let isCodeGenOnly = 1; }
def ADC_Imm16 : InstImm16<0x69, "adc"> { let MLow = 1; let mayLoad=0; let mayStore=0; }
def ADC_DP : InstDP<0x65, "adc"> { let mayStore = 0; }
def ADC_Abs : InstAbs<0x6D, "adc"> { let mayStore = 0; }
def ADC_DPX : InstDPX<0x75, "adc"> { let mayStore = 0; }
def ADC_AbsX : InstAbsX<0x7D, "adc"> { let mayStore = 0; }
def ADC_AbsY : InstAbsY<0x79, "adc"> { let mayStore = 0; }
//------------------------------------------------------------------------- SBC
// Subtract with carry (borrow).  Same layout and memory-access flags as
// ADC; the immediate forms share opcode 0xE9.
def SBC_Imm8 : InstImm8<0xE9, "sbc"> { let MHigh = 1; let mayLoad=0; let mayStore=0; let DecoderNamespace = "W65816MHigh"; let isCodeGenOnly = 1; }
def SBC_Imm16 : InstImm16<0xE9, "sbc"> { let MLow = 1; let mayLoad=0; let mayStore=0; }
def SBC_DP : InstDP<0xE5, "sbc"> { let mayStore = 0; }
def SBC_Abs : InstAbs<0xED, "sbc"> { let mayStore = 0; }
def SBC_DPX : InstDPX<0xF5, "sbc"> { let mayStore = 0; }
def SBC_AbsX : InstAbsX<0xFD, "sbc"> { let mayStore = 0; }
def SBC_AbsY : InstAbsY<0xF9, "sbc"> { let mayStore = 0; }
//------------------------------------------------------------------------- CMP
// Immediate compares touch no memory.  The 8-bit form is the MHigh,
// codegen-only variant kept out of the default decoder table.
let mayLoad = 0, mayStore = 0 in {
  def CMP_Imm8 : InstImm8<0xC9, "cmp"> {
    let MHigh = 1;
    let DecoderNamespace = "W65816MHigh";
    let isCodeGenOnly = 1;
  }
  def CMP_Imm16 : InstImm16<0xC9, "cmp"> {
    let MLow = 1;
  }
}

// Memory compares never store.
let mayStore = 0 in {
  def CMP_DP   : InstDP<0xC5, "cmp">;
  def CMP_Abs  : InstAbs<0xCD, "cmp">;
  def CMP_DPX  : InstDPX<0xD5, "cmp">;
  def CMP_AbsX : InstAbsX<0xDD, "cmp">;
  def CMP_AbsY : InstAbsY<0xD9, "cmp">;
}
//---------------------------------------------------------------- CPX/CPY
// Index-register compares.  Immediate width is tagged with XHigh (8-bit,
// codegen-only, "W65816XHigh" decoder namespace) or XLow (16-bit).
// Immediates are marked as touching no memory; memory forms never store.
let mayLoad = 0, mayStore = 0 in {
  let XHigh = 1, DecoderNamespace = "W65816XHigh", isCodeGenOnly = 1 in
  def CPX_Imm8  : InstImm8 <0xE0, "cpx">;
  let XLow = 1 in
  def CPX_Imm16 : InstImm16<0xE0, "cpx">;
}
let mayStore = 0 in {
  def CPX_DP  : InstDP <0xE4, "cpx">;
  def CPX_Abs : InstAbs<0xEC, "cpx">;
}
let mayLoad = 0, mayStore = 0 in {
  let XHigh = 1, DecoderNamespace = "W65816XHigh", isCodeGenOnly = 1 in
  def CPY_Imm8  : InstImm8 <0xC0, "cpy">;
  let XLow = 1 in
  def CPY_Imm16 : InstImm16<0xC0, "cpy">;
}
let mayStore = 0 in {
  def CPY_DP  : InstDP <0xC4, "cpy">;
  def CPY_Abs : InstAbs<0xCC, "cpy">;
}
//---------------------------------------------------------------- AND/ORA/EOR/BIT
// Bitwise operations against A.  For each mnemonic: the immediate forms are
// marked as touching no memory (8-bit = MHigh, codegen-only, "W65816MHigh"
// decoder namespace; 16-bit = MLow), and the DP/Abs forms never store.

// AND
let mayLoad = 0, mayStore = 0 in {
  let MHigh = 1, DecoderNamespace = "W65816MHigh", isCodeGenOnly = 1 in
  def AND_Imm8  : InstImm8 <0x29, "and">;
  let MLow = 1 in
  def AND_Imm16 : InstImm16<0x29, "and">;
}
let mayStore = 0 in {
  def AND_DP  : InstDP <0x25, "and">;
  def AND_Abs : InstAbs<0x2D, "and">;
}

// ORA
let mayLoad = 0, mayStore = 0 in {
  let MHigh = 1, DecoderNamespace = "W65816MHigh", isCodeGenOnly = 1 in
  def ORA_Imm8  : InstImm8 <0x09, "ora">;
  let MLow = 1 in
  def ORA_Imm16 : InstImm16<0x09, "ora">;
}
let mayStore = 0 in {
  def ORA_DP  : InstDP <0x05, "ora">;
  def ORA_Abs : InstAbs<0x0D, "ora">;
}

// EOR
let mayLoad = 0, mayStore = 0 in {
  let MHigh = 1, DecoderNamespace = "W65816MHigh", isCodeGenOnly = 1 in
  def EOR_Imm8  : InstImm8 <0x49, "eor">;
  let MLow = 1 in
  def EOR_Imm16 : InstImm16<0x49, "eor">;
}
let mayStore = 0 in {
  def EOR_DP  : InstDP <0x45, "eor">;
  def EOR_Abs : InstAbs<0x4D, "eor">;
}

// BIT
let mayLoad = 0, mayStore = 0 in {
  let MHigh = 1, DecoderNamespace = "W65816MHigh", isCodeGenOnly = 1 in
  def BIT_Imm8  : InstImm8 <0x89, "bit">;
  let MLow = 1 in
  def BIT_Imm16 : InstImm16<0x89, "bit">;
}
let mayStore = 0 in {
  def BIT_DP  : InstDP <0x24, "bit">;
  def BIT_Abs : InstAbs<0x2C, "bit">;
}
//---------------------------------------------------------------- INC/DEC
// Register forms (A, X, Y) are marked as touching no memory.
let mayLoad = 0, mayStore = 0 in {
  def INA : InstImplied<0x1A, "inc a">;
  def DEA : InstImplied<0x3A, "dec a">;
  def INX : InstImplied<0xE8, "inx">;
  def DEX : InstImplied<0xCA, "dex">;
  def INY : InstImplied<0xC8, "iny">;
  def DEY : InstImplied<0x88, "dey">;
}
// Memory forms keep the format classes' default memory flags.
def INC_DP   : InstDP  <0xE6, "inc">;
def INC_Abs  : InstAbs <0xEE, "inc">;
def INC_DPX  : InstDPX <0xF6, "inc">;
def INC_AbsX : InstAbsX<0xFE, "inc">;
def DEC_DP   : InstDP  <0xC6, "dec">;
def DEC_Abs  : InstAbs <0xCE, "dec">;
def DEC_DPX  : InstDPX <0xD6, "dec">;
def DEC_AbsX : InstAbsX<0xDE, "dec">;
//---------------------------------------------------------------- Shifts
// Accumulator forms are marked as touching no memory.
let mayLoad = 0, mayStore = 0 in {
  def ASL_A : InstImplied<0x0A, "asl a">;
  def LSR_A : InstImplied<0x4A, "lsr a">;
  def ROL_A : InstImplied<0x2A, "rol a">;
  def ROR_A : InstImplied<0x6A, "ror a">;
}
// Memory forms keep the format classes' default memory flags.
def ASL_DP  : InstDP <0x06, "asl">;
def ASL_Abs : InstAbs<0x0E, "asl">;
def LSR_DP  : InstDP <0x46, "lsr">;
def LSR_Abs : InstAbs<0x4E, "lsr">;
def ROL_DP  : InstDP <0x26, "rol">;
def ROL_Abs : InstAbs<0x2E, "rol">;
def ROR_DP  : InstDP <0x66, "ror">;
def ROR_Abs : InstAbs<0x6E, "ror">;
//---------------------------------------------------------------- Transfers
// Defs/Uses metadata is critical: without it, machine-cp doesn't see
// that TAX (etc.) reads the source register, and may delete a `$a =
// COPY $x` immediately preceding it as a "dead store" — corrupting
// the data flow.  See feedback_w65816_implied_ops.md for the canary.
def TAX : InstImplied<0xAA, "tax"> { let mayLoad = 0; let mayStore = 0; let Defs = [X]; let Uses = [A]; }
def TAY : InstImplied<0xA8, "tay"> { let mayLoad = 0; let mayStore = 0; let Defs = [Y]; let Uses = [A]; }
def TXA : InstImplied<0x8A, "txa"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; let Uses = [X]; }
def TYA : InstImplied<0x98, "tya"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; let Uses = [Y]; }
def TXY : InstImplied<0x9B, "txy"> { let mayLoad = 0; let mayStore = 0; let Defs = [Y]; let Uses = [X]; }
def TYX : InstImplied<0xBB, "tyx"> { let mayLoad = 0; let mayStore = 0; let Defs = [X]; let Uses = [Y]; }
def TXS : InstImplied<0x9A, "txs"> { let mayLoad = 0; let mayStore = 0; let Defs = [SP]; let Uses = [X]; }
def TSX : InstImplied<0xBA, "tsx"> { let mayLoad = 0; let mayStore = 0; let Defs = [X]; let Uses = [SP]; }
// The 16-bit accumulator transfers need the same treatment as the ones
// above.  TCS/TSC move between A and the stack pointer.
def TCS : InstImplied<0x1B, "tcs"> { let mayLoad = 0; let mayStore = 0; let Defs = [SP]; let Uses = [A]; }
def TSC : InstImplied<0x3B, "tsc"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; let Uses = [SP]; }
// TCD/TDC move between A and the direct-page register.  Only the A side is
// listed here.  NOTE(review): if the register file models the direct-page
// register, add it to TCD's Defs and TDC's Uses.
def TCD : InstImplied<0x5B, "tcd"> { let mayLoad = 0; let mayStore = 0; let Uses = [A]; }
def TDC : InstImplied<0x7B, "tdc"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; }
//---------------------------------------------------------------- Stack push/pull
// Push (PHx) / pull (PLx) instructions plus the PEA / PER effective-address
// pushes.  None of these records overrides mayLoad/mayStore or lists
// Defs/Uses, so they carry whatever the format classes default to.
// NOTE(review): the transfer instructions in this file needed explicit
// Defs/Uses so that machine-cp would not delete a preceding COPY.  The
// PHA/PLA, PHX/PLX, PHY/PLY and PHP/PLP pairs read or write A/X/Y/P just as
// implicitly — confirm whether they need the same annotation.
def PHA : InstImplied<0x48, "pha">;
def PLA : InstImplied<0x68, "pla">;
def PHX : InstImplied<0xDA, "phx">;
def PLX : InstImplied<0xFA, "plx">;
def PHY : InstImplied<0x5A, "phy">;
def PLY : InstImplied<0x7A, "ply">;
def PHP : InstImplied<0x08, "php">;
def PLP : InstImplied<0x28, "plp">;
def PHB : InstImplied<0x8B, "phb">;
def PLB : InstImplied<0xAB, "plb">;
def PHD : InstImplied<0x0B, "phd">;
def PLD : InstImplied<0x2B, "pld">;
def PHK : InstImplied<0x4B, "phk">;
def PEA : InstAbs<0xF4, "pea">;
def PER : InstPCRel16<0x62, "per">;
//---------------------------------------------------------------- Branches
// Each conditional branch is declared as a reader of P (the status
// register).  Before that dependency was modelled, MachineCSE could not
// tell that a Bxx consumed the flags produced by an earlier CMP, and would
// reuse a "redundant" CMP even though an intervening LDA/STA/ADC had
// already clobbered the flags.  Declaring the use is the principled fix;
// the W65816TargetMachine workaround that switched MachineCSE off entirely
// can be removed once this has been verified.
let Uses = [P] in
let isBranch = 1, isTerminator = 1 in
let mayLoad = 0, mayStore = 0 in {
  def BEQ : InstPCRel8<0xF0, "beq">;
  def BNE : InstPCRel8<0xD0, "bne">;
  def BCS : InstPCRel8<0xB0, "bcs">;
  def BCC : InstPCRel8<0x90, "bcc">;
  def BMI : InstPCRel8<0x30, "bmi">;
  def BPL : InstPCRel8<0x10, "bpl">;
  def BVS : InstPCRel8<0x70, "bvs">;
  def BVC : InstPCRel8<0x50, "bvc">;
}
// Unconditional control transfers: terminators that never fall through
// (isBarrier).
let isBranch = 1, isTerminator = 1, isBarrier = 1, mayLoad = 0, mayStore = 0 in {
def BRA : InstPCRel8<0x80, "bra">;
def BRL : InstPCRel16<0x82, "brl">;
def JMP_Abs : InstAbs<0x4C, "jmp">;
// JMP (abs) jumps through a pointer stored in memory.  It is therefore an
// indirect branch (generic code must not treat its operand as the
// destination) and it reads memory, so both flags override the group
// defaults above.
def JMP_AbsInd : InstAbsInd<0x6C, "jmp"> { let isIndirectBranch = 1; let mayLoad = 1; }
def JML_Long : InstAbsLong<0x5C, "jml">;
}
//---------------------------------------------------------------- Calls
// MC-level call instructions: 16-bit absolute (JSR) and 24-bit long (JSL).
// Both are flagged as calls with mayLoad/mayStore cleared.
def JSR_Abs  : InstAbs<0x20, "jsr"> {
  let isCall = 1;
  let mayLoad = 0;
  let mayStore = 0;
}
def JSL_Long : InstAbsLong<0x22, "jsl"> {
  let isCall = 1;
  let mayLoad = 0;
  let mayStore = 0;
}
//---------------------------------------------------------------- Returns
// All three returns are terminators that never fall through, with
// mayLoad/mayStore cleared.
let isReturn = 1, isTerminator = 1, isBarrier = 1 in
let mayLoad = 0, mayStore = 0 in {
  def RTS : InstImplied<0x60, "rts">;
  def RTI : InstImplied<0x40, "rti">;
  // RTL is the 65816 long return and is the instruction selected for the
  // generic retglue node.
  let Pattern = [(W65816retglue)] in
  def RTL : InstImplied<0x6B, "rtl">;
}
//---------------------------------------------------------------- Block move
// MVN/MVP are 3 bytes: opcode, destination bank, source bank.  The asm
// string below prints and parses the operands in that same encoded order
// (`$dst, $src`).
// NOTE(review): WDC's reference syntax is `MVN srcbk,destbk` (source bank
// first), the reverse of the encoded order — confirm that the dst-first
// assembly order used here is intentional.
//
// The bank bytes are written without a '#' prefix (e.g. `mvn $01, $02`), so
// the parser produces AddrDP-kind operands rather than immediates.  addrDP
// is used to match that; the encoder path is identical because both are
// single-byte values.
class InstBlockMove<bits<8> op, string mnem>
    : W65816Inst<(outs), (ins addrDP:$dst, addrDP:$src),
                 mnem # "\t$dst, $src"> {
  // Operand fields filled in from $dst / $src.
  bits<8> dst;
  bits<8> src;

  let Size = 3;

  // Byte 0 = opcode, byte 1 = destination bank, byte 2 = source bank.
  bits<24> Inst;
  let Inst{7-0}   = op;
  let Inst{15-8}  = dst;
  let Inst{23-16} = src;
}
def MVN : InstBlockMove<0x54, "mvn">;
def MVP : InstBlockMove<0x44, "mvp">;
//---------------------------------------------------------------- Stack-rel
// Stack-relative forms of the accumulator load/store, arithmetic, compare
// and bitwise instructions.  No flags are overridden here, so each record
// carries whatever InstStackRel sets by default.
def LDA_StackRel : InstStackRel<0xA3, "lda">;
def STA_StackRel : InstStackRel<0x83, "sta">;
def ADC_StackRel : InstStackRel<0x63, "adc">;
def SBC_StackRel : InstStackRel<0xE3, "sbc">;
def CMP_StackRel : InstStackRel<0xC3, "cmp">;
def AND_StackRel : InstStackRel<0x23, "and">;
def ORA_StackRel : InstStackRel<0x03, "ora">;
def EOR_StackRel : InstStackRel<0x43, "eor">;
//---------------------------------------------------------------- Stack-ind-Y
// Stack-relative indirect indexed-Y: deref a pointer spilled at S+off.
// Only the accumulator load and store are defined in this addressing mode.
def LDA_StackRelIndY : InstStackRelIndY<0xB3, "lda">;
def STA_StackRelIndY : InstStackRelIndY<0x93, "sta">;
//===----------------------------------------------------------------------===//
// Branch patterns (placed after the Bxx defs).
//
// W65816brcc takes (Dest, CondCode) plus a glue from W65816cmp. The CC
// constant maps to one of the eight Bxx instructions. Values mirror
// W65816CC::CondCode in W65816.h, so the two must be kept in sync: each
// i8 constant below selects exactly one branch instruction.
//===----------------------------------------------------------------------===//
def : Pat<(W65816brcc bb:$dest, (i8 0)), (BEQ bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 1)), (BNE bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 2)), (BCS bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 3)), (BCC bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 4)), (BMI bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 5)), (BPL bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 6)), (BVS bb:$dest)>;
def : Pat<(W65816brcc bb:$dest, (i8 7)), (BVC bb:$dest)>;
// Unconditional branch from generic ISD::BR.
// BRA is declared with the 8-bit PC-relative format (InstPCRel8).
// NOTE(review): presumably out-of-range targets are widened to BRL by a
// later pass — confirm.
def : Pat<(br bb:$dest), (BRA bb:$dest)>;
// Read-modify-write on a global: `*p = *p + 1` selects `INC abs` and
// `*p = *p - 1` selects `DEC abs`, a single instruction in place of the
// LDA → CLC → ADC #1 → STA sequence.  Both patterns require the load and
// the store to name the same wrapped global address.
def : Pat<(store (i16 (add (i16 (load (W65816Wrapper tglobaladdr:$g))),
                           (i16 1))),
                 (W65816Wrapper tglobaladdr:$g)),
          (INC_Abs tglobaladdr:$g)>;
def : Pat<(store (i16 (add (i16 (load (W65816Wrapper tglobaladdr:$g))),
                           (i16 -1))),
                 (W65816Wrapper tglobaladdr:$g)),
          (DEC_Abs tglobaladdr:$g)>;
// Direct call to a global / external symbol.  We use JSL (24-bit
// long jump-and-link) and RTL pairing throughout — matches the
// IIgs convention where main is entered via JSL, and means a
// function doesn't have to know how it was called to choose its
// return instruction.  A pseudo bridges the i16 symbol operand
// to JSL_Long's 24-bit operand class.
//
// Defs lists ALL caller-clobbered regs.  The 65816 has no
// caller/callee-save split — every callee may freely modify
// A/X/Y/DPF0/P/etc.  Critically, i32/i64 returns place high
// halves in X (i32), Y and DPF0 (i64); without those in Defs,
// the InstrEmitter does not add implicit-defs for glued
// CopyFromReg(X/Y/DPF0) on the call MI, and the verifier sees
// the post-call `COPY $y` as reading an undefined register.
// DPF0 was historically the only "extra" def so getLoad(0xF0)
// wouldn't CSE across calls; the same anti-CSE rationale applies
// to A/X/Y, but more fundamentally those are call return slots.
//
// P is listed as well: the callee clobbers the status flags, and the
// conditional branches declare Uses = [P].  Without P in Defs, flags
// computed before a call would appear to still be valid after it.
let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Defs = [A, X, Y, DPF0, P] in {
def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst),
                             "# JSLpseudo $dst", []>;
// ptr32 variant — same expansion in AsmPrinter; the operand class
// just exists so tablegen accepts an i32-typed tglobaladdr operand.
def JSLpseudo32 : W65816Pseudo<(outs), (ins i32imm:$dst),
                               "# JSLpseudo32 $dst", []>;
}
// 16-bit pointers: a call to a global or an external symbol selects
// JSLpseudo.
def : Pat<(W65816call (i16 tglobaladdr:$dst)),
          (JSLpseudo tglobaladdr:$dst)>;
def : Pat<(W65816call (i16 texternalsym:$dst)),
          (JSLpseudo texternalsym:$dst)>;
// ptr32: under p:32:16, call targets are i32 (iPTR matches the pointer
// width).  The same JSL_Long instruction handles either width — the OMF
// cRELOC opcode rewrites the offset and bank at load time.
def : Pat<(W65816call (i32 tglobaladdr:$dst)),
          (JSLpseudo32 tglobaladdr:$dst)>;
def : Pat<(W65816call (i32 texternalsym:$dst)),
          (JSLpseudo32 texternalsym:$dst)>;