//===-- W65816InstrInfo.td - W65816 Instruction defs -------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// W65816 instruction description. This file defines the MC-layer instruction
// encodings for the core 65816 instruction set. DAG-selection patterns will
// be added incrementally on top of these MC instructions.
//
//===----------------------------------------------------------------------===//
|
|
|
|
include "W65816InstrFormats.td"
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Type Profiles
|
|
//===----------------------------------------------------------------------===//
|
|
// Call: no results, variadic operand count; operand 0 has pointer type.
def SDT_W65816Call : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;

// Call-sequence start / end markers: both operands are i16.
def SDT_W65816CallSeqStart :
    SDCallSeqStart<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_W65816CallSeqEnd :
    SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;

// Address wrapper: one pointer-typed result; the operand has the same type.
def SDT_W65816Wrapper :
    SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;

// Compare: no result, two integer operands of identical type.
// (CMP allows both i16 and i8 operands.)
def SDT_W65816Cmp :
    SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>]>;

// Conditional branch: operand 0 is OtherVT, operand 1 is an i8.
def SDT_W65816BrCC :
    SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// W65816-specific SDNodes
|
|
//===----------------------------------------------------------------------===//
|
|
// Return: chained, optional incoming glue, variadic operand list.
def W65816retglue :
    SDNode<"W65816ISD::RET_GLUE", SDTNone,
           [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

// Call: chained, produces glue, optionally consumes glue, variadic.
def W65816call :
    SDNode<"W65816ISD::CALL", SDT_W65816Call,
           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;

// Target-typed views of the generic call-sequence markers.
def W65816callseq_start :
    SDNode<"ISD::CALLSEQ_START", SDT_W65816CallSeqStart,
           [SDNPHasChain, SDNPOutGlue]>;
def W65816callseq_end :
    SDNode<"ISD::CALLSEQ_END", SDT_W65816CallSeqEnd,
           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

def W65816Wrapper : SDNode<"W65816ISD::Wrapper", SDT_W65816Wrapper>;

// Comparison: produces a Glue value (carrying processor flags).
def W65816cmp : SDNode<"W65816ISD::CMP", SDT_W65816Cmp, [SDNPOutGlue]>;

// Conditional branch: takes (Chain, Dest, CC, Glue from CMP).
def W65816brcc :
    SDNode<"W65816ISD::BR_CC", SDT_W65816BrCC,
           [SDNPHasChain, SDNPInGlue]>;
|
|
|
|
// PUSH: push A onto the stack. LowerCall uses it to pass extra args.
// Inputs are Chain + Glue (A is pre-loaded through a glued CopyToReg);
// outputs are Chain + Glue. The node stores to memory and changes SP as
// a side effect: in 16-bit M mode it pushes 2 bytes and SP drops by 2.
// The call's ADJCALLSTACKUP pseudo unwinds those bytes via
// tsc;clc;adc #N;tcs after the JSL returns.
def W65816push :
    SDNode<"W65816ISD::PUSH", SDTNone,
           [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
            SDNPSideEffect, SDNPMayStore]>;

// PUSH_X: push X onto the stack. Identical shape to W65816push, except
// that the pushed value is glued from CopyToReg(X) rather than CopyToReg(A).
def W65816pushx :
    SDNode<"W65816ISD::PUSH_X", SDTNone,
           [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
            SDNPSideEffect, SDNPMayStore]>;
|
|
|
|
|
|
// SELECT_CC: operands are (TVal, FVal, CC) plus an incoming glue value that
// carries the flags of a preceding W65816cmp. EmitInstrWithCustomInserter
// lowers it into a CMP (already in the BB) + Bxx + diamond CFG + PHI.
//
// Type profile: the result and both value operands share one type; the
// condition code (operand 3) is an i8.
def SDT_W65816SelectCC :
    SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>,
                         SDTCisSameAs<0, 2>,
                         SDTCisVT<3, i8>]>;
def W65816selectcc :
    SDNode<"W65816ISD::SELECT_CC", SDT_W65816SelectCC, [SDNPInGlue]>;
|
|
|
|
// Dynamic stack allocation: (chain, size:i16) -> (ptr:i16, chain).
// Lowers to TSC; SEC; SBC size; TCS; INC A in AsmPrinter.
// See LowerDynamicStackalloc.
def SDT_W65816Alloca :
    SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def W65816alloca :
    SDNode<"W65816ISD::ALLOCA", SDT_W65816Alloca,
           [SDNPHasChain, SDNPSideEffect]>;
|
|
|
|
// ptr32 load / store nodes.
//
// These target-specific nodes take a 32-bit pointer (Wide32 = i32) and lower
// to [dp],Y indirect-long, with the bank byte coming from the pointer's
// hi-half. They exist for ptr32 mode, where a generic (load i32-addr) needs
// explicit lowering — wrapping it in a target node keeps DAG combines from
// rewriting the load before isel.
//
// Loads always materialise an i16 in A (16-bit LDA); byte zext / anyext
// patterns AND-mask afterwards exactly as the existing LDAptr does.
// Stores come as two nodes: ST_PTR (full 16-bit STA) and STB_PTR
// (SEP/REP-wrapped 8-bit STA for truncating stores).
def SDT_W65816LdPtr :
    SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i32>]>;
def SDT_W65816StPtr :
    SDTypeProfile<0, 2, [SDTCisVT<0, i16>, SDTCisVT<1, i32>]>;

def W65816ldPtr :
    SDNode<"W65816ISD::LD_PTR", SDT_W65816LdPtr,
           [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

// va_arg's stack-pointer deref: a bank-0-explicit load. The 65816 stack
// is hardwired to bank 0, and va_arg's `ap` is always a stack pointer.
// Under Loader, $BE points to OUR bank, but va_arg needs bank 0 — so
// LowerVAARG emits this opcode and the pattern routes to LDAptrBank0
// (the bank-0-hardcoded variant of LDAptr).
def SDT_W65816VAArgLoad :
    SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def W65816vaargLoad :
    SDNode<"W65816ISD::VAARG_LOAD", SDT_W65816VAArgLoad,
           [SDNPHasChain, SDNPMayLoad]>;

// Both store flavours share SDT_W65816StPtr (i16 value, i32 pointer).
def W65816stPtr :
    SDNode<"W65816ISD::ST_PTR", SDT_W65816StPtr,
           [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def W65816stbPtr :
    SDNode<"W65816ISD::STB_PTR", SDT_W65816StPtr,
           [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Pseudo Instructions
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// Call-frame setup / teardown pseudos, selected from the callseq_start and
// callseq_end nodes. Both read and write SP.
let Uses = [SP], Defs = [SP] in {
  def ADJCALLSTACKDOWN :
      W65816Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
                   "# ADJCALLSTACKDOWN $amt1 $amt2",
                   [(W65816callseq_start timm:$amt1, timm:$amt2)]>;

  def ADJCALLSTACKUP :
      W65816Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
                   "# ADJCALLSTACKUP $amt1 $amt2",
                   [(W65816callseq_end timm:$amt1, timm:$amt2)]>;
}
|
|
|
|
// ADDframe — LEA-equivalent. Computes the address of a stack slot
// (SP + frame_offset + offset) into A. W65816DAGToDAGISel::Select picks it
// for a bare ISD::FrameIndex SDValue (hence the empty pattern list), and
// eliminateFrameIndex expands it into TSC + CLC + ADC #disp. The result is
// Acc16 because the address ends up in A; PtrRegs (which only contains SP)
// is the wrong class.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    isReMaterializable = 1 in
def ADDframe :
    W65816Pseudo<(outs Acc16:$dst), (ins i16imm:$base, i16imm:$offset),
                 "# ADDframe PSEUDO", []>;
|
|
|
|
// ALLOCAfi — VLA / dynamic_stackalloc. Input: a 16-bit byte count in A.
// Output: the address of the allocated region in A. The AsmPrinter expands
// it to TSC; SEC; SBC count; TCS; INC A, which changes SP (a side effect).
// $dst and $size are tied (both live in A); the explicit Defs/Uses on SP
// keep regalloc honest about the side effect.
let Constraints = "$size = $dst", hasSideEffects = 1,
    Uses = [SP], Defs = [SP] in
def ALLOCAfi :
    W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$size),
                 "# ALLOCAfi $dst, $size",
                 [(set Acc16:$dst, (W65816alloca Acc16:$size))]>;
|
|
|
|
// The retglue node lowers directly to RTL (see Returns section below).
|
|
// No separate RET pseudo — the real MC instruction handles the pattern.
|
|
|
|
// PUSH16 — push A onto the stack; the AsmPrinter expands it to MC `PHA`.
// LowerCall uses it to pass extra args. The matching `tsc;clc;adc #N;tcs`
// SP unwind is emitted by eliminateCallFramePseudoInstr for ADJCALLSTACKUP.
let Uses = [A, SP], Defs = [SP], hasSideEffects = 1, mayStore = 1 in {
  def PUSH16 : W65816Pseudo<(outs), (ins), "# PUSH16", [(W65816push)]>;
}

// PUSH16X — push X onto the stack; expands to PHX. LowerCall uses it when
// an outgoing arg's SDValue is already in X (e.g. forwarding the hi half of
// an i32 first argument passed in A:X), which saves a TXA+spill round-trip.
let Uses = [X, SP], Defs = [SP], hasSideEffects = 1, mayStore = 1 in {
  def PUSH16X : W65816Pseudo<(outs), (ins), "# PUSH16X", [(W65816pushx)]>;
}
|
|
|
|
// SELECT_CC16 / SELECT_CC8: select between $tval and $fval on condition
// code $cc. The CMP that produced the flags has already been emitted; its
// glue is implicit via the P register. usesCustomInserter makes codegen
// call EmitInstrWithCustomInserter, which expands the pseudo into a
// Bxx + 2 BBs + PHI. Uses = [P] lets MachineSched keep the CMP adjacent.
let Uses = [P], hasSideEffects = 1, usesCustomInserter = 1 in {
  def SELECT_CC16 :
      W65816Pseudo<(outs Acc16:$dst),
                   (ins Acc16:$tval, Acc16:$fval, i8imm:$cc),
                   "# SELECT_CC16 $dst, $tval, $fval, $cc",
                   [(set Acc16:$dst,
                         (W65816selectcc Acc16:$tval, Acc16:$fval,
                                         timm:$cc))]>;

  // i8 mirror. Without it, `c ? a : b` with an i8 result (e.g.
  // `unsigned char to_lower(char c)`) fails isel with "Cannot Select" —
  // a pre-existing bug. EmitInstrWithCustomInserter treats the i8 and i16
  // forms identically; only the operand register class differs.
  def SELECT_CC8 :
      W65816Pseudo<(outs Acc8:$dst),
                   (ins Acc8:$tval, Acc8:$fval, i8imm:$cc),
                   "# SELECT_CC8 $dst, $tval, $fval, $cc",
                   [(set Acc8:$dst,
                         (W65816selectcc Acc8:$tval, Acc8:$fval,
                                         timm:$cc))]>;
}
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Codegen pseudos that expand to MC instructions in the AsmPrinter.
|
|
//
|
|
// These pseudos carry DAG patterns with explicit output operands so the
|
|
// generic code generator can allocate them; the MC-layer instructions they
|
|
// expand to have the opcode encoding but no virtual output (the result lives
|
|
// in the implicit A register). W65816AsmPrinter::emitInstruction maps each
|
|
// pseudo here to its real MC counterpart.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
// NOTE: LDA / LDX physically update N and Z, but we deliberately do
|
|
// NOT model that with `Defs = [P]`. Adding `Defs = [P]` lets the
|
|
// scheduler legally place an LDA between CMP and Bxx (P just gets
|
|
// re-defined; the latest def is what Bxx tests) — same flag-corruption
|
|
// bug, different mechanism. Two complementary fixes carry the load:
|
|
// the 4-block SELECT_CC inserter for SETCC patterns, and the post-RA
|
|
// PHP/PLP wrap pass (W65816StackSlotCleanup Pass -2.5) for BR_CC
|
|
// patterns (`while`/`for`/`if-goto`). Both landed.
|
|
// Immediate loads into A (16- and 8-bit) and X (16-bit). All three share
// the same flags, so they are stated once on the enclosing `let`.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    isReMaterializable = 1, isAsCheapAsAMove = 1 in {
  def LDAi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins i16imm:$imm),
                   "# LDAi16imm $dst, $imm",
                   [(set Acc16:$dst, (i16 imm:$imm))]>;

  def LDXi16imm :
      W65816Pseudo<(outs Idx16:$dst), (ins i16imm:$imm),
                   "# LDXi16imm $dst, $imm",
                   [(set Idx16:$dst, (i16 imm:$imm))]>;

  def LDAi8imm :
      W65816Pseudo<(outs Acc8:$dst), (ins i8imm:$imm),
                   "# LDAi8imm $dst, $imm",
                   [(set Acc8:$dst, (i8 imm:$imm))]>;
}
|
|
|
|
// Put a 16-bit address (global or external symbol) into A. This reuses the
// immediate-constant pseudo: it expands to LDA_Imm16 with the symbol as its
// operand, and the MC encoder turns that into a fixup_16.
def : Pat<(i16 (W65816Wrapper tglobaladdr:$g)), (LDAi16imm tglobaladdr:$g)>;
def : Pat<(i16 (W65816Wrapper texternalsym:$s)), (LDAi16imm texternalsym:$s)>;
|
|
|
|
// 8-bit add / sub / and / or / xor of an immediate.
|
|
// Every pseudo here is tied ($dst overwrites $src) and touches no memory.
// NOTE(review): unlike ADCi16imm / SBCi16imm, ADCi8imm / SBCi8imm carry no
// `Defs = [P]` — confirm that the difference is intentional.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Constraints = "$src = $dst" in {
  def ADCi8imm :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                   "# ADCi8imm $dst, $src, $imm",
                   [(set Acc8:$dst, (add Acc8:$src, imm:$imm))]>;

  def SBCi8imm :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                   "# SBCi8imm $dst, $src, $imm",
                   [(set Acc8:$dst, (sub Acc8:$src, imm:$imm))]>;

  def ANDi8imm :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                   "# ANDi8imm $dst, $src, $imm",
                   [(set Acc8:$dst, (and Acc8:$src, imm:$imm))]>;

  def ORAi8imm :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                   "# ORAi8imm $dst, $src, $imm",
                   [(set Acc8:$dst, (or Acc8:$src, imm:$imm))]>;

  def EORi8imm :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                   "# EORi8imm $dst, $src, $imm",
                   [(set Acc8:$dst, (xor Acc8:$src, imm:$imm))]>;
}
|
|
|
|
// 8-bit load / store via a 16-bit absolute address.
|
|
// Loads. Neither def carries an inline pattern; they are attached with
// separate `Pat` records.
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def LDA8abs :
      W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
                   "# LDA8abs $dst, $addr", []>;

  // LDA8long: companion to STA8long. Bank-explicit i8 load via LDA_Long
  // (0xAF). Used for `*(uint8*)0xC035` reads — LDA_Abs (0xAD) is
  // DBR-relative and would land in the wrong bank under GS/OS Loader.
  // The pattern that ROUTES const-int loads here lives at the ANDi16imm
  // section (it must appear after ANDi16imm is defined).
  def LDA8long :
      W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
                   "# LDA8long $dst, $addr", []>;
}

// Stores, likewise pattern-less at the definition.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
  def STA8abs :
      W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
                   "# STA8abs $src, $addr", []>;

  // STA8long: 8-bit absolute-long store. Same operand shape as STA8abs, but
  // the AsmPrinter emits STA_Long (0x8F) — a true 24-bit bank-explicit
  // store — instead of STA_Abs (0x8D, DBR-relative). Used for MMIO via a
  // constant integer address; the i32imm carries the full 24-bit physical
  // address. See the (store Acc8, (iPTR imm)) pattern.
  def STA8long :
      W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
                   "# STA8long $src, $addr", []>;
}
|
|
// i8 load from / store to a wrapped global or external symbol.
def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))),
          (LDA8abs tglobaladdr:$g)>;
def : Pat<(i8 (load (W65816Wrapper texternalsym:$s))),
          (LDA8abs texternalsym:$s)>;
def : Pat<(store Acc8:$src, (W65816Wrapper tglobaladdr:$g)),
          (STA8abs Acc8:$src, tglobaladdr:$g)>;
def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)),
          (STA8abs Acc8:$src, texternalsym:$s)>;
|
|
// Byte store via a constant-int address (MMIO-style: `*(volatile uint8 *)0x70
|
|
// = v`). Without this, the i8 store falls through to STBptr ([dp],Y), which
|
|
// is 16 B / 30 cyc. We route through STA8long (sta abs-long, opcode 0x8F)
|
|
// rather than STA8abs because a const-int address is a physical 24-bit
|
|
// pointer and must NOT track DBR — under the GS/OS Loader the data bank is
|
|
// non-zero, so DBR-relative `sta abs` would land in the wrong bank.
|
|
// `timm` matches TargetConstantSDNode — under p:32:16, a pre-isel combine
|
|
// in W65816TargetLowering::PerformDAGCombine converts the ConstantSDNode
|
|
// ptr to a TargetConstantSDNode so it survives LowerI32Constant intact.
|
|
// Plain i8 store to a constant address, for both `imm` and `timm` pointers.
def : Pat<(store Acc8:$src, (iPTR imm:$addr)),
          (STA8long Acc8:$src, (i32 imm:$addr))>;
def : Pat<(store Acc8:$src, (iPTR timm:$addr)),
          (STA8long Acc8:$src, (i32 timm:$addr))>;

// Truncating i16 -> i8 store: the Acc16 source is re-classed to Acc8 first.
def : Pat<(truncstorei8 Acc16:$src, (iPTR imm:$addr)),
          (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8),
                    (i32 imm:$addr))>;
def : Pat<(truncstorei8 Acc16:$src, (iPTR timm:$addr)),
          (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8),
                    (i32 timm:$addr))>;
|
|
|
|
// Load 16 bits via a 16-bit absolute address. Currently only matches
|
|
// loads from a Wrapper(global); direct constant-pointer loads come once
|
|
// we add an addressing-mode complex pattern.
|
|
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def LDAabs :
      W65816Pseudo<(outs Acc16:$dst), (ins i32imm:$addr),
                   "# LDAabs $dst, $addr", []>;
}

// i16 load from a wrapped global or external symbol.
def : Pat<(i16 (load (W65816Wrapper tglobaladdr:$g))),
          (LDAabs tglobaladdr:$g)>;
def : Pat<(i16 (load (W65816Wrapper texternalsym:$s))),
          (LDAabs texternalsym:$s)>;

// i16 const-int-address load: companion to the STAabs (iPTR imm) /
// (iPTR timm) store patterns. `*(volatile uint16*)0x5000` → LDAabs
// (DBR-relative). The combine in W65816TargetLowering returns a
// TargetConstant for the Wide32-zero-hi-Constant unwrap, hence the
// additional `timm` form.
def : Pat<(i16 (load (iPTR imm:$addr))), (LDAabs (i32 imm:$addr))>;
def : Pat<(i16 (load (iPTR timm:$addr))), (LDAabs (i32 timm:$addr))>;
|
|
|
|
// Store 16 bits to a 16-bit absolute address.
|
|
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
  def STAabs :
      W65816Pseudo<(outs), (ins Acc16:$src, i32imm:$addr),
                   "# STAabs $src, $addr", []>;
}

// i16 store to a wrapped global or external symbol.
def : Pat<(store Acc16:$src, (W65816Wrapper tglobaladdr:$g)),
          (STAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)),
          (STAabs Acc16:$src, texternalsym:$s)>;

// Store via a constant-int address (`*(volatile uint16 *)0x5000 = v`).
// This selects STAabs (0x8D, DBR-relative) and is DELIBERATELY asymmetric
// with the i8 case (STA8long, bank-explicit). Rationale: most 65816 MMIO is
// i8 (e.g. `*(uint8*)0xC035`), where users expect bank=0 always. Const-int
// i16 is mostly used as a DBR-relative idiom in test code that switches DBR
// and verifies a write lands in the new bank. Switching i16 to
// bank-explicit broke 10+ existing tests with no real-world i16 MMIO use
// case to justify it. Users who need bank-explicit i16 should declare a
// global or split into two i8 stores.
def : Pat<(store Acc16:$src, (iPTR imm:$addr)),
          (STAabs Acc16:$src, (i32 imm:$addr))>;

// Under ptr32 the i16/i32 const-addr stores arrive with TargetConstant
// pointers (the PerformDAGCombine on STORE rewrites the ConstantSDNode into
// a TargetConstant to bypass LowerI32Constant's REG_SEQUENCE expansion).
// Match `timm` so STAabs fires.
def : Pat<(store Acc16:$src, (iPTR timm:$addr)),
          (STAabs Acc16:$src, (i32 timm:$addr))>;
|
|
|
|
// 16-bit ADD: expands to CLC + ADC_Imm16. The 65816 ADC sums with the
|
|
// carry flag, so a clean add needs CLC first. Constraints tie the
|
|
// source and dest to A — there is only one Acc16 register so this is
|
|
// implicit, but stating it lets the register allocator coalesce
|
|
// without needing a COPY.
|
|
//
|
|
// Defs = [P] models the C-flag side-effect. Required so tablegen can
|
|
// connect this instruction to the SDNode `addc` / `subc` (SDNPOutGlue),
|
|
// which is what the type legalizer emits as the lo half of a multi-
|
|
// precision add/sub when ADDC/SUBC is Legal (see W65816ISelLowering ctor).
|
|
// Tied 16-bit add / subtract of an immediate; P is listed as clobbered.
let Defs = [P], Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def ADCi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# ADCi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (add Acc16:$src, imm:$imm))]>;

  def SBCi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# SBCi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (sub Acc16:$src, imm:$imm))]>;
}

// addc/subc: same as add/sub on this target (CLC then ADC, SEC then SBC),
// except that the SDNode produces a Glue carrying the post-op carry into a
// subsequent adde/sube. Tablegen wires that Glue to the P register because
// the instruction has Defs = [P].
def : Pat<(addc Acc16:$src, imm:$imm), (ADCi16imm Acc16:$src, imm:$imm)>;
def : Pat<(subc Acc16:$src, imm:$imm), (SBCi16imm Acc16:$src, imm:$imm)>;
|
|
|
|
// ADC/SBC from a 16-bit absolute address. Folds a load on the
|
|
// right-hand side of an add/sub into the carry-arithmetic op.
|
|
// Memory-operand forms: tied accumulator, may load, P clobbered. Patterns
// are attached separately below.
let Defs = [P], Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def ADCabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# ADCabs $dst, $src, $addr", []>;

  def SBCabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# SBCabs $dst, $src, $addr", []>;
}

// add / sub with the right-hand operand loaded from a global or extsym.
def : Pat<(add Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(add Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCabs Acc16:$src, texternalsym:$s)>;
def : Pat<(sub Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(sub Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCabs Acc16:$src, texternalsym:$s)>;

// Carry-producing addc / subc select the same pseudos.
def : Pat<(addc Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(addc Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCabs Acc16:$src, texternalsym:$s)>;
def : Pat<(subc Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(subc Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCabs Acc16:$src, texternalsym:$s)>;
|
|
|
|
// adde/sube: the chained ADC/SBC for the hi half of a multi-precision
|
|
// add/sub. Reads the C flag from the previous addc/adde (Uses = [P]),
|
|
// produces a fresh carry/borrow (Defs = [P]). AsmPrinter expansion
|
|
// emits a bare ADC/SBC with no preceding CLC/SEC; eliminateFrameIndex
|
|
// for ADCEfi/SBCEfi skips the carry-prefix step that the standalone
|
|
// ADCfi/SBCfi rely on.
|
|
// Carry-in / carry-out forms with an immediate operand: P is both read and
// written.
let Uses = [P], Defs = [P], Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def ADCEi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# ADCEi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (adde Acc16:$src, imm:$imm))]>;

  def SBCEi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# SBCEi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (sube Acc16:$src, imm:$imm))]>;
}

// Same, with the right-hand operand read from memory (mayLoad = 1).
let Uses = [P], Defs = [P], Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def ADCEabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# ADCEabs $dst, $src, $addr", []>;

  def SBCEabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# SBCEabs $dst, $src, $addr", []>;
}

// adde / sube with the right-hand operand loaded from a global or extsym.
def : Pat<(adde Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCEabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(adde Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCEabs Acc16:$src, texternalsym:$s)>;
def : Pat<(sube Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCEabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(sube Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCEabs Acc16:$src, texternalsym:$s)>;
|
|
|
|
// (add Acc16, Acc16) — same value added to itself, equivalent to a 1-bit
|
|
// left shift. Pattern needs a tied input so the result lands in A.
|
|
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Constraints = "$src = $dst" in {
  // Inline pattern: x + x.
  def ASLA16 :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# ASLA16 $dst, $src",
                   [(set Acc16:$dst, (add Acc16:$src, Acc16:$src))]>;
}

// 1-bit shift left of the accumulator: shl x, 1.
def : Pat<(shl Acc16:$src, (i16 1)),
          (ASLA16 Acc16:$src)>;
|
|
|
|
// 1-bit logical shift right. Pseudo because the MC LSR_A has no
|
|
// virtual output operand.
|
|
// Single-bit accumulator shifts. Every def is tied ($dst overwrites $src)
// through the enclosing `let`.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Constraints = "$src = $dst" in {
  def LSRA16 :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# LSRA16 $dst, $src",
                   [(set Acc16:$dst, (srl Acc16:$src, (i16 1)))]>;

  def ASLA8 :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                   "# ASLA8 $dst, $src",
                   [(set Acc8:$dst, (shl Acc8:$src, (i8 1)))]>;

  def LSRA8 :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                   "# LSRA8 $dst, $src",
                   [(set Acc8:$dst, (srl Acc8:$src, (i8 1)))]>;

  // Signed shift right by 1: copy A's high bit into carry, then ROR to bring
  // it back into A's high bit while halving the rest. The AsmPrinter expands
  // this to the 4-instruction PHA;ASL;PLA;ROR sequence.
  def ASRA16 :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# ASRA16 $dst, $src",
                   [(set Acc16:$dst, (sra Acc16:$src, (i16 1)))]>;
}
|
|
|
|
// Shifts by small constants — unroll into 2-4 single-bit shifts.
|
|
// Anything beyond 4 bits would benefit from a loop or a XBA-and-mask
|
|
// trick; left for a future peephole.
|
|
// Left shifts by 2..4: nested single-bit ASLA16.
def : Pat<(shl Acc16:$src, (i16 2)),
          (ASLA16 (ASLA16 Acc16:$src))>;
def : Pat<(shl Acc16:$src, (i16 3)),
          (ASLA16 (ASLA16 (ASLA16 Acc16:$src)))>;
def : Pat<(shl Acc16:$src, (i16 4)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 Acc16:$src))))>;

// Logical right shifts by 2..4: nested single-bit LSRA16.
def : Pat<(srl Acc16:$src, (i16 2)),
          (LSRA16 (LSRA16 Acc16:$src))>;
def : Pat<(srl Acc16:$src, (i16 3)),
          (LSRA16 (LSRA16 (LSRA16 Acc16:$src)))>;
def : Pat<(srl Acc16:$src, (i16 4)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 Acc16:$src))))>;

// Shift counts 5..7 — chained single-bit shifts. These used to be withheld
// because the DAG combiner narrowed `(trunc (shl (zext X), N))` back to
// `(shl X, N)` on i8 and re-entered LowerShift in a loop; the
// `isTypeDesirableForOp(SHL/SRL/SRA, i8) -> false` override in
// W65816TargetLowering now blocks that combine, so the patterns are safe.
// Cheaper than __ashlhi3/__lshrhi3 for these counts.
def : Pat<(shl Acc16:$src, (i16 5)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16 Acc16:$src)))))>;
def : Pat<(shl Acc16:$src, (i16 6)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 Acc16:$src))))))>;
def : Pat<(shl Acc16:$src, (i16 7)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 (ASLA16 Acc16:$src)))))))>;
def : Pat<(srl Acc16:$src, (i16 5)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16 Acc16:$src)))))>;
def : Pat<(srl Acc16:$src, (i16 6)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 Acc16:$src))))))>;
def : Pat<(srl Acc16:$src, (i16 7)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 (LSRA16 Acc16:$src)))))))>;
|
|
|
|
// Increment / decrement of A by 1. Match `(add x, 1)` and `(add x, -1)`
|
|
// (LLVM canonicalises sub-by-1 to add-by-(-1)).
|
|
// Tied +1 / -1 on the accumulator, in 16- and 8-bit flavours.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Constraints = "$src = $dst" in {
  def INA_PSEUDO :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# INA_PSEUDO $dst, $src",
                   [(set Acc16:$dst, (add Acc16:$src, (i16 1)))]>;

  def DEA_PSEUDO :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# DEA_PSEUDO $dst, $src",
                   [(set Acc16:$dst, (add Acc16:$src, (i16 -1)))]>;

  def INA_PSEUDO8 :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                   "# INA_PSEUDO8 $dst, $src",
                   [(set Acc8:$dst, (add Acc8:$src, (i8 1)))]>;

  def DEA_PSEUDO8 :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                   "# DEA_PSEUDO8 $dst, $src",
                   [(set Acc8:$dst, (add Acc8:$src, (i8 -1)))]>;
}
|
|
|
|
// Two's-complement negation: `0 - x` → `EOR #$FFFF; INC A` (i.e.
|
|
// bitwise-not then add 1). Catches (sub 0, x) which LLVM uses for
|
|
// `-x` and the `abs` intrinsic.
|
|
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Constraints = "$src = $dst" in {
  // Matches (sub 0, x) on i16.
  def NEGA16 :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# NEGA16 $dst, $src",
                   [(set Acc16:$dst, (sub (i16 0), Acc16:$src))]>;

  // i8 mirror. Without it, codegen takes the generic SBC path:
  // `LDA #0; SEC; SBC slot` plus an 8-bit M-mode prologue and PHA/PLA
  // bracketing — ~12 insns for `-x`. NEGA8 expands to `EOR #$FF; INA`
  // (2 insns in 8-bit M).
  def NEGA8 :
      W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                   "# NEGA8 $dst, $src",
                   [(set Acc8:$dst, (sub (i8 0), Acc8:$src))]>;
}
|
|
|
|
// Multi-precision negation: lo + hi halves of `-x` where x is i32.
|
|
// LLVM splits `0 - x` into `(subc 0, x_lo)` and `(sube 0, x_hi)`.
|
|
// We implement both via the ADD chain `~x + carry` since INC doesn't
|
|
// touch C; the bit pattern of C from `~x + 1` matches what `subc 0, x`
|
|
// would set (C=1 iff x was 0, i.e. no borrow).
|
|
// NEGC16 matches subc → "EOR #$FFFF; CLC; ADC #1" (7 bytes in 16-bit M)
// NEGE16 matches sube → "EOR #$FFFF; ADC #0" (6 bytes in 16-bit M, uses C-in)
|
|
// Lo half: (subc 0, x). Writes P so the carry can feed the hi half.
let Defs = [P], Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def NEGC16 :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# NEGC16 $dst, $src",
                   [(set Acc16:$dst, (subc (i16 0), Acc16:$src))]>;
}

// Hi half: (sube 0, x). Reads the incoming carry from P and rewrites P.
let Uses = [P], Defs = [P], Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def NEGE16 :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                   "# NEGE16 $dst, $src",
                   [(set Acc16:$dst, (sube (i16 0), Acc16:$src))]>;
}
|
|
|
|
// Bitwise NOT pattern moved below EORi16imm definition.
|
|
|
|
// 16-bit bitwise ops: AND / OR / XOR against an immediate or memory
|
|
// operand. Same shape as ADCi16imm / ADCabs minus the carry prefix
|
|
// (these don't read/write the carry flag).
|
|
// Tied 16-bit logic ops with an immediate right-hand operand.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Constraints = "$src = $dst" in {
  def ANDi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# ANDi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (and Acc16:$src, imm:$imm))]>;

  def ORAi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# ORAi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (or Acc16:$src, imm:$imm))]>;

  def EORi16imm :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                   "# EORi16imm $dst, $src, $imm",
                   [(set Acc16:$dst, (xor Acc16:$src, imm:$imm))]>;
}
|
|
|
|
// Bank-explicit i8 loads from a constant-int address (`*(uint8*)0xC035`).
|
|
// The default lowering goes through LDAptr ([dp],Y indirect-long) — 22 B /
|
|
// 35 cyc — because LDAptr's pattern `(load Wide16:$ptr)` matches once the
|
|
// matcher materialises the const into Wide16. These patterns shortcut to
|
|
// LDA8long (lda long, 0xAF, 6 B / 10 cyc) and run BEFORE that materialisation
|
|
// because the explicit imm leaf has higher AddedComplexity. Only the
|
|
// `(zextloadi8 imm)` form actually appears in real IR (i8 loads are
|
|
// always i16-extended at SDAG time on this 16-bit target); kept the
|
|
// raw `(load imm)` form too for symmetry with the store side.
|
|
let AddedComplexity = 50 in {
  // Raw i8 load from a constant address.
  def : Pat<(i8 (load (iPTR imm:$addr))),
            (LDA8long (i32 imm:$addr))>;
  def : Pat<(i8 (load (iPTR timm:$addr))),
            (LDA8long (i32 timm:$addr))>;

  // Zero-extending load: re-class the byte to Acc16, then AND with 0xFF.
  def : Pat<(i16 (zextloadi8 (iPTR imm:$addr))),
            (ANDi16imm
              (COPY_TO_REGCLASS (LDA8long (i32 imm:$addr)), Acc16),
              0xFF)>;
  def : Pat<(i16 (zextloadi8 (iPTR timm:$addr))),
            (ANDi16imm
              (COPY_TO_REGCLASS (LDA8long (i32 timm:$addr)), Acc16),
              0xFF)>;

  // Any-extending load: re-class only, no mask.
  def : Pat<(i16 (extloadi8 (iPTR imm:$addr))),
            (COPY_TO_REGCLASS (LDA8long (i32 imm:$addr)), Acc16)>;
  def : Pat<(i16 (extloadi8 (iPTR timm:$addr))),
            (COPY_TO_REGCLASS (LDA8long (i32 timm:$addr)), Acc16)>;
}
|
|
// Tied 16-bit logic ops whose right-hand operand is read from memory.
// The defs carry no inline pattern; the load-folding patterns follow.
let Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def ANDabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# ANDabs $dst, $src, $addr", []>;

  def ORAabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# ORAabs $dst, $src, $addr", []>;

  def EORabs :
      W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                   "# EORabs $dst, $src, $addr", []>;
}

// Fold an i16 load from a wrapped global or external symbol into the logic
// op. The external-symbol forms mirror the ADCabs / SBCabs patterns so that
// all *abs pseudos accept the same two address kinds.
def : Pat<(and Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ANDabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(and Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ANDabs Acc16:$src, texternalsym:$s)>;
def : Pat<(or Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ORAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(or Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ORAabs Acc16:$src, texternalsym:$s)>;
def : Pat<(xor Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (EORabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(xor Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (EORabs Acc16:$src, texternalsym:$s)>;
|
|
|
|
// Bitwise NOT, i.e. `x ^ 0xFFFF`.  Per the original note LLVM lowers `~x`
// and i1 inversion through this node; it is selected as a single EORi16imm
// with an all-ones immediate (EOR #$FFFF).
def : Pat<(xor Acc16:$src, (i16 -1)), (EORi16imm Acc16:$src, 0xFFFF)>;
|
|
|
|
// Shift-by-15 pseudos (single-bit moves across the whole word).
//
// (srl x, 15): bit 15 -> bit 0, result is 0 or 1.  The type-legalizer's
// SHL_PARTS expansion of `i32 << 1` needs this for the high-half "carry
// from low" slot; routing it through the __lshrhi3 libcall costs ~10 bytes
// per i32 shift-by-1.  Inline sequence: `ASL A; LDA #0; ROL A` — three
// instructions (5 bytes when LDA #0 uses the 3-byte 16-bit immediate form).
// ASL puts bit 15 into C and trashes A, LDA #0 leaves C untouched, ROL A
// rotates C into bit 0.
//
// (shl x, 15): bit 0 -> bit 15, result is 0 or 0x8000.  Used by the
// SRL_PARTS / SRA_PARTS expansion of `i32 >> 1` for the low-half "carry
// from hi" slot.  Mirror sequence: `LSR A; LDA #0; ROR A`.
//
// Both pseudos are tied ($src = $dst) and declare P as clobbered.
def SRL15A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                          "# SRL15A $dst, $src",
                          [(set Acc16:$dst, (srl Acc16:$src, (i16 15)))]> {
  let Constraints = "$src = $dst";
  let Defs = [P];
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
}
def SHL15A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                          "# SHL15A $dst, $src",
                          [(set Acc16:$dst, (shl Acc16:$src, (i16 15)))]> {
  let Constraints = "$src = $dst";
  let Defs = [P];
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
}
|
|
// (srl x, 8): high byte to low byte, zero high byte.  XBA swaps the two
// bytes of A (in 16-bit M); AND #$00FF clears the new high byte.  4 bytes
// total — much shorter than the __lshrhi3 libcall path.  Used by the i32
// shift-by-8 SHL_PARTS expansion for the cross-half slot.
//
// (shl x, 8): low byte to high byte, zero low byte.  Mirror of the above.
//
// Defs = [P]: XBA and AND both rewrite the N/Z flags, so the status register
// is declared clobbered, matching SRL15A / SHL15A / SRA15A.  Without it a
// live P value could be treated as surviving across these pseudos.
let Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [P] in {
  def SRL8A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                           "# SRL8A $dst, $src",
                           [(set Acc16:$dst, (srl Acc16:$src, (i16 8)))]>;
  def SHL8A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                           "# SHL8A $dst, $src",
                           [(set Acc16:$dst, (shl Acc16:$src, (i16 8)))]>;
}
|
|
|
|
// Shift counts 9..14.  SHL starts from SHL8A and appends 1..6 ASLA16 steps;
// SRL mirrors that with SRL8A followed by LSRA16 steps.  The original note
// credits the isTypeDesirableForOp override with preventing the i8-shift
// combine loop that previously kept these out of tablegen.
//
// Each result DAG is built from the previous count's DAG plus one more
// single-bit shift.
defvar W65816ShlBy9  = (ASLA16 (SHL8A Acc16:$src));
defvar W65816ShlBy10 = (ASLA16 W65816ShlBy9);
defvar W65816ShlBy11 = (ASLA16 W65816ShlBy10);
defvar W65816ShlBy12 = (ASLA16 W65816ShlBy11);
defvar W65816ShlBy13 = (ASLA16 W65816ShlBy12);
defvar W65816ShlBy14 = (ASLA16 W65816ShlBy13);

def : Pat<(shl Acc16:$src, (i16 9)),  W65816ShlBy9>;
def : Pat<(shl Acc16:$src, (i16 10)), W65816ShlBy10>;
def : Pat<(shl Acc16:$src, (i16 11)), W65816ShlBy11>;
def : Pat<(shl Acc16:$src, (i16 12)), W65816ShlBy12>;
def : Pat<(shl Acc16:$src, (i16 13)), W65816ShlBy13>;
def : Pat<(shl Acc16:$src, (i16 14)), W65816ShlBy14>;

defvar W65816SrlBy9  = (LSRA16 (SRL8A Acc16:$src));
defvar W65816SrlBy10 = (LSRA16 W65816SrlBy9);
defvar W65816SrlBy11 = (LSRA16 W65816SrlBy10);
defvar W65816SrlBy12 = (LSRA16 W65816SrlBy11);
defvar W65816SrlBy13 = (LSRA16 W65816SrlBy12);
defvar W65816SrlBy14 = (LSRA16 W65816SrlBy13);

def : Pat<(srl Acc16:$src, (i16 9)),  W65816SrlBy9>;
def : Pat<(srl Acc16:$src, (i16 10)), W65816SrlBy10>;
def : Pat<(srl Acc16:$src, (i16 11)), W65816SrlBy11>;
def : Pat<(srl Acc16:$src, (i16 12)), W65816SrlBy12>;
def : Pat<(srl Acc16:$src, (i16 13)), W65816SrlBy13>;
def : Pat<(srl Acc16:$src, (i16 14)), W65816SrlBy14>;
|
|
// (sra x, 15): sign-fill — $0000 when x is non-negative, $FFFF when it is
// negative.  Used by i32 sext-from-i16 type-legalization for the hi half,
// which avoids the __ashrhi3 libcall path.  The idea is to shift the sign
// bit into C (ASL A) and then turn C into an all-zeros / all-ones word; the
// exact 5-byte instruction sequence lives in the AsmPrinter expansion.
// Tied ($src = $dst) and clobbers P.
def SRA15A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                          "# SRA15A $dst, $src",
                          [(set Acc16:$dst, (sra Acc16:$src, (i16 15)))]> {
  let Constraints = "$src = $dst";
  let Defs = [P];
  let hasSideEffects = 0;
  let mayLoad = 0;
  let mayStore = 0;
}
|
|
|
|
// sext_inreg from i1: broadcast bit 0 to every bit.  LLVM emits this for
// `(c & 1) ? -1 : 0` shapes (e.g. CRC inner loops).  The result is
// `-(x & 1)`: 0 if bit 0 was clear, 0xFFFF if set.  Selected as two
// pseudos — mask to bit 0 (ANDi16imm), then two's-complement negate
// (NEGA16) — roughly 7 bytes per the original estimate.
def : Pat<(sext_inreg Acc16:$src, i1),
          (NEGA16 (ANDi16imm Acc16:$src, 1))>;

// sext_inreg from i8: branchless `((x & 0xFF) ^ 0x80) - 0x80`, the same
// sequence LowerSignExtend uses for ISD::SIGN_EXTEND i8->i16.  LLVM emits
// this when expanding a sextload-i16-from-i8 (SEXTLOAD i8 is set to Expand
// in the lowering ctor) and for explicit `(int)(signed char)` casts.
def : Pat<(sext_inreg Acc16:$src, i8),
          (SBCi16imm
             (EORi16imm (ANDi16imm Acc16:$src, 0x00FF), 0x0080),
             0x0080)>;
|
|
|
|
// Frame-index memory operand.  A FrameIndex and an offset are packed into
// one operand (two i32imm sub-operands); eliminateFrameIndex later expands
// the users into LDA / STA d,S with the offset baked in.  Used by
// LowerFormalArguments to read stack-passed arguments and by spill/reload
// via storeRegToStackSlot.  Printed with printFrameMem.
let MIOperandInfo = (ops i32imm, i32imm),
    PrintMethod = "printFrameMem" in
def memfi : Operand<i16>;
|
|
|
|
// LDAfi: 16-bit accumulator load from a frame-index slot.  Flagged
// rematerializable; W65816InstrInfo::isReMaterializableImpl restricts that
// to fixed (immutable) argument slots.  Without it, greedy regalloc spills
// every arg load to a fresh local slot and reloads from there, ballooning
// every i32-arg function by 4-6 instructions.
def LDAfi : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
                         "# LDAfi $dst, $addr", []> {
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;
  let isReMaterializable = 1;
}
|
|
// STAfi: 16-bit store to a frame-index slot.  The source is Wide16 rather
// than Acc16 so greedy regalloc may park the value in IMGn instead of A.
// When the source ends up in IMGn, eliminateFrameIndex prepends an LDA dp,
// which overwrites A — hence Defs = [A].
def STAfi : W65816Pseudo<(outs), (ins Wide16:$src, memfi:$addr),
                         "# STAfi $src, $addr", []> {
  let mayStore = 1;
  let mayLoad = 0;
  let hasSideEffects = 0;
  let Defs = [A];
}
|
|
// STA8fi: one-byte (truncating) store to a frame-index slot.
// eliminateFrameIndex wraps the store as SEP #$20 / STA d,S / REP #$20 so
// exactly one byte is written.  An unwrapped 16-bit STA would also write
// slot+1, corrupting the next stack slot (or the return address when the
// slot is the last one of an alloca).  P is listed in Defs because SEP/REP
// change the M bit.
def STA8fi : W65816Pseudo<(outs), (ins Acc16:$src, memfi:$addr),
                          "# STA8fi $src, $addr", []> {
  let mayStore = 1;
  let mayLoad = 0;
  let hasSideEffects = 1;
  let Defs = [P];
}
|
|
|
|
// ComplexPattern that turns a FrameIndex SDValue into the two sub-operands
// of memfi.  The matching logic is SelectFrameIndex in
// W65816ISelDAGToDAG.cpp.
def addr_fi : ComplexPattern<i16, 2, "SelectFrameIndex", [frameindex]>;

// Plain 16-bit load / store of a frame-index slot.
def : Pat<(i16 (load addr_fi:$addr)), (LDAfi addr_fi:$addr)>;
def : Pat<(store Acc16:$src, addr_fi:$addr),
          (STAfi Acc16:$src, addr_fi:$addr)>;
|
|
|
|
// i8 access to a FrameIndex slot.
//
// Loads go through the 16-bit LDAfi and therefore read two bytes; the extra
// high byte is harmless because extending loads mask or sign-extend it and
// narrowing loads drop it when moving to Acc8.
//
// Stores must write exactly one byte: i8 alloca arrays pack adjacent slots
// one byte apart, and a 16-bit STA at the last slot of the array would
// corrupt the return address.  All byte stores therefore use STA8fi, which
// wraps the STA in SEP #$20 / REP #$20.

// Loads.
def : Pat<(i8 (load addr_fi:$addr)),
          (COPY_TO_REGCLASS (LDAfi addr_fi:$addr), Acc8)>;
def : Pat<(i16 (zextloadi8 addr_fi:$addr)),
          (ANDi16imm (LDAfi addr_fi:$addr), 0xFF)>;
def : Pat<(i16 (extloadi8 addr_fi:$addr)),
          (LDAfi addr_fi:$addr)>;

// Stores.
def : Pat<(store Acc8:$src, addr_fi:$addr),
          (STA8fi (COPY_TO_REGCLASS Acc8:$src, Acc16), addr_fi:$addr)>;
def : Pat<(truncstorei8 Acc16:$src, addr_fi:$addr),
          (STA8fi Acc16:$src, addr_fi:$addr)>;
|
|
|
|
// Frame-index folding into ADC / SBC / AND / ORA / EOR / CMP: the second
// operand is a stack slot (memfi) instead of an absolute address.
//
// Shared shape of the tied accumulator forms: $dst is tied to $src and the
// pseudo only reads memory.
class W65816AccFIPseudo<string mnemonic>
  : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                 !strconcat("# ", mnemonic, " $dst, $src, $addr"), []> {
  let Constraints = "$src = $dst";
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
}

// ADCfi / SBCfi define P so they can also match `addc` / `subc` (the lo
// half of a multi-precision split).
let Defs = [P] in {
  def ADCfi : W65816AccFIPseudo<"ADCfi">;
  def SBCfi : W65816AccFIPseudo<"SBCfi">;
}

// Bitwise forms.
// NOTE(review): no Defs = [P] here — confirm whether the N/Z update of
// AND/ORA/EOR needs to be modelled.
def ANDfi : W65816AccFIPseudo<"ANDfi">;
def ORAfi : W65816AccFIPseudo<"ORAfi">;
def EORfi : W65816AccFIPseudo<"EORfi">;

// ADCEfi / SBCEfi: chained ADC/SBC for the hi half of a multi-precision
// split.  Uses = [P] consumes the carry left by the previous
// addc/adde/subc/sube; Defs = [P] produces the next one.
let Uses = [P], Defs = [P] in {
  def ADCEfi : W65816AccFIPseudo<"ADCEfi">;
  def SBCEfi : W65816AccFIPseudo<"SBCEfi">;
}

// CMPfi: compare A against a stack slot; no result register, only flags.
def CMPfi : W65816Pseudo<(outs), (ins Acc16:$lhs, memfi:$addr),
                         "# CMPfi $lhs, $addr", []> {
  let hasSideEffects = 0;
  let mayLoad = 1;
  let mayStore = 0;
  let Defs = [P];
}
|
|
// Fold a 16-bit frame-index load into the accumulator op that consumes it:
// `(op A, (load fi))` selects the matching *fi pseudo.
class W65816FoldFIPat<SDNode op, Instruction inst>
  : Pat<(op Acc16:$src, (i16 (load addr_fi:$addr))),
        (inst Acc16:$src, addr_fi:$addr)>;

// Plain add/sub and the carry-producing lo-half forms share ADCfi/SBCfi.
def : W65816FoldFIPat<add,  ADCfi>;
def : W65816FoldFIPat<sub,  SBCfi>;
def : W65816FoldFIPat<addc, ADCfi>;
def : W65816FoldFIPat<subc, SBCfi>;
// Carry-consuming hi-half forms.
def : W65816FoldFIPat<adde, ADCEfi>;
def : W65816FoldFIPat<sube, SBCEfi>;
// Bitwise.
def : W65816FoldFIPat<and, ANDfi>;
def : W65816FoldFIPat<or,  ORAfi>;
def : W65816FoldFIPat<xor, EORfi>;
// Compare (flags only).
def : W65816FoldFIPat<W65816cmp, CMPfi>;
|
|
|
|
// Zero-extending byte load from a wrapped symbol address.  A 16-bit LDAabs
// reads two bytes (the wanted byte plus the one after it) and AND #$00FF
// then clears the high byte.  This reads one byte past the source: fine for
// standalone bytes in the bank-0 data area, but the caller must ensure
// addr+1 is safe to read.  A future optimisation could use SEP/REP
// transitions to perform a true 8-bit load.  Emitted for both global
// addresses and external symbols.
foreach sym = [tglobaladdr, texternalsym] in
  def : Pat<(i16 (zextloadi8 (W65816Wrapper sym:$g))),
            (ANDi16imm (LDAabs sym:$g), 0xFF)>;
|
|
|
|
// CMP / branches.  CMP sets the flags via the W65816cmp SDNode (glue out);
// the W65816brcc node consumes the glue and dispatches to the right Bxx
// instruction by condition code.  All compare pseudos produce no register
// result and define P.
let hasSideEffects = 0, mayStore = 0, Defs = [P] in {
  // Compare against an immediate (16-bit and 8-bit accumulator views).
  let mayLoad = 0 in {
    def CMPi16imm : W65816Pseudo<(outs), (ins Acc16:$lhs, i16imm:$rhs),
                                 "# CMPi16imm $lhs, $rhs",
                                 [(W65816cmp Acc16:$lhs, (i16 imm:$rhs))]>;
    def CMPi8imm  : W65816Pseudo<(outs), (ins Acc8:$lhs, i8imm:$rhs),
                                 "# CMPi8imm $lhs, $rhs",
                                 [(W65816cmp Acc8:$lhs, (i8 imm:$rhs))]>;
  }

  // Compare against a word at an absolute address.
  let mayLoad = 1 in
  def CMPabs : W65816Pseudo<(outs), (ins Acc16:$lhs, i32imm:$addr),
                            "# CMPabs $lhs, $addr", []>;
}

// Fold a 16-bit load from a wrapped global / external symbol into CMPabs.
foreach sym = [tglobaladdr, texternalsym] in
  def : Pat<(W65816cmp Acc16:$lhs, (i16 (load (W65816Wrapper sym:$g)))),
            (CMPabs Acc16:$lhs, sym:$g)>;
|
|
|
|
// 16-bit byte swap: XBA exchanges A.high and A.low.  The pattern matches the
// (bswap Acc16) SDNode emitted by clang for byte-reverse loops.
//
// Defs = [P]: XBA rewrites the N and Z flags, so the status register is
// declared clobbered (as SRL15A / SHL15A / SRA15A already do).  Without it a
// live P value could be treated as surviving across the swap.
let Constraints = "$src = $dst",
    hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [P] in {
  def XBA16 : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                           "# XBA16 $dst, $src",
                           [(set Acc16:$dst, (bswap Acc16:$src))]>;
}
|
|
|
|
// Two-Acc16 binary ops.  There is only one A register, so when both
// operands are computed values (neither a foldable load/imm/global) one of
// them has to go through a stack slot.  Each pseudo's custom inserter
// allocates a fresh slot and emits a STAfi + OPfi sequence; the register
// allocator handles the surrounding spills/reloads.
//
// hasSideEffects = 1 tells the validator the pseudo may load/store without
// a matching SDNode pattern (the stores are added by the inserter and are
// not visible in the DAG pattern).
class W65816BinRRPseudo<string mnemonic, SDNode op>
  : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                 !strconcat("# ", mnemonic, " $dst, $src1, $src2"),
                 [(set Acc16:$dst, (op Acc16:$src1, Acc16:$src2))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
}

// Defs = [P] on ADD_RR / SUB_RR models the C-flag result of the underlying
// ADC/SBC, letting them serve `addc` / `subc` (the lo half of an i32 split)
// as well as plain `add` / `sub`.
let Defs = [P] in {
  def ADD_RR : W65816BinRRPseudo<"ADD_RR", add>;
  def SUB_RR : W65816BinRRPseudo<"SUB_RR", sub>;
}
def AND_RR : W65816BinRRPseudo<"AND_RR", and>;
def ORA_RR : W65816BinRRPseudo<"ORA_RR", or>;
def EOR_RR : W65816BinRRPseudo<"EOR_RR", xor>;

// Carry-producing lo-half forms reuse the same pseudos.
def : Pat<(addc Acc16:$src1, Acc16:$src2), (ADD_RR Acc16:$src1, Acc16:$src2)>;
def : Pat<(subc Acc16:$src1, Acc16:$src2), (SUB_RR Acc16:$src1, Acc16:$src2)>;
|
|
|
|
// Chained-carry two-Acc16 add/sub for the hi half of i32 splits.  The
// inserter mirrors ADD_RR (STAfi spill + ADCEfi load-fold) but emits the
// carry-chain pseudo, so the C flag from the previous addc/adde is consumed
// rather than overwritten by a CLC.  Uses = [P] plus Defs = [P] expresses
// that carry chain.
def ADDE_RR : W65816Pseudo<(outs Acc16:$dst),
                           (ins Acc16:$src1, Acc16:$src2),
                           "# ADDE_RR $dst, $src1, $src2",
                           [(set Acc16:$dst,
                                 (adde Acc16:$src1, Acc16:$src2))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Uses = [P];
  let Defs = [P];
}
def SUBE_RR : W65816Pseudo<(outs Acc16:$dst),
                           (ins Acc16:$src1, Acc16:$src2),
                           "# SUBE_RR $dst, $src1, $src2",
                           [(set Acc16:$dst,
                                 (sube Acc16:$src1, Acc16:$src2))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Uses = [P];
  let Defs = [P];
}

// Register-register compare: flags only, expanded by a custom inserter.
def CMP_RR : W65816Pseudo<(outs), (ins Acc16:$lhs, Acc16:$rhs),
                          "# CMP_RR $lhs, $rhs",
                          [(W65816cmp Acc16:$lhs, Acc16:$rhs)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Defs = [P];
}
|
|
|
|
// Pointer dereference.  The 65816 cannot dereference a register pointer
// directly: every indirect addressing mode reads the pointer from memory
// (DP or stack).  LDAptr / STAptr / STBptr therefore stage the pointer in
// memory and go through an indexed-indirect access with Y = 0.
//
// Per the notes in this file the inserter lowers them to [dp],Y
// indirect-long via DP scratch $E0..$E2 (see the inserter in
// W65816ISelLowering.cpp); it uses A and Y plus the DP scratch and does not
// touch X.
// NOTE(review): an older description spoke of spilling the pointer to a
// fresh stack slot and using (slot,S),Y — confirm which lowering is current.
//
// Defs: Y (the inserter's LDY #0) and P (flags written by the emitted
// loads).  hasSideEffects = 1 covers the staging stores the inserter adds
// on top of the dereference itself.
//
// $ptr is Wide16 (A or IMGn), so under register pressure RA can park the
// pointer in an IMGn DP slot.  With Acc16:$ptr the pointer was silently
// coalesced with the loop-PHI accumulator: both wanted A at the end of the
// block, and PHI elimination dropped the COPY needed to refresh A with the
// pointer at the top of the loop.  With Wide16, the COPY $a = ptr lowers to
// a real LDA $dp.
def LDAptr : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr),
                          "# LDAptr $dst, $ptr",
                          [(set Acc16:$dst, (load Wide16:$ptr))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}

// Variant that hardcodes bank = 0 for the [dp],Y deref.  Used by
// LowerVAARG: va_arg dereferences a stack pointer, and the 65816 stack is
// always in bank 0 — but under the GS/OS Loader the default $E2 source
// ($BE = our bank when LoaderBankDeref is on) would point reads at the
// wrong bank.  This variant always emits `STZ $E2`, so the deref is
// unambiguously bank-0.  Found via snprintf("%d", N) under the Loader
// returning constant garbage instead of N's decimal — see
// feedback_loader_substantial_test.md.
def LDAptrBank0 : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr),
                               "# LDAptrBank0 $dst, $ptr",
                               [(set Acc16:$dst,
                                     (W65816vaargLoad Wide16:$ptr))]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}

// 16-bit store through a Wide16 pointer.
def STAptr : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr),
                          "# STAptr $val, $ptr",
                          [(store Acc16:$val, Wide16:$ptr)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
|
|
|
|
// i8 zero-extending load through a pointer: perform the 16-bit LDAptr and
// mask off the high byte.  This reads one byte past the source, which is
// fine for byte-array iteration where the buffer is at least 2 bytes long.
// A future SEP/REP-aware mode pass could switch to a true 8-bit LDA.
def : Pat<(i16 (zextloadi8 Wide16:$ptr)),
          (ANDi16imm (LDAptr Wide16:$ptr), 0xFF)>;

// Any-extending byte load through a pointer: the consumer ignores the high
// byte, so the 16-bit LDAptr is used unmasked.  Same one-byte-past-buffer
// caveat as zextloadi8.
def : Pat<(i16 (extloadi8 Wide16:$ptr)),
          (LDAptr Wide16:$ptr)>;

// Byte loads from Wrapper(global) absolute addresses are matched by the
// dedicated zextloadi8 patterns earlier in this file; the Wide16 patterns
// above catch the case where the pointer is materialised as a value.
|
|
|
|
// Intermediate pseudos used by the LDAptr / STAptr inserters.  Each takes a
// memfi naming the slot that holds the pointer; eliminateFrameIndex
// resolves it to LDA_StackRelIndY / STA_StackRelIndY with the right d-byte.
// Y must already hold 0 at the issue point (the inserter emits LDY #0
// first), which is why both declare Uses = [Y].
def LDAfi_indY : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
                              "# LDAfi_indY $dst, $addr", []> {
  let mayLoad = 1;
  let mayStore = 0;
  let hasSideEffects = 0;
  let Uses = [Y];
}
def STAfi_indY : W65816Pseudo<(outs), (ins Acc16:$src, memfi:$addr),
                              "# STAfi_indY $src, $addr", []> {
  let mayStore = 1;
  let mayLoad = 0;
  let hasSideEffects = 0;
  let Uses = [Y];
}
|
|
|
|
// i8 truncating store through a pointer.  Same shape as STAptr, but the
// custom inserter wraps the actual STA in SEP/REP so M is 8-bit across the
// store and only one byte is written; an unwrapped 16-bit STA would clobber
// the byte at ptr+1.  Two DAG shapes end up here: the truncstorei8 of an
// i16 value matched below (common with arg promotion), and a true i8 store
// of an Acc8 value, which is mapped onto this pseudo by a separate Pat.
def STBptr : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr),
                          "# STBptr $val, $ptr",
                          [(truncstorei8 Acc16:$val, Wide16:$ptr)]> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
|
|
|
|
// Pointer access with a constant offset.  `(load (add ptr, $off))` and
// `(store val, (add ptr, $off))` come up for struct field access and array
// indexing with small constant offsets.  Without these patterns the offset
// becomes an explicit ADC #imm that has to spill A and recompute the
// pointer for every access.  With them the inserter simply loads Y with the
// offset (Y is 16-bit, so any i16 constant fits).
//
// LDAptrOff / STAptrOff / STBptrOff use the same [dp],Y lowering as the
// no-offset variants, with the offset folded into Y.
def LDAptrOff : W65816Pseudo<(outs Acc16:$dst),
                             (ins Wide16:$ptr, i16imm:$off),
                             "# LDAptrOff $dst, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayLoad = 1;
  let Defs = [Y, P];
}
def STAptrOff : W65816Pseudo<(outs),
                             (ins Acc16:$val, Wide16:$ptr, i16imm:$off),
                             "# STAptrOff $val, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}
def STBptrOff : W65816Pseudo<(outs),
                             (ins Acc16:$val, Wide16:$ptr, i16imm:$off),
                             "# STBptrOff $val, $ptr, $off", []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let mayStore = 1;
  let Defs = [Y, P];
}

// 16-bit load / store at ptr + constant.
def : Pat<(i16 (load (add Wide16:$ptr, (i16 imm:$off)))),
          (LDAptrOff Wide16:$ptr, imm:$off)>;
def : Pat<(store Acc16:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STAptrOff Acc16:$val, Wide16:$ptr, imm:$off)>;

// Byte stores at ptr + constant: truncating from i16, or a true i8 value.
def : Pat<(truncstorei8 Acc16:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STBptrOff Acc16:$val, Wide16:$ptr, imm:$off)>;
def : Pat<(store Acc8:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STBptrOff (COPY_TO_REGCLASS Acc8:$val, Acc16),
                     Wide16:$ptr, imm:$off)>;

// True i8 store with no offset.
def : Pat<(store Acc8:$val, Wide16:$ptr),
          (STBptr (COPY_TO_REGCLASS Acc8:$val, Acc16), Wide16:$ptr)>;
|
|
|
|
// ---------------------------------------------------------------------
// ptr32 deref pseudos.  Same shape and inserter as LDAptr/STAptr/STBptr,
// but the pointer is a Wide32 (i32) value: sub_lo carries the low 16 bits
// of the address, sub_hi carries the bank byte in its low half.  The
// inserter stages the low 16 bits at $E0..$E1 and the bank byte at $E2,
// then emits LDA/STA [dp],Y just like the i16 path — but with a
// pointer-derived bank instead of a forced 0.
//
// Dead unless ptr32 mode is active (LowerLoad/LowerStore only emit
// W65816ldPtr/stPtr/stbPtr when the address is i32).
// ---------------------------------------------------------------------

// Flags common to every ptr32 pseudo: custom inserter, side effects, and
// clobbers of Y and P.  mayLoad / mayStore are set per group below.
class W65816Ptr32Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
  : W65816Pseudo<outs, ins, asmstr, pattern> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Defs = [Y, P];
}

let mayLoad = 1 in
def LDAptr32 : W65816Ptr32Pseudo<(outs Acc16:$dst), (ins AnyWide32:$ptr),
                 "# LDAptr32 $dst, $ptr",
                 [(set Acc16:$dst, (W65816ldPtr AnyWide32:$ptr))]>;

let mayStore = 1 in {
  def STAptr32 : W65816Ptr32Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr),
                   "# STAptr32 $val, $ptr",
                   [(W65816stPtr Acc16:$val, AnyWide32:$ptr)]>;
  def STBptr32 : W65816Ptr32Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr),
                   "# STBptr32 $val, $ptr",
                   [(W65816stbPtr Acc16:$val, AnyWide32:$ptr)]>;
}

// Constant-offset variants (no selection patterns attached here).
let mayLoad = 1 in
def LDAptr32Off : W65816Ptr32Pseudo<(outs Acc16:$dst),
                    (ins AnyWide32:$ptr, i16imm:$off),
                    "# LDAptr32Off $dst, $ptr, $off", []>;

let mayStore = 1 in {
  def STAptr32Off : W65816Ptr32Pseudo<(outs),
                      (ins Acc16:$val, AnyWide32:$ptr, i16imm:$off),
                      "# STAptr32Off $val, $ptr, $off", []>;
  def STBptr32Off : W65816Ptr32Pseudo<(outs),
                      (ins Acc16:$val, AnyWide32:$ptr, i16imm:$off),
                      "# STBptr32Off $val, $ptr, $off", []>;
}
|
|
|
|
// Direct ptr32 patterns over the generic ISD::LOAD / ISD::STORE nodes when
// the address is an i32 (AnyWide32) register.  They are unreachable while
// i32 is not a legal type (ptr16 mode); once ptr32 mode is activated they
// fire instead of the i16-pointer LDAptr / STAptr patterns.

// 16-bit load / store.
def : Pat<(i16 (load AnyWide32:$ptr)), (LDAptr32 AnyWide32:$ptr)>;
def : Pat<(store Acc16:$val, AnyWide32:$ptr),
          (STAptr32 Acc16:$val, AnyWide32:$ptr)>;

// Byte store truncating from i16.
def : Pat<(truncstorei8 Acc16:$val, AnyWide32:$ptr),
          (STBptr32 Acc16:$val, AnyWide32:$ptr)>;

// Byte loads extended to i16: zext masks the high byte, anyext does not.
def : Pat<(i16 (zextloadi8 AnyWide32:$ptr)),
          (ANDi16imm (LDAptr32 AnyWide32:$ptr), 0xFF)>;
def : Pat<(i16 (extloadi8 AnyWide32:$ptr)), (LDAptr32 AnyWide32:$ptr)>;

// True i8 load / store (Acc8 value).
def : Pat<(i8 (load AnyWide32:$ptr)),
          (COPY_TO_REGCLASS (ANDi16imm (LDAptr32 AnyWide32:$ptr), 0xFF),
                            Acc8)>;
def : Pat<(store Acc8:$val, AnyWide32:$ptr),
          (STBptr32 (COPY_TO_REGCLASS Acc8:$val, Acc16), AnyWide32:$ptr)>;
|
|
|
|
// Off variants — folded constant-offset add patterns deferred until
|
|
// ptr32 mode is activated and we can profile real cases. The base
|
|
// LDAptr32/STAptr32 pseudos handle the general (add ptr, off) case
|
|
// correctly via a separate i32 ADD; the Off pseudos are an optional
|
|
// optimization for small constant offsets.
|
|
|
|
// Split-pair variants: same semantics as LDAptr32 / STAptr32 / STBptr32,
// but the pointer arrives as two separate i16 register operands (lo + hi)
// instead of one Wide32 register pair.  Used by the W65816LowerWide32
// pre-RA pass to relieve register-pair allocation pressure: it walks
// REG_SEQUENCE + LDAptr32 chains, decomposes the Wide32 vregs into pairs of
// i16 vregs, and rewrites the LDAptr32 family to take the two halves
// directly.  None of these has a selection pattern.
class W65816Ptr32SplitPseudo<dag outs, dag ins, string asmstr>
  : W65816Pseudo<outs, ins, asmstr, []> {
  let usesCustomInserter = 1;
  let hasSideEffects = 1;
  let Defs = [Y, P];
}

let mayLoad = 1 in
def LDAptr32S : W65816Ptr32SplitPseudo<(outs Acc16:$dst),
                  (ins Wide16:$ptrLo, Wide16:$ptrHi),
                  "# LDAptr32S $dst, $ptrLo, $ptrHi">;

let mayStore = 1 in {
  def STAptr32S : W65816Ptr32SplitPseudo<(outs),
                    (ins Acc16:$val, Wide16:$ptrLo, Wide16:$ptrHi),
                    "# STAptr32S $val, $ptrLo, $ptrHi">;
  def STBptr32S : W65816Ptr32SplitPseudo<(outs),
                    (ins Acc16:$val, Wide16:$ptrLo, Wide16:$ptrHi),
                    "# STBptr32S $val, $ptrLo, $ptrHi">;
}
|
|
|
|
// i8 load through a Wide16 pointer producing a true i8 (Acc8) result.
// Reuses the zextloadi8 approach: 16-bit LDAptr, mask the high byte, then
// narrow to Acc8.  The COPY_TO_REGCLASS to Acc8 is a no-op at MC level
// (same physical A).  Reads one byte past the source; fine for char-array
// iteration where the buffer is at least 2 bytes.
def : Pat<(i8 (load Wide16:$ptr)),
          (COPY_TO_REGCLASS (ANDi16imm (LDAptr Wide16:$ptr), 0xFF), Acc8)>;

// Acc8 <-> Acc16 type conversions.  Both classes alias physical A, so the
// COPY_TO_REGCLASS itself costs nothing at MC level.
//   anyext: high byte left as-is, since the consumer does not care.
//   zext:   additionally masks the high byte (which holds B from before
//           any prior SEP).
//   trunc:  plain re-class to Acc8.
def : Pat<(i16 (anyext Acc8:$src)), (COPY_TO_REGCLASS Acc8:$src, Acc16)>;
def : Pat<(i16 (zext Acc8:$src)),
          (ANDi16imm (COPY_TO_REGCLASS Acc8:$src, Acc16), 0xFF)>;
def : Pat<(i8 (trunc Acc16:$src)), (COPY_TO_REGCLASS Acc16:$src, Acc8)>;
|
|
|
|
// Acc8 reg-reg arithmetic and bitwise ops, expanded through the Acc16 _RR
// pseudos.  This is cheap because Acc8 and Acc16 alias the same physical A,
// making COPY_TO_REGCLASS a no-op.  Only the low byte matters; the high
// byte picks up unrelated bits but is discarded by the final narrow-back to
// Acc8.  It lets an i8 expression that legalization did not promote (e.g.
// an i8 XOR feeding only an i8 store) reuse the spill-and-OPfi inserter
// without dedicated Acc8 pseudos.
//
//   op — the i8 SDNode to match
//   ri — the Acc16 _RR pseudo that implements it
multiclass Acc8RR<SDNode op, Instruction ri> {
  // Both operands widened to Acc16 and fed to the 16-bit pseudo.
  defvar WideResult = (ri (COPY_TO_REGCLASS Acc8:$a, Acc16),
                          (COPY_TO_REGCLASS Acc8:$b, Acc16));
  def : Pat<(i8 (op Acc8:$a, Acc8:$b)),
            (COPY_TO_REGCLASS WideResult, Acc8)>;
}

defm : Acc8RR<add, ADD_RR>;
defm : Acc8RR<sub, SUB_RR>;
defm : Acc8RR<and, AND_RR>;
defm : Acc8RR<or,  ORA_RR>;
defm : Acc8RR<xor, EOR_RR>;
|
|
|
|
// (memory inc/dec patterns moved below INC_Abs/DEC_Abs defs.)
|
|
|
|
// (Branch patterns moved below the Real Instructions section since
|
|
// they reference instruction defs.)
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
// Real Instructions
|
|
//
|
|
// Opcodes taken from the WDC W65C816S data sheet. Instructions whose size
|
|
// depends on the M or X bits exist in two variants (Imm8 / Imm16) and carry
|
|
// TSFlags bits indicating which processor mode they assume; the REP/SEP
|
|
// scheduling pass uses those to verify/insert mode transitions.
|
|
//
|
|
// Disassembler note: for every opcode that has both an _Imm8 and an _Imm16
|
|
// form (LDA/LDX/LDY/ADC/SBC/CMP/AND/ORA/EOR/BIT/CPX/CPY), the two forms share
|
|
// the same opcode byte but differ in operand width according to M/X mode.
|
|
// The scaffold disassembler only consults the default "W65816" decoder
|
|
// table, so we push the _Imm8 variants into namespaces "W65816MHigh" /
|
|
// "W65816XHigh". That keeps only one variant per opcode in the default
|
|
// table (the 3-byte _Imm16 form for M-dependent insns, and the 3-byte
|
|
// _Imm16 form for X-dependent insns), so `llvm-objdump -d` always decodes
|
|
// these as 16-bit immediates until the mode-aware decoder lands.
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//---------------------------------------------------------------- CPU control
// NOP: explicitly free of side effects and memory access.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in
def NOP : InstImplied<0xEA, "nop">;

// REP / SEP take an 8-bit immediate operand and are marked as having side
// effects.
let hasSideEffects = 1, mayLoad = 0, mayStore = 0 in {
  def REP : InstImm8<0xC2, "rep">;
  def SEP : InstImm8<0xE2, "sep">;
}

// Implied-operand instructions that touch no memory.  hasSideEffects is
// left unset on these.
// NOTE(review): none of them declares Defs/Uses for P or A — confirm that
// is intended.
let mayLoad = 0, mayStore = 0 in {
  def CLC : InstImplied<0x18, "clc">;
  def SEC : InstImplied<0x38, "sec">;
  def CLI : InstImplied<0x58, "cli">;
  def SEI : InstImplied<0x78, "sei">;
  def CLD : InstImplied<0xD8, "cld">;
  def SED : InstImplied<0xF8, "sed">;
  def CLV : InstImplied<0xB8, "clv">;

  def XCE : InstImplied<0xFB, "xce">;
  def XBA : InstImplied<0xEB, "xba">;
}

// WAI / STP: no flags overridden, the format-class defaults apply.
def WAI : InstImplied<0xCB, "wai">;
def STP : InstImplied<0xDB, "stp">;
|
|
|
|
// WDM (William D Mensch) — reserved 2-byte NOP-equivalent. Useful as
|
|
// a debugger / emulator hook: MAME's apple2gs CPU traps on WDM and a
|
|
// Lua plugin can dispatch on the operand byte. CPU-side, it acts as
|
|
// a 2-byte NOP. Operand syntax mirrors MVN: `wdm $ab` (no `#`).
|
|
def WDM : InstDP<0x42, "wdm">;
|
|
|
|
// TRB / TSB: Test and Reset / Test and Set memory Bits.  Atomic bit
// clear/set on the byte (or 16-bit word, depending on the M flag) at the
// given DP or absolute address.  The Z flag reflects (mem & A), where `mem`
// is the memory operand before modification.  Useful for memory-mapped IO
// bit twiddling.  There is no DP-indexed form.
def TRB_DP  : InstDP <0x14, "trb">;
def TRB_Abs : InstAbs<0x1C, "trb">;
def TSB_DP  : InstDP <0x04, "tsb">;
def TSB_Abs : InstAbs<0x0C, "tsb">;
|
|
|
|
// PEI: Push Effective Indirect.  Fetches a 16-bit value from the direct page
// and pushes it, which allows indirect parameter passing without routing the
// value through A first.
def PEI_DP : InstDP<0xD4, "pei">;
|
|
|
|
//---------------------------------------------------------------- LDA (load A)
// Note on `_Imm8` forms of the mode-dependent load/arith/compare ops: they
// are isCodeGenOnly so the asm matcher never selects them.  The AsmParser
// cannot know the current M/X bits and therefore always uses the _Imm16
// form.  Codegen may still pick _Imm8 explicitly once 8-bit patterns exist.
def LDA_Imm8 : InstImm8<0xA9, "lda"> {
  let MHigh = 1;
  let DecoderNamespace = "W65816MHigh";
  let isCodeGenOnly = 1;
  let Defs = [A];
}
def LDA_Imm16 : InstImm16<0xA9, "lda"> {
  let MLow = 1;
  let Defs = [A];
}

def LDA_DP     : InstDP     <0xA5, "lda">;
def LDA_Abs    : InstAbs    <0xAD, "lda">;
def LDA_Long   : InstAbsLong<0xAF, "lda">;
def LDA_DPX    : InstDPX    <0xB5, "lda">;
def LDA_AbsX   : InstAbsX   <0xBD, "lda">;
def LDA_AbsY   : InstAbsY   <0xB9, "lda">;
def LDA_DPInd  : InstDPInd  <0xB2, "lda">;
def LDA_DPIndY : InstDPIndY <0xB1, "lda">;
def LDA_DPIndX : InstDPIndX <0xA1, "lda">;

// LDA [dp]: defines A.
def LDA_DPIndLong : InstDPIndLong<0xA7, "lda"> {
  let Defs = [A];
}

// LDA [dp],Y: reads Y to form the indexed address and defines A.  Before
// these were declared, regalloc assumed the load left A untouched and
// dead-code-eliminated COPYs that were meant to materialise the next pointer
// in A -- a silent miscompile in mySwap-style helpers.
def LDA_DPIndLongY : InstDPIndLongY<0xB7, "lda"> {
  let Defs = [A];
  let Uses = [Y];
}

def LDA_LongX : InstAbsLongX<0xBF, "lda">;
|
|
|
|
//---------------------------------------------------------------- STA (store A)
def STA_DP     : InstDP     <0x85, "sta">;
def STA_Abs    : InstAbs    <0x8D, "sta">;
def STA_Long   : InstAbsLong<0x8F, "sta">;
def STA_DPX    : InstDPX    <0x95, "sta">;
def STA_AbsX   : InstAbsX   <0x9D, "sta">;
def STA_AbsY   : InstAbsY   <0x99, "sta">;
def STA_DPInd  : InstDPInd  <0x92, "sta">;
def STA_DPIndY : InstDPIndY <0x91, "sta">;
def STA_DPIndX : InstDPIndX <0x81, "sta">;

// STA [dp]: reads A (the value being stored).
def STA_DPIndLong : InstDPIndLong<0x87, "sta"> {
  let Uses = [A];
}

// STA [dp],Y: reads A (the value being stored) and Y (the index).  Both are
// listed so regalloc keeps A's value live across this instruction.
def STA_DPIndLongY : InstDPIndLongY<0x97, "sta"> {
  let Uses = [A, Y];
}

def STA_LongX : InstAbsLongX<0x9F, "sta">;
|
|
|
|
//---------------------------------------------------------------- LDX (load X)
// Immediate width follows the X flag; the 8-bit form lives in the
// "W65816XHigh" decoder namespace and is codegen-only.
def LDX_Imm8 : InstImm8<0xA2, "ldx"> {
  let XHigh = 1;
  let DecoderNamespace = "W65816XHigh";
  let isCodeGenOnly = 1;
  let Defs = [X];
}
def LDX_Imm16 : InstImm16<0xA2, "ldx"> {
  let XLow = 1;
  let Defs = [X];
}

def LDX_DP   : InstDP  <0xA6, "ldx">;
def LDX_Abs  : InstAbs <0xAE, "ldx">;
def LDX_DPY  : InstDPY <0xB6, "ldx">;
def LDX_AbsY : InstAbsY<0xBE, "ldx">;
|
|
|
|
//---------------------------------------------------------------- STX (store X)
def STX_DP  : InstDP <0x86, "stx">;
def STX_Abs : InstAbs<0x8E, "stx">;
def STX_DPY : InstDPY<0x96, "stx">;
|
|
|
|
//---------------------------------------------------------------- LDY (load Y)
// Immediate width follows the X flag; the 8-bit form lives in the
// "W65816XHigh" decoder namespace and is codegen-only.
def LDY_Imm8 : InstImm8<0xA0, "ldy"> {
  let XHigh = 1;
  let DecoderNamespace = "W65816XHigh";
  let isCodeGenOnly = 1;
  let Defs = [Y];
}
def LDY_Imm16 : InstImm16<0xA0, "ldy"> {
  let XLow = 1;
  let Defs = [Y];
}

def LDY_DP   : InstDP  <0xA4, "ldy">;
def LDY_Abs  : InstAbs <0xAC, "ldy">;
def LDY_DPX  : InstDPX <0xB4, "ldy">;
def LDY_AbsX : InstAbsX<0xBC, "ldy">;
|
|
|
|
//---------------------------------------------------------------- STY (store Y)
def STY_DP  : InstDP <0x84, "sty">;
def STY_Abs : InstAbs<0x8C, "sty">;
def STY_DPX : InstDPX<0x94, "sty">;
|
|
|
|
//---------------------------------------------------------------- STZ (store zero)
// Store width follows the M flag, same as STA.  Zeroes DP scratch (or any
// supported address) without burning A: one STZ replaces the
// `LDA #0; STA dp` pair, so the separate immediate load is not emitted.
def STZ_DP   : InstDP  <0x64, "stz">;
def STZ_Abs  : InstAbs <0x9C, "stz">;
def STZ_DPX  : InstDPX <0x74, "stz">;
def STZ_AbsX : InstAbsX<0x9E, "stz">;
|
|
|
|
//------------------------------------------------------------------------- ADC
// Immediate width follows the M flag; the 8-bit form is codegen-only and
// decoded from the "W65816MHigh" namespace.
def ADC_Imm8 : InstImm8<0x69, "adc"> {
  let MHigh = 1;
  let DecoderNamespace = "W65816MHigh";
  let isCodeGenOnly = 1;
}
def ADC_Imm16 : InstImm16<0x69, "adc"> {
  let MLow = 1;
}

def ADC_DP   : InstDP  <0x65, "adc">;
def ADC_Abs  : InstAbs <0x6D, "adc">;
def ADC_DPX  : InstDPX <0x75, "adc">;
def ADC_AbsX : InstAbsX<0x7D, "adc">;
def ADC_AbsY : InstAbsY<0x79, "adc">;
|
|
|
|
//------------------------------------------------------------------------- SBC
// Immediate width follows the M flag; the 8-bit form is codegen-only and
// decoded from the "W65816MHigh" namespace.
def SBC_Imm8 : InstImm8<0xE9, "sbc"> {
  let MHigh = 1;
  let DecoderNamespace = "W65816MHigh";
  let isCodeGenOnly = 1;
}
def SBC_Imm16 : InstImm16<0xE9, "sbc"> {
  let MLow = 1;
}

def SBC_DP   : InstDP  <0xE5, "sbc">;
def SBC_Abs  : InstAbs <0xED, "sbc">;
def SBC_DPX  : InstDPX <0xF5, "sbc">;
def SBC_AbsX : InstAbsX<0xFD, "sbc">;
def SBC_AbsY : InstAbsY<0xF9, "sbc">;
|
|
|
|
//------------------------------------------------------------------------- CMP
// Immediate compares touch no memory at all.
let mayLoad = 0, mayStore = 0 in {
  def CMP_Imm8 : InstImm8<0xC9, "cmp"> {
    let MHigh = 1;
    let DecoderNamespace = "W65816MHigh";
    let isCodeGenOnly = 1;
  }
  def CMP_Imm16 : InstImm16<0xC9, "cmp"> {
    let MLow = 1;
  }
}

// Memory-operand compares never store.
let mayStore = 0 in {
  def CMP_DP   : InstDP  <0xC5, "cmp">;
  def CMP_Abs  : InstAbs <0xCD, "cmp">;
  def CMP_DPX  : InstDPX <0xD5, "cmp">;
  def CMP_AbsX : InstAbsX<0xDD, "cmp">;
  def CMP_AbsY : InstAbsY<0xD9, "cmp">;
}
|
|
|
|
//---------------------------------------------------------------- CPX/CPY
// Immediate compares touch no memory; their width follows the X flag.
let mayLoad = 0, mayStore = 0 in {
  def CPX_Imm8 : InstImm8<0xE0, "cpx"> {
    let XHigh = 1;
    let DecoderNamespace = "W65816XHigh";
    let isCodeGenOnly = 1;
  }
  def CPX_Imm16 : InstImm16<0xE0, "cpx"> {
    let XLow = 1;
  }
  def CPY_Imm8 : InstImm8<0xC0, "cpy"> {
    let XHigh = 1;
    let DecoderNamespace = "W65816XHigh";
    let isCodeGenOnly = 1;
  }
  def CPY_Imm16 : InstImm16<0xC0, "cpy"> {
    let XLow = 1;
  }
}

// Memory-operand compares never store.
let mayStore = 0 in {
  def CPX_DP  : InstDP <0xE4, "cpx">;
  def CPX_Abs : InstAbs<0xEC, "cpx">;
  def CPY_DP  : InstDP <0xC4, "cpy">;
  def CPY_Abs : InstAbs<0xCC, "cpy">;
}
|
|
|
|
//---------------------------------------------------------------- AND/ORA/EOR
// AND: immediate forms touch no memory; memory forms never store.
let mayLoad = 0, mayStore = 0 in {
  def AND_Imm8 : InstImm8<0x29, "and"> {
    let MHigh = 1;
    let DecoderNamespace = "W65816MHigh";
    let isCodeGenOnly = 1;
  }
  def AND_Imm16 : InstImm16<0x29, "and"> {
    let MLow = 1;
  }
}
let mayStore = 0 in {
  def AND_DP  : InstDP <0x25, "and">;
  def AND_Abs : InstAbs<0x2D, "and">;
}
|
|
|
|
// ORA: immediate forms touch no memory; memory forms never store.
let mayLoad = 0, mayStore = 0 in {
  def ORA_Imm8 : InstImm8<0x09, "ora"> {
    let MHigh = 1;
    let DecoderNamespace = "W65816MHigh";
    let isCodeGenOnly = 1;
  }
  def ORA_Imm16 : InstImm16<0x09, "ora"> {
    let MLow = 1;
  }
}
let mayStore = 0 in {
  def ORA_DP  : InstDP <0x05, "ora">;
  def ORA_Abs : InstAbs<0x0D, "ora">;
}
|
|
|
|
// EOR: immediate forms touch no memory; memory forms never store.
let mayLoad = 0, mayStore = 0 in {
  def EOR_Imm8 : InstImm8<0x49, "eor"> {
    let MHigh = 1;
    let DecoderNamespace = "W65816MHigh";
    let isCodeGenOnly = 1;
  }
  def EOR_Imm16 : InstImm16<0x49, "eor"> {
    let MLow = 1;
  }
}
let mayStore = 0 in {
  def EOR_DP  : InstDP <0x45, "eor">;
  def EOR_Abs : InstAbs<0x4D, "eor">;
}
|
|
|
|
// BIT: immediate forms touch no memory; memory forms never store.
let mayLoad = 0, mayStore = 0 in {
  def BIT_Imm8 : InstImm8<0x89, "bit"> {
    let MHigh = 1;
    let DecoderNamespace = "W65816MHigh";
    let isCodeGenOnly = 1;
  }
  def BIT_Imm16 : InstImm16<0x89, "bit"> {
    let MLow = 1;
  }
}
let mayStore = 0 in {
  def BIT_DP  : InstDP <0x24, "bit">;
  def BIT_Abs : InstAbs<0x2C, "bit">;
}
|
|
|
|
//---------------------------------------------------------------- INC/DEC
// Register increment/decrement (implied operand): no memory access.
let mayLoad = 0, mayStore = 0 in {
  def INA : InstImplied<0x1A, "inc a">;
  def DEA : InstImplied<0x3A, "dec a">;
  def INX : InstImplied<0xE8, "inx">;
  def DEX : InstImplied<0xCA, "dex">;
  def INY : InstImplied<0xC8, "iny">;
  def DEY : InstImplied<0x88, "dey">;
}
|
|
|
|
// Memory increment: read-modify-write on the addressed location.
def INC_DP   : InstDP  <0xE6, "inc">;
def INC_Abs  : InstAbs <0xEE, "inc">;
def INC_DPX  : InstDPX <0xF6, "inc">;
def INC_AbsX : InstAbsX<0xFE, "inc">;

// Memory decrement: read-modify-write on the addressed location.
def DEC_DP   : InstDP  <0xC6, "dec">;
def DEC_Abs  : InstAbs <0xCE, "dec">;
def DEC_DPX  : InstDPX <0xD6, "dec">;
def DEC_AbsX : InstAbsX<0xDE, "dec">;
|
|
|
|
//---------------------------------------------------------------- Shifts
// Accumulator shifts/rotates: no memory access.
let mayLoad = 0, mayStore = 0 in {
  def ASL_A : InstImplied<0x0A, "asl a">;
  def LSR_A : InstImplied<0x4A, "lsr a">;
  def ROL_A : InstImplied<0x2A, "rol a">;
  def ROR_A : InstImplied<0x6A, "ror a">;
}

// Memory shifts/rotates (direct-page and absolute forms).
def ASL_DP  : InstDP <0x06, "asl">;
def ASL_Abs : InstAbs<0x0E, "asl">;
def LSR_DP  : InstDP <0x46, "lsr">;
def LSR_Abs : InstAbs<0x4E, "lsr">;
def ROL_DP  : InstDP <0x26, "rol">;
def ROL_Abs : InstAbs<0x2E, "rol">;
def ROR_DP  : InstDP <0x66, "ror">;
def ROR_Abs : InstAbs<0x6E, "ror">;
|
|
|
|
//---------------------------------------------------------------- Transfers
// Defs/Uses metadata is critical: without it, machine-cp doesn't see that
// TAX (etc.) reads the source register, and may delete a `$a = COPY $x`
// immediately preceding it as a "dead store" -- corrupting the data flow.
// See feedback_w65816_implied_ops.md for the canary.
//
// The C-accumulator transfers (TCD/TDC/TCS/TSC) follow the same rule for the
// registers this file models: A on the accumulator side and SP on the stack
// side.  The direct-page register has no register definition visible in
// this file, so only the A side of TCD/TDC is declared.
// NOTE(review): if a direct-page register record exists, add it to
// TCD's Defs and TDC's Uses.
let mayLoad = 0, mayStore = 0 in {
  def TAX : InstImplied<0xAA, "tax"> { let Defs = [X];  let Uses = [A];  }
  def TAY : InstImplied<0xA8, "tay"> { let Defs = [Y];  let Uses = [A];  }
  def TXA : InstImplied<0x8A, "txa"> { let Defs = [A];  let Uses = [X];  }
  def TYA : InstImplied<0x98, "tya"> { let Defs = [A];  let Uses = [Y];  }
  def TXY : InstImplied<0x9B, "txy"> { let Defs = [Y];  let Uses = [X];  }
  def TYX : InstImplied<0xBB, "tyx"> { let Defs = [X];  let Uses = [Y];  }
  def TXS : InstImplied<0x9A, "txs"> { let Defs = [SP]; let Uses = [X];  }
  def TSX : InstImplied<0xBA, "tsx"> { let Defs = [X];  let Uses = [SP]; }

  // TCD reads the accumulator; TDC overwrites it.
  def TCD : InstImplied<0x5B, "tcd"> { let Uses = [A]; }
  def TDC : InstImplied<0x7B, "tdc"> { let Defs = [A]; }

  // TCS copies the accumulator into SP; TSC copies SP into the accumulator.
  def TCS : InstImplied<0x1B, "tcs"> { let Defs = [SP]; let Uses = [A];  }
  def TSC : InstImplied<0x3B, "tsc"> { let Defs = [A];  let Uses = [SP]; }
}
|
|
|
|
//---------------------------------------------------------------- Stack push/pull
// Register push/pull pairs (implied operand).
def PHA : InstImplied<0x48, "pha">;
def PLA : InstImplied<0x68, "pla">;
def PHX : InstImplied<0xDA, "phx">;
def PLX : InstImplied<0xFA, "plx">;
def PHY : InstImplied<0x5A, "phy">;
def PLY : InstImplied<0x7A, "ply">;
def PHP : InstImplied<0x08, "php">;
def PLP : InstImplied<0x28, "plp">;
def PHB : InstImplied<0x8B, "phb">;
def PLB : InstImplied<0xAB, "plb">;
def PHD : InstImplied<0x0B, "phd">;
def PLD : InstImplied<0x2B, "pld">;
def PHK : InstImplied<0x4B, "phk">;

// Push a 16-bit operand (PEA) / a PC-relative 16-bit address (PER).
def PEA : InstAbs    <0xF4, "pea">;
def PER : InstPCRel16<0x62, "per">;
|
|
|
|
//---------------------------------------------------------------- Branches
// Conditional branches READ the P (status) register.  Before `Uses = [P]`
// was declared, MachineCSE saw no dependency between an earlier CMP (which
// defines P) and the Bxx that consumes it, and would happily reuse a
// "redundant" CMP whose flags had since been clobbered by an intervening
// LDA/STA/ADC.  Modelling the dependency is the principled fix.
// TODO: once this is verified, the W65816TargetMachine workaround that
// disabled MachineCSE entirely can be removed.
let Uses = [P],
    isBranch = 1, isTerminator = 1,
    mayLoad = 0, mayStore = 0 in {
  def BEQ : InstPCRel8<0xF0, "beq">;
  def BNE : InstPCRel8<0xD0, "bne">;
  def BCS : InstPCRel8<0xB0, "bcs">;
  def BCC : InstPCRel8<0x90, "bcc">;
  def BMI : InstPCRel8<0x30, "bmi">;
  def BPL : InstPCRel8<0x10, "bpl">;
  def BVS : InstPCRel8<0x70, "bvs">;
  def BVC : InstPCRel8<0x50, "bvc">;
}
|
|
|
|
// Unconditional control transfers.  All of them end the block (isBarrier),
// so nothing falls through past them.
let isBranch = 1, isTerminator = 1, isBarrier = 1, mayLoad = 0, mayStore = 0 in {
  def BRA      : InstPCRel8 <0x80, "bra">;
  def BRL      : InstPCRel16<0x82, "brl">;
  def JMP_Abs  : InstAbs    <0x4C, "jmp">;
  def JML_Long : InstAbsLong<0x5C, "jml">;

  // `jmp (abs)` takes its destination from memory rather than from the
  // instruction encoding, so it is flagged as an indirect branch.  Without
  // the flag, generic code classifies it as a direct unconditional branch
  // with a statically known target.
  let isIndirectBranch = 1 in
  def JMP_AbsInd : InstAbsInd<0x6C, "jmp">;
}
|
|
|
|
//---------------------------------------------------------------- Calls
// MC-level subroutine calls: 16-bit absolute (JSR) and 24-bit long (JSL).
let isCall = 1,
    mayLoad = 0, mayStore = 0 in {
  def JSR_Abs  : InstAbs    <0x20, "jsr">;
  def JSL_Long : InstAbsLong<0x22, "jsl">;
}
|
|
|
|
//---------------------------------------------------------------- Returns
let isReturn = 1, isTerminator = 1, isBarrier = 1,
    mayLoad = 0, mayStore = 0 in {
  def RTS : InstImplied<0x60, "rts">;
  def RTI : InstImplied<0x40, "rti">;

  // RTL is the 65816 long return; it is the instruction selected for the
  // generic retglue node.
  let Pattern = [(W65816retglue)] in
  def RTL : InstImplied<0x6B, "rtl">;
}
|
|
|
|
//---------------------------------------------------------------- Block move
// MVN/MVP are 3 bytes: opcode, then the destination-bank byte, then the
// source-bank byte.  This class takes the assembly operands in that same
// encoded order, `$dst, $src`.
// NOTE(review): WDC's reference syntax lists the source bank first
// (`MVN srcbk,destbk`), i.e. the reverse of the byte order -- confirm which
// textual order the assembler is meant to accept.
//
// Block-move operands are bank bytes written without a '#' prefix
// (e.g. `mvn $01, $02`), so the parser produces AddrDP-kind operands rather
// than immediates.  addrDP is used here to match; the encoder path is the
// same because both are single-byte values.
class InstBlockMove<bits<8> op, string mnem>
  : W65816Inst<(outs), (ins addrDP:$dst, addrDP:$src),
               mnem # "\t$dst, $src"> {
  bits<8>  dst;
  bits<8>  src;
  bits<24> Inst;

  let Size = 3;

  let Inst{7-0}   = op;   // byte 0: opcode
  let Inst{15-8}  = dst;  // byte 1: $dst bank
  let Inst{23-16} = src;  // byte 2: $src bank
}
|
|
|
|
// The two block-move instructions; both use the InstBlockMove layout.
def MVN : InstBlockMove<0x54, "mvn">;
def MVP : InstBlockMove<0x44, "mvp">;
|
|
|
|
//---------------------------------------------------------------- Stack-rel
// Stack-relative forms of the accumulator load/store/ALU ops.
def LDA_StackRel : InstStackRel<0xA3, "lda">;
def STA_StackRel : InstStackRel<0x83, "sta">;
def ADC_StackRel : InstStackRel<0x63, "adc">;
def SBC_StackRel : InstStackRel<0xE3, "sbc">;
def CMP_StackRel : InstStackRel<0xC3, "cmp">;
def AND_StackRel : InstStackRel<0x23, "and">;
def ORA_StackRel : InstStackRel<0x03, "ora">;
def EOR_StackRel : InstStackRel<0x43, "eor">;
|
|
|
|
//---------------------------------------------------------------- Stack-ind-Y
// Stack-relative indirect indexed-Y: dereference a pointer that was spilled
// at S+off.
def LDA_StackRelIndY : InstStackRelIndY<0xB3, "lda">;
def STA_StackRelIndY : InstStackRelIndY<0x93, "sta">;
|
|
|
|
//===----------------------------------------------------------------------===//
// Branch patterns (placed after the Bxx defs).
//
// W65816brcc takes (Dest, CondCode) plus a glue from W65816cmp.  Each CC
// constant selects one of the eight Bxx instructions.  The numeric values
// mirror W65816CC::CondCode in W65816.h.
//===----------------------------------------------------------------------===//

// Maps condition-code constant `cc` to conditional branch `BrInst`.
class W65816BrCCPat<int cc, Instruction BrInst>
  : Pat<(W65816brcc bb:$dest, (i8 cc)), (BrInst bb:$dest)>;

def : W65816BrCCPat<0, BEQ>;
def : W65816BrCCPat<1, BNE>;
def : W65816BrCCPat<2, BCS>;
def : W65816BrCCPat<3, BCC>;
def : W65816BrCCPat<4, BMI>;
def : W65816BrCCPat<5, BPL>;
def : W65816BrCCPat<6, BVS>;
def : W65816BrCCPat<7, BVC>;
|
|
|
|
// The generic ISD::BR (unconditional branch) node selects BRA.
def : Pat<(br bb:$dest),
          (BRA bb:$dest)>;
|
|
|
|
// Memory inc/dec on a global: `*p = *p + 1` selects `INC abs` and
// `*p = *p - 1` selects `DEC abs`.  One read-modify-write instruction
// replaces the LDA -> CLC -> ADC #1 -> STA sequence.  The load and the
// store must name the same global ($g appears in both positions).
def : Pat<(store (i16 (add (i16 (load (W65816Wrapper tglobaladdr:$g))),
                           (i16 1))),
                 (W65816Wrapper tglobaladdr:$g)),
          (INC_Abs tglobaladdr:$g)>;

def : Pat<(store (i16 (add (i16 (load (W65816Wrapper tglobaladdr:$g))),
                           (i16 -1))),
                 (W65816Wrapper tglobaladdr:$g)),
          (DEC_Abs tglobaladdr:$g)>;
|
|
|
|
// Direct call to a global / external symbol.  JSL (24-bit long
// jump-and-link) is paired with RTL throughout.  This matches the IIgs
// convention where main is entered via JSL, and means a function does not
// have to know how it was called in order to choose its return instruction.
// A pseudo bridges the i16 symbol operand to JSL_Long's 24-bit operand
// class.
//
// Defs lists the caller-clobbered registers.  The 65816 has no
// caller/callee-save split: every callee may freely modify A, X, Y, DPF0
// and P.
//   * A/X/Y/DPF0 are also return slots: i32/i64 returns place their high
//     halves in X (i32), or Y and DPF0 (i64).  Without them in Defs the
//     InstrEmitter does not add implicit-defs for glued
//     CopyFromReg(X/Y/DPF0) on the call MI, and the verifier sees the
//     post-call `COPY $y` as reading an undefined register.
//   * DPF0 was historically the only "extra" def, so that getLoad(0xF0)
//     would not CSE across calls; the same anti-CSE rationale applies to
//     A/X/Y.
//   * P is listed because the callee clobbers the status flags and the
//     conditional branches declare Uses = [P]; flags computed before a call
//     must never be treated as still valid after it.
let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Defs = [A, X, Y, DPF0, P] in {
  def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst),
                               "# JSLpseudo $dst", []>;

  // ptr32 variant: same expansion in AsmPrinter.  The separate operand
  // class only exists so tablegen accepts an i32-typed tglobaladdr operand.
  def JSLpseudo32 : W65816Pseudo<(outs), (ins i32imm:$dst),
                                 "# JSLpseudo32 $dst", []>;
}
|
|
|
|
// 16-bit pointers: call targets are i16 symbols.
def : Pat<(W65816call (i16 tglobaladdr:$dst)),
          (JSLpseudo tglobaladdr:$dst)>;
def : Pat<(W65816call (i16 texternalsym:$dst)),
          (JSLpseudo texternalsym:$dst)>;

// ptr32: under p:32:16, call targets are i32 (iPTR matches the pointer
// width).  The same JSL_Long instruction handles either width; the OMF
// cRELOC opcode rewrites the offset and bank at load time.
def : Pat<(W65816call (i32 tglobaladdr:$dst)),
          (JSLpseudo32 tglobaladdr:$dst)>;
def : Pat<(W65816call (i32 texternalsym:$dst)),
          (JSLpseudo32 texternalsym:$dst)>;
|