//===-- W65816InstrInfo.td - W65816 Instruction defs -------*- tablegen -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// W65816 instruction description.  This file defines the MC-layer instruction
// encodings for the core 65816 instruction set.  DAG-selection patterns will
// be added incrementally on top of these MC instructions.
//
//===----------------------------------------------------------------------===//

include "W65816InstrFormats.td"

//===----------------------------------------------------------------------===//
// Type Profiles
//===----------------------------------------------------------------------===//

// Call: no results, variadic operand list; operand 0 is pointer-typed.
def SDT_W65816Call
    : SDTypeProfile<0, -1, [SDTCisVT<0, iPTR>]>;

// Call-sequence start / end markers: both operands are i16.
def SDT_W65816CallSeqStart
    : SDCallSeqStart<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def SDT_W65816CallSeqEnd
    : SDCallSeqEnd<[SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;

// Address wrapper: one result with the same type as its single operand,
// and that type is the pointer type.
def SDT_W65816Wrapper
    : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;

// Compare: no results, two integer operands of identical type.
// (CMP allows both i16 and i8 operands.)
def SDT_W65816Cmp
    : SDTypeProfile<0, 2, [SDTCisSameAs<0, 1>, SDTCisInt<0>]>;
// Conditional branch: operand 0 is the destination block (OtherVT),
// operand 1 is the i8 condition code.
def SDT_W65816BrCC
    : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>, SDTCisVT<1, i8>]>;

//===----------------------------------------------------------------------===//
// W65816-specific SDNodes
//===----------------------------------------------------------------------===//

def W65816retglue
    : SDNode<"W65816ISD::RET_GLUE", SDTNone,
             [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;

def W65816call
    : SDNode<"W65816ISD::CALL", SDT_W65816Call,
             [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, SDNPVariadic]>;

def W65816callseq_start
    : SDNode<"ISD::CALLSEQ_START", SDT_W65816CallSeqStart,
             [SDNPHasChain, SDNPOutGlue]>;

def W65816callseq_end
    : SDNode<"ISD::CALLSEQ_END", SDT_W65816CallSeqEnd,
             [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;

def W65816Wrapper
    : SDNode<"W65816ISD::Wrapper", SDT_W65816Wrapper>;

// Comparison: produces a Glue value (carrying processor flags).
def W65816cmp
    : SDNode<"W65816ISD::CMP", SDT_W65816Cmp, [SDNPOutGlue]>;

// Conditional branch: takes (Chain, Dest, CC, Glue from CMP).
def W65816brcc
    : SDNode<"W65816ISD::BR_CC", SDT_W65816BrCC,
             [SDNPHasChain, SDNPInGlue]>;

// Push A onto the stack.  Used by LowerCall to pass extra args.
// Takes Chain + Glue (with A pre-loaded via CopyToReg), produces
// Chain + Glue.  Has a side effect (SP changes) and stores to
// memory.  In 16-bit M mode, pushes 2 bytes and decrements SP by 2;
// the call's ADJCALLSTACKUP pseudo unwinds those bytes via
// tsc;clc;adc #N;tcs after the JSL returns.
def W65816push
    : SDNode<"W65816ISD::PUSH", SDTNone,
             [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
              SDNPSideEffect, SDNPMayStore]>;

// Push X onto the stack.  Same shape as W65816push but the value to
// push is glued from CopyToReg(X) instead of CopyToReg(A).
def W65816pushx
    : SDNode<"W65816ISD::PUSH_X", SDTNone,
             [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
              SDNPSideEffect, SDNPMayStore]>;

// SELECT_CC: takes (TVal, FVal, CC) plus a glue value carrying the
// flags from a preceding W65816cmp.  Lowered by EmitInstrWithCustomInserter
// into a CMP (already in the BB) + Bxx + diamond CFG + PHI.
def SDT_W65816SelectCC
    : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
                           SDTCisVT<3, i8>]>;
def W65816selectcc
    : SDNode<"W65816ISD::SELECT_CC", SDT_W65816SelectCC, [SDNPInGlue]>;

// Dynamic stack allocation: takes (chain, size:i16) and returns
// (ptr:i16, chain).  Lowers to TSC; SEC; SBC size; TCS; INC A in
// AsmPrinter.  See LowerDynamicStackalloc.
def SDT_W65816Alloca
    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def W65816alloca
    : SDNode<"W65816ISD::ALLOCA", SDT_W65816Alloca,
             [SDNPHasChain, SDNPSideEffect]>;

// ptr32 load / store: target-specific load/store nodes that take a 32-bit
// pointer (Wide32 = i32) and lower to [dp],Y indirect-long with the bank
// byte taken from the pointer's hi-half.  Used for ptr32 mode where
// generic (load i32-addr) needs explicit lowering — wrapping in a target
// node prevents DAG combines from rewriting the load before isel.
//
// Loads always materialise an i16 in A (16-bit LDA); byte zext / anyext
// patterns AND-mask afterwards exactly as the existing LDAptr does.
// Stores split into two nodes: ST_PTR (full 16-bit STA) and STB_PTR
// (SEP/REP-wrapped 8-bit STA for truncating stores).
def SDT_W65816LdPtr
    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i32>]>;
def SDT_W65816StPtr
    : SDTypeProfile<0, 2, [SDTCisVT<0, i16>, SDTCisVT<1, i32>]>;
def W65816ldPtr
    : SDNode<"W65816ISD::LD_PTR", SDT_W65816LdPtr,
             [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;

// va_arg's stack-pointer deref: bank-0-explicit load.  The 65816 stack
// is hardwired to bank 0; va_arg's `ap` is always a stack pointer.
// Under Loader, $BE points to OUR bank, but va_arg needs bank 0 — so
// LowerVAARG emits this opcode and the pattern routes to LDAptrBank0
// (the bank-0-hardcoded variant of LDAptr).
def SDT_W65816VAArgLoad
    : SDTypeProfile<1, 1, [SDTCisVT<0, i16>, SDTCisVT<1, i16>]>;
def W65816vaargLoad
    : SDNode<"W65816ISD::VAARG_LOAD", SDT_W65816VAArgLoad,
             [SDNPHasChain, SDNPMayLoad]>;

// Store halves of the ptr32 load/store node family; both share
// SDT_W65816StPtr (i16 value, i32 pointer).
def W65816stPtr
    : SDNode<"W65816ISD::ST_PTR", SDT_W65816StPtr,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;
def W65816stbPtr
    : SDNode<"W65816ISD::STB_PTR", SDT_W65816StPtr,
             [SDNPHasChain, SDNPMayStore, SDNPMemOperand]>;

//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//

// Call-frame setup / teardown markers.  Both read and write SP.
let Uses = [SP], Defs = [SP] in {
  def ADJCALLSTACKDOWN
      : W65816Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
                     "# ADJCALLSTACKDOWN $amt1 $amt2",
                     [(W65816callseq_start timm:$amt1, timm:$amt2)]>;

  def ADJCALLSTACKUP
      : W65816Pseudo<(outs), (ins i16imm:$amt1, i16imm:$amt2),
                     "# ADJCALLSTACKUP $amt1 $amt2",
                     [(W65816callseq_end timm:$amt1, timm:$amt2)]>;
}

// LEA-equivalent: compute the address (SP + frame_offset + offset) of a
// stack slot and place it in A.  Selected from a bare ISD::FrameIndex
// SDValue in W65816DAGToDAGISel::Select; expanded by eliminateFrameIndex
// into TSC + CLC + ADC #disp.  Output is Acc16 because the address ends
// up in A; PtrRegs (which only contains SP) is the wrong class.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    isReMaterializable = 1 in {
  def ADDframe
      : W65816Pseudo<(outs Acc16:$dst), (ins i16imm:$base, i16imm:$offset),
                     "# ADDframe PSEUDO", []>;
}

// VLA / dynamic_stackalloc: takes a 16-bit byte count in A, returns
// the address of the allocated region in A.  Expanded at AsmPrinter
// time to: TSC; SEC; SBC count; TCS; INC A.  Has side effects
// (changes SP).  Both $dst and $size are tied to A; explicit
// Defs/Uses on SP keep regalloc honest about the side effect.
let Constraints = "$size = $dst", hasSideEffects = 1,
    Uses = [SP], Defs = [SP] in {
  def ALLOCAfi
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$size),
                     "# ALLOCAfi $dst, $size",
                     [(set Acc16:$dst, (W65816alloca Acc16:$size))]>;
}

// The retglue node lowers directly to RTL (see Returns section below).
// No separate RET pseudo — the real MC instruction handles the pattern.

// Push A onto the stack.  Expanded in AsmPrinter to MC `PHA`.  Used by
// LowerCall to pass extra args; the matching `tsc;clc;adc #N;tcs` SP
// unwind happens in eliminateCallFramePseudoInstr for ADJCALLSTACKUP.
let hasSideEffects = 1, mayStore = 1, Uses = [A, SP], Defs = [SP] in {
  def PUSH16
      : W65816Pseudo<(outs), (ins), "# PUSH16", [(W65816push)]>;
}

// Push X onto the stack.  Used by LowerCall when an outgoing arg's
// SDValue is already in X (e.g. forwarding the i32-first-arg-in-A:X
// hi half).  Saves a TXA+spill round-trip.  Expansion: PHX.
let hasSideEffects = 1, mayStore = 1, Uses = [X, SP], Defs = [SP] in {
  def PUSH16X
      : W65816Pseudo<(outs), (ins), "# PUSH16X", [(W65816pushx)]>;
}

// SELECT_CC16: implements (set Acc16:$dst, (W65816selectcc tval, fval, cc))
// where the CMP that produced the flags has already been emitted (its
// glue is implicit via the P register).  EmitInstrWithCustomInserter
// expands this into a Bxx + 2 BBs + PHI.  Marked usesCustomInserter so
// the codegen invokes our hook; Uses=[P] so MachineSched keeps the CMP
// adjacent.
let hasSideEffects = 1, Uses = [P], usesCustomInserter = 1 in {
  def SELECT_CC16
      : W65816Pseudo<(outs Acc16:$dst),
                     (ins Acc16:$tval, Acc16:$fval, i8imm:$cc),
                     "# SELECT_CC16 $dst, $tval, $fval, $cc",
                     [(set Acc16:$dst,
                           (W65816selectcc Acc16:$tval, Acc16:$fval,
                                           timm:$cc))]>;

  // i8 mirror.  Without this, `c ? a : b` patterns where the result is
  // i8 (e.g. `unsigned char to_lower(char c)`) fail isel with "Cannot
  // Select" — pre-existing bug.  EmitInstrWithCustomInserter handles
  // both the i8 and i16 forms identically; the only difference is the
  // register class on the operands.
  def SELECT_CC8
      : W65816Pseudo<(outs Acc8:$dst),
                     (ins Acc8:$tval, Acc8:$fval, i8imm:$cc),
                     "# SELECT_CC8 $dst, $tval, $fval, $cc",
                     [(set Acc8:$dst,
                           (W65816selectcc Acc8:$tval, Acc8:$fval,
                                           timm:$cc))]>;
}

//===----------------------------------------------------------------------===//
// Codegen pseudos that expand to MC instructions in the AsmPrinter.
//
// These pseudos carry DAG patterns with explicit output operands so the
// generic code generator can allocate them; the MC-layer instructions they
// expand to have the opcode encoding but no virtual output (the result lives
// in the implicit A register).  W65816AsmPrinter::emitInstruction maps each
// pseudo here to its real MC counterpart.
//===----------------------------------------------------------------------===//

// NOTE: LDA / LDX physically update N and Z, but we deliberately do
// NOT model that with `Defs = [P]`.  Adding `Defs = [P]` lets the
// scheduler legally place an LDA between CMP and Bxx (P just gets
// re-defined; the latest def is what Bxx tests) — same flag-corruption
// bug, different mechanism.  Two complementary fixes carry the load:
// the 4-block SELECT_CC inserter for SETCC patterns, and the post-RA
// PHP/PLP wrap pass (W65816StackSlotCleanup Pass -2.5) for BR_CC
// patterns (`while`/`for`/`if-goto`).  Both landed.
// Immediate loads into A (16-bit / 8-bit) and X (16-bit).  All three are
// cheap, rematerializable, and touch no memory.  The shared `let` covers
// every def in the group, so no per-def override is needed.
let isAsCheapAsAMove = 1, isReMaterializable = 1, hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def LDAi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins i16imm:$imm),
                     "# LDAi16imm $dst, $imm",
                     [(set Acc16:$dst, (i16 imm:$imm))]>;

  def LDXi16imm
      : W65816Pseudo<(outs Idx16:$dst), (ins i16imm:$imm),
                     "# LDXi16imm $dst, $imm",
                     [(set Idx16:$dst, (i16 imm:$imm))]>;

  def LDAi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins i8imm:$imm),
                     "# LDAi8imm $dst, $imm",
                     [(set Acc8:$dst, (i8 imm:$imm))]>;
}

// Materialise a 16-bit address (global / external symbol) into A.  Same
// pseudo as for an immediate constant — it expands to LDA_Imm16 with the
// symbol as the operand, which the MC encoder turns into a fixup_16.
def : Pat<(i16 (W65816Wrapper tglobaladdr:$g)),
          (LDAi16imm tglobaladdr:$g)>;
def : Pat<(i16 (W65816Wrapper texternalsym:$s)),
          (LDAi16imm texternalsym:$s)>;

// 8-bit ALU ops against an immediate: add, sub, and, or, xor.  The
// accumulator is both source and destination, hence the tie.
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def ADCi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# ADCi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (add Acc8:$src, imm:$imm))]>;

  def SBCi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# SBCi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (sub Acc8:$src, imm:$imm))]>;

  def ANDi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# ANDi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (and Acc8:$src, imm:$imm))]>;

  def ORAi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# ORAi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (or Acc8:$src, imm:$imm))]>;

  def EORi8imm
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src, i8imm:$imm),
                     "# EORi8imm $dst, $src, $imm",
                     [(set Acc8:$dst, (xor Acc8:$src, imm:$imm))]>;
}

// 8-bit load / store via a 16-bit absolute address.
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def LDA8abs
      : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
                     "# LDA8abs $dst, $addr", []>;

  // LDA8long: companion to STA8long.  Bank-explicit i8 load via LDA_Long
  // (0xAF).  Used for `*(uint8*)0xC035` reads — LDA_Abs (0xAD) is
  // DBR-relative and would land in the wrong bank under GS/OS Loader.
  // Pattern that ROUTES const-int loads here lives at the ANDi16imm
  // section (must appear after ANDi16imm is defined).
  def LDA8long
      : W65816Pseudo<(outs Acc8:$dst), (ins i32imm:$addr),
                     "# LDA8long $dst, $addr", []>;
}

let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
  def STA8abs
      : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
                     "# STA8abs $src, $addr", []>;

  // STA8long: 8-bit absolute-long store.  Same pattern as STA8abs but
  // the AsmPrinter emits STA_Long (0x8F) — a true 24-bit bank-explicit
  // store — instead of STA_Abs (0x8D, DBR-relative).  Used for MMIO via
  // a constant integer address; the i32imm carries the full 24-bit
  // physical address.  See the (store Acc8, (iPTR imm)) pattern.
  def STA8long
      : W65816Pseudo<(outs), (ins Acc8:$src, i32imm:$addr),
                     "# STA8long $src, $addr", []>;
}

// Byte loads / stores whose address is a wrapped global or external symbol
// go through the DBR-relative absolute forms.
def : Pat<(i8 (load (W65816Wrapper tglobaladdr:$g))),
          (LDA8abs tglobaladdr:$g)>;
def : Pat<(i8 (load (W65816Wrapper texternalsym:$s))),
          (LDA8abs texternalsym:$s)>;
def : Pat<(store Acc8:$src, (W65816Wrapper tglobaladdr:$g)),
          (STA8abs Acc8:$src, tglobaladdr:$g)>;
def : Pat<(store Acc8:$src, (W65816Wrapper texternalsym:$s)),
          (STA8abs Acc8:$src, texternalsym:$s)>;

// Byte store via a constant-int address (MMIO-style: `*(volatile uint8 *)0x70
// = v`).  Without this, the i8 store falls through to STBptr ([dp],Y), which
// is 16 B / 30 cyc.  We route through STA8long (sta abs-long, opcode 0x8F)
// rather than STA8abs because a const-int address is a physical 24-bit
// pointer and must NOT track DBR — under the GS/OS Loader the data bank is
// non-zero, so DBR-relative `sta abs` would land in the wrong bank.
// `timm` matches TargetConstantSDNode — under p:32:16, a pre-isel combine
// in W65816TargetLowering::PerformDAGCombine converts the ConstantSDNode
// ptr to a TargetConstantSDNode so it survives LowerI32Constant intact.
def : Pat<(store Acc8:$src, (iPTR imm:$addr)),
          (STA8long Acc8:$src, (i32 imm:$addr))>;
def : Pat<(store Acc8:$src, (iPTR timm:$addr)),
          (STA8long Acc8:$src, (i32 timm:$addr))>;
def : Pat<(truncstorei8 Acc16:$src, (iPTR imm:$addr)),
          (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8), (i32 imm:$addr))>;
def : Pat<(truncstorei8 Acc16:$src, (iPTR timm:$addr)),
          (STA8long (COPY_TO_REGCLASS Acc16:$src, Acc8), (i32 timm:$addr))>;

// Load 16 bits via a 16-bit absolute address.  The pseudo itself carries
// no pattern; the Pat records that follow route wrapped global / external
// symbol addresses and constant-integer addresses to it.
let hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def LDAabs
      : W65816Pseudo<(outs Acc16:$dst), (ins i32imm:$addr),
                     "# LDAabs $dst, $addr", []>;
}

def : Pat<(i16 (load (W65816Wrapper tglobaladdr:$g))),
          (LDAabs tglobaladdr:$g)>;
def : Pat<(i16 (load (W65816Wrapper texternalsym:$s))),
          (LDAabs texternalsym:$s)>;

// i16 const-int-address load: companion to the STAabs (iPTR imm) /
// (iPTR timm) store patterns.  `*(volatile uint16*)0x5000`
// → LDAabs (DBR-relative).  The combine in W65816TargetLowering returns
// a TargetConstant for the Wide32-zero-hi-Constant unwrap.
def : Pat<(i16 (load (iPTR imm:$addr))),
          (LDAabs (i32 imm:$addr))>;
def : Pat<(i16 (load (iPTR timm:$addr))),
          (LDAabs (i32 timm:$addr))>;

// Store 16 bits to a 16-bit absolute address.
let hasSideEffects = 0, mayLoad = 0, mayStore = 1 in {
  def STAabs
      : W65816Pseudo<(outs), (ins Acc16:$src, i32imm:$addr),
                     "# STAabs $src, $addr", []>;
}

def : Pat<(store Acc16:$src, (W65816Wrapper tglobaladdr:$g)),
          (STAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(store Acc16:$src, (W65816Wrapper texternalsym:$s)),
          (STAabs Acc16:$src, texternalsym:$s)>;

// Store via a constant-int address (`*(volatile uint16 *)0x5000 = v`).
// Lowers to STAabs (0x8D, DBR-relative) — DELIBERATELY asymmetric with the
// i8 case (STA8long, bank-explicit).  Rationale: most 65816 MMIO is i8
// (e.g. `*(uint8*)0xC035`) where users expect bank=0 always.  Const-int
// i16 is mostly used as a DBR-relative idiom in test code that switches
// DBR and verifies a write lands in the new bank.  Switching i16 to
// bank-explicit broke 10+ existing tests with no real-world i16 MMIO
// use case to justify it.  Users who need bank-explicit i16 should
// declare a global or split into two i8 stores.
def : Pat<(store Acc16:$src, (iPTR imm:$addr)),
          (STAabs Acc16:$src, (i32 imm:$addr))>;

// Under ptr32 the i16/i32 const-addr stores emerge with TargetConstant
// pointers (the PerformDAGCombine on STORE rewrites the ConstantSDNode
// into a TargetConstant to bypass LowerI32Constant's REG_SEQUENCE
// expansion).  Match `timm` so STAabs fires.
def : Pat<(store Acc16:$src, (iPTR timm:$addr)),
          (STAabs Acc16:$src, (i32 timm:$addr))>;

// 16-bit ADD: expands to CLC + ADC_Imm16.  The 65816 ADC sums with the
// carry flag, so a clean add needs CLC first.  Constraints tie the
// source and dest to A — there is only one Acc16 register so this is
// implicit, but stating it lets the register allocator coalesce
// without needing a COPY.
//
// Defs = [P] models the C-flag side-effect.  Required so tablegen can
// connect this instruction to the SDNode `addc` / `subc` (SDNPOutGlue),
// which is what the type legalizer emits as the lo half of a multi-
// precision add/sub when ADDC/SUBC is Legal (see W65816ISelLowering ctor).
let Constraints = "$src = $dst", Defs = [P],
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def ADCi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ADCi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (add Acc16:$src, imm:$imm))]>;

  def SBCi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# SBCi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (sub Acc16:$src, imm:$imm))]>;
}

// addc/subc: same as add/sub on this target (CLC then ADC, SEC then SBC),
// but the SDNode produces a Glue carrying the post-op carry into a
// subsequent adde/sube.  Tablegen wires the Glue to the P register
// because the instruction has Defs = [P].
def : Pat<(addc Acc16:$src, imm:$imm),
          (ADCi16imm Acc16:$src, imm:$imm)>;
def : Pat<(subc Acc16:$src, imm:$imm),
          (SBCi16imm Acc16:$src, imm:$imm)>;

// ADC/SBC from a 16-bit absolute address.  Folds a load on the
// right-hand side of an add/sub into the carry-arithmetic op.
let Constraints = "$src = $dst", Defs = [P],
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def ADCabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# ADCabs $dst, $src, $addr", []>;

  def SBCabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# SBCabs $dst, $src, $addr", []>;
}

// Plain add / sub with a folded load.
def : Pat<(add Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(add Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCabs Acc16:$src, texternalsym:$s)>;
def : Pat<(sub Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(sub Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCabs Acc16:$src, texternalsym:$s)>;

// Carry-producing add / sub with a folded load.
def : Pat<(addc Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(addc Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCabs Acc16:$src, texternalsym:$s)>;
def : Pat<(subc Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(subc Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCabs Acc16:$src, texternalsym:$s)>;

// adde/sube: the chained ADC/SBC for the hi half of a multi-precision
// add/sub.  Reads the C flag from the previous addc/adde (Uses = [P]),
// produces a fresh carry/borrow (Defs = [P]).  AsmPrinter expansion
// emits a bare ADC/SBC with no preceding CLC/SEC; eliminateFrameIndex
// for ADCEfi/SBCEfi skips the carry-prefix step that the standalone
// ADCfi/SBCfi rely on.
let Constraints = "$src = $dst", Uses = [P], Defs = [P],
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def ADCEi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ADCEi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (adde Acc16:$src, imm:$imm))]>;

  def SBCEi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# SBCEi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (sube Acc16:$src, imm:$imm))]>;
}

let Constraints = "$src = $dst", Uses = [P], Defs = [P],
    hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  def ADCEabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# ADCEabs $dst, $src, $addr", []>;

  def SBCEabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# SBCEabs $dst, $src, $addr", []>;
}

def : Pat<(adde Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ADCEabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(adde Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ADCEabs Acc16:$src, texternalsym:$s)>;
def : Pat<(sube Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (SBCEabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(sube Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (SBCEabs Acc16:$src, texternalsym:$s)>;

// (add Acc16, Acc16) — same value added to itself, equivalent to a 1-bit
// left shift.  Pattern needs a tied input so the result lands in A.
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def ASLA16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# ASLA16 $dst, $src",
                     [(set Acc16:$dst, (add Acc16:$src, Acc16:$src))]>;
}

// 1-bit shift left of the accumulator: shl x, 1.
def : Pat<(shl Acc16:$src, (i16 1)), (ASLA16 Acc16:$src)>;

// Remaining single-bit shifts of the accumulator (16-bit logical right,
// 8-bit left / logical right, 16-bit arithmetic right).  Pseudos because
// the MC shift-A instructions have no virtual output operand.  Every def
// in this group inherits the "$src = $dst" tie from the enclosing `let`.
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def LSRA16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# LSRA16 $dst, $src",
                     [(set Acc16:$dst, (srl Acc16:$src, (i16 1)))]>;

  def ASLA8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# ASLA8 $dst, $src",
                     [(set Acc8:$dst, (shl Acc8:$src, (i8 1)))]>;

  def LSRA8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# LSRA8 $dst, $src",
                     [(set Acc8:$dst, (srl Acc8:$src, (i8 1)))]>;

  // Signed shift right by 1: copy A's high bit into carry, then ROR
  // to bring it back into A's high bit while halving the rest.  The
  // AsmPrinter expands this to the 4-instruction PHA;ASL;PLA;ROR
  // sequence.
  def ASRA16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# ASRA16 $dst, $src",
                     [(set Acc16:$dst, (sra Acc16:$src, (i16 1)))]>;
}

// Shifts by small constants — unrolled into chains of single-bit shifts.
// Counts 2..4 first; larger counts might benefit from a loop or an
// XBA-and-mask trick, left for a future peephole.
def : Pat<(shl Acc16:$src, (i16 2)),
          (ASLA16 (ASLA16 Acc16:$src))>;
def : Pat<(shl Acc16:$src, (i16 3)),
          (ASLA16 (ASLA16 (ASLA16 Acc16:$src)))>;
def : Pat<(shl Acc16:$src, (i16 4)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 Acc16:$src))))>;
def : Pat<(srl Acc16:$src, (i16 2)),
          (LSRA16 (LSRA16 Acc16:$src))>;
def : Pat<(srl Acc16:$src, (i16 3)),
          (LSRA16 (LSRA16 (LSRA16 Acc16:$src)))>;
def : Pat<(srl Acc16:$src, (i16 4)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 Acc16:$src))))>;

// Shift counts 5..7 — chained single-bit shifts.  Earlier these were
// withheld because the DAG combiner narrowed `(trunc (shl (zext X), N))`
// back to `(shl X, N)` on i8 and re-entered LowerShift in a loop; the
// `isTypeDesirableForOp(SHL/SRL/SRA, i8) -> false` override in
// W65816TargetLowering now blocks that combine, so the patterns are
// safe.  Cheaper than __ashlhi3/__lshrhi3 for these counts.
def : Pat<(shl Acc16:$src, (i16 5)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16 Acc16:$src)))))>;
def : Pat<(shl Acc16:$src, (i16 6)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 Acc16:$src))))))>;
def : Pat<(shl Acc16:$src, (i16 7)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16
            (ASLA16 (ASLA16 Acc16:$src)))))))>;
def : Pat<(srl Acc16:$src, (i16 5)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16 Acc16:$src)))))>;
def : Pat<(srl Acc16:$src, (i16 6)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 Acc16:$src))))))>;
def : Pat<(srl Acc16:$src, (i16 7)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16
            (LSRA16 (LSRA16 Acc16:$src)))))))>;

// Increment / decrement of A by 1.  Match `(add x, 1)` and `(add x, -1)`
// (LLVM canonicalises sub-by-1 to add-by-(-1)).
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def INA_PSEUDO
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# INA_PSEUDO $dst, $src",
                     [(set Acc16:$dst, (add Acc16:$src, (i16 1)))]>;

  def DEA_PSEUDO
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# DEA_PSEUDO $dst, $src",
                     [(set Acc16:$dst, (add Acc16:$src, (i16 -1)))]>;

  def INA_PSEUDO8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# INA_PSEUDO8 $dst, $src",
                     [(set Acc8:$dst, (add Acc8:$src, (i8 1)))]>;

  def DEA_PSEUDO8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# DEA_PSEUDO8 $dst, $src",
                     [(set Acc8:$dst, (add Acc8:$src, (i8 -1)))]>;
}

// Two's-complement negation: `0 - x` → `EOR #$FFFF; INC A` (i.e.
// bitwise-not then add 1).  Catches (sub 0, x) which LLVM uses for
// `-x` and the `abs` intrinsic.
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def NEGA16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# NEGA16 $dst, $src",
                     [(set Acc16:$dst, (sub (i16 0), Acc16:$src))]>;

  // i8 mirror.  Without this the codegen falls into the generic SBC
  // path: `LDA #0; SEC; SBC slot` plus 8-bit M-mode prologue and
  // PHA/PLA bracketing — ~12 insns for `-x`.  NEGA8 expands to
  // `EOR #$FF; INA` (2 insns in 8-bit M).
  def NEGA8
      : W65816Pseudo<(outs Acc8:$dst), (ins Acc8:$src),
                     "# NEGA8 $dst, $src",
                     [(set Acc8:$dst, (sub (i8 0), Acc8:$src))]>;
}

// Multi-precision negation: lo + hi halves of `-x` where x is i32.
// LLVM splits `0 - x` into `(subc 0, x_lo)` and `(sube 0, x_hi)`.
// We implement both via the ADD chain `~x + carry` since INC doesn't
// touch C; the bit pattern of C from `~x + 1` matches what `subc 0, x`
// would set (C=1 iff x was 0, i.e. no borrow).
//   NEGC16 matches subc → "EOR #$FFFF; CLC; ADC #1"  (5 bytes)
//   NEGE16 matches sube → "EOR #$FFFF; ADC #0"       (4 bytes, uses C-in)

// Lo half: writes the carry that the hi half will consume.
let Constraints = "$src = $dst", Defs = [P],
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def NEGC16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# NEGC16 $dst, $src",
                     [(set Acc16:$dst, (subc (i16 0), Acc16:$src))]>;
}

// Hi half: reads the incoming carry and produces a new one.
let Constraints = "$src = $dst", Uses = [P], Defs = [P],
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def NEGE16
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# NEGE16 $dst, $src",
                     [(set Acc16:$dst, (sube (i16 0), Acc16:$src))]>;
}

// Bitwise NOT pattern moved below EORi16imm definition.

// 16-bit bitwise ops: AND / OR / XOR against an immediate or memory
// operand.  Same shape as ADCi16imm / ADCabs minus the carry prefix
// (these don't read/write the carry flag).
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def ANDi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ANDi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (and Acc16:$src, imm:$imm))]>;

  def ORAi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# ORAi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (or Acc16:$src, imm:$imm))]>;

  def EORi16imm
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i16imm:$imm),
                     "# EORi16imm $dst, $src, $imm",
                     [(set Acc16:$dst, (xor Acc16:$src, imm:$imm))]>;
}

// Bank-explicit i8 loads from a constant-int address (`*(uint8*)0xC035`).
// The default lowering goes through LDAptr ([dp],Y indirect-long) — 22 B /
// 35 cyc — because LDAptr's pattern `(load Wide16:$ptr)` matches once the
// matcher materialises the const into Wide16.  These patterns shortcut to
// LDA8long (lda long, 0xAF, 6 B / 10 cyc) and run BEFORE that materialisation
// because the explicit imm leaf has higher AddedComplexity.  Only the
// `(zextloadi8 imm)` form actually appears in real IR (i8 loads are
// always i16-extended at SDAG time on this 16-bit target); kept the
// raw `(load imm)` form too for symmetry with the store side.
let AddedComplexity = 50 in {
  // Raw i8 load from a constant address.
  def : Pat<(i8 (load (iPTR imm:$addr))),
            (LDA8long (i32 imm:$addr))>;
  def : Pat<(i8 (load (iPTR timm:$addr))),
            (LDA8long (i32 timm:$addr))>;

  // Zero-extending byte load: load the byte, then mask off the high byte.
  def : Pat<(i16 (zextloadi8 (iPTR imm:$addr))),
            (ANDi16imm (COPY_TO_REGCLASS (LDA8long (i32 imm:$addr)), Acc16),
                       0xFF)>;
  def : Pat<(i16 (zextloadi8 (iPTR timm:$addr))),
            (ANDi16imm (COPY_TO_REGCLASS (LDA8long (i32 timm:$addr)), Acc16),
                       0xFF)>;

  // Any-extending byte load: high byte is don't-care, so no mask.
  def : Pat<(i16 (extloadi8 (iPTR imm:$addr))),
            (COPY_TO_REGCLASS (LDA8long (i32 imm:$addr)), Acc16)>;
  def : Pat<(i16 (extloadi8 (iPTR timm:$addr))),
            (COPY_TO_REGCLASS (LDA8long (i32 timm:$addr)), Acc16)>;
}

// AND / OR / XOR with a memory operand at a 16-bit absolute address.
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 1, mayStore = 0 in {
  def ANDabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# ANDabs $dst, $src, $addr", []>;

  def ORAabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# ORAabs $dst, $src, $addr", []>;

  def EORabs
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, i32imm:$addr),
                     "# EORabs $dst, $src, $addr", []>;
}

// Fold a load from a wrapped global / external symbol into the bitwise op.
// Both symbol kinds are covered, matching the ADCabs / SBCabs load-fold
// patterns.
def : Pat<(and Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ANDabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(and Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ANDabs Acc16:$src, texternalsym:$s)>;
def : Pat<(or Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (ORAabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(or Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (ORAabs Acc16:$src, texternalsym:$s)>;
def : Pat<(xor Acc16:$src, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (EORabs Acc16:$src, tglobaladdr:$g)>;
def : Pat<(xor Acc16:$src, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (EORabs Acc16:$src, texternalsym:$s)>;

// Bitwise NOT: x ^ 0xFFFF.  LLVM lowers `~x` and i1 inversion through
// this; emit a single EOR #$FFFF via the bitwise pseudo above.
def : Pat<(xor Acc16:$src, (i16 -1)),
          (EORi16imm Acc16:$src, 0xFFFF)>;

// (srl x, 15): extract bit 15 to bit 0 (yields 0 or 1).  The
// type-legalizer's SHL_PARTS expansion of `i32 << 1` needs this for
// the high-half "carry from low" slot, and routing it through the
// __lshrhi3 libcall costs ~10 bytes per i32 shift-by-1.  Inline as
// `ASL A; LDA #0; ROL A` (3 instructions): ASL puts bit 15 into C and
// trashes A; LDA #0 doesn't touch C; ROL A folds C into bit 0.
//
// (shl x, 15): move bit 0 to bit 15 (yields 0 or 0x8000).  Used by
// SRL_PARTS / SRA_PARTS expansion of `i32 >> 1` for the low-half
// "carry from hi" slot.  Mirror sequence: `LSR A; LDA #0; ROR A`.
let Constraints = "$src = $dst", Defs = [P],
    hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def SRL15A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SRL15A $dst, $src",
                     [(set Acc16:$dst, (srl Acc16:$src, (i16 15)))]>;

  def SHL15A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SHL15A $dst, $src",
                     [(set Acc16:$dst, (shl Acc16:$src, (i16 15)))]>;
}

// (srl x, 8): high byte to low byte, zero high byte.  XBA swaps the
// two bytes of A (in 16-bit M); AND #$00FF clears the new high byte.
// 4 bytes total — much shorter than the __lshrhi3 libcall path.  Used
// by i32 shift-by-8 SHL_PARTS expansion for the cross-half slot.
//
// (shl x, 8): low byte to high byte, zero low byte.  Mirror.
let Constraints = "$src = $dst", hasSideEffects = 0,
    mayLoad = 0, mayStore = 0 in {
  def SRL8A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SRL8A $dst, $src",
                     [(set Acc16:$dst, (srl Acc16:$src, (i16 8)))]>;

  def SHL8A
      : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                     "# SHL8A $dst, $src",
                     [(set Acc16:$dst, (shl Acc16:$src, (i16 8)))]>;
}

// Shift counts 9..14: SHL builds on SHL8A (XBA + low-byte mask) and chains
// 1..6 ASLs after it; SRL mirrors via SRL8A + LSRA chains.  The
// isTypeDesirableForOp override prevents the i8-shift combine loop that
// kept these out of tablegen earlier.
// Left shifts by 9..14: one SHL8A followed by (count - 8) single-bit ASLA16s.
def : Pat<(shl Acc16:$src, (i16 9)),
          (ASLA16 (SHL8A Acc16:$src))>;
def : Pat<(shl Acc16:$src, (i16 10)),
          (ASLA16 (ASLA16 (SHL8A Acc16:$src)))>;
def : Pat<(shl Acc16:$src, (i16 11)),
          (ASLA16 (ASLA16 (ASLA16 (SHL8A Acc16:$src))))>;
def : Pat<(shl Acc16:$src, (i16 12)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (SHL8A Acc16:$src)))))>;
def : Pat<(shl Acc16:$src, (i16 13)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16 (SHL8A Acc16:$src))))))>;
def : Pat<(shl Acc16:$src, (i16 14)),
          (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16 (ASLA16 (SHL8A Acc16:$src)))))))>;

// Logical right shifts by 9..14: one SRL8A followed by (count - 8)
// single-bit LSRA16s.
def : Pat<(srl Acc16:$src, (i16 9)),
          (LSRA16 (SRL8A Acc16:$src))>;
def : Pat<(srl Acc16:$src, (i16 10)),
          (LSRA16 (LSRA16 (SRL8A Acc16:$src)))>;
def : Pat<(srl Acc16:$src, (i16 11)),
          (LSRA16 (LSRA16 (LSRA16 (SRL8A Acc16:$src))))>;
def : Pat<(srl Acc16:$src, (i16 12)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (SRL8A Acc16:$src)))))>;
def : Pat<(srl Acc16:$src, (i16 13)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16 (SRL8A Acc16:$src))))))>;
def : Pat<(srl Acc16:$src, (i16 14)),
          (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16 (LSRA16 (SRL8A Acc16:$src)))))))>;

// (sra x, 15): sign-fill — yields $0000 if x is non-negative, $FFFF
// if negative.  Used by i32 sext-from-i16 type-legalization for the
// hi half (avoids the __ashrhi3 libcall path).  Sketch of the idea:
// `ASL A; LDA #0; SBC #0; EOR #-1` (when our SBCi16imm uses SEC + SBC,
// LDA #0; SBC #0 produces $FFFF if C=0, $0000 if C=1; EOR #-1 flips).
// That sketch is only approximate because SBC treats carry differently;
// the AsmPrinter expansion holds the exact 5-byte sequence.
let Constraints = "$src = $dst", hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Defs = [P] in {
  def SRA15A : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                            "# SRA15A $dst, $src",
                            [(set Acc16:$dst, (sra Acc16:$src, (i16 15)))]>;
}

// sext_inreg from i1: broadcast bit 0 to all bits.  LLVM emits this
// for `(c & 1) ? -1 : 0` patterns (e.g. CRC inner loops).  The result
// is `-(x & 1)` — 0 if bit 0 was clear, 0xFFFF if set.
// Mask to bit 0 then two's-complement-negate.  Three pseudos = ~7 bytes.
def : Pat<(sext_inreg Acc16:$src, i1),
          (NEGA16 (ANDi16imm Acc16:$src, 1))>;

// sext_inreg from i8: branchless `((x & 0xFF) ^ 0x80) - 0x80` trick
// (same sequence LowerSignExtend uses for ISD::SIGN_EXTEND i8->i16).
// LLVM emits this when expanding a sextload-i16-from-i8 (we set
// SEXTLOAD i8 to Expand in the lowering ctor) and for explicit
// `(int)(signed char)` casts.
def : Pat<(sext_inreg Acc16:$src, i8),
          (SBCi16imm (EORi16imm (ANDi16imm Acc16:$src, 0x00FF), 0x0080), 0x0080)>;

// Frame-index loads/stores: take a FrameIndex + offset (packed into a
// single MIOperandInfo) and expand (in eliminateFrameIndex) into an
// LDA / STA d,S with the offset baked in.  Used by LowerFormalArguments
// to read stack-passed arguments and by spill/reload via
// storeRegToStackSlot.
//
// Operand<> requires a value-type template argument; it was missing here.
// NOTE(review): iPTR is assumed because the address patterns in this file
// are typed iPTR — confirm against the in-tree definition.
def memfi : Operand<iPTR> {
  // Two sub-operands: the frame index and its constant offset.
  let MIOperandInfo = (ops i32imm, i32imm);
  let PrintMethod = "printFrameMem";
}

// LDAfi is rematerializable when the FI is a fixed (immutable) arg
// slot — see W65816InstrInfo::isReMaterializableImpl.  Without this,
// greedy regalloc spills every arg load to a fresh local slot then
// reloads from there, ballooning every i32-arg function by 4-6 insns.
let mayLoad = 1, hasSideEffects = 0, mayStore = 0, isReMaterializable = 1 in {
  def LDAfi : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
                           "# LDAfi $dst, $addr", []>;
}

// STAfi accepts Wide16 src so greedy can park the value in IMGn instead
// of A.  When src is in IMGn, eliminateFrameIndex prepends a LDA dp;
// hence Defs = [A] (the IMG case clobbers A).
let mayStore = 1, hasSideEffects = 0, mayLoad = 0, Defs = [A] in {
  def STAfi : W65816Pseudo<(outs), (ins Wide16:$src, memfi:$addr),
                           "# STAfi $src, $addr", []>;
}

// i8 truncating store to a FrameIndex slot.  eliminateFrameIndex wraps
// it in SEP #$20 / STA d,S / REP #$20 so only one byte is written.
// Without the wrap, a 16-bit STA writes the byte at slot+1 too, which
// corrupts the next stack slot (or return address for the last slot of
// an alloca).  Defs P because SEP/REP modify the M bit.
let mayStore = 1, hasSideEffects = 1, mayLoad = 0, Defs = [P] in {
  def STA8fi : W65816Pseudo<(outs), (ins Acc16:$src, memfi:$addr),
                            "# STA8fi $src, $addr", []>;
}

// ComplexPattern bridging FrameIndex SDValues to memfi.  See
// SelectFrameIndex in W65816ISelDAGToDAG.cpp.
//
// ComplexPattern<> requires (type, operand count, selector name); those
// arguments were missing here.  The operand count of 2 matches memfi's
// two sub-operands and the selector is the one named above.
// NOTE(review): the iPTR type and the empty root-node list are
// assumptions — confirm against the in-tree definition.
def addr_fi : ComplexPattern<iPTR, 2, "SelectFrameIndex">;

// 16-bit load / store of a frame slot.
def : Pat<(i16 (load addr_fi:$addr)),
          (LDAfi addr_fi:$addr)>;
def : Pat<(store Acc16:$src, addr_fi:$addr),
          (STAfi Acc16:$src, addr_fi:$addr)>;

// i8 access to a FrameIndex slot.  Loads read 2 bytes via 16-bit LDA
// — the high byte is harmless (extending loads mask or sign-extend it,
// narrowing loads narrow back to Acc8 / discard).  Stores must write
// only one byte: i8 alloca arrays pack adjacent slots one byte apart,
// and a 16-bit STA at the last slot of the array would corrupt the
// return address.  Truncating stores route through STA8fi which wraps
// the STA in SEP #$20 / REP #$20.
def : Pat<(i8 (load addr_fi:$addr)),
          (COPY_TO_REGCLASS (LDAfi addr_fi:$addr), Acc8)>;
def : Pat<(i16 (zextloadi8 addr_fi:$addr)),
          (ANDi16imm (LDAfi addr_fi:$addr), 0xFF)>;
def : Pat<(i16 (extloadi8 addr_fi:$addr)),
          (LDAfi addr_fi:$addr)>;
def : Pat<(store Acc8:$src, addr_fi:$addr),
          (STA8fi (COPY_TO_REGCLASS Acc8:$src, Acc16), addr_fi:$addr)>;
def : Pat<(truncstorei8 Acc16:$src, addr_fi:$addr),
          (STA8fi Acc16:$src, addr_fi:$addr)>;

// Frame-index folding into ADC / SBC / AND / ORA / EOR / CMP.  Same
// shape as the *abs variants but the second operand is a stack slot.
// ADCfi/SBCfi mark P as Def so they can match `addc`/`subc` (the lo
// half of a multi-precision split — see ADCi16imm comment above).
// Accumulator op with a frame-slot memory operand; the result is tied to
// the accumulator input.  None of these pseudos carries its own pattern —
// the Pat<> records further down select them.
let Constraints = "$src = $dst", hasSideEffects = 0, mayLoad = 1, mayStore = 0 in {
  // Add / subtract additionally define P (carry out).
  let Defs = [P] in {
    def ADCfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                             "# ADCfi $dst, $src, $addr", []>;
    def SBCfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                             "# SBCfi $dst, $src, $addr", []>;
  }
  def ANDfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                           "# ANDfi $dst, $src, $addr", []>;
  def ORAfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                           "# ORAfi $dst, $src, $addr", []>;
  def EORfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                           "# EORfi $dst, $src, $addr", []>;
}

// ADCEfi / SBCEfi: chained ADC/SBC, hi half of a multi-precision split.
// Read carry from previous addc/adde/subc/sube via Uses = [P].
let Constraints = "$src = $dst", hasSideEffects = 0, mayLoad = 1, mayStore = 0,
    Uses = [P], Defs = [P] in {
  def ADCEfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                            "# ADCEfi $dst, $src, $addr", []>;
  def SBCEfi : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src, memfi:$addr),
                            "# SBCEfi $dst, $src, $addr", []>;
}

// Compare the accumulator against a frame slot: no register result, only
// P is defined.
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Defs = [P] in {
  def CMPfi : W65816Pseudo<(outs), (ins Acc16:$lhs, memfi:$addr),
                           "# CMPfi $lhs, $addr", []>;
}

// Selection patterns: fold an i16 frame-slot load into the consuming op.
// Plain add/sub and the carry-producing addc/subc share ADCfi/SBCfi; the
// carry-consuming adde/sube use the chained variants.
def : Pat<(add Acc16:$src, (i16 (load addr_fi:$addr))),
          (ADCfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(sub Acc16:$src, (i16 (load addr_fi:$addr))),
          (SBCfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(addc Acc16:$src, (i16 (load addr_fi:$addr))),
          (ADCfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(subc Acc16:$src, (i16 (load addr_fi:$addr))),
          (SBCfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(adde Acc16:$src, (i16 (load addr_fi:$addr))),
          (ADCEfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(sube Acc16:$src, (i16 (load addr_fi:$addr))),
          (SBCEfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(and Acc16:$src, (i16 (load addr_fi:$addr))),
          (ANDfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(or Acc16:$src, (i16 (load addr_fi:$addr))),
          (ORAfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(xor Acc16:$src, (i16 (load addr_fi:$addr))),
          (EORfi Acc16:$src, addr_fi:$addr)>;
def : Pat<(W65816cmp Acc16:$lhs, (i16 (load addr_fi:$addr))),
          (CMPfi Acc16:$lhs, addr_fi:$addr)>;

// Zero-extending byte load: 16-bit LDA reads two bytes (the byte we want
// plus the next byte), then mask the high byte with AND #$00FF.  Reads
// one byte past the source — fine for standalone bytes in the bank-0
// data area but caller must ensure addr+1 is safe to read.  A future
// optimisation could use SEP/REP transitions to do a true 8-bit load.
def : Pat<(i16 (zextloadi8 (W65816Wrapper tglobaladdr:$g))),
          (ANDi16imm (LDAabs tglobaladdr:$g), 0xFF)>;
def : Pat<(i16 (zextloadi8 (W65816Wrapper texternalsym:$s))),
          (ANDi16imm (LDAabs texternalsym:$s), 0xFF)>;

// CMP / branches.  CMP sets the flags via the W65816cmp SDNode (glue
// out); the W65816brcc node consumes the glue and dispatches to the
// right Bxx instruction by condition code.
let hasSideEffects = 0, mayLoad = 0, mayStore = 0, Defs = [P] in {
  def CMPi16imm : W65816Pseudo<(outs), (ins Acc16:$lhs, i16imm:$rhs),
                               "# CMPi16imm $lhs, $rhs",
                               [(W65816cmp Acc16:$lhs, (i16 imm:$rhs))]>;
  def CMPi8imm : W65816Pseudo<(outs), (ins Acc8:$lhs, i8imm:$rhs),
                              "# CMPi8imm $lhs, $rhs",
                              [(W65816cmp Acc8:$lhs, (i8 imm:$rhs))]>;
}

// Compare against a 16-bit value loaded from an address operand.
let hasSideEffects = 0, mayLoad = 1, mayStore = 0, Defs = [P] in {
  def CMPabs : W65816Pseudo<(outs), (ins Acc16:$lhs, i32imm:$addr),
                            "# CMPabs $lhs, $addr", []>;
}
def : Pat<(W65816cmp Acc16:$lhs, (i16 (load (W65816Wrapper tglobaladdr:$g)))),
          (CMPabs Acc16:$lhs, tglobaladdr:$g)>;
def : Pat<(W65816cmp Acc16:$lhs, (i16 (load (W65816Wrapper texternalsym:$s)))),
          (CMPabs Acc16:$lhs, texternalsym:$s)>;

// 16-bit byte swap: XBA exchanges A.high and A.low.  Pattern matches
// the (bswap Acc16) SDNode emitted by clang for byte-reverse loops.
// In-place byte swap of the accumulator, selected for (bswap Acc16).
let Constraints = "$src = $dst", hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
  def XBA16 : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src),
                           "# XBA16 $dst, $src",
                           [(set Acc16:$dst, (bswap Acc16:$src))]>;
}

// Two-Acc16 binary ops.  We have only one A register, so when both
// operands are computed values (neither a foldable load/imm/global) we
// must spill one to a stack slot.  Each pseudo's custom inserter
// allocates a fresh slot and emits a STAfi+OPfi sequence; the
// register allocator handles the surrounding spills/reloads.
// hasSideEffects=1 tells the validator the pseudo may load/store
// without requiring a matching SDNode pattern (the stores are added
// by the inserter, not visible in the DAG pattern).
//
// Defs = [P] on ADD_RR/SUB_RR matches the C-flag side-effect of the
// underlying ADC/SBC, letting these pseudos serve `addc`/`subc` (the
// lo half of an i32 split) as well as plain `add`/`sub`.
let usesCustomInserter = 1, hasSideEffects = 1 in {
  let Defs = [P] in {
    def ADD_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                              "# ADD_RR $dst, $src1, $src2",
                              [(set Acc16:$dst, (add Acc16:$src1, Acc16:$src2))]>;
    def SUB_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                              "# SUB_RR $dst, $src1, $src2",
                              [(set Acc16:$dst, (sub Acc16:$src1, Acc16:$src2))]>;
  }
  def AND_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                            "# AND_RR $dst, $src1, $src2",
                            [(set Acc16:$dst, (and Acc16:$src1, Acc16:$src2))]>;
  def ORA_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                            "# ORA_RR $dst, $src1, $src2",
                            [(set Acc16:$dst, (or Acc16:$src1, Acc16:$src2))]>;
  def EOR_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                            "# EOR_RR $dst, $src1, $src2",
                            [(set Acc16:$dst, (xor Acc16:$src1, Acc16:$src2))]>;
}

// The carry-producing forms reuse the same pseudos.
def : Pat<(addc Acc16:$src1, Acc16:$src2),
          (ADD_RR Acc16:$src1, Acc16:$src2)>;
def : Pat<(subc Acc16:$src1, Acc16:$src2),
          (SUB_RR Acc16:$src1, Acc16:$src2)>;

// Chained-carry two-Acc16
// add/sub for the hi half of i32 splits.
// Inserter mirrors ADD_RR (STAfi spill + ADCEfi load-fold) but emits
// the carry-chain pseudo so the previous addc/adde's C flag is
// consumed instead of overwritten by a CLC.  Uses+Defs = [P]
// reflects the carry chain through the SDNode.
let usesCustomInserter = 1, hasSideEffects = 1, Uses = [P], Defs = [P] in {
  def ADDE_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                             "# ADDE_RR $dst, $src1, $src2",
                             [(set Acc16:$dst, (adde Acc16:$src1, Acc16:$src2))]>;
  def SUBE_RR : W65816Pseudo<(outs Acc16:$dst), (ins Acc16:$src1, Acc16:$src2),
                             "# SUBE_RR $dst, $src1, $src2",
                             [(set Acc16:$dst, (sube Acc16:$src1, Acc16:$src2))]>;
}

// Register-register compare: no register result, only P is defined.
let usesCustomInserter = 1, hasSideEffects = 1, Defs = [P] in {
  def CMP_RR : W65816Pseudo<(outs), (ins Acc16:$lhs, Acc16:$rhs),
                            "# CMP_RR $lhs, $rhs",
                            [(W65816cmp Acc16:$lhs, Acc16:$rhs)]>;
}

// Pointer dereference.  The 65816 can't deref a register pointer
// directly — the indirect addressing modes all read the pointer from
// memory (DP or stack).  These pseudos spill the Acc16 pointer to a
// fresh stack slot, set Y=0, and emit LDA/STA (slot,S),Y.  Y gets
// clobbered as a side effect.  hasSideEffects=1 covers the spill
// store the inserter adds, in addition to the deref.
// NOTE(review): the paragraph above describes a stack-relative
// (slot,S),Y lowering while the one below describes [dp],Y via DP
// scratch — one of the two may be stale; confirm against the inserter.
// LDAptr / STAptr / STBptr lower to [dp],Y indirect-long via DP
// scratch $E0..$E2 (see W65816ISelLowering.cpp inserter).  The
// inserter uses A and Y plus the DP scratch — X is not touched.
// Defs: Y (LDY #0) and P (STA/LDA set N/Z).
// $ptr is Wide16 (A or IMGn) so when bb.3-style pressure forces the
// pointer to share A with another live vreg, RA can park ptr in an
// IMGn DP slot.  Acc16:$ptr was being silently coalesced with the
// loop-PHI accumulator: both wanted A at end of bb, and PHI-elim
// dropped the COPY needed to refresh A with the pointer at top of
// the loop.  With Wide16, the COPY $a = ptr lowers to a real LDA $dp.
// 16-bit loads through a Wide16 pointer; expanded by a custom inserter.
let usesCustomInserter = 1, hasSideEffects = 1, mayLoad = 1, Defs = [Y, P] in {
  def LDAptr : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr),
                            "# LDAptr $dst, $ptr",
                            [(set Acc16:$dst, (load Wide16:$ptr))]>;
  // Variant that hardcodes bank=0 for the [dp],Y deref.  Used by
  // LowerVAARG: va_arg derefs a stack pointer, and the 65816 stack is
  // always in bank 0 — but under GS/OS Loader our default $E2 source
  // ($BE = our bank when LoaderBankDeref is on) would point reads at
  // the wrong bank.  This variant always emits `STZ $E2` so the deref
  // is unambiguously bank-0.  Caught by snprintf("%d", N) under Loader
  // returning constant garbage instead of N's decimal — see
  // feedback_loader_substantial_test.md.
  def LDAptrBank0 : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr),
                                 "# LDAptrBank0 $dst, $ptr",
                                 [(set Acc16:$dst, (W65816vaargLoad Wide16:$ptr))]>;
}

// 16-bit store through a Wide16 pointer; same inserter-based expansion.
let usesCustomInserter = 1, hasSideEffects = 1, mayStore = 1, Defs = [Y, P] in {
  def STAptr : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr),
                            "# STAptr $val, $ptr",
                            [(store Acc16:$val, Wide16:$ptr)]>;
}

// i8 zero-extending pointer load: do a 16-bit LDA (slot,s),y and mask
// the high byte.  Reads one byte past the source — fine for byte-array
// iteration where the buffer is at least 2 bytes long.  A future
// SEP/REP-aware mode pass could switch to a true 8-bit LDA.
def : Pat<(i16 (zextloadi8 Wide16:$ptr)),
          (ANDi16imm (LDAptr Wide16:$ptr), 0xFF)>;

// Anyext byte load via pointer: consumer doesn't care about the high
// byte, so just LDA (16-bit).  Same 1-byte-past-buffer caveat as
// zextloadi8.
def : Pat<(i16 (extloadi8 Wide16:$ptr)),
          (LDAptr Wide16:$ptr)>;

// And the equivalent for absolute addresses (byte loads via global ptr).
// (Already covered for Wrapper(global) above; this catches the case
// where the ptr is materialised as a value.)

// Intermediate pseudos used by the LDAptr/STAptr inserters.
// Each takes a memfi describing the slot containing the pointer;
// eliminateFrameIndex resolves it to LDA_StackRelIndY / STA_StackRelIndY
// with the right d-byte.  Y must hold 0 at the issue point (the inserter
// emits LDY #0 first), hence Uses = [Y].
let mayLoad = 1, hasSideEffects = 0, mayStore = 0, Uses = [Y] in {
  def LDAfi_indY : W65816Pseudo<(outs Acc16:$dst), (ins memfi:$addr),
                                "# LDAfi_indY $dst, $addr", []>;
}
let mayStore = 1, hasSideEffects = 0, mayLoad = 0, Uses = [Y] in {
  def STAfi_indY : W65816Pseudo<(outs), (ins Acc16:$src, memfi:$addr),
                                "# STAfi_indY $src, $addr", []>;
}

// i8 truncating store via Acc16 pointer.  Same shape as STAptr but
// custom inserter wraps the actual STA in SEP/REP so the M-bit is 8
// across the store and only one byte is written.  Without the wrap the
// 16-bit STA would clobber the byte at ptr+1.  Two patterns: the
// natural truncstorei8 from an i16 value (common with arg promotion),
// and a true i8 store (Acc8) that arises from i8-typed IR.
let usesCustomInserter = 1, hasSideEffects = 1, mayStore = 1, Defs = [Y, P] in {
  def STBptr : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr),
                            "# STBptr $val, $ptr",
                            [(truncstorei8 Acc16:$val, Wide16:$ptr)]>;
}

// Pointer access with constant offset.  `(load (add ptr, $off))` and
// `(store val, (add ptr, $off))` come up for struct field access and
// array indexing with small constant offsets.  Without these patterns,
// the offset becomes an explicit ADC #imm that has to spill A and
// recompute the pointer per access.  With them, we just load Y with
// the offset in the inserter (Y is 16-bit so any i16 constant fits).
// LDAptrOff / STAptrOff / STBptrOff: same [dp],Y lowering as the
// no-offset variants but folds the offset into Y.
// Load through pointer + constant offset.  No inline pattern; selected by
// the Pat<> records below.
let usesCustomInserter = 1, hasSideEffects = 1, mayLoad = 1, Defs = [Y, P] in {
  def LDAptrOff : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptr, i16imm:$off),
                               "# LDAptrOff $dst, $ptr, $off", []>;
}

// 16-bit (STAptrOff) and byte (STBptrOff) stores through pointer +
// constant offset.
let usesCustomInserter = 1, hasSideEffects = 1, mayStore = 1, Defs = [Y, P] in {
  def STAptrOff : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr, i16imm:$off),
                               "# STAptrOff $val, $ptr, $off", []>;
  def STBptrOff : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptr, i16imm:$off),
                               "# STBptrOff $val, $ptr, $off", []>;
}

// Fold (add ptr, constant) addresses into the Off pseudos.
def : Pat<(i16 (load (add Wide16:$ptr, (i16 imm:$off)))),
          (LDAptrOff Wide16:$ptr, imm:$off)>;
def : Pat<(store Acc16:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STAptrOff Acc16:$val, Wide16:$ptr, imm:$off)>;
def : Pat<(truncstorei8 Acc16:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STBptrOff Acc16:$val, Wide16:$ptr, imm:$off)>;
// True i8 stores: view the Acc8 value as Acc16 and use the byte-store
// pseudos, with and without an offset.
def : Pat<(store Acc8:$val, (add Wide16:$ptr, (i16 imm:$off))),
          (STBptrOff (COPY_TO_REGCLASS Acc8:$val, Acc16), Wide16:$ptr, imm:$off)>;
def : Pat<(store Acc8:$val, Wide16:$ptr),
          (STBptr (COPY_TO_REGCLASS Acc8:$val, Acc16), Wide16:$ptr)>;

// ---------------------------------------------------------------------
// ptr32 deref pseudos.  Same shape and inserter as LDAptr/STAptr/STBptr,
// but the pointer is a Wide32 (i32) value: sub_lo carries the low 16
// bits of the address, sub_hi carries the bank byte in its low half.
// Inserter stages the low 16 bits at $E0..$E1 and the bank byte at $E2,
// then emits LDA/STA [dp],Y just like the i16 path — but with a
// pointer-derived bank instead of a forced 0.
//
// Dead unless ptr32 mode is active (LowerLoad/LowerStore only emit
// W65816ldPtr/stPtr/stbPtr when the address is i32).
// ---------------------------------------------------------------------
// 16-bit load through a 32-bit (AnyWide32) pointer.
let usesCustomInserter = 1, hasSideEffects = 1, mayLoad = 1, Defs = [Y, P] in {
  def LDAptr32 : W65816Pseudo<(outs Acc16:$dst), (ins AnyWide32:$ptr),
                              "# LDAptr32 $dst, $ptr",
                              [(set Acc16:$dst, (W65816ldPtr AnyWide32:$ptr))]>;
}

// 16-bit (STAptr32) and byte (STBptr32) stores through a 32-bit pointer.
let usesCustomInserter = 1, hasSideEffects = 1, mayStore = 1, Defs = [Y, P] in {
  def STAptr32 : W65816Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr),
                              "# STAptr32 $val, $ptr",
                              [(W65816stPtr Acc16:$val, AnyWide32:$ptr)]>;
  def STBptr32 : W65816Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr),
                              "# STBptr32 $val, $ptr",
                              [(W65816stbPtr Acc16:$val, AnyWide32:$ptr)]>;
}

// Constant-offset variants.  They carry no pattern and none is attached
// to them in this part of the file.
let usesCustomInserter = 1, hasSideEffects = 1, mayLoad = 1, Defs = [Y, P] in {
  def LDAptr32Off : W65816Pseudo<(outs Acc16:$dst), (ins AnyWide32:$ptr, i16imm:$off),
                                 "# LDAptr32Off $dst, $ptr, $off", []>;
}
let usesCustomInserter = 1, hasSideEffects = 1, mayStore = 1, Defs = [Y, P] in {
  def STAptr32Off : W65816Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr, i16imm:$off),
                                 "# STAptr32Off $val, $ptr, $off", []>;
  def STBptr32Off : W65816Pseudo<(outs), (ins Acc16:$val, AnyWide32:$ptr, i16imm:$off),
                                 "# STBptr32Off $val, $ptr, $off", []>;
}

// Direct ptr32 load/store patterns over generic ISD::LOAD / ISD::STORE
// when the address is an i32 (AnyWide32) reg.  These are unreachable
// while i32 is not a legal type (ptr16 mode).  When ptr32 mode is
// activated they fire instead of the i16-pointer LDAptr / STAptr.
// 16-bit load / store and truncating byte store through a 32-bit pointer.
def : Pat<(i16 (load AnyWide32:$ptr)),
          (LDAptr32 AnyWide32:$ptr)>;
def : Pat<(store Acc16:$val, AnyWide32:$ptr),
          (STAptr32 Acc16:$val, AnyWide32:$ptr)>;
def : Pat<(truncstorei8 Acc16:$val, AnyWide32:$ptr),
          (STBptr32 Acc16:$val, AnyWide32:$ptr)>;
// Byte loads: a 16-bit load, masked when the high byte must be zero.
def : Pat<(i16 (zextloadi8 AnyWide32:$ptr)),
          (ANDi16imm (LDAptr32 AnyWide32:$ptr), 0xFF)>;
def : Pat<(i16 (extloadi8 AnyWide32:$ptr)),
          (LDAptr32 AnyWide32:$ptr)>;
def : Pat<(i8 (load AnyWide32:$ptr)),
          (COPY_TO_REGCLASS (ANDi16imm (LDAptr32 AnyWide32:$ptr), 0xFF), Acc8)>;
// True i8 store: view the Acc8 value as Acc16 and use the byte store.
def : Pat<(store Acc8:$val, AnyWide32:$ptr),
          (STBptr32 (COPY_TO_REGCLASS Acc8:$val, Acc16), AnyWide32:$ptr)>;

// Off variants — folded constant-offset add patterns deferred until
// ptr32 mode is activated and we can profile real cases.  The base
// LDAptr32/STAptr32 pseudos handle the general (add ptr, off) case
// correctly via a separate i32 ADD; the Off pseudos are an optional
// optimization for small constant offsets.

// Split-pair variants: same semantics as LDAptr32/STAptr32/STBptr32 but
// the ptr is two separate i16 register operands (lo + hi) instead of
// one Wide32 register pair.  Used by the W65816LowerWide32 pre-RA pass
// to relieve register-pair allocation pressure: it walks REG_SEQUENCE
// + LDAptr32 chains, decomposes the Wide32 vregs into pairs of i16
// vregs, and rewrites the LDAptr32-family to take the two halves
// directly.
let usesCustomInserter = 1, hasSideEffects = 1, mayLoad = 1, Defs = [Y, P] in {
  def LDAptr32S : W65816Pseudo<(outs Acc16:$dst), (ins Wide16:$ptrLo, Wide16:$ptrHi),
                               "# LDAptr32S $dst, $ptrLo, $ptrHi", []>;
}
let usesCustomInserter = 1, hasSideEffects = 1, mayStore = 1, Defs = [Y, P] in {
  def STAptr32S : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptrLo, Wide16:$ptrHi),
                               "# STAptr32S $val, $ptrLo, $ptrHi", []>;
  def STBptr32S : W65816Pseudo<(outs), (ins Acc16:$val, Wide16:$ptrLo, Wide16:$ptrHi),
                               "# STBptr32S $val, $ptrLo, $ptrHi", []>;
}

// i8 load via Acc16 pointer producing a true i8 (Acc8) result.
// Reuses the existing zextloadi8 16-bit-LDA-and-mask path: load 2 bytes,
// mask the high byte, then narrow to Acc8.  COPY_TO_REGCLASS to Acc8 is a
// no-op at MC level (same physical A).  Reads one byte past the source;
// fine for char-array iteration where the buffer is at least 2 bytes.
def : Pat<(i8 (load Wide16:$ptr)),
          (COPY_TO_REGCLASS (ANDi16imm (LDAptr Wide16:$ptr), 0xFF), Acc8)>;

// Acc8-to-Acc16 type conversions.  Both Acc8 and Acc16 alias physical A,
// so COPY_TO_REGCLASS is a no-op at MC level.  ZEXT additionally masks
// the high byte (which holds B from before any prior SEP).  ANYEXT
// leaves the high byte untouched since the consumer doesn't care.
def : Pat<(i16 (anyext Acc8:$src)),
          (COPY_TO_REGCLASS Acc8:$src, Acc16)>;
def : Pat<(i16 (zext Acc8:$src)),
          (ANDi16imm (COPY_TO_REGCLASS Acc8:$src, Acc16), 0xFF)>;
def : Pat<(i8 (trunc Acc16:$src)),
          (COPY_TO_REGCLASS Acc16:$src, Acc8)>;

// Acc8 reg-reg arithmetic and bitwise ops, expanded through the Acc16
// _RR pseudos.  Cheap to do because Acc8 and Acc16 alias the same
// physical A — COPY_TO_REGCLASS is a no-op.  Only the low byte
// matters; the high byte gets unrelated bits but is discarded by the
// final narrow-back to Acc8.  This lets an i8 expression that wasn't
// promoted by legalization (e.g. an i8 XOR feeding only an i8 store)
// reuse the spill-and-OPfi inserter without needing dedicated Acc8
// pseudos.
//
// Parameters:
//   op - the i8 SelectionDAG operator to match.
//   ri - the two-operand Acc16 pseudo that implements it.
// The parameter list and the instantiation arguments below were missing
// (the body referenced undeclared `op` / `ri`).
// NOTE(review): the five (operator, pseudo) pairs are reconstructed from
// the five two-operand Acc16 _RR pseudos defined in this file — confirm.
multiclass Acc8RR<SDPatternOperator op, Instruction ri> {
  def : Pat<(i8 (op Acc8:$a, Acc8:$b)),
            (COPY_TO_REGCLASS
              (ri (COPY_TO_REGCLASS Acc8:$a, Acc16),
                  (COPY_TO_REGCLASS Acc8:$b, Acc16)),
              Acc8)>;
}
defm : Acc8RR<add, ADD_RR>;
defm : Acc8RR<sub, SUB_RR>;
defm : Acc8RR<and, AND_RR>;
defm : Acc8RR<or,  ORA_RR>;
defm : Acc8RR<xor, EOR_RR>;

// (memory inc/dec patterns moved below INC_Abs/DEC_Abs defs.)
// (Branch patterns moved below the Real Instructions section since
// they reference instruction defs.)

//===----------------------------------------------------------------------===//
// Real Instructions
//
// Opcodes taken from the WDC W65C816S data sheet.
// Instructions whose size depends on the M or X bits exist in two
// variants (Imm8 / Imm16) and carry TSFlags bits indicating which
// processor mode they assume; the REP/SEP scheduling pass uses those to
// verify/insert mode transitions.
//
// Disassembler note: for every opcode that has both an _Imm8 and an _Imm16
// form (LDA/LDX/LDY/ADC/SBC/CMP/AND/ORA/EOR/BIT/CPX/CPY), the two forms share
// the same opcode byte but differ in operand width according to M/X mode.
// The scaffold disassembler only consults the default "W65816" decoder
// table, so we push the _Imm8 variants into namespaces "W65816MHigh" /
// "W65816XHigh".  That keeps only one variant per opcode in the default
// table (the 3-byte _Imm16 form for M-dependent insns, and the 3-byte
// _Imm16 form for X-dependent insns), so `llvm-objdump -d` always decodes
// these as 16-bit immediates until the mode-aware decoder lands.
//===----------------------------------------------------------------------===//

//---------------------------------------------------------------- CPU control
def NOP : InstImplied<0xEA, "nop"> {
  let mayLoad = 0;
  let mayStore = 0;
  let hasSideEffects = 0;
}
// REP / SEP take an 8-bit immediate mask and are marked as having side
// effects.
def REP : InstImm8<0xC2, "rep"> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
}
def SEP : InstImm8<0xE2, "sep"> {
  let hasSideEffects = 1;
  let mayLoad = 0;
  let mayStore = 0;
}
// Single-flag set/clear instructions, XCE and XBA: no memory access.
def CLC : InstImplied<0x18, "clc"> { let mayLoad = 0; let mayStore = 0; }
def SEC : InstImplied<0x38, "sec"> { let mayLoad = 0; let mayStore = 0; }
def CLI : InstImplied<0x58, "cli"> { let mayLoad = 0; let mayStore = 0; }
def SEI : InstImplied<0x78, "sei"> { let mayLoad = 0; let mayStore = 0; }
def CLD : InstImplied<0xD8, "cld"> { let mayLoad = 0; let mayStore = 0; }
def SED : InstImplied<0xF8, "sed"> { let mayLoad = 0; let mayStore = 0; }
def CLV : InstImplied<0xB8, "clv"> { let mayLoad = 0; let mayStore = 0; }
def XCE : InstImplied<0xFB, "xce"> { let mayLoad = 0; let mayStore = 0; }
def XBA : InstImplied<0xEB, "xba"> { let mayLoad = 0; let mayStore = 0; }
def WAI : InstImplied<0xCB, "wai">;
def STP : InstImplied<0xDB, "stp">;

// WDM (William D Mensch) — reserved 2-byte NOP-equivalent.  Useful as
// a debugger / emulator hook: MAME's apple2gs CPU traps on WDM and a
// Lua plugin can dispatch on the operand byte.  CPU-side, it acts as
// a 2-byte NOP.  Operand syntax mirrors MVN: `wdm $ab` (no `#`).
def WDM : InstDP<0x42, "wdm">;

// TRB / TSB — Test and Reset/Set memory Bits.  Atomic bit clear/set
// on a byte (or 16-bit word per M flag) at the given DP or abs
// address.  Z flag set per (M & A) where M is the memory operand.
// Useful for memory-mapped IO bit twiddling.  No DP indexing form.
def TRB_DP  : InstDP<0x14, "trb">;
def TRB_Abs : InstAbs<0x1C, "trb">;
def TSB_DP  : InstDP<0x04, "tsb">;
def TSB_Abs : InstAbs<0x0C, "tsb">;

// PEI — Push Effective Indirect.  Reads a 16-bit value from DP and
// pushes it.  Useful for indirect parameter passing without going
// through A first.
def PEI_DP : InstDP<0xD4, "pei">;

//---------------------------------------------------------------- LDA (load A)
// The `_Imm8` forms of the mode-dependent load/arith/compare ops are
// marked isCodeGenOnly so the asm matcher never picks them — our
// AsmParser has no way to know the current M/X bits, so it always
// reaches for the _Imm16 form.  Codegen can still select _Imm8
// explicitly once we have 8-bit patterns.
// Immediate forms: the 8-bit variant assumes M=1 and is kept out of the
// default decoder table and the asm matcher; the 16-bit variant assumes
// M=0.
def LDA_Imm8 : InstImm8<0xA9, "lda"> {
  let MHigh = 1;
  let DecoderNamespace = "W65816MHigh";
  let isCodeGenOnly = 1;
  let Defs = [A];
}
def LDA_Imm16 : InstImm16<0xA9, "lda"> {
  let MLow = 1;
  let Defs = [A];
}
// NOTE(review): of the memory forms below, only the [dp] and [dp],Y
// variants declare Defs = [A] here; whether the others get it from
// their format classes is not visible in this part of the file — confirm.
def LDA_DP        : InstDP<0xA5, "lda">;
def LDA_Abs       : InstAbs<0xAD, "lda">;
def LDA_Long      : InstAbsLong<0xAF, "lda">;
def LDA_DPX       : InstDPX<0xB5, "lda">;
def LDA_AbsX      : InstAbsX<0xBD, "lda">;
def LDA_AbsY      : InstAbsY<0xB9, "lda">;
def LDA_DPInd     : InstDPInd <0xB2, "lda">;
def LDA_DPIndY    : InstDPIndY<0xB1, "lda">;
def LDA_DPIndX    : InstDPIndX<0xA1, "lda">;
def LDA_DPIndLong : InstDPIndLong <0xA7, "lda"> { let Defs = [A]; }
// LDA [dp],Y: reads Y to compute the indexed address, defines A.
// Without these, regalloc thought A was unaffected by the load and
// dead-code-eliminated COPYs that were supposed to materialise the
// next pointer in A — silent miscompile in mySwap-style helpers.
def LDA_DPIndLongY : InstDPIndLongY<0xB7, "lda"> {
  let Defs = [A];
  let Uses = [Y];
}
def LDA_LongX : InstAbsLongX<0xBF, "lda">;

//---------------------------------------------------------------- STA (store A)
// NOTE(review): only the [dp] and [dp],Y variants declare Uses on A here;
// see the matching note on the LDA forms.
def STA_DP        : InstDP<0x85, "sta">;
def STA_Abs       : InstAbs<0x8D, "sta">;
def STA_Long      : InstAbsLong<0x8F, "sta">;
def STA_DPX       : InstDPX<0x95, "sta">;
def STA_AbsX      : InstAbsX<0x9D, "sta">;
def STA_AbsY      : InstAbsY<0x99, "sta">;
def STA_DPInd     : InstDPInd <0x92, "sta">;
def STA_DPIndY    : InstDPIndY<0x91, "sta">;
def STA_DPIndX    : InstDPIndX<0x81, "sta">;
def STA_DPIndLong : InstDPIndLong <0x87, "sta"> { let Uses = [A]; }
// STA [dp],Y: reads A (the value to store) and Y (the index).  Mark
// both so regalloc keeps A's value live across this instruction.
// STA [dp],Y reads both the value in A and the index in Y.
def STA_DPIndLongY : InstDPIndLongY<0x97, "sta"> { let Uses = [A, Y]; }
def STA_LongX : InstAbsLongX<0x9F, "sta">;

//---------------------------------------------------------------- LDX (load X)
// Immediate forms are split by the X (index width) bit; the 8-bit variant
// lives in its own decoder namespace and is codegen-only.
def LDX_Imm8 : InstImm8<0xA2, "ldx"> {
  let XHigh = 1;
  let DecoderNamespace = "W65816XHigh";
  let isCodeGenOnly = 1;
  let Defs = [X];
}
def LDX_Imm16 : InstImm16<0xA2, "ldx"> {
  let XLow = 1;
  let Defs = [X];
}
def LDX_DP   : InstDP<0xA6, "ldx">;
def LDX_Abs  : InstAbs<0xAE, "ldx">;
def LDX_DPY  : InstDPY<0xB6, "ldx">;
def LDX_AbsY : InstAbsY<0xBE, "ldx">;

//---------------------------------------------------------------- STX (store X)
def STX_DP  : InstDP<0x86, "stx">;
def STX_Abs : InstAbs<0x8E, "stx">;
def STX_DPY : InstDPY<0x96, "stx">;

//---------------------------------------------------------------- LDY (load Y)
// Same Imm8 / Imm16 split as LDX.
def LDY_Imm8 : InstImm8<0xA0, "ldy"> {
  let XHigh = 1;
  let DecoderNamespace = "W65816XHigh";
  let isCodeGenOnly = 1;
  let Defs = [Y];
}
def LDY_Imm16 : InstImm16<0xA0, "ldy"> {
  let XLow = 1;
  let Defs = [Y];
}
def LDY_DP   : InstDP<0xA4, "ldy">;
def LDY_Abs  : InstAbs<0xAC, "ldy">;
def LDY_DPX  : InstDPX<0xB4, "ldy">;
def LDY_AbsX : InstAbsX<0xBC, "ldy">;

//---------------------------------------------------------------- STY (store Y)
def STY_DP  : InstDP<0x84, "sty">;
def STY_Abs : InstAbs<0x8C, "sty">;
def STY_DPX : InstDPX<0x94, "sty">;

//---------------------------------------------------------------- STZ (store zero)
// Width follows M flag — same as STA.  Useful for zeroing DP scratch
// without burning A.  Shorter than `LDA #0; STA dp` because no
// immediate load is needed.
def STZ_DP   : InstDP<0x64, "stz">;
def STZ_Abs  : InstAbs<0x9C, "stz">;
def STZ_DPX  : InstDPX<0x74, "stz">;
def STZ_AbsX : InstAbsX<0x9E, "stz">;

//------------------------------------------------------------------------- ADC
def ADC_Imm8 : InstImm8<0x69, "adc"> {
  let MHigh = 1;
  let DecoderNamespace = "W65816MHigh";
  let isCodeGenOnly = 1;
}
def ADC_Imm16 : InstImm16<0x69, "adc"> {
  let MLow = 1;
}
def ADC_DP   : InstDP<0x65, "adc">;
def ADC_Abs  : InstAbs<0x6D, "adc">;
def ADC_DPX  : InstDPX<0x75, "adc">;
def ADC_AbsX : InstAbsX<0x7D, "adc">;
def ADC_AbsY : InstAbsY<0x79, "adc">;

//------------------------------------------------------------------------- SBC
def SBC_Imm8 : InstImm8<0xE9, "sbc"> {
  let MHigh = 1;
  let DecoderNamespace = "W65816MHigh";
  let isCodeGenOnly = 1;
}
def SBC_Imm16 : InstImm16<0xE9, "sbc"> {
  let MLow = 1;
}
def SBC_DP   : InstDP<0xE5, "sbc">;
def SBC_Abs  : InstAbs<0xED, "sbc">;
def SBC_DPX  : InstDPX<0xF5, "sbc">;
def SBC_AbsX : InstAbsX<0xFD, "sbc">;
def SBC_AbsY : InstAbsY<0xF9, "sbc">;

//------------------------------------------------------------------------- CMP
// No compare form is marked as storing; the immediate forms are additionally
// marked as not loading.
let mayStore = 0 in {
  let mayLoad = 0 in {
    def CMP_Imm8 : InstImm8<0xC9, "cmp"> {
      let MHigh = 1;
      let DecoderNamespace = "W65816MHigh";
      let isCodeGenOnly = 1;
    }
    def CMP_Imm16 : InstImm16<0xC9, "cmp"> {
      let MLow = 1;
    }
  }
  def CMP_DP   : InstDP<0xC5, "cmp">;
  def CMP_Abs  : InstAbs<0xCD, "cmp">;
  def CMP_DPX  : InstDPX<0xD5, "cmp">;
  def CMP_AbsX : InstAbsX<0xDD, "cmp">;
  def CMP_AbsY : InstAbsY<0xD9, "cmp">;
}

//---------------------------------------------------------------- CPX/CPY
let mayStore = 0 in {
  let mayLoad = 0 in {
    def CPX_Imm8 : InstImm8<0xE0, "cpx"> {
      let XHigh = 1;
      let DecoderNamespace = "W65816XHigh";
      let isCodeGenOnly = 1;
    }
    def CPX_Imm16 : InstImm16<0xE0, "cpx"> {
      let XLow = 1;
    }
  }
  def CPX_DP  : InstDP<0xE4, "cpx">;
  def CPX_Abs : InstAbs<0xEC, "cpx">;

  let mayLoad = 0 in {
    def CPY_Imm8 : InstImm8<0xC0, "cpy"> {
      let XHigh = 1;
      let DecoderNamespace = "W65816XHigh";
      let isCodeGenOnly = 1;
    }
    def CPY_Imm16 : InstImm16<0xC0, "cpy"> {
      let XLow = 1;
    }
  }
  def CPY_DP  : InstDP<0xC4, "cpy">;
  def CPY_Abs : InstAbs<0xCC, "cpy">;
}

//---------------------------------------------------------------- AND/ORA/EOR
// Same flag scheme as the compares: nothing here is marked as storing, and
// the immediate forms are also marked as not loading. BIT is grouped with
// the logical operations.
let mayStore = 0 in {
  let mayLoad = 0 in {
    def AND_Imm8 : InstImm8<0x29, "and"> {
      let MHigh = 1;
      let DecoderNamespace = "W65816MHigh";
      let isCodeGenOnly = 1;
    }
    def AND_Imm16 : InstImm16<0x29, "and"> {
      let MLow = 1;
    }
  }
  def AND_DP  : InstDP<0x25, "and">;
  def AND_Abs : InstAbs<0x2D, "and">;

  let mayLoad = 0 in {
    def ORA_Imm8 : InstImm8<0x09, "ora"> {
      let MHigh = 1;
      let DecoderNamespace = "W65816MHigh";
      let isCodeGenOnly = 1;
    }
    def ORA_Imm16 : InstImm16<0x09, "ora"> {
      let MLow = 1;
    }
  }
  def ORA_DP  : InstDP<0x05, "ora">;
  def ORA_Abs : InstAbs<0x0D, "ora">;

  let mayLoad = 0 in {
    def EOR_Imm8 : InstImm8<0x49, "eor"> {
      let MHigh = 1;
      let DecoderNamespace = "W65816MHigh";
      let isCodeGenOnly = 1;
    }
    def EOR_Imm16 : InstImm16<0x49, "eor"> {
      let MLow = 1;
    }
  }
  def EOR_DP  : InstDP<0x45, "eor">;
  def EOR_Abs : InstAbs<0x4D, "eor">;

  let mayLoad = 0 in {
    def BIT_Imm8 : InstImm8<0x89, "bit"> {
      let MHigh = 1;
      let DecoderNamespace = "W65816MHigh";
      let isCodeGenOnly = 1;
    }
    def BIT_Imm16 : InstImm16<0x89, "bit"> {
      let MLow = 1;
    }
  }
  def BIT_DP  : InstDP<0x24, "bit">;
  def BIT_Abs : InstAbs<0x2C, "bit">;
}

//---------------------------------------------------------------- INC/DEC
// Register forms: marked as touching no memory.
let mayLoad = 0, mayStore = 0 in {
  def INA : InstImplied<0x1A, "inc a">;
  def DEA : InstImplied<0x3A, "dec a">;
  def INX : InstImplied<0xE8, "inx">;
  def DEX : InstImplied<0xCA, "dex">;
  def INY : InstImplied<0xC8, "iny">;
  def DEY : InstImplied<0x88, "dey">;
}
// Memory forms.
def INC_DP   : InstDP<0xE6, "inc">;
def INC_Abs  : InstAbs<0xEE, "inc">;
def INC_DPX  : InstDPX<0xF6, "inc">;
def INC_AbsX : InstAbsX<0xFE, "inc">;
def DEC_DP   : InstDP<0xC6, "dec">;
def DEC_Abs  : InstAbs<0xCE, "dec">;
def DEC_DPX  : InstDPX<0xD6, "dec">;
def DEC_AbsX : InstAbsX<0xDE, "dec">;

//---------------------------------------------------------------- Shifts
// Accumulator forms: marked as touching no memory.
let mayLoad = 0, mayStore = 0 in {
  def ASL_A : InstImplied<0x0A, "asl a">;
  def LSR_A : InstImplied<0x4A, "lsr a">;
  def ROL_A : InstImplied<0x2A, "rol a">;
  def ROR_A : InstImplied<0x6A, "ror a">;
}
// Memory forms.
def ASL_DP  : InstDP<0x06, "asl">;
def ASL_Abs : InstAbs<0x0E, "asl">;
def LSR_DP  : InstDP<0x46, "lsr">;
def LSR_Abs : InstAbs<0x4E, "lsr">;
def ROL_DP  : InstDP<0x26, "rol">;
def ROL_Abs : InstAbs<0x2E, "rol">;
def ROR_DP  : InstDP<0x66, "ror">;
def ROR_Abs : InstAbs<0x6E, "ror">;

//---------------------------------------------------------------- Transfers
// Defs/Uses metadata is critical: without it, machine-cp doesn't see
// that TAX (etc.) reads the source register, and may delete a `$a =
// COPY $x` immediately preceding it as a "dead store" — corrupting
// the data flow. See feedback_w65816_implied_ops.md for the canary.
def TAX : InstImplied<0xAA, "tax"> { let mayLoad = 0; let mayStore = 0; let Defs = [X]; let Uses = [A]; }
def TAY : InstImplied<0xA8, "tay"> { let mayLoad = 0; let mayStore = 0; let Defs = [Y]; let Uses = [A]; }
def TXA : InstImplied<0x8A, "txa"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; let Uses = [X]; }
def TYA : InstImplied<0x98, "tya"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; let Uses = [Y]; }
def TXY : InstImplied<0x9B, "txy"> { let mayLoad = 0; let mayStore = 0; let Defs = [Y]; let Uses = [X]; }
def TYX : InstImplied<0xBB, "tyx"> { let mayLoad = 0; let mayStore = 0; let Defs = [X]; let Uses = [Y]; }
def TXS : InstImplied<0x9A, "txs"> { let mayLoad = 0; let mayStore = 0; let Defs = [SP]; let Uses = [X]; }
def TSX : InstImplied<0xBA, "tsx"> { let mayLoad = 0; let mayStore = 0; let Defs = [X]; let Uses = [SP]; }
// Accumulator <-> direct-page / stack-pointer transfers. These move data
// through A exactly like the transfers above, so they carry the same kind
// of Defs/Uses metadata.
// NOTE(review): the direct-page register side of TCD/TDC is not modelled
// because no register def for it is visible in this file — add it to
// Defs (TCD) / Uses (TDC) if such a register exists.
def TCD : InstImplied<0x5B, "tcd"> { let mayLoad = 0; let mayStore = 0; let Uses = [A]; }
def TDC : InstImplied<0x7B, "tdc"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; }
def TCS : InstImplied<0x1B, "tcs"> { let mayLoad = 0; let mayStore = 0; let Defs = [SP]; let Uses = [A]; }
def TSC : InstImplied<0x3B, "tsc"> { let mayLoad = 0; let mayStore = 0; let Defs = [A]; let Uses = [SP]; }

//---------------------------------------------------------------- Stack push/pull
def PHA : InstImplied<0x48, "pha">;
def PLA : InstImplied<0x68, "pla">;
def PHX : InstImplied<0xDA, "phx">;
def PLX : InstImplied<0xFA, "plx">;
def PHY : InstImplied<0x5A, "phy">;
def PLY : InstImplied<0x7A, "ply">;
def PHP : InstImplied<0x08, "php">;
def PLP : InstImplied<0x28, "plp">;
def PHB : InstImplied<0x8B, "phb">;
def PLB : InstImplied<0xAB, "plb">;
def PHD : InstImplied<0x0B, "phd">;
def PLD : InstImplied<0x2B, "pld">;
def PHK : InstImplied<0x4B, "phk">;
def PEA : InstAbs<0xF4, "pea">;
def PER : InstPCRel16<0x62, "per">;

//---------------------------------------------------------------- Branches
// Conditional branches READ the P (status) register. Without this
// Uses, MachineCSE saw no dependency between an earlier CMP (which
// defines P) and the consuming Bxx, and would happily reuse a
// "redundant" CMP whose flags had been clobbered by an intervening
// LDA/STA/ADC. Modelling the dep is the principled fix; the
// W65816TargetMachine workaround that disabled MachineCSE entirely
// can come back out once this is verified.
let isBranch = 1, isTerminator = 1, mayLoad = 0, mayStore = 0,
    Uses = [P] in {
  def BEQ : InstPCRel8<0xF0, "beq">;
  def BNE : InstPCRel8<0xD0, "bne">;
  def BCS : InstPCRel8<0xB0, "bcs">;
  def BCC : InstPCRel8<0x90, "bcc">;
  def BMI : InstPCRel8<0x30, "bmi">;
  def BPL : InstPCRel8<0x10, "bpl">;
  def BVS : InstPCRel8<0x70, "bvs">;
  def BVC : InstPCRel8<0x50, "bvc">;
}

// Unconditional control transfers: barriers, no fall-through.
let isBranch = 1, isTerminator = 1, isBarrier = 1,
    mayLoad = 0, mayStore = 0 in {
  def BRA        : InstPCRel8<0x80, "bra">;
  def BRL        : InstPCRel16<0x82, "brl">;
  def JMP_Abs    : InstAbs<0x4C, "jmp">;
  def JMP_AbsInd : InstAbsInd<0x6C, "jmp">;
  def JML_Long   : InstAbsLong<0x5C, "jml">;
}

//---------------------------------------------------------------- Calls
let isCall = 1, mayLoad = 0, mayStore = 0 in {
  def JSR_Abs  : InstAbs<0x20, "jsr">;
  def JSL_Long : InstAbsLong<0x22, "jsl">;
}

//---------------------------------------------------------------- Returns
let isReturn = 1, isTerminator = 1, isBarrier = 1,
    mayLoad = 0, mayStore = 0 in {
  def RTS : InstImplied<0x60, "rts">;
  def RTI : InstImplied<0x40, "rti">;
  // RTL is the 65816 long return; we select it for the generic retglue node.
  def RTL : InstImplied<0x6B, "rtl"> {
    let Pattern = [(W65816retglue)];
  }
}

//---------------------------------------------------------------- Block move
// MVN/MVP are 3 bytes: opcode + destBank + srcBank. The assembly syntax
// accepted here is `mvn dst, src`, and the two bank bytes are encoded in
// that same order (dst first, then src).
// NOTE(review): WDC's reference syntax is commonly documented as
// `MVN srcbank,dstbank` — confirm which operand order hand-written
// assembly fed to this assembler is expected to use.
// Block-move operands are bank bytes written without a '#' prefix
// (e.g. `mvn $01, $02`), so the parser produces AddrDP-kind operands,
// not immediates. Use addrDP here to match that; the encoder path is
// identical since both are single-byte values.
//
// op   - the 8-bit opcode byte.
// mnem - the mnemonic printed/parsed ahead of the two bank operands.
class InstBlockMove<bits<8> op, string mnem>
  : W65816Inst<(outs), (ins addrDP:$dst, addrDP:$src),
               !strconcat(mnem, "\t$dst, $src")> {
  let Size = 3;
  bits<8> dst;
  bits<8> src;
  bits<24> Inst;
  let Inst{7-0}   = op;   // byte 0: opcode
  let Inst{15-8}  = dst;  // byte 1: destination bank
  let Inst{23-16} = src;  // byte 2: source bank
}
def MVN : InstBlockMove<0x54, "mvn">;
def MVP : InstBlockMove<0x44, "mvp">;

//---------------------------------------------------------------- Stack-rel
def LDA_StackRel : InstStackRel<0xA3, "lda">;
def STA_StackRel : InstStackRel<0x83, "sta">;
def ADC_StackRel : InstStackRel<0x63, "adc">;
def SBC_StackRel : InstStackRel<0xE3, "sbc">;
def CMP_StackRel : InstStackRel<0xC3, "cmp">;
def AND_StackRel : InstStackRel<0x23, "and">;
def ORA_StackRel : InstStackRel<0x03, "ora">;
def EOR_StackRel : InstStackRel<0x43, "eor">;

//---------------------------------------------------------------- Stack-ind-Y
// Stack-relative indirect indexed-Y: deref a pointer spilled at S+off.
def LDA_StackRelIndY : InstStackRelIndY<0xB3, "lda">;
def STA_StackRelIndY : InstStackRelIndY<0x93, "sta">;

//===----------------------------------------------------------------------===//
// Branch patterns (placed after the Bxx defs).
//
// W65816brcc takes (Dest, CondCode) plus a glue from W65816cmp. The CC
// constant maps to one of the eight Bxx instructions. Values mirror
// W65816CC::CondCode in W65816.h.
//===----------------------------------------------------------------------===// def : Pat<(W65816brcc bb:$dest, (i8 0)), (BEQ bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 1)), (BNE bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 2)), (BCS bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 3)), (BCC bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 4)), (BMI bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 5)), (BPL bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 6)), (BVS bb:$dest)>; def : Pat<(W65816brcc bb:$dest, (i8 7)), (BVC bb:$dest)>; // Unconditional branch from generic ISD::BR. def : Pat<(br bb:$dest), (BRA bb:$dest)>; // Memory inc/dec: `*p = *p + 1` → `INC abs`. Single-instruction RMW // instead of LDA → CLC → ADC #1 → STA. def : Pat<(store (i16 (add (i16 (load (W65816Wrapper tglobaladdr:$g))), (i16 1))), (W65816Wrapper tglobaladdr:$g)), (INC_Abs tglobaladdr:$g)>; def : Pat<(store (i16 (add (i16 (load (W65816Wrapper tglobaladdr:$g))), (i16 -1))), (W65816Wrapper tglobaladdr:$g)), (DEC_Abs tglobaladdr:$g)>; // Direct call to a global / external symbol. We use JSL (24-bit // long jump-and-link) and RTL pairing throughout — matches the // IIgs convention where main is entered via JSL, and means a // function doesn't have to know how it was called to choose its // return instruction. A pseudo bridges the i16 symbol operand // to JSL_Long's 24-bit operand class. // Defs lists ALL caller-clobbered regs. The 65816 has no // caller/callee-save split — every callee may freely modify // A/X/Y/DPF0/P/etc. Critically, i32/i64 returns place high // halves in X (i32), Y and DPF0 (i64); without those in Defs, // the InstrEmitter does not add implicit-defs for glued // CopyFromReg(X/Y/DPF0) on the call MI, and the verifier sees // the post-call `COPY $y` as reading an undefined register. 
// DPF0 was historically the only "extra" def so getLoad(0xF0)
// wouldn't CSE across calls; the same anti-CSE rationale applies
// to A/X/Y, but more fundamentally those are call return slots.
// P is listed too: a callee may change the status flags, and the
// conditional branches in this file read P, so a call has to be
// visible as a redefinition of P rather than as preserving it.
let isCall = 1, hasSideEffects = 0, mayLoad = 0, mayStore = 0,
    Defs = [A, X, Y, DPF0, P] in {
  // Call pseudo taking an i16-typed symbol operand.
  def JSLpseudo : W65816Pseudo<(outs), (ins i16imm:$dst),
                               "# JSLpseudo $dst", []>;
  // ptr32 variant — same expansion in AsmPrinter; the operand class
  // just exists so tablegen accepts an i32-typed tglobaladdr operand.
  def JSLpseudo32 : W65816Pseudo<(outs), (ins i32imm:$dst),
                                 "# JSLpseudo32 $dst", []>;
}

// 16-bit pointer call targets.
def : Pat<(W65816call (i16 tglobaladdr:$dst)),
          (JSLpseudo tglobaladdr:$dst)>;
def : Pat<(W65816call (i16 texternalsym:$dst)),
          (JSLpseudo texternalsym:$dst)>;

// ptr32: under p:32:16, call targets are i32 (iPTR matches the pointer
// width). Same JSL_Long instruction handles either width — the OMF
// cRELOC opcode rewrites the offset and bank at load time.
def : Pat<(W65816call (i32 tglobaladdr:$dst)),
          (JSLpseudo32 tglobaladdr:$dst)>;
def : Pat<(W65816call (i32 texternalsym:$dst)),
          (JSLpseudo32 texternalsym:$dst)>;