diff --git a/src/llvm/lib/Target/W65816/W65816RegisterInfo.cpp b/src/llvm/lib/Target/W65816/W65816RegisterInfo.cpp
index f294a07..c4f0af7 100644
--- a/src/llvm/lib/Target/W65816/W65816RegisterInfo.cpp
+++ b/src/llvm/lib/Target/W65816/W65816RegisterInfo.cpp
@@ -290,6 +290,39 @@ bool W65816RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
       BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::TAX));
     } else if (Dst == W65816::Y) {
       BuildMI(*MI.getParent(), II, MI.getDebugLoc(), TII.get(W65816::TAY));
+    } else {
+      // Dst matched no earlier branch. LDAfi with Dst = IMGn means regalloc
+      // put a Wide16 vreg in an IMG slot, but LDA_StackRel only loaded A, so
+      // A must also be stored to that slot's DP address. Otherwise a later
+      // `COPY $x = $imgN` (which expands to `ldx $D?`) reads a stale slot:
+      // e.g. `dadd(1.5, 2.5) → 0x4010_0000_3000_3000` under full IMG-clobber,
+      // where every cross-call spill reload of an IMG-assigned vreg fed stale
+      // prior data into the next computation.
+      // dstDP stays -1 (no store emitted) when Dst is not one of IMG0..IMG15.
+      int dstDP = -1;
+      switch (Dst) {
+      case W65816::IMG0:  dstDP = 0xD0; break;
+      case W65816::IMG1:  dstDP = 0xD2; break;
+      case W65816::IMG2:  dstDP = 0xD4; break;
+      case W65816::IMG3:  dstDP = 0xD6; break;
+      case W65816::IMG4:  dstDP = 0xD8; break;
+      case W65816::IMG5:  dstDP = 0xDA; break;
+      case W65816::IMG6:  dstDP = 0xDC; break;
+      case W65816::IMG7:  dstDP = 0xDE; break;
+      case W65816::IMG8:  dstDP = 0xC0; break;
+      case W65816::IMG9:  dstDP = 0xC2; break;
+      case W65816::IMG10: dstDP = 0xC4; break;
+      case W65816::IMG11: dstDP = 0xC6; break;
+      case W65816::IMG12: dstDP = 0xC8; break;
+      case W65816::IMG13: dstDP = 0xCA; break;
+      case W65816::IMG14: dstDP = 0xCC; break;
+      case W65816::IMG15: dstDP = 0xCE; break;
+      default: break;
+      }
+      if (dstDP >= 0) {
+        BuildMI(*MI.getParent(), II, MI.getDebugLoc(),
+                TII.get(W65816::STA_DP)).addImm(dstDP);
+      }
     }
     MI.eraseFromParent();
     return true;