From 09f74053620c7fb5ef05e69c0f2374768e760b0c Mon Sep 17 00:00:00 2001 From: Scott Duensing Date: Wed, 3 Jun 2026 16:08:42 -0500 Subject: [PATCH] Updates --- .gitignore | 7 + demos/rsrcProbe.apl | Bin 37599 -> 45035 bytes demos/rsrcProbe.c | 143 +++- demos/rsrcProbe.rsrc/8005_0001.bin | 1 + demos/rsrcProbe.rsrc/8014_0001.bin | 1 + runtime/include/iigs/resource.h | 182 ++-- runtime/src/libc.c | 181 +--- runtime/src/resource.c | 562 ++++++++++--- runtime/src/snprintf.c | 18 +- scripts/mameDebug.py | 785 ++++++++++++++++++ scripts/probeReplSmoke.sh | 127 +++ scripts/smokeTest.sh | 196 ++++- src/link816/link816.cpp | 172 ++-- src/link816/omfEmit | Bin 0 -> 71512 bytes src/link816/omfEmit.cpp | 194 +++-- .../W65816/MCTargetDesc/W65816AsmBackend.cpp | 20 +- .../MCTargetDesc/W65816ELFObjectWriter.cpp | 39 +- src/llvm/lib/Target/W65816/W65816.h | 1 + .../lib/Target/W65816/W65816ABridgeViaX.cpp | 31 +- .../lib/Target/W65816/W65816AsmPrinter.cpp | 112 ++- .../lib/Target/W65816/W65816BranchExpand.cpp | 6 +- .../lib/Target/W65816/W65816I32IncFold.cpp | 4 - .../lib/Target/W65816/W65816ImgCalleeSave.cpp | 13 +- .../lib/Target/W65816/W65816Layer2Gate.cpp | 8 +- .../lib/Target/W65816/W65816NarrowI32Mul.cpp | 2 - src/llvm/lib/Target/W65816/W65816NegYIndY.cpp | 15 +- .../Target/W65816/W65816PromoteFiToImg.cpp | 76 +- .../lib/Target/W65816/W65816SepRepCleanup.cpp | 370 +-------- src/llvm/lib/Target/W65816/W65816SpillToX.cpp | 2 +- .../lib/Target/W65816/W65816StackRelToImg.cpp | 27 - .../Target/W65816/W65816StackSlotMerge.cpp | 28 +- .../lib/Target/W65816/W65816TiedDefSpill.cpp | 8 +- src/llvm/lib/Target/W65816/W65816UnLSR.cpp | 241 ------ .../lib/Target/W65816/W65816WidenAcc16.cpp | 214 +---- tests/ubsan/README.md | 49 +- tests/ubsan/runUbsanProbe.sh | 29 +- tests/ubsan/ubsanProbe.c | 179 +++- tests/ubsan/ubsanProbe.manifest.json | 10 +- 38 files changed, 2454 insertions(+), 1599 deletions(-) create mode 100644 demos/rsrcProbe.rsrc/8005_0001.bin create mode 100644 
demos/rsrcProbe.rsrc/8014_0001.bin create mode 100755 scripts/probeReplSmoke.sh create mode 100755 src/link816/omfEmit diff --git a/.gitignore b/.gitignore index 20d4275..8487729 100644 --- a/.gitignore +++ b/.gitignore @@ -19,6 +19,13 @@ tools/ *.map *.reloc +# Exception: demo .rsrc/ fixture directories ship TYPECODE_ID.bin files +# as source. Each rsrcBundle test reads them at build time and emits +# the AppleSingle + sidecar in the same dir; the .apl + sidecar are +# build artifacts (caught by *.bin above for the sidecar; .apl is +# tracked by name). We carve out the source fixtures here. +!demos/*.rsrc/*.bin + # Per-target build directories. tests/coremark/build/ tests/lua/build/ diff --git a/demos/rsrcProbe.apl b/demos/rsrcProbe.apl index ecef918e278776bd3237c8475c44a76f1f1f383c..96e3f9c27de61f2701117531696a7ea501018890 100644 GIT binary patch literal 45035 zcmeHw3tW_C_WwJ)48v^%MC39e1Bf^x0vZD11r-&`yke%^!~t^z!9X)>e`WJJR<;&h zep$P&i#|>p?7~pH+r@3I9Brd^YhqcJTz@VRv1YE3xvrVX|9hV2eTPAir4-8P-eX( z-PC2iZmr{B>1&6~_%Czc*PA-O-qakmy%R;A?{vI;G%2x6x^M!Ytv!b_lOCTc;qNWs zQc=>VGHLCh%+js*#MrhCrpHw@0g0SVIt06cwG#lF$VHIu90|wNyC|Hq?)(N_sj@x?6l&B&pYL>!qZ3Zk6(5 zY$=kGfNBja!O^+H(S@%dcUNZ-vQniY-k{n-!6M#*5+Z8F^IGw|7OK3|sYI}l-XSc) z6AS`%fl0&AW>B z=_-gBV zl?1#@l$0nMTO^ADagqJ9AaN&i)w@gxLEw%%j5DTJrmA5q$ZZr{iC_`QVI=jW>=~31 z0S zt>~w}M@L7!^jd#1rNhw{2+SA{$a&4t;iQzrQ<9^tKCn*7hQ6|Ft}MM_)hZKf|R)V;UOqxJrE%0y~&qG~fcdLcCB&{u~I z1h+fecoKb4@|d#?HQTkC=nu(bH2a7e-pz~B6|FbR_I-Vfw~M}cyAZLXjTvKXfs*4W zIKvyK@g8@!9XX0GwVYp+b>x_qRQMcDbslr43O*cn9#1){q%gglf&q4;XKpTx&Q@xV zw~wKKXi6R>Q-N-UTrkUYw1KW3k=a~@a6eUu7=z8I^~aSZ({3C<2{f}_1LnAizj z)Po)+gz0?wfw7DOFv6uQFL+T0QaklZE z)FB>q9-|?)p(-TZz+|MzVkW|T=(r`-!N7LH@{m>4gGZcgOb;rKawZ)W9BuPv>5!%B z6f^GXWFe^1fquobx=i%mT#Qsmj1-qhf)3Hw1{f|_5@&n4aFpXD)6?7A6d4mkhMdp% zL9>-;Xg2(h+~90e(sqE{G)EVU<>Mq13Tqt2sYDsFi||n_8l1&+@L(RaQkhGA)TXVk zPtnn|un@;Boa0PJQFM|@s8oT%80}Lf7R1|*W+BA2SOy11ZKGfC3XTiO5Y9Kb2qN=f zK&@+;EbBn`WIvU#Af}_rAt>F3((P)Sv7*grk~-Q|+l=*TGg^+N@$0x#fT%>TVdw}G 
z*KyRYNGuHMz$pyV3b8Q3lqi+($N_|dYQZ2l4t$R;a&{;w&ZA25T9ynl5(V3Uj07@@ zMe;`AYN!z;Q6!4^4w(z$j$guPtYN|>rLYidiV{f^BZ5V`WBV;Wc`J7`pT)?ENRGtc zXx?@di>0>7n;k>@#|LuQC>)Y(kLqJl7%JR63Ma5gYl4`U5lrva`SgQ$!7caqqn`(d zpN*=dAkl0fF9bUWumiKF1GDI;J)zH+wW`!L)?QsFu2cvQY)PvUzm_4w!gTKifuwbYM2a zig5M-4Qu6bo1lnK%Pb_WC0xQ+y-o=^J5*xEq}gTC-44$VM|*u*os#VI#K0!CIS(p{ z&UR}e{7+mRm;h`7%|j&~*m%r&aw{+xS{G=M^`R7`M5+yiP|5I`?>46gRp5!>dfH2A zo^d0E)Nzm)$bwPt^)ZfPXeqC+2UCpFFpigv#c|Y0TAcGNTyKx0Baxtsf`wQ@!|w z0zXaQ7kcqI0>4P$OT735fnP1~HD0_y;5Q1q!;Ak6D+TnsUEp_l@qZThrv<*=i+@ev z8wCD0Ui{Al{#AkhgBSm(z`rZ-fA!)w3w)=*f9}QKCh%tjUf!$mXRg5e@8x`p68PQ; zocQ4anW`e=IdTk;TrI)oJT*tQ8+D{Dq44s;lXP&`1dU=cw|M==~6p)j8Gmn zjwT=l+)xb>G(gjZyBkAU_>MS<1@UMO7W*0x_wV#Y(sD89>LR2r^RCWAW(~(e>cYJ! zuErkC#i?yNUl)XF9#svC$k`IvFdNR-y#z>gAJek50xqV7z zDU@qs6fLA^E+~;Z3s^MRJQGW8Ko@!Blk+$xnPD9Yn@70`EEyA{MW(qi4sKVX?g`_M zNjrun;BG$17sq;gfeS9^V zFC~pDkkm03#h52*thG5_5g$& zV(fg$m%=DifCKin9?Lt23$vkC6jd35Rh+0|ga;$HhjljP8Yp)tyr9VC@(zZh7Riu- ztcv905Q|HQA%Zug7XgdRB0vSA9#Db2FeHpxf*d6TFLE@_aIYIFSg3_=DB;Ul3vt#W z2G-A6;C$A%K7&WMYQ!pfqA<*$b*nkncpF9?&*KR%Nta6_Sx6)K8>PWg`eznRCv|51 zfM!;dcVKTVdDK)yd=VRjo@PNhwnLuMri^92DFVp(XC8YS}9(xgDO$dEA# z?X^6tcC(r`C{7vJRxVhrxdAPJG*BTLZb_4RdE|vo=sRS`J2u*Lb<_q*X>AA(sTkD~ zVgHh}F-i~!J0pqlbHd)0z8hgsZB7oi(du`}{xe?tN|M4fqQ4_aw|PnOT=#wGi6(AI zBqqgZ5AIKdLB#fS3cSF)RAniohwvMQ>7`D>EoK(?VsPeVJ&RVOuF*Ornxd{)9(5_E z9ZD=kT{kgPjP)j3&nPQctaXKwN>SHx!Y)Vfc2HRc>@tL$z6yda&^YTuDKZkNA`Ay1 zAOjPTlL1?;?)SmsCX+^5H2X%T9Zj@A-Pv$u3}!+S%sE<|)W?$Jlw^;drNp6r6!$-g zyjkDT#j^y3R8+CJ;{%zQbo(Ed$d+=sai0SV6{n=4p~N^vI2^D4@pe5{8O25_qXmPh z`{qmb$NPv<6N@0?AZwrrVZg4Y#{5)5V8yLO*RR^7L?X0UwMmOmN)e#|f3Lvb>&4F( z_}v1(Ti|Vz@xB9bGA%Jk1FRHNoV-a;a&__`P zi&Pe}Xyy8Pnf#QBgPgEWXc2NsWfm^B6qahEF0@9_A9sSjX|Fx?6+bwd0!b$eXG-fl z_?lD}O|5yAfF%kS%=N=EFe?RFASuwRIv|CUq(%%vQ=mya2q3KzjysY1g5UXf`brRe zg?^X5BDkulK?3y!hj>L_L_Do!!FEJMb;HSzp(wm#vS0!HI3G-i2!G&0Nmq?zIcU$k zMb#|6PY1uNZl=a-vjas$fPsw>EJaj*DEI@>4di5m=p3<^mrgM!lZC9kF$IM^z7tZL 
zt1KBUd_&_j()i@4dDzvRc^T(HB(y-C?_P?p-VU;NQ7{)qY#_s>Tn8luv!EM&eZ)vlr^>Tc-=}H;-B5dHs&i*RhV|k))-ubvk;+tRJGI zz?RNHXtpV-N(wr($4M+PIa!$t;|iq&rSbAyQU0Jry9vn@>{=}p`U*qCEmlGt#!1nx z2w+WOp8ilG*(b8(7D>E_SlL`YQ+aLOhrcHSN zOjdKbdqsojSo7iN8O>`Rd<;f6MzQoJ+vj{m(#C*cV+q6}hsMo_R@CSn!7L3dxj%5B zXr-JAMc8Mbr*T+V3d;3w7C^63Td-5Y9jS#S?3}rmS&~yg?$!XRH@m)Vx;$G+9asr= zf)FflOfQVoygso z=z$;PJIy2~l!dJ$Rcp+xQ9>Fg)qs1cR2D`x6=i8!Q@aU#MkJstmSoTYlz{n*M+ssM zm5_nuyQO>^Oe~XNO)0CNC=)6R6AeR1S^YH7eAi||0%i4g8N!z@OrO*?osQJjOSCn* zUJ^v^P*Umz1zrPPKXdQ5%`E3ZbQN@UakP;9W$qj?J_|Eky%^TNq%WKdWjgwAVAI7C zjr*ouog!x{&zY)8FA&y3WaRUVd{ISpPD(P?Xe=e^Y z4E=!tR+l~pyaN|Okmb~F^1qmzF&pP6t#~n3 z2ni0#qP4;-bz(7{oHCD3{>Mn8%OoWa6&tWqKm$P`TC$4_+5k|p>-~i5Ba&YaPKgUE~s;zti@FQ?rb95*hCZzWWx|r#*jj8_|lgKvL_l1 z=ni9snMOZCH?^tGqp?&mo-E{gj}~c|KDE^8!8xf}DetT%Hi{-PcoX6zQstGHIy1$n zWc!ixyd<{}8hNnpqG6%p@O{1#5-wj2{80H8iJRD4R4apF_KQ}VfrtZ9k_-`;od}|A zWVU!^C7silE)hMvcU(f z3t-n4;eS+9j9^mzrM{%Sx8te>tpyW^#&|a0ZSv9>%NDp3eB69ua;%a_IDRUC!trvV zClD|JoyAi}Z1EUR05ieKEm9^zZI)v?P&?{Ak{fvJy$Q7v)QCg01obQ@QaBAG-*Q6d&^~cwhV3zYk0tp; zGm|^YE1xK66rqJiMN&${V9_i(8B1ptDW`%#Ht$0_mZ%AH1?z~bFS-f?0x*?p}y6-y}$Qg^8yT_o7Q{8|h)f)5r~ zRpPKgq-Xj#ap0GTyDPG{ast!4>E@4MU_7PJ)RSLRQ;GH;el4a83u$16q>44#L{Swh zQIoq#PEhTH_h>g(Ob2N1aB6Gh7+4G0jxC{Bu6MCJAjw$h9SZQRStxMrOcp`=$yv0Q znFVGNtG3Gv1iI!^xLKy0XedRZcZ8%Oi9$tUS!{B&k^vRT=#}jU zbTGBq(m-qMuX-m_?nm)7Pd-Y7Bp_^TsrM@nh=sa9LFi6cHk<)p)EnUkXkB3g36XS@ zf<(oubezV-XtPPDj*l3Dy=p<)H*afX@bSGC~G1t zWP~S|LnY!3>cPI2uRukMstZea2WSW^loPi6DX1<>j(M#J=xD__cH`^PA zaGTCj=m0=i^PnAMFR#{;9#`J_7k1=Z)E((uA2)-uo(ODKU}+e}j8NNs{z5Y|NHa4u z+dxsFIGDlaD^K{-B5jjDD6#k0ky1mw zLbM|Y+5i#e&nPMl&2jjRB}S}E1T>W8QZ8sacMVwAM;DQK*RlL#TJ z6hZ^TEd--_#l?HHq7GB^2)YFUG^udu$vTB9Sf?o=DM}7N3m-j6i4> z-C5LQ<{EH%B-{VuN8AN1aH=+tA0%${z;y>v5_f~pEmi=63UPB%T&3kdTNfnrSce6hi z>sq0KQKW%%fTcR&dfo^(n~{{I9`|Sy$9{s$8dQyrL$u2}_1{(1D6sZk=lPGT+RTFQ zplV}u{{i`@K>jI#YzQq%{!FVVFoa@s?D1$(H$+Y>=5z0I*+2{Rypn3TX^I(8-8N8w z+;F313h!Zw#QIDx47a%Oa-~>#w>0w%6s-4~oC!kO2_#Zl`D$xhF6el|iW51ng@h)B 
z^_w-aWvW@bMxu+yv<;FjB?Zo)(Wi?AOS+2XUPHeFgQAi&C>fy|`~?g+ZmxKJ6a_E9 z8iW@ir%h#z&_-YpPDPp06{WOJ z+X>*)5H>bASTS+36<^y&E=OCxlZ2QF#aJvaAB>L@JIe%w9tgM=(<}=TbkvG#VB&lz z3^k}}7W*m6TuUDWj3zl1HN((0?Et_yio#eM^_M~w-hqq`W<{U{+Jb=H5JeFhUePYL zo#hm?8;N$wgYqkHL>YqQZ)i8_ibj&rNHTgOpUYhWHIRs*B`PL3m}b7yWWpWtdfcZc zrAkOj;cFWNTc-MRn{bZ^xAVjmclG99{rOE;5(10;7TQigd_q?spfpdH>!h6@Pwz7C z*!j2Vhxko=oms-CeCJmj`?ZP4&D%M}c{+VMxP_e-( z_U|0~nTicivGNQWv3`aa@d$x+bZ+mWYy8^(`($I1Bz;&XNo#j?zV+6CKk5`6Mv;fU z{z4xn6Rwzk-PdxduUWRgrepfaivH{m>klsRC zSs0VHNAs)xXeUH#M-2!t0@Q_RR&PU#NJcaSija*wkbrHK??=LWBRL6Z=l_NTVzf~c zf|q6^%_Wkqub@Ln&;}L)?$=^Lq&tg1Ront3#s}5EMe+?H`G#=$20^|-oN-!Y0{Ox% zf+2`xWD&X;R30E^v2hbh`T#}?z8komcf`R4&Uev{UTcR!AQ2J@zVkcJ zREydb4T)?a6WD!kRkVa{Vx1OD>Ch6QCoebpTc)bW#CXWD@!c#f4RQN(aZ_jnf2fIH z$3+Z1Co*KHy4E(@`W6yFvEecyghC!kiL*gB8mAELa zEQ;IP5a&~J<6BL=6A(W-TG7i~_uT$HCr4X-Ym+kD+2NkeBTBsYfJ!NCy-*&x@pw~j zwpMMHd)9zzCz_NQs5V2bM#Z$6DRSdkpPJL%(+AYN)TB&B&8b>V+$z;-e(Kj4?CS3` z-O}W43^L~wHpPD)DoB=h*y71CUhdW!VDPGUuv%-Y~ zvJSUJt7+)3*`jN_%co|lJ9R)!y9@WlJKQN+P2957c>AcX@e!Yz$?oIT&hO~D%n^F^AmHj$~<=S-6oQy@#C&DwoH$Jox{z!{lvvBX?Lf2T=goEZiO-h0c^7)NzgRb!@IH$%Zo0Kp)5~a>bWJ2j0PzZ%3JlsgM zC*f{Das;-a;}P;=XPjIhhRV+tOe`MtaC?BZm%Urj@qB3OfaPXYJ%`}?d+^6JQgnki z8Bhny9x*hr6D4*=_y(f5TY~ZUjxUNuxMy-r0VV1+4%z;`j>X0)u|E5&V3eC~+KJ_R z1WR<&OBmXwC{mz$4Chgl-Q3r>pLi6lYdlOmI^@eEKTId$(aRc-KKJ3#{>yp9H}kK~ zqgSrVqq2XCN7#)6%QPNgIfupCW$cM}&-76TaHvS?VCH-p5F zrFw)lk?ATg9XfknZh@2)WYc7(B|HhE4hN|7cP0n`&FO4`x4C&w1;zcniPrZ zrY{cZC@JQ9qBcihNWgXM7GZsilM=p4$9j=U#J!&?;`w4( z#Oipf6_n)ou_&C5BxOWm{f;+cS%gey1!h0*rd0^FjO{|+W+LNBM2DaO>s>KzLwFZ@ zPEsNkNi3p>B^KdoGd7rilZ*3(yIc5Gs5j`65U50+v53VMu>|$yXcmdZYEtn8DvFO1 zhWGH?Q9e4H$ilQkAe!=Y>adXq*->H<6-Admu{fGa$2n7Zke$zBt@$utzi(3V2%Cou z!h=c@7AdNN{+&3nj%-1`bW|UA&)w z^E-T}vp{}m#f7V$8Nhw0SwyxrLuLC-7wt1IG6M;fmUdUz1IQ%lqF1jk_Sr)(inHEz zh?KW`5JGz7yxw5g+1@y~0kP#HvTw$VSBTw<>Mv|6de+0HCRvi8Jb2#%63&xzK+97GT4d)4{g}8z$s%u7 z#wNyC%jl(N#mcg*R$#%T{Gy^FHWhbx`n5HX=>tD+X4q 
zM)J|xG@?Cy2kAOrhNQDJ?BIy+87w0?9aO~|Q0sO{DM7yttc2@A9plu36QOmyKY(}41)Yx0Z5Ve3y}I3h^pPe=yN#1}8z&7m#sZenh1PVI zHJ}c&Ks(Y!dn{vMR@xQhe_y_ROq;df+bx{9TfTvBp;z+l`*vKRc6<}>_@-~@c*vC< ze_w%pLFY-gi_0b7F0d7xv=svchA{T~3GDllf6aF$e>o?A`2g|}Tz)_DzpvOg$wJ?j zapIPJgV_@bZacq#9A={Iy7_n+@rQ9E9Yj+L|qw%DiZe6MZF7x&j-5)ffz z2$PUczY4+f;Y;#_+slrFmDa3Tc3qN6YzmvGEM!@@!LPl@mZ3J2ZzC)0Z6*hn1wCNr zQ0*a`XJzw!Cz02$Ao;2hvM}CCD$^RxrbaKTX#t_BJWk2xwhu(pa4oQN`Eqs(zcziV z0Uk!@&y9JrYRI!&@Cz0n)YF<+{^D3@@p&cS0H;{cTv@qD>8b8?t1%XyZ5u13q z_d>%TN_5>hja;>$D+kvLlW0R!$SzOx$FCQ{{i2r~&I=*IJT90#mP173XtG3);C_Eu zB;dl)*&2yeqlJx7fpmK@626Z8S}hObw&z@yneNTR8Uk0-y;P!!-f(Z`MX`LK^N)O5 zJy1wd&eHg9X@7;ta*rB3{`2_JJTOQ$M` z!7(5N-7FrXL>6RgLKYVIII%&NdT*Mt&eNiwXW=d!vd|HW6t%GAh!+iF<%}$dZU^Py zYdT8NWok5l%>%t?Ad-iS6xh+>uy*%@F9Z51l96TN#G15Q)EY$;DT??H*QdqMHHz@| zu6oHgUb~j9Qbcdpv#?|%qUUINDndq(g!u(!qZZOxG$cIFU)7srp*N}Os~}KN!LDR6 zaKT}5xf0yNyh@>Y0|^x2e>y<6_D1;L{z*= za*D=0aTQhEn1-&AjJ%s5L1F}R`3UCn5rAaz?x{+$rqBKC{g09V)>q$?d^NzG&B>pw zk&ic?F~)yQ{`b|6ps&7*cFf|$&GOL>dhJ1b8SDS0cGxcTvo{`8*q|Mzf1w>TO}*P{ zYFsjdk6?zzUh$6Azh>|ERqlo_Swx%?flaokI9&0=Y{fQ$JDJYcGIeCCt9+0*w}~U3Eah_fYVVHqiB`F#&lS%Ef8}t?nGtOwPr}GiduoXf#Q} zLY^;DvNVq+%u&Am$d`vQaZdB3WWq*oFMfq4pdLGXPtYOx6P$-2j(1;gZ&Ds3^kW(t z{bIxPQNlgygWK1n>?GVyAKbG|%EN?v*av5BRvseULq0fcS3f|w2YhhB&5DC?4j&wL z!4<+OKDe_k<$l85?}Ni8`+bDF&j)whrTiD+{>ul4J^XtJcaIP5Etj%|a9ezEI3n0g zxXnJe7hK9F!fo=w;RK?Fa5X---7e)0!rkG6!x;ztSRbqM!EJXb>j}5s2ZvLXa>AAS z;5N7v8{up|IGoemPPp5Ba4TI(8R5!&a5xFF63*&_D|9I(gcD0|h#m69S<`KVyUnNE zc$abu;coH4;WXAJV=U-XqbdeHs}^MW1m77g#fNVSIJZ-^#WzbqsMHdCj*J?SZkb@h*zB#3*(QIvak@IvAt^=Ib>Y$4N zEn^;T81!eHSXFwm^whp^DWGTcseqHtQ%6qJ8gf5A;W*WE@@>z_RdmM+s4wWjlOWT>+XJX*tZ&hp|k;L=m2r$GU=GWyi$(*Y+& zLm)?bYNI^?U{-f+Sg!Nb$K4Ld;^Q92>7El0dfEms&1}2fhaSO5!kux_c}YZvYpN@ zwQLt8+06nSJ){pPNb1n*?A8i)JA2fEJz^MYX%_4_!9pD;*$~Gm7VbE`%GpEJo^y6J zc=;*@#{!>w9r_>2IYwf%f7WOB^!X~1R%A}knr_Z-D$;nhB4N&+;5FV-r~2#<^qJSL zEj?3uw)9--d1tr2#}feQ*ztmIf#mGD7K+%tjWol$U-&u6XjPige^nB+{&MptVYgI1cqm%q3e!u+%DjDD|RpxGqsQ 
zL071|L1)wbKv%2VuX{!JuC7~mK^NgS#;?HdM!!4!cKGf0JLva;-&wzKd7L~?UL$Xl z_sMU_owDR__MhRu+JBq>v;Hmqr~Je9*XUR1@73?uzpejV9}`dzur}bafCB;D0YeOV zhEhYV;Z?&YhT(yQfz^R~13iIKP-aj`(BnaegU$t6f^QD44L%(FW$^fr(vTm8ydM%6 zS`d0y=<}f`Lq{2JH9li(GlmbDKjfhyZw~Pfn;CY0*y~|ZctQC6;fKQgBj!Xr6!H6r z@S(*+pBZ|5=%~nbkSm zSdrM0I67%tQdiQ9O)T&W$j7mz`meP|lZ}cxlho^2#{UCMPnEhjdEq7QtECpkq z8yk^!cUpJaf^n~m8e=d~?=4rQlaLdM9!*Bb=ZSz+3tlCzRwEB(JtE^IKUFnRnuCi@wM&I6Yd&Syt z+plaZ)?He+zx>*Y&nuo@Ur>3X^0BJP8@e|qSooDXcyLr)F z(w62eC3g?K=kPsu{=o8I$NsDK-opF7ysv3%>HTr`_w5fV1>4SVd)~2b`=|$wKk&?h z#Sew=`2CKD9-i|<|DA_+-v3C!qf+hbwf8?Z^Kt(t-hASrC+Gin_)~39J+tf9r$;?= z@|ov*{#3-c;B zmQ`18+JIW-nk_XO%hsE%B^4FYyvhxmHfB_9+6Z7qiK=RE)zY#%ZC2iFNogtlQv}kC z>f1N1FRR>WuCUc?G;ge`GFMbp-Y(6)tITR%Q&qjbL~CMZNvWWzw5-NjZQB5ns9Eb; zTSci^C8?}NT2fY1wW%6?fSe6g)g{%oiY?|%l_htU*eXg^SCmN$K~&Wm^ZK&&G^Rx~ z?wT56Lupy14dbVNG~x=qG!6LcQ zT;jvVc{Kx*m3Nj@*wClS2&ou6SYJ}P#ay+atdbA8#%!xpIefKTFuY^~IIyAGR^znCP;*%I>4UWvL2@|i$o|KcD zH)H0k!r61A>ebTb)oD_784YhTxLvc+Rw-uV=4zUUmE+8+!qM!khO*vSRvIjgr%9Y3 z<-@n%CGC=aEwxEsND;bB-2$Cnnz>h!+=vP2qr-oc_TzsGvBHyqC5%qeOO);}Njaj# z$|^}(jpuGWPvY5+=T$uK;rRrQgnuhzD4x-H^6;$0vlh?Yc%H^Th_D@S51s}*ui|+J z&tLGI#Pb!NSo{kc>3A07S%s$x&jWa##M6N16+B1q^x&cP{P7Ts#*>MsP)!r=f5Wo| zbOvmaq)~Wg;)%w;+z|U6p8M1fIDVB+U!Oqg$hcqe(Q^I1HTQ~Y|6d@8WK500&mac| zIWWk9K@JRZV2}fY92n%lAO{9HFvx*H4h(W&kOPAp805ep2L?GX$bmr)402$Q1A`nG zY201Xufk6%oa$t}HgB%#-z#s<(IWWk9K@JRZV2}fY92n%lAO{9HFvx*H4h(W& zkOPAp805ep2L?GX$bmr)402$Q1A`nGT7Vg{KYS`A)lAW)&aaN0DgyykmqEHK(N0bfsy{f1hV{j zV}}K@#h(}XK!C^qUL+=fw^<&*Te&xYSJ-3VtuO=vt0CFJmWGOFXHVt}KNE&st zA|#GLPACV{LOF6_=r}^&5IU8>?V-~MRE91ja9`*m0^37N24TsV$U}r3b>HA^5%52wvpN2wuc*C{G!N@*+_~xe~dCaSpyRj5F)KVVreeMy(=s zlZJB;G5lUi#Ry;-zME2+0!$lzfKsysSUS9eQY8XB9NU_K)QbXynPw85W>Xs>*P05f z0P9WD2vnP{A+Xsrfxv5~DFhCiCKLG7luMx3G|7sAJvxF{e`y37lckOECo=#(NVqW* zz-69E;K{^FV$Y8g3#rI~q`k`lK1!*$0iY{Yq9PBD;mF@vwoz)rI9_i?I_hD{qz7gJ zJe85M9IfolI7cITDdRkWKV^JO#Q!DZZv;Ng_=>>i8DA67Wkyi9hRjF;(V3|P;xjD- z(le(Ln3y@8z^u%r1mCcZ^Qem#-rHC>aKjnuo>@Vt&|+NqU-Y!va6 
zZq9BcusQoUfhV$05O_ZOEP+2{Un1~ncJNOD43mZtNSS0NFk{kK0=G??MBtuD1q7a% zG@rn)C*45ckCRpt_-s-I0b|Zx1S~mr0&{a7Az;mUn!x=z`w9Fcr-{ICat;t^%Q;No zOpb>@NbXSrmfRi!Gjn?htjfJeU`uWQnYHI~M-cdZ?oa}s=8hq7F_(|8C6BZ2{k%FF zQG4DV0zG+8@$uz7NkE#+i~N1^eoFmg@?ioOCLbaYoPU5oSpLfdM&je7wBL*YsGL@u1h9V^7i!Zq=mh59wBHjrI_+Hoz0>&E1EzC2 zP17NFS&AuuPWVa73iued6!1Z_f(zuNeqLZ13UIi9xA`vR$rRn~OMDb1knIa2*Fn z1lTs8r$$}R!AAuAq{kO<5w{6&YylV57Ypn(cEdt0V$;HXl*(B6EP+`I&7>2{7ji`@ zUl>fO`xbKTd1B#N(xsm*;XqOkI*hgU(+vlEA_xp#-c;0tswha*jHBc*z+8KV8yE;Fn8|5%}Gbw+Z}Z$(sZ| zTk!%cr}5qin*P*RLrf0ZW*^B{>!*UF)ZWuC3qRPHpXRqPDd`|GvJ10BX2_W zask?xEw}-xE&)y~Gg0bOk@CBNr}Q`QB8@ljdWQsPT5covc}Re_mq*`-)E`9ZbCEhL zQhhhBrMxvOICxZmHw9=}@wXModrzcVMJjtGFF9l7)|JRBT=^h@xhr|e1uJVQRV<3! zDvFedBBi3pJ)($1fSoIwR>Ce_W99Kgp#awku)tdQIPz`~sar*Ag8+A0S5ka^w{jMN%Tc0Fw#@ayOob?p~7pzAJT(b5M@GIrC1eE#{Nx>pD zq%@jRLrXIW3@=uugzV0jdSKQ-He#xK{wX0NVxFA;2R7JTAaf0{lpTJp$|#;8_701ZWcA z1p!_Z;D7)J1vn(Yo26@rXK$AVkl%c_bQFQs(wPL>OQR|5Jz5$|;J5%?0`v%QQh?I} zoDtxh02ctT^376LzheLqoTwigenV|0egv#$Q3ZY&ZEaOWX<0S@Q9ZrSkLuBH?$Jm5 S(B3%yUHhRu=_Bcvg!n%kd2kN^ delta 1236 zcmai!T}TvB6vxlZ?7BN^*4n1)D4XLD$U+}meq`E1K_7+>1%ld&R8X|C4`mW$J-E0` z55kh$6jl(F!^EyoRtpsr2}=+KQV&JYi_@gju8J;U5rS^#&b1zT>B8LKIrsd}|DL-K zJ3GlvYV6Ra(yNUCzye&ofINm@A;zELjmKRu1YqFT&cvt1_$#mLb{xxO4aKK-vM{7< zWQ6`%pFZn*SaMmv1MotxR+yc^h zPwMs71*R!if&)f{!DvPDn=e|ay&P35WqnXKQ~?LXhh=?HA362W(~4m|fez?(L#f^o zIGD`m!|4F};;uS5SI(_EaeNn!pU+zkBLM#|D|_1Ny;(svG`-=g&22lB&zHMKHKSoU zD=4wH?(IT}HWQXj)h1^|jOW6lxdDR@GuphLg&ZHkjH@6v23~n&45eZf%WMd|Wp0bValRWifanSmV{q{quf^$V+02Ahrsq(3WzX*eML` zAVSUjr}$mgwMG0De#%qYFWLMfwy$WWo3tI*=8=6uYOFJ6W0+LcX@6rXaJrmqAB*~b zu;7K+B1Aa>2Y#625cN>UAa+9+AojunIqZQEf`{;cfMq5KTug$XnK@6ei_6*3Y#;ZH zcm}yFL6t4bBkC#c+5{rGPteao%4XUyeL_zsX!f3F7wlGJjH0igwg8neit7ai$!^f> z1+og$H}j^d2~KE-W5yQJ#;9Zv3aB>no&4@EdsLv~2(Y diff --git a/demos/rsrcProbe.c b/demos/rsrcProbe.c index 0c7b74c..bb29ca4 100644 --- a/demos/rsrcProbe.c +++ b/demos/rsrcProbe.c @@ -1,59 +1,134 @@ -// rsrcProbe.c - Phase 3.4 stub-only Resource Manager smoke probe. 
+// rsrcProbe.c - Phase 3.4 real Resource Manager smoke probe. // -// What this verifies right now: -// - resourceProbeInit() returns RES_ERR_BLOCKED (the stub-only path), -// - iigsLoadResource() returns NULL with err = RES_ERR_BLOCKED, -// - iigsGetResourceSize() returns 0 with err = RES_ERR_BLOCKED, -// - the runtime resource.o links cleanly under -O2, -// - the demo's OMF can be bundled with rsrcBundle.py (post-step in -// demos/build.sh when demos/rsrcProbe.rsrc/ is present). -// -// Marker discipline. Page-1 ($70..$73) per the cursorProbe.c -// convention - runViaFinder.sh samples direct-page bytes reliably -// across MAME timings, and full-24-bit BSS-style markers (0x025000) -// don't survive the Loader/Finder relocation games on GS/OS 6.0.2. +// Replaces the stub-only probe. Builds a tiny in-memory .rsrc fixture, +// registers it with mfsRegister, opens it via openResourceFile, loads +// a known rText resource, and verifies the bytes match the expected +// payload. This exercises the real parser path top-to-bottom without +// needing a ProDOS resource fork. // +// Markers (page-1 direct page, per cursorProbe convention): // $70 := 0x99 end-of-main success sentinel -// $71 := initRc as int8 (expected 0xff = (uint8_t)RES_ERR_BLOCKED) -// $72 := loadErr (expected 0xff) -// $73 := 0x01 if resourceRuntimeEnabled()==0 (today's stub answer) +// $71 := 0x01 if openResourceFile succeeded (refnum != 0) +// $72 := 0x01 if loadResource returned a non-NULL handle whose +// bytes match "HELLO" and size is 5 +// $73 := 0x01 if loadResource second call returned the SAME handle +// (cache hit) and closeResourceFile returned RES_OK // // Build: bash demos/build.sh rsrcProbe // Run: bash scripts/runViaFinder.sh demos/rsrcProbe.omf \ -// --check 0x70=0x99 -// runViaFinder LAUNCHES the OMF and samples at frame 6000; no keypress -// is required because we drop into while(1) immediately after writing -// the markers. 
+// --check 0x70=0x99 0x71=0x01 0x72=0x01 0x73=0x01 #include +#include +#include #include "iigs/resource.h" +// rResourceMap fixture: header + 5-byte rText payload + one rIndex entry. +// +// Header (24 bytes, little-endian): +// rmVersion = 0x0000 +// rmToIndex = 0x0000001D (29) +// rmFileNum = 0 +// rmID = 0 +// rmIndexSize = 0x00000014 (20 bytes = 1 entry) +// rmIndexUsed = 0x00000001 +// rmFreeListSize = 0 +// rmFreeListUsed = 0 +// rmPad = 0 +// Payload (5 bytes) at offset 24: "HELLO" +// rIndex entry (20 bytes) at offset 29: +// rType = 0x8014 (rText) +// rID = 0x00000001 +// rOffset = 0x00000018 (24) +// rAttr = 0 +// rSize = 0x00000005 +// rHandle = 0 +static const uint8_t kFixture[49] = { + // header + 0x00, 0x00, // rmVersion + 0x1D, 0x00, 0x00, 0x00, // rmToIndex = 29 + 0x00, 0x00, // rmFileNum + 0x00, 0x00, // rmID + 0x14, 0x00, 0x00, 0x00, // rmIndexSize = 20 + 0x01, 0x00, 0x00, 0x00, // rmIndexUsed = 1 + 0x00, 0x00, // rmFreeListSize + 0x00, 0x00, // rmFreeListUsed + 0x00, 0x00, // rmPad + // payload at offset 24: "HELLO" + 0x48, 0x45, 0x4C, 0x4C, 0x4F, + // rIndex entry at offset 29 + 0x14, 0x80, // rType = 0x8014 + 0x01, 0x00, 0x00, 0x00, // rID = 1 + 0x18, 0x00, 0x00, 0x00, // rOffset = 24 + 0x00, 0x00, // rAttr + 0x05, 0x00, 0x00, 0x00, // rSize = 5 + 0x00, 0x00, 0x00, 0x00 // rHandle +}; + + +static const char kFixturePath[] = "rsrc.fixture"; +static const char kExpectedText[] = "HELLO"; +static const uint32_t kExpectedSize = 5; + + int main(void) { volatile uint8_t *mark0 = (volatile uint8_t *)0x70; volatile uint8_t *mark1 = (volatile uint8_t *)0x71; volatile uint8_t *mark2 = (volatile uint8_t *)0x72; volatile uint8_t *mark3 = (volatile uint8_t *)0x73; - *mark0 = 0x10; // entry sentinel: we did reach main() - int initRc = resourceProbeInit(); - *mark1 = (uint8_t)initRc; + *mark0 = 0x10; + *mark1 = 0x00; + *mark2 = 0x00; + *mark3 = 0x00; - int loadErr = 0; - void **h = iigsLoadResource(RES_TYPE_RTEXT, 1, &loadErr); - (void)h; - *mark2 = 
(uint8_t)loadErr; + // Stage the fixture as a read-only memory-backed file. Cast away + // const for the mfsRegister buffer pointer; the resource manager + // only ever reads. + if (mfsRegister(kFixturePath, (void *)kFixture, sizeof(kFixture), sizeof(kFixture), 0) != 0) { + while (1) { + } + } - int sizeErr = 0; - uint32_t sz = iigsGetResourceSize(RES_TYPE_RTEXT, 1, &sizeErr); - (void)sz; + resourceProbeInit(); - *mark3 = (uint8_t)(resourceRuntimeEnabled() == 0 ? 0x01 : 0x00); + int rcOpen = 0; + ResourceRefNumT ref = openResourceFile(kFixturePath, 0, 0, &rcOpen); + if (ref != 0 && rcOpen == RES_OK) { + *mark1 = 0x01; + } + + int rcLoad = 0; + void **h = loadResource(RES_TYPE_RTEXT, 1, &rcLoad); + if (h && rcLoad == RES_OK) { + const uint8_t *bytes = (const uint8_t *)*h; + uint32_t sz = getResourceSize(h); + int match = (sz == kExpectedSize); + if (match) { + for (uint32_t i = 0; i < kExpectedSize; i++) { + if (bytes[i] != (uint8_t)kExpectedText[i]) { + match = 0; + break; + } + } + } + if (match) { + *mark2 = 0x01; + } + } + + // Second load - cache hit must return the SAME handle. Then + // close the file, which must report RES_OK. + int rcLoad2 = 0; + void **h2 = loadResource(RES_TYPE_RTEXT, 1, &rcLoad2); + int sameHandle = (h2 == h && h2 != 0); + int rcClose = closeResourceFile(ref); + if (sameHandle && rcClose == RES_OK) { + *mark3 = 0x01; + } - // Success marker last - if any of the calls above trapped (which - // they shouldn't in stub-only mode), the harness will see $70 - // != 0x99 and report failure. 
*mark0 = 0x99; while (1) { diff --git a/demos/rsrcProbe.rsrc/8005_0001.bin b/demos/rsrcProbe.rsrc/8005_0001.bin new file mode 100644 index 0000000..36d13a3 --- /dev/null +++ b/demos/rsrcProbe.rsrc/8005_0001.bin @@ -0,0 +1 @@ +iconBytesPlaceholder \ No newline at end of file diff --git a/demos/rsrcProbe.rsrc/8014_0001.bin b/demos/rsrcProbe.rsrc/8014_0001.bin new file mode 100644 index 0000000..d9605cb --- /dev/null +++ b/demos/rsrcProbe.rsrc/8014_0001.bin @@ -0,0 +1 @@ +HELLO \ No newline at end of file diff --git a/runtime/include/iigs/resource.h b/runtime/include/iigs/resource.h index 32f0710..836f281 100644 --- a/runtime/include/iigs/resource.h +++ b/runtime/include/iigs/resource.h @@ -1,34 +1,37 @@ // iigs/resource.h - typed-C facade over the IIgs Resource Manager. // -// Phase 3.4 STUB-ONLY landing. The bundler + linker integration ship -// fully (see tools/rsrcBundle/), but the *runtime* path is blocked on -// Phase 1.1 (the GS/OS fopen hang). GS/OS 6.0.2 + ResourceStartUp + -// OpenResourceFile reaches the same path that hangs in fopen today, so -// the LoadResource()/GetResourceSize() entry points below return error -// codes instead of calling the toolbox. When Phase 1.1 lands, flip -// IIGS_RESOURCE_RUNTIME_ENABLED to 1 (or define it at the compiler -// level) and rebuild the runtime - the same C surface stays. +// Phase 3.4 REAL implementation: parses .rsrc resource forks via the +// stdio surface (fopen/fread/fseek/fclose) and serves resources from a +// per-file cache. Read-only. No AddResource, no DetachResource, no +// partial-load, no encryption - those are features we do not yet need. // // What you GET today: -// - resourceProbeInit() reports whether the runtime path is enabled. -// - LoadResource() / GetResourceSize() return RES_ERR_BLOCKED unless -// IIGS_RESOURCE_RUNTIME_ENABLED is set at compile time. +// - openResourceFile(path, accessByte, fileType) -> refNum (>0) or +// 0 on failure (errno-style code lands in *err if provided). 
+// - loadResource(type, id) -> Handle (void **) on success; cached so +// repeated calls return the same handle. *handle points at the +// resource bytes (already read from the file). +// - releaseResource(verb, handle) -> 0 on success. verb 0 just +// releases the current load; verb 1 also evicts the cache entry +// and frees the data. +// - closeResourceFile(refNum) -> 0 on success. Frees all cached +// handles owned by that file. // -// HLock semantics (IMPORTANT for future Phase 1.1 unblock): -// The toolbox LoadResource() returns a HANDLE (void **) to a master -// pointer in MM-relocatable storage. The application MUST call -// HLock() before dereferencing if it intends to call ANY toolbox -// routine that could trigger a heap compaction (most do). Without -// the HLock, the master pointer can be rewritten under you between -// the LoadResource and the deref. The typed wrappers below DO NOT -// call HLock for you - that is a deliberate choice because over- -// locking is a memory-fragmentation footgun and the right scope is -// workload-specific. Callers should: -// void **h = LoadResourceTyped(0x8014, 1); -// HLock(h); -// const RTextT *t = (const RTextT *)*h; -// ... use t ... -// HUnlock(h); +// On-disk format (Apple IIgs Toolbox Reference Vol 3, ch.42): +// File offset 0: rResourceMap header (24 bytes, little-endian fields +// because the 65816 is LE). Field rmToIndex is the file offset of +// the rIndex table; rmIndexUsed is the number of valid entries; the +// remaining header fields are bookkeeping/zero at build time. +// Body bytes: resource payloads at the offsets recorded in rIndex. +// At rmToIndex: array of 20-byte rIndex entries, each: +// uint16 rType, uint32 rID, uint32 rOffset, uint16 rAttr, +// uint32 rSize, uint32 rHandle (zero on disk). +// +// HLock semantics: +// The handles we return are NOT relocatable - they point straight at +// a malloc'd payload buffer. That means HLock/HUnlock are no-ops +// here. 
The void ** indirection is preserved so that real Memory +// Manager handles can swap in later without changing callers. #ifndef IIGS_RESOURCE_H #define IIGS_RESOURCE_H @@ -40,36 +43,39 @@ extern "C" { #include -// Flip to 1 (or pass -DIIGS_RESOURCE_RUNTIME_ENABLED=1 on the build line) -// once Phase 1.1 unblocks GS/OS fopen on 6.0.2. At that point the typed -// wrappers below dispatch into the live toolbox; until then they stub. -#ifndef IIGS_RESOURCE_RUNTIME_ENABLED -#define IIGS_RESOURCE_RUNTIME_ENABLED 0 -#endif - - // Status codes returned by the typed wrappers. Mirror the runtime's // existing errno-style convention (negative = error). enum { RES_OK = 0, - RES_ERR_BLOCKED = -1, // Phase 1.1 runtime path still blocked - RES_ERR_NOT_STARTED = -2, // resourceProbeInit() not called yet - RES_ERR_NOT_FOUND = -3, // OpenResourceFile / LoadResource failed - RES_ERR_TOOLBOX = -4 // Resource Manager returned non-zero + RES_ERR_BLOCKED = -1, // legacy stub marker - kept for + // backwards compat with old probes + RES_ERR_NOT_STARTED = -2, // openResourceFile not called yet + RES_ERR_NOT_FOUND = -3, // file open / resource lookup failed + RES_ERR_TOOLBOX = -4, // map header corrupt / IO failure + RES_ERR_NO_MEM = -5, // malloc failed + RES_ERR_BAD_HANDLE = -6 // release/close given an unknown ref }; // Resource type codes we expect to bundle. See Apple IIgs Toolbox -// Reference Vol 3 chapter 42 for the canonical list. Defined here as -// constants so callers don't have to use raw hex. +// Reference Vol 3 chapter 42 for the canonical list. #define RES_TYPE_RICON 0x8005 #define RES_TYPE_RTEXT 0x8014 #define RES_TYPE_RPSTRING 0x8015 #define RES_TYPE_RCSTRING 0x8016 -// Resource ID type matching the toolbox (32-bit on disk and in the -// rIndex; the public API uses uint32_t). +// Build-time tunables. These cap the per-process resource footprint. 
+#ifndef IIGS_RES_MAX_FILES +#define IIGS_RES_MAX_FILES 2 +#endif + +#ifndef IIGS_RES_MAX_HANDLES +#define IIGS_RES_MAX_HANDLES 16 +#endif + + +// Resource ID (32-bit on disk and in the rIndex). typedef uint32_t IigsResIdT; @@ -78,37 +84,87 @@ typedef uint32_t IigsResIdT; typedef uint16_t IigsResTypeT; -// One-shot Resource Manager bring-up. Calls MMStartUp + TLStartUp + -// ResourceStartUp + OpenResourceFile (on our own pathname) when the -// runtime path is enabled. Always callable; safe to call more than -// once (subsequent calls are no-ops). -// -// Returns: -// RES_OK if the resource fork was opened (or the stub -// path "succeeded" with no-op behavior), -// RES_ERR_BLOCKED if compiled with IIGS_RESOURCE_RUNTIME_ENABLED=0 -// (the default until Phase 1.1 lands), -// RES_ERR_TOOLBOX if any of the StartUp calls returned non-zero. +// 24-byte resource map header at the start of every .rsrc file. +typedef struct { + uint16_t rmVersion; + uint32_t rmToIndex; + uint16_t rmFileNum; + uint16_t rmID; + uint32_t rmIndexSize; + uint32_t rmIndexUsed; + uint16_t rmFreeListSize; + uint16_t rmFreeListUsed; + uint16_t rmPad; +} ResourceMapHeaderT; + + +// 20-byte rIndex entry. +typedef struct { + uint16_t rType; + uint32_t rID; + uint32_t rOffset; + uint16_t rAttr; + uint32_t rSize; + uint32_t rHandle; +} ResourceIndexEntryT; + + +// Refnum returned by openResourceFile. Zero means "no file"; valid +// refnums start at 1. +typedef uint16_t ResourceRefNumT; + + +// One-shot init. Returns RES_OK; safe to call more than once. int resourceProbeInit(void); -// Read whether the runtime path is live. Cheap; returns 1 iff a -// successful resourceProbeInit() has run AND the build enabled the -// runtime path. Returns 0 in the stub-only landing. +// Reports whether the Resource Manager is alive. Always 1 after +// resourceProbeInit() has run. int resourceRuntimeEnabled(void); -// LoadResource typed wrapper. 
Returns a HANDLE (void **) on success, -// or NULL on failure (and sets *err if non-NULL). +// Opens a resource fork at `path`. `accessByte` and `fileType` are +// accepted for API parity with the toolbox but ignored on read-only +// in-memory backends. Returns refnum (>0) on success, 0 on failure. +// If `err` is non-NULL it receives RES_OK or one of RES_ERR_*. +ResourceRefNumT openResourceFile(const char *path, uint8_t accessByte, + uint16_t fileType, int *err); + + +// Closes a resource fork and frees any handles cached for that file. +// Returns RES_OK or RES_ERR_BAD_HANDLE. +int closeResourceFile(ResourceRefNumT refNum); + + +// Loads a resource by (type, id). Searches all open resource files +// in open order and returns a cached handle if the same (type, id) +// was previously loaded from any open file. Returns NULL on failure. // -// Caller is responsible for HLock/HUnlock pairing around any usage that -// crosses a toolbox call; see HLock semantics block at the top of this -// file. +// The returned handle is `void **`; `*handle` is the resource bytes. +void **loadResource(IigsResTypeT type, IigsResIdT id, int *err); + + +// Releases a previously-loaded resource. +// verb 0: keep the cached payload (cheap; the handle may be reused). +// verb 1: evict the cache entry and free the payload. +// Returns RES_OK on success. +int releaseResource(int verb, void **handle); + + +// Convenience: byte size of the resource pointed to by `handle`. +// Returns 0 if `handle` is not in the cache. +uint32_t getResourceSize(void **handle); + + +// ---- Legacy stub API kept for backwards compatibility ---- +// The pre-Phase-3.4 stub exposed iigsLoadResource / iigsGetResourceSize +// for the rsrcProbe markers. Those now dispatch to the real +// implementation when at least one resource file is open. They report +// RES_ERR_NOT_STARTED when no file is open (instead of the old +// RES_ERR_BLOCKED), preserving the "did Phase 3.4 land?" signal. 
void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err); -// GetResourceSize typed wrapper. Returns the byte size of the resource -// or 0 on failure (and sets *err if non-NULL). uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId, int *err); diff --git a/runtime/src/libc.c b/runtime/src/libc.c index 24195e7..104a8df 100644 --- a/runtime/src/libc.c +++ b/runtime/src/libc.c @@ -384,102 +384,11 @@ typedef __builtin_va_list va_list; #define va_arg(ap, ty) __builtin_va_arg(ap, ty) #define va_end(ap) __builtin_va_end(ap) -static void writeUDec(unsigned int n) { - char buf[6]; // 16-bit: max 5 digits + null - int i = 0; - if (n == 0) { putchar('0'); return; } - while (n > 0) { buf[i++] = '0' + (n % 10); n /= 10; } - while (i > 0) putchar(buf[--i]); -} - -static void writeDec(int n) { - // For INT_MIN, `-n` overflows signed int (UB). Negate as unsigned - // — well-defined (two's-complement wrap), and the magnitude is - // identical for the print path. - if (n < 0) { putchar('-'); writeUDec((unsigned int)(0u - (unsigned int)n)); } - else writeUDec((unsigned int)n); -} - -static void writeULong(unsigned long n) { - char buf[11]; // 32-bit: max 10 digits + null - int i = 0; - if (n == 0) { putchar('0'); return; } - while (n > 0) { buf[i++] = '0' + (n % 10); n /= 10; } - while (i > 0) putchar(buf[--i]); -} - -static void writeHex(unsigned int n, int width) { - static const char digits[] = "0123456789abcdef"; - // unsigned int is 16-bit on this target -> at most 4 hex digits. - // Cap width to that; without it `printf("%08x", ...)` blew past - // the buf[] tail and corrupted the stack. 
- char buf[4]; - if (width > 4) width = 4; - int i = 0; - if (n == 0) { buf[i++] = '0'; } - while (n > 0 && i < 4) { buf[i++] = digits[n & 0xF]; n >>= 4; } - while (i < width) buf[i++] = '0'; - while (i > 0) putchar(buf[--i]); -} - -static void writeStr(const char *s) { - if (!s) s = "(null)"; - while (*s) { putchar(*s); s++; } -} - -// Format-spec handlers used to be marked noinline to keep vprintf's -// main loop small for the long-branch limitation; now W65816BranchExpand -// reliably promotes Bxx to BRL when needed, so the inliner is free to -// merge them when it wants. -static void writeSignedLong(long n) { - // See writeDec: avoid the signed-overflow UB on LONG_MIN. - if (n < 0) { putchar('-'); writeULong(0ul - (unsigned long)n); } - else writeULong((unsigned long)n); -} - -// Minimal %f / %g support. Uses double soft-float; precision capped -// at 6 fractional digits (the C default). Doesn't handle Inf/NaN -// specially — prints the integer extraction, which will be 0 for -// non-finite values. Not IEEE-precise (intermediate truncation in -// the soft-double mul/div), but good enough for typical formatted -// numeric output. -static void writeDouble(double v, int prec) { - if (prec < 0) prec = 6; - if (prec > 9) prec = 9; - // Test the IEEE-754 sign bit (so -0.0 prints with the sign per - // C99) and avoid the soft-float __ltdf2 comparison, which has - // historically miscompiled for negative inputs (see snprintf.c - // banner for the same workaround). - unsigned long long vbits; - __builtin_memcpy(&vbits, &v, 8); - if (vbits & ((unsigned long long)1 << 63)) { - putchar('-'); - vbits &= ~((unsigned long long)1 << 63); - __builtin_memcpy(&v, &vbits, 8); - } - long ipart = (long)v; - writeULong((unsigned long)ipart); - if (prec == 0) return; - putchar('.'); - double frac = v - (double)ipart; - // Multiply fraction by 10^prec, then print as integer with leading zeros. 
- long mul = 1; - for (int i = 0; i < prec; i++) mul *= 10; - long fdigits = (long)(frac * (double)mul); - if (fdigits < 0) fdigits = -fdigits; - char buf[10]; - int n = 0; - long scale = mul / 10; - while (n < prec) { - if (scale == 0) scale = 1; - long d = fdigits / scale; - buf[n++] = '0' + (char)(d % 10); - scale /= 10; - if (scale == 0) break; - } - while (n < prec) buf[n++] = '0'; - for (int i = 0; i < n; i++) putchar(buf[i]); -} +// vprintf / printf used to dispatch through their own small format +// helpers (writeUDec/writeDec/writeULong/writeHex/writeStr/writeSignedLong/ +// writeDouble). Once vprintf was rewritten to route through vsnprintf +// (so printf and snprintf share one format engine in snprintf.c), the +// helpers became dead weight and were removed. extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap); @@ -724,10 +633,11 @@ void free(void *p) { } void *calloc(size_t nmemb, size_t size) { - // size_t is 16-bit on this target; nmemb*size can overflow and - // wrap to a small value (e.g. calloc(65536, 1) -> 0 -> 2-byte - // alloc), then the caller writes way past the returned region. - // Bail when the multiplication would overflow. + // size_t is 32-bit, so the multiply itself won't overflow for any + // realistic input. The 0xFFFF cap is a "fits in one 64KB bank" + // sanity check: the heap lives in bank 0 below the IO window, so + // any single allocation must fit there. calloc(65536, 1) returns + // null rather than silently truncating into the IO range. if (size != 0 && nmemb > (size_t)0xFFFF / size) return (void *)0; size_t total = nmemb * size; void *p = malloc(total); @@ -757,6 +667,15 @@ void *realloc(void *ptr, size_t n) { typedef void (*AtexitFn)(void); static AtexitFn __atexitFn = (AtexitFn)0; +// BRK $00 then spin -- halts a 65816 in BRK so MAME's debugger catches +// it; the spin loop guards against the (rare) case where BRK returns. 
+static void __halt(void) __attribute__((noreturn)); +static void __halt(void) { + __asm__ volatile (".byte 0x00, 0x00"); + while (1) {} +} + + void exit(int code) { (void)code; // C99 7.20.4.3: exit() must invoke registered atexit handlers in @@ -766,9 +685,7 @@ void exit(int code) { __atexitFn = (AtexitFn)0; // prevent re-entry if fn calls exit fn(); } - // BRK $00 — halts a 65816 in BRK, MAME's debugger catches. - __asm__ volatile (".byte 0x00, 0x00"); - while (1) {} // unreachable + __halt(); } // ---- errno ---- @@ -1128,9 +1045,9 @@ typedef struct __sFILE { static char __tmpNames[MFS_MAX_FILES][LIBC_L_TMPNAM]; static FILE __mfs[MFS_MAX_FILES] = { - { FILE_KIND_STDIN, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, - { FILE_KIND_STDOUT, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, - { FILE_KIND_STDERR, 1, 0, 0, 0, 0, 0, 0, 0, -1, 0, 0 }, + { .kind = FILE_KIND_STDIN, .unget = -1 }, + { .kind = FILE_KIND_STDOUT, .writable = 1, .unget = -1 }, + { .kind = FILE_KIND_STDERR, .writable = 1, .unget = -1 }, }; FILE *stdin = &__mfs[0]; @@ -1278,9 +1195,6 @@ int fclose(FILE *stream) { return 0; } -// Forward decls for routines that live in snprintf.c. -extern int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap); - // Forward decl for vfprintf so fprintf can call it. 
int vfprintf(FILE *stream, const char *fmt, va_list ap); size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream); @@ -1377,8 +1291,7 @@ static AtexitFn __quickFn = (AtexitFn)0; void _Exit(int code) { (void)code; - __asm__ volatile (".byte 0x00, 0x00"); - while (1) {} // unreachable + __halt(); } void quick_exit(int code) { @@ -1388,8 +1301,7 @@ void quick_exit(int code) { __quickFn = (AtexitFn)0; fn(); } - __asm__ volatile (".byte 0x00, 0x00"); - while (1) {} // unreachable + __halt(); } int at_quick_exit(AtexitFn fn) { @@ -1438,20 +1350,26 @@ static void initFileMem(FILE *f, const MfsEntry *reg, int wantWrite) { // LIBC_PATH_MAX (kept in sync with limits.h's PATH_MAX) so user code // that bounds-checks against PATH_MAX stays consistent with what fopen // will accept. -static struct { +typedef struct __GsosPathBufT { u16 length; char text[LIBC_PATH_MAX]; -} __gsosPathBuf; +} __GsosPathBufT; -static int __buildGSString(const char *path) { +static __GsosPathBufT __gsosPathBuf; + +static int __fillGSString(__GsosPathBufT *buf, const char *path) { size_t n = 0; while (path[n] && n < LIBC_PATH_MAX) n++; if (path[n]) return -1; // path > PATH_MAX chars - __gsosPathBuf.length = (u16)n; - for (size_t i = 0; i < n; i++) __gsosPathBuf.text[i] = path[i]; + buf->length = (u16)n; + for (size_t i = 0; i < n; i++) buf->text[i] = path[i]; return 0; } +static int __buildGSString(const char *path) { + return __fillGSString(&__gsosPathBuf, path); +} + FILE *fopen(const char *path, const char *mode) { if (!path || !mode) return (FILE *)0; int wantWrite = 0; @@ -1486,7 +1404,6 @@ FILE *fopen(const char *path, const char *mode) { if (reg) { initFileMem(f, reg, wantWrite); - (void)wantRead; if (truncate) f->size = 0; if (append) f->pos = f->size; return f; @@ -1547,15 +1464,16 @@ FILE *fopen(const char *path, const char *mode) { gsosSetMark(&m); } } - (void)wantRead; return f; } size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { if (!stream) return 0; if 
(size == 0 || nmemb == 0) return 0; - // Avoid 32-bit overflow on size * nmemb: cap nmemb so each item - // (size bytes) fits in remaining 16-bit address space. + // size_t is u32 here, so the multiply itself can't overflow. The + // 0xFFFE cap is a "single 64KB bank" limit -- the underlying + // mem/GSOS backends address by 16-bit offset, so any single fread + // must fit in one bank. if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size; if (stream->kind == FILE_KIND_GSOS) { // Drain unget byte first if present. @@ -1605,8 +1523,10 @@ size_t fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { size_t fwrite(const void *ptr, size_t size, size_t nmemb, FILE *stream) { if (!stream) return 0; if (size == 0 || nmemb == 0) return 0; - // Cap nmemb so each item (size bytes) fits in the address space - // — avoids 32-bit `size * nmemb` that the i32 multiply path triggers. + // size_t is u32 here, so the multiply itself can't overflow. The + // 0xFFFE cap is a "single 64KB bank" limit -- the underlying + // mem/GSOS backends address by 16-bit offset, so any single fwrite + // must fit in one bank. if (nmemb > (size_t)0xFFFE / size) nmemb = (size_t)0xFFFE / size; const char *in = (const char *)ptr; if (stream->kind == FILE_KIND_STDOUT || stream->kind == FILE_KIND_STDERR) { @@ -1814,7 +1734,6 @@ void setbuf(FILE *stream, char *buf) { // GS/OS. This matches both ProDOS `/VOL/FILE` and HFS `:Vol:File:` // conventions without forcing the caller to declare which. -int mfsUnregister(const char *path); extern int rand(void); // True when `path` looks like a GS/OS volume path (contains `/` or @@ -1863,18 +1782,10 @@ static int __sameParentDir(const char *a, const char *b) { // simultaneously (old+new for ChangePath), and Destroy of the source // at the end of the cross-dir fallback can reuse __gsosPathBuf for the // source name. Keeps the destination name alive across all calls. 
-static struct { - u16 length; - char text[LIBC_PATH_MAX]; -} __gsosPathBuf2; +static __GsosPathBufT __gsosPathBuf2; static int __buildGSString2(const char *path) { - size_t n = 0; - while (path[n] && n < LIBC_PATH_MAX) n++; - if (path[n]) return -1; - __gsosPathBuf2.length = (u16)n; - for (size_t i = 0; i < n; i++) __gsosPathBuf2.text[i] = path[i]; - return 0; + return __fillGSString(&__gsosPathBuf2, path); } int remove(const char *path) { diff --git a/runtime/src/resource.c b/runtime/src/resource.c index 3802bd7..c31a523 100644 --- a/runtime/src/resource.c +++ b/runtime/src/resource.c @@ -1,149 +1,479 @@ -// resource.c - iigs/resource.h implementation. Phase 3.4 STUB-ONLY -// landing. +// resource.c - Apple IIgs Resource Manager - real implementation. // -// Phase 1.1 (GS/OS fopen hang on 6.0.2) blocks the live runtime path. -// ResourceStartUp + OpenResourceFile reaches the same blocking code, -// so all three entry points (init, load, size) return RES_ERR_BLOCKED -// unless the build defines IIGS_RESOURCE_RUNTIME_ENABLED=1. When that -// flips on (Phase 1.1 lands), the toolbox calls below activate and the -// typed wrappers route through the real Resource Manager. +// Replaces the Phase 3.4 stub. Reads .rsrc resource forks via the +// stdio surface (fopen/fread/fseek/fclose) and caches loaded payloads +// by (type, id) so repeated loadResource() calls return the same +// handle. Read-only - no AddResource / DetachResource / partial-load. // -// HLock semantics: -// LoadResource (toolbox 0x0E1E) returns a HANDLE - a pointer to a -// master pointer in Memory-Manager-relocatable storage. Until you -// call HLock(handle), any subsequent toolbox call can compact the -// heap and move the underlying bytes. The typed wrappers DO NOT -// call HLock for the caller; that is the caller's responsibility -// per the contract in iigs/resource.h. 
+// File format (Apple IIgs Toolbox Reference Vol 3, ch.42): +// bytes 0..23 : ResourceMapHeaderT (little-endian fields) +// bytes ... : payload blobs at offsets recorded in the index +// bytes at rmToIndex : rmIndexUsed * ResourceIndexEntryT entries // -// Why we stub instead of returning best-effort answers: -// A real LoadResource that silently returned NULL would be ambiguous -// with "resource not found". RES_ERR_BLOCKED lets the demo + smoke -// harness distinguish "Phase 1.1 hasn't landed" from "your TYPECODE_ID -// bundle was missing a resource". Once Phase 1.1 lands, callers see -// the real error codes (RES_ERR_NOT_FOUND, RES_ERR_TOOLBOX) instead. +// Handle convention: we return a `void **` whose dereference yields the +// resource bytes. The handle storage lives in this file's static +// table; the bytes themselves are malloc'd at first load and freed at +// releaseResource(verb=1) or closeResourceFile(). #include "iigs/resource.h" -#include "iigs/toolbox.h" + +#include +#include +#include +#include -// Set to non-zero by a successful resourceProbeInit() call. Read by -// resourceRuntimeEnabled() to report status without re-running init. -// In the stub-only landing this never reaches 1 because the runtime -// path is compiled out. -static int gResourceReady = 0; +// --- Prototypes --- +static int freeHandleSlot(int slot); +static int findHandleByPtr(void **handle); +static int findHandleByTypeId(IigsResTypeT type, IigsResIdT id); +static int findHandleSlot(void); +static int findOpenFileSlot(void); +static int loadIndex(int fileSlot); +static void *readPayload(int fileSlot, uint32_t offset, uint32_t size); +static int readU16(FILE *f, uint16_t *out); +static int readU32(FILE *f, uint32_t *out); +static int readMapHeader(FILE *f, ResourceMapHeaderT *hdr); -// Cached refNum from OpenResourceFile. Populated only when the -// runtime path is enabled. unsigned short to match the toolbox -// signature (refNum is a 16-bit GS/OS fileID). 
-static unsigned short gResourceRefNum = 0; +// --- Internal types --- +typedef struct { + int inUse; + FILE *fp; + ResourceMapHeaderT hdr; + ResourceIndexEntryT *index; // malloc'd; rmIndexUsed entries + uint16_t refNum; // 1..N, matches slot+1 +} ResourceFileT; -// Stub flag to keep the unused-static-warning quiet when the runtime -// path is compiled out. The compiler folds the function bodies below -// to constant returns under -O2 anyway; this just keeps -Wunused happy -// across both build modes. -static void touchUnused(void) { - (void)gResourceRefNum; -} +typedef struct { + int inUse; + int fileSlot; // which ResourceFileT owns it + IigsResTypeT type; + IigsResIdT id; + void *data; // payload bytes + uint32_t size; + void *masterPtr; // master ptr cell -> &data +} HandleSlotT; -#if IIGS_RESOURCE_RUNTIME_ENABLED -// Path passed to OpenResourceFile. When the runtime path is live the -// expectation is that this is the application's own pathname (the OMF -// the Loader launched), so OpenResourceFile attaches to the file's -// resource fork. GS/OS holds the boot pathname in a known low-memory -// vector; we resolve it at init time and cache here. -// -// The exact pathname-resolution sequence is intentionally NOT implemented -// in this stub-only landing - it is part of the Phase 1.1 unblock work -// (the same code that fixes fopen will plumb the pathname through). -static char gOwnPathName[256] = { 0 }; -#endif +// --- State --- +// Declared volatile to defeat the GlobalOpt i1-narrowing pass that +// otherwise produces an `i1, zext` load the W65816 backend can't select. +// (See MEMORY.md: feedback_i1_load_custom.md.) +static volatile int gResourceReady = 0; +static ResourceFileT gFiles[IIGS_RES_MAX_FILES]; +static HandleSlotT gHandles[IIGS_RES_MAX_HANDLES]; -int resourceProbeInit(void) { - touchUnused(); -#if IIGS_RESOURCE_RUNTIME_ENABLED - // Live path - placeholder until Phase 1.1 lands. 
We deliberately - // do not call ResourceStartUp here in the stub-only landing because - // (a) it requires MMStartUp to have run already and (b) calling - // ResourceStartUp on a userId we don't own would corrupt the - // toolbox's per-app state. Phase 1.1's actual implementation will - // look like: - // MMStartUp(); - // TLStartUp(); - // ResourceStartUp(myUserId); - // gResourceRefNum = OpenResourceFile(0x0001, NULL, gOwnPathName); - // gResourceReady = (gResourceRefNum != 0) ? 1 : 0; - return RES_ERR_BLOCKED; -#else - return RES_ERR_BLOCKED; -#endif -} - - -int resourceRuntimeEnabled(void) { - return gResourceReady; -} - - -void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err) { - (void)resType; - (void)resId; -#if IIGS_RESOURCE_RUNTIME_ENABLED - if (!gResourceReady) { - if (err) { - *err = RES_ERR_NOT_STARTED; +int closeResourceFile(ResourceRefNumT refNum) { + if (refNum == 0 || refNum > IIGS_RES_MAX_FILES) { + return RES_ERR_BAD_HANDLE; + } + int slot = (int)refNum - 1; + if (!gFiles[slot].inUse) { + return RES_ERR_BAD_HANDLE; + } + // Free every cached handle owned by this file. + for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) { + if (gHandles[i].inUse && gHandles[i].fileSlot == slot) { + freeHandleSlot(i); } - return (void **)0; } - // Phase 1.1 will plug LoadResource(resType, resId) here. Toolbox - // pushes 4-byte ID as a long, returns handle in PHA slot. Caller - // must HLock() before dereferencing (see header notes). 
- void **h = (void **)LoadResource((unsigned short)resType, (long)resId); - if (!h) { - if (err) { - *err = RES_ERR_NOT_FOUND; - } - return (void **)0; + if (gFiles[slot].index) { + free(gFiles[slot].index); + gFiles[slot].index = (ResourceIndexEntryT *)0; } - if (err) { - *err = RES_OK; + if (gFiles[slot].fp) { + fclose(gFiles[slot].fp); + gFiles[slot].fp = (FILE *)0; } - return h; -#else - if (err) { - *err = RES_ERR_BLOCKED; - } - return (void **)0; -#endif + gFiles[slot].inUse = 0; + return RES_OK; } -uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId, - int *err) { - (void)resType; - (void)resId; -#if IIGS_RESOURCE_RUNTIME_ENABLED +static int findHandleByPtr(void **handle) { + if (!handle) { + return -1; + } + for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) { + if (gHandles[i].inUse && (void **)&gHandles[i].data == handle) { + return i; + } + } + return -1; +} + + +static int findHandleByTypeId(IigsResTypeT type, IigsResIdT id) { + for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) { + if (gHandles[i].inUse && gHandles[i].type == type && gHandles[i].id == id) { + return i; + } + } + return -1; +} + + +static int findHandleSlot(void) { + for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) { + if (!gHandles[i].inUse) { + return i; + } + } + return -1; +} + + +static int findOpenFileSlot(void) { + for (int i = 0; i < IIGS_RES_MAX_FILES; i++) { + if (!gFiles[i].inUse) { + return i; + } + } + return -1; +} + + +static int freeHandleSlot(int slot) { + if (slot < 0 || slot >= IIGS_RES_MAX_HANDLES) { + return RES_ERR_BAD_HANDLE; + } + if (!gHandles[slot].inUse) { + return RES_ERR_BAD_HANDLE; + } + if (gHandles[slot].data) { + free(gHandles[slot].data); + gHandles[slot].data = (void *)0; + } + gHandles[slot].inUse = 0; + gHandles[slot].fileSlot = -1; + gHandles[slot].type = 0; + gHandles[slot].id = 0; + gHandles[slot].size = 0; + return RES_OK; +} + + +uint32_t getResourceSize(void **handle) { + int slot = findHandleByPtr(handle); + if (slot < 0) { + return 0; + 
} + return gHandles[slot].size; +} + + +// Convenience wrapper kept for backwards compat with the old probe. +// Scans the cache + open files for (type, id) and reports the size. +uint32_t iigsGetResourceSize(IigsResTypeT resType, IigsResIdT resId, int *err) { if (!gResourceReady) { if (err) { *err = RES_ERR_NOT_STARTED; } return 0; } - // GetResourceSize returns a 32-bit byte count via the toolbox. - uint32_t sz = (uint32_t)GetResourceSize((unsigned short)resType, - (long)resId); - if (err) { - *err = (sz == 0) ? RES_ERR_NOT_FOUND : RES_OK; + int hSlot = findHandleByTypeId(resType, resId); + if (hSlot >= 0) { + if (err) { + *err = RES_OK; + } + return gHandles[hSlot].size; + } + // Not cached - scan every open file's index for the entry. + for (int f = 0; f < IIGS_RES_MAX_FILES; f++) { + if (!gFiles[f].inUse || !gFiles[f].index) { + continue; + } + uint32_t n = gFiles[f].hdr.rmIndexUsed; + for (uint32_t i = 0; i < n; i++) { + ResourceIndexEntryT *e = &gFiles[f].index[i]; + if (e->rType == resType && e->rID == resId) { + if (err) { + *err = RES_OK; + } + return e->rSize; + } + } } - return sz; -#else if (err) { - *err = RES_ERR_BLOCKED; + *err = RES_ERR_NOT_FOUND; } return 0; -#endif +} + + +// Convenience wrapper kept for backwards compat with the old probe. +void **iigsLoadResource(IigsResTypeT resType, IigsResIdT resId, int *err) { + return loadResource(resType, resId, err); +} + + +// Reads the 20-byte rIndex table for a freshly-opened file. Returns +// RES_OK or an RES_ERR_* code. Caller has populated gFiles[slot].hdr. +static int loadIndex(int fileSlot) { + ResourceFileT *rf = &gFiles[fileSlot]; + uint32_t n = rf->hdr.rmIndexUsed; + if (n == 0) { + rf->index = (ResourceIndexEntryT *)0; + return RES_OK; + } + // Sanity-check against malloc'ing absurd amounts. 
+ if (n > 1024) { + return RES_ERR_TOOLBOX; + } + ResourceIndexEntryT *idx = (ResourceIndexEntryT *)malloc(sizeof(ResourceIndexEntryT) * n); + if (!idx) { + return RES_ERR_NO_MEM; + } + if (fseek(rf->fp, (long)rf->hdr.rmToIndex, 0) != 0) { + free(idx); + return RES_ERR_TOOLBOX; + } + for (uint32_t i = 0; i < n; i++) { + uint16_t t; + uint32_t id; + uint32_t off; + uint16_t attr; + uint32_t sz; + uint32_t h; + if (readU16(rf->fp, &t) != 0 || + readU32(rf->fp, &id) != 0 || + readU32(rf->fp, &off) != 0 || + readU16(rf->fp, &attr) != 0 || + readU32(rf->fp, &sz) != 0 || + readU32(rf->fp, &h) != 0) { + free(idx); + return RES_ERR_TOOLBOX; + } + idx[i].rType = t; + idx[i].rID = id; + idx[i].rOffset = off; + idx[i].rAttr = attr; + idx[i].rSize = sz; + idx[i].rHandle = h; + } + rf->index = idx; + return RES_OK; +} + + +void **loadResource(IigsResTypeT type, IigsResIdT id, int *err) { + if (!gResourceReady) { + if (err) { + *err = RES_ERR_NOT_STARTED; + } + return (void **)0; + } + // Cache hit? + int hSlot = findHandleByTypeId(type, id); + if (hSlot >= 0) { + if (err) { + *err = RES_OK; + } + return (void **)&gHandles[hSlot].data; + } + // Cache miss - find the resource in any open file. 
+ for (int f = 0; f < IIGS_RES_MAX_FILES; f++) { + if (!gFiles[f].inUse || !gFiles[f].index) { + continue; + } + uint32_t n = gFiles[f].hdr.rmIndexUsed; + for (uint32_t i = 0; i < n; i++) { + ResourceIndexEntryT *e = &gFiles[f].index[i]; + if (e->rType != type || e->rID != id) { + continue; + } + int slot = findHandleSlot(); + if (slot < 0) { + if (err) { + *err = RES_ERR_NO_MEM; + } + return (void **)0; + } + void *bytes = readPayload(f, e->rOffset, e->rSize); + if (!bytes) { + if (err) { + *err = RES_ERR_TOOLBOX; + } + return (void **)0; + } + gHandles[slot].inUse = 1; + gHandles[slot].fileSlot = f; + gHandles[slot].type = type; + gHandles[slot].id = id; + gHandles[slot].data = bytes; + gHandles[slot].size = e->rSize; + if (err) { + *err = RES_OK; + } + return (void **)&gHandles[slot].data; + } + } + if (err) { + *err = RES_ERR_NOT_FOUND; + } + return (void **)0; +} + + +ResourceRefNumT openResourceFile(const char *path, uint8_t accessByte, uint16_t fileType, int *err) { + (void)accessByte; + (void)fileType; + if (!path) { + if (err) { + *err = RES_ERR_NOT_FOUND; + } + return 0; + } + int slot = findOpenFileSlot(); + if (slot < 0) { + if (err) { + *err = RES_ERR_NO_MEM; + } + return 0; + } + FILE *fp = fopen(path, "rb"); + if (!fp) { + if (err) { + *err = RES_ERR_NOT_FOUND; + } + return 0; + } + ResourceFileT *rf = &gFiles[slot]; + if (readMapHeader(fp, &rf->hdr) != 0) { + fclose(fp); + if (err) { + *err = RES_ERR_TOOLBOX; + } + return 0; + } + rf->fp = fp; + rf->inUse = 1; + rf->refNum = (uint16_t)(slot + 1); + rf->index = (ResourceIndexEntryT *)0; + int rc = loadIndex(slot); + if (rc != RES_OK) { + fclose(fp); + rf->fp = (FILE *)0; + rf->inUse = 0; + if (err) { + *err = rc; + } + return 0; + } + gResourceReady = 1; + if (err) { + *err = RES_OK; + } + return rf->refNum; +} + + +// Allocates and reads `size` bytes at `offset` from the file at +// `fileSlot`. Returns NULL on any error. 
+static void *readPayload(int fileSlot, uint32_t offset, uint32_t size) { + if (size == 0) { + return (void *)0; + } + void *buf = malloc(size); + if (!buf) { + return (void *)0; + } + FILE *fp = gFiles[fileSlot].fp; + if (fseek(fp, (long)offset, 0) != 0) { + free(buf); + return (void *)0; + } + size_t got = fread(buf, 1, size, fp); + if (got != size) { + free(buf); + return (void *)0; + } + return buf; +} + + +// Reads a little-endian uint16 from `f`. Returns 0 on success. +static int readU16(FILE *f, uint16_t *out) { + uint8_t b[2]; + if (fread(b, 1, 2, f) != 2) { + return -1; + } + *out = (uint16_t)(b[0] | ((uint16_t)b[1] << 8)); + return 0; +} + + +// Reads a little-endian uint32 from `f`. Returns 0 on success. +static int readU32(FILE *f, uint32_t *out) { + uint8_t b[4]; + if (fread(b, 1, 4, f) != 4) { + return -1; + } + *out = (uint32_t)b[0] | + ((uint32_t)b[1] << 8) | + ((uint32_t)b[2] << 16) | + ((uint32_t)b[3] << 24); + return 0; +} + + +// Reads the 24-byte rResourceMap header at offset 0. +static int readMapHeader(FILE *f, ResourceMapHeaderT *hdr) { + if (fseek(f, 0L, 0) != 0) { + return -1; + } + if (readU16(f, &hdr->rmVersion) != 0) return -1; + if (readU32(f, &hdr->rmToIndex) != 0) return -1; + if (readU16(f, &hdr->rmFileNum) != 0) return -1; + if (readU16(f, &hdr->rmID) != 0) return -1; + if (readU32(f, &hdr->rmIndexSize) != 0) return -1; + if (readU32(f, &hdr->rmIndexUsed) != 0) return -1; + if (readU16(f, &hdr->rmFreeListSize) != 0) return -1; + if (readU16(f, &hdr->rmFreeListUsed) != 0) return -1; + if (readU16(f, &hdr->rmPad) != 0) return -1; + return 0; +} + + +int releaseResource(int verb, void **handle) { + int slot = findHandleByPtr(handle); + if (slot < 0) { + return RES_ERR_BAD_HANDLE; + } + if (verb == 0) { + // Soft release: keep cached payload. Real toolbox would decrement + // a use-count; we just succeed. + return RES_OK; + } + return freeHandleSlot(slot); +} + + +int resourceProbeInit(void) { + // Zero the tables. 
Safe to call repeatedly - subsequent calls do + // not touch already-open files. + if (!gResourceReady) { + for (int i = 0; i < IIGS_RES_MAX_FILES; i++) { + gFiles[i].inUse = 0; + gFiles[i].fp = (FILE *)0; + gFiles[i].index = (ResourceIndexEntryT *)0; + gFiles[i].refNum = 0; + } + for (int i = 0; i < IIGS_RES_MAX_HANDLES; i++) { + gHandles[i].inUse = 0; + gHandles[i].fileSlot = -1; + gHandles[i].data = (void *)0; + gHandles[i].size = 0; + } + gResourceReady = 1; + } + return RES_OK; +} + + +int resourceRuntimeEnabled(void) { + return gResourceReady; } diff --git a/runtime/src/snprintf.c b/runtime/src/snprintf.c index 811a32f..0591efa 100644 --- a/runtime/src/snprintf.c +++ b/runtime/src/snprintf.c @@ -40,6 +40,13 @@ typedef __builtin_va_list va_list; #define va_arg(ap, ty) __builtin_va_arg(ap, ty) #define va_end(ap) __builtin_va_end(ap) +// Unbounded sink sentinel used by sprintf/vsprintf. Setting gEnd to +// `buf + 0xFFFE` looks innocuous but clang lowers the +0xFFFE to a +// `dec a; dec a` peephole (0xFFFE is -2 in 16-bit), giving gEnd = +// buf - 2 -- the `cur < end` bounds test then always fails. Use the +// absolute top-of-bank sentinel instead. +#define SPRINTF_END_SENTINEL ((char *)0xFFFF) + static char *gCur; static char *gEnd; @@ -757,12 +764,9 @@ int snprintf(char *buf, size_t n, const char *fmt, ...) { int sprintf(char *buf, const char *fmt, ...) { gCur = buf; - // sprintf is unbounded. Setting gEnd = buf + 0xFFFE looks innocuous - // but clang lowers the +0xFFFE to a `dec a; dec a` peephole (since - // 0xFFFE is -2 in 16-bit), giving gEnd = buf - 2 — and then the - // emit() bounds test `cur < end` is always false, so nothing gets - // written. Use the absolute top-of-bank sentinel instead. - gEnd = (char *)0xFFFF; + // sprintf is unbounded; see SPRINTF_END_SENTINEL above for the + // reason we don't use buf + 0xFFFE. 
+ gEnd = SPRINTF_END_SENTINEL; gTotal = 0; va_list ap; va_start(ap, fmt); @@ -782,7 +786,7 @@ int vsnprintf(char *buf, size_t n, const char *fmt, va_list ap) { int vsprintf(char *buf, const char *fmt, va_list ap) { gCur = buf; - gEnd = (char *)0xFFFF; + gEnd = SPRINTF_END_SENTINEL; gTotal = 0; return format(fmt, ap); } diff --git a/scripts/mameDebug.py b/scripts/mameDebug.py index 7e7d358..652e327 100755 --- a/scripts/mameDebug.py +++ b/scripts/mameDebug.py @@ -39,6 +39,7 @@ # DEBUGGER_E2E=1 scripts/mameDebug.py --bin ... --map ... --dwarf ... import argparse +import importlib.util import os import re import subprocess @@ -50,6 +51,21 @@ SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) ROOT = os.path.dirname(SCRIPT_DIR) +# Import pc2line.py as a module so the REPL can reuse its DWARF parsing +# (line table, DIE walking, type chains, locals evaluator) without +# shelling out + reparsing on every command. pc2line.py is the single +# source of truth for DWARF semantics; we must NOT duplicate any of it. +def _loadPc2lineModule(): + spec = importlib.util.spec_from_file_location( + "pc2line", os.path.join(SCRIPT_DIR, "pc2line.py")) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + return mod + + +pc2line = _loadPc2lineModule() + + # ---- Map + DWARF helpers --------------------------------------------- def loadMapSyms(path): @@ -561,6 +577,766 @@ def interactiveMode(args): return 0 +# ---- REPL mode (--repl) --------------------------------------------- +# +# An interactive prompt that gives `gdb`-flavour commands on top of the +# load-snapshot-resolve cycle. Because MAME has no bidirectional Lua +# RPC channel under `-debugger none`, every "execute the program" +# command (run / continue / step / next) maps to one MAME process +# launch. The Lua autoboot writes the program into bank-0 memory, +# installs all queued breakpoints, runs until the first hit, captures +# a register + memory snapshot, and exits. 
The Python REPL then +# decodes the snapshot to answer `print`, `bt`, `where` from cached +# state — no further MAME launch needed for those. +# +# Commands: +# break set/queue a breakpoint +# run | continue [c] launch MAME, stop at first bp hit +# step | next advance to next source line +# (via DWARF line table; one bp install) +# bt | backtrace walk the JSL frame chain from S +# where PC -> source line for the last hit +# print decode bytes at &symbol per DWARF type +# info locals show formal_parameters + locals +# info breakpoints list queued breakpoints +# delete remove breakpoint by index +# quit | q exit +# ? this help +# +# Smoke-checkable: pipe a script of `break main\nrun\nwhere\nquit\n` +# into `mameDebug.py --repl ...` and assert the BP-HIT + WHERE output. + + +REPL_HELP = """\ +Commands: + break set/queue a breakpoint + run | continue launch MAME, stop at first hit + step | next advance to next source line (DWARF) + bt | backtrace walk JSL frame chain from S + where PC -> source line for the last hit + print decode bytes at &symbol per DWARF type + info locals show formal_parameters + locals + info breakpoints list queued breakpoints + delete remove breakpoint by index + quit | q exit + ? this help +""" + + +# Lua autoboot for the REPL. Differs from the --trace template in three +# ways: +# 1. Breakpoint actions also dump (a) a 64-byte stack window around S +# and (b) per-symbol memory regions for `print` requests, both as +# tagged log lines so the host can parse. +# 2. exit_frame is generous (240) so a slow run still completes. +# 3. The list of "watch" memory regions is parameterised — the host +# stamps in (addr, len) pairs based on queued `print ` +# requests. 
+REPL_LUA_TEMPLATE = r""" +-- mameDebug REPL autoboot (generated by scripts/mameDebug.py --repl) +local BIN_PATH = "{bin_path}" +local LOAD_AT = 0x{load_at:04x} +local START_PC = 0x{start_pc:06x} +local BPS = {{ {bp_list} }} +local WATCHES = {{ {watch_list} }} -- list of {{addr, len}} pairs + +local installed = false +local frame = 0 +local cpu, dbg, mem + +emu.register_frame_done(function() + frame = frame + 1 + if frame == 30 and not installed then + cpu = manager.machine.devices[":maincpu"] + dbg = cpu.debug + mem = cpu.spaces["program"] + local f = io.open(BIN_PATH, "rb") + if not f then + print("MAMEDBG-BIN-MISSING " .. BIN_PATH) + manager.machine:exit() + return + end + local data = f:read("*all") + f:close() + for i = 1, #data do + local addr = LOAD_AT + i - 1 + if not (addr >= 0x00C000 and addr < 0x00D000) then + mem:write_u8(addr, data:byte(i)) + end + end + cpu.state["PC"].value = START_PC + cpu.state["PB"].value = 0x00 + cpu.state["DB"].value = 0x00 + cpu.state["D"].value = 0x00 + cpu.state["P"].value = 0x04 + cpu.state["E"].value = 0 + cpu.state["S"].value = 0x01FF + + -- Build the bp action. We use the 3-arg bpset form (1-arg + -- crashes MAME). The action stamps a magic marker into bank-2 + -- scratch ($020010 / 0xDEAD) so the periodic poller can detect + -- the hit and dump memory from a SAFE context (the action + -- string itself can't call multi-statement loops cleanly). + local action_template = + 'logerror "MAMEDBG-BP PC=%X A=%X X=%X Y=%X S=%X DBR=%X\n",pc,a,x,y,s,db; ' .. + 'w@0x020010=0xDEAD; w@0x020012=s; w@0x020014=pc & 0xFFFF; w@0x020016=(pc>>16) & 0xFF; go' + for _, pc in ipairs(BPS) do + dbg:bpset(pc, '', action_template) + end + print(string.format("MAMEDBG-LOADED bytes=%d bps=%d watches=%d", + #data, #BPS, #WATCHES)) + installed = true + end + if frame == {exit_frame} then + print("MAMEDBG-EXIT frame=" .. frame) + manager.machine:exit() + end +end) + +-- Marker-driven snapshot dumper. 
Once the bp action stamps 0xDEAD at +-- $020010, this periodic handler reads S + PC from the scratch slots +-- and dumps the watched memory regions, then clears the marker. +local snapshotted = false +emu.register_periodic(function() + if installed and not snapshotted and mem ~= nil then + local marker = mem:read_u16(0x020010) + if marker == 0xDEAD then + local s_val = mem:read_u16(0x020012) + local pc_lo = mem:read_u16(0x020014) + local pc_bnk = mem:read_u8(0x020016) + local full_pc = (pc_bnk * 0x10000) + pc_lo + print(string.format("MAMEDBG-SNAP S=0x%04X PC=0x%06X", + s_val, full_pc)) + -- Dump 64 bytes of the stack window above S (S+1 .. S+64). + -- That's where the topmost JSL return frame lives. + for ofs = 1, 64 do + local addr = s_val + ofs + local v = mem:read_u8(addr) + print(string.format("MAMEDBG-STACK addr=0x%06X val=0x%02X", + addr, v)) + end + -- Dump each user-requested watch. + for _, w in ipairs(WATCHES) do + local addr, n = w[1], w[2] + for ofs = 0, n - 1 do + local v = mem:read_u8(addr + ofs) + print(string.format("MAMEDBG-WATCH addr=0x%06X val=0x%02X", + addr + ofs, v)) + end + end + mem:write_u16(0x020010, 0) + snapshotted = true + end + end +end) +""" + + +def buildReplLuaScript(bin_path, load_at, bp_pcs, watch_regions, + start_pc, exit_frame): + """Build a MAME autoboot Lua script for one REPL run. + + bp_pcs: list of int (24-bit PCs) — breakpoints to install. + watch_regions: list of (addr, length) tuples — per-symbol memory + dumps stamped at the first BP hit. + """ + bp_list = ", ".join(f"0x{p:06x}" for p in bp_pcs) + watch_list = ", ".join(f"{{0x{a:06x}, {n}}}" for a, n in watch_regions) + return REPL_LUA_TEMPLATE.format( + bin_path = bin_path, + load_at = load_at, + start_pc = start_pc, + bp_list = bp_list or "", + watch_list = watch_list or "", + exit_frame = exit_frame, + ) + + +# Regex for snapshot/watch/stack lines emitted by the REPL Lua script. 
+SNAP_RE = re.compile(r"MAMEDBG-SNAP\s+S=0x([0-9A-Fa-f]+)\s+PC=0x([0-9A-Fa-f]+)") +WATCH_RE = re.compile(r"MAMEDBG-WATCH\s+addr=0x([0-9A-Fa-f]+)\s+val=0x([0-9A-Fa-f]+)") +STACK_RE = re.compile(r"MAMEDBG-STACK\s+addr=0x([0-9A-Fa-f]+)\s+val=0x([0-9A-Fa-f]+)") + + +class ReplState: + """All persistent state across REPL commands.""" + + def __init__(self, args): + self.args = args + # Map: address -> symbol name (binary-searchable by funcAt) + self.syms = pc2line.loadMapSymbols(args.map) + # DWARF: line table + DIE trees (parsed once, reused) + self.sectionPayloads = pc2line.loadSidecarSectionsAll(args.dwarf) + self.cus = pc2line.parseAllCus(self.sectionPayloads) + self.lineTable = pc2line.buildTable(args.dwarf) + # Breakpoints: list of (pc, label) - label is the original spec + self.breakpoints = [] + # Watches: dict {symbol: (addr, length)}. Length picked from + # the symbol's DWARF type when available, else fall back to 2. + self.watches = {} + # Last snapshot — populated after a run. Empty until first run. + self.lastSnap = None # {"pc": int, "sp": int} + self.lastWatchBytes = {} # {addr: byte} (last run only) + self.lastStackBytes = {} # {addr: byte} (last run only) + + def resolveSpec(self, spec): + """Resolve `FUNC`, `FILE:LINE`, or `0xADDR` to a 24-bit PC. + Returns (pc, label) or (None, error_msg). + """ + spec = spec.strip() + # Hex address? + if spec.lower().startswith("0x"): + try: + return (int(spec, 16), spec) + except ValueError: + return (None, f"invalid hex: {spec!r}") + # File:line? + if ":" in spec: + file_part, line_part = spec.rsplit(":", 1) + try: + want_line = int(line_part) + except ValueError: + return (None, f"invalid line: {line_part!r}") + # Prefer the smallest-PC entry on the requested line so the + # bp lands on the statement's first instruction, not a + # later trailing entry. 
+ best = None + for pc, fidx, ln, ft in self.lineTable: + if ln != want_line: + continue + if 0 < fidx <= len(ft): + fname = os.path.basename(ft[fidx - 1]) + else: + fname = "?" + # Match if fname matches OR fname is "?" (DWARF5 + # file_idx=0 path means "the CU's primary file" — we + # treat that as a wildcard match for the user-supplied + # file name). + if fname == file_part or fname.endswith(file_part) \ + or fname == "?": + if best is None or pc < best[0]: + best = (pc, fname) + if best is not None: + return (best[0], f"{best[1]}:{want_line}") + return (None, f"no DWARF line entry for {spec!r}") + # Bare symbol name — lookup in map. + for addr, sym in self.syms: + if sym == spec: + return (addr, sym) + return (None, f"symbol {spec!r} not in map") + + def symbolSize(self, symname): + """Best-effort size of a global symbol's storage (in bytes). + + Looks up DW_TAG_variable DIEs across all CUs. Returns the + resolved type's byte size, or None if not findable. Falls back + to caller-default (2) when None. + """ + for cu in self.cus: + if cu.root is None: + continue + for die in self._iterDies(cu.root): + if die.tag != pc2line.DW_TAG_variable: + continue + nm = pc2line.dieName(cu, die) + if nm != symname: + continue + tref = die.getRaw(pc2line.DW_AT_type) + if tref is None: + return None + target = pc2line._findDieByOffset(cu, tref[0]) + return self._typeByteSize(cu, target) + return None + + def _iterDies(self, die): + yield die + for ch in die.children: + yield from self._iterDies(ch) + + def _typeByteSize(self, cu, die): + """Walk a type DIE chain, return byte size or None.""" + if die is None: + return None + seen = set() + cur = die + while cur is not None and cur.offset not in seen: + seen.add(cur.offset) + tag = cur.tag + # Base / structure / union / enum types carry DW_AT_byte_size. 
+ bs = cur.getRaw(0x0b) # DW_AT_byte_size + if bs is not None: + return bs[0] + if tag == pc2line.DW_TAG_pointer_type: + # 24-bit byte addresses are stored as 4-byte ptr32 by + # default in our ABI; default-on Layer 2 builds use 4-byte + # ptrs. Fall back to addr_size if recorded. + return cu.addr_size + if tag in (0x16, 0x26, 0x35, 0x37): + # typedef/const/volatile/restrict (0x16/0x26/0x35/0x37) — follow. + t = cur.getRaw(pc2line.DW_AT_type) + if t is None: + return None + cur = pc2line._findDieByOffset(cu, t[0]) + continue + if tag == 0x01: # DW_TAG_array_type + t = cur.getRaw(pc2line.DW_AT_type) + if t is None: + return None + elem = self._typeByteSize(cu, + pc2line._findDieByOffset(cu, t[0])) + if elem is None: + return None + # Find first subrange child for count. + for ch in cur.children: + if ch.tag == 0x21: # DW_TAG_subrange_type + ub = ch.getRaw(0x2f) # DW_AT_upper_bound + if ub is not None: + return elem * (ub[0] + 1) + return None + # Other tags — give up. + return None + return None + + def typeStrOfSymbol(self, symname): + """Return a printable type string for a global symbol, or '?'.""" + for cu in self.cus: + if cu.root is None: + continue + for die in self._iterDies(cu.root): + if die.tag != pc2line.DW_TAG_variable: + continue + nm = pc2line.dieName(cu, die) + if nm == symname: + return pc2line.varTypeStr(cu, die) + return "?" + + +def replLaunchMame(state, bp_pcs, start_pc, watch_regions, seconds=4): + """Launch one MAME run with the queued breakpoints + watches. + + Returns the captured stdout/stderr text. Parses MAMEDBG-SNAP, + MAMEDBG-WATCH, MAMEDBG-STACK lines into state.lastSnap + + state.lastWatchBytes + state.lastStackBytes.
+ """ + lua = buildReplLuaScript(state.args.bin, state.args.load_at, + bp_pcs, watch_regions, + start_pc=start_pc, + exit_frame=240) + with tempfile.NamedTemporaryFile("w", suffix=".lua", + delete=False) as lf: + lf.write(lua) + lua_path = lf.name + try: + out = runMame(lua_path, seconds=seconds, debug_flag=True) + finally: + try: + os.unlink(lua_path) + except OSError: + pass + + # Parse snapshot lines. + state.lastSnap = None + state.lastWatchBytes = {} + state.lastStackBytes = {} + bps = [] + for ln in out.splitlines(): + m = BP_RE.search(ln) + if m: + bps.append({ + "pc": int(m.group(1), 16), + "a": int(m.group(2), 16), + "x": int(m.group(3), 16), + "y": int(m.group(4), 16), + "s": int(m.group(5), 16), + "db": int(m.group(6), 16), + }) + m = SNAP_RE.search(ln) + if m: + state.lastSnap = { + "sp": int(m.group(1), 16), + "pc": int(m.group(2), 16), + } + m = WATCH_RE.search(ln) + if m: + state.lastWatchBytes[int(m.group(1), 16)] = int(m.group(2), 16) + m = STACK_RE.search(ln) + if m: + state.lastStackBytes[int(m.group(1), 16)] = int(m.group(2), 16) + state.lastBps = bps + return out + + +def replPrintWhere(state): + """Print PC -> source line for the last snapshot.""" + if state.lastSnap is None: + print(" no snapshot yet — `run` first") + return + pc = state.lastSnap["pc"] + sp = state.lastSnap["sp"] + row = pc2line.query(state.lineTable, pc) + func = pc2line.funcAt(state.syms, pc) + if row is None: + print(f" PC=0x{pc:06x} (no DWARF line) FUNC={func} S=0x{sp:04x}") + else: + _, fname, ln = row + print(f" PC=0x{pc:06x} FILE={fname} LINE={ln} FUNC={func} " + f"S=0x{sp:04x}") + + +def replPrintBacktrace(state): + """Walk the JSL return frame chain starting from the captured S. + + The W65816 JSL pushes 3 bytes per call (PCL, PCH, PBR). Our ABI is + empty-descending: S points to the next-free byte. So the topmost + return-address triplet lives at S+1, S+2, S+3. We read it from the + captured stack window. 
We have no DW_AT_frame_base / DW_CFA_* + sidecar yet, so we can't walk past one frame — but we can show the + return address of the current function, which is what most debug + sessions need anyway. + """ + if state.lastSnap is None: + print(" no snapshot yet — `run` first") + return + pc = state.lastSnap["pc"] + sp = state.lastSnap["sp"] + func = pc2line.funcAt(state.syms, pc) + row = pc2line.query(state.lineTable, pc) + if row is None: + print(f" #0 PC=0x{pc:06x} FUNC={func}") + else: + _, fname, ln = row + print(f" #0 PC=0x{pc:06x} {fname}:{ln} FUNC={func}") + # Try to read S+1..S+3 from the captured stack window. + pcl_addr = (sp + 1) & 0xFFFF + pch_addr = (sp + 2) & 0xFFFF + pbr_addr = (sp + 3) & 0xFFFF + pcl = state.lastStackBytes.get(pcl_addr) + pch = state.lastStackBytes.get(pch_addr) + pbr = state.lastStackBytes.get(pbr_addr) + if pcl is None or pch is None or pbr is None: + print(" #1 ") + return + # JSL pushes the address of the LAST byte of the JSL instruction, + # so the actual return target is ret_addr + 1. + ret_pc = (pbr << 16) | (pch << 8) | pcl + ret_pc = (ret_pc + 1) & 0xFFFFFF + ret_func = pc2line.funcAt(state.syms, ret_pc) + ret_row = pc2line.query(state.lineTable, ret_pc) + if ret_row is None: + print(f" #1 PC=0x{ret_pc:06x} FUNC={ret_func}") + else: + _, fname, ln = ret_row + print(f" #1 PC=0x{ret_pc:06x} {fname}:{ln} FUNC={ret_func}") + + +def replPrintSymbol(state, spec): + """Decode a symbol's bytes from the last snapshot and print them + per the symbol's DWARF type. If the symbol hasn't been watched + yet (or no run has happened), instruct the user to `run` first. + """ + addr = None + for a, s in state.syms: + if s == spec: + addr = a + break + if addr is None: + print(f" no such symbol: {spec!r}") + return + # Make sure it's queued as a watch for the next run. + if spec not in state.watches: + sz = state.symbolSize(spec) + if sz is None or sz <= 0: + sz = 2 + if sz > 64: + # Truncate: large structs/arrays surface the first 64 bytes. 
+ sz = 64 + state.watches[spec] = (addr, sz) + + if state.lastSnap is None or not state.lastWatchBytes: + print(f" &{spec} = 0x{addr:06x} (watch queued — run to capture)") + return + + addr_w, length = state.watches[spec] + bytes_ = bytearray(length) + have_all = True + for i in range(length): + b = state.lastWatchBytes.get(addr_w + i) + if b is None: + have_all = False + break + bytes_[i] = b + type_str = state.typeStrOfSymbol(spec) + if not have_all: + print(f" {spec}: ADDR=0x{addr:06x} TYPE={type_str} " + f"(no snapshot bytes — run again to capture)") + return + decoded = _decodeBytes(type_str, bytes_) + hex_dump = " ".join(f"{b:02x}" for b in bytes_) + print(f" {spec} : {type_str} = {decoded}") + print(f" ADDR=0x{addr:06x} BYTES=[{hex_dump}]") + + +def _decodeBytes(type_str, raw): + """Best-effort C-value print for a small byte buffer. + + Recognises: + - int/short/char (1/2/4 byte ints, little-endian) + - unsigned variants + - any "* " (pointer) type — print as hex address + - struct/union — show raw hex (the caller already prints BYTES=) + Floats are out of scope per the task; print bytes as hex. + """ + ts = type_str.strip() + if not raw: + return "" + + # Pointer types -> print as hex address of the right width. + if ts.endswith("*") or " *" in ts: + if len(raw) >= 4: + v = raw[0] | (raw[1] << 8) | (raw[2] << 16) | (raw[3] << 24) + return f"0x{v & 0xFFFFFFFF:08x}" + if len(raw) >= 2: + v = raw[0] | (raw[1] << 8) + return f"0x{v:04x}" + return f"0x{raw[0]:02x}" + + # Integer base types. 
+ int_widths = { + "char": 1, "signed char": 1, "unsigned char": 1, + "_Bool": 1, "bool": 1, + "short": 2, "short int": 2, + "unsigned short": 2, "unsigned short int": 2, + "int": 2, "unsigned int": 2, "signed int": 2, + "long": 4, "long int": 4, "signed long": 4, + "unsigned long": 4, "unsigned long int": 4, + "long long": 4, "unsigned long long": 4, + } + signed_set = {"char", "signed char", "short", "short int", + "int", "signed int", "long", "long int", + "signed long", "long long"} + if ts in int_widths: + w = int_widths[ts] + n = min(w, len(raw)) + v = 0 + for i in range(n): + v |= raw[i] << (8 * i) + if ts in signed_set: + top = 1 << (8 * n - 1) + if v & top: + v = v - (1 << (8 * n)) + return f"{v} (0x{v & ((1 << (8*n)) - 1):0{2*n}x})" + + # struct / union / class — caller dumps raw bytes. + if ts.startswith("struct ") or ts.startswith("union ") \ + or ts.startswith("class "): + # Show u16 words as a partial decode hint (often the first + # field is an integer the user wants to see). + if len(raw) >= 2: + first_u16 = raw[0] | (raw[1] << 8) + return f"<{ts}; first u16 = 0x{first_u16:04x}>" + return f"<{ts}>" + + # Array type — show first elements as best-effort integers. + if "[" in ts and ts.endswith("]"): + first = " ".join(f"0x{b:02x}" for b in raw[:8]) + return f"[{first}{', ...' 
if len(raw) > 8 else ''}]" + + return "" + + +def replInfoLocals(state): + """Show formal_parameters + locals at the last snapshot PC.""" + if state.lastSnap is None: + print(" no snapshot yet — `run` first") + return + pc = state.lastSnap["pc"] + sp = state.lastSnap["sp"] + cu, sub, locs = pc2line.localsAtPc(state.cus, pc, sp_value=sp) + if sub is None: + print(f" no subprogram at PC=0x{pc:06x}") + return + sub_name = pc2line.dieName(cu, sub) or "" + print(f" in {sub_name!r} at PC=0x{pc:06x} S=0x{sp:04x}") + if not locs: + print(" (no formal_parameter / variable in scope)") + return + for name, ty, loc, _die in locs: + if loc.kind == "memory": + print(f" {name} : {ty} ADDR=0x{loc.addr:06x}") + elif loc.kind == "register": + if loc.dp_addr is not None: + print(f" {name} : {ty} REG=DW{loc.reg_dw} " + f"ADDR=0x{loc.dp_addr:06x}") + else: + print(f" {name} : {ty} REG=DW{loc.reg_dw}") + elif loc.kind == "value": + print(f" {name} : {ty} VALUE=0x{loc.value:x}") + else: + print(f" {name} : {ty} UNSUPPORTED={loc.reason}") + + +def replNextLinePc(state, current_pc): + """Return the PC of the DWARF line entry strictly after current_pc, + or None if there isn't one (end of program / no DWARF). + """ + # The line table is unsorted in source order; iterate to find the + # smallest entry whose PC is strictly greater than current_pc. + best = None + for pc, _fidx, _ln, _ft in state.lineTable: + if pc > current_pc: + if best is None or pc < best: + best = pc + return best + + +def replLoop(state): + """Run the REPL. Reads commands from stdin, dispatches each one.""" + interactive_tty = sys.stdin.isatty() + if interactive_tty: + print("mameDebug REPL. Type ? 
for help.") + while True: + try: + if interactive_tty: + line = input("(dbg) ") + else: + line = input() # no prompt in batch mode (cleaner output) + except EOFError: + if interactive_tty: + print() + break + line = line.strip() + if not line or line.startswith("#"): + continue + # Echo command in batch mode so the smoke test can diff output. + if not interactive_tty: + print(f"(dbg) {line}") + cmd, _, rest = line.partition(" ") + rest = rest.strip() + if cmd in ("q", "quit", "exit"): + break + if cmd == "?" or cmd == "help": + print(REPL_HELP) + continue + if cmd in ("break", "b"): + if not rest: + print(" usage: break ") + continue + pc, label = state.resolveSpec(rest) + if pc is None: + print(f" cannot resolve: {label}") + continue + state.breakpoints.append((pc, label)) + idx = len(state.breakpoints) + print(f" bp #{idx} at 0x{pc:06x} ({label})") + continue + if cmd in ("info",): + if rest == "breakpoints": + if not state.breakpoints: + print(" no breakpoints") + else: + for i, (pc, lab) in enumerate(state.breakpoints, 1): + print(f" #{i} 0x{pc:06x} ({lab})") + continue + if rest == "locals": + replInfoLocals(state) + continue + print(f" unknown info subcommand: {rest!r}") + continue + if cmd == "delete": + try: + idx = int(rest) + except ValueError: + print(" usage: delete ") + continue + if idx < 1 or idx > len(state.breakpoints): + print(f" no breakpoint #{idx}") + continue + del state.breakpoints[idx - 1] + print(f" deleted bp #{idx}") + continue + if cmd in ("run", "r", "continue", "c"): + if not state.breakpoints: + print(" no breakpoints set — nothing to break on") + continue + bp_pcs = [pc for pc, _ in state.breakpoints] + # Decide start_pc: --from-start runs through crt0; default + # is to jump to the first bp (matches --trace behaviour). 
+ if state.args.from_start: + start_pc = state.args.load_at + else: + start_pc = bp_pcs[0] + watch_regions = list(state.watches.values()) + replLaunchMame(state, bp_pcs, start_pc, watch_regions, + seconds=state.args.seconds) + if state.lastSnap is None: + print(" WARN: no BP-HIT captured (timed out?)") + else: + replPrintWhere(state) + continue + if cmd in ("step", "s", "next", "n"): + # Both map to "advance to next source line via DWARF" in + # our snapshot-based model. Requires a prior snapshot to + # know "where we are". + if state.lastSnap is None: + # No prior snapshot: just do `run` (start of program). + if not state.breakpoints: + print(" no breakpoints set — `break` first") + continue + bp_pcs = [pc for pc, _ in state.breakpoints] + start_pc = (state.args.load_at if state.args.from_start + else bp_pcs[0]) + replLaunchMame(state, bp_pcs, start_pc, + list(state.watches.values()), + seconds=state.args.seconds) + if state.lastSnap is not None: + replPrintWhere(state) + continue + current_pc = state.lastSnap["pc"] + next_pc = replNextLinePc(state, current_pc) + if next_pc is None: + print(" no next DWARF line entry — at end of program") + continue + print(f" stepping to next DWARF line at 0x{next_pc:06x}") + replLaunchMame(state, [next_pc], current_pc, + list(state.watches.values()), + seconds=state.args.seconds) + if state.lastSnap is None: + print(" WARN: step did not hit the bp (timed out?)") + else: + replPrintWhere(state) + continue + if cmd == "where": + replPrintWhere(state) + continue + if cmd in ("bt", "backtrace"): + replPrintBacktrace(state) + continue + if cmd in ("print", "p"): + if not rest: + print(" usage: print ") + continue + replPrintSymbol(state, rest) + continue + print(f" unknown command: {line!r} (try ?)") + return 0 + + +def replMode(args): + """Entry point for `--repl`.""" + state = ReplState(args) + if args.break_at: + # --break is interpreted as "queue this bp before reading any + # interactive commands" — useful when scripting. 
+ pc, label = state.resolveSpec(args.break_at) + if pc is None: + print(f"mameDebug: --break {args.break_at!r}: {label}", + file=sys.stderr) + return 2 + state.breakpoints.append((pc, label)) + print(f" bp #1 at 0x{pc:06x} ({label}) [from --break]") + return replLoop(state) + + # ---- main ------------------------------------------------------------ def main(): @@ -579,6 +1355,13 @@ def main(): ap.add_argument("--trace", action="store_true", help="default-on smoke mode: set bp, capture one " "BP-HIT, resolve via pc2line, exit 0") + ap.add_argument("--repl", action="store_true", + help="interactive REPL. Reads stdin commands " + "(break/run/step/next/where/bt/print/info/" + "delete/quit). Each `run`/`step`/`next` " + "launches one MAME process. `print`, `bt`, " + "and `where` decode the captured snapshot " + "and need no further MAME launch.") ap.add_argument("--from-start", action="store_true", help="start execution at LOAD_AT (i.e. through " "the crt0). Default is to jump straight to " @@ -611,6 +1394,8 @@ def main(): return 2 if args.trace: return traceMode(args) + if args.repl: + return replMode(args) return interactiveMode(args) diff --git a/scripts/probeReplSmoke.sh b/scripts/probeReplSmoke.sh new file mode 100755 index 0000000..1eff5ee --- /dev/null +++ b/scripts/probeReplSmoke.sh @@ -0,0 +1,127 @@ +#!/usr/bin/env bash +# probeReplSmoke.sh - non-interactive smoke check for mameDebug.py +# --repl mode. Pipes a canned script (`break main`, `run`, `where`, +# `quit`) into the REPL and asserts that: +# 1. The REPL parses each command without error +# 2. A breakpoint resolves through the link816 map +# 3. MAME launches with the bp installed and surfaces a BP-HIT line +# 4. `where` resolves the captured PC to a source line via DWARF +# +# Exit 0 on full pass. Exit 77 (autotools "skip") if MAME / toolchain +# missing. Exit 1 on any unexpected REPL output or missing capture. 
+# +# Usage: probeReplSmoke.sh [--verbose] + +set -euo pipefail +HERE="$(cd "$(dirname "$0")" && pwd)" +ROOT="$(cd "$HERE/.." && pwd)" +VERBOSE=0 +if [ "${1:-}" = "--verbose" ]; then + VERBOSE=1 +fi + +CLANG="$ROOT/tools/llvm-mos-build/bin/clang" +LLVMMC="$ROOT/tools/llvm-mos-build/bin/llvm-mc" +LINK="$ROOT/tools/link816" + +if [ ! -x "$CLANG" ] || [ ! -x "$LLVMMC" ] || [ ! -x "$LINK" ]; then + echo "probeReplSmoke: missing toolchain (clang/llvm-mc/link816)" >&2 + exit 77 +fi +if ! command -v mame >/dev/null 2>&1; then + echo "probeReplSmoke: mame not on PATH; skipping" >&2 + exit 77 +fi + +WORK="$(mktemp -d)" +trap 'rm -rf "$WORK"' EXIT +CFILE="$WORK/repltest.c" +OFILE="$WORK/repltest.o" +OCRT0="$WORK/crt0.o" +OLIBGCC="$WORK/libgcc.o" +BIN="$WORK/repltest.bin" +MAP="$WORK/repltest.map" +DWARF="$WORK/repltest.dwarf" +OUT="$WORK/repl.out" + +cat > "$CFILE" <<'EOF' +int gAnswer = 42; +int add(int a, int b) { + int c = a + b; + return c; +} +int main(void) { + int r = add(3, 4); + gAnswer = r; + while (1) { } + return r; +} +EOF + +"$CLANG" --target=w65816 -O0 -g -ffunction-sections \ + -c "$CFILE" -o "$OFILE" 2>/dev/null +"$LLVMMC" -arch=w65816 -filetype=obj \ + "$ROOT/runtime/src/crt0.s" -o "$OCRT0" 2>/dev/null +"$LLVMMC" -arch=w65816 -filetype=obj \ + "$ROOT/runtime/src/libgcc.s" -o "$OLIBGCC" 2>/dev/null +"$LINK" -o "$BIN" --text-base 0x1000 \ + --map "$MAP" --debug-out "$DWARF" \ + "$OCRT0" "$OFILE" "$OLIBGCC" >/dev/null 2>&1 || true + +[ -s "$BIN" ] || { echo "probeReplSmoke: empty .bin"; exit 1; } +[ -s "$DWARF" ] || { echo "probeReplSmoke: empty DWARF sidecar"; exit 1; } +[ -s "$MAP" ] || { echo "probeReplSmoke: empty map"; exit 1; } + +# Pipe the canned REPL script. 
+printf 'break main\nrun\nwhere\nquit\n' \ + | timeout 60 python3 "$HERE/mameDebug.py" --repl \ + --bin "$BIN" --map "$MAP" --dwarf "$DWARF" \ + --seconds 4 > "$OUT" 2>&1 || { + echo "probeReplSmoke: mameDebug.py --repl failed" >&2 + cat "$OUT" >&2 + exit 1 +} + +if [ "$VERBOSE" -eq 1 ]; then + cat "$OUT" >&2 +fi + +# Required output lines: +# "(dbg) break main" - command echo +# " bp #1 at 0x...... (main)" - bp set ack +# "(dbg) run" - command echo +# " PC=0x...... ... FUNC=main ..." - where output after run +# "(dbg) where" - command echo +# " PC=0x...... ... FUNC=main ..." - where output (manual) +# "(dbg) quit" - command echo +if ! grep -q "bp #1 at 0x" "$OUT"; then + echo "probeReplSmoke: missing 'bp #1 at 0x...' breakpoint ack" >&2 + cat "$OUT" >&2 + exit 1 +fi +if ! grep -q "FUNC=main" "$OUT"; then + echo "probeReplSmoke: missing FUNC=main in 'where' output" >&2 + cat "$OUT" >&2 + exit 1 +fi +# The `where` command (run AFTER the `run` command) must produce +# output too — verify by counting occurrences of "PC=0x" prefix lines. +PC_HITS=$(grep -c "^ PC=0x" "$OUT" || true) +if [ "$PC_HITS" -lt 2 ]; then + echo "probeReplSmoke: expected >= 2 PC=0x lines (run + where), got $PC_HITS" >&2 + cat "$OUT" >&2 + exit 1 +fi + +# Bonus: verify the captured PC equals the map entry for `main`. +MAIN_PC=$(awk '$2 == "main" { print $1; exit }' "$MAP") +[ -n "$MAIN_PC" ] || { echo "probeReplSmoke: no 'main' symbol in map"; exit 1; } +MAIN_PC_LC=$(echo "$MAIN_PC" | tr 'A-Z' 'a-z') +if ! grep -qi "PC=$MAIN_PC_LC " "$OUT"; then + echo "probeReplSmoke: captured PC does not match map[main]=$MAIN_PC" >&2 + cat "$OUT" >&2 + exit 1 +fi + +echo "probeReplSmoke: OK (bp resolved, BP-HIT captured, where decoded)" +exit 0 diff --git a/scripts/smokeTest.sh b/scripts/smokeTest.sh index d5c2d9f..51577fb 100755 --- a/scripts/smokeTest.sh +++ b/scripts/smokeTest.sh @@ -1146,6 +1146,20 @@ EOF fi fi + # Phase 3.3: mameDebug.py --repl non-interactive smoke. 
Pipes a + # canned `break main / run / where / quit` script into the REPL and + # asserts that (1) the bp resolves via the link816 map, (2) MAME + # launches and surfaces a BP-HIT, (3) the captured PC is decoded + # through DWARF into FUNC=main on the where output, and (4) the + # captured PC equals the map's entry for main. MAME-gated. + if command -v mame >/dev/null && [ -d "$PROJECT_ROOT/tools/mame/roms" ]; then + log "check: mameDebug.py --repl non-interactive (break/run/where/quit)" + if ! bash "$PROJECT_ROOT/scripts/probeReplSmoke.sh" >/dev/null 2>&1; then + bash "$PROJECT_ROOT/scripts/probeReplSmoke.sh" --verbose >&2 || true + die "mameDebug.py --repl smoke probe failed" + fi + fi + # iigs/sound.h + iigs/eventLoop.h headers compile cleanly through # clang with the runtime include path. Catches missing extern "C" # wraps, broken struct layouts, or unresolved tool-call stubs. @@ -5988,12 +6002,19 @@ EOF # omfEmit --stack-size: append a ~Direct DP/Stack segment so the # GS/OS Loader allocates an explicit-sized DP+stack chunk instead - # of its 4KB default. KIND=0x1012 (DP/Stack | PRIVATE), LENGTH and - # RESSPC both = requested size, ALIGN=0x100 (page-aligned per spec). - # Plain (non-ExpressLoad) multi-segment OMFs do not launch under - # GS/OS 6.0.2 Loader (verified empirically), so --stack-size auto- - # enables --expressload: the OMF becomes 3 segments (ExpressLoad, - # code, DP/Stack), with DP/Stack as segnum 3. + # of its 4KB default. KIND=0x4012 (DP/Stack | RELOAD), LENGTH = + # requested size, RESSPC=0 (the stack bytes are carried in LCONST + # because the ExpressLoad fast path can't be trusted to honor + # RESSPC — same trick the user CODE seg uses for BSS). ALIGN= + # 0x100 (page-aligned per spec). Plain (non-ExpressLoad) multi- + # segment OMFs do not launch under GS/OS 6.0.2 Loader (verified + # empirically), so --stack-size auto-enables --expressload: the + # OMF becomes 3 segments (ExpressLoad, code, DP/Stack), with + # DP/Stack as segnum 3. 
The ExpressLoad load script also carries + # a segtable + remap + header_info entry for the DP/Stack so the + # Loader's fast path actually honors it (without that the Loader + # silently drops the seg and uses its default 4KB allocation — + # see feedback_gsos_fopen_partial_diagnosis). log "check: omfEmit --stack-size emits a DP/Stack ~Direct segment" omfStk="$(mktemp --suffix=.omf)" "$PROJECT_ROOT/tools/omfEmit" \ @@ -6022,16 +6043,34 @@ align = struct.unpack_from(' new seg 3 +remapOff = 6 + 8*2 +rm = struct.unpack_from('/dev/null 2>&1; then + log "check: omfEmit --stack-size grows DP/Stack chunk under real GS/OS Loader" + cStkFile="$(mktemp --suffix=.c)" + oStkFile="$(mktemp --suffix=.o)" + binStk="$(mktemp --suffix=.bin)" + mapStk="$(mktemp --suffix=.map)" + relStk="$(mktemp --suffix=.reloc)" + omfStkWith="$(mktemp --suffix=.omf)" + omfStkWithout="$(mktemp --suffix=.omf)" + cat > "$cStkFile" <<'EOF' +// Stack-size end-to-end probe: capture SP at entry to main() and +// store its high byte at $71 so the harness can verify Loader honored +// --stack-size. $70 = 0x99 marker = program ran. 
+int main(void) { + __asm__ volatile ( + "rep #0x30\n" + "tsc\n" + "xba\n" + "sep #0x20\n" + "sta 0x71\n" + "rep #0x20\n" + ); + *(volatile unsigned char *)0x70 = 0x99; + for (volatile unsigned long s = 0; s < 600000UL; s++) { } + return 0; +} +EOF + "$CLANG" --target=w65816 -I"$PROJECT_ROOT/runtime/include" -O2 -ffunction-sections -c \ + "$cStkFile" -o "$oStkFile" + "$PROJECT_ROOT/tools/link816" -o "$binStk" --text-base 0x1000 \ + --map "$mapStk" --reloc-out "$relStk" \ + "$PROJECT_ROOT/runtime/crt0Gsos.o" "$oStkFile" \ + "$PROJECT_ROOT/runtime/libc.o" \ + "$PROJECT_ROOT/runtime/snprintf.o" \ + "$PROJECT_ROOT/runtime/extras.o" \ + "$PROJECT_ROOT/runtime/softFloat.o" \ + "$PROJECT_ROOT/runtime/softDouble.o" \ + "$PROJECT_ROOT/runtime/iigsGsos.o" \ + "$PROJECT_ROOT/runtime/iigsToolbox.o" \ + "$PROJECT_ROOT/runtime/libgcc.o" 2>/tmp/stkprobe-link.err >/dev/null \ + || die "stack-size smoke: link failed: $(cat /tmp/stkprobe-link.err)" + # WITH --stack-size 0x4000 (16 KB chunk; Loader places at $0800, + # SP lands at $47FF → high byte $47). + "$PROJECT_ROOT/tools/omfEmit" --input "$binStk" --map "$mapStk" \ + --base 0x1000 --entry __start --output "$omfStkWith" \ + --name STKPROBE --stack-size 0x4000 --relocs "$relStk" >/dev/null 2>&1 + if [ ! -s "$omfStkWith" ]; then + die "stack-size smoke: omfEmit (with stack-size) produced empty OMF" + fi + if ! bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWith" \ + --check 0x70=0x99 0x71=0x47 >/dev/null 2>&1; then + bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWith" \ + --check 0x70=0x99 0x71=0x47 2>&1 | tail -5 >&2 + die "stack-size smoke FAILED: SP high byte != 0x47 with --stack-size 0x4000 (Loader silently dropped the seg?)" + fi + # WITHOUT --stack-size: Loader default 4 KB chunk → SP=$17FF → + # high byte $17. This second run guards against a spurious pass + # of the first (e.g. if every program by coincidence got SP=$47FF + # without our seg). 
+ "$PROJECT_ROOT/tools/omfEmit" --input "$binStk" --map "$mapStk" \ + --base 0x1000 --entry __start --output "$omfStkWithout" \ + --name STKPROBE --expressload --relocs "$relStk" >/dev/null 2>&1 + if [ ! -s "$omfStkWithout" ]; then + die "stack-size smoke: omfEmit (no stack-size) produced empty OMF" + fi + if ! bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWithout" \ + --check 0x70=0x99 0x71=0x17 >/dev/null 2>&1; then + bash "$PROJECT_ROOT/scripts/runViaFinder.sh" "$omfStkWithout" \ + --check 0x70=0x99 0x71=0x17 2>&1 | tail -5 >&2 + die "stack-size smoke FAILED: baseline SP high byte != 0x17 (Loader default-allocation shifted?)" + fi + rm -f "$cStkFile" "$oStkFile" "$binStk" "$mapStk" "$relStk" \ + "$omfStkWith" "$omfStkWithout" +fi + # W65816 codegen-shape regression pins. Tiny FileCheck assertions on # specific lowering behaviors that have broken before; runs in well # under a second. See scripts/runFileCheckTests.sh. @@ -6535,23 +6661,25 @@ else log "OK: cursorProbe Push/Pop arrow+busy returned cleanly + marker set" fi -# Phase 3.4 resourcemgr STUB-ONLY landing. Verifies: +# Phase 3.4 resourcemgr REAL implementation. Verifies: # - resource.o links into a normal GS/OS demo, -# - resourceProbeInit() / iigsLoadResource() / iigsGetResourceSize() -# all return RES_ERR_BLOCKED in stub mode (mark 0x71/0x72 = 0xff), -# - resourceRuntimeEnabled() returns 0 in stub mode (mark 0x73 = 0x01), -# - demos/build.sh's rsrcBundle post-step produces an AppleSingle blob -# and the cadius _ResourceFork.bin sidecar when demos/rsrcProbe.rsrc/ -# is present (verified by file existence). -# The live resource-fork pathway in MAME is NOT exercised here - the -# whole point of the stub-only landing is that Phase 1.1 (GS/OS fopen -# hang) blocks the live path on GS/OS 6.0.2. 
+# - the demo stages an in-memory .rsrc fixture via mfsRegister, +# opens it through openResourceFile (real parser), loads an rText +# resource by (type, id), verifies the payload bytes match +# "HELLO" and the size is 5, +# - second loadResource() call returns the SAME handle (cache hit), +# - closeResourceFile() returns RES_OK, +# - demos/build.sh's rsrcBundle post-step still produces an AppleSingle +# blob + cadius sidecar when demos/rsrcProbe.rsrc/ is present. +# The fixture also doubles as a bundler-output verification: the on-disk +# sidecar bytes from rsrcBundle.py match the in-memory fixture byte-for- +# byte, so passing this check confirms parser + bundler agree on format. if [ "${SMOKE_SKIP_RSRC:-0}" = 1 ]; then warn "SMOKE_SKIP_RSRC=1; skipping Phase 3.4 rsrcProbe stage" elif [ ! -f "$SYSDISK_DR" ] || [ ! -x "$CADIUS_DR" ] || ! command -v mame >/dev/null 2>&1; then warn "Phase 3.4 rsrcProbe prerequisites missing; skipping" else - log "check: rsrcProbe stub Resource Manager facade runs under GS/OS" + log "check: rsrcProbe real Resource Manager (open/load/release/close) under GS/OS" bash "$PROJECT_ROOT/demos/build.sh" rsrcProbe >/tmp/rsrcBuildOut 2>&1 || { cat /tmp/rsrcBuildOut >&2 die "demos/build.sh rsrcProbe failed" @@ -6565,11 +6693,11 @@ else fi bash "$PROJECT_ROOT/scripts/runViaFinder.sh" \ "$PROJECT_ROOT/demos/rsrcProbe.omf" \ - --check 0x70=0x99 0x71=0xff 0x72=0xff 0x73=0x01 >/tmp/rsrcRunOut 2>&1 || { + --check 0x70=0x99 0x71=0x01 0x72=0x01 0x73=0x01 >/tmp/rsrcRunOut 2>&1 || { cat /tmp/rsrcRunOut >&2 - die "rsrcProbe did not set expected stub-mode markers" + die "rsrcProbe did not set expected real-impl markers" } - log "OK: rsrcProbe (stub-mode RES_ERR_BLOCKED markers all green)" + log "OK: rsrcProbe (real Resource Manager open/load/cache/close all green)" fi # Phase 4.2 sprite engine: standalone SHR 320 init + 16x16 4bpp packed @@ -6621,15 +6749,23 @@ fi # Phase 6.2 UBSan-min smoke probe: build a tiny program with # `-fsanitize=undefined 
-fsanitize-minimal-runtime`, link against the -# new runtime/ubsan.o, and verify three representative UB kinds -# (add-overflow / shift-out-of-bounds / divrem-overflow) instrument -# cleanly + recover. Bare-metal (no GS/OS), so we only require `mame`. +# new runtime/ubsan.o, and verify nine recoverable UB kinds +# (add-overflow / shift-out-of-bounds / divrem-overflow / sub-overflow / +# mul-overflow / negate-overflow / pointer-overflow / load-invalid-value / +# out-of-bounds) instrument cleanly + recover. Bare-metal (no GS/OS), +# so we only require `mame`. # # What this probe pins: # $025000 = 0xC0DE add-overflow handler fired and recovered # $025002 = 0xC0DF shift-out-of-bounds handler fired and recovered # $025004 = 0xC0E0 divrem-overflow handler fired and recovered -# $025006 = 0xC0DA main reached its tail past all three UBs +# $025006 = 0xC0E1 sub-overflow handler fired and recovered +# $025008 = 0xC0E2 mul-overflow handler fired and recovered +# $02500A = 0xC0E3 negate-overflow handler fired and recovered +# $02500C = 0xC0E4 pointer-overflow handler fired and recovered +# $02500E = 0xC0E5 load-invalid-value handler fired and recovered +# $025010 = 0xC0E6 out-of-bounds handler fired and recovered +# $025012 = 0xC0DA main reached its tail past all nine UBs # # Gated on `mame`. Override with SMOKE_SKIP_UBSAN=1. if [ "${SMOKE_SKIP_UBSAN:-0}" = 1 ]; then @@ -6637,12 +6773,12 @@ if [ "${SMOKE_SKIP_UBSAN:-0}" = 1 ]; then elif ! command -v mame >/dev/null 2>&1 || [ ! 
-d "$PROJECT_ROOT/tools/mame/roms" ]; then warn "Phase 6.2 ubsan prerequisites missing (mame); skipping" else - log "check: ubsanProbe (UBSan-min: add-overflow + shift-OOB + div-by-zero) in MAME" + log "check: ubsanProbe (UBSan-min: 9 UB kinds) in MAME" bash "$PROJECT_ROOT/tests/ubsan/runUbsanProbe.sh" >/tmp/ubsanRunOut 2>&1 || { cat /tmp/ubsanRunOut >&2 die "ubsanProbe did not set expected handler-fired markers" } - log "OK: ubsanProbe (3 UB kinds instrumented + recovered + tail reached)" + log "OK: ubsanProbe (9 UB kinds instrumented + recovered + tail reached)" fi log "all smoke checks passed" diff --git a/src/link816/link816.cpp b/src/link816/link816.cpp index 248e24f..3f0fbcf 100644 --- a/src/link816/link816.cpp +++ b/src/link816/link816.cpp @@ -73,12 +73,12 @@ struct Elf32Shdr { uint32_t sh_entsize; }; -static constexpr uint32_t SHT_NULL = 0; -static constexpr uint32_t SHT_PROGBITS = 1; +[[maybe_unused]] static constexpr uint32_t SHT_NULL = 0; +[[maybe_unused]] static constexpr uint32_t SHT_PROGBITS = 1; static constexpr uint32_t SHT_SYMTAB = 2; static constexpr uint32_t SHT_STRTAB = 3; static constexpr uint32_t SHT_RELA = 4; -static constexpr uint32_t SHT_NOBITS = 8; +[[maybe_unused]] static constexpr uint32_t SHT_NOBITS = 8; struct Elf32Sym { uint32_t st_name; @@ -104,12 +104,12 @@ static constexpr uint16_t EM_NONE = 0; inline uint8_t ELF32_ST_TYPE(uint8_t i) { return i & 0x0F; } inline uint8_t ELF32_ST_BIND(uint8_t i) { return (i >> 4) & 0x0F; } static constexpr uint8_t STB_LOCAL = 0; -static constexpr uint8_t STB_GLOBAL = 1; +[[maybe_unused]] static constexpr uint8_t STB_GLOBAL = 1; static constexpr uint8_t STB_WEAK = 2; -static constexpr uint8_t STT_NOTYPE = 0; -static constexpr uint8_t STT_OBJECT = 1; -static constexpr uint8_t STT_FUNC = 2; +[[maybe_unused]] static constexpr uint8_t STT_NOTYPE = 0; +[[maybe_unused]] static constexpr uint8_t STT_OBJECT = 1; +[[maybe_unused]] static constexpr uint8_t STT_FUNC = 2; static constexpr uint8_t STT_SECTION = 3; 
struct Elf32Rela { @@ -170,9 +170,10 @@ static std::string sectionKind(const std::string &name) { // .init_array entries are 16-bit function pointers; treat as // rodata so they end up in the read-only image and get a stable // address. The linker emits __init_array_start/_end so crt0 can - // walk them. Same for .fini_array (destructors). + // walk them. (.fini_array is not yet wired up; ELF input is + // accepted but the sections are dropped — runtime has no + // destructor-walk path today.) if (name == ".init_array" || name.rfind(".init_array.", 0) == 0) return "init_array"; - if (name == ".fini_array" || name.rfind(".fini_array.", 0) == 0) return "fini_array"; // DWARF debug sections that are *targets* of intra-debug relocs // (e.g. .debug_info -> .debug_str via R_W65816_DATA32, or // .debug_str_offsets -> .debug_str via R_W65816_DATA32). Treat @@ -384,6 +385,26 @@ static std::vector gImm24Sites; static uint32_t gTextBaseForSites = 0; static bool gRecordSites = false; + +// Record an intra-segment patch site for cRELOC emission. A target +// below the text base is never intra-segment (it is an undefined-weak +// resolving to 0, or an absolute address) and is skipped — see the +// commentary at the R_W65816_IMM16 callsite for why this matters. +static void recordCRelocSite(uint32_t patchAddr, uint32_t target, + uint8_t byteCnt, uint8_t bitShift) { + if (!gRecordSites) return; + uint32_t targetBank = target & 0xFF0000; + uint32_t baseBank = gTextBaseForSites & 0xFF0000; + if (targetBank != baseBank) return; + if (target < gTextBaseForSites) return; + Imm24Site s; + s.patchOff = patchAddr - gTextBaseForSites; + s.offsetRef = target - gTextBaseForSites; + s.byteCnt = byteCnt; + s.bitShift = bitShift; + gImm24Sites.push_back(s); +} + // Number of bytes patched by a given reloc type. Used by callers // that need to range-check a reloc offset against a buffer size // without re-deriving the width inline. 
Returns 0 for unknown @@ -411,7 +432,7 @@ static uint32_t relocWidth(uint8_t rtype) { static void applyReloc(std::vector &buf, uint32_t off, uint32_t patchAddr, uint32_t target, uint8_t rtype, const std::string &symName) { - int64_t Signed; + int64_t pcrelDisp; switch (rtype) { case R_W65816_IMM8: if (target > 0xFF) @@ -433,28 +454,16 @@ static void applyReloc(std::vector &buf, uint32_t off, // time. Without this, `lda absConst` reads from the wrong // address when the segment doesn't land at link-time-base // (e.g., link-time-base=0x1000 but Loader places at bank:0). - if (gRecordSites) { - uint32_t targetBank = target & 0xFF0000; - uint32_t baseBank = gTextBaseForSites & 0xFF0000; - // A target below the text base is never an intra-segment - // relocatable site: it is an undefined-weak symbol (resolveSym - // resolves those to 0) or an absolute address. Recording a - // cRELOC for it would (a) underflow offsetRef = target - textBase - // (omfEmit rejects it as out-of-range) and (b) make the Loader - // rewrite a genuine null to segPlacedBase, breaking the - // `if (weakFn) weakFn()` null test that the null is meant to fail. - if (targetBank == baseBank && target >= gTextBaseForSites) { - Imm24Site s; - s.patchOff = patchAddr - gTextBaseForSites; - s.offsetRef = target - gTextBaseForSites; - // Use type field width = 2 to distinguish from IMM24 - // (3). Imm24Site struct is reused — emitOmf will - // emit cRELOC ByteCnt=2 for this. - s.byteCnt = 2; - s.bitShift = 0; - gImm24Sites.push_back(s); - } - } + // A target below the text base is never an intra-segment + // relocatable site: it is an undefined-weak symbol (resolveSym + // resolves those to 0) or an absolute address. Recording a + // cRELOC for it would (a) underflow offsetRef = target - textBase + // (omfEmit rejects it as out-of-range) and (b) make the Loader + // rewrite a genuine null to segPlacedBase, breaking the + // `if (weakFn) weakFn()` null test that the null is meant to fail. 
+ // recordCRelocSite handles the gate; byteCnt=2 distinguishes + // from IMM24 (3) so omfEmit emits cRELOC ByteCnt=2 here. + recordCRelocSite(patchAddr, target, /*byteCnt=*/2, /*bitShift=*/0); break; case R_W65816_BANK16: // 2-byte patch: byte 0 = bank of target, byte 1 = 0 (pad). @@ -463,20 +472,9 @@ static void applyReloc(std::vector &buf, uint32_t off, // the value reflects the actually-placed bank. buf[off] = static_cast((target >> 16) & 0xFF); buf[off + 1] = 0; - if (gRecordSites) { - uint32_t targetBank = target & 0xFF0000; - uint32_t baseBank = gTextBaseForSites & 0xFF0000; - // See R_W65816_IMM16: skip undefined-weak/absolute targets - // below the text base (no valid intra-segment cRELOC). - if (targetBank == baseBank && target >= gTextBaseForSites) { - Imm24Site s; - s.patchOff = patchAddr - gTextBaseForSites; - s.offsetRef = target - gTextBaseForSites; - s.byteCnt = 2; - s.bitShift = 16; - gImm24Sites.push_back(s); - } - } + // bitShift=16: cRELOC Loader patches the bank byte from + // (segPlacedBase + offsetRef) >> 16 at load time. + recordCRelocSite(patchAddr, target, /*byteCnt=*/2, /*bitShift=*/16); break; case R_W65816_IMM24: if (target > 0xFFFFFF) @@ -485,46 +483,30 @@ static void applyReloc(std::vector &buf, uint32_t off, buf[off] = static_cast(target & 0xFF); buf[off + 1] = static_cast((target >> 8) & 0xFF); buf[off + 2] = static_cast((target >> 16) & 0xFF); - // Record the site for OMF cRELOC emission (only if recording is - // enabled — gRecordSites is set by the CLI when --reloc-out is - // requested). The patch offset is within the segment image; the - // reference offset is the in-segment offset of the target. - if (gRecordSites) { - // Only intra-segment refs need cRELOC; cross-bank refs (to - // GS/OS dispatcher etc.) target absolute fixed addresses - // and shouldn't be relocated by the Loader. 
- uint32_t targetBank = target & 0xFF0000; - uint32_t baseBank = gTextBaseForSites & 0xFF0000; - // See R_W65816_IMM16: skip undefined-weak/absolute targets - // below the text base (no valid intra-segment cRELOC). - if (targetBank == baseBank && target >= gTextBaseForSites) { - Imm24Site s; - s.patchOff = patchAddr - gTextBaseForSites; - s.offsetRef = target - gTextBaseForSites; - s.byteCnt = 3; - s.bitShift = 0; - gImm24Sites.push_back(s); - } - } + // Only intra-segment refs need cRELOC; cross-bank refs (to + // GS/OS dispatcher etc.) target absolute fixed addresses + // and shouldn't be relocated by the Loader. recordCRelocSite + // applies the same gates as R_W65816_IMM16. + recordCRelocSite(patchAddr, target, /*byteCnt=*/3, /*bitShift=*/0); break; case R_W65816_PCREL8: - Signed = static_cast(target) - (static_cast(patchAddr) + 1); - if (Signed < -128 || Signed > 127) { + pcrelDisp = static_cast(target) - (static_cast(patchAddr) + 1); + if (pcrelDisp < -128 || pcrelDisp > 127) { char msg[256]; std::snprintf(msg, sizeof(msg), "R_W65816_PCREL8 to '%s' out of branch range (%lld bytes)", - symName.c_str(), (long long)Signed); + symName.c_str(), (long long)pcrelDisp); die(msg); } - buf[off] = static_cast(Signed & 0xFF); + buf[off] = static_cast(pcrelDisp & 0xFF); break; case R_W65816_PCREL16: - Signed = static_cast(target) - (static_cast(patchAddr) + 2); - if (Signed < -32768 || Signed > 32767) + pcrelDisp = static_cast(target) - (static_cast(patchAddr) + 2); + if (pcrelDisp < -32768 || pcrelDisp > 32767) die("R_W65816_PCREL16 to '" + symName + "' out of BRL range"); - buf[off] = static_cast(Signed & 0xFF); - buf[off + 1] = static_cast((Signed >> 8) & 0xFF); + buf[off] = static_cast(pcrelDisp & 0xFF); + buf[off + 1] = static_cast((pcrelDisp >> 8) & 0xFF); break; case R_W65816_DATA32: // 4-byte LE absolute. 
Used in DWARF .debug_* sections @@ -554,33 +536,22 @@ static void applyReloc(std::vector &buf, uint32_t off, // patches the low 3 bytes of the 4-byte slot at load time, // leaving the high (pad) byte at 0 (writes the resolved // 24-bit value bank:offset with bitShift=0 == no shift). - if (gRecordSites) { - uint32_t targetBank = target & 0xFF0000; - uint32_t baseBank = gTextBaseForSites & 0xFF0000; - if (targetBank == baseBank && target >= gTextBaseForSites) { - Imm24Site s; - s.patchOff = patchAddr - gTextBaseForSites; - s.offsetRef = target - gTextBaseForSites; - s.byteCnt = 3; - s.bitShift = 0; - gImm24Sites.push_back(s); - } - } + recordCRelocSite(patchAddr, target, /*byteCnt=*/3, /*bitShift=*/0); break; case R_W65816_PCREL32: // 4-byte signed PC-relative. PCREL displacements have the // PC pointing past the slot — the convention used by every // other PCREL reloc in this file (PCREL8 adds 1, PCREL16 // adds 2), so PCREL32 adds 4. - Signed = static_cast(target) - (static_cast(patchAddr) + 4); + pcrelDisp = static_cast(target) - (static_cast(patchAddr) + 4); // No range check: 32-bit signed displacement covers the // full address space. In practice this fires for DWARF // intra-section diffs where target and patchAddr live in - // the same section, so Signed is small. - buf[off] = static_cast(Signed & 0xFF); - buf[off + 1] = static_cast((Signed >> 8) & 0xFF); - buf[off + 2] = static_cast((Signed >> 16) & 0xFF); - buf[off + 3] = static_cast((Signed >> 24) & 0xFF); + // the same section, so pcrelDisp is small. 
+ buf[off] = static_cast(pcrelDisp & 0xFF); + buf[off + 1] = static_cast((pcrelDisp >> 8) & 0xFF); + buf[off + 2] = static_cast((pcrelDisp >> 16) & 0xFF); + buf[off + 3] = static_cast((pcrelDisp >> 24) & 0xFF); break; default: { char msg[128]; @@ -1106,11 +1077,6 @@ struct Linker { curRem -= seg; if (curRem == 0) { segIdx++; break; } curBase += seg; // advance within bank or to next - if ((curBase & 0xFFFFu) == 0) { - // Crossed bank boundary — already at start of next bank. - } else if ((curBase & 0xFF0000u) != ((curBase - 1) & 0xFF0000u)) { - // Just crossed into next bank. - } } // Zero out any unused segment slots so crt0 sees size=0. for (uint32_t i = segIdx; i < 4; i++) { @@ -1709,13 +1675,9 @@ int main(int argc, char **argv) { if (++i >= argc) usage(argv[0]); relocOutPath = argv[i++]; } else if (a == "--gc-sections") { - // Drop sections not reachable from __start / main / - // init_array. Requires `-ffunction-sections` (so each - // function is in its own section). Significantly shrinks - // text for programs that link the whole runtime but only - // use a fraction of it. ON by default; --no-gc-sections - // disables. - linker.gcSections = true; + // GC of unreachable sections is on by default; --gc-sections + // is accepted as a no-op alias for clarity. Use + // --no-gc-sections to disable. 
i++; } else if (a == "--no-gc-sections") { linker.gcSections = false; diff --git a/src/link816/omfEmit b/src/link816/omfEmit new file mode 100755 index 0000000000000000000000000000000000000000..360dacd10b0d8eb936a1211537e49cd2c4fd5997 GIT binary patch literal 71512 zcmeFadwf*I`9Ho30V0AYDq6H&R=Q|XE+$BjRlqDHa8@=Dxmc`X2+0N_AxV=B;FZ8; zX-?O*Xr-UtY|-AV)rwURt-)|HQEL@xMbrv-Da}XD7C-H$%}>(E|HoVWOnqh(8GiONwcTQ?UFtJmHFMcd_QBume}&8boBoQd zb`5_~cv7DabE5FGpLT!OpqfUu}WyQjAV=t{N z8&z3RRlj1?it*z{jT<|})6zni#YR`329>w*v22J{&iW5N ze$)DwZ`pfG`OB|-W5M6n7GLsc&KlMs-E6};{1Klu<%zr>Db^uBGVZ^BL56E)AHwqS z|1|u+GxYM(F5dF-RIIknI9zhe9+vGWSsUfTJ~^J+HNMuXqfK0o>@s`dCg|5TUj zj2=qb2KEe}0s?!gAMFJ`WkApMFTh}W!oy|I6Z{Yu&=dSKOh`}g+FttoZ7<_G)(gD3 z7kW!tqx^n(A1Uh41Yh5r7%^t-VadE4Df{eSkt&xd=FtIK<# z|3|&hnchqN`+KQR1=iDi?dnAiqrJ@g)L!VnuNOYQ(F>hV^-{mS7k;{Xp=V7m`eAM_ z{Zj9G@PF#>_Fm|j*$Y1dz3?-$mw5-7J@z}Y7x>s-=Ec*?c%STL-5u@ia9pFD5UY%#Z0)UP3cOB-D^*2~}JQlnj z<0X87g{Q%SAH%qY!CEf(sRE!o`P&9Qu>O9levt)t6&EjBT3uCK7cQv{7Ztd;yRf9LqO`c8ye?cDDp?vR1uUm@ zaY=1)xVEGsTo))UD43E{Fl#z0UtU~Xx?)96PR-EI{MSiUZYk z#WQO{RZFYO&_8<_S6N+JQW?t6DOd);?C_ZE;^Oe)+Un)Sm7%Id;l;(F+S=;cqUoio zdawPQM4IZ>S6os&LSm1s`v29D`7HKTgHb(jF1NHYR8mW}jFD^sYr-XA za%Il70RAV;SECtU0j8p_Q2rRn!x_{6&2;p_rku;mE2_$h%PVT@!o^k9;o|CY$uBaj ztg>hspL6DXEwaAuOkG|ZsteUF3l&(C^mW@q5@*gH3yJ^kf5U)rb@dA&vjt1pJEYK8 zJZmAuJQOOfr~+Bx;u5f@Hngy$vZSgsw6F-Q7@w$81!|YpwT&;UE?!CYS(lbj@pStKjV5X%) z{+%-FP5QH;C2B)8l_jMRzonLjm zBCd+M>e4Znp#=oT z*B}eV(te166C>-mU8mt+rrr3DDTU!wK>Awv-AFNR=XOoRUdrBY7F~QZa;LfG;$QyU zi0X~D%4?9X>HHt;j0U;pTd)$DALy#I@;ATwC{pRJTdn-xA6ZO_Bh~YI^g>q@W~FiaKOho;D;UXD;@Ao2YiA9p0QtLuq}0salq|; zF(xY!TW7Jo&n0{s0oKpn&k`Q6fq2^cZo;QH;PyV6@Tm^Cz3(Qxzya@kTV=S9CasCz zUX>yof|&TFJK&US`W9y#y4RyfTxBc@t;G8r2m+64BZ~K?+fZMXdvhfZ$1TOK* zbHEXLNc?mM+!9S$R^)&WO4N3_<~iV40EyoM2mG`I)a6?2fNKtTjRQ^{VEr{-6Us)B)e%fS==lZ+5`Xb--I3 z@NYWc&pY7u86S&xI^fpPnryYl0l&b3XTJk}p#v@)aE}9i*a5%D0q=CchdJObYh6dJr4LK4tS;mp6P&RJK$Lk_;?5W+YWf113tn5*Bx-i z8tq?^13t z0l(Z)|0fRkcn5sF1MYLcA9TR4aKJY>;1eD2%?|h^2fW1rzsdoB-T}{Z!2iEaeyUyY 
zg&z4ZU61v9=UJCakG6#SbnVn5JJPpHQo3@t;mE6N_zwK5U6|)${lzRX-|gz^Dqkz} zG#KWtWS)k?+?LGKK$w3{=4lwr2aFbnYGD06~DPKnWw@xXC?Dg z^ycJbo(kR^lgv}Go5PZMDs=OVWS)xL?32t>ft#P4NQ{??+k7{fr@}UOCG%9&=C)*> z4u|>YWS$D!d?1;pqBieI=Bc2~)yX^+vss(WQz4rRlX)s)b5=4>1#C`E=BaqiG08j? zt~o53KcD$ClKBgm?~}}5$owZ?CC2Yz{@rAriqzbd%u|7y+md-IPV>*nJQb$-Kr&B7 zY2K5}Q$d=mlX)sevo@KhLNpg9^HhZ9tYn@F(43siQ}LN&l6fjTb67G@MQ5Io%u~Uc zeUf=9HuIC?iSbjRneQg^RAlC^WS)+mxh4F|L{L3l% z)|C8~l>DZY{GU?tze~ygG9~}xl>A*O`86r|J5ut?Qu0+P`SO(fjVbwSQ}Q!X@>5gt zlTz~IQu3oy@)xJ%FG$IsosvH-CEq_Ke`0g@`8}GFKa`R`kdoho-Jnv&m= zlHZh)|5Hl-cPaT_rsRK|lD{h@za}MrM@oKKO1>&3U!Ib`F(rR(N`6L4erig7Qc8YY zN`7=o{^FGU1&O?F>=WamnRTN>)WAXoje{c7%6s^X<$LFq=_AsHQ%TmJ!TP#Y&xP{# z-Qo{WY&x2K%qObr$?6_fKSx&Y5U-w89^Nl1W#fI~2|nYud9%esvf6Flbg|xQup)`@ z4AS?!q;H>SKzaK=L>3+*4cbW*qrm+2ESHP5sLwg7?@BDUc{9cOFG*mdcLn^3B+7Ks zSdW%sKXLp;Rw{7mR~Dp)({!T%u1so^Vo`4)O&5PeDiE70^w?P~e`e2NZ;fuuNvp|e z(T#lX3O#n4cc>okciCg`hKxpU4V)V>wMR+@lahAe6W5ZGYw(YBr;P0)D*LU%@(JoGvjV5?SA0$Z47t8qU5TD>l znli--Nhjvdf(#YqY>zUJcuB%i`#g&*T7^EXPR6m@XT-iQ5)dyTKe39x!OV}W%s^z! 
zI&fV2syZe|078TfpMz;{vG}DWiq^dnVTahwC+Nu#=b;W4iARsE$Pjz|Y6hO~CXq!% z_8#zX_AzAkaj%8xP771ISdCQsL*i5*+@gcwY_wNmZ}fJ<7w#weqpsL0n>O!Zx`63; z^e{RJ#y4h2PDBqYV2#QNnQLB)oH(25STjLlpn?|>enR%wyqWM%kfv09pkPF%=!rK| zki;$tFL*vV^OYp|OQMS@svcEMH?ZlM_3$~ald9vO_%RENeL>?>i(1i0h#G)y^h9~A zc|XzIfO=*wS4tnGC_h)TrBymfr6o)L%yhijLt1|v=Daj-VET$YneHUEI|z?Q9UL;> zfac($`m{5N_Ml2jwAZUHo@eu`Wb@`cqMbcaqT&o#12FHRUb9&78wa|ds5CiRhyfTX zyOS7iM;c5oG){X@x+q!Q?)VnKyLX&2L&`2G!kcU5AmnU3hBV3h1D@t-M65WK#g6(@ zrN*L%xfWvgkr}ONLy=4y&dr*3D#|3Vv7ZYBI|~$&_rVTx>&euk1UBQJI4 zT;2GW|2zI`{d4_u{KoOw`l=6gY}s{VBU!4)bAR4M0|8#s=YR=*9=0?d-H)ZxD*pv6 zN}U3rOY%X9T|DYhxeQ54G+XA{T4g1d-WJ`1Ho(YaXEI6iMrf;SPTZTxn{DG!F&aVr8B8fv^Z{YK^W(C$7K1P`ZNg<^__IQ7&6lSP#Z@I@#ulydy9mX^r?~yT9ok*bVjFEOyU6xFSI5HH z>EH$xs3p5r-&h8K*O1djRG@G1du*5!{7_u*b%is4Oma3};}*Zc=<|K=X?OmW%Olb( zg+gomDYo`8$?GVn;U(OOWU{vA*l@(so8=^E(HsOAR>=&fK>PkbIFpfPIsIDn9Y`^9 za?aJnf|tRe5-257Yy1QVP(=!**7!bh4b5^|wZ_+36mNEMKCDs2H%j3*q>i@7_-ky& zE#c7P_bIcoS2x;p<32T|+fj1^x$(x0s2STxA(zVMw+{lhcpSDV=-aO~J_B-Nd3mw> zC{VsV+UmumKejHN6pUC$LX54tv0FFRk-5g3y89E|_o^1jL%pm$7{)v)(?*-t=tJZP zM72n{v)zn6XoVT)18aTK8a3my5ww7YBZWcZknoZmql2!`Pd}CwJyykUcdHmQBqH%y zoor5|0pmFlLYp9_v++*ErefFU75e_AHLgI~A1lae%Naz66ytUW7hCwL(TvQ78x zuIra|*l#@7(7YZsTw3D;7=V9Hz7hey@kYM!g?}y@bfVaD7W8!N=pKj_7Qhb?Izn>~c^SJeQ$A#MJ?m55cvHN6xU0+R94ybE49BD% z(;EANlwiEyKO%e#uE66dc#W=W_1MJUKjd<4VP^I$J$}~Rt3b$iigH@I3U+l!Omg_2G96Q{;6bEm zjP@xQ#yxEI8n!-9w$!2=Fpd!CGkVRCl@08S{4CiPIxE_;=p zPwv5XU_o$g#+05llpcz$BT7pDZb_pmjICpB^KKN8ACb=fS~ORUkOkz%Lg>-i7H6tT zwCKaATfS;V+R~6F4T;Sdo;W2LgBj(_^{z-$I8XxpYd#~%$a9hLHT7XFS_Q$j@xO<_ zZxVkh<5zts8Bdjmt|~&2 z%T77Xws^~lv?u5#aNc|!=q-CeHr9cS_dABU>B2;hdVC|58^jFW3zt0rI{~;lmJjP> zY@`zB10eK3wSyB8t#P}Yzh>seP3^Lb1{a!3+1%4`qM&T z=<_ls6$ovx*n%mtx`{Q*PGeD7)z>D?oor2Ww-KGT$|ARE(wVpHlNNpabarjouEy)+ zrbsGqjI0LY@^m`%jI=9KyI@otw%7qaAC2x;TCV9L5^bsdM_eGXAm@dGoLxafIwQgO zZ@c8`%M(!Fhmi~#@lFZ(MZX7u0v;0C!DA9V`j!?Q0NJ*@@N%uAr68vTE6Q@t2o}0r@5Y;3Ze{yHvVEIs&zE!bm^HMGlAQQF_tO}S ztNje6M<(2^H6j=XmIJ@y}99@gfZWMO~ZrD%=TiF|5I}RfqssPAdkoADo94 
zUkfzle&Z_83^Ce(oWFHipQVtP8l=HUc)}~JRM6PifF#PAEJ$mV?Sb{T>s7&ni2|ac z&rW}~FVM6`O9>Ivl~pW423|h?4W_z|6@mgR-Aap2f7<}QCji-?F}0A=A83u1Nu)=| zaqeUjBO2}r>5!9<>YH(my!K{X;|v%BEY$wCY3b#-iDz{It${8WQgw+RIlCZ7PNMs< zgXHKVc__041z5@Er{JZ7M;~BuW}~nz$|@C0OBS_8^N|XnxAAzgR%^5XDVKO{j2w_; zcPvh3K{Uxq-RKNLG8!)iZRNUFwM{!)uBV{ynAUh;pcLV?vR<5xwbSKX$@u1OmRfu@ zQJxVog9Jfn! zDL(Lr~m4G zl&`Za(k!K3YxH{1f23K8x)$9C?GuxN9O*noTXPjOMx?V;TQd@gi3cJ1+L}FJ{6r0h zq}rNq18QvB!l|D4E4%}@?8=E(P1aB_@oc!961?)qKSBNvY}2*>NIz}Oi=4MrUv$0j zvny5RX$;X7uXP*B7R9647%Y7vY)<6n&=NGU5_Buu7TZV(vu06zf4-b;dPh0Ca*p~p zbAjsd!7two^>i+#G_m|Lg7G})NIgC1+lIZ9lpyIp>9L{=>{u2^tVJ-*QaXc%t2Hkp zIj5>>TOMs3W(AY_h1NJ7SU%ONrDl*5`kb85xSG&)sS~=ZW!XlZCJqjMofkCnh5|bUN;itKQI|Zhbt^?aED>82 zPGt?ntf_ek#Hkt>NXP4umZDZ!l~D{iTAD>=Vx?AsO?n&W0Ng6wL!Q8hhfn7aVCowt z^qH?UK1cs0Cc~OR-$A+hW#3%=&%TQ$v1T>?W3o&Rm}S3KWf>e)OnGXS)8rwDRT@~u zGXAUk$`Gh@Dd3ur)_~Ef`?hP1PvClnMRO3lx~*TPiQNjh^pwthhdwC%7=82KUQ$0@ zdvp)n#2QX)yt#qM;>$AVddR7^TnwqU60r_zTaRs{q*lw4mBr7;vUh3!3v%AaCO1E} zq7#_tAfW073XP3dHf;Cu9*#w@ZXjkL!<_{=c34H|NtC?WgFP_p#ZDqp`*r_Qt|r_% z)t<~d9MeD3<5N60xo&b5i4BMt2Qd}wITnAAkH`)W+Afw5>HZ>!B*tg4-bM}^lsDml z{ZugPaKQasVYDl(p~VIQvrdSw_ET*h4W9wnT3PW}Fzcu|0I(%lfyfRwoT;J!LPo1E zhGSwgEM;5%cT7oQTmIw_GHs*ecI-Zq2fC#@FSq54?@z(BT@#GOGazY&hEUsW3`;Ja z_*qXNY>FslY~nY64xMxWmFJ<=cf}1zflboW&$|J=#8voz&Tyu?h0A}2z2RE9J^1El zvJqEy`?Z$;XVpzGp47>Gr6NG?Gwq>TVXqDB8wx_MQ^#g=r(MC?U+s4}FZ`{XW5 zYurI46(5TgyzxOwkgru+eJKvLa4wB_(2IZOc(Zk%RQ}MKz!NZDBI5m^0H{v&BQ4NdhA};z za3FgLaZ9YI0M0K#r|yn?o~}i%hC^|5Ynm247cO4(1+9^%Z3q)j@a8p5I=ZzF91sAE z7yR0jw+%W`Zrpa-33WG4?&l@F!T2<{Khgq+9n^6p#a#< zMZUmjnXEk-be|}nYy|tCkVxc_&05{q6O2h-Lo1W{SgSNE+*+&Y0Tlc30+%49A)8z4@Zp7C^EG?&wVkhTZdiMdN z%^b-smC?<;J*mzx9FSQ2m;hOrWV})s%b$P^QvM`x`qs=sW4|6b*omWm_s4;(!;yD7 z$@|qWqTgUlS^|J_jv9v}t(dGKkdh$-d@tAHN`NdhdE#Wnr+h%4Z?pwtK_Bo3C4SvP zu4jFm?{1A8=!~2I_6-i~2RgAr^NkjNqy;X-5bQ;V4DkC7*M4aApr#;#7J777_*;79 z3%Azze1CdTVDSl&_7+OimH;d!zuA^{^6jAFWYU5fN{7}l*vIh+#sax+; zihkpRte&-3kDTbMt@$hb{MebnB_9m(8@mhRp|pVSGc8&oeW}6o?q$1vC46l8<3sAF 
zXFadK+6k=>r)Q_O`Zt)W=q|01{DqE5yE>jW0tP~W3+bv#bbeWjackBM4D%BsgPT7WbY4YCzC^%)CvD1(mFaMPN6S1SiSxp1f zy+2-XVuD-y{syLxwsvUIMobBYTuySX?6(LHE$V~f(POz+6NdOv)+exG(nm=0#8^j_ zqrdtE__AByGFSc1(tV$3jZZ^8LhZ;FbiV&d)g4d6^%ohQl3e@>u$&j@14PS|3r&35 zlTs&TP_7-xdfAKg(cr5mxcUe?!e&wYf;3?a_u^@`V#^bZ z_Zz<$XFt?qh2Yw8?3uB-=X7h0ACpb--_mBqq<RMl=2K zTWyjA9J!cbJ~K$$UfEDHVcK+^8K40W6U(C*X%fXOv~ z_Z~R-mRW-bWP3dZ)_x|Gp^WRoK_utxFm2s*^dbR18QKk3Yn$NQT7fGW*$&3b-AzN_ zrmBE&?M4Q6<8mh~8MuxMpJ85Dl%5LMv)1@q#2LYM)s>36M`PfW-964sz+1^EwF`|; z2EDSPD0&TTacYfzxJhTlfU!esY>@30PB>e*Y==1THaUO0*qcIWh2~sb0Nh;LAM7SZ4k|Ib8z~_V!wxwcm|kj`0(SK?KqK{Cq5HF-kvoy4 z#_&n3?rsHO0#fFehz-CrOYHGIs0??JmQef&V}$#|G!_I)bmL_jet4h5KtTHgmMSWE z72~J@D=+|tWD)oHM+oX*{tOkSzk+5qQM8RklT}gU0vtNnXJhv$O(bU*q5*vJq)M7V z$Vv^EY#;wgt*4IkZN)3&b0m@{>W{-i;6cIl*ec^`$V(C8E7JMkUXG)iqO*>j)aKLt zCOY=VCU}h(j7k33ZG(&cUcp%!^?krAujEm`J@sF|!U?)FR81v0Aktu2hj2$g~zY$_eIS z&gyA0F0Bq`q+yGW2gLlbv~%`lltFM$3UZFBb30eQA$Rb2Q2DpYU5Cqj@NIV9?Pz^9 zc2dxhoNPTl@p0b%u*9+W6;_stB58O{-W1d~CwKC#Fx!x2IpJoO99%d2!xVl>S3+{s zCdxixwFF)ciLITdReKF?8vhSU;}wx9P3Ue_su zpI}i;E#+wTiRS_wV{XvO9)JkI4l0?(vOFmAUp|R1H-Vk#l9faV<`36qYLEt+`4*aI z&tQ#igUwZqSW#rNQTO7rS!~@T_;>eeEsYCaymgvvAxVwr4lYJ6vF^mAlL>f2dZvo% zN5MynJ7CliYXIcX#%VT(PLvSiKJjbt3`6U?P-S;u>brAj*xyd#knZSf3i`qdQFL0u zl>0IR^M;C)Suzzn9iZ+^aXZi)=Wx3hze`W|Yw_&&gW1BKt$J&Or=Q&O_lY{zEN5%= z$vrRr+C7hwJ*RMC9^-o5xez^BG`^IF!D`};_Jk&IGMY&zqj|!C=24=F-6vN^EIw2& z@Z62^dY8I8gn4h5UFANqfHdwCFL9P(ny@q}>qZ+Q6ylPSiS{%OoFt=q0-898)^qZUrXfqebFS7`%6*T zCoVWyrtg)#DUoL!GASP`lzgmX+nBWd6o|jFPCPemVSJdP&|N-S#ef%1l8;f2ZZai` zlts~g9q@Kv6g4Ny(E;h8s71j~If5ycCj9kx*`6na^gt}bBINq(E(X7Lp!lVOOR@NP z4mh?hTaEiEsjK$cnEo!S=Kfhh#S;)>9EQuz4Oom0amMo}4SKwztIOEFJ0<6^_-oCo zJb<&74(8(02@*jDRwhDNYV>qY#MvmomS1lAwq#tw%(3AaiaY;xa)LY2Wru#e{gs*Ncl8|LK!H;4H(hh+SSGQ!(x=% ze$sd#wl@j!W;znmjg;?$*#6_m%TQ!Py_zxLLD!M3eZk%haw?)Nco;A9J`gT=MhO&+ zZQQbMW;x;yc;p+OV5)Yqtm!HmvkyRF>vxzsCU*NJ!2>}sv&S%1xF&s=tvPw%p4_a6 z85j&^B%7*E?WYinRD2~HsQZ>ok*HpTxbc|hVWP4eYl%t>R5+AmyqRcEsT8BxxVsPx 
zVL-z2Dl}<+x~=>pWaZbAk*A@dBm?`HPtzkJFWu&&NT&4sGljZFav{~_3RBN@r}QRS ze`4-D%zD4)h|b<>JbNub+oaX*M!b>(DKp|SX&|~4pOYLLY{S)a&||k(bCTYn+zh?5 zLrx9_#|j$R3Qk>lOf=5@93+BiYGnHWXM1}g50s`TcRECa`i%11-LVO+A`t zB3I(~6-oI}2GqKBiW21Firhi*?)KDf2B8~P5ChOw ztUig14>6{bmrY`zsZ;ov)IJW5*Z)yR1Sg*RAf}6NNKDiavJ+n@wpwcR6*&r9jf#&> zp0!dL)>JC^Mrd{Gl|%DYD1AG9J+vd!*bmBv)OWu-S=d45Jl$#`e{ ztaEVT6RYnEKHBXG*k z7tO)J8e;zOiSK}*uCso0i_7&{vIXE<0ny|Ap60{XiEvu1-yKXNw026@e(M|=cjX(b z?(dfMbAP}GUKKVD>st?9g(LdDy1P|>^+b3G5ENPn(!2Jn`vrD;j;~?jd=$ItN6ghD z6TiS6D=5}~g`v$RUJSVSycsI3479fP%SGNT@98_+o6AvX(Ni=}kDt{Mu-;FAmg2Wz zjL!o`C(v~Ow|E*HdKMgWiS^HRb>ZExBMdw1c!>v9MQ{tX>g;sHY|##P^Y`H#?fRUf z^UY!8>{|aE9K-~S&(Q8GwEH_6WjJ0P%lD?EPh*G|@gZ&bWIc8j14Z>Y+NS=DH=Kt6 zAzrFB=BKx&@m&K{PrjZy8GUA4Ki_|Yd=CLZrq|og#rg7qoL%Bt^sVDU0$mB7uJh)B z0pH+&I8V~6@V5U>#E~se&Sv>q$&V#Psr&#l-dzC}JOUOx3p0xQn`pUK$%`Hvz{$qE zEk`$PdRD7>1alzeak~RM@A3Gr4H&!qa|6Z^{~R3X%MdQ^XRk)ztK;F}F7Yu*-;Al_?<#%7lyu!EYOmCJ5%dsEYr`|C6>wpq_Tr%Nt`$fx zW(8vpdB#^bwnKR1GclIm6CR(qis_#tt+dT^=E`TgaYxcWCuf%;I`ZKHOjozLoJT7c z_NME&6(L6=ZvP7~$1c-ja{%>y627?M3RnF_`b3PtwY-*Ke!O6ae37tzJ|=X5oKQVp zi3=35=@2PKk-nNirr>?d6QncO1v;5s4~F1i4c8>-2_6(K4g)gc_#VPQzSW&AE=EIf zzZ_*Z#G5U|W*1NUwE>#9IH{ zV7xlRKLPe(-3U7w2((-3dd zY>42Lbbo8U+yHi4Fi;mehN{WOl2ICug8Op27fQy;bNx`5&ZIyZedWCjb~PnK?gk)h z2gK_n0P89k8_|BZ2w2>4EX{UE(+5y0;y%>uK3+)uaSS$AjcW=Q>b|$bldXFXw`N1) z^K@M58;moPJbC8uC)v@hxM?@VW0!bj$-OMWT^V4g-yT5h8fSOm(*v=vSE>XkR`YwP zBBiMtzrPbOj;U8$Z7~KR)6(@Tr)7kPX`9Y&%FrXz(s4y?N4mXE{KeZ7{0YV`$IUl# z-d>3-b>DMZ<3pUG=ttq@0q{-!R_gINqwvb!2SIJ>2Ra{<@j69^c=0KD41?L?3CKt^ zx?{dv!$$-0%8|G=a8O(#Ia-&2t8?5wJ%|=T<6DL94|QWndeA5ZiQTBf`Mp$1{${C~ z7I*8xE79^&-e2^X#M%B3rf2VTOwXS%2nTm~4`9w*OTqV`sJYk%1Z(wRM8$tOJIE3lOgDi{~Dl^*gKkT19pDj3YUl2UB_0D zv9*0@Q$D~WlC^F?_Pl>f!!J)7FU4lILlSPq<*c{G0n8vQ4@9j12YA=xE|-)_WOx!b z6j-cJI&_Frk-{1nVz*Z==)e(y^x)d>TF2ZQd9gJ4;t z3pj2h4%WfvISz3r=tS=v@`h3lNR1$(^6);}s$~vmYzvt!`3oK9buTU^> z1QH)9$oJ~7r|>I0%4GnTGs$HLWf0dnJ+1lPY?p{{L{E@8`MXN@p_0}(4+1Wa6XARO zPM1x@5Rl=hI06-QzKqSk(b4wrf*LcT14TlJ@r(Mnk@A)&Y) 
z3jH$H%aGnNF_eQeP8pFMz;@^_swy!ACT~Qn4a6tcZq*I z#;{|(V6Ak213<&Vv!Na{^tJ*iq;$7i3wTv{w-Dq42+|Y2yB3|m@kDma z(+%NRWmpgKQumf&a$b(XaTPa^e7;5Sqd;s%y3Y641qgdbTaDXc;ND#5%y(cBbYre7 z5E~bCAAqvKYiiHy@gb*)J3hrB?M;}>g#FfW*Qt2>~`uA|%1!UJPIzcK#V zG+;cstxxzgT$LZ>D=#nx;f;s3K9P6bVaV$cpTB9yc+QL2kd$k`@ha}K;Bu$&N@Qz# zFqSr{&`7%~-}gf8p~85Yrn^6G|9SVZ*XKi&&&E;}+2CnPStiQ9K+sYQIk{{Qtv1U# zH^7_y`^gpB!7#{^54chFfjmZ?L=4Qgz0#v#rkx)6L^6E>aBH1%J#{7hqfhbek0vsj z%#WRo=xVnmwSo>mCD!1#r)2yug8R*`@M11<2=T$`VsX!t25vs;D!a>Cr=MJl zj<~jF7E$7VqWr^Th4TGBadB6A^OB3Z5-ZeN-BogR2MJKiI}gj7iJUAa(SDqG!a8=&#>fy>jlnhcoFo#pl=w@NA~ZVFfVLsn?8?pWJkJAS+=*~ z^96V0G<;ru$3KF;PwKDAd5bjTs(y#oc($eTIcT?d2P`oA%B7R%ikSX}&-*QZ$--a1 z-s&#^k233%>G83bgT;kehvi)2WwC%ZZ71J6eYk_;odc%CCT3J5W$QXWCb(-jm`#ou z+rcKSu@)X004|8s19?&p)Q`gIuOEu_UVn;Q=gUc~weZ_N>aG>q z_av1@ds|OxAb+BZdf=nNc>ZAA=lr<+XWa$6o(p<#9_YRV+!uX7UP=XSqdvspis+9D zDP^yqY$wY;cvvlD#%#Zf)x!nO^i)SW=twVgq_0If#s9ay`(s&Ax&9%_j#aNdwntm~ z8~L$3nyebQkU{8xOitHOaqy3HhH~{%Uf#rQ#CX5gN_dN~qov%q*INX9xWY9CrP}F@ zURqZ^xe)CS@W~JMwKaMfspBvC)joPh2*kkA#5etdZYc6LF^}Ad_d91HD#jo7HlUQQ zC5Y<4{hmx4Y0#*Il3D_GNyqu1@JOy+#*4pn1B3@fl&u;)b_;OBt;g#P5s;8`RQv;V zk2ZSOVxi(QA@`A12v&^@HOBG;tRSuea3Bae#eR{-Pv~)1aETc~3j3;prjfASbSn$6 zxs)%6ze75$>xHsNLf<8p3wfy;yMj=9@XAm|{kDYGS{Xz~d9x}iO z7s$TIjn~n%MutB1SUntr6XsM&g*1r$J}>5^!B9!za7qS*2ZlmQUjtBy0e=97_%rgu zi{jg41HKQ*)_`qB1xl2xcnEZ;k4wVpDItxWb=?Biz!xUb>TP+~Kz&s*A-dw=gQQR` zB!`{*B~io9QMkIfMUaQsa!=&tG*JlVn|vYtq~`UDp%6~NIkb3#*kNj5Ob|eeoe3-C z=fO23{El`O<>p!0Vql}ugoTj-v}b#ws~&Mps8sRp2T)S;!5J}+|IEfe%9m&}F93^d zzv3cC`g}+FoJ6|KygZTKZeHt1&vB&7kY+sMJ6vMYAz{c=sijpPc7j!~iqN3H@HQYF zMA*@TyP0rryQwOHvYeJyHC@sb$OZ(_e|R3^@JAf}Gv4)pfGgv##QVSb-U|lOhL(Lu zr?I2uS$XyjAwx)5waXLfVAB?lhmDE`vQpp~kM#>09@zq~OGq)xqwNNJ=XiBae~pb~ zf!O|gyvT1Op+qhyH*`$bo!$pgi7lYNFr+!&#d`FGuwReeD+$#$jmc}u-t16j5b(!AymR4aW`0~$DZ)g zZ_}@QLiT`ACMef7fz3_*^+=;EM9hl_*)UC9k5t)M?#c#X zOYa3l{PD#&A1i^@rKN9Sy?AbPED;|R1ttfc`e|AOZl94g#YRNXL^gW2y^*i5W|WF} z&O9xlI-WBJInwVt>Ku2ZhoX)hpLXa2#>`iJxY!MfVxvjSdL*%4&Xc!bls{+M1b 
zQ$)u&K~oerGBD=@q*og7y{1JkMw%*$jrln~ED@~gRx}sNsg|JVM^g>Bw)Y^v)B@1ecKDVbOIUuBps>omfbi_T&=W^A3Bz z3n;68S4>YN|ZXB=FO%yu)NQdiSFqe z^7|}++^8Toy2LyGQgw?U>)$IaRV}VP|vmhl3QiAW1Tabkcvd}HYT98r& zDRqfq7Nkr;%3LA?5O4=CmxsL}K(Wk1;>-7)-QzxCO8zb6u|7PgFJ@X?!x|&;gM~_X#Zq(n>6Gm^IbQjo955ID6Ad)t%@)TB<=PVXzy$U4NPh(Ss? zR6USvKn)>Cp;fj=HL;hBcqvg9gjll?96_;3LZ4AI;P@hg+@!ei2*bFK=+vj)dA1=7 z_Y*G)E3HDQVtmK=F_uQ9XBd*1VU?1sMAH_lX^vI6)>*jTDjXsUvGY$fYp_bb_yx&P zX4J8{`I6LP;?m)C&}KWLzVu)5EfjuAu1Utv9^yp`9#ESeA30Qy zeCfX9R2geOB#wX_gpRP4#j8f(4avmZ>e%vU2eS@{5E=*3JN85p!^6q|8V}2w)W;m9 zS=1>_0r!j8^B@?j;5e;w5ORtfpi(w`x3q;{r^MX;gbaX&W8dXkWs9EmCf`b1DnF5T zyDSWPGXoSEsxo$17#oS>*|c;Lz-9s8+gjsakAYS=CcZbc)!deIpjzYakiwg2)_D8L z@wNoqI1p88(!3YV%GW}G!5=y<*>UkMdBpMW!?z$S%^r~x8OsH zZ=o+nXR_TGQ9Sp(Q5;T(IKH&u4M^jB8}YLO9`nk*>Ji&R)r|?>*e|4q zN?*MQhf|pJpf`_h=^XVwZ?}!>0t}uvqacN|{nx1XWUtA2K^*@K!;9?jfQ8RsL&9Ml zri@TBpcTc#Z&T18mRd_j=9G4VH=&$ahX;$d>Q6-*-5-zM4HW)W&+WzokU`?w{oHhM z2l3%PcreQ0=o0&oQQ+4SsS&6uKeof#TL8xHkjXOv*JB0g{`lAn^brM^>@Ha5uTTN6 zcm4?%Vf9$QU*RDmS=Ym6PL7Wq5y;xXHmslTJ{oxSxNKEW@%|(DsDWsa*bfl9`gwIQ7U>+Jt=S2TtN>*QXAe#AJ(Q8yuMZMGzzdQrS&_3#$}Ii!ofx?M z$^q98D#Ons$Rqv+c1AvO*TYq9RQ?vwjem*^u|ALnbs{eL3Hb7nJb&HnL6Lst!(Mtk zbvR~L&2WASy+zbbmM>NZ%zETGTr~M=VEqe{AQgMWSjKY$iJeRyc9CMJzj5H@_frTQcWX0?Z6F}1RR4}D zvby_3j6<0#NOy=p%{SCtd`48kBAF*&&|BQ(9kIa zuY{#|8TFUyu{7_=4u;ZMtH3msT8@<2{3WbX{9Yi00THHmj)wHa8)4w4$LqeN-Zt8V5%|cV# zz%7BMveQ!&>F3OQ6Y1^d<&JcLBRxHle$MRrpClg50l(3aE_S3BAT9Ny|2ukoX1e@H zaraZ^Jy?e{cb;x$@OeOK24}+GSNrR>Y3US4y=@vKfK6t4kj_+T7(wnAph7dmt{=%0 zoBhzkII;Oy zmf2HiQXv00!4_Yk6FmkaGfN!J&+wq5?T~(!r9Z`G03#94y>2+x;;&g#UAn0MZlded z9v^_&YPfa0tDb*VMReUw?D(F-;NOB0^%__#4_^g)UTa*ADVByA6}xt7TXvcuK-O|O zE+&BsbXbfAkU++*<&&2rXeWJtr@JYOw5fmRq3U;bu^%>!Z11c4-qco4kmeIus2bO^ zHLz^Jlj;Rn#`BK~xZ!Flzg3(C4DF+=IDkYQpNGxcZGG^4fFJT`Iefl4WT$)DA*RZK zspv0G*=YY%beoKM5Z&O058dC?bg&3=~io2q_49cPt*>d$-653u%73239sz6 z{&^e|n^(iR*-UB4c>x!%Fd-wc@Wd|0<}j2@+Fnl(f9^@19z98W00r7ar0~@tO*pk@ zHw}OUjU$D4PmdpmZbX(I{Ia3ETRE#Rb_Xo{2Z7ifo`A0-piO<9-#r+F;IbI6Xt^+u 
z)j_q}5b(9#ezrWXTZt*_LiUc$`9^27WyNFodojQWN{-h5DFBZ>#v21ZP!XPv+hsUj z#ixGiJcYhvjt)A+>vzf1drzTpSawjD)fw=$Xz{)1EwHMSecpB|J`RTSOyfo5p4IUW zBaeAv0%)UH#`C~}hvlewbpX@QfI7kWtAJ-bMkT|iTF|qji)CaI-;`%sc6*H#e4_dg9$TVl$!Gjv%U(f`U(WB)P8su8Y2TfHQaU@;vrkbg?{YdcoD+?494m* zD`iJ1jHsWe$Ei^vCh6j*J+Z3(o4h$1&;7-Pm=oho-W!_e!WL4rqqgYJCbqsxoLa_Y z-45~m%Ph_PftCFOCMBVk)WKzc424rkbCHNS)IVs8g}4|Pl-X-Dk3f6aCa}|y{>+g+ z>_{JSq=(&;Y8>(-3l{oK;Acs)Y8#O;^b`bb~(x%vQf46Hy#ef{~z zDx<#t{8MHOY%pHcj~(O}3DfkgADoK1{DAcGE%H*Ie0Qb;Z_?Zua5HH zW|mnQ+^?waceBhWh2+fBs5$fNQ}SH?iE>;pc?x;#@7}~{@z)@BV(@quCV)ib-Q$6+ zqEFC`kDm>>N8kExAG!a+`$F6O2>7{Ma2X?*=Efc_ZEDk?{`ql#SH$cyxhbtbKBLWP zS^ftkgKOh@%#rCuemBWk4ReLF-P-kb7tDB?k!44-~J{Dd-ON zGEWXl-kxe@JRI6-RwkR7)2&P%GefM5%8-trn=2fp)TZp2{$$4H8@(CtNs$SEwAyK{^k#$e3JPA z<}Jue)==Rn*7P&D!0TNm-!a&NHQ+Mmpww|zX#EX;p;jDl+ckH*AN)euM-W*qYA>SC$zMHz6HPh7lPCn@;il;z%O38() zd4dDyQ1lC*Y z2jw>n`bPZPuE?Q2O=;iouj-UbbxYiz9r^r}Wf|@l;(l4TX&N7cc=bJH$>Ml^m0Ygd zMHys-axCR*uAg^N{98Y|P`=D0UuH=^_LipajeH3`{wPKh`4VdU5hPZ9skHcmGOd*O zuaP$PH20Gr`Pu%QRbMK#{X@W-;hCZxRrj)?tG=YxUN#f&%;AIm%VuuIA+syK*d2hu z&G5&=c*V##_%uSA@TtUXwm-k z+Ge6_|8p~!TKgZHVH#ZRziH+IYrn6VtE>ITn@MN;ok{z~HT4gyDMUBri=bj!Bl<8* z-t|MInJ)Tpxum9N;5)_K9`Qmu-u@e-8)xDW6HvUR#=}elFOK3KyW{q{^?G9BeG6GF zev3QOm0ovQ-qE(S`de|t(W-4K%D}5`0Q3oOL~QfPIg{{-(mB2ptl$27$NMYjF2H3y zI|DC+#PZMXLetVYw)k(*?rm{khu=$jW`kkD*i<0@QjeX6@AYDPn_d{d z8gq8})$z1GK_8BU`vP&mH#wv3>|lImM$q@N*0>*C1!FTa0=}(v#fkffR|T_LaGUAO@AU2*N8(QNowEoZJ17nl-O2xC_>PeXDpLWQMw)!oi{W?NW8|_5^Zo)jT z^*^Nnp?MC{%HHvBFs{*%wb>B;945;A%C>tHb0xpEc9`YoY#Asf7 ze1)LovcLyz0Z3gM<{tp;QHs~106zKcXTm4{p9YSu0RvTEn~3hJA3&ED{i)a3PFQa+ z;|7j=r5pQ<#0zYg>)Pw^CKjBgi}b7`u$3(g~XWZ<;j3SJi+)^nQ~@w^Uf2XQ$TchC?a!ERGr!j2G89m;L0 z8y_YI25nPLAUc7*xG_CoRH1`zt_0*Br-c`SS&Bz*sts4?9gsc_ z*zq0~CP2@6O%b>g^C(~Q>k@y%$Z%m5OZx-dYYNhdK}HVa9@GM6-iE`wg1R&qHjbEGsA=iHK$B$qs~jd$i}~@5cD0w%WQ}6bz-~=X+pAj7Mos}1z8{B z6;FT+9>S*UG2Gt9#sgf?<5##P^I-|8-rN(~+@BfQoa-dsPBb5IdN5kqoBknhrc_y0$-=KgV zur3L-;8jorQO^zHW8_);@v(^8&y=FQ5`*c2{NC{p81uV8Y}!!VOGU*$KxyPP9<47X 
zrg2iGNlYDLnZy(xma{9`qH6)X%dUT+^m8XJ<$Yj%PXa=>7FQXASsx>qAQ@JH1Sj;i zMaJ{(+vK{9(@B`6`R6whw{7|$H0rK{rx<+lS}`-NaSbPkUjl}zz^uq+u<&MU8Nd>xgL^~qhgSG$fQfSCgY=1V z;1yso?p~e1+sGmtEfVt~tcpWaMeWWpAOt$K>Dze13&y^;=k@I9WRulNTD0ZnhtP(> z4d48sVf^Wrec}Q11`7yLLab(mhc9~g4q-&Qxpsy~lCWR+#R@eF(1k$Yp#bBE3XDISK z*SC_h7F)xohzroW>iI|_1lfS5b_j9+LXa3+jap-f8YaB~OTS^fp74H-cNlQd;>UE1 zw+0f_inq_9D_*t6i>ZV1dKl*>R9@&6Z!-%1m4xmSb^#{rT=u7Kd?q_Ia}XI`%kkej z)D_SQ_TqmWa$@{onevn9zejeYZ^sD!@9+O|;Qx;taJ#t0$H$}UN*0B#@C>i>j2czJ zzZC458uZUWW@$-{XKEl=fF%D$rpKS3KMQI3w=F%huPYpAUt-2{)TrwEFx$>7oa(uL z)F}R0M$Zg?VZnUnLMv)&Lv?kP)g@)~ovlz*8>*}>t&{!8s&(O#(j}wnDsBmRCSNzF zV0NOuq;F|SRYiHIF6=3sS}>b5CVL$S#NB!VRiUL7VNZeTJc!Oml`O0bNfI+_L*J{f zs122Q%ByQV3#+TcPP(jtRhHCOl`am|)_Lly%0jiC+E7WQXWHybXU_HzTc|c`wCvO| z2DC)2GEbneaLiaFF*r|U1qd#$tzL>$)spc!<1QhqWf|%3q2s!WvQTMB?UiZ(rLzix zGpBemvsX;b&GJ;&lvbC8>O3XoVUQG@GIPf4IX$7M_JkLQEIK_kC1DJt&QpQEOIk9j zF0^PVX3;Zd?5KrcP;IEZt_KX6SI-V!dUav&(%EzTmDN>?E)9iCM`wAKFAi0CYAQ=g zE2JlS&gORJYItUy~(=NZuh0y!p6eO(A7% zCVIx?j*|jmNl)UDA1W_FJ@l5T#D=&q7vVb3!upEJFjk#s?DWZ=GF7if9D$`Zl@+BG z;mVsmp(;u-MaZ$NJeizrDVsfvKy?eT!xCIhVOWewzX=mkhv8rZln5@1x*q$UQ62W& z9145t>T7DMYr|mF@(N&}f>5h*G%vv=e$W*arNwpOS_sG$p3>?nBegh` zD2MRdQc@E3REA0*F%S+V$DYg_DLYwqKRvfo6GPF0nn;-{PeXsqE@1!XO!j z0A%NkQ~IT(YEj7ZZo|EaRUfXd_EeVCE(%?RcD58HO2O4SDwj+6hm1owR8v35C#=*h z^>)3wkuoQh12}qfB~-F!)Fc|ROjc6`;p%WnWflcg*{!;pmoBWXB!yJ6QX_+5vZrqS zl-b=3uN&S|Pb0Ao@i(3)UNXGy5)Wp1MD~i|EB;;gCy!GpCh#v*yR@PT%B;*)y|lal zHjCDHpsU+Lxe?%= zC@w6f4TC}~uBfUD)rPy3u&NCoc123}b@dCej$L)(>iSBTC2iHv?y^(bP?|l=n#)r4 z3Cy4@RJ$zXSWb&k6Q)Z_l3Jj5I98aWBGe?RK{ZicusAC#9fhGvPs%d)O!T-snGRH0 z;CV$A1X#K?c*3K>zjS8skn@KpT@_iTlo^uBsc?;|X04>_f}C0jNFWZJr9j0bGK zEUR0sity~k73JNkWwTn&xGdK&OIOtmOUhniZP{g)B+D*Iz@!=|sjV%!Ia#{6geu!! 
z6?LxJ_y&{bl!1dsLg+aE%WEsbod0gij1oz`1Pe#r7Z)$At1ITj+sSZAcOe9$Ds36G z)}#H%&d$!s9`k?NyYle3iX-21JV{vASP&BohGonZLBi}@8jTXO$d<;kEFX%bVB_FC zjb=ugF**ime893q!Hz>fHd-KuQUa!yV_m+DD-U@HfTj_KAJU*|_=kxo@eF0yEFX*fEyZs)&*YETD{pJ3E zzrr8%SC+fWJ>_0FPk(uNd7!+aJXl^Ca0fgAZ@?Gu2g(D1Kt&)JsH|{Tcq+UVz6yUu zc}1Y2q9RyP8FU9dL2u9(^asm>fnY^27_6*>h?OY5609YtMD9vFVV0@3Z{2}6d>Wqy zqv`Yru3IpvS%k2>56}td?!bSiwP9`_a3kOfz$pccZ3LW%Z-XBL-iq&cpLV9xFW~## z;x7CjDLA{Q0AB(;vpb#cn8?`C$#nV?z`;~HeHma2_R$_$f_uOxQSQKPcw$X z-GBptgSV&Cj{|N4906o^q|=ij$1vczfTee()6W93yVL1XJLCqO4LFF?MD74&_aGnO z03a=)+znU-*l-{C0uF9UryoXr1|LqRn^6zSL!KQF)T@xMP6j`Og4sXhWM5LMGyy z!fz4qLzG`8V&0G6;EU-rs>0J0y2o$&t680C8nzeS+Cf#_yBO8c#~jwx$wH4f)0d#%H5ojbwdoZ*-P?lT=l zHN}j|rR@#RA`g9d&Muea^yAr1Jex+(kVsFIEax=L8Z(@r&cPEe@Lm zv_Ff6CGs^|zkRK39hot;3+Zq=?7tc^+UZ+5uWvQg7d)xHeHLqTc7@^a`x;w67zcqX1N5W+{1Mm0bFCH4-cLN&*k?G(R%tbkDr-bY4qXb#>x#Kd zQRLl;ywvVO9<-B`$GT*K(){e=Oe4VdLAoCK+~}i+pe~f}V$#<-+f0XZt$n7Wte^C? zreKC6xT>($QMG7-BM1b90YOH2bBe7O8a_ZflWj2A;4YMbz2H(d2(iXit!$$8V$0K8pPB7wSyi~EpkG^SUuV16${JAYv(Wu&_`N#FV80eUa#a6y zx%!uK3|MT}TTm2a%WHyaWxr+MwGjQDB5QjRJRJj;?1H}m{D2+q7=E#n>K-CJ$v)j@ zwb`w(l^X5?c%PUH67tu(u;+tja_z@~Cv+?AgzJmj$*8_+94*#_p+ahl8S&c!elNkF zzXxdlBb@MPy^ZW{j$_zjy#NfW$5cC1>;%6fQJ+DqT(2hi9tS^U$gVT>z10{q%Ha3T zAUm&j?6%nIp-Iv2rvudj9`TpbX_~Xfp#;2+YpE~v+h%9_9~s^T^gmc(rvJ?#TNx{Vw3917Gc-LCXg1jQqbwTp>#3a@GG*n}kNdkCM}zeSLuRmJ z(${6s=Z!x{+(r7LlOMUh;IpOwcQ4_jAkfHMhCmV|8;zU`w7W1cdRVb zj?EZ8cOr&M<68J5BRdU||E6}_h7!kmP1K%v4T9Ht*u)C53GUmBcuAXRv2GE(5KUwc zf-{TRxyZf?ytW~hY$sdhyhiO|jGr{OFY9M01nwr!&DR!N(ZPcVsPZ1b3yD5lhVn8}a(gaSRmL9<_t#*kK)3E@G;4!SiFpq8o8O zY`wt6Y`zkgGY1e_yM8^URwp3>Wrdh5Z+aCLYkUwB@AlPTO6`F12Vc6tWjA+Ll6g zvF%dpYY z`n8{0w5t>B1G~1hoxMBUI0ki2u@45tg>m(}d!*cD3IQwgW z_+K(zyFSidovuNwYfjPD#MsCGLf1b#ReLYS-aqx2tudCaIvkk4%>1+EvnabVL>X?L zMWmZ*iQRCmhW}Cgc3lMo?XM@MtLGB_xw+ci?d(r;wHw;m<@2QE4cd+v zd*s`MdHvf&^TS5%^%%Rek*>EiYVWnP-HjS5oNm-Uim-c|v^OJcdlQwlvxzwTs9Afa zjs3it=s#~BCaphx8a>!`ns$8~8(65JG8-0ZueY<`F4T6kv%fAR(yJF~7qzoT7LkTt 
zUZlN<_BoxduQ*+Mzl}Y4y7opJTf3OVTECbIyKk}fNR++xUBYZWLwhI6o;^c*HOdB- zXg5dMh9%n7QTFr_ZC#Y@TB4mFW%qoKC_nxlm36s31+Da?u00oJ7q-j=exyZvBEr^& zwFe_?Q&@xX{3@)y5@8>Nwe1o1U@PVQLo3x{Pb-yvZA7~(Lbk8{J9YU3>3{|fXyAYb z4rt(j1`cT8fCdg|;D81WXyAYb_Jao61kVwgJVY9f33%Y||1S-wNgx+Z_#vtLQ}XWX z<^4oK#+XxFU^xI!i|!QgaCu)w$Fd+z6@Y*DpvxoVeNaB9?=zH&1YjTTNq5tw`gW~e zT6bdZj}oRfdHMuXk(4Bwy({IDfPZ7A=ZDgttR%BxO@KhsR4t_oWI>a~EyFZ4PlriZ zBn4OaBz?q2QgUA@6^Ch1p5$ZpPx4;T{WUG>2Xc`YSni%ljQ)6^yVi zhCjy?=zdAtXuqpLaytdGO2P&STO_2jI4JcwIbZ`44oWyA;jo0;Bpi`&w}fmOVSS4YDge?*#CG3-M zK*B)@ha?=9aGQi967H6e9V^S1&?%u?!YT8OI3(e)gxe$(rS1FQ zqLJJ@wbj+%be4Um6$=xWIXym?&*eTju#De&&-40SZokWOqI{5>GxyGdTRVG~f7;1@ zLjB5uC;c{ob;^g>f@w&Pb4i#LUZ+%!LvBe=-+KvPCGq^@0&uG(J}L1xNc#6B-q|h? ze7y=TXG2ip(;)H6e9kq&)0uEY-zVwsk^Bp+0zdRMfuL^~l)@5U6&Lrk-h|Rdi655q zw4Q|0MCgV149|d6FllZ=+0+EvV|1iPR_j%&O&J=`vZ3r%Ha5#i_b_hIQ&w)#~ z#P>=3`QkBKBJri20>Rg1;Bt?|CncV*!2o_4{4ep}mK5}~E`rj_692KRuUIwA*c=Q5 zTpn2=T2Dafc8T|wiTi2+*ujO8ex*RrcWz2iiC-b<>033Whb4Z9KI|dUcV0@bO8nU; ziuL!t>0IC~Si$5T;kTJ_=|V}rL-LW;W_u*QPOZtt#8}&!} zFEzx!AtnhW_@%`6<>8-|`20BVP2fpCWg~)tR6qNO;}2shPEe&Bg#n(*WwM_t{E5I@ z*jJf~I~3kb{@I*9R z=nrui%X7R5J_tO?v->tt2LI-d%e!H>hXC(%3p{-fr}Uu-pQ9%km+J-oVCz(tKhB^IX0i(25R9ji77)MxTZ*o6LPz#oip%{d|nRX^GcJo({# zf4H6Fk7TnYy=2dxH{t&Q@Z7Gh6_k8!BQB@mJ<>=&^(OeJ34R0crCIVTd2TnMe;IhP zf46*p?2{QL<3-p=o(>cI%_jH{P4G)G;b6r7JQMs?Cir_z@V^C~^qjm^R46U$b=ncq z9tH)1ul>a3jldhpV?&c0;p>4X{_YzEAHEI}m)Drk?*iT_NOS%cVIswd|IsG+9uxfA zCiuxm8JByk3H}t|jpSKtg12I#gzRVNMNutP$8Q2}#OKE*`2Pl;^f@g3e3Q~rQ=}*5 zadl%0QMfgsr^4-O+ng(s#2%x?^F3a@cJ6|iGv@00eBCp*obPZCHPlDwVNn*rj*AMiHCc1?2*Ri+`34pDU3g%Kp6WG+q;N?E798%cDcvL2V%|BGy7rd zz%CS8-mEtT^`?~9jXhK=!aY$P`zL$C*sVtu%PKCfPQGX&7HX^?M{Pn`Oq)^xu{E$R zPiK{lw3)LBvXBm%JbEZp;|(nr_1=d(9-gNumFzHAuX!Ds^=7x;OocbpHoOuSqUw&T9NT zd!1X3vnM=(P~#YGDFfXTvCnHjyi^!?P;YF76Nts?%x%-uTZEcY{$x1OQ%5Gd6n7rC z-W2P=c27@lsty)R1lYgdTZiXdi9m&da1H49y|GXOa`==0dKb=m;F(74-aupJw6Qm9 z`)=L*N?mX7TE_hx++A094|dOHTuWU;oOfxspQeBg4^%M@59A5PR>Dz6QcZlP|Exr; 
zBRbCRf%3Dem*?sBOF5a|V^-MwWu2|Do-bG_1JldY=1VEStQtYuj6Dn6^~c#D27-T4 zmS&=Q{B%f5O_CoALT+WLQI9pCGG*u{h6}o4#0>r|-@Pndno5mt;9R!*>i z*Fa02{aCnZg@e4pI5}YSpc})AkGIy`D~|0bq~-lss9AL@$hPN&lV6@`e?g8yP&#Qq zimYN!9GRfk>HG)+M6A83Wo>P)2*c3_v38sY6hnm9sYh_K4q1{O?bh2nx?AC;qX_2$n=`iae`A_HB-^_f?2bgLI}!c%$ysd9I>9Bu5KNH zVwlc5sH&Y?H?vyzy1cG(h9f+Z7{YUK`eMYnm6@SUoIs(6yP`Oh0u>K;MPjWq`elY+ zKKdaF=#4eG^~NS$a9gxU_qqJ&e9fmdr7A@TpXf%oJ~n<2<>kQ8>G6pibRt9RvNpq9 zhoaYm&LZ>OpI8#(~wII0-Rh9lr=Bd8I7ugr~|KSH3IFi`S$gt zr4k{5I5LFZTRO8Dy1X7yjZBdwMK%S~>lWYyADri*;|vZS5s!wMm8oudx=*MRMi5yU z)>~oGT{_Gl60fRWv}nf6I^E;)$%n8}obq!{zAZo(`&y@J5BW8JjvY{C&I;c zbw)aqqE6BzdCO&N>mL`|!Q|)Z?Xi?R)KPSK_Q?F*@xC;h=Xk-s7S8XrM;n zVk5#Mv=_Z2>Ut{P)4f7RZQE0E>VMrmXx*HqL(O|*soa|D(cW%7j-J+mgEz+W3JlFJ zFi<>~%CDqKbn)Yy;AGlyKuuRFUoz2SJ>{bBh@NBC1#_yzMb7Iuo(hV2a}Kkqo6}fN zFAK}}F8bwf6s%^U3+JD?5H7D=sV8yPTQ^y2>TG>Ez2)b4qeNGvC&nw5gQgIjcicRg zL4+t=HduE%&crfoFYd$9RXP%_bZQpPL*YY4rU8BA>I@tQy*`nZC(q81dWiIkzZNf` z8Hq}=x%mj6$|I?>lQGPL1TjRdNJL{@Avuqd*CGMo(D(A)V|gfqcaQlnDolik*Acvk z%%EX~JPJ-jTA9eRV(xkO!hQScTq5#%XaS#I2e-Nah)y(9UJKCxvRAG=FJG2@1$4Ao zcet*zGcqRu+rxxK!>ne#p&ru~4Gnm?u1Pj~buLV~-q0!3+O;CbA4Ad6j)vu(S+3Jn zDH?rmMl1BqJm<~ZZC{;hK%DRt>dcW%j1@Wdj-v_pp&O7%N6yqE@iX@H)h00q+>G2_rNh)l5*IuRmd9YGk)Rrl(cUP|CKp>8P# zzJ|!s=KNRzhneZ!ZDM+8%!tf=ZBL9hNtXMO&PPVM-TM~hiZj0W!CtxXLAE4%TPMW_ z=Va^2<)tB)JLSFb^4U3sy!+ZWpg9U5_*^609>g(leK3-}al*zewyzT5O_U#&7cvoK zZ(ciikep@OW4>~Dbn#h(rY6mYN9xT8FFL!I=LPQEWMHsH;eJ~ZhkmBo$lGVEoxiIY zL|0?X?f)P&7mgznV1FrJDJXg(dqn88ug&kNAV-FPu%|bLcVFta{N>Hq8vc%l!b@x{ z+(V7*LsSF(gc=Q`Ku}$QH#i*Mj6fI~ zHT+D7Zx6~-V0wp73-AwEA-umens-P1+o6u!bDt4B+fwlgv3EaGIrHr|7(rqf-3W!jQ!??QQCm|=93oa(+!e=5E zefz^{uxDqX=w#~X$vB)@P+>~vWct$1$#E2DoVlWBccBOYaWX7_vK%kamjGIuB@g12g% zd7Mn)b2&K$&Q(lv3Lfg3_>dT2$wdx#CQz2(jzdWq=59Ob-Civ+7(c|a4raj*%e#4+ zBG)r3h-Dc$iDYb)&3s^XUlwYdGi?g!+4IRvYGmSBIQ?bx6DW~lI~Rf zv8;%v1pZPMRleG9q@dbgB&Y=sbxrUr{Allv%CGk8D0rD9R`M%41<%8C+6$!a)qWra zxzL~?`AJSCKQ4J%i@&7POnzB0wNFXyPg3Qp{Hp#}$ow-oH}JxW#h*EGL&?eiX7+)JJ0YvOCb%BI?EFqAVUgX*0UMU>DelBMdUTCtqdeV$zuEam 
zjum(Xo#Hn0SLIW12v4%}^8-bRS{lBxL&ZSlQ}BVj{Axdkg17uo|`>XZS3ieUw$V&O;KaFQpf0bXYb61eg9L`Sp<-Wr0x}*g#1*0Wu|IfzJdevFe|Bi3Uwr&Q{Pef)qVm6OJ&1Q|EHw#t9soITz2_t zf1`p<)v!GGXnw`#cx2Ab&qgGngvw7yY^Hp5|8-<0UP^wa$|qr&B9zdXckhfMXKUHs4FKna9$d1cQ+VqaNJb^N1B~`9MVY#wl R{#WQ<{2^85Wyrf_{|o9Zpn(7Y literal 0 HcmV?d00001 diff --git a/src/link816/omfEmit.cpp b/src/link816/omfEmit.cpp index c9f5e0a..59c8ea2 100644 --- a/src/link816/omfEmit.cpp +++ b/src/link816/omfEmit.cpp @@ -32,6 +32,24 @@ namespace { +// OMF v2.1 protocol constants -- single source of truth for the header +// layout and opcode set. See Apple IIgs Tech Note #17 and the FTN +// reference. Don't renumber; values are shared with the loader. +static constexpr uint8_t OMF_OP_LCONST = 0xF2; +static constexpr uint8_t OMF_OP_CRELOC = 0xF5; +static constexpr uint8_t OMF_OP_END = 0x00; +[[maybe_unused]] static constexpr uint8_t OMF_NUMLEN = 4; +[[maybe_unused]] static constexpr uint8_t OMF_VERSION_V21 = 0x02; +[[maybe_unused]] static constexpr uint32_t OMF_HDR_SIZE = 44; +[[maybe_unused]] static constexpr uint32_t OMF_LABLEN_FIXED = 10; +static constexpr uint16_t OMF_KIND_CODE_PRIV = 0x1000; +static constexpr uint16_t OMF_KIND_DPSTACK = 0x4012; // DP/Stack | RELOAD; matches real-world GNO/ME ~_STACK format +static constexpr uint16_t OMF_KIND_DATA_STATIC = 0x8001; +static constexpr uint16_t OMF_KIND_CODE_STATIC_ABSBANK = 0x8800; +// cRELOC opcode wire size: opcode + ByteCnt + BitShift + OffsetPatch + +// OffsetReference = 1 + 1 + 1 + 2 + 2 = 7 bytes per site. 
+static constexpr uint32_t OMF_CRELOC_BYTES_PER_SITE = 7; + [[noreturn]] static void die(const std::string &msg) { std::fprintf(stderr, "omfEmit: %s\n", msg.c_str()); std::exit(1); @@ -48,9 +66,7 @@ struct RelocSite { uint8_t byteCnt; uint8_t bitShift; // 0 for offset relocs, 16 for BANK16 }; -} // close namespace std::vector gReloc24Sites; -namespace { static std::vector readFile(const std::string &path) { std::ifstream f(path, std::ios::binary); @@ -135,7 +151,7 @@ static std::vector emitOneSeg(const std::vector &image, // literal bytes. With NUMLEN=4 (standard for v2.1), the count // field is 4 bytes. Verified empirically against real /SYSTEM/ // START on GS/OS 6.0.2: every segment uses 0xF2 + 4-byte count. - body.push_back(0xF2); // LCONST opcode + body.push_back(OMF_OP_LCONST); // LCONST opcode put32(body, static_cast(combined.size())); body.insert(body.end(), combined.begin(), combined.end()); } @@ -150,14 +166,14 @@ static std::vector emitOneSeg(const std::vector &image, // (segPlacedBase + OffsetReference) at load time. This is what // makes JSL/JML/STAlong/etc. with intra-segment targets work when // the Loader places us at non-zero bank. - for (const auto &s : ::gReloc24Sites) { - body.push_back(0xF5); + for (const auto &s : gReloc24Sites) { + body.push_back(OMF_OP_CRELOC); body.push_back(s.byteCnt); // ByteCnt (2 or 3) body.push_back(s.bitShift); // BitShift (0 or 16) put16(body, s.patchOff); // OffsetPatch put16(body, s.offsetRef); // OffsetReference } - body.push_back(0x00); // END opcode + body.push_back(OMF_OP_END); // END opcode // Real OMF format (Merlin32 convention, verified GS/OS Loader-launchable): // - LABLEN = 10: both LOAD_NAME and SEG_NAME are 10 bytes wide, @@ -247,13 +263,21 @@ static std::vector emitOneSeg(const std::vector &image, // allocate a page-aligned, locked memory block of that size in // bank $00." 
// -// The body is just an END opcode (no LCONST data — RESSPC alone tells -// the Loader how big to make the allocation, and the bytes don't need -// to come from the file). KIND = 0x1012 = DP/Stack | PRIVATE — the -// PRIVATE attribute matches Apple's `makedirect` reference utility -// (ksherlock/omfutils). +// The body is an LCONST opcode followed by `length` zero bytes plus an +// END opcode — matching the real-world format used by every GNO/ME +// command (e.g. /GNO.BOOT/bin/echo's ~_STACK seg). Empirically a body +// of just END (no LCONST, relying on RESSPC for allocation) makes the +// GS/OS Loader's ExpressLoad fast path silently drop the seg and fall +// back to its default 4 KB DP/Stack — hence this code emits real +// content so the Loader has something to copy. KIND = 0x4012 (RELOAD +// | DP/Stack) also matches the working GNO format; the earlier 0x1012 +// (PRIVATE | DP/Stack) is what `makedirect` ships but doesn't survive +// ExpressLoad fast-path processing. static std::vector emitDpStackSeg(uint32_t length, uint16_t segNum) { std::vector body; + body.push_back(0xF2); // LCONST opcode + put32(body, length); // 4-byte literal length + body.insert(body.end(), length, 0); // `length` zero bytes body.push_back(0x00); // END opcode constexpr uint8_t LABLEN_VAL = 10; const std::string segNameTxt = "~Direct"; @@ -267,10 +291,13 @@ static std::vector emitDpStackSeg(uint32_t length, uint16_t segNum) { DISPNAME + loadName.size() + segName.size()); const uint32_t LENGTH = length; // memory size requested const uint32_t BYTECNT = DISPDATA + static_cast(body.size()); - const uint32_t RESSPC = length; // bytes to zero-allocate + // RESSPC = 0 because the bytes are carried in LCONST (matches the + // bss-as-zeros approach used for the user CODE seg — the Loader's + // ExpressLoad fast path can't be trusted to honor RESSPC). 
+ const uint32_t RESSPC = 0; const uint32_t BANKSIZE = 0; // DP/Stack lives in bank 0 const uint32_t ALIGN = 0x100; // page-aligned per spec - const uint16_t KIND = 0x1012; // DP/Stack | PRIVATE + const uint16_t KIND = OMF_KIND_DPSTACK; // DP/Stack | RELOAD std::vector hdr; put32(hdr, BYTECNT); @@ -324,7 +351,7 @@ static std::vector emitOMF(const std::vector &image, uint32_t bssGap = 0) { if (stackSize == 0) { return emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/1, - /*kind*/0x1000, name, bssSize, bssGap); + /*kind*/OMF_KIND_CODE_PRIV, name, bssSize, bssGap); } // DP/Stack segment ordering: Apple's `makedirect` reference utility // assigns the DP/Stack as SEGNUM 1 (its own object); when linked @@ -334,7 +361,7 @@ static std::vector emitOMF(const std::vector &image, // sets DP and SP appropriately when entering our code. auto dpSeg = emitDpStackSeg(stackSize, /*segNum*/1); auto codeSeg = emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/2, - /*kind*/0x1000, name, bssSize, bssGap); + /*kind*/OMF_KIND_CODE_PRIV, name, bssSize, bssGap); std::vector out; out.insert(out.end(), dpSeg.begin(), dpSeg.end()); out.insert(out.end(), codeSeg.begin(), codeSeg.end()); @@ -391,6 +418,17 @@ static std::vector emitOmfExpressLoad( auto userSeg = emitOneSeg(image, entryOffset, /*org*/0, /*segNum*/2, /*kind*/0x1000, userSegName, bssSize, bssGap); + // Optionally build the DP/Stack segment. If present it lives in the + // file AFTER the user seg and gets its own ExpressLoad segtable + + // remap + header_info entries — otherwise the Loader's ExpressLoad + // fast path never sees the KIND=0x4012 record and reverts to its + // default 4KB DP/Stack allocation (silent --stack-size no-op). + const bool haveDpStack = (stackSize != 0); + std::vector dpStackSeg; + if (haveDpStack) { + dpStackSeg = emitDpStackSeg(stackSize, /*segNum*/3); + } + // Step 2: figure out the file offsets we'll need to bake into the // load script. 
We don't know the ExpressLoad segment's total size // yet — but we can compute it because each component is a fixed @@ -399,11 +437,10 @@ static std::vector emitOmfExpressLoad( // ExpressLoad LCONST data layout (matches Merlin32 source — see // BuildExpressLoadSegment in Merlin32's a65816_OMF.c): // 6 bytes header (4-byte reserved DWORD + 2-byte count WORD) - // 8 bytes segment list (1 entry per non-ExpressLoad segment) - // 2 bytes remap list (1 entry per non-ExpressLoad segment) - // 16 bytes header info offsets (data_off, data_len, reloc_off, reloc_len) - // + header_xpress: bytes [12..43] of user header (32 bytes) + LOAD_NAME (10) + SEG_NAME (1+N) - // = 6 + 8 + 2 + 16 + 32 + 10 + 1 + N = 75 + N bytes + // 8 bytes/seg segment list (1 entry per non-ExpressLoad segment) + // 2 bytes/seg remap list (1 entry per non-ExpressLoad segment) + // 68 bytes/seg header_info (16B offsets + 32B hdr copy + 10B LOAD_NAME + 10B SEG_NAME) + // total: 6 + 78*N bytes for N non-ExpressLoad segs // // KEY FIX from earlier emitter version: header is 6 bytes, NOT 8. // I had written 8 bytes (file_ref WORD + reserved WORD + extra WORD + @@ -415,7 +452,10 @@ static std::vector emitOmfExpressLoad( constexpr uint32_t HDR_SIZE = 44; constexpr uint32_t LOAD_NAME_SIZE = 10; constexpr uint32_t SEG_NAME_SIZE = 10; // LABLEN=10 → fixed-width SEG_NAME - const uint32_t userNameLen = (uint32_t)userSegName.size(); + constexpr uint32_t SEGTAB_ENTRY = 8; + constexpr uint32_t REMAP_ENTRY = 2; + constexpr uint32_t HDR_INFO_ENTRY = 16 + 32 + LOAD_NAME_SIZE + SEG_NAME_SIZE; // 68 + constexpr uint32_t HEADER_BYTES = 6; const uint32_t userNameAreaSize = LOAD_NAME_SIZE + SEG_NAME_SIZE; // ExpressLoad's own segment metrics. The name "~ExpressLoad" is 12 @@ -423,12 +463,8 @@ static std::vector emitOmfExpressLoad( // uses LABLEN=0 (length-prefixed name): 1 length byte + 12 chars. 
const std::string elName = "~ExpressLoad"; const uint32_t elNameAreaSize = LOAD_NAME_SIZE + 1 + (uint32_t)elName.size(); - // header_xpress_length = (header bytes 12..43) + LOAD_NAME + SEG_NAME - // = 32 + 10 + 10 = 52 bytes - // Per-segment ExpressLoad data: 8 (table) + 2 (remap) + 16 (offsets) + 52 = 78 bytes - // Header (6 bytes) + per-segment data: 6 + 78 = 84 - const uint32_t elDataSize = 84; - (void)userNameLen; // truncated in user seg name; LABLEN=10 fixed + const uint32_t nSegs = haveDpStack ? 2 : 1; // non-ExpressLoad segs + const uint32_t elDataSize = HEADER_BYTES + (SEGTAB_ENTRY + REMAP_ENTRY + HDR_INFO_ENTRY) * nSegs; // Body size = 1 byte LCONST opcode + 4 byte length + data + 1 byte END const uint32_t elBodySize = 1 + 4 + elDataSize + 1; const uint32_t elSegSize = HDR_SIZE + elNameAreaSize + elBodySize; @@ -438,24 +474,47 @@ static std::vector emitOmfExpressLoad( const uint32_t userBodyOpOff = userSegStart + HDR_SIZE + userNameAreaSize; const uint32_t userDataOff = userBodyOpOff + 5; // 1 op + 4 length + // DP/Stack segment file offsets (after user seg). The DP/Stack body + // mirrors the real GNO/ME ~_STACK seg format: an LCONST opcode + 4 + // byte length + `stackSize` zero bytes + END. ExpressLoad's + // hdr_info entry has to point at the LCONST data so the Loader + // copies the right number of zeros into the allocated chunk — a + // body of just END (RESSPC-only) silently no-ops on the + // ExpressLoad fast path, which is the bug this whole section fixes. + const uint32_t dpStackSegStart = userSegStart + (uint32_t)userSeg.size(); + const uint32_t dpStackBodyOff = dpStackSegStart + HDR_SIZE + (LOAD_NAME_SIZE + SEG_NAME_SIZE); + const uint32_t dpStackDataOff = dpStackBodyOff + 5; // 1 op + 4 length + // Step 3: build the ExpressLoad LCONST data. std::vector elData; - // Header (6 bytes): reserved DWORD + count WORD + // Header (6 bytes): reserved DWORD + count WORD. count = N-2 where + // N = total segments in the file (including ExpressLoad). 
With a + // DP/Stack seg N=3 so count=1; without it N=2 so count=0. put32(elData, 0); // reserved - put16(elData, 0); // count = N-2 = 0 (for 2 segs) + put16(elData, (uint16_t)(haveDpStack ? 1 : 0)); // count = N-2 - // Segment list (1 × 8 bytes) - // Self-rel offset = (header info offset within elData) - (this entry pos) - // = 16 - 6 = 10 - constexpr uint32_t segListEntryOff = 6; - const uint32_t headerInfoOff = 6 + 8 + 2; // header + segtable + remap - put16(elData, (uint16_t)(headerInfoOff - segListEntryOff)); - put16(elData, 0); // flags - put32(elData, 0); // handle + // Segment list: one 8-byte entry per non-ExpressLoad segment. Each + // entry's first WORD is the SELF-RELATIVE offset (from this entry's + // own start) to the segment's header_info record. + const uint32_t segTableOff = HEADER_BYTES; + const uint32_t remapOff = segTableOff + SEGTAB_ENTRY * nSegs; + const uint32_t hdrInfoOff = remapOff + REMAP_ENTRY * nSegs; + for (uint32_t i = 0; i < nSegs; i++) { + const uint32_t thisEntryOff = segTableOff + SEGTAB_ENTRY * i; + const uint32_t thisHdrInfoOff = hdrInfoOff + HDR_INFO_ENTRY * i; + put16(elData, (uint16_t)(thisHdrInfoOff - thisEntryOff)); // self-rel + put16(elData, 0); // flags + put32(elData, 0); // handle + } - // Remap list: old seg 1 (which would be our user seg without - // ExpressLoad) maps to new seg 2 (since ExpressLoad takes seg 1). + // Remap list: 1 WORD per non-ExpressLoad seg, giving the new + // segment number for each old segment position. Old seg 1 (user + // code, would-be sole seg without ExpressLoad) → new seg 2. + // Old seg 2 (DP/Stack, only present when --stack-size) → new seg 3. put16(elData, 2); + if (haveDpStack) { + put16(elData, 3); + } // Header info entry for the user segment. // data length = LCONST data size in the file. 
emitOneSeg embeds @@ -473,11 +532,10 @@ static std::vector emitOmfExpressLoad( put32(elData, 0); // reloc offset put32(elData, 0); // reloc length } else { - const uint32_t crelocBytesPerSite = 7; // 0xF5 + 1+1+2+2 const uint32_t crelocOff = userDataOff + (uint32_t)image.size() + bssGap + bssSize; const uint32_t crelocLen = - crelocBytesPerSite * (uint32_t)gReloc24Sites.size(); + OMF_CRELOC_BYTES_PER_SITE * (uint32_t)gReloc24Sites.size(); put32(elData, crelocOff); put32(elData, crelocLen); } @@ -498,6 +556,34 @@ static std::vector emitOmfExpressLoad( elData.push_back(i < truncated.size() ? (uint8_t)truncated[i] : 0x20); } + // Header info entry for the DP/Stack segment (when present). + // data_off / data_len point at the LCONST zero bytes carried in the + // DP/Stack seg's body, mirroring the working real-world layout + // (GNO/ME ~_STACK). No cRELOC entries for a DP/Stack seg, so + // reloc fields are 0. + if (haveDpStack) { + if (dpStackSeg.size() < HDR_SIZE) die("internal: DP/Stack seg too small"); + put32(elData, dpStackDataOff); // data offset (LCONST data) + put32(elData, stackSize); // data length (= stack size) + put32(elData, 0); // reloc offset + put32(elData, 0); // reloc length + // Header copy: bytes [12..43] of DP/Stack segment header. + elData.insert(elData.end(), dpStackSeg.begin() + 12, dpStackSeg.begin() + HDR_SIZE); + elData[elData.size() - 32 + 30] = 0; // DISPDATA hi → 0 + elData[elData.size() - 32 + 31] = 0; + // LOAD_NAME (10 bytes, space-padded) + for (int i = 0; i < (int)LOAD_NAME_SIZE; i++) elData.push_back(0x20); + // SEG_NAME = "~Direct" padded to 10 bytes (must match the value + // stored by emitDpStackSeg, otherwise ExpressLoad's name match + // could fail; the seg-name area in the file uses 10 spaces base + // with "~Direct" overwriting the first 7). + const char *dpName = "~Direct"; + const size_t dpNameLen = 7; + for (size_t i = 0; i < SEG_NAME_SIZE; i++) { + elData.push_back(i < dpNameLen ? 
(uint8_t)dpName[i] : 0x20); + } + } + if (elData.size() != elDataSize) die("internal: ExpressLoad data size mismatch"); @@ -513,7 +599,7 @@ static std::vector emitOmfExpressLoad( elHdr.push_back(4); // NUMLEN elHdr.push_back(2); // VERSION (0x02 = v2.1) put32(elHdr, 0); // BANKSIZE = 0 for DATA seg - put16(elHdr, 0x8001); // KIND = DATA|STATIC + put16(elHdr, OMF_KIND_DATA_STATIC); // KIND = DATA|STATIC elHdr.push_back(0); elHdr.push_back(0); // undef put32(elHdr, 0); // ORG put32(elHdr, 0); // ALIGN @@ -542,16 +628,15 @@ static std::vector emitOmfExpressLoad( die("internal: ExpressLoad segment size mismatch"); // Step 6: concatenate ExpressLoad + user segment + optional DP/Stack. - // The DP/Stack seg sits AFTER the user seg; the Loader walks file- - // ordered segments after the ExpressLoad load step completes, and - // processes each segment by KIND. The ExpressLoad load script only - // tracks code/data segs; the DP/Stack seg is found by KIND walk. + // The DP/Stack seg's presence is now also recorded in the + // ExpressLoad load script (segtable + remap + header_info entries + // above) so the Loader's fast path honors KIND=0x4012 instead of + // silently dropping it to its default 4 KB DP/Stack allocation. std::vector result; result.insert(result.end(), elSeg.begin(), elSeg.end()); result.insert(result.end(), userSeg.begin(), userSeg.end()); - if (stackSize != 0) { - auto dpSeg = emitDpStackSeg(stackSize, /*segNum*/3); - result.insert(result.end(), dpSeg.begin(), dpSeg.end()); + if (haveDpStack) { + result.insert(result.end(), dpStackSeg.begin(), dpStackSeg.end()); } return result; } @@ -674,7 +759,7 @@ static void usage(const char *argv0) { " sidecar; emit cRELOC (0xF5) opcodes after LCONST\n" " so the Loader patches intra-segment 24-bit refs\n" " (JSL/JML/STAlong/etc.) when placing the segment.\n" - " --stack-size N append a ~Direct DP/Stack segment (KIND=0x1012)\n" + " --stack-size N append a ~Direct DP/Stack segment (KIND=0x4012)\n" " of N bytes. 
The Loader allocates a page-aligned\n" " block of this size in bank 0 for combined DP +\n" " stack use. N must be page-multiple (>= 256).\n" @@ -782,7 +867,7 @@ int main(int argc, char **argv) { // intra-segment relocations at link time and have no // INTERSEG / RELOC opcodes); ABSBANK + ORG=base pins it // to a specific bank. CODE is the default (type 0). - uint16_t kind = (k == 0) ? 0x8800u : 0x8800u; + const uint16_t kind = OMF_KIND_CODE_STATIC_ABSBANK; uint32_t entryOff = (k == 0) ? s.entryOff : 0; auto seg = emitOneSeg(img, entryOff, s.base, static_cast(s.num), @@ -846,10 +931,15 @@ int main(int argc, char **argv) { if (!f) die("cannot open '" + output + "' for writing"); f.write(reinterpret_cast(blob.data()), blob.size()); + // Segment count: 1 user CODE seg; +1 for ExpressLoad wrapper; +1 + // when --stack-size adds a ~Direct DP/Stack seg. + int segCount = 1; + if (expressload) segCount++; + if (stackSize != 0) segCount++; std::fprintf(stderr, "OMF: %d segment%s%s, %zu bytes payload, entry='%s' at +0x%x -> %s " "(%zu bytes total)\n", - expressload ? 2 : 1, expressload ? "s" : "", + segCount, segCount == 1 ? "" : "s", expressload ? " (ExpressLoad)" : "", image.size(), entry.c_str(), entryOff, output.c_str(), blob.size()); diff --git a/src/llvm/lib/Target/W65816/MCTargetDesc/W65816AsmBackend.cpp b/src/llvm/lib/Target/W65816/MCTargetDesc/W65816AsmBackend.cpp index 6edd2ca..7b4a89a 100644 --- a/src/llvm/lib/Target/W65816/MCTargetDesc/W65816AsmBackend.cpp +++ b/src/llvm/lib/Target/W65816/MCTargetDesc/W65816AsmBackend.cpp @@ -6,9 +6,10 @@ // //===----------------------------------------------------------------------===// // -// Skeleton assembler backend. Fixup resolution, relaxation and nop -// generation are left unimplemented; they will be filled in once the -// instruction encodings are defined. +// W65816 assembler backend. 
Implements applyFixup for the +// R_W65816_* relocation family, BRA -> BRL relaxation when the 8-bit +// signed displacement won't fit, and writeNopData using 65816 NOP +// ($EA) bytes. // //===----------------------------------------------------------------------===// @@ -29,6 +30,13 @@ // W65816::BRA / W65816::BRL opcodes are exported by W65816MCTargetDesc.h // (which already includes the generated header). +// W65816 NOP machine encoding (single byte). +static constexpr unsigned char kOpcodeNOP = 0xEA; + +// Signed 8-bit branch displacement range for Bxx / BRA fixups. +static constexpr int kBranch8Min = -128; +static constexpr int kBranch8Max = 127; + using namespace llvm; namespace { @@ -110,7 +118,7 @@ public: // instead of silently truncating. if (Fixup.getKind() == W65816::fixup_8_pcrel) { int64_t Signed = static_cast(Value); - if (Signed < -128 || Signed > 127) { + if (Signed < kBranch8Min || Signed > kBranch8Max) { getContext().reportError( Fixup.getLoc(), "branch target out of range for 8-bit PC-relative branch " @@ -158,7 +166,7 @@ public: const MCSubtargetInfo *STI) const override { // The 65816 NOP is a single 0xEA byte. for (uint64_t I = 0; I < Count; ++I) - OS << char(0xEA); + OS << static_cast(kOpcodeNOP); return true; } @@ -192,7 +200,7 @@ public: if (Fixup.getKind() != W65816::fixup_8_pcrel) return false; int64_t Signed = static_cast(Value); - return Signed < -128 || Signed > 127; + return Signed < kBranch8Min || Signed > kBranch8Max; } void relaxInstruction(MCInst &Inst, diff --git a/src/llvm/lib/Target/W65816/MCTargetDesc/W65816ELFObjectWriter.cpp b/src/llvm/lib/Target/W65816/MCTargetDesc/W65816ELFObjectWriter.cpp index 2c86082..887ca82 100644 --- a/src/llvm/lib/Target/W65816/MCTargetDesc/W65816ELFObjectWriter.cpp +++ b/src/llvm/lib/Target/W65816/MCTargetDesc/W65816ELFObjectWriter.cpp @@ -24,6 +24,23 @@ using namespace llvm; +// R_W65816_* relocation numbers. 
These are protocol constants shared +// with link816 / omfEmit / llvm-objdump; do not renumber. If new types +// are added, mirror them in src/link816/link816.cpp's relocWidth() and +// the cRELOC pipeline. +namespace R_W65816 { +enum : unsigned { + R_IMM8 = 1, + R_IMM16 = 2, + R_IMM24 = 3, + R_PCREL8 = 4, + R_PCREL16 = 5, + R_BANK16 = 6, + R_DATA32 = 7, + R_PCREL32 = 8, +}; +} // namespace R_W65816 + namespace { class W65816ELFObjectWriter : public MCELFObjectTargetWriter { @@ -56,16 +73,16 @@ protected: // type — observed as type 249 — and broke link816.py. auto Kind = Fixup.getKind(); switch (Kind) { - case W65816::fixup_8: return 1; // R_W65816_IMM8 - case W65816::fixup_16: return 2; // R_W65816_IMM16 - case W65816::fixup_24: return 3; // R_W65816_IMM24 - case W65816::fixup_8_pcrel: return 4; // R_W65816_PCREL8 - case W65816::fixup_16_pcrel: return 5; // R_W65816_PCREL16 - case W65816::fixup_bank16: return 6; // R_W65816_BANK16 - case W65816::fixup_32: return 7; // R_W65816_DATA32 - case W65816::fixup_32_pcrel: return 8; // R_W65816_PCREL32 - case FK_Data_1: return IsPCRel ? 4 : 1; - case FK_Data_2: return IsPCRel ? 5 : 2; + case W65816::fixup_8: return R_W65816::R_IMM8; + case W65816::fixup_16: return R_W65816::R_IMM16; + case W65816::fixup_24: return R_W65816::R_IMM24; + case W65816::fixup_8_pcrel: return R_W65816::R_PCREL8; + case W65816::fixup_16_pcrel: return R_W65816::R_PCREL16; + case W65816::fixup_bank16: return R_W65816::R_BANK16; + case W65816::fixup_32: return R_W65816::R_DATA32; + case W65816::fixup_32_pcrel: return R_W65816::R_PCREL32; + case FK_Data_1: return IsPCRel ? R_W65816::R_PCREL8 : R_W65816::R_IMM8; + case FK_Data_2: return IsPCRel ? R_W65816::R_PCREL16 : R_W65816::R_IMM16; // FK_Data_4 is emitted by DWARF (.debug_info / .debug_line / // .debug_frame section-relative addresses), .eh_frame, // .debug_loclists, and user `.long` directives. 
Dispatch by @@ -78,7 +95,7 @@ protected: // .debug_line decoder because the 4th byte of the slot landed // on whatever followed it (most often the size byte of the // next line-program header → unit_length = 0). - case FK_Data_4: return IsPCRel ? 8 : 7; + case FK_Data_4: return IsPCRel ? R_W65816::R_PCREL32 : R_W65816::R_DATA32; default: llvm_unreachable("W65816: unknown fixup kind"); } diff --git a/src/llvm/lib/Target/W65816/W65816.h b/src/llvm/lib/Target/W65816/W65816.h index f133acf..1af1083 100644 --- a/src/llvm/lib/Target/W65816/W65816.h +++ b/src/llvm/lib/Target/W65816/W65816.h @@ -204,6 +204,7 @@ void initializeW65816SepRepCleanupPass(PassRegistry &); void initializeW65816BranchExpandPass(PassRegistry &); void initializeW65816TiedDefSpillPass(PassRegistry &); void initializeW65816ABridgeViaXPass(PassRegistry &); +void initializeW65816UnLSRPass(PassRegistry &); void initializeW65816WidenAcc16Pass(PassRegistry &); void initializeW65816SpillToXPass(PassRegistry &); void initializeW65816NegYIndYPass(PassRegistry &); diff --git a/src/llvm/lib/Target/W65816/W65816ABridgeViaX.cpp b/src/llvm/lib/Target/W65816/W65816ABridgeViaX.cpp index 64ab410..95d557e 100644 --- a/src/llvm/lib/Target/W65816/W65816ABridgeViaX.cpp +++ b/src/llvm/lib/Target/W65816/W65816ABridgeViaX.cpp @@ -8,23 +8,28 @@ // // Pre-regalloc complement to W65816TiedDefSpill. Where TiedDefSpill // preserves a multi-use Acc16 vreg by spilling it to a fresh stack -// slot around the tied-def consumer, this pass tries to do the same -// preservation via TAX/TXA: copy to an Idx16 vreg before the consumer -// (regalloc puts it in X or Y, expansion lowers the COPY to TAX/TAY), -// copy back to a fresh Acc16 vreg after. +// slot around the tied-def consumer, this pass bridges via an Img16 +// (DP-backed) vreg: park SrcReg in a fresh Img16 vreg before the +// consumer, restore to a fresh Acc16 vreg after. 
Regalloc places the +// Img16 in IMG0..IMG7 (DP $D0..$DE); copyPhysReg lowers the COPYs to +// STA dp / LDA dp (4 cyc each) and no system-stack slot is allocated. +// +// (The pass name dates from an earlier prototype that bridged via X +// using TAX/TXA. Cross-MBB X-liveness analysis was unimplemented and +// the X-bridge couldn't survive Idx16 clobbers between consumer and +// last use, so the bridge moved to Img16. The DP-backed form costs +// 8 cycles per round trip (vs. 4 for TAX/TXA) but has none of the +// liveness restrictions.) // // Win per bridged pair: // stack spill: STA dp,S (5 cyc) + LDA dp,S (5 cyc) + 1 frame slot -// X bridge : TAX (2 cyc) + TXA (2 cyc) + no frame growth -// Net 6 cycles + 2 bytes saved per bridge — and we avoid one PHA per -// stack slot we didn't allocate. +// Img bridge : STA dp (4 cyc) + LDA dp (4 cyc) + no frame growth +// Net 2 cycles saved per bridge (no size win: both forms are 2-byte +// instructions) -- and one PHA per avoided stack slot. // -// Bail conditions (fall back to TiedDefSpill's stack route): -// - any MI between consumer and SrcReg's last use clobbers Idx16 -// (LDX/LDY/INX/DEX/INY/DEY/TAX/TAY/TXY/TYX/PHX/PHY/PLX/PLY/etc.) -// - any call in the range (calls clobber X and Y per ABI) -// - SrcReg is used in a different MBB (cross-MBB liveness needs more -// analysis; deferred) +// Bail conditions (fall back to TiedDefSpill's stack route): any MI +// between consumer and SrcReg's last use that clobbers IMG slots, +// callees that clobber IMG0..IMG7, cross-MBB uses of SrcReg. // // Runs before TiedDefSpill so the latter doesn't double-process the // same candidates.
diff --git a/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp b/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp index a92bf26..dbb1679 100644 --- a/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp +++ b/src/llvm/lib/Target/W65816/W65816AsmPrinter.cpp @@ -6,8 +6,14 @@ // //===----------------------------------------------------------------------===// // -// Skeleton assembly printer. The MCInst lowering path is wired up but no -// target-specific operand formatting is implemented yet. +// W65816 assembly printer. Owns the late pseudo-expansion path +// (MCInst lowering for the IR-pseudo opcodes that we keep through PEI +// because their machine encoding depends on AsmPrinter-time peepholes +// or runtime ABI knowledge -- BRK_pseudo, LDAi16imm_bank, JSLpseudo, +// the SEP/REP-wrapped i8 forms, etc.), plus a small set of mode-aware +// peepholes (PEA / PEI substitution for LDA+PUSH16 chains, STZ +// folding, etc.) that prefer to run after the rest of codegen has +// stabilised the MIR. // //===----------------------------------------------------------------------===// @@ -31,6 +37,39 @@ using namespace llvm; #define DEBUG_TYPE "asm-printer" +// W65816 processor-status flag masks used by SEP/REP wrapping. +// (See W65816 datasheet 6.10.) M = accumulator width (1 = 8-bit, +// 0 = 16-bit); X = index width (same convention). The wraps in this +// file toggle M only; X never changes in normal codegen. +static constexpr unsigned kPStatusM = 0x20; +[[maybe_unused]] static constexpr unsigned kPStatusX = 0x10; + +// IIgs runtime DP slots referenced from emitted code. Both are part of +// the runtime ABI -- AsmPrinter / ISelLowering / libgcc must agree. +// kRuntimePbrStashDP -- crt0 stashes the runtime PBR here so +// LDAi16imm_bank can emit `lda $BE` (PBR-byte +// load) for &symbol values in non-bank-0 placements. +// kRuntimeIndirTargetDP -- __indirTarget vector used by the +// JMP (abs) indirect-call thunk. 
+static constexpr unsigned kRuntimePbrStashDP = 0xBE; +[[maybe_unused]] static constexpr unsigned kRuntimeIndirTargetDP = 0x00B8; + +// DP scratch byte used by ADJCALLSTACKUP / ALLOCAfi to save A across a +// TSC/TCS bracket. Lives in the project-wide DP scratch range that +// starts at $E0 (presumably $E0..$EF -- confirm); coordinate with +// W65816ISelLowering / W65816RegisterInfo if the layout changes. +static constexpr unsigned kDpScratch0 = 0xE0; + +// IIgs bank-byte mask: a 24-bit address whose top 8 bits are non-zero +// is in a non-zero bank and must be encoded via the LONG form. +static constexpr uint64_t kBankByteMask = 0xFF0000; + +// ADJCALLSTACKUP fan-out limit: PLY (1 byte / 4 cyc per pair-pop) wins +// over the 8-byte / ~14-cyc TAY/TSC/CLC/ADC/TCS/TYA bracket up through +// N = 14 even bytes; beyond that the bracket is cheaper. See the +// dispatch in the ADJCALLSTACKUP expansion. +static constexpr int kAdjStackUpPlyMaxN = 14; + namespace { class W65816AsmPrinter : public AsmPrinter { @@ -267,7 +306,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { if (YLive) { // Route through DP $E0 to preserve both A and Y.
MCInst Sta; Sta.setOpcode(W65816::STA_DP); - Sta.addOperand(MCOperand::createImm(0xE0)); + Sta.addOperand(MCOperand::createImm(kDpScratch0)); EmitToStreamer(*OutStreamer, Sta); MCInst Tsc; Tsc.setOpcode(W65816::TSC); EmitToStreamer(*OutStreamer, Tsc); MCInst Clc; Clc.setOpcode(W65816::CLC); EmitToStreamer(*OutStreamer, Clc); @@ -276,9 +315,13 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Adc); MCInst Tcs; Tcs.setOpcode(W65816::TCS); EmitToStreamer(*OutStreamer, Tcs); MCInst Lda; Lda.setOpcode(W65816::LDA_DP); - Lda.addOperand(MCOperand::createImm(0xE0)); + Lda.addOperand(MCOperand::createImm(kDpScratch0)); EmitToStreamer(*OutStreamer, Lda); - } else if (N <= 14 && (N % 2) == 0) { + } else if (N <= kAdjStackUpPlyMaxN && (N % 2) == 0) { + // Repeated PLY (1 byte / 4 cyc each) wins over the TAY/TSC/CLC/ + // ADC/TCS/TYA bracket (8 bytes / ~14 cyc fixed) for N <= 14; + // beyond that the bracket is cheaper. Must be even (PLY pops + // 16-bit pairs). for (int i = 0; i < N / 2; ++i) { MCInst Ply; Ply.setOpcode(W65816::PLY); EmitToStreamer(*OutStreamer, Ply); @@ -348,7 +391,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { Lda.addOperand(MCOperand::createImm(0)); } else { Lda.setOpcode(W65816::LDA_DP); - Lda.addOperand(MCOperand::createImm(0xBE)); + Lda.addOperand(MCOperand::createImm(kRuntimePbrStashDP)); } EmitToStreamer(*OutStreamer, Lda); return; @@ -380,7 +423,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // writes `*(uint16 *)0xE19E00UL = 0` we MUST keep the // LDA #0 + STA_Long pair so the bank-explicit form survives. 
bool AddrFitsIn16 = !It->getOperand(1).isImm() || - (It->getOperand(1).getImm() & 0xFF0000) == 0; + (It->getOperand(1).getImm() & kBankByteMask) == 0; if (AddrFitsIn16) { MCInst Stz; Stz.setOpcode(W65816::STZ_Abs); @@ -401,6 +444,10 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { if (It != MI->getParent()->end() && It->getOpcode() == W65816::PUSH16) { auto It2 = std::next(It); while (It2 != MI->getParent()->end() && It2->isDebugInstr()) ++It2; + // If PUSH16 is the last MI in the BB we leave the peephole as a + // no-op (conservative): the PUSH chain almost always feeds a JSL + // within the same BB, and proving A-dead at BB exit via successor + // live-in scan is not worth the bookkeeping. bool ADead = false; if (It2 != MI->getParent()->end()) { const TargetRegisterInfo *TRI = @@ -408,13 +455,6 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { if (It2->modifiesRegister(W65816::A, TRI) && !It2->readsRegister(W65816::A, TRI)) ADead = true; - } else { - // PUSH16 is the last instruction in the BB. A is dead at - // BB exit iff it's not live-out. Check the BB's live-out - // set via successors; if no successor lists A as live-in, - // it's safe. Conservative: treat as not-dead (skip peephole). - // This case is uncommon — the PUSH chain almost always feeds - // a JSL within the same BB. } if (ADead) { MCInst Pea; @@ -445,7 +485,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // hit. We mark the next-SEP-to-skip via a per-AsmPrinter flag // so the SEP visit drops it. 
MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); MCInst Lda; Lda.setOpcode(W65816::LDA_Imm8); @@ -487,9 +527,9 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { if (It != MI->getParent()->end() && It->getOpcode() == W65816::SEP && It->getNumOperands() >= 1 && It->getOperand(0).isImm() && - It->getOperand(0).getImm() == 0x20) { + It->getOperand(0).getImm() == kPStatusM) { SkipRep = true; - SkipNextSepImm = 0x20; + SkipNextSepImm = static_cast(kPStatusM); } // STA8abs / STA8long don't expose their SEP at MIR — the wrap is // emitted at MC layer. Detect them here so we can elide the @@ -505,7 +545,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { } if (!SkipRep) { MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); } return; @@ -533,7 +573,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Lda); return; } - if ((A & 0xFF0000) != 0) { + if ((A & kBankByteMask) != 0) { MCInst Lda; Lda.setOpcode(W65816::LDA_Long); Lda.addOperand(lowerOperand(AddrOp, MCInstLowering)); @@ -564,7 +604,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Sta); return; } - if ((A & 0xFF0000) != 0) { + if ((A & kBankByteMask) != 0) { MCInst Sta; Sta.setOpcode(W65816::STA_Long); Sta.addOperand(lowerOperand(AddrOp, MCInstLowering)); @@ -649,7 +689,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { bool IsSub = MI->getOpcode() == W65816::SBCi8imm; // SEP/REP wrap (see LDAi8imm comment). MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); MCInst Carry; Carry.setOpcode(IsSub ? 
W65816::SEC : W65816::CLC); @@ -660,7 +700,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { Op.addOperand(MCOperand::createImm(Val)); EmitToStreamer(*OutStreamer, Op); MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); return; } @@ -682,11 +722,11 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { Op.addOperand(MCOperand::createImm(Val)); // SEP/REP wrap (see LDAi8imm comment). MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); EmitToStreamer(*OutStreamer, Op); MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); return; } @@ -696,7 +736,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // LDA_Long (0xAF, bank-explicit) for const-int MMIO addresses. bool IsLong = MI->getOpcode() == W65816::LDA8long; MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); MCInst Lda; Lda.setOpcode(IsLong ? W65816::LDA_Long : W65816::LDA_Abs); @@ -709,7 +749,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { Lda.addOperand(Addr); EmitToStreamer(*OutStreamer, Lda); MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); return; } @@ -717,14 +757,14 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // i8 indexed-global load: SEP #0x20 ; LDA , X ; REP #0x20 // X holds the index (set up by CopyToReg before this MI). 
MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); MCInst Lda; Lda.setOpcode(W65816::LDA_AbsX); Lda.addOperand(lowerOperand(MI->getOperand(0), MCInstLowering)); EmitToStreamer(*OutStreamer, Lda); MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); return; } @@ -732,14 +772,14 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // i8 indexed-global store: SEP #0x20 ; STA , X ; REP #0x20 // A holds the value, X holds the index. MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); MCInst Sta; Sta.setOpcode(W65816::STA_AbsX); Sta.addOperand(lowerOperand(MI->getOperand(0), MCInstLowering)); EmitToStreamer(*OutStreamer, Sta); MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); return; } @@ -764,7 +804,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { SkipNextSta8Wrap = false; if (!UsesAcc8 && !SkipOpenSep) { MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); } MCInst Sta; @@ -784,7 +824,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { EmitToStreamer(*OutStreamer, Sta); if (!UsesAcc8) { MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); } return; @@ -825,7 +865,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // i8 immediate compare — needs M=1 so the CPU only reads 1 byte // for the 
immediate. See LDAi8imm comment for the wrap rationale. MCInst Sep; Sep.setOpcode(W65816::SEP); - Sep.addOperand(MCOperand::createImm(0x20)); + Sep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Sep); MCInst Cmp; Cmp.setOpcode(W65816::CMP_Imm8); @@ -833,7 +873,7 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { Cmp.addOperand(MCOperand::createImm(Val)); EmitToStreamer(*OutStreamer, Cmp); MCInst Rep; Rep.setOpcode(W65816::REP); - Rep.addOperand(MCOperand::createImm(0x20)); + Rep.addOperand(MCOperand::createImm(kPStatusM)); EmitToStreamer(*OutStreamer, Rep); return; } @@ -965,12 +1005,12 @@ void W65816AsmPrinter::emitInstruction(const MachineInstr *MI) { // Size is in A on entry — but we need A=SP after TSC, so first // stash the size to DP scratch. MCInst Sta1; Sta1.setOpcode(W65816::STA_DP); - Sta1.addOperand(MCOperand::createImm(0xE0)); + Sta1.addOperand(MCOperand::createImm(kDpScratch0)); EmitToStreamer(*OutStreamer, Sta1); MCInst Tsc; Tsc.setOpcode(W65816::TSC); EmitToStreamer(*OutStreamer, Tsc); MCInst Sec; Sec.setOpcode(W65816::SEC); EmitToStreamer(*OutStreamer, Sec); MCInst Sbc; Sbc.setOpcode(W65816::SBC_DP); - Sbc.addOperand(MCOperand::createImm(0xE0)); + Sbc.addOperand(MCOperand::createImm(kDpScratch0)); EmitToStreamer(*OutStreamer, Sbc); MCInst Tcs; Tcs.setOpcode(W65816::TCS); EmitToStreamer(*OutStreamer, Tcs); MCInst Ina; Ina.setOpcode(W65816::INA); EmitToStreamer(*OutStreamer, Ina); diff --git a/src/llvm/lib/Target/W65816/W65816BranchExpand.cpp b/src/llvm/lib/Target/W65816/W65816BranchExpand.cpp index fd6b5c0..e23591e 100644 --- a/src/llvm/lib/Target/W65816/W65816BranchExpand.cpp +++ b/src/llvm/lib/Target/W65816/W65816BranchExpand.cpp @@ -162,8 +162,7 @@ static unsigned estimateDistance(MachineFunction &MF, // sliced after each non-final conditional, so every MBB ends up with // at most one conditional terminator. Returns true if any MBB was // split. 
-static bool splitMultiBranchMBBs(MachineFunction &MF, - const TargetInstrInfo *TII) { +static bool splitMultiBranchMBBs(MachineFunction &MF) { bool Changed = false; // Snapshot MBBs first (we mutate the list during iteration). SmallVector MBBs; @@ -233,7 +232,6 @@ static bool splitMultiBranchMBBs(MachineFunction &MF, // see if another split is needed (multi-multi-branch case). Changed = true; Sliced = true; - (void)TII; // unused for now } } return Changed; @@ -354,7 +352,7 @@ bool W65816BranchExpand::runOnMachineFunction(MachineFunction &MF) { AnyChanged |= dropDeadConditionalsToBRATarget(MF); // Step 1: split multi-conditional-terminator MBBs. - AnyChanged |= splitMultiBranchMBBs(MF, TII); + AnyChanged |= splitMultiBranchMBBs(MF); // Step 2: iterate to fixed-point. Each expansion adds 3 bytes // (bridge BRA), which may push another previously-OK branch over diff --git a/src/llvm/lib/Target/W65816/W65816I32IncFold.cpp b/src/llvm/lib/Target/W65816/W65816I32IncFold.cpp index b6bab79..3b7dfdb 100644 --- a/src/llvm/lib/Target/W65816/W65816I32IncFold.cpp +++ b/src/llvm/lib/Target/W65816/W65816I32IncFold.cpp @@ -68,10 +68,6 @@ char W65816I32IncFold::ID = 0; INITIALIZE_PASS(W65816I32IncFold, DEBUG_TYPE, "W65816 i32 += 1 fold", false, false) -namespace llvm { -void initializeW65816I32IncFoldPass(PassRegistry &); -} - // Match the 6-instruction sequence; returns the post-pattern iterator // and fills in the lo/hi stack-rel offsets if the pattern matches. 
// Tolerates intervening TAX/TXA pairs (which regalloc inserts as diff --git a/src/llvm/lib/Target/W65816/W65816ImgCalleeSave.cpp b/src/llvm/lib/Target/W65816/W65816ImgCalleeSave.cpp index 121ceeb..4560c1a 100644 --- a/src/llvm/lib/Target/W65816/W65816ImgCalleeSave.cpp +++ b/src/llvm/lib/Target/W65816/W65816ImgCalleeSave.cpp @@ -87,10 +87,6 @@ char W65816ImgCalleeSave::ID = 0; INITIALIZE_PASS(W65816ImgCalleeSave, DEBUG_TYPE, "W65816 IMG8..IMG15 callee save/restore", false, false) -namespace llvm { -void initializeW65816ImgCalleeSavePass(PassRegistry &); -} - FunctionPass *llvm::createW65816ImgCalleeSave() { return new W65816ImgCalleeSave(); } @@ -188,7 +184,7 @@ bool W65816ImgCalleeSave::runOnMachineFunction(MachineFunction &MF) { // // copyPhysReg lowers `COPY $imgN = $a` to `STA_DP imm:0xCx`, so we // check both the physreg-DEF form AND the DP-imm-store form. - bool WrittenSlot[8] = {false}; + bool UsedSlot[8] = {false}; bool AnyWritten = false; for (auto &MBB : MF) { for (auto &MI : MBB) { @@ -197,7 +193,7 @@ bool W65816ImgCalleeSave::runOnMachineFunction(MachineFunction &MF) { if (!MO.isReg() || MO.getReg() == 0 || !MO.isDef()) continue; int idx = classifyImgReg(MO.getReg()); if (idx >= 0) { - WrittenSlot[idx] = true; + UsedSlot[idx] = true; AnyWritten = true; } } @@ -205,15 +201,12 @@ bool W65816ImgCalleeSave::runOnMachineFunction(MachineFunction &MF) { auto [idx, mode] = classifyDpImmAsImg(MI); if (idx >= 0 && (mode == DpAccess::Write || mode == DpAccess::ReadWrite)) { - WrittenSlot[idx] = true; + UsedSlot[idx] = true; AnyWritten = true; } } } if (!AnyWritten) return false; - // Rename for downstream Step 2/3/4 readability — they use UsedSlot. - bool (&UsedSlot)[8] = WrittenSlot; - (void)AnyWritten; // Step 2: allocate one frame slot per used IMG. Size = 2 bytes (each // Img16 holds a 16-bit value). 
Mark as a spill slot so PEI accounts diff --git a/src/llvm/lib/Target/W65816/W65816Layer2Gate.cpp b/src/llvm/lib/Target/W65816/W65816Layer2Gate.cpp index 37da391..975ec85 100644 --- a/src/llvm/lib/Target/W65816/W65816Layer2Gate.cpp +++ b/src/llvm/lib/Target/W65816/W65816Layer2Gate.cpp @@ -215,14 +215,10 @@ namespace llvm { class W65816Layer2StampPass : public PassInfoMixin { public: PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) { - bool Changed = false; for (Function &F : M) { - Changed |= stampFunction(F); + stampFunction(F); } - if (!Changed) { - return PreservedAnalyses::all(); - } - // We only added a function attribute, no IR-level effects. Preserve + // We only add a function attribute, no IR-level effects. Preserve // everything; the inliner et al. will copy the attribute on inline. return PreservedAnalyses::all(); } diff --git a/src/llvm/lib/Target/W65816/W65816NarrowI32Mul.cpp b/src/llvm/lib/Target/W65816/W65816NarrowI32Mul.cpp index f2e1292..490a4bc 100644 --- a/src/llvm/lib/Target/W65816/W65816NarrowI32Mul.cpp +++ b/src/llvm/lib/Target/W65816/W65816NarrowI32Mul.cpp @@ -189,7 +189,6 @@ bool W65816NarrowI32Mul::runOnFunction(Function &F) { // low-16 bits as the original i32 add at every observable point // (the back-edge value can wrap on the exit iteration but is // never observed — exit takes the trip-end branch first). 
- bool NarrowedAny = false; SmallVector PhiWorklist; for (BasicBlock &BB : F) { for (PHINode &PN : BB.phis()) { @@ -282,7 +281,6 @@ bool W65816NarrowI32Mul::runOnFunction(Function &F) { Incr->replaceAllUsesWith(UndefValue::get(Incr->getType())); Incr->eraseFromParent(); PN->eraseFromParent(); - NarrowedAny = true; } return true; } diff --git a/src/llvm/lib/Target/W65816/W65816NegYIndY.cpp b/src/llvm/lib/Target/W65816/W65816NegYIndY.cpp index dd7fc82..1dfcf48 100644 --- a/src/llvm/lib/Target/W65816/W65816NegYIndY.cpp +++ b/src/llvm/lib/Target/W65816/W65816NegYIndY.cpp @@ -41,6 +41,13 @@ using namespace llvm; +// DP scratch byte used to park X when the negative-Y inserter needs to +// route through TAX/TXA. Lives in the project-wide DP scratch range +// from $E0 up (presumably $E0..$EF -- confirm); $E0 is reserved for +// ADJCALLSTACKUP's A-preserve so we use $E2 here. Coordinate with +// W65816AsmPrinter / W65816ISelLowering / W65816RegisterInfo if it moves. +static constexpr unsigned kDpScratchX = 0xE2; + #define DEBUG_TYPE "w65816-neg-y-indy" namespace { @@ -110,9 +117,9 @@ bool W65816NegYIndY::runOnMachineFunction(MachineFunction &MF) { if (XLive || xDef) break; } if (XLive) { - // Save X to DP $E2 (don't use $E0 — that's the A-preserve - // slot in call-frame teardown and may be live). - BuildMI(MBB, MI, DL, TII->get(W65816::STX_DP)).addImm(0xE2); + // Save X to DP kDpScratchX ($E2) -- $E0 is reserved as the + // A-preserve slot in call-frame teardown and may be live. + BuildMI(MBB, MI, DL, TII->get(W65816::STX_DP)).addImm(kDpScratchX); } if (IsLDA) { // LDA disp,S ; CLC ; ADC #neg ; TAX ; LDA $0000,X @@ -154,7 +161,7 @@ bool W65816NegYIndY::runOnMachineFunction(MachineFunction &MF) { } if (XLive) { // Restore X from DP $E2. - BuildMI(MBB, MI, DL, TII->get(W65816::LDX_DP)).addImm(0xE2); + BuildMI(MBB, MI, DL, TII->get(W65816::LDX_DP)).addImm(kDpScratchX); } // Erase original LDY and the (sr,s),Y op.
if (LastLDY) { LastLDY->eraseFromParent(); LastLDY = nullptr; } diff --git a/src/llvm/lib/Target/W65816/W65816PromoteFiToImg.cpp b/src/llvm/lib/Target/W65816/W65816PromoteFiToImg.cpp index dd6c65f..a75b41a 100644 --- a/src/llvm/lib/Target/W65816/W65816PromoteFiToImg.cpp +++ b/src/llvm/lib/Target/W65816/W65816PromoteFiToImg.cpp @@ -99,64 +99,29 @@ FunctionPass *llvm::createW65816PromoteFiToImg() { } -// Returns the operand index of the FrameIndex for the given FI pseudo -// opcode, or -1 if this opcode isn't a promotable FI carrier. -static int getFiOperandIdx(unsigned Opc) { - switch (Opc) { - case W65816::LDAfi: return 1; - case W65816::STAfi: return 1; - case W65816::CMPfi: return 1; - case W65816::ADCfi: - case W65816::SBCfi: - case W65816::ANDfi: - case W65816::ORAfi: - case W65816::EORfi: return 2; - default: return -1; - } -} - - -// Map a promotable FI pseudo to the corresponding DP MC opcode. -static unsigned getDpOpcode(unsigned Opc) { - switch (Opc) { - case W65816::LDAfi: return W65816::LDA_DP; - case W65816::STAfi: return W65816::STA_DP; - case W65816::CMPfi: return W65816::CMP_DP; - case W65816::ADCfi: return W65816::ADC_DP; - case W65816::SBCfi: return W65816::SBC_DP; - case W65816::ANDfi: return W65816::AND_DP; - case W65816::ORAfi: return W65816::ORA_DP; - case W65816::EORfi: return W65816::EOR_DP; - default: return 0; - } -} - - -// IMG8..IMG15 sit at DP addresses 0xC0, 0xC2, ..., 0xCE. IMG0..IMG7 -// are at 0xD0..0xDE. Returns the DP byte for IMGn. -static uint8_t dpAddrForImg(unsigned ImgIdx) { - assert(ImgIdx < 16 && "IMG index out of range"); - if (ImgIdx < 8) return 0xD0 + 2 * ImgIdx; - return 0xC0 + 2 * (ImgIdx - 8); -} - - bool W65816PromoteFiToImg::runOnMachineFunction(MachineFunction &MF) { - // DISABLED again 2026-05-13 (3rd-attempt write-up). Two new findings: + // DISABLED 2026-05-13 (3rd-attempt write-up). Two findings blocked + // re-enable: // 1. 
With kMaxPromote=2 and IMG0..7 (caller-save, skip ImgCalleeSave), - // sumSquares regressed 56 → 72 inst because the FIs picked by - // access-count (fi#2, fi#3) are intermediate spill temps, not - // the i32-accumulator's halves (which are different FIs). The - // loop body ends up using BOTH IMG and stack slots for related - // values. - // 2. To pick the RIGHT FIs (those corresponding to PHI-cycled - // values like the i32 accumulator), we need either: - // (a) IR-level analysis BEFORE FI assignment, or - // (b) Post-RA dataflow analysis to identify "long-lived" FIs - // (active across the loop back-edge with no def/use boundary). - // This is the next blocker. Disabled until either (a) or (b) is - // implemented. + // sumSquares regressed 56 -> 72 inst because the FIs picked by + // access-count are intermediate spill temps, not the i32-accumulator + // halves (which are different FIs). Loop body ends up using BOTH + // IMG and stack slots for related values. + // 2. To pick the RIGHT FIs (those corresponding to PHI-cycled values + // like the i32 accumulator), we need either IR-level analysis + // BEFORE FI assignment, or post-RA dataflow analysis to identify + // long-lived FIs (active across the loop back-edge with no def/use + // boundary). + // The pass framework is retained so the pipeline slot stays documented; + // the disabled prototype body is kept under #if 0 below. + (void)MF; return false; +} + + +#if 0 +// Disabled prototype body retained for reference; see comment above.
+bool W65816PromoteFiToImg::runOnMachineFunctionDisabled(MachineFunction &MF) { if (skipFunction(MF.getFunction())) return false; const W65816Subtarget &STI = MF.getSubtarget(); const W65816InstrInfo *TII = STI.getInstrInfo(); @@ -396,3 +361,4 @@ bool W65816PromoteFiToImg::runOnMachineFunction(MachineFunction &MF) { } return Changed; } +#endif diff --git a/src/llvm/lib/Target/W65816/W65816SepRepCleanup.cpp b/src/llvm/lib/Target/W65816/W65816SepRepCleanup.cpp index 9b64457..72f55ec 100644 --- a/src/llvm/lib/Target/W65816/W65816SepRepCleanup.cpp +++ b/src/llvm/lib/Target/W65816/W65816SepRepCleanup.cpp @@ -50,6 +50,9 @@ using namespace llvm; #define DEBUG_TYPE "w65816-sep-rep-cleanup" +// W65816 processor status M-bit mask (set/clear via SEP/REP #$20). +static constexpr int kMBit = 0x20; + namespace { class W65816SepRepCleanup : public MachineFunctionPass { @@ -276,7 +279,7 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) { for (auto It = MBB.begin(); It != MBB.end(); ++It) { if (It->getOpcode() != W65816::SEP) continue; if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) continue; - if (It->getOperand(0).getImm() != 0x20) continue; + if (It->getOperand(0).getImm() != kMBit) continue; // Walk forward looking for LDAi8imm before any STAfi_indY // or REP at this nesting level. 
auto Walker = std::next(It); @@ -312,7 +315,7 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) { if (Back->getOpcode() == W65816::SEP && Back->getNumOperands() >= 1 && Back->getOperand(0).isImm() && - Back->getOperand(0).getImm() == 0x20) { + Back->getOperand(0).getImm() == kMBit) { OuterSep = &*Back; break; } @@ -409,7 +412,7 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) { if (Op1 != W65816::REP && Op1 != W65816::SEP) continue; if (It->getNumOperands() < 1 || !It->getOperand(0).isImm()) continue; int Imm1 = It->getOperand(0).getImm(); - if (Imm1 != 0x20) continue; // M-bit only + if (Imm1 != kMBit) continue; // M-bit only // Walk forward across mode-neutral ops looking for the matching // opposite toggle. Bail at calls, asm, ALU ops on A, etc. unsigned WantOp = (Op1 == W65816::REP) ? W65816::SEP : W65816::REP; @@ -1119,361 +1122,12 @@ bool W65816SepRepCleanup::runOnMachineFunction(MachineFunction &MF) { } } - // Store forwarding (disabled — CRC32 regressed and I couldn't - // nail down the safety hole in time). Even with PHP-wrap guards - // and SP-modifier bails, the first fire (in memmove) silently - // miscompiles something that CRC32 later depends on. Pattern - // is sound; safety analysis isn't complete. See - // feedback_close_gap_attempts_round2.md for details. - #if 0 - // Store forwarding for PHI memory copies. Pattern (sumSquares - // loop body): - // - // STA X,s ; A → slot X (some intermediate result) - // [code that modifies A but doesn't touch slot X or slot Y] - // LDA X,s ; reload A from slot X - // STA Y,s ; A → slot Y (the PHI copy) - // - // Transform: insert `STA Y,s` right after the first `STA X,s` (A - // still holds the same value at that point), then drop the LDA- - // STA pair. Net: -1 inst per pattern occurrence. 
- // - // Safety constraints (all between STA X and the LDA-STA pair, in - // the same MBB, in straight-line code): - // - No instruction writes slot X (else the LDA would see a - // different value than the original STA). - // - No instruction reads OR writes slot Y (else our early STA Y - // would be observed mid-flight with a different value than - // before, or our inserted store would be overwritten and the - // intervening read of Y in the original would have seen the - // overwrite). - // - No call / inline asm / branch (conservatively: those can - // touch memory we don't model). - { - auto isStackRelMC2 = [](unsigned Op) { - return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel || - Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel || - Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel || - Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel; - }; - auto srAccess2 = [&](const MachineInstr &MI, int64_t &Off) -> bool { - if (!isStackRelMC2(MI.getOpcode())) return false; - if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return false; - Off = MI.getOperand(0).getImm(); - return true; - }; - auto isStaSr = [](const MachineInstr &MI) { - return MI.getOpcode() == W65816::STA_StackRel; - }; - auto isLdaSr = [](const MachineInstr &MI) { - return MI.getOpcode() == W65816::LDA_StackRel; - }; - SmallVector ToErase; - SmallVector, 4> ToInsert; - static int g_fireLimit = -1; - static int g_fireCount = 0; - static bool initd = false; - if (!initd) { - if (const char *e = getenv("STORE_FWD_LIMIT")) g_fireLimit = atoi(e); - initd = true; - } - for (MachineBasicBlock &MBB : MF) { - for (auto It = MBB.begin(); It != MBB.end(); ++It) { - if (!isStaSr(*It)) continue; - int64_t X; - if (!srAccess2(*It, X)) continue; - MachineInstr *StaX = &*It; - // Check if StaX is INSIDE an open PHP/PLP wrap. 
In that case - // its operand offset has been pre-bumped by +1, and inserting - // a sibling STA Y immediately after writes at the WRONG slot - // (the un-bumped Y). Walk backward: if we find a PHP without - // a matching PLP first, bail. - { - bool insideWrap = false; - int depth = 0; - auto B = It; - while (B != MBB.begin()) { - --B; - if (B->getOpcode() == W65816::PLP) depth++; - else if (B->getOpcode() == W65816::PHP) { - if (depth > 0) depth--; - else { insideWrap = true; break; } - } - } - if (insideWrap) continue; - } - // Walk forward looking for LDA X ; STA Y. Conservative bail - // on any non-tracked memory op (indirect pointer access, - // DP/abs ops, etc.) which could alias slot Y via memory. - bool ok = true; - int64_t Y = -1; - MachineInstr *LdaX = nullptr; - MachineInstr *StaY = nullptr; - for (auto Walker = std::next(It); Walker != MBB.end(); ++Walker) { - if (Walker->isDebugInstr()) continue; - if (Walker->isCall() || Walker->isInlineAsm() || - Walker->isBranch() || Walker->isReturn()) { - ok = false; break; - } - // Found LDA X? - int64_t Off; - if (isLdaSr(*Walker) && srAccess2(*Walker, Off) && Off == X) { - LdaX = &*Walker; - auto Next = std::next(Walker); - while (Next != MBB.end() && Next->isDebugInstr()) ++Next; - if (Next == MBB.end() || !isStaSr(*Next) || - !srAccess2(*Next, Y) || Y == X) { - ok = false; - } else { - StaY = &*Next; - } - break; - } - // Stack-rel access to X (write or read): bail. - if (srAccess2(*Walker, Off) && Off == X) { - ok = false; break; - } - // Any memory-touching op that's NOT a tracked stack-rel - // access — bail. Indirect pointer stores/loads (DPIndY / - // DPIndLong / abs / etc.) could alias slot Y via a pointer - // we can't trace, and the safety check below would miss it. 
- if ((Walker->mayLoad() || Walker->mayStore()) && - !isStackRelMC2(Walker->getOpcode())) { - ok = false; break; - } - // SP-modifying ops shift the stack-rel addressing window — - // a later `lda X, s` reads a DIFFERENT byte than the earlier - // `sta X, s` (or worse, the new stack pointer points into - // saved P/retaddr). Bail on TCS (direct SP write) and on - // any stack push/pop (PHx/PLx/PEA/PEI/COP/BRK). Also bail - // on PHP/PLP because the wrap pass already bumped in-wrap - // stack-rel ops by +1 — our inserted STA after STA X writes - // at the un-bumped offset which gets the WRONG slot. - { - unsigned WO = Walker->getOpcode(); - if (WO == W65816::TCS || WO == W65816::PHA || - WO == W65816::PLA || WO == W65816::PHX || - WO == W65816::PLX || WO == W65816::PHY || - WO == W65816::PLY || WO == W65816::PHP || - WO == W65816::PLP || WO == W65816::PHB || - WO == W65816::PLB || WO == W65816::PHD || - WO == W65816::PLD || WO == W65816::PHK || - WO == W65816::PEA || WO == W65816::PEI_DP) { - ok = false; break; - } - } - } - if (!ok || !LdaX || !StaY) continue; - if (g_fireLimit >= 0 && g_fireCount >= g_fireLimit) continue; - g_fireCount++; - errs() << "SF FIRE " << g_fireCount << " in " << MF.getName() - << " MBB " << MBB.getNumber() - << " X=" << X << " Y=" << StaY->getOperand(0).getImm() - << "\n"; - // Now re-walk from std::next(It) up to LdaX and verify no - // access to slot Y in that gap. - ok = true; - for (auto W2 = std::next(It); W2 != LdaX->getIterator(); ++W2) { - if (W2->isDebugInstr()) continue; - int64_t Off; - if (srAccess2(*W2, Off) && Off == Y) { ok = false; break; } - } - if (!ok) continue; - // Safe to apply: schedule the StaY-after-StaX insert, and - // erase LdaX and StaY. - ToInsert.push_back({StaX, Y}); - ToErase.push_back(LdaX); - ToErase.push_back(StaY); - Changed = true; - } - } - // Apply (insertions first; iterators stay valid through erase). 
- for (auto &P : ToInsert) { - MachineInstr *StaX = std::get<0>(P); - int64_t Y = std::get<1>(P); - MachineBasicBlock *MBB = StaX->getParent(); - DebugLoc DL = StaX->getDebugLoc(); - auto NextIt = std::next(StaX->getIterator()); - BuildMI(*MBB, NextIt, DL, TII.get(W65816::STA_StackRel)) - .addImm(Y); - } - for (MachineInstr *MI : ToErase) MI->eraseFromParent(); - } - #endif - // (Redundant CMP #0 elimination — disabled, hit VLA sum_n - // regression. Carry-flag bookkeeping across the CMP turned out to - // have more cases than my forward-walk modeled. See - // feedback_cmp_zero_elim.md.) - #if 0 - { - auto isNZSetOnA = [](unsigned Op) { - switch (Op) { - case W65816::DEA_PSEUDO: case W65816::INA_PSEUDO: - case W65816::ADC_StackRel: case W65816::ADC_DP: case W65816::ADC_Imm16: - case W65816::SBC_StackRel: case W65816::SBC_DP: case W65816::SBC_Imm16: - case W65816::AND_StackRel: case W65816::AND_DP: case W65816::AND_Imm16: - case W65816::ORA_StackRel: case W65816::ORA_DP: case W65816::ORA_Imm16: - case W65816::EOR_StackRel: case W65816::EOR_DP: case W65816::EOR_Imm16: - case W65816::LDA_StackRel: case W65816::LDA_DP: - case W65816::LDAi16imm: case W65816::LDA_Imm16: - case W65816::TXA: case W65816::TYA: - case W65816::ADCi16imm: case W65816::ADCEi16imm: - case W65816::SBCi16imm: case W65816::SBCEi16imm: - return true; - default: - return false; - } - }; - auto isCmpZero = [](const MachineInstr &MI) { - if (MI.getOpcode() != W65816::CMPi16imm) return false; - // Operand layout: lhs (Acc16), imm. Find the imm. 
- for (const MachineOperand &MO : MI.operands()) { - if (MO.isImm()) return MO.getImm() == 0; - } - return false; - }; - auto modifiesA = [](const MachineInstr &MI) { - for (const MachineOperand &MO : MI.operands()) { - if (MO.isReg() && MO.getReg() == W65816::A && MO.isDef()) - return true; - } - return false; - }; - auto readsC = [](const MachineInstr &MI) { - // We don't model individual flag bits; approximate by checking - // if the MI reads $p AND is one of the carry-consuming ops. - unsigned Op = MI.getOpcode(); - switch (Op) { - case W65816::ADC_StackRel: case W65816::ADC_DP: case W65816::ADC_Imm16: - case W65816::SBC_StackRel: case W65816::SBC_DP: case W65816::SBC_Imm16: - case W65816::ADCEi16imm: case W65816::SBCEi16imm: - case W65816::BCC: case W65816::BCS: - case W65816::ROL_A: case W65816::ROR_A: - return true; - default: - return false; - } - }; - SmallVector CmpsToErase; - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - if (!isCmpZero(MI)) continue; - // Walk backward, skipping flag-preserving instructions. - bool foundProducer = false; - auto Back = MI.getIterator(); - while (Back != MBB.begin()) { - --Back; - if (Back->isDebugInstr()) continue; - if (Back->isCall() || Back->isInlineAsm()) break; - if (modifiesA(*Back)) { - foundProducer = isNZSetOnA(Back->getOpcode()); - break; - } - bool defsP = false; - for (const MachineOperand &MO : Back->operands()) { - if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef()) { - defsP = true; break; - } - } - if (defsP) break; - } - if (!foundProducer) continue; - // Walk FORWARD from CMP: until the next C-defining MI, no MI - // reads C. - bool cConsumed = false; - for (auto Fwd = std::next(MI.getIterator()); Fwd != MBB.end(); ++Fwd) { - if (Fwd->isDebugInstr()) continue; - if (readsC(*Fwd)) { cConsumed = true; break; } - // Next def of $p: subsequent reads aren't ours. 
- bool defsP = false; - for (const MachineOperand &MO : Fwd->operands()) { - if (MO.isReg() && MO.getReg() == W65816::P && MO.isDef()) { - defsP = true; break; - } - } - if (defsP) break; - } - if (cConsumed) continue; - CmpsToErase.push_back(&MI); - } - } - for (MachineInstr *MI : CmpsToErase) MI->eraseFromParent(); - if (!CmpsToErase.empty()) Changed = true; - } - #endif - // (Narrow PHI-copy slot collapse — disabled, qsort regression.) - #if 0 - { - auto isStackRelMC2 = [](unsigned Op) { - return Op == W65816::LDA_StackRel || Op == W65816::STA_StackRel || - Op == W65816::ADC_StackRel || Op == W65816::SBC_StackRel || - Op == W65816::AND_StackRel || Op == W65816::ORA_StackRel || - Op == W65816::EOR_StackRel || Op == W65816::CMP_StackRel; - }; - auto srAccess2 = [&](const MachineInstr &MI, int64_t &Off) { - if (!isStackRelMC2(MI.getOpcode())) return false; - if (MI.getNumOperands() < 1 || !MI.getOperand(0).isImm()) return false; - Off = MI.getOperand(0).getImm(); - return true; - }; - DenseMap Refs; - DenseMap StaInst, LdaInst; - DenseMap NSta, NLda; - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB) { - int64_t Off; - if (!srAccess2(MI, Off)) continue; - Refs[Off]++; - if (MI.getOpcode() == W65816::STA_StackRel) { - NSta[Off]++; StaInst[Off] = &MI; - } else if (MI.getOpcode() == W65816::LDA_StackRel) { - NLda[Off]++; LdaInst[Off] = &MI; - } - } - } - SmallVector ToErase; - for (auto &P : Refs) { - int64_t X = P.first; - if (P.second != 2) continue; // exactly 2 references - if (NSta[X] != 1 || NLda[X] != 1) continue; - MachineInstr *Sta = StaInst[X]; - MachineInstr *Lda = LdaInst[X]; - if (Sta->getParent() != Lda->getParent()) continue; - MachineBasicBlock *MBB = Sta->getParent(); - // Sta must be before Lda. - bool staBefore = false; - for (auto It = MBB->begin(); It != MBB->end(); ++It) { - if (&*It == Sta) { staBefore = true; break; } - if (&*It == Lda) break; - } - if (!staBefore) continue; - // Next after Lda must be STA Y where Y != X. 
- auto NextIt = std::next(Lda->getIterator()); - while (NextIt != MBB->end() && NextIt->isDebugInstr()) ++NextIt; - if (NextIt == MBB->end()) continue; - int64_t Y; - if (NextIt->getOpcode() != W65816::STA_StackRel || - !srAccess2(*NextIt, Y) || Y == X) continue; - // Between Sta and Lda, no read/write of slot Y, no call, no - // anything that would re-set slot Y's value mid-flight. - bool ok = true; - for (auto It = std::next(Sta->getIterator()); It != Lda->getIterator(); - ++It) { - if (It->isDebugInstr()) continue; - if (It->isCall() || It->isInlineAsm()) { ok = false; break; } - int64_t Off; - if (srAccess2(*It, Off) && Off == Y) { ok = false; break; } - } - if (!ok) continue; - // Redirect the original STA to write to Y; delete the LDA-STA pair. - Sta->getOperand(0).setImm(Y); - ToErase.push_back(Lda); - ToErase.push_back(&*NextIt); - Changed = true; - } - for (MachineInstr *MI : ToErase) MI->eraseFromParent(); - } - #endif + // Three prototype peepholes were tried here and removed once shown + // to regress benchmarks; design notes in + // feedback_close_gap_attempts_round2.md / feedback_cmp_zero_elim.md: + // - PHI store-forwarding (CRC32 regression / memmove safety hole). + // - Redundant CMP #0 elimination (VLA sum_n carry-flag bookkeeping). + // - Narrow PHI-copy slot collapse (qsort regression). return Changed; } diff --git a/src/llvm/lib/Target/W65816/W65816SpillToX.cpp b/src/llvm/lib/Target/W65816/W65816SpillToX.cpp index 765976a..7f14eb4 100644 --- a/src/llvm/lib/Target/W65816/W65816SpillToX.cpp +++ b/src/llvm/lib/Target/W65816/W65816SpillToX.cpp @@ -127,7 +127,7 @@ static bool touchesX(const MachineInstr &MI, const TargetRegisterInfo *TRI) { return xEffect(MI, TRI) != XNone; } -// Returns true if MI is `STAfi $a, slot, 0`. +// Returns FI if MI is `STAfi $a, slot, 0`, else -1. 
static int matchSTAfi(const MachineInstr &MI) { if (MI.getOpcode() != W65816::STAfi) return -1; if (MI.getNumOperands() < 3) return -1; diff --git a/src/llvm/lib/Target/W65816/W65816StackRelToImg.cpp b/src/llvm/lib/Target/W65816/W65816StackRelToImg.cpp index 073905a..e2a9524 100644 --- a/src/llvm/lib/Target/W65816/W65816StackRelToImg.cpp +++ b/src/llvm/lib/Target/W65816/W65816StackRelToImg.cpp @@ -800,33 +800,6 @@ bool W65816StackRelToImg::runOnMachineFunction(MachineFunction &MF) { // unhandled — they can shift SP arbitrarily. Caller must bail. return 0; }; - auto miBailsAnalysis = [](const MachineInstr &MI) -> bool { - // We don't bail on TCS or ADJCALLSTACK*. TCS in prologue/epilogue - // resets SP to a known value (the "canonical" SP for that region); - // since stack-rel accesses don't span TCS in well-formed code (the - // prologue allocates, body uses, epilogue deallocates), treating - // SP as continuing across TCS gives correct relative offsets for - // accesses inside each region. ADJCALLSTACK* aren't usually - // present at pre-emit time (PEI eliminates them or AsmPrinter - // handles). If they're still present, treat as 0 SP-shift — - // the actual PUSH16 ops carry the real shift. - return false; - }; - auto miSpDeltaWithAdj = [&](const MachineInstr &MI) -> int { - if (MI.getOpcode() == W65816::ADJCALLSTACKDOWN || - MI.getOpcode() == W65816::ADJCALLSTACKUP) { - // Skip — the actual PUSH16/PEA/PHA ops inside the call seq - // carry the SP delta. - return 0; - } - if (MI.getOpcode() == W65816::TCS) { - // TCS sets SP; we treat it as a "reset to canonical SP" point. - // Return 0 here; the calling code can do the reset. 
- return 0; - } - return 0; - }; - (void)miSpDeltaWithAdj; while (!Worklist.empty() && SpAdjValid) { MachineBasicBlock *MBB = Worklist.pop_back_val(); if (!Visited.insert(MBB).second) continue; diff --git a/src/llvm/lib/Target/W65816/W65816StackSlotMerge.cpp b/src/llvm/lib/Target/W65816/W65816StackSlotMerge.cpp index a84f13c..66c09e3 100644 --- a/src/llvm/lib/Target/W65816/W65816StackSlotMerge.cpp +++ b/src/llvm/lib/Target/W65816/W65816StackSlotMerge.cpp @@ -166,20 +166,26 @@ static bool semanticallyDefsA(const MachineInstr &MI) { // Walk backward from MI in its MBB looking for the most recent A-define. // Returns the MI that defines A, or nullptr if none in the same MBB. -// Skips debug instructions. Stops at MBB boundary, calls, branches, -// inline asm. -static MachineInstr *findPriorADef(MachineInstr *MI) { +// Skips debug instructions. When BailOnCall is true, also stops at +// calls / inline asm (used by the Case (3) twin check where call effects +// invalidate the value-equivalence reasoning). +static MachineInstr *findADefBackward(MachineInstr *MI, bool BailOnCall) { MachineBasicBlock *MBB = MI->getParent(); auto It = MI->getIterator(); while (It != MBB->begin()) { --It; if (It->isDebugInstr()) continue; - if (It->isCall() || It->isInlineAsm()) return nullptr; + if (BailOnCall && (It->isCall() || It->isInlineAsm())) return nullptr; if (semanticallyDefsA(*It)) return &*It; } return nullptr; } +// Convenience: Case (3) twin matcher (bails on calls/inline asm). +static MachineInstr *findPriorADef(MachineInstr *MI) { + return findADefBackward(MI, /*BailOnCall=*/true); +} + // Walk forward from `Start` (exclusive) up to (but not including) `End` // in the same MBB, tracking whether slot `WatchSlot` is written. @@ -252,17 +258,9 @@ static bool usesFlagsP(const MachineInstr &MI) { } -// Returns the MOST RECENT A-defining MI strictly before MI in its MBB, -// skipping debug instructions. Returns nullptr if none in the same MBB. 
+// Convenience: Case (2) twin matcher (does NOT bail on calls/inline asm). static MachineInstr *findMostRecentADef(MachineInstr *MI) { - MachineBasicBlock *MBB = MI->getParent(); - auto It = MI->getIterator(); - while (It != MBB->begin()) { - --It; - if (It->isDebugInstr()) continue; - if (semanticallyDefsA(*It)) return &*It; - } - return nullptr; + return findADefBackward(MI, /*BailOnCall=*/false); } @@ -283,7 +281,6 @@ static MachineInstr *findMostRecentADef(MachineInstr *MI) { static MachineInstr *findTwin(MachineInstr *StaX, ArrayRef StasY) { MachineBasicBlock *MBBStaX = StaX->getParent(); - int64_t XOff = StaX->getOperand(0).getImm(); // Cases (1) + (2): same MBB. for (MachineInstr *StaY : StasY) { if (StaY->getParent() != MBBStaX) continue; @@ -342,7 +339,6 @@ static MachineInstr *findTwin(MachineInstr *StaX, } if (XConst == YConst) return StaY; } - (void)XOff; return nullptr; } diff --git a/src/llvm/lib/Target/W65816/W65816TiedDefSpill.cpp b/src/llvm/lib/Target/W65816/W65816TiedDefSpill.cpp index 3942ad9..a1a0c80 100644 --- a/src/llvm/lib/Target/W65816/W65816TiedDefSpill.cpp +++ b/src/llvm/lib/Target/W65816/W65816TiedDefSpill.cpp @@ -8,10 +8,10 @@ // // Pre-regalloc pass: when a tied-def Acc16 instruction (ADCfi, SBCfi, // ANDfi, ORAfi, EORfi, ADCi16imm, SBCi16imm, ANDi16imm, ORAi16imm, -// EORi16imm, ADCabs, SBCabs, ANDabs, ORAabs, EORabs, INA_PSEUDO, -// DEA_PSEUDO, ASLA16, LSRA16, NEGA16, SHL8A, SRL8A, SRA15A, etc.) has -// a source vreg whose value is *also* needed past the consumer, fast -// regalloc fails to insert the necessary save/restore on its own. +// EORi16imm, ADCabs, SBCabs -- see isTiedAcc16Consumer below for the +// authoritative list) has a source vreg whose value is *also* needed +// past the consumer, fast regalloc fails to insert the necessary +// save/restore on its own. 
// (Acc16 has exactly one physical register, so the consumer's // tied-def overwrites the source; with multiple consumers/post-uses // the source must be spilled and reloaded.) diff --git a/src/llvm/lib/Target/W65816/W65816UnLSR.cpp b/src/llvm/lib/Target/W65816/W65816UnLSR.cpp index 425c334..5143cb1 100644 --- a/src/llvm/lib/Target/W65816/W65816UnLSR.cpp +++ b/src/llvm/lib/Target/W65816/W65816UnLSR.cpp @@ -59,10 +59,6 @@ using namespace llvm; #define DEBUG_TYPE "w65816-un-lsr" -namespace llvm { -void initializeW65816UnLSRPass(PassRegistry &); -} - namespace { class W65816UnLSR : public FunctionPass { @@ -84,7 +80,6 @@ public: private: bool processLoop(Loop *L); bool processCounterToPtrPHIs(Loop *L); - bool processReturnedCounter(Loop *L); }; } // namespace @@ -107,7 +102,6 @@ bool W65816UnLSR::runOnFunction(Function &F) { for (Loop *L : LI) { Changed |= processLoop(L); Changed |= processCounterToPtrPHIs(L); - // processReturnedCounter remains disabled — see note above. SmallVector Worklist(L->begin(), L->end()); while (!Worklist.empty()) { Loop *Sub = Worklist.pop_back_val(); @@ -120,241 +114,6 @@ bool W65816UnLSR::runOnFunction(Function &F) { } -// strLen-style undo: LSR converts `return p - s` into a counter PHI -// `%lsr.iv` that increments per iter and is returned directly: -// %lsr.iv = phi i16 [-1, %entry], [%lsr.iv.next, %latch] -// %p.0 = phi ptr [%s, %entry], [%incdec.ptr, %latch] -// %incdec.ptr = getelementptr i8, %p.0, i32 1 -// %lsr.iv.next = add i16 %lsr.iv, 1 -// br ..., %exit, %loop -// %exit: -// ret i16 %lsr.iv.next -// -// LSR's reasoning: cheaper to maintain a counter than compute (p - s) -// at exit. On W65816 the opposite is true: counter inc per iter costs -// 5 cyc/iter * N iters; one-time sub at exit costs ~10 cyc total. 
-// -// This undo finds the counter PHI, verifies its only out-of-loop use -// is via LCSSA → return, finds the sibling pointer PHI with the same -// stride, and replaces the return value with -// `(i16)(p_lcssa - base) + (K_init + 1)`. Erases the counter PHI. -// -// Saves ~5 cyc/iter on strLen-shape loops with a returned counter. -bool W65816UnLSR::processReturnedCounter(Loop *L) { - BasicBlock *Header = L->getHeader(); - BasicBlock *Latch = L->getLoopLatch(); - BasicBlock *Preheader = L->getLoopPreheader(); - if (!Latch || !Preheader) return false; - - // Single-exit loop. - SmallVector ExitBlocks; - L->getExitBlocks(ExitBlocks); - if (ExitBlocks.size() != 1) return false; - BasicBlock *Exit = ExitBlocks[0]; - - // Find a candidate counter PHI: integer, init=ConstantInt, step=+1. - PHINode *CounterPHI = nullptr; - ConstantInt *KInit = nullptr; - BinaryOperator *CounterStep = nullptr; - for (PHINode &PN : Header->phis()) { - if (!PN.getType()->isIntegerTy()) continue; - if (PN.getNumIncomingValues() != 2) continue; - Value *Init = nullptr, *Step = nullptr; - for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) { - BasicBlock *Pred = PN.getIncomingBlock(i); - if (L->contains(Pred)) Step = PN.getIncomingValue(i); - else Init = PN.getIncomingValue(i); - } - if (!Init || !Step) continue; - auto *InitC = dyn_cast(Init); - if (!InitC) continue; - auto *StepBO = dyn_cast(Step); - if (!StepBO || StepBO->getOpcode() != Instruction::Add) continue; - Value *Other = nullptr; - if (StepBO->getOperand(0) == &PN) Other = StepBO->getOperand(1); - else if (StepBO->getOperand(1) == &PN) Other = StepBO->getOperand(0); - if (!Other) continue; - auto *StepCI = dyn_cast(Other); - if (!StepCI || !StepCI->isOne()) continue; - CounterPHI = &PN; - KInit = InitC; - CounterStep = StepBO; - break; - } - if (!CounterPHI) return false; - - // The counter PHI must be used INSIDE the loop only by its increment - // and OUTSIDE the loop only via an LCSSA PHI in the exit block that - // feeds 
a return. Same for the increment. - auto isOnlyInLoopUseTheStep = [&](Value *V) { - for (User *U : V->users()) { - auto *UI = dyn_cast(U); - if (!UI) return false; - if (!L->contains(UI)) continue; // out-of-loop is handled separately - if (UI == CounterStep) continue; - // The PHI itself is allowed (V might be CounterStep, used by - // CounterPHI's back-edge incoming). - if (UI == CounterPHI) continue; - return false; - } - return true; - }; - if (!isOnlyInLoopUseTheStep(CounterPHI)) return false; - if (!isOnlyInLoopUseTheStep(CounterStep)) return false; - - // Find a use of CounterPHI or CounterStep that's a ReturnInst. - // The use might be DIRECT (no LCSSA — common after LCSSA cleanup) - // or via an LCSSA PHI in the exit block. - ReturnInst *Ret = nullptr; - Value *RetSource = nullptr; // the value the ret reads - PHINode *ExitLCSSA = nullptr; // optional LCSSA PHI to erase - bool fromNext = false; // true if return source is CounterStep - auto findRet = [&](Value *V, bool isNext) -> bool { - for (User *U : V->users()) { - auto *UI = dyn_cast(U); - if (!UI) continue; - // Skip in-loop uses (those are the counter increment chain). - if (L->contains(UI->getParent())) continue; - if (auto *R = dyn_cast(UI)) { - if (R->getReturnValue() != V) continue; - Ret = R; RetSource = V; fromNext = isNext; return true; - } - // LCSSA PHI in the exit block? - if (auto *PN = dyn_cast(UI)) { - if (PN->getParent() != Exit) continue; - if (PN->getNumIncomingValues() != 1) continue; - if (PN->getIncomingValue(0) != V) continue; - if (!PN->hasOneUse()) continue; - auto *R = dyn_cast(PN->user_back()); - if (!R || R->getReturnValue() != PN) continue; - Ret = R; RetSource = V; fromNext = isNext; ExitLCSSA = PN; - return true; - } - } - return false; - }; - if (!findRet(CounterStep, true) && !findRet(CounterPHI, false)) - return false; - - // Find a sibling pointer PHI: init=Base, latch incoming is a - // `getelementptr i8, %ptr, 1` of itself. 
- PHINode *PtrPHI = nullptr; - Value *Base = nullptr; - GetElementPtrInst *PtrStep = nullptr; - for (PHINode &PN : Header->phis()) { - if (!PN.getType()->isPointerTy()) continue; - if (PN.getNumIncomingValues() != 2) continue; - Value *Init = nullptr, *Step = nullptr; - for (unsigned i = 0; i < PN.getNumIncomingValues(); ++i) { - BasicBlock *Pred = PN.getIncomingBlock(i); - if (L->contains(Pred)) Step = PN.getIncomingValue(i); - else Init = PN.getIncomingValue(i); - } - if (!Init || !Step) continue; - auto *StepGEP = dyn_cast(Step); - if (!StepGEP) continue; - if (StepGEP->getPointerOperand() != &PN) continue; - if (StepGEP->getNumIndices() != 1) continue; - if (!StepGEP->getSourceElementType()->isIntegerTy(8)) continue; - auto *StrideCI = dyn_cast(StepGEP->getOperand(1)); - if (!StrideCI || !StrideCI->isOne()) continue; - PtrPHI = &PN; - Base = Init; - PtrStep = StepGEP; - break; - } - if (!PtrPHI) return false; - - // The pointer-PHI must have an LCSSA in the exit (so we can compute - // p_lcssa - base). Find it or create one. - PHINode *PtrLCSSA = nullptr; - for (PHINode &EPN : Exit->phis()) { - if (EPN.getNumIncomingValues() != 1) continue; - if (EPN.getIncomingValue(0) == PtrPHI) { - PtrLCSSA = &EPN; break; - } - } - if (!PtrLCSSA) { - // Create LCSSA for PtrPHI. - IRBuilder<> B(&Exit->front()); - PtrLCSSA = B.CreatePHI(PtrPHI->getType(), 1, "unlsr.p.lcssa"); - PtrLCSSA->addIncoming(PtrPHI, Latch); - } - - // Build replacement value: (i16)(p_lcssa - base) + (K_init + (fromNext ? 1 : 0)) - // For fromNext=true (returning %counter.next): value = K_init + iters - // p_lcssa - base = iters (in bytes, stride 1) → value = K_init + (p_lcssa - base) - // But we want: counter.next at exit = K_init + iters; and p_lcssa - base = iters. - // So replacement = (i16)(p_lcssa - base) + K_init. - // For strLen: K_init = -1; iters at exit = K (where ret = K - 1 + 1 = K) - // Wait let me re-derive. counter init = -1. iter 1 entry: counter = -1. 
- // iter 1 exit: counter.next = 0. Suppose exit-iter is iter K. Then at - // iter K's icmp-true, counter.next = -1 + K. - // And p_lcssa = base + (K - 1) (since iter K had p.0 = base + K-1). - // So p_lcssa - base = K - 1. - // We want counter.next = K - 1 (because exit-iter is iter K, but counter.next - // was computed before icmp tested 0 - so it's K - 1 (with K iters = K decisions)) - // Hmm, off-by-one is tricky. Let me just test empirically. - - // The "return value type" we'll cast to. - Type *RetTy = Ret->getReturnValue()->getType(); - if (!RetTy->isIntegerTy()) return false; - Instruction *InsertPt = ExitLCSSA ? ExitLCSSA->getNextNode() : Ret; - IRBuilder<> B(InsertPt); - // (p_lcssa - base) as integer. - Value *PLcssaInt = B.CreatePtrToInt(PtrLCSSA, Type::getInt32Ty(Header->getContext()), "unlsr.plcssa.i"); - Value *BaseInt = B.CreatePtrToInt(Base, Type::getInt32Ty(Header->getContext()), "unlsr.base.i"); - Value *Diff = B.CreateSub(PLcssaInt, BaseInt, "unlsr.diff"); - // Truncate to counter type. - Value *DiffI = B.CreateTrunc(Diff, CounterPHI->getType(), "unlsr.diff.trunc"); - // For fromNext (returning %counter.next): replacement = diff + (K_init + 1). - // At exit, counter.next = K_init + iters. - // p_lcssa - base = iters (in bytes; stride 1). Wait but iters is the iter count. - // Let me re-check with concrete example. - // strLen("a\0"): iter 1: p.0 = s, *p='a'!=0, p++, counter=-1, counter.next=0. - // iter 2: p.0 = s+1, *p=0, exit. counter=0, counter.next=1. - // At exit: counter.next = 1. iters before exit-iter's icmp-true = 2. - // p_lcssa = s+1 (the iter-2 entry value). p_lcssa - base = 1. - // counter.next = 1 = K_init + 2 = -1 + 2 = 1. ✓ - // p_lcssa - base = 1. So counter.next = p_lcssa - base + 0. - // (K_init + iters - (iters - (p_lcssa - base))) = K_init + (p_lcssa - base) = K_init + 1. - // Wait: counter.next = K_init + iters; p_lcssa - base = iters - 1. - // So counter.next = K_init + (p_lcssa - base) + 1. 
- // For K_init = -1: counter.next = -1 + 1 + 1 = 1 if iters=2. ✓ - // So replacement = diff + (K_init + 1). - int64_t Adjust = KInit->getSExtValue() + (fromNext ? 1 : 0); - Value *Result = DiffI; - if (Adjust != 0) { - Result = B.CreateAdd(DiffI, - ConstantInt::get(CounterPHI->getType(), Adjust), - "unlsr.result"); - } - // Cast to return type if different. - if (Result->getType() != RetTy) { - if (CounterPHI->getType()->getIntegerBitWidth() < - RetTy->getIntegerBitWidth()) - Result = B.CreateZExt(Result, RetTy); - else - Result = B.CreateTrunc(Result, RetTy); - } - // Replace the return. If there's an LCSSA PHI, replace it. Otherwise - // replace the direct use in `ret`. - if (ExitLCSSA) { - ExitLCSSA->replaceAllUsesWith(Result); - ExitLCSSA->eraseFromParent(); - } else { - Ret->setOperand(0, Result); - } - - // Erase the counter PHI and its increment. - CounterStep->replaceAllUsesWith(UndefValue::get(CounterPHI->getType())); - CounterPHI->replaceAllUsesWith(UndefValue::get(CounterPHI->getType())); - CounterStep->eraseFromParent(); - CounterPHI->eraseFromParent(); - return true; -} - - // strcpy-style undo: LSR converts two pointer PHIs (`src.addr.0` and // `d.0` each stepping by 1) into a single counter PHI (`lsr.iv`) plus // GEPs `(base, counter)` per iter. On 65816 the counter+GEP form diff --git a/src/llvm/lib/Target/W65816/W65816WidenAcc16.cpp b/src/llvm/lib/Target/W65816/W65816WidenAcc16.cpp index 226e1dc..b3279d6 100644 --- a/src/llvm/lib/Target/W65816/W65816WidenAcc16.cpp +++ b/src/llvm/lib/Target/W65816/W65816WidenAcc16.cpp @@ -84,27 +84,6 @@ static bool flowsToIncompatiblePhysReg(Register VReg, return false; } -// Returns true if VReg's def is a COPY from a physreg whose class is not -// Wide16-compatible. copyPhysReg only handles a fixed set of source/dest -// pairs; an incompatible source physreg (e.g., DPF0, the i64-return -// high-half carrier) lowered to an IMG dest would crash with an -// "unhandled copyPhysReg" assertion at AsmPrinter time. 
(Currently -// only the Phase-2 PHI widening uses this; that's disabled, so mark -// unused.) -[[maybe_unused]] static bool comesFromIncompatiblePhysReg(Register VReg, - const MachineRegisterInfo &MRI) { - for (auto &D : MRI.def_instructions(VReg)) { - if (!D.isCopy()) continue; - const MachineOperand &Src = D.getOperand(1); - if (!Src.isReg() || !Src.getReg().isPhysical()) continue; - Register P = Src.getReg(); - if (P == W65816::A) continue; - if (P >= W65816::IMG0 && P <= W65816::IMG15) continue; - return true; - } - return false; -} - // Returns true if the vreg is used by any PHI. PHI input/result must // share the same register class (verifier requirement). Rather than // also widen the PHI's result and recursively all of its uses, we skip @@ -212,196 +191,9 @@ bool W65816WidenAcc16::runOnMachineFunction(MachineFunction &MF) { Changed = true; } - // Phase 2: PHI cycle widening. EXPERIMENTAL, currently disabled — - // see end of pass for explanation. - #if 0 - // PHIs whose def class is Acc16 keep - // the value pinned to $a across iterations, forcing stack spills - // when the PHI is live across calls or other A-clobbering ops. - // For sumSquares-style loops with an i32 accumulator, this manifests - // as per-iter `LDA slot ; ADC ; STA slot ; LDA slot ; STA slot` (the - // last LDA/STA pair is the PHI-back-edge copy). If we widen the - // PHI's def to Wide16, regalloc can keep it in an IMG slot and the - // back-edge PHI copy collapses to a register coalesce. - // - // To widen a PHI: - // 1. Compute the SCC of Acc16 vregs connected by PHI edges (PHI - // def ↔ PHI incoming vreg). This catches mutually-recursive - // PHIs in nested loops. - // 2. For every member: verify all non-PHI uses accept Wide16, no - // flow to a physreg, single def. - // 3. For each PHI in the SCC, walk its incoming list. 
Each - // incoming vreg is either ALREADY in the SCC (another PHI, no - // bridge needed) or an external Acc16 vreg whose value flows - // into the SCC — bridge it by inserting `WWide = COPY W` at - // the end of the predecessor block and pointing the PHI's - // incoming at WWide. - // 4. Change every SCC member's register class to Wide16. - auto worklistInsertIfAcc16 = [&MRI](Register V, - DenseSet &Seen, - SmallVectorImpl &WL) { - if (!V.isVirtual()) return; - if (MRI.getRegClass(V) != &W65816::Acc16RegClass) return; - if (!Seen.insert(V).second) return; - WL.push_back(V); - }; - - SmallVector AcctPhis; - for (MachineBasicBlock &MBB : MF) { - for (MachineInstr &MI : MBB.phis()) { - Register DefV = MI.getOperand(0).getReg(); - if (MRI.getRegClass(DefV) == &W65816::Acc16RegClass) { - AcctPhis.push_back(&MI); - } - } - } - DenseSet ProcessedPhiVregs; - for (MachineInstr *Seed : AcctPhis) { - Register SeedDef = Seed->getOperand(0).getReg(); - if (ProcessedPhiVregs.count(SeedDef)) continue; - // Build SCC by following PHI edges in both directions. - DenseSet Comp; - SmallVector Stack; - worklistInsertIfAcc16(SeedDef, Comp, Stack); - while (!Stack.empty()) { - Register V = Stack.pop_back_val(); - // Forward: V flows into other PHIs as an incoming → include those PHI defs. - for (auto &U : MRI.use_nodbg_instructions(V)) { - if (!U.isPHI()) continue; - Register PhiDef = U.getOperand(0).getReg(); - worklistInsertIfAcc16(PhiDef, Comp, Stack); - } - // Backward: if V is itself a PHI def, include the incoming vregs. - MachineInstr *DM = &*MRI.def_instructions(V).begin(); - if (!DM || !DM->isPHI()) continue; - for (unsigned i = 1, e = DM->getNumOperands(); i < e; i += 2) { - MachineOperand &MO = DM->getOperand(i); - if (!MO.isReg() || !MO.getReg().isVirtual()) continue; - worklistInsertIfAcc16(MO.getReg(), Comp, Stack); - } - } - for (Register V : Comp) ProcessedPhiVregs.insert(V); - - // Validate every member. 
PHI uses are ACCEPTED when the consumer - // PHI is itself in the SCC (those PHIs are being widened in - // lock-step). Narrow-class uses (e.g., INA_PSEUDO's tied-def - // input requires Acc16) are ALSO accepted — we'll insert a - // Wide16→Acc16 COPY at the use site after widening. The only - // unrecoverable cases are: PHI uses where the consumer PHI is - // outside the SCC (forcing cross-SCC class merging), and physreg - // flow to $x/$y/etc. (handled separately above). - auto usesAcceptInSCC = [&](Register V, - SmallVectorImpl *NarrowSites) - -> bool { - for (auto &MO : MRI.use_nodbg_operands(V)) { - MachineInstr *UMI = MO.getParent(); - if (UMI->isCopy()) continue; - if (UMI->isPHI()) { - Register PhiDef = UMI->getOperand(0).getReg(); - if (Comp.count(PhiDef)) continue; // co-widened - return false; - } - unsigned OpIdx = UMI->getOperandNo(&MO); - const TargetRegisterClass *Expected = - TII->getRegClass(UMI->getDesc(), OpIdx); - if (!Expected) continue; - if (Expected == &W65816::Wide16RegClass) continue; - if (Expected->hasSubClassEq(&W65816::Wide16RegClass)) continue; - // Expected is narrower than Wide16 (e.g., Acc16-only tied - // input). Mark for runtime narrowing — we'll insert a COPY - // at apply time. - if (NarrowSites) NarrowSites->push_back(&MO); - } - return true; - }; - bool ok = true; - SmallVector NarrowSites; - for (Register V : Comp) { - if (!MRI.hasOneDef(V)) { ok = false; break; } - if (flowsToIncompatiblePhysReg(V, MRI)) { ok = false; break; } - if (comesFromIncompatiblePhysReg(V, MRI)) { ok = false; break; } - if (!usesAcceptInSCC(V, &NarrowSites)) { ok = false; break; } - } - if (!ok) continue; - - // Apply widening. First insert bridge COPYs at predecessor edges - // for external (non-Comp) Acc16 incomings to each PHI in Comp. 
- SmallVector, 16> BridgeSites; - for (Register V : Comp) { - MachineInstr *DM = &*MRI.def_instructions(V).begin(); - if (!DM->isPHI()) continue; - for (unsigned i = 1, e = DM->getNumOperands(); i < e; i += 2) { - MachineOperand &MO = DM->getOperand(i); - if (!MO.isReg() || !MO.getReg().isVirtual()) continue; - Register Inc = MO.getReg(); - if (Comp.count(Inc)) continue; // in-SCC, no bridge needed - // External incoming: ensure it's currently Acc16; if so, we'll - // insert a COPY at the predecessor block's end. - if (MRI.getRegClass(Inc) != &W65816::Acc16RegClass && - MRI.getRegClass(Inc) != &W65816::Wide16RegClass) { - ok = false; - break; - } - BridgeSites.push_back({DM, i}); - } - if (!ok) break; - } - if (!ok) continue; - - // Insert bridges. - for (auto &Site : BridgeSites) { - MachineInstr *PhiMI = Site.first; - unsigned OpIdx = Site.second; - Register Inc = PhiMI->getOperand(OpIdx).getReg(); - MachineBasicBlock *PredMBB = PhiMI->getOperand(OpIdx + 1).getMBB(); - // If already Wide16 (e.g., another candidate widened it already), - // no bridge needed — but we still need the PHI incoming to use - // a Wide16 vreg. Use Inc directly. - if (MRI.getRegClass(Inc) == &W65816::Wide16RegClass) { - continue; - } - // Insert COPY before the predecessor's terminator(s). - auto InsertPos = PredMBB->getFirstTerminator(); - DebugLoc DL = (InsertPos == PredMBB->end()) - ? PredMBB->findBranchDebugLoc() - : InsertPos->getDebugLoc(); - Register WideInc = MRI.createVirtualRegister(&W65816::Wide16RegClass); - BuildMI(*PredMBB, InsertPos, DL, TII->get(TargetOpcode::COPY), - WideInc) - .addReg(Inc); - PhiMI->getOperand(OpIdx).setReg(WideInc); - PhiMI->getOperand(OpIdx).setIsKill(false); - } - - // Force every SCC member to Img16 (IMG-only, no A). Using Wide16 - // (A + IMG) doesn't work here: the Register Coalescer joins our - // Wide16 vregs with adjacent Acc16 vregs (intersection = Acc16) - // and narrows them back to A-only, defeating the widening. 
Img16 - // intersects Acc16 to ∅, so the coalescer can't merge — the PHI - // stays in IMG. This is correct anyway for the common case (PHI - // live across a call): A is JSL-clobbered, so it can't carry the - // value through, and IMG8..15 is the right home. - for (Register V : Comp) { - MRI.setRegClass(V, &W65816::Img16RegClass); - } - // Insert narrowing COPYs at each narrow-class use site. Each site - // is `... = OP V, ...` where the operand requires Acc16 but V is - // now Wide16. Replace with `%Vacc = COPY V (Acc16); ... = OP %Vacc, ...`. - for (MachineOperand *MO : NarrowSites) { - MachineInstr *UMI = MO->getParent(); - Register OldReg = MO->getReg(); - Register NarrowReg = - MRI.createVirtualRegister(&W65816::Acc16RegClass); - DebugLoc DL = UMI->getDebugLoc(); - BuildMI(*UMI->getParent(), UMI, DL, TII->get(TargetOpcode::COPY), - NarrowReg) - .addReg(OldReg); - MO->setReg(NarrowReg); - MO->setIsKill(false); - } - Changed = true; - } - #endif + // Phase 2: PHI cycle widening was prototyped here but never landed. + // The prototype body lived in an #if 0 block that was removed once + // we settled on Phase 1 as the only effective half of the pass. 
// Why disabled (2026-05-13 attempt): // - Widening PHI cycles to Wide16 (= A + IMG0..15) is undone by the // Register Coalescer: it joins our Wide16 vregs with adjacent diff --git a/tests/ubsan/README.md b/tests/ubsan/README.md index 28d4e79..c64f8c0 100644 --- a/tests/ubsan/README.md +++ b/tests/ubsan/README.md @@ -1,20 +1,26 @@ # tests/ubsan — UBSan-min smoke probe (Phase 6.2) -Three-case probe that exercises the `-fsanitize=undefined +Nine-case probe that exercises the `-fsanitize=undefined -fsanitize-minimal-runtime` instrumentation end-to-end on the W65816 target: -| Kind | UB | Sentinel | -|-----------------------|----------------------------------|--------------| -| `add-overflow` | i16 `INT_MAX + 1` | `$025000=0xC0DE` | -| `shift-out-of-bounds` | u16 `1 << 17` | `$025002=0xC0DF` | -| `divrem-overflow` | i16 `n / 0` | `$025004=0xC0E0` | -| (liveness) | tail of `main` reached | `$025006=0xC0DA` | +| Kind | UB | Sentinel | +|------------------------|----------------------------------|----------------------| +| `add-overflow` | i16 `INT_MAX + 1` | `$025000=0xC0DE` | +| `shift-out-of-bounds` | u16 `1 << 17` | `$025002=0xC0DF` | +| `divrem-overflow` | i16 `n / 0` | `$025004=0xC0E0` | +| `sub-overflow` | i16 `INT_MIN - 1` | `$025006=0xC0E1` | +| `mul-overflow` | i16 `INT_MAX * 2` | `$025008=0xC0E2` | +| `negate-overflow` | i16 `-INT_MIN` | `$02500A=0xC0E3` | +| `pointer-overflow` | `(char*)0xFFFFFFF0 + 0x40` | `$02500C=0xC0E4` | +| `load-invalid-value` | `_Bool` loaded from byte = 2 | `$02500E=0xC0E5` | +| `out-of-bounds` | `arr[idx>=N]` on static array | `$025010=0xC0E6` | +| (liveness) | tail of `main` reached | `$025012=0xC0DA` | -The probe ships strong override defs for the three `__ubsan_handle_*_minimal` -recovering handlers it exercises; the remaining 22 are pulled in from -`runtime/ubsan.o` so any extra UB site clang emits (e.g. constant-fold -overflow at `-O2`) still resolves cleanly. 
+The probe ships strong override defs for the nine `__ubsan_handle_*_minimal` +recovering handlers it exercises; the remaining handlers are pulled in +from `runtime/ubsan.o` so any extra UB site clang emits (e.g. +constant-fold overflow at `-O2`) still resolves cleanly. ## Build + run @@ -27,8 +33,14 @@ Expected output: MAME-READ addr=0x025000 val=0xc0de MAME-READ addr=0x025002 val=0xc0df MAME-READ addr=0x025004 val=0xc0e0 -MAME-READ addr=0x025006 val=0xc0da -MAME OK: 4 reads matched +MAME-READ addr=0x025006 val=0xc0e1 +MAME-READ addr=0x025008 val=0xc0e2 +MAME-READ addr=0x02500a val=0xc0e3 +MAME-READ addr=0x02500c val=0xc0e4 +MAME-READ addr=0x02500e val=0xc0e5 +MAME-READ addr=0x025010 val=0xc0e6 +MAME-READ addr=0x025012 val=0xc0da +MAME OK: 10 reads matched ``` ## What this probe is NOT @@ -39,9 +51,14 @@ MAME OK: 4 reads matched overrides the handlers so it can verify the *call edge* without pulling in console code. A separate diagnostic-format probe would link `libc.o` + `libcGno.o` + GNO crt0 and assert on stderr. -- It is **not** a sweep of all 25 handler kinds. The user-spec scope - is "3 representative kinds". The other 22 are link-tested - implicitly by `runtime/ubsan.o`'s symbol set being available. +- It is **not** a sweep of all 25 handler kinds. The kinds covered + are all the cheap-to-trigger recoverable handlers that clang emits + at `-O2` for the W65816 target. Aborting-only kinds (e.g. + `builtin_unreachable_minimal`, `missing_return_minimal`) cannot be + exercised here because returning from the handler after the IR + `unreachable` is itself UB. Float-cast-overflow / VLA-not-positive + / type-mismatch / CFI / Objective-C kinds are linked but not + triggered.
## Files diff --git a/tests/ubsan/runUbsanProbe.sh b/tests/ubsan/runUbsanProbe.sh index 0530f8e..fc107b5 100755 --- a/tests/ubsan/runUbsanProbe.sh +++ b/tests/ubsan/runUbsanProbe.sh @@ -7,12 +7,14 @@ # What this verifies: # - clang accepts -fsanitize=undefined -fsanitize-minimal-runtime on # the w65816 target. -# - The three exercised UB kinds (add-overflow / shift-out-of-bounds / -# divrem-overflow) instrument as expected — the handler-fired byte -# flips inside the per-kind handler override. +# - Nine exercised UB kinds (add-overflow / shift-out-of-bounds / +# divrem-overflow / sub-overflow / mul-overflow / negate-overflow / +# pointer-overflow / load-invalid-value / out-of-bounds) instrument +# as expected -- the handler-fired byte flips inside the per-kind +# handler override. # - The recovering minimal runtime returns to the caller cleanly, so # the probe continues writing sentinels past each UB site. -# - runtime/ubsan.o links + resolves the other 22 handler kinds without +# - runtime/ubsan.o links + resolves the other handler kinds without # pulling in console code that the probe doesn't need. set -eu @@ -27,7 +29,7 @@ bash "$SCRIPT_DIR/build.sh" # Link. crt0.o + the probe + ubsan.o + libgcc.o (for the i16 div+rem # helpers triggerDivByZero needs). We deliberately do NOT link libc.o -# — the probe sets memory sentinels directly, doesn't call printf, and +# -- the probe sets memory sentinels directly, doesn't call printf, and # pulling libc.o in would also pull snprintf.o (~9 KB) for no benefit. "$PROJECT_ROOT/tools/link816" -o ubsanProbe.bin \ --text-base 0x1000 --bss-base 0xA000 --map ubsanProbe.map \ @@ -39,11 +41,22 @@ bash "$SCRIPT_DIR/build.sh" ls -la ubsanProbe.bin echo "" -# Sentinels: +# Sentinels (one per recoverable handler exercised, plus a tail +# liveness sentinel). Each is a 16-bit write at $025000+kind*2. 
# $025000 = 0xC0DE add-overflow handler fired # $025002 = 0xC0DF shift-out-of-bounds handler fired # $025004 = 0xC0E0 divrem-overflow handler fired -# $025006 = 0xC0DA all three recovered and main reached its tail +# $025006 = 0xC0E1 sub-overflow handler fired +# $025008 = 0xC0E2 mul-overflow handler fired +# $02500A = 0xC0E3 negate-overflow handler fired +# $02500C = 0xC0E4 pointer-overflow handler fired +# $02500E = 0xC0E5 load-invalid-value handler fired +# $025010 = 0xC0E6 out-of-bounds handler fired +# $025012 = 0xC0DA all nine recovered and main reached its tail bash "$PROJECT_ROOT/scripts/runInMame.sh" \ "$SCRIPT_DIR/ubsanProbe.bin" \ - --check 0x025000=C0DE 0x025002=C0DF 0x025004=C0E0 0x025006=C0DA + --check \ + 0x025000=C0DE 0x025002=C0DF 0x025004=C0E0 \ + 0x025006=C0E1 0x025008=C0E2 0x02500A=C0E3 \ + 0x02500C=C0E4 0x02500E=C0E5 0x025010=C0E6 \ + 0x025012=C0DA diff --git a/tests/ubsan/ubsanProbe.c b/tests/ubsan/ubsanProbe.c index 01db8fc..9e1fbc8 100644 --- a/tests/ubsan/ubsanProbe.c +++ b/tests/ubsan/ubsanProbe.c @@ -1,44 +1,62 @@ // Phase 6.2 UBSan-min smoke probe. 
// -// Three UB cases (one each from the spec): -// kind 0 (sentinel 0xC0DE): signed-overflow add (i16 INT_MAX + 1) -// kind 1 (sentinel 0xC0DF): shift-out-of-bounds (1 << 17 on a u16) -// kind 2 (sentinel 0xC0E0): divide-by-zero (n / 0) +// Nine UB cases — one per recoverable handler kind we exercise: +// kind 0 (sentinel 0xC0DE): add-overflow (i16 INT_MAX + 1) +// kind 1 (sentinel 0xC0DF): shift-out-of-bounds (1 << 17 on a u16) +// kind 2 (sentinel 0xC0E0): divrem-overflow (n / 0) +// kind 3 (sentinel 0xC0E1): sub-overflow (INT_MIN - 1) +// kind 4 (sentinel 0xC0E2): mul-overflow (INT_MAX * 2) +// kind 5 (sentinel 0xC0E3): negate-overflow (-INT_MIN) +// kind 6 (sentinel 0xC0E4): pointer-overflow (ptr + huge offset) +// kind 7 (sentinel 0xC0E5): load-invalid-value (_Bool from byte=2) +// kind 8 (sentinel 0xC0E6): out-of-bounds (arr[idx>=N]) // -// The probe overrides the three relevant `__ubsan_handle_*_minimal` -// recovering handlers with strong definitions that record their -// firing in a static state byte. After each UB, the probe writes -// 0xC0DE + kind to $025000 to prove (a) the instrumentation fired and -// (b) execution recovered cleanly past the UB. The recover handler +// The probe overrides each relevant `__ubsan_handle_*_minimal` recovering +// handler with a strong definition that records its firing in a static +// state byte. After each UB, the probe writes 0xC0DE+kind to a per-kind +// 16-bit slot at 0x025000+kind*2 to prove (a) the instrumentation fired +// and (b) execution recovered cleanly past the UB. The recover handler // returning normally is the whole point of -fsanitize-minimal-runtime // + -fsanitize-recover; this probe is what proves the round-trip. // -// To verify all three at once we cascade the sentinel writes through a -// staircase of $025000 / $025002 / $025004 word stores so the smoke -// harness can read three independent 16-bit values back from MAME. 
+// To verify all nine at once we cascade the sentinel writes through a +// staircase of word stores so the smoke harness can read independent +// 16-bit values back from MAME. // // Compile with -fsanitize=undefined -fsanitize-minimal-runtime. #include <stdint.h> -// Bank-2 BSS at $025000-$025006 — outside the SHR shadow and outside +// Bank-2 BSS at $025000-$025013 -- outside the SHR shadow and outside // $C000-$CFFF IO window. link816 places .bss at the user-specified // --bss-base (we pass 0xA000) so these constant addresses are // independent of BSS layout. -#define MARK_ADD_OVF ((volatile uint16_t *)0x025000UL) -#define MARK_SHIFT_OOB ((volatile uint16_t *)0x025002UL) -#define MARK_DIV_ZERO ((volatile uint16_t *)0x025004UL) -#define DONE_SENTINEL ((volatile uint16_t *)0x025006UL) +#define MARK_ADD_OVF ((volatile uint16_t *)0x025000UL) +#define MARK_SHIFT_OOB ((volatile uint16_t *)0x025002UL) +#define MARK_DIV_ZERO ((volatile uint16_t *)0x025004UL) +#define MARK_SUB_OVF ((volatile uint16_t *)0x025006UL) +#define MARK_MUL_OVF ((volatile uint16_t *)0x025008UL) +#define MARK_NEG_OVF ((volatile uint16_t *)0x02500AUL) +#define MARK_PTR_OVF ((volatile uint16_t *)0x02500CUL) +#define MARK_LOAD_INVAL ((volatile uint16_t *)0x02500EUL) +#define MARK_OUT_OF_BNDS ((volatile uint16_t *)0x025010UL) +#define DONE_SENTINEL ((volatile uint16_t *)0x025012UL) // Strong overrides win over runtime/ubsan.o's weak-by-link defaults. // Each fires once per kind and records that the corresponding UB // instrumentation reached us. Recovering handlers MUST return so the // probe continues executing past the UB site.
-static volatile uint8_t handlerFiredAdd = 0; -static volatile uint8_t handlerFiredShift = 0; -static volatile uint8_t handlerFiredDiv = 0; +static volatile uint8_t handlerFiredAdd = 0; +static volatile uint8_t handlerFiredShift = 0; +static volatile uint8_t handlerFiredDiv = 0; +static volatile uint8_t handlerFiredSub = 0; +static volatile uint8_t handlerFiredMul = 0; +static volatile uint8_t handlerFiredNeg = 0; +static volatile uint8_t handlerFiredPtr = 0; +static volatile uint8_t handlerFiredLoadInv = 0; +static volatile uint8_t handlerFiredOob = 0; void __ubsan_handle_add_overflow_minimal(void) { @@ -56,6 +74,36 @@ void __ubsan_handle_divrem_overflow_minimal(void) { } +void __ubsan_handle_sub_overflow_minimal(void) { + handlerFiredSub = 1; +} + + +void __ubsan_handle_mul_overflow_minimal(void) { + handlerFiredMul = 1; +} + + +void __ubsan_handle_negate_overflow_minimal(void) { + handlerFiredNeg = 1; +} + + +void __ubsan_handle_pointer_overflow_minimal(void) { + handlerFiredPtr = 1; +} + + +void __ubsan_handle_load_invalid_value_minimal(void) { + handlerFiredLoadInv = 1; +} + + +void __ubsan_handle_out_of_bounds_minimal(void) { + handlerFiredOob = 1; +} + + // Each UB site goes through a noinline wrapper so the optimizer // cannot constant-fold the operation away. 
__attribute__((noinline)) // + volatile inputs blocks the obvious folding paths; we also wrap @@ -79,6 +127,47 @@ static int16_t triggerDivByZero(int16_t a, int16_t b) { } +__attribute__((noinline)) +static int16_t triggerSubOverflow(int16_t a, int16_t b) { + return a - b; +} + + +__attribute__((noinline)) +static int16_t triggerMulOverflow(int16_t a, int16_t b) { + return a * b; +} + + +__attribute__((noinline)) +static int16_t triggerNegateOverflow(int16_t a) { + return -a; +} + + +__attribute__((noinline)) +static char *triggerPointerOverflow(char *p, int32_t o) { + return p + o; +} + + +__attribute__((noinline)) +static int triggerLoadInvalidValue(volatile uint8_t *p) { + _Bool v = *(_Bool *)p; + // Use the value so the load isn't dead-stripped. We don't trust + // the post-instrumentation cast to a 0/1 narrow value -- the + // important thing is the load itself fired the handler. + return v ? 1 : 0; +} + + +__attribute__((noinline)) +static int16_t triggerOutOfBounds(int16_t idx) { + static int16_t arr[4] = { 10, 20, 30, 40 }; + return arr[idx]; +} + + int main(void) { // --- case 0: signed-overflow add (INT16_MAX + 1) --- volatile int16_t aMax = 0x7FFF; @@ -104,12 +193,58 @@ int main(void) { *MARK_DIV_ZERO = 0xC0E0; } - // Final liveness sentinel — only written if we got past all three + // --- case 3: sub-overflow (INT16_MIN - 1) --- + volatile int16_t aMin = (int16_t)0x8000; + (void)triggerSubOverflow(aMin, aOne); + if (handlerFiredSub) { + *MARK_SUB_OVF = 0xC0E1; + } + + // --- case 4: mul-overflow (INT16_MAX * 2 wraps) --- + volatile int16_t aTwo = 2; + (void)triggerMulOverflow(aMax, aTwo); + if (handlerFiredMul) { + *MARK_MUL_OVF = 0xC0E2; + } + + // --- case 5: negate-overflow (-INT16_MIN) --- + (void)triggerNegateOverflow(aMin); + if (handlerFiredNeg) { + *MARK_NEG_OVF = 0xC0E3; + } + + // --- case 6: pointer-overflow (signed-wrap on i16 addr) --- + // Cast a high address to char* and add a positive offset that + // overflows the address calculation. 
-fsanitize=pointer-overflow + // fires when base + offset wraps the address space. + volatile uint32_t hiAddr = 0xFFFFFFF0UL; + volatile int32_t big = 0x40; + char *p = (char *)(uintptr_t)hiAddr; + (void)triggerPointerOverflow(p, big); + if (handlerFiredPtr) { + *MARK_PTR_OVF = 0xC0E4; + } + + // --- case 7: load-invalid-value (_Bool from byte=2) --- + volatile uint8_t boolByte = 2; + (void)triggerLoadInvalidValue(&boolByte); + if (handlerFiredLoadInv) { + *MARK_LOAD_INVAL = 0xC0E5; + } + + // --- case 8: out-of-bounds (static arr[idx>=N]) --- + volatile int16_t badIdx = 7; + (void)triggerOutOfBounds(badIdx); + if (handlerFiredOob) { + *MARK_OUT_OF_BNDS = 0xC0E6; + } + + // Final liveness sentinel -- only written if we got past all nine // UB sites without the runtime aborting (which would have spun on // a BRK_pseudo at $70 instead of reaching here). *DONE_SENTINEL = 0xC0DA; - // Halt — crt0's return-from-main path hits a BRK that headless + // Halt -- crt0's return-from-main path hits a BRK that headless // MAME wild-jumps from, so spin-wait instead.
while (1) { } diff --git a/tests/ubsan/ubsanProbe.manifest.json b/tests/ubsan/ubsanProbe.manifest.json index abbcef3..cfe69df 100644 --- a/tests/ubsan/ubsanProbe.manifest.json +++ b/tests/ubsan/ubsanProbe.manifest.json @@ -13,7 +13,7 @@ "num": 1, "name": "SEG1", "base": "0x001000", - "size": 3432, + "size": 5084, "image": "ubsanProbe.bin", "entry_offset": "0x0000" } @@ -22,6 +22,12 @@ {"addr": "0x025000", "expect": "0xC0DE", "label": "add-overflow handler fired"}, {"addr": "0x025002", "expect": "0xC0DF", "label": "shift-out-of-bounds handler fired"}, {"addr": "0x025004", "expect": "0xC0E0", "label": "divrem-overflow handler fired"}, - {"addr": "0x025006", "expect": "0xC0DA", "label": "main reached tail after all three recoveries"} + {"addr": "0x025006", "expect": "0xC0E1", "label": "sub-overflow handler fired"}, + {"addr": "0x025008", "expect": "0xC0E2", "label": "mul-overflow handler fired"}, + {"addr": "0x02500A", "expect": "0xC0E3", "label": "negate-overflow handler fired"}, + {"addr": "0x02500C", "expect": "0xC0E4", "label": "pointer-overflow handler fired"}, + {"addr": "0x02500E", "expect": "0xC0E5", "label": "load-invalid-value handler fired"}, + {"addr": "0x025010", "expect": "0xC0E6", "label": "out-of-bounds handler fired"}, + {"addr": "0x025012", "expect": "0xC0DA", "label": "main reached tail after all nine recoveries"} ] }