65816-llvm-mos/scripts/ltoLink.sh
Scott Duensing da095402ec Updated
2026-06-02 23:17:57 -05:00

210 lines
6.6 KiB
Bash
Executable file

#!/usr/bin/env bash
# ltoLink.sh - ThinLTO-style link driver for the W65816 backend.
#
# Phase 5.2 of docs/GAP_CLOSURE_PLAN.md. Takes LLVM bitcode (.bc) and/or
# textual IR (.ll) inputs plus a final output object name and does:
#
# 1. llvm-link: merge all bitcode inputs into a single module.
# 2. opt -passes='w65816-layer2-gate': hard-fail if any two TUs in
# the merged module disagree on `-mllvm -w65816-dbr-safe-ptrs`
# (Phase 1.12 silent-miscompile gate). Refuses on mismatch --
# that's the entire point of having the gate at all.
# 3. opt -O2 + -inline-threshold=50: IR-level optimization with the
# same inline threshold as per-TU codegen, to keep code size sane.
# We pass -inline-threshold explicitly (together with
# --mtriple=w65816) because `opt` does NOT invoke TargetPassConfig,
# so the TM-init hook that sets inline-threshold in
# W65816TargetMachine.cpp does not fire here.
# 4. llc -filetype=obj: produce the final native .o.
# 5. (caller hands the .o + the native asm objects to link816)
#
# Usage:
# bash scripts/ltoLink.sh -o <out.o> <input1.bc|.ll> [<input2.bc|...> ...]
#
# Flags:
# -o <out> output object path (required)
# --keep-temps do not delete the merged.bc / opt.bc intermediates
# --layer2 declare that ALL input TUs were built with -mllvm
# -w65816-dbr-safe-ptrs. This flag does not modify the
# merged module: consistency is enforced by the gate via
# the per-TU stamps, and --layer2 only lets the driver
# document caller intent in the log.
# --inline-threshold N
# override the default IR-optimization inline
# threshold (default 50, mirrors the target's
# per-TU default).
# --emit-ll additionally emit a human-readable .ll of the
# post-opt module for debugging.
#
# Native asm objects (handed to link816 by buildGno.sh / link816
# directly) are NOT part of the bitcode merge -- they're passed through
# unchanged. Caller must pass `.o` files to link816 separately. This
# script only consumes `.bc` / `.ll` and produces ONE `.o`.
# Strict mode: stop on command failure, on use of an unset variable, and
# on a failure anywhere inside a pipeline.
set -euo pipefail

# Resolve the repository root from this script's own location; the LLVM
# binaries are looked up under tools/llvm-mos-build/bin below it.
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
LLVM_BIN="$ROOT/tools/llvm-mos-build/bin"

LLVM_LINK="$LLVM_BIN/llvm-link"
LLVM_DIS="$LLVM_BIN/llvm-dis"
LLVM_AS="$LLVM_BIN/llvm-as"
OPT="$LLVM_BIN/opt"
LLC="$LLVM_BIN/llc"

# Every tool in the chain must exist and be executable; bail out on the
# first one that is not, pointing at the installer script.
required_tools=("$LLVM_LINK" "$OPT" "$LLC" "$LLVM_AS" "$LLVM_DIS")
for tool in "${required_tools[@]}"; do
  if [[ -x "$tool" ]]; then
    continue
  fi
  echo "ltoLink: missing tool: $tool" >&2
  echo " Run scripts/installLlvmMos.sh to build the LTO chain." >&2
  exit 1
done
# ---------------------------------------------------------------------
# Command-line parsing.
#
# Results (all globals):
#   OUT               output object path (-o, required)
#   KEEP_TEMPS        1 if --keep-temps was given, else 0
#   LAYER2            1 if --layer2 was given, else 0
#   INLINE_THRESHOLD  value of --inline-threshold (default 50)
#   EMIT_LL           1 if --emit-ll was given, else 0
#   INPUTS            array of positional input paths
#
# Any usage error is reported on stderr and exits with status 2.
# ---------------------------------------------------------------------
OUT=""
KEEP_TEMPS=0
LAYER2=0
INLINE_THRESHOLD=50
EMIT_LL=0
INPUTS=()
while [ $# -gt 0 ]; do
  case "$1" in
    -o)
      # Check for the value explicitly: under `set -u` a bare "$2" would
      # abort with an "unbound variable" message instead of a usage error.
      if [ $# -lt 2 ]; then
        echo "ltoLink: -o requires an argument" >&2
        exit 2
      fi
      OUT="$2"
      shift 2
      ;;
    --keep-temps)
      KEEP_TEMPS=1
      shift
      ;;
    --layer2)
      LAYER2=1
      shift
      ;;
    --inline-threshold)
      if [ $# -lt 2 ]; then
        echo "ltoLink: --inline-threshold requires an argument" >&2
        exit 2
      fi
      INLINE_THRESHOLD="$2"
      shift 2
      ;;
    --emit-ll)
      EMIT_LL=1
      shift
      ;;
    --)
      # Everything after "--" is an input, even if it starts with "-".
      shift
      INPUTS+=("$@")
      set --
      ;;
    -*)
      echo "ltoLink: unknown flag: $1" >&2
      exit 2
      ;;
    *)
      INPUTS+=("$1")
      shift
      ;;
  esac
done

# Reject a non-integer threshold here rather than letting it surface as
# an option-parsing failure from opt after the merge and gate have run.
if ! [[ "$INLINE_THRESHOLD" =~ ^-?[0-9]+$ ]]; then
  echo "ltoLink: --inline-threshold expects an integer, got: $INLINE_THRESHOLD" >&2
  exit 2
fi
if [ -z "$OUT" ]; then
  echo "ltoLink: -o <out> is required" >&2
  exit 2
fi
if [ "${#INPUTS[@]}" -eq 0 ]; then
  echo "ltoLink: no input bitcode files" >&2
  exit 2
fi
# Derive the intermediate artifact paths from the requested output:
# they live next to $OUT and share its name with the ".o" suffix removed.
OUT_DIR="$(dirname "$OUT")"
mkdir -p "$OUT_DIR"
OUT_BASE="$(basename "$OUT" .o)"
artifact_stem="${OUT_DIR}/${OUT_BASE}"
MERGED="${artifact_stem}.merged.bc"   # output of llvm-link
OPTD="${artifact_stem}.opt.bc"        # output of opt
LL="${artifact_stem}.opt.ll"          # optional textual dump of $OPTD
#######################################
# EXIT handler: remove intermediate files.
# Globals (read):
#   TMP_BCS     array of .ll->.bc temporaries; may be unset or empty
#   KEEP_TEMPS  when 0, the merged/optimized bitcode is deleted
#   EMIT_LL     when 0 (and KEEP_TEMPS is 0), the .ll dump is deleted
#   MERGED, OPTD, LL  paths of the intermediates
# Arguments: none
#######################################
cleanup() {
  local tmp
  # The .ll->.bc temporaries are only needed until llvm-link has run.
  # Removing them here as well covers the paths where the script dies
  # before it reaches its own removal step (e.g. llvm-as or llvm-link
  # failing). The ${var+...} form keeps this safe under `set -u` when
  # the array is unset or empty.
  for tmp in ${TMP_BCS[@]+"${TMP_BCS[@]}"}; do
    rm -f -- "$tmp"
  done
  if [ "$KEEP_TEMPS" -eq 0 ]; then
    rm -f -- "$MERGED" "$OPTD"
    # The textual dump survives only when the caller asked for it.
    if [ "$EMIT_LL" -eq 0 ]; then
      rm -f -- "$LL"
    fi
  fi
}
trap cleanup EXIT
# Pre-flight: convert any .ll inputs to .bc so llvm-link gets a uniform
# input set. llvm-link does accept .ll directly but mixing the two in
# one invocation has bitten us with module-flag mismatches.

#######################################
# Print the temporary .bc path used for one converted .ll input.
# The index is part of the name so that two inputs sharing a basename
# (e.g. a/foo.ll and b/foo.ll) do not overwrite each other's bitcode.
# Arguments:
#   $1 - directory that receives the temporary
#   $2 - path of the .ll input
#   $3 - index unique to this input within the run
# Outputs: the path on stdout
#######################################
llTmpBcPath() {
  local stem
  stem="$(basename "${2%.ll}")"
  printf '%s/%s.%s.tmp.bc\n' "$1" "$stem" "$3"
}

NORMALIZED=()   # inputs handed to llvm-link, all .bc
TMP_BCS=()      # the subset of NORMALIZED that this script created
for f in "${INPUTS[@]}"; do
  case "$f" in
    *.ll)
      # The current TMP_BCS length is a per-input counter: it grows by
      # one for every .ll converted, so each temp name is distinct.
      tmpbc="$(llTmpBcPath "$OUT_DIR" "$f" "${#TMP_BCS[@]}")"
      # Record the path before converting so it is tracked in TMP_BCS
      # even if llvm-as fails part-way through writing it.
      TMP_BCS+=("$tmpbc")
      "$LLVM_AS" "$f" -o "$tmpbc"
      NORMALIZED+=("$tmpbc")
      ;;
    *.bc)
      NORMALIZED+=("$f")
      ;;
    *)
      echo "ltoLink: input must be .bc or .ll: $f" >&2
      exit 2
      ;;
  esac
done
# Step 1: merge every normalized bitcode input into a single module.
echo "ltoLink: merging ${#NORMALIZED[@]} bitcode module(s) -> $MERGED"
"$LLVM_LINK" "${NORMALIZED[@]}" -o "$MERGED"
# Drop any .ll->.bc temporaries; the merged bitcode is the source of truth from here.
# Guard on the length: when every input was already .bc the array is
# empty, and expanding "${TMP_BCS[@]}" under `set -u` is an "unbound
# variable" error on bash releases older than 4.4.
if [ "${#TMP_BCS[@]}" -gt 0 ]; then
  rm -f -- "${TMP_BCS[@]}"
fi
# Step 2 -- Phase 1.12 Layer 2 gate. The pass must refuse a module whose
# TUs disagree: a Layer 2 / non-Layer 2 mix yields silent wrong code in
# struct-field deref hot paths. Only the pass's exit status matters, so
# its output module is discarded; a failure stops the script via set -e.
echo "ltoLink: running Layer 2 LTO consistency gate"
gate_cmd=("$OPT" -passes='w65816-layer2-gate' "$MERGED" -o /dev/null)
"${gate_cmd[@]}"
# Step 3 -- IR-level optimization of the merged module at -O2 (inliner,
# GVN, SROA, ...).
#
# Why -inline-threshold is spelled out: opt does not go through
# TargetPassConfig, so the default-50 override installed by
# W65816TargetMachine.cpp never applies here. Left alone, opt would use
# LLVM's stock threshold of 225 and bloat the binary.
#
# Why the stamp pass is absent: each TU's bitcode already carries its
# stamp, written by the new-PM stamp pass at the start of that TU's opt
# pipeline during clang -c. Re-stamping after the link could only reset
# the attributes to whatever DbrSafePtrs is in opt's CommandLine
# context, which would defeat the gate.
echo "ltoLink: opt -O2 (inline-threshold=$INLINE_THRESHOLD) -> $OPTD"
opt_args=(
  --mtriple=w65816
  -passes='default<O2>'
  -inline-threshold="$INLINE_THRESHOLD"
  "$MERGED"
  -o "$OPTD"
)
"$OPT" "${opt_args[@]}"
# Optional debugging aid: disassemble the optimized module to text.
if [[ "$EMIT_LL" -eq 1 ]]; then
  echo "ltoLink: emitting human-readable IR -> $LL"
  dis_cmd=("$LLVM_DIS" "$OPTD" -o "$LL")
  "${dis_cmd[@]}"
fi

# Step 4 -- lower the optimized bitcode to the final native object.
echo "ltoLink: llc -filetype=obj -> $OUT"
llc_args=(--mtriple=w65816 -filetype=obj "$OPTD" -o "$OUT")
"$LLC" "${llc_args[@]}"
#######################################
# Print the log line that records the caller's Layer 2 intent.
# This script never inspects the module's Layer 2 state itself: it only
# knows whether --layer2 was passed and that the gate (which checks that
# TUs agree) succeeded. The wording therefore reports what the caller
# asserted instead of claiming Layer 2 is on or off.
# Arguments:
#   $1 - 1 if the caller passed --layer2, anything else (or empty) if not
# Outputs: one line on stdout
#######################################
layer2LogLine() {
  if [ "${1:-0}" -eq 1 ]; then
    echo "ltoLink: caller asserts Layer 2 (--layer2); gate confirmed all TUs match"
  else
    echo "ltoLink: Layer 2 not asserted by caller (no --layer2); gate confirmed all TUs match"
  fi
}

# Document Layer 2 status in the log. The actual enforcement happened
# in step 2 (the gate); this is just for human readers.
layer2LogLine "$LAYER2"
echo "ltoLink: done -> $OUT"