joeylib2/tools/diff-uber-perf

#!/usr/bin/env python3
"""Compare two UBER joeylog.txt files by per-op ops/sec.
Sibling of diff-uber-hashes (which compares pixel correctness). This
tool drives Phase 10 of project_planar_68k_plan.md: pick the
biggest perf gaps vs the IIgs reference and target asm/algorithmic
optimization at those.
Usage:
tools/diff-uber-perf <reference-log> <test-log> [--threshold 1.0]
Output is sorted by speed ratio (test/ref) ascending, so the worst
gaps print first. Ops missing from either log are flagged. The
threshold flag (default 1.0) marks ops below that ratio as FAIL --
project_perf_directive.md says "IIgs is the perf floor; every
other target must match or beat it", so parity = 1.0x. Use
--threshold 0.8 for the project_planar_68k_plan looser acceptance.
Exit code:
0 = all common ops at >= threshold
1 = at least one op below threshold (or missing)
2 = usage error or missing file
"""
import re
import sys

# Match e.g.:
#   UBER: drawCircle r=80: 56 iters / 4 frames = 840 ops/sec | hash=A1B2C3D4
LINE_RE = re.compile(
    r"UBER:\s+(?P<op>[^:]+):\s+\d+\s+iters\s+/\s+\d+\s+frames\s+=\s+(?P<ops>\d+)\s+ops/sec"
)
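
# For the sample line above, the named groups capture (values taken from
# that example, not from a real run):
#   op  -> "drawCircle r=80"
#   ops -> "840"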


def parse_log(path):
    """Return ordered dict {op_name: ops_per_sec} from a UBER log file.

    Multiple runs may be concatenated (joeyLog appends); last value
    for each op wins, matching the most recent run.
    """
    perf = {}
    with open(path) as f:
        for line in f:
            m = LINE_RE.search(line)
            if m:
                perf[m.group("op").strip()] = int(m.group("ops"))
    return perf
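
# For example, given a log containing (the first line is the sample from
# above; the second is hypothetical):
#
#   UBER: drawCircle r=80: 56 iters / 4 frames = 840 ops/sec | hash=A1B2C3D4
#   UBER: fillRect: 120 iters / 3 frames = 2400 ops/sec | hash=00C0FFEE
#
# parse_log would return {"drawCircle r=80": 840, "fillRect": 2400}.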


def main(argv):
    threshold = 1.0
    args = []
    i = 1
    while i < len(argv):
        if argv[i] == "--threshold" and i + 1 < len(argv):
            try:
                threshold = float(argv[i + 1])
            except ValueError:
                sys.stderr.write(f"error: bad threshold {argv[i + 1]}\n")
                return 2
            i += 2
        else:
            args.append(argv[i])
            i += 1
    if len(args) != 2:
        sys.stderr.write(
            "usage: diff-uber-perf <reference-log> <test-log> [--threshold 1.0]\n"
        )
        return 2

    try:
        ref = parse_log(args[0])
        test = parse_log(args[1])
    except OSError as e:
        sys.stderr.write(f"error: {e}\n")
        return 2
    if not ref:
        sys.stderr.write(f"error: no UBER lines found in {args[0]}\n")
        return 2
    if not test:
        sys.stderr.write(f"error: no UBER lines found in {args[1]}\n")
        return 2

    rows = []
    for op, ref_ops in ref.items():
        test_ops = test.get(op)
        if test_ops is None:
            rows.append((op, ref_ops, None, None, "MISSING"))
            continue
        if ref_ops == 0:
            # Guard a zero reference rate: any test speed counts as a win;
            # zero on both sides is treated as parity.
            ratio = float("inf") if test_ops > 0 else 1.0
        else:
            ratio = test_ops / ref_ops
        status = "ok" if ratio >= threshold else "FAIL"
        rows.append((op, ref_ops, test_ops, ratio, status))
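
    # Worked example (illustrative numbers, not from a real run): ref=840
    # ops/sec vs test=630 ops/sec gives ratio 630/840 = 0.75x -- a FAIL at
    # the default 1.0 threshold, and still a FAIL at the looser 0.8.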

    extras = [(op, None, test[op], None, "EXTRA") for op in test if op not in ref]

    # Sort: MISSING first, then FAIL ascending by ratio (worst gaps at the
    # top), then ok ascending by ratio; EXTRA ops go last, alphabetically.
    def sort_key(row):
        op, refv, testv, ratio, status = row
        if status == "MISSING":
            return (0, 0.0, op)
        if status == "EXTRA":
            return (3, 0.0, op)
        return (1 if status == "FAIL" else 2, ratio, op)

    rows.sort(key=sort_key)
    extras.sort(key=sort_key)  # all EXTRA, so this is alphabetical by op

    op_w = max(len(op) for op in ref) if ref else 8
    op_w = max(op_w, max((len(op) for op in test), default=8), len("op"))
    print(f"{'op':<{op_w}} {'ref':>10} {'test':>10} {'ratio':>7} status")
    print(f"{'-' * op_w} {'-' * 10} {'-' * 10} {'-' * 7} ------")
    fails = 0
    for op, refv, testv, ratio, status in rows + extras:
        refs = "" if refv is None else str(refv)
        tests = "" if testv is None else str(testv)
        rats = "" if ratio is None else f"{ratio:.2f}x"
        print(f"{op:<{op_w}} {refs:>10} {tests:>10} {rats:>7} {status}")
        if status in ("FAIL", "MISSING"):
            fails += 1
    print()
print(f"threshold: {threshold:.2f}x ({len(rows)} ops compared, {fails} below threshold)")
return 1 if fails > 0 else 0
if __name__ == "__main__":
sys.exit(main(sys.argv))
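
# Example session (paths and numbers are illustrative, not from a real run):
#
#   $ tools/diff-uber-perf logs/iigs/joeylog.txt logs/68k/joeylog.txt --threshold 0.8
#   op                     ref       test   ratio status
#   --------------- ---------- ---------- ------- ------
#   drawCircle r=80        840        630   0.75x FAIL
#   fillRect              2400       2400   1.00x ok
#
#   threshold: 0.80x (2 ops checked, 1 below threshold or missing)
#   $ echo $?
#   1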