From 093ac10c12d9daee14bcda1bd276b201457f5e5b Mon Sep 17 00:00:00 2001
From: metrics
Date: Sat, 28 Mar 2026 13:37:37 +0000
Subject: [PATCH] feat: add transpiler output baseline and diff tool scripts

---
 .gitignore                |   3 +
 scripts/transpile_diff.py | 404 ++++++++++++++++++++++++++++++++++++++
 scripts/transpile_diff.sh |   2 +
 3 files changed, 409 insertions(+)
 create mode 100755 scripts/transpile_diff.py
 create mode 100755 scripts/transpile_diff.sh

diff --git a/.gitignore b/.gitignore
index 60fa304..045ec21 100644
--- a/.gitignore
+++ b/.gitignore
@@ -29,6 +29,9 @@ docs/.ub_cache/
 *.profraw
 *.profdata
 
+# Transpiler output baselines (created by scripts/transpile_diff.sh)
+.herkos-baselines/
+
 # Temporary files
 *.tmp
 *.bak
diff --git a/scripts/transpile_diff.py b/scripts/transpile_diff.py
new file mode 100755
index 0000000..04b9c8a
--- /dev/null
+++ b/scripts/transpile_diff.py
@@ -0,0 +1,404 @@
+#!/usr/bin/env python3
+"""Baseline and diff tool for herkos transpiler output.
+
+Creates named snapshots of the .rs files generated by herkos-tests and
+lets you diff any two baselines—or a baseline against the current build.
+"""
+
+import argparse
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from datetime import datetime, timezone
+from pathlib import Path
+
+
+# ---------------------------------------------------------------------------
+# Paths
+# ---------------------------------------------------------------------------
+
+SCRIPT_DIR = Path(__file__).resolve().parent
+REPO_ROOT = SCRIPT_DIR.parent
+BASELINES_DIR = REPO_ROOT / ".herkos-baselines"
+
+# Pseudo-baseline name that `compare` resolves to the current build output.
+LATEST = "latest"
+
+
+def check_repo_root() -> None:
+    """Exit unless the directory above scripts/ contains a Cargo.toml."""
+    if not (REPO_ROOT / "Cargo.toml").exists():
+        # REPO_ROOT is derived from this file's location, not from the
+        # current directory, so report the path that was actually checked.
+        sys.exit(
+            f"ERROR: Cargo.toml not found in {REPO_ROOT}.\n"
+            "       This script must be located in <repo>/scripts/."
+        )
+
+
+def _validate_name(name: str) -> str:
+    """Return *name* if it is a single, safe path component; exit otherwise.
+
+    Baseline names are joined onto BASELINES_DIR and may later be passed to
+    shutil.rmtree, so "..", "a/b" or an absolute path must be rejected.
+    """
+    if (
+        not name
+        or name in (".", "..")
+        or "/" in name
+        or "\\" in name
+        or Path(name).name != name
+    ):
+        sys.exit(
+            f"ERROR: Invalid baseline name '{name}'.\n"
+            "       Use a plain name without path separators."
+        )
+    return name
+
+
+# ---------------------------------------------------------------------------
+# OUT_DIR discovery
+# ---------------------------------------------------------------------------
+
+def find_out_dir(profile: str) -> Path | None:
+    """Find the herkos-tests OUT_DIR that actually contains .rs files.
+
+    Several herkos-tests-* build directories can coexist (for example ones
+    left over from earlier builds), and glob order is arbitrary, so the
+    populated directory with the most recently modified .rs file wins.
+    Returns None when no populated directory exists.
+    """
+    build_dir = REPO_ROOT / "target" / profile / "build"
+    if not build_dir.is_dir():
+        return None
+    populated = [
+        candidate
+        for candidate in build_dir.glob("herkos-tests-*/out")
+        if candidate.is_dir() and any(candidate.glob("*.rs"))
+    ]
+    if not populated:
+        return None
+    return max(populated, key=_newest_rs_mtime)
+
+
+def _newest_rs_mtime(out_dir: Path) -> float:
+    """Return the newest modification time among top-level .rs files."""
+    return max(p.stat().st_mtime for p in out_dir.glob("*.rs"))
+
+
+# ---------------------------------------------------------------------------
+# Git helpers
+# ---------------------------------------------------------------------------
+
+def git(*args: str) -> str:
+    """Run git inside the repo and return its stripped standard output.
+
+    Raises subprocess.CalledProcessError when git exits with an error.
+    """
+    result = subprocess.run(
+        ["git", "-C", str(REPO_ROOT), *args],
+        capture_output=True, text=True, check=True,
+    )
+    return result.stdout.strip()
+
+
+# ---------------------------------------------------------------------------
+# Snapshot
+# ---------------------------------------------------------------------------
+
+def cmd_snapshot(name: str | None, profile: str) -> None:
+    """Save the current transpiler output as a new baseline.
+
+    When *name* is None, a name of the form <branch>__<commit>__<timestamp>
+    is generated.  Exits with an error when no build output is found, the
+    name is invalid or reserved, or the baseline already exists.
+    """
+    out_dir = find_out_dir(profile)
+    if out_dir is None:
+        sys.exit(
+            f"ERROR: No populated OUT_DIR found in target/{profile}/build/herkos-tests-*/out/\n"
+            "       Run 'cargo build -p herkos-tests' first."
+        )
+
+    # Query git once and reuse the values for both the name and the metadata.
+    branch = git("rev-parse", "--abbrev-ref", "HEAD")
+    commit_sha = git("rev-parse", "HEAD")
+    commit_short = git("rev-parse", "--short", "HEAD")
+    now = datetime.now(timezone.utc)
+
+    if name is None:
+        safe_branch = branch.replace("/", "_")
+        name = f"{safe_branch}__{commit_short}__{now.strftime('%Y%m%dT%H%M%SZ')}"
+    _validate_name(name)
+    if name == LATEST:
+        sys.exit(
+            f"ERROR: '{LATEST}' is reserved for the current build in 'compare'.\n"
+            "       Choose a different baseline name."
+        )
+
+    baseline = BASELINES_DIR / name
+    if baseline.exists():
+        sys.exit(
+            f"ERROR: Baseline '{name}' already exists. Delete it first:\n"
+            f"         scripts/transpile_diff.py delete {name}"
+        )
+
+    dest = baseline / "files"
+    dest.mkdir(parents=True)
+    try:
+        _copy_rs_files(out_dir, dest)
+        rs_count = sum(1 for _ in dest.rglob("*.rs"))
+        (baseline / "meta.env").write_text(
+            f"TIMESTAMP={now.strftime('%Y-%m-%dT%H:%M:%SZ')}\n"
+            f"BRANCH={branch}\n"
+            f"COMMIT_SHA={commit_sha}\n"
+            f"COMMIT_SHORT={commit_short}\n"
+            f"HERKOS_OPTIMIZE={os.environ.get('HERKOS_OPTIMIZE', '0')}\n"
+            f"PROFILE={profile}\n"
+            f"OUT_DIR={out_dir}\n"
+            f"RS_FILE_COUNT={rs_count}\n",
+            encoding="utf-8",
+        )
+    except BaseException:
+        # Do not leave a half-written baseline behind: it would block the
+        # next snapshot of the same name.
+        shutil.rmtree(baseline, ignore_errors=True)
+        raise
+
+    print(f"Snapshot '{name}' saved ({rs_count} .rs files).")
+    print(f"  Branch:  {branch}")
+    print(f"  Commit:  {commit_short}")
+    print(f"  OUT_DIR: {out_dir}")
+
+
+def _copy_rs_files(src: Path, dest: Path) -> None:
+    """Copy all .rs files (except *_src.rs) from src to dest, preserving structure."""
+    for rs_file in src.rglob("*.rs"):
+        if rs_file.name.endswith("_src.rs"):
+            continue
+        rel = rs_file.relative_to(src)
+        target = dest / rel
+        target.parent.mkdir(parents=True, exist_ok=True)
+        shutil.copy2(rs_file, target)
+
+
+# ---------------------------------------------------------------------------
+# List
+# ---------------------------------------------------------------------------
+
+def cmd_list() -> None:
+    """Print a table of all saved baselines with their metadata."""
+    if not BASELINES_DIR.is_dir() or not any(BASELINES_DIR.iterdir()):
+        print(f"No baselines found in {BASELINES_DIR}")
+        return
+
+    header = f"{'NAME':<42} {'TIMESTAMP':<22} {'BRANCH':<20} {'COMMIT':<10} FILES"
+    print(header)
+    print("─" * 100)
+
+    for meta_path in sorted(BASELINES_DIR.glob("*/meta.env")):
+        name = meta_path.parent.name
+        meta = _read_meta(meta_path)
+        print(
+            f"{name:<42} "
+            f"{meta.get('TIMESTAMP', '?'):<22} "
+            f"{meta.get('BRANCH', '?'):<20} "
+            f"{meta.get('COMMIT_SHORT', '?'):<10} "
+            f"{meta.get('RS_FILE_COUNT', '?')}"
+        )
+
+
+def _read_meta(path: Path) -> dict[str, str]:
+    """Parse a KEY=VALUE file into a dict, ignoring lines without '='."""
+    meta: dict[str, str] = {}
+    for line in path.read_text(encoding="utf-8").splitlines():
+        if "=" in line:
+            k, _, v = line.partition("=")
+            meta[k.strip()] = v.strip()
+    return meta
+
+
+# ---------------------------------------------------------------------------
+# Compare
+# ---------------------------------------------------------------------------
+
+def cmd_compare(a: str, b: str, profile: str) -> None:
+    """Diff baseline *a* against baseline *b*, or against the current build.
+
+    When *b* is "latest", the current OUT_DIR is copied (minus *_src.rs) to
+    a temporary directory so that both sides are filtered the same way.
+    """
+    dir_a = _resolve_baseline(a)
+
+    if b != LATEST:
+        _run_compare(a, dir_a, b, _resolve_baseline(b))
+        return
+
+    out_dir = find_out_dir(profile)
+    if out_dir is None:
+        sys.exit(
+            f"ERROR: 'latest' requested but no populated OUT_DIR found in target/{profile}/.\n"
+            "       Run 'cargo build -p herkos-tests' or specify a baseline name."
+        )
+    # The context manager removes the temporary copy even if the diff fails.
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        _copy_rs_files(out_dir, Path(tmp_dir))
+        _run_compare(a, dir_a, f"latest ({profile} build)", Path(tmp_dir))
+
+
+def _resolve_baseline(name: str) -> Path:
+    """Return the files/ directory of baseline *name*, or exit if missing."""
+    path = BASELINES_DIR / _validate_name(name) / "files"
+    if not path.is_dir():
+        sys.exit(
+            f"ERROR: Baseline '{name}' not found in {BASELINES_DIR}/\n"
+            "       Run 'scripts/transpile_diff.py list' to see available baselines."
+        )
+    return path
+
+
+def _run_compare(label_a: str, dir_a: Path, label_b: str, dir_b: Path) -> None:
+    """Print a summary of changed/added/removed .rs files, then a full diff."""
+    print("=== herkos transpiler diff ===")
+    print(f"  A: {label_a}")
+    print(f"  B: {label_b}")
+    print()
+
+    files_a = {p.relative_to(dir_a) for p in dir_a.rglob("*.rs")}
+    files_b = {p.relative_to(dir_b) for p in dir_b.rglob("*.rs")}
+
+    added = sorted(files_b - files_a)
+    removed = sorted(files_a - files_b)
+    changed = sorted(
+        f for f in files_a & files_b
+        if (dir_a / f).read_bytes() != (dir_b / f).read_bytes()
+    )
+
+    print(f"Summary: Changed: {len(changed)} | Added: {len(added)} | Removed: {len(removed)}")
+    print()
+
+    if changed:
+        print("Changed files:")
+        for f in changed:
+            print(f"  ~ {f}")
+        print()
+    if added:
+        print("Added files (in B, not in A):")
+        for f in added:
+            print(f"  + {f}")
+        print()
+    if removed:
+        print("Removed files (in A, not in B):")
+        for f in removed:
+            print(f"  - {f}")
+        print()
+
+    if not (changed or added or removed):
+        print("No differences found.")
+        return
+
+    print("Full diff:")
+    print("─" * 60)
+    # The child processes write straight to the inherited stdout, so flush
+    # our buffered text first or the diff could appear before the summary.
+    sys.stdout.flush()
+
+    interactive = sys.stdout.isatty()
+    diff_cmd = [
+        "git", "--no-pager", "-C", str(REPO_ROOT), "diff", "--no-index",
+        # Colour escapes are only useful on a terminal; keep redirected
+        # output (files, pipes, CI logs) plain.
+        "--color=always" if interactive else "--color=never",
+        str(dir_a), str(dir_b),
+    ]
+    pager = shutil.which("less") if interactive else None
+
+    # git diff --no-index exits with 1 when differences exist; that is the
+    # expected case here, so exit codes are deliberately not checked.
+    if pager is None:
+        subprocess.run(diff_cmd, check=False)
+        return
+    diff_proc = subprocess.Popen(diff_cmd, stdout=subprocess.PIPE)
+    pager_proc = subprocess.Popen([pager, "-R"], stdin=diff_proc.stdout)
+    # Close our copy of the pipe so git gets SIGPIPE if the pager quits early.
+    diff_proc.stdout.close()  # type: ignore[union-attr]
+    pager_proc.wait()
+    diff_proc.wait()
+
+
+# ---------------------------------------------------------------------------
+# Delete
+# ---------------------------------------------------------------------------
+
+def cmd_delete(name: str) -> None:
+    """Remove baseline *name* and everything stored under it."""
+    # Validate first: without this, "delete .." would rmtree the repo root.
+    path = BASELINES_DIR / _validate_name(name)
+    if not path.is_dir():
+        sys.exit(f"ERROR: Baseline '{name}' not found in {BASELINES_DIR}/")
+    shutil.rmtree(path)
+    print(f"Deleted baseline '{name}'.")
+
+
+# ---------------------------------------------------------------------------
+# CLI
+# ---------------------------------------------------------------------------
+
+def main() -> None:
+    """Parse the command line and dispatch to the chosen subcommand."""
+    check_repo_root()
+
+    parser = argparse.ArgumentParser(
+        prog="scripts/transpile_diff.py",
+        description="Baseline and diff tool for herkos transpiler output.",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+examples:
+  scripts/transpile_diff.py snapshot before-optimizer
+  scripts/transpile_diff.py snapshot
+  scripts/transpile_diff.py list
+  scripts/transpile_diff.py compare before-optimizer
+  scripts/transpile_diff.py compare before-optimizer after-optimizer
+  scripts/transpile_diff.py --release compare main__abc
+  scripts/transpile_diff.py delete before-optimizer
+""",
+    )
+    parser.add_argument(
+        "--release", action="store_true",
+        help="Use target/release/ instead of target/debug/ when discovering OUT_DIR.",
+    )
+
+    sub = parser.add_subparsers(dest="subcommand", metavar="subcommand")
+    sub.required = True
+
+    p_snap = sub.add_parser("snapshot", help="Save current transpiler output as a named baseline.")
+    p_snap.add_argument("name", nargs="?", default=None,
+                        help="Baseline name (auto-generated if omitted).")
+
+    sub.add_parser("list", help="Show all saved baselines with metadata.")
+
+    p_cmp = sub.add_parser("compare", help="Diff baseline A vs B (default B: latest build).")
+    p_cmp.add_argument("a", metavar="A", help="First baseline name.")
+    p_cmp.add_argument("b", metavar="B", nargs="?", default="latest",
+                       help="Second baseline name, or 'latest' (default).")
+
+    p_del = sub.add_parser("delete", help="Remove a baseline.")
+    p_del.add_argument("name", help="Baseline name to delete.")
+
+    args = parser.parse_args()
+    profile = "release" if args.release else "debug"
+
+    if args.subcommand == "snapshot":
+        cmd_snapshot(args.name, profile)
+    elif args.subcommand == "list":
+        cmd_list()
+    elif args.subcommand == "compare":
+        cmd_compare(args.a, args.b, profile)
+    elif args.subcommand == "delete":
+        cmd_delete(args.name)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/transpile_diff.sh b/scripts/transpile_diff.sh
new file mode 100755
index 0000000..f5ba0fa
--- /dev/null
+++ b/scripts/transpile_diff.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+exec python3 "$(dirname "$0")/transpile_diff.py" "$@"