"""Build tools_index.yaml — classifies all tools/ Python files as active, support, or archive. Active: called directly from spec/41_release_dag.yaml Support: imported by other active tools (not entry-point but needed) Archive: no direct or transitive reference from DAG or active tools """ from __future__ import annotations import argparse import re from pathlib import Path from typing import Any import yaml ROOT = Path(__file__).resolve().parents[1] TOOLS_DIR = ROOT / "tools" DAG_SPEC = ROOT / "spec" / "41_release_dag.yaml" DEFAULT_OUT = ROOT / "tools_index.yaml" def _load_dag_active(dag_path: Path) -> set[str]: if not dag_path.exists(): return set() try: data = yaml.safe_load(dag_path.read_text(encoding="utf-8")) or {} except Exception: return set() active: set[str] = set() for node in (data.get("dag") or {}).get("nodes", {}).values(): cmd = node.get("command", "") if isinstance(cmd, list): cmd = " ".join(str(c) for c in cmd) for m in re.findall(r"tools/([\w]+\.py)", str(cmd)): active.add(m) return active def _find_imports(py_file: Path) -> set[str]: text = py_file.read_text(encoding="utf-8", errors="ignore") imports: set[str] = set() for m in re.findall(r"from\s+([\w_]+)\s+import|import\s+([\w_]+)", text): name = (m[0] or m[1]).strip() if name: candidate = f"{name}.py" imports.add(candidate) return imports def _compute_support(active: set[str], all_tools: set[str]) -> set[str]: support: set[str] = set() frontier = set(active) for _ in range(5): new_support: set[str] = set() for tool_name in frontier: py = TOOLS_DIR / tool_name if not py.exists(): continue for imp in _find_imports(py): if imp in all_tools and imp not in active and imp not in support: new_support.add(imp) if not new_support: break support.update(new_support) frontier = new_support return support def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--root", default=str(ROOT)) ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() tools_dir = Path(args.root) / "tools" all_tool_names = {p.name for p in tools_dir.glob("*.py")} dag_active = _load_dag_active(DAG_SPEC) support = _compute_support(dag_active, all_tool_names) active_list = sorted(dag_active & all_tool_names) support_list = sorted(support - dag_active) archive_candidates = sorted(all_tool_names - dag_active - support) result: dict[str, Any] = { "schema_version": "tools_index.v1", "generated_at": "2026-06-10", "summary": { "total": len(all_tool_names), "active": len(active_list), "support": len(support_list), "archive_candidates": len(archive_candidates), }, "active": active_list, "support": support_list, "archive_candidates": archive_candidates, } out_path = Path(args.out) out_path.write_text(yaml.dump(result, allow_unicode=True, default_flow_style=False), encoding="utf-8") print("TOOL_INVENTORY_V1_OK") print(f" total: {len(all_tool_names)}") print(f" active (in DAG): {len(active_list)}") print(f" support (imported by active): {len(support_list)}") print(f" archive_candidates: {len(archive_candidates)}") return 0 if __name__ == "__main__": raise SystemExit(main())