from __future__ import annotations import argparse import json import re from collections import defaultdict from pathlib import Path from typing import Any import yaml ROOT = Path(__file__).resolve().parents[2] SOURCE = ROOT / "spec" / "13_formula_registry.yaml" def load_yaml(path: Path) -> dict[str, Any]: return yaml.safe_load(path.read_text(encoding="utf-8")) or {} def to_snake(name: str) -> str: slug = re.sub(r"[^0-9A-Za-z]+", "_", name).strip("_").lower() return slug or "formula" def write_text(path: Path, text: str) -> None: path.parent.mkdir(parents=True, exist_ok=True) path.write_text(text, encoding="utf-8") def build_stub(formula_id: str, spec: dict[str, Any]) -> str: inputs = spec.get("inputs") or [] outputs = spec.get("outputs") or spec.get("output_fields") or [] owner = spec.get("owner") status = spec.get("status") input_fields = [item.get("field") for item in inputs if isinstance(item, dict) and item.get("field")] return ( f'"""Auto-generated formula stub for {formula_id}."""\n' f"\n" f"FORMULA_ID = {formula_id!r}\n" f"FORMULA_OWNER = {owner!r}\n" f"FORMULA_STATUS = {status!r}\n" f"FORMULA_INPUT_FIELDS = {input_fields!r}\n" f"FORMULA_OUTPUT_FIELDS = {outputs!r}\n" f"\n" f"def execute(inputs: dict[str, object]) -> dict[str, object]:\n" f" raise NotImplementedError({formula_id!r} + ' is a generated stub.')\n" ) def build_golden_test(formula_id: str, spec: dict[str, Any]) -> str: slug = to_snake(formula_id) outputs = spec.get("outputs") or spec.get("output_fields") or [] return ( f'"""Auto-generated golden test stub for {formula_id}."""\n' f"\n" f"def test_{slug}_golden_stub_exists() -> None:\n" f" assert {formula_id!r}\n" f"\n" f"def test_{slug}_declares_outputs() -> None:\n" f" outputs = {outputs!r}\n" f" assert isinstance(outputs, list)\n" f" assert outputs\n" ) def build_schema_fragment(formula_id: str, spec: dict[str, Any]) -> dict[str, Any]: inputs = spec.get("inputs") or [] outputs = spec.get("outputs") or spec.get("output_fields") or [] return { "$schema": "https://json-schema.org/draft/2020-12/schema", "$id": f"schema://formula/{formula_id}", "title": formula_id, "type": "object", "properties": { "formula_id": {"const": formula_id}, "owner": {"type": "string"}, "status": {"type": "string"}, "inputs": {"type": "array", "items": {"type": "string"}}, "outputs": {"type": "array", "items": {"type": "string"}}, }, "required": ["formula_id", "owner", "status", "inputs", "outputs"], "x_formula_inputs": [ item.get("field") for item in inputs if isinstance(item, dict) and item.get("field") ], "x_formula_outputs": outputs, } def main() -> int: parser = argparse.ArgumentParser(description="Compile formula registry stubs and artifacts.") parser.add_argument("--dry-run", action="store_true", help="Validate inputs without writing files.") parser.add_argument("--out-report", default=str(ROOT / "Temp" / "formula_compile_report_v1.json")) parser.add_argument("--out-graph", default=str(ROOT / "Temp" / "formula_dependency_graph_v1.json")) args = parser.parse_args() source = load_yaml(SOURCE) formulas = (source.get("formula_registry") or {}).get("formulas") or {} if not isinstance(formulas, dict): raise TypeError("formula_registry.formulas must be a mapping") runtime_dir = ROOT / "runtime" / "python" / "core" / "formulas" / "generated" golden_dir = ROOT / "tests" / "golden" / "generated" schema_dir = ROOT / "schemas" / "generated" output_field_map: dict[str, set[str]] = defaultdict(set) for formula_id, spec in formulas.items(): if not isinstance(spec, dict): continue outputs = spec.get("outputs") or spec.get("output_fields") or [] for field in outputs: if isinstance(field, str): output_field_map[field].add(formula_id) dependency_graph: dict[str, list[str]] = {} generated_count = 0 active_count = 0 for formula_id in sorted(formulas): spec = formulas[formula_id] or {} status = str(spec.get("status", "active")).lower() if status not in {"deprecated", "removed"}: active_count += 1 stub_name = f"{to_snake(formula_id)}.py" golden_name = f"{to_snake(formula_id)}_golden.py" schema_name = f"{to_snake(formula_id)}.schema.json" input_fields = [ item.get("field") for item in (spec.get("inputs") or []) if isinstance(item, dict) and item.get("field") ] dependencies = sorted( { producer for field in input_fields for producer in output_field_map.get(field, set()) if producer != formula_id } ) dependency_graph[formula_id] = dependencies if not args.dry_run: write_text(runtime_dir / stub_name, build_stub(formula_id, spec)) write_text(golden_dir / golden_name, build_golden_test(formula_id, spec)) write_text(schema_dir / schema_name, json.dumps(build_schema_fragment(formula_id, spec), ensure_ascii=False, indent=2) + "\n") generated_count += 1 report = { "source": str(SOURCE.relative_to(ROOT)), "formula_count": len(formulas), "active_formula_count": active_count, "generated_stub_count": generated_count, "golden_stub_count": generated_count, "schema_fragment_count": generated_count, "dependency_graph_node_count": len(dependency_graph), "status": "OK", } if not args.dry_run: write_text(Path(args.out_report), json.dumps(report, ensure_ascii=False, indent=2) + "\n") write_text(Path(args.out_graph), json.dumps(dependency_graph, ensure_ascii=False, indent=2) + "\n") for package_dir in (runtime_dir, golden_dir, schema_dir): init_file = package_dir / "__init__.py" if not init_file.exists(): write_text(init_file, '"""Auto-generated package."""\n') print(json.dumps(report, ensure_ascii=False, indent=2)) return 0 if __name__ == "__main__": raise SystemExit(main())