from __future__ import annotations import argparse import json import re from pathlib import Path from typing import Any import yaml ROOT = Path(__file__).resolve().parents[1] REGISTRY = ROOT / "spec" / "13_formula_registry.yaml" GOLDEN_V2 = ROOT / "spec" / "formula_golden_cases_v2.yaml" GOLDEN_TEMP = ROOT / "Temp" / "formula_golden_cases.yaml" DEFAULT_OUT = ROOT / "Temp" / "semantic_formula_coverage_v1.json" def _load_registry(path: Path) -> list[str]: if not path.exists(): return [] try: payload = yaml.safe_load(path.read_text(encoding="utf-8")) except Exception: return [] fr = (payload.get("formula_registry") or {}) if isinstance(payload, dict) else {} formulas = fr.get("formulas") or {} if formulas: return sorted(formulas.keys()) # fallback: regex scan text = json.dumps(payload, ensure_ascii=False) ids = sorted(set(re.findall(r'"formula_id"\s*:\s*"([A-Z0-9_]+)"', text))) if not ids: ids = sorted(set(re.findall(r'\b([A-Z][A-Z0-9_]+_V[0-9]+)\b', text))) return ids def _load_golden(path: Path) -> dict[str, Any]: if not path.exists(): return {} try: payload = yaml.safe_load(path.read_text(encoding="utf-8")) except Exception: return {} return payload if isinstance(payload, dict) else {} def _scan_code() -> str: blobs: list[str] = [] for p in ROOT.rglob("*"): if not p.is_file(): continue if p.suffix.lower() not in {".py", ".gs", ".yaml", ".yml", ".md"}: continue try: blobs.append(p.read_text(encoding="utf-8", errors="ignore")) except Exception: continue return "\n".join(blobs) def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() out_path = Path(args.out) if not out_path.is_absolute(): out_path = ROOT / out_path formula_ids = _load_registry(REGISTRY) corpus = _scan_code() spec_total = len(formula_ids) impl = [fid for fid in formula_ids if fid in corpus] report_binding = [fid for fid in formula_ids if fid in corpus and "render_operational_report.py" in corpus] outcome_binding = [fid for fid in formula_ids if fid.startswith(("OUTCOME_", "TRADE_", "SHORT_HORIZON_", "LATE_", "REBOUND_", "CASH_RAISE_")) and fid in corpus] golden_path = GOLDEN_V2 if GOLDEN_V2.exists() else GOLDEN_TEMP golden = _load_golden(golden_path) golden_rows = golden.get("golden_cases_v2") if isinstance(golden.get("golden_cases_v2"), list) else golden.get("golden_cases") if not isinstance(golden_rows, list): golden_rows = [] golden_formula_ids = [str(row.get("formula_id") or "") for row in golden_rows if isinstance(row, dict)] golden_formula_ids = [fid for fid in golden_formula_ids if fid] golden_covered = sum(1 for fid in golden_formula_ids if fid in corpus) grade = "PASS" if spec_total > 0 and len(impl) == spec_total and golden_covered == len(golden_formula_ids) else ("WARN" if len(impl) > 0 else "FAIL") # outcome_binding_deferred: decision-critical formulas not yet bound to operational T+20 outcomes. # Marked DEFERRED (not absent) until V8-P1-06 operational_t20_count >= 30. outcome_binding_deferred = spec_total - len(outcome_binding) out = { "formula_id": "SEMANTIC_FORMULA_COVERAGE_HARNESS_V1", "spec_total": spec_total, "implementation_covered": len(impl), "golden_test_covered": golden_covered, "report_binding_covered": len(report_binding), "outcome_binding_covered": len(outcome_binding), "outcome_binding_deferred": outcome_binding_deferred, "outcome_binding_deferred_reason": "PENDING_OPERATIONAL_T20_SAMPLE_V8_P1_06", "outcome_binding_total_check": len(outcome_binding) + outcome_binding_deferred, "coverage_grade": grade, "missing": [fid for fid in formula_ids if fid not in impl][:200], "golden_source": str(golden_path.name), } out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(json.dumps(out, ensure_ascii=False, indent=2), encoding="utf-8") print(json.dumps(out, ensure_ascii=False, indent=2)) return 0 if __name__ == "__main__": raise SystemExit(main())