"""build_yaml_code_coverage_v1.py — YAML_TO_CODE_COVERAGE_V1 spec/13_formula_registry.yaml 의 active=true formula_id를 authoritative denominator로 삼고 tools/*.py / *.gs 구현 여부를 매핑해 yaml-to-code 커버리지 보고서를 산출한다. 산출물: Temp/yaml_code_coverage_v1.json - yaml_formula_count: spec에 등록된 공식 수 - implemented_count: 코드에서 확인된 공식 수 - golden_test_count: tests/*.yaml / spec/formula_golden_cases_v2.yaml에 테스트가 있는 공식 수 - unimplemented_rules: 코드 미구현 공식 목록 - orphan_code_rules: 코드에는 있으나 spec에 없는 식별자 (샘플) - coverage_ratio: implemented / total - golden_coverage_ratio: golden_test / total """ from __future__ import annotations import argparse import json import re from pathlib import Path from typing import Any import yaml ROOT = Path(__file__).resolve().parents[1] DEFAULT_OUT = ROOT / "Temp" / "yaml_code_coverage_v1.json" SPEC_DIR = ROOT / "spec" TOOLS_DIR = ROOT / "tools" FORMULA_YAML_FILES = [SPEC_DIR / "13_formula_registry.yaml"] GOLDEN_YAML_FILES = [ SPEC_DIR / "formula_golden_cases_v2.yaml", SPEC_DIR / "formula_golden_cases_v3.yaml", SPEC_DIR / "formula_golden_cases_v4.yaml", ROOT / "tests" / "strategy_tests.yaml", ] GS_FILES = list(ROOT.glob("*.gs")) PY_FILES = list(TOOLS_DIR.glob("*.py")) ALL_CODE_FILES = GS_FILES + PY_FILES def _load_yaml(path: Path) -> Any: if not path.exists(): return {} try: return yaml.safe_load(path.read_text(encoding="utf-8")) or {} except Exception: return {} def _extract_formula_ids(registry: Any) -> list[str]: fr = (registry.get("formula_registry") or {}) if isinstance(registry, dict) else {} return list((fr.get("formulas") or {}).keys()) def _read_code_text() -> str: parts = [] for f in ALL_CODE_FILES: try: parts.append(f.read_text(encoding="utf-8")) except Exception: pass return "\n".join(parts) def _read_golden_text() -> str: parts = [] for f in GOLDEN_YAML_FILES: if f.exists(): try: parts.append(f.read_text(encoding="utf-8")) except Exception: pass return "\n".join(parts) def _find_orphan_formula_ids(code_text: str, spec_ids: set[str], max_sample: int = 20) -> list[str]: """코드에 정의된 FORMULA_ID 패턴 중 spec에 없는 것 (샘플).""" candidates = set(re.findall(r"\bFORMULA_ID\s*=\s*[\"']([A-Z0-9_]+)[\"']", code_text)) # also pick up python_tool formula_id strings candidates |= set(re.findall(r'"formula_id"\s*:\s*"([A-Z0-9_]+)"', code_text)) orphans = sorted(candidates - spec_ids) return orphans[:max_sample] def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() out_path = Path(args.out) if Path(args.out).is_absolute() else ROOT / args.out # 1) spec 공식 수집 all_spec_ids: list[str] = [] spec_sources: dict[str, str] = {} for yf in FORMULA_YAML_FILES: reg = _load_yaml(yf) ids = _extract_formula_ids(reg) for fid in ids: # also read python_tool field for implementation source formula_def = (reg.get("formula_registry") or {}).get("formulas", {}).get(fid, {}) if not bool(formula_def.get("active", True)): continue py_tool = str(formula_def.get("python_tool") or "") gas_impl = str(formula_def.get("gas_function") or formula_def.get("gas_name") or "") source = py_tool or gas_impl or "unknown" all_spec_ids.append(fid) spec_sources[fid] = source all_spec_ids = list(dict.fromkeys(all_spec_ids)) # dedup preserving order spec_id_set = set(all_spec_ids) # 2) 코드에서 구현 확인 code_text = _read_code_text() golden_text = _read_golden_text() runtime_gas_text = "\n".join( p.read_text(encoding="utf-8", errors="ignore") for p in sorted(ROOT.glob("gas_*.gs")) if p.exists() ) rows: list[dict[str, Any]] = [] for fid in all_spec_ids: in_code = bool(re.search(re.escape(fid), code_text)) in_golden = bool(re.search(re.escape(fid), golden_text)) declared_source = spec_sources.get(fid, "") # Check if declared python_tool file actually exists source_exists: bool | str = "N/A" if declared_source and declared_source.startswith("tools/"): source_path = ROOT / declared_source source_exists = source_path.exists() rows.append({ "formula_id": fid, "in_code": in_code, "in_golden_test": in_golden, "declared_source": declared_source, "source_file_exists": source_exists, }) implemented = [r for r in rows if r["in_code"]] unimplemented = [r for r in rows if not r["in_code"]] golden_covered = [r for r in rows if r["in_golden_test"]] missing_source_file = [r for r in rows if r["source_file_exists"] is False] orphan_ids = _find_orphan_formula_ids(runtime_gas_text, spec_id_set) result = { "formula_id": "YAML_TO_CODE_COVERAGE_V1", "yaml_formula_count": len(all_spec_ids), "implemented_count": len(implemented), "unimplemented_count": len(unimplemented), "golden_test_count": len(golden_covered), "missing_source_file_count": len(missing_source_file), "orphan_code_formula_count": len(orphan_ids), "coverage_ratio": round(len(implemented) / len(all_spec_ids), 4) if all_spec_ids else 0.0, "golden_coverage_ratio": round(len(golden_covered) / len(all_spec_ids), 4) if all_spec_ids else 0.0, "gate": "PASS" if not unimplemented else "WARN", "unimplemented_rules": [r["formula_id"] for r in unimplemented], "missing_source_file_rules": [r["formula_id"] for r in missing_source_file], "orphan_code_formulas": orphan_ids, "golden_uncovered_rules": [r["formula_id"] for r in rows if not r["in_golden_test"]], "rows": rows, } out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") print( f"[YAML_TO_CODE_COVERAGE_V1] total={len(all_spec_ids)} " f"implemented={len(implemented)} ({result['coverage_ratio']*100:.1f}%) " f"golden={len(golden_covered)} ({result['golden_coverage_ratio']*100:.1f}%) " f"unimplemented={len(unimplemented)} orphan={len(orphan_ids)} -> {out_path}" ) return 0 if __name__ == "__main__": raise SystemExit(main())