#!/usr/bin/env python3 """ validate_behavioral_coverage_v1.py ─────────────────────────────────────────────────────────────────────────────── 행위기반 커버리지 하네스 — 3-way 동등성 게이트 (BCH-V1 B05 단계) Python 미러(B03)와 GAS 패리티(B04) 결과를 통합해 최종 판정을 내린다. 판정 기준: BEHAVIORAL_COVERAGE_V1_OK ← 아래 조건 모두 충족 - behavioral_coverage_pct >= 100.0 (decision-critical 공식 전부 통과) - implementation_divergence_count == 0 (Python ≠ GAS 불일치 0건) - python_fail_count == 0 (Python 미러 실패 0건) - gas_fail_count == 0 (GAS 패리티 실패 0건) IMPLEMENTATION_DIVERGENCE ← Python ≠ GAS 불일치 발견 시 → B06에서 spec/13_formula_registry.yaml 기준으로 근본 정정 필요. BEHAVIORAL_COVERAGE_GAP ← golden case 없는 decision-critical 공식 발견 시. 출력: Temp/formula_behavioral_coverage_summary_v1.json 사용법: python tools/validate_behavioral_coverage_v1.py python tools/validate_behavioral_coverage_v1.py --strict """ from __future__ import annotations import json import sys from pathlib import Path ROOT = Path(__file__).resolve().parent.parent PY_RESULT = ROOT / "Temp" / "formula_behavioral_coverage_v1.json" GAS_RESULT = ROOT / "Temp" / "formula_gas_parity_v1.json" CONTRACT = ROOT / "spec" / "26_behavioral_coverage_contract.yaml" SUMMARY_OUT = ROOT / "Temp" / "formula_behavioral_coverage_summary_v1.json" if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"): sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1) def load_json(path: Path) -> dict: if not path.exists(): return {} return json.loads(path.read_text(encoding="utf-8")) def main() -> int: strict = "--strict" in sys.argv py_data = load_json(PY_RESULT) gas_data = load_json(GAS_RESULT) missing = [] if not PY_RESULT.exists(): missing.append(str(PY_RESULT)) if not GAS_RESULT.exists(): missing.append(str(GAS_RESULT)) if missing: print("BEHAVIORAL_COVERAGE_V1_FAIL") for m in missing: print(f" - MISSING: {m}") print(" 먼저 B03(run_formula_golden_cases_v2.py)과 B04(run_gas_golden_parity.js)를 실행하세요.") return 1 sep = "=" * 70 print(sep) print(" 행위기반 커버리지 — 3-way 동등성 게이트 (BCH-V1 B05)") print(sep) # ── Python 미러 결과 분석 ──────────────────────────────────────────── py_coverage = py_data.get("behavioral_coverage_pct", 0.0) py_divergences = py_data.get("python_divergences", []) py_missing_mirrors = py_data.get("missing_python_mirrors", []) py_per_formula = py_data.get("per_formula", []) py_fail_formulas = [f for f in py_per_formula if f.get("status") == "FAIL"] py_fail_count = sum(f.get("case_count", 0) - f.get("pass_count", 0) for f in py_per_formula) # ── GAS 패리티 결과 분석 ───────────────────────────────────────────── gas_pass = gas_data.get("gas_pass", 0) gas_fail = gas_data.get("gas_fail", 0) gas_divergences = gas_data.get("divergences", []) gas_load_errors = gas_data.get("load_errors", []) gas_coverage = gas_data.get("gas_coverage_pct", 0.0) # ── IMPLEMENTATION_DIVERGENCE 계산 ─────────────────────────────────── # Python ≠ GAS 불일치 = py_divergences (Python ≠ spec_correct) # + gas_divergences with type=GAS_FAIL impl_divergences: list[dict] = [] # Python ≠ spec_correct (= GAS가 정답인 경우) for d in py_divergences: impl_divergences.append({ "type": "PYTHON_DIVERGES_FROM_SPEC", "formula_id": d.get("formula_id"), "case_id": d.get("case_id"), "spec_correct": d.get("spec_correct"), "python_output": d.get("python_output"), "note": d.get("note", ""), "resolution": "B06: Python 미러 함수를 spec/13 expression(floor 방식)으로 정정", }) # GAS가 spec_correct를 못 내는 경우 for d in gas_divergences: if d.get("type") == "GAS_FAIL": impl_divergences.append({ "type": "GAS_DIVERGES_FROM_GOLDEN", "formula_id": d.get("formula_id"), "case_id": d.get("case_id"), "errors": d.get("errors", []), "actual": d.get("actual"), "expected": d.get("expected"), "resolution": "B06: GAS 함수를 spec/13 expression에 맞게 정정", }) impl_divergence_count = len(impl_divergences) # ── BEHAVIORAL_COVERAGE_GAP 계산 ───────────────────────────────────── # Python 미러 없는 공식 (golden case 있지만 Python 실행 불가) gap_formulas = py_missing_mirrors + gas_load_errors # ── 종합 판정 ──────────────────────────────────────────────────────── overall_ok = ( py_coverage >= 100.0 and impl_divergence_count == 0 and gas_fail == 0 and len(gas_load_errors) == 0 ) # ── 콘솔 출력 ──────────────────────────────────────────────────────── print(f"\n [Python 미러] behavioral_coverage_pct: {py_coverage}%") print(f" 미러 없는 공식: {len(py_missing_mirrors)}개") print(f" [GAS 패리티] coverage: {gas_coverage}% pass={gas_pass} fail={gas_fail}") print(f" [분기 건수] implementation_divergence_count: {impl_divergence_count}") if impl_divergences: print(f"\n [IMPLEMENTATION_DIVERGENCE] {impl_divergence_count}건:") for d in impl_divergences: print(f" [{d['type']}] {d['formula_id']}:{d['case_id']}") if d.get("spec_correct") is not None: print(f" spec_correct={d['spec_correct']}, python_output={d['python_output']}") if d.get("errors"): for e in d["errors"]: print(f" - {e}") print(f" → {d['resolution']}") if py_fail_formulas: print(f"\n [Python FAIL 공식] {len(py_fail_formulas)}개:") for f in py_fail_formulas: print(f" {f['formula_id']}: {f['pass_count']}/{f['case_count']} 통과") if gas_load_errors: print(f"\n [GAS 로드 실패] {len(gas_load_errors)}건:") for e in gas_load_errors: print(f" {e}") if py_missing_mirrors: print(f"\n [Python 미러 미구현] {len(py_missing_mirrors)}개:") for m in py_missing_mirrors: print(f" {m}") print() # ── 요약 표 ────────────────────────────────────────────────────────── print(" ┌─────────────────────────────────────────────────────────────┐") print(" │ 행위기반 커버리지 최종 판정 (BCH-V1) │") print(" ├──────────────────────────────────┬──────────────────────────┤") print(f" │ behavioral_coverage_pct │ {py_coverage:>6.2f}% {'✓' if py_coverage >= 100 else '✗'} │") print(f" │ implementation_divergence_count │ {impl_divergence_count:>6d} {'✓' if impl_divergence_count == 0 else '✗ B06 정정 필요'} │") print(f" │ gas_pass / gas_fail │ {gas_pass:>4d} / {gas_fail:<4d} {'✓' if gas_fail == 0 else '✗'} │") print(f" │ python_mirrors_missing │ {len(py_missing_mirrors):>6d} {'(허용)' if py_missing_mirrors else ''} │") print(" ├──────────────────────────────────┴──────────────────────────┤") status_token = "BEHAVIORAL_COVERAGE_V1_OK" if overall_ok else "BEHAVIORAL_COVERAGE_V1_FAIL" print(f" │ STATUS: {status_token:<51}│") print(" └─────────────────────────────────────────────────────────────┘") # ── JSON 저장 ──────────────────────────────────────────────────────── summary = { "status": status_token, "behavioral_coverage_pct": py_coverage, "implementation_divergence_count": impl_divergence_count, "gas_pass": gas_pass, "gas_fail": gas_fail, "gas_coverage_pct": gas_coverage, "python_mirrors_missing": py_missing_mirrors, "gas_load_errors": gas_load_errors, "implementation_divergences": impl_divergences, "behavioral_coverage_gaps": gap_formulas, "completion_gate": { "behavioral_coverage_pct_min": 100.0, "implementation_divergence_count_max": 0, "met": overall_ok, }, } SUMMARY_OUT.parent.mkdir(parents=True, exist_ok=True) SUMMARY_OUT.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8") print(f"\n → 결과 저장: {SUMMARY_OUT}") print(f" {status_token}\n") if strict and not overall_ok: return 1 return 0 if __name__ == "__main__": raise SystemExit(main())