QuantEngineByItz/tools/validate_behavioral_coverage_v1.py

#!/usr/bin/env python3
"""
validate_behavioral_coverage_v1.py
───────────────────────────────────────────────────────────────────────────────
행위기반 커버리지 하네스 — 3-way 동등성 게이트 (BCH-V1 B05 단계)

Python 미러(B03)와 GAS 패리티(B04) 결과를 통합해 최종 판정을 내린다.

판정 기준:
  BEHAVIORAL_COVERAGE_V1_OK  ← 아래 조건 모두 충족
    - behavioral_coverage_pct >= 100.0  (decision-critical 공식 전부 통과)
    - implementation_divergence_count == 0  (Python ≠ GAS 불일치 0건)
    - python_fail_count == 0  (Python 미러 실패 0건)
    - gas_fail_count == 0  (GAS 패리티 실패 0건)

  IMPLEMENTATION_DIVERGENCE  ← Python ≠ GAS 불일치 발견 시
    → B06에서 spec/13_formula_registry.yaml 기준으로 근본 정정 필요.

  BEHAVIORAL_COVERAGE_GAP  ← golden case 없는 decision-critical 공식 발견 시.

출력: Temp/formula_behavioral_coverage_summary_v1.json

사용법:
  python tools/validate_behavioral_coverage_v1.py
  python tools/validate_behavioral_coverage_v1.py --strict
"""

from __future__ import annotations

import json
import sys
from pathlib import Path

# Parent of the directory that contains this script (resolved to an absolute path).
ROOT = Path(__file__).resolve().parent.parent
# Input: JSON report read by main() as the Python-mirror result.
PY_RESULT   = ROOT / "Temp" / "formula_behavioral_coverage_v1.json"
# Input: JSON report read by main() as the GAS parity result.
GAS_RESULT  = ROOT / "Temp" / "formula_gas_parity_v1.json"
# Path of the behavioral-coverage contract spec.
# NOTE(review): not referenced anywhere in the visible code — confirm whether it is still needed.
CONTRACT    = ROOT / "spec" / "26_behavioral_coverage_contract.yaml"
# Output: combined summary JSON written by main().
SUMMARY_OUT = ROOT / "Temp" / "formula_behavioral_coverage_summary_v1.json"

# Force UTF-8 on stdout when the stream reports a different encoding, so the
# Korean report text and box-drawing characters printed by main() can be encoded.
if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"):
    if hasattr(sys.stdout, "reconfigure"):
        # Re-encode the existing stream in place instead of creating a second
        # file object on the same descriptor; keep line-buffered output.
        sys.stdout.reconfigure(encoding="utf-8", line_buffering=True)
    else:
        # Fallback for stream objects without reconfigure(): wrap the same
        # descriptor, but never let the new wrapper close fd 1 when it is
        # finalized (closefd=False).
        sys.stdout = open(
            sys.stdout.fileno(),
            mode="w",
            encoding="utf-8",
            buffering=1,
            closefd=False,
        )


def load_json(path: Path) -> dict:
    """Return the parsed JSON content of *path*, or ``{}`` if the file is absent.

    The file is read as UTF-8 text. A file that exists but holds invalid JSON
    propagates the ``json`` decoding error to the caller.
    """
    if path.exists():
        raw_text = path.read_text(encoding="utf-8")
        return json.loads(raw_text)
    return {}


def _collect_impl_divergences(py_divergences: list, gas_divergences: list) -> list[dict]:
    """Merge Python-side and GAS-side divergence records into one list.

    Every entry of *py_divergences* is kept and tagged
    ``PYTHON_DIVERGES_FROM_SPEC``. From *gas_divergences* only the entries
    whose ``type`` is ``"GAS_FAIL"`` are kept, tagged
    ``GAS_DIVERGES_FROM_GOLDEN``. Missing fields are filled with ``None``
    (or an empty string / list) rather than raising.
    """
    records: list[dict] = []

    for d in py_divergences:
        records.append({
            "type": "PYTHON_DIVERGES_FROM_SPEC",
            "formula_id": d.get("formula_id"),
            "case_id": d.get("case_id"),
            "spec_correct": d.get("spec_correct"),
            "python_output": d.get("python_output"),
            "note": d.get("note", ""),
            "resolution": "B06: Python 미러 함수를 spec/13 expression(floor 방식)으로 정정",
        })

    for d in gas_divergences:
        if d.get("type") == "GAS_FAIL":
            records.append({
                "type": "GAS_DIVERGES_FROM_GOLDEN",
                "formula_id": d.get("formula_id"),
                "case_id": d.get("case_id"),
                "errors": d.get("errors", []),
                "actual": d.get("actual"),
                "expected": d.get("expected"),
                "resolution": "B06: GAS 함수를 spec/13 expression에 맞게 정정",
            })

    return records


def main() -> int:
    """Combine the Python-mirror and GAS-parity reports into a final verdict.

    Reads ``PY_RESULT`` and ``GAS_RESULT``, prints a console report, and writes
    the combined summary to ``SUMMARY_OUT``.

    The gate is met only when all of the following hold:
      * ``behavioral_coverage_pct`` >= 100.0
      * no implementation divergence was collected
      * no formula in ``per_formula`` has status ``"FAIL"``
      * ``gas_fail`` == 0
      * ``load_errors`` in the GAS report is empty

    Returns:
        1 if either input file is missing, or if ``--strict`` was passed on
        the command line and the gate is not met; otherwise 0.
    """
    strict = "--strict" in sys.argv

    # Check for the inputs before parsing anything, so that a missing file is
    # always reported as such even when the other file is unreadable.
    missing = [str(p) for p in (PY_RESULT, GAS_RESULT) if not p.exists()]
    if missing:
        print("BEHAVIORAL_COVERAGE_V1_FAIL")
        for m in missing:
            print(f"  - MISSING: {m}")
        print("  먼저 B03(run_formula_golden_cases_v2.py)과 B04(run_gas_golden_parity.js)를 실행하세요.")
        return 1

    py_data = load_json(PY_RESULT)
    gas_data = load_json(GAS_RESULT)

    sep = "=" * 70
    print(sep)
    print("  행위기반 커버리지 — 3-way 동등성 게이트 (BCH-V1 B05)")
    print(sep)

    # ── Python mirror results ────────────────────────────────────────────
    # `or []` guards against list fields that are present but null in the JSON.
    py_coverage = py_data.get("behavioral_coverage_pct", 0.0)
    py_divergences = py_data.get("python_divergences") or []
    py_missing_mirrors = py_data.get("missing_python_mirrors") or []
    py_per_formula = py_data.get("per_formula") or []
    py_fail_formulas = [f for f in py_per_formula if f.get("status") == "FAIL"]
    # Count failing cases only inside formulas explicitly marked FAIL, so that
    # entries with any other status never contribute to the gate.
    py_fail_count = sum(
        f.get("case_count", 0) - f.get("pass_count", 0) for f in py_fail_formulas
    )
    python_ok = not py_fail_formulas

    # ── GAS parity results ───────────────────────────────────────────────
    gas_pass = gas_data.get("gas_pass", 0)
    gas_fail = gas_data.get("gas_fail", 0)
    gas_divergences = gas_data.get("divergences") or []
    gas_load_errors = gas_data.get("load_errors") or []
    gas_coverage = gas_data.get("gas_coverage_pct", 0.0)

    # ── Implementation divergences ───────────────────────────────────────
    # Python-side divergences plus GAS entries of type GAS_FAIL.
    impl_divergences = _collect_impl_divergences(py_divergences, gas_divergences)
    impl_divergence_count = len(impl_divergences)

    # ── Coverage gaps ────────────────────────────────────────────────────
    # Formulas with no Python mirror, followed by GAS load errors.
    gap_formulas = py_missing_mirrors + gas_load_errors

    # ── Overall verdict ──────────────────────────────────────────────────
    # Python-side failures are part of the documented completion criteria,
    # so a FAIL-status formula blocks the OK verdict.
    overall_ok = (
        py_coverage >= 100.0
        and impl_divergence_count == 0
        and python_ok
        and gas_fail == 0
        and len(gas_load_errors) == 0
    )

    # ── Console report ───────────────────────────────────────────────────
    print(f"\n  [Python 미러]  behavioral_coverage_pct: {py_coverage}%")
    print(f"                 미러 없는 공식: {len(py_missing_mirrors)}개")
    print(f"  [GAS 패리티]   coverage: {gas_coverage}%  pass={gas_pass} fail={gas_fail}")
    print(f"  [분기 건수]    implementation_divergence_count: {impl_divergence_count}")

    if impl_divergences:
        print(f"\n  [IMPLEMENTATION_DIVERGENCE] {impl_divergence_count}건:")
        for d in impl_divergences:
            print(f"    [{d['type']}] {d['formula_id']}:{d['case_id']}")
            if d.get("spec_correct") is not None:
                print(f"      spec_correct={d['spec_correct']}, python_output={d['python_output']}")
            if d.get("errors"):
                for e in d["errors"]:
                    print(f"      - {e}")
            print(f"      → {d['resolution']}")

    if py_fail_formulas:
        print(f"\n  [Python FAIL 공식] {len(py_fail_formulas)}개:")
        for f in py_fail_formulas:
            # .get keeps the report alive when a record lacks a field.
            print(
                f"    {f.get('formula_id')}: "
                f"{f.get('pass_count', 0)}/{f.get('case_count', 0)} 통과"
            )

    if gas_load_errors:
        print(f"\n  [GAS 로드 실패] {len(gas_load_errors)}건:")
        for e in gas_load_errors:
            print(f"    {e}")

    if py_missing_mirrors:
        print(f"\n  [Python 미러 미구현] {len(py_missing_mirrors)}개:")
        for m in py_missing_mirrors:
            print(f"    {m}")

    print()

    # ── Summary table ────────────────────────────────────────────────────
    print("  ┌─────────────────────────────────────────────────────────────┐")
    print("  │  행위기반 커버리지 최종 판정 (BCH-V1)                      │")
    print("  ├──────────────────────────────────┬──────────────────────────┤")
    print(f"  │  behavioral_coverage_pct         │  {py_coverage:>6.2f}% {'✓' if py_coverage >= 100 else '✗'}              │")
    print(f"  │  implementation_divergence_count │  {impl_divergence_count:>6d}  {'✓' if impl_divergence_count == 0 else '✗ B06 정정 필요'}           │")
    print(f"  │  python_fail_count               │  {py_fail_count:>6d}  {'✓' if python_ok else '✗'}               │")
    print(f"  │  gas_pass / gas_fail             │  {gas_pass:>4d} / {gas_fail:<4d} {'✓' if gas_fail == 0 else '✗'}          │")
    print(f"  │  python_mirrors_missing          │  {len(py_missing_mirrors):>6d}  {'(허용)' if py_missing_mirrors else ''}              │")
    print("  ├──────────────────────────────────┴──────────────────────────┤")
    status_token = "BEHAVIORAL_COVERAGE_V1_OK" if overall_ok else "BEHAVIORAL_COVERAGE_V1_FAIL"
    print(f"  │  STATUS: {status_token:<51}│")
    print("  └─────────────────────────────────────────────────────────────┘")

    # ── Persist the summary JSON ─────────────────────────────────────────
    summary = {
        "status": status_token,
        "behavioral_coverage_pct": py_coverage,
        "implementation_divergence_count": impl_divergence_count,
        "python_fail_count": py_fail_count,
        "python_fail_formulas": [f.get("formula_id") for f in py_fail_formulas],
        "gas_pass": gas_pass,
        "gas_fail": gas_fail,
        "gas_coverage_pct": gas_coverage,
        "python_mirrors_missing": py_missing_mirrors,
        "gas_load_errors": gas_load_errors,
        "implementation_divergences": impl_divergences,
        "behavioral_coverage_gaps": gap_formulas,
        "completion_gate": {
            "behavioral_coverage_pct_min": 100.0,
            "implementation_divergence_count_max": 0,
            "python_fail_count_max": 0,
            "met": overall_ok,
        },
    }
    SUMMARY_OUT.parent.mkdir(parents=True, exist_ok=True)
    SUMMARY_OUT.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")

    print(f"\n  → 결과 저장: {SUMMARY_OUT}")
    print(f"  {status_token}\n")

    # Without --strict the verdict is informational only and never fails the run.
    if strict and not overall_ok:
        return 1
    return 0


if __name__ == "__main__":
    # Run only when executed as a script; main()'s return value becomes the
    # process exit status.
    sys.exit(main())