from __future__ import annotations

import argparse
import json
import re
from pathlib import Path
from typing import Any

import yaml


# Directory one level above the folder that contains this script; every other
# path below is anchored here.
ROOT = Path(__file__).resolve().parents[1]

# Inputs: the formula registry and the two candidate golden-case files.
REGISTRY = ROOT.joinpath("spec", "13_formula_registry.yaml")
GOLDEN_V2 = ROOT.joinpath("spec", "formula_golden_cases_v2.yaml")
GOLDEN_TEMP = ROOT.joinpath("Temp", "formula_golden_cases.yaml")

# Default destination of the JSON coverage report (overridable with --out).
DEFAULT_OUT = ROOT.joinpath("Temp", "semantic_formula_coverage_v1.json")


def _load_registry(path: Path) -> list[str]:
    """Return the sorted formula ids declared in the registry YAML at *path*.

    Ids are taken from the keys of ``formula_registry.formulas`` when that
    entry is a non-empty mapping. For any other document shape the parsed
    payload is serialised to JSON and scanned, first for
    ``"formula_id": "<ID>"`` pairs and, if none are found, for bare tokens
    shaped like ``NAME_V<digits>``.

    Args:
        path: Location of the registry YAML file.

    Returns:
        A sorted list of formula ids; empty when the file is missing,
        unreadable, unparsable, or contains no recognisable ids.
    """
    if not path.exists():
        return []
    try:
        payload = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberate best effort: an unreadable or malformed registry is
        # reported as "no formulas" rather than aborting the harness.
        return []

    # Only descend through real mappings. A list or scalar at either level
    # used to raise AttributeError instead of reaching the regex fallback.
    registry = payload.get("formula_registry") if isinstance(payload, dict) else None
    formulas = registry.get("formulas") if isinstance(registry, dict) else None
    if isinstance(formulas, dict) and formulas:
        # str() keeps the declared list[str] contract (and a sortable list)
        # even if the YAML author used non-string keys.
        return sorted(str(key) for key in formulas)

    # Fallback: regex scan over a JSON rendering of the payload.
    # default=str lets values that YAML parses into non-JSON types (dates,
    # timestamps, ...) be rendered instead of raising TypeError.
    text = json.dumps(payload, ensure_ascii=False, default=str)
    ids = sorted(set(re.findall(r'"formula_id"\s*:\s*"([A-Z0-9_]+)"', text)))
    if not ids:
        ids = sorted(set(re.findall(r'\b([A-Z][A-Z0-9_]+_V[0-9]+)\b', text)))
    return ids


def _load_golden(path: Path) -> dict[str, Any]:
    """Parse the golden-case YAML at *path* and return its top-level mapping.

    An empty dict is returned when the file does not exist, cannot be read
    or parsed, or when its top-level value is not a mapping.
    """
    result: dict[str, Any] = {}
    if path.exists():
        try:
            raw = path.read_text(encoding="utf-8")
            parsed = yaml.safe_load(raw)
        except Exception:
            # Best effort: any read/parse failure is treated as "no golden cases".
            parsed = None
        if isinstance(parsed, dict):
            result = parsed
    return result


def _scan_code(root: Path | None = None, exclude: tuple[Path, ...] | None = None) -> str:
    """Concatenate the text of every scannable file under the project tree.

    Files with a ``.py``, ``.gs``, ``.yaml``, ``.yml`` or ``.md`` suffix
    (case-insensitive) are read as UTF-8 with undecodable bytes dropped and
    joined with newlines.

    The files that *define* the formula ids (the registry and the golden-case
    files) are left out by default: scanning them makes every
    ``formula_id in corpus`` test trivially true. Version-control, cache,
    virtualenv and ``node_modules`` directories are skipped as well, since
    they hold no project code.

    Args:
        root: Directory to scan; defaults to ``ROOT``.
        exclude: Files to leave out of the corpus; defaults to
            ``(REGISTRY, GOLDEN_V2, GOLDEN_TEMP)``. Pass ``()`` to scan
            everything.

    Returns:
        The newline-joined contents of all files that were read.
    """
    base = ROOT if root is None else root
    if exclude is None:
        exclude = (REGISTRY, GOLDEN_V2, GOLDEN_TEMP)
    excluded = {Path(item).resolve() for item in exclude}
    # Cheap name pre-filter so resolve() only runs on plausible matches.
    excluded_names = {item.name for item in excluded}
    noise_dirs = {".git", "__pycache__", ".venv", "venv", "node_modules"}
    suffixes = {".py", ".gs", ".yaml", ".yml", ".md"}

    blobs: list[str] = []
    for p in base.rglob("*"):
        if p.suffix.lower() not in suffixes:
            continue
        if not noise_dirs.isdisjoint(p.relative_to(base).parts[:-1]):
            continue
        try:
            if not p.is_file():
                continue
            if p.name in excluded_names and p.resolve() in excluded:
                continue
            blobs.append(p.read_text(encoding="utf-8", errors="ignore"))
        except OSError:
            # Unreadable files (permissions, races with deletion) are skipped.
            continue
    return "\n".join(blobs)


def main(argv: list[str] | None = None) -> int:
    """Build the formula coverage report, write it as JSON and print it.

    Coverage is measured by substring search: a formula id counts as covered
    when its text occurs anywhere in the corpus returned by ``_scan_code``.

    Args:
        argv: Command-line arguments (``--out PATH``). ``None`` makes
            argparse read ``sys.argv``, which is the original behaviour.
            A relative ``--out`` is resolved against ``ROOT``.

    Returns:
        Always 0; the result is conveyed through ``coverage_grade`` in the
        report, not through the exit status.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args(argv)
    out_path = Path(args.out)
    if not out_path.is_absolute():
        out_path = ROOT / out_path

    formula_ids = _load_registry(REGISTRY)
    corpus = _scan_code()
    spec_total = len(formula_ids)

    # Search the (potentially very large) corpus once per formula id and
    # derive the other buckets from that result instead of re-scanning.
    impl = [fid for fid in formula_ids if fid in corpus]
    impl_set = set(impl)

    # Report binding is all-or-nothing: it only asks whether the renderer's
    # filename is mentioned anywhere in the corpus.
    # NOTE(review): this script contains that filename literal itself, so if
    # this file is part of the scanned tree the check is always true - confirm
    # that is intended.
    renderer_mentioned = "render_operational_report.py" in corpus
    report_binding = list(impl) if renderer_mentioned else []

    outcome_prefixes = ("OUTCOME_", "TRADE_", "SHORT_HORIZON_", "LATE_", "REBOUND_", "CASH_RAISE_")
    outcome_binding = [fid for fid in impl if fid.startswith(outcome_prefixes)]

    # Prefer the v2 golden file; fall back to the Temp copy when it is absent.
    golden_path = GOLDEN_V2 if GOLDEN_V2.exists() else GOLDEN_TEMP
    golden = _load_golden(golden_path)
    golden_rows = golden.get("golden_cases_v2")
    if not isinstance(golden_rows, list):
        golden_rows = golden.get("golden_cases")
    if not isinstance(golden_rows, list):
        golden_rows = []
    golden_formula_ids = [
        fid
        for fid in (str(row.get("formula_id") or "") for row in golden_rows if isinstance(row, dict))
        if fid
    ]
    # One corpus search per distinct id; the count itself stays per golden row.
    golden_hit = {fid: fid in corpus for fid in set(golden_formula_ids)}
    golden_covered = sum(1 for fid in golden_formula_ids if golden_hit[fid])

    # PASS: non-empty spec, every formula found, every golden row's id found.
    # WARN: at least one formula found. FAIL: nothing found.
    if spec_total > 0 and len(impl) == spec_total and golden_covered == len(golden_formula_ids):
        grade = "PASS"
    elif impl:
        grade = "WARN"
    else:
        grade = "FAIL"

    # outcome_binding_deferred: formulas not yet bound to operational T+20 outcomes.
    # Marked DEFERRED (not absent) until V8-P1-06 operational_t20_count >= 30.
    # Computed as the remainder, so covered + deferred always equals spec_total.
    outcome_binding_deferred = spec_total - len(outcome_binding)

    out = {
        "formula_id": "SEMANTIC_FORMULA_COVERAGE_HARNESS_V1",
        "spec_total": spec_total,
        "implementation_covered": len(impl),
        "golden_test_covered": golden_covered,
        "report_binding_covered": len(report_binding),
        "outcome_binding_covered": len(outcome_binding),
        "outcome_binding_deferred": outcome_binding_deferred,
        "outcome_binding_deferred_reason": "PENDING_OPERATIONAL_T20_SAMPLE_V8_P1_06",
        "outcome_binding_total_check": len(outcome_binding) + outcome_binding_deferred,
        "coverage_grade": grade,
        # Capped at 200 entries to keep the report readable.
        "missing": [fid for fid in formula_ids if fid not in impl_set][:200],
        "golden_source": golden_path.name,
    }

    rendered = json.dumps(out, ensure_ascii=False, indent=2)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(rendered, encoding="utf-8")
    print(rendered)
    return 0


# Script entry point: run the harness and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())