QuantEngineByItz/tools/build_calibration_review_report_v1.py

#!/usr/bin/env python3
"""
build_calibration_review_report_v1.py
───────────────────────────────────────────────────────────────────────────────
calibration_registry.yaml + calibration_priority_v1.json + calibration_change_ledger_v4.json
을 묶어 운영용 보정 리뷰 리포트를 만든다.

목적:
  - PROVISIONAL / CALIBRATED 승격 후보를 사람이 읽을 수 있게 정리
  - registry warning fallback 상태를 숨기지 않고 그대로 공시
  - 월간 보정 운영에서 바로 참고 가능한 Markdown + JSON 산출물 생성

출력:
  Temp/calibration_review_report_v1.json
  Temp/calibration_review_report_v1.md

사용법:
  python tools/build_calibration_review_report_v1.py
"""

from __future__ import annotations

import json
import sys
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

import yaml

# Directory two levels above this file (the parent of the directory that
# contains the script); every input and output path below is anchored here.
ROOT = Path(__file__).resolve().parent.parent
# Inputs read by main().
REGISTRY = ROOT / "spec" / "calibration_registry.yaml"
PRIORITY = ROOT / "Temp" / "calibration_priority_v1.json"
LEDGER = ROOT / "Temp" / "calibration_change_ledger_v4.json"
# Outputs written by main().
OUT_JSON = ROOT / "Temp" / "calibration_review_report_v1.json"
OUT_MD = ROOT / "Temp" / "calibration_review_report_v1.md"

# Import-time side effect: when stdout reports a non-UTF-8 encoding, rebind
# sys.stdout to a line-buffered UTF-8 text stream on the same file descriptor,
# so the summary printed with ensure_ascii=False can always be encoded.
# NOTE(review): presumably aimed at consoles with a legacy code page - confirm.
if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"):
    sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1)


def _load_json(path: Path) -> dict[str, Any]:
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return {}
    return data if isinstance(data, dict) else {}


def _load_registry(path: Path) -> list[dict[str, Any]]:
    if not path.exists():
        return []
    data = yaml.safe_load(path.read_text(encoding="utf-8")) or {}
    thresholds = data.get("thresholds", [])
    return [t for t in thresholds if isinstance(t, dict)]


def _readiness(entry: dict[str, Any]) -> tuple[str, str]:
    source = str(entry.get("source") or "EXPERT_PRIOR")
    sample_n = int(entry.get("sample_n") or 0)
    if source == "CALIBRATED":
        return "CALIBRATED", "Already calibrated"
    if source == "PROVISIONAL" and sample_n >= 30:
        return "CALIBRATION_READY", "Ready for calibrated review"
    if source == "PROVISIONAL":
        return "PROVISIONAL_ACTIVE", "Provisional with live samples"
    if sample_n >= 10:
        return "PROVISIONAL_CANDIDATE", "Candidate for provisional review"
    return "WATCH", "Keep under watch"


def _table(rows: list[dict[str, Any]], keys: list[str], max_rows: int = 25) -> str:
    if not rows:
        return "_데이터 없음_"
    header = "| " + " | ".join(keys) + " |"
    sep = "| " + " | ".join(["---"] * len(keys)) + " |"
    body = []
    for row in rows[:max_rows]:
        body.append("| " + " | ".join(str(row.get(k, "")).replace("|", "ㅣ") for k in keys) + " |")
    suffix = f"\n\n_...총 {len(rows)}행 중 {max_rows}행 표시_" if len(rows) > max_rows else ""
    return "\n".join([header, sep, *body]) + suffix


def main() -> int:
    """Build the calibration review report and write it as JSON and Markdown.

    Reads the registry, priority, and ledger inputs, classifies every registry
    entry with ``_readiness``, collects the entries that need review together
    with the first 20 priority items, writes both report files, and prints a
    short JSON summary to stdout.

    Returns:
        Always 0. The printed ``gate`` field is ``"PASS"`` when at least one
        review or priority row exists and ``"WARN"`` otherwise.
    """
    registry = _load_registry(REGISTRY)
    priority = _load_json(PRIORITY)
    ledger = _load_json(LEDGER)

    source_counts: dict[str, int] = {}
    readiness_counts: dict[str, int] = {}
    reviewed_rows: list[dict[str, Any]] = []

    for entry in registry:
        source = str(entry.get("source") or "EXPERT_PRIOR")
        source_counts[source] = source_counts.get(source, 0) + 1
        readiness, reason = _readiness(entry)
        readiness_counts[readiness] = readiness_counts.get(readiness, 0) + 1
        # Only entries that are candidates for, or already in, the provisional
        # stage are listed individually; CALIBRATED and WATCH are only counted.
        if readiness in {"PROVISIONAL_CANDIDATE", "CALIBRATION_READY", "PROVISIONAL_ACTIVE"}:
            reviewed_rows.append(
                {
                    "id": entry.get("id", ""),
                    "source": source,
                    "sample_n": int(entry.get("sample_n") or 0),
                    "value": entry.get("value"),
                    "unit": entry.get("unit", ""),
                    "owner_formula": entry.get("owner_formula", ""),
                    "readiness": readiness,
                    "reason": reason,
                    "notes": str(entry.get("notes") or "")[:120],
                }
            )

    raw_priority = priority.get("priority_list")
    priority_list = raw_priority if isinstance(raw_priority, list) else []
    priority_rows = []
    for item in priority_list[:20]:
        if not isinstance(item, dict):
            continue
        priority_rows.append(
            {
                "calibration_id": item.get("calibration_id", ""),
                "source": item.get("source", ""),
                "sample_n": item.get("sample_n", 0),
                "urgency_score": item.get("urgency_score", 0),
                "linked_factor": item.get("linked_factor", ""),
                "owner_formula": item.get("owner_formula", ""),
            }
        )

    ledger_changes = ledger.get("changes")
    summary = {
        "total_thresholds": len(registry),
        "source_counts": source_counts,
        "readiness_counts": readiness_counts,
        # Fall back to the size of the full list, not the 20-row excerpt shown
        # in the report, so the count is not capped when the file omits it.
        "priority_count": int(priority.get("priority_count") or len(priority_list)),
        "ledger_change_count": len(ledger_changes) if isinstance(ledger_changes, list) else 0,
        "ledger_without_change_count": int(ledger.get("threshold_change_without_ledger_count") or 0),
    }
    report = {
        "formula_id": "CALIBRATION_REVIEW_REPORT_V1",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "registry_path": str(REGISTRY),
        "priority_path": str(PRIORITY),
        "ledger_path": str(LEDGER),
        "summary": summary,
        "top_priority_rows": priority_rows,
        "review_rows": reviewed_rows,
    }

    # The loaders tolerate missing inputs, so the output directory may not
    # exist yet (e.g. a fresh checkout); create it instead of failing on write.
    for out_path in (OUT_JSON, OUT_MD):
        out_path.parent.mkdir(parents=True, exist_ok=True)

    OUT_JSON.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")

    md_lines = [
        "# Calibration Review Report",
        "",
        "## Summary",
        "",
        f"- total thresholds: {summary['total_thresholds']}",
        f"- priority count: {summary['priority_count']}",
        f"- ledger change count: {summary['ledger_change_count']}",
        f"- ledger without change count: {summary['ledger_without_change_count']}",
        "",
        "### Source Counts",
        "",
        _table(
            [{"source": k, "count": v} for k, v in sorted(source_counts.items())],
            ["source", "count"],
            max_rows=50,
        ),
        "",
        "### Readiness Counts",
        "",
        _table(
            [{"readiness": k, "count": v} for k, v in sorted(readiness_counts.items())],
            ["readiness", "count"],
            max_rows=50,
        ),
        "",
        "## Top Priority Rows",
        "",
        _table(priority_rows, ["calibration_id", "source", "sample_n", "urgency_score", "linked_factor", "owner_formula"]),
        "",
        "## Review Candidates",
        "",
        _table(reviewed_rows, ["id", "source", "sample_n", "value", "unit", "owner_formula", "readiness", "reason"]),
        "",
        "## Evidence",
        "",
        f"- registry: {REGISTRY}",
        f"- priority: {PRIORITY}",
        f"- ledger: {LEDGER}",
    ]
    OUT_MD.write_text("\n".join(md_lines), encoding="utf-8")

    # Machine-readable run summary for whatever invoked the script.
    print(json.dumps({
        "formula_id": report["formula_id"],
        "gate": "PASS" if reviewed_rows or priority_rows else "WARN",
        "review_rows": len(reviewed_rows),
        "priority_rows": len(priority_rows),
        "json_path": str(OUT_JSON),
        "md_path": str(OUT_MD),
    }, ensure_ascii=False, indent=2))
    return 0


# Script entry point: run the report and hand main()'s return code to the
# interpreter as the process exit status.
if __name__ == "__main__":
    sys.exit(main())