QuantEngineByItz/tools/build_performance_readiness_replay_bridge_v1.py

"""build_performance_readiness_replay_bridge_v1.py — P1-009: Performance Readiness Replay Bridge

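REPLAY samples (REPLAY_BACKFILL / REPLAY_FROM_KRX_EOD) must never be mixed into
performance-metric aggregation (spec/29).
PERFORMANCE_READY is granted only once at least 30 LIVE/PAPER T20 samples have
accumulated and their T20 pass rate is at least 60%.
Current stage is infrastructure build-out: gate=WATCH_PENDING_LIVE_SAMPLE.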
"""
from __future__ import annotations

import argparse
import json
import statistics
from collections import Counter
from datetime import datetime, timezone
from pathlib import Path

from v7_hardening_common import ROOT, TEMP, load_json, save_json

# Default --hist input and --out output paths used by main().
DEFAULT_HIST = ROOT / "Temp" / "proposal_evaluation_history.json"
DEFAULT_OUT  = TEMP / "performance_readiness_replay_bridge_v1.json"

LIVE_SAMPLE_MIN = 30    # minimum live T20 sample count required for gate PERFORMANCE_READY
LIVE_T20_PASS_RATE_MIN = 60.0  # minimum live T20 pass rate (percent) required for gate PERFORMANCE_READY

_REPLAY_ORIGINS = {"REPLAY_FROM_KRX_EOD", "REPLAY_BACKFILL"}
_REPLAY_VALIDATION = {"REPLAY_BACKFILL"}


def _is_replay(r: dict) -> bool:
    return (
        str(r.get("data_origin") or "").upper() in _REPLAY_ORIGINS
        or str(r.get("validation_status") or "").upper() in _REPLAY_VALIDATION
        or str(r.get("record_type") or "").upper().startswith("HISTORICAL_REPLAY")
    )


def _pass_rate(records: list[dict], outcome_key: str) -> float:
    matched = [r for r in records if r.get(outcome_key) == "MATCHED"]
    return round(len(matched) / len(records) * 100.0, 2) if records else 0.0


def _avg_return(records: list[dict], ret_key: str) -> float | None:
    vals = [r[ret_key] for r in records if r.get(ret_key) is not None]
    return round(statistics.mean(vals), 4) if vals else None


def _extract_records(hist_raw: object) -> list[dict]:
    """Return the evaluation records contained in a loaded history payload.

    Accepts either a bare list of records or a dict holding them under
    ``"records"``. Any other shape -- including ``"records": null``, which
    ``dict.get`` returns as None despite a default -- yields an empty list, and
    entries that are not dicts are skipped, so a malformed history file
    degrades to "no samples" instead of raising.
    """
    candidates = hist_raw.get("records") if isinstance(hist_raw, dict) else hist_raw
    if not isinstance(candidates, list):
        return []
    return [rec for rec in candidates if isinstance(rec, dict)]


def main() -> int:
    """Build the performance-readiness replay-bridge report and write it out.

    Reads the evaluation history given by ``--hist``, splits records into live
    and replay groups via ``_is_replay``, computes pass rates / average returns
    per group, decides the readiness gate from the live T20 sample only, then
    saves the report to ``--out`` and prints it as JSON.

    Returns:
        0 on completion (used as the process exit code).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--hist", default=str(DEFAULT_HIST))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()

    records = _extract_records(load_json(Path(args.hist)))

    # -- Classification: everything not flagged as replay is treated as live --
    live_all = [r for r in records if not _is_replay(r)]
    replay_all = [r for r in records if _is_replay(r)]

    live_t20 = [r for r in live_all if r.get("t20_evaluation_status") == "EVALUATED_T20"]
    replay_t20 = [r for r in replay_all if r.get("t20_evaluation_status") == "EVALUATED_T20"]

    live_t5 = [r for r in live_all if r.get("t5_evaluation_status") == "EVALUATED_T5"]
    replay_t5 = [r for r in replay_all if r.get("t5_evaluation_status") == "EVALUATED_T5"]

    live_t20_count = len(live_t20)
    replay_t20_count = len(replay_t20)

    # -- Performance metrics come from LIVE records only (spec/29: no replay mixing) --
    live_t20_pass_rate = _pass_rate(live_t20, "t20_outcome")
    live_t20_avg_ret = _avg_return(live_t20, "t20_return_pct")
    live_t5_pass_rate = _pass_rate(live_t5, "t5_outcome")

    # -- Replay statistics are informational only; never used for the gate --
    replay_t20_pass_rate = _pass_rate(replay_t20, "t20_outcome")
    replay_t20_avg_ret = _avg_return(replay_t20, "t20_return_pct")

    # -- Gate decision --
    has_enough_samples = live_t20_count >= LIVE_SAMPLE_MIN
    if has_enough_samples and live_t20_pass_rate >= LIVE_T20_PASS_RATE_MIN:
        gate = "PERFORMANCE_READY"
        readiness_score = min(100.0, live_t20_pass_rate)
    elif has_enough_samples:
        gate = "WATCH_LIVE_BELOW_THRESHOLD"
        readiness_score = live_t20_pass_rate
    else:
        gate = "WATCH_PENDING_LIVE_SAMPLE"
        # Partial credit proportional to the collected live sample, capped at
        # 50; with zero live samples this evaluates to 0.0.
        readiness_score = min(50.0, live_t20_count / LIVE_SAMPLE_MIN * 50.0)

    # -- Replay vs live gap: only computed once at least 5 live T20 samples exist --
    replay_vs_live_gap_pct: float | None = None
    if live_t20_count >= 5 and live_t20_avg_ret is not None and replay_t20_avg_ret is not None:
        replay_vs_live_gap_pct = round(abs(replay_t20_avg_ret - live_t20_avg_ret), 4)

    result = {
        "formula_id": "PERFORMANCE_READINESS_REPLAY_BRIDGE_V1",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "gate": gate,
        "readiness_gate": gate,
        "performance_readiness_score": round(readiness_score, 2),
        # -- live aggregates (the performance metrics) --
        "live": {
            "total_records": len(live_all),
            "t20_count": live_t20_count,
            "t5_count": len(live_t5),
            "t20_pass_rate_pct": live_t20_pass_rate,
            "t5_pass_rate_pct": live_t5_pass_rate,
            "t20_avg_return_pct": live_t20_avg_ret,
            "sample_gate": "PASS" if live_t20_count >= LIVE_SAMPLE_MIN else f"PENDING({live_t20_count}/{LIVE_SAMPLE_MIN})",
        },
        # -- replay aggregates (informational; not to be used as performance metrics) --
        "replay_informational": {
            "total_records": len(replay_all),
            "t20_count": replay_t20_count,
            "t5_count": len(replay_t5),
            "t20_pass_rate_pct": replay_t20_pass_rate,
            "t20_avg_return_pct": replay_t20_avg_ret,
            "note": "REPLAY 표본은 성과지표 산출 금지(spec/29). 인프라 상태 확인용만.",
        },
        # -- summary --
        "replay_vs_live_gap_pct": replay_vs_live_gap_pct,
        "required_live_t20_count": LIVE_SAMPLE_MIN,
        "required_live_t20_pass_rate_pct": LIVE_T20_PASS_RATE_MIN,
        "targets": {
            "live_t20_count": f">={LIVE_SAMPLE_MIN}",
            "live_t20_pass_rate_pct": f">={LIVE_T20_PASS_RATE_MIN}",
            "replay_vs_live_gap_pct": "<=10 (live 집계 가능 시)",
        },
        "prohibitions": [
            "REPLAY 표본 성과지표 혼입 금지",
            "REPLAY T20를 operational_t20_count에 가산 금지",
            "live < 30 상태에서 PERFORMANCE_READY 판정 금지",
        ],
    }

    save_json(args.out, result)
    print(json.dumps(result, ensure_ascii=False, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())