#!/usr/bin/env python3
"""
measure_harness_coverage.py
───────────────────────────────────────────────────────────────────────────────
하네스 커버리지 측정기

"YAML 스펙을 작성해도 GAS가 실제로 계산하지 않으면 LLM이 매번 다른 숫자를 만든다."
이 도구는 현재 harness_context에서 GAS가 실제 채운 수치 필드 vs
LLM이 추정해야 하는 공백 필드를 정량 측정한다.

출력:
  - 전체 커버리지 % (GAS 산출 / 전체 필수 필드)
  - 공식별 커버리지 표
  - LLM 자유도 점수 (낮을수록 결정론적)
  - 재현성 위험 필드 목록 (LLM이 계산해야 하는 필드 = 랜덤성 원천)

사용법:
  python tools/measure_harness_coverage.py [GatherTradingData.json]
  python tools/measure_harness_coverage.py [GatherTradingData.json] --strict-100
"""

from __future__ import annotations

import json
import sys
from pathlib import Path

# Directory two levels above this file; main() looks here for the default
# "GatherTradingData.json" input when no path is given on the command line.
# NOTE(review): presumably the repository root, since the usage text runs this
# script from a tools/ subdirectory — confirm.
ROOT = Path(__file__).resolve().parent.parent

# ── Required output fields per formula ──────────────────────────────────────
# Maps a formula ID to the harness-context keys that formula is expected to
# populate. Each entry is (field_name, description, data_type); the data_type
# strings used in this table are "enum", "bool", "numeric" and "json".
FORMULA_OUTPUT_FIELDS: dict[str, list[tuple[str, str, str]]] = {
    # ── STAGE 0 ──────────────────────────────────────────────────────────────
    "HARNESS_DATA_FRESHNESS_GATE_V1": [
        ("data_freshness_status", "데이터 신선도 상태", "enum"),
    ],
    "INTRADAY_ACTION_MATRIX_V1": [
        ("intraday_scope",  "장중/장전 허용 액션 범위",     "enum"),
        ("intraday_lock",   "장중 잠금 여부",               "bool"),
    ],
    # ── STAGE 1 ──────────────────────────────────────────────────────────────
    "CASH_RATIOS_V1": [
        ("settlement_cash_d2_krw",   "D+2 정산현금(원)",   "numeric"),
        ("settlement_cash_pct",      "D+2 현금 비율(%)",   "numeric"),
        ("cash_floor_min_pct",       "최소 현금 바닥(%)",  "numeric"),
        ("cash_shortfall_min_krw",   "현금 부족분(원)",    "numeric"),
    ],
    "TOTAL_HEAT_V1": [
        ("total_heat_pct",    "포트폴리오 총 Heat(%)",  "numeric"),
        ("heat_gate_status",  "Heat 게이트 상태",        "enum"),
    ],
    # ── STAGE 2 ──────────────────────────────────────────────────────────────
    "PROFIT_LOCK_RATCHET_V1": [
        ("profit_lock_stage",       "수익 잠금 단계",          "enum"),
        ("auto_trailing_stop",      "ATR 기반 자동 트레일링",  "numeric"),
    ],
    "PROFIT_RATCHET_TIERED_V2": [
        ("auto_trailing_stop_v2",   "3RD — APEX_SUPER 래칫",  "numeric"),
        ("ratchet_stage_v2",        "래칫 단계 v2",            "enum"),
    ],
    # ── STAGE 3 ──────────────────────────────────────────────────────────────
    "FLOW_ACCELERATION_V1": [
        ("flow_acceleration_status", "수급 에너지 소진 상태",  "enum"),
    ],
    "DISTRIBUTION_SELL_DETECTOR_V1": [
        ("distribution_sell_detector_status", "설거지 감지 상태 (6신호)", "enum"),
        ("signals_count",                      "트리거된 신호 수",          "numeric"),
    ],
    # ── STAGE 4 ──────────────────────────────────────────────────────────────
    "BREAKOUT_QUALITY_GATE_V2": [
        ("breakout_quality_score", "돌파 품질 점수",   "numeric"),
    ],
    "ANTI_CHASING_VELOCITY_V1": [
        ("anti_chasing_verdict",          "뒷박 추격 차단 판정", "enum"),
        ("anti_chasing_velocity_status",  "속도 차단 상태",       "enum"),
    ],
    "PULLBACK_ENTRY_TRIGGER_V1": [
        ("pullback_entry_verdict",       "눌림목 진입 판정",        "enum"),
        ("pullback_entry_trigger_price", "허용 진입 기준가(원)",    "numeric"),
    ],
    # ── STAGE 5 ──────────────────────────────────────────────────────────────
    "CASH_RECOVERY_OPTIMIZER_V1": [
        ("cash_recovery_plan_json", "현금회복 최적 매도조합 JSON", "json"),
    ],
    "SELL_WATERFALL_ENGINE_V1": [
        ("waterfall_plan_json",         "폭포수 매도 계획 JSON",   "json"),
    ],
    "SELL_EXECUTION_TIMING_V1": [
        ("sell_timing_verdict",         "매도 실행 타이밍 판정",   "enum"),
        ("sell_execution_window",       "실행 허용 시간대",         "enum"),
    ],
    "SELL_VALUE_PRESERVATION_TIERED_V2": [
        ("preservation_verdict",        "주식가치 보호 매도 판정", "enum"),
    ],
    # ── STAGE 6 ──────────────────────────────────────────────────────────────
    "TICK_NORMALIZER_V1": [
        ("tick_normalized_price",       "호가 정규화 완료 표시",   "bool"),
    ],
    "SELL_PRICE_SANITY_V1": [
        ("sell_price_sanity_status",    "매도가 역전/비현실가 검증", "enum"),
    ],
    # ── STAGE 7 ──────────────────────────────────────────────────────────────
    "BENCHMARK_RELATIVE_TIMESERIES_V1": [
        ("brt_verdict",      "BRT 상대강도 판정",  "enum"),
        ("brt_rs_slope",     "RS 기울기",           "numeric"),
    ],
    "RS_VERDICT_V2": [
        ("rs_verdict",       "최종 RS 판정",        "enum"),
    ],
    # ── STAGE 8 ──────────────────────────────────────────────────────────────
    "SATELLITE_ALPHA_QUALITY_GATE_V1": [
        ("saqg_verdict",     "위성 품질 게이트",    "enum"),
    ],
    "SATELLITE_AGGREGATE_PNL_GATE_V1": [
        ("sapg_verdict",     "위성 합산 손익 게이트", "enum"),
    ],
    # ── STAGE 9 ──────────────────────────────────────────────────────────────
    "LLM_SERVING_CONSTRAINT_V1": [
        ("serving_constraint_check",    "LLM 제약 검사 결과",  "enum"),
    ],
    "DETERMINISTIC_ROUTING_ENGINE_V1": [
        ("routing_execution_log",       "9단계 라우팅 실행 로그", "json"),
    ],
    # ── MONTHLY BATCH ─────────────────────────────────────────────────────────
    "TRADE_QUALITY_SCORER_V1": [
        ("trade_quality_json",          "거래 품질 채점 결과 JSON", "json"),
    ],
    "PATTERN_BLACKLIST_AUTO_V1": [
        ("pattern_blacklist_status",    "반복 패턴 블랙리스트 상태", "enum"),
    ],
    # ── Pre-existing required fields ──────────────────────────────────────────
    "POSITION_SIZE_V1": [
        ("buy_power_krw",    "매수 가용 현금(원)",  "numeric"),
        ("total_asset_krw",  "총 자산(원)",          "numeric"),
    ],
    "prices_lock": [
        ("prices_json",             "가격 잠금 JSON (stop/tp/current)", "json"),
    ],
    "quantities_lock": [
        ("sell_quantities_json",    "매도 수량 잠금 JSON",  "json"),
        ("buy_qty_inputs_json",     "매수 수량 잠금 JSON",  "json"),
        ("order_blueprint_json",    "HTS 주문 청사진 JSON", "json"),
    ],
}

# 70-column horizontal rules for the console report ("=" heavy, "-" light).
SEP  = "=" * 70
SEP2 = "-" * 70


def load_harness_context(json_path: Path) -> dict:
    """Load the harness-context mapping from a gathered-data JSON file.

    The following locations are probed in order and the first one that holds
    a dict wins:

      1. ``raw["data"]["_harness_context"]``
      2. ``raw["_harness_context"]``
      3. ``raw["harness_context"]``

    Args:
        json_path: Path of a UTF-8 encoded JSON file.

    Returns:
        The harness-context dict found in the file.

    Raises:
        SystemExit: With code 1, after an ``[ERROR]`` line on stderr, when
            none of the probed locations holds a dict.
        OSError: If the file cannot be read.
        ValueError: If the file is not valid UTF-8 / JSON
            (``json.JSONDecodeError`` is a ``ValueError`` subclass).
    """
    raw = json.loads(json_path.read_text(encoding="utf-8"))

    candidates = []
    # Only a JSON object at the top level can carry any of the known keys;
    # a list/string/number root falls straight through to the error path.
    if isinstance(raw, dict):
        data = raw.get("data")
        if isinstance(data, dict):
            candidates.append(data.get("_harness_context"))
        candidates.extend(
            raw.get(key) for key in ("_harness_context", "harness_context")
        )

    # Every location is held to the same "must be a dict" rule so that the
    # declared return type is honoured regardless of where the value was found.
    for candidate in candidates:
        if isinstance(candidate, dict):
            return candidate

    print("[ERROR] harness_context를 찾을 수 없음", file=sys.stderr)
    sys.exit(1)


def is_field_present(hc: dict, field: str) -> bool:
    """Report whether ``hc`` holds a usable value for ``field``.

    A field counts as absent when the key is missing, when its value is
    ``None``, or when its value is a string made up only of whitespace.
    Every other value (including ``0``, ``False`` and empty containers)
    counts as present.

    Args:
        hc: Harness-context mapping to inspect.
        field: Key to look up.

    Returns:
        ``True`` if the field is present, ``False`` otherwise.
    """
    value = hc.get(field)
    blank_text = isinstance(value, str) and not value.strip()
    return not (value is None or blank_text)


def field_is_numeric(hc: dict, field: str) -> bool:
    """Report whether ``hc[field]`` is an ``int`` or ``float``.

    ``bool`` values are rejected explicitly because ``bool`` is a subclass
    of ``int``. A missing key yields ``False``.

    Args:
        hc: Harness-context mapping to inspect.
        field: Key to look up.

    Returns:
        ``True`` for a non-bool ``int``/``float`` value, ``False`` otherwise.
    """
    value = hc.get(field)
    if isinstance(value, bool):
        return False
    return isinstance(value, (int, float))


def compute_coverage(hc: dict) -> dict[str, object]:
    """Measure how many fields listed in ``FORMULA_OUTPUT_FIELDS`` exist in ``hc``.

    Presence is decided by ``is_field_present``; the declared data type of a
    field is not validated, it is only carried along for missing fields.

    Args:
        hc: Harness-context mapping to inspect.

    Returns:
        A dict with these keys:

        * ``total_fields`` / ``covered_fields``: overall counts.
        * ``overall_pct``: covered / total * 100 (``0`` when there are no fields).
        * ``llm_freedom_score``: ``100 - overall_pct``.
        * ``missing_fields``: ``(formula_id, field_name, dtype)`` tuples.
        * ``covered_list``: ``(formula_id, field_name)`` tuples.
        * ``formula_results``: one dict per formula with ``formula_id``,
          ``total``, ``covered``, ``pct`` and ``missing`` (field names).
    """
    formula_results: list[dict[str, object]] = []
    covered_list: list[tuple[str, str]] = []
    missing_fields: list[tuple[str, str, str]] = []

    for formula_id, fields in FORMULA_OUTPUT_FIELDS.items():
        absent_names: list[str] = []
        hits = 0

        for name, _desc, dtype in fields:
            if not is_field_present(hc, name):
                absent_names.append(name)
                missing_fields.append((formula_id, name, dtype))
                continue
            hits += 1
            covered_list.append((formula_id, name))

        expected = len(fields)
        formula_results.append({
            "formula_id": formula_id,
            "total": expected,
            "covered": hits,
            "pct": hits / expected * 100 if expected > 0 else 0,
            "missing": absent_names,
        })

    # Every field lands in exactly one of the two lists, so the overall
    # totals can be derived from their lengths.
    covered_fields = len(covered_list)
    total_fields = covered_fields + len(missing_fields)
    overall_pct = covered_fields / total_fields * 100 if total_fields > 0 else 0

    return {
        "total_fields": total_fields,
        "covered_fields": covered_fields,
        "overall_pct": overall_pct,
        "llm_freedom_score": 100 - overall_pct,
        "missing_fields": missing_fields,
        "covered_list": covered_list,
        "formula_results": formula_results,
    }


def ensure_utf8_stdio() -> None:
    """Switch ``sys.stdout`` / ``sys.stderr`` to UTF-8 when they use another encoding.

    The report prints Korean text and symbols that a legacy code page
    (e.g. a Windows cp949 terminal) cannot encode.

    The stream is reconfigured in place where possible instead of opening a
    second file object on the same descriptor, so buffered output is not
    reordered and the descriptor is never owned (and closed) twice. The
    adjustment is best-effort: a stream that is ``None``, reports no
    encoding, or cannot be switched is left untouched.
    """
    for name in ("stdout", "stderr"):
        stream = getattr(sys, name)
        encoding = getattr(stream, "encoding", None)
        if not encoding or encoding.lower() in ("utf-8", "utf8"):
            continue

        reconfigure = getattr(stream, "reconfigure", None)
        try:
            if reconfigure is not None:
                # line_buffering=True keeps the line-at-a-time flushing the
                # report relies on when stdout and stderr are interleaved.
                reconfigure(encoding="utf-8", line_buffering=True)
            else:
                # Stream without reconfigure(): wrap the descriptor, but leave
                # its ownership with the original stream object.
                replacement = open(
                    stream.fileno(),
                    mode="w",
                    encoding="utf-8",
                    buffering=1,
                    closefd=False,
                )
                setattr(sys, name, replacement)
        except (AttributeError, OSError, ValueError):
            # No usable descriptor / not reconfigurable: keep the stream as is.
            continue


def main() -> int:
    """Run the CLI: print the harness coverage report and return the exit code.

    Command line: ``[json_path] [--strict-100]``. When no path is given,
    ``ROOT / "GatherTradingData.json"`` is used.

    The report contains, in order: a header, the per-formula coverage table,
    the overall coverage summary, the list of missing fields (if any), the
    values of covered numeric fields, and the formulas with zero coverage.

    Returns:
        ``0`` when overall coverage reaches the threshold (100% with
        ``--strict-100``, otherwise 80%); ``1`` when it does not, or when the
        input file is missing or cannot be read/parsed.
    """
    ensure_utf8_stdio()
    args = sys.argv[1:]
    strict_100 = "--strict-100" in args
    positional = [arg for arg in args if arg != "--strict-100"]
    json_path = Path(positional[0]) if positional else ROOT / "GatherTradingData.json"
    if not json_path.exists():
        print(f"[ERROR] {json_path} not found", file=sys.stderr)
        return 1

    try:
        hc = load_harness_context(json_path)
    except (OSError, ValueError) as exc:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        print(f"[ERROR] {json_path} could not be read as JSON: {exc}", file=sys.stderr)
        return 1
    coverage = compute_coverage(hc)

    print(SEP)
    print("  하네스 커버리지 측정기 — Harness Coverage Report")
    print(f"  파일: {json_path.name}")
    print(f"  harness_version: {hc.get('harness_version', '(missing)')}")
    print(f"  computed_at: {hc.get('computed_at', '(missing)')}")
    print(SEP)

    # ── Per-formula coverage table ──────────────────────────────────────────
    formula_results = coverage["formula_results"]
    print("\n[공식별 커버리지]")
    print(f"  {'공식 ID':<45} {'커버':<6} {'전체':<6} {'%':<7} 상태")
    print("  " + "-" * 65)
    for r in formula_results:
        # One filled dot per covered field, one hollow dot per missing field.
        bar = "●" * r["covered"] + "○" * (r["total"] - r["covered"])
        status = "✔ FULL" if r["pct"] == 100 else ("△ PARTIAL" if r["pct"] > 0 else "✗ MISSING")
        print(f"  {r['formula_id']:<45} {r['covered']:<6} {r['total']:<6} {r['pct']:>5.0f}%  {status}  {bar}")

    # ── Overall coverage summary ────────────────────────────────────────────
    overall_pct = coverage["overall_pct"]
    # Higher score = more fields the LLM has to estimate on its own.
    llm_freedom_score = coverage["llm_freedom_score"]

    print()
    print(SEP)
    print(f"  전체 커버리지    : {coverage['covered_fields']}/{coverage['total_fields']} 필드 = {overall_pct:.1f}%")
    print(f"  LLM 자유도 점수  : {llm_freedom_score:.1f}%  ← 낮을수록 결정론적 (목표: 0%)")
    print(SEP)

    if llm_freedom_score == 0:
        print("\n  ✔ 완전 결정론적 — LLM이 임의 계산해야 할 필드 없음")
    else:
        # ── Reproducibility-risk fields (missing from the context) ──────────
        missing_fields = coverage["missing_fields"]
        print(f"\n[재현성 위험 필드 — GAS 미계산 = LLM 추정 = 랜덤성 원천] ({len(missing_fields)}개)")
        print("  이 필드들은 LLM 호출마다 다른 값이 나올 수 있습니다.\n")
        print(f"  {'공식 ID':<45} {'필드명':<40} 타입")
        print("  " + "-" * 95)
        for formula_id, field_name, dtype in missing_fields:
            print(f"  {formula_id:<45} {field_name:<40} {dtype}")

    # ── Actual values of covered numeric fields ─────────────────────────────
    numeric_present = [
        (fn, hc[fn])
        for _fid, fn in coverage["covered_list"]
        if field_is_numeric(hc, fn)
    ]
    # The header count refers to the numeric fields listed below, not to
    # every covered field.
    print(f"\n[GAS 계산 완료 수치 필드] ({len(numeric_present)}개)")
    for fn, val in numeric_present[:20]:  # at most 20 rows are shown
        # "," keeps thousands separators without rounding away the
        # fractional part of percentages or slopes.
        print(f"  {fn:<45} = {val:>15,}")

    # ── Implementation priority: formulas with 0% coverage first ────────────
    print("\n[GAS 구현 우선순위 — 커버리지 0% 공식부터]")
    for r in formula_results:
        if r["pct"] == 0:
            print(f"  !!! {r['formula_id']} — 출력 필드 {r['total']}개 전부 미계산")

    print()
    threshold = 100.0 if strict_100 else 80.0
    return 0 if overall_pct >= threshold else 1


if __name__ == "__main__":
    raise SystemExit(main())