# QuantEngineByItz/tools/build_honest_performance_guard_v2.py

#!/usr/bin/env python3
"""
build_honest_performance_guard_v2.py
────────────────────────────────────────────────────────────────────────
정직 성과증빙 하네스 V2 (P0_01 단계)

P0_01: design vs validated 분리를 엄격하게

모든 *_score 필드에 score_kind ∈ {DESIGN, VALIDATED} 라벨을 강제하고,
VALIDATED는 live_sample_n >= 30일 때만 허용한다.
보고서에 노출되는 점수는 VALIDATED만 허용.

출력:
  - Temp/honest_performance_guard_v2.json
  - Temp/p0_01_strictness_report.json
"""

from __future__ import annotations

import json
import sys
from pathlib import Path
from datetime import datetime
from typing import Any

# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parent.parent

# Input files (JSON documents under ROOT/Temp)
# NOTE(review): OP_REPORT is not referenced in the visible part of this file.
OP_REPORT = ROOT / "Temp" / "operational_report.json"
REBOUND_EFF = ROOT / "Temp" / "rebound_sell_efficiency_v1.json"
LATE_CHASE = ROOT / "Temp" / "late_chase_attribution_v1.json"
PREDICTION_ACC = ROOT / "Temp" / "prediction_accuracy_harness_v2.json"

# Output files (JSON documents under ROOT/Temp)
OUTPUT_V2 = ROOT / "Temp" / "honest_performance_guard_v2.json"
REPORT_P001 = ROOT / "Temp" / "p0_01_strictness_report.json"

# Minimum sample count a score needs before it may carry the VALIDATED label.
SAMPLE_THRESHOLD = 30
# The only values accepted for a score's "score_kind" field.
ACCEPTED_SCORE_KINDS = {"DESIGN", "VALIDATED"}

# Switch console output to UTF-8 when the active stdout encoding is something
# else, because this script prints Korean text and a check-mark glyph.
_stdout_encoding = (getattr(sys.stdout, "encoding", None) or "").lower()
if _stdout_encoding and _stdout_encoding not in ("utf-8", "utf8"):
    if hasattr(sys.stdout, "reconfigure"):
        # Re-encode the existing stream in place. Opening a second file object
        # on the same descriptor would leave two owners of fd 1 and would skip
        # flushing whatever the old wrapper still buffers.
        sys.stdout.reconfigure(encoding="utf-8", line_buffering=True)
    else:
        # stdout was replaced by an object without reconfigure(): wrap its
        # descriptor, but do not take ownership of it (closefd=False).
        sys.stdout = open(
            sys.stdout.fileno(),
            mode="w",
            encoding="utf-8",
            buffering=1,
            closefd=False,
        )


def load_json(p: Path) -> dict | list:
    """Read a UTF-8 JSON file, degrading to an empty dict on failure.

    Args:
        p: Path of the JSON document to read.

    Returns:
        The decoded top-level JSON value. ``{}`` is returned when the file is
        missing (silently) or when it cannot be read or decoded (after printing
        a ``[WARN]`` line).
        NOTE(review): any top-level JSON value is returned unchanged, so the
        result is not guaranteed to be a dict or list.
    """
    try:
        # Attempt the read directly instead of checking exists() first, so the
        # file cannot disappear between the check and the read.
        return json.loads(p.read_text(encoding="utf-8"))
    except (FileNotFoundError, NotADirectoryError):
        # An absent input is an expected state and is not worth a warning.
        return {}
    except (OSError, ValueError, RecursionError) as e:
        # OSError: unreadable path; ValueError: invalid UTF-8 or invalid JSON
        # (UnicodeDecodeError and JSONDecodeError both derive from it);
        # RecursionError: pathologically deep nesting.
        print(f"[WARN] Failed to load {p.name}: {e}")
        return {}


def _score_entry_violations(path: str, entry: Any, limit: int, kinds: Any) -> list[dict]:
    """Return the violations for one ``*_score`` value located at ``path``."""
    if not isinstance(entry, dict):
        return [{
            "path": path,
            "issue": "SCORE_NOT_DICT",
            "value": entry,
            "detail": f"점수가 dict가 아님. 값={entry}"
        }]

    found: list[dict] = []
    score_kind = entry.get("score_kind")
    sample_n = entry.get("sample_n")

    if score_kind is None:
        found.append({
            "path": path,
            "issue": "MISSING_SCORE_KIND",
            "detail": "score_kind 필드 누락"
        })
    elif not isinstance(score_kind, str) or score_kind not in kinds:
        # The isinstance test keeps an unhashable label (list/dict) from
        # raising TypeError in the membership lookup.
        found.append({
            "path": path,
            "issue": "INVALID_SCORE_KIND",
            "value": score_kind,
            "detail": f"허용되지 않는 값: {score_kind}"
        })

    if sample_n is None:
        found.append({
            "path": path,
            "issue": "MISSING_SAMPLE_N",
            "detail": "sample_n 필드 누락"
        })
    elif score_kind == "VALIDATED":
        if not isinstance(sample_n, (int, float)):
            # A count that cannot be compared cannot justify VALIDATED.
            found.append({
                "path": path,
                "issue": "INVALID_VALIDATED_LABEL",
                "sample_n": sample_n,
                "detail": f"VALIDATED 라벨인데 sample_n={sample_n!r} 값이 숫자가 아니어서 검증 불가"
            })
        elif not sample_n >= limit:
            # Written as "not >=" so that NaN is rejected as well.
            found.append({
                "path": path,
                "issue": "INVALID_VALIDATED_LABEL",
                "sample_n": sample_n,
                "detail": f"VALIDATED 라벨인데 sample_n={sample_n} < {limit}"
            })

    return found


def _collect_score_violations(obj: Any, path: str, limit: int, kinds: Any, out: list[dict]) -> None:
    """Walk dicts and lists depth-first, appending score violations to ``out``."""
    if isinstance(obj, dict):
        for key, value in obj.items():
            current_path = f"{path}.{key}" if path else key
            if isinstance(key, str) and key.endswith("_score"):
                # A score entry is validated as a unit; its own children are
                # not searched for further *_score keys.
                out.extend(_score_entry_violations(current_path, value, limit, kinds))
            elif isinstance(value, (dict, list)):
                _collect_score_violations(value, current_path, limit, kinds, out)
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            _collect_score_violations(item, f"{path}[{i}]", limit, kinds, out)


def check_all_scores_have_kind_and_sample_n(
    obj: Any,
    path: str = "",
    *,
    threshold: int | None = None,
    accepted_kinds: set[str] | None = None,
) -> list[dict]:
    """Check every ``*_score`` field for a valid ``score_kind`` and ``sample_n``.

    The structure is walked recursively through dicts and lists. Each key that
    ends in ``_score`` must map to a dict that has a ``score_kind`` from the
    accepted set and a ``sample_n``. A score labelled ``VALIDATED`` must also
    have a numeric ``sample_n`` of at least the threshold.

    Args:
        obj: Decoded JSON-like structure to inspect.
        path: Dotted/indexed location of ``obj``, used as the prefix of the
            ``path`` reported in each violation.
        threshold: Minimum ``sample_n`` for VALIDATED; defaults to
            ``SAMPLE_THRESHOLD``.
        accepted_kinds: Allowed ``score_kind`` labels; defaults to
            ``ACCEPTED_SCORE_KINDS``.

    Returns:
        A list of violation dicts in traversal order. Each has ``path``,
        ``issue`` and ``detail``, plus ``value`` or ``sample_n`` where
        relevant. ``issue`` is one of ``SCORE_NOT_DICT``,
        ``MISSING_SCORE_KIND``, ``INVALID_SCORE_KIND``, ``MISSING_SAMPLE_N``
        or ``INVALID_VALIDATED_LABEL``.
    """
    limit = SAMPLE_THRESHOLD if threshold is None else threshold
    kinds = ACCEPTED_SCORE_KINDS if accepted_kinds is None else accepted_kinds
    violations: list[dict] = []
    _collect_score_violations(obj, path, limit, kinds, violations)
    return violations


def _as_mapping(obj: Any) -> dict:
    """Return ``obj`` when it is a dict, otherwise an empty dict."""
    return obj if isinstance(obj, dict) else {}


def _as_sample_count(raw: Any) -> Any:
    """Return ``raw`` when it is a comparable number, otherwise 0.

    Falling back to 0 is the conservative choice: an unusable count can only
    lead to the DESIGN label, never to VALIDATED.
    """
    # raw == raw is False only for NaN, which cannot be ordered either.
    if isinstance(raw, (int, float)) and raw == raw:
        return raw
    return 0


def _assess_metric(
    report: dict,
    finding_key: str,
    metric_name: str,
    value: Any,
    sample_n: Any,
    limit: int,
    annotation: str,
    with_suffix: bool = False,
) -> None:
    """Record one metric's finding and, below the threshold, its correction."""
    validated = sample_n >= limit
    finding = {
        "metric_name": metric_name,
        "current_value": value,
        "sample_n": sample_n,
        "meets_validated_threshold": validated,
        "required_score_kind": "VALIDATED" if validated else "DESIGN"
    }
    if with_suffix:
        finding["annotation_suffix"] = "" if validated else f" [설계점수, n={sample_n}]"
    report["findings"][finding_key] = finding

    if not validated:
        report["corrections_required"].append({
            "metric": metric_name,
            "action": "ANNOTATE_DESIGN",
            "new_structure": {
                "score_kind": "DESIGN",
                "value": value,
                "sample_n": sample_n,
                "annotation": annotation
            }
        })


def build_strictness_report(
    rebound: dict,
    chase: dict,
    pred_acc: dict,
    *,
    threshold: int | None = None,
) -> dict:
    """Build the P0_01 design-vs-validated strictness report.

    For each of the three metrics the sample count is compared with the
    threshold. A metric at or above it requires the VALIDATED label. A metric
    below it requires DESIGN and gets an ``ANNOTATE_DESIGN`` entry in
    ``corrections_required``.

    Inputs that are not dicts, a ``metrics`` entry that is not a dict, and
    sample counts that are missing, null or non-numeric are treated as
    "no data" (count 0) instead of raising.

    Args:
        rebound: Rebound-efficiency document; reads ``metrics.combo_count``
            and ``metrics.rebound_efficiency_score``.
        chase: Late-chase document; reads ``metrics.sample_n`` and
            ``metrics.chase_entry_rate_pct``.
        pred_acc: Prediction-accuracy document; reads top-level ``t5_sample``
            and ``t5_op_rate``.
        threshold: Minimum sample count for VALIDATED; defaults to
            ``SAMPLE_THRESHOLD``.

    Returns:
        A dict with ``phase``, ``generated_at``, ``threshold_sample_min``,
        ``findings``, ``violations``, ``corrections_required`` and
        ``verdict``. ``verdict.status`` is ``"PASS"`` only when no correction
        is required.
        NOTE(review): ``violations`` is always an empty list here; nothing in
        this function populates it.
    """
    limit = SAMPLE_THRESHOLD if threshold is None else threshold

    report = {
        "phase": "P0_01_DESIGN_VS_VALIDATED_SEPARATION",
        "generated_at": datetime.now().isoformat(),
        "threshold_sample_min": limit,
        "findings": {
            "rebound_efficiency": {},
            "late_chase_attribution": {},
            "prediction_accuracy": {}
        },
        "violations": [],
        "corrections_required": []
    }

    # 1. Rebound efficiency
    rb_metrics = _as_mapping(_as_mapping(rebound).get("metrics", {}))
    rb_combo = _as_sample_count(rb_metrics.get("combo_count", 0))
    _assess_metric(
        report,
        "rebound_efficiency",
        "rebound_efficiency_score",
        rb_metrics.get("rebound_efficiency_score", 0),
        rb_combo,
        limit,
        f"n={rb_combo} < {limit}. 실측 미검증.",
        with_suffix=True,
    )

    # 2. Late-chase attribution
    chase_metrics = _as_mapping(_as_mapping(chase).get("metrics", {}))
    chase_sample = _as_sample_count(chase_metrics.get("sample_n", 0))
    _assess_metric(
        report,
        "late_chase_attribution",
        "late_chase_attribution",
        chase_metrics.get("chase_entry_rate_pct", 0),
        chase_sample,
        limit,
        f"뒷박 차단 효과 미검증 (n={chase_sample})",
    )

    # 3. Prediction accuracy (fields are read from the document's top level)
    pred_doc = _as_mapping(pred_acc)
    t5_sample = _as_sample_count(pred_doc.get("t5_sample", 0))
    _assess_metric(
        report,
        "prediction_accuracy",
        "t5_match_rate_pct",
        pred_doc.get("t5_op_rate", 0),
        t5_sample,
        limit,
        f"실측 미검증 (n={t5_sample})",
    )

    # Final verdict
    pending = len(report["corrections_required"])
    report["verdict"] = {
        "all_scores_properly_labeled": pending == 0,
        "required_corrections_count": pending,
        "status": "PASS" if pending == 0 else "FAIL_CORRECTION_REQUIRED"
    }

    return report


def main() -> int:
    """Run the P0_01 strictness check and write both output documents.

    Loads the three input documents, builds the strictness report, prints a
    per-metric summary, and then writes the report to ``REPORT_P001`` and the
    V2 guard document to ``OUTPUT_V2``.

    NOTE(review): ``check_all_scores_have_kind_and_sample_n`` is not invoked
    here, so only the three sample-count comparisons decide the verdict.

    Returns:
        0 when the verdict status is ``"PASS"``, otherwise 1.
    """
    print("=" * 80)
    print("  P0_01: Design vs Validated 엄격한 분리")
    print("=" * 80)

    # Load inputs
    rebound = load_json(REBOUND_EFF)
    chase = load_json(LATE_CHASE)
    pred_acc = load_json(PREDICTION_ACC)

    # Build the P0_01 report
    p001_report = build_strictness_report(rebound, chase, pred_acc)
    verdict = p001_report["verdict"]

    # (heading, findings key, unit printed after the current value)
    sections = (
        ("[1] 재정렬 효율 (rebound_efficiency_score)", "rebound_efficiency", ""),
        ("[2] 뒷박 매수 (late_chase_attribution)", "late_chase_attribution", ""),
        ("[3] 예측 정확도 (T+5 일치율)", "prediction_accuracy", "%"),
    )
    for heading, key, unit in sections:
        finding = p001_report["findings"][key]
        print(f"\n{heading}")
        print(f"    현재값: {finding['current_value']}{unit}")
        print(f"    표본 수: {finding['sample_n']} / {SAMPLE_THRESHOLD}")
        print(f"    필수 라벨: {finding['required_score_kind']}")

    print("\n[결과]")
    print(f"  필요한 수정: {verdict['required_corrections_count']}")
    print(f"  상태: {verdict['status']}")

    # Save the report. The output directory is created first: when it is
    # absent every input loads as empty, and the write would otherwise fail.
    REPORT_P001.parent.mkdir(parents=True, exist_ok=True)
    REPORT_P001.write_text(
        json.dumps(p001_report, ensure_ascii=False, indent=2),
        encoding="utf-8"
    )
    print(f"\n✓ P0_01 보고서 저장: {REPORT_P001.name}")

    # Build the V2 guard document
    guard_v2 = {
        "schema_version": "honest_performance_guard_v2",
        "generated_at": datetime.now().isoformat(),
        "p0_01_strictness": verdict,
        "required_corrections": p001_report["corrections_required"],
        "action_plan": [
            {
                "step": 1,
                "title": "모든 *_score 필드를 dict 구조로 변환",
                "fields": ["score_kind", "value", "sample_n", "annotation"]
            },
            {
                "step": 2,
                "title": "각 필드에 score_kind ∈ {DESIGN, VALIDATED} 할당",
                "rule": "sample_n >= 30 → VALIDATED, else → DESIGN"
            },
            {
                "step": 3,
                "title": "보고서 노출 규칙 적용",
                "rule": "DESIGN 점수는 보고서 요약에 단독 노출 금지. (설계, n=N) 접미사 필수"
            }
        ]
    }

    OUTPUT_V2.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_V2.write_text(
        json.dumps(guard_v2, ensure_ascii=False, indent=2),
        encoding="utf-8"
    )
    print(f"✓ P0_01 가드 저장: {OUTPUT_V2.name}")

    return 0 if verdict["status"] == "PASS" else 1


# When run as a script, main()'s return value becomes the process exit status
# (0 when the verdict status is PASS, 1 otherwise).
if __name__ == "__main__":
    sys.exit(main())