#!/usr/bin/env python3
"""
build_honest_performance_guard_v2.py
────────────────────────────────────────────────────────────────────────
Honest performance-evidence harness V2 (phase P0_01).

P0_01: strict separation of design vs. validated scores.
  Every *_score field must carry a score_kind label in {DESIGN, VALIDATED},
  and VALIDATED is only allowed when live_sample_n >= 30.
  Only VALIDATED scores may be surfaced in reports.

Outputs:
  - Temp/honest_performance_guard_v2.json
  - Temp/p0_01_strictness_report.json
"""
from __future__ import annotations

import json
import math
import sys
from datetime import datetime
from pathlib import Path
from typing import Any

ROOT = Path(__file__).resolve().parent.parent

# Input files
OP_REPORT = ROOT / "Temp" / "operational_report.json"
REBOUND_EFF = ROOT / "Temp" / "rebound_sell_efficiency_v1.json"
LATE_CHASE = ROOT / "Temp" / "late_chase_attribution_v1.json"
PREDICTION_ACC = ROOT / "Temp" / "prediction_accuracy_harness_v2.json"

# Output files
OUTPUT_V2 = ROOT / "Temp" / "honest_performance_guard_v2.json"
REPORT_P001 = ROOT / "Temp" / "p0_01_strictness_report.json"

SAMPLE_THRESHOLD = 30
ACCEPTED_SCORE_KINDS = {"DESIGN", "VALIDATED"}


def _force_utf8_stdout() -> None:
    """Make ``sys.stdout`` emit UTF-8 so the Korean console output is printable.

    Does nothing when stdout is missing, reports no encoding, or already
    uses UTF-8.  Prefers ``reconfigure`` (keeps the existing stream object)
    and only falls back to re-opening the file descriptor when the stream
    does not offer it.
    """
    encoding = (getattr(sys.stdout, "encoding", None) or "").lower()
    if not encoding or encoding in ("utf-8", "utf8"):
        return
    reconfigure = getattr(sys.stdout, "reconfigure", None)
    if callable(reconfigure):
        reconfigure(encoding="utf-8", line_buffering=True)
        return
    # closefd=False: the original stdout object still owns the descriptor,
    # so the wrapper must not close it a second time at interpreter exit.
    sys.stdout = open(
        sys.stdout.fileno(),
        mode="w",
        encoding="utf-8",
        buffering=1,
        closefd=False,
    )


_force_utf8_stdout()


def load_json(p: Path) -> dict | list:
    """Load a JSON document from *p*, best-effort.

    Returns:
        The parsed document, or an empty dict when the file does not exist,
        cannot be read, or does not contain valid UTF-8 JSON.  Failures to
        read or parse are reported with a ``[WARN]`` line on stdout.
    """
    if not p.exists():
        return {}
    try:
        return json.loads(p.read_text(encoding="utf-8"))
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        print(f"[WARN] Failed to load {p.name}: {e}")
        return {}


def _is_count(value: Any) -> bool:
    """Return True when *value* is a usable sample count (finite int/float, not bool)."""
    if isinstance(value, bool):
        return False
    if isinstance(value, int):
        return True
    return isinstance(value, float) and math.isfinite(value)


def _sample_count(value: Any) -> int | float:
    """Return *value* when it is a usable sample count, otherwise 0."""
    return value if _is_count(value) else 0


def _as_dict(value: Any) -> dict:
    """Return *value* when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _check_score_field(value: Any, current_path: str) -> list[dict]:
    """Validate one ``*_score`` field and return the violations found for it."""
    if not isinstance(value, dict):
        return [{
            "path": current_path,
            "issue": "SCORE_NOT_DICT",
            "value": value,
            "detail": f"점수가 dict가 아님. 값={value}",
        }]

    violations: list[dict] = []
    score_kind = value.get("score_kind")
    sample_n = value.get("sample_n")

    # score_kind check
    if score_kind is None:
        violations.append({
            "path": current_path,
            "issue": "MISSING_SCORE_KIND",
            "detail": "score_kind 필드 누락",
        })
    elif not isinstance(score_kind, str) or score_kind not in ACCEPTED_SCORE_KINDS:
        # The isinstance guard keeps unhashable values (e.g. a list) from
        # raising TypeError in the set membership test.
        violations.append({
            "path": current_path,
            "issue": "INVALID_SCORE_KIND",
            "value": score_kind,
            "detail": f"허용되지 않는 값: {score_kind}",
        })

    # sample_n check
    if sample_n is None:
        violations.append({
            "path": current_path,
            "issue": "MISSING_SAMPLE_N",
            "detail": "sample_n 필드 누락",
        })
    elif not _is_count(sample_n):
        # A non-numeric sample_n must not silently bypass the threshold check.
        violations.append({
            "path": current_path,
            "issue": "INVALID_SAMPLE_N",
            "value": sample_n,
            "detail": f"sample_n이 숫자가 아님: {sample_n!r}",
        })
    elif score_kind == "VALIDATED" and sample_n < SAMPLE_THRESHOLD:
        # VALIDATED label with fewer samples than the threshold
        violations.append({
            "path": current_path,
            "issue": "INVALID_VALIDATED_LABEL",
            "sample_n": sample_n,
            "detail": f"VALIDATED 라벨인데 sample_n={sample_n} < {SAMPLE_THRESHOLD}",
        })
    return violations


def check_all_scores_have_kind_and_sample_n(obj: Any, path: str = "") -> list[dict]:
    """Check that every ``*_score`` field carries a score_kind and a sample_n.

    Walks dicts and lists recursively.  A key ending in ``_score`` is
    validated as a score field and is not descended into; any other dict or
    list value is searched recursively.

    Args:
        obj: The parsed JSON value to inspect.
        path: Dotted/indexed path of *obj* within the document, used to label
            violations (empty for the root).

    Returns:
        A list of violation dicts, each with at least ``path``, ``issue`` and
        ``detail``.  Empty when nothing is wrong.
    """
    violations: list[dict] = []
    if isinstance(obj, dict):
        for key, value in obj.items():
            current_path = f"{path}.{key}" if path else key
            if key.endswith("_score"):
                violations.extend(_check_score_field(value, current_path))
            elif isinstance(value, (dict, list)):
                violations.extend(
                    check_all_scores_have_kind_and_sample_n(value, current_path)
                )
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            violations.extend(
                check_all_scores_have_kind_and_sample_n(item, f"{path}[{i}]")
            )
    return violations


def build_strictness_report(rebound: dict, chase: dict, pred_acc: dict) -> dict:
    """Build the P0_01 strictness report.

    For each of the three metrics, the sample count is compared with
    ``SAMPLE_THRESHOLD``; a metric below the threshold gets an
    ``ANNOTATE_DESIGN`` entry in ``corrections_required``.

    Inputs that are not dicts (``load_json`` may return a list), a
    non-dict ``metrics`` entry, and missing / null / non-numeric sample
    counts are all treated as "no data" (sample count 0) instead of raising.

    Args:
        rebound: Parsed rebound-efficiency document; reads
            ``metrics.combo_count`` and ``metrics.rebound_efficiency_score``.
        chase: Parsed late-chase document; reads ``metrics.sample_n`` and
            ``metrics.chase_entry_rate_pct``.
        pred_acc: Parsed prediction-accuracy document; reads ``t5_sample``
            and ``t5_op_rate``.

    Returns:
        The report dict with ``findings``, ``violations``,
        ``corrections_required`` and a final ``verdict``.
    """
    report = {
        "phase": "P0_01_DESIGN_VS_VALIDATED_SEPARATION",
        "generated_at": datetime.now().isoformat(),
        "threshold_sample_min": SAMPLE_THRESHOLD,
        "findings": {
            "rebound_efficiency": {},
            "late_chase_attribution": {},
            "prediction_accuracy": {},
        },
        "violations": [],
        "corrections_required": [],
    }
    corrections = report["corrections_required"]

    # 1. rebound_efficiency check
    rb_metrics = _as_dict(_as_dict(rebound).get("metrics"))
    rb_combo = _sample_count(rb_metrics.get("combo_count", 0))
    rb_score = rb_metrics.get("rebound_efficiency_score", 0)
    rb_validated = rb_combo >= SAMPLE_THRESHOLD

    report["findings"]["rebound_efficiency"] = {
        "metric_name": "rebound_efficiency_score",
        "current_value": rb_score,
        "sample_n": rb_combo,
        "meets_validated_threshold": rb_validated,
        "required_score_kind": "VALIDATED" if rb_validated else "DESIGN",
        "annotation_suffix": "" if rb_validated else f" [설계점수, n={rb_combo}]",
    }
    if not rb_validated:
        corrections.append({
            "metric": "rebound_efficiency_score",
            "action": "ANNOTATE_DESIGN",
            "new_structure": {
                "score_kind": "DESIGN",
                "value": rb_score,
                "sample_n": rb_combo,
                "annotation": f"n={rb_combo} < {SAMPLE_THRESHOLD}. 실측 미검증.",
            },
        })

    # 2. late_chase_attribution check
    chase_metrics = _as_dict(_as_dict(chase).get("metrics"))
    chase_sample = _sample_count(chase_metrics.get("sample_n", 0))
    chase_rate = chase_metrics.get("chase_entry_rate_pct", 0)
    chase_validated = chase_sample >= SAMPLE_THRESHOLD

    report["findings"]["late_chase_attribution"] = {
        "metric_name": "late_chase_attribution",
        "current_value": chase_rate,
        "sample_n": chase_sample,
        "meets_validated_threshold": chase_validated,
        "required_score_kind": "VALIDATED" if chase_validated else "DESIGN",
    }
    if not chase_validated:
        corrections.append({
            "metric": "late_chase_attribution",
            "action": "ANNOTATE_DESIGN",
            "new_structure": {
                "score_kind": "DESIGN",
                "value": chase_rate,
                "sample_n": chase_sample,
                "annotation": f"뒷박 차단 효과 미검증 (n={chase_sample})",
            },
        })

    # 3. prediction_accuracy check
    pred = _as_dict(pred_acc)
    t5_sample = _sample_count(pred.get("t5_sample", 0))
    t5_rate = pred.get("t5_op_rate", 0)
    t5_validated = t5_sample >= SAMPLE_THRESHOLD

    report["findings"]["prediction_accuracy"] = {
        "metric_name": "t5_match_rate_pct",
        "current_value": t5_rate,
        "sample_n": t5_sample,
        "meets_validated_threshold": t5_validated,
        "required_score_kind": "VALIDATED" if t5_validated else "DESIGN",
    }
    if not t5_validated:
        corrections.append({
            "metric": "t5_match_rate_pct",
            "action": "ANNOTATE_DESIGN",
            "new_structure": {
                "score_kind": "DESIGN",
                "value": t5_rate,
                "sample_n": t5_sample,
                "annotation": f"실측 미검증 (n={t5_sample})",
            },
        })

    # Final verdict
    correction_count = len(corrections)
    report["verdict"] = {
        "all_scores_properly_labeled": correction_count == 0,
        "required_corrections_count": correction_count,
        "status": "PASS" if correction_count == 0 else "FAIL_CORRECTION_REQUIRED",
    }
    return report


def main() -> int:
    """Run the P0_01 check, print a summary and write both output files.

    Returns:
        Process exit code: 0 when the verdict status is ``PASS``, otherwise 1.
    """
    print("=" * 80)
    print(" P0_01: Design vs Validated 엄격한 분리")
    print("=" * 80)

    # Load inputs
    rebound = load_json(REBOUND_EFF)
    chase = load_json(LATE_CHASE)
    pred_acc = load_json(PREDICTION_ACC)

    # Build the P0_01 report
    p001_report = build_strictness_report(rebound, chase, pred_acc)

    print("\n[1] 재정렬 효율 (rebound_efficiency_score)")
    rb_find = p001_report["findings"]["rebound_efficiency"]
    print(f" 현재값: {rb_find['current_value']}")
    print(f" 표본 수: {rb_find['sample_n']} / {SAMPLE_THRESHOLD}")
    print(f" 필수 라벨: {rb_find['required_score_kind']}")

    print("\n[2] 뒷박 매수 (late_chase_attribution)")
    chase_find = p001_report["findings"]["late_chase_attribution"]
    print(f" 현재값: {chase_find['current_value']}")
    print(f" 표본 수: {chase_find['sample_n']} / {SAMPLE_THRESHOLD}")
    print(f" 필수 라벨: {chase_find['required_score_kind']}")

    print("\n[3] 예측 정확도 (T+5 일치율)")
    pred_find = p001_report["findings"]["prediction_accuracy"]
    print(f" 현재값: {pred_find['current_value']}%")
    print(f" 표본 수: {pred_find['sample_n']} / {SAMPLE_THRESHOLD}")
    print(f" 필수 라벨: {pred_find['required_score_kind']}")

    print("\n[결과]")
    print(f" 필요한 수정: {p001_report['verdict']['required_corrections_count']}")
    print(f" 상태: {p001_report['verdict']['status']}")

    # Save the report; create the output directory on a fresh checkout.
    REPORT_P001.parent.mkdir(parents=True, exist_ok=True)
    REPORT_P001.write_text(
        json.dumps(p001_report, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"\n✓ P0_01 보고서 저장: {REPORT_P001.name}")

    # Build the V2 guard
    guard_v2 = {
        "schema_version": "honest_performance_guard_v2",
        "generated_at": datetime.now().isoformat(),
        "p0_01_strictness": p001_report["verdict"],
        "required_corrections": p001_report["corrections_required"],
        "action_plan": [
            {
                "step": 1,
                "title": "모든 *_score 필드를 dict 구조로 변환",
                "fields": ["score_kind", "value", "sample_n", "annotation"],
            },
            {
                "step": 2,
                "title": "각 필드에 score_kind ∈ {DESIGN, VALIDATED} 할당",
                "rule": f"sample_n >= {SAMPLE_THRESHOLD} → VALIDATED, else → DESIGN",
            },
            {
                "step": 3,
                "title": "보고서 노출 규칙 적용",
                "rule": "DESIGN 점수는 보고서 요약에 단독 노출 금지. (설계, n=N) 접미사 필수",
            },
        ],
    }

    OUTPUT_V2.parent.mkdir(parents=True, exist_ok=True)
    OUTPUT_V2.write_text(
        json.dumps(guard_v2, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    print(f"✓ P0_01 가드 저장: {OUTPUT_V2.name}")

    return 0 if p001_report["verdict"]["status"] == "PASS" else 1


if __name__ == "__main__":
    sys.exit(main())