"""build_truth_reconciliation_gate_v1.py — TRUTH_RECONCILIATION_GATE_V1 P0-T3: 동일 지표가 파일마다 다른 값을 가지면 자동 FAIL. 감시 지표: prediction_match_rate_pct, t20_pass_rate, value_damage_pct_avg, gs_coverage_pct, portfolio_alpha_confidence, performance_readiness_score 허용 오차: 비율 지표 ±0.5%p, 금액 지표 ±1원 """ from __future__ import annotations import json from datetime import datetime, timezone from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] TEMP = ROOT / "Temp" DEFAULT_OUT = TEMP / "truth_reconciliation_gate_v1.json" TOLERANCE_RATE = 0.5 # %p TOLERANCE_KRW = 1.0 # 원 # 감시 지표: (정규화된 metric_id, json_pointer_list, 단위, 제외 파일 패턴) # 위양성 방지: 같은 key명이 다른 개념에 쓰이는 파일은 명시 제외 MONITORED_METRICS: list[tuple[str, list[str], str, set[str]]] = [ ("prediction_match_rate_pct", ["prediction_match_rate_pct", "t5_ap_combined"], "rate", # v5 = legacy v5.todo.batch 파일 (builder 없음), v7 = 다른 블렌드 점수 {"prediction_accuracy_harness_v5", "smart_cash_recovery_v7"}), ("t20_pass_rate", ["t20_pass_rate"], # pass_rate_pct는 제외 (completion_gap과 혼동) "rate", {"completion_gap", "phase_checks"}), # 완료기준 통과율 파일 제외 ("value_damage_pct_avg", ["value_damage_pct_avg"], "rate", # 다른 목적함수 + 구버전 아카이브 파일 제외 (현재 파이프라인 외 레거시) {"dynamic_value_preservation", "cash_raise_value_optimizer", "cash_raise_value_preservation", "value_preserving_cash_raise_v1", "hts_sell_blueprint", "smart_cash_recovery_v7.json"}), # v7 non-authoritative (2026-05-31 legacy) ("gs_strict_coverage_pct", # gs_coverage_pct 대신 strict 전용 포인터 ["gs_coverage_pct"], "rate", {"gs_native_coverage_lock"}), # native coverage는 다른 개념 ("portfolio_alpha_confidence", ["portfolio_alpha_confidence", "alpha_confidence"], "rate", set()), ("performance_readiness_score", ["performance_readiness_score", "blended_performance_readiness_score"], "rate", set()), ] def _load(p: Path) -> dict[str, Any]: if not p.exists(): return {} try: obj = json.loads(p.read_text(encoding="utf-8")) return obj if isinstance(obj, dict) else {} except Exception: return {} def _extract(d: dict[str, Any], pointers: list[str]) -> float | None: for ptr in pointers: v = d.get(ptr) if v is not None: try: f = float(v) if f != 0.0 or ptr in d: return f except (TypeError, ValueError): pass return None def main() -> int: # 모든 Temp JSON 로드 json_files = list(TEMP.glob("*.json")) # 제외: 보고서/golden/binary exclude_patterns = {"formula_golden", "formula_behavioral", "formula_gas_parity", "engine_audit_2026"} candidates = [f for f in json_files if not any(ex in f.name for ex in exclude_patterns)] observations: dict[str, list[dict[str, Any]]] = {m[0]: [] for m in MONITORED_METRICS} for f in candidates: d = _load(f) if not d: continue rel = str(f.relative_to(ROOT)) for metric_id, pointers, unit, exclude_patterns in MONITORED_METRICS: # 제외 패턴 파일 스킵 if any(ep in f.name for ep in exclude_patterns): continue val = _extract(d, pointers) if val is not None: observations[metric_id].append({"file": rel, "value": val}) conflicts: list[dict[str, Any]] = [] for metric_id, pointers, unit, _ in MONITORED_METRICS: obs = observations[metric_id] if len(obs) < 2: continue values = [o["value"] for o in obs] min_v, max_v = min(values), max(values) tol = TOLERANCE_RATE if unit == "rate" else TOLERANCE_KRW if (max_v - min_v) > tol: conflicts.append({ "metric_id": metric_id, "min": min_v, "max": max_v, "spread": round(max_v - min_v, 4), "tolerance": tol, "unit": unit, "observations": sorted(obs, key=lambda x: x["value"]), }) gate = "PASS" if not conflicts else "FAIL" result = { "formula_id": "TRUTH_RECONCILIATION_GATE_V1", "gate": gate, "conflict_count": len(conflicts), "conflicts": conflicts, "monitored_metrics": [m[0] for m in MONITORED_METRICS], "excluded_per_metric": {m[0]: list(m[3]) for m in MONITORED_METRICS if m[3]}, "files_scanned": len(candidates), "generated_at": datetime.now(timezone.utc).isoformat(), } DEFAULT_OUT.parent.mkdir(parents=True, exist_ok=True) DEFAULT_OUT.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") summary = {k: v for k, v in result.items() if k != "conflicts"} print(json.dumps(summary, indent=2, ensure_ascii=False)) if gate == "PASS": print("TRUTH_RECONCILIATION_GATE_V1_PASS") else: print(f"TRUTH_RECONCILIATION_GATE_V1_FAIL ({len(conflicts)} conflicts)") for c in conflicts: print(f" {c['metric_id']}: spread={c['spread']} (tol={c['tolerance']})") for o in c["observations"]: print(f" {o['file']}: {o['value']}") return 0 if gate == "PASS" else 1 if __name__ == "__main__": raise SystemExit(main())