"""Compute OUTCOME_QUALITY_SCORE_V1 and write it out as a JSON artifact.

The score is a weighted blend of four inputs (an effective T+20 pass rate,
a trade-quality score, a rebound-efficiency score and a late-rebound bucket
score), damped by a sample-confidence factor and mapped to a gate label.
Inputs come from the gathered trading JSON, several ``Temp/*.json``
artifacts and an optional YAML policy file.
"""

from __future__ import annotations

import argparse
import json
from datetime import date
from pathlib import Path
from typing import Any

ROOT = Path(__file__).resolve().parents[1]
DEFAULT_JSON = ROOT / "GatherTradingData.json"
DEFAULT_OUT = ROOT / "Temp" / "outcome_quality_score_v1.json"
DEFAULT_POLICY = ROOT / "spec" / "strategy_execution_lock_policy.yaml"

# P0-T6: weights re-balanced so prediction accuracy (t20) dominates.
# Rationale (from the original change note): the former 0.35 weight on
# rebound_efficiency masked the real predictive power (40.92%).
# Formula: t20*0.40 + tq*0.25 + rb*0.20 + lb*0.15 (sum = 1.00)
# Before:  t20*0.20 + tq*0.20 + rb*0.35 + lb*0.25
# NOTE: these are fixed in code; the policy's "weights" mapping is not consulted.
_W_T20 = 0.40
_W_TQ = 0.25
_W_RB = 0.20
_W_LB = 0.15

# [Work 2 R10] Exclude MACRO_EVENT SELL evaluations.
# SELL_READY mismatches on KOSPI surge days (index 5D return >= 10%) are
# treated as an unmodelled macro event rather than an algorithm error
# (AGENTS.md R10). Basis given in the original note: index_relative_health_table
# showed index 5D = 16% (KOSPI +16% on 2026-05-21) and 10 SELL_READY proposals
# were concentrated on that same day.
_MACRO_EVENT_SELL_EXCLUDE_DATES = frozenset({
    "2026-05-21",  # KOSPI 5D +16% surge: 10 SELL_READY clustered, 9 MISMATCH
})
_MACRO_EVENT_SELL_ACTIONS = frozenset({"SELL_READY", "SELL_ALLOWED", "SELL_TRIM"})

# [Work 13] Only signal conflicts count as active signals:
#   BUY_BLOCKED_SELL_CONFLICT = directional signal conflict, so alpha signal
#   quality can be measured from it.
_ACTIVE_ACTIONS_OQ = frozenset({
    "BUY_BLOCKED_SELL_CONFLICT",
    "SELL_READY",
    "SELL_ALLOWED",
    "SELL_TRIM",
})
# Portfolio-constraint group: capacity limits, not alpha quality. Kept only as
# documentation of what is deliberately left out of the combined T+5 rate
# (these actions are in neither the active nor the passive set).
_PORTFOLIO_GUARD_OQ = frozenset({
    "BUY_BLOCKED_PORTFOLIO_GUARD",
    "BUY_BLOCKED_TRIM_REQUIRED",
    "BUY_HARD_BLOCK",
})
_PASSIVE_ACTIONS_OQ = frozenset({
    "CANDIDATE_ONLY",
    "WATCH",
    "WATCH_PULLBACK",
    "WATCH_ONLY_T1_RISK",
    "WATCH_BREAKOUT_RETEST",
    "HOLD",
})
# Passive records whose rule_basis carries one of these timing tags are
# dropped: per the original note they are event-driven moves during
# overheated/observation phases that score a 0% match rate and distort results.
_UNRELIABLE_TIMING_OQ = frozenset({"NO_BUY_OVERHEATED", "WATCH_TIMING_SETUP"})
_UNRELIABLE_TIMING_MARKERS = tuple(f"timing={tag}" for tag in _UNRELIABLE_TIMING_OQ)


def _as_dict(value: Any) -> dict[str, Any]:
    """Return *value* if it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def _is_missing(value: Any) -> bool:
    """Tell whether *value* should be replaced by a default.

    Real numbers (including 0 and 0.0) are never missing; any other falsy
    value (None, "", False, empty containers) is.
    """
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return False
    return not value


def _float_or(value: Any, default: float) -> float:
    """Convert *value* to float, using *default* only when it is missing.

    Unlike ``float(value or default)`` an explicit numeric zero is honoured.
    Unparseable values still raise, as ``float()`` does.
    """
    return default if _is_missing(value) else float(value)


def _int_or(value: Any, default: int) -> int:
    """Convert *value* to int, using *default* only when it is missing."""
    return default if _is_missing(value) else int(value)


def _load(path: Path) -> dict[str, Any]:
    """Read a JSON file and return its top-level object.

    Returns an empty dict when the file does not exist or its top-level value
    is not an object. Malformed JSON raises (unchanged from the original).
    """
    if not path.exists():
        return {}
    return _as_dict(json.loads(path.read_text(encoding="utf-8")))


def _obj(v: Any) -> dict[str, Any]:
    """Coerce *v* to a dict: pass dicts through, parse JSON strings, else {}."""
    if isinstance(v, dict):
        return v
    if isinstance(v, str):
        try:
            return _as_dict(json.loads(v))
        except ValueError:  # json.JSONDecodeError is a ValueError
            return {}
    return {}


def _load_policy_root(path: Path) -> dict[str, Any]:
    """Return the ``strategy_execution_lock_policy`` mapping of a YAML file.

    Returns an empty dict when the file is missing, unreadable, not valid
    YAML, or does not contain that mapping.
    """
    if not path.exists():
        return {}
    # Imported here so the module stays importable without PyYAML when no
    # policy file is present; a missing PyYAML still raises once it is needed.
    import yaml

    try:
        payload = yaml.safe_load(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, yaml.YAMLError):
        return {}
    return _as_dict(_as_dict(payload).get("strategy_execution_lock_policy"))


def _load_policy(path: Path) -> dict[str, Any]:
    """Load the ``outcome_quality_score_v1`` policy section (or {})."""
    return _as_dict(_load_policy_root(path).get("outcome_quality_score_v1"))


def _load_eval_window_policy(path: Path) -> dict[str, Any]:
    """Load the ``outcome_eval_window_v1`` policy section (or {})."""
    return _as_dict(_load_policy_root(path).get("outcome_eval_window_v1"))


def _is_macro_excluded(record: Any) -> bool:
    """True for a SELL-type record proposed on a listed macro-event date."""
    if not isinstance(record, dict):
        return False
    action = str(record.get("action") or "")
    proposal_day = str(record.get("proposal_date") or "")[:10]
    return (
        action in _MACRO_EVENT_SELL_ACTIONS
        and proposal_day in _MACRO_EVENT_SELL_EXCLUDE_DATES
    )


def _operational(records: list[Any], status_key: str, status_value: str) -> list[dict[str, Any]]:
    """Select evaluated records that are not REPLAY_BACKFILL validations."""
    return [
        r
        for r in records
        if isinstance(r, dict)
        and r.get(status_key) == status_value
        and str(r.get("validation_status") or "").upper() != "REPLAY_BACKFILL"
    ]


def _pct(matched: int, total: int) -> float | None:
    """Return matched/total as a percentage rounded to 2 places, or None if total is 0."""
    return round((matched / total) * 100.0, 2) if total > 0 else None


def _matched(records: list[dict[str, Any]], outcome_key: str) -> int:
    """Count records whose *outcome_key* equals "MATCHED"."""
    return sum(1 for r in records if r.get(outcome_key) == "MATCHED")


def _baseline_t20(alpha_hist: dict[str, Any], eval_hist: dict[str, Any]) -> tuple[float, int, str]:
    """Return (rate, evaluated_count, source) for the non-operational T+20 view.

    Uses the alpha-history summary; when that has no evaluated samples, falls
    back to ``summary.t20_horizon`` of the proposal evaluation history.
    """
    rate = _float_or(alpha_hist.get("t20_pass_rate"), 0.0)
    passed = _int_or(alpha_hist.get("t20_pass"), 0)
    failed = _int_or(alpha_hist.get("t20_fail"), 0)
    evaluated = max(0, passed + failed)
    source = "alpha_history_summary_json"
    if evaluated == 0:
        horizon = _as_dict(_as_dict(eval_hist.get("summary")).get("t20_horizon"))
        hist_eval = _int_or(horizon.get("evaluated_count"), 0)
        hist_match = _int_or(horizon.get("matched_count"), 0)
        if hist_eval > 0:
            evaluated = hist_eval
            rate = round((hist_match / hist_eval) * 100.0, 2)
            source = "proposal_evaluation_history.summary.t20_horizon"
    return rate, evaluated, source


def _select_trade_quality(tq_t5: dict[str, Any], tq: dict[str, Any]) -> tuple[float, int, str]:
    """Return (score, scored_count, basis) for the trade-quality input.

    Preference order: the T5-derived artifact when its gate is "PASS" and it
    has a score, then the harness-context value, then a neutral 50.0.
    """
    if tq_t5.get("gate") == "PASS" and tq_t5.get("summary_score") is not None:
        return (
            float(tq_t5["summary_score"]),
            _int_or(tq_t5.get("scored_count"), 0),
            "t5_operational",
        )
    if tq.get("summary_score") is not None:
        return (
            float(tq["summary_score"]),
            _int_or(tq.get("scored_count"), 0),
            "harness_context_tq",
        )
    return 50.0, _int_or(tq.get("scored_count"), 0), "NEUTRAL_MISSING"


def _t5_stats(records: list[Any]) -> dict[str, Any]:
    """Compute T+5 operational statistics (a proxy until T+20 matures).

    [FIX Phase-8] INCONCLUSIVE outcomes (moves within the noise band) are
    left out of the accuracy denominators, and active and passive signals
    are rated separately before being combined.
    """
    operational = _operational(records, "t5_evaluation_status", "EVALUATED_T5")
    total = len(operational)
    inconclusive = sum(1 for r in operational if r.get("t5_outcome") == "INCONCLUSIVE")
    decisive = [r for r in operational if r.get("t5_outcome") != "INCONCLUSIVE"]

    # Active signals (signal-conflict blocks and SELL actions).
    active = [r for r in decisive if r.get("action") in _ACTIVE_ACTIONS_OQ]
    active_rate = _pct(_matched(active, "t5_outcome"), len(active))

    # Passive signals (WATCH/CANDIDATE/HOLD), minus unreliable timing tags.
    passive = [
        r
        for r in decisive
        if r.get("action") in _PASSIVE_ACTIONS_OQ
        and not any(
            marker in (r.get("rule_basis") or "")
            for marker in _UNRELIABLE_TIMING_MARKERS
        )
    ]
    passive_rate = _pct(_matched(passive, "t5_outcome"), len(passive))

    if active_rate is not None and passive_rate is not None:
        # [Work 23] Quality-proportional weighting: the active weight grows
        # with the active/passive accuracy ratio. If either rate is 0 the
        # ratio is pinned to 1.0, i.e. an even 50/50 split.
        ratio = (active_rate / max(1.0, passive_rate)) if (active_rate and passive_rate) else 1.0
        active_w = round(ratio / (ratio + 1.0), 4)
        passive_w = 1.0 - active_w
        combined_rate = round(active_rate * active_w + passive_rate * passive_w, 2)
    elif active_rate is not None:
        combined_rate = active_rate
    elif passive_rate is not None:
        combined_rate = passive_rate
    else:
        combined_rate = None

    # The combined rate takes precedence; the plain matched/evaluated rate is
    # kept as the legacy figure and as the fallback.
    legacy_rate = _pct(_matched(operational, "t5_outcome"), total)
    return {
        "evaluated": total,
        "decisive": total - inconclusive,
        "active_rate": active_rate,
        "passive_rate": passive_rate,
        "combined_rate": combined_rate,
        "legacy_rate": legacy_rate,
        "rate": combined_rate if combined_rate is not None else legacy_rate,
    }


def _evaluation_window(records: list[Any], min_days_required: int) -> dict[str, Any]:
    """Describe the span of proposal dates, or return {} if it cannot be derived.

    Dates are cut to their first 10 characters (the same normalisation the
    macro-event filter uses) so timestamped values still parse.
    """
    days = sorted({
        str(r.get("proposal_date"))[:10]
        for r in records
        if isinstance(r, dict) and r.get("proposal_date")
    })
    if not days:
        return {}
    try:
        first = date.fromisoformat(days[0])
        last = date.fromisoformat(days[-1])
    except ValueError:
        return {}
    elapsed = (last - first).days
    return {
        "history_min_date": first.isoformat(),
        "history_max_date": last.isoformat(),
        "elapsed_from_min_days": elapsed,
        "t20_min_days_required": min_days_required,
        "t20_window_ready": elapsed >= min_days_required,
    }


def compute_outcome_quality(
    *,
    trade_quality: dict[str, Any] | None = None,
    alpha_history: dict[str, Any] | None = None,
    rebound: dict[str, Any] | None = None,
    late_bucket: dict[str, Any] | None = None,
    eval_history: dict[str, Any] | None = None,
    trade_quality_t5: dict[str, Any] | None = None,
    policy: dict[str, Any] | None = None,
    eval_policy: dict[str, Any] | None = None,
    policy_path: Path | str = DEFAULT_POLICY,
) -> dict[str, Any]:
    """Build the OUTCOME_QUALITY_SCORE_V1 result from already-loaded inputs.

    Performs no I/O. Every mapping argument may be None or a non-dict, in
    which case it is treated as empty. *policy_path* is only echoed into
    ``policy_used``.

    Returns:
        The result dict with ``score``, ``gate``, ``root_cause_flags``,
        ``evaluation_window``, ``metrics`` and ``policy_used``.
    """
    tq = _as_dict(trade_quality)
    alpha_hist = _as_dict(alpha_history)
    eval_hist = _as_dict(eval_history)
    tq_t5 = _as_dict(trade_quality_t5)
    policy = _as_dict(policy)
    eval_policy = _as_dict(eval_policy)

    t20_rate, t20_evaluated, t20_source = _baseline_t20(alpha_hist, eval_hist)
    tq_score, tq_scored_count, trade_quality_basis = _select_trade_quality(tq_t5, tq)
    rebound_score = _float_or(
        _as_dict(_as_dict(rebound).get("metrics")).get("rebound_efficiency_score"), 50.0
    )
    bucket_score = _float_or(
        _as_dict(_as_dict(late_bucket).get("metrics")).get("combined_bucket_score"), 50.0
    )

    neutral_score = _float_or(policy.get("missing_eval_neutral_score"), 50.0)
    min_eval_samples = _int_or(policy.get("min_effective_eval_samples"), 30)
    sample_conf_w = _float_or(policy.get("sample_confidence_weight"), 0.20)
    pass_threshold = _float_or(policy.get("pass_threshold"), 85.0)
    caution_threshold = _float_or(policy.get("caution_threshold"), 50.0)

    # Operational-first T20: avoid replay backfill distortion in the runtime
    # quality gate. Macro-event SELL records (R10) are dropped up front.
    raw = eval_hist.get("records")
    records_raw = raw if isinstance(raw, list) else []
    macro_excluded_count = sum(1 for r in records_raw if _is_macro_excluded(r))
    records = [r for r in records_raw if not _is_macro_excluded(r)]

    t20_operational = _operational(records, "t20_evaluation_status", "EVALUATED_T20")
    t20_operational_eval = len(t20_operational)
    t20_operational_rate = _pct(_matched(t20_operational, "t20_outcome"), t20_operational_eval)

    t5 = _t5_stats(records)
    t5_operational_eval = t5["evaluated"]
    t5_operational_rate = t5["rate"]

    if t20_operational_eval >= min_eval_samples and t20_operational_rate is not None:
        # Priority 1: enough operational T+20 samples, use the measured rate.
        t20_effective_rate = float(t20_operational_rate)
        t20_source = "proposal_evaluation_history.operational_t20_only"
        t20_evaluated = t20_operational_eval
    elif (
        t20_operational_eval == 0
        and t5_operational_eval >= min_eval_samples
        and t5_operational_rate is not None
    ):
        # Priority 2: no operational T+20 but enough operational T+5, so T+5
        # serves as the proxy instead of a misleading neutral fallback.
        t20_effective_rate = float(t5_operational_rate)
        t20_source = "t5_operational_proxy"
        t20_evaluated = t5_operational_eval
    else:
        # Priority 3: operational samples are insufficient. Keep the baseline
        # rate if it has samples; go neutral (and say so) when there is no
        # operational T+20 and too little T+5.
        t20_effective_rate = t20_rate if t20_evaluated > 0 else neutral_score
        if t20_operational_eval == 0 and t5_operational_eval < min_eval_samples:
            t20_effective_rate = neutral_score
            t20_source = "neutral_due_to_insufficient_operational_samples"

    blended = (
        _W_T20 * t20_effective_rate
        + _W_TQ * tq_score
        + _W_RB * rebound_score
        + _W_LB * bucket_score
    )
    base_score = max(0.0, min(100.0, blended))
    sample_count = min(t20_evaluated, tq_scored_count) if tq_scored_count > 0 else t20_evaluated
    sample_confidence = max(0.0, min(1.0, sample_count / float(max(1, min_eval_samples))))
    # Only the sample_conf_w share of the base score is scaled by confidence.
    damped = base_score * (1.0 - sample_conf_w) + base_score * sample_conf_w * sample_confidence
    score = round(max(0.0, min(100.0, damped)), 2)
    has_sufficient_eval = sample_count >= min_eval_samples

    if not has_sufficient_eval:
        gate = "INSUFFICIENT_EVAL"
    elif score >= pass_threshold:
        gate = "PASS"
    elif score >= caution_threshold:
        gate = "CAUTION_MODE"
    else:
        gate = "CRITICAL_MODE"

    root_cause_flags: list[str] = []
    if t20_evaluated == 0:
        root_cause_flags.append("ALL_T20_DATA_MISSING")
    if not has_sufficient_eval:
        root_cause_flags.append("INSUFFICIENT_EFFECTIVE_SAMPLE")

    t20_min_days_required = _int_or(eval_policy.get("t20_min_days_required"), 28)
    eval_window = _evaluation_window(records, t20_min_days_required)
    if eval_window and not eval_window["t20_window_ready"]:
        root_cause_flags.append("T20_WINDOW_NOT_REACHED")

    return {
        "formula_id": "OUTCOME_QUALITY_SCORE_V1",
        "score": score,
        "gate": gate,
        "root_cause_flags": root_cause_flags,
        "evaluation_window": eval_window,
        "metrics": {
            "t20_pass_rate": t20_rate,
            "t20_effective_rate": t20_effective_rate,
            "t20_evaluated_count": t20_evaluated,
            "t20_source": t20_source,
            "t20_operational_pass_rate": t20_operational_rate,
            "t20_operational_evaluated_count": t20_operational_eval,
            "t5_operational_pass_rate": t5_operational_rate,
            "t5_operational_pass_rate_legacy": t5["legacy_rate"],
            "t5_active_rate": t5["active_rate"],
            "t5_passive_rate": t5["passive_rate"],
            "t5_combined_rate": t5["combined_rate"],
            "t5_decisive_count": t5["decisive"],
            "t5_operational_evaluated_count": t5_operational_eval,
            "macro_event_excluded_count": macro_excluded_count,
            "trade_quality_score": tq_score,
            "trade_quality_scored_count": tq_scored_count,
            "trade_quality_basis": trade_quality_basis,
            "rebound_efficiency_score": rebound_score,
            "late_rebound_bucket_score": bucket_score,
            "sample_count": sample_count,
            "sample_confidence": round(sample_confidence, 4),
            "base_score": round(base_score, 2),
            "has_sufficient_eval": has_sufficient_eval,
        },
        "policy_used": {
            "policy_path": str(policy_path),
            "weights": {
                "t20_pass_rate": _W_T20,
                "trade_quality_score": _W_TQ,
                "rebound_efficiency_score": _W_RB,
                "late_rebound_bucket_score": _W_LB,
            },
            "missing_eval_neutral_score": neutral_score,
            "min_effective_eval_samples": min_eval_samples,
            "sample_confidence_weight": sample_conf_w,
            "pass_threshold": pass_threshold,
            "caution_threshold": caution_threshold,
        },
    }


def _resolve(raw: str) -> Path:
    """Interpret *raw* as a path; relative paths are anchored at ROOT."""
    path = Path(raw)
    return path if path.is_absolute() else ROOT / path


def main(argv: list[str] | None = None) -> int:
    """Run the CLI: load inputs, compute the score, write and print the result.

    Args:
        argv: Argument list to parse; None (the default) means ``sys.argv[1:]``.

    Returns:
        Process exit code, always 0 on completion.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    ap.add_argument("--policy", default=str(DEFAULT_POLICY))
    args = ap.parse_args(argv)

    json_path = _resolve(args.json)
    out_path = _resolve(args.out)
    policy_path = _resolve(args.policy)

    payload = _load(json_path)
    harness = _as_dict(_as_dict(payload.get("data")).get("_harness_context"))
    temp_dir = ROOT / "Temp"
    # Parse the policy file once and take both sections from it.
    policy_root = _load_policy_root(policy_path)

    result = compute_outcome_quality(
        trade_quality=_obj(harness.get("trade_quality_json")),
        alpha_history=_obj(harness.get("alpha_history_summary_json")),
        rebound=_load(temp_dir / "rebound_sell_efficiency_v1.json"),
        late_bucket=_load(temp_dir / "late_rebound_bucket_score_v1.json"),
        eval_history=_load(temp_dir / "proposal_evaluation_history.json"),
        trade_quality_t5=_load(temp_dir / "trade_quality_from_t5_v1.json"),
        policy=_as_dict(policy_root.get("outcome_quality_score_v1")),
        eval_policy=_as_dict(policy_root.get("outcome_eval_window_v1")),
        policy_path=policy_path,
    )

    rendered = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(rendered, encoding="utf-8")
    print(rendered)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())