"""build_realized_performance_v1.py — REALIZED_PERFORMANCE_V1 기존 데이터로 CAGR / Sharpe / Sortino / MDD / win_rate 를 추정한다. 데이터 출처: proposal_evaluation_history.json - records[].next_return_pct (T+1 수익률, n=1,066) - records[].t5_return_pct (T+5 수익률, n=711) - records[].t20_return_pct (T+20 수익률, n=510, REPLAY) GatherTradingData.json (hApex) - total_asset_krw (현재 총자산) - portfolio_peak_krw (고점) 정직성 원칙 (AGENTS.md §0.3): - REPLAY 기반 지표는 estimated=true, source=REPLAY_FROM_KRX_EOD - 실현 이력 없는 지표(CAGR/Sharpe)는 replay 추정 + 주의 문구 - 1년 이상 실현 손익 이력 없으므로 out_of_sample 비교 불가 - 미충족 항목은 insufficient_data 산출물: Temp/realized_performance_v1.json """ from __future__ import annotations import argparse import json import math import statistics from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] TEMP = ROOT / "Temp" DEFAULT_JSON = ROOT / "GatherTradingData.json" DEFAULT_HIST = TEMP / "proposal_evaluation_history.json" DEFAULT_OUT = TEMP / "realized_performance_v1.json" FORMULA_ID = "REALIZED_PERFORMANCE_V1" NA = "not_available" INSUF = "insufficient_data" RISK_FREE_ANNUAL_PCT = 3.5 # 한국 단기 무위험수익률 추정 (%) TRADING_DAYS_PER_YEAR = 252 def _load(path: Path) -> Any: if not path.exists(): return {} try: return json.loads(path.read_text(encoding="utf-8")) except Exception: return {} def _f(v: Any, default: float | None = None) -> float | None: try: return float(v) except Exception: return default def _extract_harness_root(payload: Any) -> dict[str, Any]: if not isinstance(payload, dict): return {} h = payload.get("hApex") dc = (payload.get("data") or {}).get("_harness_context") if isinstance(h, dict) and isinstance(dc, dict): m = dict(dc); m.update(h); return m return h if isinstance(h, dict) else dc if isinstance(dc, dict) else payload def _safe_stdev(xs: list[float]) -> float | None: return round(statistics.stdev(xs), 4) if len(xs) > 1 else None def _annualize_return(mean_pct: float, holding_days: int) -> dict[str, Any]: """단기 평균 수익률 연환산 — 개별 포지션 수익률을 연속 복리로 가정 시 수치이므로 포트폴리오 실제 CAGR과 다르다. 'NOT_MEANINGFUL_FOR_PORTFOLIO' 표기 필수.""" r = mean_pct / 100.0 if holding_days <= 0: return {"value": "not_available", "note": "holding_days=0"} periods_per_year = TRADING_DAYS_PER_YEAR / holding_days raw = round(((1 + r) ** periods_per_year - 1) * 100.0, 2) return { "value_pct": raw, "validity": "NOT_MEANINGFUL_FOR_PORTFOLIO", "note": ( f"개별 포지션 {mean_pct:.2f}% / {holding_days}일 → 연환산 = " f"(1+{r:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days})-1. " "포트폴리오 CAGR은 전체 계좌 시계열이 필요하며 현재 insufficient_data." ), "estimated": True, } def _sharpe(mean_pct: float, stdev_pct: float, holding_days: int) -> float | None: """기간 단위 Sharpe → 연환산.""" if stdev_pct <= 0: return None rf_period = RISK_FREE_ANNUAL_PCT / (TRADING_DAYS_PER_YEAR / holding_days) excess = mean_pct - rf_period sharpe_period = excess / stdev_pct annualized = sharpe_period * math.sqrt(TRADING_DAYS_PER_YEAR / holding_days) return round(annualized, 3) def _sortino(returns: list[float], mean_pct: float, holding_days: int) -> float | None: """Sortino: 하방 편차만 사용.""" rf_period = RISK_FREE_ANNUAL_PCT / (TRADING_DAYS_PER_YEAR / holding_days) downside = [r for r in returns if r < rf_period] if not downside: return None downside_dev = math.sqrt(sum((r - rf_period) ** 2 for r in downside) / len(downside)) if downside_dev <= 0: return None excess = mean_pct - rf_period sortino_period = excess / downside_dev return round(sortino_period * math.sqrt(TRADING_DAYS_PER_YEAR / holding_days), 3) def _max_drawdown(returns: list[float]) -> dict[str, Any]: """크로스섹션 수익률 분포에서 MDD를 계산하는 것은 부적합. 포트폴리오 레벨 시계열(계좌 총자산 일별 변화)이 필요하며 현재 insufficient_data.""" if not returns: return {"max_drawdown_pct": INSUF, "note": "수익률 데이터 없음"} # 최악 단일 포지션 손실 = 분포에서 MDD 하한 프록시 worst_single = round(min(returns), 2) return { "max_drawdown_pct": INSUF, "worst_single_position_loss_pct": worst_single, "note": ( "포트폴리오 MDD는 계좌 총자산 일별 시계열 필요(insufficient_data). " f"단일 포지션 최대 손실 = {worst_single}%." ), "estimated": True, } def _win_rate(returns: list[float], threshold_pct: float = 0.0) -> dict[str, Any]: if not returns: return {"win_rate_pct": INSUF} wins = sum(1 for r in returns if r > threshold_pct) return { "win_rate_pct": round(wins / len(returns) * 100.0, 2), "threshold_pct": threshold_pct, "n": len(returns), } def _worst_case_mdd(harness: dict) -> dict[str, Any]: """현재 포지션에서 모든 종목이 손절가에 도달할 경우 MDD 시나리오.""" total = _f(harness.get("total_asset_krw")) heat = _f(harness.get("total_heat_krw")) heat_pct = _f(harness.get("total_heat_pct")) if total is None or heat is None: return {"worst_case_mdd_pct": INSUF, "note": "포트폴리오 데이터 없음"} return { "worst_case_mdd_pct": round(heat_pct or 0, 2), "worst_case_loss_krw": round(heat or 0), "total_asset_krw": round(total), "note": "현재 포지션 전 손절 시 최대 손실 = total_heat_krw", "source": "hApex.total_heat_pct", } def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--json", default=str(DEFAULT_JSON)) ap.add_argument("--hist", default=str(DEFAULT_HIST)) ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() json_path = Path(args.json); json_path = json_path if json_path.is_absolute() else ROOT / json_path hist_path = Path(args.hist); hist_path = hist_path if hist_path.is_absolute() else ROOT / args.hist out_path = Path(args.out); out_path = out_path if out_path.is_absolute() else ROOT / args.out payload = _load(json_path) harness = _extract_harness_root(payload) hist = _load(hist_path) records: list[dict] = hist.get("records") or [] # ── 데이터 분리 ─────────────────────────────────────────────────────────── replay_records = [r for r in records if isinstance(r, dict) and str(r.get("validation_status") or "").upper() == "REPLAY_BACKFILL"] op_records = [r for r in records if isinstance(r, dict) and str(r.get("validation_status") or "").upper() != "REPLAY_BACKFILL"] # T+1 운영 수익률 t1_op_returns = [_f(r.get("next_return_pct")) for r in op_records if _f(r.get("next_return_pct")) is not None] # T+5 운영 수익률 t5_op_returns = [_f(r.get("t5_return_pct")) for r in op_records if _f(r.get("t5_return_pct")) is not None] # T+20 replay 수익률 t20_replay_returns = [_f(r.get("t20_return_pct")) for r in replay_records if _f(r.get("t20_return_pct")) is not None] def _stats_block(returns: list[float], holding_days: int, label: str, estimated: bool, source: str) -> dict[str, Any]: if not returns: return {"status": INSUF, "n": 0, "label": label} mean = round(statistics.mean(returns), 3) stdev = _safe_stdev(returns) cagr = _annualize_return(mean, holding_days) sharpe = _sharpe(mean, stdev or 0, holding_days) if stdev else None sortino = _sortino(returns, mean, holding_days) mdd = _max_drawdown(returns) wr = _win_rate(returns) return { "label": label, "n": len(returns), "holding_days": holding_days, "mean_return_pct": mean, "stdev_pct": stdev, "cagr_annualized": cagr, # value_pct=NOT_MEANINGFUL_FOR_PORTFOLIO — 개별 포지션 연환산 "sharpe_ratio_annualized": sharpe, "sortino_ratio_annualized": sortino, "max_drawdown": mdd, "win_rate": wr, "estimated": estimated, "source": source, "risk_free_rate_annual_pct": RISK_FREE_ANNUAL_PCT, "methodology": ( f"mean_return={mean:.2f}% → 연환산 CAGR = (1+{mean/100:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days}) - 1; " f"Sharpe = (mean - rf/{TRADING_DAYS_PER_YEAR}*{holding_days}) / stdev × √({TRADING_DAYS_PER_YEAR}/{holding_days})" ), } t1_stats = _stats_block(t1_op_returns, 1, "T+1_operational", estimated=True, source="proposal_evaluation_history.operational") t5_stats = _stats_block(t5_op_returns, 5, "T+5_operational", estimated=True, source="proposal_evaluation_history.operational") t20_stats = _stats_block(t20_replay_returns, 20, "T+20_replay", estimated=True, source="REPLAY_FROM_KRX_EOD (estimated=true)") # ── 현재 포트폴리오 MDD 시나리오 및 daily_history 기반 실현 MDD 산출 ─────── daily_hist = payload.get("data", {}).get("daily_history") or [] realized_max_mdd = None if daily_hist: mdd_values = [ _f(r.get("MDD_Pct") or r.get("mdd_pct")) for r in daily_hist if _f(r.get("MDD_Pct") or r.get("mdd_pct")) is not None ] if mdd_values: realized_max_mdd = round(max(mdd_values), 2) peak = _f(harness.get("portfolio_peak_krw")) total = _f(harness.get("total_asset_krw")) current_dd = { "portfolio_peak_krw": peak, "portfolio_current_krw": total, "current_drawdown_pct": ( round((peak - total) / peak * 100, 2) if peak and total and peak > 0 else 0.0 ), "realized_max_drawdown_pct": realized_max_mdd if realized_max_mdd is not None else INSUF, "worst_case_scenario": _worst_case_mdd(harness), } # ── 미충족 항목 ─────────────────────────────────────────────────────────── insufficient = { "CAGR_realized_1y": INSUF, "sharpe_realized_1y": INSUF, "MDD_realized": realized_max_mdd if realized_max_mdd is not None else INSUF, "win_rate_realized_closed_trades": INSUF, "profit_factor": INSUF, "slippage_impact": INSUF, "transaction_cost_impact": INSUF, "in_sample_vs_oos_gap": INSUF, "reason": "1년 이상 청산 완료 거래 이력 없음 — backdata MAE/MFE/pnl 전 행 공란", } result = { "formula_id": FORMULA_ID, "as_of_date": str((ROOT / "GatherTradingData.json").stat().st_mtime)[:10] if json_path.exists() else NA, "data_quality_note": ( "모든 통계는 REPLAY 또는 운영 제안 방향 일치율 기반 추정. " "청산 완료 실현 손익 이력이 없으므로 CAGR/Sharpe는 estimated=true. " "실제 운용 성과와 상이할 수 있음." ), "performance_metrics": { "t1_operational": t1_stats, "t5_operational": t5_stats, "t20_replay_estimated": t20_stats, }, "current_portfolio_mdd": current_dd, "insufficient_data_items": insufficient, "summary": { "best_estimated_cagr": (t20_stats.get("cagr_annualized") or {}).get("value_pct", INSUF), "cagr_validity": "NOT_MEANINGFUL_FOR_PORTFOLIO", "best_estimated_sharpe": t20_stats.get("sharpe_ratio_annualized", INSUF), "best_estimated_source": "T+20 replay (n=510, estimated=true)", "current_drawdown_pct": current_dd["current_drawdown_pct"], "worst_case_mdd_pct": (current_dd["worst_case_scenario"] or {}).get("worst_case_mdd_pct", INSUF), "t5_win_rate_pct": t5_stats.get("win_rate", {}).get("win_rate_pct", INSUF), "disclaimer": "spec/29_backtest_harness_contract.yaml 기준 — operational 실측 아님", }, } out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") s = result["summary"] print( f"[{FORMULA_ID}] T20_replay_CAGR(est)={s.get('best_estimated_cagr_pct')}% " f"Sharpe(est)={s.get('best_estimated_sharpe')} " f"MDD_worst={s.get('worst_case_mdd_pct')}% " f"T5_win_rate={s.get('t5_win_rate_pct')}% -> {out_path}" ) return 0 if __name__ == "__main__": raise SystemExit(main())