Files
QuantEngineByItz/tools/build_realized_performance_v1.py

313 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""build_realized_performance_v1.py — REALIZED_PERFORMANCE_V1
기존 데이터로 CAGR / Sharpe / Sortino / MDD / win_rate 를 추정한다.
데이터 출처:
proposal_evaluation_history.json
- records[].next_return_pct (T+1 수익률, n=1,066)
- records[].t5_return_pct (T+5 수익률, n=711)
- records[].t20_return_pct (T+20 수익률, n=510, REPLAY)
GatherTradingData.json (hApex)
- total_asset_krw (현재 총자산)
- portfolio_peak_krw (고점)
정직성 원칙 (AGENTS.md §0.3):
- REPLAY 기반 지표는 estimated=true, source=REPLAY_FROM_KRX_EOD
- 실현 이력 없는 지표(CAGR/Sharpe)는 replay 추정 + 주의 문구
- 1년 이상 실현 손익 이력 없으므로 out_of_sample 비교 불가
- 미충족 항목은 insufficient_data
산출물: Temp/realized_performance_v1.json
"""
from __future__ import annotations
import argparse
import json
import math
import statistics
from pathlib import Path
from typing import Any
# Directory two levels above this file; every default path below hangs off it.
ROOT = Path(__file__).resolve().parents[1]
# Directory holding the default history input and the default report output.
TEMP = ROOT / "Temp"
# Default locations, used as the defaults of the --json / --hist / --out options.
DEFAULT_JSON = ROOT / "GatherTradingData.json"
DEFAULT_HIST = TEMP / "proposal_evaluation_history.json"
DEFAULT_OUT = TEMP / "realized_performance_v1.json"
# Identifier written to the report's "formula_id" field and the console summary.
FORMULA_ID = "REALIZED_PERFORMANCE_V1"
# Sentinel strings emitted in place of a number when a metric cannot be produced.
NA = "not_available"
INSUF = "insufficient_data"
RISK_FREE_ANNUAL_PCT = 3.5 # estimated Korean short-term risk-free rate (%)
# Number of trading days per year used when annualising per-period figures.
TRADING_DAYS_PER_YEAR = 252
def _load(path: Path) -> Any:
    """Read *path* as UTF-8 JSON, returning ``{}`` when that is not possible.

    This is a deliberate best-effort loader: a missing or unreadable file,
    undecodable bytes, or malformed JSON all yield an empty dict so the caller
    can carry on with "no data" instead of crashing.

    Args:
        path: Location of the JSON document.

    Returns:
        The decoded JSON value (not necessarily a dict), or ``{}`` on failure.
    """
    # EAFP: attempt the read directly instead of checking exists() first, which
    # costs an extra stat call and can race with the file being removed.
    try:
        text = path.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        return {}
    try:
        return json.loads(text)
    except (ValueError, RecursionError):
        # json.JSONDecodeError is a ValueError; RecursionError covers
        # pathologically deep nesting.
        return {}
def _f(v: Any, default: float | None = None) -> float | None:
    """Coerce *v* to a finite float, falling back to *default*.

    Args:
        v: Any value; numbers and numeric strings are accepted.
        default: Value returned when *v* cannot be used.

    Returns:
        ``float(v)`` when the conversion succeeds and the result is finite,
        otherwise *default*.
    """
    try:
        number = float(v)
    except (TypeError, ValueError, OverflowError):
        # TypeError: None / containers; ValueError: non-numeric text;
        # OverflowError: integers too large for a float.
        return default
    # NaN and +/-inf would silently corrupt every mean/stdev computed from the
    # value and would be serialised as non-standard JSON, so treat them as
    # "no usable number".
    return number if math.isfinite(number) else default
def _extract_harness_root(payload: Any) -> dict[str, Any]:
    """Pick the harness dictionary out of a loaded payload.

    Two candidate locations are consulted: ``payload["hApex"]`` and
    ``payload["data"]["_harness_context"]``.

    Args:
        payload: Decoded JSON document (any type).

    Returns:
        * ``{}`` when *payload* is not a dict;
        * a new dict with the context entries overlaid by the ``hApex``
          entries when both candidates are dicts (``hApex`` wins on clashes);
        * whichever single candidate is a dict when only one is;
        * *payload* itself when neither candidate is a dict.
    """
    if not isinstance(payload, dict):
        return {}
    apex = payload.get("hApex")
    data = payload.get("data")
    # "data" may be null, a list or a string in a malformed payload; only a
    # dict can carry a harness context.
    context = data.get("_harness_context") if isinstance(data, dict) else None
    if isinstance(apex, dict) and isinstance(context, dict):
        return {**context, **apex}
    if isinstance(apex, dict):
        return apex
    if isinstance(context, dict):
        return context
    return payload
def _safe_stdev(xs: list[float]) -> float | None:
    """Return the sample standard deviation of *xs* rounded to 4 decimals.

    Fewer than two observations have no sample deviation; ``None`` is returned
    in that case instead of letting ``statistics.stdev`` raise.
    """
    if len(xs) <= 1:
        return None
    deviation = statistics.stdev(xs)
    return round(deviation, 4)
def _annualize_return(mean_pct: float, holding_days: int) -> dict[str, Any]:
    """Annualise a short-horizon mean return by compounding it over a year.

    The figure assumes the per-position mean return is re-earned every
    ``holding_days`` trading days, so it is NOT a portfolio CAGR; the result is
    always tagged ``NOT_MEANINGFUL_FOR_PORTFOLIO``.

    Args:
        mean_pct: Mean return per holding period, in percent.
        holding_days: Length of the holding period in trading days.

    Returns:
        On success a dict with ``value_pct``, ``validity``, ``note`` and
        ``estimated``. When the value cannot be computed (non-positive
        ``holding_days``, a mean below -100 %, or a result too large for a
        float) a dict with ``value == "not_available"`` and a ``note`` naming
        the reason; it carries no ``value_pct`` key.
    """
    r = mean_pct / 100.0
    if holding_days <= 0:
        return {"value": "not_available", "note": "holding_days=0"}
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    growth = 1 + r
    if growth < 0:
        # A negative base raised to a fractional power is a complex number
        # (round() would then raise TypeError), and with an integral power it
        # flips sign into a meaningless gain. A loss beyond -100 % cannot be
        # compounded.
        return {"value": "not_available", "note": "mean_return_pct<-100"}
    try:
        raw = round((growth ** periods_per_year - 1) * 100.0, 2)
    except OverflowError:
        return {"value": "not_available", "note": "annualized_return_overflow"}
    return {
        "value_pct": raw,
        "validity": "NOT_MEANINGFUL_FOR_PORTFOLIO",
        "note": (
            f"개별 포지션 {mean_pct:.2f}% / {holding_days}일 → 연환산 = "
            f"(1+{r:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days})-1. "
            "포트폴리오 CAGR은 전체 계좌 시계열이 필요하며 현재 insufficient_data."
        ),
        "estimated": True,
    }
def _sharpe(mean_pct: float, stdev_pct: float, holding_days: int) -> float | None:
    """Compute a per-period Sharpe ratio and annualise it.

    The annual risk-free rate is spread evenly over the number of holding
    periods in a year, subtracted from the mean, divided by the standard
    deviation, and scaled by the square root of periods per year.

    Args:
        mean_pct: Mean return per holding period, in percent.
        stdev_pct: Standard deviation of the per-period returns, in percent.
        holding_days: Length of the holding period in trading days.

    Returns:
        The annualised ratio rounded to 3 decimals, or ``None`` when the
        deviation or the holding period is not positive.
    """
    # A non-positive holding period would divide by zero below; treat it like
    # the other "cannot compute" case rather than crashing.
    if stdev_pct <= 0 or holding_days <= 0:
        return None
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    rf_period = RISK_FREE_ANNUAL_PCT / periods_per_year
    sharpe_period = (mean_pct - rf_period) / stdev_pct
    return round(sharpe_period * math.sqrt(periods_per_year), 3)
def _sortino(returns: list[float], mean_pct: float, holding_days: int) -> float | None:
    """Compute an annualised Sortino ratio (downside deviation only).

    The per-period risk-free rate is used as the target: only returns below
    it contribute to the deviation.

    Args:
        returns: Per-period returns, in percent.
        mean_pct: Mean of the per-period returns, in percent.
        holding_days: Length of the holding period in trading days.

    Returns:
        The annualised ratio rounded to 3 decimals, or ``None`` when the
        holding period is not positive, no return falls below the target, or
        the downside deviation is zero.
    """
    # A non-positive holding period would divide by zero below.
    if holding_days <= 0:
        return None
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    rf_period = RISK_FREE_ANNUAL_PCT / periods_per_year
    shortfalls = [r - rf_period for r in returns if r < rf_period]
    if not shortfalls:
        return None
    # NOTE(review): the mean square is taken over the below-target observations
    # only; some definitions divide by the total number of observations
    # instead. Left unchanged to keep reported values stable; confirm intent.
    downside_dev = math.sqrt(sum(s ** 2 for s in shortfalls) / len(shortfalls))
    if downside_dev <= 0:
        return None
    sortino_period = (mean_pct - rf_period) / downside_dev
    return round(sortino_period * math.sqrt(periods_per_year), 3)
def _max_drawdown(returns: list[float]) -> dict[str, Any]:
    """Report why a portfolio MDD is unavailable, plus the worst single return.

    A drawdown cannot be derived from a cross-section of position returns; it
    needs a portfolio-level daily asset series. ``max_drawdown_pct`` is
    therefore always the insufficient-data sentinel. When returns exist, the
    smallest one (rounded to 2 decimals) is attached as a proxy.
    """
    if returns:
        # Worst single-position loss: a proxy drawn from the distribution.
        worst_single = round(min(returns), 2)
        explanation = (
            "포트폴리오 MDD는 계좌 총자산 일별 시계열 필요(insufficient_data). "
            f"단일 포지션 최대 손실 = {worst_single}%."
        )
        report: dict[str, Any] = {"max_drawdown_pct": INSUF}
        report["worst_single_position_loss_pct"] = worst_single
        report["note"] = explanation
        report["estimated"] = True
        return report
    return {"max_drawdown_pct": INSUF, "note": "수익률 데이터 없음"}
def _win_rate(returns: list[float], threshold_pct: float = 0.0) -> dict[str, Any]:
    """Return the share of *returns* strictly above *threshold_pct*.

    An empty input yields only the insufficient-data sentinel. Otherwise the
    result holds the percentage (2 decimals), the threshold used and the
    sample size.
    """
    if not returns:
        return {"win_rate_pct": INSUF}
    sample_size = len(returns)
    winners = len([value for value in returns if value > threshold_pct])
    rate = round(winners / sample_size * 100.0, 2)
    return {"win_rate_pct": rate, "threshold_pct": threshold_pct, "n": sample_size}
def _worst_case_mdd(harness: dict) -> dict[str, Any]:
    """Describe the loss if every current position hit its stop at once.

    Args:
        harness: Harness dictionary; ``total_asset_krw``, ``total_heat_krw``
            and ``total_heat_pct`` are read from it.

    Returns:
        A dict with the insufficient-data sentinel and a note when total
        assets or total heat are missing. Otherwise a dict with the worst-case
        percentage, the loss and total assets (both rounded to whole units), a
        note and the source of the percentage.
    """
    total = _f(harness.get("total_asset_krw"))
    heat = _f(harness.get("total_heat_krw"))
    if total is None or heat is None:
        return {"worst_case_mdd_pct": INSUF, "note": "포트폴리오 데이터 없음"}

    heat_pct = _f(harness.get("total_heat_pct"))
    source = "hApex.total_heat_pct"
    if heat_pct is None and total > 0:
        # The percentage field is absent: derive it from the two amounts rather
        # than reporting a misleading 0 % worst case.
        heat_pct = heat / total * 100.0
        source = "hApex.total_heat_krw / hApex.total_asset_krw"

    return {
        "worst_case_mdd_pct": round(heat_pct, 2) if heat_pct is not None else INSUF,
        "worst_case_loss_krw": round(heat),
        "total_asset_krw": round(total),
        "note": "현재 포지션 전 손절 시 최대 손실 = total_heat_krw",
        "source": source,
    }
def _resolve_under_root(raw: str) -> Path:
    """Return *raw* as a path, anchoring relative paths at ROOT."""
    candidate = Path(raw)
    return candidate if candidate.is_absolute() else ROOT / candidate


def _collect_returns(records: list[dict], key: str) -> list[float]:
    """Return the values of *key* across *records* that parse as numbers."""
    parsed = (_f(record.get(key)) for record in records)
    return [value for value in parsed if value is not None]


def _stats_block(returns: list[float], holding_days: int,
                 label: str, estimated: bool, source: str) -> dict[str, Any]:
    """Build the metrics section for one return series at one horizon."""
    if not returns:
        return {"status": INSUF, "n": 0, "label": label}
    mean = round(statistics.mean(returns), 3)
    stdev = _safe_stdev(returns)
    cagr = _annualize_return(mean, holding_days)
    # A missing or zero deviation makes the ratio undefined.
    sharpe = _sharpe(mean, stdev or 0, holding_days) if stdev else None
    sortino = _sortino(returns, mean, holding_days)
    mdd = _max_drawdown(returns)
    wr = _win_rate(returns)
    return {
        "label": label,
        "n": len(returns),
        "holding_days": holding_days,
        "mean_return_pct": mean,
        "stdev_pct": stdev,
        # Per-position annualisation; tagged NOT_MEANINGFUL_FOR_PORTFOLIO.
        "cagr_annualized": cagr,
        "sharpe_ratio_annualized": sharpe,
        "sortino_ratio_annualized": sortino,
        "max_drawdown": mdd,
        "win_rate": wr,
        "estimated": estimated,
        "source": source,
        "risk_free_rate_annual_pct": RISK_FREE_ANNUAL_PCT,
        "methodology": (
            f"mean_return={mean:.2f}% → 연환산 CAGR = (1+{mean/100:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days}) - 1; "
            f"Sharpe = (mean - rf/{TRADING_DAYS_PER_YEAR}*{holding_days}) / stdev × √({TRADING_DAYS_PER_YEAR}/{holding_days})"
        ),
    }


def _realized_max_mdd(payload: Any) -> float | None:
    """Return the largest ``MDD_Pct``/``mdd_pct`` in ``data.daily_history``.

    Returns ``None`` when the history is absent, malformed, or holds no
    parseable value.
    """
    data = payload.get("data") if isinstance(payload, dict) else None
    rows = data.get("daily_history") if isinstance(data, dict) else None
    if not isinstance(rows, list):
        return None
    readings = []
    for row in rows:
        if not isinstance(row, dict):
            continue
        value = _f(row.get("MDD_Pct") or row.get("mdd_pct"))
        if value is not None:
            readings.append(value)
    # NOTE(review): max() presumes drawdowns are stored as positive
    # magnitudes; confirm the sign convention of the daily_history rows.
    return round(max(readings), 2) if readings else None


def _mtime_date(path: Path) -> str:
    """Return the modification date of *path* as YYYY-MM-DD (local time), or NA."""
    from datetime import date

    try:
        modified = path.stat().st_mtime
    except OSError:
        return NA
    return date.fromtimestamp(modified).isoformat()


def main() -> int:
    """Build the realized-performance report and write it as JSON.

    Reads the trading-data payload and the proposal evaluation history,
    computes per-horizon statistics (T+1 and T+5 from operational records,
    T+20 from replay records) plus drawdown figures, writes the report to the
    output path and prints a one-line summary.

    Returns:
        Process exit code (always 0).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--hist", default=str(DEFAULT_HIST))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()

    json_path = _resolve_under_root(args.json)
    hist_path = _resolve_under_root(args.hist)
    out_path = _resolve_under_root(args.out)

    payload = _load(json_path)
    harness = _extract_harness_root(payload)
    hist = _load(hist_path)
    # The history file may decode to something other than a dict of records.
    raw_records = hist.get("records") if isinstance(hist, dict) else None
    records = [r for r in (raw_records if isinstance(raw_records, list) else [])
               if isinstance(r, dict)]

    # -- split records: replay backfill vs. operational ----------------------
    replay_records = []
    op_records = []
    for record in records:
        status = str(record.get("validation_status") or "").upper()
        (replay_records if status == "REPLAY_BACKFILL" else op_records).append(record)

    t1_op_returns = _collect_returns(op_records, "next_return_pct")
    t5_op_returns = _collect_returns(op_records, "t5_return_pct")
    t20_replay_returns = _collect_returns(replay_records, "t20_return_pct")

    t1_stats = _stats_block(t1_op_returns, 1, "T+1_operational",
                            estimated=True, source="proposal_evaluation_history.operational")
    t5_stats = _stats_block(t5_op_returns, 5, "T+5_operational",
                            estimated=True, source="proposal_evaluation_history.operational")
    t20_stats = _stats_block(t20_replay_returns, 20, "T+20_replay",
                             estimated=True, source="REPLAY_FROM_KRX_EOD (estimated=true)")

    # -- current drawdown, realized MDD from daily_history, stop-out scenario
    realized_max_mdd = _realized_max_mdd(payload)
    peak = _f(harness.get("portfolio_peak_krw"))
    total = _f(harness.get("total_asset_krw"))
    if peak is not None and total is not None and peak > 0:
        current_drawdown = round((peak - total) / peak * 100, 2)
    else:
        # Without both figures the drawdown is unknown; reporting 0.0 would
        # falsely claim the portfolio sits at its peak.
        current_drawdown = INSUF
    current_dd = {
        "portfolio_peak_krw": peak,
        "portfolio_current_krw": total,
        "current_drawdown_pct": current_drawdown,
        "realized_max_drawdown_pct": realized_max_mdd if realized_max_mdd is not None else INSUF,
        "worst_case_scenario": _worst_case_mdd(harness),
    }

    # -- items that cannot be produced from the available data ---------------
    insufficient = {
        "CAGR_realized_1y": INSUF,
        "sharpe_realized_1y": INSUF,
        "MDD_realized": realized_max_mdd if realized_max_mdd is not None else INSUF,
        "win_rate_realized_closed_trades": INSUF,
        "profit_factor": INSUF,
        "slippage_impact": INSUF,
        "transaction_cost_impact": INSUF,
        "in_sample_vs_oos_gap": INSUF,
        "reason": "1년 이상 청산 완료 거래 이력 없음 — backdata MAE/MFE/pnl 전 행 공란",
    }

    result = {
        "formula_id": FORMULA_ID,
        # Date of the payload file actually read (honours --json).
        "as_of_date": _mtime_date(json_path),
        "data_quality_note": (
            "모든 통계는 REPLAY 또는 운영 제안 방향 일치율 기반 추정. "
            "청산 완료 실현 손익 이력이 없으므로 CAGR/Sharpe는 estimated=true. "
            "실제 운용 성과와 상이할 수 있음."
        ),
        "performance_metrics": {
            "t1_operational": t1_stats,
            "t5_operational": t5_stats,
            "t20_replay_estimated": t20_stats,
        },
        "current_portfolio_mdd": current_dd,
        "insufficient_data_items": insufficient,
        "summary": {
            "best_estimated_cagr": (t20_stats.get("cagr_annualized") or {}).get("value_pct", INSUF),
            "cagr_validity": "NOT_MEANINGFUL_FOR_PORTFOLIO",
            "best_estimated_sharpe": t20_stats.get("sharpe_ratio_annualized", INSUF),
            # Report the sample size actually used, not a frozen figure.
            "best_estimated_source": f"T+20 replay (n={t20_stats.get('n', 0)}, estimated=true)",
            "current_drawdown_pct": current_dd["current_drawdown_pct"],
            "worst_case_mdd_pct": (current_dd["worst_case_scenario"] or {}).get("worst_case_mdd_pct", INSUF),
            "t5_win_rate_pct": t5_stats.get("win_rate", {}).get("win_rate_pct", INSUF),
            "disclaimer": "spec/29_backtest_harness_contract.yaml 기준 — operational 실측 아님",
        },
    }

    # The output directory may not exist yet on a fresh checkout.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")

    s = result["summary"]
    print(
        f"[{FORMULA_ID}] T20_replay_CAGR(est)={s.get('best_estimated_cagr')}% "
        f"Sharpe(est)={s.get('best_estimated_sharpe')} "
        f"MDD_worst={s.get('worst_case_mdd_pct')}% "
        f"T5_win_rate={s.get('t5_win_rate_pct')}% -> {out_path}"
    )
    return 0
# Script entry point: when executed directly (not imported), run main() and
# exit the interpreter with its return value as the process status.
if __name__ == "__main__":
    raise SystemExit(main())