Files
QuantEngineByItz/tools/build_realized_performance_v1.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

300 lines
13 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""build_realized_performance_v1.py — REALIZED_PERFORMANCE_V1
기존 데이터로 CAGR / Sharpe / Sortino / MDD / win_rate 를 추정한다.
데이터 출처:
proposal_evaluation_history.json
- records[].next_return_pct (T+1 수익률, n=1,066)
- records[].t5_return_pct (T+5 수익률, n=711)
- records[].t20_return_pct (T+20 수익률, n=510, REPLAY)
GatherTradingData.json (hApex)
- total_asset_krw (현재 총자산)
- portfolio_peak_krw (고점)
정직성 원칙 (AGENTS.md §0.3):
- REPLAY 기반 지표는 estimated=true, source=REPLAY_FROM_KRX_EOD
- 실현 이력 없는 지표(CAGR/Sharpe)는 replay 추정 + 주의 문구
- 1년 이상 실현 손익 이력 없으므로 out_of_sample 비교 불가
- 미충족 항목은 insufficient_data
산출물: Temp/realized_performance_v1.json
"""
from __future__ import annotations
import argparse
import json
import math
import statistics
from pathlib import Path
from typing import Any
# Repository root: the directory one level above the folder containing this file.
ROOT = Path(__file__).resolve().parents[1]
# Directory holding the default history input and the default output file.
TEMP = ROOT / "Temp"
# Default input/output locations; each one can be overridden on the command line.
DEFAULT_JSON = ROOT / "GatherTradingData.json"
DEFAULT_HIST = TEMP / "proposal_evaluation_history.json"
DEFAULT_OUT = TEMP / "realized_performance_v1.json"
# Identifier written to the output's "formula_id" field and the console summary.
FORMULA_ID = "REALIZED_PERFORMANCE_V1"
# Sentinel strings emitted in place of a value that cannot be provided.
NA = "not_available"
INSUF = "insufficient_data"
# Estimated Korean short-term risk-free rate, annual, in percent.
RISK_FREE_ANNUAL_PCT = 3.5
# Number of trading days per year used by the annualization helpers.
TRADING_DAYS_PER_YEAR = 252
def _load(path: Path) -> Any:
if not path.exists():
return {}
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
return {}
def _f(v: Any, default: float | None = None) -> float | None:
try:
return float(v)
except Exception:
return default
def _extract_harness_root(payload: Any) -> dict[str, Any]:
if not isinstance(payload, dict):
return {}
h = payload.get("hApex")
dc = (payload.get("data") or {}).get("_harness_context")
if isinstance(h, dict) and isinstance(dc, dict):
m = dict(dc); m.update(h); return m
return h if isinstance(h, dict) else dc if isinstance(dc, dict) else payload
def _safe_stdev(xs: list[float]) -> float | None:
return round(statistics.stdev(xs), 4) if len(xs) > 1 else None
def _annualize_return(mean_pct: float, holding_days: int) -> dict[str, Any]:
    """Annualize a short-horizon mean return by compounding it.

    The figure assumes the per-position return is compounded back-to-back for
    a whole year, so it differs from the real portfolio CAGR; the result is
    therefore always tagged ``NOT_MEANINGFUL_FOR_PORTFOLIO``.

    Args:
        mean_pct: Mean return per holding period, in percent.
        holding_days: Length of one holding period in trading days.

    Returns:
        For ``holding_days <= 0``: ``{"value": NA, "note": ...}``.
        Otherwise a dict with ``value_pct``, ``validity``, ``note`` and
        ``estimated``. ``value_pct`` is ``NA`` when compounding is not
        computable: a mean below -100% (fractional power of a negative base)
        or a result too large for a float.
    """
    if holding_days <= 0:
        return {"value": NA, "note": f"holding_days={holding_days}"}
    r = mean_pct / 100.0
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    growth = 1 + r
    raw: float | None = None
    # A negative base raised to a fractional exponent becomes a complex
    # number in Python, which round() rejects — skip the computation instead.
    if growth >= 0:
        try:
            raw = round((growth ** periods_per_year - 1) * 100.0, 2)
        except OverflowError:
            raw = None
    note = (
        f"개별 포지션 {mean_pct:.2f}% / {holding_days}일 → 연환산 = "
        f"(1+{r:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days})-1. "
        "포트폴리오 CAGR은 전체 계좌 시계열이 필요하며 현재 insufficient_data."
    )
    if raw is None:
        note += " 연환산 계산 불가 (평균 수익률 -100% 미만 또는 부동소수 범위 초과)."
    return {
        "value_pct": NA if raw is None else raw,
        "validity": "NOT_MEANINGFUL_FOR_PORTFOLIO",
        "note": note,
        "estimated": True,
    }
def _sharpe(mean_pct: float, stdev_pct: float, holding_days: int) -> float | None:
"""기간 단위 Sharpe → 연환산."""
if stdev_pct <= 0:
return None
rf_period = RISK_FREE_ANNUAL_PCT / (TRADING_DAYS_PER_YEAR / holding_days)
excess = mean_pct - rf_period
sharpe_period = excess / stdev_pct
annualized = sharpe_period * math.sqrt(TRADING_DAYS_PER_YEAR / holding_days)
return round(annualized, 3)
def _sortino(returns: list[float], mean_pct: float, holding_days: int) -> float | None:
"""Sortino: 하방 편차만 사용."""
rf_period = RISK_FREE_ANNUAL_PCT / (TRADING_DAYS_PER_YEAR / holding_days)
downside = [r for r in returns if r < rf_period]
if not downside:
return None
downside_dev = math.sqrt(sum((r - rf_period) ** 2 for r in downside) / len(downside))
if downside_dev <= 0:
return None
excess = mean_pct - rf_period
sortino_period = excess / downside_dev
return round(sortino_period * math.sqrt(TRADING_DAYS_PER_YEAR / holding_days), 3)
def _max_drawdown(returns: list[float]) -> dict[str, Any]:
    """Report the drawdown information obtainable from a return sample.

    A maximum drawdown cannot properly be derived from a cross-sectional
    distribution of returns; it needs a portfolio-level time series (daily
    total account value). ``max_drawdown_pct`` is therefore always the
    insufficient-data sentinel, and for a non-empty sample the worst single
    return is reported alongside it as a proxy.
    """
    if len(returns) == 0:
        return {"max_drawdown_pct": INSUF, "note": "수익률 데이터 없음"}
    # Worst single-position loss: a lower-bound proxy taken from the distribution.
    deepest_loss = round(min(returns), 2)
    explanation = (
        "포트폴리오 MDD는 계좌 총자산 일별 시계열 필요(insufficient_data). "
        f"단일 포지션 최대 손실 = {deepest_loss}%."
    )
    report: dict[str, Any] = {"max_drawdown_pct": INSUF}
    report["worst_single_position_loss_pct"] = deepest_loss
    report["note"] = explanation
    report["estimated"] = True
    return report
def _win_rate(returns: list[float], threshold_pct: float = 0.0) -> dict[str, Any]:
if not returns:
return {"win_rate_pct": INSUF}
wins = sum(1 for r in returns if r > threshold_pct)
return {
"win_rate_pct": round(wins / len(returns) * 100.0, 2),
"threshold_pct": threshold_pct,
"n": len(returns),
}
def _worst_case_mdd(harness: dict) -> dict[str, Any]:
    """Describe the loss scenario in which every open position hits its stop.

    Reads ``total_asset_krw``, ``total_heat_krw`` and ``total_heat_pct`` from
    *harness*.

    Args:
        harness: Harness dictionary holding the portfolio fields.

    Returns:
        ``{"worst_case_mdd_pct": INSUF, "note": ...}`` when the total asset
        or total heat amount is missing. Otherwise a dict with the heat
        percentage, the heat amount and the total asset value;
        ``worst_case_mdd_pct`` is ``INSUF`` when ``total_heat_pct`` itself is
        missing, rather than a misleading 0.
    """
    total = _f(harness.get("total_asset_krw"))
    heat = _f(harness.get("total_heat_krw"))
    heat_pct = _f(harness.get("total_heat_pct"))
    if total is None or heat is None:
        return {"worst_case_mdd_pct": INSUF, "note": "포트폴리오 데이터 없음"}
    return {
        # A missing percentage is "unknown", not "no risk".
        "worst_case_mdd_pct": INSUF if heat_pct is None else round(heat_pct, 2),
        "worst_case_loss_krw": round(heat),
        "total_asset_krw": round(total),
        "note": "현재 포지션 전 손절 시 최대 손실 = total_heat_krw",
        "source": "hApex.total_heat_pct",
    }
def _resolve_path(raw: str) -> Path:
    """Return *raw* as a path, anchoring relative paths at ROOT."""
    candidate = Path(raw)
    return candidate if candidate.is_absolute() else ROOT / candidate


def _collect_returns(records: list[dict], key: str) -> list[float]:
    """Collect the values of *key* from *records* that convert to float."""
    converted = (_f(rec.get(key)) for rec in records)
    return [value for value in converted if value is not None]


def _stats_block(returns: list[float], holding_days: int,
                 label: str, estimated: bool, source: str) -> dict[str, Any]:
    """Build the statistics section for one return sample.

    Returns a short insufficient-data stub for an empty sample; otherwise the
    mean, standard deviation, annualized return, Sharpe, Sortino, drawdown
    and win-rate entries together with provenance fields.
    """
    if not returns:
        return {"status": INSUF, "n": 0, "label": label}
    mean = round(statistics.mean(returns), 3)
    stdev = _safe_stdev(returns)
    return {
        "label": label,
        "n": len(returns),
        "holding_days": holding_days,
        "mean_return_pct": mean,
        "stdev_pct": stdev,
        # value_pct is NOT_MEANINGFUL_FOR_PORTFOLIO — per-position annualization.
        "cagr_annualized": _annualize_return(mean, holding_days),
        "sharpe_ratio_annualized": (
            _sharpe(mean, stdev or 0, holding_days) if stdev else None
        ),
        "sortino_ratio_annualized": _sortino(returns, mean, holding_days),
        "max_drawdown": _max_drawdown(returns),
        "win_rate": _win_rate(returns),
        "estimated": estimated,
        "source": source,
        "risk_free_rate_annual_pct": RISK_FREE_ANNUAL_PCT,
        "methodology": (
            f"mean_return={mean:.2f}% → 연환산 CAGR = (1+{mean/100:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days}) - 1; "
            f"Sharpe = (mean - rf/{TRADING_DAYS_PER_YEAR}*{holding_days}) / stdev × √({TRADING_DAYS_PER_YEAR}/{holding_days})"
        ),
    }


def main() -> int:
    """Build the realized-performance report and write it as JSON.

    Command-line options ``--json``, ``--hist`` and ``--out`` select the
    trading-data input, the evaluation-history input and the output file;
    relative paths are resolved against ROOT. A one-line summary is printed.

    Returns:
        ``0`` after the report has been written.
    """
    # Local import keeps this block self-contained.
    from datetime import datetime

    ap = argparse.ArgumentParser()
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--hist", default=str(DEFAULT_HIST))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()
    json_path = _resolve_path(args.json)
    hist_path = _resolve_path(args.hist)
    out_path = _resolve_path(args.out)

    harness = _extract_harness_root(_load(json_path))
    hist = _load(hist_path)
    # The history file may decode to something other than an object, and
    # "records" may not be a list; treat both as "no records".
    raw_records = hist.get("records") if isinstance(hist, dict) else None
    records: list[dict] = (
        [rec for rec in raw_records if isinstance(rec, dict)]
        if isinstance(raw_records, list) else []
    )

    # ── Split replay back-fill records from operational ones ────────────────
    replay_records: list[dict] = []
    op_records: list[dict] = []
    for rec in records:
        status = str(rec.get("validation_status") or "").upper()
        (replay_records if status == "REPLAY_BACKFILL" else op_records).append(rec)

    # T+1 / T+5 operational returns and T+20 replay returns.
    t1_op_returns = _collect_returns(op_records, "next_return_pct")
    t5_op_returns = _collect_returns(op_records, "t5_return_pct")
    t20_replay_returns = _collect_returns(replay_records, "t20_return_pct")

    t1_stats = _stats_block(t1_op_returns, 1, "T+1_operational",
                            estimated=True, source="proposal_evaluation_history.operational")
    t5_stats = _stats_block(t5_op_returns, 5, "T+5_operational",
                            estimated=True, source="proposal_evaluation_history.operational")
    t20_stats = _stats_block(t20_replay_returns, 20, "T+20_replay",
                             estimated=True, source="REPLAY_FROM_KRX_EOD (estimated=true)")

    # ── Current portfolio drawdown scenario ─────────────────────────────────
    peak = _f(harness.get("portfolio_peak_krw"))
    total = _f(harness.get("total_asset_krw"))
    if peak is not None and total is not None and peak > 0:
        current_drawdown_pct: Any = round((peak - total) / peak * 100, 2)
    else:
        # Unknown inputs must not be reported as "0 % drawdown".
        current_drawdown_pct = INSUF
    current_dd = {
        "portfolio_peak_krw": peak,
        "portfolio_current_krw": total,
        "current_drawdown_pct": current_drawdown_pct,
        "worst_case_scenario": _worst_case_mdd(harness),
    }

    # ── Items that cannot be computed from the available data ───────────────
    insufficient = {
        "CAGR_realized_1y": INSUF,
        "sharpe_realized_1y": INSUF,
        "MDD_realized": INSUF,
        "win_rate_realized_closed_trades": INSUF,
        "profit_factor": INSUF,
        "slippage_impact": INSUF,
        "transaction_cost_impact": INSUF,
        "in_sample_vs_oos_gap": INSUF,
        "reason": "1년 이상 청산 완료 거래 이력 없음 — backdata MAE/MFE/pnl 전 행 공란",
    }

    # The as-of date is the modification date (local time) of the input that
    # was actually read, formatted as YYYY-MM-DD.
    try:
        as_of_date = datetime.fromtimestamp(json_path.stat().st_mtime).date().isoformat()
    except OSError:
        as_of_date = NA

    result = {
        "formula_id": FORMULA_ID,
        "as_of_date": as_of_date,
        "data_quality_note": (
            "모든 통계는 REPLAY 또는 운영 제안 방향 일치율 기반 추정. "
            "청산 완료 실현 손익 이력이 없으므로 CAGR/Sharpe는 estimated=true. "
            "실제 운용 성과와 상이할 수 있음."
        ),
        "performance_metrics": {
            "t1_operational": t1_stats,
            "t5_operational": t5_stats,
            "t20_replay_estimated": t20_stats,
        },
        "current_portfolio_mdd": current_dd,
        "insufficient_data_items": insufficient,
        "summary": {
            "best_estimated_cagr": (t20_stats.get("cagr_annualized") or {}).get("value_pct", INSUF),
            "cagr_validity": "NOT_MEANINGFUL_FOR_PORTFOLIO",
            "best_estimated_sharpe": t20_stats.get("sharpe_ratio_annualized", INSUF),
            # Report the sample size actually used, not a hard-coded count.
            "best_estimated_source": f"T+20 replay (n={t20_stats.get('n', 0)}, estimated=true)",
            "current_drawdown_pct": current_dd["current_drawdown_pct"],
            "worst_case_mdd_pct": (current_dd["worst_case_scenario"] or {}).get("worst_case_mdd_pct", INSUF),
            "t5_win_rate_pct": t5_stats.get("win_rate", {}).get("win_rate_pct", INSUF),
            "disclaimer": "spec/29_backtest_harness_contract.yaml 기준 — operational 실측 아님",
        },
    }

    # Make sure the destination directory exists before writing.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")

    s = result["summary"]
    print(
        f"[{FORMULA_ID}] T20_replay_CAGR(est)={s.get('best_estimated_cagr')}% "
        f"Sharpe(est)={s.get('best_estimated_sharpe')} "
        f"MDD_worst={s.get('worst_case_mdd_pct')}% "
        f"T5_win_rate={s.get('t5_win_rate_pct')}% -> {out_path}"
    )
    return 0
# Script entry point: run main() and hand its return value to the interpreter
# as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)