feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)

주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
2026-06-13 13:20:14 +09:00
commit ee3e799de1
1474 changed files with 176087 additions and 0 deletions
+299
View File
@@ -0,0 +1,299 @@
"""build_realized_performance_v1.py — REALIZED_PERFORMANCE_V1

Estimate CAGR / Sharpe / Sortino / MDD / win_rate from existing data.

Data sources:
    proposal_evaluation_history.json
        - records[].next_return_pct  (T+1 return, n=1,066)
        - records[].t5_return_pct    (T+5 return, n=711)
        - records[].t20_return_pct   (T+20 return, n=510, REPLAY)
    GatherTradingData.json (hApex)
        - total_asset_krw     (current total assets)
        - portfolio_peak_krw  (peak)

Honesty principles (AGENTS.md §0.3):
    - REPLAY-based metrics carry estimated=true, source=REPLAY_FROM_KRX_EOD
    - Metrics without realized history (CAGR/Sharpe) are replay estimates
      accompanied by a caveat
    - There is no realized P&L history of one year or more, so an
      out_of_sample comparison is not possible
    - Items that cannot be satisfied are reported as insufficient_data

Output: Temp/realized_performance_v1.json
"""
from __future__ import annotations
import argparse
import json
import math
import statistics
from pathlib import Path
from typing import Any
# Directory one level above the directory that contains this file.
ROOT = Path(__file__).resolve().parents[1]
TEMP = ROOT / "Temp"
# Default locations used by the CLI arguments --json / --hist / --out.
DEFAULT_JSON = ROOT / "GatherTradingData.json"
DEFAULT_HIST = TEMP / "proposal_evaluation_history.json"
DEFAULT_OUT = TEMP / "realized_performance_v1.json"
# Written to the output document as "formula_id" and used as the log prefix.
FORMULA_ID = "REALIZED_PERFORMANCE_V1"
# Sentinel strings placed in the output where a number cannot be reported.
NA = "not_available"
INSUF = "insufficient_data"
RISK_FREE_ANNUAL_PCT = 3.5 # Estimated Korean short-term risk-free rate (%)
TRADING_DAYS_PER_YEAR = 252
def _load(path: Path) -> Any:
if not path.exists():
return {}
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
return {}
def _f(v: Any, default: float | None = None) -> float | None:
try:
return float(v)
except Exception:
return default
def _extract_harness_root(payload: Any) -> dict[str, Any]:
if not isinstance(payload, dict):
return {}
h = payload.get("hApex")
dc = (payload.get("data") or {}).get("_harness_context")
if isinstance(h, dict) and isinstance(dc, dict):
m = dict(dc); m.update(h); return m
return h if isinstance(h, dict) else dc if isinstance(dc, dict) else payload
def _safe_stdev(xs: list[float]) -> float | None:
return round(statistics.stdev(xs), 4) if len(xs) > 1 else None
def _annualize_return(mean_pct: float, holding_days: int) -> dict[str, Any]:
    """Annualize a short-horizon mean return by compounding.

    The figure assumes the per-position return compounds continuously over a
    year, so it differs from the real portfolio CAGR; the result is therefore
    always tagged ``NOT_MEANINGFUL_FOR_PORTFOLIO``.

    Args:
        mean_pct: Mean return over one holding period, in percent.
        holding_days: Length of the holding period in trading days.

    Returns:
        On success a dict with ``value_pct``, ``validity``, ``note`` and
        ``estimated``. When the value cannot be computed, a dict with
        ``value`` set to ``"not_available"`` and a short ``note``.
    """
    r = mean_pct / 100.0
    if holding_days <= 0:
        return {"value": "not_available", "note": "holding_days=0"}
    if r < -1.0:
        # A negative base raised to a fractional power is a complex number in
        # Python, which round() rejects with TypeError.
        return {"value": "not_available", "note": "mean_return_pct<-100"}
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    try:
        raw = round(((1 + r) ** periods_per_year - 1) * 100.0, 2)
    except OverflowError:
        # Float exponentiation raises instead of returning inf.
        return {"value": "not_available", "note": "annualized_return_overflow"}
    return {
        "value_pct": raw,
        "validity": "NOT_MEANINGFUL_FOR_PORTFOLIO",
        "note": (
            f"개별 포지션 {mean_pct:.2f}% / {holding_days}일 → 연환산 = "
            f"(1+{r:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days})-1. "
            "포트폴리오 CAGR은 전체 계좌 시계열이 필요하며 현재 insufficient_data."
        ),
        "estimated": True,
    }
def _sharpe(mean_pct: float, stdev_pct: float, holding_days: int) -> float | None:
    """Compute a per-period Sharpe ratio and annualize it.

    The annual risk-free rate is divided evenly across the holding periods
    in a year, the per-period excess return is divided by *stdev_pct*, and
    the ratio is scaled by the square root of the periods per year.

    Args:
        mean_pct: Mean return over one holding period, in percent.
        stdev_pct: Standard deviation of those returns, in percent.
        holding_days: Length of the holding period in trading days.

    Returns:
        The annualized ratio rounded to 3 decimals, or ``None`` when
        *stdev_pct* or *holding_days* is not positive.
    """
    # holding_days <= 0 would divide by zero (or take the root of a negative
    # number); report "no value", as _annualize_return does for that input.
    if stdev_pct <= 0 or holding_days <= 0:
        return None
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    rf_period = RISK_FREE_ANNUAL_PCT / periods_per_year
    excess = mean_pct - rf_period
    sharpe_period = excess / stdev_pct
    return round(sharpe_period * math.sqrt(periods_per_year), 3)
def _sortino(returns: list[float], mean_pct: float, holding_days: int) -> float | None:
    """Compute an annualized Sortino ratio using downside deviation only.

    Downside observations are the returns below the per-period risk-free
    rate; the deviation is the root mean square of their shortfall, averaged
    over the downside observations only.

    Args:
        returns: Per-period returns, in percent.
        mean_pct: Mean of those returns, in percent.
        holding_days: Length of the holding period in trading days.

    Returns:
        The annualized ratio rounded to 3 decimals, or ``None`` when
        *holding_days* is not positive, there is no downside observation, or
        the downside deviation is zero.
    """
    # Guard against division by zero / root of a negative number.
    if holding_days <= 0:
        return None
    periods_per_year = TRADING_DAYS_PER_YEAR / holding_days
    rf_period = RISK_FREE_ANNUAL_PCT / periods_per_year
    downside = [r for r in returns if r < rf_period]
    if not downside:
        return None
    downside_dev = math.sqrt(sum((r - rf_period) ** 2 for r in downside) / len(downside))
    if downside_dev <= 0:
        return None
    excess = mean_pct - rf_period
    sortino_period = excess / downside_dev
    return round(sortino_period * math.sqrt(periods_per_year), 3)
def _max_drawdown(returns: list[float]) -> dict[str, Any]:
    """Report why MDD is unavailable, plus the worst single-position loss.

    Computing MDD from a cross-sectional return distribution is not
    appropriate: it needs a portfolio-level time series (daily change of the
    account's total assets). ``max_drawdown_pct`` is therefore always the
    insufficient-data sentinel.
    """
    if returns:
        # Worst single-position loss, used as a proxy for the lower bound of
        # the MDD within the distribution.
        worst_loss = round(min(returns), 2)
        explanation = (
            "포트폴리오 MDD는 계좌 총자산 일별 시계열 필요(insufficient_data). "
            f"단일 포지션 최대 손실 = {worst_loss}%."
        )
        report: dict[str, Any] = {}
        report["max_drawdown_pct"] = INSUF
        report["worst_single_position_loss_pct"] = worst_loss
        report["note"] = explanation
        report["estimated"] = True
        return report
    return {"max_drawdown_pct": INSUF, "note": "수익률 데이터 없음"}
def _win_rate(returns: list[float], threshold_pct: float = 0.0) -> dict[str, Any]:
    """Return the share of *returns* strictly above *threshold_pct*.

    The result holds ``win_rate_pct`` (percent, 2 decimals), the threshold
    used and the sample size ``n``. For an empty input only ``win_rate_pct``
    is present, set to the insufficient-data sentinel.
    """
    if not returns:
        return {"win_rate_pct": INSUF}
    sample_size = len(returns)
    winners = [value for value in returns if value > threshold_pct]
    rate = round(len(winners) / sample_size * 100.0, 2)
    return {"win_rate_pct": rate, "threshold_pct": threshold_pct, "n": sample_size}
def _worst_case_mdd(harness: dict) -> dict[str, Any]:
    """Build the scenario in which every current position hits its stop.

    Reads ``total_asset_krw``, ``total_heat_krw`` and ``total_heat_pct`` from
    *harness*. When the total or the heat amount cannot be parsed, only the
    insufficient-data sentinel and a note are returned.
    """
    total_asset = _f(harness.get("total_asset_krw"))
    heat_krw = _f(harness.get("total_heat_krw"))
    heat_ratio = _f(harness.get("total_heat_pct"))

    if total_asset is None or heat_krw is None:
        return {"worst_case_mdd_pct": INSUF, "note": "포트폴리오 데이터 없음"}

    scenario: dict[str, Any] = {}
    # A missing or zero percentage is reported as 0.
    scenario["worst_case_mdd_pct"] = round(heat_ratio or 0, 2)
    scenario["worst_case_loss_krw"] = round(heat_krw or 0)
    scenario["total_asset_krw"] = round(total_asset)
    scenario["note"] = "현재 포지션 전 손절 시 최대 손실 = total_heat_krw"
    scenario["source"] = "hApex.total_heat_pct"
    return scenario
def main() -> int:
    """CLI entry point: compute the estimated performance report and write it.

    Reads the trading-data JSON (``--json``) and the proposal evaluation
    history (``--hist``), derives per-horizon statistics (T+1 / T+5 from
    operational records, T+20 from replay records), and writes the result as
    JSON to ``--out``. Relative paths are resolved against ``ROOT``.

    Returns:
        ``0`` after the report has been written and the summary printed.
    """
    # Local import: only needed here to format the as_of_date.
    from datetime import datetime

    ap = argparse.ArgumentParser()
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--hist", default=str(DEFAULT_HIST))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()

    def _resolve(raw: str) -> Path:
        """Return *raw* as a path, anchored at ROOT when it is relative."""
        candidate = Path(raw)
        return candidate if candidate.is_absolute() else ROOT / candidate

    json_path = _resolve(args.json)
    hist_path = _resolve(args.hist)
    out_path = _resolve(args.out)

    payload = _load(json_path)
    harness = _extract_harness_root(payload)
    hist = _load(hist_path)
    # The history file may decode to something other than a dict (or hold a
    # non-list "records" value); treat either case as "no records".
    raw_records = hist.get("records") if isinstance(hist, dict) else None
    records: list[dict] = (
        [r for r in raw_records if isinstance(r, dict)]
        if isinstance(raw_records, list)
        else []
    )

    # ── Split the data ───────────────────────────────────────────────────────
    def _is_replay(rec: dict) -> bool:
        """True when the record is marked as a replay backfill."""
        return str(rec.get("validation_status") or "").upper() == "REPLAY_BACKFILL"

    def _returns(recs: list[dict], key: str) -> list[float]:
        """Collect the parseable values of *key*, converting each only once."""
        parsed = (_f(rec.get(key)) for rec in recs)
        return [value for value in parsed if value is not None]

    replay_records = [r for r in records if _is_replay(r)]
    op_records = [r for r in records if not _is_replay(r)]

    t1_op_returns = _returns(op_records, "next_return_pct")       # T+1 operational
    t5_op_returns = _returns(op_records, "t5_return_pct")         # T+5 operational
    t20_replay_returns = _returns(replay_records, "t20_return_pct")  # T+20 replay

    def _stats_block(returns: list[float], holding_days: int,
                     label: str, estimated: bool, source: str) -> dict[str, Any]:
        """Assemble every statistic for one return series / holding period."""
        if not returns:
            return {"status": INSUF, "n": 0, "label": label}
        mean = round(statistics.mean(returns), 3)
        stdev = _safe_stdev(returns)
        cagr = _annualize_return(mean, holding_days)
        sharpe = _sharpe(mean, stdev or 0, holding_days) if stdev else None
        sortino = _sortino(returns, mean, holding_days)
        mdd = _max_drawdown(returns)
        wr = _win_rate(returns)
        return {
            "label": label,
            "n": len(returns),
            "holding_days": holding_days,
            "mean_return_pct": mean,
            "stdev_pct": stdev,
            # validity=NOT_MEANINGFUL_FOR_PORTFOLIO — per-position annualization
            "cagr_annualized": cagr,
            "sharpe_ratio_annualized": sharpe,
            "sortino_ratio_annualized": sortino,
            "max_drawdown": mdd,
            "win_rate": wr,
            "estimated": estimated,
            "source": source,
            "risk_free_rate_annual_pct": RISK_FREE_ANNUAL_PCT,
            "methodology": (
                f"mean_return={mean:.2f}% → 연환산 CAGR = (1+{mean/100:.4f})^({TRADING_DAYS_PER_YEAR}/{holding_days}) - 1; "
                f"Sharpe = (mean - rf/{TRADING_DAYS_PER_YEAR}*{holding_days}) / stdev × √({TRADING_DAYS_PER_YEAR}/{holding_days})"
            ),
        }

    t1_stats = _stats_block(t1_op_returns, 1, "T+1_operational",
                            estimated=True, source="proposal_evaluation_history.operational")
    t5_stats = _stats_block(t5_op_returns, 5, "T+5_operational",
                            estimated=True, source="proposal_evaluation_history.operational")
    t20_stats = _stats_block(t20_replay_returns, 20, "T+20_replay",
                             estimated=True, source="REPLAY_FROM_KRX_EOD (estimated=true)")

    # ── Current portfolio MDD scenario ───────────────────────────────────────
    peak = _f(harness.get("portfolio_peak_krw"))
    total = _f(harness.get("total_asset_krw"))
    current_dd = {
        "portfolio_peak_krw": peak,
        "portfolio_current_krw": total,
        # Falls back to 0.0 when peak/total are missing or zero.
        "current_drawdown_pct": (
            round((peak - total) / peak * 100, 2) if peak and total and peak > 0
            else 0.0
        ),
        "worst_case_scenario": _worst_case_mdd(harness),
    }

    # ── Items that cannot be satisfied ───────────────────────────────────────
    insufficient = {
        "CAGR_realized_1y": INSUF,
        "sharpe_realized_1y": INSUF,
        "MDD_realized": INSUF,
        "win_rate_realized_closed_trades": INSUF,
        "profit_factor": INSUF,
        "slippage_impact": INSUF,
        "transaction_cost_impact": INSUF,
        "in_sample_vs_oos_gap": INSUF,
        "reason": "1년 이상 청산 완료 거래 이력 없음 — backdata MAE/MFE/pnl 전 행 공란",
    }

    # Date (YYYY-MM-DD, local time) of the input file that was actually read.
    try:
        as_of_date = datetime.fromtimestamp(json_path.stat().st_mtime).date().isoformat()
    except OSError:
        as_of_date = NA

    result = {
        "formula_id": FORMULA_ID,
        "as_of_date": as_of_date,
        "data_quality_note": (
            "모든 통계는 REPLAY 또는 운영 제안 방향 일치율 기반 추정. "
            "청산 완료 실현 손익 이력이 없으므로 CAGR/Sharpe는 estimated=true. "
            "실제 운용 성과와 상이할 수 있음."
        ),
        "performance_metrics": {
            "t1_operational": t1_stats,
            "t5_operational": t5_stats,
            "t20_replay_estimated": t20_stats,
        },
        "current_portfolio_mdd": current_dd,
        "insufficient_data_items": insufficient,
        "summary": {
            "best_estimated_cagr": (t20_stats.get("cagr_annualized") or {}).get("value_pct", INSUF),
            "cagr_validity": "NOT_MEANINGFUL_FOR_PORTFOLIO",
            "best_estimated_sharpe": t20_stats.get("sharpe_ratio_annualized", INSUF),
            # Report the sample size actually used rather than a fixed figure.
            "best_estimated_source": f"T+20 replay (n={t20_stats.get('n', 0)}, estimated=true)",
            "current_drawdown_pct": current_dd["current_drawdown_pct"],
            "worst_case_mdd_pct": (current_dd["worst_case_scenario"] or {}).get("worst_case_mdd_pct", INSUF),
            "t5_win_rate_pct": t5_stats.get("win_rate", {}).get("win_rate_pct", INSUF),
            "disclaimer": "spec/29_backtest_harness_contract.yaml 기준 — operational 실측 아님",
        },
    }

    # The output directory may not exist yet on a fresh checkout.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")

    s = result["summary"]
    print(
        f"[{FORMULA_ID}] T20_replay_CAGR(est)={s.get('best_estimated_cagr')}% "
        f"Sharpe(est)={s.get('best_estimated_sharpe')} "
        f"MDD_worst={s.get('worst_case_mdd_pct')}% "
        f"T5_win_rate={s.get('t5_win_rate_pct')}% -> {out_path}"
    )
    return 0
if __name__ == "__main__":
raise SystemExit(main())