"""build_data_gated_progress_v1.py — DATA_GATED_PROGRESS_V1 DATA_GATED 항목별 실측 데이터 기반 진척도를 추적한다. 모든 수치는 실측 artifact에서 직접 산출 — 추정/하드코딩 없음. 입력: GatherTradingData.json (sector_flow_history) Temp/prediction_accuracy_harness_v2.json Temp/live_replay_separation_v3.json Temp/alpha_feedback_loop_v2.json Temp/imputed_data_exposure_gate_v2.json Temp/algorithm_guidance_proof_v1.json Temp/horizon_classification_v1.json 출력: Temp/data_gated_progress_v1.json """ from __future__ import annotations import json from datetime import date, datetime, timedelta from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] TEMP = ROOT / "Temp" FORMULA_ID = "DATA_GATED_PROGRESS_V1" def _load(path: Path) -> Any: if not path.exists(): return {} try: return json.loads(path.read_text(encoding="utf-8")) except Exception: return {} def _f(v: Any, default: float = 0.0) -> float: try: return float(v) except Exception: return default def _eta_trading_days(current: int, target: int, as_of: date) -> str: """남은 거래일 수를 근사 추정 (주말 제외, 공휴일 미반영).""" remaining = max(0, target - current) if remaining == 0: return "ACHIEVED" # 주 5일 기준 근사 weeks, days = divmod(remaining, 5) delta = timedelta(weeks=weeks, days=days + (2 * weeks)) eta = as_of + delta return eta.strftime("%Y-%m-%d") def _count_sector_flow_dates(payload: dict) -> int: """GatherTradingData.json의 sector_flow_history 고유 날짜 수.""" rows = (payload.get("data") or {}).get("sector_flow_history") or [] dates: set[str] = set() for row in rows: if not isinstance(row, dict): continue snap = row.get("Snapshot_Date") or row.get("snapshot_date") or "" val = str(snap)[:10] if val and val != "None": dates.add(val) return len(dates) def _count_live_samples(live_sep: dict) -> int: """live_replay_separation_v3.json의 실측(non-replay) 행 수.""" rows = live_sep.get("performance_rows") or [] return sum( 1 for r in rows if isinstance(r, dict) and r.get("origin") in ("operational_live", "shadow_live") ) def main() -> int: today = date.today() today_str = today.isoformat() payload = _load(ROOT / "GatherTradingData.json") pred = _load(TEMP / "prediction_accuracy_harness_v2.json") live_sep = _load(TEMP / "live_replay_separation_v3.json") alpha = _load(TEMP / "alpha_feedback_loop_v2.json") imputed = _load(TEMP / "imputed_data_exposure_gate_v2.json") proof = _load(TEMP / "algorithm_guidance_proof_v1.json") horizon = _load(TEMP / "horizon_classification_v1.json") # ── WBS-2.5: sector_flow_history 30일 ──────────────────────────────────── sfh_days = _count_sector_flow_dates(payload) sfh_target = 30 sfh_done = sfh_days >= sfh_target wbs_2_5 = { "id": "WBS-2.5", "label": "sector_flow_history 30일 누적", "current": sfh_days, "target": sfh_target, "unit": "trading_days", "pct_complete": round(min(100.0, sfh_days / sfh_target * 100), 1), "status": "DONE" if sfh_done else "IN_PROGRESS", "eta": "ACHIEVED" if sfh_done else _eta_trading_days(sfh_days, sfh_target, today), "source": "GatherTradingData.json:data.sector_flow_history", } # ── WBS-4.1: T+20 레저 30건 ────────────────────────────────────────────── t20_sample = int(_f(pred.get("t20_sample") or 0)) t20_op_rate = pred.get("t20_op_rate") live_sample_count = _count_live_samples(live_sep) t20_target = 30 t20_done = t20_sample >= t20_target wbs_4_1 = { "id": "WBS-4.1", "label": "T+20 레저 30건 누적", "current": t20_sample, "target": t20_target, "unit": "trades", "live_sample_count": live_sample_count, "pct_complete": round(min(100.0, t20_sample / t20_target * 100), 1), "status": "DONE" if t20_done else "DATA_GATED", "eta": "ACHIEVED" if t20_done else "~2026-07-15", "source": "Temp/prediction_accuracy_harness_v2.json:t20_sample", } # ── WBS-4.2: 예측 정확도 (T+20 의존) ───────────────────────────────────── t20_rate_val = t20_op_rate if t20_op_rate is not None else "pending" wbs_4_2 = { "id": "WBS-4.2", "label": "예측 정확도 평가 (T+20 기준)", "current": t20_rate_val, "target": "t20_op_rate available", "status": "DATA_GATED" if t20_op_rate is None else "IN_PROGRESS", "depends_on": "WBS-4.1", "source": "Temp/prediction_accuracy_harness_v2.json:t20_op_rate", } # ── WBS-4.3: 알파 보정 (4.2 의존) ──────────────────────────────────────── alpha_cases = int(_f(alpha.get("total_cases") or 0)) wbs_4_3 = { "id": "WBS-4.3", "label": "알파 보정 (alpha_feedback_loop)", "current_cases": alpha_cases, "target_cases": 30, "status": "DATA_GATED" if alpha_cases < 30 else "IN_PROGRESS", "depends_on": "WBS-4.2", "source": "Temp/alpha_feedback_loop_v2.json:total_cases", } # ── RELEASE_GATE_TRUTH: honest_proof_score ───────────────────────────── honest_score = _f(proof.get("honest_proof_score") or 0) honest_gate = proof.get("honest_gate", "FAIL") fund_coverage = _f(imputed.get("fundamental_core_factor_coverage") or 0) truth_target = 70.0 truth_done = honest_gate == "PASS" and honest_score >= truth_target rgt = { "id": "RELEASE_GATE_TRUTH", "label": "honest_proof_score >= 70 (RELEASE_GATE_TRUTH)", "current_honest_score": honest_score, "target": truth_target, "honest_gate": honest_gate, "gap": round(truth_target - honest_score, 2), "fundamental_core_factor_coverage": fund_coverage, "status": "PASS" if truth_done else "FAIL", "blockers": [ b for b in [ "REALIZED_OUTCOME_T20_ZERO: T+20 표본 0건 — WBS-4.1 달성 필요" if (t20_op_rate is None) else None, "FUNDAMENTAL_CORE_FACTORS_MISSING: ROE/OPM/OCF/FCF 미수집 — GAS fetchFundamentalsWithCache_ 실행 필요" if fund_coverage < 0.5 else None, ] if b is not None ], "source": "Temp/algorithm_guidance_proof_v1.json:honest_proof_score", } # ── imputed_data_exposure_gate ───────────────────────────────────────── imputed_gate = imputed.get("gate_status", "UNKNOWN") idge = { "id": "imputed_data_exposure_gate", "label": "펀더멘털 실측 데이터 커버리지", "gate_status": imputed_gate, "fundamental_core_factor_coverage": fund_coverage, "status": "USER_ACTION_REQUIRED" if fund_coverage < 0.5 else "IN_PROGRESS", "user_action": "GAS fetchFundamentalsWithCache_ 실행 → ROE/OPM/OCF/FCF 수집", "source": "Temp/imputed_data_exposure_gate_v2.json", } # ── routing_gate ─────────────────────────────────────────────────────── alloc = horizon.get("allocation_pct") or {} short_pct = _f(alloc.get("SHORT") or 0) routing_status = "PASS" if short_pct <= 40.0 else "FAIL" rg = { "id": "routing_gate", "label": "SHORT 호라이즌 비중 <= 40%", "current_short_pct": short_pct, "cap_pct": 40.0, "status": routing_status, "note": ( "horizon_classification_v1.json 기준 현재 SHORT 비중 측정값. " "strategy_routing_audit_v1.json 생성 후 공식 게이트 업데이트." ), "source": "Temp/horizon_classification_v1.json:allocation_pct.SHORT", } # ── 요약 ────────────────────────────────────────────────────────────── items = [wbs_2_5, wbs_4_1, wbs_4_2, wbs_4_3, rgt, idge, rg] done_count = sum(1 for i in items if i.get("status") in ("DONE", "PASS")) total_count = len(items) result = { "formula_id": FORMULA_ID, "as_of": today_str, "gate": "PASS" if done_count == total_count else "IN_PROGRESS", "done_count": done_count, "total_count": total_count, "items": items, } out = TEMP / "data_gated_progress_v1.json" out.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") print(f"[{FORMULA_ID}] {done_count}/{total_count} DONE as_of={today_str}") for item in items: status = item.get("status", "?") label = item.get("label", item.get("id", "")) current = item.get("current", item.get("current_honest_score", item.get("gate_status", ""))) target = item.get("target", "") print(f" [{status}] {item['id']}: {current} / {target}") return 0 if __name__ == "__main__": raise SystemExit(main())