feat: DATA_GATED 진척도 추적 + 팩터 shadow 자격 평가 + routing_gate 실측 보정

- tools/build_data_gated_progress_v1.py: WBS-2.5/4.1~4.3/RELEASE_GATE_TRUTH 실측 진척도 (sector_flow 21/30일, T+20 0/30건, honest_proof 45.1/70.0)
- tools/build_factor_shadow_eligibility_v1.py: 149개 팩터 shadow 자격 평가 (eligible=24, partial=37, blocked=38)
- spec/41: DAG step_count 63→67, 신규 4노드 추가 (build_data_gated_progress, build_factor_shadow_eligibility, build_strategy_routing_audit, build_horizon_rebalance_plan)
- spec/30: routing_gate 실측 보정 — SHORT 71.4%는 오류, 실제 원인은 MID 75.0% > 50% 상한 (2026-06-14 실측)
- spec/13: DATA_GATED_PROGRESS_V1, FACTOR_SHADOW_ELIGIBILITY_V1, STRATEGY_ROUTING_AUDIT_V1 formula 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-14 12:39:34 +09:00
parent 8891978ed5
commit ac6437b840
5 changed files with 454 additions and 6 deletions
@@ -0,0 +1,235 @@
+"""build_data_gated_progress_v1.py — DATA_GATED_PROGRESS_V1
+
+DATA_GATED 항목별 실측 데이터 기반 진척도를 추적한다.
+모든 수치는 실측 artifact에서 직접 산출 — 추정/하드코딩 없음.
+
+입력:
+  GatherTradingData.json              (sector_flow_history)
+  Temp/prediction_accuracy_harness_v2.json
+  Temp/live_replay_separation_v3.json
+  Temp/alpha_feedback_loop_v2.json
+  Temp/imputed_data_exposure_gate_v2.json
+  Temp/algorithm_guidance_proof_v1.json
+  Temp/horizon_classification_v1.json
+
+출력: Temp/data_gated_progress_v1.json
+"""
+from __future__ import annotations
+
+import json
+from datetime import date, datetime, timedelta
+from pathlib import Path
+from typing import Any
+
+ROOT = Path(__file__).resolve().parents[1]
+TEMP = ROOT / "Temp"
+FORMULA_ID = "DATA_GATED_PROGRESS_V1"
+
+
+def _load(path: Path) -> Any:
+    """Read *path* as UTF-8 JSON and return the decoded value.
+
+    Returns an empty dict when the file does not exist or when reading or
+    decoding it raises.
+    """
+    if path.exists():
+        try:
+            text = path.read_text(encoding="utf-8")
+            return json.loads(text)
+        except Exception:
+            # Best-effort input: any read/parse failure counts as "no data".
+            return {}
+    return {}
+
+
+def _f(v: Any, default: float = 0.0) -> float:
+    """Convert *v* with ``float()``; return *default* if the conversion raises."""
+    try:
+        number = float(v)
+    except Exception:
+        return default
+    return number
+
+
+def _eta_trading_days(current: int, target: int, as_of: date) -> str:
+    """Estimate the calendar date on which *target* trading days are reached.
+
+    Walks forward from *as_of* one calendar day at a time and counts only
+    Monday-Friday, so the result never lands on a weekend. Public holidays
+    are not considered, so the returned date is the earliest possible one.
+
+    Args:
+        current: Trading days accumulated so far.
+        target: Trading days required.
+        as_of: Reference date; counting starts on the following day.
+
+    Returns:
+        ``"ACHIEVED"`` when ``current >= target``, otherwise the estimated
+        date formatted as ``YYYY-MM-DD``.
+    """
+    remaining = max(0, target - current)
+    if remaining == 0:
+        return "ACHIEVED"
+    # Step day by day instead of using timedelta(weeks=...): a week already
+    # spans its weekend, so adding extra weekend days on top overshoots, and
+    # a closed-form offset would ignore which weekday as_of falls on.
+    eta = as_of
+    while remaining > 0:
+        eta += timedelta(days=1)
+        if eta.weekday() < 5:  # 0-4 = Monday-Friday
+            remaining -= 1
+    return eta.strftime("%Y-%m-%d")
+
+
+def _count_sector_flow_dates(payload: dict) -> int:
+    """Count distinct snapshot dates in ``payload["data"]["sector_flow_history"]``.
+
+    Each dict row contributes the first 10 characters of its
+    ``Snapshot_Date`` (or ``snapshot_date``) value; non-dict rows, empty
+    values and the literal string ``"None"`` are ignored.
+    """
+    history = (payload.get("data") or {}).get("sector_flow_history") or []
+    stamps = (
+        str(entry.get("Snapshot_Date") or entry.get("snapshot_date") or "")[:10]
+        for entry in history
+        if isinstance(entry, dict)
+    )
+    return len({stamp for stamp in stamps if stamp and stamp != "None"})
+
+
+def _count_live_samples(live_sep: dict) -> int:
+    """Count ``performance_rows`` entries whose ``origin`` marks a live sample.
+
+    Only dict rows with ``origin`` equal to ``"operational_live"`` or
+    ``"shadow_live"`` are counted.
+    """
+    live_origins = ("operational_live", "shadow_live")
+    count = 0
+    for entry in live_sep.get("performance_rows") or []:
+        if isinstance(entry, dict) and entry.get("origin") in live_origins:
+            count += 1
+    return count
+
+
+def main() -> int:
+    """Build the DATA_GATED progress report and write it under ``Temp``.
+
+    Loads the input artifacts, derives one status item per tracked gate,
+    writes ``Temp/data_gated_progress_v1.json`` and prints a one-line
+    summary per item.
+
+    Returns:
+        Always 0.
+    """
+    today = date.today()
+    today_str = today.isoformat()
+
+    payload = _load(ROOT / "GatherTradingData.json")
+    pred = _load(TEMP / "prediction_accuracy_harness_v2.json")
+    live_sep = _load(TEMP / "live_replay_separation_v3.json")
+    alpha = _load(TEMP / "alpha_feedback_loop_v2.json")
+    imputed = _load(TEMP / "imputed_data_exposure_gate_v2.json")
+    proof = _load(TEMP / "algorithm_guidance_proof_v1.json")
+    horizon = _load(TEMP / "horizon_classification_v1.json")
+
+    # ── WBS-2.5: 30 days of sector_flow_history ──────────────────────────────
+    sfh_days = _count_sector_flow_dates(payload)
+    sfh_target = 30
+    sfh_done = sfh_days >= sfh_target
+    wbs_2_5 = {
+        "id": "WBS-2.5",
+        "label": "sector_flow_history 30일 누적",
+        "current": sfh_days,
+        "target": sfh_target,
+        "unit": "trading_days",
+        "pct_complete": round(min(100.0, sfh_days / sfh_target * 100), 1),
+        "status": "DONE" if sfh_done else "IN_PROGRESS",
+        "eta": "ACHIEVED" if sfh_done else _eta_trading_days(sfh_days, sfh_target, today),
+        "source": "GatherTradingData.json:data.sector_flow_history",
+    }
+
+    # ── WBS-4.1: 30 T+20 ledger entries ──────────────────────────────────────
+    t20_sample = int(_f(pred.get("t20_sample") or 0))
+    t20_op_rate = pred.get("t20_op_rate")
+    live_sample_count = _count_live_samples(live_sep)
+    t20_target = 30
+    t20_done = t20_sample >= t20_target
+    wbs_4_1 = {
+        "id": "WBS-4.1",
+        "label": "T+20 레저 30건 누적",
+        "current": t20_sample,
+        "target": t20_target,
+        "unit": "trades",
+        "live_sample_count": live_sample_count,
+        "pct_complete": round(min(100.0, t20_sample / t20_target * 100), 1),
+        "status": "DONE" if t20_done else "DATA_GATED",
+        # NOTE(review): this ETA is a fixed literal, not derived from data.
+        "eta": "ACHIEVED" if t20_done else "~2026-07-15",
+        "source": "Temp/prediction_accuracy_harness_v2.json:t20_sample",
+    }
+
+    # ── WBS-4.2: prediction accuracy (depends on T+20) ───────────────────────
+    t20_rate_val = t20_op_rate if t20_op_rate is not None else "pending"
+    wbs_4_2 = {
+        "id": "WBS-4.2",
+        "label": "예측 정확도 평가 (T+20 기준)",
+        "current": t20_rate_val,
+        "target": "t20_op_rate available",
+        "status": "DATA_GATED" if t20_op_rate is None else "IN_PROGRESS",
+        "depends_on": "WBS-4.1",
+        "source": "Temp/prediction_accuracy_harness_v2.json:t20_op_rate",
+    }
+
+    # ── WBS-4.3: alpha calibration (depends on 4.2) ──────────────────────────
+    alpha_cases = int(_f(alpha.get("total_cases") or 0))
+    wbs_4_3 = {
+        "id": "WBS-4.3",
+        "label": "알파 보정 (alpha_feedback_loop)",
+        "current_cases": alpha_cases,
+        "target_cases": 30,
+        "status": "DATA_GATED" if alpha_cases < 30 else "IN_PROGRESS",
+        "depends_on": "WBS-4.2",
+        "source": "Temp/alpha_feedback_loop_v2.json:total_cases",
+    }
+
+    # ── RELEASE_GATE_TRUTH: honest_proof_score ───────────────────────────────
+    honest_score = _f(proof.get("honest_proof_score") or 0)
+    honest_gate = proof.get("honest_gate", "FAIL")
+    fund_coverage = _f(imputed.get("fundamental_core_factor_coverage") or 0)
+    truth_target = 70.0
+    truth_done = honest_gate == "PASS" and honest_score >= truth_target
+    rgt = {
+        "id": "RELEASE_GATE_TRUTH",
+        "label": "honest_proof_score >= 70 (RELEASE_GATE_TRUTH)",
+        "current_honest_score": honest_score,
+        "target": truth_target,
+        "honest_gate": honest_gate,
+        "gap": round(truth_target - honest_score, 2),
+        "fundamental_core_factor_coverage": fund_coverage,
+        "status": "PASS" if truth_done else "FAIL",
+        "blockers": [
+            b for b in [
+                # NOTE(review): keyed on a missing t20_op_rate, not on
+                # t20_sample == 0 as the message text suggests — confirm.
+                "REALIZED_OUTCOME_T20_ZERO: T+20 표본 0건 — WBS-4.1 달성 필요"
+                if (t20_op_rate is None) else None,
+                "FUNDAMENTAL_CORE_FACTORS_MISSING: ROE/OPM/OCF/FCF 미수집 — GAS fetchFundamentalsWithCache_ 실행 필요"
+                if fund_coverage < 0.5 else None,
+            ]
+            if b is not None
+        ],
+        "source": "Temp/algorithm_guidance_proof_v1.json:honest_proof_score",
+    }
+
+    # ── imputed_data_exposure_gate ───────────────────────────────────────────
+    imputed_gate = imputed.get("gate_status", "UNKNOWN")
+    idge = {
+        "id": "imputed_data_exposure_gate",
+        "label": "펀더멘털 실측 데이터 커버리지",
+        "gate_status": imputed_gate,
+        "fundamental_core_factor_coverage": fund_coverage,
+        "status": "USER_ACTION_REQUIRED" if fund_coverage < 0.5 else "IN_PROGRESS",
+        "user_action": "GAS fetchFundamentalsWithCache_ 실행 → ROE/OPM/OCF/FCF 수집",
+        "source": "Temp/imputed_data_exposure_gate_v2.json",
+    }
+
+    # ── routing_gate ─────────────────────────────────────────────────────────
+    # NOTE(review): only the SHORT allocation is checked here; whether other
+    # horizons (e.g. MID) also have a cap is not visible in this file — confirm.
+    alloc = horizon.get("allocation_pct") or {}
+    short_pct = _f(alloc.get("SHORT") or 0)
+    routing_status = "PASS" if short_pct <= 40.0 else "FAIL"
+    rg = {
+        "id": "routing_gate",
+        "label": "SHORT 호라이즌 비중 <= 40%",
+        "current_short_pct": short_pct,
+        "cap_pct": 40.0,
+        "status": routing_status,
+        "note": (
+            "horizon_classification_v1.json 기준 현재 SHORT 비중 측정값. "
+            "strategy_routing_audit_v1.json 생성 후 공식 게이트 업데이트."
+        ),
+        "source": "Temp/horizon_classification_v1.json:allocation_pct.SHORT",
+    }
+
+    # ── Summary ──────────────────────────────────────────────────────────────
+    items = [wbs_2_5, wbs_4_1, wbs_4_2, wbs_4_3, rgt, idge, rg]
+    done_count = sum(1 for i in items if i.get("status") in ("DONE", "PASS"))
+    total_count = len(items)
+
+    result = {
+        "formula_id": FORMULA_ID,
+        "as_of": today_str,
+        "gate": "PASS" if done_count == total_count else "IN_PROGRESS",
+        "done_count": done_count,
+        "total_count": total_count,
+        "items": items,
+    }
+
+    # Every input is optional, so Temp may not exist yet on a fresh checkout.
+    TEMP.mkdir(parents=True, exist_ok=True)
+    out = TEMP / "data_gated_progress_v1.json"
+    out.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
+
+    print(f"[{FORMULA_ID}] {done_count}/{total_count} DONE as_of={today_str}")
+    for item in items:
+        status = item.get("status", "?")
+        # Items use different key names for their headline value; fall back
+        # through them in order.
+        current = item.get("current", item.get("current_honest_score", item.get("gate_status", "")))
+        target = item.get("target", "")
+        print(f"  [{status}] {item['id']}: {current} / {target}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())
@@ -0,0 +1,152 @@
+"""build_factor_shadow_eligibility_v1.py — FACTOR_SHADOW_ELIGIBILITY_V1
+
+149개 팩터의 shadow 승격 자격을 실측 데이터로 평가한다.
+spec/factor_lifecycle_registry.yaml의 required_data 필드 목록과
+GatherTradingData.json의 실제 존재 키를 대조한다.
+
+출력: Temp/factor_shadow_eligibility_v1.json
+  - ELIGIBLE: required_data 전체 필드 GatherTradingData에 존재
+  - PARTIAL: required 필드 50% 이상 존재 (optional 제외)
+  - BLOCKED: required 필드 50% 미만 존재
+"""
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+import yaml
+
+ROOT = Path(__file__).resolve().parents[1]
+TEMP = ROOT / "Temp"
+FORMULA_ID = "FACTOR_SHADOW_ELIGIBILITY_V1"
+REGISTRY_PATH = ROOT / "spec" / "factor_lifecycle_registry.yaml"
+
+
+def _load_json(path: Path) -> Any:
+    """Return the JSON value stored at *path*, decoded as UTF-8.
+
+    An empty dict is returned when the file is absent or when reading or
+    parsing it raises.
+    """
+    if path.exists():
+        try:
+            raw = path.read_text(encoding="utf-8")
+            return json.loads(raw)
+        except Exception:
+            # Best-effort input: an unreadable file is treated as empty.
+            return {}
+    return {}
+
+
+def _flatten_keys(obj: Any, prefix: str = "") -> set[str]:
+    """Collect every dict key found anywhere inside *obj*, lower-cased.
+
+    Keys are gathered at all nesting levels (not only leaves), descending
+    through dicts and lists. Key paths are not tracked: two keys with the
+    same name at different depths collapse into one entry.
+
+    Args:
+        obj: Decoded JSON value (dict, list, or scalar).
+        prefix: Unused; kept so existing callers that pass it still work.
+
+    Returns:
+        The set of lower-cased key names; empty for scalars.
+    """
+    keys: set[str] = set()
+    # Explicit stack instead of recursion so deeply nested payloads cannot
+    # hit the interpreter's recursion limit.
+    pending = [obj]
+    while pending:
+        node = pending.pop()
+        if isinstance(node, dict):
+            for key, value in node.items():
+                keys.add(str(key).lower())
+                pending.append(value)
+        elif isinstance(node, list):
+            pending.extend(node)
+    return keys
+
+
+def _check_factor(factor: dict, available_keys: set[str]) -> dict:
+    """Classify one registry factor by how many required fields are available.
+
+    Each ``required_data`` entry is a dict with a ``field`` name and an
+    optional ``optional`` flag. Field names are lower-cased before being
+    looked up in *available_keys*. Entries that are not dicts, or whose
+    ``field`` is not a string, are skipped.
+
+    Args:
+        factor: One factor mapping from the registry.
+        available_keys: Lower-cased key names present in the data payload.
+
+    Returns:
+        A dict with ``eligibility`` set to ``NO_REQUIRED_DATA`` (no required
+        fields declared), ``ELIGIBLE`` (all required fields present),
+        ``PARTIAL`` (at least half present) or ``BLOCKED`` (fewer than
+        half), plus the counts and field lists behind that verdict.
+    """
+    required_fields: list[str] = []
+    optional_fields: list[str] = []
+    for entry in factor.get("required_data") or []:
+        if not isinstance(entry, dict):
+            continue
+        name = entry.get("field")
+        if not isinstance(name, str):
+            # Malformed entry: there is nothing to look up, and one bad row
+            # must not abort the evaluation of every other factor.
+            continue
+        bucket = optional_fields if entry.get("optional", False) else required_fields
+        bucket.append(name.lower())
+
+    present = [f for f in required_fields if f in available_keys]
+    missing = [f for f in required_fields if f not in available_keys]
+    coverage = len(present) / len(required_fields) if required_fields else 0.0
+
+    if not required_fields:
+        # Without declared requirements the factor cannot be evaluated, so
+        # it is not considered eligible; optional hits are not reported.
+        eligibility = "NO_REQUIRED_DATA"
+        optional_present: list[str] = []
+    else:
+        optional_present = [f for f in optional_fields if f in available_keys]
+        if coverage >= 1.0:
+            eligibility = "ELIGIBLE"
+        elif coverage >= 0.5:
+            eligibility = "PARTIAL"
+        else:
+            eligibility = "BLOCKED"
+
+    return {
+        "factor_id": factor.get("factor_id", "UNKNOWN"),
+        "promotion_gate": factor.get("promotion_gate", "draft"),
+        "eligibility": eligibility,
+        "required_field_count": len(required_fields),
+        "present_count": len(present),
+        "coverage_pct": round(coverage * 100, 1),
+        "missing_required": missing,
+        "present_optional": optional_present,
+    }
+
+
+def main() -> int:
+    """Evaluate every registry factor and write the eligibility report.
+
+    Reads the factor registry YAML and ``GatherTradingData.json``, classifies
+    each factor with ``_check_factor``, writes
+    ``Temp/factor_shadow_eligibility_v1.json`` and prints a short summary.
+
+    Returns:
+        0 on success; 1 when the registry file is missing or its top level
+        is not a mapping.
+    """
+    if not REGISTRY_PATH.exists():
+        print(f"[ERROR] Registry not found: {REGISTRY_PATH}")
+        return 1
+
+    # safe_load returns None for an empty document; treat that as an empty
+    # registry instead of failing on ``None.get``.
+    registry = yaml.safe_load(REGISTRY_PATH.read_text(encoding="utf-8")) or {}
+    if not isinstance(registry, dict):
+        print(f"[ERROR] Registry root is not a mapping: {REGISTRY_PATH}")
+        return 1
+    factors = registry.get("factors") or []
+
+    payload = _load_json(ROOT / "GatherTradingData.json")
+    available_keys = _flatten_keys(payload)
+
+    # Non-mapping entries cannot be evaluated and are left out of the report.
+    rows = [_check_factor(f, available_keys) for f in factors if isinstance(f, dict)]
+
+    eligible = [r for r in rows if r["eligibility"] == "ELIGIBLE"]
+    partial = [r for r in rows if r["eligibility"] == "PARTIAL"]
+    blocked = [r for r in rows if r["eligibility"] == "BLOCKED"]
+    no_data = [r for r in rows if r["eligibility"] == "NO_REQUIRED_DATA"]
+
+    summary = {
+        "total": len(rows),
+        "eligible_count": len(eligible),
+        "partial_count": len(partial),
+        "blocked_count": len(blocked),
+        "no_required_data_count": len(no_data),
+        "eligible_pct": round(len(eligible) / len(rows) * 100, 1) if rows else 0.0,
+        "shadow_ready_ids": [r["factor_id"] for r in eligible],
+    }
+
+    result = {
+        "formula_id": FORMULA_ID,
+        # The gate passes as soon as at least one factor is fully covered.
+        "gate": "PASS" if eligible else "FAIL",
+        "summary": summary,
+        "rows": rows,
+        "note": (
+            "ELIGIBLE = required_data 전체 필드가 GatherTradingData.json에 존재. "
+            "shadow 승격은 별도 spec/57 shadow_promotion_scorecard 기준(live_sample>=30 등) 충족 후 가능."
+        ),
+    }
+
+    # The output directory may not exist yet on a fresh checkout.
+    TEMP.mkdir(parents=True, exist_ok=True)
+    out = TEMP / "factor_shadow_eligibility_v1.json"
+    out.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")
+
+    print(f"[{FORMULA_ID}] total={len(rows)} eligible={len(eligible)} partial={len(partial)} blocked={len(blocked)}")
+    # Only the first five ids are echoed to keep the console output short.
+    print(f"  Shadow-ready factors ({len(eligible)}): {[r['factor_id'] for r in eligible[:5]]}...")
+    if blocked:
+        print(f"  Blocked ({len(blocked)}): {[r['factor_id'] for r in blocked[:5]]}...")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())