ac6437b840
- tools/build_data_gated_progress_v1.py: WBS-2.5/4.1~4.3/RELEASE_GATE_TRUTH 실측 진척도 (sector_flow 21/30일, T+20 0/30건, honest_proof 45.1/70.0) - tools/build_factor_shadow_eligibility_v1.py: 149개 팩터 shadow 자격 평가 (eligible=24, partial=37, blocked=38) - spec/41: DAG step_count 63→67, 신규 4노드 추가 (build_data_gated_progress, build_factor_shadow_eligibility, build_strategy_routing_audit, build_horizon_rebalance_plan) - spec/30: routing_gate 실측 보정 — SHORT 71.4%는 오류, 실제 원인은 MID 75.0% > 50% 상한 (2026-06-14 실측) - spec/13: DATA_GATED_PROGRESS_V1, FACTOR_SHADOW_ELIGIBILITY_V1, STRATEGY_ROUTING_AUDIT_V1 formula 등록 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
153 lines
5.1 KiB
Python
153 lines
5.1 KiB
Python
"""build_factor_shadow_eligibility_v1.py — FACTOR_SHADOW_ELIGIBILITY_V1
|
|
|
|
Evaluates the shadow-promotion eligibility of the 149 factors against measured data.
Compares the required_data field lists in spec/factor_lifecycle_registry.yaml
with the keys that actually exist in GatherTradingData.json.

Output: Temp/factor_shadow_eligibility_v1.json
- ELIGIBLE: every required_data field is present in GatherTradingData
- PARTIAL: at least 50% of the required fields are present (optional fields excluded)
- BLOCKED: fewer than 50% of the required fields are present
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from pathlib import Path
|
|
from typing import Any
|
|
|
|
import yaml
|
|
|
|
# Directory one level above the folder that contains this file; every other
# path in this module is derived from it.
ROOT = Path(__file__).resolve().parents[1]
# Folder that receives the generated report.
TEMP = ROOT.joinpath("Temp")
# Identifier stamped into the report and the console summary.
FORMULA_ID = "FACTOR_SHADOW_ELIGIBILITY_V1"
# Factor registry that lists each factor's required_data entries.
REGISTRY_PATH = ROOT.joinpath("spec", "factor_lifecycle_registry.yaml")
|
|
|
|
|
|
def _load_json(path: Path) -> Any:
|
|
if not path.exists():
|
|
return {}
|
|
try:
|
|
return json.loads(path.read_text(encoding="utf-8"))
|
|
except Exception:
|
|
return {}
|
|
|
|
|
|
def _flatten_keys(obj: Any, prefix: str = "") -> set[str]:
|
|
"""JSON 객체에서 모든 리프 키(소문자)를 재귀 수집."""
|
|
keys: set[str] = set()
|
|
if isinstance(obj, dict):
|
|
for k, v in obj.items():
|
|
full = f"{prefix}.{k}" if prefix else k
|
|
keys.add(k.lower())
|
|
keys.update(_flatten_keys(v, full))
|
|
elif isinstance(obj, list):
|
|
for item in obj:
|
|
keys.update(_flatten_keys(item, prefix))
|
|
return keys
|
|
|
|
|
|
def _check_factor(factor: dict, available_keys: set[str]) -> dict:
|
|
factor_id = factor.get("factor_id", "UNKNOWN")
|
|
required_data = factor.get("required_data") or []
|
|
|
|
required_fields = [
|
|
f["field"].lower()
|
|
for f in required_data
|
|
if isinstance(f, dict) and not f.get("optional", False)
|
|
]
|
|
optional_fields = [
|
|
f["field"].lower()
|
|
for f in required_data
|
|
if isinstance(f, dict) and f.get("optional", False)
|
|
]
|
|
|
|
if not required_fields:
|
|
# required_data가 없으면 데이터 불가지론 — 승격 자격 없음
|
|
return {
|
|
"factor_id": factor_id,
|
|
"promotion_gate": factor.get("promotion_gate", "draft"),
|
|
"eligibility": "NO_REQUIRED_DATA",
|
|
"required_field_count": 0,
|
|
"present_count": 0,
|
|
"coverage_pct": 0.0,
|
|
"missing_required": [],
|
|
"present_optional": [],
|
|
}
|
|
|
|
present = [f for f in required_fields if f in available_keys]
|
|
missing = [f for f in required_fields if f not in available_keys]
|
|
optional_present = [f for f in optional_fields if f in available_keys]
|
|
|
|
coverage = len(present) / len(required_fields) if required_fields else 0.0
|
|
|
|
if coverage >= 1.0:
|
|
eligibility = "ELIGIBLE"
|
|
elif coverage >= 0.5:
|
|
eligibility = "PARTIAL"
|
|
else:
|
|
eligibility = "BLOCKED"
|
|
|
|
return {
|
|
"factor_id": factor_id,
|
|
"promotion_gate": factor.get("promotion_gate", "draft"),
|
|
"eligibility": eligibility,
|
|
"required_field_count": len(required_fields),
|
|
"present_count": len(present),
|
|
"coverage_pct": round(coverage * 100, 1),
|
|
"missing_required": missing,
|
|
"present_optional": optional_present,
|
|
}
|
|
|
|
|
|
def main() -> int:
    """Build the factor shadow-eligibility report and write it under ``Temp``.

    Loads the factor registry YAML, gathers every key present in
    ``GatherTradingData.json``, classifies each registry factor with
    ``_check_factor`` and writes the result to
    ``Temp/factor_shadow_eligibility_v1.json``. A short summary is printed.

    Returns:
        ``0`` once the report has been written; ``1`` when the registry file
        is missing or its top level is not a mapping.
    """
    if not REGISTRY_PATH.exists():
        print(f"[ERROR] Registry not found: {REGISTRY_PATH}")
        return 1

    registry = yaml.safe_load(REGISTRY_PATH.read_text(encoding="utf-8"))
    if not isinstance(registry, dict):
        # safe_load yields None for an empty document; anything other than a
        # mapping has no "factors" entry to read.
        print(f"[ERROR] Registry is not a mapping: {REGISTRY_PATH}")
        return 1
    factors = registry.get("factors") or []

    payload = _load_json(ROOT / "GatherTradingData.json")
    available_keys = _flatten_keys(payload)

    rows = [_check_factor(f, available_keys) for f in factors]

    eligible = [r for r in rows if r["eligibility"] == "ELIGIBLE"]
    partial = [r for r in rows if r["eligibility"] == "PARTIAL"]
    blocked = [r for r in rows if r["eligibility"] == "BLOCKED"]
    no_data = [r for r in rows if r["eligibility"] == "NO_REQUIRED_DATA"]

    summary = {
        "total": len(rows),
        "eligible_count": len(eligible),
        "partial_count": len(partial),
        "blocked_count": len(blocked),
        "no_required_data_count": len(no_data),
        "eligible_pct": round(len(eligible) / len(rows) * 100, 1) if rows else 0.0,
        "shadow_ready_ids": [r["factor_id"] for r in eligible],
    }

    result = {
        "formula_id": FORMULA_ID,
        # The gate passes as soon as at least one factor is fully covered.
        "gate": "PASS" if eligible else "FAIL",
        "summary": summary,
        "rows": rows,
        "note": (
            "ELIGIBLE = required_data 전체 필드가 GatherTradingData.json에 존재. "
            "shadow 승격은 별도 spec/57 shadow_promotion_scorecard 기준(live_sample>=30 등) 충족 후 가능."
        ),
    }

    # Create the output folder on demand so a fresh checkout does not fail
    # at write time.
    TEMP.mkdir(parents=True, exist_ok=True)
    out = TEMP / "factor_shadow_eligibility_v1.json"
    out.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8")

    print(f"[{FORMULA_ID}] total={len(rows)} eligible={len(eligible)} partial={len(partial)} blocked={len(blocked)}")
    print(f"  Shadow-ready factors ({len(eligible)}): {[r['factor_id'] for r in eligible[:5]]}...")
    if blocked:
        print(f"  Blocked ({len(blocked)}): {[r['factor_id'] for r in blocked[:5]]}...")
    return 0
|
|
|
|
|
|
if __name__ == "__main__":
    # Executed as a script: run the build and use main()'s return value as
    # the process exit status.
    exit_code = main()
    raise SystemExit(exit_code)
|