"""build_factor_shadow_eligibility_v1.py — FACTOR_SHADOW_ELIGIBILITY_V1 149개 팩터의 shadow 승격 자격을 실측 데이터로 평가한다. spec/factor_lifecycle_registry.yaml의 required_data 필드 목록과 GatherTradingData.json의 실제 존재 키를 대조한다. 출력: Temp/factor_shadow_eligibility_v1.json - ELIGIBLE: required_data 전체 필드 GatherTradingData에 존재 - PARTIAL: required 필드 50% 이상 존재 (optional 제외) - BLOCKED: required 필드 50% 미만 존재 """ from __future__ import annotations import json from pathlib import Path from typing import Any import yaml ROOT = Path(__file__).resolve().parents[1] TEMP = ROOT / "Temp" FORMULA_ID = "FACTOR_SHADOW_ELIGIBILITY_V1" REGISTRY_PATH = ROOT / "spec" / "factor_lifecycle_registry.yaml" def _load_json(path: Path) -> Any: if not path.exists(): return {} try: return json.loads(path.read_text(encoding="utf-8")) except Exception: return {} def _flatten_keys(obj: Any, prefix: str = "") -> set[str]: """JSON 객체에서 모든 리프 키(소문자)를 재귀 수집.""" keys: set[str] = set() if isinstance(obj, dict): for k, v in obj.items(): full = f"{prefix}.{k}" if prefix else k keys.add(k.lower()) keys.update(_flatten_keys(v, full)) elif isinstance(obj, list): for item in obj: keys.update(_flatten_keys(item, prefix)) return keys def _check_factor(factor: dict, available_keys: set[str]) -> dict: factor_id = factor.get("factor_id", "UNKNOWN") required_data = factor.get("required_data") or [] required_fields = [ f["field"].lower() for f in required_data if isinstance(f, dict) and not f.get("optional", False) ] optional_fields = [ f["field"].lower() for f in required_data if isinstance(f, dict) and f.get("optional", False) ] if not required_fields: # required_data가 없으면 데이터 불가지론 — 승격 자격 없음 return { "factor_id": factor_id, "promotion_gate": factor.get("promotion_gate", "draft"), "eligibility": "NO_REQUIRED_DATA", "required_field_count": 0, "present_count": 0, "coverage_pct": 0.0, "missing_required": [], "present_optional": [], } present = [f for f in required_fields if f in available_keys] missing = [f for f in required_fields if f not in available_keys] optional_present = [f for f in optional_fields if f in available_keys] coverage = len(present) / len(required_fields) if required_fields else 0.0 if coverage >= 1.0: eligibility = "ELIGIBLE" elif coverage >= 0.5: eligibility = "PARTIAL" else: eligibility = "BLOCKED" return { "factor_id": factor_id, "promotion_gate": factor.get("promotion_gate", "draft"), "eligibility": eligibility, "required_field_count": len(required_fields), "present_count": len(present), "coverage_pct": round(coverage * 100, 1), "missing_required": missing, "present_optional": optional_present, } def main() -> int: if not REGISTRY_PATH.exists(): print(f"[ERROR] Registry not found: {REGISTRY_PATH}") return 1 registry = yaml.safe_load(REGISTRY_PATH.read_text(encoding="utf-8")) factors = registry.get("factors") or [] payload = _load_json(ROOT / "GatherTradingData.json") available_keys = _flatten_keys(payload) rows = [_check_factor(f, available_keys) for f in factors] eligible = [r for r in rows if r["eligibility"] == "ELIGIBLE"] partial = [r for r in rows if r["eligibility"] == "PARTIAL"] blocked = [r for r in rows if r["eligibility"] == "BLOCKED"] no_data = [r for r in rows if r["eligibility"] == "NO_REQUIRED_DATA"] summary = { "total": len(rows), "eligible_count": len(eligible), "partial_count": len(partial), "blocked_count": len(blocked), "no_required_data_count": len(no_data), "eligible_pct": round(len(eligible) / len(rows) * 100, 1) if rows else 0.0, "shadow_ready_ids": [r["factor_id"] for r in eligible], } result = { "formula_id": FORMULA_ID, "gate": "PASS" if eligible else "FAIL", "summary": summary, "rows": rows, "note": ( "ELIGIBLE = required_data 전체 필드가 GatherTradingData.json에 존재. " "shadow 승격은 별도 spec/57 shadow_promotion_scorecard 기준(live_sample>=30 등) 충족 후 가능." ), } out = TEMP / "factor_shadow_eligibility_v1.json" out.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") print(f"[{FORMULA_ID}] total={len(rows)} eligible={len(eligible)} partial={len(partial)} blocked={len(blocked)}") print(f" Shadow-ready factors ({len(eligible)}): {[r['factor_id'] for r in eligible[:5]]}...") if blocked: print(f" Blocked ({len(blocked)}): {[r['factor_id'] for r in blocked[:5]]}...") return 0 if __name__ == "__main__": raise SystemExit(main())