"""build_fundamental_raw_evidence_v3.py — FUNDAMENTAL_RAW_EVIDENCE_V3

P0-011: ground fundamentals in measured data.

Missing ROE/OPM/OCF/FCF values are reported explicitly as ``DATA_MISSING``,
and confidence_cap is automatically lowered based on field coverage.
LONG judgments are downgraded to CANDIDATE_ONLY when coverage is below the
threshold.

NOTE(review): this module writes coverage figures and downgrade flags only; no
``confidence_cap`` field is produced here — presumably a downstream consumer
applies it. Confirm against the caller.
"""
from __future__ import annotations

import argparse
import json
import math
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

ROOT = Path(__file__).resolve().parents[1]
DEFAULT_RAW = ROOT / "Temp" / "fundamental_raw_v2.json"
DEFAULT_FINAL_JDG = ROOT / "Temp" / "final_judgment_gate_v1.json"
DEFAULT_OUT = ROOT / "Temp" / "fundamental_raw_evidence_v3.json"

# Required fundamental fields (P0-011 requirement).
REQUIRED_FIELDS = ["roe_pct", "opm_pct", "ocf_krw", "fcf_krw"]
COVERAGE_THRESHOLD = 0.95  # coverage must be >= 95% for a LONG judgment to stand
LONG_HORIZONS = {"LONG", "POSITION", "MOMENTUM"}  # horizon values classified as long-term

# String forms that mean "no value" even though the key is present.
_MISSING_TOKENS = ("", "None", "DATA_MISSING", "N/A")

# (raw field, data_feed field) pairs: the raw value wins, the feed value is the fallback.
# NOTE(review): the "_B" vs "_krw" suffixes may indicate different units; the
# feed value is copied verbatim, exactly as before — confirm with the data owner.
_FEED_FALLBACKS = (("ocf_krw", "OCF_B"), ("fcf_krw", "FCF_B"))

# Overall-coverage cut-offs (percent) used for the gate verdict.
_PASS_COVERAGE_PCT = 95.0
_CAUTION_COVERAGE_PCT = 50.0


def _load(path: Path) -> dict[str, Any]:
    """Read *path* as a JSON object, returning ``{}`` on any failure.

    Best-effort by design: a missing or unreadable file, invalid JSON, or a
    top-level value that is not an object all yield an empty dict.
    """
    try:
        obj = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError: bad encoding or bad JSON
        # (UnicodeDecodeError and json.JSONDecodeError are both subclasses).
        return {}
    return obj if isinstance(obj, dict) else {}


def _field_presence(row: dict[str, Any], field: str) -> bool:
    """Return True when *field* in *row* holds real data.

    ``None``, float NaN, and the placeholder strings in ``_MISSING_TOKENS``
    (compared after ``str()`` and ``strip()``) all count as missing.
    """
    v = row.get(field)
    if v is None:
        return False
    if isinstance(v, float) and math.isnan(v):
        # json.loads accepts the NaN token; it carries no information.
        return False
    return str(v).strip() not in _MISSING_TOKENS


def _coverage(row: dict[str, Any], fields: list[str]) -> float:
    """Return the fraction (0.0-1.0) of *fields* present in *row*; 0.0 if *fields* is empty."""
    present = sum(1 for f in fields if _field_presence(row, f))
    return present / len(fields) if fields else 0.0


def _resolve(path_arg: str) -> Path:
    """Interpret a CLI path: absolute paths are used as-is, others are relative to ROOT."""
    candidate = Path(path_arg)
    return candidate if candidate.is_absolute() else ROOT / path_arg


def _display_path(path: Path) -> str:
    """Return *path* relative to ROOT when possible, otherwise the path as given."""
    try:
        return str(path.relative_to(ROOT))
    except ValueError:
        # An absolute path outside ROOT cannot be made relative.
        return str(path)


def _dict_rows(value: Any) -> list[dict[str, Any]]:
    """Return the dict entries of *value* if it is a list, otherwise an empty list."""
    if not isinstance(value, list):
        return []
    return [item for item in value if isinstance(item, dict)]


def _gate(overall_coverage_pct: float, downgraded_count: int) -> str:
    """Map overall coverage and the number of downgraded LONG rows to a gate verdict."""
    if overall_coverage_pct >= _PASS_COVERAGE_PCT and downgraded_count == 0:
        return "PASS"
    if overall_coverage_pct >= _CAUTION_COVERAGE_PCT:
        return "CAUTION"
    return "FAIL"


def main(argv: list[str] | None = None) -> int:
    """Build the fundamental evidence report and write it as JSON.

    Args:
        argv: Command-line arguments (``--raw``, ``--fj``, ``--out``). ``None``
            makes argparse read ``sys.argv``, which is the original behavior.

    Returns:
        Always 0; the gate verdict is reported in the output, not the exit code.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--raw", default=str(DEFAULT_RAW))
    ap.add_argument("--fj", default=str(DEFAULT_FINAL_JDG))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args(argv)

    raw_path = _resolve(args.raw)
    raw = _load(raw_path)
    fj = _load(_resolve(args.fj))

    # data_feed's OCF_B/FCF_B serve as a supplementary source.
    gtd = _load(ROOT / "GatherTradingData.json")
    data_section = gtd.get("data")
    feed = data_section.get("data_feed") if isinstance(data_section, dict) else None
    df_by_ticker: dict[str, dict[str, Any]] = {}
    for feed_row in _dict_rows(feed):
        key = str(feed_row.get("Ticker") or "")
        if key:
            # Rows without a ticker are skipped so that a raw row lacking a
            # ticker cannot pick up an unrelated feed row keyed under "".
            df_by_ticker[key] = feed_row

    non_etf = [r for r in _dict_rows(raw.get("rows")) if not r.get("is_etf")]

    # Horizon lookup from the final judgment file.
    horizon_by_ticker: dict[str, str] = {}
    for fj_row in _dict_rows(fj.get("rows")):
        t = str(fj_row.get("ticker") or "")
        if t:
            horizon_by_ticker[t] = str(
                fj_row.get("best_horizon") or fj_row.get("horizon") or ""
            )

    raw_source = _display_path(raw_path)
    evidence_rows: list[dict[str, Any]] = []
    total_field_slots = 0
    filled_field_slots = 0

    for src_row in non_etf:
        ticker = str(src_row.get("ticker") or "")
        df_row = df_by_ticker.get(ticker, {})

        # Work on a copy so the loaded input is never mutated.
        row = dict(src_row)
        # OCF/FCF: prefer raw_v2's ocf_krw/fcf_krw, else use data_feed's OCF_B/FCF_B.
        for target, fallback in _FEED_FALLBACKS:
            if not _field_presence(row, target) and _field_presence(df_row, fallback):
                row[target] = df_row[fallback]

        field_status: dict[str, str] = {}
        for field in REQUIRED_FIELDS:
            if _field_presence(row, field):
                field_status[field] = str(row[field])
                filled_field_slots += 1
            else:
                field_status[field] = "DATA_MISSING"
            total_field_slots += 1

        field_coverage = _coverage(row, REQUIRED_FIELDS)
        horizon = horizon_by_ticker.get(ticker, "UNKNOWN")
        # Substring match: a horizon counts as long-term when it contains any
        # LONG_HORIZONS token.
        is_long_horizon = any(lh in horizon.upper() for lh in LONG_HORIZONS)
        long_buy_downgraded = is_long_horizon and field_coverage < COVERAGE_THRESHOLD

        evidence_rows.append({
            "ticker": ticker,
            "name": row.get("name", ""),
            "source": row.get("source", ""),
            "as_of_date": row.get("as_of_date", ""),
            "field_coverage_pct": round(field_coverage * 100, 2),
            "horizon": horizon,
            "is_long_horizon": is_long_horizon,
            "long_buy_downgraded_to_candidate_only": long_buy_downgraded,
            "downgrade_reason": (
                f"fundamental_coverage={field_coverage*100:.0f}% < {COVERAGE_THRESHOLD*100:.0f}%"
                if long_buy_downgraded
                else None
            ),
            "fields": field_status,
            "source_path": raw_source,
            "formula_id": "FUNDAMENTAL_RAW_EVIDENCE_V3",
        })

    overall_coverage = (
        (filled_field_slots / total_field_slots * 100.0) if total_field_slots > 0 else 0.0
    )
    roe_opm_ocf_fcf_missing_count = sum(
        1
        for r in evidence_rows
        for field in REQUIRED_FIELDS
        if r["fields"].get(field) == "DATA_MISSING"
    )
    long_buy_with_missing = [
        r for r in evidence_rows if r["long_buy_downgraded_to_candidate_only"]
    ]

    # Gate verdict.
    gate = _gate(overall_coverage, len(long_buy_with_missing))

    result = {
        "formula_id": "FUNDAMENTAL_RAW_EVIDENCE_V3",
        "gate": gate,
        "fundamental_source_field_coverage_pct": round(overall_coverage, 2),
        "roe_opm_ocf_fcf_missing_count": roe_opm_ocf_fcf_missing_count,
        "long_horizon_buy_with_missing_fundamental_count": len(long_buy_with_missing),
        "long_buy_downgraded_tickers": [r["ticker"] for r in long_buy_with_missing],
        "coverage_threshold_pct": COVERAGE_THRESHOLD * 100,
        "non_etf_ticker_count": len(non_etf),
        "rows": evidence_rows,
        "generated_at": datetime.now(timezone.utc).isoformat(),
        # NOTE(review): fixed string, not derived from --out; kept as-is because
        # downstream readers may depend on the exact value.
        "source_path": "Temp/fundamental_raw_evidence_v3.json",
    }

    out_path = _resolve(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")

    # Echo everything except the per-ticker rows to stdout.
    summary = {k: v for k, v in result.items() if k != "rows"}
    print(json.dumps(summary, indent=2, ensure_ascii=False))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())