"""build_sector_flow_history_progress_v1.py — SECTOR_FLOW_HISTORY_PROGRESS_V1 WBS-2.5 진척도를 실데이터로 요약한다. sector_flow_history 누적 일수와 Flow_Credit 커버리지를 정직하게 노출한다. """ from __future__ import annotations import argparse import json from collections import defaultdict from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] DEFAULT_JSON = ROOT / "GatherTradingData.json" DEFAULT_OUT = ROOT / "Temp" / "sector_flow_history_progress_v1.json" FORMULA_ID = "SECTOR_FLOW_HISTORY_PROGRESS_V1" def _load(path: Path) -> dict[str, Any]: if not path.exists(): return {} try: obj = json.loads(path.read_text(encoding="utf-8")) except Exception: return {} return obj if isinstance(obj, dict) else {} def _rows(data: dict[str, Any], key: str) -> list[dict[str, Any]]: rows = (data.get("data") or {}).get(key) or [] return [r for r in rows if isinstance(r, dict)] def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--json", default=str(DEFAULT_JSON)) ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() json_path = Path(args.json) json_path = json_path if json_path.is_absolute() else ROOT / json_path out_path = Path(args.out) out_path = out_path if out_path.is_absolute() else ROOT / out_path payload = _load(json_path) sector_rows = _rows(payload, "sector_flow_history") data_feed_rows = _rows(payload, "data_feed") by_date: dict[str, int] = defaultdict(int) for row in sector_rows: snap = row.get("Snapshot_Date") or row.get("snapshot_date") or "" day = str(snap)[:10] if day: by_date[day] += 1 distinct_dates = len(by_date) row_count = len(sector_rows) target_dates = 30 status = "DONE" if distinct_dates >= target_dates else "DATA_GATED" coverage = round(min(100.0, distinct_dates / target_dates * 100.0), 2) flow_credit_values = [row.get("Flow_Credit") for row in data_feed_rows] nonnull_flow_credit = sum(1 for v in flow_credit_values if v is not None) flow_credit_coverage = round((nonnull_flow_credit / len(flow_credit_values) * 100.0), 2) if flow_credit_values else 0.0 result = { "formula_id": FORMULA_ID, "status": status, "current_dates": distinct_dates, "target_dates": target_dates, "coverage_pct": coverage, "row_count": row_count, "rows_per_date": sorted(by_date.items()), "flow_credit_coverage_pct": flow_credit_coverage, "flow_credit_nonnull_count": nonnull_flow_credit, "flow_credit_total_count": len(flow_credit_values), "source": "GatherTradingData.json:data.sector_flow_history + data.data_feed", } out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2) + "\n", encoding="utf-8") print( f"[{FORMULA_ID}] status={status} dates={distinct_dates}/{target_dates} " f"rows={row_count} flow_credit={nonnull_flow_credit}/{len(flow_credit_values)}" ) return 0 if __name__ == "__main__": raise SystemExit(main())