"""Audit how fresh the source data behind each sector universe is.

The public entry point is :func:`build_sector_universe_refresh_audit`. It
groups the ``sector_universe`` rows of a payload by their ``Sector`` value,
classifies every sector by source kind and by the age of its newest
``Source_AsOf`` date, and returns per-sector detail rows, summary counters
and an overall ``gate`` verdict (``PASS`` / ``WARN`` / ``FAIL``).
"""
from __future__ import annotations

import datetime as dt
from typing import Any

# A sector is "CURRENT" while its newest Source_AsOf is at most this old.
DEFAULT_MAX_AGE_DAYS = 31
# Older than DEFAULT_MAX_AGE_DAYS but at most this old is "DUE"; beyond it
# the sector is "OVERDUE".
_DUE_MAX_AGE_DAYS = 45

# "Today" is evaluated at a fixed UTC+09:00 offset.
# NOTE(review): presumably KST, given the Naver sources -- confirm.
_UTC_PLUS_9 = dt.timezone(dt.timedelta(hours=9))

# When the rows of one sector disagree on "Source", the first kind in this
# tuple that is present wins; failure markers outrank healthy sources.
_SOURCE_KIND_PRIORITY = (
    "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
    "NAVER_ETF_PAGE_FAIL",
    "NAVER_ETF_PAGE",
    "REPRESENTATIVE_STOCK_PROXY",
    "DEFAULT_TEMPLATE",
    "SHEET_INPUT",
)

# Source kinds that go through the URL / age freshness ladder, mapped to the
# transport mode reported for them.
_FRESHNESS_TRANSPORT_MODES = {
    "REPRESENTATIVE_STOCK_PROXY": "HTML_SERVER_RENDERED",
    "SHEET_INPUT": "MANUAL_OR_TEMPLATE",
    "NAVER_ETF_PAGE": "HTML_SERVER_RENDERED",
}

# Upper-cased, stripped "Is_ETF" values that mark a row as an ETF.
_ETF_FLAGS = frozenset({"Y", "YES", "TRUE", "1"})


def _today() -> dt.date:
    """Return the current calendar date at UTC+09:00."""
    return dt.datetime.now(_UTC_PLUS_9).date()


def _txt(value: Any, default: str = "") -> str:
    """Return ``value`` as stripped text, or ``default`` if it is None/blank."""
    if value is None:
        return default
    text = value if isinstance(value, str) else str(value)
    return text.strip() or default


def _as_float(value: Any) -> float | None:
    """Best-effort conversion of ``value`` to ``float``.

    Strings are stripped and have ``%`` and ``,`` removed before parsing.
    Returns ``None`` for ``None``, blank text, or anything not convertible.
    """
    try:
        if value in (None, ""):
            return None
        if isinstance(value, str):
            text = value.strip().replace("%", "").replace(",", "")
            return float(text) if text else None
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return None


def _parse_date(value: Any) -> dt.date | None:
    """Parse ``value`` into a plain ``datetime.date``; ``None`` if impossible.

    Accepts ``date``/``datetime`` objects and text whose first ten characters
    match ``YYYY-MM-DD``, ``YYYY.MM.DD``, ``YYYY/MM/DD`` or anything
    ``date.fromisoformat`` understands.
    """
    if value in (None, ""):
        return None
    # datetime is a subclass of date: reduce it to a plain date so results can
    # be compared with, and subtracted from, other dates without TypeError.
    if isinstance(value, dt.datetime):
        return value.date()
    if isinstance(value, dt.date):
        return value
    text = _txt(value)
    if not text:
        return None
    head = text[:10]  # ignore any trailing time component
    for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
        try:
            return dt.datetime.strptime(head, fmt).date()
        except ValueError:
            continue
    try:
        return dt.date.fromisoformat(head)
    except ValueError:
        return None


def _age_days(value: Any, today: dt.date | None = None) -> int | None:
    """Return whole days from ``value`` to ``today`` (default: now, UTC+9).

    Returns ``None`` when ``value`` cannot be parsed as a date. The result is
    negative when ``value`` lies in the future.
    """
    parsed = _parse_date(value)
    if parsed is None:
        return None
    if today is None:
        today = _today()
    return (today - parsed).days


def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]:
    """Return the dict rows of ``sector_universe`` found in ``payload``.

    ``payload["data"]["sector_universe"]`` is preferred; the top-level
    ``payload["sector_universe"]`` is the fallback. Non-dict entries are
    dropped, and anything else yields an empty list.
    """
    if not isinstance(payload, dict):
        return []
    for container in (payload.get("data"), payload):
        if isinstance(container, dict):
            candidate = container.get("sector_universe")
            if isinstance(candidate, list):
                return [r for r in candidate if isinstance(r, dict)]
    return []


def _freshness_status(source_url: str, age_days: int | None) -> tuple[str, list[str]]:
    """Return ``(status, reasons)`` for a source checked by URL and age."""
    if not source_url:
        return "MISSING", ["Source_URL_MISSING"]
    if age_days is None:
        return "MISSING", ["Source_AsOf_MISSING"]
    if age_days <= DEFAULT_MAX_AGE_DAYS:
        return "CURRENT", []
    status = "DUE" if age_days <= _DUE_MAX_AGE_DAYS else "OVERDUE"
    return status, [f"AgeDays={age_days}"]


def _classify_sector(
    source_kind: str, source_url: str, age_days: int | None
) -> tuple[str, str, list[str]]:
    """Return ``(status, transport_mode, reasons)`` for one sector."""
    if source_kind == "DEFAULT_TEMPLATE":
        status, transport_mode = "TEMPLATE", "MANUAL_OR_TEMPLATE"
        reasons = ["DEFAULT_TEMPLATE"]
    elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED":
        status = transport_mode = "LAYOUT_CHANGED"
        reasons = ["LAYOUT_CHANGED"]
        if not source_url:
            reasons.append("Source_URL_MISSING")
        reasons.append(
            "Source_AsOf_MISSING" if age_days is None else f"AgeDays={age_days}"
        )
    elif source_kind == "NAVER_ETF_PAGE_FAIL":
        status, transport_mode = "INVALID", "UNKNOWN"
        reasons = ["NAVER_ETF_PAGE_FAIL"]
    elif source_kind in _FRESHNESS_TRANSPORT_MODES:
        transport_mode = _FRESHNESS_TRANSPORT_MODES[source_kind]
        status, reasons = _freshness_status(source_url, age_days)
    else:
        status, transport_mode = "INVALID", "UNKNOWN"
        reasons = ["SOURCE_KIND_UNKNOWN"]

    # Checks that apply to every source kind.
    if not source_url:
        if source_kind == "NAVER_ETF_PAGE":
            reasons.append("NAVER_URL_MISSING")
        reasons.append("Source_URL_MISSING")
    if age_days is not None and age_days < 0:
        reasons.append("FUTURE_DATE")

    # Branch-specific and generic checks can report the same reason; keep the
    # first occurrence of each so refresh_reason never repeats itself.
    return status, transport_mode, list(dict.fromkeys(reasons))


def build_sector_universe_refresh_audit(payload: dict[str, Any] | None) -> dict[str, Any]:
    """Build the refresh audit for the sector universe contained in ``payload``.

    Args:
        payload: Mapping holding a ``sector_universe`` list of row dicts,
            either under ``payload["data"]`` or at the top level. Rows with a
            blank ``Sector`` are ignored. ``None`` or any non-dict is treated
            as an empty universe.

    Returns:
        A dict with ``formula_id``, ``gate``, ``max_age_days``, ``summary``,
        ``rows`` (one detail dict per sector, ``CURRENT`` sectors first, then
        ordered by status and sector name) and ``source`` (input row counts).
        ``gate`` is ``"FAIL"`` when any sector is a template, lacks a source
        URL, is stale or has a changed layout; ``"WARN"`` when none of those
        hold but some sector is sheet input; otherwise ``"PASS"``.
    """
    rows = _extract_sector_rows(payload)
    today = _today()

    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if sector:
            grouped.setdefault(sector, []).append(row)

    detail_rows: list[dict[str, Any]] = []
    source_kind_counts = {
        "NAVER_ETF_PAGE": 0,
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0,
        "NAVER_ETF_PAGE_FAIL": 0,
        "REPRESENTATIVE_STOCK_PROXY": 0,
        "SHEET_INPUT": 0,
        "DEFAULT_TEMPLATE": 0,
        "OTHER": 0,
    }
    transport_mode_counts = {
        "HTML_SERVER_RENDERED": 0,
        "MANUAL_OR_TEMPLATE": 0,
        "LAYOUT_CHANGED": 0,
        "UNKNOWN": 0,
    }
    stale_sector_count = 0
    layout_changed_count = 0
    missing_source_url_count = 0
    sheet_input_count = 0
    template_count = 0
    newest_asof: dt.date | None = None
    oldest_asof: dt.date | None = None

    for sector, sector_rows in grouped.items():
        # A row without a Source value counts as sheet input.
        source_values = {_txt(r.get("Source"), "SHEET_INPUT") for r in sector_rows}
        source_kind = next(
            (kind for kind in _SOURCE_KIND_PRIORITY if kind in source_values),
            "OTHER",
        )
        source_kind_counts[source_kind] += 1

        # First non-blank URL among the sector's rows, if any.
        source_url = next(
            (url for url in (_txt(r.get("Source_URL")) for r in sector_rows) if url),
            "",
        )

        # The sector's as-of date is the newest one among its rows.
        asof_dates = [
            parsed
            for parsed in (_parse_date(r.get("Source_AsOf")) for r in sector_rows)
            if parsed is not None
        ]
        source_asof = max(asof_dates) if asof_dates else None
        if source_asof is not None:
            newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof)
            oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof)
        age_days = _age_days(source_asof, today) if source_asof is not None else None

        constituent_count = len(sector_rows)
        etf_count = sum(
            1
            for r in sector_rows
            if str(r.get("Is_ETF") or "").strip().upper() in _ETF_FLAGS
        )
        stock_count = constituent_count - etf_count
        # Unparseable or missing weights contribute 0.
        weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows)

        status, transport_mode, reasons = _classify_sector(source_kind, source_url, age_days)

        if source_kind == "DEFAULT_TEMPLATE":
            template_count += 1
        elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED":
            layout_changed_count += 1
        elif source_kind == "SHEET_INPUT":
            sheet_input_count += 1
        # Templates are reported through template_count and do not add to the
        # missing-URL counter.
        if not source_url and source_kind != "DEFAULT_TEMPLATE":
            missing_source_url_count += 1
        # A layout-changed sector with a known as-of date counts as stale
        # whatever its age; otherwise only DUE / OVERDUE sectors are stale.
        if status in ("DUE", "OVERDUE") or (
            source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" and age_days is not None
        ):
            stale_sector_count += 1

        transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1

        first_row = sector_rows[0]
        detail_rows.append({
            "sector": sector,
            "proxy_ticker": _txt(first_row.get("Proxy_Ticker")),
            "proxy_name": _txt(first_row.get("Proxy_Name")),
            "proxy_type": _txt(first_row.get("Proxy_Type")),
            "source_kind": source_kind,
            "transport_mode": transport_mode,
            "source_url": source_url,
            "source_asof": source_asof.isoformat() if source_asof else "",
            "age_days": age_days if age_days is not None else "",
            "constituent_count": constituent_count,
            "stock_count": stock_count,
            "etf_count": etf_count,
            "weight_sum": round(weight_sum, 4),
            "status": status,
            "refresh_reason": ";".join(reasons) if reasons else "OK",
        })

    # CURRENT sectors first, then the rest by status name and sector name.
    detail_rows.sort(key=lambda r: (r["status"] != "CURRENT", r["status"], r["sector"]))

    status_totals = {"CURRENT": 0, "DUE": 0, "OVERDUE": 0, "MISSING": 0}
    for detail in detail_rows:
        if detail["status"] in status_totals:
            status_totals[detail["status"]] += 1

    summary = {
        "sector_count": len(grouped),
        "current_count": status_totals["CURRENT"],
        "due_count": status_totals["DUE"],
        "overdue_count": status_totals["OVERDUE"],
        "missing_count": status_totals["MISSING"],
        "template_count": template_count,
        "sheet_input_count": sheet_input_count,
        "naver_source_count": source_kind_counts["NAVER_ETF_PAGE"],
        "missing_source_url_count": missing_source_url_count,
        "stale_sector_count": stale_sector_count,
        "layout_changed_count": layout_changed_count,
        "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "",
        "newest_source_asof": newest_asof.isoformat() if newest_asof else "",
        "source_kind_counts": source_kind_counts,
        "transport_mode_counts": transport_mode_counts,
        "ajax_mode": "NO",
        "transport_model": "HTML_SERVER_RENDERED",
    }

    if (
        template_count > 0
        or missing_source_url_count > 0
        or stale_sector_count > 0
        or layout_changed_count > 0
    ):
        gate = "FAIL"
    elif sheet_input_count > 0:
        gate = "WARN"
    else:
        gate = "PASS"

    return {
        "formula_id": "sector_universe_refresh_audit_v1",
        "gate": gate,
        "max_age_days": DEFAULT_MAX_AGE_DAYS,
        "summary": summary,
        "rows": detail_rows,
        "source": {
            "sector_rows": len(rows),
            "grouped_sectors": len(grouped),
        },
    }