297 lines
12 KiB
Python
297 lines
12 KiB
Python
from __future__ import annotations
|
|
|
|
import datetime as dt
|
|
from typing import Any
|
|
|
|
|
|
# Age threshold in days: a source whose as-of date is no older than this is
# graded "CURRENT". The same value is reported as "max_age_days" in the audit.
DEFAULT_MAX_AGE_DAYS = 31
|
|
|
|
|
|
def _txt(value: Any, default: str = "") -> str:
    """Coerce *value* to stripped text, substituting *default* when nothing is left.

    ``None`` yields *default* immediately. Strings are stripped as they are;
    any other object is converted with ``str()`` first. A result that is empty
    after stripping is replaced by *default*.
    """
    if value is None:
        return default
    raw = value if isinstance(value, str) else str(value)
    cleaned = raw.strip()
    return cleaned if cleaned else default
|
|
|
|
|
|
def _as_float(value: Any) -> float | None:
    """Convert *value* to a finite float, or return ``None`` when that is impossible.

    Strings are stripped and have ``%`` signs and ``,`` separators removed
    before parsing. ``None``, blank strings, unparseable input and non-finite
    results (``nan``, ``inf``, ``-inf``) all yield ``None``.
    """
    import math  # function-scope import keeps this helper self-contained

    if value is None:
        return None
    if isinstance(value, str):
        value = value.strip().replace("%", "").replace(",", "")
        if not value:
            return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # float() rejects unsupported types, malformed text and oversized ints.
        return None
    # "nan"/"inf" parse successfully but would poison any sum built on the result.
    return number if math.isfinite(number) else None
|
|
|
|
|
|
def _parse_date(value: Any) -> dt.date | None:
    """Parse *value* into a plain ``datetime.date``, or return ``None``.

    Accepts ``date`` and ``datetime`` objects, and text whose first ten
    characters (after stripping) form a date written as ``YYYY-MM-DD``,
    ``YYYY.MM.DD`` or ``YYYY/MM/DD``. Anything else that
    ``date.fromisoformat`` understands is tried as a last resort. Text beyond
    the first ten characters (for example a time component) is ignored.
    """
    if value is None:
        return None
    # datetime is a subclass of date; reduce it to a plain date so the result
    # can be compared with, and subtracted from, other dates without TypeError.
    if isinstance(value, dt.datetime):
        return value.date()
    if isinstance(value, dt.date):
        return value

    head = str(value).strip()[:10]
    if not head:
        return None

    for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
        try:
            return dt.datetime.strptime(head, fmt).date()
        except ValueError:
            continue
    try:
        return dt.date.fromisoformat(head)
    except ValueError:
        return None
|
|
|
|
|
|
def _age_days(value: Any, today: dt.date | None = None) -> int | None:
    """Return how many days *value* lies before *today*.

    *value* is interpreted by ``_parse_date``; ``None`` is returned when it
    cannot be parsed. When *today* is not supplied, the current date at a
    fixed UTC+09:00 offset is used. A date later than *today* produces a
    negative count.
    """
    as_of = _parse_date(value)
    if as_of is None:
        return None
    if not today:
        utc_plus_9 = dt.timezone(dt.timedelta(hours=9))
        today = dt.datetime.now(utc_plus_9).date()
    elapsed = today - as_of
    return elapsed.days
|
|
|
|
|
|
def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]:
    """Pull the ``sector_universe`` row dicts out of *payload*.

    The list is looked up first under ``payload["data"]["sector_universe"]``
    and then under ``payload["sector_universe"]``; the first location that
    holds a list wins, even if that list is empty. Entries that are not dicts
    are dropped. An empty list is returned when *payload* is not a dict or
    neither location holds a list.
    """
    if not isinstance(payload, dict):
        return []

    nested = payload.get("data")
    containers = (nested, payload) if isinstance(nested, dict) else (payload,)

    for container in containers:
        candidate = container.get("sector_universe")
        if isinstance(candidate, list):
            return [entry for entry in candidate if isinstance(entry, dict)]
    return []
|
|
|
|
|
|
# Upper bound (days) of the "DUE" band; anything older is "OVERDUE".
_DUE_MAX_AGE_DAYS = 45

# Source kinds in the order they win when a sector's rows disagree.
_SOURCE_KIND_PRIORITY = (
    "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
    "NAVER_ETF_PAGE_FAIL",
    "NAVER_ETF_PAGE",
    "REPRESENTATIVE_STOCK_PROXY",
    "DEFAULT_TEMPLATE",
    "SHEET_INPUT",
)

# Source kinds that are graded by URL presence and age, with their transport mode.
_FRESHNESS_TRANSPORT = {
    "REPRESENTATIVE_STOCK_PROXY": "HTML_SERVER_RENDERED",
    "SHEET_INPUT": "MANUAL_OR_TEMPLATE",
    "NAVER_ETF_PAGE": "HTML_SERVER_RENDERED",
}

_ETF_TRUE_FLAGS = {"Y", "YES", "TRUE", "1"}


def _classify_freshness(source_url: str, age_days: int | None) -> tuple[str, list[str], bool]:
    """Grade one sector by source URL presence and data age.

    Returns ``(status, reasons, is_stale)``. A missing URL or missing as-of
    date gives ``"MISSING"``; otherwise the age selects ``"CURRENT"``
    (<= ``DEFAULT_MAX_AGE_DAYS``), ``"DUE"`` (<= ``_DUE_MAX_AGE_DAYS``) or
    ``"OVERDUE"``. Only DUE and OVERDUE count as stale.
    """
    if not source_url:
        return "MISSING", ["Source_URL_MISSING"], False
    if age_days is None:
        return "MISSING", ["Source_AsOf_MISSING"], False
    if age_days <= DEFAULT_MAX_AGE_DAYS:
        return "CURRENT", [], False
    status = "DUE" if age_days <= _DUE_MAX_AGE_DAYS else "OVERDUE"
    return status, [f"AgeDays={age_days}"], True


def build_sector_universe_refresh_audit(payload: dict[str, Any] | None) -> dict[str, Any]:
    """Audit how fresh and well-sourced each sector of the sector universe is.

    Rows are taken from *payload* via ``_extract_sector_rows`` and grouped by
    their ``"Sector"`` value (rows without one are ignored). For each sector
    the function determines the dominant source kind, the first non-empty
    ``Source_URL``, the newest parseable ``Source_AsOf`` date and its age
    relative to today's date at UTC+09:00, then assigns a status and a
    ``;``-joined list of reasons (``"OK"`` when there are none).

    Returns a dict with ``formula_id``, ``gate`` (``"FAIL"`` when any sector
    is a template, lacks a source URL, is stale or reports a layout change;
    ``"WARN"`` when any sector is sheet-sourced; otherwise ``"PASS"``),
    ``max_age_days``, a ``summary`` of counters, the per-sector ``rows``
    (CURRENT first, then by status and sector name) and ``source`` row counts.
    """
    rows = _extract_sector_rows(payload)
    today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date()

    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if sector:
            grouped.setdefault(sector, []).append(row)

    detail_rows: list[dict[str, Any]] = []
    # Literal dicts keep every key present (with 0) and fix the output key order.
    source_kind_counts = {
        "NAVER_ETF_PAGE": 0,
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0,
        "NAVER_ETF_PAGE_FAIL": 0,
        "REPRESENTATIVE_STOCK_PROXY": 0,
        "SHEET_INPUT": 0,
        "DEFAULT_TEMPLATE": 0,
        "OTHER": 0,
    }
    transport_mode_counts = {
        "HTML_SERVER_RENDERED": 0,
        "MANUAL_OR_TEMPLATE": 0,
        "LAYOUT_CHANGED": 0,
        "UNKNOWN": 0,
    }
    stale_sector_count = 0
    layout_changed_count = 0
    missing_source_url_count = 0
    sheet_input_count = 0
    template_count = 0
    newest_asof: dt.date | None = None
    oldest_asof: dt.date | None = None

    for sector, sector_rows in grouped.items():
        # A row without a Source value is treated as manual sheet input.
        source_values = {_txt(r.get("Source"), "SHEET_INPUT") for r in sector_rows}
        source_kind = next(
            (kind for kind in _SOURCE_KIND_PRIORITY if kind in source_values),
            "OTHER",
        )
        source_kind_counts[source_kind] += 1

        urls = (_txt(r.get("Source_URL")) for r in sector_rows)
        source_url = next((url for url in urls if url), "")
        url_missing = not source_url

        parsed_dates = (_parse_date(r.get("Source_AsOf")) for r in sector_rows)
        asof_dates = [d for d in parsed_dates if d is not None]
        source_asof = max(asof_dates) if asof_dates else None
        if source_asof is not None:
            newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof)
            oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof)
        age_days = _age_days(source_asof, today) if source_asof is not None else None

        constituent_count = len(sector_rows)
        etf_count = sum(
            1
            for r in sector_rows
            if str(r.get("Is_ETF") or "").strip().upper() in _ETF_TRUE_FLAGS
        )
        stock_count = constituent_count - etf_count
        weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows)

        status = "INVALID"
        transport_mode = "UNKNOWN"
        reason_parts: list[str] = []

        if source_kind == "DEFAULT_TEMPLATE":
            status = "TEMPLATE"
            transport_mode = "MANUAL_OR_TEMPLATE"
            reason_parts.append("DEFAULT_TEMPLATE")
            template_count += 1
        elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED":
            status = "LAYOUT_CHANGED"
            transport_mode = "LAYOUT_CHANGED"
            reason_parts.append("LAYOUT_CHANGED")
            layout_changed_count += 1
            if url_missing:
                reason_parts.append("Source_URL_MISSING")
            if age_days is None:
                reason_parts.append("Source_AsOf_MISSING")
            else:
                # Any dated layout-changed sector counts as stale, whatever its age.
                stale_sector_count += 1
                reason_parts.append(f"AgeDays={age_days}")
        elif source_kind == "NAVER_ETF_PAGE_FAIL":
            reason_parts.append("NAVER_ETF_PAGE_FAIL")
        elif source_kind in _FRESHNESS_TRANSPORT:
            transport_mode = _FRESHNESS_TRANSPORT[source_kind]
            if source_kind == "SHEET_INPUT":
                sheet_input_count += 1
            status, graded_reasons, is_stale = _classify_freshness(source_url, age_days)
            reason_parts.extend(graded_reasons)
            if is_stale:
                stale_sector_count += 1
        else:
            reason_parts.append("SOURCE_KIND_UNKNOWN")

        if url_missing:
            # Template sectors are already counted separately and do not add
            # to the missing-URL counter.
            if source_kind != "DEFAULT_TEMPLATE":
                missing_source_url_count += 1
            if source_kind == "NAVER_ETF_PAGE":
                reason_parts.append("NAVER_URL_MISSING")
            reason_parts.append("Source_URL_MISSING")
        if age_days is not None and age_days < 0:
            # A future as-of date is flagged but does not change the status.
            reason_parts.append("FUTURE_DATE")

        transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1
        # dict.fromkeys drops repeated reasons while keeping first-seen order.
        unique_reasons = list(dict.fromkeys(reason_parts))
        refresh_reason = ";".join(unique_reasons) if unique_reasons else "OK"

        first_row = sector_rows[0]
        detail_rows.append({
            "sector": sector,
            "proxy_ticker": _txt(first_row.get("Proxy_Ticker")),
            "proxy_name": _txt(first_row.get("Proxy_Name")),
            "proxy_type": _txt(first_row.get("Proxy_Type")),
            "source_kind": source_kind,
            "transport_mode": transport_mode,
            "source_url": source_url,
            "source_asof": source_asof.isoformat() if source_asof else "",
            "age_days": age_days if age_days is not None else "",
            "constituent_count": constituent_count,
            "stock_count": stock_count,
            "etf_count": etf_count,
            "weight_sum": round(weight_sum, 4),
            "status": status,
            "refresh_reason": refresh_reason,
        })

    detail_rows.sort(key=lambda r: (r["status"] != "CURRENT", r["status"], r["sector"]))
    statuses = [r["status"] for r in detail_rows]

    summary = {
        "sector_count": len(grouped),
        "current_count": statuses.count("CURRENT"),
        "due_count": statuses.count("DUE"),
        "overdue_count": statuses.count("OVERDUE"),
        "missing_count": statuses.count("MISSING"),
        "template_count": template_count,
        "sheet_input_count": sheet_input_count,
        "naver_source_count": source_kind_counts["NAVER_ETF_PAGE"],
        "missing_source_url_count": missing_source_url_count,
        "stale_sector_count": stale_sector_count,
        "layout_changed_count": layout_changed_count,
        "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "",
        "newest_source_asof": newest_asof.isoformat() if newest_asof else "",
        "source_kind_counts": source_kind_counts,
        "transport_mode_counts": transport_mode_counts,
        "ajax_mode": "NO",
        "transport_model": "HTML_SERVER_RENDERED",
    }

    # NOTE(review): sectors with status INVALID (NAVER_ETF_PAGE_FAIL or an
    # unrecognised source kind) affect the gate only when their URL is
    # missing, and an empty universe yields PASS - confirm this is intended.
    if template_count > 0 or missing_source_url_count > 0 or stale_sector_count > 0 or layout_changed_count > 0:
        gate = "FAIL"
    elif sheet_input_count > 0:
        gate = "WARN"
    else:
        gate = "PASS"

    return {
        "formula_id": "sector_universe_refresh_audit_v1",
        "gate": gate,
        "max_age_days": DEFAULT_MAX_AGE_DAYS,
        "summary": summary,
        "rows": detail_rows,
        "source": {
            "sector_rows": len(rows),
            "grouped_sectors": len(grouped),
        },
    }
|