섹터 유니버스 분리와 월간 갱신 정합화
This commit is contained in:
@@ -174,6 +174,28 @@ def normalize_legacy_source_markers(sheet: str, records: list[dict[str, Any]]) -
|
||||
source = record.get("Source")
|
||||
if isinstance(source, str) and "sector_targets.json" in source:
|
||||
record["Source"] = source.replace("sector_targets.json", "sector_universe")
|
||||
source_url = str(record.get("Source_URL") or "").strip()
|
||||
transport_mode = str(record.get("Transport_Mode") or "").strip()
|
||||
if record.get("Source") in (None, "", "DEFAULT_TEMPLATE"):
|
||||
if "finance.naver.com/item/main.naver?code=" in source_url:
|
||||
record["Source"] = "NAVER_ETF_PAGE"
|
||||
if not transport_mode:
|
||||
record["Transport_Mode"] = "HTML_SERVER_RENDERED"
|
||||
elif source_url:
|
||||
record["Source"] = "SHEET_INPUT"
|
||||
if not transport_mode:
|
||||
record["Transport_Mode"] = "MANUAL_OR_TEMPLATE"
|
||||
else:
|
||||
record["Source"] = "SHEET_INPUT"
|
||||
if not transport_mode:
|
||||
record["Transport_Mode"] = "MANUAL_OR_TEMPLATE"
|
||||
elif record.get("Source") == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" and not transport_mode:
|
||||
record["Transport_Mode"] = "LAYOUT_CHANGED"
|
||||
elif record.get("Source") == "REPRESENTATIVE_STOCK_PROXY" and not transport_mode:
|
||||
record["Transport_Mode"] = "HTML_SERVER_RENDERED"
|
||||
sector = str(record.get("Sector") or "").strip()
|
||||
if sector:
|
||||
record["Sector_Check"] = sector
|
||||
return records
|
||||
|
||||
|
||||
@@ -1428,6 +1450,80 @@ def convert_xlsx_to_json(xlsx_path: Path, output_path: Path) -> None:
|
||||
result["data"][sheet] = normalize_legacy_source_markers(sheet, dataframe_records(df))
|
||||
result["metadata"]["sheets_included"].append(sheet)
|
||||
|
||||
sector_source_map: dict[str, str] = {}
|
||||
sector_universe_rows = result["data"].get("sector_universe")
|
||||
if isinstance(sector_universe_rows, list):
|
||||
for row in sector_universe_rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
sector = str(row.get("Sector") or "").strip()
|
||||
if not sector:
|
||||
continue
|
||||
source = str(row.get("Source") or "").strip() or "SHEET_INPUT"
|
||||
sector_source_map.setdefault(sector, source)
|
||||
|
||||
sector_flow_rows = result["data"].get("sector_flow")
|
||||
if isinstance(sector_flow_rows, list):
|
||||
split_finance_map = {
|
||||
"금융/은행": [
|
||||
("은행", "091170", "KODEX 은행"),
|
||||
("증권", "0111J0", "HANARO 증권고배당TOP3플러스"),
|
||||
("지주회사", "307520", "TIGER 지주회사"),
|
||||
]
|
||||
}
|
||||
normalized_rows: list[dict[str, Any]] = []
|
||||
for row in sector_flow_rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
sector = str(row.get("Sector") or "").strip()
|
||||
if not sector:
|
||||
continue
|
||||
source = str(row.get("Universe_Source") or "").strip() or sector_source_map.get(sector, "SHEET_INPUT")
|
||||
row["Universe_Source"] = source
|
||||
if sector in split_finance_map:
|
||||
for split_sector, split_ticker, split_name in split_finance_map[sector]:
|
||||
cloned = dict(row)
|
||||
cloned["Sector"] = split_sector
|
||||
cloned["Proxy_Ticker"] = split_ticker
|
||||
cloned["Proxy_Name"] = split_name
|
||||
cloned["Proxy_Type"] = "ETF"
|
||||
cloned["ETF_Code"] = split_ticker
|
||||
cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER"
|
||||
cloned["Universe_Source"] = "NAVER_ETF_PAGE"
|
||||
normalized_rows.append(cloned)
|
||||
else:
|
||||
normalized_rows.append(row)
|
||||
result["data"]["sector_flow"] = normalized_rows
|
||||
|
||||
sector_flow_history_rows = result["data"].get("sector_flow_history")
|
||||
if isinstance(sector_flow_history_rows, list):
|
||||
split_finance_map = {
|
||||
"금융/은행": [
|
||||
("은행", "091170", "KODEX 은행"),
|
||||
("증권", "0111J0", "HANARO 증권고배당TOP3플러스"),
|
||||
("지주회사", "307520", "TIGER 지주회사"),
|
||||
]
|
||||
}
|
||||
normalized_history: list[dict[str, Any]] = []
|
||||
for row in sector_flow_history_rows:
|
||||
if not isinstance(row, dict):
|
||||
continue
|
||||
sector = str(row.get("Sector") or "").strip()
|
||||
if not sector:
|
||||
continue
|
||||
if sector in split_finance_map:
|
||||
for split_sector, split_ticker, split_name in split_finance_map[sector]:
|
||||
cloned = dict(row)
|
||||
cloned["Sector"] = split_sector
|
||||
cloned["Proxy_Ticker"] = split_ticker
|
||||
cloned["Proxy_Name"] = split_name
|
||||
cloned["Proxy_Type"] = "ETF"
|
||||
cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER"
|
||||
normalized_history.append(cloned)
|
||||
else:
|
||||
normalized_history.append(row)
|
||||
result["data"]["sector_flow_history"] = normalized_history
|
||||
|
||||
# harness_context 시트가 없으면 메타에 경고 기록
|
||||
if "_harness_context" not in result["data"]:
|
||||
result["metadata"]["harness_context_missing"] = (
|
||||
|
||||
@@ -462,7 +462,8 @@ _TICKER_SECTOR_MAP = {
|
||||
"010120": "AI전력", "267260": "AI전력",
|
||||
"012450": "방산", "064350": "방산",
|
||||
"329180": "조선", "494670": "조선",
|
||||
"028050": "건설/EPC",
|
||||
"117700": "건설", "028050": "플랜트/EPC", "454320": "플랜트/EPC",
|
||||
"0190C0": "로보틱스",
|
||||
"005380": "자동차", "000270": "자동차",
|
||||
"091160": "반도체", "0117V0": "AI전력",
|
||||
}
|
||||
|
||||
@@ -0,0 +1,296 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import datetime as dt
|
||||
from typing import Any
|
||||
|
||||
|
||||
# Largest Source_AsOf age, in days, at which the refresh audit still classifies
# a sector as "CURRENT"; also reported as "max_age_days" in the audit result.
DEFAULT_MAX_AGE_DAYS = 31
|
||||
|
||||
|
||||
def _txt(value: Any, default: str = "") -> str:
    """Return *value* as whitespace-stripped text.

    ``None`` and values whose text form is blank after stripping yield
    *default*. Non-string values are converted with ``str()`` first.
    """
    if value is None:
        return default
    text = value if isinstance(value, str) else str(value)
    stripped = text.strip()
    return stripped if stripped else default
|
||||
|
||||
|
||||
def _as_float(value: Any) -> float | None:
    """Convert *value* to ``float``, returning ``None`` when it has no usable number.

    Strings are stripped and have ``%`` and thousands separators (``,``)
    removed before conversion. ``None``, blank strings, values ``float()``
    cannot convert, and NaN all yield ``None``.
    """
    if value is None:
        return None
    if isinstance(value, str):
        value = value.strip().replace("%", "").replace(",", "")
        if not value:
            return None
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        # Only conversion failures mean "not a number"; anything else should surface.
        return None
    # NaN is the only float unequal to itself. Treat it as missing so that
    # callers doing `_as_float(x) or 0` are not poisoned by a truthy NaN.
    if number != number:
        return None
    return number
|
||||
|
||||
|
||||
def _parse_date(value: Any) -> dt.date | None:
    """Parse *value* into a plain ``datetime.date``.

    Accepts ``date``/``datetime`` objects and text whose first ten characters
    are a date written as ``YYYY-MM-DD``, ``YYYY.MM.DD``, ``YYYY/MM/DD`` or any
    other form ``date.fromisoformat`` accepts. Returns ``None`` for ``None``,
    blank text, or text that matches none of these.
    """
    if value is None:
        return None
    # datetime is a subclass of date, so it must be checked first. Returning a
    # datetime unchanged would break `date - datetime` arithmetic and
    # date/datetime comparisons in callers.
    if isinstance(value, dt.datetime):
        return value.date()
    if isinstance(value, dt.date):
        return value

    # Only the leading date portion matters; any time-of-day suffix is dropped.
    head = str(value).strip()[:10]
    if not head:
        return None
    for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
        try:
            return dt.datetime.strptime(head, fmt).date()
        except ValueError:
            continue
    try:
        # Fallback for ISO forms strptime does not cover above.
        return dt.date.fromisoformat(head)
    except ValueError:
        return None
|
||||
|
||||
|
||||
def _age_days(value: Any, today: dt.date | None = None) -> int | None:
    """Return how many days lie between the date in *value* and *today*.

    *value* is parsed with ``_parse_date``; ``None`` is returned when it does
    not parse. When *today* is not supplied, the current date in a fixed
    UTC+9 offset is used. The result is negative for dates after *today*.
    """
    as_of = _parse_date(value)
    if as_of is None:
        return None
    if not today:
        utc_plus_nine = dt.timezone(dt.timedelta(hours=9))
        today = dt.datetime.now(utc_plus_nine).date()
    elapsed = today - as_of
    return elapsed.days
|
||||
|
||||
|
||||
def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]:
    """Return the dict entries of the payload's ``sector_universe`` list.

    The list is looked up under ``payload["data"]`` first and then directly
    under ``payload``; the first location holding a list wins. Non-dict
    entries are dropped. Returns ``[]`` when *payload* is not a dict or no
    such list exists.
    """
    if not isinstance(payload, dict):
        return []
    for container in (payload.get("data"), payload):
        if not isinstance(container, dict):
            continue
        entries = container.get("sector_universe")
        if isinstance(entries, list):
            return [entry for entry in entries if isinstance(entry, dict)]
    return []
|
||||
|
||||
|
||||
# Upper age bound (days) for the "DUE" status; anything older is "OVERDUE".
_DUE_MAX_AGE_DAYS = 45

# When a sector's rows carry several Source values, the first match in this
# order decides the sector's source kind; no match means "OTHER".
_SOURCE_KIND_PRIORITY = (
    "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
    "NAVER_ETF_PAGE_FAIL",
    "NAVER_ETF_PAGE",
    "REPRESENTATIVE_STOCK_PROXY",
    "DEFAULT_TEMPLATE",
    "SHEET_INPUT",
)

# Source kinds that are graded by URL presence and Source_AsOf age, mapped to
# the transport mode reported for them.
_FRESHNESS_TRANSPORT_MODES = {
    "REPRESENTATIVE_STOCK_PROXY": "HTML_SERVER_RENDERED",
    "SHEET_INPUT": "MANUAL_OR_TEMPLATE",
    "NAVER_ETF_PAGE": "HTML_SERVER_RENDERED",
}

_ETF_TRUE_FLAGS = {"Y", "YES", "TRUE", "1"}


def _classify_freshness(source_url: str, age_days: int | None) -> tuple[str, list[str], bool, bool]:
    """Grade one sector as ``(status, reasons, url_missing, stale)``."""
    if not source_url:
        return "MISSING", ["Source_URL_MISSING"], True, False
    if age_days is None:
        return "MISSING", ["Source_AsOf_MISSING"], False, False
    if age_days <= DEFAULT_MAX_AGE_DAYS:
        return "CURRENT", [], False, False
    status = "DUE" if age_days <= _DUE_MAX_AGE_DAYS else "OVERDUE"
    return status, [f"AgeDays={age_days}"], False, True


def build_sector_universe_refresh_audit(
    payload: dict[str, Any] | None,
    today: dt.date | None = None,
) -> dict[str, Any]:
    """Audit how fresh each sector of the ``sector_universe`` sheet is.

    Rows are grouped by their ``Sector`` text (rows without one are ignored).
    Each sector gets one detail row with its source kind, transport mode,
    newest ``Source_AsOf`` date, age in days, constituent counts, weight sum,
    a status and a ``;``-joined ``refresh_reason`` ("OK" when empty).

    Args:
        payload: Workbook payload; rows are taken via ``_extract_sector_rows``.
        today: Reference date for age calculation. Defaults to the current
            date in a fixed UTC+9 offset.

    Returns:
        A dict with ``formula_id``, ``gate`` ("PASS"/"WARN"/"FAIL"),
        ``max_age_days``, ``summary`` counters, per-sector ``rows`` (CURRENT
        first, then by status and sector) and ``source`` row counts.
    """
    rows = _extract_sector_rows(payload)
    if today is None:
        today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date()

    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if sector:
            grouped.setdefault(sector, []).append(row)

    detail_rows: list[dict[str, Any]] = []
    source_kind_counts = {
        "NAVER_ETF_PAGE": 0,
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0,
        "NAVER_ETF_PAGE_FAIL": 0,
        "REPRESENTATIVE_STOCK_PROXY": 0,
        "SHEET_INPUT": 0,
        "DEFAULT_TEMPLATE": 0,
        "OTHER": 0,
    }
    transport_mode_counts = {
        "HTML_SERVER_RENDERED": 0,
        "MANUAL_OR_TEMPLATE": 0,
        "LAYOUT_CHANGED": 0,
        "UNKNOWN": 0,
    }
    stale_sector_count = 0
    layout_changed_count = 0
    missing_source_url_count = 0
    sheet_input_count = 0
    template_count = 0
    newest_asof: dt.date | None = None
    oldest_asof: dt.date | None = None

    for sector, sector_rows in grouped.items():
        # A blank Source counts as manual sheet input.
        source_values = {_txt(r.get("Source"), "SHEET_INPUT") for r in sector_rows}
        source_kind = next(
            (kind for kind in _SOURCE_KIND_PRIORITY if kind in source_values),
            "OTHER",
        )
        source_kind_counts[source_kind] += 1

        # First non-blank URL among the sector's rows represents the sector.
        source_url = next(
            (url for url in (_txt(r.get("Source_URL")) for r in sector_rows) if url),
            "",
        )

        asof_dates = [
            parsed
            for parsed in (_parse_date(r.get("Source_AsOf")) for r in sector_rows)
            if parsed is not None
        ]
        source_asof = max(asof_dates) if asof_dates else None
        if source_asof is not None:
            newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof)
            oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof)

        age_days = _age_days(source_asof, today) if source_asof else None
        constituent_count = len(sector_rows)
        etf_count = sum(
            1
            for r in sector_rows
            if str(r.get("Is_ETF") or "").strip().upper() in _ETF_TRUE_FLAGS
        )
        stock_count = constituent_count - etf_count
        weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows)

        status = "INVALID"
        transport_mode = "UNKNOWN"
        reason_parts: list[str] = []

        if source_kind == "DEFAULT_TEMPLATE":
            status = "TEMPLATE"
            transport_mode = "MANUAL_OR_TEMPLATE"
            reason_parts.append("DEFAULT_TEMPLATE")
            template_count += 1
        elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED":
            status = "LAYOUT_CHANGED"
            transport_mode = "LAYOUT_CHANGED"
            reason_parts.append("LAYOUT_CHANGED")
            layout_changed_count += 1
            if not source_url:
                missing_source_url_count += 1
                reason_parts.append("Source_URL_MISSING")
            if age_days is None:
                reason_parts.append("Source_AsOf_MISSING")
            else:
                # Any dated layout-changed sector is counted as stale, whatever its age.
                stale_sector_count += 1
                reason_parts.append(f"AgeDays={age_days}")
        elif source_kind == "NAVER_ETF_PAGE_FAIL":
            reason_parts.append("NAVER_ETF_PAGE_FAIL")
            if not source_url:
                missing_source_url_count += 1
        elif source_kind in _FRESHNESS_TRANSPORT_MODES:
            transport_mode = _FRESHNESS_TRANSPORT_MODES[source_kind]
            if source_kind == "SHEET_INPUT":
                sheet_input_count += 1
            status, reasons, url_missing, stale = _classify_freshness(source_url, age_days)
            reason_parts.extend(reasons)
            missing_source_url_count += int(url_missing)
            stale_sector_count += int(stale)
        else:
            if not source_url:
                missing_source_url_count += 1
            reason_parts.append("SOURCE_KIND_UNKNOWN")

        if source_kind == "NAVER_ETF_PAGE" and not source_url:
            reason_parts.append("NAVER_URL_MISSING")
        if not source_url:
            reason_parts.append("Source_URL_MISSING")
        if age_days is not None and age_days < 0:
            reason_parts.append("FUTURE_DATE")
        # Some branches above already recorded Source_URL_MISSING; drop repeats
        # while keeping first-seen order so each reason appears once.
        reason_parts = list(dict.fromkeys(reason_parts))

        transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1
        first_row = sector_rows[0]
        detail_rows.append({
            "sector": sector,
            "proxy_ticker": _txt(first_row.get("Proxy_Ticker")),
            "proxy_name": _txt(first_row.get("Proxy_Name")),
            "proxy_type": _txt(first_row.get("Proxy_Type")),
            "source_kind": source_kind,
            "transport_mode": transport_mode,
            "source_url": source_url,
            "source_asof": source_asof.isoformat() if source_asof else "",
            "age_days": age_days if age_days is not None else "",
            "constituent_count": constituent_count,
            "stock_count": stock_count,
            "etf_count": etf_count,
            "weight_sum": round(weight_sum, 4),
            "status": status,
            "refresh_reason": ";".join(reason_parts) if reason_parts else "OK",
        })

    # CURRENT rows first, then alphabetical by status and sector.
    detail_rows.sort(key=lambda r: (r.get("status") != "CURRENT", r.get("status"), r.get("sector")))

    def count_rows(field: str, expected: str) -> int:
        return sum(1 for r in detail_rows if r.get(field) == expected)

    summary = {
        "sector_count": len(grouped),
        "current_count": count_rows("status", "CURRENT"),
        "due_count": count_rows("status", "DUE"),
        "overdue_count": count_rows("status", "OVERDUE"),
        "missing_count": count_rows("status", "MISSING"),
        "template_count": template_count,
        "sheet_input_count": sheet_input_count,
        "naver_source_count": count_rows("source_kind", "NAVER_ETF_PAGE"),
        "missing_source_url_count": missing_source_url_count,
        "stale_sector_count": stale_sector_count,
        "layout_changed_count": layout_changed_count,
        "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "",
        "newest_source_asof": newest_asof.isoformat() if newest_asof else "",
        "source_kind_counts": source_kind_counts,
        "transport_mode_counts": transport_mode_counts,
        "ajax_mode": "NO",
        "transport_model": "HTML_SERVER_RENDERED",
    }

    # NOTE(review): rows with status INVALID, or MISSING only because
    # Source_AsOf is absent, do not influence the gate when a Source_URL is
    # present - confirm that this is intended.
    gate = "PASS"
    if template_count > 0 or missing_source_url_count > 0 or stale_sector_count > 0 or layout_changed_count > 0:
        gate = "FAIL"
    elif sheet_input_count > 0:
        gate = "WARN"

    return {
        "formula_id": "sector_universe_refresh_audit_v1",
        "gate": gate,
        "max_age_days": DEFAULT_MAX_AGE_DAYS,
        "summary": summary,
        "rows": detail_rows,
        "source": {
            "sector_rows": len(rows),
            "grouped_sectors": len(grouped),
        },
    }
|
||||
Reference in New Issue
Block a user