# Sector trend analysis.
#
# Builds the SECTOR_TREND_ANALYSIS_V1 report from a harness payload: one
# enriched row per sector in ``data.sector_flow``, a per-snapshot timeline
# from ``data.sector_flow_history``, summary counters, a concentration block
# and a coarse "trend posture" label.
from __future__ import annotations

import json
import math
from collections import Counter, defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any


def _project_root() -> Path:
    """Return the directory two levels above this file's directory.

    Falls back to the file's own directory when the resolved path is too
    shallow to have that ancestor, so importing the module never fails on
    path depth alone.
    """
    here = Path(__file__).resolve()
    try:
        return here.parents[2]
    except IndexError:
        return here.parent


ROOT = _project_root()

# Momentum labels accepted verbatim from the rotation-momentum feed.
_MOMENTUM_STATES = frozenset({"RISING", "FADING", "TOPPING_OUT", "STABLE"})
# ETF liquidity statuses counted as a warning.
_LIQUIDITY_WARN_STATES = frozenset({"WARN", "RISK", "BLOCK"})
# ETF NAV-risk values treated as "no risk"; anything else counts as risk.
_NAV_RISK_OK_STATES = frozenset({"", "OK", "NONE", "NAV_DATA_OK"})
# Data-quality grades that do not degrade proxy confidence.
_TRUSTED_DATA_QUALITY = frozenset({"A", "AA", "AAA"})


def _parse_jsonish(value: Any) -> Any:
    """Decode *value* if it is a non-blank JSON string; otherwise return it as is.

    Dicts and lists pass through untouched. A string that is not valid JSON
    is returned unchanged (best effort).
    """
    if isinstance(value, (dict, list)):
        return value
    if isinstance(value, str) and value.strip():
        try:
            return json.loads(value)
        except (ValueError, RecursionError):
            # json.JSONDecodeError is a ValueError; RecursionError covers
            # pathologically nested documents. Either way keep the raw text.
            return value
    return value


def _load_payload(payload: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
    """Return ``(data, harness_context)`` from *payload*.

    Each element is an empty dict when the corresponding entry is missing or
    not a dict (including when *payload* itself is not a dict).
    """
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        data = {}
    hctx = data.get("_harness_context")
    if not isinstance(hctx, dict):
        hctx = {}
    return data, hctx


def _num(value: Any, default: float = 0.0) -> float:
    """Coerce *value* to a finite float, returning *default* on failure.

    NaN and +/-inf are treated as failures: they would otherwise crash the
    ``int(...)`` conversions applied to ranks and stale counts and make
    sort keys unreliable.
    """
    try:
        number = float(value)
    except (TypeError, ValueError, OverflowError):
        return default
    return number if math.isfinite(number) else default


def _maybe_num(value: Any) -> float | None:
    """Return *value* as a finite float, or ``None`` when it is blank or non-numeric."""
    if value is None or (isinstance(value, str) and not value.strip()):
        return None
    number = _num(value, math.nan)
    return None if math.isnan(number) else number


def _txt(value: Any, default: str = "") -> str:
    """Return ``str(value)`` stripped, or *default* when it is ``None`` or blank."""
    if value is None:
        return default
    text = str(value).strip()
    return text if text else default


def _latest_dates(history: list[dict[str, Any]]) -> tuple[str | None, str | None]:
    """Return the latest and second-latest distinct ``Snapshot_Date`` strings.

    Dates are compared as strings (lexicographic order). Either element is
    ``None`` when there are not enough distinct dates.
    """
    dates = sorted(
        {
            str(row.get("Snapshot_Date") or "")
            for row in history
            if str(row.get("Snapshot_Date") or "")
        }
    )
    if not dates:
        return None, None
    previous = dates[-2] if len(dates) >= 2 else None
    return dates[-1], previous


def _rows_by_date(
    history: list[dict[str, Any]], snapshot_date: str | None
) -> dict[str, dict[str, Any]]:
    """Map sector name -> history row for the given *snapshot_date*.

    When a sector appears more than once for the date, the last row wins.
    Returns an empty dict when *snapshot_date* is falsy.
    """
    if not snapshot_date:
        return {}
    rows: dict[str, dict[str, Any]] = {}
    for row in history:
        if str(row.get("Snapshot_Date") or "") != snapshot_date:
            continue
        sector = str(row.get("Sector") or "").strip()
        if sector:
            rows[sector] = row
    return rows


def _trend_state(
    momentum: dict[str, Any], row: dict[str, Any], prev_row: dict[str, Any] | None
) -> str:
    """Classify a sector as RISING / FADING / TOPPING_OUT / STABLE.

    Resolution order:
      1. an explicit, recognised ``momentum_state`` in *momentum*;
      2. the rank change in *momentum* (``prev_rank - rank``; a smaller rank
         number is treated as an improvement);
      3. the ``Sector_Score`` change between *prev_row* and *row*;
      4. the sign of ``Flow_Breadth_5D`` beyond +/-0.6;
      5. otherwise ``"STABLE"``.
    A delta of at least 2 in magnitude decides steps 2 and 3.
    """
    state = str(momentum.get("momentum_state") or "").upper()
    if state in _MOMENTUM_STATES:
        return state

    rank = momentum.get("rank")
    prev_rank = (
        momentum.get("prev_rank_w1")
        or momentum.get("prevRank")
        or momentum.get("rank_w1")
    )
    delta = None
    if isinstance(rank, (int, float)) and isinstance(prev_rank, (int, float)):
        delta = prev_rank - rank
    if delta is None and prev_row is not None:
        # Higher score is better, so improvement is current minus previous
        # (the opposite orientation of the rank delta above).
        delta = _num(row.get("Sector_Score")) - _num(prev_row.get("Sector_Score"))

    if delta is not None:
        if delta >= 2:
            return "RISING"
        if delta <= -2:
            return "FADING"

    breadth = _num(row.get("Flow_Breadth_5D"), 0.0)
    if breadth >= 0.6:
        return "RISING"
    if breadth <= -0.6:
        return "FADING"
    return "STABLE"


def _direction_from_flow(value: float, threshold: float = 0.0) -> str:
    """Return INFLOW / OUTFLOW / NEUTRAL for *value* relative to +/-*threshold*."""
    if value > threshold:
        return "INFLOW"
    if value < -threshold:
        return "OUTFLOW"
    return "NEUTRAL"


def _alignment_state(smart_money_direction: str, breadth: float, etf_return_5d: float) -> str:
    """Describe how flow direction, flow breadth and 5-day ETF return agree."""
    if smart_money_direction == "INFLOW" and breadth > 0 and etf_return_5d >= 0:
        return "ALIGNED_POSITIVE"
    if smart_money_direction == "OUTFLOW" and breadth < 0 and etf_return_5d <= 0:
        return "ALIGNED_NEGATIVE"
    if smart_money_direction in {"INFLOW", "OUTFLOW"} and abs(breadth) >= 0.5:
        return "FLOW_CONFIRMING"
    if smart_money_direction == "NEUTRAL" and abs(breadth) < 0.5:
        return "MIXED"
    return "DIVERGING"


def _build_timeline(sector_history: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Aggregate history rows into one summary entry per snapshot date.

    Entries are ordered by snapshot date (string order). Rows without a
    ``Snapshot_Date`` are ignored.
    """
    by_date: dict[str, list[dict[str, Any]]] = defaultdict(list)
    for row in sector_history:
        snapshot_date = _txt(row.get("Snapshot_Date"))
        if snapshot_date:
            by_date[snapshot_date].append(row)

    timeline: list[dict[str, Any]] = []
    for snapshot_date in sorted(by_date):
        rows = by_date[snapshot_date]
        scores = [_num(r.get("Sector_Score"), 0.0) for r in rows]
        # The "top" sector of a snapshot is the one with the highest score.
        top = max(rows, key=lambda r: _num(r.get("Sector_Score"), 0.0)) if rows else {}
        total_smart_money = sum(_num(r.get("SmartMoney_5D_KRW"), 0.0) for r in rows)
        avg_score = round(sum(scores) / len(rows), 2) if rows else 0.0
        positive_breadth = sum(
            1 for r in rows if _num(r.get("Flow_Breadth_5D"), 0.0) > 0
        )
        liquidity_warn = sum(
            1
            for r in rows
            if _txt(r.get("ETF_Liquidity_Status"), "UNKNOWN") in _LIQUIDITY_WARN_STATES
        )
        timeline.append(
            {
                "snapshot_date": snapshot_date,
                "sector_count": len(rows),
                "avg_sector_score": avg_score,
                "top_sector": _txt(top.get("Sector")),
                "top_sector_score": top.get("Sector_Score", ""),
                "top_sector_rank": top.get("Sector_Rank", ""),
                "top_sector_smart_money_5d_krw": top.get("SmartMoney_5D_KRW", ""),
                "positive_breadth_count": positive_breadth,
                "liquidity_warn_count": liquidity_warn,
                "net_smart_money_5d_krw": round(total_smart_money, 2),
            }
        )
    return timeline


def _index_by_sector(rows: list[Any]) -> dict[str, dict[str, Any]]:
    """Map the stripped ``sector`` value -> row; non-dict entries are skipped."""
    index: dict[str, dict[str, Any]] = {}
    for row in rows:
        if not isinstance(row, dict):
            continue
        sector = str(row.get("sector") or "").strip()
        if sector:
            index[sector] = row
    return index


def _proxy_confidence(
    proxy_type: str,
    etf_liquidity_status: str,
    etf_nav_risk: str,
    stale_count: int,
    data_quality: str,
) -> str:
    """Grade the sector proxy as HIGH / MEDIUM / LOW.

    A non-ETF proxy and a liquidity/NAV concern each cost one level; stale
    data or an untrusted data-quality grade forces LOW.
    """
    confidence = "HIGH"
    if proxy_type != "ETF":
        confidence = "MEDIUM"
    if (
        etf_liquidity_status in _LIQUIDITY_WARN_STATES
        or etf_nav_risk not in _NAV_RISK_OK_STATES
    ):
        confidence = "LOW" if confidence == "MEDIUM" else "MEDIUM"
    if stale_count > 0 or data_quality not in _TRUSTED_DATA_QUALITY:
        confidence = "LOW"
    return confidence


def _take_top(
    items: list[dict[str, Any]], key: str, reverse: bool = True, n: int = 3
) -> list[str]:
    """Return up to *n* sector names ranked by the numeric value under *key*.

    Values are coerced like everywhere else in this module, so numeric
    strings take part in the ranking; blank or non-numeric values are
    skipped. Ties keep their input order.
    """
    scored: list[tuple[float, dict[str, Any]]] = []
    for item in items:
        value = _maybe_num(item.get(key))
        if value is not None:
            scored.append((value, item))
    scored.sort(key=lambda pair: pair[0], reverse=reverse)
    names = (str(item.get("sector") or "") for _, item in scored[:n])
    return [name for name in names if name]


def _build_row(
    sector: str,
    row: dict[str, Any],
    hist_latest: dict[str, Any],
    hist_prev: dict[str, Any] | None,
    mom: dict[str, Any],
    conc: dict[str, Any],
) -> dict[str, Any]:
    """Build the enriched output row for one sector.

    *row* is the current ``sector_flow`` row, *hist_latest* / *hist_prev* the
    sector's rows in the latest / previous history snapshot, *mom* its
    rotation-momentum entry and *conc* its concentration entry (each may be
    empty; *hist_prev* may be ``None``).
    """
    proxy_ticker = _txt(row.get("Proxy_Ticker"))
    proxy_type = _txt(row.get("Proxy_Type"), "UNKNOWN")
    etf_liquidity_status = _txt(row.get("ETF_Liquidity_Status"), "UNKNOWN")
    etf_nav_risk = _txt(row.get("ETF_NAV_Risk"), "UNKNOWN")
    data_quality = _txt(row.get("Data_Quality"))
    stale_count = int(_num(row.get("Stale_Count"), 0.0))

    smart_money_5d_krw = _num(row.get("SmartMoney_5D_KRW"), 0.0)
    smart_money_20d_krw = _num(row.get("SmartMoney_20D_KRW"), 0.0)
    flow_breadth_5d = _num(row.get("Flow_Breadth_5D"), 0.0)
    etf_ret5d = _num(row.get("ETF_Ret5D"), 0.0)
    etf_ret20d = _num(row.get("ETF_Ret20D"), 0.0)

    # Rank and score prefer the latest history snapshot over the live row.
    rank = _num(
        hist_latest.get("Sector_Rank") if hist_latest else row.get("Sector_Rank"), 0
    )
    prev_rank_w1 = _num(
        mom.get("prev_rank_w1")
        or mom.get("prevRank")
        or (hist_prev.get("Sector_Rank") if hist_prev else None),
        0,
    )
    prev_rank_w2 = _num(mom.get("prev_rank_w2") or mom.get("prevRankW2"), 0)
    current_score = _num(
        hist_latest.get("Sector_Score") if hist_latest else row.get("Sector_Score"), 0
    )
    prev_score = _num(hist_prev.get("Sector_Score") if hist_prev else None, 0)

    smart_money_direction = _direction_from_flow(smart_money_5d_krw)
    if flow_breadth_5d >= 0.5 and smart_money_5d_krw > 0:
        liquidity_direction = "FLOW_EXPANSION"
    elif flow_breadth_5d <= -0.5 and smart_money_5d_krw < 0:
        liquidity_direction = "FLOW_DECAY"
    else:
        liquidity_direction = "FLOW_MIXED"

    # Rank deltas from the momentum feed win; otherwise derive them when both
    # ranks are known (a zero rank means "unknown" here).
    derived_delta_w1 = (int(prev_rank_w1) - int(rank)) if prev_rank_w1 and rank else ""
    derived_delta_w2 = (int(prev_rank_w2) - int(rank)) if prev_rank_w2 and rank else ""

    return {
        "sector": sector,
        "proxy_ticker": proxy_ticker,
        "proxy_name": _txt(row.get("Proxy_Name")),
        "proxy_type": proxy_type,
        "etf_code": _txt(row.get("ETF_Code"), proxy_ticker),
        "etf_execution_use": _txt(row.get("ETF_Execution_Use")),
        "etf_liquidity_score": row.get("ETF_Liquidity_Score", ""),
        "etf_liquidity_status": etf_liquidity_status,
        "etf_nav_risk": etf_nav_risk,
        "proxy_confidence": _proxy_confidence(
            proxy_type, etf_liquidity_status, etf_nav_risk, stale_count, data_quality
        ),
        "rank": int(rank) if rank else row.get("Sector_Rank"),
        "prev_rank_w1": int(prev_rank_w1)
        if prev_rank_w1
        else mom.get("prev_rank_w1", mom.get("prevRank", "")),
        "prev_rank_w2": int(prev_rank_w2)
        if prev_rank_w2
        else mom.get("prev_rank_w2", mom.get("prevRankW2", "")),
        "rank_delta_w1": mom.get("rank_delta_w1", derived_delta_w1),
        "rank_delta_w2": mom.get("rank_delta_w2", derived_delta_w2),
        "sector_score": current_score if current_score else row.get("Sector_Score", ""),
        "score_delta": round(current_score - prev_score, 2) if prev_score else "",
        "sector_ret5d": row.get("Sector_Ret5D", ""),
        "sector_ret20d": row.get("Sector_Ret20D", ""),
        "smart_money_5d_krw": row.get("SmartMoney_5D_KRW", ""),
        "smart_money_20d_krw": row.get("SmartMoney_20D_KRW", ""),
        "flow_breadth_5d": row.get("Flow_Breadth_5D", ""),
        "alert_level": row.get("Alert_Level", ""),
        "decision_use": row.get("Decision_Use", ""),
        "data_quality": data_quality,
        "stale_count": stale_count,
        "smart_money_direction": smart_money_direction,
        "liquidity_direction": liquidity_direction,
        "flow_alignment_state": _alignment_state(
            smart_money_direction, flow_breadth_5d, etf_ret5d
        ),
        "momentum_state": _trend_state(mom, row, hist_prev),
        "concentration_weight_pct": conc.get("weight_pct", row.get("Coverage_Weight", "")),
        "etf_return_5d": row.get("ETF_Ret5D", ""),
        "etf_return_10d": row.get("ETF_Ret10D", ""),
        "etf_return_20d": row.get("ETF_Ret20D", ""),
        "sector_etf_ret_gap_5d": round(_num(row.get("Sector_Ret5D"), 0.0) - etf_ret5d, 2),
        "sector_etf_ret_gap_20d": round(_num(row.get("Sector_Ret20D"), 0.0) - etf_ret20d, 2),
        "smart_money_5d_norm": _num(row.get("SmartMoney_5D_Norm"), 0.0),
        "smart_money_20d_norm": _num(row.get("SmartMoney_20D_Norm"), 0.0),
        # Despite the "_raw" suffix these are the float-coerced values; the
        # un-suffixed keys above carry the input values untouched.
        "smart_money_5d_krw_raw": smart_money_5d_krw,
        "smart_money_20d_krw_raw": smart_money_20d_krw,
        "flow_breadth_5d_raw": flow_breadth_5d,
    }


def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]:
    """Build the SECTOR_TREND_ANALYSIS_V1 report from a harness payload.

    Reads ``data.sector_flow`` and ``data.sector_flow_history`` (lists of
    dict rows) plus ``sector_rotation_momentum_json`` and
    ``sector_concentration_json`` from ``data._harness_context`` (lists, or
    JSON strings encoding lists). Missing or malformed inputs are treated as
    empty rather than raising.

    Returns a dict with ``formula_id``, ``gate``, the latest/previous
    snapshot dates, ``sector_count``, ``summary``, ``concentration``,
    ``rows``, ``timeline`` and ``source``. ``gate`` is ``"DATA_MISSING"``
    when no sector rows were produced, ``"WARN"`` when rows exist but no
    history snapshot date is available, and ``"PASS"`` otherwise.
    """
    data, hctx = _load_payload(payload)

    sector_flow = data.get("sector_flow")
    sector_flow = sector_flow if isinstance(sector_flow, list) else []
    sector_flow = [r for r in sector_flow if isinstance(r, dict)]

    sector_history = data.get("sector_flow_history")
    sector_history = sector_history if isinstance(sector_history, list) else []
    sector_history = [r for r in sector_history if isinstance(r, dict)]

    rotation_rows = _parse_jsonish(hctx.get("sector_rotation_momentum_json"))
    if not isinstance(rotation_rows, list):
        rotation_rows = []
    concentration_rows = _parse_jsonish(hctx.get("sector_concentration_json"))
    if not isinstance(concentration_rows, list):
        concentration_rows = []
    # Non-dict entries would break the weight sort below; drop them up front,
    # as is done for sector_flow and sector_flow_history.
    concentration_rows = [r for r in concentration_rows if isinstance(r, dict)]

    momentum_map = _index_by_sector(rotation_rows)
    concentration_map = _index_by_sector(concentration_rows)

    latest_date, previous_date = _latest_dates(sector_history)
    latest_rows = _rows_by_date(sector_history, latest_date)
    prev_rows = _rows_by_date(sector_history, previous_date)
    timeline = _build_timeline(sector_history)

    # Order by rank (unranked rows last), then by largest absolute 5-day flow.
    ordered_flow = sorted(
        sector_flow,
        key=lambda r: (
            _num(r.get("Sector_Rank"), 999),
            -abs(_num(r.get("SmartMoney_5D_KRW"), 0.0)),
        ),
    )
    rows: list[dict[str, Any]] = []
    for row in ordered_flow:
        sector = str(row.get("Sector") or "").strip()
        if not sector:
            continue
        rows.append(
            _build_row(
                sector,
                row,
                latest_rows.get(sector, {}),
                prev_rows.get(sector),
                momentum_map.get(sector, {}),
                concentration_map.get(sector, {}),
            )
        )

    def _count(predicate) -> int:
        return sum(1 for r in rows if predicate(r))

    rising = _count(lambda r: r.get("momentum_state") == "RISING")
    fading = _count(lambda r: r.get("momentum_state") == "FADING")
    stable = _count(lambda r: r.get("momentum_state") == "STABLE")
    topping = _count(lambda r: r.get("momentum_state") == "TOPPING_OUT")
    breadth_positive = _count(lambda r: _num(r.get("flow_breadth_5d"), 0.0) > 0)
    etf_proxy_count = _count(lambda r: str(r.get("proxy_type") or "").upper() == "ETF")
    liquidity_warn_count = _count(
        lambda r: str(r.get("etf_liquidity_status") or "").upper() in _LIQUIDITY_WARN_STATES
    )
    nav_risk_count = _count(
        lambda r: str(r.get("etf_nav_risk") or "").upper() not in _NAV_RISK_OK_STATES
    )
    low_confidence_count = _count(
        lambda r: str(r.get("proxy_confidence") or "").upper() == "LOW"
    )
    smart_money_inflow_count = _count(
        lambda r: str(r.get("smart_money_direction") or "") == "INFLOW"
    )
    smart_money_outflow_count = _count(
        lambda r: str(r.get("smart_money_direction") or "") == "OUTFLOW"
    )
    flow_aligned_count = _count(
        lambda r: str(r.get("flow_alignment_state") or "").startswith("ALIGNED")
    )
    flow_diverging_count = _count(
        lambda r: str(r.get("flow_alignment_state") or "") == "DIVERGING"
    )

    def _flow_5d(r: dict[str, Any]) -> float:
        return _num(r.get("smart_money_5d_krw"), 0.0)

    top_inflow = _take_top(rows, "smart_money_5d_krw", True, 3)
    outflow_warning = [
        r["sector"]
        for r in sorted(rows, key=_flow_5d)
        if _flow_5d(r) < 0 or str(r.get("alert_level") or "").upper().startswith("OUTFLOW")
    ][:3]
    strong_smart_money = [
        r["sector"]
        for r in sorted(rows, key=_flow_5d, reverse=True)
        if _flow_5d(r) > 0 and _num(r.get("flow_breadth_5d"), 0.0) >= 0
    ][:3]

    conc_rows_sorted = sorted(
        concentration_rows, key=lambda r: _num(r.get("weight_pct"), 0.0), reverse=True
    )
    top_sector = conc_rows_sorted[0] if conc_rows_sorted else {}
    top2_sum = (
        round(sum(_num(r.get("weight_pct"), 0.0) for r in conc_rows_sorted[:2]), 2)
        if conc_rows_sorted
        else 0.0
    )
    top1_weight = round(_num(top_sector.get("weight_pct"), 0.0), 2) if top_sector else 0.0

    # First matching rule wins; the order encodes the priority of the labels.
    row_count = len(rows)
    if fading > rising and top1_weight >= 60:
        posture = "DEFENSIVE_CONCENTRATED"
    elif liquidity_warn_count >= max(1, row_count // 3) or nav_risk_count >= max(
        1, row_count // 4
    ):
        posture = "ETF_PROXY_RISK"
    elif rising >= fading and breadth_positive >= max(1, row_count // 2):
        posture = "RISK_ON_ROTATION"
    elif smart_money_inflow_count > smart_money_outflow_count and flow_aligned_count >= max(
        1, row_count // 3
    ):
        posture = "SMART_MONEY_CONFIRMED"
    else:
        posture = "BALANCED_ROTATION"

    # Missing history only downgrades an otherwise usable result; it must not
    # mask the more severe "no sector rows at all" condition.
    if not rows:
        gate = "DATA_MISSING"
    elif not latest_date:
        gate = "WARN"
    else:
        gate = "PASS"

    return {
        "formula_id": "SECTOR_TREND_ANALYSIS_V1",
        "gate": gate,
        "latest_snapshot_date": latest_date,
        "previous_snapshot_date": previous_date,
        "sector_count": row_count,
        "summary": {
            "rising_count": rising,
            "fading_count": fading,
            "stable_count": stable,
            "topping_out_count": topping,
            "positive_breadth_count": breadth_positive,
            "etf_proxy_count": etf_proxy_count,
            "liquidity_warn_count": liquidity_warn_count,
            "nav_risk_count": nav_risk_count,
            "low_proxy_confidence_count": low_confidence_count,
            "smart_money_inflow_count": smart_money_inflow_count,
            "smart_money_outflow_count": smart_money_outflow_count,
            "flow_aligned_count": flow_aligned_count,
            "flow_diverging_count": flow_diverging_count,
            "top_inflow_sectors": top_inflow,
            "outflow_warning_sectors": outflow_warning,
            "strong_smart_money_sectors": strong_smart_money,
            "trend_posture": posture,
        },
        "concentration": {
            "top_sector": top_sector.get("sector", ""),
            "top_sector_weight_pct": top1_weight,
            "top2_weight_pct": top2_sum,
            "concentration_gate": top_sector.get("gate", ""),
        },
        "rows": rows,
        "timeline": timeline,
        "source": {
            "sector_flow_rows": len(sector_flow),
            "sector_flow_history_rows": len(sector_history),
            "sector_rotation_momentum_rows": len(rotation_rows),
            "sector_concentration_rows": len(concentration_rows),
            "proxy_coverage_pct": round((etf_proxy_count / row_count) * 100.0, 2)
            if rows
            else 0.0,
        },
    }