from __future__ import annotations import argparse import json from datetime import datetime, timezone from pathlib import Path from typing import Any import yaml ROOT = Path(__file__).resolve().parents[1] DEFAULT_JSON = ROOT / "GatherTradingData.json" DEFAULT_OUT = ROOT / "Temp" / "data_integrity_score_v1.json" DEFAULT_POLICY = ROOT / "spec" / "strategy_execution_lock_policy.yaml" def _load(path: Path) -> dict[str, Any]: data = json.loads(path.read_text(encoding="utf-8")) return data if isinstance(data, dict) else {} def _rows(v: Any) -> list[dict[str, Any]]: if isinstance(v, list): return [x for x in v if isinstance(x, dict)] return [] def _load_policy(path: Path) -> dict[str, Any]: if not path.exists(): return {} try: payload = yaml.safe_load(path.read_text(encoding="utf-8")) except Exception: return {} root = payload.get("strategy_execution_lock_policy") if isinstance(payload, dict) else {} obj = root.get("data_integrity_score_v1") if isinstance(root, dict) else {} return obj if isinstance(obj, dict) else {} def _is_placeholder(v: Any, placeholder_tokens: set[Any]) -> bool: if v is None: return None in placeholder_tokens if isinstance(v, str): return v.strip() in placeholder_tokens return False def _is_allowed_tp_stale(row: dict[str, Any], field: str, val: Any) -> bool: if field == "tp1_price" and val is None: return str(row.get("tp1_state") or "").upper() in { "TP1_ALREADY_TRIGGERED", "DEFERRED_SECULAR_LEADER", "DEFERRED_SECULAR_LEADER_OVERHEAT_PENDING", "TRAILING_STOP_PRIORITY_SECULAR_LEADER", } if field == "tp2_price" and val is None: return str(row.get("tp2_state") or "").upper() in {"TP2_ALREADY_TRIGGERED"} return False def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--json", default=str(DEFAULT_JSON)) ap.add_argument("--out", default=str(DEFAULT_OUT)) ap.add_argument("--policy", default=str(DEFAULT_POLICY)) args = ap.parse_args() json_path = Path(args.json) out_path = Path(args.out) policy_path = Path(args.policy) if not json_path.is_absolute(): json_path = ROOT / json_path if not out_path.is_absolute(): out_path = ROOT / out_path if not policy_path.is_absolute(): policy_path = ROOT / policy_path payload = _load(json_path) policy = _load_policy(policy_path) data = payload.get("data") if isinstance(payload.get("data"), dict) else {} h = data.get("_harness_context") if isinstance(data.get("_harness_context"), dict) else {} required_sheets = policy.get("required_sheets") if isinstance(policy.get("required_sheets"), list) else ["data_feed", "sector_flow", "macro", "event_risk", "core_satellite", "sell_priority"] present = sum(1 for s in required_sheets if isinstance(data.get(s), list) and len(data.get(s)) > 0) sheet_completeness = present / len(required_sheets) * 100.0 bp = _rows(h.get("order_blueprint_json")) prices = _rows(h.get("prices_json")) price_keys = {str(r.get("ticker") or "") for r in prices} bp_keys = {str(r.get("ticker") or "") for r in bp} cross_mismatch = len([t for t in bp_keys if t and t not in price_keys]) mismatch_rate = (cross_mismatch / max(1, len(bp_keys))) * 100.0 json_status = str(h.get("json_validation_status") or "") type_ok = 100.0 if json_status else 80.0 captured_at = str(h.get("captured_at") or "") timeliness = 100.0 if captured_at else 70.0 data_feed_rows = _rows(data.get("data_feed")) required_fields = policy.get("data_feed_required_fields") if isinstance(policy.get("data_feed_required_fields"), list) else ["Ticker", "Close", "MA20", "ATR20", "Volume"] total_required_cells = max(1, len(data_feed_rows) * max(1, len(required_fields))) missing_required_cells = 0 for row in data_feed_rows: for f in required_fields: v = row.get(f) if v is None or (isinstance(v, str) and not v.strip()): missing_required_cells += 1 required_field_completeness = max(0.0, 100.0 - (missing_required_cells / total_required_cells) * 100.0) placeholder_raw = policy.get("placeholder_tokens") if isinstance(policy.get("placeholder_tokens"), list) else ["DATA_MISSING", "", "-", None] placeholder_tokens = set(placeholder_raw) prices = _rows(h.get("prices_json")) placeholder_checks = 0 placeholder_hits = 0 placeholder_ledger: list[dict[str, Any]] = [] for row in prices: ticker = str(row.get("ticker") or "") for f in ("stop_price", "tp1_price", "tp2_price"): placeholder_checks += 1 val = row.get(f) if _is_allowed_tp_stale(row, f, val): continue if _is_placeholder(val, placeholder_tokens): placeholder_hits += 1 placeholder_ledger.append({"ticker": ticker, "field": f, "value": val}) placeholder_safety = 100.0 if placeholder_checks == 0 else max(0.0, 100.0 - (placeholder_hits / placeholder_checks) * 100.0) sla_hours = float(policy.get("captured_at_sla_hours") or 24.0) sla_penalty = float(policy.get("timeliness_penalty_if_sla_breached_pct") or 30.0) sla_breached = False capture_age_hours = None if captured_at: try: dt = datetime.fromisoformat(captured_at.replace("Z", "+00:00")) if dt.tzinfo is None: dt = dt.replace(tzinfo=timezone.utc) now = datetime.now(timezone.utc) capture_age_hours = max(0.0, (now - dt.astimezone(timezone.utc)).total_seconds() / 3600.0) if capture_age_hours > sla_hours: sla_breached = True except Exception: capture_age_hours = None if sla_breached: timeliness = max(0.0, timeliness - sla_penalty) w = policy.get("weights") if isinstance(policy.get("weights"), dict) else {} ws = float(w.get("sheet_completeness_pct") or 0.25) wc = float(w.get("cross_mismatch_safety_pct") or 0.20) wt = float(w.get("timeliness_pct") or 0.15) wtp = float(w.get("type_presence_pct") or 0.10) wr = float(w.get("required_field_completeness_pct") or 0.20) wp = float(w.get("placeholder_safety_pct") or 0.10) score = round(max(0.0, min(100.0, ws * sheet_completeness + wc * (100.0 - mismatch_rate) + wt * timeliness + wtp * type_ok + wr * required_field_completeness + wp * placeholder_safety)), 2) grade = "A" if score >= 95 else "B" if score >= 90 else "C" if score >= 80 else "D" pass_th = float(policy.get("pass_threshold") or 90.0) watch_th = float(policy.get("watch_threshold") or 80.0) gate = "PASS" if score >= pass_th else "WATCH_ONLY" if score >= watch_th else "EXPORT_BLOCKED_CRITICAL" result = { "formula_id": "DATA_INTEGRITY_SCORE_V1", "score": score, "grade": grade, "gate": gate, "metrics": { "sheet_completeness_pct": round(sheet_completeness, 2), "cross_mismatch_rate_pct": round(mismatch_rate, 2), "timeliness_pct": timeliness, "type_presence_pct": type_ok, "required_field_completeness_pct": round(required_field_completeness, 2), "placeholder_safety_pct": round(placeholder_safety, 2), "placeholder_hits_count": placeholder_hits, "placeholder_checks_count": placeholder_checks, "placeholder_ledger": placeholder_ledger[:100], "capture_age_hours": round(capture_age_hours, 2) if isinstance(capture_age_hours, (int, float)) else None, "sla_breached": sla_breached, "json_validation_status": json_status or None, }, "policy_used": { "policy_path": str(policy_path), "required_sheets": required_sheets, "data_feed_required_fields": required_fields, "captured_at_sla_hours": sla_hours, "timeliness_penalty_if_sla_breached_pct": sla_penalty, "pass_threshold": pass_th, "watch_threshold": watch_th, }, } out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") print(json.dumps(result, ensure_ascii=False, indent=2)) return 0 if __name__ == "__main__": raise SystemExit(main())