from __future__ import annotations

import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import yaml


# Parent of the directory that contains this file; every default path below
# is anchored here, and main() resolves relative CLI paths against it too.
ROOT = Path(__file__).resolve().parents[1]
# Default input payload, report destination and policy file.
DEFAULT_JSON = ROOT.joinpath("GatherTradingData.json")
DEFAULT_OUT = ROOT.joinpath("Temp", "data_integrity_score_v1.json")
DEFAULT_POLICY = ROOT.joinpath("spec", "strategy_execution_lock_policy.yaml")


def _load(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 JSON and return its top-level object.

    A top-level value that is not a JSON object (array, string, number,
    ``null`` ...) is replaced by an empty dict. Errors raised while reading
    the file or decoding the JSON are not caught here.
    """
    text = path.read_text(encoding="utf-8")
    parsed = json.loads(text)
    if not isinstance(parsed, dict):
        return {}
    return parsed


def _rows(v: Any) -> list[dict[str, Any]]:
    """Return the dict elements of *v* in order, or ``[]`` if *v* is not a list.

    Non-dict elements of a list are dropped; any non-list input (including
    tuples, dicts and ``None``) yields an empty list.
    """
    if not isinstance(v, list):
        return []
    kept: list[dict[str, Any]] = []
    for item in v:
        if isinstance(item, dict):
            kept.append(item)
    return kept


def _load_policy(path: Path) -> dict[str, Any]:
    """Return the ``data_integrity_score_v1`` policy section found in *path*.

    The file is parsed as YAML and the section is looked up under the
    top-level ``strategy_execution_lock_policy`` key. An empty dict is
    returned when the file does not exist, when reading or parsing raises
    any ``Exception``, or when any level along that lookup is not a mapping.
    """
    if not path.exists():
        return {}
    try:
        document = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception:
        # Best effort: an unreadable or malformed policy is treated as absent.
        return {}
    if not isinstance(document, dict):
        return {}
    lock_policy = document.get("strategy_execution_lock_policy")
    if not isinstance(lock_policy, dict):
        return {}
    section = lock_policy.get("data_integrity_score_v1")
    if isinstance(section, dict):
        return section
    return {}


def _is_placeholder(v: Any, placeholder_tokens: set[Any]) -> bool:
    """Tell whether *v* counts as a placeholder under *placeholder_tokens*.

    Strings are compared after stripping surrounding whitespace; ``None`` is
    a placeholder only when ``None`` itself is one of the tokens. Values of
    any other type are never placeholders.
    """
    if isinstance(v, str):
        candidate = v.strip()
    elif v is None:
        candidate = None
    else:
        return False
    return candidate in placeholder_tokens


def _is_allowed_tp_stale(row: dict[str, Any], field: str, val: Any) -> bool:
    """Tell whether a ``None`` take-profit price is excused by the row's state.

    Only ``tp1_price`` and ``tp2_price`` can be excused, and only when *val*
    is ``None``. The matching ``tp1_state`` / ``tp2_state`` value is
    upper-cased (not stripped) and must be one of the accepted states for
    that field.
    """
    if val is not None:
        return False
    if field == "tp1_price":
        state_key = "tp1_state"
        accepted = {
            "TP1_ALREADY_TRIGGERED",
            "DEFERRED_SECULAR_LEADER",
            "DEFERRED_SECULAR_LEADER_OVERHEAT_PENDING",
            "TRAILING_STOP_PRIORITY_SECULAR_LEADER",
        }
    elif field == "tp2_price":
        state_key = "tp2_state"
        accepted = {"TP2_ALREADY_TRIGGERED"}
    else:
        return False
    state = str(row.get(state_key) or "").upper()
    return state in accepted


def _policy_float(mapping: dict[str, Any], key: str, default: float) -> float:
    """Return ``mapping[key]`` as a float, or *default* when it is unset.

    An explicit numeric zero is honoured. The plain ``value or default``
    idiom would discard it because ``0`` is falsy, which made it impossible
    to configure a zero weight, penalty, SLA or threshold. Every other value
    keeps the ``float(value or default)`` behaviour: ``None`` and other falsy
    values fall back to *default*, numeric strings are converted, and
    non-numeric values raise from ``float()``.
    """
    raw = mapping.get(key)
    if isinstance(raw, (int, float)) and not isinstance(raw, bool):
        return float(raw)
    return float(raw or default)


def _resolve_under_root(raw: str) -> Path:
    """Return *raw* as a ``Path``, anchoring relative paths at ``ROOT``."""
    path = Path(raw)
    return path if path.is_absolute() else ROOT / path


def main() -> int:
    """Compute the ``DATA_INTEGRITY_SCORE_V1`` report and write it as JSON.

    The ``--json``, ``--out`` and ``--policy`` options select the input
    payload, the report destination and the policy file; relative values are
    resolved against ``ROOT``. Six sub-scores on a 0-100 scale are combined
    with the policy weights into ``score``, from which ``grade`` and ``gate``
    are derived. The report is written to the output path (parent
    directories are created as needed) and also printed to stdout.

    Errors raised while reading or parsing the input JSON are not handled
    here.

    Returns:
        ``0`` once the report has been written.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    ap.add_argument("--policy", default=str(DEFAULT_POLICY))
    args = ap.parse_args()

    json_path = _resolve_under_root(args.json)
    out_path = _resolve_under_root(args.out)
    policy_path = _resolve_under_root(args.policy)

    payload = _load(json_path)
    policy = _load_policy(policy_path)
    data = payload.get("data") if isinstance(payload.get("data"), dict) else {}
    h = data.get("_harness_context") if isinstance(data.get("_harness_context"), dict) else {}

    # Sheet completeness: share of required sheets present as non-empty lists.
    required_sheets = policy.get("required_sheets")
    if not isinstance(required_sheets, list):
        required_sheets = ["data_feed", "sector_flow", "macro", "event_risk", "core_satellite", "sell_priority"]
    present = sum(1 for s in required_sheets if isinstance(data.get(s), list) and len(data.get(s)) > 0)
    # A policy may supply an empty list; nothing is required, so nothing can
    # be missing. Score it as complete instead of dividing by zero.
    sheet_completeness = present / len(required_sheets) * 100.0 if required_sheets else 100.0

    # Cross mismatch: blueprint tickers that have no entry in the price table.
    bp = _rows(h.get("order_blueprint_json"))
    prices = _rows(h.get("prices_json"))
    price_keys = {str(r.get("ticker") or "") for r in prices}
    bp_keys = {str(r.get("ticker") or "") for r in bp}
    cross_mismatch = sum(1 for t in bp_keys if t and t not in price_keys)
    mismatch_rate = (cross_mismatch / max(1, len(bp_keys))) * 100.0

    # Presence checks: a missing status / timestamp lowers the sub-score.
    json_status = str(h.get("json_validation_status") or "")
    type_ok = 100.0 if json_status else 80.0
    captured_at = str(h.get("captured_at") or "")
    timeliness = 100.0 if captured_at else 70.0

    # Required-field completeness over every (row, field) cell of data_feed.
    data_feed_rows = _rows(data.get("data_feed"))
    required_fields = policy.get("data_feed_required_fields")
    if not isinstance(required_fields, list):
        required_fields = ["Ticker", "Close", "MA20", "ATR20", "Volume"]
    total_required_cells = max(1, len(data_feed_rows) * max(1, len(required_fields)))
    missing_required_cells = 0
    for row in data_feed_rows:
        for f in required_fields:
            v = row.get(f)
            if v is None or (isinstance(v, str) and not v.strip()):
                missing_required_cells += 1
    required_field_completeness = max(0.0, 100.0 - (missing_required_cells / total_required_cells) * 100.0)

    # Placeholder safety: stop / take-profit prices must hold real values
    # unless the row's state excuses a missing take-profit.
    placeholder_raw = policy.get("placeholder_tokens")
    if not isinstance(placeholder_raw, list):
        placeholder_raw = ["DATA_MISSING", "", "-", None]
    placeholder_tokens = set(placeholder_raw)
    placeholder_checks = 0
    placeholder_hits = 0
    placeholder_ledger: list[dict[str, Any]] = []
    for row in prices:
        ticker = str(row.get("ticker") or "")
        for f in ("stop_price", "tp1_price", "tp2_price"):
            # Excused cells still count as checks, so they raise the safe share.
            placeholder_checks += 1
            val = row.get(f)
            if _is_allowed_tp_stale(row, f, val):
                continue
            if _is_placeholder(val, placeholder_tokens):
                placeholder_hits += 1
                placeholder_ledger.append({"ticker": ticker, "field": f, "value": val})
    placeholder_safety = 100.0 if placeholder_checks == 0 else max(0.0, 100.0 - (placeholder_hits / placeholder_checks) * 100.0)

    # SLA: penalise timeliness when the capture timestamp is too old.
    sla_hours = _policy_float(policy, "captured_at_sla_hours", 24.0)
    sla_penalty = _policy_float(policy, "timeliness_penalty_if_sla_breached_pct", 30.0)
    sla_breached = False
    capture_age_hours = None
    if captured_at:
        try:
            dt = datetime.fromisoformat(captured_at.replace("Z", "+00:00"))
            if dt.tzinfo is None:
                # Naive timestamps are interpreted as UTC.
                dt = dt.replace(tzinfo=timezone.utc)
            now = datetime.now(timezone.utc)
            capture_age_hours = max(0.0, (now - dt.astimezone(timezone.utc)).total_seconds() / 3600.0)
            if capture_age_hours > sla_hours:
                sla_breached = True
        except (ValueError, OverflowError):
            # NOTE(review): an unparseable captured_at keeps the full 100.0
            # timeliness and is never SLA-checked; confirm this is intended.
            capture_age_hours = None
    if sla_breached:
        timeliness = max(0.0, timeliness - sla_penalty)

    # Weighted total, clamped to [0, 100].
    w = policy.get("weights") if isinstance(policy.get("weights"), dict) else {}
    ws = _policy_float(w, "sheet_completeness_pct", 0.25)
    wc = _policy_float(w, "cross_mismatch_safety_pct", 0.20)
    wt = _policy_float(w, "timeliness_pct", 0.15)
    wtp = _policy_float(w, "type_presence_pct", 0.10)
    wr = _policy_float(w, "required_field_completeness_pct", 0.20)
    wp = _policy_float(w, "placeholder_safety_pct", 0.10)
    weighted_total = (
        ws * sheet_completeness
        + wc * (100.0 - mismatch_rate)
        + wt * timeliness
        + wtp * type_ok
        + wr * required_field_completeness
        + wp * placeholder_safety
    )
    score = round(max(0.0, min(100.0, weighted_total)), 2)
    grade = "A" if score >= 95 else "B" if score >= 90 else "C" if score >= 80 else "D"
    pass_th = _policy_float(policy, "pass_threshold", 90.0)
    watch_th = _policy_float(policy, "watch_threshold", 80.0)
    gate = "PASS" if score >= pass_th else "WATCH_ONLY" if score >= watch_th else "EXPORT_BLOCKED_CRITICAL"

    result = {
        "formula_id": "DATA_INTEGRITY_SCORE_V1",
        "score": score,
        "grade": grade,
        "gate": gate,
        "metrics": {
            "sheet_completeness_pct": round(sheet_completeness, 2),
            "cross_mismatch_rate_pct": round(mismatch_rate, 2),
            "timeliness_pct": timeliness,
            "type_presence_pct": type_ok,
            "required_field_completeness_pct": round(required_field_completeness, 2),
            "placeholder_safety_pct": round(placeholder_safety, 2),
            "placeholder_hits_count": placeholder_hits,
            "placeholder_checks_count": placeholder_checks,
            # Only the first 100 ledger entries are written to the report.
            "placeholder_ledger": placeholder_ledger[:100],
            "capture_age_hours": round(capture_age_hours, 2) if isinstance(capture_age_hours, (int, float)) else None,
            "sla_breached": sla_breached,
            "json_validation_status": json_status or None,
        },
        "policy_used": {
            "policy_path": str(policy_path),
            "required_sheets": required_sheets,
            "data_feed_required_fields": required_fields,
            "captured_at_sla_hours": sla_hours,
            "timeliness_penalty_if_sla_breached_pct": sla_penalty,
            "pass_threshold": pass_th,
            "watch_threshold": watch_th,
        },
    }
    rendered = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(rendered, encoding="utf-8")
    print(rendered)
    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())