# QuantEngineByItz/tools/build_imputed_data_exposure_gate_v2.py

from __future__ import annotations

import argparse
import json
import math
import sys
from pathlib import Path
from typing import Any


# Project root: the directory one level above the directory holding this file.
ROOT = Path(__file__).resolve().parents[1]

# Default locations of the two inputs and the output artifact, all under ROOT.
DEFAULT_JSON = ROOT.joinpath("GatherTradingData.json")
DEFAULT_ENGINE_AUDIT = ROOT.joinpath("Temp", "engine_audit_v1.json")
DEFAULT_OUT = ROOT.joinpath("Temp", "imputed_data_exposure_gate_v2.json")

# Written verbatim to the "formula_id" field of the gate output.
FORMULA_ID = "IMPUTED_DATA_EXPOSURE_GATE_V2"

# imputed_field_ratio >= BLOCK_RATIO -> IMPUTED_DATA_BLOCK,
# otherwise >= WARN_RATIO -> IMPUTED_DATA_WARN, otherwise PASS.
BLOCK_RATIO = 0.50
WARN_RATIO = 0.25

# Minimum fundamental-core coverage for the fundamental / long-horizon flags.
FUND_FACTOR_MIN_COVERAGE = 0.50

# Per-domain weights used when weighted_coverage must be derived from
# domain_coverage. The weights sum to 1.0.
DOMAIN_WEIGHTS = dict(
    fundamental_core=0.30,
    realized_outcome=0.30,
    trade_quality=0.15,
    pattern=0.10,
    alpha_eval=0.15,
)

# Make stdout emit UTF-8: this script prints JSON with ensure_ascii=False and
# the output contains non-ASCII characters, which would fail to encode on
# consoles whose default encoding is not UTF-8.
if getattr(sys.stdout, "encoding", None) and sys.stdout.encoding.lower() not in ("utf-8", "utf8"):
    if hasattr(sys.stdout, "reconfigure"):
        # Re-encode the existing stream in place instead of opening a second
        # file object on the same descriptor. Line buffering is kept on, as
        # with the previous buffering=1 wrapper.
        sys.stdout.reconfigure(encoding="utf-8", line_buffering=True)
    else:
        # Stream without reconfigure(): wrap its descriptor, but do not take
        # ownership of it (closefd=False) because the original stream object
        # still owns and will close it.
        sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1, closefd=False)


def _load_json(path: Path) -> dict[str, Any]:
    if not path.exists():
        return {}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        return {}
    return data if isinstance(data, dict) else {}


def _as_float(value: Any, default: float | None = None) -> float | None:
    try:
        return float(value)
    except Exception:
        return default


def _extract_harness_root(payload: dict[str, Any]) -> dict[str, Any]:
    h_apex = payload.get("hApex")
    data_apex = ((payload.get("data") or {}).get("_harness_context")) if isinstance(payload.get("data"), dict) else None
    if isinstance(h_apex, dict) and isinstance(data_apex, dict):
        merged = dict(data_apex)
        merged.update(h_apex)
        return merged
    if isinstance(h_apex, dict):
        return h_apex
    if isinstance(data_apex, dict):
        return data_apex
    return payload


def build_gate(payload: dict[str, Any], audit: dict[str, Any]) -> dict[str, Any]:
    """Build the imputed-data exposure gate record.

    Metrics are read from ``audit["imputed_data_exposure"]``. Any metric that
    is absent there (or that ``_as_float`` cannot convert) is derived:

    * ``weighted_coverage``: sum of ``DOMAIN_WEIGHTS[d] * domain_coverage[d]``
    * ``imputed_field_ratio``: ``1 - weighted_coverage``
    * ``raw_confidence_cap_basis``: ``confidence_cap_basis_score`` from the
      harness context, 0.0 when missing
    * ``effective_confidence_honest``: ``raw_cap * (0.4 + 0.6 * weighted_coverage)``
    * ``confidence_cap_inflation_gap``: raw cap minus the honest confidence
    * ``fundamental_core_factor_coverage``: ``domain_coverage["fundamental_core"]``
    * ``fundamental_missing_ratio``: ``max(0, 1 - fundamental coverage)``
    * ``surrogate_outcome_ratio``: ``max(0, 1 - domain_coverage["realized_outcome"])``

    Domain coverage entries that are missing, null or non-numeric count as 0.0.

    Args:
        payload: Decoded trading-data JSON; the harness context is located via
            ``_extract_harness_root``.
        audit: Decoded engine-audit JSON.

    Returns:
        A JSON-serializable dict holding the gate status (``IMPUTED_DATA_BLOCK``
        / ``IMPUTED_DATA_WARN`` / ``PASS``), the metrics, the permission flags,
        the exposure reasons, the thresholds and a ``source`` block. The
        ``source`` block always names the module-level default input paths.
    """
    hctx = _extract_harness_root(payload)
    raw_exposure = audit.get("imputed_data_exposure")
    exposure = raw_exposure if isinstance(raw_exposure, dict) else {}
    raw_domains = exposure.get("domain_coverage")
    domain_coverage = raw_domains if isinstance(raw_domains, dict) else {}

    def coverage_of(key: str) -> float:
        # Go through _as_float so a non-numeric entry counts as zero coverage
        # instead of raising from a bare float() call.
        return _as_float(domain_coverage.get(key), 0.0) or 0.0

    weighted_coverage = _as_float(exposure.get("weighted_coverage"))
    imputed_field_ratio = _as_float(exposure.get("imputed_field_ratio"))
    effective_confidence_honest = _as_float(exposure.get("effective_confidence_honest"))
    raw_confidence_cap_basis = _as_float(exposure.get("raw_confidence_cap_basis"))
    confidence_cap_inflation_gap = _as_float(exposure.get("confidence_cap_inflation_gap"))
    fundamental_core_factor_coverage = _as_float(exposure.get("fundamental_core_factor_coverage"))
    fundamental_missing_ratio = _as_float(exposure.get("fundamental_missing_ratio"))
    surrogate_outcome_ratio = _as_float(exposure.get("surrogate_outcome_ratio"))

    # --- derive whatever the audit did not supply -------------------------
    if weighted_coverage is None:
        weighted_coverage = 0.0
        for key, weight in DOMAIN_WEIGHTS.items():
            weighted_coverage += weight * coverage_of(key)
        weighted_coverage = round(weighted_coverage, 4)

    if imputed_field_ratio is None:
        imputed_field_ratio = round(1.0 - weighted_coverage, 4)

    if raw_confidence_cap_basis is None:
        raw_confidence_cap_basis = _as_float(hctx.get("confidence_cap_basis_score"), 0.0)

    if effective_confidence_honest is None and raw_confidence_cap_basis is not None:
        effective_confidence_honest = round(raw_confidence_cap_basis * (0.4 + 0.6 * weighted_coverage), 1)

    if (
        confidence_cap_inflation_gap is None
        and raw_confidence_cap_basis is not None
        and effective_confidence_honest is not None
    ):
        confidence_cap_inflation_gap = round(raw_confidence_cap_basis - effective_confidence_honest, 1)

    if fundamental_core_factor_coverage is None:
        fundamental_core_factor_coverage = coverage_of("fundamental_core")

    if fundamental_missing_ratio is None:
        fundamental_missing_ratio = round(max(0.0, 1.0 - (fundamental_core_factor_coverage or 0.0)), 4)

    if surrogate_outcome_ratio is None:
        surrogate_outcome_ratio = round(max(0.0, 1.0 - coverage_of("realized_outcome")), 4)

    # --- gate decision ------------------------------------------------------
    if imputed_field_ratio >= BLOCK_RATIO:
        gate_status = "IMPUTED_DATA_BLOCK"
    elif imputed_field_ratio >= WARN_RATIO:
        gate_status = "IMPUTED_DATA_WARN"
    else:
        gate_status = "PASS"

    t20_sample = _as_float(hctx.get("t20_operational_sample"), 0.0) or 0.0
    fundamental_claim_allowed = bool((fundamental_core_factor_coverage or 0.0) >= FUND_FACTOR_MIN_COVERAGE)
    long_horizon_allowed = bool(t20_sample > 0 and fundamental_claim_allowed)

    exposure_reasons: list[str] = []
    if fundamental_core_factor_coverage is not None and fundamental_core_factor_coverage < FUND_FACTOR_MIN_COVERAGE:
        exposure_reasons.append(
            "FUNDAMENTAL_CORE_FACTORS_MISSING: "
            f"coverage={fundamental_core_factor_coverage:.2f}"
        )
    if t20_sample <= 0:
        exposure_reasons.append("REALIZED_OUTCOME_T20_ZERO: t20_sample=0")
    if confidence_cap_inflation_gap is not None and confidence_cap_inflation_gap > 0:
        exposure_reasons.append(
            "CONFIDENCE_CAP_INFLATED: "
            f"reported={raw_confidence_cap_basis} honest={effective_confidence_honest} gap={confidence_cap_inflation_gap}"
        )

    # Share of the known domains whose coverage is below 0.5. Counting over
    # DOMAIN_WEIGHTS (not over whatever keys the audit happened to include)
    # makes a domain missing from the audit count as imputed and keeps the
    # ratio within [0, 1] even if the audit carries unknown extra keys.
    if DOMAIN_WEIGHTS:
        low_domains = sum(1 for key in DOMAIN_WEIGHTS if coverage_of(key) < 0.5)
        imputed_domain_ratio = round(low_domains / len(DOMAIN_WEIGHTS), 4)
    else:
        imputed_domain_ratio = 1.0

    skew = audit.get("report_render_skew")
    if isinstance(skew, dict):
        report_render_skew = {
            "report_dqg_completeness_pct": skew.get("report_dqg_completeness_pct"),
            "authoritative_dqg_completeness_pct": skew.get("authoritative_dqg_completeness_pct"),
            "skew_detected": bool(skew.get("skew_detected")),
        }
    else:
        report_render_skew = {
            "report_dqg_completeness_pct": "not_available",
            "authoritative_dqg_completeness_pct": "not_available",
            "skew_detected": False,
        }

    return {
        "formula_id": FORMULA_ID,
        "gate_status": gate_status,
        "imputed_field_ratio": round(imputed_field_ratio, 4),
        "imputed_domain_ratio": imputed_domain_ratio,
        "weighted_coverage": round(weighted_coverage, 4),
        "domain_coverage": {key: round(coverage_of(key), 4) for key in DOMAIN_WEIGHTS},
        "fundamental_core_factor_coverage": round(fundamental_core_factor_coverage or 0.0, 4),
        "fundamental_missing_ratio": round(fundamental_missing_ratio or 0.0, 4),
        "surrogate_outcome_ratio": round(surrogate_outcome_ratio or 0.0, 4),
        "raw_confidence_cap_basis": raw_confidence_cap_basis,
        "effective_confidence_honest": effective_confidence_honest,
        "confidence_cap_inflation_gap": confidence_cap_inflation_gap,
        "long_horizon_allowed": long_horizon_allowed,
        "fundamental_claim_allowed": fundamental_claim_allowed,
        "report_render_skew": report_render_skew,
        "exposure_reasons": exposure_reasons,
        "thresholds": {
            "block_ratio": BLOCK_RATIO,
            "warn_ratio": WARN_RATIO,
            "fund_factor_min_coverage": FUND_FACTOR_MIN_COVERAGE,
        },
        "formula": (
            "weighted_coverage = Σ(weight_d × coverage_d); "
            "imputed_field_ratio = 1 - weighted_coverage; "
            "effective_confidence_honest = raw_cap × (0.4 + 0.6 × weighted_coverage)"
        ),
        "source": {
            "payload_path": str(DEFAULT_JSON),
            "engine_audit_path": str(DEFAULT_ENGINE_AUDIT),
        },
    }


def main() -> int:
    """Command-line entry point: load the inputs, build the gate, write and print it.

    ``--json``, ``--audit`` and ``--out`` default to ``DEFAULT_JSON``,
    ``DEFAULT_ENGINE_AUDIT`` and ``DEFAULT_OUT``; relative values are resolved
    against ``ROOT``. The result is written to the output path (parent
    directories are created as needed) and echoed to stdout.

    Returns:
        Always 0.
    """
    ap = argparse.ArgumentParser(description="Build imputed data exposure gate from engine audit artifacts.")
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--audit", default=str(DEFAULT_ENGINE_AUDIT))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()

    def resolve(raw: str) -> Path:
        # Relative CLI paths are anchored at the project root, not the CWD.
        candidate = Path(raw)
        return candidate if candidate.is_absolute() else ROOT / candidate

    json_path = resolve(args.json)
    audit_path = resolve(args.audit)
    out_path = resolve(args.out)

    payload = _load_json(json_path)
    audit = _load_json(audit_path)
    result = build_gate(payload, audit)
    # Record the inputs that were actually read, so the "source" block stays
    # truthful when --json / --audit override the defaults.
    result["source"] = {
        "payload_path": str(json_path),
        "engine_audit_path": str(audit_path),
    }

    rendered = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(rendered, encoding="utf-8")
    print(rendered)
    return 0


# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    sys.exit(main())