#!/usr/bin/env python3 """FORECAST_SIMULATION_ENGINE_V1 — spec/formulas/domains/simulation.yaml. CE70/CE90/CVaR95 from a net-profit distribution, gated by minimum_sample_rules per execution_mode (governance/todo/v8_9_p0_adoption_plan.yaml P0-3.2). Hard rule (AGENTS.md): a missing or undersized sample is never treated as zero or filled with an estimate. spec/29_backtest_harness_contract.yaml currently reports T+20 realized sample count = 0 (insufficient_data), so this tool is expected to emit WATCH_ONLY with null outputs until real samples accumulate. """ from __future__ import annotations import argparse import json from pathlib import Path ROOT = Path(__file__).resolve().parents[1] DEFAULT_BACKTEST_CONTRACT = ROOT / "spec" / "29_backtest_harness_contract.yaml" DEFAULT_DISTRIBUTION = ROOT / "Temp" / "net_profit_distribution_v1.json" DEFAULT_DECISION_PACKET = ROOT / "Temp" / "final_decision_packet_active.json" DEFAULT_OUT = ROOT / "Temp" / "forecast_simulation_engine_v1.json" MINIMUM_SAMPLE_RULES = { "AUDIT_ONLY": {"sample_count_total_min": 0, "sample_count_same_regime_min": 0}, "SHADOW": {"sample_count_total_min": 30, "sample_count_same_regime_min": 10}, "PILOT": {"sample_count_total_min": 80, "sample_count_same_regime_min": 20}, "LIVE_LIMITED": {"sample_count_total_min": 150, "sample_count_same_regime_min": 30}, "LIVE_FULL": {"sample_count_total_min": 300, "sample_count_same_regime_min": 50}, } def _load_json(path: Path) -> dict: if not path.exists(): return {} try: data = json.loads(path.read_text(encoding="utf-8")) return data if isinstance(data, dict) else {} except Exception: return {} def _load_yaml(path: Path) -> dict: if not path.exists(): return {} try: import yaml # type: ignore data = yaml.safe_load(path.read_text(encoding="utf-8")) return data if isinstance(data, dict) else {} except Exception: return {} def _sample_counts_from_backtest_contract(contract: dict) -> tuple[int, int]: metrics = contract.get("current_metrics") or {} direction_accuracy = metrics.get("direction_accuracy") or {} t20 = direction_accuracy.get("t20_op_rate") or {} n_sample = t20.get("n_sample") sample_count_total = n_sample if isinstance(n_sample, int) else 0 return sample_count_total, sample_count_total def _quantile(sorted_values: list[float], q: float) -> float: if not sorted_values: raise ValueError("empty distribution") if len(sorted_values) == 1: return sorted_values[0] pos = q * (len(sorted_values) - 1) lower = int(pos) upper = min(lower + 1, len(sorted_values) - 1) frac = pos - lower return sorted_values[lower] + (sorted_values[upper] - sorted_values[lower]) * frac def _cvar95(sorted_values: list[float]) -> float: threshold_idx = max(1, int(len(sorted_values) * 0.05)) tail = sorted_values[:threshold_idx] return sum(tail) / len(tail) def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--backtest-contract", default=str(DEFAULT_BACKTEST_CONTRACT)) ap.add_argument("--distribution", default=str(DEFAULT_DISTRIBUTION)) ap.add_argument("--decision-packet", default=str(DEFAULT_DECISION_PACKET)) ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() backtest_contract = _load_yaml(Path(args.backtest_contract)) distribution_doc = _load_json(Path(args.distribution)) decision_packet = _load_json(Path(args.decision_packet)) execution_mode = ( decision_packet.get("execution_mode") or decision_packet.get("global_execution_gate") or "AUDIT_ONLY" ) rule = MINIMUM_SAMPLE_RULES.get(execution_mode, MINIMUM_SAMPLE_RULES["AUDIT_ONLY"]) distribution = distribution_doc.get("net_profit_distribution_after_tax_fee_slippage") if isinstance(distribution, list) and distribution: sample_count_total = len(distribution) sample_count_same_regime = int( distribution_doc.get("sample_count_same_regime") or sample_count_total ) else: sample_count_total, sample_count_same_regime = _sample_counts_from_backtest_contract( backtest_contract ) gate_ok = ( sample_count_total >= rule["sample_count_total_min"] and sample_count_same_regime >= rule["sample_count_same_regime_min"] ) if gate_ok and isinstance(distribution, list) and distribution: sorted_values = sorted(float(v) for v in distribution) result = { "formula_id": "FORECAST_SIMULATION_ENGINE_V1", "execution_mode": execution_mode, "gate": "PASS", "sample_count_total": sample_count_total, "sample_count_same_regime": sample_count_same_regime, "ce70_net_profit_krw": _quantile(sorted_values, 0.30), "ce90_net_profit_krw": _quantile(sorted_values, 0.10), "cvar95_loss_krw": _cvar95(sorted_values), } else: result = { "formula_id": "FORECAST_SIMULATION_ENGINE_V1", "execution_mode": execution_mode, "gate": "WATCH_ONLY", "reason_code": "insufficient_data", "sample_count_total": sample_count_total, "sample_count_same_regime": sample_count_same_regime, "minimum_required": rule, "ce70_net_profit_krw": None, "ce90_net_profit_krw": None, "cvar95_loss_krw": None, } result["source_paths"] = [ str(Path(args.backtest_contract)), str(Path(args.distribution)), str(Path(args.decision_packet)), ] out = Path(args.out) out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") print(json.dumps(result, ensure_ascii=False, indent=2)) return 0 if __name__ == "__main__": raise SystemExit(main())