#!/usr/bin/env python3
"""WALK_FORWARD_BOOTSTRAP_V1 — spec/formulas/domains/simulation.yaml.

Generates net_profit_distribution_after_tax_fee_slippage from
historical_returns via walk-forward (non-overlapping in/out-of-sample split,
block resample on out-of-sample only) or regime-matched (filter +
resample-with-replacement) bootstrapping.

governance/todo/v8_9_p3_adoption_plan.yaml P3-B.

Hard rule: no historical_returns or fewer than 2 samples -> DATA_MISSING.
Never interpolate or fabricate a distribution.
"""

from __future__ import annotations

import argparse
import json
import random
from pathlib import Path

ROOT = Path(__file__).resolve().parents[1]
DEFAULT_HISTORICAL_RETURNS = ROOT / "Temp" / "historical_returns_v1.json"
DEFAULT_OUT = ROOT / "Temp" / "walk_forward_bootstrap_v1.json"
BLOCK_SIZE = 5

# Share of the date-ordered history kept in-sample; only the remaining tail
# (out-of-sample) is ever resampled.
_IN_SAMPLE_FRACTION = 0.7
# Fewer usable samples than this is reported as DATA_MISSING.
_MIN_SAMPLES = 2
_FORMULA_ID = "WALK_FORWARD_BOOTSTRAP_V1"


def _load(path: Path) -> dict:
    """Read *path* as a JSON object, returning ``{}`` when that is impossible.

    A missing or unreadable file, invalid UTF-8, invalid JSON, or a JSON top
    level that is not an object all yield ``{}`` so the caller can report
    DATA_MISSING instead of crashing.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # OSError: missing/unreadable file. ValueError: bad UTF-8 or bad JSON
        # (UnicodeDecodeError and JSONDecodeError both derive from it).
        # RecursionError: pathologically nested JSON.
        return {}
    return data if isinstance(data, dict) else {}


def walk_forward_resample(
    historical_returns: list[dict],
    resample_count: int,
    rng: random.Random,
) -> list[float]:
    """Block-bootstrap the out-of-sample tail of *historical_returns*.

    Rows are sorted by their ``"date"`` value, the first 70% are set aside as
    in-sample, and each draw is the mean of a contiguous block of up to
    ``BLOCK_SIZE`` ``"net_return_after_cost_pct"`` values taken from the
    remaining tail.

    Args:
        historical_returns: Rows carrying ``"date"`` and
            ``"net_return_after_cost_pct"``. Dates are compared as-is, so
            they are assumed to sort chronologically (e.g. ISO-8601 strings);
            this is not validated here.
        resample_count: Number of block means to draw.
        rng: Random source; seed it for reproducible output.

    Returns:
        ``resample_count`` block means, or ``[]`` when the out-of-sample tail
        holds fewer than 2 rows.

    Raises:
        KeyError: If a row lacks ``"date"``, or an out-of-sample row lacks
            ``"net_return_after_cost_pct"``.
    """
    ordered = sorted(historical_returns, key=lambda r: r["date"])
    split_idx = int(len(ordered) * _IN_SAMPLE_FRACTION)
    out_of_sample = ordered[split_idx:]
    if len(out_of_sample) < _MIN_SAMPLES:
        return []

    values = [r["net_return_after_cost_pct"] for r in out_of_sample]
    # When the tail is shorter than one block the only valid start is 0 and
    # the block is simply the whole tail.
    start_stop = max(1, len(values) - BLOCK_SIZE + 1)

    distribution = []
    for _ in range(resample_count):
        start = rng.randrange(0, start_stop)
        block = values[start:start + BLOCK_SIZE]
        distribution.append(sum(block) / len(block))
    return distribution


def regime_matched_resample(
    historical_returns: list[dict],
    current_regime_state: str | None,
    resample_count: int,
    rng: random.Random,
) -> list[float]:
    """Resample, with replacement, the returns observed in the current regime.

    Args:
        historical_returns: Rows carrying ``"net_return_after_cost_pct"`` and,
            optionally, ``"regime_state"``.
        current_regime_state: Regime label to match exactly. ``None`` means
            the regime is unknown.
        resample_count: Number of draws.
        rng: Random source; seed it for reproducible output.

    Returns:
        ``resample_count`` values drawn from the matching rows, or ``[]`` when
        the regime is unknown or fewer than 2 rows match.

    Raises:
        KeyError: If a matching row lacks ``"net_return_after_cost_pct"``.
    """
    if current_regime_state is None:
        # Without a regime there is nothing to match against. Comparing with
        # None would silently select every row that has no regime tag and
        # present it as a regime-matched distribution.
        return []

    filtered = [
        r["net_return_after_cost_pct"]
        for r in historical_returns
        if r.get("regime_state") == current_regime_state
    ]
    if len(filtered) < _MIN_SAMPLES:
        return []
    return [rng.choice(filtered) for _ in range(resample_count)]


def main(argv: list[str] | None = None) -> int:
    """Run the bootstrap from the command line and write the JSON result.

    The result is written to ``--out`` (parent directories are created as
    needed) and echoed to stdout. Its ``gate`` is ``"PASS"`` when a
    distribution was produced and ``"DATA_MISSING"`` otherwise, in which case
    the distribution field is ``null``.

    Args:
        argv: Argument list to parse; ``None`` falls back to ``sys.argv[1:]``.

    Returns:
        ``0`` in both the PASS and DATA_MISSING cases; the gate is carried in
        the JSON payload rather than the exit status.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--historical-returns", default=str(DEFAULT_HISTORICAL_RETURNS))
    ap.add_argument("--current-regime-state", default=None)
    ap.add_argument(
        "--bootstrap-method",
        default="walk_forward",
        choices=["walk_forward", "regime_matched"],
    )
    ap.add_argument("--resample-count", type=int, default=1000)
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    ap.add_argument("--seed", type=int, default=None)
    args = ap.parse_args(argv)

    source_path = Path(args.historical_returns)
    raw = _load(source_path).get("historical_returns")
    historical_returns = raw if isinstance(raw, list) else []
    regime = args.current_regime_state

    distribution: list[float] = []
    sample_count_same_regime = 0
    if len(historical_returns) >= _MIN_SAMPLES:
        rng = random.Random(args.seed)
        if args.bootstrap_method == "walk_forward":
            distribution = walk_forward_resample(
                historical_returns, args.resample_count, rng
            )
        else:
            distribution = regime_matched_resample(
                historical_returns, regime, args.resample_count, rng
            )
        if regime is not None:
            # Only count when a regime was actually supplied; otherwise rows
            # without a regime tag would be reported as "same regime".
            sample_count_same_regime = sum(
                1 for r in historical_returns if r.get("regime_state") == regime
            )

    result = {
        "formula_id": _FORMULA_ID,
        "gate": "PASS" if distribution else "DATA_MISSING",
        "net_profit_distribution_after_tax_fee_slippage": distribution or None,
        "sample_count_total": len(historical_returns),
        "sample_count_same_regime": sample_count_same_regime,
        "source_paths": [str(source_path)],
    }
    payload = json.dumps(result, ensure_ascii=False, indent=2)

    out = Path(args.out)
    # The default output lives under ROOT/Temp, which may not exist yet.
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(payload, encoding="utf-8")
    print(payload)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())