#!/usr/bin/env python3 from __future__ import annotations import json import sys from pathlib import Path import yaml ROOT = Path(__file__).resolve().parent.parent def main() -> int: taxonomy_path = ROOT / "spec" / "43_quant_factor_taxonomy.yaml" registry_path = ROOT / "spec" / "factor_lifecycle_registry.yaml" if not taxonomy_path.exists(): print(f"Taxonomy spec missing: {taxonomy_path}") return 1 if not registry_path.exists(): print(f"Registry spec missing: {registry_path}") return 1 try: tax_data = yaml.safe_load(taxonomy_path.read_text(encoding="utf-8")) or {} required_fields = tax_data.get("required_lifecycle_fields", []) except Exception as e: print(f"Failed to parse taxonomy: {e}") return 1 try: reg_data = yaml.safe_load(registry_path.read_text(encoding="utf-8")) or {} factors = reg_data.get("factors", []) except Exception as e: print(f"Failed to parse registry: {e}") return 1 required_field_missing_count = 0 active_factor_without_shadow_evidence_count = 0 errors = [] for factor in factors: if not isinstance(factor, dict): continue fid = factor.get("factor_id", "UNKNOWN") gate = str(factor.get("promotion_gate", "draft")).lower() # Enforce lifecycle constraints on active factors if gate == "active": # 1. Check all required lifecycle fields from taxonomy missing_fields = [] for field in required_fields: if field not in factor and field != "input_fields": # input_fields is represented by required_data in our registry missing_fields.append(field) if "required_data" not in factor and "input_fields" not in factor: missing_fields.append("input_fields") if missing_fields: required_field_missing_count += len(missing_fields) errors.append(f"Active factor '{fid}' is missing required fields: {missing_fields}") # 2. Check for shadow evidence (shadow_start_date must be present and valid) shadow_start = factor.get("shadow_start_date") if not shadow_start: active_factor_without_shadow_evidence_count += 1 errors.append(f"Active factor '{fid}' has no shadow_start_date (no shadow evidence)") # 3. Check for golden cases (golden_cases must be non-empty) golden = factor.get("golden_cases") if not golden: required_field_missing_count += 1 errors.append(f"Active factor '{fid}' must have non-empty golden_cases") gate_passed = (required_field_missing_count == 0) and (active_factor_without_shadow_evidence_count == 0) result = { "formula_id": "FACTOR_LIFECYCLE_REGISTRY_VALIDATOR_V1", "factor_required_field_missing_count": required_field_missing_count, "active_factor_without_shadow_evidence_count": active_factor_without_shadow_evidence_count, "errors": errors, "gate": "PASS" if gate_passed else "FAIL" } # Write to Temp out_dir = ROOT / "Temp" out_dir.mkdir(parents=True, exist_ok=True) out_path = out_dir / "factor_lifecycle_registry_validation_v1.json" out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") print(json.dumps(result, ensure_ascii=True, indent=2)) return 0 if gate_passed else 1 if __name__ == "__main__": sys.exit(main())