"""validate_engine_audit_v1.py — ENGINE_AUDIT_V1 산출물 검증기 검증 항목 (프롬프트 §3.10 / §7) - §3.10 필수 섹션 존재(final_json_schema_valid) - decision.decision_source == "rule_engine" - llm_control.final_decision_from_llm == false / llm_generated_decision_field_count == 0 - imputed_data_exposure 불변식: 대체데이터 감지 시 게이트 실제 발동 (fundamental_core_factor_coverage < min → fundamental_claim_allowed == false 등) - 게이트 산식 재현(weighted_coverage / imputed_field_ratio / effective_confidence_honest) 기본 모드: 산출물 무결성만 검증(엔진 status=failed 여도 PASS 가능). --strict : 추가로 final_verdict.status == "passed" 를 요구(엔진 투자준비 게이트). 종료코드: 검증 실패 시 1 (repo validator 컨벤션). """ from __future__ import annotations import argparse import json import sys from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] DEFAULT_JSON = ROOT / "Temp" / "engine_audit_v1.json" REQUIRED_SECTIONS = [ "meta", "data_quality", "routing", "scores", "decision", "sell_plan", "evidence", "risk", "llm_control", "audit", "imputed_data_exposure", "final_verdict", ] EPS = 0.01 def _emit(failures: list[str], cond: bool, msg: str) -> None: if not cond: failures.append(msg) def main() -> int: ap = argparse.ArgumentParser(description="ENGINE_AUDIT_V1 validator") ap.add_argument("--json", default=str(DEFAULT_JSON)) ap.add_argument("--strict", action="store_true", help="final_verdict.status == passed 까지 요구") args = ap.parse_args() path = Path(args.json) if not path.is_absolute(): path = ROOT / path if not path.exists(): print(f"FAIL: file not found: {path}") return 1 try: d = json.loads(path.read_text(encoding="utf-8")) except Exception as exc: # noqa: BLE001 print(f"FAIL: cannot parse JSON: {exc}") return 1 failures: list[str] = [] # 1) 필수 섹션 for sec in REQUIRED_SECTIONS: _emit(failures, sec in d, f"missing required section: {sec}") if failures: for f in failures: print("FAIL:", f) return 1 fv = d["final_verdict"] dec = d["decision"] llm = d["llm_control"] exp = d["imputed_data_exposure"] # 2) 스키마/판단 출처 불변식 _emit(failures, fv.get("final_json_schema_valid") is True, "final_json_schema_valid != true") _emit(failures, dec.get("decision_source") == "rule_engine", "decision.decision_source != rule_engine") _emit(failures, llm.get("final_decision_from_llm") is False, "llm_control.final_decision_from_llm != false") _emit(failures, llm.get("llm_generated_decision_field_count") == 0, "llm_generated_decision_field_count != 0") _emit(failures, fv.get("llm_generated_decision_field_count") == 0, "final_verdict.llm_generated_decision_field_count != 0") # 3) 게이트 불변식: 대체데이터 감지 시 발동 fcc = exp.get("fundamental_core_factor_coverage") minc = (exp.get("thresholds") or {}).get("fund_factor_min_coverage", 0.5) if isinstance(fcc, (int, float)) and fcc < minc: _emit(failures, exp.get("fundamental_claim_allowed") is False, "fundamental coverage < min but fundamental_claim_allowed != false") ifr = exp.get("imputed_field_ratio") block = (exp.get("thresholds") or {}).get("block_ratio", 0.5) if isinstance(ifr, (int, float)) and ifr >= block: _emit(failures, exp.get("gate_status") == "IMPUTED_DATA_BLOCK", f"imputed_field_ratio>={block} but gate_status != IMPUTED_DATA_BLOCK") # 4) 게이트 산식 재현 (weighted_coverage / imputed_field_ratio / honest cap) dc = exp.get("domain_coverage") or {} dw = exp.get("domain_weights") or {} if dc and dw: wc = sum(dw.get(k, 0) * v for k, v in dc.items()) _emit(failures, abs(wc - (exp.get("weighted_coverage") or -1)) < EPS, f"weighted_coverage mismatch: recomputed={wc:.4f} stored={exp.get('weighted_coverage')}") _emit(failures, abs((1.0 - wc) - (exp.get("imputed_field_ratio") or -1)) < EPS, "imputed_field_ratio mismatch (expected 1 - weighted_coverage)") raw = exp.get("raw_confidence_cap_basis") ech = exp.get("effective_confidence_honest") if isinstance(raw, (int, float)) and isinstance(ech, (int, float)): expect = raw * (0.4 + 0.6 * wc) _emit(failures, abs(expect - ech) < 0.2, f"effective_confidence_honest mismatch: expected={expect:.1f} stored={ech}") # 5) failed_metrics ↔ status 정합 fm = fv.get("failed_metrics") or [] _emit(failures, (fv.get("status") == "failed") == (len(fm) > 0), "status/failed_metrics inconsistent") _emit(failures, fv.get("investment_decision_allowed") == (fv.get("status") == "passed"), "investment_decision_allowed inconsistent with status") if failures: for f in failures: print("FAIL:", f) print(f"VALIDATE_ENGINE_AUDIT_V1: FAIL ({len(failures)} issue(s))") return 1 print(f"VALIDATE_ENGINE_AUDIT_V1: OK | engine_status={fv.get('status')} " f"gate={exp.get('gate_status')} imputed_field_ratio={exp.get('imputed_field_ratio')} " f"honest_cap={exp.get('effective_confidence_honest')} (raw={exp.get('raw_confidence_cap_basis')})") if args.strict and fv.get("status") != "passed": print("STRICT_FAIL: final_verdict.status != passed " f"-> failed_metrics={fm}") return 1 return 0 if __name__ == "__main__": sys.exit(main())