"""validate_llm_determinism_pack_v1.py — spec/58: H008_LLM_DETERMINISM_AUDIT Validates that final_context_for_llm_v5.yaml contains all required pre-computed sections and that no section demands arithmetic from the LLM. formula_id: VALIDATE_LLM_DETERMINISM_PACK_V1 contract: spec/58_llm_determinism_contract.yaml """ from __future__ import annotations import json import re import sys from pathlib import Path import yaml ROOT = Path(__file__).resolve().parents[1] DEFAULT_CONTEXT = ROOT / "Temp" / "final_context_for_llm_v5.yaml" OUTPUT_PATH = ROOT / "Temp" / "llm_determinism_pack_v1.json" # Required sections from spec/58 REQUIRED_SECTIONS = [ "01_metadata_and_manifest_alias", "02_portfolio_health", "03_hard_blockers", "04_sell_priority_table", "05_buy_hold_sell_action_table", "06_cash_and_risk_budget", "07_shadow_ledger_visible_items", "08_data_missing_items", "09_market_regime_summary_precomputed", "10_education_notes_preapproved", "11_forbidden_phrases_and_no_math_rules", ] # Patterns that indicate arithmetic instructions to LLM ARITHMETIC_INSTRUCTION_PATTERNS = [ r"계산\s*하시오", r"계산\s*해\s*주", r"더해\s*서", r"나누어", r"빼\s*면", r"평균\s*구하", r"합계\s*구하", r"계산.*결과를\s*출력", r"LLM.*계산", r"\bcompute\b.*\bprice\b", r"\bcalculate\b.*\bquantity\b", ] # Numeric fields that must be pre-filled (not left for LLM) REQUIRED_NUMERIC_FIELDS = [ "total_asset_krw", "cash_ratio_pct", "goal_achievement_pct", "available_cash_krw", "max_allowed_mdd_pct", ] def _load_yaml(path: Path) -> dict: if not path.exists(): return {"_missing": True, "_path": str(path)} try: obj = yaml.safe_load(path.read_text(encoding="utf-8")) return obj if isinstance(obj, dict) else {"_empty": True} except Exception as e: return {"_error": str(e), "_path": str(path)} def _check_required_sections(context: dict) -> tuple[list[str], list[str]]: """Return (found_sections, missing_sections).""" context_text = str(context) found, missing = [], [] for sec in REQUIRED_SECTIONS: if sec in context_text: found.append(sec) else: missing.append(sec) return found, missing def _count_arithmetic_instructions(context: dict) -> tuple[int, list[str]]: context_text = json.dumps(context, ensure_ascii=False) findings = [] for pattern in ARITHMETIC_INSTRUCTION_PATTERNS: if re.search(pattern, context_text, re.IGNORECASE): findings.append(pattern) return len(findings), findings def _check_numeric_fields_precomputed(context: dict) -> tuple[float, list[str]]: """Check that required numeric fields have actual values (not placeholders).""" context_text = json.dumps(context, ensure_ascii=False) unfilled = [] for field in REQUIRED_NUMERIC_FIELDS: # Look for field = null / field = "" / field = "DATA_MISSING" null_pattern = rf'"{field}"\s*:\s*(null|""|"DATA_MISSING")' if re.search(null_pattern, context_text): unfilled.append(field) elif field not in context_text: unfilled.append(field) filled = len(REQUIRED_NUMERIC_FIELDS) - len(unfilled) coverage_pct = 100.0 * filled / len(REQUIRED_NUMERIC_FIELDS) if REQUIRED_NUMERIC_FIELDS else 100.0 return coverage_pct, unfilled def _check_llm_numeric_generation(context: dict) -> int: """Count fields that ask LLM to generate a number.""" context_text = json.dumps(context, ensure_ascii=False) generation_patterns = [ r"최종\s*수량\s*산출", r"손절가\s*계산", r"익절가\s*계산", r"LLM.*숫자.*생성", ] count = 0 for p in generation_patterns: count += len(re.findall(p, context_text, re.IGNORECASE)) return count def run(context_path: Path) -> dict: context = _load_yaml(context_path) if context.get("_missing"): result = { "gate": "SKIP", "reason": f"context file missing: {context_path}", "missing_sections": [], "arithmetic_instruction_count": 0, "precomputed_field_coverage_pct": 0.0, "llm_numeric_generation_count": 0, "contract": "spec/58_llm_determinism_contract.yaml", } OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) OUTPUT_PATH.write_text(json.dumps(result, ensure_ascii=False, indent=2)) return result found_sections, missing_sections = _check_required_sections(context) arith_count, arith_patterns = _count_arithmetic_instructions(context) coverage_pct, unfilled_fields = _check_numeric_fields_precomputed(context) llm_gen_count = _check_llm_numeric_generation(context) gate = "PASS" if missing_sections or arith_count > 0 or llm_gen_count > 0: gate = "FAIL" elif coverage_pct < 80.0: gate = "WARN" result = { "gate": gate, "found_sections": found_sections, "missing_sections": missing_sections, "arithmetic_instruction_count": arith_count, "arithmetic_instruction_patterns": arith_patterns, "precomputed_field_coverage_pct": round(coverage_pct, 2), "unfilled_required_fields": unfilled_fields, "llm_numeric_generation_count": llm_gen_count, "sections_required": len(REQUIRED_SECTIONS), "sections_found": len(found_sections), "contract": "spec/58_llm_determinism_contract.yaml", } OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) OUTPUT_PATH.write_text(json.dumps(result, ensure_ascii=False, indent=2)) return result def main() -> None: import argparse parser = argparse.ArgumentParser(description="H008 LLM Determinism Pack Validator") parser.add_argument("--context", default=str(DEFAULT_CONTEXT)) args = parser.parse_args() result = run(Path(args.context)) gate = result.get("gate", "FAIL") print(f"[H008_LLM_DETERMINISM_PACK] gate={gate} " f"sections={result.get('sections_found', 0)}/{result.get('sections_required', 0)} " f"arithmetic={result.get('arithmetic_instruction_count', 0)} " f"field_coverage={result.get('precomputed_field_coverage_pct', 0):.1f}%") if gate == "FAIL": print(" Missing sections:", result.get("missing_sections")) print(" Arithmetic patterns:", result.get("arithmetic_instruction_patterns")) sys.exit(1) if __name__ == "__main__": main()