Files
QuantEngineByItz/tools/validate_llm_determinism_pack_v1.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

192 lines
6.5 KiB
Python

"""validate_llm_determinism_pack_v1.py — spec/58: H008_LLM_DETERMINISM_AUDIT
Validates that final_context_for_llm_v5.yaml contains all required
pre-computed sections and that no section demands arithmetic from the LLM.
formula_id: VALIDATE_LLM_DETERMINISM_PACK_V1
contract: spec/58_llm_determinism_contract.yaml
"""
from __future__ import annotations
import json
import re
import sys
from pathlib import Path
import yaml
# Second ancestor of this file, i.e. the directory that contains the folder
# this script lives in.
ROOT = Path(__file__).resolve().parents[1]
# Default input: the LLM context YAML that is validated when --context is not given.
DEFAULT_CONTEXT = ROOT.joinpath("Temp", "final_context_for_llm_v5.yaml")
# Location where the JSON validation report is written.
OUTPUT_PATH = ROOT.joinpath("Temp", "llm_determinism_pack_v1.json")
# Section identifiers (per spec/58) that must all appear in the context file.
# A single absent identifier is enough for the validator to report FAIL.
REQUIRED_SECTIONS = [
    "01_metadata_and_manifest_alias",
    "02_portfolio_health",
    "03_hard_blockers",
    "04_sell_priority_table",
    "05_buy_hold_sell_action_table",
    "06_cash_and_risk_budget",
    "07_shadow_ledger_visible_items",
    "08_data_missing_items",
    "09_market_regime_summary_precomputed",
    "10_education_notes_preapproved",
    "11_forbidden_phrases_and_no_math_rules",
]
# Regular expressions whose presence in the context is treated as an
# instruction asking the LLM to do arithmetic itself.  They are applied
# case-insensitively by the checker that consumes this list.
ARITHMETIC_INSTRUCTION_PATTERNS = [
    # Korean phrasings: "calculate", "please calculate", "add up", "divide",
    # "subtract", "find the average", "find the total".
    r"계산\s*하시오",
    r"계산\s*해\s*주",
    r"더해\s*서",
    r"나누어",
    r"빼\s*면",
    r"평균\s*구하",
    r"합계\s*구하",
    # "calculate ... print the result" and "LLM ... calculate".
    r"계산.*결과를\s*출력",
    r"LLM.*계산",
    # English phrasings.
    r"\bcompute\b.*\bprice\b",
    r"\bcalculate\b.*\bquantity\b",
]
# Field names whose values have to be supplied by the pipeline ahead of time,
# so the LLM never has to derive them.
REQUIRED_NUMERIC_FIELDS = [
    "total_asset_krw",
    "cash_ratio_pct",
    "goal_achievement_pct",
    "available_cash_krw",
    "max_allowed_mdd_pct",
]
def _load_yaml(path: Path) -> dict:
if not path.exists():
return {"_missing": True, "_path": str(path)}
try:
obj = yaml.safe_load(path.read_text(encoding="utf-8"))
return obj if isinstance(obj, dict) else {"_empty": True}
except Exception as e:
return {"_error": str(e), "_path": str(path)}
def _check_required_sections(context: dict) -> tuple[list[str], list[str]]:
    """Split REQUIRED_SECTIONS into those present in *context* and those absent.

    Presence is decided by a plain substring search over ``str(context)``;
    both returned lists keep the order of REQUIRED_SECTIONS.

    Returns:
        ``(found_sections, missing_sections)``.
    """
    # NOTE(review): this is a substring test on the dict's repr, so a section
    # name that only occurs inside a value (or inside a longer key) also counts
    # as found -- confirm that is acceptable for spec/58.
    haystack = str(context)
    present = [name for name in REQUIRED_SECTIONS if name in haystack]
    absent = [name for name in REQUIRED_SECTIONS if name not in haystack]
    return present, absent
def _count_arithmetic_instructions(context: dict) -> tuple[int, list[str]]:
    """Find which arithmetic-instruction patterns occur in *context*.

    The context is serialised to a single JSON string and every regex in
    ARITHMETIC_INSTRUCTION_PATTERNS is searched in it case-insensitively.

    Returns:
        ``(count, patterns)`` where *patterns* lists each matching regex once
        (in list order) and *count* is ``len(patterns)``; it is the number of
        distinct patterns that matched, not the number of occurrences.
    """
    # default=str keeps serialisation from raising TypeError on values that
    # YAML parses into non-JSON types (e.g. unquoted dates/timestamps become
    # datetime objects).
    context_text = json.dumps(context, ensure_ascii=False, default=str)
    # NOTE(review): the whole document is one line of text here, so patterns
    # containing ".*" can match across unrelated fields -- confirm intended.
    findings = [
        pattern
        for pattern in ARITHMETIC_INSTRUCTION_PATTERNS
        if re.search(pattern, context_text, re.IGNORECASE)
    ]
    return len(findings), findings
def _check_numeric_fields_precomputed(context: dict) -> tuple[float, list[str]]:
    """Check that the required numeric fields carry real values.

    A field from REQUIRED_NUMERIC_FIELDS counts as unfilled when its name does
    not occur anywhere in the serialised context, or when some occurrence of
    it as a JSON key has the value ``null``, ``""`` or ``"DATA_MISSING"``.

    Returns:
        ``(coverage_pct, unfilled)`` where *coverage_pct* is the percentage of
        required fields that are filled (100.0 when no fields are required)
        and *unfilled* lists the offending field names in declaration order.
    """
    # default=str keeps serialisation from raising TypeError on values that
    # YAML parses into non-JSON types (e.g. unquoted dates/timestamps become
    # datetime objects).
    context_text = json.dumps(context, ensure_ascii=False, default=str)

    unfilled = []
    for field in REQUIRED_NUMERIC_FIELDS:
        # Matches: "field": null / "field": "" / "field": "DATA_MISSING"
        placeholder_pattern = (
            rf'"{re.escape(field)}"\s*:\s*(null|""|"DATA_MISSING")'
        )
        if re.search(placeholder_pattern, context_text) or field not in context_text:
            unfilled.append(field)

    total = len(REQUIRED_NUMERIC_FIELDS)
    if not total:
        return 100.0, unfilled
    coverage_pct = 100.0 * (total - len(unfilled)) / total
    return coverage_pct, unfilled
def _check_llm_numeric_generation(context: dict) -> int:
"""Count fields that ask LLM to generate a number."""
context_text = json.dumps(context, ensure_ascii=False)
generation_patterns = [
r"최종\s*수량\s*산출",
r"손절가\s*계산",
r"익절가\s*계산",
r"LLM.*숫자.*생성",
]
count = 0
for p in generation_patterns:
count += len(re.findall(p, context_text, re.IGNORECASE))
return count
def _write_result(result: dict) -> None:
    """Write *result* as indented JSON to OUTPUT_PATH, creating parent dirs."""
    OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    # Explicit UTF-8: the payload is dumped with ensure_ascii=False and may
    # contain non-ASCII text (Korean regex patterns, paths), which the
    # platform default encoding is not guaranteed to be able to encode.
    OUTPUT_PATH.write_text(
        json.dumps(result, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )


def run(context_path: Path) -> dict:
    """Validate the LLM context file and persist the report to OUTPUT_PATH.

    Gate rules:
        * ``SKIP`` -- the context file does not exist.
        * ``FAIL`` -- a required section is missing, an arithmetic-instruction
          pattern matched, or a numeric-generation phrase was found.
        * ``WARN`` -- none of the above, but fewer than 80% of the required
          numeric fields are filled.
        * ``PASS`` -- otherwise.

    Args:
        context_path: Path of the YAML context file to validate.

    Returns:
        The report dictionary (the same content that is written to disk).
        When the file could not be parsed or did not contain a mapping, the
        report additionally carries a ``reason`` key describing the problem.
    """
    contract = "spec/58_llm_determinism_contract.yaml"
    context = _load_yaml(context_path)

    if context.get("_missing"):
        result = {
            "gate": "SKIP",
            "reason": f"context file missing: {context_path}",
            "missing_sections": [],
            "arithmetic_instruction_count": 0,
            "precomputed_field_coverage_pct": 0.0,
            "llm_numeric_generation_count": 0,
            "contract": contract,
        }
        _write_result(result)
        return result

    # Surface loader failures instead of letting them show up only as
    # "every section is missing".
    load_problem = None
    if context.get("_error"):
        load_problem = f"context file could not be loaded: {context['_error']}"
    elif context.get("_empty"):
        load_problem = "context file is empty or not a YAML mapping"

    found_sections, missing_sections = _check_required_sections(context)
    arith_count, arith_patterns = _count_arithmetic_instructions(context)
    coverage_pct, unfilled_fields = _check_numeric_fields_precomputed(context)
    llm_gen_count = _check_llm_numeric_generation(context)

    if missing_sections or arith_count > 0 or llm_gen_count > 0:
        gate = "FAIL"
    elif coverage_pct < 80.0:
        gate = "WARN"
    else:
        gate = "PASS"

    result = {
        "gate": gate,
        "found_sections": found_sections,
        "missing_sections": missing_sections,
        "arithmetic_instruction_count": arith_count,
        "arithmetic_instruction_patterns": arith_patterns,
        "precomputed_field_coverage_pct": round(coverage_pct, 2),
        "unfilled_required_fields": unfilled_fields,
        "llm_numeric_generation_count": llm_gen_count,
        "sections_required": len(REQUIRED_SECTIONS),
        "sections_found": len(found_sections),
        "contract": contract,
    }
    if load_problem is not None:
        result["reason"] = load_problem

    _write_result(result)
    return result
def main() -> None:
    """Command-line entry point for the H008 determinism validator.

    Parses ``--context`` (defaults to DEFAULT_CONTEXT), runs the validation,
    prints a one-line summary plus diagnostics, and exits with status 1 when
    the gate is ``FAIL``.  Every other gate leaves the exit status at 0.
    """
    import argparse

    parser = argparse.ArgumentParser(description="H008 LLM Determinism Pack Validator")
    parser.add_argument("--context", default=str(DEFAULT_CONTEXT))
    args = parser.parse_args()

    result = run(Path(args.context))
    gate = result.get("gate", "FAIL")
    print(f"[H008_LLM_DETERMINISM_PACK] gate={gate} "
          f"sections={result.get('sections_found', 0)}/{result.get('sections_required', 0)} "
          f"arithmetic={result.get('arithmetic_instruction_count', 0)} "
          f"field_coverage={result.get('precomputed_field_coverage_pct', 0):.1f}%")

    # Explain skipped or unparseable inputs when the report carries a reason.
    if result.get("reason"):
        print(" Reason:", result["reason"])
    if gate == "WARN":
        print(" Unfilled required fields:", result.get("unfilled_required_fields"))
    if gate == "FAIL":
        print(" Missing sections:", result.get("missing_sections"))
        print(" Arithmetic patterns:", result.get("arithmetic_instruction_patterns"))
        # A FAIL can be caused solely by numeric-generation phrases, so report
        # that count too; otherwise the two lists above may both be empty.
        print(" LLM numeric generation count:",
              result.get("llm_numeric_generation_count", 0))
        sys.exit(1)


if __name__ == "__main__":
    main()