Files
QuantEngineByItz/tools/build_llm_narrative_template_lock_v1.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

204 lines
8.4 KiB
Python

"""LLM_NARRATIVE_TEMPLATE_LOCK_V1 — LLM 서술 어휘 잠금 도구.
operational_report.json 각 section.markdown에서 두 종류의 위반을 스캔한다.
(1) INVALID_NARRATIVE — 금지 어휘 블랙리스트:
한국어: 같다, 약간, 괜찮다, 이번엔, 곧, 조만간, 강한 모멘텀
영어: "seems like", "might be", "probably", "soon", "strong momentum", "pretty good", "looks good"
(2) INVALID_SOFTENING — verdict 완화 패턴 (P3 확장):
BLOCK/SELL/CRITICAL verdict 근방에서 아래 완화 어휘가 동시 등장하면 차단.
완화 어휘: "그래도", "유연하게", "장기 관점", "재진입 고려", "고려 가능",
"상황에 따라", "아직 괜찮", "지켜볼 만", "still consider", "flexible"
감지 조건: 동일 섹션 내에 verdict_keyword + softening_keyword 동시 존재.
허용:
공식 ID (FORMULA_ID_V1 형식), 산출 라벨, 산출 숫자만.
게이트 CHECK_71 + CHECK_72(SOFTENING): 총 위반 0건.
"""
from __future__ import annotations
import argparse
import json
import re
from pathlib import Path
from typing import Any
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Default input report and default output artifact, both under <ROOT>/Temp.
DEFAULT_REPORT = ROOT.joinpath("Temp", "operational_report.json")
DEFAULT_OUT = ROOT.joinpath("Temp", "llm_narrative_template_lock_v1.json")
# 금지 어휘 패턴 (정규식)
_FORBIDDEN_PATTERNS: list[tuple[str, str]] = [
# (pattern, label)
(r"(?<![A-Z_])같다(?![A-Z_])", "AMBIGUOUS_PREDICATE:같다"),
(r"(?<![A-Z_])약간(?![A-Z_])", "VAGUE_QUALIFIER:약간"),
(r"(?<![A-Z_])괜찮다(?![A-Z_])", "VAGUE_QUALIFIER:괜찮다"),
(r"이번엔", "INFORMAL_TEMPORAL:이번엔"),
(r"(?<![A-Z_])곧(?![A-Z_])", "VAGUE_TEMPORAL:곧"),
(r"조만간", "VAGUE_TEMPORAL:조만간"),
(r"강한\s*모멘텀", "VAGUE_SIGNAL:강한모멘텀"),
(r"(?i)\bseems?\s+like\b", "VAGUE_ENGLISH:seems_like"),
(r"(?i)\bmight\s+be\b", "VAGUE_ENGLISH:might_be"),
(r"(?i)\bprobably\b", "VAGUE_ENGLISH:probably"),
(r"(?i)\bsoon\b", "VAGUE_ENGLISH:soon"),
(r"(?i)\bstrong\s+momentum\b", "VAGUE_ENGLISH:strong_momentum"),
(r"(?i)\bpretty\s+good\b", "VAGUE_ENGLISH:pretty_good"),
(r"(?i)\blooks?\s+good\b", "VAGUE_ENGLISH:looks_good"),
]
_COMPILED = [(re.compile(p), label) for p, label in _FORBIDDEN_PATTERNS]
# ── INVALID_SOFTENING 감지 (P3 확장) ────────────────────────────────────────
# verdict 키워드: 이 중 하나라도 섹션에 있으면 완화어휘 스캔 트리거
_VERDICT_KEYWORDS = [
"강제 차단", "BLOCK", "CRITICAL", "매도", "손절", "SELL", "BREACH",
"신규 매수.*금지", "매수.*차단", "BLOCKED",
]
_VERDICT_RE = re.compile("|".join(_VERDICT_KEYWORDS))
# 완화 어휘: verdict와 함께 나타나면 INVALID_SOFTENING
_SOFTENING_PATTERNS: list[tuple[str, str]] = [
(r"그래도\s*(?:고려|참고|볼\s*만|매수)", "SOFTENING:그래도_고려"),
(r"유연하게", "SOFTENING:유연하게"),
(r"장기\s*관점\s*(?:재진입|매수|고려)", "SOFTENING:장기관점_재진입"),
(r"재진입\s*(?:고려|기회)", "SOFTENING:재진입_고려"),
(r"고려\s*가능", "SOFTENING:고려_가능"),
(r"상황에\s*따라\s*(?:유연|조정|판단)", "SOFTENING:상황에따라"),
(r"아직\s*괜찮", "SOFTENING:아직_괜찮"),
(r"지켜볼\s*만", "SOFTENING:지켜볼만"),
(r"(?i)still\s+consider", "SOFTENING:still_consider"),
(r"(?i)flexible(?:ly)?", "SOFTENING:flexible"),
]
_SOFTENING_RE_LIST = [(re.compile(p), label) for p, label in _SOFTENING_PATTERNS]
def _scan_softening(text: str) -> list[dict[str, Any]]:
    """Collect softening-phrase hits in *text* when a verdict keyword is present.

    The check is text-wide, not proximity-based: if ``_VERDICT_RE`` matches
    anywhere in *text*, every softening match in *text* is reported; if it
    matches nowhere, the result is empty.

    Returns:
        One dict per match, grouped by pattern in ``_SOFTENING_RE_LIST`` order
        (not globally sorted by position). Each dict carries the pattern
        label, the matched text, a context excerpt of up to 60 characters on
        each side with newlines replaced by spaces, the match start offset,
        and ``violation_type`` set to ``"INVALID_SOFTENING"``.
    """
    if _VERDICT_RE.search(text) is None:
        # No verdict keyword at all: nothing can be "softened".
        return []

    def _excerpt(found: Any) -> str:
        # max() guards against a negative start wrapping around; the upper
        # bound is clamped explicitly as well.
        lo = max(0, found.start() - 60)
        hi = min(len(text), found.end() + 60)
        return text[lo:hi].replace("\n", " ").strip()

    return [
        {
            "pattern_label": tag,
            "matched_text": found.group(0),
            "context": _excerpt(found),
            "position": found.start(),
            "violation_type": "INVALID_SOFTENING",
        }
        for regex, tag in _SOFTENING_RE_LIST
        for found in regex.finditer(text)
    ]
def _load(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
try:
d = json.loads(path.read_text(encoding="utf-8"))
return d if isinstance(d, dict) else {}
except Exception:
return {}
def _scan_text(text: str) -> list[dict[str, Any]]:
    """Find forbidden-vocabulary matches in *text*.

    Every pattern in ``_COMPILED`` is applied to the whole text.

    Returns:
        One dict per match, grouped by pattern in ``_COMPILED`` order. Each
        dict carries the pattern label, the matched text, a context excerpt
        of up to 30 characters on each side with newlines replaced by spaces,
        the match start offset, and ``violation_type`` set to
        ``"INVALID_NARRATIVE"``.
    """
    hits: list[dict[str, Any]] = []
    for pattern, label in _COMPILED:
        for m in pattern.finditer(text):
            # Context excerpt: up to 30 characters either side of the match.
            start = max(0, m.start() - 30)
            end = min(len(text), m.end() + 30)
            context = text[start:end].replace("\n", " ").strip()
            hits.append({
                "pattern_label": label,
                "matched_text": m.group(0),
                "context": context,
                "position": m.start(),
                # Softening hits already carry this key and both kinds end up
                # in one merged list, so tag narrative hits the same way.
                "violation_type": "INVALID_NARRATIVE",
            })
    return hits
def main() -> int:
    """Scan every report section and write the narrative-lock gate result.

    Reads the report named by ``--report``, runs the forbidden-vocabulary and
    softening scans on each section's ``markdown``, writes a JSON summary to
    ``--out`` and prints a one-line summary. Relative paths are resolved
    against ``ROOT``.

    Returns:
        Always ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--report", default=str(DEFAULT_REPORT))
    parser.add_argument("--out", default=str(DEFAULT_OUT))
    opts = parser.parse_args()

    def _anchored(raw: str) -> Path:
        # Absolute paths are used as given; relative ones hang off ROOT.
        candidate = Path(raw)
        return candidate if candidate.is_absolute() else ROOT / raw

    report_path = _anchored(opts.report)
    out_path = _anchored(opts.out)

    # NOTE(review): a missing or unparsable report yields no sections here,
    # so the gate below evaluates to PASS with zero sections checked.
    raw_sections = _load(report_path).get("sections")
    if not isinstance(raw_sections, list):
        raw_sections = []

    section_results: list[dict[str, Any]] = []
    for entry in raw_sections:
        if not isinstance(entry, dict):
            continue
        body = str(entry.get("markdown") or "")
        if not body:
            # Sections without markdown are not counted as checked.
            continue
        narrative_hits = _scan_text(body)
        softening_hits = _scan_softening(body)
        # Softening takes precedence over plain narrative violations.
        if softening_hits:
            verdict = "INVALID_SOFTENING"
        elif narrative_hits:
            verdict = "INVALID_NARRATIVE"
        else:
            verdict = "OK"
        combined = narrative_hits + softening_hits
        section_results.append({
            "section_name": str(entry.get("name") or ""),
            "violation_count": len(combined),
            "narrative_violations": len(narrative_hits),
            "softening_violations": len(softening_hits),
            "violations": combined,
            "status": verdict,
        })

    total_violations = sum(item["violation_count"] for item in section_results)
    total_softening = sum(item["softening_violations"] for item in section_results)
    total_narrative = sum(item["narrative_violations"] for item in section_results)
    gate = "FAIL" if total_violations != 0 else "PASS"
    # Summary
    failed_sections = [item for item in section_results if item["status"] != "OK"]

    result = {
        "formula_id": "LLM_NARRATIVE_TEMPLATE_LOCK_V1",
        "gate": gate,
        "total_violations": total_violations,
        "narrative_violations": total_narrative,
        "softening_violations": total_softening,
        "sections_checked": len(section_results),
        "sections_failed": len(failed_sections),
        "forbidden_pattern_count": len(_FORBIDDEN_PATTERNS),
        "softening_pattern_count": len(_SOFTENING_PATTERNS),
        "forbidden_patterns": [tag for _, tag in _FORBIDDEN_PATTERNS],
        "softening_patterns": [tag for _, tag in _SOFTENING_PATTERNS],
        "section_results": section_results,
    }
    serialized = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(serialized, encoding="utf-8")

    print(
        f"LLM_NARRATIVE_TEMPLATE_LOCK_V1 gate={gate} "
        f"total_violations={total_violations} "
        f"(narrative={total_narrative} softening={total_softening}) "
        f"sections_checked={len(section_results)} sections_failed={len(failed_sections)}"
    )
    # NOTE(review): the exit code is 0 even when gate == "FAIL"; the gate is
    # only visible in the JSON and on stdout. Confirm that callers expect this.
    return 0


if __name__ == "__main__":
    raise SystemExit(main())