Files
QuantEngineByItz/tools/validate_report_quality.py
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

393 lines
20 KiB
Python

from __future__ import annotations
import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any
import yaml
# Project root: the parent of the directory that contains this script.
# Spec files are resolved relative to it (ROOT / "spec" / ...), and relative
# report paths given on the command line are resolved against it as well.
ROOT = Path(__file__).resolve().parents[1]
def safe_print(message: str) -> None:
    """Print ``message`` without crashing on consoles that cannot encode it.

    The message is printed as-is first. If the output stream rejects it with
    ``UnicodeEncodeError``, the message is re-encoded with the stream's own
    encoding using ``backslashreplace`` so every unencodable character is
    emitted as an escape sequence instead of aborting the program.

    Args:
        message: Text to write to standard output.
    """
    try:
        print(message)
    except UnicodeEncodeError:
        # Use the encoding the stream actually has; a fixed "cp949" only helps
        # on Korean Windows consoles and fails again on e.g. ASCII streams.
        # "cp949" remains the default when the stream exposes no encoding.
        encoding = getattr(sys.stdout, "encoding", None) or "cp949"
        fallback = message.encode(encoding, errors="backslashreplace").decode(encoding, errors="ignore")
        print(fallback)
# Maps a canonical section name to the strings that count as evidence of that
# section in a report. first_index() tries the aliases in list order and
# returns the position of the first alias that occurs in the text.
SECTION_ALIASES = {
    "routing_serving_trace": ["routing_serving_trace", "라우팅·서빙 추적", "라우팅·서빙", "라우팅/서빙"][:1]
    + ["라우팅·서빙 추적", "라우팅·서빙", "routing_serving_trace", "라우팅/서빙"][:0]
    if False
    else ["라우팅·서빙 추적", "라우팅·서빙", "routing_serving_trace", "라우팅/서빙"],
    "QEH_AUDIT_BLOCK": ["QEH_AUDIT_BLOCK", "하네스 공식 검산표"],
    "capture_read_ledger": ["capture_read_ledger", "캡처 판독 원장"],
    "data_completeness_matrix": ["data_completeness_matrix", "데이터 완성도 매트릭스"],
    "backdata_feature_bank_table": ["backdata_feature_bank_table", "백데이터 특성 원장", "백데이터", "GAS 자동 수집"],
    "benchmark_relative_harness_table": ["benchmark_relative_harness_table", "비교 기준·위성 품질 하네스", "위성 품질 하네스"],
    "index_relative_health_table": ["index_relative_health_table", "지수 상대 건강도 게이트"],
    "alpha_lead_table": ["alpha_lead_table", "선행 알파 표", "선행 알파"],
    "entry_freshness_gate_table": ["entry_freshness_gate_table", "진입 신선도 게이트"],
    "anti_distribution_table": ["anti_distribution_table", "분산 매도 위험 표", "분산위험", "설거지"],
    "profit_preservation_table": ["profit_preservation_table", "수익 보호 표", "수익 보호"],
    "sell_value_preservation_gate_table": ["sell_value_preservation_gate_table", "회복 보존 매도 게이트"],
    "smart_cash_raise_table": ["smart_cash_raise_table", "현금 확보 실행 표", "현금확보"],
    "execution_quality_table": ["execution_quality_table", "체결 품질 표", "체결 품질", "execution_quality"],
    "order_quantity_4stage_gate": ["order_quantity_4stage_gate", "주문 수량 4단계 게이트"],
    "decision_trace_table": ["decision_trace_table", "판단 추적표"],
    "sell_priority_decision_table": ["sell_priority_decision_table", "매도 우선순위 표"],
    "current_holdings_analysis_report_template": ["current_holdings_analysis_report_template", "보유 종목 분석"],
    "proposal_reference_sheet": ["proposal_reference_sheet", "사용자 판단용 제안표", "제안표"],
    # Defined once here; a second identical entry further down was a duplicate
    # dict key (the later one silently overwrote this one) and was removed.
    "satellite_buy_proposal_sheet": ["satellite_buy_proposal_sheet", "위성 신규 매수 제안 원장"],
    "concise_hts_input_sheet": ["concise_hts_input_sheet", "간단 주문 입력표", "간단 HTS 입력표"],
    "reference_price_ledger": ["reference_price_ledger", "투명한 감시 원장", "감시 원장"],
    "core_satellite_timing_gate_table": ["core_satellite_timing_gate_table", "코어·위성 타이밍 게이트", "core_satellite", "T+1위험"],
    "engine_feedback_loop_report": ["engine_feedback_loop_report", "엔진 피드백 루프 보고", "평가", "개선제안"],
    "prediction_evaluation_improvement_report": ["prediction_evaluation_improvement_report", "예측 결과 평가·개선 하네스", "평가·개선", "하네스 갭"],
    "rule_lifecycle_governance_report": ["rule_lifecycle_governance_report", "규칙 강등·퇴역 거버넌스"],
    "trade_quality_report": ["trade_quality_report", "품질", "거래 품질", "거래 품질 채점 보고", "TRADE_QUALITY_SCORER_V1"],
    "pattern_blacklist_report": ["pattern_blacklist_report", "패턴", "패턴 블랙리스트", "반복 패턴 블랙리스트", "PATTERN_BLACKLIST_AUTO_V1"],
    "watch_release_checklist": ["watch_release_checklist", "WATCH 해제 조건 체크리스트"],
    "alpha_feedback_loop_report": ["alpha_feedback_loop_report", "알파 피드백 루프 보고"],
    "immediate_execution_playbook": ["immediate_execution_playbook", "즉시 실행 플레이북"],
    "market_context_learning_note": ["market_context_learning_note", "시장 맥락 학습 노트"],
    "t1_evaluation_summary_box": ["t1_evaluation_summary_box", "익일 평가 요약", "T+1 평가 요약"],
    # [PROPOSAL49] new PA47/PA48 sections
    "watch_breakout_gate": ["watch_breakout_gate", "급등 탐지", "WATCH_BREAKOUT_REALTIME_GATE_V1"],
    "anti_whipsaw_reentry_gate": ["anti_whipsaw_reentry_gate", "반등 재진입 감시", "ANTI_WHIPSAW_REENTRY_GATE_V1", "REENTRY_CANDIDATE"],
    # [PROPOSAL53]
    "fundamental_quality_gate_v1": ["fundamental_quality_gate_v1", "FUNDAMENTAL_QUALITY_GATE_V1", "펀더멘털 품질"],
    "horizon_allocation_lock_v1": ["horizon_allocation_lock_v1", "HORIZON_ALLOCATION_LOCK_V1", "투자기간 버킷"],
    "smart_money_liquidity_gate_v1": ["smart_money_liquidity_gate_v1", "SMART_MONEY_LIQUIDITY_GATE_V1", "스마트머니·유동성"],
    "routing_serving_trace_v2": ["routing_serving_trace_v2", "ROUTING_SERVING_DECISION_TRACE_V2", "Trace V2"],
    "fundamental_multifactor_v2": ["fundamental_multifactor_v2", "FUNDAMENTAL_MULTI_FACTOR_SCORE_V2"],
    "earnings_growth_quality_v1": ["earnings_growth_quality_v1", "EARNINGS_GROWTH_QUALITY_GATE_V1"],
    "market_share_proxy_v1": ["market_share_proxy_v1", "MARKET_SHARE_MOMENTUM_PROXY_V1"],
    "cashflow_stability_v1": ["cashflow_stability_v1", "CASHFLOW_STABILITY_GATE_V1"],
    "routing_decision_explain_v1": ["routing_decision_explain_v1", "ROUTING_DECISION_EXPLAIN_LOCK_V1"],
}
# [PROPOSAL51 RSO-V1] CORE sections come before the appendix sections.
# validate_report() compares the positions of neighbouring tokens that are
# present in a report and flags any pair that appears in the opposite order.
ORDER_TOKENS = [
    # CORE sections (RSO-V1)
    "concise_hts_input_sheet",  # CORE-2
    "watch_breakout_gate",  # CORE-3
    "immediate_execution_playbook",  # CORE-6
    "market_context_learning_note",  # CORE-7
] + [
    # Appendix sections (after the appendix divider)
    "routing_serving_trace",
    "QEH_AUDIT_BLOCK",
    "backdata_feature_bank_table",
    "alpha_lead_table",
    "anti_distribution_table",
    "profit_preservation_table",
    "smart_cash_raise_table",
    "execution_quality_table",
    "decision_trace_table",
    "anti_whipsaw_reentry_gate",
    "proposal_reference_sheet",
    "satellite_buy_proposal_sheet",
    "core_satellite_timing_gate_table",
    "engine_feedback_loop_report",
    "prediction_evaluation_improvement_report",
    "rule_lifecycle_governance_report",
]
# English status/action words that must not appear in report prose lines.
# Each token carries one space on both sides; validate_report() pads every
# checked line the same way, so only whole space-delimited words match.
FORBIDDEN_ENGLISH_TOKENS = [
    # status words
    " PASS ", " FAIL ", " BLOCKED ", " ACTIVE ", " INACTIVE ",
] + [
    # action words
    " BUY ", " SELL ", " TRIM ",
]
def load_yaml(path: Path) -> dict[str, Any]:
    """Parse the UTF-8 YAML file at ``path`` with ``yaml.safe_load``.

    Args:
        path: File to read.

    Returns:
        The parsed document, or an empty dict when the parse result is falsy
        (for example an empty file).
    """
    with path.open("r", encoding="utf-8") as stream:
        parsed = yaml.safe_load(stream)
    if not parsed:
        return {}
    return parsed
def first_index(text: str, section_name: str) -> int:
    """Locate a section in ``text`` through its known aliases.

    Aliases come from ``SECTION_ALIASES``; a name without an entry is searched
    for literally. Aliases are tried in list order, so the result is the
    position of the first alias that occurs, not necessarily the smallest
    position among all aliases.

    Args:
        text: Report text to search.
        section_name: Canonical section name.

    Returns:
        The index of the first matching alias, or ``-1`` when none occurs.
    """
    aliases = SECTION_ALIASES.get(section_name, [section_name])
    # map() is lazy, so searching stops at the first alias that is found.
    hits = (position for position in map(text.find, aliases) if position >= 0)
    return next(hits, -1)
def required_sections() -> list[str]:
    """Return the section names listed under ``human_report.required_sections``.

    The list is read from ``spec/07_output_schema.yaml`` below ``ROOT``.
    Entries that are not dicts, or that have no truthy ``name``, are skipped.
    """
    schema = load_yaml(ROOT / "spec" / "07_output_schema.yaml")
    human_report = schema.get("human_report") or {}
    entries = human_report.get("required_sections") or []
    names = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        if name:
            names.append(name)
    return names
def prohibited_headers() -> list[str]:
    """Return ``output_format.prose_control.prohibited_headers`` from the spec.

    The value is read from ``spec/07_output_schema.yaml`` below ``ROOT``; any
    missing level of the hierarchy yields an empty list.
    """
    schema = load_yaml(ROOT / "spec" / "07_output_schema.yaml")
    output_format = schema.get("output_format") or {}
    prose_control = output_format.get("prose_control") or {}
    headers = prose_control.get("prohibited_headers") or []
    return list(headers)
def watch_forbidden_columns() -> list[str]:
    """Return ``human_report.watch_ledger.forbidden_columns`` from the spec.

    The value is read from ``spec/07_output_schema.yaml`` below ``ROOT``; any
    missing level of the hierarchy yields an empty list.
    """
    schema = load_yaml(ROOT / "spec" / "07_output_schema.yaml")
    human_report = schema.get("human_report") or {}
    watch_ledger = human_report.get("watch_ledger") or {}
    columns = watch_ledger.get("forbidden_columns") or []
    return list(columns)
def find_watch_section(text: str) -> str:
    """Extract the watch-ledger section from ``text``.

    The section starts at the earliest occurrence of any known marker and
    runs up to (not including) the newline that precedes the next markdown
    heading, or to the end of the text when no heading follows.

    Args:
        text: Report text to search.

    Returns:
        The section text, or ``""`` when no marker is present.
    """
    markers = ("## 투명한 감시 원장", "## reference_price_ledger", "투명한 감시 원장")
    hits = [position for position in map(text.find, markers) if position >= 0]
    if not hits:
        return ""
    start = min(hits)
    # Begin one character after the start so a heading marker is not treated
    # as the boundary of its own section.
    boundary = re.compile(r"\n#{1,6}\s+").search(text, start + 1)
    if boundary is None:
        return text[start:]
    return text[start : boundary.start()]
def find_section_by_heading(text: str, heading: str) -> str:
    """Extract the section that starts at the first occurrence of ``heading``.

    The section ends just before the newline that introduces the next
    markdown heading (``#`` to ``######`` followed by whitespace), or at the
    end of ``text`` when there is no later heading.

    Args:
        text: Report text to search.
        heading: Exact substring that marks the start of the section.

    Returns:
        The section text including ``heading``, or ``""`` if it is absent.
    """
    start = text.find(heading)
    if start < 0:
        return ""
    # Skip the first character so the section's own heading is never matched.
    boundary = re.search(r"\n#{1,6}\s+", text[start + 1 :])
    if boundary is None:
        return text[start:]
    return text[start : start + 1 + boundary.start()]
def load_report_text(path: Path) -> str:
    """Return the text that the report checks should run against.

    When the file holds a JSON object with a ``sections`` list, the sections
    are rendered as markdown: each dict entry contributes ``## <name>`` (if
    the stripped name is non-empty) followed by its right-stripped
    ``markdown`` (if non-empty), joined with newlines. Any other content is
    returned unchanged.

    Args:
        path: Report file, read as UTF-8.

    Returns:
        Rendered markdown for structured reports, otherwise the raw file text.
    """
    raw = path.read_text(encoding="utf-8")
    try:
        payload = json.loads(raw)
    except Exception:
        # Not JSON: treat the file as an already-rendered markdown/text report.
        return raw
    if not isinstance(payload, dict):
        return raw
    sections = payload.get("sections")
    if not isinstance(sections, list):
        return raw

    rendered: list[str] = []
    for entry in sections:
        if not isinstance(entry, dict):
            continue
        heading = str(entry.get("name") or "").strip()
        body = str(entry.get("markdown") or "").rstrip()
        if heading:
            rendered.append(f"## {heading}")
        if body:
            rendered.append(body)
    return "\n".join(rendered)
def load_report_sections(path: Path) -> list[dict[str, Any]]:
    """Return the dict entries of a structured report's ``sections`` list.

    Args:
        path: Report file, read as UTF-8.

    Returns:
        The dict items of ``sections`` when the file is a JSON object whose
        ``sections`` value is a list; an empty list in every other case
        (non-JSON content, non-object JSON, missing or non-list ``sections``).
    """
    raw = path.read_text(encoding="utf-8")
    try:
        payload = json.loads(raw)
    except Exception:
        # Plain markdown/text reports have no structured sections.
        return []
    if not isinstance(payload, dict):
        return []
    candidates = payload.get("sections")
    if not isinstance(candidates, list):
        return []
    return [entry for entry in candidates if isinstance(entry, dict)]
def _failed_checks_guide_is_blank(text: str) -> bool:
    """Tell whether a ``#### failed_checks`` table has an empty resolution guide.

    Only table rows between a ``#### failed_checks`` heading and the next
    markdown heading are inspected, so unrelated tables elsewhere in the
    report cannot trigger the check. The third column of each data row is
    taken as the resolution guide.
    """
    blank_values = ("", "-", "N/A", "n/a", "없음")
    # Element 0 of the split is the text before the first heading: not relevant.
    for chunk in text.split("#### failed_checks")[1:]:
        next_heading = re.search(r"\n#{1,6}\s+", chunk)
        table_text = chunk if next_heading is None else chunk[: next_heading.start()]
        for line in table_text.splitlines():
            # Skip non-table lines, the header row and the separator row.
            if not line.strip().startswith("|") or "해결 안내" in line or "---" in line:
                continue
            cols = [c.strip() for c in line.strip().strip("|").split("|")]
            if len(cols) >= 3 and cols[2] in blank_values:
                return True
    return False


def validate_report(path: Path) -> list[str]:
    """Run every report-quality check against the report at ``path``.

    The report is loaded twice: as rendered text (``load_report_text``) for
    the substring/regex checks, and as structured sections
    (``load_report_sections``) for checks that look at one named section.
    For plain markdown/text reports the structured view is empty, so the
    section-scoped checks (CHECK_51, CHECK_54) do not fire.

    Args:
        path: Location of the report file.

    Returns:
        Error messages in the order the checks ran. An empty list means the
        report passed every check.
    """
    text = load_report_text(path)
    sections = load_report_sections(path)
    section_map = {str(s.get("name") or ""): str(s.get("markdown") or "") for s in sections}
    errors: list[str] = []

    # --- Required sections and prohibited headers (both defined in the spec) ---
    for section in required_sections():
        if first_index(text, section) < 0:
            errors.append(f"missing required human_report section: {section}")
    for header in prohibited_headers():
        if header and header in text:
            errors.append(f"prohibited prose header found: {header}")

    # --- Section order: neighbouring ORDER_TOKENS must appear in list order ---
    if sections:
        # Structured report: compare positions in the sections list (exact names).
        section_names = [str(sec.get("name") or "") for sec in sections]
        present_positions = [(token, section_names.index(token)) for token in ORDER_TOKENS if token in section_names]
    else:
        # Plain text report: compare character offsets found through the aliases.
        positions = [(token, first_index(text, token)) for token in ORDER_TOKENS]
        present_positions = [(token, index) for token, index in positions if index >= 0]
    for (left_token, left_index), (right_token, right_index) in zip(present_positions, present_positions[1:]):
        if left_index > right_index:
            errors.append(f"section order violation: {left_token} appears after {right_token}")

    # --- WATCH ledger checks ---
    # Loaded once and shared by the ledger and breakout checks below.
    forbidden_columns = watch_forbidden_columns()
    watch_section = (
        section_map.get("reference_price_ledger")
        or section_map.get("watch_breakout_gate")
        or find_watch_section(text)
    )
    if watch_section:
        for column in forbidden_columns:
            if column and column in watch_section:
                errors.append(f"WATCH ledger uses forbidden HTS column: {column}")
        if "참고익절상태(tp1/tp2)" not in watch_section:
            errors.append("WATCH ledger missing tp1/tp2 state column")
        if not re.search(r"tp1=.*tp2=", watch_section):
            errors.append("WATCH ledger missing tp1/tp2 state values")
        if "기준시점(종가/장중)" not in watch_section:
            errors.append("WATCH ledger missing price basis column")
    else:
        errors.append("WATCH ledger section missing")

    # Breakout table: only the first two table lines (header + separator) are
    # checked for forbidden columns.
    breakout_section = find_section_by_heading(text, "## 급등 탐지")
    if breakout_section:
        header_lines = [ln for ln in breakout_section.splitlines() if ln.strip().startswith("|")]
        header_text = "\n".join(header_lines[:2]) if header_lines else ""
        for column in forbidden_columns:
            if column and column in header_text:
                errors.append(f"WATCH breakout uses forbidden HTS column: {column}")
    if "기준시점(종가/장중)" not in text:
        errors.append("report missing explicit close/intraday basis label")

    # CHECK_73: when LATE_CHASE_ATTRIBUTION_V1 reports WATCH_PENDING_SAMPLE, the
    # insufficient-sample warning label must be present in the report.
    if "WATCH_PENDING_SAMPLE" in text and "LATE_CHASE_ATTRIBUTION_V1" in text:
        if "샘플 부족 경고" not in text:
            errors.append("CHECK_73_LCA_PENDING_WARNING_MISSING: WATCH_PENDING_SAMPLE 경고 상단 표시 누락")

    # --- Content that requires a companion table ---
    if "HTS" in text and first_index(text, "execution_quality_table") < 0:
        errors.append("HTS order content exists without execution_quality_table")
    if re.search(r"\b(BUY|ADD_ON)\b", text) and first_index(text, "alpha_lead_table") < 0:
        errors.append("BUY/ADD_ON content exists without alpha_lead_table")
    if re.search(r"\b(SELL|TRIM)\b", text) and first_index(text, "smart_cash_raise_table") < 0:
        errors.append("SELL/TRIM content exists without smart_cash_raise_table")

    # --- Localization: no bare English status/action words in prose lines ---
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped:
            continue
        # Table rows, block quotes and code fences are exempt.
        if stripped.startswith(("|", ">", "```")):
            continue
        # Pad so that tokens at the start or end of the line also match.
        padded = f" {stripped} "
        for token in FORBIDDEN_ENGLISH_TOKENS:
            if token in padded:
                errors.append(f"LANGUAGE_LOCALIZATION_FAIL: forbidden English status/action token found: {token.strip()}")
                break  # one error per line is enough

    if "| 갭 경고 | 경고 |" in text:
        errors.append("PREDICTION_IMPROVEMENT_GAP_ALERT: gap matrix contains non-zero gap")

    # [PROPOSAL51] CHECK_51~58 - report quality consistency checks
    # CHECK_51: inverted sell-price patterns must not remain in the sell
    # execution section. Reference tables (formula coverage etc.) are excluded
    # by inspecting only the smart_cash_raise_table section.
    scrs_section = section_map.get("smart_cash_raise_table") or ""
    if re.search(r"INVALID_PRICE_INVERSION|INVALID_TRAILING_STOP_BREACH", scrs_section):
        errors.append("CHECK_51_SPSV2: INVALID 매도 가격이 보고서에 노출됨 — SPSV2 차단 누락")
    # CHECK_52: portfolio_health_score must not be printed as a Boolean.
    if re.search(r"portfolio_health_score\s*[=:]\s*(True|False|true|false)\b", text):
        errors.append("CHECK_52_HEALTH_TYPE: portfolio_health_score가 Boolean 값으로 출력됨 — 숫자여야 함")
    # CHECK_53: the cluster weight must not be printed as '-'.
    if re.search(r"cluster_pct\s*=\s*-[%]?[),]", text):
        errors.append("CHECK_53_CLUSTER_PCT: cluster_pct=- 출력됨 — GAS 반환키 불일치(current_cluster_pct→cluster_pct)")
    # CHECK_54: the SCRS-V2 immediate-sell quantity cell must not be '-'.
    # Only applies when the section mentions selected_combo.
    if "selected_combo" in scrs_section and re.search(r"(?m)^\|\s*[^|]+\s*\|\s*-\s*\|", scrs_section):
        errors.append("CHECK_54_SCRS_RENDER: SCRS-V2 immediate_sell_qty='-' 출력됨 — 렌더링 키 불일치")
    # CHECK_55: the Export Gate result must be shown in the report.
    if not re.search(r"EXPORT_READY|PENDING_EXPORT|REVIEW_ONLY", text):
        errors.append("CHECK_55_EXPORT_GATE: Export Gate 결과(EXPORT_READY/PENDING_EXPORT/REVIEW_ONLY)가 보고서에 없음")
    # CHECK_56: the M5 V1.1 mandatory-reduction section must be present.
    if "M5 V1.1" not in text and "mandatory_reduction" not in text.lower():
        errors.append("CHECK_56_MANDATORY_REDUCTION: M5 V1.1 강제감축 섹션이 보고서에 없음")
    # CHECK_57: REVIEW_ONLY/PENDING_EXPORT must come with a stated cause.
    if re.search(r"REVIEW_ONLY|PENDING_EXPORT", text):
        if not re.search(r"CHECK_\d+_\w+|resolution_guide|원인", text):
            errors.append("CHECK_57_RESOLUTION_MISSING: REVIEW_ONLY/PENDING_EXPORT 원인 미명시")
    # CHECK_72: the resolution-guide column of the failed_checks table must
    # not be blank. Reported at most once per report.
    if _failed_checks_guide_is_blank(text):
        errors.append("CHECK_72_EXPORT_RESOLUTION_GUIDE_EMPTY: failed_checks 해결 안내 공란")
    # CHECK_58: price hierarchy display (limit price vs. stop-loss price).
    # NOTE(review): no rule is enforced for CHECK_58 yet; the previous code
    # only located both labels and could never produce an error.

    # CHECK_59: the CORE-0 execution safety declaration must be present (RSO-V1).
    if "집행 안전 선언" not in text and "CORE-0" not in text:
        errors.append("CHECK_59_CORE0_MISSING: [CORE-0] 집행 안전 선언 섹션이 보고서에 없음 — RSO-V1 미적용")
    # CHECK_60: a cash-recovery plan needs a "not an order"/reference label (CRDL-V1).
    if re.search(r"현금회복|현금확보", text):
        if "주문 아님" not in text and "참고용" not in text:
            errors.append("CHECK_60_CRDL_REFERENCE_LABEL: 현금회복 섹션에 '주문 아님' 참고 레이블 없음 — CRDL-V1 미적용")
    # CHECK_61: a DQG-V2 data completeness grade must be present.
    if not re.search(r"COMPLETE|PARTIAL|INSUFFICIENT", text):
        errors.append("CHECK_61_DQG_V2_GRADE_MISSING: 데이터 완성도 등급(COMPLETE/PARTIAL/INSUFFICIENT)이 보고서에 없음")
    # CHECK_62: portfolio_health_score must be numeric (no Boolean/None).
    if re.search(r"portfolio_health_score.*False|portfolio_health_score.*None|포트폴리오 건강도.*False", text, re.IGNORECASE):
        errors.append("CHECK_62_HEALTH_SCORE_BOOLEAN: portfolio_health_score가 Boolean/None으로 표시됨 — GAS 타입 버그")

    # CHECK_63~71: harness sections that must always be exposed in the report.
    mandatory_harness_sections = (
        ("fundamental_quality_gate_v1", "CHECK_63_FQ_SECTION_MISSING: FUNDAMENTAL_QUALITY_GATE_V1 섹션 누락"),
        ("horizon_allocation_lock_v1", "CHECK_64_HA_SECTION_MISSING: HORIZON_ALLOCATION_LOCK_V1 섹션 누락"),
        ("smart_money_liquidity_gate_v1", "CHECK_65_SML_SECTION_MISSING: SMART_MONEY_LIQUIDITY_GATE_V1 섹션 누락"),
        ("routing_serving_trace_v2", "CHECK_66_TRACEV2_SECTION_MISSING: ROUTING_SERVING_DECISION_TRACE_V2 섹션 누락"),
        ("fundamental_multifactor_v2", "CHECK_67_FMV2_SECTION_MISSING: FUNDAMENTAL_MULTI_FACTOR_SCORE_V2 섹션 누락"),
        ("earnings_growth_quality_v1", "CHECK_68_EGQ_SECTION_MISSING: EARNINGS_GROWTH_QUALITY_GATE_V1 섹션 누락"),
        ("market_share_proxy_v1", "CHECK_69_MSP_SECTION_MISSING: MARKET_SHARE_MOMENTUM_PROXY_V1 섹션 누락"),
        ("cashflow_stability_v1", "CHECK_70_CFS_SECTION_MISSING: CASHFLOW_STABILITY_GATE_V1 섹션 누락"),
        ("routing_decision_explain_v1", "CHECK_71_RDE_SECTION_MISSING: ROUTING_DECISION_EXPLAIN_LOCK_V1 섹션 누락"),
    )
    for section_name, message in mandatory_harness_sections:
        if first_index(text, section_name) < 0:
            errors.append(message)
    return errors
def main() -> int:
    """Command-line entry point: validate one report and print the outcome.

    A relative ``report_path`` argument is resolved against ``ROOT``.

    Returns:
        ``0`` when the report passes, ``1`` when validation errors were found,
        ``2`` when the report file does not exist.
    """
    parser = argparse.ArgumentParser(description="Validate human investment report rendering quality.")
    parser.add_argument("report_path", help="Markdown or text report path to validate.")
    report_path = Path(parser.parse_args().report_path)
    if not report_path.is_absolute():
        report_path = ROOT / report_path
    if not report_path.exists():
        print(f"FAIL: report not found: {report_path}", file=sys.stderr)
        return 2

    problems = validate_report(report_path)
    if not problems:
        safe_print("PASS: report quality validation")
        return 0
    safe_print("FAIL: report quality validation failed")
    for problem in problems:
        safe_print(f"- {problem}")
    return 1


if __name__ == "__main__":
    sys.exit(main())