"""Validate the rendered human investment report against the output schema.

The report may be plain markdown/text or a structured JSON document with a
top-level ``sections`` list (``name`` / ``markdown`` entries).  Run as a
script, the exit status is 0 when validation passes, 1 when at least one
check fails, and 2 when the report file does not exist.
"""
from __future__ import annotations

import argparse
import json
import re
import sys
from pathlib import Path
from typing import Any

import yaml

ROOT = Path(__file__).resolve().parents[1]


def safe_print(message: str) -> None:
    """Print *message*, escaping characters the console cannot encode.

    If the plain ``print`` raises ``UnicodeEncodeError``, the message is
    re-encoded with ``backslashreplace`` using the encoding of
    ``sys.stdout`` itself (falling back to UTF-8 when it is unknown), so the
    second ``print`` cannot fail for the same reason on a non-cp949 console.
    """
    try:
        print(message)
    except UnicodeEncodeError:
        encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
        fallback = message.encode(encoding, errors="backslashreplace").decode(
            encoding, errors="ignore"
        )
        print(fallback)


# Canonical section name -> substrings whose presence in the report text counts
# as that section being present (see ``first_index``).
SECTION_ALIASES = {
    "routing_serving_trace": ["라우팅·서빙 추적", "라우팅·서빙", "routing_serving_trace", "라우팅/서빙"],
    "QEH_AUDIT_BLOCK": ["QEH_AUDIT_BLOCK", "하네스 공식 검산표"],
    "capture_read_ledger": ["capture_read_ledger", "캡처 판독 원장"],
    "data_completeness_matrix": ["data_completeness_matrix", "데이터 완성도 매트릭스"],
    "backdata_feature_bank_table": ["backdata_feature_bank_table", "백데이터 특성 원장", "백데이터", "GAS 자동 수집"],
    "benchmark_relative_harness_table": ["benchmark_relative_harness_table", "비교 기준·위성 품질 하네스", "위성 품질 하네스"],
    "index_relative_health_table": ["index_relative_health_table", "지수 상대 건강도 게이트"],
    "alpha_lead_table": ["alpha_lead_table", "선행 알파 표", "선행 알파"],
    "entry_freshness_gate_table": ["entry_freshness_gate_table", "진입 신선도 게이트"],
    "anti_distribution_table": ["anti_distribution_table", "분산 매도 위험 표", "분산위험", "설거지"],
    "profit_preservation_table": ["profit_preservation_table", "수익 보호 표", "수익 보호"],
    "sell_value_preservation_gate_table": ["sell_value_preservation_gate_table", "회복 보존 매도 게이트"],
    "smart_cash_raise_table": ["smart_cash_raise_table", "현금 확보 실행 표", "현금확보"],
    "execution_quality_table": ["execution_quality_table", "체결 품질 표", "체결 품질", "execution_quality"],
    "order_quantity_4stage_gate": ["order_quantity_4stage_gate", "주문 수량 4단계 게이트"],
    "decision_trace_table": ["decision_trace_table", "판단 추적표"],
    "sell_priority_decision_table": ["sell_priority_decision_table", "매도 우선순위 표"],
    "current_holdings_analysis_report_template": ["current_holdings_analysis_report_template", "보유 종목 분석"],
    "proposal_reference_sheet": ["proposal_reference_sheet", "사용자 판단용 제안표", "제안표"],
    "satellite_buy_proposal_sheet": ["satellite_buy_proposal_sheet", "위성 신규 매수 제안 원장"],
    "concise_hts_input_sheet": ["concise_hts_input_sheet", "간단 주문 입력표", "간단 HTS 입력표"],
    "reference_price_ledger": ["reference_price_ledger", "투명한 감시 원장", "감시 원장"],
    "core_satellite_timing_gate_table": ["core_satellite_timing_gate_table", "코어·위성 타이밍 게이트", "core_satellite", "T+1위험"],
    "engine_feedback_loop_report": ["engine_feedback_loop_report", "엔진 피드백 루프 보고", "평가", "개선제안"],
    "prediction_evaluation_improvement_report": ["prediction_evaluation_improvement_report", "예측 결과 평가·개선 하네스", "평가·개선", "하네스 갭"],
    "rule_lifecycle_governance_report": ["rule_lifecycle_governance_report", "규칙 강등·퇴역 거버넌스"],
    "trade_quality_report": ["trade_quality_report", "품질", "거래 품질", "거래 품질 채점 보고", "TRADE_QUALITY_SCORER_V1"],
    "pattern_blacklist_report": ["pattern_blacklist_report", "패턴", "패턴 블랙리스트", "반복 패턴 블랙리스트", "PATTERN_BLACKLIST_AUTO_V1"],
    "watch_release_checklist": ["watch_release_checklist", "WATCH 해제 조건 체크리스트"],
    # NOTE: a second, identical "satellite_buy_proposal_sheet" entry used to be
    # repeated here; duplicate dict keys silently overwrite, so it was removed.
    "alpha_feedback_loop_report": ["alpha_feedback_loop_report", "알파 피드백 루프 보고"],
    "immediate_execution_playbook": ["immediate_execution_playbook", "즉시 실행 플레이북"],
    "market_context_learning_note": ["market_context_learning_note", "시장 맥락 학습 노트"],
    "t1_evaluation_summary_box": ["t1_evaluation_summary_box", "익일 평가 요약", "T+1 평가 요약"],
    # [PROPOSAL49] new PA47/PA48 sections
    "watch_breakout_gate": ["watch_breakout_gate", "급등 탐지", "WATCH_BREAKOUT_REALTIME_GATE_V1"],
    "anti_whipsaw_reentry_gate": ["anti_whipsaw_reentry_gate", "반등 재진입 감시", "ANTI_WHIPSAW_REENTRY_GATE_V1", "REENTRY_CANDIDATE"],
    # [PROPOSAL53]
    "fundamental_quality_gate_v1": ["fundamental_quality_gate_v1", "FUNDAMENTAL_QUALITY_GATE_V1", "펀더멘털 품질"],
    "horizon_allocation_lock_v1": ["horizon_allocation_lock_v1", "HORIZON_ALLOCATION_LOCK_V1", "투자기간 버킷"],
    "smart_money_liquidity_gate_v1": ["smart_money_liquidity_gate_v1", "SMART_MONEY_LIQUIDITY_GATE_V1", "스마트머니·유동성"],
    "routing_serving_trace_v2": ["routing_serving_trace_v2", "ROUTING_SERVING_DECISION_TRACE_V2", "Trace V2"],
    "fundamental_multifactor_v2": ["fundamental_multifactor_v2", "FUNDAMENTAL_MULTI_FACTOR_SCORE_V2"],
    "earnings_growth_quality_v1": ["earnings_growth_quality_v1", "EARNINGS_GROWTH_QUALITY_GATE_V1"],
    "market_share_proxy_v1": ["market_share_proxy_v1", "MARKET_SHARE_MOMENTUM_PROXY_V1"],
    "cashflow_stability_v1": ["cashflow_stability_v1", "CASHFLOW_STABILITY_GATE_V1"],
    "routing_decision_explain_v1": ["routing_decision_explain_v1", "ROUTING_DECISION_EXPLAIN_LOCK_V1"],
}

# [PROPOSAL51 RSO-V1] CORE sections come before the appendix.
ORDER_TOKENS = [
    # CORE sections (RSO-V1)
    "concise_hts_input_sheet",  # CORE-2
    "watch_breakout_gate",  # CORE-3
    "immediate_execution_playbook",  # CORE-6
    "market_context_learning_note",  # CORE-7
    # Appendix sections (after the appendix divider)
    "routing_serving_trace",
    "QEH_AUDIT_BLOCK",
    "backdata_feature_bank_table",
    "alpha_lead_table",
    "anti_distribution_table",
    "profit_preservation_table",
    "smart_cash_raise_table",
    "execution_quality_table",
    "decision_trace_table",
    "anti_whipsaw_reentry_gate",
    "proposal_reference_sheet",
    "satellite_buy_proposal_sheet",
    "core_satellite_timing_gate_table",
    "engine_feedback_loop_report",
    "prediction_evaluation_improvement_report",
    "rule_lifecycle_governance_report",
]

# Matched against each prose line padded with one space on both sides, so a
# token only hits when it is delimited by spaces or the line edges.
FORBIDDEN_ENGLISH_TOKENS = [
    " PASS ",
    " FAIL ",
    " BLOCKED ",
    " ACTIVE ",
    " INACTIVE ",
    " BUY ",
    " SELL ",
    " TRIM ",
]

# CHECK_63~71: sections that must always be present, paired with the exact
# error message emitted when ``first_index`` cannot find them.
_MANDATORY_HARNESS_SECTIONS = (
    ("fundamental_quality_gate_v1", "CHECK_63_FQ_SECTION_MISSING: FUNDAMENTAL_QUALITY_GATE_V1 섹션 누락"),
    ("horizon_allocation_lock_v1", "CHECK_64_HA_SECTION_MISSING: HORIZON_ALLOCATION_LOCK_V1 섹션 누락"),
    ("smart_money_liquidity_gate_v1", "CHECK_65_SML_SECTION_MISSING: SMART_MONEY_LIQUIDITY_GATE_V1 섹션 누락"),
    ("routing_serving_trace_v2", "CHECK_66_TRACEV2_SECTION_MISSING: ROUTING_SERVING_DECISION_TRACE_V2 섹션 누락"),
    ("fundamental_multifactor_v2", "CHECK_67_FMV2_SECTION_MISSING: FUNDAMENTAL_MULTI_FACTOR_SCORE_V2 섹션 누락"),
    ("earnings_growth_quality_v1", "CHECK_68_EGQ_SECTION_MISSING: EARNINGS_GROWTH_QUALITY_GATE_V1 섹션 누락"),
    ("market_share_proxy_v1", "CHECK_69_MSP_SECTION_MISSING: MARKET_SHARE_MOMENTUM_PROXY_V1 섹션 누락"),
    ("cashflow_stability_v1", "CHECK_70_CFS_SECTION_MISSING: CASHFLOW_STABILITY_GATE_V1 섹션 누락"),
    ("routing_decision_explain_v1", "CHECK_71_RDE_SECTION_MISSING: ROUTING_DECISION_EXPLAIN_LOCK_V1 섹션 누락"),
)

# Start of the next markdown heading line (levels 1-6).
_NEXT_HEADING_RE = re.compile(r"\n#{1,6}\s+")


def load_yaml(path: Path) -> dict[str, Any]:
    """Parse the YAML file at *path*; an empty/falsy document yields ``{}``."""
    with path.open("r", encoding="utf-8") as handle:
        value = yaml.safe_load(handle)
    return value or {}


def first_index(text: str, section_name: str) -> int:
    """Return the offset of the first alias of *section_name* found in *text*.

    Aliases are tried in their ``SECTION_ALIASES`` order and the first one that
    occurs wins (not necessarily the smallest offset).  A name without aliases
    is searched for literally.  Returns -1 when nothing matches.
    """
    for token in SECTION_ALIASES.get(section_name, [section_name]):
        index = text.find(token)
        if index >= 0:
            return index
    return -1


def _load_output_spec() -> dict[str, Any]:
    """Load ``spec/07_output_schema.yaml`` relative to ``ROOT``."""
    return load_yaml(ROOT / "spec" / "07_output_schema.yaml")


def required_sections() -> list[str]:
    """Return the names listed under ``human_report.required_sections``."""
    spec = _load_output_spec()
    sections = ((spec.get("human_report") or {}).get("required_sections")) or []
    return [item.get("name") for item in sections if isinstance(item, dict) and item.get("name")]


def prohibited_headers() -> list[str]:
    """Return ``output_format.prose_control.prohibited_headers`` from the spec."""
    spec = _load_output_spec()
    prose_control = (spec.get("output_format") or {}).get("prose_control") or {}
    return list(prose_control.get("prohibited_headers") or [])


def watch_forbidden_columns() -> list[str]:
    """Return ``human_report.watch_ledger.forbidden_columns`` from the spec."""
    spec = _load_output_spec()
    watch = (spec.get("human_report") or {}).get("watch_ledger") or {}
    return list(watch.get("forbidden_columns") or [])


def _slice_until_next_heading(text: str, start: int) -> str:
    """Return *text* from *start* up to (excluding) the next heading's newline."""
    # Search from start + 1 so a heading beginning exactly at *start* is kept.
    next_heading = _NEXT_HEADING_RE.search(text, start + 1)
    if next_heading:
        return text[start : next_heading.start()]
    return text[start:]


def _sections_under_heading(text: str, heading: str) -> list[str]:
    """Return the section text for every occurrence of *heading* in *text*."""
    found: list[str] = []
    start = text.find(heading)
    while start >= 0:
        section = _slice_until_next_heading(text, start)
        found.append(section)
        # max(..., 1) guarantees forward progress even for a degenerate slice.
        start = text.find(heading, start + max(len(section), 1))
    return found


def find_watch_section(text: str) -> str:
    """Return the WATCH ledger section of *text*, or ``""`` when absent.

    The section starts at the earliest occurrence of any known marker and ends
    before the next markdown heading.
    """
    markers = ["## 투명한 감시 원장", "## reference_price_ledger", "투명한 감시 원장"]
    indexes = [pos for pos in (text.find(marker) for marker in markers) if pos >= 0]
    if not indexes:
        return ""
    return _slice_until_next_heading(text, min(indexes))


def find_section_by_heading(text: str, heading: str) -> str:
    """Return the text from the first *heading* up to the next markdown heading.

    Returns ``""`` when *heading* does not occur in *text*.
    """
    idx = text.find(heading)
    if idx < 0:
        return ""
    return _slice_until_next_heading(text, idx)


def _parse_structured_sections(raw: str) -> list[Any] | None:
    """Return the ``sections`` list of a structured JSON report, else ``None``.

    ``None`` means *raw* is not JSON, is not a JSON object, or has no list
    under ``sections`` - i.e. it should be treated as plain text.
    """
    try:
        payload = json.loads(raw)
    except (ValueError, RecursionError):
        # JSONDecodeError is a ValueError; plain markdown lands here.
        return None
    sections = payload.get("sections") if isinstance(payload, dict) else None
    return sections if isinstance(sections, list) else None


def load_report_text(path: Path) -> str:
    """Validate against rendered markdown when a structured report JSON is provided.

    For a structured report each dict section contributes a ``## <name>`` line
    (when it has a name) followed by its right-stripped markdown (when it has
    any); otherwise the raw file content is returned unchanged.
    """
    raw = path.read_text(encoding="utf-8")
    sections = _parse_structured_sections(raw)
    if sections is None:
        return raw
    parts: list[str] = []
    for section in sections:
        if not isinstance(section, dict):
            continue
        name = str(section.get("name") or "").strip()
        markdown = str(section.get("markdown") or "").rstrip()
        if name:
            parts.append(f"## {name}")
        if markdown:
            parts.append(markdown)
    return "\n".join(parts)


def load_report_sections(path: Path) -> list[dict[str, Any]]:
    """Return the dict entries of a structured report's ``sections`` list.

    Returns ``[]`` for plain-text reports or JSON without a ``sections`` list.
    """
    raw = path.read_text(encoding="utf-8")
    sections = _parse_structured_sections(raw)
    if sections is None:
        return []
    return [s for s in sections if isinstance(s, dict)]


def _check_section_order(text: str, sections: list[dict[str, Any]]) -> list[str]:
    """Report adjacent ``ORDER_TOKENS`` sections that appear out of order."""
    if sections:
        # Structured report: order is the position of the exact section name.
        section_names = [str(sec.get("name") or "") for sec in sections]
        present_positions = [
            (token, section_names.index(token)) for token in ORDER_TOKENS if token in section_names
        ]
    else:
        # Plain text: order is the text offset of the first matching alias.
        positions = [(token, first_index(text, token)) for token in ORDER_TOKENS]
        present_positions = [(token, index) for token, index in positions if index >= 0]
    errors: list[str] = []
    for (left_token, left_index), (right_token, right_index) in zip(present_positions, present_positions[1:]):
        if left_index > right_index:
            errors.append(f"section order violation: {left_token} appears after {right_token}")
    return errors


def _check_watch_ledger(watch_section: str, forbidden_columns: list[str]) -> list[str]:
    """Check the WATCH ledger section for forbidden and required columns."""
    if not watch_section:
        return ["WATCH ledger section missing"]
    errors: list[str] = []
    for column in forbidden_columns:
        if column and column in watch_section:
            errors.append(f"WATCH ledger uses forbidden HTS column: {column}")
    if "참고익절상태(tp1/tp2)" not in watch_section:
        errors.append("WATCH ledger missing tp1/tp2 state column")
    if not re.search(r"tp1=.*tp2=", watch_section):
        errors.append("WATCH ledger missing tp1/tp2 state values")
    if "기준시점(종가/장중)" not in watch_section:
        errors.append("WATCH ledger missing price basis column")
    return errors


def _check_breakout_header(text: str, forbidden_columns: list[str]) -> list[str]:
    """Check the first two table rows of the breakout section for forbidden columns."""
    breakout_section = find_section_by_heading(text, "## 급등 탐지")
    if not breakout_section:
        return []
    header_lines = [ln for ln in breakout_section.splitlines() if ln.strip().startswith("|")]
    header_text = "\n".join(header_lines[:2]) if header_lines else ""
    return [
        f"WATCH breakout uses forbidden HTS column: {column}"
        for column in forbidden_columns
        if column and column in header_text
    ]


def _check_language(text: str) -> list[str]:
    """Report prose lines containing a forbidden English status/action token.

    Blank lines and lines starting with ``|``, ``>`` or a code fence are
    skipped; at most one error is reported per line.
    """
    errors: list[str] = []
    for line in text.splitlines():
        stripped = line.strip()
        if not stripped or stripped.startswith(("|", ">", "```")):
            continue
        padded = f" {stripped} "
        hit = next((token for token in FORBIDDEN_ENGLISH_TOKENS if token in padded), None)
        if hit is not None:
            errors.append(
                f"LANGUAGE_LOCALIZATION_FAIL: forbidden English status/action token found: {hit.strip()}"
            )
    return errors


def _check_failed_checks_guides(text: str) -> list[str]:
    """CHECK_72: the resolution-guide cell of the failed_checks table must be filled.

    Only table rows inside a ``#### failed_checks`` section are inspected; the
    previous implementation scanned every table in the report and therefore
    flagged unrelated tables whose third column was ``-``.  At most one error
    is reported.
    """
    for section in _sections_under_heading(text, "#### failed_checks"):
        for line in section.splitlines():
            row = line.strip()
            # Skip non-table lines, the header row and the separator row.
            if not row.startswith("|") or "해결 안내" in line or "---" in line:
                continue
            cols = [c.strip() for c in row.strip("|").split("|")]
            if len(cols) >= 3 and cols[2] in ("", "-", "N/A", "n/a", "없음"):
                return ["CHECK_72_EXPORT_RESOLUTION_GUIDE_EMPTY: failed_checks 해결 안내 공란"]
    return []


def validate_report(path: Path) -> list[str]:
    """Run every report-quality check on the report at *path*.

    Args:
        path: Markdown/text report, or structured JSON report with a
            ``sections`` list.

    Returns:
        Human-readable error messages in check order; empty when the report
        passes every check.
    """
    text = load_report_text(path)
    sections = load_report_sections(path)
    # Only populated for structured JSON reports; empty for plain text.
    section_map = {str(s.get("name") or ""): str(s.get("markdown") or "") for s in sections}
    errors: list[str] = []

    for section in required_sections():
        if first_index(text, section) < 0:
            errors.append(f"missing required human_report section: {section}")

    for header in prohibited_headers():
        if header and header in text:
            errors.append(f"prohibited prose header found: {header}")

    errors.extend(_check_section_order(text, sections))

    # Parsed once and shared by the WATCH ledger and breakout checks.
    forbidden_columns = watch_forbidden_columns()
    watch_section = (
        section_map.get("reference_price_ledger")
        or section_map.get("watch_breakout_gate")
        or find_watch_section(text)
    )
    errors.extend(_check_watch_ledger(watch_section, forbidden_columns))
    errors.extend(_check_breakout_header(text, forbidden_columns))

    if "기준시점(종가/장중)" not in text:
        errors.append("report missing explicit close/intraday basis label")

    # CHECK_73: LATE_CHASE_ATTRIBUTION_V1 WATCH_PENDING_SAMPLE warning must be shown at the top.
    if "WATCH_PENDING_SAMPLE" in text and "LATE_CHASE_ATTRIBUTION_V1" in text:
        if "샘플 부족 경고" not in text:
            errors.append("CHECK_73_LCA_PENDING_WARNING_MISSING: WATCH_PENDING_SAMPLE 경고 상단 표시 누락")

    if "HTS" in text and first_index(text, "execution_quality_table") < 0:
        errors.append("HTS order content exists without execution_quality_table")
    if re.search(r"\b(BUY|ADD_ON)\b", text) and first_index(text, "alpha_lead_table") < 0:
        errors.append("BUY/ADD_ON content exists without alpha_lead_table")
    if re.search(r"\b(SELL|TRIM)\b", text) and first_index(text, "smart_cash_raise_table") < 0:
        errors.append("SELL/TRIM content exists without smart_cash_raise_table")

    errors.extend(_check_language(text))

    if "| 갭 경고 | 경고 |" in text:
        errors.append("PREDICTION_IMPROVEMENT_GAP_ALERT: gap matrix contains non-zero gap")

    # [PROPOSAL51] CHECK_51~58 - report quality consistency checks.
    # CHECK_51: sell-price inversion markers must not remain in the report.
    # Reference tables (formula coverage etc.) are excluded; only the
    # sell-execution section is inspected, so this check (and CHECK_54) can
    # only fire for structured JSON reports.
    cash_raise_section = section_map.get("smart_cash_raise_table") or ""
    if re.search(r"INVALID_PRICE_INVERSION|INVALID_TRAILING_STOP_BREACH", cash_raise_section):
        errors.append("CHECK_51_SPSV2: INVALID 매도 가격이 보고서에 노출됨 — SPSV2 차단 누락")

    # CHECK_52: portfolio_health_score must not be rendered as a Boolean.
    if re.search(r"portfolio_health_score\s*[=:]\s*(True|False|true|false)\b", text):
        errors.append("CHECK_52_HEALTH_TYPE: portfolio_health_score가 Boolean 값으로 출력됨 — 숫자여야 함")

    # CHECK_53: the semiconductor cluster weight must not be rendered as '-'.
    if re.search(r"cluster_pct\s*=\s*-[%]?[),]", text):
        errors.append("CHECK_53_CLUSTER_PCT: cluster_pct=- 출력됨 — GAS 반환키 불일치(current_cluster_pct→cluster_pct)")

    # CHECK_54: the SCRS-V2 immediate-sell quantity cell must not be '-'.
    selected_combo_section = cash_raise_section if "selected_combo" in cash_raise_section else ""
    if selected_combo_section and re.search(r"(?m)^\|\s*[^|]+\s*\|\s*-\s*\|", selected_combo_section):
        errors.append("CHECK_54_SCRS_RENDER: SCRS-V2 immediate_sell_qty='-' 출력됨 — 렌더링 키 불일치")

    # CHECK_55: the Export Gate result must be shown in the report.
    if not re.search(r"EXPORT_READY|PENDING_EXPORT|REVIEW_ONLY", text):
        errors.append("CHECK_55_EXPORT_GATE: Export Gate 결과(EXPORT_READY/PENDING_EXPORT/REVIEW_ONLY)가 보고서에 없음")

    # CHECK_56: the M5 V1.1 section must be present.
    if "M5 V1.1" not in text and "mandatory_reduction" not in text.lower():
        errors.append("CHECK_56_MANDATORY_REDUCTION: M5 V1.1 강제감축 섹션이 보고서에 없음")

    # CHECK_57: REVIEW_ONLY/PENDING_EXPORT must come with a stated cause.
    if re.search(r"REVIEW_ONLY|PENDING_EXPORT", text):
        if not re.search(r"CHECK_\d+_\w+|resolution_guide|원인", text):
            errors.append("CHECK_57_RESOLUTION_MISSING: REVIEW_ONLY/PENDING_EXPORT 원인 미명시")

    # CHECK_72: resolution-guide cells of the failed_checks table must not be blank.
    errors.extend(_check_failed_checks_guides(text))

    # CHECK_58 (price hierarchy: limit price vs stop price ordering) has never
    # enforced anything - the original branch was a bare ``pass`` - so it
    # intentionally contributes no error here.

    # CHECK_59: the CORE-0 execution-safety declaration must be present (RSO-V1).
    if "집행 안전 선언" not in text and "CORE-0" not in text:
        errors.append("CHECK_59_CORE0_MISSING: [CORE-0] 집행 안전 선언 섹션이 보고서에 없음 — RSO-V1 미적용")

    # CHECK_60: cash-recovery content must carry a "not an order" label (CRDL-V1).
    if re.search(r"현금회복|현금확보", text):
        if "주문 아님" not in text and "참고용" not in text:
            errors.append("CHECK_60_CRDL_REFERENCE_LABEL: 현금회복 섹션에 '주문 아님' 참고 레이블 없음 — CRDL-V1 미적용")

    # CHECK_61: a DQG-V2 completeness grade must be present.
    if not re.search(r"COMPLETE|PARTIAL|INSUFFICIENT", text):
        errors.append("CHECK_61_DQG_V2_GRADE_MISSING: 데이터 완성도 등급(COMPLETE/PARTIAL/INSUFFICIENT)이 보고서에 없음")

    # CHECK_62: portfolio_health_score must be numeric (no Boolean/None).
    if re.search(
        r"portfolio_health_score.*False|portfolio_health_score.*None|포트폴리오 건강도.*False",
        text,
        re.IGNORECASE,
    ):
        errors.append("CHECK_62_HEALTH_SCORE_BOOLEAN: portfolio_health_score가 Boolean/None으로 표시됨 — GAS 타입 버그")

    # CHECK_63~71: Proposal53 harness sections must always be exposed.
    for section_name, message in _MANDATORY_HARNESS_SECTIONS:
        if first_index(text, section_name) < 0:
            errors.append(message)

    return errors


def main() -> int:
    """Command-line entry point.

    Relative report paths are resolved against ``ROOT``.

    Returns:
        0 when validation passes, 1 when any check fails, 2 when the report
        file does not exist.
    """
    parser = argparse.ArgumentParser(description="Validate human investment report rendering quality.")
    parser.add_argument("report_path", help="Markdown or text report path to validate.")
    args = parser.parse_args()
    path = Path(args.report_path)
    if not path.is_absolute():
        path = ROOT / path
    if not path.exists():
        print(f"FAIL: report not found: {path}", file=sys.stderr)
        return 2
    errors = validate_report(path)
    if errors:
        safe_print("FAIL: report quality validation failed")
        for error in errors:
            safe_print(f"- {error}")
        return 1
    safe_print("PASS: report quality validation")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())