#!/usr/bin/env python3 from __future__ import annotations import argparse import json import re import yaml from pathlib import Path ROOT = Path(__file__).resolve().parents[1] # List of allowed labels indicating origin ALLOWED_LABELS = [ "operational_live", "shadow_live", "replay", "estimated", "참고용", "백테스트", "미검증", "리플레이", "시뮬레이션", "라이브", "주의", "예상" ] def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--registry", default="spec/25_canonical_metrics_registry.yaml") ap.add_argument("--report", default="Temp/operational_report.json") args = ap.parse_args() registry_path = ROOT / args.registry report_path = ROOT / args.report if not registry_path.exists(): print(f"Registry file not found: {registry_path}") return 1 if not report_path.exists(): print(f"Report file not found: {report_path}") return 1 registry = yaml.safe_load(registry_path.read_text(encoding="utf-8")) report_data = json.loads(report_path.read_text(encoding="utf-8")) # Extract all markdown text to scan report_text = "" if isinstance(report_data, dict) and "sections" in report_data: report_text = "\n".join([str(s.get("markdown", "")) for s in report_data["sections"] if isinstance(s, dict)]) else: report_text = str(report_data) metrics = registry.get("metrics", {}) per_ticker_metrics = registry.get("per_ticker_metrics", {}) collisions = [] unlabeled_replay_metrics = [] # 1. Alias collision check: ensure no wrong aliases exist in the report markdown for m_id, item in metrics.items(): fallback_sources = item.get("fallback_sources", []) for fallback in fallback_sources: if fallback in report_text: collisions.append({ "metric_id": m_id, "found_fallback": fallback, "reason": "Report references a non-canonical/fallback metric source." }) for m_id, item in per_ticker_metrics.items(): wrong_alias = item.get("wrong_alias_in_renderer") or item.get("alias_in_renderer_wrong") if wrong_alias and wrong_alias in report_text: collisions.append({ "metric_id": m_id, "found_wrong_alias": wrong_alias, "reason": f"Report contains wrong alias for per_ticker_metric: {wrong_alias}" }) # 2. Replay labeling check: lines = report_text.splitlines() for lineno, line in enumerate(lines, start=1): # Trigger on keywords indicating simulation or replay if "리플레이" in line or "replay" in line or "estimated" in line or "시뮬레이션" in line: # Ensure the line has at least one allowed label/origin marker if not any(lbl.lower() in line.lower() for lbl in ALLOWED_LABELS): unlabeled_replay_metrics.append({ "line": lineno, "text": line.strip(), "reason": "Replay/estimated metric found without clear source/origin label." }) result = { "formula_id": "METRIC_ALIAS_COLLISION_AUDIT_V1", "metric_alias_collision_count": len(collisions), "unlabeled_replay_metric_count": len(unlabeled_replay_metrics), "collisions": collisions[:50], "unlabeled_metrics": unlabeled_replay_metrics[:50], "gate": "PASS" if (len(collisions) == 0 and len(unlabeled_replay_metrics) == 0) else "FAIL" } out_path = ROOT / "Temp" / "metric_alias_collision_audit_v1.json" out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") print(json.dumps(result, ensure_ascii=True, indent=2)) return 0 if result["gate"] == "PASS" else 1 if __name__ == "__main__": import sys sys.exit(main())