캘리브레이션 거버넌스 도구 + WBS-7.1/7.2 실증 격차 가시화

캘리브레이션 백로그 → 우선순위 → 검토리포트 → 승인목록 → 결정초안으로
이어지는 임계값 보정 거버넌스 파이프라인을 추가하고, 2026-06-21
비판적 리뷰에서 발견한 두 가지 stale-수치 문제를 도구 차원에서 해소한다.

- registry_health(): 190여 개 임계값의 source별(SPEC_DERIVED/EXPERT_PRIOR/
  PROVISIONAL/CALIBRATED) 분포를 매 실행마다 자동 집계 — 수동 grep 불필요
- live_t5_status(): T+5 적중률을 하드코딩(35.86 리터럴) 대신
  Temp/prediction_accuracy_harness_v2.json에서 항상 최신값으로 읽음
- spec/calibration_registry.yaml: SEMI_CLUSTER_CAP_RISK_OFF 중복 id로
  인한 조용한 무시 버그 수정(SEMI_CLUSTER_CAP_RISK_OFF_MWA로 분리)
- spec/27_bch_calibration_runbook.yaml: current_status_2026_06_21 블록
  신설(단일 진실원천), 기존 05-30 스냅샷은 "역사적, 현재로 인용 금지"로 명시
This commit is contained in:
2026-06-21 20:07:32 +09:00
parent f99f9821d2
commit ee4d1fdab8
8 changed files with 855 additions and 42 deletions
+107
View File
@@ -0,0 +1,107 @@
from __future__ import annotations
import json
import subprocess
import sys
from pathlib import Path
# Base directory for every path used in this module: the third ancestor of
# this file (``parents[2]``). It is the working directory for the tool
# subprocesses and the anchor for the Temp/, package.json and .gitea/ lookups.
ROOT = Path(__file__).resolve().parents[2]
def _run(script: str) -> None:
    """Run a script with the current interpreter, surfacing its output on failure.

    Args:
        script: Path of the script to execute. It is resolved by the child
            process relative to ``ROOT``, which is used as the working
            directory.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status. The captured stdout/stderr are written to ``sys.stderr``
            before the exception propagates, because the exception message
            itself only contains the command and the exit code.
    """
    try:
        subprocess.run(
            [sys.executable, script],
            cwd=ROOT,
            check=True,
            capture_output=True,
            text=True,
            encoding="utf-8",
            # A child that emits non-UTF-8 bytes must not turn into a decode
            # error here; undecodable bytes are replaced instead.
            errors="replace",
        )
    except subprocess.CalledProcessError as exc:
        # The streams were captured, so without echoing them the failing
        # tool's own error output would never appear in the test report.
        for label, captured in (("stdout", exc.stdout), ("stderr", exc.stderr)):
            if captured:
                sys.stderr.write(f"--- {script} {label} ---\n{captured}\n")
        raise
def test_build_calibration_priority_and_change_ledger(tmp_path):
    """Build the priority list and change ledger, validate the ledger, check both JSON outputs.

    The three tools are run in order; the resulting ``Temp`` artifacts under
    ``ROOT`` are then loaded and their headline fields asserted.
    """
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/validate_calibration_change_ledger_v1.py",
    ):
        _run(tool)

    temp_dir = ROOT / "Temp"
    with (temp_dir / "calibration_priority_v1.json").open(encoding="utf-8") as fh:
        priority_doc = json.load(fh)
    with (temp_dir / "calibration_change_ledger_v4.json").open(encoding="utf-8") as fh:
        ledger_doc = json.load(fh)

    # Priority artifact: status flag, minimum size, non-empty list, known basis.
    accepted_bases = {"alpha_feedback_loop_v2", "registry_warning_fallback"}
    assert priority_doc["status"] == "CALIBRATION_PRIORITY_OK"
    assert priority_doc["priority_count"] >= 5
    assert priority_doc["priority_list"]
    assert priority_doc["priority_basis"] in accepted_bases

    # Ledger artifact: identity, no unledgered threshold changes, minimum size.
    assert ledger_doc["formula_id"] == "CALIBRATION_CHANGE_LEDGER_V4"
    assert ledger_doc["threshold_change_without_ledger_count"] == 0
    assert len(ledger_doc["changes"]) >= 5
def test_calibration_backlog_workflow_and_script_exist():
    """Check that the backlog workflow file exists and package.json declares the ops scripts."""
    workflow_file = ROOT.joinpath(".gitea", "workflows", "calibration_backlog.yml")
    with (ROOT / "package.json").open(encoding="utf-8") as fh:
        manifest = json.load(fh)

    assert workflow_file.exists()

    # Every calibration pipeline stage must be registered under "scripts".
    for script_name in (
        "ops:calibration-backlog",
        "ops:calibration-review-report",
        "ops:calibration-approval-list",
        "ops:calibration-decision-draft",
    ):
        assert script_name in manifest["scripts"]
def test_build_calibration_review_report(tmp_path):
    """Run the pipeline through the review-report step and check its JSON and Markdown outputs."""
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/build_calibration_review_report_v1.py",
    ):
        _run(tool)

    temp_dir = ROOT / "Temp"
    with (temp_dir / "calibration_review_report_v1.json").open(encoding="utf-8") as fh:
        report = json.load(fh)
    markdown = (temp_dir / "calibration_review_report_v1.md").read_text(encoding="utf-8")

    # Structured output: identity, at least one threshold, non-empty top rows.
    assert report["formula_id"] == "CALIBRATION_REVIEW_REPORT_V1"
    assert report["summary"]["total_thresholds"] >= 1
    assert report["top_priority_rows"]

    # Rendered output: the expected headings are present.
    for heading in ("Calibration Review Report", "Review Candidates"):
        assert heading in markdown
def test_build_calibration_approval_list(tmp_path):
    """Run the pipeline through the approval-list step and check its JSON and Markdown outputs."""
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/build_calibration_review_report_v1.py",
        "tools/build_calibration_approval_list_v1.py",
    ):
        _run(tool)

    temp_dir = ROOT / "Temp"
    with (temp_dir / "calibration_approval_list_v1.json").open(encoding="utf-8") as fh:
        approvals = json.load(fh)
    markdown = (temp_dir / "calibration_approval_list_v1.md").read_text(encoding="utf-8")

    # Structured output: identity, at least one candidate, non-empty candidate list.
    assert approvals["formula_id"] == "CALIBRATION_APPROVAL_LIST_V1"
    assert approvals["approval_candidate_count"] >= 1
    assert approvals["approval_candidates"]

    # Rendered output: the expected headings are present.
    for heading in ("Calibration Approval List", "Approval Candidates"):
        assert heading in markdown
def test_build_calibration_decision_draft(tmp_path):
    """Run the full pipeline through the decision-draft step and check its JSON and Markdown outputs."""
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/build_calibration_review_report_v1.py",
        "tools/build_calibration_approval_list_v1.py",
        "tools/build_calibration_decision_draft_v1.py",
    ):
        _run(tool)

    temp_dir = ROOT / "Temp"
    with (temp_dir / "calibration_decision_draft_v1.json").open(encoding="utf-8") as fh:
        draft = json.load(fh)
    markdown = (temp_dir / "calibration_decision_draft_v1.md").read_text(encoding="utf-8")

    assert draft["formula_id"] == "CALIBRATION_DECISION_DRAFT_V1"
    assert draft["decision_count"] >= 1

    # Lower bound per decision bucket; REJECT only has to be present and non-negative.
    for bucket, minimum in (("APPROVE", 1), ("HOLD", 1), ("REJECT", 0)):
        assert draft["summary"][bucket] >= minimum

    # Rendered output: the expected headings are present.
    for heading in ("Calibration Decision Draft", "Decision Table"):
        assert heading in markdown