캘리브레이션 거버넌스 도구 + WBS-7.1/7.2 실증 격차 가시화
캘리브레이션 백로그 → 우선순위 → 검토리포트 → 승인목록 → 결정초안으로 이어지는 임계값 보정 거버넌스 파이프라인을 추가하고, 2026-06-21 비판적 리뷰에서 발견한 두 가지 stale-수치 문제를 도구 차원에서 해소한다.
- registry_health(): 190여 개 임계값의 source별(SPEC_DERIVED/EXPERT_PRIOR/PROVISIONAL/CALIBRATED) 분포를 매 실행마다 자동 집계 — 수동 grep 불필요
- live_t5_status(): T+5 적중률을 하드코딩(35.86 리터럴) 대신 Temp/prediction_accuracy_harness_v2.json에서 항상 최신값으로 읽음
- spec/calibration_registry.yaml: SEMI_CLUSTER_CAP_RISK_OFF 중복 id로 인한 조용한 무시 버그 수정(SEMI_CLUSTER_CAP_RISK_OFF_MWA로 분리)
- spec/27_bch_calibration_runbook.yaml: current_status_2026_06_21 블록 신설(단일 진실원천), 기존 05-30 스냅샷은 "역사적, 현재로 인용 금지"로 명시
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import subprocess
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
ROOT = Path(__file__).resolve().parents[2]
|
||||
|
||||
|
||||
def _run(script: str) -> None:
    """Run a repository script with the current interpreter and require success.

    The child process is started with ``sys.executable`` and ``cwd=ROOT``, so
    *script* is interpreted relative to ``ROOT``. Its output is captured as
    UTF-8 text rather than streamed to the console.

    Args:
        script: Path of the script to execute.

    Raises:
        subprocess.CalledProcessError: If the script exits with a non-zero
            status. Any captured stdout/stderr is echoed to this process's
            stderr before the exception is re-raised.
    """
    try:
        subprocess.run(
            [sys.executable, script],
            cwd=ROOT,
            check=True,
            capture_output=True,
            text=True,
            encoding="utf-8",
        )
    except subprocess.CalledProcessError as err:
        # The exception message only contains the command and exit status;
        # without this echo the captured streams (and the child's traceback)
        # would be discarded, leaving a failing run undiagnosable.
        for label, stream in (("stdout", err.stdout), ("stderr", err.stderr)):
            if stream:
                print(f"--- {script} {label} ---\n{stream}", file=sys.stderr)
        raise
|
||||
|
||||
|
||||
def test_build_calibration_priority_and_change_ledger(tmp_path):
    """Build the priority list and change ledger, validate the ledger, check both JSON outputs.

    The ``tmp_path`` fixture is requested but not used; the artifacts are read
    from ``ROOT / "Temp"``.
    """
    # Scripts are executed strictly in this order.
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/validate_calibration_change_ledger_v1.py",
    ):
        _run(tool)

    output_dir = ROOT / "Temp"
    priority_file = output_dir / "calibration_priority_v1.json"
    ledger_file = output_dir / "calibration_change_ledger_v4.json"

    priority = json.loads(priority_file.read_text(encoding="utf-8"))
    ledger = json.loads(ledger_file.read_text(encoding="utf-8"))

    # Priority artifact: status marker, minimum size, non-empty list, known basis.
    assert priority["status"] == "CALIBRATION_PRIORITY_OK"
    assert priority["priority_count"] >= 5
    assert priority["priority_list"]
    accepted_bases = {"alpha_feedback_loop_v2", "registry_warning_fallback"}
    assert priority["priority_basis"] in accepted_bases

    # Ledger artifact: formula id, no unledgered threshold changes, minimum size.
    assert ledger["formula_id"] == "CALIBRATION_CHANGE_LEDGER_V4"
    assert ledger["threshold_change_without_ledger_count"] == 0
    assert len(ledger["changes"]) >= 5
|
||||
|
||||
|
||||
def test_calibration_backlog_workflow_and_script_exist():
    """Check that the backlog workflow file exists and package.json lists the four ops scripts."""
    workflow_file = ROOT.joinpath(".gitea", "workflows", "calibration_backlog.yml")
    manifest_text = (ROOT / "package.json").read_text(encoding="utf-8")
    manifest = json.loads(manifest_text)

    assert workflow_file.exists()

    # Each pipeline stage must be present as a key of the "scripts" mapping.
    for script_name in (
        "ops:calibration-backlog",
        "ops:calibration-review-report",
        "ops:calibration-approval-list",
        "ops:calibration-decision-draft",
    ):
        assert script_name in manifest["scripts"]
|
||||
|
||||
|
||||
def test_build_calibration_review_report(tmp_path):
    """Run the scripts up to the review report and check its JSON and Markdown outputs.

    The ``tmp_path`` fixture is requested but not used; the artifacts are read
    from ``ROOT / "Temp"``.
    """
    # Scripts are executed strictly in this order.
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/build_calibration_review_report_v1.py",
    ):
        _run(tool)

    output_dir = ROOT / "Temp"
    json_text = (output_dir / "calibration_review_report_v1.json").read_text(encoding="utf-8")
    payload = json.loads(json_text)
    text = (output_dir / "calibration_review_report_v1.md").read_text(encoding="utf-8")

    assert payload["formula_id"] == "CALIBRATION_REVIEW_REPORT_V1"
    assert payload["summary"]["total_thresholds"] >= 1
    assert payload["top_priority_rows"]

    # The Markdown rendering must contain both of these strings.
    for expected in ("Calibration Review Report", "Review Candidates"):
        assert expected in text
|
||||
|
||||
|
||||
def test_build_calibration_approval_list(tmp_path):
    """Run the scripts up to the approval list and check its JSON and Markdown outputs.

    The ``tmp_path`` fixture is requested but not used; the artifacts are read
    from ``ROOT / "Temp"``.
    """
    # Scripts are executed strictly in this order.
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/build_calibration_review_report_v1.py",
        "tools/build_calibration_approval_list_v1.py",
    ):
        _run(tool)

    output_dir = ROOT / "Temp"
    json_text = (output_dir / "calibration_approval_list_v1.json").read_text(encoding="utf-8")
    payload = json.loads(json_text)
    text = (output_dir / "calibration_approval_list_v1.md").read_text(encoding="utf-8")

    assert payload["formula_id"] == "CALIBRATION_APPROVAL_LIST_V1"
    assert payload["approval_candidate_count"] >= 1
    assert payload["approval_candidates"]

    # The Markdown rendering must contain both of these strings.
    for expected in ("Calibration Approval List", "Approval Candidates"):
        assert expected in text
|
||||
|
||||
|
||||
def test_build_calibration_decision_draft(tmp_path):
    """Run all five build scripts and check the decision draft's JSON and Markdown outputs.

    The ``tmp_path`` fixture is requested but not used; the artifacts are read
    from ``ROOT / "Temp"``.
    """
    # Scripts are executed strictly in this order.
    for tool in (
        "tools/build_calibration_priority_v1.py",
        "tools/build_calibration_change_ledger_v4.py",
        "tools/build_calibration_review_report_v1.py",
        "tools/build_calibration_approval_list_v1.py",
        "tools/build_calibration_decision_draft_v1.py",
    ):
        _run(tool)

    output_dir = ROOT / "Temp"
    json_text = (output_dir / "calibration_decision_draft_v1.json").read_text(encoding="utf-8")
    payload = json.loads(json_text)
    text = (output_dir / "calibration_decision_draft_v1.md").read_text(encoding="utf-8")

    assert payload["formula_id"] == "CALIBRATION_DECISION_DRAFT_V1"
    assert payload["decision_count"] >= 1

    # Lower bound per summary key; REJECT only has to be present and non-negative.
    for verdict, floor in (("APPROVE", 1), ("HOLD", 1), ("REJECT", 0)):
        assert payload["summary"][verdict] >= floor

    # The Markdown rendering must contain both of these strings.
    for expected in ("Calibration Decision Draft", "Decision Table"):
        assert expected in text
|
||||
Reference in New Issue
Block a user