From ee4d1fdab8824703679f6397add9c2eb64f67e1f Mon Sep 17 00:00:00 2001 From: kjh2064 Date: Sun, 21 Jun 2026 20:07:32 +0900 Subject: [PATCH] =?UTF-8?q?=EC=BA=98=EB=A6=AC=EB=B8=8C=EB=A0=88=EC=9D=B4?= =?UTF-8?q?=EC=85=98=20=EA=B1=B0=EB=B2=84=EB=84=8C=EC=8A=A4=20=EB=8F=84?= =?UTF-8?q?=EA=B5=AC=20+=20WBS-7.1/7.2=20=EC=8B=A4=EC=A6=9D=20=EA=B2=A9?= =?UTF-8?q?=EC=B0=A8=20=EA=B0=80=EC=8B=9C=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 캘리브레이션 백로그 → 우선순위 → 검토리포트 → 승인목록 → 결정초안으로 이어지는 임계값 보정 거버넌스 파이프라인을 추가하고, 2026-06-21 비판적 리뷰에서 발견한 두 가지 stale-수치 문제를 도구 차원에서 해소한다. - registry_source_breakdown() (출력 키: registry_health): 190여 개 임계값의 source별(SPEC_DERIVED/EXPERT_PRIOR/ PROVISIONAL/CALIBRATED) 분포를 매 실행마다 자동 집계 — 수동 grep 불필요 - live_t5_status(): T+5 적중률을 하드코딩(35.86 리터럴) 대신 Temp/prediction_accuracy_harness_v2.json에서 항상 최신값으로 읽음 - spec/calibration_registry.yaml: SEMI_CLUSTER_CAP_RISK_OFF 중복 id로 인한 조용한 무시 버그 수정(SEMI_CLUSTER_CAP_RISK_OFF_MWA로 분리) - spec/27_bch_calibration_runbook.yaml: current_status_2026_06_21 블록 신설(단일 진실원천), 기존 05-30 스냅샷은 "역사적, 현재로 인용 금지"로 명시 --- .gitea/workflows/calibration_backlog.yml | 80 ++++++++ spec/27_bch_calibration_runbook.yaml | 39 +++- spec/calibration_registry.yaml | 29 ++- tests/unit/test_calibration_priority_v1.py | 107 ++++++++++ tools/build_calibration_approval_list_v1.py | 136 ++++++++++++ tools/build_calibration_decision_draft_v1.py | 152 ++++++++++++++ tools/build_calibration_priority_v1.py | 149 ++++++++++---- tools/build_calibration_review_report_v1.py | 205 +++++++++++++++++++ 8 files changed, 855 insertions(+), 42 deletions(-) create mode 100644 .gitea/workflows/calibration_backlog.yml create mode 100644 tests/unit/test_calibration_priority_v1.py create mode 100644 tools/build_calibration_approval_list_v1.py create mode 100644 tools/build_calibration_decision_draft_v1.py create mode 100644 tools/build_calibration_review_report_v1.py diff --git a/.gitea/workflows/calibration_backlog.yml 
b/.gitea/workflows/calibration_backlog.yml new file mode 100644 index 0000000..8c06377 --- /dev/null +++ b/.gitea/workflows/calibration_backlog.yml @@ -0,0 +1,80 @@ +name: Calibration Backlog (Registry Drift Watch) + +on: + schedule: + - cron: "15 2 * * 1-5" # UTC 02:15 = KST 11:15, weekday backlog update + workflow_dispatch: + +jobs: + build-calibration-backlog: + runs-on: self-hosted + + steps: + - name: Checkout Code + run: | + if [ -d .git ]; then + git remote set-url origin http://x-access-token:${{ secrets.GITHUB_TOKEN }}@192.168.123.100:8418/KimJaeHyun/myfinance.git + else + git init + git remote add origin http://x-access-token:${{ secrets.GITHUB_TOKEN }}@192.168.123.100:8418/KimJaeHyun/myfinance.git + fi + git fetch origin main --depth=1 + git reset --hard FETCH_HEAD + + - name: Configure Runtime Paths + run: | + export PATH=/usr/local/bin:$PATH + echo "/usr/local/bin" >> $GITHUB_PATH + /usr/bin/python3 --version + + - name: Setup Python Environment + run: | + VENV_BASE=/volume1/gitea/python_venv + REQ_HASH=$(md5sum tools/build_calibration_priority_v1.py 2>/dev/null | cut -d' ' -f1 || echo "calib-default") + VENV="$VENV_BASE/$REQ_HASH" + + if [ ! -f "$VENV/bin/python" ]; then + mkdir -p "$VENV_BASE" + /usr/bin/python3 -m venv "$VENV" + if [ ! 
-f "$VENV/bin/pip" ]; then + curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py -o get-pip.py + "$VENV/bin/python" get-pip.py --quiet + rm get-pip.py + fi + "$VENV/bin/pip" install --upgrade pip --quiet + "$VENV/bin/pip" install pyyaml --quiet + fi + echo "$VENV/bin" >> $GITHUB_PATH + + - name: Validate Calibration Registry + run: python3 tools/validate_calibration_registry_v1.py + + - name: Build Calibration Priority Backlog + run: python3 tools/build_calibration_priority_v1.py + + - name: Build Calibration Change Ledger + run: python3 tools/build_calibration_change_ledger_v4.py + + - name: Build Calibration Review Report + run: python3 tools/build_calibration_review_report_v1.py + + - name: Build Calibration Approval List + run: python3 tools/build_calibration_approval_list_v1.py + + - name: Build Calibration Decision Draft + run: python3 tools/build_calibration_decision_draft_v1.py + + - name: Validate Calibration Change Ledger + run: python3 tools/validate_calibration_change_ledger_v1.py + + - name: Summarize Backlog + if: always() + run: | + STATUS="${{ job.status }}" + echo "=== Calibration Backlog Result ===" + echo "status: $STATUS" + echo "priority: Temp/calibration_priority_v1.json" + echo "ledger: Temp/calibration_change_ledger_v4.json" + echo "review: Temp/calibration_review_report_v1.md" + echo "approval: Temp/calibration_approval_list_v1.md" + echo "decision: Temp/calibration_decision_draft_v1.md" diff --git a/spec/27_bch_calibration_runbook.yaml b/spec/27_bch_calibration_runbook.yaml index e735c85..d5e6df7 100644 --- a/spec/27_bch_calibration_runbook.yaml +++ b/spec/27_bch_calibration_runbook.yaml @@ -451,7 +451,30 @@ reject_conditions: - "sample_n < 30인 임계값을 '보정완료'로 처리" # ════════════════════════════════════════════════════════════════════════════ -# 현재 달성 현황 (2026-05-30) +# 현재 달성 현황 (2026-06-21 재검증 — WBS-7.2) +# ════════════════════════════════════════════════════════════════════════════ +# 주의: 아래 current_status_2026_05_30 블록은 그 날짜 기준 정적 스냅샷이며, 
+# 이후 갱신되지 않은 채 docs/ROADMAP_WBS.md 등에서 "현재 상태"로 인용되어 +# 서로 다른 시점의 T+5 수치(54.76%/35.86%)가 혼재하는 문제를 일으켰다. +# Temp/honest_performance_guard_v1.json(생성: 2026-06-14)과 +# Temp/prediction_accuracy_harness_v2.json(생성: 2026-06-21, 7일 더 최신)을 +# 직접 재확인한 결과는 다음과 같다 — 이 블록을 단일 진실원천으로 삼는다. +current_status_2026_06_21: + source_of_truth: "Temp/prediction_accuracy_harness_v2.json (as_of_date=2026-06-21, 가장 최신)" + t1_match_rate_pct: 52.94 # sample=68, decisive_sample=53, rate_decisive=67.92 + t5_match_rate_pct: null # sample=0 — INSUFFICIENT_SAMPLES. honest_performance_guard_v1.json(2026-06-14)의 + # 35.86%는 7일 전 스냅샷이며 표본이 0으로 줄어 더 이상 유효하지 않음. + t5_sample_regression_note: > + cases_analyzed가 141건(2026-05-30 기준)에서 t5_sample=0(2026-06-21)으로 감소했다. + evaluation_methodology가 ACTIVE_PASSIVE_SPLIT_V1_INCONCLUSIVE_EXCLUDED로 변경되며 + inconclusive/replay 표본이 제외된 것으로 추정 — 근본 원인은 별도 조사 필요(WBS-7.2 잔여 항목). + calibration_registry_total_thresholds: 190 # spec/calibration_registry.yaml 직접 집계 (구문서의 70은 stale) + calibration_registry_expert_prior_count: 59 + calibration_registry_calibrated_count: 0 + rule: "이 문서를 인용할 때는 항상 as_of_date를 동반 표기하고, 아래 5/30 스냅샷을 '현재'로 인용하지 않는다." 
+ +# ════════════════════════════════════════════════════════════════════════════ +# 과거 달성 현황 (2026-05-30, 역사적 스냅샷 — "현재"로 인용 금지) # ════════════════════════════════════════════════════════════════════════════ current_status_2026_05_30: phase_1_bch: COMPLETE @@ -489,3 +512,17 @@ current_status_2026_05_30: cases_analyzed: 141 miss5_count: 51 next_milestone: "cases_analyzed=30 달성 후 ALEG_V2_GATE1_BLOCK_PCT 보정 심사" + automation_entrypoints: + gitea_schedule: ".gitea/workflows/calibration_backlog.yml" + npm_script: "npm run ops:calibration-backlog" + generated_artifacts: + - Temp/calibration_priority_v1.json + - Temp/calibration_change_ledger_v4.json + - Temp/calibration_review_report_v1.json + - Temp/calibration_review_report_v1.md + - Temp/calibration_approval_list_v1.json + - Temp/calibration_approval_list_v1.md + - Temp/calibration_registry_v1.json + promotion_rules: + provisional: "sample_n >= 10 AND direction confirmed AND change_ledger entry exists" + calibrated: "sample_n >= 30 AND backtest_doc exists AND validator overclaimed_count == 0" diff --git a/spec/calibration_registry.yaml b/spec/calibration_registry.yaml index 392a1d2..0f2a9bb 100644 --- a/spec/calibration_registry.yaml +++ b/spec/calibration_registry.yaml @@ -1,3 +1,7 @@ +has_code_implementation: true +code_path: + - "tools/build_calibration_priority_v1.py" + - "tools/validate_calibration_registry_v1.py" thresholds: - id: ALEG_V2_GATE1_BLOCK_PCT value: 3.0 @@ -913,7 +917,7 @@ thresholds: notes: '이벤트 충격 방어: 20% 고정. KOSPI 비중 제공 시 max(20, weight×0.60).' live_sample_requirement: 30 sunset_date: '2026-09-30' -- id: SEMI_CLUSTER_CAP_RISK_OFF +- id: SEMI_CLUSTER_CAP_RISK_OFF_MWA value: 25.0 unit: pct source: EXPERT_PRIOR @@ -921,7 +925,12 @@ thresholds: last_calibrated: null owner_formula: MARKET_WEIGHT_AWARE_CLUSTER_GATE_V1 gs_location: gas_data_feed.gs:3858 - notes: '하락장: 25%. KOSPI 비중 제공 시 max(25, weight×0.80).' + notes: > + 하락장: 25%. KOSPI 비중 제공 시 max(25, weight×0.80). 
+ WBS-7.1(2026-06-21): 원래 id가 SEMI_CLUSTER_CAP_RISK_OFF였으나 + SEMICONDUCTOR_CLUSTER_GATE_V1 소유의 동명 entry(value=20.0)와 id가 충돌해 + dict 기반 조회 시 한쪽이 조용히 무시되는 버그가 있었다. 외부 참조 0건 확인 후 + 이 entry(MARKET_WEIGHT_AWARE_CLUSTER_GATE_V1 소유)만 _MWA suffix로 분리했다. live_sample_requirement: 30 sunset_date: '2026-09-30' - id: SEMI_CLUSTER_CAP_NEUTRAL @@ -1803,6 +1812,22 @@ thresholds: gs_location: gas_data_feed.gs:2164 notes: Base take-profit score used in profit-lock computation. Migrated from GAS SP constant to registry (P5-T01 wave2). +- id: OVERHANG_PRESSURE_V1_FALLBACK_MULT + value: 1.5 + unit: multiplier_of_avg_volume_5d + source: EXPERT_PRIOR + sample_n: 0 + last_calibrated: null + owner_formula: OVERHANG_PRESSURE_V1 + py_location: spec/13_formula_registry.yaml:OVERHANG_PRESSURE_V1.derived_flags.selling_acceleration.without_20d_fallback + notes: > + WBS-7.5(2026-06-21) — frg_20d_sh 미존재 시 selling_acceleration 폴백을 + "frg_5d_sh < -500000"(절대 주식수, 임시) 에서 "frg_5d_sh < -1.5 * avg_volume_5d" + (해당 종목 평균거래량 비례) 로 교체. 1.5 배수는 with_20d 분기에서 동일 공식이 + 이미 사용하는 가속 임계(frg_20d_sh/4 × 1.5)를 그대로 재사용한 것이며, 새로 + 추정한 값이 아니다. 단, 실거래 표본으로 검증되지 않았으므로 EXPERT_PRIOR로 + 등록한다 — CALIBRATED 승격은 sample_n≥30 확보 후 검토. + calibration_policy: honest_disclosure_required: true overclaimed_calibration_definition: 'source=CALIBRATED 이면서 sample_n < 30 → OVERCLAIMED_CALIBRATION. 
diff --git a/tests/unit/test_calibration_priority_v1.py b/tests/unit/test_calibration_priority_v1.py new file mode 100644 index 0000000..6696a19 --- /dev/null +++ b/tests/unit/test_calibration_priority_v1.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +import json +import subprocess +import sys +from pathlib import Path + + +ROOT = Path(__file__).resolve().parents[2] + + +def _run(script: str) -> None: + subprocess.run( + [sys.executable, script], + cwd=ROOT, + check=True, + capture_output=True, + text=True, + encoding="utf-8", + ) + + +def test_build_calibration_priority_and_change_ledger(tmp_path): + _run("tools/build_calibration_priority_v1.py") + _run("tools/build_calibration_change_ledger_v4.py") + _run("tools/validate_calibration_change_ledger_v1.py") + + priority_path = ROOT / "Temp" / "calibration_priority_v1.json" + ledger_path = ROOT / "Temp" / "calibration_change_ledger_v4.json" + + priority = json.loads(priority_path.read_text(encoding="utf-8")) + ledger = json.loads(ledger_path.read_text(encoding="utf-8")) + + assert priority["status"] == "CALIBRATION_PRIORITY_OK" + assert priority["priority_count"] >= 5 + assert priority["priority_list"] + assert priority["priority_basis"] in {"alpha_feedback_loop_v2", "registry_warning_fallback"} + + assert ledger["formula_id"] == "CALIBRATION_CHANGE_LEDGER_V4" + assert ledger["threshold_change_without_ledger_count"] == 0 + assert len(ledger["changes"]) >= 5 + + +def test_calibration_backlog_workflow_and_script_exist(): + workflow = ROOT / ".gitea" / "workflows" / "calibration_backlog.yml" + package = json.loads((ROOT / "package.json").read_text(encoding="utf-8")) + assert workflow.exists() + assert "ops:calibration-backlog" in package["scripts"] + assert "ops:calibration-review-report" in package["scripts"] + assert "ops:calibration-approval-list" in package["scripts"] + assert "ops:calibration-decision-draft" in package["scripts"] + + +def test_build_calibration_review_report(tmp_path): + 
_run("tools/build_calibration_priority_v1.py") + _run("tools/build_calibration_change_ledger_v4.py") + _run("tools/build_calibration_review_report_v1.py") + + report_json = ROOT / "Temp" / "calibration_review_report_v1.json" + report_md = ROOT / "Temp" / "calibration_review_report_v1.md" + payload = json.loads(report_json.read_text(encoding="utf-8")) + text = report_md.read_text(encoding="utf-8") + + assert payload["formula_id"] == "CALIBRATION_REVIEW_REPORT_V1" + assert payload["summary"]["total_thresholds"] >= 1 + assert payload["top_priority_rows"] + assert "Calibration Review Report" in text + assert "Review Candidates" in text + + +def test_build_calibration_approval_list(tmp_path): + _run("tools/build_calibration_priority_v1.py") + _run("tools/build_calibration_change_ledger_v4.py") + _run("tools/build_calibration_review_report_v1.py") + _run("tools/build_calibration_approval_list_v1.py") + + approval_json = ROOT / "Temp" / "calibration_approval_list_v1.json" + approval_md = ROOT / "Temp" / "calibration_approval_list_v1.md" + payload = json.loads(approval_json.read_text(encoding="utf-8")) + text = approval_md.read_text(encoding="utf-8") + + assert payload["formula_id"] == "CALIBRATION_APPROVAL_LIST_V1" + assert payload["approval_candidate_count"] >= 1 + assert payload["approval_candidates"] + assert "Calibration Approval List" in text + assert "Approval Candidates" in text + + +def test_build_calibration_decision_draft(tmp_path): + _run("tools/build_calibration_priority_v1.py") + _run("tools/build_calibration_change_ledger_v4.py") + _run("tools/build_calibration_review_report_v1.py") + _run("tools/build_calibration_approval_list_v1.py") + _run("tools/build_calibration_decision_draft_v1.py") + + decision_json = ROOT / "Temp" / "calibration_decision_draft_v1.json" + decision_md = ROOT / "Temp" / "calibration_decision_draft_v1.md" + payload = json.loads(decision_json.read_text(encoding="utf-8")) + text = decision_md.read_text(encoding="utf-8") + + assert 
payload["formula_id"] == "CALIBRATION_DECISION_DRAFT_V1" + assert payload["decision_count"] >= 1 + assert payload["summary"]["APPROVE"] >= 1 + assert payload["summary"]["HOLD"] >= 1 + assert payload["summary"]["REJECT"] >= 0 + assert "Calibration Decision Draft" in text + assert "Decision Table" in text diff --git a/tools/build_calibration_approval_list_v1.py b/tools/build_calibration_approval_list_v1.py new file mode 100644 index 0000000..c833250 --- /dev/null +++ b/tools/build_calibration_approval_list_v1.py @@ -0,0 +1,136 @@ +#!/usr/bin/env python3 +""" +build_calibration_approval_list_v1.py +─────────────────────────────────────────────────────────────────────────────── +calibration_review_report_v1.json을 읽어 PROVISIONAL 승격 승인 리스트를 만든다. + +목적: + - source=PROVISIONAL 인 임계값을 별도 승인 대상 리스트로 분리 + - reviewer가 바로 볼 수 있는 Markdown/JSON 산출물 생성 + - PROVISIONAL 승격과 provisional review를 분리해 운영 책임을 명확화 + +출력: + Temp/calibration_approval_list_v1.json + Temp/calibration_approval_list_v1.md + +사용법: + python tools/build_calibration_approval_list_v1.py +""" + +from __future__ import annotations + +import json +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +ROOT = Path(__file__).resolve().parent.parent +REVIEW = ROOT / "Temp" / "calibration_review_report_v1.json" +OUT_JSON = ROOT / "Temp" / "calibration_approval_list_v1.json" +OUT_MD = ROOT / "Temp" / "calibration_approval_list_v1.md" + +if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"): + sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1) + + +def _load_json(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception: + return {} + return data if isinstance(data, dict) else {} + + +def _table(rows: list[dict[str, Any]], keys: list[str], max_rows: int = 25) -> str: + if not rows: + return "_데이터 없음_" + header = "| " + " | 
".join(keys) + " |" + sep = "| " + " | ".join(["---"] * len(keys)) + " |" + body = [] + for row in rows[:max_rows]: + body.append("| " + " | ".join(str(row.get(k, "")).replace("|", "ㅣ") for k in keys) + " |") + suffix = f"\n\n_...총 {len(rows)}행 중 {max_rows}행 표시_" if len(rows) > max_rows else "" + return "\n".join([header, sep, *body]) + suffix + + +def main() -> int: + review = _load_json(REVIEW) + rows = review.get("review_rows") if isinstance(review.get("review_rows"), list) else [] + + approval_candidates: list[dict[str, Any]] = [] + provisional_review_candidates: list[dict[str, Any]] = [] + + for row in rows: + if not isinstance(row, dict): + continue + source = str(row.get("source") or "") + readiness = str(row.get("readiness") or "") + sample_n = int(row.get("sample_n") or 0) + base = { + "id": row.get("id", ""), + "source": source, + "sample_n": sample_n, + "value": row.get("value"), + "unit": row.get("unit", ""), + "owner_formula": row.get("owner_formula", ""), + "readiness": readiness, + "reason": row.get("reason", ""), + } + if source == "PROVISIONAL": + approval_candidates.append(base) + elif readiness == "PROVISIONAL_CANDIDATE": + provisional_review_candidates.append(base) + + approval_candidates.sort(key=lambda item: (-int(item.get("sample_n") or 0), str(item.get("id") or ""))) + provisional_review_candidates.sort(key=lambda item: (-int(item.get("sample_n") or 0), str(item.get("id") or ""))) + + report = { + "formula_id": "CALIBRATION_APPROVAL_LIST_V1", + "generated_at": datetime.now(timezone.utc).isoformat(), + "review_report_path": str(REVIEW), + "approval_candidate_count": len(approval_candidates), + "provisional_review_candidate_count": len(provisional_review_candidates), + "approval_candidates": approval_candidates, + "provisional_review_candidates": provisional_review_candidates, + } + + OUT_JSON.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8") + + md_lines = [ + "# Calibration Approval List", + "", + "## Summary", + 
"", + f"- approval candidates: {len(approval_candidates)}", + f"- provisional review candidates: {len(provisional_review_candidates)}", + "", + "## Approval Candidates", + "", + _table(approval_candidates, ["id", "source", "sample_n", "value", "unit", "owner_formula", "readiness", "reason"]), + "", + "## Provisional Review Candidates", + "", + _table(provisional_review_candidates, ["id", "source", "sample_n", "value", "unit", "owner_formula", "readiness", "reason"]), + "", + "## Evidence", + "", + f"- review report: {REVIEW}", + ] + OUT_MD.write_text("\n".join(md_lines), encoding="utf-8") + + print(json.dumps({ + "formula_id": report["formula_id"], + "gate": "PASS" if approval_candidates else "WARN", + "approval_candidate_count": len(approval_candidates), + "provisional_review_candidate_count": len(provisional_review_candidates), + "json_path": str(OUT_JSON), + "md_path": str(OUT_MD), + }, ensure_ascii=False, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/build_calibration_decision_draft_v1.py b/tools/build_calibration_decision_draft_v1.py new file mode 100644 index 0000000..e25d86e --- /dev/null +++ b/tools/build_calibration_decision_draft_v1.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +""" +build_calibration_decision_draft_v1.py +─────────────────────────────────────────────────────────────────────────────── +calibration_review_report_v1.json / calibration_approval_list_v1.json을 바탕으로 +운영 승인 초안(APPROVE / HOLD / REJECT)을 만든다. 
+ +목적: + - 사람 검토 전 단계에서 결정 초안을 자동 생성 + - source=PROVISIONAL은 원칙적으로 APPROVE + - PROVISIONAL_CANDIDATE는 HOLD + - 나머지는 REJECT 또는 HOLD로 사유를 명시 + +출력: + Temp/calibration_decision_draft_v1.json + Temp/calibration_decision_draft_v1.md + +사용법: + python tools/build_calibration_decision_draft_v1.py +""" + +from __future__ import annotations + +import json +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +ROOT = Path(__file__).resolve().parent.parent +REVIEW = ROOT / "Temp" / "calibration_review_report_v1.json" +APPROVAL = ROOT / "Temp" / "calibration_approval_list_v1.json" +OUT_JSON = ROOT / "Temp" / "calibration_decision_draft_v1.json" +OUT_MD = ROOT / "Temp" / "calibration_decision_draft_v1.md" + +if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"): + sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1) + + +def _load_json(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception: + return {} + return data if isinstance(data, dict) else {} + + +def _table(rows: list[dict[str, Any]], keys: list[str], max_rows: int = 25) -> str: + if not rows: + return "_데이터 없음_" + header = "| " + " | ".join(keys) + " |" + sep = "| " + " | ".join(["---"] * len(keys)) + " |" + body = [] + for row in rows[:max_rows]: + body.append("| " + " | ".join(str(row.get(k, "")).replace("|", "ㅣ") for k in keys) + " |") + suffix = f"\n\n_...총 {len(rows)}행 중 {max_rows}행 표시_" if len(rows) > max_rows else "" + return "\n".join([header, sep, *body]) + suffix + + +def _decide(row: dict[str, Any]) -> tuple[str, str]: + source = str(row.get("source") or "") + readiness = str(row.get("readiness") or "") + sample_n = int(row.get("sample_n") or 0) + if source == "PROVISIONAL" and sample_n >= 30: + return "APPROVE", "source=PROVISIONAL and sample_n>=30" + if source == "PROVISIONAL": + return "APPROVE", 
"source=PROVISIONAL" + if readiness == "PROVISIONAL_CANDIDATE": + return "HOLD", "Needs provisional review" + if sample_n >= 10: + return "HOLD", "Sample present but not provisional" + return "REJECT", "Insufficient evidence" + + +def main() -> int: + review = _load_json(REVIEW) + approval = _load_json(APPROVAL) + + review_rows = review.get("review_rows") if isinstance(review.get("review_rows"), list) else [] + decisions: list[dict[str, Any]] = [] + summary = {"APPROVE": 0, "HOLD": 0, "REJECT": 0} + + for row in review_rows: + if not isinstance(row, dict): + continue + decision, reason = _decide(row) + item = { + "id": row.get("id", ""), + "source": row.get("source", ""), + "sample_n": int(row.get("sample_n") or 0), + "value": row.get("value"), + "unit": row.get("unit", ""), + "owner_formula": row.get("owner_formula", ""), + "readiness": row.get("readiness", ""), + "decision": decision, + "reason": reason, + } + decisions.append(item) + summary[decision] += 1 + + decisions.sort(key=lambda item: ({"APPROVE": 0, "HOLD": 1, "REJECT": 2}.get(str(item.get("decision") or ""), 3), -int(item.get("sample_n") or 0), str(item.get("id") or ""))) + + report = { + "formula_id": "CALIBRATION_DECISION_DRAFT_V1", + "generated_at": datetime.now(timezone.utc).isoformat(), + "review_report_path": str(REVIEW), + "approval_list_path": str(APPROVAL), + "summary": summary, + "decision_count": len(decisions), + "decisions": decisions, + "approval_candidate_count": int(approval.get("approval_candidate_count") or 0), + } + + OUT_JSON.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8") + + md_lines = [ + "# Calibration Decision Draft", + "", + "## Summary", + "", + f"- APPROVE: {summary['APPROVE']}", + f"- HOLD: {summary['HOLD']}", + f"- REJECT: {summary['REJECT']}", + f"- decision_count: {len(decisions)}", + "", + "## Decision Table", + "", + _table(decisions, ["id", "source", "sample_n", "decision", "reason", "owner_formula", "readiness"]), + "", + "## Evidence", 
+ "", + f"- review report: {REVIEW}", + f"- approval list: {APPROVAL}", + ] + OUT_MD.write_text("\n".join(md_lines), encoding="utf-8") + + print(json.dumps({ + "formula_id": report["formula_id"], + "gate": "PASS" if summary["APPROVE"] else "WARN", + "approve_count": summary["APPROVE"], + "hold_count": summary["HOLD"], + "reject_count": summary["REJECT"], + "json_path": str(OUT_JSON), + "md_path": str(OUT_MD), + }, ensure_ascii=False, indent=2)) + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/tools/build_calibration_priority_v1.py b/tools/build_calibration_priority_v1.py index d2e355a..c0b794c 100644 --- a/tools/build_calibration_priority_v1.py +++ b/tools/build_calibration_priority_v1.py @@ -29,6 +29,41 @@ ROOT = Path(__file__).resolve().parent.parent AFL = ROOT / "Temp" / "alpha_feedback_loop_v2.json" REG = ROOT / "spec" / "calibration_registry.yaml" OUTPUT = ROOT / "Temp" / "calibration_priority_v1.json" +PREDICTION_ACCURACY = ROOT / "Temp" / "prediction_accuracy_harness_v2.json" + + +def registry_source_breakdown(reg_index: dict[str, dict]) -> dict: + """WBS-7.1(2026-06-21) — calibration_registry.yaml 전체의 source별 분포를 매 실행마다 + 집계해 'CALIBRATED 비율이 실제로 몇 %인가'를 사람이 grep으로 직접 세지 않아도 + 항상 최신 상태로 노출한다(2026-06-21 비판적 리뷰 0c절에서 0/190 발견 당시 수동 집계 필요했던 문제 해소).""" + counts: dict[str, int] = {"SPEC_DERIVED": 0, "EXPERT_PRIOR": 0, "PROVISIONAL": 0, "CALIBRATED": 0} + for entry in reg_index.values(): + source = str(entry.get("source", "")).upper() + if source in counts: + counts[source] += 1 + total = sum(counts.values()) + return { + "total_thresholds": total, + "counts": counts, + "calibrated_pct": round(100.0 * counts["CALIBRATED"] / total, 2) if total else 0.0, + "unvalidated_pct": round(100.0 * (counts["SPEC_DERIVED"] + counts["EXPERT_PRIOR"]) / total, 2) if total else 0.0, + } + + +def live_t5_status() -> dict: + """WBS-7.2/7.1(2026-06-21) — T+5 수치를 하드코딩하지 않고 항상 최신 산출물에서 읽는다. 
+ Temp/prediction_accuracy_harness_v2.json이 없거나 sample=0이면 정직하게 DATA_GATED로 보고한다.""" + if not PREDICTION_ACCURACY.exists(): + return {"status": "ARTIFACT_MISSING", "t5_sample": 0, "t5_match_rate_pct": None} + data = load_json(PREDICTION_ACCURACY) + t5_sample = int(data.get("t5_sample") or 0) + t5_rate = data.get("t5_op_rate") + return { + "status": "DATA_GATED" if t5_sample == 0 else "OK", + "as_of_date": data.get("as_of_date"), + "t5_sample": t5_sample, + "t5_match_rate_pct": t5_rate, + } if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"): sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1) @@ -90,6 +125,42 @@ def load_registry(p: Path) -> dict[str, dict]: return {t["id"]: t for t in data.get("thresholds", []) if "id" in t} +def _priority_from_registry_entry(entry: dict, source_tag: str, urgency_bias: int) -> dict: + sample_n = int(entry.get("sample_n", 0) or 0) + source = str(entry.get("source", "EXPERT_PRIOR")) + threshold_class = str(entry.get("threshold_class", "standard")) + urgency = urgency_bias + if source == "EXPERT_PRIOR": + urgency += 10 + if source == "PROVISIONAL": + urgency += 20 + if threshold_class == "live_critical": + urgency += 15 + if sample_n == 0: + urgency += 5 + if sample_n > 0: + urgency += max(0, 30 - sample_n) + return { + "calibration_id": entry.get("id", ""), + "current_value": entry.get("value"), + "owner_formula": entry.get("owner_formula", ""), + "source": source, + "sample_n": sample_n, + "linked_factor": source_tag, + "alpha_action": "registry_review", + "urgency_score": urgency, + "calibration_path": ( + ( + "표본 30건 이상 확보 후 PROVISIONAL 승격 → " + if sample_n >= 30 + else f"표본 {30 - sample_n}건 추가 수집 후 PROVISIONAL 승격 → " + ) + + "실측 T+5 승률 기반 최적값 backtest → CALIBRATED 확정" + ), + "rationale": f"source={source}, class={threshold_class}, sample_n={sample_n}", + } + + def main() -> int: afl_data = load_json(AFL) reg_index = load_registry(REG) @@ -112,48 +183,32 @@ def main() -> 
int: priority_list: list[dict] = [] for adj in adjustments: - factor = adj.get("factor", "") - action = adj.get("action", "") - rationale = adj.get("rationale", "") - reg_ids = FACTOR_TO_REGISTRY.get(factor, []) + factor = str(adj.get("factor", "")) + action = str(adj.get("action", "")) + rationale = str(adj.get("rationale", "")) + reg_ids = FACTOR_TO_REGISTRY.get(factor, []) for rid in reg_ids: reg_entry = reg_index.get(rid) if not reg_entry: continue - source = reg_entry.get("source", "EXPERT_PRIOR") - sample_n = int(reg_entry.get("sample_n", 0) or 0) - value = reg_entry.get("value") - formula = reg_entry.get("owner_formula", "") + item = _priority_from_registry_entry(reg_entry, factor, miss5_count if factor == "passive_signal_quality" else 0) + item["alpha_action"] = action or "feedback_review" + if rationale: + item["rationale"] = rationale[:200] + priority_list.append(item) - # 보정 우선도 점수: miss5_count 기여 + 미보정 가중 - urgency = 0 - if factor == "passive_signal_quality": - urgency += miss5_count # miss가 많을수록 높은 urgency - if source == "EXPERT_PRIOR": - urgency += 10 - if sample_n == 0: - urgency += 5 - - priority_list.append({ - "calibration_id": rid, - "current_value": value, - "owner_formula": formula, - "source": source, - "sample_n": sample_n, - "linked_factor": factor, - "alpha_action": action, - "urgency_score": urgency, - "calibration_path": ( - ( - "표본 30건 이상 확보 후 PROVISIONAL 승격 → " - if sample_n >= 30 - else f"표본 {30 - sample_n}건 추가 수집 후 PROVISIONAL 승격 → " - ) - + "실측 T+5 승률 기반 최적값 backtest → CALIBRATED 확정" - ), - "rationale": rationale[:200] if rationale else "", - }) + if not priority_list: + # alpha_feedback_loop가 비어 있어도 registry 자체의 보정 debt를 추적할 수 있게 한다. 
+ for reg_id, reg_entry in reg_index.items(): + source = str(reg_entry.get("source", "EXPERT_PRIOR")) + if source not in {"EXPERT_PRIOR", "PROVISIONAL"}: + continue + tag = f"registry:{source.lower()}" + item = _priority_from_registry_entry(reg_entry, tag, 0) + if source == "PROVISIONAL": + item["urgency_score"] += 5 + priority_list.append(item) # 중복 제거 (같은 rid, 높은 urgency 유지) seen: dict[str, dict] = {} @@ -177,7 +232,19 @@ def main() -> int: print(f" Step 2 (30건 후): ALEG_V2_GATE1_BLOCK_PCT 3.0% → 실측 최적값으로 PROVISIONAL 승격") print(f" Step 3 (50건 후): DSD_V1 가중치 logistic regression 최적화") print(f" Step 4 (100건 후): K2_SPLIT_RATIO backtest 비교 → CALIBRATED 확정") - print(f" miss5_count={miss5_count}건 → passive_signal_quality 개선이 T+5 35.86%→50%+ 핵심") + registry_health = registry_source_breakdown(reg_index) + t5_status = live_t5_status() + + print(f"\n [캘리브레이션 레지스트리 건강도] (WBS-7.1)") + print(f" total={registry_health['total_thresholds']} {registry_health['counts']}") + print(f" CALIBRATED={registry_health['calibrated_pct']}% 미검증(SPEC_DERIVED+EXPERT_PRIOR)={registry_health['unvalidated_pct']}%") + + if t5_status["status"] == "DATA_GATED": + print(f" miss5_count={miss5_count}건 → T+5 현재 DATA_GATED(sample=0) — passive_signal_quality 개선 영향은 표본 누적 후 측정 가능") + elif t5_status["status"] == "ARTIFACT_MISSING": + print(f" miss5_count={miss5_count}건 → T+5 산출물 없음(Temp/prediction_accuracy_harness_v2.json) — 먼저 생성 필요") + else: + print(f" miss5_count={miss5_count}건 → T+5={t5_status['t5_match_rate_pct']}% (as_of={t5_status.get('as_of_date')}) → passive_signal_quality 개선 핵심") result = { "status": "CALIBRATION_PRIORITY_OK", @@ -191,10 +258,14 @@ def main() -> int: "step3": "50건 후: DSD_V1 가중치 logistic regression 최적화", "step4": "100건 후: K2_SPLIT_RATIO 30/70~60/40 backtest → CALIBRATED", }, + "priority_basis": "alpha_feedback_loop_v2" if adjustments else "registry_warning_fallback", + "registry_health": registry_health, "target_improvement": { - "current_t5_pct": 35.86, + "t5_status": 
t5_status["status"], + "current_t5_pct": t5_status["t5_match_rate_pct"], + "t5_as_of_date": t5_status.get("as_of_date"), "target_t5_pct": 55.0, - "key_lever": "passive_signal_quality (miss5_count=51건 개선)", + "key_lever": f"passive_signal_quality (miss5_count={miss5_count}건 개선)", }, } diff --git a/tools/build_calibration_review_report_v1.py b/tools/build_calibration_review_report_v1.py new file mode 100644 index 0000000..6be6442 --- /dev/null +++ b/tools/build_calibration_review_report_v1.py @@ -0,0 +1,205 @@ +#!/usr/bin/env python3 +""" +build_calibration_review_report_v1.py +─────────────────────────────────────────────────────────────────────────────── +calibration_registry.yaml + calibration_priority_v1.json + calibration_change_ledger_v4.json +을 묶어 운영용 보정 리뷰 리포트를 만든다. + +목적: + - PROVISIONAL / CALIBRATED 승격 후보를 사람이 읽을 수 있게 정리 + - registry warning fallback 상태를 숨기지 않고 그대로 공시 + - 월간 보정 운영에서 바로 참고 가능한 Markdown + JSON 산출물 생성 + +출력: + Temp/calibration_review_report_v1.json + Temp/calibration_review_report_v1.md + +사용법: + python tools/build_calibration_review_report_v1.py +""" + +from __future__ import annotations + +import json +import sys +from datetime import datetime, timezone +from pathlib import Path +from typing import Any + +import yaml + +ROOT = Path(__file__).resolve().parent.parent +REGISTRY = ROOT / "spec" / "calibration_registry.yaml" +PRIORITY = ROOT / "Temp" / "calibration_priority_v1.json" +LEDGER = ROOT / "Temp" / "calibration_change_ledger_v4.json" +OUT_JSON = ROOT / "Temp" / "calibration_review_report_v1.json" +OUT_MD = ROOT / "Temp" / "calibration_review_report_v1.md" + +if sys.stdout.encoding and sys.stdout.encoding.lower() not in ("utf-8", "utf8"): + sys.stdout = open(sys.stdout.fileno(), mode="w", encoding="utf-8", buffering=1) + + +def _load_json(path: Path) -> dict[str, Any]: + if not path.exists(): + return {} + try: + data = json.loads(path.read_text(encoding="utf-8")) + except Exception: + return {} + return data if isinstance(data, dict) 
def _as_int(value: Any, default: int = 0) -> int:
    """Coerce a loosely typed count to ``int`` without raising.

    Registry and ledger files are edited by hand, so counters may arrive as
    ``None``, numeric strings ("30", "12.0") or free text ("n/a").

    Args:
        value: The raw value to convert.
        default: Returned when ``value`` cannot be interpreted as a number.

    Returns:
        The integer value (floats are truncated), or ``default``.
    """
    try:
        return int(value)
    except (TypeError, ValueError, OverflowError):
        pass
    try:
        # Second chance for decimal strings such as "12.0".
        return int(float(value))
    except (TypeError, ValueError, OverflowError):
        return default


def _load_registry(path: Path) -> list[dict[str, Any]]:
    """Return the threshold entries declared in the calibration registry.

    Args:
        path: Location of the registry YAML file.

    Returns:
        Every mapping listed under the top-level ``thresholds`` key. An empty
        list is returned when the file is missing or empty, when the top level
        is not a mapping, or when ``thresholds`` is absent, null or not a list.

    Raises:
        yaml.YAMLError: If the file exists but cannot be parsed. The registry
            is the primary input, so a corrupt file is not silently ignored.
    """
    if not path.exists():
        return []
    data = yaml.safe_load(path.read_text(encoding="utf-8"))
    if not isinstance(data, dict):
        return []
    thresholds = data.get("thresholds")
    if not isinstance(thresholds, list):
        # Covers `thresholds: null`, which previously raised TypeError.
        return []
    return [item for item in thresholds if isinstance(item, dict)]


def _readiness(entry: dict[str, Any]) -> tuple[str, str]:
    """Classify one registry entry by how close it is to promotion.

    Args:
        entry: A threshold mapping; only ``source`` and ``sample_n`` are read.
            A missing ``source`` is treated as ``EXPERT_PRIOR`` and an
            unparseable ``sample_n`` as 0.

    Returns:
        A ``(readiness_code, human_readable_reason)`` pair.
    """
    source = str(entry.get("source") or "EXPERT_PRIOR")
    sample_n = _as_int(entry.get("sample_n"))
    if source == "CALIBRATED":
        return "CALIBRATED", "Already calibrated"
    if source == "PROVISIONAL":
        if sample_n >= 30:
            return "CALIBRATION_READY", "Ready for calibrated review"
        return "PROVISIONAL_ACTIVE", "Provisional with live samples"
    if sample_n >= 10:
        return "PROVISIONAL_CANDIDATE", "Candidate for provisional review"
    return "WATCH", "Keep under watch"


def _table(rows: list[dict[str, Any]], keys: list[str], max_rows: int = 25) -> str:
    """Render ``rows`` as a Markdown table restricted to ``keys``.

    Args:
        rows: Row mappings; keys missing from a row render as empty cells.
        keys: Column names, in display order.
        max_rows: Maximum number of rows to render.

    Returns:
        The Markdown table, followed by a truncation note when ``rows`` is
        longer than ``max_rows``, or a placeholder when ``rows`` is empty.
    """
    if not rows:
        return "_데이터 없음_"

    def _cell(value: Any) -> str:
        # A raw line break would end the table row and a raw pipe would start
        # a new column, so both are neutralised before rendering.
        return " ".join(str(value).splitlines()).replace("|", "ㅣ")

    header = "| " + " | ".join(keys) + " |"
    sep = "| " + " | ".join(["---"] * len(keys)) + " |"
    body = [
        "| " + " | ".join(_cell(row.get(k, "")) for k in keys) + " |"
        for row in rows[:max_rows]
    ]
    suffix = f"\n\n_...총 {len(rows)}행 중 {max_rows}행 표시_" if len(rows) > max_rows else ""
    return "\n".join([header, sep, *body]) + suffix


# Readiness codes that put a threshold on the human review list.
_REVIEWABLE_READINESS = frozenset(
    {"PROVISIONAL_CANDIDATE", "CALIBRATION_READY", "PROVISIONAL_ACTIVE"}
)


def _collect_review_rows(
    registry: list[dict[str, Any]],
) -> tuple[dict[str, int], dict[str, int], list[dict[str, Any]]]:
    """Tally registry entries and pick the ones that need a human review.

    Returns:
        ``(source_counts, readiness_counts, review_rows)`` where the two count
        mappings cover every entry and ``review_rows`` holds only entries whose
        readiness is in ``_REVIEWABLE_READINESS``.
    """
    source_counts: dict[str, int] = {}
    readiness_counts: dict[str, int] = {}
    review_rows: list[dict[str, Any]] = []
    for entry in registry:
        source = str(entry.get("source") or "EXPERT_PRIOR")
        source_counts[source] = source_counts.get(source, 0) + 1
        readiness, reason = _readiness(entry)
        readiness_counts[readiness] = readiness_counts.get(readiness, 0) + 1
        if readiness not in _REVIEWABLE_READINESS:
            continue
        review_rows.append(
            {
                "id": entry.get("id", ""),
                "source": source,
                "sample_n": _as_int(entry.get("sample_n")),
                "value": entry.get("value"),
                "unit": entry.get("unit", ""),
                "owner_formula": entry.get("owner_formula", ""),
                "readiness": readiness,
                "reason": reason,
                "notes": str(entry.get("notes") or "")[:120],
            }
        )
    return source_counts, readiness_counts, review_rows


def _collect_priority_rows(
    priority: dict[str, Any], limit: int = 20
) -> tuple[list[dict[str, Any]], int]:
    """Extract the leading priority items from the priority artifact.

    Malformed (non-mapping) items are dropped before the limit is applied so
    they do not consume slots in the top-``limit`` view.

    Returns:
        ``(rows, total_valid_items)``; ``rows`` has at most ``limit`` entries.
    """
    raw = priority.get("priority_list")
    items = [item for item in raw if isinstance(item, dict)] if isinstance(raw, list) else []
    rows = [
        {
            "calibration_id": item.get("calibration_id", ""),
            "source": item.get("source", ""),
            "sample_n": item.get("sample_n", 0),
            "urgency_score": item.get("urgency_score", 0),
            "linked_factor": item.get("linked_factor", ""),
            "owner_formula": item.get("owner_formula", ""),
        }
        for item in items[:limit]
    ]
    return rows, len(items)


def _render_markdown(report: dict[str, Any]) -> str:
    """Render the report mapping assembled by ``main`` as a Markdown document."""
    summary = report["summary"]
    md_lines = [
        "# Calibration Review Report",
        "",
        "## Summary",
        "",
        f"- total thresholds: {summary['total_thresholds']}",
        f"- priority count: {summary['priority_count']}",
        f"- priority basis: {summary['priority_basis']}",
        f"- ledger change count: {summary['ledger_change_count']}",
        f"- ledger without change count: {summary['ledger_without_change_count']}",
        "",
        "### Source Counts",
        "",
        _table(
            [{"source": k, "count": v} for k, v in sorted(summary["source_counts"].items())],
            ["source", "count"],
            max_rows=50,
        ),
        "",
        "### Readiness Counts",
        "",
        _table(
            [{"readiness": k, "count": v} for k, v in sorted(summary["readiness_counts"].items())],
            ["readiness", "count"],
            max_rows=50,
        ),
        "",
        "## Top Priority Rows",
        "",
        _table(
            report["top_priority_rows"],
            ["calibration_id", "source", "sample_n", "urgency_score", "linked_factor", "owner_formula"],
        ),
        "",
        "## Review Candidates",
        "",
        _table(
            report["review_rows"],
            ["id", "source", "sample_n", "value", "unit", "owner_formula", "readiness", "reason"],
        ),
        "",
        "## Evidence",
        "",
        f"- registry: {report['registry_path']}",
        f"- priority: {report['priority_path']}",
        f"- ledger: {report['ledger_path']}",
    ]
    return "\n".join(md_lines)


def main() -> int:
    """Build the calibration review report and write it as JSON and Markdown.

    Reads the registry, the priority artifact and the change ledger, writes
    ``OUT_JSON`` and ``OUT_MD`` (creating their directory when needed) and
    prints a short JSON status to stdout.

    Returns:
        Always 0; an empty report is signalled through ``gate: WARN`` in the
        printed status rather than through the exit code.
    """
    registry = _load_registry(REGISTRY)
    priority = _load_json(PRIORITY)
    ledger = _load_json(LEDGER)

    source_counts, readiness_counts, reviewed_rows = _collect_review_rows(registry)
    priority_rows, priority_total = _collect_priority_rows(priority)
    changes = ledger.get("changes")

    report = {
        "formula_id": "CALIBRATION_REVIEW_REPORT_V1",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "registry_path": str(REGISTRY),
        "priority_path": str(PRIORITY),
        "ledger_path": str(LEDGER),
        "summary": {
            "total_thresholds": len(registry),
            "source_counts": source_counts,
            "readiness_counts": readiness_counts,
            # Fall back to the full list length, not the truncated top rows.
            "priority_count": _as_int(priority.get("priority_count")) or priority_total,
            # Discloses whether priorities came from the registry-warning fallback.
            "priority_basis": str(priority.get("priority_basis") or "UNKNOWN"),
            "ledger_change_count": len(changes) if isinstance(changes, list) else 0,
            "ledger_without_change_count": _as_int(
                ledger.get("threshold_change_without_ledger_count")
            ),
        },
        "top_priority_rows": priority_rows,
        "review_rows": reviewed_rows,
    }

    # The output directory may not exist on a fresh checkout.
    OUT_JSON.parent.mkdir(parents=True, exist_ok=True)
    OUT_JSON.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
    OUT_MD.parent.mkdir(parents=True, exist_ok=True)
    OUT_MD.write_text(_render_markdown(report), encoding="utf-8")

    print(json.dumps({
        "formula_id": report["formula_id"],
        "gate": "PASS" if reviewed_rows or priority_rows else "WARN",
        "review_rows": len(reviewed_rows),
        "priority_rows": len(priority_rows),
        "json_path": str(OUT_JSON),
        "md_path": str(OUT_MD),
    }, ensure_ascii=False, indent=2))
    return 0


if __name__ == "__main__":
    raise SystemExit(main())