Files
QuantEngineByItz/tools/build_data_quality_reconciliation_v1.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

172 lines
6.6 KiB
Python

from __future__ import annotations

import argparse
import json
import math
from pathlib import Path
from typing import Any
# Directory one level above the folder that contains this script; every
# default path below is anchored here.
ROOT = Path(__file__).resolve().parents[1]

# Default for the --json option (payload searched for the harness context).
DEFAULT_JSON = ROOT.joinpath("GatherTradingData.json")

# Default for the --integrity option and for the --out option.
DEFAULT_DI = ROOT.joinpath("Temp", "data_integrity_score_v1.json")
DEFAULT_OUT = ROOT.joinpath("Temp", "data_quality_reconciliation_v1.json")

# Fixed-location artifacts read by main(); they have no CLI override.
DEFAULT_FUND_RAW = ROOT.joinpath("Temp", "fundamental_raw_v1.json")
DEFAULT_FUND_MF3 = ROOT.joinpath("Temp", "fundamental_multifactor_v3.json")
DEFAULT_LLM_FREEDOM = ROOT.joinpath("Temp", "llm_freedom_v1.json")
DEFAULT_COVERAGE = ROOT.joinpath("Temp", "harness_coverage_audit.json")
def _load_json(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
try:
obj = json.loads(path.read_text(encoding="utf-8"))
except Exception:
return {}
return obj if isinstance(obj, dict) else {}
def _as_float(value: Any, default: float = 0.0) -> float:
try:
return float(value)
except Exception:
return default
def _extract_harness_root(payload: dict[str, Any]) -> dict[str, Any]:
h_apex = payload.get("hApex")
data_apex = ((payload.get("data") or {}).get("_harness_context")) if isinstance(payload.get("data"), dict) else None
if isinstance(h_apex, dict) and isinstance(data_apex, dict):
merged = dict(data_apex)
merged.update(h_apex)
return merged
if isinstance(h_apex, dict):
return h_apex
if isinstance(data_apex, dict):
return data_apex
return payload
def _resolve_under_root(raw_path: str) -> Path:
    """Return ``raw_path`` as a ``Path``, anchoring relative paths at ``ROOT``."""
    candidate = Path(raw_path)
    return candidate if candidate.is_absolute() else ROOT / candidate


def _gate_payload_as_dict(raw: Any) -> dict[str, Any]:
    """Normalize the harness ``data_quality_gate_v2_json`` value to a dict.

    The value may already be a dict or may be a JSON-encoded string. Anything
    else, or a string that does not decode to a JSON object, yields ``{}``.
    """
    if isinstance(raw, str):
        try:
            raw = json.loads(raw)
        except (ValueError, RecursionError):
            return {}
    return raw if isinstance(raw, dict) else {}


def main(argv: list[str] | None = None) -> int:
    """Build the DATA_QUALITY_RECONCILIATION_V1 artifact and print a summary.

    Reads the trading-data payload, the data-integrity score and several
    fixed-location artifacts under ``ROOT / "Temp"``, derives a "legacy" and a
    "modern" completeness score, reconciles them, writes the result as JSON
    and prints the key figures to stdout. Missing or unreadable inputs are
    treated as empty objects.

    Command-line options:
        --json       payload file (default ``DEFAULT_JSON``)
        --integrity  data-integrity score file (default ``DEFAULT_DI``)
        --out        output file (default ``DEFAULT_OUT``)
    Relative paths are resolved against ``ROOT``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behaviour of a plain script
            run.

    Returns:
        Always ``0``.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--json", default=str(DEFAULT_JSON))
    ap.add_argument("--integrity", default=str(DEFAULT_DI))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args(argv)

    json_path = _resolve_under_root(args.json)
    integrity_path = _resolve_under_root(args.integrity)
    out_path = _resolve_under_root(args.out)

    data = _load_json(json_path)
    integrity = _load_json(integrity_path)
    fund_raw = _load_json(DEFAULT_FUND_RAW)
    fund_mf3 = _load_json(DEFAULT_FUND_MF3)
    llm_freedom = _load_json(DEFAULT_LLM_FREEDOM)
    coverage = _load_json(DEFAULT_COVERAGE)

    apex = _extract_harness_root(data)
    # "score" is preferred; "data_integrity_score" is the fallback key.
    di_score = _as_float(
        integrity.get("score"),
        _as_float(integrity.get("data_integrity_score")),
    )
    dqg = _gate_payload_as_dict(apex.get("data_quality_gate_v2_json"))

    # [R2-1b] Prefer the Python-authoritative DQG-V2. The GAS original has a
    # field-path bug that yields false negatives by flattening real data to 0,
    # so when a Python-recomputed value exists it is the one to trust.
    dqg_py = _load_json(ROOT / "Temp" / "data_quality_gate_v2_py.json")
    if dqg_py.get("formula_id") == "DATA_QUALITY_GATE_V2_PY":
        legacy_completeness_pct = _as_float(dqg_py.get("overall_completeness_pct"))
        completeness_grade = str(dqg_py.get("completeness_grade") or "MISSING")
    else:
        # "completeness_pct" is consulted only when the preferred key is absent.
        legacy_completeness_pct = _as_float(
            dqg.get("overall_completeness_pct", dqg.get("completeness_pct"))
        )
        completeness_grade = str(dqg.get("completeness_grade") or "MISSING")

    # Modern quality composition based on deterministic artifacts.
    fund_raw_cov = _as_float(fund_raw.get("coverage_pct"))
    fund_mf3_gate = str(fund_mf3.get("gate") or "FAIL")
    fund_mf3_diverse = bool(fund_mf3.get("grade_diverse"))
    # A missing value defaults to 100 % freedom, i.e. a grounding score of 0.
    llm_freedom_pct = _as_float(llm_freedom.get("llm_freedom_pct"), 100.0)
    cov_effective = _as_float(coverage.get("effective_coverage_pct"))

    fund_mf3_score = 0.0
    if fund_mf3_gate in ("PASS", "CAUTION"):
        fund_mf3_score = 100.0 if fund_mf3_diverse else 70.0
    llm_score = max(0.0, 100.0 - llm_freedom_pct)

    # Weighted sum of the five components; the weights add up to 1.0.
    modern_completeness_pct = round(
        (di_score * 0.30)
        + (fund_raw_cov * 0.25)
        + (fund_mf3_score * 0.20)
        + (llm_score * 0.15)
        + (cov_effective * 0.10),
        2,
    )
    # NOTE(review): the headline score takes the max of the two measures while
    # the confidence cap below takes the min - confirm that reporting the more
    # optimistic value here is intended.
    completeness_pct = max(legacy_completeness_pct, modern_completeness_pct)

    # Straightforward approach: no blending or masking; derive the cap with
    # min() over real data.
    # legacy = GAS raw field presence, modern = harness artifact quality.
    # The min of the two is the effective confidence ceiling; artificially
    # raising the number would be a lie.
    # A 0.0 on either side is replaced by the headline score before min() is
    # applied (presumably so an unavailable measure does not force the cap to
    # zero - confirm).
    confidence_cap_basis_score = round(
        min(
            legacy_completeness_pct or completeness_pct,
            modern_completeness_pct or completeness_pct,
        ),
        2,
    )
    quality_gap_pct = round(
        max(0.0, modern_completeness_pct - confidence_cap_basis_score), 2
    )

    # Conflict: schema presence looks excellent while investment quality is poor.
    quality_conflict_flag = bool(di_score >= 95.0 and completeness_pct < 50.0)
    quality_conflict_reason = (
        "SCHEMA_PRESENCE_HIGH_BUT_INVESTMENT_QUALITY_LOW"
        if quality_conflict_flag
        else "NONE"
    )

    result = {
        "formula_id": "DATA_QUALITY_RECONCILIATION_V1",
        "schema_presence_score": di_score,
        "investment_quality_score": completeness_pct,
        "investment_quality_grade": completeness_grade,
        "legacy_investment_quality_score": legacy_completeness_pct,
        "modern_investment_quality_score": modern_completeness_pct,
        "confidence_cap_basis_score": confidence_cap_basis_score,
        "quality_gap_pct": quality_gap_pct,
        "component_scores": {
            "schema_presence_score": di_score,
            "fundamental_raw_coverage_pct": fund_raw_cov,
            "fundamental_multifactor_score": fund_mf3_score,
            "llm_grounding_score": llm_score,
            "formula_runtime_coverage_pct": cov_effective,
        },
        "quality_conflict_flag": quality_conflict_flag,
        "quality_conflict_reason": quality_conflict_reason,
        "gate": "CONFLICT" if quality_conflict_flag else "PASS",
    }

    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(
        json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8"
    )

    print("DATA_QUALITY_RECONCILIATION_V1")
    print(f" schema_presence_score: {di_score:.2f}")
    print(f" investment_quality_score: {completeness_pct:.2f}")
    print(f" confidence_cap_basis_score: {confidence_cap_basis_score:.2f}")
    print(f" quality_conflict_flag: {quality_conflict_flag}")
    print(f" gate: {result['gate']}")
    return 0
# Script entry point: when run directly (not imported), exit the process with
# the integer returned by main().
if __name__ == "__main__":
    raise SystemExit(main())