Files
QuantEngineByItz/tools/build_data_integrity_score_v1.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

196 lines
8.3 KiB
Python

from __future__ import annotations
import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
from typing import Any
import yaml
# Parent of the directory that contains this script; every default path below
# is anchored here.
ROOT = Path(__file__).resolve().parents[1]
# Default input document read when --json is not given.
DEFAULT_JSON = ROOT.joinpath("GatherTradingData.json")
# Default location of the written score report when --out is not given.
DEFAULT_OUT = ROOT.joinpath("Temp", "data_integrity_score_v1.json")
# Default YAML policy file read when --policy is not given.
DEFAULT_POLICY = ROOT.joinpath("spec", "strategy_execution_lock_policy.yaml")
def _load(path: Path) -> dict[str, Any]:
    """Read *path* as UTF-8 JSON and return its top-level object.

    Args:
        path: File to read.

    Returns:
        The parsed document when its top level is a JSON object, otherwise an
        empty dict.

    Raises:
        OSError: If the file cannot be read.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    parsed = json.loads(path.read_text(encoding="utf-8"))
    if not isinstance(parsed, dict):
        return {}
    return parsed
def _rows(v: Any) -> list[dict[str, Any]]:
    """Return the dict elements of *v*, preserving their order.

    Anything that is not a list (including tuples and ``None``) yields an
    empty list; non-dict elements inside a list are dropped.
    """
    if not isinstance(v, list):
        return []
    return [item for item in v if isinstance(item, dict)]
def _load_policy(path: Path) -> dict[str, Any]:
    """Return the ``data_integrity_score_v1`` section of a YAML policy file.

    The section is looked up under the top-level
    ``strategy_execution_lock_policy`` mapping.

    Args:
        path: Location of the YAML policy file.

    Returns:
        The policy mapping, or an empty dict when the file does not exist,
        cannot be read or parsed, or does not contain the expected nesting of
        mappings.
    """
    if not path.exists():
        return {}
    try:
        document = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception:
        # Any read or parse failure is treated the same as a missing file.
        return {}
    if not isinstance(document, dict):
        return {}
    lock_policy = document.get("strategy_execution_lock_policy")
    if not isinstance(lock_policy, dict):
        return {}
    section = lock_policy.get("data_integrity_score_v1")
    if isinstance(section, dict):
        return section
    return {}
def _is_placeholder(v: Any, placeholder_tokens: set[Any]) -> bool:
    """Tell whether *v* is one of the configured placeholder tokens.

    Strings are compared after stripping surrounding whitespace. ``None``
    matches only when ``None`` itself is a token. Every other type (numbers,
    containers, ...) is never a placeholder.
    """
    if isinstance(v, str):
        return v.strip() in placeholder_tokens
    return v is None and None in placeholder_tokens
def _is_allowed_tp_stale(row: dict[str, Any], field: str, val: Any) -> bool:
    """Tell whether a ``None`` take-profit price is excused by the row's state.

    Only ``tp1_price`` and ``tp2_price`` can be excused, and only when *val*
    is ``None``. The matching ``tp1_state`` / ``tp2_state`` value is compared
    case-insensitively against the states accepted for that field.

    Args:
        row: Price row holding the state columns.
        field: Name of the price field being checked.
        val: Value found in that field.

    Returns:
        ``True`` when the missing price is acceptable, ``False`` otherwise.
    """
    if val is not None:
        return False
    if field == "tp1_price":
        state_key = "tp1_state"
        accepted = {
            "TP1_ALREADY_TRIGGERED",
            "DEFERRED_SECULAR_LEADER",
            "DEFERRED_SECULAR_LEADER_OVERHEAT_PENDING",
            "TRAILING_STOP_PRIORITY_SECULAR_LEADER",
        }
    elif field == "tp2_price":
        state_key = "tp2_state"
        accepted = {"TP2_ALREADY_TRIGGERED"}
    else:
        return False
    state = str(row.get(state_key) or "").upper()
    return state in accepted
def _policy_float(mapping: dict[str, Any], key: str, default: float) -> float:
    """Return ``mapping[key]`` as a float, falling back to *default*.

    The fallback applies only when the key is absent, ``None``, a bool, or not
    convertible to a number. An explicit ``0`` is honoured, so a policy can
    disable a weight or a penalty instead of silently getting the default.
    """
    raw = mapping.get(key)
    if raw is None or isinstance(raw, bool):
        return default
    try:
        return float(raw)
    except (TypeError, ValueError):
        return default


def _resolve_under_root(raw: str) -> Path:
    """Return *raw* as a path, anchoring relative paths at ``ROOT``."""
    candidate = Path(raw)
    return candidate if candidate.is_absolute() else ROOT / candidate


def _capture_age_hours(captured_at: str) -> float | None:
    """Return the age of an ISO-8601 timestamp in hours, or ``None`` if unusable.

    A trailing ``Z`` is accepted as UTC and a naive timestamp is taken to be
    UTC. Timestamps in the future are clamped to an age of ``0.0``.
    """
    if not captured_at:
        return None
    try:
        stamp = datetime.fromisoformat(captured_at.replace("Z", "+00:00"))
        if stamp.tzinfo is None:
            stamp = stamp.replace(tzinfo=timezone.utc)
        elapsed = datetime.now(timezone.utc) - stamp.astimezone(timezone.utc)
    except (ValueError, OverflowError):
        return None
    return max(0.0, elapsed.total_seconds() / 3600.0)


def _build_result(
    payload: dict[str, Any], policy: dict[str, Any], policy_path: Path
) -> dict[str, Any]:
    """Compute the DATA_INTEGRITY_SCORE_V1 report for one input document.

    Args:
        payload: Parsed input JSON; its ``data`` mapping holds the sheets and
            the ``_harness_context`` mapping.
        policy: Scoring policy; every key is optional and has a built-in
            default.
        policy_path: Path the policy was loaded from (echoed in the report).

    Returns:
        A JSON-serialisable dict with the score, grade, gate, the individual
        metrics, and the effective policy values.
    """
    data = payload.get("data") if isinstance(payload.get("data"), dict) else {}
    harness = data.get("_harness_context") if isinstance(data.get("_harness_context"), dict) else {}

    # --- Sheet completeness: share of required sheets that are non-empty lists.
    raw_sheets = policy.get("required_sheets")
    required_sheets = raw_sheets if isinstance(raw_sheets, list) else [
        "data_feed", "sector_flow", "macro", "event_risk", "core_satellite", "sell_priority",
    ]
    present = sum(
        1 for name in required_sheets if isinstance(data.get(name), list) and data.get(name)
    )
    # An empty requirement list is vacuously complete (and must not divide by zero).
    sheet_completeness = present / len(required_sheets) * 100.0 if required_sheets else 100.0

    # --- Cross-sheet mismatch: blueprint tickers that have no price row.
    prices = _rows(harness.get("prices_json"))
    price_keys = {str(r.get("ticker") or "") for r in prices}
    bp_keys = {str(r.get("ticker") or "") for r in _rows(harness.get("order_blueprint_json"))}
    # Blank tickers are excluded from both numerator and denominator so they
    # cannot dilute the rate.
    bp_keys.discard("")
    cross_mismatch = len(bp_keys - price_keys)
    mismatch_rate = cross_mismatch / max(1, len(bp_keys)) * 100.0

    # --- Presence checks on harness metadata.
    json_status = str(harness.get("json_validation_status") or "")
    type_ok = 100.0 if json_status else 80.0
    captured_at = str(harness.get("captured_at") or "")
    timeliness = 100.0 if captured_at else 70.0

    # --- Required-field completeness over data_feed rows.
    data_feed_rows = _rows(data.get("data_feed"))
    raw_fields = policy.get("data_feed_required_fields")
    required_fields = raw_fields if isinstance(raw_fields, list) else [
        "Ticker", "Close", "MA20", "ATR20", "Volume",
    ]
    total_required_cells = max(1, len(data_feed_rows) * max(1, len(required_fields)))
    missing_required_cells = 0
    for row in data_feed_rows:
        for name in required_fields:
            cell = row.get(name)
            if cell is None or (isinstance(cell, str) and not cell.strip()):
                missing_required_cells += 1
    required_field_completeness = max(
        0.0, 100.0 - (missing_required_cells / total_required_cells) * 100.0
    )

    # --- Placeholder safety over stop / take-profit prices.
    raw_tokens = policy.get("placeholder_tokens")
    if not isinstance(raw_tokens, list):
        raw_tokens = ["DATA_MISSING", "", "-", None]
    # _is_placeholder only ever matches None or str values, so other token
    # types are irrelevant; dropping them also avoids a crash on unhashable
    # entries coming from the policy file.
    placeholder_tokens = {t for t in raw_tokens if t is None or isinstance(t, str)}
    placeholder_checks = 0
    placeholder_hits = 0
    placeholder_ledger: list[dict[str, Any]] = []
    for row in prices:
        ticker = str(row.get("ticker") or "")
        for name in ("stop_price", "tp1_price", "tp2_price"):
            # Excused take-profit gaps still count as a check, just not a hit.
            placeholder_checks += 1
            val = row.get(name)
            if _is_allowed_tp_stale(row, name, val):
                continue
            if _is_placeholder(val, placeholder_tokens):
                placeholder_hits += 1
                placeholder_ledger.append({"ticker": ticker, "field": name, "value": val})
    if placeholder_checks == 0:
        placeholder_safety = 100.0
    else:
        placeholder_safety = max(0.0, 100.0 - (placeholder_hits / placeholder_checks) * 100.0)

    # --- Timeliness SLA.
    sla_hours = _policy_float(policy, "captured_at_sla_hours", 24.0)
    sla_penalty = _policy_float(policy, "timeliness_penalty_if_sla_breached_pct", 30.0)
    # NOTE(review): an unparseable captured_at keeps the full presence score
    # and skips the SLA check — confirm this is intended.
    capture_age_hours = _capture_age_hours(captured_at)
    sla_breached = capture_age_hours is not None and capture_age_hours > sla_hours
    if sla_breached:
        timeliness = max(0.0, timeliness - sla_penalty)

    # --- Weighted score, clamped to [0, 100].
    weights = policy.get("weights") if isinstance(policy.get("weights"), dict) else {}
    ws = _policy_float(weights, "sheet_completeness_pct", 0.25)
    wc = _policy_float(weights, "cross_mismatch_safety_pct", 0.20)
    wt = _policy_float(weights, "timeliness_pct", 0.15)
    wtp = _policy_float(weights, "type_presence_pct", 0.10)
    wr = _policy_float(weights, "required_field_completeness_pct", 0.20)
    wp = _policy_float(weights, "placeholder_safety_pct", 0.10)
    weighted = (
        ws * sheet_completeness
        + wc * (100.0 - mismatch_rate)
        + wt * timeliness
        + wtp * type_ok
        + wr * required_field_completeness
        + wp * placeholder_safety
    )
    score = round(max(0.0, min(100.0, weighted)), 2)
    grade = "A" if score >= 95 else "B" if score >= 90 else "C" if score >= 80 else "D"
    pass_th = _policy_float(policy, "pass_threshold", 90.0)
    watch_th = _policy_float(policy, "watch_threshold", 80.0)
    if score >= pass_th:
        gate = "PASS"
    elif score >= watch_th:
        gate = "WATCH_ONLY"
    else:
        gate = "EXPORT_BLOCKED_CRITICAL"

    return {
        "formula_id": "DATA_INTEGRITY_SCORE_V1",
        "score": score,
        "grade": grade,
        "gate": gate,
        "metrics": {
            "sheet_completeness_pct": round(sheet_completeness, 2),
            "cross_mismatch_rate_pct": round(mismatch_rate, 2),
            "timeliness_pct": timeliness,
            "type_presence_pct": type_ok,
            "required_field_completeness_pct": round(required_field_completeness, 2),
            "placeholder_safety_pct": round(placeholder_safety, 2),
            "placeholder_hits_count": placeholder_hits,
            "placeholder_checks_count": placeholder_checks,
            # The ledger is capped so the report stays small.
            "placeholder_ledger": placeholder_ledger[:100],
            "capture_age_hours": round(capture_age_hours, 2) if capture_age_hours is not None else None,
            "sla_breached": sla_breached,
            "json_validation_status": json_status or None,
        },
        "policy_used": {
            "policy_path": str(policy_path),
            "required_sheets": required_sheets,
            "data_feed_required_fields": required_fields,
            "captured_at_sla_hours": sla_hours,
            "timeliness_penalty_if_sla_breached_pct": sla_penalty,
            "pass_threshold": pass_th,
            "watch_threshold": watch_th,
        },
    }


def main() -> int:
    """Score the input document, write the report, and echo it to stdout.

    Command-line options ``--json``, ``--out`` and ``--policy`` override the
    default paths; relative values are resolved against ``ROOT``.

    Returns:
        ``0`` once the report has been written and printed.

    Raises:
        OSError: If the input cannot be read or the report cannot be written.
        json.JSONDecodeError: If the input file is not valid JSON.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--json", default=str(DEFAULT_JSON))
    parser.add_argument("--out", default=str(DEFAULT_OUT))
    parser.add_argument("--policy", default=str(DEFAULT_POLICY))
    args = parser.parse_args()

    json_path = _resolve_under_root(args.json)
    out_path = _resolve_under_root(args.out)
    policy_path = _resolve_under_root(args.policy)

    payload = _load(json_path)
    policy = _load_policy(policy_path)
    result = _build_result(payload, policy, policy_path)

    rendered = json.dumps(result, ensure_ascii=False, indent=2)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(rendered, encoding="utf-8")
    print(rendered)
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)