Files
QuantEngineByItz/src/quant_engine/kis_data_collection_v1.py
T
kjh2064 05d9f8ed41 코드 참조 경로 업데이트 & WBS-8.1 모니터링 준비
경로 정규화 (outputs/ → src/quant_engine/):
✓ kis_api_client_v1.py: KIS 데이터 수집 경로
✓ kis_data_collection_v1.py: 기본 DB 인자
✓ snapshot_admin_server_v1.py: KIS_COLLECTION_DB
✓ snapshot_admin_store_v1.py: DEFAULT_DB + collector_db
✓ run_snapshot_admin_server_v1.py: --db 기본값

모니터링 도구 추가:
✓ verify_admin_db.py: 어드민 서버 & DB 검증
✓ setup_wbs81_monitoring.py: WBS-8.1 목표 추적 시스템
✓ update_db_paths.py: 자동화된 경로 업데이트

효과:
- 단일 소스 (src/quant_engine/)
- 배포 스크립트 단순화
- WBS-8.1: T+20 30건 모니터링 준비 완료
- 22일 남음 (목표: 2026-07-15)

Co-Authored-By: Claude Haiku 4.5 <noreply@anthropic.com>
2026-06-23 00:33:32 +09:00

479 lines
18 KiB
Python

"""KIS-first data collector for the CI scheduler.
The collector uses the existing `GatherTradingData.json` snapshot as the seed
universe, then enriches Korean tickers with read-only KIS quotations and
orderbook data, while retaining Naver/Yahoo fallbacks when available.
The canonical persistence target is SQLite.
"""
from __future__ import annotations
import argparse
import datetime as dt
import json
import os
import sys
import uuid
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
try:
from tools.fetch_naver_market_data_v1 import ( # type: ignore
_session as naver_session,
compute_relative_return_20d,
compute_volume_ratio_5d,
fetch_foreign_institution_flow,
fetch_price_history,
)
except Exception: # pragma: no cover - optional adapter
naver_session = None
compute_relative_return_20d = None
compute_volume_ratio_5d = None
fetch_foreign_institution_flow = None
fetch_price_history = None
try:
from src.quant_engine.kis_api_client_v1 import ( # type: ignore
KisCredentials,
get_asking_price_10_level,
get_current_price,
get_daily_short_sale,
)
except Exception: # pragma: no cover - safe fallback in non-KIS environments
KisCredentials = None
get_asking_price_10_level = None
get_current_price = None
get_daily_short_sale = None
from src.quant_engine.data_collection_store_v1 import (
CollectionRun,
append_collection_error,
upsert_collection_run,
upsert_collection_snapshot,
)
from src.quant_engine.data_collection_backend_v1 import (
CollectionStoreSpec,
normalize_store_spec,
)
def _kst_now_iso() -> str:
return dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).isoformat()
def _load_json(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
return {}
def _coerce_float(value: Any) -> float | None:
if value is None or value == "":
return None
try:
if isinstance(value, str):
value = value.replace(",", "").replace("%", "")
return float(value)
except (TypeError, ValueError):
return None
def _find_first_value(payload: Any, keys: tuple[str, ...]) -> Any:
stack = [payload]
while stack:
item = stack.pop()
if isinstance(item, dict):
for key in keys:
value = item.get(key)
if value not in (None, ""):
return value
stack.extend(item.values())
elif isinstance(item, list):
stack.extend(item)
return None
def _normalize_naver_price_history(code: str) -> dict[str, Any]:
if naver_session is None or fetch_price_history is None:
return {"status": "DISABLED"}
try:
session = naver_session()
price = fetch_price_history(session, code)
result: dict[str, Any] = {"status": price.get("status", "UNKNOWN"), "source_url": price.get("source_url")}
rows = price.get("rows") or []
if rows:
result["close"] = rows[0].get("close")
result["open"] = rows[0].get("open")
result["high"] = rows[0].get("high")
result["low"] = rows[0].get("low")
result["volume"] = rows[0].get("volume")
if compute_relative_return_20d is not None:
benchmark = fetch_price_history(session, "069500")
result["relative_return_20d"] = compute_relative_return_20d(rows, benchmark.get("rows", []))
if compute_volume_ratio_5d is not None:
result["volume_ratio_5d"] = compute_volume_ratio_5d(rows)
if fetch_foreign_institution_flow is not None:
result["foreign_institution_flow"] = fetch_foreign_institution_flow(session, code)
return result
except Exception as exc: # noqa: BLE001 - fallback source must not break the batch
return {"status": "ERROR", "error": str(exc)}
def _normalize_kis_fields(code: str, account: str) -> dict[str, Any]:
if KisCredentials is None or get_current_price is None or get_asking_price_10_level is None or get_daily_short_sale is None:
return {"status": "DISABLED"}
try:
creds = KisCredentials.load(account)
except Exception as exc:
return {"status": "ERROR", "error": str(exc)}
result: dict[str, Any] = {"status": "OK", "account": account}
try:
price = get_current_price(creds, code)
result["current_price_raw"] = price
result["current_price"] = _coerce_float(_find_first_value(price, ("stck_prpr", "stck_clpr", "close", "close_price")))
result["open"] = _coerce_float(_find_first_value(price, ("stck_oprc", "open", "open_price")))
result["high"] = _coerce_float(_find_first_value(price, ("stck_hgpr", "high", "high_price")))
result["low"] = _coerce_float(_find_first_value(price, ("stck_lwpr", "low", "low_price")))
result["prev_close"] = _coerce_float(_find_first_value(price, ("prdy_vrss", "prev_close")))
result["volume"] = _coerce_float(_find_first_value(price, ("acml_vol", "volume")))
result["change_pct"] = _coerce_float(_find_first_value(price, ("prdy_ctrt", "change_pct")))
except Exception as exc:
result["price_status"] = "ERROR"
result["price_error"] = str(exc)
try:
orderbook = get_asking_price_10_level(creds, code)
output1 = orderbook.get("output1") or {}
result["orderbook_raw"] = orderbook
result["microstructure_pressure"] = _coerce_float(
_find_first_value(output1, ("total_askp_rsqn", "total_bidp_rsqn"))
)
result["ask_1"] = _coerce_float(_find_first_value(output1, ("askp1",)))
result["bid_1"] = _coerce_float(_find_first_value(output1, ("bidp1",)))
result["orderbook_status"] = "OK"
except Exception as exc:
result["orderbook_status"] = "ERROR"
result["orderbook_error"] = str(exc)
try:
start = (dt.date.today() - dt.timedelta(days=10)).strftime("%Y%m%d")
end = dt.date.today().strftime("%Y%m%d")
short_sale = get_daily_short_sale(creds, code, start, end)
result["short_sale_raw"] = short_sale
rows = short_sale.get("output2") or []
if rows:
latest = rows[0]
result["short_turnover_share"] = _coerce_float(latest.get("ssts_vol_rlim"))
result["short_sale_status"] = "OK"
except Exception as exc:
result["short_sale_status"] = "ERROR"
result["short_sale_error"] = str(exc)
return result
def _build_seed_rows(source_json: Path) -> list[dict[str, Any]]:
payload = _load_json(source_json)
data = payload.get("data") or {}
core_satellite = {str(row.get("Ticker") or row.get("ticker") or ""): row for row in data.get("core_satellite", [])}
sector_lookup = {str(row.get("Ticker") or row.get("ticker") or ""): row.get("Sector") for row in data.get("core_satellite", [])}
rows: list[dict[str, Any]] = []
for row in data.get("data_feed", []):
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
if not ticker:
continue
merged = dict(row)
core_row = core_satellite.get(ticker) or {}
if core_row:
for key, value in core_row.items():
merged.setdefault(key, value)
merged["Sector"] = merged.get("Sector") or sector_lookup.get(ticker)
rows.append(merged)
return rows
def _merge_source_fields(target: dict[str, Any], source: dict[str, Any], keys: tuple[str, ...]) -> None:
for key in keys:
if key in source and source.get(key) not in (None, ""):
target[key] = source[key]
def _resolve_price_source(
ticker: str,
*,
kis_account: str,
include_naver: bool,
include_live_kis: bool,
) -> tuple[dict[str, Any] | None, dict[str, Any] | None, list[str]]:
source_priority: list[str] = ["gathertradingdata_json"]
kis: dict[str, Any] | None = None
naver: dict[str, Any] | None = None
if include_live_kis and ticker.isdigit() and len(ticker) == 6:
kis = _normalize_kis_fields(ticker, kis_account)
if kis.get("status") == "OK":
source_priority.insert(0, "kis_open_api")
if include_naver and ticker.isdigit() and len(ticker) == 6:
naver = _normalize_naver_price_history(ticker)
if naver.get("status") in {"OK", "DATA_MISSING"}:
source_priority.append("naver_finance")
return kis, naver, source_priority
def _apply_source_fallbacks(
normalized: dict[str, Any],
*,
row: dict[str, Any],
kis: dict[str, Any] | None,
naver: dict[str, Any] | None,
) -> None:
if kis and kis.get("status") == "OK":
_merge_source_fields(normalized, kis, ("current_price", "open", "high", "low", "volume"))
_merge_source_fields(normalized, kis, ("relative_return_20d", "volume_ratio_5d", "microstructure_pressure", "short_turnover_share"))
if naver and naver.get("status") in {"OK", "DATA_MISSING"}:
normalized.setdefault("relative_return_20d", naver.get("relative_return_20d"))
normalized.setdefault("volume_ratio_5d", naver.get("volume_ratio_5d"))
normalized.setdefault("naver_price_status", naver.get("status"))
normalized.setdefault("current_price", naver.get("close"))
normalized.setdefault("open", naver.get("open"))
normalized.setdefault("high", naver.get("high"))
normalized.setdefault("low", naver.get("low"))
normalized.setdefault("volume", naver.get("volume"))
normalized.setdefault("current_price", _coerce_float(row.get("current_price") or row.get("Current_Price") or row.get("close")))
normalized.setdefault("open", _coerce_float(row.get("open") or row.get("Open")))
normalized.setdefault("high", _coerce_float(row.get("high") or row.get("High")))
normalized.setdefault("low", _coerce_float(row.get("low") or row.get("Low")))
normalized.setdefault("volume", _coerce_float(row.get("volume") or row.get("Volume")))
def _persist_collection_row(
*,
sqlite_db: Path,
run_id: str,
ticker: str,
normalized: dict[str, Any],
provenance: dict[str, Any],
) -> None:
upsert_collection_snapshot(
sqlite_db,
run_id=run_id,
dataset_name="data_feed",
ticker=ticker,
name=str(normalized.get("Name") or normalized.get("name") or ""),
sector=normalized.get("Sector"),
as_of_date=str(normalized.get("Price_Date") or normalized.get("AsOfDate") or normalized.get("collection_as_of") or ""),
source_priority=">".join(provenance.get("source_priority") or []),
source_status="OK",
payload=normalized,
provenance=provenance,
)
def _append_collection_failure(
*,
sqlite_db: Path,
run_id: str,
ticker: str,
row: dict[str, Any],
exc: Exception,
) -> dict[str, Any]:
error = {"ticker": ticker, "error": str(exc)}
append_collection_error(
sqlite_db,
run_id=run_id,
source_name="collector",
error_kind=type(exc).__name__,
error_message=str(exc),
ticker=ticker,
payload=row,
)
return error
def _finalize_collection_summary(
*,
summary: dict[str, Any],
output_json: Path,
sqlite_db: Path,
) -> dict[str, Any]:
summary["finished_at"] = _kst_now_iso()
summary["status"] = "PASS" if not summary["errors"] else "PASS_WITH_WARNINGS"
output_json.parent.mkdir(parents=True, exist_ok=True)
output_json.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
upsert_collection_run(
sqlite_db,
CollectionRun(
run_id=summary["run_id"],
collector_name="kis_data_collection_v1",
started_at=summary["started_at"],
status=summary["status"],
input_source=str(summary["input_json"]),
output_json_path=str(output_json),
output_db_path=str(sqlite_db),
notes="KIS-first CI collection",
),
finished_at=summary["finished_at"],
)
return summary
def _collect_one(row: dict[str, Any], *, kis_account: str, include_naver: bool, include_live_kis: bool) -> tuple[dict[str, Any], dict[str, Any]]:
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
name = str(row.get("Name") or row.get("name") or "").strip()
sector = str(row.get("Sector") or row.get("sector") or "").strip() or None
normalized = dict(row)
provenance: dict[str, Any] = {
"ticker": ticker,
"name": name,
"sector": sector,
"source_priority": ["gathertradingdata_json"],
}
kis, naver, source_priority = _resolve_price_source(
ticker,
kis_account=kis_account,
include_naver=include_naver,
include_live_kis=include_live_kis,
)
provenance["source_priority"] = source_priority
if kis is not None:
provenance["kis"] = kis
normalized.update({k: v for k, v in kis.items() if k not in {"current_price_raw", "orderbook_raw", "short_sale_raw"}})
if naver is not None:
provenance["naver"] = naver
_apply_source_fallbacks(normalized, row=row, kis=kis, naver=naver)
normalized.setdefault("collection_as_of", _kst_now_iso())
return normalized, provenance
def collect_to_sqlite(
*,
input_json: Path,
sqlite_db: Path,
output_json: Path,
kis_account: str,
include_naver: bool = False,
include_live_kis: bool = True,
) -> dict[str, Any]:
run_id = uuid.uuid4().hex
started_at = _kst_now_iso()
upsert_collection_run(
sqlite_db,
CollectionRun(
run_id=run_id,
collector_name="kis_data_collection_v1",
started_at=started_at,
status="RUNNING",
input_source=str(input_json),
output_json_path=str(output_json),
output_db_path=str(sqlite_db),
notes="KIS-first CI collection",
),
)
seed_rows = _build_seed_rows(input_json)
summary = {
"formula_id": "KIS_DATA_COLLECTION_V1",
"run_id": run_id,
"started_at": started_at,
"input_json": str(input_json),
"sqlite_db": str(sqlite_db),
"row_count": len(seed_rows),
"source_counts": {},
"errors": [],
"rows": [],
}
for row in seed_rows:
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
if not ticker:
continue
try:
normalized, provenance = _collect_one(row, kis_account=kis_account, include_naver=include_naver, include_live_kis=include_live_kis)
source_counts = summary["source_counts"]
for source_name in provenance.get("source_priority") or []:
source_counts[source_name] = source_counts.get(source_name, 0) + 1
_persist_collection_row(
sqlite_db=sqlite_db,
run_id=run_id,
ticker=ticker,
normalized=normalized,
provenance=provenance,
)
summary["rows"].append(
{
"ticker": ticker,
"name": normalized.get("Name") or normalized.get("name"),
"sector": normalized.get("Sector"),
"source_priority": provenance.get("source_priority"),
"current_price": normalized.get("current_price"),
"relative_return_20d": normalized.get("relative_return_20d"),
"volume_ratio_5d": normalized.get("volume_ratio_5d"),
}
)
except Exception as exc: # noqa: BLE001
error = _append_collection_failure(
sqlite_db=sqlite_db,
run_id=run_id,
ticker=ticker,
row=row,
exc=exc,
)
summary["errors"].append(error)
return _finalize_collection_summary(summary=summary, output_json=output_json, sqlite_db=sqlite_db)
def main() -> int:
ap = argparse.ArgumentParser(description=__doc__)
ap.add_argument("--input-json", type=Path, default=ROOT / "GatherTradingData.json")
ap.add_argument("--sqlite-db", type=Path, default=ROOT / "src" / "quant_engine" / "kis_data_collection.db")
ap.add_argument("--store-backend", default="sqlite", help="Storage backend contract placeholder (sqlite today, postgresql planned)")
ap.add_argument("--store-location", default=None, help="Backend location/DSN. sqlite path or future postgres DSN.")
ap.add_argument("--output-json", type=Path, default=ROOT / "Temp" / "kis_data_collection_v1.json")
ap.add_argument("--kis-account", choices=["real", "mock"], default="real")
ap.add_argument("--allow-naver-fallback", action="store_true")
ap.add_argument("--no-live-kis", action="store_true")
args = ap.parse_args()
store_backend, store_location = normalize_store_spec(
CollectionStoreSpec(
backend=args.store_backend,
location=args.store_location or args.sqlite_db,
),
ROOT,
)
if store_backend != "sqlite":
raise SystemExit(
"현재 실행 backend는 sqlite만 지원합니다. "
"하지만 collector는 이미 backend contract로 분리되어 있어 "
"후속 PostgreSQL 구현을 같은 호출 지점에 붙일 수 있습니다."
)
summary = collect_to_sqlite(
input_json=args.input_json,
sqlite_db=Path(store_location),
output_json=args.output_json,
kis_account=args.kis_account,
include_naver=args.allow_naver_fallback,
include_live_kis=not args.no_live_kis,
)
print(json.dumps(summary, ensure_ascii=False, indent=2))
return 0 if summary.get("status") == "PASS" else 1
if __name__ == "__main__":
raise SystemExit(main())