KIS Open API 조회전용 연동 + 직접매매 절대금지 안전게이트

매수/매도 주문 및 계좌 잔고조회를 API로 직접 실행하지 않는다는 원칙을
코드 레벨에서 강제하는 안전게이트(governance/rules/06, 07)와 함께,
시세/호가/공매도거래비중 등 조회전용 KIS Open API 연동 및 SQLite
수집 파이프라인을 추가한다.

- kis_api_client_v1: 모든 요청이 _assert_read_only를 통과해야 하며
  /trading/ 경로·주문 TR_ID는 RuntimeError로 즉시 차단
- kis_data_collection_v1: KIS 우선 + Naver 폴백, 네트워크 실패는
  개별 ticker 단위로 흡수(배치 전체 중단 없음)
- data_collection_store_v1 / storage_backend_v1: SQLite 캐노니컬
  저장소, PostgreSQL 전환 대비 백엔드 추상화
- Gitea 영업일 스케줄(2시간 간격) + CI 강제 게이트
  (validate_no_direct_api_trading_v1, validate_kis_api_credentials_v1)
This commit is contained in:
2026-06-21 20:04:44 +09:00
parent 34f6eebba6
commit 4cb206a269
20 changed files with 2034 additions and 0 deletions
@@ -0,0 +1,18 @@
"""Storage backend selection for the collection pipeline.
This module is a thin compatibility wrapper over the generic storage backend
contract. The collector is intentionally designed around a backend contract,
not a hard SQLite-only assumption.
"""
from __future__ import annotations
from pathlib import Path
from src.quant_engine.storage_backend_v1 import StoreSpec, default_sqlite_store_path, normalize_store_spec
CollectionStoreSpec = StoreSpec
def default_collection_store_path(root: Path) -> Path:
return default_sqlite_store_path(root, "kis_data_collection/kis_data_collection.db")
@@ -0,0 +1,370 @@
"""SQLite store for platform-transition data collection outputs.
This store is intentionally small and backend-agnostic enough to be upgraded to
PostgreSQL later without changing the row contract. The canonical payload is the
normalized factor row plus provenance metadata.
"""
from __future__ import annotations
import json
import sqlite3
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable
SCHEMA = """
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS collection_runs (
run_id TEXT PRIMARY KEY,
collector_name TEXT NOT NULL,
started_at TEXT NOT NULL,
finished_at TEXT,
status TEXT NOT NULL,
input_source TEXT,
output_json_path TEXT,
output_db_path TEXT,
notes TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS collection_snapshots (
run_id TEXT NOT NULL,
dataset_name TEXT NOT NULL,
ticker TEXT NOT NULL,
name TEXT,
sector TEXT,
as_of_date TEXT,
source_priority TEXT,
source_status TEXT,
payload_json TEXT NOT NULL,
provenance_json TEXT NOT NULL,
created_at TEXT DEFAULT (datetime('now')),
PRIMARY KEY (run_id, dataset_name, ticker)
);
CREATE TABLE IF NOT EXISTS collection_source_errors (
run_id TEXT NOT NULL,
ticker TEXT,
source_name TEXT NOT NULL,
error_kind TEXT NOT NULL,
error_message TEXT NOT NULL,
payload_json TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_collection_snapshots_ticker_time
ON collection_snapshots(ticker, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_collection_source_errors_run
ON collection_source_errors(run_id, source_name);
"""
@dataclass(frozen=True)
class CollectionRun:
run_id: str
collector_name: str
started_at: str
status: str
input_source: str | None = None
output_json_path: str | None = None
output_db_path: str | None = None
notes: str | None = None
def init_db(db_path: Path) -> None:
db_path.parent.mkdir(parents=True, exist_ok=True)
conn = sqlite3.connect(db_path)
try:
conn.executescript(SCHEMA)
conn.commit()
finally:
conn.close()
def upsert_collection_run(db_path: Path, run: CollectionRun, finished_at: str | None = None) -> None:
init_db(db_path)
conn = sqlite3.connect(db_path)
try:
conn.execute(
"""
INSERT INTO collection_runs (
run_id, collector_name, started_at, finished_at, status,
input_source, output_json_path, output_db_path, notes
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(run_id) DO UPDATE SET
collector_name=excluded.collector_name,
started_at=excluded.started_at,
finished_at=excluded.finished_at,
status=excluded.status,
input_source=excluded.input_source,
output_json_path=excluded.output_json_path,
output_db_path=excluded.output_db_path,
notes=excluded.notes
""",
(
run.run_id,
run.collector_name,
run.started_at,
finished_at,
run.status,
run.input_source,
run.output_json_path,
run.output_db_path,
run.notes,
),
)
conn.commit()
finally:
conn.close()
def upsert_collection_snapshot(
db_path: Path,
*,
run_id: str,
dataset_name: str,
ticker: str,
name: str | None,
sector: str | None,
as_of_date: str | None,
source_priority: str,
source_status: str,
payload: dict[str, Any],
provenance: dict[str, Any],
) -> None:
init_db(db_path)
conn = sqlite3.connect(db_path)
try:
conn.execute(
"""
INSERT INTO collection_snapshots (
run_id, dataset_name, ticker, name, sector, as_of_date,
source_priority, source_status, payload_json, provenance_json
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(run_id, dataset_name, ticker) DO UPDATE SET
name=excluded.name,
sector=excluded.sector,
as_of_date=excluded.as_of_date,
source_priority=excluded.source_priority,
source_status=excluded.source_status,
payload_json=excluded.payload_json,
provenance_json=excluded.provenance_json
""",
(
run_id,
dataset_name,
ticker,
name,
sector,
as_of_date,
source_priority,
source_status,
json.dumps(payload, ensure_ascii=False, default=str),
json.dumps(provenance, ensure_ascii=False, default=str),
),
)
conn.commit()
finally:
conn.close()
def append_collection_error(
db_path: Path,
*,
run_id: str,
source_name: str,
error_kind: str,
error_message: str,
ticker: str | None = None,
payload: dict[str, Any] | None = None,
) -> None:
init_db(db_path)
conn = sqlite3.connect(db_path)
try:
conn.execute(
"""
INSERT INTO collection_source_errors (
run_id, ticker, source_name, error_kind, error_message, payload_json
) VALUES (?, ?, ?, ?, ?, ?)
""",
(
run_id,
ticker,
source_name,
error_kind,
error_message,
json.dumps(payload or {}, ensure_ascii=False, default=str),
),
)
conn.commit()
finally:
conn.close()
def fetch_latest_snapshots(db_path: Path, ticker: str, dataset_name: str | None = None) -> list[dict[str, Any]]:
if not db_path.exists():
return []
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
try:
if dataset_name:
rows = conn.execute(
"""
SELECT * FROM collection_snapshots
WHERE ticker = ? AND dataset_name = ?
ORDER BY created_at DESC
""",
(ticker, dataset_name),
).fetchall()
else:
rows = conn.execute(
"""
SELECT * FROM collection_snapshots
WHERE ticker = ?
ORDER BY created_at DESC
""",
(ticker,),
).fetchall()
return [dict(row) for row in rows]
finally:
conn.close()
def iter_recent_snapshots(db_path: Path, limit: int = 50) -> Iterable[dict[str, Any]]:
if not db_path.exists():
return []
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
try:
rows = conn.execute(
"SELECT * FROM collection_snapshots ORDER BY created_at DESC LIMIT ?",
(limit,),
).fetchall()
return [dict(row) for row in rows]
finally:
conn.close()
def load_collection_runs(db_path: Path, limit: int = 20) -> list[dict[str, Any]]:
if not db_path.exists():
return []
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
try:
rows = conn.execute(
"""
SELECT run_id, collector_name, started_at, finished_at, status,
input_source, output_json_path, output_db_path, notes, created_at
FROM collection_runs
ORDER BY started_at DESC, created_at DESC
LIMIT ?
""",
(int(limit),),
).fetchall()
return [dict(row) for row in rows]
finally:
conn.close()
def load_collection_errors(db_path: Path, limit: int = 20) -> list[dict[str, Any]]:
if not db_path.exists():
return []
conn = sqlite3.connect(db_path)
conn.row_factory = sqlite3.Row
try:
rows = conn.execute(
"""
SELECT run_id, ticker, source_name, error_kind, error_message, payload_json, created_at
FROM collection_source_errors
ORDER BY created_at DESC
LIMIT ?
""",
(int(limit),),
).fetchall()
return [dict(row) for row in rows]
finally:
conn.close()
def load_collection_dashboard_state(
db_path: Path | str | None = None,
output_json_path: Path | str | None = None,
*,
limit: int = 8,
) -> dict[str, Any]:
db = Path(db_path) if db_path else Path()
report = Path(output_json_path) if output_json_path else Path()
state: dict[str, Any] = {
"db_path": str(db),
"output_json_path": str(report) if output_json_path else "",
"runs": [],
"recent_snapshots": [],
"recent_errors": [],
"counts": {
"collection_runs": 0,
"collection_snapshots": 0,
"collection_source_errors": 0,
},
"latest_run": {},
"latest_report": {},
}
if report.exists():
try:
state["latest_report"] = json.loads(report.read_text(encoding="utf-8"))
except Exception:
state["latest_report"] = {}
if not db.exists():
return state
conn = sqlite3.connect(db)
conn.row_factory = sqlite3.Row
try:
state["counts"] = {
"collection_runs": conn.execute("SELECT COUNT(*) FROM collection_runs").fetchone()[0],
"collection_snapshots": conn.execute("SELECT COUNT(*) FROM collection_snapshots").fetchone()[0],
"collection_source_errors": conn.execute("SELECT COUNT(*) FROM collection_source_errors").fetchone()[0],
}
run_row = conn.execute(
"""
SELECT run_id, collector_name, started_at, finished_at, status,
input_source, output_json_path, output_db_path, notes, created_at
FROM collection_runs
ORDER BY started_at DESC, created_at DESC
LIMIT 1
"""
).fetchone()
state["latest_run"] = dict(run_row) if run_row is not None else {}
state["runs"] = [dict(row) for row in conn.execute(
"""
SELECT run_id, collector_name, started_at, finished_at, status,
input_source, output_json_path, output_db_path, notes, created_at
FROM collection_runs
ORDER BY started_at DESC, created_at DESC
LIMIT ?
""",
(int(limit),),
).fetchall()]
state["recent_snapshots"] = [dict(row) for row in conn.execute(
"""
SELECT run_id, dataset_name, ticker, name, sector, as_of_date,
source_priority, source_status, created_at
FROM collection_snapshots
ORDER BY created_at DESC
LIMIT ?
""",
(int(limit),),
).fetchall()]
state["recent_errors"] = [dict(row) for row in conn.execute(
"""
SELECT run_id, ticker, source_name, error_kind, error_message, created_at
FROM collection_source_errors
ORDER BY created_at DESC
LIMIT ?
""",
(int(limit),),
).fetchall()]
finally:
conn.close()
return state
+212
View File
@@ -0,0 +1,212 @@
"""한국투자증권(KIS) Open API 클라이언트 — 조회(read-only) 전용.
근거: https://apiportal.koreainvestment.com/apiservice-summary ,
https://github.com/koreainvestment/open-trading-api (2026-06-21 실측 확인된
api_url/tr_id만 사용 — 추정 금지).
══════════════════════════════════════════════════════════════════════════════
[CRITICAL] governance/rules/06_no_direct_api_trading.yaml — 절대 규칙
이 모듈은 매수/매도 주문을 어떤 경로로도 제출하지 않는다. 주문 제출/정정/취소
함수는 이 파일에 일체 작성하지 않으며, 공유 요청 함수(_send_request)는 주문
관련 경로("/trading/")나 TR_ID(TTTC08*/VTTC08* 등)를 만나면 즉시 RuntimeError로
요청을 차단한다(2차 방어). 이 원칙을 어기면 엔진 전체가 '제안 시스템'에서
'자동매매 시스템'으로 변질되어 프로젝트 핵심 전제가 깨진다(사용자 직접 지시).
══════════════════════════════════════════════════════════════════════════════
인증 정보는 Windows 환경변수에서 읽는다(실제계좌: KIS_APP_Key/KIS_APP_Secret,
모의계좌: KIS_APP_Key_TEST/KIS_APP_Secret_TEST). 방금 setx로 설정된 값은 현재
프로세스의 os.environ에 아직 반영되지 않을 수 있어, HKCU\\Environment 레지스트리
폴백을 둔다(읽기만 함, 값을 로그에 남기지 않음).
"""
from __future__ import annotations
import datetime as dt
import json
import sys
from pathlib import Path
from typing import Any
import requests
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
REAL_DOMAIN = "https://openapi.koreainvestment.com:9443"
MOCK_DOMAIN = "https://openapivts.koreainvestment.com:29443"
TOKEN_CACHE_DIR = ROOT / "Temp"
# ── [CRITICAL] 주문 차단 목록 — 절대 수정/완화 금지 (governance/rules/06_no_direct_api_trading.yaml) ──
# "/trading/" 하위 경로는 주문(order)뿐 아니라 계좌잔고조회(inquire-balance)도 포함한다.
# 계좌 보유종목/잔고는 governance/rules/07_no_kis_account_balance_query.yaml에 의해
# 별도로도 금지된다 — HTS 캡처가 유일한 출처(사용자 직접 지시).
FORBIDDEN_PATH_SUBSTRINGS: tuple[str, ...] = ("/trading/",)
FORBIDDEN_TR_ID_PREFIXES: tuple[str, ...] = (
"TTTC08", "VTTC08", "TTTC01", "VTTC01", # 현금/신용 매수·매도·정정·취소
"TTTC8434R", "VTTC8434R", # 주식잔고조회 — 계좌 보유종목 조회 금지(07번 규칙)
)
class OrderEndpointBlockedError(RuntimeError):
"""주문 제출/정정/취소 경로 호출 시도 — 절대 차단."""
def _assert_read_only(path: str, tr_id: str) -> None:
for forbidden in FORBIDDEN_PATH_SUBSTRINGS:
if forbidden in path:
raise OrderEndpointBlockedError(
f"BLOCKED: 주문 관련 경로 호출 시도 차단 — path={path!r}. "
"이 엔진은 매수/매도를 API로 직접 실행하지 않는다(governance/rules/06_no_direct_api_trading.yaml)."
)
for prefix in FORBIDDEN_TR_ID_PREFIXES:
if tr_id.upper().startswith(prefix):
raise OrderEndpointBlockedError(
f"BLOCKED: 주문 관련 TR_ID 호출 시도 차단 — tr_id={tr_id!r}. "
"이 엔진은 매수/매도를 API로 직접 실행하지 않는다(governance/rules/06_no_direct_api_trading.yaml)."
)
def _read_env_var(name: str) -> str | None:
import os
value = os.environ.get(name)
if value:
return value
if sys.platform != "win32":
return None
try:
import winreg
with winreg.OpenKey(winreg.HKEY_CURRENT_USER, "Environment") as key:
value, _ = winreg.QueryValueEx(key, name)
return value or None
except OSError:
return None
class KisCredentials:
def __init__(self, app_key: str, app_secret: str, account: str):
self.app_key = app_key
self.app_secret = app_secret
self.account = account # "real" | "mock"
self.domain = REAL_DOMAIN if account == "real" else MOCK_DOMAIN
@classmethod
def load(cls, account: str = "mock") -> "KisCredentials":
if account == "real":
key_name, secret_name = "KIS_APP_Key", "KIS_APP_Secret"
elif account == "mock":
key_name, secret_name = "KIS_APP_Key_TEST", "KIS_APP_Secret_TEST"
else:
raise ValueError("account must be 'real' or 'mock'")
app_key = _read_env_var(key_name)
app_secret = _read_env_var(secret_name)
if not app_key or not app_secret:
raise RuntimeError(
f"{key_name}/{secret_name} 환경변수를 찾을 수 없음 — Windows 환경변수 설정 후 "
"새 셸에서 재시도하거나 HKCU\\Environment 레지스트리 반영을 확인하세요."
)
return cls(app_key=app_key, app_secret=app_secret, account=account)
def _token_cache_path(creds: KisCredentials) -> Path:
TOKEN_CACHE_DIR.mkdir(parents=True, exist_ok=True)
return TOKEN_CACHE_DIR / f"kis_token_cache_{creds.account}.json"
def _issue_or_reuse_token(creds: KisCredentials) -> str:
"""KIS는 토큰 발급 빈도를 제한한다 — 만료 전까지 캐시 재사용 필수."""
cache_path = _token_cache_path(creds)
if cache_path.exists():
try:
cached = json.loads(cache_path.read_text(encoding="utf-8"))
expires_at = dt.datetime.fromisoformat(cached["expires_at"])
if dt.datetime.now(dt.timezone.utc) < expires_at - dt.timedelta(minutes=10):
return cached["access_token"]
except (json.JSONDecodeError, KeyError, ValueError):
pass
resp = requests.post(
f"{creds.domain}/oauth2/tokenP",
json={"grant_type": "client_credentials", "appkey": creds.app_key, "appsecret": creds.app_secret},
timeout=15,
)
resp.raise_for_status()
body = resp.json()
access_token = body["access_token"]
expires_in_sec = int(body.get("expires_in", 86400))
expires_at = dt.datetime.now(dt.timezone.utc) + dt.timedelta(seconds=expires_in_sec)
cache_path.write_text(
json.dumps({"access_token": access_token, "expires_at": expires_at.isoformat()}, ensure_ascii=False),
encoding="utf-8",
)
return access_token
def _send_request(creds: KisCredentials, path: str, tr_id: str, params: dict[str, Any]) -> dict[str, Any]:
"""모든 KIS REST 호출의 단일 진입점 — 여기서만 가드가 작동하면 충분하다."""
_assert_read_only(path, tr_id) # [CRITICAL] 절대 제거 금지
access_token = _issue_or_reuse_token(creds)
headers = {
"content-type": "application/json; charset=utf-8",
"authorization": f"Bearer {access_token}",
"appkey": creds.app_key,
"appsecret": creds.app_secret,
"tr_id": tr_id,
"custtype": "P",
}
resp = requests.get(f"{creds.domain}{path}", headers=headers, params=params, timeout=15)
resp.raise_for_status()
return resp.json()
# ── 조회(read-only) 함수 — 전부 GET, 전부 quotations/ranking 카테고리 (실측 확인) ──────────
def get_current_price(creds: KisCredentials, code: str) -> dict[str, Any]:
"""주식현재가 시세. api_url=/uapi/domestic-stock/v1/quotations/inquire-price, tr_id=FHKST01010100."""
return _send_request(
creds, "/uapi/domestic-stock/v1/quotations/inquire-price", "FHKST01010100",
{"FID_COND_MRKT_DIV_CODE": "J", "FID_INPUT_ISCD": code},
)
def get_asking_price_10_level(creds: KisCredentials, code: str) -> dict[str, Any]:
"""주식현재가 호가/예상체결 — 10단계 매수/매도 호가.
api_url=/uapi/domestic-stock/v1/quotations/inquire-asking-price-exp-ccn, tr_id=FHKST01010200.
"""
return _send_request(
creds, "/uapi/domestic-stock/v1/quotations/inquire-asking-price-exp-ccn", "FHKST01010200",
{"FID_COND_MRKT_DIV_CODE": "J", "FID_INPUT_ISCD": code},
)
def get_daily_short_sale(creds: KisCredentials, code: str, start_date: str, end_date: str) -> dict[str, Any]:
"""국내주식 공매도 일별추이. api_url=/uapi/domestic-stock/v1/quotations/daily-short-sale,
tr_id=FHPST04830000. start_date/end_date: YYYYMMDD."""
return _send_request(
creds, "/uapi/domestic-stock/v1/quotations/daily-short-sale", "FHPST04830000",
{"FID_COND_MRKT_DIV_CODE": "J", "FID_INPUT_ISCD": code,
"FID_INPUT_DATE_1": start_date, "FID_INPUT_DATE_2": end_date},
)
def get_daily_item_chart_price(
creds: KisCredentials, code: str, start_date: str, end_date: str, period: str = "D",
) -> dict[str, Any]:
"""주식현재가 일자별. api_url=/uapi/domestic-stock/v1/quotations/inquire-daily-itemchartprice,
tr_id=FHKST03010100."""
return _send_request(
creds, "/uapi/domestic-stock/v1/quotations/inquire-daily-itemchartprice", "FHKST03010100",
{"FID_COND_MRKT_DIV_CODE": "J", "FID_INPUT_ISCD": code,
"FID_INPUT_DATE_1": start_date, "FID_INPUT_DATE_2": end_date,
"FID_PERIOD_DIV_CODE": period, "FID_ORG_ADJ_PRC": "0"},
)
def get_investor_trend(creds: KisCredentials, code: str) -> dict[str, Any]:
"""주식현재가 투자자(개인/외국인/기관) 매매동향.
api_url=/uapi/domestic-stock/v1/quotations/inquire-investor, tr_id=FHKST01010900."""
return _send_request(
creds, "/uapi/domestic-stock/v1/quotations/inquire-investor", "FHKST01010900",
{"FID_COND_MRKT_DIV_CODE": "J", "FID_INPUT_ISCD": code},
)
+378
View File
@@ -0,0 +1,378 @@
"""KIS-first data collector for the CI scheduler.
The collector uses the existing `GatherTradingData.json` snapshot as the seed
universe, then enriches Korean tickers with read-only KIS quotations and
orderbook data, while retaining Naver/Yahoo fallbacks when available.
The canonical persistence target is SQLite.
"""
from __future__ import annotations
import argparse
import datetime as dt
import json
import os
import sys
import uuid
from pathlib import Path
from typing import Any
ROOT = Path(__file__).resolve().parents[2]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
try:
from tools.fetch_naver_market_data_v1 import ( # type: ignore
_session as naver_session,
compute_relative_return_20d,
compute_volume_ratio_5d,
fetch_foreign_institution_flow,
fetch_price_history,
)
except Exception: # pragma: no cover - optional adapter
naver_session = None
compute_relative_return_20d = None
compute_volume_ratio_5d = None
fetch_foreign_institution_flow = None
fetch_price_history = None
try:
from src.quant_engine.kis_api_client_v1 import ( # type: ignore
KisCredentials,
get_asking_price_10_level,
get_current_price,
get_daily_short_sale,
)
except Exception: # pragma: no cover - safe fallback in non-KIS environments
KisCredentials = None
get_asking_price_10_level = None
get_current_price = None
get_daily_short_sale = None
from src.quant_engine.data_collection_store_v1 import (
CollectionRun,
append_collection_error,
upsert_collection_run,
upsert_collection_snapshot,
)
from src.quant_engine.data_collection_backend_v1 import (
CollectionStoreSpec,
normalize_store_spec,
)
def _kst_now_iso() -> str:
return dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).isoformat()
def _load_json(path: Path) -> dict[str, Any]:
if not path.exists():
return {}
try:
return json.loads(path.read_text(encoding="utf-8"))
except Exception:
return {}
def _coerce_float(value: Any) -> float | None:
if value is None or value == "":
return None
try:
if isinstance(value, str):
value = value.replace(",", "").replace("%", "")
return float(value)
except (TypeError, ValueError):
return None
def _find_first_value(payload: Any, keys: tuple[str, ...]) -> Any:
stack = [payload]
while stack:
item = stack.pop()
if isinstance(item, dict):
for key in keys:
value = item.get(key)
if value not in (None, ""):
return value
stack.extend(item.values())
elif isinstance(item, list):
stack.extend(item)
return None
def _normalize_naver_price_history(code: str) -> dict[str, Any]:
if naver_session is None or fetch_price_history is None:
return {"status": "DISABLED"}
try:
session = naver_session()
price = fetch_price_history(session, code)
result: dict[str, Any] = {"status": price.get("status", "UNKNOWN"), "source_url": price.get("source_url")}
rows = price.get("rows") or []
if rows:
result["close"] = rows[0].get("close")
result["open"] = rows[0].get("open")
result["high"] = rows[0].get("high")
result["low"] = rows[0].get("low")
result["volume"] = rows[0].get("volume")
if compute_relative_return_20d is not None:
benchmark = fetch_price_history(session, "069500")
result["relative_return_20d"] = compute_relative_return_20d(rows, benchmark.get("rows", []))
if compute_volume_ratio_5d is not None:
result["volume_ratio_5d"] = compute_volume_ratio_5d(rows)
if fetch_foreign_institution_flow is not None:
result["foreign_institution_flow"] = fetch_foreign_institution_flow(session, code)
return result
except Exception as exc: # noqa: BLE001 - fallback source must not break the batch
return {"status": "ERROR", "error": str(exc)}
def _normalize_kis_fields(code: str, account: str) -> dict[str, Any]:
if KisCredentials is None or get_current_price is None or get_asking_price_10_level is None or get_daily_short_sale is None:
return {"status": "DISABLED"}
try:
creds = KisCredentials.load(account)
except Exception as exc:
return {"status": "ERROR", "error": str(exc)}
result: dict[str, Any] = {"status": "OK", "account": account}
try:
price = get_current_price(creds, code)
result["current_price_raw"] = price
result["current_price"] = _coerce_float(_find_first_value(price, ("stck_prpr", "stck_clpr", "close", "close_price")))
result["open"] = _coerce_float(_find_first_value(price, ("stck_oprc", "open", "open_price")))
result["high"] = _coerce_float(_find_first_value(price, ("stck_hgpr", "high", "high_price")))
result["low"] = _coerce_float(_find_first_value(price, ("stck_lwpr", "low", "low_price")))
result["prev_close"] = _coerce_float(_find_first_value(price, ("prdy_vrss", "prev_close")))
result["volume"] = _coerce_float(_find_first_value(price, ("acml_vol", "volume")))
result["change_pct"] = _coerce_float(_find_first_value(price, ("prdy_ctrt", "change_pct")))
except Exception as exc:
result["price_status"] = "ERROR"
result["price_error"] = str(exc)
try:
orderbook = get_asking_price_10_level(creds, code)
output1 = orderbook.get("output1") or {}
result["orderbook_raw"] = orderbook
result["microstructure_pressure"] = _coerce_float(
_find_first_value(output1, ("total_askp_rsqn", "total_bidp_rsqn"))
)
result["ask_1"] = _coerce_float(_find_first_value(output1, ("askp1",)))
result["bid_1"] = _coerce_float(_find_first_value(output1, ("bidp1",)))
result["orderbook_status"] = "OK"
except Exception as exc:
result["orderbook_status"] = "ERROR"
result["orderbook_error"] = str(exc)
try:
start = (dt.date.today() - dt.timedelta(days=10)).strftime("%Y%m%d")
end = dt.date.today().strftime("%Y%m%d")
short_sale = get_daily_short_sale(creds, code, start, end)
result["short_sale_raw"] = short_sale
rows = short_sale.get("output2") or []
if rows:
latest = rows[0]
result["short_turnover_share"] = _coerce_float(latest.get("ssts_vol_rlim"))
result["short_sale_status"] = "OK"
except Exception as exc:
result["short_sale_status"] = "ERROR"
result["short_sale_error"] = str(exc)
return result
def _build_seed_rows(source_json: Path) -> list[dict[str, Any]]:
payload = _load_json(source_json)
data = payload.get("data") or {}
core_satellite = {str(row.get("Ticker") or row.get("ticker") or ""): row for row in data.get("core_satellite", [])}
sector_lookup = {str(row.get("Ticker") or row.get("ticker") or ""): row.get("Sector") for row in data.get("core_satellite", [])}
rows: list[dict[str, Any]] = []
for row in data.get("data_feed", []):
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
if not ticker:
continue
merged = dict(row)
core_row = core_satellite.get(ticker) or {}
if core_row:
for key, value in core_row.items():
merged.setdefault(key, value)
merged["Sector"] = merged.get("Sector") or sector_lookup.get(ticker)
rows.append(merged)
return rows
def _collect_one(row: dict[str, Any], *, kis_account: str, include_naver: bool, include_live_kis: bool) -> tuple[dict[str, Any], dict[str, Any]]:
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
name = str(row.get("Name") or row.get("name") or "").strip()
sector = str(row.get("Sector") or row.get("sector") or "").strip() or None
normalized = dict(row)
provenance: dict[str, Any] = {
"ticker": ticker,
"name": name,
"sector": sector,
"source_priority": ["gathertradingdata_json"],
}
if include_live_kis and ticker.isdigit() and len(ticker) == 6:
kis = _normalize_kis_fields(ticker, kis_account)
provenance["kis"] = kis
normalized.update({k: v for k, v in kis.items() if k not in {"current_price_raw", "orderbook_raw", "short_sale_raw"}})
if kis.get("status") == "OK":
provenance["source_priority"].insert(0, "kis_open_api")
if include_naver and ticker.isdigit() and len(ticker) == 6:
naver = _normalize_naver_price_history(ticker)
provenance["naver"] = naver
if naver.get("status") in {"OK", "DATA_MISSING"}:
normalized.setdefault("relative_return_20d", naver.get("relative_return_20d"))
normalized.setdefault("volume_ratio_5d", naver.get("volume_ratio_5d"))
normalized.setdefault("naver_price_status", naver.get("status"))
provenance["source_priority"].append("naver_finance")
normalized.setdefault("collection_as_of", _kst_now_iso())
return normalized, provenance
def collect_to_sqlite(
*,
input_json: Path,
sqlite_db: Path,
output_json: Path,
kis_account: str,
include_naver: bool = True,
include_live_kis: bool = True,
) -> dict[str, Any]:
run_id = uuid.uuid4().hex
started_at = _kst_now_iso()
upsert_collection_run(
sqlite_db,
CollectionRun(
run_id=run_id,
collector_name="kis_data_collection_v1",
started_at=started_at,
status="RUNNING",
input_source=str(input_json),
output_json_path=str(output_json),
output_db_path=str(sqlite_db),
notes="KIS-first CI collection",
),
)
seed_rows = _build_seed_rows(input_json)
summary = {
"formula_id": "KIS_DATA_COLLECTION_V1",
"run_id": run_id,
"started_at": started_at,
"input_json": str(input_json),
"sqlite_db": str(sqlite_db),
"row_count": len(seed_rows),
"source_counts": {},
"errors": [],
"rows": [],
}
for row in seed_rows:
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
if not ticker:
continue
try:
normalized, provenance = _collect_one(row, kis_account=kis_account, include_naver=include_naver, include_live_kis=include_live_kis)
source_counts = summary["source_counts"]
for source_name in provenance.get("source_priority") or []:
source_counts[source_name] = source_counts.get(source_name, 0) + 1
upsert_collection_snapshot(
sqlite_db,
run_id=run_id,
dataset_name="data_feed",
ticker=ticker,
name=str(normalized.get("Name") or normalized.get("name") or ""),
sector=normalized.get("Sector"),
as_of_date=str(normalized.get("Price_Date") or normalized.get("AsOfDate") or normalized.get("collection_as_of") or ""),
source_priority=">".join(provenance.get("source_priority") or []),
source_status="OK",
payload=normalized,
provenance=provenance,
)
summary["rows"].append(
{
"ticker": ticker,
"name": normalized.get("Name") or normalized.get("name"),
"sector": normalized.get("Sector"),
"source_priority": provenance.get("source_priority"),
"current_price": normalized.get("current_price"),
"relative_return_20d": normalized.get("relative_return_20d"),
"volume_ratio_5d": normalized.get("volume_ratio_5d"),
}
)
except Exception as exc: # noqa: BLE001
error = {"ticker": ticker, "error": str(exc)}
summary["errors"].append(error)
append_collection_error(
sqlite_db,
run_id=run_id,
source_name="collector",
error_kind=type(exc).__name__,
error_message=str(exc),
ticker=ticker,
payload=row,
)
summary["finished_at"] = _kst_now_iso()
summary["status"] = "PASS" if not summary["errors"] else "PASS_WITH_WARNINGS"
output_json.parent.mkdir(parents=True, exist_ok=True)
output_json.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
upsert_collection_run(
sqlite_db,
CollectionRun(
run_id=run_id,
collector_name="kis_data_collection_v1",
started_at=started_at,
status=summary["status"],
input_source=str(input_json),
output_json_path=str(output_json),
output_db_path=str(sqlite_db),
notes="KIS-first CI collection",
),
finished_at=summary["finished_at"],
)
return summary
def main() -> int:
ap = argparse.ArgumentParser(description=__doc__)
ap.add_argument("--input-json", type=Path, default=ROOT / "GatherTradingData.json")
ap.add_argument("--sqlite-db", type=Path, default=ROOT / "outputs" / "kis_data_collection" / "kis_data_collection.db")
ap.add_argument("--store-backend", default="sqlite", help="Storage backend contract placeholder (sqlite today, postgresql planned)")
ap.add_argument("--store-location", default=None, help="Backend location/DSN. sqlite path or future postgres DSN.")
ap.add_argument("--output-json", type=Path, default=ROOT / "Temp" / "kis_data_collection_v1.json")
ap.add_argument("--kis-account", choices=["real", "mock"], default="real")
ap.add_argument("--no-naver", action="store_true")
ap.add_argument("--no-live-kis", action="store_true")
args = ap.parse_args()
store_backend, store_location = normalize_store_spec(
CollectionStoreSpec(
backend=args.store_backend,
location=args.store_location or args.sqlite_db,
),
ROOT,
)
if store_backend != "sqlite":
raise SystemExit(
"현재 실행 backend는 sqlite만 지원합니다. "
"하지만 collector는 이미 backend contract로 분리되어 있어 "
"후속 PostgreSQL 구현을 같은 호출 지점에 붙일 수 있습니다."
)
summary = collect_to_sqlite(
input_json=args.input_json,
sqlite_db=Path(store_location),
output_json=args.output_json,
kis_account=args.kis_account,
include_naver=not args.no_naver,
include_live_kis=not args.no_live_kis,
)
print(json.dumps(summary, ensure_ascii=False, indent=2))
return 0 if summary.get("status") == "PASS" else 1
if __name__ == "__main__":
raise SystemExit(main())
+50
View File
@@ -0,0 +1,50 @@
"""Generic storage backend contract for canonical time-series stores.
The call sites use this as a small contract layer so SQLite is the executable
backend today while PostgreSQL can be added later without changing callers.
"""
from __future__ import annotations
from dataclasses import dataclass
from pathlib import Path
@dataclass(frozen=True)
class StoreSpec:
backend: str = "sqlite"
location: str | Path | None = None
def normalized_backend(self) -> str:
backend = (self.backend or "sqlite").strip().lower()
if backend in {"sqlite", "sqlite3"}:
return "sqlite"
if backend in {"postgres", "postgresql", "pg"}:
return "postgresql"
return backend
def default_sqlite_store_path(root: Path, default_name: str) -> Path:
return root / "outputs" / default_name
def normalize_store_spec(
spec: StoreSpec,
root: Path,
*,
default_sqlite_name: str = "store.db",
) -> tuple[str, Path | str]:
backend = spec.normalized_backend()
if backend == "sqlite":
if spec.location is None:
return backend, default_sqlite_store_path(root, default_sqlite_name)
if isinstance(spec.location, Path):
return backend, spec.location
location = str(spec.location).strip()
if location.startswith("sqlite:///"):
return backend, Path(location.removeprefix("sqlite:///"))
return backend, Path(location)
if backend == "postgresql":
if not spec.location:
raise ValueError("postgresql backend requires a DSN/location string")
return backend, str(spec.location)
raise ValueError(f"unsupported backend: {spec.backend!r}")