KIS Open API 조회전용 연동 + 직접매매 절대금지 안전게이트

매수/매도 주문 및 계좌 잔고조회를 API로 직접 실행하지 않는다는 원칙을
코드 레벨에서 강제하는 안전게이트(governance/rules/06, 07)와 함께,
시세/호가/공매도거래비중 등 조회전용 KIS Open API 연동 및 SQLite
수집 파이프라인을 추가한다.

- kis_api_client_v1: 모든 요청이 _assert_read_only를 통과해야 하며
  /trading/ 경로·주문 TR_ID는 RuntimeError로 즉시 차단
- kis_data_collection_v1: KIS 우선 + Naver 폴백, 네트워크 실패는
  개별 ticker 단위로 흡수(배치 전체 중단 없음)
- data_collection_store_v1 / storage_backend_v1: SQLite 캐노니컬
  저장소, PostgreSQL 전환 대비 백엔드 추상화
- Gitea 영업일 스케줄(2시간 간격) + CI 강제 게이트
  (validate_no_direct_api_trading_v1, validate_kis_api_credentials_v1)
This commit is contained in:
2026-06-21 20:04:44 +09:00
parent 34f6eebba6
commit 4cb206a269
20 changed files with 2034 additions and 0 deletions
@@ -0,0 +1,370 @@
"""SQLite store for platform-transition data collection outputs.
This store is intentionally small and backend-agnostic enough to be upgraded to
PostgreSQL later without changing the row contract. The canonical payload is the
normalized factor row plus provenance metadata.
"""
from __future__ import annotations
import json
import sqlite3
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Iterable
# DDL script executed by init_db via executescript. Every CREATE statement uses
# IF NOT EXISTS, so applying the script to an already-initialized database
# leaves existing tables and indexes in place.
# Tables:
#   collection_runs          - one row per run_id (primary key).
#   collection_snapshots     - one row per (run_id, dataset_name, ticker);
#                              payload and provenance are stored as JSON text.
#   collection_source_errors - error rows; the table declares no primary key.
# The leading PRAGMA requests write-ahead-log journaling for the database.
# created_at defaults to datetime('now'), which has one-second resolution.
SCHEMA = """
PRAGMA journal_mode=WAL;
CREATE TABLE IF NOT EXISTS collection_runs (
run_id TEXT PRIMARY KEY,
collector_name TEXT NOT NULL,
started_at TEXT NOT NULL,
finished_at TEXT,
status TEXT NOT NULL,
input_source TEXT,
output_json_path TEXT,
output_db_path TEXT,
notes TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE TABLE IF NOT EXISTS collection_snapshots (
run_id TEXT NOT NULL,
dataset_name TEXT NOT NULL,
ticker TEXT NOT NULL,
name TEXT,
sector TEXT,
as_of_date TEXT,
source_priority TEXT,
source_status TEXT,
payload_json TEXT NOT NULL,
provenance_json TEXT NOT NULL,
created_at TEXT DEFAULT (datetime('now')),
PRIMARY KEY (run_id, dataset_name, ticker)
);
CREATE TABLE IF NOT EXISTS collection_source_errors (
run_id TEXT NOT NULL,
ticker TEXT,
source_name TEXT NOT NULL,
error_kind TEXT NOT NULL,
error_message TEXT NOT NULL,
payload_json TEXT,
created_at TEXT DEFAULT (datetime('now'))
);
CREATE INDEX IF NOT EXISTS idx_collection_snapshots_ticker_time
ON collection_snapshots(ticker, created_at DESC);
CREATE INDEX IF NOT EXISTS idx_collection_source_errors_run
ON collection_source_errors(run_id, source_name);
"""
@dataclass(frozen=True)
class CollectionRun:
    """Immutable record describing one collector run.

    The field names match columns of the ``collection_runs`` table. The
    ``finished_at`` column is not part of this record; it is supplied
    separately when the run is written.
    """

    # Required identity and lifecycle fields.
    run_id: str
    collector_name: str
    started_at: str
    status: str

    # Optional bookkeeping fields; ``None`` is persisted as SQL NULL.
    input_source: str | None = None
    output_json_path: str | None = None
    output_db_path: str | None = None
    notes: str | None = None
def init_db(db_path: Path) -> None:
    """Create the database file's parent directory and apply ``SCHEMA``.

    Args:
        db_path: Location of the SQLite database file. Missing parent
            directories are created first.
    """
    parent_dir = db_path.parent
    parent_dir.mkdir(parents=True, exist_ok=True)

    connection = sqlite3.connect(db_path)
    try:
        # The whole DDL script runs in one call, then is committed explicitly.
        connection.executescript(SCHEMA)
        connection.commit()
    finally:
        connection.close()
def upsert_collection_run(db_path: Path, run: CollectionRun, finished_at: str | None = None) -> None:
    """Insert a run row, or overwrite the existing row with the same ``run_id``.

    The schema is applied through ``init_db`` before the write, so the
    database file and its tables are created on first use.

    Args:
        db_path: Location of the SQLite database file.
        run: Run metadata to persist.
        finished_at: Value for the ``finished_at`` column. ``None`` is written
            as NULL, including when a row for this ``run_id`` already exists.
    """
    init_db(db_path)

    statement = """
        INSERT INTO collection_runs (
            run_id, collector_name, started_at, finished_at, status,
            input_source, output_json_path, output_db_path, notes
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(run_id) DO UPDATE SET
            collector_name=excluded.collector_name,
            started_at=excluded.started_at,
            finished_at=excluded.finished_at,
            status=excluded.status,
            input_source=excluded.input_source,
            output_json_path=excluded.output_json_path,
            output_db_path=excluded.output_db_path,
            notes=excluded.notes
    """
    # Positional order must match the column list in the INSERT above.
    values = (
        run.run_id,
        run.collector_name,
        run.started_at,
        finished_at,
        run.status,
        run.input_source,
        run.output_json_path,
        run.output_db_path,
        run.notes,
    )

    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, values)
        connection.commit()
    finally:
        connection.close()
def upsert_collection_snapshot(
    db_path: Path,
    *,
    run_id: str,
    dataset_name: str,
    ticker: str,
    name: str | None,
    sector: str | None,
    as_of_date: str | None,
    source_priority: str,
    source_status: str,
    payload: dict[str, Any],
    provenance: dict[str, Any],
) -> None:
    """Insert a snapshot row, or overwrite the row with the same key.

    The key is ``(run_id, dataset_name, ticker)``. On conflict every non-key
    column listed in the statement is replaced with the new values. The
    schema is applied through ``init_db`` before the write.

    Args:
        db_path: Location of the SQLite database file.
        run_id: Identifier of the run that produced the snapshot.
        dataset_name: Dataset the snapshot belongs to.
        ticker: Ticker the snapshot describes.
        name: Optional display name.
        sector: Optional sector label.
        as_of_date: Optional as-of date string.
        source_priority: Stored in the ``source_priority`` column.
        source_status: Stored in the ``source_status`` column.
        payload: Serialized to JSON text; values JSON cannot encode natively
            are converted with ``str``.
        provenance: Serialized to JSON text in the same way as ``payload``.
    """
    init_db(db_path)

    # Non-ASCII text is kept verbatim rather than \\u-escaped.
    payload_text = json.dumps(payload, ensure_ascii=False, default=str)
    provenance_text = json.dumps(provenance, ensure_ascii=False, default=str)

    statement = """
        INSERT INTO collection_snapshots (
            run_id, dataset_name, ticker, name, sector, as_of_date,
            source_priority, source_status, payload_json, provenance_json
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ON CONFLICT(run_id, dataset_name, ticker) DO UPDATE SET
            name=excluded.name,
            sector=excluded.sector,
            as_of_date=excluded.as_of_date,
            source_priority=excluded.source_priority,
            source_status=excluded.source_status,
            payload_json=excluded.payload_json,
            provenance_json=excluded.provenance_json
    """
    values = (
        run_id,
        dataset_name,
        ticker,
        name,
        sector,
        as_of_date,
        source_priority,
        source_status,
        payload_text,
        provenance_text,
    )

    connection = sqlite3.connect(db_path)
    try:
        connection.execute(statement, values)
        connection.commit()
    finally:
        connection.close()
def append_collection_error(
    db_path: Path,
    *,
    run_id: str,
    source_name: str,
    error_kind: str,
    error_message: str,
    ticker: str | None = None,
    payload: dict[str, Any] | None = None,
) -> None:
    """Add one row to ``collection_source_errors``.

    The schema is applied through ``init_db`` before the write.

    Args:
        db_path: Location of the SQLite database file.
        run_id: Identifier of the run in which the error occurred.
        source_name: Name of the data source that failed.
        error_kind: Short classification of the error.
        error_message: Error text to store.
        ticker: Optional ticker the error relates to; NULL when omitted.
        payload: Optional extra context. A missing or empty payload is stored
            as the JSON text ``{}``.
    """
    init_db(db_path)

    context = payload if payload else {}
    context_text = json.dumps(context, ensure_ascii=False, default=str)
    values = (run_id, ticker, source_name, error_kind, error_message, context_text)

    connection = sqlite3.connect(db_path)
    try:
        connection.execute(
            """
            INSERT INTO collection_source_errors (
                run_id, ticker, source_name, error_kind, error_message, payload_json
            ) VALUES (?, ?, ?, ?, ?, ?)
            """,
            values,
        )
        connection.commit()
    finally:
        connection.close()
def fetch_latest_snapshots(db_path: Path, ticker: str, dataset_name: str | None = None) -> list[dict[str, Any]]:
    """Return all stored snapshot rows for ``ticker``, newest first.

    Args:
        db_path: Location of the SQLite database file.
        ticker: Ticker to look up.
        dataset_name: Optional dataset filter. A falsy value (``None`` or
            ``""``) disables the filter.

    Returns:
        One dict per matching row with every ``collection_snapshots`` column,
        ordered by ``created_at`` descending. An empty list is returned when
        the database file does not exist.
    """
    if not db_path.exists():
        return []

    # Only static fragments are concatenated; values are always bound.
    where = "ticker = ?"
    params: list[Any] = [ticker]
    if dataset_name:
        where += " AND dataset_name = ?"
        params.append(dataset_name)

    # created_at defaults to datetime('now'), which only has one-second
    # resolution, so rows written in the same second would otherwise come back
    # in arbitrary order. rowid breaks the tie in favour of later inserts.
    query = (
        "SELECT * FROM collection_snapshots "
        "WHERE " + where + " "
        "ORDER BY created_at DESC, rowid DESC"
    )

    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        return [dict(row) for row in conn.execute(query, params).fetchall()]
    finally:
        conn.close()
def iter_recent_snapshots(db_path: Path, limit: int = 50) -> Iterable[dict[str, Any]]:
    """Return the most recently created snapshot rows across all tickers.

    Args:
        db_path: Location of the SQLite database file.
        limit: Maximum number of rows to return. Coerced with ``int`` before
            binding, matching the other loaders in this module.

    Returns:
        A list of dicts with every ``collection_snapshots`` column, newest
        first. An empty list is returned when the database file does not
        exist.
    """
    if not db_path.exists():
        return []
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        # created_at has one-second resolution, so rowid is the tie-breaker
        # that keeps later inserts ahead of earlier ones within a second.
        rows = conn.execute(
            "SELECT * FROM collection_snapshots ORDER BY created_at DESC, rowid DESC LIMIT ?",
            (int(limit),),
        ).fetchall()
        return [dict(row) for row in rows]
    finally:
        conn.close()
def load_collection_runs(db_path: Path, limit: int = 20) -> list[dict[str, Any]]:
    """Return the most recent rows of ``collection_runs`` as plain dicts.

    Args:
        db_path: Location of the SQLite database file.
        limit: Maximum number of rows; coerced with ``int`` before binding.

    Returns:
        Run rows ordered by ``started_at`` and then ``created_at``, both
        descending. An empty list is returned when the database file does
        not exist.
    """
    if not db_path.exists():
        return []

    query = """
        SELECT run_id, collector_name, started_at, finished_at, status,
               input_source, output_json_path, output_db_path, notes, created_at
        FROM collection_runs
        ORDER BY started_at DESC, created_at DESC
        LIMIT ?
    """
    connection = sqlite3.connect(db_path)
    connection.row_factory = sqlite3.Row
    try:
        cursor = connection.execute(query, (int(limit),))
        # sqlite3.Row supports the mapping protocol, so dict() copies each row.
        return list(map(dict, cursor.fetchall()))
    finally:
        connection.close()
def load_collection_errors(db_path: Path, limit: int = 20) -> list[dict[str, Any]]:
    """Return the most recently recorded source errors as plain dicts.

    Args:
        db_path: Location of the SQLite database file.
        limit: Maximum number of rows; coerced with ``int`` before binding.

    Returns:
        Error rows, newest first, with the keys ``run_id``, ``ticker``,
        ``source_name``, ``error_kind``, ``error_message``, ``payload_json``
        and ``created_at``. An empty list is returned when the database file
        does not exist.
    """
    if not db_path.exists():
        return []
    conn = sqlite3.connect(db_path)
    conn.row_factory = sqlite3.Row
    try:
        # created_at has one-second resolution; errors appended within the
        # same second are ordered by rowid so the latest insert comes first.
        rows = conn.execute(
            """
            SELECT run_id, ticker, source_name, error_kind, error_message, payload_json, created_at
            FROM collection_source_errors
            ORDER BY created_at DESC, rowid DESC
            LIMIT ?
            """,
            (int(limit),),
        ).fetchall()
        return [dict(row) for row in rows]
    finally:
        conn.close()
def load_collection_dashboard_state(
    db_path: Path | str | None = None,
    output_json_path: Path | str | None = None,
    *,
    limit: int = 8,
) -> dict[str, Any]:
    """Build a read-only summary of the collection store.

    Args:
        db_path: Location of the SQLite database file. When omitted, or when
            it does not point at an existing file, the database sections of
            the result keep their empty defaults.
        output_json_path: Location of a JSON report file. When omitted,
            missing, unreadable or not valid JSON, ``latest_report`` is ``{}``.
        limit: Maximum number of rows in each of ``runs``,
            ``recent_snapshots`` and ``recent_errors``; coerced with ``int``.

    Returns:
        A dict with the keys ``db_path``, ``output_json_path``, ``runs``,
        ``recent_snapshots``, ``recent_errors``, ``counts``, ``latest_run``
        and ``latest_report``. Omitted paths are reported as ``""``.
    """
    # An omitted path must stay None. ``Path()`` means the current directory,
    # which exists, so it would pass an ``exists()`` check and then be handed
    # to ``sqlite3.connect``, which fails on a directory.
    db = Path(db_path) if db_path else None
    report = Path(output_json_path) if output_json_path else None

    state: dict[str, Any] = {
        "db_path": str(db) if db is not None else "",
        "output_json_path": str(report) if report is not None else "",
        "runs": [],
        "recent_snapshots": [],
        "recent_errors": [],
        "counts": {
            "collection_runs": 0,
            "collection_snapshots": 0,
            "collection_source_errors": 0,
        },
        "latest_run": {},
        "latest_report": {},
    }

    if report is not None and report.is_file():
        try:
            state["latest_report"] = json.loads(report.read_text(encoding="utf-8"))
        except Exception:
            # Best-effort: any failure to read or parse the report leaves the
            # section empty instead of aborting the whole summary.
            state["latest_report"] = {}

    if db is None or not db.is_file():
        return state

    row_limit = int(limit)
    # Shared by the "latest run" and "runs" queries so the two stay in sync.
    run_select = (
        "SELECT run_id, collector_name, started_at, finished_at, status, "
        "input_source, output_json_path, output_db_path, notes, created_at "
        "FROM collection_runs "
        "ORDER BY started_at DESC, created_at DESC "
    )

    conn = sqlite3.connect(db)
    conn.row_factory = sqlite3.Row
    try:
        # Table names come from this fixed tuple, never from caller input.
        state["counts"] = {
            table: conn.execute("SELECT COUNT(*) FROM " + table).fetchone()[0]
            for table in (
                "collection_runs",
                "collection_snapshots",
                "collection_source_errors",
            )
        }

        # Queried separately from "runs" so it is populated even when
        # ``limit`` is 0.
        run_row = conn.execute(run_select + "LIMIT 1").fetchone()
        state["latest_run"] = dict(run_row) if run_row is not None else {}

        state["runs"] = [
            dict(row)
            for row in conn.execute(run_select + "LIMIT ?", (row_limit,)).fetchall()
        ]

        # created_at has one-second resolution; rowid keeps later inserts
        # ahead of earlier ones written within the same second.
        state["recent_snapshots"] = [dict(row) for row in conn.execute(
            """
            SELECT run_id, dataset_name, ticker, name, sector, as_of_date,
                   source_priority, source_status, created_at
            FROM collection_snapshots
            ORDER BY created_at DESC, rowid DESC
            LIMIT ?
            """,
            (row_limit,),
        ).fetchall()]
        state["recent_errors"] = [dict(row) for row in conn.execute(
            """
            SELECT run_id, ticker, source_name, error_kind, error_message, created_at
            FROM collection_source_errors
            ORDER BY created_at DESC, rowid DESC
            LIMIT ?
            """,
            (row_limit,),
        ).fetchall()]
    finally:
        conn.close()
    return state