KIS Open API 조회전용 연동 + 직접매매 절대금지 안전게이트

매수/매도 주문 및 계좌 잔고조회를 API로 직접 실행하지 않는다는 원칙을 코드 레벨에서 강제하는 안전게이트(governance/rules/06, 07)와 함께, 시세/호가/공매도거래비중 등 조회전용 KIS Open API 연동 및 SQLite 수집 파이프라인을 추가한다.

- kis_api_client_v1: 모든 요청이 _assert_read_only를 통과해야 하며 /trading/ 경로·주문 TR_ID는 RuntimeError로 즉시 차단
- kis_data_collection_v1: KIS 우선 + Naver 폴백, 네트워크 실패는 개별 ticker 단위로 흡수(배치 전체 중단 없음)
- data_collection_store_v1 / storage_backend_v1: SQLite 캐노니컬 저장소, PostgreSQL 전환 대비 백엔드 추상화
- Gitea 영업일 스케줄(2시간 간격) + CI 강제 게이트 (validate_no_direct_api_trading_v1, validate_kis_api_credentials_v1)
2026-06-21 20:04:44 +09:00
parent 34f6eebba6
commit 4cb206a269
20 changed files with 2034 additions and 0 deletions
@@ -0,0 +1,370 @@
+"""SQLite store for platform-transition data collection outputs.
+
+This store is intentionally small and backend-agnostic enough to be upgraded to
+PostgreSQL later without changing the row contract. The canonical payload is the
+normalized factor row plus provenance metadata.
+"""
+from __future__ import annotations
+
+import json
+import sqlite3
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterable
+
+
+# DDL script executed by init_db(). Every CREATE statement uses IF NOT EXISTS,
+# so running the script against an already-initialised database is harmless.
+#   collection_runs          - one row per collector run, keyed by run_id
+#   collection_snapshots     - one row per (run_id, dataset_name, ticker); the
+#                              payload and provenance are stored as JSON text
+#   collection_source_errors - error log rows; no primary key is declared
+# created_at is filled by SQLite's datetime('now') default (UTC, one-second
+# resolution) whenever an INSERT does not supply it.
+SCHEMA = """
+PRAGMA journal_mode=WAL;
+
+CREATE TABLE IF NOT EXISTS collection_runs (
+    run_id TEXT PRIMARY KEY,
+    collector_name TEXT NOT NULL,
+    started_at TEXT NOT NULL,
+    finished_at TEXT,
+    status TEXT NOT NULL,
+    input_source TEXT,
+    output_json_path TEXT,
+    output_db_path TEXT,
+    notes TEXT,
+    created_at TEXT DEFAULT (datetime('now'))
+);
+
+CREATE TABLE IF NOT EXISTS collection_snapshots (
+    run_id TEXT NOT NULL,
+    dataset_name TEXT NOT NULL,
+    ticker TEXT NOT NULL,
+    name TEXT,
+    sector TEXT,
+    as_of_date TEXT,
+    source_priority TEXT,
+    source_status TEXT,
+    payload_json TEXT NOT NULL,
+    provenance_json TEXT NOT NULL,
+    created_at TEXT DEFAULT (datetime('now')),
+    PRIMARY KEY (run_id, dataset_name, ticker)
+);
+
+CREATE TABLE IF NOT EXISTS collection_source_errors (
+    run_id TEXT NOT NULL,
+    ticker TEXT,
+    source_name TEXT NOT NULL,
+    error_kind TEXT NOT NULL,
+    error_message TEXT NOT NULL,
+    payload_json TEXT,
+    created_at TEXT DEFAULT (datetime('now'))
+);
+
+CREATE INDEX IF NOT EXISTS idx_collection_snapshots_ticker_time
+    ON collection_snapshots(ticker, created_at DESC);
+
+CREATE INDEX IF NOT EXISTS idx_collection_source_errors_run
+    ON collection_source_errors(run_id, source_name);
+"""
+
+
+@dataclass(frozen=True)
+class CollectionRun:
+    """Immutable metadata for one collector run, persisted to ``collection_runs``.
+
+    The fields correspond to columns of that table. ``finished_at`` is not a
+    field here: it is passed separately to ``upsert_collection_run``.
+    """
+
+    run_id: str  # primary key of the collection_runs row
+    collector_name: str  # stored in a NOT NULL column
+    started_at: str  # timestamp text; the format is chosen by the caller
+    status: str  # stored in a NOT NULL column; allowed values are not defined here
+    input_source: str | None = None
+    output_json_path: str | None = None
+    output_db_path: str | None = None
+    notes: str | None = None
+
+
+def init_db(db_path: Path) -> None:
+    """Create the SQLite file at *db_path* if needed and apply ``SCHEMA``.
+
+    Missing parent directories are created first. The schema script only uses
+    ``CREATE ... IF NOT EXISTS`` statements, so calling this repeatedly on the
+    same database is safe.
+    """
+    db_path.parent.mkdir(parents=True, exist_ok=True)
+    connection = sqlite3.connect(db_path)
+    try:
+        # The connection context manager commits on a clean exit; it does not
+        # close the connection, hence the surrounding try/finally.
+        with connection:
+            connection.executescript(SCHEMA)
+    finally:
+        connection.close()
+
+
+def upsert_collection_run(db_path: Path, run: CollectionRun, finished_at: str | None = None) -> None:
+    """Insert *run* into ``collection_runs`` or overwrite the row with the same ``run_id``.
+
+    Args:
+        db_path: SQLite database file; initialised through ``init_db`` first.
+        run: Run metadata to persist.
+        finished_at: Completion timestamp stored with the run. ``None`` is
+            written as SQL NULL and also replaces a previously stored value.
+    """
+    init_db(db_path)
+    connection = sqlite3.connect(db_path)
+    try:
+        # Order must match the column list of the INSERT below.
+        row_values = (
+            run.run_id,
+            run.collector_name,
+            run.started_at,
+            finished_at,
+            run.status,
+            run.input_source,
+            run.output_json_path,
+            run.output_db_path,
+            run.notes,
+        )
+        connection.execute(
+            """
+            INSERT INTO collection_runs (
+                run_id, collector_name, started_at, finished_at, status,
+                input_source, output_json_path, output_db_path, notes
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ON CONFLICT(run_id) DO UPDATE SET
+                collector_name=excluded.collector_name,
+                started_at=excluded.started_at,
+                finished_at=excluded.finished_at,
+                status=excluded.status,
+                input_source=excluded.input_source,
+                output_json_path=excluded.output_json_path,
+                output_db_path=excluded.output_db_path,
+                notes=excluded.notes
+            """,
+            row_values,
+        )
+        connection.commit()
+    finally:
+        connection.close()
+
+
+def upsert_collection_snapshot(
+    db_path: Path,
+    *,
+    run_id: str,
+    dataset_name: str,
+    ticker: str,
+    name: str | None,
+    sector: str | None,
+    as_of_date: str | None,
+    source_priority: str,
+    source_status: str,
+    payload: dict[str, Any],
+    provenance: dict[str, Any],
+) -> None:
+    """Insert or replace the snapshot identified by ``(run_id, dataset_name, ticker)``.
+
+    *payload* and *provenance* are stored as JSON text. Non-ASCII characters
+    are kept as-is, and values ``json`` cannot encode natively are stringified
+    with ``str``. On a key conflict every non-key column listed in the
+    statement is overwritten; ``created_at`` is not part of the update.
+    """
+    init_db(db_path)
+    connection = sqlite3.connect(db_path)
+    try:
+        payload_text = json.dumps(payload, ensure_ascii=False, default=str)
+        provenance_text = json.dumps(provenance, ensure_ascii=False, default=str)
+        # Order must match the column list of the INSERT below.
+        row_values = (
+            run_id,
+            dataset_name,
+            ticker,
+            name,
+            sector,
+            as_of_date,
+            source_priority,
+            source_status,
+            payload_text,
+            provenance_text,
+        )
+        connection.execute(
+            """
+            INSERT INTO collection_snapshots (
+                run_id, dataset_name, ticker, name, sector, as_of_date,
+                source_priority, source_status, payload_json, provenance_json
+            ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+            ON CONFLICT(run_id, dataset_name, ticker) DO UPDATE SET
+                name=excluded.name,
+                sector=excluded.sector,
+                as_of_date=excluded.as_of_date,
+                source_priority=excluded.source_priority,
+                source_status=excluded.source_status,
+                payload_json=excluded.payload_json,
+                provenance_json=excluded.provenance_json
+            """,
+            row_values,
+        )
+        connection.commit()
+    finally:
+        connection.close()
+
+
+def append_collection_error(
+    db_path: Path,
+    *,
+    run_id: str,
+    source_name: str,
+    error_kind: str,
+    error_message: str,
+    ticker: str | None = None,
+    payload: dict[str, Any] | None = None,
+) -> None:
+    """Add one row to ``collection_source_errors``.
+
+    *ticker* may be ``None`` for errors that are not tied to a single ticker.
+    A missing or empty *payload* is stored as the JSON text ``{}``.
+    """
+    init_db(db_path)
+    connection = sqlite3.connect(db_path)
+    try:
+        details = payload if payload else {}
+        details_text = json.dumps(details, ensure_ascii=False, default=str)
+        connection.execute(
+            """
+            INSERT INTO collection_source_errors (
+                run_id, ticker, source_name, error_kind, error_message, payload_json
+            ) VALUES (?, ?, ?, ?, ?, ?)
+            """,
+            (run_id, ticker, source_name, error_kind, error_message, details_text),
+        )
+        connection.commit()
+    finally:
+        connection.close()
+
+
+def fetch_latest_snapshots(db_path: Path, ticker: str, dataset_name: str | None = None) -> list[dict[str, Any]]:
+    """Return every stored snapshot for *ticker*, newest first.
+
+    Args:
+        db_path: SQLite database file. A path that does not exist yields ``[]``.
+        ticker: Ticker to look up.
+        dataset_name: Optional dataset filter; ``None`` or an empty string
+            means "all datasets".
+
+    Returns:
+        One dict per row (all columns), ordered by ``created_at`` descending.
+        Rows that share the same ``created_at`` are ordered by insertion,
+        most recently inserted first.
+    """
+    if not db_path.exists():
+        return []
+    # Normalise "no filter" to NULL so a single static statement serves both cases.
+    dataset_filter = dataset_name or None
+    conn = sqlite3.connect(db_path)
+    conn.row_factory = sqlite3.Row
+    try:
+        # created_at only has one-second resolution, so a whole batch usually
+        # shares one value; rowid breaks those ties deterministically.
+        rows = conn.execute(
+            """
+            SELECT * FROM collection_snapshots
+            WHERE ticker = ? AND (? IS NULL OR dataset_name = ?)
+            ORDER BY created_at DESC, rowid DESC
+            """,
+            (ticker, dataset_filter, dataset_filter),
+        ).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
+
+
+def iter_recent_snapshots(db_path: Path, limit: int = 50) -> Iterable[dict[str, Any]]:
+    """Return the *limit* most recent snapshots across all tickers.
+
+    Args:
+        db_path: SQLite database file. A path that does not exist yields ``[]``.
+        limit: Maximum number of rows; coerced with ``int`` like the other
+            loaders in this module.
+
+    Returns:
+        A list of row dicts ordered by ``created_at`` descending. Rows sharing
+        a ``created_at`` value are ordered most recently inserted first, so
+        the rows that survive the LIMIT are well defined.
+    """
+    if not db_path.exists():
+        return []
+    conn = sqlite3.connect(db_path)
+    conn.row_factory = sqlite3.Row
+    try:
+        # Without the rowid tie-break, which same-second rows make the cut
+        # would be left to the query planner.
+        rows = conn.execute(
+            "SELECT * FROM collection_snapshots ORDER BY created_at DESC, rowid DESC LIMIT ?",
+            (int(limit),),
+        ).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
+
+
+def load_collection_runs(db_path: Path, limit: int = 20) -> list[dict[str, Any]]:
+    """Return up to *limit* rows of ``collection_runs``, most recently started first.
+
+    Runs with the same ``started_at`` are further ordered by ``created_at``
+    descending. A database file that does not exist yields an empty list.
+    """
+    if not db_path.exists():
+        return []
+    connection = sqlite3.connect(db_path)
+    connection.row_factory = sqlite3.Row
+    try:
+        cursor = connection.execute(
+            """
+            SELECT run_id, collector_name, started_at, finished_at, status,
+                   input_source, output_json_path, output_db_path, notes, created_at
+            FROM collection_runs
+            ORDER BY started_at DESC, created_at DESC
+            LIMIT ?
+            """,
+            (int(limit),),
+        )
+        # sqlite3.Row supports the mapping protocol, so dict() copies each record.
+        return list(map(dict, cursor.fetchall()))
+    finally:
+        connection.close()
+
+
+def load_collection_errors(db_path: Path, limit: int = 20) -> list[dict[str, Any]]:
+    """Return the *limit* most recently recorded source errors.
+
+    Args:
+        db_path: SQLite database file. A path that does not exist yields ``[]``.
+        limit: Maximum number of rows to return.
+
+    Returns:
+        Row dicts ordered by ``created_at`` descending. Errors recorded within
+        the same second are ordered most recently inserted first.
+    """
+    if not db_path.exists():
+        return []
+    conn = sqlite3.connect(db_path)
+    conn.row_factory = sqlite3.Row
+    try:
+        # Errors tend to arrive in bursts that share one created_at value;
+        # rowid keeps their order (and the LIMIT cut-off) deterministic.
+        rows = conn.execute(
+            """
+            SELECT run_id, ticker, source_name, error_kind, error_message, payload_json, created_at
+            FROM collection_source_errors
+            ORDER BY created_at DESC, rowid DESC
+            LIMIT ?
+            """,
+            (int(limit),),
+        ).fetchall()
+        return [dict(row) for row in rows]
+    finally:
+        conn.close()
+
+
+def load_collection_dashboard_state(
+    db_path: Path | str | None = None,
+    output_json_path: Path | str | None = None,
+    *,
+    limit: int = 8,
+) -> dict[str, Any]:
+    """Collect a read-only summary of the collection store for a dashboard.
+
+    Args:
+        db_path: SQLite database to summarise. When omitted, empty, or not an
+            existing file, the database sections keep their empty defaults.
+        output_json_path: Optional JSON report. It is loaded into
+            ``latest_report`` when it is a readable file containing valid
+            JSON; otherwise ``latest_report`` stays ``{}``.
+        limit: Maximum number of rows returned for each of ``runs``,
+            ``recent_snapshots`` and ``recent_errors``.
+
+    Returns:
+        Dict with the keys ``db_path``, ``output_json_path``, ``runs``,
+        ``recent_snapshots``, ``recent_errors``, ``counts``, ``latest_run``
+        and ``latest_report``.
+
+    Raises:
+        sqlite3.Error: If the database file exists but cannot be queried, for
+            example because the expected tables are missing.
+    """
+    # A missing argument must stay "missing": Path() would be the current
+    # directory, which always exists but is neither a report nor a database.
+    db = Path(db_path) if db_path else None
+    report = Path(output_json_path) if output_json_path else None
+    state: dict[str, Any] = {
+        "db_path": str(db) if db is not None else "",
+        "output_json_path": str(report) if report is not None else "",
+        "runs": [],
+        "recent_snapshots": [],
+        "recent_errors": [],
+        "counts": {
+            "collection_runs": 0,
+            "collection_snapshots": 0,
+            "collection_source_errors": 0,
+        },
+        "latest_run": {},
+        "latest_report": {},
+    }
+    if report is not None and report.is_file():
+        try:
+            state["latest_report"] = json.loads(report.read_text(encoding="utf-8"))
+        except (OSError, ValueError):
+            # Best effort: an unreadable, badly encoded or malformed report must
+            # not break the dashboard (JSONDecodeError and UnicodeDecodeError
+            # are both ValueError subclasses).
+            state["latest_report"] = {}
+    if db is None or not db.is_file():
+        return state
+    row_limit = int(limit)
+    conn = sqlite3.connect(db)
+    conn.row_factory = sqlite3.Row
+    try:
+        state["counts"] = {
+            "collection_runs": conn.execute("SELECT COUNT(*) FROM collection_runs").fetchone()[0],
+            "collection_snapshots": conn.execute("SELECT COUNT(*) FROM collection_snapshots").fetchone()[0],
+            "collection_source_errors": conn.execute("SELECT COUNT(*) FROM collection_source_errors").fetchone()[0],
+        }
+        # Queried separately from "runs" so it is still reported when limit is 0.
+        run_row = conn.execute(
+            """
+            SELECT run_id, collector_name, started_at, finished_at, status,
+                   input_source, output_json_path, output_db_path, notes, created_at
+            FROM collection_runs
+            ORDER BY started_at DESC, created_at DESC
+            LIMIT 1
+            """
+        ).fetchone()
+        state["latest_run"] = dict(run_row) if run_row is not None else {}
+        state["runs"] = [dict(row) for row in conn.execute(
+            """
+            SELECT run_id, collector_name, started_at, finished_at, status,
+                   input_source, output_json_path, output_db_path, notes, created_at
+            FROM collection_runs
+            ORDER BY started_at DESC, created_at DESC
+            LIMIT ?
+            """,
+            (row_limit,),
+        ).fetchall()]
+        # created_at has one-second resolution; rowid keeps rows written in the
+        # same second in a deterministic, newest-first order.
+        state["recent_snapshots"] = [dict(row) for row in conn.execute(
+            """
+            SELECT run_id, dataset_name, ticker, name, sector, as_of_date,
+                   source_priority, source_status, created_at
+            FROM collection_snapshots
+            ORDER BY created_at DESC, rowid DESC
+            LIMIT ?
+            """,
+            (row_limit,),
+        ).fetchall()]
+        state["recent_errors"] = [dict(row) for row in conn.execute(
+            """
+            SELECT run_id, ticker, source_name, error_kind, error_message, created_at
+            FROM collection_source_errors
+            ORDER BY created_at DESC, rowid DESC
+            LIMIT ?
+            """,
+            (row_limit,),
+        ).fetchall()]
+    finally:
+        conn.close()
+    return state