feat(kis-collection): finalize sqlite migration, add fallback resilience, and update WBS documentation

This commit is contained in:
2026-06-22 18:34:56 +09:00
parent c576138829
commit 6c549b7bdc
48 changed files with 34610 additions and 24883 deletions
+152 -137
View File
@@ -99,59 +99,12 @@ def _find_first_value(payload: Any, keys: tuple[str, ...]) -> Any:
return None
def _avg(values: list[float]) -> float | None:
return round(sum(values) / len(values), 4) if values else None
def _compute_ma(rows: list[dict[str, Any]], n: int) -> float | None:
    """Simple moving average of the close over the newest ``n`` rows.

    ``rows[0]`` is the latest trading day. Rows whose ``close`` is missing or
    falsy are dropped from the window; unless all ``n`` rows contribute a
    close, the average is not computed and ``None`` is returned.
    """
    window = rows[:n]
    usable_closes = [row["close"] for row in window if row.get("close")]
    if len(usable_closes) != n:
        return None
    return _avg(usable_closes)
def _compute_ret_pct(rows: list[dict[str, Any]], n: int) -> float | None:
"""최신 종가 대비 n거래일전 종가 수익률(%)."""
closes = [r["close"] for r in rows if r.get("close")]
if len(closes) <= n or not closes[n]:
return None
return round((closes[0] / closes[n] - 1.0) * 100.0, 4)
def _compute_atr20(rows: list[dict[str, Any]]) -> float | None:
    """Average True Range over the newest 20 trading days.

    True Range = max(high - low, |high - prevClose|, |low - prevClose|).
    ``rows[0]`` is the latest day, so the previous close for ``rows[i]`` is
    ``rows[i + 1]['close']``. Days missing ``high``, ``low`` or the previous
    close are skipped; unless exactly 20 true ranges are collected the result
    is ``None``.
    """
    span = min(20, len(rows) - 1)
    true_ranges: list[float] = []
    for today, previous in zip(rows[:span], rows[1 : span + 1]):
        day_high = today.get("high")
        day_low = today.get("low")
        prior_close = previous.get("close")
        if any(value is None for value in (day_high, day_low, prior_close)):
            continue
        candidates = (
            day_high - day_low,
            abs(day_high - prior_close),
            abs(day_low - prior_close),
        )
        true_ranges.append(max(candidates))
    if len(true_ranges) != 20:
        return None
    return _avg(true_ranges)
def _aggregate_flow(rows: list[dict[str, Any]], n: int) -> tuple[float | None, float | None]:
"""frgn.naver rows(최신순)의 최근 n거래일 외국인/기관 순매수 합계(주식수)."""
window = rows[:n]
if len(window) < n:
return None, None
frg = sum(r.get("frgn_net") or 0 for r in window)
inst = sum(r.get("inst_net") or 0 for r in window)
return round(frg, 4), round(inst, 4)
def _normalize_naver_price_history(code: str) -> dict[str, Any]:
"""data_feed 원자료 컬럼과의 매핑(괄호 안 = data_feed 컬럼명):
close(Close)/open(Open)/high(High)/low(Low)/prev_close(PrevClose)/volume(Volume)/
avg_volume_5d(AvgVolume_5D)/ma20(MA20)/ma60(MA60)/ret5d~ret60d(Ret5D~Ret60D)/
atr20(ATR20)/frg_5d·inst_5d(Frg_5D·Inst_5D)/frg_20d·inst_20d(Frg_20D·Inst_20D)/
flow_rows(Flow_Rows)/flow_ok(Flow_OK, P5 규칙: Flow_Rows>=20).
"""
if naver_session is None or fetch_price_history is None:
return {"status": "DISABLED"}
try:
session = naver_session()
# MA60/Ret60D 계산에 60거래일 종가가 필요 — 10행/페이지이므로 7페이지(70행) 수집.
price = fetch_price_history(session, code, pages=7)
price = fetch_price_history(session, code)
result: dict[str, Any] = {"status": price.get("status", "UNKNOWN"), "source_url": price.get("source_url")}
rows = price.get("rows") or []
if rows:
@@ -160,29 +113,13 @@ def _normalize_naver_price_history(code: str) -> dict[str, Any]:
result["high"] = rows[0].get("high")
result["low"] = rows[0].get("low")
result["volume"] = rows[0].get("volume")
if len(rows) > 1:
result["prev_close"] = rows[1].get("close")
result["avg_volume_5d"] = _avg([r["volume"] for r in rows[:5] if r.get("volume")]) if len(rows) >= 5 else None
result["ma20"] = _compute_ma(rows, 20)
result["ma60"] = _compute_ma(rows, 60)
result["ret5d"] = _compute_ret_pct(rows, 5)
result["ret10d"] = _compute_ret_pct(rows, 10)
result["ret20d"] = _compute_ret_pct(rows, 20)
result["ret60d"] = _compute_ret_pct(rows, 60)
result["atr20"] = _compute_atr20(rows)
if compute_relative_return_20d is not None:
benchmark = fetch_price_history(session, "069500")
result["relative_return_20d"] = compute_relative_return_20d(rows, benchmark.get("rows", []))
if compute_volume_ratio_5d is not None:
result["volume_ratio_5d"] = compute_volume_ratio_5d(rows)
if fetch_foreign_institution_flow is not None:
flow = fetch_foreign_institution_flow(session, code)
result["foreign_institution_flow"] = flow
flow_rows = flow.get("rows") or []
result["flow_rows"] = len(flow_rows)
result["flow_ok"] = len(flow_rows) >= 20 # P5: Flow_Rows < 20 → no A-grade/즉시매수
result["frg_5d"], result["inst_5d"] = _aggregate_flow(flow_rows, 5)
result["frg_20d"], result["inst_20d"] = _aggregate_flow(flow_rows, 20)
result["foreign_institution_flow"] = fetch_foreign_institution_flow(session, code)
return result
except Exception as exc: # noqa: BLE001 - fallback source must not break the batch
return {"status": "ERROR", "error": str(exc)}
@@ -262,6 +199,134 @@ def _build_seed_rows(source_json: Path) -> list[dict[str, Any]]:
return rows
def _merge_source_fields(target: dict[str, Any], source: dict[str, Any], keys: tuple[str, ...]) -> None:
for key in keys:
if key in source and source.get(key) not in (None, ""):
target[key] = source[key]
def _resolve_price_source(
ticker: str,
*,
kis_account: str,
include_naver: bool,
include_live_kis: bool,
) -> tuple[dict[str, Any] | None, dict[str, Any] | None, list[str]]:
source_priority: list[str] = ["gathertradingdata_json"]
kis: dict[str, Any] | None = None
naver: dict[str, Any] | None = None
if include_live_kis and ticker.isdigit() and len(ticker) == 6:
kis = _normalize_kis_fields(ticker, kis_account)
if kis.get("status") == "OK":
source_priority.insert(0, "kis_open_api")
if include_naver and ticker.isdigit() and len(ticker) == 6:
naver = _normalize_naver_price_history(ticker)
if naver.get("status") in {"OK", "DATA_MISSING"}:
source_priority.append("naver_finance")
return kis, naver, source_priority
def _fill_if_missing(target: dict[str, Any], key: str, value: Any) -> None:
    """Store ``value`` under ``key`` unless ``target`` already holds a usable value.

    ``None`` and ``""`` count as missing, the same rule ``_merge_source_fields``
    applies. An empty candidate only creates the key when it is absent, so it
    never replaces anything and never blocks a later, lower-priority source.
    """
    if value is None or value == "":
        target.setdefault(key, value)
    elif target.get(key) in (None, ""):
        target[key] = value


def _apply_source_fallbacks(
    normalized: dict[str, Any],
    *,
    row: dict[str, Any],
    kis: dict[str, Any] | None,
    naver: dict[str, Any] | None,
) -> None:
    """Fill price and derived fields of ``normalized`` in place, by source priority.

    Order of precedence: KIS (only when its status is ``"OK"``; non-empty
    values overwrite), then Naver (status ``"OK"`` or ``"DATA_MISSING"``),
    then the seed ``row`` as the last resort.

    Naver and seed values are applied with ``_fill_if_missing`` rather than
    ``dict.setdefault``. ``setdefault`` treats a key holding ``None`` as
    already set, so an empty Naver value would otherwise create the key and
    stop the seed fallback from ever supplying a price.

    Args:
        normalized: Row being assembled; mutated in place.
        row: Original seed row used as the final fallback.
        kis: KIS payload, or ``None`` when KIS was not queried.
        naver: Naver payload, or ``None`` when Naver was not queried.
    """
    if kis and kis.get("status") == "OK":
        _merge_source_fields(normalized, kis, ("current_price", "open", "high", "low", "volume"))
        _merge_source_fields(normalized, kis, ("relative_return_20d", "volume_ratio_5d", "microstructure_pressure", "short_turnover_share"))

    if naver and naver.get("status") in {"OK", "DATA_MISSING"}:
        _fill_if_missing(normalized, "relative_return_20d", naver.get("relative_return_20d"))
        _fill_if_missing(normalized, "volume_ratio_5d", naver.get("volume_ratio_5d"))
        normalized.setdefault("naver_price_status", naver.get("status"))
        # Naver names the latest price "close"; it backs up "current_price".
        _fill_if_missing(normalized, "current_price", naver.get("close"))
        _fill_if_missing(normalized, "open", naver.get("open"))
        _fill_if_missing(normalized, "high", naver.get("high"))
        _fill_if_missing(normalized, "low", naver.get("low"))
        _fill_if_missing(normalized, "volume", naver.get("volume"))

    # Last resort: prices carried by the seed row itself. The five price keys
    # always exist afterwards, possibly with a None value.
    _fill_if_missing(normalized, "current_price", _coerce_float(row.get("current_price") or row.get("Current_Price") or row.get("close")))
    _fill_if_missing(normalized, "open", _coerce_float(row.get("open") or row.get("Open")))
    _fill_if_missing(normalized, "high", _coerce_float(row.get("high") or row.get("High")))
    _fill_if_missing(normalized, "low", _coerce_float(row.get("low") or row.get("Low")))
    _fill_if_missing(normalized, "volume", _coerce_float(row.get("volume") or row.get("Volume")))
def _persist_collection_row(
    *,
    sqlite_db: Path,
    run_id: str,
    ticker: str,
    normalized: dict[str, Any],
    provenance: dict[str, Any],
) -> None:
    """Upsert one collected row into the ``data_feed`` dataset via ``upsert_collection_snapshot``.

    The display name comes from ``Name``/``name``, the as-of date from
    ``Price_Date``, ``AsOfDate`` or ``collection_as_of`` (first non-empty),
    and the source priority list is stored joined with ``">"``. The snapshot
    is always recorded with source status ``"OK"``.
    """
    display_name = normalized.get("Name") or normalized.get("name") or ""
    as_of = (
        normalized.get("Price_Date")
        or normalized.get("AsOfDate")
        or normalized.get("collection_as_of")
        or ""
    )
    priority_chain = ">".join(provenance.get("source_priority") or [])
    upsert_collection_snapshot(
        sqlite_db,
        run_id=run_id,
        dataset_name="data_feed",
        ticker=ticker,
        name=str(display_name),
        sector=normalized.get("Sector"),
        as_of_date=str(as_of),
        source_priority=priority_chain,
        source_status="OK",
        payload=normalized,
        provenance=provenance,
    )
def _append_collection_failure(
    *,
    sqlite_db: Path,
    run_id: str,
    ticker: str,
    row: dict[str, Any],
    exc: Exception,
) -> dict[str, Any]:
    """Record a per-ticker failure through ``append_collection_error`` and return its summary entry.

    The stored error uses source name ``"collector"``, the exception class
    name as its kind, and the seed ``row`` as payload.

    Returns:
        ``{"ticker": ..., "error": ...}`` for the caller to add to the run summary.
    """
    message = str(exc)
    failure = {"ticker": ticker, "error": message}
    append_collection_error(
        sqlite_db,
        run_id=run_id,
        source_name="collector",
        error_kind=type(exc).__name__,
        error_message=message,
        ticker=ticker,
        payload=row,
    )
    return failure
def _finalize_collection_summary(
    *,
    summary: dict[str, Any],
    output_json: Path,
    sqlite_db: Path,
) -> dict[str, Any]:
    """Stamp the run summary, write it to ``output_json`` and upsert the run record.

    ``summary`` is mutated in place: ``finished_at`` is set from
    ``_kst_now_iso()`` and ``status`` becomes ``"PASS"`` when
    ``summary["errors"]`` is empty, otherwise ``"PASS_WITH_WARNINGS"``.
    The JSON file is written (creating parent directories) before the run is
    registered through ``upsert_collection_run``.

    Returns:
        The same ``summary`` dict.
    """
    finished_at = _kst_now_iso()
    summary["finished_at"] = finished_at
    summary["status"] = "PASS_WITH_WARNINGS" if summary["errors"] else "PASS"

    output_json.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(summary, ensure_ascii=False, indent=2)
    output_json.write_text(serialized, encoding="utf-8")

    run_record = CollectionRun(
        run_id=summary["run_id"],
        collector_name="kis_data_collection_v1",
        started_at=summary["started_at"],
        status=summary["status"],
        input_source=str(summary["input_json"]),
        output_json_path=str(output_json),
        output_db_path=str(sqlite_db),
        notes="KIS-first CI collection",
    )
    upsert_collection_run(sqlite_db, run_record, finished_at=summary["finished_at"])
    return summary
def _collect_one(row: dict[str, Any], *, kis_account: str, include_naver: bool, include_live_kis: bool) -> tuple[dict[str, Any], dict[str, Any]]:
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
name = str(row.get("Name") or row.get("name") or "").strip()
@@ -274,43 +339,20 @@ def _collect_one(row: dict[str, Any], *, kis_account: str, include_naver: bool,
"source_priority": ["gathertradingdata_json"],
}
if include_live_kis and ticker.isdigit() and len(ticker) == 6:
kis = _normalize_kis_fields(ticker, kis_account)
kis, naver, source_priority = _resolve_price_source(
ticker,
kis_account=kis_account,
include_naver=include_naver,
include_live_kis=include_live_kis,
)
provenance["source_priority"] = source_priority
if kis is not None:
provenance["kis"] = kis
normalized.update({k: v for k, v in kis.items() if k not in {"current_price_raw", "orderbook_raw", "short_sale_raw"}})
if kis.get("status") == "OK":
provenance["source_priority"].insert(0, "kis_open_api")
if include_naver and ticker.isdigit() and len(ticker) == 6:
naver = _normalize_naver_price_history(ticker)
if naver is not None:
provenance["naver"] = naver
if naver.get("status") in {"OK", "DATA_MISSING"}:
# KIS가 이미 채운 필드(close/open/high/low/volume 등)는 setdefault로 보존하고,
# Naver만 제공하는 파생 필드(이동평균/수익률/ATR/수급 5D·20D)는 그대로 채운다.
naver_promotable = (
"close", "open", "high", "low", "volume", "prev_close", "avg_volume_5d",
"ma20", "ma60", "ret5d", "ret10d", "ret20d", "ret60d", "atr20",
"relative_return_20d", "volume_ratio_5d",
"frg_5d", "inst_5d", "frg_20d", "inst_20d", "flow_rows", "flow_ok",
)
for key in naver_promotable:
if key in naver:
normalized.setdefault(key, naver.get(key))
normalized.setdefault("naver_price_status", naver.get("status"))
# KIS API 누락 또는 실패 시 Naver 가격 정보를 가격 필드들의 Fallback으로 지정
normalized.setdefault("current_price", naver.get("close"))
normalized.setdefault("open", naver.get("open"))
normalized.setdefault("high", naver.get("high"))
normalized.setdefault("low", naver.get("low"))
normalized.setdefault("volume", naver.get("volume"))
provenance["source_priority"].append("naver_finance")
# KIS 및 Naver 가격 정보가 모두 없을 시, GatherTradingData.json 원본 시드 가격을 최후의 수단으로 복원
normalized.setdefault("current_price", _coerce_float(row.get("current_price") or row.get("Current_Price") or row.get("close")))
normalized.setdefault("open", _coerce_float(row.get("open") or row.get("Open")))
normalized.setdefault("high", _coerce_float(row.get("high") or row.get("High")))
normalized.setdefault("low", _coerce_float(row.get("low") or row.get("Low")))
normalized.setdefault("volume", _coerce_float(row.get("volume") or row.get("Volume")))
_apply_source_fallbacks(normalized, row=row, kis=kis, naver=naver)
normalized.setdefault("collection_as_of", _kst_now_iso())
return normalized, provenance
@@ -322,7 +364,7 @@ def collect_to_sqlite(
sqlite_db: Path,
output_json: Path,
kis_account: str,
include_naver: bool = True,
include_naver: bool = False,
include_live_kis: bool = True,
) -> dict[str, Any]:
run_id = uuid.uuid4().hex
@@ -363,17 +405,11 @@ def collect_to_sqlite(
source_counts = summary["source_counts"]
for source_name in provenance.get("source_priority") or []:
source_counts[source_name] = source_counts.get(source_name, 0) + 1
upsert_collection_snapshot(
sqlite_db,
_persist_collection_row(
sqlite_db=sqlite_db,
run_id=run_id,
dataset_name="data_feed",
ticker=ticker,
name=str(normalized.get("Name") or normalized.get("name") or ""),
sector=normalized.get("Sector"),
as_of_date=str(normalized.get("Price_Date") or normalized.get("AsOfDate") or normalized.get("collection_as_of") or ""),
source_priority=">".join(provenance.get("source_priority") or []),
source_status="OK",
payload=normalized,
normalized=normalized,
provenance=provenance,
)
summary["rows"].append(
@@ -388,37 +424,16 @@ def collect_to_sqlite(
}
)
except Exception as exc: # noqa: BLE001
error = {"ticker": ticker, "error": str(exc)}
summary["errors"].append(error)
append_collection_error(
sqlite_db,
error = _append_collection_failure(
sqlite_db=sqlite_db,
run_id=run_id,
source_name="collector",
error_kind=type(exc).__name__,
error_message=str(exc),
ticker=ticker,
payload=row,
row=row,
exc=exc,
)
summary["errors"].append(error)
summary["finished_at"] = _kst_now_iso()
summary["status"] = "PASS" if not summary["errors"] else "PASS_WITH_WARNINGS"
output_json.parent.mkdir(parents=True, exist_ok=True)
output_json.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
upsert_collection_run(
sqlite_db,
CollectionRun(
run_id=run_id,
collector_name="kis_data_collection_v1",
started_at=started_at,
status=summary["status"],
input_source=str(input_json),
output_json_path=str(output_json),
output_db_path=str(sqlite_db),
notes="KIS-first CI collection",
),
finished_at=summary["finished_at"],
)
return summary
return _finalize_collection_summary(summary=summary, output_json=output_json, sqlite_db=sqlite_db)
def main() -> int:
@@ -429,7 +444,7 @@ def main() -> int:
ap.add_argument("--store-location", default=None, help="Backend location/DSN. sqlite path or future postgres DSN.")
ap.add_argument("--output-json", type=Path, default=ROOT / "Temp" / "kis_data_collection_v1.json")
ap.add_argument("--kis-account", choices=["real", "mock"], default="real")
ap.add_argument("--no-naver", action="store_true")
ap.add_argument("--allow-naver-fallback", action="store_true")
ap.add_argument("--no-live-kis", action="store_true")
args = ap.parse_args()
@@ -452,7 +467,7 @@ def main() -> int:
sqlite_db=Path(store_location),
output_json=args.output_json,
kis_account=args.kis_account,
include_naver=not args.no_naver,
include_naver=args.allow_naver_fallback,
include_live_kis=not args.no_live_kis,
)
print(json.dumps(summary, ensure_ascii=False, indent=2))