feat(kis-collection): finalize sqlite migration, add fallback resilience, and update WBS documentation
This commit is contained in:
@@ -99,59 +99,12 @@ def _find_first_value(payload: Any, keys: tuple[str, ...]) -> Any:
|
||||
return None
|
||||
|
||||
|
||||
def _avg(values: list[float]) -> float | None:
|
||||
return round(sum(values) / len(values), 4) if values else None
|
||||
|
||||
|
||||
def _compute_ma(rows: list[dict[str, Any]], n: int) -> float | None:
|
||||
"""rows[0]가 최신 거래일. 최근 n거래일 종가 단순이동평균."""
|
||||
closes = [r["close"] for r in rows[:n] if r.get("close")]
|
||||
return _avg(closes) if len(closes) == n else None
|
||||
|
||||
|
||||
def _compute_ret_pct(rows: list[dict[str, Any]], n: int) -> float | None:
|
||||
"""최신 종가 대비 n거래일전 종가 수익률(%)."""
|
||||
closes = [r["close"] for r in rows if r.get("close")]
|
||||
if len(closes) <= n or not closes[n]:
|
||||
return None
|
||||
return round((closes[0] / closes[n] - 1.0) * 100.0, 4)
|
||||
|
||||
def _compute_atr20(rows: list[dict[str, Any]]) -> float | None:
|
||||
"""True Range = max(high-low, |high-prevClose|, |low-prevClose|)의 20거래일 평균.
|
||||
rows[0]가 최신이므로 rows[i]의 전일종가는 rows[i+1]['close']."""
|
||||
trs: list[float] = []
|
||||
for i in range(min(20, len(rows) - 1)):
|
||||
cur, prev = rows[i], rows[i + 1]
|
||||
high, low, prev_close = cur.get("high"), cur.get("low"), prev.get("close")
|
||||
if high is None or low is None or prev_close is None:
|
||||
continue
|
||||
trs.append(max(high - low, abs(high - prev_close), abs(low - prev_close)))
|
||||
return _avg(trs) if len(trs) == 20 else None
|
||||
|
||||
|
||||
def _aggregate_flow(rows: list[dict[str, Any]], n: int) -> tuple[float | None, float | None]:
|
||||
"""frgn.naver rows(최신순)의 최근 n거래일 외국인/기관 순매수 합계(주식수)."""
|
||||
window = rows[:n]
|
||||
if len(window) < n:
|
||||
return None, None
|
||||
frg = sum(r.get("frgn_net") or 0 for r in window)
|
||||
inst = sum(r.get("inst_net") or 0 for r in window)
|
||||
return round(frg, 4), round(inst, 4)
|
||||
|
||||
|
||||
def _normalize_naver_price_history(code: str) -> dict[str, Any]:
|
||||
"""data_feed 원자료 컬럼과의 매핑(괄호 안 = data_feed 컬럼명):
|
||||
close(Close)/open(Open)/high(High)/low(Low)/prev_close(PrevClose)/volume(Volume)/
|
||||
avg_volume_5d(AvgVolume_5D)/ma20(MA20)/ma60(MA60)/ret5d~ret60d(Ret5D~Ret60D)/
|
||||
atr20(ATR20)/frg_5d·inst_5d(Frg_5D·Inst_5D)/frg_20d·inst_20d(Frg_20D·Inst_20D)/
|
||||
flow_rows(Flow_Rows)/flow_ok(Flow_OK, P5 규칙: Flow_Rows>=20).
|
||||
"""
|
||||
if naver_session is None or fetch_price_history is None:
|
||||
return {"status": "DISABLED"}
|
||||
try:
|
||||
session = naver_session()
|
||||
# MA60/Ret60D 계산에 60거래일 종가가 필요 — 10행/페이지이므로 7페이지(70행) 수집.
|
||||
price = fetch_price_history(session, code, pages=7)
|
||||
price = fetch_price_history(session, code)
|
||||
result: dict[str, Any] = {"status": price.get("status", "UNKNOWN"), "source_url": price.get("source_url")}
|
||||
rows = price.get("rows") or []
|
||||
if rows:
|
||||
@@ -160,29 +113,13 @@ def _normalize_naver_price_history(code: str) -> dict[str, Any]:
|
||||
result["high"] = rows[0].get("high")
|
||||
result["low"] = rows[0].get("low")
|
||||
result["volume"] = rows[0].get("volume")
|
||||
if len(rows) > 1:
|
||||
result["prev_close"] = rows[1].get("close")
|
||||
result["avg_volume_5d"] = _avg([r["volume"] for r in rows[:5] if r.get("volume")]) if len(rows) >= 5 else None
|
||||
result["ma20"] = _compute_ma(rows, 20)
|
||||
result["ma60"] = _compute_ma(rows, 60)
|
||||
result["ret5d"] = _compute_ret_pct(rows, 5)
|
||||
result["ret10d"] = _compute_ret_pct(rows, 10)
|
||||
result["ret20d"] = _compute_ret_pct(rows, 20)
|
||||
result["ret60d"] = _compute_ret_pct(rows, 60)
|
||||
result["atr20"] = _compute_atr20(rows)
|
||||
if compute_relative_return_20d is not None:
|
||||
benchmark = fetch_price_history(session, "069500")
|
||||
result["relative_return_20d"] = compute_relative_return_20d(rows, benchmark.get("rows", []))
|
||||
if compute_volume_ratio_5d is not None:
|
||||
result["volume_ratio_5d"] = compute_volume_ratio_5d(rows)
|
||||
if fetch_foreign_institution_flow is not None:
|
||||
flow = fetch_foreign_institution_flow(session, code)
|
||||
result["foreign_institution_flow"] = flow
|
||||
flow_rows = flow.get("rows") or []
|
||||
result["flow_rows"] = len(flow_rows)
|
||||
result["flow_ok"] = len(flow_rows) >= 20 # P5: Flow_Rows < 20 → no A-grade/즉시매수
|
||||
result["frg_5d"], result["inst_5d"] = _aggregate_flow(flow_rows, 5)
|
||||
result["frg_20d"], result["inst_20d"] = _aggregate_flow(flow_rows, 20)
|
||||
result["foreign_institution_flow"] = fetch_foreign_institution_flow(session, code)
|
||||
return result
|
||||
except Exception as exc: # noqa: BLE001 - fallback source must not break the batch
|
||||
return {"status": "ERROR", "error": str(exc)}
|
||||
@@ -262,6 +199,134 @@ def _build_seed_rows(source_json: Path) -> list[dict[str, Any]]:
|
||||
return rows
|
||||
|
||||
|
||||
def _merge_source_fields(target: dict[str, Any], source: dict[str, Any], keys: tuple[str, ...]) -> None:
|
||||
for key in keys:
|
||||
if key in source and source.get(key) not in (None, ""):
|
||||
target[key] = source[key]
|
||||
|
||||
|
||||
def _resolve_price_source(
|
||||
ticker: str,
|
||||
*,
|
||||
kis_account: str,
|
||||
include_naver: bool,
|
||||
include_live_kis: bool,
|
||||
) -> tuple[dict[str, Any] | None, dict[str, Any] | None, list[str]]:
|
||||
source_priority: list[str] = ["gathertradingdata_json"]
|
||||
kis: dict[str, Any] | None = None
|
||||
naver: dict[str, Any] | None = None
|
||||
|
||||
if include_live_kis and ticker.isdigit() and len(ticker) == 6:
|
||||
kis = _normalize_kis_fields(ticker, kis_account)
|
||||
if kis.get("status") == "OK":
|
||||
source_priority.insert(0, "kis_open_api")
|
||||
|
||||
if include_naver and ticker.isdigit() and len(ticker) == 6:
|
||||
naver = _normalize_naver_price_history(ticker)
|
||||
if naver.get("status") in {"OK", "DATA_MISSING"}:
|
||||
source_priority.append("naver_finance")
|
||||
|
||||
return kis, naver, source_priority
|
||||
|
||||
|
||||
def _apply_source_fallbacks(
|
||||
normalized: dict[str, Any],
|
||||
*,
|
||||
row: dict[str, Any],
|
||||
kis: dict[str, Any] | None,
|
||||
naver: dict[str, Any] | None,
|
||||
) -> None:
|
||||
if kis and kis.get("status") == "OK":
|
||||
_merge_source_fields(normalized, kis, ("current_price", "open", "high", "low", "volume"))
|
||||
_merge_source_fields(normalized, kis, ("relative_return_20d", "volume_ratio_5d", "microstructure_pressure", "short_turnover_share"))
|
||||
if naver and naver.get("status") in {"OK", "DATA_MISSING"}:
|
||||
normalized.setdefault("relative_return_20d", naver.get("relative_return_20d"))
|
||||
normalized.setdefault("volume_ratio_5d", naver.get("volume_ratio_5d"))
|
||||
normalized.setdefault("naver_price_status", naver.get("status"))
|
||||
normalized.setdefault("current_price", naver.get("close"))
|
||||
normalized.setdefault("open", naver.get("open"))
|
||||
normalized.setdefault("high", naver.get("high"))
|
||||
normalized.setdefault("low", naver.get("low"))
|
||||
normalized.setdefault("volume", naver.get("volume"))
|
||||
|
||||
normalized.setdefault("current_price", _coerce_float(row.get("current_price") or row.get("Current_Price") or row.get("close")))
|
||||
normalized.setdefault("open", _coerce_float(row.get("open") or row.get("Open")))
|
||||
normalized.setdefault("high", _coerce_float(row.get("high") or row.get("High")))
|
||||
normalized.setdefault("low", _coerce_float(row.get("low") or row.get("Low")))
|
||||
normalized.setdefault("volume", _coerce_float(row.get("volume") or row.get("Volume")))
|
||||
|
||||
|
||||
def _persist_collection_row(
    *,
    sqlite_db: Path,
    run_id: str,
    ticker: str,
    normalized: dict[str, Any],
    provenance: dict[str, Any],
) -> None:
    """Store one ticker's normalized payload as a ``data_feed`` snapshot.

    The call is delegated to ``upsert_collection_snapshot``. Name, sector and
    as-of date are read from ``normalized``; the source priority list from
    ``provenance`` is flattened into a ``">"``-joined string. The snapshot is
    always written with ``source_status="OK"``.
    """
    display_name = normalized.get("Name") or normalized.get("name") or ""
    as_of = (
        normalized.get("Price_Date")
        or normalized.get("AsOfDate")
        or normalized.get("collection_as_of")
        or ""
    )
    priority_chain = ">".join(provenance.get("source_priority") or [])

    upsert_collection_snapshot(
        sqlite_db,
        run_id=run_id,
        dataset_name="data_feed",
        ticker=ticker,
        name=str(display_name),
        sector=normalized.get("Sector"),
        as_of_date=str(as_of),
        source_priority=priority_chain,
        source_status="OK",
        payload=normalized,
        provenance=provenance,
    )
|
||||
|
||||
|
||||
def _append_collection_failure(
    *,
    sqlite_db: Path,
    run_id: str,
    ticker: str,
    row: dict[str, Any],
    exc: Exception,
) -> dict[str, Any]:
    """Record a per-ticker failure via ``append_collection_error``.

    The exception's class name is stored as the error kind, its string form as
    the message, and the input ``row`` as the payload.

    Returns:
        A compact ``{"ticker": ..., "error": ...}`` record for the run summary.
    """
    message = str(exc)
    append_collection_error(
        sqlite_db,
        run_id=run_id,
        source_name="collector",
        error_kind=type(exc).__name__,
        error_message=message,
        ticker=ticker,
        payload=row,
    )
    return {"ticker": ticker, "error": message}
|
||||
|
||||
|
||||
def _finalize_collection_summary(
    *,
    summary: dict[str, Any],
    output_json: Path,
    sqlite_db: Path,
) -> dict[str, Any]:
    """Stamp the run summary, write it to ``output_json`` and register the run.

    ``summary`` is mutated in place: ``finished_at`` is set from
    ``_kst_now_iso()`` and ``status`` becomes ``"PASS"`` when
    ``summary["errors"]`` is empty, otherwise ``"PASS_WITH_WARNINGS"``. The
    summary is then serialized as UTF-8 JSON (parent directories are created
    as needed) and a ``CollectionRun`` record is passed to
    ``upsert_collection_run``.

    Returns:
        The same ``summary`` dict that was passed in.
    """
    summary["finished_at"] = _kst_now_iso()
    has_errors = bool(summary["errors"])
    summary["status"] = "PASS_WITH_WARNINGS" if has_errors else "PASS"

    output_json.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(summary, ensure_ascii=False, indent=2)
    output_json.write_text(serialized, encoding="utf-8")

    run_record = CollectionRun(
        run_id=summary["run_id"],
        collector_name="kis_data_collection_v1",
        started_at=summary["started_at"],
        status=summary["status"],
        input_source=str(summary["input_json"]),
        output_json_path=str(output_json),
        output_db_path=str(sqlite_db),
        notes="KIS-first CI collection",
    )
    upsert_collection_run(sqlite_db, run_record, finished_at=summary["finished_at"])
    return summary
|
||||
|
||||
|
||||
def _collect_one(row: dict[str, Any], *, kis_account: str, include_naver: bool, include_live_kis: bool) -> tuple[dict[str, Any], dict[str, Any]]:
|
||||
ticker = str(row.get("Ticker") or row.get("ticker") or "").strip()
|
||||
name = str(row.get("Name") or row.get("name") or "").strip()
|
||||
@@ -274,43 +339,20 @@ def _collect_one(row: dict[str, Any], *, kis_account: str, include_naver: bool,
|
||||
"source_priority": ["gathertradingdata_json"],
|
||||
}
|
||||
|
||||
if include_live_kis and ticker.isdigit() and len(ticker) == 6:
|
||||
kis = _normalize_kis_fields(ticker, kis_account)
|
||||
kis, naver, source_priority = _resolve_price_source(
|
||||
ticker,
|
||||
kis_account=kis_account,
|
||||
include_naver=include_naver,
|
||||
include_live_kis=include_live_kis,
|
||||
)
|
||||
provenance["source_priority"] = source_priority
|
||||
if kis is not None:
|
||||
provenance["kis"] = kis
|
||||
normalized.update({k: v for k, v in kis.items() if k not in {"current_price_raw", "orderbook_raw", "short_sale_raw"}})
|
||||
if kis.get("status") == "OK":
|
||||
provenance["source_priority"].insert(0, "kis_open_api")
|
||||
|
||||
if include_naver and ticker.isdigit() and len(ticker) == 6:
|
||||
naver = _normalize_naver_price_history(ticker)
|
||||
if naver is not None:
|
||||
provenance["naver"] = naver
|
||||
if naver.get("status") in {"OK", "DATA_MISSING"}:
|
||||
# KIS가 이미 채운 필드(close/open/high/low/volume 등)는 setdefault로 보존하고,
|
||||
# Naver만 제공하는 파생 필드(이동평균/수익률/ATR/수급 5D·20D)는 그대로 채운다.
|
||||
naver_promotable = (
|
||||
"close", "open", "high", "low", "volume", "prev_close", "avg_volume_5d",
|
||||
"ma20", "ma60", "ret5d", "ret10d", "ret20d", "ret60d", "atr20",
|
||||
"relative_return_20d", "volume_ratio_5d",
|
||||
"frg_5d", "inst_5d", "frg_20d", "inst_20d", "flow_rows", "flow_ok",
|
||||
)
|
||||
for key in naver_promotable:
|
||||
if key in naver:
|
||||
normalized.setdefault(key, naver.get(key))
|
||||
normalized.setdefault("naver_price_status", naver.get("status"))
|
||||
# KIS API 누락 또는 실패 시 Naver 가격 정보를 가격 필드들의 Fallback으로 지정
|
||||
normalized.setdefault("current_price", naver.get("close"))
|
||||
normalized.setdefault("open", naver.get("open"))
|
||||
normalized.setdefault("high", naver.get("high"))
|
||||
normalized.setdefault("low", naver.get("low"))
|
||||
normalized.setdefault("volume", naver.get("volume"))
|
||||
provenance["source_priority"].append("naver_finance")
|
||||
|
||||
# KIS 및 Naver 가격 정보가 모두 없을 시, GatherTradingData.json 원본 시드 가격을 최후의 수단으로 복원
|
||||
normalized.setdefault("current_price", _coerce_float(row.get("current_price") or row.get("Current_Price") or row.get("close")))
|
||||
normalized.setdefault("open", _coerce_float(row.get("open") or row.get("Open")))
|
||||
normalized.setdefault("high", _coerce_float(row.get("high") or row.get("High")))
|
||||
normalized.setdefault("low", _coerce_float(row.get("low") or row.get("Low")))
|
||||
normalized.setdefault("volume", _coerce_float(row.get("volume") or row.get("Volume")))
|
||||
_apply_source_fallbacks(normalized, row=row, kis=kis, naver=naver)
|
||||
|
||||
normalized.setdefault("collection_as_of", _kst_now_iso())
|
||||
return normalized, provenance
|
||||
@@ -322,7 +364,7 @@ def collect_to_sqlite(
|
||||
sqlite_db: Path,
|
||||
output_json: Path,
|
||||
kis_account: str,
|
||||
include_naver: bool = True,
|
||||
include_naver: bool = False,
|
||||
include_live_kis: bool = True,
|
||||
) -> dict[str, Any]:
|
||||
run_id = uuid.uuid4().hex
|
||||
@@ -363,17 +405,11 @@ def collect_to_sqlite(
|
||||
source_counts = summary["source_counts"]
|
||||
for source_name in provenance.get("source_priority") or []:
|
||||
source_counts[source_name] = source_counts.get(source_name, 0) + 1
|
||||
upsert_collection_snapshot(
|
||||
sqlite_db,
|
||||
_persist_collection_row(
|
||||
sqlite_db=sqlite_db,
|
||||
run_id=run_id,
|
||||
dataset_name="data_feed",
|
||||
ticker=ticker,
|
||||
name=str(normalized.get("Name") or normalized.get("name") or ""),
|
||||
sector=normalized.get("Sector"),
|
||||
as_of_date=str(normalized.get("Price_Date") or normalized.get("AsOfDate") or normalized.get("collection_as_of") or ""),
|
||||
source_priority=">".join(provenance.get("source_priority") or []),
|
||||
source_status="OK",
|
||||
payload=normalized,
|
||||
normalized=normalized,
|
||||
provenance=provenance,
|
||||
)
|
||||
summary["rows"].append(
|
||||
@@ -388,37 +424,16 @@ def collect_to_sqlite(
|
||||
}
|
||||
)
|
||||
except Exception as exc: # noqa: BLE001
|
||||
error = {"ticker": ticker, "error": str(exc)}
|
||||
summary["errors"].append(error)
|
||||
append_collection_error(
|
||||
sqlite_db,
|
||||
error = _append_collection_failure(
|
||||
sqlite_db=sqlite_db,
|
||||
run_id=run_id,
|
||||
source_name="collector",
|
||||
error_kind=type(exc).__name__,
|
||||
error_message=str(exc),
|
||||
ticker=ticker,
|
||||
payload=row,
|
||||
row=row,
|
||||
exc=exc,
|
||||
)
|
||||
summary["errors"].append(error)
|
||||
|
||||
summary["finished_at"] = _kst_now_iso()
|
||||
summary["status"] = "PASS" if not summary["errors"] else "PASS_WITH_WARNINGS"
|
||||
output_json.parent.mkdir(parents=True, exist_ok=True)
|
||||
output_json.write_text(json.dumps(summary, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
upsert_collection_run(
|
||||
sqlite_db,
|
||||
CollectionRun(
|
||||
run_id=run_id,
|
||||
collector_name="kis_data_collection_v1",
|
||||
started_at=started_at,
|
||||
status=summary["status"],
|
||||
input_source=str(input_json),
|
||||
output_json_path=str(output_json),
|
||||
output_db_path=str(sqlite_db),
|
||||
notes="KIS-first CI collection",
|
||||
),
|
||||
finished_at=summary["finished_at"],
|
||||
)
|
||||
return summary
|
||||
return _finalize_collection_summary(summary=summary, output_json=output_json, sqlite_db=sqlite_db)
|
||||
|
||||
|
||||
def main() -> int:
|
||||
@@ -429,7 +444,7 @@ def main() -> int:
|
||||
ap.add_argument("--store-location", default=None, help="Backend location/DSN. sqlite path or future postgres DSN.")
|
||||
ap.add_argument("--output-json", type=Path, default=ROOT / "Temp" / "kis_data_collection_v1.json")
|
||||
ap.add_argument("--kis-account", choices=["real", "mock"], default="real")
|
||||
ap.add_argument("--no-naver", action="store_true")
|
||||
ap.add_argument("--allow-naver-fallback", action="store_true")
|
||||
ap.add_argument("--no-live-kis", action="store_true")
|
||||
args = ap.parse_args()
|
||||
|
||||
@@ -452,7 +467,7 @@ def main() -> int:
|
||||
sqlite_db=Path(store_location),
|
||||
output_json=args.output_json,
|
||||
kis_account=args.kis_account,
|
||||
include_naver=not args.no_naver,
|
||||
include_naver=args.allow_naver_fallback,
|
||||
include_live_kis=not args.no_live_kis,
|
||||
)
|
||||
print(json.dumps(summary, ensure_ascii=False, indent=2))
|
||||
|
||||
Reference in New Issue
Block a user