# File: QuantEngineByItz/.gitea/workflows/kis_data_collection.yml
# (Gitea file-viewer metadata: 198 lines, 9.1 KiB, YAML)

name: KIS Data Collection (SQLite Canonical Feed)
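# ─────────────────────────────────────────────────────────────────
# [IMPORTANT] This workflow performs read-only data collection only, with the
# KIS Open API at its core. It does not read the xlsx directly; it refreshes
# the SQLite canonical store through GatherTradingData.json + live read-only
# APIs. Buy/sell orders are never executed under any circumstances.
#
# Schedule: business days (Mon-Fri), 08:00-17:00 KST, every 2 hours
# (08/10/12/14/16 o'clock).
# Gitea Actions evaluates schedule cron expressions in UTC (the default when
# no server time zone has been configured separately). The cron entries below
# are therefore written in UTC:
#   KST 08:00 = 23:00 UTC on the previous day -> the day-of-week field is
#     shifted to UTC 0-4 (Sun-Thu) so that it lines up with Korean weekdays
#   KST 10/12/14/16:00 = UTC 01/03/05/07:00 on the same day (UTC Mon-Fri, 1-5)
#
# [If the Gitea server time zone is actually set to Asia/Seoul] replace the
# cron entries below with the single line "0 8,10,12,14,16 * * 1-5". After
# the first run, check the timestamps in the Actions run history to verify
# that it fires around 08:00 KST (verify, do not assume).
#
# Changing the schedule: add/remove/edit the cron lines in the schedule list
# below. Example: for a 1-hour interval, add the 09/11/13/15 KST slots, which
# in UTC is "0 0,2,4,6 * * 1-5" (KST = UTC+9, same calendar day).
# ─────────────────────────────────────────────────────────────────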
# Triggers. All cron expressions are written in UTC; the KST wall-clock time
# each one corresponds to (KST = UTC+9) is noted inline.
on:
  schedule:
    # 23:00 UTC belongs to the previous UTC day, so the day-of-week field is
    # Sun-Thu (0-4) in order to land on Mon-Fri in Korea.
    - cron: "0 23 * * 0-4"  # KST Mon-Fri 08:00 (UTC Sun-Thu 23:00)
    - cron: "0 1 * * 1-5"   # KST Mon-Fri 10:00 (UTC 01:00)
    - cron: "0 3 * * 1-5"   # KST Mon-Fri 12:00 (UTC 03:00)
    - cron: "0 5 * * 1-5"   # KST Mon-Fri 14:00 (UTC 05:00)
    - cron: "0 7 * * 1-5"   # KST Mon-Fri 16:00 (UTC 07:00)
  # Manual run: for verifying the schedule or retrying immediately.
  workflow_dispatch:
# Single job: check out the repo, make sure the JSON seed exists, provision a
# cached Python venv, run the safety/credential gates, collect KIS data into
# SQLite, sanity-check the result and print a summary.
jobs:
  collect-kis-data:
    runs-on: self-hosted
    steps:
      # Manual checkout with plain git. The workspace may be reused between
      # runs, hence the "existing .git" branch.
      - name: Checkout Code
        run: |
          ORIGIN_URL="http://x-access-token:${{ secrets.GITHUB_TOKEN }}@192.168.123.100:8418/KimJaeHyun/myfinance.git"
          if [ -d .git ]; then
            git remote set-url origin "$ORIGIN_URL"
          else
            git init
            git remote add origin "$ORIGIN_URL"
          fi
          # Scheduled runs may not export a ref name; fall back to main.
          TARGET_REF="${GITHUB_REF_NAME:-main}"
          git fetch origin "$TARGET_REF" --depth=1
          git reset --hard FETCH_HEAD
          # The workspace can survive between runs, so drop the previous
          # run's report; otherwise a run that fails before collection would
          # print a stale summary in the final notification step.
          rm -f Temp/kis_data_collection_v1.json

      # Ensures GatherTradingData.json exists, trying in order:
      #   1) the JSON already in the tree,
      #   2) regeneration from GatherTradingData.xlsx,
      #   3) download of the xlsx (when .clasprc.json exists) + regeneration.
      # Every external command is checked explicitly so the ::error::
      # diagnostics are printed whether or not the runner's shell has
      # errexit enabled.
      # NOTE(review): this step runs before the venv is set up, so `python3`
      # here is whatever interpreter is on PATH at that point; confirm it has
      # the packages the two tools/ scripts need.
      - name: Prepare Raw Seed Snapshot
        run: |
          # Convert the workbook; succeeds only if the converter exits 0 AND
          # the JSON file actually exists afterwards.
          convert_workbook() {
            python3 tools/convert_xlsx_to_json.py \
              --xlsx GatherTradingData.xlsx \
              --out GatherTradingData.json \
              && [ -f GatherTradingData.json ]
          }

          if [ -f GatherTradingData.json ]; then
            echo "GatherTradingData.json present"
            exit 0
          fi

          if [ -f GatherTradingData.xlsx ]; then
            echo "GatherTradingData.json missing; regenerating from GatherTradingData.xlsx"
            if convert_workbook; then
              echo "GatherTradingData.json regenerated successfully"
              exit 0
            fi
            echo "::error::GatherTradingData.xlsx is present but JSON regeneration failed."
            echo "::error::Check tools/convert_xlsx_to_json.py and workbook sheet integrity."
            exit 1
          fi

          if [ -f .clasprc.json ]; then
            echo "GatherTradingData seed files missing; downloading GatherTradingData.xlsx from Google Drive via .clasprc.json"
            if ! python3 tools/download_trading_data.py || [ ! -f GatherTradingData.xlsx ]; then
              echo "::error::.clasprc.json exists but GatherTradingData.xlsx was not downloaded."
              echo "::error::Check Google Drive access and tools/download_trading_data.py."
              exit 1
            fi
            echo "GatherTradingData.xlsx downloaded successfully; regenerating GatherTradingData.json"
            if convert_workbook; then
              echo "GatherTradingData.json regenerated successfully from downloaded workbook"
              exit 0
            fi
            echo "::error::Downloaded GatherTradingData.xlsx but JSON regeneration failed."
            echo "::error::Check workbook integrity and tools/convert_xlsx_to_json.py."
            exit 1
          fi

          echo "::error::Neither GatherTradingData.json nor GatherTradingData.xlsx exists in the checked-out tree."
          echo "::error::This workflow requires a canonical seed snapshot before KIS collection can start."
          echo "::error::Fix options:"
          echo "::error:: 1) Commit GatherTradingData.json to the repository tree."
          echo "::error:: 2) Commit GatherTradingData.xlsx so the workflow can regenerate the JSON."
          echo "::error:: 3) Provide .clasprc.json so the workflow can download GatherTradingData.xlsx from Google Drive and regenerate the JSON."
          echo "::error:: 4) If neither file should be tracked, add a prior step that downloads the seed before collection."
          exit 1

      # Puts /usr/local/bin on PATH for the following steps and logs the
      # system interpreter version.
      - name: Configure Runtime Paths
        run: |
          export PATH=/usr/local/bin:$PATH
          echo "/usr/local/bin" >> "$GITHUB_PATH"
          /usr/bin/python3 --version

      # Creates (or reuses) a venv under VENV_BASE, keyed by the md5 of the
      # collector script, and prepends its bin/ to PATH for later steps.
      - name: Setup Python Environment
        run: |
          VENV_BASE=/volume1/gitea/python_venv
          # `cmd | cut || fallback` only sees cut's exit status unless
          # pipefail is on, so default explicitly when the hash is empty.
          # An empty key would make VENV equal to VENV_BASE itself.
          REQ_HASH=$(md5sum tools/run_kis_data_collection_v1.py 2>/dev/null | cut -d' ' -f1) || true
          REQ_HASH="${REQ_HASH:-kis-default}"
          VENV="$VENV_BASE/$REQ_HASH"
          # Marker written only after every install succeeded. Checking for
          # bin/python alone would treat a half-provisioned venv (interrupted
          # pip install) as ready on every later run. Venvs created before
          # this marker existed are simply re-provisioned once.
          READY_MARKER="$VENV/.install-complete"
          if [ ! -f "$READY_MARKER" ]; then
            mkdir -p "$VENV_BASE"
            /usr/bin/python3 -m venv "$VENV"
            # Bootstrap pip by hand when the venv came up without it.
            if [ ! -f "$VENV/bin/pip" ]; then
              curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py -o get-pip.py
              "$VENV/bin/python" get-pip.py --quiet
              rm get-pip.py
            fi
            "$VENV/bin/pip" install --upgrade pip --quiet
            "$VENV/bin/pip" install requests beautifulsoup4 pyyaml --quiet
            touch "$READY_MARKER"
            # Best-effort cleanup: keep the two most recently modified venv
            # directories and delete the rest.
            ls -dt "$VENV_BASE"/*/ 2>/dev/null | tail -n +3 | xargs rm -rf 2>/dev/null || true
          fi
          echo "$VENV/bin" >> "$GITHUB_PATH"

      - name: "[CRITICAL] No Direct API Trading Gate"
        run: python3 tools/validate_no_direct_api_trading_v1.py

      - name: "[CRITICAL] Validate KIS API Credentials (mock)"
        env:
          # Gitea repository variables are injected here; the Python loader
          # reads these env names.
          # NOTE(review): these come from `vars`, not `secrets`; confirm that
          # storing API keys as plain repository variables is intended.
          KIS_APP_Key_TEST: ${{ vars.KIS_APP_KEY_TEST }}
          KIS_APP_Secret_TEST: ${{ vars.KIS_APP_SECRET_TEST }}
        run: |
          # Fail fast with a readable message when a variable is unset/empty.
          if [ -z "${KIS_APP_Key_TEST:-}" ]; then
            echo "::error::Gitea variable KIS_APP_KEY_TEST is missing or empty"
            exit 1
          fi
          if [ -z "${KIS_APP_Secret_TEST:-}" ]; then
            echo "::error::Gitea variable KIS_APP_SECRET_TEST is missing or empty"
            exit 1
          fi
          python3 tools/validate_kis_api_credentials_v1.py \
            --account mock \
            --ticker 005930

      - name: Collect KIS Market Data to SQLite (read-only)
        env:
          # Real collection uses repository variables, not Windows shell env
          # syntax.
          KIS_APP_Key: ${{ vars.KIS_APP_KEY }}
          KIS_APP_Secret: ${{ vars.KIS_APP_SECRET }}
        run: |
          if [ -z "${KIS_APP_Key:-}" ]; then
            echo "::error::Gitea variable KIS_APP_KEY is missing or empty"
            exit 1
          fi
          if [ -z "${KIS_APP_Secret:-}" ]; then
            echo "::error::Gitea variable KIS_APP_SECRET is missing or empty"
            exit 1
          fi
          # Make sure both output locations exist before the collector runs.
          mkdir -p outputs/kis_data_collection Temp
          python3 tools/run_kis_data_collection_v1.py \
            --input-json GatherTradingData.json \
            --sqlite-db outputs/kis_data_collection/kis_data_collection.db \
            --output-json Temp/kis_data_collection_v1.json \
            --kis-account real

      # Checks that the DB and the JSON report exist and that both tables
      # hold at least one row. Explicit exits are used instead of `assert`
      # so the checks still run if Python is started in optimized mode.
      # NOTE(review): the counts cover the whole table, not just this run.
      - name: Validate SQLite Artifact
        run: |
          python3 - <<'PY'
          import contextlib
          import json
          import sqlite3
          import sys
          from pathlib import Path

          db = Path("outputs/kis_data_collection/kis_data_collection.db")
          report = Path("Temp/kis_data_collection_v1.json")
          for label, path in (("db", db), ("report", report)):
              if not path.exists():
                  sys.exit(f"missing {label}: {path}")

          # closing() guarantees conn.close() even if a query raises.
          with contextlib.closing(sqlite3.connect(db)) as conn:
              run_count = conn.execute("SELECT COUNT(*) FROM collection_runs").fetchone()[0]
              snap_count = conn.execute("SELECT COUNT(*) FROM collection_snapshots").fetchone()[0]

          print(json.dumps({"run_count": run_count, "snapshot_count": snap_count}, ensure_ascii=False))
          if run_count < 1:
              sys.exit("collection_runs is empty")
          if snap_count < 1:
              sys.exit("collection_snapshots is empty")
          PY

      # Runs regardless of earlier failures and prints the job status, the
      # collector's JSON report (if one exists) and a link to the run.
      - name: Notify Run Result
        if: always()
        run: |
          STATUS="${{ job.status }}"
          RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"
          SUMMARY_FILE="Temp/kis_data_collection_v1.json"
          # Placeholder text (Korean for "no summary file"), used when the
          # collector produced no report.
          SUMMARY_TEXT="(요약 파일 없음)"
          if [ -f "$SUMMARY_FILE" ]; then
            SUMMARY_TEXT=$(cat "$SUMMARY_FILE")
          fi
          echo "=== KIS Data Collection Result ==="
          echo "status: $STATUS"
          echo "summary: $SUMMARY_TEXT"
          echo "run log: $RUN_URL"