from __future__ import annotations from collections import Counter from pathlib import Path from openpyxl import load_workbook ROOT = Path(__file__).resolve().parents[1] XLSX = ROOT / "GatherTradingData.xlsx" def main() -> int: wb = load_workbook(XLSX, data_only=True) if "backdata_feature_bank" not in wb.sheetnames: print("FAIL: missing sheet backdata_feature_bank") return 1 ws = wb["backdata_feature_bank"] headers = [ws.cell(row=2, column=c).value for c in range(1, ws.max_column + 1)] col = {str(h): i + 1 for i, h in enumerate(headers) if h} for need in ("Trade_ID", "Source_Origin", "Ticker", "Record_Date"): if need not in col: print(f"FAIL: missing header {need}") return 1 rows = [] for r in range(3, ws.max_row + 1): tid = ws.cell(r, col["Trade_ID"]).value if tid in (None, ""): continue src = str(ws.cell(r, col["Source_Origin"]).value or "").strip() rows.append((str(tid).strip(), src)) counts = Counter(src for _, src in rows) dup = len(rows) - len({tid for tid, _ in rows}) print(f"OK data_rows={len(rows)} replay={counts.get('REPLAY_BACKFILL_KRX_EOD', 0)} gas={counts.get('GAS_AUTO', 0)} dup_trade_id={dup}") if dup > 0: print("FAIL: duplicate trade_id detected") return 1 return 0 if __name__ == "__main__": raise SystemExit(main())