#!/usr/bin/env python3
"""Sheet-to-table synchronization check.

Verifies that the sheets of an XLSX workbook and the tables of two SQLite
databases agree on row and column counts, and runs basic NULL checks on
selected key columns.
"""

import sqlite3
from contextlib import closing
from datetime import datetime
from pathlib import Path

import pandas as pd


def _quote_identifier(name: str) -> str:
    """Return *name* as a double-quoted SQLite identifier."""
    # Embedded double quotes are escaped by doubling them, so that table names
    # containing spaces, quotes or keywords can be used in PRAGMA/SELECT.
    return '"' + name.replace('"', '""') + '"'


class SheetTableSyncVerification:
    """Compare XLSX sheets against SQLite tables and report mismatches.

    Results are accumulated in ``self.results`` under the keys
    ``"timestamp"``, ``"sheets"``, ``"tables"`` and ``"sync_status"``.
    """

    def __init__(self):
        self.xlsx_file = Path('GatherTradingData.xlsx')
        self.kis_db = Path('src/quant_engine/kis_data_collection.db')
        self.snapshot_db = Path('src/quant_engine/snapshot_admin.db')
        self.results = {
            "timestamp": datetime.now().isoformat(),
            "sheets": {},
            "tables": {},
            "sync_status": {},
        }

    def verify_xlsx_sheets(self) -> dict:
        """Record row/column counts and column names of every workbook sheet.

        Returns:
            Mapping of sheet name to ``{"rows", "columns", "col_names"}``;
            the same object is stored in ``self.results["sheets"]``.
        """
        print("\n[XLSX 시트 검증]")

        # Open the workbook once and parse every sheet from that handle
        # instead of re-opening the file per sheet; the handle is closed even
        # if parsing a sheet fails.
        with pd.ExcelFile(self.xlsx_file) as excel_file:
            sheet_names = excel_file.sheet_names
            print(f" 발견된 시트: {len(sheet_names)}개")

            for sheet_name in sheet_names:
                df = excel_file.parse(sheet_name=sheet_name)
                self.results["sheets"][sheet_name] = {
                    "rows": len(df),
                    "columns": len(df.columns),
                    "col_names": list(df.columns),
                }
                print(f" {sheet_name}: {len(df)} rows, {len(df.columns)} cols")

        return self.results["sheets"]

    def _collect_table_info(self, db_path, prefix: str, db_label: str,
                            *, report_empty: bool = True) -> dict:
        """Return row/column info for every user table in one SQLite file.

        Args:
            db_path: Path of the SQLite database file.
            prefix: Prefix for result keys (``"<prefix>.<table>"``).
            db_label: File name shown in the summary line.
            report_empty: When False, tables with zero rows are still
                recorded but not printed.
        """
        info = {}
        # closing() guarantees the connection is released even when a query
        # raises; sqlite3's own context manager only manages transactions.
        with closing(sqlite3.connect(db_path)) as conn:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT name FROM sqlite_master "
                "WHERE type='table' AND name != 'sqlite_sequence'"
            )
            tables = [row[0] for row in cursor.fetchall()]
            print(f" {db_label}: {len(tables)}개 테이블")

            for table in tables:
                quoted = _quote_identifier(table)
                cursor.execute(f"PRAGMA table_info({quoted})")
                # Column 1 of each PRAGMA table_info row is the column name.
                cols = [col[1] for col in cursor.fetchall()]
                cursor.execute(f"SELECT COUNT(*) FROM {quoted}")
                count = cursor.fetchone()[0]

                info[f"{prefix}.{table}"] = {
                    "rows": count,
                    "columns": len(cols),
                    "col_names": cols,
                }
                if report_empty or count > 0:
                    print(f" {table}: {count} rows, {len(cols)} cols")
        return info

    def verify_db_tables(self) -> dict:
        """Record row/column counts and column names of every DB table.

        Tables of ``self.kis_db`` are keyed ``"kis.<table>"`` and tables of
        ``self.snapshot_db`` are keyed ``"snapshot.<table>"``. For the
        snapshot database only non-empty tables are printed, although all of
        them are recorded.

        Returns:
            The combined mapping, also stored in ``self.results["tables"]``.
        """
        print("\n[DB 테이블 검증]")

        db_info = {}
        db_info.update(
            self._collect_table_info(self.kis_db, "kis", "kis_data_collection.db")
        )
        db_info.update(
            self._collect_table_info(
                self.snapshot_db, "snapshot", "snapshot_admin.db",
                report_empty=False,
            )
        )

        self.results["tables"] = db_info
        return db_info

    def verify_sync(self) -> dict:
        """Compare each recorded sheet with its corresponding table.

        The sheet ``data_feed`` maps to ``kis.data_feed``; every other sheet
        maps to ``snapshot.<sheet name>``. A sheet is ``"OK"`` when both the
        row count and the column count match, ``"MISMATCH"`` when either
        differs, and ``"NOT_FOUND"`` when the table was not recorded.

        Returns:
            Mapping of sheet name to its status record, also stored in
            ``self.results["sync_status"]``.
        """
        print("\n[동기화 상태]")

        sync_status = {}
        tables = self.results["tables"]

        for sheet_name, sheet_info in self.results["sheets"].items():
            # Special-case mapping: data_feed lives in the kis database.
            if sheet_name == "data_feed":
                table_key = "kis.data_feed"
            else:
                table_key = f"snapshot.{sheet_name}"

            table_info = tables.get(table_key)
            if table_info is None:
                sync_status[sheet_name] = {
                    "status": "NOT_FOUND",
                    "message": f"Table {table_key} not found in DB",
                }
                print(f" [!] {sheet_name}: 테이블 미발견")
                continue

            rows_match = sheet_info["rows"] == table_info["rows"]
            cols_match = sheet_info["columns"] == table_info["columns"]
            status = "OK" if (rows_match and cols_match) else "MISMATCH"

            sync_status[sheet_name] = {
                "status": status,
                "sheet_rows": sheet_info["rows"],
                "table_rows": table_info["rows"],
                "rows_match": rows_match,
                "sheet_cols": sheet_info["columns"],
                "table_cols": table_info["columns"],
                "cols_match": cols_match,
            }

            symbol = "[OK]" if status == "OK" else "[!]"
            print(f" {symbol} {sheet_name}")
            if not rows_match:
                print(f" 행: {sheet_info['rows']} vs {table_info['rows']}")
            if not cols_match:
                print(f" 컬럼: {sheet_info['columns']} vs {table_info['columns']}")

        self.results["sync_status"] = sync_status
        return sync_status

    @staticmethod
    def _count_nulls(db_path, table: str, column: str) -> int:
        """Return how many rows of *table* have NULL in *column*."""
        query = (
            f"SELECT COUNT(*) FROM {_quote_identifier(table)} "
            f"WHERE {_quote_identifier(column)} IS NULL"
        )
        with closing(sqlite3.connect(db_path)) as conn:
            return conn.execute(query).fetchone()[0]

    def verify_data_integrity(self) -> dict:
        """Count NULL values in ``data_feed.ticker`` and ``settings.key``.

        Returns:
            Dict with ``"not_null_violations"`` (sum of both NULL counts) and
            ``"duplicate_keys"`` / ``"orphaned_records"``, which are reported
            as 0 because no check for them is implemented here.

        Raises:
            sqlite3.OperationalError: If either table or column is missing.
        """
        print("\n[데이터 무결성 검증]")

        integrity_checks = {
            "not_null_violations": 0,
            "duplicate_keys": 0,
            "orphaned_records": 0,
        }

        # kis_data_collection: data_feed.ticker must not be NULL.
        null_count = self._count_nulls(self.kis_db, "data_feed", "ticker")
        if null_count > 0:
            integrity_checks["not_null_violations"] += null_count
            print(f" [!] data_feed: {null_count}개 NULL ticker 발견")

        # snapshot_admin: settings.key must not be NULL.
        null_count = self._count_nulls(self.snapshot_db, "settings", "key")
        if null_count > 0:
            integrity_checks["not_null_violations"] += null_count
            print(f" [!] settings: {null_count}개 NULL key 발견")

        if integrity_checks["not_null_violations"] == 0:
            print(" [OK] NULL 위반 없음")

        return integrity_checks

    def run(self) -> dict:
        """Run all checks in order, print a summary and return the results.

        Returns:
            ``self.results`` after the sheet, table and sync checks have
            populated it.
        """
        print("=" * 80)
        print("시트→테이블 동기화 검증")
        print("=" * 80)

        # 1. Inspect the XLSX sheets.
        self.verify_xlsx_sheets()

        # 2. Inspect the DB tables.
        self.verify_db_tables()

        # 3. Compare sheets against tables.
        self.verify_sync()

        # 4. Check data integrity.
        integrity = self.verify_data_integrity()

        # Final summary.
        print("\n" + "=" * 80)
        print("[최종 검증 결과]")

        total_sheets = len(self.results["sheets"])
        synced_sheets = sum(
            1 for v in self.results["sync_status"].values()
            if v.get("status") == "OK"
        )

        print(f" 시트 동기화: {synced_sheets}/{total_sheets}")
        print(f" 데이터 무결성: {integrity['not_null_violations']}개 위반")

        # PASS requires every sheet in sync and no NULL violations.
        all_clean = (
            synced_sheets == total_sheets
            and integrity['not_null_violations'] == 0
        )
        overall_status = "PASS" if all_clean else "FAIL"
        print(f" 종합 평가: {overall_status}")
        print("=" * 80)

        return self.results


if __name__ == "__main__":
    verifier = SheetTableSyncVerification()
    result = verifier.run()
    print("\n[완료] 시트-테이블 동기화 검증 완료")