#!/usr/bin/env python3
"""
Verify database table coverage.

Compare the sheets listed in GatherTradingData.json against the tables that
currently exist in the SQLite databases, and print a coverage report.
"""

import json
import sqlite3
from pathlib import Path

# Default inputs; these match the locations the script has always used.
DEFAULT_JSON_PATH = 'GatherTradingData.json'
DEFAULT_DATABASES = (
    ("kis_data_collection", "src/quant_engine/kis_data_collection.db"),
    ("snapshot_admin", "src/quant_engine/snapshot_admin.db"),
)

# Encodings tried in order when reading the JSON file.
_JSON_ENCODINGS = ('utf-8', 'euc-kr')


def get_xlsx_sheets(json_path=DEFAULT_JSON_PATH):
    """Return the sheet names listed in the JSON export.

    The list is read from ``metadata.sheets_included``. The file is decoded
    as UTF-8 first and as EUC-KR if that fails.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        The list of sheet names, or an empty list when the file cannot be
        opened, decoded or parsed, or does not have the expected structure.
    """
    for encoding in _JSON_ENCODINGS:
        try:
            with open(json_path, encoding=encoding) as f:
                full_data = json.load(f)
        except (OSError, ValueError):
            # UnicodeDecodeError and json.JSONDecodeError are both
            # ValueError subclasses; fall through to the next encoding.
            continue

        # Best effort: an unexpected structure yields "no sheets" rather
        # than an exception.
        metadata = full_data.get('metadata', {}) if isinstance(full_data, dict) else {}
        sheets = metadata.get('sheets_included', []) if isinstance(metadata, dict) else []
        return list(sheets) if isinstance(sheets, list) else []
    return []


def get_db_tables(databases=DEFAULT_DATABASES):
    """Return the tables currently present in each database.

    Args:
        databases: Iterable of ``(db_name, db_path)`` pairs to inspect.

    Returns:
        A dict mapping ``db_name`` to the list of table names found in that
        database, excluding ``sqlite_sequence``. Databases whose file does
        not exist are omitted.
    """
    tables = {}
    for db_name, db_path in databases:
        # sqlite3.connect() would create a missing file, so skip it instead.
        if not Path(db_path).exists():
            continue
        conn = sqlite3.connect(db_path)
        try:
            rows = conn.execute(
                "SELECT name FROM sqlite_master WHERE type='table'"
            ).fetchall()
        finally:
            # Close the connection even when the query fails.
            conn.close()
        tables[db_name] = [name for (name,) in rows if name != 'sqlite_sequence']
    return tables


def _split_by_coverage(sheets, table_names):
    """Split sheets into (covered, missing) by case-insensitive table name."""
    known = {name.lower() for name in table_names}  # built once, O(1) lookups
    covered = []
    missing = []
    for sheet in sheets:
        (covered if sheet.lower() in known else missing).append(sheet)
    return covered, missing


def main():
    """Print the sheet list, the table list and the coverage report."""
    print("="*80)
    print("데이터베이스 테이블 커버리지 검증")
    print("="*80)

    # Sheets listed in the JSON export
    xlsx_sheets = get_xlsx_sheets()
    print("\n[GatherTradingData.json]")
    print(f"총 시트 수: {len(xlsx_sheets)}")
    print("시트 목록:")
    for i, sheet in enumerate(xlsx_sheets, 1):
        print(f" {i:2}. {sheet}")

    # Tables in the databases
    db_tables = get_db_tables()
    total_tables = sum(len(t) for t in db_tables.values())
    print("\n[현재 DB]")
    print(f"총 테이블 수: {total_tables}")
    for db_name, tables in db_tables.items():
        print(f"\n{db_name}.db:")
        for table in tables:
            print(f" - {table}")

    # Comparison
    print("\n" + "="*80)
    print("커버리지 분석")
    print("="*80)

    all_db_tables = [table for tables in db_tables.values() for table in tables]
    covered, missing = _split_by_coverage(xlsx_sheets, all_db_tables)

    # Guard against division by zero when no sheets were found.
    coverage = (len(covered) / len(xlsx_sheets) * 100) if xlsx_sheets else 0

    print("\n[결과]")
    print(f" 커버된 시트: {len(covered)}/{len(xlsx_sheets)} ({coverage:.1f}%)")
    print(f" 누락된 시트: {len(missing)}")

    if missing:
        print("\n[누락된 시트]")
        for sheet in missing:
            print(f" - {sheet}")

        print("\n[권장]")
        print("다음 테이블들을 추가하여 커버리지를 완성해야 함:")
        # Only the first ten are listed; the rest are summarised as a count.
        for sheet in missing[:10]:
            print(f" - {sheet}")
        if len(missing) > 10:
            print(f" ... 및 {len(missing)-10}개 추가")


if __name__ == "__main__":
    main()