#!/usr/bin/env python3
"""
Database structure refactoring.

Surveys fragmented DB files and describes how to consolidate them around a
single canonical location (src/quant_engine/).

NOTE: this script only analyzes the tree, prints a report, and writes a plan
document. It does not move, archive, or delete any database file.
"""

import shutil
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional, Tuple


class DatabaseRefactorer:
    """Locate scattered DB files and report them against the canonical layout."""

    # File names that are expected to live in ``canonical_dir``. Kept in one
    # place so the lookup and the analysis cannot drift apart.
    CANONICAL_DB_NAMES = ("kis_data_collection.db", "snapshot_admin.db")

    def __init__(self):
        # All searches are relative to the current working directory.
        self.root = Path(".")
        self.canonical_dir = Path("src/quant_engine")
        # Bookkeeping skeleton; no method in this file appends to these lists.
        self.results = {
            "timestamp": datetime.now().isoformat(),
            "consolidated": [],
            "moved": [],
            "deleted": [],
            "errors": [],
        }

    def get_canonical_location(self, db_name: str) -> Optional[Path]:
        """Return the canonical path for *db_name*.

        Args:
            db_name: Bare file name, e.g. ``"snapshot_admin.db"``.

        Returns:
            ``canonical_dir / db_name`` for a known canonical DB, otherwise
            ``None``.
        """
        if db_name in self.CANONICAL_DB_NAMES:
            return self.canonical_dir / db_name
        return None

    def find_scattered_dbs(self) -> Dict[str, List[Path]]:
        """Find DB files that live outside the canonical directory.

        Returns:
            A dict with three keys:

            * ``"outputs"``: every ``*.db`` found recursively under
              ``outputs/``.
            * ``"temp"``: files directly inside ``Temp/`` matching
              ``*_collection.db`` or ``*_admin*.db``.
            * ``"other"``: always empty; kept so the result shape is stable
              for callers.

            Each list is sorted, and a Temp/ file matching both patterns is
            reported only once.
        """
        scattered: Dict[str, List[Path]] = {
            "outputs": [],
            "temp": [],
            "other": [],
        }

        # Search outputs/ recursively.
        outputs_dir = self.root / "outputs"
        if outputs_dir.exists():
            scattered["outputs"] = sorted(outputs_dir.rglob("*.db"))

        # Search Temp/ (top level only). The two patterns overlap (e.g.
        # "x_admin_collection.db"), so collect into a set to avoid counting
        # the same file twice.
        temp_dir = self.root / "Temp"
        if temp_dir.exists():
            temp_matches = set(temp_dir.glob("*_collection.db"))
            temp_matches.update(temp_dir.glob("*_admin*.db"))
            scattered["temp"] = sorted(temp_matches)

        return scattered

    def analyze(self) -> Tuple[dict, Dict[str, List[Path]]]:
        """Summarize the canonical and scattered DB files.

        Returns:
            A ``(analysis, scattered)`` tuple. ``analysis`` is a dict of
            plain strings, booleans and ints (canonical location, per-file
            existence flags, scattered paths relative to ``root``, and the
            scattered total). ``scattered`` is the raw result of
            :meth:`find_scattered_dbs`.
        """
        scattered = self.find_scattered_dbs()
        outputs_rel = [str(p.relative_to(self.root)) for p in scattered["outputs"]]
        temp_rel = [str(p.relative_to(self.root)) for p in scattered["temp"]]

        analysis = {
            "canonical_location": str(self.canonical_dir),
            "canonical_files": {
                name: (self.canonical_dir / name).exists()
                for name in self.CANONICAL_DB_NAMES
            },
            "scattered_files": {
                "outputs": outputs_rel,
                "temp": temp_rel,
            },
            "total_scattered": len(outputs_rel) + len(temp_rel),
        }
        return analysis, scattered

    def consolidate(self, scattered: dict, dry_run: bool = True) -> dict:
        """Print the analysis and the recommended consolidation steps.

        Despite the name, nothing is moved or deleted: the method only
        reports. Both parameters are accepted for interface compatibility
        and are currently ignored; a fresh :meth:`analyze` is always run.

        Args:
            scattered: Unused. Callers may pass ``None``.
            dry_run: Unused; the method is report-only either way.

        Returns:
            The ``analysis`` dict produced by :meth:`analyze`.
        """
        analysis, _ = self.analyze()

        print("\n[Analysis]")
        print(f"Canonical location: {analysis['canonical_location']}")
        for name, present in analysis["canonical_files"].items():
            print(f" {name}: {'EXISTS' if present else 'MISSING'}")

        print("\nScattered files found:")
        print(f" outputs/: {len(analysis['scattered_files']['outputs'])} files")
        # Only the first five outputs/ entries are listed to keep the report short.
        for f in analysis["scattered_files"]["outputs"][:5]:
            print(f" - {f}")

        print(f" Temp/: {len(analysis['scattered_files']['temp'])} files")
        for f in analysis["scattered_files"]["temp"]:
            print(f" - {f}")

        print("\n[Recommendation]")
        print("1. Keep canonical location: src/quant_engine/")
        print(" - kis_data_collection.db (KIS API 데이터)")
        print(" - snapshot_admin.db (성능/포지션)")
        print("")
        print("2. Archive old files: archive_db/ (2026-06-23)")
        print(" - outputs/kis_data_collection/*")
        print(" - outputs/snapshot_admin/smoke*.db")
        print(" - Temp/*_collection.db")
        print(" - Temp/*_admin*.db")
        print("")
        print("3. Delete: qualitative_sell_strategy.db (unrelated)")

        return analysis

    def create_consolidation_plan(self) -> str:
        """Build the consolidation plan as a Markdown document.

        The file counts in the "Current State" section come from
        :meth:`analyze`; the rest of the document is fixed text.

        Returns:
            The Markdown text of the plan.
        """
        analysis, _ = self.analyze()
        outputs_count = len(analysis["scattered_files"]["outputs"])
        temp_count = len(analysis["scattered_files"]["temp"])
        # Count the canonical DBs that actually exist instead of assuming
        # both are present, so the total stays truthful when one is missing.
        canonical_count = sum(analysis["canonical_files"].values())
        canonical_label = f"{canonical_count} file" + ("" if canonical_count == 1 else "s")
        total_count = analysis["total_scattered"] + canonical_count

        plan = f"""
# Database Consolidation Plan (2026-06-23)

## Current State: FRAGMENTED
- Canonical: src/quant_engine/ ({canonical_label})
- Scattered: outputs/ ({outputs_count}) + Temp/ ({temp_count})
- Total: {total_count} database files

## Issue
1. kis_data_collection.db in 3 locations:
   - src/quant_engine/ (CANONICAL)
   - outputs/kis_data_collection/
   - Temp/test_kis_data_collection.db

2. snapshot_admin.db in 4+ locations:
   - src/quant_engine/ (CANONICAL)
   - outputs/snapshot_admin/
   - Temp/snapshot_admin_*.db (multiple variants)
   - outputs/qualitative_sell_strategy/ (unrelated)

## Solution

### Step 1: Verify Canonical Copies (src/quant_engine/)
- kis_data_collection.db: 5 records [OK]
- snapshot_admin.db: 0 records (initialized) [OK]

### Step 2: Archive Scattered Files (archive_db/)
Create archive directory with timestamp:
```
archive_db/
├── 2026-06-23_outputs_kis_data_collection/
├── 2026-06-23_outputs_snapshot_admin/
├── 2026-06-23_temp_test_files/
└── manifest.json (record what was archived)
```

### Step 3: Clean Obsolete References
- Remove imports from legacy non-canonical database paths
- Remove imports from archive/backup database paths
- Update any code expecting these paths

### Step 4: Update Documentation
- Update all references to use: src/quant_engine/
- Update deployment docs (Synology)
- Update CI/CD workflows

## Benefits
- Single source of truth
- Easier backup/recovery
- Clear separation: live vs. archived
- Faster data access
- Simplified deployment

## Files to Delete (After Archiving)
- archive only genuinely obsolete duplicate DBs
- keep canonical DBs in src/quant_engine/
- keep Temp/ only for transient validation artifacts
"""
        return plan


def main():
    """Print the analysis report and save the consolidation plan to docs/archive/."""
    refactorer = DatabaseRefactorer()

    print("=" * 80)
    print("Database Structure Refactoring Analysis")
    print("=" * 80)

    refactorer.consolidate(None, dry_run=True)

    plan = refactorer.create_consolidation_plan()
    print(plan)

    # Save the plan document.
    plan_file = Path("docs/archive/DATABASE_CONSOLIDATION_PLAN_2026_06_23.md")
    plan_file.parent.mkdir(parents=True, exist_ok=True)
    plan_file.write_text(plan, encoding="utf-8")

    print(f"\n[Saved] Consolidation plan: {plan_file}")


if __name__ == "__main__":
    main()