Files
QuantEngineByItz/tools/refactor_database_structure.py
T

197 lines
6.4 KiB
Python

#!/usr/bin/env python3
"""
Database structure refactoring.
Analyzes fragmented DB files and writes a plan for consolidating them around the single canonical location (src/quant_engine/).
"""
import shutil
from datetime import datetime
from pathlib import Path
from typing import Optional, Tuple
class DatabaseRefactorer:
    """Analyze scattered SQLite files and draft a consolidation plan.

    The class only inspects the filesystem and produces text; none of its
    methods move, copy, or delete database files.
    """

    # File names that are expected to live in the canonical directory.
    CANONICAL_DB_NAMES = ("kis_data_collection.db", "snapshot_admin.db")

    # Glob patterns used to pick up stray DB files directly under Temp/.
    TEMP_DB_PATTERNS = ("*_collection.db", "*_admin*.db")

    def __init__(self):
        """Set up search roots and an (initially empty) result log."""
        self.root = Path(".")
        self.canonical_dir = Path("src/quant_engine")
        self.results = {
            "timestamp": datetime.now().isoformat(),
            "consolidated": [],
            "moved": [],
            "deleted": [],
            "errors": [],
        }

    def get_canonical_location(self, db_name: str) -> Optional[Path]:
        """Return the canonical path for ``db_name``.

        Args:
            db_name: Bare file name of the database.

        Returns:
            ``self.canonical_dir / db_name`` for a known canonical database,
            or ``None`` when the name is not one of ``CANONICAL_DB_NAMES``.
        """
        if db_name in self.CANONICAL_DB_NAMES:
            return self.canonical_dir / db_name
        return None

    def find_scattered_dbs(self) -> dict:
        """Locate DB files that live outside the canonical directory.

        Returns:
            A dict with the keys ``"outputs"``, ``"temp"`` and ``"other"``.
            ``"outputs"`` holds every ``*.db`` found recursively under
            ``outputs/``; ``"temp"`` holds files directly under ``Temp/``
            matching ``TEMP_DB_PATTERNS``; ``"other"`` is always empty here.
            Each list is sorted and free of duplicates.
        """
        scattered = {"outputs": [], "temp": [], "other": []}

        outputs_dir = self.root / "outputs"
        if outputs_dir.exists():
            scattered["outputs"] = sorted(outputs_dir.rglob("*.db"))

        temp_dir = self.root / "Temp"
        if temp_dir.exists():
            # A single file can match more than one pattern (for example
            # "snapshot_admin_collection.db"); collecting into a set keeps
            # it from being reported, and counted, twice.
            matches = set()
            for pattern in self.TEMP_DB_PATTERNS:
                matches.update(temp_dir.glob(pattern))
            scattered["temp"] = sorted(matches)

        return scattered

    def analyze(self) -> Tuple[dict, dict]:
        """Summarize canonical and scattered DB files.

        Returns:
            A ``(analysis, scattered)`` pair. ``analysis`` is a dict of
            plain strings/bools/ints describing the current state, and
            ``scattered`` is the raw result of ``find_scattered_dbs``.
        """
        scattered = self.find_scattered_dbs()

        def as_relative(paths):
            # Report paths relative to the search root.
            return [str(path.relative_to(self.root)) for path in paths]

        analysis = {
            "canonical_location": str(self.canonical_dir),
            "canonical_files": {
                name: (self.canonical_dir / name).exists()
                for name in self.CANONICAL_DB_NAMES
            },
            "scattered_files": {
                "outputs": as_relative(scattered["outputs"]),
                "temp": as_relative(scattered["temp"]),
            },
            "total_scattered": len(scattered["outputs"]) + len(scattered["temp"]),
        }
        return analysis, scattered

    def consolidate(self, scattered: dict, dry_run: bool = True) -> dict:
        """Print the current analysis and the recommended clean-up steps.

        No files are touched regardless of ``dry_run``; the method only
        reports.

        Args:
            scattered: Currently ignored; the method runs ``analyze`` itself.
            dry_run: Currently has no effect. Kept for interface
                compatibility.

        Returns:
            The analysis dict produced by ``analyze``.
        """
        analysis, _ = self.analyze()
        canonical_files = analysis['canonical_files']
        scattered_files = analysis['scattered_files']

        print("\n[Analysis]")
        print(f"Canonical location: {analysis['canonical_location']}")
        for name in self.CANONICAL_DB_NAMES:
            status = 'EXISTS' if canonical_files[name] else 'MISSING'
            print(f" {name}: {status}")

        print("\nScattered files found:")
        print(f" outputs/: {len(scattered_files['outputs'])} files")
        # Only the first five outputs/ entries are listed to keep the report short.
        for f in scattered_files['outputs'][:5]:
            print(f" - {f}")
        print(f" Temp/: {len(scattered_files['temp'])} files")
        for f in scattered_files['temp']:
            print(f" - {f}")

        print("\n[Recommendation]")
        print("1. Keep canonical location: src/quant_engine/")
        print(" - kis_data_collection.db (KIS API 데이터)")
        print(" - snapshot_admin.db (성능/포지션)")
        print("")
        print("2. Archive old files: archive_db/ (2026-06-23)")
        print(" - outputs/kis_data_collection/*")
        print(" - outputs/snapshot_admin/smoke*.db")
        print(" - Temp/*_collection.db")
        print(" - Temp/*_admin*.db")
        print("")
        print("3. Delete: qualitative_sell_strategy.db (unrelated)")
        return analysis

    def create_consolidation_plan(self) -> str:
        """Render the consolidation plan as a Markdown string.

        The canonical file count and the overall total are derived from the
        canonical files that actually exist, so a missing canonical DB is
        not counted. The record counts in "Step 1" are static text; this
        method does not open any database.

        Returns:
            The Markdown plan text.
        """
        analysis, _ = self.analyze()
        # bool is an int subclass, so summing counts the existing files.
        canonical_count = sum(analysis['canonical_files'].values())
        plan = f"""
# Database Consolidation Plan (2026-06-23)
## Current State: FRAGMENTED
- Canonical: src/quant_engine/ ({canonical_count} files)
- Scattered: outputs/ ({len(analysis['scattered_files']['outputs'])}) + Temp/ ({len(analysis['scattered_files']['temp'])})
- Total: {analysis['total_scattered'] + canonical_count} database files
## Issue
1. kis_data_collection.db in 3 locations:
- src/quant_engine/ (CANONICAL)
- outputs/kis_data_collection/
- Temp/test_kis_data_collection.db
2. snapshot_admin.db in 4+ locations:
- src/quant_engine/ (CANONICAL)
- outputs/snapshot_admin/
- Temp/snapshot_admin_*.db (multiple variants)
- outputs/qualitative_sell_strategy/ (unrelated)
## Solution
### Step 1: Verify Canonical Copies (src/quant_engine/)
- kis_data_collection.db: 5 records [OK]
- snapshot_admin.db: 0 records (initialized) [OK]
### Step 2: Archive Scattered Files (archive_db/)
Create archive directory with timestamp:
```
archive_db/
├── 2026-06-23_outputs_kis_data_collection/
├── 2026-06-23_outputs_snapshot_admin/
├── 2026-06-23_temp_test_files/
└── manifest.json (record what was archived)
```
### Step 3: Clean Obsolete References
- Remove imports from legacy non-canonical database paths
- Remove imports from archive/backup database paths
- Update any code expecting these paths
### Step 4: Update Documentation
- Update all references to use: src/quant_engine/
- Update deployment docs (Synology)
- Update CI/CD workflows
## Benefits
- Single source of truth
- Easier backup/recovery
- Clear separation: live vs. archived
- Faster data access
- Simplified deployment
## Files to Delete (After Archiving)
- archive only genuinely obsolete duplicate DBs
- keep canonical DBs in src/quant_engine/
- keep Temp/ only for transient validation artifacts
"""
        return plan
def main():
    """Print the analysis report, then print and save the consolidation plan.

    The plan is written as UTF-8 Markdown under ``docs/archive/``; the
    directory is created when it does not exist yet.
    """
    banner = "=" * 80
    refactorer = DatabaseRefactorer()

    print(banner)
    print("Database Structure Refactoring Analysis")
    print(banner)

    # Report only: consolidate() prints its findings and changes no files.
    refactorer.consolidate(None, dry_run=True)

    plan = refactorer.create_consolidation_plan()
    print(plan)

    # Persist the plan next to the other archived documents.
    plan_file = Path("docs/archive/DATABASE_CONSOLIDATION_PLAN_2026_06_23.md")
    plan_file.parent.mkdir(parents=True, exist_ok=True)
    plan_file.write_text(plan, encoding='utf-8')
    print(f"\n[Saved] Consolidation plan: {plan_file}")
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()