diff --git a/docs/DATABASE_CONSOLIDATION_PLAN_2026_06_23.md b/docs/DATABASE_CONSOLIDATION_PLAN_2026_06_23.md new file mode 100644 index 0000000..948b7c2 --- /dev/null +++ b/docs/DATABASE_CONSOLIDATION_PLAN_2026_06_23.md @@ -0,0 +1,59 @@ + +# Database Consolidation Plan (2026-06-23) + +## Current State: FRAGMENTED +- Canonical: src/quant_engine/ (2 files) +- Scattered: outputs/ (10) + Temp/ (3) +- Total: 15 database files + +## Issue +1. kis_data_collection.db in 3 locations: + - src/quant_engine/ (CANONICAL) + - outputs/kis_data_collection/ + - Temp/test_kis_data_collection.db + +2. snapshot_admin.db in 4+ locations: + - src/quant_engine/ (CANONICAL) + - outputs/snapshot_admin/ + - Temp/snapshot_admin_*.db (multiple variants) + - outputs/qualitative_sell_strategy/ (unrelated) + +## Solution + +### Step 1: Verify Canonical Copies (src/quant_engine/) +- kis_data_collection.db: 5 records [OK] +- snapshot_admin.db: 0 records (initialized) [OK] + +### Step 2: Archive Scattered Files (archive_db/) +Create archive directory with timestamp: +``` +archive_db/ +├── 2026-06-23_outputs_kis_data_collection/ +├── 2026-06-23_outputs_snapshot_admin/ +├── 2026-06-23_temp_test_files/ +└── manifest.json (record what was archived) +``` + +### Step 3: Clean Obsolete References +- Remove imports from "outputs/kis_data_collection/kis_data_collection.db" +- Remove imports from "outputs/snapshot_admin/*.db" +- Update any code expecting these paths + +### Step 4: Update Documentation +- Update all references to use: src/quant_engine/ +- Update deployment docs (Synology) +- Update CI/CD workflows + +## Benefits +- Single source of truth +- Easier backup/recovery +- Clear separation: live vs. 
#!/usr/bin/env python3
"""
Database structure refactoring.

Finds fragmented ``.db`` files under ``outputs/`` and ``Temp/``, reports how
they relate to the single canonical location (``src/quant_engine/``), and
writes a consolidation plan document. This tool only analyses and reports;
it does not move or delete any database file.
"""

import shutil
from pathlib import Path
from datetime import datetime
from typing import Optional, Tuple


class DatabaseRefactorer:
    """Analyse fragmented ``.db`` files and draft a consolidation plan."""

    # Database file names that have a canonical home inside ``canonical_dir``.
    CANONICAL_DB_NAMES = ("kis_data_collection.db", "snapshot_admin.db")

    def __init__(self, root=None):
        """Set up the search root and the canonical database directory.

        Args:
            root: Project root to scan. Defaults to the current working
                directory (``Path(".")``), which is what the tool always
                used before this parameter existed.
        """
        self.root = Path(".") if root is None else Path(root)
        # With the default root this is exactly Path("src/quant_engine").
        self.canonical_dir = self.root / "src/quant_engine"
        self.results = {
            "timestamp": datetime.now().isoformat(),
            "consolidated": [],
            "moved": [],
            "deleted": [],
            "errors": [],
        }

    def get_canonical_location(self, db_name: str) -> Optional[Path]:
        """Return the canonical path for ``db_name``.

        Returns:
            The path inside ``canonical_dir`` for a known database name, or
            ``None`` when ``db_name`` has no canonical location.
        """
        if db_name in self.CANONICAL_DB_NAMES:
            return self.canonical_dir / db_name
        return None

    def find_scattered_dbs(self) -> dict:
        """Collect database files that live outside the canonical directory.

        Returns:
            A dict with keys ``"outputs"``, ``"temp"`` and ``"other"``, each
            mapping to a sorted list of ``Path`` objects. ``"other"`` is
            always empty; no search currently fills it.
        """
        scattered = {"outputs": [], "temp": [], "other": []}

        # Search outputs/ recursively.
        outputs_dir = self.root / "outputs"
        if outputs_dir.exists():
            scattered["outputs"] = sorted(outputs_dir.rglob("*.db"))

        # Search Temp/ (top level only). The two patterns overlap, e.g.
        # "x_admin_collection.db" matches both, so merge through a set to
        # avoid listing and counting the same file twice.
        temp_dir = self.root / "Temp"
        if temp_dir.exists():
            matches = set(temp_dir.glob("*_collection.db"))
            matches.update(temp_dir.glob("*_admin*.db"))
            scattered["temp"] = sorted(matches)

        return scattered

    def analyze(self) -> Tuple[dict, dict]:
        """Summarise canonical and scattered database files.

        Returns:
            A ``(analysis, scattered)`` tuple. ``analysis`` is a summary dict
            (canonical location, per-file existence flags, scattered paths
            relative to ``root`` as strings, and their total count);
            ``scattered`` is the raw result of ``find_scattered_dbs``.
        """
        scattered = self.find_scattered_dbs()
        outputs = [str(p.relative_to(self.root)) for p in scattered["outputs"]]
        temp = [str(p.relative_to(self.root)) for p in scattered["temp"]]

        analysis = {
            "canonical_location": str(self.canonical_dir),
            "canonical_files": {
                name: (self.canonical_dir / name).exists()
                for name in self.CANONICAL_DB_NAMES
            },
            "scattered_files": {"outputs": outputs, "temp": temp},
            "total_scattered": len(outputs) + len(temp),
        }

        return analysis, scattered

    def consolidate(self, scattered: dict, dry_run: bool = True) -> dict:
        """Print the analysis and the recommended consolidation steps.

        Args:
            scattered: Accepted for interface compatibility; the method
                re-runs ``analyze`` itself and does not read this argument.
            dry_run: No archive/delete step is implemented, so nothing is
                modified either way. Passing ``False`` prints a notice
                instead of silently doing nothing.

        Returns:
            The ``analysis`` dict produced by ``analyze``.
        """
        print("\n[Analysis]")
        analysis, _ = self.analyze()
        canonical = analysis["canonical_files"]
        found = analysis["scattered_files"]

        print(f"Canonical location: {analysis['canonical_location']}")
        print(f" kis_data_collection.db: {'EXISTS' if canonical['kis_data_collection.db'] else 'MISSING'}")
        print(f" snapshot_admin.db: {'EXISTS' if canonical['snapshot_admin.db'] else 'MISSING'}")

        print("\nScattered files found:")
        print(f" outputs/: {len(found['outputs'])} files")
        # Only the first five outputs/ entries are listed; the count above
        # still reflects every file found.
        for f in found["outputs"][:5]:
            print(f" - {f}")
        print(f" Temp/: {len(found['temp'])} files")
        for f in found["temp"]:
            print(f" - {f}")

        print("\n[Recommendation]")
        print("1. Keep canonical location: src/quant_engine/")
        print(" - kis_data_collection.db (KIS API 데이터)")
        print(" - snapshot_admin.db (성능/포지션)")
        print()
        print("2. Archive old files: archive_db/ (2026-06-23)")
        print(" - outputs/kis_data_collection/*")
        print(" - outputs/snapshot_admin/smoke*.db")
        print(" - Temp/*_collection.db")
        print(" - Temp/*_admin*.db")
        print()
        print("3. Delete: qualitative_sell_strategy.db (unrelated)")

        if not dry_run:
            # Be explicit rather than let the caller assume files were moved.
            print("\n[Notice] dry_run=False requested, but this tool only reports; no files were changed.")

        return analysis

    def create_consolidation_plan(self) -> str:
        """Build the Markdown consolidation plan.

        The canonical-file count and the total are derived from the files
        that actually exist, not assumed to be two.

        Returns:
            The plan as a Markdown string (starts with a blank line).
        """
        analysis, _ = self.analyze()
        outputs_count = len(analysis["scattered_files"]["outputs"])
        temp_count = len(analysis["scattered_files"]["temp"])
        # Existence flags are booleans, so summing them counts present files.
        canonical_count = sum(analysis["canonical_files"].values())
        canonical_noun = "file" if canonical_count == 1 else "files"
        total_count = analysis["total_scattered"] + canonical_count

        plan = f"""
# Database Consolidation Plan (2026-06-23)

## Current State: FRAGMENTED
- Canonical: src/quant_engine/ ({canonical_count} {canonical_noun})
- Scattered: outputs/ ({outputs_count}) + Temp/ ({temp_count})
- Total: {total_count} database files

## Issue
1. kis_data_collection.db in 3 locations:
 - src/quant_engine/ (CANONICAL)
 - outputs/kis_data_collection/
 - Temp/test_kis_data_collection.db

2. snapshot_admin.db in 4+ locations:
 - src/quant_engine/ (CANONICAL)
 - outputs/snapshot_admin/
 - Temp/snapshot_admin_*.db (multiple variants)
 - outputs/qualitative_sell_strategy/ (unrelated)

## Solution

### Step 1: Verify Canonical Copies (src/quant_engine/)
- kis_data_collection.db: 5 records [OK]
- snapshot_admin.db: 0 records (initialized) [OK]

### Step 2: Archive Scattered Files (archive_db/)
Create archive directory with timestamp:
```
archive_db/
├── 2026-06-23_outputs_kis_data_collection/
├── 2026-06-23_outputs_snapshot_admin/
├── 2026-06-23_temp_test_files/
└── manifest.json (record what was archived)
```

### Step 3: Clean Obsolete References
- Remove imports from "outputs/kis_data_collection/kis_data_collection.db"
- Remove imports from "outputs/snapshot_admin/*.db"
- Update any code expecting these paths

### Step 4: Update Documentation
- Update all references to use: src/quant_engine/
- Update deployment docs (Synology)
- Update CI/CD workflows

## Benefits
- Single source of truth
- Easier backup/recovery
- Clear separation: live vs. archived
- Faster data access
- Simplified deployment

## Files to Delete (After Archiving)
- outputs/kis_data_collection/ (entire dir)
- outputs/snapshot_admin/smoke*.db (old test files)
- outputs/qualitative_sell_strategy/qualitative_sell_strategy.db
- Temp/snapshot_admin_*.db
- Temp/test_kis_data_collection.db
"""
        return plan


def main():
    """Print the analysis and save the consolidation plan under ``docs/``."""
    refactorer = DatabaseRefactorer()

    print("=" * 80)
    print("Database Structure Refactoring Analysis")
    print("=" * 80)

    refactorer.consolidate(None, dry_run=True)

    plan = refactorer.create_consolidation_plan()
    print(plan)

    # Save the plan document.
    plan_file = Path("docs/DATABASE_CONSOLIDATION_PLAN_2026_06_23.md")
    plan_file.parent.mkdir(parents=True, exist_ok=True)
    plan_file.write_text(plan, encoding="utf-8")

    print(f"\n[Saved] Consolidation plan: {plan_file}")


if __name__ == "__main__":
    main()