Merge branch 'main' of http://192.168.123.100:8418/KimJaeHyun/myfinance

2026-06-23 00:03:26 +09:00
parent 4c4ea717b4 c7fc7942fd
commit 1dddffca5c
7 changed files with 1276 additions and 0 deletions
@@ -0,0 +1,420 @@
+#!/usr/bin/env python3
+"""
+WBS-9.7: 자동 백업 & 복구 전략
+
+목표: 99% 성공률, 복구 < 1시간
+"""
+
+import os
+import shutil
+import sqlite3
+import json
+import hashlib
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Dict, List, Tuple
+import subprocess
+
+
+class BackupRecoveryManager:
+    """백업 및 복구 관리자"""
+
+    def __init__(
+        self,
+        data_dir: str = "src/quant_engine",
+        backup_dir: str = "backups",
+        retention_days: int = 30
+    ):
+        """Set up the source/backup locations and an empty results record.
+
+        Args:
+            data_dir: Directory that holds the live data files.
+            backup_dir: Directory under which backups are written; it is
+                created (including parents) when it does not exist yet.
+            retention_days: Age limit, in days, kept for the cleanup step.
+        """
+        self.data_dir = Path(data_dir)
+        self.backup_dir = Path(backup_dir)
+        self.retention_days = retention_days
+
+        # Make sure the backup root is there before any backup is attempted.
+        self.backup_dir.mkdir(parents=True, exist_ok=True)
+
+        created_at = datetime.now().isoformat()
+        self.results = dict(
+            timestamp=created_at,
+            backups=[],
+            recovery_tests=[],
+            summary={},
+        )
+
+    def create_daily_backup(self) -> Dict:
+        """Copy the core data and spec files into a new ``daily_*`` folder.
+
+        Although the metadata labels the backup "daily_incremental", every
+        listed file is copied in full on each run.
+
+        Returns:
+            A dict with ``backup_name`` and ``status``. After a completed run
+            it also carries ``files_backed_up``, ``files_failed``,
+            ``files_missing``, ``total_size_mb`` and ``path``. ``status`` is
+            ``"SUCCESS"`` only when every listed file was copied,
+            ``"FAILED"`` when nothing was copied, and ``"PARTIAL_SUCCESS"``
+            otherwise. If the run aborts, ``status`` is ``"FAILED"`` and
+            ``error`` holds the exception text. Every outcome is appended to
+            ``self.results["backups"]`` so the report's success rate also
+            sees failures.
+        """
+        backup_name = f"daily_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        backup_path = self.backup_dir / backup_name
+
+        try:
+            # Files that make up a daily backup.
+            # NOTE(review): data_feed.db is copied as a plain file; if the
+            # database can be written to while this runs, consider sqlite3's
+            # online backup API to get a consistent snapshot.
+            files_to_backup = [
+                self.data_dir / "data_feed.db",
+                self.data_dir / "calibration_registry.yaml",
+                Path("spec") / "12_field_dictionary.yaml",
+                Path("spec") / "13_formula_registry.yaml",
+            ]
+
+            backup_path.mkdir(parents=True, exist_ok=True)
+
+            success_count = 0
+            error_count = 0
+            total_size = 0
+            missing = []
+
+            for src in files_to_backup:
+                if not src.exists():
+                    # A missing source must not pass as a clean backup.
+                    print(f"Missing backup source: {src}")
+                    missing.append(str(src))
+                    continue
+
+                try:
+                    dst = backup_path / src.name
+                    if src.is_file():
+                        shutil.copy2(src, dst)
+                        total_size += dst.stat().st_size
+                        success_count += 1
+                    elif src.is_dir():
+                        shutil.copytree(src, dst)
+                        total_size += sum(
+                            f.stat().st_size for f in dst.rglob("*") if f.is_file()
+                        )
+                        success_count += 1
+                except Exception as e:
+                    print(f"Error backing up {src}: {e}")
+                    error_count += 1
+
+            # Persist what was captured next to the copied files.
+            metadata = {
+                "backup_name": backup_name,
+                "timestamp": datetime.now().isoformat(),
+                "files_backed_up": success_count,
+                "files_failed": error_count,
+                "files_missing": missing,
+                "total_size_bytes": total_size,
+                "type": "daily_incremental"
+            }
+
+            with open(backup_path / "metadata.json", "w", encoding="utf-8") as f:
+                json.dump(metadata, f, indent=2)
+
+            if success_count == 0:
+                status = "FAILED"
+            elif error_count == 0 and not missing:
+                status = "SUCCESS"
+            else:
+                status = "PARTIAL_SUCCESS"
+
+            result = {
+                "backup_name": backup_name,
+                "status": status,
+                "files_backed_up": success_count,
+                "files_failed": error_count,
+                "files_missing": missing,
+                "total_size_mb": round(total_size / (1024 * 1024), 2),
+                "path": str(backup_path)
+            }
+            if status == "FAILED":
+                result["error"] = "No files were backed up"
+
+        except Exception as e:
+            result = {
+                "backup_name": backup_name,
+                "status": "FAILED",
+                "error": str(e)
+            }
+
+        self.results["backups"].append(result)
+        return result
+
+    def create_weekly_full_backup(self) -> Dict:
+        """Copy every file under the data, ``spec`` and ``formulas`` trees.
+
+        Files are written to a new ``weekly_*`` folder, keeping their path
+        relative to the path anchor. Anything with an excluded name
+        (VCS/cache/temp/output) in its path is skipped, and so is anything
+        already inside the backup directory, so a backup never copies
+        itself.
+
+        Returns:
+            A dict with ``backup_name`` and ``status``. After a completed run
+            it also carries ``files_backed_up``, ``files_failed``,
+            ``total_size_mb`` and ``path``. ``status`` is ``"SUCCESS"`` when
+            at least one file was copied and none failed, ``"FAILED"`` when
+            nothing was copied, and ``"PARTIAL_SUCCESS"`` otherwise. If the
+            run aborts, ``status`` is ``"FAILED"`` and ``error`` holds the
+            exception text. Every outcome is appended to
+            ``self.results["backups"]``.
+        """
+        backup_name = f"weekly_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
+        backup_path = self.backup_dir / backup_name
+
+        try:
+            backup_path.mkdir(parents=True, exist_ok=True)
+
+            exclude_dirs = {".git", "__pycache__", ".pytest_cache", "Temp", "outputs"}
+            backup_root = self.backup_dir.resolve()
+
+            total_size = 0
+            file_count = 0
+            error_count = 0
+
+            for root_dir in [self.data_dir, Path("spec"), Path("formulas")]:
+                if not root_dir.exists():
+                    continue
+
+                for src_file in root_dir.rglob("*"):
+                    # Skip anything that has an excluded name in its path.
+                    if not exclude_dirs.isdisjoint(src_file.parts):
+                        continue
+                    if not src_file.is_file():
+                        continue
+                    # Never back up earlier (or the current) backups.
+                    if backup_root in src_file.resolve().parents:
+                        continue
+
+                    rel_path = src_file.relative_to(src_file.anchor)
+                    dst = backup_path / rel_path
+
+                    try:
+                        dst.parent.mkdir(parents=True, exist_ok=True)
+                        shutil.copy2(src_file, dst)
+                        total_size += dst.stat().st_size
+                        file_count += 1
+                    except Exception as e:
+                        print(f"Error backing up {src_file}: {e}")
+                        error_count += 1
+
+            metadata = {
+                "backup_name": backup_name,
+                "timestamp": datetime.now().isoformat(),
+                "files_backed_up": file_count,
+                "files_failed": error_count,
+                "total_size_bytes": total_size,
+                "type": "weekly_full"
+            }
+
+            with open(backup_path / "metadata.json", "w", encoding="utf-8") as f:
+                json.dump(metadata, f, indent=2)
+
+            if file_count == 0:
+                status = "FAILED"
+            elif error_count == 0:
+                status = "SUCCESS"
+            else:
+                status = "PARTIAL_SUCCESS"
+
+            result = {
+                "backup_name": backup_name,
+                "status": status,
+                "files_backed_up": file_count,
+                "files_failed": error_count,
+                "total_size_mb": round(total_size / (1024 * 1024), 2),
+                "path": str(backup_path)
+            }
+            if status == "FAILED":
+                result["error"] = "No files were backed up"
+
+        except Exception as e:
+            result = {
+                "backup_name": backup_name,
+                "status": "FAILED",
+                "error": str(e)
+            }
+
+        self.results["backups"].append(result)
+        return result
+
+    def restore_from_backup(self, backup_name: str, restore_to: str = None) -> Dict:
+        """Copy the top-level entries of a backup into a target directory.
+
+        Args:
+            backup_name: Folder name of the backup under the backup directory.
+            restore_to: Destination directory; defaults to the data directory.
+                It is created (with parents) when it does not exist.
+
+        Returns:
+            A dict with ``backup_name`` and ``status``. After a completed run
+            it also carries ``files_restored``, ``files_failed``,
+            ``recovery_time_seconds`` and ``restored_to``. ``status`` is
+            ``"SUCCESS"`` when no entry failed, ``"FAILED"`` when entries
+            failed and none was restored, and ``"PARTIAL_SUCCESS"``
+            otherwise. A missing backup or an aborted run yields
+            ``"FAILED"`` with an ``error`` message. Outcomes of attempted
+            restores are appended to ``self.results["recovery_tests"]``.
+        """
+        backup_path = self.backup_dir / backup_name
+        restore_to = Path(restore_to) if restore_to else self.data_dir
+
+        if not backup_path.exists():
+            return {
+                "backup_name": backup_name,
+                "status": "FAILED",
+                "error": f"Backup not found: {backup_path}"
+            }
+
+        try:
+            start_time = datetime.now()
+            # The destination itself must exist, not only its parent;
+            # otherwise every copy below fails.
+            restore_to.mkdir(parents=True, exist_ok=True)
+
+            restored_count = 0
+            failed_count = 0
+            for src in backup_path.glob("*"):
+                # The metadata file describes the backup; it is not payload.
+                if src.name == "metadata.json":
+                    continue
+
+                dst = restore_to / src.name
+                try:
+                    if src.is_file():
+                        shutil.copy2(src, dst)
+                        restored_count += 1
+                    elif src.is_dir():
+                        # Replace an existing directory wholesale.
+                        if dst.exists():
+                            shutil.rmtree(dst)
+                        shutil.copytree(src, dst)
+                        restored_count += 1
+                except Exception as e:
+                    print(f"Error restoring {src}: {e}")
+                    failed_count += 1
+
+            recovery_time = (datetime.now() - start_time).total_seconds()
+
+            if failed_count == 0:
+                status = "SUCCESS"
+            elif restored_count == 0:
+                status = "FAILED"
+            else:
+                status = "PARTIAL_SUCCESS"
+
+            result = {
+                "backup_name": backup_name,
+                "status": status,
+                "files_restored": restored_count,
+                "files_failed": failed_count,
+                "recovery_time_seconds": round(recovery_time, 2),
+                "restored_to": str(restore_to)
+            }
+            if status == "FAILED":
+                result["error"] = "No backup entries could be restored"
+
+        except Exception as e:
+            result = {
+                "backup_name": backup_name,
+                "status": "FAILED",
+                "error": str(e)
+            }
+
+        self.results["recovery_tests"].append(result)
+        return result
+
+    def cleanup_old_backups(self) -> Dict:
+        """Delete backups whose recorded timestamp is past the retention window.
+
+        Only sub-directories of the backup directory that contain a
+        ``metadata.json`` are considered; the ``timestamp`` stored there is
+        compared with ``now - retention_days``. A directory that cannot be
+        processed is reported on stdout and skipped.
+
+        Returns:
+            ``{"status": "SUCCESS", "deleted_backups": int,
+            "freed_space_mb": float}``, or ``{"status": "FAILED",
+            "error": str}`` when the backup directory cannot be listed.
+        """
+        cutoff_date = datetime.now() - timedelta(days=self.retention_days)
+        deleted_count = 0
+        freed_size = 0
+
+        try:
+            # Snapshot the listing first: entries are removed while looping.
+            for backup_dir in list(self.backup_dir.iterdir()):
+                if not backup_dir.is_dir():
+                    continue
+
+                try:
+                    metadata_file = backup_dir / "metadata.json"
+                    if not metadata_file.exists():
+                        continue
+
+                    # Close the metadata file before deleting its directory;
+                    # removing an open file fails on Windows.
+                    with open(metadata_file, encoding="utf-8") as fh:
+                        metadata = json.load(fh)
+                    backup_time = datetime.fromisoformat(metadata["timestamp"])
+
+                    if backup_time >= cutoff_date:
+                        continue
+
+                    dir_size = sum(
+                        item.stat().st_size
+                        for item in backup_dir.rglob("*")
+                        if item.is_file()
+                    )
+
+                    shutil.rmtree(backup_dir)
+                    # Count the space only once the delete actually succeeded.
+                    freed_size += dir_size
+                    deleted_count += 1
+                except Exception as e:
+                    print(f"Error processing {backup_dir}: {e}")
+
+            return {
+                "status": "SUCCESS",
+                "deleted_backups": deleted_count,
+                "freed_space_mb": round(freed_size / (1024 * 1024), 2)
+            }
+
+        except Exception as e:
+            return {
+                "status": "FAILED",
+                "error": str(e)
+            }
+
+    def test_backup_integrity(self, backup_name: str) -> Dict:
+        """Check a backup's metadata, file count and SQLite database.
+
+        The number of backed-up items is compared with ``files_backed_up``
+        from ``metadata.json``. Backups whose metadata ``type`` is
+        ``"weekly_full"`` are counted file by file through the whole tree;
+        any other backup is counted by its top-level entries. A
+        ``data_feed.db`` found in the backup is checked with
+        ``PRAGMA integrity_check``.
+
+        Args:
+            backup_name: Folder name of the backup under the backup directory.
+
+        Returns:
+            A dict with ``backup_name`` and ``status``. ``status`` is
+            ``"SUCCESS"`` only when the file count matches and the database
+            check (if a database is present) reports ``ok``; otherwise it is
+            ``"FAILED"`` and ``error`` explains why. A completed check also
+            returns ``metadata_valid``, ``file_count``, ``expected_files``,
+            ``files_match``, ``database_integrity`` and ``backup_timestamp``.
+        """
+        backup_path = self.backup_dir / backup_name
+
+        if not backup_path.exists():
+            return {
+                "backup_name": backup_name,
+                "status": "FAILED",
+                "error": "Backup not found"
+            }
+
+        try:
+            metadata_file = backup_path / "metadata.json"
+            if not metadata_file.exists():
+                return {
+                    "backup_name": backup_name,
+                    "status": "FAILED",
+                    "error": "Metadata missing"
+                }
+
+            with open(metadata_file, encoding="utf-8") as f:
+                metadata = json.load(f)
+
+            # Count the same way the backup that wrote the metadata counted.
+            if metadata.get("type") == "weekly_full":
+                entries = [p for p in backup_path.rglob("*") if p.is_file()]
+            else:
+                entries = list(backup_path.glob("*"))
+            actual_files = sum(1 for p in entries if p != metadata_file)
+            expected_files = metadata.get("files_backed_up", actual_files)
+            files_match = actual_files == expected_files
+
+            # The database sits at the top level of a daily backup and
+            # deeper inside a weekly one.
+            db_file = backup_path / "data_feed.db"
+            if not db_file.exists():
+                db_file = next(backup_path.rglob("data_feed.db"), None)
+
+            db_integrity = "OK"
+            if db_file is not None:
+                try:
+                    conn = sqlite3.connect(db_file)
+                    try:
+                        row = conn.execute("PRAGMA integrity_check").fetchone()
+                    finally:
+                        # Release the handle even when the check raises.
+                        conn.close()
+                    db_integrity = row[0] if row else "UNKNOWN"
+                except Exception:
+                    db_integrity = "FAILED"
+
+            # SQLite answers "ok" in lower case; compare case-insensitively.
+            db_ok = str(db_integrity).lower() == "ok"
+
+            problems = []
+            if not files_match:
+                problems.append(
+                    f"File count mismatch: expected {expected_files}, found {actual_files}"
+                )
+            if not db_ok:
+                problems.append(f"Database integrity check: {db_integrity}")
+
+            result = {
+                "backup_name": backup_name,
+                "status": "FAILED" if problems else "SUCCESS",
+                "metadata_valid": True,
+                "file_count": actual_files,
+                "expected_files": expected_files,
+                "files_match": files_match,
+                "database_integrity": db_integrity,
+                "backup_timestamp": metadata.get("timestamp")
+            }
+            if problems:
+                result["error"] = "; ".join(problems)
+            return result
+
+        except Exception as e:
+            return {
+                "backup_name": backup_name,
+                "status": "FAILED",
+                "error": str(e)
+            }
+
+    def generate_backup_report(self) -> Dict:
+        """Fill ``self.results["summary"]`` from what is on disk and return results.
+
+        A backup is any sub-directory of the backup directory that contains a
+        ``metadata.json``. The success rate is taken from the backups recorded
+        on this instance and is 100 when none were recorded.
+
+        Returns:
+            The whole ``self.results`` dict, with ``summary`` refreshed.
+        """
+        # Backups currently present on disk.
+        backup_names = []
+        for entry in self.backup_dir.iterdir():
+            if entry.is_dir() and (entry / "metadata.json").exists():
+                backup_names.append(entry.name)
+
+        # Combined size of every file inside those backups.
+        total_bytes = 0
+        for name in backup_names:
+            for item in (self.backup_dir / name).rglob("*"):
+                if item.is_file():
+                    total_bytes += item.stat().st_size
+
+        # Split by naming prefix.
+        daily_total = len([n for n in backup_names if n.startswith("daily_")])
+        weekly_total = len([n for n in backup_names if n.startswith("weekly_")])
+
+        recorded = self.results["backups"]
+        if recorded:
+            succeeded = len([b for b in recorded if b.get("status") == "SUCCESS"])
+            success_rate = round((succeeded / max(len(recorded), 1)) * 100, 1)
+        else:
+            success_rate = 100
+
+        self.results["summary"] = {
+            "total_backups": len(backup_names),
+            "daily_backups": daily_total,
+            "weekly_backups": weekly_total,
+            "total_size_mb": round(total_bytes / (1024 * 1024), 2),
+            "retention_days": self.retention_days,
+            "success_rate": success_rate
+        }
+
+        return self.results
+
+    def print_report(self):
+        """Print the last five recorded backups and, if present, the summary."""
+        heavy_rule = "=" * 80
+        light_rule = "-" * 80
+
+        print("\n" + heavy_rule)
+        print("BACKUP & RECOVERY MANAGEMENT REPORT")
+        print(heavy_rule)
+        print(f"Timestamp: {self.results['timestamp']}\n")
+
+        print("RECENT BACKUPS:")
+        print(light_rule)
+        recent = self.results["backups"][-5:]
+        for entry in recent:
+            # Anything other than a clean success gets the cross marker.
+            if entry.get("status") == "SUCCESS":
+                status_marker = "✓"
+            else:
+                status_marker = "✗"
+            name = entry.get('backup_name', 'N/A')
+            size_mb = entry.get('total_size_mb', 0)
+            files = entry.get('files_backed_up', 0)
+            print(
+                f"{status_marker} {name:30} "
+                f"| Size: {size_mb:8.2f}MB | "
+                f"Files: {files:3}"
+            )
+
+        summary = self.results["summary"]
+        if summary:
+            print("\nSUMMARY:")
+            print(light_rule)
+            print(f"Total backups: {summary['total_backups']}")
+            print(f"Daily backups: {summary['daily_backups']}")
+            print(f"Weekly backups: {summary['weekly_backups']}")
+            print(f"Total size: {summary['total_size_mb']:.2f}MB")
+            print(f"Success rate: {summary['success_rate']:.1f}%")
+
+        print(heavy_rule + "\n")
+
+    def save_report(self, output_file: str = None):
+        """Write ``self.results`` as UTF-8 JSON and print where it was saved.
+
+        Args:
+            output_file: Destination path. When empty, a timestamped file
+                under ``Temp/`` is used. Parent directories are created.
+        """
+        if output_file:
+            target = output_file
+        else:
+            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+            target = f"Temp/backup_report_{stamp}.json"
+
+        parent_dir = Path(target).parent
+        parent_dir.mkdir(parents=True, exist_ok=True)
+
+        # ensure_ascii=False keeps non-ASCII text readable in the file.
+        with open(target, 'w', encoding='utf-8') as handle:
+            json.dump(self.results, handle, indent=2, ensure_ascii=False)
+
+        print(f"Report saved: {target}")
+
+
+if __name__ == "__main__":
+    manager = BackupRecoveryManager()
+
+    # A daily backup is taken on every run.
+    print("Creating daily backup...")
+    manager.create_daily_backup()
+
+    # The full backup is added only on Mondays (weekday() == 0).
+    run_day = datetime.now().weekday()
+    if run_day == 0:
+        print("Creating weekly full backup...")
+        manager.create_weekly_full_backup()
+
+    # Drop backups that are past the retention window.
+    print("Cleaning up old backups...")
+    manager.cleanup_old_backups()
+
+    # Build the report, show it, and persist it.
+    manager.generate_backup_report()
+    manager.print_report()
+    manager.save_report()
@@ -0,0 +1,286 @@
+#!/usr/bin/env python3
+"""
+WBS-9.5: 섹터 플로우 신호 신뢰도 측정
+
+목표: 섹터별 flow_credit vs 실제 수익률 상관도 계산
+"""
+
+import json
+import sqlite3
+from pathlib import Path
+from datetime import datetime, timedelta
+from typing import Dict, List, Tuple
+import statistics
+
+class SectorFlowReliabilityMeasure:
+    """섹터 플로우 신뢰도 측정 도구"""
+
+    def __init__(self, db_path: str = None):
+        """Remember the database path and start an empty results record.
+
+        Args:
+            db_path: Path to the SQLite database; when empty, the default
+                ``src/quant_engine/data_feed.db`` is used.
+        """
+        if not db_path:
+            db_path = "src/quant_engine/data_feed.db"
+        self.db_path = db_path
+
+        created_at = datetime.now().isoformat()
+        self.results = dict(timestamp=created_at, sectors={}, summary={})
+
+    def _query_sector_trades(self, sector: str, days: int = 30) -> List[Dict]:
+        """Fetch the closed trades of one sector from the ``performance`` table.
+
+        Args:
+            sector: Value matched against the ``sector`` column.
+            days: Only trades whose ``entry_date`` is within this many days
+                before now are returned.
+
+        Returns:
+            One dict per row (ticker, entry/exit date and price, pnl_pct,
+            flow_credit, sector), newest entry first; rows without an
+            ``exit_date`` are left out. Any error is printed and yields an
+            empty list.
+        """
+        query = """
+            SELECT
+                ticker,
+                entry_date,
+                exit_date,
+                entry_price,
+                exit_price,
+                pnl_pct,
+                flow_credit,
+                sector
+            FROM performance
+            WHERE sector = ?
+                AND entry_date >= datetime('now', '-' || ? || ' days')
+                AND exit_date IS NOT NULL
+            ORDER BY entry_date DESC
+            """
+
+        try:
+            conn = sqlite3.connect(self.db_path)
+            try:
+                conn.row_factory = sqlite3.Row
+                cursor = conn.cursor()
+                cursor.execute(query, (sector, days))
+                return [dict(row) for row in cursor.fetchall()]
+            finally:
+                # Close the handle on the error path as well, so a failing
+                # query does not leak the connection.
+                conn.close()
+        except Exception as e:
+            print(f"Error querying trades for {sector}: {e}")
+            return []
+
+    def _calculate_hit_rate(self, signal_correct: List[bool]) -> float:
+        """Return the percentage of entries that are true; 0.0 for no entries."""
+        sample_size = len(signal_correct)
+        if sample_size == 0:
+            return 0.0
+        hits = sum(signal_correct)
+        return (hits / sample_size) * 100
+
+    def _calculate_correlation(
+        self,
+        flow_credits: List[float],
+        pnl_pcts: List[float]
+    ) -> float:
+        """Return the Pearson correlation between flow_credit and pnl.
+
+        Numerator and denominator are both built from the same sums of
+        deviations. Mixing a population covariance (divide by n) with sample
+        standard deviations (divide by n - 1) would shrink every result by
+        (n - 1) / n.
+
+        Args:
+            flow_credits: Signal values, one per trade.
+            pnl_pcts: Returns, aligned with ``flow_credits``.
+
+        Returns:
+            The coefficient clamped to [-1, 1] and rounded to 3 decimals;
+            ``0.0`` when either series has no variation; ``None`` when there
+            are fewer than two points or the lengths differ.
+        """
+        if len(flow_credits) < 2 or len(pnl_pcts) < 2:
+            return None
+
+        if len(flow_credits) != len(pnl_pcts):
+            return None
+
+        mean_flow = statistics.mean(flow_credits)
+        mean_pnl = statistics.mean(pnl_pcts)
+
+        flow_dev = [value - mean_flow for value in flow_credits]
+        pnl_dev = [value - mean_pnl for value in pnl_pcts]
+
+        cross_sum = sum(f * p for f, p in zip(flow_dev, pnl_dev))
+        flow_sq_sum = sum(f * f for f in flow_dev)
+        pnl_sq_sum = sum(p * p for p in pnl_dev)
+
+        # Take the roots separately so the product of the two sums cannot
+        # overflow or underflow before the square root is applied.
+        denominator = (flow_sq_sum ** 0.5) * (pnl_sq_sum ** 0.5)
+        if denominator == 0:
+            return 0.0
+
+        correlation = cross_sum / denominator
+        # Clamp away floating-point overshoot just beyond +/-1.
+        return round(min(1.0, max(-1.0, correlation)), 3)
+
+    def measure_sector(self, sector: str, days: int = 30) -> Dict:
+        """Measure how well flow_credit predicted profit for one sector.
+
+        A trade counts as a correct signal when ``flow_credit > 0`` coincides
+        with ``pnl_pct > 0`` (or both are not positive). Trades whose
+        ``flow_credit`` or ``pnl_pct`` is NULL in the database cannot be
+        scored and are left out of the sample.
+
+        Args:
+            sector: Sector name (e.g. "금융", "IT").
+            days: Look-back window in days (default 30).
+
+        Returns:
+            With fewer than 5 usable trades:
+            ``{"sector", "sample_count", "status": "INSUFFICIENT", "note"}``.
+            Otherwise::
+
+                {
+                  "sector": str,
+                  "sample_count": int,
+                  "flow_signal_hit_rate": float (0-100),
+                  "correlation": float (-1~1),
+                  "mean_pnl_correct": float,
+                  "mean_pnl_incorrect": float,
+                  "reliability_score": float (0-100),
+                  "status": "HIGH" | "MEDIUM" | "LOW",
+                  "lookback_days": int
+                }
+        """
+        trades = self._query_sector_trades(sector, days)
+
+        # A missing key falls back to 0; an explicit None (SQL NULL) cannot
+        # be compared with 0 and is dropped.
+        samples = [
+            (trade.get("flow_credit", 0), trade.get("pnl_pct", 0))
+            for trade in trades
+        ]
+        samples = [
+            (flow, pnl) for flow, pnl in samples
+            if flow is not None and pnl is not None
+        ]
+
+        if len(samples) < 5:
+            return {
+                "sector": sector,
+                "sample_count": len(samples),
+                "status": "INSUFFICIENT",
+                "note": f"Samples < 5 ({len(samples)} found)"
+            }
+
+        flow_credits = [flow for flow, _ in samples]
+        pnl_pcts = [pnl for _, pnl in samples]
+
+        # Signal: flow > 0 predicts a profitable trade.
+        signal_correct = [(pnl > 0) == (flow > 0) for flow, pnl in samples]
+
+        correlation = self._calculate_correlation(flow_credits, pnl_pcts)
+        hit_rate = self._calculate_hit_rate(signal_correct)
+
+        # Average return, split by whether the signal was right.
+        correct_pnls = [pnl for pnl, ok in zip(pnl_pcts, signal_correct) if ok]
+        incorrect_pnls = [pnl for pnl, ok in zip(pnl_pcts, signal_correct) if not ok]
+
+        mean_pnl_correct = statistics.mean(correct_pnls) if correct_pnls else 0
+        mean_pnl_incorrect = statistics.mean(incorrect_pnls) if incorrect_pnls else 0
+
+        # Reliability score (0-100): 70% hit rate plus 30% correlation,
+        # with the correlation rescaled from [-1, 1] to [0, 100].
+        reliability_score = (hit_rate * 0.7) + (
+            (correlation + 1) * 50 * 0.3 if correlation is not None else 0
+        )
+
+        if reliability_score >= 70:
+            status = "HIGH"
+        elif reliability_score >= 50:
+            status = "MEDIUM"
+        else:
+            status = "LOW"
+
+        return {
+            "sector": sector,
+            "sample_count": len(samples),
+            "flow_signal_hit_rate": round(hit_rate, 1),
+            "correlation": correlation,
+            "mean_pnl_correct": round(mean_pnl_correct, 2),
+            "mean_pnl_incorrect": round(mean_pnl_incorrect, 2),
+            "reliability_score": round(reliability_score, 1),
+            "status": status,
+            "lookback_days": days
+        }
+
+    def measure_all_sectors(self, days: int = 30, sectors: List[str] = None) -> Dict:
+        """Measure every sector, store the results and build the summary.
+
+        Args:
+            days: Look-back window in days passed to each measurement.
+            sectors: Sector names to measure. When omitted, the built-in
+                list of ten sectors is used.
+
+        Returns:
+            ``self.results`` with ``sectors`` and ``summary`` filled in.
+        """
+        if sectors is None:
+            sectors = [
+                "금융", "IT", "전기전자", "화학", "철강금속",
+                "기계", "의약품", "반도체", "통신", "에너지"
+            ]
+
+        for sector in sectors:
+            self.results["sectors"][sector] = self.measure_sector(sector, days)
+
+        self._generate_summary()
+        return self.results
+
+    def _generate_summary(self):
+        """Aggregate the per-sector results into ``self.results["summary"]``."""
+        per_sector = self.results["sectors"]
+
+        # Group sector names by their reliability status.
+        grouped = {"HIGH": [], "MEDIUM": [], "LOW": [], "INSUFFICIENT": []}
+        for name, outcome in per_sector.items():
+            label = outcome.get("status")
+            if label in ("HIGH", "MEDIUM", "LOW", "INSUFFICIENT"):
+                grouped[label].append(name)
+
+        # Average over the sectors that actually have a hit rate.
+        hit_rates = [
+            outcome["flow_signal_hit_rate"]
+            for outcome in per_sector.values()
+            if "flow_signal_hit_rate" in outcome
+        ]
+        if hit_rates:
+            avg_hit_rate = statistics.mean(hit_rates)
+        else:
+            avg_hit_rate = 0
+
+        if avg_hit_rate >= 60:
+            recommendation = "✓ 신호 신뢰도 충분 (≥60% hit rate)"
+        else:
+            recommendation = "⚠ 신호 신뢰도 미흡 (< 60% hit rate)"
+
+        self.results["summary"] = {
+            "total_sectors": len(per_sector),
+            "high_reliability": len(grouped["HIGH"]),
+            "medium_reliability": len(grouped["MEDIUM"]),
+            "low_reliability": len(grouped["LOW"]),
+            "insufficient_data": len(grouped["INSUFFICIENT"]),
+            "avg_hit_rate": round(avg_hit_rate, 1),
+            "high_reliability_sectors": grouped["HIGH"],
+            "low_reliability_sectors": grouped["LOW"],
+            "recommendation": recommendation
+        }
+
+    def print_report(self):
+        """Print the per-sector results and, when available, the summary.
+
+        The summary section is skipped while ``self.results["summary"]`` is
+        still empty (nothing measured yet), and a missing correlation is
+        shown as ``N/A`` rather than raising.
+        """
+        print("\n" + "=" * 80)
+        print("SECTOR FLOW RELIABILITY MEASUREMENT REPORT")
+        print("=" * 80)
+        print(f"Timestamp: {self.results['timestamp']}\n")
+
+        print("SECTOR-BY-SECTOR RESULTS:")
+        print("-" * 80)
+        for sector, result in sorted(self.results["sectors"].items()):
+            if result.get("status") in ["HIGH", "MEDIUM", "LOW"]:
+                status_marker = "✓" if result["status"] == "HIGH" else "⚠"
+                correlation = result.get("correlation")
+                # None cannot take a float format spec; pad to the same width.
+                correlation_text = (
+                    f"{correlation:6.3f}" if correlation is not None else "   N/A"
+                )
+                print(
+                    f"{status_marker} {sector:10} | "
+                    f"Samples: {result['sample_count']:2} | "
+                    f"Hit Rate: {result['flow_signal_hit_rate']:5.1f}% | "
+                    f"Correlation: {correlation_text} | "
+                    f"Score: {result['reliability_score']:5.1f}"
+                )
+            else:
+                print(f"- {sector:10} | {result.get('note', 'INSUFFICIENT DATA')}")
+
+        s = self.results["summary"]
+        if s:
+            print("\nSUMMARY:")
+            print("-" * 80)
+            print(f"Total sectors: {s['total_sectors']}")
+            print(f"High reliability: {s['high_reliability']} {s['high_reliability_sectors']}")
+            print(f"Medium reliability: {s['medium_reliability']}")
+            print(f"Low reliability: {s['low_reliability']} {s['low_reliability_sectors']}")
+            print(f"Insufficient data: {s['insufficient_data']}")
+            print(f"\nAverage hit rate: {s['avg_hit_rate']:.1f}%")
+            print(f"Recommendation: {s['recommendation']}")
+        print("=" * 80 + "\n")
+
+    def save_report(self, output_file: str = None):
+        """Write ``self.results`` as UTF-8 JSON and print where it was saved.
+
+        Args:
+            output_file: Destination path. When empty, a timestamped file
+                under ``Temp/`` is used. Parent directories are created.
+        """
+        if output_file:
+            target = output_file
+        else:
+            stamp = datetime.now().strftime('%Y%m%d_%H%M%S')
+            target = f"Temp/sector_flow_reliability_{stamp}.json"
+
+        parent_dir = Path(target).parent
+        parent_dir.mkdir(parents=True, exist_ok=True)
+
+        # ensure_ascii=False keeps the Korean sector names readable.
+        with open(target, 'w', encoding='utf-8') as handle:
+            json.dump(self.results, handle, indent=2, ensure_ascii=False)
+
+        print(f"Report saved: {target}")
+
+
+if __name__ == "__main__":
+    # Measure reliability over a 30-day look-back, then show and persist it.
+    lookback_days = 30
+    measurer = SectorFlowReliabilityMeasure()
+    measurer.measure_all_sectors(days=lookback_days)
+    measurer.print_report()
+    measurer.save_report()