"""lint_repo_hygiene.py — 미사용·중복·상충 파일 감사 도구 사용법: python tools/lint_repo_hygiene.py [--json out.json] [--delete-safe] 종료 코드: 0 = 경고 없음 (또는 warn-only 항목만) 1 = 삭제 권장 파일 존재 """ from __future__ import annotations import argparse import json import os import re import sys from pathlib import Path ROOT = Path(__file__).resolve().parents[1] # ────────────────────────────────────────────────────────── # 1. 참조 수집 헬퍼 # ────────────────────────────────────────────────────────── def _load_pkg_refs() -> set[str]: """package.json scripts에서 tools/*.py 파일 stem 수집.""" pkg_path = ROOT / "package.json" if not pkg_path.exists(): return set() text = pkg_path.read_text(encoding="utf-8") return set(m.group(1) for m in re.finditer(r'tools/([a-z_A-Z0-9]+)\.py', text)) def _load_spec_refs() -> set[str]: """spec/*.yaml 에서 python_tool: 참조 수집.""" refs: set[str] = set() for y in ROOT.rglob("spec/**/*.yaml"): for m in re.finditer(r'python_tool:\s*tools/([a-zA-Z0-9_]+)\.py', y.read_text(encoding="utf-8", errors="ignore")): refs.add(m.group(1)) return refs def _load_py_refs() -> set[str]: """tools/*.py 내부에서 tools/*.py 문자열 참조 수집 (self-reference 제외).""" refs: set[str] = set() for py in (ROOT / "tools").glob("*.py"): try: content = py.read_text(encoding="utf-8", errors="ignore") for m in re.finditer(r'tools/([a-zA-Z0-9_]+)\.py', content): found = m.group(1) if found != py.stem: # self-reference 제외 refs.add(found) except Exception: pass return refs def _load_import_refs() -> set[str]: """tools/*.py 및 root/*.py 에서 Python import 기반 참조 수집.""" tool_stems = {p.stem for p in (ROOT / "tools").glob("*.py")} refs: set[str] = set() for py in list((ROOT / "tools").glob("*.py")) + list(ROOT.glob("*.py")): try: content = py.read_text(encoding="utf-8", errors="ignore") for m in re.finditer(r'^from\s+([a-zA-Z0-9_]+)\s+import', content, re.MULTILINE): if m.group(1) in tool_stems: refs.add(m.group(1)) for m in re.finditer(r'^import\s+([a-zA-Z0-9_]+)', content, re.MULTILINE): if m.group(1) in tool_stems: 
refs.add(m.group(1)) except Exception: pass return refs def _load_path_refs() -> set[str]: """tools/*.py 내에서 "filename.py" (Path 방식) 참조 수집.""" tool_stems = {p.stem for p in (ROOT / "tools").glob("*.py")} refs: set[str] = set() for py in list((ROOT / "tools").glob("*.py")) + list(ROOT.glob("*.py")): try: content = py.read_text(encoding="utf-8", errors="ignore") for m in re.finditer(r'"([a-zA-Z0-9_]+)\.py"', content): stem = m.group(1) if stem in tool_stems and stem != py.stem: refs.add(stem) except Exception: pass return refs def _load_ps1_refs() -> set[str]: """*.ps1 에서 tools/*.py 참조 수집.""" refs: set[str] = set() for ps1 in ROOT.rglob("*.ps1"): try: for m in re.finditer(r'tools/([a-zA-Z0-9_]+)\.py', ps1.read_text(encoding="utf-8", errors="ignore")): refs.add(m.group(1)) except Exception: pass return refs def _all_py_stems() -> list[str]: stems = [] for py in sorted((ROOT / "tools").glob("*.py")): stems.append(py.stem) for py in sorted((ROOT / "Temp").glob("*.py")): stems.append("__Temp__/" + py.stem) return stems # ────────────────────────────────────────────────────────── # 2. 
# Python file classification
# ──────────────────────────────────────────────────────────

# Classification by name pattern
_ONETIME_PREFIXES = ("fix_", "update_formula_registry", "append_golden_cases", "rename_data_files")
_APPLY_PREFIX = "apply_"

# apply_* scripts that are explicitly used from package.json are kept
_APPLY_KEEP = {
    "apply_engine_upgrade_v4",  # validate-engine-v4
    "apply_strategy_execution_locks",  # apply-strategy-execution-locks
    "apply_perf_recovery_overrides_v1",
    "apply_request_result_adoption_v1",
}

# Files that automatic detection misses but that must be kept explicitly
_MANUAL_KEEP = {
    "__init__",  # Python package init
    "backfill_eod_replay_history",  # data management tool
    "run_formula_golden_cases_v3",  # test runner (v2 + v3 coexist for different coverage)
    "sync_replay_sheet_to_history",  # data management tool
    "validate_harness_json",  # called from harness_coverage_auditor via Path ref
    "lint_repo_hygiene",  # this tool itself
}


def _categorize_py(stem: str, all_refs: set[str]) -> str:
    """Classify a Python file stem as "KEEP", "SAFE_DELETE" or "REVIEW".

    Args:
        stem: File stem, optionally carrying the "__Temp__/" marker.
        all_refs: Stems that are referenced somewhere in the repository.

    Returns:
        "KEEP" when the stem is referenced or listed in a keep-set,
        "SAFE_DELETE" when its name starts with ``apply_`` or a one-time
        prefix, and "REVIEW" otherwise.
    """
    # Every rule works on the marker-free name, including the reference
    # lookup, so a prefixed and an unprefixed stem classify the same way.
    raw = stem.replace("__Temp__/", "")
    if stem in all_refs or raw in all_refs:
        return "KEEP"
    if raw in _APPLY_KEEP or raw in _MANUAL_KEEP:
        return "KEEP"
    if raw.startswith((_APPLY_PREFIX, *_ONETIME_PREFIXES)):
        return "SAFE_DELETE"
    return "REVIEW"


# ──────────────────────────────────────────────────────────
# 3.
# YAML duplicate / version-conflict audit
# ──────────────────────────────────────────────────────────
def _load_reference_corpus() -> list[tuple[Path, str]]:
    """Read, once, every file that may mention a spec YAML by file name.

    Covers tools/*.py, root-level *.yaml and all *.yaml under spec/.
    Files that cannot be read are skipped.
    """
    candidates = [
        *(ROOT / "tools").glob("*.py"),
        # YAML-to-YAML cross-references (root-level YAML and spec files)
        *ROOT.glob("*.yaml"),
        *(ROOT / "spec").rglob("*.yaml"),
    ]
    corpus: list[tuple[Path, str]] = []
    for candidate in candidates:
        try:
            text = candidate.read_text(encoding="utf-8", errors="ignore")
        except OSError:
            continue
        corpus.append((candidate, text))
    return corpus


def _audit_yaml() -> list[dict]:
    """Audit spec/ YAML files for number clashes and stale versions.

    Returns:
        Issue dicts of three kinds: "YAML_NUMBER_CONFLICT" (several files
        share a numeric prefix), "YAML_SUPERSEDED_VERSION" (an older ``_vN``
        file exists next to a newer one) and "YAML_ORIGINAL_AND_V1" (an
        unversioned file coexists with its ``_v1`` sibling).
    """
    issues: list[dict] = []
    spec = ROOT / "spec"
    spec_yamls = sorted(spec.rglob("*.yaml"))

    # Detect files sharing the same numeric prefix (e.g. 35_foo_v2, 35_foo_v3)
    numbered: dict[str, list[Path]] = {}
    for y in spec_yamls:
        m = re.match(r'(\d+[a-z]?)_', y.name)
        if m:
            numbered.setdefault(m.group(1), []).append(y)
    for num, files in numbered.items():
        if len(files) > 1:
            # Several files carry the same number -> possible conflict
            names = [f.relative_to(ROOT).as_posix() for f in files]
            issues.append({
                "type": "YAML_NUMBER_CONFLICT",
                "severity": "WARN",
                "files": names,
                "note": f"spec prefix '{num}' shared by {len(files)} files - number conflict",
            })

    # Detect version pairs (foo_v2.yaml + foo_v3.yaml -> v2 needs review)
    # Both versions intentionally maintained with different test scopes
    _COEXIST_BASES = {"formula_golden_cases"}
    versioned: dict[str, list[tuple[int, Path]]] = {}
    for y in spec_yamls:
        m = re.match(r'(.+?)_v(\d+)\.yaml$', y.name)
        if m:
            if m.group(1) in _COEXIST_BASES:
                continue
            base = (y.parent / m.group(1)).as_posix()
            versioned.setdefault(base, []).append((int(m.group(2)), y))

    multi_version = [sorted(vers) for vers in versioned.values() if len(vers) > 1]
    # The corpus is only needed, and only read, when an older version exists.
    corpus = _load_reference_corpus() if multi_version else []

    def _yaml_has_refs(yaml_path: Path) -> list[str]:
        """Return the names of tools scripts / YAML files mentioning this file name."""
        name = yaml_path.name
        return [
            source.name
            for source, text in corpus
            if source != yaml_path and name in text
        ]

    for vers in multi_version:
        latest_ver, latest_path = vers[-1]
        for ver, path in vers[:-1]:
            py_refs = _yaml_has_refs(path)
            if py_refs:
                # Still referenced somewhere -> informational only
                severity = "INFO"
                note = f"v{ver} superseded by v{latest_ver} but still referenced by {py_refs[:2]}"
            else:
                severity = "WARN"
                note = f"v{ver} superseded by v{latest_ver} - no Python refs found, review then delete"
            issues.append({
                "type": "YAML_SUPERSEDED_VERSION",
                "severity": severity,
                "file": path.relative_to(ROOT).as_posix(),
                "superseded_by": latest_path.relative_to(ROOT).as_posix(),
                "note": note,
            })

    # Original + v1 coexisting (e.g. horizon_allocation.yaml + horizon_allocation_v1.yaml)
    for y in spec_yamls:
        m = re.match(r'(.+?)_v1\.yaml$', y.name)
        if m:
            base_path = y.parent / (m.group(1) + ".yaml")
            if base_path.exists():
                issues.append({
                    "type": "YAML_ORIGINAL_AND_V1",
                    "severity": "INFO",
                    "file": base_path.relative_to(ROOT).as_posix(),
                    "superseded_by": y.relative_to(ROOT).as_posix(),
                    "note": "base + _v1 coexist - review if base is still needed",
                })
    return issues


# ──────────────────────────────────────────────────────────
# 4. Markdown audit
# ──────────────────────────────────────────────────────────
def _audit_md() -> list[dict]:
    """List Markdown files that should be reviewed by hand.

    Returns:
        One INFO issue per README.md found under spec/ ("MD_REVIEW") and one
        per *.md directly inside prompts/ ("MD_PROMPT").
    """
    issues: list[dict] = []
    # README.md files inside spec/ describe strategy/risk structure - content check advised
    for md in (ROOT / "spec").rglob("README.md"):
        issues.append({
            "type": "MD_REVIEW",
            "severity": "INFO",
            "file": md.relative_to(ROOT).as_posix(),
            "note": "spec README - verify alignment with current YAML structure",
        })
    # prompts/ - check whether versioning was missed
    for md in (ROOT / "prompts").glob("*.md"):
        issues.append({
            "type": "MD_PROMPT",
            "severity": "INFO",
            "file": md.relative_to(ROOT).as_posix(),
            "note": "prompt file - verify sync with latest spec",
        })
    return issues


# ──────────────────────────────────────────────────────────
# 5.
# Main
# ──────────────────────────────────────────────────────────
def _delete_safe_files(safe_delete: list[str]) -> list[str]:
    """Delete the files behind *safe_delete* and return the ones removed.

    Stems prefixed with "__Temp__/" resolve under Temp/, all others under
    tools/.  Missing files are skipped.  A file that cannot be removed is
    reported on stderr and skipped, so one failure neither aborts the run
    nor loses the record of files already deleted.

    Returns:
        ROOT-relative POSIX paths of the files that were actually deleted.
    """
    deleted: list[str] = []
    for stem in safe_delete:
        if stem.startswith("__Temp__/"):
            path = ROOT / "Temp" / (stem.replace("__Temp__/", "") + ".py")
        else:
            path = ROOT / "tools" / (stem + ".py")
        if not path.exists():
            continue
        rel = path.relative_to(ROOT).as_posix()
        try:
            path.unlink()
        except OSError as exc:
            print(f" failed: {rel} ({exc})", file=sys.stderr)
            continue
        deleted.append(rel)
        print(f" deleted: {rel}")
    return deleted


def main() -> int:
    """Run the hygiene audit, print the report and return the exit code.

    Command-line options:
        --json PATH     also write the result as JSON to PATH
        --delete-safe   actually delete the files classified SAFE_DELETE

    Returns:
        0 when no Python file is classified SAFE_DELETE or REVIEW, else 1.
        YAML and Markdown issues are reported but do not affect the result.
    """
    ap = argparse.ArgumentParser(description="Repo hygiene lint")
    ap.add_argument("--json", default=None, help="결과를 JSON으로 저장")
    ap.add_argument("--delete-safe", action="store_true", help="SAFE_DELETE 파일을 실제로 삭제")
    args = ap.parse_args()

    # Union of every kind of reference
    all_refs = (
        _load_pkg_refs()
        | _load_spec_refs()
        | _load_py_refs()
        | _load_ps1_refs()
        | _load_import_refs()
        | _load_path_refs()
    )

    # Classify Python files; the report keeps the prefixed stem while the
    # classifier receives the marker-free name.
    keep: list[str] = []
    safe_delete: list[str] = []
    review: list[str] = []
    buckets = {"KEEP": keep, "SAFE_DELETE": safe_delete}
    for stem in _all_py_stems():
        raw = stem.replace("__Temp__/", "")
        buckets.get(_categorize_py(raw, all_refs), review).append(stem)

    yaml_issues = _audit_yaml()
    md_issues = _audit_md()

    # ── Report ──
    print(f"\n{'='*60}")
    print(f" REPO HYGIENE REPORT - {ROOT.name}")
    print(f"{'='*60}")
    print(f"\n[Python] KEEP={len(keep)} SAFE_DELETE={len(safe_delete)} REVIEW={len(review)}")
    if safe_delete:
        print("\n>> SAFE_DELETE (1-time fix/apply - delete recommended):")
        for f in safe_delete:
            print(f" {f}")
    if review:
        print("\n>> REVIEW (unreferenced - verify then delete):")
        for f in review:
            print(f" {f}")
    if yaml_issues:
        print(f"\n[YAML] {len(yaml_issues)} issues found:")
        for iss in yaml_issues:
            icon = "!" if iss["severity"] == "WARN" else "i"
            print(f" {icon} [{iss['type']}] {iss.get('file', iss.get('files', ''))}")
            print(f" -> {iss['note']}")
    if md_issues:
        print(f"\n[MD] {len(md_issues)} files for review:")
        for iss in md_issues:
            print(f" i {iss['file']} - {iss['note']}")

    # Delete safe files (only when requested)
    deleted: list[str] = []
    if args.delete_safe and safe_delete:
        print("\n[--delete-safe] Deleting safe files...")
        deleted = _delete_safe_files(safe_delete)

    # JSON output
    result = {
        "python_keep_count": len(keep),
        "python_safe_delete_count": len(safe_delete),
        "python_review_count": len(review),
        "python_safe_delete": safe_delete,
        "python_review": review,
        "yaml_issues": yaml_issues,
        "md_issues": md_issues,
        "deleted": deleted,
        "gate": "PASS" if not safe_delete and not review else "WARN",
    }
    if args.json:
        out = Path(args.json)
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
        print(f"\nSaved: {args.json}")

    print(f"\n{'='*60}")
    print(f"gate={result['gate']} py_delete={len(safe_delete)} py_review={len(review)} yaml_issues={len(yaml_issues)}")
    return 0 if result["gate"] == "PASS" else 1


if __name__ == "__main__":
    raise SystemExit(main())