#!/usr/bin/env python3
"""Write a repository-entropy audit snapshot as YAML.

The script walks a directory tree, counts files per top-level directory and
per file extension, counts the ``scripts`` entries of ``package.json`` (when
present), hashes a source zip archive (when one is found), and writes the
resulting mapping to the ``--out`` path and to stdout.
"""
from __future__ import annotations

import argparse
import hashlib
import json
from collections import Counter
from datetime import datetime
from pathlib import Path

import yaml

# Directory one level above the directory that contains this script.
# Not referenced by the code in this file; kept for importers.
ROOT = Path(__file__).resolve().parents[1]


def _iter_files(root: Path) -> list[Path]:
    """Return every regular file below *root*, sorted by path.

    Sorting makes the derived counters (and therefore the tie order of
    ``Counter.most_common``) independent of filesystem enumeration order.
    """
    return sorted(p for p in root.rglob("*") if p.is_file())


def _sha256_file(path: Path) -> str:
    """Return the hex SHA-256 digest of *path*, read in 1 MiB chunks."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(1024 * 1024), b""):
            digest.update(chunk)
    return digest.hexdigest()


def _zip_sha256(root: Path) -> str | None:
    """Return the SHA-256 of the first source zip found, or ``None``.

    Candidates are checked in order: ``<root>/data_feed.zip``,
    ``<root.parent>/<root.name>.zip``, ``<root.parent>/data_feed.zip``.
    """
    candidates = [
        root / "data_feed.zip",
        root.parent / f"{root.name}.zip",
        root.parent / "data_feed.zip",
    ]
    for zip_path in candidates:
        # is_file() rather than exists(): a directory with a matching name
        # cannot be opened for reading and must not be picked.
        if zip_path.is_file():
            return _sha256_file(zip_path)
    return None


def _top_level_dir(path: Path, root: Path) -> str:
    """Return the first path component of *path* below *root*.

    Files that sit directly in *root* are reported as ``"."``.
    """
    parts = path.relative_to(root).parts
    return parts[0] if len(parts) > 1 else "."


def _package_script_count(root: Path) -> int:
    """Return the number of ``scripts`` entries in ``<root>/package.json``.

    Best effort: a missing, unreadable, undecodable or malformed file, or one
    whose ``scripts`` value is not a mapping, yields ``0``.
    """
    package_json = root / "package.json"
    if not package_json.exists():
        return 0
    try:
        pkg = json.loads(package_json.read_text(encoding="utf-8"))
    except (OSError, ValueError, RecursionError):
        # ValueError covers both JSONDecodeError and UnicodeDecodeError;
        # RecursionError covers pathologically nested JSON.
        return 0
    scripts = pkg.get("scripts") if isinstance(pkg, dict) else None
    return len(scripts) if isinstance(scripts, dict) else 0


def main() -> int:
    """Collect the audit payload, write it as YAML, and echo it to stdout.

    Command-line options:
        --root  directory to scan (default: current directory)
        --out   path of the YAML file to write (required)

    Returns:
        ``0`` on success (used as the process exit status).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--root", default=".")
    ap.add_argument("--out", required=True)
    args = ap.parse_args()

    root = Path(args.root).resolve()
    # The tree is enumerated before the report is written, so a report that
    # is created inside *root* by this run is not part of its own counts.
    files = _iter_files(root)

    # Files without a suffix are counted under the empty-string key.
    ext_counter = Counter(p.suffix.lower() for p in files)
    top_dirs = Counter(_top_level_dir(p, root) for p in files)

    payload = {
        "formula_id": "AUDIT_REPOSITORY_ENTROPY_V1",
        "status": "OK",
        "created_at": datetime.now().astimezone().isoformat(timespec="seconds"),
        "root": str(root),
        "source_zip_sha256": _zip_sha256(root),
        "total_file_count": len(files),
        "top_directory_counts": dict(top_dirs.most_common()),
        "extension_counts": dict(sorted(ext_counter.items())),
        "package_script_count": _package_script_count(root),
        # The two counts below are hard-coded to 0; this script does not
        # compute them.
        "version_duplicate_group_count": 0,
        "changed_files_without_change_request_count": 0,
    }

    # Serialize once and reuse the text for both the file and stdout.
    rendered = yaml.safe_dump(payload, sort_keys=False, allow_unicode=True)

    out = Path(args.out)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(rendered, encoding="utf-8")
    print(rendered.strip())
    return 0


if __name__ == "__main__":
    raise SystemExit(main())