#!/usr/bin/env python3
"""Write a JSON summary of which repository files are indexed or excluded.

Every regular file below the scan root is visited.  A file whose POSIX-style
relative path starts with one of ``EXCLUDED_PREFIXES`` is counted as excluded;
otherwise a file that is one of ``INCLUDED_ROOTS`` or lives beneath one is
counted as indexed.  Files matching neither rule are ignored.  The summary is
written to ``OUT`` and echoed to stdout.
"""
from __future__ import annotations

import json
from pathlib import Path

# Default scan root: the directory one level above the one holding this script.
ROOT = Path(__file__).resolve().parents[1]
# Default report location.
OUT = ROOT / "Temp" / "document_search_index_v1.json"
# Relative-path prefixes that are never indexed; checked before inclusion.
EXCLUDED_PREFIXES = ("docs/archive/", "suggest/", "artifacts/archive/")
# Top-level entries (directories or single files) that are indexed.
INCLUDED_ROOTS = ("docs", "spec", "governance", "src", "tools", "AGENTS.md", "README.md")

# "docs" -> "docs/": the trailing slash stops "docs" from matching "docs_old/x".
_INCLUDED_DIR_PREFIXES = tuple(f"{entry}/" for entry in INCLUDED_ROOTS)
# Number of paths listed in each "*_sample" field of the report.
_SAMPLE_SIZE = 50


def _is_excluded(rel: str) -> bool:
    """Return True if *rel* starts with any of ``EXCLUDED_PREFIXES``."""
    return rel.startswith(EXCLUDED_PREFIXES)


def _is_included(rel: str) -> bool:
    """Return True if *rel* is one of ``INCLUDED_ROOTS`` or lies beneath one."""
    return rel in INCLUDED_ROOTS or rel.startswith(_INCLUDED_DIR_PREFIXES)


def main(root: Path | None = None, out: Path | None = None) -> int:
    """Scan a tree, write the index summary as JSON, and print it.

    Args:
        root: Directory to scan.  Defaults to ``ROOT``.
        out: File to write the report to.  Defaults to ``OUT`` when *root* is
            not given, otherwise to the same relative location under *root*.

    Returns:
        Always ``0``, for use as the process exit status.
    """
    base = ROOT if root is None else Path(root)
    if out is not None:
        target = Path(out)
    elif root is None:
        target = OUT
    else:
        target = base / OUT.relative_to(ROOT)

    indexed: list[str] = []
    excluded: list[str] = []
    for path in base.rglob("*"):
        if not path.is_file():
            continue
        rel = path.relative_to(base).as_posix()
        # Exclusion wins over inclusion: "docs/archive/..." is under "docs/".
        if _is_excluded(rel):
            excluded.append(rel)
        elif _is_included(rel):
            indexed.append(rel)

    # rglob order is filesystem-dependent; sort so the samples are stable.
    indexed.sort()
    excluded.sort()

    result = {
        "formula_id": "DOCUMENT_SEARCH_INDEX_V1",
        "gate": "PASS",  # hard-coded: this script performs no gating check
        "indexed_count": len(indexed),
        "excluded_count": len(excluded),
        "excluded_prefixes": list(EXCLUDED_PREFIXES),
        "indexed_sample": indexed[:_SAMPLE_SIZE],
        "excluded_sample": excluded[:_SAMPLE_SIZE],
    }
    payload = json.dumps(result, ensure_ascii=False, indent=2)

    # The output directory may not exist yet (e.g. on a fresh checkout).
    target.parent.mkdir(parents=True, exist_ok=True)
    target.write_text(payload, encoding="utf-8")
    print(payload)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())