Files
QuantEngineByItz/tools/build_document_search_index_v1.py

48 lines
1.5 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import json
from pathlib import Path
# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Destination of the JSON summary written by main().
OUT = ROOT.joinpath("Temp", "document_search_index_v1.json")

# Repo-relative POSIX path prefixes; files under these are reported as excluded.
EXCLUDED_PREFIXES = (
    "docs/archive/",
    "suggest/",
    "artifacts/archive/",
)

# Top-level directories and individual files covered by the inclusion test
# in main().
INCLUDED_ROOTS = (
    "docs",
    "spec",
    "governance",
    "src",
    "tools",
    "AGENTS.md",
    "README.md",
)
def _is_excluded(rel: str) -> bool:
    """Return True when *rel* starts with any entry of ``EXCLUDED_PREFIXES``.

    Args:
        rel: Path string to test; main() passes the repo-relative POSIX form.
    """
    # str.startswith accepts a tuple and matches if any prefix matches.
    has_excluded_prefix = rel.startswith(EXCLUDED_PREFIXES)
    return has_excluded_prefix
def main() -> int:
    """Build the document search index summary, write it to ``OUT``, print it.

    Every regular file below ``ROOT`` is classified by its repo-relative
    POSIX path:

    * *excluded* -- the path starts with one of ``EXCLUDED_PREFIXES``. This
      is checked first, so e.g. ``docs/archive/`` is never indexed even
      though it lives under the included ``docs`` root.
    * *indexed* -- the first path component is listed in ``INCLUDED_ROOTS``
      (a top-level directory such as ``docs`` or a top-level file such as
      ``README.md``).

    Files matching neither rule are ignored and not counted.

    The summary (counts, the excluded prefixes, and the first 50 sorted
    entries of each list) is serialised as indented UTF-8 JSON, written to
    ``OUT`` and echoed to stdout.

    Returns:
        ``0``, intended to be used as the process exit status.

    Raises:
        OSError: If the output directory cannot be created or the output
            file cannot be written.
    """
    indexed: list[str] = []
    excluded: list[str] = []

    for path in ROOT.rglob("*"):
        if not path.is_file():
            continue
        rel = path.relative_to(ROOT).as_posix()
        if _is_excluded(rel):
            excluded.append(rel)
        elif rel.partition("/")[0] in INCLUDED_ROOTS:
            # Match on the first path component so the set of included roots
            # has a single source of truth (INCLUDED_ROOTS) instead of a
            # duplicated chain of startswith() checks.
            indexed.append(rel)

    # Sort so the 50-entry samples are deterministic regardless of the
    # order in which the filesystem yields entries.
    indexed.sort()
    excluded.sort()

    result = {
        "formula_id": "DOCUMENT_SEARCH_INDEX_V1",
        "gate": "PASS",
        "indexed_count": len(indexed),
        "excluded_count": len(excluded),
        "excluded_prefixes": list(EXCLUDED_PREFIXES),
        "indexed_sample": indexed[:50],
        "excluded_sample": excluded[:50],
    }

    # Serialise once and reuse the text for both the file and stdout.
    payload = json.dumps(result, ensure_ascii=False, indent=2)

    # The output directory may not exist on a fresh checkout; without this
    # write_text() raises FileNotFoundError.
    OUT.parent.mkdir(parents=True, exist_ok=True)
    OUT.write_text(payload, encoding="utf-8")
    print(payload)
    return 0
# When executed as a script, run main() and exit with its return value.
if __name__ == "__main__":
    exit_code = main()
    raise SystemExit(exit_code)