"""File-discovery and spec-loading helpers resolved against ``ROOT``.

The loaders in this module are best-effort: a missing or unreadable input
yields an empty value (``{}`` or ``""``) instead of raising.
"""
from __future__ import annotations

import json
import re
from pathlib import Path
from typing import Any, Iterable

import yaml

# Directory two levels above the directory that contains this file. Every
# collector below resolves its paths against it (presumably the project root).
ROOT = Path(__file__).resolve().parents[2]

# Top-level ``.gs`` scripts that collect_gas_files() looks for directly in ROOT.
_GAS_ROOT_FILE_NAMES = (
    "gas_apex_alpha_watch.gs",
    "gas_apex_runtime_core.gs",
    "gas_data_collect.gs",
    "gas_data_feed.gs",
    "gas_harness_rows.gs",
    "gas_lib.gs",
    "gas_report.gs",
)

# Keys of a formula's ``output`` mapping that are handled explicitly or carry
# commentary; every other string key is treated as an output field name.
_OUTPUT_META_KEYS = frozenset({"field", "fields", "note", "notes"})


def _dict_or_empty(obj: Any) -> dict[str, Any]:
    """Return *obj* when it is a dict, otherwise an empty dict."""
    return obj if isinstance(obj, dict) else {}


def _files_under(base: Path, pattern: str) -> list[Path]:
    """Return the regular files matching *pattern* anywhere below *base*.

    Returns an empty list when *base* does not exist.
    """
    if not base.exists():
        return []
    return [p for p in base.rglob(pattern) if p.is_file()]


def load_json(path: Path) -> dict[str, Any]:
    """Load a JSON object from *path*.

    Returns ``{}`` when the file is missing, cannot be read or parsed, or
    when the top-level JSON value is not an object.
    """
    if not path.exists():
        return {}
    try:
        obj = json.loads(path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately broad: callers treat any unreadable file as "no data".
        return {}
    return _dict_or_empty(obj)


def load_yaml(path: Path) -> dict[str, Any]:
    """Load a YAML mapping from *path* using ``yaml.safe_load``.

    Returns ``{}`` when the file is missing, cannot be read or parsed, or
    when the top-level YAML value is not a mapping.
    """
    if not path.exists():
        return {}
    try:
        obj = yaml.safe_load(path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberately broad: callers treat any unreadable file as "no data".
        return {}
    return _dict_or_empty(obj)


def read_text(path: Path) -> str:
    """Return the UTF-8 text of *path*, ignoring undecodable bytes.

    Returns ``""`` when *path* is missing or cannot be read as a file (for
    example a directory or a permission error), so bulk scans over collected
    paths do not abort on a single bad entry.
    """
    try:
        return path.read_text(encoding="utf-8", errors="ignore")
    except OSError:
        return ""


def iter_files(*patterns: str) -> list[Path]:
    """Return the regular files under ``ROOT`` matching any glob in *patterns*.

    Results are concatenated in pattern order; a file matched by several
    patterns appears once per matching pattern.
    """
    files: list[Path] = []
    for pattern in patterns:
        files.extend(p for p in ROOT.glob(pattern) if p.is_file())
    return files


def collect_all_files() -> list[Path]:
    """Return every regular file below ``ROOT``."""
    return _files_under(ROOT, "*")


def collect_temp_files() -> list[Path]:
    """Return every regular file below ``ROOT / "Temp"`` (empty if absent)."""
    return _files_under(ROOT / "Temp", "*")


def collect_gas_files() -> list[Path]:
    """Return the known ``.gs`` scripts.

    The result is the entries of the fixed top-level name list that exist in
    ``ROOT``, in list order, followed by the sorted ``*.gs`` files directly
    inside ``ROOT / "src" / "gas_adapter_parts"`` (if that directory exists).
    """
    root_files = [
        ROOT / name for name in _GAS_ROOT_FILE_NAMES if (ROOT / name).exists()
    ]
    adapter_dir = ROOT / "src" / "gas_adapter_parts"
    adapter_files = sorted(adapter_dir.glob("*.gs")) if adapter_dir.exists() else []
    return root_files + adapter_files


def collect_prompt_files() -> list[Path]:
    """Return every ``*.md`` file below ``ROOT / "prompts"`` (empty if absent)."""
    return _files_under(ROOT / "prompts", "*.md")


def collect_tool_files() -> list[Path]:
    """Return every ``*.py`` file below ``ROOT / "tools"`` (empty if absent)."""
    return _files_under(ROOT / "tools", "*.py")


def collect_spec_files() -> list[Path]:
    """Return every ``*.yaml`` file below ``ROOT / "spec"`` (empty if absent)."""
    return _files_under(ROOT / "spec", "*.yaml")


def load_formula_registry() -> dict[str, Any]:
    """Return the ``formula_registry.formulas`` mapping from the spec file.

    Reads ``ROOT / "spec" / "13_formula_registry.yaml"``. Returns ``{}`` when
    the file is unavailable or when either ``formula_registry`` or its
    ``formulas`` entry is missing or is not a mapping.
    """
    payload = load_yaml(ROOT / "spec" / "13_formula_registry.yaml")
    registry = payload.get("formula_registry")
    if not isinstance(registry, dict):
        return {}
    formulas = registry.get("formulas")
    return formulas if isinstance(formulas, dict) else {}


def extract_formula_ids() -> list[str]:
    """Return the formula ids (registry keys) as strings, in registry order."""
    return [str(fid) for fid in load_formula_registry()]


def _output_field_names(row: Any) -> list[str]:
    """Return the sorted, de-duplicated output field names declared by *row*.

    A non-dict *row* declares nothing. Names are gathered from
    ``output.field``, from ``output.fields`` (a list of strings or of dicts
    with ``field``/``name``, or a dict whose keys are the names), from any
    other string key of ``output`` outside ``_OUTPUT_META_KEYS``, and from
    the ``expected_outputs`` list.
    """
    if not isinstance(row, dict):
        return []

    names: list[str] = []
    output = row.get("output")
    if isinstance(output, dict):
        single = output.get("field")
        if isinstance(single, str):
            names.append(single)

        fields = output.get("fields")
        if isinstance(fields, list):
            for item in fields:
                if isinstance(item, str):
                    names.append(item)
                elif isinstance(item, dict):
                    # "field" takes precedence; "name" is only a fallback.
                    if isinstance(item.get("field"), str):
                        names.append(item["field"])
                    elif isinstance(item.get("name"), str):
                        names.append(item["name"])
        elif isinstance(fields, dict):
            names.extend(key for key in fields if isinstance(key, str))

        names.extend(
            key
            for key in output
            if isinstance(key, str) and key not in _OUTPUT_META_KEYS
        )

    expected = row.get("expected_outputs")
    if isinstance(expected, list):
        names.extend(item for item in expected if isinstance(item, str))

    return sorted(set(names))


def extract_formula_outputs() -> dict[str, list[str]]:
    """Map each formula id to its sorted, de-duplicated output field names.

    Every id in the registry gets an entry; a formula whose definition is not
    a mapping, or that declares no outputs, maps to an empty list.
    """
    return {
        str(fid): _output_field_names(row)
        for fid, row in load_formula_registry().items()
    }


# NOTE(review): The tail of this module could not be recovered. In the reviewed
# source, the text between the opening of the regex in ``find_numbers`` and the
# return annotation of the function that follows it is missing, so the regex,
# the following function's name and its parameter list are unknown. The
# surviving text is preserved verbatim below; restore both definitions from
# version control rather than guessing.
# When restoring the second function: its visible body iterates ``terms`` once
# per path, so a one-shot iterator passed as ``terms`` would be exhausted after
# the first file -- confirm whether it should be materialised first.
#
# def find_numbers(text: str) -> list[str]: return re.findall(r"(? dict[str, list[str]]: result: dict[str, list[str]] = {} for path in paths: text = read_text(path) hits = [term for term in terms if term in text] if hits: result[str(path)] = hits return result