#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import re
import yaml
from pathlib import Path

# Repository root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Temp/ artefacts that scanned code may read without raising a violation.
# Entries are repo-relative POSIX paths, kept in alphabetical order.
BASE_WHITELIST = {
    "Temp/change_request_audit_v1.json",
    "Temp/engine_health_card_v1.json",
    "Temp/final_context_for_llm_v4.yaml",
    "Temp/final_context_for_llm_v5.yaml",
    "Temp/final_decision_packet_active.json",
    "Temp/late_chase_attribution_v2.json",
    "Temp/live_replay_separation_v2.json",
    "Temp/live_replay_separation_v3.json",
    "Temp/low_capability_llm_regression_v1.json",
    "Temp/number_provenance_ledger_v4.json",
    "Temp/operating_cadence_signal_v1.json",
    "Temp/operational_report.json",
    "Temp/operational_report.md",
    "Temp/release_dag_run_v1.json",
    "Temp/release_dag_run_v2.json",
    "Temp/release_dag_run_v3.json",
    "Temp/report_numeric_consistency_guard_v2.json",
    "Temp/runtime_source_whitelist_audit_v1.json",
    "Temp/shadow_ledger_v2.json",
    "Temp/value_preservation_scorer_v2.json",
}

# Source files that the audit skips entirely, matched on their
# repo-relative POSIX path.
EXEMPT_FILES = {
    "src/quant_engine/refactor_master_helpers.py",
    "tools/audit_repository_entropy_v1.py",
    "tools/audit_repository_entropy_v2.py",
    "tools/clean_temp_artifacts_v1.py",
    "tools/lint_repo_hygiene.py",
    "tools/sync_active_manifest_with_canonical_v1.py",
}

# Scripts under tools/ whose file name starts with one of these prefixes are
# not scanned.
_SKIPPED_TOOL_PREFIXES = (
    "build_",
    "validate_",
    "run_",
    "render_",
    "emit_",
    "clean_",
    "lint_",
    "audit_",
)
# Only files with these suffixes are scanned.
_SCANNED_SUFFIXES = (".py", ".gs", ".js")
# Files located below a directory with one of these names are ignored.
_SKIPPED_DIR_NAMES = ("__pycache__", ".git", ".claude")
# Substrings that mark a line as a write rather than a read (heuristic).
_WRITE_KEYWORDS = ("write", "save", "dump", "output")

# A quoted string literal that starts with "Temp/" or "Temp\" (any case).
_TEMP_LITERAL_RE = re.compile(r"['\"](temp[/\\][^'\"]+)['\"]", re.IGNORECASE)
# "glob" and the word "temp" on the same line, in either order.
_GLOB_TEMP_RE = re.compile(r"glob.*\btemp\b|\btemp\b.*glob", re.IGNORECASE)
# An open(...) call whose mode string writes, appends, creates or updates,
# e.g. open(p, "w"), open(p, mode="ab"), path.open("r+").
# Applied to lower-cased lines only.
_OPEN_WRITE_MODE_RE = re.compile(
    r"open\s*\((?:[^)]*,\s*)?(?:mode\s*=\s*)?['\"][rbt]*[wax+][rwaxbt+]*['\"]"
)


def _warn(message: str) -> None:
    """Print a warning to stderr so stdout carries only the JSON report."""
    import sys

    print(f"Warning: {message}", file=sys.stderr)


def _load_whitelist(manifest_path: Path) -> set[str]:
    """Return the lower-cased whitelist: BASE_WHITELIST plus manifest aliases.

    Loading the manifest is best-effort. A missing, unreadable or malformed
    manifest produces a warning (where applicable) and leaves the base
    whitelist in effect.
    """
    allowed = set(BASE_WHITELIST)

    if manifest_path.exists():
        try:
            manifest = yaml.safe_load(manifest_path.read_text(encoding="utf-8"))
        except Exception as e:  # deliberate best-effort: never abort the audit here
            _warn(f"Failed to load manifest: {e}")
        else:
            if manifest is None:
                # An empty YAML document loads as None; there are no aliases.
                aliases = {}
            elif isinstance(manifest, dict):
                aliases = manifest.get("active_aliases") or {}
            else:
                _warn("Failed to load manifest: top-level value is not a mapping")
                aliases = {}

            if not isinstance(aliases, dict):
                _warn("Failed to load manifest: 'active_aliases' is not a mapping")
                aliases = {}

            for val in aliases.values():
                # Skip non-string entries instead of discarding the
                # remaining aliases.
                if isinstance(val, str):
                    allowed.add(val.replace("\\", "/"))

    # Lower-case so that the comparison with lower-cased matches is
    # case-insensitive.
    return {entry.lower() for entry in allowed}


def _collect_scan_paths(scan_args: list[str]) -> list[Path]:
    """Expand the --scan arguments (relative to ROOT) plus ROOT/gas_*.gs."""
    candidates: list[Path] = []
    for entry in scan_args:
        target = ROOT / entry
        if target.is_file():
            candidates.append(target)
        elif target.is_dir():
            candidates.extend(target.rglob("*"))

    # Also check gas_*.gs files directly in ROOT.
    candidates.extend(ROOT.glob("gas_*.gs"))
    return candidates


def _is_write_line(line_lower: str) -> bool:
    """Heuristically decide whether a lower-cased line writes, not reads."""
    if any(keyword in line_lower for keyword in _WRITE_KEYWORDS):
        return True
    return _OPEN_WRITE_MODE_RE.search(line_lower) is not None


def _scan_content(
    rel_path: str, content: str, whitelist: set[str]
) -> tuple[list[dict[str, object]], int, int]:
    """Scan one file's text.

    Returns ``(violations, deprecated_reads, archive_reads)`` for the file.
    """
    found: list[dict[str, object]] = []
    deprecated_reads = 0
    archive_reads = 0

    # File-level check for globbing Temp; reported with line 0.
    lowered = content.lower()
    if "glob" in lowered and "temp" in lowered and _GLOB_TEMP_RE.search(content):
        found.append({
            "file": rel_path,
            "line": 0,
            "reason": "Direct globbing of Temp/ directory is forbidden."
        })

    for lineno, line in enumerate(content.splitlines(), start=1):
        line_lower = line.lower()

        # Archive access must carry the audit-only annotation on the same line.
        if "archive/" in line_lower or "archive\\" in line_lower:
            if "read_for_audit_only" not in line:
                found.append({
                    "file": rel_path,
                    "line": lineno,
                    "reason": "Access to archive/ directory is only allowed if annotated with '# read_for_audit_only'."
                })
                archive_reads += 1

        # Temp/ string literals that are not whitelisted and do not look
        # like writes.
        for m in _TEMP_LITERAL_RE.findall(line):
            normalized_path = m.replace("\\", "/").lower()
            if normalized_path in whitelist or _is_write_line(line_lower):
                continue
            found.append({
                "file": rel_path,
                "line": lineno,
                "reason": f"Read access to non-whitelisted Temp file: {m}"
            })
            deprecated_reads += 1

    return found, deprecated_reads, archive_reads


def main() -> int:
    """Audit source files for forbidden Temp/ and archive/ access.

    Command-line options:
        --manifest  YAML manifest path relative to ROOT; the values of its
                    ``active_aliases`` mapping extend the whitelist.
        --scan      files or directories (relative to ROOT) to scan.

    The JSON report is written to ``Temp/runtime_source_whitelist_audit_v1.json``
    under ROOT and printed to stdout.

    Returns:
        0 when no violations were found (gate PASS), otherwise 1 (gate FAIL).
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--manifest", default="runtime/active_artifact_manifest.yaml")
    ap.add_argument("--scan", nargs="+", default=["src", "tools"])
    args = ap.parse_args()

    whitelist = _load_whitelist(ROOT / args.manifest)

    violations: list[dict[str, object]] = []
    deprecated_reads = 0
    archive_reads = 0

    this_file = Path(__file__).resolve()
    seen: set[Path] = set()

    for p in _collect_scan_paths(args.scan):
        if not p.is_file():
            continue
        # Overlapping --scan arguments yield the same path more than once;
        # scan each path a single time so violations are not double-counted.
        if p in seen:
            continue
        seen.add(p)
        # Never audit this script itself.
        if p.resolve() == this_file:
            continue

        # A --scan path outside the repository cannot be made relative;
        # fall back to the path as given instead of crashing.
        try:
            rel = p.relative_to(ROOT)
        except ValueError:
            rel = p

        # Judge skipped directories on the repo-relative parts only, so that
        # the location of the checkout itself (e.g. beneath a ".claude"
        # directory) cannot silently exclude every file.
        if any(name in rel.parts for name in _SKIPPED_DIR_NAMES):
            continue
        if p.suffix not in _SCANNED_SUFFIXES:
            continue

        rel_path = str(rel).replace("\\", "/")
        if rel_path in EXEMPT_FILES:
            continue

        # Skip build, validate, run, render, emit, clean, lint and audit
        # scripts located under tools/.
        if rel.parts[:1] == ("tools",) and p.name.startswith(_SKIPPED_TOOL_PREFIXES):
            continue

        try:
            content = p.read_text(encoding="utf-8")
        except (OSError, UnicodeError):
            # Skip unreadable or non-UTF-8 files.
            continue

        file_violations, file_deprecated, file_archive = _scan_content(
            rel_path, content, whitelist
        )
        violations.extend(file_violations)
        deprecated_reads += file_deprecated
        archive_reads += file_archive

    result = {
        "formula_id": "RUNTIME_SOURCE_WHITELIST_AUDIT_V1",
        "deprecated_runtime_read_count": deprecated_reads,
        "archive_runtime_read_count": archive_reads,
        # All-or-nothing: 100.0 only when no non-whitelisted Temp read was found.
        "active_alias_resolution_pct": 100.0 if deprecated_reads == 0 else 0.0,
        "violation_count": len(violations),
        # The report lists at most the first 100 violations; the count above
        # is complete.
        "violations": violations[:100],
        "gate": "PASS" if not violations else "FAIL"
    }

    out_path = ROOT / "Temp" / "runtime_source_whitelist_audit_v1.json"
    out_path.parent.mkdir(parents=True, exist_ok=True)
    out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")

    print(json.dumps(result, ensure_ascii=True, indent=2))

    return 0 if not violations else 1

if __name__ == "__main__":
    # Script entry point: hand main()'s integer result to the interpreter as
    # the process exit status.
    import sys

    exit_status = main()
    sys.exit(exit_status)