"""Resolve alias collisions in ``spec/12_field_dictionary.yaml``.

A *collision* is a name (canonical name or alias) that is claimed by more
than one field.  For every collision exactly one field is chosen to keep the
name and the name is removed from the ``aliases`` list of all other fields.
The cleaned dictionary is written back to the same file.
"""
from __future__ import annotations

from pathlib import Path

import yaml

ROOT = Path(__file__).resolve().parents[1]


def _canonical_name(fid: str, info: dict) -> str:
    """Return the canonical name of a field, falling back to its id.

    A missing or null ``canonical_name`` is treated as "same as the field id"
    so that collision detection and collision resolution agree on what a
    field's canonical name is.
    """
    name = info.get("canonical_name")
    return fid if name is None else name


def _find_collisions(fields: dict[str, dict]) -> dict[str, list[str]]:
    """Map every name claimed by more than one field to the sorted field ids.

    Fields with empty/null definitions are skipped.  A null ``aliases`` value
    (bare ``aliases:`` key in YAML) is treated as an empty list.
    """
    owners: dict[str, set[str]] = {}
    for fid, info in fields.items():
        if not info:
            continue
        names = [_canonical_name(fid, info), *(info.get("aliases") or [])]
        for name in names:
            owners.setdefault(name, set()).add(fid)
    return {name: sorted(fids) for name, fids in owners.items() if len(fids) > 1}


def _pick_owner(name: str, fids: list[str], fields: dict[str, dict]) -> str:
    """Choose which of the colliding fields keeps ``name``.

    Rules, in priority order:

    1. A field whose canonical name equals ``name`` exactly.
    2. A field whose canonical name equals the lowercased ``name``.
    3. A field whose (non-empty) canonical name contains, or is contained
       in, the lowercased ``name``.
    4. Fallback: the first field id in ``fids``.
    """
    canonical = {fid: _canonical_name(fid, fields[fid]) for fid in fids}

    # Rule 1: the name *is* somebody's canonical name.
    for fid in fids:
        if canonical[fid] == name:
            return fid

    # str() guards against names YAML parsed as non-strings (ints, bools).
    lower_name = str(name).lower()

    # Rule 2: lowercased name matches a canonical name.
    for fid in fids:
        if canonical[fid] == lower_name:
            return fid

    # Rule 3: prefix/suffix/substring heuristic.  An empty canonical name is
    # skipped because the empty string is a substring of everything and would
    # otherwise win unconditionally.
    for fid in fids:
        cname = str(canonical[fid])
        if cname and (cname in lower_name or lower_name in cname):
            return fid

    # Rule 4: nothing better to go on.
    return fids[0]


def _drop_alias(info: dict, name: str) -> None:
    """Remove every occurrence of ``name`` from ``info['aliases']`` in place.

    The ``aliases`` key is only rewritten when the name is actually present,
    so fields without an alias list are left untouched.
    """
    aliases = info.get("aliases") or []
    if name in aliases:
        info["aliases"] = [alias for alias in aliases if alias != name]


def main() -> int:
    """Resolve alias collisions in the field dictionary and rewrite the file.

    Returns:
        ``1`` if the field dictionary file does not exist, otherwise ``0``
        (including when there was nothing to resolve, and when some
        collisions between canonical names could not be resolved
        automatically; the latter is reported with a warning).
    """
    field_dict_path = ROOT / "spec" / "12_field_dictionary.yaml"
    if not field_dict_path.exists():
        print("Field dictionary not found.")
        return 1

    field_data = yaml.safe_load(field_dict_path.read_text(encoding="utf-8")) or {}
    # Tolerate null "field_dictionary:" / "fields:" entries.
    fields = (field_data.get("field_dictionary") or {}).get("fields") or {}

    collisions = _find_collisions(fields)
    if not collisions:
        print("No collisions to resolve.")
        return 0

    print(f"Resolving {len(collisions)} alias collisions...")

    for name, fids in collisions.items():
        owner = _pick_owner(name, fids, fields)
        # Keep the name on the owner only; strip it from everyone else.
        for fid in fids:
            if fid != owner:
                _drop_alias(fields[fid], name)

    # Save cleaned fields back.
    field_data["field_dictionary"]["fields"] = fields
    field_dict_path.write_text(
        yaml.safe_dump(field_data, sort_keys=False, allow_unicode=True),
        encoding="utf-8",
    )

    # Two fields sharing the same canonical name cannot be fixed by dropping
    # aliases; report those instead of claiming success.
    remaining = _find_collisions(fields)
    if remaining:
        print(
            f"Warning: {len(remaining)} collisions could not be resolved "
            f"automatically: {sorted(str(name) for name in remaining)}"
        )
    else:
        print("Resolved field alias collisions successfully.")
    return 0


if __name__ == "__main__":
    raise SystemExit(main())