from __future__ import annotations import argparse import json from pathlib import Path from typing import Any ROOT = Path(__file__).resolve().parents[1] DEFAULT_JSON = ROOT / "GatherTradingData.json" DEFAULT_DI = ROOT / "Temp" / "data_integrity_score_v1.json" DEFAULT_OUT = ROOT / "Temp" / "data_quality_reconciliation_v1.json" DEFAULT_FUND_RAW = ROOT / "Temp" / "fundamental_raw_v1.json" DEFAULT_FUND_MF3 = ROOT / "Temp" / "fundamental_multifactor_v3.json" DEFAULT_LLM_FREEDOM = ROOT / "Temp" / "llm_freedom_v1.json" DEFAULT_COVERAGE = ROOT / "Temp" / "harness_coverage_audit.json" def _load_json(path: Path) -> dict[str, Any]: if not path.exists(): return {} try: obj = json.loads(path.read_text(encoding="utf-8")) except Exception: return {} return obj if isinstance(obj, dict) else {} def _as_float(value: Any, default: float = 0.0) -> float: try: return float(value) except Exception: return default def _extract_harness_root(payload: dict[str, Any]) -> dict[str, Any]: h_apex = payload.get("hApex") data_apex = ((payload.get("data") or {}).get("_harness_context")) if isinstance(payload.get("data"), dict) else None if isinstance(h_apex, dict) and isinstance(data_apex, dict): merged = dict(data_apex) merged.update(h_apex) return merged if isinstance(h_apex, dict): return h_apex if isinstance(data_apex, dict): return data_apex return payload def main() -> int: ap = argparse.ArgumentParser() ap.add_argument("--json", default=str(DEFAULT_JSON)) ap.add_argument("--integrity", default=str(DEFAULT_DI)) ap.add_argument("--out", default=str(DEFAULT_OUT)) args = ap.parse_args() json_path = Path(args.json) if not json_path.is_absolute(): json_path = ROOT / json_path integrity_path = Path(args.integrity) if not integrity_path.is_absolute(): integrity_path = ROOT / integrity_path out_path = Path(args.out) if not out_path.is_absolute(): out_path = ROOT / out_path data = _load_json(json_path) integrity = _load_json(integrity_path) fund_raw = _load_json(DEFAULT_FUND_RAW) fund_mf3 = _load_json(DEFAULT_FUND_MF3) llm_freedom = _load_json(DEFAULT_LLM_FREEDOM) coverage = _load_json(DEFAULT_COVERAGE) apex = _extract_harness_root(data) di_score = _as_float(integrity.get("score"), _as_float(integrity.get("data_integrity_score"))) dqg = apex.get("data_quality_gate_v2_json") or {} if isinstance(dqg, str): try: dqg = json.loads(dqg) except Exception: dqg = {} # [R2-1b] Python authoritative DQG-V2 우선 사용 — GAS 원본은 필드경로 버그로 # 실재 데이터를 0으로 까는 false-negative가 있다. py 재산출값이 있으면 그것을 신뢰. dqg_py_path = ROOT / "Temp" / "data_quality_gate_v2_py.json" dqg_py = _load_json(dqg_py_path) if dqg_py.get("formula_id") == "DATA_QUALITY_GATE_V2_PY": legacy_completeness_pct = _as_float(dqg_py.get("overall_completeness_pct")) completeness_grade = str(dqg_py.get("completeness_grade") or "MISSING") else: legacy_completeness_pct = _as_float( (dqg if isinstance(dqg, dict) else {}).get( "overall_completeness_pct", (dqg if isinstance(dqg, dict) else {}).get("completeness_pct"), ) ) completeness_grade = str((dqg if isinstance(dqg, dict) else {}).get("completeness_grade") or "MISSING") # Modern quality composition based on deterministic artifacts. fund_raw_cov = _as_float(fund_raw.get("coverage_pct")) fund_mf3_gate = str(fund_mf3.get("gate") or "FAIL") fund_mf3_diverse = bool(fund_mf3.get("grade_diverse")) llm_freedom_pct = _as_float(llm_freedom.get("llm_freedom_pct"), 100.0) cov_effective = _as_float(coverage.get("effective_coverage_pct")) fund_mf3_score = 0.0 if fund_mf3_gate in ("PASS", "CAUTION"): fund_mf3_score = 100.0 if fund_mf3_diverse else 70.0 llm_score = max(0.0, 100.0 - llm_freedom_pct) modern_completeness_pct = round( (di_score * 0.30) + (fund_raw_cov * 0.25) + (fund_mf3_score * 0.20) + (llm_score * 0.15) + (cov_effective * 0.10), 2, ) completeness_pct = max(legacy_completeness_pct, modern_completeness_pct) # 정공법: 블렌드/마스킹 금지. 실데이터 기반 min() 산출. # legacy=GAS raw field presence, modern=harness artifact quality. # 두 값의 min이 실질 신뢰 상한. 수치를 인위적으로 끌어올리면 거짓. confidence_cap_basis_score = round( min( legacy_completeness_pct or completeness_pct, modern_completeness_pct or completeness_pct, ), 2, ) quality_gap_pct = round(max(0.0, modern_completeness_pct - confidence_cap_basis_score), 2) quality_conflict_flag = bool(di_score >= 95.0 and completeness_pct < 50.0) quality_conflict_reason = ( "SCHEMA_PRESENCE_HIGH_BUT_INVESTMENT_QUALITY_LOW" if quality_conflict_flag else "NONE" ) result = { "formula_id": "DATA_QUALITY_RECONCILIATION_V1", "schema_presence_score": di_score, "investment_quality_score": completeness_pct, "investment_quality_grade": completeness_grade, "legacy_investment_quality_score": legacy_completeness_pct, "modern_investment_quality_score": modern_completeness_pct, "confidence_cap_basis_score": confidence_cap_basis_score, "quality_gap_pct": quality_gap_pct, "component_scores": { "schema_presence_score": di_score, "fundamental_raw_coverage_pct": fund_raw_cov, "fundamental_multifactor_score": fund_mf3_score, "llm_grounding_score": llm_score, "formula_runtime_coverage_pct": cov_effective, }, "quality_conflict_flag": quality_conflict_flag, "quality_conflict_reason": quality_conflict_reason, "gate": "CONFLICT" if quality_conflict_flag else "PASS", } out_path.parent.mkdir(parents=True, exist_ok=True) out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") print("DATA_QUALITY_RECONCILIATION_V1") print(f" schema_presence_score: {di_score:.2f}") print(f" investment_quality_score: {completeness_pct:.2f}") print(f" confidence_cap_basis_score: {confidence_cap_basis_score:.2f}") print(f" quality_conflict_flag: {quality_conflict_flag}") print(f" gate: {result['gate']}") return 0 if __name__ == "__main__": raise SystemExit(main())