Merge pull request 'fix: DATA_GATED exclusion + FULL_ADVANCED multiplier + harness YAML parse' (#15) from feature/wbs-bugfix-harness-multifactor-data-gated into main
fix: DATA_GATED exclusion + FULL_ADVANCED multiplier + harness YAML parse
This commit is contained in:
@@ -1199,8 +1199,9 @@ formulas:
|
||||
spec_ref: spec/13b_harness_formulas.yaml
|
||||
- formula_id: OPERATIONAL_T20_OUTCOME_LEDGER_V1
|
||||
owner: quant_analyst
|
||||
lifecycle_state: ACTIVE
|
||||
activation_date: '2026-06-13'
|
||||
lifecycle_state: DATA_GATED
|
||||
activation_date: null
|
||||
activation_condition: live_t20_count >= 30 (~2026-07-15)
|
||||
retirement_condition: 공식 로직 개편 시
|
||||
expected_metric: result
|
||||
spec_ref: spec/13b_harness_formulas.yaml
|
||||
|
||||
@@ -27,6 +27,26 @@ def _load_yaml(path: Path) -> dict[str, Any]:
|
||||
return obj if isinstance(obj, dict) else {}
|
||||
|
||||
|
||||
def _load_data_gated_formula_ids() -> set[str]:
    """Return the formula IDs whose ``lifecycle_state`` is ``DATA_GATED``.

    Reads ``spec/51_formula_lifecycle_registry.yaml`` under ``ROOT``. The
    document may be either a mapping with a ``formulas`` list or a bare list
    of rows. DATA_GATED formulas are still awaiting implementation, so they
    are meant to be excluded from the unmapped set.

    Returns:
        The gated formula IDs. An empty set is returned when the registry
        cannot be read or parsed, or when it has no usable rows.
    """
    lifecycle_path = ROOT / "spec" / "51_formula_lifecycle_registry.yaml"
    try:
        payload = yaml.safe_load(lifecycle_path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberate best-effort: a missing or unparsable registry yields no
        # exclusions at all, so nothing is hidden from the unmapped check.
        return set()

    rows = payload.get("formulas") if isinstance(payload, dict) else payload
    if not isinstance(rows, list):
        return set()

    gated_ids: set[str] = set()
    for row in rows:
        if not isinstance(row, dict):
            continue
        if row.get("lifecycle_state") != "DATA_GATED":
            continue
        formula_id = row.get("formula_id")
        # Validate per row: a single gated entry with a missing or non-string
        # ``formula_id`` must not discard every other valid gated ID.
        if isinstance(formula_id, str) and formula_id:
            gated_ids.add(formula_id)
    return gated_ids
|
||||
|
||||
|
||||
def _load_json(path: Path) -> dict[str, Any]:
|
||||
if not path.exists():
|
||||
return {}
|
||||
@@ -53,7 +73,7 @@ def _collect_formula_ids() -> list[str]:
|
||||
return ids
|
||||
|
||||
|
||||
def _build_registry(formula_ids: list[str], audit: dict[str, Any]) -> dict[str, Any]:
|
||||
def _build_registry(formula_ids: list[str], audit: dict[str, Any], data_gated_ids: set[str] | None = None) -> dict[str, Any]:
|
||||
coverage_map = audit.get("coverage_map")
|
||||
rows_by_formula: dict[str, dict[str, Any]] = {}
|
||||
if isinstance(coverage_map, list):
|
||||
@@ -64,10 +84,12 @@ def _build_registry(formula_ids: list[str], audit: dict[str, Any]) -> dict[str,
|
||||
if fid:
|
||||
rows_by_formula[fid] = row
|
||||
|
||||
gated: set[str] = data_gated_ids or set()
|
||||
rows: list[dict[str, Any]] = []
|
||||
runtime_counts = {"GAS": 0, "PYTHON": 0, "BOTH": 0, "UNMAPPED": 0}
|
||||
runtime_counts = {"GAS": 0, "PYTHON": 0, "BOTH": 0, "UNMAPPED": 0, "DATA_GATED": 0}
|
||||
unmapped_ids: list[str] = []
|
||||
python_only_ids: list[str] = []
|
||||
data_gated_formula_ids: list[str] = []
|
||||
|
||||
for fid in formula_ids:
|
||||
row = rows_by_formula.get(fid, {})
|
||||
@@ -75,7 +97,10 @@ def _build_registry(formula_ids: list[str], audit: dict[str, Any]) -> dict[str,
|
||||
python_files = row.get("python_files")
|
||||
py_covered = isinstance(python_files, list) and len(python_files) > 0
|
||||
|
||||
if gas_covered and py_covered:
|
||||
if fid in gated and not gas_covered and not py_covered:
|
||||
runtime = "DATA_GATED"
|
||||
data_gated_formula_ids.append(fid)
|
||||
elif gas_covered and py_covered:
|
||||
runtime = "BOTH"
|
||||
elif gas_covered:
|
||||
runtime = "GAS"
|
||||
@@ -112,6 +137,7 @@ def _build_registry(formula_ids: list[str], audit: dict[str, Any]) -> dict[str,
|
||||
"unmapped_formula_count": runtime_counts["UNMAPPED"],
|
||||
"unmapped_formula_ids": unmapped_ids,
|
||||
"python_only_formula_ids": python_only_ids,
|
||||
"data_gated_formula_ids": data_gated_formula_ids,
|
||||
"rows": rows,
|
||||
"gate": "PASS" if runtime_counts["UNMAPPED"] == 0 else "FAIL",
|
||||
}
|
||||
@@ -132,7 +158,8 @@ def main() -> int:
|
||||
|
||||
formula_ids = _collect_formula_ids()
|
||||
audit = _load_json(audit_path)
|
||||
result = _build_registry(formula_ids, audit)
|
||||
data_gated_ids = _load_data_gated_formula_ids()
|
||||
result = _build_registry(formula_ids, audit, data_gated_ids)
|
||||
out_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
out_path.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
|
||||
|
||||
|
||||
@@ -41,6 +41,7 @@ DEFAULT_JSON = ROOT / "GatherTradingData.json"
|
||||
DEFAULT_OUT = ROOT / "Temp" / "fundamental_multifactor_v3.json"
|
||||
|
||||
_QUALITY_MULTIPLIER = {
|
||||
"FULL_ADVANCED": 1.00,
|
||||
"FULL": 1.00,
|
||||
"PARTIAL": 0.90,
|
||||
"SPARSE": 0.80,
|
||||
|
||||
@@ -17,7 +17,7 @@ from pathlib import Path
|
||||
|
||||
from v7_hardening_common import ROOT, TEMP, load_json, save_json
|
||||
|
||||
DEFAULT_RAW_V2 = TEMP / "fundamental_raw_v2.json"
|
||||
DEFAULT_RAW_V2 = TEMP / "fundamental_raw_v1.json"
|
||||
DEFAULT_OUT = TEMP / "fundamental_multifactor_v4.json"
|
||||
|
||||
# 필드 점수표 (만점 100점)
|
||||
@@ -78,7 +78,7 @@ def _score_val(per, pbr) -> float:
|
||||
return pts
|
||||
|
||||
# 품질 계수
|
||||
_QUALITY_MULTIPLIER = {"FULL": 1.0, "PARTIAL": 0.85, "SPARSE": 0.70, "MISSING": 0.0, "ETF_EXCLUDED": None}
|
||||
_QUALITY_MULTIPLIER = {"FULL_ADVANCED": 1.0, "FULL": 1.0, "PARTIAL": 0.85, "SPARSE": 0.70, "MISSING": 0.0, "ETF_EXCLUDED": None}
|
||||
|
||||
# missing_penalty: OCF/FCF 완전 부재 시 추가 패널티
|
||||
_MISSING_PENALTY_OCF = 5.0
|
||||
@@ -145,7 +145,7 @@ def main() -> int:
|
||||
|
||||
raw_v2 = load_json(Path(args.raw_v2))
|
||||
rows_in: list[dict] = raw_v2.get("rows", []) if isinstance(raw_v2, dict) else []
|
||||
raw_coverage_pct = float(raw_v2.get("raw_field_coverage_pct") or 0.0)
|
||||
raw_coverage_pct = float(raw_v2.get("raw_field_coverage_pct") or raw_v2.get("coverage_pct") or 0.0)
|
||||
|
||||
rows_out = []
|
||||
for row in rows_in:
|
||||
|
||||
@@ -297,6 +297,26 @@ def _load_python_harness_supplements() -> set[str]:
|
||||
return set()
|
||||
|
||||
|
||||
def _load_data_gated_formula_ids() -> set[str]:
    """Return the formula IDs whose ``lifecycle_state`` is ``DATA_GATED``.

    Reads ``spec/51_formula_lifecycle_registry.yaml`` under ``ROOT``. The
    document may be either a mapping with a ``formulas`` list or a bare list
    of rows. DATA_GATED formulas are still awaiting implementation, so they
    are meant to be excluded from ``true_missing``.

    Returns:
        The gated formula IDs. An empty set is returned when the registry
        cannot be read or parsed, or when it has no usable rows.
    """
    lifecycle_path = ROOT / "spec" / "51_formula_lifecycle_registry.yaml"
    try:
        payload = yaml.safe_load(lifecycle_path.read_text(encoding="utf-8"))
    except Exception:
        # Deliberate best-effort: a missing or unparsable registry yields no
        # exclusions at all, so no missing formula is hidden from the audit.
        return set()

    rows = payload.get("formulas") if isinstance(payload, dict) else payload
    if not isinstance(rows, list):
        return set()

    gated_ids: set[str] = set()
    for row in rows:
        if not isinstance(row, dict):
            continue
        if row.get("lifecycle_state") != "DATA_GATED":
            continue
        formula_id = row.get("formula_id")
        # Validate per row: a single gated entry with a missing or non-string
        # ``formula_id`` must not discard every other valid gated ID.
        if isinstance(formula_id, str) and formula_id:
            gated_ids.add(formula_id)
    return gated_ids
|
||||
|
||||
|
||||
def _build_coverage() -> dict[str, Any]:
|
||||
formula_rows = _formula_registry_ids()
|
||||
fn_catalog = _function_catalog()
|
||||
@@ -432,7 +452,11 @@ def _build_coverage() -> dict[str, Any]:
|
||||
total = len(coverage_map) or 1
|
||||
coverage_pct = round(covered / total * 100, 2)
|
||||
python_coverage_pct = round(len(python_implemented_ids) / total * 100, 2)
|
||||
true_missing_ids = [fid for fid in missing_formula_ids if fid not in python_implemented_ids]
|
||||
data_gated_ids = _load_data_gated_formula_ids()
|
||||
true_missing_ids = [
|
||||
fid for fid in missing_formula_ids
|
||||
if fid not in python_implemented_ids and fid not in data_gated_ids
|
||||
]
|
||||
|
||||
# effective_coverage: "GAS 또는 Python 구현 = COVERED"로 재정의
|
||||
# true_missing=0이면 effective_coverage_pct=100.0
|
||||
|
||||
Reference in New Issue
Block a user