Files
QuantEngineByItz/tools/build_fundamental_raw_v2.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

123 lines
4.5 KiB
Python

"""build_fundamental_raw_v2.py — FUNDAMENTAL_RAW_V2
P1-011: fundamental_raw_v1의 data_quality=FULL 레이블이 OCF/FCF 부재를 숨기는 문제 해소.
- 필드 단위 coverage 산출 (ticker 단위 아님)
- OCF/FCF 없으면 FULL이 아닌 PARTIAL
- engine_audit(61.6) vs data_quality(100) 충돌 근거 명시
"""
from __future__ import annotations
import argparse
import json
from datetime import datetime, timezone
from pathlib import Path
from v7_hardening_common import ROOT, TEMP, load_json, save_json
# Default input/output locations, built from TEMP (imported from
# v7_hardening_common). main() uses them as the defaults for the
# --raw-v1 and --out command-line options.
DEFAULT_RAW_V1 = TEMP / "fundamental_raw_v1.json"
DEFAULT_OUT = TEMP / "fundamental_raw_v2.json"
# Field weights (noted by the original author as identical to multifactor_v4).
# Keyword order is preserved by dict(), so iteration order matches the order
# in which the fields are listed here.
FIELD_WEIGHTS: dict[str, int] = dict(
    roe_pct=25,
    opm_pct=20,
    ocf_krw=15,  # half of the 30 points shared by OCF and FCF
    fcf_krw=15,  # the other half of the OCF/FCF 30 points
    net_debt_krw=10,
    per=8,
    pbr=7,
)
TOTAL_WEIGHT: int = sum(FIELD_WEIGHTS.values())  # evaluates to 100
# FULL 판정: ROE/OPM + 밸류에이션 + (OCF OR FCF) 중 하나라도 있어야 함
def _reclassify_data_quality(row: dict) -> str:
if row.get("data_quality") == "ETF_EXCLUDED":
return "ETF_EXCLUDED"
has_core = (row.get("roe_pct") is not None and row.get("opm_pct") is not None)
has_val = (row.get("per") is not None or row.get("pbr") is not None)
has_cf = (row.get("ocf_krw") is not None or row.get("fcf_krw") is not None)
if has_core and has_val and has_cf:
return "FULL"
if has_core and has_val:
return "PARTIAL" # OCF/FCF 없음
if has_core:
return "SPARSE"
return "MISSING"
def _field_coverage(rows: list[dict]) -> dict[str, float]:
non_etf = [r for r in rows if r.get("data_quality") != "ETF_EXCLUDED"]
if not non_etf:
return {}
return {
field: round(sum(1 for r in non_etf if r.get(field) is not None) / len(non_etf) * 100.0, 2)
for field in FIELD_WEIGHTS
}
def _weighted_coverage(field_cov: dict[str, float]) -> float:
    """Collapse per-field coverage percentages into one weighted percentage.

    Each field in ``FIELD_WEIGHTS`` contributes its weight multiplied by
    its coverage fraction; fields absent from ``field_cov`` count as 0%.
    The result is on a 0-100 scale, rounded to two decimals, and is
    ``0.0`` when the weights sum to zero.
    """
    weight_sum = 0.0
    weighted_hits = 0.0
    # Plain running totals in FIELD_WEIGHTS order keep the float arithmetic
    # identical from run to run.
    for name in FIELD_WEIGHTS:
        w = FIELD_WEIGHTS[name]
        pct = field_cov.get(name, 0.0)
        weight_sum += w
        weighted_hits += w * (pct / 100.0)

    if not weight_sum:
        return 0.0
    return round(weighted_hits / weight_sum * 100.0, 2)
def main() -> int:
    """Build the FUNDAMENTAL_RAW_V2 payload from the v1 file and save it.

    Steps:
      1. Parse ``--raw-v1`` (input path) and ``--out`` (output path).
      2. Load the v1 JSON via ``load_json`` and read its ``rows`` list;
         a non-dict payload or a missing/null ``rows`` is treated as empty.
      3. For each row keep the old label in ``data_quality_v1``, store the
         re-derived label in ``data_quality``, and record a per-field
         presence map in ``field_coverage``.
      4. Compute per-field and weighted coverage plus label counts.
      5. Write the result via ``save_json`` and print everything except
         ``rows`` to stdout as JSON.

    Returns:
        ``0`` always; it is used as the process exit code.
    """
    from collections import Counter

    ap = argparse.ArgumentParser()
    ap.add_argument("--raw-v1", default=str(DEFAULT_RAW_V1))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()

    raw_v1 = load_json(Path(args.raw_v1))
    # `or []` also covers an explicit `"rows": null` in the input.
    rows_in: list[dict] = (raw_v1.get("rows") or []) if isinstance(raw_v1, dict) else []

    rows_out = []
    for row in rows_in:
        r = dict(row)
        r["data_quality_v1"] = row.get("data_quality")  # keep the previous label
        r["data_quality"] = _reclassify_data_quality(row)
        # Record, per field, whether a value was actually present.
        r["field_coverage"] = {f: (row.get(f) is not None) for f in FIELD_WEIGHTS}
        rows_out.append(r)

    field_cov = _field_coverage(rows_out)
    weighted_cov = _weighted_coverage(field_cov)
    non_etf = [r for r in rows_out if r.get("data_quality") != "ETF_EXCLUDED"]
    dq_counts = Counter(r["data_quality"] for r in rows_out)

    # Count v1 FULL rows from the preserved v1 label itself. Deriving it from
    # the v2 FULL count would include rows that only became FULL in v2 and
    # would miss v1 FULL rows that were reclassified to SPARSE or MISSING.
    v1_full_count = sum(1 for r in rows_out if r.get("data_quality_v1") == "FULL")
    downgraded_count = sum(
        1
        for r in rows_out
        if r.get("data_quality_v1") == "FULL" and r["data_quality"] == "PARTIAL"
    )

    result = {
        "formula_id": "FUNDAMENTAL_RAW_V2",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "ticker_count": len(rows_out),
        "non_etf_count": len(non_etf),
        # Coverage metrics.
        "raw_field_coverage_pct": weighted_cov,
        "field_coverage_pct": field_cov,
        "data_quality_counts": dict(dq_counts),
        # Rationale for the engine_audit vs data_quality conflict.
        # NOTE(review): this text is static and states 0% OCF/FCF coverage
        # regardless of the values computed in field_cov above.
        "conflict_note": (
            "engine_audit가 낮은 fundamental_score를 보고하는 이유: "
            "OCF/FCF 0% 커버리지로 인해 가중 커버리지가 낮음. "
            "data_quality의 schema_presence_score=100은 필드 존재 여부만 확인."
        ),
        "v1_label_issue": (
            f"v1 data_quality=FULL {v1_full_count}건 중 "
            f"{downgraded_count}건이 "
            "OCF/FCF 부재로 실제 PARTIAL → 수정됨"
        ),
        "rows": rows_out,
    }
    save_json(args.out, result)

    # Print a summary without the (potentially large) row list.
    summary = {k: v for k, v in result.items() if k != "rows"}
    print(json.dumps(summary, ensure_ascii=False, indent=2))
    return 0
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())