ee3e799de1
주요 변경: - tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규 * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합 * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일) - src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규 * Logger.log / getSpreadsheet_() 로 run_all 연동 수정 - src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs * _mergePositionRecord_(): 소수주 중복 행 합산 신규 * parseInt → parseFloat (qty, availQty) - src/gas_adapter_parts/gdf_01_price_metrics.gs * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL - spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63) - spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록 Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
123 lines
4.5 KiB
Python
123 lines
4.5 KiB
Python
"""build_fundamental_raw_v2.py — FUNDAMENTAL_RAW_V2
|
|
|
|
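P1-011: Resolves the problem of the fundamental_raw_v1 label data_quality=FULL hiding missing OCF/FCF data.

- Computes coverage per field (not per ticker).
- Rows without OCF/FCF are labeled PARTIAL instead of FULL.
- States the basis for the conflict between engine_audit (61.6) and data_quality (100).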
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import argparse
|
|
import json
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
from v7_hardening_common import ROOT, TEMP, load_json, save_json
|
|
|
|
# Default input (v1 payload) and output (v2 payload) locations under TEMP.
DEFAULT_RAW_V1 = TEMP / "fundamental_raw_v1.json"
DEFAULT_OUT = TEMP / "fundamental_raw_v2.json"

# Field weights (same as multifactor_v4). Insertion order is preserved and
# determines both iteration order and the key order of emitted coverage maps.
FIELD_WEIGHTS = dict(
    roe_pct=25,
    opm_pct=20,
    ocf_krw=15,  # half of the 30 points shared by OCF and FCF
    fcf_krw=15,
    net_debt_krw=10,
    per=8,
    pbr=7,
)

# Sum of all field weights (= 100).
TOTAL_WEIGHT = sum(FIELD_WEIGHTS.values())
|
|
|
|
# FULL 판정: ROE/OPM + 밸류에이션 + (OCF OR FCF) 중 하나라도 있어야 함
|
|
def _reclassify_data_quality(row: dict) -> str:
|
|
if row.get("data_quality") == "ETF_EXCLUDED":
|
|
return "ETF_EXCLUDED"
|
|
has_core = (row.get("roe_pct") is not None and row.get("opm_pct") is not None)
|
|
has_val = (row.get("per") is not None or row.get("pbr") is not None)
|
|
has_cf = (row.get("ocf_krw") is not None or row.get("fcf_krw") is not None)
|
|
if has_core and has_val and has_cf:
|
|
return "FULL"
|
|
if has_core and has_val:
|
|
return "PARTIAL" # OCF/FCF 없음
|
|
if has_core:
|
|
return "SPARSE"
|
|
return "MISSING"
|
|
|
|
|
|
def _field_coverage(rows: list[dict]) -> dict[str, float]:
|
|
non_etf = [r for r in rows if r.get("data_quality") != "ETF_EXCLUDED"]
|
|
if not non_etf:
|
|
return {}
|
|
return {
|
|
field: round(sum(1 for r in non_etf if r.get(field) is not None) / len(non_etf) * 100.0, 2)
|
|
for field in FIELD_WEIGHTS
|
|
}
|
|
|
|
|
|
def _weighted_coverage(field_cov: dict[str, float]) -> float:
    """Collapse per-field coverage percentages into one weighted percentage.

    Each field in FIELD_WEIGHTS contributes ``weight * coverage / 100``;
    fields missing from ``field_cov`` contribute 0. The result is the
    covered weight divided by the total weight, as a percentage rounded to
    two decimals. Returns 0.0 when the total weight is zero.
    """
    weight_sum = sum(FIELD_WEIGHTS.values())
    if not weight_sum:
        return 0.0

    # Accumulate in FIELD_WEIGHTS order, starting from 0.0.
    covered = sum(
        (weight * (field_cov.get(field, 0.0) / 100.0) for field, weight in FIELD_WEIGHTS.items()),
        0.0,
    )
    return round(covered / weight_sum * 100.0, 2)
|
|
|
|
|
|
def main() -> int:
    """Build the FUNDAMENTAL_RAW_V2 payload from the v1 payload.

    Command-line options:
      --raw-v1  path of the v1 JSON input (default: DEFAULT_RAW_V1)
      --out     path of the v2 JSON output (default: DEFAULT_OUT)

    For every input row the previous label is kept under ``data_quality_v1``,
    ``data_quality`` is recomputed with ``_reclassify_data_quality`` and a
    per-field presence map is stored under ``field_coverage``. Aggregate
    coverage figures and label counts are added at the top level, the result
    is written with ``save_json`` and a summary (everything except ``rows``)
    is printed to stdout.

    Returns:
        0 on completion (used as the process exit status).
    """
    from collections import Counter

    ap = argparse.ArgumentParser()
    ap.add_argument("--raw-v1", default=str(DEFAULT_RAW_V1))
    ap.add_argument("--out", default=str(DEFAULT_OUT))
    args = ap.parse_args()

    raw_v1 = load_json(Path(args.raw_v1))
    # `or []` also covers a payload that carries an explicit "rows": null,
    # which would otherwise make the loop below iterate over None.
    rows_in: list[dict] = (raw_v1.get("rows") or []) if isinstance(raw_v1, dict) else []

    rows_out = []
    for row in rows_in:
        r = dict(row)
        r["data_quality_v1"] = row.get("data_quality")  # keep the previous label
        r["data_quality"] = _reclassify_data_quality(row)
        # Record, per weighted field, whether a value is actually present.
        r["field_coverage"] = {f: (row.get(f) is not None) for f in FIELD_WEIGHTS}
        rows_out.append(r)

    field_cov = _field_coverage(rows_out)
    weighted_cov = _weighted_coverage(field_cov)
    non_etf = [r for r in rows_out if r.get("data_quality") != "ETF_EXCLUDED"]
    dq_counts = Counter(r["data_quality"] for r in rows_out)

    # Count v1 FULL rows directly from the preserved v1 label. Deriving the
    # total from the v2 counts misses v1-FULL rows that were reclassified to
    # SPARSE/MISSING and counts v2-FULL rows that were not FULL in v1.
    v1_full_count = sum(1 for r in rows_out if r.get("data_quality_v1") == "FULL")
    downgraded_count = sum(
        1
        for r in rows_out
        if r.get("data_quality_v1") == "FULL" and r["data_quality"] == "PARTIAL"
    )

    result = {
        "formula_id": "FUNDAMENTAL_RAW_V2",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "ticker_count": len(rows_out),
        "non_etf_count": len(non_etf),
        # coverage metrics
        "raw_field_coverage_pct": weighted_cov,
        "field_coverage_pct": field_cov,
        "data_quality_counts": dict(dq_counts),
        # Basis for the engine_audit vs data_quality conflict.
        # NOTE(review): this text is static and states 0% OCF/FCF coverage
        # regardless of the values computed above - confirm that is intended.
        "conflict_note": (
            "engine_audit가 낮은 fundamental_score를 보고하는 이유: "
            "OCF/FCF 0% 커버리지로 인해 가중 커버리지가 낮음. "
            "data_quality의 schema_presence_score=100은 필드 존재 여부만 확인."
        ),
        "v1_label_issue": (
            f"v1 data_quality=FULL {v1_full_count}건 중 "
            f"{downgraded_count}건이 "
            "OCF/FCF 부재로 실제 PARTIAL → 수정됨"
        ),
        "rows": rows_out,
    }

    save_json(args.out, result)
    print(json.dumps({k: v for k, v in result.items() if k != "rows"}, ensure_ascii=False, indent=2))
    return 0
|
|
|
|
|
|
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)
|