Files
QuantEngineByItz/src/quant_engine/import_etf_nav_manual.py
T
kjh2064 ee3e799de1 feat: 리밸런싱 엔진 V1 + GAS 버그 수정 (2026-06-13)
주요 변경:
- tools/build_rebalance_engine_v1.py: REBALANCE_ENGINE_V1 신규
  * account_snapshot 직접 합산(_build_snap_position_map) → 소수주 분리 행 병합
  * 레짐 소스 macro.REGIME_PRELIM 최우선 (GAS 와 동일)
- src/gas_adapter_parts/gdf_06_rebalance.gs: runRebalanceSheet_() 신규
  * Logger.log / getSpreadsheet_() 로 run_all 연동 수정
- src/gas_adapter_parts/gdc_01_fetch_fundamentals.gs
  * _mergePositionRecord_(): 소수주 중복 행 합산 신규
  * parseInt → parseFloat (qty, availQty)
- src/gas_adapter_parts/gdf_01_price_metrics.gs
  * 미보유 종목 SELL_READY → WATCH_EXIT_SIGNAL
- spec/41_release_dag.yaml: build_rebalance_sheet 노드 추가 (step_count 63)
- spec/51_formula_lifecycle_registry.yaml: REBALANCE_ENGINE_V1 등록

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-06-13 13:20:14 +09:00

235 lines
8.5 KiB
Python

from __future__ import annotations

import argparse
import csv
import datetime as dt
import math
import re
from pathlib import Path
from typing import Any

import openpyxl
# Directory two levels above the folder that contains this module.
ROOT = Path(__file__).resolve().parents[2]
# Workbook that is updated when --workbook is not given on the command line.
DEFAULT_XLSX = ROOT.joinpath("GatherTradingData.xlsx")

# Header row (and column order) written to the "etf_nav_manual" sheet.
OUTPUT_HEADERS = [
    "ETF_Ticker",
    "ETF_Name",
    "Close",
    "NAV",
    "iNAV",
    "Premium_Discount_Pct",
    "Tracking_Error",
    "AUM",
    "Source_Date",
    "Source",
    "Enabled",
    "Note",
]

# Logical field -> header spellings accepted in a source export.
# Aliases are compared after normalize_header(); resolve_columns() tries them
# in list order, so an earlier alias wins when several are present.
COLUMN_ALIASES = {
    "ticker": [
        "ETF_Ticker",
        "종목코드",
        "단축코드",
        "표준코드",
        "code",
        "ticker",
    ],
    "name": [
        "ETF_Name",
        "종목명",
        "한글종목명",
        "Name",
        "name",
    ],
    "close": [
        "Close",
        "종가",
        "현재가",
        "시장가격",
        "TDD_CLSPRC",
        "close",
    ],
    "nav": [
        "NAV",
        "순자산가치",
        "기준가격",
        "기준가",
        "NAV(원)",
        "nav",
    ],
    "inav": [
        "iNAV",
        "추정순자산가치",
        "실시간기준가",
        "iNAV(원)",
        "inav",
    ],
    "premium_discount_pct": [
        "Premium_Discount_Pct",
        "괴리율",
        "괴리율(%)",
        "가격괴리율",
        "premium_discount_pct",
    ],
    "tracking_error": [
        "Tracking_Error",
        "추적오차율",
        "추적오차",
        "추적오차율(%)",
        "tracking_error",
    ],
    "aum": [
        "AUM",
        "순자산총액",
        "순자산총액(원)",
        "상장좌수",
        "aum",
    ],
    "source_date": [
        "Source_Date",
        "기준일",
        "일자",
        "거래일자",
        "Date",
        "date",
    ],
}
def normalize_header(value: Any) -> str:
    """Return *value* as a lowercase string with all whitespace removed.

    Falsy values (``None``, ``""``, ``0``) normalize to the empty string.
    """
    raw = "" if not value else str(value)
    without_spaces = re.sub(r"\s+", "", raw.strip())
    return without_spaces.lower()
def normalize_ticker(value: Any) -> str:
    """Return *value* as a cleaned ticker code.

    A trailing ``".0"`` is dropped, every character other than ASCII letters
    and digits is removed, and codes that are all digits or at most six
    characters long are left-padded with zeros to six characters. Longer
    alphanumeric codes are returned as cleaned; falsy input gives ``""``.
    """
    raw = ("" if not value else str(value)).strip()
    if raw.endswith(".0"):
        raw = raw[: len(raw) - 2]
    code = re.sub(r"[^0-9A-Za-z]", "", raw)
    is_short_code = re.fullmatch(r"[0-9A-Za-z]{1,6}", code) is not None
    if code.isdigit() or is_short_code:
        return code.zfill(6)
    return code
def parse_number(value: Any) -> float | None:
    """Convert a source cell to a finite ``float``, or ``None`` when it has no usable number.

    Args:
        value: Raw cell content: a number, a string such as ``"1,234.5"`` or
            ``"12.3%"``, or an empty/placeholder value.

    Returns:
        The parsed number, or ``None`` for ``None``, empty strings, booleans,
        the placeholders ``"-"``, ``"N/A"`` and ``"nan"``, unparsable text,
        and any non-finite result (NaN or infinity in any spelling).
    """
    if value in (None, ""):
        return None
    if isinstance(value, bool):
        # bool is an int subclass, but True/False is not a price.
        return None
    try:
        if isinstance(value, (int, float)):
            number = float(value)
        else:
            text = str(value).strip()
            if not text or text in {"-", "N/A", "nan"}:
                return None
            # Drop thousands separators and percent signs before parsing.
            number = float(text.replace(",", "").replace("%", ""))
    except (ValueError, OverflowError):
        return None
    # float() accepts "NaN", "inf", "Infinity", ...; those are not usable values.
    return number if math.isfinite(number) else None
def parse_date(value: Any) -> str:
    """Convert a source cell to an ISO ``YYYY-MM-DD`` string.

    Args:
        value: A ``datetime``/``date`` object, or anything whose string form
            contains a year-month-day sequence, optionally separated by
            ``.``, ``/`` or ``-`` (e.g. ``"2024.1.5"`` or ``"20240105"``).

    Returns:
        The formatted date, or ``""`` when the value is empty, contains no
        date pattern, or the matched numbers are not a real calendar date.
    """
    if value in (None, ""):
        return ""
    # datetime is a subclass of date, so one check covers both.
    if isinstance(value, dt.date):
        return value.strftime("%Y-%m-%d")
    found = re.search(r"(\d{4})[./-]?(\d{1,2})[./-]?(\d{1,2})", str(value).strip())
    if not found:
        return ""
    year, month, day = (int(part) for part in found.groups())
    try:
        # Constructing a date rejects impossible values such as month 13 or Feb 30.
        return dt.date(year, month, day).isoformat()
    except ValueError:
        return ""
def read_source_table(path: Path) -> list[dict[str, Any]]:
    """Load a source export as a list of ``{header: value}`` row dicts.

    ``.xlsx``/``.xlsm`` files are read from their first sheet. The header row
    is whichever of the first 20 rows has the most cells matching a known
    alias in ``COLUMN_ALIASES`` (the earliest row wins ties); rows after it
    that contain at least one non-empty cell become the data. An empty sheet
    gives ``[]``.

    Any other file is read as delimited text, trying the encodings
    ``utf-8-sig``, ``cp949`` and ``euc-kr`` in turn. The delimiter is sniffed
    from ``,``, tab and ``;``; when it cannot be determined (for example a
    single-column or empty file) the default comma dialect is used.

    Args:
        path: Location of the export file.

    Returns:
        One dict per data row, keyed by the header text.

    Raises:
        RuntimeError: If the text file cannot be read with any candidate
            encoding; the last underlying error is chained as the cause.
    """
    if path.suffix.lower() in {".xlsx", ".xlsm"}:
        wb = openpyxl.load_workbook(path, data_only=True, read_only=True)
        try:
            ws = wb[wb.sheetnames[0]]
            rows = list(ws.iter_rows(values_only=True))
        finally:
            # Read-only workbooks keep the file open until closed explicitly.
            wb.close()
        if not rows:
            return []
        alias_tokens = {normalize_header(a) for aliases in COLUMN_ALIASES.values() for a in aliases}
        header_row_idx = 0
        best_score = -1
        for i, row in enumerate(rows[:20]):
            score = sum(1 for cell in row if normalize_header(cell) in alias_tokens)
            if score > best_score:
                best_score = score
                header_row_idx = i
        headers = [str(v or "").strip() for v in rows[header_row_idx]]
        return [
            dict(zip(headers, row))
            for row in rows[header_row_idx + 1 :]
            if row and any(v not in (None, "") for v in row)
        ]

    last_error: Exception | None = None
    for encoding in ("utf-8-sig", "cp949", "euc-kr"):
        try:
            with path.open("r", encoding=encoding, newline="") as f:
                sample = f.read(4096)
                f.seek(0)
                try:
                    dialect = csv.Sniffer().sniff(sample, delimiters=",\t;")
                except csv.Error:
                    # No delimiter to detect (single column or empty file):
                    # the text decoded fine, so read it as plain comma CSV.
                    dialect = csv.excel
                return list(csv.DictReader(f, dialect=dialect))
        except (OSError, UnicodeError, csv.Error) as exc:
            last_error = exc
    raise RuntimeError(f"failed to read source file {path}: {last_error}") from last_error
def resolve_columns(rows: list[dict[str, Any]]) -> dict[str, str]:
    """Map each logical field to the matching header found in the source rows.

    Only the keys of the first row are inspected. Headers and aliases are
    compared after ``normalize_header``; for each field in ``COLUMN_ALIASES``
    the first alias (in list order) that matches a header is used. Fields
    with no matching header are left out, and empty input gives ``{}``.
    """
    if not rows:
        return {}
    header_by_token = {normalize_header(header): header for header in rows[0]}
    mapping: dict[str, str] = {}
    for field, aliases in COLUMN_ALIASES.items():
        tokens = [normalize_header(alias) for alias in aliases]
        hits = [token for token in tokens if token in header_by_token]
        if hits:
            mapping[field] = header_by_token[hits[0]]
    return mapping
def existing_etfs(wb: openpyxl.Workbook) -> dict[str, str]:
    """Return ``{ticker: name}`` for the ETFs listed on the ``etf_raw`` sheet.

    Headers are read from row 2 and data from row 3 onward. Tickers go
    through ``normalize_ticker`` and rows whose ticker normalizes to empty
    are skipped. When there is no ``ETF_Name`` column the ticker cell is used
    as the name. A missing sheet or ``ETF_Ticker`` column gives ``{}``.
    """
    if "etf_raw" not in wb.sheetnames:
        return {}
    sheet = wb["etf_raw"]
    column_of: dict[Any, int] = {}
    for col in range(1, sheet.max_column + 1):
        header = sheet.cell(2, col).value
        if header:
            column_of[header] = col
    if "ETF_Ticker" not in column_of:
        return {}
    ticker_col = column_of["ETF_Ticker"]
    name_col = column_of.get("ETF_Name", ticker_col)
    names: dict[str, str] = {}
    for row_no in range(3, sheet.max_row + 1):
        ticker = normalize_ticker(sheet.cell(row_no, ticker_col).value)
        if not ticker:
            continue
        names[ticker] = str(sheet.cell(row_no, name_col).value or "")
    return names
def _source_value(row: dict[str, Any], columns: dict[str, str], field: str) -> Any:
    """Return the raw cell for logical *field*, or ``None`` if the source has no such column."""
    header = columns.get(field)
    # Never fall back to row.get(""): a blank-header column would leak its data
    # into every field that could not be resolved.
    return None if header is None else row.get(header)


def update_workbook(workbook_path: Path, source_path: Path, enable: bool) -> tuple[int, int]:
    """Rebuild the ``etf_nav_manual`` sheet of a workbook from a source export.

    The source is read with ``read_source_table`` and its columns resolved
    with ``resolve_columns``. Any existing ``etf_nav_manual`` sheet is
    deleted and recreated right after ``etf_raw`` (or at index 1 when that
    sheet is absent), with a note in row 1, ``OUTPUT_HEADERS`` in row 2 and
    one row per distinct ticker (the first occurrence wins). When the source
    has no premium/discount value it is computed as ``(close / NAV - 1) * 100``,
    using iNAV if NAV is missing or not positive. The workbook is saved in place.

    Args:
        workbook_path: Workbook to load, modify and save.
        source_path: CSV or Excel export holding the NAV data.
        enable: When true, rows that match ``etf_raw`` and have a NAV or iNAV
            get ``Enabled`` = ``"Y"``; all other rows get ``"N"``.

    Returns:
        ``(imported, matched)``: rows written, and how many of them matched a
        ticker in ``etf_raw`` (every row counts as matched when ``etf_raw``
        lists no tickers).

    Raises:
        RuntimeError: If no ticker/code column can be resolved in the source.
    """
    rows = read_source_table(source_path)
    columns = resolve_columns(rows)
    if "ticker" not in columns:
        raise RuntimeError(f"source file has no ticker/code column. resolved={columns}")

    wb = openpyxl.load_workbook(workbook_path)
    targets = existing_etfs(wb)
    if "etf_nav_manual" in wb.sheetnames:
        del wb["etf_nav_manual"]
    insert_at = wb.sheetnames.index("etf_raw") + 1 if "etf_raw" in wb.sheetnames else 1
    ws = wb.create_sheet("etf_nav_manual", insert_at)
    ws.append([f"updated: imported from {source_path.name}"])
    ws.append(OUTPUT_HEADERS)

    imported = 0
    matched = 0
    seen: set[str] = set()
    for row in rows:
        ticker = normalize_ticker(_source_value(row, columns, "ticker"))
        if not ticker or ticker in seen:
            continue
        seen.add(ticker)

        name = str(_source_value(row, columns, "name") or targets.get(ticker, "")).strip()
        close = parse_number(_source_value(row, columns, "close"))
        nav = parse_number(_source_value(row, columns, "nav"))
        inav = parse_number(_source_value(row, columns, "inav"))
        premium = parse_number(_source_value(row, columns, "premium_discount_pct"))
        if premium is None:
            # Derive the premium/discount from close vs. NAV, preferring NAV over iNAV.
            basis_nav = nav if nav and nav > 0 else inav
            if close is not None and basis_nav and basis_nav > 0:
                premium = ((close / basis_nav) - 1) * 100
        tracking_error = parse_number(_source_value(row, columns, "tracking_error"))
        aum = parse_number(_source_value(row, columns, "aum"))
        source_date = parse_date(_source_value(row, columns, "source_date"))

        # With no reference tickers to check against, every row counts as a match.
        is_match = not targets or ticker in targets
        if is_match:
            matched += 1
        row_enable = "Y" if enable and is_match and (nav is not None or inav is not None) else "N"
        ws.append([
            ticker,
            name,
            close,
            nav,
            inav,
            premium,
            tracking_error,
            aum,
            source_date,
            f"import:{source_path.name}",
            row_enable,
            "matched_etf_raw" if is_match else "not_in_etf_raw_review_before_enable",
        ])
        imported += 1

    # Text format on column A so ticker strings keep their leading zeros.
    for sheet_row in ws.iter_rows(min_row=1, max_row=ws.max_row):
        sheet_row[0].number_format = "@"
    for cell in ws[2]:
        cell.font = openpyxl.styles.Font(bold=True, color="FFFFFF")
        cell.fill = openpyxl.styles.PatternFill("solid", fgColor="7030A0")
    ws.freeze_panes = "A3"
    ws.auto_filter.ref = f"A2:L{ws.max_row}"
    widths = [14, 34, 14, 14, 14, 20, 16, 16, 16, 28, 10, 42]
    for i, width in enumerate(widths, 1):
        ws.column_dimensions[openpyxl.utils.get_column_letter(i)].width = width
    wb.save(workbook_path)
    return imported, matched
def main() -> int:
    """Parse the command line, run the import and print a summary.

    Returns:
        ``0`` once the workbook has been updated.
    """
    cli = argparse.ArgumentParser(
        description="Import official ETF NAV/iNAV data into etf_nav_manual sheet.",
    )
    cli.add_argument("source", type=Path, help="KRX/KIND/issuer CSV or XLSX export")
    cli.add_argument("--workbook", type=Path, default=DEFAULT_XLSX)
    cli.add_argument(
        "--enable",
        action="store_true",
        help="Set Enabled=Y for matched rows with NAV or iNAV",
    )
    options = cli.parse_args()

    counts = update_workbook(options.workbook, options.source, options.enable)
    imported, matched = counts
    print(f"ETF NAV IMPORT OK: imported={imported} matched_etf_raw={matched} workbook={options.workbook.name}")
    return 0
if __name__ == "__main__":
    # Use main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)