"""Naver Finance 시세/수급 수집기 — qualitative_sell_strategy_v1 입력용. 확인된 무인증 엔드포인트만 사용한다(2026-06-21 세션 실측): - https://finance.naver.com/item/sise_day.naver?code={code}&page=N (일별 시세/거래량) - https://finance.naver.com/item/frgn.naver?code={code}&page=N (외국인/기관 수급) - https://polling.finance.naver.com/api/realtime/domestic/stock/{code} (실시간 스냅샷, JSON) investing.com 직접 스크래핑은 403(Cloudflare 차단) 확인됨 — 시도하지 않는다. KRX 공매도 잔고(data.krx.co.kr)는 OTP 세션 필요(LOGOUT 응답) — 시도하지 않는다. 이미 GAS(gdc_01_fetch_fundamentals.gs/gas_event_calendar.gs)에서 수집 중인 외국인/기관 수급·실적발표 일정·경제지표 일정은 보유종목에 대해서는 account_snapshot/ GatherTradingData.xlsx에서 재사용하고, 이 스크립트는 그 시트에 없는 위성 후보군 티커를 평가할 때만 직접 호출한다(중복 수집 금지). """ from __future__ import annotations import argparse import datetime as dt import json import sys from pathlib import Path from typing import Any import requests from bs4 import BeautifulSoup ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36" NAVER_REFERER = "https://finance.naver.com/" def _session() -> requests.Session: s = requests.Session() s.headers.update({ "User-Agent": USER_AGENT, "Referer": NAVER_REFERER, "Accept-Language": "ko-KR,ko;q=0.9,en;q=0.8", }) return s def _num(text: str) -> float: cleaned = text.replace(",", "").replace("+", "").strip() try: return float(cleaned) except ValueError: return 0.0 def fetch_price_history(session: requests.Session, code: str, pages: int = 3) -> dict[str, Any]: """일별 [date, close, change, open, high, low, volume] 최신순. 페이지당 10행.""" rows: list[dict[str, Any]] = [] for page in range(1, pages + 1): url = f"https://finance.naver.com/item/sise_day.naver?code={code}&page={page}" try: resp = session.get(url, timeout=10) if resp.status_code == 403: return { "status": "CLOUDFLARE_BLOCKED_403", "rows": [], "error": "Cloudflare rejected request (403 Forbidden)", "source_url": url, "wbs_ref": "WBS-7.9: Naver 스크래핑 Cloudflare 모니터링", } resp.raise_for_status() except requests.RequestException as e: return { "status": "FETCH_ERROR", "rows": [], "error": str(e), "source_url": url, } resp.encoding = "euc-kr" soup = BeautifulSoup(resp.text, "html.parser") table = soup.find("table", {"class": "type2"}) if table is None: break for tr in table.find_all("tr"): cells = [td.get_text(strip=True) for td in tr.find_all("td")] if len(cells) != 7 or not cells[0]: continue rows.append({ "date": cells[0].replace(".", "-"), "close": _num(cells[1]), "open": _num(cells[3]), "high": _num(cells[4]), "low": _num(cells[5]), "volume": _num(cells[6]), }) if not rows: return {"status": "DATA_MISSING", "rows": [], "source_url": NAVER_REFERER} return { "status": "OK", "rows": rows, "source_url": f"https://finance.naver.com/item/sise_day.naver?code={code}", "source_as_of": dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).isoformat(), } def fetch_foreign_institution_flow(session: requests.Session, code: str, pages: int = 2) -> dict[str, Any]: """외국인/기관 5일·20일 수급. tds: [date, close, change, ret_pct, volume, inst, frgn, frgn_ratio].""" rows: list[dict[str, Any]] = [] for page in range(1, pages + 1): url = f"https://finance.naver.com/item/frgn.naver?code={code}&page={page}" try: resp = session.get(url, timeout=10) if resp.status_code == 403: return { "status": "CLOUDFLARE_BLOCKED_403", "rows": [], "error": "Cloudflare rejected request (403 Forbidden)", "source_url": url, "wbs_ref": "WBS-7.9: Naver 스크래핑 Cloudflare 모니터링", } resp.raise_for_status() except requests.RequestException as e: return { "status": "FETCH_ERROR", "rows": [], "error": str(e), "source_url": url, } resp.encoding = "euc-kr" soup = BeautifulSoup(resp.text, "html.parser") for table in soup.find_all("table", {"class": "type2"}): for tr in table.find_all("tr"): cells = [td.get_text(strip=True) for td in tr.find_all("td")] if len(cells) < 8 or not cells[0] or "." not in cells[0]: continue rows.append({ "date": cells[0].replace(".", "-"), "close": _num(cells[1]), "inst_net": _num(cells[5]), "frgn_net": _num(cells[6]), }) if not rows: return {"status": "DATA_MISSING", "rows": []} return { "status": "OK", "rows": rows, "source_url": f"https://finance.naver.com/item/frgn.naver?code={code}", "source_as_of": dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).isoformat(), } def compute_relative_return_20d(stock_rows: list[dict[str, Any]], benchmark_rows: list[dict[str, Any]]) -> float | None: """종목수익률(최신 vs 20거래일전) - 벤치마크(섹터ETF/KOSPI)수익률, %p.""" def _ret(rows: list[dict[str, Any]]) -> float | None: closes = [r["close"] for r in rows if r.get("close")] if len(closes) < 2: return None recent, past = closes[0], closes[min(len(closes) - 1, 19)] if not past: return None return (recent / past - 1.0) * 100.0 stock_ret = _ret(stock_rows) bench_ret = _ret(benchmark_rows) if stock_ret is None or bench_ret is None: return None return round(stock_ret - bench_ret, 4) def compute_volume_ratio_5d(rows: list[dict[str, Any]]) -> float | None: """오늘 거래량 / 직전 5일 평균거래량.""" volumes = [r["volume"] for r in rows if r.get("volume")] if len(volumes) < 6: return None today_vol = volumes[0] avg5 = sum(volumes[1:6]) / 5.0 if avg5 <= 0: return None return round(today_vol / avg5, 4) def main() -> int: ap = argparse.ArgumentParser(description=__doc__) ap.add_argument("--code", required=True, help="6자리 종목코드") ap.add_argument("--benchmark-code", default="069500", help="비교 벤치마크 코드(기본 KODEX200 069500)") args = ap.parse_args() session = _session() price = fetch_price_history(session, args.code) benchmark = fetch_price_history(session, args.benchmark_code) flow = fetch_foreign_institution_flow(session, args.code) result = { "code": args.code, "price_history": price, "foreign_institution_flow": flow, "relative_return_20d": compute_relative_return_20d(price.get("rows", []), benchmark.get("rows", [])), "volume_ratio_5d": compute_volume_ratio_5d(price.get("rows", [])), } print(json.dumps(result, ensure_ascii=False, indent=2)) return 0 if __name__ == "__main__": raise SystemExit(main())