"""Refresh the ``event_calendar`` sheet of the trading workbook.

Downloads the Trading Economics calendar page for the next 60 days, keeps the
US / KR / JP events that matter, classifies each one (type and impact) and
upserts it into the ``event_calendar`` sheet of ``GatherTradingData.xlsx``.
Rows are identified by an MD5 key built from date, type and event name, so
re-running the script updates existing rows instead of duplicating them.
"""
import hashlib
import os
import re
import sys
from datetime import datetime, timedelta
from pathlib import Path

import openpyxl
import requests
from bs4 import BeautifulSoup

# Reconfigure stdout for UTF-8 to prevent CP949 encoding crashes on Windows.
# Guarded: a redirected/replaced stdout (or None under pythonw) has no
# ``reconfigure`` and must not crash the module on import.
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding='utf-8')

ROOT = Path(__file__).resolve().parent.parent
XLSX_PATH = ROOT / "GatherTradingData.xlsx"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36'
}
url = "https://tradingeconomics.com/calendar"

# Ordered keyword rules: the first rule whose keyword occurs in the event name
# wins. ``type: None`` marks the CPI rule, whose final type depends on country.
TYPE_MAP = [
    {'keys': ['FOMC', '연준', 'Federal Open Market', 'Fed Rate'], 'type': 'FOMC'},
    {'keys': ['CPI', '소비자물가', 'Consumer Price', 'Inflation'], 'type': None},
    {'keys': ['PPI', '생산자물가', 'Producer Price'], 'type': 'US_PPI'},
    {'keys': ['PCE', '개인소비지출', 'Personal Consumption'], 'type': 'US_PCE'},
    {'keys': ['NFP', '비농업', 'Nonfarm', 'Payroll'], 'type': 'US_NFP'},
    {'keys': ['실적', '잠정실적', 'Earnings', 'EPS', 'Revenue'], 'type': 'EARNINGS'},
    {'keys': ['옵션만기', '선물만기', '만기일', 'Expiry', 'Triple Witching'], 'type': 'EXPIRY'},
    {'keys': ['한국은행', '금통위', 'BOK', 'Bank of Korea'], 'type': 'BOK'},
    {'keys': ['환율', 'FX', 'Dollar', '달러'], 'type': 'FX'},
    {'keys': ['국채', '채권', 'Bond', 'Treasury', 'KTB'], 'type': 'BOND'},
    {'keys': ['BOJ', '일본은행', 'Bank of Japan', 'BOJ Rate', 'BOJ Interest'], 'type': 'BOJ'},
]

# Columns the event_calendar sheet must expose (created on demand).
REQUIRED_HEADERS = ['Date', 'Event', 'Type', 'Impact', 'Alert', 'DaysLeft',
                    'AlertStatus', 'LastCheckedAt', 'Source', 'SourceUrl', 'Key']

_US_ONLY_TYPES = ('US_PPI', 'US_PCE', 'US_NFP', 'FOMC')
_HIGH_TYPES = ('FOMC', 'US_CPI', 'US_NFP', 'BOK', 'KR_CPI', 'BOJ')
_MEDIUM_TYPES = ('US_PPI', 'US_PCE', 'EARNINGS', 'EXPIRY')
_TARGET_COUNTRIES = ('US', 'KR', 'JP')

# Country codes must be standalone tokens in the (upper-cased) event name.
# A plain substring test mistakes "HOUSING" / "BUSINESS" for "US". The
# boundaries are ASCII-only so a code glued to Korean text still matches.
_KR_TOKEN_RE = re.compile(r'(?<![A-Z0-9])KR(?![A-Z0-9])')
_US_TOKEN_RE = re.compile(r'(?<![A-Z0-9])USA?(?![A-Z0-9])')

_DATE_CLASS_RE = re.compile(r'^\d{4}-\d{2}-\d{2}$')
_TAG_RE = re.compile(r'<[^>]+>')
_WHITESPACE_RE = re.compile(r'\s+')


def guessEventType(eventName, region):
    """Classify an event name into one of the calendar type codes.

    Args:
        eventName: Event title (English or Korean). ``None`` is treated as "".
        region: Country code such as ``'US'``, ``'KR'``, ``'JP'``; may be
            empty or ``None`` when unknown.

    Returns:
        The ``type`` of the first ``TYPE_MAP`` rule whose keyword occurs in
        the name, ``'KR_CPI'`` / ``'US_CPI'`` for the CPI rule, or
        ``'CUSTOM'`` when nothing matches or the matched type does not belong
        to the given region.
    """
    upper = (eventName or '').upper()
    reg = (region or '').upper().strip()

    for rule in TYPE_MAP:
        if not any(k.upper() in upper for k in rule['keys']):
            continue

        rule_type = rule['type']
        if rule_type is None:
            # CPI: resolve the country from the region first, then from
            # explicit country hints inside the event name.
            if reg == 'KR' or '한국' in upper or _KR_TOKEN_RE.search(upper):
                return 'KR_CPI'
            if reg == 'US' or '미국' in upper or _US_TOKEN_RE.search(upper):
                return 'US_CPI'
            return 'CUSTOM'

        # US-only / JP-only types are demoted when a different region is
        # explicitly given; an empty region is given the benefit of the doubt.
        if rule_type in _US_ONLY_TYPES and reg not in ('US', ''):
            return 'CUSTOM'
        if rule_type == 'BOJ' and reg not in ('JP', ''):
            return 'CUSTOM'
        return rule_type

    return 'CUSTOM'


def guessImpact(type_str, eventName):
    """Return the default impact level (``HIGH``/``MEDIUM``/``LOW``) of a type.

    Args:
        type_str: Type code as produced by ``guessEventType``.
        eventName: Unused; kept so existing callers keep working.
    """
    if type_str in _HIGH_TYPES:
        return 'HIGH'
    if type_str in _MEDIUM_TYPES:
        return 'MEDIUM'
    return 'LOW'


def build_key(date_str, event_name, type_str):
    """Return the MD5 hex digest identifying one event row.

    The digest covers ``date|TYPE|event name`` (type upper-cased, name
    stripped); it is used for de-duplication only, not for security.
    """
    raw = f"{date_str}|{type_str.upper()}|{event_name.strip()}"
    return hashlib.md5(raw.encode('utf-8')).hexdigest()


def _clean_text(td_el):
    """Return the element's markup with tags removed and whitespace collapsed."""
    without_tags = _TAG_RE.sub(' ', str(td_el))
    return _WHITESPACE_RE.sub(' ', without_tags).strip()


def _ensure_headers(ws):
    """Make sure row 1 of *ws* holds every required header.

    Missing headers are appended after the last existing header column.

    Returns:
        ``(header_map, changed)`` where ``header_map`` maps header name to a
        1-based column index and ``changed`` tells whether the sheet was
        modified.
    """
    headers_list = [cell.value for cell in ws[1]]
    # An empty sheet still yields one placeholder cell (value None) for row 1;
    # without this the first generated header would land in column B.
    if all(value is None for value in headers_list):
        headers_list = []
    header_map = {name: idx + 1 for idx, name in enumerate(headers_list) if name}

    changed = False
    for req in REQUIRED_HEADERS:
        if req in header_map:
            continue
        new_col_idx = len(headers_list) + 1
        ws.cell(row=1, column=new_col_idx, value=req)
        headers_list.append(req)
        header_map[req] = new_col_idx
        print(f"Automatically added missing header column '{req}' at column index {new_col_idx}")
        changed = True
    return header_map, changed


def _parse_event_row(tr, today, now_str):
    """Convert one calendar ``<tr>`` into a sheet row mapping.

    Args:
        tr: BeautifulSoup row element from the calendar table.
        today: Reference date used to compute ``DaysLeft``.
        now_str: Timestamp string stored in ``LastCheckedAt``.

    Returns:
        A dict keyed by sheet header name, or ``None`` when the row is not an
        event, is malformed, belongs to an ignored country, or is a
        low-impact CUSTOM event outside KR.
    """
    if not tr.get('data-event'):
        return None
    tds = tr.find_all('td')
    if len(tds) < 9:
        return None

    # The event date is carried as a CSS class (YYYY-MM-DD) on the first cell.
    td0 = tds[0]
    date_str = next(
        (c for c in td0.get('class', []) if _DATE_CLASS_RE.match(c)), "")
    if not date_str:
        return None

    # Impact (parse stars from td0 html snippet)
    td0_html = str(td0)
    if 'calendar-date-3' in td0_html:
        impact = 'HIGH'
    elif 'calendar-date-2' in td0_html:
        impact = 'MEDIUM'
    else:
        impact = 'LOW'

    # Country ISO code from td[3]
    country_iso = tds[3].get_text(strip=True).upper()

    # Event name from a.calendar-event inside td[4]
    a_ev = tds[4].find('a', class_='calendar-event')
    if not a_ev:
        return None
    eventName = a_ev.get_text(strip=True)

    # Skip noise countries (US, KR, JP only)
    if country_iso not in _TARGET_COUNTRIES:
        return None

    type_str = guessEventType(eventName, country_iso)
    final_impact = guessImpact(type_str, eventName)
    # The site's own rating may raise, never lower, the type-based default.
    if final_impact == 'LOW' and impact != 'LOW':
        final_impact = impact

    # Skip LOW impact CUSTOM (Except for South Korea)
    if type_str == 'CUSTOM' and final_impact == 'LOW' and country_iso != 'KR':
        return None

    # Actual, Previous, Consensus
    actual = _clean_text(tds[5])
    previous = _clean_text(tds[6])
    consensus = _clean_text(tds[7])
    alert_parts = []
    if actual and actual != '-':
        alert_parts.append(f"Act: {actual}")
    if consensus and consensus != '-':
        alert_parts.append(f"Est: {consensus}")
    if previous and previous != '-':
        alert_parts.append(f"Prev: {previous}")

    # The class matched the YYYY-MM-DD shape but may still be an impossible
    # date; in that case DaysLeft is left blank.
    try:
        days_left = (datetime.strptime(date_str, "%Y-%m-%d").date() - today).days
    except ValueError:
        days_left = ""

    return {
        'Date': date_str,
        'Event': eventName,
        'Type': type_str,
        'Impact': final_impact,
        'Alert': " ".join(alert_parts),
        'DaysLeft': days_left,
        'LastCheckedAt': now_str,
        'Source': 'Trading Economics',
        'SourceUrl': 'https://tradingeconomics.com/calendar',
        'Key': build_key(date_str, eventName, type_str),
    }


def main() -> int:
    """Fetch the calendar and upsert its events into the workbook.

    Returns:
        ``0`` on success, ``1`` when the workbook is missing, the download or
        parsing fails, or any other error occurs during the refresh.
    """
    print(f"Loading Excel workbook from {XLSX_PATH}...")
    if not XLSX_PATH.exists():
        print(f"Error: {XLSX_PATH} does not exist!")
        return 1

    wb = openpyxl.load_workbook(XLSX_PATH)
    sheet_name = "event_calendar"

    # Auto-create sheet if missing
    if sheet_name not in wb.sheetnames:
        print(f"Sheet '{sheet_name}' not found. Creating a new one...")
        ws = wb.create_sheet(sheet_name)
        ws.append(list(REQUIRED_HEADERS))
    else:
        ws = wb[sheet_name]

    # Ensure all required headers exist in the sheet. Append them automatically if missing.
    header_map, ws_updated = _ensure_headers(ws)
    if ws_updated:
        wb.save(XLSX_PATH)
        print("Excel workbook headers updated and saved.")

    # Index existing keys to avoid duplicates
    key_col = header_map['Key']
    row_by_key = {}
    for r_idx in range(2, ws.max_row + 1):
        k = ws.cell(row=r_idx, column=key_col).value
        if k:
            row_by_key[k] = r_idx

    # Calculate date range (60 days ahead)
    today = datetime.now().date()
    today_str = today.strftime("%Y-%m-%d")
    end_date_str = (today + timedelta(days=60)).strftime("%Y-%m-%d")

    print(f"Requesting Trading Economics calendar for range: {today_str} to {end_date_str}...")

    # The site reads the requested date window from this cookie.
    headers_req = headers.copy()
    headers_req['Cookie'] = f"cal-custom-range={today_str}|{end_date_str}"

    try:
        resp = requests.get(url, headers=headers_req, timeout=12)
        if resp.status_code != 200:
            print(f"Error: Fetch failed with HTTP status {resp.status_code}")
            return 1

        soup = BeautifulSoup(resp.text, 'html.parser')
        t = soup.find('table', id='calendar')
        if not t:
            print("Error: Could not find calendar table in HTML response.")
            return 1

        rows = t.find_all('tr')
        print(f"Found {len(rows)} raw HTML rows. Starting parser...")

        update_count = 0
        insert_count = 0
        now_str = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        # Width of an appended row; the header layout is fixed from here on.
        max_col = max(header_map.values())

        for r in rows:
            row_data = _parse_event_row(r, today, now_str)
            if row_data is None:
                continue

            key = row_data['Key']
            if key in row_by_key:
                # Update existing row
                r_num = row_by_key[key]
                for col_name, val in row_data.items():
                    ws.cell(row=r_num, column=header_map[col_name], value=val)
                update_count += 1
            else:
                # Append new row
                new_row = [""] * max_col
                for col_name, val in row_data.items():
                    new_row[header_map[col_name] - 1] = val
                ws.append(new_row)
                row_by_key[key] = ws.max_row  # keep index updated
                insert_count += 1

        print(f"Parser complete. Added {insert_count} new events, Updated {update_count} existing events.")

        # Save Excel file
        print(f"Saving workbook back to {XLSX_PATH}...")
        wb.save(XLSX_PATH)
        print("Excel workbook successfully updated!")
        return 0

    except Exception as e:
        # Top-level boundary of the script: report and signal failure through
        # the exit code instead of a traceback.
        print("Failed to run refresh script:", e)
        return 1


if __name__ == "__main__":
    raise SystemExit(main())