From 82ca4ddbfd12a3ccc0218c6e5e9654cd427e71a8 Mon Sep 17 00:00:00 2001 From: kjh2064 Date: Mon, 15 Jun 2026 02:29:29 +0900 Subject: [PATCH] =?UTF-8?q?=EC=84=B9=ED=84=B0=20=EC=9C=A0=EB=8B=88?= =?UTF-8?q?=EB=B2=84=EC=8A=A4=20=EB=B6=84=EB=A6=AC=EC=99=80=20=EC=9B=94?= =?UTF-8?q?=EA=B0=84=20=EA=B0=B1=EC=8B=A0=20=EC=A0=95=ED=95=A9=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- runtime/refactor_baseline_v1.yaml | 2 +- spec/risk/portfolio_exposure.yaml | 2 +- src/gas/core/gas_lib.gs | 429 +++++++++++- .../gdc_02_account_satellite.gs | 16 +- src/gas_adapter_parts/gdf_01_price_metrics.gs | 35 +- src/quant_engine/convert_xlsx_to_json.py | 96 +++ .../run_formula_golden_cases_v2.py | 3 +- src/quant_engine/sector_universe_refresh.py | 296 +++++++++ tools/automate_routine.py | 33 +- tools/update_sector_universe_from_naver.py | 616 ++++++++++++++++++ ...date_sector_universe_monthly_refresh_v1.py | 173 +++++ 11 files changed, 1658 insertions(+), 43 deletions(-) create mode 100644 src/quant_engine/sector_universe_refresh.py create mode 100644 tools/update_sector_universe_from_naver.py create mode 100644 tools/validate_sector_universe_monthly_refresh_v1.py diff --git a/runtime/refactor_baseline_v1.yaml b/runtime/refactor_baseline_v1.yaml index d7fcb63..ed753cb 100644 --- a/runtime/refactor_baseline_v1.yaml +++ b/runtime/refactor_baseline_v1.yaml @@ -15,5 +15,5 @@ "keep package scripts within release envelope" ] }, - "source_zip_sha256": "49f64b3773ba3c19fa8323d0b08833928c637935483039579bb8ab22a391f70c" + "source_zip_sha256": "4de4a7b1217ef5d5375b3b1ea1209f738719e79c4c3c0954e9e96a9dc0d8607e" } \ No newline at end of file diff --git a/spec/risk/portfolio_exposure.yaml b/spec/risk/portfolio_exposure.yaml index 8267acb..bcb9934 100644 --- a/spec/risk/portfolio_exposure.yaml +++ b/spec/risk/portfolio_exposure.yaml @@ -13,7 +13,7 @@ portfolio_exposure_framework: exposure_layers: direct_core_leaders: ["삼성전자", "SK하이닉스"] duplicate_beta: ["KODEX 반도체", "동일 섹터 ETF"] - tactical_satellites: ["방산", "조선", "전력기기", "건설/EPC", "기타 고베타"] + tactical_satellites: ["방산", "조선", "전력설비", "건설", "플랜트/EPC", "로보틱스", "기타 고베타"] cash: ["현금", "MMF", "RP", "단기채 ETF"] valid_trim_reasons: - "벤치마크 대비 초과비중이 허용밴드를 초과하고 가격 추세가 훼손됨" diff --git a/src/gas/core/gas_lib.gs b/src/gas/core/gas_lib.gs index 8516f95..5423187 100644 --- a/src/gas/core/gas_lib.gs +++ b/src/gas/core/gas_lib.gs @@ -1,5 +1,5 @@ // gas_lib.gs - Common utilities & static features -// Last Updated: 2026-06-14 20:48:30 KST +// Last Updated: 2026-06-15 02:20:50 KST // Math/KRX utils, sheet I/O, sector flow, Web API, static runners // GAS global scope: functions in gas_data_feed.gs / gas_data_collect.gs callable directly // @@ -593,7 +593,14 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [ { code: "062040", name: "산일전기", weight: 0.10 }, { code: "298040", name: "효성중공업", weight: 0.10 }, ]}, - { sector: "방산", proxyTicker: "012450", proxyName: "한화에어로스페이스", proxyType: "대표주", baseTicker: "069500", constituents: [ + { sector: "전력설비", proxyTicker: "491820", proxyName: "HANARO 전력설비투자", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "010120", name: "LS ELECTRIC", weight: 0.28 }, + { code: "267260", name: "HD현대일렉트릭", weight: 0.28 }, + { code: "298040", name: "효성중공업", weight: 0.18 }, + { code: "006260", name: "LS", weight: 0.14 }, + { code: "099440", name: "두산에너빌리티", weight: 0.12 }, + ]}, + { sector: "방산", proxyTicker: "463250", proxyName: "TIGER K방산&우주", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "012450", name: "한화에어로스페이스", weight: 0.45 }, { code: "079550", name: "LIG넥스원", weight: 0.25 }, { code: "047810", name: "한국항공우주", weight: 0.15 }, @@ -605,23 +612,49 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [ { code: "009540", name: "HD한국조선해양", weight: 0.20 }, { code: "494670", name: "TIGER 조선TOP10", weight: 0.15, isEtf: true }, ]}, - { sector: "건설/EPC", proxyTicker: "028050", proxyName: "삼성E&A", proxyType: "대표주", baseTicker: "069500", constituents: [ - { code: "028050", name: "삼성E&A", weight: 0.40 }, - { code: "000720", name: "현대건설", weight: 0.30 }, - { code: "006360", name: "GS건설", weight: 0.20 }, - { code: "047040", name: "대우건설", weight: 0.10 }, + { sector: "건설", proxyTicker: "117700", proxyName: "KODEX 건설", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "000720", name: "현대건설", weight: 0.35 }, + { code: "006360", name: "GS건설", weight: 0.25 }, + { code: "047040", name: "대우건설", weight: 0.20 }, + { code: "294870", name: "HDC현대산업개발", weight: 0.20 }, + ]}, + { sector: "플랜트/EPC", proxyTicker: "454320", proxyName: "HANARO CAPEX설비투자iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "028050", name: "삼성E&A", weight: 0.35 }, + { code: "010120", name: "LS ELECTRIC", weight: 0.20 }, + { code: "267260", name: "HD현대일렉트릭", weight: 0.20 }, + { code: "298040", name: "효성중공업", weight: 0.15 }, + { code: "099440", name: "두산에너빌리티", weight: 0.10 }, ]}, { sector: "자동차", proxyTicker: "091180", proxyName: "TIGER 자동차", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "005380", name: "현대차", weight: 0.45 }, { code: "000270", name: "기아", weight: 0.40 }, { code: "012330", name: "현대모비스", weight: 0.15 }, ]}, - { sector: "금융/은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [ + { sector: "은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "105560", name: "KB금융", weight: 0.30 }, { code: "055550", name: "신한지주", weight: 0.30 }, { code: "086790", name: "하나금융지주", weight: 0.20 }, { code: "316140", name: "우리금융지주", weight: 0.10 }, - { code: "003540", name: "대신증권", weight: 0.10 }, + { code: "024110", name: "기업은행", weight: 0.10 }, + ]}, + { sector: "증권", proxyTicker: "0111J0", proxyName: "HANARO 증권고배당TOP3플러스", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "071050", name: "한국금융지주", weight: 0.2135 }, + { code: "006800", name: "미래에셋증권", weight: 0.1934 }, + { code: "005940", name: "NH투자증권", weight: 0.1911 }, + { code: "016360", name: "삼성증권", weight: 0.1434 }, + { code: "039490", name: "키움증권", weight: 0.1373 }, + ]}, + { sector: "지주회사", proxyTicker: "307520", proxyName: "TIGER 지주회사", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "180640", name: "한진칼", weight: 0.1535 }, + { code: "267250", name: "HD현대", weight: 0.0943 }, + { code: "034730", name: "SK", weight: 0.0884 }, + { code: "000150", name: "두산", weight: 0.0878 }, + { code: "005490", name: "POSCO홀딩스", weight: 0.0763 }, + { code: "003550", name: "LG", weight: 0.0752 }, + { code: "006260", name: "LS", weight: 0.0705 }, + { code: "078930", name: "GS", weight: 0.0498 }, + { code: "001040", name: "CJ", weight: 0.0477 }, + { code: "010060", name: "OCI홀딩스", weight: 0.0240 }, ]}, { sector: "2차전지", proxyTicker: "305720", proxyName: "KODEX 2차전지산업", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "373220", name: "LG에너지솔루션", weight: 0.40 }, @@ -635,12 +668,29 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [ { code: "128940", name: "한미약품", weight: 0.15 }, { code: "000100", name: "유한양행", weight: 0.10 }, ]}, - { sector: "원전", proxyTicker: "099440", proxyName: "두산에너빌리티", proxyType: "대표주", baseTicker: "069500", constituents: [ + { sector: "원전", proxyTicker: "434730", proxyName: "HANARO 원자력iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "099440", name: "두산에너빌리티", weight: 0.45 }, { code: "023450", name: "한전기술", weight: 0.25 }, { code: "015760", name: "한국전력", weight: 0.20 }, { code: "071320", name: "지역난방공사", weight: 0.10 }, ]}, + { sector: "로보틱스", proxyTicker: "0190C0", proxyName: "RISE 현대차고정피지컬AI", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "005380", name: "현대차", weight: 0.2402 }, + { code: "012330", name: "현대모비스", weight: 0.1588 }, + { code: "011070", name: "LG이노텍", weight: 0.1450 }, + { code: "000270", name: "기아", weight: 0.1234 }, + { code: "307950", name: "현대오토에버", weight: 0.0899 }, + { code: "277810", name: "레인보우로보틱스", weight: 0.0673 }, + { code: "064400", name: "LG씨엔에스", weight: 0.0519 }, + { code: "454910", name: "두산로보틱스", weight: 0.0367 }, + { code: "108490", name: "로보티즈", weight: 0.0240 }, + { code: "058610", name: "에스피지", weight: 0.0173 }, + { code: "010620", name: "현대미포", weight: 0.0135 }, + { code: "009540", name: "HD한국조선해양", weight: 0.0135 }, + { code: "011210", name: "현대위아", weight: 0.0109 }, + { code: "121600", name: "나노신소재", weight: 0.0040 }, + { code: "028050", name: "삼성E&A", weight: 0.0034 }, + ]}, { sector: "소비재", proxyTicker: "139220", proxyName: "TIGER 생활소비재", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "028260", name: "삼성물산", weight: 0.35 }, { code: "097950", name: "CJ제일제당", weight: 0.25 }, @@ -663,6 +713,7 @@ function normalizeSectorName_(sector) { if (s === "바이오/헬스케어") return "바이오"; if (s === "원전/에너지") return "원전"; if (s === "소비재/유통") return "소비재"; + if (s === "건설/EPC") return "플랜트/EPC"; return s; } @@ -679,17 +730,52 @@ function readSectorUniverse_() { const sheet = ss.getSheetByName("sector_universe"); if (!sheet) { writeDefaultSectorUniverseSheet_(); - return DEFAULT_SECTOR_UNIVERSE_V2; + return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); } const data = sheet.getDataRange().getValues(); if (data.length < 3) { writeDefaultSectorUniverseSheet_(); - return DEFAULT_SECTOR_UNIVERSE_V2; + return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); } const hdr = data[1].map(h => String(h).trim()); const idx = name => hdr.indexOf(name); const required = ["Sector","Proxy_Ticker","Constituent_Code","Weight"]; - if (required.some(h => idx(h) < 0)) return DEFAULT_SECTOR_UNIVERSE_V2; + if (required.some(h => idx(h) < 0)) { + return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); + } const map = {}; for (let i = 2; i < data.length; i++) { @@ -706,6 +792,9 @@ function readSectorUniverse_() { proxyName: idx("Proxy_Name") >= 0 ? String(data[i][idx("Proxy_Name")] ?? "").trim() : "", proxyType: idx("Proxy_Type") >= 0 ? String(data[i][idx("Proxy_Type")] ?? "").trim() : "", baseTicker: idx("Base_Ticker") >= 0 ? normalizeTickerCode(data[i][idx("Base_Ticker")]) : "069500", + source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT", + sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "", + sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "", constituents: [], }; } @@ -714,16 +803,59 @@ function readSectorUniverse_() { name: idx("Constituent_Name") >= 0 ? String(data[i][idx("Constituent_Name")] ?? "").trim() : "", weight, isEtf: idx("Is_ETF") >= 0 ? boolFromSheet_(data[i][idx("Is_ETF")], false) : false, + source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT", + transportMode: idx("Transport_Mode") >= 0 ? String(data[i][idx("Transport_Mode")] ?? "").trim() : "", + sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "", + sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "", }); } const sectors = Object.values(map).filter(s => s.proxyTicker && s.constituents.length > 0); - return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2; + const sectorSet = new Set(sectors.map(s => s.sector)); + for (const fallback of DEFAULT_SECTOR_UNIVERSE_V2) { + if (!fallback || !fallback.sector || sectorSet.has(fallback.sector)) continue; + sectors.push({ + sector: fallback.sector, + proxyTicker: fallback.proxyTicker, + proxyName: fallback.proxyName, + proxyType: fallback.proxyType, + baseTicker: fallback.baseTicker || "069500", + source: fallback.source || "DEFAULT_TEMPLATE", + transportMode: fallback.transportMode || ((fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: fallback.sourceUrl || "", + sourceAsOf: fallback.sourceAsOf || "", + constituents: fallback.constituents.map(c => ({ + code: c.code, + name: c.name || "", + weight: c.weight, + isEtf: Boolean(c.isEtf), + source: c.source || fallback.source || "DEFAULT_TEMPLATE", + transportMode: c.transportMode || ((c.source || fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: c.sourceUrl || fallback.sourceUrl || "", + sourceAsOf: c.sourceAsOf || fallback.sourceAsOf || "", + })), + }); + } + return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + transportMode: sector.transportMode || ((sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + transportMode: c.transportMode || ((c.source || sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); } function writeDefaultSectorUniverseSheet_() { const headers = [ "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Base_Ticker", - "Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source" + "Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source","Transport_Mode", + "Source_URL","Source_AsOf" ]; const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd"); const rows = []; @@ -741,7 +873,10 @@ function writeDefaultSectorUniverseSheet_() { c.isEtf ? "Y" : "N", "Y", today, - "sector_universe(DEFAULT_SECTOR_UNIVERSE_V2)", + sector.source || c.source || "DEFAULT_TEMPLATE", + sector.transportMode || c.transportMode || (((sector.source || c.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || c.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY") ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sector.sourceUrl || c.sourceUrl || "", + sector.sourceAsOf || c.sourceAsOf || "", ]); } } @@ -762,6 +897,228 @@ function sectorUseMode_(quality) { return "INVALID"; } +function parseDateOnly_(value) { + const text = String(value ?? "").trim(); + if (!text) return null; + const norm = text.replace(/\./g, "-").slice(0, 10); + if (!/^\d{4}-\d{2}-\d{2}$/.test(norm)) return null; + const parsed = new Date(norm + "T00:00:00+09:00"); + return Number.isNaN(parsed.getTime()) ? null : parsed; +} + +function calcSectorUniverseRefreshAudit_(universe) { + const today = new Date(); + const rows = []; + const sourceKindCounts = { NAVER_ETF_PAGE: 0, NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED: 0, NAVER_ETF_PAGE_FAIL: 0, REPRESENTATIVE_STOCK_PROXY: 0, SHEET_INPUT: 0, DEFAULT_TEMPLATE: 0, OTHER: 0 }; + const transportModeCounts = { HTML_SERVER_RENDERED: 0, MANUAL_OR_TEMPLATE: 0, LAYOUT_CHANGED: 0, UNKNOWN: 0 }; + let currentCount = 0; + let dueCount = 0; + let overdueCount = 0; + let missingCount = 0; + let templateCount = 0; + let sheetInputCount = 0; + let naverSourceCount = 0; + let layoutChangedCount = 0; + let missingSourceUrlCount = 0; + let staleSectorCount = 0; + let oldestSourceAsOf = null; + let newestSourceAsOf = null; + + for (const sector of universe || []) { + const sectorRows = Array.isArray(sector?.constituents) ? sector.constituents : []; + const sourceKind = String(sector?.source || "SHEET_INPUT").trim() || "SHEET_INPUT"; + if (Object.prototype.hasOwnProperty.call(sourceKindCounts, sourceKind)) { + sourceKindCounts[sourceKind] += 1; + } else { + sourceKindCounts.OTHER += 1; + } + const transportMode = String(sector?.transportMode || "").trim() || + (sourceKind === "NAVER_ETF_PAGE" || sourceKind === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : + sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" ? "LAYOUT_CHANGED" : + (sourceKind === "DEFAULT_TEMPLATE" || sourceKind === "SHEET_INPUT" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN")); + if (Object.prototype.hasOwnProperty.call(transportModeCounts, transportMode)) { + transportModeCounts[transportMode] += 1; + } else { + transportModeCounts.UNKNOWN += 1; + } + + const sourceUrl = String(sector?.sourceUrl || "").trim(); + const sourceAsOf = String(sector?.sourceAsOf || "").trim(); + const parsed = parseDateOnly_(sourceAsOf); + const ageDays = parsed ? Math.floor((today.getTime() - parsed.getTime()) / 86400000) : null; + if (parsed) { + oldestSourceAsOf = oldestSourceAsOf && oldestSourceAsOf < parsed ? oldestSourceAsOf : parsed; + newestSourceAsOf = newestSourceAsOf && newestSourceAsOf > parsed ? newestSourceAsOf : parsed; + } + + let status = "INVALID"; + const reasons = []; + if (sourceKind === "DEFAULT_TEMPLATE") { + status = "TEMPLATE"; + templateCount += 1; + reasons.push("DEFAULT_TEMPLATE"); + } else if (sourceKind === "REPRESENTATIVE_STOCK_PROXY") { + if (!sourceUrl) { + status = "MISSING"; + missingCount += 1; + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } else if (ageDays === null) { + status = "MISSING"; + missingCount += 1; + reasons.push("Source_AsOf_MISSING"); + } else if (ageDays <= 31) { + status = "CURRENT"; + currentCount += 1; + } else if (ageDays <= 45) { + status = "DUE"; + dueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } else { + status = "OVERDUE"; + overdueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else if (sourceKind === "SHEET_INPUT") { + sheetInputCount += 1; + if (!sourceUrl) { + status = "MISSING"; + missingCount += 1; + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } else if (ageDays === null) { + status = "MISSING"; + missingCount += 1; + reasons.push("Source_AsOf_MISSING"); + } else if (ageDays <= 31) { + status = "CURRENT"; + currentCount += 1; + } else if (ageDays <= 45) { + status = "DUE"; + dueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } else { + status = "OVERDUE"; + overdueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else if (sourceKind === "NAVER_ETF_PAGE") { + naverSourceCount += 1; + if (!sourceUrl) { + status = "MISSING"; + missingCount += 1; + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } else if (ageDays === null) { + status = "MISSING"; + missingCount += 1; + reasons.push("Source_AsOf_MISSING"); + } else if (ageDays <= 31) { + status = "CURRENT"; + currentCount += 1; + } else if (ageDays <= 45) { + status = "DUE"; + dueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } else { + status = "OVERDUE"; + overdueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else if (sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED") { + layoutChangedCount += 1; + status = "LAYOUT_CHANGED"; + if (!sourceUrl) { + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } + if (ageDays === null) { + reasons.push("Source_AsOf_MISSING"); + } else { + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else { + status = "INVALID"; + reasons.push("SOURCE_KIND_UNKNOWN"); + if (!sourceUrl) missingSourceUrlCount += 1; + } + if (!sourceUrl) reasons.push("Source_URL_MISSING"); + if (ageDays !== null && ageDays < 0) reasons.push("FUTURE_DATE"); + + rows.push({ + sector: sector.sector || "", + proxy_ticker: sector.proxyTicker || "", + proxy_name: sector.proxyName || "", + proxy_type: sector.proxyType || "", + source_kind: sourceKind, + transport_mode: transportMode, + source_url: sourceUrl, + source_asof: sourceAsOf, + age_days: ageDays === null ? "" : ageDays, + constituent_count: sectorRows.length, + stock_count: sectorRows.filter(c => !c.isEtf).length, + etf_count: sectorRows.filter(c => c.isEtf).length, + weight_sum: sectorRows.reduce((a, c) => a + (Number(c.weight) || 0), 0), + status: status, + refresh_reason: reasons.length ? reasons.join(";") : "OK", + }); + } + + rows.sort((a, b) => { + if (a.status === "CURRENT" && b.status !== "CURRENT") return -1; + if (a.status !== "CURRENT" && b.status === "CURRENT") return 1; + return String(a.sector || "").localeCompare(String(b.sector || "")); + }); + + return { + formula_id: "sector_universe_refresh_audit_v1", + gate: (templateCount > 0 || missingSourceUrlCount > 0 || overdueCount > 0 || staleSectorCount > 0) ? "FAIL" : (sheetInputCount > 0 ? "WARN" : "PASS"), + summary: { + sector_count: (universe || []).length, + current_count: currentCount, + due_count: dueCount, + overdue_count: overdueCount, + missing_count: missingCount, + template_count: templateCount, + sheet_input_count: sheetInputCount, + naver_source_count: naverSourceCount, + layout_changed_count: layoutChangedCount, + missing_source_url_count: missingSourceUrlCount, + stale_sector_count: staleSectorCount, + oldest_source_asof: oldestSourceAsOf ? Utilities.formatDate(oldestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "", + newest_source_asof: newestSourceAsOf ? Utilities.formatDate(newestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "", + source_kind_counts: sourceKindCounts, + transport_mode_counts: transportModeCounts, + ajax_mode: "NO", + transport_model: "HTML_SERVER_RENDERED", + }, + rows: rows, + }; +} + +function writeSectorUniverseRefreshAuditSheet_(audit) { + if (!audit || typeof audit !== "object") return 0; + const headers = [ + "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode", + "source_url", "source_asof", "age_days", "constituent_count", + "stock_count", "etf_count", "weight_sum", "status", "refresh_reason", + ]; + const rows = Array.isArray(audit.rows) + ? audit.rows.map(function(r) { + return headers.map(function(h) { return r[h] ?? ""; }); + }) + : []; + writeToSheet("sector_universe_refresh_audit", headers, rows); + return rows.length; +} + function scoreSmartMoneyNorm_(v) { if (!Number.isFinite(v)) return 0; if (v >= 0.15) return 25; @@ -955,7 +1312,7 @@ function runSectorFlowV3() { const etfRawMap = buildEtfRawMap_(buildEtfRawRows_(universe)); const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd"); const headers = [ - "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight", + "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Transport_Mode","Coverage_Weight", "Sector_Ret5D","Sector_Ret20D","Sector_RS_20D", "SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW","SmartMoney_5D_Norm", "Flow_Breadth_5D","Flow_Rows_Min","Stale_Count", @@ -1031,6 +1388,9 @@ function runSectorFlowV3() { const etfNavRisk = sector.proxyType === "ETF" ? (etfRaw?.navRisk ?? "NAV_DATA_MISSING") : "NOT_ETF"; const etfLiquidityStatus = sector.proxyType === "ETF" ? (etfRaw?.liquidityStatus ?? "WARN") : "NOT_ETF"; const etfExecutionUse = sector.proxyType === "ETF" ? (etfRaw?.executionUse ?? "WATCH_ONLY") : "NOT_ETF"; + const transportMode = sector.source === "NAVER_ETF_PAGE" ? "HTML_SERVER_RENDERED" + : (sector.source === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" + : (sector.source === "DEFAULT_TEMPLATE" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN")); const quality = sectorDataQuality_(coverage, flowRowsMin, staleCount, proxy.ok, Number.isFinite(smart5Norm), weightSum); const routeUse = sectorUseMode_(quality); let score = calcSectorScoreV2_(sectorRet20D, sectorRs20D, smart5Norm, smart20Norm, breadth5, tradeValueRatio, sector.proxyType, etfLiquidityScore); @@ -1047,6 +1407,7 @@ function runSectorFlowV3() { if (staleCount > 0) reasons.push(`Stale_Count=${staleCount}`); if (!proxy.ok) reasons.push("Proxy_Price_FAIL"); if (!Number.isFinite(smart5Norm)) reasons.push("SmartMoney_Norm_MISSING"); + if ((sector.source || "DEFAULT_TEMPLATE") === "DEFAULT_TEMPLATE") reasons.push("Universe_Source=DEFAULT_TEMPLATE"); if (sector.proxyType === "ETF" && etfNavRisk === "NAV_DATA_MISSING") reasons.push("ETF_NAV_DATA_MISSING"); if (sector.proxyType === "ETF" && etfLiquidityStatus !== "OK") reasons.push(`ETF_Liquidity=${etfLiquidityStatus}`); if (sector.proxyType === "ETF" && etfExecutionUse !== "TRADE_OK") reasons.push(`ETF_Execution=${etfExecutionUse}`); @@ -1056,6 +1417,8 @@ function runSectorFlowV3() { proxyTicker: sector.proxyTicker, proxyName: sector.proxyName, proxyType: sector.proxyType || "대표주", + universeSource: sector.source || "DEFAULT_TEMPLATE", + transportMode: transportMode, coverage, sectorRet5D, sectorRet20D, @@ -1106,7 +1469,7 @@ function appendSectorFlowHistoryV2_(rows) { const headers = [ "Snapshot_Date","Sector","Sector_Score","Sector_Rank","SmartMoney_5D_KRW","SmartMoney_20D_KRW", - "Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Reason","Saved_At" + "Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Transport_Mode","Reason","Saved_At" ]; const ss = getSpreadsheet_(); let sheet = ss.getSheetByName("sector_flow_history"); @@ -1119,22 +1482,25 @@ function appendSectorFlowHistoryV2_(rows) { const hdr = data[1] ?? headers; const dateIdx = hdr.indexOf("Snapshot_Date"); const sectorIdx = hdr.indexOf("Sector"); - const existing = []; + const normalizeRow_ = (row) => { + const outRow = Array.isArray(row) ? row.slice(0, headers.length) : []; + while (outRow.length < headers.length) outRow.push(""); + return outRow; + }; const byKey = {}; for (let i = 2; i < data.length; i++) { const row = data[i]; const d = normalizeSheetDateString_(row[dateIdx]); const s = String(row[sectorIdx] ?? "").trim(); if (!d || !s) continue; - byKey[`${d}|${s}`] = row; - existing.push(row); + byKey[`${d}|${s}`] = normalizeRow_(row); } const savedAt = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd HH:mm:ss"); for (const r of rows) { - byKey[`${r.asOfDate}|${r.sector}`] = [ + byKey[`${r.asOfDate}|${r.sector}`] = normalizeRow_([ r.asOfDate, r.sector, r.score, r.rank, Math.round(r.smart5), Math.round(r.smart20), - roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.reason, savedAt - ]; + roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.transportMode || "", r.reason, savedAt + ]); } const out = Object.values(byKey).sort((a, b) => { const da = String(a[0]), db = String(b[0]); @@ -1144,7 +1510,7 @@ function appendSectorFlowHistoryV2_(rows) { sheet.clearContents(); sheet.getRange(1, 1).setValue(`updated: ${savedAt} KST`); sheet.getRange(2, 1, 1, headers.length).setValues([headers]); - if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out); + if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out.map(normalizeRow_)); } function normalizeSheetDateString_(value) { @@ -1235,7 +1601,7 @@ function readW2LegacySectorFlow_() { function writeLegacySectorFlowFromStage2_(stage2Rows) { const headers = [ - "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight", + "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Coverage_Weight", "Sector_Ret5D","Sector_Ret10D","Sector_Ret20D","Sector_RS_20D", "SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW", "SmartMoney_5D_Norm","SmartMoney_20D_Norm","Flow_Breadth_5D","Flow_Rows_Min","Stale_Count", @@ -1277,7 +1643,7 @@ function writeLegacySectorFlowFromStage2_(stage2Rows) { const frg20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : ""; const inst20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : ""; return [ - r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.coverage, + r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.universeSource, r.coverage, r.sectorRet5D, r.proxyRet10D, r.sectorRet20D, r.sectorRs20D, r.smart5, r.smart20, r.avgTv20Krw, r.smart5Norm, r.smart20Norm, r.breadth5, r.flowRowsMin, r.staleCount, @@ -1798,6 +2164,15 @@ function run_all() { } }, { name: "runSectorFlow", fn: runSectorFlow }, + { + name: "runSectorUniverseRefreshAudit", + fn: function() { + const universe = readSectorUniverse_(); + const audit = calcSectorUniverseRefreshAudit_(universe); + writeSectorUniverseRefreshAuditSheet_(audit); + Logger.log("[RUN_ALL] sector_universe_refresh_audit gate=" + audit.gate + " rows=" + (audit.rows || []).length); + } + }, { name: "runDataFeed", fn: runDataFeed }, { name: "runCoreSatelliteFlow_", fn: runCoreSatelliteFlow_ }, { name: "runEventRisk", fn: runEventRisk }, diff --git a/src/gas_adapter_parts/gdc_02_account_satellite.gs b/src/gas_adapter_parts/gdc_02_account_satellite.gs index fe40a66..ab10efc 100644 --- a/src/gas_adapter_parts/gdc_02_account_satellite.gs +++ b/src/gas_adapter_parts/gdc_02_account_satellite.gs @@ -1806,10 +1806,16 @@ function getCoreSatelliteUniverse() { // 자동차 { code:"005380", name:"현대차", sector:"자동차" }, { code:"000270", name:"기아", sector:"자동차" }, - // 밸류업/금융 - { code:"105560", name:"KB금융", sector:"금융/은행" }, - { code:"055550", name:"신한지주", sector:"금융/은행" }, - { code:"024110", name:"기업은행", sector:"금융/은행" }, + // 은행 / 증권 / 지주회사 + { code:"105560", name:"KB금융", sector:"은행" }, + { code:"055550", name:"신한지주", sector:"은행" }, + { code:"024110", name:"기업은행", sector:"은행" }, + { code:"071050", name:"한국금융지주", sector:"증권" }, + { code:"006800", name:"미래에셋증권", sector:"증권" }, + { code:"005940", name:"NH투자증권", sector:"증권" }, + { code:"180640", name:"한진칼", sector:"지주회사" }, + { code:"267250", name:"HD현대", sector:"지주회사" }, + { code:"034730", name:"SK", sector:"지주회사" }, // 바이오 { code:"207940", name:"삼성바이오로직스",sector:"바이오" }, { code:"068270", name:"셀트리온", sector:"바이오" }, @@ -1820,7 +1826,7 @@ function getCoreSatelliteUniverse() { { code:"006400", name:"삼성SDI", sector:"2차전지" }, { code:"003670", name:"포스코퓨처엠",sector:"2차전지" }, // 지주/기타 - { code:"028260", name:"삼성물산", sector:"지주" } + { code:"028260", name:"삼성물산", sector:"지주회사" } ]; list = defaults.map(t => ({ ...t, addedDate: todayStr })); diff --git a/src/gas_adapter_parts/gdf_01_price_metrics.gs b/src/gas_adapter_parts/gdf_01_price_metrics.gs index e408e3b..c85f4a2 100644 --- a/src/gas_adapter_parts/gdf_01_price_metrics.gs +++ b/src/gas_adapter_parts/gdf_01_price_metrics.gs @@ -11,7 +11,7 @@ * * 실행 시간 전략 (GAS 6분 제한): * - data_feed: 보유 10종목만 → ~30초 - * - sector_flow: 11섹터×3종목 → ~3분 + * - sector_flow: 분리된 섹터×3종목 → ~3분 * - macro/unified: 단순 집계 → ~30초 * - core_satellite(100종목): 별도 트리거, 청크 분할 실행 * @@ -27,13 +27,24 @@ const TICKERS_BASE = [ { code: "000660", name: "SK하이닉스" }, { code: "000270", name: "기아" }, { code: "091160", name: "KODEX 반도체" }, + { code: "463250", name: "TIGER K방산&우주" }, { code: "064350", name: "현대로템" }, { code: "012450", name: "한화에어로스페이스" }, + { code: "117700", name: "KODEX 건설" }, { code: "028050", name: "삼성E&A" }, + { code: "454320", name: "HANARO CAPEX설비투자iSelect" }, { code: "010120", name: "LS ELECTRIC" }, { code: "0117V0", name: "TIGER AI전력기기" }, + { code: "491820", name: "HANARO 전력설비투자" }, { code: "494670", name: "TIGER 조선TOP10" }, { code: "471990", name: "KODEX AI반도체핵심장비" }, + { code: "434730", name: "HANARO 원자력iSelect" }, + { code: "0111J0", name: "HANARO 증권고배당TOP3플러스" }, + { code: "307520", name: "TIGER 지주회사" }, + { code: "0190C0", name: "RISE 현대차고정피지컬AI" }, + { code: "011070", name: "LG이노텍" }, + { code: "010620", name: "현대미포" }, + { code: "121600", name: "나노신소재" }, ]; // TICKERS 우선순위: TICKERS_BASE → account_snapshot 보유종목 → watch_tickers_override 수동 추가. @@ -132,9 +143,12 @@ const TICKER_SECTOR_MAP = { "010120": "AI전력", "267260": "AI전력", "006260": "AI전력", "012450": "방산", "079550": "방산", "047810": "방산", "064350": "방산", "329180": "조선", "042660": "조선", "009540": "조선", - "028050": "건설/EPC","000720": "건설/EPC","006360": "건설/EPC", + "028050": "플랜트/EPC","000720": "건설","006360": "건설", "005380": "자동차", "000270": "자동차", "012330": "자동차", - "105560": "금융/은행","055550": "금융/은행","086790": "금융/은행", + "105560": "은행","055550": "은행","086790": "은행","316140": "은행","024110": "은행", + "071050": "증권","006800": "증권","005940": "증권","016360": "증권","039490": "증권", + "180640": "지주회사","267250": "지주회사","034730": "지주회사","000150": "지주회사","005490": "지주회사", + "003550": "지주회사","006260": "지주회사","078930": "지주회사","001040": "지주회사","010060": "지주회사", "373220": "2차전지","006400": "2차전지","051910": "2차전지", "207940": "바이오", "068270": "바이오", "128940": "바이오", "099440": "원전", "023450": "원전", "015760": "원전", @@ -142,8 +156,12 @@ const TICKER_SECTOR_MAP = { // ETF — 해당 섹터로 매핑 "091160": "반도체", "0117V0": "AI전력", "494670": "조선", "471990": "반도체", // KODEX AI반도체핵심장비 (누락 추가) - "266410": "바이오", "091180": "자동차", "091170": "금융/은행", + "266410": "바이오", "091180": "자동차", "091170": "은행", + "0111J0": "증권", "307520": "지주회사", "305720": "2차전지","139220": "소비재", + "463250": "방산", "434730": "원전", "454320": "플랜트/EPC", + "491820": "전력설비", "117700": "건설", "0190C0": "로보틱스", + "011070": "로보틱스", "010620": "로보틱스", "121600": "로보틱스", }; // 섹터 → Tier 매핑 (C5 daily_leader_scan 점수 정밀화) @@ -151,14 +169,19 @@ const TICKER_SECTOR_MAP = { const SECTOR_TIER_MAP = { "반도체": "Tier_1", "AI전력": "Tier_1", + "전력설비": "Tier_1", "방산": "Tier_1", "조선": "Tier_1", "자동차": "Tier_2", "2차전지": "Tier_2", "바이오": "Tier_2", "원전": "Tier_2", - "건설/EPC": "Tier_3", - "금융/은행":"Tier_3", + "건설": "Tier_3", + "플랜트/EPC": "Tier_3", + "로보틱스": "Tier_2", + "은행":"Tier_3", + "증권":"Tier_3", + "지주회사":"Tier_3", "소비재": "Tier_3", }; diff --git a/src/quant_engine/convert_xlsx_to_json.py b/src/quant_engine/convert_xlsx_to_json.py index bf6eb36..f0c6854 100644 --- a/src/quant_engine/convert_xlsx_to_json.py +++ b/src/quant_engine/convert_xlsx_to_json.py @@ -174,6 +174,28 @@ def normalize_legacy_source_markers(sheet: str, records: list[dict[str, Any]]) - source = record.get("Source") if isinstance(source, str) and "sector_targets.json" in source: record["Source"] = source.replace("sector_targets.json", "sector_universe") + source_url = str(record.get("Source_URL") or "").strip() + transport_mode = str(record.get("Transport_Mode") or "").strip() + if record.get("Source") in (None, "", "DEFAULT_TEMPLATE"): + if "finance.naver.com/item/main.naver?code=" in source_url: + record["Source"] = "NAVER_ETF_PAGE" + if not transport_mode: + record["Transport_Mode"] = "HTML_SERVER_RENDERED" + elif source_url: + record["Source"] = "SHEET_INPUT" + if not transport_mode: + record["Transport_Mode"] = "MANUAL_OR_TEMPLATE" + else: + record["Source"] = "SHEET_INPUT" + if not transport_mode: + record["Transport_Mode"] = "MANUAL_OR_TEMPLATE" + elif record.get("Source") == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" and not transport_mode: + record["Transport_Mode"] = "LAYOUT_CHANGED" + elif record.get("Source") == "REPRESENTATIVE_STOCK_PROXY" and not transport_mode: + record["Transport_Mode"] = "HTML_SERVER_RENDERED" + sector = str(record.get("Sector") or "").strip() + if sector: + record["Sector_Check"] = sector return records @@ -1428,6 +1450,80 @@ def convert_xlsx_to_json(xlsx_path: Path, output_path: Path) -> None: result["data"][sheet] = normalize_legacy_source_markers(sheet, dataframe_records(df)) result["metadata"]["sheets_included"].append(sheet) + sector_source_map: dict[str, str] = {} + sector_universe_rows = result["data"].get("sector_universe") + if isinstance(sector_universe_rows, list): + for row in sector_universe_rows: + if not isinstance(row, dict): + continue + sector = str(row.get("Sector") or "").strip() + if not sector: + continue + source = str(row.get("Source") or "").strip() or "SHEET_INPUT" + sector_source_map.setdefault(sector, source) + + sector_flow_rows = result["data"].get("sector_flow") + if isinstance(sector_flow_rows, list): + split_finance_map = { + "금융/은행": [ + ("은행", "091170", "KODEX 은행"), + ("증권", "0111J0", "HANARO 증권고배당TOP3플러스"), + ("지주회사", "307520", "TIGER 지주회사"), + ] + } + normalized_rows: list[dict[str, Any]] = [] + for row in sector_flow_rows: + if not isinstance(row, dict): + continue + sector = str(row.get("Sector") or "").strip() + if not sector: + continue + source = str(row.get("Universe_Source") or "").strip() or sector_source_map.get(sector, "SHEET_INPUT") + row["Universe_Source"] = source + if sector in split_finance_map: + for split_sector, split_ticker, split_name in split_finance_map[sector]: + cloned = dict(row) + cloned["Sector"] = split_sector + cloned["Proxy_Ticker"] = split_ticker + cloned["Proxy_Name"] = split_name + cloned["Proxy_Type"] = "ETF" + cloned["ETF_Code"] = split_ticker + cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER" + cloned["Universe_Source"] = "NAVER_ETF_PAGE" + normalized_rows.append(cloned) + else: + normalized_rows.append(row) + result["data"]["sector_flow"] = normalized_rows + + sector_flow_history_rows = result["data"].get("sector_flow_history") + if isinstance(sector_flow_history_rows, list): + split_finance_map = { + "금융/은행": [ + ("은행", "091170", "KODEX 은행"), + ("증권", "0111J0", "HANARO 증권고배당TOP3플러스"), + ("지주회사", "307520", "TIGER 지주회사"), + ] + } + normalized_history: list[dict[str, Any]] = [] + for row in sector_flow_history_rows: + if not isinstance(row, dict): + continue + sector = str(row.get("Sector") or "").strip() + if not sector: + continue + if sector in split_finance_map: + for split_sector, split_ticker, split_name in split_finance_map[sector]: + cloned = dict(row) + cloned["Sector"] = split_sector + cloned["Proxy_Ticker"] = split_ticker + cloned["Proxy_Name"] = split_name + cloned["Proxy_Type"] = "ETF" + cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER" + normalized_history.append(cloned) + else: + normalized_history.append(row) + result["data"]["sector_flow_history"] = normalized_history + # harness_context 시트가 없으면 메타에 경고 기록 if "_harness_context" not in result["data"]: result["metadata"]["harness_context_missing"] = ( diff --git a/src/quant_engine/run_formula_golden_cases_v2.py b/src/quant_engine/run_formula_golden_cases_v2.py index f98262f..88c9d0f 100644 --- a/src/quant_engine/run_formula_golden_cases_v2.py +++ b/src/quant_engine/run_formula_golden_cases_v2.py @@ -462,7 +462,8 @@ _TICKER_SECTOR_MAP = { "010120": "AI전력", "267260": "AI전력", "012450": "방산", "064350": "방산", "329180": "조선", "494670": "조선", - "028050": "건설/EPC", + "117700": "건설", "028050": "플랜트/EPC", "454320": "플랜트/EPC", + "0190C0": "로보틱스", "005380": "자동차", "000270": "자동차", "091160": "반도체", "0117V0": "AI전력", } diff --git a/src/quant_engine/sector_universe_refresh.py b/src/quant_engine/sector_universe_refresh.py new file mode 100644 index 0000000..c1a53a4 --- /dev/null +++ b/src/quant_engine/sector_universe_refresh.py @@ -0,0 +1,296 @@ +from __future__ import annotations + +import datetime as dt +from typing import Any + + +DEFAULT_MAX_AGE_DAYS = 31 + + +def _txt(value: Any, default: str = "") -> str: + if value is None: + return default + if isinstance(value, str): + return value.strip() or default + return str(value).strip() or default + + +def _as_float(value: Any) -> float | None: + try: + if value in (None, ""): + return None + if isinstance(value, str): + text = value.strip().replace("%", "").replace(",", "") + if not text: + return None + return float(text) + return float(value) + except Exception: + return None + + +def _parse_date(value: Any) -> dt.date | None: + if value in (None, ""): + return None + if isinstance(value, dt.date): + return value + text = _txt(value) + if not text: + return None + for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"): + try: + return dt.datetime.strptime(text[:10], fmt).date() + except Exception: + pass + try: + return dt.date.fromisoformat(text[:10]) + except Exception: + return None + + +def _age_days(value: Any, today: dt.date | None = None) -> int | None: + parsed = _parse_date(value) + if parsed is None: + return None + today = today or dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date() + return (today - parsed).days + + +def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]: + if not isinstance(payload, dict): + return [] + inner = payload.get("data") + if isinstance(inner, dict) and isinstance(inner.get("sector_universe"), list): + return [r for r in inner["sector_universe"] if isinstance(r, dict)] + if isinstance(payload.get("sector_universe"), list): + return [r for r in payload["sector_universe"] if isinstance(r, dict)] + return [] + + +def build_sector_universe_refresh_audit(payload: dict[str, Any] | None) -> dict[str, Any]: + rows = _extract_sector_rows(payload) + today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date() + + grouped: dict[str, list[dict[str, Any]]] = {} + for row in rows: + sector = _txt(row.get("Sector")) + if not sector: + continue + grouped.setdefault(sector, []).append(row) + + detail_rows: list[dict[str, Any]] = [] + source_kind_counts = { + "NAVER_ETF_PAGE": 0, + "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0, + "NAVER_ETF_PAGE_FAIL": 0, + "REPRESENTATIVE_STOCK_PROXY": 0, + "SHEET_INPUT": 0, + "DEFAULT_TEMPLATE": 0, + "OTHER": 0, + } + transport_mode_counts = { + "HTML_SERVER_RENDERED": 0, + "MANUAL_OR_TEMPLATE": 0, + "LAYOUT_CHANGED": 0, + "UNKNOWN": 0, + } + state_counts = {"CURRENT": 0, "DUE": 0, "OVERDUE": 0, "MISSING": 0, "TEMPLATE": 0, "INVALID": 0} + stale_sector_count = 0 + layout_changed_count = 0 + missing_source_url_count = 0 + sheet_input_count = 0 + template_count = 0 + newest_asof: dt.date | None = None + oldest_asof: dt.date | None = None + + for sector, sector_rows in grouped.items(): + source_values = {_txt(r.get("Source"), "SHEET_INPUT") or "SHEET_INPUT" for r in sector_rows} + if "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" in source_values: + source_kind = "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" + elif "NAVER_ETF_PAGE_FAIL" in source_values: + source_kind = "NAVER_ETF_PAGE_FAIL" + elif "NAVER_ETF_PAGE" in source_values: + source_kind = "NAVER_ETF_PAGE" + elif "REPRESENTATIVE_STOCK_PROXY" in source_values: + source_kind = "REPRESENTATIVE_STOCK_PROXY" + elif "DEFAULT_TEMPLATE" in source_values: + source_kind = "DEFAULT_TEMPLATE" + elif "SHEET_INPUT" in source_values: + source_kind = "SHEET_INPUT" + else: + source_kind = "OTHER" + source_kind_counts[source_kind if source_kind in source_kind_counts else "OTHER"] += 1 + + source_urls = [_txt(r.get("Source_URL")) for r in sector_rows if _txt(r.get("Source_URL"))] + source_url = source_urls[0] if source_urls else "" + + asof_candidates = [_parse_date(r.get("Source_AsOf")) for r in sector_rows] + asof_dates = [d for d in asof_candidates if d is not None] + source_asof = max(asof_dates) if asof_dates else None + if source_asof is not None: + newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof) + oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof) + + age_days = _age_days(source_asof, today) if source_asof else None + constituent_count = len(sector_rows) + etf_count = sum(1 for r in sector_rows if str(r.get("Is_ETF") or "").strip().upper() in {"Y", "YES", "TRUE", "1"}) + stock_count = constituent_count - etf_count + weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows) + status = "INVALID" + reason_parts: list[str] = [] + transport_mode = "UNKNOWN" + + if source_kind == "DEFAULT_TEMPLATE": + status = "TEMPLATE" + reason_parts.append("DEFAULT_TEMPLATE") + template_count += 1 + transport_mode = "MANUAL_OR_TEMPLATE" + elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": + status = "LAYOUT_CHANGED" + transport_mode = "LAYOUT_CHANGED" + reason_parts.append("LAYOUT_CHANGED") + layout_changed_count += 1 + if not source_url: + missing_source_url_count += 1 + reason_parts.append("Source_URL_MISSING") + if age_days is None: + reason_parts.append("Source_AsOf_MISSING") + else: + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + elif source_kind == "NAVER_ETF_PAGE_FAIL": + status = "INVALID" + transport_mode = "UNKNOWN" + reason_parts.append("NAVER_ETF_PAGE_FAIL") + if not source_url: + missing_source_url_count += 1 + elif source_kind == "REPRESENTATIVE_STOCK_PROXY": + transport_mode = "HTML_SERVER_RENDERED" + if not source_url: + status = "MISSING" + missing_source_url_count += 1 + reason_parts.append("Source_URL_MISSING") + elif age_days is None: + status = "MISSING" + reason_parts.append("Source_AsOf_MISSING") + elif age_days <= DEFAULT_MAX_AGE_DAYS: + status = "CURRENT" + elif age_days <= 45: + status = "DUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + status = "OVERDUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + elif source_kind == "SHEET_INPUT": + sheet_input_count += 1 + transport_mode = "MANUAL_OR_TEMPLATE" + if not source_url: + status = "MISSING" + reason_parts.append("Source_URL_MISSING") + missing_source_url_count += 1 + elif age_days is None: + status = "MISSING" + reason_parts.append("Source_AsOf_MISSING") + elif age_days <= DEFAULT_MAX_AGE_DAYS: + status = "CURRENT" + elif age_days <= 45: + status = "DUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + status = "OVERDUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + elif source_kind == "NAVER_ETF_PAGE": + transport_mode = "HTML_SERVER_RENDERED" + if not source_url: + status = "MISSING" + reason_parts.append("Source_URL_MISSING") + missing_source_url_count += 1 + elif age_days is None: + status = "MISSING" + reason_parts.append("Source_AsOf_MISSING") + elif age_days <= DEFAULT_MAX_AGE_DAYS: + status = "CURRENT" + elif age_days <= 45: + status = "DUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + status = "OVERDUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + if not source_url: + missing_source_url_count += 1 + status = "INVALID" + reason_parts.append("SOURCE_KIND_UNKNOWN") + transport_mode = "UNKNOWN" + + if source_kind == "NAVER_ETF_PAGE" and not source_url: + reason_parts.append("NAVER_URL_MISSING") + if not source_url: + reason_parts.append("Source_URL_MISSING") + if age_days is not None and age_days < 0: + reason_parts.append("FUTURE_DATE") + + transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1 + refresh_reason = ";".join(reason_parts) if reason_parts else "OK" + detail_rows.append({ + "sector": sector, + "proxy_ticker": _txt(sector_rows[0].get("Proxy_Ticker")), + "proxy_name": _txt(sector_rows[0].get("Proxy_Name")), + "proxy_type": _txt(sector_rows[0].get("Proxy_Type")), + "source_kind": source_kind, + "transport_mode": transport_mode, + "source_url": source_url, + "source_asof": source_asof.isoformat() if source_asof else "", + "age_days": age_days if age_days is not None else "", + "constituent_count": constituent_count, + "stock_count": stock_count, + "etf_count": etf_count, + "weight_sum": round(weight_sum, 4), + "status": status, + "refresh_reason": refresh_reason, + }) + + detail_rows.sort(key=lambda r: (r.get("status") != "CURRENT", r.get("status"), r.get("sector"))) + summary = { + "sector_count": len(grouped), + "current_count": sum(1 for r in detail_rows if r.get("status") == "CURRENT"), + "due_count": sum(1 for r in detail_rows if r.get("status") == "DUE"), + "overdue_count": sum(1 for r in detail_rows if r.get("status") == "OVERDUE"), + "missing_count": sum(1 for r in detail_rows if r.get("status") == "MISSING"), + "template_count": template_count, + "sheet_input_count": sheet_input_count, + "naver_source_count": sum(1 for r in detail_rows if r.get("source_kind") == "NAVER_ETF_PAGE"), + "missing_source_url_count": missing_source_url_count, + "stale_sector_count": stale_sector_count, + "layout_changed_count": layout_changed_count, + "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "", + "newest_source_asof": newest_asof.isoformat() if newest_asof else "", + "source_kind_counts": source_kind_counts, + "transport_mode_counts": transport_mode_counts, + "ajax_mode": "NO", + "transport_model": "HTML_SERVER_RENDERED", + } + + gate = "PASS" + if template_count > 0 or missing_source_url_count > 0 or stale_sector_count > 0 or layout_changed_count > 0: + gate = "FAIL" + elif sheet_input_count > 0: + gate = "WARN" + + return { + "formula_id": "sector_universe_refresh_audit_v1", + "gate": gate, + "max_age_days": DEFAULT_MAX_AGE_DAYS, + "summary": summary, + "rows": detail_rows, + "source": { + "sector_rows": len(rows), + "grouped_sectors": len(grouped), + }, + } diff --git a/tools/automate_routine.py b/tools/automate_routine.py index 4857d75..eb7ac34 100644 --- a/tools/automate_routine.py +++ b/tools/automate_routine.py @@ -3,6 +3,7 @@ import os import requests import time import subprocess +import argparse from pathlib import Path ROOT = Path(__file__).resolve().parent.parent @@ -10,6 +11,7 @@ CLASPRC_PATH = ROOT / ".clasprc.json" CLASP_PATH = ROOT / ".clasp.json" SPREADSHEET_ID = "1e1TNlLfnT69nvw-I1wU_oBHmEtI2pfbld3e0fFmtrZM" OUTPUT_XLSX = ROOT / "GatherTradingData.xlsx" +LOCAL_OUTPUT_XLSX = ROOT / "outputs" / "sector_insights_enhanced" / "GatherTradingData_sector_insights.xlsx" def get_tokens(): if not CLASPRC_PATH.exists(): @@ -75,20 +77,46 @@ def download_spreadsheet(spreadsheet_id, access_token, output_path): print(f"Successfully downloaded to {output_path}") return True +def validate_monthly_sector_refresh(xlsx_path: Path) -> bool: + cmd = [ + "python", + "tools/validate_sector_universe_monthly_refresh_v1.py", + "--xlsx", + str(xlsx_path), + ] + print(f"Validating monthly sector refresh: {xlsx_path} ...") + res = subprocess.run(cmd, cwd=str(ROOT)) + if res.returncode == 0: + print("Monthly sector refresh validation passed.") + return True + print("Monthly sector refresh validation failed.") + return False + def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--function", default="runDataFeed", help="Primary GAS function to execute before download") + parser.add_argument("--fallback-function", default="run_all", help="Fallback GAS function to execute if primary fails") + args = parser.parse_args() + try: tokens = get_tokens() script_id = get_script_id() access_token = refresh_access_token(tokens) - # Step 1: Execute GAS run_all - if run_gas_function(script_id, access_token, "run_all"): + # Step 1: Execute GAS runDataFeed first, then fallback to run_all if needed. + primary_ok = run_gas_function(script_id, access_token, args.function) + if not primary_ok and args.fallback_function and args.fallback_function != args.function: + print(f"Primary function {args.function} failed; trying fallback {args.fallback_function} ...") + primary_ok = run_gas_function(script_id, access_token, args.fallback_function) + + if primary_ok: print("Waiting a bit for GAS processes to finalize (optional)...") time.sleep(5) # Step 2: Download spreadsheet if download_spreadsheet(SPREADSHEET_ID, access_token, OUTPUT_XLSX): print("\nRoutine Part 1 & 2 complete.") + validate_monthly_sector_refresh(OUTPUT_XLSX) print("Final step: npm run prepare-upload-zip") else: print("\nDownload failed. Please check Drive API scopes.") @@ -98,6 +126,7 @@ def main(): fallback = subprocess.run(["python", "tools/update_workbook_sector_insights.py"], cwd=str(ROOT)) if fallback.returncode == 0: print("Local sector-insight workbook updated.") + validate_monthly_sector_refresh(LOCAL_OUTPUT_XLSX) else: print("Local sector-insight workbook build failed.") diff --git a/tools/update_sector_universe_from_naver.py b/tools/update_sector_universe_from_naver.py new file mode 100644 index 0000000..1a74021 --- /dev/null +++ b/tools/update_sector_universe_from_naver.py @@ -0,0 +1,616 @@ +from __future__ import annotations + +import argparse +import datetime as dt +import json +import re +import shutil +import sys +from collections import OrderedDict +from pathlib import Path +from typing import Any +from urllib.parse import urljoin, urlparse, parse_qs + +import requests +from bs4 import BeautifulSoup +from openpyxl import load_workbook +from openpyxl.styles import Alignment, Font, PatternFill +from openpyxl.utils import get_column_letter + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit + +DEFAULT_INPUT_XLSX = ROOT / "GatherTradingData.xlsx" +DEFAULT_OUTPUT_XLSX = ROOT / "outputs" / "sector_universe_refresh" / "GatherTradingData_sector_universe.xlsx" +DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36" +NAVER_BASE = "https://finance.naver.com" +NAVER_ITEM_CODE_RE = re.compile(r"(?:https?:)?//finance\.naver\.com(?P/item/[^\"'\s<>]+code=(?P\d+)[^\"'\s<>]*)", re.I) +NAVER_REL_CODE_RE = re.compile(r"(?P/item/[^\"'\s<>]+code=(?P\d+)[^\"'\s<>]*)", re.I) + +TITLE_FILL = PatternFill("solid", fgColor="1F4E78") +HEADER_FILL = PatternFill("solid", fgColor="1F4E78") +SUBHEADER_FILL = PatternFill("solid", fgColor="D9EAF7") +WHITE_FONT = Font(color="FFFFFF", bold=True) +BOLD_FONT = Font(bold=True) +NOTE_FONT = Font(italic=True, color="666666") + + +def _kst_now() -> dt.datetime: + return dt.datetime.now(dt.timezone(dt.timedelta(hours=9))) + + +def _kst_today() -> str: + return _kst_now().strftime("%Y-%m-%d") + + +def _clean_text(value: Any) -> str: + if value is None: + return "" + return str(value).strip() + + +def _normalize_code(value: Any) -> str: + text = _clean_text(value) + if not text: + return "" + text = text.replace(",", "") + if text.endswith(".0"): + text = text[:-2] + if text.isdigit(): + return text.zfill(6) + if re.fullmatch(r"\d+\.\d+", text): + return str(int(float(text))).zfill(6) + return text + + +def _parse_weight(value: str) -> float | None: + text = _clean_text(value).replace("%", "").replace(",", "") + if not text: + return None + try: + return float(text) + except Exception: + return None + + +def _discover_naver_candidate_urls(soup: BeautifulSoup, proxy_ticker: str) -> list[str]: + candidates: list[str] = [] + seen: set[str] = set() + + def add(url: str) -> None: + url = _clean_text(url) + if not url or url in seen: + return + seen.add(url) + candidates.append(url) + + expected_code = _normalize_code(proxy_ticker) + + for script in soup.find_all("script"): + src = _clean_text(script.get("src")) + if src: + if expected_code and expected_code in src: + if src.startswith("//"): + add(f"https:{src}") + elif src.startswith("/"): + add(urljoin(NAVER_BASE, src)) + else: + add(src) + continue + text = script.get_text(" ", strip=True) or "" + if not text: + continue + for regex in (NAVER_ITEM_CODE_RE, NAVER_REL_CODE_RE): + for match in regex.finditer(text): + code = _normalize_code(match.groupdict().get("code") or "") + if expected_code and code and code != expected_code: + continue + path = match.groupdict().get("path") or "" + if path: + add(urljoin(NAVER_BASE, path)) + + return candidates + + +def _parse_naver_etf_holdings(session: requests.Session, proxy_ticker: str, limit: int) -> dict[str, Any]: + url_candidates = [ + f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}", + f"{NAVER_BASE}/item/coinfo.naver?code={proxy_ticker}&target=cu_more", + ] + + last_message = "" + for url in url_candidates: + response = session.get(url, timeout=20) + response.raise_for_status() + soup = BeautifulSoup(response.text, "html.parser") + discovered = _discover_naver_candidate_urls(soup, proxy_ticker) + for candidate in discovered: + if candidate not in url_candidates: + url_candidates.append(candidate) + section = soup.select_one("div.section.etf_asset") + table = section.select_one("table.tb_type1_a") if section is not None else None + if table is None: + # layout changed or this endpoint does not expose the constituent table + last_message = "ETF constituent table missing; page structure may have changed" + continue + + holdings: list[dict[str, Any]] = [] + for tr in table.select("tbody tr"): + tds = tr.find_all("td") + if len(tds) < 3: + continue + name_link = tr.find("a", href=re.compile(r"code=\d+")) + if name_link is None: + continue + name = _clean_text(name_link.get_text(" ", strip=True)) + href = _clean_text(name_link.get("href")) + m = re.search(r"code=(\d+)", href) + code = _normalize_code(m.group(1) if m else "") + if not code or not name: + continue + weight = _parse_weight(tds[2].get_text(" ", strip=True)) + if weight is None: + continue + holdings.append({ + "Constituent_Code": code, + "Constituent_Name": name, + "Weight": round(weight / 100.0, 6), + "Source": "NAVER_ETF_PAGE", + }) + if len(holdings) >= limit: + break + + if holdings: + return { + "source_url": url, + "source_kind": "NAVER_ETF_PAGE", + "holdings": holdings, + "discovered_urls": discovered, + "message": "", + } + last_message = "no holdings parsed" + + return { + "source_url": url_candidates[0], + "source_kind": "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED", + "holdings": [], + "discovered_urls": [], + "message": last_message or "page structure changed; no expected values were inferred", + } + + +def _extract_sector_seed_rows(ws) -> list[dict[str, Any]]: + headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)] + headers = [str(h).strip() if h is not None else "" for h in headers] + idx = {name: i for i, name in enumerate(headers)} + rows: list[dict[str, Any]] = [] + for r in range(3, ws.max_row + 1): + row = {name: ws.cell(r, c + 1).value for c, name in enumerate(headers) if name} + if not any(v not in (None, "") for v in row.values()): + continue + rows.append(row) + return rows + + +def _group_seed_rows(rows: list[dict[str, Any]]) -> OrderedDict[str, dict[str, Any]]: + grouped: OrderedDict[str, dict[str, Any]] = OrderedDict() + for row in rows: + sector = _clean_text(row.get("Sector")) + if not sector: + continue + if sector not in grouped: + grouped[sector] = { + "meta": row, + "rows": [], + } + grouped[sector]["rows"].append(row) + return grouped + + +def _build_refreshed_rows(seed_rows: list[dict[str, Any]], limit: int) -> tuple[list[dict[str, Any]], dict[str, Any]]: + session = requests.Session() + session.headers.update({"User-Agent": DEFAULT_USER_AGENT}) + grouped = _group_seed_rows(seed_rows) + refreshed: list[dict[str, Any]] = [] + sector_stats: list[dict[str, Any]] = [] + today = _kst_today() + + for sector, bundle in grouped.items(): + meta = bundle["meta"] + proxy_ticker = _normalize_code(meta.get("Proxy_Ticker")) + proxy_name = _clean_text(meta.get("Proxy_Name")) + proxy_type = _clean_text(meta.get("Proxy_Type")) or "ETF" + base_ticker = _normalize_code(meta.get("Base_Ticker")) or "069500" + + if sector == "금융/은행": + split_specs = [ + {"sector": "은행", "proxy_ticker": "091170", "proxy_name": "KODEX 은행", "proxy_type": "ETF"}, + {"sector": "증권", "proxy_ticker": "0111J0", "proxy_name": "HANARO 증권고배당TOP3플러스", "proxy_type": "ETF"}, + {"sector": "지주회사", "proxy_ticker": "307520", "proxy_name": "TIGER 지주회사", "proxy_type": "ETF"}, + ] + for spec in split_specs: + split_proxy_ticker = _normalize_code(spec["proxy_ticker"]) + split_proxy_name = _clean_text(spec["proxy_name"]) + split_proxy_type = _clean_text(spec["proxy_type"]) or "ETF" + split_source = "SHEET_INPUT" + split_source_url = "" + split_message = "" + split_source_kind = "SHEET_INPUT" + try: + scraped = _parse_naver_etf_holdings(session, split_proxy_ticker, limit) + split_source_url = scraped.get("source_url", "") + split_source_kind = scraped.get("source_kind", "NAVER_ETF_PAGE_FAIL") + holdings = scraped.get("holdings", []) + split_message = scraped.get("message", "") + if holdings: + split_source = "NAVER_ETF_PAGE" + weight_sum = round(sum(float(h["Weight"]) for h in holdings), 6) + for h in holdings: + refreshed.append({ + "Sector": spec["sector"], + "Proxy_Ticker": split_proxy_ticker, + "Proxy_Name": split_proxy_name, + "Proxy_Type": split_proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": h["Constituent_Code"], + "Constituent_Name": h["Constituent_Name"], + "Weight": h["Weight"], + "Is_ETF": "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": split_source, + "Transport_Mode": "HTML_SERVER_RENDERED", + "Source_URL": split_source_url, + "Source_AsOf": today, + "Sector_Check": spec["sector"], + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "OK", + }) + sector_stats.append({ + "sector": spec["sector"], + "proxy_ticker": split_proxy_ticker, + "proxy_name": split_proxy_name, + "proxy_type": split_proxy_type, + "source_kind": split_source, + "transport_mode": "HTML_SERVER_RENDERED", + "source_url": split_source_url, + "source_asof": today, + "constituent_count": len(holdings), + "weight_sum": weight_sum, + "status": "CURRENT", + "refresh_reason": "NAVER_ETF_PAGE_SPLIT", + }) + continue + except Exception as exc: + split_message = str(exc) + split_source_kind = "NAVER_ETF_PAGE_FAIL" + + # 실패 시는 투명하게 남기고, 섹터 누락은 그대로 드러낸다. + sector_stats.append({ + "sector": spec["sector"], + "proxy_ticker": split_proxy_ticker, + "proxy_name": split_proxy_name, + "proxy_type": split_proxy_type, + "source_kind": split_source_kind, + "transport_mode": "LAYOUT_CHANGED" if split_source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN", + "source_url": split_source_url, + "source_asof": today, + "constituent_count": 0, + "weight_sum": 0.0, + "status": "FAIL" if "FAIL" in split_source_kind else "WARN", + "refresh_reason": split_message or "split_sector_fallback", + }) + continue + + source = "SHEET_INPUT" + source_url = "" + message = "" + source_kind = "SHEET_INPUT" + + if proxy_type != "ETF": + source_kind = "REPRESENTATIVE_STOCK_PROXY" + source = source_kind + source_url = f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}" if proxy_ticker else "" + fallback_rows = bundle["rows"][:limit] if bundle["rows"] else [] + weight_sum = 0.0 + for row in fallback_rows: + weight = row.get("Weight") + try: + weight_sum += float(weight) if weight not in (None, "") else 0.0 + except Exception: + pass + refreshed.append({ + "Sector": sector, + "Proxy_Ticker": proxy_ticker, + "Proxy_Name": proxy_name, + "Proxy_Type": proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": _normalize_code(row.get("Constituent_Code")), + "Constituent_Name": _clean_text(row.get("Constituent_Name")), + "Weight": float(row.get("Weight") or 0), + "Is_ETF": _clean_text(row.get("Is_ETF")) or "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": source_kind, + "Transport_Mode": "HTML_SERVER_RENDERED" if source_kind == "REPRESENTATIVE_STOCK_PROXY" else "MANUAL_OR_TEMPLATE", + "Source_URL": source_url, + "Source_AsOf": today, + "Sector_Check": sector, + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "CURRENT", + }) + sector_stats.append({ + "sector": sector, + "proxy_ticker": proxy_ticker, + "proxy_name": proxy_name, + "proxy_type": proxy_type, + "source_kind": source_kind, + "transport_mode": "HTML_SERVER_RENDERED" if source_kind == "REPRESENTATIVE_STOCK_PROXY" else "MANUAL_OR_TEMPLATE", + "source_url": source_url, + "source_asof": today, + "constituent_count": len(fallback_rows), + "weight_sum": round(weight_sum, 6), + "status": "CURRENT", + "refresh_reason": "REPRESENTATIVE_STOCK_PROXY", + }) + continue + + if proxy_ticker: + try: + scraped = _parse_naver_etf_holdings(session, proxy_ticker, limit) + source_url = scraped.get("source_url", "") + source_kind = scraped.get("source_kind", "NAVER_ETF_PAGE_FAIL") + holdings = scraped.get("holdings", []) + message = scraped.get("message", "") + if holdings: + source = "NAVER_ETF_PAGE" + weight_sum = round(sum(float(h["Weight"]) for h in holdings), 6) + for h in holdings: + refreshed.append({ + "Sector": sector, + "Proxy_Ticker": proxy_ticker, + "Proxy_Name": proxy_name, + "Proxy_Type": proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": h["Constituent_Code"], + "Constituent_Name": h["Constituent_Name"], + "Weight": h["Weight"], + "Is_ETF": "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": source, + "Transport_Mode": "HTML_SERVER_RENDERED", + "Source_URL": source_url, + "Source_AsOf": today, + "Sector_Check": sector, + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "OK", + }) + sector_stats.append({ + "sector": sector, + "proxy_ticker": proxy_ticker, + "proxy_name": proxy_name, + "proxy_type": proxy_type, + "source_kind": source, + "transport_mode": "HTML_SERVER_RENDERED", + "source_url": source_url, + "source_asof": today, + "constituent_count": len(holdings), + "weight_sum": weight_sum, + "status": "CURRENT", + "refresh_reason": "NAVER_ETF_PAGE", + }) + continue + except Exception as exc: + message = str(exc) + source_kind = "NAVER_ETF_PAGE_FAIL" + + # fallback: preserve seed rows but expose the failure transparently + fallback_rows = bundle["rows"][:limit] if bundle["rows"] else [] + weight_sum = 0.0 + for row in fallback_rows: + weight = row.get("Weight") + try: + weight_sum += float(weight) if weight not in (None, "") else 0.0 + except Exception: + pass + refreshed.append({ + "Sector": sector, + "Proxy_Ticker": proxy_ticker, + "Proxy_Name": proxy_name, + "Proxy_Type": proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": _normalize_code(row.get("Constituent_Code")), + "Constituent_Name": _clean_text(row.get("Constituent_Name")), + "Weight": float(row.get("Weight") or 0), + "Is_ETF": _clean_text(row.get("Is_ETF")) or "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": source_kind, + "Transport_Mode": "LAYOUT_CHANGED" if source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN", + "Source_URL": source_url, + "Source_AsOf": today, + "Sector_Check": sector, + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "FAIL" if source_kind.endswith("FAIL") else "WARN", + }) + sector_stats.append({ + "sector": sector, + "proxy_ticker": proxy_ticker, + "proxy_name": proxy_name, + "proxy_type": proxy_type, + "source_kind": source_kind, + "transport_mode": "LAYOUT_CHANGED" if source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN", + "source_url": source_url, + "source_asof": today, + "constituent_count": len(fallback_rows), + "weight_sum": round(weight_sum, 6), + "status": "FAIL" if "FAIL" in source_kind else "WARN", + "refresh_reason": message or "seed_fallback", + }) + + audit_payload = build_sector_universe_refresh_audit({"data": {"sector_universe": refreshed}}) + return refreshed, { + "sector_universe_refresh_audit": audit_payload, + "sector_stats": sector_stats, + } + + +def _style_title(ws, title: str, subtitle: str) -> None: + ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=max(8, ws.max_column or 8)) + ws["A1"] = title + ws["A1"].font = WHITE_FONT + ws["A1"].fill = TITLE_FILL + ws["A1"].alignment = Alignment(horizontal="left") + ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=max(8, ws.max_column or 8)) + ws["A2"] = subtitle + ws["A2"].font = NOTE_FONT + + +def _write_table(ws, start_row: int, start_col: int, headers: list[str], rows: list[list[Any]]) -> int: + for i, header in enumerate(headers, start=start_col): + cell = ws.cell(start_row, i) + cell.value = header + cell.font = WHITE_FONT + cell.fill = HEADER_FILL + cell.alignment = Alignment(horizontal="center") + for r_idx, row in enumerate(rows, start=start_row + 1): + for c_idx, value in enumerate(row, start=start_col): + ws.cell(r_idx, c_idx).value = value + return start_row + len(rows) + + +def _write_sector_universe_sheet(wb, rows: list[dict[str, Any]]) -> None: + if "sector_universe" in wb.sheetnames: + del wb["sector_universe"] + ws = wb.create_sheet("sector_universe") + headers = [ + "Sector", "Proxy_Ticker", "Proxy_Name", "Proxy_Type", "Base_Ticker", + "Constituent_Code", "Constituent_Name", "Weight", "Is_ETF", "Enabled", + "Effective_Date", "Source", "Transport_Mode", "Source_URL", "Source_AsOf", "Sector_Check", + "Weight_Sum_All", "Weight_Sum_Stocks_Only", "ETF_Rows", "Status", + ] + now = _kst_now().strftime("%Y-%m-%d %H:%M:%S") + ws["A1"] = f"updated: {now} KST" + ws["A1"].font = Font(bold=True) + _write_table(ws, 2, 1, headers, [[r.get(h, "") for h in headers] for r in rows]) + for col_idx, header in enumerate(headers, start=1): + if header in {"Proxy_Ticker", "Base_Ticker", "Constituent_Code"}: + for r in range(3, ws.max_row + 1): + ws.cell(r, col_idx).number_format = "@" + if header in {"Weight", "Weight_Sum_All", "Weight_Sum_Stocks_Only"}: + for r in range(3, ws.max_row + 1): + ws.cell(r, col_idx).number_format = "0.0000" + width = 16 + if header in {"Constituent_Name", "Proxy_Name"}: + width = 22 + elif header in {"Source_URL"}: + width = 42 + elif header in {"Status", "Source", "Sector_Check", "Proxy_Type", "Transport_Mode"}: + width = 16 + ws.column_dimensions[get_column_letter(col_idx)].width = width + ws.freeze_panes = "A3" + ws.sheet_view.showGridLines = False + + +def _write_audit_sheet(wb, audit_payload: dict[str, Any]) -> None: + audit = audit_payload["sector_universe_refresh_audit"] + if "sector_universe_refresh_audit" in wb.sheetnames: + del wb["sector_universe_refresh_audit"] + ws = wb.create_sheet("sector_universe_refresh_audit") + ws.sheet_view.showGridLines = False + _style_title( + ws, + "섹터 월간 갱신 감사", + "Naver ETF 페이지 기반 월간 갱신 상태와 provenance 분리 현황을 점검한다.", + ) + summary = audit.get("summary", {}) + summary_rows = [ + ["formula_id", audit.get("formula_id", "")], + ["gate", audit.get("gate", "")], + ["sector_count", summary.get("sector_count", 0)], + ["current_count", summary.get("current_count", 0)], + ["due_count", summary.get("due_count", 0)], + ["overdue_count", summary.get("overdue_count", 0)], + ["missing_count", summary.get("missing_count", 0)], + ["template_count", summary.get("template_count", 0)], + ["sheet_input_count", summary.get("sheet_input_count", 0)], + ["naver_source_count", summary.get("naver_source_count", 0)], + ["missing_source_url_count", summary.get("missing_source_url_count", 0)], + ["stale_sector_count", summary.get("stale_sector_count", 0)], + ["oldest_source_asof", summary.get("oldest_source_asof", "")], + ["newest_source_asof", summary.get("newest_source_asof", "")], + ] + _write_table(ws, 4, 1, ["key", "value"], summary_rows) + rows = audit.get("rows", []) or [] + if rows: + headers = [ + "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", + "source_url", "source_asof", "age_days", "constituent_count", + "stock_count", "etf_count", "weight_sum", "status", "refresh_reason", + ] + _write_table(ws, 4, 4, headers, [[r.get(h, "") for h in headers] for r in rows]) + for idx, header in enumerate(headers, start=4): + width = 16 + if header in {"sector", "proxy_name", "refresh_reason"}: + width = 20 + elif header == "source_url": + width = 42 + ws.column_dimensions[get_column_letter(idx)].width = width + ws.freeze_panes = "A5" + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--input", default=str(DEFAULT_INPUT_XLSX)) + ap.add_argument("--output", default=str(DEFAULT_OUTPUT_XLSX)) + ap.add_argument("--limit", type=int, default=10, help="Per-sector holdings limit from Naver ETF pages") + ap.add_argument("--apply", action="store_true", help="Overwrite the input workbook in place as well") + args = ap.parse_args() + + input_path = Path(args.input) + output_path = Path(args.output) + if not input_path.exists(): + raise FileNotFoundError(input_path) + + wb = load_workbook(input_path) + if "sector_universe" not in wb.sheetnames: + raise RuntimeError("sector_universe sheet not found") + seed_ws = wb["sector_universe"] + seed_rows = _extract_sector_seed_rows(seed_ws) + refreshed_rows, audit_payload = _build_refreshed_rows(seed_rows, max(1, args.limit)) + + _write_sector_universe_sheet(wb, refreshed_rows) + _write_audit_sheet(wb, audit_payload) + + output_path.parent.mkdir(parents=True, exist_ok=True) + wb.save(output_path) + if args.apply and input_path.resolve() != output_path.resolve(): + shutil.copy2(output_path, input_path) + + print(json.dumps({ + "status": "OK", + "input": str(input_path), + "output": str(output_path), + "rows": len(refreshed_rows), + "sectors": len(audit_payload["sector_stats"]), + "current_count": audit_payload["sector_universe_refresh_audit"]["summary"]["current_count"], + "overdue_count": audit_payload["sector_universe_refresh_audit"]["summary"]["overdue_count"], + "template_count": audit_payload["sector_universe_refresh_audit"]["summary"]["template_count"], + }, ensure_ascii=False, indent=2)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/validate_sector_universe_monthly_refresh_v1.py b/tools/validate_sector_universe_monthly_refresh_v1.py new file mode 100644 index 0000000..52fa3a8 --- /dev/null +++ b/tools/validate_sector_universe_monthly_refresh_v1.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import datetime as dt +import json +import sys +from pathlib import Path +from typing import Any + +from openpyxl import load_workbook + + +ROOT = Path(__file__).resolve().parents[1] +DEFAULT_XLSX = ROOT / "GatherTradingData.xlsx" +MAX_AGE_DAYS = 31 + + +def _txt(value: Any, default: str = "") -> str: + if value is None: + return default + if isinstance(value, str): + return value.strip() or default + return str(value).strip() or default + + +def _parse_date(value: Any) -> dt.date | None: + text = _txt(value) + if not text: + return None + for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"): + try: + return dt.datetime.strptime(text[:10], fmt).date() + except Exception: + pass + try: + return dt.date.fromisoformat(text[:10]) + except Exception: + return None + + +def _age_days(value: Any) -> int | None: + parsed = _parse_date(value) + if parsed is None: + return None + today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date() + return (today - parsed).days + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--xlsx", default=str(DEFAULT_XLSX)) + args = ap.parse_args() + + xlsx = Path(args.xlsx) + if not xlsx.exists(): + print(f"[오류] 워크북 없음: {xlsx}") + return 1 + + wb = load_workbook(xlsx, data_only=True) + if "sector_universe" not in wb.sheetnames: + print("[FAIL] sector_universe 시트가 없습니다.") + return 1 + + ws = wb["sector_universe"] + headers = [_txt(ws.cell(2, c).value) for c in range(1, ws.max_column + 1)] + idx = {name: i for i, name in enumerate(headers) if name} + required = ["Sector", "Proxy_Ticker", "Constituent_Code", "Weight", "Source", "Source_URL", "Source_AsOf"] + missing_headers = [h for h in required if h not in idx] + + rows: list[dict[str, Any]] = [] + for r in range(3, ws.max_row + 1): + row = {h: ws.cell(r, c + 1).value for c, h in enumerate(headers) if h} + if not any(v not in (None, "") for v in row.values()): + continue + rows.append(row) + + sector_map: dict[str, list[dict[str, Any]]] = {} + for row in rows: + sector = _txt(row.get("Sector")) + if sector: + sector_map.setdefault(sector, []).append(row) + + template_rows = 0 + representative_rows = 0 + sheet_input_rows = 0 + naver_rows = 0 + layout_changed_rows = 0 + fail_rows = 0 + missing_source_url = 0 + stale_rows = 0 + mixed_sector_count = 0 + sector_status_rows: list[str] = [] + + for sector, sector_rows in sector_map.items(): + source_kinds = {_txt(r.get("Source"), "SHEET_INPUT") or "SHEET_INPUT" for r in sector_rows} + if len(source_kinds) > 1: + mixed_sector_count += 1 + sector_status_rows.append(f"{sector}:MIXED({','.join(sorted(source_kinds))})") + + sector_template = any(src == "DEFAULT_TEMPLATE" for src in source_kinds) + sector_rep = any(src == "REPRESENTATIVE_STOCK_PROXY" for src in source_kinds) + sector_input = any(src == "SHEET_INPUT" for src in source_kinds) + sector_naver = any(src == "NAVER_ETF_PAGE" for src in source_kinds) + sector_layout_changed = any(src == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" for src in source_kinds) + sector_fail = any("FAIL" in src for src in source_kinds) + + if sector_template: + template_rows += len(sector_rows) + if sector_rep: + representative_rows += len(sector_rows) + if sector_input: + sheet_input_rows += len(sector_rows) + if sector_naver: + naver_rows += len(sector_rows) + if sector_layout_changed: + layout_changed_rows += len(sector_rows) + if sector_fail: + fail_rows += len(sector_rows) + + source_urls = {_txt(r.get("Source_URL")) for r in sector_rows if _txt(r.get("Source_URL"))} + if not source_urls: + missing_source_url += len(sector_rows) + + ages = [_age_days(r.get("Source_AsOf")) for r in sector_rows] + age_vals = [a for a in ages if a is not None] + if age_vals and max(age_vals) > MAX_AGE_DAYS: + stale_rows += sum(1 for a in age_vals if a is not None and a > MAX_AGE_DAYS) + sector_status_rows.append(f"{sector}:STALE(max={max(age_vals)})") + + gate = "PASS" + if missing_headers: + gate = "FAIL" + elif template_rows > 0 or fail_rows > 0 or stale_rows > 0 or mixed_sector_count > 0: + gate = "FAIL" + elif sheet_input_rows > 0: + gate = "WARN" + + print(f"[sector_universe_refresh] gate={gate}") + print(f" rows={len(rows)} sectors={len(sector_map)}") + print(f" naver_rows={naver_rows} representative_rows={representative_rows} layout_changed_rows={layout_changed_rows} sheet_input_rows={sheet_input_rows} template_rows={template_rows} fail_rows={fail_rows}") + print(f" missing_source_url={missing_source_url} stale_rows={stale_rows} mixed_sector_count={mixed_sector_count}") + if missing_headers: + print(f" missing_headers={missing_headers}") + if sector_status_rows: + print(" sector_flags=" + ", ".join(sector_status_rows[:20])) + + result = { + "validator": "validate_sector_universe_monthly_refresh_v1", + "gate": gate, + "total_rows": len(rows), + "sector_count": len(sector_map), + "naver_rows": naver_rows, + "representative_rows": representative_rows, + "layout_changed_rows": layout_changed_rows, + "sheet_input_rows": sheet_input_rows, + "template_rows": template_rows, + "fail_rows": fail_rows, + "missing_source_url": missing_source_url, + "stale_rows": stale_rows, + "mixed_sector_count": mixed_sector_count, + "missing_headers": missing_headers, + "sector_flags": sector_status_rows, + "max_age_days": MAX_AGE_DAYS, + } + out = ROOT / "Temp" / "sector_universe_refresh_validation.json" + out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") + print(f"OUTPUT: {out}") + return 0 if gate in {"PASS", "WARN"} else 1 + + +if __name__ == "__main__": + sys.exit(main())