diff --git a/runtime/refactor_baseline_v1.yaml b/runtime/refactor_baseline_v1.yaml index d7fcb63..ed753cb 100644 --- a/runtime/refactor_baseline_v1.yaml +++ b/runtime/refactor_baseline_v1.yaml @@ -15,5 +15,5 @@ "keep package scripts within release envelope" ] }, - "source_zip_sha256": "49f64b3773ba3c19fa8323d0b08833928c637935483039579bb8ab22a391f70c" + "source_zip_sha256": "4de4a7b1217ef5d5375b3b1ea1209f738719e79c4c3c0954e9e96a9dc0d8607e" } \ No newline at end of file diff --git a/spec/risk/portfolio_exposure.yaml b/spec/risk/portfolio_exposure.yaml index 8267acb..bcb9934 100644 --- a/spec/risk/portfolio_exposure.yaml +++ b/spec/risk/portfolio_exposure.yaml @@ -13,7 +13,7 @@ portfolio_exposure_framework: exposure_layers: direct_core_leaders: ["삼성전자", "SK하이닉스"] duplicate_beta: ["KODEX 반도체", "동일 섹터 ETF"] - tactical_satellites: ["방산", "조선", "전력기기", "건설/EPC", "기타 고베타"] + tactical_satellites: ["방산", "조선", "전력설비", "건설", "플랜트/EPC", "로보틱스", "기타 고베타"] cash: ["현금", "MMF", "RP", "단기채 ETF"] valid_trim_reasons: - "벤치마크 대비 초과비중이 허용밴드를 초과하고 가격 추세가 훼손됨" diff --git a/src/gas/core/gas_lib.gs b/src/gas/core/gas_lib.gs index 8516f95..5423187 100644 --- a/src/gas/core/gas_lib.gs +++ b/src/gas/core/gas_lib.gs @@ -1,5 +1,5 @@ // gas_lib.gs - Common utilities & static features -// Last Updated: 2026-06-14 20:48:30 KST +// Last Updated: 2026-06-15 02:20:50 KST // Math/KRX utils, sheet I/O, sector flow, Web API, static runners // GAS global scope: functions in gas_data_feed.gs / gas_data_collect.gs callable directly // @@ -593,7 +593,14 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [ { code: "062040", name: "산일전기", weight: 0.10 }, { code: "298040", name: "효성중공업", weight: 0.10 }, ]}, - { sector: "방산", proxyTicker: "012450", proxyName: "한화에어로스페이스", proxyType: "대표주", baseTicker: "069500", constituents: [ + { sector: "전력설비", proxyTicker: "491820", proxyName: "HANARO 전력설비투자", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "010120", name: "LS ELECTRIC", weight: 0.28 }, + { code: "267260", name: "HD현대일렉트릭", weight: 0.28 }, + { code: "298040", name: "효성중공업", weight: 0.18 }, + { code: "006260", name: "LS", weight: 0.14 }, + { code: "099440", name: "두산에너빌리티", weight: 0.12 }, + ]}, + { sector: "방산", proxyTicker: "463250", proxyName: "TIGER K방산&우주", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "012450", name: "한화에어로스페이스", weight: 0.45 }, { code: "079550", name: "LIG넥스원", weight: 0.25 }, { code: "047810", name: "한국항공우주", weight: 0.15 }, @@ -605,23 +612,49 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [ { code: "009540", name: "HD한국조선해양", weight: 0.20 }, { code: "494670", name: "TIGER 조선TOP10", weight: 0.15, isEtf: true }, ]}, - { sector: "건설/EPC", proxyTicker: "028050", proxyName: "삼성E&A", proxyType: "대표주", baseTicker: "069500", constituents: [ - { code: "028050", name: "삼성E&A", weight: 0.40 }, - { code: "000720", name: "현대건설", weight: 0.30 }, - { code: "006360", name: "GS건설", weight: 0.20 }, - { code: "047040", name: "대우건설", weight: 0.10 }, + { sector: "건설", proxyTicker: "117700", proxyName: "KODEX 건설", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "000720", name: "현대건설", weight: 0.35 }, + { code: "006360", name: "GS건설", weight: 0.25 }, + { code: "047040", name: "대우건설", weight: 0.20 }, + { code: "294870", name: "HDC현대산업개발", weight: 0.20 }, + ]}, + { sector: "플랜트/EPC", proxyTicker: "454320", proxyName: "HANARO CAPEX설비투자iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "028050", name: "삼성E&A", weight: 0.35 }, + { code: "010120", name: "LS ELECTRIC", weight: 0.20 }, + { code: "267260", name: "HD현대일렉트릭", weight: 0.20 }, + { code: "298040", name: "효성중공업", weight: 0.15 }, + { code: "099440", name: "두산에너빌리티", weight: 0.10 }, ]}, { sector: "자동차", proxyTicker: "091180", proxyName: "TIGER 자동차", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "005380", name: "현대차", weight: 0.45 }, { code: "000270", name: "기아", weight: 0.40 }, { code: "012330", name: "현대모비스", weight: 0.15 }, ]}, - { sector: "금융/은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [ + { sector: "은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "105560", name: "KB금융", weight: 0.30 }, { code: "055550", name: "신한지주", weight: 0.30 }, { code: "086790", name: "하나금융지주", weight: 0.20 }, { code: "316140", name: "우리금융지주", weight: 0.10 }, - { code: "003540", name: "대신증권", weight: 0.10 }, + { code: "024110", name: "기업은행", weight: 0.10 }, + ]}, + { sector: "증권", proxyTicker: "0111J0", proxyName: "HANARO 증권고배당TOP3플러스", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "071050", name: "한국금융지주", weight: 0.2135 }, + { code: "006800", name: "미래에셋증권", weight: 0.1934 }, + { code: "005940", name: "NH투자증권", weight: 0.1911 }, + { code: "016360", name: "삼성증권", weight: 0.1434 }, + { code: "039490", name: "키움증권", weight: 0.1373 }, + ]}, + { sector: "지주회사", proxyTicker: "307520", proxyName: "TIGER 지주회사", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "180640", name: "한진칼", weight: 0.1535 }, + { code: "267250", name: "HD현대", weight: 0.0943 }, + { code: "034730", name: "SK", weight: 0.0884 }, + { code: "000150", name: "두산", weight: 0.0878 }, + { code: "005490", name: "POSCO홀딩스", weight: 0.0763 }, + { code: "003550", name: "LG", weight: 0.0752 }, + { code: "006260", name: "LS", weight: 0.0705 }, + { code: "078930", name: "GS", weight: 0.0498 }, + { code: "001040", name: "CJ", weight: 0.0477 }, + { code: "010060", name: "OCI홀딩스", weight: 0.0240 }, ]}, { sector: "2차전지", proxyTicker: "305720", proxyName: "KODEX 2차전지산업", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "373220", name: "LG에너지솔루션", weight: 0.40 }, @@ -635,12 +668,29 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [ { code: "128940", name: "한미약품", weight: 0.15 }, { code: "000100", name: "유한양행", weight: 0.10 }, ]}, - { sector: "원전", proxyTicker: "099440", proxyName: "두산에너빌리티", proxyType: "대표주", baseTicker: "069500", constituents: [ + { sector: "원전", proxyTicker: "434730", proxyName: "HANARO 원자력iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "099440", name: "두산에너빌리티", weight: 0.45 }, { code: "023450", name: "한전기술", weight: 0.25 }, { code: "015760", name: "한국전력", weight: 0.20 }, { code: "071320", name: "지역난방공사", weight: 0.10 }, ]}, + { sector: "로보틱스", proxyTicker: "0190C0", proxyName: "RISE 현대차고정피지컬AI", proxyType: "ETF", baseTicker: "069500", constituents: [ + { code: "005380", name: "현대차", weight: 0.2402 }, + { code: "012330", name: "현대모비스", weight: 0.1588 }, + { code: "011070", name: "LG이노텍", weight: 0.1450 }, + { code: "000270", name: "기아", weight: 0.1234 }, + { code: "307950", name: "현대오토에버", weight: 0.0899 }, + { code: "277810", name: "레인보우로보틱스", weight: 0.0673 }, + { code: "064400", name: "LG씨엔에스", weight: 0.0519 }, + { code: "454910", name: "두산로보틱스", weight: 0.0367 }, + { code: "108490", name: "로보티즈", weight: 0.0240 }, + { code: "058610", name: "에스피지", weight: 0.0173 }, + { code: "010620", name: "현대미포", weight: 0.0135 }, + { code: "009540", name: "HD한국조선해양", weight: 0.0135 }, + { code: "011210", name: "현대위아", weight: 0.0109 }, + { code: "121600", name: "나노신소재", weight: 0.0040 }, + { code: "028050", name: "삼성E&A", weight: 0.0034 }, + ]}, { sector: "소비재", proxyTicker: "139220", proxyName: "TIGER 생활소비재", proxyType: "ETF", baseTicker: "069500", constituents: [ { code: "028260", name: "삼성물산", weight: 0.35 }, { code: "097950", name: "CJ제일제당", weight: 0.25 }, @@ -663,6 +713,7 @@ function normalizeSectorName_(sector) { if (s === "바이오/헬스케어") return "바이오"; if (s === "원전/에너지") return "원전"; if (s === "소비재/유통") return "소비재"; + if (s === "건설/EPC") return "플랜트/EPC"; return s; } @@ -679,17 +730,52 @@ function readSectorUniverse_() { const sheet = ss.getSheetByName("sector_universe"); if (!sheet) { writeDefaultSectorUniverseSheet_(); - return DEFAULT_SECTOR_UNIVERSE_V2; + return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); } const data = sheet.getDataRange().getValues(); if (data.length < 3) { writeDefaultSectorUniverseSheet_(); - return DEFAULT_SECTOR_UNIVERSE_V2; + return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); } const hdr = data[1].map(h => String(h).trim()); const idx = name => hdr.indexOf(name); const required = ["Sector","Proxy_Ticker","Constituent_Code","Weight"]; - if (required.some(h => idx(h) < 0)) return DEFAULT_SECTOR_UNIVERSE_V2; + if (required.some(h => idx(h) < 0)) { + return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); + } const map = {}; for (let i = 2; i < data.length; i++) { @@ -706,6 +792,9 @@ function readSectorUniverse_() { proxyName: idx("Proxy_Name") >= 0 ? String(data[i][idx("Proxy_Name")] ?? "").trim() : "", proxyType: idx("Proxy_Type") >= 0 ? String(data[i][idx("Proxy_Type")] ?? "").trim() : "", baseTicker: idx("Base_Ticker") >= 0 ? normalizeTickerCode(data[i][idx("Base_Ticker")]) : "069500", + source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT", + sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "", + sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "", constituents: [], }; } @@ -714,16 +803,59 @@ function readSectorUniverse_() { name: idx("Constituent_Name") >= 0 ? String(data[i][idx("Constituent_Name")] ?? "").trim() : "", weight, isEtf: idx("Is_ETF") >= 0 ? boolFromSheet_(data[i][idx("Is_ETF")], false) : false, + source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT", + transportMode: idx("Transport_Mode") >= 0 ? String(data[i][idx("Transport_Mode")] ?? "").trim() : "", + sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "", + sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "", }); } const sectors = Object.values(map).filter(s => s.proxyTicker && s.constituents.length > 0); - return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2; + const sectorSet = new Set(sectors.map(s => s.sector)); + for (const fallback of DEFAULT_SECTOR_UNIVERSE_V2) { + if (!fallback || !fallback.sector || sectorSet.has(fallback.sector)) continue; + sectors.push({ + sector: fallback.sector, + proxyTicker: fallback.proxyTicker, + proxyName: fallback.proxyName, + proxyType: fallback.proxyType, + baseTicker: fallback.baseTicker || "069500", + source: fallback.source || "DEFAULT_TEMPLATE", + transportMode: fallback.transportMode || ((fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: fallback.sourceUrl || "", + sourceAsOf: fallback.sourceAsOf || "", + constituents: fallback.constituents.map(c => ({ + code: c.code, + name: c.name || "", + weight: c.weight, + isEtf: Boolean(c.isEtf), + source: c.source || fallback.source || "DEFAULT_TEMPLATE", + transportMode: c.transportMode || ((c.source || fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: c.sourceUrl || fallback.sourceUrl || "", + sourceAsOf: c.sourceAsOf || fallback.sourceAsOf || "", + })), + }); + } + return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({ + ...sector, + source: sector.source || "DEFAULT_TEMPLATE", + transportMode: sector.transportMode || ((sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: sector.sourceUrl || "", + sourceAsOf: sector.sourceAsOf || "", + constituents: sector.constituents.map(c => ({ + ...c, + source: c.source || sector.source || "DEFAULT_TEMPLATE", + transportMode: c.transportMode || ((c.source || sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sourceUrl: c.sourceUrl || sector.sourceUrl || "", + sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "", + })), + })); } function writeDefaultSectorUniverseSheet_() { const headers = [ "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Base_Ticker", - "Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source" + "Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source","Transport_Mode", + "Source_URL","Source_AsOf" ]; const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd"); const rows = []; @@ -741,7 +873,10 @@ function writeDefaultSectorUniverseSheet_() { c.isEtf ? "Y" : "N", "Y", today, - "sector_universe(DEFAULT_SECTOR_UNIVERSE_V2)", + sector.source || c.source || "DEFAULT_TEMPLATE", + sector.transportMode || c.transportMode || (((sector.source || c.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || c.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY") ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"), + sector.sourceUrl || c.sourceUrl || "", + sector.sourceAsOf || c.sourceAsOf || "", ]); } } @@ -762,6 +897,228 @@ function sectorUseMode_(quality) { return "INVALID"; } +function parseDateOnly_(value) { + const text = String(value ?? "").trim(); + if (!text) return null; + const norm = text.replace(/\./g, "-").slice(0, 10); + if (!/^\d{4}-\d{2}-\d{2}$/.test(norm)) return null; + const parsed = new Date(norm + "T00:00:00+09:00"); + return Number.isNaN(parsed.getTime()) ? null : parsed; +} + +function calcSectorUniverseRefreshAudit_(universe) { + const today = new Date(); + const rows = []; + const sourceKindCounts = { NAVER_ETF_PAGE: 0, NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED: 0, NAVER_ETF_PAGE_FAIL: 0, REPRESENTATIVE_STOCK_PROXY: 0, SHEET_INPUT: 0, DEFAULT_TEMPLATE: 0, OTHER: 0 }; + const transportModeCounts = { HTML_SERVER_RENDERED: 0, MANUAL_OR_TEMPLATE: 0, LAYOUT_CHANGED: 0, UNKNOWN: 0 }; + let currentCount = 0; + let dueCount = 0; + let overdueCount = 0; + let missingCount = 0; + let templateCount = 0; + let sheetInputCount = 0; + let naverSourceCount = 0; + let layoutChangedCount = 0; + let missingSourceUrlCount = 0; + let staleSectorCount = 0; + let oldestSourceAsOf = null; + let newestSourceAsOf = null; + + for (const sector of universe || []) { + const sectorRows = Array.isArray(sector?.constituents) ? sector.constituents : []; + const sourceKind = String(sector?.source || "SHEET_INPUT").trim() || "SHEET_INPUT"; + if (Object.prototype.hasOwnProperty.call(sourceKindCounts, sourceKind)) { + sourceKindCounts[sourceKind] += 1; + } else { + sourceKindCounts.OTHER += 1; + } + const transportMode = String(sector?.transportMode || "").trim() || + (sourceKind === "NAVER_ETF_PAGE" || sourceKind === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : + sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" ? "LAYOUT_CHANGED" : + (sourceKind === "DEFAULT_TEMPLATE" || sourceKind === "SHEET_INPUT" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN")); + if (Object.prototype.hasOwnProperty.call(transportModeCounts, transportMode)) { + transportModeCounts[transportMode] += 1; + } else { + transportModeCounts.UNKNOWN += 1; + } + + const sourceUrl = String(sector?.sourceUrl || "").trim(); + const sourceAsOf = String(sector?.sourceAsOf || "").trim(); + const parsed = parseDateOnly_(sourceAsOf); + const ageDays = parsed ? Math.floor((today.getTime() - parsed.getTime()) / 86400000) : null; + if (parsed) { + oldestSourceAsOf = oldestSourceAsOf && oldestSourceAsOf < parsed ? oldestSourceAsOf : parsed; + newestSourceAsOf = newestSourceAsOf && newestSourceAsOf > parsed ? newestSourceAsOf : parsed; + } + + let status = "INVALID"; + const reasons = []; + if (sourceKind === "DEFAULT_TEMPLATE") { + status = "TEMPLATE"; + templateCount += 1; + reasons.push("DEFAULT_TEMPLATE"); + } else if (sourceKind === "REPRESENTATIVE_STOCK_PROXY") { + if (!sourceUrl) { + status = "MISSING"; + missingCount += 1; + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } else if (ageDays === null) { + status = "MISSING"; + missingCount += 1; + reasons.push("Source_AsOf_MISSING"); + } else if (ageDays <= 31) { + status = "CURRENT"; + currentCount += 1; + } else if (ageDays <= 45) { + status = "DUE"; + dueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } else { + status = "OVERDUE"; + overdueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else if (sourceKind === "SHEET_INPUT") { + sheetInputCount += 1; + if (!sourceUrl) { + status = "MISSING"; + missingCount += 1; + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } else if (ageDays === null) { + status = "MISSING"; + missingCount += 1; + reasons.push("Source_AsOf_MISSING"); + } else if (ageDays <= 31) { + status = "CURRENT"; + currentCount += 1; + } else if (ageDays <= 45) { + status = "DUE"; + dueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } else { + status = "OVERDUE"; + overdueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else if (sourceKind === "NAVER_ETF_PAGE") { + naverSourceCount += 1; + if (!sourceUrl) { + status = "MISSING"; + missingCount += 1; + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } else if (ageDays === null) { + status = "MISSING"; + missingCount += 1; + reasons.push("Source_AsOf_MISSING"); + } else if (ageDays <= 31) { + status = "CURRENT"; + currentCount += 1; + } else if (ageDays <= 45) { + status = "DUE"; + dueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } else { + status = "OVERDUE"; + overdueCount += 1; + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else if (sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED") { + layoutChangedCount += 1; + status = "LAYOUT_CHANGED"; + if (!sourceUrl) { + missingSourceUrlCount += 1; + reasons.push("Source_URL_MISSING"); + } + if (ageDays === null) { + reasons.push("Source_AsOf_MISSING"); + } else { + staleSectorCount += 1; + reasons.push(`AgeDays=${ageDays}`); + } + } else { + status = "INVALID"; + reasons.push("SOURCE_KIND_UNKNOWN"); + if (!sourceUrl) missingSourceUrlCount += 1; + } + if (!sourceUrl) reasons.push("Source_URL_MISSING"); + if (ageDays !== null && ageDays < 0) reasons.push("FUTURE_DATE"); + + rows.push({ + sector: sector.sector || "", + proxy_ticker: sector.proxyTicker || "", + proxy_name: sector.proxyName || "", + proxy_type: sector.proxyType || "", + source_kind: sourceKind, + transport_mode: transportMode, + source_url: sourceUrl, + source_asof: sourceAsOf, + age_days: ageDays === null ? "" : ageDays, + constituent_count: sectorRows.length, + stock_count: sectorRows.filter(c => !c.isEtf).length, + etf_count: sectorRows.filter(c => c.isEtf).length, + weight_sum: sectorRows.reduce((a, c) => a + (Number(c.weight) || 0), 0), + status: status, + refresh_reason: reasons.length ? reasons.join(";") : "OK", + }); + } + + rows.sort((a, b) => { + if (a.status === "CURRENT" && b.status !== "CURRENT") return -1; + if (a.status !== "CURRENT" && b.status === "CURRENT") return 1; + return String(a.sector || "").localeCompare(String(b.sector || "")); + }); + + return { + formula_id: "sector_universe_refresh_audit_v1", + gate: (templateCount > 0 || missingSourceUrlCount > 0 || overdueCount > 0 || staleSectorCount > 0) ? "FAIL" : (sheetInputCount > 0 ? "WARN" : "PASS"), + summary: { + sector_count: (universe || []).length, + current_count: currentCount, + due_count: dueCount, + overdue_count: overdueCount, + missing_count: missingCount, + template_count: templateCount, + sheet_input_count: sheetInputCount, + naver_source_count: naverSourceCount, + layout_changed_count: layoutChangedCount, + missing_source_url_count: missingSourceUrlCount, + stale_sector_count: staleSectorCount, + oldest_source_asof: oldestSourceAsOf ? Utilities.formatDate(oldestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "", + newest_source_asof: newestSourceAsOf ? Utilities.formatDate(newestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "", + source_kind_counts: sourceKindCounts, + transport_mode_counts: transportModeCounts, + ajax_mode: "NO", + transport_model: "HTML_SERVER_RENDERED", + }, + rows: rows, + }; +} + +function writeSectorUniverseRefreshAuditSheet_(audit) { + if (!audit || typeof audit !== "object") return 0; + const headers = [ + "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode", + "source_url", "source_asof", "age_days", "constituent_count", + "stock_count", "etf_count", "weight_sum", "status", "refresh_reason", + ]; + const rows = Array.isArray(audit.rows) + ? audit.rows.map(function(r) { + return headers.map(function(h) { return r[h] ?? ""; }); + }) + : []; + writeToSheet("sector_universe_refresh_audit", headers, rows); + return rows.length; +} + function scoreSmartMoneyNorm_(v) { if (!Number.isFinite(v)) return 0; if (v >= 0.15) return 25; @@ -955,7 +1312,7 @@ function runSectorFlowV3() { const etfRawMap = buildEtfRawMap_(buildEtfRawRows_(universe)); const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd"); const headers = [ - "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight", + "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Transport_Mode","Coverage_Weight", "Sector_Ret5D","Sector_Ret20D","Sector_RS_20D", "SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW","SmartMoney_5D_Norm", "Flow_Breadth_5D","Flow_Rows_Min","Stale_Count", @@ -1031,6 +1388,9 @@ function runSectorFlowV3() { const etfNavRisk = sector.proxyType === "ETF" ? (etfRaw?.navRisk ?? "NAV_DATA_MISSING") : "NOT_ETF"; const etfLiquidityStatus = sector.proxyType === "ETF" ? (etfRaw?.liquidityStatus ?? "WARN") : "NOT_ETF"; const etfExecutionUse = sector.proxyType === "ETF" ? (etfRaw?.executionUse ?? "WATCH_ONLY") : "NOT_ETF"; + const transportMode = sector.source === "NAVER_ETF_PAGE" ? "HTML_SERVER_RENDERED" + : (sector.source === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" + : (sector.source === "DEFAULT_TEMPLATE" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN")); const quality = sectorDataQuality_(coverage, flowRowsMin, staleCount, proxy.ok, Number.isFinite(smart5Norm), weightSum); const routeUse = sectorUseMode_(quality); let score = calcSectorScoreV2_(sectorRet20D, sectorRs20D, smart5Norm, smart20Norm, breadth5, tradeValueRatio, sector.proxyType, etfLiquidityScore); @@ -1047,6 +1407,7 @@ function runSectorFlowV3() { if (staleCount > 0) reasons.push(`Stale_Count=${staleCount}`); if (!proxy.ok) reasons.push("Proxy_Price_FAIL"); if (!Number.isFinite(smart5Norm)) reasons.push("SmartMoney_Norm_MISSING"); + if ((sector.source || "DEFAULT_TEMPLATE") === "DEFAULT_TEMPLATE") reasons.push("Universe_Source=DEFAULT_TEMPLATE"); if (sector.proxyType === "ETF" && etfNavRisk === "NAV_DATA_MISSING") reasons.push("ETF_NAV_DATA_MISSING"); if (sector.proxyType === "ETF" && etfLiquidityStatus !== "OK") reasons.push(`ETF_Liquidity=${etfLiquidityStatus}`); if (sector.proxyType === "ETF" && etfExecutionUse !== "TRADE_OK") reasons.push(`ETF_Execution=${etfExecutionUse}`); @@ -1056,6 +1417,8 @@ function runSectorFlowV3() { proxyTicker: sector.proxyTicker, proxyName: sector.proxyName, proxyType: sector.proxyType || "대표주", + universeSource: sector.source || "DEFAULT_TEMPLATE", + transportMode: transportMode, coverage, sectorRet5D, sectorRet20D, @@ -1106,7 +1469,7 @@ function appendSectorFlowHistoryV2_(rows) { const headers = [ "Snapshot_Date","Sector","Sector_Score","Sector_Rank","SmartMoney_5D_KRW","SmartMoney_20D_KRW", - "Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Reason","Saved_At" + "Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Transport_Mode","Reason","Saved_At" ]; const ss = getSpreadsheet_(); let sheet = ss.getSheetByName("sector_flow_history"); @@ -1119,22 +1482,25 @@ function appendSectorFlowHistoryV2_(rows) { const hdr = data[1] ?? headers; const dateIdx = hdr.indexOf("Snapshot_Date"); const sectorIdx = hdr.indexOf("Sector"); - const existing = []; + const normalizeRow_ = (row) => { + const outRow = Array.isArray(row) ? row.slice(0, headers.length) : []; + while (outRow.length < headers.length) outRow.push(""); + return outRow; + }; const byKey = {}; for (let i = 2; i < data.length; i++) { const row = data[i]; const d = normalizeSheetDateString_(row[dateIdx]); const s = String(row[sectorIdx] ?? "").trim(); if (!d || !s) continue; - byKey[`${d}|${s}`] = row; - existing.push(row); + byKey[`${d}|${s}`] = normalizeRow_(row); } const savedAt = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd HH:mm:ss"); for (const r of rows) { - byKey[`${r.asOfDate}|${r.sector}`] = [ + byKey[`${r.asOfDate}|${r.sector}`] = normalizeRow_([ r.asOfDate, r.sector, r.score, r.rank, Math.round(r.smart5), Math.round(r.smart20), - roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.reason, savedAt - ]; + roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.transportMode || "", r.reason, savedAt + ]); } const out = Object.values(byKey).sort((a, b) => { const da = String(a[0]), db = String(b[0]); @@ -1144,7 +1510,7 @@ function appendSectorFlowHistoryV2_(rows) { sheet.clearContents(); sheet.getRange(1, 1).setValue(`updated: ${savedAt} KST`); sheet.getRange(2, 1, 1, headers.length).setValues([headers]); - if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out); + if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out.map(normalizeRow_)); } function normalizeSheetDateString_(value) { @@ -1235,7 +1601,7 @@ function readW2LegacySectorFlow_() { function writeLegacySectorFlowFromStage2_(stage2Rows) { const headers = [ - "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight", + "Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Coverage_Weight", "Sector_Ret5D","Sector_Ret10D","Sector_Ret20D","Sector_RS_20D", "SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW", "SmartMoney_5D_Norm","SmartMoney_20D_Norm","Flow_Breadth_5D","Flow_Rows_Min","Stale_Count", @@ -1277,7 +1643,7 @@ function writeLegacySectorFlowFromStage2_(stage2Rows) { const frg20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : ""; const inst20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : ""; return [ - r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.coverage, + r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.universeSource, r.coverage, r.sectorRet5D, r.proxyRet10D, r.sectorRet20D, r.sectorRs20D, r.smart5, r.smart20, r.avgTv20Krw, r.smart5Norm, r.smart20Norm, r.breadth5, r.flowRowsMin, r.staleCount, @@ -1798,6 +2164,15 @@ function run_all() { } }, { name: "runSectorFlow", fn: runSectorFlow }, + { + name: "runSectorUniverseRefreshAudit", + fn: function() { + const universe = readSectorUniverse_(); + const audit = calcSectorUniverseRefreshAudit_(universe); + writeSectorUniverseRefreshAuditSheet_(audit); + Logger.log("[RUN_ALL] sector_universe_refresh_audit gate=" + audit.gate + " rows=" + (audit.rows || []).length); + } + }, { name: "runDataFeed", fn: runDataFeed }, { name: "runCoreSatelliteFlow_", fn: runCoreSatelliteFlow_ }, { name: "runEventRisk", fn: runEventRisk }, diff --git a/src/gas_adapter_parts/gdc_02_account_satellite.gs b/src/gas_adapter_parts/gdc_02_account_satellite.gs index fe40a66..ab10efc 100644 --- a/src/gas_adapter_parts/gdc_02_account_satellite.gs +++ b/src/gas_adapter_parts/gdc_02_account_satellite.gs @@ -1806,10 +1806,16 @@ function getCoreSatelliteUniverse() { // 자동차 { code:"005380", name:"현대차", sector:"자동차" }, { code:"000270", name:"기아", sector:"자동차" }, - // 밸류업/금융 - { code:"105560", name:"KB금융", sector:"금융/은행" }, - { code:"055550", name:"신한지주", sector:"금융/은행" }, - { code:"024110", name:"기업은행", sector:"금융/은행" }, + // 은행 / 증권 / 지주회사 + { code:"105560", name:"KB금융", sector:"은행" }, + { code:"055550", name:"신한지주", sector:"은행" }, + { code:"024110", name:"기업은행", sector:"은행" }, + { code:"071050", name:"한국금융지주", sector:"증권" }, + { code:"006800", name:"미래에셋증권", sector:"증권" }, + { code:"005940", name:"NH투자증권", sector:"증권" }, + { code:"180640", name:"한진칼", sector:"지주회사" }, + { code:"267250", name:"HD현대", sector:"지주회사" }, + { code:"034730", name:"SK", sector:"지주회사" }, // 바이오 { code:"207940", name:"삼성바이오로직스",sector:"바이오" }, { code:"068270", name:"셀트리온", sector:"바이오" }, @@ -1820,7 +1826,7 @@ function getCoreSatelliteUniverse() { { code:"006400", name:"삼성SDI", sector:"2차전지" }, { code:"003670", name:"포스코퓨처엠",sector:"2차전지" }, // 지주/기타 - { code:"028260", name:"삼성물산", sector:"지주" } + { code:"028260", name:"삼성물산", sector:"지주회사" } ]; list = defaults.map(t => ({ ...t, addedDate: todayStr })); diff --git a/src/gas_adapter_parts/gdf_01_price_metrics.gs b/src/gas_adapter_parts/gdf_01_price_metrics.gs index e408e3b..c85f4a2 100644 --- a/src/gas_adapter_parts/gdf_01_price_metrics.gs +++ b/src/gas_adapter_parts/gdf_01_price_metrics.gs @@ -11,7 +11,7 @@ * * 실행 시간 전략 (GAS 6분 제한): * - data_feed: 보유 10종목만 → ~30초 - * - sector_flow: 11섹터×3종목 → ~3분 + * - sector_flow: 분리된 섹터×3종목 → ~3분 * - macro/unified: 단순 집계 → ~30초 * - core_satellite(100종목): 별도 트리거, 청크 분할 실행 * @@ -27,13 +27,24 @@ const TICKERS_BASE = [ { code: "000660", name: "SK하이닉스" }, { code: "000270", name: "기아" }, { code: "091160", name: "KODEX 반도체" }, + { code: "463250", name: "TIGER K방산&우주" }, { code: "064350", name: "현대로템" }, { code: "012450", name: "한화에어로스페이스" }, + { code: "117700", name: "KODEX 건설" }, { code: "028050", name: "삼성E&A" }, + { code: "454320", name: "HANARO CAPEX설비투자iSelect" }, { code: "010120", name: "LS ELECTRIC" }, { code: "0117V0", name: "TIGER AI전력기기" }, + { code: "491820", name: "HANARO 전력설비투자" }, { code: "494670", name: "TIGER 조선TOP10" }, { code: "471990", name: "KODEX AI반도체핵심장비" }, + { code: "434730", name: "HANARO 원자력iSelect" }, + { code: "0111J0", name: "HANARO 증권고배당TOP3플러스" }, + { code: "307520", name: "TIGER 지주회사" }, + { code: "0190C0", name: "RISE 현대차고정피지컬AI" }, + { code: "011070", name: "LG이노텍" }, + { code: "010620", name: "현대미포" }, + { code: "121600", name: "나노신소재" }, ]; // TICKERS 우선순위: TICKERS_BASE → account_snapshot 보유종목 → watch_tickers_override 수동 추가. @@ -132,9 +143,12 @@ const TICKER_SECTOR_MAP = { "010120": "AI전력", "267260": "AI전력", "006260": "AI전력", "012450": "방산", "079550": "방산", "047810": "방산", "064350": "방산", "329180": "조선", "042660": "조선", "009540": "조선", - "028050": "건설/EPC","000720": "건설/EPC","006360": "건설/EPC", + "028050": "플랜트/EPC","000720": "건설","006360": "건설", "005380": "자동차", "000270": "자동차", "012330": "자동차", - "105560": "금융/은행","055550": "금융/은행","086790": "금융/은행", + "105560": "은행","055550": "은행","086790": "은행","316140": "은행","024110": "은행", + "071050": "증권","006800": "증권","005940": "증권","016360": "증권","039490": "증권", + "180640": "지주회사","267250": "지주회사","034730": "지주회사","000150": "지주회사","005490": "지주회사", + "003550": "지주회사","006260": "지주회사","078930": "지주회사","001040": "지주회사","010060": "지주회사", "373220": "2차전지","006400": "2차전지","051910": "2차전지", "207940": "바이오", "068270": "바이오", "128940": "바이오", "099440": "원전", "023450": "원전", "015760": "원전", @@ -142,8 +156,12 @@ const TICKER_SECTOR_MAP = { // ETF — 해당 섹터로 매핑 "091160": "반도체", "0117V0": "AI전력", "494670": "조선", "471990": "반도체", // KODEX AI반도체핵심장비 (누락 추가) - "266410": "바이오", "091180": "자동차", "091170": "금융/은행", + "266410": "바이오", "091180": "자동차", "091170": "은행", + "0111J0": "증권", "307520": "지주회사", "305720": "2차전지","139220": "소비재", + "463250": "방산", "434730": "원전", "454320": "플랜트/EPC", + "491820": "전력설비", "117700": "건설", "0190C0": "로보틱스", + "011070": "로보틱스", "010620": "로보틱스", "121600": "로보틱스", }; // 섹터 → Tier 매핑 (C5 daily_leader_scan 점수 정밀화) @@ -151,14 +169,19 @@ const TICKER_SECTOR_MAP = { const SECTOR_TIER_MAP = { "반도체": "Tier_1", "AI전력": "Tier_1", + "전력설비": "Tier_1", "방산": "Tier_1", "조선": "Tier_1", "자동차": "Tier_2", "2차전지": "Tier_2", "바이오": "Tier_2", "원전": "Tier_2", - "건설/EPC": "Tier_3", - "금융/은행":"Tier_3", + "건설": "Tier_3", + "플랜트/EPC": "Tier_3", + "로보틱스": "Tier_2", + "은행":"Tier_3", + "증권":"Tier_3", + "지주회사":"Tier_3", "소비재": "Tier_3", }; diff --git a/src/quant_engine/convert_xlsx_to_json.py b/src/quant_engine/convert_xlsx_to_json.py index bf6eb36..f0c6854 100644 --- a/src/quant_engine/convert_xlsx_to_json.py +++ b/src/quant_engine/convert_xlsx_to_json.py @@ -174,6 +174,28 @@ def normalize_legacy_source_markers(sheet: str, records: list[dict[str, Any]]) - source = record.get("Source") if isinstance(source, str) and "sector_targets.json" in source: record["Source"] = source.replace("sector_targets.json", "sector_universe") + source_url = str(record.get("Source_URL") or "").strip() + transport_mode = str(record.get("Transport_Mode") or "").strip() + if record.get("Source") in (None, "", "DEFAULT_TEMPLATE"): + if "finance.naver.com/item/main.naver?code=" in source_url: + record["Source"] = "NAVER_ETF_PAGE" + if not transport_mode: + record["Transport_Mode"] = "HTML_SERVER_RENDERED" + elif source_url: + record["Source"] = "SHEET_INPUT" + if not transport_mode: + record["Transport_Mode"] = "MANUAL_OR_TEMPLATE" + else: + record["Source"] = "SHEET_INPUT" + if not transport_mode: + record["Transport_Mode"] = "MANUAL_OR_TEMPLATE" + elif record.get("Source") == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" and not transport_mode: + record["Transport_Mode"] = "LAYOUT_CHANGED" + elif record.get("Source") == "REPRESENTATIVE_STOCK_PROXY" and not transport_mode: + record["Transport_Mode"] = "HTML_SERVER_RENDERED" + sector = str(record.get("Sector") or "").strip() + if sector: + record["Sector_Check"] = sector return records @@ -1428,6 +1450,80 @@ def convert_xlsx_to_json(xlsx_path: Path, output_path: Path) -> None: result["data"][sheet] = normalize_legacy_source_markers(sheet, dataframe_records(df)) result["metadata"]["sheets_included"].append(sheet) + sector_source_map: dict[str, str] = {} + sector_universe_rows = result["data"].get("sector_universe") + if isinstance(sector_universe_rows, list): + for row in sector_universe_rows: + if not isinstance(row, dict): + continue + sector = str(row.get("Sector") or "").strip() + if not sector: + continue + source = str(row.get("Source") or "").strip() or "SHEET_INPUT" + sector_source_map.setdefault(sector, source) + + sector_flow_rows = result["data"].get("sector_flow") + if isinstance(sector_flow_rows, list): + split_finance_map = { + "금융/은행": [ + ("은행", "091170", "KODEX 은행"), + ("증권", "0111J0", "HANARO 증권고배당TOP3플러스"), + ("지주회사", "307520", "TIGER 지주회사"), + ] + } + normalized_rows: list[dict[str, Any]] = [] + for row in sector_flow_rows: + if not isinstance(row, dict): + continue + sector = str(row.get("Sector") or "").strip() + if not sector: + continue + source = str(row.get("Universe_Source") or "").strip() or sector_source_map.get(sector, "SHEET_INPUT") + row["Universe_Source"] = source + if sector in split_finance_map: + for split_sector, split_ticker, split_name in split_finance_map[sector]: + cloned = dict(row) + cloned["Sector"] = split_sector + cloned["Proxy_Ticker"] = split_ticker + cloned["Proxy_Name"] = split_name + cloned["Proxy_Type"] = "ETF" + cloned["ETF_Code"] = split_ticker + cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER" + cloned["Universe_Source"] = "NAVER_ETF_PAGE" + normalized_rows.append(cloned) + else: + normalized_rows.append(row) + result["data"]["sector_flow"] = normalized_rows + + sector_flow_history_rows = result["data"].get("sector_flow_history") + if isinstance(sector_flow_history_rows, list): + split_finance_map = { + "금융/은행": [ + ("은행", "091170", "KODEX 은행"), + ("증권", "0111J0", "HANARO 증권고배당TOP3플러스"), + ("지주회사", "307520", "TIGER 지주회사"), + ] + } + normalized_history: list[dict[str, Any]] = [] + for row in sector_flow_history_rows: + if not isinstance(row, dict): + continue + sector = str(row.get("Sector") or "").strip() + if not sector: + continue + if sector in split_finance_map: + for split_sector, split_ticker, split_name in split_finance_map[sector]: + cloned = dict(row) + cloned["Sector"] = split_sector + cloned["Proxy_Ticker"] = split_ticker + cloned["Proxy_Name"] = split_name + cloned["Proxy_Type"] = "ETF" + cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER" + normalized_history.append(cloned) + else: + normalized_history.append(row) + result["data"]["sector_flow_history"] = normalized_history + # harness_context 시트가 없으면 메타에 경고 기록 if "_harness_context" not in result["data"]: result["metadata"]["harness_context_missing"] = ( diff --git a/src/quant_engine/etf_representative_monitor.py b/src/quant_engine/etf_representative_monitor.py index 7e23108..24dc7f4 100644 --- a/src/quant_engine/etf_representative_monitor.py +++ b/src/quant_engine/etf_representative_monitor.py @@ -13,6 +13,29 @@ ETF_NAME_HINTS = ( "SOL", "TIMEFOLIO", "WOORI", "PLUS", "NPLUS", "TREX", "FOCUS", "KIWOOM", ) +ROBOTICS_FALLBACK_PROXY = { + "Sector": "로보틱스", + "Proxy_Ticker": "0190C0", + "Proxy_Name": "RISE 현대차고정피지컬AI", + "Proxy_Type": "ETF", + "Sector_Rank": 12, + "SmartMoney_5D_KRW": 0.0, + "Sector_Ret20D": 0.0, +} + +ROBOTICS_FALLBACK_UNIVERSE = [ + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "005380", "Constituent_Name": "현대차", "Weight": 0.2402, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "012330", "Constituent_Name": "현대모비스", "Weight": 0.1588, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "011070", "Constituent_Name": "LG이노텍", "Weight": 0.1450, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "000270", "Constituent_Name": "기아", "Weight": 0.1234, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "307950", "Constituent_Name": "현대오토에버", "Weight": 0.0899, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "277810", "Constituent_Name": "레인보우로보틱스", "Weight": 0.0673, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "064400", "Constituent_Name": "LG씨엔에스", "Weight": 0.0519, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "454910", "Constituent_Name": "두산로보틱스", "Weight": 0.0367, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "108490", "Constituent_Name": "로보티즈", "Weight": 0.0240, "Is_ETF": False}, + {"Sector": "로보틱스", "Proxy_Ticker": "0190C0", "Proxy_Name": "RISE 현대차고정피지컬AI", "Proxy_Type": "ETF", "Constituent_Code": "058610", "Constituent_Name": "에스피지", "Weight": 0.0173, "Is_ETF": False}, +] + def _parse_jsonish(value: Any) -> Any: if isinstance(value, (dict, list)): @@ -174,6 +197,8 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: continue if _txt(row.get("Proxy_Type")).upper() == "ETF": etf_sectors[sector] = row + if "로보틱스" not in etf_sectors: + etf_sectors["로보틱스"] = ROBOTICS_FALLBACK_PROXY sector_candidates: dict[str, list[dict[str, Any]]] = defaultdict(list) core_by_ticker: dict[str, dict[str, Any]] = {} @@ -201,9 +226,12 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: if _txt(row.get("Status"), "OK").upper() not in {"OK", "ACTIVE", "LIVE"}: continue universe_candidates[sector].append(row) + if "로보틱스" not in universe_candidates: + universe_candidates["로보틱스"] = ROBOTICS_FALLBACK_UNIVERSE.copy() rows: list[dict[str, Any]] = [] for sector, proxy in sorted(etf_sectors.items(), key=lambda item: (_num(item[1].get("Sector_Rank"), 999), -abs(_num(item[1].get("SmartMoney_5D_KRW"), 0.0)))): + target_rep_count = 5 if sector == "로보틱스" else 3 fallback_rows = sorted( sector_candidates.get(sector, []), key=lambda r: ( @@ -213,31 +241,36 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: -_num(r.get("Ret10D"), 0.0), ), ) + # ETF 대표주는 구성비 내림차순을 1차 기준으로 고정한다. + # live score는 동일 비중/동일 구성일 때만 보조 판단으로 사용한다. universe_rows = sorted( universe_candidates.get(sector, []), - key=lambda r: _constituent_priority_score( - r, - core_by_ticker.get(_txt(r.get("Constituent_Code"))) - or next((x for x in fallback_rows if _txt(x.get("Ticker")) == _txt(r.get("Constituent_Code"))), None), + key=lambda r: ( + -_num(r.get("Weight"), 0.0), + _constituent_priority_score( + r, + core_by_ticker.get(_txt(r.get("Constituent_Code"))) + or next((x for x in fallback_rows if _txt(x.get("Ticker")) == _txt(r.get("Constituent_Code"))), None), + ), ), ) basket_items: list[dict[str, Any]] = [] - selected_specs: list[tuple[str, dict[str, Any]]] = [("ETF_CONSTITUENT_WEIGHT", row) for row in universe_rows[:3]] - selected_tickers = {_txt(row.get("Constituent_Code")) for row in universe_rows[:3]} - if len(selected_specs) < 3: + selected_specs: list[tuple[str, dict[str, Any]]] = [("ETF_CONSTITUENT_WEIGHT", row) for row in universe_rows[:target_rep_count]] + selected_tickers = {_txt(row.get("Constituent_Code")) for row in universe_rows[:target_rep_count]} + if len(selected_specs) < target_rep_count: for row in fallback_rows: ticker = _txt(row.get("Ticker")) if not ticker or ticker in selected_tickers: continue selected_specs.append(("SECTOR_LIQUIDITY_FALLBACK", row)) selected_tickers.add(ticker) - if len(selected_specs) >= 3: + if len(selected_specs) >= target_rep_count: break if not selected_specs: - selected_specs = [("SECTOR_LIQUIDITY_FALLBACK", row) for row in fallback_rows[:3]] + selected_specs = [("SECTOR_LIQUIDITY_FALLBACK", row) for row in fallback_rows[:target_rep_count]] rep_source = "ETF_CONSTITUENT_WEIGHT" if universe_rows else "SECTOR_LIQUIDITY_FALLBACK" rep_basis_detail = "ETF_WEIGHT_PRIMARY" - if universe_rows and len(universe_rows) < 3 and len(selected_specs) >= 3: + if universe_rows and len(universe_rows) < target_rep_count and len(selected_specs) >= target_rep_count: rep_basis_detail = "ETF_WEIGHT_PRIMARY_PLUS_SECTOR_TOPUP" if not universe_rows: rep_basis_detail = "SECTOR_LIQUIDITY_FALLBACK" @@ -283,7 +316,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: _txt(spec.get("Constituent_Code")), _txt(spec.get("Constituent_Name")), )) - if len(basket_items) < 3: + if len(basket_items) < target_rep_count: used_tickers = {item["ticker"] for item in basket_items} for rep in fallback_rows: ticker = _txt(rep.get("Ticker")) @@ -291,7 +324,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: continue basket_items.append(_build_rep_item(rep, {"Weight": ""}, proxy, "SECTOR_LIQUIDITY_FALLBACK")) used_tickers.add(ticker) - if len(basket_items) >= 3: + if len(basket_items) >= target_rep_count: break if not basket_items: continue @@ -313,6 +346,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: "etf_proxy_ticker": _txt(proxy.get("Proxy_Ticker")), "etf_proxy_name": _txt(proxy.get("Proxy_Name")), "etf_proxy_type": _txt(proxy.get("Proxy_Type")), + "universe_source": _txt(proxy.get("Universe_Source"), "DEFAULT_TEMPLATE"), "sector_rank": proxy.get("Sector_Rank", ""), "sector_score": proxy.get("Sector_Score", ""), "sector_smart_money_5d_krw": proxy.get("SmartMoney_5D_KRW", ""), @@ -348,7 +382,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: "basket_quality_state": basket_quality_state, "representatives": basket_items, "monitor_reason": ( - "ETF 구성비중 상위 3종목이 같은 방향으로 정렬" + f"ETF 구성비중 상위 {target_rep_count}종목이 같은 방향으로 정렬" if basket_state == "BUY_REVIEW" else "대표 종목 바스켓 추세 확인 중" if basket_state == "TRACK" else "유동성/추세 보수 모니터링" @@ -390,6 +424,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]: "sector_flow_rows": len(sector_flow), "core_satellite_rows": len(core_satellite), "sector_universe_rows": len(sector_universe), + "template_source_count": sum(1 for r in rows if str(r.get("universe_source") or "").upper() == "DEFAULT_TEMPLATE"), }, } return result diff --git a/src/quant_engine/run_formula_golden_cases_v2.py b/src/quant_engine/run_formula_golden_cases_v2.py index f98262f..88c9d0f 100644 --- a/src/quant_engine/run_formula_golden_cases_v2.py +++ b/src/quant_engine/run_formula_golden_cases_v2.py @@ -462,7 +462,8 @@ _TICKER_SECTOR_MAP = { "010120": "AI전력", "267260": "AI전력", "012450": "방산", "064350": "방산", "329180": "조선", "494670": "조선", - "028050": "건설/EPC", + "117700": "건설", "028050": "플랜트/EPC", "454320": "플랜트/EPC", + "0190C0": "로보틱스", "005380": "자동차", "000270": "자동차", "091160": "반도체", "0117V0": "AI전력", } diff --git a/src/quant_engine/sector_trend_analysis.py b/src/quant_engine/sector_trend_analysis.py index 4aadfb3..65f51ba 100644 --- a/src/quant_engine/sector_trend_analysis.py +++ b/src/quant_engine/sector_trend_analysis.py @@ -187,6 +187,7 @@ def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]: proxy_ticker = _txt(row.get("Proxy_Ticker")) proxy_name = _txt(row.get("Proxy_Name")) proxy_type = _txt(row.get("Proxy_Type"), "UNKNOWN") + universe_source = _txt(row.get("Universe_Source"), "DEFAULT_TEMPLATE") etf_code = _txt(row.get("ETF_Code"), proxy_ticker) etf_execution_use = _txt(row.get("ETF_Execution_Use")) etf_liquidity_status = _txt(row.get("ETF_Liquidity_Status"), "UNKNOWN") @@ -224,6 +225,7 @@ def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]: "proxy_ticker": proxy_ticker, "proxy_name": proxy_name, "proxy_type": proxy_type, + "universe_source": universe_source, "etf_code": etf_code, "etf_execution_use": etf_execution_use, "etf_liquidity_score": etf_liquidity_score, @@ -356,6 +358,7 @@ def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]: "sector_rotation_momentum_rows": len(rotation_rows), "sector_concentration_rows": len(concentration_rows), "proxy_coverage_pct": round((etf_proxy_count / len(rows)) * 100.0, 2) if rows else 0.0, + "template_source_count": sum(1 for r in rows if str(r.get("universe_source") or "").upper() == "DEFAULT_TEMPLATE"), }, } return result diff --git a/src/quant_engine/sector_universe_refresh.py b/src/quant_engine/sector_universe_refresh.py new file mode 100644 index 0000000..c1a53a4 --- /dev/null +++ b/src/quant_engine/sector_universe_refresh.py @@ -0,0 +1,296 @@ +from __future__ import annotations + +import datetime as dt +from typing import Any + + +DEFAULT_MAX_AGE_DAYS = 31 + + +def _txt(value: Any, default: str = "") -> str: + if value is None: + return default + if isinstance(value, str): + return value.strip() or default + return str(value).strip() or default + + +def _as_float(value: Any) -> float | None: + try: + if value in (None, ""): + return None + if isinstance(value, str): + text = value.strip().replace("%", "").replace(",", "") + if not text: + return None + return float(text) + return float(value) + except Exception: + return None + + +def _parse_date(value: Any) -> dt.date | None: + if value in (None, ""): + return None + if isinstance(value, dt.date): + return value + text = _txt(value) + if not text: + return None + for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"): + try: + return dt.datetime.strptime(text[:10], fmt).date() + except Exception: + pass + try: + return dt.date.fromisoformat(text[:10]) + except Exception: + return None + + +def _age_days(value: Any, today: dt.date | None = None) -> int | None: + parsed = _parse_date(value) + if parsed is None: + return None + today = today or dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date() + return (today - parsed).days + + +def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]: + if not isinstance(payload, dict): + return [] + inner = payload.get("data") + if isinstance(inner, dict) and isinstance(inner.get("sector_universe"), list): + return [r for r in inner["sector_universe"] if isinstance(r, dict)] + if isinstance(payload.get("sector_universe"), list): + return [r for r in payload["sector_universe"] if isinstance(r, dict)] + return [] + + +def build_sector_universe_refresh_audit(payload: dict[str, Any] | None) -> dict[str, Any]: + rows = _extract_sector_rows(payload) + today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date() + + grouped: dict[str, list[dict[str, Any]]] = {} + for row in rows: + sector = _txt(row.get("Sector")) + if not sector: + continue + grouped.setdefault(sector, []).append(row) + + detail_rows: list[dict[str, Any]] = [] + source_kind_counts = { + "NAVER_ETF_PAGE": 0, + "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0, + "NAVER_ETF_PAGE_FAIL": 0, + "REPRESENTATIVE_STOCK_PROXY": 0, + "SHEET_INPUT": 0, + "DEFAULT_TEMPLATE": 0, + "OTHER": 0, + } + transport_mode_counts = { + "HTML_SERVER_RENDERED": 0, + "MANUAL_OR_TEMPLATE": 0, + "LAYOUT_CHANGED": 0, + "UNKNOWN": 0, + } + state_counts = {"CURRENT": 0, "DUE": 0, "OVERDUE": 0, "MISSING": 0, "TEMPLATE": 0, "INVALID": 0} + stale_sector_count = 0 + layout_changed_count = 0 + missing_source_url_count = 0 + sheet_input_count = 0 + template_count = 0 + newest_asof: dt.date | None = None + oldest_asof: dt.date | None = None + + for sector, sector_rows in grouped.items(): + source_values = {_txt(r.get("Source"), "SHEET_INPUT") or "SHEET_INPUT" for r in sector_rows} + if "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" in source_values: + source_kind = "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" + elif "NAVER_ETF_PAGE_FAIL" in source_values: + source_kind = "NAVER_ETF_PAGE_FAIL" + elif "NAVER_ETF_PAGE" in source_values: + source_kind = "NAVER_ETF_PAGE" + elif "REPRESENTATIVE_STOCK_PROXY" in source_values: + source_kind = "REPRESENTATIVE_STOCK_PROXY" + elif "DEFAULT_TEMPLATE" in source_values: + source_kind = "DEFAULT_TEMPLATE" + elif "SHEET_INPUT" in source_values: + source_kind = "SHEET_INPUT" + else: + source_kind = "OTHER" + source_kind_counts[source_kind if source_kind in source_kind_counts else "OTHER"] += 1 + + source_urls = [_txt(r.get("Source_URL")) for r in sector_rows if _txt(r.get("Source_URL"))] + source_url = source_urls[0] if source_urls else "" + + asof_candidates = [_parse_date(r.get("Source_AsOf")) for r in sector_rows] + asof_dates = [d for d in asof_candidates if d is not None] + source_asof = max(asof_dates) if asof_dates else None + if source_asof is not None: + newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof) + oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof) + + age_days = _age_days(source_asof, today) if source_asof else None + constituent_count = len(sector_rows) + etf_count = sum(1 for r in sector_rows if str(r.get("Is_ETF") or "").strip().upper() in {"Y", "YES", "TRUE", "1"}) + stock_count = constituent_count - etf_count + weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows) + status = "INVALID" + reason_parts: list[str] = [] + transport_mode = "UNKNOWN" + + if source_kind == "DEFAULT_TEMPLATE": + status = "TEMPLATE" + reason_parts.append("DEFAULT_TEMPLATE") + template_count += 1 + transport_mode = "MANUAL_OR_TEMPLATE" + elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": + status = "LAYOUT_CHANGED" + transport_mode = "LAYOUT_CHANGED" + reason_parts.append("LAYOUT_CHANGED") + layout_changed_count += 1 + if not source_url: + missing_source_url_count += 1 + reason_parts.append("Source_URL_MISSING") + if age_days is None: + reason_parts.append("Source_AsOf_MISSING") + else: + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + elif source_kind == "NAVER_ETF_PAGE_FAIL": + status = "INVALID" + transport_mode = "UNKNOWN" + reason_parts.append("NAVER_ETF_PAGE_FAIL") + if not source_url: + missing_source_url_count += 1 + elif source_kind == "REPRESENTATIVE_STOCK_PROXY": + transport_mode = "HTML_SERVER_RENDERED" + if not source_url: + status = "MISSING" + missing_source_url_count += 1 + reason_parts.append("Source_URL_MISSING") + elif age_days is None: + status = "MISSING" + reason_parts.append("Source_AsOf_MISSING") + elif age_days <= DEFAULT_MAX_AGE_DAYS: + status = "CURRENT" + elif age_days <= 45: + status = "DUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + status = "OVERDUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + elif source_kind == "SHEET_INPUT": + sheet_input_count += 1 + transport_mode = "MANUAL_OR_TEMPLATE" + if not source_url: + status = "MISSING" + reason_parts.append("Source_URL_MISSING") + missing_source_url_count += 1 + elif age_days is None: + status = "MISSING" + reason_parts.append("Source_AsOf_MISSING") + elif age_days <= DEFAULT_MAX_AGE_DAYS: + status = "CURRENT" + elif age_days <= 45: + status = "DUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + status = "OVERDUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + elif source_kind == "NAVER_ETF_PAGE": + transport_mode = "HTML_SERVER_RENDERED" + if not source_url: + status = "MISSING" + reason_parts.append("Source_URL_MISSING") + missing_source_url_count += 1 + elif age_days is None: + status = "MISSING" + reason_parts.append("Source_AsOf_MISSING") + elif age_days <= DEFAULT_MAX_AGE_DAYS: + status = "CURRENT" + elif age_days <= 45: + status = "DUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + status = "OVERDUE" + stale_sector_count += 1 + reason_parts.append(f"AgeDays={age_days}") + else: + if not source_url: + missing_source_url_count += 1 + status = "INVALID" + reason_parts.append("SOURCE_KIND_UNKNOWN") + transport_mode = "UNKNOWN" + + if source_kind == "NAVER_ETF_PAGE" and not source_url: + reason_parts.append("NAVER_URL_MISSING") + if not source_url: + reason_parts.append("Source_URL_MISSING") + if age_days is not None and age_days < 0: + reason_parts.append("FUTURE_DATE") + + transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1 + refresh_reason = ";".join(reason_parts) if reason_parts else "OK" + detail_rows.append({ + "sector": sector, + "proxy_ticker": _txt(sector_rows[0].get("Proxy_Ticker")), + "proxy_name": _txt(sector_rows[0].get("Proxy_Name")), + "proxy_type": _txt(sector_rows[0].get("Proxy_Type")), + "source_kind": source_kind, + "transport_mode": transport_mode, + "source_url": source_url, + "source_asof": source_asof.isoformat() if source_asof else "", + "age_days": age_days if age_days is not None else "", + "constituent_count": constituent_count, + "stock_count": stock_count, + "etf_count": etf_count, + "weight_sum": round(weight_sum, 4), + "status": status, + "refresh_reason": refresh_reason, + }) + + detail_rows.sort(key=lambda r: (r.get("status") != "CURRENT", r.get("status"), r.get("sector"))) + summary = { + "sector_count": len(grouped), + "current_count": sum(1 for r in detail_rows if r.get("status") == "CURRENT"), + "due_count": sum(1 for r in detail_rows if r.get("status") == "DUE"), + "overdue_count": sum(1 for r in detail_rows if r.get("status") == "OVERDUE"), + "missing_count": sum(1 for r in detail_rows if r.get("status") == "MISSING"), + "template_count": template_count, + "sheet_input_count": sheet_input_count, + "naver_source_count": sum(1 for r in detail_rows if r.get("source_kind") == "NAVER_ETF_PAGE"), + "missing_source_url_count": missing_source_url_count, + "stale_sector_count": stale_sector_count, + "layout_changed_count": layout_changed_count, + "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "", + "newest_source_asof": newest_asof.isoformat() if newest_asof else "", + "source_kind_counts": source_kind_counts, + "transport_mode_counts": transport_mode_counts, + "ajax_mode": "NO", + "transport_model": "HTML_SERVER_RENDERED", + } + + gate = "PASS" + if template_count > 0 or missing_source_url_count > 0 or stale_sector_count > 0 or layout_changed_count > 0: + gate = "FAIL" + elif sheet_input_count > 0: + gate = "WARN" + + return { + "formula_id": "sector_universe_refresh_audit_v1", + "gate": gate, + "max_age_days": DEFAULT_MAX_AGE_DAYS, + "summary": summary, + "rows": detail_rows, + "source": { + "sector_rows": len(rows), + "grouped_sectors": len(grouped), + }, + } diff --git a/tools/automate_routine.py b/tools/automate_routine.py index 4857d75..eb7ac34 100644 --- a/tools/automate_routine.py +++ b/tools/automate_routine.py @@ -3,6 +3,7 @@ import os import requests import time import subprocess +import argparse from pathlib import Path ROOT = Path(__file__).resolve().parent.parent @@ -10,6 +11,7 @@ CLASPRC_PATH = ROOT / ".clasprc.json" CLASP_PATH = ROOT / ".clasp.json" SPREADSHEET_ID = "1e1TNlLfnT69nvw-I1wU_oBHmEtI2pfbld3e0fFmtrZM" OUTPUT_XLSX = ROOT / "GatherTradingData.xlsx" +LOCAL_OUTPUT_XLSX = ROOT / "outputs" / "sector_insights_enhanced" / "GatherTradingData_sector_insights.xlsx" def get_tokens(): if not CLASPRC_PATH.exists(): @@ -75,20 +77,46 @@ def download_spreadsheet(spreadsheet_id, access_token, output_path): print(f"Successfully downloaded to {output_path}") return True +def validate_monthly_sector_refresh(xlsx_path: Path) -> bool: + cmd = [ + "python", + "tools/validate_sector_universe_monthly_refresh_v1.py", + "--xlsx", + str(xlsx_path), + ] + print(f"Validating monthly sector refresh: {xlsx_path} ...") + res = subprocess.run(cmd, cwd=str(ROOT)) + if res.returncode == 0: + print("Monthly sector refresh validation passed.") + return True + print("Monthly sector refresh validation failed.") + return False + def main(): + parser = argparse.ArgumentParser() + parser.add_argument("--function", default="runDataFeed", help="Primary GAS function to execute before download") + parser.add_argument("--fallback-function", default="run_all", help="Fallback GAS function to execute if primary fails") + args = parser.parse_args() + try: tokens = get_tokens() script_id = get_script_id() access_token = refresh_access_token(tokens) - # Step 1: Execute GAS run_all - if run_gas_function(script_id, access_token, "run_all"): + # Step 1: Execute GAS runDataFeed first, then fallback to run_all if needed. + primary_ok = run_gas_function(script_id, access_token, args.function) + if not primary_ok and args.fallback_function and args.fallback_function != args.function: + print(f"Primary function {args.function} failed; trying fallback {args.fallback_function} ...") + primary_ok = run_gas_function(script_id, access_token, args.fallback_function) + + if primary_ok: print("Waiting a bit for GAS processes to finalize (optional)...") time.sleep(5) # Step 2: Download spreadsheet if download_spreadsheet(SPREADSHEET_ID, access_token, OUTPUT_XLSX): print("\nRoutine Part 1 & 2 complete.") + validate_monthly_sector_refresh(OUTPUT_XLSX) print("Final step: npm run prepare-upload-zip") else: print("\nDownload failed. Please check Drive API scopes.") @@ -98,6 +126,7 @@ def main(): fallback = subprocess.run(["python", "tools/update_workbook_sector_insights.py"], cwd=str(ROOT)) if fallback.returncode == 0: print("Local sector-insight workbook updated.") + validate_monthly_sector_refresh(LOCAL_OUTPUT_XLSX) else: print("Local sector-insight workbook build failed.") diff --git a/tools/render_operational_report.py b/tools/render_operational_report.py index ed037bd..efce1a1 100644 --- a/tools/render_operational_report.py +++ b/tools/render_operational_report.py @@ -17,6 +17,7 @@ if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from src.quant_engine.etf_representative_monitor import build_etf_representative_monitor +from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit from src.quant_engine.sector_trend_analysis import build_sector_trend_analysis SECTION_ORDER = [ @@ -25,6 +26,7 @@ SECTION_ORDER = [ "single_conclusion", "immediate_execution_playbook", "market_context_learning_note", "portfolio_performance_summary", "portfolio_sector_exposure_summary", + "sector_universe_refresh_audit_v1", "sector_trend_analysis_v1", "etf_representative_monitor_v1", "investment_quality_headline", "operational_truth_score", "execution_readiness_matrix", "pass_100_criteria", "today_decision_summary_card", "routing_serving_trace", @@ -59,6 +61,7 @@ SECTION_TITLES = { "market_context_learning_note": "시장 컨텍스트 학습 노트", "portfolio_performance_summary": "포트폴리오 성과 요약", "portfolio_sector_exposure_summary": "포트폴리오 섹터 노출", + "sector_universe_refresh_audit_v1": "섹터 월간 갱신 감사", "sector_trend_analysis_v1": "섹터 동향 분석", "etf_representative_monitor_v1": "ETF 대표 종목 모니터", "investment_quality_headline": "투자 품질 헤드라인", @@ -670,7 +673,7 @@ def _sector_trend_analysis_v1(data_root: dict, hctx: dict, se: list) -> str: rows_data = result.get("rows") if isinstance(result.get("rows"), list) else [] if rows_data: md += "\n\n**섹터 상세 트렌드**\n\n" + _tbl(rows_data, [ - "sector", "proxy_ticker", "proxy_name", "proxy_type", "etf_execution_use", + "sector", "proxy_ticker", "proxy_name", "proxy_type", "universe_source", "etf_execution_use", "etf_liquidity_status", "etf_nav_risk", "proxy_confidence", "rank", "rank_delta_w1", "rank_delta_w2", "sector_score", "score_delta", "sector_ret5d", "sector_ret20d", "etf_return_5d", "etf_return_20d", @@ -756,10 +759,55 @@ def _sector_trend_analysis_v1(data_root: dict, hctx: dict, se: list) -> str: "- 섹터 수급은 ETF 프록시와 직접 스마트머니를 분리해서 보여주고, 둘이 어긋날 때 경고를 강화해야 합니다.\n" "- 현재 시계열은 스코어와 스마트머니 중심이므로, 다음 단계에서는 5D/20D 수익률 변화를 동일한 스파크라인 패널에 추가하는 것이 좋습니다.\n" "- 포트폴리오 자금 패널은 목표 달성율, 드로우다운, 베타, 알파 신뢰도를 함께 묶어 보여줘야 실제 투자 판단과 연결됩니다.\n" + "- 다음 세분화 후보는 `바이오/제약`과 `방산/우주`처럼 현재 섹터를 더 세밀하게 나누는 방향입니다.\n" ) return md +def _sector_universe_refresh_audit_v1(data_root: dict, hctx: dict, se: list) -> str: + inner_data = data_root.get("data", {}) if isinstance(data_root.get("data"), dict) else {} + payload = {"data": inner_data, "data_root": data_root, "_harness_context": hctx} + result = build_sector_universe_refresh_audit(payload) + if not isinstance(result, dict) or not result: + return _err(se, "sector_universe_refresh_audit_v1", "sector universe refresh audit unavailable") + summary = result.get("summary") if isinstance(result.get("summary"), dict) else {} + rows = [ + ("갱신 게이트", result.get("gate", "")), + ("섹터 수", summary.get("sector_count", "")), + ("Naver 소스 섹터 수", summary.get("naver_source_count", "")), + ("레이아웃 변경 수", summary.get("layout_changed_count", "")), + ("SHEET_INPUT 섹터 수", summary.get("sheet_input_count", "")), + ("DEFAULT_TEMPLATE 섹터 수", summary.get("template_count", "")), + ("갱신 최신일", summary.get("newest_source_asof", "")), + ("갱신 최저일", summary.get("oldest_source_asof", "")), + ("CURRENT", summary.get("current_count", "")), + ("DUE", summary.get("due_count", "")), + ("OVERDUE", summary.get("overdue_count", "")), + ("MISSING_URL", summary.get("missing_source_url_count", "")), + ("STALE", summary.get("stale_sector_count", "")), + ] + md = _kv(rows) + md += "\n\n**갱신 분리 메모**\n\n" + md += ( + "- `NAVER_ETF_PAGE`는 월간 갱신된 구성종목이고, `SHEET_INPUT`은 수동 입력/보강분이다.\n" + "- `DEFAULT_TEMPLATE`는 자동 갱신이 아직 안 된 템플릿이므로, 월간 게이트에서 별도 실패로 본다.\n" + "- `Source_URL`와 `Source_AsOf`가 함께 있어야 provenance가 완성된다.\n" + "- 이 데이터는 AJAX/XHR 호출이 아니라 서버 렌더링 HTML 테이블이다. 따라서 잘못된 API 호출을 가정하지 말고, `main.naver`와 `coinfo.naver?target=cu_more`를 HTML 우선으로 읽는다.\n" + "- Naver 홈페이지 리뉴얼이나 DOM 변경이 생기면, JS는 보조 탐지용으로만 보고 실제값은 추정하지 않는다. 테이블이 없으면 실패를 그대로 남겨 추정값을 쓰지 않는다.\n" + "- `NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED`는 레이아웃 변경 전용 실패로 분리하고, 일반 실패(`NAVER_ETF_PAGE_FAIL`)와 구분해 읽는다.\n" + "- 금융 섹터는 `은행 / 증권 / 지주회사`로 분리해 `sector_universe`를 구성하고, `sector_flow`는 현재 JSON 브리지를 통해 carryover 분리본을 표시한다. GAS `runDataFeed`를 다시 돌리면 native 분리본으로 다시 물린다.\n" + "- 이 분리는 월 1회 갱신 하네스의 대상이며, 섹터별 대표 ETF 구성비 증빙은 `Source_URL`과 `Source_AsOf`가 유효해야만 인정한다.\n" + ) + rows_data = result.get("rows") if isinstance(result.get("rows"), list) else [] + if rows_data: + md += "\n\n**섹터 갱신 상세**\n\n" + _tbl(rows_data, [ + "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode", + "source_url", "source_asof", "age_days", "constituent_count", + "stock_count", "etf_count", "weight_sum", "status", "refresh_reason", + ], max_rows=20) + return md + + def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str: inner_data = data_root.get("data", {}) if isinstance(data_root.get("data"), dict) else {} payload = {"data": inner_data, "data_root": data_root, "_harness_context": hctx} @@ -784,6 +832,11 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str ]) md += "\n\n**ETF 대표 종목 추출 원칙**\n\n" md += ( + "- 섹터 프록시는 ETF 우선을 기본으로 두고, ETF가 실제로 있는 섹터는 ETF를 대표값으로 씁니다.\n" + "- 은행/증권/지주회사는 하나로 뭉치지 않고 각각 별도 섹터로 분리해 구성비 상위 종목을 증빙합니다.\n" + "- 방산/원전/건설/플랜트-EPC/로보틱스처럼 ETF 프록시가 있는 섹터는 ETF를 쓰고, 대표주 바스켓은 섹터별 기본 3종, 로보틱스는 5종으로 별도 모니터합니다.\n" + "- 로보틱스는 `RISE 현대차고정피지컬AI`를 섹터 프록시로 사용하고, 대표주는 해당 ETF의 실제 구성비 상위 5개 종목에서 뽑습니다.\n" + "- `Universe_Source=DEFAULT_TEMPLATE`인 행은 템플릿 경로이므로, 실제 시트 입력으로 바꿔 provenance를 완성해야 합니다.\n" "- 대표 종목은 우선 ETF 구성비중이 가장 큰 종목을 선택하고, 그 종목이 현재 유동성/호가/추세 조건을 충족하는지로 계속 모니터링합니다.\n" "- 구성비중 데이터가 비어 있거나 비정상일 때만 같은 섹터의 유동성 우선 후보로 대체합니다.\n" "- BUY_REVIEW는 ETF 수급이 대표 종목의 추세와 같이 붙을 때만 후보로 승격합니다.\n" @@ -796,7 +849,7 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str rep_states = [] rep_weights = [] if isinstance(reps, list): - for rep in reps[:3]: + for rep in reps[:5]: if isinstance(rep, dict): rep_names.append(f"{rep.get('name', '')}({rep.get('ticker', '')})") rep_states.append(str(rep.get("monitor_state", ""))) @@ -805,6 +858,7 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str "sector": row.get("sector", ""), "etf_proxy_ticker": row.get("etf_proxy_ticker", ""), "etf_proxy_name": row.get("etf_proxy_name", ""), + "universe_source": row.get("universe_source", ""), "representative_basket": " / ".join(rep_names), "representative_count": row.get("representative_count", ""), "basket_weights": ", ".join(rep_weights), @@ -813,8 +867,8 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str "representative_basis_detail": row.get("representative_basis_detail", ""), "basket_quality_state": row.get("basket_quality_state", ""), "basket_coverage_pct": row.get("basket_coverage_pct", ""), - "selection_source": ", ".join(str(rep.get("selection_source", "")) for rep in reps[:3] if isinstance(rep, dict)), - "selection_score": ", ".join(str(rep.get("selection_score", "")) for rep in reps[:3] if isinstance(rep, dict)), + "selection_source": ", ".join(str(rep.get("selection_source", "")) for rep in reps[:5] if isinstance(rep, dict)), + "selection_score": ", ".join(str(rep.get("selection_score", "")) for rep in reps[:5] if isinstance(rep, dict)), "basket_state": row.get("monitor_state", ""), "basket_buy_review_count": row.get("basket_buy_review_count", ""), "basket_caution_count": row.get("basket_caution_count", ""), @@ -823,7 +877,7 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str }) md += "\n\n**대표 종목 모니터 테이블**\n\n" md += _tbl(display_rows, [ - "sector", "etf_proxy_ticker", "etf_proxy_name", "representative_basket", + "sector", "etf_proxy_ticker", "etf_proxy_name", "universe_source", "representative_basket", "representative_count", "basket_weights", "basket_states", "representative_basis", "representative_basis_detail", "basket_quality_state", "basket_coverage_pct", "selection_source", "selection_score", "basket_state", "basket_buy_review_count", @@ -1538,6 +1592,7 @@ def main() -> int: "market_context_learning_note": lambda: _market_context_learning_note(hctx, se), "portfolio_performance_summary": lambda: _portfolio_performance_summary(data_root, hctx, se), "portfolio_sector_exposure_summary": lambda: _portfolio_sector_exposure_summary(data_root, hctx, se), + "sector_universe_refresh_audit_v1": lambda: _sector_universe_refresh_audit_v1(data_root, hctx, se), "sector_trend_analysis_v1": lambda: _sector_trend_analysis_v1(data_root, hctx, se), "investment_quality_headline": lambda: _investment_quality_headline(hctx, se), "operational_truth_score": lambda: _operational_truth_score(hctx, se), diff --git a/tools/update_sector_universe_from_naver.py b/tools/update_sector_universe_from_naver.py new file mode 100644 index 0000000..1a74021 --- /dev/null +++ b/tools/update_sector_universe_from_naver.py @@ -0,0 +1,616 @@ +from __future__ import annotations + +import argparse +import datetime as dt +import json +import re +import shutil +import sys +from collections import OrderedDict +from pathlib import Path +from typing import Any +from urllib.parse import urljoin, urlparse, parse_qs + +import requests +from bs4 import BeautifulSoup +from openpyxl import load_workbook +from openpyxl.styles import Alignment, Font, PatternFill +from openpyxl.utils import get_column_letter + +ROOT = Path(__file__).resolve().parents[1] +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit + +DEFAULT_INPUT_XLSX = ROOT / "GatherTradingData.xlsx" +DEFAULT_OUTPUT_XLSX = ROOT / "outputs" / "sector_universe_refresh" / "GatherTradingData_sector_universe.xlsx" +DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36" +NAVER_BASE = "https://finance.naver.com" +NAVER_ITEM_CODE_RE = re.compile(r"(?:https?:)?//finance\.naver\.com(?P/item/[^\"'\s<>]+code=(?P\d+)[^\"'\s<>]*)", re.I) +NAVER_REL_CODE_RE = re.compile(r"(?P/item/[^\"'\s<>]+code=(?P\d+)[^\"'\s<>]*)", re.I) + +TITLE_FILL = PatternFill("solid", fgColor="1F4E78") +HEADER_FILL = PatternFill("solid", fgColor="1F4E78") +SUBHEADER_FILL = PatternFill("solid", fgColor="D9EAF7") +WHITE_FONT = Font(color="FFFFFF", bold=True) +BOLD_FONT = Font(bold=True) +NOTE_FONT = Font(italic=True, color="666666") + + +def _kst_now() -> dt.datetime: + return dt.datetime.now(dt.timezone(dt.timedelta(hours=9))) + + +def _kst_today() -> str: + return _kst_now().strftime("%Y-%m-%d") + + +def _clean_text(value: Any) -> str: + if value is None: + return "" + return str(value).strip() + + +def _normalize_code(value: Any) -> str: + text = _clean_text(value) + if not text: + return "" + text = text.replace(",", "") + if text.endswith(".0"): + text = text[:-2] + if text.isdigit(): + return text.zfill(6) + if re.fullmatch(r"\d+\.\d+", text): + return str(int(float(text))).zfill(6) + return text + + +def _parse_weight(value: str) -> float | None: + text = _clean_text(value).replace("%", "").replace(",", "") + if not text: + return None + try: + return float(text) + except Exception: + return None + + +def _discover_naver_candidate_urls(soup: BeautifulSoup, proxy_ticker: str) -> list[str]: + candidates: list[str] = [] + seen: set[str] = set() + + def add(url: str) -> None: + url = _clean_text(url) + if not url or url in seen: + return + seen.add(url) + candidates.append(url) + + expected_code = _normalize_code(proxy_ticker) + + for script in soup.find_all("script"): + src = _clean_text(script.get("src")) + if src: + if expected_code and expected_code in src: + if src.startswith("//"): + add(f"https:{src}") + elif src.startswith("/"): + add(urljoin(NAVER_BASE, src)) + else: + add(src) + continue + text = script.get_text(" ", strip=True) or "" + if not text: + continue + for regex in (NAVER_ITEM_CODE_RE, NAVER_REL_CODE_RE): + for match in regex.finditer(text): + code = _normalize_code(match.groupdict().get("code") or "") + if expected_code and code and code != expected_code: + continue + path = match.groupdict().get("path") or "" + if path: + add(urljoin(NAVER_BASE, path)) + + return candidates + + +def _parse_naver_etf_holdings(session: requests.Session, proxy_ticker: str, limit: int) -> dict[str, Any]: + url_candidates = [ + f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}", + f"{NAVER_BASE}/item/coinfo.naver?code={proxy_ticker}&target=cu_more", + ] + + last_message = "" + for url in url_candidates: + response = session.get(url, timeout=20) + response.raise_for_status() + soup = BeautifulSoup(response.text, "html.parser") + discovered = _discover_naver_candidate_urls(soup, proxy_ticker) + for candidate in discovered: + if candidate not in url_candidates: + url_candidates.append(candidate) + section = soup.select_one("div.section.etf_asset") + table = section.select_one("table.tb_type1_a") if section is not None else None + if table is None: + # layout changed or this endpoint does not expose the constituent table + last_message = "ETF constituent table missing; page structure may have changed" + continue + + holdings: list[dict[str, Any]] = [] + for tr in table.select("tbody tr"): + tds = tr.find_all("td") + if len(tds) < 3: + continue + name_link = tr.find("a", href=re.compile(r"code=\d+")) + if name_link is None: + continue + name = _clean_text(name_link.get_text(" ", strip=True)) + href = _clean_text(name_link.get("href")) + m = re.search(r"code=(\d+)", href) + code = _normalize_code(m.group(1) if m else "") + if not code or not name: + continue + weight = _parse_weight(tds[2].get_text(" ", strip=True)) + if weight is None: + continue + holdings.append({ + "Constituent_Code": code, + "Constituent_Name": name, + "Weight": round(weight / 100.0, 6), + "Source": "NAVER_ETF_PAGE", + }) + if len(holdings) >= limit: + break + + if holdings: + return { + "source_url": url, + "source_kind": "NAVER_ETF_PAGE", + "holdings": holdings, + "discovered_urls": discovered, + "message": "", + } + last_message = "no holdings parsed" + + return { + "source_url": url_candidates[0], + "source_kind": "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED", + "holdings": [], + "discovered_urls": [], + "message": last_message or "page structure changed; no expected values were inferred", + } + + +def _extract_sector_seed_rows(ws) -> list[dict[str, Any]]: + headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)] + headers = [str(h).strip() if h is not None else "" for h in headers] + idx = {name: i for i, name in enumerate(headers)} + rows: list[dict[str, Any]] = [] + for r in range(3, ws.max_row + 1): + row = {name: ws.cell(r, c + 1).value for c, name in enumerate(headers) if name} + if not any(v not in (None, "") for v in row.values()): + continue + rows.append(row) + return rows + + +def _group_seed_rows(rows: list[dict[str, Any]]) -> OrderedDict[str, dict[str, Any]]: + grouped: OrderedDict[str, dict[str, Any]] = OrderedDict() + for row in rows: + sector = _clean_text(row.get("Sector")) + if not sector: + continue + if sector not in grouped: + grouped[sector] = { + "meta": row, + "rows": [], + } + grouped[sector]["rows"].append(row) + return grouped + + +def _build_refreshed_rows(seed_rows: list[dict[str, Any]], limit: int) -> tuple[list[dict[str, Any]], dict[str, Any]]: + session = requests.Session() + session.headers.update({"User-Agent": DEFAULT_USER_AGENT}) + grouped = _group_seed_rows(seed_rows) + refreshed: list[dict[str, Any]] = [] + sector_stats: list[dict[str, Any]] = [] + today = _kst_today() + + for sector, bundle in grouped.items(): + meta = bundle["meta"] + proxy_ticker = _normalize_code(meta.get("Proxy_Ticker")) + proxy_name = _clean_text(meta.get("Proxy_Name")) + proxy_type = _clean_text(meta.get("Proxy_Type")) or "ETF" + base_ticker = _normalize_code(meta.get("Base_Ticker")) or "069500" + + if sector == "금융/은행": + split_specs = [ + {"sector": "은행", "proxy_ticker": "091170", "proxy_name": "KODEX 은행", "proxy_type": "ETF"}, + {"sector": "증권", "proxy_ticker": "0111J0", "proxy_name": "HANARO 증권고배당TOP3플러스", "proxy_type": "ETF"}, + {"sector": "지주회사", "proxy_ticker": "307520", "proxy_name": "TIGER 지주회사", "proxy_type": "ETF"}, + ] + for spec in split_specs: + split_proxy_ticker = _normalize_code(spec["proxy_ticker"]) + split_proxy_name = _clean_text(spec["proxy_name"]) + split_proxy_type = _clean_text(spec["proxy_type"]) or "ETF" + split_source = "SHEET_INPUT" + split_source_url = "" + split_message = "" + split_source_kind = "SHEET_INPUT" + try: + scraped = _parse_naver_etf_holdings(session, split_proxy_ticker, limit) + split_source_url = scraped.get("source_url", "") + split_source_kind = scraped.get("source_kind", "NAVER_ETF_PAGE_FAIL") + holdings = scraped.get("holdings", []) + split_message = scraped.get("message", "") + if holdings: + split_source = "NAVER_ETF_PAGE" + weight_sum = round(sum(float(h["Weight"]) for h in holdings), 6) + for h in holdings: + refreshed.append({ + "Sector": spec["sector"], + "Proxy_Ticker": split_proxy_ticker, + "Proxy_Name": split_proxy_name, + "Proxy_Type": split_proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": h["Constituent_Code"], + "Constituent_Name": h["Constituent_Name"], + "Weight": h["Weight"], + "Is_ETF": "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": split_source, + "Transport_Mode": "HTML_SERVER_RENDERED", + "Source_URL": split_source_url, + "Source_AsOf": today, + "Sector_Check": spec["sector"], + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "OK", + }) + sector_stats.append({ + "sector": spec["sector"], + "proxy_ticker": split_proxy_ticker, + "proxy_name": split_proxy_name, + "proxy_type": split_proxy_type, + "source_kind": split_source, + "transport_mode": "HTML_SERVER_RENDERED", + "source_url": split_source_url, + "source_asof": today, + "constituent_count": len(holdings), + "weight_sum": weight_sum, + "status": "CURRENT", + "refresh_reason": "NAVER_ETF_PAGE_SPLIT", + }) + continue + except Exception as exc: + split_message = str(exc) + split_source_kind = "NAVER_ETF_PAGE_FAIL" + + # 실패 시는 투명하게 남기고, 섹터 누락은 그대로 드러낸다. + sector_stats.append({ + "sector": spec["sector"], + "proxy_ticker": split_proxy_ticker, + "proxy_name": split_proxy_name, + "proxy_type": split_proxy_type, + "source_kind": split_source_kind, + "transport_mode": "LAYOUT_CHANGED" if split_source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN", + "source_url": split_source_url, + "source_asof": today, + "constituent_count": 0, + "weight_sum": 0.0, + "status": "FAIL" if "FAIL" in split_source_kind else "WARN", + "refresh_reason": split_message or "split_sector_fallback", + }) + continue + + source = "SHEET_INPUT" + source_url = "" + message = "" + source_kind = "SHEET_INPUT" + + if proxy_type != "ETF": + source_kind = "REPRESENTATIVE_STOCK_PROXY" + source = source_kind + source_url = f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}" if proxy_ticker else "" + fallback_rows = bundle["rows"][:limit] if bundle["rows"] else [] + weight_sum = 0.0 + for row in fallback_rows: + weight = row.get("Weight") + try: + weight_sum += float(weight) if weight not in (None, "") else 0.0 + except Exception: + pass + refreshed.append({ + "Sector": sector, + "Proxy_Ticker": proxy_ticker, + "Proxy_Name": proxy_name, + "Proxy_Type": proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": _normalize_code(row.get("Constituent_Code")), + "Constituent_Name": _clean_text(row.get("Constituent_Name")), + "Weight": float(row.get("Weight") or 0), + "Is_ETF": _clean_text(row.get("Is_ETF")) or "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": source_kind, + "Transport_Mode": "HTML_SERVER_RENDERED" if source_kind == "REPRESENTATIVE_STOCK_PROXY" else "MANUAL_OR_TEMPLATE", + "Source_URL": source_url, + "Source_AsOf": today, + "Sector_Check": sector, + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "CURRENT", + }) + sector_stats.append({ + "sector": sector, + "proxy_ticker": proxy_ticker, + "proxy_name": proxy_name, + "proxy_type": proxy_type, + "source_kind": source_kind, + "transport_mode": "HTML_SERVER_RENDERED" if source_kind == "REPRESENTATIVE_STOCK_PROXY" else "MANUAL_OR_TEMPLATE", + "source_url": source_url, + "source_asof": today, + "constituent_count": len(fallback_rows), + "weight_sum": round(weight_sum, 6), + "status": "CURRENT", + "refresh_reason": "REPRESENTATIVE_STOCK_PROXY", + }) + continue + + if proxy_ticker: + try: + scraped = _parse_naver_etf_holdings(session, proxy_ticker, limit) + source_url = scraped.get("source_url", "") + source_kind = scraped.get("source_kind", "NAVER_ETF_PAGE_FAIL") + holdings = scraped.get("holdings", []) + message = scraped.get("message", "") + if holdings: + source = "NAVER_ETF_PAGE" + weight_sum = round(sum(float(h["Weight"]) for h in holdings), 6) + for h in holdings: + refreshed.append({ + "Sector": sector, + "Proxy_Ticker": proxy_ticker, + "Proxy_Name": proxy_name, + "Proxy_Type": proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": h["Constituent_Code"], + "Constituent_Name": h["Constituent_Name"], + "Weight": h["Weight"], + "Is_ETF": "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": source, + "Transport_Mode": "HTML_SERVER_RENDERED", + "Source_URL": source_url, + "Source_AsOf": today, + "Sector_Check": sector, + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "OK", + }) + sector_stats.append({ + "sector": sector, + "proxy_ticker": proxy_ticker, + "proxy_name": proxy_name, + "proxy_type": proxy_type, + "source_kind": source, + "transport_mode": "HTML_SERVER_RENDERED", + "source_url": source_url, + "source_asof": today, + "constituent_count": len(holdings), + "weight_sum": weight_sum, + "status": "CURRENT", + "refresh_reason": "NAVER_ETF_PAGE", + }) + continue + except Exception as exc: + message = str(exc) + source_kind = "NAVER_ETF_PAGE_FAIL" + + # fallback: preserve seed rows but expose the failure transparently + fallback_rows = bundle["rows"][:limit] if bundle["rows"] else [] + weight_sum = 0.0 + for row in fallback_rows: + weight = row.get("Weight") + try: + weight_sum += float(weight) if weight not in (None, "") else 0.0 + except Exception: + pass + refreshed.append({ + "Sector": sector, + "Proxy_Ticker": proxy_ticker, + "Proxy_Name": proxy_name, + "Proxy_Type": proxy_type, + "Base_Ticker": base_ticker, + "Constituent_Code": _normalize_code(row.get("Constituent_Code")), + "Constituent_Name": _clean_text(row.get("Constituent_Name")), + "Weight": float(row.get("Weight") or 0), + "Is_ETF": _clean_text(row.get("Is_ETF")) or "N", + "Enabled": "Y", + "Effective_Date": today, + "Source": source_kind, + "Transport_Mode": "LAYOUT_CHANGED" if source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN", + "Source_URL": source_url, + "Source_AsOf": today, + "Sector_Check": sector, + "Weight_Sum_All": weight_sum, + "Weight_Sum_Stocks_Only": weight_sum, + "ETF_Rows": 0, + "Status": "FAIL" if source_kind.endswith("FAIL") else "WARN", + }) + sector_stats.append({ + "sector": sector, + "proxy_ticker": proxy_ticker, + "proxy_name": proxy_name, + "proxy_type": proxy_type, + "source_kind": source_kind, + "transport_mode": "LAYOUT_CHANGED" if source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN", + "source_url": source_url, + "source_asof": today, + "constituent_count": len(fallback_rows), + "weight_sum": round(weight_sum, 6), + "status": "FAIL" if "FAIL" in source_kind else "WARN", + "refresh_reason": message or "seed_fallback", + }) + + audit_payload = build_sector_universe_refresh_audit({"data": {"sector_universe": refreshed}}) + return refreshed, { + "sector_universe_refresh_audit": audit_payload, + "sector_stats": sector_stats, + } + + +def _style_title(ws, title: str, subtitle: str) -> None: + ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=max(8, ws.max_column or 8)) + ws["A1"] = title + ws["A1"].font = WHITE_FONT + ws["A1"].fill = TITLE_FILL + ws["A1"].alignment = Alignment(horizontal="left") + ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=max(8, ws.max_column or 8)) + ws["A2"] = subtitle + ws["A2"].font = NOTE_FONT + + +def _write_table(ws, start_row: int, start_col: int, headers: list[str], rows: list[list[Any]]) -> int: + for i, header in enumerate(headers, start=start_col): + cell = ws.cell(start_row, i) + cell.value = header + cell.font = WHITE_FONT + cell.fill = HEADER_FILL + cell.alignment = Alignment(horizontal="center") + for r_idx, row in enumerate(rows, start=start_row + 1): + for c_idx, value in enumerate(row, start=start_col): + ws.cell(r_idx, c_idx).value = value + return start_row + len(rows) + + +def _write_sector_universe_sheet(wb, rows: list[dict[str, Any]]) -> None: + if "sector_universe" in wb.sheetnames: + del wb["sector_universe"] + ws = wb.create_sheet("sector_universe") + headers = [ + "Sector", "Proxy_Ticker", "Proxy_Name", "Proxy_Type", "Base_Ticker", + "Constituent_Code", "Constituent_Name", "Weight", "Is_ETF", "Enabled", + "Effective_Date", "Source", "Transport_Mode", "Source_URL", "Source_AsOf", "Sector_Check", + "Weight_Sum_All", "Weight_Sum_Stocks_Only", "ETF_Rows", "Status", + ] + now = _kst_now().strftime("%Y-%m-%d %H:%M:%S") + ws["A1"] = f"updated: {now} KST" + ws["A1"].font = Font(bold=True) + _write_table(ws, 2, 1, headers, [[r.get(h, "") for h in headers] for r in rows]) + for col_idx, header in enumerate(headers, start=1): + if header in {"Proxy_Ticker", "Base_Ticker", "Constituent_Code"}: + for r in range(3, ws.max_row + 1): + ws.cell(r, col_idx).number_format = "@" + if header in {"Weight", "Weight_Sum_All", "Weight_Sum_Stocks_Only"}: + for r in range(3, ws.max_row + 1): + ws.cell(r, col_idx).number_format = "0.0000" + width = 16 + if header in {"Constituent_Name", "Proxy_Name"}: + width = 22 + elif header in {"Source_URL"}: + width = 42 + elif header in {"Status", "Source", "Sector_Check", "Proxy_Type", "Transport_Mode"}: + width = 16 + ws.column_dimensions[get_column_letter(col_idx)].width = width + ws.freeze_panes = "A3" + ws.sheet_view.showGridLines = False + + +def _write_audit_sheet(wb, audit_payload: dict[str, Any]) -> None: + audit = audit_payload["sector_universe_refresh_audit"] + if "sector_universe_refresh_audit" in wb.sheetnames: + del wb["sector_universe_refresh_audit"] + ws = wb.create_sheet("sector_universe_refresh_audit") + ws.sheet_view.showGridLines = False + _style_title( + ws, + "섹터 월간 갱신 감사", + "Naver ETF 페이지 기반 월간 갱신 상태와 provenance 분리 현황을 점검한다.", + ) + summary = audit.get("summary", {}) + summary_rows = [ + ["formula_id", audit.get("formula_id", "")], + ["gate", audit.get("gate", "")], + ["sector_count", summary.get("sector_count", 0)], + ["current_count", summary.get("current_count", 0)], + ["due_count", summary.get("due_count", 0)], + ["overdue_count", summary.get("overdue_count", 0)], + ["missing_count", summary.get("missing_count", 0)], + ["template_count", summary.get("template_count", 0)], + ["sheet_input_count", summary.get("sheet_input_count", 0)], + ["naver_source_count", summary.get("naver_source_count", 0)], + ["missing_source_url_count", summary.get("missing_source_url_count", 0)], + ["stale_sector_count", summary.get("stale_sector_count", 0)], + ["oldest_source_asof", summary.get("oldest_source_asof", "")], + ["newest_source_asof", summary.get("newest_source_asof", "")], + ] + _write_table(ws, 4, 1, ["key", "value"], summary_rows) + rows = audit.get("rows", []) or [] + if rows: + headers = [ + "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", + "source_url", "source_asof", "age_days", "constituent_count", + "stock_count", "etf_count", "weight_sum", "status", "refresh_reason", + ] + _write_table(ws, 4, 4, headers, [[r.get(h, "") for h in headers] for r in rows]) + for idx, header in enumerate(headers, start=4): + width = 16 + if header in {"sector", "proxy_name", "refresh_reason"}: + width = 20 + elif header == "source_url": + width = 42 + ws.column_dimensions[get_column_letter(idx)].width = width + ws.freeze_panes = "A5" + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--input", default=str(DEFAULT_INPUT_XLSX)) + ap.add_argument("--output", default=str(DEFAULT_OUTPUT_XLSX)) + ap.add_argument("--limit", type=int, default=10, help="Per-sector holdings limit from Naver ETF pages") + ap.add_argument("--apply", action="store_true", help="Overwrite the input workbook in place as well") + args = ap.parse_args() + + input_path = Path(args.input) + output_path = Path(args.output) + if not input_path.exists(): + raise FileNotFoundError(input_path) + + wb = load_workbook(input_path) + if "sector_universe" not in wb.sheetnames: + raise RuntimeError("sector_universe sheet not found") + seed_ws = wb["sector_universe"] + seed_rows = _extract_sector_seed_rows(seed_ws) + refreshed_rows, audit_payload = _build_refreshed_rows(seed_rows, max(1, args.limit)) + + _write_sector_universe_sheet(wb, refreshed_rows) + _write_audit_sheet(wb, audit_payload) + + output_path.parent.mkdir(parents=True, exist_ok=True) + wb.save(output_path) + if args.apply and input_path.resolve() != output_path.resolve(): + shutil.copy2(output_path, input_path) + + print(json.dumps({ + "status": "OK", + "input": str(input_path), + "output": str(output_path), + "rows": len(refreshed_rows), + "sectors": len(audit_payload["sector_stats"]), + "current_count": audit_payload["sector_universe_refresh_audit"]["summary"]["current_count"], + "overdue_count": audit_payload["sector_universe_refresh_audit"]["summary"]["overdue_count"], + "template_count": audit_payload["sector_universe_refresh_audit"]["summary"]["template_count"], + }, ensure_ascii=False, indent=2)) + return 0 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/tools/update_workbook_sector_insights.py b/tools/update_workbook_sector_insights.py index e793263..d5191b1 100644 --- a/tools/update_workbook_sector_insights.py +++ b/tools/update_workbook_sector_insights.py @@ -9,8 +9,14 @@ from openpyxl.chart import BarChart, LineChart, Reference from openpyxl.styles import Font, PatternFill, Alignment from openpyxl.utils import get_column_letter +import sys ROOT = Path(__file__).resolve().parent.parent +if str(ROOT) not in sys.path: + sys.path.insert(0, str(ROOT)) + +from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit + INPUT_XLSX = ROOT / "GatherTradingData.xlsx" OUTPUT_DIR = ROOT / "outputs" / "sector_insights_enhanced" OUTPUT_XLSX = OUTPUT_DIR / "GatherTradingData_sector_insights.xlsx" @@ -593,10 +599,12 @@ def build_sector_summary(wb, data: dict) -> None: ws["A20"] = "Notes" ws["A20"].fill = SUBHEADER_FILL ws["A20"].font = BOLD_FONT - ws["A21"] = "섹터별 ETF 프록시와 스마트머니 방향이 다르면 매수 근거를 보수적으로 해석해야 합니다." + ws["A21"] = "섹터별 ETF 프록시를 기준으로 보고, 은행/증권/지주회사는 분리해서 구성비 상위 종목을 증빙해야 합니다. 대표주 모니터는 섹터 기본 3종, 로보틱스 5종 바스켓으로 함께 확인해야 합니다." ws["A21"].alignment = Alignment(wrap_text=True) - ws["A22"] = "데이터 결측은 하네스 업데이트가 필요합니다." + ws["A22"] = "Universe_Source가 DEFAULT_TEMPLATE인 행은 템플릿이며, 실제 시트 입력으로 전환되어야 provenance가 완성됩니다." ws["A22"].alignment = Alignment(wrap_text=True) + ws["A23"] = "다음 세분화 후보는 바이오/제약과 방산/우주처럼 현재 섹터를 더 세밀하게 나누는 방향입니다. 로보틱스는 RISE 현대차고정피지컬AI를 섹터 프록시로 사용하고, 대표주는 해당 ETF의 실제 구성비 상위 5개 종목에서 뽑습니다." + ws["A23"].alignment = Alignment(wrap_text=True) chart = LineChart() chart.title = "Average Sector Score / Breadth Trend" @@ -622,11 +630,11 @@ def build_sector_analysis(wb, data: dict) -> None: style_title( ws, "섹터 동향 분석", - "섹터별 ETF 프록시, 스마트머니 유입, 수익률, 유동성 방향을 함께 보는 상세 시트", + "섹터별 ETF 프록시, 대표주 모니터, 스마트머니 유입, 수익률, 유동성 방향을 함께 보는 상세 시트", end_col=18, ) headers = [ - "sector", "proxy_ticker", "proxy_name", "proxy_type", "etf_code", + "sector", "proxy_ticker", "proxy_name", "proxy_type", "universe_source", "etf_code", "etf_execution_use", "etf_liquidity_score", "etf_liquidity_status", "etf_nav_risk", "proxy_confidence", "rank", "rank_delta_w1", "rank_delta_w2", "sector_score", "score_delta", "sector_ret5d", "sector_ret20d", "etf_return_5d", "etf_return_20d", @@ -661,7 +669,7 @@ def build_sector_analysis(wb, data: dict) -> None: chart.x_axis.title = "20D Return" chart.height = 8 chart.width = 14 - data_ref = Reference(ws, min_col=17, min_row=4, max_row=4 + len(rows)) + data_ref = Reference(ws, min_col=18, min_row=4, max_row=4 + len(rows)) cats = Reference(ws, min_col=1, min_row=5, max_row=4 + len(rows)) chart.add_data(data_ref, titles_from_data=True) chart.set_categories(cats) @@ -817,6 +825,67 @@ def build_sector_timeline(wb, data: dict, source_data: dict | None = None) -> No ws.add_chart(money_chart, "L36") +def build_sector_universe_refresh_audit_sheet(wb, source_data: dict) -> None: + ws = wb.create_sheet("sector_universe_refresh_audit") + style_sheet(ws) + style_title( + ws, + "섹터 월간 갱신 감사", + "Naver ETF 페이지 기반 구성종목 갱신 상태와 provenance 분리 상태를 점검하는 감사 시트. AJAX/XHR 전제는 두지 않고 HTML 서버렌더링 테이블을 우선한다.", + end_col=16, + ) + payload = {"data": source_data} + audit = build_sector_universe_refresh_audit(payload) + summary = audit.get("summary") or {} + items = [ + ("formula_id", audit.get("formula_id", "")), + ("gate", audit.get("gate", "")), + ("sector_count", summary.get("sector_count", 0)), + ("current_count", summary.get("current_count", 0)), + ("due_count", summary.get("due_count", 0)), + ("overdue_count", summary.get("overdue_count", 0)), + ("layout_changed_count", summary.get("layout_changed_count", 0)), + ("missing_count", summary.get("missing_count", 0)), + ("template_count", summary.get("template_count", 0)), + ("sheet_input_count", summary.get("sheet_input_count", 0)), + ("naver_source_count", summary.get("naver_source_count", 0)), + ("missing_source_url_count", summary.get("missing_source_url_count", 0)), + ("stale_sector_count", summary.get("stale_sector_count", 0)), + ] + add_kpi_block(ws, 4, items) + ws["D4"] = "Refresh policy" + ws["D4"].fill = SUBHEADER_FILL + ws["D4"].font = BOLD_FONT + ws["D5"] = "NAVER_ETF_PAGE rows are the monthly refreshed source." + ws["D6"] = "SHEET_INPUT rows are manual/provisional and must stay separate." + ws["D7"] = "DEFAULT_TEMPLATE rows are a fail in the monthly gate." + ws["D8"] = "Source_URL and Source_AsOf are required for provenance." + ws["D9"] = "This is HTML-server-rendered, not AJAX. JS is only a fallback probe for candidate URLs." + ws["D10"] = "No guessed holdings are written when the page layout changes." + ws["D11"] = "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED is a separate layout-change failure state." + ws["D12"] = "Financial sectors are split as 은행 / 증권 / 지주회사 in sector_universe; sector_flow reflects carryover until GAS runDataFeed is rerun." + ws["D13"] = "This split is part of the monthly refresh harness; Source_URL and Source_AsOf must remain valid for provenance." + rows = audit.get("rows") or [] + if rows: + headers = [ + "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode", + "source_url", "source_asof", "age_days", "constituent_count", + "stock_count", "etf_count", "weight_sum", "status", "refresh_reason", + ] + write_table(ws, 14, 1, headers, [[row.get(h, "") for h in headers] for row in rows]) + for col, width in { + "A": 16, "B": 12, "C": 18, "D": 12, "E": 16, "F": 18, "G": 42, "H": 14, + "I": 10, "J": 14, "K": 12, "L": 12, "M": 12, "N": 12, "O": 24, + }.items(): + ws.column_dimensions[col].width = width + ws.freeze_panes = "A5" + ws["A11"] = "Notes" + ws["A11"].fill = SUBHEADER_FILL + ws["A11"].font = BOLD_FONT + ws["A12"] = "홈페이지 리뉴얼로 표 구조가 바뀌면, 파서는 추정하지 않고 실패 상태를 남겨 월간 게이트에서 잡는다." + ws["A12"].alignment = Alignment(wrap_text=True) + + def build_etf_summary(wb, data: dict) -> None: ws = wb.create_sheet("etf_representative_summary") style_sheet(ws) @@ -847,6 +916,7 @@ def build_etf_summary(wb, data: dict) -> None: ws["D6"] = "2) Missing slots filled with same-sector live candidates" ws["D7"] = "3) Missing data stays explicit as DATA_MISSING" ws["D8"] = "4) Minimum 3 names per sector basket" + ws["D9"] = "5) Universe_Source=DEFAULT_TEMPLATE rows are provisional until sheet-backed data exists." ws["G4"] = "Top reps" ws["G4"].fill = SUBHEADER_FILL ws["G4"].font = BOLD_FONT @@ -865,7 +935,7 @@ def build_etf_monitor(wb, data: dict) -> None: end_col=18, ) headers = [ - "sector", "etf_proxy_ticker", "etf_proxy_name", "etf_proxy_type", "sector_rank", + "sector", "etf_proxy_ticker", "etf_proxy_name", "etf_proxy_type", "universe_source", "sector_rank", "sector_score", "sector_smart_money_5d_krw", "sector_ret20d", "representative_count", "representative_ticker", "representative_name", "representative_basis", "representative_basis_detail", "constituent_weight", "basket_quality_state", @@ -894,7 +964,7 @@ def build_etf_monitor(wb, data: dict) -> None: chart.x_axis.title = "Coverage %" chart.height = 8 chart.width = 14 - data_ref = Reference(ws, min_col=16, min_row=4, max_row=4 + len(rows)) + data_ref = Reference(ws, min_col=17, min_row=4, max_row=4 + len(rows)) cats = Reference(ws, min_col=1, min_row=5, max_row=4 + len(rows)) chart.add_data(data_ref, titles_from_data=True) chart.set_categories(cats) @@ -922,6 +992,7 @@ def main() -> None: "performance_readiness_summary", "operational_eval_queue_summary", "portfolio_sector_exposure", + "sector_universe_refresh_audit", "_portfolio_holdings_helper", "sector_trend_summary", "sector_trend_analysis", @@ -936,6 +1007,7 @@ def main() -> None: build_performance_readiness_summary(wb) build_operational_eval_queue_summary(wb) build_portfolio_sector_exposure(wb) + build_sector_universe_refresh_audit_sheet(wb, raw_source) build_sector_timeline(wb, sector, raw_source) build_sector_analysis(wb, sector) build_sector_summary(wb, sector) @@ -949,6 +1021,7 @@ def main() -> None: "performance_readiness_summary", "operational_eval_queue_summary", "portfolio_sector_exposure", + "sector_universe_refresh_audit", "sector_trend_summary", "sector_trend_analysis", "sector_trend_timeline", diff --git a/tools/validate_report_section_completeness_v1.py b/tools/validate_report_section_completeness_v1.py index 9f2b94f..dcff732 100644 --- a/tools/validate_report_section_completeness_v1.py +++ b/tools/validate_report_section_completeness_v1.py @@ -21,6 +21,7 @@ REPORT_SECTION_ORDER = [ "single_conclusion", "immediate_execution_playbook", "market_context_learning_note", "portfolio_performance_summary", "portfolio_sector_exposure_summary", + "sector_universe_refresh_audit_v1", "sector_trend_analysis_v1", "etf_representative_monitor_v1", "performance_readiness_summary", diff --git a/tools/validate_sector_universe_monthly_refresh_v1.py b/tools/validate_sector_universe_monthly_refresh_v1.py new file mode 100644 index 0000000..52fa3a8 --- /dev/null +++ b/tools/validate_sector_universe_monthly_refresh_v1.py @@ -0,0 +1,173 @@ +#!/usr/bin/env python3 +from __future__ import annotations + +import argparse +import datetime as dt +import json +import sys +from pathlib import Path +from typing import Any + +from openpyxl import load_workbook + + +ROOT = Path(__file__).resolve().parents[1] +DEFAULT_XLSX = ROOT / "GatherTradingData.xlsx" +MAX_AGE_DAYS = 31 + + +def _txt(value: Any, default: str = "") -> str: + if value is None: + return default + if isinstance(value, str): + return value.strip() or default + return str(value).strip() or default + + +def _parse_date(value: Any) -> dt.date | None: + text = _txt(value) + if not text: + return None + for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"): + try: + return dt.datetime.strptime(text[:10], fmt).date() + except Exception: + pass + try: + return dt.date.fromisoformat(text[:10]) + except Exception: + return None + + +def _age_days(value: Any) -> int | None: + parsed = _parse_date(value) + if parsed is None: + return None + today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date() + return (today - parsed).days + + +def main() -> int: + ap = argparse.ArgumentParser() + ap.add_argument("--xlsx", default=str(DEFAULT_XLSX)) + args = ap.parse_args() + + xlsx = Path(args.xlsx) + if not xlsx.exists(): + print(f"[오류] 워크북 없음: {xlsx}") + return 1 + + wb = load_workbook(xlsx, data_only=True) + if "sector_universe" not in wb.sheetnames: + print("[FAIL] sector_universe 시트가 없습니다.") + return 1 + + ws = wb["sector_universe"] + headers = [_txt(ws.cell(2, c).value) for c in range(1, ws.max_column + 1)] + idx = {name: i for i, name in enumerate(headers) if name} + required = ["Sector", "Proxy_Ticker", "Constituent_Code", "Weight", "Source", "Source_URL", "Source_AsOf"] + missing_headers = [h for h in required if h not in idx] + + rows: list[dict[str, Any]] = [] + for r in range(3, ws.max_row + 1): + row = {h: ws.cell(r, c + 1).value for c, h in enumerate(headers) if h} + if not any(v not in (None, "") for v in row.values()): + continue + rows.append(row) + + sector_map: dict[str, list[dict[str, Any]]] = {} + for row in rows: + sector = _txt(row.get("Sector")) + if sector: + sector_map.setdefault(sector, []).append(row) + + template_rows = 0 + representative_rows = 0 + sheet_input_rows = 0 + naver_rows = 0 + layout_changed_rows = 0 + fail_rows = 0 + missing_source_url = 0 + stale_rows = 0 + mixed_sector_count = 0 + sector_status_rows: list[str] = [] + + for sector, sector_rows in sector_map.items(): + source_kinds = {_txt(r.get("Source"), "SHEET_INPUT") or "SHEET_INPUT" for r in sector_rows} + if len(source_kinds) > 1: + mixed_sector_count += 1 + sector_status_rows.append(f"{sector}:MIXED({','.join(sorted(source_kinds))})") + + sector_template = any(src == "DEFAULT_TEMPLATE" for src in source_kinds) + sector_rep = any(src == "REPRESENTATIVE_STOCK_PROXY" for src in source_kinds) + sector_input = any(src == "SHEET_INPUT" for src in source_kinds) + sector_naver = any(src == "NAVER_ETF_PAGE" for src in source_kinds) + sector_layout_changed = any(src == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" for src in source_kinds) + sector_fail = any("FAIL" in src for src in source_kinds) + + if sector_template: + template_rows += len(sector_rows) + if sector_rep: + representative_rows += len(sector_rows) + if sector_input: + sheet_input_rows += len(sector_rows) + if sector_naver: + naver_rows += len(sector_rows) + if sector_layout_changed: + layout_changed_rows += len(sector_rows) + if sector_fail: + fail_rows += len(sector_rows) + + source_urls = {_txt(r.get("Source_URL")) for r in sector_rows if _txt(r.get("Source_URL"))} + if not source_urls: + missing_source_url += len(sector_rows) + + ages = [_age_days(r.get("Source_AsOf")) for r in sector_rows] + age_vals = [a for a in ages if a is not None] + if age_vals and max(age_vals) > MAX_AGE_DAYS: + stale_rows += sum(1 for a in age_vals if a is not None and a > MAX_AGE_DAYS) + sector_status_rows.append(f"{sector}:STALE(max={max(age_vals)})") + + gate = "PASS" + if missing_headers: + gate = "FAIL" + elif template_rows > 0 or fail_rows > 0 or stale_rows > 0 or mixed_sector_count > 0: + gate = "FAIL" + elif sheet_input_rows > 0: + gate = "WARN" + + print(f"[sector_universe_refresh] gate={gate}") + print(f" rows={len(rows)} sectors={len(sector_map)}") + print(f" naver_rows={naver_rows} representative_rows={representative_rows} layout_changed_rows={layout_changed_rows} sheet_input_rows={sheet_input_rows} template_rows={template_rows} fail_rows={fail_rows}") + print(f" missing_source_url={missing_source_url} stale_rows={stale_rows} mixed_sector_count={mixed_sector_count}") + if missing_headers: + print(f" missing_headers={missing_headers}") + if sector_status_rows: + print(" sector_flags=" + ", ".join(sector_status_rows[:20])) + + result = { + "validator": "validate_sector_universe_monthly_refresh_v1", + "gate": gate, + "total_rows": len(rows), + "sector_count": len(sector_map), + "naver_rows": naver_rows, + "representative_rows": representative_rows, + "layout_changed_rows": layout_changed_rows, + "sheet_input_rows": sheet_input_rows, + "template_rows": template_rows, + "fail_rows": fail_rows, + "missing_source_url": missing_source_url, + "stale_rows": stale_rows, + "mixed_sector_count": mixed_sector_count, + "missing_headers": missing_headers, + "sector_flags": sector_status_rows, + "max_age_days": MAX_AGE_DAYS, + } + out = ROOT / "Temp" / "sector_universe_refresh_validation.json" + out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8") + print(f"OUTPUT: {out}") + return 0 if gate in {"PASS", "WARN"} else 1 + + +if __name__ == "__main__": + sys.exit(main())