섹터 유니버스 분리와 월간 갱신 정합화

This commit is contained in:
2026-06-15 02:29:29 +09:00
parent e2820065d1
commit 82ca4ddbfd
11 changed files with 1658 additions and 43 deletions
+1 -1
View File
@@ -15,5 +15,5 @@
"keep package scripts within release envelope"
]
},
"source_zip_sha256": "49f64b3773ba3c19fa8323d0b08833928c637935483039579bb8ab22a391f70c"
"source_zip_sha256": "4de4a7b1217ef5d5375b3b1ea1209f738719e79c4c3c0954e9e96a9dc0d8607e"
}
+1 -1
View File
@@ -13,7 +13,7 @@ portfolio_exposure_framework:
exposure_layers:
direct_core_leaders: ["삼성전자", "SK하이닉스"]
duplicate_beta: ["KODEX 반도체", "동일 섹터 ETF"]
tactical_satellites: ["방산", "조선", "전력기기", "건설/EPC", "기타 고베타"]
tactical_satellites: ["방산", "조선", "전력설비", "건설", "플랜트/EPC", "로보틱스", "기타 고베타"]
cash: ["현금", "MMF", "RP", "단기채 ETF"]
valid_trim_reasons:
- "벤치마크 대비 초과비중이 허용밴드를 초과하고 가격 추세가 훼손됨"
+402 -27
View File
@@ -1,5 +1,5 @@
// gas_lib.gs - Common utilities & static features
// Last Updated: 2026-06-14 20:48:30 KST
// Last Updated: 2026-06-15 02:20:50 KST
// Math/KRX utils, sheet I/O, sector flow, Web API, static runners
// GAS global scope: functions in gas_data_feed.gs / gas_data_collect.gs callable directly
//
@@ -593,7 +593,14 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [
{ code: "062040", name: "산일전기", weight: 0.10 },
{ code: "298040", name: "효성중공업", weight: 0.10 },
]},
{ sector: "방산", proxyTicker: "012450", proxyName: "한화에어로스페이스", proxyType: "대표주", baseTicker: "069500", constituents: [
{ sector: "전력설비", proxyTicker: "491820", proxyName: "HANARO 전력설비투자", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "010120", name: "LS ELECTRIC", weight: 0.28 },
{ code: "267260", name: "HD현대일렉트릭", weight: 0.28 },
{ code: "298040", name: "효성중공업", weight: 0.18 },
{ code: "006260", name: "LS", weight: 0.14 },
{ code: "099440", name: "두산에너빌리티", weight: 0.12 },
]},
{ sector: "방산", proxyTicker: "463250", proxyName: "TIGER K방산&우주", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "012450", name: "한화에어로스페이스", weight: 0.45 },
{ code: "079550", name: "LIG넥스원", weight: 0.25 },
{ code: "047810", name: "한국항공우주", weight: 0.15 },
@@ -605,23 +612,49 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [
{ code: "009540", name: "HD한국조선해양", weight: 0.20 },
{ code: "494670", name: "TIGER 조선TOP10", weight: 0.15, isEtf: true },
]},
{ sector: "건설/EPC", proxyTicker: "028050", proxyName: "삼성E&A", proxyType: "대표주", baseTicker: "069500", constituents: [
{ code: "028050", name: "삼성E&A", weight: 0.40 },
{ code: "000720", name: "현대건설", weight: 0.30 },
{ code: "006360", name: "GS건설", weight: 0.20 },
{ code: "047040", name: "대우건설", weight: 0.10 },
{ sector: "건설", proxyTicker: "117700", proxyName: "KODEX 건설", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "000720", name: "현대건설", weight: 0.35 },
{ code: "006360", name: "GS건설", weight: 0.25 },
{ code: "047040", name: "대우건설", weight: 0.20 },
{ code: "294870", name: "HDC현대산업개발", weight: 0.20 },
]},
{ sector: "플랜트/EPC", proxyTicker: "454320", proxyName: "HANARO CAPEX설비투자iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "028050", name: "삼성E&A", weight: 0.35 },
{ code: "010120", name: "LS ELECTRIC", weight: 0.20 },
{ code: "267260", name: "HD현대일렉트릭", weight: 0.20 },
{ code: "298040", name: "효성중공업", weight: 0.15 },
{ code: "099440", name: "두산에너빌리티", weight: 0.10 },
]},
{ sector: "자동차", proxyTicker: "091180", proxyName: "TIGER 자동차", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "005380", name: "현대차", weight: 0.45 },
{ code: "000270", name: "기아", weight: 0.40 },
{ code: "012330", name: "현대모비스", weight: 0.15 },
]},
{ sector: "금융/은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [
{ sector: "은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "105560", name: "KB금융", weight: 0.30 },
{ code: "055550", name: "신한지주", weight: 0.30 },
{ code: "086790", name: "하나금융지주", weight: 0.20 },
{ code: "316140", name: "우리금융지주", weight: 0.10 },
{ code: "003540", name: "대신증권", weight: 0.10 },
{ code: "024110", name: "기업은행", weight: 0.10 },
]},
{ sector: "증권", proxyTicker: "0111J0", proxyName: "HANARO 증권고배당TOP3플러스", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "071050", name: "한국금융지주", weight: 0.2135 },
{ code: "006800", name: "미래에셋증권", weight: 0.1934 },
{ code: "005940", name: "NH투자증권", weight: 0.1911 },
{ code: "016360", name: "삼성증권", weight: 0.1434 },
{ code: "039490", name: "키움증권", weight: 0.1373 },
]},
{ sector: "지주회사", proxyTicker: "307520", proxyName: "TIGER 지주회사", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "180640", name: "한진칼", weight: 0.1535 },
{ code: "267250", name: "HD현대", weight: 0.0943 },
{ code: "034730", name: "SK", weight: 0.0884 },
{ code: "000150", name: "두산", weight: 0.0878 },
{ code: "005490", name: "POSCO홀딩스", weight: 0.0763 },
{ code: "003550", name: "LG", weight: 0.0752 },
{ code: "006260", name: "LS", weight: 0.0705 },
{ code: "078930", name: "GS", weight: 0.0498 },
{ code: "001040", name: "CJ", weight: 0.0477 },
{ code: "010060", name: "OCI홀딩스", weight: 0.0240 },
]},
{ sector: "2차전지", proxyTicker: "305720", proxyName: "KODEX 2차전지산업", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "373220", name: "LG에너지솔루션", weight: 0.40 },
@@ -635,12 +668,29 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [
{ code: "128940", name: "한미약품", weight: 0.15 },
{ code: "000100", name: "유한양행", weight: 0.10 },
]},
{ sector: "원전", proxyTicker: "099440", proxyName: "두산에너빌리티", proxyType: "대표주", baseTicker: "069500", constituents: [
{ sector: "원전", proxyTicker: "434730", proxyName: "HANARO 원자력iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "099440", name: "두산에너빌리티", weight: 0.45 },
{ code: "023450", name: "한전기술", weight: 0.25 },
{ code: "015760", name: "한국전력", weight: 0.20 },
{ code: "071320", name: "지역난방공사", weight: 0.10 },
]},
{ sector: "로보틱스", proxyTicker: "0190C0", proxyName: "RISE 현대차고정피지컬AI", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "005380", name: "현대차", weight: 0.2402 },
{ code: "012330", name: "현대모비스", weight: 0.1588 },
{ code: "011070", name: "LG이노텍", weight: 0.1450 },
{ code: "000270", name: "기아", weight: 0.1234 },
{ code: "307950", name: "현대오토에버", weight: 0.0899 },
{ code: "277810", name: "레인보우로보틱스", weight: 0.0673 },
{ code: "064400", name: "LG씨엔에스", weight: 0.0519 },
{ code: "454910", name: "두산로보틱스", weight: 0.0367 },
{ code: "108490", name: "로보티즈", weight: 0.0240 },
{ code: "058610", name: "에스피지", weight: 0.0173 },
{ code: "010620", name: "현대미포", weight: 0.0135 },
{ code: "009540", name: "HD한국조선해양", weight: 0.0135 },
{ code: "011210", name: "현대위아", weight: 0.0109 },
{ code: "121600", name: "나노신소재", weight: 0.0040 },
{ code: "028050", name: "삼성E&A", weight: 0.0034 },
]},
{ sector: "소비재", proxyTicker: "139220", proxyName: "TIGER 생활소비재", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "028260", name: "삼성물산", weight: 0.35 },
{ code: "097950", name: "CJ제일제당", weight: 0.25 },
@@ -663,6 +713,7 @@ function normalizeSectorName_(sector) {
if (s === "바이오/헬스케어") return "바이오";
if (s === "원전/에너지") return "원전";
if (s === "소비재/유통") return "소비재";
if (s === "건설/EPC") return "플랜트/EPC";
return s;
}
@@ -679,17 +730,52 @@ function readSectorUniverse_() {
const sheet = ss.getSheetByName("sector_universe");
if (!sheet) {
writeDefaultSectorUniverseSheet_();
return DEFAULT_SECTOR_UNIVERSE_V2;
return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
const data = sheet.getDataRange().getValues();
if (data.length < 3) {
writeDefaultSectorUniverseSheet_();
return DEFAULT_SECTOR_UNIVERSE_V2;
return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
const hdr = data[1].map(h => String(h).trim());
const idx = name => hdr.indexOf(name);
const required = ["Sector","Proxy_Ticker","Constituent_Code","Weight"];
if (required.some(h => idx(h) < 0)) return DEFAULT_SECTOR_UNIVERSE_V2;
if (required.some(h => idx(h) < 0)) {
return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
const map = {};
for (let i = 2; i < data.length; i++) {
@@ -706,6 +792,9 @@ function readSectorUniverse_() {
proxyName: idx("Proxy_Name") >= 0 ? String(data[i][idx("Proxy_Name")] ?? "").trim() : "",
proxyType: idx("Proxy_Type") >= 0 ? String(data[i][idx("Proxy_Type")] ?? "").trim() : "",
baseTicker: idx("Base_Ticker") >= 0 ? normalizeTickerCode(data[i][idx("Base_Ticker")]) : "069500",
source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT",
sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "",
sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "",
constituents: [],
};
}
@@ -714,16 +803,59 @@ function readSectorUniverse_() {
name: idx("Constituent_Name") >= 0 ? String(data[i][idx("Constituent_Name")] ?? "").trim() : "",
weight,
isEtf: idx("Is_ETF") >= 0 ? boolFromSheet_(data[i][idx("Is_ETF")], false) : false,
source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT",
transportMode: idx("Transport_Mode") >= 0 ? String(data[i][idx("Transport_Mode")] ?? "").trim() : "",
sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "",
sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "",
});
}
const sectors = Object.values(map).filter(s => s.proxyTicker && s.constituents.length > 0);
return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2;
const sectorSet = new Set(sectors.map(s => s.sector));
for (const fallback of DEFAULT_SECTOR_UNIVERSE_V2) {
if (!fallback || !fallback.sector || sectorSet.has(fallback.sector)) continue;
sectors.push({
sector: fallback.sector,
proxyTicker: fallback.proxyTicker,
proxyName: fallback.proxyName,
proxyType: fallback.proxyType,
baseTicker: fallback.baseTicker || "069500",
source: fallback.source || "DEFAULT_TEMPLATE",
transportMode: fallback.transportMode || ((fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: fallback.sourceUrl || "",
sourceAsOf: fallback.sourceAsOf || "",
constituents: fallback.constituents.map(c => ({
code: c.code,
name: c.name || "",
weight: c.weight,
isEtf: Boolean(c.isEtf),
source: c.source || fallback.source || "DEFAULT_TEMPLATE",
transportMode: c.transportMode || ((c.source || fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: c.sourceUrl || fallback.sourceUrl || "",
sourceAsOf: c.sourceAsOf || fallback.sourceAsOf || "",
})),
});
}
return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
transportMode: sector.transportMode || ((sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
transportMode: c.transportMode || ((c.source || sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
function writeDefaultSectorUniverseSheet_() {
const headers = [
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Base_Ticker",
"Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source"
"Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source","Transport_Mode",
"Source_URL","Source_AsOf"
];
const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd");
const rows = [];
@@ -741,7 +873,10 @@ function writeDefaultSectorUniverseSheet_() {
c.isEtf ? "Y" : "N",
"Y",
today,
"sector_universe(DEFAULT_SECTOR_UNIVERSE_V2)",
sector.source || c.source || "DEFAULT_TEMPLATE",
sector.transportMode || c.transportMode || (((sector.source || c.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || c.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY") ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sector.sourceUrl || c.sourceUrl || "",
sector.sourceAsOf || c.sourceAsOf || "",
]);
}
}
@@ -762,6 +897,228 @@ function sectorUseMode_(quality) {
return "INVALID";
}
/**
 * Parses a date-only value into a Date pinned to 00:00 at UTC+09:00.
 *
 * Text input may use "-" or "." as the separator ("2026-06-15", "2026.06.15").
 * Only the first 10 characters are inspected, so a trailing time part is ignored.
 * A Date instance is reduced to the UTC+09:00 calendar day it falls on.
 *
 * @param {*} value Raw value (string, Date, null/undefined, ...).
 * @return {Date|null} The parsed day, or null when the value is empty, is not
 *     in yyyy-MM-dd shape, or names a day that does not exist (e.g. 2026-02-31).
 */
function parseDateOnly_(value) {
  const KST_OFFSET_MS = 9 * 60 * 60 * 1000;
  const DAY_MS = 24 * 60 * 60 * 1000;

  // Date instances would otherwise be stringified into a form the pattern below rejects.
  if (Object.prototype.toString.call(value) === "[object Date]") {
    const ms = value.getTime();
    if (Number.isNaN(ms)) return null;
    // Floor to midnight of the UTC+09:00 calendar day.
    return new Date(Math.floor((ms + KST_OFFSET_MS) / DAY_MS) * DAY_MS - KST_OFFSET_MS);
  }

  const text = String(value ?? "").trim();
  if (!text) return null;
  const norm = text.replace(/\./g, "-").slice(0, 10);
  const parts = /^(\d{4})-(\d{2})-(\d{2})$/.exec(norm);
  if (!parts) return null;

  const parsed = new Date(norm + "T00:00:00+09:00");
  if (Number.isNaN(parsed.getTime())) return null;

  // Round-trip check: some engines roll an impossible day (Feb 31) over into the
  // next month instead of rejecting it, which would silently yield a wrong date.
  const shifted = new Date(parsed.getTime() + KST_OFFSET_MS);
  const sameDay =
    shifted.getUTCFullYear() === Number(parts[1]) &&
    shifted.getUTCMonth() + 1 === Number(parts[2]) &&
    shifted.getUTCDate() === Number(parts[3]);
  return sameDay ? parsed : null;
}
/**
 * Builds a per-sector freshness audit of the sector universe.
 *
 * Each sector is classified by its `source` kind and by the age of `sourceAsOf`
 * relative to the current time:
 *   - DEFAULT_TEMPLATE                        -> TEMPLATE
 *   - REPRESENTATIVE_STOCK_PROXY / SHEET_INPUT / NAVER_ETF_PAGE
 *       no URL or no parsable date            -> MISSING
 *       age <= 31 days                        -> CURRENT
 *       age <= 45 days                        -> DUE      (counted as stale)
 *       otherwise                             -> OVERDUE  (counted as stale)
 *   - NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED      -> LAYOUT_CHANGED
 *   - anything else                           -> INVALID
 *
 * @param {Array<Object>} universe Sector objects; the fields read here are
 *     sector, proxyTicker, proxyName, proxyType, source, transportMode,
 *     sourceUrl, sourceAsOf and constituents[] (weight, isEtf).
 *     A null/undefined universe is treated as empty.
 * @return {{formula_id: string, gate: string, summary: Object, rows: Array<Object>}}
 *     gate is "FAIL", "WARN" or "PASS"; rows are sorted with CURRENT sectors
 *     first, then by sector name.
 */
function calcSectorUniverseRefreshAudit_(universe) {
  const DAY_MS = 86400000;
  const CURRENT_MAX_AGE_DAYS = 31;
  const DUE_MAX_AGE_DAYS = 45;

  const sectors = universe || [];
  const today = new Date();
  const rows = [];
  const sourceKindCounts = { NAVER_ETF_PAGE: 0, NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED: 0, NAVER_ETF_PAGE_FAIL: 0, REPRESENTATIVE_STOCK_PROXY: 0, SHEET_INPUT: 0, DEFAULT_TEMPLATE: 0, OTHER: 0 };
  const transportModeCounts = { HTML_SERVER_RENDERED: 0, MANUAL_OR_TEMPLATE: 0, LAYOUT_CHANGED: 0, UNKNOWN: 0 };
  let currentCount = 0;
  let dueCount = 0;
  let overdueCount = 0;
  let missingCount = 0;
  let templateCount = 0;
  let sheetInputCount = 0;
  let naverSourceCount = 0;
  let layoutChangedCount = 0;
  let missingSourceUrlCount = 0;
  let staleSectorCount = 0;
  let oldestSourceAsOf = null;
  let newestSourceAsOf = null;

  // Shared freshness ladder for the three source kinds that carry a URL and an
  // as-of date. Updates the counters above and returns the row status.
  const classifyDatedSource = (sourceUrl, ageDays, reasons) => {
    if (!sourceUrl) {
      missingCount += 1;
      missingSourceUrlCount += 1;
      reasons.push("Source_URL_MISSING");
      return "MISSING";
    }
    if (ageDays === null) {
      missingCount += 1;
      reasons.push("Source_AsOf_MISSING");
      return "MISSING";
    }
    if (ageDays <= CURRENT_MAX_AGE_DAYS) {
      currentCount += 1;
      return "CURRENT";
    }
    staleSectorCount += 1;
    reasons.push(`AgeDays=${ageDays}`);
    if (ageDays <= DUE_MAX_AGE_DAYS) {
      dueCount += 1;
      return "DUE";
    }
    overdueCount += 1;
    return "OVERDUE";
  };

  for (const sector of sectors) {
    const sectorRows = Array.isArray(sector?.constituents) ? sector.constituents : [];

    // A sector without an explicit source is treated as sheet input.
    const sourceKind = String(sector?.source || "SHEET_INPUT").trim() || "SHEET_INPUT";
    if (Object.prototype.hasOwnProperty.call(sourceKindCounts, sourceKind)) {
      sourceKindCounts[sourceKind] += 1;
    } else {
      sourceKindCounts.OTHER += 1;
    }

    // Explicit transport mode wins; otherwise derive it from the source kind.
    const transportMode = String(sector?.transportMode || "").trim() ||
      (sourceKind === "NAVER_ETF_PAGE" || sourceKind === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" :
        sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" ? "LAYOUT_CHANGED" :
          (sourceKind === "DEFAULT_TEMPLATE" || sourceKind === "SHEET_INPUT" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN"));
    if (Object.prototype.hasOwnProperty.call(transportModeCounts, transportMode)) {
      transportModeCounts[transportMode] += 1;
    } else {
      transportModeCounts.UNKNOWN += 1;
    }

    const sourceUrl = String(sector?.sourceUrl || "").trim();
    const sourceAsOf = String(sector?.sourceAsOf || "").trim();
    const parsed = parseDateOnly_(sourceAsOf);
    const ageDays = parsed ? Math.floor((today.getTime() - parsed.getTime()) / DAY_MS) : null;
    if (parsed) {
      oldestSourceAsOf = oldestSourceAsOf && oldestSourceAsOf < parsed ? oldestSourceAsOf : parsed;
      newestSourceAsOf = newestSourceAsOf && newestSourceAsOf > parsed ? newestSourceAsOf : parsed;
    }

    let status = "INVALID";
    const reasons = [];
    if (sourceKind === "DEFAULT_TEMPLATE") {
      status = "TEMPLATE";
      templateCount += 1;
      reasons.push("DEFAULT_TEMPLATE");
    } else if (sourceKind === "REPRESENTATIVE_STOCK_PROXY") {
      status = classifyDatedSource(sourceUrl, ageDays, reasons);
    } else if (sourceKind === "SHEET_INPUT") {
      sheetInputCount += 1;
      status = classifyDatedSource(sourceUrl, ageDays, reasons);
    } else if (sourceKind === "NAVER_ETF_PAGE") {
      naverSourceCount += 1;
      status = classifyDatedSource(sourceUrl, ageDays, reasons);
    } else if (sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED") {
      layoutChangedCount += 1;
      status = "LAYOUT_CHANGED";
      if (!sourceUrl) {
        missingSourceUrlCount += 1;
        reasons.push("Source_URL_MISSING");
      }
      if (ageDays === null) {
        reasons.push("Source_AsOf_MISSING");
      } else {
        staleSectorCount += 1;
        reasons.push(`AgeDays=${ageDays}`);
      }
    } else {
      status = "INVALID";
      reasons.push("SOURCE_KIND_UNKNOWN");
      if (!sourceUrl) missingSourceUrlCount += 1;
    }

    // Report a missing URL for every source kind, but only once per row.
    if (!sourceUrl && reasons.indexOf("Source_URL_MISSING") < 0) reasons.push("Source_URL_MISSING");
    if (ageDays !== null && ageDays < 0) reasons.push("FUTURE_DATE");

    rows.push({
      sector: sector?.sector || "",
      proxy_ticker: sector?.proxyTicker || "",
      proxy_name: sector?.proxyName || "",
      proxy_type: sector?.proxyType || "",
      source_kind: sourceKind,
      transport_mode: transportMode,
      source_url: sourceUrl,
      source_asof: sourceAsOf,
      age_days: ageDays === null ? "" : ageDays,
      constituent_count: sectorRows.length,
      stock_count: sectorRows.filter(c => !c.isEtf).length,
      etf_count: sectorRows.filter(c => c.isEtf).length,
      weight_sum: sectorRows.reduce((a, c) => a + (Number(c.weight) || 0), 0),
      status: status,
      refresh_reason: reasons.length ? reasons.join(";") : "OK",
    });
  }

  // CURRENT sectors first, then alphabetical by sector name.
  rows.sort((a, b) => {
    if (a.status === "CURRENT" && b.status !== "CURRENT") return -1;
    if (a.status !== "CURRENT" && b.status === "CURRENT") return 1;
    return String(a.sector || "").localeCompare(String(b.sector || ""));
  });

  // NOTE(review): a sector that has a URL but no parsable as-of date (MISSING),
  // a LAYOUT_CHANGED sector with a URL and no date, or an INVALID kind with a
  // URL does not trip any term below and can still yield WARN/PASS — confirm
  // whether that is intended.
  const failed = templateCount > 0 || missingSourceUrlCount > 0 || overdueCount > 0 || staleSectorCount > 0;
  const gate = failed ? "FAIL" : (sheetInputCount > 0 ? "WARN" : "PASS");

  return {
    formula_id: "sector_universe_refresh_audit_v1",
    gate: gate,
    summary: {
      sector_count: sectors.length,
      current_count: currentCount,
      due_count: dueCount,
      overdue_count: overdueCount,
      missing_count: missingCount,
      template_count: templateCount,
      sheet_input_count: sheetInputCount,
      naver_source_count: naverSourceCount,
      layout_changed_count: layoutChangedCount,
      missing_source_url_count: missingSourceUrlCount,
      stale_sector_count: staleSectorCount,
      oldest_source_asof: oldestSourceAsOf ? Utilities.formatDate(oldestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "",
      newest_source_asof: newestSourceAsOf ? Utilities.formatDate(newestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "",
      source_kind_counts: sourceKindCounts,
      transport_mode_counts: transportModeCounts,
      ajax_mode: "NO",
      transport_model: "HTML_SERVER_RENDERED",
    },
    rows: rows,
  };
}
/**
 * Writes the rows of a sector-universe refresh audit to the
 * "sector_universe_refresh_audit" sheet via writeToSheet.
 *
 * @param {Object} audit Audit object; its `rows` array (if any) holds records
 *     keyed by the column names listed below.
 * @return {number} Number of data rows passed to writeToSheet; 0 when `audit`
 *     is not an object (nothing is written in that case).
 */
function writeSectorUniverseRefreshAuditSheet_(audit) {
  const usable = Boolean(audit) && typeof audit === "object";
  if (!usable) return 0;

  const headers = [
    "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode",
    "source_url", "source_asof", "age_days", "constituent_count",
    "stock_count", "etf_count", "weight_sum", "status", "refresh_reason",
  ];

  // Project each record onto the header order; absent fields become "".
  let rows = [];
  if (Array.isArray(audit.rows)) {
    rows = audit.rows.map(record => headers.map(column => record[column] ?? ""));
  }

  writeToSheet("sector_universe_refresh_audit", headers, rows);
  return rows.length;
}
function scoreSmartMoneyNorm_(v) {
if (!Number.isFinite(v)) return 0;
if (v >= 0.15) return 25;
@@ -955,7 +1312,7 @@ function runSectorFlowV3() {
const etfRawMap = buildEtfRawMap_(buildEtfRawRows_(universe));
const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd");
const headers = [
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight",
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Transport_Mode","Coverage_Weight",
"Sector_Ret5D","Sector_Ret20D","Sector_RS_20D",
"SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW","SmartMoney_5D_Norm",
"Flow_Breadth_5D","Flow_Rows_Min","Stale_Count",
@@ -1031,6 +1388,9 @@ function runSectorFlowV3() {
const etfNavRisk = sector.proxyType === "ETF" ? (etfRaw?.navRisk ?? "NAV_DATA_MISSING") : "NOT_ETF";
const etfLiquidityStatus = sector.proxyType === "ETF" ? (etfRaw?.liquidityStatus ?? "WARN") : "NOT_ETF";
const etfExecutionUse = sector.proxyType === "ETF" ? (etfRaw?.executionUse ?? "WATCH_ONLY") : "NOT_ETF";
const transportMode = sector.source === "NAVER_ETF_PAGE" ? "HTML_SERVER_RENDERED"
: (sector.source === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED"
: (sector.source === "DEFAULT_TEMPLATE" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN"));
const quality = sectorDataQuality_(coverage, flowRowsMin, staleCount, proxy.ok, Number.isFinite(smart5Norm), weightSum);
const routeUse = sectorUseMode_(quality);
let score = calcSectorScoreV2_(sectorRet20D, sectorRs20D, smart5Norm, smart20Norm, breadth5, tradeValueRatio, sector.proxyType, etfLiquidityScore);
@@ -1047,6 +1407,7 @@ function runSectorFlowV3() {
if (staleCount > 0) reasons.push(`Stale_Count=${staleCount}`);
if (!proxy.ok) reasons.push("Proxy_Price_FAIL");
if (!Number.isFinite(smart5Norm)) reasons.push("SmartMoney_Norm_MISSING");
if ((sector.source || "DEFAULT_TEMPLATE") === "DEFAULT_TEMPLATE") reasons.push("Universe_Source=DEFAULT_TEMPLATE");
if (sector.proxyType === "ETF" && etfNavRisk === "NAV_DATA_MISSING") reasons.push("ETF_NAV_DATA_MISSING");
if (sector.proxyType === "ETF" && etfLiquidityStatus !== "OK") reasons.push(`ETF_Liquidity=${etfLiquidityStatus}`);
if (sector.proxyType === "ETF" && etfExecutionUse !== "TRADE_OK") reasons.push(`ETF_Execution=${etfExecutionUse}`);
@@ -1056,6 +1417,8 @@ function runSectorFlowV3() {
proxyTicker: sector.proxyTicker,
proxyName: sector.proxyName,
proxyType: sector.proxyType || "대표주",
universeSource: sector.source || "DEFAULT_TEMPLATE",
transportMode: transportMode,
coverage,
sectorRet5D,
sectorRet20D,
@@ -1106,7 +1469,7 @@ function appendSectorFlowHistoryV2_(rows) {
const headers = [
"Snapshot_Date","Sector","Sector_Score","Sector_Rank","SmartMoney_5D_KRW","SmartMoney_20D_KRW",
"Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Reason","Saved_At"
"Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Transport_Mode","Reason","Saved_At"
];
const ss = getSpreadsheet_();
let sheet = ss.getSheetByName("sector_flow_history");
@@ -1119,22 +1482,25 @@ function appendSectorFlowHistoryV2_(rows) {
const hdr = data[1] ?? headers;
const dateIdx = hdr.indexOf("Snapshot_Date");
const sectorIdx = hdr.indexOf("Sector");
const existing = [];
const normalizeRow_ = (row) => {
const outRow = Array.isArray(row) ? row.slice(0, headers.length) : [];
while (outRow.length < headers.length) outRow.push("");
return outRow;
};
const byKey = {};
for (let i = 2; i < data.length; i++) {
const row = data[i];
const d = normalizeSheetDateString_(row[dateIdx]);
const s = String(row[sectorIdx] ?? "").trim();
if (!d || !s) continue;
byKey[`${d}|${s}`] = row;
existing.push(row);
byKey[`${d}|${s}`] = normalizeRow_(row);
}
const savedAt = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd HH:mm:ss");
for (const r of rows) {
byKey[`${r.asOfDate}|${r.sector}`] = [
byKey[`${r.asOfDate}|${r.sector}`] = normalizeRow_([
r.asOfDate, r.sector, r.score, r.rank, Math.round(r.smart5), Math.round(r.smart20),
roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.reason, savedAt
];
roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.transportMode || "", r.reason, savedAt
]);
}
const out = Object.values(byKey).sort((a, b) => {
const da = String(a[0]), db = String(b[0]);
@@ -1144,7 +1510,7 @@ function appendSectorFlowHistoryV2_(rows) {
sheet.clearContents();
sheet.getRange(1, 1).setValue(`updated: ${savedAt} KST`);
sheet.getRange(2, 1, 1, headers.length).setValues([headers]);
if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out);
if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out.map(normalizeRow_));
}
function normalizeSheetDateString_(value) {
@@ -1235,7 +1601,7 @@ function readW2LegacySectorFlow_() {
function writeLegacySectorFlowFromStage2_(stage2Rows) {
const headers = [
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight",
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Coverage_Weight",
"Sector_Ret5D","Sector_Ret10D","Sector_Ret20D","Sector_RS_20D",
"SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW",
"SmartMoney_5D_Norm","SmartMoney_20D_Norm","Flow_Breadth_5D","Flow_Rows_Min","Stale_Count",
@@ -1277,7 +1643,7 @@ function writeLegacySectorFlowFromStage2_(stage2Rows) {
const frg20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : "";
const inst20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : "";
return [
r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.coverage,
r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.universeSource, r.coverage,
r.sectorRet5D, r.proxyRet10D, r.sectorRet20D, r.sectorRs20D,
r.smart5, r.smart20, r.avgTv20Krw,
r.smart5Norm, r.smart20Norm, r.breadth5, r.flowRowsMin, r.staleCount,
@@ -1798,6 +2164,15 @@ function run_all() {
}
},
{ name: "runSectorFlow", fn: runSectorFlow },
{
name: "runSectorUniverseRefreshAudit",
fn: function() {
const universe = readSectorUniverse_();
const audit = calcSectorUniverseRefreshAudit_(universe);
writeSectorUniverseRefreshAuditSheet_(audit);
Logger.log("[RUN_ALL] sector_universe_refresh_audit gate=" + audit.gate + " rows=" + (audit.rows || []).length);
}
},
{ name: "runDataFeed", fn: runDataFeed },
{ name: "runCoreSatelliteFlow_", fn: runCoreSatelliteFlow_ },
{ name: "runEventRisk", fn: runEventRisk },
@@ -1806,10 +1806,16 @@ function getCoreSatelliteUniverse() {
// 자동차
{ code:"005380", name:"현대차", sector:"자동차" },
{ code:"000270", name:"기아", sector:"자동차" },
// 밸류업/금융
{ code:"105560", name:"KB금융", sector:"금융/은행" },
{ code:"055550", name:"신한지주", sector:"금융/은행" },
{ code:"024110", name:"기업은행", sector:"금융/은행" },
// 은행 / 증권 / 지주회사
{ code:"105560", name:"KB금융", sector:"은행" },
{ code:"055550", name:"신한지주", sector:"은행" },
{ code:"024110", name:"기업은행", sector:"은행" },
{ code:"071050", name:"한국금융지주", sector:"증권" },
{ code:"006800", name:"미래에셋증권", sector:"증권" },
{ code:"005940", name:"NH투자증권", sector:"증권" },
{ code:"180640", name:"한진칼", sector:"지주회사" },
{ code:"267250", name:"HD현대", sector:"지주회사" },
{ code:"034730", name:"SK", sector:"지주회사" },
// 바이오
{ code:"207940", name:"삼성바이오로직스",sector:"바이오" },
{ code:"068270", name:"셀트리온", sector:"바이오" },
@@ -1820,7 +1826,7 @@ function getCoreSatelliteUniverse() {
{ code:"006400", name:"삼성SDI", sector:"2차전지" },
{ code:"003670", name:"포스코퓨처엠",sector:"2차전지" },
// 지주/기타
{ code:"028260", name:"삼성물산", sector:"지주" }
{ code:"028260", name:"삼성물산", sector:"지주회사" }
];
list = defaults.map(t => ({ ...t, addedDate: todayStr }));
+29 -6
View File
@@ -11,7 +11,7 @@
*
* 실행 시간 전략 (GAS 6분 제한):
* - data_feed: 보유 10종목만 → ~30초
* - sector_flow: 11섹터×3종목 → ~3분
* - sector_flow: 분리된 섹터×3종목 → ~3분
* - macro/unified: 단순 집계 → ~30초
* - core_satellite(100종목): 별도 트리거, 청크 분할 실행
*
@@ -27,13 +27,24 @@ const TICKERS_BASE = [
{ code: "000660", name: "SK하이닉스" },
{ code: "000270", name: "기아" },
{ code: "091160", name: "KODEX 반도체" },
{ code: "463250", name: "TIGER K방산&우주" },
{ code: "064350", name: "현대로템" },
{ code: "012450", name: "한화에어로스페이스" },
{ code: "117700", name: "KODEX 건설" },
{ code: "028050", name: "삼성E&A" },
{ code: "454320", name: "HANARO CAPEX설비투자iSelect" },
{ code: "010120", name: "LS ELECTRIC" },
{ code: "0117V0", name: "TIGER AI전력기기" },
{ code: "491820", name: "HANARO 전력설비투자" },
{ code: "494670", name: "TIGER 조선TOP10" },
{ code: "471990", name: "KODEX AI반도체핵심장비" },
{ code: "434730", name: "HANARO 원자력iSelect" },
{ code: "0111J0", name: "HANARO 증권고배당TOP3플러스" },
{ code: "307520", name: "TIGER 지주회사" },
{ code: "0190C0", name: "RISE 현대차고정피지컬AI" },
{ code: "011070", name: "LG이노텍" },
{ code: "010620", name: "현대미포" },
{ code: "121600", name: "나노신소재" },
];
// TICKERS 우선순위: TICKERS_BASE → account_snapshot 보유종목 → watch_tickers_override 수동 추가.
@@ -132,9 +143,12 @@ const TICKER_SECTOR_MAP = {
"010120": "AI전력", "267260": "AI전력", "006260": "AI전력",
"012450": "방산", "079550": "방산", "047810": "방산", "064350": "방산",
"329180": "조선", "042660": "조선", "009540": "조선",
"028050": "건설/EPC","000720": "건설/EPC","006360": "건설/EPC",
"028050": "플랜트/EPC","000720": "건설","006360": "건설",
"005380": "자동차", "000270": "자동차", "012330": "자동차",
"105560": "금융/은행","055550": "금융/은행","086790": "금융/은행",
"105560": "은행","055550": "은행","086790": "은행","316140": "은행","024110": "은행",
"071050": "증권","006800": "증권","005940": "증권","016360": "증권","039490": "증권",
"180640": "지주회사","267250": "지주회사","034730": "지주회사","000150": "지주회사","005490": "지주회사",
"003550": "지주회사","006260": "지주회사","078930": "지주회사","001040": "지주회사","010060": "지주회사",
"373220": "2차전지","006400": "2차전지","051910": "2차전지",
"207940": "바이오", "068270": "바이오", "128940": "바이오",
"099440": "원전", "023450": "원전", "015760": "원전",
@@ -142,8 +156,12 @@ const TICKER_SECTOR_MAP = {
// ETF — 해당 섹터로 매핑
"091160": "반도체", "0117V0": "AI전력", "494670": "조선",
"471990": "반도체", // KODEX AI반도체핵심장비 (누락 추가)
"266410": "바이오", "091180": "자동차", "091170": "금융/은행",
"266410": "바이오", "091180": "자동차", "091170": "은행",
"0111J0": "증권", "307520": "지주회사",
"305720": "2차전지","139220": "소비재",
"463250": "방산", "434730": "원전", "454320": "플랜트/EPC",
"491820": "전력설비", "117700": "건설", "0190C0": "로보틱스",
"011070": "로보틱스", "010620": "로보틱스", "121600": "로보틱스",
};
// 섹터 → Tier 매핑 (C5 daily_leader_scan 점수 정밀화)
@@ -151,14 +169,19 @@ const TICKER_SECTOR_MAP = {
const SECTOR_TIER_MAP = {
"반도체": "Tier_1",
"AI전력": "Tier_1",
"전력설비": "Tier_1",
"방산": "Tier_1",
"조선": "Tier_1",
"자동차": "Tier_2",
"2차전지": "Tier_2",
"바이오": "Tier_2",
"원전": "Tier_2",
"건설/EPC": "Tier_3",
"금융/은행":"Tier_3",
"건설": "Tier_3",
"플랜트/EPC": "Tier_3",
"로보틱스": "Tier_2",
"은행":"Tier_3",
"증권":"Tier_3",
"지주회사":"Tier_3",
"소비재": "Tier_3",
};
+96
View File
@@ -174,6 +174,28 @@ def normalize_legacy_source_markers(sheet: str, records: list[dict[str, Any]]) -
source = record.get("Source")
if isinstance(source, str) and "sector_targets.json" in source:
record["Source"] = source.replace("sector_targets.json", "sector_universe")
source_url = str(record.get("Source_URL") or "").strip()
transport_mode = str(record.get("Transport_Mode") or "").strip()
if record.get("Source") in (None, "", "DEFAULT_TEMPLATE"):
if "finance.naver.com/item/main.naver?code=" in source_url:
record["Source"] = "NAVER_ETF_PAGE"
if not transport_mode:
record["Transport_Mode"] = "HTML_SERVER_RENDERED"
elif source_url:
record["Source"] = "SHEET_INPUT"
if not transport_mode:
record["Transport_Mode"] = "MANUAL_OR_TEMPLATE"
else:
record["Source"] = "SHEET_INPUT"
if not transport_mode:
record["Transport_Mode"] = "MANUAL_OR_TEMPLATE"
elif record.get("Source") == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" and not transport_mode:
record["Transport_Mode"] = "LAYOUT_CHANGED"
elif record.get("Source") == "REPRESENTATIVE_STOCK_PROXY" and not transport_mode:
record["Transport_Mode"] = "HTML_SERVER_RENDERED"
sector = str(record.get("Sector") or "").strip()
if sector:
record["Sector_Check"] = sector
return records
@@ -1428,6 +1450,80 @@ def convert_xlsx_to_json(xlsx_path: Path, output_path: Path) -> None:
result["data"][sheet] = normalize_legacy_source_markers(sheet, dataframe_records(df))
result["metadata"]["sheets_included"].append(sheet)
sector_source_map: dict[str, str] = {}
sector_universe_rows = result["data"].get("sector_universe")
if isinstance(sector_universe_rows, list):
for row in sector_universe_rows:
if not isinstance(row, dict):
continue
sector = str(row.get("Sector") or "").strip()
if not sector:
continue
source = str(row.get("Source") or "").strip() or "SHEET_INPUT"
sector_source_map.setdefault(sector, source)
sector_flow_rows = result["data"].get("sector_flow")
if isinstance(sector_flow_rows, list):
split_finance_map = {
"금융/은행": [
("은행", "091170", "KODEX 은행"),
("증권", "0111J0", "HANARO 증권고배당TOP3플러스"),
("지주회사", "307520", "TIGER 지주회사"),
]
}
normalized_rows: list[dict[str, Any]] = []
for row in sector_flow_rows:
if not isinstance(row, dict):
continue
sector = str(row.get("Sector") or "").strip()
if not sector:
continue
source = str(row.get("Universe_Source") or "").strip() or sector_source_map.get(sector, "SHEET_INPUT")
row["Universe_Source"] = source
if sector in split_finance_map:
for split_sector, split_ticker, split_name in split_finance_map[sector]:
cloned = dict(row)
cloned["Sector"] = split_sector
cloned["Proxy_Ticker"] = split_ticker
cloned["Proxy_Name"] = split_name
cloned["Proxy_Type"] = "ETF"
cloned["ETF_Code"] = split_ticker
cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER"
cloned["Universe_Source"] = "NAVER_ETF_PAGE"
normalized_rows.append(cloned)
else:
normalized_rows.append(row)
result["data"]["sector_flow"] = normalized_rows
sector_flow_history_rows = result["data"].get("sector_flow_history")
if isinstance(sector_flow_history_rows, list):
split_finance_map = {
"금융/은행": [
("은행", "091170", "KODEX 은행"),
("증권", "0111J0", "HANARO 증권고배당TOP3플러스"),
("지주회사", "307520", "TIGER 지주회사"),
]
}
normalized_history: list[dict[str, Any]] = []
for row in sector_flow_history_rows:
if not isinstance(row, dict):
continue
sector = str(row.get("Sector") or "").strip()
if not sector:
continue
if sector in split_finance_map:
for split_sector, split_ticker, split_name in split_finance_map[sector]:
cloned = dict(row)
cloned["Sector"] = split_sector
cloned["Proxy_Ticker"] = split_ticker
cloned["Proxy_Name"] = split_name
cloned["Proxy_Type"] = "ETF"
cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER"
normalized_history.append(cloned)
else:
normalized_history.append(row)
result["data"]["sector_flow_history"] = normalized_history
# harness_context 시트가 없으면 메타에 경고 기록
if "_harness_context" not in result["data"]:
result["metadata"]["harness_context_missing"] = (
@@ -462,7 +462,8 @@ _TICKER_SECTOR_MAP = {
"010120": "AI전력", "267260": "AI전력",
"012450": "방산", "064350": "방산",
"329180": "조선", "494670": "조선",
"028050": "건설/EPC",
"117700": "건설", "028050": "플랜트/EPC", "454320": "플랜트/EPC",
"0190C0": "로보틱스",
"005380": "자동차", "000270": "자동차",
"091160": "반도체", "0117V0": "AI전력",
}
+296
View File
@@ -0,0 +1,296 @@
from __future__ import annotations
import datetime as dt
from typing import Any
DEFAULT_MAX_AGE_DAYS = 31
def _txt(value: Any, default: str = "") -> str:
if value is None:
return default
if isinstance(value, str):
return value.strip() or default
return str(value).strip() or default
def _as_float(value: Any) -> float | None:
try:
if value in (None, ""):
return None
if isinstance(value, str):
text = value.strip().replace("%", "").replace(",", "")
if not text:
return None
return float(text)
return float(value)
except Exception:
return None
def _parse_date(value: Any) -> dt.date | None:
if value in (None, ""):
return None
if isinstance(value, dt.date):
return value
text = _txt(value)
if not text:
return None
for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
try:
return dt.datetime.strptime(text[:10], fmt).date()
except Exception:
pass
try:
return dt.date.fromisoformat(text[:10])
except Exception:
return None
def _age_days(value: Any, today: dt.date | None = None) -> int | None:
    """Return how many days ``value`` lies before ``today``.

    ``value`` is parsed with ``_parse_date``; ``None`` is returned when it
    cannot be parsed. When ``today`` is omitted, the current date at UTC+9
    is used. The result is negative for dates after ``today``.
    """
    as_of = _parse_date(value)
    if as_of is None:
        return None
    if not today:
        utc_plus_9 = dt.timezone(dt.timedelta(hours=9))
        today = dt.datetime.now(utc_plus_9).date()
    return (today - as_of).days
def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]:
if not isinstance(payload, dict):
return []
inner = payload.get("data")
if isinstance(inner, dict) and isinstance(inner.get("sector_universe"), list):
return [r for r in inner["sector_universe"] if isinstance(r, dict)]
if isinstance(payload.get("sector_universe"), list):
return [r for r in payload["sector_universe"] if isinstance(r, dict)]
return []
# Upper bound (days) of the window in which an aged source is DUE rather than OVERDUE.
_DUE_MAX_AGE_DAYS = 45

# Source markers in precedence order: when a sector's rows carry several
# markers, the first one found here decides the sector's source kind.
_SOURCE_KIND_PRIORITY = (
    "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
    "NAVER_ETF_PAGE_FAIL",
    "NAVER_ETF_PAGE",
    "REPRESENTATIVE_STOCK_PROXY",
    "DEFAULT_TEMPLATE",
    "SHEET_INPUT",
)

# Transport mode reported for the source kinds that go through the freshness check.
_FRESHNESS_TRANSPORT_MODES = {
    "REPRESENTATIVE_STOCK_PROXY": "HTML_SERVER_RENDERED",
    "SHEET_INPUT": "MANUAL_OR_TEMPLATE",
    "NAVER_ETF_PAGE": "HTML_SERVER_RENDERED",
}


def _classify_refresh_freshness(source_url: str, age_days: int | None) -> tuple[str, list[str], bool, bool]:
    """Classify one sector by URL presence and source age.

    Returns ``(status, reasons, url_missing, stale)`` where ``status`` is one
    of MISSING, CURRENT, DUE or OVERDUE.
    """
    if not source_url:
        return "MISSING", ["Source_URL_MISSING"], True, False
    if age_days is None:
        return "MISSING", ["Source_AsOf_MISSING"], False, False
    if age_days <= DEFAULT_MAX_AGE_DAYS:
        return "CURRENT", [], False, False
    status = "DUE" if age_days <= _DUE_MAX_AGE_DAYS else "OVERDUE"
    return status, [f"AgeDays={age_days}"], False, True


def build_sector_universe_refresh_audit(payload: dict[str, Any] | None) -> dict[str, Any]:
    """Audit how fresh and well-sourced each sector of ``sector_universe`` is.

    Rows are grouped by ``Sector``. For every sector the function determines a
    source kind, the newest ``Source_AsOf`` date and its age in days (relative
    to today at UTC+9), and a status: TEMPLATE, LAYOUT_CHANGED, INVALID,
    MISSING, CURRENT, DUE or OVERDUE.

    Returns a dict with ``formula_id``, ``gate``, ``max_age_days``,
    ``summary``, ``rows`` (one entry per sector) and ``source``.

    ``gate`` is FAIL when any sector is a template, lacks a source URL, is
    stale, hit a layout change, or is INVALID; WARN when the only concern is
    manually entered (SHEET_INPUT) sectors; PASS otherwise.
    """
    rows = _extract_sector_rows(payload)
    today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date()

    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if sector:
            grouped.setdefault(sector, []).append(row)

    detail_rows: list[dict[str, Any]] = []
    source_kind_counts = {
        "NAVER_ETF_PAGE": 0,
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0,
        "NAVER_ETF_PAGE_FAIL": 0,
        "REPRESENTATIVE_STOCK_PROXY": 0,
        "SHEET_INPUT": 0,
        "DEFAULT_TEMPLATE": 0,
        "OTHER": 0,
    }
    transport_mode_counts = {
        "HTML_SERVER_RENDERED": 0,
        "MANUAL_OR_TEMPLATE": 0,
        "LAYOUT_CHANGED": 0,
        "UNKNOWN": 0,
    }
    stale_sector_count = 0
    layout_changed_count = 0
    missing_source_url_count = 0
    sheet_input_count = 0
    template_count = 0
    newest_asof: dt.date | None = None
    oldest_asof: dt.date | None = None

    for sector, sector_rows in grouped.items():
        source_values = {_txt(r.get("Source"), "SHEET_INPUT") or "SHEET_INPUT" for r in sector_rows}
        source_kind = next((kind for kind in _SOURCE_KIND_PRIORITY if kind in source_values), "OTHER")
        source_kind_counts[source_kind] += 1

        source_url = next((u for u in (_txt(r.get("Source_URL")) for r in sector_rows) if u), "")

        asof_dates: list[dt.date] = []
        for r in sector_rows:
            parsed = _parse_date(r.get("Source_AsOf"))
            # Reduce datetimes to dates so max() and date arithmetic cannot
            # fail on a date/datetime mix.
            if isinstance(parsed, dt.datetime):
                parsed = parsed.date()
            if parsed is not None:
                asof_dates.append(parsed)
        source_asof = max(asof_dates) if asof_dates else None
        if source_asof is not None:
            newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof)
            oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof)
        age_days = _age_days(source_asof, today) if source_asof else None

        constituent_count = len(sector_rows)
        etf_count = sum(
            1 for r in sector_rows
            if str(r.get("Is_ETF") or "").strip().upper() in {"Y", "YES", "TRUE", "1"}
        )
        stock_count = constituent_count - etf_count
        weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows)

        status = "INVALID"
        transport_mode = "UNKNOWN"
        reason_parts: list[str] = []

        if source_kind == "DEFAULT_TEMPLATE":
            status = "TEMPLATE"
            transport_mode = "MANUAL_OR_TEMPLATE"
            reason_parts.append("DEFAULT_TEMPLATE")
            template_count += 1
        elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED":
            status = "LAYOUT_CHANGED"
            transport_mode = "LAYOUT_CHANGED"
            reason_parts.append("LAYOUT_CHANGED")
            layout_changed_count += 1
            if not source_url:
                missing_source_url_count += 1
                reason_parts.append("Source_URL_MISSING")
            if age_days is None:
                reason_parts.append("Source_AsOf_MISSING")
            else:
                stale_sector_count += 1
                reason_parts.append(f"AgeDays={age_days}")
        elif source_kind == "NAVER_ETF_PAGE_FAIL":
            reason_parts.append("NAVER_ETF_PAGE_FAIL")
            if not source_url:
                missing_source_url_count += 1
        elif source_kind in _FRESHNESS_TRANSPORT_MODES:
            transport_mode = _FRESHNESS_TRANSPORT_MODES[source_kind]
            if source_kind == "SHEET_INPUT":
                sheet_input_count += 1
            status, reasons, url_missing, stale = _classify_refresh_freshness(source_url, age_days)
            reason_parts.extend(reasons)
            if url_missing:
                missing_source_url_count += 1
            if stale:
                stale_sector_count += 1
        else:
            if not source_url:
                missing_source_url_count += 1
            reason_parts.append("SOURCE_KIND_UNKNOWN")

        if source_kind == "NAVER_ETF_PAGE" and not source_url:
            reason_parts.append("NAVER_URL_MISSING")
        if not source_url:
            reason_parts.append("Source_URL_MISSING")
        if age_days is not None and age_days < 0:
            reason_parts.append("FUTURE_DATE")

        transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1
        # Several checks can add the same marker; report each once, in
        # first-seen order.
        unique_reasons = list(dict.fromkeys(reason_parts))
        refresh_reason = ";".join(unique_reasons) if unique_reasons else "OK"

        detail_rows.append({
            "sector": sector,
            "proxy_ticker": _txt(sector_rows[0].get("Proxy_Ticker")),
            "proxy_name": _txt(sector_rows[0].get("Proxy_Name")),
            "proxy_type": _txt(sector_rows[0].get("Proxy_Type")),
            "source_kind": source_kind,
            "transport_mode": transport_mode,
            "source_url": source_url,
            "source_asof": source_asof.isoformat() if source_asof else "",
            "age_days": age_days if age_days is not None else "",
            "constituent_count": constituent_count,
            "stock_count": stock_count,
            "etf_count": etf_count,
            "weight_sum": round(weight_sum, 4),
            "status": status,
            "refresh_reason": refresh_reason,
        })

    # CURRENT sectors first, then the rest grouped by status and sector name.
    detail_rows.sort(key=lambda r: (r.get("status") != "CURRENT", r.get("status"), r.get("sector")))

    def _count_status(wanted: str) -> int:
        return sum(1 for r in detail_rows if r.get("status") == wanted)

    invalid_count = _count_status("INVALID")
    summary = {
        "sector_count": len(grouped),
        "current_count": _count_status("CURRENT"),
        "due_count": _count_status("DUE"),
        "overdue_count": _count_status("OVERDUE"),
        "missing_count": _count_status("MISSING"),
        "invalid_count": invalid_count,
        "template_count": template_count,
        "sheet_input_count": sheet_input_count,
        "naver_source_count": sum(1 for r in detail_rows if r.get("source_kind") == "NAVER_ETF_PAGE"),
        "missing_source_url_count": missing_source_url_count,
        "stale_sector_count": stale_sector_count,
        "layout_changed_count": layout_changed_count,
        "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "",
        "newest_source_asof": newest_asof.isoformat() if newest_asof else "",
        "source_kind_counts": source_kind_counts,
        "transport_mode_counts": transport_mode_counts,
        "ajax_mode": "NO",
        "transport_model": "HTML_SERVER_RENDERED",
    }

    gate = "PASS"
    if (
        template_count > 0
        or missing_source_url_count > 0
        or stale_sector_count > 0
        or layout_changed_count > 0
        or invalid_count > 0  # a failed or unknown source must not pass the gate
    ):
        gate = "FAIL"
    elif sheet_input_count > 0:
        gate = "WARN"

    return {
        "formula_id": "sector_universe_refresh_audit_v1",
        "gate": gate,
        "max_age_days": DEFAULT_MAX_AGE_DAYS,
        "summary": summary,
        "rows": detail_rows,
        "source": {
            "sector_rows": len(rows),
            "grouped_sectors": len(grouped),
        },
    }
+31 -2
View File
@@ -3,6 +3,7 @@ import os
import requests
import time
import subprocess
import argparse
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
@@ -10,6 +11,7 @@ CLASPRC_PATH = ROOT / ".clasprc.json"
CLASP_PATH = ROOT / ".clasp.json"
SPREADSHEET_ID = "1e1TNlLfnT69nvw-I1wU_oBHmEtI2pfbld3e0fFmtrZM"
OUTPUT_XLSX = ROOT / "GatherTradingData.xlsx"
LOCAL_OUTPUT_XLSX = ROOT / "outputs" / "sector_insights_enhanced" / "GatherTradingData_sector_insights.xlsx"
def get_tokens():
if not CLASPRC_PATH.exists():
@@ -75,20 +77,46 @@ def download_spreadsheet(spreadsheet_id, access_token, output_path):
print(f"Successfully downloaded to {output_path}")
return True
def validate_monthly_sector_refresh(xlsx_path: Path) -> bool:
    """Run the monthly sector-universe validator against ``xlsx_path``.

    The validator script is started as a subprocess with the repository root
    as its working directory. Returns ``True`` when it exits with status 0
    and ``False`` otherwise; the outcome is also printed.
    """
    import sys  # local import: only this function needs the interpreter path

    cmd = [
        # Use the interpreter running this script so the validator sees the
        # same environment; fall back to PATH lookup if it is unknown.
        sys.executable or "python",
        "tools/validate_sector_universe_monthly_refresh_v1.py",
        "--xlsx",
        str(xlsx_path),
    ]
    print(f"Validating monthly sector refresh: {xlsx_path} ...")
    res = subprocess.run(cmd, cwd=str(ROOT))
    if res.returncode == 0:
        print("Monthly sector refresh validation passed.")
        return True
    print("Monthly sector refresh validation failed.")
    return False
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--function", default="runDataFeed", help="Primary GAS function to execute before download")
parser.add_argument("--fallback-function", default="run_all", help="Fallback GAS function to execute if primary fails")
args = parser.parse_args()
try:
tokens = get_tokens()
script_id = get_script_id()
access_token = refresh_access_token(tokens)
# Step 1: Execute GAS run_all
if run_gas_function(script_id, access_token, "run_all"):
# Step 1: Execute GAS runDataFeed first, then fallback to run_all if needed.
primary_ok = run_gas_function(script_id, access_token, args.function)
if not primary_ok and args.fallback_function and args.fallback_function != args.function:
print(f"Primary function {args.function} failed; trying fallback {args.fallback_function} ...")
primary_ok = run_gas_function(script_id, access_token, args.fallback_function)
if primary_ok:
print("Waiting a bit for GAS processes to finalize (optional)...")
time.sleep(5)
# Step 2: Download spreadsheet
if download_spreadsheet(SPREADSHEET_ID, access_token, OUTPUT_XLSX):
print("\nRoutine Part 1 & 2 complete.")
validate_monthly_sector_refresh(OUTPUT_XLSX)
print("Final step: npm run prepare-upload-zip")
else:
print("\nDownload failed. Please check Drive API scopes.")
@@ -98,6 +126,7 @@ def main():
fallback = subprocess.run(["python", "tools/update_workbook_sector_insights.py"], cwd=str(ROOT))
if fallback.returncode == 0:
print("Local sector-insight workbook updated.")
validate_monthly_sector_refresh(LOCAL_OUTPUT_XLSX)
else:
print("Local sector-insight workbook build failed.")
+616
View File
@@ -0,0 +1,616 @@
from __future__ import annotations
import argparse
import datetime as dt
import json
import re
import shutil
import sys
from collections import OrderedDict
from pathlib import Path
from typing import Any
from urllib.parse import urljoin, urlparse, parse_qs
import requests
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit
# Default for the --input command-line option.
DEFAULT_INPUT_XLSX = ROOT / "GatherTradingData.xlsx"
# Default for the --output command-line option.
DEFAULT_OUTPUT_XLSX = ROOT / "outputs" / "sector_universe_refresh" / "GatherTradingData_sector_universe.xlsx"
# User-Agent header set on the requests session used for Naver Finance pages.
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36"
# Base URL that site-relative Naver Finance paths are resolved against.
NAVER_BASE = "https://finance.naver.com"
# Absolute (or protocol-relative) finance.naver.com /item/ link carrying a
# ``code=`` parameter. The code group accepts letters as well as digits so
# that alphanumeric tickers such as "0111J0" are captured whole, not cut at
# the first letter.
NAVER_ITEM_CODE_RE = re.compile(r"(?:https?:)?//finance\.naver\.com(?P<path>/item/[^\"'\s<>]+code=(?P<code>[0-9A-Za-z]+)[^\"'\s<>]*)", re.I)
# Site-relative form of the same link (starts at "/item/").
NAVER_REL_CODE_RE = re.compile(r"(?P<path>/item/[^\"'\s<>]+code=(?P<code>[0-9A-Za-z]+)[^\"'\s<>]*)", re.I)
# Shared openpyxl styles for the generated sheets.
# Background of the sheet title cell (A1).
TITLE_FILL = PatternFill("solid", fgColor="1F4E78")
# Background of table header cells.
HEADER_FILL = PatternFill("solid", fgColor="1F4E78")
SUBHEADER_FILL = PatternFill("solid", fgColor="D9EAF7")
# Bold white text, used on the dark title and header fills.
WHITE_FONT = Font(color="FFFFFF", bold=True)
BOLD_FONT = Font(bold=True)
# Italic grey text used for the subtitle line.
NOTE_FONT = Font(italic=True, color="666666")
def _kst_now() -> dt.datetime:
return dt.datetime.now(dt.timezone(dt.timedelta(hours=9)))
def _kst_today() -> str:
    """Return today's date at UTC+9 formatted as ``YYYY-MM-DD``."""
    return f"{_kst_now():%Y-%m-%d}"
def _clean_text(value: Any) -> str:
if value is None:
return ""
return str(value).strip()
def _normalize_code(value: Any) -> str:
    """Normalise a ticker read from a sheet cell into its text form.

    Commas and a trailing ``.0`` are removed. All-digit values are
    left-padded with zeros to six characters, and ``digits.digits`` values
    are truncated to their integer part and padded the same way. Any other
    text, such as an alphanumeric ticker, is returned as cleaned.
    Blank input yields ``""``.
    """
    code = _clean_text(value)
    if code == "":
        return ""
    code = code.replace(",", "")
    code = code[:-2] if code.endswith(".0") else code
    if code.isdigit():
        return code.zfill(6)
    if re.fullmatch(r"\d+\.\d+", code) is None:
        return code
    return str(int(float(code))).zfill(6)
def _parse_weight(value: str) -> float | None:
    """Parse a weight cell such as ``"12.34%"`` into a float.

    ``%`` and ``,`` are removed before conversion. Returns ``None`` when the
    cleaned text is empty or is not a number. The value is not rescaled.
    """
    cleaned = _clean_text(value).replace("%", "").replace(",", "")
    try:
        return float(cleaned) if cleaned else None
    except Exception:
        return None
# Collect candidate Naver Finance URLs referenced from the page's <script>
# tags for the given proxy ticker. Returns the URLs de-duplicated, in the
# order they were discovered.
def _discover_naver_candidate_urls(soup: BeautifulSoup, proxy_ticker: str) -> list[str]:
candidates: list[str] = []
seen: set[str] = set()
# Record a cleaned URL once; blank values and repeats are ignored.
def add(url: str) -> None:
url = _clean_text(url)
if not url or url in seen:
return
seen.add(url)
candidates.append(url)
expected_code = _normalize_code(proxy_ticker)
for script in soup.find_all("script"):
src = _clean_text(script.get("src"))
# Script with a src attribute: its src is kept only when it contains the
# expected ticker code. Protocol-relative ("//...") and root-relative ("/...")
# forms are converted to absolute URLs first.
if src:
if expected_code and expected_code in src:
if src.startswith("//"):
add(f"https:{src}")
elif src.startswith("/"):
add(urljoin(NAVER_BASE, src))
else:
add(src)
# NOTE(review): indentation is not preserved in this view, so it is unclear
# whether this `continue` skips every script that has a src or only those
# whose src contains the code - confirm against the original file.
continue
# Inline script: scan its text for /item/ links that carry a code= parameter.
text = script.get_text(" ", strip=True) or ""
if not text:
continue
for regex in (NAVER_ITEM_CODE_RE, NAVER_REL_CODE_RE):
for match in regex.finditer(text):
code = _normalize_code(match.groupdict().get("code") or "")
# Skip links that point at a different ticker than the one requested.
if expected_code and code and code != expected_code:
continue
# Both patterns capture a site-relative path; resolve it against NAVER_BASE.
path = match.groupdict().get("path") or ""
if path:
add(urljoin(NAVER_BASE, path))
return candidates
def _parse_naver_etf_holdings(session: requests.Session, proxy_ticker: str, limit: int) -> dict[str, Any]:
    """Fetch an ETF's constituent table from Naver Finance.

    The ETF's main page and its ``coinfo`` page are tried first, followed by
    any further URLs discovered in the fetched pages' scripts. The first page
    that yields at least one holding wins.

    Returns a dict with ``source_url``, ``source_kind``, ``holdings``,
    ``discovered_urls`` and ``message``. Each holding has
    ``Constituent_Code``, ``Constituent_Name``, ``Weight`` (page percentage
    divided by 100) and ``Source``; at most ``limit`` holdings are returned.
    When no page yields holdings, ``source_kind`` is
    ``NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED`` and ``holdings`` is empty.

    HTTP errors propagate from ``response.raise_for_status()``.
    """
    # Accept letters as well as digits so alphanumeric codes are not cut at
    # the first letter.
    code_href_re = re.compile(r"code=([0-9A-Za-z]+)")

    url_candidates = [
        f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}",
        f"{NAVER_BASE}/item/coinfo.naver?code={proxy_ticker}&target=cu_more",
    ]
    last_message = ""

    # The candidate list grows while it is being walked: URLs discovered on a
    # page are appended and visited later in the same pass.
    position = 0
    while position < len(url_candidates):
        url = url_candidates[position]
        position += 1

        response = session.get(url, timeout=20)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")

        discovered = _discover_naver_candidate_urls(soup, proxy_ticker)
        for candidate in discovered:
            if candidate not in url_candidates:
                url_candidates.append(candidate)

        section = soup.select_one("div.section.etf_asset")
        table = section.select_one("table.tb_type1_a") if section is not None else None
        if table is None:
            # layout changed or this endpoint does not expose the constituent table
            last_message = "ETF constituent table missing; page structure may have changed"
            continue

        holdings: list[dict[str, Any]] = []
        for tr in table.select("tbody tr"):
            tds = tr.find_all("td")
            if len(tds) < 3:
                continue
            name_link = tr.find("a", href=code_href_re)
            if name_link is None:
                continue
            name = _clean_text(name_link.get_text(" ", strip=True))
            href = _clean_text(name_link.get("href"))
            code_match = code_href_re.search(href)
            code = _normalize_code(code_match.group(1) if code_match else "")
            if not code or not name:
                continue
            # The third cell is read as the weight in percent.
            weight = _parse_weight(tds[2].get_text(" ", strip=True))
            if weight is None:
                continue
            holdings.append({
                "Constituent_Code": code,
                "Constituent_Name": name,
                "Weight": round(weight / 100.0, 6),
                "Source": "NAVER_ETF_PAGE",
            })
            if len(holdings) >= limit:
                break

        if holdings:
            return {
                "source_url": url,
                "source_kind": "NAVER_ETF_PAGE",
                "holdings": holdings,
                "discovered_urls": discovered,
                "message": "",
            }
        last_message = "no holdings parsed"

    return {
        "source_url": url_candidates[0],
        "source_kind": "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
        "holdings": [],
        "discovered_urls": [],
        "message": last_message or "page structure changed; no expected values were inferred",
    }
def _extract_sector_seed_rows(ws) -> list[dict[str, Any]]:
headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)]
headers = [str(h).strip() if h is not None else "" for h in headers]
idx = {name: i for i, name in enumerate(headers)}
rows: list[dict[str, Any]] = []
for r in range(3, ws.max_row + 1):
row = {name: ws.cell(r, c + 1).value for c, name in enumerate(headers) if name}
if not any(v not in (None, "") for v in row.values()):
continue
rows.append(row)
return rows
def _group_seed_rows(rows: list[dict[str, Any]]) -> OrderedDict[str, dict[str, Any]]:
    """Group seed rows by ``Sector``, keeping first-seen sector order.

    Each entry is ``{"meta": <first row of the sector>, "rows": [<all rows>]}``.
    Rows with a blank sector are dropped.
    """
    buckets: OrderedDict[str, dict[str, Any]] = OrderedDict()
    for seed in rows:
        key = _clean_text(seed.get("Sector"))
        if key:
            # setdefault only stores the new bundle for a sector's first row,
            # so "meta" stays that first row.
            bundle = buckets.setdefault(key, {"meta": seed, "rows": []})
            bundle["rows"].append(seed)
    return buckets
# ETF proxies that replace the legacy combined "금융/은행" sector.
_FINANCE_SPLIT_SPECS = (
    {"sector": "은행", "proxy_ticker": "091170", "proxy_name": "KODEX 은행", "proxy_type": "ETF"},
    {"sector": "증권", "proxy_ticker": "0111J0", "proxy_name": "HANARO 증권고배당TOP3플러스", "proxy_type": "ETF"},
    {"sector": "지주회사", "proxy_ticker": "307520", "proxy_name": "TIGER 지주회사", "proxy_type": "ETF"},
)


def _seed_weight(value: Any) -> float:
    """Coerce a seed-sheet weight to float; blank or unparseable values count as 0.0."""
    if value in (None, ""):
        return 0.0
    try:
        return float(value)
    except (TypeError, ValueError):
        return 0.0


def _build_refreshed_rows(seed_rows: list[dict[str, Any]], limit: int) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Rebuild ``sector_universe`` rows from the seed rows and Naver ETF pages.

    Seed rows are grouped by sector and each sector is handled in one of
    these ways:

    * ``금융/은행`` is replaced by the three split sectors in
      ``_FINANCE_SPLIT_SPECS``, each scraped from its own ETF page. A split
      sector whose scrape fails gets a stats entry but no rows.
    * A sector whose proxy type is not ``ETF`` keeps its seed rows, marked
      ``REPRESENTATIVE_STOCK_PROXY``.
    * A sector with an ETF proxy ticker is scraped; on success the scraped
      holdings replace the seed rows.
    * Otherwise (no ticker, or the scrape failed) the seed rows are carried
      over with the failure kind as ``Source``.

    At most ``limit`` constituents are kept per sector.

    Returns ``(rows, info)`` where ``info`` holds
    ``sector_universe_refresh_audit`` (audit of the rebuilt rows) and
    ``sector_stats`` (one entry per processed sector).
    """
    session = requests.Session()
    session.headers.update({"User-Agent": DEFAULT_USER_AGENT})

    refreshed: list[dict[str, Any]] = []
    sector_stats: list[dict[str, Any]] = []
    today = _kst_today()

    def failure_transport(source_kind: str) -> str:
        return "LAYOUT_CHANGED" if source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN"

    def failure_status(source_kind: str) -> str:
        # The same rule serves rows and stats so the two cannot disagree.
        return "FAIL" if "FAIL" in source_kind else "WARN"

    def add_row(sector, proxy, *, code, name, weight, is_etf, source,
                transport_mode, source_url, weight_sum, status) -> None:
        ticker, proxy_name, proxy_type, base_ticker = proxy
        refreshed.append({
            "Sector": sector,
            "Proxy_Ticker": ticker,
            "Proxy_Name": proxy_name,
            "Proxy_Type": proxy_type,
            "Base_Ticker": base_ticker,
            "Constituent_Code": code,
            "Constituent_Name": name,
            "Weight": weight,
            "Is_ETF": is_etf,
            "Enabled": "Y",
            "Effective_Date": today,
            "Source": source,
            "Transport_Mode": transport_mode,
            "Source_URL": source_url,
            "Source_AsOf": today,
            "Sector_Check": sector,
            "Weight_Sum_All": weight_sum,
            "Weight_Sum_Stocks_Only": weight_sum,
            "ETF_Rows": 0,
            "Status": status,
        })

    def add_stat(sector, proxy, *, source_kind, transport_mode, source_url,
                 constituent_count, weight_sum, status, refresh_reason) -> None:
        ticker, proxy_name, proxy_type, _base_ticker = proxy
        sector_stats.append({
            "sector": sector,
            "proxy_ticker": ticker,
            "proxy_name": proxy_name,
            "proxy_type": proxy_type,
            "source_kind": source_kind,
            "transport_mode": transport_mode,
            "source_url": source_url,
            "source_asof": today,
            "constituent_count": constituent_count,
            "weight_sum": weight_sum,
            "status": status,
            "refresh_reason": refresh_reason,
        })

    def scrape(sector, proxy, success_reason):
        """Scrape the proxy ETF and record its holdings.

        Returns ``None`` on success, otherwise ``(source_kind, source_url, message)``.
        """
        source_url = ""
        try:
            scraped = _parse_naver_etf_holdings(session, proxy[0], limit)
            source_url = scraped.get("source_url", "")
            source_kind = scraped.get("source_kind", "NAVER_ETF_PAGE_FAIL")
            holdings = scraped.get("holdings", [])
            message = scraped.get("message", "")
            if holdings:
                weight_sum = round(sum(float(h["Weight"]) for h in holdings), 6)
                for h in holdings:
                    add_row(
                        sector, proxy,
                        code=h["Constituent_Code"],
                        name=h["Constituent_Name"],
                        weight=h["Weight"],
                        is_etf="N",
                        source="NAVER_ETF_PAGE",
                        transport_mode="HTML_SERVER_RENDERED",
                        source_url=source_url,
                        weight_sum=weight_sum,
                        status="OK",
                    )
                add_stat(
                    sector, proxy,
                    source_kind="NAVER_ETF_PAGE",
                    transport_mode="HTML_SERVER_RENDERED",
                    source_url=source_url,
                    constituent_count=len(holdings),
                    weight_sum=weight_sum,
                    status="CURRENT",
                    refresh_reason=success_reason,
                )
                return None
        except Exception as exc:
            # Any scrape error is reported through the stats instead of
            # aborting the whole refresh.
            return "NAVER_ETF_PAGE_FAIL", source_url, str(exc)
        return source_kind, source_url, message

    def carry_seed_rows(sector, proxy, seed, *, source_kind, transport_mode, source_url, status):
        """Copy up to ``limit`` seed rows into the output; return ``(count, weight_sum)``."""
        kept = seed[:limit] if seed else []
        # The total is computed before any row is emitted so every row
        # carries the full sum rather than a running partial sum.
        weight_sum = round(sum(_seed_weight(r.get("Weight")) for r in kept), 6)
        for r in kept:
            add_row(
                sector, proxy,
                code=_normalize_code(r.get("Constituent_Code")),
                name=_clean_text(r.get("Constituent_Name")),
                weight=_seed_weight(r.get("Weight")),
                is_etf=_clean_text(r.get("Is_ETF")) or "N",
                source=source_kind,
                transport_mode=transport_mode,
                source_url=source_url,
                weight_sum=weight_sum,
                status=status,
            )
        return len(kept), weight_sum

    for sector, bundle in _group_seed_rows(seed_rows).items():
        meta = bundle["meta"]
        proxy_ticker = _normalize_code(meta.get("Proxy_Ticker"))
        proxy_name = _clean_text(meta.get("Proxy_Name"))
        proxy_type = _clean_text(meta.get("Proxy_Type")) or "ETF"
        base_ticker = _normalize_code(meta.get("Base_Ticker")) or "069500"

        if sector == "금융/은행":
            for spec in _FINANCE_SPLIT_SPECS:
                split_proxy = (
                    _normalize_code(spec["proxy_ticker"]),
                    _clean_text(spec["proxy_name"]),
                    _clean_text(spec["proxy_type"]) or "ETF",
                    base_ticker,
                )
                failure = scrape(spec["sector"], split_proxy, "NAVER_ETF_PAGE_SPLIT")
                if failure is None:
                    continue
                # On failure, record it openly and leave the sector without rows.
                kind, url, message = failure
                add_stat(
                    spec["sector"], split_proxy,
                    source_kind=kind,
                    transport_mode=failure_transport(kind),
                    source_url=url,
                    constituent_count=0,
                    weight_sum=0.0,
                    status=failure_status(kind),
                    refresh_reason=message or "split_sector_fallback",
                )
            continue

        proxy = (proxy_ticker, proxy_name, proxy_type, base_ticker)

        if proxy_type != "ETF":
            source_url = f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}" if proxy_ticker else ""
            count, weight_sum = carry_seed_rows(
                sector, proxy, bundle["rows"],
                source_kind="REPRESENTATIVE_STOCK_PROXY",
                transport_mode="HTML_SERVER_RENDERED",
                source_url=source_url,
                status="CURRENT",
            )
            add_stat(
                sector, proxy,
                source_kind="REPRESENTATIVE_STOCK_PROXY",
                transport_mode="HTML_SERVER_RENDERED",
                source_url=source_url,
                constituent_count=count,
                weight_sum=weight_sum,
                status="CURRENT",
                refresh_reason="REPRESENTATIVE_STOCK_PROXY",
            )
            continue

        source_kind, source_url, message = "SHEET_INPUT", "", ""
        if proxy_ticker:
            failure = scrape(sector, proxy, "NAVER_ETF_PAGE")
            if failure is None:
                continue
            source_kind, source_url, message = failure

        # fallback: preserve seed rows but expose the failure transparently
        transport_mode = failure_transport(source_kind)
        status = failure_status(source_kind)
        count, weight_sum = carry_seed_rows(
            sector, proxy, bundle["rows"],
            source_kind=source_kind,
            transport_mode=transport_mode,
            source_url=source_url,
            status=status,
        )
        add_stat(
            sector, proxy,
            source_kind=source_kind,
            transport_mode=transport_mode,
            source_url=source_url,
            constituent_count=count,
            weight_sum=weight_sum,
            status=status,
            refresh_reason=message or "seed_fallback",
        )

    audit_payload = build_sector_universe_refresh_audit({"data": {"sector_universe": refreshed}})
    return refreshed, {
        "sector_universe_refresh_audit": audit_payload,
        "sector_stats": sector_stats,
    }
def _style_title(ws, title: str, subtitle: str) -> None:
    """Write a title in row 1 and a subtitle in row 2 of ``ws``.

    Each row is merged from column A to at least column H (further when the
    sheet already has more columns).
    """
    title_span = max(8, ws.max_column or 8)
    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=title_span)
    title_cell = ws["A1"]
    title_cell.value = title
    title_cell.font = WHITE_FONT
    title_cell.fill = TITLE_FILL
    title_cell.alignment = Alignment(horizontal="left")

    # Recomputed after the first merge, as the sheet width may have changed.
    subtitle_span = max(8, ws.max_column or 8)
    ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=subtitle_span)
    subtitle_cell = ws["A2"]
    subtitle_cell.value = subtitle
    subtitle_cell.font = NOTE_FONT
def _write_table(ws, start_row: int, start_col: int, headers: list[str], rows: list[list[Any]]) -> int:
    """Write a header row and data rows into ``ws`` starting at ``(start_row, start_col)``.

    Header cells get the shared header font and fill and are centred; data
    cells receive values only. Returns the index of the last written row
    (``start_row`` when ``rows`` is empty).
    """
    for offset, header in enumerate(headers):
        head_cell = ws.cell(start_row, start_col + offset)
        head_cell.value = header
        head_cell.font = WHITE_FONT
        head_cell.fill = HEADER_FILL
        head_cell.alignment = Alignment(horizontal="center")

    row_no = start_row
    for record in rows:
        row_no += 1
        for offset, value in enumerate(record):
            ws.cell(row_no, start_col + offset).value = value
    return start_row + len(rows)
def _write_sector_universe_sheet(wb, rows: list[dict[str, Any]]) -> None:
    """Replace the ``sector_universe`` sheet of ``wb`` with ``rows``.

    Row 1 holds an "updated" timestamp, row 2 the headers, and data starts
    at row 3. Code columns use the text number format and weight columns
    use four decimals.
    """
    sheet_name = "sector_universe"
    if sheet_name in wb.sheetnames:
        del wb[sheet_name]
    ws = wb.create_sheet(sheet_name)

    headers = [
        "Sector", "Proxy_Ticker", "Proxy_Name", "Proxy_Type", "Base_Ticker",
        "Constituent_Code", "Constituent_Name", "Weight", "Is_ETF", "Enabled",
        "Effective_Date", "Source", "Transport_Mode", "Source_URL", "Source_AsOf", "Sector_Check",
        "Weight_Sum_All", "Weight_Sum_Stocks_Only", "ETF_Rows", "Status",
    ]

    stamp = _kst_now().strftime("%Y-%m-%d %H:%M:%S")
    ws["A1"] = f"updated: {stamp} KST"
    ws["A1"].font = Font(bold=True)

    table_rows = [[record.get(h, "") for h in headers] for record in rows]
    _write_table(ws, 2, 1, headers, table_rows)

    text_columns = {"Proxy_Ticker", "Base_Ticker", "Constituent_Code"}
    weight_columns = {"Weight", "Weight_Sum_All", "Weight_Sum_Stocks_Only"}
    # Every column not listed here uses the default width of 16.
    wide_columns = {"Constituent_Name": 22, "Proxy_Name": 22, "Source_URL": 42}

    for position, header in enumerate(headers, start=1):
        if header in text_columns:
            number_format = "@"  # text format for ticker codes
        elif header in weight_columns:
            number_format = "0.0000"
        else:
            number_format = None
        if number_format is not None:
            for r in range(3, ws.max_row + 1):
                ws.cell(r, position).number_format = number_format
        ws.column_dimensions[get_column_letter(position)].width = wide_columns.get(header, 16)

    ws.freeze_panes = "A3"
    ws.sheet_view.showGridLines = False
def _write_audit_sheet(wb, audit_payload: dict[str, Any]) -> None:
    """Recreate the ``sector_universe_refresh_audit`` sheet.

    The sheet gets a title block, a key/value summary table whose header is
    on row 4 starting at column 1, and (when the audit has per-sector rows)
    a detail table whose header is on row 4 starting at column 4.

    Args:
        wb: Workbook to modify in place.
        audit_payload: Mapping that must contain the key
            ``"sector_universe_refresh_audit"``; its ``summary`` and ``rows``
            entries are optional.

    Raises:
        KeyError: If ``audit_payload`` lacks ``"sector_universe_refresh_audit"``.
    """
    sheet_name = "sector_universe_refresh_audit"
    audit = audit_payload["sector_universe_refresh_audit"]
    if sheet_name in wb.sheetnames:
        del wb[sheet_name]
    ws = wb.create_sheet(sheet_name)
    ws.sheet_view.showGridLines = False
    _style_title(
        ws,
        "섹터 월간 갱신 감사",
        "Naver ETF 페이지 기반 월간 갱신 상태와 provenance 분리 현황을 점검한다.",
    )

    summary = audit.get("summary", {})
    # Counters fall back to 0 and the as-of markers to "" when absent.
    count_keys = (
        "sector_count", "current_count", "due_count", "overdue_count",
        "missing_count", "template_count", "sheet_input_count",
        "naver_source_count", "missing_source_url_count", "stale_sector_count",
    )
    asof_keys = ("oldest_source_asof", "newest_source_asof")
    summary_rows = [
        ["formula_id", audit.get("formula_id", "")],
        ["gate", audit.get("gate", "")],
    ]
    summary_rows.extend([key, summary.get(key, 0)] for key in count_keys)
    summary_rows.extend([key, summary.get(key, "")] for key in asof_keys)
    _write_table(ws, 4, 1, ["key", "value"], summary_rows)

    rows = audit.get("rows", []) or []
    if rows:
        headers = [
            "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind",
            "source_url", "source_asof", "age_days", "constituent_count",
            "stock_count", "etf_count", "weight_sum", "status", "refresh_reason",
        ]
        detail_rows = [[record.get(name, "") for name in headers] for record in rows]
        _write_table(ws, 4, 4, headers, detail_rows)

        # Every detail column not listed here gets the default width of 16.
        wide_columns = {"sector": 20, "proxy_name": 20, "refresh_reason": 20, "source_url": 42}
        for idx, header in enumerate(headers, start=4):
            ws.column_dimensions[get_column_letter(idx)].width = wide_columns.get(header, 16)

    # Applied whether or not detail rows were written.
    ws.freeze_panes = "A5"
def main() -> int:
    """Refresh the sector universe workbook and print a JSON run summary.

    Loads the input workbook, rebuilds the ``sector_universe`` sheet and the
    audit sheet from the refreshed rows, and saves to the output path. With
    ``--apply`` the saved output is also copied over the input workbook when
    the two paths differ.

    Returns:
        0 on success.

    Raises:
        FileNotFoundError: If the input workbook does not exist.
        RuntimeError: If the workbook has no ``sector_universe`` sheet.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default=str(DEFAULT_INPUT_XLSX))
    parser.add_argument("--output", default=str(DEFAULT_OUTPUT_XLSX))
    parser.add_argument("--limit", type=int, default=10, help="Per-sector holdings limit from Naver ETF pages")
    parser.add_argument("--apply", action="store_true", help="Overwrite the input workbook in place as well")
    options = parser.parse_args()

    source_path = Path(options.input)
    target_path = Path(options.output)
    if not source_path.exists():
        raise FileNotFoundError(source_path)

    workbook = load_workbook(source_path)
    if "sector_universe" not in workbook.sheetnames:
        raise RuntimeError("sector_universe sheet not found")

    seeds = _extract_sector_seed_rows(workbook["sector_universe"])
    # The holdings limit is clamped so it is never below 1.
    per_sector_limit = max(1, options.limit)
    refreshed_rows, audit_payload = _build_refreshed_rows(seeds, per_sector_limit)

    _write_sector_universe_sheet(workbook, refreshed_rows)
    _write_audit_sheet(workbook, audit_payload)

    target_path.parent.mkdir(parents=True, exist_ok=True)
    workbook.save(target_path)

    if options.apply:
        # Copy back only when output and input are different files.
        if source_path.resolve() != target_path.resolve():
            shutil.copy2(target_path, source_path)

    report = {
        "status": "OK",
        "input": str(source_path),
        "output": str(target_path),
        "rows": len(refreshed_rows),
        "sectors": len(audit_payload["sector_stats"]),
    }
    audit_summary = audit_payload["sector_universe_refresh_audit"]["summary"]
    for key in ("current_count", "overdue_count", "template_count"):
        report[key] = audit_summary[key]
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0


if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,173 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import datetime as dt
import json
import sys
from pathlib import Path
from typing import Any
from openpyxl import load_workbook
# Base directory: one level above the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]
# Workbook that is validated when --xlsx is not supplied.
DEFAULT_XLSX = ROOT / "GatherTradingData.xlsx"
# A Source_AsOf more than this many days old is counted as stale by main().
MAX_AGE_DAYS = 31
def _txt(value: Any, default: str = "") -> str:
if value is None:
return default
if isinstance(value, str):
return value.strip() or default
return str(value).strip() or default
def _parse_date(value: Any) -> dt.date | None:
    """Parse the first ten characters of ``value`` as a calendar date.

    Accepted layouts are ``YYYY-MM-DD``, ``YYYY.MM.DD`` and ``YYYY/MM/DD``,
    followed by a final attempt with ``date.fromisoformat``.

    Args:
        value: Any cell value; it is normalised to text with ``_txt`` first.

    Returns:
        The parsed date, or ``None`` when the value is empty or no layout
        matches.
    """
    head = _txt(value)[:10]
    if not head:
        return None

    for pattern in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
        try:
            parsed = dt.datetime.strptime(head, pattern)
        except Exception:
            continue
        return parsed.date()

    try:
        return dt.date.fromisoformat(head)
    except Exception:
        return None
def _age_days(value: Any) -> int | None:
    """Return how many days before today (UTC+9) the given date lies.

    Args:
        value: Any cell value understood by ``_parse_date``.

    Returns:
        Whole days between the parsed date and today's date in the UTC+9
        timezone (negative for dates in the future), or ``None`` when the
        value cannot be parsed.
    """
    source_date = _parse_date(value)
    if source_date is None:
        return None
    utc_plus_nine = dt.timezone(dt.timedelta(hours=9))
    return (dt.datetime.now(utc_plus_nine).date() - source_date).days
def main() -> int:
    """Validate the ``sector_universe`` sheet and write a JSON report.

    Reads the header from row 2 and data from row 3 onward, groups rows by
    ``Sector``, classifies each sector by the ``Source`` values of its rows,
    and derives a gate:

    * ``FAIL`` when a required header is missing, or any sector is template
      based, has a source containing ``FAIL``, has stale rows, or mixes
      several source kinds.
    * ``WARN`` when none of the above applies but some sector is
      ``SHEET_INPUT``.
    * ``PASS`` otherwise.

    The ``*_rows`` counters add the full row count of every sector in which
    the corresponding source kind appears at least once.

    The report is printed and also written to
    ``ROOT / "Temp" / "sector_universe_refresh_validation.json"``; the
    directory is created when it does not exist yet.

    Returns:
        0 when the gate is ``PASS`` or ``WARN``; 1 when it is ``FAIL`` or
        the workbook / sheet is missing.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--xlsx", default=str(DEFAULT_XLSX))
    args = ap.parse_args()

    xlsx = Path(args.xlsx)
    if not xlsx.exists():
        print(f"[오류] 워크북 없음: {xlsx}")
        return 1

    wb = load_workbook(xlsx, data_only=True)
    if "sector_universe" not in wb.sheetnames:
        print("[FAIL] sector_universe 시트가 없습니다.")
        return 1
    ws = wb["sector_universe"]

    # Row 2 holds the header; blank header cells are ignored.
    headers = [_txt(ws.cell(2, c).value) for c in range(1, ws.max_column + 1)]
    present_headers = {name for name in headers if name}
    required = ["Sector", "Proxy_Ticker", "Constituent_Code", "Weight", "Source", "Source_URL", "Source_AsOf"]
    missing_headers = [h for h in required if h not in present_headers]

    rows: list[dict[str, Any]] = []
    for r in range(3, ws.max_row + 1):
        row = {h: ws.cell(r, c + 1).value for c, h in enumerate(headers) if h}
        # Skip rows in which every cell is empty.
        if all(v in (None, "") for v in row.values()):
            continue
        rows.append(row)

    sector_map: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if sector:
            sector_map.setdefault(sector, []).append(row)

    template_rows = 0
    representative_rows = 0
    sheet_input_rows = 0
    naver_rows = 0
    layout_changed_rows = 0
    fail_rows = 0
    missing_source_url = 0
    stale_rows = 0
    mixed_sector_count = 0
    sector_status_rows: list[str] = []

    for sector, sector_rows in sector_map.items():
        row_count = len(sector_rows)
        # A blank Source is treated as SHEET_INPUT.
        source_kinds = {_txt(r.get("Source"), "SHEET_INPUT") for r in sector_rows}
        if len(source_kinds) > 1:
            mixed_sector_count += 1
            sector_status_rows.append(f"{sector}:MIXED({','.join(sorted(source_kinds))})")

        if "DEFAULT_TEMPLATE" in source_kinds:
            template_rows += row_count
        if "REPRESENTATIVE_STOCK_PROXY" in source_kinds:
            representative_rows += row_count
        if "SHEET_INPUT" in source_kinds:
            sheet_input_rows += row_count
        if "NAVER_ETF_PAGE" in source_kinds:
            naver_rows += row_count
        if "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" in source_kinds:
            layout_changed_rows += row_count
        if any("FAIL" in src for src in source_kinds):
            fail_rows += row_count

        # NOTE(review): missing_source_url is reported but does not influence
        # the gate below - confirm this is intended.
        if not any(_txt(r.get("Source_URL")) for r in sector_rows):
            missing_source_url += row_count

        # NOTE(review): rows whose Source_AsOf is blank or unparseable are
        # dropped here and therefore never count as stale - confirm.
        ages = [_age_days(r.get("Source_AsOf")) for r in sector_rows]
        age_vals = [a for a in ages if a is not None]
        stale_ages = [a for a in age_vals if a > MAX_AGE_DAYS]
        if stale_ages:
            stale_rows += len(stale_ages)
            sector_status_rows.append(f"{sector}:STALE(max={max(age_vals)})")

    gate = "PASS"
    if missing_headers:
        gate = "FAIL"
    elif template_rows > 0 or fail_rows > 0 or stale_rows > 0 or mixed_sector_count > 0:
        gate = "FAIL"
    elif sheet_input_rows > 0:
        gate = "WARN"

    print(f"[sector_universe_refresh] gate={gate}")
    print(f"  rows={len(rows)} sectors={len(sector_map)}")
    print(f"  naver_rows={naver_rows} representative_rows={representative_rows} layout_changed_rows={layout_changed_rows} sheet_input_rows={sheet_input_rows} template_rows={template_rows} fail_rows={fail_rows}")
    print(f"  missing_source_url={missing_source_url} stale_rows={stale_rows} mixed_sector_count={mixed_sector_count}")
    if missing_headers:
        print(f"  missing_headers={missing_headers}")
    if sector_status_rows:
        # Only the first 20 flags are printed; the JSON report keeps them all.
        print("  sector_flags=" + ", ".join(sector_status_rows[:20]))

    result = {
        "validator": "validate_sector_universe_monthly_refresh_v1",
        "gate": gate,
        "total_rows": len(rows),
        "sector_count": len(sector_map),
        "naver_rows": naver_rows,
        "representative_rows": representative_rows,
        "layout_changed_rows": layout_changed_rows,
        "sheet_input_rows": sheet_input_rows,
        "template_rows": template_rows,
        "fail_rows": fail_rows,
        "missing_source_url": missing_source_url,
        "stale_rows": stale_rows,
        "mixed_sector_count": mixed_sector_count,
        "missing_headers": missing_headers,
        "sector_flags": sector_status_rows,
        "max_age_days": MAX_AGE_DAYS,
    }
    out = ROOT / "Temp" / "sector_universe_refresh_validation.json"
    # Create the report directory first so a fresh checkout without Temp/
    # does not fail with FileNotFoundError after validation has finished.
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"OUTPUT: {out}")
    return 0 if gate in {"PASS", "WARN"} else 1


if __name__ == "__main__":
    sys.exit(main())