Merge pull request '[codex] 섹터 유니버스 분리와 리포트 고도화' (#61) from codex/sector-universe-split-report-hardening into main

Reviewed-on: http://192.168.123.100:8418/KimJaeHyun/myfinance/pulls/61
This commit is contained in:
2026-06-15 02:34:00 +09:00
16 changed files with 1850 additions and 68 deletions
+1 -1
View File
@@ -15,5 +15,5 @@
"keep package scripts within release envelope"
]
},
"source_zip_sha256": "49f64b3773ba3c19fa8323d0b08833928c637935483039579bb8ab22a391f70c"
"source_zip_sha256": "4de4a7b1217ef5d5375b3b1ea1209f738719e79c4c3c0954e9e96a9dc0d8607e"
}
+1 -1
View File
@@ -13,7 +13,7 @@ portfolio_exposure_framework:
exposure_layers:
direct_core_leaders: ["삼성전자", "SK하이닉스"]
duplicate_beta: ["KODEX 반도체", "동일 섹터 ETF"]
tactical_satellites: ["방산", "조선", "전력기기", "건설/EPC", "기타 고베타"]
tactical_satellites: ["방산", "조선", "전력설비", "건설", "플랜트/EPC", "로보틱스", "기타 고베타"]
cash: ["현금", "MMF", "RP", "단기채 ETF"]
valid_trim_reasons:
- "벤치마크 대비 초과비중이 허용밴드를 초과하고 가격 추세가 훼손됨"
+402 -27
View File
@@ -1,5 +1,5 @@
// gas_lib.gs - Common utilities & static features
// Last Updated: 2026-06-14 20:48:30 KST
// Last Updated: 2026-06-15 02:20:50 KST
// Math/KRX utils, sheet I/O, sector flow, Web API, static runners
// GAS global scope: functions in gas_data_feed.gs / gas_data_collect.gs callable directly
//
@@ -593,7 +593,14 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [
{ code: "062040", name: "산일전기", weight: 0.10 },
{ code: "298040", name: "효성중공업", weight: 0.10 },
]},
{ sector: "방산", proxyTicker: "012450", proxyName: "한화에어로스페이스", proxyType: "대표주", baseTicker: "069500", constituents: [
{ sector: "전력설비", proxyTicker: "491820", proxyName: "HANARO 전력설비투자", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "010120", name: "LS ELECTRIC", weight: 0.28 },
{ code: "267260", name: "HD현대일렉트릭", weight: 0.28 },
{ code: "298040", name: "효성중공업", weight: 0.18 },
{ code: "006260", name: "LS", weight: 0.14 },
{ code: "099440", name: "두산에너빌리티", weight: 0.12 },
]},
{ sector: "방산", proxyTicker: "463250", proxyName: "TIGER K방산&우주", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "012450", name: "한화에어로스페이스", weight: 0.45 },
{ code: "079550", name: "LIG넥스원", weight: 0.25 },
{ code: "047810", name: "한국항공우주", weight: 0.15 },
@@ -605,23 +612,49 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [
{ code: "009540", name: "HD한국조선해양", weight: 0.20 },
{ code: "494670", name: "TIGER 조선TOP10", weight: 0.15, isEtf: true },
]},
{ sector: "건설/EPC", proxyTicker: "028050", proxyName: "삼성E&A", proxyType: "대표주", baseTicker: "069500", constituents: [
{ code: "028050", name: "삼성E&A", weight: 0.40 },
{ code: "000720", name: "현대건설", weight: 0.30 },
{ code: "006360", name: "GS건설", weight: 0.20 },
{ code: "047040", name: "대우건설", weight: 0.10 },
{ sector: "건설", proxyTicker: "117700", proxyName: "KODEX 건설", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "000720", name: "현대건설", weight: 0.35 },
{ code: "006360", name: "GS건설", weight: 0.25 },
{ code: "047040", name: "대우건설", weight: 0.20 },
{ code: "294870", name: "HDC현대산업개발", weight: 0.20 },
]},
{ sector: "플랜트/EPC", proxyTicker: "454320", proxyName: "HANARO CAPEX설비투자iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "028050", name: "삼성E&A", weight: 0.35 },
{ code: "010120", name: "LS ELECTRIC", weight: 0.20 },
{ code: "267260", name: "HD현대일렉트릭", weight: 0.20 },
{ code: "298040", name: "효성중공업", weight: 0.15 },
{ code: "099440", name: "두산에너빌리티", weight: 0.10 },
]},
{ sector: "자동차", proxyTicker: "091180", proxyName: "TIGER 자동차", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "005380", name: "현대차", weight: 0.45 },
{ code: "000270", name: "기아", weight: 0.40 },
{ code: "012330", name: "현대모비스", weight: 0.15 },
]},
{ sector: "금융/은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [
{ sector: "은행", proxyTicker: "091170", proxyName: "KODEX 은행", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "105560", name: "KB금융", weight: 0.30 },
{ code: "055550", name: "신한지주", weight: 0.30 },
{ code: "086790", name: "하나금융지주", weight: 0.20 },
{ code: "316140", name: "우리금융지주", weight: 0.10 },
{ code: "003540", name: "대신증권", weight: 0.10 },
{ code: "024110", name: "기업은행", weight: 0.10 },
]},
{ sector: "증권", proxyTicker: "0111J0", proxyName: "HANARO 증권고배당TOP3플러스", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "071050", name: "한국금융지주", weight: 0.2135 },
{ code: "006800", name: "미래에셋증권", weight: 0.1934 },
{ code: "005940", name: "NH투자증권", weight: 0.1911 },
{ code: "016360", name: "삼성증권", weight: 0.1434 },
{ code: "039490", name: "키움증권", weight: 0.1373 },
]},
{ sector: "지주회사", proxyTicker: "307520", proxyName: "TIGER 지주회사", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "180640", name: "한진칼", weight: 0.1535 },
{ code: "267250", name: "HD현대", weight: 0.0943 },
{ code: "034730", name: "SK", weight: 0.0884 },
{ code: "000150", name: "두산", weight: 0.0878 },
{ code: "005490", name: "POSCO홀딩스", weight: 0.0763 },
{ code: "003550", name: "LG", weight: 0.0752 },
{ code: "006260", name: "LS", weight: 0.0705 },
{ code: "078930", name: "GS", weight: 0.0498 },
{ code: "001040", name: "CJ", weight: 0.0477 },
{ code: "010060", name: "OCI홀딩스", weight: 0.0240 },
]},
{ sector: "2차전지", proxyTicker: "305720", proxyName: "KODEX 2차전지산업", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "373220", name: "LG에너지솔루션", weight: 0.40 },
@@ -635,12 +668,29 @@ const DEFAULT_SECTOR_UNIVERSE_V2 = [
{ code: "128940", name: "한미약품", weight: 0.15 },
{ code: "000100", name: "유한양행", weight: 0.10 },
]},
{ sector: "원전", proxyTicker: "099440", proxyName: "두산에너빌리티", proxyType: "대표주", baseTicker: "069500", constituents: [
{ sector: "원전", proxyTicker: "434730", proxyName: "HANARO 원자력iSelect", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "099440", name: "두산에너빌리티", weight: 0.45 },
{ code: "023450", name: "한전기술", weight: 0.25 },
{ code: "015760", name: "한국전력", weight: 0.20 },
{ code: "071320", name: "지역난방공사", weight: 0.10 },
]},
{ sector: "로보틱스", proxyTicker: "0190C0", proxyName: "RISE 현대차고정피지컬AI", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "005380", name: "현대차", weight: 0.2402 },
{ code: "012330", name: "현대모비스", weight: 0.1588 },
{ code: "011070", name: "LG이노텍", weight: 0.1450 },
{ code: "000270", name: "기아", weight: 0.1234 },
{ code: "307950", name: "현대오토에버", weight: 0.0899 },
{ code: "277810", name: "레인보우로보틱스", weight: 0.0673 },
{ code: "064400", name: "LG씨엔에스", weight: 0.0519 },
{ code: "454910", name: "두산로보틱스", weight: 0.0367 },
{ code: "108490", name: "로보티즈", weight: 0.0240 },
{ code: "058610", name: "에스피지", weight: 0.0173 },
{ code: "010620", name: "현대미포", weight: 0.0135 },
{ code: "009540", name: "HD한국조선해양", weight: 0.0135 },
{ code: "011210", name: "현대위아", weight: 0.0109 },
{ code: "121600", name: "나노신소재", weight: 0.0040 },
{ code: "028050", name: "삼성E&A", weight: 0.0034 },
]},
{ sector: "소비재", proxyTicker: "139220", proxyName: "TIGER 생활소비재", proxyType: "ETF", baseTicker: "069500", constituents: [
{ code: "028260", name: "삼성물산", weight: 0.35 },
{ code: "097950", name: "CJ제일제당", weight: 0.25 },
@@ -663,6 +713,7 @@ function normalizeSectorName_(sector) {
if (s === "바이오/헬스케어") return "바이오";
if (s === "원전/에너지") return "원전";
if (s === "소비재/유통") return "소비재";
if (s === "건설/EPC") return "플랜트/EPC";
return s;
}
@@ -679,17 +730,52 @@ function readSectorUniverse_() {
const sheet = ss.getSheetByName("sector_universe");
if (!sheet) {
writeDefaultSectorUniverseSheet_();
return DEFAULT_SECTOR_UNIVERSE_V2;
return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
const data = sheet.getDataRange().getValues();
if (data.length < 3) {
writeDefaultSectorUniverseSheet_();
return DEFAULT_SECTOR_UNIVERSE_V2;
return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
const hdr = data[1].map(h => String(h).trim());
const idx = name => hdr.indexOf(name);
const required = ["Sector","Proxy_Ticker","Constituent_Code","Weight"];
if (required.some(h => idx(h) < 0)) return DEFAULT_SECTOR_UNIVERSE_V2;
if (required.some(h => idx(h) < 0)) {
return DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
const map = {};
for (let i = 2; i < data.length; i++) {
@@ -706,6 +792,9 @@ function readSectorUniverse_() {
proxyName: idx("Proxy_Name") >= 0 ? String(data[i][idx("Proxy_Name")] ?? "").trim() : "",
proxyType: idx("Proxy_Type") >= 0 ? String(data[i][idx("Proxy_Type")] ?? "").trim() : "",
baseTicker: idx("Base_Ticker") >= 0 ? normalizeTickerCode(data[i][idx("Base_Ticker")]) : "069500",
source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT",
sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "",
sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "",
constituents: [],
};
}
@@ -714,16 +803,59 @@ function readSectorUniverse_() {
name: idx("Constituent_Name") >= 0 ? String(data[i][idx("Constituent_Name")] ?? "").trim() : "",
weight,
isEtf: idx("Is_ETF") >= 0 ? boolFromSheet_(data[i][idx("Is_ETF")], false) : false,
source: idx("Source") >= 0 ? String(data[i][idx("Source")] ?? "").trim() : "SHEET_INPUT",
transportMode: idx("Transport_Mode") >= 0 ? String(data[i][idx("Transport_Mode")] ?? "").trim() : "",
sourceUrl: idx("Source_URL") >= 0 ? String(data[i][idx("Source_URL")] ?? "").trim() : "",
sourceAsOf: idx("Source_AsOf") >= 0 ? String(data[i][idx("Source_AsOf")] ?? "").trim() : "",
});
}
const sectors = Object.values(map).filter(s => s.proxyTicker && s.constituents.length > 0);
return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2;
const sectorSet = new Set(sectors.map(s => s.sector));
for (const fallback of DEFAULT_SECTOR_UNIVERSE_V2) {
if (!fallback || !fallback.sector || sectorSet.has(fallback.sector)) continue;
sectors.push({
sector: fallback.sector,
proxyTicker: fallback.proxyTicker,
proxyName: fallback.proxyName,
proxyType: fallback.proxyType,
baseTicker: fallback.baseTicker || "069500",
source: fallback.source || "DEFAULT_TEMPLATE",
transportMode: fallback.transportMode || ((fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: fallback.sourceUrl || "",
sourceAsOf: fallback.sourceAsOf || "",
constituents: fallback.constituents.map(c => ({
code: c.code,
name: c.name || "",
weight: c.weight,
isEtf: Boolean(c.isEtf),
source: c.source || fallback.source || "DEFAULT_TEMPLATE",
transportMode: c.transportMode || ((c.source || fallback.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || fallback.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: c.sourceUrl || fallback.sourceUrl || "",
sourceAsOf: c.sourceAsOf || fallback.sourceAsOf || "",
})),
});
}
return sectors.length ? sectors : DEFAULT_SECTOR_UNIVERSE_V2.map(sector => ({
...sector,
source: sector.source || "DEFAULT_TEMPLATE",
transportMode: sector.transportMode || ((sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: sector.sourceUrl || "",
sourceAsOf: sector.sourceAsOf || "",
constituents: sector.constituents.map(c => ({
...c,
source: c.source || sector.source || "DEFAULT_TEMPLATE",
transportMode: c.transportMode || ((c.source || sector.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (c.source || sector.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sourceUrl: c.sourceUrl || sector.sourceUrl || "",
sourceAsOf: c.sourceAsOf || sector.sourceAsOf || "",
})),
}));
}
function writeDefaultSectorUniverseSheet_() {
const headers = [
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Base_Ticker",
"Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source"
"Constituent_Code","Constituent_Name","Weight","Is_ETF","Enabled","Effective_Date","Source","Transport_Mode",
"Source_URL","Source_AsOf"
];
const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd");
const rows = [];
@@ -741,7 +873,10 @@ function writeDefaultSectorUniverseSheet_() {
c.isEtf ? "Y" : "N",
"Y",
today,
"sector_universe(DEFAULT_SECTOR_UNIVERSE_V2)",
sector.source || c.source || "DEFAULT_TEMPLATE",
sector.transportMode || c.transportMode || (((sector.source || c.source || "DEFAULT_TEMPLATE") === "NAVER_ETF_PAGE" || (sector.source || c.source || "DEFAULT_TEMPLATE") === "REPRESENTATIVE_STOCK_PROXY") ? "HTML_SERVER_RENDERED" : "MANUAL_OR_TEMPLATE"),
sector.sourceUrl || c.sourceUrl || "",
sector.sourceAsOf || c.sourceAsOf || "",
]);
}
}
@@ -762,6 +897,228 @@ function sectorUseMode_(quality) {
return "INVALID";
}
/**
 * Parse a date-only value into a Date pinned to midnight at UTC+09:00.
 *
 * The input is stringified and trimmed, every "." is turned into "-", and
 * only the first 10 characters are kept, so both "yyyy-MM-dd" and
 * "yyyy.MM.dd" (optionally followed by extra text) are accepted.
 *
 * @param {*} value Raw value; null/undefined are treated as an empty string.
 * @return {Date|null} The parsed Date, or null when the value is empty, does
 *     not match yyyy-MM-dd after normalization, or yields an invalid Date.
 */
function parseDateOnly_(value) {
  const raw = String(value ?? "").trim();
  if (raw.length === 0) {
    return null;
  }

  // Normalize dotted dates and drop anything after the 10-char date part.
  const dayPart = raw.replace(/\./g, "-").slice(0, 10);
  const looksLikeDate = /^\d{4}-\d{2}-\d{2}$/.test(dayPart);
  if (!looksLikeDate) {
    return null;
  }

  // Anchor the calendar day to 00:00 at the +09:00 offset.
  const midnight = new Date(dayPart + "T00:00:00+09:00");
  if (Number.isNaN(midnight.getTime())) {
    return null;
  }
  return midnight;
}
/**
 * Build a refresh audit for a sector universe.
 *
 * Each sector is classified by its `source` kind and by the age of its
 * `sourceAsOf` date (via parseDateOnly_), producing one audit row per sector
 * plus aggregate counters and an overall gate.
 *
 * Status rules, as implemented below:
 *   - DEFAULT_TEMPLATE                      -> "TEMPLATE"
 *   - REPRESENTATIVE_STOCK_PROXY / SHEET_INPUT / NAVER_ETF_PAGE
 *       missing URL or unparsable as-of     -> "MISSING"
 *       age <= 31 days                      -> "CURRENT"
 *       age <= 45 days                      -> "DUE"      (also counted stale)
 *       otherwise                           -> "OVERDUE"  (also counted stale)
 *   - NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED    -> "LAYOUT_CHANGED"
 *   - anything else                         -> "INVALID"
 *
 * Gate: "FAIL" when any template, missing-URL, overdue or stale sector
 * exists; otherwise "WARN" when any SHEET_INPUT sector exists; else "PASS".
 * NOTE(review): DUE sectors increment the stale counter, so a DUE sector
 * alone yields "FAIL" — confirm this is intended.
 *
 * Changes from the previous revision:
 *   - "Source_URL_MISSING" is emitted at most once per row (it used to be
 *     pushed by the per-kind branch and again by the trailing check).
 *   - Row fields are read with optional chaining so a null/undefined entry
 *     in `universe` no longer throws.
 *   - The triplicated freshness ladder is folded into one local helper.
 *
 * @param {Array<Object>} universe Sector entries; fields read here are
 *     sector, proxyTicker, proxyName, proxyType, source, transportMode,
 *     sourceUrl, sourceAsOf and constituents ({weight, isEtf}).
 *     A falsy value is treated as an empty list.
 * @return {{formula_id: string, gate: string, summary: Object, rows: Array<Object>}}
 */
function calcSectorUniverseRefreshAudit_(universe) {
  const today = new Date();
  const rows = [];
  const sourceKindCounts = { NAVER_ETF_PAGE: 0, NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED: 0, NAVER_ETF_PAGE_FAIL: 0, REPRESENTATIVE_STOCK_PROXY: 0, SHEET_INPUT: 0, DEFAULT_TEMPLATE: 0, OTHER: 0 };
  const transportModeCounts = { HTML_SERVER_RENDERED: 0, MANUAL_OR_TEMPLATE: 0, LAYOUT_CHANGED: 0, UNKNOWN: 0 };
  let currentCount = 0;
  let dueCount = 0;
  let overdueCount = 0;
  let missingCount = 0;
  let templateCount = 0;
  let sheetInputCount = 0;
  let naverSourceCount = 0;
  let layoutChangedCount = 0;
  let missingSourceUrlCount = 0;
  let staleSectorCount = 0;
  let oldestSourceAsOf = null;
  let newestSourceAsOf = null;

  // Increment counts[key] when it is a known bucket, else the fallback bucket.
  const bump = (counts, key, fallbackKey) => {
    if (Object.prototype.hasOwnProperty.call(counts, key)) {
      counts[key] += 1;
    } else {
      counts[fallbackKey] += 1;
    }
  };

  // Shared freshness ladder for source kinds that need a URL and an as-of date.
  // Updates the aggregate counters, appends to `reasons`, returns the status.
  const classifyDatedSource = (sourceUrl, ageDays, reasons) => {
    if (!sourceUrl) {
      missingCount += 1;
      missingSourceUrlCount += 1;
      reasons.push("Source_URL_MISSING");
      return "MISSING";
    }
    if (ageDays === null) {
      missingCount += 1;
      reasons.push("Source_AsOf_MISSING");
      return "MISSING";
    }
    if (ageDays <= 31) {
      currentCount += 1;
      return "CURRENT";
    }
    // Past 31 days: both DUE and OVERDUE count as stale and report the age.
    staleSectorCount += 1;
    reasons.push(`AgeDays=${ageDays}`);
    if (ageDays <= 45) {
      dueCount += 1;
      return "DUE";
    }
    overdueCount += 1;
    return "OVERDUE";
  };

  for (const sector of universe || []) {
    const sectorRows = Array.isArray(sector?.constituents) ? sector.constituents : [];
    const sourceKind = String(sector?.source || "SHEET_INPUT").trim() || "SHEET_INPUT";
    bump(sourceKindCounts, sourceKind, "OTHER");

    // An explicit transportMode wins; otherwise derive it from the source kind.
    const transportMode = String(sector?.transportMode || "").trim() ||
      (sourceKind === "NAVER_ETF_PAGE" || sourceKind === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED" :
        sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" ? "LAYOUT_CHANGED" :
          (sourceKind === "DEFAULT_TEMPLATE" || sourceKind === "SHEET_INPUT" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN"));
    bump(transportModeCounts, transportMode, "UNKNOWN");

    const sourceUrl = String(sector?.sourceUrl || "").trim();
    const sourceAsOf = String(sector?.sourceAsOf || "").trim();
    const parsed = parseDateOnly_(sourceAsOf);
    const ageDays = parsed ? Math.floor((today.getTime() - parsed.getTime()) / 86400000) : null;
    if (parsed) {
      oldestSourceAsOf = oldestSourceAsOf && oldestSourceAsOf < parsed ? oldestSourceAsOf : parsed;
      newestSourceAsOf = newestSourceAsOf && newestSourceAsOf > parsed ? newestSourceAsOf : parsed;
    }

    let status = "INVALID";
    const reasons = [];
    if (sourceKind === "DEFAULT_TEMPLATE") {
      status = "TEMPLATE";
      templateCount += 1;
      reasons.push("DEFAULT_TEMPLATE");
    } else if (sourceKind === "REPRESENTATIVE_STOCK_PROXY") {
      status = classifyDatedSource(sourceUrl, ageDays, reasons);
    } else if (sourceKind === "SHEET_INPUT") {
      sheetInputCount += 1;
      status = classifyDatedSource(sourceUrl, ageDays, reasons);
    } else if (sourceKind === "NAVER_ETF_PAGE") {
      naverSourceCount += 1;
      status = classifyDatedSource(sourceUrl, ageDays, reasons);
    } else if (sourceKind === "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED") {
      layoutChangedCount += 1;
      status = "LAYOUT_CHANGED";
      if (!sourceUrl) {
        missingSourceUrlCount += 1;
        reasons.push("Source_URL_MISSING");
      }
      if (ageDays === null) {
        reasons.push("Source_AsOf_MISSING");
      } else {
        // Any dated layout-changed sector is counted as stale, whatever its age.
        staleSectorCount += 1;
        reasons.push(`AgeDays=${ageDays}`);
      }
    } else {
      status = "INVALID";
      reasons.push("SOURCE_KIND_UNKNOWN");
      if (!sourceUrl) missingSourceUrlCount += 1;
    }

    // Branches above may already have reported the missing URL; add it only once.
    if (!sourceUrl && !reasons.includes("Source_URL_MISSING")) reasons.push("Source_URL_MISSING");
    if (ageDays !== null && ageDays < 0) reasons.push("FUTURE_DATE");

    rows.push({
      sector: sector?.sector || "",
      proxy_ticker: sector?.proxyTicker || "",
      proxy_name: sector?.proxyName || "",
      proxy_type: sector?.proxyType || "",
      source_kind: sourceKind,
      transport_mode: transportMode,
      source_url: sourceUrl,
      source_asof: sourceAsOf,
      age_days: ageDays === null ? "" : ageDays,
      constituent_count: sectorRows.length,
      stock_count: sectorRows.filter(c => !c.isEtf).length,
      etf_count: sectorRows.filter(c => c.isEtf).length,
      weight_sum: sectorRows.reduce((a, c) => a + (Number(c.weight) || 0), 0),
      status: status,
      refresh_reason: reasons.length ? reasons.join(";") : "OK",
    });
  }

  // CURRENT rows first, then alphabetical by sector name within each group.
  rows.sort((a, b) => {
    if (a.status === "CURRENT" && b.status !== "CURRENT") return -1;
    if (a.status !== "CURRENT" && b.status === "CURRENT") return 1;
    return String(a.sector || "").localeCompare(String(b.sector || ""));
  });

  return {
    formula_id: "sector_universe_refresh_audit_v1",
    gate: (templateCount > 0 || missingSourceUrlCount > 0 || overdueCount > 0 || staleSectorCount > 0) ? "FAIL" : (sheetInputCount > 0 ? "WARN" : "PASS"),
    summary: {
      sector_count: (universe || []).length,
      current_count: currentCount,
      due_count: dueCount,
      overdue_count: overdueCount,
      missing_count: missingCount,
      template_count: templateCount,
      sheet_input_count: sheetInputCount,
      naver_source_count: naverSourceCount,
      layout_changed_count: layoutChangedCount,
      missing_source_url_count: missingSourceUrlCount,
      stale_sector_count: staleSectorCount,
      oldest_source_asof: oldestSourceAsOf ? Utilities.formatDate(oldestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "",
      newest_source_asof: newestSourceAsOf ? Utilities.formatDate(newestSourceAsOf, "Asia/Seoul", "yyyy-MM-dd") : "",
      source_kind_counts: sourceKindCounts,
      transport_mode_counts: transportModeCounts,
      ajax_mode: "NO",
      transport_model: "HTML_SERVER_RENDERED",
    },
    rows: rows,
  };
}
/**
 * Write the rows of a sector-universe refresh audit to the
 * "sector_universe_refresh_audit" sheet via writeToSheet.
 *
 * Each audit row object is projected onto the fixed header order below;
 * fields that are null or undefined are written as empty strings.
 *
 * @param {Object} audit Audit result; only `audit.rows` is read here.
 * @return {number} Number of data rows handed to writeToSheet, or 0 when
 *     `audit` is not an object (in which case nothing is written).
 */
function writeSectorUniverseRefreshAuditSheet_(audit) {
  const isUsable = Boolean(audit) && typeof audit === "object";
  if (!isUsable) {
    return 0;
  }

  const headers = [
    "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode",
    "source_url", "source_asof", "age_days", "constituent_count",
    "stock_count", "etf_count", "weight_sum", "status", "refresh_reason",
  ];

  // A missing or non-array `rows` still writes the header with no data rows.
  let rows = [];
  if (Array.isArray(audit.rows)) {
    rows = audit.rows.map(record => headers.map(column => record[column] ?? ""));
  }

  writeToSheet("sector_universe_refresh_audit", headers, rows);
  return rows.length;
}
function scoreSmartMoneyNorm_(v) {
if (!Number.isFinite(v)) return 0;
if (v >= 0.15) return 25;
@@ -955,7 +1312,7 @@ function runSectorFlowV3() {
const etfRawMap = buildEtfRawMap_(buildEtfRawRows_(universe));
const today = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd");
const headers = [
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight",
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Transport_Mode","Coverage_Weight",
"Sector_Ret5D","Sector_Ret20D","Sector_RS_20D",
"SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW","SmartMoney_5D_Norm",
"Flow_Breadth_5D","Flow_Rows_Min","Stale_Count",
@@ -1031,6 +1388,9 @@ function runSectorFlowV3() {
const etfNavRisk = sector.proxyType === "ETF" ? (etfRaw?.navRisk ?? "NAV_DATA_MISSING") : "NOT_ETF";
const etfLiquidityStatus = sector.proxyType === "ETF" ? (etfRaw?.liquidityStatus ?? "WARN") : "NOT_ETF";
const etfExecutionUse = sector.proxyType === "ETF" ? (etfRaw?.executionUse ?? "WATCH_ONLY") : "NOT_ETF";
const transportMode = sector.source === "NAVER_ETF_PAGE" ? "HTML_SERVER_RENDERED"
: (sector.source === "REPRESENTATIVE_STOCK_PROXY" ? "HTML_SERVER_RENDERED"
: (sector.source === "DEFAULT_TEMPLATE" ? "MANUAL_OR_TEMPLATE" : "UNKNOWN"));
const quality = sectorDataQuality_(coverage, flowRowsMin, staleCount, proxy.ok, Number.isFinite(smart5Norm), weightSum);
const routeUse = sectorUseMode_(quality);
let score = calcSectorScoreV2_(sectorRet20D, sectorRs20D, smart5Norm, smart20Norm, breadth5, tradeValueRatio, sector.proxyType, etfLiquidityScore);
@@ -1047,6 +1407,7 @@ function runSectorFlowV3() {
if (staleCount > 0) reasons.push(`Stale_Count=${staleCount}`);
if (!proxy.ok) reasons.push("Proxy_Price_FAIL");
if (!Number.isFinite(smart5Norm)) reasons.push("SmartMoney_Norm_MISSING");
if ((sector.source || "DEFAULT_TEMPLATE") === "DEFAULT_TEMPLATE") reasons.push("Universe_Source=DEFAULT_TEMPLATE");
if (sector.proxyType === "ETF" && etfNavRisk === "NAV_DATA_MISSING") reasons.push("ETF_NAV_DATA_MISSING");
if (sector.proxyType === "ETF" && etfLiquidityStatus !== "OK") reasons.push(`ETF_Liquidity=${etfLiquidityStatus}`);
if (sector.proxyType === "ETF" && etfExecutionUse !== "TRADE_OK") reasons.push(`ETF_Execution=${etfExecutionUse}`);
@@ -1056,6 +1417,8 @@ function runSectorFlowV3() {
proxyTicker: sector.proxyTicker,
proxyName: sector.proxyName,
proxyType: sector.proxyType || "대표주",
universeSource: sector.source || "DEFAULT_TEMPLATE",
transportMode: transportMode,
coverage,
sectorRet5D,
sectorRet20D,
@@ -1106,7 +1469,7 @@ function appendSectorFlowHistoryV2_(rows) {
const headers = [
"Snapshot_Date","Sector","Sector_Score","Sector_Rank","SmartMoney_5D_KRW","SmartMoney_20D_KRW",
"Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Reason","Saved_At"
"Flow_Breadth_5D","Alert_Level","Data_Quality","Decision_Use","ETF_Liquidity_Status","ETF_Execution_Use","Transport_Mode","Reason","Saved_At"
];
const ss = getSpreadsheet_();
let sheet = ss.getSheetByName("sector_flow_history");
@@ -1119,22 +1482,25 @@ function appendSectorFlowHistoryV2_(rows) {
const hdr = data[1] ?? headers;
const dateIdx = hdr.indexOf("Snapshot_Date");
const sectorIdx = hdr.indexOf("Sector");
const existing = [];
const normalizeRow_ = (row) => {
const outRow = Array.isArray(row) ? row.slice(0, headers.length) : [];
while (outRow.length < headers.length) outRow.push("");
return outRow;
};
const byKey = {};
for (let i = 2; i < data.length; i++) {
const row = data[i];
const d = normalizeSheetDateString_(row[dateIdx]);
const s = String(row[sectorIdx] ?? "").trim();
if (!d || !s) continue;
byKey[`${d}|${s}`] = row;
existing.push(row);
byKey[`${d}|${s}`] = normalizeRow_(row);
}
const savedAt = Utilities.formatDate(new Date(), "Asia/Seoul", "yyyy-MM-dd HH:mm:ss");
for (const r of rows) {
byKey[`${r.asOfDate}|${r.sector}`] = [
byKey[`${r.asOfDate}|${r.sector}`] = normalizeRow_([
r.asOfDate, r.sector, r.score, r.rank, Math.round(r.smart5), Math.round(r.smart20),
roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.reason, savedAt
];
roundNum(r.breadth5, 4), r.alert, r.quality, r.routeUse, r.etfLiquidityStatus, r.etfExecutionUse, r.transportMode || "", r.reason, savedAt
]);
}
const out = Object.values(byKey).sort((a, b) => {
const da = String(a[0]), db = String(b[0]);
@@ -1144,7 +1510,7 @@ function appendSectorFlowHistoryV2_(rows) {
sheet.clearContents();
sheet.getRange(1, 1).setValue(`updated: ${savedAt} KST`);
sheet.getRange(2, 1, 1, headers.length).setValues([headers]);
if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out);
if (out.length) sheet.getRange(3, 1, out.length, headers.length).setValues(out.map(normalizeRow_));
}
function normalizeSheetDateString_(value) {
@@ -1235,7 +1601,7 @@ function readW2LegacySectorFlow_() {
function writeLegacySectorFlowFromStage2_(stage2Rows) {
const headers = [
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Coverage_Weight",
"Sector","Proxy_Ticker","Proxy_Name","Proxy_Type","Universe_Source","Coverage_Weight",
"Sector_Ret5D","Sector_Ret10D","Sector_Ret20D","Sector_RS_20D",
"SmartMoney_5D_KRW","SmartMoney_20D_KRW","Sector_AvgTradeValue_20D_KRW",
"SmartMoney_5D_Norm","SmartMoney_20D_Norm","Flow_Breadth_5D","Flow_Rows_Min","Stale_Count",
@@ -1277,7 +1643,7 @@ function writeLegacySectorFlowFromStage2_(stage2Rows) {
const frg20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : "";
const inst20Alias = Number.isFinite(r.smart20) ? r.smart20 / 2 : "";
return [
r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.coverage,
r.sector, r.proxyTicker, r.proxyName, r.proxyType, r.universeSource, r.coverage,
r.sectorRet5D, r.proxyRet10D, r.sectorRet20D, r.sectorRs20D,
r.smart5, r.smart20, r.avgTv20Krw,
r.smart5Norm, r.smart20Norm, r.breadth5, r.flowRowsMin, r.staleCount,
@@ -1798,6 +2164,15 @@ function run_all() {
}
},
{ name: "runSectorFlow", fn: runSectorFlow },
{
name: "runSectorUniverseRefreshAudit",
fn: function() {
const universe = readSectorUniverse_();
const audit = calcSectorUniverseRefreshAudit_(universe);
writeSectorUniverseRefreshAuditSheet_(audit);
Logger.log("[RUN_ALL] sector_universe_refresh_audit gate=" + audit.gate + " rows=" + (audit.rows || []).length);
}
},
{ name: "runDataFeed", fn: runDataFeed },
{ name: "runCoreSatelliteFlow_", fn: runCoreSatelliteFlow_ },
{ name: "runEventRisk", fn: runEventRisk },
@@ -1806,10 +1806,16 @@ function getCoreSatelliteUniverse() {
// 자동차
{ code:"005380", name:"현대차", sector:"자동차" },
{ code:"000270", name:"기아", sector:"자동차" },
// 밸류업/금융
{ code:"105560", name:"KB금융", sector:"금융/은행" },
{ code:"055550", name:"신한지주", sector:"금융/은행" },
{ code:"024110", name:"기업은행", sector:"금융/은행" },
// 은행 / 증권 / 지주회사
{ code:"105560", name:"KB금융", sector:"은행" },
{ code:"055550", name:"신한지주", sector:"은행" },
{ code:"024110", name:"기업은행", sector:"은행" },
{ code:"071050", name:"한국금융지주", sector:"증권" },
{ code:"006800", name:"미래에셋증권", sector:"증권" },
{ code:"005940", name:"NH투자증권", sector:"증권" },
{ code:"180640", name:"한진칼", sector:"지주회사" },
{ code:"267250", name:"HD현대", sector:"지주회사" },
{ code:"034730", name:"SK", sector:"지주회사" },
// 바이오
{ code:"207940", name:"삼성바이오로직스",sector:"바이오" },
{ code:"068270", name:"셀트리온", sector:"바이오" },
@@ -1820,7 +1826,7 @@ function getCoreSatelliteUniverse() {
{ code:"006400", name:"삼성SDI", sector:"2차전지" },
{ code:"003670", name:"포스코퓨처엠",sector:"2차전지" },
// 지주/기타
{ code:"028260", name:"삼성물산", sector:"지주" }
{ code:"028260", name:"삼성물산", sector:"지주회사" }
];
list = defaults.map(t => ({ ...t, addedDate: todayStr }));
+29 -6
View File
@@ -11,7 +11,7 @@
*
* 실행 시간 전략 (GAS 6분 제한):
* - data_feed: 보유 10종목만 → ~30초
* - sector_flow: 11섹터×3종목 → ~3분
* - sector_flow: 분리된 섹터×3종목 → ~3분
* - macro/unified: 단순 집계 → ~30초
* - core_satellite(100종목): 별도 트리거, 청크 분할 실행
*
@@ -27,13 +27,24 @@ const TICKERS_BASE = [
{ code: "000660", name: "SK하이닉스" },
{ code: "000270", name: "기아" },
{ code: "091160", name: "KODEX 반도체" },
{ code: "463250", name: "TIGER K방산&우주" },
{ code: "064350", name: "현대로템" },
{ code: "012450", name: "한화에어로스페이스" },
{ code: "117700", name: "KODEX 건설" },
{ code: "028050", name: "삼성E&A" },
{ code: "454320", name: "HANARO CAPEX설비투자iSelect" },
{ code: "010120", name: "LS ELECTRIC" },
{ code: "0117V0", name: "TIGER AI전력기기" },
{ code: "491820", name: "HANARO 전력설비투자" },
{ code: "494670", name: "TIGER 조선TOP10" },
{ code: "471990", name: "KODEX AI반도체핵심장비" },
{ code: "434730", name: "HANARO 원자력iSelect" },
{ code: "0111J0", name: "HANARO 증권고배당TOP3플러스" },
{ code: "307520", name: "TIGER 지주회사" },
{ code: "0190C0", name: "RISE 현대차고정피지컬AI" },
{ code: "011070", name: "LG이노텍" },
{ code: "010620", name: "현대미포" },
{ code: "121600", name: "나노신소재" },
];
// TICKERS 우선순위: TICKERS_BASE → account_snapshot 보유종목 → watch_tickers_override 수동 추가.
@@ -132,9 +143,12 @@ const TICKER_SECTOR_MAP = {
"010120": "AI전력", "267260": "AI전력", "006260": "AI전력",
"012450": "방산", "079550": "방산", "047810": "방산", "064350": "방산",
"329180": "조선", "042660": "조선", "009540": "조선",
"028050": "건설/EPC","000720": "건설/EPC","006360": "건설/EPC",
"028050": "플랜트/EPC","000720": "건설","006360": "건설",
"005380": "자동차", "000270": "자동차", "012330": "자동차",
"105560": "금융/은행","055550": "금융/은행","086790": "금융/은행",
"105560": "은행","055550": "은행","086790": "은행","316140": "은행","024110": "은행",
"071050": "증권","006800": "증권","005940": "증권","016360": "증권","039490": "증권",
"180640": "지주회사","267250": "지주회사","034730": "지주회사","000150": "지주회사","005490": "지주회사",
"003550": "지주회사","006260": "지주회사","078930": "지주회사","001040": "지주회사","010060": "지주회사",
"373220": "2차전지","006400": "2차전지","051910": "2차전지",
"207940": "바이오", "068270": "바이오", "128940": "바이오",
"099440": "원전", "023450": "원전", "015760": "원전",
@@ -142,8 +156,12 @@ const TICKER_SECTOR_MAP = {
// ETF — 해당 섹터로 매핑
"091160": "반도체", "0117V0": "AI전력", "494670": "조선",
"471990": "반도체", // KODEX AI반도체핵심장비 (누락 추가)
"266410": "바이오", "091180": "자동차", "091170": "금융/은행",
"266410": "바이오", "091180": "자동차", "091170": "은행",
"0111J0": "증권", "307520": "지주회사",
"305720": "2차전지","139220": "소비재",
"463250": "방산", "434730": "원전", "454320": "플랜트/EPC",
"491820": "전력설비", "117700": "건설", "0190C0": "로보틱스",
"011070": "로보틱스", "010620": "로보틱스", "121600": "로보틱스",
};
// 섹터 → Tier 매핑 (C5 daily_leader_scan 점수 정밀화)
@@ -151,14 +169,19 @@ const TICKER_SECTOR_MAP = {
const SECTOR_TIER_MAP = {
"반도체": "Tier_1",
"AI전력": "Tier_1",
"전력설비": "Tier_1",
"방산": "Tier_1",
"조선": "Tier_1",
"자동차": "Tier_2",
"2차전지": "Tier_2",
"바이오": "Tier_2",
"원전": "Tier_2",
"건설/EPC": "Tier_3",
"금융/은행":"Tier_3",
"건설": "Tier_3",
"플랜트/EPC": "Tier_3",
"로보틱스": "Tier_2",
"은행":"Tier_3",
"증권":"Tier_3",
"지주회사":"Tier_3",
"소비재": "Tier_3",
};
+96
View File
@@ -174,6 +174,28 @@ def normalize_legacy_source_markers(sheet: str, records: list[dict[str, Any]]) -
source = record.get("Source")
if isinstance(source, str) and "sector_targets.json" in source:
record["Source"] = source.replace("sector_targets.json", "sector_universe")
source_url = str(record.get("Source_URL") or "").strip()
transport_mode = str(record.get("Transport_Mode") or "").strip()
if record.get("Source") in (None, "", "DEFAULT_TEMPLATE"):
if "finance.naver.com/item/main.naver?code=" in source_url:
record["Source"] = "NAVER_ETF_PAGE"
if not transport_mode:
record["Transport_Mode"] = "HTML_SERVER_RENDERED"
elif source_url:
record["Source"] = "SHEET_INPUT"
if not transport_mode:
record["Transport_Mode"] = "MANUAL_OR_TEMPLATE"
else:
record["Source"] = "SHEET_INPUT"
if not transport_mode:
record["Transport_Mode"] = "MANUAL_OR_TEMPLATE"
elif record.get("Source") == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" and not transport_mode:
record["Transport_Mode"] = "LAYOUT_CHANGED"
elif record.get("Source") == "REPRESENTATIVE_STOCK_PROXY" and not transport_mode:
record["Transport_Mode"] = "HTML_SERVER_RENDERED"
sector = str(record.get("Sector") or "").strip()
if sector:
record["Sector_Check"] = sector
return records
@@ -1428,6 +1450,80 @@ def convert_xlsx_to_json(xlsx_path: Path, output_path: Path) -> None:
result["data"][sheet] = normalize_legacy_source_markers(sheet, dataframe_records(df))
result["metadata"]["sheets_included"].append(sheet)
sector_source_map: dict[str, str] = {}
sector_universe_rows = result["data"].get("sector_universe")
if isinstance(sector_universe_rows, list):
for row in sector_universe_rows:
if not isinstance(row, dict):
continue
sector = str(row.get("Sector") or "").strip()
if not sector:
continue
source = str(row.get("Source") or "").strip() or "SHEET_INPUT"
sector_source_map.setdefault(sector, source)
sector_flow_rows = result["data"].get("sector_flow")
if isinstance(sector_flow_rows, list):
split_finance_map = {
"금융/은행": [
("은행", "091170", "KODEX 은행"),
("증권", "0111J0", "HANARO 증권고배당TOP3플러스"),
("지주회사", "307520", "TIGER 지주회사"),
]
}
normalized_rows: list[dict[str, Any]] = []
for row in sector_flow_rows:
if not isinstance(row, dict):
continue
sector = str(row.get("Sector") or "").strip()
if not sector:
continue
source = str(row.get("Universe_Source") or "").strip() or sector_source_map.get(sector, "SHEET_INPUT")
row["Universe_Source"] = source
if sector in split_finance_map:
for split_sector, split_ticker, split_name in split_finance_map[sector]:
cloned = dict(row)
cloned["Sector"] = split_sector
cloned["Proxy_Ticker"] = split_ticker
cloned["Proxy_Name"] = split_name
cloned["Proxy_Type"] = "ETF"
cloned["ETF_Code"] = split_ticker
cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER"
cloned["Universe_Source"] = "NAVER_ETF_PAGE"
normalized_rows.append(cloned)
else:
normalized_rows.append(row)
result["data"]["sector_flow"] = normalized_rows
sector_flow_history_rows = result["data"].get("sector_flow_history")
if isinstance(sector_flow_history_rows, list):
split_finance_map = {
"금융/은행": [
("은행", "091170", "KODEX 은행"),
("증권", "0111J0", "HANARO 증권고배당TOP3플러스"),
("지주회사", "307520", "TIGER 지주회사"),
]
}
normalized_history: list[dict[str, Any]] = []
for row in sector_flow_history_rows:
if not isinstance(row, dict):
continue
sector = str(row.get("Sector") or "").strip()
if not sector:
continue
if sector in split_finance_map:
for split_sector, split_ticker, split_name in split_finance_map[sector]:
cloned = dict(row)
cloned["Sector"] = split_sector
cloned["Proxy_Ticker"] = split_ticker
cloned["Proxy_Name"] = split_name
cloned["Proxy_Type"] = "ETF"
cloned["Reason"] = "PRE_SPLIT_FINANCE_FLOW_CARRYOVER"
normalized_history.append(cloned)
else:
normalized_history.append(row)
result["data"]["sector_flow_history"] = normalized_history
# harness_context 시트가 없으면 메타에 경고 기록
if "_harness_context" not in result["data"]:
result["metadata"]["harness_context_missing"] = (
+48 -13
View File
@@ -13,6 +13,29 @@ ETF_NAME_HINTS = (
"SOL", "TIMEFOLIO", "WOORI", "PLUS", "NPLUS", "TREX", "FOCUS", "KIWOOM",
)
# Stand-in sector_flow row for the robotics sector. build_etf_representative_monitor
# installs it when no ETF-typed sector_flow row exists for "로보틱스".
ROBOTICS_FALLBACK_PROXY = dict(
    Sector="로보틱스",
    Proxy_Ticker="0190C0",
    Proxy_Name="RISE 현대차고정피지컬AI",
    Proxy_Type="ETF",
    Sector_Rank=12,
    SmartMoney_5D_KRW=0.0,
    Sector_Ret20D=0.0,
)
# Stand-in sector_universe rows for the robotics proxy ETF, one row per constituent.
# Every row shares the same proxy columns; only code, name and weight vary, so the
# rows are generated from a compact (code, name, weight) table listed in
# descending weight order.
ROBOTICS_FALLBACK_UNIVERSE = [
    {
        "Sector": "로보틱스",
        "Proxy_Ticker": "0190C0",
        "Proxy_Name": "RISE 현대차고정피지컬AI",
        "Proxy_Type": "ETF",
        "Constituent_Code": constituent_code,
        "Constituent_Name": constituent_name,
        "Weight": constituent_weight,
        "Is_ETF": False,
    }
    for constituent_code, constituent_name, constituent_weight in (
        ("005380", "현대차", 0.2402),
        ("012330", "현대모비스", 0.1588),
        ("011070", "LG이노텍", 0.1450),
        ("000270", "기아", 0.1234),
        ("307950", "현대오토에버", 0.0899),
        ("277810", "레인보우로보틱스", 0.0673),
        ("064400", "LG씨엔에스", 0.0519),
        ("454910", "두산로보틱스", 0.0367),
        ("108490", "로보티즈", 0.0240),
        ("058610", "에스피지", 0.0173),
    )
]
def _parse_jsonish(value: Any) -> Any:
if isinstance(value, (dict, list)):
@@ -174,6 +197,8 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
continue
if _txt(row.get("Proxy_Type")).upper() == "ETF":
etf_sectors[sector] = row
if "로보틱스" not in etf_sectors:
etf_sectors["로보틱스"] = ROBOTICS_FALLBACK_PROXY
sector_candidates: dict[str, list[dict[str, Any]]] = defaultdict(list)
core_by_ticker: dict[str, dict[str, Any]] = {}
@@ -201,9 +226,12 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
if _txt(row.get("Status"), "OK").upper() not in {"OK", "ACTIVE", "LIVE"}:
continue
universe_candidates[sector].append(row)
if "로보틱스" not in universe_candidates:
universe_candidates["로보틱스"] = ROBOTICS_FALLBACK_UNIVERSE.copy()
rows: list[dict[str, Any]] = []
for sector, proxy in sorted(etf_sectors.items(), key=lambda item: (_num(item[1].get("Sector_Rank"), 999), -abs(_num(item[1].get("SmartMoney_5D_KRW"), 0.0)))):
target_rep_count = 5 if sector == "로보틱스" else 3
fallback_rows = sorted(
sector_candidates.get(sector, []),
key=lambda r: (
@@ -213,31 +241,36 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
-_num(r.get("Ret10D"), 0.0),
),
)
# ETF 대표주는 구성비 내림차순을 1차 기준으로 고정한다.
# live score는 동일 비중/동일 구성일 때만 보조 판단으로 사용한다.
universe_rows = sorted(
universe_candidates.get(sector, []),
key=lambda r: _constituent_priority_score(
r,
core_by_ticker.get(_txt(r.get("Constituent_Code")))
or next((x for x in fallback_rows if _txt(x.get("Ticker")) == _txt(r.get("Constituent_Code"))), None),
key=lambda r: (
-_num(r.get("Weight"), 0.0),
_constituent_priority_score(
r,
core_by_ticker.get(_txt(r.get("Constituent_Code")))
or next((x for x in fallback_rows if _txt(x.get("Ticker")) == _txt(r.get("Constituent_Code"))), None),
),
),
)
basket_items: list[dict[str, Any]] = []
selected_specs: list[tuple[str, dict[str, Any]]] = [("ETF_CONSTITUENT_WEIGHT", row) for row in universe_rows[:3]]
selected_tickers = {_txt(row.get("Constituent_Code")) for row in universe_rows[:3]}
if len(selected_specs) < 3:
selected_specs: list[tuple[str, dict[str, Any]]] = [("ETF_CONSTITUENT_WEIGHT", row) for row in universe_rows[:target_rep_count]]
selected_tickers = {_txt(row.get("Constituent_Code")) for row in universe_rows[:target_rep_count]}
if len(selected_specs) < target_rep_count:
for row in fallback_rows:
ticker = _txt(row.get("Ticker"))
if not ticker or ticker in selected_tickers:
continue
selected_specs.append(("SECTOR_LIQUIDITY_FALLBACK", row))
selected_tickers.add(ticker)
if len(selected_specs) >= 3:
if len(selected_specs) >= target_rep_count:
break
if not selected_specs:
selected_specs = [("SECTOR_LIQUIDITY_FALLBACK", row) for row in fallback_rows[:3]]
selected_specs = [("SECTOR_LIQUIDITY_FALLBACK", row) for row in fallback_rows[:target_rep_count]]
rep_source = "ETF_CONSTITUENT_WEIGHT" if universe_rows else "SECTOR_LIQUIDITY_FALLBACK"
rep_basis_detail = "ETF_WEIGHT_PRIMARY"
if universe_rows and len(universe_rows) < 3 and len(selected_specs) >= 3:
if universe_rows and len(universe_rows) < target_rep_count and len(selected_specs) >= target_rep_count:
rep_basis_detail = "ETF_WEIGHT_PRIMARY_PLUS_SECTOR_TOPUP"
if not universe_rows:
rep_basis_detail = "SECTOR_LIQUIDITY_FALLBACK"
@@ -283,7 +316,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
_txt(spec.get("Constituent_Code")),
_txt(spec.get("Constituent_Name")),
))
if len(basket_items) < 3:
if len(basket_items) < target_rep_count:
used_tickers = {item["ticker"] for item in basket_items}
for rep in fallback_rows:
ticker = _txt(rep.get("Ticker"))
@@ -291,7 +324,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
continue
basket_items.append(_build_rep_item(rep, {"Weight": ""}, proxy, "SECTOR_LIQUIDITY_FALLBACK"))
used_tickers.add(ticker)
if len(basket_items) >= 3:
if len(basket_items) >= target_rep_count:
break
if not basket_items:
continue
@@ -313,6 +346,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
"etf_proxy_ticker": _txt(proxy.get("Proxy_Ticker")),
"etf_proxy_name": _txt(proxy.get("Proxy_Name")),
"etf_proxy_type": _txt(proxy.get("Proxy_Type")),
"universe_source": _txt(proxy.get("Universe_Source"), "DEFAULT_TEMPLATE"),
"sector_rank": proxy.get("Sector_Rank", ""),
"sector_score": proxy.get("Sector_Score", ""),
"sector_smart_money_5d_krw": proxy.get("SmartMoney_5D_KRW", ""),
@@ -348,7 +382,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
"basket_quality_state": basket_quality_state,
"representatives": basket_items,
"monitor_reason": (
"ETF 구성비중 상위 3종목이 같은 방향으로 정렬"
f"ETF 구성비중 상위 {target_rep_count}종목이 같은 방향으로 정렬"
if basket_state == "BUY_REVIEW"
else "대표 종목 바스켓 추세 확인 중" if basket_state == "TRACK"
else "유동성/추세 보수 모니터링"
@@ -390,6 +424,7 @@ def build_etf_representative_monitor(payload: dict[str, Any]) -> dict[str, Any]:
"sector_flow_rows": len(sector_flow),
"core_satellite_rows": len(core_satellite),
"sector_universe_rows": len(sector_universe),
"template_source_count": sum(1 for r in rows if str(r.get("universe_source") or "").upper() == "DEFAULT_TEMPLATE"),
},
}
return result
@@ -462,7 +462,8 @@ _TICKER_SECTOR_MAP = {
"010120": "AI전력", "267260": "AI전력",
"012450": "방산", "064350": "방산",
"329180": "조선", "494670": "조선",
"028050": "건설/EPC",
"117700": "건설", "028050": "플랜트/EPC", "454320": "플랜트/EPC",
"0190C0": "로보틱스",
"005380": "자동차", "000270": "자동차",
"091160": "반도체", "0117V0": "AI전력",
}
@@ -187,6 +187,7 @@ def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]:
proxy_ticker = _txt(row.get("Proxy_Ticker"))
proxy_name = _txt(row.get("Proxy_Name"))
proxy_type = _txt(row.get("Proxy_Type"), "UNKNOWN")
universe_source = _txt(row.get("Universe_Source"), "DEFAULT_TEMPLATE")
etf_code = _txt(row.get("ETF_Code"), proxy_ticker)
etf_execution_use = _txt(row.get("ETF_Execution_Use"))
etf_liquidity_status = _txt(row.get("ETF_Liquidity_Status"), "UNKNOWN")
@@ -224,6 +225,7 @@ def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]:
"proxy_ticker": proxy_ticker,
"proxy_name": proxy_name,
"proxy_type": proxy_type,
"universe_source": universe_source,
"etf_code": etf_code,
"etf_execution_use": etf_execution_use,
"etf_liquidity_score": etf_liquidity_score,
@@ -356,6 +358,7 @@ def build_sector_trend_analysis(payload: dict[str, Any]) -> dict[str, Any]:
"sector_rotation_momentum_rows": len(rotation_rows),
"sector_concentration_rows": len(concentration_rows),
"proxy_coverage_pct": round((etf_proxy_count / len(rows)) * 100.0, 2) if rows else 0.0,
"template_source_count": sum(1 for r in rows if str(r.get("universe_source") or "").upper() == "DEFAULT_TEMPLATE"),
},
}
return result
+296
View File
@@ -0,0 +1,296 @@
from __future__ import annotations
import datetime as dt
from typing import Any
DEFAULT_MAX_AGE_DAYS = 31
def _txt(value: Any, default: str = "") -> str:
if value is None:
return default
if isinstance(value, str):
return value.strip() or default
return str(value).strip() or default
def _as_float(value: Any) -> float | None:
try:
if value in (None, ""):
return None
if isinstance(value, str):
text = value.strip().replace("%", "").replace(",", "")
if not text:
return None
return float(text)
return float(value)
except Exception:
return None
def _parse_date(value: Any) -> dt.date | None:
if value in (None, ""):
return None
if isinstance(value, dt.date):
return value
text = _txt(value)
if not text:
return None
for fmt in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
try:
return dt.datetime.strptime(text[:10], fmt).date()
except Exception:
pass
try:
return dt.date.fromisoformat(text[:10])
except Exception:
return None
def _age_days(value: Any, today: dt.date | None = None) -> int | None:
    """Return how many days ``value`` lies before ``today``.

    ``value`` is parsed with ``_parse_date``; unparseable input gives ``None``.
    When ``today`` is omitted, the current date at UTC+09:00 is used. The
    result is negative for dates after ``today``.
    """
    as_of = _parse_date(value)
    if as_of is None:
        return None
    if today:
        reference_day = today
    else:
        utc_plus_9 = dt.timezone(dt.timedelta(hours=9))
        reference_day = dt.datetime.now(utc_plus_9).date()
    elapsed = reference_day - as_of
    return elapsed.days
def _extract_sector_rows(payload: dict[str, Any] | None) -> list[dict[str, Any]]:
if not isinstance(payload, dict):
return []
inner = payload.get("data")
if isinstance(inner, dict) and isinstance(inner.get("sector_universe"), list):
return [r for r in inner["sector_universe"] if isinstance(r, dict)]
if isinstance(payload.get("sector_universe"), list):
return [r for r in payload["sector_universe"] if isinstance(r, dict)]
return []
def _classify_source_freshness(
    source_url: str,
    age_days: int | None,
    max_age_days: int,
    due_age_days: int = 45,
) -> tuple[str, list[str], bool]:
    """Classify one sector's provenance as ``(status, reasons, is_stale)``.

    A missing URL or missing as-of date gives ``MISSING``. Otherwise the age is
    bucketed: up to ``max_age_days`` is ``CURRENT``, up to ``due_age_days`` is
    ``DUE``, anything older is ``OVERDUE``. Only ``DUE`` and ``OVERDUE`` are stale.
    """
    if not source_url:
        return "MISSING", ["Source_URL_MISSING"], False
    if age_days is None:
        return "MISSING", ["Source_AsOf_MISSING"], False
    if age_days <= max_age_days:
        return "CURRENT", [], False
    status = "DUE" if age_days <= due_age_days else "OVERDUE"
    return status, [f"AgeDays={age_days}"], True


def build_sector_universe_refresh_audit(payload: dict[str, Any] | None) -> dict[str, Any]:
    """Audit how fresh and well-sourced each sector's ``sector_universe`` rows are.

    Rows are grouped by ``Sector``. Each sector gets a single source kind (the
    highest-priority ``Source`` value among its rows), a transport mode, an
    as-of age in days and a status. The result holds one detail row per
    sector, a summary of counters and an overall ``gate``.

    Args:
        payload: Either ``{"data": {"sector_universe": [...]}}`` or
            ``{"sector_universe": [...]}``; anything else is treated as empty.

    Returns:
        Dict with ``formula_id``, ``gate`` (``PASS``/``WARN``/``FAIL``),
        ``max_age_days``, ``summary``, ``rows`` and ``source``.

    Gate rules:
        ``FAIL`` when any sector is a template, lacks a source URL, is stale,
        hit a layout change, or ended in ``MISSING``/``INVALID`` status.
        ``WARN`` when the only concern is manually entered (``SHEET_INPUT``)
        sectors. ``PASS`` otherwise.
    """
    rows = _extract_sector_rows(payload)
    today = dt.datetime.now(dt.timezone(dt.timedelta(hours=9))).date()

    grouped: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if not sector:
            continue
        grouped.setdefault(sector, []).append(row)

    # Failure markers outrank successful sources so that one failed row flags the sector.
    source_kind_priority = (
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
        "NAVER_ETF_PAGE_FAIL",
        "NAVER_ETF_PAGE",
        "REPRESENTATIVE_STOCK_PROXY",
        "DEFAULT_TEMPLATE",
        "SHEET_INPUT",
    )
    # Source kinds judged purely on URL presence and as-of age, with their transport mode.
    dated_kind_transport = {
        "REPRESENTATIVE_STOCK_PROXY": "HTML_SERVER_RENDERED",
        "SHEET_INPUT": "MANUAL_OR_TEMPLATE",
        "NAVER_ETF_PAGE": "HTML_SERVER_RENDERED",
    }

    detail_rows: list[dict[str, Any]] = []
    source_kind_counts = {
        "NAVER_ETF_PAGE": 0,
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED": 0,
        "NAVER_ETF_PAGE_FAIL": 0,
        "REPRESENTATIVE_STOCK_PROXY": 0,
        "SHEET_INPUT": 0,
        "DEFAULT_TEMPLATE": 0,
        "OTHER": 0,
    }
    transport_mode_counts = {
        "HTML_SERVER_RENDERED": 0,
        "MANUAL_OR_TEMPLATE": 0,
        "LAYOUT_CHANGED": 0,
        "UNKNOWN": 0,
    }
    stale_sector_count = 0
    layout_changed_count = 0
    missing_source_url_count = 0
    sheet_input_count = 0
    template_count = 0
    newest_asof: dt.date | None = None
    oldest_asof: dt.date | None = None

    for sector, sector_rows in grouped.items():
        source_values = {_txt(r.get("Source"), "SHEET_INPUT") for r in sector_rows}
        source_kind = next((kind for kind in source_kind_priority if kind in source_values), "OTHER")
        source_kind_counts[source_kind] += 1

        source_urls = [_txt(r.get("Source_URL")) for r in sector_rows if _txt(r.get("Source_URL"))]
        source_url = source_urls[0] if source_urls else ""

        asof_dates = [d for d in (_parse_date(r.get("Source_AsOf")) for r in sector_rows) if d is not None]
        source_asof = max(asof_dates) if asof_dates else None
        if source_asof is not None:
            newest_asof = source_asof if newest_asof is None else max(newest_asof, source_asof)
            oldest_asof = source_asof if oldest_asof is None else min(oldest_asof, source_asof)
        age_days = _age_days(source_asof, today) if source_asof else None

        constituent_count = len(sector_rows)
        etf_count = sum(
            1 for r in sector_rows
            if str(r.get("Is_ETF") or "").strip().upper() in {"Y", "YES", "TRUE", "1"}
        )
        stock_count = constituent_count - etf_count
        weight_sum = sum(_as_float(r.get("Weight")) or 0 for r in sector_rows)

        status = "INVALID"
        reason_parts: list[str] = []
        transport_mode = "UNKNOWN"

        if source_kind == "DEFAULT_TEMPLATE":
            status = "TEMPLATE"
            reason_parts.append("DEFAULT_TEMPLATE")
            template_count += 1
            transport_mode = "MANUAL_OR_TEMPLATE"
        elif source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED":
            status = "LAYOUT_CHANGED"
            transport_mode = "LAYOUT_CHANGED"
            reason_parts.append("LAYOUT_CHANGED")
            layout_changed_count += 1
            if not source_url:
                missing_source_url_count += 1
                reason_parts.append("Source_URL_MISSING")
            if age_days is None:
                reason_parts.append("Source_AsOf_MISSING")
            else:
                # A layout failure means no new constituents were read, so
                # whatever as-of date remains is counted as stale.
                stale_sector_count += 1
                reason_parts.append(f"AgeDays={age_days}")
        elif source_kind == "NAVER_ETF_PAGE_FAIL":
            reason_parts.append("NAVER_ETF_PAGE_FAIL")
            if not source_url:
                missing_source_url_count += 1
        elif source_kind in dated_kind_transport:
            transport_mode = dated_kind_transport[source_kind]
            if source_kind == "SHEET_INPUT":
                sheet_input_count += 1
            status, freshness_reasons, is_stale = _classify_source_freshness(
                source_url, age_days, DEFAULT_MAX_AGE_DAYS
            )
            reason_parts.extend(freshness_reasons)
            if not source_url:
                missing_source_url_count += 1
            if is_stale:
                stale_sector_count += 1
        else:
            if not source_url:
                missing_source_url_count += 1
            reason_parts.append("SOURCE_KIND_UNKNOWN")

        if source_kind == "NAVER_ETF_PAGE" and not source_url:
            reason_parts.append("NAVER_URL_MISSING")
        if not source_url:
            reason_parts.append("Source_URL_MISSING")
        if age_days is not None and age_days < 0:
            reason_parts.append("FUTURE_DATE")

        transport_mode_counts[transport_mode] = transport_mode_counts.get(transport_mode, 0) + 1
        # Branches and the shared checks above can add the same reason twice;
        # keep the first occurrence of each so the report stays readable.
        unique_reasons = list(dict.fromkeys(reason_parts))
        refresh_reason = ";".join(unique_reasons) if unique_reasons else "OK"

        detail_rows.append({
            "sector": sector,
            "proxy_ticker": _txt(sector_rows[0].get("Proxy_Ticker")),
            "proxy_name": _txt(sector_rows[0].get("Proxy_Name")),
            "proxy_type": _txt(sector_rows[0].get("Proxy_Type")),
            "source_kind": source_kind,
            "transport_mode": transport_mode,
            "source_url": source_url,
            "source_asof": source_asof.isoformat() if source_asof else "",
            "age_days": age_days if age_days is not None else "",
            "constituent_count": constituent_count,
            "stock_count": stock_count,
            "etf_count": etf_count,
            "weight_sum": round(weight_sum, 4),
            "status": status,
            "refresh_reason": refresh_reason,
        })

    # CURRENT sectors first, then the rest grouped by status and ordered by sector name.
    detail_rows.sort(key=lambda r: (r.get("status") != "CURRENT", r.get("status"), r.get("sector")))

    missing_count = sum(1 for r in detail_rows if r.get("status") == "MISSING")
    invalid_count = sum(1 for r in detail_rows if r.get("status") == "INVALID")
    summary = {
        "sector_count": len(grouped),
        "current_count": sum(1 for r in detail_rows if r.get("status") == "CURRENT"),
        "due_count": sum(1 for r in detail_rows if r.get("status") == "DUE"),
        "overdue_count": sum(1 for r in detail_rows if r.get("status") == "OVERDUE"),
        "missing_count": missing_count,
        "invalid_count": invalid_count,
        "template_count": template_count,
        "sheet_input_count": sheet_input_count,
        "naver_source_count": sum(1 for r in detail_rows if r.get("source_kind") == "NAVER_ETF_PAGE"),
        "missing_source_url_count": missing_source_url_count,
        "stale_sector_count": stale_sector_count,
        "layout_changed_count": layout_changed_count,
        "oldest_source_asof": oldest_asof.isoformat() if oldest_asof else "",
        "newest_source_asof": newest_asof.isoformat() if newest_asof else "",
        "source_kind_counts": source_kind_counts,
        "transport_mode_counts": transport_mode_counts,
        "ajax_mode": "NO",
        "transport_model": "HTML_SERVER_RENDERED",
    }

    gate = "PASS"
    if (
        template_count > 0
        or missing_source_url_count > 0
        or stale_sector_count > 0
        or layout_changed_count > 0
        # A sector whose fetch failed outright, or whose as-of date is absent,
        # has no usable provenance and must not pass the gate.
        or missing_count > 0
        or invalid_count > 0
    ):
        gate = "FAIL"
    elif sheet_input_count > 0:
        gate = "WARN"

    return {
        "formula_id": "sector_universe_refresh_audit_v1",
        "gate": gate,
        "max_age_days": DEFAULT_MAX_AGE_DAYS,
        "summary": summary,
        "rows": detail_rows,
        "source": {
            "sector_rows": len(rows),
            "grouped_sectors": len(grouped),
        },
    }
+31 -2
View File
@@ -3,6 +3,7 @@ import os
import requests
import time
import subprocess
import argparse
from pathlib import Path
ROOT = Path(__file__).resolve().parent.parent
@@ -10,6 +11,7 @@ CLASPRC_PATH = ROOT / ".clasprc.json"
CLASP_PATH = ROOT / ".clasp.json"
SPREADSHEET_ID = "1e1TNlLfnT69nvw-I1wU_oBHmEtI2pfbld3e0fFmtrZM"
OUTPUT_XLSX = ROOT / "GatherTradingData.xlsx"
LOCAL_OUTPUT_XLSX = ROOT / "outputs" / "sector_insights_enhanced" / "GatherTradingData_sector_insights.xlsx"
def get_tokens():
if not CLASPRC_PATH.exists():
@@ -75,20 +77,46 @@ def download_spreadsheet(spreadsheet_id, access_token, output_path):
print(f"Successfully downloaded to {output_path}")
return True
def validate_monthly_sector_refresh(xlsx_path: Path) -> bool:
    """Run the monthly sector-universe refresh validator against a workbook.

    Launches ``tools/validate_sector_universe_monthly_refresh_v1.py`` as a
    child process with ``ROOT`` as its working directory.

    Args:
        xlsx_path: Workbook to validate; passed to the tool via ``--xlsx``.

    Returns:
        ``True`` when the validator exits with status 0, ``False`` otherwise.
    """
    # Local import keeps this block self-contained. Using the running
    # interpreter (instead of whatever "python" resolves to on PATH) makes the
    # validator run in the same environment, with the same packages, as this script.
    import sys

    cmd = [
        sys.executable or "python",
        "tools/validate_sector_universe_monthly_refresh_v1.py",
        "--xlsx",
        str(xlsx_path),
    ]
    print(f"Validating monthly sector refresh: {xlsx_path} ...")
    res = subprocess.run(cmd, cwd=str(ROOT))
    if res.returncode == 0:
        print("Monthly sector refresh validation passed.")
        return True
    print("Monthly sector refresh validation failed.")
    return False
def main():
parser = argparse.ArgumentParser()
parser.add_argument("--function", default="runDataFeed", help="Primary GAS function to execute before download")
parser.add_argument("--fallback-function", default="run_all", help="Fallback GAS function to execute if primary fails")
args = parser.parse_args()
try:
tokens = get_tokens()
script_id = get_script_id()
access_token = refresh_access_token(tokens)
# Step 1: Execute GAS run_all
if run_gas_function(script_id, access_token, "run_all"):
# Step 1: Execute GAS runDataFeed first, then fallback to run_all if needed.
primary_ok = run_gas_function(script_id, access_token, args.function)
if not primary_ok and args.fallback_function and args.fallback_function != args.function:
print(f"Primary function {args.function} failed; trying fallback {args.fallback_function} ...")
primary_ok = run_gas_function(script_id, access_token, args.fallback_function)
if primary_ok:
print("Waiting a bit for GAS processes to finalize (optional)...")
time.sleep(5)
# Step 2: Download spreadsheet
if download_spreadsheet(SPREADSHEET_ID, access_token, OUTPUT_XLSX):
print("\nRoutine Part 1 & 2 complete.")
validate_monthly_sector_refresh(OUTPUT_XLSX)
print("Final step: npm run prepare-upload-zip")
else:
print("\nDownload failed. Please check Drive API scopes.")
@@ -98,6 +126,7 @@ def main():
fallback = subprocess.run(["python", "tools/update_workbook_sector_insights.py"], cwd=str(ROOT))
if fallback.returncode == 0:
print("Local sector-insight workbook updated.")
validate_monthly_sector_refresh(LOCAL_OUTPUT_XLSX)
else:
print("Local sector-insight workbook build failed.")
+60 -5
View File
@@ -17,6 +17,7 @@ if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from src.quant_engine.etf_representative_monitor import build_etf_representative_monitor
from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit
from src.quant_engine.sector_trend_analysis import build_sector_trend_analysis
SECTION_ORDER = [
@@ -25,6 +26,7 @@ SECTION_ORDER = [
"single_conclusion", "immediate_execution_playbook", "market_context_learning_note",
"portfolio_performance_summary",
"portfolio_sector_exposure_summary",
"sector_universe_refresh_audit_v1",
"sector_trend_analysis_v1", "etf_representative_monitor_v1", "investment_quality_headline", "operational_truth_score",
"execution_readiness_matrix", "pass_100_criteria",
"today_decision_summary_card", "routing_serving_trace",
@@ -59,6 +61,7 @@ SECTION_TITLES = {
"market_context_learning_note": "시장 컨텍스트 학습 노트",
"portfolio_performance_summary": "포트폴리오 성과 요약",
"portfolio_sector_exposure_summary": "포트폴리오 섹터 노출",
"sector_universe_refresh_audit_v1": "섹터 월간 갱신 감사",
"sector_trend_analysis_v1": "섹터 동향 분석",
"etf_representative_monitor_v1": "ETF 대표 종목 모니터",
"investment_quality_headline": "투자 품질 헤드라인",
@@ -670,7 +673,7 @@ def _sector_trend_analysis_v1(data_root: dict, hctx: dict, se: list) -> str:
rows_data = result.get("rows") if isinstance(result.get("rows"), list) else []
if rows_data:
md += "\n\n**섹터 상세 트렌드**\n\n" + _tbl(rows_data, [
"sector", "proxy_ticker", "proxy_name", "proxy_type", "etf_execution_use",
"sector", "proxy_ticker", "proxy_name", "proxy_type", "universe_source", "etf_execution_use",
"etf_liquidity_status", "etf_nav_risk", "proxy_confidence", "rank",
"rank_delta_w1", "rank_delta_w2", "sector_score", "score_delta",
"sector_ret5d", "sector_ret20d", "etf_return_5d", "etf_return_20d",
@@ -756,10 +759,55 @@ def _sector_trend_analysis_v1(data_root: dict, hctx: dict, se: list) -> str:
"- 섹터 수급은 ETF 프록시와 직접 스마트머니를 분리해서 보여주고, 둘이 어긋날 때 경고를 강화해야 합니다.\n"
"- 현재 시계열은 스코어와 스마트머니 중심이므로, 다음 단계에서는 5D/20D 수익률 변화를 동일한 스파크라인 패널에 추가하는 것이 좋습니다.\n"
"- 포트폴리오 자금 패널은 목표 달성율, 드로우다운, 베타, 알파 신뢰도를 함께 묶어 보여줘야 실제 투자 판단과 연결됩니다.\n"
"- 다음 세분화 후보는 `바이오/제약`과 `방산/우주`처럼 현재 섹터를 더 세밀하게 나누는 방향입니다.\n"
)
return md
def _sector_universe_refresh_audit_v1(data_root: dict, hctx: dict, se: list) -> str:
    """Render the sector-universe monthly refresh audit as a Markdown section.

    Runs ``build_sector_universe_refresh_audit`` on the report data and emits a
    key/value summary, a fixed block of interpretation notes and, when detail
    rows exist, a per-sector table capped at 20 rows. An empty or non-dict
    audit result is reported through ``_err`` instead.
    """
    raw_inner = data_root.get("data")
    inner_data = data_root.get("data", {}) if isinstance(raw_inner, dict) else {}
    audit = build_sector_universe_refresh_audit(
        {"data": inner_data, "data_root": data_root, "_harness_context": hctx}
    )
    if not (isinstance(audit, dict) and audit):
        return _err(se, "sector_universe_refresh_audit_v1", "sector universe refresh audit unavailable")

    summary = audit.get("summary") if isinstance(audit.get("summary"), dict) else {}
    # (label, summary key) pairs, in display order; the gate row comes from the audit itself.
    summary_fields = [
        ("섹터 수", "sector_count"),
        ("Naver 소스 섹터 수", "naver_source_count"),
        ("레이아웃 변경 수", "layout_changed_count"),
        ("SHEET_INPUT 섹터 수", "sheet_input_count"),
        ("DEFAULT_TEMPLATE 섹터 수", "template_count"),
        ("갱신 최신일", "newest_source_asof"),
        ("갱신 최저일", "oldest_source_asof"),
        ("CURRENT", "current_count"),
        ("DUE", "due_count"),
        ("OVERDUE", "overdue_count"),
        ("MISSING_URL", "missing_source_url_count"),
        ("STALE", "stale_sector_count"),
    ]
    kv_rows = [("갱신 게이트", audit.get("gate", ""))]
    kv_rows.extend((label, summary.get(key, "")) for label, key in summary_fields)

    memo_lines = (
        "- `NAVER_ETF_PAGE`는 월간 갱신된 구성종목이고, `SHEET_INPUT`은 수동 입력/보강분이다.\n",
        "- `DEFAULT_TEMPLATE`는 자동 갱신이 아직 안 된 템플릿이므로, 월간 게이트에서 별도 실패로 본다.\n",
        "- `Source_URL`와 `Source_AsOf`가 함께 있어야 provenance가 완성된다.\n",
        "- 이 데이터는 AJAX/XHR 호출이 아니라 서버 렌더링 HTML 테이블이다. 따라서 잘못된 API 호출을 가정하지 말고, `main.naver`와 `coinfo.naver?target=cu_more`를 HTML 우선으로 읽는다.\n",
        "- Naver 홈페이지 리뉴얼이나 DOM 변경이 생기면, JS는 보조 탐지용으로만 보고 실제값은 추정하지 않는다. 테이블이 없으면 실패를 그대로 남겨 추정값을 쓰지 않는다.\n",
        "- `NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED`는 레이아웃 변경 전용 실패로 분리하고, 일반 실패(`NAVER_ETF_PAGE_FAIL`)와 구분해 읽는다.\n",
        "- 금융 섹터는 `은행 / 증권 / 지주회사`로 분리해 `sector_universe`를 구성하고, `sector_flow`는 현재 JSON 브리지를 통해 carryover 분리본을 표시한다. GAS `runDataFeed`를 다시 돌리면 native 분리본으로 다시 물린다.\n",
        "- 이 분리는 월 1회 갱신 하네스의 대상이며, 섹터별 대표 ETF 구성비 증빙은 `Source_URL`과 `Source_AsOf`가 유효해야만 인정한다.\n",
    )

    md = _kv(kv_rows)
    md += "\n\n**갱신 분리 메모**\n\n"
    md += "".join(memo_lines)

    detail = audit.get("rows") if isinstance(audit.get("rows"), list) else []
    if not detail:
        return md

    detail_columns = [
        "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode",
        "source_url", "source_asof", "age_days", "constituent_count",
        "stock_count", "etf_count", "weight_sum", "status", "refresh_reason",
    ]
    md += "\n\n**섹터 갱신 상세**\n\n" + _tbl(detail, detail_columns, max_rows=20)
    return md
def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str:
inner_data = data_root.get("data", {}) if isinstance(data_root.get("data"), dict) else {}
payload = {"data": inner_data, "data_root": data_root, "_harness_context": hctx}
@@ -784,6 +832,11 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str
])
md += "\n\n**ETF 대표 종목 추출 원칙**\n\n"
md += (
"- 섹터 프록시는 ETF 우선을 기본으로 두고, ETF가 실제로 있는 섹터는 ETF를 대표값으로 씁니다.\n"
"- 은행/증권/지주회사는 하나로 뭉치지 않고 각각 별도 섹터로 분리해 구성비 상위 종목을 증빙합니다.\n"
"- 방산/원전/건설/플랜트-EPC/로보틱스처럼 ETF 프록시가 있는 섹터는 ETF를 쓰고, 대표주 바스켓은 섹터별 기본 3종, 로보틱스는 5종으로 별도 모니터합니다.\n"
"- 로보틱스는 `RISE 현대차고정피지컬AI`를 섹터 프록시로 사용하고, 대표주는 해당 ETF의 실제 구성비 상위 5개 종목에서 뽑습니다.\n"
"- `Universe_Source=DEFAULT_TEMPLATE`인 행은 템플릿 경로이므로, 실제 시트 입력으로 바꿔 provenance를 완성해야 합니다.\n"
"- 대표 종목은 우선 ETF 구성비중이 가장 큰 종목을 선택하고, 그 종목이 현재 유동성/호가/추세 조건을 충족하는지로 계속 모니터링합니다.\n"
"- 구성비중 데이터가 비어 있거나 비정상일 때만 같은 섹터의 유동성 우선 후보로 대체합니다.\n"
"- BUY_REVIEW는 ETF 수급이 대표 종목의 추세와 같이 붙을 때만 후보로 승격합니다.\n"
@@ -796,7 +849,7 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str
rep_states = []
rep_weights = []
if isinstance(reps, list):
for rep in reps[:3]:
for rep in reps[:5]:
if isinstance(rep, dict):
rep_names.append(f"{rep.get('name', '')}({rep.get('ticker', '')})")
rep_states.append(str(rep.get("monitor_state", "")))
@@ -805,6 +858,7 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str
"sector": row.get("sector", ""),
"etf_proxy_ticker": row.get("etf_proxy_ticker", ""),
"etf_proxy_name": row.get("etf_proxy_name", ""),
"universe_source": row.get("universe_source", ""),
"representative_basket": " / ".join(rep_names),
"representative_count": row.get("representative_count", ""),
"basket_weights": ", ".join(rep_weights),
@@ -813,8 +867,8 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str
"representative_basis_detail": row.get("representative_basis_detail", ""),
"basket_quality_state": row.get("basket_quality_state", ""),
"basket_coverage_pct": row.get("basket_coverage_pct", ""),
"selection_source": ", ".join(str(rep.get("selection_source", "")) for rep in reps[:3] if isinstance(rep, dict)),
"selection_score": ", ".join(str(rep.get("selection_score", "")) for rep in reps[:3] if isinstance(rep, dict)),
"selection_source": ", ".join(str(rep.get("selection_source", "")) for rep in reps[:5] if isinstance(rep, dict)),
"selection_score": ", ".join(str(rep.get("selection_score", "")) for rep in reps[:5] if isinstance(rep, dict)),
"basket_state": row.get("monitor_state", ""),
"basket_buy_review_count": row.get("basket_buy_review_count", ""),
"basket_caution_count": row.get("basket_caution_count", ""),
@@ -823,7 +877,7 @@ def _etf_representative_monitor_v1(data_root: dict, hctx: dict, se: list) -> str
})
md += "\n\n**대표 종목 모니터 테이블**\n\n"
md += _tbl(display_rows, [
"sector", "etf_proxy_ticker", "etf_proxy_name", "representative_basket",
"sector", "etf_proxy_ticker", "etf_proxy_name", "universe_source", "representative_basket",
"representative_count", "basket_weights", "basket_states", "representative_basis",
"representative_basis_detail", "basket_quality_state", "basket_coverage_pct",
"selection_source", "selection_score", "basket_state", "basket_buy_review_count",
@@ -1538,6 +1592,7 @@ def main() -> int:
"market_context_learning_note": lambda: _market_context_learning_note(hctx, se),
"portfolio_performance_summary": lambda: _portfolio_performance_summary(data_root, hctx, se),
"portfolio_sector_exposure_summary": lambda: _portfolio_sector_exposure_summary(data_root, hctx, se),
"sector_universe_refresh_audit_v1": lambda: _sector_universe_refresh_audit_v1(data_root, hctx, se),
"sector_trend_analysis_v1": lambda: _sector_trend_analysis_v1(data_root, hctx, se),
"investment_quality_headline": lambda: _investment_quality_headline(hctx, se),
"operational_truth_score": lambda: _operational_truth_score(hctx, se),
+616
View File
@@ -0,0 +1,616 @@
from __future__ import annotations
import argparse
import datetime as dt
import json
import re
import shutil
import sys
from collections import OrderedDict
from pathlib import Path
from typing import Any
from urllib.parse import urljoin, urlparse, parse_qs
import requests
from bs4 import BeautifulSoup
from openpyxl import load_workbook
from openpyxl.styles import Alignment, Font, PatternFill
from openpyxl.utils import get_column_letter
ROOT = Path(__file__).resolve().parents[1]
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit
# Default workbook read by ``--input``; resolved relative to the repository root.
DEFAULT_INPUT_XLSX = ROOT / "GatherTradingData.xlsx"
# Default workbook written by ``--output``.
DEFAULT_OUTPUT_XLSX = ROOT / "outputs" / "sector_universe_refresh" / "GatherTradingData_sector_universe.xlsx"
# User-Agent header installed on the HTTP session used for the Naver Finance requests.
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0 Safari/537.36"
# Base URL that relative Naver Finance paths are joined against.
NAVER_BASE = "https://finance.naver.com"
# Absolute (or protocol-relative) finance.naver.com item URL embedded in script text.
# ``path`` is the site-relative part, ``code`` the value of its ``code=`` parameter.
# The code is captured as alphanumeric because tickers are not always digits only
# (this script itself uses "0111J0"); a digits-only group would truncate such a
# ticker and the candidate would never match the expected proxy code.
NAVER_ITEM_CODE_RE = re.compile(r"(?:https?:)?//finance\.naver\.com(?P<path>/item/[^\"'\s<>]+code=(?P<code>[0-9A-Za-z]+)[^\"'\s<>]*)", re.I)
# Site-relative ``/item/...code=...`` path embedded in script text (same groups as above).
NAVER_REL_CODE_RE = re.compile(r"(?P<path>/item/[^\"'\s<>]+code=(?P<code>[0-9A-Za-z]+)[^\"'\s<>]*)", re.I)
# Shared openpyxl styles for the sheets written by this script.
TITLE_FILL = PatternFill("solid", fgColor="1F4E78")  # fill applied to the A1 title cell
HEADER_FILL = PatternFill("solid", fgColor="1F4E78")  # fill applied to table header cells
SUBHEADER_FILL = PatternFill("solid", fgColor="D9EAF7")  # lighter fill for sub-headers
WHITE_FONT = Font(color="FFFFFF", bold=True)  # bold white text used on the dark fills
BOLD_FONT = Font(bold=True)
NOTE_FONT = Font(italic=True, color="666666")  # grey italic text used for the subtitle row
def _kst_now() -> dt.datetime:
return dt.datetime.now(dt.timezone(dt.timedelta(hours=9)))
def _kst_today() -> str:
    """Return today's KST calendar date formatted as ``YYYY-MM-DD``."""
    return f"{_kst_now():%Y-%m-%d}"
def _clean_text(value: Any) -> str:
if value is None:
return ""
return str(value).strip()
def _normalize_code(value: Any) -> str:
    """Normalize a ticker-like cell value to a code string.

    Commas and a trailing ``.0`` are removed. Purely numeric input (including a
    decimal such as ``"5930.00"``) is left-padded with zeros to six characters;
    anything else is returned as the cleaned text. Empty input yields ``""``.
    """
    raw = _clean_text(value)
    if not raw:
        return ""
    code = raw.replace(",", "")
    # Spreadsheet numbers often arrive as floats rendered like "5930.0".
    code = code[:-2] if code.endswith(".0") else code
    if code.isdigit():
        return code.zfill(6)
    looks_decimal = re.fullmatch(r"\d+\.\d+", code) is not None
    return str(int(float(code))).zfill(6) if looks_decimal else code
def _parse_weight(value: str) -> float | None:
    """Parse a weight such as ``"12.34%"`` into a float (``12.34``).

    Percent signs and thousands separators are dropped before conversion.
    Returns ``None`` when the text is empty or cannot be converted.
    """
    cleaned = _clean_text(value).replace("%", "").replace(",", "")
    if cleaned:
        try:
            return float(cleaned)
        except Exception:
            return None
    return None
def _discover_naver_candidate_urls(soup: BeautifulSoup, proxy_ticker: str) -> list[str]:
    """Collect extra candidate URLs for ``proxy_ticker`` from the page's ``<script>`` tags.

    External scripts contribute their ``src`` when it contains the normalized
    ticker. Inline scripts are scanned with the two Naver item-URL patterns and
    contribute every match whose code is missing or equal to the ticker.
    URLs are returned de-duplicated, in first-seen order.
    """
    # A dict keeps insertion order and gives O(1) duplicate detection.
    found: dict[str, None] = {}

    def remember(url: str) -> None:
        cleaned = _clean_text(url)
        if cleaned:
            found.setdefault(cleaned, None)

    wanted = _normalize_code(proxy_ticker)
    for tag in soup.find_all("script"):
        src = _clean_text(tag.get("src"))
        if src:
            if wanted and wanted in src:
                # Protocol-relative must be checked before site-relative ("//" also starts with "/").
                if src.startswith("//"):
                    remember(f"https:{src}")
                elif src.startswith("/"):
                    remember(urljoin(NAVER_BASE, src))
                else:
                    remember(src)
            continue
        inline = tag.get_text(" ", strip=True) or ""
        if not inline:
            continue
        for pattern in (NAVER_ITEM_CODE_RE, NAVER_REL_CODE_RE):
            for hit in pattern.finditer(inline):
                groups = hit.groupdict()
                code = _normalize_code(groups.get("code") or "")
                if wanted and code and code != wanted:
                    continue
                path = groups.get("path") or ""
                if path:
                    remember(urljoin(NAVER_BASE, path))
    return list(found)
# ``code=`` query parameter of a constituent link. Captured as alphanumeric because
# tickers are not always digits only (this script itself uses "0111J0"); a
# digits-only capture would truncate such a code and then zero-pad it into a
# different, wrong ticker.
_NAVER_HOLDING_CODE_RE = re.compile(r"code=([0-9A-Za-z]+)")


def _parse_naver_etf_holdings(session: requests.Session, proxy_ticker: str, limit: int) -> dict[str, Any]:
    """Scrape the constituent table of an ETF from Naver Finance.

    Args:
        session: HTTP session used for the GET requests.
        proxy_ticker: ETF ticker placed into the ``code=`` query parameter.
        limit: Maximum number of holdings to return.

    Returns:
        A dict with ``source_url``, ``source_kind``, ``holdings``,
        ``discovered_urls`` and ``message``. On success ``source_kind`` is
        ``"NAVER_ETF_PAGE"`` and ``holdings`` is a list of dicts with
        ``Constituent_Code``, ``Constituent_Name``, ``Weight`` (page percentage
        divided by 100) and ``Source``. When no candidate page yields holdings,
        ``source_kind`` is ``"NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED"`` and
        ``holdings`` is empty; nothing is guessed.

    Raises:
        requests.RequestException: propagated from ``session.get`` /
            ``raise_for_status``; the remaining candidates are not tried.
    """
    url_candidates = [
        f"{NAVER_BASE}/item/main.naver?code={proxy_ticker}",
        f"{NAVER_BASE}/item/coinfo.naver?code={proxy_ticker}&target=cu_more",
    ]
    last_message = ""
    # The list doubles as a work queue: URLs discovered on a page are appended
    # below and are visited by this same loop afterwards.
    for url in url_candidates:
        response = session.get(url, timeout=20)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        discovered = _discover_naver_candidate_urls(soup, proxy_ticker)
        for candidate in discovered:
            if candidate not in url_candidates:
                url_candidates.append(candidate)
        section = soup.select_one("div.section.etf_asset")
        table = section.select_one("table.tb_type1_a") if section is not None else None
        if table is None:
            # layout changed or this endpoint does not expose the constituent table
            last_message = "ETF constituent table missing; page structure may have changed"
            continue
        holdings: list[dict[str, Any]] = []
        for tr in table.select("tbody tr"):
            tds = tr.find_all("td")
            if len(tds) < 3:
                continue
            name_link = tr.find("a", href=_NAVER_HOLDING_CODE_RE)
            if name_link is None:
                continue
            name = _clean_text(name_link.get_text(" ", strip=True))
            href = _clean_text(name_link.get("href"))
            m = _NAVER_HOLDING_CODE_RE.search(href)
            code = _normalize_code(m.group(1) if m else "")
            if not code or not name:
                continue
            # The third cell is read as the weight (a percentage such as "12.34%").
            weight = _parse_weight(tds[2].get_text(" ", strip=True))
            if weight is None:
                continue
            holdings.append({
                "Constituent_Code": code,
                "Constituent_Name": name,
                "Weight": round(weight / 100.0, 6),
                "Source": "NAVER_ETF_PAGE",
            })
            if len(holdings) >= limit:
                break
        if holdings:
            return {
                "source_url": url,
                "source_kind": "NAVER_ETF_PAGE",
                "holdings": holdings,
                "discovered_urls": discovered,
                "message": "",
            }
        last_message = "no holdings parsed"
    return {
        "source_url": url_candidates[0],
        "source_kind": "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED",
        "holdings": [],
        "discovered_urls": [],
        "message": last_message or "page structure changed; no expected values were inferred",
    }
def _extract_sector_seed_rows(ws) -> list[dict[str, Any]]:
headers = [ws.cell(2, c).value for c in range(1, ws.max_column + 1)]
headers = [str(h).strip() if h is not None else "" for h in headers]
idx = {name: i for i, name in enumerate(headers)}
rows: list[dict[str, Any]] = []
for r in range(3, ws.max_row + 1):
row = {name: ws.cell(r, c + 1).value for c, name in enumerate(headers) if name}
if not any(v not in (None, "") for v in row.values()):
continue
rows.append(row)
return rows
def _group_seed_rows(rows: list[dict[str, Any]]) -> OrderedDict[str, dict[str, Any]]:
    """Bucket seed rows by their ``Sector`` value, preserving first-seen order.

    Each bucket is ``{"meta": <first row of the sector>, "rows": [all rows]}``.
    Rows with a blank sector are dropped.
    """
    buckets: OrderedDict[str, dict[str, Any]] = OrderedDict()
    for record in rows:
        name = _clean_text(record.get("Sector"))
        if name:
            bucket = buckets.setdefault(name, {"meta": record, "rows": []})
            bucket["rows"].append(record)
    return buckets
# Sub-sectors that replace the combined "금융/은행" seed sector; each entry names
# the ETF whose holdings are scraped for that sub-sector.
_REFRESH_FINANCIAL_SPLIT_SPECS = (
    {"sector": "은행", "proxy_ticker": "091170", "proxy_name": "KODEX 은행", "proxy_type": "ETF"},
    {"sector": "증권", "proxy_ticker": "0111J0", "proxy_name": "HANARO 증권고배당TOP3플러스", "proxy_type": "ETF"},
    {"sector": "지주회사", "proxy_ticker": "307520", "proxy_name": "TIGER 지주회사", "proxy_type": "ETF"},
)


def _refresh_seed_weight(value: Any) -> float:
    """Convert a seed ``Weight`` cell to float; blank or unparsable cells count as 0.0."""
    if value in (None, ""):
        return 0.0
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return 0.0


def _refresh_failure_transport(source_kind: str) -> str:
    """Transport mode recorded for a sector that was not refreshed from Naver."""
    return "LAYOUT_CHANGED" if source_kind == "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" else "UNKNOWN"


def _refresh_failure_status(source_kind: str) -> str:
    """Status recorded for a non-refreshed sector: FAIL for any *FAIL* kind, else WARN."""
    return "FAIL" if "FAIL" in source_kind else "WARN"


def _refresh_scrape(session: requests.Session, proxy_ticker: str, limit: int) -> tuple[list[dict[str, Any]], str, str, str]:
    """Scrape one ETF and return ``(holdings, source_url, source_kind, message)``.

    Any exception from the scraper is converted into an empty result with
    ``source_kind="NAVER_ETF_PAGE_FAIL"`` and the exception text as message.
    """
    try:
        scraped = _parse_naver_etf_holdings(session, proxy_ticker, limit)
    except Exception as exc:
        return [], "", "NAVER_ETF_PAGE_FAIL", str(exc)
    return (
        scraped.get("holdings", []),
        scraped.get("source_url", ""),
        scraped.get("source_kind", "NAVER_ETF_PAGE_FAIL"),
        scraped.get("message", ""),
    )


def _refresh_universe_row(proxy: dict[str, str], today: str, *, code: str, name: str, weight: float,
                          is_etf: str, source: str, transport_mode: str, source_url: str,
                          weight_sum: float, status: str) -> dict[str, Any]:
    """Build one ``sector_universe`` output row for the sector described by ``proxy``."""
    return {
        "Sector": proxy["sector"],
        "Proxy_Ticker": proxy["proxy_ticker"],
        "Proxy_Name": proxy["proxy_name"],
        "Proxy_Type": proxy["proxy_type"],
        "Base_Ticker": proxy["base_ticker"],
        "Constituent_Code": code,
        "Constituent_Name": name,
        "Weight": weight,
        "Is_ETF": is_etf,
        "Enabled": "Y",
        "Effective_Date": today,
        "Source": source,
        "Transport_Mode": transport_mode,
        "Source_URL": source_url,
        "Source_AsOf": today,
        "Sector_Check": proxy["sector"],
        "Weight_Sum_All": weight_sum,
        "Weight_Sum_Stocks_Only": weight_sum,
        "ETF_Rows": 0,
        "Status": status,
    }


def _refresh_sector_stat(proxy: dict[str, str], today: str, *, source_kind: str, transport_mode: str,
                         source_url: str, constituent_count: int, weight_sum: float, status: str,
                         refresh_reason: str) -> dict[str, Any]:
    """Build the per-sector statistics entry returned alongside the rows."""
    return {
        "sector": proxy["sector"],
        "proxy_ticker": proxy["proxy_ticker"],
        "proxy_name": proxy["proxy_name"],
        "proxy_type": proxy["proxy_type"],
        "source_kind": source_kind,
        "transport_mode": transport_mode,
        "source_url": source_url,
        "source_asof": today,
        "constituent_count": constituent_count,
        "weight_sum": weight_sum,
        "status": status,
        "refresh_reason": refresh_reason,
    }


def _refresh_emit_scraped(refreshed: list[dict[str, Any]], sector_stats: list[dict[str, Any]],
                          proxy: dict[str, str], today: str, holdings: list[dict[str, Any]],
                          source_url: str, refresh_reason: str) -> None:
    """Append rows and the stat entry for a sector refreshed from a Naver ETF page."""
    weight_sum = round(sum(float(h["Weight"]) for h in holdings), 6)
    for h in holdings:
        refreshed.append(_refresh_universe_row(
            proxy, today,
            code=h["Constituent_Code"], name=h["Constituent_Name"], weight=h["Weight"],
            is_etf="N", source="NAVER_ETF_PAGE", transport_mode="HTML_SERVER_RENDERED",
            source_url=source_url, weight_sum=weight_sum, status="OK",
        ))
    sector_stats.append(_refresh_sector_stat(
        proxy, today,
        source_kind="NAVER_ETF_PAGE", transport_mode="HTML_SERVER_RENDERED", source_url=source_url,
        constituent_count=len(holdings), weight_sum=weight_sum, status="CURRENT",
        refresh_reason=refresh_reason,
    ))


def _refresh_emit_seed(refreshed: list[dict[str, Any]], sector_stats: list[dict[str, Any]],
                       proxy: dict[str, str], today: str, seed_rows: list[dict[str, Any]], *,
                       source_kind: str, transport_mode: str, source_url: str, row_status: str,
                       stat_status: str, refresh_reason: str) -> None:
    """Append rows and the stat entry for a sector carried over from its seed rows."""
    weights = [_refresh_seed_weight(row.get("Weight")) for row in seed_rows]
    # Total first, so every row of the sector carries the same sector-wide sum.
    weight_sum = 0.0
    for weight in weights:
        weight_sum += weight
    for row, weight in zip(seed_rows, weights):
        refreshed.append(_refresh_universe_row(
            proxy, today,
            code=_normalize_code(row.get("Constituent_Code")),
            name=_clean_text(row.get("Constituent_Name")),
            weight=weight,
            is_etf=_clean_text(row.get("Is_ETF")) or "N",
            source=source_kind, transport_mode=transport_mode, source_url=source_url,
            weight_sum=weight_sum, status=row_status,
        ))
    sector_stats.append(_refresh_sector_stat(
        proxy, today,
        source_kind=source_kind, transport_mode=transport_mode, source_url=source_url,
        constituent_count=len(seed_rows), weight_sum=round(weight_sum, 6), status=stat_status,
        refresh_reason=refresh_reason,
    ))


def _build_refreshed_rows(seed_rows: list[dict[str, Any]], limit: int) -> tuple[list[dict[str, Any]], dict[str, Any]]:
    """Rebuild the ``sector_universe`` rows, refreshing ETF sectors from Naver Finance.

    Seed rows are grouped by sector and each sector takes one of these paths:

    * ``"금융/은행"`` is replaced by the sub-sectors in
      ``_REFRESH_FINANCIAL_SPLIT_SPECS``. A sub-sector whose scrape fails emits
      no rows, only a failing stat entry, so the gap stays visible.
    * A sector whose proxy type is not ``"ETF"`` keeps up to ``limit`` seed rows,
      tagged ``REPRESENTATIVE_STOCK_PROXY``.
    * An ETF sector is scraped; on success the scraped holdings replace the
      seed rows. Otherwise up to ``limit`` seed rows are kept and tagged with
      the failure kind (``Status`` FAIL for any ``*FAIL*`` kind, else WARN).

    Args:
        seed_rows: Row dicts read from the existing ``sector_universe`` sheet.
        limit: Maximum constituents kept per sector.

    Returns:
        ``(rows, payload)`` where ``payload`` has ``sector_universe_refresh_audit``
        (result of ``build_sector_universe_refresh_audit`` over the rows) and
        ``sector_stats`` (one entry per processed sector or sub-sector).
    """
    grouped = _group_seed_rows(seed_rows)
    refreshed: list[dict[str, Any]] = []
    sector_stats: list[dict[str, Any]] = []
    today = _kst_today()
    # The context manager closes the session's pooled connections when done.
    with requests.Session() as session:
        session.headers.update({"User-Agent": DEFAULT_USER_AGENT})
        for sector, bundle in grouped.items():
            meta = bundle["meta"]
            proxy = {
                "sector": sector,
                "proxy_ticker": _normalize_code(meta.get("Proxy_Ticker")),
                "proxy_name": _clean_text(meta.get("Proxy_Name")),
                "proxy_type": _clean_text(meta.get("Proxy_Type")) or "ETF",
                "base_ticker": _normalize_code(meta.get("Base_Ticker")) or "069500",
            }

            if sector == "금융/은행":
                for spec in _REFRESH_FINANCIAL_SPLIT_SPECS:
                    split_proxy = {
                        "sector": spec["sector"],
                        "proxy_ticker": _normalize_code(spec["proxy_ticker"]),
                        "proxy_name": _clean_text(spec["proxy_name"]),
                        "proxy_type": _clean_text(spec["proxy_type"]) or "ETF",
                        "base_ticker": proxy["base_ticker"],
                    }
                    holdings, split_url, split_kind, split_message = _refresh_scrape(
                        session, split_proxy["proxy_ticker"], limit
                    )
                    if holdings:
                        _refresh_emit_scraped(
                            refreshed, sector_stats, split_proxy, today, holdings, split_url,
                            "NAVER_ETF_PAGE_SPLIT",
                        )
                        continue
                    # On failure, record it transparently and leave the sector visibly missing.
                    sector_stats.append(_refresh_sector_stat(
                        split_proxy, today,
                        source_kind=split_kind,
                        transport_mode=_refresh_failure_transport(split_kind),
                        source_url=split_url,
                        constituent_count=0,
                        weight_sum=0.0,
                        status=_refresh_failure_status(split_kind),
                        refresh_reason=split_message or "split_sector_fallback",
                    ))
                continue

            fallback_rows = bundle["rows"][:limit]

            if proxy["proxy_type"] != "ETF":
                ticker = proxy["proxy_ticker"]
                _refresh_emit_seed(
                    refreshed, sector_stats, proxy, today, fallback_rows,
                    source_kind="REPRESENTATIVE_STOCK_PROXY",
                    transport_mode="HTML_SERVER_RENDERED",
                    source_url=f"{NAVER_BASE}/item/main.naver?code={ticker}" if ticker else "",
                    row_status="CURRENT",
                    stat_status="CURRENT",
                    refresh_reason="REPRESENTATIVE_STOCK_PROXY",
                )
                continue

            source_kind, source_url, message = "SHEET_INPUT", "", ""
            if proxy["proxy_ticker"]:
                holdings, source_url, source_kind, message = _refresh_scrape(
                    session, proxy["proxy_ticker"], limit
                )
                if holdings:
                    _refresh_emit_scraped(
                        refreshed, sector_stats, proxy, today, holdings, source_url, "NAVER_ETF_PAGE"
                    )
                    continue

            # fallback: preserve seed rows but expose the failure transparently
            failure_status = _refresh_failure_status(source_kind)
            _refresh_emit_seed(
                refreshed, sector_stats, proxy, today, fallback_rows,
                source_kind=source_kind,
                transport_mode=_refresh_failure_transport(source_kind),
                source_url=source_url,
                row_status=failure_status,
                stat_status=failure_status,
                refresh_reason=message or "seed_fallback",
            )

    audit_payload = build_sector_universe_refresh_audit({"data": {"sector_universe": refreshed}})
    return refreshed, {
        "sector_universe_refresh_audit": audit_payload,
        "sector_stats": sector_stats,
    }
def _style_title(ws, title: str, subtitle: str) -> None:
    """Write a merged title in row 1 and a merged subtitle in row 2 of ``ws``.

    Both rows are merged from column 1 to the sheet's current ``max_column``,
    with a minimum span of eight columns.
    """
    def span_end() -> int:
        # Evaluated separately for each row, immediately before that row is merged.
        return max(8, ws.max_column or 8)

    ws.merge_cells(start_row=1, start_column=1, end_row=1, end_column=span_end())
    ws["A1"] = title
    title_cell = ws["A1"]
    title_cell.font = WHITE_FONT
    title_cell.fill = TITLE_FILL
    title_cell.alignment = Alignment(horizontal="left")

    ws.merge_cells(start_row=2, start_column=1, end_row=2, end_column=span_end())
    ws["A2"] = subtitle
    ws["A2"].font = NOTE_FONT
def _write_table(ws, start_row: int, start_col: int, headers: list[str], rows: list[list[Any]]) -> int:
    """Write a styled header row followed by ``rows``, anchored at ``(start_row, start_col)``.

    Returns the row index of the last data row written (``start_row`` when
    ``rows`` is empty).
    """
    for offset, label in enumerate(headers):
        head = ws.cell(start_row, start_col + offset)
        head.value = label
        head.font = WHITE_FONT
        head.fill = HEADER_FILL
        head.alignment = Alignment(horizontal="center")
    for row_offset, values in enumerate(rows, start=1):
        for col_offset, value in enumerate(values):
            ws.cell(start_row + row_offset, start_col + col_offset).value = value
    return start_row + len(rows)
def _write_sector_universe_sheet(wb, rows: list[dict[str, Any]]) -> None:
    """Replace the ``sector_universe`` sheet of ``wb`` with the refreshed rows.

    Row 1 carries an ``updated:`` timestamp, row 2 the headers, and data starts
    on row 3. Code columns are formatted as text, weight columns with four
    decimals, and the header block is frozen.
    """
    if "sector_universe" in wb.sheetnames:
        del wb["sector_universe"]
    ws = wb.create_sheet("sector_universe")
    headers = [
        "Sector", "Proxy_Ticker", "Proxy_Name", "Proxy_Type", "Base_Ticker",
        "Constituent_Code", "Constituent_Name", "Weight", "Is_ETF", "Enabled",
        "Effective_Date", "Source", "Transport_Mode", "Source_URL", "Source_AsOf", "Sector_Check",
        "Weight_Sum_All", "Weight_Sum_Stocks_Only", "ETF_Rows", "Status",
    ]
    stamp = _kst_now().strftime("%Y-%m-%d %H:%M:%S")
    ws["A1"] = f"updated: {stamp} KST"
    ws["A1"].font = Font(bold=True)
    _write_table(ws, 2, 1, headers, [[record.get(h, "") for h in headers] for record in rows])

    # "@" keeps the leading zeros of ticker codes; weights show four decimals.
    number_formats = {
        "Proxy_Ticker": "@", "Base_Ticker": "@", "Constituent_Code": "@",
        "Weight": "0.0000", "Weight_Sum_All": "0.0000", "Weight_Sum_Stocks_Only": "0.0000",
    }
    # Columns not listed here use the default width of 16.
    wide_columns = {"Constituent_Name": 22, "Proxy_Name": 22, "Source_URL": 42}
    last_row = ws.max_row
    for col_idx, header in enumerate(headers, start=1):
        fmt = number_formats.get(header)
        if fmt is not None:
            for r in range(3, last_row + 1):
                ws.cell(r, col_idx).number_format = fmt
        ws.column_dimensions[get_column_letter(col_idx)].width = wide_columns.get(header, 16)
    ws.freeze_panes = "A3"
    ws.sheet_view.showGridLines = False
def _write_audit_sheet(wb, audit_payload: dict[str, Any]) -> None:
    """Replace the ``sector_universe_refresh_audit`` sheet of ``wb``.

    The summary key/value table is written at A4 and, when the audit has rows,
    the per-sector table is written beside it starting at D4.

    Args:
        wb: Workbook to modify.
        audit_payload: Dict whose ``sector_universe_refresh_audit`` entry holds
            ``formula_id``, ``gate``, ``summary`` and ``rows``.
    """
    audit = audit_payload["sector_universe_refresh_audit"]
    if "sector_universe_refresh_audit" in wb.sheetnames:
        del wb["sector_universe_refresh_audit"]
    ws = wb.create_sheet("sector_universe_refresh_audit")
    ws.sheet_view.showGridLines = False
    _style_title(
        ws,
        "섹터 월간 갱신 감사",
        "Naver ETF 페이지 기반 월간 갱신 상태와 provenance 분리 현황을 점검한다.",
    )

    summary = audit.get("summary", {})
    count_keys = (
        "sector_count", "current_count", "due_count", "overdue_count", "missing_count",
        "template_count", "sheet_input_count", "naver_source_count",
        "missing_source_url_count", "stale_sector_count",
    )
    date_keys = ("oldest_source_asof", "newest_source_asof")
    summary_rows = [["formula_id", audit.get("formula_id", "")], ["gate", audit.get("gate", "")]]
    summary_rows += [[key, summary.get(key, 0)] for key in count_keys]
    summary_rows += [[key, summary.get(key, "")] for key in date_keys]
    _write_table(ws, 4, 1, ["key", "value"], summary_rows)

    detail_rows = audit.get("rows", []) or []
    if detail_rows:
        headers = [
            "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind",
            "source_url", "source_asof", "age_days", "constituent_count",
            "stock_count", "etf_count", "weight_sum", "status", "refresh_reason",
        ]
        _write_table(ws, 4, 4, headers, [[entry.get(h, "") for h in headers] for entry in detail_rows])
        widths = {"sector": 20, "proxy_name": 20, "refresh_reason": 20, "source_url": 42}
        for col_idx, header in enumerate(headers, start=4):
            ws.column_dimensions[get_column_letter(col_idx)].width = widths.get(header, 16)
    ws.freeze_panes = "A5"
def main() -> int:
    """CLI entry point: refresh the ``sector_universe`` sheet and write the result.

    Loads the input workbook, rebuilds the ``sector_universe`` and
    ``sector_universe_refresh_audit`` sheets, saves to ``--output`` and, with
    ``--apply``, copies the output over the input. Prints a JSON summary.

    Returns:
        ``0`` on success.

    Raises:
        FileNotFoundError: the input workbook does not exist.
        RuntimeError: the workbook has no ``sector_universe`` sheet.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", default=str(DEFAULT_INPUT_XLSX))
    parser.add_argument("--output", default=str(DEFAULT_OUTPUT_XLSX))
    parser.add_argument("--limit", type=int, default=10, help="Per-sector holdings limit from Naver ETF pages")
    parser.add_argument("--apply", action="store_true", help="Overwrite the input workbook in place as well")
    opts = parser.parse_args()

    source_path = Path(opts.input)
    target_path = Path(opts.output)
    if not source_path.exists():
        raise FileNotFoundError(source_path)

    workbook = load_workbook(source_path)
    if "sector_universe" not in workbook.sheetnames:
        raise RuntimeError("sector_universe sheet not found")

    seed_rows = _extract_sector_seed_rows(workbook["sector_universe"])
    # A non-positive --limit is clamped to one holding per sector.
    refreshed_rows, audit_payload = _build_refreshed_rows(seed_rows, max(1, opts.limit))
    _write_sector_universe_sheet(workbook, refreshed_rows)
    _write_audit_sheet(workbook, audit_payload)

    target_path.parent.mkdir(parents=True, exist_ok=True)
    workbook.save(target_path)
    # When both paths are the same file, the save above already updated the input.
    if opts.apply and source_path.resolve() != target_path.resolve():
        shutil.copy2(target_path, source_path)

    summary = audit_payload["sector_universe_refresh_audit"]["summary"]
    report = {
        "status": "OK",
        "input": str(source_path),
        "output": str(target_path),
        "rows": len(refreshed_rows),
        "sectors": len(audit_payload["sector_stats"]),
        "current_count": summary["current_count"],
        "overdue_count": summary["overdue_count"],
        "template_count": summary["template_count"],
    }
    print(json.dumps(report, ensure_ascii=False, indent=2))
    return 0


if __name__ == "__main__":
    sys.exit(main())
+80 -7
View File
@@ -9,8 +9,14 @@ from openpyxl.chart import BarChart, LineChart, Reference
from openpyxl.styles import Font, PatternFill, Alignment
from openpyxl.utils import get_column_letter
import sys
ROOT = Path(__file__).resolve().parent.parent
if str(ROOT) not in sys.path:
sys.path.insert(0, str(ROOT))
from src.quant_engine.sector_universe_refresh import build_sector_universe_refresh_audit
INPUT_XLSX = ROOT / "GatherTradingData.xlsx"
OUTPUT_DIR = ROOT / "outputs" / "sector_insights_enhanced"
OUTPUT_XLSX = OUTPUT_DIR / "GatherTradingData_sector_insights.xlsx"
@@ -593,10 +599,12 @@ def build_sector_summary(wb, data: dict) -> None:
ws["A20"] = "Notes"
ws["A20"].fill = SUBHEADER_FILL
ws["A20"].font = BOLD_FONT
ws["A21"] = "섹터별 ETF 프록시와 스마트머니 방향이 다르면 매수 근거를 보수적으로 해석해야 합니다."
ws["A21"] = "섹터별 ETF 프록시를 기준으로 보고, 은행/증권/지주회사는 분리해서 구성비 상위 종목을 증빙해야 합니다. 대표주 모니터는 섹터 기본 3종, 로보틱스 5종 바스켓으로 함께 확인해야 합니다."
ws["A21"].alignment = Alignment(wrap_text=True)
ws["A22"] = "데이터 결측은 하네스 업데이트가 필요합니다."
ws["A22"] = "Universe_Source가 DEFAULT_TEMPLATE인 행은 템플릿이며, 실제 시트 입력으로 전환되어야 provenance가 완성됩니다."
ws["A22"].alignment = Alignment(wrap_text=True)
ws["A23"] = "다음 세분화 후보는 바이오/제약과 방산/우주처럼 현재 섹터를 더 세밀하게 나누는 방향입니다. 로보틱스는 RISE 현대차고정피지컬AI를 섹터 프록시로 사용하고, 대표주는 해당 ETF의 실제 구성비 상위 5개 종목에서 뽑습니다."
ws["A23"].alignment = Alignment(wrap_text=True)
chart = LineChart()
chart.title = "Average Sector Score / Breadth Trend"
@@ -622,11 +630,11 @@ def build_sector_analysis(wb, data: dict) -> None:
style_title(
ws,
"섹터 동향 분석",
"섹터별 ETF 프록시, 스마트머니 유입, 수익률, 유동성 방향을 함께 보는 상세 시트",
"섹터별 ETF 프록시, 대표주 모니터, 스마트머니 유입, 수익률, 유동성 방향을 함께 보는 상세 시트",
end_col=18,
)
headers = [
"sector", "proxy_ticker", "proxy_name", "proxy_type", "etf_code",
"sector", "proxy_ticker", "proxy_name", "proxy_type", "universe_source", "etf_code",
"etf_execution_use", "etf_liquidity_score", "etf_liquidity_status", "etf_nav_risk",
"proxy_confidence", "rank", "rank_delta_w1", "rank_delta_w2", "sector_score",
"score_delta", "sector_ret5d", "sector_ret20d", "etf_return_5d", "etf_return_20d",
@@ -661,7 +669,7 @@ def build_sector_analysis(wb, data: dict) -> None:
chart.x_axis.title = "20D Return"
chart.height = 8
chart.width = 14
data_ref = Reference(ws, min_col=17, min_row=4, max_row=4 + len(rows))
data_ref = Reference(ws, min_col=18, min_row=4, max_row=4 + len(rows))
cats = Reference(ws, min_col=1, min_row=5, max_row=4 + len(rows))
chart.add_data(data_ref, titles_from_data=True)
chart.set_categories(cats)
@@ -817,6 +825,67 @@ def build_sector_timeline(wb, data: dict, source_data: dict | None = None) -> No
ws.add_chart(money_chart, "L36")
def build_sector_universe_refresh_audit_sheet(wb, source_data: dict) -> None:
    """Add the ``sector_universe_refresh_audit`` sheet to ``wb``.

    Runs ``build_sector_universe_refresh_audit`` over ``{"data": source_data}``
    and writes its summary as a KPI block, a fixed list of refresh-policy notes
    in column D, and the per-sector audit rows as a table starting at A14.
    """
    sheet = wb.create_sheet("sector_universe_refresh_audit")
    style_sheet(sheet)
    style_title(
        sheet,
        "섹터 월간 갱신 감사",
        "Naver ETF 페이지 기반 구성종목 갱신 상태와 provenance 분리 상태를 점검하는 감사 시트. AJAX/XHR 전제는 두지 않고 HTML 서버렌더링 테이블을 우선한다.",
        end_col=16,
    )

    audit = build_sector_universe_refresh_audit({"data": source_data})
    summary = audit.get("summary") or {}
    count_keys = (
        "sector_count", "current_count", "due_count", "overdue_count", "layout_changed_count",
        "missing_count", "template_count", "sheet_input_count", "naver_source_count",
        "missing_source_url_count", "stale_sector_count",
    )
    items = [("formula_id", audit.get("formula_id", "")), ("gate", audit.get("gate", ""))]
    items.extend((key, summary.get(key, 0)) for key in count_keys)
    add_kpi_block(sheet, 4, items)

    policy_header = sheet["D4"]
    policy_header.value = "Refresh policy"
    policy_header.fill = SUBHEADER_FILL
    policy_header.font = BOLD_FONT
    policy_lines = (
        "NAVER_ETF_PAGE rows are the monthly refreshed source.",
        "SHEET_INPUT rows are manual/provisional and must stay separate.",
        "DEFAULT_TEMPLATE rows are a fail in the monthly gate.",
        "Source_URL and Source_AsOf are required for provenance.",
        "This is HTML-server-rendered, not AJAX. JS is only a fallback probe for candidate URLs.",
        "No guessed holdings are written when the page layout changes.",
        "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED is a separate layout-change failure state.",
        "Financial sectors are split as 은행 / 증권 / 지주회사 in sector_universe; sector_flow reflects carryover until GAS runDataFeed is rerun.",
        "This split is part of the monthly refresh harness; Source_URL and Source_AsOf must remain valid for provenance.",
    )
    # Policy notes occupy D5 through D13, one line per row.
    for row_no, text in enumerate(policy_lines, start=5):
        sheet[f"D{row_no}"] = text

    audit_rows = audit.get("rows") or []
    if audit_rows:
        headers = [
            "sector", "proxy_ticker", "proxy_name", "proxy_type", "source_kind", "transport_mode",
            "source_url", "source_asof", "age_days", "constituent_count",
            "stock_count", "etf_count", "weight_sum", "status", "refresh_reason",
        ]
        # NOTE(review): the KPI block starts at row 4 with 13 items while this table
        # starts at row 14 and the Notes cells below are A11/A12 - confirm against
        # add_kpi_block's layout that these regions do not overwrite each other.
        write_table(sheet, 14, 1, headers, [[entry.get(h, "") for h in headers] for entry in audit_rows])

    column_widths = {
        "A": 16, "B": 12, "C": 18, "D": 12, "E": 16, "F": 18, "G": 42, "H": 14,
        "I": 10, "J": 14, "K": 12, "L": 12, "M": 12, "N": 12, "O": 24,
    }
    for letter, width in column_widths.items():
        sheet.column_dimensions[letter].width = width
    sheet.freeze_panes = "A5"

    notes_header = sheet["A11"]
    notes_header.value = "Notes"
    notes_header.fill = SUBHEADER_FILL
    notes_header.font = BOLD_FONT
    sheet["A12"] = "홈페이지 리뉴얼로 표 구조가 바뀌면, 파서는 추정하지 않고 실패 상태를 남겨 월간 게이트에서 잡는다."
    sheet["A12"].alignment = Alignment(wrap_text=True)
def build_etf_summary(wb, data: dict) -> None:
ws = wb.create_sheet("etf_representative_summary")
style_sheet(ws)
@@ -847,6 +916,7 @@ def build_etf_summary(wb, data: dict) -> None:
ws["D6"] = "2) Missing slots filled with same-sector live candidates"
ws["D7"] = "3) Missing data stays explicit as DATA_MISSING"
ws["D8"] = "4) Minimum 3 names per sector basket"
ws["D9"] = "5) Universe_Source=DEFAULT_TEMPLATE rows are provisional until sheet-backed data exists."
ws["G4"] = "Top reps"
ws["G4"].fill = SUBHEADER_FILL
ws["G4"].font = BOLD_FONT
@@ -865,7 +935,7 @@ def build_etf_monitor(wb, data: dict) -> None:
end_col=18,
)
headers = [
"sector", "etf_proxy_ticker", "etf_proxy_name", "etf_proxy_type", "sector_rank",
"sector", "etf_proxy_ticker", "etf_proxy_name", "etf_proxy_type", "universe_source", "sector_rank",
"sector_score", "sector_smart_money_5d_krw", "sector_ret20d", "representative_count",
"representative_ticker", "representative_name", "representative_basis",
"representative_basis_detail", "constituent_weight", "basket_quality_state",
@@ -894,7 +964,7 @@ def build_etf_monitor(wb, data: dict) -> None:
chart.x_axis.title = "Coverage %"
chart.height = 8
chart.width = 14
data_ref = Reference(ws, min_col=16, min_row=4, max_row=4 + len(rows))
data_ref = Reference(ws, min_col=17, min_row=4, max_row=4 + len(rows))
cats = Reference(ws, min_col=1, min_row=5, max_row=4 + len(rows))
chart.add_data(data_ref, titles_from_data=True)
chart.set_categories(cats)
@@ -922,6 +992,7 @@ def main() -> None:
"performance_readiness_summary",
"operational_eval_queue_summary",
"portfolio_sector_exposure",
"sector_universe_refresh_audit",
"_portfolio_holdings_helper",
"sector_trend_summary",
"sector_trend_analysis",
@@ -936,6 +1007,7 @@ def main() -> None:
build_performance_readiness_summary(wb)
build_operational_eval_queue_summary(wb)
build_portfolio_sector_exposure(wb)
build_sector_universe_refresh_audit_sheet(wb, raw_source)
build_sector_timeline(wb, sector, raw_source)
build_sector_analysis(wb, sector)
build_sector_summary(wb, sector)
@@ -949,6 +1021,7 @@ def main() -> None:
"performance_readiness_summary",
"operational_eval_queue_summary",
"portfolio_sector_exposure",
"sector_universe_refresh_audit",
"sector_trend_summary",
"sector_trend_analysis",
"sector_trend_timeline",
@@ -21,6 +21,7 @@ REPORT_SECTION_ORDER = [
"single_conclusion", "immediate_execution_playbook", "market_context_learning_note",
"portfolio_performance_summary",
"portfolio_sector_exposure_summary",
"sector_universe_refresh_audit_v1",
"sector_trend_analysis_v1",
"etf_representative_monitor_v1",
"performance_readiness_summary",
@@ -0,0 +1,173 @@
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import datetime as dt
import json
import sys
from pathlib import Path
from typing import Any
from openpyxl import load_workbook
# Repository root: the directory one level above the folder containing this script.
ROOT = Path(__file__).resolve().parents[1]
# Workbook validated when ``--xlsx`` is not given.
DEFAULT_XLSX = ROOT / "GatherTradingData.xlsx"
# Rows whose ``Source_AsOf`` is older than this many days are counted as stale.
MAX_AGE_DAYS = 31
def _txt(value: Any, default: str = "") -> str:
if value is None:
return default
if isinstance(value, str):
return value.strip() or default
return str(value).strip() or default
def _parse_date(value: Any) -> dt.date | None:
    """Parse the first ten characters of ``value`` as a date.

    Accepts ``YYYY-MM-DD``, ``YYYY.MM.DD`` and ``YYYY/MM/DD``, then falls back
    to ``date.fromisoformat``. Returns ``None`` for blank or unparsable input.
    """
    text = _txt(value)
    if not text:
        return None
    # Only the date part is considered; any time suffix is cut off.
    head = text[:10]
    for pattern in ("%Y-%m-%d", "%Y.%m.%d", "%Y/%m/%d"):
        try:
            parsed = dt.datetime.strptime(head, pattern)
        except Exception:
            continue
        return parsed.date()
    try:
        return dt.date.fromisoformat(head)
    except Exception:
        return None
def _age_days(value: Any) -> int | None:
    """Return how many days before today's KST date ``value`` lies, or ``None`` if unparsable."""
    as_of = _parse_date(value)
    if as_of is None:
        return None
    kst = dt.timezone(dt.timedelta(hours=9))
    return (dt.datetime.now(kst).date() - as_of).days
def main() -> int:
    """Validate the ``sector_universe`` sheet and emit a gate verdict.

    Reads the workbook given by ``--xlsx`` (default ``DEFAULT_XLSX``), groups
    the data rows of the ``sector_universe`` sheet by their ``Sector`` value,
    classifies every sector by the ``Source`` kinds found in its rows, and
    checks source URLs and ``Source_AsOf`` ages.  A human-readable summary is
    printed and the same figures are written as JSON to
    ``ROOT / "Temp" / "sector_universe_refresh_validation.json"``.

    Gate rules:
        * FAIL - a required header is missing, or any template / failed /
          stale rows exist, or any sector mixes several source kinds.
        * WARN - otherwise, when sheet-input rows exist.
        * PASS - everything else.

    Returns:
        0 when the gate is PASS or WARN; 1 when the gate is FAIL, or when the
        workbook file or the ``sector_universe`` sheet does not exist.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--xlsx", default=str(DEFAULT_XLSX))
    args = ap.parse_args()

    xlsx = Path(args.xlsx)
    if not xlsx.exists():
        print(f"[오류] 워크북 없음: {xlsx}")
        return 1

    wb = load_workbook(xlsx, data_only=True)
    if "sector_universe" not in wb.sheetnames:
        print("[FAIL] sector_universe 시트가 없습니다.")
        return 1
    ws = wb["sector_universe"]

    # Header names are read from sheet row 2; data rows start at row 3.
    headers = [_txt(ws.cell(2, c).value) for c in range(1, ws.max_column + 1)]
    idx = {name: i for i, name in enumerate(headers) if name}
    required = ["Sector", "Proxy_Ticker", "Constituent_Code", "Weight", "Source", "Source_URL", "Source_AsOf"]
    missing_headers = [h for h in required if h not in idx]

    # Collect every data row that has at least one non-empty cell.
    rows: list[dict[str, Any]] = []
    for r in range(3, ws.max_row + 1):
        row = {h: ws.cell(r, c + 1).value for c, h in enumerate(headers) if h}
        if not any(v not in (None, "") for v in row.values()):
            continue
        rows.append(row)

    # Group rows by sector; rows whose Sector is blank are not grouped and
    # therefore only contribute to the total row count.
    sector_map: dict[str, list[dict[str, Any]]] = {}
    for row in rows:
        sector = _txt(row.get("Sector"))
        if sector:
            sector_map.setdefault(sector, []).append(row)

    # All *_rows counters below are attributed at sector granularity: when
    # any row of a sector carries a source kind, every row of that sector is
    # added to the matching counter.
    template_rows = 0
    representative_rows = 0
    sheet_input_rows = 0
    naver_rows = 0
    layout_changed_rows = 0
    fail_rows = 0
    missing_source_url = 0
    stale_rows = 0
    mixed_sector_count = 0
    sector_status_rows: list[str] = []

    for sector, sector_rows in sector_map.items():
        # A falsy Source text falls back to SHEET_INPUT.
        source_kinds = {
            _txt(item.get("Source"), "SHEET_INPUT") or "SHEET_INPUT"
            for item in sector_rows
        }
        if len(source_kinds) > 1:
            mixed_sector_count += 1
            sector_status_rows.append(f"{sector}:MIXED({','.join(sorted(source_kinds))})")

        row_count = len(sector_rows)
        if "DEFAULT_TEMPLATE" in source_kinds:
            template_rows += row_count
        if "REPRESENTATIVE_STOCK_PROXY" in source_kinds:
            representative_rows += row_count
        if "SHEET_INPUT" in source_kinds:
            sheet_input_rows += row_count
        if "NAVER_ETF_PAGE" in source_kinds:
            naver_rows += row_count
        if "NAVER_ETF_PAGE_FAIL_LAYOUT_CHANGED" in source_kinds:
            layout_changed_rows += row_count
        # Any source kind containing "FAIL" marks the whole sector as failed.
        if any("FAIL" in src for src in source_kinds):
            fail_rows += row_count

        # A sector counts as missing its URL only when no row provides one.
        source_urls = {_txt(item.get("Source_URL")) for item in sector_rows if _txt(item.get("Source_URL"))}
        if not source_urls:
            missing_source_url += row_count

        # Ages that come back as None are left out of the staleness check.
        age_vals = [
            age
            for age in (_age_days(item.get("Source_AsOf")) for item in sector_rows)
            if age is not None
        ]
        if age_vals and max(age_vals) > MAX_AGE_DAYS:
            stale_rows += sum(1 for age in age_vals if age > MAX_AGE_DAYS)
            sector_status_rows.append(f"{sector}:STALE(max={max(age_vals)})")

    gate = "PASS"
    if missing_headers:
        gate = "FAIL"
    elif template_rows > 0 or fail_rows > 0 or stale_rows > 0 or mixed_sector_count > 0:
        gate = "FAIL"
    elif sheet_input_rows > 0:
        gate = "WARN"

    print(f"[sector_universe_refresh] gate={gate}")
    print(f"  rows={len(rows)} sectors={len(sector_map)}")
    print(f"  naver_rows={naver_rows} representative_rows={representative_rows} layout_changed_rows={layout_changed_rows} sheet_input_rows={sheet_input_rows} template_rows={template_rows} fail_rows={fail_rows}")
    print(f"  missing_source_url={missing_source_url} stale_rows={stale_rows} mixed_sector_count={mixed_sector_count}")
    if missing_headers:
        print(f"  missing_headers={missing_headers}")
    if sector_status_rows:
        # Only the first 20 flags are printed; the JSON report keeps all.
        print("  sector_flags=" + ", ".join(sector_status_rows[:20]))

    result = {
        "validator": "validate_sector_universe_monthly_refresh_v1",
        "gate": gate,
        "total_rows": len(rows),
        "sector_count": len(sector_map),
        "naver_rows": naver_rows,
        "representative_rows": representative_rows,
        "layout_changed_rows": layout_changed_rows,
        "sheet_input_rows": sheet_input_rows,
        "template_rows": template_rows,
        "fail_rows": fail_rows,
        "missing_source_url": missing_source_url,
        "stale_rows": stale_rows,
        "mixed_sector_count": mixed_sector_count,
        "missing_headers": missing_headers,
        "sector_flags": sector_status_rows,
        "max_age_days": MAX_AGE_DAYS,
    }
    out = ROOT / "Temp" / "sector_universe_refresh_validation.json"
    # Create the output directory if needed so a missing Temp folder cannot
    # crash the validator after the verdict has already been computed.
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(json.dumps(result, ensure_ascii=False, indent=2), encoding="utf-8")
    print(f"OUTPUT: {out}")
    return 0 if gate in {"PASS", "WARN"} else 1
# Script entry point: run the validator and use its return value as the
# process exit status.
if __name__ == "__main__":
    raise SystemExit(main())