9ae701ff93
TaxBaik CI/CD / build-and-deploy (push) Failing after 3m5s
Today's incident: CI reported successful deploys while the real site
returned 502 (root) then 404 (/taxbaik/) to users. Root cause was three
compounding Nginx issues, none of which the previous CI checks could see
because they only ever curled 127.0.0.1:5001 directly, bypassing Nginx:
1. Two Nginx config files existed. sites-available/default (documented,
but NOT symlinked into sites-enabled/) was being edited repeatedly with
zero effect. The file actually loaded was
sites-available/taxbaik-domains.conf (-> sites-enabled/), undocumented.
2. That real file hardcoded the Green-Blue app port (5003) directly in
both `location /` and `location /taxbaik`, instead of the persistent
TaxBaik.Proxy on 5001. When the active port flipped to 5004, Nginx kept
pointing at the dead 5003 -> 502.
3. Fixing the port to 5001 with a trailing slash on proxy_pass triggered
Nginx URI rewriting, sending a double slash ("//") to the backend,
which 404'd. Confirmed via `curl http://backend//` -> 404.
Changes:
- deploy.yml: replace the old blind `grep sites-available/default` check
(checked the wrong, unloaded file) with a hard-failing check that (a)
resolves the actual file via sites-enabled/ symlinks, (b) fails the
deploy if either location block hardcodes 5003/5004 instead of 5001,
(c) fails if /taxbaik's proxy_pass carries a stray trailing slash.
- deploy.yml: add an external, post-deploy check that curls the real
public domain (www.taxbaik.com root, /taxbaik/, /taxbaik/admin/login)
through Cloudflare + Nginx, with retries — this is what would have
caught the whole incident on the very first broken deploy instead of
requiring live user reports.
- deploy_gb.sh: drop the stale comment implying Nginx needs updating
per-deploy; it never should, since Nginx always points at the
persistent 5001 proxy which reads taxbaik_port itself.
- CLAUDE.md: document the real config file, the 5001-only invariant, the
proxy_pass trailing-slash gotcha, and the Host-header/SNI trick for
testing domain-based server blocks locally; record the incident in the
CI troubleshooting harness section.
Co-Authored-By: Claude Sonnet 5 <noreply@anthropic.com>
334 lines
15 KiB
YAML
334 lines
15 KiB
YAML
name: TaxBaik CI/CD
|
|
|
|
on:
|
|
workflow_dispatch:
|
|
push:
|
|
branches:
|
|
- master
|
|
|
|
jobs:
|
|
build-and-deploy:
|
|
runs-on: ubuntu-latest
|
|
|
|
steps:
|
|
- name: Checkout code
|
|
uses: actions/checkout@v4
|
|
|
|
- name: Setup .NET
|
|
uses: actions/setup-dotnet@v4
|
|
with:
|
|
dotnet-version: '10.0'
|
|
|
|
- name: Restore dependencies
|
|
run: dotnet restore src/TaxBaik.sln
|
|
|
|
- name: Build solution
|
|
run: |
|
|
dotnet clean src/TaxBaik.sln -c Release
|
|
dotnet build src/TaxBaik.sln -c Release --no-restore
|
|
|
|
- name: Test solution
|
|
run: dotnet test src/TaxBaik.sln -c Release --no-build
|
|
|
|
- name: Publish Web (auto-includes WASM from referenced TaxBaik.Web.Client)
|
|
run: dotnet publish src/TaxBaik.Web/ -c Release -o ./publish --no-restore
|
|
|
|
- name: Publish Proxy
|
|
run: dotnet publish src/TaxBaik.Proxy/ -c Release -o ./publish/proxy
|
|
|
|
- name: Write production secrets
  # Secrets reach the script through `env:` rather than being pasted into the
  # script text with ${{ }}. Inline interpolation lets bash re-interpret the
  # value, so a secret containing ", $, a backtick or a backslash would be
  # corrupted (or executed) before it is ever written to the settings file.
  env:
    JWT_SECRET_KEY: ${{ secrets.TAXBAIK_JWT_SECRET_KEY }}
    TELEGRAM_BOT_TOKEN: ${{ secrets.TAXBAIK_TELEGRAM_BOT_TOKEN }}
    TELEGRAM_CHAT_ID: ${{ secrets.TAXBAIK_TELEGRAM_CHAT_ID }}
    TELEGRAM_INQUIRY_CHAT_ID: ${{ secrets.TAXBAIK_TELEGRAM_INQUIRY_CHAT_ID }}
    TELEGRAM_SYSTEM_CHAT_ID: ${{ secrets.TAXBAIK_TELEGRAM_SYSTEM_CHAT_ID }}
  run: |
    set -e

    # Required secrets: stop before anything is written if one is missing.
    [ -n "$JWT_SECRET_KEY" ] || { echo "Missing TAXBAIK_JWT_SECRET_KEY" >&2; exit 1; }
    [ -n "$TELEGRAM_BOT_TOKEN" ] || { echo "Missing TAXBAIK_TELEGRAM_BOT_TOKEN" >&2; exit 1; }
    [ -n "$TELEGRAM_CHAT_ID" ] || { echo "Missing TAXBAIK_TELEGRAM_CHAT_ID" >&2; exit 1; }

    # Optional secrets: the inquiry chat falls back to the main chat, the
    # system chat falls back to the fixed default id.
    export TELEGRAM_INQUIRY_CHAT_ID="${TELEGRAM_INQUIRY_CHAT_ID:-$TELEGRAM_CHAT_ID}"
    export TELEGRAM_SYSTEM_CHAT_ID="${TELEGRAM_SYSTEM_CHAT_ID:--5585148480}"

    # Python reads the values from the environment and serialises them with
    # json.dumps, so the values are JSON-escaped rather than hand-templated.
    python3 -c '
    import json, os, pathlib
    pathlib.Path("./publish/appsettings.Production.json").write_text(
        json.dumps({
            "Jwt": {"SecretKey": os.environ["JWT_SECRET_KEY"]},
            "Telegram": {
                "BotToken": os.environ["TELEGRAM_BOT_TOKEN"],
                "ChatId": os.environ["TELEGRAM_CHAT_ID"],
                "InquiryChatId": os.environ["TELEGRAM_INQUIRY_CHAT_ID"],
                "SystemChatId": os.environ["TELEGRAM_SYSTEM_CHAT_ID"]
            }
        }, ensure_ascii=False, indent=2),
        encoding="utf-8"
    )'

    test -s ./publish/appsettings.Production.json || { echo "appsettings.Production.json is empty" >&2; exit 1; }
|
|
|
|
- name: Verify proxy artifact
|
|
run: |
|
|
test -s ./publish/proxy/TaxBaik.Proxy.dll || { echo "TaxBaik.Proxy.dll missing" >&2; exit 1; }
|
|
test -s ./publish/proxy/TaxBaik.Proxy.runtimeconfig.json || { echo "TaxBaik.Proxy.runtimeconfig.json missing" >&2; exit 1; }
|
|
|
|
- name: Copy migrations
  # A missing db/migrations directory is still tolerated (warning only), but a
  # failing mkdir/cp now fails the step instead of being hidden by `|| true`,
  # so a partially copied migration set is never packaged silently.
  run: |
    mkdir -p ./publish/db
    if [ -d db/migrations ]; then
      cp -r db/migrations ./publish/db/
    else
      echo "warning: db/migrations not found; packaging without migrations" >&2
    fi
|
|
|
|
- name: Validate migration version uniqueness
|
|
run: bash scripts/validate_migrations.sh db/migrations
|
|
|
|
- name: Validate KST timestamps
|
|
run: bash scripts/validate_kst_timestamps.sh
|
|
|
|
- name: Generate build info
|
|
run: |
|
|
COMMIT_HASH=$(git rev-parse --short HEAD)
|
|
BUILD_TIME=$(TZ=Asia/Seoul date +'%Y-%m-%d %H:%M:%S KST')
|
|
mkdir -p ./publish/wwwroot
|
|
printf '{\n "version": "%s",\n "built": "%s"\n}\n' "$COMMIT_HASH" "$BUILD_TIME" > ./publish/wwwroot/version.json
|
|
echo "✓ Build: $COMMIT_HASH @ $BUILD_TIME"
|
|
|
|
- name: Setup SSH
  # Secrets are passed through `env:` so bash never re-parses their contents
  # as script text.
  env:
    SSH_KEY_B64: ${{ secrets.DEPLOY_SSH_KEY_B64 }}
    SSH_KEY_RAW: ${{ secrets.DEPLOY_SSH_KEY }}
    DEPLOY_HOST: ${{ secrets.DEPLOY_HOST }}
  run: |
    # Create ~/.ssh and the key file private from the start; without this the
    # key is readable under the default umask until the chmod below runs.
    umask 077
    mkdir -p ~/.ssh

    # Accepted key sources, in priority order:
    #   1. DEPLOY_SSH_KEY_B64 - base64-encoded key
    #   2. DEPLOY_SSH_KEY     - PEM text (literal "\n" sequences are expanded
    #                           by printf %b) or, failing that, base64
    if [ -n "$SSH_KEY_B64" ]; then
      printf '%s' "$SSH_KEY_B64" | base64 -d > ~/.ssh/id_ed25519
    elif [ -n "$SSH_KEY_RAW" ]; then
      if printf '%s' "$SSH_KEY_RAW" | grep -q 'BEGIN .*PRIVATE KEY'; then
        printf '%b\n' "$SSH_KEY_RAW" > ~/.ssh/id_ed25519
      else
        printf '%s' "$SSH_KEY_RAW" | base64 -d > ~/.ssh/id_ed25519
      fi
    else
      echo "Missing DEPLOY_SSH_KEY_B64 or DEPLOY_SSH_KEY" >&2; exit 1
    fi

    # Strip Windows line endings that would make ssh reject the key.
    sed -i 's/\r$//' ~/.ssh/id_ed25519
    chmod 600 ~/.ssh/id_ed25519

    # Best effort: later scp/ssh calls use StrictHostKeyChecking=yes and will
    # fail on their own if the host key is not known.
    ssh-keyscan -H "$DEPLOY_HOST" >> ~/.ssh/known_hosts 2>/dev/null || true
|
|
|
|
- name: Package artifact
|
|
run: |
|
|
cp deploy_gb.sh ./publish/deploy_gb.sh
|
|
mkdir -p ./publish/scripts
|
|
cp scripts/validate_migrations.sh ./publish/scripts/validate_migrations.sh
|
|
chmod +x ./publish/scripts/validate_migrations.sh
|
|
tar -czf taxbaik_deploy.tgz -C ./publish .
|
|
echo "✓ Package: $(du -sh taxbaik_deploy.tgz | cut -f1)"
|
|
|
|
- name: Deploy & verify on server
|
|
run: |
|
|
set -e
|
|
export TAXBAIK_DEPLOY_FROM_CI=1
|
|
TIMESTAMP=$(TZ=Asia/Seoul date +%Y%m%d_%H%M%S)
|
|
COMMIT=$(git rev-parse --short HEAD)
|
|
DEPLOY_HOST="${{ secrets.DEPLOY_HOST }}"
|
|
DEPLOY_USER="${{ secrets.DEPLOY_USER }}"
|
|
TELEGRAM_BOT_TOKEN="${{ secrets.TAXBAIK_TELEGRAM_BOT_TOKEN }}"
|
|
TELEGRAM_SYSTEM_CHAT_ID="${{ secrets.TAXBAIK_TELEGRAM_SYSTEM_CHAT_ID }}"
|
|
TELEGRAM_CHAT_ID="${TELEGRAM_SYSTEM_CHAT_ID:--5585148480}"
|
|
|
|
#######################################
# Sends one HTML-formatted message to the Telegram chat. Best effort: a
# missing token or a failed request never fails the caller.
# Globals:   TELEGRAM_BOT_TOKEN (read), TELEGRAM_CHAT_ID (read)
# Arguments: $1 - message text
# Outputs:   a skip notice on stderr when the token is empty
# Returns:   always 0
#######################################
send_telegram() {
  local text="$1"

  if [ -z "${TELEGRAM_BOT_TOKEN:-}" ]; then
    echo "Skipping Telegram notification: missing TAXBAIK_TELEGRAM_BOT_TOKEN" >&2
    return 0
  fi

  # curl has no overall time limit by default; bound the call so a stalled
  # Telegram API cannot hang the deploy job (this also runs on failure paths).
  curl -fsS --connect-timeout 10 --max-time 20 -X POST \
    "https://api.telegram.org/bot${TELEGRAM_BOT_TOKEN}/sendMessage" \
    -d "chat_id=${TELEGRAM_CHAT_ID}" \
    --data-urlencode "text=${text}" \
    -d "parse_mode=HTML" >/dev/null || true
}
|
|
|
|
#######################################
# Failure handler for the deploy script: sends the failure message to
# Telegram and exits with the status that triggered it.
# Registered for both ERR and EXIT. ERR alone is not enough, because an
# explicit "exit 1" ends the shell without raising ERR, and such a failure
# would otherwise go unreported.
# Globals:   COMMIT (read), TIMESTAMP (read)
# Returns:   0 without sending anything when the triggering status is 0;
#            otherwise does not return (exits with the original status)
#######################################
notify_failure() {
  local exit_code=$?

  # Disarm both traps first so the message is sent at most once, whichever
  # trap fired, and so the exit below cannot re-enter this handler.
  trap - ERR EXIT

  # A normal, successful end of the script also reaches the EXIT trap.
  if [ "$exit_code" -eq 0 ]; then
    return 0
  fi

  send_telegram "❌ <b>TaxBaik 배포 실패</b>

커밋: <code>${COMMIT}</code>
시간: <code>${TIMESTAMP}</code>
단계: CI/CD deploy"
  exit "$exit_code"
}

trap notify_failure ERR EXIT
|
|
|
|
echo "=== Deploying TaxBaik $COMMIT ($TIMESTAMP) ==="
|
|
|
|
# 1. 아티팩트 업로드
|
|
scp -i ~/.ssh/id_ed25519 -o StrictHostKeyChecking=yes \
|
|
taxbaik_deploy.tgz "$DEPLOY_USER@$DEPLOY_HOST:/tmp/taxbaik_${TIMESTAMP}.tgz"
|
|
|
|
# 2. 서버에서 배포 + 헬스 체크 (SSH 1회 연결로 처리, Green-Blue 지원)
|
|
ssh -i ~/.ssh/id_ed25519 -o StrictHostKeyChecking=yes \
|
|
-o ServerAliveInterval=10 \
|
|
"$DEPLOY_USER@$DEPLOY_HOST" TAXBAIK_DEPLOY_FROM_CI=1 bash << REMOTE
|
|
set -e
|
|
DEPLOY_HOME="/home/kjh2064"
|
|
DEPLOY_DIR="\$DEPLOY_HOME/deployments/taxbaik_${TIMESTAMP}"
|
|
TIMESTAMP="${TIMESTAMP}"
|
|
|
|
# Remote steps 1-4: unpack the uploaded archive, check that the required
# files are present, validate migrations, then run the Green-Blue deploy.
# The remote shell runs with set -e, so any failing command aborts here.

echo "--- [1/5] 압축 해제 ---"
ARCHIVE="/tmp/taxbaik_\${TIMESTAMP}.tgz"
mkdir -p "\$DEPLOY_DIR"
tar -xzf "\$ARCHIVE" -C "\$DEPLOY_DIR"
rm -f "\$ARCHIVE"

echo "--- [2/5] 운영 설정 검증 ---"
if [ ! -s "\$DEPLOY_DIR/appsettings.Production.json" ]; then
  echo "FATAL: appsettings.Production.json 없음" >&2
  exit 1
fi
if [ ! -s "\$DEPLOY_DIR/proxy/TaxBaik.Proxy.dll" ]; then
  echo "FATAL: TaxBaik.Proxy.dll 없음" >&2
  exit 1
fi

echo "--- [3/5] 마이그레이션 사전 검증 ---"
if [ ! -x "\$DEPLOY_DIR/scripts/validate_migrations.sh" ]; then
  echo "FATAL: validate_migrations.sh 없음" >&2
  exit 1
fi
# NOTE(review): the connection string below embeds the database password in
# the workflow file, so it is stored in the repository and visible to anyone
# who can read it. Consider supplying it from a CI secret or a server-side
# environment file instead.
"\$DEPLOY_DIR/scripts/validate_migrations.sh" "\$DEPLOY_DIR/db/migrations" "postgresql://taxbaik:taxbaik123@localhost:5432/taxbaikdb"

echo "--- [4/5] Green-Blue 배포 실행 ---"
chmod +x "\$DEPLOY_DIR/deploy_gb.sh"
"\$DEPLOY_DIR/deploy_gb.sh" "\$DEPLOY_DIR"
|
|
|
|
echo "--- [4.5/5] Nginx 설정 검증 ---"
|
|
# 실제 로드되는 파일은 sites-enabled/의 심볼릭 링크 대상만이다.
|
|
# sites-available/에 다른 파일(예: default)이 있어도 sites-enabled에
|
|
# 링크되어 있지 않으면 nginx는 그 내용을 절대 읽지 않는다.
|
|
NGINX_CONF=""
|
|
for f in /etc/nginx/sites-enabled/*; do
|
|
if [ -e "\$f" ] && grep -q "location /taxbaik" "\$f" 2>/dev/null; then
|
|
NGINX_CONF=\$(readlink -f "\$f")
|
|
break
|
|
fi
|
|
done
|
|
|
|
if [ -z "\$NGINX_CONF" ]; then
|
|
echo "❌ FATAL: sites-enabled/ 안에서 'location /taxbaik'를 정의한 파일을 찾을 수 없음" >&2
|
|
echo " sites-available/에 파일을 수정해도 sites-enabled에 심볼릭 링크되어 있지 않으면 반영되지 않는다." >&2
|
|
exit 1
|
|
fi
|
|
echo "실제 로드되는 설정 파일: \$NGINX_CONF"
|
|
|
|
# 불변식: '/'와 '/taxbaik' location 모두 반드시 127.0.0.1:5001 (TaxBaik.Proxy)을
|
|
# 가리켜야 한다. 5003/5004를 직접 하드코딩하면 Green-Blue 포트 전환 시
|
|
# 죽은 포트를 가리키게 되어 502/404가 발생한다 (실제 발생했던 장애).
|
|
if grep -E "proxy_pass\s+http://127\.0\.0\.1:500[34]" "\$NGINX_CONF" > /dev/null 2>&1; then
|
|
echo "❌ FATAL: \$NGINX_CONF 가 포트 5003/5004를 직접 참조함 (Green-Blue 전환 시 502 발생)" >&2
|
|
echo " 수정: sudo sed -i 's|127.0.0.1:500[34]|127.0.0.1:5001|g' \$NGINX_CONF && sudo nginx -t && sudo systemctl reload nginx" >&2
|
|
exit 1
|
|
fi
|
|
|
|
# proxy_pass에 URI(끝 슬래시)가 있으면 nginx가 요청 경로를 재작성하며,
|
|
# location 접두사와 슬래시 개수가 안 맞으면 백엔드로 이중 슬래시(//)가
|
|
# 전달되어 404가 발생한다 (실제 발생했던 장애). 접두사 location에서는
|
|
# proxy_pass에 URI를 붙이지 않는다.
|
|
if grep -E "location\s+/taxbaik\s*\{" -A 1 "\$NGINX_CONF" | grep -qE "proxy_pass\s+http://127\.0\.0\.1:5001/;"; then
|
|
echo "❌ FATAL: location /taxbaik 의 proxy_pass 에 불필요한 trailing slash가 있음 (이중 슬래시로 인한 404 위험)" >&2
|
|
exit 1
|
|
fi
|
|
|
|
echo "✓ Nginx 설정 검증 통과 (실제 로드 파일 확인 + 포트 5001 고정 + trailing slash 없음)"
|
|
|
|
echo "--- [5/5] 헬스 체크 (최대 60초) ---"
|
|
ATTEMPTS=20
|
|
for i in \$(seq 1 \$ATTEMPTS); do
|
|
STATUS=\$(curl -sf -o /dev/null -w '%{http_code}' http://127.0.0.1:5001/taxbaik/ 2>/dev/null || echo "000")
|
|
if [ "\$STATUS" = "200" ]; then
|
|
echo "✓ [1/4] 메인 페이지 로드 완료"
|
|
|
|
# 검증 1: CSS 파일 로드
|
|
CSS_STATUS=\$(curl -sf -o /dev/null -w '%{http_code}' http://127.0.0.1:5001/taxbaik/css/admin.css 2>/dev/null || echo "000")
|
|
if [ "\$CSS_STATUS" != "200" ]; then
|
|
echo "❌ CSS 파일 로드 실패 (상태: \$CSS_STATUS)" >&2
|
|
exit 1
|
|
fi
|
|
echo "✓ [2/4] CSS 파일 로드 완료"
|
|
|
|
# 검증 2: 버전 정보
|
|
if [ ! -s "\$DEPLOY_DIR/wwwroot/version.json" ]; then
|
|
echo "❌ version.json 누락" >&2
|
|
exit 1
|
|
fi
|
|
echo "✓ [3/4] 버전 정보 확인 완료"
|
|
|
|
# 검증 4: 5001 프록시 확인
|
|
if ! ss -tlnp | grep -q ':5001 '; then
|
|
echo "❌ 5001 프록시가 실행 중이 아님" >&2
|
|
exit 1
|
|
fi
|
|
echo "✓ [4/5] 5001 프록시 확인 완료"
|
|
|
|
# 검증 5: 관리자 로그인 페이지
|
|
LOGIN_STATUS=\$(curl -sf -o /dev/null -w '%{http_code}' http://127.0.0.1:5001/taxbaik/admin/login 2>/dev/null || echo "000")
|
|
if [ "\$LOGIN_STATUS" != "200" ]; then
|
|
echo "❌ 관리자 로그인 페이지 로드 실패 (상태: \$LOGIN_STATUS)" >&2
|
|
exit 1
|
|
fi
|
|
echo "✓ [5/5] 관리자 페이지 로드 완료"
|
|
|
|
echo "✓ 서비스 정상 (시도 \$i/\$ATTEMPTS)"
|
|
# 구 배포 디렉토리 정리 (최근 5개 보존)
|
|
ls -1dt \$DEPLOY_HOME/deployments/taxbaik_* 2>/dev/null \
|
|
| tail -n +6 | xargs rm -rf 2>/dev/null || true
|
|
exit 0
|
|
fi
|
|
if [ "\$i" -eq "\$ATTEMPTS" ]; then
|
|
echo "=== FATAL: 서비스가 \$ATTEMPTS회 시도 후에도 응답하지 않음 ===" >&2
|
|
echo "--- systemd 상태 ---" >&2
|
|
systemctl is-active taxbaik >&2 || true
|
|
echo "--- 최근 로그 50줄 ---" >&2
|
|
journalctl -u taxbaik --no-pager -n 50 >&2
|
|
exit 1
|
|
fi
|
|
echo " 대기 중... (\$i/\$ATTEMPTS, HTTP \$STATUS)"
|
|
sleep 3
|
|
done
|
|
REMOTE
|
|
|
|
echo "✓ 배포 완료: taxbaik_${TIMESTAMP} @ $DEPLOY_HOST"
|
|
|
|
# 내부 127.0.0.1:5001 헬스 체크는 Nginx/Cloudflare를 거치지 않으므로
|
|
# Nginx 설정 오류(잘못된 파일 수정, 죽은 포트 하드코딩 등)를 잡지 못한다.
|
|
# 실제 사용자가 접속하는 경로 그대로 외부에서 검증해야 이런 장애를 CI가 스스로 잡는다.
|
|
#######################################
# Requests a public URL once and reports whether it answered HTTP 200.
# Arguments: $1 - URL to request
# Outputs:   one result line; stdout on success, stderr on failure
# Returns:   0 when the status is 200, 1 otherwise
#######################################
check_public() {
  local url="$1"
  local status

  # curl prints 000 by itself when the request fails, so only fall back to
  # 000 when it printed nothing at all. Echoing an extra 000 on failure
  # logged a doubled code such as 000000.
  status=$(curl -s -o /dev/null -w '%{http_code}' --max-time 15 "$url") || true
  status="${status:-000}"

  if [ "$status" != "200" ]; then
    echo " ✗ $url → HTTP $status" >&2
    return 1
  fi
  echo " ✓ $url → HTTP $status"
  return 0
}
|
|
|
|
# End-to-end verification through the real public domain, up to three
# attempts. All three URLs must return 200 within the same attempt.
echo "--- 실제 공개 도메인 종단 간 검증 (Nginx/Cloudflare 경유, 최대 3회 재시도) ---"
PUBLIC_OK=false
for i in 1 2 3; do
  if check_public "https://www.taxbaik.com/" \
    && check_public "https://www.taxbaik.com/taxbaik/" \
    && check_public "https://www.taxbaik.com/taxbaik/admin/login"; then
    PUBLIC_OK=true
    break
  fi
  # Wait only when another attempt follows; after the last attempt there is
  # nothing to wait for and the retry message would be misleading.
  if [ "$i" -lt 3 ]; then
    echo " 재시도 대기 중... ($i/3)"
    sleep 5
  fi
done

if [ "$PUBLIC_OK" != "true" ]; then
  echo "❌ FATAL: 실제 공개 도메인 검증 실패. Nginx가 죽은 포트를 가리키거나 잘못된 파일을 수정했을 가능성이 높다." >&2
  echo " 확인: sites-enabled/의 실제 파일에서 location / 와 location /taxbaik 모두 127.0.0.1:5001을 가리키는지 점검" >&2
  exit 1
fi
echo "✓ 실제 공개 도메인 전체 정상"
|
|
|
|
send_telegram "✅ <b>TaxBaik 배포 완료</b>
|
|
|
|
커밋: <code>${COMMIT}</code>
|
|
시간: <code>${TIMESTAMP}</code>
|
|
대상: <code>${DEPLOY_HOST}</code>
|
|
채널: <code>${TELEGRAM_CHAT_ID}</code>"
|