Add post-deploy health checks with auto-rollback and promotion

- Health checks: website, SSH server, Docker containers
- Auto-rollback to previous commit if checks fail
- Auto-create PR to staging when develop passes all checks
- Detailed deployment summary in GitHub Actions
This commit is contained in:
Greg Hendrickson
2026-01-27 21:13:59 +00:00
parent 60046e1dee
commit 469011c4e1

View File

@@ -11,7 +11,9 @@ env:
jobs: jobs:
deploy: deploy:
runs-on: ubuntu-latest runs-on: ubuntu-latest
if: github.event_name == 'push' outputs:
environment: ${{ steps.env.outputs.name }}
deploy_success: ${{ steps.health.outputs.success }}
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -22,15 +24,15 @@ jobs:
if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then if [[ "${{ github.ref }}" == "refs/heads/main" ]]; then
echo "name=production" >> $GITHUB_OUTPUT echo "name=production" >> $GITHUB_OUTPUT
echo "url=https://shellmate.sh" >> $GITHUB_OUTPUT echo "url=https://shellmate.sh" >> $GITHUB_OUTPUT
echo "ssh_user=play" >> $GITHUB_OUTPUT echo "ssh_url=shellmate.sh" >> $GITHUB_OUTPUT
elif [[ "${{ github.ref }}" == "refs/heads/staging" ]]; then elif [[ "${{ github.ref }}" == "refs/heads/staging" ]]; then
echo "name=staging" >> $GITHUB_OUTPUT echo "name=staging" >> $GITHUB_OUTPUT
echo "url=https://staging.shellmate.sh" >> $GITHUB_OUTPUT echo "url=https://staging.shellmate.sh" >> $GITHUB_OUTPUT
echo "ssh_user=play" >> $GITHUB_OUTPUT echo "ssh_url=staging.shellmate.sh" >> $GITHUB_OUTPUT
else else
echo "name=dev" >> $GITHUB_OUTPUT echo "name=dev" >> $GITHUB_OUTPUT
echo "url=https://dev.shellmate.sh" >> $GITHUB_OUTPUT echo "url=https://dev.shellmate.sh" >> $GITHUB_OUTPUT
echo "ssh_user=play" >> $GITHUB_OUTPUT echo "ssh_url=dev.shellmate.sh" >> $GITHUB_OUTPUT
fi fi
- name: Setup SSH - name: Setup SSH
@@ -40,7 +42,16 @@ jobs:
chmod 600 ~/.ssh/deploy_key chmod 600 ~/.ssh/deploy_key
ssh-keyscan -p ${{ env.SSH_PORT }} ${{ env.SERVER_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true ssh-keyscan -p ${{ env.SSH_PORT }} ${{ env.SERVER_IP }} >> ~/.ssh/known_hosts 2>/dev/null || true
- name: Get current commit (for rollback)
  id: prev
  run: |
    # Record the commit currently checked out in /opt/shellmate on the server
    # so the rollback step has a target. `--verify` prints nothing on stdout
    # when HEAD cannot be resolved, and the fallback is applied to the whole
    # assignment, so the `sha` output is always a single line: either the
    # commit id or the literal "none".
    PREV_SHA=$(ssh -i ~/.ssh/deploy_key -p ${{ env.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ env.SERVER_IP }} \
      "cd /opt/shellmate && git rev-parse --verify HEAD" 2>/dev/null) || PREV_SHA="none"
    echo "sha=$PREV_SHA" >> "$GITHUB_OUTPUT"
    echo "Previous commit: $PREV_SHA"
- name: Deploy to ${{ steps.env.outputs.name }} - name: Deploy to ${{ steps.env.outputs.name }}
id: deploy
run: | run: |
ssh -i ~/.ssh/deploy_key -p ${{ env.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ env.SERVER_IP }} << 'ENDSSH' ssh -i ~/.ssh/deploy_key -p ${{ env.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ env.SERVER_IP }} << 'ENDSSH'
cd /opt/shellmate cd /opt/shellmate
@@ -48,16 +59,126 @@ jobs:
git checkout ${{ github.ref_name }} git checkout ${{ github.ref_name }}
git pull origin ${{ github.ref_name }} git pull origin ${{ github.ref_name }}
docker compose up -d --build docker compose up -d --build
echo "Deployed ${{ github.ref_name }} to ${{ steps.env.outputs.name }}" echo "Deployed ${{ github.ref_name }}"
ENDSSH ENDSSH
- name: Wait for services to stabilize
  # Fixed 15-second pause between the deploy step and the health checks.
  run: |
    sleep 15
- name: Health check - Website
  id: health_web
  run: |
    # Probe the environment URL and expose `web_ok` (true only on HTTP 200).
    echo "Checking website..."
    # curl's -w already prints "000" when the request fails, so the fallback
    # replaces the value instead of appending a second "000" to it.
    HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" --max-time 10 "${{ steps.env.outputs.url }}") || HTTP_CODE="000"
    echo "Website HTTP: $HTTP_CODE"
    if [[ "$HTTP_CODE" == "200" ]]; then
      echo "web_ok=true" >> "$GITHUB_OUTPUT"
    else
      echo "web_ok=false" >> "$GITHUB_OUTPUT"
    fi
- name: Health check - SSH Server
  id: health_ssh
  run: |
    # Probe TCP port 22 on the server (5 second timeout) and expose `ssh_ok`.
    echo "Checking SSH server..."
    PORT_OPEN=false
    if nc -z -w 5 ${{ env.SERVER_IP }} 22 2>/dev/null; then
      PORT_OPEN=true
    fi
    case "$PORT_OPEN" in
      true)
        echo "ssh_ok=true" >> $GITHUB_OUTPUT
        echo "SSH port 22: OK"
        ;;
      *)
        echo "ssh_ok=false" >> $GITHUB_OUTPUT
        echo "SSH port 22: FAILED"
        ;;
    esac
- name: Health check - Docker containers
  id: health_docker
  run: |
    # Count the running containers of the compose project on the server and
    # expose `docker_ok` (true when at least 3 are running).
    # NOTE(review): the threshold of 3 presumably matches the number of
    # services in docker-compose.yml - confirm when services are added/removed.
    echo "Checking Docker containers..."
    # `ps --status running --quiet` prints one container id per line, so
    # `wc -l` always yields exactly one number (the previous
    # `grep -c ... || echo 0` printed "0" twice when nothing was running).
    # The local fallback keeps a failed SSH connection from aborting this step
    # before the evaluation and rollback steps can run.
    HEALTHY=$(ssh -i ~/.ssh/deploy_key -p ${{ env.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ env.SERVER_IP }} \
      "docker compose -f /opt/shellmate/docker-compose.yml ps --status running --quiet 2>/dev/null | wc -l") || HEALTHY=0
    # Drop any stray whitespace and default to 0 on empty output.
    HEALTHY="${HEALTHY//[[:space:]]/}"
    HEALTHY="${HEALTHY:-0}"
    echo "Running containers: $HEALTHY"
    if [[ "$HEALTHY" -ge 3 ]]; then
      echo "docker_ok=true" >> "$GITHUB_OUTPUT"
    else
      echo "docker_ok=false" >> "$GITHUB_OUTPUT"
    fi
- name: Evaluate health
  id: health
  run: |
    # Combine the three individual check results into one `success` output:
    # "true" only when website, SSH and Docker checks all reported "true".
    WEB="${{ steps.health_web.outputs.web_ok }}"
    SSH="${{ steps.health_ssh.outputs.ssh_ok }}"
    DOCKER="${{ steps.health_docker.outputs.docker_ok }}"
    echo "Web: $WEB | SSH: $SSH | Docker: $DOCKER"
    for RESULT in "$WEB" "$SSH" "$DOCKER"; do
      if [[ "$RESULT" != "true" ]]; then
        # First failing check decides the outcome; the step itself still
        # succeeds so later steps can react to the output.
        echo "success=false" >> $GITHUB_OUTPUT
        echo "❌ Health checks failed!"
        exit 0
      fi
    done
    echo "success=true" >> $GITHUB_OUTPUT
    echo "✅ All health checks passed!"
- name: Rollback on failure
  # Runs whenever the health evaluation reported failure. The job is always
  # failed here, even when no previous commit was recorded, so a broken
  # deployment can never finish green.
  if: steps.health.outputs.success == 'false'
  env:
    PREV_SHA: ${{ steps.prev.outputs.sha }}
  run: |
    if [[ -z "$PREV_SHA" || "$PREV_SHA" == "none" ]]; then
      echo "::error::Deployment failed health checks - no previous commit recorded, cannot roll back!"
      exit 1
    fi
    echo "🔄 Rolling back to $PREV_SHA..."
    # Unquoted heredoc: $PREV_SHA is expanded locally before the script is
    # sent to the server. `set -e` makes a failed checkout/rebuild abort the
    # remote script so a failed rollback is not reported as completed.
    ssh -i ~/.ssh/deploy_key -p ${{ env.SSH_PORT }} -o StrictHostKeyChecking=no root@${{ env.SERVER_IP }} << ENDSSH
    set -e
    cd /opt/shellmate
    git checkout $PREV_SHA
    docker compose up -d --build
    echo "Rolled back to $PREV_SHA"
    ENDSSH
    echo "::error::Deployment failed health checks - rolled back!"
    exit 1
- name: Summary
  # Always publish the report: without this the step is skipped as soon as the
  # rollback step fails the job, which is exactly when the ❌ rows matter.
  if: always()
  run: |
    # Write the deployment report to the job summary. Check outputs that were
    # never set (because an earlier step failed) render as ❌.
    {
      echo "## 🚀 Deployment Report"
      echo ""
      echo "| Check | Status |"
      echo "|-------|--------|"
      echo "| **Environment** | ${{ steps.env.outputs.name }} |"
      echo "| **Website** | ${{ steps.health_web.outputs.web_ok == 'true' && '✅' || '❌' }} |"
      echo "| **SSH Server** | ${{ steps.health_ssh.outputs.ssh_ok == 'true' && '✅' || '❌' }} |"
      echo "| **Containers** | ${{ steps.health_docker.outputs.docker_ok == 'true' && '✅' || '❌' }} |"
      echo "| **Overall** | ${{ steps.health.outputs.success == 'true' && '✅ HEALTHY' || '❌ FAILED' }} |"
      echo ""
      echo "**Commit:** \`${{ github.sha }}\`"
    } >> "$GITHUB_STEP_SUMMARY"
# Auto-promote develop to staging if healthy
promote-to-staging:
  # Only runs for pushes to develop whose deploy job reported healthy checks.
  needs: deploy
  if: github.ref == 'refs/heads/develop' && needs.deploy.outputs.deploy_success == 'true'
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
      with:
        fetch-depth: 0
        token: ${{ secrets.GITHUB_TOKEN }}
    - name: Create PR to staging
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        # Look for an open develop -> staging PR; empty when none is found
        # or when the lookup itself fails.
        EXISTING=$(gh pr list --head develop --base staging --json number -q '.[0].number' || echo "")
        if [[ -n "$EXISTING" ]]; then
          echo "PR #$EXISTING already exists for develop → staging"
          exit 0
        fi
        # Best effort: a failing `gh pr create` is logged, not fatal.
        gh pr create \
          --title "🚀 Promote develop to staging" \
          --body "Automated PR after successful dev deployment.
        **Health checks passed:**
        - ✅ Website responsive
        - ✅ SSH server running
        - ✅ All containers healthy
        **Commit:** ${{ github.sha }}" \
          --base staging \
          --head develop || echo "PR may already exist or no changes"