diff --git a/.github/workflows/deploy-dev.yml b/.github/workflows/deploy-dev.yml index 93c29ff..2ce2d3f 100644 --- a/.github/workflows/deploy-dev.yml +++ b/.github/workflows/deploy-dev.yml @@ -1,122 +1,149 @@ name: Deploy to Development on: - workflow_run: - workflows: ["Build and Push Container Image"] - types: - - completed - branches: [ develop ] push: - branches: [ develop ] + branches: [ main, master ] + paths: + - 'src/**' + - 'Dockerfile' + - 'nginx.conf' + - 'package.json' + - 'manifests/dev/**' workflow_dispatch: - inputs: - image_tag: - description: 'Image tag to deploy (default: latest)' - required: false - default: 'latest' env: REGISTRY: ghcr.io - IMAGE_NAME: ghndrx/k8s-game-2048 + IMAGE_NAME: ${{ github.repository }} jobs: deploy-dev: - name: Deploy to Development runs-on: ubuntu-latest - if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} environment: development steps: - - name: Checkout repository + - name: Checkout code uses: actions/checkout@v4 - - name: Set up kubectl - uses: azure/setup-kubectl@v3 + - name: Log in to Container Registry + uses: docker/login-action@v3 with: - version: 'latest' + registry: ${{ env.REGISTRY }} + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - - name: Configure kubectl - run: | - mkdir -p ~/.kube - echo "${{ secrets.KUBECONFIG }}" | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config + - name: Extract metadata + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} + tags: | + type=ref,event=branch + type=ref,event=pr + type=sha,prefix={{branch}}- - - name: Set image tag + - name: Build and push Docker image + uses: docker/build-push-action@v5 + with: + context: . 
+ push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + + - name: Set image tag for deployment run: | - IMAGE_TAG="${{ github.event.inputs.image_tag || 'latest' }}" + IMAGE_TAG=$(echo "${{ steps.meta.outputs.tags }}" | head -n1 | cut -d':' -f2) echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV - echo "Deploying image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:$IMAGE_TAG" + echo "๐Ÿท๏ธ Using image tag: $IMAGE_TAG" - - name: Deploy to development + - name: Deploy to development via webhook run: | - echo "๐Ÿš€ Deploying to development environment..." + echo "๐Ÿš€ Triggering webhook deployment to development..." - # Apply namespace - kubectl apply -f manifests/dev/namespace.yml + # Prepare deployment payload + PAYLOAD=$(cat </dev/null; then - echo "๐Ÿ” Copying GHCR secret to dev namespace..." - kubectl get secret ghcr-secret -o yaml | \ - sed 's/namespace: default/namespace: game-2048-dev/' | \ - sed '/resourceVersion:/d' | \ - sed '/uid:/d' | \ - sed '/creationTimestamp:/d' | \ - kubectl apply -f - + # Generate HMAC signature for webhook security + SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "${{ secrets.WEBHOOK_SECRET }}" -binary | base64) + + # Send webhook + HTTP_CODE=$(curl -s -o /tmp/webhook_response.json -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + -H "X-Signature-SHA256: sha256=$SIGNATURE" \ + -H "X-GitHub-Event: deployment" \ + -H "X-GitHub-Delivery: ${{ github.run_id }}" \ + -d "$PAYLOAD" \ + "${{ secrets.DEV_WEBHOOK_URL }}") + + echo "Webhook response code: $HTTP_CODE" + cat /tmp/webhook_response.json || echo "No response body" + + if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then + echo "โœ… Webhook deployment triggered successfully!" 
+ else + echo "โŒ Webhook deployment failed with code: $HTTP_CODE" + exit 1 fi - - # Apply the Knative service manifest first - kubectl apply -f manifests/dev/service.yml - - # Update image in service - kubectl patch ksvc game-2048-dev -n game-2048-dev --type merge -p '{"spec":{"template":{"spec":{"containers":[{"image":"${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}","imagePullPolicy":"Always"}]}}}}' - - echo "โณ Waiting for deployment to be ready..." - kubectl wait --for=condition=Ready ksvc/game-2048-dev -n game-2048-dev --timeout=300s || echo "โš ๏ธ Service may still be starting" - - name: Verify deployment + - name: Wait for deployment to complete run: | - echo "๐Ÿ“Š Deployment status:" - kubectl get ksvc -n game-2048-dev - - echo "" - echo "โœ… Development deployment completed!" - echo "๐ŸŒ Available at: https://2048-dev.wa.darknex.us" - - - name: Run smoke test - run: | - echo "๐Ÿงช Running smoke test..." + echo "โณ Waiting for deployment to stabilize..." sleep 30 + + - name: Health check + run: | + echo "๐Ÿฅ Performing health check..." + MAX_RETRIES=10 + RETRY_COUNT=0 - for i in {1..5}; do - echo "Attempt $i/5..." - # Test canonical domain first - if curl -s --max-time 30 https://game-2048-dev.game-2048-dev.dev.wa.darknex.us/ | grep -q "2048"; then - echo "โœ… Canonical domain smoke test passed!" - break - # Fallback to custom domain - elif curl -s --max-time 30 https://2048-dev.wa.darknex.us/ | grep -q "2048"; then - echo "โœ… Custom domain smoke test passed!" 
- break - elif [ $i -eq 5 ]; then - echo "โš ๏ธ Smoke test failed after 5 attempts" - exit 1 + # Get the canonical Knative domain for health check + # Format: service-name.namespace.knative-domain + HEALTH_URL="https://game-2048-dev.game-2048-dev.${{ secrets.KNATIVE_DOMAIN }}" + + while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do + echo "Attempt $((RETRY_COUNT + 1))/$MAX_RETRIES - Checking: $HEALTH_URL" + + if curl -f -s --max-time 10 "$HEALTH_URL" > /dev/null; then + echo "โœ… Health check passed!" + echo "๐ŸŒ Application is available at: $HEALTH_URL" + exit 0 else - echo "Retrying in 30 seconds..." - sleep 30 + echo "โš ๏ธ Health check failed, retrying in 15 seconds..." + sleep 15 + RETRY_COUNT=$((RETRY_COUNT + 1)) fi done + + echo "โŒ Health check failed after $MAX_RETRIES attempts" + echo "The deployment webhook was sent successfully, but the service is not responding" + echo "Please check your cluster logs for deployment issues" + exit 1 - - name: Create deployment summary + - name: Deployment summary + if: always() run: | echo "## ๐Ÿš€ Development Deployment Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Component | Status |" >> $GITHUB_STEP_SUMMARY - echo "|-----------|--------|" >> $GITHUB_STEP_SUMMARY - echo "| Namespace | โœ… Applied |" >> $GITHUB_STEP_SUMMARY - echo "| Service | โœ… Deployed |" >> $GITHUB_STEP_SUMMARY - echo "| Health Check | โœ… Passed |" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### ๐Ÿ”— URLs" >> $GITHUB_STEP_SUMMARY - echo "- **Canonical**: https://game-2048-dev.game-2048-dev.dev.wa.darknex.us" >> $GITHUB_STEP_SUMMARY - echo "- **Custom**: https://2048-dev.wa.darknex.us" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** Development" >> $GITHUB_STEP_SUMMARY + echo "- **Image:** \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Deployment Method:** Webhook-based" >> $GITHUB_STEP_SUMMARY + echo "- **Triggered by:** ${{ github.actor 
}}" >> $GITHUB_STEP_SUMMARY + echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY + + if [ "${{ job.status }}" = "success" ]; then + echo "- **Status:** โœ… Success" >> $GITHUB_STEP_SUMMARY + echo "- **URL:** https://game-2048-dev.game-2048-dev.${{ secrets.KNATIVE_DOMAIN }}" >> $GITHUB_STEP_SUMMARY + else + echo "- **Status:** โŒ Failed" >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/deploy-prod.yml b/.github/workflows/deploy-prod.yml index 7239d32..63f1bf0 100644 --- a/.github/workflows/deploy-prod.yml +++ b/.github/workflows/deploy-prod.yml @@ -1,8 +1,6 @@ name: Deploy to Production on: - push: - branches: [ main ] workflow_dispatch: inputs: image_tag: @@ -12,253 +10,177 @@ on: confirmation: description: 'Type "DEPLOY" to confirm production deployment' required: true + source_environment: + description: 'Source environment (staging or manual)' + required: false + default: 'staging' + workflow_run: + workflows: ["Deploy to Staging"] + types: + - completed + branches: [ main, master ] env: REGISTRY: ghcr.io - IMAGE_NAME: ghndrx/k8s-game-2048 + IMAGE_NAME: ${{ github.repository }} jobs: deploy-prod: name: Deploy to Production runs-on: ubuntu-latest environment: production - if: ${{ github.event.inputs.confirmation == 'DEPLOY' }} + if: | + (github.event_name == 'workflow_dispatch' && github.event.inputs.confirmation == 'DEPLOY') || + (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Set up kubectl - uses: azure/setup-kubectl@v3 - with: - version: 'latest' - - - name: Configure kubectl - run: | - mkdir -p ~/.kube - echo "${{ secrets.KUBECONFIG }}" | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config - - name: Set image tag run: | - IMAGE_TAG="${{ github.event.inputs.image_tag || 'latest' }}" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + IMAGE_TAG="${{ github.event.inputs.image_tag || 'latest' }}" + else + # For 
auto-promotion, use the latest successful build + IMAGE_TAG="main-$(echo "${{ github.sha }}" | cut -c1-7)" + fi echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV echo "Deploying image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:$IMAGE_TAG" - - name: Deploy to production + - name: Deploy to production via webhook (Blue-Green) run: | - echo "๐Ÿš€ Deploying to production environment..." + echo "๐Ÿš€ Triggering blue-green webhook deployment to production..." - # Apply namespace - kubectl apply -f manifests/prod/namespace.yml + # Prepare deployment payload + PAYLOAD=$(cat </dev/null; then - echo "๐Ÿ” Copying GHCR secret to prod namespace..." - kubectl get secret ghcr-secret -o yaml | \ - sed 's/namespace: default/namespace: game-2048-prod/' | \ - sed '/resourceVersion:/d' | \ - sed '/uid:/d' | \ - sed '/creationTimestamp:/d' | \ - kubectl apply -f - - fi + # Generate HMAC signature for webhook security + SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "${{ secrets.WEBHOOK_SECRET }}" -binary | base64) - # Update image in service and deploy - kubectl patch ksvc game-2048-prod -n game-2048-prod --type merge -p '{"spec":{"template":{"spec":{"containers":[{"image":"${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}","imagePullPolicy":"Always"}]}}}}' + # Send webhook + HTTP_CODE=$(curl -s -o /tmp/webhook_response.json -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + -H "X-Signature-SHA256: sha256=$SIGNATURE" \ + -H "X-GitHub-Event: deployment" \ + -H "X-GitHub-Delivery: ${{ github.run_id }}" \ + -d "$PAYLOAD" \ + "${{ secrets.PROD_WEBHOOK_URL }}") - echo "โณ Waiting for deployment to be ready..." 
- kubectl wait --for=condition=Ready ksvc/game-2048-prod -n game-2048-prod --timeout=300s || echo "โš ๏ธ Service may still be starting" - - - name: Verify deployment - run: | - echo "๐Ÿ“Š Deployment status:" - kubectl get ksvc -n game-2048-prod + echo "Webhook response code: $HTTP_CODE" + cat /tmp/webhook_response.json || echo "No response body" - echo "" - echo "โœ… Production deployment completed!" - echo "๐ŸŒ Available at: https://2048.wa.darknex.us" - - - name: Run smoke test - run: | - echo "๐Ÿงช Running smoke test..." - sleep 30 - - for i in {1..5}; do - echo "Attempt $i/5..." - # Test canonical domain first - if curl -s --max-time 30 https://game-2048-prod.game-2048-prod.wa.darknex.us/ | grep -q "2048"; then - echo "โœ… Canonical domain smoke test passed!" - break - # Fallback to custom domain - elif curl -s --max-time 30 https://2048.wa.darknex.us/ | grep -q "2048"; then - echo "โœ… Custom domain smoke test passed!" - break - elif [ $i -eq 5 ]; then - echo "โš ๏ธ Smoke test failed after 5 attempts" - exit 1 - else - echo "Retrying in 30 seconds..." - sleep 30 - fi - done - - - name: Create production deployment summary - run: | - echo "## ๐Ÿš€ Production Deployment Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Field | Value |" >> $GITHUB_STEP_SUMMARY - echo "|-------|-------|" >> $GITHUB_STEP_SUMMARY - echo "| Environment | **Production** |" >> $GITHUB_STEP_SUMMARY - echo "| Image | \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}\` |" >> $GITHUB_STEP_SUMMARY - echo "| Domain | https://2048.wa.darknex.us |" >> $GITHUB_STEP_SUMMARY - echo "| Status | โœ… **LIVE** |" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### ๐ŸŽ‰ Production is Live!" 
>> $GITHUB_STEP_SUMMARY - echo "- ๐ŸŽฎ [Play the game](https://2048.wa.darknex.us)" >> $GITHUB_STEP_SUMMARY - echo "- ๐Ÿงช [Run smoke tests](https://github.com/${{ github.repository }}/actions/workflows/smoke-test.yml)" >> $GITHUB_STEP_SUMMARY - - - name: Log in to Container Registry - uses: docker/login-action@v3 - with: - registry: ${{ env.REGISTRY }} - username: ${{ github.actor }} - password: ${{ secrets.GITHUB_TOKEN }} - - - name: Extract metadata - id: meta - uses: docker/metadata-action@v5 - with: - images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} - tags: | - type=ref,event=tag - type=semver,pattern={{version}} - type=semver,pattern={{major}}.{{minor}} - - - name: Build and push Docker image - uses: docker/build-push-action@v5 - with: - context: . - push: true - tags: ${{ steps.meta.outputs.tags }} - labels: ${{ steps.meta.outputs.labels }} - - - name: Set up kubectl - uses: azure/setup-kubectl@v3 - with: - version: 'v1.28.0' - - - name: Configure kubectl - run: | - echo "${{ secrets.KUBECONFIG }}" | base64 -d > kubeconfig - export KUBECONFIG=kubeconfig - - - name: Update image in manifests - run: | - TAG="${{ github.event.release.tag_name || github.event.inputs.tag }}" - sed -i "s|ghcr.io/ghndrx/k8s-game-2048:v1.0.0|${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${TAG}|g" manifests/prod/service.yml - - - name: Deploy to production with blue-green strategy - run: | - export KUBECONFIG=kubeconfig - - # Deploy new revision with 0% traffic - kubectl apply -f manifests/prod/ - - # Wait for new revision to be ready - kubectl wait --for=condition=Ready ksvc/game-2048-prod -n game-2048-prod --timeout=300s - - # Get the latest revision name - LATEST_REVISION=$(kubectl get ksvc game-2048-prod -n game-2048-prod -o jsonpath='{.status.latestReadyRevisionName}') - - # Gradually shift traffic (10%, 50%, 100%) - kubectl patch ksvc game-2048-prod -n game-2048-prod --type='merge' 
-p='{"spec":{"traffic":[{"revisionName":"'$LATEST_REVISION'","percent":10},{"latestRevision":false,"percent":90}]}}' - sleep 60 - - kubectl patch ksvc game-2048-prod -n game-2048-prod --type='merge' -p='{"spec":{"traffic":[{"revisionName":"'$LATEST_REVISION'","percent":50},{"latestRevision":false,"percent":50}]}}' - sleep 60 - - kubectl patch ksvc game-2048-prod -n game-2048-prod --type='merge' -p='{"spec":{"traffic":[{"latestRevision":true,"percent":100}]}}' - - - name: Run production health checks - run: | - # Wait for traffic to stabilize - sleep 60 - # Test the production URL - curl -f https://2048.wa.darknex.us/ || exit 1 - # Additional health checks can be added here - - - name: Get service URL - id: get-url - run: | - export KUBECONFIG=kubeconfig - SERVICE_URL=$(kubectl get ksvc game-2048-prod -n game-2048-prod -o jsonpath='{.status.url}') - echo "service_url=$SERVICE_URL" >> $GITHUB_OUTPUT - echo "๐Ÿš€ Production service deployed at: $SERVICE_URL" - - - name: Set up Node.js for testing - uses: actions/setup-node@v4 - with: - node-version: '18' - cache: 'npm' - cache-dependency-path: tests/package.json - - - name: Install Playwright dependencies - run: | - cd tests - npm install - npx playwright install --with-deps - - - name: Run production smoke tests - run: | - cd tests - BASE_URL=${{ steps.get-url.outputs.service_url }} npx playwright test environment.spec.ts - env: - CI: true - - - name: Run full test suite - run: | - cd tests - BASE_URL=${{ steps.get-url.outputs.service_url }} npx playwright test - env: - CI: true - - - name: Upload production test results - uses: actions/upload-artifact@v4 - if: always() - with: - name: playwright-results-production-${{ github.sha }}-${{ github.run_number }} - path: | - tests/playwright-report/ - tests/test-results/ - retention-days: 90 - - - name: Upload production screenshots - uses: actions/upload-artifact@v4 - if: always() - with: - name: screenshots-production-${{ github.sha }}-${{ github.run_number }} - path: 
tests/test-results/**/*.png - retention-days: 90 - - - name: Production health validation - run: | - # Extended health checks for production - echo "๐Ÿ” Running production health checks..." - - # Test main URL - curl -f https://2048.wa.darknex.us/ || exit 1 - - # Test health endpoint - curl -f https://2048.wa.darknex.us/health || exit 1 - - # Check response times - RESPONSE_TIME=$(curl -o /dev/null -s -w '%{time_total}' https://2048.wa.darknex.us/) - echo "Response time: ${RESPONSE_TIME}s" - - # Fail if response time > 3 seconds - if (( $(echo "$RESPONSE_TIME > 3.0" | bc -l) )); then - echo "โŒ Response time too slow: ${RESPONSE_TIME}s" + if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then + echo "โœ… Webhook deployment triggered successfully!" + else + echo "โŒ Webhook deployment failed with code: $HTTP_CODE" exit 1 fi + + - name: Wait for blue-green deployment phases + run: | + echo "โณ Waiting for blue-green deployment phases..." + echo "Phase 1: Initial deployment (10% traffic) - 2 minutes" + sleep 120 - echo "โœ… All production health checks passed!" + echo "Phase 2: Intermediate traffic split (50%) - 2 minutes" + sleep 120 + + echo "Phase 3: Full traffic switch (100%) - 1 minute" + sleep 60 + + echo "โœ… Blue-green deployment phases completed" + + - name: Production health check + run: | + echo "๐Ÿฅ Performing comprehensive production health check..." + MAX_RETRIES=10 + RETRY_COUNT=0 + + # Get the canonical Knative domain for health check + HEALTH_URL="https://game-2048-prod.game-2048-prod.${{ secrets.KNATIVE_DOMAIN }}" + + while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do + echo "Attempt $((RETRY_COUNT + 1))/$MAX_RETRIES - Checking: $HEALTH_URL" + + # Check if service responds + if curl -f -s --max-time 10 "$HEALTH_URL" > /dev/null; then + echo "โœ… Basic health check passed!" + + # Additional production validations + echo "๐Ÿ” Running extended production validations..." 
+ + # Check response time + RESPONSE_TIME=$(curl -o /dev/null -s -w '%{time_total}' "$HEALTH_URL") + echo "Response time: ${RESPONSE_TIME}s" + + # Check if response contains expected content + if curl -s --max-time 10 "$HEALTH_URL" | grep -q "2048"; then + echo "โœ… Content validation passed!" + echo "๐ŸŒ Production application is live at: $HEALTH_URL" + exit 0 + else + echo "โš ๏ธ Content validation failed, retrying..." + fi + else + echo "โš ๏ธ Health check failed, retrying in 20 seconds..." + sleep 20 + RETRY_COUNT=$((RETRY_COUNT + 1)) + fi + done + + echo "โŒ Production health check failed after $MAX_RETRIES attempts" + echo "The deployment webhook was sent successfully, but the service is not responding correctly" + echo "Please check your cluster logs and consider rolling back" + exit 1 + + - name: Production deployment summary + if: always() + run: | + echo "## ๐Ÿš€ Production Deployment Summary" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** Production" >> $GITHUB_STEP_SUMMARY + echo "- **Image:** \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Deployment Method:** Webhook-based Blue-Green" >> $GITHUB_STEP_SUMMARY + echo "- **Strategy:** 10% โ†’ 50% โ†’ 100% traffic split" >> $GITHUB_STEP_SUMMARY + echo "- **Triggered by:** ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY + + if [ "${{ github.event_name }}" = "workflow_run" ]; then + echo "- **Type:** Auto-promotion from Staging" >> $GITHUB_STEP_SUMMARY + else + echo "- **Type:** Manual deployment with confirmation" >> $GITHUB_STEP_SUMMARY + fi + + if [ "${{ job.status }}" = "success" ]; then + echo "- **Status:** โœ… **LIVE IN PRODUCTION**" >> $GITHUB_STEP_SUMMARY + echo "- **URL:** https://game-2048-prod.game-2048-prod.${{ secrets.KNATIVE_DOMAIN }}" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### ๐ŸŽ‰ Production is Live!" 
>> $GITHUB_STEP_SUMMARY + echo "- ๐ŸŽฎ [Play the game](https://game-2048-prod.game-2048-prod.${{ secrets.KNATIVE_DOMAIN }})" >> $GITHUB_STEP_SUMMARY + echo "- ๐Ÿงช [Run smoke tests](https://github.com/${{ github.repository }}/actions/workflows/smoke-test.yml)" >> $GITHUB_STEP_SUMMARY + else + echo "- **Status:** โŒ Failed" >> $GITHUB_STEP_SUMMARY + echo "" >> $GITHUB_STEP_SUMMARY + echo "### โš ๏ธ Production Deployment Failed" >> $GITHUB_STEP_SUMMARY + echo "Please check the logs and consider manual intervention or rollback." >> $GITHUB_STEP_SUMMARY + fi diff --git a/.github/workflows/deploy-staging.yml b/.github/workflows/deploy-staging.yml index c54dd4b..aaf0544 100644 --- a/.github/workflows/deploy-staging.yml +++ b/.github/workflows/deploy-staging.yml @@ -1,116 +1,139 @@ name: Deploy to Staging on: - push: - branches: [ staging ] workflow_dispatch: inputs: image_tag: description: 'Image tag to deploy (default: latest)' required: false default: 'latest' + workflow_run: + workflows: ["Deploy to Development"] + types: + - completed + branches: [ main, master ] env: REGISTRY: ghcr.io - IMAGE_NAME: ghndrx/k8s-game-2048 + IMAGE_NAME: ${{ github.repository }} jobs: deploy-staging: name: Deploy to Staging runs-on: ubuntu-latest environment: staging + if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Set up kubectl - uses: azure/setup-kubectl@v3 - with: - version: 'latest' - - - name: Configure kubectl - run: | - mkdir -p ~/.kube - echo "${{ secrets.KUBECONFIG }}" | base64 -d > ~/.kube/config - chmod 600 ~/.kube/config - - name: Set image tag run: | - IMAGE_TAG="${{ github.event.inputs.image_tag || 'latest' }}" + if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then + IMAGE_TAG="${{ github.event.inputs.image_tag || 'latest' }}" + else + # For auto-promotion, use the latest successful build + IMAGE_TAG="main-$(echo "${{ github.sha }}" | 
cut -c1-7)" + fi echo "IMAGE_TAG=$IMAGE_TAG" >> $GITHUB_ENV echo "Deploying image: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:$IMAGE_TAG" - - name: Deploy to staging + - name: Deploy to staging via webhook run: | - echo "๐Ÿš€ Deploying to staging environment..." + echo "๐Ÿš€ Triggering webhook deployment to staging..." - # Apply namespace - kubectl apply -f manifests/staging/namespace.yml + # Prepare deployment payload + PAYLOAD=$(cat </dev/null; then - echo "๐Ÿ” Copying GHCR secret to staging namespace..." - kubectl get secret ghcr-secret -o yaml | \ - sed 's/namespace: default/namespace: game-2048-staging/' | \ - sed '/resourceVersion:/d' | \ - sed '/uid:/d' | \ - sed '/creationTimestamp:/d' | \ - kubectl apply -f - + # Generate HMAC signature for webhook security + SIGNATURE=$(echo -n "$PAYLOAD" | openssl dgst -sha256 -hmac "${{ secrets.WEBHOOK_SECRET }}" -binary | base64) + + # Send webhook + HTTP_CODE=$(curl -s -o /tmp/webhook_response.json -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + -H "X-Signature-SHA256: sha256=$SIGNATURE" \ + -H "X-GitHub-Event: deployment" \ + -H "X-GitHub-Delivery: ${{ github.run_id }}" \ + -d "$PAYLOAD" \ + "${{ secrets.STAGING_WEBHOOK_URL }}") + + echo "Webhook response code: $HTTP_CODE" + cat /tmp/webhook_response.json || echo "No response body" + + if [ "$HTTP_CODE" -ge 200 ] && [ "$HTTP_CODE" -lt 300 ]; then + echo "โœ… Webhook deployment triggered successfully!" + else + echo "โŒ Webhook deployment failed with code: $HTTP_CODE" + exit 1 fi - - # Apply the Knative service manifest first - kubectl apply -f manifests/staging/service.yml - - # Update image in service - kubectl patch ksvc game-2048-staging -n game-2048-staging --type merge -p '{"spec":{"template":{"spec":{"containers":[{"image":"${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}","imagePullPolicy":"Always"}]}}}}' - - echo "โณ Waiting for deployment to be ready..." 
- kubectl wait --for=condition=Ready ksvc/game-2048-staging -n game-2048-staging --timeout=300s || echo "โš ๏ธ Service may still be starting" - - name: Verify deployment + - name: Wait for deployment to complete run: | - echo "๐Ÿ“Š Deployment status:" - kubectl get ksvc -n game-2048-staging - - echo "" - echo "โœ… Staging deployment completed!" - echo "๐ŸŒ Available at: https://2048-staging.wa.darknex.us" + echo "โณ Waiting for deployment to stabilize..." + sleep 45 - - name: Run smoke test + - name: Health check run: | - echo "๐Ÿงช Running smoke test..." - sleep 30 + echo "๐Ÿฅ Performing health check..." + MAX_RETRIES=10 + RETRY_COUNT=0 - for i in {1..5}; do - echo "Attempt $i/5..." - # Test canonical domain first - if curl -s --max-time 30 https://game-2048-staging.game-2048-staging.staging.wa.darknex.us/ | grep -q "2048"; then - echo "โœ… Canonical domain smoke test passed!" - break - # Fallback to custom domain - elif curl -s --max-time 30 https://2048-staging.wa.darknex.us/ | grep -q "2048"; then - echo "โœ… Custom domain smoke test passed!" - break - elif [ $i -eq 5 ]; then - echo "โš ๏ธ Smoke test failed after 5 attempts" - exit 1 + # Get the canonical Knative domain for health check + HEALTH_URL="https://game-2048-staging.game-2048-staging.${{ secrets.KNATIVE_DOMAIN }}" + + while [ $RETRY_COUNT -lt $MAX_RETRIES ]; do + echo "Attempt $((RETRY_COUNT + 1))/$MAX_RETRIES - Checking: $HEALTH_URL" + + if curl -f -s --max-time 10 "$HEALTH_URL" > /dev/null; then + echo "โœ… Health check passed!" + echo "๐ŸŒ Application is available at: $HEALTH_URL" + exit 0 else - echo "Retrying in 30 seconds..." - sleep 30 + echo "โš ๏ธ Health check failed, retrying in 15 seconds..." 
+ sleep 15 + RETRY_COUNT=$((RETRY_COUNT + 1)) fi done + + echo "โŒ Health check failed after $MAX_RETRIES attempts" + echo "The deployment webhook was sent successfully, but the service is not responding" + echo "Please check your cluster logs for deployment issues" + exit 1 - - name: Create deployment summary + - name: Deployment summary + if: always() run: | echo "## ๐Ÿš€ Staging Deployment Summary" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "| Component | Status |" >> $GITHUB_STEP_SUMMARY - echo "|-----------|--------|" >> $GITHUB_STEP_SUMMARY - echo "| Namespace | โœ… Applied |" >> $GITHUB_STEP_SUMMARY - echo "| Service | โœ… Deployed |" >> $GITHUB_STEP_SUMMARY - echo "| Health Check | โœ… Passed |" >> $GITHUB_STEP_SUMMARY - echo "" >> $GITHUB_STEP_SUMMARY - echo "### ๐Ÿ”— URLs" >> $GITHUB_STEP_SUMMARY - echo "- **Canonical**: https://game-2048-staging.game-2048-staging.staging.wa.darknex.us" >> $GITHUB_STEP_SUMMARY - echo "- **Custom**: https://2048-staging.wa.darknex.us" >> $GITHUB_STEP_SUMMARY + echo "- **Environment:** Staging" >> $GITHUB_STEP_SUMMARY + echo "- **Image:** \`${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ env.IMAGE_TAG }}\`" >> $GITHUB_STEP_SUMMARY + echo "- **Deployment Method:** Webhook-based" >> $GITHUB_STEP_SUMMARY + echo "- **Triggered by:** ${{ github.actor }}" >> $GITHUB_STEP_SUMMARY + echo "- **Commit:** ${{ github.sha }}" >> $GITHUB_STEP_SUMMARY + + if [ "${{ github.event_name }}" = "workflow_run" ]; then + echo "- **Type:** Auto-promotion from Development" >> $GITHUB_STEP_SUMMARY + else + echo "- **Type:** Manual deployment" >> $GITHUB_STEP_SUMMARY + fi + + if [ "${{ job.status }}" = "success" ]; then + echo "- **Status:** โœ… Success" >> $GITHUB_STEP_SUMMARY + echo "- **URL:** https://game-2048-staging.game-2048-staging.${{ secrets.KNATIVE_DOMAIN }}" >> $GITHUB_STEP_SUMMARY + else + echo "- **Status:** โŒ Failed" >> $GITHUB_STEP_SUMMARY + fi diff --git a/docs/WEBHOOK_DEPLOYMENT.md b/docs/WEBHOOK_DEPLOYMENT.md 
new file mode 100644 index 0000000..54eb3d5 --- /dev/null +++ b/docs/WEBHOOK_DEPLOYMENT.md @@ -0,0 +1,249 @@ +# Webhook-Based Deployment Guide + +This guide explains how to set up the webhook-based deployment system for the k8s-game-2048 application, designed to work with k3s clusters behind NAT (no direct API access). + +## Overview + +The deployment pipeline uses secure webhooks instead of direct kubectl/SSH access, making it perfect for k3s clusters behind NAT or firewall restrictions. Each environment (dev, staging, prod) has its own webhook endpoint that receives deployment instructions and applies them locally. + +## Architecture + +``` +GitHub Actions → HTTPS Webhook → Local Webhook Handler → kubectl apply +``` + +### Deployment Flow + +1. **Development**: Triggered on push to `main`/`master` +2. **Staging**: Auto-promoted from successful dev deployment +3. **Production**: Auto-promoted from successful staging OR manual deployment with confirmation + +## Required Secrets + +Configure these secrets in your GitHub repository settings: + +### GitHub Container Registry +- `GITHUB_TOKEN` - Automatically provided by GitHub Actions + +### Webhook Endpoints +- `DEV_WEBHOOK_URL` - Your development webhook endpoint +- `STAGING_WEBHOOK_URL` - Your staging webhook endpoint +- `PROD_WEBHOOK_URL` - Your production webhook endpoint + +### Security +- `WEBHOOK_SECRET` - Shared secret for HMAC signature verification +- `KNATIVE_DOMAIN` - Your Knative cluster domain (e.g., `staging.wa.darknex.us`) + +## Webhook Handler Implementation + +You need to implement webhook handlers on your k3s cluster that: + +1. **Receive** webhook POST requests with deployment details +2. **Verify** HMAC signatures for security +3. **Pull** the specified Docker image +4. **Apply** Kubernetes manifests +5. 
**Return** deployment status + +### Example Webhook Payload + +```jsonc +{ + "environment": "development", + "image": "ghcr.io/owner/repo:tag", + "namespace": "game-2048-dev", + "service_name": "game-2048-dev", + "deployment_id": "123456-1", + "commit_sha": "abc123...", + "triggered_by": "username", + "timestamp": "2024-01-01T12:00:00Z", + "auto_promotion": false, + "deployment_strategy": "rolling" // or "blue-green" for prod +} +``` + +### Security Headers + +The webhook includes these security headers: +- `X-Signature-SHA256`: `sha256=` followed by the Base64-encoded HMAC-SHA256 of the raw request body, keyed with `WEBHOOK_SECRET` +- `X-GitHub-Event`: Always "deployment" +- `X-GitHub-Delivery`: The GitHub Actions run ID (`github.run_id`) + +### Sample Webhook Handler (Python Flask) + +```python +import base64 +import hashlib +import hmac +import subprocess +from flask import Flask, request, jsonify + +app = Flask(__name__) +WEBHOOK_SECRET = "your-webhook-secret" + +def verify_signature(payload, signature): + expected = hmac.new( + WEBHOOK_SECRET.encode(), + payload, + hashlib.sha256 + ).digest() + return hmac.compare_digest(f"sha256={base64.b64encode(expected).decode()}", signature) + +@app.route('/webhook/deploy', methods=['POST']) +def deploy(): + # Verify signature + signature = request.headers.get('X-Signature-SHA256') + if not verify_signature(request.data, signature): + return jsonify({"error": "Invalid signature"}), 401 + + data = request.json + image = data['image'] + namespace = data['namespace'] + + try: + # Pull image + subprocess.run(['docker', 'pull', image], check=True) + + # Apply manifests + subprocess.run([ + 'kubectl', 'apply', '-f', f'manifests/{data["environment"]}/' + ], check=True) + + # Update image + subprocess.run([ + 'kubectl', 'patch', 'ksvc', data['service_name'], + '-n', namespace, + '--type', 'merge', + '-p', f'{{"spec":{{"template":{{"spec":{{"containers":[{{"image":"{image}","imagePullPolicy":"Always"}}]}}}}}}}}' + ], check=True) + + return jsonify({"status": "success", "deployment_id": data['deployment_id']}) + + except subprocess.CalledProcessError 
as e: + return jsonify({"error": str(e)}), 500 + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=8080) +``` + +## Deployment Strategies + +### Development & Staging +- **Strategy**: Rolling update +- **Traffic**: Immediate 100% switch +- **Verification**: Health check after 30-45 seconds + +### Production +- **Strategy**: Blue-Green deployment +- **Traffic Split**: 10% → 50% → 100% over 5 minutes +- **Verification**: Extended health checks with content validation (response time is logged, not enforced) + +## Health Checks + +All environments use canonical Knative domains for health checks: +- **Dev**: `https://game-2048-dev.game-2048-dev.{KNATIVE_DOMAIN}` +- **Staging**: `https://game-2048-staging.game-2048-staging.{KNATIVE_DOMAIN}` +- **Prod**: `https://game-2048-prod.game-2048-prod.{KNATIVE_DOMAIN}` + +## Auto-Promotion Pipeline + +``` +Push to main → Dev Deployment → Staging Deployment → Production (manual/auto) +``` + +### Triggers +- **Dev**: Automatic on code changes +- **Staging**: Automatic on successful dev deployment +- **Prod**: Automatic on successful staging deployment OR manual with confirmation + +## Manual Deployment + +### Staging +```bash +# Trigger staging deployment manually +gh workflow run deploy-staging.yml -f image_tag=v1.2.3 +``` + +### Production +```bash +# Trigger production deployment (requires confirmation) +gh workflow run deploy-prod.yml -f image_tag=v1.2.3 -f confirmation=DEPLOY +``` + +## Monitoring & Debugging + +### GitHub Actions Logs +- View deployment progress in the Actions tab +- Check webhook response codes and payloads +- Monitor health check results + +### Cluster-Side Debugging +```bash +# Check webhook handler logs +kubectl logs -n webhook-system deployment/webhook-handler + +# Check service status +kubectl get ksvc -n game-2048-dev + +# Check recent deployments +kubectl get revisions -n game-2048-dev +``` + +## Security Considerations + +1. **HMAC Verification**: All webhooks are signed with SHA-256 HMAC +2. 
**HTTPS Only**: All webhook endpoints must use HTTPS +3. **Secret Rotation**: Regularly rotate the `WEBHOOK_SECRET` +4. **Network Security**: Consider IP allowlisting for webhook endpoints +5. **Audit Logging**: Log all deployment requests with timestamps and users + +## Troubleshooting + +### Common Issues + +#### Webhook Timeout +- **Symptom**: HTTP 408 or connection timeout +- **Solution**: Check webhook handler is running and accessible +- **Debug**: Test webhook endpoint manually with curl + +#### Signature Verification Failed +- **Symptom**: HTTP 401 from webhook +- **Solution**: Verify `WEBHOOK_SECRET` matches on both sides +- **Debug**: Check HMAC calculation in webhook handler + +#### Image Pull Errors +- **Symptom**: Deployment fails after webhook success +- **Solution**: Ensure image exists and registry credentials are configured +- **Debug**: Check `kubectl get events` in the target namespace + +#### Health Check Failures +- **Symptom**: Deployment marked as failed despite successful webhook +- **Solution**: Verify Knative domain configuration and service startup time +- **Debug**: Check service logs and Knative serving controller logs + +### Manual Recovery + +If automated deployment fails, you can deploy manually: + +```bash +# Set image and apply manifests +kubectl patch ksvc game-2048-dev -n game-2048-dev \ + --type merge \ + -p '{"spec":{"template":{"spec":{"containers":[{"image":"ghcr.io/owner/repo:tag","imagePullPolicy":"Always"}]}}}}' +``` + +## Benefits of Webhook-Based Deployment + +1. **NAT-Friendly**: Works with k3s clusters behind NAT/firewall +2. **Secure**: HMAC-signed webhooks prevent unauthorized deployments +3. **Scalable**: Can handle multiple clusters and environments +4. **Auditable**: Full deployment history in GitHub Actions +5. **Flexible**: Supports various deployment strategies +6. **Reliable**: Retry logic and health checks ensure successful deployments + +## Next Steps + +1. Implement webhook handlers for each environment +2. 
Configure webhook endpoints and secrets +3. Test the deployment pipeline end-to-end +4. Set up monitoring and alerting for webhook handlers +5. Document environment-specific configuration diff --git a/manifests/webhook/webhook-handler.yaml b/manifests/webhook/webhook-handler.yaml new file mode 100644 index 0000000..275e8be --- /dev/null +++ b/manifests/webhook/webhook-handler.yaml @@ -0,0 +1,170 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: webhook-system + labels: + name: webhook-system +--- +apiVersion: v1 +kind: Secret +metadata: + name: webhook-secret + namespace: webhook-system +type: Opaque +stringData: + webhook-secret: "CHANGE_ME_IN_PRODUCTION" # Replace with your actual webhook secret +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: webhook-handler-config + namespace: webhook-system +data: + MANIFESTS_PATH: "/app/manifests" +--- +apiVersion: apps/v1 +kind: Deployment +metadata: + name: webhook-handler + namespace: webhook-system + labels: + app: webhook-handler +spec: + replicas: 2 # For high availability + selector: + matchLabels: + app: webhook-handler + template: + metadata: + labels: + app: webhook-handler + spec: + serviceAccountName: webhook-handler + containers: + - name: webhook-handler + image: python:3.11-slim + ports: + - containerPort: 8080 + name: http + env: + - name: WEBHOOK_SECRET + valueFrom: + secretKeyRef: + name: webhook-secret + key: webhook-secret + - name: MANIFESTS_PATH + valueFrom: + configMapKeyRef: + name: webhook-handler-config + key: MANIFESTS_PATH + command: + - /bin/bash + - -c + - | + apt-get update && apt-get install -y curl + curl -LO "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" + chmod +x kubectl && mv kubectl /usr/local/bin/ + curl -fsSL https://get.docker.com | sh + pip install flask + python /app/webhook-handler.py + volumeMounts: + - name: webhook-handler-script + mountPath: 
/app/webhook-handler.py + subPath: webhook-handler.py + - name: manifests + mountPath: /app/manifests + - name: docker-socket + mountPath: /var/run/docker.sock + - name: kubeconfig + mountPath: /root/.kube/config + subPath: config + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + resources: + requests: + memory: "256Mi" + cpu: "250m" + limits: + memory: "512Mi" + cpu: "500m" + volumes: + - name: webhook-handler-script + configMap: + name: webhook-handler-script + defaultMode: 0755 + - name: manifests + hostPath: + path: /home/administrator/k8s-game-2048/manifests # Update this path + type: Directory + - name: docker-socket + hostPath: + path: /var/run/docker.sock + type: Socket + - name: kubeconfig + hostPath: + path: /etc/rancher/k3s/k3s.yaml # Default k3s kubeconfig location + type: File +--- +apiVersion: v1 +kind: Service +metadata: + name: webhook-handler + namespace: webhook-system + labels: + app: webhook-handler +spec: + selector: + app: webhook-handler + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + type: ClusterIP +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: webhook-handler + namespace: webhook-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: webhook-handler +rules: +- apiGroups: [""] + resources: ["namespaces", "secrets", "configmaps", "services"] + verbs: ["get", "list", "create", "update", "patch", "delete"] +- apiGroups: ["apps"] + resources: ["deployments", "replicasets"] + verbs: ["get", "list", "create", "update", "patch", "delete"] +- apiGroups: ["serving.knative.dev"] + resources: ["services", "revisions"] + verbs: ["get", "list", "create", "update", "patch", "delete"] +- apiGroups: [""] + resources: ["events", "pods"] + verbs: ["get", "list", "watch"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: 
ClusterRoleBinding +metadata: + name: webhook-handler +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: webhook-handler +subjects: +- kind: ServiceAccount + name: webhook-handler + namespace: webhook-system diff --git a/manifests/webhook/webhook-ingress.yaml b/manifests/webhook/webhook-ingress.yaml new file mode 100644 index 0000000..ddd3318 --- /dev/null +++ b/manifests/webhook/webhook-ingress.yaml @@ -0,0 +1,42 @@ +apiVersion: v1 +kind: Service +metadata: + name: webhook-handler-external + namespace: webhook-system + labels: + app: webhook-handler +spec: + selector: + app: webhook-handler + ports: + - name: http + port: 80 + targetPort: 8080 + protocol: TCP + type: LoadBalancer # Change to NodePort if LoadBalancer is not available +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: webhook-handler-ingress + namespace: webhook-system + annotations: + nginx.ingress.kubernetes.io/ssl-redirect: "true" + nginx.ingress.kubernetes.io/force-ssl-redirect: "true" + cert-manager.io/cluster-issuer: "letsencrypt-prod" # Adjust to your cert issuer +spec: + tls: + - hosts: + - webhook.yourdomain.com # Replace with your actual domain + secretName: webhook-tls + rules: + - host: webhook.yourdomain.com # Replace with your actual domain + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: webhook-handler + port: + number: 80 diff --git a/manifests/webhook/webhook-script-configmap.yaml b/manifests/webhook/webhook-script-configmap.yaml new file mode 100644 index 0000000..a63ebd5 --- /dev/null +++ b/manifests/webhook/webhook-script-configmap.yaml @@ -0,0 +1,288 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: webhook-handler-script + namespace: webhook-system +data: + webhook-handler.py: | + #!/usr/bin/env python3 + """ + Webhook deployment handler for k8s-game-2048 + Receives webhook requests from GitHub Actions and deploys to k3s cluster + """ + + import hashlib + import hmac + import json + import logging 
+ import os + import subprocess + import time + from datetime import datetime + from flask import Flask, request, jsonify + + # Configuration + app = Flask(__name__) + logging.basicConfig(level=logging.INFO) + logger = logging.getLogger(__name__) + + WEBHOOK_SECRET = os.environ.get('WEBHOOK_SECRET', 'change-me-in-production') + MANIFESTS_PATH = os.environ.get('MANIFESTS_PATH', '/app/manifests') + + def verify_signature(payload, signature): + """Verify HMAC signature from GitHub webhook""" + if not signature: + return False + + expected = hmac.new( + WEBHOOK_SECRET.encode('utf-8'), + payload, + hashlib.sha256 + ).hexdigest() + + return hmac.compare_digest(f"sha256={expected}", signature) + + def run_command(cmd, **kwargs): + """Run shell command with logging""" + logger.info(f"Running command: {' '.join(cmd)}") + try: + result = subprocess.run(cmd, check=True, capture_output=True, text=True, **kwargs) + logger.info(f"Command output: {result.stdout}") + return result + except subprocess.CalledProcessError as e: + logger.error(f"Command failed: {e.stderr}") + raise + + def pull_image(image): + """Pull Docker image to ensure it's available""" + logger.info(f"Pulling image: {image}") + run_command(['docker', 'pull', image]) + + def apply_manifests(environment): + """Apply Kubernetes manifests for environment""" + manifest_dir = f"{MANIFESTS_PATH}/{environment}" + logger.info(f"Applying manifests from: {manifest_dir}") + + if not os.path.exists(manifest_dir): + raise FileNotFoundError(f"Manifest directory not found: {manifest_dir}") + + run_command(['kubectl', 'apply', '-f', manifest_dir]) + + def update_service_image(service_name, namespace, image): + """Update Knative service with new image""" + logger.info(f"Updating service {service_name} in namespace {namespace} with image {image}") + + patch = { + "spec": { + "template": { + "spec": { + "containers": [{ + "image": image, + "imagePullPolicy": "Always" + }] + } + } + } + } + + run_command([ + 'kubectl', 'patch', 
'ksvc', service_name, + '-n', namespace, + '--type', 'merge', + '-p', json.dumps(patch) + ]) + + def wait_for_service_ready(service_name, namespace, timeout=300): + """Wait for Knative service to be ready""" + logger.info(f"Waiting for service {service_name} to be ready...") + + run_command([ + 'kubectl', 'wait', '--for=condition=Ready', + f'ksvc/{service_name}', + '-n', namespace, + f'--timeout={timeout}s' + ]) + + def implement_blue_green_deployment(service_name, namespace, traffic_split): + """Implement blue-green deployment with gradual traffic shifting""" + if not traffic_split: + return + + logger.info("Starting blue-green deployment...") + + # Get the latest revision + result = run_command([ + 'kubectl', 'get', 'ksvc', service_name, + '-n', namespace, + '-o', 'jsonpath={.status.latestReadyRevisionName}' + ]) + latest_revision = result.stdout.strip() + + if not latest_revision: + logger.warning("No latest revision found, skipping traffic split") + return + + # Phase 1: Initial traffic (e.g., 10%) + initial_percent = traffic_split.get('initial', 10) + logger.info(f"Phase 1: Routing {initial_percent}% traffic to new revision") + traffic_patch = { + "spec": { + "traffic": [ + {"revisionName": latest_revision, "percent": initial_percent}, + {"latestRevision": False, "percent": 100 - initial_percent} + ] + } + } + run_command([ + 'kubectl', 'patch', 'ksvc', service_name, + '-n', namespace, + '--type', 'merge', + '-p', json.dumps(traffic_patch) + ]) + time.sleep(60) # Wait 1 minute + + # Phase 2: Intermediate traffic (e.g., 50%) + intermediate_percent = traffic_split.get('intermediate', 50) + logger.info(f"Phase 2: Routing {intermediate_percent}% traffic to new revision") + traffic_patch["spec"]["traffic"] = [ + {"revisionName": latest_revision, "percent": intermediate_percent}, + {"latestRevision": False, "percent": 100 - intermediate_percent} + ] + run_command([ + 'kubectl', 'patch', 'ksvc', service_name, + '-n', namespace, + '--type', 'merge', + '-p', 
json.dumps(traffic_patch) + ]) + time.sleep(60) # Wait 1 minute + + # Phase 3: Full traffic (100%) + logger.info("Phase 3: Routing 100% traffic to new revision") + traffic_patch["spec"]["traffic"] = [ + {"latestRevision": True, "percent": 100} + ] + run_command([ + 'kubectl', 'patch', 'ksvc', service_name, + '-n', namespace, + '--type', 'merge', + '-p', json.dumps(traffic_patch) + ]) + + @app.route('/webhook/deploy', methods=['POST']) + def deploy(): + """Main webhook endpoint for deployments""" + try: + # Verify signature + signature = request.headers.get('X-Signature-SHA256') + if not verify_signature(request.data, signature): + logger.warning("Invalid webhook signature") + return jsonify({"error": "Invalid signature"}), 401 + + # Parse payload + data = request.json + if not data: + return jsonify({"error": "No JSON payload"}), 400 + + # Extract deployment details + environment = data.get('environment') + image = data.get('image') + namespace = data.get('namespace') + service_name = data.get('service_name') + deployment_id = data.get('deployment_id') + deployment_strategy = data.get('deployment_strategy', 'rolling') + traffic_split = data.get('traffic_split') + + # Validate required fields + required_fields = ['environment', 'image', 'namespace', 'service_name'] + missing_fields = [field for field in required_fields if not data.get(field)] + if missing_fields: + return jsonify({"error": f"Missing required fields: {missing_fields}"}), 400 + + logger.info(f"Starting deployment {deployment_id}") + logger.info(f"Environment: {environment}") + logger.info(f"Image: {image}") + logger.info(f"Namespace: {namespace}") + logger.info(f"Service: {service_name}") + logger.info(f"Strategy: {deployment_strategy}") + + # Step 1: Pull the Docker image + pull_image(image) + + # Step 2: Apply manifests + apply_manifests(environment) + + # Step 3: Update service image + update_service_image(service_name, namespace, image) + + # Step 4: Wait for service to be ready + 
wait_for_service_ready(service_name, namespace) + + # Step 5: Apply deployment strategy + if deployment_strategy == 'blue-green' and traffic_split: + implement_blue_green_deployment(service_name, namespace, traffic_split) + + logger.info(f"Deployment {deployment_id} completed successfully") + + return jsonify({ + "status": "success", + "deployment_id": deployment_id, + "timestamp": datetime.utcnow().isoformat(), + "environment": environment, + "image": image, + "strategy": deployment_strategy + }) + + except FileNotFoundError as e: + logger.error(f"File not found: {e}") + return jsonify({"error": str(e)}), 404 + + except subprocess.CalledProcessError as e: + logger.error(f"Command failed: {e}") + return jsonify({"error": f"Command failed: {e.stderr}"}), 500 + + except Exception as e: + logger.error(f"Unexpected error: {e}") + return jsonify({"error": str(e)}), 500 + + @app.route('/health', methods=['GET']) + def health(): + """Health check endpoint""" + return jsonify({ + "status": "healthy", + "timestamp": datetime.utcnow().isoformat(), + "version": "1.0.0" + }) + + @app.route('/status', methods=['GET']) + def status(): + """Status endpoint with cluster information""" + try: + # Get cluster info + result = run_command(['kubectl', 'cluster-info']) + cluster_info = result.stdout + + # Get webhook handler pod info + result = run_command(['kubectl', 'get', 'pods', '-n', 'webhook-system', '--selector=app=webhook-handler']) + pod_info = result.stdout + + return jsonify({ + "status": "operational", + "timestamp": datetime.utcnow().isoformat(), + "cluster_info": cluster_info, + "pod_info": pod_info + }) + except Exception as e: + return jsonify({ + "status": "error", + "timestamp": datetime.utcnow().isoformat(), + "error": str(e) + }) + + if __name__ == '__main__': + # Verify environment + logger.info("Starting webhook deployment handler...") + logger.info(f"Webhook secret configured: {'Yes' if WEBHOOK_SECRET != 'change-me-in-production' else 'No (using default)'}") + 
logger.info(f"Manifests path: {MANIFESTS_PATH}") + + # Start the Flask app + app.run(host='0.0.0.0', port=8080, debug=False) diff --git a/scripts/setup-webhook-deployment.sh b/scripts/setup-webhook-deployment.sh new file mode 100755 index 0000000..d3393a3 --- /dev/null +++ b/scripts/setup-webhook-deployment.sh @@ -0,0 +1,125 @@ +#!/bin/bash +set -e + +# Webhook-based Deployment Setup Script for k8s-game-2048 +echo "๐Ÿš€ Setting up webhook-based deployment for k8s-game-2048..." + +# Configuration +WEBHOOK_SECRET="${WEBHOOK_SECRET:-$(openssl rand -hex 32)}" +MANIFESTS_PATH="${MANIFESTS_PATH:-/home/administrator/k8s-game-2048/manifests}" +WEBHOOK_DOMAIN="${WEBHOOK_DOMAIN:-webhook.$(hostname -f)}" + +echo "๐Ÿ“‹ Configuration:" +echo " Webhook Secret: ${WEBHOOK_SECRET:0:8}..." +echo " Manifests Path: $MANIFESTS_PATH" +echo " Webhook Domain: $WEBHOOK_DOMAIN" + +# Step 1: Create webhook system namespace +echo "" +echo "๐Ÿ“ฆ Creating webhook system namespace..." +kubectl create namespace webhook-system --dry-run=client -o yaml | kubectl apply -f - + +# Step 2: Create webhook secret +echo "๐Ÿ” Creating webhook secret..." +kubectl create secret generic webhook-secret \ + --from-literal=webhook-secret="$WEBHOOK_SECRET" \ + -n webhook-system \ + --dry-run=client -o yaml | kubectl apply -f - + +# Step 3: Update webhook handler manifests with correct paths +echo "๐Ÿ”ง Updating webhook handler manifests..." +sed -i "s|/home/administrator/k8s-game-2048/manifests|$MANIFESTS_PATH|g" manifests/webhook/webhook-handler.yaml +sed -i "s|webhook.yourdomain.com|$WEBHOOK_DOMAIN|g" manifests/webhook/webhook-ingress.yaml + +# Step 4: Deploy webhook handler script ConfigMap +echo "๐Ÿ“œ Deploying webhook handler script..." +kubectl apply -f manifests/webhook/webhook-script-configmap.yaml + +# Step 5: Deploy webhook handler +echo "๐Ÿค– Deploying webhook handler..." 
+kubectl apply -f manifests/webhook/webhook-handler.yaml + +# Step 6: Deploy ingress (optional) +if [ "$DEPLOY_INGRESS" = "true" ]; then + echo "๐ŸŒ Deploying webhook ingress..." + kubectl apply -f manifests/webhook/webhook-ingress.yaml +else + echo "โญ๏ธ Skipping ingress deployment (set DEPLOY_INGRESS=true to enable)" +fi + +# Step 7: Wait for deployment to be ready +echo "โณ Waiting for webhook handler to be ready..." +kubectl wait --for=condition=available deployment/webhook-handler -n webhook-system --timeout=300s + +# Step 8: Get service information +echo "" +echo "๐Ÿ“Š Webhook handler status:" +kubectl get pods -n webhook-system -l app=webhook-handler + +echo "" +echo "๐ŸŒ Service endpoints:" +kubectl get svc -n webhook-system + +# Step 9: Test webhook handler +echo "" +echo "๐Ÿงช Testing webhook handler..." +WEBHOOK_POD=$(kubectl get pods -n webhook-system -l app=webhook-handler -o jsonpath='{.items[0].metadata.name}') +if [ -n "$WEBHOOK_POD" ]; then + echo "Testing health endpoint..." + kubectl port-forward -n webhook-system pod/$WEBHOOK_POD 8080:8080 & + KUBECTL_PID=$! + sleep 5 + + if curl -s http://localhost:8080/health | grep -q "healthy"; then + echo "โœ… Webhook handler health check passed!" + else + echo "โš ๏ธ Webhook handler health check failed" + fi + + kill $KUBECTL_PID 2>/dev/null || true +fi + +# Step 10: Display setup information +echo "" +echo "๐ŸŽ‰ Webhook-based deployment setup completed!" +echo "" +echo "๐Ÿ“ Next steps:" +echo "1. Configure GitHub repository secrets:" +echo " - WEBHOOK_SECRET: $WEBHOOK_SECRET" +echo " - DEV_WEBHOOK_URL: https://$WEBHOOK_DOMAIN/webhook/deploy" +echo " - STAGING_WEBHOOK_URL: https://$WEBHOOK_DOMAIN/webhook/deploy" +echo " - PROD_WEBHOOK_URL: https://$WEBHOOK_DOMAIN/webhook/deploy" +echo " - KNATIVE_DOMAIN: your-knative-domain.com" +echo "" +echo "2. 
Expose webhook handler externally:" +if [ "$DEPLOY_INGRESS" != "true" ]; then + echo " # Option A: Use port-forward for testing" + echo " kubectl port-forward -n webhook-system svc/webhook-handler-external 8080:80" + echo "" + echo " # Option B: Get LoadBalancer IP (if available)" + echo " kubectl get svc webhook-handler-external -n webhook-system" + echo "" + echo " # Option C: Deploy ingress with your domain" + echo " DEPLOY_INGRESS=true WEBHOOK_DOMAIN=your-domain.com ./scripts/setup-webhook-deployment.sh" +fi +echo "" +echo "3. Test webhook endpoint:" +echo " curl -X POST https://$WEBHOOK_DOMAIN/webhook/deploy \\" +echo " -H 'Content-Type: application/json' \\" +echo " -H 'X-Signature-SHA256: sha256=SIGNATURE' \\" +echo " -d '{\"environment\":\"dev\",\"image\":\"nginx:latest\",\"namespace\":\"default\",\"service_name\":\"test\"}'" +echo "" +echo "4. Push code changes to trigger automated deployment!" + +# Output webhook secret for GitHub configuration +echo "" +echo "๐Ÿ”‘ GitHub Secrets Configuration:" +echo "===============================|" +echo "SECRET NAME | SECRET VALUE" +echo "===============================|" +echo "WEBHOOK_SECRET | $WEBHOOK_SECRET" +echo "DEV_WEBHOOK_URL | https://$WEBHOOK_DOMAIN/webhook/deploy" +echo "STAGING_WEBHOOK_URL | https://$WEBHOOK_DOMAIN/webhook/deploy" +echo "PROD_WEBHOOK_URL | https://$WEBHOOK_DOMAIN/webhook/deploy" +echo "KNATIVE_DOMAIN | your-knative-domain.com" +echo "===============================|" diff --git a/scripts/webhook-handler.py b/scripts/webhook-handler.py new file mode 100644 index 0000000..dce8543 --- /dev/null +++ b/scripts/webhook-handler.py @@ -0,0 +1,281 @@ +#!/usr/bin/env python3 +""" +Webhook deployment handler for k8s-game-2048 +Receives webhook requests from GitHub Actions and deploys to k3s cluster +""" + +import hashlib +import hmac +import json +import logging +import os +import subprocess +import time +from datetime import datetime +from flask import Flask, request, jsonify + +# Configuration 
app = Flask(__name__)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Shared HMAC secret. The fallback is a placeholder only; start-up logging
# reports whether a real value was supplied through the environment.
WEBHOOK_SECRET = os.environ.get('WEBHOOK_SECRET', 'change-me-in-production')
# Root directory holding one manifest sub-directory per environment.
MANIFESTS_PATH = os.environ.get('MANIFESTS_PATH', '/app/manifests')


def verify_signature(payload, signature):
    """Check the ``X-Signature-SHA256`` header against the request body.

    The header is expected to be ``sha256=<digest>`` where ``<digest>`` is the
    HMAC-SHA256 of the raw body keyed with ``WEBHOOK_SECRET``. Both the hex
    encoding and the base64 encoding of the digest are accepted: the deploy
    workflow signs with ``openssl dgst -sha256 -hmac ... -binary | base64``,
    while hex remains valid for existing callers.

    Args:
        payload: Raw request body as bytes.
        signature: Header value, or None when the header is absent.

    Returns:
        True when the header matches either encoding, False otherwise
        (including for a missing or empty header).
    """
    # Local import: the file's top-level import block is outside this block.
    import base64

    if not signature:
        return False

    digest = hmac.new(
        WEBHOOK_SECRET.encode('utf-8'),
        payload,
        hashlib.sha256
    ).digest()

    accepted = (
        b"sha256=" + digest.hex().encode('ascii'),
        b"sha256=" + base64.b64encode(digest),
    )

    # Compare as bytes: compare_digest() raises TypeError for str values that
    # contain non-ASCII characters, which a hostile header could trigger.
    received = signature.encode('utf-8')
    # Evaluate every comparison (no short-circuit) before combining them.
    results = [hmac.compare_digest(candidate, received) for candidate in accepted]
    return any(results)


def run_command(cmd, **kwargs):
    """Run a command (argument list, no shell) and log its output.

    Args:
        cmd: Sequence of program name and arguments.
        **kwargs: Extra keyword arguments forwarded to ``subprocess.run``.

    Returns:
        The ``subprocess.CompletedProcess`` with captured text output.

    Raises:
        subprocess.CalledProcessError: The command exited non-zero; stderr is
            logged before the exception is re-raised.
    """
    logger.info(f"Running command: {' '.join(cmd)}")
    try:
        result = subprocess.run(cmd, check=True, capture_output=True, text=True, **kwargs)
        logger.info(f"Command output: {result.stdout}")
        return result
    except subprocess.CalledProcessError as e:
        logger.error(f"Command failed: {e.stderr}")
        raise


def pull_image(image):
    """Pull ``image`` with the local docker CLI so a bad reference fails early."""
    logger.info(f"Pulling image: {image}")
    run_command(['docker', 'pull', image])


def apply_manifests(environment):
    """Apply every manifest in ``MANIFESTS_PATH/<environment>`` with kubectl.

    Args:
        environment: Name of the manifest sub-directory.

    Raises:
        ValueError: ``environment`` is not a single plain path component.
        FileNotFoundError: The resulting directory does not exist.
        subprocess.CalledProcessError: ``kubectl apply`` failed.
    """
    # The value comes from the request payload and becomes part of a path, so
    # reject anything that could leave MANIFESTS_PATH ("..", "a/b", "/etc").
    if (
        not isinstance(environment, str)
        or environment in ('', '.', '..')
        or os.path.basename(environment) != environment
        or '\\' in environment
    ):
        raise ValueError(f"Invalid environment name: {environment!r}")

    manifest_dir = f"{MANIFESTS_PATH}/{environment}"
    logger.info(f"Applying manifests from: {manifest_dir}")

    if not os.path.exists(manifest_dir):
        raise FileNotFoundError(f"Manifest directory not found: {manifest_dir}")

    run_command(['kubectl', 'apply', '-f', manifest_dir])


def update_service_image(service_name, namespace, image):
    """Point the Knative service's first container at ``image``.

    A JSON patch is used rather than a merge patch: a merge patch replaces the
    whole ``containers`` list, which would drop the ports, env and resources
    that ``apply_manifests`` has just applied.

    Args:
        service_name: Name of the Knative service (ksvc).
        namespace: Namespace of the service.
        image: Fully qualified image reference.

    Raises:
        subprocess.CalledProcessError: ``kubectl patch`` failed.
    """
    logger.info(f"Updating service {service_name} in namespace {namespace} with image {image}")

    container = "/spec/template/spec/containers/0"
    patch = [
        {"op": "replace", "path": f"{container}/image", "value": image},
        # "add" sets the member whether or not it already exists.
        {"op": "add", "path": f"{container}/imagePullPolicy", "value": "Always"},
    ]

    run_command([
        'kubectl', 'patch', 'ksvc', service_name,
        '-n', namespace,
        '--type', 'json',
        '-p', json.dumps(patch)
    ])


def wait_for_service_ready(service_name, namespace, timeout=300):
    """Block until the Knative service reports ``Ready`` or the timeout expires.

    Args:
        service_name: Name of the Knative service (ksvc).
        namespace: Namespace of the service.
        timeout: Maximum wait in seconds, passed to ``kubectl wait``.

    Raises:
        subprocess.CalledProcessError: The wait timed out or kubectl failed.
    """
    logger.info(f"Waiting for service {service_name} to be ready...")

    run_command([
        'kubectl', 'wait', '--for=condition=Ready',
        f'ksvc/{service_name}',
        '-n', namespace,
        f'--timeout={timeout}s'
    ])


def _previous_ready_revision(service_name, namespace, current_revision):
    """Return the newest Ready revision of the service other than ``current_revision``.

    Returns None when the service has no other Ready revision.
    """
    result = run_command([
        'kubectl', 'get', 'revisions',
        '-n', namespace,
        '-l', f'serving.knative.dev/service={service_name}',
        '-o',
        r'jsonpath={range .items[*]}{.metadata.creationTimestamp}{" "}'
        r'{.metadata.name}{" "}{.status.conditions[?(@.type=="Ready")].status}{"\n"}{end}'
    ])

    ready = []
    for line in result.stdout.splitlines():
        fields = line.split()
        # Expected shape: "<creationTimestamp> <name> <Ready status>".
        if len(fields) != 3:
            continue
        created, name, ready_status = fields
        if name != current_revision and ready_status == 'True':
            ready.append((created, name))

    if not ready:
        return None
    # Timestamps are RFC 3339 UTC strings, so string order is time order.
    return max(ready)[1]


def _set_traffic(service_name, namespace, targets):
    """Replace the service's ``spec.traffic`` list with ``targets``."""
    run_command([
        'kubectl', 'patch', 'ksvc', service_name,
        '-n', namespace,
        '--type', 'merge',
        '-p', json.dumps({"spec": {"traffic": targets}})
    ])


def implement_blue_green_deployment(service_name, namespace, traffic_split):
    """Shift traffic to the newest revision in steps, then hand it 100%.

    Traffic is split between the latest ready revision and the previous Ready
    revision, each named explicitly. Every traffic target must name a revision
    unless it sets ``latestRevision: true``. If either revision cannot be
    determined the gradual shift is skipped.

    Args:
        service_name: Name of the Knative service (ksvc).
        namespace: Namespace of the service.
        traffic_split: Mapping with optional ``initial`` (default 10) and
            ``intermediate`` (default 50) percentages; falsy disables the shift.

    Raises:
        subprocess.CalledProcessError: A kubectl call failed.
    """
    if not traffic_split:
        return

    logger.info("Starting blue-green deployment...")

    # Get the latest revision
    result = run_command([
        'kubectl', 'get', 'ksvc', service_name,
        '-n', namespace,
        '-o', 'jsonpath={.status.latestReadyRevisionName}'
    ])
    latest_revision = result.stdout.strip()

    if not latest_revision:
        logger.warning("No latest revision found, skipping traffic split")
        return

    previous_revision = _previous_ready_revision(service_name, namespace, latest_revision)
    if not previous_revision:
        logger.warning("No previous ready revision found, skipping traffic split")
        return

    # Phases 1 and 2: partial traffic (e.g., 10% then 50%), one minute each.
    stages = (
        ("Phase 1", traffic_split.get('initial', 10)),
        ("Phase 2", traffic_split.get('intermediate', 50)),
    )
    for label, percent in stages:
        logger.info(f"{label}: Routing {percent}% traffic to new revision")
        _set_traffic(service_name, namespace, [
            {"revisionName": latest_revision, "percent": percent},
            {"revisionName": previous_revision, "percent": 100 - percent},
        ])
        time.sleep(60)  # Wait 1 minute

    # Phase 3: Full traffic (100%)
    logger.info("Phase 3: Routing 100% traffic to new revision")
    _set_traffic(service_name, namespace, [
        {"latestRevision": True, "percent": 100}
    ])


@app.route('/webhook/deploy', methods=['POST'])
def deploy():
    """Main webhook endpoint for deployments.

    Verifies the request signature, validates the JSON payload, then pulls the
    image, applies the environment's manifests, patches the service image,
    waits for readiness and optionally runs the blue-green traffic shift.

    Responses: 200 on success, 400 for a missing/invalid payload, 401 for a bad
    signature, 404 when the manifest directory is missing, 500 otherwise.
    """
    try:
        # Verify signature
        signature = request.headers.get('X-Signature-SHA256')
        if not verify_signature(request.data, signature):
            logger.warning("Invalid webhook signature")
            return jsonify({"error": "Invalid signature"}), 401

        # Parse payload. silent=True yields None for a malformed body or a
        # non-JSON content type instead of raising, so those map to 400.
        data = request.get_json(silent=True)
        if not data or not isinstance(data, dict):
            return jsonify({"error": "No JSON payload"}), 400

        # Extract deployment details
        environment = data.get('environment')
        image = data.get('image')
        namespace = data.get('namespace')
        service_name = data.get('service_name')
        deployment_id = data.get('deployment_id')
        deployment_strategy = data.get('deployment_strategy', 'rolling')
        traffic_split = data.get('traffic_split')

        # Validate required fields
        required_fields = ['environment', 'image', 'namespace', 'service_name']
        missing_fields = [field for field in required_fields if not data.get(field)]
        if missing_fields:
            return jsonify({"error": f"Missing required fields: {missing_fields}"}), 400

        logger.info(f"Starting deployment {deployment_id}")
        logger.info(f"Environment: {environment}")
        logger.info(f"Image: {image}")
        logger.info(f"Namespace: {namespace}")
        logger.info(f"Service: {service_name}")
        logger.info(f"Strategy: {deployment_strategy}")

        # Step 1: Pull the Docker image
        pull_image(image)

        # Step 2: Apply manifests
        apply_manifests(environment)

        # Step 3: Update service image
        update_service_image(service_name, namespace, image)

        # Step 4: Wait for service to be ready
        wait_for_service_ready(service_name, namespace)

        # Step 5: Apply deployment strategy
        if deployment_strategy == 'blue-green' and traffic_split:
            implement_blue_green_deployment(service_name, namespace, traffic_split)

        logger.info(f"Deployment {deployment_id} completed successfully")

        return jsonify({
            "status": "success",
            "deployment_id": deployment_id,
            "timestamp": datetime.utcnow().isoformat(),
            "environment": environment,
            "image": image,
            "strategy": deployment_strategy
        })

    except ValueError as e:
        # Raised by apply_manifests for an unsafe environment name.
        logger.error(f"Invalid request: {e}")
        return jsonify({"error": str(e)}), 400

    except FileNotFoundError as e:
        logger.error(f"File not found: {e}")
        return jsonify({"error": str(e)}), 404

    except subprocess.CalledProcessError as e:
        logger.error(f"Command failed: {e}")
        return jsonify({"error": f"Command failed: {e.stderr}"}), 500

    except Exception as e:
        logger.error(f"Unexpected error: {e}")
        return jsonify({"error": str(e)}), 500


@app.route('/health', methods=['GET'])
def health():
    """Health check endpoint; does not touch the cluster."""
    return jsonify({
        "status": "healthy",
        "timestamp": datetime.utcnow().isoformat(),
        "version": "1.0.0"
    })


@app.route('/status', methods=['GET'])
def status():
    """Status endpoint with cluster information.

    Returns kubectl's cluster-info output and the handler's pod listing, or a
    body with ``"status": "error"`` when either kubectl call fails.
    """
    try:
        # Get cluster info
        result = run_command(['kubectl', 'cluster-info'])
        cluster_info = result.stdout

        # Get webhook handler pod info
        result = run_command(['kubectl', 'get', 'pods', '-n', 'webhook-system', '--selector=app=webhook-handler'])
        pod_info = result.stdout

        return jsonify({
            "status": "operational",
            "timestamp": datetime.utcnow().isoformat(),
            "cluster_info": cluster_info,
            "pod_info": pod_info
        })
    except Exception as e:
        return jsonify({
            "status": "error",
            "timestamp": datetime.utcnow().isoformat(),
            "error": str(e)
        })


if __name__ == '__main__':
    # Verify environment
    logger.info("Starting webhook deployment handler...")
    logger.info(f"Webhook secret configured: {'Yes' if WEBHOOK_SECRET != 'change-me-in-production' else 'No (using default)'}")
    logger.info(f"Manifests path: {MANIFESTS_PATH}")

    # Start the Flask app
    app.run(host='0.0.0.0', port=8080, debug=False)