- Introduced Aggregator.sol for Chainlink-compatible oracle functionality, including round-based updates and access control.
- Added OracleWithCCIP.sol to extend Aggregator with CCIP cross-chain messaging capabilities.
- Created .gitmodules to include OpenZeppelin contracts as a submodule.
- Developed a comprehensive deployment guide in NEXT_STEPS_COMPLETE_GUIDE.md for Phase 2 and smart contract deployment.
- Implemented Vite configuration for the orchestration portal, supporting both Vue and React frameworks.
- Added server-side logic for the Multi-Cloud Orchestration Portal, including API endpoints for environment management and monitoring.
- Created scripts for resource import and usage validation across non-US regions.
- Added tests for CCIP error handling and integration to ensure robust functionality.
- Included various new files and directories for the orchestration portal and deployment scripts.
193 lines
8.2 KiB
Bash
Executable File
193 lines
8.2 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Comprehensive fix for deployment issues.
# Deletes failed/canceled clusters and re-runs Terraform.
#
# Uses the `az`, `jq` and `terraform` commands in the steps below.

# Strict mode:
#   -e           abort on the first unhandled command failure
#   -u           treat expansion of an unset variable as an error
#   -o pipefail  a pipeline fails if ANY stage fails, so a failing
#                `terraform apply ... | tee log` is no longer reported as
#                success just because `tee` succeeded
set -euo pipefail

# Azure subscription passed to every `az` invocation in this script.
SUBSCRIPTION_ID="fc08d829-4f14-413d-ab27-ce024425db0b"

# Repository root, resolved relative to this script (two directories up) so
# the script does not depend on the caller's working directory.
PROJECT_ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)"

# Terraform configuration directory used for the state report and the apply.
TERRAFORM_DIR="$PROJECT_ROOT/terraform/well-architected/cloud-sovereignty"

echo "╔════════════════════════════════════════════════════════════════╗"
echo "║ DEPLOYMENT FIX - COMPREHENSIVE CLEANUP & REDEPLOYMENT ║"
echo "╚════════════════════════════════════════════════════════════════╝"
echo ""
# ---------------------------------------------------------------------------
# Step 1: delete every 'az-p-*' AKS cluster whose provisioningState is
# 'Failed', then wait until `az aks show` no longer finds each of them.
#
# Environment:
#   CLUSTER_DELETE_TIMEOUT  max seconds to poll per cluster (default: 1800)
# ---------------------------------------------------------------------------

# Preflight: everything below is destructive. Make sure the tools and the
# Terraform directory needed to recreate the clusters exist BEFORE deleting
# anything, instead of discovering a missing dependency half-way through.
for required_tool in az jq terraform; do
  if ! command -v "$required_tool" >/dev/null 2>&1; then
    echo "ERROR: required command not found: $required_tool" >&2
    exit 1
  fi
done
if [ ! -d "$TERRAFORM_DIR" ]; then
  echo "ERROR: Terraform directory not found: $TERRAFORM_DIR" >&2
  exit 1
fi

echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Step 1: Delete Failed Clusters (7)"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# JSON array of {name, rg} objects for the clusters to remove.
FAILED_CLUSTERS=$(az aks list --subscription "$SUBSCRIPTION_ID" \
  --query "[?contains(name, 'az-p-') && provisioningState == 'Failed'].{name:name, rg:resourceGroup}" -o json)

FAILED_COUNT=$(echo "$FAILED_CLUSTERS" | jq '. | length')
echo "Found $FAILED_COUNT failed clusters to delete"
echo ""

if [ "$FAILED_COUNT" -gt 0 ]; then
  # Parallel arrays holding only the clusters whose deletion was accepted;
  # polling a cluster whose delete request was rejected would never finish.
  failed_pending_rgs=()
  failed_pending_names=()

  # The loop reads from a process substitution (not a pipe) so the arrays
  # filled here are still visible after the loop.
  while IFS='|' read -r rg name; do
    echo "Deleting failed cluster: $name (RG: $rg)"
    # </dev/null keeps `az` from consuming the loop's remaining input lines.
    if delete_output=$(az aks delete --resource-group "$rg" --name "$name" \
      --subscription "$SUBSCRIPTION_ID" --yes --no-wait 2>&1 </dev/null); then
      # Drop blank lines from the CLI output, as before.
      printf '%s\n' "$delete_output" | grep -v "^$" || true
      echo " ✅ Deletion initiated"
      failed_pending_rgs+=("$rg")
      failed_pending_names+=("$name")
    else
      # Best-effort: report the failure and carry on with the next cluster.
      printf '%s\n' "$delete_output" | grep -v "^$" >&2 || true
      echo " ❌ Could not initiate deletion of $name (RG: $rg)" >&2
    fi
    echo ""
  done < <(echo "$FAILED_CLUSTERS" | jq -r '.[] | "\(.rg)|\(.name)"')

  if [ "${#failed_pending_names[@]}" -gt 0 ]; then
    echo "Waiting for failed cluster deletions to complete..."
    sleep 30

    failed_delete_timeout="${CLUSTER_DELETE_TIMEOUT:-1800}"
    case "$failed_delete_timeout" in
      '' | *[!0-9]*)
        echo "ERROR: CLUSTER_DELETE_TIMEOUT must be a non-negative integer" >&2
        exit 1
        ;;
    esac

    # Wait for deletions, bounded per cluster so a stuck deletion cannot hang
    # the script forever.
    # NOTE(review): any `az aks show` failure (including auth or network
    # errors) is treated as "cluster gone", as before; consider inspecting
    # the error before trusting it.
    for idx in "${!failed_pending_names[@]}"; do
      rg=${failed_pending_rgs[$idx]}
      name=${failed_pending_names[$idx]}
      echo -n " Waiting for $name..."
      waited=0
      while az aks show --resource-group "$rg" --name "$name" \
        --subscription "$SUBSCRIPTION_ID" >/dev/null 2>&1; do
        if [ "$waited" -ge "$failed_delete_timeout" ]; then
          echo " ❌ Still present after ${failed_delete_timeout}s" >&2
          echo "ERROR: timed out waiting for deletion of $name (RG: $rg)" >&2
          exit 1
        fi
        echo -n "."
        sleep 5
        waited=$((waited + 5))
      done
      echo " ✅ Deleted"
    done
  fi
else
  echo "No failed clusters to delete"
fi

# ---------------------------------------------------------------------------
# Step 2: delete every 'az-p-*' AKS cluster whose provisioningState is
# 'Canceled', then wait until `az aks show` no longer finds each of them.
#
# Environment:
#   CLUSTER_DELETE_TIMEOUT  max seconds to poll per cluster (default: 1800)
# ---------------------------------------------------------------------------
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Step 2: Delete Canceled Clusters (16)"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# JSON array of {name, rg} objects for the clusters to remove.
CANCELED_CLUSTERS=$(az aks list --subscription "$SUBSCRIPTION_ID" \
  --query "[?contains(name, 'az-p-') && provisioningState == 'Canceled'].{name:name, rg:resourceGroup}" -o json)

CANCELED_COUNT=$(echo "$CANCELED_CLUSTERS" | jq '. | length')
echo "Found $CANCELED_COUNT canceled clusters to delete"
echo ""

if [ "$CANCELED_COUNT" -gt 0 ]; then
  # Parallel arrays holding only the clusters whose deletion was accepted;
  # polling a cluster whose delete request was rejected would never finish.
  canceled_pending_rgs=()
  canceled_pending_names=()

  # The loop reads from a process substitution (not a pipe) so the arrays
  # filled here are still visible after the loop.
  while IFS='|' read -r rg name; do
    echo "Deleting canceled cluster: $name (RG: $rg)"
    # </dev/null keeps `az` from consuming the loop's remaining input lines.
    if delete_output=$(az aks delete --resource-group "$rg" --name "$name" \
      --subscription "$SUBSCRIPTION_ID" --yes --no-wait 2>&1 </dev/null); then
      # Drop blank lines from the CLI output, as before.
      printf '%s\n' "$delete_output" | grep -v "^$" || true
      echo " ✅ Deletion initiated"
      canceled_pending_rgs+=("$rg")
      canceled_pending_names+=("$name")
    else
      # Best-effort: report the failure and carry on with the next cluster.
      printf '%s\n' "$delete_output" | grep -v "^$" >&2 || true
      echo " ❌ Could not initiate deletion of $name (RG: $rg)" >&2
    fi
    echo ""
  done < <(echo "$CANCELED_CLUSTERS" | jq -r '.[] | "\(.rg)|\(.name)"')

  if [ "${#canceled_pending_names[@]}" -gt 0 ]; then
    echo "Waiting for canceled cluster deletions to complete..."
    sleep 30

    canceled_delete_timeout="${CLUSTER_DELETE_TIMEOUT:-1800}"
    case "$canceled_delete_timeout" in
      '' | *[!0-9]*)
        echo "ERROR: CLUSTER_DELETE_TIMEOUT must be a non-negative integer" >&2
        exit 1
        ;;
    esac

    # Wait for deletions one cluster at a time (the deletions themselves run
    # concurrently on the Azure side because of --no-wait). Polling is
    # bounded per cluster so a stuck deletion cannot hang the script forever.
    # NOTE(review): any `az aks show` failure (including auth or network
    # errors) is treated as "cluster gone", as before; consider inspecting
    # the error before trusting it.
    for idx in "${!canceled_pending_names[@]}"; do
      rg=${canceled_pending_rgs[$idx]}
      name=${canceled_pending_names[$idx]}
      echo -n " Waiting for $name..."
      waited=0
      while az aks show --resource-group "$rg" --name "$name" \
        --subscription "$SUBSCRIPTION_ID" >/dev/null 2>&1; do
        if [ "$waited" -ge "$canceled_delete_timeout" ]; then
          echo " ❌ Still present after ${canceled_delete_timeout}s" >&2
          echo "ERROR: timed out waiting for deletion of $name (RG: $rg)" >&2
          exit 1
        fi
        echo -n "."
        sleep 5
        waited=$((waited + 5))
      done
      echo " ✅ Deleted"
    done
  fi
else
  echo "No canceled clusters to delete"
fi

# ---------------------------------------------------------------------------
# Step 3: report the AKS cluster resources currently tracked in Terraform
# state. This step is report-only: nothing is removed from state here (see
# the note printed at the end of the step).
# ---------------------------------------------------------------------------
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Step 3: Clean Terraform State"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

# Every terraform command from here on runs inside the configuration dir.
cd "$TERRAFORM_DIR" || {
  echo "ERROR: cannot change to Terraform directory: $TERRAFORM_DIR" >&2
  exit 1
}

echo "Removing deleted clusters from Terraform state..."
echo ""

# Get list of all cluster resources in state. A failing `terraform state
# list` (e.g. uninitialised working directory) is tracked separately so it is
# not misreported as "no cluster resources".
state_readable=1
state_listing=$(terraform state list 2>/dev/null) || state_readable=0

TERRAFORM_STATE_CLUSTERS=""
if [ "$state_readable" -eq 1 ]; then
  # grep exits 1 when nothing matches; that is a valid "empty" result here.
  TERRAFORM_STATE_CLUSTERS=$(printf '%s\n' "$state_listing" \
    | grep "azurerm_kubernetes_cluster" || true)
fi

if [ -n "$TERRAFORM_STATE_CLUSTERS" ]; then
  echo "Checking state for cluster resources..."
  # Every line already matched azurerm_kubernetes_cluster above, so both
  # report lines are printed unconditionally for each resource address.
  while read -r resource; do
    echo " Checking: $resource"
    echo " Resource in state: $resource"
  done <<<"$TERRAFORM_STATE_CLUSTERS"
elif [ "$state_readable" -eq 1 ]; then
  echo "No cluster resources found in Terraform state"
else
  echo "⚠️ Could not read Terraform state (terraform state list failed); skipping state report" >&2
fi

echo ""
echo "Note: Terraform will automatically handle state cleanup during apply"
echo ""

# ---------------------------------------------------------------------------
# Step 4: initialise Terraform and re-apply the configuration in the current
# directory. The apply output is shown live and also written to
# /tmp/terraform-apply-fixed.log.
# ---------------------------------------------------------------------------
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Step 4: Re-run Terraform Deployment"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

echo "Initializing Terraform..."
# init stays best-effort (an already-initialised directory may still apply),
# but a failure is no longer invisible: its output is shown on stderr.
# -input=false makes init fail instead of waiting on a prompt nobody can see.
if ! init_output=$(terraform init -upgrade -input=false 2>&1); then
  printf '%s\n' "$init_output" >&2
  echo "⚠️ terraform init failed; continuing with the existing initialisation" >&2
fi

echo ""
echo "Re-running Terraform deployment..."
echo "This will recreate all deleted clusters with proper configuration"
echo ""
echo "⚠️ This may take 15-30 minutes depending on region availability"
echo ""

# Run Terraform apply with high parallelism. The exit status of a pipeline
# is normally that of its last stage (`tee`), which would hide a failed
# apply; read terraform's own status from PIPESTATUS instead. errexit is
# suspended around the pipeline so the status can be inspected and reported.
set +e
terraform apply -parallelism=128 -auto-approve 2>&1 | tee /tmp/terraform-apply-fixed.log
apply_status=${PIPESTATUS[0]}
set -e

if [ "$apply_status" -ne 0 ]; then
  echo "" >&2
  echo "❌ terraform apply failed (exit code $apply_status)" >&2
  echo " See: /tmp/terraform-apply-fixed.log" >&2
  exit "$apply_status"
fi

# ---------------------------------------------------------------------------
# Step 5: summarise the provisioning state of the 'az-p-*' clusters and print
# follow-up instructions.
# ---------------------------------------------------------------------------
echo ""
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo "Step 5: Verify Deployment"
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
echo ""

echo "Waiting 30 seconds for clusters to stabilize..."
sleep 30

echo ""
echo "Checking cluster status..."
echo ""

# One `az aks list` call gives a single consistent snapshot; the per-state
# counts are derived from it with jq instead of querying Azure three times
# and counting output lines.
CLUSTER_STATES=$(az aks list --subscription "$SUBSCRIPTION_ID" \
  --query "[?contains(name, 'az-p-')].provisioningState" -o json)

# Prints how many clusters in the snapshot are in provisioning state $1.
count_clusters_in_state() {
  jq --arg state "$1" '[.[] | select(. == $state)] | length' <<<"$CLUSTER_STATES"
}

READY_COUNT=$(count_clusters_in_state "Succeeded")
FAILED_COUNT=$(count_clusters_in_state "Failed")
CREATING_COUNT=$(count_clusters_in_state "Creating")

echo "📊 Deployment Status:"
echo " ✅ Ready (Succeeded): $READY_COUNT"
echo " ❌ Failed: $FAILED_COUNT"
echo " ⏳ Creating: $CREATING_COUNT"
echo ""

if [ "$CREATING_COUNT" -gt 0 ]; then
  echo "⚠️ Some clusters are still creating. Monitor with:"
  echo " az aks list --subscription $SUBSCRIPTION_ID --query \"[?contains(name, 'az-p-')].{name:name, state:provisioningState}\" -o table"
fi

if [ "$FAILED_COUNT" -gt 0 ]; then
  echo "⚠️ Some clusters failed. Check logs:"
  echo " tail -100 /tmp/terraform-apply-fixed.log"
  echo " ./scripts/azure/analyze-deployment-failures.sh"
fi

echo ""
echo "✅ Fix process complete!"
echo ""
echo "📝 Logs:"
echo " • Terraform: /tmp/terraform-apply-fixed.log"
echo " • This script: Check output above"
echo ""
echo "🎯 Next Steps:"
# The suggested command names the subscription explicitly so it lists the
# same clusters this script worked on, whatever the caller's az default is.
echo " 1. Monitor cluster creation: az aks list --subscription $SUBSCRIPTION_ID --query \"[?contains(name, 'az-p-')].{name:name, state:provisioningState}\" -o table"
echo " 2. Once ready, run: ./scripts/deployment/wait-and-run-all-next-steps.sh"
echo ""