#!/usr/bin/env bash
# Consolidated Deployment Monitor
#
# Replaces: monitor-deployment.sh, monitor-and-complete.sh, monitor-and-fix.sh,
#           monitor-continuous.sh, monitor-deployment-live.sh, live-monitor.sh,
#           continuous-monitor.sh, monitor-36-region-deployment.sh,
#           deployment-dashboard.sh
#
# Usage:
#   monitor-deployment-consolidated.sh [--mode MODE] [--interval SECONDS] [--max-checks N]
#
# Modes: status, continuous, live, complete, fix, dashboard
#
# Environment (used as defaults, overridden by the flags above):
#   MODE, MONITOR_INTERVAL, MAX_CHECKS
#
# NOTE: the script runs under `set -e`. Helper functions in this file use
# non-zero return codes to report ordinary states ("not running", "not yet
# created", "in progress"), so every such call is guarded with `|| ...`.

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Provides log_* helpers and handle_help used below (defined outside this file).
source "$SCRIPT_DIR/../lib/init.sh"

# Metadata
SCRIPT_NAME="monitor-deployment-consolidated.sh"
SCRIPT_DESC="Unified deployment monitoring tool supporting modes: status|continuous|live|complete|fix|dashboard"
SCRIPT_USAGE="${SCRIPT_NAME} --mode {status|continuous|live|complete|fix|dashboard} [--help]"
SCRIPT_OPTIONS="--mode Run specific monitor mode\n--help Show this help"
SCRIPT_REQUIREMENTS="Azure CLI (ensure_azure_cli), access to /tmp logs, scripts/lib/*"

handle_help "${1:-}"

# Default settings
MODE="${MODE:-status}"
MONITOR_INTERVAL="${MONITOR_INTERVAL:-30}"
MAX_CHECKS="${MAX_CHECKS:-120}"

# Abort with a clear message when an option that needs a value is the last
# argument (otherwise `shift 2` would kill the script silently under set -e).
#   $1 - option name, $2 - number of remaining arguments (including the option)
_require_value() {
  if [[ "$2" -lt 2 ]]; then
    log_error "Option $1 requires a value"
    exit 1
  fi
}

# Parse arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    --mode)
      _require_value "$1" "$#"
      MODE="$2"
      shift 2
      ;;
    --interval)
      _require_value "$1" "$#"
      MONITOR_INTERVAL="$2"
      shift 2
      ;;
    --max-checks)
      _require_value "$1" "$#"
      MAX_CHECKS="$2"
      shift 2
      ;;
    --help)
      cat << EOF
Consolidated Deployment Monitor

Usage: $0 [OPTIONS]

Options:
  --mode MODE          Monitor mode (status|continuous|live|complete|fix|dashboard)
                       Default: status
  --interval SECONDS   Check interval in seconds (continuous/live modes)
                       Default: 30
  --max-checks N       Maximum number of checks (continuous mode)
                       Default: 120
  --help               Show this help message

Modes:
  status      - One-time status check
  continuous  - Continuous monitoring with auto-completion
  live        - Live monitoring with real-time updates
  complete    - Monitor and automatically proceed when complete
  fix         - Monitor and attempt fixes on errors
  dashboard   - Dashboard view of all deployment status

Examples:
  $0                              # Quick status check
  $0 --mode continuous            # Continuous monitoring
  $0 --mode live --interval 10    # Live updates every 10 seconds
  $0 --mode dashboard             # Dashboard view
EOF
      exit 0
      ;;
    *)
      log_error "Unknown option: $1"
      exit 1
      ;;
  esac
done

# MAX_CHECKS is compared with -lt in the monitoring loop, so it must be an integer.
if ! [[ "$MAX_CHECKS" =~ ^[0-9]+$ ]]; then
  log_error "--max-checks must be a non-negative integer (got: $MAX_CHECKS)"
  exit 1
fi

# Log file paths, each entry formatted as "<path>:<human readable name>"
LOG_FILES=(
  "/tmp/complete-deployment.log:Main Deployment Log"
  "/tmp/terraform-plan-phase1.log:Phase 1 Plan (Key Vaults)"
  "/tmp/terraform-apply-phase1.log:Phase 1 Apply (Key Vaults)"
  "/tmp/store-secrets.log:Phase 2 (Store Secrets)"
  "/tmp/terraform-plan-phase3.log:Phase 3 Plan (AKS)"
  "/tmp/terraform-apply-phase3.log:Phase 3 Apply (AKS)"
)

# Print the size of a file in bytes.
# Tries the BSD/macOS `stat -f` form first, then the GNU `stat -c` form, and
# prints 0 if neither works.
#   $1 - path to the file
_file_size() {
  stat -f%z "$1" 2>/dev/null || stat -c%s "$1" 2>/dev/null || echo "0"
}

# Check deployment process
# Looks for a process whose command line matches "complete-all-deployment.sh".
# Returns: 0 if one is found, 1 otherwise.
check_deployment_process() {
  local pid
  pid=$(pgrep -f "complete-all-deployment.sh" | head -1) || true
  if [[ -n "$pid" ]]; then
    log_success "Deployment process running (PID: $pid)"
    return 0
  else
    log_warn "Deployment process not found"
    return 1
  fi
}

# Check log file
# Reports size, line count and modification time of a log file.
#   $1 - path to the log file
#   $2 - display name for the log
# Returns: 0 if the file exists, 1 if it has not been created yet.
check_log_file() {
  local log_file="$1"
  local log_name="$2"
  local size lines modified

  if [[ ! -f "$log_file" ]]; then
    log_info "⏳ $log_name: Not yet created"
    return 1
  fi

  size=$(_file_size "$log_file")
  lines=$(wc -l < "$log_file" 2>/dev/null || echo "0")

  # BSD stat first; on failure fall back to GNU stat and keep only the
  # date and time fields. "unknown" is used when both forms fail.
  if ! modified=$(stat -f%Sm "$log_file" 2>/dev/null); then
    if modified=$(stat -c%y "$log_file" 2>/dev/null); then
      modified=$(cut -d' ' -f1-2 <<< "$modified")
    else
      modified="unknown"
    fi
  fi

  log_success "$log_name"
  echo " File: $log_file"
  echo " Size: $size bytes, Lines: $lines"
  echo " Modified: $modified"
  return 0
}

# Check for errors in log
# Prints the number of lines matching error|failed|❌ (case-insensitive);
# prints 0 when the file does not exist.
#   $1 - path to the log file
check_log_errors() {
  local log_file="$1"
  local error_count=0

  if [[ -f "$log_file" ]]; then
    # grep -c exits 1 when the count is 0; that is not an error here.
    error_count=$(grep -ci "error\|failed\|❌" "$log_file" 2>/dev/null || true)
  fi
  echo "${error_count:-0}"
}

# Check phase status
# Prints a one-line status for a plan/apply phase based on its log files.
#   $1 - phase label (accepted for interface compatibility; not used)
#   $2 - path to the plan log
#   $3 - path to the apply log
# Returns: 0 = apply complete, 1 = waiting / in progress, 2 = error detected.
# Callers running under `set -e` must capture the status with `|| rc=$?`.
check_phase_status() {
  local phase="$1"
  local plan_file="$2"
  local apply_file="$3"
  local size

  if [[ ! -f "$apply_file" ]]; then
    if [[ -f "$plan_file" ]]; then
      echo "⏳ Plan complete, waiting for apply..."
    else
      echo "⏳ Waiting to start..."
    fi
    return 1
  fi

  size=$(_file_size "$apply_file")
  # Fewer than 100 bytes logged is treated as "apply has only just started".
  if [[ "$size" -lt 100 ]]; then
    echo "⏳ Starting..."
    return 1
  fi

  if grep -q "Apply complete!" "$apply_file" 2>/dev/null; then
    log_success "COMPLETE"
    return 0
  elif grep -qi "Error\|failed" "$apply_file" 2>/dev/null; then
    log_error "ERROR DETECTED"
    return 2
  else
    echo "⏳ IN PROGRESS ($size bytes logged)"
    return 1
  fi
}

# Show status (one-time check)
# Prints process state, per-log details, an error summary and the tail of the
# main deployment log.
show_status() {
  local entry log_file log_name errors
  local total_errors=0

  log_section "DEPLOYMENT MONITOR - STATUS CHECK"

  # A missing process is reported, not fatal.
  check_deployment_process || true

  log_subsection "LOG FILE STATUS"
  for entry in "${LOG_FILES[@]}"; do
    log_file="${entry%%:*}"
    log_name="${entry#*:}"
    # A log that does not exist yet is reported, not fatal.
    check_log_file "$log_file" "$log_name" || true
  done

  log_subsection "ERROR CHECK"
  for log_file in /tmp/complete-deployment.log /tmp/terraform-apply-phase1.log /tmp/terraform-apply-phase3.log; do
    if [[ -f "$log_file" ]]; then
      errors=$(check_log_errors "$log_file")
      if [[ "$errors" -gt 0 ]]; then
        log_warn "Found $errors potential errors in $(basename "$log_file")"
        total_errors=$((total_errors + errors))
      fi
    fi
  done

  if [[ "$total_errors" -eq 0 ]]; then
    log_success "No errors detected in logs"
  fi

  if [[ -f "/tmp/complete-deployment.log" ]]; then
    log_subsection "RECENT OUTPUT (Last 20 lines)"
    tail -20 /tmp/complete-deployment.log
  fi
}

# Continuous monitoring with auto-completion
# Polls the phase logs every MONITOR_INTERVAL seconds, at most MAX_CHECKS
# times. Exits 0 when all three phases are complete or when the check limit is
# reached, and exits 1 as soon as any phase reports an error.
monitor_continuous() {
  local check_count=0
  local phase1_complete=false
  local phase2_complete=false
  local phase3_complete=false
  local secrets_log="/tmp/store-secrets.log"
  local status rc size

  log_section "CONTINUOUS MONITORING - AUTO-COMPLETION"

  while [[ "$check_count" -lt "$MAX_CHECKS" ]]; do
    check_count=$((check_count + 1))
    log_subsection "CHECK #$check_count - $(date '+%Y-%m-%d %H:%M:%S')"

    # Check Phase 1
    if [[ "$phase1_complete" == false ]]; then
      # Capture the status explicitly: `local var=$(cmd)` would mask it.
      rc=0
      status=$(check_phase_status "Phase 1" "/tmp/terraform-plan-phase1.log" "/tmp/terraform-apply-phase1.log") || rc=$?
      echo "Phase 1: $status"
      case "$rc" in
        0)
          phase1_complete=true
          sleep 5
          ;;
        2)
          log_error "Phase 1 failed. Check logs."
          exit 1
          ;;
      esac
    else
      log_success "Phase 1: Already complete"
    fi

    # Check Phase 2
    if [[ "$phase1_complete" == true && "$phase2_complete" == false ]]; then
      if [[ -f "$secrets_log" ]]; then
        size=$(_file_size "$secrets_log")
        if [[ "$size" -gt 100 ]]; then
          # Any error marker wins over a success marker.
          if grep -qi "error\|failed" "$secrets_log" 2>/dev/null; then
            log_error "Phase 2 failed. Check logs."
            exit 1
          elif grep -qi "complete\|success\|stored" "$secrets_log" 2>/dev/null; then
            log_success "Phase 2: COMPLETE"
            phase2_complete=true
          else
            echo "⏳ Phase 2: IN PROGRESS ($size bytes logged)"
          fi
        else
          echo "⏳ Phase 2: Starting..."
        fi
      else
        echo "⏳ Phase 2: Waiting to start..."
      fi
    elif [[ "$phase1_complete" == false ]]; then
      echo "⏳ Phase 2: Waiting for Phase 1..."
    else
      log_success "Phase 2: Already complete"
    fi

    # Check Phase 3
    if [[ "$phase2_complete" == true && "$phase3_complete" == false ]]; then
      rc=0
      status=$(check_phase_status "Phase 3" "/tmp/terraform-plan-phase3.log" "/tmp/terraform-apply-phase3.log") || rc=$?
      echo "Phase 3: $status"
      case "$rc" in
        0)
          phase3_complete=true
          ;;
        2)
          log_error "Phase 3 failed. Check logs."
          exit 1
          ;;
      esac
    elif [[ "$phase2_complete" == false ]]; then
      echo "⏳ Phase 3: Waiting for Phase 2..."
    else
      log_success "Phase 3: Already complete"
    fi

    # Check if all phases complete
    if [[ "$phase1_complete" == true && "$phase2_complete" == true && "$phase3_complete" == true ]]; then
      log_section "ALL PHASES COMPLETE!"
      log_success "Phase 1: Key Vaults deployed"
      log_success "Phase 2: Node secrets stored"
      log_success "Phase 3: AKS clusters deployed"
      log_info "Next steps:"
      echo " 1. Update enode URLs with actual node IP addresses"
      echo " 2. Deploy Besu validator pods"
      exit 0
    fi

    # Show recent activity
    if [[ -f "/tmp/complete-deployment.log" ]]; then
      echo "Recent activity (last 3 lines):"
      tail -3 "/tmp/complete-deployment.log" | sed 's/^/ /'
    fi

    echo "Next check in ${MONITOR_INTERVAL} seconds..."
    sleep "$MONITOR_INTERVAL"
  done

  log_warn "Monitoring timeout reached (${MAX_CHECKS} checks)"
  log_info "Deployment may still be in progress. Continue monitoring manually."
  exit 0
}

# Live monitoring (real-time updates)
# Loops forever: clears the screen, reports the deployment process state and
# prints the lines appended to the main deployment log since the last pass.
# NOTE(review): because the screen is cleared on every pass, a pass with no
# new log lines shows only the header - confirm this is the intended display.
monitor_live() {
  local last_output_lines=0
  local current_lines

  log_section "LIVE MONITORING - REAL-TIME UPDATES"

  while true; do
    clear
    log_section "LIVE DEPLOYMENT MONITOR - $(date '+%Y-%m-%d %H:%M:%S')"

    # Keep watching even when the deployment process is not (yet) running.
    check_deployment_process || true

    # Show latest log output
    if [[ -f "/tmp/complete-deployment.log" ]]; then
      current_lines=$(wc -l < "/tmp/complete-deployment.log" 2>/dev/null || echo "0")
      if [[ "$current_lines" -gt "$last_output_lines" ]]; then
        tail -n "$((current_lines - last_output_lines))" "/tmp/complete-deployment.log"
        last_output_lines=$current_lines
      fi
    fi

    sleep "$MONITOR_INTERVAL"
  done
}

# Dashboard view
# Prints a one-shot summary: process state, status of each phase and the last
# 10 lines of the main deployment log.
show_dashboard() {
  local p1_status p2_status p3_status
  local p1_result=0
  local secrets_log="/tmp/store-secrets.log"

  log_section "DEPLOYMENT DASHBOARD"

  check_deployment_process || true

  log_subsection "PHASE STATUS"

  # Capture the status explicitly: `local var=$(cmd)` would mask it.
  p1_status=$(check_phase_status "Phase 1" "/tmp/terraform-plan-phase1.log" "/tmp/terraform-apply-phase1.log") || p1_result=$?
  echo "Phase 1 (Key Vaults): $p1_status"

  if [[ "$p1_result" -eq 0 ]]; then
    if [[ -f "$secrets_log" ]]; then
      if grep -qi "complete\|success" "$secrets_log" 2>/dev/null; then
        p2_status="✓ COMPLETE"
      else
        p2_status="⏳ IN PROGRESS"
      fi
    else
      p2_status="⏳ WAITING"
    fi
    echo "Phase 2 (Secrets): $p2_status"
  else
    echo "Phase 2 (Secrets): ⏳ WAITING FOR PHASE 1"
  fi

  if [[ "$p1_result" -eq 0 && -f "$secrets_log" ]] \
    && grep -qi "complete\|success" "$secrets_log" 2>/dev/null; then
    # Only the printed status is needed here; a non-zero status is not fatal.
    p3_status=$(check_phase_status "Phase 3" "/tmp/terraform-plan-phase3.log" "/tmp/terraform-apply-phase3.log") || true
    echo "Phase 3 (AKS): $p3_status"
  else
    echo "Phase 3 (AKS): ⏳ WAITING FOR PHASE 2"
  fi

  log_subsection "RESOURCE STATUS"
  # Add Azure resource status checks here if needed

  log_subsection "RECENT ACTIVITY"
  if [[ -f "/tmp/complete-deployment.log" ]]; then
    tail -10 "/tmp/complete-deployment.log"
  else
    echo "No activity log found"
  fi
}

# Main execution
case "$MODE" in
  status)
    show_status
    ;;
  continuous)
    monitor_continuous
    ;;
  live)
    monitor_live
    ;;
  complete)
    # Same as continuous but with auto-proceed logic
    monitor_continuous
    ;;
  fix)
    # Monitor with error fixing (implement fix logic)
    monitor_continuous
    ;;
  dashboard)
    show_dashboard
    ;;
  *)
    log_error "Invalid mode: $MODE"
    log_info "Valid modes: status, continuous, live, complete, fix, dashboard"
    exit 1
    ;;
esac