#!/usr/bin/env bash
# Master Stability Monitor
# Orchestrates all monitoring and recovery operations
#
# Usage:
#   (no arguments)  Run monitoring passes forever, CHECK_INTERVAL seconds apart.
#   --once          Run a single monitoring pass and exit with its status.
#
# Exit status with --once:
#   0  all checks passed
#   1  non-critical issues (health check failing, blocks still produced)
#   2  critical: block production stalled
#
# Environment read by this script (normally provided by the sourced files):
#   RPC_URL_138  Full RPC URL used to query the block number (preferred).
#   RPC_CORE_1   Host used to build http://<host>:8545 when RPC_URL_138 is
#                unset or empty.

set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# The configuration file is optional: a missing file is silently ignored.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# NOTE(review): both paths are recomputed after sourcing the configuration, and
# PROJECT_ROOT now points two levels above SCRIPT_DIR (it was one level above
# when the IP configuration was loaded). This is kept exactly as in the
# original; confirm which root is the intended one.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Load environment
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
  # errexit is suspended so a failing statement inside .env cannot abort us.
  set +e
  # shellcheck source=/dev/null
  source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
  set -e
fi

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m'

# Logging helpers: each prints its first argument to stdout with a colored tag.
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
log_error() { echo -e "${RED}[✗]${NC} $1"; }
log_section() {
  echo -e "\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  echo -e "${CYAN}$1${NC}"
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"
}

# Configuration
CHECK_INTERVAL=120 # Check every 2 minutes
AUTO_FIX=true
# shellcheck disable=SC2034  # not referenced anywhere in this script
AUTO_RESTART=true
BLOCK_SAMPLE_DELAY=10 # Seconds between the two block-number samples

#######################################
# Runs check-validator-health.sh from SCRIPT_DIR and logs the outcome.
# Globals:   SCRIPT_DIR (read)
# Outputs:   the child script's stdout and stderr, both on stdout
# Returns:   0 if the child script succeeded, 1 otherwise
#######################################
run_health_check() {
  log_section "Running Health Check"

  if bash "$SCRIPT_DIR/check-validator-health.sh" 2>&1; then
    log_success "Health check passed"
    return 0
  else
    log_error "Health check failed"
    return 1
  fi
}

#######################################
# Runs auto-fix-validator-config.sh from SCRIPT_DIR and logs the outcome.
# Globals:   SCRIPT_DIR (read)
# Outputs:   the child script's stdout and stderr, both on stdout
# Returns:   0 if the child script succeeded, 1 otherwise
#######################################
run_auto_fix() {
  log_section "Running Auto-Fix"

  if bash "$SCRIPT_DIR/auto-fix-validator-config.sh" 2>&1; then
    log_success "Auto-fix completed"
    return 0
  else
    log_warn "Auto-fix had issues"
    return 1
  fi
}

#######################################
# Samples the block number twice, BLOCK_SAMPLE_DELAY seconds apart, and
# reports whether it changed.
# Globals:   RPC_URL_138, RPC_CORE_1, BLOCK_SAMPLE_DELAY (read)
# Returns:   0 if the block number changed between the samples,
#            1 if it did not, if a sample could not be obtained, or if no
#            RPC endpoint is configured
#######################################
check_block_production() {
  log_section "Checking Block Production"

  # Resolve the endpoint without tripping `set -u`: the configuration file is
  # optional, so RPC_CORE_1 may legitimately be unset here.
  local rpc_url="${RPC_URL_138:-}"
  if [[ -z "$rpc_url" ]]; then
    if [[ -z "${RPC_CORE_1:-}" ]]; then
      log_error "No RPC endpoint configured (set RPC_URL_138 or RPC_CORE_1)"
      return 1
    fi
    rpc_url="http://${RPC_CORE_1}:8545"
  fi

  # Declared separately from the assignments so `local` does not mask the
  # status of the command substitution. A failed query is recorded as "0".
  local block1 block2
  block1=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null || echo "0")
  sleep "$BLOCK_SAMPLE_DELAY"
  block2=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null || echo "0")

  # Both samples must be plain decimal numbers; anything else (empty or
  # partial output) is treated the same as a failed query.
  if [[ "$block1" =~ ^[0-9]+$ && "$block2" =~ ^[0-9]+$ ]] \
    && [[ "$block1" != "$block2" && "$block2" != "0" ]]; then
    log_success "Block production active ($block1 → $block2)"
    return 0
  else
    log_error "Block production STALLED (block: $block1)"
    return 1
  fi
}

#######################################
# Performs one monitoring pass: health check (with an optional auto-fix and
# re-check), then the block production check, then a summary.
# Globals:   AUTO_FIX (read)
# Returns:   0 all systems operational
#            1 non-critical issues detected
#            2 block production issue detected
#######################################
main() {
  log_section "Master Stability Monitor"
  log_info "Starting comprehensive stability monitoring..."
  echo ""

  local health_ok=true
  local blocks_ok=true

  # Run health check
  if ! run_health_check; then
    health_ok=false

    # Auto-fix if enabled
    if [ "$AUTO_FIX" = true ]; then
      log_warn "Attempting automatic fix..."
      # A failed auto-fix is already logged by run_auto_fix; it must not abort
      # the pass (errexit), because the re-check below is what decides.
      run_auto_fix || true

      # Re-check health
      sleep 30
      if run_health_check; then
        log_success "Auto-fix resolved issues"
        health_ok=true
      else
        log_error "Auto-fix did not resolve issues"
      fi
    fi
  fi

  # Check block production
  if ! check_block_production; then
    blocks_ok=false
    log_error "CRITICAL: Block production stalled"
  fi

  # Summary
  log_section "Monitoring Summary"

  # The status is returned rather than passed to `exit` so that the
  # continuous loop below can keep running after a pass.
  if [ "$health_ok" = true ] && [ "$blocks_ok" = true ]; then
    log_success "All systems operational"
    return 0
  elif [ "$blocks_ok" = false ]; then
    log_error "CRITICAL: Block production issue detected"
    return 2
  else
    log_warn "Non-critical issues detected"
    return 1
  fi
}

# Run continuously if no arguments
if [ "${1:-}" = "--once" ]; then
  pass_status=0
  main || pass_status=$?
  exit "$pass_status"
else
  while true; do
    # A failing pass must not end the monitor; its status is only reported.
    main || log_warn "Monitoring pass ended with status $?; next check in ${CHECK_INTERVAL}s"
    sleep "$CHECK_INTERVAL"
  done
fi