#!/usr/bin/env bash
#
# Check all Validator and Sentry node logs for errors.
#   Validators: VMIDs 1000-1004
#   Sentries:   VMIDs 1500-1503
#
# Usage:
#   <this-script> [LOG_LINES]
#     LOG_LINES  number of recent journal lines to inspect per node
#                (default: 100)
#
# Environment:
#   PROXMOX_HOST  Proxmox host used to reach the containers (has a default)
#   SSH_PASSWORD  password used when falling back to direct SSH via sshpass
#
# Exit status:
#   0  no node reported errors
#   1  at least one node reported errors (or a prerequisite was missing)

# Abort on unhandled command failures, on use of unset variables, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Colors
# ANSI escape sequences kept as literal backslash strings; they are only
# turned into real escapes by `echo -e` at the point of output.
readonly RED='\033[0;31m'
readonly GREEN='\033[0;32m'
readonly YELLOW='\033[1;33m'
readonly BLUE='\033[0;34m'
readonly NC='\033[0m' # No Color

# Proxmox host configuration
# Both values can be overridden from the environment.
PROXMOX_HOST="${PROXMOX_HOST:-192.168.11.10}"

# Password used for the direct-SSH fallback (sshpass). It is deliberately NOT
# given a literal default: credentials must not live in the script. Export
# SSH_PASSWORD before running if the fallback path is needed; it defaults to
# empty so that references stay valid under `set -u`.
SSH_PASSWORD="${SSH_PASSWORD:-}"

# Node IP mappings
# VMID -> container IP address, used for the direct-SSH fallback.
declare -Ar NODE_IPS=(
  [1000]="192.168.11.100"
  [1001]="192.168.11.101"
  [1002]="192.168.11.102"
  [1003]="192.168.11.103"
  [1004]="192.168.11.104"
  [1500]="192.168.11.150"
  [1501]="192.168.11.151"
  [1502]="192.168.11.152"
  [1503]="192.168.11.153"
)

# Node definitions
VALIDATORS=(1000 1001 1002 1003 1004)
SENTRIES=(1500 1501 1502 1503)

# Number of journal lines to fetch per node: first CLI argument, default 100.
LOG_LINES="${1:-100}"

# The value is interpolated into a command line executed as root on the remote
# side (`journalctl -n ...`), so anything that is not a plain integer is
# rejected up front.
if [[ ! "$LOG_LINES" =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s [LOG_LINES]\n' "${0##*/}" >&2
  printf 'LOG_LINES must be a non-negative integer, got: %s\n' "$LOG_LINES" >&2
  exit 2
fi

# Check if sshpass is available
# sshpass is needed for the password-based direct-SSH fallback. When it is
# missing, one installation attempt is made through apt-get; the script stops
# if the tool is still unavailable afterwards.
if ! command -v sshpass >/dev/null 2>&1; then
  echo "⚠️ sshpass not installed. Attempting to install..."
  # Explicit `if` instead of `a && b || { ... }`, and a second `command -v`
  # so that an apt-get run that "succeeds" without providing the binary is
  # still treated as a failure.
  if ! { sudo apt-get update -qq && sudo apt-get install -y sshpass 2>/dev/null; } \
    || ! command -v sshpass >/dev/null 2>&1; then
    echo "❌ Cannot install sshpass automatically" >&2
    echo "Please install manually: sudo apt-get install sshpass" >&2
    exit 1
  fi
fi

# Error patterns to search for
# check_node_logs matches these with `grep -i`, i.e. case-insensitively, so
# each pattern is listed once. Listing case variants ("error", "Error",
# "ERROR") made every matching log line be reported and counted once per
# variant, inflating the totals.
ERROR_PATTERNS=(
  "error"
  "failed"
  "exception"
  "fatal"
  "panic"
  "Unable to read"
  "file not found"
  "configuration"
  "restart"
  "crash"
  "timeout"
  "connection refused"
)

# Opening banner, followed by the number of journal lines that will be
# inspected per node.
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ CHECKING ALL VALIDATOR AND SENTRY NODE LOGS ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""
echo "Checking last $LOG_LINES lines of logs for each node"
echo ""

#######################################
# Function to check logs for a node.
#
# Reports whether the node's systemd service is active, prints its recent
# journal and scans that journal for the configured error patterns.
#
# The container is reached through the Proxmox host (`pct exec`, key-based
# SSH) when `pct status` for the VMID succeeds there; otherwise the function
# falls back to password-based SSH straight to the container IP.
#
# Globals (read):
#   NODE_IPS        associative array, VMID -> container IP
#   PROXMOX_HOST    Proxmox host name/IP
#   SSH_PASSWORD    password for the direct-SSH fallback
#   LOG_LINES       number of journal lines to fetch (100 if unset)
#   ERROR_PATTERNS  grep patterns, matched case-insensitively
#   RED GREEN YELLOW BLUE NC  color sequences for `echo -e`
# Arguments:
#   $1 - VMID of the container
#   $2 - systemd unit name without the ".service" suffix
#   $3 - human-readable node type, used only in the heading
# Outputs:
#   Writes the report to stdout.
# Returns:
#   0 if logs were retrieved and nothing suspicious was found;
#   1 if the node could not be inspected (unknown VMID, container missing or
#     not running, no logs, invalid LOG_LINES), if an error pattern matched,
#     or if the systemd restart counter is above 10.
#######################################
check_node_logs() {
  local vmid=$1
  local service_name=$2
  local node_type=$3
  local log_lines="${LOG_LINES:-100}"

  echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}"
  echo -e "${BLUE}Checking ${node_type} VMID $vmid (service: $service_name)${NC}"
  echo -e "${BLUE}═══════════════════════════════════════════════════════════════${NC}"

  # The value ends up inside a remote command line, so only digits are allowed.
  if [[ ! "$log_lines" =~ ^[0-9]+$ ]]; then
    echo -e "${RED}❌ Invalid LOG_LINES value '$log_lines' (expected a non-negative integer)${NC}"
    echo ""
    return 1
  fi

  # Get container IP. The `:-` default matters: under `set -u` a lookup of a
  # missing key aborts the whole script before the check below can run.
  local container_ip="${NODE_IPS[$vmid]:-}"
  if [[ -z "$container_ip" ]]; then
    echo -e "${RED}❌ VMID $vmid: IP address not found in mapping${NC}"
    echo ""
    return 1
  fi

  local logs=""
  local service_status=""

  # BatchMode makes key-based SSH fail immediately instead of blocking on an
  # interactive password prompt, so the fallback below can take over.
  local proxmox_target="root@${PROXMOX_HOST}"
  local -a proxmox_opts=(
    -o BatchMode=yes
    -o StrictHostKeyChecking=accept-new
    -i ~/.ssh/id_ed25519_proxmox
  )

  # Check if we can access via Proxmox host (preferred method). A single
  # `pct status` call serves both as the reachability probe and as the source
  # of the container state.
  local status_output
  if status_output=$(ssh "${proxmox_opts[@]}" -o ConnectTimeout=3 "$proxmox_target" \
    "pct status $vmid 2>/dev/null" 2>/dev/null); then
    if [[ -z "$status_output" ]]; then
      echo -e "${RED}❌ VMID $vmid: Container not found or not accessible${NC}"
      echo ""
      return 1
    fi

    # The state is taken from the second whitespace-separated field.
    local status
    status=$(awk '{print $2}' <<<"$status_output") || status="unknown"
    if [[ "$status" != "running" ]]; then
      echo -e "${YELLOW}⚠️ VMID $vmid: Container is not running (status: $status)${NC}"
      echo ""
      return 1
    fi

    # Check service status. `systemctl is-active` prints the state and exits
    # non-zero for anything but "active", so the exit status is ignored and
    # the printed state is used as-is.
    service_status=$(ssh "${proxmox_opts[@]}" -o ConnectTimeout=5 "$proxmox_target" \
      "pct exec $vmid -- systemctl is-active $service_name.service 2>/dev/null") || true

    # Get recent logs (whatever was printed is kept even if the command failed).
    logs=$(ssh "${proxmox_opts[@]}" -o ConnectTimeout=5 "$proxmox_target" \
      "pct exec $vmid -- journalctl -u $service_name.service -n $log_lines --no-pager 2>/dev/null") || true
  else
    # Fallback: Try direct SSH to container
    echo -e "${YELLOW}⚠️ Cannot access via Proxmox host, trying direct SSH to container...${NC}"

    # `sshpass -e` reads the password from the SSHPASS environment variable,
    # which keeps it out of the process argument list (unlike `-p`).
    local -a direct_ssh=(
      sshpass -e ssh
      -o StrictHostKeyChecking=accept-new
      -o ConnectTimeout=5
      "root@${container_ip}"
    )

    # Check service status via direct SSH
    service_status=$(SSHPASS="${SSH_PASSWORD:-}" "${direct_ssh[@]}" \
      "systemctl is-active $service_name.service 2>/dev/null") || true

    # Get recent logs via direct SSH
    logs=$(SSHPASS="${SSH_PASSWORD:-}" "${direct_ssh[@]}" \
      "journalctl -u $service_name.service -n $log_lines --no-pager 2>/dev/null") || true
  fi

  # Nothing printed at all (e.g. connection failure) is reported as inactive.
  service_status=${service_status:-inactive}

  if [[ "$service_status" != "active" ]]; then
    echo -e "${YELLOW}⚠️ Service $service_name is not active (status: $service_status)${NC}"
  else
    echo -e "${GREEN}✅ Service $service_name is active${NC}"
  fi

  echo ""
  echo "Recent logs (last $log_lines lines):"
  echo "---"

  if [[ -z "$logs" ]]; then
    echo -e "${YELLOW}⚠️ No logs found for service $service_name${NC}"
    echo ""
    return 1
  fi

  # Display logs
  printf '%s\n' "$logs"
  echo "---"
  echo ""

  # Check for errors
  echo "Checking for errors..."
  local error_found=false
  local error_count=0
  local pattern pattern_key matches match_count
  # Matching is case-insensitive, so patterns that differ only in case are
  # applied once; otherwise the same log line is counted once per variant.
  local -A seen_patterns=()

  for pattern in "${ERROR_PATTERNS[@]}"; do
    pattern_key=${pattern,,}
    if [[ -n "${seen_patterns["$pattern_key"]:-}" ]]; then
      continue
    fi
    seen_patterns["$pattern_key"]=1

    # Lines about systemd's restart bookkeeping and CORS rejections are not
    # treated as errors here; the restart counter is evaluated separately.
    matches=$(grep -i -e "$pattern" <<<"$logs" \
      | grep -v -e "restart counter" -e "Scheduled restart" -e "CORS Rejected" || true)
    if [[ -z "$matches" ]]; then
      continue
    fi

    match_count=$(grep -c '' <<<"$matches")
    error_count=$((error_count + match_count))
    if [[ "$error_found" == false ]]; then
      error_found=true
      echo -e "${RED}❌ ERRORS FOUND:${NC}"
    fi
    echo -e "${RED} Pattern '$pattern' found $match_count time(s):${NC}"
    # Show at most five matching lines per pattern.
    head -n 5 <<<"$matches" | sed 's/^/ /'
    if ((match_count > 5)); then
      echo -e "${YELLOW} ... and $((match_count - 5)) more occurrence(s)${NC}"
    fi
  done

  # Check restart count: take the number from the most recent
  # "restart counter is at N" line, if there is one.
  local restart_count=0
  local restart_line
  restart_line=$(grep -i "restart counter" <<<"$logs" | tail -n 1 || true)
  if [[ "$restart_line" =~ restart\ counter\ is\ at\ ([0-9]+) ]]; then
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    restart_count=$((10#${BASH_REMATCH[1]}))
  fi
  if ((restart_count > 10)); then
    echo -e "${RED}⚠️ High restart count: $restart_count${NC}"
    error_found=true
  elif ((restart_count > 0)); then
    echo -e "${YELLOW}ℹ️ Restart count: $restart_count${NC}"
  fi

  echo ""

  if [[ "$error_found" == false ]]; then
    echo -e "${GREEN}✅ No errors found in recent logs${NC}"
    return 0
  fi

  echo -e "${RED}❌ Total error occurrences: $error_count${NC}"
  return 1
}

# Summary tracking
total_validators=0
total_sentries=0
validators_with_errors=0
sentries_with_errors=0
validators_checked=0
sentries_checked=0

# Check all Validator nodes
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ VALIDATOR NODES (VMIDs 1000-1004) ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""

for vmid in "${VALIDATORS[@]}"; do
  # check_node_logs returns non-zero both when it finds errors and when the
  # node could not be inspected; either way the node is counted as having
  # errors. Calling it inside `if` keeps `set -e` from aborting the run.
  if ! check_node_logs "$vmid" "besu-validator" "Validator"; then
    validators_with_errors=$((validators_with_errors + 1))
  fi
  validators_checked=$((validators_checked + 1))
  total_validators=$((total_validators + 1))
done

# Check all Sentry nodes
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ SENTRY NODES (VMIDs 1500-1503) ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""

for vmid in "${SENTRIES[@]}"; do
  if ! check_node_logs "$vmid" "besu-sentry" "Sentry"; then
    sentries_with_errors=$((sentries_with_errors + 1))
  fi
  sentries_checked=$((sentries_checked + 1))
  total_sentries=$((total_sentries + 1))
done

# Final Summary
echo -e "${BLUE}╔══════════════════════════════════════════════════════════════╗${NC}"
echo -e "${BLUE}║ SUMMARY ║${NC}"
echo -e "${BLUE}╚══════════════════════════════════════════════════════════════╝${NC}"
echo ""
echo "Validators:"
echo " Total: $total_validators"
echo " Checked: $validators_checked"
if [[ "$validators_with_errors" -eq 0 ]]; then
  echo -e " Errors: ${GREEN}✅ None found${NC}"
else
  echo -e " Errors: ${RED}❌ Found in $validators_with_errors node(s)${NC}"
fi
echo ""
echo "Sentries:"
echo " Total: $total_sentries"
echo " Checked: $sentries_checked"
if [[ "$sentries_with_errors" -eq 0 ]]; then
  echo -e " Errors: ${GREEN}✅ None found${NC}"
else
  echo -e " Errors: ${RED}❌ Found in $sentries_with_errors node(s)${NC}"
fi
echo ""

# The exit status mirrors the summary: 0 only when no node was flagged.
if [[ "$validators_with_errors" -eq 0 && "$sentries_with_errors" -eq 0 ]]; then
  echo -e "${GREEN}✅ All logs checked - No current errors found!${NC}"
  exit 0
else
  echo -e "${RED}❌ Errors found in some nodes. Review logs above.${NC}"
  exit 1
fi