#!/usr/bin/env bash
# Diagnose Proxmox VE issues on ml110, r630-01, r630-02
# Usage: ./scripts/diagnose-proxmox-hosts.sh [ml110|r630-01|r630-02|all]
#
# Exit status: 0 when every requested host was diagnosed without error,
#              1 on an invalid target, a missing tool, or any host failure.

set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# The config file is optional: the HOSTS table below falls back to defaults.
# shellcheck disable=SC1091
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers: print a colored tag followed by the message ($1) on stdout.
# %b expands the escape sequences in the color variables; the message itself
# is printed verbatim via %s.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "$1"; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "$1"; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "$1"; }

# Host configuration (ml110, r630-01, r630-02)
# Each entry is "<ip>:<root password>"; values come from the sourced config
# (PROXMOX_HOST_* / PROXMOX_PASS_*) when set, otherwise from the defaults here.
declare -A HOSTS
HOSTS[ml110]="${PROXMOX_HOST_ML110:-192.168.11.10}:${PROXMOX_PASS_ML110:-password}"
HOSTS[r630-01]="${PROXMOX_HOST_R630_01:-192.168.11.11}:${PROXMOX_PASS_R630_01:-password}"
HOSTS[r630-02]="${PROXMOX_HOST_R630_02:-192.168.11.12}:${PROXMOX_PASS_R630_02:-password}"

# Determine which hosts to check
TARGET="${1:-all}"

#######################################
# Run the full diagnostic sequence against one host: ping, SSH login check,
# remote Proxmox report over SSH, and an HTTPS probe of port 8006.
# Globals:   HOSTS (read)
# Arguments: $1 - key into HOSTS (ml110, r630-01 or r630-02)
# Outputs:   progress messages and the remote report on stdout
# Returns:   0 on success; 1 if the host is unreachable, SSH login fails,
#            or the remote diagnostic session exits non-zero
#######################################
diagnose_host() {
  local hostname="$1"
  # Split "<ip>:<password>" at the FIRST colon, so passwords may contain ':'.
  local ip="${HOSTS[$hostname]%%:*}"
  local password="${HOSTS[$hostname]#*:}"
  # Shared by both SSH calls so the long-running one cannot hang on connect.
  local -a ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=5)
  local rc=0
  local remote_status=0
  local http_code=""

  log_info "=== Diagnosing ${hostname} (${ip}) ==="
  echo ""

  # Test connectivity
  log_info "1. Testing connectivity..."
  if ping -c 2 -W 2 "$ip" >/dev/null 2>&1; then
    log_success "Host is reachable"
  else
    log_error "Host is NOT reachable"
    echo ""
    return 1
  fi
  echo ""

  # Test SSH
  # The password is handed to sshpass through the SSHPASS environment variable
  # (-e) instead of -p, so it does not show up in the process argument list.
  log_info "2. Testing SSH access..."
  if SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" "echo 'SSH OK'" >/dev/null 2>&1; then
    log_success "SSH access works"
  else
    log_error "SSH access failed"
    echo ""
    return 1
  fi
  echo ""

  # Run diagnostics via SSH
  # The quoted heredoc delimiter keeps every $(...) below unexpanded locally;
  # the text is executed by bash on the remote host.
  # NOTE: the remote shell runs without pipefail, so the `cmd | head || echo`
  # fallbacks below only trigger when the LAST stage of the pipeline fails.
  log_info "3. Running Proxmox VE diagnostics..."
  echo ""
  SSHPASS="$password" sshpass -e ssh "${ssh_opts[@]}" "root@${ip}" bash <<'ENDSSH' || remote_status=$?
echo "=== System Information ==="
echo "Hostname: $(hostname)"
echo "Uptime: $(uptime)"
echo "Proxmox Version: $(pveversion 2>/dev/null || echo 'Not installed or not in PATH')"
echo ""

echo "=== Proxmox Services Status ==="
systemctl status pveproxy --no-pager -l | head -20 || echo "pveproxy service not found"
echo ""
systemctl status pvedaemon --no-pager -l | head -10 || echo "pvedaemon service not found"
echo ""
systemctl status pvestatd --no-pager -l | head -10 || echo "pvestatd service not found"
echo ""
systemctl status pve-cluster --no-pager -l | head -10 || echo "pve-cluster service not found"
echo ""

echo "=== Port 8006 Status ==="
ss -tlnp | grep 8006 || echo "Port 8006 is NOT listening"
echo ""

echo "=== Proxmox Web Interface Test ==="
curl -k -s -o /dev/null -w "HTTP Status: %{http_code}\n" https://localhost:8006/ || echo "Cannot connect to web interface"
echo ""

echo "=== Cluster Status ==="
pvecm status 2>&1 || echo "Not in cluster or pvecm not available"
echo ""

echo "=== Node Status ==="
pvesh get /nodes/$(hostname)/status 2>/dev/null | head -30 || echo "Cannot get node status"
echo ""

echo "=== Storage Status ==="
pvesh get /nodes/$(hostname)/storage 2>/dev/null | head -20 || echo "Cannot get storage status"
echo ""

echo "=== Recent pveproxy Logs (last 30 lines) ==="
journalctl -u pveproxy --no-pager -n 30 2>/dev/null || echo "Cannot read pveproxy logs"
echo ""

echo "=== Disk Space ==="
df -h | grep -E "Filesystem|/dev|/var/lib/vz"
echo ""

echo "=== Memory Usage ==="
free -h
echo ""

echo "=== Network Interfaces ==="
ip addr show | grep -E "^[0-9]+:|inet " | head -20
echo ""

echo "=== Container/VMs Count ==="
pct list 2>/dev/null | wc -l || echo "pct command not available"
qm list 2>/dev/null | wc -l || echo "qm command not available"
echo ""
ENDSSH

  # A dropped connection or remote error is reported but does not skip the
  # independent web-interface probe below.
  if ((remote_status != 0)); then
    log_warn "Remote diagnostics exited with status ${remote_status}"
    rc=1
  fi

  echo ""
  log_info "4. Testing Proxmox Web Interface from remote..."
  # curl prints 000 (and exits non-zero) when it cannot connect; `|| true`
  # keeps that from being treated as a script error.
  http_code="$(curl -k -s -o /dev/null -w '%{http_code}' --connect-timeout 5 "https://${ip}:8006/" || true)"
  case "$http_code" in
    200 | 401 | 302)
      log_success "Web interface is accessible on port 8006"
      ;;
    *)
      log_warn "Web interface may not be accessible on port 8006"
      ;;
  esac

  echo ""
  if ((rc == 0)); then
    log_success "Diagnostics complete for ${hostname}"
  else
    log_warn "Diagnostics finished with errors for ${hostname}"
  fi
  echo ""
  echo "----------------------------------------"
  echo ""
  return "$rc"
}

#######################################
# Resolve TARGET to a host list, verify required tools, and diagnose each
# host in turn. A failing host no longer stops the remaining hosts from
# being checked; failures are summarized at the end.
# Globals:   TARGET (read)
# Returns:   exits 1 on invalid target, missing tool, or any host failure
#######################################
main() {
  local -a targets=()
  local -a failed=()
  local tool=""
  local host=""

  # "both" is accepted as an alias of "all".
  case "$TARGET" in
    both | all)
      targets=(ml110 r630-01 r630-02)
      ;;
    ml110 | r630-01 | r630-02)
      targets=("$TARGET")
      ;;
    *)
      log_error "Invalid target: $TARGET"
      echo "Usage: $0 [ml110|r630-01|r630-02|all]"
      exit 1
      ;;
  esac

  # Without these the checks would misreport a local problem as a host fault.
  for tool in ping ssh sshpass; do
    if ! command -v "$tool" >/dev/null 2>&1; then
      log_error "Required command not found: ${tool}"
      exit 1
    fi
  done

  # Run diagnostics
  # `|| failed+=(...)` records the failure instead of letting `set -e` abort
  # the whole run at the first unreachable host.
  for host in "${targets[@]}"; do
    diagnose_host "$host" || failed+=("$host")
  done

  if ((${#failed[@]} > 0)); then
    log_error "Diagnostics failed for: ${failed[*]}"
    exit 1
  fi

  log_success "All diagnostics complete!"
}

main