#!/usr/bin/env bash
#
# Execute Immediate Actions from Hardware/Storage Investigation
#   1. Address thin2 (r630-02) capacity issue
#   2. Activate inactive storage pools
#   3. Identify and migrate CPU-intensive workloads
#   4. Check migration readiness of the target nodes
#
# The script is read-only with respect to the cluster: it only collects
# information over SSH and writes it to the terminal and to a timestamped log
# under <project root>/reports/status.
#
# Optional environment (may also be set by config/ip-addresses.conf):
#   PROXMOX_HOST_ML110, PROXMOX_HOST_R630_01, PROXMOX_HOST_R630_02  node IPs
#   PROXMOX_PASS_ML110, PROXMOX_PASS_R630_01, PROXMOX_PASS_R630_02  root passwords
#
# Exit status: 0 when every action completed, 1 when at least one did not.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Load IP configuration (optional: a missing file is deliberately ignored).
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

REPORT_DIR="${PROJECT_ROOT}/reports/status"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
ACTION_LOG="${REPORT_DIR}/immediate_actions_${TIMESTAMP}.log"

# Every log helper appends to ACTION_LOG, so the directory must exist first.
mkdir -p "$REPORT_DIR"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m'

# Log helpers: print "$1" to stdout and append it to ACTION_LOG.
# Only the color codes are escape-expanded (%b); the message is printed
# verbatim (%s) so backslashes in command output are not interpreted.
log_info()    { printf '%b %s\n' "${BLUE}[INFO]${NC}" "$1" | tee -a "$ACTION_LOG"; }
log_success() { printf '%b %s\n' "${GREEN}[✓]${NC}" "$1" | tee -a "$ACTION_LOG"; }
log_warn()    { printf '%b %s\n' "${YELLOW}[⚠]${NC}" "$1" | tee -a "$ACTION_LOG"; }
log_error()   { printf '%b %s\n' "${RED}[✗]${NC}" "$1" | tee -a "$ACTION_LOG"; }
log_header()  { printf '%b=== %s ===%b\n' "$CYAN" "$1" "$NC" | tee -a "$ACTION_LOG"; }
log_section() { printf '\n%b>>> %s <<<%b\n\n' "$MAGENTA" "$1" "$NC" | tee -a "$ACTION_LOG"; }
# Uncolored text (captured command output, blank separator lines).
log_raw()     { printf '%s\n' "$1" | tee -a "$ACTION_LOG"; }

# Proxmox nodes configuration: NODES[<hostname>]="<ip>:<root password>".
# NOTE(security): the fallback passwords below are hardcoded credentials kept
# only for backward compatibility. Prefer supplying PROXMOX_PASS_* through the
# environment (or key-based SSH) and remove the defaults.
declare -A NODES
NODES[ml110]="${PROXMOX_HOST_ML110:-192.168.11.10}:${PROXMOX_PASS_ML110:-L@kers2010}"
NODES[r630-01]="${PROXMOX_HOST_R630_01:-192.168.11.11}:${PROXMOX_PASS_R630_01:-password}"
NODES[r630-02]="${PROXMOX_HOST_R630_02:-192.168.11.12}:${PROXMOX_PASS_R630_02:-password}"

#######################################
# Run a command as root on a configured node over SSH.
# Globals:   NODES (read)
# Arguments: $1 - node hostname (key of NODES); $2.. - remote command
# Outputs:   whatever the remote command writes; stdin is forwarded to it
# Returns:   exit status of ssh / the remote command
#######################################
ssh_node() {
  local hostname="$1"
  shift
  local entry="${NODES[$hostname]}"
  local ip="${entry%%:*}"
  # Everything after the first ':' is the password, so it may contain colons.
  local password="${entry#*:}"
  # NOTE(security): host key checking is disabled, as in the original script.
  local -a ssh_cmd=(ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@${ip}")

  if command -v sshpass >/dev/null 2>&1; then
    # -e reads the password from $SSHPASS instead of argv, keeping it out of
    # the process list.
    SSHPASS="$password" sshpass -e "${ssh_cmd[@]}" "$@"
  else
    "${ssh_cmd[@]}" "$@"
  fi
}

#######################################
# Check node connectivity with a single ping (2 second timeout).
# Globals:   NODES (read)
# Arguments: $1 - node hostname (key of NODES)
# Returns:   0 when the node answered, non-zero otherwise
#######################################
check_node() {
  local hostname="$1"
  local ip="${NODES[$hostname]%%:*}"
  ping -c 1 -W 2 "$ip" >/dev/null 2>&1
}

#######################################
# Action 1: Investigate thin2 (r630-02) capacity issue.
# Logs the thin2 status, the guests with a disk on thin2, the other thin pools
# on the node and the snapshots of the guests that use thin2.
# Returns:   1 when r630-02 is unreachable, 0 otherwise
#######################################
investigate_thin2_capacity() {
  log_section "Action 1: Investigating thin2 (r630-02) Capacity Issue"

  local hostname="r630-02"
  local storage_info vms_using_thin2 other_storage snapshots

  if ! check_node "$hostname"; then
    log_error "$hostname is not reachable"
    return 1
  fi

  log_info "Checking thin2 storage usage and VMs using it..."

  # Get detailed storage info (best effort: a failed query logs an empty block)
  storage_info=$(ssh_node "$hostname" \
    "pvesh get /nodes/$hostname/storage/thin2/status 2>/dev/null") || true
  log_info "thin2 Storage Status:"
  log_raw "$storage_info"
  log_raw ""

  # Find VMs/containers using thin2
  log_info "Finding VMs/containers using thin2 storage..."
  vms_using_thin2=$(ssh_node "$hostname" bash <<'ENDSSH'
# Reads a guest config on stdin; succeeds when any line whose key matches the
# ERE in $1 places its volume on thin2. Proxmox writes volumes as
# "<storage>:<volume>"; the "storage=<id>" spelling is accepted as well.
on_thin2() {
  grep -E "^($1):" | grep -Eq '^[^:]+:[[:space:]]*thin2:|storage=thin2(,|$)'
}

echo "=== QEMU VMs on thin2 ==="
for vmid in $(qm list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if qm config "$vmid" 2>/dev/null | on_thin2 '(scsi|virtio|ide|sata)[0-9]+'; then
    name=$(qm config "$vmid" 2>/dev/null | grep "^name:" | cut -d: -f2 | xargs)
    status=$(qm status "$vmid" 2>/dev/null | awk '{print $2}')
    echo "VMID: $vmid | Name: ${name:-VM-$vmid} | Status: $status"
  fi
done

echo ""
echo "=== LXC Containers on thin2 ==="
for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if pct config "$vmid" 2>/dev/null | on_thin2 'rootfs'; then
    name=$(pct config "$vmid" 2>/dev/null | grep "^hostname:" | cut -d: -f2 | xargs)
    status=$(pct status "$vmid" 2>/dev/null | awk '{print $2}')
    echo "VMID: $vmid | Name: ${name:-CT-$vmid} | Status: $status"
  fi
done
ENDSSH
  ) || true
  log_raw "$vms_using_thin2"
  log_raw ""

  # Check available space in other thin pools
  log_info "Checking available space in other storage pools on r630-02..."
  other_storage=$(ssh_node "$hostname" \
    "pvesm status 2>/dev/null | grep -E 'thin[1-6]|thin1-r630-02' | grep -v thin2") || true
  log_info "Available storage pools:"
  log_raw "$other_storage"
  log_raw ""

  # Get snapshots on thin2
  log_info "Checking for snapshots on thin2..."
  snapshots=$(ssh_node "$hostname" bash <<'ENDSSH'
# Same matcher as in the guest listing: stdin is a guest config, $1 the key ERE.
on_thin2() {
  grep -E "^($1):" | grep -Eq '^[^:]+:[[:space:]]*thin2:|storage=thin2(,|$)'
}

echo "=== Snapshots on thin2 ==="
for vmid in $(qm list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if qm config "$vmid" 2>/dev/null | on_thin2 '(scsi|virtio|ide|sata)[0-9]+'; then
    snap_list=$(qm listsnapshot "$vmid" 2>/dev/null | tail -n +2)
    if [ -n "$snap_list" ]; then
      echo "VM $vmid snapshots:"
      echo "$snap_list" | head -10
    fi
  fi
done

for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if pct config "$vmid" 2>/dev/null | on_thin2 'rootfs'; then
    snap_list=$(pct listsnapshot "$vmid" 2>/dev/null | tail -n +2)
    if [ -n "$snap_list" ]; then
      echo "CT $vmid snapshots:"
      echo "$snap_list" | head -10
    fi
  fi
done
ENDSSH
  ) || true
  log_raw "$snapshots"
  log_raw ""

  log_success "thin2 investigation complete. Review output above for cleanup/migration opportunities."
}

#######################################
# Action 2: Activate inactive storage pools.
# Despite the name this only inspects: it logs the status, configuration and
# underlying LVM objects of thin1 on ml110 and of data/thin1 on r630-02, and
# reports whether the backing volume group exists.
# Returns:   0 (unreachable nodes are logged, not treated as failure)
#######################################
activate_inactive_storage() {
  log_section "Action 2: Activating Inactive Storage Pools"

  local hostname storage
  local thin1_status storage_config vg_info
  local storage_status lv_info enabled vg_name

  # Activate thin1 on ml110
  log_info "Checking thin1 on ml110..."
  hostname="ml110"

  if check_node "$hostname"; then
    thin1_status=$(ssh_node "$hostname" \
      "pvesh get /nodes/$hostname/storage/thin1/status 2>/dev/null") || true
    log_info "thin1 (ml110) current status:"
    log_raw "$thin1_status"

    # Check if it's a node restriction issue
    storage_config=$(ssh_node "$hostname" \
      "pvesh get /nodes/$hostname/storage/thin1 2>/dev/null | grep -E 'nodes|content' || echo ''") || true
    log_info "Storage configuration:"
    log_raw "$storage_config"

    # Try to activate by checking LVM
    log_info "Checking underlying LVM volume group..."
    vg_info=$(ssh_node "$hostname" \
      "vgs 2>/dev/null | grep -E 'thin|VG' || echo 'No VGs found'") || true
    log_raw "$vg_info"

    log_warn "thin1 on ml110 appears to be configured but inactive. May need manual investigation."
  else
    log_error "ml110 is not reachable"
  fi

  log_raw ""

  # Activate data and thin1 on r630-02
  log_info "Checking data and thin1 on r630-02..."
  hostname="r630-02"

  if check_node "$hostname"; then
    for storage in "data" "thin1"; do
      log_info "Checking $storage on $hostname..."

      storage_status=$(ssh_node "$hostname" \
        "pvesh get /nodes/$hostname/storage/$storage/status 2>/dev/null") || true
      log_info "$storage ($hostname) current status:"
      log_raw "$storage_status"

      # Check LVM status
      log_info "Checking underlying LVM for $storage..."
      lv_info=$(ssh_node "$hostname" \
        "lvs 2>/dev/null | grep -E '$storage|LV' || echo 'No LVs found'") || true
      log_raw "$lv_info"

      # Check if storage is enabled but inactive
      enabled=$(ssh_node "$hostname" \
        "pvesh get /nodes/$hostname/storage/$storage 2>/dev/null | grep -oP 'enable.*?:\s*\K[^,}]+' | head -1") || true

      if [[ "$enabled" == "1" || "$enabled" == "true" ]]; then
        log_info "$storage is enabled but inactive. Checking if we can activate..."

        # Try to activate by checking if volume group exists
        vg_name=$(ssh_node "$hostname" \
          "pvesh get /nodes/$hostname/storage/$storage 2>/dev/null | grep -oP 'vgname.*?:\s*\K[^,}]+' | head -1") || true
        # Drop whitespace the field extraction may have left around the name.
        vg_name="${vg_name//[[:space:]]/}"

        if [[ -n "$vg_name" ]]; then
          log_info "Volume group for $storage: $vg_name"
          # Rely on the exit status of vgs. Counting with
          # "grep -c ... || echo 0" printed "0" twice when nothing matched,
          # so a missing volume group was reported as existing.
          if ssh_node "$hostname" "vgs $(printf '%q' "$vg_name") >/dev/null 2>&1"; then
            log_info "Volume group exists. Storage should be activatable."
          else
            log_warn "Volume group $vg_name does not exist. Storage cannot be activated."
          fi
        fi
      fi

      log_raw ""
    done
  else
    log_error "r630-02 is not reachable"
  fi

  log_success "Storage activation check complete."
}

#######################################
# Action 3: Identify CPU-intensive workloads on ml110.
# Logs per-guest CPU/memory figures for running guests, the first 20 lines of
# the host's top output and the qm/pct guest lists.
# Returns:   1 when ml110 is unreachable, 0 otherwise
#######################################
identify_cpu_intensive_workloads() {
  log_section "Action 3: Identifying CPU-Intensive Workloads on ml110"

  local hostname="ml110"
  local cpu_usage resource_allocation

  if ! check_node "$hostname"; then
    log_error "$hostname is not reachable"
    return 1
  fi

  log_info "Getting CPU usage for all VMs and containers on ml110..."

  cpu_usage=$(ssh_node "$hostname" bash <<'ENDSSH'
echo "=== QEMU VMs CPU Usage ==="
for vmid in $(qm list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if qm status "$vmid" 2>/dev/null | grep -q "status: running"; then
    name=$(qm config "$vmid" 2>/dev/null | grep "^name:" | cut -d: -f2 | xargs)
    # NOTE(review): plain "qm status" may not print a cpu field (it may need
    # --verbose); in that case this stays at the "0" fallback - confirm.
    cpu_percent=$(qm status "$vmid" 2>/dev/null | grep -oP 'cpu.*?:\s*\K[0-9.]+' || echo "0")
    cpus=$(qm config "$vmid" 2>/dev/null | grep "^cores:" | cut -d: -f2 | xargs)
    mem=$(qm config "$vmid" 2>/dev/null | grep "^memory:" | cut -d: -f2 | xargs)
    echo "VMID: $vmid | Name: ${name:-VM-$vmid} | CPUs: ${cpus:-1} | CPU%: $cpu_percent | Memory: ${mem:-0}"
  fi
done

echo ""
echo "=== LXC Containers CPU Usage ==="
for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  if pct status "$vmid" 2>/dev/null | grep -q "status: running"; then
    name=$(pct config "$vmid" 2>/dev/null | grep "^hostname:" | cut -d: -f2 | xargs)
    # Get CPU usage from inside container if possible
    cpu_info=$(pct exec "$vmid" -- top -bn1 2>/dev/null | grep "Cpu(s)" | awk '{print $2}' | sed 's/%us,//')
    cpus=$(pct config "$vmid" 2>/dev/null | grep "^cores:" | cut -d: -f2 | xargs)
    memory=$(pct config "$vmid" 2>/dev/null | grep "^memory:" | cut -d: -f2 | xargs)
    echo "VMID: $vmid | Name: ${name:-CT-$vmid} | CPUs: ${cpus:-1} | CPU Info: ${cpu_info:-N/A} | Memory: ${memory:-0}"
  fi
done

echo ""
echo "=== Top CPU Consumers (Host Level) ==="
top -bn1 | head -20
ENDSSH
  ) || true
  log_raw "$cpu_usage"
  log_raw ""

  # Get detailed VM/container list with resource allocation
  log_info "Getting detailed resource allocation..."
  resource_allocation=$(ssh_node "$hostname" bash <<'ENDSSH'
echo "=== Resource Allocation Summary ==="
echo ""
echo "QEMU VMs:"
qm list 2>/dev/null | head -1
qm list 2>/dev/null | tail -n +2 | awk '{printf "%-6s %-30s %-10s %-10s\n", $1, $2, $3, $4}'
echo ""
echo "LXC Containers:"
pct list 2>/dev/null | head -1
pct list 2>/dev/null | tail -n +2 | awk '{printf "%-6s %-30s %-10s %-10s\n", $1, $2, $3, $4}'
ENDSSH
  ) || true
  log_raw "$resource_allocation"
  log_raw ""

  log_success "CPU-intensive workload identification complete."
}

#######################################
# Action 4: Check migration readiness.
# Logs CPU, memory, active storage and guest counts for r630-01 and r630-02.
# Returns:   0 (unreachable nodes are logged, not treated as failure)
#######################################
check_migration_readiness() {
  log_section "Action 4: Checking Migration Readiness"

  local hostname resources

  log_info "Checking available resources on target nodes..."

  for hostname in "r630-01" "r630-02"; do
    if check_node "$hostname"; then
      log_info "Resources on $hostname:"
      resources=$(ssh_node "$hostname" bash <<'ENDSSH'
echo "CPU Cores: $(nproc)"
echo "CPU Usage: $(top -bn1 | grep 'Cpu(s)' | awk '{print $2}' | sed 's/%us,//')"
echo "Memory Total: $(free -h | grep Mem | awk '{print $2}')"
echo "Memory Used: $(free -h | grep Mem | awk '{print $3}')"
echo "Memory Available: $(free -h | grep Mem | awk '{print $7}')"
echo "Memory Usage: $(free | grep Mem | awk '{printf "%.1f%%", $3/$2 * 100.0}')"
echo ""
echo "Storage Available:"
pvesm status 2>/dev/null | grep -E "active" | awk '{printf "  %-20s %8s available\n", $1, $5}'
echo ""
echo "Current VMs/Containers:"
echo "  QEMU VMs: $(qm list 2>/dev/null | tail -n +2 | wc -l)"
echo "  LXC Containers: $(pct list 2>/dev/null | tail -n +2 | wc -l)"
ENDSSH
      ) || true
      log_raw "$resources"
      log_raw ""
    else
      log_error "$hostname is not reachable"
    fi
  done

  log_success "Migration readiness check complete."
}

#######################################
# Main execution: run all four actions in order and summarize.
# A failing action no longer aborts the run (a bare "return 1" under set -e
# used to terminate the script before the remaining actions); failures are
# collected and reflected in the exit status instead.
# Returns:   0 when every action completed, 1 otherwise
#######################################
main() {
  log_header "Executing Immediate Actions from Hardware/Storage Investigation"
  log_raw "Log file: $ACTION_LOG"
  log_raw "Timestamp: $(date)"
  log_raw ""

  local action
  local failed=""

  # Execute all actions
  for action in \
    investigate_thin2_capacity \
    activate_inactive_storage \
    identify_cpu_intensive_workloads \
    check_migration_readiness; do
    if ! "$action"; then
      failed="${failed:+$failed }$action"
    fi
  done

  log_header "Immediate Actions Execution Complete"
  log_info "Full log saved to: $ACTION_LOG"
  log_info "Review the log for detailed information and next steps."
  echo ""

  if [[ -n "$failed" ]]; then
    log_warn "Actions that did not complete: $failed"
    return 1
  fi
  log_success "All immediate actions have been executed!"
}

# Run main function
main "$@"