#!/usr/bin/env bash
# Fix thin2 Capacity Issue on r630-02
# Migrate containers from thin2 to available storage pools
#
# For every LXC container whose rootfs lives on the "thin2" storage of the
# Proxmox node, this script: stops it, takes a vzdump backup to "local",
# destroys it, restores it onto the selected target storage, restarts it if it
# was running before, and verifies the new rootfs storage.
#
# Requirements:
#   PROXMOX_HOST_R630_02  address of the node; normally provided by
#                         ${PROJECT_ROOT}/config/ip-addresses.conf
#   NODE_PASS (optional)  root password handed to sshpass when sshpass is
#                         installed; defaults to the historical value.
#
# NOTE(review): two remote heredocs (backup and restore) were damaged in the
# copy of the script this revision was produced from. They are reconstructed
# below as single remote commands; see the NOTE(review) markers in
# migrate_container_storage and confirm against the intended commands.

set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Best effort on purpose: a missing/unreadable config is reported below, when
# the one value this script actually needs is validated.
# shellcheck disable=SC1091
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

REPORT_DIR="${PROJECT_ROOT}/reports/status"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
THIN2_LOG="${REPORT_DIR}/fix_thin2_${TIMESTAMP}.log"

# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
MAGENTA='\033[0;35m'
NC='\033[0m'

# Logging helpers: print one formatted line to stdout and append it to
# $THIN2_LOG. Colour codes go through %b; the message goes through %s so that
# backslashes in captured command output are not interpreted as escapes.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1" | tee -a "$THIN2_LOG"; }
log_success() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "$1" | tee -a "$THIN2_LOG"; }
log_warn() { printf '%b[⚠]%b %s\n' "$YELLOW" "$NC" "$1" | tee -a "$THIN2_LOG"; }
log_error() { printf '%b[✗]%b %s\n' "$RED" "$NC" "$1" | tee -a "$THIN2_LOG"; }
log_header() { printf '%b=== %s ===%b\n' "$CYAN" "$1" "$NC" | tee -a "$THIN2_LOG"; }
log_section() { printf '\n%b>>> %s <<<%b\n\n' "$MAGENTA" "$1" "$NC" | tee -a "$THIN2_LOG"; }

mkdir -p "$REPORT_DIR"

# Proxmox node configuration
NODE="r630-02"
# Fail with an explicit message instead of a bare "unbound variable" error
# when the config file above could not be loaded.
NODE_IP="${PROXMOX_HOST_R630_02:?PROXMOX_HOST_R630_02 is not set (expected from ${PROJECT_ROOT}/config/ip-addresses.conf)}"
# The password can be supplied through the environment; the previous
# hard-coded value is kept as the default for backward compatibility.
NODE_PASS="${NODE_PASS:-password}"

#######################################
# Run a command on the Proxmox node as root over SSH.
# Globals:   NODE_IP, NODE_PASS (read)
# Arguments: the remote command line (joined by ssh with spaces)
# Outputs:   whatever the remote command writes
# Returns:   exit status of ssh, i.e. of the remote command (255 on SSH error)
#######################################
ssh_node() {
  local -a ssh_cmd=(
    ssh -o StrictHostKeyChecking=no -o ConnectTimeout=10 "root@${NODE_IP}"
  )
  if command -v sshpass >/dev/null 2>&1; then
    # -e takes the password from $SSHPASS, keeping it out of the process list.
    SSHPASS="$NODE_PASS" sshpass -e "${ssh_cmd[@]}" "$@"
  else
    "${ssh_cmd[@]}" "$@"
  fi
}

#######################################
# Check that the node answers a single ping within 5 seconds.
# Returns: 0 when reachable, non-zero otherwise
#######################################
check_node() {
  ping -c 1 -W 5 "$NODE_IP" >/dev/null 2>&1
}

#######################################
# Extract the storage ID from a `pct config` rootfs line.
# The rootfs value has the form "[volume=]<storage>:<volume>[,key=value...]".
# Arguments: $1 - the full "rootfs: ..." line (may be empty)
# Outputs:   the storage ID, or nothing when it cannot be determined
#######################################
rootfs_storage() {
  local spec=${1#rootfs:}
  spec=${spec#"${spec%%[![:space:]]*}"} # trim leading whitespace
  spec=${spec#volume=}
  if [[ "$spec" == *:* ]]; then
    printf '%s\n' "${spec%%:*}"
  fi
}

#######################################
# Tell whether captured command output contains an error marker.
# Arguments: $1 - captured output
# Returns:   0 when an error/failed marker is present
#######################################
output_reports_error() {
  grep -q "error\|Error\|ERROR\|failed\|Failed" <<<"$1"
}

#######################################
# List the containers whose rootfs is on the thin2 storage.
# Outputs: one "vmid|name|status|size" line per container
# Returns: exit status of the SSH session
#######################################
list_thin2_containers() {
  ssh_node bash <<'ENDSSH'
for vmid in $(pct list 2>/dev/null | tail -n +2 | awk '{print $1}'); do
  rootfs=$(pct config "$vmid" 2>/dev/null \
    | grep -E '^rootfs:[[:space:]]*(volume=)?thin2:' || true)
  [ -n "$rootfs" ] || continue
  name=$(pct config "$vmid" 2>/dev/null | grep "^hostname:" | cut -d: -f2 | xargs)
  # xargs exits 0 on empty input, so the fallback must test the value itself.
  [ -n "$name" ] || name="CT-$vmid"
  status=$(pct status "$vmid" 2>/dev/null | awk '{print $2}')
  size=$(printf '%s\n' "$rootfs" | grep -oP 'size=\K[^,]+' || echo "unknown")
  echo "$vmid|$name|$status|$size"
done
ENDSSH
}

# Check thin2 current status
#######################################
# Log the pvesm/pvesh status of thin2 and the containers stored on it.
#######################################
check_thin2_status() {
  log_section "Checking thin2 Storage Status"

  local status detailed containers
  status=$(ssh_node "pvesm status 2>/dev/null | grep '^thin2[[:space:]]'" || echo "")
  log_info "thin2 Status:"
  echo "$status" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"

  # Get detailed status
  detailed=$(ssh_node "pvesh get /nodes/$NODE/storage/thin2/status 2>/dev/null" || echo "")
  log_info "Detailed thin2 Status:"
  echo "$detailed" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"

  # Find containers using thin2
  log_info "Finding containers using thin2..."
  containers=$(list_thin2_containers) || containers=""
  printf '%s\n' "=== Containers on thin2 ===" "$containers" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"
}

# Migrate container storage (same node, different storage)
#######################################
# Move one container's rootfs from thin2 to another storage by
# backup -> destroy -> restore.
# Arguments: $1 - container ID (numeric)
#            $2 - container name (used for logging only)
#            $3 - target storage ID
# Returns:   0 on verified migration or when the container is not on thin2;
#            1 on any failure
#######################################
migrate_container_storage() {
  local vmid=$1
  local name=$2
  local target_storage=$3
  local status current_rootfs current_storage size
  local stop_result backup_result backup_file backup_name
  local destroy_result restore_result start_result
  local verify_status verify_rootfs verify_storage

  # The ID is interpolated into remote command lines and file paths.
  if [[ ! "$vmid" =~ ^[0-9]+$ ]]; then
    log_error "Invalid container ID: $vmid"
    return 1
  fi

  log_info "========================================="
  log_info "Migrating CT $vmid ($name)"
  log_info "From: thin2 -> To: $target_storage"
  log_info "========================================="

  # Check if container exists. The remote pipeline ends in awk, which exits 0
  # even when pct fails, so a missing container shows up as EMPTY output.
  status=$(ssh_node "pct status $vmid 2>/dev/null | awk '{print \$2}'" || echo "not_found")
  if [[ -z "$status" || "$status" == "not_found" ]]; then
    log_error "Container $vmid not found"
    return 1
  fi
  log_info "Container status: $status"

  # Check current storage
  current_rootfs=$(ssh_node "pct config $vmid 2>/dev/null | grep '^rootfs:'" || echo "")
  current_storage=$(rootfs_storage "$current_rootfs")
  if [[ -z "$current_storage" ]]; then
    log_warn "Could not determine storage for container $vmid"
    log_info "Rootfs config: $current_rootfs"
    return 1
  fi

  if [[ "$current_storage" != "thin2" ]]; then
    log_warn "Container is not on thin2 (currently on: $current_storage)"
    log_info "Rootfs: $current_rootfs"
    return 0 # Not an error, just skip
  fi
  log_info "Current storage: $current_storage"

  # Get container size (parsed from the rootfs line already fetched)
  size=$(grep -oP 'size=\K[^,]+' <<<"$current_rootfs" || echo "unknown")
  log_info "Container size: $size"

  # Step 1: Stop container
  log_info "Step 1: Stopping container..."
  if [[ "$status" == "running" ]]; then
    if ! stop_result=$(ssh_node "pct stop $vmid" 2>&1); then
      log_error "Failed to stop container: $stop_result"
      return 1
    fi
    log_success "Container stopped"
    sleep 3
  else
    log_info "Container already stopped"
  fi

  # Step 2: Create backup
  log_info "Step 2: Creating backup..."
  log_warn "This may take 5-15 minutes depending on container size..."
  # NOTE(review): the source heredoc ran one best-effort command
  # ("... 2>/dev/null || true") before vzdump; that command was lost and is
  # not reproduced here. Restore it if it is required.
  # Create backup to local storage. Both the exit status and the output are
  # checked because the next step destroys the container.
  if ! backup_result=$(ssh_node \
    "vzdump $vmid --storage local --compress gzip --mode stop --remove 0" 2>&1) \
    || output_reports_error "$backup_result"; then
    log_error "Backup failed: $backup_result"
    return 1
  fi
  log_success "Backup completed"

  # Find backup file (newest archive for this container)
  backup_file=$(ssh_node "ls -t /var/lib/vz/dump/vzdump-lxc-$vmid-*.tar.gz 2>/dev/null | head -1" || echo "")
  if [[ -z "$backup_file" ]]; then
    log_error "Could not find backup file"
    return 1
  fi
  backup_name=$(basename "$backup_file")
  log_info "Backup file: $backup_name"

  # Step 3: Destroy container (required before restore)
  log_info "Step 3: Destroying container (required for restore)..."
  if destroy_result=$(ssh_node "pct destroy $vmid --force" 2>&1) \
    || grep -q "not exist" <<<"$destroy_result"; then
    log_success "Container destroyed"
  else
    log_warn "Destroy warning: $destroy_result (continuing anyway)"
  fi
  sleep 3

  # Step 4: Restore with new storage
  log_info "Step 4: Restoring container with storage $target_storage..."
  # NOTE(review): the restore command was lost from the source heredoc (only
  # its trailing "2>&1" survived). Reconstructed as a plain pct restore onto
  # the target storage - confirm no further options were intended.
  if ! restore_result=$(ssh_node \
    "pct restore $vmid '$backup_file' --storage '$target_storage'" 2>&1) \
    || output_reports_error "$restore_result"; then
    log_error "Restore failed: $restore_result"
    log_error "Backup retained on $NODE at $backup_file for manual recovery"
    return 1
  fi
  log_success "Container restored with storage $target_storage"

  # Step 5: Start container (only when it was running before the migration)
  log_info "Step 5: Starting container..."
  if [[ "$status" == "running" ]]; then
    if start_result=$(ssh_node "pct start $vmid" 2>&1) \
      || grep -q "already running" <<<"$start_result"; then
      log_success "Container started"
    else
      log_warn "Start warning: $start_result"
    fi
  else
    log_info "Container was not running before migration; leaving it stopped"
  fi

  # Step 6: Verify
  log_info "Step 6: Verifying migration..."
  sleep 5
  verify_status=$(ssh_node "pct status $vmid 2>/dev/null | awk '{print \$2}'" || echo "not_found")
  verify_rootfs=$(ssh_node "pct config $vmid 2>/dev/null | grep '^rootfs:'" || echo "")
  verify_storage=$(rootfs_storage "$verify_rootfs")

  if [[ -n "$verify_status" && "$verify_status" != "not_found" \
    && "$verify_storage" == "$target_storage" ]]; then
    log_success "Migration verified: Container on $target_storage, Status: $verify_status"

    # Clean up backup
    log_info "Cleaning up backup file..."
    ssh_node "rm -f -- '$backup_file'" 2>&1 || log_warn "Could not clean up backup"
    return 0
  fi

  log_error "Verification failed: Status=$verify_status, Storage=$verify_storage"
  return 1
}

# Main execution
#######################################
# Report thin2 status, pick a target storage, migrate every container found
# on thin2, then report the resulting state.
# Returns: 1 when the node is unreachable, 0 otherwise
#######################################
main() {
  local available_storage target_storage thin2_containers
  local entry vmid name
  local success_count=0
  local fail_count=0
  local final_status final_detailed remaining remaining_count
  local -a entries=() leftover=()

  log_header "Fixing thin2 Capacity Issue on r630-02"
  echo "Log file: $THIN2_LOG" | tee -a "$THIN2_LOG"
  echo "Timestamp: $(date)" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"

  if ! check_node; then
    log_error "Node $NODE ($NODE_IP) is not reachable"
    log_info "Please check network connectivity and try again"
    return 1
  fi
  log_success "Node $NODE is reachable"

  # Check thin2 status
  check_thin2_status

  # Get available storage
  log_section "Checking Available Storage"
  available_storage=$(ssh_node "pvesm status 2>/dev/null | grep -E 'thin1-r630-02|thin3|thin5|thin6' | grep active" || echo "")
  log_info "Available storage pools:"
  echo "$available_storage" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"

  # Select target storage: the first active pool among thin1-r630-02, thin5
  # and thin6 in pvesm's listing order, defaulting to thin1-r630-02.
  target_storage=$(ssh_node "pvesm status 2>/dev/null | grep -E 'thin1-r630-02|thin5|thin6' | grep active | head -1 | awk '{print \$1}'" || echo "thin1-r630-02")
  if [[ -z "$target_storage" ]]; then
    target_storage="thin1-r630-02" # Default
  fi
  log_info "Target storage: $target_storage"

  # Find containers on thin2
  log_section "Finding Containers on thin2"
  thin2_containers=$(list_thin2_containers) || thin2_containers=""
  if [[ -z "$thin2_containers" ]]; then
    log_warn "No containers found on thin2"
    check_thin2_status
    return 0
  fi

  # Hold the list in an array: a `while read` loop fed by a pipe would run in
  # a subshell (losing the counters) and would have its stdin drained by the
  # ssh calls made while migrating.
  mapfile -t entries <<<"$thin2_containers"

  log_info "Containers on thin2:"
  for entry in "${entries[@]}"; do
    IFS='|' read -r vmid name _ <<<"$entry"
    [[ -n "$vmid" ]] || continue
    log_info "  CT $vmid ($name)"
  done
  echo "" | tee -a "$THIN2_LOG"

  # Migrate each container
  log_section "Migrating Containers from thin2"
  for entry in "${entries[@]}"; do
    IFS='|' read -r vmid name _ <<<"$entry"
    [[ -n "$vmid" && -n "$name" ]] || continue
    if migrate_container_storage "$vmid" "$name" "$target_storage"; then
      # Not ((n++)): that returns 1 when n is 0 and would trip `set -e`.
      success_count=$((success_count + 1))
      log_info "Waiting 5 seconds before next migration..."
      sleep 5
    else
      fail_count=$((fail_count + 1))
      log_error "Failed to migrate CT $vmid"
    fi
  done
  log_info "Migration summary: $success_count succeeded, $fail_count failed"

  # Final status check
  log_section "Final thin2 Status Check"
  final_status=$(ssh_node "pvesm status 2>/dev/null | grep '^thin2[[:space:]]'" || echo "")
  log_info "thin2 Status After Migration:"
  echo "$final_status" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"

  final_detailed=$(ssh_node "pvesh get /nodes/$NODE/storage/thin2/status 2>/dev/null" || echo "")
  log_info "Detailed thin2 Status:"
  echo "$final_detailed" | tee -a "$THIN2_LOG"
  echo "" | tee -a "$THIN2_LOG"

  # Verify no containers on thin2. A failed query is reported as "unknown"
  # rather than being mistaken for "nothing left".
  if remaining=$(list_thin2_containers); then
    if [[ -n "$remaining" ]]; then
      mapfile -t leftover <<<"$remaining"
    fi
    remaining_count=${#leftover[@]}
  else
    remaining_count="unknown"
    log_warn "Could not query containers on thin2 after migration"
  fi

  if [[ "$remaining_count" == "0" ]]; then
    log_success "No containers remaining on thin2"
  elif [[ "$remaining_count" != "unknown" ]]; then
    log_warn "$remaining_count container(s) still on thin2"
    for entry in "${leftover[@]}"; do
      IFS='|' read -r vmid _ <<<"$entry"
      log_warn "  CT $vmid still on thin2"
    done
  fi

  log_header "thin2 Capacity Fix Complete"
  log_info "Full log saved to: $THIN2_LOG"

  if [[ "$remaining_count" == "0" ]]; then
    log_success "thin2 capacity issue has been resolved!"
  else
    log_warn "Some containers may still be on thin2 - please review"
  fi
}

main "$@"