Files
proxmox/scripts/archive/consolidated/fix/fix-all-containers-complete.sh
defiQUG fbda1b4beb
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
docs: Ledger Live integration, contract deploy learnings, NEXT_STEPS updates
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands
- CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround
- CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check
- NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere
- MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates
- LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-12 15:46:57 -08:00

200 lines
7.2 KiB
Bash
Executable File

#!/usr/bin/env bash
# Complete fix for all container issues - handles disk numbers, missing volumes, and hooks
# Usage: ./scripts/fix-all-containers-complete.sh
#
# Requires PROXMOX_HOST_R630_01 (address of the Proxmox node), taken either from
# config/ip-addresses.conf under PROJECT_ROOT or from the environment.
set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# NOTE(review): PROJECT_ROOT is simply the parent of the script's directory;
# confirm config/ really lives there for the location this script is run from.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Best effort on purpose: the variable may already be exported by the caller.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Fail early with an actionable message. Without this check an unset variable
# dies with a bare "unbound variable" error, and an empty one makes every ssh
# call below target "root@" and fail silently behind its fallback.
if [[ -z "${PROXMOX_HOST_R630_01:-}" ]]; then
  echo "[ERROR] PROXMOX_HOST_R630_01 is not set (expected in ${PROJECT_ROOT}/config/ip-addresses.conf or the environment)" >&2
  exit 1
fi
NODE_IP="${PROXMOX_HOST_R630_01}"

# Hostname is only used for display; fall back to a fixed label if ssh fails.
NODE_NAME=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" "hostname" || echo "r630-01")
# Colors
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'

# Shared formatter: <color> <tag> <message>. Prints "<color>[<tag>]<reset> <message>"
# with backslash escapes expanded, followed by a newline.
_log_line() {
  printf '%b\n' "${1}[${2}]${NC} ${3}"
}

# Level-specific wrappers; each takes the message as its only argument.
log_info() {
  _log_line "$BLUE" "INFO" "$1"
}
log_success() {
  _log_line "$GREEN" "✓" "$1"
}
log_warn() {
  _log_line "$YELLOW" "WARN" "$1"
}
log_error() {
  _log_line "$RED" "ERROR" "$1"
}
echo ""
log_info "Fixing all containers on $NODE_NAME ($NODE_IP)..."
echo ""

# All containers (VMIDs this script will try to bring up, in processing order)
ALL_CONTAINERS=(
  3000 3001 3002 3003
  3500 3501
  5200
  6000
  6400
  10000 10001
  10020 10030 10040 10050 10060 10070 10080
  10090 10091 10092
  10100 10101
  10120 10130
  10150 10151
  10200 10201 10202
  10210
  10230 10232
)

# Outcome counters, reported in the summary at the end of the run
FIXED=0 FAILED=0 SKIPPED=0
# Function to fix disk number
#
# Re-points a container's rootfs at the expected volume when its config
# references a wrongly numbered disk.
# Globals:   NODE_IP (read)
# Arguments: $1 - container VMID
#            $2 - volume name the config wrongly references (e.g. vm-3000-disk-1)
#            $3 - volume name it should reference (e.g. vm-3000-disk-0)
# Returns:   1 if the rootfs line cannot be read from the node,
#            the exit status of the remote "pct set" when a rewrite is attempted,
#            0 when the config does not reference the wrong volume.
fix_disk_number() {
  local vmid=$1
  local wrong_disk=$2
  local correct_disk=$3
  # Keep all working variables local: the caller uses globals named rootfs,
  # storage_pool and size, which must not be overwritten from in here.
  local rootfs spec storage_pool volume size new_rootfs rc

  rootfs=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
    "pct config $vmid 2>/dev/null | grep '^rootfs:'" || echo "")
  if [[ -z "$rootfs" ]]; then
    return 1
  fi

  # rootfs line looks like "rootfs: <storage>:<volume>[,opt=val...]"
  spec=${rootfs#rootfs: }
  storage_pool=${spec%%:*}
  volume=${spec#*:}
  volume=${volume%%,*}

  # Compare the whole volume name; a substring test would also treat
  # "vm-N-disk-10" as a match for "vm-N-disk-1".
  if [[ "$volume" != "$wrong_disk" ]]; then
    return 0
  fi

  size=""
  if [[ "$spec" =~ size=([^,]+) ]]; then
    size=${BASH_REMATCH[1]}
  fi

  if [[ -n "$size" ]]; then
    new_rootfs="${storage_pool}:${correct_disk},size=${size}"
  else
    new_rootfs="${storage_pool}:${correct_disk}"
  fi

  # Drop pct's stdout but leave its stderr visible so a failed rewrite is
  # not silent. Capture the status explicitly so set -e cannot pre-empt it.
  rc=0
  ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
    "pct set $vmid -rootfs $new_rootfs" >/dev/null || rc=$?
  return "$rc"
}
# Function to ensure volume exists
#
# Creates a thin logical volume on the node when no LV with that exact name
# is listed by lvs.
# Globals:   NODE_IP (read)
# Arguments: $1 - container VMID (accepted for call compatibility; not used)
#            $2 - logical volume name to look for / create
#            $3 - virtual size passed to lvcreate -V (e.g. 8g)
#            $4 - name used as the thin pool in "pve/<name>"
# Returns:   1 if lvcreate fails; 0 if the volume exists, was created, or the
#            existence check itself could not be run.
ensure_volume() {
  local volume_name=$2
  local size=$3
  local storage_pool=$4
  local volume_exists

  # Check if volume exists.
  # Ask lvs for bare names and compare the first field exactly: default lvs
  # output is indented, so anchoring a grep at column 0 never matches, and a
  # prefix match would confuse vm-N-disk-1 with vm-N-disk-10.
  volume_exists=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
    "lvs --noheadings -o lv_name 2>/dev/null | awk -v want='${volume_name}' '\$1 == want { found = 1 } END { exit !found }' && echo 'exists' || echo 'missing'" || echo "error")

  if [[ "$volume_exists" == "missing" ]]; then
    log_info " Creating volume $volume_name ($size)..."
    # NOTE(review): this uses $4 as the thin pool LV inside VG "pve"; callers
    # in this script pass the storage ID parsed from rootfs - confirm the two
    # names really coincide on the target node.
    ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
      "lvcreate -V ${size} -T pve/${storage_pool} -n ${volume_name} 2>&1" >/dev/null || return 1
  fi
  return 0
}
# Process each container
#
# process_container VMID
#   Brings a single container up: skips it if already running or if its config
#   has no rootfs line, repairs known disk-number mismatches, makes sure the
#   root volume exists, then starts it (one retry on a pre-start hook error).
#   Globals:   NODE_IP (read); FIXED, FAILED, SKIPPED (incremented)
#   Arguments: $1 - container VMID
#   Returns:   always 0, so one bad container cannot abort the run under set -e
process_container() {
  local vmid=$1
  local status rootfs storage_line storage_pool volume_part size size_lvm
  local wrong_disk="" start_output=""

  log_info "Processing CT $vmid..."

  # Check if already running
  status=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
    "pct list 2>/dev/null | awk '\$1 == $vmid {print \$2}'" || echo "notfound")
  if [[ "$status" == "running" ]]; then
    log_success " ✓ Already running"
    # Counters use plain arithmetic assignment: ((FIXED++)) evaluates to 0 on
    # the first increment, returns status 1 and terminates the script under
    # set -e.
    FIXED=$((FIXED + 1))
    return 0
  fi

  # Get config
  rootfs=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
    "pct config $vmid 2>/dev/null | grep '^rootfs:'" || echo "")
  if [[ -z "$rootfs" ]]; then
    log_warn " ⚠️ Config missing, skipping..."
    SKIPPED=$((SKIPPED + 1))
    return 0
  fi

  # Extract storage info from "rootfs: <storage>:<volume>[,opt=val...]"
  storage_line=${rootfs#rootfs: }
  storage_pool=${storage_line%%:*}
  volume_part=${storage_line#*:}
  volume_part=${volume_part%%:*}
  volume_part=${volume_part%%,*}
  size=""
  if [[ "$storage_line" =~ size=([^,]+) ]]; then
    size=${BASH_REMATCH[1]}
  fi
  log_info " Storage: $storage_pool:$volume_part"

  # Fix disk number mismatches (only these VMIDs are known to be affected)
  case "$vmid" in
    3000 | 3001 | 3003 | 3500 | 6400) wrong_disk="vm-${vmid}-disk-1" ;;
    3002 | 3501) wrong_disk="vm-${vmid}-disk-2" ;;
  esac
  if [[ -n "$wrong_disk" ]]; then
    # NOTE(review): volume_part is switched to disk-0 whenever the helper
    # succeeds, including when it found nothing to rewrite - confirm that is
    # intended for containers whose rootfs is on some other disk number.
    if fix_disk_number "$vmid" "$wrong_disk" "vm-${vmid}-disk-0"; then
      volume_part="vm-${vmid}-disk-0"
    fi
  fi

  # Ensure volume exists
  if [[ -n "$size" && -n "$volume_part" ]]; then
    # Convert size to format lvcreate expects (trailing G -> g)
    size_lvm=${size/%G/g}
    ensure_volume "$vmid" "$volume_part" "$size_lvm" "$storage_pool" || true
  fi

  # Clear lock for 10232
  # NOTE(review): these paths sit under qemu-server; confirm they are the lock
  # files that actually block this container.
  if [[ "$vmid" == "10232" ]]; then
    ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
      "rm -f /var/lock/qemu-server/lock-10232 /var/lock/qemu-server/lxc-10232 2>/dev/null" || true
    sleep 1
  fi

  # Try to start. Output is captured on the first attempt so that a failure
  # can be classified without issuing a second "pct start" just to read the
  # error text (which could succeed and then be misreported as a failure).
  log_info " Attempting to start..."
  if start_output=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
    "pct start $vmid 2>&1"); then
    log_success " ✓ Started"
    FIXED=$((FIXED + 1))
    sleep 1
  elif [[ "$start_output" == *"already running"* ]]; then
    log_success " ✓ Already running"
    FIXED=$((FIXED + 1))
  elif [[ "$start_output" =~ hook.pre-start ]]; then
    # Hook error - give it one more try after a short pause
    log_warn " ⚠️ Hook error, retrying..."
    sleep 2
    if ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
      "pct start $vmid" >/dev/null 2>&1; then
      log_success " ✓ Started on retry"
      FIXED=$((FIXED + 1))
    else
      log_warn " ⚠️ Still failing with hook error"
      FAILED=$((FAILED + 1))
    fi
  else
    log_warn " ⚠️ Failed:"
    # First two lines only; a single sed avoids a SIGPIPE-induced pipeline
    # failure under pipefail.
    sed -n '1,2s/^/ /p' <<<"$start_output"
    FAILED=$((FAILED + 1))
  fi
  echo ""
  return 0
}

for vmid in "${ALL_CONTAINERS[@]}"; do
  process_container "$vmid"
done
echo ""

# Totals for the run
summary_rule="━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
log_info "$summary_rule"
log_info "SUMMARY"
log_info "$summary_rule"
echo ""
for summary_line in "Fixed/Started: $FIXED" "Failed: $FAILED" "Skipped: $SKIPPED"; do
  log_info "$summary_line"
done
echo ""

# Final status: list only the containers this script manages, as "CT <id>: <state>".
log_info "Final container status:"
# Join the VMIDs with "|" so they can be used as an ERE alternation on the node.
vmid_alternation=$(
  IFS='|'
  echo "${ALL_CONTAINERS[*]}"
)
# Best effort: an unreachable node must not fail the script at this point.
ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "root@${NODE_IP}" \
  "pct list 2>/dev/null | grep -E '^[[:space:]]*(${vmid_alternation})[[:space:]]' | awk '{printf \" CT %s: %s\\n\", \$1, \$2}'" || true
echo ""
log_success "Complete!"