Files
loc_az_hci/scripts/fix/recreate-template-and-vms.sh
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

449 lines
14 KiB
Bash
Executable File

#!/bin/bash
# Recreate Template VM 9000 with Proper Cloud-Init
# Then Recreate VMs 100-103 from the new template

# The user's shell configuration is read before strict mode is enabled, so a
# non-zero status from that file does not stop the script.
source ~/.bashrc
set -euo pipefail

# Absolute directory of this script, and the repository root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Load environment variables
if [[ -f "$PROJECT_ROOT/.env" ]]; then
  # While allexport is on, every assignment read from the file is exported.
  set -a
  # Skip comment-only and blank lines, cut everything from the first '#',
  # and keep only lines that still contain an '='.
  source <(grep -v -e '^#' -e '^$' "$PROJECT_ROOT/.env" | sed 's/#.*$//' | grep '=')
  set +a
fi

# Colors (escape sequences are stored literally and expanded later by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Print an informational message on stdout, prefixed with a green "[INFO]" tag.
# Globals:   GREEN, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
log_info() {
  local message="$1"
  local tag="${GREEN}[INFO]${NC}"
  echo -e "${tag} ${message}"
}
# Print a warning on stdout, prefixed with a yellow "[WARN]" tag.
# Globals:   YELLOW, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
log_warn() {
  local text="$1"
  local prefix="${YELLOW}[WARN]${NC}"
  echo -e "${prefix} ${text}"
}
# Print an error message, prefixed with a red "[ERROR]" tag.
# The message goes to stderr so that it stays visible when stdout is
# redirected or captured, and never mixes into command-substitution output.
# Globals:   RED, NC (read)
# Arguments: $1 - message text (backslash escapes are interpreted by echo -e)
log_error() {
  echo -e "${RED}[ERROR]${NC} $1" >&2
}
# Print a section banner on stdout: a blank line, the title framed by two
# blue rules, and a closing blank line.
# Globals:   BLUE, NC (read)
# Arguments: $1 - section title (backslash escapes are interpreted by echo -e)
log_step() {
  local title="$1"
  local rule="${BLUE}========================================${NC}"
  echo ""
  echo -e "${rule}"
  echo -e "${BLUE}${title}${NC}"
  echo -e "${rule}"
  echo ""
}
# --- Proxmox connection settings -------------------------------------------
# Each value falls back to the default shown when the corresponding
# environment variable is unset or empty.
: "${PVE_USERNAME:=root@pam}"
PVE_PASSWORD="${PVE_ROOT_PASS:-}"
PROXMOX_URL="${PROXMOX_ML110_URL:-https://192.168.1.206:8006}"
: "${PROXMOX_NODE:=pve}"
PROXMOX_HOST="${PROXMOX_ML110_IP:-192.168.1.206}"

# --- SSH key pair: private key path, and the matching ".pub" file ----------
: "${SSH_KEY:=$HOME/.ssh/id_ed25519_proxmox}"
SSH_KEY_FILE="${SSH_KEY}.pub"

# --- Template and storage ---------------------------------------------------
TEMPLATE_VMID=9000
: "${STORAGE:=local-lvm}"

# VM definitions, one entry per VM: vmid name ip cores memory disk_size
VMS=()
VMS+=("100 cloudflare-tunnel 192.168.1.188 2 2048 20")
VMS+=("101 k3s-master 192.168.1.60 4 4096 40")
VMS+=("102 git-server 192.168.1.121 2 2048 30")
VMS+=("103 observability 192.168.1.82 2 2048 30")
# Log in to the Proxmox API and print the session credentials.
# Globals:   PVE_USERNAME, PVE_PASSWORD, PROXMOX_URL (read)
# Outputs:   "<ticket>|<CSRFPreventionToken>" on success, an empty line on
#            any failure (network error, rejected login, unparsable reply)
# Returns:   0 always; callers detect failure by the empty output
get_api_token() {
  local response ticket csrf_token

  # --data-urlencode keeps credentials containing '&', '+', '%', '=' or
  # spaces intact; a hand-built "a=..&b=.." string would corrupt them.
  response=$(curl -s -k --connect-timeout 10 --max-time 15 \
    --data-urlencode "username=$PVE_USERNAME" \
    --data-urlencode "password=$PVE_PASSWORD" \
    "$PROXMOX_URL/api2/json/access/ticket" 2>&1) || response=""

  ticket=""
  csrf_token=""
  if grep -q '"data"' <<< "$response"; then
    ticket=$(grep -o '"ticket":"[^"]*' <<< "$response" | cut -d'"' -f4) || ticket=""
    csrf_token=$(grep -o '"CSRFPreventionToken":"[^"]*' <<< "$response" | cut -d'"' -f4) || csrf_token=""
  fi

  # A rejected login can still carry a "data" key (e.g. {"data":null}), so
  # success is decided by actually having both values, not by the key alone.
  if [[ -n "$ticket" && -n "$csrf_token" ]]; then
    printf '%s|%s\n' "$ticket" "$csrf_token"
  else
    echo ""
  fi
}
# Rebuild the cloud-init template VM (ID 9000) on the Proxmox host over SSH.
#
# The public key is copied to the host first and the remote script is then
# run exactly once, with STORAGE passed through. Running it before the key
# was uploaded would destroy the old template and then fail at the
# `qm set --sshkeys` step.
#
# Globals:   SSH_KEY, SSH_KEY_FILE, PROXMOX_HOST, STORAGE (read)
# Outputs:   progress messages and the remote script's output
# Returns:   0 on success, 1 if the key upload or the remote script fails.
#            Exits 1 if the local public key is missing, exits 0 if the
#            operator does not answer "yes" at the prompt.
recreate_template() {
  log_step "Step 1: Recreating Template VM 9000"

  if [[ ! -f "$SSH_KEY_FILE" ]]; then
    log_error "SSH key file not found: $SSH_KEY_FILE"
    exit 1
  fi

  log_info "This will destroy and recreate template VM 9000"
  log_warn "All VMs cloned from this template will need to be recreated"
  echo ""

  # Auto-confirm if running non-interactively
  local confirm=""
  if [[ -t 0 ]]; then
    # EOF (Ctrl-D) at the prompt counts as "no" instead of tripping set -e.
    read -r -p "Continue? (yes/no): " confirm || confirm=""
    if [[ "$confirm" != "yes" ]]; then
      log_info "Cancelled"
      exit 0
    fi
  else
    log_info "Non-interactive mode: auto-confirming"
  fi

  # Copy SSH key to Proxmox host (must happen before the remote script runs)
  log_info "Copying SSH key to Proxmox host..."
  if ! scp -i "$SSH_KEY" "$SSH_KEY_FILE" "root@${PROXMOX_HOST}:/tmp/id_ed25519_proxmox.pub"; then
    log_error "Failed to copy SSH key to $PROXMOX_HOST"
    return 1
  fi

  # Quote STORAGE for the remote shell so unusual storage names survive.
  local remote_storage
  printf -v remote_storage '%q' "$STORAGE"

  log_info "Connecting to Proxmox host to recreate template..."
  # The quoted heredoc delimiter keeps every $variable below unexpanded
  # locally; they are evaluated by bash on the Proxmox host.
  if ! ssh -i "$SSH_KEY" "root@${PROXMOX_HOST}" "STORAGE=${remote_storage} bash -s" <<'TEMPLATE_SCRIPT'
set -eu
TEMPLATE_VMID=9000
STORAGE="${STORAGE:-local-lvm}"
SSH_KEY_FILE="/tmp/id_ed25519_proxmox.pub"
IMAGE_URL="https://cloud-images.ubuntu.com/releases/24.04/release/ubuntu-24.04-server-cloudimg-amd64.img"
IMAGE_FILE="/tmp/ubuntu-24.04-server-cloudimg-amd64.img"

# Check every prerequisite before touching the existing template, so a
# failure here leaves the old template in place.
if [ ! -s "$SSH_KEY_FILE" ]; then
  echo "SSH public key not found on host: $SSH_KEY_FILE" >&2
  exit 1
fi

# Download Ubuntu 24.04 cloud image
if [ ! -s "$IMAGE_FILE" ]; then
  echo "Downloading Ubuntu 24.04 cloud image..."
  # Download under a temporary name: an interrupted transfer must not leave
  # a truncated file that a later run would silently reuse.
  if ! wget -q --show-progress -O "$IMAGE_FILE.part" "$IMAGE_URL"; then
    rm -f "$IMAGE_FILE.part"
    echo "Failed to download image" >&2
    exit 1
  fi
  mv "$IMAGE_FILE.part" "$IMAGE_FILE"
fi

# Check if template exists and destroy it
if qm status "$TEMPLATE_VMID" &>/dev/null; then
  echo "Stopping and destroying existing template VM $TEMPLATE_VMID..."
  # Best effort: if either step really failed, `qm create` below reports it.
  qm stop "$TEMPLATE_VMID" 2>/dev/null || true
  sleep 5
  qm destroy "$TEMPLATE_VMID" 2>/dev/null || true
  sleep 2
fi

# Create VM
echo "Creating template VM $TEMPLATE_VMID..."
qm create "$TEMPLATE_VMID" \
  --name ubuntu-24.04-cloud-init \
  --memory 2048 \
  --cores 2 \
  --net0 virtio,bridge=vmbr0 \
  --scsihw virtio-scsi-pci \
  --scsi0 "$STORAGE:0,import-from=$IMAGE_FILE,discard=on" \
  --ide2 "$STORAGE:cloudinit" \
  --boot order=scsi0 \
  --serial0 socket \
  --vga serial0 \
  --agent enabled=1 \
  --ostype l26

# Resize disk to 32GB
echo "Resizing disk to 32GB..."
qm disk resize "$TEMPLATE_VMID" scsi0 32G

# Configure cloud-init with SSH key
echo "Configuring cloud-init..."
qm set "$TEMPLATE_VMID" \
  --ciuser ubuntu \
  --cipassword "" \
  --sshkeys "$SSH_KEY_FILE" \
  --ipconfig0 ip=dhcp

# Convert to template
echo "Converting to template..."
qm template "$TEMPLATE_VMID"
echo "✓ Template VM $TEMPLATE_VMID created successfully"
TEMPLATE_SCRIPT
  then
    log_error "Template creation on $PROXMOX_HOST failed"
    return 1
  fi

  log_info "✓ Template VM 9000 recreated with proper cloud-init"
}
# Print the run state of one VM as reported by the Proxmox API.
# Arguments: $1 - VM API URL (…/nodes/<node>/qemu/<vmid>)
#            $2 - auth ticket, $3 - CSRF prevention token
# Outputs:   the "status" field of <url>/status/current; "stopped" when the
#            reply cannot be fetched or parsed
_pve_vm_run_state() {
  local vm_url="$1" ticket="$2" csrf_token="$3" state
  state=$(curl -s -k \
    -H "Cookie: PVEAuthCookie=$ticket" \
    -H "CSRFPreventionToken: $csrf_token" \
    "$vm_url/status/current" \
    | python3 -c "import sys, json; print((json.load(sys.stdin).get('data') or {}).get('status', 'stopped'))" 2>/dev/null) \
    || state="stopped"
  printf '%s\n' "${state:-stopped}"
}

# Stop (if running) and delete every VM listed in VMS through the Proxmox API.
# Deletion is best effort per VM: a VM that cannot be deleted is reported
# with a warning and the loop moves on to the next one.
# Globals:   VMS, PROXMOX_URL, PROXMOX_NODE (read)
# Returns:   1 if authentication fails, 0 otherwise
destroy_existing_vms() {
  log_step "Step 2: Destroying Existing VMs"

  local tokens
  tokens=$(get_api_token) || tokens=""
  if [[ -z "$tokens" ]]; then
    log_error "Failed to authenticate with Proxmox"
    return 1
  fi
  local ticket="${tokens%%|*}"
  local csrf_token="${tokens#*|}"
  if [[ -z "$ticket" || -z "$csrf_token" ]]; then
    log_error "Failed to authenticate with Proxmox"
    return 1
  fi

  local -a auth_headers=(
    -H "Cookie: PVEAuthCookie=$ticket"
    -H "CSRFPreventionToken: $csrf_token"
  )
  local vm_spec vmid name ip cores memory disk_size
  local vm_url status http_code waited

  for vm_spec in "${VMS[@]}"; do
    read -r vmid name ip cores memory disk_size <<< "$vm_spec"
    vm_url="$PROXMOX_URL/api2/json/nodes/$PROXMOX_NODE/qemu/$vmid"
    log_info "Destroying VM $vmid ($name)..."

    # Stop VM if running
    status=$(_pve_vm_run_state "$vm_url" "$ticket" "$csrf_token")
    if [[ "$status" == "running" ]]; then
      log_info "Stopping VM $vmid..."
      curl -s -k -X POST "${auth_headers[@]}" "$vm_url/status/stop" > /dev/null \
        || log_warn "Stop request for VM $vmid could not be sent"
      # The stop request is asynchronous: poll (up to 60s) until the VM has
      # actually left the running state before trying to delete it.
      waited=0
      while (( waited < 60 )); do
        sleep 5
        waited=$(( waited + 5 ))
        [[ "$(_pve_vm_run_state "$vm_url" "$ticket" "$csrf_token")" == "running" ]] || break
      done
    fi

    # Delete VM, keeping the HTTP status so failures are not reported as success
    http_code=$(curl -s -k -o /dev/null -w '%{http_code}' -X DELETE \
      "${auth_headers[@]}" "$vm_url") || http_code="000"
    if [[ "$http_code" != 2* ]]; then
      log_warn "VM $vmid could not be deleted (HTTP $http_code) - it may not exist or may still be running"
      continue
    fi

    # The DELETE request only queues a task. Wait (up to 60s) until the VM
    # no longer answers, so its ID is free for the clone step that follows.
    waited=0
    while (( waited < 60 )); do
      http_code=$(curl -s -k -o /dev/null -w '%{http_code}' \
        "${auth_headers[@]}" "$vm_url/status/current") || http_code="000"
      [[ "$http_code" == "200" ]] || break
      sleep 2
      waited=$(( waited + 2 ))
    done
    if [[ "$http_code" == "200" ]]; then
      log_warn "VM $vmid is still present 60s after the delete request"
    else
      log_info "✓ VM $vmid destroyed"
    fi
  done
}
# Percent-encode a string byte by byte (everything except A-Z a-z 0-9 . ~ _ -).
# Arguments: $1 - text to encode
# Outputs:   the encoded text, without a trailing newline
_pve_urlencode() {
  local LC_ALL=C # byte-wise indexing and ASCII-only character ranges
  local input="$1" encoded="" ch hex i
  for (( i = 0; i < ${#input}; i++ )); do
    ch="${input:i:1}"
    case "$ch" in
      [a-zA-Z0-9.~_-]) encoded+="$ch" ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        encoded+="$hex"
        ;;
    esac
  done
  printf '%s' "$encoded"
}

# Poll a Proxmox task until it has finished.
# Arguments: $1 - node API URL (…/api2/json/nodes/<node>), $2 - task UPID,
#            $3 - auth ticket, $4 - CSRF token, $5 - timeout in seconds (default 300)
# Returns:   0 task finished OK (or with warnings), 1 task finished with an
#            error, 2 no final state seen before the timeout
_pve_wait_for_task() {
  local node_url="$1" upid="$2" ticket="$3" csrf_token="$4" timeout="${5:-300}"
  local task_url task_json waited=0
  task_url="$node_url/tasks/$(_pve_urlencode "$upid")/status"
  while (( waited < timeout )); do
    task_json=$(curl -s -k \
      -H "Cookie: PVEAuthCookie=$ticket" \
      -H "CSRFPreventionToken: $csrf_token" \
      "$task_url") || task_json=""
    if grep -q '"status":"stopped"' <<< "$task_json"; then
      if grep -Eq '"exitstatus":"(OK|WARNINGS)' <<< "$task_json"; then
        return 0
      fi
      return 1
    fi
    sleep 2
    waited=$(( waited + 2 ))
  done
  return 2
}

# Clone every VM in VMS from the template, apply cores/memory/disk and
# cloud-init settings, and start it. Work is best effort per VM: a VM whose
# clone fails is reported and skipped.
# Globals:   VMS, TEMPLATE_VMID, PROXMOX_URL, PROXMOX_NODE, PROXMOX_HOST,
#            SSH_KEY, SSH_KEY_FILE (read)
# Returns:   1 if authentication fails or the public key cannot be read,
#            0 otherwise
create_vms_from_template() {
  log_step "Step 3: Creating VMs from Template"

  local tokens
  tokens=$(get_api_token) || tokens=""
  if [[ -z "$tokens" ]]; then
    log_error "Failed to authenticate with Proxmox"
    return 1
  fi
  local ticket="${tokens%%|*}"
  local csrf_token="${tokens#*|}"
  if [[ -z "$ticket" || -z "$csrf_token" ]]; then
    log_error "Failed to authenticate with Proxmox"
    return 1
  fi

  # Read SSH key
  local ssh_key_content ssh_key_encoded
  if ! ssh_key_content=$(cat "$SSH_KEY_FILE"); then
    log_error "SSH key file not found: $SSH_KEY_FILE"
    return 1
  fi
  # The API's `sshkeys` parameter takes a URL-encoded value (not base64), and
  # --data-urlencode then encodes it once more for the form body. Spaces
  # must therefore become %20, which _pve_urlencode guarantees.
  ssh_key_encoded=$(_pve_urlencode "$ssh_key_content")

  local -a auth_headers=(
    -H "Cookie: PVEAuthCookie=$ticket"
    -H "CSRFPreventionToken: $csrf_token"
  )
  local node_url="$PROXMOX_URL/api2/json/nodes/$PROXMOX_NODE"
  local template_disk_gb=32 # size the template disk is resized to when built
  local vm_spec vmid name ip cores memory disk_size
  local vm_url clone_response upid task_rc http_code

  for vm_spec in "${VMS[@]}"; do
    read -r vmid name ip cores memory disk_size <<< "$vm_spec"
    vm_url="$node_url/qemu/$vmid"
    log_info "Creating VM $vmid: $name"

    # Clone from template
    log_info "Cloning from template $TEMPLATE_VMID..."
    clone_response=$(curl -s -k -X POST "${auth_headers[@]}" \
      --data-urlencode "newid=$vmid" \
      --data-urlencode "name=$name" \
      "$node_url/qemu/$TEMPLATE_VMID/clone" 2>&1) || true
    # A successful clone returns the task ID; an error body may still carry
    # a "data" key ({"data":null}), so look for the UPID itself.
    upid=$(grep -o '"data":"UPID:[^"]*"' <<< "$clone_response" | cut -d'"' -f4) || upid=""
    if [[ -z "$upid" ]]; then
      log_error "Failed to clone VM: $clone_response"
      continue
    fi

    log_info "Waiting for clone to complete..."
    task_rc=0
    _pve_wait_for_task "$node_url" "$upid" "$ticket" "$csrf_token" || task_rc=$?
    case "$task_rc" in
      0) ;;
      2) log_warn "Timed out waiting for the clone task of VM $vmid; continuing anyway" ;;
      *)
        log_error "Clone task for VM $vmid failed"
        continue
        ;;
    esac

    # Configure VM
    log_info "Configuring VM $vmid..."

    # Set resources
    http_code=$(curl -s -k -o /dev/null -w '%{http_code}' -X POST "${auth_headers[@]}" \
      --data-urlencode "cores=$cores" \
      --data-urlencode "memory=$memory" \
      "$vm_url/config") || http_code="000"
    [[ "$http_code" == 2* ]] \
      || log_warn "Setting cores/memory on VM $vmid failed (HTTP $http_code)"

    # Resize disk if needed. Disks can only grow, so a size below the
    # template's is reported instead of attempting a resize that must fail.
    if (( disk_size > template_disk_gb )); then
      log_info "Resizing disk to ${disk_size}G..."
      # -n: keep ssh from consuming this script's stdin
      ssh -n -i "$SSH_KEY" "root@$PROXMOX_HOST" "qm disk resize $vmid scsi0 ${disk_size}G" \
        || log_warn "Disk resize failed for VM $vmid"
    elif (( disk_size < template_disk_gb )); then
      log_warn "VM $vmid: requested ${disk_size}G is below the ${template_disk_gb}G template disk; shrinking is not possible, keeping ${template_disk_gb}G"
    fi

    # Configure cloud-init with SSH keys and DHCP
    log_info "Configuring cloud-init with SSH keys..."
    http_code=$(curl -s -k -o /dev/null -w '%{http_code}' -X POST "${auth_headers[@]}" \
      --data-urlencode "ipconfig0=ip=dhcp" \
      --data-urlencode "ciuser=ubuntu" \
      --data-urlencode "sshkeys=$ssh_key_encoded" \
      --data-urlencode "agent=1" \
      "$vm_url/config") || http_code="000"
    [[ "$http_code" == 2* ]] \
      || log_warn "Cloud-init configuration of VM $vmid failed (HTTP $http_code)"

    # Start VM
    log_info "Starting VM $vmid..."
    http_code=$(curl -s -k -o /dev/null -w '%{http_code}' -X POST "${auth_headers[@]}" \
      "$vm_url/status/start") || http_code="000"
    if [[ "$http_code" == 2* ]]; then
      log_info "✓ VM $vmid created and started"
    else
      log_error "Failed to start VM $vmid (HTTP $http_code)"
    fi
  done
}
# Wait for the new VMs to boot, look up each VM's address through the guest
# agent helper on the Proxmox host, and probe SSH access as user "ubuntu".
# Globals:   VMS, PROJECT_ROOT, SSH_KEY, PROXMOX_HOST (read);
#            PROXMOX_REMOTE_HELPERS (read, optional) - path of
#            proxmox_vm_helpers.sh on the Proxmox host
# Outputs:   one status line per VM plus a final hint on what to run next
# Returns:   0 in every case; problems are reported as warnings
wait_and_test() {
  log_step "Step 4: Waiting for VMs to Boot and Testing SSH"
  log_info "Waiting 90 seconds for VMs to boot and apply cloud-init..."
  sleep 90

  log_info "Discovering IPs via QEMU Guest Agent..."
  source "$PROJECT_ROOT/scripts/lib/proxmox_vm_helpers.sh" 2>/dev/null || {
    log_warn "Helper library not found, will test SSH manually"
  }

  # Location of the helper library on the Proxmox host. The default is the
  # path this script has always used; override it where the checkout lives
  # somewhere else on the host.
  local remote_helpers="${PROXMOX_REMOTE_HELPERS:-/home/intlc/projects/loc_az_hci/scripts/lib/proxmox_vm_helpers.sh}"
  local remote_helpers_q
  printf -v remote_helpers_q '%q' "$remote_helpers"

  local all_ok=true
  local vm_spec vmid name ip cores memory disk_size discovered_ip

  for vm_spec in "${VMS[@]}"; do
    read -r vmid name ip cores memory disk_size <<< "$vm_spec"

    # Try to get IP from guest agent
    discovered_ip=""
    if command -v get_vm_ip_from_guest_agent &>/dev/null; then
      # -n keeps ssh from consuming this script's stdin. A failed connection
      # is treated as "no IP" rather than aborting the run.
      discovered_ip=$(ssh -n -i "$SSH_KEY" "root@$PROXMOX_HOST" \
        "source $remote_helpers_q 2>/dev/null && \
        get_vm_ip_from_guest_agent $vmid 2>/dev/null || echo ''") || discovered_ip=""
      # Keep only the first line and drop stray whitespace.
      discovered_ip="${discovered_ip%%$'\n'*}"
      discovered_ip="${discovered_ip//[[:space:]]/}"
    fi

    if [[ -n "$discovered_ip" ]]; then
      log_info "VM $vmid ($name): $discovered_ip"
      # Test SSH. BatchMode makes the probe fail fast instead of waiting at a
      # password prompt when the key is not accepted yet.
      if ssh -n -i "$SSH_KEY" -o BatchMode=yes -o ConnectTimeout=5 \
        -o StrictHostKeyChecking=no "ubuntu@$discovered_ip" "echo 'SSH OK'" &>/dev/null; then
        log_info " ✓ SSH working!"
      else
        log_warn " ✗ SSH not working yet (may need more time)"
        all_ok=false
      fi
    else
      log_warn "VM $vmid ($name): IP not discovered yet"
      log_info " Try checking router DHCP leases or wait a bit longer"
      all_ok=false
    fi
  done

  if [[ "$all_ok" == true ]]; then
    log_info ""
    log_info "✓ All VMs recreated successfully with SSH access!"
    log_info "You can now run: ./scripts/deploy/complete-all-next-steps.sh"
  else
    log_warn ""
    log_warn "Some VMs may need more time. Wait a few minutes and test again."
    log_info "Use: ./scripts/ops/ssh-test-all.sh to test SSH access"
  fi
}
# Entry point: confirm with the operator, then run the four rebuild steps in
# order and print a summary.
# Globals:   SSH_KEY_FILE (read)
# Arguments: none are used
# Returns:   0 after the summary. Exits 1 if the public key file is missing,
#            exits 0 if the operator does not answer "yes".
main() {
  log_step "Recreate Template and VMs with Proper Cloud-Init"

  if [[ ! -f "$SSH_KEY_FILE" ]]; then
    log_error "SSH key file not found: $SSH_KEY_FILE"
    exit 1
  fi

  log_warn "This will:"
  log_warn " 1. Destroy and recreate template VM 9000"
  log_warn " 2. Destroy existing VMs 100-103"
  log_warn " 3. Recreate VMs 100-103 from new template"
  log_warn " 4. Configure all VMs with SSH keys via cloud-init"
  echo ""

  # Auto-confirm if running non-interactively
  local confirm=""
  if [[ -t 0 ]]; then
    # -r keeps backslashes literal; EOF (Ctrl-D) counts as "no" instead of
    # aborting silently under set -e.
    read -r -p "Continue? (yes/no): " confirm || confirm=""
    if [[ "$confirm" != "yes" ]]; then
      log_info "Cancelled"
      exit 0
    fi
  else
    log_info "Non-interactive mode: auto-confirming"
  fi

  # The steps are called as plain commands on purpose: putting them behind
  # `||` or `if` would switch off errexit inside them.
  recreate_template
  destroy_existing_vms
  create_vms_from_template
  wait_and_test

  log_step "Summary"
  log_info "✓ Template VM 9000 recreated with proper cloud-init"
  log_info "✓ VMs 100-103 recreated from template"
  log_info "✓ SSH keys configured via cloud-init"
  log_info "✓ VMs using DHCP (no IP conflicts)"
  log_info ""
  log_info "Next: Test SSH access and install QEMU Guest Agent"
}
# Run the script: hand every command-line argument through to main.
main "$@"