Files
loc_az_hci/scripts/deploy/run-all-next-steps.sh
defiQUG c39465c2bd
Some checks failed
Test / test (push) Has been cancelled
Initial commit: loc_az_hci (smom-dbis-138 excluded via .gitignore)
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-08 09:04:46 -08:00

263 lines
8.6 KiB
Bash
Executable File

#!/bin/bash
# Pull in the invoking user's shell configuration. This happens before strict
# mode is switched on below, so an error while sourcing it is not fatal.
# NOTE(review): presumably needed for PATH or tool setup - confirm it is required.
source ~/.bashrc
# Run and Complete All Next Steps
# Comprehensive script to complete all remaining deployment tasks
#
# Strict mode: abort on unhandled command failures (-e), on expansion of unset
# variables (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail
# Absolute directory containing this script, independent of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Project root is two directory levels above the script directory.
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Load environment variables from $PROJECT_ROOT/.env when that file exists.
# The filter drops lines starting with '#', drops empty lines, strips everything
# from the first '#' to the end of each remaining line, and keeps only lines
# containing '='. `set -a` marks every variable assigned while sourcing for
# export; `set +a` turns that off again afterwards.
# NOTE(review): because of the sed step, a value that itself contains '#'
# (for example a password) is truncated at that character.
if [ -f "$PROJECT_ROOT/.env" ]; then
set -a
source <(grep -v '^#' "$PROJECT_ROOT/.env" | grep -v '^$' | sed 's/#.*$//' | grep '=')
set +a
fi
# ANSI color escape sequences. They are stored as literal backslash sequences
# and only turned into real escape characters by `echo -e` at print time.
NC='\033[0m'
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'

# Print message $3 preceded by the tag "[$2]" rendered in color $1.
_log_tagged() {
  echo -e "${1}[${2}]${NC} ${3}"
}

# Informational message (green [INFO] tag) on stdout.
log_info() {
  _log_tagged "$GREEN" "INFO" "$1"
}

# Warning message (yellow [WARN] tag) on stdout.
log_warn() {
  _log_tagged "$YELLOW" "WARN" "$1"
}

# Error message (red [ERROR] tag) on stdout.
log_error() {
  _log_tagged "$RED" "ERROR" "$1"
}

# Section banner: a blank line followed by "=== <title> ===" in blue.
log_step() {
  local title=$1
  echo -e "\n${BLUE}=== ${title} ===${NC}"
}
# Proxmox host reached over SSH; taken from PROXMOX_ML110_IP when that is set.
PROXMOX_HOST="${PROXMOX_ML110_IP:-192.168.1.206}"

# Private key used for SSH, and the login user on the guest VMs. Both keep a
# non-empty value supplied by the environment and otherwise get a default.
: "${SSH_KEY:=$HOME/.ssh/id_ed25519_proxmox}"
: "${VM_USER:=ubuntu}"

# Option string handed to every ssh invocation (expanded unquoted by callers).
SSH_OPTS="-i ${SSH_KEY} -o StrictHostKeyChecking=no"

# VMID of the template that new VMs are cloned from.
TEMPLATE_VMID=9000

# VM definitions, one entry per VM: "vmid name cores memory disk_size"
VMS=()
VMS+=("100 cloudflare-tunnel 2 2048 20")
VMS+=("101 k3s-master 4 4096 40")
VMS+=("102 git-server 2 2048 30")
# Helper functions will be sourced on Proxmox host via SSH
# We don't source locally since qm command is not available
# Step 1: Create missing VMs from improved template

# Percent-encode $1, leaving only RFC 3986 unreserved characters untouched.
# The result is written to stdout without a trailing newline.
_pve_urlencode() {
  local LC_ALL=C # walk the string byte by byte
  local raw=$1 encoded="" ch hex i
  for ((i = 0; i < ${#raw}; i++)); do
    ch=${raw:i:1}
    case "$ch" in
      [a-zA-Z0-9.~_-]) encoded+=$ch ;;
      *)
        printf -v hex '%%%02X' "'$ch"
        encoded+=$hex
        ;;
    esac
  done
  printf '%s' "$encoded"
}

# POST to a Proxmox API URL using the session ticket and CSRF token.
# Arguments: $1 url, $2 ticket, $3 CSRF token, remaining args go to curl
#            unchanged (for example: -d key=value).
# Outputs:   the response body on stdout.
# Returns:   0 only when the HTTP status is 2xx. The body alone is not a
#            reliable indicator because error replies also carry a "data" key.
_pve_api_post() {
  local url=$1 ticket=$2 csrf_token=$3
  shift 3
  local reply status
  reply=$(curl -s -k -X POST -w '\n%{http_code}' \
    -H "Cookie: PVEAuthCookie=$ticket" \
    -H "CSRFPreventionToken: $csrf_token" \
    "$@" "$url" 2>&1) || true
  # The -w format appends the status code on a line of its own.
  status=${reply##*$'\n'}
  if [[ "$reply" == *$'\n'* ]]; then
    printf '%s\n' "${reply%$'\n'*}"
  fi
  [[ "$status" == 2[0-9][0-9] ]]
}

#######################################
# Clone, configure and start every VM listed in VMS that does not exist yet.
# Globals (read):
#   VMS, TEMPLATE_VMID, SSH_KEY, SSH_OPTS, PROXMOX_HOST, VM_USER,
#   PROXMOX_ML110_URL (default https://192.168.1.206:8006),
#   PROXMOX_NODE (default pve)
# Calls:
#   get_api_token, log_step/log_info/log_warn/log_error, ssh, curl
# Returns:
#   1 when authentication fails or "$SSH_KEY.pub" cannot be read; 0 otherwise.
#   Failures for an individual VM are logged and the loop moves on.
#######################################
create_missing_vms() {
  log_step "Step 1: Creating Missing VMs from Template ${TEMPLATE_VMID}"

  local tokens ticket csrf_token
  tokens=$(get_api_token) || tokens=""
  ticket=${tokens%%|*}
  csrf_token=${tokens#*|}
  # Require both halves: a rejected login may come back as a bare "|".
  if [[ "$tokens" != *"|"* || -z "$ticket" || -z "$csrf_token" ]]; then
    log_error "Failed to authenticate with Proxmox"
    return 1
  fi

  local PROXMOX_URL="${PROXMOX_ML110_URL:-https://192.168.1.206:8006}"
  local PROXMOX_NODE="${PROXMOX_NODE:-pve}"
  local api_base="$PROXMOX_URL/api2/json/nodes/$PROXMOX_NODE/qemu"

  # Read SSH key
  local ssh_key_file="$SSH_KEY.pub"
  if [[ ! -f "$ssh_key_file" ]]; then
    log_error "SSH key file not found: $ssh_key_file"
    return 1
  fi
  local ssh_key_content encoded_keys
  if ! ssh_key_content=$(<"$ssh_key_file"); then
    log_error "SSH key file not found: $ssh_key_file"
    return 1
  fi
  # The API expects the sshkeys VALUE to already be URL-encoded; curl's
  # --data-urlencode below then adds the transport-level encoding on top.
  encoded_keys=$(_pve_urlencode "$ssh_key_content")

  local vm_spec vmid name cores memory disk_size reply
  local created=0
  for vm_spec in "${VMS[@]}"; do
    read -r vmid name cores memory disk_size <<< "$vm_spec"

    # Check if VM already exists
    # shellcheck disable=SC2086 # SSH_OPTS is an option string, split on purpose
    if ssh $SSH_OPTS "root@$PROXMOX_HOST" "qm config $vmid &>/dev/null"; then
      log_info "VM $vmid ($name) already exists, skipping"
      continue
    fi

    log_info "Creating VM $vmid: $name (cores=$cores, memory=${memory}MB, disk=${disk_size}G)"

    # Clone from template
    if ! reply=$(_pve_api_post "$api_base/$TEMPLATE_VMID/clone" "$ticket" "$csrf_token" \
      -d "newid=$vmid" -d "name=$name"); then
      log_error "Failed to clone VM: $reply"
      continue
    fi
    log_info "Waiting for clone to complete..."
    sleep 10

    # Configure VM resources
    log_info "Configuring VM resources..."
    if ! reply=$(_pve_api_post "$api_base/$vmid/config" "$ticket" "$csrf_token" \
      -d "cores=$cores" -d "memory=$memory"); then
      log_warn "Failed to set cores/memory on VM $vmid: $reply"
    fi

    # Resize disk if needed (32 is the size this script assumes for the template)
    if [[ "$disk_size" != "32" ]]; then
      log_info "Resizing disk to ${disk_size}G..."
      # shellcheck disable=SC2086
      if ! ssh $SSH_OPTS "root@$PROXMOX_HOST" \
        "qm disk resize $vmid scsi0 ${disk_size}G" 2>/dev/null; then
        log_warn "Disk resize of VM $vmid to ${disk_size}G failed, keeping current size"
      fi
    fi

    # Configure cloud-init with SSH keys and DHCP. The cloud-init user follows
    # VM_USER so it matches the account used for the later SSH checks.
    log_info "Configuring cloud-init with SSH keys..."
    if ! reply=$(_pve_api_post "$api_base/$vmid/config" "$ticket" "$csrf_token" \
      --data-urlencode "ipconfig0=ip=dhcp" \
      --data-urlencode "ciuser=${VM_USER:-ubuntu}" \
      --data-urlencode "sshkeys=${encoded_keys}"); then
      log_warn "Failed to configure cloud-init on VM $vmid: $reply"
    fi

    # Start VM
    log_info "Starting VM $vmid..."
    if ! reply=$(_pve_api_post "$api_base/$vmid/status/start" "$ticket" "$csrf_token"); then
      log_error "Failed to start VM $vmid: $reply"
      continue
    fi

    created=$((created + 1))
    log_info "✓ VM $vmid created and started"
  done

  # Only wait for guests to boot when this run actually started something.
  if ((created > 0)); then
    log_info "Waiting 60 seconds for VMs to boot..."
    sleep 60
  else
    log_info "No new VMs were created, skipping boot wait"
  fi
  return 0
}
#######################################
# Log in to the Proxmox VE API and print the session credentials.
# Globals (read):
#   PROXMOX_ML110_URL  API base URL (default https://192.168.1.206:8006)
#   PVE_USERNAME       login name (default root@pam)
#   PVE_ROOT_PASS      password for that login (default empty)
# Outputs:
#   "<ticket>|<CSRFPreventionToken>" on stdout when both values were found in
#   the reply; nothing otherwise.
# Returns:
#   0 on success, 1 when the login failed or the reply could not be parsed.
# NOTE(review): the password is passed to curl on its command line and is
# therefore visible in the process list while the request runs.
#######################################
get_api_token() {
  local PROXMOX_URL="${PROXMOX_ML110_URL:-https://192.168.1.206:8006}"
  local PVE_USERNAME="${PVE_USERNAME:-root@pam}"
  local PVE_PASSWORD="${PVE_ROOT_PASS:-}"
  local response ticket="" csrf_token=""
  local ticket_re='"ticket":"([^"]+)"'
  local csrf_re='"CSRFPreventionToken":"([^"]+)"'

  # Each field is URL-encoded separately so characters such as '&', '=', '+'
  # or spaces in the credentials cannot corrupt the form body.
  response=$(curl -s -k --connect-timeout 10 --max-time 15 \
    --data-urlencode "username=$PVE_USERNAME" \
    --data-urlencode "password=$PVE_PASSWORD" \
    "$PROXMOX_URL/api2/json/access/ticket" 2>&1) || true

  if [[ "$response" =~ $ticket_re ]]; then
    ticket=${BASH_REMATCH[1]}
  fi
  if [[ "$response" =~ $csrf_re ]]; then
    csrf_token=${BASH_REMATCH[1]}
  fi

  # A rejected login still produces a body containing "data", so success is
  # judged by the presence of both tokens rather than by that key.
  if [[ -z "$ticket" || -z "$csrf_token" ]]; then
    return 1
  fi
  printf '%s|%s\n' "$ticket" "$csrf_token"
}
# Step 2: Verify SSH and QGA for all VMs
#######################################
# For VMs 100-103: look up the guest IP through the helper library on the
# Proxmox host, test SSH reachability and report the QEMU guest agent state.
# Globals (read):
#   SSH_OPTS, PROXMOX_HOST, VM_USER,
#   PROXMOX_VM_HELPERS  path of proxmox_vm_helpers.sh ON THE PROXMOX HOST
#                       (defaults to the previously hard-coded location)
# Returns:
#   0 always; problems are reported as warnings and the run continues.
#######################################
verify_vms() {
  log_step "Step 2: Verifying VMs (SSH and QGA)"

  local helpers="${PROXMOX_VM_HELPERS:-/home/intlc/projects/loc_az_hci/scripts/lib/proxmox_vm_helpers.sh}"
  local all_vms=("100 cloudflare-tunnel" "101 k3s-master" "102 git-server" "103 observability")
  local all_ok=true
  local vm_spec vmid name ip qga_state

  for vm_spec in "${all_vms[@]}"; do
    read -r vmid name <<< "$vm_spec"
    log_info "Checking VM $vmid ($name)..."

    # Get IP via guest agent (running on Proxmox host)
    # shellcheck disable=SC2086 # SSH_OPTS is an option string, split on purpose
    ip=$(ssh $SSH_OPTS "root@$PROXMOX_HOST" \
      "source $helpers 2>/dev/null && get_vm_ip_from_guest_agent $vmid 2>/dev/null || echo ''" \
      2>/dev/null) || ip=""

    if [[ -z "$ip" ]]; then
      log_warn "  VM $vmid: Could not get IP (may still be booting)"
      all_ok=false
      continue
    fi
    log_info "  IP: $ip"

    # Test SSH
    # shellcheck disable=SC2086
    if ssh $SSH_OPTS -o ConnectTimeout=5 "${VM_USER}@${ip}" "echo 'SSH OK'" &>/dev/null; then
      log_info "  ✓ SSH working"

      # Check QGA. The state word is compared exactly: a substring match on
      # "active" would also accept "inactive".
      # shellcheck disable=SC2086
      qga_state=$(ssh $SSH_OPTS -o ConnectTimeout=5 "${VM_USER}@${ip}" \
        "systemctl is-active qemu-guest-agent 2>/dev/null") || true
      if [[ "$qga_state" == "active" ]]; then
        log_info "  ✓ QEMU Guest Agent active"
      else
        log_warn "  ⚠ QEMU Guest Agent not active (should be pre-installed from template)"
      fi
    else
      log_warn "  ✗ SSH not working yet"
      all_ok=false
    fi
  done

  if [[ "$all_ok" == false ]]; then
    log_warn "Some VMs may need more time to boot. Continuing anyway..."
  fi
}
# Step 3: Deploy Gitea on VM 102
# Hands over to scripts/deploy/deploy-gitea.sh below PROJECT_ROOT. When that
# file does not exist a warning is logged and the step is skipped; otherwise
# the function's exit status is the script's exit status.
deploy_gitea() {
  log_step "Step 3: Deploying Gitea on VM 102"

  local gitea_script="$PROJECT_ROOT/scripts/deploy/deploy-gitea.sh"

  if [[ ! -f "$gitea_script" ]]; then
    log_warn "Gitea deployment script not found, skipping"
    return
  fi

  "$gitea_script"
}
# Step 4: Deploy Observability on VM 103
# Hands over to scripts/deploy/deploy-observability.sh below PROJECT_ROOT. When
# that file does not exist a warning is logged and the step is skipped;
# otherwise the function's exit status is the script's exit status.
deploy_observability() {
  log_step "Step 4: Deploying Observability Stack on VM 103"

  local observability_script="$PROJECT_ROOT/scripts/deploy/deploy-observability.sh"

  if [[ ! -f "$observability_script" ]]; then
    log_warn "Observability deployment script not found, skipping"
    return
  fi

  "$observability_script"
}
# Step 5: Final Status Report
#######################################
# Print the `qm list` rows for VMs 100-103, each VM's IP as reported by the
# helper library on the Proxmox host, and the service URLs.
# Globals (read):
#   SSH_OPTS, PROXMOX_HOST,
#   PROXMOX_VM_HELPERS  path of proxmox_vm_helpers.sh ON THE PROXMOX HOST
#                       (defaults to the previously hard-coded location)
# Returns:
#   0 always. This is a report, so an unreachable host or a missing VM is
#   shown as a warning / "N/A" instead of aborting the script under `set -e`.
#######################################
final_status() {
  log_step "Final Status Report"

  local helpers="${PROXMOX_VM_HELPERS:-/home/intlc/projects/loc_az_hci/scripts/lib/proxmox_vm_helpers.sh}"

  log_info "VM Status:"
  # awk keeps the header row and matches the VMID column exactly; unlike the
  # substring grep it also exits 0 when none of the VMs exist.
  # shellcheck disable=SC2086 # SSH_OPTS is an option string, split on purpose
  if ! ssh $SSH_OPTS "root@$PROXMOX_HOST" \
    "qm list | awk 'NR == 1 || \$1 ~ /^(100|101|102|103)\$/'"; then
    log_warn "  Could not query VM status from $PROXMOX_HOST"
  fi

  echo ""
  log_info "VM IPs (via Guest Agent):"
  local all_vms=("100 cloudflare-tunnel" "101 k3s-master" "102 git-server" "103 observability")
  # Placeholders are kept for any VM whose address could not be determined.
  local gitea_host="<VM-102-IP>" observability_host="<VM-103-IP>"
  local vm_spec vmid name ip
  for vm_spec in "${all_vms[@]}"; do
    read -r vmid name <<< "$vm_spec"
    # shellcheck disable=SC2086
    ip=$(ssh $SSH_OPTS "root@$PROXMOX_HOST" \
      "source $helpers 2>/dev/null && get_vm_ip_from_guest_agent $vmid 2>/dev/null || echo 'N/A'") || ip=""
    if [[ -z "$ip" ]]; then
      ip="N/A"
    fi
    log_info "  VM $vmid ($name): $ip"

    if [[ "$ip" != "N/A" ]]; then
      case "$vmid" in
        102) gitea_host=$ip ;;
        103) observability_host=$ip ;;
      esac
    fi
  done

  echo ""
  log_info "Service URLs:"
  log_info "  Gitea: http://${gitea_host}:3000"
  log_info "  Prometheus: http://${observability_host}:9090"
  log_info "  Grafana: http://${observability_host}:3000 (admin/admin)"
  echo ""
  log_info "✓ All next steps completed!"
}
# Orchestrates the whole run: prints the banner, then invokes every step
# function exactly once in the listed order. The return status is that of the
# last step executed.
main() {
  log_step "Running All Next Steps"

  local step
  for step in \
    create_missing_vms \
    verify_vms \
    deploy_gitea \
    deploy_observability \
    final_status; do
    "$step"
  done
}
# Script entry point: run main, forwarding the command-line arguments unchanged.
main "$@"