Files
proxmox/scripts/maintenance/resolve-and-fix-all-via-proxmox-ssh.sh
defiQUG bea1903ac9
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Sync all local changes: docs, config, scripts, submodule refs, verification evidence
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 15:46:06 -08:00

135 lines
6.1 KiB
Bash
Executable File

#!/usr/bin/env bash
# SSH to Proxmox host(s) and apply fixes: Dev VM (5700) IP → .59, start stopped containers, ensure DBIS services.
#
# Usage: ./scripts/maintenance/resolve-and-fix-all-via-proxmox-ssh.sh [--dry-run]
#   --dry-run    Only log the stop/reconfigure/start actions that would be taken
#                (read-only status queries over SSH are still performed).
#   -h, --help   Print usage and exit.
# Any other argument is rejected with exit status 2, so a mistyped --dry-run
# can never silently turn into a live run.
#
# Run from a machine that can SSH to 192.168.11.11 (and optionally 192.168.11.10).
# Env: PROXMOX_HOST_R630_01 (default 192.168.11.11), PROXMOX_HOST_R630_02 (default 192.168.11.12),
#      PROXMOX_HOST_ML110 (default 192.168.11.10), IP_DEV_VM (default 192.168.11.59),
#      NETWORK_GATEWAY (default 192.168.11.1)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional project settings: sourced when the files exist, before the defaults
# below are applied. Failures while sourcing are deliberately ignored.
[[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]] && source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true
[[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]] && source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Print a one-line usage summary to stdout.
_usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
}

# Parse command-line arguments.
# Globals:   DRY_RUN (written; set to "true" when --dry-run is given)
# Arguments: the script's positional parameters
# Returns:   0 on success; exits 0 after --help, exits 2 on an unknown argument
_parse_cli_args() {
  local arg
  for arg; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        _usage
        exit 0
        ;;
      *)
        printf 'Unknown argument: %s\n' "$arg" >&2
        _usage >&2
        exit 2
        ;;
    esac
  done
}

# Reset after sourcing so a DRY_RUN value from the sourced files cannot leak in.
DRY_RUN=false
_parse_cli_args "$@"

# Target hosts and network settings; environment (or sourced config) wins over defaults.
R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
DEV_VM_IP="${IP_DEV_VM:-192.168.11.59}"
GATEWAY="${NETWORK_GATEWAY:-192.168.11.1}"
# Colored status loggers. Each prints one line to stdout: an ANSI-colored tag
# followed by the message in $1. Backslash escapes in the message are
# interpreted because of `echo -e`.

# Blue [INFO] line.
log_info() {
  local message="$1"
  echo -e "\033[0;34m[INFO]\033[0m ${message}"
}

# Green [✓] line.
log_ok() {
  local message="$1"
  echo -e "\033[0;32m[✓]\033[0m ${message}"
}

# Yellow [⚠] line.
log_warn() {
  local message="$1"
  echo -e "\033[0;33m[⚠]\033[0m ${message}"
}

# Red [✗] line.
log_err() {
  local message="$1"
  echo -e "\033[0;31m[✗]\033[0m ${message}"
}
# Run a command on a remote host as root over SSH.
# Arguments: $1 - host name or address; remaining arguments are passed to ssh
#            after the destination (i.e. the remote command).
# Returns:   the exit status of ssh.
run_ssh() {
  local target="$1"
  # 10 s connect timeout; host keys of unknown hosts are accepted automatically.
  local -a ssh_opts=(-o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new)
  shift
  ssh "${ssh_opts[@]}" "root@${target}" "$@"
}
# Banner: show the targeted hosts and whether this is a dry run.
echo
printf '%s\n' "=== Resolve and fix all via Proxmox SSH ==="
printf '%s\n' " r630-01: $R630_01 ml110: $ML110 dry-run=$DRY_RUN"
echo

# --- 1. Check SSH to r630-01 ---
# r630-01 is mandatory: abort with status 1 when it cannot be reached.
if run_ssh "$R630_01" "echo OK" 2>/dev/null; then
  log_ok "SSH to $R630_01 OK"
else
  log_err "Cannot SSH to $R630_01. Run from LAN with key-based auth to root@$R630_01."
  exit 1
fi
# --- 2. Dev VM (5700): on r630-02 (migrated 2026-02-15), set IP to .59 and reboot ---

# Return 0 when a `pct config` net0 line already assigns exactly the given IPv4 address.
# Arguments: $1 - the net0 line (may be empty), $2 - IPv4 address without prefix length
# The match is a literal "ip=<addr>/" substring, so dots are not regex wildcards,
# 192.168.11.5 does not match 192.168.11.59, and an address that only appears
# as the gateway (gw=...) does not count.
_net0_has_ip() {
  local net0_line="$1" ip="$2"
  [[ "$net0_line" == *"ip=${ip}/"* ]]
}

VMID_DEV=5700
HOST_5700="$R630_01"
# "missing" is substituted when the ssh command returns non-zero; an empty
# result is treated the same way below.
status_5700=$(run_ssh "$R630_01" "pct status $VMID_DEV 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
if [[ "$status_5700" == "missing" || -z "$status_5700" ]]; then
  # Not found on r630-01: look on r630-02 instead.
  status_5700=$(run_ssh "$R630_02" "pct status $VMID_DEV 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
  HOST_5700="$R630_02"
fi

if [[ "$status_5700" == "missing" || -z "$status_5700" ]]; then
  log_info "VMID $VMID_DEV not found on $R630_01 or $R630_02; skip IP change."
else
  current_net=$(run_ssh "$HOST_5700" "pct config $VMID_DEV 2>/dev/null | grep -E '^net0:'" 2>/dev/null || echo "")
  if _net0_has_ip "$current_net" "$DEV_VM_IP"; then
    log_ok "VMID $VMID_DEV already has IP $DEV_VM_IP (on $HOST_5700)"
  elif [[ "$DRY_RUN" == true ]]; then
    log_info "Would run on $HOST_5700: pct stop $VMID_DEV; pct set $VMID_DEV --net0 ...; pct start $VMID_DEV"
  else
    log_info "Stopping VMID $VMID_DEV on $HOST_5700, setting IP to $DEV_VM_IP, then starting..."
    # Best-effort stop: the container may already be stopped.
    run_ssh "$HOST_5700" "pct stop $VMID_DEV" 2>/dev/null || true
    sleep 2
    # A failed reconfiguration is fatal; the container is left stopped.
    run_ssh "$HOST_5700" "pct set $VMID_DEV --net0 name=eth0,bridge=vmbr0,ip=$DEV_VM_IP/24,gw=$GATEWAY" || { log_err "pct set $VMID_DEV failed"; exit 1; }
    # A failed start is reported with a recovery hint, but the script carries on.
    if ! run_ssh "$HOST_5700" "pct start $VMID_DEV" 2>&1; then
      log_err "VMID $VMID_DEV failed to start. If exit 32 (mount): on host run e2fsck -f -y /dev/pve/vm-${VMID_DEV}-disk-0 (after lvchange -ay), then lvchange -an and pct start $VMID_DEV"
    else
      log_ok "VMID $VMID_DEV reconfigured to $DEV_VM_IP and started (on $HOST_5700)"
    fi
  fi
fi
# --- 3. Start stopped containers on r630-01 (RPC 2101, DBIS if present) ---
# For each VMID: skip it when it cannot be queried, report it when already
# running, otherwise start it (or only log the intent in dry-run mode).
for vmid in 2101 10130 10150 10151 10100 10101 10120; do
  # "missing" is substituted when the ssh command returns non-zero.
  status=$(run_ssh "$R630_01" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
  if [[ "$status" == "missing" || -z "$status" ]]; then
    continue
  fi
  if [[ "$status" == "running" ]]; then
    log_ok "r630-01 VMID $vmid: already running"
    continue
  fi
  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would start VMID $vmid on $R630_01"
    continue
  fi
  log_info "Starting VMID $vmid on $R630_01..."
  # Explicit if/else: the warning depends only on the outcome of pct start,
  # never on the exit status of the success logger.
  if run_ssh "$R630_01" "pct start $vmid" 2>/dev/null; then
    log_ok "VMID $vmid started"
  else
    log_warn "VMID $vmid start failed"
  fi
done
# --- 4. Ensure DBIS services (nginx/node) inside containers on r630-01 ---
# Best-effort: a unit that fails to start never aborts the script, but the
# outcome is reported truthfully instead of always claiming success.
for vmid in 10130 10150 10151; do
  status=$(run_ssh "$R630_01" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
  # Services can only be started inside a running container.
  [[ "$status" == "running" ]] || continue
  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would ensure nginx/node in VMID $vmid"
    continue
  fi
  failed_units=""
  for unit in nginx node; do
    # Remote stderr stays suppressed, as before; only the exit status is used.
    run_ssh "$R630_01" "pct exec $vmid -- systemctl start $unit 2>/dev/null" || failed_units+="${failed_units:+ }${unit}"
  done
  if [[ -z "$failed_units" ]]; then
    log_ok "VMID $vmid services (nginx/node) started"
  else
    log_warn "VMID $vmid: failed to start service(s): $failed_units"
  fi
done
# --- 5. ML110: start stopped DBIS containers ---
# ML110 is optional: when it cannot be reached the whole section is skipped
# with a warning instead of failing the script.
if run_ssh "$ML110" "echo OK" 2>/dev/null; then
  # 5a. Start containers that exist on ML110 but are not running.
  for vmid in 10130 10150 10151 10100 10101 10120; do
    # "missing" is substituted when the ssh command returns non-zero.
    status=$(run_ssh "$ML110" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    [[ "$status" == "missing" || -z "$status" ]] && continue
    [[ "$status" == "running" ]] && continue
    if [[ "$DRY_RUN" == true ]]; then
      log_info "Would start VMID $vmid on $ML110"
      continue
    fi
    log_info "Starting VMID $vmid on $ML110..."
    # Explicit if/else: the warning depends only on the outcome of pct start.
    if run_ssh "$ML110" "pct start $vmid" 2>/dev/null; then
      log_ok "ML110 VMID $vmid started"
    else
      log_warn "ML110 VMID $vmid start failed"
    fi
  done

  # 5b. Ensure nginx/node inside the running DBIS containers (best-effort).
  for vmid in 10130 10150 10151; do
    status=$(run_ssh "$ML110" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    [[ "$status" == "running" ]] || continue
    if [[ "$DRY_RUN" == true ]]; then
      # Logged so dry-run output is as informative here as for r630-01.
      log_info "Would ensure nginx/node in ML110 VMID $vmid"
      continue
    fi
    failed_units=""
    for unit in nginx node; do
      # Remote stderr stays suppressed, as before; only the exit status is used.
      run_ssh "$ML110" "pct exec $vmid -- systemctl start $unit 2>/dev/null" || failed_units+="${failed_units:+ }${unit}"
    done
    # Report the real outcome rather than always claiming success.
    if [[ -z "$failed_units" ]]; then
      log_ok "ML110 VMID $vmid services started"
    else
      log_warn "ML110 VMID $vmid: failed to start service(s): $failed_units"
    fi
  done
else
  log_warn "Cannot SSH to $ML110; skipped ML110 container start."
fi
# Closing summary: confirm completion and list the manual follow-up commands.
printf '\n'
log_ok "Done. Next: update NPMplus Fourth proxy to $DEV_VM_IP:3000 (gitea/dev/codespaces) if not already:"
printf '%s\n' \
  " NPM_PASSWORD=xxx bash scripts/nginx-proxy-manager/update-npmplus-fourth-proxy-hosts.sh" \
  " Then: bash scripts/verify/verify-end-to-end-routing.sh" \
  ""