Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Co-authored-by: Cursor <cursoragent@cursor.com>
135 lines
6.1 KiB
Bash
Executable File
135 lines
6.1 KiB
Bash
Executable File
#!/usr/bin/env bash
# SSH to the Proxmox host(s) and apply fixes:
#   - Dev VM (VMID 5700): set its IP to .59 (default 192.168.11.59)
#   - start stopped containers
#   - ensure DBIS services (nginx/node) are started
#
# Usage: ./scripts/maintenance/resolve-and-fix-all-via-proxmox-ssh.sh [--dry-run]
# Run from a machine that can SSH to 192.168.11.11 (and optionally 192.168.11.10).
# Env (all optional):
#   PROXMOX_HOST_R630_01  (default 192.168.11.11)
#   PROXMOX_HOST_R630_02  (default 192.168.11.12)
#   PROXMOX_HOST_ML110    (default 192.168.11.10)
#   IP_DEV_VM             (default 192.168.11.59)
#   NETWORK_GATEWAY       (default 192.168.11.1)

# Strict mode: exit on unhandled errors, on unset variables, and when any
# stage of a pipeline fails.
set -euo pipefail
# Absolute directory of this script, and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Optional project configuration. Each file is sourced only when it exists;
# its stderr is discarded and a failure while sourcing is ignored on purpose,
# so a missing or broken config never aborts the script (the defaults defined
# further down are used instead). The files are handled one after the other so
# that the second path is built from PROJECT_ROOT as it is after the first
# file has been sourced.
if [[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]]; then
  # shellcheck disable=SC1091
  source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true
fi
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  # shellcheck disable=SC1091
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi
# Print the command-line synopsis to stdout.
print_usage() {
  printf 'Usage: %s [--dry-run]\n' "${0##*/}"
}

# Parse the command line into the global DRY_RUN ("true" or "false").
# Arguments: the script's arguments ("$@")
# Exits:     0 after printing usage for -h/--help,
#            2 on any unrecognised argument. Rejecting unknown arguments
#            matters here: a mistyped "--dry-run" must never fall through to
#            the live path, which stops and reconfigures containers.
parse_args() {
  local arg
  DRY_RUN=false
  for arg in "$@"; do
    case "$arg" in
      --dry-run)
        DRY_RUN=true
        ;;
      -h | --help)
        print_usage
        exit 0
        ;;
      *)
        printf 'Unknown argument: %s\n' "$arg" >&2
        print_usage >&2
        exit 2
        ;;
    esac
  done
}

parse_args "$@"
# Proxmox host addresses. Each one falls back to a built-in default when the
# corresponding environment variable is unset or empty.
R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"

# Target address for the Dev VM and the gateway written into its net0 config.
DEV_VM_IP="${IP_DEV_VM:-192.168.11.59}"
GATEWAY="${NETWORK_GATEWAY:-192.168.11.1}"
# Print an informational message to stdout, prefixed with a blue "[INFO]" tag.
# Arguments: $1 - message text; printed verbatim (backslash sequences in the
#                 message are not interpreted)
log_info() {
  printf '\033[0;34m[INFO]\033[0m %s\n' "${1-}"
}
# Print a success message to stdout, prefixed with a green "[✓]" tag.
# Arguments: $1 - message text; printed verbatim (backslash sequences in the
#                 message are not interpreted)
log_ok() {
  printf '\033[0;32m[✓]\033[0m %s\n' "${1-}"
}
# Print a warning message to stdout, prefixed with a yellow "[⚠]" tag.
# Arguments: $1 - message text; printed verbatim (backslash sequences in the
#                 message are not interpreted)
log_warn() {
  printf '\033[0;33m[⚠]\033[0m %s\n' "${1-}"
}
# Print an error message to stdout, prefixed with a red "[✗]" tag.
# Arguments: $1 - message text; printed verbatim (backslash sequences in the
#                 message are not interpreted)
log_err() {
  printf '\033[0;31m[✗]\033[0m %s\n' "${1-}"
}
# Run a command on a host over SSH as root.
# Arguments:
#   $1   - host name or IP address (required)
#   $2.. - remote command, handed to ssh unchanged
# Returns: the exit status of ssh.
run_ssh() {
  local host="${1:?run_ssh: host argument is required}"
  shift
  # ConnectTimeout bounds the wait for an unreachable host to 10 seconds;
  # StrictHostKeyChecking=accept-new is the host-key policy used for every
  # connection this script makes.
  ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=accept-new "root@${host}" "$@"
}
# Banner: show the target hosts and whether this is a dry run.
echo ""
echo "=== Resolve and fix all via Proxmox SSH ==="
echo " r630-01: $R630_01 ml110: $ML110 dry-run=$DRY_RUN"
echo ""
# --- 1. Check SSH to r630-01 ---
# Abort with status 1 when r630-01 cannot be reached; ssh's own error output
# is suppressed in favour of the hint below.
if ! run_ssh "$R630_01" "echo OK" 2>/dev/null; then
  log_err "Cannot SSH to $R630_01. Run from LAN with key-based auth to root@$R630_01."
  exit 1
fi
log_ok "SSH to $R630_01 OK"
# --- 2. Dev VM (5700): on r630-02 (migrated 2026-02-15), set IP to .59 and reboot ---
# The container is looked up on r630-01 first and on r630-02 as a fallback.
VMID_DEV=5700
HOST_5700="$R630_01"

# Succeed when a `pct config` net0 line assigns exactly the given address.
# The match is anchored on "ip=<addr>/" so that neither the gateway field nor
# a longer address sharing the same prefix is mistaken for the container IP.
# Arguments: $1 - net0 line (may be empty)
#            $2 - IPv4 address without prefix length
net0_has_ip() {
  [[ "$1" == *"ip=$2/"* ]]
}

# Ensure the Dev VM container has DEV_VM_IP configured on net0.
# Globals: VMID_DEV, R630_01, R630_02, DEV_VM_IP, GATEWAY, DRY_RUN (read);
#          HOST_5700 (written: the host the container was looked up on last)
# Exits:   1 when `pct set` fails. A failed `pct start` is only logged.
reconfigure_dev_vm() {
  local status net0

  # "missing" means ssh itself failed; an empty status means pct printed
  # nothing for this VMID on that host.
  HOST_5700="$R630_01"
  status=$(run_ssh "$R630_01" "pct status $VMID_DEV 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
  if [[ "$status" == "missing" || -z "$status" ]]; then
    status=$(run_ssh "$R630_02" "pct status $VMID_DEV 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    HOST_5700="$R630_02"
  fi
  if [[ "$status" == "missing" || -z "$status" ]]; then
    log_info "VMID $VMID_DEV not found on $R630_01 or $R630_02; skip IP change."
    return 0
  fi

  net0=$(run_ssh "$HOST_5700" "pct config $VMID_DEV 2>/dev/null | grep -E '^net0:'" 2>/dev/null || echo "")
  if net0_has_ip "$net0" "$DEV_VM_IP"; then
    log_ok "VMID $VMID_DEV already has IP $DEV_VM_IP (on $HOST_5700)"
    return 0
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would run on $HOST_5700: pct stop $VMID_DEV; pct set $VMID_DEV --net0 ...; pct start $VMID_DEV"
    return 0
  fi

  log_info "Stopping VMID $VMID_DEV on $HOST_5700, setting IP to $DEV_VM_IP, then starting..."
  # Best-effort: the container may already be stopped.
  run_ssh "$HOST_5700" "pct stop $VMID_DEV" 2>/dev/null || true
  sleep 2
  # NOTE(review): this writes a complete net0 definition; options not listed
  # here (e.g. hwaddr, tag, firewall) are presumably not carried over — confirm.
  if ! run_ssh "$HOST_5700" "pct set $VMID_DEV --net0 name=eth0,bridge=vmbr0,ip=$DEV_VM_IP/24,gw=$GATEWAY"; then
    log_err "pct set $VMID_DEV failed"
    exit 1
  fi
  if ! run_ssh "$HOST_5700" "pct start $VMID_DEV" 2>&1; then
    log_err "VMID $VMID_DEV failed to start. If exit 32 (mount): on host run e2fsck -f -y /dev/pve/vm-${VMID_DEV}-disk-0 (after lvchange -ay), then lvchange -an and pct start $VMID_DEV"
  else
    log_ok "VMID $VMID_DEV reconfigured to $DEV_VM_IP and started (on $HOST_5700)"
  fi
}

reconfigure_dev_vm
# --- 3. Start stopped containers on r630-01 (RPC 2101, DBIS if present) ---

# Start every listed container that exists on r630-01 but is not running.
# Containers for which no status can be obtained are skipped silently.
# Globals:   R630_01, DRY_RUN (read)
# Arguments: the VMIDs to check
start_stopped_on_r630_01() {
  local vmid status
  for vmid in "$@"; do
    # "missing" means ssh failed; an empty status means pct printed nothing.
    status=$(run_ssh "$R630_01" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    case "$status" in
      "" | missing)
        continue
        ;;
      running)
        log_ok "r630-01 VMID $vmid: already running"
        continue
        ;;
    esac
    if [[ "$DRY_RUN" == true ]]; then
      log_info "Would start VMID $vmid on $R630_01"
      continue
    fi
    log_info "Starting VMID $vmid on $R630_01..."
    # A failed start is reported as a warning and the loop moves on.
    if run_ssh "$R630_01" "pct start $vmid" 2>/dev/null; then
      log_ok "VMID $vmid started"
    else
      log_warn "VMID $vmid start failed"
    fi
  done
}

start_stopped_on_r630_01 2101 10130 10150 10151 10100 10101 10120
# --- 4. Ensure DBIS services (nginx/node) inside containers on r630-01 ---

# Try to start nginx and node inside every listed container that is running.
# Either unit failing on its own is tolerated; success is reported when at
# least one of them started, and a warning when neither did.
# Globals:   R630_01, DRY_RUN (read)
# Arguments: the VMIDs to check
ensure_services_on_r630_01() {
  local vmid status svc started
  for vmid in "$@"; do
    status=$(run_ssh "$R630_01" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    [[ "$status" == "running" ]] || continue
    if [[ "$DRY_RUN" == true ]]; then
      log_info "Would ensure nginx/node in VMID $vmid"
      continue
    fi
    started=false
    for svc in nginx node; do
      if run_ssh "$R630_01" "pct exec $vmid -- systemctl start $svc 2>/dev/null"; then
        started=true
      fi
    done
    if [[ "$started" == true ]]; then
      log_ok "VMID $vmid services (nginx/node) started"
    else
      log_warn "VMID $vmid: neither nginx nor node could be started"
    fi
  done
}

ensure_services_on_r630_01 10130 10150 10151
# --- 5. ML110: start stopped DBIS containers ---

# On ML110: start the stopped DBIS containers, then try to start nginx and
# node inside the running ones. The whole step is skipped with a warning when
# ML110 is unreachable over SSH.
# Globals: ML110, DRY_RUN (read)
fix_ml110_containers() {
  local vmid status svc started

  if ! run_ssh "$ML110" "echo OK" 2>/dev/null; then
    log_warn "Cannot SSH to $ML110; skipped ML110 container start."
    return 0
  fi

  for vmid in 10130 10150 10151 10100 10101 10120; do
    # "missing" means ssh failed; an empty status means pct printed nothing.
    status=$(run_ssh "$ML110" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    case "$status" in
      "" | missing | running) continue ;;
    esac
    if [[ "$DRY_RUN" == true ]]; then
      log_info "Would start VMID $vmid on $ML110"
      continue
    fi
    log_info "Starting VMID $vmid on $ML110..."
    if run_ssh "$ML110" "pct start $vmid" 2>/dev/null; then
      log_ok "ML110 VMID $vmid started"
    else
      log_warn "ML110 VMID $vmid start failed"
    fi
  done

  for vmid in 10130 10150 10151; do
    status=$(run_ssh "$ML110" "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")
    [[ "$status" == "running" ]] || continue
    if [[ "$DRY_RUN" == true ]]; then
      log_info "Would ensure nginx/node in VMID $vmid on $ML110"
      continue
    fi
    # Either unit failing on its own is tolerated; warn only when neither
    # of them could be started.
    started=false
    for svc in nginx node; do
      if run_ssh "$ML110" "pct exec $vmid -- systemctl start $svc 2>/dev/null"; then
        started=true
      fi
    done
    if [[ "$started" == true ]]; then
      log_ok "ML110 VMID $vmid services started"
    else
      log_warn "ML110 VMID $vmid: neither nginx nor node could be started"
    fi
  done
}

fix_ml110_containers
# Closing summary with the follow-up commands to run by hand.
echo ""
log_ok "Done. Next: update NPMplus Fourth proxy to $DEV_VM_IP:3000 (gitea/dev/codespaces) if not already:"
echo " NPM_PASSWORD=xxx bash scripts/nginx-proxy-manager/update-npmplus-fourth-proxy-hosts.sh"
echo " Then: bash scripts/verify/verify-end-to-end-routing.sh"
echo ""