Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Co-authored-by: Cursor <cursoragent@cursor.com>
169 lines
6.4 KiB
Bash
Executable File
169 lines
6.4 KiB
Bash
Executable File
#!/usr/bin/env bash
# Run all maintenance/fix scripts that use SSH to Proxmox VE hosts (r630-01, ml110, r630-02).
# Run from project root. Requires: SSH key-based auth to root@<each host>, LAN access.
#
# Usage:
#   ./scripts/maintenance/run-all-maintenance-via-proxmox-ssh.sh
#   ./scripts/maintenance/run-all-maintenance-via-proxmox-ssh.sh --no-npm
#   ./scripts/maintenance/run-all-maintenance-via-proxmox-ssh.sh --e2e
#   ./scripts/maintenance/run-all-maintenance-via-proxmox-ssh.sh --verbose   # show all step output (no 2>/dev/null)
#   ./scripts/maintenance/run-all-maintenance-via-proxmox-ssh.sh --dry-run
#
# Step 2 (2101 fix) can be slow (apt in CT). Timeout: STEP2_TIMEOUT=900 (default) or 0 to disable.
#
# Scripts run (each SSHs to the Proxmox hosts):
#   0. make-rpc-vmids-writable-via-ssh.sh — Stop 2101,2500-2505; e2fsck rootfs; start (r630-01)
#   1. resolve-and-fix-all-via-proxmox-ssh.sh — Dev VM IP, start containers, DBIS (r630-01, ml110)
#   2. fix-rpc-2101-jna-reinstall.sh — 2101 Besu reinstall (r630-01)
#   3. install-besu-permanent-on-missing-nodes.sh — Besu on 2500-2505, 1505-1508 (r630-01, ml110)
#   4. address-all-remaining-502s.sh — backends + NPM proxy + RPC diagnostics
#   5. [optional] verify-end-to-end-routing.sh — E2E (if --e2e)
#
# See: docs/00-meta/502_DEEP_DIVE_ROOT_CAUSES_AND_FIXES.md, docs/05-network/CHECK_ALL_UPDATES_AND_CLOUDFLARE_TUNNELS.md

# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Resolve this script's directory and the project root (two levels up), then
# work from the project root so relative paths used below resolve consistently.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
cd "$PROJECT_ROOT"

# Optional environment/config files. Loading is best-effort: a missing file is
# skipped, and a failure (or stderr noise) while sourcing is deliberately ignored.
if [[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]]; then
  # shellcheck source=/dev/null
  source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true
fi
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  # shellcheck source=/dev/null
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Proxmox host addresses; PROXMOX_HOST_* (from the environment or the files
# sourced above) override the built-in defaults.
R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"

# Option flags; each stays "false" unless its command-line switch is given.
SKIP_NPM=false
RUN_E2E=false
DRY_RUN=false
VERBOSE=false

#######################################
# Set the option flags from command-line arguments.
# Globals:   SKIP_NPM, RUN_E2E, DRY_RUN, VERBOSE (written)
# Arguments: the script's arguments; an empty argument is ignored
# Outputs:   a warning on stderr for each unrecognized argument
# Returns:   0 (unrecognized arguments are reported but not fatal)
#######################################
parse_maintenance_flags() {
  local opt
  for opt in "$@"; do
    case "$opt" in
      --no-npm) SKIP_NPM=true ;;
      --e2e) RUN_E2E=true ;;
      --dry-run) DRY_RUN=true ;;
      --verbose) VERBOSE=true ;;
      '') ;; # placeholder produced by "${@:-}" when no arguments were given
      *)
        # Make typos (e.g. a misspelled --dry-run) visible instead of silently ignoring them.
        printf 'Warning: ignoring unrecognized option: %s\n' "$opt" >&2
        ;;
    esac
  done
  return 0
}

# "${@:-}" expands to a single empty word when there are no arguments, which
# keeps the expansion safe under `set -u`.
parse_maintenance_flags "${@:-}"

# Step 2 (2101 fix) timeout in seconds; 0 = no timeout
STEP2_TIMEOUT="${STEP2_TIMEOUT:-900}"

#######################################
# Run a step script with bash, hiding its stderr unless verbose mode is on.
# Globals:   VERBOSE (read) - "true" shows the step's stderr; any other value
#            (including unset) discards it
# Arguments: $1 - path of the script to run; remaining args are passed to it
# Outputs:   the step's stdout; its stderr only when VERBOSE is "true"
# Returns:   the exit status of the step script
#######################################
run_step() {
  # Compare the flag as a string rather than executing its value as a command:
  # an empty/unset value would otherwise count as success, i.e. "verbose".
  if [[ "${VERBOSE:-false}" == true ]]; then
    bash "$@"
  else
    bash "$@" 2>/dev/null
  fi
}

# Banner: show the target hosts and the effective options for this run.
echo ""
echo "=== Run all maintenance via Proxmox SSH ==="
echo " Hosts: r630-01=$R630_01 ml110=$ML110 r630-02=$R630_02"
echo " --no-npm=$SKIP_NPM --e2e=$RUN_E2E --verbose=$VERBOSE --dry-run=$DRY_RUN STEP2_TIMEOUT=$STEP2_TIMEOUT"
echo ""

# Dry run: list the steps that would run, in order, then exit 0 without
# running any step script.
if [[ "${DRY_RUN:-false}" == true ]]; then
  # Step 4 is listed with --no-npm appended only when that option was given.
  step4_preview="address-all-remaining-502s.sh"
  if [[ "${SKIP_NPM:-false}" == true ]]; then
    step4_preview+=" --no-npm"
  fi

  echo "Would run in order:"
  echo " 0. make-rpc-vmids-writable-via-ssh.sh"
  echo " 1. resolve-and-fix-all-via-proxmox-ssh.sh"
  echo " 2. fix-rpc-2101-jna-reinstall.sh (timeout=${STEP2_TIMEOUT}s if set)"
  echo " 3. install-besu-permanent-on-missing-nodes.sh"
  echo " 4. ${step4_preview}"
  if [[ "${RUN_E2E:-false}" == true ]]; then
    echo " 5. verify-end-to-end-routing.sh"
  fi
  echo " Use --verbose to show all step output; STEP2_TIMEOUT=0 to disable step 2 timeout."
  echo ""
  exit 0
fi

# Quick SSH check to all hosts we need. Failures only produce a warning; the
# steps below still run.
for host in "$R630_01" "$ML110" "$R630_02"; do
  # BatchMode=yes makes ssh fail instead of prompting for a password, so a host
  # without key-based auth cannot stall the run; ConnectTimeout bounds
  # unreachable hosts.
  if ssh -o BatchMode=yes -o ConnectTimeout=5 -o StrictHostKeyChecking=accept-new \
    "root@$host" "echo OK" 2>/dev/null; then
    echo " SSH $host OK"
  else
    echo " Warning: cannot SSH to $host (some steps may fail)"
  fi
done
echo ""

# 0. Make RPC VMIDs writable (e2fsck so fix/install scripts can write).
# A non-zero exit from the step is reported but does not stop the run.
echo "[0/5] Making RPC VMIDs writable..."
echo "--- 0/5: Make RPC VMIDs writable (r630-01: 2101, 2500-2505) ---"
if run_step "${SCRIPT_DIR}/make-rpc-vmids-writable-via-ssh.sh"; then
  echo " Done."
else
  echo " Step had warnings (check output)."
fi
echo ""

# 1. Resolve and fix (Dev VM IP, start containers, DBIS).
# A non-zero exit from the step is reported but does not stop the run.
echo "[1/5] Resolve and fix (Dev VM, containers, DBIS)..."
echo "--- 1/5: Resolve and fix via Proxmox SSH (r630-01, ml110) ---"
if run_step "${SCRIPT_DIR}/resolve-and-fix-all-via-proxmox-ssh.sh"; then
  echo " Done."
else
  echo " Step had warnings (check output)."
fi
echo ""

# 2. Fix 2101 JNA reinstall (r630-01) — can be slow (apt in CT); optional timeout.
# The step runs under `timeout` when STEP2_TIMEOUT is a positive whole number of
# seconds and the `timeout` command is available; otherwise it runs unbounded.
echo "[2/5] Fix 2101 Besu JNA reinstall (may take several minutes)..."
echo "--- 2/5: Fix 2101 Besu JNA reinstall (r630-01) ---"
step2_ok=false
step2_ret=0
step2_timed=false
step2_cmd=(bash "${SCRIPT_DIR}/fix-rpc-2101-jna-reinstall.sh")

if [[ "${STEP2_TIMEOUT:-}" =~ ^[0-9]+$ ]]; then
  # 10# forces base 10 so a value with leading zeros is not parsed as octal.
  if ((10#${STEP2_TIMEOUT} > 0)) && command -v timeout >/dev/null 2>&1; then
    step2_cmd=(timeout "$STEP2_TIMEOUT" "${step2_cmd[@]}")
    step2_timed=true
  fi
elif [[ -n "${STEP2_TIMEOUT:-}" ]]; then
  # Validate before any arithmetic use: a non-numeric value would otherwise
  # raise an arithmetic error.
  echo " Warning: STEP2_TIMEOUT='${STEP2_TIMEOUT}' is not a whole number of seconds; running step 2 without a timeout." >&2
fi

# `|| step2_ret=$?` records the status without tripping `set -e`.
if [[ "${VERBOSE:-false}" == true ]]; then
  "${step2_cmd[@]}" || step2_ret=$?
else
  "${step2_cmd[@]}" 2>/dev/null || step2_ret=$?
fi

if ((step2_ret == 0)); then
  step2_ok=true
  echo " Done."
elif [[ "$step2_timed" == true ]] && ((step2_ret == 124)); then
  # 124 is the status `timeout` returns when the time limit was reached.
  echo " Step 2 timed out after ${STEP2_TIMEOUT}s. Re-run manually: ./scripts/maintenance/fix-rpc-2101-jna-reinstall.sh"
else
  # Report the failure like the other steps do instead of staying silent.
  echo " Step had failures (exit ${step2_ret}); re-run with --verbose to see errors."
fi
echo ""

# 3. Install Besu on missing nodes (r630-01, ml110).
# A non-zero exit from the step is reported but does not stop the run.
echo "[3/5] Install Besu on missing nodes..."
echo "--- 3/5: Install Besu on missing nodes (r630-01, ml110) ---"
if run_step "${PROJECT_ROOT}/scripts/besu/install-besu-permanent-on-missing-nodes.sh"; then
  echo " Done."
else
  echo " Step had failures (e.g. disk full or read-only CT)."
fi
echo ""

# 4. Address all remaining 502s (backends + NPM + diagnostics).
# --no-npm is forwarded to the step script when it was given to this script.
echo "[4/5] Address all remaining 502s (backends + NPM + diagnostics)..."
echo "--- 4/5: Address all remaining 502s (SSH to r630-01, r630-02) ---"
ADDR_ARGS=()
if [[ "${SKIP_NPM:-false}" == true ]]; then
  ADDR_ARGS+=(--no-npm)
fi
# ${arr[@]+...} expands to nothing for an empty array; a plain "${arr[@]}"
# is an "unbound variable" error under `set -u` on bash older than 4.4.
if run_step "${SCRIPT_DIR}/address-all-remaining-502s.sh" ${ADDR_ARGS[@]+"${ADDR_ARGS[@]}"}; then
  echo " Done."
else
  echo " Step had warnings (check output)."
fi
echo ""

# 5. Optional E2E (only with --e2e). Best-effort: a failing verification does
# not fail this script (`|| true`).
if [[ "${RUN_E2E:-false}" == true ]]; then
  echo "[5/5] E2E verification..."
  echo "--- 5/5: E2E verification ---"
  e2e_script="${PROJECT_ROOT}/scripts/verify/verify-end-to-end-routing.sh"
  if [[ -f "$e2e_script" ]]; then
    # E2E_ACCEPT_502_INTERNAL=1 is set in the verifier's environment for this call only.
    if [[ "${VERBOSE:-false}" == true ]]; then
      E2E_ACCEPT_502_INTERNAL=1 bash "$e2e_script" || true
    else
      E2E_ACCEPT_502_INTERNAL=1 bash "$e2e_script" 2>/dev/null || true
    fi
  else
    # Say so explicitly rather than skipping the requested verification silently.
    echo " Skipped: ${e2e_script} not found."
  fi
  echo ""
fi

# Closing summary: point at the follow-up verification script and the report directory.
echo "=== All maintenance steps (via Proxmox SSH) completed ==="
echo " Next: ./scripts/verify/verify-end-to-end-routing.sh"
echo " Reports: docs/04-configuration/verification-evidence/"
echo ""