Files
proxmox/scripts/maintenance/diagnose-rpc-502s.sh
defiQUG bea1903ac9
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Sync all local changes: docs, config, scripts, submodule refs, verification evidence
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 15:46:06 -08:00

71 lines
2.8 KiB
Bash
Executable File

#!/usr/bin/env bash
# Collect RPC diagnostics for VMIDs 2101 and 2500-2505: listening ports and Besu journal.
# Run from project root. Requires SSH to r630-01 (and ml110 if 2503-2505 are there).
# Output is suitable for piping to a file or tee.
#
# Usage: ./scripts/maintenance/diagnose-rpc-502s.sh
# See: docs/00-meta/502_DEEP_DIVE_ROOT_CAUSES_AND_FIXES.md
# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -euo pipefail

# Locate this script on disk, then the project root two directories above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Optional site configuration. A missing file is skipped, and a failure while
# sourcing is deliberately ignored so the built-in defaults below still apply.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Proxmox host addresses: PROXMOX_HOST_* wins, then PROXMOX_*, then the literal default.
R630_01="${PROXMOX_HOST_R630_01:-${PROXMOX_R630_01:-192.168.11.11}}"
ML110="${PROXMOX_HOST_ML110:-${PROXMOX_ML110:-192.168.11.10}}"

# Kept as a single string on purpose: run() expands it unquoted so that it
# word-splits into separate ssh arguments.
SSH_OPTS="-o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new"
# Run a command as root on a remote host over ssh.
# Globals:   SSH_OPTS (read; intentionally expanded unquoted so it splits into options)
# Arguments: $1 - host name or address
#            $2 - command line handed to the remote shell
# Outputs:   the remote command's stdout; ssh/remote stderr is discarded.
#            If ssh exits non-zero, a placeholder line is written to stdout instead.
# Returns:   always 0, so callers running under `set -e` are never aborted.
run() {
  local target="root@$1"
  local remote_cmd=$2
  if ! ssh $SSH_OPTS "$target" "$remote_cmd" 2>/dev/null; then
    echo "(command failed or host unreachable)"
  fi
}
# Map a VMID to the Proxmox host that should be queried first.
# Globals:   R630_01 (read)
# Arguments: $1 - container VMID
# Outputs:   the host address on stdout
# Every VMID currently resolves to R630_01; the arms are kept separate so a
# group can be pointed at a different host without restructuring.
get_host() {
  local vmid=$1
  local target
  case "$vmid" in
    2101 | 250[0-2]) target=$R630_01 ;;
    250[3-5]) target=$R630_01 ;; # default; try ml110 if not running on r630
    *) target=$R630_01 ;;
  esac
  echo "$target"
}
# ---------------------------------------------------------------------------
# Diagnostic report.
# For each RPC container: look up its host, read its `pct status`, and, when it
# is running, print listening sockets, Besu service units and recent journal
# lines for the besu-rpc and besu units. Everything is written to stdout.
# Globals read: R630_01, ML110. Helpers used: run, get_host.
# ---------------------------------------------------------------------------
echo "=============================================="
echo "RPC 502 diagnostics — $(date -Iseconds)"
echo "=============================================="
echo ""
for vmid in 2101 2500 2501 2502 2503 2504 2505; do
  host=$(get_host "$vmid")
  # awk keeps the second field of the `pct status` line; that word is compared
  # against "running" below. `|| true` keeps `set -e` from aborting the report
  # should the helper ever return non-zero.
  status=$(run "$host" "pct status $vmid 2>/dev/null | awk '{print \$2}'") || true
  # An empty result (pct printed nothing usable) is shown as "unknown" rather
  # than as a blank "(status: )".
  status=${status:-unknown}
  echo "--- VMID $vmid @ $host (status: $status) ---"
  if [[ "$status" != "running" ]]; then
    # If on r630 and not running, try ml110 for 2503-2505
    if [[ "$vmid" =~ ^250[345]$ ]] && [[ "$host" == "$R630_01" ]]; then
      status2=$(run "$ML110" "pct status $vmid 2>/dev/null | awk '{print \$2}'") || true
      if [[ "$status2" == "running" ]]; then
        host="$ML110"
        status="$status2"
        echo " (found on $ML110)"
      fi
    fi
    if [[ "$status" != "running" ]]; then
      echo " Container not running. Skip."
      echo ""
      continue
    fi
  fi
  echo " Listening ports (ss -tlnp):"
  run "$host" "pct exec $vmid -- ss -tlnp 2>/dev/null" | sed 's/^/ /'
  echo " Besu service (systemctl list-units):"
  # grep exits 1 when nothing matches, which would make the whole remote command
  # look like a failure. The remote `|| echo` turns "no match" into its own
  # message so it is not confused with an unreachable host.
  run "$host" "pct exec $vmid -- systemctl list-units --type=service --no-legend 2>/dev/null | grep -iE besu || echo '(no besu service units listed)'" | sed 's/^/ /'
  for unit in besu-rpc besu; do
    echo " journalctl -u $unit -n 25:"
    run "$host" "pct exec $vmid -- journalctl -u $unit -n 25 --no-pager 2>/dev/null" | sed 's/^/ /'
  done
  echo ""
done
echo "=============================================="
echo "If 8545 is not in ss -tlnp, Besu is not binding. Check journal for genesis/nodekey/config errors."
echo "Then run: ./scripts/besu/fix-all-besu-nodes.sh (optionally --no-restart first)"
echo "=============================================="