#!/usr/bin/env bash
# Collect RPC diagnostics for VMIDs 2101 and 2500-2505: listening ports and Besu journal.
# Run from project root. Requires SSH to r630-01 (and ml110 if 2503-2505 are there).
# Output is suitable for piping to a file or tee.
#
# Usage: ./scripts/maintenance/diagnose-rpc-502s.sh
# See: docs/00-meta/502_DEEP_DIVE_ROOT_CAUSES_AND_FIXES.md

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional host overrides; a missing or broken config file must not abort
# the diagnostics, so failures while sourcing are deliberately ignored.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  # shellcheck source=/dev/null
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

R630_01="${PROXMOX_HOST_R630_01:-${PROXMOX_R630_01:-192.168.11.11}}"
ML110="${PROXMOX_HOST_ML110:-${PROXMOX_ML110:-192.168.11.10}}"

# Kept as an array so each option reaches ssh as its own word without
# relying on unquoted word-splitting.
SSH_OPTS=(-o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new)

#######################################
# Run a command as root on a remote host, preserving the exit status.
# Globals:   SSH_OPTS (read)
# Arguments: $1 - host, $2 - command line for the remote shell
# Outputs:   remote stdout; stderr is discarded
# Returns:   ssh's exit status
#######################################
remote() {
  # -n keeps ssh from consuming this script's stdin.
  ssh -n "${SSH_OPTS[@]}" "root@$1" "$2" 2>/dev/null
}

#######################################
# Best-effort remote command: never fails, prints a placeholder instead.
# Arguments: $1 - host, $2 - command line for the remote shell
# Outputs:   remote stdout, followed by a placeholder line on failure
# Returns:   0 always
#######################################
run() {
  remote "$1" "$2" || echo "(command failed or host unreachable)"
}

#######################################
# Map a VMID to the host it is first looked up on.
# 2503-2505 may be on ml110 or r630-01; r630-01 is the default and the
# caller retries on ml110 when the container is not running there.
# Globals:   R630_01 (read)
# Arguments: $1 - VMID
# Outputs:   host address on stdout
#######################################
get_host() {
  local v=$1
  case "$v" in
    2101 | 2500 | 2501 | 2502) echo "$R630_01" ;;
    2503 | 2504 | 2505) echo "$R630_01" ;; # default; try ml110 if not running on r630
    *) echo "$R630_01" ;;
  esac
}

#######################################
# Print the `pct status` state word for a container, or "unknown".
# Arguments: $1 - host, $2 - VMID
# Outputs:   second field of `pct status` output; "unknown" when the host
#            is unreachable or the command produced no output
# Returns:   0 always
#######################################
container_status() {
  local host=$1 vmid=$2 state
  state=$(remote "$host" "pct status $vmid 2>/dev/null | awk '{print \$2}'") || state=""
  printf '%s\n' "${state:-unknown}"
}

#######################################
# Walk all RPC VMIDs and print ports, Besu units and recent journal lines.
# Globals:   R630_01, ML110 (read)
# Outputs:   diagnostics report on stdout
#######################################
main() {
  local vmid host status fallback_status unit units

  echo "=============================================="
  echo "RPC 502 diagnostics — $(date -Iseconds)"
  echo "=============================================="
  echo ""

  for vmid in 2101 2500 2501 2502 2503 2504 2505; do
    host=$(get_host "$vmid")
    status=$(container_status "$host" "$vmid")
    echo "--- VMID $vmid @ $host (status: $status) ---"

    # If on r630 and not running, try ml110 for 2503-2505.
    if [[ "$status" != "running" && "$vmid" =~ ^250[345]$ && "$host" == "$R630_01" ]]; then
      fallback_status=$(container_status "$ML110" "$vmid")
      if [[ "$fallback_status" == "running" ]]; then
        host="$ML110"
        status="$fallback_status"
        echo " (found on $ML110)"
      fi
    fi

    if [[ "$status" != "running" ]]; then
      echo " Container not running. Skip."
      echo ""
      continue
    fi

    echo " Listening ports (ss -tlnp):"
    run "$host" "pct exec $vmid -- ss -tlnp 2>/dev/null" | sed 's/^/ /'

    echo " Besu service (systemctl list-units):"
    # Filter locally so that "no besu unit" (grep exit 1) is reported as
    # such instead of being mistaken for an SSH/command failure.
    if units=$(remote "$host" "pct exec $vmid -- systemctl list-units --type=service --no-legend 2>/dev/null"); then
      grep -iE 'besu' <<<"$units" || echo "(no besu units found)"
    else
      echo "(command failed or host unreachable)"
    fi | sed 's/^/ /'

    for unit in besu-rpc besu; do
      echo " journalctl -u $unit -n 25:"
      run "$host" "pct exec $vmid -- journalctl -u $unit -n 25 --no-pager 2>/dev/null" | sed 's/^/ /'
    done
    echo ""
  done

  echo "=============================================="
  echo "If 8545 is not in ss -tlnp, Besu is not binding. Check journal for genesis/nodekey/config errors."
  echo "Then run: ./scripts/besu/fix-all-besu-nodes.sh (optionally --no-restart first)"
  echo "=============================================="
}

main "$@"