#!/usr/bin/env bash
# Collect RPC diagnostics for VMIDs 2101 and 2500-2505: listening ports and Besu journal.
# Run from project root. Requires SSH to r630-01 (and ml110 if 2503-2505 are there).
# Output is suitable for piping to a file or tee.
#
# Usage: ./scripts/maintenance/diagnose-rpc-502s.sh
# See: docs/00-meta/502_DEEP_DIVE_ROOT_CAUSES_AND_FIXES.md

set -euo pipefail

# Resolve the project root relative to this script (two directories up).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional site config: may define PROXMOX_HOST_* / PROXMOX_* overrides.
# Sourcing is best-effort; a missing or failing file falls back to defaults.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  # shellcheck source=/dev/null
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Proxmox hosts: prefer PROXMOX_HOST_<name>, then PROXMOX_<name>, then the
# hard-coded default address.
R630_01="${PROXMOX_HOST_R630_01:-${PROXMOX_R630_01:-192.168.11.11}}"
ML110="${PROXMOX_HOST_ML110:-${PROXMOX_ML110:-192.168.11.10}}"

# Kept as a plain string: run() word-splits it into individual ssh arguments.
SSH_OPTS="-o ConnectTimeout=8 -o StrictHostKeyChecking=accept-new"

#######################################
# Run a command as root on a remote host over SSH.
# Globals:   SSH_OPTS (read) - whitespace-separated ssh options
# Arguments: $1 - host name or address
#            $2 - command line to execute remotely
# Outputs:   remote stdout; on any failure (non-zero remote exit or SSH
#            error) prints "(command failed or host unreachable)" to stdout
#            instead of propagating the error. stderr is discarded.
# Returns:   0 in practice, because the failure branch ends in echo.
#######################################
run() {
  local IFS=$' \t\n'
  local -a ssh_opts=()
  # read -a splits on whitespace WITHOUT pathname expansion, unlike an
  # unquoted $SSH_OPTS. -d '' reads to EOF, which makes read return
  # non-zero even on success, hence the || true.
  read -r -d '' -a ssh_opts <<<"$SSH_OPTS" || true
  # The ${arr[@]+...} form keeps an empty option list safe under `set -u`
  # on older bash versions.
  ssh ${ssh_opts[@]+"${ssh_opts[@]}"} "root@$1" "$2" 2>/dev/null \
    || echo "(command failed or host unreachable)"
}

#######################################
# Map a VMID to the Proxmox host that is queried first.
# Every VMID currently resolves to r630-01; 2503-2505 may actually live on
# ml110, which the caller probes as a fallback when the container is not
# running on r630-01.
# Globals:   R630_01 (read)
# Arguments: $1 - container VMID
# Outputs:   host address on stdout
#######################################
get_host() {
  local vmid=$1
  case "$vmid" in
    2101|2500|2501|2502) echo "$R630_01" ;;
    2503|2504|2505) echo "$R630_01" ;; # default; try ml110 if not running on r630
    *) echo "$R630_01" ;;
  esac
}

# Report banner; the timestamp records when this diagnostic run was taken.
echo "=============================================="
echo "RPC 502 diagnostics — $(date -Iseconds)"
echo "=============================================="
echo ""

# Per-VMID diagnostics: resolve the Proxmox host, confirm the container is
# running (probing ml110 as a fallback for 2503-2505), then print listening
# ports, Besu service units and the most recent journal lines.
for vmid in 2101 2500 2501 2502 2503 2504 2505; do
  host=$(get_host "$vmid")

  # run() prints its failure text on stdout and exits 0, so a trailing
  # `|| echo unknown` fallback can never fire. Normalise instead: anything
  # that is not a single lowercase status word (empty output, or the failure
  # sentence) is reported as "unknown".
  status=$(run "$host" "pct status $vmid 2>/dev/null | awk '{print \$2}'") || status=""
  [[ "$status" =~ ^[a-z]+$ ]] || status="unknown"
  echo "--- VMID $vmid @ $host (status: $status) ---"

  if [[ "$status" != "running" ]]; then
    # If on r630 and not running, try ml110 for 2503-2505
    if [[ "$vmid" =~ ^250[345]$ && "$host" == "$R630_01" ]]; then
      status2=$(run "$ML110" "pct status $vmid 2>/dev/null | awk '{print \$2}'") || status2=""
      if [[ "$status2" == "running" ]]; then
        host="$ML110"
        status="$status2"
        echo " (found on $ML110)"
      fi
    fi
    if [[ "$status" != "running" ]]; then
      echo " Container not running. Skip."
      echo ""
      continue
    fi
  fi

  echo " Listening ports (ss -tlnp):"
  run "$host" "pct exec $vmid -- ss -tlnp 2>/dev/null" | sed 's/^/ /'

  echo " Besu service (systemctl list-units):"
  # grep exits 1 when nothing matches, which run() would otherwise report as
  # "(command failed or host unreachable)". Report the real condition.
  run "$host" "pct exec $vmid -- systemctl list-units --type=service --no-legend 2>/dev/null | grep -iE besu || echo '(no besu service units found)'" | sed 's/^/ /'

  for unit in besu-rpc besu; do
    echo " journalctl -u $unit -n 25:"
    run "$host" "pct exec $vmid -- journalctl -u $unit -n 25 --no-pager 2>/dev/null" | sed 's/^/ /'
  done
  echo ""
done

# Closing hints: how to read the output above and which script to run next.
echo "=============================================="
echo "If 8545 is not in ss -tlnp, Besu is not binding. Check journal for genesis/nodekey/config errors."
echo "Then run: ./scripts/besu/fix-all-besu-nodes.sh (optionally --no-restart first)"
echo "=============================================="