#!/usr/bin/env bash
# Network Bootstrap Script for Besu Validated Set
# Orchestrates network bootstrap using script-based approach (static-nodes.json)
#
# Flow: collect validator enodes -> write static-nodes.json -> push it into the
# Besu containers -> restart services (sentries, validators, RPC) -> verify peers.
#
# Needs `pct` and `python3` on the host. load_config, log_info, log_warn,
# log_success and error_exit are not defined here; presumably they come from
# lib/common.sh, which is sourced below.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
source "$PROJECT_ROOT/lib/common.sh"

# Load configuration
load_config
load_config "$PROJECT_ROOT/config/network.conf" || true

# VMID ranges (from config - new ranges)
VALIDATORS_START="${VALIDATOR_START:-1000}"
VALIDATORS_COUNT="${VALIDATOR_COUNT:-${VALIDATORS_COUNT:-5}}"
VALIDATORS_END=$((VALIDATORS_START + VALIDATORS_COUNT - 1))

SENTRIES_START="${SENTRY_START:-1500}"
SENTRIES_COUNT="${SENTRY_COUNT:-${SENTRIES_COUNT:-4}}"
SENTRIES_END=$((SENTRIES_START + SENTRIES_COUNT - 1))

RPC_START="${RPC_START:-2500}"
RPC_COUNT="${RPC_COUNT:-3}"
RPC_END=$((RPC_START + RPC_COUNT - 1))

# Build arrays
VALIDATORS=()
SENTRIES=()
RPC_NODES=()
ALL_BESU=()

for ((vmid = VALIDATORS_START; vmid <= VALIDATORS_END; vmid++)); do
    VALIDATORS+=("$vmid")
    ALL_BESU+=("$vmid")
done

for ((vmid = SENTRIES_START; vmid <= SENTRIES_END; vmid++)); do
    SENTRIES+=("$vmid")
    ALL_BESU+=("$vmid")
done

for ((vmid = RPC_START; vmid <= RPC_END; vmid++)); do
    RPC_NODES+=("$vmid")
    ALL_BESU+=("$vmid")
done

log_info "========================================="
log_info "Network Bootstrap - Script-Based Approach"
log_info "========================================="
log_info ""
log_info "Validators: ${#VALIDATORS[@]} (${VALIDATORS_START}-${VALIDATORS_END})"
log_info "Sentries: ${#SENTRIES[@]} (${SENTRIES_START}-${SENTRIES_END})"
log_info "RPC Nodes: ${#RPC_NODES[@]} (${RPC_START}-${RPC_END})"
log_info "Total: ${#ALL_BESU[@]} nodes"
log_info ""

# Succeeds when `pct status` reports the container as running.
# Arguments: $1 - container VMID
# The output is captured before matching (rather than piped into `grep -q`)
# so that an early-exiting grep cannot fail the pipeline under `pipefail`.
container_running() {
    local status
    status=$(pct status "$1" 2>/dev/null) || return 1
    [[ "$status" == *running* ]]
}

# Succeeds when something inside the container listens on the P2P port (30303).
# Arguments: $1 - container VMID
# Tries netstat first and falls back to ss, since either may be missing.
p2p_port_listening() {
    local vmid=$1
    local tool sockets
    for tool in netstat ss; do
        sockets=$(pct exec "$vmid" -- "$tool" -tuln 2>/dev/null) || continue
        [[ "$sockets" == *":30303"* ]] && return 0
    done
    return 1
}

# Turns raw public-key text into the 128-character node ID used in enode URLs.
# Arguments: $1 - raw key text (may carry whitespace, CR/LF or a 0x prefix)
# Outputs:   the cleaned key on stdout
# Returns:   1 when the cleaned key is not exactly 128 characters long
clean_node_pubkey() {
    local key=$1
    key=${key//[$'\n\r ']/}
    # Besu key material is conventionally 0x-prefixed; an enode URL must not
    # contain the prefix, and leaving it in would fail the length check below.
    key=${key#0x}
    [[ ${#key} -eq 128 ]] || return 1
    printf '%s\n' "$key"
}

# Function to get container IP address
# Arguments: $1 - container VMID
# Outputs:   first address reported by `hostname -I` inside the container, or
#            an empty line when the container is not running / the call fails
get_container_ip() {
    local vmid=$1
    if container_running "$vmid"; then
        pct exec "$vmid" -- hostname -I 2>/dev/null | awk '{print $1}' || echo ""
    else
        echo ""
    fi
}

# Function to check if node is ready (P2P listening)
# Arguments: $1 - container VMID
#            $2 - maximum seconds to wait (default 60)
# Returns:   0 once the container runs, a besu process exists and port 30303
#            is listening; 1 when that has not happened within the time limit
check_node_ready() {
    local vmid=$1
    local max_wait=${2:-60}
    local wait_time=0

    log_info "Waiting for node $vmid to be ready (max ${max_wait}s)..."

    while [[ $wait_time -lt $max_wait ]]; do
        if container_running "$vmid" \
            && pct exec "$vmid" -- pgrep -f "besu" >/dev/null 2>&1 \
            && p2p_port_listening "$vmid"; then
            log_success "Node $vmid is ready"
            return 0
        fi

        sleep 2
        wait_time=$((wait_time + 2))

        # Progress message every 10 seconds.
        if [[ $((wait_time % 10)) -eq 0 ]]; then
            log_info "Still waiting... (${wait_time}s elapsed)"
        fi
    done

    log_warn "Node $vmid not ready after ${max_wait}s (may still be starting)"
    return 1
}

# Function to extract enode from node
# Arguments: $1 - container VMID
#            $2 - IP address to advertise in the enode URL
# Outputs:   the enode URL on stdout (and nothing else: callers capture it)
# Returns:   0 on success, 1 when no enode could be determined
extract_enode() {
    local vmid=$1
    local ip=$2

    # Try RPC method first (if RPC is enabled)
    local enode_rpc
    enode_rpc=$(pct exec "$vmid" -- curl -s -X POST \
        -H "Content-Type: application/json" \
        -d '{"jsonrpc":"2.0","method":"admin_nodeInfo","params":[],"id":1}' \
        http://localhost:8545 2>/dev/null | \
        python3 -c "import sys, json; data=json.load(sys.stdin); print(data.get('result', {}).get('enode', ''))" 2>/dev/null || echo "")

    if [[ -n "$enode_rpc" ]] && [[ "$enode_rpc" != "null" ]]; then
        # Replace IP in enode with actual IP
        printf '%s\n' "$enode_rpc" | sed "s/@[^:]*:/@${ip}:/"
        return 0
    fi

    # Fallback: Extract from nodekey using Besu public-key export
    local nodekey_path="/data/besu/nodekey"
    local raw_key node_pubkey
    if pct exec "$vmid" -- test -f "$nodekey_path" 2>/dev/null; then
        # Try using Besu to export public key. A failing `pct exec` must not
        # abort the script under `set -e`; treat it as "no key".
        raw_key=$(pct exec "$vmid" -- bash -c \
            "cd /data/besu && /opt/besu/bin/besu public-key export --node-private-key-file=nodekey 2>/dev/null | tail -1") \
            || raw_key=""
        if node_pubkey=$(clean_node_pubkey "$raw_key"); then
            echo "enode://${node_pubkey}@${ip}:30303"
            return 0
        fi

        # Alternative: Try reading from nodekey.pub if it exists
        if pct exec "$vmid" -- test -f "${nodekey_path}.pub" 2>/dev/null; then
            raw_key=$(pct exec "$vmid" -- cat "${nodekey_path}.pub" 2>/dev/null) || raw_key=""
            if node_pubkey=$(clean_node_pubkey "$raw_key"); then
                echo "enode://${node_pubkey}@${ip}:30303"
                return 0
            fi
        fi
    fi

    # stderr: stdout of this function is captured by the caller, so a warning
    # written there would be swallowed (or mistaken for an enode).
    log_warn "Could not extract enode for node $vmid" >&2
    return 1
}

# Step 1: Collect enodes from all validator nodes
log_info "=== Step 1: Collecting Enodes from Validators ==="

declare -A ENODE_MAP
VALIDATOR_ENODES=()

for vmid in "${VALIDATORS[@]}"; do
    if ! container_running "$vmid"; then
        log_warn "Container $vmid is not running, skipping"
        continue
    fi

    log_info "Collecting enode from validator $vmid..."
    ip=$(get_container_ip "$vmid")

    if [[ -z "$ip" ]]; then
        log_warn "Could not get IP for container $vmid"
        continue
    fi

    if check_node_ready "$vmid" 30; then
        # `|| enode=""` keeps one failing validator from aborting the whole
        # bootstrap under `set -e`; the else-branch below handles it instead.
        enode=$(extract_enode "$vmid" "$ip") || enode=""
        if [[ -n "$enode" ]]; then
            ENODE_MAP[$vmid]=$enode
            VALIDATOR_ENODES+=("$enode")
            log_success "Validator $vmid: $enode"
        else
            log_warn "Could not extract enode from validator $vmid"
        fi
    fi
done

if [[ ${#VALIDATOR_ENODES[@]} -eq 0 ]]; then
    error_exit "No validator enodes collected. Ensure validators are running and ready."
fi

log_success "Collected ${#VALIDATOR_ENODES[@]} validator enodes"

# Step 2: Generate static-nodes.json (validators only for QBFT)
log_info ""
log_info "=== Step 2: Generating static-nodes.json ==="

# mktemp instead of a predictable /tmp/static-nodes-$$.json name: this runs as
# root on the host and writes through a redirect, which would follow a
# pre-planted symlink.
STATIC_NODES_JSON=$(mktemp "${TMPDIR:-/tmp}/static-nodes-XXXXXX.json") \
    || error_exit "Could not create temporary static-nodes.json"
# mktemp creates the file 0600; restore the usual redirect-created mode so the
# pushed copy stays readable even if the later chown fails.
chmod 0644 "$STATIC_NODES_JSON"

# NOTE(review): the original heredoc body was lost from this file; the JSON
# array below (one enode string per validator, no trailing comma) is a
# reconstruction - confirm against the intended static-nodes.json layout.
{
    printf '[\n'
    last_index=$((${#VALIDATOR_ENODES[@]} - 1))
    for i in "${!VALIDATOR_ENODES[@]}"; do
        if ((i < last_index)); then
            printf '  "%s",\n' "${VALIDATOR_ENODES[$i]}"
        else
            printf '  "%s"\n' "${VALIDATOR_ENODES[$i]}"
        fi
    done
    printf ']\n'
} >"$STATIC_NODES_JSON"

# Step 3: Deploy static-nodes.json into the containers
# NOTE(review): the Step 3 header and loop line were lost together with the
# heredoc; deploying to every Besu container (ALL_BESU) and the header text
# below are assumptions - confirm.
log_info ""
log_info "=== Step 3: Deploying static-nodes.json ==="

for vmid in "${ALL_BESU[@]}"; do
    if ! container_running "$vmid"; then
        log_warn "Container $vmid is not running, skipping"
        continue
    fi

    log_info "Deploying static-nodes.json to container $vmid..."
    if pct push "$vmid" "$STATIC_NODES_JSON" /etc/besu/static-nodes.json >/dev/null 2>&1; then
        # Best effort: a failed chown is tolerated.
        pct exec "$vmid" -- chown besu:besu /etc/besu/static-nodes.json 2>/dev/null || true
        log_success "Deployed to container $vmid"
    else
        log_warn "Failed to deploy to container $vmid"
    fi
done

# Step 4: Restart services in correct order (sentries → validators → RPC)
log_info ""
log_info "=== Step 4: Restarting Services in Correct Order ==="

# Function to restart Besu service
# Arguments: $1 - container VMID
#            $2 - service type: validator | sentry | rpc
# Returns:   0 when the unit restarted and the node became ready within 60s,
#            1 on unknown type, failed restart, or readiness timeout
restart_besu_service() {
    local vmid=$1
    local service_type=$2
    local service_name=""

    case "$service_type" in
        validator)
            service_name="besu-validator"
            ;;
        sentry)
            service_name="besu-sentry"
            ;;
        rpc)
            service_name="besu-rpc"
            ;;
        *)
            log_warn "Unknown service type: $service_type"
            return 1
            ;;
    esac

    log_info "Restarting $service_name on container $vmid..."
    if pct exec "$vmid" -- systemctl restart "$service_name" 2>/dev/null; then
        sleep 3
        if check_node_ready "$vmid" 60; then
            log_success "Service restarted and ready on container $vmid"
            return 0
        else
            log_warn "Service restarted but not fully ready on container $vmid"
            return 1
        fi
    else
        log_warn "Failed to restart service on container $vmid"
        return 1
    fi
}

# Restart sentries first
log_info "Restarting sentries..."
for vmid in "${SENTRIES[@]}"; do
    if container_running "$vmid"; then
        restart_besu_service "$vmid" "sentry" || true
    fi
done

# Wait a bit for sentries to stabilize
sleep 5

# Restart validators
log_info "Restarting validators..."
for vmid in "${VALIDATORS[@]}"; do
    if container_running "$vmid"; then
        restart_besu_service "$vmid" "validator" || true
    fi
done

# Wait a bit for validators to connect
sleep 5

# Restart RPC nodes
log_info "Restarting RPC nodes..."
# Restart the besu-rpc unit in every RPC container that is currently running;
# a failed restart is tolerated so the remaining containers are still handled.
for vmid in "${RPC_NODES[@]}"; do
    pct status "$vmid" 2>/dev/null | grep -q running || continue
    restart_besu_service "$vmid" "rpc" || true
done

# Step 5: Verify peer connections
log_info ""
log_info "=== Step 5: Verifying Peer Connections ==="
sleep 10 # Give nodes time to establish connections

VERIFICATION_FAILED=0
for vmid in "${ALL_BESU[@]}"; do
    # Containers that are not running are skipped without being counted.
    pct status "$vmid" 2>/dev/null | grep -q running || continue

    # Ask the node's JSON-RPC endpoint (inside the container) for its peer
    # list and count the entries with python3; any failure yields "0".
    peer_count=$(pct exec "$vmid" -- curl -s -X POST \
        -H "Content-Type: application/json" \
        -d '{"jsonrpc":"2.0","method":"admin_peers","params":[],"id":1}' \
        http://localhost:8545 2>/dev/null | \
        python3 -c "import sys, json; data=json.load(sys.stdin); peers=data.get('result', []); print(len(peers) if isinstance(peers, list) else 0)" 2>/dev/null || echo "0")

    if [[ -z "$peer_count" || "$peer_count" == "0" ]]; then
        log_warn "Container $vmid: No peers detected (may still be connecting)"
        ((VERIFICATION_FAILED += 1))
    else
        log_success "Container $vmid: $peer_count peer(s) connected"
    fi
done

# Cleanup
rm -f "$STATIC_NODES_JSON"

log_info ""
if ((VERIFICATION_FAILED != 0)); then
    # Missing peers only produce warnings; the exit status stays 0.
    log_warn "========================================="
    log_warn "Network Bootstrap Complete with Warnings"
    log_warn "========================================="
    log_warn "$VERIFICATION_FAILED node(s) may not have peers connected yet"
    log_info "This is normal if nodes are still starting up"
    log_info "Wait a few minutes and check again"
else
    log_success "========================================="
    log_success "Network Bootstrap Complete!"
    log_success "========================================="
    log_info ""
    log_info "Next steps:"
    log_info "1. Verify all services are running: systemctl status besu-*"
    log_info "2. Check consensus is active (blocks being produced)"
    log_info "3. Validate validator set participation"
fi
exit 0