chore: update submodule references and documentation
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- Marked submodules ai-mcp-pmm-controller, explorer-monorepo, and smom-dbis-138 as dirty to reflect recent changes.
- Updated documentation to clarify operator script usage, including dotenv loading and task execution instructions.
- Enhanced the README and various index files to provide clearer navigation and task completion guidance.

Made-with: Cursor
This commit is contained in:
80
scripts/maintenance/ensure-core-rpc-config-2101-2102.sh
Executable file
80
scripts/maintenance/ensure-core-rpc-config-2101-2102.sh
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env bash
# Ensure Core RPC nodes 2101 and 2102 have TXPOOL and ADMIN (and DEBUG) in rpc-http-api and rpc-ws-api.
# Does NOT add txpool_besuClear/txpool_clear/admin_removeTransaction — Besu does not implement them.
# See: docs/04-configuration/CORE_RPC_2101_2102_TXPOOL_ADMIN_STATUS.md
#
# Usage: ./scripts/maintenance/ensure-core-rpc-config-2101-2102.sh [--dry-run] [--2101-only] [--2102-only]

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Optional PROXMOX_HOST_* overrides; the defaults below apply if the file is absent.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Canonical API list for Core RPC (max that Besu supports for txpool + admin)
RPC_HTTP_API='["ETH","NET","WEB3","TXPOOL","QBFT","ADMIN","DEBUG","TRACE"]'
RPC_WS_API='["ETH","NET","WEB3","TXPOOL","QBFT","ADMIN"]'

VMID_2101=2101
VMID_2102=2102
HOST_2101="${PROXMOX_HOST_R630_01:-192.168.11.11}"
HOST_2102="${PROXMOX_HOST_ML110:-192.168.11.10}"
CONFIG_2101="/etc/besu/config-rpc-core.toml"
CONFIG_2102="/etc/besu/config-rpc.toml"

DRY_RUN=false
ONLY_2101=false
ONLY_2102=false
for a in "$@"; do
  [[ "$a" == "--dry-run" ]] && DRY_RUN=true
  [[ "$a" == "--2101-only" ]] && ONLY_2101=true
  [[ "$a" == "--2102-only" ]] && ONLY_2102=true
done

run_ssh() { ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no root@"$1" "$2"; }
log_ok() { echo -e "\033[0;32m[✓]\033[0m $1"; }
log_info() { echo -e "\033[0;34m[INFO]\033[0m $1"; }
log_warn() { echo -e "\033[0;33m[⚠]\033[0m $1"; }

#######################################
# Back up the Besu TOML inside the LXC, set rpc-http-api / rpc-ws-api to the
# canonical lists, then restart besu-rpc.
# Arguments: $1 - VMID, $2 - Proxmox host IP, $3 - config path inside the CT
# Returns:   0 on success, 1 on SSH/update/restart failure
#######################################
ensure_apis() {
  local vmid=$1
  local host=$2
  local config_path=$3
  log_info "VMID $vmid ($host): ensuring $config_path has TXPOOL, ADMIN, DEBUG..."
  if $DRY_RUN; then
    echo " Would set rpc-http-api and rpc-ws-api to include TXPOOL, ADMIN, DEBUG, QBFT, TRACE (2101/2102)"
    return 0
  fi
  # Pass API lists via env so quoting is safe; the remote script edits the config.
  # BUGFIX: grep is anchored to '^rpc-http-api=' / '^rpc-ws-api=' — the old
  # unanchored match also hit commented-out lines, in which case sed changed
  # nothing and the config was silently left broken. The grep/sed/append logic
  # is now an explicit if/else so a sed failure cannot fall through to the
  # append branch and duplicate the key.
  run_ssh "$host" "pct exec $vmid -- env RPC_HTTP_API='$RPC_HTTP_API' RPC_WS_API='$RPC_WS_API' CFG='$config_path' bash -c '
set -e
[ -f \"\$CFG\" ] || { echo \"Config \$CFG not found\"; exit 1; }
cp \"\$CFG\" \"\${CFG}.bak.\$(date +%Y%m%d%H%M%S)\"
if grep -q \"^rpc-http-api=\" \"\$CFG\"; then
  sed -i \"s|^rpc-http-api=.*|rpc-http-api=\$RPC_HTTP_API|\" \"\$CFG\"
else
  echo \"rpc-http-api=\$RPC_HTTP_API\" >> \"\$CFG\"
fi
if grep -q \"^rpc-ws-api=\" \"\$CFG\"; then
  sed -i \"s|^rpc-ws-api=.*|rpc-ws-api=\$RPC_WS_API|\" \"\$CFG\"
else
  echo \"rpc-ws-api=\$RPC_WS_API\" >> \"\$CFG\"
fi
chown besu:besu \"\$CFG\" 2>/dev/null || true
echo OK
'" 2>/dev/null || { log_warn "VMID $vmid: SSH or config update failed"; return 1; }
  log_ok "VMID $vmid: config updated"
  log_info "Restarting besu-rpc on $vmid..."
  run_ssh "$host" "pct exec $vmid -- systemctl restart besu-rpc 2>/dev/null || pct exec $vmid -- systemctl restart besu-rpc.service 2>/dev/null" || { log_warn "Restart failed for $vmid"; return 1; }
  log_ok "VMID $vmid: besu-rpc restarted"
  return 0
}

echo ""
echo "=== Ensure Core RPC 2101 / 2102 — TXPOOL + ADMIN (max Besu supports) ==="
echo " dry-run=$DRY_RUN 2101-only=$ONLY_2101 2102-only=$ONLY_2102"
echo " Note: txpool_besuClear, txpool_clear, admin_removeTransaction are NOT in Besu; use clear-all-transaction-pools.sh to clear stuck txs."
echo ""

# Run per-node; '|| true' so a failure on one node does not abort the other.
if [[ "$ONLY_2102" != true ]]; then
  ensure_apis "$VMID_2101" "$HOST_2101" "$CONFIG_2101" || true
fi
if [[ "$ONLY_2101" != true ]]; then
  ensure_apis "$VMID_2102" "$HOST_2102" "$CONFIG_2102" || true
fi

echo ""
echo "Done. Verify: ./scripts/maintenance/health-check-rpc-2101.sh and curl to 192.168.11.212:8545 for 2102."
echo "Ref: docs/04-configuration/CORE_RPC_2101_2102_TXPOOL_ADMIN_STATUS.md"
|
||||
84
scripts/maintenance/fix-block-production-staggered-restart.sh
Executable file
84
scripts/maintenance/fix-block-production-staggered-restart.sh
Executable file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env bash
# Staggered restart of Chain 138 validators to restore block production without losing quorum.
# When all 5 validators are restarted at once (e.g. clear-all-transaction-pools), they can all
# enter "full sync" and no node is at head to produce blocks. Restarting one at a time lets
# the rest stay at head so the restarted node syncs quickly and consensus can continue.
#
# Usage: ./scripts/maintenance/fix-block-production-staggered-restart.sh [--dry-run]
# Requires: SSH to Proxmox hosts (192.168.11.10 ML110, 192.168.11.11 R630-01, 192.168.11.12 R630-02)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# Optional PROXMOX_HOST_* / RPC_URL_138 overrides; defaults below apply otherwise.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true

RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
log_ok() { echo -e "${GREEN}[✓]${NC} $1"; }
log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }

# Order: restart one at a time; wait between so restarted node can sync from others
# VMID : host
VALIDATORS=(
  "1004:${PROXMOX_HOST_ML110:-192.168.11.10}"
  "1003:${PROXMOX_HOST_ML110:-192.168.11.10}"
  "1002:${PROXMOX_HOST_R630_01:-192.168.11.11}"
  "1001:${PROXMOX_HOST_R630_01:-192.168.11.11}"
  "1000:${PROXMOX_HOST_R630_01:-192.168.11.11}"
)
WAIT_BETWEEN=90
RPC="${RPC_URL_138:-http://192.168.11.211:8545}"

# Query eth_blockNumber via JSON-RPC; prints "0x0" on timeout/error (requires jq).
get_block() {
  curl -s -m 5 -X POST -H "Content-Type: application/json" \
    -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' "$RPC" 2>/dev/null | jq -r '.result // "0x0"'
}

echo "=== Staggered validator restart (fix block production) ==="
echo " RPC: $RPC"
echo " Wait between restarts: ${WAIT_BETWEEN}s"
$DRY_RUN && echo " (DRY RUN - no restarts)"
echo ""

BLOCK_BEFORE=$(get_block)
log_info "Block before: $BLOCK_BEFORE"

idx=0
total=${#VALIDATORS[@]}
for entry in "${VALIDATORS[@]}"; do
  idx=$((idx + 1))
  IFS=: read -r vmid host <<< "$entry"
  log_info "Restarting validator $vmid on $host..."
  if $DRY_RUN; then
    echo " Would: ssh root@$host 'pct exec $vmid -- systemctl restart besu-validator'"
  else
    # Allow up to 120s for restart (Besu stop/start can take 1-2 min)
    if timeout 120 ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no root@"$host" "pct exec $vmid -- systemctl restart besu-validator" 2>/dev/null; then
      log_ok " $vmid restarted"
    else
      log_warn " $vmid restart timed out or failed (node may still be restarting)"
    fi
  fi
  # Wait between restarts, but not after the final validator in the list.
  # BUGFIX: was hardcoded as [[ "$vmid" != "1000" ]], which silently breaks
  # (extra or missing waits) whenever VALIDATORS is reordered or edited.
  if ! $DRY_RUN && (( idx < total )); then
    log_info " Waiting ${WAIT_BETWEEN}s for node to rejoin and sync..."
    sleep "$WAIT_BETWEEN"
  fi
done

if ! $DRY_RUN; then
  log_info "Waiting 30s then checking block production..."
  sleep 30
  BLOCK_AFTER=$(get_block)
  log_info "Block after: $BLOCK_AFTER"
  echo ""
  echo "Run monitor to confirm blocks are advancing:"
  echo " ./scripts/monitoring/monitor-blockchain-health.sh"
  echo " watch -n 5 'cast block-number --rpc-url $RPC'"
fi

log_ok "Done."
|
||||
60
scripts/maintenance/proxmox-load-balance-suggest.sh
Normal file
60
scripts/maintenance/proxmox-load-balance-suggest.sh
Normal file
@@ -0,0 +1,60 @@
|
||||
#!/usr/bin/env bash
# Suggest load-balancing migrations: show current load and example commands to move
# containers from r630-01 to r630-02 (or ml110). Run from project root.
#
# Usage: bash scripts/maintenance/proxmox-load-balance-suggest.sh

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
[[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]] && source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"
# Array form: each option stays a separate word without relying on unquoted expansion (SC2086).
SSH_OPTS=(-o ConnectTimeout=8 -o StrictHostKeyChecking=no)

# Candidates safe to suggest (r630-01 -> r630-02). Excludes NPMplus main, core RPC, validators, sentries, DBIS core.
CANDIDATES="3500 3501 7804 8640 8642 10232 10235 10236"

echo ""
echo "=== Proxmox load balance — suggestion ==="
echo ""

# Current load and counts per host; SSH failures leave the fields blank.
for entry in "r630-01:$R630_01" "r630-02:$R630_02" "ml110:$ML110"; do
  IFS=: read -r node_name ip <<< "$entry"
  out=$(ssh "${SSH_OPTS[@]}" root@"$ip" "
    echo \"LOAD|\$(cat /proc/loadavg 2>/dev/null | cut -d' ' -f1-3)\"
    echo \"LXC|\$(pct list 2>/dev/null | tail -n +2 | wc -l)\"
  " 2>/dev/null) || true
  load=$(echo "$out" | awk -F'|' '$1=="LOAD"{print $2}')
  lxc=$(echo "$out" | awk -F'|' '$1=="LXC"{print $2}')
  printf " %-10s %s LXC: %s\n" "$node_name" "load: $load" "$lxc"
done

echo ""
echo "--- Suggested migrations (r630-01 → r630-02) ---"
echo "Run from project root. Use --dry-run first. Target storage on r630-02: thin1, thin2, thin5, thin6."
echo ""

for vmid in $CANDIDATES; do
  # Check if CT exists on r630-01
  on_src=$(ssh "${SSH_OPTS[@]}" root@"$R630_01" "pct list 2>/dev/null | awk '\$1==$vmid{print \$1}'" 2>/dev/null) || true
  if [[ -n "$on_src" ]]; then
    # BUGFIX: the old 'name=$(ssh ...) || echo "CT-$vmid"' printed the fallback
    # to stdout instead of assigning it, so $name stayed empty on SSH failure.
    # Assign the fallback whenever the lookup fails OR returns nothing.
    name=$(ssh "${SSH_OPTS[@]}" root@"$R630_01" "pct config $vmid 2>/dev/null | grep -E '^hostname:|^name:' | head -1 | sed 's/^[^:]*:[[:space:]]*//'" 2>/dev/null) || true
    [[ -n "$name" ]] || name="CT-$vmid"
    echo " VMID $vmid ($name):"
    echo " ./scripts/maintenance/migrate-ct-r630-01-to-r630-02.sh $vmid thin1 --dry-run"
    echo " ./scripts/maintenance/migrate-ct-r630-01-to-r630-02.sh $vmid thin1 --destroy-source"
    echo ""
  fi
done

echo "--- Cluster check (optional) ---"
echo "If nodes are in the same cluster, you can try live migrate from r630-01:"
echo " ssh root@$R630_01 \"pvecm status\""
echo " ssh root@$R630_01 \"pct migrate <VMID> r630-02 --storage thin1 --restart\""
echo ""
echo "See: docs/04-configuration/PROXMOX_LOAD_BALANCING_RUNBOOK.md"
echo ""
|
||||
Reference in New Issue
Block a user