chore: update submodule references and documentation
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled

- Marked submodules ai-mcp-pmm-controller, explorer-monorepo, and smom-dbis-138 as dirty to reflect recent changes.
- Updated documentation to clarify operator script usage, including dotenv loading and task execution instructions.
- Enhanced the README and various index files to provide clearer navigation and task completion guidance.

Made-with: Cursor
This commit is contained in:
defiQUG
2026-03-04 02:03:08 -08:00
parent 70eadb7bf0
commit e4c9dda0fd
246 changed files with 17774 additions and 93 deletions

View File

@@ -0,0 +1,80 @@
#!/usr/bin/env bash
# Ensure Core RPC nodes 2101 and 2102 expose TXPOOL and ADMIN (and DEBUG) in
# both rpc-http-api and rpc-ws-api.
# Deliberately does NOT add txpool_besuClear/txpool_clear/admin_removeTransaction —
# Besu does not implement those methods.
# See: docs/04-configuration/CORE_RPC_2101_2102_TXPOOL_ADMIN_STATUS.md
#
# Usage: ./scripts/maintenance/ensure-core-rpc-config-2101-2102.sh [--dry-run] [--2101-only] [--2102-only]
set -euo pipefail

# Resolve the repo root relative to this script so it works from any CWD.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional host/IP overrides; the :- defaults below apply when absent.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Canonical API list for Core RPC (max that Besu supports for txpool + admin)
RPC_HTTP_API='["ETH","NET","WEB3","TXPOOL","QBFT","ADMIN","DEBUG","TRACE"]'
RPC_WS_API='["ETH","NET","WEB3","TXPOOL","QBFT","ADMIN"]'

VMID_2101=2101
VMID_2102=2102
HOST_2101="${PROXMOX_HOST_R630_01:-192.168.11.11}"
HOST_2102="${PROXMOX_HOST_ML110:-192.168.11.10}"
CONFIG_2101="/etc/besu/config-rpc-core.toml"
CONFIG_2102="/etc/besu/config-rpc.toml"

# Flag parsing: order-independent; unrecognized arguments are ignored.
DRY_RUN=false
ONLY_2101=false
ONLY_2102=false
for arg in "$@"; do
  case "$arg" in
    --dry-run)   DRY_RUN=true ;;
    --2101-only) ONLY_2101=true ;;
    --2102-only) ONLY_2102=true ;;
  esac
done
# run_ssh HOST CMD — run CMD as root on HOST with a short connect timeout.
run_ssh() { ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no root@"$1" "$2"; }

# Colored log helpers. printf is used instead of `echo -e`: the escape
# sequences live in the format string, and the message is printed verbatim
# via %s — `echo -e` would also expand backslash escapes contained in the
# message itself (e.g. a config path containing \t).
log_ok() { printf '\033[0;32m[✓]\033[0m %s\n' "$1"; }
log_info() { printf '\033[0;34m[INFO]\033[0m %s\n' "$1"; }
log_warn() { printf '\033[0;33m[⚠]\033[0m %s\n' "$1"; }
# ensure_apis VMID HOST CONFIG_PATH
# Idempotently rewrite the rpc-http-api / rpc-ws-api keys in the given Besu
# TOML config inside LXC container VMID on Proxmox host HOST, then restart
# the besu-rpc service. A timestamped .bak copy of the config is kept next to
# the original. Honors the global $DRY_RUN flag.
# Returns 0 on success, 1 when SSH, the config update, or the restart fails
# (callers decide whether that is fatal).
ensure_apis() {
local vmid=$1
local host=$2
local config_path=$3
log_info "VMID $vmid ($host): ensuring $config_path has TXPOOL, ADMIN, DEBUG..."
if $DRY_RUN; then
echo " Would set rpc-http-api and rpc-ws-api to include TXPOOL, ADMIN, DEBUG, QBFT, TRACE (2101/2102)"
return 0
fi
# Pass API lists via env so quoting is safe; remote sed updates the config
# (replace the key in place if present, otherwise append it to the file).
# NOTE: the single-quoted heredoc-like string below runs as bash INSIDE the
# container via `pct exec`; \$ escapes are resolved remotely, not locally.
run_ssh "$host" "pct exec $vmid -- env RPC_HTTP_API='$RPC_HTTP_API' RPC_WS_API='$RPC_WS_API' CFG='$config_path' bash -c '
set -e
[ -f \"\$CFG\" ] || { echo \"Config \$CFG not found\"; exit 1; }
cp \"\$CFG\" \"\${CFG}.bak.\$(date +%Y%m%d%H%M%S)\"
grep -q \"rpc-http-api\" \"\$CFG\" && sed -i \"s|^rpc-http-api=.*|rpc-http-api=\$RPC_HTTP_API|\" \"\$CFG\" || echo \"rpc-http-api=\$RPC_HTTP_API\" >> \"\$CFG\"
grep -q \"rpc-ws-api\" \"\$CFG\" && sed -i \"s|^rpc-ws-api=.*|rpc-ws-api=\$RPC_WS_API|\" \"\$CFG\" || echo \"rpc-ws-api=\$RPC_WS_API\" >> \"\$CFG\"
chown besu:besu \"\$CFG\" 2>/dev/null || true
echo OK
'" 2>/dev/null || { log_warn "VMID $vmid: SSH or config update failed"; return 1; }
log_ok "VMID $vmid: config updated"
# Restart so the new API lists take effect; try both unit-name spellings.
log_info "Restarting besu-rpc on $vmid..."
run_ssh "$host" "pct exec $vmid -- systemctl restart besu-rpc 2>/dev/null || pct exec $vmid -- systemctl restart besu-rpc.service 2>/dev/null" || { log_warn "Restart failed for $vmid"; return 1; }
log_ok "VMID $vmid: besu-rpc restarted"
return 0
}
echo ""
echo "=== Ensure Core RPC 2101 / 2102 — TXPOOL + ADMIN (max Besu supports) ==="
echo " dry-run=$DRY_RUN 2101-only=$ONLY_2101 2102-only=$ONLY_2102"
echo " Note: txpool_besuClear, txpool_clear, admin_removeTransaction are NOT in Besu; use clear-all-transaction-pools.sh to clear stuck txs."
echo ""

# Guard-clause style: a node is skipped only when the opposite *-only flag was
# given. The trailing `|| true` keeps a failure on one node from aborting the
# other under `set -e`.
[[ "$ONLY_2102" == true ]] || ensure_apis "$VMID_2101" "$HOST_2101" "$CONFIG_2101" || true
[[ "$ONLY_2101" == true ]] || ensure_apis "$VMID_2102" "$HOST_2102" "$CONFIG_2102" || true

echo ""
echo "Done. Verify: ./scripts/maintenance/health-check-rpc-2101.sh and curl to 192.168.11.212:8545 for 2102."
echo "Ref: docs/04-configuration/CORE_RPC_2101_2102_TXPOOL_ADMIN_STATUS.md"

View File

@@ -0,0 +1,84 @@
#!/usr/bin/env bash
# Staggered restart of Chain 138 validators to restore block production
# without losing quorum. Restarting all 5 validators at once (e.g. via
# clear-all-transaction-pools) can leave every node in "full sync" with no
# node at head to produce blocks; restarting one at a time keeps the rest at
# head so each restarted node syncs quickly and consensus continues.
#
# Usage: ./scripts/maintenance/fix-block-production-staggered-restart.sh [--dry-run]
# Requires: SSH to Proxmox hosts (192.168.11.10 ML110, 192.168.11.11 R630-01, 192.168.11.12 R630-02)
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# --dry-run: describe actions without restarting anything.
DRY_RUN=false
if [[ "${1:-}" == "--dry-run" ]]; then
  DRY_RUN=true
fi

# ANSI color codes, stored as literal \033 sequences (expanded at print time).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m'
# Colored log helpers. printf '%b' expands the \033 escape stored in the
# color variables, while '%s' prints the message verbatim — unlike the
# previous `echo -e`, which also expanded backslash escapes inside the
# message itself (e.g. a path containing \n).
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "$1"; }
log_ok() { printf '%b[✓]%b %s\n' "$GREEN" "$NC" "$1"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "$1"; }
# Restart order: one validator at a time, pausing between each, so the node
# just restarted can re-sync from peers that stayed at head.
# Entry format: "VMID:host".
ml110_host="${PROXMOX_HOST_ML110:-192.168.11.10}"
r630_01_host="${PROXMOX_HOST_R630_01:-192.168.11.11}"
VALIDATORS=(
  "1004:${ml110_host}"
  "1003:${ml110_host}"
  "1002:${r630_01_host}"
  "1001:${r630_01_host}"
  "1000:${r630_01_host}"
)
WAIT_BETWEEN=90
RPC="${RPC_URL_138:-http://192.168.11.211:8545}"
# get_block — print the current head block number (hex string) from $RPC.
# jq maps a missing/null .result to "0x0"; curl failures are silenced and
# yield empty output (the pipeline's jq then prints nothing).
get_block() {
  local payload='{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}'
  curl -s -m 5 -X POST -H "Content-Type: application/json" \
    -d "$payload" "$RPC" 2>/dev/null | jq -r '.result // "0x0"'
}
# --- Main --------------------------------------------------------------------
echo "=== Staggered validator restart (fix block production) ==="
echo " RPC: $RPC"
echo " Wait between restarts: ${WAIT_BETWEEN}s"
if $DRY_RUN; then
  echo " (DRY RUN - no restarts)"
fi
echo ""

BLOCK_BEFORE=$(get_block)
log_info "Block before: $BLOCK_BEFORE"

for spec in "${VALIDATORS[@]}"; do
  IFS=: read -r node_id node_host <<< "$spec"
  log_info "Restarting validator $node_id on $node_host..."
  if $DRY_RUN; then
    echo " Would: ssh root@$node_host 'pct exec $node_id -- systemctl restart besu-validator'"
  else
    # Allow up to 120s for restart (Besu stop/start can take 1-2 min)
    if timeout 120 ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no root@"$node_host" "pct exec $node_id -- systemctl restart besu-validator" 2>/dev/null; then
      log_ok " $node_id restarted"
    else
      log_warn " $node_id restart timed out or failed (node may still be restarting)"
    fi
  fi
  # No pause after the last validator (1000); otherwise give the restarted
  # node time to rejoin and sync before touching the next one.
  if ! $DRY_RUN && [[ "$node_id" != "1000" ]]; then
    log_info " Waiting ${WAIT_BETWEEN}s for node to rejoin and sync..."
    sleep "$WAIT_BETWEEN"
  fi
done

if ! $DRY_RUN; then
  log_info "Waiting 30s then checking block production..."
  sleep 30
  BLOCK_AFTER=$(get_block)
  log_info "Block after: $BLOCK_AFTER"
  echo ""
  echo "Run monitor to confirm blocks are advancing:"
  echo " ./scripts/monitoring/monitor-blockchain-health.sh"
  echo " watch -n 5 'cast block-number --rpc-url $RPC'"
fi
log_ok "Done."

View File

@@ -0,0 +1,60 @@
#!/usr/bin/env bash
# Suggest load-balancing migrations: show current load and example commands to
# move containers from r630-01 to r630-02 (or ml110). Run from project root.
#
# Usage: bash scripts/maintenance/proxmox-load-balance-suggest.sh
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Pull optional host/IP overrides from the shared config when it exists.
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
ML110="${PROXMOX_HOST_ML110:-192.168.11.10}"

# Intentionally a plain string: used unquoted as `ssh $SSH_OPTS` so it
# word-splits into separate options.
SSH_OPTS="-o ConnectTimeout=8 -o StrictHostKeyChecking=no"

# Candidates safe to suggest (r630-01 -> r630-02). Excludes NPMplus main, core RPC, validators, sentries, DBIS core.
CANDIDATES="3500 3501 7804 8640 8642 10232 10235 10236"
echo ""
echo "=== Proxmox load balance — suggestion ==="
echo ""

# Report current load average and LXC container count for each host.
# SSH failures are tolerated: $report stays empty and the fields print blank.
for host_spec in "r630-01:$R630_01" "r630-02:$R630_02" "ml110:$ML110"; do
  IFS=: read -r label addr <<< "$host_spec"
  report=$(ssh $SSH_OPTS root@"$addr" "
echo \"LOAD|\$(cat /proc/loadavg 2>/dev/null | cut -d' ' -f1-3)\"
echo \"LXC|\$(pct list 2>/dev/null | tail -n +2 | wc -l)\"
" 2>/dev/null) || true
  load_avg=$(awk -F'|' '$1=="LOAD"{print $2}' <<<"$report")
  ct_count=$(awk -F'|' '$1=="LXC"{print $2}' <<<"$report")
  printf " %-10s %s LXC: %s\n" "$label" "load: $load_avg" "$ct_count"
done
echo ""
echo "--- Suggested migrations (r630-01 → r630-02) ---"
echo "Run from project root. Use --dry-run first. Target storage on r630-02: thin1, thin2, thin5, thin6."
echo ""
for vmid in $CANDIDATES; do
  # Check if CT exists on r630-01 ($vmid expands locally; \$1 is the remote awk field)
  on_src=$(ssh $SSH_OPTS root@"$R630_01" "pct list 2>/dev/null | awk '\$1==$vmid{print \$1}'" 2>/dev/null) || true
  if [[ -n "$on_src" ]]; then
    # Resolve the container's name for the report.
    # BUGFIX: the previous `name=$(ssh ...) || echo "CT-$vmid"` printed the
    # fallback to stdout instead of assigning it, leaving $name empty when
    # SSH failed. Assign the fallback instead — it now also covers a config
    # with no hostname:/name: line (empty-but-successful output).
    name=$(ssh $SSH_OPTS root@"$R630_01" "pct config $vmid 2>/dev/null | grep -E '^hostname:|^name:' | head -1 | sed 's/^[^:]*:[[:space:]]*//'" 2>/dev/null) || name=""
    [[ -n "$name" ]] || name="CT-$vmid"
    echo " VMID $vmid ($name):"
    echo " ./scripts/maintenance/migrate-ct-r630-01-to-r630-02.sh $vmid thin1 --dry-run"
    echo " ./scripts/maintenance/migrate-ct-r630-01-to-r630-02.sh $vmid thin1 --destroy-source"
    echo ""
  fi
done
# Closing hints: optional cluster-native migration path plus runbook pointer.
printf '%s\n' \
  "--- Cluster check (optional) ---" \
  "If nodes are in the same cluster, you can try live migrate from r630-01:" \
  " ssh root@$R630_01 \"pvecm status\"" \
  " ssh root@$R630_01 \"pct migrate <VMID> r630-02 --storage thin1 --restart\"" \
  "" \
  "See: docs/04-configuration/PROXMOX_LOAD_BALANCING_RUNBOOK.md" \
  ""