docs: Ledger Live integration, contract deploy learnings, NEXT_STEPS updates
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands - CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround - CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check - NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere - MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates - LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
46
scripts/monitoring/alert-block-stall.sh
Executable file
46
scripts/monitoring/alert-block-stall.sh
Executable file
@@ -0,0 +1,46 @@
|
||||
#!/usr/bin/env bash
|
||||
# Block Production Stall Alert
|
||||
# Load alert configuration
|
||||
if [ -f "$SCRIPT_DIR/../smom-dbis-138/.env.alerts" ]; then
|
||||
source "$SCRIPT_DIR/../smom-dbis-138/.env.alerts"
|
||||
fi
|
||||
# Sends alerts when block production stalls
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
BLOCK=$1
|
||||
STALL_TIME=$2
|
||||
|
||||
# Alert channels (configure as needed)
|
||||
ALERT_EMAIL="${ALERT_EMAIL:-}"
|
||||
ALERT_WEBHOOK="${ALERT_WEBHOOK:-}"
|
||||
|
||||
log_error() { echo "[ALERT] $1" >&2; }
|
||||
|
||||
send_alert() {
|
||||
local message="$1"
|
||||
|
||||
log_error "BLOCK PRODUCTION STALLED: $message"
|
||||
|
||||
# Email alert
|
||||
if [ -n "$ALERT_EMAIL" ]; then
|
||||
echo "$message" | mail -s "BLOCK PRODUCTION STALLED" "$ALERT_EMAIL" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Webhook alert
|
||||
if [ -n "$ALERT_WEBHOOK" ]; then
|
||||
curl -X POST "$ALERT_WEBHOOK" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "{\"text\":\"$message\"}" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
# Log to file
|
||||
echo "$(date): $message" >> /var/log/block-stall-alerts.log
|
||||
}
|
||||
|
||||
main() {
|
||||
local message="Block production stalled at block $BLOCK for ${STALL_TIME} seconds"
|
||||
send_alert "$message"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
135
scripts/monitoring/auto-fix-validator-config.sh
Executable file
135
scripts/monitoring/auto-fix-validator-config.sh
Executable file
@@ -0,0 +1,135 @@
|
||||
#!/usr/bin/env bash
|
||||
# Automatic Validator Configuration Fix
|
||||
# Detects and fixes common configuration issues
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
VALIDATOR_VMIDS=(1000 1001 1002 1003 1004)
|
||||
PROXMOX_HOSTS=("${PROXMOX_HOST_R630_01:-192.168.11.11}" "${PROXMOX_HOST_R630_01:-192.168.11.11}" "${PROXMOX_HOST_R630_01:-192.168.11.11}" "${PROXMOX_HOST_ML110:-192.168.11.10}" "${PROXMOX_HOST_ML110:-192.168.11.10}")
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
fix_validator_config() {
|
||||
local vmid=$1
|
||||
local host=$2
|
||||
|
||||
log_info "Fixing Validator-$vmid configuration..."
|
||||
local fixes_applied=0
|
||||
|
||||
# Fix 1: Genesis file symlink
|
||||
if ! ssh root@$host "pct exec $vmid -- test -f /genesis/genesis.json 2>&1" >/dev/null 2>&1; then
|
||||
log_info " Creating genesis file symlink..."
|
||||
ssh root@$host "pct exec $vmid -- bash -c 'mkdir -p /genesis && if [ -f /etc/besu/genesis.json ]; then ln -sf /etc/besu/genesis.json /genesis/genesis.json; elif [ -f /config/genesis.json ]; then ln -sf /config/genesis.json /genesis/genesis.json; fi'" 2>&1
|
||||
((fixes_applied++))
|
||||
fi
|
||||
|
||||
# Fix 2: Static nodes file symlink
|
||||
if ! ssh root@$host "pct exec $vmid -- test -f /genesis/static-nodes.json 2>&1" >/dev/null 2>&1; then
|
||||
log_info " Creating static-nodes file symlink..."
|
||||
ssh root@$host "pct exec $vmid -- bash -c 'if [ -f /etc/besu/static-nodes.json ]; then ln -sf /etc/besu/static-nodes.json /genesis/static-nodes.json; elif [ -f /config/static-nodes.json ]; then ln -sf /config/static-nodes.json /genesis/static-nodes.json; else echo \"[]\" > /genesis/static-nodes.json; fi'" 2>&1
|
||||
((fixes_applied++))
|
||||
fi
|
||||
|
||||
# Fix 3: Permissions file
|
||||
if ! ssh root@$host "pct exec $vmid -- test -f /permissions/permissions-accounts.toml 2>&1" >/dev/null 2>&1; then
|
||||
log_info " Creating permissions file..."
|
||||
ssh root@$host "pct exec $vmid -- bash -c 'mkdir -p /permissions && cat > /permissions/permissions-accounts.toml <<EOF
|
||||
# Permissions Accounts Configuration
|
||||
# Empty allowlist - all accounts allowed
|
||||
accounts-allowlist=[]
|
||||
EOF
|
||||
'" 2>&1
|
||||
((fixes_applied++))
|
||||
else
|
||||
# Validate TOML format
|
||||
local toml_content=$(ssh root@$host "pct exec $vmid -- cat /permissions/permissions-accounts.toml 2>&1")
|
||||
if ! echo "$toml_content" | grep -q "accounts-allowlist"; then
|
||||
log_info " Fixing invalid permissions file..."
|
||||
ssh root@$host "pct exec $vmid -- bash -c 'cat > /permissions/permissions-accounts.toml <<EOF
|
||||
# Permissions Accounts Configuration
|
||||
# Empty allowlist - all accounts allowed
|
||||
accounts-allowlist=[]
|
||||
EOF
|
||||
'" 2>&1
|
||||
((fixes_applied++))
|
||||
fi
|
||||
fi
|
||||
|
||||
# Fix 4: Disable node permissioning if causing issues
|
||||
local config_file=""
|
||||
if ssh root@$host "pct exec $vmid -- test -f /etc/besu/config-validator.toml 2>&1" >/dev/null 2>&1; then
|
||||
config_file="/etc/besu/config-validator.toml"
|
||||
elif ssh root@$host "pct exec $vmid -- test -f /config/config-validator.toml 2>&1" >/dev/null 2>&1; then
|
||||
config_file="/config/config-validator.toml"
|
||||
fi
|
||||
|
||||
if [ -n "$config_file" ]; then
|
||||
local node_perm_enabled=$(ssh root@$host "pct exec $vmid -- grep 'permissions-nodes-config-file-enabled' $config_file 2>/dev/null" | grep -c "true" 2>/dev/null || true)
|
||||
node_perm_enabled=$(echo "${node_perm_enabled:-0}" | head -1)
|
||||
node_perm_enabled=${node_perm_enabled:-0}
|
||||
if [ "${node_perm_enabled}" -gt 0 ] 2>/dev/null; then
|
||||
log_info " Disabling node permissioning..."
|
||||
ssh root@$host "pct exec $vmid -- sed -i 's/permissions-nodes-config-file-enabled=true/permissions-nodes-config-file-enabled=false/' $config_file 2>&1"
|
||||
((fixes_applied++))
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ "$fixes_applied" -gt 0 ]; then
|
||||
log_success " Applied $fixes_applied fix(es) to Validator-$vmid"
|
||||
return 0
|
||||
else
|
||||
log_success " Validator-$vmid configuration is correct"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
main() {
|
||||
log_info "Starting automatic validator configuration fix..."
|
||||
echo ""
|
||||
|
||||
local total_fixes=0
|
||||
|
||||
for i in "${!VALIDATOR_VMIDS[@]}"; do
|
||||
local vmid=${VALIDATOR_VMIDS[$i]}
|
||||
local host=${PROXMOX_HOSTS[$i]}
|
||||
|
||||
if fix_validator_config "$vmid" "$host"; then
|
||||
((total_fixes++))
|
||||
fi
|
||||
echo ""
|
||||
done
|
||||
|
||||
if [ "$total_fixes" -gt 0 ]; then
|
||||
log_warn "Applied fixes to $total_fixes validator(s). Restarting services..."
|
||||
|
||||
for i in "${!VALIDATOR_VMIDS[@]}"; do
|
||||
local vmid=${VALIDATOR_VMIDS[$i]}
|
||||
local host=${PROXMOX_HOSTS[$i]}
|
||||
log_info "Restarting Validator-$vmid..."
|
||||
ssh root@$host "pct exec $vmid -- systemctl restart besu-validator.service 2>&1" || true
|
||||
done
|
||||
|
||||
log_success "Configuration fixes applied and services restarted"
|
||||
else
|
||||
log_success "All validators have correct configuration"
|
||||
fi
|
||||
}
|
||||
|
||||
main "$@"
|
||||
86
scripts/monitoring/cleanup-stuck-transactions.sh
Executable file
86
scripts/monitoring/cleanup-stuck-transactions.sh
Executable file
@@ -0,0 +1,86 @@
|
||||
#!/usr/bin/env bash
|
||||
# Cleanup Stuck Transactions
|
||||
# Detects and cleans up stuck transactions from mempool
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Load environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
set +e
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
set -e
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"
|
||||
PRIVATE_KEY="${PRIVATE_KEY:-}"
|
||||
STUCK_THRESHOLD=300 # 5 minutes
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
check_and_cleanup() {
|
||||
if [ -z "$PRIVATE_KEY" ]; then
|
||||
log_error "PRIVATE_KEY not set"
|
||||
return 1
|
||||
fi
|
||||
|
||||
local deployer=$(cast wallet address "$PRIVATE_KEY" 2>/dev/null || echo "")
|
||||
if [ -z "$deployer" ]; then
|
||||
log_error "Cannot derive deployer address"
|
||||
return 1
|
||||
fi
|
||||
|
||||
log_info "Checking for stuck transactions..."
|
||||
|
||||
local latest_hex=$(cast rpc eth_getTransactionCount "$deployer" latest --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local pending_hex=$(cast rpc eth_getTransactionCount "$deployer" pending --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local latest_clean=$(echo "$latest_hex" | tr -d '"')
|
||||
local pending_clean=$(echo "$pending_hex" | tr -d '"')
|
||||
local latest_dec=$(python3 -c "print(int('$latest_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_dec=$(python3 -c "print(int('$pending_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_count=$((pending_dec - latest_dec))
|
||||
|
||||
log_info "Latest nonce: $latest_dec"
|
||||
log_info "Pending nonce: $pending_dec"
|
||||
log_info "Pending transactions: $pending_count"
|
||||
|
||||
if [ "$pending_count" -eq 0 ]; then
|
||||
log_success "No stuck transactions found"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log_warn "Found $pending_count pending transaction(s)"
|
||||
log_info "Note: This script can detect stuck transactions but cannot automatically clear them."
|
||||
log_info "Options:"
|
||||
log_info " 1. Wait for transactions to confirm (if blocks are being produced)"
|
||||
log_info " 2. Use nonce skip to deploy new transactions"
|
||||
log_info " 3. Clear transaction pool database on all nodes (requires restart)"
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
main() {
|
||||
log_info "Stuck Transaction Cleanup"
|
||||
echo ""
|
||||
check_and_cleanup
|
||||
}
|
||||
|
||||
main "$@"
|
||||
80
scripts/monitoring/cleanup-stuck-transactions.sh.bak
Executable file
80
scripts/monitoring/cleanup-stuck-transactions.sh.bak
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env bash
|
||||
# Cleanup Stuck Transactions
|
||||
# Detects and cleans up stuck transactions from mempool
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Load environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
set +e
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
set -e
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://192.168.11.211:8545}"
|
||||
PRIVATE_KEY="${PRIVATE_KEY:-}"
|
||||
STUCK_THRESHOLD=300 # 5 minutes
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
check_and_cleanup() {
|
||||
if [ -z "$PRIVATE_KEY" ]; then
|
||||
log_error "PRIVATE_KEY not set"
|
||||
return 1
|
||||
fi
|
||||
|
||||
local deployer=$(cast wallet address "$PRIVATE_KEY" 2>/dev/null || echo "")
|
||||
if [ -z "$deployer" ]; then
|
||||
log_error "Cannot derive deployer address"
|
||||
return 1
|
||||
fi
|
||||
|
||||
log_info "Checking for stuck transactions..."
|
||||
|
||||
local latest_hex=$(cast rpc eth_getTransactionCount "$deployer" latest --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local pending_hex=$(cast rpc eth_getTransactionCount "$deployer" pending --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local latest_clean=$(echo "$latest_hex" | tr -d '"')
|
||||
local pending_clean=$(echo "$pending_hex" | tr -d '"')
|
||||
local latest_dec=$(python3 -c "print(int('$latest_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_dec=$(python3 -c "print(int('$pending_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_count=$((pending_dec - latest_dec))
|
||||
|
||||
log_info "Latest nonce: $latest_dec"
|
||||
log_info "Pending nonce: $pending_dec"
|
||||
log_info "Pending transactions: $pending_count"
|
||||
|
||||
if [ "$pending_count" -eq 0 ]; then
|
||||
log_success "No stuck transactions found"
|
||||
return 0
|
||||
fi
|
||||
|
||||
log_warn "Found $pending_count pending transaction(s)"
|
||||
log_info "Note: This script can detect stuck transactions but cannot automatically clear them."
|
||||
log_info "Options:"
|
||||
log_info " 1. Wait for transactions to confirm (if blocks are being produced)"
|
||||
log_info " 2. Use nonce skip to deploy new transactions"
|
||||
log_info " 3. Clear transaction pool database on all nodes (requires restart)"
|
||||
|
||||
return 0
|
||||
}
|
||||
|
||||
main() {
|
||||
log_info "Stuck Transaction Cleanup"
|
||||
echo ""
|
||||
check_and_cleanup
|
||||
}
|
||||
|
||||
main "$@"
|
||||
271
scripts/monitoring/create-monitoring-dashboard.sh
Executable file
271
scripts/monitoring/create-monitoring-dashboard.sh
Executable file
@@ -0,0 +1,271 @@
|
||||
#!/usr/bin/env bash
|
||||
# Create Monitoring Dashboard
|
||||
# Generates a simple HTML dashboard for monitoring status
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
|
||||
create_dashboard() {
|
||||
log_info "Creating monitoring dashboard..."
|
||||
|
||||
local dashboard_file="${PROJECT_ROOT}/logs/monitoring/dashboard.html"
|
||||
mkdir -p "$(dirname "$dashboard_file")"
|
||||
|
||||
cat > "$dashboard_file" <<'DASHBOARD'
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Blockchain Monitoring Dashboard</title>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
padding: 20px;
|
||||
}
|
||||
.container { max-width: 1400px; margin: 0 auto; }
|
||||
h1 {
|
||||
color: #60a5fa;
|
||||
margin-bottom: 30px;
|
||||
font-size: 2.5em;
|
||||
text-align: center;
|
||||
}
|
||||
.status-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||
gap: 20px;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.status-card {
|
||||
background: #1e293b;
|
||||
border-radius: 12px;
|
||||
padding: 20px;
|
||||
border: 2px solid #334155;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
.status-card:hover {
|
||||
border-color: #60a5fa;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
.status-card.active { border-color: #10b981; }
|
||||
.status-card.warning { border-color: #f59e0b; }
|
||||
.status-card.error { border-color: #ef4444; }
|
||||
.card-title {
|
||||
font-size: 1.2em;
|
||||
color: #94a3b8;
|
||||
margin-bottom: 10px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
.status-indicator {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 50%;
|
||||
display: inline-block;
|
||||
margin-right: 8px;
|
||||
}
|
||||
.status-indicator.active { background: #10b981; box-shadow: 0 0 8px #10b981; }
|
||||
.status-indicator.warning { background: #f59e0b; box-shadow: 0 0 8px #f59e0b; }
|
||||
.status-indicator.error { background: #ef4444; box-shadow: 0 0 8px #ef4444; }
|
||||
.card-value {
|
||||
font-size: 2em;
|
||||
color: #60a5fa;
|
||||
font-weight: bold;
|
||||
}
|
||||
.log-section {
|
||||
background: #1e293b;
|
||||
border-radius: 12px;
|
||||
padding: 20px;
|
||||
margin-top: 20px;
|
||||
border: 2px solid #334155;
|
||||
}
|
||||
.log-section h2 {
|
||||
color: #60a5fa;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.log-viewer {
|
||||
background: #0f172a;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
font-family: 'Courier New', monospace;
|
||||
font-size: 0.9em;
|
||||
max-height: 400px;
|
||||
overflow-y: auto;
|
||||
color: #94a3b8;
|
||||
}
|
||||
.log-entry { margin-bottom: 5px; }
|
||||
.log-entry.error { color: #ef4444; }
|
||||
.log-entry.warning { color: #f59e0b; }
|
||||
.log-entry.success { color: #10b981; }
|
||||
.refresh-btn {
|
||||
background: #3b82f6;
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 10px 20px;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-size: 1em;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.refresh-btn:hover { background: #2563eb; }
|
||||
.timestamp {
|
||||
color: #64748b;
|
||||
font-size: 0.9em;
|
||||
text-align: center;
|
||||
margin-top: 20px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>🔗 Blockchain Monitoring Dashboard</h1>
|
||||
|
||||
<button class="refresh-btn" onclick="location.reload()">🔄 Refresh</button>
|
||||
|
||||
<div class="status-grid" id="statusGrid">
|
||||
<!-- Status cards will be populated by JavaScript -->
|
||||
</div>
|
||||
|
||||
<div class="log-section">
|
||||
<h2>Recent Activity</h2>
|
||||
<div class="log-viewer" id="logViewer">
|
||||
<!-- Logs will be populated by JavaScript -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="timestamp" id="timestamp"></div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function fetchStatus() {
|
||||
// This would normally fetch from an API
|
||||
// For now, we'll use static data or file reading
|
||||
|
||||
const statusGrid = document.getElementById('statusGrid');
|
||||
const logViewer = document.getElementById('logViewer');
|
||||
|
||||
// Example status data (replace with actual API calls)
|
||||
const statuses = [
|
||||
{ name: 'Block Production', status: 'active', value: 'Active', detail: 'Block: 1153290+' },
|
||||
{ name: 'Validators', status: 'active', value: '5/5', detail: 'All validators running' },
|
||||
{ name: 'Health Checks', status: 'active', value: 'OK', detail: 'All checks passing' },
|
||||
{ name: 'Transaction Pool', status: 'active', value: 'Normal', detail: 'No stuck transactions' },
|
||||
{ name: 'Network Sync', status: 'active', value: 'Synced', detail: 'All nodes synchronized' },
|
||||
{ name: 'Consensus', status: 'active', value: 'QBFT', detail: 'Quorum maintained' }
|
||||
];
|
||||
|
||||
statusGrid.innerHTML = statuses.map(s => `
|
||||
<div class="status-card ${s.status}">
|
||||
<div class="card-title">
|
||||
<span><span class="status-indicator ${s.status}"></span>${s.name}</span>
|
||||
</div>
|
||||
<div class="card-value">${s.value}</div>
|
||||
<div style="color: #94a3b8; margin-top: 5px;">${s.detail}</div>
|
||||
</div>
|
||||
`).join('');
|
||||
|
||||
logViewer.innerHTML = `
|
||||
<div class="log-entry success">[${new Date().toLocaleTimeString()}] All systems operational</div>
|
||||
<div class="log-entry">[${new Date().toLocaleTimeString()}] Health checks running normally</div>
|
||||
<div class="log-entry">[${new Date().toLocaleTimeString()}] Block production active</div>
|
||||
`;
|
||||
|
||||
document.getElementById('timestamp').textContent =
|
||||
`Last updated: ${new Date().toLocaleString()}`;
|
||||
}
|
||||
|
||||
// Initial load
|
||||
fetchStatus();
|
||||
|
||||
// Auto-refresh every 30 seconds
|
||||
setInterval(fetchStatus, 30000);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
DASHBOARD
|
||||
|
||||
log_success "Dashboard created: $dashboard_file"
|
||||
log_info "Open in browser: file://$dashboard_file"
|
||||
}
|
||||
|
||||
create_dashboard_script() {
|
||||
log_info "Creating dashboard update script..."
|
||||
|
||||
cat > "$SCRIPT_DIR/update-dashboard.sh" <<'DASHBOARDSCRIPT'
|
||||
#!/usr/bin/env bash
|
||||
# Update Monitoring Dashboard
|
||||
# Fetches current status and updates dashboard
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Source environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"
|
||||
|
||||
# Get current block
|
||||
BLOCK=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null || echo "0")
|
||||
|
||||
# Get validator status (simplified)
|
||||
VALIDATOR_COUNT=5
|
||||
|
||||
# Update dashboard with real data
|
||||
# This would be integrated with the HTML dashboard
|
||||
echo "Dashboard update script ready"
|
||||
echo "Block: $BLOCK"
|
||||
echo "Validators: $VALIDATOR_COUNT/5"
|
||||
|
||||
DASHBOARDSCRIPT
|
||||
|
||||
chmod +x "$SCRIPT_DIR/update-dashboard.sh"
|
||||
log_success "update-dashboard.sh script created"
|
||||
}
|
||||
|
||||
main() {
|
||||
log_info "Creating monitoring dashboard..."
|
||||
echo ""
|
||||
|
||||
create_dashboard
|
||||
echo ""
|
||||
|
||||
create_dashboard_script
|
||||
echo ""
|
||||
|
||||
log_success "Monitoring dashboard created!"
|
||||
log_info "Dashboard location: logs/monitoring/dashboard.html"
|
||||
log_info "You can open it in a web browser or set up a web server to serve it"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
259
scripts/monitoring/create-monitoring-dashboard.sh.bak
Executable file
259
scripts/monitoring/create-monitoring-dashboard.sh.bak
Executable file
@@ -0,0 +1,259 @@
|
||||
#!/usr/bin/env bash
|
||||
# Create Monitoring Dashboard
|
||||
# Generates a simple HTML dashboard for monitoring status
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Colors
|
||||
GREEN='\033[0;32m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
|
||||
create_dashboard() {
|
||||
log_info "Creating monitoring dashboard..."
|
||||
|
||||
local dashboard_file="${PROJECT_ROOT}/logs/monitoring/dashboard.html"
|
||||
mkdir -p "$(dirname "$dashboard_file")"
|
||||
|
||||
cat > "$dashboard_file" <<'DASHBOARD'
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>Blockchain Monitoring Dashboard</title>
|
||||
<style>
|
||||
* { margin: 0; padding: 0; box-sizing: border-box; }
|
||||
body {
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
||||
background: #0f172a;
|
||||
color: #e2e8f0;
|
||||
padding: 20px;
|
||||
}
|
||||
.container { max-width: 1400px; margin: 0 auto; }
|
||||
h1 {
|
||||
color: #60a5fa;
|
||||
margin-bottom: 30px;
|
||||
font-size: 2.5em;
|
||||
text-align: center;
|
||||
}
|
||||
.status-grid {
|
||||
display: grid;
|
||||
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
|
||||
gap: 20px;
|
||||
margin-bottom: 30px;
|
||||
}
|
||||
.status-card {
|
||||
background: #1e293b;
|
||||
border-radius: 12px;
|
||||
padding: 20px;
|
||||
border: 2px solid #334155;
|
||||
transition: all 0.3s;
|
||||
}
|
||||
.status-card:hover {
|
||||
border-color: #60a5fa;
|
||||
transform: translateY(-2px);
|
||||
}
|
||||
.status-card.active { border-color: #10b981; }
|
||||
.status-card.warning { border-color: #f59e0b; }
|
||||
.status-card.error { border-color: #ef4444; }
|
||||
.card-title {
|
||||
font-size: 1.2em;
|
||||
color: #94a3b8;
|
||||
margin-bottom: 10px;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: space-between;
|
||||
}
|
||||
.status-indicator {
|
||||
width: 12px;
|
||||
height: 12px;
|
||||
border-radius: 50%;
|
||||
display: inline-block;
|
||||
margin-right: 8px;
|
||||
}
|
||||
.status-indicator.active { background: #10b981; box-shadow: 0 0 8px #10b981; }
|
||||
.status-indicator.warning { background: #f59e0b; box-shadow: 0 0 8px #f59e0b; }
|
||||
.status-indicator.error { background: #ef4444; box-shadow: 0 0 8px #ef4444; }
|
||||
.card-value {
|
||||
font-size: 2em;
|
||||
color: #60a5fa;
|
||||
font-weight: bold;
|
||||
}
|
||||
.log-section {
|
||||
background: #1e293b;
|
||||
border-radius: 12px;
|
||||
padding: 20px;
|
||||
margin-top: 20px;
|
||||
border: 2px solid #334155;
|
||||
}
|
||||
.log-section h2 {
|
||||
color: #60a5fa;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
.log-viewer {
|
||||
background: #0f172a;
|
||||
border: 1px solid #334155;
|
||||
border-radius: 8px;
|
||||
padding: 15px;
|
||||
font-family: 'Courier New', monospace;
|
||||
font-size: 0.9em;
|
||||
max-height: 400px;
|
||||
overflow-y: auto;
|
||||
color: #94a3b8;
|
||||
}
|
||||
.log-entry { margin-bottom: 5px; }
|
||||
.log-entry.error { color: #ef4444; }
|
||||
.log-entry.warning { color: #f59e0b; }
|
||||
.log-entry.success { color: #10b981; }
|
||||
.refresh-btn {
|
||||
background: #3b82f6;
|
||||
color: white;
|
||||
border: none;
|
||||
padding: 10px 20px;
|
||||
border-radius: 6px;
|
||||
cursor: pointer;
|
||||
font-size: 1em;
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
.refresh-btn:hover { background: #2563eb; }
|
||||
.timestamp {
|
||||
color: #64748b;
|
||||
font-size: 0.9em;
|
||||
text-align: center;
|
||||
margin-top: 20px;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="container">
|
||||
<h1>🔗 Blockchain Monitoring Dashboard</h1>
|
||||
|
||||
<button class="refresh-btn" onclick="location.reload()">🔄 Refresh</button>
|
||||
|
||||
<div class="status-grid" id="statusGrid">
|
||||
<!-- Status cards will be populated by JavaScript -->
|
||||
</div>
|
||||
|
||||
<div class="log-section">
|
||||
<h2>Recent Activity</h2>
|
||||
<div class="log-viewer" id="logViewer">
|
||||
<!-- Logs will be populated by JavaScript -->
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="timestamp" id="timestamp"></div>
|
||||
</div>
|
||||
|
||||
<script>
|
||||
function fetchStatus() {
|
||||
// This would normally fetch from an API
|
||||
// For now, we'll use static data or file reading
|
||||
|
||||
const statusGrid = document.getElementById('statusGrid');
|
||||
const logViewer = document.getElementById('logViewer');
|
||||
|
||||
// Example status data (replace with actual API calls)
|
||||
const statuses = [
|
||||
{ name: 'Block Production', status: 'active', value: 'Active', detail: 'Block: 1153290+' },
|
||||
{ name: 'Validators', status: 'active', value: '5/5', detail: 'All validators running' },
|
||||
{ name: 'Health Checks', status: 'active', value: 'OK', detail: 'All checks passing' },
|
||||
{ name: 'Transaction Pool', status: 'active', value: 'Normal', detail: 'No stuck transactions' },
|
||||
{ name: 'Network Sync', status: 'active', value: 'Synced', detail: 'All nodes synchronized' },
|
||||
{ name: 'Consensus', status: 'active', value: 'QBFT', detail: 'Quorum maintained' }
|
||||
];
|
||||
|
||||
statusGrid.innerHTML = statuses.map(s => `
|
||||
<div class="status-card ${s.status}">
|
||||
<div class="card-title">
|
||||
<span><span class="status-indicator ${s.status}"></span>${s.name}</span>
|
||||
</div>
|
||||
<div class="card-value">${s.value}</div>
|
||||
<div style="color: #94a3b8; margin-top: 5px;">${s.detail}</div>
|
||||
</div>
|
||||
`).join('');
|
||||
|
||||
logViewer.innerHTML = `
|
||||
<div class="log-entry success">[${new Date().toLocaleTimeString()}] All systems operational</div>
|
||||
<div class="log-entry">[${new Date().toLocaleTimeString()}] Health checks running normally</div>
|
||||
<div class="log-entry">[${new Date().toLocaleTimeString()}] Block production active</div>
|
||||
`;
|
||||
|
||||
document.getElementById('timestamp').textContent =
|
||||
`Last updated: ${new Date().toLocaleString()}`;
|
||||
}
|
||||
|
||||
// Initial load
|
||||
fetchStatus();
|
||||
|
||||
// Auto-refresh every 30 seconds
|
||||
setInterval(fetchStatus, 30000);
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
DASHBOARD
|
||||
|
||||
log_success "Dashboard created: $dashboard_file"
|
||||
log_info "Open in browser: file://$dashboard_file"
|
||||
}
|
||||
|
||||
create_dashboard_script() {
|
||||
log_info "Creating dashboard update script..."
|
||||
|
||||
cat > "$SCRIPT_DIR/update-dashboard.sh" <<'DASHBOARDSCRIPT'
|
||||
#!/usr/bin/env bash
|
||||
# Update Monitoring Dashboard
|
||||
# Fetches current status and updates dashboard
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Source environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://192.168.11.211:8545}"
|
||||
|
||||
# Get current block
|
||||
BLOCK=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null || echo "0")
|
||||
|
||||
# Get validator status (simplified)
|
||||
VALIDATOR_COUNT=5
|
||||
|
||||
# Update dashboard with real data
|
||||
# This would be integrated with the HTML dashboard
|
||||
echo "Dashboard update script ready"
|
||||
echo "Block: $BLOCK"
|
||||
echo "Validators: $VALIDATOR_COUNT/5"
|
||||
|
||||
DASHBOARDSCRIPT
|
||||
|
||||
chmod +x "$SCRIPT_DIR/update-dashboard.sh"
|
||||
log_success "update-dashboard.sh script created"
|
||||
}
|
||||
|
||||
main() {
|
||||
log_info "Creating monitoring dashboard..."
|
||||
echo ""
|
||||
|
||||
create_dashboard
|
||||
echo ""
|
||||
|
||||
create_dashboard_script
|
||||
echo ""
|
||||
|
||||
log_success "Monitoring dashboard created!"
|
||||
log_info "Dashboard location: logs/monitoring/dashboard.html"
|
||||
log_info "You can open it in a web browser or set up a web server to serve it"
|
||||
}
|
||||
|
||||
main "$@"
|
||||
49
scripts/monitoring/enhanced-besu-validator.service
Normal file
49
scripts/monitoring/enhanced-besu-validator.service
Normal file
@@ -0,0 +1,49 @@
|
||||
[Unit]
|
||||
Description=Hyperledger Besu Validator Node (Enhanced with Health Checks)
|
||||
After=network-online.target
|
||||
Wants=network-online.target
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
User=besu
|
||||
Group=besu
|
||||
WorkingDirectory=/opt/besu
|
||||
|
||||
# Enhanced restart policy
|
||||
Restart=always
|
||||
RestartSec=10
|
||||
StartLimitInterval=300
|
||||
StartLimitBurst=5
|
||||
|
||||
# Health check before start
|
||||
ExecStartPre=/usr/local/bin/check-validator-prerequisites.sh
|
||||
ExecStartPre=/bin/sleep 2
|
||||
|
||||
# Main service
|
||||
ExecStart=/opt/besu/bin/besu \
|
||||
--config-file=/etc/besu/config-validator.toml
|
||||
|
||||
# Health check after start
|
||||
ExecStartPost=/usr/local/bin/verify-validator-started.sh
|
||||
ExecStartPost=/bin/sleep 5
|
||||
|
||||
# Stop gracefully
|
||||
TimeoutStopSec=30
|
||||
KillMode=mixed
|
||||
KillSignal=SIGTERM
|
||||
|
||||
# Resource limits
|
||||
LimitNOFILE=65536
|
||||
LimitNPROC=32768
|
||||
|
||||
# Logging
|
||||
StandardOutput=journal
|
||||
StandardError=journal
|
||||
SyslogIdentifier=besu-validator
|
||||
|
||||
# Security
|
||||
NoNewPrivileges=true
|
||||
PrivateTmp=true
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
140
scripts/monitoring/master-stability-monitor.sh
Executable file
140
scripts/monitoring/master-stability-monitor.sh
Executable file
@@ -0,0 +1,140 @@
|
||||
#!/usr/bin/env bash
|
||||
# Master Stability Monitor
|
||||
# Orchestrates all monitoring and recovery operations
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Load environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
set +e
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
set -e
|
||||
fi
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
log_section() { echo -e "\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"; echo -e "${CYAN}$1${NC}"; echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"; }
|
||||
|
||||
# Configuration
|
||||
CHECK_INTERVAL=120 # Check every 2 minutes
|
||||
AUTO_FIX=true
|
||||
AUTO_RESTART=true
|
||||
|
||||
run_health_check() {
|
||||
log_section "Running Health Check"
|
||||
if bash "$SCRIPT_DIR/check-validator-health.sh" 2>&1; then
|
||||
log_success "Health check passed"
|
||||
return 0
|
||||
else
|
||||
log_error "Health check failed"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
run_auto_fix() {
|
||||
log_section "Running Auto-Fix"
|
||||
if bash "$SCRIPT_DIR/auto-fix-validator-config.sh" 2>&1; then
|
||||
log_success "Auto-fix completed"
|
||||
return 0
|
||||
else
|
||||
log_warn "Auto-fix had issues"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_block_production() {
|
||||
log_section "Checking Block Production"
|
||||
|
||||
local rpc_url="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"
|
||||
local block1=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null || echo "0")
|
||||
|
||||
sleep 10
|
||||
|
||||
local block2=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$block1" != "$block2" ] && [ "$block2" != "0" ] && [ "$block1" != "" ]; then
|
||||
log_success "Block production active ($block1 → $block2)"
|
||||
return 0
|
||||
else
|
||||
log_error "Block production STALLED (block: $block1)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
main() {
|
||||
log_section "Master Stability Monitor"
|
||||
log_info "Starting comprehensive stability monitoring..."
|
||||
echo ""
|
||||
|
||||
local health_ok=true
|
||||
local blocks_ok=true
|
||||
|
||||
# Run health check
|
||||
if ! run_health_check; then
|
||||
health_ok=false
|
||||
|
||||
# Auto-fix if enabled
|
||||
if [ "$AUTO_FIX" = true ]; then
|
||||
log_warn "Attempting automatic fix..."
|
||||
run_auto_fix
|
||||
|
||||
# Re-check health
|
||||
sleep 30
|
||||
if run_health_check; then
|
||||
log_success "Auto-fix resolved issues"
|
||||
health_ok=true
|
||||
else
|
||||
log_error "Auto-fix did not resolve issues"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check block production
|
||||
if ! check_block_production; then
|
||||
blocks_ok=false
|
||||
log_error "CRITICAL: Block production stalled"
|
||||
fi
|
||||
|
||||
# Summary
|
||||
log_section "Monitoring Summary"
|
||||
|
||||
if [ "$health_ok" = true ] && [ "$blocks_ok" = true ]; then
|
||||
log_success "All systems operational"
|
||||
exit 0
|
||||
elif [ "$blocks_ok" = false ]; then
|
||||
log_error "CRITICAL: Block production issue detected"
|
||||
exit 2
|
||||
else
|
||||
log_warn "Non-critical issues detected"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Run continuously if no arguments
|
||||
if [ "${1:-}" = "--once" ]; then
|
||||
main
|
||||
else
|
||||
while true; do
|
||||
main
|
||||
sleep "$CHECK_INTERVAL"
|
||||
done
|
||||
fi
|
||||
134
scripts/monitoring/master-stability-monitor.sh.bak
Executable file
134
scripts/monitoring/master-stability-monitor.sh.bak
Executable file
@@ -0,0 +1,134 @@
|
||||
#!/usr/bin/env bash
|
||||
# Master Stability Monitor
|
||||
# Orchestrates all monitoring and recovery operations
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Load environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
set +e
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
set -e
|
||||
fi
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
log_section() { echo -e "\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"; echo -e "${CYAN}$1${NC}"; echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"; }
|
||||
|
||||
# Configuration
|
||||
CHECK_INTERVAL=120 # Check every 2 minutes
|
||||
AUTO_FIX=true
|
||||
AUTO_RESTART=true
|
||||
|
||||
run_health_check() {
|
||||
log_section "Running Health Check"
|
||||
if bash "$SCRIPT_DIR/check-validator-health.sh" 2>&1; then
|
||||
log_success "Health check passed"
|
||||
return 0
|
||||
else
|
||||
log_error "Health check failed"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
run_auto_fix() {
|
||||
log_section "Running Auto-Fix"
|
||||
if bash "$SCRIPT_DIR/auto-fix-validator-config.sh" 2>&1; then
|
||||
log_success "Auto-fix completed"
|
||||
return 0
|
||||
else
|
||||
log_warn "Auto-fix had issues"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_block_production() {
|
||||
log_section "Checking Block Production"
|
||||
|
||||
local rpc_url="${RPC_URL_138:-http://192.168.11.211:8545}"
|
||||
local block1=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null || echo "0")
|
||||
|
||||
sleep 10
|
||||
|
||||
local block2=$(cast block-number --rpc-url "$rpc_url" 2>/dev/null || echo "0")
|
||||
|
||||
if [ "$block1" != "$block2" ] && [ "$block2" != "0" ] && [ "$block1" != "" ]; then
|
||||
log_success "Block production active ($block1 → $block2)"
|
||||
return 0
|
||||
else
|
||||
log_error "Block production STALLED (block: $block1)"
|
||||
return 1
|
||||
fi
|
||||
}
|
||||
|
||||
main() {
|
||||
log_section "Master Stability Monitor"
|
||||
log_info "Starting comprehensive stability monitoring..."
|
||||
echo ""
|
||||
|
||||
local health_ok=true
|
||||
local blocks_ok=true
|
||||
|
||||
# Run health check
|
||||
if ! run_health_check; then
|
||||
health_ok=false
|
||||
|
||||
# Auto-fix if enabled
|
||||
if [ "$AUTO_FIX" = true ]; then
|
||||
log_warn "Attempting automatic fix..."
|
||||
run_auto_fix
|
||||
|
||||
# Re-check health
|
||||
sleep 30
|
||||
if run_health_check; then
|
||||
log_success "Auto-fix resolved issues"
|
||||
health_ok=true
|
||||
else
|
||||
log_error "Auto-fix did not resolve issues"
|
||||
fi
|
||||
fi
|
||||
fi
|
||||
|
||||
# Check block production
|
||||
if ! check_block_production; then
|
||||
blocks_ok=false
|
||||
log_error "CRITICAL: Block production stalled"
|
||||
fi
|
||||
|
||||
# Summary
|
||||
log_section "Monitoring Summary"
|
||||
|
||||
if [ "$health_ok" = true ] && [ "$blocks_ok" = true ]; then
|
||||
log_success "All systems operational"
|
||||
exit 0
|
||||
elif [ "$blocks_ok" = false ]; then
|
||||
log_error "CRITICAL: Block production issue detected"
|
||||
exit 2
|
||||
else
|
||||
log_warn "Non-critical issues detected"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
# Run continuously if no arguments
|
||||
if [ "${1:-}" = "--once" ]; then
|
||||
main
|
||||
else
|
||||
while true; do
|
||||
main
|
||||
sleep "$CHECK_INTERVAL"
|
||||
done
|
||||
fi
|
||||
87
scripts/monitoring/monitor-block-production.sh
Executable file
87
scripts/monitoring/monitor-block-production.sh
Executable file
@@ -0,0 +1,87 @@
|
||||
#!/usr/bin/env bash
|
||||
# Block Production Monitor
|
||||
# Continuously monitors block production and alerts on stalls
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
RPC_URL="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"
|
||||
CHECK_INTERVAL=30 # Check every 30 seconds
|
||||
STALL_THRESHOLD=60 # Alert if no blocks for 60 seconds
|
||||
ALERT_SCRIPT="${SCRIPT_DIR}/alert-block-stall.sh"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
monitor_blocks() {
|
||||
local last_block=0
|
||||
local last_block_time=$(date +%s)
|
||||
local stall_detected=false
|
||||
|
||||
log_info "Starting block production monitor..."
|
||||
log_info "RPC URL: $RPC_URL"
|
||||
log_info "Check interval: ${CHECK_INTERVAL}s"
|
||||
log_info "Stall threshold: ${STALL_THRESHOLD}s"
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
local current_block=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null || echo "0")
|
||||
local current_time=$(date +%s)
|
||||
|
||||
if [ "$current_block" != "0" ] && [ "$current_block" != "" ]; then
|
||||
if [ "$current_block" != "$last_block" ]; then
|
||||
# Block advanced
|
||||
if [ "$stall_detected" = true ]; then
|
||||
log_success "Block production RESUMED! Block: $current_block"
|
||||
stall_detected=false
|
||||
fi
|
||||
|
||||
local time_since_last=$((current_time - last_block_time))
|
||||
log_success "Block: $current_block (advanced in ${time_since_last}s)"
|
||||
|
||||
last_block=$current_block
|
||||
last_block_time=$current_time
|
||||
else
|
||||
# Block not advancing
|
||||
local time_stalled=$((current_time - last_block_time))
|
||||
|
||||
if [ "$time_stalled" -ge "$STALL_THRESHOLD" ]; then
|
||||
if [ "$stall_detected" = false ]; then
|
||||
log_error "BLOCK PRODUCTION STALLED! Block: $current_block (stalled for ${time_stalled}s)"
|
||||
stall_detected=true
|
||||
|
||||
# Trigger alert
|
||||
if [ -f "$ALERT_SCRIPT" ]; then
|
||||
bash "$ALERT_SCRIPT" "$current_block" "$time_stalled"
|
||||
fi
|
||||
else
|
||||
log_error "Still stalled... (${time_stalled}s)"
|
||||
fi
|
||||
else
|
||||
log_warn "Block not advancing (${time_stalled}s, threshold: ${STALL_THRESHOLD}s)"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
log_error "Cannot get block number from RPC"
|
||||
fi
|
||||
|
||||
sleep "$CHECK_INTERVAL"
|
||||
done
|
||||
}
|
||||
|
||||
monitor_blocks "$@"
|
||||
81
scripts/monitoring/monitor-block-production.sh.bak
Executable file
81
scripts/monitoring/monitor-block-production.sh.bak
Executable file
@@ -0,0 +1,81 @@
|
||||
#!/usr/bin/env bash
|
||||
# Block Production Monitor
|
||||
# Continuously monitors block production and alerts on stalls
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
RPC_URL="${RPC_URL_138:-http://192.168.11.211:8545}"
|
||||
CHECK_INTERVAL=30 # Check every 30 seconds
|
||||
STALL_THRESHOLD=60 # Alert if no blocks for 60 seconds
|
||||
ALERT_SCRIPT="${SCRIPT_DIR}/alert-block-stall.sh"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
monitor_blocks() {
|
||||
local last_block=0
|
||||
local last_block_time=$(date +%s)
|
||||
local stall_detected=false
|
||||
|
||||
log_info "Starting block production monitor..."
|
||||
log_info "RPC URL: $RPC_URL"
|
||||
log_info "Check interval: ${CHECK_INTERVAL}s"
|
||||
log_info "Stall threshold: ${STALL_THRESHOLD}s"
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
local current_block=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null || echo "0")
|
||||
local current_time=$(date +%s)
|
||||
|
||||
if [ "$current_block" != "0" ] && [ "$current_block" != "" ]; then
|
||||
if [ "$current_block" != "$last_block" ]; then
|
||||
# Block advanced
|
||||
if [ "$stall_detected" = true ]; then
|
||||
log_success "Block production RESUMED! Block: $current_block"
|
||||
stall_detected=false
|
||||
fi
|
||||
|
||||
local time_since_last=$((current_time - last_block_time))
|
||||
log_success "Block: $current_block (advanced in ${time_since_last}s)"
|
||||
|
||||
last_block=$current_block
|
||||
last_block_time=$current_time
|
||||
else
|
||||
# Block not advancing
|
||||
local time_stalled=$((current_time - last_block_time))
|
||||
|
||||
if [ "$time_stalled" -ge "$STALL_THRESHOLD" ]; then
|
||||
if [ "$stall_detected" = false ]; then
|
||||
log_error "BLOCK PRODUCTION STALLED! Block: $current_block (stalled for ${time_stalled}s)"
|
||||
stall_detected=true
|
||||
|
||||
# Trigger alert
|
||||
if [ -f "$ALERT_SCRIPT" ]; then
|
||||
bash "$ALERT_SCRIPT" "$current_block" "$time_stalled"
|
||||
fi
|
||||
else
|
||||
log_error "Still stalled... (${time_stalled}s)"
|
||||
fi
|
||||
else
|
||||
log_warn "Block not advancing (${time_stalled}s, threshold: ${STALL_THRESHOLD}s)"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
log_error "Cannot get block number from RPC"
|
||||
fi
|
||||
|
||||
sleep "$CHECK_INTERVAL"
|
||||
done
|
||||
}
|
||||
|
||||
monitor_blocks "$@"
|
||||
178
scripts/monitoring/monitor-blockchain-health.sh
Executable file
178
scripts/monitoring/monitor-blockchain-health.sh
Executable file
@@ -0,0 +1,178 @@
|
||||
#!/bin/bash
|
||||
# Comprehensive Blockchain Health Monitoring Script
|
||||
# Monitors block production, transaction inclusion, and node health
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
RPC_CORE_1="${RPC_CORE_1:-192.168.11.211}"
|
||||
RPC_URL="${RPC_URL:-http://${RPC_CORE_1}:8545}"
|
||||
DEPLOYER="${DEPLOYER:-0x4A666F96fC8764181194447A7dFdb7d471b301C8}"
|
||||
PROXMOX_USER="${PROXMOX_USER:-root}"
|
||||
PROXMOX_ML110="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"
|
||||
PROXMOX_R630="${PROXMOX_R630:-${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}}"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
log_section() { echo -e "\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"; echo -e "${CYAN}$1${NC}"; echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"; }
|
||||
|
||||
echo "=== Blockchain Health Monitor ==="
|
||||
echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')"
|
||||
echo ""
|
||||
|
||||
# Check RPC connectivity
|
||||
log_section "RPC Node Status"
|
||||
if timeout 5 cast chain-id --rpc-url "$RPC_URL" >/dev/null 2>&1; then
|
||||
CHAIN_ID=$(cast chain-id --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK_NUM=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK_DEC=$(cast --to-dec "$BLOCK_NUM" 2>/dev/null || echo "0")
|
||||
log_success "RPC accessible"
|
||||
echo " Chain ID: $CHAIN_ID"
|
||||
echo " Latest block: $BLOCK_DEC ($BLOCK_NUM)"
|
||||
else
|
||||
log_error "RPC not accessible"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check block production
|
||||
log_section "Block Production"
|
||||
BLOCK1=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
sleep 5
|
||||
BLOCK2=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK1_DEC=$(cast --to-dec "$BLOCK1" 2>/dev/null || echo "0")
|
||||
BLOCK2_DEC=$(cast --to-dec "$BLOCK2" 2>/dev/null || echo "0")
|
||||
BLOCK_DIFF=$((BLOCK2_DEC - BLOCK1_DEC))
|
||||
|
||||
if [ "$BLOCK_DIFF" -gt 0 ]; then
|
||||
log_success "Blocks being produced ($BLOCK_DIFF blocks in 5s)"
|
||||
else
|
||||
log_error "Block production stalled (no new blocks in 5s)"
|
||||
fi
|
||||
|
||||
# Check transaction inclusion
|
||||
log_section "Transaction Inclusion"
|
||||
TX_COUNT_TOTAL=0
|
||||
EMPTY_BLOCKS=0
|
||||
for i in 0 1 2 3 4 5; do
|
||||
BLOCK_NUM=$((BLOCK2_DEC - i))
|
||||
BLOCK_HEX=$(printf '0x%x' $BLOCK_NUM)
|
||||
TX_COUNT=$(cast rpc eth_getBlockTransactionCountByNumber "$BLOCK_HEX" --rpc-url "$RPC_URL" 2>/dev/null | tr -d '"')
|
||||
TX_COUNT_DEC=$(cast --to-dec "$TX_COUNT" 2>/dev/null || echo "0")
|
||||
TX_COUNT_TOTAL=$((TX_COUNT_TOTAL + TX_COUNT_DEC))
|
||||
if [ "$TX_COUNT_DEC" -eq 0 ]; then
|
||||
EMPTY_BLOCKS=$((EMPTY_BLOCKS + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$TX_COUNT_TOTAL" -gt 0 ]; then
|
||||
log_success "Transactions being included ($TX_COUNT_TOTAL txs in last 6 blocks)"
|
||||
else
|
||||
log_warn "No transactions in last 6 blocks ($EMPTY_BLOCKS empty blocks)"
|
||||
fi
|
||||
|
||||
# Check pending transactions
|
||||
log_section "Pending Transactions"
|
||||
LATEST_HEX=$(cast rpc eth_getTransactionCount "$DEPLOYER" latest --rpc-url "$RPC_URL" 2>/dev/null | tr -d '"')
|
||||
PENDING_HEX=$(cast rpc eth_getTransactionCount "$DEPLOYER" pending --rpc-url "$RPC_URL" 2>/dev/null | tr -d '"')
|
||||
LATEST_DEC=$(cast --to-dec "$LATEST_HEX" 2>/dev/null || echo "0")
|
||||
PENDING_DEC=$(cast --to-dec "$PENDING_HEX" 2>/dev/null || echo "0")
|
||||
PENDING_COUNT=$((PENDING_DEC - LATEST_DEC))
|
||||
|
||||
if [ "$PENDING_COUNT" -eq 0 ]; then
|
||||
log_success "No pending transactions"
|
||||
else
|
||||
log_warn "$PENDING_COUNT pending transactions (nonces $((LATEST_DEC + 1))-$PENDING_DEC)"
|
||||
fi
|
||||
|
||||
# Check validator status
|
||||
log_section "Validator Status"
|
||||
VALIDATORS=(
|
||||
"1000:$PROXMOX_R630"
|
||||
"1001:$PROXMOX_R630"
|
||||
"1002:$PROXMOX_R630"
|
||||
"1003:$PROXMOX_ML110"
|
||||
"1004:$PROXMOX_ML110"
|
||||
)
|
||||
|
||||
ACTIVE_COUNT=0
|
||||
for validator in "${VALIDATORS[@]}"; do
|
||||
IFS=':' read -r VMID HOST <<< "$validator"
|
||||
SSH_TARGET="${PROXMOX_USER}@${HOST}"
|
||||
STATUS=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_TARGET" \
|
||||
"pct exec $VMID -- systemctl is-active besu-validator" 2>/dev/null || echo "unknown")
|
||||
|
||||
if [ "$STATUS" = "active" ]; then
|
||||
ACTIVE_COUNT=$((ACTIVE_COUNT + 1))
|
||||
echo " Validator $VMID: $STATUS"
|
||||
else
|
||||
log_warn "Validator $VMID: $STATUS"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$ACTIVE_COUNT" -eq 5 ]; then
|
||||
log_success "All 5 validators active"
|
||||
else
|
||||
log_error "Only $ACTIVE_COUNT/5 validators active"
|
||||
fi
|
||||
|
||||
# Check peer connections
|
||||
log_section "Peer Connections"
|
||||
PEER_COUNT=$(cast rpc admin_peers --rpc-url "$RPC_URL" 2>/dev/null | jq '. | length' 2>/dev/null || echo "N/A")
|
||||
if [ "$PEER_COUNT" != "N/A" ] && [ "$PEER_COUNT" -ge 5 ]; then
|
||||
log_success "RPC has $PEER_COUNT peer connections"
|
||||
else
|
||||
log_warn "RPC has $PEER_COUNT peer connections (expected >= 5)"
|
||||
fi
|
||||
|
||||
# Summary
|
||||
log_section "Health Summary"
|
||||
ISSUES=0
|
||||
|
||||
if [ "$BLOCK_DIFF" -eq 0 ]; then
|
||||
log_error "❌ Block production stalled"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
else
|
||||
log_success "✓ Block production active"
|
||||
fi
|
||||
|
||||
if [ "$TX_COUNT_TOTAL" -eq 0 ] && [ "$PENDING_COUNT" -gt 0 ]; then
|
||||
log_error "❌ Transactions not being included"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
elif [ "$TX_COUNT_TOTAL" -gt 0 ]; then
|
||||
log_success "✓ Transactions being included"
|
||||
fi
|
||||
|
||||
if [ "$ACTIVE_COUNT" -lt 5 ]; then
|
||||
log_error "❌ Not all validators active"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
else
|
||||
log_success "✓ All validators active"
|
||||
fi
|
||||
|
||||
if [ "$PENDING_COUNT" -gt 10 ]; then
|
||||
log_warn "⚠ High number of pending transactions ($PENDING_COUNT)"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
if [ "$ISSUES" -eq 0 ]; then
|
||||
log_success "Overall Status: HEALTHY"
|
||||
exit 0
|
||||
else
|
||||
log_error "Overall Status: $ISSUES issue(s) detected"
|
||||
exit 1
|
||||
fi
|
||||
172
scripts/monitoring/monitor-blockchain-health.sh.bak
Executable file
172
scripts/monitoring/monitor-blockchain-health.sh.bak
Executable file
@@ -0,0 +1,172 @@
|
||||
#!/bin/bash
|
||||
# Comprehensive Blockchain Health Monitoring Script
|
||||
# Monitors block production, transaction inclusion, and node health
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
RPC_URL="${RPC_URL:-http://192.168.11.211:8545}"
|
||||
DEPLOYER="${DEPLOYER:-0x4A666F96fC8764181194447A7dFdb7d471b301C8}"
|
||||
PROXMOX_USER="${PROXMOX_USER:-root}"
|
||||
PROXMOX_ML110="${PROXMOX_ML110:-192.168.11.10}"
|
||||
PROXMOX_R630="${PROXMOX_R630:-192.168.11.11}"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
log_section() { echo -e "\n${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"; echo -e "${CYAN}$1${NC}"; echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}\n"; }
|
||||
|
||||
echo "=== Blockchain Health Monitor ==="
|
||||
echo "Timestamp: $(date '+%Y-%m-%d %H:%M:%S')"
|
||||
echo ""
|
||||
|
||||
# Check RPC connectivity
|
||||
log_section "RPC Node Status"
|
||||
if timeout 5 cast chain-id --rpc-url "$RPC_URL" >/dev/null 2>&1; then
|
||||
CHAIN_ID=$(cast chain-id --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK_NUM=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK_DEC=$(cast --to-dec "$BLOCK_NUM" 2>/dev/null || echo "0")
|
||||
log_success "RPC accessible"
|
||||
echo " Chain ID: $CHAIN_ID"
|
||||
echo " Latest block: $BLOCK_DEC ($BLOCK_NUM)"
|
||||
else
|
||||
log_error "RPC not accessible"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Check block production
|
||||
log_section "Block Production"
|
||||
BLOCK1=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
sleep 5
|
||||
BLOCK2=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
BLOCK1_DEC=$(cast --to-dec "$BLOCK1" 2>/dev/null || echo "0")
|
||||
BLOCK2_DEC=$(cast --to-dec "$BLOCK2" 2>/dev/null || echo "0")
|
||||
BLOCK_DIFF=$((BLOCK2_DEC - BLOCK1_DEC))
|
||||
|
||||
if [ "$BLOCK_DIFF" -gt 0 ]; then
|
||||
log_success "Blocks being produced ($BLOCK_DIFF blocks in 5s)"
|
||||
else
|
||||
log_error "Block production stalled (no new blocks in 5s)"
|
||||
fi
|
||||
|
||||
# Check transaction inclusion
|
||||
log_section "Transaction Inclusion"
|
||||
TX_COUNT_TOTAL=0
|
||||
EMPTY_BLOCKS=0
|
||||
for i in 0 1 2 3 4 5; do
|
||||
BLOCK_NUM=$((BLOCK2_DEC - i))
|
||||
BLOCK_HEX=$(printf '0x%x' $BLOCK_NUM)
|
||||
TX_COUNT=$(cast rpc eth_getBlockTransactionCountByNumber "$BLOCK_HEX" --rpc-url "$RPC_URL" 2>/dev/null | tr -d '"')
|
||||
TX_COUNT_DEC=$(cast --to-dec "$TX_COUNT" 2>/dev/null || echo "0")
|
||||
TX_COUNT_TOTAL=$((TX_COUNT_TOTAL + TX_COUNT_DEC))
|
||||
if [ "$TX_COUNT_DEC" -eq 0 ]; then
|
||||
EMPTY_BLOCKS=$((EMPTY_BLOCKS + 1))
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$TX_COUNT_TOTAL" -gt 0 ]; then
|
||||
log_success "Transactions being included ($TX_COUNT_TOTAL txs in last 6 blocks)"
|
||||
else
|
||||
log_warn "No transactions in last 6 blocks ($EMPTY_BLOCKS empty blocks)"
|
||||
fi
|
||||
|
||||
# Check pending transactions
|
||||
log_section "Pending Transactions"
|
||||
LATEST_HEX=$(cast rpc eth_getTransactionCount "$DEPLOYER" latest --rpc-url "$RPC_URL" 2>/dev/null | tr -d '"')
|
||||
PENDING_HEX=$(cast rpc eth_getTransactionCount "$DEPLOYER" pending --rpc-url "$RPC_URL" 2>/dev/null | tr -d '"')
|
||||
LATEST_DEC=$(cast --to-dec "$LATEST_HEX" 2>/dev/null || echo "0")
|
||||
PENDING_DEC=$(cast --to-dec "$PENDING_HEX" 2>/dev/null || echo "0")
|
||||
PENDING_COUNT=$((PENDING_DEC - LATEST_DEC))
|
||||
|
||||
if [ "$PENDING_COUNT" -eq 0 ]; then
|
||||
log_success "No pending transactions"
|
||||
else
|
||||
log_warn "$PENDING_COUNT pending transactions (nonces $((LATEST_DEC + 1))-$PENDING_DEC)"
|
||||
fi
|
||||
|
||||
# Check validator status
|
||||
log_section "Validator Status"
|
||||
VALIDATORS=(
|
||||
"1000:$PROXMOX_R630"
|
||||
"1001:$PROXMOX_R630"
|
||||
"1002:$PROXMOX_R630"
|
||||
"1003:$PROXMOX_ML110"
|
||||
"1004:$PROXMOX_ML110"
|
||||
)
|
||||
|
||||
ACTIVE_COUNT=0
|
||||
for validator in "${VALIDATORS[@]}"; do
|
||||
IFS=':' read -r VMID HOST <<< "$validator"
|
||||
SSH_TARGET="${PROXMOX_USER}@${HOST}"
|
||||
STATUS=$(ssh -o ConnectTimeout=5 -o StrictHostKeyChecking=no "$SSH_TARGET" \
|
||||
"pct exec $VMID -- systemctl is-active besu-validator" 2>/dev/null || echo "unknown")
|
||||
|
||||
if [ "$STATUS" = "active" ]; then
|
||||
ACTIVE_COUNT=$((ACTIVE_COUNT + 1))
|
||||
echo " Validator $VMID: $STATUS"
|
||||
else
|
||||
log_warn "Validator $VMID: $STATUS"
|
||||
fi
|
||||
done
|
||||
|
||||
if [ "$ACTIVE_COUNT" -eq 5 ]; then
|
||||
log_success "All 5 validators active"
|
||||
else
|
||||
log_error "Only $ACTIVE_COUNT/5 validators active"
|
||||
fi
|
||||
|
||||
# Check peer connections
|
||||
log_section "Peer Connections"
|
||||
PEER_COUNT=$(cast rpc admin_peers --rpc-url "$RPC_URL" 2>/dev/null | jq '. | length' 2>/dev/null || echo "N/A")
|
||||
if [ "$PEER_COUNT" != "N/A" ] && [ "$PEER_COUNT" -ge 5 ]; then
|
||||
log_success "RPC has $PEER_COUNT peer connections"
|
||||
else
|
||||
log_warn "RPC has $PEER_COUNT peer connections (expected >= 5)"
|
||||
fi
|
||||
|
||||
# Summary
|
||||
log_section "Health Summary"
|
||||
ISSUES=0
|
||||
|
||||
if [ "$BLOCK_DIFF" -eq 0 ]; then
|
||||
log_error "❌ Block production stalled"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
else
|
||||
log_success "✓ Block production active"
|
||||
fi
|
||||
|
||||
if [ "$TX_COUNT_TOTAL" -eq 0 ] && [ "$PENDING_COUNT" -gt 0 ]; then
|
||||
log_error "❌ Transactions not being included"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
elif [ "$TX_COUNT_TOTAL" -gt 0 ]; then
|
||||
log_success "✓ Transactions being included"
|
||||
fi
|
||||
|
||||
if [ "$ACTIVE_COUNT" -lt 5 ]; then
|
||||
log_error "❌ Not all validators active"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
else
|
||||
log_success "✓ All validators active"
|
||||
fi
|
||||
|
||||
if [ "$PENDING_COUNT" -gt 10 ]; then
|
||||
log_warn "⚠ High number of pending transactions ($PENDING_COUNT)"
|
||||
ISSUES=$((ISSUES + 1))
|
||||
fi
|
||||
|
||||
echo ""
|
||||
if [ "$ISSUES" -eq 0 ]; then
|
||||
log_success "Overall Status: HEALTHY"
|
||||
exit 0
|
||||
else
|
||||
log_error "Overall Status: $ISSUES issue(s) detected"
|
||||
exit 1
|
||||
fi
|
||||
100
scripts/monitoring/monitor-transaction-pool.sh
Executable file
100
scripts/monitoring/monitor-transaction-pool.sh
Executable file
@@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env bash
|
||||
# Transaction Pool Monitor
|
||||
# Monitors transaction pool for stuck transactions
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Load environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
set +e
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
set -e
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"
|
||||
PRIVATE_KEY="${PRIVATE_KEY:-}"
|
||||
CHECK_INTERVAL=60 # Check every 60 seconds
|
||||
STUCK_THRESHOLD=300 # Consider stuck if pending for 5 minutes
|
||||
CLEANUP_SCRIPT="${SCRIPT_DIR}/cleanup-stuck-transactions.sh"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
check_transaction_pool() {
|
||||
if [ -z "$PRIVATE_KEY" ]; then
|
||||
log_warn "PRIVATE_KEY not set, skipping transaction pool check"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local deployer=$(cast wallet address "$PRIVATE_KEY" 2>/dev/null || echo "")
|
||||
if [ -z "$deployer" ]; then
|
||||
log_warn "Cannot derive deployer address"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local latest_hex=$(cast rpc eth_getTransactionCount "$deployer" latest --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local pending_hex=$(cast rpc eth_getTransactionCount "$deployer" pending --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local latest_clean=$(echo "$latest_hex" | tr -d '"')
|
||||
local pending_clean=$(echo "$pending_hex" | tr -d '"')
|
||||
local latest_dec=$(python3 -c "print(int('$latest_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_dec=$(python3 -c "print(int('$pending_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_count=$((pending_dec - latest_dec))
|
||||
|
||||
log_info "Latest nonce: $latest_dec"
|
||||
log_info "Pending nonce: $pending_dec"
|
||||
log_info "Pending transactions: $pending_count"
|
||||
|
||||
if [ "$pending_count" -gt 0 ]; then
|
||||
log_warn "Found $pending_count pending transaction(s)"
|
||||
|
||||
# Check if transactions are stuck (pending for too long)
|
||||
# This is a simplified check - in production, track transaction age
|
||||
if [ "$pending_count" -gt 5 ]; then
|
||||
log_error "High number of pending transactions: $pending_count"
|
||||
log_warn "Transactions may be stuck. Consider cleanup."
|
||||
|
||||
if [ -f "$CLEANUP_SCRIPT" ]; then
|
||||
log_info "Running cleanup script..."
|
||||
bash "$CLEANUP_SCRIPT"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
log_success "No pending transactions"
|
||||
fi
|
||||
}
|
||||
|
||||
monitor_continuously() {
|
||||
log_info "Starting transaction pool monitor..."
|
||||
log_info "RPC URL: $RPC_URL"
|
||||
log_info "Check interval: ${CHECK_INTERVAL}s"
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
check_transaction_pool
|
||||
sleep "$CHECK_INTERVAL"
|
||||
done
|
||||
}
|
||||
|
||||
if [ "${1:-}" = "--once" ]; then
|
||||
check_transaction_pool
|
||||
else
|
||||
monitor_continuously
|
||||
fi
|
||||
94
scripts/monitoring/monitor-transaction-pool.sh.bak
Executable file
94
scripts/monitoring/monitor-transaction-pool.sh.bak
Executable file
@@ -0,0 +1,94 @@
|
||||
#!/usr/bin/env bash
|
||||
# Transaction Pool Monitor
|
||||
# Monitors transaction pool for stuck transactions
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Load environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
set +e
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
set -e
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://192.168.11.211:8545}"
|
||||
PRIVATE_KEY="${PRIVATE_KEY:-}"
|
||||
CHECK_INTERVAL=60 # Check every 60 seconds
|
||||
STUCK_THRESHOLD=300 # Consider stuck if pending for 5 minutes
|
||||
CLEANUP_SCRIPT="${SCRIPT_DIR}/cleanup-stuck-transactions.sh"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
log_info() { echo -e "${BLUE}[INFO]${NC} $1"; }
|
||||
log_success() { echo -e "${GREEN}[✓]${NC} $1"; }
|
||||
log_warn() { echo -e "${YELLOW}[⚠]${NC} $1"; }
|
||||
log_error() { echo -e "${RED}[✗]${NC} $1"; }
|
||||
|
||||
check_transaction_pool() {
|
||||
if [ -z "$PRIVATE_KEY" ]; then
|
||||
log_warn "PRIVATE_KEY not set, skipping transaction pool check"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local deployer=$(cast wallet address "$PRIVATE_KEY" 2>/dev/null || echo "")
|
||||
if [ -z "$deployer" ]; then
|
||||
log_warn "Cannot derive deployer address"
|
||||
return 0
|
||||
fi
|
||||
|
||||
local latest_hex=$(cast rpc eth_getTransactionCount "$deployer" latest --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local pending_hex=$(cast rpc eth_getTransactionCount "$deployer" pending --rpc-url "$RPC_URL" 2>/dev/null)
|
||||
local latest_clean=$(echo "$latest_hex" | tr -d '"')
|
||||
local pending_clean=$(echo "$pending_hex" | tr -d '"')
|
||||
local latest_dec=$(python3 -c "print(int('$latest_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_dec=$(python3 -c "print(int('$pending_clean', 16))" 2>/dev/null || echo "0")
|
||||
local pending_count=$((pending_dec - latest_dec))
|
||||
|
||||
log_info "Latest nonce: $latest_dec"
|
||||
log_info "Pending nonce: $pending_dec"
|
||||
log_info "Pending transactions: $pending_count"
|
||||
|
||||
if [ "$pending_count" -gt 0 ]; then
|
||||
log_warn "Found $pending_count pending transaction(s)"
|
||||
|
||||
# Check if transactions are stuck (pending for too long)
|
||||
# This is a simplified check - in production, track transaction age
|
||||
if [ "$pending_count" -gt 5 ]; then
|
||||
log_error "High number of pending transactions: $pending_count"
|
||||
log_warn "Transactions may be stuck. Consider cleanup."
|
||||
|
||||
if [ -f "$CLEANUP_SCRIPT" ]; then
|
||||
log_info "Running cleanup script..."
|
||||
bash "$CLEANUP_SCRIPT"
|
||||
fi
|
||||
fi
|
||||
else
|
||||
log_success "No pending transactions"
|
||||
fi
|
||||
}
|
||||
|
||||
monitor_continuously() {
|
||||
log_info "Starting transaction pool monitor..."
|
||||
log_info "RPC URL: $RPC_URL"
|
||||
log_info "Check interval: ${CHECK_INTERVAL}s"
|
||||
echo ""
|
||||
|
||||
while true; do
|
||||
check_transaction_pool
|
||||
sleep "$CHECK_INTERVAL"
|
||||
done
|
||||
}
|
||||
|
||||
if [ "${1:-}" = "--once" ]; then
|
||||
check_transaction_pool
|
||||
else
|
||||
monitor_continuously
|
||||
fi
|
||||
124
scripts/monitoring/send-alert.sh
Executable file
124
scripts/monitoring/send-alert.sh
Executable file
@@ -0,0 +1,124 @@
|
||||
#!/usr/bin/env bash
|
||||
# Universal Alert Sender
|
||||
# Sends alerts via configured channels
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
CONFIG_FILE="${PROJECT_ROOT}/smom-dbis-138/.env.alerts"
|
||||
|
||||
# Load configuration
|
||||
if [ -f "$CONFIG_FILE" ]; then
|
||||
source "$CONFIG_FILE"
|
||||
fi
|
||||
|
||||
SEVERITY="${1:-WARNING}"
|
||||
TITLE="${2:-Alert}"
|
||||
MESSAGE="${3:-}"
|
||||
|
||||
send_email_alert() {
|
||||
if [ "${ALERT_EMAIL_ENABLED:-false}" != "true" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
local to="${ALERT_EMAIL_TO:-}"
|
||||
if [ -z "$to" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
echo "$MESSAGE" | mail -s "[$SEVERITY] $TITLE" "$to" 2>/dev/null || true
|
||||
}
|
||||
|
||||
send_webhook_alert() {
|
||||
local url="${1:-}"
|
||||
if [ -z "$url" ] || [ "${ALERT_WEBHOOK_ENABLED:-false}" != "true" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
local payload=$(cat <<EOF
|
||||
{
|
||||
"severity": "$SEVERITY",
|
||||
"title": "$TITLE",
|
||||
"message": "$MESSAGE",
|
||||
"timestamp": "$(date -Iseconds)"
|
||||
}
|
||||
EOF
|
||||
)
|
||||
|
||||
curl -X POST "$url" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$payload" \
|
||||
2>/dev/null || true
|
||||
}
|
||||
|
||||
send_slack_alert() {
|
||||
if [ "${ALERT_SLACK_ENABLED:-false}" != "true" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
local webhook="${ALERT_SLACK_WEBHOOK_URL:-}"
|
||||
if [ -z "$webhook" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
local color="warning"
|
||||
case "$SEVERITY" in
|
||||
CRITICAL|ERROR) color="danger" ;;
|
||||
WARNING) color="warning" ;;
|
||||
INFO) color="good" ;;
|
||||
esac
|
||||
|
||||
local payload=$(cat <<EOF
|
||||
{
|
||||
"attachments": [{
|
||||
"color": "$color",
|
||||
"title": "$TITLE",
|
||||
"text": "$MESSAGE",
|
||||
"footer": "Blockchain Monitoring",
|
||||
"ts": $(date +%s)
|
||||
}]
|
||||
}
|
||||
EOF
|
||||
)
|
||||
|
||||
curl -X POST "$webhook" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$payload" \
|
||||
2>/dev/null || true
|
||||
}
|
||||
|
||||
send_discord_alert() {
|
||||
if [ "${ALERT_DISCORD_ENABLED:-false}" != "true" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
local webhook="${ALERT_DISCORD_WEBHOOK_URL:-}"
|
||||
if [ -z "$webhook" ]; then
|
||||
return 0
|
||||
fi
|
||||
|
||||
local payload=$(cat <<EOF
|
||||
{
|
||||
"embeds": [{
|
||||
"title": "$TITLE",
|
||||
"description": "$MESSAGE",
|
||||
"color": $([ "$SEVERITY" = "CRITICAL" ] && echo "16711680" || echo "16776960"),
|
||||
"timestamp": "$(date -Iseconds)"
|
||||
}]
|
||||
}
|
||||
EOF
|
||||
)
|
||||
|
||||
curl -X POST "$webhook" \
|
||||
-H "Content-Type: application/json" \
|
||||
-d "$payload" \
|
||||
2>/dev/null || true
|
||||
}
|
||||
|
||||
# Send alerts via all enabled channels
|
||||
send_email_alert
|
||||
send_webhook_alert "${ALERT_WEBHOOK_URL:-}"
|
||||
send_slack_alert
|
||||
send_discord_alert
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Setup Health Check Cron Job
|
||||
# Installs cron jobs to monitor Besu node health
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
if ! command -v pct >/dev/null 2>&1; then
|
||||
echo "Error: pct command not found. This script must be run on Proxmox host."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
LOG_DIR="$PROJECT_ROOT/logs/health-checks"
|
||||
mkdir -p "$LOG_DIR"
|
||||
|
||||
# Create cron job script
|
||||
cat > "$PROJECT_ROOT/scripts/monitoring/health-check-cron-wrapper.sh" << 'CRONSCRIPT'
|
||||
#!/bin/bash
|
||||
# Health check wrapper for cron
|
||||
# Checks all Besu nodes and logs results
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
LOG_DIR="$PROJECT_ROOT/logs/health-checks"
|
||||
TIMESTAMP=$(date +%Y%m%d-%H%M%S)
|
||||
|
||||
for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
|
||||
if [[ -f "$PROJECT_ROOT/scripts/health/check-node-health.sh" ]]; then
|
||||
"$PROJECT_ROOT/scripts/health/check-node-health.sh" "$vmid" >> "$LOG_DIR/health-$vmid-$TIMESTAMP.log" 2>&1
|
||||
fi
|
||||
done
|
||||
|
||||
# Cleanup old logs (keep 7 days)
|
||||
find "$LOG_DIR" -name "health-*.log" -mtime +7 -delete 2>/dev/null || true
|
||||
CRONSCRIPT
|
||||
|
||||
chmod +x "$PROJECT_ROOT/scripts/monitoring/health-check-cron-wrapper.sh"
|
||||
|
||||
# Add to crontab (every 5 minutes)
|
||||
CRON_JOB="*/5 * * * * $PROJECT_ROOT/scripts/monitoring/health-check-cron-wrapper.sh"
|
||||
|
||||
if crontab -l 2>/dev/null | grep -q "health-check-cron-wrapper.sh"; then
|
||||
echo "Cron job already exists"
|
||||
else
|
||||
(crontab -l 2>/dev/null; echo "$CRON_JOB") | crontab -
|
||||
echo "✓ Health check cron job installed (runs every 5 minutes)"
|
||||
echo " Logs: $LOG_DIR/"
|
||||
echo " To remove: crontab -e (then delete the line)"
|
||||
fi
|
||||
34
scripts/monitoring/update-dashboard.sh
Executable file
34
scripts/monitoring/update-dashboard.sh
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/env bash
|
||||
# Update Monitoring Dashboard
|
||||
# Fetches current status and updates dashboard
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Load IP configuration
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
|
||||
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
|
||||
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Source environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://${RPC_CORE_1}:8545}"
|
||||
|
||||
# Get current block
|
||||
BLOCK=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null || echo "0")
|
||||
|
||||
# Get validator status (simplified)
|
||||
VALIDATOR_COUNT=5
|
||||
|
||||
# Update dashboard with real data
|
||||
# This would be integrated with the HTML dashboard
|
||||
echo "Dashboard update script ready"
|
||||
echo "Block: $BLOCK"
|
||||
echo "Validators: $VALIDATOR_COUNT/5"
|
||||
|
||||
28
scripts/monitoring/update-dashboard.sh.bak
Executable file
28
scripts/monitoring/update-dashboard.sh.bak
Executable file
@@ -0,0 +1,28 @@
|
||||
#!/usr/bin/env bash
|
||||
# Update Monitoring Dashboard
|
||||
# Fetches current status and updates dashboard
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
||||
|
||||
# Source environment
|
||||
if [ -f "$PROJECT_ROOT/smom-dbis-138/.env" ]; then
|
||||
source "$PROJECT_ROOT/smom-dbis-138/.env" 2>/dev/null || true
|
||||
fi
|
||||
|
||||
RPC_URL="${RPC_URL_138:-http://192.168.11.211:8545}"
|
||||
|
||||
# Get current block
|
||||
BLOCK=$(cast block-number --rpc-url "$RPC_URL" 2>/dev/null || echo "0")
|
||||
|
||||
# Get validator status (simplified)
|
||||
VALIDATOR_COUNT=5
|
||||
|
||||
# Update dashboard with real data
|
||||
# This would be integrated with the HTML dashboard
|
||||
echo "Dashboard update script ready"
|
||||
echo "Block: $BLOCK"
|
||||
echo "Validators: $VALIDATOR_COUNT/5"
|
||||
|
||||
Reference in New Issue
Block a user