Refactor code for improved readability and performance

2025-12-21 22:32:09 -08:00
parent 79e3c02f50
commit b45c2006be
2259 changed files with 380318 additions and 2 deletions
--- a/scripts/monitoring/prometheus-besu-config.yml
+++ b/scripts/monitoring/prometheus-besu-config.yml
@@ -0,0 +1,31 @@
+# Prometheus Configuration for Besu Metrics
+# Merge the 'besu' job below into the scrape_configs section of your
+# prometheus.yml. If that file already has a top-level `scrape_configs:` key,
+# copy only the job entry instead of repeating the key.
+
+scrape_configs:
+  - job_name: 'besu'
+    scrape_interval: 15s
+    # Every target below is scraped on port 9545. Each target group attaches a
+    # `role` label (validator / sentry / rpc) to the series it produces.
+    # NOTE(review): the per-target comments say these addresses are DHCP
+    # assigned, but static_configs pins them -- confirm the leases are reserved.
+    static_configs:
+      # Validators (VMID 1000-1004) - metrics enabled but may not expose RPC
+      - targets:
+          - '192.168.11.100:9545'  # validator-1 (DHCP assigned)
+          - '192.168.11.101:9545'  # validator-2 (DHCP assigned)
+          - '192.168.11.102:9545'  # validator-3 (DHCP assigned)
+          - '192.168.11.103:9545'  # validator-4 (DHCP assigned)
+          - '192.168.11.104:9545'  # validator-5 (DHCP assigned)
+        labels:
+          role: 'validator'
+      # Sentries (VMID 1500-1503)
+      - targets:
+          - '192.168.11.150:9545'  # sentry-1 (DHCP assigned)
+          - '192.168.11.151:9545'  # sentry-2 (DHCP assigned)
+          - '192.168.11.152:9545'  # sentry-3 (DHCP assigned)
+          - '192.168.11.153:9545'  # sentry-4 (DHCP assigned)
+        labels:
+          role: 'sentry'
+      # RPC Nodes (VMID 2500-2502)
+      - targets:
+          - '192.168.11.250:9545'  # rpc-1 (DHCP assigned)
+          - '192.168.11.251:9545'  # rpc-2 (DHCP assigned)
+          - '192.168.11.252:9545'  # rpc-3 (DHCP assigned)
+        labels:
+          role: 'rpc'
--- a/scripts/monitoring/setup-health-check-cron.sh
+++ b/scripts/monitoring/setup-health-check-cron.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# Setup Health Check Cron Job
+# Installs cron jobs to monitor Besu node health
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+if ! command -v pct >/dev/null 2>&1; then
+    echo "Error: pct command not found. This script must be run on Proxmox host."
+    exit 1
+fi
+
+LOG_DIR="$PROJECT_ROOT/logs/health-checks"
+mkdir -p "$LOG_DIR"
+
+# Create cron job script
+# The heredoc delimiter is quoted, so nothing below is expanded while this
+# setup script runs; the wrapper resolves its own paths when cron executes it.
+WRAPPER_SCRIPT="$PROJECT_ROOT/scripts/monitoring/health-check-cron-wrapper.sh"
+cat > "$WRAPPER_SCRIPT" << 'CRONSCRIPT'
+#!/bin/bash
+# Health check wrapper for cron
+# Checks all Besu nodes and logs results
+
+# cron starts jobs with a minimal PATH; prepend the usual system directories
+# (including the sbin ones) so administrative tools can be found.
+export PATH="/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin${PATH:+:$PATH}"
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+LOG_DIR="$PROJECT_ROOT/logs/health-checks"
+HEALTH_CHECK="$PROJECT_ROOT/scripts/health/check-node-health.sh"
+TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+
+# The log directory may have been removed since setup ran; recreate it so the
+# redirections below cannot fail for that reason.
+mkdir -p "$LOG_DIR"
+
+# The existence test does not depend on the VMID, so it is done once.
+# When the health check script is missing, no checks run (same as before).
+if [[ -f "$HEALTH_CHECK" ]]; then
+    for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
+        # One log file per container per run; stdout and stderr are both kept.
+        "$HEALTH_CHECK" "$vmid" >> "$LOG_DIR/health-$vmid-$TIMESTAMP.log" 2>&1
+    done
+fi
+
+# Cleanup old logs (keep 7 days)
+find "$LOG_DIR" -name "health-*.log" -mtime +7 -delete 2>/dev/null || true
+CRONSCRIPT
+
+chmod +x "$WRAPPER_SCRIPT"
+
+# Add to crontab (every 5 minutes)
+CRON_JOB="*/5 * * * * $PROJECT_ROOT/scripts/monitoring/health-check-cron-wrapper.sh"
+
+# `crontab -l` exits non-zero when the user has no crontab yet. Capture it with
+# an explicit fallback so that case is treated as an empty crontab instead of
+# aborting under `set -e` / `pipefail` before the new entry is written.
+EXISTING_CRONTAB="$(crontab -l 2>/dev/null || true)"
+
+# -F matches the wrapper's file name literally (its dots are not wildcards).
+if grep -qF "health-check-cron-wrapper.sh" <<< "$EXISTING_CRONTAB"; then
+    echo "Cron job already exists"
+else
+    # Re-emit the current entries (if any) followed by the new one.
+    {
+        if [[ -n "$EXISTING_CRONTAB" ]]; then
+            printf '%s\n' "$EXISTING_CRONTAB"
+        fi
+        printf '%s\n' "$CRON_JOB"
+    } | crontab -
+    echo "✓ Health check cron job installed (runs every 5 minutes)"
+    echo "  Logs: $LOG_DIR/"
+    echo "  To remove: crontab -e (then delete the line)"
+fi
--- a/scripts/monitoring/simple-alert.sh
+++ b/scripts/monitoring/simple-alert.sh
@@ -0,0 +1,73 @@
+#!/bin/bash
+# Simple Alert Script
+# Sends alerts when Besu services are down
+# Can be extended to send email, Slack, etc.
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
+
+# Configuration
+ALERT_EMAIL="${ALERT_EMAIL:-}"
+ALERT_LOG="$PROJECT_ROOT/logs/alerts.log"
+ALERT_SENT_LOG="$PROJECT_ROOT/logs/alerts-sent.log"
+
+# Ensure log directory exists
+mkdir -p "$(dirname "$ALERT_LOG")"
+
+log_alert() {
+    local message="$1"
+    local timestamp=$(date '+%Y-%m-%d %H:%M:%S')
+    echo "[$timestamp] ALERT: $message" >> "$ALERT_LOG"
+    
+    # Check if we've already sent this alert (avoid spam)
+    local alert_key=$(echo "$message" | md5sum | cut -d' ' -f1)
+    if ! grep -q "$alert_key" "$ALERT_SENT_LOG" 2>/dev/null; then
+        echo "[$timestamp] $alert_key" >> "$ALERT_SENT_LOG"
+        
+        # Send email if configured
+        if [[ -n "$ALERT_EMAIL" ]] && command -v mail >/dev/null 2>&1; then
+            echo "$message" | mail -s "Besu Alert: Container Issue" "$ALERT_EMAIL" 2>/dev/null || true
+        fi
+        
+        # Log to console
+        echo "ALERT: $message"
+    fi
+}
+
+# Walk every known container: report it when it is not running, otherwise
+# verify that the Besu unit matching its VMID range is active.
+for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
+    if pct status "$vmid" 2>/dev/null | grep -q running; then
+        # Map the VMID range to the systemd unit expected in that container.
+        case "$vmid" in
+            100[0-4]) service_name="besu-validator" ;;
+            150[0-3]) service_name="besu-sentry" ;;
+            250[0-2]) service_name="besu-rpc" ;;
+            *)        service_name="" ;;
+        esac
+
+        # Only containers with a known unit are probed.
+        if [[ -n "$service_name" ]] \
+            && ! pct exec "$vmid" -- systemctl is-active --quiet "$service_name" 2>/dev/null; then
+            log_alert "Service $service_name on container $vmid is not running"
+        fi
+    else
+        # A stopped container cannot be inspected further.
+        log_alert "Container $vmid is not running"
+    fi
+done
+
+# Check disk space (alert if < 10% free)
+# Only running containers are inspected; others are skipped silently here.
+for vmid in 1000 1001 1002 1003 1004 1500 1501 1502 1503 2500 2501 2502; do
+    if pct status "$vmid" 2>/dev/null | grep -q running; then
+        # df -P keeps each filesystem on a single line, so field 5 of line 2 is
+        # always the use percentage even when the device name is long.
+        # stderr of pct exec is discarded, and a failed read yields an empty
+        # value instead of aborting the loop.
+        disk_usage=$(pct exec "$vmid" -- df -P / 2>/dev/null \
+            | awk 'NR==2 { sub(/%/, "", $5); print $5 }') || disk_usage=""
+
+        # Compare only when a plain integer was obtained; anything else means
+        # the usage could not be determined for this container.
+        if [[ "$disk_usage" =~ ^[0-9]+$ ]] && (( disk_usage > 90 )); then
+            log_alert "Container $vmid disk usage is at ${disk_usage}%"
+        fi
+    fi
+done