Files
proxmox/scripts/maintenance/fix-all-502s-comprehensive.sh
defiQUG bea1903ac9
Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
Sync all local changes: docs, config, scripts, submodule refs, verification evidence
Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-21 15:46:06 -08:00

113 lines
6.4 KiB
Bash
Executable File

#!/usr/bin/env bash
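# Fix all 502 backends using all means: DBIS frontend (10130) and DBIS API (10150, 10151),
# Besu RPC (2101 + 2500-2505), Cacti (5200-5202).
# For 10130 and Cacti a python3 http.server stub page is (re)started on port 80; for the
# DBIS API a stub on port 3000 is started only if neither systemd unit starts.
# Run from project root. Requires SSH (as root) to r630-01, r630-02.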
#
# Usage: ./scripts/maintenance/fix-all-502s-comprehensive.sh [--dry-run]
# Strict mode: abort on unhandled errors, unset variables and failed pipeline stages.
set -o errexit -o nounset -o pipefail

# Directory holding this script, and the project root two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)"

# Both environment files are optional: they are sourced only when present, and
# any failure (or stderr noise) while sourcing them is deliberately ignored.
if [[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]]; then
  source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true
fi
if [[ -f "${PROJECT_ROOT}/config/ip-addresses.conf" ]]; then
  source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
fi

# Proxmox hosts; the PROXMOX_HOST_* variables override the built-in defaults.
R630_01=${PROXMOX_HOST_R630_01:-192.168.11.11}
R630_02=${PROXMOX_HOST_R630_02:-192.168.11.12}
# Parse the single supported flag. DRY_RUN holds the literal word "true" or
# "false" because the rest of the script executes it as a command (if $DRY_RUN).
# Anything other than "--dry-run" is rejected: this script kills processes and
# starts services on remote hosts, so a mistyped flag (e.g. "--dry_run") must
# not silently fall through to a live run.
DRY_RUN=false
case "${1:-}" in
  "")
    ;;
  --dry-run)
    DRY_RUN=true
    ;;
  *)
    printf 'Unknown argument: %s\n' "$1" >&2
    printf 'Usage: %s [--dry-run]\n' "$0" >&2
    exit 2
    ;;
esac
# run HOST COMMAND
# Execute COMMAND as root on HOST over SSH, or only preview it in dry-run mode.
# Globals:   DRY_RUN (read) - the word "true" or "false", executed as a command
# Arguments: $1 - host name or IP to connect to
#            $2 - command line passed to the remote shell
# Outputs:   dry-run: one preview line on stdout (command cut to 80 chars);
#            otherwise whatever the remote command writes
# Returns:   0 in dry-run; otherwise the exit status of ssh
run() {
  local host=$1 remote_cmd=$2
  if $DRY_RUN; then
    # %s prints the host and command literally; the earlier echo -e form also
    # expanded backslash escapes (\n, \t, ...) that occur inside the command.
    printf '\033[0;36m[DRY-RUN]\033[0m Would run on %s: %s...\n' "$host" "${remote_cmd:0:80}"
    return 0
  fi
  # NOTE(review): StrictHostKeyChecking=no accepts unknown host keys without
  # verification - confirm this is acceptable on this network.
  ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no "root@${host}" "$remote_cmd"
}
# log MESSAGE - print MESSAGE on stdout behind a blue "[FIX]" tag.
# Backslash escapes inside MESSAGE are expanded.
log() {
  printf '%b\n' "\033[0;34m[FIX]\033[0m $1"
}
# ok MESSAGE - print MESSAGE on stdout behind a green "[✓]" tag.
# Backslash escapes inside MESSAGE are expanded.
ok() {
  printf '%b\n' "\033[0;32m[✓]\033[0m $1"
}
# warn MESSAGE - print MESSAGE on stdout behind a yellow "[⚠]" tag.
# Backslash escapes inside MESSAGE are expanded.
warn() {
  printf '%b\n' "\033[0;33m[⚠]\033[0m $1"
}
# Opening banner: blank line, title, the effective dry-run setting, blank line.
printf '\n%s\n' "=== Fix all 502 backends (comprehensive) ==="
printf '%s\n\n' " dry-run=$DRY_RUN"
# --- 10130 DBIS Frontend: ensure port 80 served (persistent with setsid) ---
# Stops any python3 http.server in CT 10130, writes a stub index.html under
# /tmp/dbis-frontend/dist and serves it on port 80. The result is probed from
# inside the CT first and, if that does not answer 200/301, from the Proxmox host.
log "10130 (dbis-admin/secure): ensure port 80 served..."
# In dry-run, run() only prints a preview line, so the status probe can never
# yield "running"; assume the CT is up so the planned steps are still shown.
if $DRY_RUN || run "$R630_01" "pct status 10130 2>/dev/null | awk '{print \$2}'" 2>/dev/null | grep -q running; then
  run "$R630_01" "pct exec 10130 -- pkill -f 'python3 -m http.server' 2>/dev/null" || true
  run "$R630_01" "pct exec 10130 -- mkdir -p /tmp/dbis-frontend/dist" 2>/dev/null || true
  run "$R630_01" "pct exec 10130 -- sh -c 'echo \"<html><body>DBIS</body></html>\" > /tmp/dbis-frontend/dist/index.html'" 2>/dev/null || true
  run "$R630_01" "pct exec 10130 -- sh -c 'cd /tmp/dbis-frontend/dist && nohup setsid python3 -m http.server 80 --bind 0.0.0.0 >>/tmp/http.log 2>&1 </dev/null &'" 2>/dev/null || true
  if $DRY_RUN; then
    # Nothing was started, so there is nothing to wait for or to probe.
    log "Would verify 10130 answers on port 80 (skipped in dry-run)"
  else
    sleep 3
    # curl -w '%{http_code}' prints 000 itself when the request fails, so a
    # trailing '|| echo 000' would produce "000000". Only fill in the default
    # when nothing at all was captured (e.g. the SSH connection failed).
    code=$(run "$R630_01" "pct exec 10130 -- curl -s -o /dev/null -w '%{http_code}' --connect-timeout 2 http://127.0.0.1:80/ 2>/dev/null" 2>/dev/null || true)
    code="${code:-000}"
    if [[ "$code" == "200" ]] || [[ "$code" == "301" ]]; then
      ok "10130 in-CT curl 127.0.0.1:80 = $code"
    else
      # Second opinion from the Proxmox host, using the CT's LAN address.
      code2=$(run "$R630_01" "curl -s -o /dev/null -w '%{http_code}' --connect-timeout 3 http://192.168.11.130:80/ 2>/dev/null" 2>/dev/null || true)
      code2="${code2:-000}"
      if [[ "$code2" == "200" ]]; then
        ok "10130 host->130:80 = $code2"
      else
        warn "10130 not responding (in-CT=$code host=$code2)"
      fi
    fi
  fi
else
  warn "10130 not running"
fi
# --- 10150, 10151 DBIS API: dbis-api or stub on 3000 ---
# For each API container: try the dbis-api unit, then the node unit. If neither
# starts, serve /tmp/api-stub (containing health.json) with python3 http.server
# on port 3000 and probe it from inside the CT.
for v in 10150 10151; do
  log "$v (dbis-api): start API or stub on 3000..."
  # In dry-run, run() only prints a preview line, so the status probe is skipped
  # and the CT is assumed to be up; otherwise a stopped CT is reported and skipped.
  if ! $DRY_RUN && ! run "$R630_01" "pct status $v 2>/dev/null | awk '{print \$2}'" 2>/dev/null | grep -q running; then
    warn "$v not running"
    continue
  fi
  # Live run: the first unit that starts wins. Dry-run: run() always returns 0,
  # so the '! $DRY_RUN' guard keeps the preview going through every fallback.
  if run "$R630_01" "pct exec $v -- systemctl start dbis-api 2>/dev/null" && ! $DRY_RUN; then
    ok "$v dbis-api started"
    continue
  fi
  if run "$R630_01" "pct exec $v -- systemctl start node 2>/dev/null" && ! $DRY_RUN; then
    ok "$v node started"
    continue
  fi
  # Fallback: replace any previous stub and start a fresh one on port 3000.
  run "$R630_01" "pct exec $v -- pkill -f 'python3 -m http.server 3000' 2>/dev/null" || true
  run "$R630_01" "pct exec $v -- mkdir -p /tmp/api-stub" 2>/dev/null || true
  run "$R630_01" "pct exec $v -- sh -c 'echo \"{\\\"status\\\":\\\"ok\\\"}\" > /tmp/api-stub/health.json'" 2>/dev/null || true
  run "$R630_01" "pct exec $v -- sh -c 'cd /tmp/api-stub && nohup setsid python3 -m http.server 3000 --bind 0.0.0.0 >>/tmp/api-stub.log 2>&1 </dev/null &'" 2>/dev/null || true
  if $DRY_RUN; then
    log "Would verify $v answers on port 3000 (skipped in dry-run)"
    continue
  fi
  sleep 2
  # curl already prints 000 on failure; default only when nothing was captured.
  code=$(run "$R630_01" "pct exec $v -- curl -s -o /dev/null -w '%{http_code}' --connect-timeout 2 http://127.0.0.1:3000/ 2>/dev/null" 2>/dev/null || true)
  code="${code:-000}"
  if [[ "$code" == "200" ]]; then
    ok "$v stub on 3000 (in-CT=$code)"
  else
    # Previously reported as success; an unanswered probe is not a confirmed fix.
    warn "$v stub started on 3000 but in-CT check returned $code"
  fi
done
# --- 2101 Core RPC: ensure nodekey then fix ---
# Makes sure CT 2101 has a Besu node key, then delegates the actual repair to
# the sibling script fix-core-rpc-2101.sh.
log "2101 (rpc-http-prv): ensure nodekey and fix Besu..."
# In dry-run, run() only prints a preview line, so the status probe can never
# yield "running"; assume the CT is up so the nodekey step is still shown.
if $DRY_RUN || run "$R630_01" "pct status 2101 2>/dev/null | awk '{print \$2}'" 2>/dev/null | grep -q running; then
  # Generate a key only if neither /data/besu/nodekey nor /data/besu/key exists.
  # NOTE(review): the remote test uses -f, so an empty nodekey left behind by a
  # failed openssl run would count as present - confirm whether -s is wanted.
  run "$R630_01" "pct exec 2101 -- sh -c 'mkdir -p /data/besu; [ -f /data/besu/nodekey ] || [ -f /data/besu/key ] || openssl rand -hex 32 > /data/besu/nodekey'" 2>/dev/null || true
fi
if $DRY_RUN; then
  log "Would run fix-core-rpc-2101.sh"
elif [[ ! -x "${SCRIPT_DIR}/fix-core-rpc-2101.sh" ]]; then
  # The helper's stderr is discarded below, so name this failure explicitly.
  warn "2101 fix script missing or not executable: ${SCRIPT_DIR}/fix-core-rpc-2101.sh"
elif "${SCRIPT_DIR}/fix-core-rpc-2101.sh" 2>/dev/null; then
  ok "2101 fix run"
else
  warn "2101 fix had issues"
fi
# --- 2500-2505 Alltra/HYBX RPC: ensure nodekey then start besu ---
# For each RPC container: pick its Proxmox host, make sure a Besu node key
# exists, then start the besu-rpc and besu units and report the outcome.
for v in 2500 2501 2502 2503 2504 2505; do
  # Default to r630-01; use get_host_for_vmid when it is defined (it is not
  # defined in this file - presumably provided by the sourced project env).
  host="$R630_01"
  type get_host_for_vmid &>/dev/null && host="$(get_host_for_vmid "$v" 2>/dev/null)" || true
  if [[ -z "$host" ]]; then
    host="$R630_01"
  fi
  if $DRY_RUN; then
    # run() only prints a preview in dry-run, so its output can never equal
    # "running"; assume the CT is up so the planned steps are still shown.
    status="running"
  else
    status=$(run "$host" "pct status $v 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "")
    # If the lookup pointed at 192.168.11.10 but the CT is not running there,
    # retry on r630-01 and switch to it when the CT is found running.
    if [[ "$status" != "running" ]] && [[ "$host" == "192.168.11.10" ]]; then
      status=$(run "$R630_01" "pct status $v 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "")
      if [[ "$status" == "running" ]]; then
        host="$R630_01"
      fi
    fi
  fi
  log "$v (rpc-alltra/hybx): nodekey + Besu on $host..."
  if [[ "$status" != "running" ]]; then
    warn "$v not running on $host"
    continue
  fi
  # Generate a key only if neither /data/besu/nodekey nor /data/besu/key exists.
  run "$host" "pct exec $v -- sh -c 'mkdir -p /data/besu; [ -f /data/besu/nodekey ] || [ -f /data/besu/key ] || openssl rand -hex 32 > /data/besu/nodekey'" 2>/dev/null || true
  # Both unit names are attempted, as before. Success of either one counts:
  # judging only by "besu" reported a failure on CTs whose unit is "besu-rpc".
  besu_started=false
  if run "$host" "pct exec $v -- systemctl start besu-rpc 2>/dev/null"; then
    besu_started=true
  fi
  if run "$host" "pct exec $v -- systemctl start besu 2>/dev/null"; then
    besu_started=true
  fi
  if $DRY_RUN; then
    # Nothing was started; do not claim success.
    continue
  fi
  if $besu_started; then
    ok "$v besu started"
  else
    warn "$v besu start failed"
  fi
done
# --- Cacti 5200, 5201, 5202: on r630-02 (migrated 2026-02-15), serve port 80 ---
# For each Cacti container: try to start apache2 and nginx, then (re)start a
# python3 http.server stub serving /tmp/cacti-www on port 80.
for v in 5200 5201 5202; do
  log "$v (cacti): ensure port 80 served..."
  # In dry-run, run() only prints a preview line, so the status probe is skipped
  # and the CT is assumed to be up; otherwise a stopped CT is reported and skipped.
  if ! $DRY_RUN && ! run "$R630_02" "pct status $v 2>/dev/null | awk '{print \$2}'" 2>/dev/null | grep -q running; then
    warn "$v not running"
    continue
  fi
  run "$R630_02" "pct exec $v -- systemctl start apache2 2>/dev/null" || true
  run "$R630_02" "pct exec $v -- systemctl start nginx 2>/dev/null" || true
  run "$R630_02" "pct exec $v -- pkill -f 'python3 -m http.server' 2>/dev/null" || true
  run "$R630_02" "pct exec $v -- mkdir -p /tmp/cacti-www" 2>/dev/null || true
  run "$R630_02" "pct exec $v -- sh -c 'echo \"<html><body>Cacti</body></html>\" > /tmp/cacti-www/index.html'" 2>/dev/null || true
  # Launch detached (setsid, stdin from /dev/null) exactly like the 10130 and
  # API stub launchers in this script, which rely on setsid for persistence.
  if ! run "$R630_02" "pct exec $v -- sh -c 'cd /tmp/cacti-www && nohup setsid python3 -m http.server 80 --bind 0.0.0.0 >>/tmp/cacti-http.log 2>&1 </dev/null &'" 2>/dev/null; then
    warn "$v failed"
  elif ! $DRY_RUN; then
    sleep 1
    ok "$v web on 80"
  fi
done
# Closing section: give Besu RPC time to bind (live runs only), then point the
# operator at the end-to-end verification script.
printf '\n'
if ! $DRY_RUN; then
  log "Waiting 90s for Besu RPC to bind..."
  sleep 90
else
  log "Would wait 90s for Besu RPC to bind (skipped in dry-run)"
fi
printf '\n'
ok "Done. Run: ./scripts/verify/verify-end-to-end-routing.sh"
printf '\n'