Some checks failed
Deploy to Phoenix / deploy (push) Has been cancelled
- ADD_CHAIN138_TO_LEDGER_LIVE: Ledger form done; public code review repo bis-innovations/LedgerLive; init/push commands
- CONTRACT_DEPLOYMENT_RUNBOOK: Chain 138 gas price 1 gwei, 36-addr check, TransactionMirror workaround
- CONTRACT_*: AddressMapper, MirrorManager deployed 2026-02-12; 36-address on-chain check
- NEXT_STEPS_FOR_YOU: Ledger done; steps completable now (no LAN); run-completable-tasks-from-anywhere
- MASTER_INDEX, OPERATOR_OPTIONAL, SMART_CONTRACTS_INVENTORY_SIMPLE: updates
- LEDGER_BLOCKCHAIN_INTEGRATION_COMPLETE: bis-innovations/LedgerLive reference

Co-authored-by: Cursor <cursoragent@cursor.com>
103 lines
4.1 KiB
Bash
Executable File
103 lines
4.1 KiB
Bash
Executable File
#!/bin/bash
# Monitor HA status and send alerts if needed
#
# Bootstrap: enable strict mode, resolve the script and project directories,
# and load optional settings from the project's .env file.

set -euo pipefail

# Absolute directory holding this script, and the directory two levels above it.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Source .env when present. Strict mode is switched off around the `source`
# so an unset variable or a failing command inside .env cannot abort the
# monitor; loading is deliberately best-effort (errors are discarded).
if [ -f "$PROJECT_ROOT/.env" ]; then
  set +euo pipefail
  # shellcheck source=/dev/null
  source "$PROJECT_ROOT/.env" 2>/dev/null || true
  set -euo pipefail
fi

# Runtime configuration. Each value keeps whatever the environment (or the
# sourced .env) already provides and otherwise falls back to the default here.
VIP="${VIP:-192.168.11.166}"                       # virtual IP whose owner is checked
PRIMARY_HOST="${PRIMARY_HOST:-192.168.11.11}"      # first host queried over SSH
SECONDARY_HOST="${SECONDARY_HOST:-192.168.11.12}"  # second host queried over SSH
LOG_FILE="${LOG_FILE:-/tmp/npmplus-ha-monitor.log}" # file the status lines are appended to

# Colors
# Stored as literal backslash sequences; they become real ANSI escapes only
# when expanded through printf's %b (or echo -e).
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m'

# Logging helpers: print the message ($1) to stdout behind a coloured
# severity tag. Only the colour codes go through %b; the message is printed
# with %s so backslashes inside it (paths, remote output) are emitted
# verbatim instead of being interpreted as escape sequences. A missing
# argument prints an empty message rather than tripping `set -u`.
log_info() { printf '%b[INFO]%b %s\n' "$BLUE" "$NC" "${1-}"; }
log_warn() { printf '%b[WARN]%b %s\n' "$YELLOW" "$NC" "${1-}"; }
log_error() { printf '%b[ERROR]%b %s\n' "$RED" "$NC" "${1-}"; }
log_success() { printf '%b[SUCCESS]%b %s\n' "$GREEN" "$NC" "${1-}"; }

# Captured once so that every line written during this run carries the same
# "YYYY-MM-DD HH:MM:SS" stamp.
TIMESTAMP=$(date '+%Y-%m-%d %H:%M:%S')

# Check who owns VIP
#
# Each host is asked over SSH whether $VIP appears in `ip addr show` for the
# bridge interface. The interface defaults to vmbr0 and may be overridden
# through VIP_INTERFACE.
VIP_INTERFACE="${VIP_INTERFACE:-vmbr0}"

# host_owns_vip HOST
#   Returns 0 when HOST lists $VIP on $VIP_INTERFACE; non-zero when it does
#   not or when the SSH call fails (the two cases are not distinguished).
#   The address is matched as a fixed string (-F) on word boundaries (-w), so
#   dots are literal and 192.168.11.16 cannot match 192.168.11.166. Both
#   values are shell-quoted (%q) before being embedded in the remote command.
host_owns_vip() {
  local host=$1
  local remote_cmd
  printf -v remote_cmd 'ip addr show %q 2>/dev/null | grep -qwF -- %q' \
    "${VIP_INTERFACE:-vmbr0}" "$VIP"
  ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 root@"$host" \
    "$remote_cmd" 2>/dev/null
}

# The primary is asked first; the secondary only if the primary does not
# report the address. "UNKNOWN" means neither host reported it.
VIP_OWNER="UNKNOWN"
if host_owns_vip "$PRIMARY_HOST"; then
  VIP_OWNER="$PRIMARY_HOST"
elif host_owns_vip "$SECONDARY_HOST"; then
  VIP_OWNER="$SECONDARY_HOST"
fi

# Append to the log file; fall back to stdout when the file is not writable.
echo "[$TIMESTAMP] VIP $VIP owner: $VIP_OWNER" >> "$LOG_FILE" 2>&1 || echo "[$TIMESTAMP] VIP $VIP owner: $VIP_OWNER"

# Check Keepalived status on both hosts
#
# keepalived_status HOST
#   Prints the state that `systemctl is-active keepalived` reports on HOST,
#   or "unknown" when nothing was reported (for example the SSH call failed).
#   `systemctl is-active` exits non-zero for every state other than active,
#   so the exit status cannot decide the fallback: doing so would append
#   "unknown" after a perfectly valid "inactive"/"failed" answer. The
#   fallback is therefore applied only when the captured output is empty.
keepalived_status() {
  local host=$1
  local state
  state=$(ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 root@"$host" \
    "systemctl is-active keepalived 2>/dev/null") || true
  printf '%s\n' "${state:-unknown}"
}

PRIMARY_STATUS=$(keepalived_status "$PRIMARY_HOST")
SECONDARY_STATUS=$(keepalived_status "$SECONDARY_HOST")

# Append to the log file; fall back to stdout when the file is not writable.
echo "[$TIMESTAMP] Primary Keepalived: $PRIMARY_STATUS" >> "$LOG_FILE" 2>&1 || echo "[$TIMESTAMP] Primary Keepalived: $PRIMARY_STATUS"
echo "[$TIMESTAMP] Secondary Keepalived: $SECONDARY_STATUS" >> "$LOG_FILE" 2>&1 || echo "[$TIMESTAMP] Secondary Keepalived: $SECONDARY_STATUS"

# ---------------------------------------------------------------------------
# Alert evaluation
#
# Three conditions raise an alert: both Keepalived instances not active, the
# VIP not reported by either host, and the NPMplus container on the VIP owner
# not running. Every alert is appended to LOG_FILE (stdout when the file is
# not writable), printed via log_error, and forwarded to ALERT_EMAIL and/or
# ALERT_WEBHOOK when those are set.
# ---------------------------------------------------------------------------

# json_escape TEXT
#   Prints TEXT with backslash, double quote, newline, carriage return and
#   tab escaped so it can be placed inside a JSON string literal.
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# log_line TEXT
#   Appends TEXT to LOG_FILE; prints it to stdout if the append fails.
log_line() {
  printf '%s\n' "$1" >> "$LOG_FILE" 2>&1 || printf '%s\n' "$1"
}

# notify_alert MESSAGE
#   Sends MESSAGE by mail when ALERT_EMAIL is set and as a JSON POST when
#   ALERT_WEBHOOK is set. Delivery is best-effort: failures are ignored so a
#   broken notification channel never aborts the monitor.
notify_alert() {
  local message=$1
  if [ -n "${ALERT_EMAIL:-}" ]; then
    printf '%s\n' "$message" | mail -s "NPMplus HA Alert" "$ALERT_EMAIL" 2>/dev/null || true
  fi
  if [ -n "${ALERT_WEBHOOK:-}" ]; then
    # --max-time keeps an unresponsive webhook from hanging the run.
    curl -s --max-time 10 -X POST "$ALERT_WEBHOOK" \
      -H "Content-Type: application/json" \
      -d "{\"text\":\"$(json_escape "$message")\"}" 2>/dev/null || true
  fi
}

# raise_alert MESSAGE
#   Logs MESSAGE, prints it as an error, and forwards it to the configured
#   notification channels.
raise_alert() {
  local message=$1
  log_line "$message"
  log_error "$message"
  notify_alert "$message"
}

# Alert if both are down
if [ "$PRIMARY_STATUS" != "active" ] && [ "$SECONDARY_STATUS" != "active" ]; then
  ALERT_MSG="[$TIMESTAMP] ALERT: Both Keepalived instances are down! HA unavailable."
  raise_alert "$ALERT_MSG"
fi

if [ "$VIP_OWNER" = "UNKNOWN" ]; then
  # Alert if VIP is not owned by either host
  ALERT_MSG="[$TIMESTAMP] ALERT: VIP $VIP is not owned by any host!"
  raise_alert "$ALERT_MSG"
else
  # Check NPMplus container status on owner
  if [ "$VIP_OWNER" = "$PRIMARY_HOST" ]; then
    NPMPLUS_VMID="${PRIMARY_VMID:-10233}"
  else
    NPMPLUS_VMID="${SECONDARY_VMID:-10234}"
  fi

  # The remote side prints running/stopped, or unknown when `pct status`
  # yields neither. An empty result (SSH failure) also becomes "unknown".
  CONTAINER_STATUS=$(ssh -o StrictHostKeyChecking=no -o ConnectTimeout=3 root@"$VIP_OWNER" \
    "pct status $NPMPLUS_VMID 2>/dev/null | grep -oE 'running|stopped' || echo 'unknown'") || true
  CONTAINER_STATUS="${CONTAINER_STATUS%%$'\n'*}" # keep the first line only
  CONTAINER_STATUS="${CONTAINER_STATUS:-unknown}"

  if [ "$CONTAINER_STATUS" != "running" ]; then
    ALERT_MSG="[$TIMESTAMP] ALERT: NPMplus container on $VIP_OWNER (VMID $NPMPLUS_VMID) is $CONTAINER_STATUS"
    raise_alert "$ALERT_MSG"
  fi
fi

log_line "[$TIMESTAMP] HA status check complete"