#!/usr/bin/env bash
# Maintenance checks (ALL_IMPROVEMENTS 135–139). Run daily (135–136) or weekly (137–138).
# Explorer: hardened to FAIL when API unreachable; indexer lag check (fail if >500 blocks behind).
# Usage: ./scripts/maintenance/daily-weekly-checks.sh [daily|weekly|all]
# Cron: 0 8 * * * /path/to/daily-weekly-checks.sh daily
# Set EXPLORER_FAIL_WHEN_UNREACHABLE=0 to keep legacy SKIP when explorer unreachable (e.g. off-LAN).

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# The shared address config is optional; the defaults below apply when it is absent.
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

MODE="${1:-daily}"

# Defaults (override via config or env)
IP_RPC_2201="${RPC_2201:-192.168.11.221}"
IP_BLOCKSCOUT="${IP_BLOCKSCOUT:-192.168.11.140}"
BLOCKSCOUT_API_PORT="${BLOCKSCOUT_API_PORT:-4000}"
DBIS_API_URL="${DBIS_API_URL:-https://dbis-api.d-bis.org}"
PROXMOX_R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
PROXMOX_R630_01="${PROXMOX_HOST_R630_01:-192.168.11.11}"
PROXMOX_ML110="${PROXMOX_HOST_ML110:-${PROXMOX_ML110:-192.168.11.10}}"

# Fail daily run when explorer API unreachable (set 0 to preserve legacy SKIP when off-LAN)
EXPLORER_FAIL_WHEN_UNREACHABLE="${EXPLORER_FAIL_WHEN_UNREACHABLE:-1}"

# Indexer lag: fail if explorer block is more than this many blocks behind RPC head.
# Set 1500 temporarily if indexer is catching up after restart (~50 min at 2s/block).
EXPLORER_INDEXER_LAG_THRESHOLD="${EXPLORER_INDEXER_LAG_THRESHOLD:-500}"

# Optional: write metric file for alerting (FAILED count and timestamp)
MAINTENANCE_METRIC_FILE="${MAINTENANCE_METRIC_FILE:-$PROJECT_ROOT/logs/maintenance-checks.metric}"

# Number of failed checks; incremented by the check_* functions below.
FAILED=0
STORAGE_MAX_PCT=0

#######################################
# [136] Probe the JSON-RPC endpoint with eth_blockNumber.
# Globals:   IP_RPC_2201 (read), FAILED (incremented on failure)
# Outputs:   one status line on stdout ("... OK" or "... FAIL")
#######################################
check_rpc() {
  printf '%s' "[136] RPC (${IP_RPC_2201}:8545)... "
  if curl -sf --max-time 10 -X POST -H "Content-Type: application/json" \
    -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
    "http://${IP_RPC_2201}:8545" | grep -q '"result"'; then
    echo "OK"
  else
    echo "FAIL"
    FAILED=$((FAILED + 1))
  fi
}

#######################################
# Get RPC chain head block number (decimal). Empty on failure.
# Globals:   IP_RPC_2201 (read)
# Outputs:   the block number on stdout, or nothing when the RPC is
#            unreachable or the reply carries no hex "result"
# Returns:   always 0, so callers running under `set -e` are never aborted
#######################################
get_rpc_block_number() {
  local response hex
  # `|| true`: an unreachable RPC must yield empty output, not trip errexit/pipefail.
  response=$(curl -sf --max-time 10 -X POST -H "Content-Type: application/json" \
    -d '{"jsonrpc":"2.0","method":"eth_blockNumber","params":[],"id":1}' \
    "http://${IP_RPC_2201}:8545" 2>/dev/null || true)
  # Require at least one hex digit so a bare "0x" is never handed to $(( )).
  hex=$(sed -n 's/.*"result":"\(0x[0-9a-fA-F][0-9a-fA-F]*\)".*/\1/p' <<<"$response")
  if [[ -n "$hex" ]]; then
    echo "$((hex))"
  fi
}

#######################################
# Get Blockscout last indexed block (from /api/v2/stats total_blocks or
# /api/v2/blocks). Empty on failure.
# Globals:   IP_BLOCKSCOUT, BLOCKSCOUT_API_PORT (read)
# Outputs:   the block number on stdout, or nothing when neither endpoint
#            yields a parsable value
#######################################
get_explorer_block_number() {
  local base_url="http://${IP_BLOCKSCOUT}:${BLOCKSCOUT_API_PORT}"
  local body block

  body=$(curl -sf --max-time 10 "${base_url}/api/v2/stats" 2>/dev/null || true)
  if [[ -n "$body" ]] && grep -qE '"total_blocks"|"total_transactions"' <<<"$body"; then
    # total_blocks in API v2 can be string or number: try the quoted form first.
    block=$(sed -n 's/.*"total_blocks"[[:space:]]*:[[:space:]]*"\([0-9]*\)".*/\1/p' <<<"$body" | head -n 1)
    if [[ -z "$block" ]]; then
      block=$(sed -n 's/.*"total_blocks"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/p' <<<"$body" | head -n 1)
    fi
    if [[ -n "$block" ]]; then
      echo "$block"
      return
    fi
  fi

  # Fallback: first block from /api/v2/blocks
  body=$(curl -sf --max-time 10 "${base_url}/api/v2/blocks?page_size=1" 2>/dev/null || true)
  if [[ -n "$body" ]]; then
    sed -n 's/.*"height"[[:space:]]*:[[:space:]]*\([0-9]*\).*/\1/p' <<<"$body" | head -n 1
  fi
}

#######################################
# [135] Explorer: API must answer with total_blocks/total_transactions.
# Tries the LAN v2 stats endpoint, then the legacy stats endpoint, then the
# public URL. FAIL when unreachable if EXPLORER_FAIL_WHEN_UNREACHABLE=1,
# otherwise SKIP.
# Globals:   IP_BLOCKSCOUT, BLOCKSCOUT_API_PORT, EXPLORER_FAIL_WHEN_UNREACHABLE
#            (read), FAILED (incremented on failure)
# Outputs:   one status line on stdout
#######################################
check_explorer_sync() {
  printf '%s' "[135] Explorer indexer (${IP_BLOCKSCOUT}:${BLOCKSCOUT_API_PORT})... "
  local base_url="http://${IP_BLOCKSCOUT}:${BLOCKSCOUT_API_PORT}"
  local api_ok=0

  if curl -sf --max-time 10 "${base_url}/api/v2/stats" 2>/dev/null \
    | grep -qE '"total_blocks"|"total_transactions"|"indexer"'; then
    api_ok=1
  elif curl -sf --max-time 10 "${base_url}/api?module=stats&action=eth_price" 2>/dev/null \
    | grep -qE '"result"|"eth_price"'; then
    api_ok=1
  fi
  if [[ "$api_ok" -eq 1 ]]; then
    echo "OK"
    return
  fi

  # Try public URL (in case we're off-LAN and only NPMplus path works).
  # NOTE(review): -k disables TLS certificate verification for this probe; kept
  # as-is to preserve behaviour — confirm it is still required.
  if curl -sf --max-time 10 -k "https://explorer.d-bis.org/api/v2/stats" 2>/dev/null \
    | grep -qE '"total_blocks"|"total_transactions"'; then
    echo "OK (public)"
    return
  fi

  if [[ "${EXPLORER_FAIL_WHEN_UNREACHABLE}" = "1" ]]; then
    echo "FAIL (Blockscout unreachable)"
    FAILED=$((FAILED + 1))
  else
    echo "SKIP (Blockscout unreachable; run from LAN or set EXPLORER_FAIL_WHEN_UNREACHABLE=1)"
  fi
}

#######################################
# [135b] Indexer lag: fail if explorer block is more than
# EXPLORER_INDEXER_LAG_THRESHOLD behind RPC head.
# Globals:   EXPLORER_INDEXER_LAG_THRESHOLD (read), FAILED (incremented on failure)
# Outputs:   one status line on stdout (OK / FAIL / SKIP)
#######################################
check_explorer_indexer_lag() {
  printf '%s' "[135b] Explorer indexer lag (RPC vs Blockscout)... "
  local rpc_block explorer_block lag
  local threshold="${EXPLORER_INDEXER_LAG_THRESHOLD}"
  rpc_block=$(get_rpc_block_number)
  explorer_block=$(get_explorer_block_number)

  # Empty or non-numeric values mean one side could not be read; never report
  # that as "caught up".
  if [[ ! "$rpc_block" =~ ^[0-9]+$ || ! "$explorer_block" =~ ^[0-9]+$ ]]; then
    echo "SKIP (RPC or Blockscout unreachable)"
    return
  fi

  if [ "$rpc_block" -gt "$explorer_block" ]; then
    lag=$((rpc_block - explorer_block))
    # A misconfigured threshold must surface as a failure instead of a silent OK.
    if [[ ! "$threshold" =~ ^[0-9]+$ ]]; then
      echo "FAIL (invalid EXPLORER_INDEXER_LAG_THRESHOLD: ${threshold})"
      FAILED=$((FAILED + 1))
    elif [ "$lag" -gt "$threshold" ]; then
      echo "FAIL (lag ${lag} > ${threshold})"
      FAILED=$((FAILED + 1))
    else
      echo "OK (lag ${lag})"
    fi
  else
    echo "OK (explorer caught up)"
  fi
}

#######################################
# [137] Probe the config API health endpoint for HTTP 200.
# A miss is reported but deliberately does not increment FAILED, because the
# URL is external and may be unreachable off-LAN.
# Globals:   DBIS_API_URL (read)
# Outputs:   one status line on stdout
#######################################
check_config_api() {
  printf '%s' "[137] Config API (${DBIS_API_URL})... "
  if curl -sf --max-time 10 -o /dev/null -w "%{http_code}" "${DBIS_API_URL}/health" 2>/dev/null \
    | grep -q 200; then
    echo "OK"
  else
    echo "SKIP or FAIL (external URL; may be unreachable off-LAN)"
  fi
}

# [138a] Weekly: thin pool / storage usage on one host. Warn >85%, fail at 100%.
# Usage: check_thin_pool_one_host