#!/usr/bin/env bash
# Ensure the public Cacti CTs on r630-02 keep both their nginx landing page and
# Docker-backed Hyperledger Cacti API healthy.
#
# Expected runtime:
# - VMID 5201 / 5202: nginx on :80 for the public landing page
# - VMID 5201 / 5202: cacti.service exposing the internal API on :4000
# - Proxmox CT config includes `features: nesting=1,keyctl=1` for Docker-in-LXC
#
# Usage: ./scripts/maintenance/ensure-cacti-web-via-ssh.sh [--dry-run]
# Env: PROXMOX_HOST_R630_02 (default 192.168.11.12)
#
# Exit status: 0 when every CT was processed without a failed repair command,
# 1 when the in-container repair commands failed for at least one CT. Other
# SSH / pct failures still abort immediately via `set -e`.

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional project environment; a missing or failing loader is deliberately
# tolerated so the script also works stand-alone.
# shellcheck disable=SC1091
[[ -f "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" ]] && source "${PROJECT_ROOT}/scripts/lib/load-project-env.sh" 2>/dev/null || true

DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true

PROXMOX_HOST="${PROXMOX_HOST_R630_02:-192.168.11.12}"

# Set to true when the repair commands inside any CT fail; drives the final
# exit status so that one broken CT does not prevent the others being handled.
REPAIR_FAILED=false

# Coloured log helpers. The message is passed through %s so backslashes or
# percent signs in it are printed literally.
log_info() { printf '\033[0;34m[INFO]\033[0m %s\n' "$1"; }
log_ok() { printf '\033[0;32m[✓]\033[0m %s\n' "$1"; }
log_warn() { printf '\033[0;33m[⚠]\033[0m %s\n' "$1"; }

# Run a command line on the Proxmox host as root.
# Arguments: the remote command (joined by ssh into one remote shell line).
run_ssh() {
  ssh -o ConnectTimeout=10 -o StrictHostKeyChecking=no root@"$PROXMOX_HOST" "$@"
}

#######################################
# Merge the Docker-in-LXC flags into an existing CT feature list.
# Any existing nesting=/keyctl= entries are dropped and replaced by
# nesting=1,keyctl=1; every other entry is kept in its original order.
# Arguments: $1 - current comma-separated feature string (may be empty)
# Outputs:   merged feature string on stdout
#######################################
merge_ct_features() {
  local remaining="$1"
  local item
  local merged=""

  while [[ -n "$remaining" ]]; do
    item="${remaining%%,*}"
    if [[ "$remaining" == *,* ]]; then
      remaining="${remaining#*,}"
    else
      remaining=""
    fi
    item="${item//[[:space:]]/}"
    case "$item" in
      "" | nesting=* | keyctl=*) continue ;;
    esac
    merged+="${merged:+,}${item}"
  done

  printf '%s\n' "${merged:+${merged},}nesting=1,keyctl=1"
}

#######################################
# Make sure a CT has nesting=1 and keyctl=1 enabled, restarting it if the
# configuration had to change.
# Globals:   DRY_RUN (read)
# Arguments: $1 - VMID of the container
# Returns:   0 when the features are present, were applied, or in dry-run mode
#######################################
ensure_ct_features() {
  local vmid="$1"
  local conf="/etc/pve/lxc/${vmid}.conf"
  local features
  local merged

  # Only the leading section is the live config: stop at the first "[...]"
  # header so `features:` lines in snapshot or pending sections are ignored.
  features="$(run_ssh "awk -F': ' '/^\\[/{exit} /^features:/{print \$2; exit}' ${conf@Q} 2>/dev/null || true" | tr -d '\r\n')"
  if [[ "$features" == *"nesting=1"* && "$features" == *"keyctl=1"* ]]; then
    return 0
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would add features: nesting=1,keyctl=1 to VMID $vmid and restart the CT"
    return 0
  fi

  # Keep a timestamped copy of the config on the host before touching it.
  run_ssh "cp ${conf@Q} /root/${vmid}.conf.pre-codex.\$(date +%Y%m%d_%H%M%S)"

  # Preserve unrelated flags (fuse=, mount=, ...) and let pct write the value,
  # so snapshot sections of the config file are never modified.
  merged="$(merge_ct_features "$features")"
  run_ssh "pct set $vmid --features ${merged@Q}"

  # Feature flags only take effect on container start: graceful shutdown
  # first, hard stop as a fallback, then start again.
  run_ssh "pct shutdown $vmid --timeout 30 >/dev/null 2>&1 || pct stop $vmid >/dev/null 2>&1 || true"
  run_ssh "pct start $vmid"
  sleep 8
}

#######################################
# Probe both Cacti surfaces of one CT.
# Arguments: $1 - VMID, $2 - CT IP address
# Returns:   0 when http://<ip>/ answers 200 (fetched from the Proxmox host)
#            and the API healthcheck on 127.0.0.1:4000 succeeds inside the CT;
#            non-zero otherwise.
#######################################
cacti_surface_healthy() {
  local vmid="$1"
  local ip="$2"
  local http_code

  http_code="$(run_ssh "timeout 5 curl -sS -o /dev/null -w '%{http_code}' http://${ip}/ 2>/dev/null || true" | tr -d '\r\n')" || return 1
  [[ "$http_code" == "200" ]] || return 1
  run_ssh "pct exec $vmid -- bash -lc 'curl -fsS http://127.0.0.1:4000/api/v1/api-server/healthcheck >/dev/null 2>&1'" >/dev/null 2>&1
}

#######################################
# Verify one Cacti CT and, if it is unhealthy, restart docker / nginx /
# cacti.service inside it and verify again.
# Globals:   DRY_RUN (read), REPAIR_FAILED (written)
# Arguments: $1 - VMID, $2 - CT IP address, $3 - human-readable label
# Returns:   0; problems are reported via log_warn and REPAIR_FAILED
#######################################
ensure_cacti_surface() {
  local vmid="$1"
  local ip="$2"
  local label="$3"
  local status
  local remote_script

  ensure_ct_features "$vmid"

  status="$(run_ssh "pct status $vmid 2>/dev/null | awk '{print \$2}'" 2>/dev/null || echo "missing")"
  if [[ "$status" != "running" ]]; then
    log_warn "$label (VMID $vmid) is not running"
    return 0
  fi

  if cacti_surface_healthy "$vmid" "$ip"; then
    log_ok "$label already serves both the landing page and internal Cacti API"
    return 0
  fi

  if [[ "$DRY_RUN" == true ]]; then
    log_info "Would restart nginx/docker/cacti.service in VMID $vmid (${label})"
    return 0
  fi

  # Script executed inside the CT. The quoted heredoc delimiter keeps it
  # literal; it waits up to ~40s for the API before the final checks.
  remote_script="$(
    cat <<'EOF'
set -e
id -nG cacti 2>/dev/null | grep -qw docker || usermod -aG docker cacti || true
systemctl restart docker
systemctl enable --now nginx
systemctl reset-failed cacti.service || true
systemctl enable --now cacti.service
for _ in $(seq 1 20); do
  if curl -fsS http://127.0.0.1:4000/api/v1/api-server/healthcheck >/dev/null 2>&1; then
    break
  fi
  sleep 2
done
curl -fsS http://127.0.0.1/ >/dev/null
curl -fsS http://127.0.0.1:4000/api/v1/api-server/healthcheck >/dev/null
EOF
  )"

  # A failing repair must not abort the whole run under `set -e`: record it,
  # still report the resulting health below, and carry on with the next CT.
  if ! run_ssh "pct exec $vmid -- bash --norc -lc $(printf '%q' "$remote_script")"; then
    REPAIR_FAILED=true
    log_warn "Repair commands failed inside VMID $vmid (${label}); re-checking health"
  fi

  if cacti_surface_healthy "$vmid" "$ip"; then
    log_ok "$label restored on ${ip}:80 with a healthy internal Cacti API"
  else
    log_warn "$label is still only partially healthy on VMID $vmid"
  fi
}

# Entry point: process every known Cacti CT, then surface repair failures
# through the exit status.
main() {
  echo ""
  echo "=== Ensure Cacti surfaces ==="
  echo " Host: $PROXMOX_HOST dry-run=$DRY_RUN"
  echo ""

  ensure_cacti_surface 5201 "192.168.11.177" "Cacti ALLTRA"
  ensure_cacti_surface 5202 "192.168.11.251" "Cacti HYBX"

  echo ""

  if [[ "$REPAIR_FAILED" == true ]]; then
    exit 1
  fi
}

main "$@"