#!/usr/bin/env bash
# Additional pass: diagnose I/O + load on Proxmox nodes, then apply safe host-level optimizations.
# - Reports: load, PSI, zpool, pvesm, scrub, vzdump, running CT count
# - Applies (idempotent): vm.swappiness on ml110; sysstat; host fstrim where supported
#
# Usage: ./scripts/maintenance/proxmox-host-io-optimize-pass.sh [--diagnose-only]
# Requires: SSH key root@ ml110, r630-01, r630-02 (see config/ip-addresses.conf)
#
# Exit status:
#   0  pass completed (or diagnose-only finished)
#   1  diagnostics failed on at least one host (no optimization is attempted)
#   2  unknown command-line argument
#   other non-zero: an optimization step failed (propagated by set -e)

set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# The config file is optional: when it is missing or unreadable the built-in
# default addresses below are used, hence the deliberate "|| true".
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

ML="${PROXMOX_ML110:-${PROXMOX_HOST_ML110:-192.168.11.10}}"
R1="${PROXMOX_R630_01:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"
R2="${PROXMOX_R630_02:-${PROXMOX_HOST_R630_02:-192.168.11.12}}"
HOSTS=("$ML" "$R1" "$R2")
SSH_OPTS=(-o ConnectTimeout=20 -o ServerAliveInterval=15 -o StrictHostKeyChecking=accept-new)

# Print the command-line synopsis to stdout.
usage() {
  printf 'Usage: %s [--diagnose-only]\n' "${0##*/}"
}

# Reject anything we do not understand: silently ignoring a mistyped
# --diagnose-only would otherwise run the mutating optimization steps.
DIAG_ONLY=false
for arg in "$@"; do
  case "$arg" in
    --diagnose-only)
      DIAG_ONLY=true
      ;;
    -h | --help)
      usage
      exit 0
      ;;
    *)
      printf 'Unknown argument: %s\n' "$arg" >&2
      usage >&2
      exit 2
      ;;
  esac
done

#######################################
# Run a script read from stdin on a remote host as root.
# Globals:   SSH_OPTS (read)
# Arguments: $1 - host name or IP address
# Inputs:    stdin - bash script text to execute remotely
# Returns:   exit status of ssh (remote script status, or 255 on SSH failure)
#######################################
remote() {
  ssh "${SSH_OPTS[@]}" "root@$1" bash -s
}

echo "=== Proxmox host I/O optimize pass ($(date -Is)) ==="
echo "  ml110=$ML r630-01=$R1 r630-02=$R2 diagnose-only=$DIAG_ONLY"
echo ""

# Diagnose every host even if one of them is unreachable, so the report is as
# complete as possible; failures are collected and acted on after the loop.
diag_failed=()
for H in "${HOSTS[@]}"; do
  echo "########## DIAGNOSTIC: $H ##########"
  remote "$H" <<'EOS' || { echo "WARN: diagnostics failed on $H" >&2; diag_failed+=("$H"); }
# Best-effort report: individual probes may be unavailable on a given host.
set +e
hostname
uptime
echo "--- PSI ---"
head -2 /proc/pressure/cpu 2>/dev/null
head -2 /proc/pressure/io 2>/dev/null
echo "--- pvesm ---"
pvesm status 2>/dev/null | head -25
echo "--- running workloads ---"
printf 'LXC running: '; pct list 2>/dev/null | awk 'NR>1 && $2=="running"' | wc -l
printf 'VM running: '; qm list 2>/dev/null | awk 'NR>1 && $3=="running"' | wc -l
echo "--- vzdump ---"
# Every alternative uses the [x] bracket trick so grep never matches itself.
# Capture the output: the exit status of a "... | head" pipeline is head's,
# so it cannot be used to detect "no match".
backup_procs=$(ps aux 2>/dev/null | grep -E '[v]zdump|[p]bs-|[p]roxmox-backup' | head -5)
if [ -n "$backup_procs" ]; then
  printf '%s\n' "$backup_procs"
else
  echo "(none visible)"
fi
echo "--- ZFS ---"
zfs_status=$(zpool status 2>/dev/null)
if [ -n "$zfs_status" ]; then
  printf '%s\n' "$zfs_status" | head -20
else
  echo "no zfs"
fi
echo "--- scrub ---"
printf '%s\n' "$zfs_status" | grep -E 'scan|scrub' || true
EOS
  echo ""
done

# Never optimize on top of an incomplete diagnosis.
if ((${#diag_failed[@]} > 0)); then
  printf 'ERROR: diagnostics failed on: %s\n' "${diag_failed[*]}" >&2
  exit 1
fi

if $DIAG_ONLY; then
  echo "Diagnose-only: done."
  exit 0
fi

echo "########## OPTIMIZE: ml110 swappiness ##########"
remote "$ML" <<'EOS'
set -e
F=/etc/sysctl.d/99-proxmox-ml110-swappiness.conf
# -x whole line, -F fixed string: the dot must not act as a regex wildcard.
if ! grep -qxF 'vm.swappiness=10' "$F" 2>/dev/null; then
  printf '%s\n' '# Prefer RAM over swap when plenty of memory free (operator pass)' 'vm.swappiness=10' > "$F"
  sysctl -p "$F"
  echo "Wrote and applied $F"
else
  echo "Already vm.swappiness=10 in $F"
  # Re-assert the runtime value in case it drifted from the persisted one.
  sysctl vm.swappiness=10 2>/dev/null || true
fi
EOS
echo ""

echo "########## OPTIMIZE: sysstat (all hosts) ##########"
for H in "${HOSTS[@]}"; do
  echo "--- $H ---"
  remote "$H" <<'EOS'
set -e
export DEBIAN_FRONTEND=noninteractive
if command -v sar >/dev/null 2>&1; then
  echo "sysstat already present"
else
  # "update && install" would hide an update failure from set -e and silently
  # skip the install. A partially failing update (e.g. one unreachable repo)
  # is tolerated with a warning; a failing install is fatal.
  apt-get update -qq || echo "WARN: apt-get update reported errors; attempting install anyway" >&2
  apt-get install -y -qq sysstat
fi
# Best-effort: the defaults file / unit may be absent or already configured.
sed -i 's/^ENABLED="false"/ENABLED="true"/' /etc/default/sysstat 2>/dev/null || true
systemctl enable sysstat 2>/dev/null || true
systemctl restart sysstat 2>/dev/null || true
echo "sar: $(command -v sar || echo missing)"
EOS
done
echo ""

echo "########## OPTIMIZE: host fstrim (hypervisor root / and /var/lib/vz if supported) ##########"
for H in "${HOSTS[@]}"; do
  echo "--- $H ---"
  remote "$H" <<'EOS'
# fstrim fails on filesystems without discard support; report and carry on.
set +e
for m in / /var/lib/vz; do
  if mountpoint -q "$m" 2>/dev/null; then
    out=$(fstrim -v "$m" 2>&1)
    echo "$m: $out"
  fi
done
EOS
done
echo ""

echo "########## POST: quick load snapshot ##########"
for H in "${HOSTS[@]}"; do
  printf '%s ' "$H"
  # A host that went away mid-pass is reported, not treated as fatal.
  ssh "${SSH_OPTS[@]}" "root@$H" "cut -d' ' -f1-3 /proc/loadavg" 2>/dev/null || echo "unreachable"
done
echo ""
echo "Done. Optional: run ./scripts/maintenance/fstrim-all-running-ct.sh during a quiet window (can be I/O heavy)."