#!/usr/bin/env bash
# Fix memory limits for all containers on r630-02 to resolve OOM kills
# Usage: ./scripts/fix-container-memory-limits.sh
#
# Requires PROXMOX_HOST_R630_02 (normally defined in config/ip-addresses.conf,
# but it may also be supplied through the environment).
#
# Every container reported by `pct list` on the host is updated. Containers
# without an entry in MEMORY_LIMITS / SWAP_LIMITS fall back to 512 MB memory
# and 256 MB swap.

set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# A missing config file is tolerated here; the check below reports a missing
# host explicitly instead of dying with a bare "unbound variable" error.
# shellcheck disable=SC1091
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

PROXMOX_HOST="${PROXMOX_HOST_R630_02:?PROXMOX_HOST_R630_02 is not set (expected in config/ip-addresses.conf or the environment)}"

# Colors
CYAN='\033[0;36m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
RED='\033[0;31m'
BLUE='\033[0;34m'
NC='\033[0m'

log_info() { echo -e "${CYAN}ℹ${NC} $1"; }
log_success() { echo -e "${GREEN}✓${NC} $1"; }
log_warn() { echo -e "${YELLOW}⚠${NC} $1"; }
log_error() { echo -e "${RED}✗${NC} $1"; }

#######################################
# Run a command line on the Proxmox host over SSH.
# Globals:   PROXMOX_HOST (read)
# Arguments: the remote command line
# Outputs:   whatever the remote command writes
# Returns:   ssh's exit status (the remote status, or 255 on connection error)
#######################################
pve() {
  # -n detaches stdin so ssh can never swallow input meant for this script
  # (e.g. the here-string feeding the container loop below).
  ssh -n -o StrictHostKeyChecking=no root@"$PROXMOX_HOST" "$@"
}

#######################################
# Read one numeric key from a container's `pct config` output.
# Arguments: $1 - VMID, $2 - config key without the colon (e.g. memory)
# Outputs:   the value as printed by pct (MB for memory/swap), or 0 when the
#            key is absent, non-numeric, or the query fails
#######################################
config_value_mb() {
  local vmid=$1 key=$2 value
  # No early `exit` in awk: closing the pipe early could SIGPIPE ssh and make
  # the pipeline fail under pipefail.
  value=$(pve "pct config $vmid 2>/dev/null" \
    | awk -v key="${key}:" '$1 == key && !seen++ {print $2}') || value=""
  [[ "$value" =~ ^[0-9]+$ ]] || value=0
  printf '%s\n' "$value"
}

echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}Fixing Container Memory Limits - r630-02${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

# Memory limits in MB. `pct set --memory` takes its value in MB, so these are
# passed through unchanged (no byte conversion).
# Based on current usage + buffer for growth
declare -A MEMORY_LIMITS=(
  ["5000"]="2048"   # blockscout-1: using 736MB, set to 2GB
  ["6200"]="512"    # firefly-1: using 182MB, set to 512MB
  ["6201"]="512"    # firefly-ali-1: using 190MB, set to 512MB
  ["7810"]="256"    # mim-web-1: using 40MB, set to 256MB
  ["7811"]="1024"   # mim-api-1: using 90MB but has OOM issues, set to 1GB
  ["8641"]="512"    # vault-phoenix-2: using 68MB, set to 512MB
  ["10234"]="24576" # npmplus-secondary: using 20283MB, set to 24GB
)

# Swap limits in MB (`pct set --swap` also takes MB)
declare -A SWAP_LIMITS=(
  ["5000"]="1024"  # blockscout-1: 1GB swap
  ["6200"]="256"   # firefly-1: 256MB swap
  ["6201"]="256"   # firefly-ali-1: 256MB swap
  ["7810"]="128"   # mim-web-1: 128MB swap
  ["7811"]="512"   # mim-api-1: 512MB swap (critical container)
  ["8641"]="256"   # vault-phoenix-2: 256MB swap
  ["10234"]="4096" # npmplus-secondary: 4GB swap
)

# Get all containers in a single round trip as "vmid status name" lines.
# `pct list` prints VMID, Status, Lock, Name; Lock is usually blank, so the
# name is taken from the last field rather than a fixed column. Only rows
# whose first field is numeric are kept, which also skips the header.
if ! CONTAINER_LIST=$(pve "pct list 2>/dev/null" \
  | awk '$1 ~ /^[0-9]+$/ {print $1, $2, (NF >= 3 ? $NF : "")}'); then
  # A connection or pct failure must not be reported as "no containers".
  log_error "Failed to list containers on $PROXMOX_HOST"
  exit 1
fi

if [[ -z "$CONTAINER_LIST" ]]; then
  log_warn "No containers found"
  exit 0
fi

SUCCESS_COUNT=0
FAILED_COUNT=0

while read -r vmid status name; do
  [[ -n "$vmid" ]] || continue

  # Get configured memory limits (defaults apply to unlisted containers)
  MEMORY_MB="${MEMORY_LIMITS[$vmid]:-512}"
  SWAP_MB="${SWAP_LIMITS[$vmid]:-256}"

  echo ""
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  log_info "Container: $vmid - $name"
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

  # Get current limits (pct config reports them in MB already)
  CURRENT_MEMORY_MB=$(config_value_mb "$vmid" memory)
  CURRENT_SWAP_MB=$(config_value_mb "$vmid" swap)

  log_info "Current: Memory=${CURRENT_MEMORY_MB}MB, Swap=${CURRENT_SWAP_MB}MB"
  log_info "Setting: Memory=${MEMORY_MB}MB, Swap=${SWAP_MB}MB"

  # Update memory limit
  log_info "Updating memory limit..."
  if pve "pct set $vmid --memory $MEMORY_MB" 2>&1; then
    log_success "Memory limit updated to ${MEMORY_MB}MB"
  else
    log_error "Failed to update memory limit"
    FAILED_COUNT=$((FAILED_COUNT + 1))
    continue
  fi

  # Update swap limit (a failure here is only warned about, not counted)
  log_info "Updating swap limit..."
  if pve "pct set $vmid --swap $SWAP_MB" 2>&1; then
    log_success "Swap limit updated to ${SWAP_MB}MB"
  else
    log_warn "Failed to update swap limit (may not be critical)"
  fi

  # Verify the changes by reading the stored value back
  VERIFY_MEMORY_MB=$(config_value_mb "$vmid" memory)

  if ((VERIFY_MEMORY_MB == MEMORY_MB)); then
    log_success "Memory limit verified: ${VERIFY_MEMORY_MB}MB"
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
  else
    log_warn "Memory limit verification failed: expected ${MEMORY_MB}MB, got ${VERIFY_MEMORY_MB}MB"
    # Count the mismatch so it shows up in the summary instead of vanishing.
    FAILED_COUNT=$((FAILED_COUNT + 1))
  fi

  # Note: Container may need restart for changes to take full effect
  if [[ "$status" == "running" ]]; then
    log_info "Container is running. Changes will apply on next restart or can be applied now."
    log_info "To apply immediately: ssh root@$PROXMOX_HOST 'pct reboot $vmid'"
  fi
done <<<"$CONTAINER_LIST"

echo ""
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo -e "${BLUE}Summary${NC}"
echo -e "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
echo ""

if ((SUCCESS_COUNT > 0)); then
  log_success "Successfully updated $SUCCESS_COUNT container(s)"
fi

if ((FAILED_COUNT > 0)); then
  log_error "Failed to update $FAILED_COUNT container(s)"
fi

echo ""
log_info "Memory limits have been updated. Containers may need to be restarted"
log_info "for the new limits to take full effect."
echo ""
log_info "To restart all containers:"
echo " ssh root@$PROXMOX_HOST 'for vmid in \$(pct list | tail -n +2 | awk \"{print \\\$1}\"); do pct reboot \$vmid; done'"
echo ""

log_success "Memory limit update complete!"