Files
proxmox/scripts/archive/consolidated/fix/fix-container-memory-limits.sh

170 lines
6.5 KiB
Bash
Raw Permalink Normal View History

#!/usr/bin/env bash
# Fix memory limits for all containers on r630-02 to resolve OOM kills
# Usage: ./scripts/fix-container-memory-limits.sh
#
# Requires PROXMOX_HOST_R630_02, provided either by config/ip-addresses.conf
# (looked up relative to the parent of this script's directory) or by the
# environment.
set -euo pipefail

# Load IP configuration
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
# Best-effort on purpose: a missing or noisy config file is tolerated here;
# the check below catches the case where the host was not provided at all.
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

# Under `set -u` a bare ${PROXMOX_HOST_R630_02} aborts with an opaque
# "unbound variable" error, and an empty value would yield `root@` as the SSH
# target. Fail early with an actionable message in both cases.
PROXMOX_HOST="${PROXMOX_HOST_R630_02:?PROXMOX_HOST_R630_02 is not set (expected from ${PROJECT_ROOT}/config/ip-addresses.conf or the environment)}"
# ANSI colour codes used for terminal output. They are stored as unexpanded
# backslash sequences and only become escape characters when printed through
# `echo -e` (or an equivalent) later in the script.
NC="\033[0m"        # reset attributes
BLUE="\033[0;34m"
RED="\033[0;31m"
YELLOW="\033[1;33m" # bold yellow
GREEN="\033[0;32m"
CYAN="\033[0;36m"
# Shared implementation for the log_* helpers.
# Arguments: $1 - colour code, $2 - message
# Outputs:   "<colour><reset> <message>" plus a newline on stdout, with
#            backslash escapes in the whole line interpreted.
_log_line() {
  printf '%b\n' "${1}${NC} ${2}"
}

# Level-specific wrappers; each takes the message as $1.
log_info() {
  _log_line "$CYAN" "$1"
}

log_success() {
  _log_line "$GREEN" "$1"
}

log_warn() {
  _log_line "$YELLOW" "$1"
}

log_error() {
  _log_line "$RED" "$1"
}
# Opening banner: blank line, rule, title, rule, blank line.
# A single printf reuses the '%b\n' format for every argument, so each
# argument is printed on its own line with escapes interpreted.
printf '%b\n' \
  "" \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  "${BLUE}Fixing Container Memory Limits - r630-02${NC}" \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  ""
# Per-container memory limits in MB, keyed by VMID.
# Values are based on observed usage plus headroom for growth.
declare -A MEMORY_LIMITS=()
MEMORY_LIMITS[5000]=2048    # blockscout-1: using 736MB, set to 2GB
MEMORY_LIMITS[6200]=512     # firefly-1: using 182MB, set to 512MB
MEMORY_LIMITS[6201]=512     # firefly-ali-1: using 190MB, set to 512MB
MEMORY_LIMITS[7810]=256     # mim-web-1: using 40MB, set to 256MB
MEMORY_LIMITS[7811]=1024    # mim-api-1: using 90MB but has OOM issues, set to 1GB
MEMORY_LIMITS[8641]=512     # vault-phoenix-2: using 68MB, set to 512MB
MEMORY_LIMITS[10234]=24576  # npmplus-secondary: using 20283MB, set to 24GB

# Per-container swap limits in MB, keyed by VMID.
declare -A SWAP_LIMITS=()
SWAP_LIMITS[5000]=1024      # blockscout-1: 1GB swap
SWAP_LIMITS[6200]=256       # firefly-1: 256MB swap
SWAP_LIMITS[6201]=256       # firefly-ali-1: 256MB swap
SWAP_LIMITS[7810]=128       # mim-web-1: 128MB swap
SWAP_LIMITS[7811]=512       # mim-api-1: 512MB swap (critical container)
SWAP_LIMITS[8641]=256       # vault-phoenix-2: 256MB swap
SWAP_LIMITS[10234]=4096     # npmplus-secondary: 4GB swap
# Get all containers.
# The raw `pct list` table is fetched first and parsed locally, so an SSH or
# `pct` failure is reported as an error instead of being mistaken for an
# empty container list (which previously ended the script with exit 0).
# NOTE(review): StrictHostKeyChecking=no skips host-key verification; kept as
# in the original script - confirm this is acceptable for this network.
if ! PCT_LIST_OUTPUT=$(ssh -o StrictHostKeyChecking=no root@"$PROXMOX_HOST" "pct list"); then
  log_error "Failed to list containers on $PROXMOX_HOST"
  exit 1
fi

# Skip the header row and keep the first column (the VMID) of every entry.
CONTAINER_VMIDS=$(awk 'NR > 1 { print $1 }' <<<"$PCT_LIST_OUTPUT")
if [ -z "$CONTAINER_VMIDS" ]; then
  log_warn "No containers found"
  exit 0
fi
SUCCESS_COUNT=0
FAILED_COUNT=0

# Run a command string on the Proxmox host as root over SSH.
# Arguments: $@ - remote command, handed to ssh unchanged
# Returns:   ssh's exit status (the remote command's status, or 255 when the
#            connection itself fails)
# NOTE(review): StrictHostKeyChecking=no skips host-key verification; kept as
# in the original script - confirm this is acceptable for this network.
run_on_proxmox() {
  ssh -o StrictHostKeyChecking=no root@"$PROXMOX_HOST" "$@"
}

# Print the value of one `pct config` field, reading the config from stdin.
# Arguments: $1 - field name without the trailing colon (e.g. "memory")
# Outputs:   the field's value, or 0 when the field is absent
pct_config_field() {
  awk -v key="$1:" '
    $1 == key { print $2; found = 1; exit }
    END { if (!found) print 0 }
  '
}

# Word-splitting of $CONTAINER_VMIDS is intentional: one VMID per word.
for vmid in $CONTAINER_VMIDS; do
  # VMIDs are interpolated into remote shell commands below, so skip anything
  # that is not a plain number.
  if [[ ! "$vmid" =~ ^[0-9]+$ ]]; then
    continue
  fi

  # Get container info. The VMID column is compared for equality; a prefix
  # match such as grep "^500" would also select container 5000.
  if ! CONTAINER_INFO=$(run_on_proxmox \
    "pct list 2>/dev/null | awk '\$1 == \"$vmid\"'" 2>/dev/null); then
    log_error "Failed to query container $vmid on $PROXMOX_HOST"
    FAILED_COUNT=$((FAILED_COUNT + 1))
    continue
  fi
  if [ -z "$CONTAINER_INFO" ]; then
    continue
  fi

  # `pct list` columns are VMID, Status, Lock, Name. Lock is usually blank,
  # so the name is taken from the last field rather than a fixed position.
  status=$(awk '{ print $2; exit }' <<<"$CONTAINER_INFO")
  name=$(awk '{ print $NF; exit }' <<<"$CONTAINER_INFO")

  # Get configured memory limits (MB), with defaults for unlisted VMIDs.
  # `pct set --memory/--swap` and `pct config` both work in MB, so the values
  # are passed through unchanged - converting to bytes would set a limit
  # roughly a million times larger than intended.
  MEMORY_MB="${MEMORY_LIMITS[$vmid]:-512}"
  SWAP_MB="${SWAP_LIMITS[$vmid]:-256}"

  echo ""
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"
  log_info "Container: $vmid - $name"
  echo -e "${CYAN}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}"

  # Get current limits. The config is fetched once and parsed locally; a
  # failed fetch is displayed as 0 rather than aborting the run.
  CURRENT_CONFIG=$(run_on_proxmox "pct config $vmid" 2>/dev/null) || CURRENT_CONFIG=""
  CURRENT_MEMORY_MB=$(pct_config_field memory <<<"$CURRENT_CONFIG")
  CURRENT_SWAP_MB=$(pct_config_field swap <<<"$CURRENT_CONFIG")
  log_info "Current: Memory=${CURRENT_MEMORY_MB}MB, Swap=${CURRENT_SWAP_MB}MB"
  log_info "Setting: Memory=${MEMORY_MB}MB, Swap=${SWAP_MB}MB"

  # Update memory limit. A failure here is fatal for this container only.
  log_info "Updating memory limit..."
  if run_on_proxmox "pct set $vmid --memory $MEMORY_MB" 2>&1; then
    log_success "Memory limit updated to ${MEMORY_MB}MB"
  else
    log_error "Failed to update memory limit"
    FAILED_COUNT=$((FAILED_COUNT + 1))
    continue
  fi

  # Update swap limit. A failure is only warned about.
  log_info "Updating swap limit..."
  if run_on_proxmox "pct set $vmid --swap $SWAP_MB" 2>&1; then
    log_success "Swap limit updated to ${SWAP_MB}MB"
  else
    log_warn "Failed to update swap limit (may not be critical)"
  fi

  # Verify the changes by reading the memory value back from the config.
  VERIFY_CONFIG=$(run_on_proxmox "pct config $vmid" 2>/dev/null) || VERIFY_CONFIG=""
  VERIFY_MEMORY_MB=$(pct_config_field memory <<<"$VERIFY_CONFIG")
  if [[ "$VERIFY_MEMORY_MB" == "$MEMORY_MB" ]]; then
    log_success "Memory limit verified: ${VERIFY_MEMORY_MB}MB"
    SUCCESS_COUNT=$((SUCCESS_COUNT + 1))
  else
    log_warn "Memory limit verification failed: expected ${MEMORY_MB}MB, got ${VERIFY_MEMORY_MB}MB"
    # Count it, so the container is not silently missing from the summary.
    FAILED_COUNT=$((FAILED_COUNT + 1))
  fi

  # Note: Container may need restart for changes to take full effect
  if [ "$status" = "running" ]; then
    log_info "Container is running. Changes will apply on next restart or can be applied now."
    log_info "To apply immediately: ssh root@$PROXMOX_HOST 'pct reboot $vmid'"
  fi
done
# Closing banner: blank line, rule, title, rule, blank line.
printf '%b\n' \
  "" \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  "${BLUE}Summary${NC}" \
  "${BLUE}━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━${NC}" \
  ""

# Totals: each line appears only when its counter is non-zero.
if [[ "$SUCCESS_COUNT" -gt 0 ]]; then
  log_success "Successfully updated $SUCCESS_COUNT container(s)"
fi
if [[ "$FAILED_COUNT" -gt 0 ]]; then
  log_error "Failed to update $FAILED_COUNT container(s)"
fi

# Follow-up guidance for the operator.
printf '\n'
log_info "Memory limits have been updated. Containers may need to be restarted"
log_info "for the new limits to take full effect."
printf '\n'
log_info "To restart all containers:"
# Printed with plain echo (no -e) so the backslash in the awk program survives.
echo " ssh root@$PROXMOX_HOST 'for vmid in \$(pct list | tail -n +2 | awk \"{print \\\$1}\"); do pct reboot \$vmid; done'"
printf '\n'
log_success "Memory limit update complete!"