fix(storage-monitor): subshell-safe ALERTS, ordered node loop; doc fleet pass
- Replace the pipe-into-while loops with process substitution so alerts accumulate in the main shell.
- Iterate nodes ml110 → r630-04 in a fixed order; tolerate unreachable optional nodes.
- STORAGE_GROWTH_AND_HEALTH: 2026-03-28 follow-up (7811 syslog, 10100 resize, I/O pass, ZFS scrub, md0 healthy, table refresh for r630-01/02/ml110).

Made-with: Cursor
This commit is contained in:
@@ -48,8 +48,8 @@ NODES[r630-02]="${PROXMOX_HOST_R630_02:-192.168.11.12}:password"
|
||||
# r630-03: address comes from IP_SERVICE_13, falling back to 192.168.11.13.
# The original nested the same ${IP_SERVICE_13:-...} default six times; one level is equivalent.
# NOTE(review): the text after ':' looks like an inline credential — consider sourcing it from the environment or a secrets file.
NODES[r630-03]="${IP_SERVICE_13:-192.168.11.13}:L@kers2010"
|
||||
# r630-04: address comes from IP_DEVICE_14, falling back to 192.168.11.14.
# The original nested the same ${IP_DEVICE_14:-...} default six times; one level is equivalent.
# NOTE(review): the text after ':' looks like an inline credential — consider sourcing it from the environment or a secrets file.
NODES[r630-04]="${IP_DEVICE_14:-192.168.11.14}:L@kers2010"
|
||||
|
||||
# Alert tracking
|
||||
declare -a ALERTS
|
||||
# Alert tracking (must stay in the main shell — appends made inside a piped `while` loop run in a subshell and are lost)
|
||||
ALERTS=()
|
||||
|
||||
# SSH helper function
|
||||
ssh_node() {
|
||||
@@ -166,22 +166,22 @@ monitor_node() {
|
||||
return 1
|
||||
fi
|
||||
|
||||
# Process each storage line (skip header)
|
||||
echo "$storage_status" | tail -n +2 | while IFS= read -r line; do
|
||||
# Process each storage line (skip header) — process substitution keeps ALERTS in this shell
|
||||
while IFS= read -r line; do
|
||||
if [ -n "$line" ]; then
|
||||
check_storage_usage "$hostname" "$line"
|
||||
fi
|
||||
done
|
||||
done < <(echo "$storage_status" | tail -n +2)
|
||||
|
||||
# Check volume groups
|
||||
local vgs_info=$(ssh_node "$hostname" 'vgs --units g --noheadings -o vg_name,vg_size,vg_free 2>/dev/null' || echo "")
|
||||
|
||||
if [ -n "$vgs_info" ]; then
|
||||
echo "$vgs_info" | while IFS= read -r line; do
|
||||
while IFS= read -r line; do
|
||||
if [ -n "$line" ]; then
|
||||
check_vg_free_space "$hostname" "$line"
|
||||
fi
|
||||
done
|
||||
done < <(echo "$vgs_info")
|
||||
fi
|
||||
|
||||
# Log storage status
|
||||
@@ -199,7 +199,7 @@ monitor_node() {
|
||||
|
||||
# Send alerts (can be extended to email, Slack, etc.)
|
||||
send_alerts() {
|
||||
if [ ${#ALERTS[@]} -eq 0 ]; then
|
||||
if [[ ${#ALERTS[@]} -eq 0 ]]; then
|
||||
log_success "No storage alerts"
|
||||
return 0
|
||||
fi
|
||||
@@ -244,7 +244,8 @@ generate_summary() {
|
||||
echo "=== Proxmox Storage Summary $(date) ==="
|
||||
echo ""
|
||||
echo "Nodes Monitored:"
|
||||
for hostname in "${!NODES[@]}"; do
|
||||
for hostname in ml110 r630-01 r630-02 r630-03 r630-04; do
|
||||
[[ -n "${NODES[$hostname]:-}" ]] || continue
|
||||
if check_node "$hostname"; then
|
||||
echo " ✅ $hostname"
|
||||
else
|
||||
@@ -280,9 +281,10 @@ main() {
|
||||
echo "Date: $(date)"
|
||||
echo ""
|
||||
|
||||
# Monitor all nodes
|
||||
for hostname in "${!NODES[@]}"; do
|
||||
monitor_node "$hostname"
|
||||
# Monitor all nodes (fixed order for readable logs; optional nodes may be unreachable)
|
||||
for hostname in ml110 r630-01 r630-02 r630-03 r630-04; do
|
||||
[[ -n "${NODES[$hostname]:-}" ]] || continue
|
||||
monitor_node "$hostname" || true
|
||||
done
|
||||
|
||||
# Send alerts
|
||||
@@ -297,7 +299,8 @@ main() {
|
||||
status)
|
||||
# Show current status
|
||||
echo "=== Current Storage Status ==="
|
||||
for hostname in "${!NODES[@]}"; do
|
||||
for hostname in ml110 r630-01 r630-02 r630-03 r630-04; do
|
||||
[[ -n "${NODES[$hostname]:-}" ]] || continue
|
||||
if check_node "$hostname"; then
|
||||
echo ""
|
||||
echo "--- $hostname ---"
|
||||
|
||||
Reference in New Issue
Block a user