chore: sync workspace — configs, docs, scripts, CI, pnpm, submodules
- Submodule pins: dbis_core, cross-chain-pmm-lps, mcp-proxmox (local, push may be pending), metamask-integration, smom-dbis-138 - Atomic swap + cross-chain-pmm-lps-publish, deploy-portal workflow, phoenix deploy-targets, routing/aggregator matrices - Docs, token-lists, forge proxy, phoenix API, runbooks, verify scripts Made-with: Cursor
This commit is contained in:
59
scripts/maintenance/deploy-vzdump-prune-cron-to-proxmox-nodes.sh
Executable file
59
scripts/maintenance/deploy-vzdump-prune-cron-to-proxmox-nodes.sh
Executable file
@@ -0,0 +1,59 @@
|
||||
#!/usr/bin/env bash
# Install prune-proxmox-vzdump-dump.sh on Proxmox hosts and schedule weekly cron (Sun 04:15).
# Loads config/ip-addresses.conf. Requires SSH root key access.
#
# Backup retention alignment:
# - pvesh get /cluster/backup — if you add Datacenter → Scheduled backups, set maxfiles
#   to the same (or lower) as VZDUMP_PRUNE_KEEP so GUI backups and this prune do not fight.
# - Empty job list: retention for /var/lib/vz/dump is this weekly cron + ad-hoc vzdump.
#
# Usage:
#   ./scripts/maintenance/deploy-vzdump-prune-cron-to-proxmox-nodes.sh [--dry-run]
#
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true

PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-root}"
# Array form so each -o option stays one word under quoted expansion (SC2086-safe).
SSH_OPTS=(-o StrictHostKeyChecking=no -o ConnectTimeout=10 -o BatchMode=yes)
PRUNE_LOCAL="${SCRIPT_DIR}/prune-proxmox-vzdump-dump.sh"
REMOTE_BIN="/usr/local/sbin/prune-proxmox-vzdump-dump.sh"
KEEP="${VZDUMP_PRUNE_KEEP:-2}"

HOSTS=(
  "${PROXMOX_HOST_ML110:-192.168.11.10}"
  "${PROXMOX_HOST_R630_01:-192.168.11.11}"
  "${PROXMOX_HOST_R630_02:-192.168.11.12}"
)

DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true

if [[ ! -f "$PRUNE_LOCAL" ]]; then
  echo "Missing $PRUNE_LOCAL" >&2
  exit 1
fi

CRON_FILE="/etc/cron.d/vzdump-prune-local"

for ip in "${HOSTS[@]}"; do
  echo "=== ${PROXMOX_SSH_USER}@${ip} ==="
  if $DRY_RUN; then
    echo "Would: scp $PRUNE_LOCAL -> ${ip}:${REMOTE_BIN}"
    echo "Would: install ${CRON_FILE}"
    continue
  fi
  scp "${SSH_OPTS[@]}" "$PRUNE_LOCAL" "${PROXMOX_SSH_USER}@${ip}:${REMOTE_BIN}"
  # Cron file is generated locally and streamed to the node over the same SSH
  # session that fixes permissions, so a partial transfer leaves no half-set-up cron.
  {
    echo 'SHELL=/bin/sh'
    echo 'PATH=/usr/sbin:/usr/bin:/sbin:/bin'
    echo "# vzdump retention — see repo scripts/maintenance/prune-proxmox-vzdump-dump.sh"
    echo "15 4 * * 0 root ${REMOTE_BIN} ${KEEP} >>/var/log/vzdump-prune.log 2>&1"
  } | ssh "${SSH_OPTS[@]}" "${PROXMOX_SSH_USER}@${ip}" "chmod 755 ${REMOTE_BIN} && cat > ${CRON_FILE} && chmod 644 ${CRON_FILE}"
  echo "Installed ${REMOTE_BIN} and ${CRON_FILE} (keep=${KEEP})."
done

echo "Done. Logs on nodes: /var/log/vzdump-prune.log (after first run)."
|
||||
@@ -5,7 +5,7 @@
|
||||
# the rest stay at head so the restarted node syncs quickly and consensus can continue.
|
||||
#
|
||||
# Usage: ./scripts/maintenance/fix-block-production-staggered-restart.sh [--dry-run]
|
||||
# Requires: SSH to Proxmox hosts (192.168.11.10 ML110, 192.168.11.11 R630-01, 192.168.11.12 R630-02)
|
||||
# Requires: SSH to Proxmox hosts (192.168.11.11 R630-01, 192.168.11.13 R630-03)
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
@@ -28,8 +28,8 @@ log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
|
||||
# Order: restart one at a time; wait between so restarted node can sync from others
|
||||
# VMID : host
|
||||
VALIDATORS=(
|
||||
"1004:${PROXMOX_HOST_ML110:-192.168.11.10}"
|
||||
"1003:${PROXMOX_HOST_ML110:-192.168.11.10}"
|
||||
"1004:${PROXMOX_HOST_R630_03:-192.168.11.13}"
|
||||
"1003:${PROXMOX_HOST_R630_03:-192.168.11.13}"
|
||||
"1002:${PROXMOX_HOST_R630_01:-192.168.11.11}"
|
||||
"1001:${PROXMOX_HOST_R630_01:-192.168.11.11}"
|
||||
"1000:${PROXMOX_HOST_R630_01:-192.168.11.11}"
|
||||
|
||||
65
scripts/maintenance/proxmox-backup-all-running-ct.sh
Executable file
65
scripts/maintenance/proxmox-backup-all-running-ct.sh
Executable file
@@ -0,0 +1,65 @@
|
||||
#!/usr/bin/env bash
# Fleet vzdump of all *running* LXC on a Proxmox node (intended: r630-01 via cron).
# - Uses zstd (faster / often better than gzip for this workload; extension .tar.zst).
# - Retention: do NOT use rough mtime deletes here — keep /etc/cron.d/vzdump-prune-local
#   (prune-proxmox-vzdump-dump.sh keep=2) as the cap on /var/lib/vz/dump.
# Install (from repo, r630-01):
#   scp scripts/maintenance/proxmox-backup-all-running-ct.sh root@192.168.11.11:/usr/local/bin/proxmox-backup.sh
#   ssh root@192.168.11.11 'chmod 755 /usr/local/bin/proxmox-backup.sh'
# Cron (root on r630-01, single daily run with lock):
#   0 2 * * * /usr/bin/flock -n /var/lock/proxmox-backup.lock /usr/local/bin/proxmox-backup.sh
# To skip a few very large VMIDs (space-separated) on that run only, use:
#   VZDUMP_SKIP_VMIDS="2101 2500" /usr/local/bin/proxmox-backup.sh
# Or: VZDUMP_COMPRESS=gzip if you must match legacy .tar.gz (not recommended).
#
set -euo pipefail

BACKUP_STORAGE="${BACKUP_STORAGE:-local}"
LOG_DIR="${LOG_DIR:-/var/log/proxmox-backups}"
LOG_FILE="${LOG_DIR}/backup_$(date +%Y%m%d).log"
COMPRESS="${VZDUMP_COMPRESS:-zstd}"
# Space-separated VMIDs to skip (e.g. test CTs)
SKIP_VMIDS="${VZDUMP_SKIP_VMIDS:-}"

mkdir -p "$LOG_DIR"

# Timestamped log line to stdout and the daily log file.
log() { echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"; }

# Returns 0 if $1 appears in SKIP_VMIDS, 1 otherwise.
is_skipped() {
  local v="$1"
  local s
  # Intentional word-splitting: SKIP_VMIDS is a space-separated list.
  for s in $SKIP_VMIDS; do [[ "$v" == "$s" ]] && return 0; done
  return 1
}

log "Starting backup job (compress=$COMPRESS storage=$BACKUP_STORAGE)..."

mapfile -t vmids < <(pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}')

if ((${#vmids[@]} == 0)); then
  log "No running containers."
  exit 0
fi

log "VMIDs: ${vmids[*]}"

# Loop-invariant: choose the low-priority wrapper once, not once per VMID.
if command -v ionice >/dev/null 2>&1; then
  run=(nice ionice -c2 -n7 vzdump)
else
  run=(nice vzdump)
fi

ok=0
fail=0
for vmid in "${vmids[@]}"; do
  is_skipped "$vmid" && { log "SKIP $vmid (VZDUMP_SKIP_VMIDS)"; continue; }
  log "vzdump $vmid..."
  if "${run[@]}" "$vmid" --storage "$BACKUP_STORAGE" --compress "$COMPRESS" --mode snapshot --quiet; then
    log "OK $vmid"
    ok=$((ok + 1))
  else
    # vzdump already logged details; record the failure and keep going so one
    # bad CT does not abort the rest of the fleet backup.
    log "FAIL $vmid (non-zero exit)"
    fail=$((fail + 1))
  fi
done

log "Done. success=$ok fail=$fail. Retention: /usr/local/sbin/prune-proxmox-vzdump-dump.sh (weekly cron)."
exit 0
|
||||
33
scripts/maintenance/prune-orphan-vzdump-logs.sh
Executable file
33
scripts/maintenance/prune-orphan-vzdump-logs.sh
Executable file
@@ -0,0 +1,33 @@
|
||||
#!/usr/bin/env bash
# Remove old vzdump *.log files in /var/lib/vz/dump (partial/failed run leftovers).
# Usage:
#   ./scripts/maintenance/prune-orphan-vzdump-logs.sh 192.168.11.12
#   ./scripts/maintenance/prune-orphan-vzdump-logs.sh 192.168.11.12 --apply
# Env: MIN_AGE_DAYS=90 VZDUMP_DIR=/var/lib/vz/dump
#
set -euo pipefail
MIN_AGE_DAYS="${MIN_AGE_DAYS:-90}"
DUMP="${VZDUMP_DIR:-/var/lib/vz/dump}"
HOST="${1:-}"
APPLY=0
[[ "${2:-}" == "--apply" ]] && APPLY=1

if [[ -z "$HOST" ]]; then
  echo "Usage: $0 <pve_ip> [--apply]" >&2
  exit 1
fi

# MIN_AGE_DAYS is interpolated into find -mtime on the node; reject garbage early.
if ! [[ "$MIN_AGE_DAYS" =~ ^[0-9]+$ ]]; then
  echo "MIN_AGE_DAYS must be a non-negative integer, got: $MIN_AGE_DAYS" >&2
  exit 1
fi

# printf %q shell-quotes each value so it survives the remote shell's
# word-splitting (ssh joins its arguments with spaces before remote eval);
# a DUMP path containing spaces would otherwise break the remote assignment.
ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=no "root@$HOST" \
  "MIN_AGE_DAYS=$(printf '%q' "$MIN_AGE_DAYS") DUMP=$(printf '%q' "$DUMP") APPLY=$(printf '%q' "$APPLY") bash" <<'NODERUN'
set -euo pipefail
c=$(find "$DUMP" -maxdepth 1 -name "vzdump-*.log" -mtime "+${MIN_AGE_DAYS}" 2>/dev/null | wc -l)
echo "[$(hostname)] $DUMP: $c log file(s) mtime +${MIN_AGE_DAYS}d"
if (( c == 0 )); then exit 0; fi
if [[ "$APPLY" == "1" ]]; then
  find "$DUMP" -maxdepth 1 -name "vzdump-*.log" -mtime "+${MIN_AGE_DAYS}" -print -delete
  echo "Deleted."
else
  find "$DUMP" -maxdepth 1 -name "vzdump-*.log" -mtime "+${MIN_AGE_DAYS}" -ls 2>/dev/null | head -20
  echo "Dry-run. Re-run with same host and --apply"
fi
NODERUN
|
||||
127
scripts/maintenance/prune-proxmox-vzdump-dump.sh
Executable file
127
scripts/maintenance/prune-proxmox-vzdump-dump.sh
Executable file
@@ -0,0 +1,127 @@
|
||||
#!/usr/bin/env bash
# Prune old Proxmox vzdump archives under /var/lib/vz/dump (the "local" dir storage).
# Keeps the N newest archive per VMID for each family:
#   - vzdump-lxc-<vmid>-*.(tar.gz|tar.zst|vma.zst|vma.gz)
#   - vzdump-qemu-<vmid>-*.(tar.gz|tar.zst|vma.zst|vma.gz)
#   - vzdump-<vmid>-<epoch>.(tar.gz|tar.zst)  (legacy naming without lxc/qemu)
# Removes matching .log / .notes sidecars when removing an archive.
#
# Run ON the Proxmox node as root, or via SSH:
#   ssh root@192.168.11.11 'bash -s' < scripts/maintenance/prune-proxmox-vzdump-dump.sh -- 2
#
# Args: [KEEP] (default 2). Env: VZDUMP_DIR=/var/lib/vz/dump
#
set -euo pipefail

KEEP="${1:-2}"
DUMP="${VZDUMP_DIR:-/var/lib/vz/dump}"

if ! [[ "$KEEP" =~ ^[0-9]+$ ]] || ((KEEP < 1)); then
  echo "Usage: $0 [KEEP>=1]" >&2
  exit 1
fi

cd "$DUMP" || {
  echo "Cannot cd to $DUMP" >&2
  exit 1
}

shopt -s nullglob
removed=0  # global tally, incremented by prune_archive_list

# Remove .log / .notes sidecars for archive basename $1 (extension already stripped).
remove_sidecars() {
  local base="$1"
  rm -f -- "${base}.log" "${base}.notes" "${base}.notes.zst" 2>/dev/null || true
}

# Shared deletion core: keep the KEEP newest of the archives passed as
# arguments, delete the rest plus their sidecars. Newness = mtime via ls -t;
# vzdump archive names contain no whitespace/newlines, so parsing ls is safe here.
prune_archive_list() {
  local -a sorted=()
  local f n i base
  (($# == 0)) && return 0
  mapfile -t sorted < <(ls -t -- "$@" 2>/dev/null)
  n=${#sorted[@]}
  ((n > KEEP)) || return 0
  for ((i = KEEP; i < n; i++)); do
    f="${sorted[i]}"
    base="${f%.tar.gz}"
    base="${base%.tar.zst}"
    base="${base%.vma.zst}"
    base="${base%.vma.gz}"
    rm -f -- "$f"
    remove_sidecars "$base"
    ((removed += 1)) || true  # guard: (( )) returns 1 when result is 0, tripping set -e
  done
}

# Collect archives for one family. $1 = prefix e.g. vzdump-lxc, $2 = vmid.
prune_family_globs() {
  local prefix="$1"
  local vmid="$2"
  local -a archives=()
  local f
  for f in \
    "${prefix}-${vmid}-"*.tar.gz \
    "${prefix}-${vmid}-"*.tar.zst \
    "${prefix}-${vmid}-"*.vma.zst \
    "${prefix}-${vmid}-"*.vma.gz; do
    [[ -f "$f" ]] || continue
    archives+=("$f")
  done
  ((${#archives[@]} == 0)) && return 0
  prune_archive_list "${archives[@]}"
}

# Legacy naming: vzdump-<vmid>-<digits>.tar.* (no lxc/qemu component).
prune_legacy_vmid() {
  local vmid="$1"
  local -a archives=()
  local f
  for f in vzdump-"${vmid}"-*.tar.gz vzdump-"${vmid}"-*.tar.zst vzdump-"${vmid}"-*.vma.zst vzdump-"${vmid}"-*.vma.gz; do
    [[ -f "$f" ]] || continue
    [[ "$f" == vzdump-lxc-* || "$f" == vzdump-qemu-* ]] && continue
    archives+=("$f")
  done
  ((${#archives[@]} == 0)) && return 0
  prune_archive_list "${archives[@]}"
}

# Discover which VMIDs exist in each naming family.
declare -A vmid_lxc=()
declare -A vmid_qemu=()
declare -A vmid_legacy=()

for f in vzdump-lxc-*; do
  [[ -f "$f" ]] || continue
  [[ "$f" =~ ^vzdump-lxc-([0-9]+)- ]] || continue
  vmid_lxc["${BASH_REMATCH[1]}"]=1
done
for f in vzdump-qemu-*; do
  [[ -f "$f" ]] || continue
  [[ "$f" =~ ^vzdump-qemu-([0-9]+)- ]] || continue
  vmid_qemu["${BASH_REMATCH[1]}"]=1
done
for f in vzdump-[0-9]*-*; do
  [[ -f "$f" ]] || continue
  [[ "$f" == vzdump-lxc-* || "$f" == vzdump-qemu-* ]] && continue
  [[ "$f" =~ \.(tar\.gz|tar\.zst|vma\.zst|vma\.gz)$ ]] || continue
  [[ "$f" =~ ^vzdump-([0-9]+)-[0-9_]+ ]] || continue
  vmid_legacy["${BASH_REMATCH[1]}"]=1
done

for vmid in "${!vmid_lxc[@]}"; do
  prune_family_globs vzdump-lxc "$vmid"
done
for vmid in "${!vmid_qemu[@]}"; do
  prune_family_globs vzdump-qemu "$vmid"
done
for vmid in "${!vmid_legacy[@]}"; do
  prune_legacy_vmid "$vmid"
done

echo "prune-proxmox-vzdump-dump: removed ${removed} archive(s); keep=${KEEP} newest per VMID in ${DUMP}"
df -h "$DUMP" 2>/dev/null || df -h /var/lib/vz
|
||||
19
scripts/maintenance/verify-pve-cluster-health.sh
Executable file
19
scripts/maintenance/verify-pve-cluster-health.sh
Executable file
@@ -0,0 +1,19 @@
|
||||
#!/usr/bin/env bash
# Quick post-maintenance check: corosync quorum and node list from one Proxmox host.
# Usage: ./scripts/maintenance/verify-pve-cluster-health.sh [r630-01-ip]
# Requires SSH root@host (BatchMode + key).
#
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/../../config/ip-addresses.conf" 2>/dev/null || true

IP="${1:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"
# Array form so each -o option stays one word under quoted expansion (SC2086-safe).
SSH_OPTS=(-o StrictHostKeyChecking=no -o ConnectTimeout=12 -o BatchMode=yes)

echo "=== pvecm status ($IP) ==="
ssh "${SSH_OPTS[@]}" "root@$IP" "pvecm status; echo '---'; pvesh get /nodes --output-format json-pretty 2>/dev/null | head -80" || {
  echo "SSH or pvecm failed."
  exit 1
}
echo "OK. Expect: Quorate: Yes, and expected nodes online."
|
||||
Reference in New Issue
Block a user