chore: sync workspace — configs, docs, scripts, CI, pnpm, submodules
Some checks failed
Deploy to Phoenix / validate (push) Failing after 15s
Deploy to Phoenix / deploy (push) Has been skipped

- Submodule pins: dbis_core, cross-chain-pmm-lps, mcp-proxmox (local, push may be pending), metamask-integration, smom-dbis-138
- Atomic swap + cross-chain-pmm-lps-publish, deploy-portal workflow, phoenix deploy-targets, routing/aggregator matrices
- Docs, token-lists, forge proxy, phoenix API, runbooks, verify scripts

Made-with: Cursor
This commit is contained in:
defiQUG
2026-04-21 22:01:33 -07:00
parent e6bc7a6d7c
commit b8613905bd
231 changed files with 31657 additions and 2184 deletions

View File

@@ -0,0 +1,59 @@
#!/usr/bin/env bash
# Install prune-proxmox-vzdump-dump.sh on Proxmox hosts and schedule weekly cron (Sun 04:15).
# Loads config/ip-addresses.conf. Requires SSH root key access.
#
# Backup retention alignment:
# - pvesh get /cluster/backup — if you add Datacenter → Scheduled backups, set maxfiles
#   to the same (or lower) as VZDUMP_PRUNE_KEEP so GUI backups and this prune do not fight.
# - Empty job list: retention for /var/lib/vz/dump is this weekly cron + ad-hoc vzdump.
#
# Usage:
#   ./scripts/maintenance/deploy-vzdump-prune-cron-to-proxmox-nodes.sh [--dry-run]
#
# Env: PROXMOX_SSH_USER (default root), VZDUMP_PRUNE_KEEP (default 2),
#      PROXMOX_HOST_* overrides (normally supplied by ip-addresses.conf).
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
# shellcheck source=/dev/null
source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true
PROXMOX_SSH_USER="${PROXMOX_SSH_USER:-root}"
# Array instead of a word-split string so each -o option stays a single argument (SC2086).
ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=10 -o BatchMode=yes)
PRUNE_LOCAL="${SCRIPT_DIR}/prune-proxmox-vzdump-dump.sh"
REMOTE_BIN="/usr/local/sbin/prune-proxmox-vzdump-dump.sh"
KEEP="${VZDUMP_PRUNE_KEEP:-2}"
# KEEP is interpolated into a root-owned cron line below — refuse anything non-numeric.
if ! [[ "$KEEP" =~ ^[0-9]+$ ]]; then
  echo "VZDUMP_PRUNE_KEEP must be a non-negative integer (got: $KEEP)" >&2
  exit 1
fi
HOSTS=(
  "${PROXMOX_HOST_ML110:-192.168.11.10}"
  "${PROXMOX_HOST_R630_01:-192.168.11.11}"
  "${PROXMOX_HOST_R630_02:-192.168.11.12}"
)
DRY_RUN=false
[[ "${1:-}" == "--dry-run" ]] && DRY_RUN=true
if [[ ! -f "$PRUNE_LOCAL" ]]; then
  echo "Missing $PRUNE_LOCAL" >&2
  exit 1
fi
CRON_FILE="/etc/cron.d/vzdump-prune-local"
for ip in "${HOSTS[@]}"; do
  echo "=== ${PROXMOX_SSH_USER}@${ip} ==="
  if $DRY_RUN; then
    echo "Would: scp $PRUNE_LOCAL -> ${ip}:${REMOTE_BIN}"
    echo "Would: install ${CRON_FILE}"
    continue
  fi
  scp "${ssh_opts[@]}" "$PRUNE_LOCAL" "${PROXMOX_SSH_USER}@${ip}:${REMOTE_BIN}"
  # Stream the cron.d file over the same SSH session that fixes the script's mode.
  {
    echo 'SHELL=/bin/sh'
    echo 'PATH=/usr/sbin:/usr/bin:/sbin:/bin'
    echo "# vzdump retention — see repo scripts/maintenance/prune-proxmox-vzdump-dump.sh"
    echo "15 4 * * 0 root ${REMOTE_BIN} ${KEEP} >>/var/log/vzdump-prune.log 2>&1"
  } | ssh "${ssh_opts[@]}" "${PROXMOX_SSH_USER}@${ip}" "chmod 755 ${REMOTE_BIN} && cat > ${CRON_FILE} && chmod 644 ${CRON_FILE}"
  echo "Installed ${REMOTE_BIN} and ${CRON_FILE} (keep=${KEEP})."
done
echo "Done. Logs on nodes: /var/log/vzdump-prune.log (after first run)."

View File

@@ -5,7 +5,7 @@
# the rest stay at head so the restarted node syncs quickly and consensus can continue.
#
# Usage: ./scripts/maintenance/fix-block-production-staggered-restart.sh [--dry-run]
# Requires: SSH to Proxmox hosts (192.168.11.10 ML110, 192.168.11.11 R630-01, 192.168.11.12 R630-02)
# Requires: SSH to Proxmox hosts (192.168.11.11 R630-01, 192.168.11.13 R630-03)
set -euo pipefail
@@ -28,8 +28,8 @@ log_warn() { echo -e "${YELLOW}[WARN]${NC} $1"; }
# Order: restart one at a time; wait between so restarted node can sync from others
# VMID : host
VALIDATORS=(
"1004:${PROXMOX_HOST_ML110:-192.168.11.10}"
"1003:${PROXMOX_HOST_ML110:-192.168.11.10}"
"1004:${PROXMOX_HOST_R630_03:-192.168.11.13}"
"1003:${PROXMOX_HOST_R630_03:-192.168.11.13}"
"1002:${PROXMOX_HOST_R630_01:-192.168.11.11}"
"1001:${PROXMOX_HOST_R630_01:-192.168.11.11}"
"1000:${PROXMOX_HOST_R630_01:-192.168.11.11}"

View File

@@ -0,0 +1,65 @@
#!/usr/bin/env bash
# Fleet vzdump of all *running* LXC on a Proxmox node (intended: r630-01 via cron).
# - Uses zstd (faster / often better than gzip for this workload; extension .tar.zst).
# - Retention: do NOT use rough mtime deletes here — keep /etc/cron.d/vzdump-prune-local
# (prune-proxmox-vzdump-dump.sh keep=2) as the cap on /var/lib/vz/dump.
# Install (from repo, r630-01):
# scp scripts/maintenance/proxmox-backup-all-running-ct.sh root@192.168.11.11:/usr/local/bin/proxmox-backup.sh
# ssh root@192.168.11.11 'chmod 755 /usr/local/bin/proxmox-backup.sh'
# Cron (root on r630-01, single daily run with lock):
# 0 2 * * * /usr/bin/flock -n /var/lock/proxmox-backup.lock /usr/local/bin/proxmox-backup.sh
# To skip a few very large VMIDs (space-separated) on that run only, use:
# VZDUMP_SKIP_VMIDS="2101 2500" /usr/local/bin/proxmox-backup.sh
# Or: VZDUMP_COMPRESS=gzip if you must match legacy .tar.gz (not recommended).
#
set -euo pipefail
# Every knob below can be overridden from the environment.
COMPRESS="${VZDUMP_COMPRESS:-zstd}"
# Space-separated VMIDs to skip (e.g. test CTs)
SKIP_VMIDS="${VZDUMP_SKIP_VMIDS:-}"
BACKUP_STORAGE="${BACKUP_STORAGE:-local}"
LOG_DIR="${LOG_DIR:-/var/log/proxmox-backups}"
LOG_FILE="${LOG_DIR}/backup_$(date +%Y%m%d).log"
mkdir -p "$LOG_DIR"
# Print a timestamped line to stdout and append it to the daily log file.
log() {
  echo "[$(date '+%Y-%m-%d %H:%M:%S')] $*" | tee -a "$LOG_FILE"
}
# Return 0 when VMID $1 appears in the space-separated SKIP_VMIDS list, 1 otherwise.
is_skipped() {
  local candidate="$1" id
  for id in $SKIP_VMIDS; do  # intentional word-splitting of the list
    if [[ "$id" == "$candidate" ]]; then
      return 0
    fi
  done
  return 1
}
log "Starting backup job (compress=$COMPRESS storage=$BACKUP_STORAGE)..."
# Column 2 of `pct list` is the status; keep only running CTs (skip the header row).
mapfile -t vmids < <(pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}')
if ((${#vmids[@]} == 0)); then
  log "No running containers."
  exit 0
fi
log "VMIDs: ${vmids[*]}"
# Pick the runner ONCE — the check is loop-invariant. ionice -c2 -n7 keeps vzdump
# from starving guest I/O; fall back to plain nice where ionice is unavailable.
if command -v ionice >/dev/null 2>&1; then
  run=(nice ionice -c2 -n7 vzdump)
else
  run=(nice vzdump)
fi
ok=0
fail=0
for vmid in "${vmids[@]}"; do
  is_skipped "$vmid" && { log "SKIP $vmid (VZDUMP_SKIP_VMIDS)"; continue; }
  log "vzdump $vmid..."
  if "${run[@]}" "$vmid" --storage "$BACKUP_STORAGE" --compress "$COMPRESS" --mode snapshot --quiet; then
    log "OK $vmid"
    ok=$((ok + 1))
  else
    log "FAIL $vmid (non-zero exit)"
    fail=$((fail + 1))
  fi
done
log "Done. success=$ok fail=$fail. Retention: /usr/local/sbin/prune-proxmox-vzdump-dump.sh (weekly cron)."
# Deliberate exit 0 even on per-CT failures: the flock'd cron job should not be
# treated as crashed; failures are surfaced via the FAIL lines in the daily log.
exit 0

View File

@@ -0,0 +1,33 @@
#!/usr/bin/env bash
# Remove old vzdump *.log files in /var/lib/vz/dump (partial/failed run leftovers).
# Usage:
#   ./scripts/maintenance/prune-orphan-vzdump-logs.sh 192.168.11.12
#   ./scripts/maintenance/prune-orphan-vzdump-logs.sh 192.168.11.12 --apply
# Env: MIN_AGE_DAYS=90 VZDUMP_DIR=/var/lib/vz/dump
#
set -euo pipefail
MIN_AGE_DAYS="${MIN_AGE_DAYS:-90}"
DUMP="${VZDUMP_DIR:-/var/lib/vz/dump}"
HOST="${1:-}"
APPLY=0
[[ "${2:-}" == "--apply" ]] && APPLY=1
if [[ -z "$HOST" ]]; then
  echo "Usage: $0 <pve_ip> [--apply]" >&2
  exit 1
fi
# MIN_AGE_DAYS ends up as a find(1) -mtime argument on the remote node — refuse
# anything non-numeric before it crosses the SSH boundary.
if ! [[ "$MIN_AGE_DAYS" =~ ^[0-9]+$ ]]; then
  echo "MIN_AGE_DAYS must be a non-negative integer (got: $MIN_AGE_DAYS)" >&2
  exit 1
fi
# printf %q shell-quotes each value so the remote shell cannot word-split or
# reinterpret it (a bare VAR=$val prefix would break on spaces / metacharacters).
remote_env=$(printf 'MIN_AGE_DAYS=%q DUMP=%q APPLY=%q' "$MIN_AGE_DAYS" "$DUMP" "$APPLY")
ssh -o ConnectTimeout=10 -o BatchMode=yes -o StrictHostKeyChecking=no "root@$HOST" \
  "$remote_env bash" <<'NODERUN'
set -euo pipefail
c=$(find "$DUMP" -maxdepth 1 -name "vzdump-*.log" -mtime "+${MIN_AGE_DAYS}" 2>/dev/null | wc -l)
echo "[$(hostname)] $DUMP: $c log file(s) mtime +${MIN_AGE_DAYS}d"
if (( c == 0 )); then exit 0; fi
if [[ "$APPLY" == "1" ]]; then
  find "$DUMP" -maxdepth 1 -name "vzdump-*.log" -mtime "+${MIN_AGE_DAYS}" -print -delete
  echo "Deleted."
else
  find "$DUMP" -maxdepth 1 -name "vzdump-*.log" -mtime "+${MIN_AGE_DAYS}" -ls 2>/dev/null | head -20
  echo "Dry-run. Re-run with same host and --apply"
fi
NODERUN

View File

@@ -0,0 +1,127 @@
#!/usr/bin/env bash
# Prune old Proxmox vzdump archives under /var/lib/vz/dump (the "local" dir storage).
# Keeps the N newest archive per VMID for each family:
# - vzdump-lxc-<vmid>-*.(tar.gz|tar.zst|vma.zst|vma.gz)
# - vzdump-qemu-<vmid>-*.(tar.gz|tar.zst|vma.zst|vma.gz)
# - vzdump-<vmid>-<epoch>.(tar.gz|tar.zst) (legacy naming without lxc/qemu)
# Removes matching .log / .notes sidecars when removing an archive.
#
# Run ON the Proxmox node as root, or via SSH:
# ssh root@192.168.11.11 'bash -s' < scripts/maintenance/prune-proxmox-vzdump-dump.sh -- 2
#
# Args: [KEEP] (default 2). Env: VZDUMP_DIR=/var/lib/vz/dump
#
set -euo pipefail
# Args: [KEEP] (default 2) — number of newest archives retained per VMID.
KEEP="${1:-2}"
DUMP="${VZDUMP_DIR:-/var/lib/vz/dump}"
# Reject anything that is not a string of digits before using it in arithmetic.
case "$KEEP" in
  '' | *[!0-9]*)
    echo "Usage: $0 [KEEP>=1]" >&2
    exit 1
    ;;
esac
if (( KEEP < 1 )); then
  echo "Usage: $0 [KEEP>=1]" >&2
  exit 1
fi
if ! cd "$DUMP"; then
  echo "Cannot cd to $DUMP" >&2
  exit 1
fi
# Unmatched globs must expand to nothing, not to their literal pattern text.
shopt -s nullglob
removed=0
# Delete the .log / .notes sidecar files vzdump writes next to an archive.
# $1 = archive path with the extension chain already stripped.
remove_sidecars() {
  local stem="$1"
  local suffix
  for suffix in .log .notes .notes.zst; do
    rm -f -- "${stem}${suffix}" 2>/dev/null || true
  done
}
# Prune archives named <prefix>-<vmid>-* down to the KEEP newest (by mtime).
# $1 = archive prefix (vzdump-lxc or vzdump-qemu), $2 = VMID.
# Reads global KEEP; increments global removed; removes sidecars of each victim.
prune_family_globs() {
  local family="$1" id="$2"
  local candidate stem idx total
  local -a found=() newest_first=()
  for candidate in \
    "${family}-${id}-"*.tar.gz \
    "${family}-${id}-"*.tar.zst \
    "${family}-${id}-"*.vma.zst \
    "${family}-${id}-"*.vma.gz; do
    [[ -f "$candidate" ]] && found+=("$candidate")
  done
  ((${#found[@]} > KEEP)) || return 0
  # Archive names are vzdump-generated (no spaces/newlines), so ls -t is safe here.
  mapfile -t newest_first < <(ls -t "${found[@]}" 2>/dev/null)
  total=${#newest_first[@]}
  for ((idx = KEEP; idx < total; idx++)); do
    candidate="${newest_first[idx]}"
    stem="${candidate%.tar.gz}"
    stem="${stem%.tar.zst}"
    stem="${stem%.vma.zst}"
    stem="${stem%.vma.gz}"
    rm -f -- "$candidate"
    remove_sidecars "$stem"
    removed=$((removed + 1))
  done
}
# Legacy naming without the lxc/qemu tag: vzdump-<vmid>-<digits>.tar.* etc.
# Same retention policy as prune_family_globs; keeps the KEEP newest per VMID.
prune_legacy_vmid() {
  local id="$1"
  local candidate stem idx total
  local -a found=() newest_first=()
  for candidate in \
    "vzdump-${id}-"*.tar.gz \
    "vzdump-${id}-"*.tar.zst \
    "vzdump-${id}-"*.vma.zst \
    "vzdump-${id}-"*.vma.gz; do
    [[ -f "$candidate" ]] || continue
    case "$candidate" in
      vzdump-lxc-* | vzdump-qemu-*) continue ;;  # tagged families are handled elsewhere
    esac
    found+=("$candidate")
  done
  ((${#found[@]} > KEEP)) || return 0
  mapfile -t newest_first < <(ls -t "${found[@]}" 2>/dev/null)
  total=${#newest_first[@]}
  for ((idx = KEEP; idx < total; idx++)); do
    candidate="${newest_first[idx]}"
    stem="${candidate%.tar.gz}"
    stem="${stem%.tar.zst}"
    stem="${stem%.vma.zst}"
    stem="${stem%.vma.gz}"
    rm -f -- "$candidate"
    remove_sidecars "$stem"
    removed=$((removed + 1))
  done
}
declare -A vmid_lxc=()
declare -A vmid_qemu=()
declare -A vmid_legacy=()
# Single pass over the dump dir: classify every file into a per-family VMID set.
for f in vzdump-*; do
  [[ -f "$f" ]] || continue
  if [[ "$f" =~ ^vzdump-lxc-([0-9]+)- ]]; then
    vmid_lxc["${BASH_REMATCH[1]}"]=1
  elif [[ "$f" =~ ^vzdump-qemu-([0-9]+)- ]]; then
    vmid_qemu["${BASH_REMATCH[1]}"]=1
  elif [[ "$f" =~ \.(tar\.gz|tar\.zst|vma\.zst|vma\.gz)$ ]] \
    && [[ "$f" =~ ^vzdump-([0-9]+)-[0-9_]+ ]]; then
    # Legacy naming (no lxc/qemu tag); only real archive extensions count.
    vmid_legacy["${BASH_REMATCH[1]}"]=1
  fi
done
for vmid in "${!vmid_lxc[@]}"; do prune_family_globs vzdump-lxc "$vmid"; done
for vmid in "${!vmid_qemu[@]}"; do prune_family_globs vzdump-qemu "$vmid"; done
for vmid in "${!vmid_legacy[@]}"; do prune_legacy_vmid "$vmid"; done
echo "prune-proxmox-vzdump-dump: removed ${removed} archive(s); keep=${KEEP} newest per VMID in ${DUMP}"
df -h "$DUMP" 2>/dev/null || df -h /var/lib/vz

View File

@@ -0,0 +1,19 @@
#!/usr/bin/env bash
# Quick post-maintenance check: corosync quorum and node list from one Proxmox host.
# Usage: ./scripts/maintenance/verify-pve-cluster-health.sh [r630-01-ip]
# Requires SSH root@host (BatchMode + key).
#
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# shellcheck source=/dev/null
source "${SCRIPT_DIR}/../../config/ip-addresses.conf" 2>/dev/null || true
IP="${1:-${PROXMOX_HOST_R630_01:-192.168.11.11}}"
# Array instead of a word-split string so each -o option stays one argument (SC2086).
ssh_opts=(-o StrictHostKeyChecking=no -o ConnectTimeout=12 -o BatchMode=yes)
echo "=== pvecm status ($IP) ==="
ssh "${ssh_opts[@]}" "root@$IP" "pvecm status; echo '---'; pvesh get /nodes --output-format json-pretty 2>/dev/null | head -80" || {
  echo "SSH or pvecm failed."
  exit 1
}
echo "OK. Expect: Quorate: Yes, and expected nodes online."