Files
proxmox/scripts/verify/run-phase1-discovery.sh
defiQUG 6f53323eae
All checks were successful
Deploy to Phoenix / deploy (push) Successful in 6s
Finalize DBIS infra verification and runtime baselines
2026-03-28 19:18:32 -07:00

219 lines
6.8 KiB
Bash
Executable File
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env bash
# Phase 1 — Reality mapping (read-only): compose Proxmox/Besu audits and optional
# Hyperledger CT probes into a timestamped report under reports/phase1-discovery/.
#
# Usage (repo root, LAN + SSH to Proxmox recommended):
# bash scripts/verify/run-phase1-discovery.sh
# HYPERLEDGER_PROBE=1 bash scripts/verify/run-phase1-discovery.sh # SSH pct exec smoke checks on r630-02
#
# Env: PROXMOX_HOSTS, SSH_USER, SSH_OPTS (same as audit-proxmox-operational-template.sh)
# HYPERLEDGER_PROBE=1 to run optional Fabric/Indy/FireFly container checks (requires SSH to r630-02)
# Unset variables and failed pipelines are errors. `-e` is not enabled: probe
# failures are captured per section by append_cmd instead of aborting the run.
set -uo pipefail

# Resolve the repo root relative to this script (two directories up).
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"

# Optional site config; the defaults below apply when it is missing or unreadable.
# shellcheck source=/dev/null
source "$PROJECT_ROOT/config/ip-addresses.conf" 2>/dev/null || true

# Report locations: one Markdown report and one log file per run, named by UTC timestamp.
REPORT_DIR="${REPORT_DIR:-$PROJECT_ROOT/reports/phase1-discovery}"
STAMP="$(date -u +%Y%m%d_%H%M%S)"
MD="$REPORT_DIR/phase1-discovery-${STAMP}.md"
LOG="$REPORT_DIR/phase1-discovery-${STAMP}.log"

# Without -e a failed mkdir would be ignored: every later write to $MD/$LOG
# would fail while the probes still ran, so stop here instead.
if ! mkdir -p "$REPORT_DIR"; then
  echo "ERROR: cannot create report directory: $REPORT_DIR" >&2
  exit 1
fi

# SSH defaults; each can be overridden from the environment.
SSH_USER="${SSH_USER:-root}"
SSH_OPTS="${SSH_OPTS:--o BatchMode=yes -o ConnectTimeout=6 -o StrictHostKeyChecking=accept-new}"
R630_02="${PROXMOX_HOST_R630_02:-192.168.11.12}"
#######################################
# Run a command, capture its combined stdout/stderr, and append the result as
# a fenced "## <title>" section to the Markdown report and to the log file.
# Globals:
#   MD, LOG                   (read)    files the section is appended to
#   PHASE1_CRITICAL_FAILURES  (written) gains "<title> (exit <rc>)" when a
#                                       command with severity "critical" fails
# Arguments:
#   $1    section title
#   $2    severity; only "critical" records failures (default: info)
#   $3..  command and its arguments
# Returns:
#   0; command failures are recorded in the report, not propagated.
#######################################
append_cmd() {
  local title="$1"
  local severity="${2:-info}"
  # Drop title and severity one at a time. `shift 2` fails and shifts nothing
  # when only the title was passed, which left the title to be run as the command.
  shift
  if (( $# > 0 )); then
    shift
  fi
  local rc=0
  local tmp
  if tmp="$(mktemp)"; then
    if (( $# > 0 )); then
      "$@" >"$tmp" 2>&1
      rc=$?
    fi
  else
    # No capture file: do not run the command, but still report the section as failed.
    tmp=""
    rc=1
  fi
  {
    echo ""
    echo "## $title"
    echo ""
    echo '```text'
    if [[ -n "$tmp" ]]; then
      cat "$tmp"
    else
      echo "append_cmd: mktemp failed; command not run"
    fi
    if (( rc != 0 )); then
      echo "[exit $rc]"
    fi
    echo '```'
  } | tee -a "$MD" >>"$LOG"
  if [[ -n "$tmp" ]]; then
    rm -f "$tmp"
  fi
  if (( rc != 0 )) && [[ "$severity" == "critical" ]]; then
    PHASE1_CRITICAL_FAILURES+=("$title (exit $rc)")
  fi
}
# Accumulates "<title> (exit <rc>)" entries appended by append_cmd for
# critical probes that fail.
PHASE1_CRITICAL_FAILURES=()

# Start the Markdown report: title, generation time, doc links, and the static
# dependency diagram. `>` truncates, so this must run before any section is appended.
# NOTE(review): "(Sections 3, 1920)" may be a garbled "19-20" range — confirm
# against the master plan before changing the emitted text.
{
  # Unquoted delimiter: only the $(date ...) substitution is expanded here.
  cat <<HEADER
# Phase 1 discovery report

**Generated (UTC):** $(date -u +%Y-%m-%dT%H:%M:%SZ)

**Runbook:** [docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md](../../docs/03-deployment/PHASE1_DISCOVERY_RUNBOOK.md)

**Doctrine:** [dbis_chain_138_technical_master_plan.md](../../dbis_chain_138_technical_master_plan.md) (Sections 3, 1920)

## Dependency graph (logical)

Same diagram as the runbook; edges reflect documented traffic flow, not live packet capture.

HEADER
  # Quoted delimiter: the diagram (including its backtick fences) is emitted verbatim.
  cat <<'MERMAID'
```mermaid
flowchart TB
subgraph edge [EdgeIngress]
CF[Cloudflare_DNS]
NPM[NPMplus_LXC]
end
subgraph besu [Chain138_Besu]
RPCpub[RPC_public_2201]
RPCcore[RPC_core_2101]
Val[Validators_1000_1004]
Sen[Sentries_1500_1508]
end
subgraph observe [Observability]
BS[Blockscout_5000]
end
subgraph relay [CrossChain]
CCIP[CCIP_relay_r63001_host]
end
subgraph dlt [Hyperledger_optional]
FF[FireFly_6200_6201]
Fab[Fabric_6000_plus]
Indy[Indy_6400_plus]
end
CF --> NPM
NPM --> RPCpub
NPM --> RPCcore
NPM --> BS
RPCpub --> Sen
RPCcore --> Sen
Sen --> Val
CCIP --> RPCpub
FF --> Fab
FF --> Indy
```
MERMAID
} >"$MD"

# Make sure the log file exists even before any section has been appended to it.
touch "$LOG"
# Template-vs-live VMID audit, delegated to a sibling script and recorded as a
# critical section.
append_cmd "Proxmox template vs live VMID audit" critical bash "$PROJECT_ROOT/scripts/verify/audit-proxmox-operational-template.sh"
# Hosts probed by the loops below. Defaults to ML110, R630-01 and R630-02
# (each overridable via its PROXMOX_HOST_* variable) when PROXMOX_HOSTS is
# unset or empty. Note this default is assigned after the audit script above
# has already run.
PROXMOX_HOSTS="${PROXMOX_HOSTS:-${PROXMOX_HOST_ML110:-192.168.11.10} ${PROXMOX_HOST_R630_01:-192.168.11.11} $R630_02}"
# Quoting in the three per-host probes below: the script text is a
# double-quoted string, so $PROXMOX_HOSTS, $SSH_OPTS and ${SSH_USER} are
# expanded by THIS shell while the string is built, and the child bash then
# re-parses the result. \$h and \$fail are escaped so the child evaluates them.
# In each loop a failing host sets fail=1 and the loop continues, so every host
# is still probed; the child exits non-zero if any host failed.
append_cmd "Proxmox cluster status (pvecm) per host" critical bash -c "
fail=0
for h in $PROXMOX_HOSTS; do
echo '=== '"\$h"' ==='
ssh $SSH_OPTS ${SSH_USER}@\"\$h\" 'pvecm status 2>&1' || fail=1
echo ''
done
exit \$fail
"
# Storage status; output is capped at 80 lines per host on the remote side.
# NOTE(review): the remote command ends in `| head -80`, so the status ssh
# returns is presumably head's, not pvesm's — confirm whether a pvesm failure
# should be detected here.
append_cmd "Proxmox storage (pvesm status) per host" critical bash -c "
fail=0
for h in $PROXMOX_HOSTS; do
echo '=== '"\$h"' ==='
ssh $SSH_OPTS ${SSH_USER}@\"\$h\" 'pvesm status 2>&1 | head -80' || fail=1
echo ''
done
exit \$fail
"
# Container (pct) and VM (qm) inventories per host. The remote command is a
# `;` list, so ssh returns the status of its last command (qm list).
append_cmd "Live pct/qm lists per host" critical bash -c "
fail=0
for h in $PROXMOX_HOSTS; do
echo '=== '"\$h"' ==='
ssh $SSH_OPTS ${SSH_USER}@\"\$h\" 'echo PCT:; pct list 2>&1; echo VM:; qm list 2>&1' || fail=1
echo ''
done
exit \$fail
"
# Quick JSON-RPC eth_chainId probe against the core RPC node
# (IP_BESU_RPC_CORE_1, default 192.168.11.211, port 8545). The section is
# omitted when curl is not installed.
if command -v curl &>/dev/null; then
  # The probe is registered as critical, so a curl failure has to reach
  # append_cmd as a non-zero exit. Keep the 'curl failed' marker in the captured
  # output, then propagate curl's own exit code instead of swallowing it.
  append_cmd "Chain 138 RPC quick probe (core, LAN)" critical bash -c "
curl -sS --connect-timeout 4 -X POST -H 'Content-Type: application/json' \
--data '{\"jsonrpc\":\"2.0\",\"method\":\"eth_chainId\",\"params\":[],\"id\":1}' \
\"http://${IP_BESU_RPC_CORE_1:-192.168.11.211}:8545\" || { rc=\$?; echo 'curl failed'; exit \$rc; }
"
fi

# Besu health and enode/IP verification are delegated to sibling scripts. They
# are invoked directly (same form as the Proxmox template audit call) so the
# script path is passed as an argument rather than re-parsed inside a string.
append_cmd "Besu RPC health script (may fail off-LAN)" critical \
  bash "$PROJECT_ROOT/scripts/verify/check-chain138-rpc-health.sh"
append_cmd "Besu enodes / IPs verify (may fail off-LAN)" critical \
  bash "$PROJECT_ROOT/scripts/verify/verify-besu-enodes-and-ips.sh"
# Optional Hyperledger container smoke check on r630-02, enabled with
# HYPERLEDGER_PROBE=1. For each listed VMID it prints `pct status` and, for
# running containers, a short docker / systemd / listening-socket summary.
if [[ "${HYPERLEDGER_PROBE:-}" == "1" ]]; then
  # The remote script lives in a quoted heredoc so it reaches r630-02 verbatim.
  # Previously it was nested inside a single-quoted ssh argument, where the
  # single quotes around the docker --format value ended that argument early:
  # docker ps received `--format table` plus a stray positional argument and
  # failed (stderr discarded), so the container list never appeared.
  hyperledger_smoke_script="$(cat <<'REMOTE'
for id in 6200 6201 6000 6001 6002 6400 6401 6402; do
  echo "=== VMID $id status ==="
  pct status "$id" 2>&1 || true
  if pct status "$id" 2>/dev/null | grep -q running; then
    pct exec "$id" -- bash -lc 'command -v docker >/dev/null && docker ps --format "table {{.Names}}\t{{.Status}}" 2>/dev/null | head -10 || true; command -v systemctl >/dev/null && systemctl list-units --type=service --state=running --no-pager 2>/dev/null | head -20 || true; ss -ltnp 2>/dev/null | head -20 || true' 2>&1 || echo "[exec failed]"
  fi
  echo ""
done
REMOTE
)"
  # SSH_OPTS, SSH_USER and the host are still spliced into the child script text
  # (so SSH_OPTS is parsed exactly as in the other probes); only the remote
  # script is handed over as positional parameter $1 to avoid another quoting layer.
  append_cmd "Hyperledger CT smoke (r630-02; pct exec)" critical \
    bash -c "ssh $SSH_OPTS ${SSH_USER}@$R630_02 \"\$1\"" _ "$hyperledger_smoke_script"
else
  # Probe disabled: leave a note in the Markdown report explaining how to enable it.
  {
    echo ""
    echo "## Hyperledger CT smoke (skipped)"
    echo ""
    echo "Set \`HYPERLEDGER_PROBE=1\` to SSH to r630-02 and run \`pct status/exec\` on 6200, 6201, 6000, 6001, 6002, 6400, 6401, 6402."
    echo ""
  } >>"$MD"
fi
# Close the report: static pointers and next steps, then the critical-failure
# summary built from PHASE1_CRITICAL_FAILURES. Finally print the output paths
# and exit 1 when any critical probe failed.
critical_count=${#PHASE1_CRITICAL_FAILURES[@]}
{
  # Quoted delimiter: the backticks below are literal Markdown, not command substitution.
  cat <<'FOOTER'

## Configuration snapshot pointers (no secrets in repo)

- `config/proxmox-operational-template.json`
- `config/ip-addresses.conf`
- `docs/04-configuration/ALL_VMIDS_ENDPOINTS.md`

## Next steps

1. Reconcile **Entity owner** / **Region** in [DBIS_NODE_ROLE_MATRIX.md](../../docs/02-architecture/DBIS_NODE_ROLE_MATRIX.md).
2. If ML110 row shows Proxmox + workloads, update [PHYSICAL_HARDWARE_INVENTORY.md](../../docs/02-architecture/PHYSICAL_HARDWARE_INVENTORY.md) vs [NETWORK_CONFIGURATION_MASTER.md](../../docs/11-references/NETWORK_CONFIGURATION_MASTER.md).

## Critical failure summary

FOOTER
  if (( critical_count == 0 )); then
    printf '%s\n' \
      "- none" \
      "" \
      "All critical discovery checks completed successfully for this run."
  else
    # One bullet per recorded failure.
    printf -- '- %s\n' "${PHASE1_CRITICAL_FAILURES[@]}"
    printf '%s\n' \
      "" \
      "This report is complete as evidence capture, but the discovery run is **not** a pass. Re-run from LAN with working SSH/RPC access until the critical failures clear."
  fi
  printf '\n'
} >>"$MD"

printf '%s\n' "Wrote $MD" "Full log mirror: $LOG"
ls -la "$MD" "$LOG"

# Non-zero exit tells callers the discovery run did not pass.
(( critical_count == 0 )) || exit 1