# IT ops Phase 0 — live inventory scripts (implementation appendix) **Purpose:** Canonical copy of Phase 0 scripts (also on disk under `scripts/it-ops/`). Use this page if you need to restore or review inline. **Spec:** [SANKOFA_IT_OPERATIONS_CONTROLLER_SPEC.md](../02-architecture/SANKOFA_IT_OPERATIONS_CONTROLLER_SPEC.md) section 5.1 and Phase 0. ## File layout | Path | Role | |------|------| | `scripts/it-ops/lib/collect_inventory_remote.py` | Run on PVE via SSH stdin (`python3 -`) | | `scripts/it-ops/compute_ipam_drift.py` | Local: merge live JSON + `config/ip-addresses.conf` + **`ALL_VMIDS_ENDPOINTS.md`** pipe tables (`--all-vmids-md`) | | `scripts/it-ops/export-live-inventory-and-drift.sh` | Orchestrator: ping seed, SSH, write `reports/status/` | | `services/sankofa-it-read-api/server.py` | Read-only HTTP: `/v1/inventory/live`, `/v1/inventory/drift` | | `.github/workflows/live-inventory-drift.yml` | `workflow_dispatch` + weekly (graceful skip without LAN) | **Exit codes (`compute_ipam_drift.py`):** **2** = same LAN IP used by guests with **different** names (address conflict). **0** otherwise. Guests that share an IP but share the **same** hostname (e.g. clone pairs) are listed under **`same_name_duplicate_ip_guests`** only (informational). **`vmid_ip_mismatch_live_vs_all_vmids_doc`** is informational (docs often lag live CT config). --- ## `scripts/it-ops/lib/collect_inventory_remote.py` ```python #!/usr/bin/env python3 """Run ON a Proxmox cluster node (as root). 
Stdout: JSON live guest inventory.""" from __future__ import annotations import json import re import subprocess import sys from datetime import datetime, timezone def _run(cmd: list[str]) -> str: return subprocess.check_output(cmd, text=True, stderr=subprocess.DEVNULL) def _extract_ip_from_net_line(line: str) -> str | None: m = re.search(r"ip=([0-9.]+)", line) return m.group(1) if m else None def _read_config(path: str) -> str: try: with open(path, encoding="utf-8", errors="replace") as f: return f.read() except OSError: return "" def main() -> None: collected_at = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ") try: raw = _run( ["pvesh", "get", "/cluster/resources", "--output-format", "json"] ) resources = json.loads(raw) except (subprocess.CalledProcessError, json.JSONDecodeError) as e: json.dump( { "collected_at": collected_at, "error": f"pvesh_cluster_resources_failed: {e}", "guests": [], }, sys.stdout, indent=2, ) return guests: list[dict] = [] for r in resources: t = r.get("type") if t not in ("lxc", "qemu"): continue vmid = r.get("vmid") node = r.get("node") if vmid is None or not node: continue vmid_s = str(vmid) name = r.get("name") or "" status = r.get("status") or "" if t == "lxc": cfg_path = f"/etc/pve/nodes/{node}/lxc/{vmid_s}.conf" else: cfg_path = f"/etc/pve/nodes/{node}/qemu-server/{vmid_s}.conf" body = _read_config(cfg_path) ip = "" for line in body.splitlines(): if line.startswith("net0:"): got = _extract_ip_from_net_line(line) if got: ip = got break if not ip and t == "qemu": for line in body.splitlines(): if line.startswith("ipconfig0:"): got = _extract_ip_from_net_line(line) if got: ip = got break if not ip and t == "qemu": for line in body.splitlines(): if line.startswith("net0:"): got = _extract_ip_from_net_line(line) if got: ip = got break guests.append( { "vmid": vmid_s, "type": t, "node": str(node), "name": name, "status": status, "ip": ip, "config_path": cfg_path, } ) out = { "collected_at": collected_at, "guests": 
sorted(guests, key=lambda g: int(g["vmid"])), } json.dump(out, sys.stdout, indent=2) if __name__ == "__main__": main() ``` --- ## `scripts/it-ops/compute_ipam_drift.py` ```python #!/usr/bin/env python3 """Merge live JSON with config/ip-addresses.conf; write live_inventory.json + drift.json.""" from __future__ import annotations import argparse import json import re import sys from pathlib import Path IPV4_RE = re.compile( r"(?<![\d.])(?:\d{1,3}\.){3}\d{1,3}(?![\d.])" ) def parse_ip_addresses_conf(path: Path) -> tuple[dict[str, str], set[str]]: var_map: dict[str, str] = {} all_ips: set[str] = set() if not path.is_file(): return var_map, all_ips for line in path.read_text(encoding="utf-8", errors="replace").splitlines(): s = line.strip() if not s or s.startswith("#") or "=" not in s: continue key, _, val = s.partition("=") key = key.strip() val = val.strip() if val.startswith('"') and val.endswith('"'): val = val[1:-1] elif val.startswith("'") and val.endswith("'"): val = val[1:-1] var_map[key] = val for m in IPV4_RE.findall(val): all_ips.add(m) return var_map, all_ips def hypervisor_related_keys(var_map: dict[str, str]) -> set[str]: keys = set() for k in var_map: ku = k.upper() if any( x in ku for x in ( "PROXMOX_HOST", "PROXMOX_ML110", "PROXMOX_R630", "PROXMOX_R750", "WAN_AGGREGATOR", "NETWORK_GATEWAY", "UDM_PRO", "PUBLIC_IP_GATEWAY", "PUBLIC_IP_ER605", ) ): keys.add(k) return keys def main() -> None: ap = argparse.ArgumentParser() ap.add_argument("--live", type=Path, help="live JSON file (default stdin)") ap.add_argument("--ip-conf", type=Path, default=Path("config/ip-addresses.conf")) ap.add_argument("--out-dir", type=Path, required=True) args = ap.parse_args() live_raw = args.live.read_text(encoding="utf-8") if args.live else sys.stdin.read() live = json.loads(live_raw) guests = live.get("guests") or [] var_map, conf_ips = parse_ip_addresses_conf(args.ip_conf) hyp_keys = hypervisor_related_keys(var_map) hyp_ips: set[str] = set() for k in hyp_keys: if k not in var_map: continue for m in IPV4_RE.findall(var_map[k]): hyp_ips.add(m) ip_to_vmids:
dict[str, list[str]] = {} for g in guests: ip = (g.get("ip") or "").strip() if not ip: continue ip_to_vmids.setdefault(ip, []).append(g.get("vmid", "?")) duplicate_ips = {ip: vms for ip, vms in ip_to_vmids.items() if len(vms) > 1} guest_ip_set = set(ip_to_vmids.keys()) conf_only = sorted(conf_ips - guest_ip_set - hyp_ips) live_only = sorted(guest_ip_set - conf_ips) drift = { "collected_at": live.get("collected_at"), "guest_count": len(guests), "duplicate_ips": duplicate_ips, "guest_ips_not_in_ip_addresses_conf": live_only, "ip_addresses_conf_ips_not_on_guests": conf_only, "hypervisor_and_infra_ips_excluded_from_guest_match": sorted(hyp_ips), "notes": [], } if live.get("error"): drift["notes"].append(live["error"]) inv_out = { "collected_at": live.get("collected_at"), "source": "proxmox_cluster_pvesh_plus_config", "guests": guests, } args.out_dir.mkdir(parents=True, exist_ok=True) (args.out_dir / "live_inventory.json").write_text( json.dumps(inv_out, indent=2), encoding="utf-8" ) (args.out_dir / "drift.json").write_text( json.dumps(drift, indent=2), encoding="utf-8" ) print(f"Wrote {args.out_dir / 'live_inventory.json'}") print(f"Wrote {args.out_dir / 'drift.json'}") sys.exit(2 if duplicate_ips else 0) if __name__ == "__main__": main() ``` --- ## `scripts/it-ops/export-live-inventory-and-drift.sh` ```bash #!/usr/bin/env bash # Live Proxmox guest inventory + drift vs config/ip-addresses.conf. # Usage: bash scripts/it-ops/export-live-inventory-and-drift.sh # Requires: SSH key root@SEED, python3 locally and on PVE. set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" PROJECT_ROOT="$(cd "$SCRIPT_DIR/../.." 
&& pwd)" # shellcheck source=/dev/null source "${PROJECT_ROOT}/config/ip-addresses.conf" 2>/dev/null || true SEED="${SEED_HOST:-${PROXMOX_HOST_R630_01:-192.168.11.11}}" OUT_DIR="${OUT_DIR:-${PROJECT_ROOT}/reports/status}" TS="$(date +%Y%m%d_%H%M%S)" TMP="${TMPDIR:-/tmp}/live_inv_${TS}.json" PY="${SCRIPT_DIR}/lib/collect_inventory_remote.py" mkdir -p "$OUT_DIR" stub_unreachable() { python3 - <<'PYEOF'
import json, sys
from datetime import datetime, timezone
json.dump(
    {
        "collected_at": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
        "error": "seed_unreachable",
        "guests": [],
    },
    sys.stdout,
    indent=2,
)
PYEOF
} if ! ping -c 1 -W 2 "$SEED" >/dev/null 2>&1; then stub_unreachable >"$TMP" else if ! ssh -o BatchMode=yes -o ConnectTimeout=15 -o StrictHostKeyChecking=no \ "root@${SEED}" "python3 -" <"$PY" >"$TMP" 2>/dev/null; then stub_unreachable >"$TMP" fi fi set +e python3 "${SCRIPT_DIR}/compute_ipam_drift.py" --live "$TMP" \ --ip-conf "${PROJECT_ROOT}/config/ip-addresses.conf" --out-dir "$OUT_DIR" DRIFT_RC=$? set -e cp -f "$OUT_DIR/live_inventory.json" "${OUT_DIR}/live_inventory_${TS}.json" 2>/dev/null || true cp -f "$OUT_DIR/drift.json" "${OUT_DIR}/drift_${TS}.json" 2>/dev/null || true rm -f "$TMP" echo "Latest: ${OUT_DIR}/live_inventory.json , ${OUT_DIR}/drift.json" # Exit 2 when duplicate_ips present (for CI). exit "${DRIFT_RC}" ``` After creating files: `chmod +x scripts/it-ops/export-live-inventory-and-drift.sh scripts/it-ops/compute_ipam_drift.py` --- ## `.github/workflows/live-inventory-drift.yml` ```yaml name: Live inventory and IPAM drift on: workflow_dispatch: schedule: - cron: '25 6 * * 1' jobs: drift: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - name: Export live inventory (LAN optional) run: | set +e bash scripts/it-ops/export-live-inventory-and-drift.sh echo "exit=$?" continue-on-error: true - name: Upload artifacts uses: actions/upload-artifact@v4 if: always() with: name: live-inventory-drift path: | reports/status/live_inventory.json reports/status/drift.json ``` **Note:** On GitHub-hosted runners the collector usually writes `seed_unreachable`; use a **self-hosted LAN runner** for real data, or run the shell script on the operator workstation.
--- ## `AGENTS.md` row (Quick pointers table) Add: `` | IT live inventory + drift (LAN) | `bash scripts/it-ops/export-live-inventory-and-drift.sh` → `reports/status/live_inventory.json`, `drift.json` — see [docs/03-deployment/SANKOFA_IT_OPS_LIVE_INVENTORY_SCRIPTS.md](docs/03-deployment/SANKOFA_IT_OPS_LIVE_INVENTORY_SCRIPTS.md) | `` --- ## `docs/MASTER_INDEX.md` Add a row pointing to this deployment appendix and the updated spec.