#!/bin/bash
# pve-memcheck.sh -- Quick memory and ZFS health check
# Usage: ./pve-memcheck.sh
# Works on any Proxmox VE node with ZFS pools

# Report banner (host name and current date) followed by the RAM & swap
# section heading. Emitted as one here-doc; the command substitutions are
# expanded by the shell when the here-doc is read.
cat <<EOF
===============================================================================
  PVE MEMORY & ZFS MONITOR -- $(hostname) -- $(date)
===============================================================================

--- RAM & Swap ---
EOF
# System-wide memory and swap usage in human-readable units.
free -h

echo ""
echo "--- ZFS ARC ---"

# Print the data column of the "size" row from an arcstats-format file.
# Prints nothing (and stays quiet) when the file is missing or unreadable.
# Arguments: $1 - path to the arcstats file
_arc_size_bytes() {
  awk '$1 == "size" { print $3; exit }' "$1" 2>/dev/null
}

# Format a byte count as whole megabytes ("<n> MB").
# Empty or non-numeric input yields "unavailable" instead of an arithmetic
# or test error.
# Arguments: $1 - byte count (may be empty)
_arc_mb_display() {
  case "$1" in
    '' | *[!0-9]*) echo "unavailable" ;;
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    *) echo "$((10#$1 / 1024 / 1024)) MB" ;;
  esac
}

# Format the zfs_arc_max module parameter for display.
# A value of 0 (no explicit limit configured) is shown as "UNCAPPED";
# any other number is shown in MB; empty/non-numeric is "unavailable".
# Arguments: $1 - raw zfs_arc_max value (may be empty)
_arc_max_display() {
  case "$1" in
    '' | *[!0-9]*) echo "unavailable" ;;
    *)
      if [ "$((10#$1))" -eq 0 ]; then
        echo "UNCAPPED"
      else
        _arc_mb_display "$1"
      fi
      ;;
  esac
}

ARC_SIZE=$(_arc_size_bytes /proc/spl/kstat/zfs/arcstats)
ARC_MAX=""
# Only read the parameter when it exists, so a node without the ZFS module
# loaded does not produce a redirect error.
if [ -r /sys/module/zfs/parameters/zfs_arc_max ]; then
  read -r ARC_MAX < /sys/module/zfs/parameters/zfs_arc_max
fi
ARC_CUR_DISPLAY=$(_arc_mb_display "$ARC_SIZE")
ARC_MAX_DISPLAY=$(_arc_max_display "$ARC_MAX")
echo "  ARC Current: ${ARC_CUR_DISPLAY}"
echo "  ARC Max:     ${ARC_MAX_DISPLAY}"

echo ""
echo "--- ZFS Pool Health ---"

# Read "name<TAB>health" records on stdin and print one summary line per
# pool: the pool name, its health state, and the last "errors:" line of
# `zpool status` for that pool.
# Splitting on TAB only keeps pool names that contain spaces intact.
_print_pool_health() {
  local pool state errors
  while IFS=$'\t' read -r pool state; do
    [ -n "$pool" ] || continue
    # stdin is redirected so zpool cannot consume the remaining records.
    errors=$(zpool status "$pool" < /dev/null | grep "errors:" | tail -1)
    echo "  $pool: $state -- $errors"
  done
}

# -H prints tab-separated fields without a header; one call returns both
# columns, so no extra `zpool list` is needed per pool. Errors are
# discarded so a node without zpool simply prints no pool lines.
_print_pool_health < <(zpool list -H -o name,health 2>/dev/null)

# Top swap consumers: list up to five processes whose VmSwap value in
# /proc/<pid>/status is above 100000 KB, largest first.
printf '\n'
printf '%s\n' "--- Top Swap Consumers ---"
printf '%s\n' "  Swap(KB)  PID     Process"
for proc_dir in /proc/[0-9]*; do
  swap_kb=$(awk '/VmSwap/{print $2}' "$proc_dir/status" 2>/dev/null)
  # Empty when the status file has no VmSwap line or could not be read.
  [ -n "$swap_kb" ] || continue
  # Only report processes above the 100000 KB threshold.
  [ "$swap_kb" -gt 100000 ] || continue
  comm=$(cat "$proc_dir/comm" 2>/dev/null)
  # ${proc_dir##*/} strips the leading "/proc/" to leave the numeric PID.
  printf "  %-10s %-7s %s\n" "$swap_kb" "${proc_dir##*/}" "$comm"
done | sort -rn | head -5

echo ""
echo "--- Running VM/CT Memory Allocation ---"

# Print the value of the first "<key>: <value>" line read from stdin.
# The key is matched exactly, so "name" does not also pick up "nameserver".
# Arguments: $1 - config key without the trailing colon
_cfg_value() {
  awk -v key="$1:" '$1 == key { print $2; exit }'
}

# Normalise a "memory" config value to a plain integer number of MB.
# Accepts "<n>" or "current=<n>[,...]". Empty or non-numeric input falls
# back to 512, the documented Proxmox default when no memory is configured.
# Arguments: $1 - raw config value (may be empty)
_mem_mb() {
  local raw=${1#current=}
  raw=${raw%%,*}
  case "$raw" in
    '' | *[!0-9]*) echo 512 ;;
    # 10# forces base 10 so leading zeros are not treated as octal.
    *) echo "$((10#$raw))" ;;
  esac
}

# Print one row per running VM and container with its configured memory,
# then a total. Each guest's config is fetched once and parsed for both
# the memory size and the display name.
# Globals: TOTAL_ALLOC (written) - sum of configured memory in MB
_report_allocations() {
  local id cfg mem label
  TOTAL_ALLOC=0
  # qm list: column 3 is the status; the first line is the header.
  for id in $(qm list 2>/dev/null | awk 'NR>1 && $3=="running" {print $1}'); do
    cfg=$(qm config "$id")
    mem=$(_mem_mb "$(_cfg_value memory <<< "$cfg")")
    label=$(_cfg_value name <<< "$cfg")
    TOTAL_ALLOC=$((TOTAL_ALLOC + mem))
    printf "  VM  %-6s %-25s %s MB\n" "$id" "$label" "$mem"
  done
  # pct list: column 2 is the status; the first line is the header.
  for id in $(pct list 2>/dev/null | awk 'NR>1 && $2=="running" {print $1}'); do
    cfg=$(pct config "$id")
    mem=$(_mem_mb "$(_cfg_value memory <<< "$cfg")")
    label=$(_cfg_value hostname <<< "$cfg")
    TOTAL_ALLOC=$((TOTAL_ALLOC + mem))
    printf "  CT  %-6s %-25s %s MB\n" "$id" "$label" "$mem"
  done
  echo "  ----------------------------------------"
  echo "  Total Allocated: ${TOTAL_ALLOC} MB ($((TOTAL_ALLOC / 1024)) GB)"
}

_report_allocations

# Closing rule: a blank line followed by the same separator used in the banner.
printf '\n%s\n' "==============================================================================="