Refactor collector.py for improved clarity and config

This commit is contained in:
Frank Harris 2025-09-10 18:47:36 -04:00 committed by GitHub
parent 71990d61c2
commit 4ce3e0c1ad
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@ -1,46 +1,32 @@
#!/usr/bin/env python3
# collector.py
#
# Runs on each machine via cron (e.g., every minute).
# Scans subfolders (each is a gameserver), finds running PIDs rooted there,
# samples process + machine stats, and writes to MySQL.
# collector.py (place in gameserver root; run via cron)
import os, socket, time, subprocess, json
import os, socket, time, subprocess
from datetime import datetime, timezone
from pathlib import Path
import psutil
import mysql.connector
# ======== CONFIG (edit these) =========
# ======== CONFIG (edit for YOUR PANEL DB) =========
DB_HOST = "127.0.0.1"
DB_USER = "gs_metrics"
DB_USER = "panel_user" # your panel DB user
DB_PASS = "REPLACE_ME"
DB_NAME = "gs_metrics"
DB_NAME = "panel_database" # your panel DB name (not a new DB)
TABLE_PREFIX = "gsp_" # don't change unless you want a different prefix
# ================================================
# The folder this script lives in is the gameserver root containing subfolders.
BASE_DIR = Path(__file__).resolve().parent
# Optional: set the disk path you care about (use BASE_DIRs filesystem)
DISK_PATH = str(BASE_DIR)
# Optional: force a specific interface name (else autodetect default route iface)
NET_IFACE = None # e.g. "eth0"
# Identify the machine (hostname fallback if env not set)
NET_IFACE = None
MACHINE_ID = os.environ.get("GS_MACHINE_ID") or socket.gethostname()
# How long to wait to get stable CPU% readings (seconds)
CPU_SAMPLE_DELAY = 0.5
# =======================================
def utc_now():
return datetime.now(timezone.utc).replace(tzinfo=None) # naive UTC for MySQL DATETIME
return datetime.now(timezone.utc).replace(tzinfo=None)
def get_default_iface():
if NET_IFACE:
return NET_IFACE
# Parse /proc/net/route for default route (Destination == 00000000)
try:
with open("/proc/net/route") as f:
for line in f.readlines()[1:]:
@ -49,7 +35,6 @@ def get_default_iface():
return parts[0]
except Exception:
pass
# Fallback: first "up" interface with stats
stats = psutil.net_if_stats()
for name, st in stats.items():
if st.isup:
@ -57,20 +42,16 @@ def get_default_iface():
return None
def get_folder_size_bytes(path: Path) -> int:
# Fast and robust: use `du -sb` if available
try:
res = subprocess.run(["du", "-sb", str(path)], capture_output=True, text=True, check=True)
return int(res.stdout.split()[0])
except Exception:
# Fallback: Python walk (slower on huge trees)
total = 0
for root, dirs, files in os.walk(path, followlinks=False):
for fn in files:
fp = os.path.join(root, fn)
try:
total += os.path.getsize(fp)
except Exception:
pass
try: total += os.path.getsize(fp)
except Exception: pass
return total
def connect_db():
@ -81,15 +62,15 @@ def connect_db():
def ensure_machine(db, hostname):
cur = db.cursor()
cur.execute(
"INSERT IGNORE INTO machines (machine_id, hostname) VALUES (%s, %s)",
f"INSERT IGNORE INTO {TABLE_PREFIX}machines (machine_id, hostname) VALUES (%s, %s)",
(MACHINE_ID, hostname),
)
cur.close()
def insert_machine_sample(db, ts, load1, load5, load15, cpu_pct, vm, swap, du, iface, rx, tx, speed):
cur = db.cursor()
cur.execute("""
INSERT INTO machine_samples
cur.execute(f"""
INSERT INTO {TABLE_PREFIX}machine_samples
(machine_id, ts, load1, load5, load15, cpu_pct,
mem_used_bytes, mem_total_bytes, mem_used_pct,
swap_used_bytes, swap_total_bytes,
@ -109,51 +90,36 @@ def insert_machine_sample(db, ts, load1, load5, load15, cpu_pct, vm, swap, du, i
def insert_process_sample(db, ts, server_name, server_path, proc, cpu_pct, folder_size):
try:
mi = proc.memory_info()
mem_pct = proc.memory_percent() # % of system RAM
mi = proc.memory_info(); mem_pct = proc.memory_percent()
except Exception:
mi = None
mem_pct = None
rss = mi.rss if mi else None
vms = mi.vms if mi else None
mi = None; mem_pct = None
rss = mi.rss if mi else None; vms = mi.vms if mi else None
try:
io = proc.io_counters()
rd, wr = io.read_bytes, io.write_bytes
io = proc.io_counters(); rd, wr = io.read_bytes, io.write_bytes
except Exception:
rd = wr = None
try:
fds = proc.num_fds() if hasattr(proc, "num_fds") else None
except Exception:
fds = None
# Listening ports (TCP LISTEN, UDP sockets present)
ports = set()
try:
for c in proc.connections(kind="inet"):
try:
if c.status == psutil.CONN_LISTEN or c.type == psutil.SOCK_DGRAM:
if c.laddr and c.laddr.port:
ports.add(str(c.laddr.port))
except Exception:
pass
except Exception:
pass
if c.laddr and c.laddr.port: ports.add(str(c.laddr.port))
except Exception: pass
except Exception: pass
ports_str = ",".join(sorted(ports)) if ports else None
cmd_str = None
try:
cmd = proc.cmdline()
if cmd:
cmd_str = " ".join(cmd)[:1024]
except Exception:
pass
if cmd: cmd_str = " ".join(cmd)[:1024]
except Exception: pass
cur = db.cursor()
cur.execute("""
INSERT INTO process_samples
cur.execute(f"""
INSERT INTO {TABLE_PREFIX}process_samples
(machine_id, ts, server_name, server_path,
pid, proc_name, cmd, cpu_pct,
rss_bytes, vms_bytes, mem_pct,
@ -182,30 +148,24 @@ def main():
if iface and iface in net_counters:
rx = net_counters[iface].bytes_recv
tx = net_counters[iface].bytes_sent
load1=load5=load15 = (0.0,0.0,0.0)
try:
load1, load5, load15 = os.getloadavg()
except Exception:
pass
load1=load5=load15=0.0
# Prime process CPU% and collect candidate PIDs
# Discover servers = immediate child dirs of BASE_DIR (ignore dot dirs)
# discover servers
server_dirs = [p for p in BASE_DIR.iterdir() if p.is_dir() and not p.name.startswith('.')]
# map server -> processes
server_procs = {str(d): [] for d in server_dirs}
# Build a fast list of all processes once
plist = []
for p in psutil.process_iter(attrs=["pid","name","cwd","exe","cmdline"]):
try:
_ = p.status() # touch to ensure alive
p.cpu_percent(interval=None) # prime
_ = p.status()
p.cpu_percent(interval=None)
plist.append(p)
except Exception:
pass
# Associate processes to server dirs by cwd/exe/cmdline path prefix
for d in server_dirs:
dstr = str(d)
for p in plist:
@ -218,37 +178,26 @@ def main():
except Exception:
continue
# Wait briefly so second CPU% read is meaningful
time.sleep(CPU_SAMPLE_DELAY)
# Re-read CPU% for each proc
proc_cpu = {}
for p in plist:
try:
proc_cpu[p.pid] = p.cpu_percent(interval=None) # % of one CPU * cores
except Exception:
proc_cpu[p.pid] = None
try: proc_cpu[p.pid] = p.cpu_percent(interval=None)
except Exception: proc_cpu[p.pid] = None
# Machine-wide stats
vm = psutil.virtual_memory()
swap = psutil.swap_memory()
du = psutil.disk_usage(DISK_PATH)
cpu_pct = psutil.cpu_percent(interval=0.0)
# Insert into DB
db = connect_db()
ensure_machine(db, hostname)
insert_machine_sample(db, ts, load1, load5, load15, cpu_pct, vm, swap, du,
iface, rx, tx, iface_speed)
insert_machine_sample(db, ts, load1, load5, load15, cpu_pct, vm, swap, du, iface, rx, tx, iface_speed)
# For each server dir, capture folder size once and record each process row
for sdir, procs in server_procs.items():
server_name = Path(sdir).name
folder_size = get_folder_size_bytes(Path(sdir))
for p in procs:
insert_process_sample(
db, ts, server_name, sdir, p, proc_cpu.get(p.pid), folder_size
)
insert_process_sample(db, ts, server_name, sdir, p, proc_cpu.get(p.pid), folder_size)
db.close()