fixed stop-start

This commit is contained in:
Frank Harris 2026-06-10 17:52:14 -04:00
parent 17afb98f43
commit aa2dd354cf
2 changed files with 197 additions and 23 deletions

View file

@ -86,6 +86,8 @@ use constant SCREEN_LOGS_DIR =>
Path::Class::Dir->new(AGENT_RUN_DIR, 'screenlogs'); Path::Class::Dir->new(AGENT_RUN_DIR, 'screenlogs');
use constant GAME_STARTUP_DIR => use constant GAME_STARTUP_DIR =>
Path::Class::Dir->new(AGENT_RUN_DIR, 'startups'); Path::Class::Dir->new(AGENT_RUN_DIR, 'startups');
use constant SERVER_RUNTIME_DIR =>
Path::Class::Dir->new(AGENT_RUN_DIR, 'runtime_status');
use constant SCREENRC_FILE => use constant SCREENRC_FILE =>
Path::Class::File->new(AGENT_RUN_DIR, 'ogp_screenrc'); Path::Class::File->new(AGENT_RUN_DIR, 'ogp_screenrc');
use constant SCREENRC_FILE_BK => use constant SCREENRC_FILE_BK =>
@ -209,6 +211,13 @@ if (!-e GAME_STARTUP_DIR)
exit 1; exit 1;
} }
} }
if (!-d SERVER_RUNTIME_DIR && !mkdir SERVER_RUNTIME_DIR)
{
logger "Could not create " . SERVER_RUNTIME_DIR . " directory $!.", 1;
exit -1;
}
elsif ($clear_startups) elsif ($clear_startups)
{ {
opendir(STARTUPDIR, GAME_STARTUP_DIR); opendir(STARTUPDIR, GAME_STARTUP_DIR);
@ -743,6 +752,89 @@ sub read_status_hint
return ($timestamp, $state); return ($timestamp, $state);
} }
# Build the location of the per-home PID metadata file.
#
# Every character of $home_id that is not an ASCII digit is discarded, and the
# remaining digits are placed into the file name "pid-<digits>.kv" inside
# SERVER_RUNTIME_DIR.
#
# Returns a Path::Class::File object.
sub get_pid_metadata_path
{
    my ($home_id) = @_;

    # Keep ASCII digits only; nothing else may end up in the file name.
    $home_id =~ tr/0-9//cd;

    my $file_name = 'pid-' . $home_id . '.kv';
    return Path::Class::File->new(SERVER_RUNTIME_DIR, $file_name);
}
# Persist runtime PID metadata for a game home as sorted "key=value" lines.
#
# Parameters:
#   $home_id - home identifier; the target file is resolved through
#              get_pid_metadata_path().
#   $values  - hash reference of fields to store. Undefined values are written
#              as empty strings, and CR/LF characters are removed from values
#              so that every field stays on a single line.
#
# Returns 1 when the file was opened, written and closed successfully.
# Returns 0 when $values is not a hash reference or any of those steps failed.
sub write_pid_metadata
{
    my ($home_id, $values) = @_;
    return 0 unless(ref($values) eq 'HASH');

    my $file = get_pid_metadata_path($home_id);

    # A lexical handle is closed automatically on every exit path and cannot
    # collide with another sub using the same bareword handle name.
    my $fh;
    return 0 unless(open($fh, '>', $file));

    my $written = 1;
    foreach my $key (sort keys %$values)
    {
        my $value = defined($values->{$key}) ? $values->{$key} : "";
        $value =~ s/[\r\n]//g;
        $written = 0 unless(print {$fh} "$key=$value\n");
    }

    # Buffered write errors (e.g. a full disk) only surface at close time.
    $written = 0 unless(close($fh));

    return $written;
}
# Load the runtime PID metadata stored for a game home.
#
# Parameters:
#   $home_id - home identifier; the source file is resolved through
#              get_pid_metadata_path().
#
# Returns a hash reference of the "key=value" pairs found in the file. The
# hash is empty when the file does not exist or cannot be opened. Lines that
# do not have a non-empty key followed by "=" are ignored.
sub read_pid_metadata
{
    my ($home_id) = @_;
    my %values;
    my $file = get_pid_metadata_path($home_id);
    return \%values unless(-e $file);

    # Lexical handle instead of a shared bareword global.
    my $fh;
    if(open($fh, '<', $file))
    {
        while(my $line = <$fh>)
        {
            # Strip LF and CRLF line endings alike; a stray CR left on a value
            # would make a stored PID fail a digits-only check later on.
            $line =~ s/[\r\n]+\z//;
            next unless($line =~ /^([^=]+)=(.*)$/);
            $values{$1} = $2;
        }
        close($fh);
    }
    return \%values;
}
# Delete the PID metadata file of a game home, if one is present.
# The file location comes from get_pid_metadata_path($home_id).
sub clear_pid_metadata
{
    my ($home_id) = @_;
    my $meta_file = get_pid_metadata_path($home_id);

    if(-e $meta_file)
    {
        unlink($meta_file);
    }
}
# Check whether a process with the given PID currently exists.
#
# The check shells out to PowerShell's Get-Process and maps its exit status
# to a boolean.
#
# Parameters:
#   $pid - process id; trailing whitespace (such as the newline left by
#          unchomped command output) is tolerated and removed.
#
# Returns 1 when Get-Process finds the process. Returns 0 otherwise, including
# when $pid is undefined, not a positive ASCII-digit integer, or the command
# could not be run.
sub is_pid_alive_without_decrypt
{
    my ($pid) = @_;
    return 0 unless(defined($pid));

    # Drop trailing whitespace first, then validate strictly. With /^\d+$/ a
    # value like "123\n" passed validation and the newline ended up inside the
    # shell command line below.
    $pid =~ s/\s+\z//;
    return 0 unless($pid =~ /\A[0-9]+\z/ && $pid > 0);

    my $command = 'powershell.exe -NoProfile -ExecutionPolicy Bypass -Command "if(Get-Process -Id '
        . $pid
        . ' -ErrorAction SilentlyContinue){exit 0}else{exit 1}" >/dev/null 2>&1';

    # system() yields 0 only when the command ran and exited with status 0.
    my $retval = system($command);
    return $retval == 0 ? 1 : 0;
}
# Find the PID of the process that owns a local port, using `netstat -ano`.
#
# Parameters:
#   $port     - local port number, 1..65535.
#   $protocol - "tcp", "udp" or "any" (case-insensitive). Anything else,
#               including undef, is treated as "any".
#
# Returns the PID (as a string of digits) from the first matching netstat
# line: a TCP line in LISTENING state, or a UDP line, whose local address ends
# in the requested port. Returns "" when the port is invalid, netstat produced
# no output, or no line matched.
sub find_process_pid_by_port_without_decrypt
{
    my ($port, $protocol) = @_;
    return "" unless(defined($port) && $port =~ /\A[0-9]+\z/ && $port > 0 && $port <= 65535);

    # Normalise e.g. "080" to "80"; netstat never prints leading zeros, so a
    # zero-padded port could not match any line.
    $port = int($port);

    $protocol = "any" unless(defined($protocol) && $protocol =~ /\A(?:tcp|udp|any)\z/i);
    $protocol = lc($protocol);
    my $want_tcp = ($protocol eq "tcp" || $protocol eq "any") ? 1 : 0;
    my $want_udp = ($protocol eq "udp" || $protocol eq "any") ? 1 : 0;

    # Backticks return undef when the command could not be started.
    my $output = `netstat -ano 2>/dev/null`;
    return "" unless(defined($output) && $output ne "");

    # Compile once instead of re-interpolating $port for every line.
    my $tcp_line = qr/^\s*TCP\s+\S+[:.]$port\s+\S+\s+LISTENING\s+([0-9]+)\s*$/i;

    # The foreign-address column is required (\S+\s+). With the looser
    # \S*\s* form the regex could backtrack into the PID column and capture
    # only its last digit(s), i.e. report the wrong process.
    my $udp_line = qr/^\s*UDP\s+\S+[:.]$port\s+\S+\s+([0-9]+)\s*$/i;

    foreach my $line (split(/\r?\n/, $output))
    {
        return $1 if($want_tcp && $line =~ $tcp_line);
        return $1 if($want_udp && $line =~ $udp_line);
    }
    return "";
}
sub get_screen_pid_without_decrypt sub get_screen_pid_without_decrypt
{ {
my ($home_id) = @_; my ($home_id) = @_;
@ -927,8 +1019,15 @@ sub server_status_without_decrypt
my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id); my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0; my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0;
my $pid = $session_running ? get_screen_pid_without_decrypt($home_id) : ""; my $screen_pid = $session_running ? get_screen_pid_without_decrypt($home_id) : "";
my $process_running = $session_running; my $pid_meta = read_pid_metadata($home_id);
my $windows_pid = defined($pid_meta->{windows_pid}) ? $pid_meta->{windows_pid} : "";
my $game_pid = defined($pid_meta->{game_pid}) ? $pid_meta->{game_pid} : "";
my $pid_running = 0;
$pid_running = 1 if($screen_pid ne "" && is_pid_alive_without_decrypt($screen_pid) == 1);
$pid_running = 1 if(!$pid_running && $windows_pid ne "" && is_pid_alive_without_decrypt($windows_pid) == 1);
$pid_running = 1 if(!$pid_running && $game_pid ne "" && is_pid_alive_without_decrypt($game_pid) == 1);
my $process_running = ($session_running || $pid_running) ? 1 : 0;
my @expected_ports = build_expected_ports($server_port, $query_port, $rcon_port); my @expected_ports = build_expected_ports($server_port, $query_port, $rcon_port);
my ($listening_ports, $missing_ports) = PORT_VALIDATION_ENABLED ? validate_expected_ports(@expected_ports) : ([], []); my ($listening_ports, $missing_ports) = PORT_VALIDATION_ENABLED ? validate_expected_ports(@expected_ports) : ([], []);
my $expected_count = scalar(@expected_ports); my $expected_count = scalar(@expected_ports);
@ -950,14 +1049,14 @@ sub server_status_without_decrypt
my $status_state = "Unknown"; my $status_state = "Unknown";
my $ready = 0; my $ready = 0;
if(!$session_running && ((!PORT_VALIDATION_ENABLED && $game_port_listening) || ($expected_count > 0 && $listening_count > 0))) if(!$process_running && ((!PORT_VALIDATION_ENABLED && $game_port_listening) || ($expected_count > 0 && $listening_count > 0)))
{ {
$status = "ONLINE"; $status = "ONLINE";
$ready = 1; $ready = 1;
$status_state = $missing_count == 0 ? "Running" : "Warning"; $status_state = $missing_count == 0 ? "Running" : "Warning";
$last_error = "Required port is listening but the managed screen session is not running."; $last_error = "Required port is listening but the managed screen session is not running.";
} }
elsif($session_running) elsif($process_running)
{ {
if(!PORT_VALIDATION_ENABLED && $game_port_listening) if(!PORT_VALIDATION_ENABLED && $game_port_listening)
{ {
@ -1009,6 +1108,7 @@ sub server_status_without_decrypt
ProcessRunning => $process_running, ProcessRunning => $process_running,
process_running => $process_running, process_running => $process_running,
session_running => $session_running, session_running => $session_running,
pid_running => $pid_running,
game_port_listening => $game_port_listening ? 1 : 0, game_port_listening => $game_port_listening ? 1 : 0,
query_port_listening => $query_port_listening ? 1 : 0, query_port_listening => $query_port_listening ? 1 : 0,
rcon_port_listening => $rcon_port_listening ? 1 : 0, rcon_port_listening => $rcon_port_listening ? 1 : 0,
@ -1023,7 +1123,9 @@ sub server_status_without_decrypt
missing_ports => $missing_ports, missing_ports => $missing_ports,
CPUUsage => get_agent_cpu_usage_percent(), CPUUsage => get_agent_cpu_usage_percent(),
MemoryUsage => get_agent_memory_usage_percent(), MemoryUsage => get_agent_memory_usage_percent(),
pid => $pid, pid => $game_pid ne "" ? $game_pid : ($windows_pid ne "" ? $windows_pid : $screen_pid),
screen_pid => $screen_pid,
windows_pid => $windows_pid,
session_name => $screen_id, session_name => $screen_id,
ip => $server_ip, ip => $server_ip,
port => $server_port, port => $server_port,
@ -1235,6 +1337,15 @@ sub universal_start_without_decrypt
system($cli_bin); system($cli_bin);
write_status_hint($home_id, "STARTING"); write_status_hint($home_id, "STARTING");
my $screen_pid = get_screen_pid_without_decrypt($home_id);
my $windows_pid = find_process_pid_by_port_without_decrypt($server_port, "any");
write_pid_metadata($home_id, {
screen_pid => $screen_pid,
windows_pid => $windows_pid,
game_pid => $windows_pid,
ip => $server_ip,
port => $server_port
});
if(defined $preStart && $preStart ne ""){ if(defined $preStart && $preStart ne ""){
# Get it in the format that the startup file can use # Get it in the format that the startup file can use
@ -1252,7 +1363,7 @@ sub universal_start_without_decrypt
# Create startup file for the server. # Create startup file for the server.
my $startup_file = my $startup_file =
Path::Class::File->new(GAME_STARTUP_DIR, "_serverStart.bat"); Path::Class::File->new(GAME_STARTUP_DIR, "$server_ip-$server_port");
if (open(STARTUP, '>', $startup_file)) if (open(STARTUP, '>', $startup_file))
{ {
@ -1479,43 +1590,66 @@ sub stop_server_without_decrypt
chdir $curDir; chdir $curDir;
} }
my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id); my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
my $get_screen_pid = "screen -list | grep $screen_id | cut -f1 -d'.' | sed '".'s/\W//g'."' | head -1"; my $screen_pid = get_screen_pid_without_decrypt($home_id);
my $screen_pid = `$get_screen_pid`; my $pid_meta = read_pid_metadata($home_id);
my @pids_to_kill;
push(@pids_to_kill, $screen_pid) if($screen_pid =~ /^\d+$/);
push(@pids_to_kill, $pid_meta->{windows_pid}) if(defined($pid_meta->{windows_pid}) && $pid_meta->{windows_pid} =~ /^\d+$/);
push(@pids_to_kill, $pid_meta->{game_pid}) if(defined($pid_meta->{game_pid}) && $pid_meta->{game_pid} =~ /^\d+$/);
my %seen_pid;
foreach my $pid (@pids_to_kill)
{
next if($seen_pid{$pid});
$seen_pid{$pid} = 1;
logger "Attempting forced stop for PID $pid.";
system("cmd /C taskkill /F /T /PID $pid >nul 2>&1");
}
chomp $screen_pid; system("screen -S $screen_id -X quit > /dev/null 2>&1");
system("screen -wipe > /dev/null 2>&1");
my $windows_pid_command = "ps -W | grep '" . $screen_pid . "' | head -1 | awk '{print \$4}'"; my $port_pid = find_process_pid_by_port_without_decrypt($server_port, "any");
my $windows_pid = `$windows_pid_command`; if($port_pid =~ /^\d+$/)
{
logger "Port $server_port still listening, killing owner PID $port_pid.";
system("cmd /C taskkill /F /T /PID $port_pid >nul 2>&1");
}
chomp $windows_pid; my $stop_result = verify_server_stopped_without_decrypt($home_id, $server_ip, $server_port);
if($stop_result == 0)
# Immediately kill the process {
logger "Immediately killing server process with PID $windows_pid."; clear_pid_metadata($home_id);
system("cmd /C taskkill /f /fi 'PID eq $windows_pid' /T"); }
system("screen -wipe $screen_pid > /dev/null 2>&1"); return $stop_result;
logger "Server ID $screen_pid : $home_id Process killed.";
return verify_server_stopped_without_decrypt($home_id, $server_ip, $server_port);
} }
sub verify_server_stopped_without_decrypt sub verify_server_stopped_without_decrypt
{ {
my ($home_id, $server_ip, $server_port) = @_; my ($home_id, $server_ip, $server_port) = @_;
my $pid_meta = read_pid_metadata($home_id);
for(my $i = 0; $i < 30; $i++) for(my $i = 0; $i < 30; $i++)
{ {
my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0; my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0;
my $pid_running = 0;
$pid_running = 1 if(defined($pid_meta->{windows_pid}) && $pid_meta->{windows_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{windows_pid}) == 1);
$pid_running = 1 if(!$pid_running && defined($pid_meta->{game_pid}) && $pid_meta->{game_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{game_pid}) == 1);
my $port_listening = is_port_listening_without_decrypt($server_ip, $server_port); my $port_listening = is_port_listening_without_decrypt($server_ip, $server_port);
return 0 if(!$session_running && !$port_listening); return 0 if(!$session_running && !$pid_running && !$port_listening);
sleep 2; sleep 2;
} }
my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id); my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
system("screen -S $screen_id -X quit"); system("screen -S $screen_id -X quit");
system("screen -wipe > /dev/null 2>&1"); system("screen -wipe > /dev/null 2>&1");
sleep 2; sleep 2;
my $port_pid = find_process_pid_by_port_without_decrypt($server_port, "any");
system("cmd /C taskkill /F /T /PID $port_pid >nul 2>&1") if($port_pid =~ /^\d+$/);
my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0; my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0;
my $pid_running = 0;
$pid_running = 1 if(defined($pid_meta->{windows_pid}) && $pid_meta->{windows_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{windows_pid}) == 1);
$pid_running = 1 if(!$pid_running && defined($pid_meta->{game_pid}) && $pid_meta->{game_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{game_pid}) == 1);
my $port_listening = is_port_listening_without_decrypt($server_ip, $server_port); my $port_listening = is_port_listening_without_decrypt($server_ip, $server_port);
if($session_running || $port_listening) if($session_running || $pid_running || $port_listening)
{ {
logger "Server $server_ip:$server_port is still running or listening after stop escalation."; logger "Server $server_ip:$server_port is still running or listening after stop escalation.";
return 1; return 1;
@ -2808,6 +2942,11 @@ sub restart_server_without_decrypt
{ {
logger "Waiting 60 seconds before starting the server again."; logger "Waiting 60 seconds before starting the server again.";
sleep 60; sleep 60;
if (verify_server_stopped_without_decrypt($home_id, $server_ip, $server_port) != 0)
{
logger "Restart cancelled: previous instance is still active after stop wait window.";
return -2;
}
if (universal_start_without_decrypt($home_id, $home_path, $server_exe, $run_dir, if (universal_start_without_decrypt($home_id, $home_path, $server_exe, $run_dir,
$cmd, $server_port, $server_ip, $cpu, $nice, $preStart, $envVars, $game_key, $console_log) == 1) $cmd, $server_port, $server_ip, $cpu, $nice, $preStart, $envVars, $game_key, $console_log) == 1)
{ {

View file

@ -28,3 +28,38 @@ Keep startup and stop behavior visible and explicit. Failures should be reported
Detailed status validation design: Detailed status validation design:
- [`GSP_WINDOWS_AGENT_PORT_VALIDATION.md`](GSP_WINDOWS_AGENT_PORT_VALIDATION.md) - [`GSP_WINDOWS_AGENT_PORT_VALIDATION.md`](GSP_WINDOWS_AGENT_PORT_VALIDATION.md)
## Lifecycle tracking
The Windows agent now tracks per-home runtime metadata under:
- `OGP64/OGP/runtime_status/pid-<home_id>.kv`
Tracked fields include PID/session information used to stop the right process reliably:
- `screen_pid`
- `windows_pid`
- `game_pid`
- `ip`
- `port`
## Stop escalation flow
When stop is requested, the agent now:
1. marks status hint as `STOPPING`
2. creates `SERVER_STOPPED` marker for autorestart handling
3. removes startup flag for `<ip>-<port>`
4. kills tracked PIDs (`screen_pid`, `windows_pid`, `game_pid`) if present
5. kills managed screen session
6. checks if the game port is still listening and kills the owning PID
7. verifies stop success (no managed session, no tracked running PID, no listening game port)
## Restart behavior
Restart remains stop-first and now guards against duplicates:
- stop is attempted first
- waits 60 seconds
- verifies old instance is stopped before starting again
- cancels the restart and returns an error (`-2`) if the old process/port is still active