diff --git a/OGP64/OGP/ogp_agent.pl b/OGP64/OGP/ogp_agent.pl
index 08ecb86..9a1a856 100644
--- a/OGP64/OGP/ogp_agent.pl
+++ b/OGP64/OGP/ogp_agent.pl
@@ -86,6 +86,8 @@ use constant SCREEN_LOGS_DIR =>
   Path::Class::Dir->new(AGENT_RUN_DIR, 'screenlogs');
 use constant GAME_STARTUP_DIR =>
   Path::Class::Dir->new(AGENT_RUN_DIR, 'startups');
+use constant SERVER_RUNTIME_DIR =>
+  Path::Class::Dir->new(AGENT_RUN_DIR, 'runtime_status');
 use constant SCREENRC_FILE =>
   Path::Class::File->new(AGENT_RUN_DIR, 'ogp_screenrc');
 use constant SCREENRC_FILE_BK =>
@@ -209,6 +211,13 @@ if (!-e GAME_STARTUP_DIR)
 		exit 1;
 	}
 }
+
+if (!-d SERVER_RUNTIME_DIR && !mkdir SERVER_RUNTIME_DIR) # NOTE(review): inserted between the GAME_STARTUP_DIR if-block and its elsif, so the elsif below now chains to this test - confirm intended
+{
+	logger "Could not create " . SERVER_RUNTIME_DIR . " directory $!.", 1;
+	exit 1;
+}
+
 elsif ($clear_startups)
 {
 	opendir(STARTUPDIR, GAME_STARTUP_DIR);
@@ -743,6 +752,95 @@ sub read_status_hint
 	return ($timestamp, $state);
 }
 
+# Return the Path::Class::File for a home's PID metadata: SERVER_RUNTIME_DIR/pid-<digits of home_id>.kv.
+sub get_pid_metadata_path
+{
+	my ($home_id) = @_;
+	$home_id =~ s/[^0-9]//g;
+	return Path::Class::File->new(SERVER_RUNTIME_DIR, "pid-$home_id.kv");
+}
+
+# Write the hashref $values as sorted key=value lines (CR/LF stripped from values); returns 1 on success, 0 otherwise.
+sub write_pid_metadata
+{
+	my ($home_id, $values) = @_;
+	return 0 unless(ref($values) eq 'HASH');
+	my $file = get_pid_metadata_path($home_id);
+	if(open(PIDMETA, '>', $file))
+	{
+		foreach my $key (sort keys %$values)
+		{
+			my $value = defined($values->{$key}) ? $values->{$key} : "";
+			$value =~ s/[\r\n]//g;
+			print PIDMETA "$key=$value\n";
+		}
+		close(PIDMETA);
+		return 1;
+	}
+	return 0;
+}
+
+# Read a home's key=value metadata into a hashref; the hashref is empty when the file is missing or cannot be opened.
+sub read_pid_metadata
+{
+	my ($home_id) = @_;
+	my %values;
+	my $file = get_pid_metadata_path($home_id);
+	return \%values unless(-e $file);
+	if(open(PIDMETA, '<', $file))
+	{
+		while(my $line = <PIDMETA>)
+		{
+			chomp($line);
+			next unless($line =~ /^([^=]+)=(.*)$/);
+			$values{$1} = $2;
+		}
+		close(PIDMETA);
+	}
+	return \%values;
+}
+
+# Delete a home's PID metadata file if it exists.
+sub clear_pid_metadata
+{
+	my ($home_id) = @_;
+	my $file = get_pid_metadata_path($home_id);
+	unlink($file) if(-e $file);
+}
+
+# Return 1 when PowerShell Get-Process finds the given positive numeric PID, otherwise 0.
+sub is_pid_alive_without_decrypt
+{
+	my ($pid) = @_;
+	return 0 unless(defined($pid) && $pid =~ /^\d+$/ && $pid > 0);
+	my $retval = system('powershell.exe -NoProfile -ExecutionPolicy Bypass -Command "if(Get-Process -Id '
+		. $pid
+		. ' -ErrorAction SilentlyContinue){exit 0}else{exit 1}" >/dev/null 2>&1');
+	return $retval == 0 ? 1 : 0;
+}
+
+# Return the PID that `netstat -ano` lists for a TCP LISTENING or UDP socket on $port ("" if none); $protocol is tcp, udp or any (default).
+sub find_process_pid_by_port_without_decrypt
+{
+	my ($port, $protocol) = @_;
+	return "" unless(defined($port) && $port =~ /^\d+$/ && $port > 0 && $port <= 65535);
+	$protocol = "any" unless(defined($protocol) && $protocol =~ /^(tcp|udp|any)$/i);
+	$protocol = lc($protocol);
+	my $output = `netstat -ano 2>/dev/null`;
+	foreach my $line (split(/\r?\n/, $output))
+	{
+		if(($protocol eq "tcp" || $protocol eq "any") && $line =~ /^\s*TCP\s+\S+[:.]$port\s+\S+\s+LISTENING\s+([0-9]+)\s*$/i)
+		{
+			return $1;
+		}
+		if(($protocol eq "udp" || $protocol eq "any") && $line =~ /^\s*UDP\s+\S+[:.]$port\s+\S*\s*([0-9]+)\s*$/i)
+		{
+			return $1;
+		}
+	}
+	return "";
+}
+
 sub get_screen_pid_without_decrypt
 {
 	my ($home_id) = @_;
@@ -927,8 +1025,15 @@ sub server_status_without_decrypt
 
 	my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
 	my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0;
-	my $pid = $session_running ? get_screen_pid_without_decrypt($home_id) : "";
-	my $process_running = $session_running;
+	my $screen_pid = $session_running ? get_screen_pid_without_decrypt($home_id) : "";
+	my $pid_meta = read_pid_metadata($home_id);
+	my $windows_pid = defined($pid_meta->{windows_pid}) ? $pid_meta->{windows_pid} : "";
+	my $game_pid = defined($pid_meta->{game_pid}) ? $pid_meta->{game_pid} : "";
+	my $pid_running = 0;
+	$pid_running = 1 if($screen_pid ne "" && is_pid_alive_without_decrypt($screen_pid) == 1);
+	$pid_running = 1 if(!$pid_running && $windows_pid ne "" && is_pid_alive_without_decrypt($windows_pid) == 1);
+	$pid_running = 1 if(!$pid_running && $game_pid ne "" && is_pid_alive_without_decrypt($game_pid) == 1);
+	my $process_running = ($session_running || $pid_running) ? 1 : 0;
 	my @expected_ports = build_expected_ports($server_port, $query_port, $rcon_port);
 	my ($listening_ports, $missing_ports) = PORT_VALIDATION_ENABLED ? validate_expected_ports(@expected_ports) : ([], []);
 	my $expected_count = scalar(@expected_ports);
@@ -950,14 +1055,14 @@ sub server_status_without_decrypt
 	my $status_state = "Unknown";
 	my $ready = 0;
 
-	if(!$session_running && ((!PORT_VALIDATION_ENABLED && $game_port_listening) || ($expected_count > 0 && $listening_count > 0)))
+	if(!$process_running && ((!PORT_VALIDATION_ENABLED && $game_port_listening) || ($expected_count > 0 && $listening_count > 0)))
 	{
 		$status = "ONLINE";
 		$ready = 1;
 		$status_state = $missing_count == 0 ? "Running" : "Warning";
 		$last_error = "Required port is listening but the managed screen session is not running.";
 	}
-	elsif($session_running)
+	elsif($process_running)
 	{
 		if(!PORT_VALIDATION_ENABLED && $game_port_listening)
 		{
@@ -1009,6 +1114,7 @@ sub server_status_without_decrypt
 		ProcessRunning => $process_running,
 		process_running => $process_running,
 		session_running => $session_running,
+		pid_running => $pid_running,
 		game_port_listening => $game_port_listening ? 1 : 0,
 		query_port_listening => $query_port_listening ? 1 : 0,
 		rcon_port_listening => $rcon_port_listening ? 1 : 0,
@@ -1023,7 +1129,9 @@ sub server_status_without_decrypt
 		missing_ports => $missing_ports,
 		CPUUsage => get_agent_cpu_usage_percent(),
 		MemoryUsage => get_agent_memory_usage_percent(),
-		pid => $pid,
+		pid => $game_pid ne "" ? $game_pid : ($windows_pid ne "" ? $windows_pid : $screen_pid),
+		screen_pid => $screen_pid,
+		windows_pid => $windows_pid,
 		session_name => $screen_id,
 		ip => $server_ip,
 		port => $server_port,
@@ -1235,6 +1343,15 @@ sub universal_start_without_decrypt
 
 	system($cli_bin);
 	write_status_hint($home_id, "STARTING");
+	my $screen_pid = get_screen_pid_without_decrypt($home_id);
+	my $windows_pid = find_process_pid_by_port_without_decrypt($server_port, "any"); # NOTE(review): runs right after launch; the port may not be bound yet (or may still belong to another process) - confirm
+	write_pid_metadata($home_id, {
+		screen_pid => $screen_pid,
+		windows_pid => $windows_pid,
+		game_pid => $windows_pid,
+		ip => $server_ip,
+		port => $server_port
+	});
 
 	if(defined $preStart && $preStart ne ""){
 		# Get it in the format that the startup file can use
@@ -1252,7 +1369,7 @@ sub universal_start_without_decrypt
 
 	# Create startup file for the server.
 	my $startup_file =
-	  Path::Class::File->new(GAME_STARTUP_DIR, "_serverStart.bat");
+	  Path::Class::File->new(GAME_STARTUP_DIR, "$server_ip-$server_port");
 
 	if (open(STARTUP, '>', $startup_file))
 	{
@@ -1479,43 +1596,66 @@ sub stop_server_without_decrypt
 		chdir $curDir;
 	}
 
-	my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
-	my $get_screen_pid = "screen -list | grep $screen_id | cut -f1 -d'.' | sed '".'s/\W//g'."' | head -1";
-	my $screen_pid = `$get_screen_pid`;
+	my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
+	my $screen_pid = get_screen_pid_without_decrypt($home_id);
+	my $pid_meta = read_pid_metadata($home_id);
+	my @pids_to_kill;
+	push(@pids_to_kill, $screen_pid) if($screen_pid =~ /^\d+$/); # NOTE(review): the code this replaces mapped the screen PID through `ps -W` before taskkill - confirm this value is a Windows PID
+	push(@pids_to_kill, $pid_meta->{windows_pid}) if(defined($pid_meta->{windows_pid}) && $pid_meta->{windows_pid} =~ /^\d+$/);
+	push(@pids_to_kill, $pid_meta->{game_pid}) if(defined($pid_meta->{game_pid}) && $pid_meta->{game_pid} =~ /^\d+$/);
+	my %seen_pid;
+	foreach my $pid (@pids_to_kill)
+	{
+		next if($seen_pid{$pid});
+		$seen_pid{$pid} = 1;
+		logger "Attempting forced stop for PID $pid.";
+		system("cmd /C taskkill /F /T /PID $pid >/dev/null 2>&1");
+	}
 
-	chomp $screen_pid;
+	system("screen -S $screen_id -X quit > /dev/null 2>&1");
+	system("screen -wipe > /dev/null 2>&1");
 
-	my $windows_pid_command = "ps -W | grep '" . $screen_pid . "' | head -1 | awk '{print \$4}'";
-	my $windows_pid = `$windows_pid_command`;
+	my $port_pid = find_process_pid_by_port_without_decrypt($server_port, "any");
+	if($port_pid =~ /^\d+$/)
+	{
+		logger "Port $server_port still listening, killing owner PID $port_pid.";
+		system("cmd /C taskkill /F /T /PID $port_pid >/dev/null 2>&1");
+	}
 
-	chomp $windows_pid;
-
-	# Immediately kill the process
-	logger "Immediately killing server process with PID $windows_pid.";
-	system("cmd /C taskkill /f /fi 'PID eq $windows_pid' /T");
-	system("screen -wipe $screen_pid > /dev/null 2>&1");
-	logger "Server ID $screen_pid : $home_id Process killed.";
-
-	return verify_server_stopped_without_decrypt($home_id, $server_ip, $server_port);
+	my $stop_result = verify_server_stopped_without_decrypt($home_id, $server_ip, $server_port);
+	if($stop_result == 0)
+	{
+		clear_pid_metadata($home_id);
+	}
+	return $stop_result;
 }
 
 sub verify_server_stopped_without_decrypt
 {
 	my ($home_id, $server_ip, $server_port) = @_;
+	my $pid_meta = read_pid_metadata($home_id);
 	for(my $i = 0; $i < 30; $i++)
 	{
 		my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0;
+		my $pid_running = 0;
+		$pid_running = 1 if(defined($pid_meta->{windows_pid}) && $pid_meta->{windows_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{windows_pid}) == 1);
+		$pid_running = 1 if(!$pid_running && defined($pid_meta->{game_pid}) && $pid_meta->{game_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{game_pid}) == 1);
 		my $port_listening = is_port_listening_without_decrypt($server_ip, $server_port);
-		return 0 if(!$session_running && !$port_listening);
+		return 0 if(!$session_running && !$pid_running && !$port_listening);
 		sleep 2;
 	}
 	my $screen_id = create_screen_id(SCREEN_TYPE_HOME, $home_id);
 	system("screen -S $screen_id -X quit");
 	system("screen -wipe > /dev/null 2>&1");
 	sleep 2;
+	my $port_pid = find_process_pid_by_port_without_decrypt($server_port, "any");
+	system("cmd /C taskkill /F /T /PID $port_pid >/dev/null 2>&1") if($port_pid =~ /^\d+$/);
 	my $session_running = is_screen_running_without_decrypt(SCREEN_TYPE_HOME, $home_id) == 1 ? 1 : 0;
+	my $pid_running = 0;
+	$pid_running = 1 if(defined($pid_meta->{windows_pid}) && $pid_meta->{windows_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{windows_pid}) == 1);
+	$pid_running = 1 if(!$pid_running && defined($pid_meta->{game_pid}) && $pid_meta->{game_pid} =~ /^\d+$/ && is_pid_alive_without_decrypt($pid_meta->{game_pid}) == 1);
 	my $port_listening = is_port_listening_without_decrypt($server_ip, $server_port);
-	if($session_running || $port_listening)
+	if($session_running || $pid_running || $port_listening)
 	{
 		logger "Server $server_ip:$server_port is still running or listening after stop escalation.";
 		return 1;
@@ -2808,6 +2948,11 @@ sub restart_server_without_decrypt
 	{
 		logger "Waiting 60 seconds before starting the server again.";
 		sleep 60;
+		if (verify_server_stopped_without_decrypt($home_id, $server_ip, $server_port) != 0)
+		{
+			logger "Restart cancelled: previous instance is still active after stop wait window.";
+			return -2;
+		}
 		if (universal_start_without_decrypt($home_id, $home_path, $server_exe, $run_dir,
 											$cmd, $server_port, $server_ip, $cpu, $nice, $preStart, $envVars, $game_key, $console_log) == 1)
 		{
diff --git a/docs/PROCESS_MANAGEMENT.md b/docs/PROCESS_MANAGEMENT.md
index 6cd829c..f4e7a34 100644
--- a/docs/PROCESS_MANAGEMENT.md
+++ b/docs/PROCESS_MANAGEMENT.md
@@ -28,3 +28,38 @@ Keep startup and stop behavior visible and explicit. Failures should be reported
 Detailed status validation design:
 
 - [`GSP_WINDOWS_AGENT_PORT_VALIDATION.md`](GSP_WINDOWS_AGENT_PORT_VALIDATION.md)
+
+## Lifecycle tracking
+
+The Windows agent now tracks per-home runtime metadata under:
+
+- `OGP64/OGP/runtime_status/pid-<home_id>.kv`
+
+Tracked fields include PID/session information used to stop the right process reliably:
+
+- `screen_pid`
+- `windows_pid`
+- `game_pid`
+- `ip`
+- `port`
+
+## Stop escalation flow
+
+When stop is requested, the agent now:
+
+1. marks status hint as `STOPPING`
+2. creates `SERVER_STOPPED` marker for autorestart handling
+3. removes startup flag for `<ip>-<port>`
+4. kills tracked PIDs (`screen_pid`, `windows_pid`, `game_pid`) if present
+5. kills managed screen session
+6. checks if the game port is still listening and kills the owning PID
+7. verifies stop success (no managed session, no tracked running PID, no listening game port)
+
+## Restart behavior
+
+Restart remains stop-first and now guards against duplicates:
+
+- stop is attempted first
+- waits 60 seconds
+- verifies old instance is stopped before starting again
+- returns stop failure if the old process/port is still active