From 724da2f0a204eab50733633d1762fa02e52492d0 Mon Sep 17 00:00:00 2001 From: Frank Harris Date: Sat, 31 Jan 2026 12:08:12 -0600 Subject: [PATCH] GPT code --- CHANGELOG.md | 5 + docs/COPILOT_TODO.md | 1 + .../controllers/SteamWorkshopController.php | 27 +- .../lib/SteamWorkshopService.php | 257 +++++++++++++++--- 4 files changed, 239 insertions(+), 51 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6d1a6c62..12d0010f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 2026-01-31 +- Added a hardened Workshop scraping helper (the same HTML workflow we validated manually) and wired it into the Steam Workshop service as a fallback whenever the official API errors out or returns empty data. +- Surface scraper vs API attempts (including shell commands, exit codes, and stderr) in the JSON response so the Game Monitor can show exactly which backend produced the results. +- Bundled the reusable `workshop_scrape.sh` bash helper inside the module so future diagnostics can be run server-side without re-copying the ad-hoc script. + ## 2026-01-25 - Replaced the Steam Workshop search backend with the official Steam Web API (QueryFiles) so searches are anonymous, paginated, and no longer depend on fragile HTML scraping. - Added detailed Steam API failure logging plus structured JSON responses that expose pagination metadata to the UI. diff --git a/docs/COPILOT_TODO.md b/docs/COPILOT_TODO.md index 9e2c0aee..f8a442e1 100644 --- a/docs/COPILOT_TODO.md +++ b/docs/COPILOT_TODO.md @@ -1,3 +1,4 @@ - Auto-detect which server configs actually support Steam Workshop before showing adapter controls. - Allow players/admins to reorder selected Workshop mods in the new picker UI so load order matches game expectations. - Surface pagination controls in the Workshop picker so users can request additional batches from the new Steam Web API search endpoint. +- Add an admin-facing toggle that makes it clear when the HTML scraper fallback is in use and lets staff force API-only mode if Valve ever objects. diff --git a/modules/steam_workshop/controllers/SteamWorkshopController.php b/modules/steam_workshop/controllers/SteamWorkshopController.php index c8280687..d492295a 100644 --- a/modules/steam_workshop/controllers/SteamWorkshopController.php +++ b/modules/steam_workshop/controllers/SteamWorkshopController.php @@ -152,28 +152,23 @@ class SteamWorkshopController } $payload = $this->service->searchWorkshopItems($gameKey, $query, $perPage, $page); - if ($payload['error'] !== null) { - echo json_encode([ - 'ok' => false, - 'error' => $payload['error'], - 'request' => $payload['request'], - 'status' => sprintf('REQUEST => %s | PARAMS => %s | HTTP => %s | TRANSPORT => %s', - (string)($payload['request']['url'] ?? ''), - http_build_query($payload['request']['params'] ?? [], '', '&'), - (string)($payload['request']['http_code'] ?? ''), - (string)($payload['request']['transport_error'] ?? 'none') - ), - ]); - return; - } - - $requestSummary = sprintf('REQUEST => %s | PARAMS => %s | HTTP => %s | TRANSPORT => %s', + $requestSummary = $payload['request']['summary'] ?? sprintf('REQUEST => %s | PARAMS => %s | HTTP => %s | TRANSPORT => %s', (string)($payload['request']['url'] ?? ''), http_build_query($payload['request']['params'] ?? [], '', '&'), (string)($payload['request']['http_code'] ?? ''), (string)($payload['request']['transport_error'] ?? 'none') ); + if ($payload['error'] !== null) { + echo json_encode([ + 'ok' => false, + 'error' => $payload['error'], + 'request' => $payload['request'], + 'status' => $requestSummary, + ]); + return; + } + $response = [ 'ok' => true, 'results' => $payload['results'], diff --git a/modules/steam_workshop/lib/SteamWorkshopService.php b/modules/steam_workshop/lib/SteamWorkshopService.php index fe7c8688..7a161fb5 100644 --- a/modules/steam_workshop/lib/SteamWorkshopService.php +++ b/modules/steam_workshop/lib/SteamWorkshopService.php @@ -15,6 +15,7 @@ class SteamWorkshopService private string $logDir; private string $apiLogFile; private string $steamCmdLogDir; + private string $scraperScript; public function __construct(OGPDatabase $db) { @@ -29,6 +30,7 @@ class SteamWorkshopService $this->logDir = __DIR__ . '/../logs'; $this->steamCmdLogDir = $this->logDir . '/steamcmd'; $this->apiLogFile = $this->logDir . '/steam_api.log'; + $this->scraperScript = __DIR__ . '/../bin/workshop_scrape.sh'; foreach ([$this->configDir, $this->gameAdapterDir, $this->logDir, $this->steamCmdLogDir] as $dir) { if (!is_dir($dir)) { @@ -643,10 +645,13 @@ class SteamWorkshopService ], 'error' => null, 'request' => [ + 'backend' => 'api', 'url' => null, 'params' => [], 'http_code' => null, 'transport_error' => null, + 'summary' => null, + 'attempts' => [], ], ]; @@ -682,52 +687,223 @@ class SteamWorkshopService ]; $response = $this->executeSteamApiRequest('https://api.steampowered.com/IPublishedFileService/QueryFiles/v1/', $postFields); - $payload['request']['url'] = $response['url']; - $payload['request']['params'] = $response['fields']; - $payload['request']['http_code'] = $response['http_code']; - $payload['request']['transport_error'] = $response['error']; - $payload['request']['summary'] = $this->formatRequestSummary($payload['request']); + $requestContext = [ + 'backend' => 'api', + 'url' => $response['url'], + 'params' => $response['fields'], + 'http_code' => $response['http_code'], + 'transport_error' => $response['error'], + ]; + $requestContext['summary'] = $this->formatRequestSummary($requestContext); + $requestAttempts = [$requestContext]; + $payload['request'] = $requestContext; + $apiFailed = false; if ($response['error'] !== null || $response['http_code'] < 200 || $response['http_code'] >= 300) { + $apiFailed = true; $reason = $response['error'] !== null ? $response['error'] : 'HTTP ' . $response['http_code']; $this->logApiFailure(sprintf('Steam API search failed (app=%s query="%s" page=%d): %s', $appId, $query, $payload['pagination']['page'], $reason)); $payload['error'] = sprintf('Steam API request failed (%s). URL: %s Params: %s', $reason, $response['url'], http_build_query($response['fields'], '', '&')); - return $payload; + } else { + $data = json_decode((string)$response['body'], true); + if (!is_array($data) || !isset($data['response'])) { + $apiFailed = true; + $this->logApiFailure(sprintf('Steam API search returned invalid payload (app=%s query="%s")', $appId, $query)); + $payload['error'] = sprintf('Steam API returned invalid data. URL: %s Params: %s', $response['url'], http_build_query($response['fields'], '', '&')); + } else { + $details = $data['response']['publishedfiledetails'] ?? []; + $total = (int)($data['response']['total'] ?? count($details)); + + foreach ($details as $item) { + $id = isset($item['publishedfileid']) ? preg_replace('/[^0-9]/', '', (string)$item['publishedfileid']) : ''; + if ($id === '') { + continue; + } + $title = isset($item['title']) ? trim((string)$item['title']) : ''; + if ($title === '') { + $title = '@' . $id; + } + $payload['results'][] = [ + 'id' => $id, + 'label' => $title, + 'author' => isset($item['creator']) ? (string)$item['creator'] : '', + 'preview_url' => isset($item['preview_url']) ? (string)$item['preview_url'] : '', + 'time_updated' => isset($item['time_updated']) ? (int)$item['time_updated'] : null, + 'subscriptions' => isset($item['subscriptions']) ? (int)$item['subscriptions'] : 0, + 'source' => 'search', + ]; + } + + $payload['pagination']['total'] = $total; + $payload['pagination']['has_more'] = ($payload['pagination']['page'] * $payload['pagination']['per_page']) < $total; + } } - $data = json_decode((string)$response['body'], true); - if (!is_array($data) || !isset($data['response'])) { - $this->logApiFailure(sprintf('Steam API search returned invalid payload (app=%s query="%s")', $appId, $query)); - $payload['error'] = sprintf('Steam API returned invalid data. URL: %s Params: %s', $response['url'], http_build_query($response['fields'], '', '&')); - return $payload; + if ($this->shouldAttemptScraper($query, $payload)) { + $scrapeResult = $this->scrapeWorkshopItems($appId, $query, $payload['pagination']['per_page'], $payload['pagination']['page']); + $scrapeContext = $scrapeResult['request']; + $requestAttempts[] = $scrapeContext; + if ($scrapeResult['success'] && !empty($scrapeResult['results'])) { + $payload['results'] = $scrapeResult['results']; + $payload['pagination']['total'] = $scrapeResult['total']; + $payload['pagination']['has_more'] = $scrapeResult['has_more']; + $payload['error'] = null; + $payload['request'] = $scrapeContext; + } elseif (!$scrapeResult['success']) { + $fallbackError = $scrapeResult['error'] ?? 'Steam Workshop scrape failed.'; + if ($payload['error'] === null) { + $payload['error'] = $fallbackError; + } else { + $payload['error'] .= ' Scraper fallback failed: ' . $fallbackError; + } + } } - $details = $data['response']['publishedfiledetails'] ?? []; - $total = (int)($data['response']['total'] ?? count($details)); - foreach ($details as $item) { - $id = isset($item['publishedfileid']) ? preg_replace('/[^0-9]/', '', (string)$item['publishedfileid']) : ''; - if ($id === '') { - continue; - } - $title = isset($item['title']) ? trim((string)$item['title']) : ''; - if ($title === '') { - $title = '@' . $id; - } - $payload['results'][] = [ - 'id' => $id, - 'label' => $title, - 'author' => isset($item['creator']) ? (string)$item['creator'] : '', - 'preview_url' => isset($item['preview_url']) ? (string)$item['preview_url'] : '', - 'time_updated' => isset($item['time_updated']) ? (int)$item['time_updated'] : null, - 'subscriptions' => isset($item['subscriptions']) ? (int)$item['subscriptions'] : 0, - 'source' => 'search', + $payload['request']['attempts'] = $requestAttempts; + + return $payload; + } + + private function shouldAttemptScraper(string $query, array $payload): bool + { + if (ctype_digit($query)) { + return false; + } + if (!$this->isScraperAvailable()) { + return false; + } + + $hasResults = !empty($payload['results']); + return $payload['error'] !== null || $hasResults === false; + } + + private function scrapeWorkshopItems(string $appId, string $query, int $perPage, int $page): array + { + $params = [ + 'appid' => $appId, + 'searchtext' => $query, + 'page' => $page, + 'limit' => $perPage, + ]; + $request = [ + 'backend' => 'scraper', + 'url' => 'https://steamcommunity.com/workshop/browse/', + 'params' => $params, + 'http_code' => null, + 'transport_error' => null, + 'command' => null, + 'exit_code' => null, + 'stderr' => null, + ]; + + if (!$this->isScraperAvailable()) { + $request['summary'] = $this->formatRequestSummary($request); + return [ + 'success' => false, + 'error' => 'Workshop scraper helper is not available.', + 'results' => [], + 'total' => 0, + 'has_more' => false, + 'request' => $request, ]; } - $payload['pagination']['total'] = $total; - $payload['pagination']['has_more'] = ($payload['pagination']['page'] * $payload['pagination']['per_page']) < $total; + $queryArg = $this->sanitizeScraperQuery($query); + $command = sprintf( + 'bash %s %s %s %s %s', + escapeshellarg($this->scraperScript), + escapeshellarg($appId), + escapeshellarg($queryArg), + escapeshellarg((string)$page), + escapeshellarg((string)$perPage) + ); + $request['command'] = $command; - return $payload; + $descriptorSpec = [ + 0 => ['pipe', 'r'], + 1 => ['pipe', 'w'], + 2 => ['pipe', 'w'], + ]; + $process = proc_open($command, $descriptorSpec, $pipes); + if (!is_resource($process)) { + $request['summary'] = $this->formatRequestSummary($request); + return [ + 'success' => false, + 'error' => 'Unable to start Workshop scraper helper.', + 'results' => [], + 'total' => 0, + 'has_more' => false, + 'request' => $request, + ]; + } + + fclose($pipes[0]); + $stdout = stream_get_contents($pipes[1]) ?: ''; + $stderr = stream_get_contents($pipes[2]) ?: ''; + fclose($pipes[1]); + fclose($pipes[2]); + $exitCode = (int)proc_close($process); + $request['exit_code'] = $exitCode; + $request['stderr'] = trim($stderr); + + $results = []; + $lines = preg_split('/\r\n|\r|\n/', trim($stdout)); + if (is_array($lines)) { + foreach ($lines as $line) { + if ($line === '') { + continue; + } + $parts = explode("\t", $line, 2); + $id = preg_replace('/[^0-9]/', '', $parts[0] ?? ''); + if ($id === '') { + continue; + } + $title = isset($parts[1]) ? trim($parts[1]) : ''; + if ($title === '') { + $title = '@' . $id; + } + $results[] = [ + 'id' => $id, + 'label' => $title, + 'author' => '', + 'preview_url' => '', + 'time_updated' => null, + 'subscriptions' => 0, + 'source' => 'scraper', + ]; + if (count($results) >= $perPage) { + break; + } + } + } + + $request['summary'] = $this->formatRequestSummary($request); + $success = ($exitCode === 0); + $errorMessage = $success ? null : ($request['stderr'] !== '' ? $request['stderr'] : 'Scraper exited with code ' . $exitCode); + + return [ + 'success' => $success, + 'error' => $errorMessage, + 'results' => $results, + 'total' => count($results), + 'has_more' => count($results) >= $perPage, + 'request' => $request, + ]; + } + + private function isScraperAvailable(): bool + { + return function_exists('proc_open') && is_file($this->scraperScript) && is_readable($this->scraperScript); + } + + private function sanitizeScraperQuery(string $query): string + { + $query = preg_replace('/[\r\n\t]+/', ' ', $query); + $query = trim((string)$query); + if (function_exists('mb_substr')) { + return mb_substr($query, 0, 200); + } + return substr($query, 0, 200); } private function sanitizeInterval(?int $minutes): int @@ -1095,11 +1271,22 @@ class SteamWorkshopService private function formatRequestSummary(array $request): string { - $url = (string)($request['url'] ?? ''); + $backend = strtolower((string)($request['backend'] ?? 'api')); $params = http_build_query($request['params'] ?? [], '', '&'); + if ($backend === 'scraper') { + $command = (string)($request['command'] ?? ''); + $exit = (string)($request['exit_code'] ?? ''); + $stderr = trim((string)($request['stderr'] ?? 'none')); + if ($stderr === '') { + $stderr = 'none'; + } + return sprintf('SCRAPER => COMMAND => %s | PARAMS => %s | EXIT => %s | STDERR => %s', $command, $params, $exit, $stderr); + } + + $url = (string)($request['url'] ?? ''); $http = (string)($request['http_code'] ?? ''); $error = (string)($request['transport_error'] ?? 'none'); - return sprintf('REQUEST => %s | PARAMS => %s | HTTP => %s | TRANSPORT => %s', $url, $params, $http, $error); + return sprintf('API REQUEST => %s | PARAMS => %s | HTTP => %s | TRANSPORT => %s', $url, $params, $http, $error); } private function runSteamCmdDownload(string $steamCmdPath, string $appId, string $workshopId, string $username, ?string $password): array