last chance lol
This commit is contained in:
parent
9ac28031a4
commit
c8b3295654
2 changed files with 218 additions and 2 deletions
|
|
@ -2,6 +2,7 @@
|
|||
|
||||
## 2026-01-31
|
||||
- Added a hardened Workshop scraping helper (the same HTML workflow we validated manually) and wired it into the Steam Workshop service as a fallback whenever the official API errors out or returns empty data.
|
||||
- Added a native PHP HTTP scraper fallback (auto-selected when bash/proc_open is unavailable, e.g., on Windows XAMPP installs) so the Game Monitor search stops showing “Unable to contact the Steam Workshop” when the API dies but the HTML workflow still works.
|
||||
- Surfaced scraper vs. API attempts (including shell commands, exit codes, and stderr) in the JSON response so the Game Monitor can show exactly which backend produced the results.
|
||||
- Bundled the reusable `workshop_scrape.sh` bash helper inside the module so future diagnostics can be run server-side without re-copying the ad-hoc script.
|
||||
|
||||
|
|
|
|||
|
|
@ -742,7 +742,10 @@ class SteamWorkshopService
|
|||
if ($this->shouldAttemptScraper($query, $payload)) {
|
||||
$scrapeResult = $this->scrapeWorkshopItems($appId, $query, $payload['pagination']['per_page'], $payload['pagination']['page']);
|
||||
$scrapeContext = $scrapeResult['request'];
|
||||
$requestAttempts[] = $scrapeContext;
|
||||
$attemptContexts = $scrapeResult['attempts'] ?? [$scrapeContext];
|
||||
foreach ($attemptContexts as $attemptContext) {
|
||||
$requestAttempts[] = $attemptContext;
|
||||
}
|
||||
if ($scrapeResult['success'] && !empty($scrapeResult['results'])) {
|
||||
$payload['results'] = $scrapeResult['results'];
|
||||
$payload['pagination']['total'] = $scrapeResult['total'];
|
||||
|
|
@ -769,7 +772,7 @@ class SteamWorkshopService
|
|||
if (ctype_digit($query)) {
|
||||
return false;
|
||||
}
|
||||
if (!$this->isScraperAvailable()) {
|
||||
if (!$this->hasAnyScraperTransport()) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
@ -777,7 +780,46 @@ class SteamWorkshopService
|
|||
return $payload['error'] !== null || $hasResults === false;
|
||||
}
|
||||
|
||||
/**
 * Reports whether at least one scraping transport can be used.
 *
 * The shell helper is checked first; if it is unavailable, the presence of
 * the PHP cURL extension is accepted instead.
 *
 * @return bool True when the shell helper is available or curl_init() exists.
 */
private function hasAnyScraperTransport(): bool
{
    return $this->isScraperAvailable() || function_exists('curl_init');
}
|
||||
|
||||
/**
 * Scrapes Workshop items, trying the shell helper first and the HTTP scraper second.
 *
 * The shell result is returned immediately when it succeeded with a non-empty
 * result list. In every other case the HTTP scraper runs and its result is
 * returned. Each transport that was tried (or found unavailable) contributes
 * one request context to the 'attempts' entry of the returned array.
 *
 * @param string $appId   Steam application ID to search within.
 * @param string $query   Search text.
 * @param int    $perPage Number of items requested per page.
 * @param int    $page    Page number to request.
 *
 * @return array Result of the transport that answered, with an added 'attempts' list.
 */
private function scrapeWorkshopItems(string $appId, string $query, int $perPage, int $page): array
{
    $trail = [];
    $shellFailure = null;

    if (!$this->isScraperAvailable()) {
        // No shell transport: record why, so the attempt log still shows it.
        $shellFailure = 'Workshop scraper helper script is missing or unreadable.';
        if ($this->isWindowsPlatform()) {
            $shellFailure = 'Shell scraper helper is disabled on Windows hosts.';
        }
        $trail[] = $this->buildShellUnavailableContext($shellFailure);
    } else {
        $viaShell = $this->runShellScraper($appId, $query, $perPage, $page);
        $trail[] = $viaShell['request'];

        $shellDelivered = $viaShell['success'] && !empty($viaShell['results']);
        if ($shellDelivered) {
            $viaShell['attempts'] = $trail;

            return $viaShell;
        }

        // May stay null when the shell run reported no error but found nothing.
        $shellFailure = $viaShell['error'] ?? null;
    }

    $viaHttp = $this->scrapeWorkshopItemsHttp($appId, $query, $perPage, $page);
    $trail[] = $viaHttp['request'];

    $bothFailed = $shellFailure !== null && !$viaHttp['success'];
    if ($bothFailed) {
        // Append the shell failure so the caller sees both reasons in one message.
        $httpFailure = $viaHttp['error'] ?? '';
        $viaHttp['error'] = trim($httpFailure . ' | Shell: ' . $shellFailure);
    }

    $viaHttp['attempts'] = $trail;

    return $viaHttp;
}
|
||||
|
||||
private function runShellScraper(string $appId, string $query, int $perPage, int $page): array
|
||||
{
|
||||
$params = [
|
||||
'appid' => $appId,
|
||||
|
|
@ -891,11 +933,122 @@ class SteamWorkshopService
|
|||
];
|
||||
}
|
||||
|
||||
/**
 * Scrapes Workshop search results with plain HTTP requests.
 *
 * Fetches the Steam Community browse page, collects the item IDs found in
 * "sharedfiles/filedetails/?id=" links (first occurrence wins, order kept),
 * then requests each item's detail page to read its title. One detail
 * request is issued per returned item.
 *
 * @param string $appId   Steam application ID to search within.
 * @param string $query   Search text; passed through sanitizeScraperQuery().
 * @param int    $perPage Maximum number of items to return (values below 1 become 1).
 * @param int    $page    Page number sent as the "p" query parameter.
 *
 * @return array Keys: success, error, results, total, has_more, request.
 */
private function scrapeWorkshopItemsHttp(string $appId, string $query, int $perPage, int $page): array
{
    $perPage = max(1, $perPage);
    $browseUrl = 'https://steamcommunity.com/workshop/browse/';
    $agent = $this->getScraperUserAgent();

    // A response is usable only with no transport error, a 2xx status and a body.
    $isUsable = static function (array $reply): bool {
        return $reply['error'] === null
            && $reply['http_code'] >= 200
            && $reply['http_code'] < 300
            && $reply['body'] !== null;
    };

    $params = [
        'appid' => $appId,
        'browsesort' => 'textsearch',
        'section' => 'readytouseitems',
        'searchtext' => $this->sanitizeScraperQuery($query),
        'p' => $page,
    ];
    $request = [
        'backend' => 'scraper_http',
        'url' => $browseUrl,
        'params' => $params,
        'http_code' => null,
        'transport_error' => null,
    ];

    $listing = $this->httpGet($browseUrl, $params, $agent);
    $request['url'] = $listing['url'] ?? $request['url'];
    $request['http_code'] = $listing['http_code'];
    $request['transport_error'] = $listing['error'];

    if (!$isUsable($listing)) {
        $request['summary'] = $this->formatRequestSummary($request);
        $reason = $listing['error'] ?? ('HTTP ' . $listing['http_code']);

        return [
            'success' => false,
            'error' => 'Steam Community browse request failed: ' . $reason,
            'results' => [],
            'total' => 0,
            'has_more' => false,
            'request' => $request,
        ];
    }

    $found = [];
    preg_match_all('/sharedfiles\/filedetails\/\?id=([0-9]+)/i', (string)$listing['body'], $found);

    // De-duplicate through array keys, keeping first-seen order. PHP casts
    // canonical decimal string keys that fit in an int to integers, so the
    // IDs read back by array_keys() are integers in that case.
    $seen = [];
    foreach ($found[1] ?? [] as $candidate) {
        $digits = preg_replace('/[^0-9]/', '', (string)$candidate);
        if ($digits !== '' && !isset($seen[$digits])) {
            $seen[$digits] = true;
        }
    }
    $orderedIds = array_keys($seen);
    $hasMore = count($orderedIds) > $perPage;

    $items = [];
    foreach (array_slice($orderedIds, 0, $perPage) as $itemId) {
        $detail = $this->httpGet(
            'https://steamcommunity.com/sharedfiles/filedetails/',
            ['id' => $itemId],
            $agent
        );

        $label = $isUsable($detail) ? $this->parseWorkshopTitle((string)$detail['body']) : '';
        if ($label === '') {
            // No readable title: fall back to an "@<id>" placeholder label.
            $label = '@' . $itemId;
        }

        $items[] = [
            'id' => $itemId,
            'label' => $label,
            'author' => '',
            'preview_url' => '',
            'time_updated' => null,
            'subscriptions' => 0,
            'source' => 'scraper_http',
        ];
    }

    $request['summary'] = $this->formatRequestSummary($request);

    return [
        'success' => true,
        'error' => null,
        'results' => $items,
        'total' => count($items),
        'has_more' => $hasMore,
        'request' => $request,
    ];
}
|
||||
|
||||
/**
 * Builds the request context recorded when the shell scraper cannot be run.
 *
 * The message is stored both as the transport error and as stderr, and the
 * command is set to the "[unavailable]" placeholder. A formatted summary is
 * appended as the last entry.
 *
 * @param string $message Reason the shell scraper is unavailable.
 *
 * @return array Request context with backend "scraper" and a 'summary' entry.
 */
private function buildShellUnavailableContext(string $message): array
{
    $details = [
        'backend' => 'scraper',
        'url' => 'https://steamcommunity.com/workshop/browse/',
        'params' => [],
        'http_code' => null,
        'transport_error' => $message,
        'command' => '[unavailable]',
        'exit_code' => null,
        'stderr' => $message,
    ];

    // The summary is derived from the context before the summary key exists.
    return $details + ['summary' => $this->formatRequestSummary($details)];
}
|
||||
|
||||
/**
 * Reports whether the shell scraper helper can be executed on this host.
 *
 * Requires a non-Windows platform, the proc_open() function, and a helper
 * script path that is a readable regular file.
 *
 * @return bool True when all of the above conditions hold.
 */
private function isScraperAvailable(): bool
{
    if ($this->isWindowsPlatform() || !function_exists('proc_open')) {
        return false;
    }

    $script = $this->scraperScript;

    return is_file($script) && is_readable($script);
}
|
||||
|
||||
/**
 * Detects a Windows host by checking for a backslash directory separator.
 *
 * @return bool True when DIRECTORY_SEPARATOR is a backslash.
 */
private function isWindowsPlatform(): bool
{
    $separator = DIRECTORY_SEPARATOR;

    return '\\' === $separator;
}
|
||||
|
||||
private function sanitizeScraperQuery(string $query): string
|
||||
{
|
||||
$query = preg_replace('/[\r\n\t]+/', ' ', $query);
|
||||
|
|
@ -906,6 +1059,27 @@ class SteamWorkshopService
|
|||
return substr($query, 0, 200);
|
||||
}
|
||||
|
||||
/**
 * Returns the browser-style User-Agent header value used for scraper requests.
 *
 * @return string A fixed desktop Chrome-on-Linux User-Agent string.
 */
private function getScraperUserAgent(): string
{
    $segments = [
        'Mozilla/5.0 (X11; Linux x86_64)',
        'AppleWebKit/537.36 (KHTML, like Gecko)',
        'Chrome/120.0',
        'Safari/537.36',
    ];

    return implode(' ', $segments);
}
|
||||
|
||||
/**
 * Extracts a human-readable item title from a Workshop detail page.
 *
 * Reads the first <title> element, decodes HTML entities, and removes Steam
 * branding: a trailing " - Steam Community…" / " - Steam Workshop…" suffix
 * and a leading "Steam Workshop::" prefix. If removing the branding leaves
 * nothing, the decoded title is returned unchanged.
 *
 * @param string $html Raw HTML of the detail page.
 *
 * @return string The cleaned title, or '' when no non-empty <title> is found.
 */
private function parseWorkshopTitle(string $html): string
{
    if (preg_match('/<title>(.*?)<\/title>/is', $html, $matches) !== 1) {
        return '';
    }

    $title = html_entity_decode(trim($matches[1]), ENT_QUOTES, 'UTF-8');
    if ($title === '') {
        return '';
    }

    $clean = preg_replace('/ - Steam (Community|Workshop).*$/i', '', $title);
    if (!is_string($clean)) {
        // preg_replace() returns null on a PCRE error; returning that would
        // violate the string return type, so fall back to the decoded title.
        return $title;
    }

    // Workshop item pages title themselves "Steam Workshop::<name>".
    $withoutPrefix = preg_replace('/^\s*Steam Workshop\s*::\s*/i', '', $clean);
    if (is_string($withoutPrefix)) {
        $clean = $withoutPrefix;
    }

    $clean = trim($clean);

    return $clean !== '' ? $clean : $title;
}
|
||||
|
||||
private function sanitizeInterval(?int $minutes): int
|
||||
{
|
||||
if ($minutes === null || $minutes <= 0) {
|
||||
|
|
@ -1269,6 +1443,42 @@ class SteamWorkshopService
|
|||
return ['body' => $error === null ? $body : null, 'http_code' => $status, 'error' => $error, 'url' => $url, 'fields' => $fields];
|
||||
}
|
||||
|
||||
/**
 * Performs an HTTP GET request with cURL and returns a normalized result.
 *
 * Query parameters are RFC 3986 encoded and appended to the URL with "?" or
 * "&", depending on whether the URL already contains a query string.
 * Redirects are followed and the whole transfer is limited to 20 seconds.
 *
 * @param string      $url       Base URL to request.
 * @param array       $params    Query parameters to append.
 * @param string|null $userAgent User-Agent header; a GSP default is used when null.
 *
 * @return array Keys: body (null when an error occurred), http_code, error
 *               (null on success), url (final request URL), params.
 */
private function httpGet(string $url, array $params = [], ?string $userAgent = null): array
{
    // Shared shape for failures that happen before any transfer is attempted.
    $failure = static function (string $message, string $target) use ($params): array {
        return ['body' => null, 'http_code' => 0, 'error' => $message, 'url' => $target, 'params' => $params];
    };

    if (!function_exists('curl_init')) {
        return $failure('PHP cURL extension is required', $url);
    }

    $target = $url;
    $encoded = http_build_query($params, '', '&', PHP_QUERY_RFC3986);
    if ($encoded !== '') {
        $joiner = strpos($url, '?') === false ? '?' : '&';
        $target = $url . $joiner . $encoded;
    }

    $handle = curl_init($target);
    if ($handle === false) {
        return $failure('Unable to initialize cURL', $target);
    }

    $options = [
        CURLOPT_RETURNTRANSFER => true,
        CURLOPT_FOLLOWLOCATION => true,
        CURLOPT_TIMEOUT => 20,
        // Empty string lets cURL advertise every encoding it supports.
        CURLOPT_ENCODING => '',
        CURLOPT_USERAGENT => $userAgent ?? 'GSP-Workshop/1.0 (+https://github.com/GameServerPanel/GSP)',
        CURLOPT_HTTPHEADER => ['Accept: text/html,application/xhtml+xml;q=0.9,*/*;q=0.8'],
    ];
    curl_setopt_array($handle, $options);

    $payload = curl_exec($handle);
    $failureText = curl_errno($handle) !== 0 ? curl_error($handle) : null;
    $statusCode = (int)curl_getinfo($handle, CURLINFO_HTTP_CODE);
    curl_close($handle);

    return [
        'body' => $failureText === null ? $payload : null,
        'http_code' => $statusCode,
        'error' => $failureText,
        'url' => $target,
        'params' => $params,
    ];
}
|
||||
|
||||
private function formatRequestSummary(array $request): string
|
||||
{
|
||||
$backend = strtolower((string)($request['backend'] ?? 'api'));
|
||||
|
|
@ -1281,6 +1491,11 @@ class SteamWorkshopService
|
|||
$stderr = 'none';
|
||||
}
|
||||
return sprintf('SCRAPER => COMMAND => %s | PARAMS => %s | EXIT => %s | STDERR => %s', $command, $params, $exit, $stderr);
|
||||
} elseif ($backend === 'scraper_http') {
|
||||
$url = (string)($request['url'] ?? '');
|
||||
$http = (string)($request['http_code'] ?? '');
|
||||
$error = (string)($request['transport_error'] ?? 'none');
|
||||
return sprintf('SCRAPER_HTTP => URL => %s | PARAMS => %s | HTTP => %s | TRANSPORT => %s', $url, $params, $http, $error);
|
||||
}
|
||||
|
||||
$url = (string)($request['url'] ?? '');
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue