• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 17427938490

03 Sep 2025 08:39AM UTC coverage: 68.597% (-0.05%) from 68.647%
17427938490

push

github

web-flow
[BUGFIX] Prevent warning when guzzle crawl requests return errors (#1167)

Resolves: #1166

8 of 13 new or added lines in 2 files covered. (61.54%)

1872 of 2729 relevant lines covered (68.6%)

3.26 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

67.44
/Classes/CrawlStrategy/GuzzleExecutionStrategy.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler\CrawlStrategy;
6

7
/*
8
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use GuzzleHttp\Exception\ConnectException;
23
use GuzzleHttp\Exception\RequestException;
24
use Psr\Http\Message\ResponseInterface;
25
use Psr\Http\Message\UriInterface;
26
use Psr\Log\LoggerAwareInterface;
27
use Psr\Log\LoggerAwareTrait;
28
use TYPO3\CMS\Core\Http\Client\GuzzleClientFactory;
29
use TYPO3\CMS\Core\Http\RequestFactory;
30
use TYPO3\CMS\Core\Utility\GeneralUtility;
31

32
/**
33
 * Calls Guzzle / CURL (based on TYPO3 settings) for fetching a URL.
34
 * @internal since v12.0.0
35
 */
36
class GuzzleExecutionStrategy implements LoggerAwareInterface, CrawlStrategyInterface
{
    use LoggerAwareTrait;

    /**
     * Fetches the given URL through TYPO3's RequestFactory (Guzzle) and returns the crawl result.
     *
     * On success the "X-T3Crawler-Meta" response header is unserialized and returned.
     * When Guzzle throws a RequestException or ConnectException, the failure is logged
     * at debug level and an array with a single "errorlog" entry is returned instead.
     *
     * @param UriInterface $url URL to request; user-info, if present, is sent as the "auth" request option
     * @param string $crawlerId Value sent in the "X-T3Crawler" request header and added to the log context
     * @return array|false See CrawlStrategyInterface::fetchUrlContents()
     */
    public function fetchUrlContents(UriInterface $url, string $crawlerId)
    {
        $options = [
            'headers' => $this->buildRequestHeaders($crawlerId),
            'connect_timeout' => 5.0,
        ];

        $userInfo = $url->getUserInfo();
        if ($userInfo !== '') {
            // Split on the first colon only: a password may itself contain ':' and Guzzle
            // interprets a third "auth" element as the authentication type.
            // Pad to two elements so a user-info without password still yields [user, ''].
            $options['auth'] = array_pad(explode(':', $userInfo, 2), 2, '');
        }

        $requestUrl = (string) $url;

        try {
            $response = $this->getResponse($requestUrl, $options);

            // NOTE(review): unserialize() runs on a header supplied by the requested host.
            // If the meta payload never contains objects, consider passing
            // ['allowed_classes' => false] - confirm against the producer of this header.
            return unserialize($response->getHeaderLine('X-T3Crawler-Meta'));
        } catch (RequestException $e) {
            $response = $e->getResponse();
            // Without a response there is no status line; fall back to 0 and the exception message.
            $message = $response !== null
                ? $response->getStatusCode() . ' ' . $response->getReasonPhrase()
                : '0 ' . $e->getMessage();

            return $this->logAndBuildErrorResult($requestUrl, $crawlerId, $message);
        } catch (ConnectException $e) {
            $message = $e->getCode() . ' ' . $e->getMessage();

            return $this->logAndBuildErrorResult($requestUrl, $crawlerId, $message);
        }
    }

    /**
     * Sends a GET request for the given URL using a RequestFactory built on a GuzzleClientFactory.
     *
     * Kept protected so the HTTP call can be replaced in a subclass.
     *
     * @param string $url URL to request
     * @param array $options Request options passed through to RequestFactory::request()
     */
    protected function getResponse(string $url, array $options): ResponseInterface
    {
        $guzzleClientFactory = GeneralUtility::makeInstance(GuzzleClientFactory::class);
        return GeneralUtility::makeInstance(RequestFactory::class, $guzzleClientFactory)
            ->request($url, 'GET', $options);
    }

    /**
     * Builds the HTTP request headers sent with every crawl request.
     *
     * @param string $crawlerId Value for the "X-T3Crawler" header
     * @return array<string, string> Header name => header value
     */
    private function buildRequestHeaders(string $crawlerId): array
    {
        return [
            'Connection' => 'close',
            'X-T3Crawler' => $crawlerId,
            'User-Agent' => 'TYPO3 crawler',
        ];
    }

    /**
     * Logs a failed request at debug level and builds the error result for the caller.
     *
     * @param string $url URL that was requested
     * @param string $crawlerId Crawler id added to the log context
     * @param string $message Error description, "<code> <text>"
     * @return array{errorlog: array{0: string}}
     */
    private function logAndBuildErrorResult(string $url, string $crawlerId, string $message): array
    {
        $this->logger->debug(
            // The message is passed as an argument, never concatenated into the format string:
            // exception messages can contain '%' (e.g. URL-encoded characters), which sprintf()
            // would otherwise parse as conversion specifiers.
            sprintf('Error while opening "%s" - %s', $url, $message),
            [
                'crawlerId' => $crawlerId,
            ]
        );

        return [
            'errorlog' => [$message],
        ];
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc