• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

martinille / meta-tag-extraction / 14258852548

04 Apr 2025 05:43AM UTC coverage: 95.489% (+0.1%) from 95.349%
14258852548

push

github

martinille
Add URL validation and exception handling in WebScraper; enhance tests for error cases

4 of 4 new or added lines in 1 file covered. (100.0%)

3 existing lines in 1 file now uncovered.

127 of 133 relevant lines covered (95.49%)

4.17 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.68
/src/WebScraper.php
1
<?php declare(strict_types = 1);
2

3
namespace MartinIlle\MetaTagExtraction;
4

5
use GuzzleHttp\Client;
6
use GuzzleHttp\Psr7\HttpFactory;
7
use GuzzleHttp\Psr7\Utils;
8
use Psr\Http\Client\ClientExceptionInterface;
9
use Psr\Http\Client\ClientInterface;
10
use Psr\Http\Message\RequestFactoryInterface;
11
use Psr\Http\Message\RequestInterface;
12
use Psr\Http\Message\ResponseInterface;
13
use Psr\SimpleCache\CacheInterface;
14
use Psr\SimpleCache\InvalidArgumentException;
15

16
/**
 * Fetches web pages through a PSR-18 HTTP client, optionally caching the
 * response bodies in a PSR-16 cache.
 *
 * The HTTP client and request factory default to Guzzle's implementations
 * when none have been injected. Responses returned by {@see fetch()} carry
 * only a status code and a body; headers of the upstream response are not
 * copied over.
 */
class WebScraper
{
    private const USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3';

    /**
     * Status code assigned to responses rebuilt from the cache. Only live
     * responses with this status are stored, so a cache hit never turns an
     * error page into an apparent success.
     */
    private const CACHED_STATUS_CODE = 200;

    /**
     * Cache key prefix. Restricted to characters PSR-16 guarantees
     * (A-Z, a-z, 0-9, "_" and "."); "{}()/\@:" are reserved by the spec.
     */
    private const CACHE_KEY_PREFIX = 'web_scraper_';

    private ?ClientInterface $httpClient = null;
    private ?RequestFactoryInterface $requestFactory = null;
    private ?CacheInterface $cache = null;
    private int $cacheTtl = 60;

    /**
     * Sets the cache instance and the TTL (in seconds) used when storing bodies.
     * Pass null to disable caching.
     *
     * The TTL is validated even when caching is being disabled.
     *
     * @param  null|CacheInterface  $cache
     * @param  int  $ttl
     * @return void
     * @throws \InvalidArgumentException When $ttl is not greater than 0.
     */
    public function setCache(?CacheInterface $cache, int $ttl = 60): void
    {
        // The parameter type already guarantees $cache is null or a
        // CacheInterface, so no instanceof guard is needed here.
        if ($ttl <= 0) {
            throw new \InvalidArgumentException('Cache TTL must be greater than 0');
        }

        $this->cache = $cache;
        $this->cacheTtl = $ttl;
    }

    /**
     * Sets the request factory instance.
     * @param RequestFactoryInterface $requestFactory
     */
    public function setRequestFactory(RequestFactoryInterface $requestFactory): void
    {
        $this->requestFactory = $requestFactory;
    }

    /**
     * Sets the HTTP client instance.
     * Http client object must implement the `Psr\Http\Client\ClientInterface`.
     * For example, you can use `GuzzleHttp\Client` or `Symfony\Component\HttpClient\HttpClient`.
     * @param ClientInterface $httpClient
     */
    public function setHttpClient(ClientInterface $httpClient): void
    {
        $this->httpClient = $httpClient;
    }

    /**
     * Fetches the content of a given URL.
     *
     * When a cache is configured and holds a string for this URL, the cached
     * body is returned as a 200 response without any HTTP request. Otherwise
     * a GET request is sent and the live status code and body are returned;
     * the body is written to the cache only when the status code is 200.
     *
     * @param  string  $url  Absolute URL accepted by FILTER_VALIDATE_URL.
     * @return ResponseInterface A new response holding only status code and body.
     * @throws \InvalidArgumentException When $url is empty or not a valid URL.
     * @throws ClientExceptionInterface When the HTTP client fails to send the request.
     * @throws InvalidArgumentException When the cache rejects the cache key.
     */
    public function fetch(string $url): ResponseInterface
    {
        // Strict comparison: empty() would also treat the string '0' as empty.
        if ($url === '') {
            throw new \InvalidArgumentException('URL cannot be empty');
        }

        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            throw new \InvalidArgumentException('Invalid URL provided');
        }

        $cacheKey = $this->getCacheKey($url);

        if ($this->cache !== null) {
            $cachedBody = $this->cache->get($cacheKey);
            if (is_string($cachedBody)) {
                return $this->buildResponse(self::CACHED_STATUS_CODE, $cachedBody);
            }
        }

        $request = $this->prepareRequest($url);

        $this->httpClient ??= new Client();
        $response = $this->httpClient->sendRequest($request);

        $statusCode = $response->getStatusCode();
        // Read the body exactly once: a preceding getContents() call would
        // drain a non-seekable stream and leave this cast with an empty string.
        $body = (string) $response->getBody();

        if ($this->cache !== null && $statusCode === self::CACHED_STATUS_CODE) {
            $this->cache->set($cacheKey, $body, $this->cacheTtl);
        }

        return $this->buildResponse($statusCode, $body);
    }

    /**
     * Builds the PSR-7 GET request for $url with the scraper's User-Agent and
     * Accept headers, creating the default request factory if none was set.
     */
    private function prepareRequest(string $url): RequestInterface
    {
        $this->requestFactory ??= new HttpFactory();

        return $this->requestFactory->createRequest('GET', $url)
            ->withHeader('User-Agent', self::USER_AGENT)
            ->withHeader('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8');
    }

    /**
     * Creates a fresh response carrying only the given status code and body.
     */
    private function buildResponse(int $statusCode, string $body): ResponseInterface
    {
        return (new HttpFactory())->createResponse($statusCode)
            ->withBody(Utils::streamFor($body));
    }

    /**
     * Derives the cache key for a URL: a fixed prefix plus the full SHA-1 of
     * the URL (52 characters in total, all within the PSR-16 guaranteed set).
     */
    private function getCacheKey(string $url): string
    {
        return self::CACHE_KEY_PREFIX . sha1($url);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc