• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

JayBizzle / Crawler-Detect / #1224

pending completion
#1224

push

web-flow
Merge pull request #494 from sunnydavis/master

30 of 31 relevant lines covered (96.77%)

8.58 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

96.67
/src/CrawlerDetect.php
1
<?php
2

3
/*
4
 * This file is part of Crawler Detect - the web crawler detection library.
5
 *
6
 * (c) Mark Beech <m@rkbee.ch>
7
 *
8
 * This source file is subject to the MIT license that is bundled
9
 * with this source code in the file LICENSE.
10
 */
11

12
namespace Jaybizzle\CrawlerDetect;
13

14
use Jaybizzle\CrawlerDetect\Fixtures\Crawlers;
15
use Jaybizzle\CrawlerDetect\Fixtures\Exclusions;
16
use Jaybizzle\CrawlerDetect\Fixtures\Headers;
17

18
class CrawlerDetect
{
    /**
     * The user agent.
     *
     * @var string|null
     */
    protected $userAgent;

    /**
     * Headers that contain a user agent.
     *
     * @var array
     */
    protected $httpHeaders = array();

    /**
     * Store regex matches.
     *
     * @var array
     */
    protected $matches = array();

    /**
     * Crawlers object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Crawlers
     */
    protected $crawlers;

    /**
     * Exclusions object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Exclusions
     */
    protected $exclusions;

    /**
     * Headers object.
     *
     * @var \Jaybizzle\CrawlerDetect\Fixtures\Headers
     */
    protected $uaHttpHeaders;

    /**
     * The compiled regex string.
     *
     * @var string
     */
    protected $compiledRegex;

    /**
     * The compiled exclusions regex string.
     *
     * @var string
     */
    protected $compiledExclusions;

    /**
     * Class constructor.
     *
     * Instantiates the crawler, exclusion and header fixtures, compiles the
     * crawler and exclusion patterns, then stores the HTTP headers and the
     * user agent to inspect.
     *
     * NOTE(review): the implicitly nullable `array $headers = null` form is
     * deprecated as of PHP 8.4. It is kept here so the signature stays valid
     * on older PHP versions; switch to `?array` once PHP < 7.1 is dropped.
     *
     * @param array|null  $headers   Server-style array; $_SERVER is used when null or empty.
     * @param string|null $userAgent Explicit user agent; derived from the headers when null.
     */
    public function __construct(array $headers = null, $userAgent = null)
    {
        $this->crawlers = new Crawlers();
        $this->exclusions = new Exclusions();
        $this->uaHttpHeaders = new Headers();

        $this->compiledRegex = $this->compileRegex($this->crawlers->getAll());
        $this->compiledExclusions = $this->compileRegex($this->exclusions->getAll());

        // Headers must be set first: setUserAgent() reads them when no
        // explicit user agent is supplied.
        $this->setHttpHeaders($headers);
        $this->setUserAgent($userAgent);
    }

    /**
     * Compile the regex patterns into one regex string.
     *
     * The patterns are joined verbatim with `|` and wrapped in a single
     * group. No quoting is applied, and the result is later embedded between
     * `/` delimiters, so each pattern must already be valid in that context.
     *
     * @param array $patterns Regex fragments to combine.
     *
     * @return string
     */
    public function compileRegex($patterns)
    {
        return '('.implode('|', $patterns).')';
    }

    /**
     * Set HTTP headers.
     *
     * Replaces any previously stored headers. Only entries whose key starts
     * with `HTTP_` are kept.
     *
     * @param array|null $httpHeaders Server-style array; $_SERVER is used when
     *                                this is not an array or is empty.
     */
    public function setHttpHeaders($httpHeaders)
    {
        // Use global _SERVER if $httpHeaders aren't defined.
        if (! is_array($httpHeaders) || ! count($httpHeaders)) {
            $httpHeaders = $_SERVER;
        }

        // Clear existing headers.
        $this->httpHeaders = array();

        // Only save HTTP headers. In PHP land, that means
        // only _SERVER vars that start with HTTP_.
        foreach ($httpHeaders as $key => $value) {
            if (strpos($key, 'HTTP_') === 0) {
                $this->httpHeaders[$key] = $value;
            }
        }
    }

    /**
     * Return user agent headers.
     *
     * @return array
     */
    public function getUaHttpHeaders()
    {
        return $this->uaHttpHeaders->getAll();
    }

    /**
     * Set the user agent.
     *
     * When null is given, the user agent is built by concatenating the value
     * of every known user-agent header present in the stored HTTP headers,
     * each followed by a single space. If none is present it stays null.
     *
     * @param string|null $userAgent
     *
     * @return string|null The user agent that was stored.
     */
    public function setUserAgent($userAgent)
    {
        if (is_null($userAgent)) {
            foreach ($this->getUaHttpHeaders() as $altHeader) {
                if (isset($this->httpHeaders[$altHeader])) {
                    $userAgent .= $this->httpHeaders[$altHeader].' ';
                }
            }
        }

        return $this->userAgent = $userAgent;
    }

    /**
     * Check user agent string against the regex.
     *
     * Exclusion patterns are stripped from the agent first; whatever is left
     * is matched against the crawler patterns. The result of that match is
     * available through getMatches() until the next call.
     *
     * @param string|null $userAgent Agent to test; the stored user agent is
     *                               used when this is empty.
     *
     * @return bool
     */
    public function isCrawler($userAgent = null)
    {
        // Forget the previous call's result so that getMatches() never
        // reports a stale match when this call exits early below.
        $this->matches = array();

        // preg_replace() returns null on a PCRE error; the cast turns that
        // into '' (treated as "not a crawler") instead of passing null to trim().
        $agent = trim((string) preg_replace(
            "/{$this->compiledExclusions}/i",
            '',
            $userAgent ?: $this->userAgent ?: ''
        ));

        if ($agent === '') {
            return false;
        }

        return (bool) preg_match("/{$this->compiledRegex}/i", $agent, $this->matches);
    }

    /**
     * Return the matches.
     *
     * @return string|null The text matched by the most recent isCrawler()
     *                     call, or null when that call found no match.
     */
    public function getMatches()
    {
        return isset($this->matches[0]) ? $this->matches[0] : null;
    }

    /**
     * Return the stored user agent.
     *
     * @return string|null
     */
    public function getUserAgent()
    {
        return $this->userAgent;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc