• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 11237471329

08 Oct 2024 02:20PM UTC coverage: 68.586% (-1.3%) from 69.862%
11237471329

push

github

web-flow
ci: Update coveralls workflow (#1109)

1834 of 2674 relevant lines covered (68.59%)

3.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

19.61
/Classes/CrawlStrategy/SubProcessExecutionStrategy.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler\CrawlStrategy;
6

7
/*
8
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Utility\PhpBinaryUtility;
24
use Psr\Http\Message\UriInterface;
25
use Psr\Log\LoggerAwareInterface;
26
use Psr\Log\LoggerAwareTrait;
27
use TYPO3\CMS\Core\Core\Environment;
28
use TYPO3\CMS\Core\Utility\CommandUtility;
29
use TYPO3\CMS\Core\Utility\ExtensionManagementUtility;
30
use TYPO3\CMS\Core\Utility\GeneralUtility;
31

32
/**
33
 * Executes another process via shell_exec() to include cli/bootstrap.php which in turn
34
 * includes the index.php for frontend.
35
 * @internal since v12.0.0
36
 */
37
class SubProcessExecutionStrategy implements LoggerAwareInterface, CrawlStrategyInterface
{
    use LoggerAwareTrait;

    /**
     * Extension configuration as delivered by the configuration provider;
     * an empty array when the provider did not return an array.
     */
    protected array $extensionSettings;

    /**
     * @param ExtensionConfigurationProvider|null $configurationProvider Provider to read the extension
     *        configuration from; when omitted an instance is obtained via GeneralUtility::makeInstance().
     */
    public function __construct(?ExtensionConfigurationProvider $configurationProvider = null)
    {
        $configurationProvider ??= GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $settings = $configurationProvider->getExtensionConfiguration();
        $this->extensionSettings = is_array($settings) ? $settings : [];
    }

    /**
     * Fetches a URL by running the crawler's cli/bootstrap.php in a separate PHP process.
     *
     * The sub process receives the frontend base path, the URL and the
     * base64-encoded, serialized request headers as shell-escaped arguments.
     *
     * @param UriInterface $url URL to fetch; only the schemes "http" and "https" are accepted
     * @param string $crawlerId Identifier that is passed on in the "X-T3crawler" request header
     * @return array|false ['content' => string] holding the output of the sub process, or false when the
     *                     URL cannot be parsed, has an unsupported scheme, or the command produced no output
     */
    public function fetchUrlContents(UriInterface $url, string $crawlerId)
    {
        $url = (string) $url;
        $parsedUrl = parse_url($url);

        // parse_url() returns false for seriously malformed URLs; such URLs are
        // rejected together with URLs that carry a missing or unsupported scheme.
        if (
            !is_array($parsedUrl)
            || !isset($parsedUrl['scheme'])
            || !in_array($parsedUrl['scheme'], ['', 'http', 'https'], true)
        ) {
            $this->logger?->debug(sprintf('Scheme does not match for url "%s"', $url), [
                'crawlerId' => $crawlerId,
            ]);
            return false;
        }

        $requestHeaders = $this->buildRequestHeaders($parsedUrl, $crawlerId);

        $commandParts = [
            ExtensionManagementUtility::extPath('crawler') . 'cli/bootstrap.php',
            $this->getFrontendBasePath(),
            $url,
            base64_encode(serialize($requestHeaders)),
        ];
        $commandParts = CommandUtility::escapeShellArguments($commandParts);
        $cmd = escapeshellcmd(PhpBinaryUtility::getPhpBinary());
        $cmd .= ' ' . implode(' ', $commandParts);

        $startTime = microtime(true);
        $content = $this->executeShellCommand($cmd);
        // Log the URL together with the elapsed wall-clock time in seconds.
        $this->logger?->info($url . ' ' . (microtime(true) - $startTime));

        if ($content === null) {
            return false;
        }

        return [
            'content' => $content,
        ];
    }

    /**
     * Builds the list of raw HTTP/1.0 request header lines handed to the sub process.
     *
     * @param array $url Components of the URL as returned by parse_url()
     * @param string $crawlerId Value of the "X-T3crawler" header
     * @return string[] Request line followed by the header lines
     */
    private function buildRequestHeaders(array $url, string $crawlerId): array
    {
        // parse_url() omits "path" for URLs such as "http://example.com"; the
        // request target of an HTTP request line must then be "/".
        $path = $url['path'] ?? '';
        if ($path === '') {
            $path = '/';
        }
        $requestTarget = $path . (isset($url['query']) ? '?' . $url['query'] : '');

        $reqHeaders = [];
        $reqHeaders[] = 'GET ' . $requestTarget . ' HTTP/1.0';
        // A missing host yields an empty header value instead of an undefined-key warning.
        $reqHeaders[] = 'Host: ' . ($url['host'] ?? '');
        $reqHeaders[] = 'Connection: close';
        if (isset($url['user'], $url['pass']) && $url['user'] !== '' && $url['pass'] !== '') {
            $reqHeaders[] = 'Authorization: Basic ' . base64_encode($url['user'] . ':' . $url['pass']);
        }
        $reqHeaders[] = 'X-T3crawler: ' . $crawlerId;
        $reqHeaders[] = 'User-Agent: TYPO3 crawler';

        return $reqHeaders;
    }

    /**
     * Executes a shell command and returns the outputted result.
     *
     * @param string $command Shell command to be executed
     * @return string|null Output of the command, or null when the command failed or printed nothing
     */
    private function executeShellCommand(string $command): ?string
    {
        $output = shell_exec($command);

        // shell_exec() returns false when the pipe cannot be established and
        // null on error or empty output; both are reported to the caller as null.
        return is_string($output) ? $output : null;
    }

    /**
     * Gets the base path of the website frontend.
     * (e.g. if you call http://mydomain.com/cms/index.php in
     * the browser the base path is "/cms/")
     *
     * @return string Base path of the website frontend, always starting and ending with "/"
     */
    private function getFrontendBasePath(): string
    {
        $frontendBasePath = '/';

        if (!empty($this->extensionSettings['frontendBasePath'])) {
            // Get the path from the extension settings:
            $frontendBasePath = $this->extensionSettings['frontendBasePath'];
        } elseif (!empty($GLOBALS['TSFE']->absRefPrefix)) {
            // If empty, try to use config.absRefPrefix:
            $frontendBasePath = $GLOBALS['TSFE']->absRefPrefix;
        } elseif (!Environment::isCli()) {
            // If not in CLI mode the base path can be determined from $_SERVER environment:
            $frontendBasePath = GeneralUtility::getIndpEnv('TYPO3_SITE_PATH');
        }

        // Base path must be '/<pathSegments>/':
        if ($frontendBasePath !== '/') {
            $frontendBasePath = '/' . ltrim((string) $frontendBasePath, '/');
            $frontendBasePath = rtrim($frontendBasePath, '/') . '/';
        }

        return $frontendBasePath;
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc