• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 13141805349

04 Feb 2025 05:58PM UTC coverage: 69.194% (+0.02%) from 69.171%
13141805349

Pull #1126

github

web-flow
Merge 3835c8eee into f4b5c8aee
Pull Request #1126: [BUGFIX] Ensure that correct typo3 binary is returned in non-composer installations

2 of 2 new or added lines in 1 file covered. (100.0%)

4 existing lines in 1 file now uncovered.

1871 of 2704 relevant lines covered (69.19%)

3.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

90.08
/Classes/Command/BuildQueueCommand.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler\Command;
6

7
/*
8
 * (c) 2021 Tomas Norre Mikkelsen <tomasnorre@gmail.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Controller\CrawlerController;
24
use AOE\Crawler\Converter\JsonCompatibilityConverter;
25
use AOE\Crawler\Domain\Model\Reason;
26
use AOE\Crawler\Domain\Repository\QueueRepository;
27
use AOE\Crawler\Event\InvokeQueueChangeEvent;
28
use AOE\Crawler\Utility\MessageUtility;
29
use AOE\Crawler\Value\QueueRow;
30
use Symfony\Component\Console\Command\Command;
31
use Symfony\Component\Console\Helper\ProgressBar;
32
use Symfony\Component\Console\Input\InputArgument;
33
use Symfony\Component\Console\Input\InputInterface;
34
use Symfony\Component\Console\Input\InputOption;
35
use Symfony\Component\Console\Output\OutputInterface;
36
use TYPO3\CMS\Core\Domain\Repository\PageRepository;
37
use TYPO3\CMS\Core\EventDispatcher\EventDispatcher;
38
use TYPO3\CMS\Core\Utility\GeneralUtility;
39
use TYPO3\CMS\Core\Utility\MathUtility;
40

41
/**
42
 * @internal since v12.0.0
43
 */
44
class BuildQueueCommand extends Command
45
{
46
    /**
     * Declares the command's description, help text, arguments and options.
     *
     * Arguments: "page" (start page of the queue build) and "conf" (comma separated
     * crawler configuration keys). Options: "depth", "mode" and "number".
     */
    protected function configure(): void
    {
        $this->setDescription('Create entries in the queue that can be processed at once');

        $this->setHelp(
            'Try "typo3 help crawler:buildQueue" to see your options' . chr(10) . chr(10) .
            'Works as a CLI interface to some functionality from the Web > Info > Site Crawler module;
It can put entries in the queue from command line options, return the list of URLs and even execute
all entries right away without having to queue them up - this can be useful for immediate re-cache,
re-indexing or static publishing from command line.' . chr(10) . chr(10) .
            '
            Examples:
              --- Re-cache pages from page 7 and two levels down, executed immediately
              $ typo3 crawler:buildQueue 7 defaultConfiguration --depth 2 --mode exec

              --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
              $ typo3 crawler:buildQueue 7 defaultConfiguration --depth 0 --mode queue --number 4
            '
        );

        $this->addArgument('page', InputArgument::REQUIRED, 'The page from where the queue building should start');

        $this->addArgument('conf', InputArgument::REQUIRED, 'A comma separated list of crawler configurations');

        // The description used to contain a stray `', "` left over from two merged strings.
        $this->addOption(
            'depth',
            'd',
            InputOption::VALUE_OPTIONAL,
            'Tree depth, 0-99. How many levels under the \'page_id\' to include.',
            '0'
        );

        // No default is declared here; execute() falls back to "queue" when the option is absent.
        $this->addOption(
            'mode',
            'm',
            InputOption::VALUE_OPTIONAL,
            'Specifies output modes url : Will list URLs which wget could use as input. queue: Will put entries in queue table. exec: Will execute all entries right away!'
        );

        $this->addOption(
            'number',
            '',
            InputOption::VALUE_OPTIONAL,
            'Specifies how many items are put in the queue per minute. Only valid for output mode "queue"',
            '0'
        );
    }
93

94
    /**
     * Crawler Command - Submitting URLs to be crawled.
     *
     * Works as a CLI interface to some functionality from the Web > Info > Site Crawler module;
     * It can put entries in the queue from command line options, return the list of URLs and even execute
     * all entries right away without having to queue them up - this can be useful for immediate re-cache,
     * re-indexing or static publishing from command line.
     *
     * Examples:
     *
     * --- Re-cache pages from page 7 and two levels down, executed immediately
     * $ typo3 crawler:buildQueue 7 defaultConfiguration --depth 2 --mode exec
     *
     *
     * --- Put entries for re-caching pages from page 7 into queue, 4 every minute.
     * $ typo3 crawler:buildQueue 7 defaultConfiguration --depth 0 --mode queue --number 4
     *
     * @return int Command::FAILURE when the "page" argument does not resolve to a page,
     *             Command::SUCCESS otherwise
     */
    protected function execute(InputInterface $input, OutputInterface $output): int
    {
        /** @var JsonCompatibilityConverter $jsonCompatibilityConverter */
        $jsonCompatibilityConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
        // The "mode" option declares no default, so an absent option means "queue".
        $mode = $input->getOption('mode') ?? 'queue';

        $extensionSettings = GeneralUtility::makeInstance(
            ExtensionConfigurationProvider::class
        )->getExtensionConfiguration();
        $eventDispatcher = GeneralUtility::makeInstance(EventDispatcher::class);

        /** @var CrawlerController $crawlerController */
        $crawlerController = GeneralUtility::makeInstance(CrawlerController::class);
        /** @var QueueRepository $queueRepository */
        $queueRepository = GeneralUtility::makeInstance(QueueRepository::class);
        $pageRepository = GeneralUtility::makeInstance(PageRepository::class);

        if ($mode === 'exec') {
            $crawlerController->registerQueueEntriesInternallyOnly = true;
        }

        // Negative or non-numeric input collapses to 0, which is rejected together with unknown pages.
        $pageId = MathUtility::forceIntegerInRange((int) $input->getArgument('page'), 0);
        if ($pageId === 0 || empty($pageRepository->getPage($pageId))) {
            $message = "Page {$pageId} is not a valid page, please check your root page id and try again.";
            MessageUtility::addErrorMessage($message);
            $output->writeln("<info>{$message}</info>");
            return Command::FAILURE;
        }

        $configurationKeys = $this->getConfigurationKeys((string) $input->getArgument('conf'));

        // "queue" and "exec" both submit URLs; "url" (and any unknown mode) only lists them.
        $submitsToQueue = $mode === 'queue' || $mode === 'exec';

        if ($submitsToQueue) {
            $reason = new Reason();
            $reason->setReason(Reason::REASON_CLI_SUBMIT);
            $reason->setDetailText('The cli script of the crawler added to the queue');
            $eventDispatcher->dispatch(new InvokeQueueChangeEvent($reason));
        }

        if ($extensionSettings['cleanUpOldQueueEntries']) {
            $queueRepository->cleanUpOldQueueEntries();
        }

        $crawlerController->setID = GeneralUtility::md5int(microtime());
        $queueRows = $crawlerController->getPageTreeAndUrls(
            $pageId,
            MathUtility::forceIntegerInRange((int) $input->getOption('depth'), 0, 99),
            $crawlerController->getCurrentTime(),
            // A missing or zero "number" falls back to 30; the result is clamped to 1..1000.
            MathUtility::forceIntegerInRange((int) $input->getOption('number') ?: 30, 1, 1000),
            $submitsToQueue,
            $mode === 'url',
            [],
            $configurationKeys
        );

        if ($mode === 'url') {
            $output->writeln('<info>' . implode(PHP_EOL, $crawlerController->downloadUrls) . PHP_EOL . '</info>');
        } elseif ($mode === 'exec') {
            $this->executeQueueEntries($crawlerController, $jsonCompatibilityConverter, $queueRows, $output);
        } elseif ($mode === 'queue') {
            $output->writeln(
                '<info>Putting ' . count($crawlerController->urlList) . ' entries in queue:</info>' . PHP_EOL
            );
            $this->outputUrls($queueRows, $output);
        } else {
            // Unknown mode: report what was found without queueing or executing anything.
            $output->writeln(
                '<info>' . count(
                    $crawlerController->urlList
                ) . ' entries found for processing. (Use "mode" to decide action):</info>' . PHP_EOL
            );
            $this->outputUrls($queueRows, $output);
        }

        return Command::SUCCESS;
    }

    /**
     * Handles the "exec" mode: lists the collected URLs, then requests every entry in
     * $crawlerController->queueEntries right away and prints the outcome of each request,
     * redrawing a progress bar around the per-entry output.
     *
     * @param QueueRow[] $queueRows rows returned by getPageTreeAndUrls(), listed before processing starts
     */
    private function executeQueueEntries(
        CrawlerController $crawlerController,
        JsonCompatibilityConverter $jsonCompatibilityConverter,
        array $queueRows,
        OutputInterface $output
    ): void {
        $progressBar = new ProgressBar($output);
        $output->writeln('<info>Executing ' . count($crawlerController->urlList) . ' requests right away:</info>');
        $this->outputUrls($queueRows, $output);
        $output->writeln('<info>Processing</info>' . PHP_EOL);

        foreach ($progressBar->iterate($crawlerController->queueEntries) as $queueRec) {
            $p = $jsonCompatibilityConverter->convert($queueRec['parameters']);
            // A boolean result is treated as "parameters not usable" and the entry is skipped.
            if (is_bool($p)) {
                continue;
            }

            // Hide the bar while writing regular lines, then draw it again below them.
            $progressBar->clear();
            if (empty($p['procInstructions'][0])) {
                $procInstructionsString = '';
            } else {
                $procInstructionsString = ' (' . implode(',', $p['procInstructions']) . ')';
            }
            $output->writeln('<info>' . $p['url'] . $procInstructionsString . ' => ' . '</info>');
            $progressBar->display();

            $result = $crawlerController->readUrlFromArray($queueRec);

            $resultContent = $result['content'] ?? '';
            $requestResult = $jsonCompatibilityConverter->convert($resultContent);

            $progressBar->clear();
            if (is_array($requestResult)) {
                $hasLogLines = array_key_exists('log', $requestResult) && is_array($requestResult['log']);
                $resLog = $hasLogLines
                    ? chr(9) . chr(9) . implode(PHP_EOL . chr(9) . chr(9), $requestResult['log'])
                    : '';
                $output->writeln('<info>OK: ' . $resLog . '</info>' . PHP_EOL);
            } else {
                // Not decodable as an array: print the raw content, tags stripped,
                // whitespace collapsed and cut to 30000 bytes.
                $output->writeln(
                    '<error>Error checking Crawler Result:  ' . substr(
                        (string) preg_replace('/\s+/', ' ', strip_tags((string) $resultContent)),
                        0,
                        30000
                    ) . '...' . PHP_EOL . '</error>' . PHP_EOL
                );
            }
            $progressBar->display();
        }
        $output->writeln('');
    }
229

230
    /**
     * Turns the comma separated "conf" CLI argument into a list of configuration keys.
     *
     * @return array the trimmed keys, or an empty array when the argument is blank
     */
    private function getConfigurationKeys(string $conf): array
    {
        $trimmedConf = trim($conf);
        if ($trimmedConf === '') {
            return [];
        }

        return GeneralUtility::trimExplode(',', $trimmedConf);
    }
238

239
    /**
     * Writes one line per queue row: the row's URLs, or a notice carrying the row's
     * message when a message is set.
     */
    private function outputUrls(array $queueRows, OutputInterface $output): void
    {
        /** @var QueueRow $queueRow */
        foreach ($queueRows as $queueRow) {
            if (! empty($queueRow->message)) {
                $notice = '<comment>Page "' . $queueRow->pageTitle . '" is not added to queue' . $queueRow->message . '</comment>';
                $output->writeln($notice);
                continue;
            }

            $output->writeln('<info>' . $queueRow->urls . '</info>');
        }
    }
252
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc