• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 19900399438

03 Dec 2025 04:04PM UTC coverage: 69.514%. Remained the same
19900399438

push

github

web-flow
[CI] Cleanup SKIP rectors (#1240)

4 of 9 new or added lines in 2 files covered. (44.44%)

1 existing line in 1 file now uncovered.

1929 of 2775 relevant lines covered (69.51%)

3.21 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

87.25
/Classes/Command/ProcessQueueCommand.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler\Command;
6

7
/*
8
 * (c) 2021 Tomas Norre Mikkelsen <tomasnorre@gmail.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Controller\CrawlerController;
24
use AOE\Crawler\Crawler;
25
use AOE\Crawler\Domain\Model\Process;
26
use AOE\Crawler\Domain\Repository\ProcessRepository;
27
use AOE\Crawler\Domain\Repository\QueueRepository;
28
use Symfony\Component\Console\Command\Command;
29
use Symfony\Component\Console\Input\InputInterface;
30
use Symfony\Component\Console\Input\InputOption;
31
use Symfony\Component\Console\Output\OutputInterface;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33

34
/**
 * Console command that claims entries from the crawler queue for its own
 * process id and has the CrawlerController read them one by one.
 *
 * @internal since v12.0.0
 */
class ProcessQueueCommand extends Command
{
    /*
     * Bit flags that are OR-ed together into the status value which execute()
     * writes to the output.
     */

    /** Neutral value: no flag set. */
    private const CLI_STATUS_NOTHING_PROCESSED = 0;

    /** Set when unprocessed queue items are still left after the run. */
    private const CLI_STATUS_REMAIN = 1;

    /** Set when at least one queue entry was read during the run. */
    private const CLI_STATUS_PROCESSED = 2;

    /** Set when the run could not start or was stopped before finishing. */
    private const CLI_STATUS_ABORTED = 4;

    /** Id under which this command instance registers itself as a crawler process. */
    private readonly string $processId;

    /** Extension configuration; loaded at the beginning of execute(). */
    private array $extensionSettings;

    public function __construct(
        private readonly Crawler $crawler,
        private readonly CrawlerController $crawlerController,
        private readonly ProcessRepository $processRepository,
        private readonly QueueRepository $queueRepository,
    ) {
        // The id only needs to be unique per run, it is not used as a secret.
        $this->processId = md5(microtime() . random_bytes(12));
        parent::__construct();
    }

    /**
     * Crawler Command - Crawling the URLs from the queue
     *
     * Examples:
     *
     * --- Will trigger the crawler which starts to process the queue entries
     * $ typo3 crawler:processqueue
     *
     * The complete status bit mask (see the CLI_STATUS_* constants) is written
     * to $output as the last line.
     *
     * @return int CLI_STATUS_ABORTED (4) when the run was aborted, 0 otherwise
     */
    #[\Override]
    public function execute(InputInterface $input, OutputInterface $output): int
    {
        $amount = $input->getOption('amount');
        $sleeptime = $input->getOption('sleeptime');
        $sleepafter = $input->getOption('sleepafter');

        $this->extensionSettings = $this->getExtensionSettings();

        $result = self::CLI_STATUS_NOTHING_PROCESSED;

        if (!$this->crawler->isDisabled() && $this->checkAndAcquireNewProcess($this->processId)) {
            // A falsy option value (not given, or the '0' default) falls back to the extension settings.
            $countInARun = $amount ? (int) $amount : (int) $this->extensionSettings['countInARun'];
            $sleepAfterFinish = $sleepafter ? (int) $sleepafter : (int) $this->extensionSettings['sleepAfterFinish'];
            $sleepTime = $sleeptime ? (int) $sleeptime : (int) $this->extensionSettings['sleepTime'];

            try {
                // Run process:
                $result = $this->runProcess($countInARun, $sleepTime, $sleepAfterFinish);
            } catch (\Throwable $e) {
                // "error" is one of the formatter's predefined styles; an unknown
                // tag such as "warning" would be printed literally.
                $output->writeln('<error>' . $e::class . ': ' . $e->getMessage() . '</error>');
                $result = self::CLI_STATUS_ABORTED;
            }

            // Cleanup - runs after an exception as well, so this process never keeps queue entries claimed.
            $this->processRepository->deleteProcessesWithoutItemsAssigned();
            $this->processRepository->markRequestedProcessesAsNotActive([$this->processId]);
            $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries([$this->processId]);

            // Fetch the remaining items once and reuse the count for the message and the status flag.
            $unprocessedCount = count($this->queueRepository->getUnprocessedItems());

            $output->writeln(
                '<info>Unprocessed Items remaining:' . $unprocessedCount . ' (' . $this->processId . ')</info>'
            );

            if ($unprocessedCount > 0) {
                $result |= self::CLI_STATUS_REMAIN;
            }
        } else {
            // Crawler is disabled or no process could be acquired.
            $result |= self::CLI_STATUS_ABORTED;
        }

        $output->writeln((string) $result);

        // Only the "aborted" bit is reported through the exit code.
        return $result & self::CLI_STATUS_ABORTED;
    }

    /**
     * Registers the description, the help text and the options
     * --amount, --sleepafter and --sleeptime.
     */
    #[\Override]
    protected function configure(): void
    {
        $this->setDescription('Trigger the crawler to process the queue entries');

        $this->setHelp(
            'Crawler Command - Crawling the URLs from the queue' . chr(10) . chr(10) .
            '
            Examples:
              --- Will trigger the crawler which starts to process the queue entries
              $ typo3 crawler:processqueue --amount 15 --sleepafter 5 --sleeptime 2
            '
        );
        $this->addOption(
            'amount',
            '',
            InputOption::VALUE_OPTIONAL,
            'How many pages should be crawled during that run',
            '0'
        );

        $this->addOption(
            'sleepafter',
            '',
            InputOption::VALUE_OPTIONAL,
            'Amount of seconds which the system should use to relax after all crawls are done',
            '0'
        );

        $this->addOption(
            'sleeptime',
            '',
            InputOption::VALUE_OPTIONAL,
            'Amount of microseconds which the system should use to relax between crawls'
        );
    }

    /**
     * Running the functionality of the CLI (crawling URLs from queue)
     *
     * @param int $countInARun      passed to QueueRepository::fetchRecordsToBeCrawled()
     * @param int $sleepTime        microseconds to pause after each entry; negative values are treated as 0
     * @param int $sleepAfterFinish seconds to pause after the loop; negative values are treated as 0
     *
     * @return int bit mask built from the readUrl() results and the CLI_STATUS_* flags
     */
    private function runProcess(int $countInARun, int $sleepTime, int $sleepAfterFinish): int
    {
        $result = 0;
        $counter = 0;

        // usleep() and sleep() throw a ValueError for negative arguments.
        $sleepTime = max(0, $sleepTime);
        $sleepAfterFinish = max(0, $sleepAfterFinish);

        // Clean up the queue
        $this->queueRepository->cleanupQueue();

        // Select entries:
        $records = $this->queueRepository->fetchRecordsToBeCrawled($countInARun);

        if (!empty($records)) {
            $quidList = [];

            foreach ($records as $record) {
                $quidList[] = $record['qid'];
            }

            // Save the number of assigned queue entries to determine how many have been processed later
            $numberOfAffectedRows = $this->queueRepository->updateProcessIdAndSchedulerForQueueIds(
                $quidList,
                $this->processId
            );
            $this->processRepository->updateProcessAssignItemsCount($numberOfAffectedRows, $this->processId);

            // Not every selected entry could be assigned to this process: give up.
            if ($numberOfAffectedRows !== count($quidList)) {
                return $result | self::CLI_STATUS_ABORTED;
            }

            foreach ($records as $record) {
                $result |= $this->crawlerController->readUrl($record['qid'], false, $this->processId);

                $counter++;
                // Just to relax the system
                usleep($sleepTime);

                // If the crawler was disabled between the start of the run and the
                // current readUrl() call, return right away; the process is NOT
                // marked as ended here.
                if ($this->crawler->isDisabled()) {
                    return $result | self::CLI_STATUS_ABORTED;
                }

                if (!$this->processRepository->isProcessActive($this->processId)) {
                    $result |= self::CLI_STATUS_ABORTED;
                    // Possible timeout
                    break;
                }
            }

            sleep($sleepAfterFinish);
        }

        if ($counter > 0) {
            $result |= self::CLI_STATUS_PROCESSED;
        }

        return $result;
    }

    /**
     * Try to acquire a new process with the given id
     * also performs some auto-cleanup for orphan processes
     *
     * Requires $this->extensionSettings to be loaded (its 'processLimit' entry is read).
     *
     * @return bool true when the process was added, false when the system
     *              process id is unavailable or the process limit is reached
     */
    private function checkAndAcquireNewProcess(string $id): bool
    {
        $returnValue = true;

        $systemProcessId = getmypid();
        if (!$systemProcessId) {
            return false;
        }

        $processCount = 0;
        $orphanProcesses = [];

        $activeProcesses = $this->processRepository->findAllActive();

        /** @var Process $process */
        foreach ($activeProcesses as $process) {
            // An active process whose TTL lies in the past is treated as orphaned.
            if ($process->getTtl() < time()) {
                $orphanProcesses[] = $process->getProcessId();
            } else {
                $processCount++;
            }
        }

        // If there are less than allowed active processes then add a new one
        if ($processCount < (int) $this->extensionSettings['processLimit']) {
            $this->processRepository->addProcess($id, $systemProcessId);
        } else {
            $returnValue = false;
        }

        // Housekeeping is done regardless of whether a new process was added.
        $this->processRepository->deleteProcessesMarkedAsDeleted();
        $this->processRepository->markRequestedProcessesAsNotActive($orphanProcesses);
        $this->queueRepository->unsetProcessScheduledAndProcessIdForQueueEntries($orphanProcesses);

        return $returnValue;
    }

    /**
     * Returns the extension configuration supplied by the ExtensionConfigurationProvider.
     */
    private function getExtensionSettings(): array
    {
        return GeneralUtility::makeInstance(ExtensionConfigurationProvider::class)->getExtensionConfiguration();
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc