• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 17378386488

01 Sep 2025 01:00PM UTC coverage: 74.637%. First build
17378386488

Pull #1165

github

web-flow
Merge 2d1193c57 into 909cd889b
Pull Request #1165: [BUGFIX] Clear page cache when crawling for re-indexing

1 of 3 new or added lines in 1 file covered. (33.33%)

1954 of 2618 relevant lines covered (74.64%)

3.71 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

84.0
/Classes/QueueExecutor.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler;
6

7
/*
8
 * (c) 2020 AOE GmbH <dev@aoe.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use AOE\Crawler\Controller\CrawlerController;
23
use AOE\Crawler\Converter\JsonCompatibilityConverter;
24
use AOE\Crawler\CrawlStrategy\CallbackExecutionStrategy;
25
use AOE\Crawler\CrawlStrategy\CrawlStrategy;
26
use AOE\Crawler\CrawlStrategy\CrawlStrategyFactory;
27
use AOE\Crawler\Event\AfterUrlCrawledEvent;
28
use TYPO3\CMS\Core\Cache\CacheManager;
29
use TYPO3\CMS\Core\EventDispatcher\EventDispatcher;
30
use TYPO3\CMS\Core\Http\Uri;
31
use TYPO3\CMS\Core\SingletonInterface;
32
use TYPO3\CMS\Core\Utility\GeneralUtility;
33

34
/**
 * Fetches a URL based on the selected strategy or via a callback.
 * @internal since v9.2.5
 */
class QueueExecutor implements SingletonInterface
{
    /**
     * Strategy used to fetch URL contents for regular frontend requests.
     * Created once from the CrawlStrategyFactory in the constructor.
     *
     * @var CrawlStrategy
     */
    protected $crawlStrategy;

    private EventDispatcher $eventDispatcher;

    /**
     * @param CrawlStrategyFactory $crawlStrategyFactory Factory that creates the crawl strategy to use.
     * @param EventDispatcher|null $eventDispatcher Optional dispatcher; when omitted, one is obtained
     *                                              through GeneralUtility::makeInstance().
     */
    public function __construct(CrawlStrategyFactory $crawlStrategyFactory, ?EventDispatcher $eventDispatcher = null)
    {
        $this->crawlStrategy = $crawlStrategyFactory->create();
        $this->eventDispatcher = $eventDispatcher ?? GeneralUtility::makeInstance(EventDispatcher::class);
    }

    /**
     * Takes a queue record and fetches the contents of the URL.
     * In the future, updating the queue item & additional signal/slot/events should also happen in here.
     *
     * Behaviour, depending on the decoded `parameters` of the queue item:
     *  - missing, non-array or empty parameters: the string 'ERROR' is returned;
     *  - `_CALLBACKOBJ` is set: the callback is executed and its JSON-encoded result is
     *    returned as ['content' => ...];
     *  - otherwise: the URL in `url` is fetched with the crawl strategy. If the
     *    'tx_indexedsearch_reindex' processing instruction is present, the page cache entries
     *    tagged with the page id are flushed first. On success the JSON-encoded result is
     *    returned as ['content' => ...] and an AfterUrlCrawledEvent is dispatched; when the
     *    strategy returns false, false is returned unchanged.
     *
     * @param array $queueItem Queue record; reads `parameters`, `qid`, `set_id` and `page_id`.
     * @param CrawlerController $crawlerController Passed through to the callback strategy.
     *
     * @return array|bool|mixed|string
     */
    public function executeQueueItem(array $queueItem, CrawlerController $crawlerController)
    {
        $parameters = '';
        if (isset($queueItem['parameters'])) {
            // Decode parameters:
            /** @var JsonCompatibilityConverter $jsonCompatibleConverter */
            $jsonCompatibleConverter = GeneralUtility::makeInstance(JsonCompatibilityConverter::class);
            $parameters = $jsonCompatibleConverter->convert($queueItem['parameters']);
        }

        if (! is_array($parameters) || empty($parameters)) {
            return 'ERROR';
        }

        if (isset($parameters['_CALLBACKOBJ'])) {
            $className = $parameters['_CALLBACKOBJ'];
            // The callback must not receive its own class name as a parameter.
            unset($parameters['_CALLBACKOBJ']);
            $result = GeneralUtility::makeInstance(CallbackExecutionStrategy::class)
                ->fetchByCallback($className, $parameters, $crawlerController);

            return ['content' => json_encode($result)];
        }

        // Regular FE request
        $crawlerId = $this->generateCrawlerIdFromQueueItem($queueItem);

        // Clear the page cache before crawling for re-indexing. Skipped when the queue item
        // carries no page id, because the resulting tag would not match any page.
        if ($this->isReindexingRequested($parameters) && isset($queueItem['page_id'])) {
            GeneralUtility::makeInstance(CacheManager::class)
                ->flushCachesInGroupByTags('pages', ['pageId_' . $queueItem['page_id']]);
        }

        $url = new Uri($parameters['url']);
        $result = $this->crawlStrategy->fetchUrlContents($url, $crawlerId);
        if ($result !== false) {
            $result = ['content' => json_encode($result)];
            $this->eventDispatcher->dispatch(new AfterUrlCrawledEvent($parameters['url'], $result));
        }

        return $result;
    }

    /**
     * Builds the crawler id for a queue item: "<qid>:<md5 hash>", where the hash covers the
     * qid, the set_id and the configured TYPO3 encryption key.
     *
     * @param array $queueItem Queue record; reads `qid` and `set_id`.
     */
    protected function generateCrawlerIdFromQueueItem(array $queueItem): string
    {
        $hash = md5(
            $queueItem['qid'] . '|' . $queueItem['set_id'] . '|' . $GLOBALS['TYPO3_CONF_VARS']['SYS']['encryptionKey']
        );

        return $queueItem['qid'] . ':' . $hash;
    }

    /**
     * Tells whether the decoded parameters contain the 'tx_indexedsearch_reindex'
     * processing instruction. A missing or non-array `procInstructions` entry counts as
     * "not requested" instead of triggering a TypeError in in_array().
     */
    private function isReindexingRequested(array $parameters): bool
    {
        $procInstructions = $parameters['procInstructions'] ?? [];

        return is_array($procInstructions)
            && in_array('tx_indexedsearch_reindex', $procInstructions, true);
    }
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc