• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 11237471329

08 Oct 2024 02:20PM UTC coverage: 68.586% (-1.3%) from 69.862%
11237471329

push

github

web-flow
ci: Update coveralls workflow (#1109)

1834 of 2674 relevant lines covered (68.59%)

3.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

72.02
/Classes/Service/ConfigurationService.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler\Service;
6

7
/*
8
 * (c) 2022 Tomas Norre Mikkelsen <tomasnorre@gmail.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
24
use Doctrine\DBAL\ArrayParameterType;
25
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
26
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
27
use TYPO3\CMS\Core\Core\Bootstrap;
28
use TYPO3\CMS\Core\Database\ConnectionPool;
29
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
30
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
31
use TYPO3\CMS\Core\Domain\Repository\PageRepository;
32
use TYPO3\CMS\Core\EventDispatcher\NoopEventDispatcher;
33
use TYPO3\CMS\Core\Type\Bitmask\Permission;
34
use TYPO3\CMS\Core\TypoScript\AST\AstBuilder;
35
use TYPO3\CMS\Core\TypoScript\TypoScriptStringFactory;
36
use TYPO3\CMS\Core\Utility\GeneralUtility;
37
use TYPO3\CMS\Core\Utility\MathUtility;
38

39
/**
40
 * @internal since v9.2.5
41
 */
42
class ConfigurationService
43
{
44
    /**
     * Backend user, filled on first use by getBackendUser(); null until then.
     */
    private ?BackendUserAuthentication $backendUser = null;

    /**
     * Extension configuration as returned by ExtensionConfigurationProvider::getExtensionConfiguration().
     */
    private readonly array $extensionSettings;

    /**
     * Stores the injected collaborators and loads the extension configuration once.
     */
    public function __construct(
        private readonly UrlService $urlService,
        private readonly ConfigurationRepository $configurationRepository
    ) {
        $this->extensionSettings = GeneralUtility::makeInstance(
            ExtensionConfigurationProvider::class
        )->getExtensionConfiguration();
    }
58

59
    /**
     * Restricts a set of configurations to an allow-list of configuration keys.
     *
     * An empty allow-list means "no restriction": the configurations are returned untouched.
     * Keys are compared strictly (type and value) against the allow-list entries.
     *
     * @param array $allowedConfigurations keys that may remain in the result
     * @param array $configurations configurations indexed by their key
     * @return array the configurations whose key is allowed, with their original keys and order
     */
    public static function removeDisallowedConfigurations(array $allowedConfigurations, array $configurations): array
    {
        if ($allowedConfigurations === []) {
            return $configurations;
        }

        // Keep only the entries whose key matches the current selection.
        return array_filter(
            $configurations,
            static fn (int|string $name): bool => in_array($name, $allowedConfigurations, true),
            ARRAY_FILTER_USE_KEY
        );
    }
73

74
    /**
     * Builds crawler configurations from the "tx_crawler.crawlerCfg.paramSets." section of page TSconfig.
     *
     * Every parameter set that has an array value and that applies to $pageId (no "pidsOnly"
     * restriction, or $pageId listed in it) is written to $res[<key>] with the entries "subCfg",
     * "paramParsed", "paramExpanded", "origin" ('pagets') and "URLs". An existing entry with the
     * same key is overwritten.
     *
     * @param array $pageTSConfig page TSconfig array
     * @param int $pageId page the configurations are compiled for
     * @param array $res configurations collected so far
     * @param string $mountPoint optional mount point, appended to the base URL as "&MP=..."
     * @return array $res extended by the configurations found in page TSconfig
     */
    public function getConfigurationFromPageTS(
        array $pageTSConfig,
        int $pageId,
        array $res,
        string $mountPoint = ''
    ): array {
        $defaultCompileUrls = 10_000;
        $maxUrlsToCompile = MathUtility::forceIntegerInRange(
            $this->extensionSettings['maxCompileUrls'] ?? $defaultCompileUrls,
            1,
            1_000_000_000,
            $defaultCompileUrls
        );
        $crawlerCfg = $pageTSConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];

        // The base URL is the same for every parameter set, so build it once.
        $url = '?id=' . $pageId;
        $url .= $mountPoint !== '' ? '&MP=' . $mountPoint : '';

        foreach ($crawlerCfg as $key => $values) {
            if (!is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', (string) $key);
            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            // Normalise the filter list (strip whitespace around the comma separated entries).
            $procInstrFilter = (string) ($subCfg['procInstrFilter'] ?? '');
            if ($procInstrFilter !== '') {
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $procInstrFilter));
            }

            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Process the configuration only if it is not page-specific or if the current page is listed.
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && !GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            // Explode, process etc.:
            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            // A parameter set may consist of the sub configuration only ("<key>." without "<key> = ...");
            // treat the missing parameter string as empty instead of passing null on.
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array((string) ($crawlerCfg[$key] ?? ''));
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['origin'] = 'pagets';
            $res[$key]['URLs'] = $this->urlService->compileUrls(
                $res[$key]['paramExpanded'],
                [$url],
                $maxUrlsToCompile
            );
        }
        return $res;
    }
126

127
    /**
     * Adds the crawler configuration records found along the root line of $pageId to $res.
     *
     * A record is used only when all of the following hold:
     * - it has no "begroups" restriction, or the backend user is admin or has group access,
     * - it has no "pidsonly" restriction, or $pageId is listed in it,
     * - $res does not already contain an entry with the record's name,
     * - $pageId is not part of the record's expanded "exclude" list.
     *
     * @param int $pageId page the configurations are compiled for
     * @param array $res configurations collected so far; existing keys are never overwritten
     * @return array $res extended by the applicable database configurations
     */
    public function getConfigurationFromDatabase(int $pageId, array $res): array
    {
        // Same default as getConfigurationFromPageTS() so a missing setting does not raise a warning.
        $defaultCompileUrls = 10_000;
        $maxUrlsToCompile = MathUtility::forceIntegerInRange(
            $this->extensionSettings['maxCompileUrls'] ?? $defaultCompileUrls,
            1,
            1_000_000_000,
            $defaultCompileUrls
        );

        $crawlerConfigurations = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($crawlerConfigurations as $configurationRecord) {
            // check access to the configuration record
            $hasAccess = empty($configurationRecord['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess(
                    $this->getBackendUser()->user['usergroup_cached_list'],
                    $configurationRecord['begroups']
                );
            if (!$hasAccess) {
                continue;
            }

            // Cast once: the value is handed to string-typed helpers below.
            $pidsOnly = (string) ($configurationRecord['pidsonly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Process the configuration only if it is not page-specific or if the current page is listed.
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && !GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            $key = $configurationRecord['name'];

            // don't overwrite previously defined paramSets
            if (isset($res[$key])) {
                continue;
            }

            /** @var TypoScriptStringFactory $typoScriptStringFactory */
            $typoScriptStringFactory = GeneralUtility::makeInstance(TypoScriptStringFactory::class);
            $typoScriptTree = $typoScriptStringFactory->parseFromString(
                $configurationRecord['processing_instruction_parameters_ts'],
                new AstBuilder(new NoopEventDispatcher())
            );

            $subCfg = [
                'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
                'procInstrParams.' => $typoScriptTree->toArray(),
                'baseUrl' => $configurationRecord['base_url'],
                'force_ssl' => (int) $configurationRecord['force_ssl'],
                'userGroups' => $configurationRecord['fegroups'],
                'exclude' => $configurationRecord['exclude'],
                'key' => $key,
            ];

            // Skip the record when the current page is explicitly excluded.
            if (in_array($pageId, $this->expandExcludeString((string) $subCfg['exclude']), true)) {
                continue;
            }

            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array(
                $configurationRecord['configuration']
            );
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['URLs'] = $this->urlService->compileUrls(
                $res[$key]['paramExpanded'],
                ['?id=' . $pageId],
                $maxUrlsToCompile
            );
            $res[$key]['origin'] = 'tx_crawler_configuration_' . $configurationRecord['uid'];
        }
        return $res;
    }
192

193
    /**
     * Expands an exclude string into a unique list of page uids.
     *
     * The string is a comma separated list of parts of the form "pid" or "pid+depth":
     * - "pid"       adds only that page (depth 0),
     * - "pid+depth" additionally adds the pages of the page tree below pid up to the given depth,
     * - "pid+"      (a plus without a usable depth) uses depth 99.
     *
     * Non-empty results are remembered per exclude string in a static cache.
     *
     * @param string $excludeString comma separated exclude definition; '' and '0' yield an empty list
     * @return array list of page uids (integers, duplicates removed)
     */
    public function expandExcludeString(string $excludeString): array
    {
        // internal static caches;
        static $expandedExcludeStringCache;
        static $treeCache = [];

        if (!empty($expandedExcludeStringCache[$excludeString])) {
            return $expandedExcludeStringCache[$excludeString];
        }

        $collectedPids = [];

        // Mirrors empty(): both '' and '0' count as "nothing to exclude".
        $hasExcludes = $excludeString !== '' && $excludeString !== '0';
        if ($hasExcludes) {
            /** @var PageTreeView $tree */
            $tree = GeneralUtility::makeInstance(PageTreeView::class);
            $tree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

            foreach (GeneralUtility::trimExplode(',', $excludeString) as $definition) {
                $segments = GeneralUtility::trimExplode('+', $definition);
                $pid = (int) ($segments[0] ?? 0);
                $depth = (int) ($segments[1] ?? 0);

                // default is "page only" = "depth=0"
                if ($depth === 0) {
                    $depth = str_contains($definition, '+') ? 99 : 0;
                }

                $collectedPids[] = $pid;
                if ($depth > 0) {
                    $collectedPids = $this->expandPidList($treeCache, $pid, $depth, $tree, $collectedPids);
                }
            }
        }

        return $expandedExcludeStringCache[$excludeString] = array_unique($collectedPids);
    }
233

234
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *        _ENABLELANG:1 picks only original records without their language overlays
     *         - Default: Literal value
     *
     * "_TABLE" sub-parts are separated by ";" and each one is a "key:value" pair. Only the first ":"
     * separates key and value, so a value may itself contain colons. A sub-part without ":" is
     * stored with a null value.
     *
     * @param array $paramArray parameter name => raw configuration value
     * @param int $pid current page id, used as default for table look-ups
     * @return array parameter name => list of expanded values
     */
    private function expandParameters(array $paramArray, int $pid): array
    {
        // Traverse parameter names:
        foreach ($paramArray as $parameter => $parameterValue) {
            $parameterValue = trim((string) $parameterValue);

            // If value is encapsulated in square brackets it means there are some ranges of values to find, otherwise the value is literal
            if (!$this->isWrappedInSquareBrackets($parameterValue)) {
                // Set the literal value as only value in array:
                $paramArray[$parameter] = [$parameterValue];
                continue;
            }

            // So, find the value inside brackets and reset the paramArray value as an array.
            $parameterValue = substr($parameterValue, 1, -1);
            $paramArray[$parameter] = [];

            // Explode parts and traverse them:
            foreach (explode('|', $parameterValue) as $part) {
                $trimmedPart = trim($part);

                // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', $trimmedPart, $reg)) {
                    $reg = $this->swapIfFirstIsLargerThanSecond($reg);
                    $paramArray = $this->addValuesInRange($reg, $paramArray, $parameter);
                } elseif (str_starts_with($trimmedPart, '_TABLE:')) {
                    // Parse parameters:
                    $subpartParams = [];
                    foreach (GeneralUtility::trimExplode(';', $part, true) as $subpart) {
                        // Limit 2: split on the first colon only so values such as a _WHERE clause
                        // keep any colon they contain; a missing value becomes null without a warning.
                        $pair = GeneralUtility::trimExplode(':', $subpart, false, 2);
                        $subpartParams[$pair[0]] = $pair[1] ?? null;
                    }

                    // Table exists:
                    if (isset($GLOBALS['TCA'][$subpartParams['_TABLE'] ?? ''])) {
                        $paramArray = $this->extractParamsFromCustomTable(
                            $subpartParams,
                            $pid,
                            $paramArray,
                            $parameter
                        );
                    }
                } else {
                    // Just add value:
                    $paramArray[$parameter][] = $part;
                }
                // Hook for processing own expandParameters place holder
                $paramArray = $this->runExpandParametersHook($paramArray, $parameter, $part, $pid);
            }

            // Make unique set of values and sort array by key:
            $paramArray[$parameter] = array_unique($paramArray[$parameter]);
            ksort($paramArray);
        }

        return $paramArray;
    }
301

302
    /**
     * Tells whether the string begins with "[" and ends with "]".
     */
    private function isWrappedInSquareBrackets(string $string): bool
    {
        if (!str_starts_with($string, '[')) {
            return false;
        }

        return str_ends_with($string, ']');
    }
306

307
    /**
     * Orders the two range bounds of a regex match so that index 1 holds the lower one.
     *
     * @param array $reg match array; index 1 and 2 hold the two range bounds
     * @return array the same array with the values at index 1 and 2 exchanged when the first is larger
     */
    private function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        if ($reg[1] > $reg[2]) {
            // Exchange both bounds in one step.
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
318

319
    /**
     * Returns the backend user, taking it from $GLOBALS['BE_USER'] the first time it is needed.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Make sure the _cli_ user is loaded
        Bootstrap::initializeBackendAuthentication();

        // Remember the global user on first access only.
        $this->backendUser ??= $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
331

332
    /**
     * Creates a query builder for the given table through the connection pool.
     */
    private function getQueryBuilder(string $table): QueryBuilder
    {
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
336

337
    /**
     * Runs the registered "expandParameters" hooks for one configuration part.
     *
     * Each hook receives the parameter array by reference and may change it; the possibly
     * modified array is returned. Without registered hooks the array is returned unchanged.
     *
     * @psalm-param array-key $parameter
     */
    private function runExpandParametersHook(array $paramArray, int|string $parameter, string $path, int $pid): array
    {
        $registeredHooks = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
        if (!is_array($registeredHooks)) {
            return $paramArray;
        }

        $hookParameters = [
            'pObj' => &$this,
            'paramArray' => &$paramArray,
            'currentKey' => $parameter,
            'currentValue' => $path,
            'pid' => $pid,
        ];
        foreach ($registeredHooks as $hookReference) {
            GeneralUtility::callUserFunction($hookReference, $hookParameters, $this);
        }

        return $paramArray;
    }
358

359
    /**
     * Returns the page ids to look records up in.
     *
     * @param int $recursiveDepth depth passed to the page repository; 0 or less means "this page only"
     * @param int $lookUpPid page id to start from
     * @return array [$lookUpPid] for a non-positive depth, otherwise the result of
     *               PageRepository::getPageIdsRecursive()
     */
    private function getPidArray(int $recursiveDepth, int $lookUpPid): array
    {
        if ($recursiveDepth <= 0) {
            return [$lookUpPid];
        }

        return GeneralUtility::makeInstance(PageRepository::class)
            ->getPageIdsRecursive([$lookUpPid], $recursiveDepth);
    }
369

370
    /**
     * Appends every value of the range $reg[1]..$reg[2] (both included) to $paramArray[$parameter].
     *
     * At most 1000 values are added per call. The lower bound is appended as given; the
     * following values are produced by incrementing it.
     *
     * @psalm-param array-key $parameter
     */
    private function addValuesInRange(array $reg, array $paramArray, int|string $parameter): array
    {
        $remaining = 1000;
        $current = $reg[1];
        while ($current <= $reg[2]) {
            $paramArray[$parameter][] = $current;
            $current++;
            // Run-away brake: stop once the maximum number of values has been added.
            if (--$remaining <= 0) {
                break;
            }
        }

        return $paramArray;
    }
388

389
    /**
     * Appends the uids of all pages in the tree below $pid (up to $depth levels) to $pidList.
     *
     * The tree for a pid/depth pair is loaded through $tree only when it is not yet in $treeCache.
     * The cache is taken by reference so that entries written here are visible to the caller and
     * can be reused on later calls; by value the cache would never be filled.
     *
     * @param array $treeCache cache of loaded trees, indexed by [pid][depth]; updated in place
     * @param int $pid root page of the sub tree
     * @param int $depth number of levels to load
     * @param PageTreeView $tree initialised tree view used to load missing trees
     * @param array $pidList uids collected so far
     * @return array $pidList extended by the uids of the sub tree
     */
    private function expandPidList(array &$treeCache, int $pid, int $depth, PageTreeView $tree, array $pidList): array
    {
        // isset() instead of empty(): an empty tree is a valid cached result as well.
        if (!isset($treeCache[$pid][$depth])) {
            $tree->reset();
            $tree->getTree($pid, $depth);
            $treeCache[$pid][$depth] = $tree->tree;
        }

        foreach ($treeCache[$pid][$depth] as $data) {
            $pidList[] = (int) $data['row']['uid'];
        }
        return $pidList;
    }
402

403
    /**
     * Adds the values of one field of a table's records to $paramArray[$parameter].
     *
     * Recognised keys of $subpartParams:
     * - "_TABLE"      table to read from (required)
     * - "_PID"        page id to look records up in, defaults to $pid
     * - "_RECURSIVE"  depth for including sub pages, defaults to 0
     * - "_PIDFIELD"   field holding the page id, defaults to "pid"
     * - "_FIELD"      field whose values are collected, defaults to "uid"
     * - "_WHERE"      additional condition, passed to the query as given
     * - "_ADDTABLE"   comma separated list of additional tables for the FROM part
     * - "_ENABLELANG" when set and the table has a transOrigPointerField, only records whose
     *                 pointer field is <= 0 are selected
     *
     * Nothing is added when "_FIELD" is neither "uid" nor a column configured in TCA.
     *
     * @param array $subpartParams parsed "_TABLE:..." configuration
     * @param int $pid current page id
     * @param array $paramArray parameter name => list of values collected so far
     * @param int|string $parameter name of the parameter being expanded
     * @return array $paramArray with the found values appended to $paramArray[$parameter]
     */
    private function extractParamsFromCustomTable(
        array $subpartParams,
        int $pid,
        array $paramArray,
        int|string $parameter
    ): array {
        $table = $subpartParams['_TABLE'];
        $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : $pid;
        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
        $pidField = isset($subpartParams['_PIDFIELD']) ? trim((string) $subpartParams['_PIDFIELD']) : 'pid';
        // NOTE(review): _WHERE is handed to the query unescaped; it presumably comes from
        // trusted crawler configuration only — confirm.
        $where = $subpartParams['_WHERE'] ?? '';
        $addTable = $subpartParams['_ADDTABLE'] ?? '';

        $fieldName = ($subpartParams['_FIELD'] ?? '') ?: 'uid';
        // Unknown columns are ignored; empty() avoids a warning for fields missing in TCA.
        if ($fieldName !== 'uid' && empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
            return $paramArray;
        }

        $queryBuilder = $this->getQueryBuilder($table);
        $pidArray = $this->getPidArray($recursiveDepth, $lookUpPid);

        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $queryBuilder
            ->select($fieldName)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in(
                    $pidField,
                    $queryBuilder->createNamedParameter($pidArray, ArrayParameterType::INTEGER)
                ),
                $where
            );

        if (!empty($addTable)) {
            // TODO: Check if this works as intended!
            $addTables = GeneralUtility::trimExplode(',', $addTable, true);
            foreach ($addTables as $additionalTable) {
                $queryBuilder->from($additionalTable);
            }
        }
        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? false;

        if (($subpartParams['_ENABLELANG'] ?? false) && $transOrigPointerField) {
            $queryBuilder->andWhere($queryBuilder->expr()->lte($transOrigPointerField, 0));
        }

        $statement = $queryBuilder->executeQuery();

        // Index by field value so every value is collected once.
        $rows = [];
        while ($row = $statement->fetchAssociative()) {
            $rows[$row[$fieldName]] = $row;
        }

        $paramArray[$parameter] = array_merge($paramArray[$parameter], array_keys($rows));

        return $paramArray;
    }
461
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc