• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tomasnorre / crawler / 3696698329

pending completion
3696698329

Pull #988

github

GitHub
Merge d1d32bd2d into 3f94d6a4e
Pull Request #988: [WIP][FEATURE] Setup new Backend Module

417 of 417 new or added lines in 9 files covered. (100.0%)

1601 of 2523 relevant lines covered (63.46%)

3.23 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

71.96
/Classes/Service/ConfigurationService.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace AOE\Crawler\Service;
6

7
/*
8
 * (c) 2022 Tomas Norre Mikkelsen <tomasnorre@gmail.com>
9
 *
10
 * This file is part of the TYPO3 Crawler Extension.
11
 *
12
 * It is free software; you can redistribute it and/or modify it under
13
 * the terms of the GNU General Public License, either version 2
14
 * of the License, or any later version.
15
 *
16
 * For the full copyright and license information, please read the
17
 * LICENSE.txt file that was distributed with this source code.
18
 *
19
 * The TYPO3 project - inspiring people to share!
20
 */
21

22
use AOE\Crawler\Configuration\ExtensionConfigurationProvider;
23
use AOE\Crawler\Domain\Repository\ConfigurationRepository;
24
use Doctrine\DBAL\Connection;
25
use TYPO3\CMS\Backend\Tree\View\PageTreeView;
26
use TYPO3\CMS\Core\Authentication\BackendUserAuthentication;
27
use TYPO3\CMS\Core\Core\Bootstrap;
28
use TYPO3\CMS\Core\Database\ConnectionPool;
29
use TYPO3\CMS\Core\Database\Query\QueryBuilder;
30
use TYPO3\CMS\Core\Database\Query\Restriction\DeletedRestriction;
31
use TYPO3\CMS\Core\Database\QueryGenerator;
32
use TYPO3\CMS\Core\Type\Bitmask\Permission;
33
use TYPO3\CMS\Core\TypoScript\Parser\TypoScriptParser;
34
use TYPO3\CMS\Core\Utility\GeneralUtility;
35
use TYPO3\CMS\Core\Utility\MathUtility;
36

37
/**
38
 * @internal since v9.2.5
39
 */
40
class ConfigurationService
41
{
42
    /**
43
     * @var BackendUserAuthentication|null
44
     */
45
    private $backendUser;
46
    private array $extensionSettings;
47

48
    /**
     * Stores the injected collaborators and reads the extension configuration once.
     */
    public function __construct(
        private UrlService $urlService,
        private ConfigurationRepository $configurationRepository
    ) {
        /** @var ExtensionConfigurationProvider $settingsProvider */
        $settingsProvider = GeneralUtility::makeInstance(ExtensionConfigurationProvider::class);
        $this->extensionSettings = $settingsProvider->getExtensionConfiguration();
    }
56

57
    /**
     * Restricts $configurations to the entries whose key is listed in $allowedConfigurations.
     *
     * An empty allow-list means "no restriction": the configurations are returned untouched.
     * Keys are compared strictly and the keys of the remaining entries are preserved.
     */
    public static function removeDisallowedConfigurations(array $allowedConfigurations, array $configurations): array
    {
        if ($allowedConfigurations === []) {
            return $configurations;
        }

        // Keep only the configurations that match the current selection.
        return array_filter(
            $configurations,
            static fn ($confKey): bool => in_array($confKey, $allowedConfigurations, true),
            ARRAY_FILTER_USE_KEY
        );
    }
71

72
    /**
     * Collects the crawler configurations defined below tx_crawler.crawlerCfg.paramSets in Page TSconfig.
     *
     * Every paramSet key that has a sub-array becomes one entry in $res holding the sub configuration,
     * the parsed and expanded URL parameters and the compiled URLs. A configuration restricted through
     * "pidsOnly" is only used when $pageId is part of that list.
     *
     * @param array $pageTSConfig Page TSconfig as nested array
     * @param int $pageId Page the configuration is resolved for
     * @param array $res Configurations collected so far; an entry with the same key is overwritten
     * @param string $mountPoint Optional mount point, appended to the base URL as "MP" parameter
     * @return array $res extended by the configurations found in Page TSconfig
     */
    public function getConfigurationFromPageTS(
        array $pageTSConfig,
        int $pageId,
        array $res,
        string $mountPoint = ''
    ): array {
        $defaultCompileUrls = 10_000;
        $maxUrlsToCompile = MathUtility::forceIntegerInRange(
            $this->extensionSettings['maxCompileUrls'] ?? $defaultCompileUrls,
            1,
            1_000_000_000,
            $defaultCompileUrls
        );

        $crawlerCfg = $pageTSConfig['tx_crawler.']['crawlerCfg.']['paramSets.'] ?? [];
        foreach ($crawlerCfg as $key => $values) {
            // Only "<key>." entries (sub-arrays) describe a configuration.
            if (!is_array($values)) {
                continue;
            }
            $key = str_replace('.', '', (string) $key);

            // Sub configuration for a single configuration string:
            $subCfg = (array) ($crawlerCfg[$key . '.'] ?? []);
            $subCfg['key'] = $key;

            if (($subCfg['procInstrFilter'] ?? '') !== '') {
                // Normalise the list: strip whitespace around the individual entries.
                $subCfg['procInstrFilter'] = implode(',', GeneralUtility::trimExplode(',', $subCfg['procInstrFilter']));
            }

            $pidsOnly = (string) ($subCfg['pidsOnly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Process the configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && !GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            // A paramSet may consist of sub-properties only ("foo." without "foo"); treat the missing
            // parameter string as empty instead of passing null on.
            $parameterString = (string) ($crawlerCfg[$key] ?? '');

            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($parameterString);
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['origin'] = 'pagets';

            $url = '?id=' . $pageId;
            if ($mountPoint !== '') {
                $url .= '&MP=' . $mountPoint;
            }
            $res[$key]['URLs'] = $this->urlService->compileUrls(
                $res[$key]['paramExpanded'],
                [$url],
                $maxUrlsToCompile
            );
        }

        return $res;
    }
124

125
    /**
     * Collects the crawler configurations stored as records along the root line of a page.
     *
     * A record is used when the current backend user may access it (no "begroups" set, admin, or
     * matching group), when it is not restricted to other pages through "pidsonly", when no
     * configuration with the same name is present in $res already, and when $pageId is not part of
     * the record's expanded "exclude" list.
     *
     * @param int $pageId Page the configuration is resolved for
     * @param array $res Configurations collected so far; existing keys are never overwritten
     * @return array $res extended by the configurations found in the database
     */
    public function getConfigurationFromDatabase(int $pageId, array $res): array
    {
        // Same fallback handling as in getConfigurationFromPageTS(): a missing setting uses the default.
        $defaultCompileUrls = 10_000;
        $maxUrlsToCompile = MathUtility::forceIntegerInRange(
            $this->extensionSettings['maxCompileUrls'] ?? $defaultCompileUrls,
            1,
            1_000_000_000,
            $defaultCompileUrls
        );

        $crawlerConfigurations = $this->configurationRepository->getCrawlerConfigurationRecordsFromRootLine($pageId);
        foreach ($crawlerConfigurations as $configurationRecord) {
            // Check access to the configuration record.
            $hasAccess = empty($configurationRecord['begroups'])
                || $this->getBackendUser()->isAdmin()
                || UserService::hasGroupAccess(
                    $this->getBackendUser()->user['usergroup_cached_list'],
                    $configurationRecord['begroups']
                );
            if (!$hasAccess) {
                continue;
            }

            // Cast so that a NULL value in the record does not break the string functions below.
            $pidsOnly = (string) ($configurationRecord['pidsonly'] ?? '');
            $pidOnlyList = implode(',', GeneralUtility::trimExplode(',', $pidsOnly, true));

            // Process the configuration if it is not page-specific or if the specific page is the current page:
            // TODO: Check if $pidOnlyList can be kept as Array instead of imploded
            if ($pidsOnly !== '' && !GeneralUtility::inList($pidOnlyList, (string) $pageId)) {
                continue;
            }

            $key = $configurationRecord['name'];

            // Don't overwrite previously defined paramSets.
            if (isset($res[$key])) {
                continue;
            }

            /** @var TypoScriptParser $typoScriptParser */
            $typoScriptParser = GeneralUtility::makeInstance(TypoScriptParser::class);
            $typoScriptParser->parse($configurationRecord['processing_instruction_parameters_ts']);

            $subCfg = [
                'procInstrFilter' => $configurationRecord['processing_instruction_filter'],
                'procInstrParams.' => $typoScriptParser->setup,
                'baseUrl' => $configurationRecord['base_url'],
                'force_ssl' => (int) $configurationRecord['force_ssl'],
                'userGroups' => $configurationRecord['fegroups'],
                'exclude' => $configurationRecord['exclude'],
                'key' => $key,
            ];

            // expandExcludeString() only accepts strings, so a NULL "exclude" value is cast as well.
            if (in_array($pageId, $this->expandExcludeString((string) ($subCfg['exclude'] ?? '')), true)) {
                continue;
            }

            $res[$key] = [];
            $res[$key]['subCfg'] = $subCfg;
            $res[$key]['paramParsed'] = GeneralUtility::explodeUrl2Array($configurationRecord['configuration']);
            $res[$key]['paramExpanded'] = $this->expandParameters($res[$key]['paramParsed'], $pageId);
            $res[$key]['URLs'] = $this->urlService->compileUrls(
                $res[$key]['paramExpanded'],
                ['?id=' . $pageId],
                $maxUrlsToCompile
            );
            $res[$key]['origin'] = 'tx_crawler_configuration_' . $configurationRecord['uid'];
        }

        return $res;
    }
187

188
    /**
     * Resolves an exclude string into a unique list of page ids.
     *
     * The string is a comma separated list of "<pid>" or "<pid>+<depth>" parts. A part without "+"
     * contributes the page itself only; a part with "+" but without a non-zero depth is expanded
     * with a depth of 99. Non-empty results are memoised per exclude string across calls.
     *
     * @return int[]
     */
    public function expandExcludeString(string $excludeString): array
    {
        // Function-local static caches.
        static $expandedExcludeStringCache;
        static $treeCache = [];

        if (!empty($expandedExcludeStringCache[$excludeString])) {
            return $expandedExcludeStringCache[$excludeString];
        }

        $pageIds = [];

        if (!empty($excludeString)) {
            /** @var PageTreeView $pageTree */
            $pageTree = GeneralUtility::makeInstance(PageTreeView::class);
            $pageTree->init('AND ' . $this->getBackendUser()->getPagePermsClause(Permission::PAGE_SHOW));

            foreach (GeneralUtility::trimExplode(',', $excludeString) as $segment) {
                $pieces = GeneralUtility::trimExplode('+', $segment);
                $pageId = (int) ($pieces[0] ?? 0);
                $levels = (int) ($pieces[1] ?? 0);

                // Default is "page only" (depth 0); a bare "+" asks for the whole subtree.
                if ($levels === 0) {
                    $levels = str_contains($segment, '+') ? 99 : 0;
                }

                $pageIds[] = $pageId;
                if ($levels > 0) {
                    $pageIds = $this->expandPidList($treeCache, $pageId, $levels, $pageTree, $pageIds);
                }
            }
        }

        $uniquePageIds = array_unique($pageIds);
        $expandedExcludeStringCache[$excludeString] = $uniquePageIds;

        return $uniquePageIds;
    }
228

229
    /**
     * Will expand the parameters configuration to individual values. This follows a certain syntax of the value of each parameter.
     * Syntax of values:
     * - Basically: If the value is wrapped in [...] it will be expanded according to the following syntax, otherwise the value is taken literally
     * - Configuration is split by "|" and the parts are processed individually and finally added together
     * - For each configuration part:
     *         - "[int]-[int]" = Integer range, will be expanded to all values in between, values included, starting from low to high (max. 1000). Example "1-34" or "-40--30"
     *         - "_TABLE:[TCA table name];[_PID:[optional page id, default is current page]];[_ENABLELANG:1]" = Look up of table records from PID, filtering out deleted records. Example "_TABLE:tt_content; _PID:123"
     *        _ENABLELANG:1 picks only original records without their language overlays
     *         - Default: Literal value
     *
     * @param array $paramArray Parameter name => raw value string
     * @param int $pid Page id used for table look-ups that do not name a page themselves
     * @return array Parameter name => list of expanded values
     * @throws \Doctrine\DBAL\DBALException
     */
    private function expandParameters(array $paramArray, int $pid): array
    {
        // Traverse parameter names:
        foreach ($paramArray as $parameter => $parameterValue) {
            $parameterValue = trim($parameterValue);

            // Without square brackets the value is literal and becomes the only value in the list.
            if (!$this->isWrappedInSquareBrackets($parameterValue)) {
                $paramArray[$parameter] = [$parameterValue];
                continue;
            }

            // Strip the brackets and reset the paramArray value as an array.
            $paramArray[$parameter] = [];

            // Explode parts and traverse them:
            foreach (explode('|', substr($parameterValue, 1, -1)) as $part) {
                $trimmedPart = trim($part);

                // Look for integer range: (fx. 1-34 or -40--30 // reads minus 40 to minus 30)
                if (preg_match('/^(-?[0-9]+)\s*-\s*(-?[0-9]+)$/', $trimmedPart, $reg)) {
                    $reg = $this->swapIfFirstIsLargerThanSecond($reg);
                    $paramArray = $this->addValuesInRange($reg, $paramArray, $parameter);
                } elseif (str_starts_with($trimmedPart, '_TABLE:')) {
                    // Parse "key:value" sub-parameters. Only the first ":" separates key and value, so a
                    // value that contains a colon itself is kept intact; a sub-parameter without ":" gets null.
                    $subpartParams = [];
                    foreach (GeneralUtility::trimExplode(';', $part) as $subpart) {
                        [$pKey, $pVal] = array_pad(GeneralUtility::trimExplode(':', $subpart, false, 2), 2, null);
                        $subpartParams[$pKey] = $pVal;
                    }

                    // Table exists:
                    if (isset($GLOBALS['TCA'][$subpartParams['_TABLE']])) {
                        $paramArray = $this->extractParamsFromCustomTable(
                            $subpartParams,
                            $pid,
                            $paramArray,
                            $parameter
                        );
                    }
                } else {
                    // Just add value:
                    $paramArray[$parameter][] = $part;
                }

                // Hook for processing own expandParameters place holder
                $paramArray = $this->runExpandParametersHook($paramArray, $parameter, $part, $pid);
            }

            // Make unique set of values and sort array by key:
            $paramArray[$parameter] = array_unique($paramArray[$parameter]);
            ksort($paramArray);
        }

        return $paramArray;
    }
297

298
    /**
     * Tells whether the string starts with "[" and ends with "]".
     */
    private function isWrappedInSquareBrackets(string $string): bool
    {
        if (!str_starts_with($string, '[')) {
            return false;
        }

        return str_ends_with($string, ']');
    }
302

303
    /**
     * Orders the two range bounds of a match array so that index 1 holds the lower and index 2 the higher value.
     *
     * @param array $reg Match array with the bounds at index 1 and 2
     * @return array The match array with the bounds in ascending order
     */
    private function swapIfFirstIsLargerThanSecond(array $reg): array
    {
        if ($reg[1] > $reg[2]) {
            // Exchange both bounds in one step.
            [$reg[1], $reg[2]] = [$reg[2], $reg[1]];
        }

        return $reg;
    }
314

315
    /**
     * Returns the backend user, taken from $GLOBALS['BE_USER'] on first use and kept on the instance afterwards.
     *
     * @return BackendUserAuthentication
     */
    private function getBackendUser()
    {
        // Make sure the _cli_ user is loaded before the global is read.
        Bootstrap::initializeBackendAuthentication();
        $this->backendUser ??= $GLOBALS['BE_USER'];

        return $this->backendUser;
    }
327

328
    /**
     * Creates a query builder for the given table through the connection pool.
     *
     * @return QueryBuilder
     */
    private function getQueryBuilder(string $table)
    {
        /** @var ConnectionPool $connectionPool */
        $connectionPool = GeneralUtility::makeInstance(ConnectionPool::class);

        return $connectionPool->getQueryBuilderForTable($table);
    }
337

338
    /**
     * Runs the functions registered for the "expandParameters" hook and returns the parameter array afterwards.
     *
     * The parameter array is handed to the hook functions by reference, so they can modify it.
     *
     * @psalm-param array-key $parameter
     */
    private function runExpandParametersHook(array $paramArray, int|string $parameter, string $path, int $pid): array
    {
        $hookFunctions = $GLOBALS['TYPO3_CONF_VARS']['SC_OPTIONS']['crawler/class.tx_crawler_lib.php']['expandParameters'] ?? null;
        if (!is_array($hookFunctions)) {
            // Nothing registered.
            return $paramArray;
        }

        $_params = [
            'pObj' => &$this,
            'paramArray' => &$paramArray,
            'currentKey' => $parameter,
            'currentValue' => $path,
            'pid' => $pid,
        ];
        foreach ($hookFunctions as $_funcRef) {
            GeneralUtility::callUserFunction($_funcRef, $_params, $this);
        }

        return $paramArray;
    }
359

360
    /**
     * Returns the page ids to look records up on: only $lookUpPid for a depth of 0 or less,
     * otherwise the tree list the QueryGenerator returns for $lookUpPid and the given depth.
     *
     * @return int[]
     */
    private function getPidArray(int $recursiveDepth, int $lookUpPid): array
    {
        if ($recursiveDepth <= 0) {
            return [$lookUpPid];
        }

        /** @var QueryGenerator $treeListGenerator */
        $treeListGenerator = GeneralUtility::makeInstance(QueryGenerator::class);
        $commaSeparatedPids = $treeListGenerator->getTreeList($lookUpPid, $recursiveDepth);

        return GeneralUtility::intExplode(',', $commaSeparatedPids);
    }
372

373
    /**
     * Appends every value of the range $reg[1]..$reg[2] (both included) to $paramArray[$parameter].
     * At most 1000 values are added per call, which limits the size of the range.
     *
     * @psalm-param array-key $parameter
     */
    private function addValuesInRange(array $reg, array $paramArray, int|string $parameter): array
    {
        $remaining = 1000;
        $current = $reg[1];

        while ($current <= $reg[2]) {
            $paramArray[$parameter][] = $current;

            // Runaway brake: stop once the maximum number of values has been added.
            if (--$remaining <= 0) {
                break;
            }
            $current++;
        }

        return $paramArray;
    }
391

392
    /**
     * Appends the uids of all pages of the tree below $pid (down to $depth levels) to $pidList.
     *
     * @param array $treeCache Trees loaded so far, indexed by [pid][depth]. Taken by reference so that a
     *                         tree loaded here is kept in the caller's cache and is not fetched again
     *                         for the same pid and depth.
     * @param int $pid Page the tree starts at
     * @param int $depth Number of levels to descend
     * @param PageTreeView $tree Initialised tree view used to load the pages
     * @param array $pidList Page ids collected so far
     * @return array $pidList extended by the uids found in the tree
     */
    private function expandPidList(array &$treeCache, int $pid, int $depth, PageTreeView $tree, array $pidList): array
    {
        if (!isset($treeCache[$pid][$depth])) {
            $tree->reset();
            $tree->getTree($pid, $depth);
            $treeCache[$pid][$depth] = $tree->tree;
        }

        foreach ($treeCache[$pid][$depth] as $data) {
            $pidList[] = (int) $data['row']['uid'];
        }

        return $pidList;
    }
405

406
    /**
     * Looks up values from a TCA table and appends them to $paramArray[$parameter].
     *
     * Sub-parameters read from $subpartParams:
     * - _TABLE: table to select from
     * - _PID: page id to look up on (default: $pid)
     * - _RECURSIVE: depth used to include sub pages (default: 0)
     * - _PIDFIELD: field compared against the page ids (default: "pid")
     * - _WHERE: additional where clause
     * - _ADDTABLE: additional "from" part
     * - _FIELD: field whose values are collected (default: "uid"); any other field is only used
     *   when it has a non-empty entry in the table's TCA columns
     * - _ENABLELANG: when set and the table has a transOrigPointerField, only rows where that field is <= 0 are used
     *
     * @psalm-param array-key $parameter
     * @throws \Doctrine\DBAL\DBALException
     */
    private function extractParamsFromCustomTable(
        array $subpartParams,
        int $pid,
        array $paramArray,
        int|string $parameter
    ): array {
        $table = $subpartParams['_TABLE'];
        $lookUpPid = isset($subpartParams['_PID']) ? (int) $subpartParams['_PID'] : $pid;
        $recursiveDepth = isset($subpartParams['_RECURSIVE']) ? (int) $subpartParams['_RECURSIVE'] : 0;
        $pidField = isset($subpartParams['_PIDFIELD']) ? trim($subpartParams['_PIDFIELD']) : 'pid';
        $where = (string) ($subpartParams['_WHERE'] ?? '');
        $addTable = $subpartParams['_ADDTABLE'] ?? '';
        $fieldName = ($subpartParams['_FIELD'] ?? '') ?: 'uid';

        // empty() instead of a plain array read: an unknown field is skipped without an "undefined key" warning.
        if ($fieldName !== 'uid' && empty($GLOBALS['TCA'][$table]['columns'][$fieldName])) {
            return $paramArray;
        }

        $queryBuilder = $this->getQueryBuilder($table);
        $pidArray = $this->getPidArray($recursiveDepth, $lookUpPid);

        // Only deleted records are filtered out; all other default restrictions are removed.
        $queryBuilder->getRestrictions()
            ->removeAll()
            ->add(GeneralUtility::makeInstance(DeletedRestriction::class));

        $queryBuilder
            ->select($fieldName)
            ->from($table)
            ->where(
                $queryBuilder->expr()->in(
                    $pidField,
                    $queryBuilder->createNamedParameter($pidArray, Connection::PARAM_INT_ARRAY)
                )
            );

        // The additional clause is only appended when there is one, so no empty predicate is handed over.
        if ($where !== '') {
            $queryBuilder->andWhere($where);
        }

        if (!empty($addTable)) {
            // TODO: Check if this works as intended!
            $queryBuilder->add('from', $addTable);
        }

        // Not every table defines a language parent field.
        $transOrigPointerField = $GLOBALS['TCA'][$table]['ctrl']['transOrigPointerField'] ?? '';
        if (($subpartParams['_ENABLELANG'] ?? false) && $transOrigPointerField) {
            $queryBuilder->andWhere($queryBuilder->expr()->lte($transOrigPointerField, 0));
        }

        $statement = $queryBuilder->execute();

        // Index by field value so that every value is collected only once.
        $rows = [];
        while ($row = $statement->fetch()) {
            $rows[$row[$fieldName]] = $row;
        }

        $paramArray[$parameter] = array_merge($paramArray[$parameter], array_keys($rows));

        return $paramArray;
    }
464
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc