• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Yoast / Yoast-SEO-for-TYPO3 / 24723291290

21 Apr 2026 12:50PM UTC coverage: 10.457% (+9.2%) from 1.275%
24723291290

push

github

web-flow
Merge pull request #632 from Yoast/feature/yoast-v12

[FEATURE] Version 12.0.0, added v14 support, removed v11 support including php8.0 and php8.1, rewrote backend javascript functionality to typescript and webcomponents

40 of 806 new or added lines in 69 files covered. (4.96%)

40 existing lines in 23 files now uncovered.

284 of 2716 relevant lines covered (10.46%)

0.29 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/Classes/Service/LinkingSuggestionsService.php
1
<?php
2

3
/**
4
 * This file is part of the "yoast_seo" extension for TYPO3 CMS.
5
 *
6
 * For the full copyright and license information, please read the
7
 * LICENSE.txt file that was distributed with this source code.
8
 */
9

10
declare(strict_types=1);
11

12
namespace YoastSeoForTypo3\YoastSeo\Service;
13

14
use TYPO3\CMS\Backend\Utility\BackendUtility;
15
use TYPO3\CMS\Core\Context\LanguageAspect;
16
use TYPO3\CMS\Core\Database\Connection;
17
use TYPO3\CMS\Core\Database\ConnectionPool;
18
use TYPO3\CMS\Core\Domain\Repository\PageRepository;
19
use TYPO3\CMS\Core\Utility\GeneralUtility;
20
use YoastSeoForTypo3\YoastSeo\Constants\TableNames;
21
use YoastSeoForTypo3\YoastSeo\Traits\LanguageServiceTrait;
22

23
class LinkingSuggestionsService
24
{
25
    use LanguageServiceTrait;
26

27
    protected int $excludePageId;
28
    protected int $site;
29
    protected int $languageId;
30

31
    /**
32
     * @var array<string, int>
33
     */
34
    protected array $documentFrequencyCache = [];
35

36
    /**
     * Receives the collaborators of this service as promoted, protected properties.
     *
     * @param ConnectionPool $connectionPool Used to create query builders for the prominent word table
     * @param PageRepository $pageRepository Used to fetch language overlays of suggested records
     * @param SiteService $siteService Used to look up the site root page id for the edited page
     */
    public function __construct(
        protected ConnectionPool $connectionPool,
        protected PageRepository $pageRepository,
        protected SiteService $siteService,
    ) {
    }
×
41

42
    /**
     * Builds internal linking suggestions for a set of prominent words.
     *
     * The request words are combined with their document frequencies, every record that
     * shares a stem with the request is scored batch by batch, only the best scores are
     * kept between batches, and the remaining records are resolved into link data.
     *
     * @param array<array{occurrences: int, stem: string}> $words
     * @param int $excludePageId Page that is never suggested; also passed to the site service to resolve the site
     * @param int $languageId Language the prominent words are looked up for
     * @param string $content Content that is scanned for links which are already present
     * @return array<array<string, mixed>>
     */
    public function getLinkingSuggestions(
        array $words,
        int $excludePageId,
        int $languageId,
        string $content
    ): array {
        if ($words === []) {
            return [];
        }
        $this->excludePageId = $excludePageId;
        $this->site = $this->siteService->getSiteRootPageId($excludePageId);
        $this->languageId = $languageId;
        // Document frequencies depend on site and language, so start every request with a clean cache.
        $this->documentFrequencyCache = [];

        $words = array_column($words, 'occurrences', 'stem');

        // Combine stems, weights and DFs from request
        $requestData = $this->composeRequestData($words);
        if ($requestData === []) {
            // No request stem has a document frequency for this site and language,
            // so no stored record can share a stem with the request.
            return [];
        }

        // Calculate vector length of the request set (needed for score normalization later)
        $requestVectorLength = $this->computeVectorLength($requestData);

        $requestStems = array_keys($requestData);
        $scores = [];
        $batchSize = 100;
        $page = 1;

        do {
            // Retrieve the words of all records in this batch that share prominent word stems with request
            $candidatesWords = $this->getCandidateWords($requestStems, $batchSize, $page);

            // Transform the prominent words table so that it is indexed by record
            $candidatesWordsByRecord = $this->groupWordsByRecord($candidatesWords);

            foreach ($candidatesWordsByRecord as $id => $candidateData) {
                $scores[$id] = $this->calculateScoreForIndexable($requestData, $requestVectorLength, $candidateData);
            }

            // Sort the list of scores and keep only the top of the scores
            $scores = $this->getTopSuggestions($scores);

            ++$page;
            // Batches are fetched per (pid, tablenames) but scored per (uid_foreign, tablenames), so the
            // number of scored records says nothing about whether the batch was full. Keep paging until
            // a batch yields no candidate words at all.
        } while ($candidatesWords !== []);

        // Return the empty list if no suggestions have been found.
        if ($scores === []) {
            return [];
        }

        return $this->linkRecords($scores, $this->getCurrentContentLinks($content));
    }
99

100
    /**
     * Pairs every request stem with its weight and its document frequency.
     *
     * Stems for which countDocumentFrequencies() returns no frequency are left out.
     *
     * @param array<string, int|string> $requestWords Weights keyed by stem
     * @return array<string, array{weight: int, df: int}>
     */
    protected function composeRequestData(array $requestWords): array
    {
        $frequencies = $this->countDocumentFrequencies(array_keys($requestWords));

        $composed = [];
        foreach ($requestWords as $stem => $weight) {
            if (isset($frequencies[$stem])) {
                $composed[$stem] = [
                    'weight' => (int)$weight,
                    'df' => $frequencies[$stem],
                ];
            }
        }

        return $composed;
    }
120

121
    /**
     * Returns the document frequency of each given stem for the current language and site.
     *
     * Frequencies found in the database are stored in $documentFrequencyCache; only stems
     * without a cached value are queried. Stems without any frequency are absent from the result.
     *
     * @param string[] $stems
     * @return array<string, int>
     */
    protected function countDocumentFrequencies(array $stems): array
    {
        if ($stems === []) {
            return [];
        }

        $cache = $this->documentFrequencyCache;
        $missingStems = array_values(array_filter(
            $stems,
            static fn(string $stem): bool => !isset($cache[$stem])
        ));

        if ($missingStems !== []) {
            $table = TableNames::PROMINENT_WORD;
            $queryBuilder = $this->connectionPool->getQueryBuilderForTable($table);
            $expr = $queryBuilder->expr();

            $rows = $queryBuilder
                ->select('stem')
                ->addSelectLiteral('COUNT(stem) AS document_frequency')
                ->from($table)
                ->where(
                    $expr->in(
                        'stem',
                        $queryBuilder->createNamedParameter($missingStems, Connection::PARAM_STR_ARRAY)
                    ),
                    $expr->eq('sys_language_uid', $this->languageId),
                    $expr->eq('site', $this->site)
                )
                ->groupBy('stem')
                ->executeQuery()
                ->fetchAllAssociative();

            foreach ($rows as $row) {
                $this->documentFrequencyCache[(string)$row['stem']] = (int)$row['document_frequency'];
            }
        }

        // Answer from the cache, in the order the stems were requested.
        $frequencies = [];
        foreach ($stems as $stem) {
            $frequency = $this->documentFrequencyCache[$stem] ?? null;
            if ($frequency !== null) {
                $frequencies[$stem] = $frequency;
            }
        }

        return $frequencies;
    }
162

163
    /**
     * Computes the Euclidean length of the tf-idf vector formed by the given words.
     *
     * A word without a 'df' key is treated as having a document frequency of 1.
     *
     * @param array<string, array{weight: int, df?: int}> $prominentWords
     */
    protected function computeVectorLength(array $prominentWords): float
    {
        $squares = 0;
        foreach ($prominentWords as $entry) {
            $frequency = array_key_exists('df', $entry) ? $entry['df'] : 1;
            $squares += $this->computeTfIdfScore($entry['weight'], $frequency) ** 2;
        }

        return sqrt($squares);
    }
180

181
    /**
     * Weighs a term frequency by the inverse of its document frequency.
     *
     * Document frequencies below 1 are raised to 1, which also rules out a division by zero.
     */
    protected function computeTfIdfScore(int $termFrequency, int $docFrequency): float
    {
        $inverseDocFrequency = 1 / max(1, $docFrequency);

        return $termFrequency * $inverseDocFrequency;
    }
186

187
    /**
     * Loads the prominent words of one batch of records that share a stem with the request.
     *
     * @param string[] $stems
     * @param int $batchSize Maximum number of (pid, tablenames) rows looked up for the batch
     * @param int $page One-based batch number
     * @return array<array{stem: string, weight: int, pid: int, tablenames: string, uid_foreign: int, df?: int}>
     */
    protected function getCandidateWords(array $stems, int $batchSize, int $page): array
    {
        $matchingRecords = $this->findRecordsByStems($stems, $batchSize, $page);

        return $this->findStemsByRecords($matchingRecords);
    }
197

198
    /**
     * Loads all prominent words of the given records and adds the document frequency to each word.
     *
     * Words whose stem has no known document frequency are returned without a 'df' key.
     *
     * @param array<array{pid: int, tablenames: string}> $records
     * @return array<array{stem: string, weight: int, pid: int, tablenames: string, uid_foreign: int, df?: int}>
     */
    protected function findStemsByRecords(array $records): array
    {
        if ($records === []) {
            return [];
        }

        $prominentWords = $this->getProminentWords($records);
        $prominentStems = array_unique(array_column($prominentWords, 'stem'));

        $stemCounts = $this->countDocumentFrequencies($prominentStems);

        // Write through the index instead of iterating by reference, so no reference
        // to the last element is left dangling after the loop.
        foreach ($prominentWords as $index => $prominentWord) {
            $stem = $prominentWord['stem'];
            if (isset($stemCounts[$stem])) {
                $prominentWords[$index]['df'] = $stemCounts[$stem];
            }
        }

        return $prominentWords;
    }
221

222
    /**
     * Finds one page of (pid, tablenames) combinations that own at least one of the given stems
     * for the current language and site.
     *
     * @param string[] $stems
     * @param int $batchSize Maximum number of rows to return
     * @param int $page One-based page number; values below 1 are treated as the first page
     * @return array<array{pid: int, tablenames: string}>
     */
    protected function findRecordsByStems(array $stems, int $batchSize, int $page): array
    {
        if ($stems === []) {
            // Without stems nothing can match; skip the database round trip.
            return [];
        }

        $queryBuilder = $this->connectionPool->getQueryBuilderForTable(TableNames::PROMINENT_WORD);
        $queryBuilder
            ->select('pid', 'tablenames')
            ->from(TableNames::PROMINENT_WORD)
            ->where(
                $queryBuilder->expr()->in(
                    'stem',
                    $queryBuilder->createNamedParameter($stems, Connection::PARAM_STR_ARRAY)
                ),
                $queryBuilder->expr()->eq('sys_language_uid', $this->languageId),
                $queryBuilder->expr()->eq('site', $this->site)
            )
            ->groupBy('pid', 'tablenames')
            // LIMIT/OFFSET paging needs a stable order, otherwise consecutive pages
            // may repeat or skip rows.
            ->orderBy('pid')
            ->addOrderBy('tablenames')
            ->setMaxResults($batchSize)
            ->setFirstResult(max(0, $page - 1) * $batchSize);

        /** @var array<array{pid: int, tablenames: string}> $records */
        $records = $queryBuilder->executeQuery()->fetchAllAssociative();
        return $records;
    }
243

244
    /**
     * Loads every prominent word stored for the given (pid, tablenames) combinations
     * in the current language.
     *
     * @param array<array{pid: int, tablenames: string}> $records
     * @return array<array{stem: string, weight: int, pid: int, tablenames: string, uid_foreign: int}>
     */
    protected function getProminentWords(array $records): array
    {
        if ($records === []) {
            // No records means no table conditions; never run the query without a record restriction.
            return [];
        }

        // Group pids by tablename to use efficient IN() clauses instead of OR chains
        $pidsByTable = [];
        foreach ($records as $record) {
            $pidsByTable[$record['tablenames']][] = (int)$record['pid'];
        }

        $queryBuilder = $this->connectionPool->getQueryBuilderForTable(TableNames::PROMINENT_WORD);
        $tableConditions = [];
        foreach ($pidsByTable as $tablename => $pids) {
            $tableConditions[] = $queryBuilder->expr()->and(
                $queryBuilder->expr()->eq('tablenames', $queryBuilder->createNamedParameter($tablename)),
                $queryBuilder->expr()->in(
                    'pid',
                    $queryBuilder->createNamedParameter($pids, Connection::PARAM_INT_ARRAY)
                )
            );
        }

        $queryBuilder
            ->select('stem', 'weight', 'pid', 'tablenames', 'uid_foreign')
            ->from(TableNames::PROMINENT_WORD)
            ->where(
                $queryBuilder->expr()->eq('sys_language_uid', $this->languageId),
                $queryBuilder->expr()->or(...$tableConditions)
            );

        /** @var array<array{stem: string, weight: int, pid: int, tablenames: string, uid_foreign: int}> $prominentWords */
        $prominentWords = $queryBuilder->executeQuery()->fetchAllAssociative();
        return $prominentWords;
    }
274

275
    /**
     * Re-indexes a flat list of prominent words by record and stem.
     *
     * The record key has the form "<uid_foreign>-<tablenames>". Words without a 'df' value are dropped.
     *
     * @param array<array{stem: string, weight: int, pid: int, tablenames: string, uid_foreign: int, df?: int}> $candidateWords
     * @return array<string, array<string, array{weight: int, df: int}>>
     */
    protected function groupWordsByRecord(array $candidateWords): array
    {
        $grouped = [];
        foreach ($candidateWords as $word) {
            if (isset($word['df'])) {
                $key = $word['uid_foreign'] . '-' . $word['tablenames'];
                $grouped[$key][$word['stem']] = [
                    'weight' => (int)$word['weight'],
                    'df' => (int)$word['df'],
                ];
            }
        }

        return $grouped;
    }
294

295
    /**
     * Scores one candidate record against the request: the raw score of the shared stems,
     * normalized by the vector lengths of the candidate and the request.
     *
     * @param array<string, array{weight: int, df: int}> $requestData
     * @param array<string, array{weight: int, df: int}> $candidateData
     */
    protected function calculateScoreForIndexable(
        array $requestData,
        float $requestVectorLength,
        array $candidateData
    ): float {
        return $this->normalizeScore(
            $this->computeRawScore($requestData, $candidateData),
            $this->computeVectorLength($candidateData),
            $requestVectorLength
        );
    }
308

309
    /**
     * Sums, over all stems present in both sets, the product of the request tf-idf score
     * and the candidate tf-idf score.
     *
     * @param array<string, array{weight: int, df: int}> $requestData
     * @param array<string, array{weight: int, df: int}> $candidateData
     */
    protected function computeRawScore(array $requestData, array $candidateData): float
    {
        $total = 0.0;
        // Only stems known to both sides contribute; iteration follows the candidate's order.
        foreach (array_intersect_key($candidateData, $requestData) as $stem => $candidateWord) {
            $requestWord = $requestData[$stem];

            $requestTfIdf = $this->computeTfIdfScore($requestWord['weight'], $requestWord['df']);
            $candidateTfIdf = $this->computeTfIdfScore($candidateWord['weight'], $candidateWord['df']);

            $total += $requestTfIdf * $candidateTfIdf;
        }

        return $total;
    }
333

334
    /**
     * Divides the raw score by the product of both vector lengths.
     *
     * Returns 0 when that product is 0, because the division is not possible then.
     */
    protected function normalizeScore(float $rawScore, float $vectorLengthCandidate, float $vectorLengthRequest): float
    {
        $denominator = $vectorLengthRequest * $vectorLengthCandidate;

        return $denominator === 0.0 ? 0.0 : $rawScore / $denominator;
    }
343

344
    /**
     * Orders the scores from highest to lowest and keeps the first 20, preserving the record keys.
     *
     * @param array<string, float|int> $scores
     * @return array<string, float|int>
     */
    protected function getTopSuggestions(array $scores): array
    {
        // Identical scores compare as equal; otherwise the higher score sorts first.
        $byScoreDescending = static fn($left, $right): int => $left === $right
            ? 0
            : (($left < $right) ? 1 : -1);

        uasort($scores, $byScoreDescending);

        // The record ids live in the array keys, so they must survive the slice.
        return \array_slice($scores, 0, 20, true);
    }
364

365
    /**
     * Resolves scored record keys ("<uid>-<table>") into suggestion data.
     *
     * The excluded page and records that cannot be loaded are skipped. For a language id
     * above 0 the record is replaced by its overlay when one is found. The result lists
     * cornerstone suggestions first, each group ordered by descending score.
     *
     * @param array<string, float|int> $scores
     * @param array<string, bool> $currentLinks Record keys that are already linked in the content
     * @return array<string, array{label: string, recordType: string, id: int, table: string, cornerstone: int, score: float, active: bool}>
     */
    protected function linkRecords(array $scores, array $currentLinks): array
    {
        $suggestions = [];
        foreach ($scores as $recordKey => $score) {
            [$uid, $table] = explode('-', $recordKey);

            $isExcludedPage = $table === TableNames::PAGES && (int)$uid === $this->excludePageId;
            if ($isExcludedPage) {
                continue;
            }

            $row = BackendUtility::getRecord($table, $uid);
            if ($row === null) {
                continue;
            }

            if ($this->languageId > 0) {
                $overlay = $this->getRecordOverlay($table, $row, $this->languageId);
                if ($overlay) {
                    $row = $overlay;
                }
            }

            $suggestions[$recordKey] = [
                'label' => $row[$GLOBALS['TCA'][$table]['ctrl']['label']],
                'recordType' => $this->getRecordType($table),
                'id' => (int)$uid,
                'table' => $table,
                'cornerstone' => (int)($row['tx_yoastseo_cornerstone'] ?? 0),
                'score' => $score,
                'active' => isset($currentLinks[$recordKey]),
            ];
        }

        $this->sortSuggestions($suggestions);

        // Cornerstone records go first; both groups keep the score order established above.
        return array_merge_recursive(
            [],
            $this->filterSuggestions($suggestions, true),
            $this->filterSuggestions($suggestions, false)
        );
    }
406

407
    /**
     * Sorts the suggestions in place by descending score while keeping their keys.
     *
     * @param array<string, array{label: string, recordType: string, id: int, table: string, cornerstone: int, score: float, active: bool}> $links
     */
    protected function sortSuggestions(array &$links): void
    {
        $byScoreDescending = static function ($left, $right) {
            $leftScore = $left['score'];
            $rightScore = $right['score'];

            // Identical scores compare as equal; otherwise the higher score sorts first.
            return $leftScore === $rightScore ? 0 : (($leftScore < $rightScore) ? 1 : -1);
        };

        uasort($links, $byScoreDescending);
    }
423

424
    /**
     * Keeps only the suggestions whose cornerstone flag, cast to bool, equals $cornerstone.
     *
     * Keys and order of the remaining suggestions are preserved.
     *
     * @param array<string, array{label: string, recordType: string, id: int, table: string, cornerstone: int, score: float, active: bool}> $links
     * @return array<string, array{label: string, recordType: string, id: int, table: string, cornerstone: int, score: float, active: bool}>
     */
    protected function filterSuggestions(array $links, bool $cornerstone): array
    {
        $matchesFlag = static fn($suggestion) => (bool)$suggestion['cornerstone'] === $cornerstone;

        return \array_filter($links, $matchesFlag);
    }
437

438
    /**
     * Collects the t3:// links that are already present in the content.
     *
     * Each link of the form t3://<handler>?uid=<n> becomes the key "<n>-<handler>", where
     * "page" inside the handler is replaced with the pages table name so the key lines up
     * with the record keys used for suggestions.
     *
     * @return array<string, bool>
     */
    protected function getCurrentContentLinks(string $content): array
    {
        $currentLinks = [];
        // The handler capture stops at the first "?" or quote, so several links on one line are
        // matched one by one instead of being swallowed into a single match. Attributes in
        // front of href are allowed.
        preg_match_all(
            '/<a\s(?:[^>]*\s)?href="t3:\/\/([^?"]+)\?uid=(\d+)/',
            $content,
            $matches,
            PREG_SET_ORDER
        );
        foreach ($matches as $match) {
            $key = (int)$match[2] . '-' . str_replace('page', TableNames::PAGES, $match[1]);
            $currentLinks[$key] = true;
        }
        return $currentLinks;
    }
451

452
    /**
     * Returns the table title from TCA, passed through the language service.
     */
    protected function getRecordType(string $table): string
    {
        $titleReference = $GLOBALS['TCA'][$table]['ctrl']['title'];

        return $this->getLanguageService()->sL($titleReference);
    }
458

459
    /**
     * Asks the page repository for the language overlay of a record.
     *
     * The language aspect is created with $languageId as its first and second argument
     * and 'mixed' as its third.
     *
     * @param array<string, mixed> $data Record row to overlay
     * @return array<string, mixed>|null
     */
    protected function getRecordOverlay(string $table, array $data, int $languageId): array|null
    {
        return $this->pageRepository->getLanguageOverlay(
            $table,
            $data,
            GeneralUtility::makeInstance(LanguageAspect::class, $languageId, $languageId, 'mixed')
        );
    }
468
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc