• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemsdk / phpcpd / #32

16 Feb 2026 08:55PM UTC coverage: 78.285% (+2.5%) from 75.818%
#32

push

DKravtsov
### Added

* Added `--ignore-no-files` option to return a success exit code if no files were found.
* Added `#[SuppressCpd]` to ignore code clones inside a class or method (`use Systemsdk\PhpCPD\Attributes\SuppressCpd;`).

### Updated

* Improved Suffix Tree-based algorithm for code clone detection.
* Updated Dev environment: Updated XDebug, Phing, dev composer dependencies.

129 of 150 new or added lines in 6 files covered. (86.0%)

2 existing lines in 2 files now uncovered.

840 of 1073 relevant lines covered (78.29%)

8.49 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

93.62
/src/Detector/Strategy/SuffixTreeStrategy.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace Systemsdk\PhpCPD\Detector\Strategy;
6

7
use Systemsdk\PhpCPD\CodeClone;
8
use Systemsdk\PhpCPD\CodeCloneFile;
9
use Systemsdk\PhpCPD\CodeCloneMap;
10
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\AbstractToken;
11
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\ApproximateCloneDetectingSuffixTree;
12
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\Sentinel;
13
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\Token;
14
use Systemsdk\PhpCPD\Detector\Traits\ProgressBarTrait;
15
use Systemsdk\PhpCPD\Exceptions\MissingResultException;
16

17
use function array_key_exists;
18
use function array_keys;
19
use function count;
20
use function file_get_contents;
21
use function is_array;
22
use function token_get_all;
23
use function uniqid;
24

25
use const T_ATTRIBUTE;
26

27
/**
28
 * The suffix tree strategy was implemented in PHP for PHPCPD by Olle Härstedt.
29
 *
30
 * This PHP implementation is based on the archived Java implementation that is available at
31
 * https://www.cqse.eu/en/news/blog/conqat-end-of-life/ under the Apache License 2.0.
32
 *
33
 * The aforementioned Java implementation is based on the algorithm described in
34
 * https://dl.acm.org/doi/10.1109/ICSE.2009.5070547. This paper is available at
35
 * https://www.cqse.eu/fileadmin/content/news/publications/2009-do-code-clones-matter.pdf.
36
 */
37
final class SuffixTreeStrategy extends AbstractStrategy
38
{
39
    use ProgressBarTrait;
40

41
    private const string PROGRESS_BAR_SEARCH_CLONES_TITLE = 'Search for clones';
42
    private const string PROGRESS_BAR_PROCESS_CLONES_TITLE = 'Clones processing';
43
    private const string PROGRESS_BAR_POST_PROCESS_DONE_TITLE = 'Post process done';
44

45
    /**
46
     * @var array<int, AbstractToken>
47
     */
48
    private array $word = [];
49

50
    /**
51
     * @var array<string, int>
52
     */
53
    private array $fileEndPositions = [];
54

55
    private ?CodeCloneMap $result = null;
56

57
    /**
     * Tokenizes one file and appends its relevant tokens to the word analysed in postProcess().
     *
     * For every array-form token returned by token_get_all():
     *  - tokens on lines the guard reports as suppressed are skipped; a single unique barrier token is
     *    inserted at the start of each suppressed run;
     *  - tokens inside an attribute group (between `#[` and its matching `]`) are skipped;
     *  - tokens whose ID is a key of the ignore list are skipped;
     *  - everything else is appended to the word as a Token.
     *
     * The newline count of the file is added to the result map, the index of the last word token belonging
     * to this file is remembered, and the result map is stored for postProcess().
     *
     * @param string       $file   Path of the file to tokenize.
     * @param CodeCloneMap $result Result map that receives the line count and, later, the detected clones.
     */
    public function processFile(string $file, CodeCloneMap $result): void
    {
        // file_get_contents() returns false on failure; the cast turns that into an empty string,
        // so an unreadable file contributes no tokens and no lines.
        $content = (string)file_get_contents($file);
        $tokens = token_get_all($content);

        $result->addToNumberOfLines(substr_count($content, "\n"));

        unset($content);

        // Number of square brackets currently open inside the attribute group being skipped.
        // 0 means "not inside an attribute". Counting brackets (instead of guessing from line numbers
        // or a preceding ')') closes multi-line groups such as "#[\n    Foo\n]" correctly and does not
        // close early on array literals inside attribute arguments, e.g. #[Foo([1, 2], 'x')].
        $attributeDepth = 0;
        $wasSuppressed = false;

        foreach ($tokens as $token) {
            /** @var array{0: int, 1:string, 2:int}|string $token */
            if (!is_array($token)) {
                // Single-character tokens never enter the word; they only matter for bracket matching.
                if ($attributeDepth > 0) {
                    if ($token === '[') {
                        $attributeDepth++;
                    } elseif ($token === ']') {
                        $attributeDepth--;
                    }
                }

                continue;
            }

            $tokenLine = (int)$token[2];

            if ($this->guard->isLineSuppressed($file, $tokenLine)) {
                if (!$wasSuppressed) {
                    // Insert a unique fake barrier token.
                    // It has ID -1 and an absolutely unique value, so the Suffix Tree
                    // will never find a match for it and will stop the clone search at this point.
                    $this->word[] = new Token(
                        -1,
                        'T_SUPPRESSED_BARRIER',
                        $tokenLine,
                        $file,
                        uniqid('barrier_', true)
                    );
                    $wasSuppressed = true;
                }

                continue; // Skip the actual token, keeping it out of the engine
            }

            // Exited the suppressed zone
            $wasSuppressed = false;

            if ($attributeDepth === 0 && !isset($this->tokensIgnoreList[$token[0]])) {
                $this->word[] = new Token(
                    $token[0],
                    token_name($token[0]),
                    $tokenLine,
                    $file,
                    $token[1]
                );
            }

            // The "#[" token itself is handled above (before the depth changes);
            // everything up to the matching "]" is skipped.
            if ($token[0] === T_ATTRIBUTE) {
                $attributeDepth++;
            }
        }

        $lastIndex = count($this->word) - 1;

        // Remember where this file ends in the word, but only if it actually contributed the last token.
        if ($lastIndex >= 0 && $this->word[$lastIndex]->file === $file) {
            $this->fileEndPositions[$file] = $lastIndex;
        }

        $this->result = $result;
    }
135

136
    /**
     * Runs clone detection over all tokens collected by processFile() and records the clones found.
     *
     * Appends the end-of-word Sentinel, asks the suffix tree for clone candidates and, for every candidate
     * whose original spans at least the configured minimum number of lines, adds one CodeClone per copy
     * to the stored result map.
     *
     * @param bool $useProgressBar Whether to report the three processing steps through the progress bar.
     *
     * @throws MissingResultException When no result map has been stored yet.
     */
    public function postProcess(bool $useProgressBar): void
    {
        $result = $this->result;

        if ($result === null) {
            throw new MissingResultException('Missing result');
        }

        $totalSteps = 2;

        if ($useProgressBar) {
            $this->progressBar(0, $totalSteps, self::PROGRESS_BAR_SEARCH_CLONES_TITLE);
        }

        // Sentinel = End of word
        $this->word[] = new Sentinel();

        $suffixTree = new ApproximateCloneDetectingSuffixTree($this->word);
        $cloneInfos = $suffixTree->findClones(
            $this->config->minTokens(),
            $this->config->editDistance(),
            $this->config->headEquality()
        );

        if ($useProgressBar) {
            $this->progressBar(1, $totalSteps, self::PROGRESS_BAR_PROCESS_CLONES_TITLE);
        }

        foreach ($cloneInfos as $cloneInfo) {
            /** @var int[] $copyPositions */
            $copyPositions = $cloneInfo->otherClones->extractFirstList();

            // Clamp the original (head) to the end of its own file before measuring it.
            $cloneLength = $this->processCloneLength($cloneInfo->position, $cloneInfo->length, $cloneInfo->token->file);
            $headLastToken = $this->getLastToken($cloneInfo->position, $cloneLength);
            $headLines = $headLastToken->line + 1 - $cloneInfo->token->line;

            // Clones shorter than the configured line limit are not reported.
            if ($headLines < $this->config->minLines()) {
                continue;
            }

            for ($index = 0, $total = count($copyPositions); $index < $total; $index++) {
                $copyPosition = $copyPositions[$index];
                $copyFirstToken = $this->word[$copyPosition];

                // Each copy gets its own clamped length and therefore its own line span.
                $copyLength = $this->processCloneLength($copyPosition, $cloneLength, $copyFirstToken->file);
                $copyLastToken = $this->getLastToken($copyPosition, $copyLength);
                $copyLines = $copyLastToken->line + 1 - $copyFirstToken->line;

                $headFile = new CodeCloneFile(
                    $cloneInfo->token->file,
                    $cloneInfo->token->line,
                    $cloneInfo->token->line + $headLines
                );
                $copyFile = new CodeCloneFile(
                    $copyFirstToken->file,
                    $copyFirstToken->line,
                    $copyFirstToken->line + $copyLines
                );

                $result->add(new CodeClone($headFile, $copyFile, $headLines, $cloneLength));
            }
        }

        if ($useProgressBar) {
            $this->progressBar(2, $totalSteps, self::PROGRESS_BAR_POST_PROCESS_DONE_TITLE);
        }
    }
205

206
    /**
     * Limits a clone length so that the clone does not run past the last recorded token of its file.
     *
     * @param int    $position    Index in the word at which the clone starts.
     * @param int    $cloneLength Clone length in tokens, as reported by the detector.
     * @param string $file        File the clone's first token belongs to.
     *
     * @return int The given length, or the number of tokens from $position up to and including the file's
     *             last recorded token when that is smaller. Returned unchanged when no end position is
     *             recorded for the file.
     */
    private function processCloneLength(int $position, int $cloneLength, string $file): int
    {
        $fileEndPosition = $this->fileEndPositions[$file] ?? null;

        if ($fileEndPosition === null) {
            return $cloneLength;
        }

        $tokensLeftInFile = $fileEndPosition - $position + 1;

        return $cloneLength > $tokensLeftInFile ? $tokensLeftInFile : $cloneLength;
    }
220

221
    /**
     * Returns the last token of a clone that starts at $position and is $cloneLength tokens long.
     *
     * @param int $position    Index in the word at which the clone starts.
     * @param int $cloneLength Clone length in tokens.
     *
     * @return AbstractToken The token at the clone's final index, or the one before it when the final
     *                       index holds the Sentinel.
     */
    private function getLastToken(int $position, int $cloneLength): AbstractToken
    {
        $endIndex = $position + $cloneLength - 1;
        $candidate = $this->word[$endIndex];

        // The Sentinel marks the end of the word and carries no source position, so step back one token.
        return $candidate instanceof Sentinel
            ? $this->word[$endIndex - 1]
            : $candidate;
    }
231
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc