• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

systemsdk / phpcpd / #6

23 Mar 2025 04:51PM UTC coverage: 75.711% (-0.08%) from 75.789%
#6

push

DKravtsov
phpcpd 8.1.0 release. Added a Suffix Tree-based algorithm for code clone detection (experimental) and a progress bar. Refactored the codebase. Updated packages: sebastian/cli-parser, sebastian/version, phpunit/php-file-iterator, phpunit/php-timer. Updated tests to PHPUnit 12.

101 of 126 new or added lines in 10 files covered. (80.16%)

1 existing line in 1 file now uncovered.

692 of 914 relevant lines covered (75.71%)

3.7 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.24
/src/Detector/Strategy/SuffixTreeStrategy.php
1
<?php
2

3
declare(strict_types=1);
4

5
namespace Systemsdk\PhpCPD\Detector\Strategy;
6

7
use Systemsdk\PhpCPD\CodeClone;
8
use Systemsdk\PhpCPD\CodeCloneFile;
9
use Systemsdk\PhpCPD\CodeCloneMap;
10
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\AbstractToken;
11
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\ApproximateCloneDetectingSuffixTree;
12
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\Sentinel;
13
use Systemsdk\PhpCPD\Detector\Strategy\SuffixTree\Token;
14
use Systemsdk\PhpCPD\Exceptions\MissingResultException;
15

16
use function array_key_exists;
17
use function array_keys;
18
use function file_get_contents;
19
use function is_array;
20
use function token_get_all;
21

22
use const T_ATTRIBUTE;
23

24
/**
25
 * The suffix tree strategy was implemented in PHP for PHPCPD by Olle Härstedt.
26
 *
27
 * This PHP implementation is based on the archived Java implementation that is available at
28
 * https://www.cqse.eu/en/news/blog/conqat-end-of-life/ under the Apache License 2.0.
29
 *
30
 * The aforementioned Java implementation is based on the algorithm described in
31
 * https://dl.acm.org/doi/10.1109/ICSE.2009.5070547. This paper is available at
32
 * https://www.cqse.eu/fileadmin/content/news/publications/2009-do-code-clones-matter.pdf.
33
 */
34
final class SuffixTreeStrategy extends AbstractStrategy
35
{
36
    /**
37
     * @var array<int, AbstractToken>
38
     */
39
    private array $word = [];
40

41
    /**
42
     * @var array<string, int>
43
     */
44
    private array $fileTokens = [];
45

46
    private ?CodeCloneMap $result = null;
47

48
    /**
     * Tokenizes one file and appends its relevant tokens to the shared word.
     *
     * Every array-form token whose id is not in the ignore list becomes a Token in $this->word,
     * and the number of tokens contributed by the file is tracked in $this->fileTokens. The
     * number of newline characters in the file is added to the result's line counter.
     *
     * Tokens that belong to an attribute group (everything after "#[" up to its matching "]")
     * are skipped. The end of the group is found by counting square brackets, so attribute
     * groups spanning several lines and attributes containing array literals are handled.
     *
     * @param string       $file   Path of the file to tokenize.
     * @param CodeCloneMap $result Clone map that is kept for use in postProcess().
     */
    public function processFile(string $file, CodeCloneMap $result): void
    {
        // A failed read returns false; the cast turns that into '' so the file simply contributes no tokens.
        $content = (string)file_get_contents($file);
        $tokens = token_get_all($content);
        $this->fileTokens[$file] = 0;

        $result->addToNumberOfLines(substr_count($content, "\n"));

        unset($content);

        // Number of square brackets currently open inside an attribute group; 0 means "outside of attributes".
        $attributeDepth = 0;

        foreach ($tokens as $token) {
            if (is_array($token)) {
                if ($attributeDepth === 0 && !isset($this->tokensIgnoreList[$token[0]])) {
                    $this->word[] = new Token(
                        $token[0],
                        token_name($token[0]),
                        $token[2],
                        $file,
                        $token[1]
                    );
                    $this->fileTokens[$file]++;
                }

                // The "#[" token itself is still recorded above (unless ignored); skipping starts with the next token.
                if ($token[0] === T_ATTRIBUTE) {
                    $attributeDepth = 1;
                }

                continue;
            }

            // Single-character tokens never enter the word; inside an attribute they only drive the bracket counter.
            if ($attributeDepth === 0) {
                continue;
            }

            if ($token === '[') {
                $attributeDepth++;
            } elseif ($token === ']') {
                $attributeDepth--;
            }
        }

        $this->result = $result;
    }
97

98
    /**
     * Runs clone detection over all tokens collected by processFile() and stores the clones found.
     *
     * A Sentinel is appended to the word, the suffix tree is queried with the configured minimum
     * token count, edit distance and head equality, and every reported clone that spans at least
     * the configured minimum number of lines is added to the result once per other occurrence.
     *
     * @throws MissingResultException When no result map has been set, i.e. processFile() was never called.
     */
    public function postProcess(): void
    {
        // Bind to a local so the non-null type stays known throughout the method.
        $result = $this->result;

        if ($result === null) {
            throw new MissingResultException('Missing result');
        }

        // Sentinel = End of word
        $this->word[] = new Sentinel();

        $cloneInfos = (new ApproximateCloneDetectingSuffixTree($this->word))->findClones(
            $this->config->minTokens(),
            $this->config->editDistance(),
            $this->config->headEquality()
        );

        foreach ($cloneInfos as $cloneInfo) {
            /** @var int[] $others */
            $others = $cloneInfo->otherClones->extractFirstList();
            // The reported length is capped at the token count of the file the clone starts in.
            $cloneLength = $this->processCloneLength($cloneInfo->length, $cloneInfo->token->file);
            $cloneInfoLastToken = $this->getLastToken($cloneInfo->position, $cloneLength);
            // Inclusive line span from the first to the last token of the clone.
            $lines = $cloneInfoLastToken->line + 1 - $cloneInfo->token->line;

            if ($lines < $this->config->minLines()) {
                continue;
            }

            // Each entry is the index in $this->word at which another occurrence of the clone starts.
            foreach ($others as $otherPosition) {
                $otherToken = $this->word[$otherPosition];
                $otherCloneLength = $this->processCloneLength($cloneLength, $otherToken->file);
                $otherLastToken = $this->getLastToken($otherPosition, $otherCloneLength);

                $result->add(
                    new CodeClone(
                        new CodeCloneFile($cloneInfo->token->file, $cloneInfo->token->line),
                        new CodeCloneFile($otherToken->file, $otherToken->line, $otherLastToken->line),
                        $lines,
                        $cloneLength
                    )
                );
            }
        }
    }
141

142
    /**
     * Caps a clone length at the number of tokens recorded for the given file.
     *
     * @param int    $cloneLength Clone length, in tokens.
     * @param string $file        File whose recorded token count is the upper bound.
     *
     * @return int The given length, or the file's token count when the length exceeds it.
     */
    private function processCloneLength(int $cloneLength, string $file): int
    {
        $tokensInFile = $this->fileTokens[$file];

        return $cloneLength > $tokensInFile ? $tokensInFile : $cloneLength;
    }
150

151
    /**
     * Returns the last token of a clone that starts at $position and is $cloneLength tokens long.
     *
     * @param int $position    Index in the word at which the clone starts.
     * @param int $cloneLength Clone length, in tokens.
     *
     * @return AbstractToken The token at the clone's end, or the one before it when the end is the Sentinel.
     */
    private function getLastToken(int $position, int $cloneLength): AbstractToken
    {
        $end = $position + $cloneLength;
        $candidate = $this->word[$end - 1];

        // The Sentinel only marks the end of the word, so step back one token if the clone ends on it.
        return $candidate instanceof Sentinel ? $this->word[$end - 2] : $candidate;
    }
161
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc