• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pmd / pmd / 4575

11 May 2025 06:36AM UTC coverage: 77.838% (-0.001%) from 77.839%
4575

push

github

oowekyala
Update release notes, ref #5700

17660 of 23654 branches covered (74.66%)

Branch coverage included in aggregate %.

38710 of 48766 relevant lines covered (79.38%)

0.8 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.31
/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java
1
/**
2
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3
 */
4

5
package net.sourceforge.pmd.cpd;
6

7
import java.io.IOException;
8
import java.io.Writer;
9
import java.nio.charset.Charset;
10
import java.util.ArrayList;
11
import java.util.HashMap;
12
import java.util.List;
13
import java.util.Map;
14
import java.util.function.Consumer;
15
import java.util.stream.Collectors;
16

17
import org.checkerframework.checker.nullness.qual.NonNull;
18
import org.checkerframework.checker.nullness.qual.Nullable;
19
import org.slf4j.Logger;
20
import org.slf4j.LoggerFactory;
21

22
import net.sourceforge.pmd.internal.util.FileCollectionUtil;
23
import net.sourceforge.pmd.internal.util.IOUtil;
24
import net.sourceforge.pmd.lang.Language;
25
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
26
import net.sourceforge.pmd.lang.ast.FileAnalysisException;
27
import net.sourceforge.pmd.lang.ast.LexException;
28
import net.sourceforge.pmd.lang.document.FileCollector;
29
import net.sourceforge.pmd.lang.document.FileId;
30
import net.sourceforge.pmd.lang.document.InternalApiBridge;
31
import net.sourceforge.pmd.lang.document.TextDocument;
32
import net.sourceforge.pmd.lang.document.TextFile;
33
import net.sourceforge.pmd.properties.PropertyDescriptor;
34
import net.sourceforge.pmd.reporting.Report;
35
import net.sourceforge.pmd.util.log.PmdReporter;
36

37
/**
38
 * Main programmatic API of CPD. This is not a CLI entry point, see module
39
 * {@code pmd-cli} for that.
40
 *
41
 * <h2>Usage overview</h2>
42
 *
43
 * <p>Create and configure a {@link CPDConfiguration}, then use {@link #create(CPDConfiguration)} to
44
 * obtain an instance. You can perform additional configuration on the instance, e.g. adding
45
 * files to process or add a listener. Then call {@link #performAnalysis()} or {@link #performAnalysis(Consumer)}
46
 * in order to get the report directly.
47
 *
48
 * <h2>Simple example</h2>
49
 *
50
 * <pre>{@code
51
 *   CPDConfiguration config = new CPDConfiguration();
52
 *   config.setMinimumTileSize(100);
53
 *   config.setOnlyRecognizeLanguage(config.getLanguageRegistry().getLanguageById("java"));
54
 *   config.setSourceEncoding(StandardCharsets.UTF_8);
55
 *   config.addInputPath(Path.of("src/main/java"));
56
 *
57
 *   config.setIgnoreAnnotations(true);
58
 *   config.setIgnoreLiterals(false);
59
 *
60
 *   config.setRendererName("text");
61
 *
62
 *   try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
63
 *      // note: don't use `config` once a CpdAnalysis has been created.
64
 *      // optional: add more files
65
 *      cpd.files().addFile(Paths.get("src", "main", "more-java", "ExtraSource.java"));
66
 *
67
 *      cpd.performAnalysis();
68
 *   }
69
 * }</pre>
70
 */
71
public final class CpdAnalysis implements AutoCloseable {
72

73
    private static final Logger LOGGER = LoggerFactory.getLogger(CpdAnalysis.class);
1✔
74
    private final CPDConfiguration configuration;
75
    private final FileCollector files;
76
    private final PmdReporter reporter;
77
    private final @Nullable CPDReportRenderer renderer;
78
    private @NonNull CPDListener listener = new CPDNullListener();
1✔
79

80

81
    private CpdAnalysis(CPDConfiguration config) {
1✔
82
        this.configuration = config;
1✔
83
        this.reporter = config.getReporter();
1✔
84
        this.files = InternalApiBridge.newCollector(
1✔
85
            config.getLanguageVersionDiscoverer(),
1✔
86
            reporter
87
        );
88

89
        this.renderer = config.getCPDReportRenderer();
1✔
90

91
        FileCollectionUtil.collectFiles(config, files());
1✔
92

93
        for (Language language : config.getLanguageRegistry()) {
1✔
94
            setLanguageProperties(language, config);
1✔
95
        }
1✔
96
    }
1✔
97

98
    /**
99
     * Create a new instance from the given configuration. The configuration
100
     * should not be modified after this.
101
     *
102
     * @param config Configuration
103
     *
104
     * @return A new analysis instance
105
     */
106
    public static CpdAnalysis create(CPDConfiguration config) {
107
        return new CpdAnalysis(config);
1✔
108
    }
109

110
    private static <T> void setPropertyIfMissing(PropertyDescriptor<T> prop, LanguagePropertyBundle sink, T value) {
111
        if (sink.hasDescriptor(prop) && !sink.isPropertyOverridden(prop)) {
1✔
112
            sink.setProperty(prop, value);
1✔
113
        }
114
    }
1✔
115

116
    private void setLanguageProperties(Language language, CPDConfiguration configuration) {
117
        LanguagePropertyBundle props = configuration.getLanguageProperties(language);
1✔
118

119
        setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS, props, configuration.isIgnoreLiterals());
1✔
120
        setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS, props, configuration.isIgnoreIdentifiers());
1✔
121
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_METADATA, props, configuration.isIgnoreAnnotations());
1✔
122
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_IMPORTS, props, configuration.isIgnoreUsings());
1✔
123
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES, props, configuration.isIgnoreLiteralSequences());
1✔
124
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES, props, configuration.isIgnoreIdentifierAndLiteralSequences());
1✔
125
        if (!configuration.isNoSkipBlocks()) {
1!
126
            // see net.sourceforge.pmd.lang.cpp.CppLanguageModule.CPD_SKIP_BLOCKS
127
            PropertyDescriptor<String> skipBlocks = (PropertyDescriptor) props.getPropertyDescriptor("cpdSkipBlocksPattern");
1✔
128
            setPropertyIfMissing(skipBlocks, props, configuration.getSkipBlocksPattern());
1✔
129
        }
130
    }
1✔
131

132
    public FileCollector files() {
133
        return files;
1✔
134
    }
135

136
    public void setCpdListener(@Nullable CPDListener cpdListener) {
137
        if (cpdListener == null) {
1!
138
            cpdListener = new CPDNullListener();
×
139
        }
140
        this.listener = cpdListener;
1✔
141
    }
1✔
142

143
    private int doTokenize(TextDocument document, CpdLexer cpdLexer, Tokens tokens) throws IOException, LexException {
144
        LOGGER.trace("Tokenizing {}", document.getFileId().getAbsolutePath());
1✔
145
        int lastTokenSize = tokens.size();
1✔
146
        CpdLexer.tokenize(cpdLexer, document, tokens);
1✔
147
        return tokens.size() - lastTokenSize - 1; /* EOF */
1✔
148
    }
149

150
    public void performAnalysis() {
151
        performAnalysis(r -> { });
1✔
152
    }
1✔
153

154
    @SuppressWarnings("PMD.CloseResource")
155
    public void performAnalysis(Consumer<CPDReport> consumer) {
156
        try (SourceManager sourceManager = new SourceManager(files.getCollectedFiles())) {
1✔
157
            Map<Language, CpdLexer> tokenizers =
1✔
158
                sourceManager.getTextFiles().stream()
1✔
159
                             .map(it -> it.getLanguageVersion().getLanguage())
1✔
160
                             .distinct()
1✔
161
                             .filter(it -> it instanceof CpdCapableLanguage)
1✔
162
                             .collect(Collectors.toMap(lang -> lang, lang -> ((CpdCapableLanguage) lang).createCpdLexer(configuration.getLanguageProperties(lang))));
1✔
163

164
            Map<FileId, Integer> numberOfTokensPerFile = new HashMap<>();
1✔
165

166
            List<Report.ProcessingError> processingErrors = new ArrayList<>();
1✔
167
            Tokens tokens = new Tokens();
1✔
168
            for (TextFile textFile : sourceManager.getTextFiles()) {
1✔
169
                TextDocument textDocument = sourceManager.get(textFile);
1✔
170
                Tokens.State savedState = tokens.savePoint();
1✔
171
                try {
172
                    int newTokens = doTokenize(textDocument, tokenizers.get(textFile.getLanguageVersion().getLanguage()), tokens);
1✔
173
                    numberOfTokensPerFile.put(textDocument.getFileId(), newTokens);
1✔
174
                    listener.addedFile(1);
1✔
175
                } catch (IOException | FileAnalysisException e) {
1✔
176
                    if (e instanceof FileAnalysisException) { // NOPMD
1!
177
                        ((FileAnalysisException) e).setFileId(textFile.getFileId());
1✔
178
                    }
179
                    String message = configuration.isSkipLexicalErrors() ? "Skipping file" : "Error while tokenizing";
1✔
180
                    reporter.errorEx(message, e);
1✔
181
                    processingErrors.add(new Report.ProcessingError(e, textFile.getFileId()));
1✔
182
                    savedState.restore(tokens);
1✔
183
                }
1✔
184
            }
1✔
185
            if (!processingErrors.isEmpty() && !configuration.isSkipLexicalErrors()) {
1✔
186
                reporter.error("Errors were detected while lexing source, exiting because --skip-lexical-errors is unset.");
1✔
187
                return;
1✔
188
            }
189

190
            LOGGER.debug("Running match algorithm on {} files...", sourceManager.size());
1✔
191
            MatchAlgorithm matchAlgorithm = new MatchAlgorithm(tokens, configuration.getMinimumTileSize());
1✔
192
            List<Match> matches = matchAlgorithm.findMatches(listener, sourceManager);
1✔
193
            tokens = null; // NOPMD null it out before rendering
1✔
194
            LOGGER.debug("Finished: {} duplicates found", matches.size());
1✔
195

196
            CPDReport cpdReport = new CPDReport(sourceManager, matches, numberOfTokensPerFile, processingErrors);
1✔
197

198
            if (renderer != null) {
1!
199
                try (Writer writer = IOUtil.createWriter(Charset.defaultCharset(), null)) {
×
200
                    renderer.render(cpdReport, writer);
×
201
                }
202
            }
203

204
            consumer.accept(cpdReport);
1✔
205
        } catch (IOException e) {
1✔
206
            reporter.errorEx("Exception while running CPD", e);
×
207
        }
1✔
208
        // source manager is closed and closes all text files now.
209
    }
1✔
210

211

212
    @Override
213
    public void close() throws IOException {
214
        // nothing for now
215
    }
1✔
216

217
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc