• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

pmd / pmd / 43

20 Jun 2025 06:39PM UTC coverage: 78.375% (-0.002%) from 78.377%
43

push

github

adangel
Fix #1639 #5832: Use filtered comment text for UnnecessaryImport (#5833)

Merged pull request #5833 from adangel:java/issue-5832-unnecessaryimport

17714 of 23438 branches covered (75.58%)

Branch coverage included in aggregate %.

3 of 3 new or added lines in 1 file covered. (100.0%)

109 existing lines in 17 files now uncovered.

38908 of 48807 relevant lines covered (79.72%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.08
/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java
1
/**
2
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3
 */
4

5
package net.sourceforge.pmd.cpd;
6

7
import java.io.IOException;
8
import java.io.Writer;
9
import java.nio.charset.Charset;
10
import java.nio.file.Path;
11
import java.util.ArrayList;
12
import java.util.Collections;
13
import java.util.HashMap;
14
import java.util.List;
15
import java.util.Map;
16
import java.util.function.Consumer;
17
import java.util.stream.Collectors;
18

19
import org.checkerframework.checker.nullness.qual.NonNull;
20
import org.checkerframework.checker.nullness.qual.Nullable;
21
import org.slf4j.Logger;
22
import org.slf4j.LoggerFactory;
23

24
import net.sourceforge.pmd.internal.util.FileCollectionUtil;
25
import net.sourceforge.pmd.internal.util.IOUtil;
26
import net.sourceforge.pmd.lang.Language;
27
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
28
import net.sourceforge.pmd.lang.ast.FileAnalysisException;
29
import net.sourceforge.pmd.lang.ast.LexException;
30
import net.sourceforge.pmd.lang.document.FileCollector;
31
import net.sourceforge.pmd.lang.document.FileId;
32
import net.sourceforge.pmd.lang.document.InternalApiBridge;
33
import net.sourceforge.pmd.lang.document.TextDocument;
34
import net.sourceforge.pmd.lang.document.TextFile;
35
import net.sourceforge.pmd.properties.PropertyDescriptor;
36
import net.sourceforge.pmd.reporting.Report;
37
import net.sourceforge.pmd.util.log.PmdReporter;
38

39
/**
40
 * Main programmatic API of CPD. This is not a CLI entry point, see module
41
 * {@code pmd-cli} for that.
42
 *
43
 * <h2>Usage overview</h2>
44
 *
45
 * <p>Create and configure a {@link CPDConfiguration}, then use {@link #create(CPDConfiguration)} to
46
 * obtain an instance. You can perform additional configuration on the instance, e.g. adding
47
 * files to process or adding a listener. Then call {@link #performAnalysis()} or {@link #performAnalysis(Consumer)}
48
 * in order to get the report directly.
49
 *
50
 * <h2>Simple example</h2>
51
 *
52
 * <pre>{@code
53
 *   CPDConfiguration config = new CPDConfiguration();
54
 *   config.setMinimumTileSize(100);
55
 *   config.setOnlyRecognizeLanguage(config.getLanguageRegistry().getLanguageById("java"));
56
 *   config.setSourceEncoding(StandardCharsets.UTF_8);
57
 *   config.addInputPath(Path.of("src/main/java"));
58
 *
59
 *   config.setIgnoreAnnotations(true);
60
 *   config.setIgnoreLiterals(false);
61
 *
62
 *   config.setRendererName("text");
63
 *
64
 *   try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
65
 *      // note: don't use `config` once a CpdAnalysis has been created.
66
 *      // optional: add more files
67
 *      cpd.files().addFile(Paths.get("src", "main", "more-java", "ExtraSource.java"));
68
 *
69
 *      cpd.performAnalysis();
70
 *   }
71
 * }</pre>
72
 */
73
public final class CpdAnalysis implements AutoCloseable {
74

75
    private static final Logger LOGGER = LoggerFactory.getLogger(CpdAnalysis.class);
1✔
76
    private final CPDConfiguration configuration;
77
    private final FileCollector files;
78
    private final PmdReporter reporter;
79
    private final Map<FileId, Integer> numberOfTokensPerFile = new HashMap<>();
1✔
80
    private final List<Report.ProcessingError> processingErrors = new ArrayList<>();
1✔
81
    private final @Nullable CPDReportRenderer renderer;
82
    private @NonNull CPDListener listener = new CPDNullListener();
1✔
83

84

85
    /**
     * Builds an analysis from the given configuration: takes over its reporter
     * and renderer, creates the file collector, collects the configured input
     * files, and pushes the CPD options into the property bundle of every
     * language in the configuration's language registry.
     *
     * @param config Configuration to build the analysis from
     */
    private CpdAnalysis(CPDConfiguration config) {
        this.configuration = config;
        this.reporter = config.getReporter();
        this.files = InternalApiBridge.newCollector(config.getLanguageVersionDiscoverer(), this.reporter);
        this.renderer = config.getCPDReportRenderer();

        // Files configured on the CPDConfiguration are collected eagerly.
        FileCollectionUtil.collectFiles(config, this.files);

        config.getLanguageRegistry().forEach(language -> setLanguageProperties(language, config));
    }
1✔
101

102
    /**
103
     * Create a new instance from the given configuration. The configuration
104
     * should not be modified after this.
105
     *
106
     * @param config Configuration
107
     *
108
     * @return A new analysis instance
109
     */
110
    public static CpdAnalysis create(CPDConfiguration config) {
111
        return new CpdAnalysis(config);
1✔
112
    }
113

114
    private static <T> void setPropertyIfMissing(PropertyDescriptor<T> prop, LanguagePropertyBundle sink, T value) {
115
        if (sink.hasDescriptor(prop) && !sink.isPropertyOverridden(prop)) {
1✔
116
            sink.setProperty(prop, value);
1✔
117
        }
118
    }
1✔
119

120
    private void setLanguageProperties(Language language, CPDConfiguration configuration) {
121
        LanguagePropertyBundle props = configuration.getLanguageProperties(language);
1✔
122

123
        setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS, props, configuration.isIgnoreLiterals());
1✔
124
        setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS, props, configuration.isIgnoreIdentifiers());
1✔
125
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_METADATA, props, configuration.isIgnoreAnnotations());
1✔
126
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_IMPORTS, props, configuration.isIgnoreUsings());
1✔
127
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES, props, configuration.isIgnoreLiteralSequences());
1✔
128
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES, props, configuration.isIgnoreIdentifierAndLiteralSequences());
1✔
129
        if (!configuration.isNoSkipBlocks()) {
1!
130
            // see net.sourceforge.pmd.lang.cpp.CppLanguageModule.CPD_SKIP_BLOCKS
131
            PropertyDescriptor<String> skipBlocks = (PropertyDescriptor) props.getPropertyDescriptor("cpdSkipBlocksPattern");
1✔
132
            setPropertyIfMissing(skipBlocks, props, configuration.getSkipBlocksPattern());
1✔
133
        }
134
    }
1✔
135

136
    public FileCollector files() {
137
        return files;
1✔
138
    }
139

140
    public void setCpdListener(@Nullable CPDListener cpdListener) {
141
        if (cpdListener == null) {
1!
UNCOV
142
            cpdListener = new CPDNullListener();
×
143
        }
144
        this.listener = cpdListener;
1✔
145
    }
1✔
146

147
    private int doTokenize(TextDocument document, CpdLexer cpdLexer, Tokens tokens) throws IOException, LexException {
148
        LOGGER.trace("Tokenizing {}", document.getFileId().getAbsolutePath());
1✔
149
        int lastTokenSize = tokens.size();
1✔
150
        CpdLexer.tokenize(cpdLexer, document, tokens);
1✔
151
        return tokens.size() - lastTokenSize - 1; /* EOF */
1✔
152
    }
153

154
    public void performAnalysis() {
155
        performAnalysis(r -> { });
1✔
156
    }
1✔
157

158
    public void performAnalysis(Consumer<CPDReport> consumer) {
159
        try (SourceManager sourceManager = new SourceManager(files.getCollectedFiles())) {
1✔
160
            if (sourceManager.isEmpty()) {
1✔
161
                reporter.warn("No files to analyze. Check input paths and exclude parameters, use --debug to see file collection traces.");
1✔
162
            }
163

164
            List<Match> matches = findMatches(sourceManager);
1✔
165
            if (shouldAbortEarlyBecauseOfProcessingErrors()) {
1✔
166
                reporter.error("Errors were detected while lexing source, exiting because --skip-lexical-errors is unset.");
1✔
167
                return;
1✔
168
            }
169

170
            CPDReport cpdReport = new CPDReport(sourceManager, matches, numberOfTokensPerFile, processingErrors);
1✔
171

172
            if (renderer != null) {
1✔
173
                Path reportFilePath = configuration.getReportFilePath();
1✔
174
                String reportFileAsString = reportFilePath != null ? reportFilePath.toAbsolutePath().toString() : null;
1!
175
                try (Writer writer = IOUtil.createWriter(Charset.defaultCharset(), reportFileAsString)) {
1✔
176
                    renderer.render(cpdReport, writer);
1✔
177
                }
178
            }
179

180
            consumer.accept(cpdReport);
1✔
181
        } catch (IOException e) {
1✔
UNCOV
182
            reporter.errorEx("Exception while running CPD", e);
×
183
        }
1✔
184
        // source manager is closed and closes all text files now.
185
    }
1✔
186

187
    private boolean shouldAbortEarlyBecauseOfProcessingErrors() {
188
        return !processingErrors.isEmpty() && !configuration.isSkipLexicalErrors();
1✔
189
    }
190

191
    private List<Match> findMatches(SourceManager sourceManager) {
192
        // Note: tokens contains all tokens of all analyzed files which is a huge data structure.
193
        // The tokens are only needed for finding the matches and can be garbage collected afterwards.
194
        // The report only needs the matches. Especially, the tokens are only referenced here and in
195
        // matchAlgorithm. When this method finishes, tokens should be eligible for garbage collection
196
        // making it possible to free up memory for render the report if needed.
197

198
        Tokens tokens = tokenizeFiles(sourceManager);
1✔
199
        if (shouldAbortEarlyBecauseOfProcessingErrors()) {
1✔
200
            return Collections.emptyList();
1✔
201
        }
202

203
        LOGGER.debug("Running match algorithm on {} files...", sourceManager.size());
1✔
204
        MatchAlgorithm matchAlgorithm = new MatchAlgorithm(tokens, configuration.getMinimumTileSize());
1✔
205
        List<Match> matches = matchAlgorithm.findMatches(listener, sourceManager);
1✔
206
        LOGGER.debug("Finished: {} duplicates found", matches.size());
1✔
207
        return matches;
1✔
208
    }
209

210
    @SuppressWarnings("PMD.CloseResource")
211
    // TextFiles and TextDocuments are managed by sourceManager, which closes all text files in the end.
212
    private Tokens tokenizeFiles(SourceManager sourceManager) {
213
        Map<Language, CpdLexer> tokenizers =
1✔
214
                sourceManager.getTextFiles().stream()
1✔
215
                        .map(it -> it.getLanguageVersion().getLanguage())
1✔
216
                        .distinct()
1✔
217
                        .filter(it -> it instanceof CpdCapableLanguage)
1✔
218
                        .collect(Collectors.toMap(lang -> lang, lang -> ((CpdCapableLanguage) lang).createCpdLexer(configuration.getLanguageProperties(lang))));
1✔
219

220
        Tokens tokens = new Tokens();
1✔
221
        for (TextFile textFile : sourceManager.getTextFiles()) {
1✔
222
            TextDocument textDocument = sourceManager.get(textFile);
1✔
223
            Tokens.State savedState = tokens.savePoint();
1✔
224
            try {
225
                int newTokens = doTokenize(textDocument, tokenizers.get(textFile.getLanguageVersion().getLanguage()), tokens);
1✔
226
                numberOfTokensPerFile.put(textDocument.getFileId(), newTokens);
1✔
227
                listener.addedFile(1);
1✔
228
            } catch (IOException | FileAnalysisException e) {
1✔
229
                if (e instanceof FileAnalysisException) { // NOPMD
1!
230
                    ((FileAnalysisException) e).setFileId(textFile.getFileId());
1✔
231
                }
232
                String message = configuration.isSkipLexicalErrors() ? "Skipping file" : "Error while tokenizing";
1✔
233
                reporter.errorEx(message, e);
1✔
234
                processingErrors.add(new Report.ProcessingError(e, textFile.getFileId()));
1✔
235
                savedState.restore(tokens);
1✔
236
            }
1✔
237
        }
1✔
238
        return tokens;
1✔
239
    }
240

241
    @Override
242
    public void close() throws IOException {
243
        // nothing for now
244
    }
1✔
245

246
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc