pmd / pmd / 43

Committed 20 Jun 2025 06:39PM UTC coverage: 78.375% (-0.002%) from 78.377%

Build # 43

Build Type

push

github

Committed by

adangel

Commit Message

Fix #1639 #5832: Use filtered comment text for UnnecessaryImport (#5833)

Merged pull request #5833 from adangel:java/issue-5832-unnecessaryimport

Run Details

17714 of 23438 branches covered (75.58%)

Branch coverage included in aggregate %.

3 of 3 new or added lines in 1 file covered. (100.0%)

109 existing lines in 17 files now uncovered.

38908 of 48807 relevant lines covered (79.72%)

0.81 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

95.08

/pmd-core/src/main/java/net/sourceforge/pmd/cpd/CpdAnalysis.java

/**
 * BSD-style license; for more info see http://pmd.sourceforge.net/license.html
 */

package net.sourceforge.pmd.cpd;

import java.io.IOException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import org.checkerframework.checker.nullness.qual.NonNull;
import org.checkerframework.checker.nullness.qual.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.sourceforge.pmd.internal.util.FileCollectionUtil;
import net.sourceforge.pmd.internal.util.IOUtil;
import net.sourceforge.pmd.lang.Language;
import net.sourceforge.pmd.lang.LanguagePropertyBundle;
import net.sourceforge.pmd.lang.ast.FileAnalysisException;
import net.sourceforge.pmd.lang.ast.LexException;
import net.sourceforge.pmd.lang.document.FileCollector;
import net.sourceforge.pmd.lang.document.FileId;
import net.sourceforge.pmd.lang.document.InternalApiBridge;
import net.sourceforge.pmd.lang.document.TextDocument;
import net.sourceforge.pmd.lang.document.TextFile;
import net.sourceforge.pmd.properties.PropertyDescriptor;
import net.sourceforge.pmd.reporting.Report;
import net.sourceforge.pmd.util.log.PmdReporter;

/**
 * Main programmatic API of CPD. This is not a CLI entry point, see module
 * {@code pmd-cli} for that.
 *
 * <h2>Usage overview</h2>
 *
 * <p>Create and configure a {@link CPDConfiguration}, then use {@link #create(CPDConfiguration)} to
 * obtain an instance. You can perform additional configuration on the instance, e.g. adding
 * files to process or add a listener. Then call {@link #performAnalysis()} or {@link #performAnalysis(Consumer)}
 * in order to get the report directly.
 *
 * <h2>Simple example</h2>
 *
 * <pre>{@code
 *   CPDConfiguration config = new CPDConfiguration();
 *   config.setMinimumTileSize(100);
 *   config.setOnlyRecognizeLanguage(config.getLanguageRegistry().getLanguageById("java"));
 *   config.setSourceEncoding(StandardCharsets.UTF_8);
 *   config.addInputPath(Path.of("src/main/java"));
 *
 *   config.setIgnoreAnnotations(true);
 *   config.setIgnoreLiterals(false);
 *
 *   config.setRendererName("text");
 *
 *   try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
 *      // note: don't use `config` once a CpdAnalysis has been created.
 *      // optional: add more files
 *      cpd.files().addFile(Paths.get("src", "main", "more-java", "ExtraSource.java"));
 *
 *      cpd.performAnalysis();
 *   }
 * }</pre>
 */
public final class CpdAnalysis implements AutoCloseable {

    private static final Logger LOGGER = LoggerFactory.getLogger(CpdAnalysis.class);

    /** Configuration this analysis was created from. */
    private final CPDConfiguration configuration;
    /** Collector holding the files to analyze; exposed through {@link #files()}. */
    private final FileCollector files;
    /** Sink for warnings and errors raised while running the analysis. */
    private final PmdReporter reporter;
    /** Number of tokens added per successfully tokenized file (EOF token excluded). */
    private final Map<FileId, Integer> numberOfTokensPerFile = new HashMap<>();
    /** Errors recorded for files that could not be tokenized. */
    private final List<Report.ProcessingError> processingErrors = new ArrayList<>();
    /** Renderer taken from the configuration; when {@code null}, no report is rendered. */
    private final @Nullable CPDReportRenderer renderer;
    /** Listener notified during tokenization and matching; never {@code null}. */
    private @NonNull CPDListener listener = new CPDNullListener();


    /**
     * Builds the analysis from the given configuration: creates the file collector,
     * collects the files designated by the configuration, and pushes the CPD options
     * of the configuration down into the property bundle of every registered language.
     *
     * @param config Configuration to read from
     */
    private CpdAnalysis(CPDConfiguration config) {
        this.configuration = config;
        this.reporter = config.getReporter();
        this.files = InternalApiBridge.newCollector(
            config.getLanguageVersionDiscoverer(),
            reporter
        );

        this.renderer = config.getCPDReportRenderer();

        FileCollectionUtil.collectFiles(config, files());

        for (Language language : config.getLanguageRegistry()) {
            setLanguageProperties(language, config);
        }
    }

    /**
     * Create a new instance from the given configuration. The configuration
     * should not be modified after this.
     *
     * @param config Configuration
     *
     * @return A new analysis instance
     */
    public static CpdAnalysis create(CPDConfiguration config) {
        return new CpdAnalysis(config);
    }

    /**
     * Sets {@code value} for {@code prop} on {@code sink}, but only if the sink declares
     * that descriptor and the property is not already overridden there. An explicitly
     * set language property therefore wins over the value derived from the configuration.
     *
     * @param prop  Descriptor of the property to set
     * @param sink  Property bundle to update
     * @param value Value to use if the property is still missing
     */
    private static <T> void setPropertyIfMissing(PropertyDescriptor<T> prop, LanguagePropertyBundle sink, T value) {
        if (sink.hasDescriptor(prop) && !sink.isPropertyOverridden(prop)) {
            sink.setProperty(prop, value);
        }
    }

    /**
     * Copies the CPD options of the configuration into the property bundle of the
     * given language, without touching properties that are already overridden.
     *
     * @param language Language whose property bundle is updated
     * @param config   Configuration providing the option values
     */
    private static void setLanguageProperties(Language language, CPDConfiguration config) {
        LanguagePropertyBundle props = config.getLanguageProperties(language);

        setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS, props, config.isIgnoreLiterals());
        setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS, props, config.isIgnoreIdentifiers());
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_METADATA, props, config.isIgnoreAnnotations());
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_IMPORTS, props, config.isIgnoreUsings());
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES, props, config.isIgnoreLiteralSequences());
        setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES, props, config.isIgnoreIdentifierAndLiteralSequences());
        if (!config.isNoSkipBlocks()) {
            // see net.sourceforge.pmd.lang.cpp.CppLanguageModule.CPD_SKIP_BLOCKS
            // The descriptor is looked up by name, so its value type is not known statically;
            // the value set below is a String, hence the unchecked (but parameterized) cast.
            @SuppressWarnings("unchecked")
            PropertyDescriptor<String> skipBlocks =
                (PropertyDescriptor<String>) props.getPropertyDescriptor("cpdSkipBlocksPattern");
            // NOTE(review): presumably the lookup yields null for languages that do not
            // declare this property - guard explicitly instead of relying on hasDescriptor.
            if (skipBlocks != null) {
                setPropertyIfMissing(skipBlocks, props, config.getSkipBlocksPattern());
            }
        }
    }

    /**
     * Returns the file collector of this analysis. It already contains the files
     * collected from the configuration and can be used to add more files before
     * the analysis is performed.
     *
     * @return The file collector
     */
    public FileCollector files() {
        return files;
    }

    /**
     * Sets the listener to notify during the analysis.
     *
     * @param cpdListener Listener to use; {@code null} installs a {@link CPDNullListener}
     */
    public void setCpdListener(@Nullable CPDListener cpdListener) {
        this.listener = cpdListener != null ? cpdListener : new CPDNullListener();
    }

    /**
     * Tokenizes one document, appending its tokens to {@code tokens}.
     *
     * @param document Document to tokenize
     * @param cpdLexer Lexer for the language of the document
     * @param tokens   Token store the new tokens are appended to
     *
     * @return The number of tokens added for this document, not counting the EOF token
     *
     * @throws IOException  Declared for failures raised by the underlying tokenization
     * @throws LexException Declared for failures raised by the underlying tokenization
     */
    private int doTokenize(TextDocument document, CpdLexer cpdLexer, Tokens tokens) throws IOException, LexException {
        LOGGER.trace("Tokenizing {}", document.getFileId().getAbsolutePath());
        int lastTokenSize = tokens.size();
        CpdLexer.tokenize(cpdLexer, document, tokens);
        return tokens.size() - lastTokenSize - 1; /* EOF */
    }

    /**
     * Runs the analysis and renders the report with the configured renderer, if any.
     * Equivalent to {@link #performAnalysis(Consumer)} with a consumer that does nothing.
     */
    public void performAnalysis() {
        performAnalysis(r -> { });
    }

    /**
     * Runs the analysis: tokenizes all collected files, searches for duplicates,
     * renders the report with the configured renderer (if any) and finally hands
     * the report to the given consumer.
     *
     * <p>If tokenization errors were recorded and the configuration does not ask to
     * skip lexical errors, an error is reported and the method returns without
     * rendering a report or calling the consumer. An {@link IOException} raised
     * while running is reported through the reporter and not rethrown.
     *
     * @param consumer Receives the report once it has been built and rendered
     */
    public void performAnalysis(Consumer<CPDReport> consumer) {
        try (SourceManager sourceManager = new SourceManager(files.getCollectedFiles())) {
            if (sourceManager.isEmpty()) {
                // only a warning: the analysis still runs and produces a report
                reporter.warn("No files to analyze. Check input paths and exclude parameters, use --debug to see file collection traces.");
            }

            List<Match> matches = findMatches(sourceManager);
            if (shouldAbortEarlyBecauseOfProcessingErrors()) {
                reporter.error("Errors were detected while lexing source, exiting because --skip-lexical-errors is unset.");
                return;
            }

            CPDReport cpdReport = new CPDReport(sourceManager, matches, numberOfTokensPerFile, processingErrors);

            if (renderer != null) {
                Path reportFilePath = configuration.getReportFilePath();
                // a null file name is passed on as-is when no report file is configured
                String reportFileAsString = reportFilePath != null ? reportFilePath.toAbsolutePath().toString() : null;
                try (Writer writer = IOUtil.createWriter(Charset.defaultCharset(), reportFileAsString)) {
                    renderer.render(cpdReport, writer);
                }
            }

            consumer.accept(cpdReport);
        } catch (IOException e) {
            reporter.errorEx("Exception while running CPD", e);
        }
        // source manager is closed and closes all text files now.
    }

    /**
     * Tells whether the analysis must stop: at least one processing error was
     * recorded and the configuration does not ask to skip lexical errors.
     */
    private boolean shouldAbortEarlyBecauseOfProcessingErrors() {
        return !processingErrors.isEmpty() && !configuration.isSkipLexicalErrors();
    }

    /**
     * Tokenizes all files of the source manager and runs the match algorithm on the result.
     *
     * @param sourceManager Source manager holding the files to analyze
     *
     * @return The matches found, or an empty list if the analysis has to abort
     *         because of processing errors
     */
    private List<Match> findMatches(SourceManager sourceManager) {
        // Note: tokens contains all tokens of all analyzed files, which is a huge data structure.
        // The tokens are only needed for finding the matches and can be garbage collected afterwards.
        // The report only needs the matches. In particular, the tokens are only referenced here and in
        // matchAlgorithm. When this method finishes, tokens should be eligible for garbage collection,
        // making it possible to free up memory for rendering the report if needed.

        Tokens tokens = tokenizeFiles(sourceManager);
        if (shouldAbortEarlyBecauseOfProcessingErrors()) {
            return Collections.emptyList();
        }

        LOGGER.debug("Running match algorithm on {} files...", sourceManager.size());
        MatchAlgorithm matchAlgorithm = new MatchAlgorithm(tokens, configuration.getMinimumTileSize());
        List<Match> matches = matchAlgorithm.findMatches(listener, sourceManager);
        LOGGER.debug("Finished: {} duplicates found", matches.size());
        return matches;
    }

    /**
     * Tokenizes every file of the source manager into a single token store.
     * For each file that tokenizes successfully, its token count is recorded and the
     * listener is notified. For each file that fails, the error is reported and recorded
     * as a processing error, and the token store is rolled back to its state before
     * that file.
     *
     * @param sourceManager Source manager holding the files to tokenize
     *
     * @return The tokens of all files that could be tokenized
     */
    @SuppressWarnings("PMD.CloseResource")
    // TextFiles and TextDocuments are managed by sourceManager, which closes all text files in the end.
    private Tokens tokenizeFiles(SourceManager sourceManager) {
        // one lexer per distinct CPD-capable language among the files
        Map<Language, CpdLexer> tokenizers =
                sourceManager.getTextFiles().stream()
                        .map(it -> it.getLanguageVersion().getLanguage())
                        .distinct()
                        .filter(it -> it instanceof CpdCapableLanguage)
                        .collect(Collectors.toMap(lang -> lang, lang -> ((CpdCapableLanguage) lang).createCpdLexer(configuration.getLanguageProperties(lang))));

        Tokens tokens = new Tokens();
        for (TextFile textFile : sourceManager.getTextFiles()) {
            TextDocument textDocument = sourceManager.get(textFile);
            // remember the current state so that a failing file leaves no partial tokens behind
            Tokens.State savedState = tokens.savePoint();
            try {
                // NOTE(review): the lookup yields null for a language that is not CPD-capable;
                // presumably such files are never collected - confirm against the file collector.
                int newTokens = doTokenize(textDocument, tokenizers.get(textFile.getLanguageVersion().getLanguage()), tokens);
                numberOfTokensPerFile.put(textDocument.getFileId(), newTokens);
                listener.addedFile(1);
            } catch (IOException | FileAnalysisException e) {
                if (e instanceof FileAnalysisException) { // NOPMD
                    ((FileAnalysisException) e).setFileId(textFile.getFileId());
                }
                String message = configuration.isSkipLexicalErrors() ? "Skipping file" : "Error while tokenizing";
                reporter.errorEx(message, e);
                processingErrors.add(new Report.ProcessingError(e, textFile.getFileId()));
                savedState.restore(tokens);
            }
        }
        return tokens;
    }

    /**
     * Closes this analysis. Currently this does nothing; the text files are
     * closed at the end of {@link #performAnalysis(Consumer)}.
     */
    @Override
    public void close() throws IOException {
        // nothing for now
    }

}

1	/**
2	* BSD-style license; for more info see http://pmd.sourceforge.net/license.html
3	*/
4
5	package net.sourceforge.pmd.cpd;
6
7	import java.io.IOException;
8	import java.io.Writer;
9	import java.nio.charset.Charset;
10	import java.nio.file.Path;
11	import java.util.ArrayList;
12	import java.util.Collections;
13	import java.util.HashMap;
14	import java.util.List;
15	import java.util.Map;
16	import java.util.function.Consumer;
17	import java.util.stream.Collectors;
18
19	import org.checkerframework.checker.nullness.qual.NonNull;
20	import org.checkerframework.checker.nullness.qual.Nullable;
21	import org.slf4j.Logger;
22	import org.slf4j.LoggerFactory;
23
24	import net.sourceforge.pmd.internal.util.FileCollectionUtil;
25	import net.sourceforge.pmd.internal.util.IOUtil;
26	import net.sourceforge.pmd.lang.Language;
27	import net.sourceforge.pmd.lang.LanguagePropertyBundle;
28	import net.sourceforge.pmd.lang.ast.FileAnalysisException;
29	import net.sourceforge.pmd.lang.ast.LexException;
30	import net.sourceforge.pmd.lang.document.FileCollector;
31	import net.sourceforge.pmd.lang.document.FileId;
32	import net.sourceforge.pmd.lang.document.InternalApiBridge;
33	import net.sourceforge.pmd.lang.document.TextDocument;
34	import net.sourceforge.pmd.lang.document.TextFile;
35	import net.sourceforge.pmd.properties.PropertyDescriptor;
36	import net.sourceforge.pmd.reporting.Report;
37	import net.sourceforge.pmd.util.log.PmdReporter;
38
39	/**
40	* Main programmatic API of CPD. This is not a CLI entry point, see module
41	* {@code pmd-cli} for that.
42	*
43	* <h2>Usage overview</h2>
44	*
45	* <p>Create and configure a {@link CPDConfiguration}, then use {@link #create(CPDConfiguration)} to
46	* obtain an instance. You can perform additional configuration on the instance, e.g. adding
47	* files to process or add a listener. Then call {@link #performAnalysis()} or {@link #performAnalysis(Consumer)}
48	* in order to get the report directly.
49	*
50	* <h2>Simple example</h2>
51	*
52	* <pre>{@code
53	* CPDConfiguration config = new CPDConfiguration();
54	* config.setMinimumTileSize(100);
55	* config.setOnlyRecognizeLanguage(config.getLanguageRegistry().getLanguageById("java"));
56	* config.setSourceEncoding(StandardCharsets.UTF_8);
57	* config.addInputPath(Path.of("src/main/java")
58	*
59	* config.setIgnoreAnnotations(true);
60	* config.setIgnoreLiterals(false);
61	*
62	* config.setRendererName("text");
63	*
64	* try (CpdAnalysis cpd = CpdAnalysis.create(config)) {
65	* // note: don't use `config` once a CpdAnalysis has been created.
66	* // optional: add more files
67	* cpd.files().addFile(Paths.get("src", "main", "more-java", "ExtraSource.java"));
68	*
69	* cpd.performAnalysis();
70	* }
71	* }</pre>
72	*/
73	public final class CpdAnalysis implements AutoCloseable {
74
75	private static final Logger LOGGER = LoggerFactory.getLogger(CpdAnalysis.class);	1✔
76	private final CPDConfiguration configuration;
77	private final FileCollector files;
78	private final PmdReporter reporter;
79	private final Map<FileId, Integer> numberOfTokensPerFile = new HashMap<>();	1✔
80	private final List<Report.ProcessingError> processingErrors = new ArrayList<>();	1✔
81	private final @Nullable CPDReportRenderer renderer;
82	private @NonNull CPDListener listener = new CPDNullListener();	1✔
83
84
85	private CpdAnalysis(CPDConfiguration config) {	1✔
86	this.configuration = config;	1✔
87	this.reporter = config.getReporter();	1✔
88	this.files = InternalApiBridge.newCollector(	1✔
89	config.getLanguageVersionDiscoverer(),	1✔
90	reporter
91	);
92
93	this.renderer = config.getCPDReportRenderer();	1✔
94
95	FileCollectionUtil.collectFiles(config, files());	1✔
96
97	for (Language language : config.getLanguageRegistry()) {	1✔
98	setLanguageProperties(language, config);	1✔
99	}	1✔
100	}	1✔
101
102	/**
103	* Create a new instance from the given configuration. The configuration
104	* should not be modified after this.
105	*
106	* @param config Configuration
107	*
108	* @return A new analysis instance
109	*/
110	public static CpdAnalysis create(CPDConfiguration config) {
111	return new CpdAnalysis(config);	1✔
112	}
113
114	private static <T> void setPropertyIfMissing(PropertyDescriptor<T> prop, LanguagePropertyBundle sink, T value) {
115	if (sink.hasDescriptor(prop) && !sink.isPropertyOverridden(prop)) {	1✔
116	sink.setProperty(prop, value);	1✔
117	}
118	}	1✔
119
120	private void setLanguageProperties(Language language, CPDConfiguration configuration) {
121	LanguagePropertyBundle props = configuration.getLanguageProperties(language);	1✔
122
123	setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_LITERALS, props, configuration.isIgnoreLiterals());	1✔
124	setPropertyIfMissing(CpdLanguageProperties.CPD_ANONYMIZE_IDENTIFIERS, props, configuration.isIgnoreIdentifiers());	1✔
125	setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_METADATA, props, configuration.isIgnoreAnnotations());	1✔
126	setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_IMPORTS, props, configuration.isIgnoreUsings());	1✔
127	setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_SEQUENCES, props, configuration.isIgnoreLiteralSequences());	1✔
128	setPropertyIfMissing(CpdLanguageProperties.CPD_IGNORE_LITERAL_AND_IDENTIFIER_SEQUENCES, props, configuration.isIgnoreIdentifierAndLiteralSequences());	1✔
129	if (!configuration.isNoSkipBlocks()) {	1!
130	// see net.sourceforge.pmd.lang.cpp.CppLanguageModule.CPD_SKIP_BLOCKS
131	PropertyDescriptor<String> skipBlocks = (PropertyDescriptor) props.getPropertyDescriptor("cpdSkipBlocksPattern");	1✔
132	setPropertyIfMissing(skipBlocks, props, configuration.getSkipBlocksPattern());	1✔
133	}
134	}	1✔
135
136	public FileCollector files() {
137	return files;	1✔
138	}
139
140	public void setCpdListener(@Nullable CPDListener cpdListener) {
141	if (cpdListener == null) {	1!
UNCOV 142	cpdListener = new CPDNullListener();	×
143	}
144	this.listener = cpdListener;	1✔
145	}	1✔
146
147	private int doTokenize(TextDocument document, CpdLexer cpdLexer, Tokens tokens) throws IOException, LexException {
148	LOGGER.trace("Tokenizing {}", document.getFileId().getAbsolutePath());	1✔
149	int lastTokenSize = tokens.size();	1✔
150	CpdLexer.tokenize(cpdLexer, document, tokens);	1✔
151	return tokens.size() - lastTokenSize - 1; /* EOF */	1✔
152	}
153
154	public void performAnalysis() {
155	performAnalysis(r -> { });	1✔
156	}	1✔
157
158	public void performAnalysis(Consumer<CPDReport> consumer) {
159	try (SourceManager sourceManager = new SourceManager(files.getCollectedFiles())) {	1✔
160	if (sourceManager.isEmpty()) {	1✔
161	reporter.warn("No files to analyze. Check input paths and exclude parameters, use --debug to see file collection traces.");	1✔
162	}
163
164	List<Match> matches = findMatches(sourceManager);	1✔
165	if (shouldAbortEarlyBecauseOfProcessingErrors()) {	1✔
166	reporter.error("Errors were detected while lexing source, exiting because --skip-lexical-errors is unset.");	1✔
167	return;	1✔
168	}
169
170	CPDReport cpdReport = new CPDReport(sourceManager, matches, numberOfTokensPerFile, processingErrors);	1✔
171
172	if (renderer != null) {	1✔
173	Path reportFilePath = configuration.getReportFilePath();	1✔
174	String reportFileAsString = reportFilePath != null ? reportFilePath.toAbsolutePath().toString() : null;	1!
175	try (Writer writer = IOUtil.createWriter(Charset.defaultCharset(), reportFileAsString)) {	1✔
176	renderer.render(cpdReport, writer);	1✔
177	}
178	}
179
180	consumer.accept(cpdReport);	1✔
181	} catch (IOException e) {	1✔
UNCOV 182	reporter.errorEx("Exception while running CPD", e);	×
183	}	1✔
184	// source manager is closed and closes all text files now.
185	}	1✔
186
187	private boolean shouldAbortEarlyBecauseOfProcessingErrors() {
188	return !processingErrors.isEmpty() && !configuration.isSkipLexicalErrors();	1✔
189	}
190
191	private List<Match> findMatches(SourceManager sourceManager) {
192	// Note: tokens contains all tokens of all analyzed files which is a huge data structure.
193	// The tokens are only needed for finding the matches and can be garbage collected afterwards.
194	// The report only needs the matches. Especially, the tokens are only referenced here and in
195	// matchAlgorithm. When this method finishes, tokens should be eligible for garbage collection
196	// making it possible to free up memory for render the report if needed.
197
198	Tokens tokens = tokenizeFiles(sourceManager);	1✔
199	if (shouldAbortEarlyBecauseOfProcessingErrors()) {	1✔
200	return Collections.emptyList();	1✔
201	}
202
203	LOGGER.debug("Running match algorithm on {} files...", sourceManager.size());	1✔
204	MatchAlgorithm matchAlgorithm = new MatchAlgorithm(tokens, configuration.getMinimumTileSize());	1✔
205	List<Match> matches = matchAlgorithm.findMatches(listener, sourceManager);	1✔
206	LOGGER.debug("Finished: {} duplicates found", matches.size());	1✔
207	return matches;	1✔
208	}
209
210	@SuppressWarnings("PMD.CloseResource")
211	// TextFiles and TextDocuments are managed by sourceManager, which closes all text files in the end.
212	private Tokens tokenizeFiles(SourceManager sourceManager) {
213	Map<Language, CpdLexer> tokenizers =	1✔
214	sourceManager.getTextFiles().stream()	1✔
215	.map(it -> it.getLanguageVersion().getLanguage())	1✔
216	.distinct()	1✔
217	.filter(it -> it instanceof CpdCapableLanguage)	1✔
218	.collect(Collectors.toMap(lang -> lang, lang -> ((CpdCapableLanguage) lang).createCpdLexer(configuration.getLanguageProperties(lang))));	1✔
219
220	Tokens tokens = new Tokens();	1✔
221	for (TextFile textFile : sourceManager.getTextFiles()) {	1✔
222	TextDocument textDocument = sourceManager.get(textFile);	1✔
223	Tokens.State savedState = tokens.savePoint();	1✔
224	try {
225	int newTokens = doTokenize(textDocument, tokenizers.get(textFile.getLanguageVersion().getLanguage()), tokens);	1✔
226	numberOfTokensPerFile.put(textDocument.getFileId(), newTokens);	1✔
227	listener.addedFile(1);	1✔
228	} catch (IOException | FileAnalysisException e) {	1✔
229	if (e instanceof FileAnalysisException) { // NOPMD	1!
230	((FileAnalysisException) e).setFileId(textFile.getFileId());	1✔
231	}
232	String message = configuration.isSkipLexicalErrors() ? "Skipping file" : "Error while tokenizing";	1✔
233	reporter.errorEx(message, e);	1✔
234	processingErrors.add(new Report.ProcessingError(e, textFile.getFileId()));	1✔
235	savedState.restore(tokens);	1✔
236	}	1✔
237	}	1✔
238	return tokens;	1✔
239	}
240
241	@Override
242	public void close() throws IOException {
243	// nothing for now
244	}	1✔
245
246	}

pmd / pmd / 43

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous