• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

oracle / opengrok / #3671

01 Nov 2023 10:10AM UTC coverage: 66.019% (-9.1%) from 75.16%
#3671

push

web-flow
Fix Sonar codesmell issues (#4460)

Signed-off-by: Gino Augustine <ginoaugustine@gmail.com>

308 of 308 new or added lines in 27 files covered. (100.0%)

38690 of 58604 relevant lines covered (66.02%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

83.75
/opengrok-indexer/src/main/java/org/opengrok/indexer/analysis/document/DocumentMatcher.java
1
/*
2
 * CDDL HEADER START
3
 *
4
 * The contents of this file are subject to the terms of the
5
 * Common Development and Distribution License (the "License").
6
 * You may not use this file except in compliance with the License.
7
 *
8
 * See LICENSE.txt included in this distribution for the specific
9
 * language governing permissions and limitations under the License.
10
 *
11
 * When distributing Covered Code, include this CDDL HEADER in each
12
 * file and include the License file at LICENSE.txt.
13
 * If applicable, add the following below this CDDL HEADER, with the
14
 * fields enclosed by brackets "[]" replaced with your own identifying
15
 * information: Portions Copyright [yyyy] [name of copyright owner]
16
 *
17
 * CDDL HEADER END
18
 */
19

20
 /*
21
 * Copyright (c) 2017, 2021, Chris Fraire <cfraire@me.com>.
22
 */
23
package org.opengrok.indexer.analysis.document;
24

25
import java.io.BufferedReader;
26
import java.io.IOException;
27
import java.io.InputStream;
28
import java.io.InputStreamReader;
29
import java.nio.charset.StandardCharsets;
30
import java.util.Arrays;
31
import org.opengrok.indexer.analysis.AnalyzerFactory;
32
import org.opengrok.indexer.analysis.FileAnalyzerFactory.Matcher;
33
import org.opengrok.indexer.util.IOUtils;
34

35
/**
36
 * Represents an implementation of {@link Matcher} that detects a troff- or mandoc-like document.
37
 */
38
public class DocumentMatcher implements Matcher {
39

40
    /**
41
     * Set to 512K {@code int}, but {@code FIRST_LOOK_WIDTH} and
42
     * {@code LINE_LIMIT} should apply beforehand. This value is "effectively
43
     * unbounded" without being literally 2_147_483_647 -- as the other limits
44
     * will apply first, and the {@link java.io.BufferedInputStream} will
45
     * manage a reasonably-sized buffer.
46
     */
47
    private static final int MARK_READ_LIMIT = 1024 * 512;
48

49
    private static final int LINE_LIMIT = 100;
50

51
    private static final int FIRST_LOOK_WIDTH = 300;
52

53
    private final AnalyzerFactory factory;
54

55
    private final String[] lineStarters;
56

57
    /**
58
     * Initializes an instance for the required parameters.
59
     * @param factory required factory to return when matched
60
     * @param lineStarters required list of line starters that indicate a match
61
     * @throws IllegalArgumentException thrown if any parameter is null
62
     */
63
    public DocumentMatcher(AnalyzerFactory factory, String[] lineStarters) {
1✔
64
        if (factory == null) {
1✔
65
            throw  new IllegalArgumentException("`factory' is null");
×
66
        }
67
        if (lineStarters == null) {
1✔
68
            throw  new IllegalArgumentException("`lineStarters' is null");
×
69
        }
70
        if (lineStarters.length < 1) {
1✔
71
            throw  new IllegalArgumentException("`lineStarters' is empty");
×
72
        }
73

74
        String[] copyOf = Arrays.copyOf(lineStarters, lineStarters.length);
1✔
75
        for (String elem : copyOf) {
1✔
76
            if (elem == null) {
1✔
77
                throw  new IllegalArgumentException(
×
78
                    "`lineStarters' has null element");
79
            }
80
        }
81

82
        this.factory = factory;
1✔
83
        this.lineStarters = copyOf;
1✔
84
    }
1✔
85

86
    /**
87
     * Try to match the file contents by looking for {@code lineStarters} in
88
     * the first 100 lines while also affirming that the document starts
89
     * with "." or "'" after a limited amount of whitespace.
90
     * <p>
91
     * The stream is reset before returning.
92
     *
93
     * @param contents the first few bytes of a file
94
     * @param in the input stream from which the full file can be read
95
     * @return an analyzer factory if the contents match, or {@code null}
96
     * otherwise
97
     * @throws IOException in case of any read error
98
     */
99
    @Override
100
    public AnalyzerFactory isMagic(byte[] contents, InputStream in)
101
        throws IOException {
102

103
        if (!in.markSupported()) {
1✔
104
            return null;
×
105
        }
106
        in.mark(MARK_READ_LIMIT);
1✔
107

108
        // read encoding, and skip past any BOM
109
        int bomLength = 0;
1✔
110
        String encoding = IOUtils.findBOMEncoding(contents);
1✔
111
        if (encoding == null) {
1✔
112
            encoding = StandardCharsets.UTF_8.name();
1✔
113
        } else {
114
            bomLength = IOUtils.skipForBOM(contents);
1✔
115
            if (in.skip(bomLength) != bomLength) {
1✔
116
                in.reset();
×
117
                return null;
×
118
            }
119
        }
120

121
        // affirm that a LF exists in a first block
122
        boolean foundLF = hasLineFeed(in, encoding);
1✔
123
        in.reset();
1✔
124
        if (!foundLF) {
1✔
125
            return null;
1✔
126
        }
127
        if (bomLength > 0 && in.skip(bomLength) != bomLength) {
1✔
128
            in.reset();
×
129
            return null;
×
130
        }
131

132
        // read line-by-line for a first few lines
133
        BufferedReader rdr = new BufferedReader(new InputStreamReader(
1✔
134
            in, encoding));
135
        boolean foundContent = false;
1✔
136
        int numFirstChars = 0;
1✔
137
        int numLines = 0;
1✔
138
        String line;
139
        while ((line = rdr.readLine()) != null) {
1✔
140
            for (String lineStarter : lineStarters) {
1✔
141
                if (line.startsWith(lineStarter)) {
1✔
142
                    in.reset();
1✔
143
                    return factory;
1✔
144
                }
145
            }
146
            if (++numLines >= LINE_LIMIT) {
1✔
147
                in.reset();
×
148
                return null;
×
149
            }
150

151
            // If not yet `foundContent', then only a limited allowance is
152
            // given until a sentinel '.' or '\'' must be seen after nothing
153
            // else but whitespace.
154
            if (!foundContent) {
1✔
155
                for (int i = 0; i < line.length() && numFirstChars <
1✔
156
                    FIRST_LOOK_WIDTH; ++i, ++numFirstChars) {
1✔
157
                    char c = line.charAt(i);
1✔
158
                    if (c == '.' || c == '\'') {
1✔
159
                        foundContent = true;
1✔
160
                        break;
1✔
161
                    } else if (!Character.isWhitespace(c)) {
1✔
162
                        in.reset();
1✔
163
                        return null;
1✔
164
                    }
165
                }
166
                if (!foundContent && numFirstChars >= FIRST_LOOK_WIDTH) {
1✔
167
                    in.reset();
×
168
                    return null;
×
169
                }
170
            }
171
        }
172

173
        in.reset();
1✔
174
        return null;
1✔
175
    }
176

177
    @Override
178
    public AnalyzerFactory forFactory() {
179
        return factory;
1✔
180
    }
181

182
    /**
183
     * Determines if the {@code in} stream has a line feed character within the
184
     * first {@code FIRST_LOOK_WIDTH} characters.
185
     * @param in the input stream has any BOM (not {@code reset} after use)
186
     * @param encoding the input stream charset
187
     * @return true if a line feed '\n' was found
188
     * @throws IOException thrown on any error in reading
189
     */
190
    private boolean hasLineFeed(InputStream in, String encoding)
191
            throws IOException {
192
        byte[] buf;
193
        int nextra;
194
        int noff;
195
        switch (encoding) {
1✔
196
            case "UTF-16LE":
197
                buf = new byte[FIRST_LOOK_WIDTH * 2];
1✔
198
                nextra = 1;
1✔
199
                noff = 0;
1✔
200
                break;
1✔
201
            case "UTF-16BE":
202
                buf = new byte[FIRST_LOOK_WIDTH * 2];
1✔
203
                nextra = 1;
1✔
204
                noff = 1;
1✔
205
                break;
1✔
206
            default:
207
                buf = new byte[FIRST_LOOK_WIDTH];
1✔
208
                nextra = 0;
1✔
209
                noff = 0;
1✔
210
                break;
211
        }
212

213
        int nread = in.read(buf);
1✔
214
        for (int i = 0; i + nextra < nread; i += 1 + nextra) {
1✔
215
            if (nextra > 0) {
1✔
216
                if (buf[i + noff] == '\n' && buf[i + 1 - noff] == '\0') {
1✔
217
                    return true;
1✔
218
                }
219
            } else {
220
                if (buf[i] == '\n') {
1✔
221
                    return true;
1✔
222
                }
223
            }
224
        }
225
        return false;
1✔
226
    }
227
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc