• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

oracle / opengrok / #3716

30 Nov 2023 08:55AM UTC coverage: 66.158% (+0.05%) from 66.106%
#3716

push

web-flow
Refactoring to reduce sonar code smell fixes (#4485)

---------

Signed-off-by: Gino Augustine <ginoaugustine@gmail.com>
Co-authored-by: Vladimir Kotal <vlada@kotalovi.cz>

389 of 478 new or added lines in 51 files covered. (81.38%)

11 existing lines in 9 files now uncovered.

38764 of 58593 relevant lines covered (66.16%)

0.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

78.33
/suggester/src/main/java/org/opengrok/suggest/SuggesterSearcher.java
1
/*
2
 * CDDL HEADER START
3
 *
4
 * The contents of this file are subject to the terms of the
5
 * Common Development and Distribution License (the "License").
6
 * You may not use this file except in compliance with the License.
7
 *
8
 * See LICENSE.txt included in this distribution for the specific
9
 * language governing permissions and limitations under the License.
10
 *
11
 * When distributing Covered Code, include this CDDL HEADER in each
12
 * file and include the License file at LICENSE.txt.
13
 * If applicable, add the following below this CDDL HEADER, with the
14
 * fields enclosed by brackets "[]" replaced with your own identifying
15
 * information: Portions Copyright [yyyy] [name of copyright owner]
16
 *
17
 * CDDL HEADER END
18
 */
19

20
/*
21
 * Copyright (c) 2018, 2021, Oracle and/or its affiliates. All rights reserved.
22
 * Portions Copyright (c) 2020, Chris Fraire <cfraire@me.com>.
23
 */
24
package org.opengrok.suggest;
25

26
import org.apache.lucene.index.IndexReader;
27
import org.apache.lucene.index.LeafReaderContext;
28
import org.apache.lucene.index.PostingsEnum;
29
import org.apache.lucene.index.Term;
30
import org.apache.lucene.index.Terms;
31
import org.apache.lucene.index.TermsEnum;
32
import org.apache.lucene.search.BooleanClause;
33
import org.apache.lucene.search.BooleanQuery;
34
import org.apache.lucene.search.DocIdSetIterator;
35
import org.apache.lucene.search.IndexSearcher;
36
import org.apache.lucene.search.MatchAllDocsQuery;
37
import org.apache.lucene.search.Query;
38
import org.apache.lucene.util.BytesRef;
39
import org.opengrok.suggest.popular.PopularityCounter;
40
import org.opengrok.suggest.query.SuggesterRangeQuery;
41
import org.opengrok.suggest.query.data.BitIntsHolder;
42
import org.opengrok.suggest.query.data.IntsHolder;
43
import org.opengrok.suggest.query.SuggesterQuery;
44
import org.opengrok.suggest.query.customized.CustomPhraseQuery;
45

46
import java.io.IOException;
47
import java.util.ArrayList;
48
import java.util.Collections;
49
import java.util.List;
50
import java.util.Objects;
51
import java.util.Optional;
52
import java.util.Set;
53
import java.util.logging.Level;
54
import java.util.logging.Logger;
55
import java.util.stream.Collectors;
56

57
/**
58
 * Variation of {@link IndexSearcher} but instead of returning the relevant documents can return also suggestions.
59
 */
60
class SuggesterSearcher extends IndexSearcher {
61

62
    public static final int TERM_ALREADY_SEARCHED_MULTIPLIER = 100;
63

64
    private static final Logger logger = Logger.getLogger(SuggesterSearcher.class.getName());
1✔
65

66
    private final int resultSize;
67

68
    private boolean interrupted;
69

70
    private final int numDocs;
71

72
    private final IndexSearcher is;
73

74
    /**
75
     * @param reader reader of the index for which to provide suggestions
76
     * @param resultSize size of the results
77
     */
78
    SuggesterSearcher(final IndexReader reader, final int resultSize) {
79
        super(reader);
1✔
80
        is = new IndexSearcher(reader);
1✔
81
        numDocs = reader.numDocs();
1✔
82
        this.resultSize = resultSize;
1✔
83
    }
1✔
84

85
    /**
86
     * Returns the suggestions for generic {@link SuggesterQuery} (almost all except lone
87
     * {@link org.opengrok.suggest.query.SuggesterPrefixQuery} for which see {@link SuggesterProjectData}).
88
     * @param query query on which the suggestions depend
89
     * @param project name of the project
90
     * @param suggesterQuery query for the suggestions
91
     * @param popularityCounter data structure which contains the number of times the terms were searched for. It is
92
     * used to provide the most popular completion functionality.
93
     * @return suggestions
94
     */
95
    public List<LookupResultItem> suggest(
96
            final Query query,
97
            final String project,
98
            final SuggesterQuery suggesterQuery,
99
            final PopularityCounter popularityCounter
100
    ) {
101
        List<LookupResultItem> results = new ArrayList<>(resultSize * leafContexts.size());
1✔
102

103
        Query rewrittenQuery = null;
1✔
104

105
        try {
106
            if (query != null) {
1✔
107
                rewrittenQuery = query.rewrite(is);
1✔
108
            }
109
        } catch (IOException e) {
×
110
            logger.log(Level.WARNING, "Could not rewrite query", e);
×
111
            return results;
×
112
        }
1✔
113

114
        for (LeafReaderContext context : this.leafContexts) {
1✔
115
            if (interrupted) {
1✔
116
                break;
×
117
            }
118
            try {
119
                results.addAll(suggest(rewrittenQuery, context, project, suggesterQuery, popularityCounter));
1✔
120
            } catch (IOException e) {
×
121
                logger.log(Level.WARNING, "Cannot perform suggester search", e);
×
122
            }
1✔
123
        }
1✔
124

125
        if (results.size() > resultSize) {
1✔
126
            return SuggesterUtils.combineResults(results, resultSize);
×
127
        }
128

129
        return results;
1✔
130
    }
131

132
    private List<LookupResultItem> suggest(
133
            final Query query,
134
            final LeafReaderContext leafReaderContext,
135
            final String project,
136
            final SuggesterQuery suggesterQuery,
137
            final PopularityCounter searchCounts
138
    ) throws IOException {
139
        if (Thread.currentThread().isInterrupted()) {
1✔
140
            interrupted = true;
×
141
            return Collections.emptyList();
×
142
        }
143

144
        Set<BytesRef> tokensAlreadyIncluded = null;
1✔
145
        if (shouldLeaveOutSameTerms(query, suggesterQuery)) {
1✔
146
            tokensAlreadyIncluded = SuggesterUtils.intoTermsExceptPhraseQuery(query).stream()
1✔
147
                    .filter(t -> t.field().equals(suggesterQuery.getField()))
1✔
148
                    .map(Term::bytes)
1✔
149
                    .collect(Collectors.toSet());
1✔
150
        }
151

152
        boolean needsDocumentIds = Optional.ofNullable(query)
1✔
153
                                    .filter(query1 -> !(query1 instanceof MatchAllDocsQuery))
1✔
154
                                    .isPresent();
1✔
155

156
        ComplexQueryData complexQueryData = null;
1✔
157
        if (needsDocumentIds) {
1✔
158
            complexQueryData = getComplexQueryData(query, leafReaderContext);
1✔
159
        }
160
        if (interrupted) {
1✔
NEW
161
            return Collections.emptyList();
×
162
        }
163

164
        Terms terms = leafReaderContext.reader().terms(suggesterQuery.getField());
1✔
165

166
        TermsEnum termsEnum = suggesterQuery.getTermsEnumForSuggestions(terms);
1✔
167

168
        LookupPriorityQueue queue = new LookupPriorityQueue(resultSize);
1✔
169

170
        boolean needPositionsAndFrequencies = needPositionsAndFrequencies(query);
1✔
171

172
        PostingsEnum postingsEnum = null;
1✔
173

174
        BytesRef term = termsEnum.next();
1✔
175
        while (term != null) {
1✔
176
            if (Thread.currentThread().isInterrupted()) {
1✔
177
                interrupted = true;
×
178
                break;
×
179
            }
180
            postingsEnum = derivePostingsEnum(postingsEnum, termsEnum, needPositionsAndFrequencies);
1✔
181

182
            int score = 0;
1✔
183
            if (!needsDocumentIds) {
1✔
184
                score = normalizeDocumentFrequency(termsEnum.docFreq(), numDocs);
1✔
185
            } else if (needPositionsAndFrequencies) {
1✔
186
                score = getPhraseScore(complexQueryData, leafReaderContext.docBase, postingsEnum);
1✔
187
            } else if (complexQueryData != null) {
1✔
188
                score = getDocumentFrequency(complexQueryData.documentIds, leafReaderContext.docBase, postingsEnum);
1✔
189
            }
190

191
            if (shouldAddScoreForTerm(score, term, tokensAlreadyIncluded)) {
1✔
192
                score += searchCounts.get(term) * TERM_ALREADY_SEARCHED_MULTIPLIER;
1✔
193
                insertScoreToQueue(queue, score, term, project);
1✔
194
            }
195

196
            term = termsEnum.next();
1✔
197
        }
1✔
198

199
        return queue.getResult();
1✔
200
    }
201

202
    private PostingsEnum derivePostingsEnum(PostingsEnum postingsEnum,
203
                                           TermsEnum termsEnum,
204
                                           boolean needPositionsAndFrequencies) throws IOException {
205
        if (needPositionsAndFrequencies) {
1✔
206
            return termsEnum.postings(postingsEnum, PostingsEnum.POSITIONS | PostingsEnum.FREQS);
1✔
207
        } else {
208
            return termsEnum.postings(postingsEnum, PostingsEnum.NONE);
1✔
209
        }
210
    }
211

212
    private boolean shouldAddScoreForTerm(int score, BytesRef term,
213
                                          Set<BytesRef> tokensAlreadyIncluded) {
214
        return score > 0 && (Objects.isNull(tokensAlreadyIncluded) || !tokensAlreadyIncluded.contains(term));
1✔
215

216
    }
217
    private void insertScoreToQueue(LookupPriorityQueue queue, int score,
218
                                    BytesRef term, String project) {
219
        if (queue.canInsert(score)) {
1✔
220
            queue.insertWithOverflow(new LookupResultItem(term.utf8ToString(), project, score));
1✔
221
        }
222

223
    }
1✔
224

225
    private boolean shouldLeaveOutSameTerms(final Query query, final SuggesterQuery suggesterQuery) {
226
        if (query instanceof CustomPhraseQuery) {
1✔
227
            return false;
1✔
228
        }
229
        return !(suggesterQuery instanceof SuggesterRangeQuery);
1✔
230
    }
231

232
    private ComplexQueryData getComplexQueryData(final Query query, final LeafReaderContext leafReaderContext) {
233
        ComplexQueryData data = new ComplexQueryData();
1✔
234
        if (query == null || query instanceof SuggesterQuery) {
1✔
235
            data.documentIds = new BitIntsHolder(0);
×
236
            return data;
×
237
        }
238

239
        BitIntsHolder documentIds = new BitIntsHolder();
1✔
240
        try {
241
            search(query, new SuggestResultCollector(leafReaderContext, data, documentIds));
1✔
UNCOV
242
        } catch (IOException e) {
×
243
            if (Thread.currentThread().isInterrupted()) {
×
244
                interrupted = true;
×
245
                return null;
×
246
            } else {
247
                logger.log(Level.WARNING, e, () -> "Could not get document ids for " + query);
×
248
            }
249
        } catch (Exception e) {
×
250
            logger.log(Level.WARNING, e, () -> "Could not get document ids for " + query);
×
251
        }
1✔
252

253
        data.documentIds = documentIds;
1✔
254
        return data;
1✔
255
    }
256

257
    private int getPhraseScore(final ComplexQueryData data, final int docBase, final PostingsEnum postingsEnum)
258
            throws IOException {
259

260
        int weight = 0;
1✔
261
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
1✔
262
            int docId = postingsEnum.docID();
1✔
263
            if (data.documentIds.has(docBase + docId)) {
1✔
264
                IntsHolder positions = data.scorer.getPositions(docId);
1✔
265
                if (positions == null) {
1✔
266
                    continue;
×
267
                }
268

269
                int freq = postingsEnum.freq();
1✔
270
                for (int i = 0; i < freq; i++) {
1✔
271
                    int pos = postingsEnum.nextPosition();
1✔
272

273
                    if (positions.has(pos)) {
1✔
274
                        weight++;
1✔
275
                    }
276
                }
277
            }
278
        }
1✔
279

280
        return weight;
1✔
281
    }
282

283
    private int getDocumentFrequency(final IntsHolder documentIds, final int docBase, final PostingsEnum postingsEnum)
284
            throws IOException {
285

286
        int weight = 0;
1✔
287
        while (postingsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
1✔
288
            if (documentIds.has(docBase + postingsEnum.docID())) {
1✔
289
                weight++;
1✔
290
            }
291
        }
292
        return normalizeDocumentFrequency(weight, documentIds.numberOfElements());
1✔
293
    }
294

295
    private boolean needPositionsAndFrequencies(final Query query) {
296
        if (query instanceof CustomPhraseQuery) {
1✔
297
            return true;
1✔
298
        }
299

300
        if (query instanceof BooleanQuery) {
1✔
301
            for (BooleanClause bc : ((BooleanQuery) query).clauses()) {
×
302
                if (needPositionsAndFrequencies(bc.getQuery())) {
×
303
                    return true;
×
304
                }
305
            }
×
306
        }
307

308
        return false;
1✔
309
    }
310

311
    private static int normalizeDocumentFrequency(final int count, final int documents) {
312
        return (int) (((double) count / documents) * SuggesterUtils.NORMALIZED_DOCUMENT_FREQUENCY_MULTIPLIER);
1✔
313
    }
314

315
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc