• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

evolvedbinary / elemental / 982

29 Apr 2025 08:34PM UTC coverage: 56.409% (+0.007%) from 56.402%
982

push

circleci

adamretter
[feature] Improve README.md badges

28451 of 55847 branches covered (50.94%)

Branch coverage included in aggregate %.

77468 of 131924 relevant lines covered (58.72%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.17
/extensions/indexes/lucene/src/main/java/org/exist/indexing/lucene/XMLToQuery.java
1
/*
2
 * Elemental
3
 * Copyright (C) 2024, Evolved Binary Ltd
4
 *
5
 * admin@evolvedbinary.com
6
 * https://www.evolvedbinary.com | https://www.elemental.xyz
7
 *
8
 * Use of this software is governed by the Business Source License 1.1
9
 * included in the LICENSE file and at www.mariadb.com/bsl11.
10
 *
11
 * Change Date: 2028-04-27
12
 *
13
 * On the date above, in accordance with the Business Source License, use
14
 * of this software will be governed by the Apache License, Version 2.0.
15
 *
16
 * Additional Use Grant: Production use of the Licensed Work for a permitted
17
 * purpose. A Permitted Purpose is any purpose other than a Competing Use.
18
 * A Competing Use means making the Software available to others in a commercial
19
 * product or service that: substitutes for the Software; substitutes for any
20
 * other product or service we offer using the Software that exists as of the
21
 * date we make the Software available; or offers the same or substantially
22
 * similar functionality as the Software.
23
 *
24
 * NOTE: Parts of this file contain code from 'The eXist-db Authors'.
25
 *       The original license header is included below.
26
 *
27
 * =====================================================================
28
 *
29
 * eXist-db Open Source Native XML Database
30
 * Copyright (C) 2001 The eXist-db Authors
31
 *
32
 * info@exist-db.org
33
 * http://www.exist-db.org
34
 *
35
 * This library is free software; you can redistribute it and/or
36
 * modify it under the terms of the GNU Lesser General Public
37
 * License as published by the Free Software Foundation; either
38
 * version 2.1 of the License, or (at your option) any later version.
39
 *
40
 * This library is distributed in the hope that it will be useful,
41
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43
 * Lesser General Public License for more details.
44
 *
45
 * You should have received a copy of the GNU Lesser General Public
46
 * License along with this library; if not, write to the Free Software
47
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
48
 */
49
package org.exist.indexing.lucene;
50

51
import org.apache.lucene.analysis.Analyzer;
52
import org.apache.lucene.analysis.TokenStream;
53
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
54
import org.apache.lucene.index.AtomicReaderContext;
55
import org.apache.lucene.index.Term;
56
import org.apache.lucene.index.Terms;
57
import org.apache.lucene.index.TermsEnum;
58
import org.apache.lucene.search.*;
59
import org.apache.lucene.search.spans.*;
60
import org.apache.lucene.util.BytesRef;
61
import org.apache.lucene.util.automaton.Automaton;
62
import org.apache.lucene.util.automaton.CompiledAutomaton;
63
import org.apache.lucene.util.automaton.LevenshteinAutomata;
64
import org.exist.xquery.Expression;
65
import org.exist.xquery.XPathException;
66
import org.exist.xquery.modules.lucene.QueryOptions;
67
import org.w3c.dom.Element;
68
import org.w3c.dom.Node;
69
import org.w3c.dom.NodeList;
70

71
import java.io.IOException;
72
import java.io.StringReader;
73
import java.util.ArrayList;
74
import java.util.List;
75

76
/**
77
 * Parses the XML representation of a Lucene query and transforms
78
 * it into a tree of {@link org.apache.lucene.search.Query} objects.
79
 */
80
public class XMLToQuery {
81

82
    private final LuceneIndex index;
83

84
    /**
     * Creates a query builder bound to the given Lucene index.
     *
     * @param index the index whose reader is consulted when wildcard terms
     *              inside phrase queries have to be expanded
     */
    public XMLToQuery(final LuceneIndex index) {
        this.index = index;
    }
1✔
87

88
    public Query parse(String field, Element root, Analyzer analyzer, QueryOptions options) throws XPathException {
89
        Query query = null;
1✔
90
        String localName = root.getLocalName();
1✔
91
        if (null != localName) {
1!
92
            query = switch (localName) {
1!
93
                case "query" -> parseChildren(field, root, analyzer, options);
1✔
94
                case "term" -> termQuery(getField(root, field), root, analyzer);
1✔
95
                case "wildcard" -> wildcardQuery(getField(root, field), root, options);
1✔
96
                case "prefix" -> prefixQuery(getField(root, field), root, options);
1✔
97
                case "fuzzy" -> fuzzyQuery(getField(root, field), root);
1✔
98
                case "bool" -> booleanQuery(getField(root, field), root, analyzer, options);
1✔
99
                case "phrase" -> phraseQuery(getField(root, field), root, analyzer);
1✔
100
                case "near" -> nearQuery(getField(root, field), root, analyzer);
1✔
101
                case "first" -> getSpanFirst(getField(root, field), root, analyzer);
×
102
                case "regex" -> regexQuery(getField(root, field), root, options);
1✔
103
                default ->
104
                        throw new XPathException((Expression) null, "Unknown element in lucene query expression: " + localName);
1✔
105
            };
106
        }
107

108
        if (query != null) {
1✔
109
            setBoost(root, query);
1✔
110
        }
111

112
        return query;
1✔
113
    }
114

115
    private Query phraseQuery(String field, Element node, Analyzer analyzer) throws XPathException {
116
        NodeList termList = node.getElementsByTagName("term");
1✔
117
        if (termList.getLength() == 0) {
1✔
118
            PhraseQuery query = new PhraseQuery();
1✔
119
            String qstr = getText(node);
1✔
120
            try {
121
                TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
1✔
122
                CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
1✔
123
                    stream.reset();
1✔
124
                while (stream.incrementToken()) {
1✔
125
                    query.add(new Term(field, termAttr.toString()));
1✔
126
                }
127
                stream.end();
1✔
128
                stream.close();
1✔
129
            } catch (IOException e) {
×
130
                throw new XPathException((Expression) null, "Error while parsing phrase query: " + qstr);
×
131
            }
1✔
132
            int slop = getSlop(node);
1✔
133
            if (slop > -1)
1✔
134
                query.setSlop(slop);
1✔
135
            return query;
1✔
136
        }
137
        MultiPhraseQuery query = new MultiPhraseQuery();
1✔
138
        for (int i = 0; i < termList.getLength(); i++) {
1✔
139
            Element elem = (Element) termList.item(i);
1✔
140
            String text = getText(elem);
1✔
141
            if (text.indexOf('?') > -1 || text.indexOf('*') > 0) {
1✔
142
                try {
143
                    Term[] expanded = expandTerms(field, text);
1✔
144
                    if (expanded.length > 0)
1✔
145
                        query.add(expanded);
1✔
146
                } catch (IOException e) {
×
147
                    throw new XPathException((Expression) null, "IO error while expanding query terms: " + e.getMessage(), e);
×
148
                }
1✔
149
            } else {
150
                String termStr = getTerm(field, text, analyzer);
1✔
151
                if (termStr != null)
1✔
152
                    query.add(new Term(field, text));
1✔
153
            }
154
        }
155
        int slop = getSlop(node);
1✔
156
        if (slop > -1)
1✔
157
            query.setSlop(slop);
1✔
158
        return query;
1✔
159
    }
160

161
    private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
162
        int slop = getSlop(node);
1✔
163
        if (slop < 0)
1✔
164
            slop = 0;
1✔
165
        boolean inOrder = true;
1✔
166
        if (node.hasAttribute("ordered"))
1✔
167
            inOrder = node.getAttribute("ordered").equals("yes");
1✔
168

169
        if (!hasElementContent(node)) {
1✔
170
            String qstr = getText(node);
1✔
171
            List<SpanTermQuery> list = new ArrayList<>(8);
1✔
172
            try {
173
                TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
1✔
174
                CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
1✔
175
                    stream.reset();
1✔
176
                while (stream.incrementToken()) {
1✔
177
                    list.add(new SpanTermQuery(new Term(field, termAttr.toString())));
1✔
178
                }
179
                stream.end();
1✔
180
                stream.close();
1✔
181
            } catch (IOException e) {
×
182
                throw new XPathException((Expression) null, "Error while parsing phrase query: " + qstr);
×
183
            }
1✔
184
            return new SpanNearQuery(list.toArray(new SpanTermQuery[0]), slop, inOrder);
1✔
185
        }
186
        SpanQuery[] children = parseSpanChildren(field, node, analyzer);
1✔
187
        return new SpanNearQuery(children, slop, inOrder);
1✔
188
    }
189

190
    private SpanQuery[] parseSpanChildren(String field, Element node, Analyzer analyzer) throws XPathException {
191
        List<SpanQuery> list = new ArrayList<>(8);
1✔
192
        Node child = node.getFirstChild();
1✔
193
        while (child != null) {
1✔
194
            if (child.getNodeType() == Node.ELEMENT_NODE) {
1!
195
                final String localName = child.getLocalName();
1✔
196
                if (null != localName) {
1!
197
                    switch (localName) {
1!
198
                        case "term":
199
                            getSpanTerm(list, field, (Element) child, analyzer);
1✔
200
                            break;
1✔
201
                        case "near":
202
                            list.add(nearQuery(field, (Element) child, analyzer));
1✔
203
                            break;
1✔
204
                        case "first":
205
                            list.add(getSpanFirst(field, (Element) child, analyzer));
1✔
206
                            break;
1✔
207
                        case "regex":
208
                            list.add(getSpanRegex(field, (Element) child, analyzer));
1✔
209
                            break;
1✔
210
                        default:
211
                            throw new XPathException((Expression) null, "Unknown query element: " + child.getNodeName());
×
212
                    }
213
                }
214
            }
215
            child = child.getNextSibling();
1✔
216
        }
217
        return list.toArray(new SpanQuery[0]);
1✔
218
    }
219

220
    private void getSpanTerm(List<SpanQuery> list, String field, Element node, Analyzer analyzer) throws XPathException {
221
            String termStr = getTerm(field, getText(node), analyzer);
1✔
222
            if (termStr != null)
1✔
223
                    list.add(new SpanTermQuery(new Term(field, termStr)));
1✔
224
    }
1✔
225

226
    private SpanQuery getSpanRegex(String field, Element node, Analyzer analyzer) {
227
            String regex = getText(node);
1✔
228
            return new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term(field, regex)));
1✔
229
    }
230
    
231
    private SpanQuery getSpanFirst(String field, Element node, Analyzer analyzer) throws XPathException {
232
            int slop = getSlop(node);
1✔
233
        if (slop < 0)
1!
234
            slop = 0;
1✔
235
        boolean inOrder = true;
1✔
236
        if (node.hasAttribute("ordered"))
1!
237
            inOrder = node.getAttribute("ordered").equals("yes");
×
238
        SpanQuery query = null;
1✔
239
        if (hasElementContent(node)) {
1!
240
            SpanQuery[] children = parseSpanChildren(field, node, analyzer);
1✔
241
            query = new SpanNearQuery(children, slop, inOrder);
1✔
242
        } else {
1✔
243
                String termStr = getTerm(field, getText(node), analyzer);
×
244
                if (termStr != null)
×
245
                        query = new SpanTermQuery(new Term(field, termStr));
×
246
        }
247
        int end = 0;
1✔
248
        if (node.hasAttribute("end")) {
1!
249
            try {
250
                end = Integer.parseInt(node.getAttribute("end"));
1✔
251
            } catch (NumberFormatException e) {
×
252
                throw new XPathException((Expression) null, "Attribute 'end' to query element 'first' should be a " +
×
253
                        "valid integer. Got: " + node.getAttribute("end"));
×
254
            }
1✔
255
        }
256
        return query != null ? new SpanFirstQuery(query, end) : null;
1!
257
    }
258

259
    private int getSlop(Element node) throws XPathException {
260
        String slop = node.getAttribute("slop");
1✔
261
        if (!slop.isEmpty()) {
1✔
262
            try {
263
                return Integer.parseInt(slop);
1✔
264
            } catch (NumberFormatException e) {
×
265
                throw new XPathException((Expression) null, "Query parameter 'slop' should be an integer value. Got: " + slop);
×
266
            }
267
        }
268
        return -1;
1✔
269
    }
270

271
    private Term[] expandTerms(String field, String queryStr) throws XPathException, IOException {
272
        return index.withReader(reader -> {
1✔
273
            final Automaton automaton = WildcardQuery.toAutomaton(new Term(field, queryStr));
1✔
274
            final CompiledAutomaton compiled = new CompiledAutomaton(automaton);
1✔
275
            final List<Term> termList = new ArrayList<>(8);
1✔
276
            for (AtomicReaderContext atomic : reader.leaves()) {
1✔
277
                Terms terms = atomic.reader().terms(field);
1✔
278
                if (terms != null) {
1!
279
                    TermsEnum termsEnum = compiled.getTermsEnum(terms);
1✔
280
                    BytesRef data = termsEnum.next();
1✔
281
                    while (data != null) {
1✔
282
                        String term = data.utf8ToString();
1✔
283
                        termList.add(new Term(field, term));
1✔
284
                        data = termsEnum.next();
1✔
285
                    }
1✔
286
                }
287
            }
1✔
288
            Term[] matchingTerms = new Term[termList.size()];
1✔
289
            return termList.toArray(matchingTerms);
1✔
290
        });
291
    }
292

293
    private Query termQuery(String field, Element node, Analyzer analyzer) throws XPathException {
294
            String termStr = getTerm(field, getText(node), analyzer);
1✔
295
            return termStr == null ? null : new TermQuery(new Term(field, termStr));
1✔
296
    }
297

298
    private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
299
            String term = null;
1✔
300
            try {
301
            TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
1✔
302
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
1✔
303
                    stream.reset();
1✔
304
                        if (stream.incrementToken()) {
1✔
305
                                term = termAttr.toString();
1✔
306
                        }
307
                        stream.end();
1✔
308
                        stream.close();
1✔
309
                        return term;
1✔
310
                } catch (IOException e) {
×
311
                        throw new XPathException((Expression) null, "Lucene index error while creating query: " + e.getMessage(), e);
×
312
                }
313
    }
314
    
315
    private Query wildcardQuery(String field, Element node, QueryOptions options) {
316
        WildcardQuery query = new WildcardQuery(new Term(field, getText(node)));
1✔
317
        setRewriteMethod(query, node, options);
1✔
318
        return query;
1✔
319
    }
320

321
    private Query prefixQuery(String field, Element node, QueryOptions options) {
322
        PrefixQuery query = new PrefixQuery(new Term(field, getText(node)));
1✔
323
        setRewriteMethod(query, node, options);
1✔
324
        return query;
1✔
325
    }
326

327
    private Query fuzzyQuery(String field, Element node) throws XPathException {
328
        int maxEdits = FuzzyQuery.defaultMaxEdits;
1✔
329
        String attr = node.getAttribute("max-edits");
1✔
330
        if (!attr.isEmpty()) {
1✔
331
            try {
332
                maxEdits = Integer.parseInt(attr);
1✔
333
                if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
1!
334
                    throw new XPathException((Expression) null, "Query parameter max-edits must by <= " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
×
335
                }
336
            } catch (NumberFormatException e) {
×
337
                throw new XPathException((Expression) null, "Query parameter 'max-edits' should be an integer value. Got: " + attr);
×
338
            }
1✔
339
        }
340
        return new FuzzyQuery(new Term(field, getText(node)), maxEdits);
1✔
341
    }
342

343
    private Query regexQuery(String field, Element node, QueryOptions options) {
344
        RegexpQuery query = new RegexpQuery(new Term(field, getText(node)));
1✔
345
        setRewriteMethod(query, node, options);
1✔
346
        return query;
1✔
347
    }
348

349
    private Query booleanQuery(String field, Element node, Analyzer analyzer, QueryOptions options) throws XPathException {
350
        BooleanQuery query = new BooleanQuery();
1✔
351

352
        // Specifies a minimum number of the optional BooleanClauses which must be satisfied.
353
        String minOpt = node.getAttribute("min");
1✔
354
        if (!minOpt.isEmpty()) {
1✔
355
            try {
356
                int minMust = Integer.parseInt(minOpt);
1✔
357
                query.setMinimumNumberShouldMatch(minMust);
1✔
358
            } catch (NumberFormatException ex) {
×
359
                // ignore
360
            }
1✔
361
        }
362

363
        Node child = node.getFirstChild();
1✔
364
        while (child != null) {
1✔
365
            if (child.getNodeType() == Node.ELEMENT_NODE) {
1✔
366
                Element elem = (Element) child;
1✔
367
                Query childQuery = parse(field, elem, analyzer, options);
1✔
368
                if (childQuery != null) {
1✔
369
                        BooleanClause.Occur occur = getOccur(elem);
1✔
370
                        query.add(childQuery, occur);
1✔
371
                }
372
            }
373
            child = child.getNextSibling();
1✔
374
        }
375
        return query;
1✔
376
    }
377

378
    private void setRewriteMethod(MultiTermQuery query, Element node, QueryOptions options) {
379
        boolean doFilterRewrite = options.filterRewrite();
1✔
380
        String option = node.getAttribute("filter-rewrite");
1✔
381
        if (!option.isEmpty()) {
1!
382
            doFilterRewrite = option.equalsIgnoreCase("yes");
×
383
        }
384
        if (doFilterRewrite) {
1!
385
            query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
×
386
        } else {
387
            query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
1✔
388
        }
389
    }
1✔
390

391
    private BooleanClause.Occur getOccur(Element elem) {
392
        BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
1✔
393
        String occurOpt = elem.getAttribute("occur");
1✔
394
        if (!occurOpt.isEmpty()) {
1✔
395
            occur = switch (occurOpt) {
1!
396
                case "must" -> BooleanClause.Occur.MUST;
1✔
397
                case "not" -> BooleanClause.Occur.MUST_NOT;
1✔
398
                case "should" -> BooleanClause.Occur.SHOULD;
1✔
399
                default -> occur;
1✔
400
            };
401
        }
402
        return occur;
1✔
403
    }
404

405
    private Query parseChildren(String field, Element root, Analyzer analyzer, QueryOptions options) throws XPathException {
406
        Query query = null;
1✔
407
        Node child = root.getFirstChild();
1✔
408
        while (child != null) {
1✔
409
            if (child.getNodeType() == Node.ELEMENT_NODE) {
1✔
410
                Query childQuery = parse(field, (Element) child, analyzer, options);
1✔
411
                if (query != null) {
1✔
412
                    if (query instanceof BooleanQuery)
1!
413
                        ((BooleanQuery) query).add(childQuery, BooleanClause.Occur.SHOULD);
×
414
                    else {
415
                        BooleanQuery boolQuery = new BooleanQuery();
1✔
416
                        boolQuery.add(query, BooleanClause.Occur.SHOULD);
1✔
417
                        boolQuery.add(childQuery, BooleanClause.Occur.SHOULD);
1✔
418
                        query = boolQuery;
1✔
419
                    }
1✔
420
                } else
421
                    query = childQuery;
1✔
422
            }
423
            child = child.getNextSibling();
1✔
424
        }
425
        return query;
1✔
426
    }
427

428
    private void setBoost(Element node, Query query) throws XPathException {
429
        String boost = node.getAttribute("boost");
1✔
430
        if (!boost.isEmpty()) {
1!
431
            try {
432
                query.setBoost(Float.parseFloat(boost));
×
433
            } catch (NumberFormatException e) {
×
434
                throw new XPathException((Expression) null, "Bad value for boost in query parameter. Got: " + boost);
×
435
            }
×
436
        }
437
    }
1✔
438

439
    private String getText(Element root) {
440
        final StringBuilder buf = new StringBuilder();
1✔
441
        Node child = root.getFirstChild();
1✔
442
        while (child != null) {
1✔
443
            if (child.getNodeType() == Node.TEXT_NODE) {
1!
444
                buf.append(child.getNodeValue());
1✔
445
            }
446
            child = child.getNextSibling();
1✔
447
        }
448
        return buf.toString();
1✔
449
    }
450

451
    private boolean hasElementContent(final Element root) {
452
        Node child = root.getFirstChild();
1✔
453
        while (child != null) {
1✔
454
            if (child.getNodeType() == Node.ELEMENT_NODE) {
1✔
455
                return true;
1✔
456
            }
457
            child = child.getNextSibling();
1✔
458
        }
459
        return false;
1✔
460
    }
461

462
    private String getField(Element node, String defaultField) {
463
        final String field = node.getAttribute("field");
1✔
464
        if (!field.isEmpty()) {
1✔
465
            return field;
1✔
466
        }
467
        return defaultField;
1✔
468
    }
469
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc