evolvedbinary / elemental / 982

Committed 29 Apr 2025 08:34PM UTC coverage: 56.409% (+0.007%) from 56.402%

Build # 982

Build Type

push

circleci

Committed by

adamretter

Commit Message

[feature] Improve README.md badges

Run Details

28451 of 55847 branches covered (50.94%)

Branch coverage included in aggregate %.

77468 of 131924 relevant lines covered (58.72%)

0.59 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

86.17

/extensions/indexes/lucene/src/main/java/org/exist/indexing/lucene/XMLToQuery.java

/*
 * Elemental
 * Copyright (C) 2024, Evolved Binary Ltd
 *
 * admin@evolvedbinary.com
 * https://www.evolvedbinary.com | https://www.elemental.xyz
 *
 * Use of this software is governed by the Business Source License 1.1
 * included in the LICENSE file and at www.mariadb.com/bsl11.
 *
 * Change Date: 2028-04-27
 *
 * On the date above, in accordance with the Business Source License, use
 * of this software will be governed by the Apache License, Version 2.0.
 *
 * Additional Use Grant: Production use of the Licensed Work for a permitted
 * purpose. A Permitted Purpose is any purpose other than a Competing Use.
 * A Competing Use means making the Software available to others in a commercial
 * product or service that: substitutes for the Software; substitutes for any
 * other product or service we offer using the Software that exists as of the
 * date we make the Software available; or offers the same or substantially
 * similar functionality as the Software.
 *
 * NOTE: Parts of this file contain code from 'The eXist-db Authors'.
 *       The original license header is included below.
 *
 * =====================================================================
 *
 * eXist-db Open Source Native XML Database
 * Copyright (C) 2001 The eXist-db Authors
 *
 * info@exist-db.org
 * http://www.exist-db.org
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
package org.exist.indexing.lucene;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.*;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.LevenshteinAutomata;
import org.exist.xquery.Expression;
import org.exist.xquery.XPathException;
import org.exist.xquery.modules.lucene.QueryOptions;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

/**
 * Parses the XML representation of a Lucene query and transforms
 * it into a tree of {@link org.apache.lucene.search.Query} objects.
 */
public class XMLToQuery {

    private final LuceneIndex index;

    public XMLToQuery(LuceneIndex index) {
        this.index = index;
    }

    public Query parse(String field, Element root, Analyzer analyzer, QueryOptions options) throws XPathException {
        Query query = null;
        String localName = root.getLocalName();
        if (null != localName) {
            query = switch (localName) {
                case "query" -> parseChildren(field, root, analyzer, options);
                case "term" -> termQuery(getField(root, field), root, analyzer);
                case "wildcard" -> wildcardQuery(getField(root, field), root, options);
                case "prefix" -> prefixQuery(getField(root, field), root, options);
                case "fuzzy" -> fuzzyQuery(getField(root, field), root);
                case "bool" -> booleanQuery(getField(root, field), root, analyzer, options);
                case "phrase" -> phraseQuery(getField(root, field), root, analyzer);
                case "near" -> nearQuery(getField(root, field), root, analyzer);
                case "first" -> getSpanFirst(getField(root, field), root, analyzer);
                case "regex" -> regexQuery(getField(root, field), root, options);
                default ->
                        throw new XPathException((Expression) null, "Unknown element in lucene query expression: " + localName);
            };
        }

        if (query != null) {
            setBoost(root, query);
        }

        return query;
    }

    private Query phraseQuery(String field, Element node, Analyzer analyzer) throws XPathException {
        NodeList termList = node.getElementsByTagName("term");
        if (termList.getLength() == 0) {
            PhraseQuery query = new PhraseQuery();
            String qstr = getText(node);
            try {
                TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
                CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
                    stream.reset();
                while (stream.incrementToken()) {
                    query.add(new Term(field, termAttr.toString()));
                }
                stream.end();
                stream.close();
            } catch (IOException e) {
                throw new XPathException((Expression) null, "Error while parsing phrase query: " + qstr);
            }
            int slop = getSlop(node);
            if (slop > -1)
                query.setSlop(slop);
            return query;
        }
        MultiPhraseQuery query = new MultiPhraseQuery();
        for (int i = 0; i < termList.getLength(); i++) {
            Element elem = (Element) termList.item(i);
            String text = getText(elem);
            if (text.indexOf('?') > -1 || text.indexOf('*') > 0) {
                try {
                    Term[] expanded = expandTerms(field, text);
                    if (expanded.length > 0)
                        query.add(expanded);
                } catch (IOException e) {
                    throw new XPathException((Expression) null, "IO error while expanding query terms: " + e.getMessage(), e);
                }
            } else {
                String termStr = getTerm(field, text, analyzer);
                if (termStr != null)
                    query.add(new Term(field, text));
            }
        }
        int slop = getSlop(node);
        if (slop > -1)
            query.setSlop(slop);
        return query;
    }

    private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
        int slop = getSlop(node);
        if (slop < 0)
            slop = 0;
        boolean inOrder = true;
        if (node.hasAttribute("ordered"))
            inOrder = node.getAttribute("ordered").equals("yes");

        if (!hasElementContent(node)) {
            String qstr = getText(node);
            List<SpanTermQuery> list = new ArrayList<>(8);
            try {
                TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));
                CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
                    stream.reset();
                while (stream.incrementToken()) {
                    list.add(new SpanTermQuery(new Term(field, termAttr.toString())));
                }
                stream.end();
                stream.close();
            } catch (IOException e) {
                throw new XPathException((Expression) null, "Error while parsing phrase query: " + qstr);
            }
            return new SpanNearQuery(list.toArray(new SpanTermQuery[0]), slop, inOrder);
        }
        SpanQuery[] children = parseSpanChildren(field, node, analyzer);
        return new SpanNearQuery(children, slop, inOrder);
    }

    private SpanQuery[] parseSpanChildren(String field, Element node, Analyzer analyzer) throws XPathException {
        List<SpanQuery> list = new ArrayList<>(8);
        Node child = node.getFirstChild();
        while (child != null) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                final String localName = child.getLocalName();
                if (null != localName) {
                    switch (localName) {
                        case "term":
                            getSpanTerm(list, field, (Element) child, analyzer);
                            break;
                        case "near":
                            list.add(nearQuery(field, (Element) child, analyzer));
                            break;
                        case "first":
                            list.add(getSpanFirst(field, (Element) child, analyzer));
                            break;
                        case "regex":
                            list.add(getSpanRegex(field, (Element) child, analyzer));
                            break;
                        default:
                            throw new XPathException((Expression) null, "Unknown query element: " + child.getNodeName());
                    }
                }
            }
            child = child.getNextSibling();
        }
        return list.toArray(new SpanQuery[0]);
    }

    private void getSpanTerm(List<SpanQuery> list, String field, Element node, Analyzer analyzer) throws XPathException {
            String termStr = getTerm(field, getText(node), analyzer);
            if (termStr != null)
                    list.add(new SpanTermQuery(new Term(field, termStr)));
    }

    private SpanQuery getSpanRegex(String field, Element node, Analyzer analyzer) {
            String regex = getText(node);
            return new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term(field, regex)));
    }
    
    private SpanQuery getSpanFirst(String field, Element node, Analyzer analyzer) throws XPathException {
            int slop = getSlop(node);
        if (slop < 0)
            slop = 0;
        boolean inOrder = true;
        if (node.hasAttribute("ordered"))
            inOrder = node.getAttribute("ordered").equals("yes");
        SpanQuery query = null;
        if (hasElementContent(node)) {
            SpanQuery[] children = parseSpanChildren(field, node, analyzer);
            query = new SpanNearQuery(children, slop, inOrder);
        } else {
                String termStr = getTerm(field, getText(node), analyzer);
                if (termStr != null)
                        query = new SpanTermQuery(new Term(field, termStr));
        }
        int end = 0;
        if (node.hasAttribute("end")) {
            try {
                end = Integer.parseInt(node.getAttribute("end"));
            } catch (NumberFormatException e) {
                throw new XPathException((Expression) null, "Attribute 'end' to query element 'first' should be a " +
                        "valid integer. Got: " + node.getAttribute("end"));
            }
        }
        return query != null ? new SpanFirstQuery(query, end) : null;
    }

    private int getSlop(Element node) throws XPathException {
        String slop = node.getAttribute("slop");
        if (!slop.isEmpty()) {
            try {
                return Integer.parseInt(slop);
            } catch (NumberFormatException e) {
                throw new XPathException((Expression) null, "Query parameter 'slop' should be an integer value. Got: " + slop);
            }
        }
        return -1;
    }

    private Term[] expandTerms(String field, String queryStr) throws XPathException, IOException {
        return index.withReader(reader -> {
            final Automaton automaton = WildcardQuery.toAutomaton(new Term(field, queryStr));
            final CompiledAutomaton compiled = new CompiledAutomaton(automaton);
            final List<Term> termList = new ArrayList<>(8);
            for (AtomicReaderContext atomic : reader.leaves()) {
                Terms terms = atomic.reader().terms(field);
                if (terms != null) {
                    TermsEnum termsEnum = compiled.getTermsEnum(terms);
                    BytesRef data = termsEnum.next();
                    while (data != null) {
                        String term = data.utf8ToString();
                        termList.add(new Term(field, term));
                        data = termsEnum.next();
                    }
                }
            }
            Term[] matchingTerms = new Term[termList.size()];
            return termList.toArray(matchingTerms);
        });
    }

    private Query termQuery(String field, Element node, Analyzer analyzer) throws XPathException {
            String termStr = getTerm(field, getText(node), analyzer);
            return termStr == null ? null : new TermQuery(new Term(field, termStr));
    }

    private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
            String term = null;
            try {
            TokenStream stream = analyzer.tokenStream(field, new StringReader(text));
            CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);
                    stream.reset();
                        if (stream.incrementToken()) {
                                term = termAttr.toString();
                        }
                        stream.end();
                        stream.close();
                        return term;
                } catch (IOException e) {
                        throw new XPathException((Expression) null, "Lucene index error while creating query: " + e.getMessage(), e);
                }
    }
    
    private Query wildcardQuery(String field, Element node, QueryOptions options) {
        WildcardQuery query = new WildcardQuery(new Term(field, getText(node)));
        setRewriteMethod(query, node, options);
        return query;
    }

    private Query prefixQuery(String field, Element node, QueryOptions options) {
        PrefixQuery query = new PrefixQuery(new Term(field, getText(node)));
        setRewriteMethod(query, node, options);
        return query;
    }

    private Query fuzzyQuery(String field, Element node) throws XPathException {
        int maxEdits = FuzzyQuery.defaultMaxEdits;
        String attr = node.getAttribute("max-edits");
        if (!attr.isEmpty()) {
            try {
                maxEdits = Integer.parseInt(attr);
                if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
                    throw new XPathException((Expression) null, "Query parameter max-edits must by <= " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);
                }
            } catch (NumberFormatException e) {
                throw new XPathException((Expression) null, "Query parameter 'max-edits' should be an integer value. Got: " + attr);
            }
        }
        return new FuzzyQuery(new Term(field, getText(node)), maxEdits);
    }

    private Query regexQuery(String field, Element node, QueryOptions options) {
        RegexpQuery query = new RegexpQuery(new Term(field, getText(node)));
        setRewriteMethod(query, node, options);
        return query;
    }

    private Query booleanQuery(String field, Element node, Analyzer analyzer, QueryOptions options) throws XPathException {
        BooleanQuery query = new BooleanQuery();

        // Specifies a minimum number of the optional BooleanClauses which must be satisfied.
        String minOpt = node.getAttribute("min");
        if (!minOpt.isEmpty()) {
            try {
                int minMust = Integer.parseInt(minOpt);
                query.setMinimumNumberShouldMatch(minMust);
            } catch (NumberFormatException ex) {
                // ignore
            }
        }

        Node child = node.getFirstChild();
        while (child != null) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Element elem = (Element) child;
                Query childQuery = parse(field, elem, analyzer, options);
                if (childQuery != null) {
                        BooleanClause.Occur occur = getOccur(elem);
                        query.add(childQuery, occur);
                }
            }
            child = child.getNextSibling();
        }
        return query;
    }

    private void setRewriteMethod(MultiTermQuery query, Element node, QueryOptions options) {
        boolean doFilterRewrite = options.filterRewrite();
        String option = node.getAttribute("filter-rewrite");
        if (!option.isEmpty()) {
            doFilterRewrite = option.equalsIgnoreCase("yes");
        }
        if (doFilterRewrite) {
            query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
        } else {
            query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);
        }
    }

    private BooleanClause.Occur getOccur(Element elem) {
        BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;
        String occurOpt = elem.getAttribute("occur");
        if (!occurOpt.isEmpty()) {
            occur = switch (occurOpt) {
                case "must" -> BooleanClause.Occur.MUST;
                case "not" -> BooleanClause.Occur.MUST_NOT;
                case "should" -> BooleanClause.Occur.SHOULD;
                default -> occur;
            };
        }
        return occur;
    }

    private Query parseChildren(String field, Element root, Analyzer analyzer, QueryOptions options) throws XPathException {
        Query query = null;
        Node child = root.getFirstChild();
        while (child != null) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Query childQuery = parse(field, (Element) child, analyzer, options);
                if (query != null) {
                    if (query instanceof BooleanQuery)
                        ((BooleanQuery) query).add(childQuery, BooleanClause.Occur.SHOULD);
                    else {
                        BooleanQuery boolQuery = new BooleanQuery();
                        boolQuery.add(query, BooleanClause.Occur.SHOULD);
                        boolQuery.add(childQuery, BooleanClause.Occur.SHOULD);
                        query = boolQuery;
                    }
                } else
                    query = childQuery;
            }
            child = child.getNextSibling();
        }
        return query;
    }

    private void setBoost(Element node, Query query) throws XPathException {
        String boost = node.getAttribute("boost");
        if (!boost.isEmpty()) {
            try {
                query.setBoost(Float.parseFloat(boost));
            } catch (NumberFormatException e) {
                throw new XPathException((Expression) null, "Bad value for boost in query parameter. Got: " + boost);
            }
        }
    }

    private String getText(Element root) {
        final StringBuilder buf = new StringBuilder();
        Node child = root.getFirstChild();
        while (child != null) {
            if (child.getNodeType() == Node.TEXT_NODE) {
                buf.append(child.getNodeValue());
            }
            child = child.getNextSibling();
        }
        return buf.toString();
    }

    private boolean hasElementContent(final Element root) {
        Node child = root.getFirstChild();
        while (child != null) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                return true;
            }
            child = child.getNextSibling();
        }
        return false;
    }

    private String getField(Element node, String defaultField) {
        final String field = node.getAttribute("field");
        if (!field.isEmpty()) {
            return field;
        }
        return defaultField;
    }
}

1	/*
2	* Elemental
3	* Copyright (C) 2024, Evolved Binary Ltd
4	*
5	* admin@evolvedbinary.com
6	* https://www.evolvedbinary.com \| https://www.elemental.xyz
7	*
8	* Use of this software is governed by the Business Source License 1.1
9	* included in the LICENSE file and at www.mariadb.com/bsl11.
10	*
11	* Change Date: 2028-04-27
12	*
13	* On the date above, in accordance with the Business Source License, use
14	* of this software will be governed by the Apache License, Version 2.0.
15	*
16	* Additional Use Grant: Production use of the Licensed Work for a permitted
17	* purpose. A Permitted Purpose is any purpose other than a Competing Use.
18	* A Competing Use means making the Software available to others in a commercial
19	* product or service that: substitutes for the Software; substitutes for any
20	* other product or service we offer using the Software that exists as of the
21	* date we make the Software available; or offers the same or substantially
22	* similar functionality as the Software.
23	*
24	* NOTE: Parts of this file contain code from 'The eXist-db Authors'.
25	* The original license header is included below.
26	*
27	* =====================================================================
28	*
29	* eXist-db Open Source Native XML Database
30	* Copyright (C) 2001 The eXist-db Authors
31	*
32	* info@exist-db.org
33	* http://www.exist-db.org
34	*
35	* This library is free software; you can redistribute it and/or
36	* modify it under the terms of the GNU Lesser General Public
37	* License as published by the Free Software Foundation; either
38	* version 2.1 of the License, or (at your option) any later version.
39	*
40	* This library is distributed in the hope that it will be useful,
41	* but WITHOUT ANY WARRANTY; without even the implied warranty of
42	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
43	* Lesser General Public License for more details.
44	*
45	* You should have received a copy of the GNU Lesser General Public
46	* License along with this library; if not, write to the Free Software
47	* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48	*/
49	package org.exist.indexing.lucene;
50
51	import org.apache.lucene.analysis.Analyzer;
52	import org.apache.lucene.analysis.TokenStream;
53	import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
54	import org.apache.lucene.index.AtomicReaderContext;
55	import org.apache.lucene.index.Term;
56	import org.apache.lucene.index.Terms;
57	import org.apache.lucene.index.TermsEnum;
58	import org.apache.lucene.search.*;
59	import org.apache.lucene.search.spans.*;
60	import org.apache.lucene.util.BytesRef;
61	import org.apache.lucene.util.automaton.Automaton;
62	import org.apache.lucene.util.automaton.CompiledAutomaton;
63	import org.apache.lucene.util.automaton.LevenshteinAutomata;
64	import org.exist.xquery.Expression;
65	import org.exist.xquery.XPathException;
66	import org.exist.xquery.modules.lucene.QueryOptions;
67	import org.w3c.dom.Element;
68	import org.w3c.dom.Node;
69	import org.w3c.dom.NodeList;
70
71	import java.io.IOException;
72	import java.io.StringReader;
73	import java.util.ArrayList;
74	import java.util.List;
75
76	/**
77	* Parses the XML representation of a Lucene query and transforms
78	* it into a tree of {@link org.apache.lucene.search.Query} objects.
79	*/
80	public class XMLToQuery {
81
82	private final LuceneIndex index;
83
84	public XMLToQuery(LuceneIndex index) {	1✔
85	this.index = index;	1✔
86	}	1✔
87
88	public Query parse(String field, Element root, Analyzer analyzer, QueryOptions options) throws XPathException {
89	Query query = null;	1✔
90	String localName = root.getLocalName();	1✔
91	if (null != localName) {	1!
92	query = switch (localName) {	1!
93	case "query" -> parseChildren(field, root, analyzer, options);	1✔
94	case "term" -> termQuery(getField(root, field), root, analyzer);	1✔
95	case "wildcard" -> wildcardQuery(getField(root, field), root, options);	1✔
96	case "prefix" -> prefixQuery(getField(root, field), root, options);	1✔
97	case "fuzzy" -> fuzzyQuery(getField(root, field), root);	1✔
98	case "bool" -> booleanQuery(getField(root, field), root, analyzer, options);	1✔
99	case "phrase" -> phraseQuery(getField(root, field), root, analyzer);	1✔
100	case "near" -> nearQuery(getField(root, field), root, analyzer);	1✔
101	case "first" -> getSpanFirst(getField(root, field), root, analyzer);	×
102	case "regex" -> regexQuery(getField(root, field), root, options);	1✔
103	default ->
104	throw new XPathException((Expression) null, "Unknown element in lucene query expression: " + localName);	1✔
105	};
106	}
107
108	if (query != null) {	1✔
109	setBoost(root, query);	1✔
110	}
111
112	return query;	1✔
113	}
114
115	private Query phraseQuery(String field, Element node, Analyzer analyzer) throws XPathException {
116	NodeList termList = node.getElementsByTagName("term");	1✔
117	if (termList.getLength() == 0) {	1✔
118	PhraseQuery query = new PhraseQuery();	1✔
119	String qstr = getText(node);	1✔
120	try {
121	TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));	1✔
122	CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);	1✔
123	stream.reset();	1✔
124	while (stream.incrementToken()) {	1✔
125	query.add(new Term(field, termAttr.toString()));	1✔
126	}
127	stream.end();	1✔
128	stream.close();	1✔
129	} catch (IOException e) {	×
130	throw new XPathException((Expression) null, "Error while parsing phrase query: " + qstr);	×
131	}	1✔
132	int slop = getSlop(node);	1✔
133	if (slop > -1)	1✔
134	query.setSlop(slop);	1✔
135	return query;	1✔
136	}
137	MultiPhraseQuery query = new MultiPhraseQuery();	1✔
138	for (int i = 0; i < termList.getLength(); i++) {	1✔
139	Element elem = (Element) termList.item(i);	1✔
140	String text = getText(elem);	1✔
141	if (text.indexOf('?') > -1 \|\| text.indexOf('*') > 0) {	1✔
142	try {
143	Term[] expanded = expandTerms(field, text);	1✔
144	if (expanded.length > 0)	1✔
145	query.add(expanded);	1✔
146	} catch (IOException e) {	×
147	throw new XPathException((Expression) null, "IO error while expanding query terms: " + e.getMessage(), e);	×
148	}	1✔
149	} else {
150	String termStr = getTerm(field, text, analyzer);	1✔
151	if (termStr != null)	1✔
152	query.add(new Term(field, text));	1✔
153	}
154	}
155	int slop = getSlop(node);	1✔
156	if (slop > -1)	1✔
157	query.setSlop(slop);	1✔
158	return query;	1✔
159	}
160
161	private SpanQuery nearQuery(String field, Element node, Analyzer analyzer) throws XPathException {
162	int slop = getSlop(node);	1✔
163	if (slop < 0)	1✔
164	slop = 0;	1✔
165	boolean inOrder = true;	1✔
166	if (node.hasAttribute("ordered"))	1✔
167	inOrder = node.getAttribute("ordered").equals("yes");	1✔
168
169	if (!hasElementContent(node)) {	1✔
170	String qstr = getText(node);	1✔
171	List<SpanTermQuery> list = new ArrayList<>(8);	1✔
172	try {
173	TokenStream stream = analyzer.tokenStream(field, new StringReader(qstr));	1✔
174	CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);	1✔
175	stream.reset();	1✔
176	while (stream.incrementToken()) {	1✔
177	list.add(new SpanTermQuery(new Term(field, termAttr.toString())));	1✔
178	}
179	stream.end();	1✔
180	stream.close();	1✔
181	} catch (IOException e) {	×
182	throw new XPathException((Expression) null, "Error while parsing phrase query: " + qstr);	×
183	}	1✔
184	return new SpanNearQuery(list.toArray(new SpanTermQuery[0]), slop, inOrder);	1✔
185	}
186	SpanQuery[] children = parseSpanChildren(field, node, analyzer);	1✔
187	return new SpanNearQuery(children, slop, inOrder);	1✔
188	}
189
190	private SpanQuery[] parseSpanChildren(String field, Element node, Analyzer analyzer) throws XPathException {
191	List<SpanQuery> list = new ArrayList<>(8);	1✔
192	Node child = node.getFirstChild();	1✔
193	while (child != null) {	1✔
194	if (child.getNodeType() == Node.ELEMENT_NODE) {	1!
195	final String localName = child.getLocalName();	1✔
196	if (null != localName) {	1!
197	switch (localName) {	1!
198	case "term":
199	getSpanTerm(list, field, (Element) child, analyzer);	1✔
200	break;	1✔
201	case "near":
202	list.add(nearQuery(field, (Element) child, analyzer));	1✔
203	break;	1✔
204	case "first":
205	list.add(getSpanFirst(field, (Element) child, analyzer));	1✔
206	break;	1✔
207	case "regex":
208	list.add(getSpanRegex(field, (Element) child, analyzer));	1✔
209	break;	1✔
210	default:
211	throw new XPathException((Expression) null, "Unknown query element: " + child.getNodeName());	×
212	}
213	}
214	}
215	child = child.getNextSibling();	1✔
216	}
217	return list.toArray(new SpanQuery[0]);	1✔
218	}
219
220	private void getSpanTerm(List<SpanQuery> list, String field, Element node, Analyzer analyzer) throws XPathException {
221	String termStr = getTerm(field, getText(node), analyzer);	1✔
222	if (termStr != null)	1✔
223	list.add(new SpanTermQuery(new Term(field, termStr)));	1✔
224	}	1✔
225
226	private SpanQuery getSpanRegex(String field, Element node, Analyzer analyzer) {
227	String regex = getText(node);	1✔
228	return new SpanMultiTermQueryWrapper<>(new RegexpQuery(new Term(field, regex)));	1✔
229	}
230
231	private SpanQuery getSpanFirst(String field, Element node, Analyzer analyzer) throws XPathException {
232	int slop = getSlop(node);	1✔
233	if (slop < 0)	1!
234	slop = 0;	1✔
235	boolean inOrder = true;	1✔
236	if (node.hasAttribute("ordered"))	1!
237	inOrder = node.getAttribute("ordered").equals("yes");	×
238	SpanQuery query = null;	1✔
239	if (hasElementContent(node)) {	1!
240	SpanQuery[] children = parseSpanChildren(field, node, analyzer);	1✔
241	query = new SpanNearQuery(children, slop, inOrder);	1✔
242	} else {	1✔
243	String termStr = getTerm(field, getText(node), analyzer);	×
244	if (termStr != null)	×
245	query = new SpanTermQuery(new Term(field, termStr));	×
246	}
247	int end = 0;	1✔
248	if (node.hasAttribute("end")) {	1!
249	try {
250	end = Integer.parseInt(node.getAttribute("end"));	1✔
251	} catch (NumberFormatException e) {	×
252	throw new XPathException((Expression) null, "Attribute 'end' to query element 'first' should be a " +	×
253	"valid integer. Got: " + node.getAttribute("end"));	×
254	}	1✔
255	}
256	return query != null ? new SpanFirstQuery(query, end) : null;	1!
257	}
258
259	private int getSlop(Element node) throws XPathException {
260	String slop = node.getAttribute("slop");	1✔
261	if (!slop.isEmpty()) {	1✔
262	try {
263	return Integer.parseInt(slop);	1✔
264	} catch (NumberFormatException e) {	×
265	throw new XPathException((Expression) null, "Query parameter 'slop' should be an integer value. Got: " + slop);	×
266	}
267	}
268	return -1;	1✔
269	}
270
271	private Term[] expandTerms(String field, String queryStr) throws XPathException, IOException {
272	return index.withReader(reader -> {	1✔
273	final Automaton automaton = WildcardQuery.toAutomaton(new Term(field, queryStr));	1✔
274	final CompiledAutomaton compiled = new CompiledAutomaton(automaton);	1✔
275	final List<Term> termList = new ArrayList<>(8);	1✔
276	for (AtomicReaderContext atomic : reader.leaves()) {	1✔
277	Terms terms = atomic.reader().terms(field);	1✔
278	if (terms != null) {	1!
279	TermsEnum termsEnum = compiled.getTermsEnum(terms);	1✔
280	BytesRef data = termsEnum.next();	1✔
281	while (data != null) {	1✔
282	String term = data.utf8ToString();	1✔
283	termList.add(new Term(field, term));	1✔
284	data = termsEnum.next();	1✔
285	}	1✔
286	}
287	}	1✔
288	Term[] matchingTerms = new Term[termList.size()];	1✔
289	return termList.toArray(matchingTerms);	1✔
290	});
291	}
292
293	private Query termQuery(String field, Element node, Analyzer analyzer) throws XPathException {
294	String termStr = getTerm(field, getText(node), analyzer);	1✔
295	return termStr == null ? null : new TermQuery(new Term(field, termStr));	1✔
296	}
297
298	private String getTerm(String field, String text, Analyzer analyzer) throws XPathException {
299	String term = null;	1✔
300	try {
301	TokenStream stream = analyzer.tokenStream(field, new StringReader(text));	1✔
302	CharTermAttribute termAttr = stream.addAttribute(CharTermAttribute.class);	1✔
303	stream.reset();	1✔
304	if (stream.incrementToken()) {	1✔
305	term = termAttr.toString();	1✔
306	}
307	stream.end();	1✔
308	stream.close();	1✔
309	return term;	1✔
310	} catch (IOException e) {	×
311	throw new XPathException((Expression) null, "Lucene index error while creating query: " + e.getMessage(), e);	×
312	}
313	}
314
315	private Query wildcardQuery(String field, Element node, QueryOptions options) {
316	WildcardQuery query = new WildcardQuery(new Term(field, getText(node)));	1✔
317	setRewriteMethod(query, node, options);	1✔
318	return query;	1✔
319	}
320
321	private Query prefixQuery(String field, Element node, QueryOptions options) {
322	PrefixQuery query = new PrefixQuery(new Term(field, getText(node)));	1✔
323	setRewriteMethod(query, node, options);	1✔
324	return query;	1✔
325	}
326
327	private Query fuzzyQuery(String field, Element node) throws XPathException {
328	int maxEdits = FuzzyQuery.defaultMaxEdits;	1✔
329	String attr = node.getAttribute("max-edits");	1✔
330	if (!attr.isEmpty()) {	1✔
331	try {
332	maxEdits = Integer.parseInt(attr);	1✔
333	if (maxEdits < 0 \|\| maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {	1!
334	throw new XPathException((Expression) null, "Query parameter max-edits must by <= " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE);	×
335	}
336	} catch (NumberFormatException e) {	×
337	throw new XPathException((Expression) null, "Query parameter 'max-edits' should be an integer value. Got: " + attr);	×
338	}	1✔
339	}
340	return new FuzzyQuery(new Term(field, getText(node)), maxEdits);	1✔
341	}
342
343	private Query regexQuery(String field, Element node, QueryOptions options) {
344	RegexpQuery query = new RegexpQuery(new Term(field, getText(node)));	1✔
345	setRewriteMethod(query, node, options);	1✔
346	return query;	1✔
347	}
348
349	private Query booleanQuery(String field, Element node, Analyzer analyzer, QueryOptions options) throws XPathException {
350	BooleanQuery query = new BooleanQuery();	1✔
351
352	// Specifies a minimum number of the optional BooleanClauses which must be satisfied.
353	String minOpt = node.getAttribute("min");	1✔
354	if (!minOpt.isEmpty()) {	1✔
355	try {
356	int minMust = Integer.parseInt(minOpt);	1✔
357	query.setMinimumNumberShouldMatch(minMust);	1✔
358	} catch (NumberFormatException ex) {	×
359	// ignore
360	}	1✔
361	}
362
363	Node child = node.getFirstChild();	1✔
364	while (child != null) {	1✔
365	if (child.getNodeType() == Node.ELEMENT_NODE) {	1✔
366	Element elem = (Element) child;	1✔
367	Query childQuery = parse(field, elem, analyzer, options);	1✔
368	if (childQuery != null) {	1✔
369	BooleanClause.Occur occur = getOccur(elem);	1✔
370	query.add(childQuery, occur);	1✔
371	}
372	}
373	child = child.getNextSibling();	1✔
374	}
375	return query;	1✔
376	}
377
378	private void setRewriteMethod(MultiTermQuery query, Element node, QueryOptions options) {
379	boolean doFilterRewrite = options.filterRewrite();	1✔
380	String option = node.getAttribute("filter-rewrite");	1✔
381	if (!option.isEmpty()) {	1!
382	doFilterRewrite = option.equalsIgnoreCase("yes");	×
383	}
384	if (doFilterRewrite) {	1!
385	query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);	×
386	} else {
387	query.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT);	1✔
388	}
389	}	1✔
390
391	private BooleanClause.Occur getOccur(Element elem) {
392	BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;	1✔
393	String occurOpt = elem.getAttribute("occur");	1✔
394	if (!occurOpt.isEmpty()) {	1✔
395	occur = switch (occurOpt) {	1!
396	case "must" -> BooleanClause.Occur.MUST;	1✔
397	case "not" -> BooleanClause.Occur.MUST_NOT;	1✔
398	case "should" -> BooleanClause.Occur.SHOULD;	1✔
399	default -> occur;	1✔
400	};
401	}
402	return occur;	1✔
403	}
404
405	private Query parseChildren(String field, Element root, Analyzer analyzer, QueryOptions options) throws XPathException {
406	Query query = null;	1✔
407	Node child = root.getFirstChild();	1✔
408	while (child != null) {	1✔
409	if (child.getNodeType() == Node.ELEMENT_NODE) {	1✔
410	Query childQuery = parse(field, (Element) child, analyzer, options);	1✔
411	if (query != null) {	1✔
412	if (query instanceof BooleanQuery)	1!
413	((BooleanQuery) query).add(childQuery, BooleanClause.Occur.SHOULD);	×
414	else {
415	BooleanQuery boolQuery = new BooleanQuery();	1✔
416	boolQuery.add(query, BooleanClause.Occur.SHOULD);	1✔
417	boolQuery.add(childQuery, BooleanClause.Occur.SHOULD);	1✔
418	query = boolQuery;	1✔
419	}	1✔
420	} else
421	query = childQuery;	1✔
422	}
423	child = child.getNextSibling();	1✔
424	}
425	return query;	1✔
426	}
427
428	private void setBoost(Element node, Query query) throws XPathException {
429	String boost = node.getAttribute("boost");	1✔
430	if (!boost.isEmpty()) {	1!
431	try {
432	query.setBoost(Float.parseFloat(boost));	×
433	} catch (NumberFormatException e) {	×
434	throw new XPathException((Expression) null, "Bad value for boost in query parameter. Got: " + boost);	×
435	}	×
436	}
437	}	1✔
438
439	private String getText(Element root) {
440	final StringBuilder buf = new StringBuilder();	1✔
441	Node child = root.getFirstChild();	1✔
442	while (child != null) {	1✔
443	if (child.getNodeType() == Node.TEXT_NODE) {	1!
444	buf.append(child.getNodeValue());	1✔
445	}
446	child = child.getNextSibling();	1✔
447	}
448	return buf.toString();	1✔
449	}
450
451	private boolean hasElementContent(final Element root) {
452	Node child = root.getFirstChild();	1✔
453	while (child != null) {	1✔
454	if (child.getNodeType() == Node.ELEMENT_NODE) {	1✔
455	return true;	1✔
456	}
457	child = child.getNextSibling();	1✔
458	}
459	return false;	1✔
460	}
461
462	private String getField(Element node, String defaultField) {
463	final String field = node.getAttribute("field");	1✔
464	if (!field.isEmpty()) {	1✔
465	return field;	1✔
466	}
467	return defaultField;	1✔
468	}
469	}

evolvedbinary / elemental / 982

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous