org.opensearch.index.search.SimpleQueryStringQueryParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearch Show documentation
Show all versions of opensearch Show documentation
OpenSearch subproject :server
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.index.search;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostAttribute;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.query.AbstractQueryBuilder;
import org.opensearch.index.query.MultiMatchQueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.SimpleQueryStringBuilder;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import static org.opensearch.common.lucene.search.Queries.newUnmappedFieldQuery;
/**
* Wrapper class for Lucene's SimpleQueryStringQueryParser that allows us to redefine
* different types of queries.
*
* @opensearch.internal
*/
public class SimpleQueryStringQueryParser extends SimpleQueryParser {
private final Settings settings;
private QueryShardContext context;
private final MultiMatchQuery queryBuilder;
/** Creates a new parser with custom flags used to enable/disable certain features. */
public SimpleQueryStringQueryParser(Map weights, int flags, Settings settings, QueryShardContext context) {
this(null, weights, flags, settings, context);
}
/** Creates a new parser with custom flags used to enable/disable certain features. */
public SimpleQueryStringQueryParser(
Analyzer analyzer,
Map weights,
int flags,
Settings settings,
QueryShardContext context
) {
super(analyzer, weights, flags);
this.settings = settings;
this.context = context;
this.queryBuilder = new MultiMatchQuery(context);
this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery());
this.queryBuilder.setLenient(settings.lenient());
this.queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
if (analyzer != null) {
this.queryBuilder.setAnalyzer(analyzer);
}
}
private Analyzer getAnalyzer(MappedFieldType ft) {
if (getAnalyzer() != null) {
return analyzer;
}
return ft.getTextSearchInfo().getSearchAnalyzer();
}
/**
* Rethrow the runtime exception, unless the lenient flag has been set, returns {@link MatchNoDocsQuery}
*/
private Query rethrowUnlessLenient(RuntimeException e) {
if (settings.lenient()) {
return Queries.newMatchNoDocsQuery("failed query, caused by " + e.getMessage());
}
throw e;
}
@Override
public void setDefaultOperator(BooleanClause.Occur operator) {
super.setDefaultOperator(operator);
queryBuilder.setOccur(operator);
}
@Override
protected Query newTermQuery(Term term, float boost) {
MappedFieldType ft = context.fieldMapper(term.field());
if (ft == null) {
return newUnmappedFieldQuery(term.field());
}
return ft.termQuery(term.bytes(), context);
}
@Override
public Query newDefaultQuery(String text) {
try {
return queryBuilder.parse(MultiMatchQueryBuilder.Type.MOST_FIELDS, weights, text, null);
} catch (IOException e) {
return rethrowUnlessLenient(new IllegalStateException(e.getMessage()));
}
}
@Override
public Query newFuzzyQuery(String text, int fuzziness) {
List disjuncts = new ArrayList<>();
for (Map.Entry entry : weights.entrySet()) {
final String fieldName = entry.getKey();
final MappedFieldType ft = context.fieldMapper(fieldName);
if (ft == null) {
disjuncts.add(newUnmappedFieldQuery(fieldName));
continue;
}
try {
final BytesRef term = getAnalyzer(ft).normalize(fieldName, text);
Query query = ft.fuzzyQuery(
term,
Fuzziness.fromEdits(fuzziness),
settings.fuzzyPrefixLength,
settings.fuzzyMaxExpansions,
settings.fuzzyTranspositions,
context
);
disjuncts.add(wrapWithBoost(query, entry.getValue()));
} catch (RuntimeException e) {
disjuncts.add(rethrowUnlessLenient(e));
}
}
if (disjuncts.size() == 1) {
return disjuncts.get(0);
}
return new DisjunctionMaxQuery(disjuncts, 1.0f);
}
@Override
public Query newPhraseQuery(String text, int slop) {
try {
queryBuilder.setPhraseSlop(slop);
Map phraseWeights;
if (settings.quoteFieldSuffix() != null) {
phraseWeights = QueryParserHelper.resolveMappingFields(context, weights, settings.quoteFieldSuffix());
} else {
phraseWeights = weights;
}
return queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, phraseWeights, text, null);
} catch (IOException e) {
return rethrowUnlessLenient(new IllegalStateException(e.getMessage()));
} finally {
queryBuilder.setPhraseSlop(0);
}
}
@Override
public Query newPrefixQuery(String text) {
List disjuncts = new ArrayList<>();
for (Map.Entry entry : weights.entrySet()) {
final String fieldName = entry.getKey();
final MappedFieldType ft = context.fieldMapper(fieldName);
if (ft == null) {
disjuncts.add(newUnmappedFieldQuery(fieldName));
continue;
}
try {
if (settings.analyzeWildcard()) {
Query analyzedQuery = newPossiblyAnalyzedQuery(fieldName, text, getAnalyzer(ft));
if (analyzedQuery != null) {
disjuncts.add(wrapWithBoost(analyzedQuery, entry.getValue()));
}
} else {
BytesRef term = getAnalyzer(ft).normalize(fieldName, text);
Query query = ft.prefixQuery(term.utf8ToString(), null, context);
disjuncts.add(wrapWithBoost(query, entry.getValue()));
}
} catch (RuntimeException e) {
disjuncts.add(rethrowUnlessLenient(e));
}
}
if (disjuncts.size() == 1) {
return disjuncts.get(0);
}
return new DisjunctionMaxQuery(disjuncts, 1.0f);
}
private static Query wrapWithBoost(Query query, float boost) {
if (query instanceof MatchNoDocsQuery) {
return query;
}
if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
return new BoostQuery(query, boost);
}
return query;
}
/**
* Analyze the given string using its analyzer, constructing either a
* {@code PrefixQuery} or a {@code BooleanQuery} made up
* of {@code TermQuery}s and {@code PrefixQuery}s
*/
private Query newPossiblyAnalyzedQuery(String field, String termStr, Analyzer analyzer) {
List> tlist = new ArrayList<>();
try (TokenStream source = analyzer.tokenStream(field, termStr)) {
source.reset();
List currentPos = new ArrayList<>();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);
try {
boolean hasMoreTokens = source.incrementToken();
while (hasMoreTokens) {
if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
tlist.add(currentPos);
currentPos = new ArrayList<>();
}
final BytesRef term = analyzer.normalize(field, termAtt.toString());
currentPos.add(term);
hasMoreTokens = source.incrementToken();
}
if (currentPos.isEmpty() == false) {
tlist.add(currentPos);
}
} catch (IOException e) {
// ignore
// TODO: we should not ignore the exception and return a prefix query with the original term ?
}
} catch (IOException e) {
// Bail on any exceptions, going with a regular prefix query
return new PrefixQuery(new Term(field, termStr));
}
if (tlist.size() == 0) {
return null;
}
if (tlist.size() == 1 && tlist.get(0).size() == 1) {
return new PrefixQuery(new Term(field, tlist.get(0).get(0)));
}
// build a boolean query with prefix on the last position only.
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (int pos = 0; pos < tlist.size(); pos++) {
List plist = tlist.get(pos);
boolean isLastPos = (pos == tlist.size() - 1);
Query posQuery;
if (plist.size() == 1) {
if (isLastPos) {
posQuery = new PrefixQuery(new Term(field, plist.get(0)));
} else {
posQuery = newTermQuery(new Term(field, plist.get(0)), BoostAttribute.DEFAULT_BOOST);
}
} else if (isLastPos == false) {
// build a synonym query for terms in the same position.
SynonymQuery.Builder sb = new SynonymQuery.Builder(field);
for (BytesRef bytesRef : plist) {
sb.addTerm(new Term(field, bytesRef));
}
posQuery = sb.build();
} else {
BooleanQuery.Builder innerBuilder = new BooleanQuery.Builder();
for (BytesRef token : plist) {
innerBuilder.add(new BooleanClause(new PrefixQuery(new Term(field, token)), BooleanClause.Occur.SHOULD));
}
posQuery = innerBuilder.build();
}
builder.add(new BooleanClause(posQuery, getDefaultOperator()));
}
return builder.build();
}
/**
* Class encapsulating the settings for the SimpleQueryString query, with
* their default values
*
* @opensearch.internal
*/
public static class Settings {
/** Specifies whether lenient query parsing should be used. */
private boolean lenient = SimpleQueryStringBuilder.DEFAULT_LENIENT;
/** Specifies whether wildcards should be analyzed. */
private boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD;
/** Specifies a suffix, if any, to apply to field names for phrase matching. */
private String quoteFieldSuffix = null;
/** Whether phrase queries should be automatically generated for multi terms synonyms. */
private boolean autoGenerateSynonymsPhraseQuery = true;
/** Prefix length in fuzzy queries.*/
private int fuzzyPrefixLength = SimpleQueryStringBuilder.DEFAULT_FUZZY_PREFIX_LENGTH;
/** The number of terms fuzzy queries will expand to.*/
private int fuzzyMaxExpansions = SimpleQueryStringBuilder.DEFAULT_FUZZY_MAX_EXPANSIONS;
/** Whether transpositions are supported in fuzzy queries.*/
private boolean fuzzyTranspositions = SimpleQueryStringBuilder.DEFAULT_FUZZY_TRANSPOSITIONS;
/**
* Generates default {@link Settings} object (uses ROOT locale, does
* lowercase terms, no lenient parsing, no wildcard analysis).
* */
public Settings() {}
public Settings(Settings other) {
this.lenient = other.lenient;
this.analyzeWildcard = other.analyzeWildcard;
this.quoteFieldSuffix = other.quoteFieldSuffix;
this.autoGenerateSynonymsPhraseQuery = other.autoGenerateSynonymsPhraseQuery;
this.fuzzyPrefixLength = other.fuzzyPrefixLength;
this.fuzzyMaxExpansions = other.fuzzyMaxExpansions;
this.fuzzyTranspositions = other.fuzzyTranspositions;
}
/** Specifies whether to use lenient parsing, defaults to false. */
public void lenient(boolean lenient) {
this.lenient = lenient;
}
/** Returns whether to use lenient parsing. */
public boolean lenient() {
return this.lenient;
}
/** Specifies whether to analyze wildcards. Defaults to false if unset. */
public void analyzeWildcard(boolean analyzeWildcard) {
this.analyzeWildcard = analyzeWildcard;
}
/** Returns whether to analyze wildcards. */
public boolean analyzeWildcard() {
return analyzeWildcard;
}
/**
* Set the suffix to append to field names for phrase matching.
*/
public void quoteFieldSuffix(String suffix) {
this.quoteFieldSuffix = suffix;
}
/**
* Return the suffix to append for phrase matching, or {@code null} if
* no suffix should be appended.
*/
public String quoteFieldSuffix() {
return quoteFieldSuffix;
}
public void autoGenerateSynonymsPhraseQuery(boolean value) {
this.autoGenerateSynonymsPhraseQuery = value;
}
/**
* Whether phrase queries should be automatically generated for multi terms synonyms.
* Defaults to {@code true}.
*/
public boolean autoGenerateSynonymsPhraseQuery() {
return autoGenerateSynonymsPhraseQuery;
}
public int fuzzyPrefixLength() {
return fuzzyPrefixLength;
}
public void fuzzyPrefixLength(int fuzzyPrefixLength) {
this.fuzzyPrefixLength = fuzzyPrefixLength;
}
public int fuzzyMaxExpansions() {
return fuzzyMaxExpansions;
}
public void fuzzyMaxExpansions(int fuzzyMaxExpansions) {
this.fuzzyMaxExpansions = fuzzyMaxExpansions;
}
public boolean fuzzyTranspositions() {
return fuzzyTranspositions;
}
public void fuzzyTranspositions(boolean fuzzyTranspositions) {
this.fuzzyTranspositions = fuzzyTranspositions;
}
@Override
public int hashCode() {
return Objects.hash(
lenient,
analyzeWildcard,
quoteFieldSuffix,
autoGenerateSynonymsPhraseQuery,
fuzzyPrefixLength,
fuzzyMaxExpansions,
fuzzyTranspositions
);
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || getClass() != obj.getClass()) {
return false;
}
Settings other = (Settings) obj;
return Objects.equals(lenient, other.lenient)
&& Objects.equals(analyzeWildcard, other.analyzeWildcard)
&& Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix)
&& Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery)
&& Objects.equals(fuzzyPrefixLength, other.fuzzyPrefixLength)
&& Objects.equals(fuzzyMaxExpansions, other.fuzzyMaxExpansions)
&& Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions);
}
}
}