com.yelp.nrtsearch.server.luceneserver.QueryNodeMapper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of server Show documentation
Show all versions of server Show documentation
GRPC lucene server using near-real-time replication
/*
* Copyright 2020 Yelp Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.yelp.nrtsearch.server.luceneserver;
import static com.yelp.nrtsearch.server.luceneserver.analysis.AnalyzerCreator.isAnalyzerDefined;
import static com.yelp.nrtsearch.server.utils.QueryUtils.computeMaxEditsFromTermLength;
import com.yelp.nrtsearch.server.grpc.*;
import com.yelp.nrtsearch.server.grpc.MultiMatchQuery.MatchType;
import com.yelp.nrtsearch.server.luceneserver.analysis.AnalyzerCreator;
import com.yelp.nrtsearch.server.luceneserver.doc.DocLookup;
import com.yelp.nrtsearch.server.luceneserver.field.FieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.IndexableFieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.TextBaseFieldDef;
import com.yelp.nrtsearch.server.luceneserver.field.properties.GeoQueryable;
import com.yelp.nrtsearch.server.luceneserver.field.properties.PolygonQueryable;
import com.yelp.nrtsearch.server.luceneserver.field.properties.RangeQueryable;
import com.yelp.nrtsearch.server.luceneserver.field.properties.TermQueryable;
import com.yelp.nrtsearch.server.luceneserver.script.ScoreScript;
import com.yelp.nrtsearch.server.luceneserver.script.ScriptService;
import com.yelp.nrtsearch.server.luceneserver.search.query.MatchCrossFieldsQuery;
import com.yelp.nrtsearch.server.luceneserver.search.query.MatchPhrasePrefixQuery;
import com.yelp.nrtsearch.server.luceneserver.search.query.multifunction.MultiFunctionScoreQuery;
import com.yelp.nrtsearch.server.utils.ScriptParamsUtils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.EnumMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionMatchQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.join.QueryBitSetProducer;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.suggest.document.CompletionQuery;
import org.apache.lucene.search.suggest.document.FuzzyCompletionQuery;
import org.apache.lucene.search.suggest.document.MyContextQuery;
import org.apache.lucene.search.suggest.document.PrefixCompletionQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
/** This class maps our GRPC Query object to a Lucene Query object. */
public class QueryNodeMapper {
/** Shared mapper instance returned by {@link #getInstance()}. */
private static final QueryNodeMapper INSTANCE = new QueryNodeMapper();
/**
 * Get the shared mapper instance.
 *
 * @return the {@code QueryNodeMapper} held in {@code INSTANCE}
 */
public static QueryNodeMapper getInstance() {
return INSTANCE;
}
/**
 * Lookup from the gRPC boolean clause occur value to the Lucene occur value, populated by
 * {@code initializeOccurMapping()}.
 */
private final Map<com.yelp.nrtsearch.server.grpc.BooleanClause.Occur, BooleanClause.Occur>
    occurMapping = initializeOccurMapping();

/** Lookup from the gRPC match operator to the Lucene occur applied to the analyzed terms. */
private final Map<MatchOperator, BooleanClause.Occur> matchOperatorOccurMapping =
    new EnumMap<>(
        Map.of(
            MatchOperator.SHOULD, BooleanClause.Occur.SHOULD,
            MatchOperator.MUST, BooleanClause.Occur.MUST));
/**
 * Build a Lucene query from the gRPC query definition, using the doc lookup owned by the index
 * state.
 *
 * @param query gRPC query definition
 * @param state index state
 * @return Lucene query
 */
public Query getQuery(com.yelp.nrtsearch.server.grpc.Query query, IndexState state) {
  DocLookup indexDocLookup = state.docLookup;
  return getQuery(query, state, indexDocLookup);
}
/**
 * Build a Lucene query from the gRPC query definition, wrapping it in a {@link BoostQuery} when
 * a boost greater than zero is given. A boost of zero leaves the query unwrapped.
 *
 * @param query gRPC query definition
 * @param state index state
 * @param docLookup doc lookup handed to the query node builders
 * @return Lucene query, boosted if requested
 * @throws IllegalArgumentException if the boost is negative
 */
public Query getQuery(
    com.yelp.nrtsearch.server.grpc.Query query, IndexState state, DocLookup docLookup) {
  // The node is built before the boost is validated, matching the established ordering.
  Query luceneQuery = getQueryNode(query, state, docLookup);
  float boost = query.getBoost();
  if (boost < 0) {
    throw new IllegalArgumentException("Boost must be a positive number");
  }
  return boost > 0 ? new BoostQuery(luceneQuery, boost) : luceneQuery;
}
/**
 * Restrict a query to documents stored under the given nested path.
 *
 * @param query query to restrict
 * @param indexState index state used to resolve the path
 * @param path nested path; when null or empty the query is returned unchanged
 * @return the original query, or a boolean query combining a path filter with the query
 */
public Query applyQueryNestedPath(Query query, IndexState indexState, String path) {
  boolean hasPath = path != null && !path.isEmpty();
  if (!hasPath) {
    return query;
  }
  return new BooleanQuery.Builder()
      .add(getNestedPathQuery(indexState, path), BooleanClause.Occur.FILTER)
      .add(query, BooleanClause.Occur.MUST)
      .build();
}
/**
 * Create the query used to filter parent/child documents based on the nested path.
 *
 * @param indexState index state used to resolve the path
 * @param path nested path to match
 * @return term query on the {@code IndexState.NESTED_PATH} field for the resolved path
 */
public Query getNestedPathQuery(IndexState indexState, String path) {
  String resolvedPath = indexState.resolveQueryNestedPath(path);
  Term pathTerm = new Term(IndexState.NESTED_PATH, resolvedPath);
  return new TermQuery(pathTerm);
}
/**
 * Dispatch on the query node set in the gRPC query and build the matching Lucene query. Boost is
 * not applied here. An unset query node maps to a match-all query.
 *
 * <p>NOTE(review): the FUNCTIONFILTERQUERY branch does not forward {@code docLookup}, unlike the
 * FUNCTIONSCOREQUERY branch; confirm this is intended.
 *
 * @param query gRPC query definition
 * @param state index state
 * @param docLookup doc lookup forwarded to builders that accept one
 * @return Lucene query for the node
 * @throws UnsupportedOperationException if the query node case is not handled
 */
private Query getQueryNode(
    com.yelp.nrtsearch.server.grpc.Query query, IndexState state, DocLookup docLookup) {
  return switch (query.getQueryNodeCase()) {
    case BOOLEANQUERY -> getBooleanQuery(query.getBooleanQuery(), state, docLookup);
    case PHRASEQUERY -> getPhraseQuery(query.getPhraseQuery());
    case FUNCTIONSCOREQUERY ->
        getFunctionScoreQuery(query.getFunctionScoreQuery(), state, docLookup);
    case TERMQUERY -> getTermQuery(query.getTermQuery(), state);
    case TERMINSETQUERY -> getTermInSetQuery(query.getTermInSetQuery(), state);
    case DISJUNCTIONMAXQUERY ->
        getDisjunctionMaxQuery(query.getDisjunctionMaxQuery(), state, docLookup);
    case MATCHQUERY -> getMatchQuery(query.getMatchQuery(), state);
    case MATCHPHRASEQUERY -> getMatchPhraseQuery(query.getMatchPhraseQuery(), state);
    case MULTIMATCHQUERY -> getMultiMatchQuery(query.getMultiMatchQuery(), state);
    case RANGEQUERY -> getRangeQuery(query.getRangeQuery(), state);
    case GEOBOUNDINGBOXQUERY -> getGeoBoundingBoxQuery(query.getGeoBoundingBoxQuery(), state);
    case GEOPOINTQUERY -> getGeoPointQuery(query.getGeoPointQuery(), state);
    case NESTEDQUERY -> getNestedQuery(query.getNestedQuery(), state, docLookup);
    case EXISTSQUERY -> getExistsQuery(query.getExistsQuery(), state);
    case GEORADIUSQUERY -> getGeoRadiusQuery(query.getGeoRadiusQuery(), state);
    case FUNCTIONFILTERQUERY -> getFunctionFilterQuery(query.getFunctionFilterQuery(), state);
    case COMPLETIONQUERY -> getCompletionQuery(query.getCompletionQuery(), state);
    case MULTIFUNCTIONSCOREQUERY ->
        MultiFunctionScoreQuery.build(query.getMultiFunctionScoreQuery(), state);
    case MATCHPHRASEPREFIXQUERY ->
        MatchPhrasePrefixQuery.build(query.getMatchPhrasePrefixQuery(), state);
    case PREFIXQUERY -> getPrefixQuery(query.getPrefixQuery(), state);
    case CONSTANTSCOREQUERY ->
        getConstantScoreQuery(query.getConstantScoreQuery(), state, docLookup);
    case SPANQUERY -> getSpanQuery(query.getSpanQuery(), state);
    case GEOPOLYGONQUERY -> getGeoPolygonQuery(query.getGeoPolygonQuery(), state);
    case QUERYNODE_NOT_SET -> new MatchAllDocsQuery();
    default ->
        throw new UnsupportedOperationException(
            "Unsupported query type received: " + query.getQueryNodeCase());
  };
}
/**
 * Build a suggest completion query (prefix or fuzzy) on the requested field and wrap it in a
 * context query carrying the contexts listed in the request.
 *
 * @param completionQueryDef gRPC completion query definition
 * @param state index state providing the search analyzer
 * @return context query wrapping the completion query
 * @throws UnsupportedOperationException if the completion query type is not handled
 */
private Query getCompletionQuery(
    com.yelp.nrtsearch.server.grpc.CompletionQuery completionQueryDef, IndexState state) {
  CompletionQuery suggestQuery =
      switch (completionQueryDef.getQueryType()) {
        case PREFIX_QUERY ->
            new PrefixCompletionQuery(
                state.searchAnalyzer,
                new Term(completionQueryDef.getField(), completionQueryDef.getText()));
        case FUZZY_QUERY ->
            new FuzzyCompletionQuery(
                state.searchAnalyzer,
                new Term(completionQueryDef.getField(), completionQueryDef.getText()));
        default ->
            throw new UnsupportedOperationException(
                "Unsupported suggest query type received: " + completionQueryDef.getQueryType());
      };
  MyContextQuery withContexts = new MyContextQuery(suggestQuery);
  withContexts.addContexts(completionQueryDef.getContextsList());
  return withContexts;
}
/**
 * Build a block join query that matches parent documents through their nested child documents.
 * The child query is filtered to the requested nested path, and parents are identified by the
 * {@code IndexState.ROOT} path.
 *
 * @param nestedQuery gRPC nested query definition
 * @param state index state
 * @param docLookup doc lookup forwarded to the child query
 * @return parent block join query
 */
private Query getNestedQuery(
    com.yelp.nrtsearch.server.grpc.NestedQuery nestedQuery,
    IndexState state,
    DocLookup docLookup) {
  Query innerQuery = getQuery(nestedQuery.getQuery(), state, docLookup);

  BooleanQuery.Builder childBuilder = new BooleanQuery.Builder();
  childBuilder.add(getNestedPathQuery(state, nestedQuery.getPath()), BooleanClause.Occur.FILTER);
  childBuilder.add(innerQuery, BooleanClause.Occur.MUST);
  Query childQuery = childBuilder.build();

  QueryBitSetProducer parentFilter =
      new QueryBitSetProducer(getNestedPathQuery(state, IndexState.ROOT));
  return new ToParentBlockJoinQuery(childQuery, parentFilter, getScoreMode(nestedQuery));
}
/**
 * Translate the score mode of a gRPC nested query into the Lucene join {@link ScoreMode}.
 *
 * @param nestedQuery gRPC nested query definition
 * @return Lucene score mode
 * @throws UnsupportedOperationException if the score mode is not handled
 */
private ScoreMode getScoreMode(com.yelp.nrtsearch.server.grpc.NestedQuery nestedQuery) {
  return switch (nestedQuery.getScoreMode()) {
    case NONE -> ScoreMode.None;
    case AVG -> ScoreMode.Avg;
    case MAX -> ScoreMode.Max;
    case MIN -> ScoreMode.Min;
    case SUM -> ScoreMode.Total;
    default ->
        throw new UnsupportedOperationException(
            "Unsupported score mode received: " + nestedQuery.getScoreMode());
  };
}
/**
 * Build a Lucene boolean query from the gRPC definition.
 *
 * <p>With no clauses, the result contains a single MUST match-all clause. When every clause is
 * MUST_NOT, a match-all FILTER clause is appended alongside them.
 *
 * @param booleanQuery gRPC boolean query definition
 * @param state index state
 * @param docLookup doc lookup forwarded to the clause queries
 * @return Lucene boolean query
 */
private BooleanQuery getBooleanQuery(
    com.yelp.nrtsearch.server.grpc.BooleanQuery booleanQuery,
    IndexState state,
    DocLookup docLookup) {
  BooleanQuery.Builder builder = new BooleanQuery.Builder();
  builder.setMinimumNumberShouldMatch(booleanQuery.getMinimumNumberShouldMatch());

  if (booleanQuery.getClausesCount() == 0) {
    builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
    return builder.build();
  }

  boolean onlyMustNot = true;
  for (com.yelp.nrtsearch.server.grpc.BooleanClause clause : booleanQuery.getClausesList()) {
    com.yelp.nrtsearch.server.grpc.BooleanClause.Occur occur = clause.getOccur();
    builder.add(getQuery(clause.getQuery(), state, docLookup), occurMapping.get(occur));
    if (occur != com.yelp.nrtsearch.server.grpc.BooleanClause.Occur.MUST_NOT) {
      onlyMustNot = false;
    }
  }

  if (onlyMustNot) {
    builder.add(new MatchAllDocsQuery(), BooleanClause.Occur.FILTER);
  }
  return builder.build();
}
/**
 * Build a Lucene phrase query from the listed terms, all on the field named in the request. The
 * terms are used as given; no analyzer is applied here.
 *
 * @param phraseQuery gRPC phrase query definition
 * @return Lucene phrase query with the requested slop
 */
private PhraseQuery getPhraseQuery(com.yelp.nrtsearch.server.grpc.PhraseQuery phraseQuery) {
  PhraseQuery.Builder builder = new PhraseQuery.Builder();
  builder.setSlop(phraseQuery.getSlop());
  for (String termText : phraseQuery.getTermsList()) {
    builder.add(new Term(phraseQuery.getField(), termText));
  }
  return builder.build();
}
/**
 * Build a function score query: the wrapped query selects documents and the compiled score
 * script supplies the value source.
 *
 * @param functionScoreQuery gRPC function score query definition
 * @param state index state
 * @param docLookup doc lookup given to the script factory and the wrapped query
 * @return Lucene function score query
 */
private FunctionScoreQuery getFunctionScoreQuery(
    com.yelp.nrtsearch.server.grpc.FunctionScoreQuery functionScoreQuery,
    IndexState state,
    DocLookup docLookup) {
  ScoreScript.Factory scriptFactory =
      ScriptService.getInstance().compile(functionScoreQuery.getScript(), ScoreScript.CONTEXT);
  // Typed map instead of a raw Map so the factory call is checked by the compiler.
  Map<String, Object> params =
      ScriptParamsUtils.decodeParams(functionScoreQuery.getScript().getParamsMap());
  return new FunctionScoreQuery(
      getQuery(functionScoreQuery.getQuery(), state, docLookup),
      scriptFactory.newFactory(params, docLookup));
}
/**
 * Build a filter query that matches documents for which the compiled score script produces a
 * value greater than zero.
 *
 * <p>NOTE(review): the script is bound to {@code state.docLookup}. This method receives no
 * {@code DocLookup} argument, so a lookup passed to {@code getQuery} is not used here, unlike in
 * {@code getFunctionScoreQuery}. Confirm this is intended.
 *
 * @param functionFilterQuery gRPC function filter query definition
 * @param state index state providing the doc lookup
 * @return Lucene function match query
 */
private FunctionMatchQuery getFunctionFilterQuery(
    FunctionFilterQuery functionFilterQuery, IndexState state) {
  ScoreScript.Factory scriptFactory =
      ScriptService.getInstance().compile(functionFilterQuery.getScript(), ScoreScript.CONTEXT);
  // Typed map instead of a raw Map so the factory call is checked by the compiler.
  Map<String, Object> params =
      ScriptParamsUtils.decodeParams(functionFilterQuery.getScript().getParamsMap());
  return new FunctionMatchQuery(
      scriptFactory.newFactory(params, state.docLookup), score -> score > 0);
}
/**
 * Build a term query by delegating to the field definition, which must implement
 * {@link TermQueryable}.
 *
 * @param termQuery gRPC term query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field type does not support term queries
 * @throws IllegalStateException if the field is indexable but not searchable
 */
private Query getTermQuery(com.yelp.nrtsearch.server.grpc.TermQuery termQuery, IndexState state) {
  FieldDef fieldDef = state.getField(termQuery.getField());
  if (!(fieldDef instanceof TermQueryable)) {
    throw new IllegalArgumentException(
        String.format(
            "Unable to create TermQuery: %s, field type: %s is not supported for TermQuery",
            termQuery, fieldDef.getType()));
  }
  validateTermQueryIsSearchable(fieldDef);
  return ((TermQueryable) fieldDef).getTermQuery(termQuery);
}
/**
 * Reject indexable fields that are not searchable. Fields that are not {@link IndexableFieldDef}
 * pass without any check.
 *
 * @param fieldDef field definition to validate
 * @throws IllegalStateException if the field is indexable but not searchable
 */
private void validateTermQueryIsSearchable(FieldDef fieldDef) {
  if (!(fieldDef instanceof IndexableFieldDef)) {
    return;
  }
  if (((IndexableFieldDef) fieldDef).isSearchable()) {
    return;
  }
  throw new IllegalStateException(
      "Field "
          + fieldDef.getName()
          + " is not searchable, which is required for TermQuery / TermInSetQuery");
}
/**
 * Build a term-in-set query by delegating to the field definition, which must implement
 * {@link TermQueryable}.
 *
 * @param termInSetQuery gRPC term-in-set query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field type does not support term-in-set queries
 * @throws IllegalStateException if the field is indexable but not searchable
 */
private Query getTermInSetQuery(
    com.yelp.nrtsearch.server.grpc.TermInSetQuery termInSetQuery, IndexState state) {
  FieldDef fieldDef = state.getField(termInSetQuery.getField());
  if (!(fieldDef instanceof TermQueryable)) {
    throw new IllegalArgumentException(
        String.format(
            "Unable to create TermInSetQuery: %s, field type: %s is not supported for TermInSetQuery",
            termInSetQuery, fieldDef.getType()));
  }
  validateTermQueryIsSearchable(fieldDef);
  return ((TermQueryable) fieldDef).getTermInSetQuery(termInSetQuery);
}
/**
 * Build a disjunction max query from the listed disjuncts, each converted through
 * {@code getQuery} so per-disjunct boosts are applied.
 *
 * @param disjunctionMaxQuery gRPC disjunction max query definition
 * @param state index state
 * @param docLookup doc lookup forwarded to the disjunct queries
 * @return Lucene disjunction max query using the requested tie breaker multiplier
 */
private DisjunctionMaxQuery getDisjunctionMaxQuery(
    com.yelp.nrtsearch.server.grpc.DisjunctionMaxQuery disjunctionMaxQuery,
    IndexState state,
    DocLookup docLookup) {
  // Typed list instead of a raw List so the element type is checked by the compiler.
  List<Query> disjuncts =
      disjunctionMaxQuery.getDisjunctsList().stream()
          .map(query -> getQuery(query, state, docLookup))
          .collect(Collectors.toList());
  return new DisjunctionMaxQuery(disjuncts, disjunctionMaxQuery.getTieBreakerMultiplier());
}
/**
 * Build a match query by analyzing the query text and combining the resulting terms with the
 * requested operator.
 *
 * <p>The analyzer named in the request is used when defined, otherwise the index search
 * analyzer. If analysis yields no tokens, a match-none query is returned. The minimum should
 * match setting is only applied when the builder produced a {@link BooleanQuery}; any other
 * query shape is returned as built.
 *
 * @param matchQuery gRPC match query definition
 * @param state index state providing the default search analyzer
 * @return Lucene query for the analyzed text
 */
private Query getMatchQuery(MatchQuery matchQuery, IndexState state) {
  Analyzer analyzer =
      isAnalyzerDefined(matchQuery.getAnalyzer())
          ? AnalyzerCreator.getInstance().getAnalyzer(matchQuery.getAnalyzer())
          : state.searchAnalyzer;

  QueryBuilder queryBuilder = new MatchQueryBuilder(analyzer, matchQuery.getFuzzyParams());

  // This created query will be TermQuery or FuzzyQuery if only one token is found after analysis,
  // otherwise BooleanQuery. The BooleanQuery may include clauses with TermQuery or FuzzyQuery.
  Query query =
      queryBuilder.createBooleanQuery(
          matchQuery.getField(),
          matchQuery.getQuery(),
          matchOperatorOccurMapping.get(matchQuery.getOperator()));

  // This can happen if there are no tokens found after analyzing the query text
  if (query == null) {
    return new MatchNoDocsQuery();
  }

  // TODO: investigate using createMinShouldMatchQuery instead
  // Only a BooleanQuery has clauses to rebuild. Checking the type here, rather than listing the
  // known single-term types, avoids a ClassCastException if the builder returns another query
  // type (presumably possible when several tokens share one position; verify against the
  // analyzer in use).
  if (matchQuery.getMinimumNumberShouldMatch() == 0 || !(query instanceof BooleanQuery)) {
    return query;
  }

  BooleanQuery.Builder builder =
      new BooleanQuery.Builder()
          .setMinimumNumberShouldMatch(matchQuery.getMinimumNumberShouldMatch());
  ((BooleanQuery) query).clauses().forEach(builder::add);
  return builder.build();
}
/**
 * Build a phrase query by analyzing the query text. The analyzer named in the request is used
 * when defined, otherwise the index search analyzer.
 *
 * <p>When analysis yields no tokens, the zero terms setting decides between a match-none and a
 * match-all query.
 *
 * @param matchPhraseQuery gRPC match phrase query definition
 * @param state index state providing the default search analyzer
 * @return Lucene query for the analyzed phrase
 * @throws IllegalArgumentException if no tokens are produced and the zero terms setting is
 *     neither NONE_ZERO_TERMS nor ALL_ZERO_TERMS
 */
private Query getMatchPhraseQuery(MatchPhraseQuery matchPhraseQuery, IndexState state) {
  Analyzer analyzer;
  if (isAnalyzerDefined(matchPhraseQuery.getAnalyzer())) {
    analyzer = AnalyzerCreator.getInstance().getAnalyzer(matchPhraseQuery.getAnalyzer());
  } else {
    analyzer = state.searchAnalyzer;
  }

  // This created query will be TermQuery if only one token is found after analysis, otherwise
  // PhraseQuery
  Query analyzedQuery =
      new QueryBuilder(analyzer)
          .createPhraseQuery(
              matchPhraseQuery.getField(),
              matchPhraseQuery.getQuery(),
              matchPhraseQuery.getSlop());
  if (analyzedQuery != null) {
    return analyzedQuery;
  }

  // No tokens were found after analyzing the query text
  MatchPhraseQuery.ZeroTerms zeroTermsQuery = matchPhraseQuery.getZeroTermsQuery();
  return switch (zeroTermsQuery) {
    case NONE_ZERO_TERMS -> new MatchNoDocsQuery();
    case ALL_ZERO_TERMS -> new MatchAllDocsQuery();
    default ->
        throw new IllegalArgumentException(
            zeroTermsQuery
                + " not valid. ZeroTermsQuery should be NONE_ZERO_TERMS or ALL_ZERO_TERMS");
  };
}
/**
 * Build a multi match query over several fields.
 *
 * <p>When no fields are listed, all fields registered in the index state are used. The
 * CROSS_FIELDS type is delegated to {@code getMultiMatchCrossFieldsQuery}. For the other types,
 * one query is built per field, wrapped in a {@link BoostQuery} when a boost is configured for
 * that field, and the per-field queries are combined in a {@link DisjunctionMaxQuery} with a
 * tie breaker of 0.
 *
 * @param multiMatchQuery gRPC multi match query definition
 * @param state index state
 * @return Lucene query spanning the requested fields
 * @throws IllegalArgumentException if the match type is unknown or a field boost is negative
 */
private Query getMultiMatchQuery(MultiMatchQuery multiMatchQuery, IndexState state) {
  Map<String, Float> fieldBoosts = multiMatchQuery.getFieldBoostsMap();

  // Take all fields if none are provided
  Collection<String> fields;
  if (multiMatchQuery.getFieldsList().isEmpty()) {
    fields = state.getAllFields().keySet();
  } else {
    fields = multiMatchQuery.getFieldsList();
  }

  if (multiMatchQuery.getType() == MatchType.CROSS_FIELDS) {
    return getMultiMatchCrossFieldsQuery(fields, multiMatchQuery, state);
  }

  List<Query> matchQueries = new ArrayList<>();
  for (String field : fields) {
    Query query;
    switch (multiMatchQuery.getType()) {
      case BEST_FIELDS:
        // TODO: making the analyzer once and using it for all match queries would be more
        // efficient
        MatchQuery matchQuery =
            MatchQuery.newBuilder()
                .setField(field)
                .setQuery(multiMatchQuery.getQuery())
                .setOperator(multiMatchQuery.getOperator())
                .setMinimumNumberShouldMatch(multiMatchQuery.getMinimumNumberShouldMatch())
                .setAnalyzer(multiMatchQuery.getAnalyzer())
                .setFuzzyParams(multiMatchQuery.getFuzzyParams())
                .build();
        query = getMatchQuery(matchQuery, state);
        break;
      case PHRASE_PREFIX:
        query =
            MatchPhrasePrefixQuery.build(
                com.yelp.nrtsearch.server.grpc.MatchPhrasePrefixQuery.newBuilder()
                    .setField(field)
                    .setQuery(multiMatchQuery.getQuery())
                    .setAnalyzer(multiMatchQuery.getAnalyzer())
                    .setSlop(multiMatchQuery.getSlop())
                    .setMaxExpansions(multiMatchQuery.getMaxExpansions())
                    .build(),
                state);
        break;
      default:
        throw new IllegalArgumentException(
            "Unknown multi match type: " + multiMatchQuery.getType());
    }

    // A field without a configured boost keeps its query unwrapped.
    Float boost = fieldBoosts.get(field);
    if (boost == null) {
      matchQueries.add(query);
      continue;
    }
    if (boost < 0) {
      throw new IllegalArgumentException(
          String.format(
              "Invalid boost %f for field: %s, query: %s", boost, field, multiMatchQuery));
    }
    matchQueries.add(new BoostQuery(query, boost));
  }
  return new DisjunctionMaxQuery(matchQueries, 0);
}
/**
 * Build a cross fields multi match query.
 *
 * <p>Every field must be a searchable {@link TextBaseFieldDef}. The analyzer is the one named in
 * the request when present; otherwise the search analyzer of the first field that provides one.
 *
 * @param fields names of the fields to search
 * @param multiMatchQuery gRPC multi match query definition
 * @param state index state used to look up the fields
 * @return Lucene cross fields query
 * @throws IllegalArgumentException if a field is not analyzable or not searchable, or if no
 *     analyzer could be determined (which includes an empty field collection)
 */
private Query getMultiMatchCrossFieldsQuery(
    Collection<String> fields, MultiMatchQuery multiMatchQuery, IndexState state) {
  Analyzer analyzer = null;
  for (String field : fields) {
    FieldDef fieldDef = state.getField(field);
    if (!(fieldDef instanceof TextBaseFieldDef)) {
      throw new IllegalArgumentException("Field must be analyzable: " + field);
    }
    TextBaseFieldDef textBaseFieldDef = (TextBaseFieldDef) fieldDef;
    if (!textBaseFieldDef.isSearchable()) {
      throw new IllegalArgumentException("Field must be searchable: " + field);
    }
    // Keep trying on later fields until an analyzer has been resolved.
    if (analyzer == null) {
      analyzer =
          multiMatchQuery.hasAnalyzer()
              ? AnalyzerCreator.getInstance().getAnalyzer(multiMatchQuery.getAnalyzer())
              : textBaseFieldDef.getSearchAnalyzer().orElse(null);
    }
  }
  if (analyzer == null) {
    throw new IllegalArgumentException("Could not determine analyzer for query");
  }
  return MatchCrossFieldsQuery.build(
      multiMatchQuery.getQuery(),
      new ArrayList<>(fields),
      multiMatchQuery.getFieldBoostsMap(),
      multiMatchQuery.getOperator(),
      multiMatchQuery.getMinimumNumberShouldMatch(),
      multiMatchQuery.getTieBreakerMultiplier(),
      analyzer);
}
/**
 * Build a range query by delegating to the field definition, which must implement
 * {@link RangeQueryable}.
 *
 * @param rangeQuery gRPC range query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field does not support range queries
 */
private Query getRangeQuery(RangeQuery rangeQuery, IndexState state) {
  String fieldName = rangeQuery.getField();
  FieldDef fieldDef = state.getField(fieldName);
  if (fieldDef instanceof RangeQueryable) {
    return ((RangeQueryable) fieldDef).getRangeQuery(rangeQuery);
  }
  throw new IllegalArgumentException("Field: " + fieldName + " does not support RangeQuery");
}
/**
 * Build a geo bounding box query by delegating to the field definition, which must implement
 * {@link GeoQueryable}.
 *
 * @param geoBoundingBoxQuery gRPC geo bounding box query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field does not support geo queries
 */
private Query getGeoBoundingBoxQuery(GeoBoundingBoxQuery geoBoundingBoxQuery, IndexState state) {
  String fieldName = geoBoundingBoxQuery.getField();
  FieldDef fieldDef = state.getField(fieldName);
  if (fieldDef instanceof GeoQueryable) {
    return ((GeoQueryable) fieldDef).getGeoBoundingBoxQuery(geoBoundingBoxQuery);
  }
  throw new IllegalArgumentException(
      "Field: " + fieldName + " does not support GeoBoundingBoxQuery");
}
/**
 * Build a geo radius query by delegating to the field definition, which must implement
 * {@link GeoQueryable}.
 *
 * @param geoRadiusQuery gRPC geo radius query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field does not support geo queries
 */
private Query getGeoRadiusQuery(GeoRadiusQuery geoRadiusQuery, IndexState state) {
  String fieldName = geoRadiusQuery.getField();
  FieldDef fieldDef = state.getField(fieldName);
  if (fieldDef instanceof GeoQueryable) {
    return ((GeoQueryable) fieldDef).getGeoRadiusQuery(geoRadiusQuery);
  }
  throw new IllegalArgumentException(
      "Field: " + fieldName + " does not support GeoRadiusQuery");
}
/**
 * Build a geo point query by delegating to the field definition, which must implement
 * {@link PolygonQueryable}.
 *
 * @param geoPointQuery gRPC geo point query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field does not support geo point queries
 */
private Query getGeoPointQuery(GeoPointQuery geoPointQuery, IndexState state) {
  String fieldName = geoPointQuery.getField();
  FieldDef field = state.getField(fieldName);
  if (!(field instanceof PolygonQueryable)) {
    // The message names the query type handled here and keeps a space after the field name.
    throw new IllegalArgumentException(
        "Field: " + fieldName + " does not support GeoPointQuery");
  }
  return ((PolygonQueryable) field).getGeoPointQuery(geoPointQuery);
}
/**
 * Build a geo polygon query by delegating to the field definition, which must implement
 * {@link GeoQueryable}.
 *
 * @param geoPolygonQuery gRPC geo polygon query definition
 * @param state index state used to look up the field
 * @return Lucene query produced by the field definition
 * @throws IllegalArgumentException if the field does not support geo queries
 */
private Query getGeoPolygonQuery(GeoPolygonQuery geoPolygonQuery, IndexState state) {
  String fieldName = geoPolygonQuery.getField();
  FieldDef fieldDef = state.getField(fieldName);
  if (fieldDef instanceof GeoQueryable) {
    return ((GeoQueryable) fieldDef).getGeoPolygonQuery(geoPolygonQuery);
  }
  throw new IllegalArgumentException(
      "Field " + fieldName + " does not support GeoPolygonQuery");
}
/**
 * Build the lookup from gRPC occur values to Lucene occur values. Each gRPC value other than
 * UNRECOGNIZED is mapped to the Lucene constant with the same name.
 *
 * @return enum map keyed by the gRPC occur value
 * @throws IllegalArgumentException if a gRPC occur name has no Lucene constant of that name
 */
private Map<com.yelp.nrtsearch.server.grpc.BooleanClause.Occur, BooleanClause.Occur>
    initializeOccurMapping() {
  Map<com.yelp.nrtsearch.server.grpc.BooleanClause.Occur, BooleanClause.Occur> mapping =
      new EnumMap<>(com.yelp.nrtsearch.server.grpc.BooleanClause.Occur.class);
  for (com.yelp.nrtsearch.server.grpc.BooleanClause.Occur occur :
      com.yelp.nrtsearch.server.grpc.BooleanClause.Occur.values()) {
    // UNRECOGNIZED is skipped, so it is absent from the map.
    if (occur != com.yelp.nrtsearch.server.grpc.BooleanClause.Occur.UNRECOGNIZED) {
      mapping.put(occur, BooleanClause.Occur.valueOf(occur.name()));
    }
  }
  return mapping;
}
/**
 * Build a constant score query matching documents whose {@code IndexState.FIELD_NAMES} field
 * contains the requested field name.
 *
 * @param existsQuery gRPC exists query definition
 * @param state index state (not read by this method)
 * @return constant score term query
 */
private Query getExistsQuery(ExistsQuery existsQuery, IndexState state) {
  Term fieldNameTerm = new Term(IndexState.FIELD_NAMES, existsQuery.getField());
  TermQuery hasField = new TermQuery(fieldNameTerm);
  return new ConstantScoreQuery(hasField);
}
/**
 * Build a Lucene prefix query on a field that is indexed with terms, applying the rewrite method
 * requested in the definition.
 *
 * @param prefixQuery gRPC prefix query definition
 * @param state index state used to look up the field
 * @return Lucene prefix query
 * @throws IllegalArgumentException if the field is not indexable or is not indexed with terms
 */
private static Query getPrefixQuery(PrefixQuery prefixQuery, IndexState state) {
  FieldDef fieldDef = state.getField(prefixQuery.getField());

  if (!(fieldDef instanceof IndexableFieldDef)) {
    // Report the field name here; the prefix text is not the field being rejected.
    throw new IllegalArgumentException(
        "Field \"" + prefixQuery.getField() + "\" is not indexable");
  }
  IndexOptions indexOptions = ((IndexableFieldDef) fieldDef).getFieldType().indexOptions();
  if (indexOptions == IndexOptions.NONE) {
    throw new IllegalArgumentException(
        "Field \"" + prefixQuery.getField() + "\" is not indexed with terms");
  }
  org.apache.lucene.search.PrefixQuery query =
      new org.apache.lucene.search.PrefixQuery(
          new Term(prefixQuery.getField(), prefixQuery.getPrefix()));
  query.setRewriteMethod(
      getRewriteMethod(prefixQuery.getRewrite(), prefixQuery.getRewriteTopTermsSize()));
  return query;
}
/**
 * Translate the gRPC rewrite method into the Lucene multi term rewrite method.
 *
 * @param rewriteMethodGrpc gRPC rewrite method
 * @param topTermsSize size passed to the TOP_TERMS* rewrite implementations
 * @return Lucene rewrite method
 * @throws IllegalArgumentException if the rewrite method is not handled
 */
private static MultiTermQuery.RewriteMethod getRewriteMethod(
    RewriteMethod rewriteMethodGrpc, int topTermsSize) {
  return switch (rewriteMethodGrpc) {
    case CONSTANT_SCORE -> MultiTermQuery.CONSTANT_SCORE_REWRITE;
    case CONSTANT_SCORE_BOOLEAN -> MultiTermQuery.CONSTANT_SCORE_BOOLEAN_REWRITE;
    case SCORING_BOOLEAN -> MultiTermQuery.SCORING_BOOLEAN_REWRITE;
    case TOP_TERMS_BLENDED_FREQS ->
        new MultiTermQuery.TopTermsBlendedFreqScoringRewrite(topTermsSize);
    case TOP_TERMS_BOOST -> new MultiTermQuery.TopTermsBoostOnlyBooleanQueryRewrite(topTermsSize);
    case TOP_TERMS -> new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(topTermsSize);
    default ->
        throw new IllegalArgumentException("Unknown rewrite method: " + rewriteMethodGrpc);
  };
}
/**
 * Wrap the filter query of the definition in a Lucene {@link ConstantScoreQuery}.
 *
 * @param constantScoreQueryGrpc gRPC constant score query definition
 * @param indexState index state
 * @param docLookup doc lookup forwarded to the filter query
 * @return constant score query around the converted filter
 */
private Query getConstantScoreQuery(
    com.yelp.nrtsearch.server.grpc.ConstantScoreQuery constantScoreQueryGrpc,
    IndexState indexState,
    DocLookup docLookup) {
  return new ConstantScoreQuery(
      getQuery(constantScoreQueryGrpc.getFilter(), indexState, docLookup));
}
/**
 * Build a Lucene span query from the gRPC definition. Span near queries are converted
 * recursively, clause by clause.
 *
 * @param protoSpanQuery gRPC span query definition
 * @param state index state
 * @return Lucene span query
 * @throws IllegalArgumentException if the span query case is not handled
 */
private SpanQuery getSpanQuery(
    com.yelp.nrtsearch.server.grpc.SpanQuery protoSpanQuery, IndexState state) {
  com.yelp.nrtsearch.server.grpc.SpanQuery.QueryCase queryCase = protoSpanQuery.getQueryCase();
  switch (queryCase) {
    case SPANNEARQUERY:
      com.yelp.nrtsearch.server.grpc.SpanNearQuery protoSpanNearQuery =
          protoSpanQuery.getSpanNearQuery();
      // Typed list so toArray yields SpanQuery[]; only this branch needs the clause list.
      List<SpanQuery> clauses = new ArrayList<>();
      for (com.yelp.nrtsearch.server.grpc.SpanQuery protoClause :
          protoSpanNearQuery.getClausesList()) {
        clauses.add(getSpanQuery(protoClause, state));
      }
      SpanQuery[] clausesArray = clauses.toArray(new SpanQuery[0]);
      int slop = protoSpanNearQuery.getSlop();
      boolean inOrder = protoSpanNearQuery.getInOrder();
      return new SpanNearQuery(clausesArray, slop, inOrder);
    case SPANTERMQUERY:
      com.yelp.nrtsearch.server.grpc.TermQuery protoSpanTermQuery =
          protoSpanQuery.getSpanTermQuery();
      return new SpanTermQuery(
          new Term(protoSpanTermQuery.getField(), protoSpanTermQuery.getTextValue()));
    case SPANMULTITERMQUERY:
      com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery protoSpanMultiTermQuery =
          protoSpanQuery.getSpanMultiTermQuery();
      return getSpanMultiTermQueryWrapper(protoSpanMultiTermQuery, state);
    default:
      throw new IllegalArgumentException("Unsupported Span Query: " + protoSpanQuery);
  }
}
/**
 * Build the multi term query wrapped by a span multi term query (wildcard, fuzzy, prefix, regexp
 * or term range) and wrap it so it can be used as a span query.
 *
 * @param protoSpanMultiTermQuery gRPC span multi term query definition
 * @param state index state, used for the prefix query field lookup
 * @return span wrapper around the built multi term query
 * @throws IllegalArgumentException if the wrapped query case is not handled
 */
private SpanMultiTermQueryWrapper<?> getSpanMultiTermQueryWrapper(
    com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery protoSpanMultiTermQuery, IndexState state) {
  com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery.WrappedQueryCase wrappedQueryCase =
      protoSpanMultiTermQuery.getWrappedQueryCase();

  switch (wrappedQueryCase) {
    case WILDCARDQUERY:
      WildcardQuery wildcardQuery =
          new WildcardQuery(
              new Term(
                  protoSpanMultiTermQuery.getWildcardQuery().getField(),
                  protoSpanMultiTermQuery.getWildcardQuery().getText()));
      wildcardQuery.setRewriteMethod(
          getRewriteMethod(
              protoSpanMultiTermQuery.getWildcardQuery().getRewrite(),
              protoSpanMultiTermQuery.getWildcardQuery().getRewriteTopTermsSize()));
      return new SpanMultiTermQueryWrapper<>(wildcardQuery);
    case FUZZYQUERY:
      FuzzyQuery fuzzyQuery = getFuzzyQuery(protoSpanMultiTermQuery);
      return new SpanMultiTermQueryWrapper<>(fuzzyQuery);
    case PREFIXQUERY:
      // getPrefixQuery is declared to return Query, hence the cast to MultiTermQuery.
      Query prefixQuery = getPrefixQuery(protoSpanMultiTermQuery.getPrefixQuery(), state);
      return new SpanMultiTermQueryWrapper<>((MultiTermQuery) prefixQuery);
    case REGEXPQUERY:
      RegexpQuery regexpQuery = getRegexpQuery(protoSpanMultiTermQuery);
      return new SpanMultiTermQueryWrapper<>(regexpQuery);
    case TERMRANGEQUERY:
      TermRangeQuery termRangeQuery =
          getTermRangeQuery(protoSpanMultiTermQuery.getTermRangeQuery());
      return new SpanMultiTermQueryWrapper<>(termRangeQuery);
    default:
      throw new IllegalArgumentException(
          "Unsupported Span Multi Query Term Wrapper: " + protoSpanMultiTermQuery);
  }
}
/**
 * Build a Lucene fuzzy query from the fuzzy query carried by a span multi term query.
 *
 * <p>Max edits comes from the auto fuzziness setting when present, otherwise from the explicit
 * max edits, otherwise from the Lucene default. Prefix length, max expansions and transpositions
 * fall back to the Lucene defaults when not provided.
 *
 * @param protoSpanMultiTermQuery gRPC span multi term query holding the fuzzy query
 * @return Lucene fuzzy query with the requested rewrite method applied
 */
private static FuzzyQuery getFuzzyQuery(
    com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery protoSpanMultiTermQuery) {
  com.yelp.nrtsearch.server.grpc.FuzzyQuery fuzzyDef = protoSpanMultiTermQuery.getFuzzyQuery();
  Term term = new Term(fuzzyDef.getField(), fuzzyDef.getText());

  int maxEdits;
  if (fuzzyDef.hasAuto()) {
    maxEdits = computeMaxEditsFromTermLength(term, fuzzyDef.getAuto());
  } else if (fuzzyDef.hasMaxEdits()) {
    maxEdits = fuzzyDef.getMaxEdits();
  } else {
    maxEdits = FuzzyQuery.defaultMaxEdits;
  }

  int prefixLength = FuzzyQuery.defaultPrefixLength;
  if (fuzzyDef.hasPrefixLength()) {
    prefixLength = fuzzyDef.getPrefixLength();
  }

  int maxExpansions = FuzzyQuery.defaultMaxExpansions;
  if (fuzzyDef.hasMaxExpansions()) {
    maxExpansions = fuzzyDef.getMaxExpansions();
  }

  // Use the Lucene default for transpositions when it is not provided.
  boolean transpositions = FuzzyQuery.defaultTranspositions;
  if (fuzzyDef.hasTranspositions()) {
    transpositions = fuzzyDef.getTranspositions();
  }

  FuzzyQuery result = new FuzzyQuery(term, maxEdits, prefixLength, maxExpansions, transpositions);
  result.setRewriteMethod(
      getRewriteMethod(fuzzyDef.getRewrite(), fuzzyDef.getRewriteTopTermsSize()));
  return result;
}
/**
 * Build a Lucene regexp query from the regexp query carried by a span multi term query.
 *
 * <p>The syntax flag is translated to the matching {@link RegExp} constant, with any unlisted
 * value treated as {@code RegExp.ALL}. The determinized state limit falls back to the Lucene
 * default when not provided.
 *
 * @param protoSpanMultiTermQuery gRPC span multi term query holding the regexp query
 * @return Lucene regexp query with the requested rewrite method applied
 */
private static RegexpQuery getRegexpQuery(
    com.yelp.nrtsearch.server.grpc.SpanMultiTermQuery protoSpanMultiTermQuery) {
  com.yelp.nrtsearch.server.grpc.RegexpQuery regexpDef = protoSpanMultiTermQuery.getRegexpQuery();
  Term term = new Term(regexpDef.getField(), regexpDef.getText());

  int syntaxFlags =
      switch (regexpDef.getFlag()) {
        case REGEXP_ALL -> RegExp.ALL;
        case REGEXP_ANYSTRING -> RegExp.ANYSTRING;
        case REGEXP_AUTOMATON -> RegExp.AUTOMATON;
        case REGEXP_COMPLEMENT -> RegExp.COMPLEMENT;
        case REGEXP_EMPTY -> RegExp.EMPTY;
        case REGEXP_INTERSECTION -> RegExp.INTERSECTION;
        case REGEXP_INTERVAL -> RegExp.INTERVAL;
        case REGEXP_NONE -> RegExp.NONE;
        default -> RegExp.ALL;
      };

  int stateLimit = Operations.DEFAULT_MAX_DETERMINIZED_STATES;
  if (regexpDef.hasMaxDeterminizedStates()) {
    stateLimit = regexpDef.getMaxDeterminizedStates();
  }

  RegexpQuery result = new RegexpQuery(term, syntaxFlags, stateLimit);
  result.setRewriteMethod(
      getRewriteMethod(regexpDef.getRewrite(), regexpDef.getRewriteTopTermsSize()));
  return result;
}
/**
 * Build a Lucene term range query from the gRPC definition, applying the requested rewrite
 * method.
 *
 * @param protoTermRangeQuery gRPC term range query definition
 * @return Lucene term range query
 */
private static TermRangeQuery getTermRangeQuery(
    com.yelp.nrtsearch.server.grpc.TermRangeQuery protoTermRangeQuery) {
  BytesRef lowerBound = new BytesRef(protoTermRangeQuery.getLowerTerm());
  BytesRef upperBound = new BytesRef(protoTermRangeQuery.getUpperTerm());
  TermRangeQuery rangeQuery =
      new TermRangeQuery(
          protoTermRangeQuery.getField(),
          lowerBound,
          upperBound,
          protoTermRangeQuery.getIncludeLower(),
          protoTermRangeQuery.getIncludeUpper());
  MultiTermQuery.RewriteMethod rewrite =
      getRewriteMethod(
          protoTermRangeQuery.getRewrite(), protoTermRangeQuery.getRewriteTopTermsSize());
  rangeQuery.setRewriteMethod(rewrite);
  return rangeQuery;
}
}