All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.MultiTermQuery Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;


import java.io.IOException;
import java.util.Objects;

import org.apache.lucene.index.FilteredTermsEnum; // javadocs
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.SingleTermsEnum;   // javadocs
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.AttributeSource;

/**
 * An abstract {@link Query} that matches documents
 * containing a subset of terms provided by a {@link
 * FilteredTermsEnum} enumeration.
 *
 * 

This query cannot be used directly; you must subclass * it and define {@link #getTermsEnum(Terms,AttributeSource)} to provide a {@link * FilteredTermsEnum} that iterates through the terms to be * matched. * *

NOTE: if {@link #setRewriteMethod} is either * {@link #CONSTANT_SCORE_BOOLEAN_REWRITE} or {@link * #SCORING_BOOLEAN_REWRITE}, you may encounter a * {@link BooleanQuery.TooManyClauses} exception during * searching, which happens when the number of terms to be * searched exceeds {@link * BooleanQuery#getMaxClauseCount()}. Setting {@link * #setRewriteMethod} to {@link #CONSTANT_SCORE_REWRITE} * prevents this. * *

The recommended rewrite method is {@link * #CONSTANT_SCORE_REWRITE}: it doesn't spend CPU * computing unhelpful scores, and is the most * performant rewrite method given the query. If you * need scoring (like {@link FuzzyQuery}, use * {@link TopTermsScoringBooleanQueryRewrite} which uses * a priority queue to only collect competitive terms * and not hit this limitation. * * Note that org.apache.lucene.queryparser.classic.QueryParser produces * MultiTermQueries using {@link #CONSTANT_SCORE_REWRITE} * by default. */ public abstract class MultiTermQuery extends Query { protected final String field; protected RewriteMethod rewriteMethod = CONSTANT_SCORE_REWRITE; /** Abstract class that defines how the query is rewritten. */ public static abstract class RewriteMethod { public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; /** * Returns the {@link MultiTermQuery}s {@link TermsEnum} * @see MultiTermQuery#getTermsEnum(Terms, AttributeSource) */ protected TermsEnum getTermsEnum(MultiTermQuery query, Terms terms, AttributeSource atts) throws IOException { return query.getTermsEnum(terms, atts); // allow RewriteMethod subclasses to pull a TermsEnum from the MTQ } } /** A rewrite method that first creates a private Filter, * by visiting each term in sequence and marking all docs * for that term. Matching documents are assigned a * constant score equal to the query's boost. * *

This method is faster than the BooleanQuery * rewrite methods when the number of matched terms or * matched documents is non-trivial. Also, it will never * hit an errant {@link BooleanQuery.TooManyClauses} * exception. * * @see #setRewriteMethod */ public static final RewriteMethod CONSTANT_SCORE_REWRITE = new RewriteMethod() { @Override public Query rewrite(IndexReader reader, MultiTermQuery query) { return new MultiTermQueryConstantScoreWrapper<>(query); } }; /** Old name of {@link #CONSTANT_SCORE_REWRITE} * @deprecated old name of {@link #CONSTANT_SCORE_REWRITE} */ @Deprecated public static final RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = CONSTANT_SCORE_REWRITE; /** A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a * BooleanQuery, and keeps the scores as computed by the * query. Note that typically such scores are * meaningless to the user, and require non-trivial CPU * to compute, so it's almost always better to use {@link * #CONSTANT_SCORE_REWRITE} instead. * *

NOTE: This rewrite method will hit {@link * BooleanQuery.TooManyClauses} if the number of terms * exceeds {@link BooleanQuery#getMaxClauseCount}. * * @see #setRewriteMethod */ public final static RewriteMethod SCORING_BOOLEAN_REWRITE = ScoringRewrite.SCORING_BOOLEAN_REWRITE; /** Old name of {@link #SCORING_BOOLEAN_REWRITE} * @deprecated old name of {@link #SCORING_BOOLEAN_REWRITE} */ @Deprecated public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = SCORING_BOOLEAN_REWRITE; /** Like {@link #SCORING_BOOLEAN_REWRITE} except * scores are not computed. Instead, each matching * document receives a constant score equal to the * query's boost. * *

NOTE: This rewrite method will hit {@link * BooleanQuery.TooManyClauses} if the number of terms * exceeds {@link BooleanQuery#getMaxClauseCount}. * * @see #setRewriteMethod */ public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_REWRITE = ScoringRewrite.CONSTANT_SCORE_BOOLEAN_REWRITE; /** Old name of {@link #CONSTANT_SCORE_BOOLEAN_REWRITE} * @deprecated old name of {@link #CONSTANT_SCORE_BOOLEAN_REWRITE} */ @Deprecated public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = CONSTANT_SCORE_BOOLEAN_REWRITE; /** * A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the * scores as computed by the query. * *

* This rewrite method only uses the top scoring terms so it will not overflow * the boolean max clause count. It is the default rewrite method for * {@link FuzzyQuery}. * * @see #setRewriteMethod */ public static final class TopTermsScoringBooleanQueryRewrite extends TopTermsRewrite { /** * Create a TopTermsScoringBooleanQueryRewrite for * at most size terms. *

* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than * size, then it will be used instead. */ public TopTermsScoringBooleanQueryRewrite(int size) { super(size); } @Override protected int getMaxSize() { return BooleanQuery.getMaxClauseCount(); } @Override protected BooleanQuery.Builder getTopLevelBuilder() { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setDisableCoord(true); return builder; } @Override protected Query build(BooleanQuery.Builder builder) { return builder.build(); } @Override protected void addClause(BooleanQuery.Builder topLevel, Term term, int docCount, float boost, TermContext states) { final TermQuery tq = new TermQuery(term, states); topLevel.add(new BoostQuery(tq, boost), BooleanClause.Occur.SHOULD); } } /** * A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, but adjusts * the frequencies used for scoring to be blended across the terms, otherwise * the rarest term typically ranks highest (often not useful eg in the set of * expanded terms in a FuzzyQuery). * *

* This rewrite method only uses the top scoring terms so it will not overflow * the boolean max clause count. * * @see #setRewriteMethod */ public static final class TopTermsBlendedFreqScoringRewrite extends TopTermsRewrite { /** * Create a TopTermsBlendedScoringBooleanQueryRewrite for at most * size terms. *

* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than * size, then it will be used instead. */ public TopTermsBlendedFreqScoringRewrite(int size) { super(size); } @Override protected int getMaxSize() { return BooleanQuery.getMaxClauseCount(); } @Override protected BlendedTermQuery.Builder getTopLevelBuilder() { BlendedTermQuery.Builder builder = new BlendedTermQuery.Builder(); builder.setRewriteMethod(BlendedTermQuery.BOOLEAN_REWRITE); return builder; } @Override protected Query build(BlendedTermQuery.Builder builder) { return builder.build(); } @Override protected void addClause(BlendedTermQuery.Builder topLevel, Term term, int docCount, float boost, TermContext states) { topLevel.add(term, boost, states); } } /** * A rewrite method that first translates each term into * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, but the scores * are only computed as the boost. *

* This rewrite method only uses the top scoring terms so it will not overflow * the boolean max clause count. * * @see #setRewriteMethod */ public static final class TopTermsBoostOnlyBooleanQueryRewrite extends TopTermsRewrite { /** * Create a TopTermsBoostOnlyBooleanQueryRewrite for * at most size terms. *

* NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than * size, then it will be used instead. */ public TopTermsBoostOnlyBooleanQueryRewrite(int size) { super(size); } @Override protected int getMaxSize() { return BooleanQuery.getMaxClauseCount(); } @Override protected BooleanQuery.Builder getTopLevelBuilder() { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setDisableCoord(true); return builder; } @Override protected Query build(BooleanQuery.Builder builder) { return builder.build(); } @Override protected void addClause(BooleanQuery.Builder topLevel, Term term, int docFreq, float boost, TermContext states) { final Query q = new ConstantScoreQuery(new TermQuery(term, states)); topLevel.add(new BoostQuery(q, boost), BooleanClause.Occur.SHOULD); } } /** * Constructs a query matching terms that cannot be represented with a single * Term. */ public MultiTermQuery(final String field) { this.field = Objects.requireNonNull(field, "field must not be null"); } /** Returns the field name for this query */ public final String getField() { return field; } /** Construct the enumeration to be used, expanding the * pattern term. This method should only be called if * the field exists (ie, implementations can assume the * field does exist). This method should not return null * (should instead return {@link TermsEnum#EMPTY} if no * terms match). The TermsEnum must already be * positioned to the first matching term. * The given {@link AttributeSource} is passed by the {@link RewriteMethod} to * provide attributes, the rewrite method uses to inform about e.g. maximum competitive boosts. * This is currently only used by {@link TopTermsRewrite} */ protected abstract TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException; /** Convenience method, if no attributes are needed: * This simply passes empty attributes and is equal to: * getTermsEnum(terms, new AttributeSource()) */ protected final TermsEnum getTermsEnum(Terms terms) throws IOException { return getTermsEnum(terms, new AttributeSource()); } /** * To rewrite to a simpler form, instead return a simpler * enum from {@link #getTermsEnum(Terms, AttributeSource)}. For example, * to rewrite to a single term, return a {@link SingleTermsEnum} */ @Override public final Query rewrite(IndexReader reader) throws IOException { if (getBoost() != 1f) { return super.rewrite(reader); } return rewriteMethod.rewrite(reader, this); } /** * @see #setRewriteMethod */ public RewriteMethod getRewriteMethod() { return rewriteMethod; } /** * Sets the rewrite method to be used when executing the * query. You can use one of the four core methods, or * implement your own subclass of {@link RewriteMethod}. */ public void setRewriteMethod(RewriteMethod method) { rewriteMethod = method; } @Override public int hashCode() { int h = super.hashCode(); h = 31 * h + rewriteMethod.hashCode(); h = 31 * h + Objects.hashCode(field); return h; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; MultiTermQuery other = (MultiTermQuery) obj; if (!super.equals(obj)) return false; if (!rewriteMethod.equals(other.rewriteMethod)) { return false; } return (other.field == null ? field == null : other.field.equals(field)); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy