All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.BooleanQuery Maven / Gradle / Ivy

There is a newer version: 6.4.2_1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Predicate;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.BooleanClause.Occur;

/**
 * A Query that matches documents matching boolean combinations of other queries, e.g. {@link
 * TermQuery}s, {@link PhraseQuery}s or other BooleanQuerys.
 */
public class BooleanQuery extends Query implements Iterable {

  /** A builder for boolean queries. */
  public static class Builder {

    private int minimumNumberShouldMatch;
    private final List clauses = new ArrayList<>();

    /** Sole constructor. */
    public Builder() {}

    /**
     * Specifies a minimum number of the optional BooleanClauses which must be satisfied.
     *
     * 

By default no optional clauses are necessary for a match (unless there are no required * clauses). If this method is used, then the specified number of clauses is required. * *

Use of this method is totally independent of specifying that any specific clauses are * required (or prohibited). This number will only be compared against the number of matching * optional clauses. * * @param min the number of optional clauses that must match */ public Builder setMinimumNumberShouldMatch(int min) { this.minimumNumberShouldMatch = min; return this; } /** * Add a new clause to this {@link Builder}. Note that the order in which clauses are added does * not have any impact on matching documents or query performance. * * @throws IndexSearcher.TooManyClauses if the new number of clauses exceeds the maximum clause * number */ public Builder add(BooleanClause clause) { // We do the final deep check for max clauses count limit during // IndexSearcher.rewrite but do this check to short // circuit in case a single query holds more than numClauses // // NOTE: this is not just an early check for optimization -- it's // neccessary to prevent run-away 'rewriting' of bad queries from // creating BQ objects that might eat up all the Heap. if (clauses.size() >= IndexSearcher.maxClauseCount) { throw new IndexSearcher.TooManyClauses(); } clauses.add(clause); return this; } /** * Add a collection of BooleanClauses to this {@link Builder}. Note that the order in which * clauses are added does not have any impact on matching documents or query performance. * * @throws IndexSearcher.TooManyClauses if the new number of clauses exceeds the maximum clause * number */ public Builder add(Collection collection) { // see #addClause(BooleanClause) if ((clauses.size() + collection.size()) > IndexSearcher.maxClauseCount) { throw new IndexSearcher.TooManyClauses(); } clauses.addAll(collection); return this; } /** * Add a new clause to this {@link Builder}. Note that the order in which clauses are added does * not have any impact on matching documents or query performance. * * @throws IndexSearcher.TooManyClauses if the new number of clauses exceeds the maximum clause * number */ public Builder add(Query query, Occur occur) { return add(new BooleanClause(query, occur)); } /** * Create a new {@link BooleanQuery} based on the parameters that have been set on this builder. */ public BooleanQuery build() { return new BooleanQuery(minimumNumberShouldMatch, clauses.toArray(new BooleanClause[0])); } } private final int minimumNumberShouldMatch; private final List clauses; // used for toString() and getClauses() // WARNING: Do not let clauseSets escape from this class as it breaks immutability: private final Map> clauseSets; // used for equals/hashCode private BooleanQuery(int minimumNumberShouldMatch, BooleanClause[] clauses) { this.minimumNumberShouldMatch = minimumNumberShouldMatch; this.clauses = Collections.unmodifiableList(Arrays.asList(clauses)); clauseSets = new EnumMap<>(Occur.class); // duplicates matter for SHOULD and MUST clauseSets.put(Occur.SHOULD, new Multiset<>()); clauseSets.put(Occur.MUST, new Multiset<>()); // but not for FILTER and MUST_NOT clauseSets.put(Occur.FILTER, new HashSet<>()); clauseSets.put(Occur.MUST_NOT, new HashSet<>()); for (BooleanClause clause : clauses) { clauseSets.get(clause.occur()).add(clause.query()); } } /** Gets the minimum number of the optional BooleanClauses which must be satisfied. */ public int getMinimumNumberShouldMatch() { return minimumNumberShouldMatch; } /** Return a list of the clauses of this {@link BooleanQuery}. */ public List clauses() { return clauses; } /** Return the collection of queries for the given {@link Occur}. */ public Collection getClauses(Occur occur) { // turn this immutable here, because we need to preserve the correct collection types for // equals/hashCode! return Collections.unmodifiableCollection(clauseSets.get(occur)); } /** * Whether this query is a pure disjunction, ie. it only has SHOULD clauses and it is enough for a * single clause to match for this boolean query to match. */ boolean isPureDisjunction() { return clauses.size() == getClauses(Occur.SHOULD).size() && minimumNumberShouldMatch <= 1; } /** Whether this query is a two clause disjunction with two term query clauses. */ boolean isTwoClausePureDisjunctionWithTerms() { return clauses.size() == 2 && isPureDisjunction() && clauses.get(0).query() instanceof TermQuery && clauses.get(1).query() instanceof TermQuery; } /** * Rewrite a single two clause disjunction query with terms to two term queries and a conjunction * query using the inclusion–exclusion principle. */ Query[] rewriteTwoClauseDisjunctionWithTermsForCount(IndexSearcher indexSearcher) throws IOException { BooleanQuery.Builder newQuery = new BooleanQuery.Builder(); Query[] queries = new Query[3]; for (int i = 0; i < clauses.size(); i++) { TermQuery termQuery = (TermQuery) clauses.get(i).query(); // Optimization will count term query several times so use cache to avoid multiple terms // dictionary lookups if (termQuery.getTermStates() == null) { termQuery = new TermQuery( termQuery.getTerm(), TermStates.build(indexSearcher, termQuery.getTerm(), false)); } newQuery.add(termQuery, Occur.MUST); queries[i] = termQuery; } queries[2] = newQuery.build(); return queries; } /** * Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface * to make it possible to do: * *

for (BooleanClause clause : booleanQuery) {}
*/ @Override public final Iterator iterator() { return clauses.iterator(); } // Utility method for rewriting BooleanQuery when scores are not needed. // This is called from ConstantScoreQuery#rewrite BooleanQuery rewriteNoScoring() { boolean actuallyRewritten = false; BooleanQuery.Builder newQuery = new BooleanQuery.Builder().setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); final boolean keepShould = getMinimumNumberShouldMatch() > 0 || (clauseSets.get(Occur.MUST).size() + clauseSets.get(Occur.FILTER).size() == 0); for (BooleanClause clause : clauses) { Query query = clause.query(); // NOTE: rewritingNoScoring() should not call rewrite(), otherwise this // method could run in exponential time with the depth of the query as // every new level would rewrite 2x more than its parent level. Query rewritten = query; if (rewritten instanceof BoostQuery) { rewritten = ((BoostQuery) rewritten).getQuery(); } if (rewritten instanceof ConstantScoreQuery) { rewritten = ((ConstantScoreQuery) rewritten).getQuery(); } if (rewritten instanceof BooleanQuery) { rewritten = ((BooleanQuery) rewritten).rewriteNoScoring(); } BooleanClause.Occur occur = clause.occur(); if (occur == Occur.SHOULD && keepShould == false) { // ignore clause actuallyRewritten = true; } else if (occur == Occur.MUST) { // replace MUST clauses with FILTER clauses newQuery.add(rewritten, Occur.FILTER); actuallyRewritten = true; } else if (query != rewritten) { newQuery.add(rewritten, occur); actuallyRewritten = true; } else { newQuery.add(clause); } } if (actuallyRewritten == false) { return this; } return newQuery.build(); } @Override public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost) throws IOException { return new BooleanWeight(this, searcher, scoreMode, boost); } @Override public Query rewrite(IndexSearcher indexSearcher) throws IOException { if (clauses.size() == 0) { return new MatchNoDocsQuery("empty BooleanQuery"); } // Queries with no positive clauses have no matches if (clauses.size() == clauseSets.get(Occur.MUST_NOT).size()) { return new MatchNoDocsQuery("pure negative BooleanQuery"); } // optimize 1-clause queries if (clauses.size() == 1) { BooleanClause c = clauses.get(0); Query query = c.query(); if (minimumNumberShouldMatch == 1 && c.occur() == Occur.SHOULD) { return query; } else if (minimumNumberShouldMatch == 0) { switch (c.occur()) { case SHOULD: case MUST: return query; case FILTER: // no scoring clauses, so return a score of 0 return new BoostQuery(new ConstantScoreQuery(query), 0); case MUST_NOT: default: throw new AssertionError(); } } } // recursively rewrite { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); boolean actuallyRewritten = false; for (BooleanClause clause : this) { Query query = clause.query(); BooleanClause.Occur occur = clause.occur(); Query rewritten; if (occur == Occur.FILTER || occur == Occur.MUST_NOT) { // Clauses that are not involved in scoring can get some extra simplifications rewritten = new ConstantScoreQuery(query).rewrite(indexSearcher); if (rewritten instanceof ConstantScoreQuery) { rewritten = ((ConstantScoreQuery) rewritten).getQuery(); } } else { rewritten = query.rewrite(indexSearcher); } if (rewritten != query || query.getClass() == MatchNoDocsQuery.class) { // rewrite clause actuallyRewritten = true; if (rewritten.getClass() == MatchNoDocsQuery.class) { switch (occur) { case SHOULD: case MUST_NOT: // the clause can be safely ignored break; case MUST: case FILTER: return rewritten; } } else { builder.add(rewritten, occur); } } else { // leave as-is builder.add(clause); } } if (actuallyRewritten) { return builder.build(); } } // remove duplicate FILTER and MUST_NOT clauses { int clauseCount = 0; for (Collection queries : clauseSets.values()) { clauseCount += queries.size(); } if (clauseCount != clauses.size()) { // since clauseSets implicitly deduplicates FILTER and MUST_NOT // clauses, this means there were duplicates BooleanQuery.Builder rewritten = new BooleanQuery.Builder(); rewritten.setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry> entry : clauseSets.entrySet()) { final Occur occur = entry.getKey(); for (Query query : entry.getValue()) { rewritten.add(query, occur); } } return rewritten.build(); } } // Check whether some clauses are both required and excluded final Collection mustNotClauses = clauseSets.get(Occur.MUST_NOT); if (!mustNotClauses.isEmpty()) { final Predicate p = clauseSets.get(Occur.MUST)::contains; if (mustNotClauses.stream().anyMatch(p.or(clauseSets.get(Occur.FILTER)::contains))) { return new MatchNoDocsQuery("FILTER or MUST clause also in MUST_NOT"); } if (mustNotClauses.contains(new MatchAllDocsQuery())) { return new MatchNoDocsQuery("MUST_NOT clause is MatchAllDocsQuery"); } } // remove FILTER clauses that are also MUST clauses or that match all documents if (clauseSets.get(Occur.FILTER).size() > 0) { final Set filters = new HashSet<>(clauseSets.get(Occur.FILTER)); boolean modified = false; if (filters.size() > 1 || clauseSets.get(Occur.MUST).isEmpty() == false) { modified = filters.remove(new MatchAllDocsQuery()); } modified |= filters.removeAll(clauseSets.get(Occur.MUST)); if (modified) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()); for (BooleanClause clause : clauses) { if (clause.occur() != Occur.FILTER) { builder.add(clause); } } for (Query filter : filters) { builder.add(filter, Occur.FILTER); } return builder.build(); } } // convert FILTER clauses that are also SHOULD clauses to MUST clauses if (clauseSets.get(Occur.SHOULD).size() > 0 && clauseSets.get(Occur.FILTER).size() > 0) { final Collection filters = clauseSets.get(Occur.FILTER); final Collection shoulds = clauseSets.get(Occur.SHOULD); Set intersection = new HashSet<>(filters); intersection.retainAll(shoulds); if (intersection.isEmpty() == false) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); int minShouldMatch = getMinimumNumberShouldMatch(); for (BooleanClause clause : clauses) { if (intersection.contains(clause.query())) { if (clause.occur() == Occur.SHOULD) { builder.add(new BooleanClause(clause.query(), Occur.MUST)); minShouldMatch--; } } else { builder.add(clause); } } builder.setMinimumNumberShouldMatch(Math.max(0, minShouldMatch)); return builder.build(); } } // Deduplicate SHOULD clauses by summing up their boosts if (clauseSets.get(Occur.SHOULD).size() > 0 && minimumNumberShouldMatch <= 1) { Map shouldClauses = new HashMap<>(); for (Query query : clauseSets.get(Occur.SHOULD)) { double boost = 1; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; boost *= bq.getBoost(); query = bq.getQuery(); } shouldClauses.put(query, shouldClauses.getOrDefault(query, 0d) + boost); } if (shouldClauses.size() != clauseSets.get(Occur.SHOULD).size()) { BooleanQuery.Builder builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry entry : shouldClauses.entrySet()) { Query query = entry.getKey(); float boost = entry.getValue().floatValue(); if (boost != 1f) { query = new BoostQuery(query, boost); } builder.add(query, Occur.SHOULD); } for (BooleanClause clause : clauses) { if (clause.occur() != Occur.SHOULD) { builder.add(clause); } } return builder.build(); } } // Deduplicate MUST clauses by summing up their boosts if (clauseSets.get(Occur.MUST).size() > 0) { Map mustClauses = new HashMap<>(); for (Query query : clauseSets.get(Occur.MUST)) { double boost = 1; while (query instanceof BoostQuery) { BoostQuery bq = (BoostQuery) query; boost *= bq.getBoost(); query = bq.getQuery(); } mustClauses.put(query, mustClauses.getOrDefault(query, 0d) + boost); } if (mustClauses.size() != clauseSets.get(Occur.MUST).size()) { BooleanQuery.Builder builder = new BooleanQuery.Builder().setMinimumNumberShouldMatch(minimumNumberShouldMatch); for (Map.Entry entry : mustClauses.entrySet()) { Query query = entry.getKey(); float boost = entry.getValue().floatValue(); if (boost != 1f) { query = new BoostQuery(query, boost); } builder.add(query, Occur.MUST); } for (BooleanClause clause : clauses) { if (clause.occur() != Occur.MUST) { builder.add(clause); } } return builder.build(); } } // Rewrite queries whose single scoring clause is a MUST clause on a // MatchAllDocsQuery to a ConstantScoreQuery { final Collection musts = clauseSets.get(Occur.MUST); final Collection filters = clauseSets.get(Occur.FILTER); if (musts.size() == 1 && filters.size() > 0) { Query must = musts.iterator().next(); float boost = 1f; if (must instanceof BoostQuery boostQuery) { must = boostQuery.getQuery(); boost = boostQuery.getBoost(); } if (must.getClass() == MatchAllDocsQuery.class) { // our single scoring clause matches everything: rewrite to a CSQ on the filter // ignore SHOULD clause for now BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (BooleanClause clause : clauses) { switch (clause.occur()) { case FILTER: case MUST_NOT: builder.add(clause); break; case MUST: case SHOULD: default: // ignore break; } } Query rewritten = builder.build(); rewritten = new ConstantScoreQuery(rewritten); if (boost != 1f) { rewritten = new BoostQuery(rewritten, boost); } // now add back the SHOULD clauses builder = new BooleanQuery.Builder() .setMinimumNumberShouldMatch(getMinimumNumberShouldMatch()) .add(rewritten, Occur.MUST); for (Query query : clauseSets.get(Occur.SHOULD)) { builder.add(query, Occur.SHOULD); } rewritten = builder.build(); return rewritten; } } } // Flatten nested disjunctions, this is important for block-max WAND to perform well if (minimumNumberShouldMatch <= 1) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); boolean actuallyRewritten = false; for (BooleanClause clause : clauses) { if (clause.occur() == Occur.SHOULD && clause.query() instanceof BooleanQuery innerQuery) { if (innerQuery.isPureDisjunction()) { actuallyRewritten = true; for (BooleanClause innerClause : innerQuery.clauses()) { builder.add(innerClause); } } else { builder.add(clause); } } else { builder.add(clause); } } if (actuallyRewritten) { return builder.build(); } } // Inline required / prohibited clauses. This helps run filtered conjunctive queries more // efficiently by providing all clauses to the block-max AND scorer. { BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.setMinimumNumberShouldMatch(minimumNumberShouldMatch); boolean actuallyRewritten = false; for (BooleanClause outerClause : clauses) { if (outerClause.isRequired() && outerClause.query() instanceof BooleanQuery innerQuery) { // Inlining prohibited clauses is not legal if the query is a pure negation, since pure // negations have no matches. It works because the inner BooleanQuery would have first // rewritten to a MatchNoDocsQuery if it only had prohibited clauses. assert innerQuery.getClauses(Occur.MUST_NOT).size() != innerQuery.clauses().size(); if (innerQuery.getMinimumNumberShouldMatch() == 0 && innerQuery.getClauses(Occur.SHOULD).isEmpty()) { actuallyRewritten = true; for (BooleanClause innerClause : innerQuery) { Occur innerOccur = innerClause.occur(); if (innerOccur == Occur.FILTER || innerOccur == Occur.MUST_NOT || outerClause.occur() == Occur.MUST) { builder.add(innerClause); } else { assert outerClause.occur() == Occur.FILTER && innerOccur == Occur.MUST; // In this case we need to change the occur of the inner query from MUST to FILTER. builder.add(innerClause.query(), Occur.FILTER); } } } else { builder.add(outerClause); } } else { builder.add(outerClause); } } if (actuallyRewritten) { return builder.build(); } } // SHOULD clause count less than or equal to minimumNumberShouldMatch // Important(this can only be processed after nested clauses have been flattened) { final Collection shoulds = clauseSets.get(Occur.SHOULD); if (shoulds.size() < minimumNumberShouldMatch) { return new MatchNoDocsQuery("SHOULD clause count less than minimumNumberShouldMatch"); } if (shoulds.size() > 0 && shoulds.size() == minimumNumberShouldMatch) { BooleanQuery.Builder builder = new BooleanQuery.Builder(); for (BooleanClause clause : clauses) { if (clause.occur() == Occur.SHOULD) { builder.add(clause.query(), Occur.MUST); } else { builder.add(clause); } } return builder.build(); } } // Inline SHOULD clauses from the only MUST clause { if (clauseSets.get(Occur.SHOULD).isEmpty() && clauseSets.get(Occur.MUST).size() == 1 && clauseSets.get(Occur.MUST).iterator().next() instanceof BooleanQuery inner && inner.clauses.size() == inner.clauseSets.get(Occur.SHOULD).size()) { BooleanQuery.Builder rewritten = new BooleanQuery.Builder(); for (BooleanClause clause : clauses) { if (clause.occur() != Occur.MUST) { rewritten.add(clause); } } for (BooleanClause innerClause : inner.clauses()) { rewritten.add(innerClause); } rewritten.setMinimumNumberShouldMatch(Math.max(1, inner.getMinimumNumberShouldMatch())); return rewritten.build(); } } return super.rewrite(indexSearcher); } @Override public void visit(QueryVisitor visitor) { QueryVisitor sub = visitor.getSubVisitor(Occur.MUST, this); for (BooleanClause.Occur occur : clauseSets.keySet()) { if (clauseSets.get(occur).size() > 0) { if (occur == Occur.MUST) { for (Query q : clauseSets.get(occur)) { q.visit(sub); } } else { QueryVisitor v = sub.getSubVisitor(occur, this); for (Query q : clauseSets.get(occur)) { q.visit(v); } } } } } /** Prints a user-readable version of this query. */ @Override public String toString(String field) { StringBuilder buffer = new StringBuilder(); boolean needParens = getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } int i = 0; for (BooleanClause c : this) { buffer.append(c.occur().toString()); Query subQuery = c.query(); if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subQuery.toString(field)); buffer.append(")"); } else { buffer.append(subQuery.toString(field)); } if (i != clauses.size() - 1) { buffer.append(" "); } i += 1; } if (needParens) { buffer.append(")"); } if (getMinimumNumberShouldMatch() > 0) { buffer.append('~'); buffer.append(getMinimumNumberShouldMatch()); } return buffer.toString(); } /** * Compares the specified object with this boolean query for equality. Returns true if and only if * the provided object * *
    *
  • is also a {@link BooleanQuery}, *
  • has the same value of {@link #getMinimumNumberShouldMatch()} *
  • has the same {@link Occur#SHOULD} clauses, regardless of the order *
  • has the same {@link Occur#MUST} clauses, regardless of the order *
  • has the same set of {@link Occur#FILTER} clauses, regardless of the order and regardless * of duplicates *
  • has the same set of {@link Occur#MUST_NOT} clauses, regardless of the order and * regardless of duplicates *
*/ @Override public boolean equals(Object o) { return sameClassAs(o) && equalsTo(getClass().cast(o)); } private boolean equalsTo(BooleanQuery other) { return getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() && clauseSets.equals(other.clauseSets); } private int computeHashCode() { int hashCode = Objects.hash(minimumNumberShouldMatch, clauseSets); if (hashCode == 0) { hashCode = 1; } return hashCode; } // cached hash code is ok since boolean queries are immutable private int hashCode; @Override public int hashCode() { // no need for synchronization, in the worst case we would just compute the hash several times. if (hashCode == 0) { hashCode = computeHashCode(); assert hashCode != 0; } assert hashCode == computeHashCode(); return hashCode; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy