
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.Executor;
import java.util.function.Supplier;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.QueryTimeout;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
/**
* Implements search over a single IndexReader.
*
* <p>Applications usually need only call the inherited {@link #search(Query,int)} method. For
* performance reasons, if your index is unchanging, you should share a single IndexSearcher
* instance across multiple searches instead of creating a new one per-search. If your index has
* changed and you wish to see the changes reflected in searching, you should use {@link
* DirectoryReader#openIfChanged(DirectoryReader)} to obtain a new reader and then create a new
* IndexSearcher from that. Also, for low-latency turnaround it's best to use a near-real-time
* reader ({@link DirectoryReader#open(IndexWriter)}). Once you have a new {@link IndexReader}, it's
* relatively cheap to create a new IndexSearcher from it.
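*
* <p>For example, a minimal refresh sketch (the {@code directory} variable is an assumed
* {@code org.apache.lucene.store.Directory} instance):
* <pre class="prettyprint">{@code
* DirectoryReader reader = DirectoryReader.open(directory);
* IndexSearcher searcher = new IndexSearcher(reader);
* // later, after the index may have changed:
* DirectoryReader newReader = DirectoryReader.openIfChanged(reader); // null if unchanged
* if (newReader != null) {
*   reader.close();
*   reader = newReader;
*   searcher = new IndexSearcher(reader);
* }
* }</pre>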
*
* <p>NOTE: The {@link #search} and {@link #searchAfter} methods are configured to only count
* top hits accurately up to {@code 1,000} and may return a {@link TotalHits.Relation lower bound}
* of the hit count if the hit count is greater than or equal to {@code 1,000}. On queries that
* match lots of documents, counting the number of hits may take much longer than computing the top
* hits, so this trade-off allows getting some minimal information about the hit count without
* slowing down search too much. The {@link TopDocs#scoreDocs} array is, however, always accurate.
* If this behavior doesn't suit your needs, you should create collector managers manually with
* either {@link TopScoreDocCollectorManager} or {@link TopFieldCollectorManager} and call {@link
* #search(Query, CollectorManager)}.
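*
* <p>For example, a minimal sketch that counts all hits exactly by raising the threshold
* (constructor arguments: numHits, after, totalHitsThreshold):
* <pre class="prettyprint">{@code
* TopScoreDocCollectorManager manager =
*     new TopScoreDocCollectorManager(10, null, Integer.MAX_VALUE);
* TopDocs hits = searcher.search(query, manager);
* }</pre>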
*
* <p>NOTE: {@link IndexSearcher} instances are completely thread safe, meaning multiple threads
* can call any of its methods, concurrently. If your application requires external
* synchronization, you should not synchronize on the IndexSearcher instance; use your own
* (non-Lucene) objects instead.
*/
public class IndexSearcher {
@SuppressWarnings("NonFinalStaticField")
static int maxClauseCount = 1024;
@SuppressWarnings("NonFinalStaticField")
private static QueryCache DEFAULT_QUERY_CACHE;
@SuppressWarnings("NonFinalStaticField")
private static QueryCachingPolicy DEFAULT_CACHING_POLICY = new UsageTrackingQueryCachingPolicy();
private QueryTimeout queryTimeout = null;
// partialResult may be set on one of the threads of the executor. It may be correct to not make
// this variable volatile since joining these threads should ensure a happens-before relationship
// that guarantees that writes become visible on the main thread, but making the variable volatile
// shouldn't hurt either.
private volatile boolean partialResult = false;
static {
final int maxCachedQueries = 1000;
// min of 32MB or 5% of the heap size
final long maxRamBytesUsed = Math.min(1L << 25, Runtime.getRuntime().maxMemory() / 20);
DEFAULT_QUERY_CACHE = new LRUQueryCache(maxCachedQueries, maxRamBytesUsed);
}
/**
* By default, we count hits accurately up to 1000. This makes sure that we don't spend most of
* the time computing hit counts.
*/
private static final int TOTAL_HITS_THRESHOLD = 1000;
/**
* Thresholds for the index slice allocation logic. To change the defaults, extend {@code
* IndexSearcher} and use custom values.
*/
private static final int MAX_DOCS_PER_SLICE = 250_000;
private static final int MAX_SEGMENTS_PER_SLICE = 5;
final IndexReader reader; // package private for testing!
// NOTE: these members might change in incompatible ways
// in the next release
protected final IndexReaderContext readerContext;
protected final List<LeafReaderContext> leafContexts;
private volatile LeafSlice[] leafSlices;
// Used internally for load balancing threads executing for the query
private final TaskExecutor taskExecutor;
// the default Similarity
private static final Similarity defaultSimilarity = new BM25Similarity();
private QueryCache queryCache = DEFAULT_QUERY_CACHE;
private QueryCachingPolicy queryCachingPolicy = DEFAULT_CACHING_POLICY;
/**
* Expert: returns a default Similarity instance. In general, this method is only called to
* initialize searchers and writers. User code and query implementations should respect {@link
* IndexSearcher#getSimilarity()}.
*
* @lucene.internal
*/
public static Similarity getDefaultSimilarity() {
return defaultSimilarity;
}
/**
* Expert: returns leaf contexts associated with this searcher. This is an internal method exposed
* for tests only.
*
* @lucene.internal
*/
public List<LeafReaderContext> getLeafContexts() {
return leafContexts;
}
/**
* Expert: Get the default {@link QueryCache} or {@code null} if the cache is disabled.
*
* @lucene.internal
*/
public static QueryCache getDefaultQueryCache() {
return DEFAULT_QUERY_CACHE;
}
/**
* Expert: set the default {@link QueryCache} instance.
*
* @lucene.internal
*/
public static void setDefaultQueryCache(QueryCache defaultQueryCache) {
DEFAULT_QUERY_CACHE = defaultQueryCache;
}
/**
* Expert: Get the default {@link QueryCachingPolicy}.
*
* @lucene.internal
*/
public static QueryCachingPolicy getDefaultQueryCachingPolicy() {
return DEFAULT_CACHING_POLICY;
}
/**
* Expert: set the default {@link QueryCachingPolicy} instance.
*
* @lucene.internal
*/
public static void setDefaultQueryCachingPolicy(QueryCachingPolicy defaultQueryCachingPolicy) {
DEFAULT_CACHING_POLICY = defaultQueryCachingPolicy;
}
/** The Similarity implementation used by this searcher. */
private Similarity similarity = defaultSimilarity;
/** Creates a searcher searching the provided index. */
public IndexSearcher(IndexReader r) {
this(r, null);
}
/**
* Runs searches for each segment separately, using the provided Executor. NOTE: if you are using
* {@link NIOFSDirectory}, do not use the shutdownNow method of ExecutorService as this uses
* Thread.interrupt under-the-hood which can silently close file descriptors (see LUCENE-2239).
*
* @lucene.experimental
*/
public IndexSearcher(IndexReader r, Executor executor) {
this(r.getContext(), executor);
}
/**
* Creates a searcher searching the provided top-level {@link IndexReaderContext}.
*
* <p>Given a non-null {@link Executor}, this method runs searches for each segment
* separately, using the provided Executor. NOTE: if you are using {@link NIOFSDirectory}, do not
* use the shutdownNow method of ExecutorService as this uses Thread.interrupt under-the-hood
* which can silently close file descriptors (see LUCENE-2239).
*
* @see IndexReaderContext
* @see IndexReader#getContext()
* @lucene.experimental
*/
public IndexSearcher(IndexReaderContext context, Executor executor) {
assert context.isTopLevel
: "IndexSearcher's ReaderContext must be topLevel for reader " + context.reader();
reader = context.reader();
this.taskExecutor =
executor == null ? new TaskExecutor(Runnable::run) : new TaskExecutor(executor);
this.readerContext = context;
leafContexts = context.leaves();
if (executor == null) {
leafSlices =
leafContexts.isEmpty()
? new LeafSlice[0]
: new LeafSlice[] {LeafSlice.entireSegments(leafContexts)};
}
}
/**
* Creates a searcher searching the provided top-level {@link IndexReaderContext}.
*
* @see IndexReaderContext
* @see IndexReader#getContext()
* @lucene.experimental
*/
public IndexSearcher(IndexReaderContext context) {
this(context, null);
}
/**
* Return the maximum number of clauses permitted, 1024 by default. Attempts to add more than the
* permitted number of clauses cause {@link TooManyClauses} to be thrown.
*
* @see #setMaxClauseCount(int)
*/
public static int getMaxClauseCount() {
return maxClauseCount;
}
/** Set the maximum number of clauses permitted per Query. Default value is 1024. */
public static void setMaxClauseCount(int value) {
if (value < 1) {
throw new IllegalArgumentException("maxClauseCount must be >= 1");
}
maxClauseCount = value;
}
/**
* Set the {@link QueryCache} to use when scores are not needed. A value of {@code null} indicates
* that query matches should never be cached. This method should be called before you start
* using this {@link IndexSearcher}.
*
* <p>NOTE: When using a query cache, queries should not be modified after they have been passed
* to IndexSearcher.
*
* @see QueryCache
* @lucene.experimental
*/
public void setQueryCache(QueryCache queryCache) {
this.queryCache = queryCache;
}
/**
* Return the query cache of this {@link IndexSearcher}. This will be either the {@link
* #getDefaultQueryCache() default query cache} or the query cache that was last set through
* {@link #setQueryCache(QueryCache)}. A return value of {@code null} indicates that caching is
* disabled.
*
* @lucene.experimental
*/
public QueryCache getQueryCache() {
return queryCache;
}
/**
* Set the {@link QueryCachingPolicy} to use for query caching. This method should be called
* before you start using this {@link IndexSearcher}.
*
* @see QueryCachingPolicy
* @lucene.experimental
*/
public void setQueryCachingPolicy(QueryCachingPolicy queryCachingPolicy) {
this.queryCachingPolicy = Objects.requireNonNull(queryCachingPolicy);
}
/**
* Return the query caching policy of this {@link IndexSearcher}. This will be either the {@link
* #getDefaultQueryCachingPolicy() default policy} or the policy that was last set through {@link
* #setQueryCachingPolicy(QueryCachingPolicy)}.
*
* @lucene.experimental
*/
public QueryCachingPolicy getQueryCachingPolicy() {
return queryCachingPolicy;
}
/**
* Expert: Creates an array of leaf slices each holding a subset of the given leaves. Each {@link
* LeafSlice} is executed in a single thread. By default, segments with more than
* MAX_DOCS_PER_SLICE docs will get their own thread.
*
* <p>It is possible to leverage intra-segment concurrency by splitting segments into multiple
* partitions. Such behaviour is not enabled by default as there is still a performance penalty
* for queries that require segment-level computation ahead of time, such as points/range queries.
* This is an implementation limitation that we expect to improve in future releases; see the
* corresponding GitHub issue.
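*
* <p>A subclass sketch that enables segment partitioning (the threshold values are illustrative):
* <pre class="prettyprint">{@code
* IndexSearcher searcher = new IndexSearcher(reader) {
*   @Override
*   protected LeafSlice[] slices(List<LeafReaderContext> leaves) {
*     // delegate to the static helper with allowSegmentPartitions = true
*     return slices(leaves, 100_000, 4, true);
*   }
* };
* }</pre>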
*/
protected LeafSlice[] slices(List<LeafReaderContext> leaves) {
return slices(leaves, MAX_DOCS_PER_SLICE, MAX_SEGMENTS_PER_SLICE, false);
}
/**
* Static method to segregate LeafReaderContexts amongst multiple slices. Creates slices according
* to the provided max number of documents per slice and max number of segments per slice. Splits
* segments into partitions when the last argument is true.
*
* @param leaves the leaves to slice
* @param maxDocsPerSlice the maximum number of documents in a single slice
* @param maxSegmentsPerSlice the maximum number of segments in a single slice
* @param allowSegmentPartitions whether segments may be split into partitions according to the
* provided maxDocsPerSlice argument. When {@code true}, if a segment holds more
* documents than the provided max docs per slice, it is split into equal-size partitions that
* each get their own slice assigned.
* @return the array of slices
*/
public static LeafSlice[] slices(
List<LeafReaderContext> leaves,
int maxDocsPerSlice,
int maxSegmentsPerSlice,
boolean allowSegmentPartitions) {
// Make a copy so we can sort:
List<LeafReaderContext> sortedLeaves = new ArrayList<>(leaves);
// Sort by maxDoc, descending:
sortedLeaves.sort(Collections.reverseOrder(Comparator.comparingInt(l -> l.reader().maxDoc())));
if (allowSegmentPartitions) {
return slicesWithSegmentPartitions(maxDocsPerSlice, maxSegmentsPerSlice, sortedLeaves);
}
final List<List<LeafReaderContext>> groupedLeaves = new ArrayList<>();
long docSum = 0;
List<LeafReaderContext> group = null;
for (LeafReaderContext ctx : sortedLeaves) {
if (ctx.reader().maxDoc() > maxDocsPerSlice) {
assert group == null;
groupedLeaves.add(Collections.singletonList(ctx));
} else {
if (group == null) {
group = new ArrayList<>();
group.add(ctx);
groupedLeaves.add(group);
} else {
group.add(ctx);
}
docSum += ctx.reader().maxDoc();
if (group.size() >= maxSegmentsPerSlice || docSum > maxDocsPerSlice) {
group = null;
docSum = 0;
}
}
}
LeafSlice[] slices = new LeafSlice[groupedLeaves.size()];
int upto = 0;
for (List<LeafReaderContext> currentLeaf : groupedLeaves) {
slices[upto] = LeafSlice.entireSegments(currentLeaf);
++upto;
}
return slices;
}
private static LeafSlice[] slicesWithSegmentPartitions(
int maxDocsPerSlice, int maxSegmentsPerSlice, List<LeafReaderContext> sortedLeaves) {
final List<List<LeafReaderContextPartition>> groupedLeafPartitions = new ArrayList<>();
int currentSliceNumDocs = 0;
List<LeafReaderContextPartition> group = null;
for (LeafReaderContext ctx : sortedLeaves) {
if (ctx.reader().maxDoc() > maxDocsPerSlice) {
assert group == null;
// if the segment does not fit in a single slice, we split it into at most 5 partitions of
// equal size
int numSlices = Math.min(5, Math.ceilDiv(ctx.reader().maxDoc(), maxDocsPerSlice));
int numDocs = ctx.reader().maxDoc() / numSlices;
int maxDocId = numDocs;
int minDocId = 0;
for (int i = 0; i < numSlices - 1; i++) {
groupedLeafPartitions.add(
Collections.singletonList(
LeafReaderContextPartition.createFromAndTo(ctx, minDocId, maxDocId)));
minDocId = maxDocId;
maxDocId += numDocs;
}
// the last slice gets all the remaining docs
groupedLeafPartitions.add(
Collections.singletonList(
LeafReaderContextPartition.createFromAndTo(ctx, minDocId, ctx.reader().maxDoc())));
} else {
if (group == null) {
group = new ArrayList<>();
groupedLeafPartitions.add(group);
}
group.add(LeafReaderContextPartition.createForEntireSegment(ctx));
currentSliceNumDocs += ctx.reader().maxDoc();
// We only split a segment when it does not fit entirely in a slice. We don't partition the
// segment that makes the current slice (which holds multiple segments) go over
// maxDocsPerSlice. This means that a slice either contains multiple entire segments, or a
// single partition of a segment.
if (group.size() >= maxSegmentsPerSlice || currentSliceNumDocs > maxDocsPerSlice) {
group = null;
currentSliceNumDocs = 0;
}
}
}
LeafSlice[] slices = new LeafSlice[groupedLeafPartitions.size()];
int upto = 0;
for (List<LeafReaderContextPartition> currentGroup : groupedLeafPartitions) {
slices[upto] = new LeafSlice(currentGroup);
++upto;
}
return slices;
}
/** Return the {@link IndexReader} this searches. */
public IndexReader getIndexReader() {
return reader;
}
/**
* Returns a {@link StoredFields} reader for the stored fields of this index.
*
* <p>Sugar for {@code .getIndexReader().storedFields()}
*
* <p>This call never returns {@code null}, even if no stored fields were indexed. The returned
* instance should only be used by a single thread.
*
* <p>Example:
*
* <pre class="prettyprint">{@code
* TopDocs hits = searcher.search(query, 10);
* StoredFields storedFields = searcher.storedFields();
* for (ScoreDoc hit : hits.scoreDocs) {
*   Document doc = storedFields.document(hit.doc);
* }
* }</pre>
*
* @throws IOException If there is a low-level IO error
* @see IndexReader#storedFields()
*/
public StoredFields storedFields() throws IOException {
return reader.storedFields();
}
/** Expert: Set the Similarity implementation used by this IndexSearcher. */
public void setSimilarity(Similarity similarity) {
this.similarity = similarity;
}
/**
* Expert: Get the {@link Similarity} to use to compute scores. This returns the {@link
* Similarity} that has been set through {@link #setSimilarity(Similarity)} or the default {@link
* Similarity} if none has been set explicitly.
*/
public Similarity getSimilarity() {
return similarity;
}
/**
* Count how many documents match the given query. May be faster than counting the number of hits
* collecting all matches, as the number of hits is retrieved from the index statistics when
* possible.
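*
* <p>For example (a sketch; the field and term are illustrative):
* <pre class="prettyprint">{@code
* int numMatches = searcher.count(new TermQuery(new Term("body", "lucene")));
* }</pre>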
*/
public int count(Query query) throws IOException {
// Rewrite query before optimization check
query = rewrite(new ConstantScoreQuery(query));
if (query instanceof ConstantScoreQuery csq) {
query = csq.getQuery();
}
// Check if two clause disjunction optimization applies
if (query instanceof BooleanQuery booleanQuery
&& this.reader.hasDeletions() == false
&& booleanQuery.isTwoClausePureDisjunctionWithTerms()) {
Query[] queries = booleanQuery.rewriteTwoClauseDisjunctionWithTermsForCount(this);
int countTerm1 = count(queries[0]);
int countTerm2 = count(queries[1]);
if (countTerm1 == 0 || countTerm2 == 0) {
return Math.max(countTerm1, countTerm2);
// Only apply optimization if the intersection is significantly smaller than the union
} else if ((double) Math.min(countTerm1, countTerm2) / Math.max(countTerm1, countTerm2)
< 0.1) {
return countTerm1 + countTerm2 - count(queries[2]);
}
}
return search(new ConstantScoreQuery(query), new TotalHitCountCollectorManager(getSlices()));
}
/**
* Returns the leaf slices used for concurrent searching. Override {@link #slices(List)} to
* customize how slices are created.
*
* @lucene.experimental
*/
public final LeafSlice[] getSlices() {
LeafSlice[] res = leafSlices;
if (res == null) {
res = computeAndCacheSlices();
}
return res;
}
private synchronized LeafSlice[] computeAndCacheSlices() {
LeafSlice[] res = leafSlices;
if (res == null) {
res = slices(leafContexts);
/*
* Enforce that there aren't multiple leaf partitions within the same leaf slice pointing to the
* same leaf context. It is a requirement that {@link Collector#getLeafCollector(LeafReaderContext)}
* gets called once per leaf context. Also, it does not make sense to partition a segment to then search
* those partitions as part of the same slice, because the goal of partitioning is parallel searching
* which happens at the slice level.
*/
for (LeafSlice leafSlice : res) {
if (leafSlice.partitions.length <= 1) {
continue;
}
enforceDistinctLeaves(leafSlice);
}
leafSlices = res;
}
return res;
}
private static void enforceDistinctLeaves(LeafSlice leafSlice) {
Set<LeafReaderContext> distinctLeaves = new HashSet<>();
for (LeafReaderContextPartition leafPartition : leafSlice.partitions) {
if (distinctLeaves.add(leafPartition.ctx) == false) {
throw new IllegalStateException(
"The same slice targets multiple leaf partitions of the same leaf reader context. A physical segment should rather get partitioned to be searched concurrently from as many slices as the number of leaf partitions it is split into.");
}
}
}
/**
* Finds the top {@code n} hits for {@code query} where all results are after a previous
* result ({@code after}).
*
* <p>By passing the bottom result from a previous page as {@code after}, this method can be
* used for efficient 'deep-paging' across potentially large result sets.
*
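* <p>A deep-paging sketch:
* <pre class="prettyprint">{@code
* TopDocs firstPage = searcher.search(query, 10);
* // assumes the first page is non-empty
* ScoreDoc last = firstPage.scoreDocs[firstPage.scoreDocs.length - 1];
* TopDocs secondPage = searcher.searchAfter(last, query, 10);
* }</pre>
*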
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
public TopDocs searchAfter(ScoreDoc after, Query query, int numHits) throws IOException {
final int limit = Math.max(1, reader.maxDoc());
if (after != null && after.doc >= limit) {
throw new IllegalArgumentException(
"after.doc exceeds the number of documents in the reader: after.doc="
+ after.doc
+ " limit="
+ limit);
}
final int cappedNumHits = Math.min(numHits, limit);
CollectorManager<TopScoreDocCollector, TopDocs> manager =
new TopScoreDocCollectorManager(cappedNumHits, after, TOTAL_HITS_THRESHOLD);
return search(query, manager);
}
/**
* Get the configured {@link QueryTimeout} for all searches that run through this {@link
* IndexSearcher}, or {@code null} if not set.
*/
public QueryTimeout getTimeout() {
return this.queryTimeout;
}
/**
* Set a {@link QueryTimeout} for all searches that run through this {@link IndexSearcher}.
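*
* <p>A minimal sketch, assuming {@code QueryTimeout} exposes a single {@code shouldExit()}
* method (as in recent Lucene versions), that enforces a fixed deadline:
* <pre class="prettyprint">{@code
* long deadline = System.nanoTime() + TimeUnit.SECONDS.toNanos(2);
* searcher.setTimeout(() -> System.nanoTime() - deadline > 0);
* }</pre>
*/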
public void setTimeout(QueryTimeout queryTimeout) {
this.queryTimeout = queryTimeout;
}
/**
* Finds the top {@code n} hits for {@code query}.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
public TopDocs search(Query query, int n) throws IOException {
return searchAfter(null, query, n);
}
/**
* Lower-level search API.
*
* <p>{@link LeafCollector#collect(int)} is called for every matching document.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
* @deprecated This method is being deprecated in favor of {@link IndexSearcher#search(Query,
* CollectorManager)} due to its support for concurrency in IndexSearcher
*/
@Deprecated
public void search(Query query, Collector collector) throws IOException {
query = rewrite(query, collector.scoreMode().needsScores());
Weight weight = createWeight(query, collector.scoreMode(), 1);
collector.setWeight(weight);
for (LeafReaderContext ctx : leafContexts) { // search each subreader
searchLeaf(ctx, 0, DocIdSetIterator.NO_MORE_DOCS, weight, collector);
}
}
/** Returns true if any search hit the {@link #setTimeout(QueryTimeout) timeout}. */
public boolean timedOut() {
return partialResult;
}
/**
* Search implementation with arbitrary sorting, plus control over whether hit scores and max
* score should be computed. Finds the top {@code n} hits for {@code query}, sorting
* the hits by the criteria in {@code sort}. If {@code doDocScores} is {@code true}
* then the score of each hit will be computed and returned. If {@code doMaxScore} is
* {@code true} then the maximum score over all collected hits will be computed.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
public TopFieldDocs search(Query query, int n, Sort sort, boolean doDocScores)
throws IOException {
return searchAfter(null, query, n, sort, doDocScores);
}
/**
* Search implementation with arbitrary sorting.
*
* @param query The query to search for
* @param n Return only the top n results
* @param sort The {@link org.apache.lucene.search.Sort} object
* @return The top docs, sorted according to the supplied {@link org.apache.lucene.search.Sort}
* instance
* @throws IOException if there is a low-level I/O error
*/
public TopFieldDocs search(Query query, int n, Sort sort) throws IOException {
return searchAfter(null, query, n, sort, false);
}
/**
* Finds the top {@code n} hits for {@code query} where all results are after a previous
* result ({@code after}).
*
* <p>By passing the bottom result from a previous page as {@code after}, this method can be
* used for efficient 'deep-paging' across potentially large result sets.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException {
return searchAfter(after, query, n, sort, false);
}
/**
* Finds the top {@code n} hits for {@code query} where all results are after a previous
* result ({@code after}), allowing control over whether hit scores and max score should be
* computed.
*
* <p>By passing the bottom result from a previous page as {@code after}, this method can be
* used for efficient 'deep-paging' across potentially large result sets. If {@code doDocScores}
* is {@code true} then the score of each hit will be computed and returned. If
* {@code doMaxScore} is {@code true} then the maximum score over all collected hits
* will be computed.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
public TopFieldDocs searchAfter(
ScoreDoc after, Query query, int numHits, Sort sort, boolean doDocScores) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return searchAfter((FieldDoc) after, query, numHits, sort, doDocScores);
}
private TopFieldDocs searchAfter(
FieldDoc after, Query query, int numHits, Sort sort, boolean doDocScores) throws IOException {
final int limit = Math.max(1, reader.maxDoc());
if (after != null && after.doc >= limit) {
throw new IllegalArgumentException(
"after.doc exceeds the number of documents in the reader: after.doc="
+ after.doc
+ " limit="
+ limit);
}
final int cappedNumHits = Math.min(numHits, limit);
final Sort rewrittenSort = sort.rewrite(this);
final CollectorManager<TopFieldCollector, TopFieldDocs> manager =
new TopFieldCollectorManager(rewrittenSort, cappedNumHits, after, TOTAL_HITS_THRESHOLD);
TopFieldDocs topDocs = search(query, manager);
if (doDocScores) {
TopFieldCollector.populateScores(topDocs.scoreDocs, this, query);
}
return topDocs;
}
/**
* Lower-level search API. Search all leaves using the given {@link CollectorManager}. In contrast
* to {@link #search(Query, Collector)}, this method will use the searcher's {@link Executor} in
* order to parallelize execution of the collection on the configured {@link #getSlices()}.
*
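* <p>For example, a sketch that counts all hits using the searcher's configured slices:
* <pre class="prettyprint">{@code
* int totalHits = searcher.search(query, new TotalHitCountCollectorManager(searcher.getSlices()));
* }</pre>
*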
* @see CollectorManager
* @lucene.experimental
*/
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
throws IOException {
final C firstCollector = collectorManager.newCollector();
query = rewrite(query, firstCollector.scoreMode().needsScores());
final Weight weight = createWeight(query, firstCollector.scoreMode(), 1);
return search(weight, collectorManager, firstCollector);
}
private <C extends Collector, T> T search(
Weight weight, CollectorManager<C, T> collectorManager, C firstCollector) throws IOException {
final LeafSlice[] leafSlices = getSlices();
if (leafSlices.length == 0) {
// there are no segments, nothing to offload to the executor, but we do need to call reduce to
// create some kind of empty result
assert leafContexts.isEmpty();
return collectorManager.reduce(Collections.singletonList(firstCollector));
} else {
final List<C> collectors = new ArrayList<>(leafSlices.length);
collectors.add(firstCollector);
final ScoreMode scoreMode = firstCollector.scoreMode();
for (int i = 1; i < leafSlices.length; ++i) {
final C collector = collectorManager.newCollector();
collectors.add(collector);
if (scoreMode != collector.scoreMode()) {
throw new IllegalStateException(
"CollectorManager does not always produce collectors with the same score mode");
}
}
final List<Callable<C>> listTasks = new ArrayList<>(leafSlices.length);
for (int i = 0; i < leafSlices.length; ++i) {
final LeafReaderContextPartition[] leaves = leafSlices[i].partitions;
final C collector = collectors.get(i);
listTasks.add(
() -> {
search(leaves, weight, collector);
return collector;
});
}
List<C> results = taskExecutor.invokeAll(listTasks);
return collectorManager.reduce(results);
}
}
/**
* Lower-level search API.
*
* <p>{@link #searchLeaf(LeafReaderContext, int, int, Weight, Collector)} is called for every leaf
* partition.
*
* <p>NOTE: this method executes the searches on all given leaf partitions exclusively. To search
* across all of the searcher's leaves use {@link #leafContexts}.
*
* @param partitions the leaf partitions to execute the searches on
* @param weight to match documents
* @param collector to receive hits
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
protected void search(LeafReaderContextPartition[] partitions, Weight weight, Collector collector)
throws IOException {
collector.setWeight(weight);
for (LeafReaderContextPartition partition : partitions) { // search each subreader partition
searchLeaf(partition.ctx, partition.minDocId, partition.maxDocId, weight, collector);
}
}
/**
* Lower-level search API.
*
* <p>{@link LeafCollector#collect(int)} is called for every document.
*
* @param ctx the leaf to execute the search against
* @param minDocId the lower bound of the doc id range to search
* @param maxDocId the upper bound of the doc id range to search
* @param weight to match documents
* @param collector to receive hits
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
protected void searchLeaf(
LeafReaderContext ctx, int minDocId, int maxDocId, Weight weight, Collector collector)
throws IOException {
final LeafCollector leafCollector;
try {
leafCollector = collector.getLeafCollector(ctx);
} catch (
@SuppressWarnings("unused")
CollectionTerminatedException e) {
// there is no doc of interest in this reader context
// continue with the following leaf
return;
}
ScorerSupplier scorerSupplier = weight.scorerSupplier(ctx);
if (scorerSupplier != null) {
scorerSupplier.setTopLevelScoringClause();
BulkScorer scorer = scorerSupplier.bulkScorer();
if (queryTimeout != null) {
scorer = new TimeLimitingBulkScorer(scorer, queryTimeout);
}
try {
// Optimize for the case when live docs are stored in a FixedBitSet.
Bits acceptDocs = ScorerUtil.likelyLiveDocs(ctx.reader().getLiveDocs());
scorer.score(leafCollector, acceptDocs, minDocId, maxDocId);
} catch (
@SuppressWarnings("unused")
CollectionTerminatedException e) {
// collection was terminated prematurely
// continue with the following leaf
} catch (
@SuppressWarnings("unused")
TimeLimitingBulkScorer.TimeExceededException e) {
partialResult = true;
}
}
// Note: this is called if collection ran successfully, including the above special cases of
// CollectionTerminatedException and TimeExceededException, but no other exception.
leafCollector.finish();
}
/**
* Expert: called to re-write queries into primitive queries.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
public Query rewrite(Query original) throws IOException {
Query query = original;
for (Query rewrittenQuery = query.rewrite(this);
rewrittenQuery != query;
rewrittenQuery = query.rewrite(this)) {
query = rewrittenQuery;
}
query.visit(getNumClausesCheckVisitor());
return query;
}
private Query rewrite(Query original, boolean needsScores) throws IOException {
if (needsScores) {
return rewrite(original);
} else {
// Take advantage of the few extra rewrite rules of ConstantScoreQuery.
return rewrite(new ConstantScoreQuery(original));
}
}
/**
* Returns a QueryVisitor which recursively checks the total number of clauses that a query and
* its children cumulatively have and validates that the total number does not exceed the
* specified limit. Throws {@link TooManyNestedClauses} if the limit is exceeded.
*/
private static QueryVisitor getNumClausesCheckVisitor() {
return new QueryVisitor() {
int numClauses;
@Override
public QueryVisitor getSubVisitor(BooleanClause.Occur occur, Query parent) {
// Return this instance even for MUST_NOT clauses, rather than an empty QueryVisitor
return this;
}
@Override
public void visitLeaf(Query query) {
if (numClauses > maxClauseCount) {
throw new TooManyNestedClauses();
}
++numClauses;
}
@Override
public void consumeTerms(Query query, Term... terms) {
if (numClauses > maxClauseCount) {
throw new TooManyNestedClauses();
}
++numClauses;
}
@Override
public void consumeTermsMatching(
Query query, String field, Supplier<ByteRunAutomaton> automaton) {
if (numClauses > maxClauseCount) {
throw new TooManyNestedClauses();
}
++numClauses;
}
};
}
/**
* Returns an Explanation that describes how {@code doc} scored against {@code query}.
*
* <p>This is intended to be used in developing Similarity implementations, and, for good
* performance, should not be displayed with every hit. Computing an explanation is as expensive
* as executing the query over the entire index.
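*
* <p>For example, a sketch that explains the top hit:
* <pre class="prettyprint">{@code
* TopDocs hits = searcher.search(query, 1);
* // assumes the query matches at least one document
* Explanation explanation = searcher.explain(query, hits.scoreDocs[0].doc);
* }</pre>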
*/
public Explanation explain(Query query, int doc) throws IOException {
query = rewrite(query);
return explain(createWeight(query, ScoreMode.COMPLETE, 1), doc);
}
/**
* Expert: low-level implementation method. Returns an Explanation that describes how {@code doc}
* scored against {@code weight}.
*
* <p>This is intended to be used in developing Similarity implementations, and, for good
* performance, should not be displayed with every hit. Computing an explanation is as expensive
* as executing the query over the entire index.
*
* <p>Applications should call {@link IndexSearcher#explain(Query, int)}.
*
* @throws TooManyClauses If a query would exceed {@link IndexSearcher#getMaxClauseCount()}
* clauses.
*/
protected Explanation explain(Weight weight, int doc) throws IOException {
int n = ReaderUtil.subIndex(doc, leafContexts);
final LeafReaderContext ctx = leafContexts.get(n);
int deBasedDoc = doc - ctx.docBase;
final Bits liveDocs = ctx.reader().getLiveDocs();
if (liveDocs != null && liveDocs.get(deBasedDoc) == false) {
return Explanation.noMatch("Document " + doc + " is deleted");
}
return weight.explain(ctx, deBasedDoc);
}
/**
* Creates a {@link Weight} for the given query, potentially adding caching if possible and
* configured.
*
* @lucene.experimental
*/
public Weight createWeight(Query query, ScoreMode scoreMode, float boost) throws IOException {
final QueryCache queryCache = this.queryCache;
Weight weight = query.createWeight(this, scoreMode, boost);
if (scoreMode.needsScores() == false && queryCache != null) {
weight = queryCache.doCache(weight, queryCachingPolicy);
}
return weight;
}
/**
* Returns this searcher's top-level {@link IndexReaderContext}.
*
* @see IndexReader#getContext()
*/
/* sugar for #getReader().getTopReaderContext() */
public IndexReaderContext getTopReaderContext() {
return readerContext;
}
/**
* A class holding a subset of the {@link IndexSearcher}'s leaf contexts to be executed within a
* single thread. A leaf slice holds references to one or more {@link LeafReaderContextPartition}
* instances. Each partition targets a specific doc id range of a {@link LeafReaderContext}.
*
* @lucene.experimental
*/
public static class LeafSlice {
private static final Comparator<LeafReaderContextPartition> COMPARATOR =
Comparator.comparingInt((LeafReaderContextPartition l) -> l.ctx.docBase)
.thenComparingInt(l -> l.minDocId);
/**
* The leaves that make up this slice.
*
* @lucene.experimental
*/
public final LeafReaderContextPartition[] partitions;
private final int maxDocs;
public LeafSlice(List<LeafReaderContextPartition> partitions) {
this(partitions.toArray(new LeafReaderContextPartition[0]));
}
private static LeafSlice entireSegments(List<LeafReaderContext> contexts) {
int count = contexts.size();
LeafReaderContextPartition[] parts = new LeafReaderContextPartition[count];
for (int i = 0; i < count; i++) {
parts[i] = LeafReaderContextPartition.createForEntireSegment(contexts.get(i));
}
return new LeafSlice(parts);
}
private LeafSlice(LeafReaderContextPartition... leafReaderContextPartitions) {
Arrays.sort(leafReaderContextPartitions, COMPARATOR);
this.partitions = leafReaderContextPartitions;
int maxDocs = 0;
for (LeafReaderContextPartition partition : partitions) {
maxDocs += partition.maxDocs;
}
this.maxDocs = maxDocs;
}
/**
* Returns the total number of docs that a slice targets, by summing the number of docs that
* each of its leaf context partitions targets.
*/
public int getMaxDocs() {
return maxDocs;
}
}
/**
* Holds information about a specific leaf context and the corresponding range of doc ids to
* search within. Used to optionally search across partitions of the same segment concurrently.
*
* <p>A partition instance can be created via {@link #createForEntireSegment(LeafReaderContext)},
* in which case it will target the entire provided {@link LeafReaderContext}. A true partition of
* a segment can be created via {@link #createFromAndTo(LeafReaderContext, int, int)}, providing
* the minimum doc id (inclusive) to search as well as the max doc id (exclusive).
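*
* <p>For example, a sketch that splits a segment in half (assumes the segment holds at least
* two documents):
* <pre class="prettyprint">{@code
* LeafReaderContext ctx = reader.leaves().get(0);
* int mid = ctx.reader().maxDoc() / 2;
* LeafReaderContextPartition first = LeafReaderContextPartition.createFromAndTo(ctx, 0, mid);
* LeafReaderContextPartition second =
*     LeafReaderContextPartition.createFromAndTo(ctx, mid, ctx.reader().maxDoc());
* }</pre>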
*
* @lucene.experimental
*/
public static final class LeafReaderContextPartition {
public final int minDocId;
public final int maxDocId;
public final LeafReaderContext ctx;
// we keep track of maxDocs separately because we use NO_MORE_DOCS as upper bound when targeting
// the entire segment. We use this only in tests.
private final int maxDocs;
private LeafReaderContextPartition(
LeafReaderContext leafReaderContext, int minDocId, int maxDocId, int maxDocs) {
if (minDocId >= maxDocId) {
throw new IllegalArgumentException(
"minDocId is greater than or equal to maxDocId: ["
+ minDocId
+ "] > ["
+ maxDocId
+ "]");
}
if (minDocId < 0) {
throw new IllegalArgumentException("minDocId is lower than 0: [" + minDocId + "]");
}
if (minDocId >= leafReaderContext.reader().maxDoc()) {
throw new IllegalArgumentException(
"minDocId is greater than than maxDoc: ["
+ minDocId
+ "] > ["
+ leafReaderContext.reader().maxDoc()
+ "]");
}
this.ctx = leafReaderContext;
this.minDocId = minDocId;
this.maxDocId = maxDocId;
this.maxDocs = maxDocs;
}
/** Creates a partition of the provided leaf context that targets the entire segment */
public static LeafReaderContextPartition createForEntireSegment(LeafReaderContext ctx) {
return new LeafReaderContextPartition(
ctx, 0, DocIdSetIterator.NO_MORE_DOCS, ctx.reader().maxDoc());
}
/**
* Creates a partition of the provided leaf context that targets a subset of the entire segment,
* from the provided min doc id (inclusive) up to the provided max doc id (exclusive)
*/
public static LeafReaderContextPartition createFromAndTo(
LeafReaderContext ctx, int minDocId, int maxDocId) {
assert maxDocId != DocIdSetIterator.NO_MORE_DOCS;
return new LeafReaderContextPartition(ctx, minDocId, maxDocId, maxDocId - minDocId);
}
}
@Override
public String toString() {
return "IndexSearcher(" + reader + "; taskExecutor=" + taskExecutor + ")";
}
/**
* Returns {@link TermStatistics} for a term.
*
* <p>This can be overridden, for example, to return a term's statistics across a distributed
* collection.
*
* @param docFreq The document frequency of the term. It must be greater than or equal to 1.
* @param totalTermFreq The total term frequency.
* @return A {@link TermStatistics} (never null).
* @lucene.experimental
*/
public TermStatistics termStatistics(Term term, int docFreq, long totalTermFreq)
throws IOException {
// This constructor will throw an exception if docFreq <= 0.
return new TermStatistics(term.bytes(), docFreq, totalTermFreq);
}
/**
* Returns {@link CollectionStatistics} for a field, or {@code null} if the field does not exist
* (has no indexed terms)
*
* <p>This can be overridden, for example, to return a field's statistics across a distributed
* collection.
*
* @lucene.experimental
*/
public CollectionStatistics collectionStatistics(String field) throws IOException {
assert field != null;
long docCount = 0;
long sumTotalTermFreq = 0;
long sumDocFreq = 0;
for (LeafReaderContext leaf : reader.leaves()) {
final Terms terms = Terms.getTerms(leaf.reader(), field);
docCount += terms.getDocCount();
sumTotalTermFreq += terms.getSumTotalTermFreq();
sumDocFreq += terms.getSumDocFreq();
}
if (docCount == 0) {
return null;
}
return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq);
}
/**
* Returns the {@link TaskExecutor} that this searcher relies on to execute concurrent operations.
*
* @return the task executor
*/
public TaskExecutor getTaskExecutor() {
return taskExecutor;
}
/**
* Thrown when an attempt is made to add more than {@link #getMaxClauseCount()} clauses. This
* typically happens if a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery is expanded to
* many terms during search.
*/
public static class TooManyClauses extends RuntimeException {
private final int maxClauseCount;
public TooManyClauses(String msg) {
super(msg);
this.maxClauseCount = IndexSearcher.getMaxClauseCount();
}
public TooManyClauses() {
this("maxClauseCount is set to " + IndexSearcher.getMaxClauseCount());
}
/** The value of {@link IndexSearcher#getMaxClauseCount()} when this Exception was created */
public int getMaxClauseCount() {
return maxClauseCount;
}
}
/**
* Thrown when a client attempts to execute a Query that has more than {@link
* #getMaxClauseCount()} total clauses cumulatively in all of its children.
*
* @see #rewrite
*/
public static class TooManyNestedClauses extends TooManyClauses {
public TooManyNestedClauses() {
super(
"Query contains too many nested clauses; maxClauseCount is set to "
+ IndexSearcher.getMaxClauseCount());
}
}
}