org.apache.lucene.search.TopFieldCollector Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;


import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.MaxScoreAccumulator.DocAndScore;
import org.apache.lucene.search.TotalHits.Relation;
import org.apache.lucene.util.FutureObjects;

/**
 * A {@link Collector} that sorts by {@link SortField} using
 * {@link FieldComparator}s.
 * 
 * See the {@link #create(org.apache.lucene.search.Sort, int, int)} method
 * for instantiating a TopFieldCollector.
 *
 * @lucene.experimental
 */
public abstract class TopFieldCollector extends TopDocsCollector {

  // TODO: one optimization we could do is to pre-fill
  // the queue with a sentinel value that is guaranteed to
  // always compare lower than a real hit; this would
  // save having to check queueFull on each insert

  /**
   * Base {@link LeafCollector} that exposes one or more per-segment comparators
   * through a single {@link LeafFieldComparator}, and remembers the current scorer.
   */
  private abstract static class MultiComparatorLeafCollector implements LeafCollector {

    /** The sole comparator, or a {@link MultiLeafFieldComparator} wrapping all of them. */
    final LeafFieldComparator comparator;
    /**
     * Multiplier for results of {@link #comparator}: the first entry of the supplied
     * array when there is one comparator, otherwise 1 (the per-field multipliers are
     * then handed to the wrapping comparator).
     */
    final int reverseMul;
    /** Scorer most recently passed to {@link #setScorer(Scorable)}. */
    Scorable scorer;

    /**
     * @param comparators per-segment comparators, one per sort field
     * @param reverseMul  multipliers matching {@code comparators} by index
     */
    MultiComparatorLeafCollector(LeafFieldComparator[] comparators, int[] reverseMul) {
      final boolean singleComparator = comparators.length == 1;
      this.reverseMul = singleComparator ? reverseMul[0] : 1;
      this.comparator = singleComparator
          ? comparators[0]
          : new MultiLeafFieldComparator(comparators, reverseMul);
    }

    /** Forwards the scorer to the comparator, then stores it in {@link #scorer}. */
    @Override
    public void setScorer(Scorable scorer) throws IOException {
      comparator.setScorer(scorer);
      this.scorer = scorer;
    }
  }

  /**
   * Tells whether collection may terminate early for the given sorts: either the
   * search sorts primarily by doc id, or the search sort is a prefix of the index sort.
   *
   * @param searchSort sort requested by the search
   * @param indexSort  sort of the index; {@code null} is accepted
   */
  static boolean canEarlyTerminate(Sort searchSort, Sort indexSort) {
    if (canEarlyTerminateOnDocId(searchSort)) {
      return true;
    }
    return canEarlyTerminateOnPrefix(searchSort, indexSort);
  }

  /** Returns true when the first field of {@code searchSort} equals {@link SortField#FIELD_DOC}. */
  private static boolean canEarlyTerminateOnDocId(Sort searchSort) {
    final SortField primaryField = searchSort.getSort()[0];
    return SortField.FIELD_DOC.equals(primaryField);
  }

  /**
   * Returns true when the fields of {@code searchSort} form a prefix of the fields of
   * {@code indexSort}. Always false when {@code indexSort} is {@code null}.
   */
  private static boolean canEarlyTerminateOnPrefix(Sort searchSort, Sort indexSort) {
    if (indexSort == null) {
      return false;
    }
    final SortField[] searchFields = searchSort.getSort();
    final SortField[] indexFields = indexSort.getSort();
    final int prefixLength = searchFields.length;
    if (prefixLength > indexFields.length) {
      // the search sort has more fields than the index sort, so it cannot be a prefix
      return false;
    }
    // compare the search fields element-wise against the leading index fields
    return Arrays.equals(searchFields, Arrays.copyOf(indexFields, prefixLength));
  }

  /*
   * Implements a TopFieldCollector that does not page (it takes no "after"
   * document). Hits are ordered by the comparators of the given Sort, which
   * may consist of one or several sort fields.
   */
  private static class SimpleFieldCollector extends TopFieldCollector {
    // Sort requested by the search; checked against the index sort in getLeafCollector.
    final Sort sort;
    // Hit queue; also the source of the per-leaf comparators and reverse multipliers.
    final FieldValueHitQueue queue;

    /**
     * Passes {@code queue}, {@code numHits}, {@code hitsThresholdChecker},
     * {@code sort.needsScores()} and {@code minScoreAcc} to the superclass constructor
     * and keeps references to {@code sort} and {@code queue}.
     */
    public SimpleFieldCollector(Sort sort, FieldValueHitQueue queue, int numHits,
                                HitsThresholdChecker hitsThresholdChecker,
                                MaxScoreAccumulator minScoreAcc) {
      super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
      this.sort = sort;
      this.queue = queue;
    }

    /**
     * Returns the collector for one segment. On the first call it also decides, from
     * that segment's index sort, whether the search sort permits early termination;
     * the decision is kept in {@code searchSortPartOfIndexSort} for later segments.
     *
     * @param context the segment about to be collected
     * @throws IOException propagated from obtaining the per-segment comparators
     */
    @Override
    public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
      // reset the minimum competitive score
      minCompetitiveScore = 0f;
      docBase = context.docBase;

      // as all segments are sorted in the same way, enough to check only the 1st segment for indexSort
      if (searchSortPartOfIndexSort == null) {
        final Sort indexSort = context.reader().getMetaData().getSort();
        searchSortPartOfIndexSort = canEarlyTerminate(sort, indexSort);
        if (searchSortPartOfIndexSort) {
          // NOTE(review): presumably the comparator's own skipping is redundant once
          // early termination applies -- confirm against FieldComparator.
          firstComparator.disableSkipping();
        }
      }

      final LeafFieldComparator[] comparators = queue.getComparators(context);
      final int[] reverseMul = queue.getReverseMul();

      return new MultiComparatorLeafCollector(comparators, reverseMul) {

        // Set when a non-competitive hit is seen on an index-sorted segment before the
        // hits threshold is reached; from then on hits are counted but no longer compared.
        boolean collectedAllCompetitiveHits = false;

        @Override
        public void setScorer(Scorable scorer) throws IOException {
          super.setScorer(scorer);
          // Use the shared accumulator when one was supplied, otherwise only local state.
          if (minScoreAcc == null) {
            updateMinCompetitiveScore(scorer);
          } else {
            updateGlobalMinCompetitiveScore(scorer);
          }
        }

        @Override
        public void collect(int doc) throws IOException {
          // Every visited doc counts as a hit, whether or not it ends up in the queue.
          ++totalHits;
          hitsThresholdChecker.incrementHitCount();

          // NOTE(review): modInterval looks like a bit mask so that this runs only
          // every so many hits -- confirm against MaxScoreAccumulator.
          if (minScoreAcc != null && (totalHits & minScoreAcc.modInterval) == 0) {
            updateGlobalMinCompetitiveScore(scorer);
          }
          if (scoreMode.isExhaustive() == false && totalHitsRelation == TotalHits.Relation.EQUAL_TO &&
              hitsThresholdChecker.isThresholdReached()) {
            // for the first time hitsThreshold is reached, notify comparator about this
            comparator.setHitsThresholdReached();
            totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
          }

          if (queueFull) {
            if (collectedAllCompetitiveHits || reverseMul * comparator.compareBottom(doc) <= 0) {
              // since docs are visited in doc Id order, if compare is 0, it means
              // this document is larger than anything else in the queue, and
              // therefore not competitive.
              if (searchSortPartOfIndexSort) {
                if (hitsThresholdChecker.isThresholdReached()) {
                  // Enough hits were counted: stop collecting; the total is now a lower bound.
                  totalHitsRelation = Relation.GREATER_THAN_OR_EQUAL_TO;
                  throw new CollectionTerminatedException();
                } else {
                  // Keep counting hits, but skip the comparison from now on.
                  collectedAllCompetitiveHits = true;
                }
              } else if (totalHitsRelation == Relation.EQUAL_TO) {
                // we can start setting the min competitive score if the
                // threshold is reached for the first time here.
                updateMinCompetitiveScore(scorer);
              }
              return;
            }

            // This hit is competitive - replace bottom element in queue & adjustTop
            comparator.copy(bottom.slot, doc);
            updateBottom(doc);
            comparator.setBottom(bottom.slot);
            updateMinCompetitiveScore(scorer);
          } else {
            // Startup transient: queue hasn't gathered numHits yet
            final int slot = totalHits - 1;

            // Copy hit into queue
            comparator.copy(slot, doc);
            add(slot, doc);
            // add(...) may have filled the queue; if so, publish the bottom slot
            // to the comparator before the next hit is compared.
            if (queueFull) {
              comparator.setBottom(bottom.slot);
              updateMinCompetitiveScore(scorer);
            }
          }
        }

        // Delegates to the comparator's competitive iterator.
        @Override
        public DocIdSetIterator competitiveIterator() throws IOException {
          return comparator.competitiveIterator();
        }

      };
    }

  }

  /*
   * Implements a TopFieldCollector when after != null.
   */
  private final static class PagingFieldCollector extends TopFieldCollector {

    final Sort sort;
    int collectedHits;
    final FieldValueHitQueue queue;
    final FieldDoc after;

    public PagingFieldCollector(Sort sort, FieldValueHitQueue queue, FieldDoc after, int numHits,
                                HitsThresholdChecker hitsThresholdChecker, MaxScoreAccumulator minScoreAcc) {
      super(queue, numHits, hitsThresholdChecker, sort.needsScores(), minScoreAcc);
      this.sort = sort;
      this.queue = queue;
      this.after = after;

      FieldComparator[] comparators = queue.comparators;
      // Tell all comparators their top value:
      for(int i=0;i comparator = (FieldComparator