All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.lucene.search.grouping.TopGroupsCollector Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.search.grouping;

import java.io.IOException;
import java.util.Collection;
import java.util.Objects;
import java.util.function.Supplier;

import org.apache.lucene.search.FilterCollector;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.util.ArrayUtil;

/**
 * A second-pass collector that collects the TopDocs for each group, and
 * returns them as a {@link TopGroups} object
 *
 * @param  the type of the group value
 */
public class TopGroupsCollector extends SecondPassGroupingCollector {

  private final Sort groupSort;
  private final Sort withinGroupSort;
  private final int maxDocsPerGroup;

  /**
   * Create a new TopGroupsCollector
   * @param groupSelector     the group selector used to define groups
   * @param groups            the groups to collect TopDocs for
   * @param groupSort         the order in which groups are returned
   * @param withinGroupSort   the order in which documents are sorted in each group
   * @param maxDocsPerGroup   the maximum number of docs to collect for each group
   * @param getMaxScores      if true, record the maximum score for each group
   */
  public TopGroupsCollector(GroupSelector groupSelector, Collection> groups, Sort groupSort, Sort withinGroupSort,
                            int maxDocsPerGroup, boolean getMaxScores) {
    super(groupSelector, groups,
        new TopDocsReducer<>(withinGroupSort, maxDocsPerGroup, getMaxScores));
    this.groupSort = Objects.requireNonNull(groupSort);
    this.withinGroupSort = Objects.requireNonNull(withinGroupSort);
    this.maxDocsPerGroup = maxDocsPerGroup;

  }

  private static class MaxScoreCollector extends SimpleCollector {
    private Scorable scorer;
    private float maxScore = Float.MIN_VALUE;
    private boolean collectedAnyHits = false;

    public MaxScoreCollector() {}

    public float getMaxScore() {
      return collectedAnyHits ? maxScore : Float.NaN;
    }

    @Override
    public ScoreMode scoreMode() {
      return ScoreMode.COMPLETE;
    }

    @Override
    public void setScorer(Scorable scorer) {
      this.scorer = scorer;
    }

    @Override
    public void collect(int doc) throws IOException {
      collectedAnyHits = true;
      maxScore = Math.max(scorer.score(), maxScore);
    }
  }

  private static class TopDocsAndMaxScoreCollector extends FilterCollector {
    private final TopDocsCollector topDocsCollector;
    private final MaxScoreCollector maxScoreCollector;
    private final boolean sortedByScore;
    
    public TopDocsAndMaxScoreCollector(boolean sortedByScore, TopDocsCollector topDocsCollector, MaxScoreCollector maxScoreCollector) {
      super(MultiCollector.wrap(topDocsCollector, maxScoreCollector));
      this.sortedByScore = sortedByScore;
      this.topDocsCollector = topDocsCollector;
      this.maxScoreCollector = maxScoreCollector;
    }
  }

  private static class TopDocsReducer extends GroupReducer {

    private final Supplier supplier;
    private final boolean needsScores;

    TopDocsReducer(Sort withinGroupSort,
                   int maxDocsPerGroup, boolean getMaxScores) {
      this.needsScores = getMaxScores || withinGroupSort.needsScores();
      if (withinGroupSort == Sort.RELEVANCE) {
        supplier = () -> new TopDocsAndMaxScoreCollector(true, TopScoreDocCollector.create(maxDocsPerGroup, Integer.MAX_VALUE), null);
      } else {
        supplier = () -> {
          TopFieldCollector topDocsCollector = TopFieldCollector.create(withinGroupSort, maxDocsPerGroup, Integer.MAX_VALUE); // TODO: disable exact counts?
          MaxScoreCollector maxScoreCollector = getMaxScores ? new MaxScoreCollector() : null;
          return new TopDocsAndMaxScoreCollector(false, topDocsCollector, maxScoreCollector);
        };
      }
    }

    @Override
    public boolean needsScores() {
      return needsScores;
    }

    @Override
    protected TopDocsAndMaxScoreCollector newCollector() {
      return supplier.get();
    }
  }

  /**
   * Get the TopGroups recorded by this collector
   * @param withinGroupOffset the offset within each group to start collecting documents
   */
  public TopGroups getTopGroups(int withinGroupOffset) {
    @SuppressWarnings({"unchecked","rawtypes"})
    final GroupDocs[] groupDocsResult = (GroupDocs[]) new GroupDocs[groups.size()];

    int groupIDX = 0;
    float maxScore = Float.MIN_VALUE;
    for(SearchGroup group : groups) {
      TopDocsAndMaxScoreCollector collector = (TopDocsAndMaxScoreCollector) groupReducer.getCollector(group.groupValue);
      final TopDocs topDocs;
      final float groupMaxScore;
      if (collector.sortedByScore) {
        TopDocs allTopDocs = collector.topDocsCollector.topDocs();
        groupMaxScore = allTopDocs.scoreDocs.length == 0 ? Float.NaN : allTopDocs.scoreDocs[0].score;
        if (allTopDocs.scoreDocs.length <= withinGroupOffset) {
          topDocs = new TopDocs(allTopDocs.totalHits, new ScoreDoc[0]);
        } else {
          topDocs = new TopDocs(allTopDocs.totalHits, ArrayUtil.copyOfSubArray(allTopDocs.scoreDocs, withinGroupOffset, Math.min(allTopDocs.scoreDocs.length, withinGroupOffset + maxDocsPerGroup)));
        }
      } else {
        topDocs = collector.topDocsCollector.topDocs(withinGroupOffset, maxDocsPerGroup);
        if (collector.maxScoreCollector == null) {
          groupMaxScore = Float.NaN;
        } else {
          groupMaxScore = collector.maxScoreCollector.getMaxScore();
        }
      }
      
      groupDocsResult[groupIDX++] = new GroupDocs<>(Float.NaN,
          groupMaxScore,
          topDocs.totalHits,
          topDocs.scoreDocs,
          group.groupValue,
          group.sortValues);
      maxScore = Math.max(maxScore, groupMaxScore);
    }

    return new TopGroups<>(groupSort.getSort(),
        withinGroupSort.getSort(),
        totalHitCount, totalGroupedHitCount, groupDocsResult,
        maxScore);
  }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy