org.elasticsearch.lucene.grouping.SinglePassGroupingCollector Maven / Gradle / Ivy
/*
* @notice
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Modifications copyright (C) 2020 Elasticsearch B.V.
*/
package org.elasticsearch.lucene.grouping;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.grouping.GroupSelector;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.MappedFieldType;
import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeSet;
import static org.apache.lucene.search.SortField.Type.SCORE;
/**
* A collector that groups documents based on field values and returns {@link TopFieldGroups}
* output. The grouping is done in a single pass by selecting only the top sorted document per grouping key.
* The value used for the key of each group can be found in {@link TopFieldGroups#groupValues}.
*
* This collector optionally supports searching after a previous result through the 'after' parameter.
*
* TODO: If the sort is based on score we should propagate the minimum competitive score when orderedGroups
* is full. This is safe for grouping since the group sort is the same as the query sort.
*/
public class SinglePassGroupingCollector extends SimpleCollector {
/**
 * Associates a document with the value of the group it belongs to and a slot number.
 *
 * <p>The entry is a {@link ScoreDoc} whose score is initialised to {@link Float#NaN}
 * by the constructor.
 *
 * @param <T> the type of the group value
 */
private static class SearchGroup<T> extends ScoreDoc {
    /** The value of the group this entry stands for. */
    T groupValue;
    /** Slot number assigned to this entry (presumably the comparator slot holding its sort values; confirm against the collect logic). */
    int slot;

    /**
     * @param doc        the document id handed to {@link ScoreDoc}
     * @param slot       the slot number assigned to this entry
     * @param groupValue the value of the group this entry stands for
     */
    SearchGroup(int doc, int slot, T groupValue) {
        // No score is known at construction time, so NaN is passed as the score.
        super(doc, Float.NaN);
        this.slot = slot;
        this.groupValue = groupValue;
    }

    /** Prefixes the {@link ScoreDoc} representation with the slot number. */
    @Override
    public String toString() {
        return "slot:" + slot + " " + super.toString();
    }
}
/**
* Creates a {@link SinglePassGroupingCollector} on a {@link NumericDocValues} field.
* It accepts also {@link SortedNumericDocValues} field but
* the collect will fail with an {@link IllegalStateException} if a document contains more than one value for the
* field.
*
* @param groupField The sort field used to group documents.
* @param groupFieldType The {@link MappedFieldType} for this sort field.
* @param groupSort The {@link Sort} used to sort the groups.
* The grouping keeps only the top sorted document per grouping key.
* This must be non-null, ie, if you want to groupSort by relevance
* use Sort.RELEVANCE.
* @param topN How many top groups to keep.
* @param after The field values to search after. Can be null.
*/
public static SinglePassGroupingCollector> createNumeric(
String groupField,
MappedFieldType groupFieldType,
Sort groupSort,
int topN,
@Nullable FieldDoc after
) {
return new SinglePassGroupingCollector<>(new GroupingDocValuesSelector.Numeric(groupFieldType), groupField, groupSort, topN, after);
}
/**
* Creates a {@link SinglePassGroupingCollector} on a {@link SortedDocValues} field.
* It accepts also {@link SortedSetDocValues} field but the collect will fail with
* an {@link IllegalStateException} if a document contains more than one value for the field.
*
* @param groupField The sort field used to group documents.
* @param groupFieldType The {@link MappedFieldType} for this sort field.
* @param groupSort The {@link Sort} used to sort the groups. The grouping keeps only the top sorted
* document per grouping key.
* This must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
* @param topN How many top groups to keep.
* @param after The field values to search after. Can be null.
*/
public static SinglePassGroupingCollector> createKeyword(
String groupField,
MappedFieldType groupFieldType,
Sort groupSort,
int topN,
@Nullable FieldDoc after
) {
return new SinglePassGroupingCollector<>(new GroupingDocValuesSelector.Keyword(groupFieldType), groupField, groupSort, topN, after);
}
private final String groupField;
private final FieldDoc after;
private final Sort groupSort;
private final GroupSelector groupSelector;
private final FieldComparator>[] comparators;
private final LeafFieldComparator[] leafComparators;
private final int[] reversed;
private final int topNGroups;
private final boolean needsScores;
private final Map> groupMap;
private final int compIDXEnd;
private int totalHitCount;
// Set once we reach topNGroups unique groups:
private TreeSet> orderedGroups;
private int docBase;
private int spareSlot;
private SinglePassGroupingCollector(
GroupSelector groupSelector,
String groupField,
Sort groupSort,
int topNGroups,
@Nullable FieldDoc after
) {
assert after == null || (groupSort.getSort().length == 1 && after.doc == Integer.MAX_VALUE);
this.groupSelector = groupSelector;
this.groupField = groupField;
this.groupSort = groupSort;
this.after = after;
if (topNGroups < 1) {
throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
}
this.topNGroups = topNGroups;
this.needsScores = groupSort.needsScores();
final SortField[] sortFields = groupSort.getSort();
comparators = new FieldComparator>[sortFields.length];
leafComparators = new LeafFieldComparator[sortFields.length];
compIDXEnd = comparators.length - 1;
reversed = new int[sortFields.length];
for (int i = 0; i < sortFields.length; i++) {
final SortField sortField = sortFields[i];
// use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot):
comparators[i] = sortField.getComparator(topNGroups + 1, Pruning.NONE);
reversed[i] = sortField.getReverse() ? -1 : 1;
}
if (after != null) {
@SuppressWarnings("unchecked")
FieldComparator
© 2015 - 2025 Weber Informatics LLC | Privacy Policy