org.elasticsearch.lucene.grouping.SinglePassGroupingCollector Maven / Gradle / Ivy

Go to download
/*
 * @notice
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Modifications copyright (C) 2020 Elasticsearch B.V.
 */
package org.elasticsearch.lucene.grouping;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.grouping.GroupSelector;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.core.Nullable;
import org.elasticsearch.index.mapper.MappedFieldType;

import java.io.IOException;
import java.util.Comparator;
import java.util.Map;
import java.util.TreeSet;

import static org.apache.lucene.search.SortField.Type.SCORE;

/**
 * A collector that groups documents based on field values and returns {@link TopFieldGroups}
 * output. The grouping is done in a single pass by selecting only the top sorted document per grouping key.
 * The value used for the key of each group can be found in {@link TopFieldGroups#groupValues}.
 *
 * This collector optionally supports searching after a previous result through the 'after' parameter.
 *
 * TODO: If the sort is based on score we should propagate the minimum competitive score when orderedGroups
 *       is full. This is safe for grouping since the group sort is the same as the query sort.
 */
public class SinglePassGroupingCollector extends SimpleCollector {

    /**
     * A single group tracked by the collector: a {@link ScoreDoc} for the group's document,
     * extended with the group's key and the slot assigned to it.
     *
     * @param <T> the type of the group value
     */
    private static class SearchGroup<T> extends ScoreDoc {
        /** The grouping key of this group. */
        T groupValue;
        /** The slot associated with this group. */
        int slot;

        /**
         * Creates a group entry whose score is initialised to {@link Float#NaN}.
         *
         * @param doc        the document id handed to {@link ScoreDoc}
         * @param slot       the slot assigned to this group
         * @param groupValue the grouping key of this group
         */
        SearchGroup(int doc, int slot, T groupValue) {
            super(doc, Float.NaN);
            this.slot = slot;
            this.groupValue = groupValue;
        }

        /** Prefixes the {@link ScoreDoc} representation with the slot number. */
        @Override
        public String toString() {
            return "slot:" + slot + " " + super.toString();
        }
    }

    /**
     * Creates a {@link SinglePassGroupingCollector} on a {@link NumericDocValues} field.
     * A {@link SortedNumericDocValues} field is accepted as well, but collection fails with an
     * {@link IllegalStateException} if a document contains more than one value for the field.
     *
     * @param groupField        The sort field used to group documents.
     * @param groupFieldType    The {@link MappedFieldType} for this sort field.
     * @param groupSort         The {@link Sort} used to sort the groups.
     *                          The grouping keeps only the top sorted document per grouping key.
     *                          This must be non-null, ie, if you want to groupSort by relevance
     *                          use Sort.RELEVANCE.
     * @param topN              How many top groups to keep.
     * @param after             The field values to search after. Can be null.
     * @return a new collector backed by a {@code GroupingDocValuesSelector.Numeric}; the group value
     *         type is not exposed to callers, hence the wildcard
     */
    public static SinglePassGroupingCollector<?> createNumeric(
        String groupField,
        MappedFieldType groupFieldType,
        Sort groupSort,
        int topN,
        @Nullable FieldDoc after
    ) {
        // The diamond infers the group value type from the selector; returning the wildcard type
        // instead of the raw type keeps callers free of raw-type warnings.
        return new SinglePassGroupingCollector<>(new GroupingDocValuesSelector.Numeric(groupFieldType), groupField, groupSort, topN, after);
    }

    /**
     * Creates a {@link SinglePassGroupingCollector} on a {@link SortedDocValues} field.
     * A {@link SortedSetDocValues} field is accepted as well, but collection fails with
     * an {@link IllegalStateException} if a document contains more than one value for the field.
     *
     * @param groupField        The sort field used to group documents.
     * @param groupFieldType    The {@link MappedFieldType} for this sort field.
     * @param groupSort         The {@link Sort} used to sort the groups. The grouping keeps only the top sorted
     *                          document per grouping key.
     *                          This must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
     * @param topN              How many top groups to keep.
     * @param after             The field values to search after. Can be null.
     * @return a new collector backed by a {@code GroupingDocValuesSelector.Keyword}; the group value
     *         type is not exposed to callers, hence the wildcard
     */
    public static SinglePassGroupingCollector<?> createKeyword(
        String groupField,
        MappedFieldType groupFieldType,
        Sort groupSort,
        int topN,
        @Nullable FieldDoc after
    ) {
        // The diamond infers the group value type from the selector; returning the wildcard type
        // instead of the raw type keeps callers free of raw-type warnings.
        return new SinglePassGroupingCollector<>(new GroupingDocValuesSelector.Keyword(groupFieldType), groupField, groupSort, topN, after);
    }

    private final String groupField;
    private final FieldDoc after;
    private final Sort groupSort;
    private final GroupSelector groupSelector;
    private final FieldComparator[] comparators;
    private final LeafFieldComparator[] leafComparators;
    private final int[] reversed;
    private final int topNGroups;
    private final boolean needsScores;
    private final Map> groupMap;
    private final int compIDXEnd;

    private int totalHitCount;

    // Set once we reach topNGroups unique groups:
    private TreeSet> orderedGroups;

    private int docBase;
    private int spareSlot;

    private SinglePassGroupingCollector(
        GroupSelector groupSelector,
        String groupField,
        Sort groupSort,
        int topNGroups,
        @Nullable FieldDoc after
    ) {
        assert after == null || (groupSort.getSort().length == 1 && after.doc == Integer.MAX_VALUE);
        this.groupSelector = groupSelector;
        this.groupField = groupField;
        this.groupSort = groupSort;
        this.after = after;

        if (topNGroups < 1) {
            throw new IllegalArgumentException("topNGroups must be >= 1 (got " + topNGroups + ")");
        }

        this.topNGroups = topNGroups;
        this.needsScores = groupSort.needsScores();
        final SortField[] sortFields = groupSort.getSort();
        comparators = new FieldComparator[sortFields.length];
        leafComparators = new LeafFieldComparator[sortFields.length];
        compIDXEnd = comparators.length - 1;
        reversed = new int[sortFields.length];
        for (int i = 0; i < sortFields.length; i++) {
            final SortField sortField = sortFields[i];
            // use topNGroups + 1 so we have a spare slot to use for comparing (tracked by this.spareSlot):
            comparators[i] = sortField.getComparator(topNGroups + 1, Pruning.NONE);
            reversed[i] = sortField.getReverse() ? -1 : 1;
        }
        if (after != null) {
            @SuppressWarnings("unchecked")
            FieldComparator