org.apache.lucene.search.grouping.CollapsingTopDocsCollector Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.search.grouping;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import java.io.IOException;
import java.util.Collection;
import java.util.Iterator;
import static org.apache.lucene.search.SortField.Type.SCORE;
/**
* A collector that groups documents based on field values and returns {@link CollapseTopFieldDocs}
* output. The collapsing is done in a single pass by selecting only the top sorted document per collapse key.
* The value used for the collapse key of each group can be found in {@link CollapseTopFieldDocs#collapseValues}.
*/
public abstract class CollapsingTopDocsCollector extends FirstPassGroupingCollector {
protected final String collapseField;
protected final Sort sort;
protected Scorer scorer;
private int totalHitCount;
private float maxScore;
private final boolean trackMaxScore;
private CollapsingTopDocsCollector(String collapseField, Sort sort,
int topN, boolean trackMaxScore) throws IOException {
super(sort, topN);
this.collapseField = collapseField;
this.trackMaxScore = trackMaxScore;
if (trackMaxScore) {
maxScore = Float.NEGATIVE_INFINITY;
} else {
maxScore = Float.NaN;
}
this.sort = sort;
}
/**
* Transform {@link FirstPassGroupingCollector#getTopGroups(int, boolean)} output in
* {@link CollapseTopFieldDocs}. The collapsing needs only one pass so we can create the final top docs at the end
* of the first pass.
*/
public CollapseTopFieldDocs getTopDocs() {
Collection> groups = super.getTopGroups(0, true);
if (groups == null) {
return new CollapseTopFieldDocs(collapseField, totalHitCount, new ScoreDoc[0],
sort.getSort(), new Object[0], Float.NaN);
}
FieldDoc[] docs = new FieldDoc[groups.size()];
Object[] collapseValues = new Object[groups.size()];
int scorePos = -1;
for (int index = 0; index < sort.getSort().length; index++) {
SortField sortField = sort.getSort()[index];
if (sortField.getType() == SCORE) {
scorePos = index;
break;
}
}
int pos = 0;
Iterator> it = orderedGroups.iterator();
for (SearchGroup group : groups) {
assert it.hasNext();
CollectedSearchGroup col = it.next();
float score = Float.NaN;
if (scorePos != -1) {
score = (float) group.sortValues[scorePos];
}
docs[pos] = new FieldDoc(col.topDoc, score, group.sortValues);
collapseValues[pos] = group.groupValue;
pos++;
}
return new CollapseTopFieldDocs(collapseField, totalHitCount, docs, sort.getSort(),
collapseValues, maxScore);
}
@Override
public boolean needsScores() {
if (super.needsScores() == false) {
return trackMaxScore;
}
return true;
}
@Override
public void setScorer(Scorer scorer) throws IOException {
super.setScorer(scorer);
this.scorer = scorer;
}
@Override
public void collect(int doc) throws IOException {
super.collect(doc);
if (trackMaxScore) {
maxScore = Math.max(maxScore, scorer.score());
}
totalHitCount++;
}
private static class Numeric extends CollapsingTopDocsCollector {
private final CollapsingDocValuesSource.Numeric source;
private Numeric(String collapseField, Sort sort, int topN, boolean trackMaxScore) throws IOException {
super(collapseField, sort, topN, trackMaxScore);
source = new CollapsingDocValuesSource.Numeric(collapseField);
}
@Override
protected void doSetNextReader(LeafReaderContext readerContext) throws IOException {
super.doSetNextReader(readerContext);
source.setNextReader(readerContext.reader());
}
@Override
protected Long getDocGroupValue(int doc) {
return source.get(doc);
}
@Override
protected Long copyDocGroupValue(Long groupValue, Long reuse) {
return source.copy(groupValue, reuse);
}
}
private static class Keyword extends CollapsingTopDocsCollector {
private final CollapsingDocValuesSource.Keyword source;
private Keyword(String collapseField, Sort sort, int topN, boolean trackMaxScore) throws IOException {
super(collapseField, sort, topN, trackMaxScore);
source = new CollapsingDocValuesSource.Keyword(collapseField);
}
@Override
protected void doSetNextReader(LeafReaderContext readerContext) throws IOException {
super.doSetNextReader(readerContext);
source.setNextReader(readerContext.reader());
}
@Override
protected BytesRef getDocGroupValue(int doc) {
return source.get(doc);
}
@Override
protected BytesRef copyDocGroupValue(BytesRef groupValue, BytesRef reuse) {
return source.copy(groupValue, reuse);
}
}
/**
* Create a collapsing top docs collector on a {@link org.apache.lucene.index.NumericDocValues} field.
* It accepts also {@link org.apache.lucene.index.SortedNumericDocValues} field but
* the collect will fail with an {@link IllegalStateException} if a document contains more than one value for the
* field.
*
* @param collapseField The sort field used to group
* documents.
* @param sort The {@link Sort} used to sort the collapsed hits.
* The collapsing keeps only the top sorted document per collapsed key.
* This must be non-null, ie, if you want to groupSort by relevance
* use Sort.RELEVANCE.
* @param topN How many top groups to keep.
* @throws IOException When I/O related errors occur
*/
public static CollapsingTopDocsCollector> createNumeric(String collapseField, Sort sort,
int topN, boolean trackMaxScore) throws IOException {
return new Numeric(collapseField, sort, topN, trackMaxScore);
}
/**
* Create a collapsing top docs collector on a {@link org.apache.lucene.index.SortedDocValues} field.
* It accepts also {@link org.apache.lucene.index.SortedSetDocValues} field but
* the collect will fail with an {@link IllegalStateException} if a document contains more than one value for the
* field.
*
* @param collapseField The sort field used to group
* documents.
* @param sort The {@link Sort} used to sort the collapsed hits. The collapsing keeps only the top sorted
* document per collapsed key.
* This must be non-null, ie, if you want to groupSort by relevance use Sort.RELEVANCE.
* @param topN How many top groups to keep.
* @throws IOException When I/O related errors occur
*/
public static CollapsingTopDocsCollector> createKeyword(String collapseField, Sort sort,
int topN, boolean trackMaxScore) throws IOException {
return new Keyword(collapseField, sort, topN, trackMaxScore);
}
}