All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
org.elasticsearch.index.search.child.TopChildrenQuery Maven / Gradle / Ivy
Go to download
ElasticSearch - Open Source, Distributed, RESTful Search Engine
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search.child;
import gnu.trove.map.hash.TIntObjectHashMap;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.search.EmptyScorer;
import org.elasticsearch.common.trove.ExtTHashMap;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
/**
* A query that evaluates the top matching child documents (based on the score) in order to determine what
* parent documents to return. This query tries to find just enough child documents to return the requested
* number of parent documents (or less if no other child document can be found).
*
* This query executes several internal searches. In the first round it tries to find ((request offset + requested size) * factor)
* child documents. The resulting child documents are mapped into their parent documents including the aggregated child scores.
* If not enough parent documents could be resolved then a subsequent round is executed, requesting previous requested
* documents times incremental_factor. This logic repeats until enough parent documents are resolved or until no more
* child documents are available.
*
* This query is usually faster than the {@link ChildrenQuery}, since enough parent documents can typically be returned
* in the first child document query round.
*/
public class TopChildrenQuery extends Query implements SearchContext.Rewrite {
private final SearchContext searchContext;
private final String parentType;
private final String childType;
private final ScoreType scoreType;
private final int factor;
private final int incrementalFactor;
private final Query originalChildQuery;
private Query rewrittenChildQuery;
private ExtTHashMap parentDocs;
// Note, the query is expected to already be filtered to only child type docs
public TopChildrenQuery(SearchContext searchContext, Query childQuery, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
this.searchContext = searchContext;
this.originalChildQuery = childQuery;
this.childType = childType;
this.parentType = parentType;
this.scoreType = scoreType;
this.factor = factor;
this.incrementalFactor = incrementalFactor;
}
private TopChildrenQuery(TopChildrenQuery existing, Query rewrittenChildQuery) {
this.searchContext = existing.searchContext;
this.originalChildQuery = existing.originalChildQuery;
this.parentType = existing.parentType;
this.childType = existing.childType;
this.scoreType = existing.scoreType;
this.factor = existing.factor;
this.incrementalFactor = existing.incrementalFactor;
this.parentDocs = existing.parentDocs;
this.rewrittenChildQuery = rewrittenChildQuery;
}
// Rewrite logic:
// 1) query_then_fetch (default): First contextRewrite and then rewrite is executed
// 2) dfs_query_then_fetch:: First rewrite and then contextRewrite is executed. During query phase rewrite isn't
// executed any more because searchContext#queryRewritten() returns true.
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query rewritten;
if (rewrittenChildQuery == null) {
rewritten = originalChildQuery.rewrite(reader);
} else {
rewritten = rewrittenChildQuery;
}
if (rewritten == rewrittenChildQuery) {
return this;
}
// We need to update the rewritten query also in the SearchContext#rewrites b/c we can run into this situation:
// 1) During parsing we set SearchContext#rewrites with queries that implement Rewrite.
// 2) Then during the dfs phase, the main query (which included this query and its child query) gets rewritten
// and updated in SearchContext. So different TopChildrenQuery instances are in SearchContext#rewrites and in the main query.
// 3) Then during the query phase first the queries that impl. Rewrite are executed, which will update their own data
// parentDocs Map. Then when the main query is executed, 0 results are found, b/c the main query holds a different
// TopChildrenQuery instance then in SearchContext#rewrites
int index = searchContext.rewrites().indexOf(this);
TopChildrenQuery rewrite = new TopChildrenQuery(this, rewritten);
searchContext.rewrites().set(index, rewrite);
return rewrite;
}
@Override
public void contextRewrite(SearchContext searchContext) throws Exception {
this.parentDocs = CacheRecycler.popHashMap();
searchContext.idCache().refresh(searchContext.searcher().getTopReaderContext().leaves());
int parentHitsResolved;
int numChildDocs = (searchContext.from() + searchContext.size());
if (numChildDocs == 0) {
numChildDocs = 1;
}
numChildDocs *= factor;
Query childQuery;
if (rewrittenChildQuery == null) {
childQuery = rewrittenChildQuery = searchContext.searcher().rewrite(originalChildQuery);
} else {
childQuery = rewrittenChildQuery;
}
while (true) {
parentDocs.clear();
TopDocs topChildDocs = searchContext.searcher().search(childQuery, numChildDocs);
parentHitsResolved = resolveParentDocuments(topChildDocs, searchContext);
// check if we found enough docs, if so, break
if (parentHitsResolved >= (searchContext.from() + searchContext.size())) {
break;
}
// if we did not find enough docs, check if it make sense to search further
if (topChildDocs.totalHits <= numChildDocs) {
break;
}
// if not, update numDocs, and search again
numChildDocs *= incrementalFactor;
if (numChildDocs > topChildDocs.totalHits) {
numChildDocs = topChildDocs.totalHits;
}
}
}
@Override
public void contextClear() {
if (parentDocs != null) {
CacheRecycler.pushHashMap(parentDocs);
parentDocs = null;
}
}
int resolveParentDocuments(TopDocs topDocs, SearchContext context) {
int parentHitsResolved = 0;
ExtTHashMap> parentDocsPerReader = CacheRecycler.popHashMap();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
AtomicReaderContext subContext = context.searcher().getIndexReader().leaves().get(readerIndex);
int subDoc = scoreDoc.doc - subContext.docBase;
// find the parent id
HashedBytesArray parentId = context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc);
if (parentId == null) {
// no parent found
continue;
}
// now go over and find the parent doc Id and reader tuple
for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
AtomicReader indexReader = atomicReaderContext.reader();
int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
Bits liveDocs = indexReader.getLiveDocs();
if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) {
// we found a match, add it and break
TIntObjectHashMap readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey());
if (readerParentDocs == null) {
readerParentDocs = CacheRecycler.popIntObjectMap();
parentDocsPerReader.put(indexReader.getCoreCacheKey(), readerParentDocs);
}
ParentDoc parentDoc = readerParentDocs.get(parentDocId);
if (parentDoc == null) {
parentHitsResolved++; // we have a hit on a parent
parentDoc = new ParentDoc();
parentDoc.docId = parentDocId;
parentDoc.count = 1;
parentDoc.maxScore = scoreDoc.score;
parentDoc.sumScores = scoreDoc.score;
readerParentDocs.put(parentDocId, parentDoc);
} else {
parentDoc.count++;
parentDoc.sumScores += scoreDoc.score;
if (scoreDoc.score > parentDoc.maxScore) {
parentDoc.maxScore = scoreDoc.score;
}
}
}
}
}
for (Map.Entry> entry : parentDocsPerReader.entrySet()) {
ParentDoc[] values = entry.getValue().values(new ParentDoc[entry.getValue().size()]);
Arrays.sort(values, PARENT_DOC_COMP);
parentDocs.put(entry.getKey(), values);
CacheRecycler.pushIntObjectMap(entry.getValue());
}
CacheRecycler.pushHashMap(parentDocsPerReader);
return parentHitsResolved;
}
private static final ParentDocComparator PARENT_DOC_COMP = new ParentDocComparator();
static class ParentDocComparator implements Comparator {
@Override
public int compare(ParentDoc o1, ParentDoc o2) {
return o1.docId - o2.docId;
}
}
static class ParentDoc {
public int docId;
public int count;
public float maxScore = Float.NaN;
public float sumScores = 0;
}
@Override
public void extractTerms(Set terms) {
rewrittenChildQuery.extractTerms(terms);
}
@Override
public Weight createWeight(IndexSearcher searcher) throws IOException {
if (parentDocs == null) {
throw new ElasticSearchIllegalStateException("top_children query hasn't executed properly");
}
return new ParentWeight(searcher, rewrittenChildQuery.createWeight(searcher));
}
@Override
public boolean equals(Object obj) {
if (this == obj) {
return true;
}
if (obj == null || obj.getClass() != this.getClass()) {
return false;
}
HasChildFilter that = (HasChildFilter) obj;
if (!originalChildQuery.equals(that.childQuery)) {
return false;
}
if (!childType.equals(that.childType)) {
return false;
}
if (!parentType.equals(that.parentType)) {
return false;
}
return true;
}
@Override
public int hashCode() {
int result = originalChildQuery.hashCode();
result = 31 * result + parentType.hashCode();
result = 31 * result + childType.hashCode();
return result;
}
public String toString(String field) {
StringBuilder sb = new StringBuilder();
sb.append("score_child[").append(childType).append("/").append(parentType).append("](").append(originalChildQuery.toString(field)).append(')');
sb.append(ToStringUtils.boost(getBoost()));
return sb.toString();
}
class ParentWeight extends Weight {
final IndexSearcher searcher;
final Weight queryWeight;
public ParentWeight(IndexSearcher searcher, Weight queryWeight) throws IOException {
this.searcher = searcher;
this.queryWeight = queryWeight;
}
public Query getQuery() {
return TopChildrenQuery.this;
}
@Override
public float getValueForNormalization() throws IOException {
float sum = queryWeight.getValueForNormalization();
sum *= getBoost() * getBoost();
return sum;
}
@Override
public void normalize(float norm, float topLevelBoost) {
// Nothing to normalize
}
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
ParentDoc[] readerParentDocs = parentDocs.get(context.reader().getCoreCacheKey());
if (readerParentDocs != null) {
if (scoreType == ScoreType.MAX) {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS;
return doc.maxScore;
}
};
} else if (scoreType == ScoreType.AVG) {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS;
return doc.sumScores / doc.count;
}
};
} else if (scoreType == ScoreType.SUM) {
return new ParentScorer(this, readerParentDocs) {
@Override
public float score() throws IOException {
assert doc.docId >= 0 || doc.docId < NO_MORE_DOCS;
return doc.sumScores;
}
};
}
throw new ElasticSearchIllegalStateException("No support for score type [" + scoreType + "]");
}
return new EmptyScorer(this);
}
@Override
public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
return new Explanation(getBoost(), "not implemented yet...");
}
}
static abstract class ParentScorer extends Scorer {
private final ParentDoc spare = new ParentDoc();
protected final ParentDoc[] docs;
protected ParentDoc doc = spare;
private int index = -1;
ParentScorer(ParentWeight weight, ParentDoc[] docs) throws IOException {
super(weight);
this.docs = docs;
spare.docId = -1;
spare.count = -1;
}
@Override
public final int docID() {
return doc.docId;
}
@Override
public final int advance(int target) throws IOException {
int doc;
while ((doc = nextDoc()) < target) {
}
return doc;
}
@Override
public final int nextDoc() throws IOException {
if (++index >= docs.length) {
doc = spare;
doc.count = 0;
return (doc.docId = NO_MORE_DOCS);
}
return (doc = docs[index]).docId;
}
@Override
public final int freq() throws IOException {
return doc.count; // The number of matches in the child doc, which is propagated to parent
}
@Override
public final long cost() {
return docs.length;
}
}
}