
org.elasticsearch.index.search.child.TopChildrenQuery Maven / Gradle / Ivy

/*
 * Licensed to Elastic Search and Shay Banon under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. Elastic Search licenses this
 * file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.search.child;

import gnu.trove.map.hash.TIntObjectHashMap;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.ElasticSearchIllegalStateException;
import org.elasticsearch.common.CacheRecycler;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.common.lucene.search.EmptyScorer;
import org.elasticsearch.common.trove.ExtTHashMap;
import org.elasticsearch.search.internal.SearchContext;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;

/**
 * A query that evaluates the top matching child documents (based on the score) in order to determine what
 * parent documents to return. This query tries to find just enough child documents to return the requested
 * number of parent documents (or fewer if no more child documents can be found).
 * <p/>
 * This query executes several internal searches. In the first round it tries to find ((request offset + requested size) * factor)
 * child documents. The resulting child documents are mapped into their parent documents, including the aggregated child scores.
 * If not enough parent documents could be resolved then a subsequent round is executed, requesting the previously requested
 * number of documents times incremental_factor. This logic repeats until enough parent documents are resolved or until no more
 * child documents are available.
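 * <p/>
 * For illustration only (these numbers are assumed, not taken from the original documentation): with
 * from = 0, size = 10, factor = 5 and incremental_factor = 2, the rounds request child documents as follows:
 * <pre>
 * round 1: (0 + 10) * 5 = 50 child docs
 * round 2: 50 * 2 = 100 child docs   (only if fewer than 10 parents resolved and more child hits exist)
 * round 3: 100 * 2 = 200 child docs  (always capped at the total number of child hits)
 * </pre>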
 * <p/>
 * This query is most of the time faster than the {@link ChildrenQuery}. Usually enough parent documents can be returned
 * in the first child document query round.
 */
public class TopChildrenQuery extends Query implements SearchContext.Rewrite {

    private final SearchContext searchContext;
    private final String parentType;
    private final String childType;
    private final ScoreType scoreType;
    private final int factor;
    private final int incrementalFactor;
    private final Query originalChildQuery;

    private Query rewrittenChildQuery;
    private ExtTHashMap<Object, ParentDoc[]> parentDocs;

    // Note, the query is expected to already be filtered to only child type docs
    public TopChildrenQuery(SearchContext searchContext, Query childQuery, String childType, String parentType, ScoreType scoreType, int factor, int incrementalFactor) {
        this.searchContext = searchContext;
        this.originalChildQuery = childQuery;
        this.childType = childType;
        this.parentType = parentType;
        this.scoreType = scoreType;
        this.factor = factor;
        this.incrementalFactor = incrementalFactor;
    }

    private TopChildrenQuery(TopChildrenQuery existing, Query rewrittenChildQuery) {
        this.searchContext = existing.searchContext;
        this.originalChildQuery = existing.originalChildQuery;
        this.parentType = existing.parentType;
        this.childType = existing.childType;
        this.scoreType = existing.scoreType;
        this.factor = existing.factor;
        this.incrementalFactor = existing.incrementalFactor;
        this.parentDocs = existing.parentDocs;
        this.rewrittenChildQuery = rewrittenChildQuery;
    }

    // Rewrite logic:
    // 1) query_then_fetch (default): First contextRewrite and then rewrite is executed.
    // 2) dfs_query_then_fetch: First rewrite and then contextRewrite is executed. During the query phase rewrite isn't
    //    executed any more because searchContext#queryRewritten() returns true.
    @Override
    public Query rewrite(IndexReader reader) throws IOException {
        Query rewritten;
        if (rewrittenChildQuery == null) {
            rewritten = originalChildQuery.rewrite(reader);
        } else {
            rewritten = rewrittenChildQuery;
        }
        if (rewritten == rewrittenChildQuery) {
            return this;
        }
        // We need to update the rewritten query also in SearchContext#rewrites, b/c we can run into this situation:
        // 1) During parsing we set SearchContext#rewrites with queries that implement Rewrite.
        // 2) Then during the dfs phase, the main query (which includes this query and its child query) gets rewritten
        //    and updated in SearchContext. So different TopChildrenQuery instances are in SearchContext#rewrites and in the main query.
        // 3) Then during the query phase first the queries that impl. Rewrite are executed, which will update their own
        //    parentDocs map. Then when the main query is executed, 0 results are found, b/c the main query holds a different
        //    TopChildrenQuery instance than the one in SearchContext#rewrites.
        int index = searchContext.rewrites().indexOf(this);
        TopChildrenQuery rewrite = new TopChildrenQuery(this, rewritten);
        searchContext.rewrites().set(index, rewrite);
        return rewrite;
    }
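
    // Illustration of the failure mode described above (flow assumed for dfs_query_then_fetch, not part of the original source):
    //   1. parse phase -> searchContext.rewrites() == [topChildren@1]
    //   2. dfs phase   -> the main query rewrite produces topChildren@2 inside the main query tree
    //   3. query phase -> without the set(index, rewrite) call above, topChildren@1 would populate its
    //                     parentDocs while the main query consults topChildren@2, yielding 0 results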

    @Override
    public void contextRewrite(SearchContext searchContext) throws Exception {
        this.parentDocs = CacheRecycler.popHashMap();
        searchContext.idCache().refresh(searchContext.searcher().getTopReaderContext().leaves());

        int parentHitsResolved;
        int numChildDocs = (searchContext.from() + searchContext.size());
        if (numChildDocs == 0) {
            numChildDocs = 1;
        }
        numChildDocs *= factor;

        Query childQuery;
        if (rewrittenChildQuery == null) {
            childQuery = rewrittenChildQuery = searchContext.searcher().rewrite(originalChildQuery);
        } else {
            childQuery = rewrittenChildQuery;
        }

        while (true) {
            parentDocs.clear();
            TopDocs topChildDocs = searchContext.searcher().search(childQuery, numChildDocs);
            parentHitsResolved = resolveParentDocuments(topChildDocs, searchContext);

            // check if we found enough docs, if so, break
            if (parentHitsResolved >= (searchContext.from() + searchContext.size())) {
                break;
            }
            // if we did not find enough docs, check if it makes sense to search further
            if (topChildDocs.totalHits <= numChildDocs) {
                break;
            }
            // if not, update numChildDocs, and search again
            numChildDocs *= incrementalFactor;
            if (numChildDocs > topChildDocs.totalHits) {
                numChildDocs = topChildDocs.totalHits;
            }
        }
    }

    @Override
    public void contextClear() {
        if (parentDocs != null) {
            CacheRecycler.pushHashMap(parentDocs);
            parentDocs = null;
        }
    }

    int resolveParentDocuments(TopDocs topDocs, SearchContext context) {
        int parentHitsResolved = 0;
        ExtTHashMap<Object, TIntObjectHashMap<ParentDoc>> parentDocsPerReader = CacheRecycler.popHashMap();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves());
            AtomicReaderContext subContext = context.searcher().getIndexReader().leaves().get(readerIndex);
            int subDoc = scoreDoc.doc - subContext.docBase;

            // find the parent id
            HashedBytesArray parentId = context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc);
            if (parentId == null) {
                // no parent found
                continue;
            }
            // now go over and find the parent doc Id and reader tuple
            for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) {
                AtomicReader indexReader = atomicReaderContext.reader();
                int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId);
                Bits liveDocs = indexReader.getLiveDocs();
                if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) {
                    // we found a match, add it
                    TIntObjectHashMap<ParentDoc> readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey());
                    if (readerParentDocs == null) {
                        readerParentDocs = CacheRecycler.popIntObjectMap();
                        parentDocsPerReader.put(indexReader.getCoreCacheKey(), readerParentDocs);
                    }

                    ParentDoc parentDoc = readerParentDocs.get(parentDocId);
                    if (parentDoc == null) {
                        parentHitsResolved++; // we have a hit on a parent
                        parentDoc = new ParentDoc();
                        parentDoc.docId = parentDocId;
                        parentDoc.count = 1;
                        parentDoc.maxScore = scoreDoc.score;
                        parentDoc.sumScores = scoreDoc.score;
                        readerParentDocs.put(parentDocId, parentDoc);
                    } else {
                        parentDoc.count++;
                        parentDoc.sumScores += scoreDoc.score;
                        if (scoreDoc.score > parentDoc.maxScore) {
                            parentDoc.maxScore = scoreDoc.score;
                        }
                    }
                }
            }
        }

        for (Map.Entry<Object, TIntObjectHashMap<ParentDoc>> entry : parentDocsPerReader.entrySet()) {
            ParentDoc[] values = entry.getValue().values(new ParentDoc[entry.getValue().size()]);
            Arrays.sort(values, PARENT_DOC_COMP);
            parentDocs.put(entry.getKey(), values);
            CacheRecycler.pushIntObjectMap(entry.getValue());
        }
        CacheRecycler.pushHashMap(parentDocsPerReader);
        return parentHitsResolved;
    }
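
    // Worked example (scores assumed for illustration, not part of the original source): if three child hits with
    // scores 0.2, 0.6 and 0.4 all resolve to the same parent, its ParentDoc accumulates to
    //   count = 3, maxScore = 0.6, sumScores = 1.2
    // which the scorer below turns into 0.6 (MAX), 0.4 (AVG = 1.2 / 3) or 1.2 (SUM) depending on the ScoreType.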

    private static final ParentDocComparator PARENT_DOC_COMP = new ParentDocComparator();

    static class ParentDocComparator implements Comparator<ParentDoc> {
        @Override
        public int compare(ParentDoc o1, ParentDoc o2) {
            return o1.docId - o2.docId;
        }
    }

    static class ParentDoc {
        public int docId;
        public int count;
        public float maxScore = Float.NaN;
        public float sumScores = 0;
    }

    @Override
    public void extractTerms(Set<Term> terms) {
        rewrittenChildQuery.extractTerms(terms);
    }

    @Override
    public Weight createWeight(IndexSearcher searcher) throws IOException {
        if (parentDocs == null) {
            throw new ElasticSearchIllegalStateException("top_children query hasn't executed properly");
        }
        return new ParentWeight(searcher, rewrittenChildQuery.createWeight(searcher));
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || obj.getClass() != this.getClass()) {
            return false;
        }

        TopChildrenQuery that = (TopChildrenQuery) obj;
        if (!originalChildQuery.equals(that.originalChildQuery)) {
            return false;
        }
        if (!childType.equals(that.childType)) {
            return false;
        }
        if (!parentType.equals(that.parentType)) {
            return false;
        }
        return true;
    }

    @Override
    public int hashCode() {
        int result = originalChildQuery.hashCode();
        result = 31 * result + parentType.hashCode();
        result = 31 * result + childType.hashCode();
        return result;
    }

    public String toString(String field) {
        StringBuilder sb = new StringBuilder();
        sb.append("score_child[").append(childType).append("/").append(parentType).append("](").append(originalChildQuery.toString(field)).append(')');
        sb.append(ToStringUtils.boost(getBoost()));
        return sb.toString();
    }

    class ParentWeight extends Weight {

        final IndexSearcher searcher;
        final Weight queryWeight;

        public ParentWeight(IndexSearcher searcher, Weight queryWeight) throws IOException {
            this.searcher = searcher;
            this.queryWeight = queryWeight;
        }

        public Query getQuery() {
            return TopChildrenQuery.this;
        }

        @Override
        public float getValueForNormalization() throws IOException {
            float sum = queryWeight.getValueForNormalization();
            sum *= getBoost() * getBoost();
            return sum;
        }

        @Override
        public void normalize(float norm, float topLevelBoost) {
            // Nothing to normalize
        }

        @Override
        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
            ParentDoc[] readerParentDocs = parentDocs.get(context.reader().getCoreCacheKey());
            if (readerParentDocs != null) {
                if (scoreType == ScoreType.MAX) {
                    return new ParentScorer(this, readerParentDocs) {
                        @Override
                        public float score() throws IOException {
                            assert doc.docId >= 0 && doc.docId < NO_MORE_DOCS;
                            return doc.maxScore;
                        }
                    };
                } else if (scoreType == ScoreType.AVG) {
                    return new ParentScorer(this, readerParentDocs) {
                        @Override
                        public float score() throws IOException {
                            assert doc.docId >= 0 && doc.docId < NO_MORE_DOCS;
                            return doc.sumScores / doc.count;
                        }
                    };
                } else if (scoreType == ScoreType.SUM) {
                    return new ParentScorer(this, readerParentDocs) {
                        @Override
                        public float score() throws IOException {
                            assert doc.docId >= 0 && doc.docId < NO_MORE_DOCS;
                            return doc.sumScores;
                        }
                    };
                }
                throw new ElasticSearchIllegalStateException("No support for score type [" + scoreType + "]");
            }
            return new EmptyScorer(this);
        }

        @Override
        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
            return new Explanation(getBoost(), "not implemented yet...");
        }
    }

    static abstract class ParentScorer extends Scorer {

        private final ParentDoc spare = new ParentDoc();
        protected final ParentDoc[] docs;
        protected ParentDoc doc = spare;
        private int index = -1;

        ParentScorer(ParentWeight weight, ParentDoc[] docs) throws IOException {
            super(weight);
            this.docs = docs;
            spare.docId = -1;
            spare.count = -1;
        }

        @Override
        public final int docID() {
            return doc.docId;
        }

        @Override
        public final int advance(int target) throws IOException {
            int doc;
            while ((doc = nextDoc()) < target) {
            }
            return doc;
        }

        @Override
        public final int nextDoc() throws IOException {
            if (++index >= docs.length) {
                doc = spare;
                doc.count = 0;
                return (doc.docId = NO_MORE_DOCS);
            }
            return (doc = docs[index]).docId;
        }

        @Override
        public final int freq() throws IOException {
            return doc.count; // The number of matching child docs, which is propagated to the parent
        }

        @Override
        public final long cost() {
            return docs.length;
        }
    }
}
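
For readers unfamiliar with Lucene's DocIdSetIterator contract, here is a minimal standalone sketch (plain Java, no Lucene dependencies; the class name and array values are invented for illustration) of the cursor pattern ParentScorer implements: nextDoc() walks a sorted doc id array, and advance(target) is expressed in terms of nextDoc(), returning the first doc id at or beyond the target.

public class CursorSketch {

    static final int NO_MORE_DOCS = Integer.MAX_VALUE; // same sentinel value Lucene's DocIdSetIterator uses

    private final int[] docs; // sorted ascending, like the per-reader ParentDoc[] above
    private int index = -1;

    CursorSketch(int[] docs) {
        this.docs = docs;
    }

    // move to the next doc id, or NO_MORE_DOCS when the array is exhausted
    int nextDoc() {
        if (++index >= docs.length) {
            return NO_MORE_DOCS;
        }
        return docs[index];
    }

    // advance(target) defined in terms of nextDoc(), exactly like ParentScorer#advance
    int advance(int target) {
        int doc;
        while ((doc = nextDoc()) < target) {
        }
        return doc;
    }

    public static void main(String[] args) {
        CursorSketch cursor = new CursorSketch(new int[]{3, 7, 12});
        System.out.println(cursor.advance(5)); // 7, the first doc id >= 5
        System.out.println(cursor.nextDoc());  // 12
        System.out.println(cursor.nextDoc());  // NO_MORE_DOCS (2147483647)
    }
}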




