/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.query;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.queries.MinDocQuery;
import org.apache.lucene.queries.SearchAfterSortedDocQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.FutureArrays;
import org.elasticsearch.action.search.SearchShardTask;
import org.elasticsearch.common.Booleans;
import org.elasticsearch.common.CheckedConsumer;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.TopDocsAndMaxScore;
import org.elasticsearch.common.util.concurrent.QueueResizingEsThreadPoolExecutor;
import org.elasticsearch.index.IndexSortConfig;
import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.SearchPhase;
import org.elasticsearch.search.SearchService;
import org.elasticsearch.search.aggregations.AggregationPhase;
import org.elasticsearch.search.internal.ContextIndexSearcher;
import org.elasticsearch.search.internal.ScrollContext;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.profile.ProfileShardResult;
import org.elasticsearch.search.profile.SearchProfileShardResults;
import org.elasticsearch.search.profile.query.InternalProfileCollector;
import org.elasticsearch.search.rescore.RescorePhase;
import org.elasticsearch.search.sort.SortAndFormats;
import org.elasticsearch.search.suggest.SuggestPhase;
import org.elasticsearch.tasks.TaskCancelledException;
import org.elasticsearch.threadpool.ThreadPool;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.ExecutorService;

import static org.elasticsearch.search.query.QueryCollectorContext.createEarlyTerminationCollectorContext;
import static org.elasticsearch.search.query.QueryCollectorContext.createFilteredCollectorContext;
import static org.elasticsearch.search.query.QueryCollectorContext.createMinScoreCollectorContext;
import static org.elasticsearch.search.query.QueryCollectorContext.createMultiCollectorContext;
import static org.elasticsearch.search.query.TopDocsCollectorContext.createTopDocsCollectorContext;
import static org.elasticsearch.search.query.TopDocsCollectorContext.shortcutTotalHitCount;


/**
 * Query phase of a search request, used to run the query and get back from each shard information about the matching documents
 * (document ids and score or sort criteria) so that matches can be reduced on the coordinating node
 */
public class QueryPhase implements SearchPhase {
    private static final Logger LOGGER = LogManager.getLogger(QueryPhase.class);
    // TODO: remove this property
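    // system property 'es.search.rewrite_sort' (default: true) that allows disabling the numeric/date
    // sort optimization implemented in tryRewriteLongSort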
    public static final boolean SYS_PROP_REWRITE_SORT = Booleans.parseBoolean(System.getProperty("es.search.rewrite_sort", "true"));

    private final AggregationPhase aggregationPhase;
    private final SuggestPhase suggestPhase;
    private final RescorePhase rescorePhase;

    public QueryPhase() {
        this.aggregationPhase = new AggregationPhase();
        this.suggestPhase = new SuggestPhase();
        this.rescorePhase = new RescorePhase();
    }

    @Override
    public void preProcess(SearchContext context) {
        context.preProcess(true);
    }

    @Override
    public void execute(SearchContext searchContext) throws QueryPhaseExecutionException {
        if (searchContext.hasOnlySuggest()) {
            suggestPhase.execute(searchContext);
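            // suggest-only requests skip the query entirely, but the shard still reports an empty
            // top docs result so that it can be reduced normally on the coordinating node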
            searchContext.queryResult().topDocs(new TopDocsAndMaxScore(
                    new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), Lucene.EMPTY_SCORE_DOCS), Float.NaN),
                new DocValueFormat[0]);
            return;
        }

        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("{}", new SearchContextSourcePrinter(searchContext));
        }

        // Pre-process aggregations as late as possible. In the case of a DFS_Q_T_F
        // request, preProcess is called during the DFS phase; this is why we pre-process them
        // here to make sure it happens during the QUERY phase
        aggregationPhase.preProcess(searchContext);
        boolean rescore = executeInternal(searchContext);

        if (rescore) { // only if we do a regular search
            rescorePhase.execute(searchContext);
        }
        suggestPhase.execute(searchContext);
        aggregationPhase.execute(searchContext);

        if (searchContext.getProfilers() != null) {
            ProfileShardResult shardResults = SearchProfileShardResults
                .buildShardResults(searchContext.getProfilers());
            searchContext.queryResult().profileResults(shardResults);
        }
    }

    /**
     * This method is package-private so that it can be tested without having to
     * wire everything (mapperService, etc.)
     * @return whether the rescoring phase should be executed
     */
    static boolean executeInternal(SearchContext searchContext) throws QueryPhaseExecutionException {
        final ContextIndexSearcher searcher = searchContext.searcher();
        SortAndFormats sortAndFormatsForRewrittenNumericSort = null;
        final IndexReader reader = searcher.getIndexReader();
        QuerySearchResult queryResult = searchContext.queryResult();
        queryResult.searchTimedOut(false);
        try {
            queryResult.from(searchContext.from());
            queryResult.size(searchContext.size());
            Query query = searchContext.query();
            assert query == searcher.rewrite(query); // already rewritten

            final ScrollContext scrollContext = searchContext.scrollContext();
            if (scrollContext != null) {
                if (scrollContext.totalHits == null) {
                    // first round
                    assert scrollContext.lastEmittedDoc == null;
                    // there is not much that we can optimize here since we want to collect all
                    // documents in order to get the total number of hits

                } else {
                    final ScoreDoc after = scrollContext.lastEmittedDoc;
                    if (returnsDocsInOrder(query, searchContext.sort())) {
                        // now this gets interesting: since we sort in index-order, we can directly
                        // skip to the desired doc
                        if (after != null) {
                            query = new BooleanQuery.Builder()
                                .add(query, BooleanClause.Occur.MUST)
                                .add(new MinDocQuery(after.doc + 1), BooleanClause.Occur.FILTER)
                                .build();
                        }
                        // ... and stop collecting after ${size} matches
                        searchContext.terminateAfter(searchContext.size());
                    } else if (canEarlyTerminate(reader, searchContext.sort())) {
                        // now this gets interesting: since the search sort is a prefix of the index sort, we can directly
                        // skip to the desired doc
                        if (after != null) {
                            query = new BooleanQuery.Builder()
                                .add(query, BooleanClause.Occur.MUST)
                                .add(new SearchAfterSortedDocQuery(searchContext.sort().sort, (FieldDoc) after), BooleanClause.Occur.FILTER)
                                .build();
                        }
                    }
                }
            }

            final LinkedList<QueryCollectorContext> collectors = new LinkedList<>();
            // whether the chain contains a collector that filters documents
            boolean hasFilterCollector = false;
            if (searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER) {
                // add terminate_after before the filter collectors
                // it will only be applied on documents accepted by these filter collectors
                collectors.add(createEarlyTerminationCollectorContext(searchContext.terminateAfter()));
                // this collector can filter documents during the collection
                hasFilterCollector = true;
            }
            if (searchContext.parsedPostFilter() != null) {
                // add post filters before aggregations
                // it will only be applied to top hits
                collectors.add(createFilteredCollectorContext(searcher, searchContext.parsedPostFilter().query()));
                // this collector can filter documents during the collection
                hasFilterCollector = true;
            }
            if (searchContext.queryCollectors().isEmpty() == false) {
                // plug in additional collectors, like aggregations
                collectors.add(createMultiCollectorContext(searchContext.queryCollectors().values()));
            }
            if (searchContext.minimumScore() != null) {
                // apply the minimum score after multi collector so we filter aggs as well
                collectors.add(createMinScoreCollectorContext(searchContext.minimumScore()));
                // this collector can filter documents during the collection
                hasFilterCollector = true;
            }
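            // note on ordering: the collector contexts are created inner-most first, so the top docs
            // collector (added in searchWithCollector) forms the core, successively wrapped by
            // terminate_after, post_filter, the aggregation/plugin collectors and min_score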

            CheckedConsumer<List<LeafReaderContext>, IOException> leafSorter = l -> {};
            // try to rewrite numeric or date sort to the optimized distanceFeatureQuery
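            // (the rewritten query assigns higher scores to documents whose sort value is closer to the
            //  top of the requested order, so collecting the best scores reproduces the field sort while
            //  letting the searcher skip non-competitive documents)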
            if ((searchContext.sort() != null) && SYS_PROP_REWRITE_SORT) {
                Query rewrittenQuery = tryRewriteLongSort(searchContext, searcher.getIndexReader(), query, hasFilterCollector);
                if (rewrittenQuery != null) {
                    query = rewrittenQuery;
                    // modify sorts: add sort on _score as 1st sort, and move the sort on the original field as the 2nd sort
                    SortField[] oldSortFields = searchContext.sort().sort.getSort();
                    DocValueFormat[] oldFormats = searchContext.sort().formats;
                    SortField[] newSortFields = new SortField[oldSortFields.length + 1];
                    DocValueFormat[] newFormats = new DocValueFormat[oldSortFields.length + 1];
                    newSortFields[0] = SortField.FIELD_SCORE;
                    newFormats[0] = DocValueFormat.RAW;
                    System.arraycopy(oldSortFields, 0, newSortFields, 1, oldSortFields.length);
                    System.arraycopy(oldFormats, 0, newFormats, 1, oldFormats.length);
                    sortAndFormatsForRewrittenNumericSort = searchContext.sort(); // stash SortAndFormats to restore it later
                    searchContext.sort(new SortAndFormats(new Sort(newSortFields), newFormats));
                    leafSorter = createLeafSorter(oldSortFields[0]);
                }
            }

            boolean timeoutSet = scrollContext == null && searchContext.timeout() != null &&
                searchContext.timeout().equals(SearchService.NO_TIMEOUT) == false;

            final Runnable timeoutRunnable;
            if (timeoutSet) {
                final long startTime = searchContext.getRelativeTimeInMillis();
                final long timeout = searchContext.timeout().millis();
                final long maxTime = startTime + timeout;
                timeoutRunnable = () -> {
                    final long time = searchContext.getRelativeTimeInMillis();
                    if (time > maxTime) {
                        throw new TimeExceededException();
                    }
                };
            } else {
                timeoutRunnable = null;
            }

            final Runnable cancellationRunnable;
            if (searchContext.lowLevelCancellation()) {
                SearchShardTask task = searchContext.getTask();
                cancellationRunnable = () -> { if (task.isCancelled()) throw new TaskCancelledException("cancelled"); };
            } else {
                cancellationRunnable = null;
            }

            final Runnable checkCancelled;
            if (timeoutRunnable != null && cancellationRunnable != null) {
                checkCancelled = () -> {
                    timeoutRunnable.run();
                    cancellationRunnable.run();
                };
            } else if (timeoutRunnable != null) {
                checkCancelled = timeoutRunnable;
            } else if (cancellationRunnable != null) {
                checkCancelled = cancellationRunnable;
            } else {
                checkCancelled = null;
            }
            searcher.setCheckCancelled(checkCancelled);

            boolean shouldRescore;
            // if we are optimizing sort and there are no other collectors
            if (sortAndFormatsForRewrittenNumericSort != null && collectors.size() == 0 && searchContext.getProfilers() == null) {
                shouldRescore = searchWithCollectorManager(searchContext, searcher, query, leafSorter, timeoutSet);
            } else {
                shouldRescore = searchWithCollector(searchContext, searcher, query, collectors, hasFilterCollector, timeoutSet);
            }

            // if we rewrote numeric long or date sort, restore fieldDocs based on the original sort
            if (sortAndFormatsForRewrittenNumericSort != null) {
                searchContext.sort(sortAndFormatsForRewrittenNumericSort); // restore SortAndFormats
                restoreTopFieldDocs(queryResult, sortAndFormatsForRewrittenNumericSort);
            }

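            // expose search thread pool pressure in the shard result; the coordinating node uses these
            // statistics for adaptive replica selection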
            ExecutorService executor = searchContext.indexShard().getThreadPool().executor(ThreadPool.Names.SEARCH);
            if (executor instanceof QueueResizingEsThreadPoolExecutor) {
                QueueResizingEsThreadPoolExecutor rExecutor = (QueueResizingEsThreadPoolExecutor) executor;
                queryResult.nodeQueueSize(rExecutor.getCurrentQueueSize());
                queryResult.serviceTimeEWMA((long) rExecutor.getTaskExecutionEWMA());
            }
            return shouldRescore;
        } catch (Exception e) {
            throw new QueryPhaseExecutionException(searchContext.shardTarget(), "Failed to execute main query", e);
        }
    }

    private static boolean searchWithCollector(SearchContext searchContext, ContextIndexSearcher searcher, Query query,
            LinkedList<QueryCollectorContext> collectors, boolean hasFilterCollector, boolean timeoutSet) throws IOException {
        // create the top docs collector last when the other collectors are known
        final TopDocsCollectorContext topDocsFactory = createTopDocsCollectorContext(searchContext, hasFilterCollector);
        // add the top docs collector, the first collector context in the chain
        collectors.addFirst(topDocsFactory);

        final Collector queryCollector;
        if (searchContext.getProfilers() != null) {
            InternalProfileCollector profileCollector = QueryCollectorContext.createQueryCollectorWithProfiler(collectors);
            searchContext.getProfilers().getCurrentQueryProfiler().setCollector(profileCollector);
            queryCollector = profileCollector;
        } else {
            queryCollector = QueryCollectorContext.createQueryCollector(collectors);
        }
        QuerySearchResult queryResult = searchContext.queryResult();
        try {
            searcher.search(query, queryCollector);
        } catch (EarlyTerminatingCollector.EarlyTerminationException e) {
            queryResult.terminatedEarly(true);
        } catch (TimeExceededException e) {
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            if (searchContext.request().allowPartialSearchResults() == false) {
                // Can't rethrow TimeExceededException because not serializable
                throw new QueryPhaseExecutionException(searchContext.shardTarget(), "Time exceeded");
            }
            queryResult.searchTimedOut(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        if (searchContext.terminateAfter() != SearchContext.DEFAULT_TERMINATE_AFTER && queryResult.terminatedEarly() == null) {
            queryResult.terminatedEarly(false);
        }
        for (QueryCollectorContext ctx : collectors) {
            ctx.postProcess(queryResult);
        }
        return topDocsFactory.shouldRescore();
    }


    /*
     * We use collectorManager during sort optimization, where
     * we have already checked that there are no other collectors, no filters,
     * no search after, no scroll, no collapse, no track scores.
     * Absence of all other collectors and parameters allows us to use TopFieldCollector directly.
     */
    private static boolean searchWithCollectorManager(SearchContext searchContext, ContextIndexSearcher searcher, Query query,
            CheckedConsumer<List<LeafReaderContext>, IOException> leafSorter, boolean timeoutSet) throws IOException {
        final IndexReader reader = searchContext.searcher().getIndexReader();
        final int numHits = Math.min(searchContext.from() + searchContext.size(), Math.max(1, reader.numDocs()));
        final SortAndFormats sortAndFormats = searchContext.sort();

        int totalHitsThreshold;
        TotalHits totalHits;
        if (searchContext.trackTotalHitsUpTo() == SearchContext.TRACK_TOTAL_HITS_DISABLED) {
            totalHitsThreshold = 1;
            totalHits = new TotalHits(0, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO);
        } else {
            int hitCount = shortcutTotalHitCount(reader, query);
            if (hitCount == -1) {
                totalHitsThreshold = searchContext.trackTotalHitsUpTo();
                totalHits = null; // will be computed via the collector
            } else {
                totalHitsThreshold = 1;
                totalHits = new TotalHits(hitCount, TotalHits.Relation.EQUAL_TO); // don't compute hit counts via the collector
            }
        }

        CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector.createSharedManager(
            sortAndFormats.sort, numHits, null, totalHitsThreshold);

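        // sort the leaves by the per-segment min/max value of the sort field (see createLeafSorter) so
        // that the most competitive documents are collected first and, together with totalHitsThreshold,
        // collection can terminate early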
        List<LeafReaderContext> leaves = new ArrayList<>(searcher.getIndexReader().leaves());
        leafSorter.accept(leaves);
        try {
            Weight weight = searcher.createWeight(searcher.rewrite(query), ScoreMode.TOP_SCORES, 1f);
            searcher.search(leaves, weight, sharedManager, searchContext.queryResult(), sortAndFormats.formats, totalHits);
        } catch (TimeExceededException e) {
            assert timeoutSet : "TimeExceededException thrown even though timeout wasn't set";
            if (searchContext.request().allowPartialSearchResults() == false) {
                // Can't rethrow TimeExceededException because not serializable
                throw new QueryPhaseExecutionException(searchContext.shardTarget(), "Time exceeded");
            }
            searchContext.queryResult().searchTimedOut(true);
        } finally {
            searchContext.clearReleasables(SearchContext.Lifetime.COLLECTION);
        }
        return false; // no rescoring when sorting by field
    }

    private static Query tryRewriteLongSort(SearchContext searchContext, IndexReader reader,
                                            Query query, boolean hasFilterCollector) throws IOException {
        if (searchContext.searchAfter() != null) return null; //TODO: handle sort optimization with search after
        if (searchContext.scrollContext() != null) return null;
        if (searchContext.collapse() != null) return null;
        if (searchContext.trackScores()) return null;
        if (searchContext.aggregations() != null) return null;
        if (canEarlyTerminate(reader, searchContext.sort())) {
            // disable this optimization if index sorting matches the query sort since it's already optimized by index searcher
            return null;
        }
        Sort sort = searchContext.sort().sort;
        SortField sortField = sort.getSort()[0];
        if (SortField.Type.LONG.equals(IndexSortConfig.getSortFieldType(sortField)) == false) return null;

        // check if this is a field of type Long or Date, that is indexed and has doc values
        String fieldName = sortField.getField();
        if (fieldName == null) return null; // happens when _score or _doc is the 1st sort field
        if (searchContext.mapperService() == null) return null; // mapperService can be null in tests
        final MappedFieldType fieldType = searchContext.mapperService().fieldType(fieldName);
        if (fieldType == null) return null; // for unmapped fields, default behaviour depending on "unmapped_type" flag
        if ((fieldType.typeName().equals("long") == false) && (fieldType instanceof DateFieldType == false)) return null;
        if (fieldType.indexOptions() == IndexOptions.NONE) return null; //TODO: change to pointDataDimensionCount() when implemented
        if (fieldType.hasDocValues() == false) return null;
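        // (both long and date fields index their values as 8-byte LongPoints, which the rewrite relies
        //  on below to read min/max values and to build the distance feature query)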


        // check that all sorts are actual document fields or _doc
        for (int i = 1; i < sort.getSort().length; i++) {
            SortField sField = sort.getSort()[i];
            String sFieldName = sField.getField();
            if (sFieldName == null) {
                if (SortField.FIELD_DOC.equals(sField) == false) return null;
            } else {
                //TODO: find out how to cover _script sorts that don't use _score
                if (searchContext.mapperService().fieldType(sFieldName) == null) return null; // could be _script sort that uses _score
            }
        }

        // check that setting of missing values allows optimization
        if (sortField.getMissingValue() == null) return null;
        Long missingValue = (Long) sortField.getMissingValue();
        boolean missingValuesAccordingToSort = (sortField.getReverse() && (missingValue == Long.MIN_VALUE)) ||
            ((sortField.getReverse() == false) && (missingValue == Long.MAX_VALUE));
        if (missingValuesAccordingToSort == false) return null;
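        // (missing values must sort last: documents without a value get no boost from the distance
        //  feature query and naturally end up at the bottom of the score-sorted results)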

        int docCount = PointValues.getDocCount(reader, fieldName);
        // it is not worth running the optimization on a small index
        if (docCount <= 512) return null;

        // check for multiple values
        if (PointValues.size(reader, fieldName) != docCount) return null; //TODO: handle multiple values

        // check if the optimization makes sense with the track_total_hits setting
        if (searchContext.trackTotalHitsUpTo() == Integer.MAX_VALUE) {
            // with a filter we can't pre-calculate the hit count and would have to compute it explicitly => the optimization doesn't make sense
            if (hasFilterCollector) return null;
            // if we can't pre-calculate the hit count based on the query type, the optimization doesn't make sense
            if (shortcutTotalHitCount(reader, query) == -1) return null;
        }

        byte[] minValueBytes = PointValues.getMinPackedValue(reader, fieldName);
        byte[] maxValueBytes = PointValues.getMaxPackedValue(reader, fieldName);
        if ((maxValueBytes == null) || (minValueBytes == null)) return null;
        long minValue = LongPoint.decodeDimension(minValueBytes, 0);
        long maxValue = LongPoint.decodeDimension(maxValueBytes, 0);

        Query rewrittenQuery;
        if (minValue == maxValue) {
            rewrittenQuery = new DocValuesFieldExistsQuery(fieldName);
        } else {
            if (indexFieldHasDuplicateData(reader, fieldName)) return null;
            long origin = (sortField.getReverse()) ? maxValue : minValue;
            long pivotDistance = (maxValue - minValue) >>> 1; // division by 2 on the unsigned representation to avoid overflow
            if (pivotDistance == 0) { // 0 if maxValue = (minValue + 1)
                pivotDistance = 1;
            }
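            // the distance feature query scores each document as weight * pivotDistance / (pivotDistance +
            // |value - origin|), so documents closest to the origin, i.e. the top of the requested sort,
            // receive the highest scores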
            rewrittenQuery = LongPoint.newDistanceFeatureQuery(sortField.getField(), 1, origin, pivotDistance);
        }
        rewrittenQuery = new BooleanQuery.Builder()
            .add(query, BooleanClause.Occur.FILTER) // filter for original query
            .add(rewrittenQuery, BooleanClause.Occur.SHOULD) //should for rewrittenQuery
            .build();
        return rewrittenQuery;
    }

    /**
     * Creates a sorter of {@link LeafReaderContext} that orders leaves depending on the minimum
     * value and the sort order of the provided sortField, so that the leaves most likely to contain
     * the most competitive documents are collected first.
     */
    static CheckedConsumer<List<LeafReaderContext>, IOException> createLeafSorter(SortField sortField) {
        return leaves -> {
            long[] sortValues = new long[leaves.size()];
            long missingValue = (long) sortField.getMissingValue();
            for (LeafReaderContext ctx : leaves) {
                PointValues values = ctx.reader().getPointValues(sortField.getField());
                if (values == null) {
                    sortValues[ctx.ord] = missingValue;
                } else {
                    byte[] sortValue = sortField.getReverse() ? values.getMaxPackedValue(): values.getMinPackedValue();
                    sortValues[ctx.ord] = sortValue == null ? missingValue : LongPoint.decodeDimension(sortValue, 0);
                }
            }
            Comparator<LeafReaderContext> comparator = Comparator.comparingLong(l -> sortValues[l.ord]);
            if (sortField.getReverse()) {
                comparator = comparator.reversed();
            }
            Collections.sort(leaves, comparator);
        };
    }

    /**
     * Restores the original top field docs by removing the leading _score value that was added by the sort rewrite
     */
    private static void restoreTopFieldDocs(QuerySearchResult result, SortAndFormats originalSortAndFormats) {
        TopDocs topDocs = result.topDocs().topDocs;
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            FieldDoc fieldDoc = (FieldDoc) scoreDoc;
            fieldDoc.fields = Arrays.copyOfRange(fieldDoc.fields, 1, fieldDoc.fields.length);
        }
        TopFieldDocs newTopDocs = new TopFieldDocs(topDocs.totalHits, topDocs.scoreDocs, originalSortAndFormats.sort.getSort());
        result.topDocs(new TopDocsAndMaxScore(newTopDocs, Float.NaN), originalSortAndFormats.formats);
    }

    /**
     * Returns true if the provided query returns docs in index order (internal doc ids).
     * @param query The query to execute
     * @param sf The query sort
     */
    private static boolean returnsDocsInOrder(Query query, SortAndFormats sf) {
        if (sf == null || Sort.RELEVANCE.equals(sf.sort)) {
            // sort by score
            // queries that return constant scores will return docs in index
            // order since Lucene tie-breaks on the doc id
            return query.getClass() == ConstantScoreQuery.class
                || query.getClass() == MatchAllDocsQuery.class;
        } else {
            return Sort.INDEXORDER.equals(sf.sort);
        }
    }

    /**
     * Returns whether collection within the provided reader can be early-terminated if it sorts
     * with sortAndFormats.
     **/
    private static boolean canEarlyTerminate(IndexReader reader, SortAndFormats sortAndFormats) {
        if (sortAndFormats == null || sortAndFormats.sort == null) {
            return false;
        }
        final Sort sort = sortAndFormats.sort;
        for (LeafReaderContext ctx : reader.leaves()) {
            Sort indexSort = ctx.reader().getMetaData().getSort();
            if (indexSort == null || Lucene.canEarlyTerminate(sort, indexSort) == false) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if more than 50% of the documents in the index have the same value for the field.
     * The evaluation is an approximation based on finding the median value and estimating its count.
     */
    static boolean indexFieldHasDuplicateData(IndexReader reader, String field) throws IOException {
        long docsNoDupl = 0; // number of docs in segments with NO duplicate data that would benefit optimization
        long docsDupl = 0; // number of docs in segments with duplicate data that would NOT benefit optimization
        for (LeafReaderContext lrc : reader.leaves()) {
            PointValues pointValues = lrc.reader().getPointValues(field);
            if (pointValues == null) continue;
            int docCount = pointValues.getDocCount();
            if (docCount <= 512) { // skipping small segments as estimateMedianCount doesn't work well on them
                continue;
            }
            assert(pointValues.size() == docCount); // TODO: modify the code to handle multiple values

            int duplDocCount = docCount / 2; // a value is considered duplicated if it appears in more than half of the docs
            long minValue = LongPoint.decodeDimension(pointValues.getMinPackedValue(), 0);
            long maxValue = LongPoint.decodeDimension(pointValues.getMaxPackedValue(), 0);
            boolean hasDuplicateData = true;
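            // binary search over the value range: keep narrowing towards the half that could still contain
            // a single value occurring in more than duplDocCount docs; if neither half qualifies, no value
            // dominates the segment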
            while ((minValue < maxValue) && hasDuplicateData) {
                long midValue = Math.floorDiv(minValue, 2) + Math.floorDiv(maxValue, 2); // to avoid overflow first divide each value by 2
                long countLeft = estimatePointCount(pointValues, minValue, midValue);
                long countRight = estimatePointCount(pointValues, midValue + 1, maxValue);
                if ((countLeft >= countRight) && (countLeft > duplDocCount) ) {
                    maxValue = midValue;
                } else if ((countRight > countLeft) && (countRight > duplDocCount)) {
                    minValue = midValue + 1;
                } else {
                    hasDuplicateData = false;
                }
            }
            if (hasDuplicateData) {
                docsDupl += docCount;
            } else {
                docsNoDupl += docCount;
            }
        }
        return (docsDupl > docsNoDupl);
    }


    private static long estimatePointCount(PointValues pointValues, long minValue, long maxValue) {
        final byte[] minValueAsBytes = new byte[Long.BYTES];
        LongPoint.encodeDimension(minValue, minValueAsBytes, 0);
        final byte[] maxValueAsBytes = new byte[Long.BYTES];
        LongPoint.encodeDimension(maxValue, maxValueAsBytes, 0);

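        // a visitor that never collects documents: estimatePointCount only invokes compare() while
        // traversing the BKD tree structure to approximate how many points fall in [minValue, maxValue]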
        PointValues.IntersectVisitor visitor = new PointValues.IntersectVisitor() {
            @Override
            public void grow(int count) {}

            @Override
            public void visit(int docID) {}

            @Override
            public void visit(int docID, byte[] packedValue) {}

            @Override
            public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
                if (FutureArrays.compareUnsigned(minPackedValue, 0, Long.BYTES, maxValueAsBytes, 0, Long.BYTES) > 0 ||
                    FutureArrays.compareUnsigned(maxPackedValue, 0, Long.BYTES, minValueAsBytes, 0, Long.BYTES) < 0) {
                    return PointValues.Relation.CELL_OUTSIDE_QUERY;
                }
                if (FutureArrays.compareUnsigned(minPackedValue, 0, Long.BYTES, minValueAsBytes, 0, Long.BYTES) < 0 ||
                    FutureArrays.compareUnsigned(maxPackedValue, 0, Long.BYTES, maxValueAsBytes, 0, Long.BYTES) > 0) {
                    return PointValues.Relation.CELL_CROSSES_QUERY;
                }
                return PointValues.Relation.CELL_INSIDE_QUERY;
            }
        };
        return pointValues.estimatePointCount(visitor);
    }

    private static class TimeExceededException extends RuntimeException {}
}