// org.elasticsearch.common.lucene.search.XFilteredQuery Maven / Gradle / Ivy

package org.elasticsearch.common.lucene.search;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.FilteredQuery.FilterStrategy;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.DocIdSets;

import java.io.IOException;
import java.util.Set;


/**
 * A query that applies a filter to the results of another query.
 * <p>
 * Note: the bits are retrieved from the filter each time this
 * query is used in a search - use a CachingWrapperFilter to avoid
 * regenerating the bits every time.
 *
 * @see CachingWrapperFilter
 * @since 1.4
 */
// Changes are marked with //CHANGE:
// Delegate to FilteredQuery - this version fixes the bug in LUCENE-4705 and uses ApplyAcceptedDocsFilter internally
public final class XFilteredQuery extends Query {
    /** The filter exactly as supplied by the caller, i.e. before it is wrapped for the delegate. */
    private final Filter rawFilter;
    /** Lucene query that does the actual work; it holds the wrapped filter and the boost. */
    private final FilteredQuery delegate;
    /** Strategy handed to the delegate; kept so {@link #rewrite} and {@link #clone} can rebuild this query. */
    private final FilterStrategy strategy;

    /**
     * Constructs a new query which applies a filter to the results of the original query.
     * {@link Filter#getDocIdSet} will be called every time this query is used in a search.
     * Uses {@link FilteredQuery#RANDOM_ACCESS_FILTER_STRATEGY}.
     *
     * @param query  Query to be filtered, cannot be null.
     * @param filter Filter to apply to query results, cannot be null.
     * @throws IllegalArgumentException if {@code filter} is null
     */
    public XFilteredQuery(Query query, Filter filter) {
        this(query, filter, FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY);
    }

    /**
     * Expert: Constructs a new query which applies a filter to the results of the original query.
     * {@link Filter#getDocIdSet} will be called every time this query is used in a search.
     *
     * @param query    Query to be filtered, cannot be null.
     * @param filter   Filter to apply to query results, cannot be null.
     * @param strategy a filter strategy used to create a filtered scorer.
     * @throws IllegalArgumentException if {@code filter} is null
     * @see FilterStrategy
     */
    public XFilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
        // CHANGE: we need to wrap it in post application of accepted docs
        this(new FilteredQuery(query, wrapFilter(filter), strategy), filter, strategy);
    }

    /**
     * Internal constructor that stores an already-built delegate together with the
     * unwrapped filter and the strategy it was built from.
     */
    private XFilteredQuery(FilteredQuery delegate, Filter filter, FilterStrategy strategy) {
        this.delegate = delegate;
        this.rawFilter = filter;
        this.strategy = strategy;
    }

    /**
     * Wraps the caller's filter in an {@link ApplyAcceptedDocsFilter}, rejecting null first.
     * The delegate only ever receives the (non-null) wrapper, so whatever validation it
     * performs on its filter argument cannot detect a null raw filter; check it here instead.
     */
    private static Filter wrapFilter(Filter filter) {
        if (filter == null) {
            throw new IllegalArgumentException("filter cannot be null");
        }
        return new ApplyAcceptedDocsFilter(filter);
    }

    /**
     * Returns the Weight created by the wrapped {@link FilteredQuery}, which applies
     * the filter to the enclosed query.
     */
    @Override
    public Weight createWeight(final IndexSearcher searcher) throws IOException {
        return delegate.createWeight(searcher);
    }

    /**
     * Rewrites the query. If the rewritten inner query is an instance of
     * {@link MatchAllDocsQuery} (or is recognized by {@link Queries#isConstantMatchAllQuery})
     * it returns a {@link ConstantScoreQuery} over the wrapped filter. Otherwise, if the inner
     * query changed, it returns a new {@code XFilteredQuery} wrapping the rewritten query;
     * if nothing changed it returns {@code this}.
     */
    @Override
    public Query rewrite(IndexReader reader) throws IOException {
        Query query = delegate.getQuery();
        final Query queryRewritten = query.rewrite(reader);

        // CHANGE: if we push back to Lucene, would love to have an extension for "isMatchAllQuery"
        if (queryRewritten instanceof MatchAllDocsQuery || Queries.isConstantMatchAllQuery(queryRewritten)) {
            // Special case: If the query is a MatchAllDocsQuery, we only
            // return a CSQ(filter). The delegate's filter (the wrapped one) is used here,
            // not the raw filter.
            final Query rewritten = new ConstantScoreQuery(delegate.getFilter());
            // Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
            rewritten.setBoost(delegate.getBoost() * queryRewritten.getBoost());
            return rewritten;
        }

        if (queryRewritten != query) {
            // rewrite to a new XFilteredQuery wrapping the rewritten query; it is rebuilt
            // from the raw filter so the filter is wrapped exactly once
            final Query rewritten = new XFilteredQuery(queryRewritten, rawFilter, strategy);
            rewritten.setBoost(delegate.getBoost());
            return rewritten;
        }

        // nothing to rewrite, we are done!
        return this;
    }

    /**
     * Sets the boost on the wrapped query, which is the single place the boost is stored.
     */
    @Override
    public void setBoost(float b) {
        delegate.setBoost(b);
    }

    /**
     * Returns the boost stored on the wrapped query.
     */
    @Override
    public float getBoost() {
        return delegate.getBoost();
    }

    /**
     * Returns this FilteredQuery's (unfiltered) Query
     */
    public final Query getQuery() {
        return delegate.getQuery();
    }

    /**
     * Returns this FilteredQuery's filter. If the filter supplied at construction was
     * itself an {@link ApplyAcceptedDocsFilter}, the filter it wraps is returned instead.
     */
    public final Filter getFilter() {
        // CHANGE: unwrap the accepted docs filter
        if (rawFilter instanceof ApplyAcceptedDocsFilter) {
            return ((ApplyAcceptedDocsFilter) rawFilter).filter();
        }
        return rawFilter;
    }

    // inherit javadoc
    @Override
    public void extractTerms(Set<Term> terms) {
        delegate.extractTerms(terms);
    }

    /**
     * Prints a user-readable version of this query, as rendered by the wrapped query.
     */
    @Override
    public String toString(String s) {
        return delegate.toString(s);
    }

    /**
     * Returns true iff o is an {@code XFilteredQuery} whose wrapped query equals this one's.
     */
    @Override
    public boolean equals(Object o) {
        if (!(o instanceof XFilteredQuery)) {
            return false;
        }
        return delegate.equals(((XFilteredQuery) o).delegate);
    }

    /**
     * Returns a hash code value for this object (the wrapped query's hash code).
     */
    @Override
    public int hashCode() {
        return delegate.hashCode();
    }

    // CHANGE: Add custom random access strategy, allowing to set the threshold
    // CHANGE: Add filter first filter strategy
    /** Strategy with threshold 0: always tries to pass the filter down as accept docs. */
    public static final FilterStrategy ALWAYS_RANDOM_ACCESS_FILTER_STRATEGY = new CustomRandomAccessFilterStrategy(0);

    /** Strategy with the default threshold (-1). */
    public static final CustomRandomAccessFilterStrategy CUSTOM_FILTER_STRATEGY = new CustomRandomAccessFilterStrategy();

    /**
     * Extends {@link org.apache.lucene.search.FilteredQuery.RandomAccessFilterStrategy}.
     * <p>
     * Adds a threshold value, which defaults to -1. When set to -1, it checks whether the filter
     * docSet is a fast docSet (see {@link DocIdSets#isFastIterator}); if it is <em>not</em>, it uses
     * {@link FilteredQuery#QUERY_FIRST_FILTER_STRATEGY} (since the assumption is that it is a "slow"
     * filter and better computed only on whatever matched the query).
     * <p>
     * If the threshold value is 0, it always tries to pass "down" the filter as acceptDocs, and if the filter
     * can't be represented as Bits (never really), then it uses {@link FilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY}.
     * <p>
     * If the above conditions are not met, then it reverts to the {@link FilteredQuery.RandomAccessFilterStrategy} logic,
     * with the threshold used to control {@link #useRandomAccess(org.apache.lucene.util.Bits, int)}.
     */
    public static class CustomRandomAccessFilterStrategy extends FilteredQuery.RandomAccessFilterStrategy {

        /** -1 selects the default behaviour, 0 forces accept-docs pass-down, any other value is the first-doc cut-off. */
        private final int threshold;

        /**
         * Creates a strategy with the default threshold (-1).
         */
        public CustomRandomAccessFilterStrategy() {
            this.threshold = -1;
        }

        /**
         * Creates a strategy with an explicit threshold.
         *
         * @param threshold -1 for the default behaviour, 0 to always pass the filter down as
         *                  accept docs, otherwise the cut-off used by
         *                  {@link #useRandomAccess(org.apache.lucene.util.Bits, int)}
         */
        public CustomRandomAccessFilterStrategy(int threshold) {
            this.threshold = threshold;
        }

        /**
         * Picks the scorer for the given filter doc id set according to the threshold, as
         * described in the class documentation.
         */
        @Override
        public Scorer filteredScorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Weight weight, DocIdSet docIdSet) throws IOException {
            // CHANGE: If threshold is 0, always pass down the accept docs, don't pay the price of calling nextDoc even...
            if (threshold == 0) {
                final Bits filterAcceptDocs = docIdSet.bits();
                if (filterAcceptDocs != null) {
                    return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
                } else {
                    return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
                }
            }

            // CHANGE: handle "default" value
            if (threshold == -1) {
                // default value: don't iterate the filter; only apply it after the query if it is not a "fast" docIdSet
                if (!DocIdSets.isFastIterator(docIdSet)) {
                    return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
                }
            }

            return super.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
        }

        /**
         * Expert: decides if a filter should be executed as "random-access" or not.
         * random-access means the filter "filters" in a similar way as deleted docs are filtered
         * in Lucene. This is faster when the filter accepts many documents.
         * However, when the filter is very sparse, it can be faster to execute the query+filter
         * as a conjunction in some cases.
         * <p>
         * This implementation returns true if the first document accepted by the
         * filter is &lt; threshold; if threshold is -1 (the default), then it checks for &lt; 100.
         */
        @Override
        protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
            // "default"
            if (threshold == -1) {
                return firstFilterDoc < 100;
            }
            //TODO once we have a cost API on filters and scorers we should rethink this heuristic
            return firstFilterDoc < threshold;
        }
    }

    /**
     * Returns a new {@code XFilteredQuery} around a clone of the wrapped query, sharing
     * the same raw filter and strategy.
     */
    @Override
    public Query clone() {
        return new XFilteredQuery((FilteredQuery) delegate.clone(), rawFilter, strategy);
    }

}