// org.elasticsearch.search.aggregations.bucket.filter.QueryToFilterAdapter (Maven / Gradle / Ivy)
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.search.aggregations.bucket.filter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.search.IndexSortSortedNumericDocValuesRangeQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.IndexOrDocValuesQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PointRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.search.aggregations.Aggregator;
import org.elasticsearch.xcontent.XContentBuilder;
import java.io.IOException;
import java.util.function.BiConsumer;
import java.util.function.IntPredicate;
/**
* Adapts a Lucene {@link Query} to the behaviors used be the
* {@link FiltersAggregator}. In general we try to delegate to {@linkplain Query}
* when we don't have a special optimization.
*/
public class QueryToFilterAdapter {
/**
* Build a filter for the query against the provided searcher.
*
* Note: This method rewrites the query against the {@link IndexSearcher}
*/
public static QueryToFilterAdapter build(IndexSearcher searcher, String key, Query query) throws IOException {
// Wrapping with a ConstantScoreQuery enables a few more rewrite
// rules as of Lucene 9.2
query = searcher.rewrite(new ConstantScoreQuery(query));
if (query instanceof ConstantScoreQuery) {
/*
* Unwrap constant score because it gets in the way of us
* understanding what the queries are trying to do and we
* don't use the score at all anyway. Effectively we always
* run in constant score mode.
*/
query = ((ConstantScoreQuery) query).getQuery();
}
if (query instanceof TermQuery) {
return new TermQueryToFilterAdapter(searcher, key, (TermQuery) query);
}
if (query instanceof DocValuesFieldExistsQuery) {
return new DocValuesFieldExistsAdapter(searcher, key, (DocValuesFieldExistsQuery) query);
}
if (query instanceof MatchAllDocsQuery) {
return new MatchAllQueryToFilterAdapter(searcher, key, (MatchAllDocsQuery) query);
}
if (query instanceof MatchNoDocsQuery) {
return new MatchNoneQueryToFilterAdapter(searcher, key, (MatchNoDocsQuery) query);
}
return new QueryToFilterAdapter<>(searcher, key, query);
}
private final IndexSearcher searcher;
private final String key;
private final Q query;
/**
* The weight for the query or {@code null} if we haven't built it. Use
* {@link #weight()} to build it when needed.
*/
private Weight weight;
QueryToFilterAdapter(IndexSearcher searcher, String key, Q query) {
this.searcher = searcher;
this.key = key;
this.query = query;
}
/**
* The query we're adapting.
*
* Subclasses should use this to fetch the query when making query
* specific optimizations.
*/
Q query() {
return query;
}
/**
* Is this an inefficient union of the top level query with the filter?
* If the top level query if complex we can't efficiently merge it with
* the filter. If we can't do that it is likely faster to just run the
* "native" aggregation implementation rather than go filter by filter.
*/
public boolean isInefficientUnion() {
return false;
}
/**
* Key for this filter.
*/
public final String key() {
return key;
}
/**
* Searcher that this filter is targeting.
*/
protected final IndexSearcher searcher() {
return searcher;
}
/**
* Would using index metadata like {@link IndexReader#docFreq}
* or {@link IndexReader#maxDoc} to count the number of matching documents
* produce the same answer as collecting the results with a sequence like
* {@code searcher.collect(counter); return counter.readAndReset();}?
*/
protected static boolean countCanUseMetadata(FiltersAggregator.Counter counter, Bits live) {
if (live != null) {
/*
* We can only use metadata if all of the documents in the reader
* are visible. This is done by returning a null `live` bits. The
* name `live` is traditional because most of the time a non-null
* `live` bits means that there are deleted documents. But `live`
* might also be non-null if document level security is enabled.
*/
return false;
}
/*
* We can only use metadata if we're not using the special docCount
* field. Otherwise we wouldn't know how many documents each lucene
* document represents.
*/
return counter.docCount.alwaysOne();
}
/**
* Make a filter that matches this filter and the provided query.
*
* Note: This method rewrites the query against the {@link IndexSearcher}.
*/
QueryToFilterAdapter union(Query extraQuery) throws IOException {
/*
* Wrapping with a ConstantScoreQuery enables a few more rewrite
* rules as of Lucene 9.2.
* It'd be *wonderful* if Lucene could do fancy optimizations
* when merging queries like combining ranges but it doesn't at
* the moment. Admittedly, we have a much more limited problem.
* We don't care about score here at all. We know which queries
* it's worth spending time to optimize because we know which aggs
* rewrite into this one.
*/
extraQuery = searcher().rewrite(new ConstantScoreQuery(extraQuery));
Query unwrappedExtraQuery = unwrap(extraQuery);
if (unwrappedExtraQuery instanceof MatchAllDocsQuery) {
return this;
}
Query unwrappedQuery = unwrap(query);
if (unwrappedQuery instanceof PointRangeQuery && unwrappedExtraQuery instanceof PointRangeQuery) {
Query merged = MergedPointRangeQuery.merge((PointRangeQuery) unwrappedQuery, (PointRangeQuery) unwrappedExtraQuery);
if (merged != null) {
// Should we rewrap here?
return new QueryToFilterAdapter<>(searcher(), key(), merged);
}
}
BooleanQuery.Builder builder = new BooleanQuery.Builder();
builder.add(query, BooleanClause.Occur.FILTER);
builder.add(extraQuery, BooleanClause.Occur.FILTER);
return new QueryToFilterAdapter<>(searcher(), key(), builder.build()) {
public boolean isInefficientUnion() {
return true;
}
};
}
private static Query unwrap(Query query) {
while (true) {
if (query instanceof ConstantScoreQuery) {
query = ((ConstantScoreQuery) query).getQuery();
continue;
}
if (query instanceof IndexSortSortedNumericDocValuesRangeQuery) {
query = ((IndexSortSortedNumericDocValuesRangeQuery) query).getFallbackQuery();
continue;
}
if (query instanceof IndexOrDocValuesQuery) {
query = ((IndexOrDocValuesQuery) query).getIndexQuery();
continue;
}
return query;
}
}
/**
* Build a predicate that the "compatible" implementation of the
* {@link FiltersAggregator} will use to figure out if the filter matches.
*
* Consumers of this method will always call it with non-negative,
* increasing {@code int}s. A sequence like {@code 0, 1, 7, 8, 10} is fine.
* It won't call with {@code 0, 1, 0} or {@code -1, 0, 1}.
*/
@SuppressWarnings("resource") // Closing the reader is someone else's problem
IntPredicate matchingDocIds(LeafReaderContext ctx) throws IOException {
return Lucene.asSequentialAccessBits(ctx.reader().maxDoc(), weight().scorerSupplier(ctx))::get;
}
/**
* Count the number of documents that match this filter in a leaf.
*/
long count(LeafReaderContext ctx, FiltersAggregator.Counter counter, Bits live) throws IOException {
BulkScorer scorer = weight().bulkScorer(ctx);
if (scorer == null) {
// No hits in this segment.
return 0;
}
scorer.score(counter, live);
return counter.readAndReset(ctx);
}
/**
* Collect all documents that match this filter in this leaf.
*/
void collect(LeafReaderContext ctx, LeafCollector collector, Bits live) throws IOException {
BulkScorer scorer = weight().bulkScorer(ctx);
if (scorer == null) {
// No hits in this segment.
return;
}
scorer.score(collector, live);
}
/**
* Collect profiling information for this filter. Rhymes with
* {@link Aggregator#collectDebugInfo(BiConsumer)}.
*
* Well behaved implementations will always call the superclass
* implementation just in case it has something interesting. They will
* also only add objects which can be serialized with
* {@link StreamOutput#writeGenericValue(Object)} and
* {@link XContentBuilder#value(Object)}. And they'll have an integration
* test.
*/
void collectDebugInfo(BiConsumer add) {
add.accept("query", query.toString());
}
private Weight weight() throws IOException {
if (weight == null) {
weight = searcher().createWeight(query, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
}
return weight;
}
}