// com.google.appengine.api.datastore.QuerySplitHelper Maven / Gradle / Ivy
/*
* Copyright 2021 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.appengine.api.datastore;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.appengine.api.datastore.Query.CompositeFilter;
import com.google.appengine.api.datastore.Query.Filter;
import com.google.appengine.api.datastore.Query.FilterPredicate;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Set;
/**
* Helper class for query splitting.
*
* <p>This class creates a {@link MultiQueryBuilder} that will produce a sequence of lists of
* queries. Each list of queries in the sequence should have their results merged (this list could
* consist of a single query, which can just be executed normally). The results of each list should
* then be concatenated with the next list in the sequence.
*
* <p>This class guarantees that the result of merging the results in the manner described above will
* produce a valid result set.
*
* <p>The algorithm employed here is as efficient as possible. It has been optimized to favor
* concatenation over merging results in memory, as concatenating results allows for greater
* leveraging of limit, prefetch, and count parameters. This should also improve the performance
* slightly when compared to a query splitter algorithm that attempts to merge all results, as the
* results for all queries are fetched synchronously. Even when we start to use the async query
* framework the only loss of speed would be the time saved by the first async prefetch for each set
* of queries. This potential loss is both small and greatly outweighed by the value of respecting
* limit, prefetch and count more accurately. It can also be eliminated by starting the next round
* of queries before the last is done.
*
* <p>There are also many situations where all queries can be done sequentially. In these cases we
* can also support sorts on keys_only queries.
*
* <p>This class does not preserve the order in which the filters appear in the query provided to it.
*
* <p>As the number of queries that need to be executed to generate the result set is
* mult_i(|component_i.filters|) (which grows very fast) we rely on {@link #MAX_PARALLEL_QUERIES} to
* limit the number of queries that need to be run in parallel.
*
*/
final class QuerySplitHelper {
private QuerySplitHelper() {}
private static final int MAX_PARALLEL_QUERIES = 30;
private static final Collection QUERY_SPLITTERS =
Collections.synchronizedCollection(
Arrays.asList(new NotEqualQuerySplitter(), new InQuerySplitter()));
/**
* Splits the provided {@link Query} into a list of datastore supported sub-queries using the
* default set of {@link QuerySplitter}s.
*
* @return the resulting list of {@code MultiQueryBuilder}.
*/
static List splitQuery(Query query) {
return splitQuery(query, QUERY_SPLITTERS);
}
/**
* Splits the provided {@link Query} into a list of datastore supported sub-queries.
*
* @param query the query to split
* @param splitters the splitters to use
* @return the resulting list of {@code MultiQueryBuilder}.
*/
@SuppressWarnings("deprecation")
static List splitQuery(Query query, Collection splitters) {
List result;
if (query.getFilter() == null) {
result = Collections.singletonList(splitQuery(query.getFilterPredicates(), query, splitters));
} else {
Set> dnf = getDisjunctiveNormalForm(query.getFilter());
result = Lists.newArrayListWithCapacity(dnf.size());
for (Set filters : dnf) {
result.add(splitQuery(filters, query, splitters));
}
}
int totalParallelQueries = 0;
for (MultiQueryBuilder builder : result) {
totalParallelQueries += builder.getParallelQuerySize();
}
checkArgument(
totalParallelQueries <= MAX_PARALLEL_QUERIES,
"Splitting the provided query requires that too many subqueries are merged in memory.");
return result;
}
/**
* Splits a single list of {@link FilterPredicate}s.
*
* @param filters the filters to split
* @param baseQuery the base query to consider
* @param splitters the splitters to use
* @return the resulting list of {@code MultiQueryBuilder}.
*/
static MultiQueryBuilder splitQuery(
Collection filters, Query baseQuery, Collection splitters) {
List remainingFilters = Lists.newLinkedList(filters);
List components = Lists.newArrayList();
for (QuerySplitter splitter : splitters) {
components.addAll(splitter.split(remainingFilters, baseQuery.getSortPredicates()));
}
return new MultiQueryBuilder(
remainingFilters, components, !baseQuery.getSortPredicates().isEmpty());
}
/**
* Returns the disjunctive normal form of the given filter.
*
* @return A set of sets of filters, where the outer set should be combined with OR and the inner
* sets should be combined with AND.
*/
static Set> getDisjunctiveNormalForm(Filter filter) {
if (filter instanceof CompositeFilter) {
return getDisjunctiveNormalForm((CompositeFilter) filter);
} else if (filter instanceof FilterPredicate) {
return Collections.>singleton(
Sets.newLinkedHashSet(ImmutableSet.of((FilterPredicate) filter)));
}
throw new IllegalArgumentException("Unknown expression type: " + filter.getClass());
}
/**
* @return the disjunctive normal form of the given composite filter
* @see #getDisjunctiveNormalForm(Filter)
*/
static Set> getDisjunctiveNormalForm(CompositeFilter filter) {
switch (filter.getOperator()) {
case AND:
return getDisjunctiveNormalFormAnd(filter.getSubFilters());
case OR:
return getDisjunctiveNormalFormOr(filter.getSubFilters());
}
throw new IllegalArgumentException("Unknown expression operator: " + filter.getOperator());
}
/**
* @return the disjunctive normal form of the given sub filters using the distributive law
* @see #getDisjunctiveNormalForm(Filter)
*/
static Set> getDisjunctiveNormalFormAnd(Collection subFilter) {
Set predicates = Sets.newLinkedHashSetWithExpectedSize(subFilter.size());
Set> result = null;
for (Filter subExp : subFilter) {
if (subExp instanceof FilterPredicate) {
predicates.add((FilterPredicate) subExp);
} else if (subExp instanceof CompositeFilter) {
Set> dnf = getDisjunctiveNormalForm((CompositeFilter) subExp);
if (result == null) {
result = dnf;
} else {
// Apply the distributive law: (X or Y) and (A or B) becomes
// (X and A) or (X and B) or (Y and A) or (Y and B).
Set> combinedDnf =
Sets.newLinkedHashSetWithExpectedSize(dnf.size() * result.size());
for (Set rhs : result) {
for (Set lhs : dnf) {
Set combined =
Sets.newLinkedHashSetWithExpectedSize(rhs.size() + lhs.size());
combined.addAll(rhs);
combined.addAll(lhs);
combinedDnf.add(combined);
}
}
result = combinedDnf;
}
} else {
throw new IllegalArgumentException("Unknown expression type: " + subExp.getClass());
}
}
if (result == null) {
return Collections.singleton(predicates);
}
if (!predicates.isEmpty()) {
// Apply half of the distributive law: (X or Y) and A becomes
// (X and A) or (Y and A).
for (Set clause : result) {
clause.addAll(predicates);
}
}
return result;
}
/**
* @return the disjunctive normal form of the given sub filters by flattening them
* @see #getDisjunctiveNormalForm(Filter)
*/
static Set> getDisjunctiveNormalFormOr(Collection subFilters) {
Set> result = Sets.newLinkedHashSetWithExpectedSize(subFilters.size());
for (Filter subExp : subFilters) {
result.addAll(getDisjunctiveNormalForm(subExp));
}
return result;
}
}