
com.google.datastore.v1.client.QuerySplitterImpl

/*
 * Copyright 2015 Google LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.google.datastore.v1.client;

import static com.google.datastore.v1.client.DatastoreHelper.makeAndFilter;

import com.google.api.core.BetaApi;
import com.google.datastore.v1.EntityResult;
import com.google.datastore.v1.Filter;
import com.google.datastore.v1.Key;
import com.google.datastore.v1.PartitionId;
import com.google.datastore.v1.Projection;
import com.google.datastore.v1.PropertyFilter;
import com.google.datastore.v1.PropertyFilter.Operator;
import com.google.datastore.v1.PropertyOrder.Direction;
import com.google.datastore.v1.PropertyReference;
import com.google.datastore.v1.Query;
import com.google.datastore.v1.QueryResultBatch;
import com.google.datastore.v1.QueryResultBatch.MoreResultsType;
import com.google.datastore.v1.ReadOptions;
import com.google.datastore.v1.RunQueryRequest;
import com.google.protobuf.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import javax.annotation.Nullable;

/**
 * Provides the ability to split a query into multiple shards using Cloud Datastore.
 *
 * <p>This implementation of the QuerySplitter uses the __scatter__ property to gather random split
 * points for a query.
 */
final class QuerySplitterImpl implements QuerySplitter {

  /** The number of keys to sample for each split. */
  private static final int KEYS_PER_SPLIT = 32;

  private static final EnumSet<Operator> UNSUPPORTED_OPERATORS =
      EnumSet.of(
          Operator.LESS_THAN,
          Operator.LESS_THAN_OR_EQUAL,
          Operator.GREATER_THAN,
          Operator.GREATER_THAN_OR_EQUAL);

  static final QuerySplitter INSTANCE = new QuerySplitterImpl();

  private QuerySplitterImpl() {
    // No initialization required.
  }

  @Override
  public List<Query> getSplits(
      Query query, PartitionId partition, int numSplits, Datastore datastore)
      throws DatastoreException, IllegalArgumentException {
    return getSplitsInternal(query, partition, numSplits, datastore, null);
  }

  @BetaApi
  @Override
  public List<Query> getSplits(
      Query query, PartitionId partition, int numSplits, Datastore datastore, Timestamp readTime)
      throws DatastoreException, IllegalArgumentException {
    return getSplitsInternal(query, partition, numSplits, datastore, readTime);
  }

  private List<Query> getSplitsInternal(
      Query query,
      PartitionId partition,
      int numSplits,
      Datastore datastore,
      @Nullable Timestamp readTime)
      throws DatastoreException, IllegalArgumentException {
    List<Query> splits = new ArrayList<>(numSplits);
    if (numSplits == 1) {
      splits.add(query);
      return splits;
    }
    validateQuery(query);
    validateSplitSize(numSplits);

    List<Key> scatterKeys = getScatterKeys(numSplits, query, partition, datastore, readTime);
    Key lastKey = null;
    for (Key nextKey : getSplitKey(scatterKeys, numSplits)) {
      splits.add(createSplit(lastKey, nextKey, query));
      lastKey = nextKey;
    }
    splits.add(createSplit(lastKey, null, query));
    return splits;
  }

  /**
   * Verify that the given number of splits is not out of bounds.
   *
   * @param numSplits the number of splits.
   * @throws IllegalArgumentException if the split size is invalid.
   */
  private void validateSplitSize(int numSplits) throws IllegalArgumentException {
    if (numSplits < 1) {
      throw new IllegalArgumentException("The number of splits must be greater than 0.");
    }
  }

  /**
   * Validates that we only have allowable filters.
   *
   * <p>Note that equality and ancestor filters are allowed, however they may result in inefficient
   * sharding.
   */
  private void validateFilter(Filter filter) throws IllegalArgumentException {
    switch (filter.getFilterTypeCase()) {
      case COMPOSITE_FILTER:
        for (Filter subFilter : filter.getCompositeFilter().getFiltersList()) {
          validateFilter(subFilter);
        }
        break;
      case PROPERTY_FILTER:
        if (UNSUPPORTED_OPERATORS.contains(filter.getPropertyFilter().getOp())) {
          throw new IllegalArgumentException("Query cannot have any inequality filters.");
        }
        break;
      default:
        throw new IllegalArgumentException(
            "Unsupported filter type: " + filter.getFilterTypeCase());
    }
  }

  /**
   * Verifies that the given query can be properly scattered.
   *
   * @param query the query to verify
   * @throws IllegalArgumentException if the query is invalid.
   */
  private void validateQuery(Query query) throws IllegalArgumentException {
    if (query.getKindCount() != 1) {
      throw new IllegalArgumentException("Query must have exactly one kind.");
    }
    if (query.getOrderCount() != 0) {
      throw new IllegalArgumentException("Query cannot have any sort orders.");
    }
    if (query.hasFilter()) {
      validateFilter(query.getFilter());
    }
  }

  /**
   * Create a new {@link Query} given the query and range.
   *
   * @param lastKey the previous key. If null then assumed to be the beginning.
   * @param nextKey the next key. If null then assumed to be the end.
   * @param query the desired query.
   */
  private Query createSplit(Key lastKey, Key nextKey, Query query) {
    if (lastKey == null && nextKey == null) {
      return query;
    }
    List<Filter> keyFilters = new ArrayList<>();
    if (query.hasFilter()) {
      keyFilters.add(query.getFilter());
    }
    if (lastKey != null) {
      Filter lowerBound =
          DatastoreHelper.makeFilter(
                  DatastoreHelper.KEY_PROPERTY_NAME,
                  PropertyFilter.Operator.GREATER_THAN_OR_EQUAL,
                  DatastoreHelper.makeValue(lastKey))
              .build();
      keyFilters.add(lowerBound);
    }
    if (nextKey != null) {
      Filter upperBound =
          DatastoreHelper.makeFilter(
                  DatastoreHelper.KEY_PROPERTY_NAME,
                  PropertyFilter.Operator.LESS_THAN,
                  DatastoreHelper.makeValue(nextKey))
              .build();
      keyFilters.add(upperBound);
    }
    return Query.newBuilder(query).setFilter(makeAndFilter(keyFilters)).build();
  }

  /**
   * Gets a list of split keys given a desired number of splits.
   *
   * <p>This list will contain multiple split keys for each split. Only a single split key will be
   * chosen as the split point, however providing multiple keys allows for more uniform sharding.
   *
   * @param numSplits the number of desired splits.
   * @param query the user query.
   * @param partition the partition to run the query in.
   * @param datastore the datastore containing the data.
   * @param readTime read time at which to get the split keys from the datastore.
   * @throws DatastoreException if there was an error when executing the datastore query.
   */
  private List<Key> getScatterKeys(
      int numSplits,
      Query query,
      PartitionId partition,
      Datastore datastore,
      @Nullable Timestamp readTime)
      throws DatastoreException {
    Query.Builder scatterPointQuery = createScatterQuery(query, numSplits);

    List<Key> keySplits = new ArrayList<>();

    QueryResultBatch batch;
    do {
      RunQueryRequest.Builder scatterRequest =
          RunQueryRequest.newBuilder().setPartitionId(partition).setQuery(scatterPointQuery);
      scatterRequest.setProjectId(partition.getProjectId());
      scatterRequest.setDatabaseId(partition.getDatabaseId());
      if (readTime != null) {
        scatterRequest.setReadOptions(ReadOptions.newBuilder().setReadTime(readTime).build());
      }
      batch = datastore.runQuery(scatterRequest.build()).getBatch();
      for (EntityResult result : batch.getEntityResultsList()) {
        keySplits.add(result.getEntity().getKey());
      }
      scatterPointQuery.setStartCursor(batch.getEndCursor());
      scatterPointQuery
          .getLimitBuilder()
          .setValue(scatterPointQuery.getLimit().getValue() - batch.getEntityResultsCount());
    } while (batch.getMoreResults() == MoreResultsType.NOT_FINISHED);

    Collections.sort(keySplits, DatastoreHelper.getKeyComparator());
    return keySplits;
  }

  /**
   * Creates a scatter query from the given user query.
   *
   * @param query the user's query.
   * @param numSplits the number of splits to create.
   */
  private Query.Builder createScatterQuery(Query query, int numSplits) {
    // TODO(pcostello): We can potentially support better splits with equality filters in our query
    // if there exists a composite index on property, __scatter__, __key__. Until an API for
    // metadata exists, this isn't possible. Note that ancestor and inequality queries fall into
    // the same category.
    Query.Builder scatterPointQuery = Query.newBuilder();
    scatterPointQuery.addAllKind(query.getKindList());
    scatterPointQuery.addOrder(
        DatastoreHelper.makeOrder(DatastoreHelper.SCATTER_PROPERTY_NAME, Direction.ASCENDING));
    // There is a split containing entities before and after each scatter entity:
    // ||---*------*------*------*------*------*------*---||   * = scatter entity
    // If we represent each split as a region before a scatter entity, there is an extra region
    // following the last scatter point. Thus, we do not need the scatter entities for the last
    // region.
    scatterPointQuery.getLimitBuilder().setValue((numSplits - 1) * KEYS_PER_SPLIT);
    scatterPointQuery.addProjection(
        Projection.newBuilder().setProperty(PropertyReference.newBuilder().setName("__key__")));
    return scatterPointQuery;
  }

  /**
   * Given a list of keys and a number of splits find the keys to split on.
   *
   * @param keys the list of keys.
   * @param numSplits the number of splits.
   */
  private Iterable<Key> getSplitKey(List<Key> keys, int numSplits) {
    // If the number of keys is less than the number of splits, we are limited in the number of
    // splits we can make.
    if (keys.size() < numSplits - 1) {
      return keys;
    }

    // Calculate the number of keys per split. This should be KEYS_PER_SPLIT, but may
    // be less if there are not KEYS_PER_SPLIT * (numSplits - 1) scatter entities.
    //
    // Consider the following dataset, where - represents an entity and * represents an entity
    // that is returned as a scatter entity:
    // ||---*-----*----*-----*-----*------*----*----||
    // If we want 4 splits in this data, the optimal split would look like:
    // ||---*-----*----*-----*-----*------*----*----||
    //            |          |            |
    // The scatter keys in the last region are not useful to us, so we never request them:
    // ||---*-----*----*-----*-----*------*---------||
    //            |          |            |
    // With 6 scatter keys we want to set scatter points at indexes: 1, 3, 5.
    //
    // We keep this as a double so that any "fractional" keys per split get distributed throughout
    // the splits and don't make the last split significantly larger than the rest.
    double numKeysPerSplit = Math.max(1.0, ((double) keys.size()) / (numSplits - 1));

    List<Key> keysList = new ArrayList<>(numSplits - 1);
    // Grab the last sample for each split, otherwise the first split will be too small.
    for (int i = 1; i < numSplits; i++) {
      int splitIndex = (int) Math.round(i * numKeysPerSplit) - 1;
      keysList.add(keys.get(splitIndex));
    }

    return keysList;
  }
}
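
For orientation, a minimal usage sketch (not part of the shipped source): callers are expected to obtain the splitter through DatastoreHelper.getQuerySplitter() rather than constructing this package-private class. The project ID, the kind name "Task", and the split count below are placeholder values, and credential/endpoint setup is assumed to come from the environment via DatastoreHelper.getOptionsFromEnv().

import com.google.datastore.v1.KindExpression;
import com.google.datastore.v1.PartitionId;
import com.google.datastore.v1.Query;
import com.google.datastore.v1.RunQueryRequest;
import com.google.datastore.v1.client.Datastore;
import com.google.datastore.v1.client.DatastoreFactory;
import com.google.datastore.v1.client.DatastoreHelper;
import com.google.datastore.v1.client.QuerySplitter;
import java.util.List;

public class QuerySplitterExample {
  public static void main(String[] args) throws Exception {
    // Build a client; assumes project and credentials are configured in the environment
    // (e.g. GOOGLE_APPLICATION_CREDENTIALS / DATASTORE_PROJECT_ID).
    Datastore datastore =
        DatastoreFactory.get().create(DatastoreHelper.getOptionsFromEnv().build());

    // A single-kind query with no sort orders or inequality filters, as required by
    // QuerySplitterImpl.validateQuery. "Task" is a placeholder kind name.
    Query query =
        Query.newBuilder()
            .addKind(KindExpression.newBuilder().setName("Task"))
            .build();

    // "my-project" is a placeholder project ID; the default database is used.
    PartitionId partition = PartitionId.newBuilder().setProjectId("my-project").build();

    // Ask for 8 shards.
    QuerySplitter splitter = DatastoreHelper.getQuerySplitter();
    List<Query> splits = splitter.getSplits(query, partition, 8, datastore);

    for (Query split : splits) {
      RunQueryRequest request =
          RunQueryRequest.newBuilder().setPartitionId(partition).setQuery(split).build();
      // Each shard can be executed independently, e.g. one per worker.
      datastore.runQuery(request);
    }
  }
}

Each returned split is the original query AND-ed with __key__ range filters (see createSplit above), so the shards cover disjoint key ranges and together cover the whole kind.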




