// com.microsoft.azure.documentdb.bulkexecutor.internal.BatchDeleter Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of documentdb-bulkexecutor Show documentation
// Document Bulk Executor for Azure Cosmos DB Service
// The newest version!
/*
* The MIT License (MIT)
* Copyright (c) 2018 Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.microsoft.azure.documentdb.bulkexecutor.internal;
import static com.microsoft.azure.documentdb.bulkexecutor.internal.ExceptionUtils.isGone;
import static com.microsoft.azure.documentdb.bulkexecutor.internal.ExceptionUtils.isSplit;
import static com.microsoft.azure.documentdb.bulkexecutor.internal.ExceptionUtils.isThrottled;
import static com.microsoft.azure.documentdb.bulkexecutor.internal.ExceptionUtils.isTimedOut;
import static com.microsoft.azure.documentdb.bulkexecutor.internal.ExceptionUtils.isUnavailable;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.util.concurrent.AtomicDouble;
import com.microsoft.azure.documentdb.DocumentClient;
import com.microsoft.azure.documentdb.DocumentClientException;
import com.microsoft.azure.documentdb.PartitionKeyDefinition;
import com.microsoft.azure.documentdb.RequestOptions;
import com.microsoft.azure.documentdb.SqlParameter;
import com.microsoft.azure.documentdb.SqlParameterCollection;
import com.microsoft.azure.documentdb.SqlQuerySpec;
import com.microsoft.azure.documentdb.StoredProcedureResponse;
import com.microsoft.azure.documentdb.bulkexecutor.BulkDeleteFailure;
import com.microsoft.azure.documentdb.bulkexecutor.BulkImportFailure;
public class BatchDeleter {
/**
* The physical partition key range this batch deleter is responsible for
*/
private final String partitionKeyRangeId;
/**
* DocumentClient instance to be used by this batch deleter
*/
private final DocumentClient client;
/**
* The count of documents bulk deleted by this batch deleted.
*/
public AtomicInteger numberOfDocumentsDeleted;
/**
* The total request units consumed by this batch deleter.
*/
public AtomicDouble totalRequestUnitsConsumed;
/**
* The link to the system bulk delete stored procedure.
*/
private final String bulkDeleteSprocLink;
/**
* The query spec passed to the system bulk delete stored procedure.
*/
private final BulkDeleteQuerySpec querySpec;
/**
* Request options specifying the underlying partition key range id.
*/
private RequestOptions requestOptions;
private static final ObjectMapper objectMapper = new ObjectMapper();
/**
* The logger instance.
*/
private static final Logger logger = LoggerFactory.getLogger(BatchDeleter.class);
/**
* The maximum number of retries when the bulk delete times out.
*/
private final int maxRetryCountOnTimeouts = 5;
/**
* The default max batch size for bulk delete operations
*/
private final static int DEFAULT_BULK_DELETE_BATCH_SIZE = 1000;
private List> pkIdTuplesToDelete;
/**
* The list of failures during the bulk delete execution for the mini batch
*/
private List failedDeletes;
public BatchDeleter(
String partitionKeyRangeId,
DocumentClient client,
String bulkDeleteSprocLink,
BulkDeleteQuerySpec querySpec) {
this.partitionKeyRangeId = partitionKeyRangeId;
this.client = client;
this.bulkDeleteSprocLink = bulkDeleteSprocLink;
this.querySpec = querySpec;
this.numberOfDocumentsDeleted = new AtomicInteger();
this.totalRequestUnitsConsumed = new AtomicDouble();
this.requestOptions = new RequestOptions();
this.failedDeletes = new ArrayList<>();
this.requestOptions = new RequestOptionsInternal(partitionKeyRangeId);
}
public BatchDeleter(
String partitionKeyRangeId,
DocumentClient client,
String bulkDeleteSprocLink,
PartitionKeyDefinition partitionKeyDefinition,
List> pkIdTuplesToDelete) {
this.partitionKeyRangeId = partitionKeyRangeId;
this.client = client;
this.bulkDeleteSprocLink = bulkDeleteSprocLink;
this.numberOfDocumentsDeleted = new AtomicInteger();
this.totalRequestUnitsConsumed = new AtomicDouble();
this.pkIdTuplesToDelete = pkIdTuplesToDelete;
this.querySpec = getBulkDeleteQuerySpecForMiniBatch(pkIdTuplesToDelete, partitionKeyDefinition);
this.failedDeletes = new ArrayList<>();
this.requestOptions = new RequestOptionsInternal(partitionKeyRangeId);
}
/**
* Gets the number of documents deleted by this batch deleter
* @return numberOfDocumentsDeleted
*/
public int getNumberOfDocumentsDeleted() {
return numberOfDocumentsDeleted.get();
}
/**
* Gets the number of Request Units consumed by this batch deleter
* @return totalRequestUnitsConsumed
*/
public double getTotalRequestUnitsConsumed() {
return totalRequestUnitsConsumed.get();
}
/**
* Gets the list of failures during the bulk delete execution for the mini batch
* @return
*/
public List getBulkDeleteFailures() {
return failedDeletes;
}
public Callable executeDelete() {
return new Callable() {
@Override
public Void call() throws Exception {
boolean isDeleteCompleted = false;
int numberOfTimeouts = 0;
while(!isDeleteCompleted) {
boolean isThrottled = false;
Duration retryAfter = Duration.ZERO;
StoredProcedureResponse storedProcedureResponse = null;
try {
storedProcedureResponse = client.executeStoredProcedure(bulkDeleteSprocLink, requestOptions, new Object[] { querySpec, null });
BulkDeleteStoredProcedureResponse bulkDeleteResponse = parseFrom(storedProcedureResponse);
if (bulkDeleteResponse != null) {
if (!bulkDeleteResponse.done) {
logger.warn("pki {} | Delete execution did not complete, retrying..", partitionKeyRangeId);
}
isDeleteCompleted = bulkDeleteResponse.done;
numberOfDocumentsDeleted.addAndGet(bulkDeleteResponse.count);
totalRequestUnitsConsumed.addAndGet(storedProcedureResponse.getRequestCharge());
}
else {
logger.warn("pki {} Failed to receive response", partitionKeyRangeId);
}
}
catch (DocumentClientException e) {
if (isThrottled(e)) {
logger.debug("pki {} Throttled on partition range id", partitionKeyRangeId);
isThrottled = true;
retryAfter = Duration.ofMillis(e.getRetryAfterInMilliseconds());
} else if (isTimedOut(e)) {
logger.debug("pki {} Request timed out", partitionKeyRangeId);
if(numberOfTimeouts < maxRetryCountOnTimeouts) {
numberOfTimeouts++;
} else {
BulkDeleteFailure bulkDeleteFailure = new BulkDeleteFailure();
bulkDeleteFailure.getPkIdTuplesFailedToDelete().addAll(pkIdTuplesToDelete);
bulkDeleteFailure.setBulkDeleteFailureException(e);
failedDeletes.add(bulkDeleteFailure);
isDeleteCompleted = true;
}
} else if (isUnavailable(e)) {
logger.debug("pki {} Service unavailable", partitionKeyRangeId);
// TODO: Build retry policy for service unavailables
} else if (isGone(e)) {
if (isSplit(e)) {
String errorMessage = String.format("pki %s is undergoing split, please retry shortly after re-initializing BulkExecutor", partitionKeyRangeId);
logger.error(errorMessage);
BulkDeleteFailure bulkDeleteFailure = new BulkDeleteFailure();
bulkDeleteFailure.getPkIdTuplesFailedToDelete().addAll(pkIdTuplesToDelete);
bulkDeleteFailure.setBulkDeleteFailureException(e);
failedDeletes.add(bulkDeleteFailure);
isDeleteCompleted = true;
} else {
String errorMessage = String.format("pki %s is gone, please retry shortly after re-initializing BulkExecutor", partitionKeyRangeId);
logger.error(errorMessage);
BulkDeleteFailure bulkDeleteFailure = new BulkDeleteFailure();
bulkDeleteFailure.getPkIdTuplesFailedToDelete().addAll(pkIdTuplesToDelete);
bulkDeleteFailure.setBulkDeleteFailureException(e);
failedDeletes.add(bulkDeleteFailure);
isDeleteCompleted = true;
}
} else {
String errorMessage = String.format("pki %s failed to delete. Exception was %s. Status code was %s",
partitionKeyRangeId,
e.getMessage(),
e.getStatusCode());
logger.error(errorMessage, e);
BulkDeleteFailure bulkDeleteFailure = new BulkDeleteFailure();
bulkDeleteFailure.getPkIdTuplesFailedToDelete().addAll(pkIdTuplesToDelete);
bulkDeleteFailure.setBulkDeleteFailureException(e);
failedDeletes.add(bulkDeleteFailure);
isDeleteCompleted = true;
}
} catch (Exception e) {
String errorMessage = String.format("pki %s Failed to delete. Exception was %s",
partitionKeyRangeId,
e.getMessage());
logger.error(errorMessage, e);
BulkDeleteFailure bulkDeleteFailure = new BulkDeleteFailure();
bulkDeleteFailure.getPkIdTuplesFailedToDelete().addAll(pkIdTuplesToDelete);
bulkDeleteFailure.setBulkDeleteFailureException(e);
failedDeletes.add(bulkDeleteFailure);
isDeleteCompleted = true;
}
if (isThrottled) {
try {
logger.debug("pki {} throttled going to sleep for {} millis ", partitionKeyRangeId, retryAfter.toMillis());
Thread.sleep(retryAfter.toMillis());
} catch (InterruptedException e) {
BulkDeleteFailure bulkDeleteFailure = new BulkDeleteFailure();
bulkDeleteFailure.getPkIdTuplesFailedToDelete().addAll(pkIdTuplesToDelete);
bulkDeleteFailure.setBulkDeleteFailureException(e);
failedDeletes.add(bulkDeleteFailure);
isDeleteCompleted = true;
}
}
}
return null;
}
};
}
private BulkDeleteStoredProcedureResponse parseFrom(StoredProcedureResponse storedProcResponse) throws JsonParseException, JsonMappingException, IOException {
String res = storedProcResponse.getResponseAsString();
logger.debug("Bulk Delete for Partition Key Range Id {}: Stored Proc Response as String {}", partitionKeyRangeId, res);
if (StringUtils.isEmpty(res))
return null;
return objectMapper.readValue(res, BulkDeleteStoredProcedureResponse.class);
}
private BulkDeleteQuerySpec getBulkDeleteQuerySpecForMiniBatch(
List> pkIdPairsToDelete, PartitionKeyDefinition partitionKeyDefinition) {
String partitionKeyField =
partitionKeyDefinition.getPaths().iterator().next().replaceFirst("/", "").replace('/', '.');
StringBuilder bulkDeleteQuerySpecBuilder = new StringBuilder();
bulkDeleteQuerySpecBuilder.append("(c.");
bulkDeleteQuerySpecBuilder.append(partitionKeyField).append(" = \"").append(pkIdPairsToDelete.get(0).getKey()).append("\"");
bulkDeleteQuerySpecBuilder.append(" and c.id = \"").append(pkIdPairsToDelete.get(0).getValue()).append("\")");
for (int eachPkIdPairToDeleteIndex = 1; eachPkIdPairToDeleteIndex < pkIdPairsToDelete.size(); eachPkIdPairToDeleteIndex++) {
Pair eachPkIdPairToDelete = pkIdPairsToDelete.get(eachPkIdPairToDeleteIndex);
String partitionKey = eachPkIdPairToDelete.getKey();
String id = eachPkIdPairToDelete.getValue();
bulkDeleteQuerySpecBuilder.append("or (c.");
bulkDeleteQuerySpecBuilder.append(partitionKeyField).append(" = \"").append(partitionKey).append("\"");
bulkDeleteQuerySpecBuilder.append(" and c.id = \"").append(id).append("\")");
}
String root = "c";
String filterExpression = bulkDeleteQuerySpecBuilder.toString();
SqlParameterCollection sqlParameterCollection = null;
BulkDeleteQuerySpec bulkDeleteQuerySpec =
new BulkDeleteQuerySpec(root, filterExpression, null, sqlParameterCollection, DEFAULT_BULK_DELETE_BATCH_SIZE);
return bulkDeleteQuerySpec;
}
private SqlQuerySpec createPKIdTupleQuerySpec(List> pkIdPairsToDelete, String partitionKeyField) {
StringBuilder queryStringBuilder = new StringBuilder();
SqlParameterCollection parameters = new SqlParameterCollection();
queryStringBuilder.append("( ");
for (int i = 0; i < pkIdPairsToDelete.size(); i++) {
Pair pair = pkIdPairsToDelete.get(i);
String pkValue = pair.getLeft();
String pkParamName = "@param" + (2 * i);
parameters.add(new SqlParameter(pkParamName, pkValue));
String idValue = pair.getRight();
String idParamName = "@param" + (2 * i + 1);
parameters.add(new SqlParameter(idParamName, idValue));
queryStringBuilder.append("(");
queryStringBuilder.append(" c.");
queryStringBuilder.append(partitionKeyField);
queryStringBuilder.append((" = "));
queryStringBuilder.append(pkParamName);
queryStringBuilder.append(" AND ");
queryStringBuilder.append("c.id = ");
queryStringBuilder.append(idParamName);
queryStringBuilder.append(" )");
if (i < pkIdPairsToDelete.size() - 1) {
queryStringBuilder.append(" OR ");
}
}
queryStringBuilder.append(" )");
return new SqlQuerySpec(queryStringBuilder.toString(), parameters);
}
}
/**
 * Package-private {@link RequestOptions} variant whose constructor hands the given
 * partition key range id to {@code setPartitionKeyRengeId} (spelling as it appears
 * at the call site; presumably inherited from {@link RequestOptions}).
 */
class RequestOptionsInternal extends RequestOptions {

    /**
     * @param partitionKeyRangeId the partition key range id to set on these options
     */
    RequestOptionsInternal(String partitionKeyRangeId) {
        super();
        this.setPartitionKeyRengeId(partitionKeyRangeId);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy