com.microsoft.azure.documentdb.bulkexecutor.internal.BatchUpdater
Document Bulk Executor for Azure Cosmos DB Service
/*
* The MIT License (MIT)
* Copyright (c) 2017 Microsoft Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all
* copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
package com.microsoft.azure.documentdb.bulkexecutor.internal;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.apache.http.HttpStatus;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.AtomicDouble;
import com.microsoft.azure.documentdb.DocumentClient;
import com.microsoft.azure.documentdb.DocumentClientException;
import com.microsoft.azure.documentdb.Error;
import com.microsoft.azure.documentdb.RequestOptions;
import com.microsoft.azure.documentdb.StoredProcedureResponse;
import com.microsoft.azure.documentdb.bulkexecutor.BulkUpdateFailure;
import com.microsoft.azure.documentdb.bulkexecutor.UpdateItem;
import com.microsoft.azure.documentdb.internal.HttpConstants;
import static com.microsoft.azure.documentdb.bulkexecutor.internal.ExceptionUtils.*;
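/**
* Bulk-updates documents for a single partition key range by repeatedly invoking
* the server-side bulk update stored procedure, one mini-batch at a time.
*/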
public class BatchUpdater extends BatchOperator {
/**
* The count of documents bulk updated by this batch updater.
*/
public AtomicInteger numberOfDocumentsUpdated;
/**
* The total request units consumed by this batch updater.
*/
public AtomicDouble totalRequestUnitsConsumed;
/**
* The list of mini-batches this batch updater is responsible for updating.
*/
private final List<List<UpdateItem>> batchesToUpdate;
/**
* The list of updates which failed and need to be retried due to physical partition(s) being split.
*/
private List<UpdateItem> documentsFailedToUpdateDueToSplits;
/**
* The list of updates which failed.
*/
private List<BulkUpdateFailure> bulkUpdateFailures;
/**
* The link to the system bulk update stored procedure.
*/
private final String bulkUpdateSprocLink;
/**
* The partition key property.
*/
private final String partitionKeyProperty;
/**
* The maximum number of retries when a bulk update times out.
*/
private final int maxRetryCountOnTimeouts = 5;
/**
* The logger instance.
*/
private final Logger logger = LoggerFactory.getLogger(BatchUpdater.class);
public BatchUpdater(String partitionKeyRangeId,
List<List<UpdateItem>> batchesToUpdate,
DocumentClient client,
String bulkUpdateSprocLink,
String partitionKeyProperty) {
this.partitionKeyRangeId = partitionKeyRangeId;
this.batchesToUpdate = batchesToUpdate;
this.client = client;
this.bulkUpdateSprocLink = bulkUpdateSprocLink;
this.partitionKeyProperty = partitionKeyProperty;
this.numberOfDocumentsUpdated = new AtomicInteger();
this.totalRequestUnitsConsumed = new AtomicDouble();
this.documentsFailedToUpdateDueToSplits = Collections.synchronizedList(new ArrayList<UpdateItem>());
this.bulkUpdateFailures = Collections.synchronizedList(new ArrayList<BulkUpdateFailure>());
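// Local subclass used to reach RequestOptions' non-public partition key range setter,
// so every sproc call made by this updater is pinned to this partition key range.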
class RequestOptionsInternal extends RequestOptions {
RequestOptionsInternal(String partitionKeyRangeId) {
setPartitionKeyRengeId(partitionKeyRangeId); // "Renge" is the SDK's own spelling of this setter
}
}
this.requestOptions = new RequestOptionsInternal(partitionKeyRangeId);
}
public int getNumberOfDocumentsUpdated() {
return numberOfDocumentsUpdated.get();
}
public double getTotalRequestUnitsConsumed() {
return totalRequestUnitsConsumed.get();
}
public List<UpdateItem> getDocumentsFailedToUpdateDueToSplits() {
return documentsFailedToUpdateDueToSplits;
}
public List<BulkUpdateFailure> getBulkUpdateFailures() {
return bulkUpdateFailures;
}
public Iterator<Callable<OperationMetrics>> miniBatchExecutionCallableIterator() {
Stream<Callable<OperationMetrics>> stream = batchesToUpdate.stream().map(miniBatch -> {
return new Callable<OperationMetrics>() {
@Override
public OperationMetrics call() {
int currentUpdateItemIndex = 0;
Stopwatch stopwatch = Stopwatch.createStarted();
double requestUnitsConsumed = 0;
int numberOfThrottles = 0;
int numberOfTimeouts = 0;
try {
logger.debug("pki {} updating mini batch started", partitionKeyRangeId);
StoredProcedureResponse response;
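// Each pass submits the not-yet-applied tail of the mini-batch; the sproc reports
// how many items it applied, and the loop resumes from the first unapplied item.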
while (currentUpdateItemIndex < miniBatch.size() && !cancel) {
logger.debug("pki {} inside for loop, currentUpdateItemIndex", partitionKeyRangeId, currentUpdateItemIndex);
List<UpdateItem> updateItemBatch = miniBatch.subList(currentUpdateItemIndex, miniBatch.size());
boolean isThrottled = false;
Duration retryAfter = Duration.ZERO;
try {
logger.debug("pki {}, Trying to update minibatch of {} update items", partitionKeyRangeId, updateItemBatch.size());
final StringBuilder sb = new StringBuilder();
for (int i = 0; i < partitionKeyProperty.length(); i++) {
final char currentChar = partitionKeyProperty.charAt(i);
if (i > 0 && currentChar == '/' && partitionKeyProperty.charAt(i - 1) != '\\') {
sb.append('.');
} else {
sb.append(currentChar);
}
}
final String effectivePartitionKeyProperty = sb.toString();
response = client.executeStoredProcedure(
bulkUpdateSprocLink,
requestOptions,
new Object[] { updateItemBatch, effectivePartitionKeyProperty, null });
BulkUpdateStoredProcedureResponse bulkUpdateResponse = parseFrom(response);
if (bulkUpdateResponse != null) {
if (bulkUpdateResponse.errorCode != 0) {
logger.warn("pki {} Received response error code {}", partitionKeyRangeId, bulkUpdateResponse.errorCode);
if (bulkUpdateResponse.count == 0) {
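// No items were applied, so retrying the same slice cannot make progress;
// record the remainder as failed and end this mini-batch.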
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(updateItemBatch);
bulkUpdateFailure.setBulkUpdateFailureException(new RuntimeException(
String.format("Stored proc returned failure %s", bulkUpdateResponse.errorCode)));
bulkUpdateFailures.add(bulkUpdateFailure);
currentUpdateItemIndex = miniBatch.size();
} else if (bulkUpdateResponse.errorCode == 404) {
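// Surface the sproc's 404 as a NotFound DocumentClientException carrying
// the PARTITION_KEY_RANGE_GONE sub-status.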
Map<String, String> responseHeaders = new HashMap<String, String>();
responseHeaders.put(HttpConstants.HttpHeaders.SUB_STATUS,
String.valueOf(HttpConstants.SubStatusCodes.PARTITION_KEY_RANGE_GONE));
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(updateItemBatch);
bulkUpdateFailure.setBulkUpdateFailureException(new DocumentClientException(HttpStatus.SC_NOT_FOUND,
new Error("{ 'message': 'Batch contains non-existent documents' }"), responseHeaders));
bulkUpdateFailures.add(bulkUpdateFailure);
cancel = true;
}
}
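// Advance past the items the sproc reports as applied and account for their request charge.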
double requestCharge = response.getRequestCharge();
currentUpdateItemIndex += bulkUpdateResponse.count;
numberOfDocumentsUpdated.addAndGet(bulkUpdateResponse.count);
requestUnitsConsumed += requestCharge;
totalRequestUnitsConsumed.addAndGet(requestCharge);
}
else {
logger.warn("pki {} Failed to receive response", partitionKeyRangeId);
}
} catch (DocumentClientException e) {
logger.debug("pki {} Updating minibatch failed", partitionKeyRangeId, e);
if (isThrottled(e)) {
logger.debug("pki {} Throttled on partition range id", partitionKeyRangeId);
numberOfThrottles++;
isThrottled = true;
retryAfter = Duration.ofMillis(e.getRetryAfterInMilliseconds());
// will retry again
} else if (isTimedOut(e)) {
logger.debug("pki {} Request timed out", partitionKeyRangeId);
// will retry a finite number of times
if(numberOfTimeouts < maxRetryCountOnTimeouts) {
numberOfTimeouts++;
} else {
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(updateItemBatch);
bulkUpdateFailure.setBulkUpdateFailureException(e);
bulkUpdateFailures.add(bulkUpdateFailure);
cancel = true;
}
} else if (isUnavailable(e)) {
logger.debug("pki {} Service unavailable", partitionKeyRangeId);
documentsFailedToUpdateDueToSplits.addAll(updateItemBatch);
logger.warn(
"Received Service unavailable exception when updating a mini-batch for partition key range: " +
partitionKeyRangeId +
". This mini-batch will be retried on the next invocation.");
cancel = true;
}
else if (isGone(e)) {
if (isSplit(e)) {
// In the case of a gone exception for a partition, in particular due to splits, store the updates to retry after re-initializing the BulkExecutor instance
documentsFailedToUpdateDueToSplits.addAll(updateItemBatch);
logger.warn(
"Received a GoneException on Partition range id " +
partitionKeyRangeId +
" as the partition was completing a split | Storing the mini batch and retrying");
} else {
// In the case of a gone exception for a partition, store the updates to retry after re-initializing the BulkExecutor instance
documentsFailedToUpdateDueToSplits.addAll(updateItemBatch);
logger.warn(
"Received a GoneException on Partition range id " +
partitionKeyRangeId +
" | Storing the mini batch and retrying");
}
cancel = true;
} else {
// there is no value in retrying
String errorMessage = String.format("pki %s failed to update mini-batch. Exception was %s. Status code was %s",
partitionKeyRangeId,
e.getMessage(),
e.getStatusCode());
logger.error(errorMessage, e);
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(updateItemBatch);
bulkUpdateFailure.setBulkUpdateFailureException(new RuntimeException(e));
bulkUpdateFailures.add(bulkUpdateFailure);
cancel = true;
}
} catch (IllegalStateException e) {
documentsFailedToUpdateDueToSplits.addAll(updateItemBatch);
logger.warn(
"Received IllegalStateException since partition key range: " +
partitionKeyRangeId +
" was split or Gone. | Storing the mini batch and retrying");
cancel = true;
} catch (Exception e) {
String errorMessage = String.format("pki %s Failed to update mini-batch. Exception was %s", partitionKeyRangeId,
e.getMessage());
logger.error(errorMessage, e);
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(updateItemBatch);
bulkUpdateFailure.setBulkUpdateFailureException(new RuntimeException(errorMessage, e));
bulkUpdateFailures.add(bulkUpdateFailure);
cancel = true;
}
if (isThrottled) {
try {
logger.debug("pki {} throttled going to sleep for {} millis ", partitionKeyRangeId, retryAfter.toMillis());
Thread.sleep(retryAfter.toMillis());
} catch (InterruptedException e) {
Thread.currentThread().interrupt(); // restore the interrupt status before recording the failure
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(updateItemBatch);
bulkUpdateFailure.setBulkUpdateFailureException(new RuntimeException(e));
bulkUpdateFailures.add(bulkUpdateFailure);
cancel = true;
}
}
}
} catch (Exception e) {
cancel = true;
String errorMessage = String.format("pki %s Failed to update mini-batch. Exception was %s", partitionKeyRangeId,
e.getMessage());
logger.error(errorMessage, e);
BulkUpdateFailure bulkUpdateFailure = new BulkUpdateFailure();
bulkUpdateFailure.getFailedUpdateItems().addAll(miniBatch);
bulkUpdateFailure.setBulkUpdateFailureException(new RuntimeException(e));
bulkUpdateFailures.add(bulkUpdateFailure);
}
logger.debug("pki {} completed", partitionKeyRangeId);
stopwatch.stop();
OperationMetrics updateMetrics = new OperationMetrics(currentUpdateItemIndex, stopwatch.elapsed(), requestUnitsConsumed, numberOfThrottles);
return updateMetrics;
}
};
});
return stream.iterator();
}
private BulkUpdateStoredProcedureResponse parseFrom(StoredProcedureResponse storedProcResponse) throws JsonParseException, JsonMappingException, IOException {
String res = storedProcResponse.getResponseAsString();
logger.debug("MiniBatch Update for Partition Key Range Id {}: Stored Proc Response as String {}", partitionKeyRangeId, res);
if (StringUtils.isEmpty(res))
return null;
return objectMapper.readValue(res, BulkUpdateStoredProcedureResponse.class);
}
}
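For orientation, here is a minimal sketch of how a caller might drive the callables this class produces. It is illustrative only: the variable updater and the fixed-size pool are assumptions (imports from java.util and java.util.concurrent), and the real BulkExecutor wires up its own executor, split handling, and retries internally.

// Illustrative sketch, not part of the library: drain one BatchUpdater's
// mini-batch callables on a thread pool, then read its aggregate counters.
ExecutorService pool = Executors.newFixedThreadPool(4);
List<Future<OperationMetrics>> futures = new ArrayList<Future<OperationMetrics>>();
Iterator<Callable<OperationMetrics>> callables = updater.miniBatchExecutionCallableIterator();
while (callables.hasNext()) {
futures.add(pool.submit(callables.next()));
}
for (Future<OperationMetrics> future : futures) {
future.get(); // surfaces any unexpected execution failure
}
pool.shutdown();
System.out.printf("updated=%d RUs=%.2f failures=%d retriableAfterSplit=%d%n",
updater.getNumberOfDocumentsUpdated(),
updater.getTotalRequestUnitsConsumed(),
updater.getBulkUpdateFailures().size(),
updater.getDocumentsFailedToUpdateDueToSplits().size());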