// org.apache.hadoop.dynamodb.DynamoDBClient Maven / Gradle / Ivy
/**
* Copyright 2012-2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file
* except in compliance with the License. A copy of the License is located at
*
* http://aws.amazon.com/apache2.0/
*
* or in the "LICENSE.TXT" file accompanying this file. This file is distributed on an "AS IS"
* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under the License.
*/
package org.apache.hadoop.dynamodb;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_BATCH_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_ITEMS_PER_BATCH;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.DEFAULT_MAX_ITEM_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_BATCH_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_ITEMS_PER_BATCH;
import static org.apache.hadoop.dynamodb.DynamoDBConstants.MAX_ITEM_SIZE;
import static org.apache.hadoop.dynamodb.DynamoDBUtil.getDynamoDBEndpoint;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.primitives.Ints;
import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.auth.AWSCredentials;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.AWSCredentialsProviderChain;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.auth.InstanceProfileCredentialsProvider;
import com.amazonaws.services.dynamodbv2.AmazonDynamoDBClient;
import com.amazonaws.services.dynamodbv2.model.AttributeValue;
import com.amazonaws.services.dynamodbv2.model.BatchWriteItemRequest;
import com.amazonaws.services.dynamodbv2.model.BatchWriteItemResult;
import com.amazonaws.services.dynamodbv2.model.Condition;
import com.amazonaws.services.dynamodbv2.model.ConsumedCapacity;
import com.amazonaws.services.dynamodbv2.model.DescribeTableRequest;
import com.amazonaws.services.dynamodbv2.model.DescribeTableResult;
import com.amazonaws.services.dynamodbv2.model.PutRequest;
import com.amazonaws.services.dynamodbv2.model.QueryRequest;
import com.amazonaws.services.dynamodbv2.model.QueryResult;
import com.amazonaws.services.dynamodbv2.model.ReturnConsumedCapacity;
import com.amazonaws.services.dynamodbv2.model.ScanRequest;
import com.amazonaws.services.dynamodbv2.model.ScanResult;
import com.amazonaws.services.dynamodbv2.model.TableDescription;
import com.amazonaws.services.dynamodbv2.model.WriteRequest;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.dynamodb.DynamoDBFibonacciRetryer.RetryResult;
import org.apache.hadoop.dynamodb.filter.DynamoDBQueryFilter;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;
import org.joda.time.Duration;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Random;
import java.util.concurrent.Callable;
public class DynamoDBClient {
private static final Log log = LogFactory.getLog(DynamoDBClient.class);
private static final int DEFAULT_RETRY_DURATION = 10;
private static final long MAX_BACKOFF_IN_MILLISECONDS = 1000 * 3;
private static final CredentialPairName DYNAMODB_CREDENTIAL_PAIR_NAME =
new CredentialPairName(
DynamoDBConstants.DYNAMODB_ACCESS_KEY_CONF,
DynamoDBConstants.DYNAMODB_SECRET_KEY_CONF
);
private static final CredentialPairName DEFAULT_CREDENTIAL_PAIR_NAME =
new CredentialPairName(
DynamoDBConstants.DEFAULT_ACCESS_KEY_CONF,
DynamoDBConstants.DEFAULT_SECRET_KEY_CONF
);
private final Map> writeBatchMap = new HashMap<>();
private final AmazonDynamoDBClient dynamoDB;
private int writeBatchMapSizeBytes;
private int batchWriteRetries;
private final Configuration config;
private final long maxBatchSize;
private final long maxItemByteSize;
// For unit testing only
public DynamoDBClient() {
dynamoDB = null;
config = null;
maxBatchSize = DEFAULT_MAX_BATCH_SIZE;
maxItemByteSize = DEFAULT_MAX_ITEM_SIZE;
}
public DynamoDBClient(Configuration conf) {
this(conf, null);
}
public DynamoDBClient(Configuration conf, String region) {
Preconditions.checkNotNull(conf, "conf cannot be null.");
config = conf;
dynamoDB = getDynamoDBClient(conf);
dynamoDB.setEndpoint(getDynamoDBEndpoint(conf, region));
maxBatchSize = config.getLong(MAX_BATCH_SIZE, DEFAULT_MAX_BATCH_SIZE);
maxItemByteSize = config.getLong(MAX_ITEM_SIZE, DEFAULT_MAX_ITEM_SIZE);
}
public TableDescription describeTable(String tableName) {
final DescribeTableRequest describeTablesRequest = new DescribeTableRequest()
.withTableName(tableName);
try {
RetryResult describeResult = getRetryDriver().runWithRetry(
new Callable() {
@Override
public DescribeTableResult call() {
DescribeTableResult result = dynamoDB.describeTable(describeTablesRequest);
log.info("Describe table output: " + result);
return result;
}
}, null, null);
return describeResult.result.getTable();
} catch (Exception e) {
throw new RuntimeException("Could not lookup table " + tableName + " in DynamoDB.", e);
}
}
public RetryResult scanTable(
String tableName, DynamoDBQueryFilter dynamoDBQueryFilter, Integer segment, Integer
totalSegments, Map exclusiveStartKey, long limit, Reporter reporter) {
final ScanRequest scanRequest = new ScanRequest(tableName)
.withExclusiveStartKey(exclusiveStartKey)
.withLimit(Ints.checkedCast(limit))
.withSegment(segment)
.withTotalSegments(totalSegments)
.withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL);
if (dynamoDBQueryFilter != null) {
Map scanFilter = dynamoDBQueryFilter.getScanFilter();
if (!scanFilter.isEmpty()) {
scanRequest.setScanFilter(scanFilter);
}
}
RetryResult retryResult = getRetryDriver().runWithRetry(new Callable() {
@Override
public ScanResult call() {
log.debug("Executing DynamoDB scan: " + scanRequest);
return dynamoDB.scan(scanRequest);
}
}, reporter, PrintCounter.DynamoDBReadThrottle);
return retryResult;
}
public RetryResult queryTable(
String tableName, DynamoDBQueryFilter dynamoDBQueryFilter, Map
exclusiveStartKey, long limit, Reporter reporter) {
final QueryRequest queryRequest = new QueryRequest()
.withTableName(tableName)
.withExclusiveStartKey(exclusiveStartKey)
.withKeyConditions(dynamoDBQueryFilter.getKeyConditions())
.withLimit(Ints.checkedCast(limit))
.withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL);
RetryResult retryResult = getRetryDriver().runWithRetry(
new Callable() {
@Override
public QueryResult call() {
log.debug("Executing DynamoDB query: " + queryRequest);
return dynamoDB.query(queryRequest);
}
}, reporter, PrintCounter.DynamoDBReadThrottle);
return retryResult;
}
public BatchWriteItemResult putBatch(String tableName, Map item,
long maxItemsPerBatch, Reporter reporter)
throws UnsupportedEncodingException {
int itemSizeBytes = DynamoDBUtil.getItemSizeBytes(item);
if (itemSizeBytes > maxItemByteSize) {
throw new RuntimeException("Cannot pass items with size greater than " + maxItemByteSize
+ ". Item with size of " + itemSizeBytes + " was given.");
}
maxItemsPerBatch = DynamoDBUtil.getBoundedBatchLimit(config, maxItemsPerBatch);
BatchWriteItemResult result = null;
if (writeBatchMap.containsKey(tableName)) {
boolean writeRequestsForTableAtLimit =
writeBatchMap.get(tableName).size() >= maxItemsPerBatch;
boolean totalSizeOfWriteBatchesOverLimit =
writeBatchMapSizeBytes + itemSizeBytes > maxBatchSize;
if (writeRequestsForTableAtLimit || totalSizeOfWriteBatchesOverLimit) {
result = writeBatch(reporter, itemSizeBytes);
}
}
// writeBatchMap could be cleared from writeBatch()
List writeBatchList;
if (!writeBatchMap.containsKey(tableName)) {
writeBatchList = new ArrayList<>((int) maxItemsPerBatch);
writeBatchMap.put(tableName, writeBatchList);
} else {
writeBatchList = writeBatchMap.get(tableName);
}
writeBatchList.add(new WriteRequest().withPutRequest(new PutRequest().withItem(item)));
writeBatchMapSizeBytes += itemSizeBytes;
return result;
}
public void close() {
while (!writeBatchMap.isEmpty()) {
writeBatch(Reporter.NULL, 0);
}
if (dynamoDB != null) {
dynamoDB.shutdown();
}
}
/**
* @param roomNeeded number of bytes that writeBatch MUST make room for
*/
private BatchWriteItemResult writeBatch(Reporter reporter, final int roomNeeded) {
final BatchWriteItemRequest batchWriteItemRequest = new BatchWriteItemRequest()
.withRequestItems(writeBatchMap)
.withReturnConsumedCapacity(ReturnConsumedCapacity.TOTAL);
RetryResult retryResult = getRetryDriver().runWithRetry(
new Callable() {
@Override
public BatchWriteItemResult call() throws
UnsupportedEncodingException,
InterruptedException {
pauseExponentially(batchWriteRetries);
BatchWriteItemResult result = dynamoDB.batchWriteItem(batchWriteItemRequest);
Map> unprocessedItems = result.getUnprocessedItems();
if (unprocessedItems == null || unprocessedItems.isEmpty()) {
batchWriteRetries = 0;
} else {
batchWriteRetries++;
int unprocessedItemCount = 0;
for (List unprocessedWriteRequests : unprocessedItems.values()) {
unprocessedItemCount += unprocessedWriteRequests.size();
int batchSizeBytes = 0;
for (WriteRequest request : unprocessedWriteRequests) {
batchSizeBytes += DynamoDBUtil.getItemSizeBytes(
request.getPutRequest().getItem());
}
long maxItemsPerBatch =
config.getLong(MAX_ITEMS_PER_BATCH, DEFAULT_MAX_ITEMS_PER_BATCH);
long maxBatchSize = config.getLong(MAX_BATCH_SIZE, DEFAULT_MAX_BATCH_SIZE);
if (unprocessedWriteRequests.size() >= maxItemsPerBatch
|| (maxBatchSize - batchSizeBytes) < roomNeeded) {
throw new AmazonClientException("Full list of write requests not processed");
}
}
double consumed = 0.0;
for (ConsumedCapacity consumedCapacity : result.getConsumedCapacity()) {
consumed = consumedCapacity.getCapacityUnits();
}
int batchSize = 0;
for (List writeRequests :
batchWriteItemRequest.getRequestItems().values()) {
batchSize += writeRequests.size();
}
log.debug(
"BatchWriteItem attempted " + batchSize + " items, consumed " + consumed + " "
+ "wcu, left unprocessed " + unprocessedItemCount + " items," + " "
+ "now at " + "" + batchWriteRetries + " retries");
}
return result;
}
}, reporter, PrintCounter.DynamoDBWriteThrottle);
writeBatchMap.clear();
writeBatchMapSizeBytes = 0;
// If some items failed to go through, add them back to the writeBatchMap
Map> unprocessedItems = retryResult.result.getUnprocessedItems();
for (Entry> entry : unprocessedItems.entrySet()) {
String key = entry.getKey();
List requests = entry.getValue();
for (WriteRequest request : requests) {
writeBatchMapSizeBytes += DynamoDBUtil.getItemSizeBytes(request.getPutRequest().getItem());
}
writeBatchMap.put(key, requests);
}
return retryResult.result;
}
private DynamoDBFibonacciRetryer getRetryDriver() {
return new DynamoDBFibonacciRetryer(Duration.standardMinutes(DEFAULT_RETRY_DURATION));
}
private void pauseExponentially(int retries) throws InterruptedException {
if (retries == 0) {
return;
}
long scaleFactor = 500 + new Random().nextInt(100);
long delay = (long) (Math.pow(2, retries) * scaleFactor) / 4;
delay = Math.min(delay, MAX_BACKOFF_IN_MILLISECONDS);
log.info("Pausing " + delay + " ms at retry " + retries);
Thread.sleep(delay);
}
private AmazonDynamoDBClient getDynamoDBClient(Configuration conf) {
ClientConfiguration clientConfig = new ClientConfiguration().withMaxErrorRetry(1);
applyProxyConfiguration(clientConfig, conf);
return new AmazonDynamoDBClient(getAWSCredentialsProvider(conf), clientConfig);
}
@VisibleForTesting
void applyProxyConfiguration(ClientConfiguration clientConfig, Configuration conf) {
final String proxyHost = conf.get(DynamoDBConstants.PROXY_HOST);
final int proxyPort = conf.getInt(DynamoDBConstants.PROXY_PORT, 0);
final String proxyUsername = conf.get(DynamoDBConstants.PROXY_USERNAME);
final String proxyPassword = conf.get(DynamoDBConstants.PROXY_PASSWORD);
boolean proxyHostAndPortPresent = false;
if (!Strings.isNullOrEmpty(proxyHost) && proxyPort > 0) {
clientConfig.setProxyHost(proxyHost);
clientConfig.setProxyPort(proxyPort);
proxyHostAndPortPresent = true;
} else if (Strings.isNullOrEmpty(proxyHost) ^ proxyPort <= 0) {
throw new RuntimeException("Only one of proxy host and port are set, when both are required");
}
if (!Strings.isNullOrEmpty(proxyUsername) && !Strings.isNullOrEmpty(proxyPassword)) {
if (!proxyHostAndPortPresent) {
throw new RuntimeException("Proxy host and port must be supplied if proxy username and "
+ "password are present");
} else {
clientConfig.setProxyUsername(proxyUsername);
clientConfig.setProxyPassword(proxyPassword);
}
} else if (Strings.isNullOrEmpty(proxyUsername) ^ Strings.isNullOrEmpty(proxyPassword)) {
throw new RuntimeException("Only one of proxy username and password are set, when both are "
+ "required");
}
}
protected AWSCredentialsProvider getAWSCredentialsProvider(Configuration conf) {
List providersList = new ArrayList<>();
// try to load custom credential provider, fail if a provider is specified but cannot be
// initialized
String providerClass = conf.get(DynamoDBConstants.CUSTOM_CREDENTIALS_PROVIDER_CONF);
if (!Strings.isNullOrEmpty(providerClass)) {
try {
providersList.add(
(AWSCredentialsProvider) ReflectionUtils.newInstance(Class.forName(providerClass), conf)
);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Custom AWSCredentialsProvider not found: " + providerClass, e);
}
}
// try to fetch credentials from core-site
String accessKey = conf.get(DYNAMODB_CREDENTIAL_PAIR_NAME.getAccessKeyName());
String secretKey;
if (Strings.isNullOrEmpty(accessKey)) {
accessKey = conf.get(DEFAULT_CREDENTIAL_PAIR_NAME.getAccessKeyName());
secretKey = conf.get(DEFAULT_CREDENTIAL_PAIR_NAME.getSecretKeyName());
} else {
secretKey = conf.get(DYNAMODB_CREDENTIAL_PAIR_NAME.getSecretKeyName());
}
if (Strings.isNullOrEmpty(accessKey) || Strings.isNullOrEmpty(secretKey)) {
providersList.add(new InstanceProfileCredentialsProvider());
} else {
final AWSCredentials credentials = new BasicAWSCredentials(accessKey, secretKey);
providersList.add(new AWSCredentialsProvider() {
@Override
public AWSCredentials getCredentials() {
return credentials;
}
@Override
public void refresh() {
}
});
}
AWSCredentialsProvider[] providerArray = providersList.toArray(
new AWSCredentialsProvider[providersList.size()]
);
AWSCredentialsProviderChain providerChain = new AWSCredentialsProviderChain(providerArray);
providerChain.setReuseLastProvider(true);
return providerChain;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy