/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.aws.ext.ds;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.jackrabbit.aws.ext.S3Constants;
import org.apache.jackrabbit.aws.ext.S3RequestDecorator;
import org.apache.jackrabbit.aws.ext.Utils;
import org.apache.jackrabbit.core.data.AbstractBackend;
import org.apache.jackrabbit.core.data.AsyncTouchCallback;
import org.apache.jackrabbit.core.data.AsyncTouchResult;
import org.apache.jackrabbit.core.data.AsyncUploadCallback;
import org.apache.jackrabbit.core.data.AsyncUploadResult;
import org.apache.jackrabbit.core.data.CachingDataStore;
import org.apache.jackrabbit.core.data.DataIdentifier;
import org.apache.jackrabbit.core.data.DataStoreException;
import org.apache.jackrabbit.core.data.util.NamedThreadFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.event.ProgressEvent;
import com.amazonaws.event.ProgressListener;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.model.CopyObjectRequest;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.DeleteObjectsResult;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.services.s3.model.Region;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectInputStream;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.amazonaws.services.s3.transfer.Copy;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.Upload;
import com.amazonaws.util.StringUtils;
/**
* A data store backend that stores data on Amazon S3.
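 * <p>
 * A minimal configuration sketch; the authoritative property keys are defined
 * in {@link org.apache.jackrabbit.aws.ext.S3Constants}, and the values below
 * are illustrative placeholders only:
 * <pre>
 * # illustrative aws.properties sketch (keys assumed from S3Constants)
 * accessKey=...
 * secretKey=...
 * s3Bucket=my-datastore-bucket
 * s3Region=us-standard
 * writeThreads=10
 * maxConnections=20
 * </pre>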
*/
public class S3Backend extends AbstractBackend {
/**
* Logger instance.
*/
private static final Logger LOG = LoggerFactory.getLogger(S3Backend.class);
private static final String KEY_PREFIX = "dataStore_";
private AmazonS3Client s3service;
private String bucket;
private TransferManager tmx;
private Properties properties;
private Date startTime;
private S3RequestDecorator s3ReqDecorator;
/**
 * Initializes the S3Backend. Creates the AmazonS3Client and TransferManager
 * from aws.properties, and creates the S3 bucket if it does not already exist.
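 * <p>
 * A minimal initialization sketch (normally the repository configuration
 * drives this; {@code store}, {@code homeDir} and {@code props} are assumed
 * to exist):
 * <pre>
 * S3Backend backend = new S3Backend();
 * backend.setProperties(props);       // optional; if set, the config file is ignored
 * backend.init(store, homeDir, null); // null falls back to the default config file
 * </pre>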
*/
@Override
public void init(CachingDataStore store, String homeDir, String config)
throws DataStoreException {
super.init(store, homeDir, config);
Properties initProps = null;
        // Check if configuration is already provided; it takes precedence
        // over configuration provided via a config file
if(this.properties != null){
initProps = this.properties;
} else {
if(config == null){
config = Utils.DEFAULT_CONFIG_FILE;
}
try{
initProps = Utils.readConfig(config);
}catch(IOException e){
throw new DataStoreException("Could not initialize S3 from "
+ config, e);
}
this.properties = initProps;
}
init(store, homeDir, initProps);
}
public void init(CachingDataStore store, String homeDir, Properties prop)
throws DataStoreException {
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
startTime = new Date();
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
LOG.debug("init");
setDataStore(store);
s3ReqDecorator = new S3RequestDecorator(prop);
s3service = Utils.openService(prop);
if (bucket == null || "".equals(bucket.trim())) {
bucket = prop.getProperty(S3Constants.S3_BUCKET);
}
String region = prop.getProperty(S3Constants.S3_REGION);
Region s3Region = null;
if (StringUtils.isNullOrEmpty(region)) {
com.amazonaws.regions.Region ec2Region = Regions.getCurrentRegion();
if (ec2Region != null) {
s3Region = Region.fromValue(ec2Region.getName());
} else {
throw new AmazonClientException(
"parameter ["
+ S3Constants.S3_REGION
+ "] not configured and cannot be derived from environment");
}
} else {
if (Utils.DEFAULT_AWS_BUCKET_REGION.equals(region)) {
s3Region = Region.US_Standard;
} else if (Region.EU_Ireland.toString().equals(region)) {
s3Region = Region.EU_Ireland;
} else {
s3Region = Region.fromValue(region);
}
}
if (!s3service.doesBucketExist(bucket)) {
s3service.createBucket(bucket, s3Region);
LOG.info("Created bucket [{}] in [{}] ", bucket, region);
} else {
LOG.info("Using bucket [{}] in [{}] ", bucket, region);
}
int writeThreads = 10;
String writeThreadsStr = prop.getProperty(S3Constants.S3_WRITE_THREADS);
if (writeThreadsStr != null) {
writeThreads = Integer.parseInt(writeThreadsStr);
}
LOG.info("Using thread pool of [{}] threads in S3 transfer manager.", writeThreads);
tmx = new TransferManager(s3service,
(ThreadPoolExecutor) Executors.newFixedThreadPool(writeThreads,
new NamedThreadFactory("s3-transfer-manager-worker")));
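            // async write pool defaults to 10 threads; when maxConnections is
            // configured, size it to whatever remains after the transfer
            // manager's write threads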
int asyncWritePoolSize = 10;
String maxConnsStr = prop.getProperty(S3Constants.S3_MAX_CONNS);
if (maxConnsStr != null) {
asyncWritePoolSize = Integer.parseInt(maxConnsStr)
- writeThreads;
}
setAsyncWritePoolSize(asyncWritePoolSize);
String renameKeyProp = prop.getProperty(S3Constants.S3_RENAME_KEYS);
boolean renameKeyBool = (renameKeyProp == null || "".equals(renameKeyProp))
? false
: Boolean.parseBoolean(renameKeyProp);
LOG.info("Rename keys [{}]", renameKeyBool);
if (renameKeyBool) {
renameKeys();
}
LOG.debug("S3 Backend initialized in [{}] ms",
+(System.currentTimeMillis() - startTime.getTime()));
} catch (Exception e) {
            LOG.debug("Error initializing S3 backend", e);
throw new DataStoreException("Could not initialize S3 from "
+ prop, e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
/**
 * Uploads the file to Amazon S3. If the file size is greater than 5 MB, the
 * upload proceeds over multiple concurrent connections (multipart upload).
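 * <p>
 * Usage sketch ({@code backend} and {@code contentFile} are assumed to exist;
 * the identifier value is illustrative):
 * <pre>
 * backend.write(new DataIdentifier("004cb70c8f87d78f04da41e7547cb434094089ea"), contentFile);
 * </pre>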
*/
@Override
public void write(DataIdentifier identifier, File file)
throws DataStoreException {
this.write(identifier, file, false, null);
}
@Override
public void writeAsync(DataIdentifier identifier, File file,
AsyncUploadCallback callback) throws DataStoreException {
if (callback == null) {
throw new IllegalArgumentException(
"callback parameter cannot be null in asyncUpload");
}
getAsyncWriteExecutor().execute(new AsyncUploadJob(identifier, file,
callback));
}
/**
* Check if record identified by identifier exists in Amazon S3.
*/
@Override
public boolean exists(DataIdentifier identifier) throws DataStoreException {
long start = System.currentTimeMillis();
String key = getKeyName(identifier);
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
ObjectMetadata objectMetaData = s3service.getObjectMetadata(bucket,
key);
if (objectMetaData != null) {
LOG.trace("exists [{}]: [true] took [{}] ms.",
identifier, (System.currentTimeMillis() - start) );
return true;
}
return false;
} catch (AmazonServiceException e) {
if (e.getStatusCode() == 404 || e.getStatusCode() == 403) {
LOG.debug("exists [{}]: [false] took [{}] ms.",
identifier, (System.currentTimeMillis() - start) );
return false;
}
            throw new DataStoreException(
                "Error occurred while getting object metadata for key ["
                    + identifier.toString() + "]", e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
public boolean exists(DataIdentifier identifier, boolean touch)
throws DataStoreException {
long start = System.currentTimeMillis();
String key = getKeyName(identifier);
ObjectMetadata objectMetaData = null;
boolean retVal = false;
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
objectMetaData = s3service.getObjectMetadata(bucket, key);
if (objectMetaData != null) {
retVal = true;
if (touch) {
CopyObjectRequest copReq = new CopyObjectRequest(bucket,
key, bucket, key);
copReq.setNewObjectMetadata(objectMetaData);
Copy copy = tmx.copy(s3ReqDecorator.decorate(copReq));
copy.waitForCopyResult();
LOG.debug("[{}] touched took [{}] ms. ", identifier,
(System.currentTimeMillis() - start));
}
} else {
retVal = false;
}
} catch (AmazonServiceException e) {
if (e.getStatusCode() == 404 || e.getStatusCode() == 403) {
retVal = false;
} else {
                throw new DataStoreException(
                    "Error occurred while checking existence of key ["
                        + identifier.toString() + "]", e);
}
} catch (Exception e) {
            throw new DataStoreException(
                "Error occurred while checking existence of key ["
                    + identifier.toString() + "]", e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
LOG.debug("exists [{}]: [{}] took [{}] ms.", new Object[] { identifier,
retVal, (System.currentTimeMillis() - start) });
return retVal;
}
@Override
public void touchAsync(final DataIdentifier identifier,
final long minModifiedDate, final AsyncTouchCallback callback)
throws DataStoreException {
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
if (callback == null) {
throw new IllegalArgumentException(
"callback parameter cannot be null in touchAsync");
}
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
getAsyncWriteExecutor().execute(new Runnable() {
@Override
public void run() {
try {
touch(identifier, minModifiedDate);
callback.onSuccess(new AsyncTouchResult(identifier));
} catch (DataStoreException e) {
AsyncTouchResult result = new AsyncTouchResult(
identifier);
result.setException(e);
callback.onFailure(result);
}
}
});
} catch (Exception e) {
callback.onAbort(new AsyncTouchResult(identifier));
throw new DataStoreException("Cannot touch the record "
+ identifier.toString(), e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
public void touch(DataIdentifier identifier, long minModifiedDate)
throws DataStoreException {
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
final long start = System.currentTimeMillis();
final String key = getKeyName(identifier);
if (minModifiedDate > 0
&& minModifiedDate > getLastModified(identifier)) {
CopyObjectRequest copReq = new CopyObjectRequest(bucket, key,
bucket, key);
copReq.setNewObjectMetadata(new ObjectMetadata());
Copy copy = tmx.copy(s3ReqDecorator.decorate(copReq));
copy.waitForCompletion();
LOG.debug("[{}] touched. time taken [{}] ms ", new Object[] {
identifier, (System.currentTimeMillis() - start) });
} else {
LOG.trace("[{}] touch not required. time taken [{}] ms ",
new Object[] { identifier,
(System.currentTimeMillis() - start) });
}
} catch (Exception e) {
throw new DataStoreException("Error occured in touching key ["
+ identifier.toString() + "]", e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
public InputStream read(DataIdentifier identifier)
throws DataStoreException {
long start = System.currentTimeMillis();
String key = getKeyName(identifier);
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
S3Object object = s3service.getObject(bucket, key);
S3ObjectInputStream s3in = object.getObjectContent();
InputStream in = new S3BackendResourceAbortableInputStream(s3in);
LOG.debug("[{}] read took [{}]ms", identifier,
(System.currentTimeMillis() - start));
return in;
} catch (AmazonServiceException e) {
throw new DataStoreException("Object not found: " + key, e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
    public Iterator<DataIdentifier> getAllIdentifiers()
throws DataStoreException {
long start = System.currentTimeMillis();
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
            Set<DataIdentifier> ids = new HashSet<DataIdentifier>();
ObjectListing prevObjectListing = s3service.listObjects(bucket);
while (true) {
for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
String id = getIdentifierName(s3ObjSumm.getKey());
if (id != null) {
ids.add(new DataIdentifier(id));
}
}
if (!prevObjectListing.isTruncated()) break;
prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
}
LOG.debug("getAllIdentifiers returned size [{}] took [{}] ms.",
ids.size(), (System.currentTimeMillis() - start));
return ids.iterator();
} catch (AmazonServiceException e) {
throw new DataStoreException("Could not list objects", e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
public long getLastModified(DataIdentifier identifier)
throws DataStoreException {
long start = System.currentTimeMillis();
String key = getKeyName(identifier);
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
ObjectMetadata object = s3service.getObjectMetadata(bucket, key);
long lastModified = object.getLastModified().getTime();
LOG.debug(
"Identifier [{}]'s lastModified = [{}] took [{}]ms.",
new Object[] { identifier, lastModified,
(System.currentTimeMillis() - start) });
return lastModified;
} catch (AmazonServiceException e) {
if (e.getStatusCode() == 404 || e.getStatusCode() == 403) {
LOG.info(
"getLastModified:Identifier [{}] not found. Took [{}] ms.",
identifier, (System.currentTimeMillis() - start));
}
throw new DataStoreException(e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
public long getLength(DataIdentifier identifier) throws DataStoreException {
long start = System.currentTimeMillis();
String key = getKeyName(identifier);
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
ObjectMetadata object = s3service.getObjectMetadata(bucket, key);
long length = object.getContentLength();
LOG.debug("Identifier [{}]'s length = [{}] took [{}]ms.",
new Object[] { identifier, length,
(System.currentTimeMillis() - start) });
return length;
} catch (AmazonServiceException e) {
throw new DataStoreException("Could not length of dataIdentifier "
+ identifier, e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
public void deleteRecord(DataIdentifier identifier)
throws DataStoreException {
long start = System.currentTimeMillis();
String key = getKeyName(identifier);
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
s3service.deleteObject(bucket, key);
LOG.debug("Identifier [{}] deleted. It took [{}]ms.", new Object[] {
identifier, (System.currentTimeMillis() - start) });
} catch (AmazonServiceException e) {
            throw new DataStoreException(
                "Could not delete dataIdentifier " + identifier, e);
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
@Override
    public Set<DataIdentifier> deleteAllOlderThan(long min)
throws DataStoreException {
long start = System.currentTimeMillis();
        // S3 rounds lastModified down to second precision,
        // hence min is reduced by 1000 ms to compensate.
        min = min - 1000;
        Set<DataIdentifier> deleteIdSet = new HashSet<DataIdentifier>(30);
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
ObjectListing prevObjectListing = s3service.listObjects(bucket);
while (true) {
                List<DeleteObjectsRequest.KeyVersion> deleteList =
                    new ArrayList<DeleteObjectsRequest.KeyVersion>();
for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
DataIdentifier identifier = new DataIdentifier(
getIdentifierName(s3ObjSumm.getKey()));
long lastModified = s3ObjSumm.getLastModified().getTime();
LOG.debug("Identifier [{}]'s lastModified = [{}]", identifier, lastModified);
if (lastModified < min
&& getDataStore().confirmDelete(identifier)
// confirm once more that record's lastModified < min
// order is important here
&& s3service.getObjectMetadata(bucket,
s3ObjSumm.getKey()).getLastModified().getTime() < min) {
getDataStore().deleteFromCache(identifier);
LOG.debug("add id [{}] to delete lists",
s3ObjSumm.getKey());
deleteList.add(new DeleteObjectsRequest.KeyVersion(
s3ObjSumm.getKey()));
deleteIdSet.add(identifier);
}
}
if (deleteList.size() > 0) {
DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(
bucket);
delObjsReq.setKeys(deleteList);
DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
if (dobjs.getDeletedObjects().size() != deleteList.size()) {
                        throw new DataStoreException(
                            "Incomplete delete object request: only "
                                + dobjs.getDeletedObjects().size() + " out of "
                                + deleteList.size() + " were deleted");
} else {
LOG.debug("[{}] records deleted from datastore",
deleteList);
}
}
if (!prevObjectListing.isTruncated()) {
break;
}
prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
}
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
        LOG.info(
            "deleteAllOlderThan: min=[{}] exit. Deleted records [{}], count [{}], took [{}] ms",
            new Object[] { min, deleteIdSet, deleteIdSet.size(),
                (System.currentTimeMillis() - start) });
return deleteIdSet;
}
@Override
public void close() throws DataStoreException {
super.close();
        // backend is closing; abort all multipart uploads started since init
if(s3service.doesBucketExist(bucket)) {
tmx.abortMultipartUploads(bucket, startTime);
}
tmx.shutdownNow();
s3service.shutdown();
LOG.info("S3Backend closed.");
}
public String getBucket() {
return bucket;
}
public void setBucket(String bucket) {
this.bucket = bucket;
}
/**
     * Properties used to configure the backend. If provided explicitly
     * before init is invoked, they take precedence over the file-based
     * configuration.
*
* @param properties to configure S3Backend
*/
public void setProperties(Properties properties) {
this.properties = properties;
}
private void write(DataIdentifier identifier, File file,
boolean asyncUpload, AsyncUploadCallback callback)
throws DataStoreException {
String key = getKeyName(identifier);
ObjectMetadata objectMetaData = null;
long start = System.currentTimeMillis();
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
// check if the same record already exists
try {
objectMetaData = s3service.getObjectMetadata(bucket, key);
} catch (AmazonServiceException ase) {
if (!(ase.getStatusCode() == 404 || ase.getStatusCode() == 403)) {
throw ase;
}
}
if (objectMetaData != null) {
long l = objectMetaData.getContentLength();
if (l != file.length()) {
throw new DataStoreException("Collision: " + key
+ " new length: " + file.length() + " old length: " + l);
}
                LOG.debug("[{}] exists, lastModified = [{}]", key,
                    objectMetaData.getLastModified().getTime());
CopyObjectRequest copReq = new CopyObjectRequest(bucket, key,
bucket, key);
copReq.setNewObjectMetadata(objectMetaData);
Copy copy = tmx.copy(s3ReqDecorator.decorate(copReq));
try {
copy.waitForCopyResult();
LOG.debug("lastModified of [{}] updated successfully.", identifier);
if (callback != null) {
callback.onSuccess(new AsyncUploadResult(identifier, file));
}
}catch (Exception e2) {
AsyncUploadResult asyncUpRes= new AsyncUploadResult(identifier, file);
asyncUpRes.setException(e2);
if (callback != null) {
callback.onAbort(asyncUpRes);
}
throw new DataStoreException("Could not upload " + key, e2);
}
}
if (objectMetaData == null) {
try {
// start multipart parallel upload using amazon sdk
Upload up = tmx.upload(s3ReqDecorator.decorate(new PutObjectRequest(
bucket, key, file)));
                    // in async mode attach a progress listener;
                    // otherwise block until the upload finishes
                    if (asyncUpload) {
up.addProgressListener(new S3UploadProgressListener(up,
identifier, file, callback));
LOG.debug(
"added upload progress listener to identifier [{}]",
identifier);
} else {
up.waitForUploadResult();
LOG.debug("synchronous upload to identifier [{}] completed.", identifier);
if (callback != null) {
callback.onSuccess(new AsyncUploadResult(
identifier, file));
}
}
} catch (Exception e2 ) {
AsyncUploadResult asyncUpRes= new AsyncUploadResult(identifier, file);
asyncUpRes.setException(e2);
if (callback != null) {
callback.onAbort(asyncUpRes);
}
throw new DataStoreException("Could not upload " + key, e2);
}
}
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
LOG.debug(
"write of [{}], length=[{}], in async mode [{}], in [{}]ms",
new Object[] { identifier, file.length(), asyncUpload,
(System.currentTimeMillis() - start) });
}
/**
     * Renames object keys in S3 concurrently. The number of concurrent
     * threads is defined by the 'maxConnections' property in aws.properties.
     * As S3 has no "move" operation, a move is simulated by copying the
     * object to the new key and then deleting the old key.
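     * <p>
     * Conceptually, each rename amounts to this sketch (the actual
     * implementation copies via the TransferManager and batches the deletes):
     * <pre>
     * s3service.copyObject(bucket, oldKey, bucket, convertKey(oldKey));
     * s3service.deleteObject(bucket, oldKey);
     * </pre>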
*/
private void renameKeys() throws DataStoreException {
long startTime = System.currentTimeMillis();
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
long count = 0;
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
ObjectListing prevObjectListing = s3service.listObjects(bucket);
            List<DeleteObjectsRequest.KeyVersion> deleteList =
                new ArrayList<DeleteObjectsRequest.KeyVersion>();
int nThreads = Integer.parseInt(properties.getProperty("maxConnections"));
ExecutorService executor = Executors.newFixedThreadPool(nThreads,
new NamedThreadFactory("s3-object-rename-worker"));
boolean taskAdded = false;
while (true) {
for (S3ObjectSummary s3ObjSumm : prevObjectListing.getObjectSummaries()) {
executor.execute(new KeyRenameThread(s3ObjSumm.getKey()));
taskAdded = true;
count++;
// delete the object if it follows old key name format
if( s3ObjSumm.getKey().startsWith(KEY_PREFIX)) {
deleteList.add(new DeleteObjectsRequest.KeyVersion(
s3ObjSumm.getKey()));
}
}
if (!prevObjectListing.isTruncated()) break;
prevObjectListing = s3service.listNextBatchOfObjects(prevObjectListing);
}
            // Shut down the executor: accept no new tasks
            // and let all queued tasks finish
executor.shutdown();
try {
                // Wait until all tasks have finished
while (taskAdded
&& !executor.awaitTermination(10, TimeUnit.SECONDS)) {
                    LOG.info("Rename S3 keys tasks timed out. Waiting again");
}
            } catch (InterruptedException ie) {
                // restore the interrupt status and continue shutdown
                Thread.currentThread().interrupt();
            }
LOG.info("Renamed [{}] keys, time taken [{}]sec", count,
((System.currentTimeMillis() - startTime) / 1000));
// Delete older keys.
if (deleteList.size() > 0) {
DeleteObjectsRequest delObjsReq = new DeleteObjectsRequest(
bucket);
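                // delete in batches: S3 multi-object delete accepts at most
                // 1000 keys per request, so a conservative batch of 500 is used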
int batchSize = 500, startIndex = 0, size = deleteList.size();
int endIndex = batchSize < size ? batchSize : size;
while (endIndex <= size) {
delObjsReq.setKeys(Collections.unmodifiableList(deleteList.subList(
startIndex, endIndex)));
DeleteObjectsResult dobjs = s3service.deleteObjects(delObjsReq);
LOG.info(
"Records[{}] deleted in datastore from index [{}] to [{}]",
new Object[] { dobjs.getDeletedObjects().size(),
startIndex, (endIndex - 1) });
if (endIndex == size) {
break;
} else {
startIndex = endIndex;
endIndex = (startIndex + batchSize) < size
? (startIndex + batchSize)
: size;
}
}
}
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(contextClassLoader);
}
}
}
/**
     * Converts a key from the old format to the new format. For example,
     * the old key dataStore_004cb70c8f87d78f04da41e7547cb434094089ea becomes
     * 004c-b70c8f87d78f04da41e7547cb434094089ea.
*/
private static String convertKey(String oldKey)
throws IllegalArgumentException {
if (!oldKey.startsWith(KEY_PREFIX)) {
return oldKey;
}
String key = oldKey.substring(KEY_PREFIX.length());
return key.substring(0, 4) + Utils.DASH + key.substring(4);
}
/**
     * Derives the S3 key from a data identifier. The object is stored under
     * this key in S3.
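     * For example, identifier {@code 004cb70c8f87d78f04da41e7547cb434094089ea}
     * maps to key {@code 004c-b70c8f87d78f04da41e7547cb434094089ea}.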
*/
private static String getKeyName(DataIdentifier identifier) {
String key = identifier.toString();
return key.substring(0, 4) + Utils.DASH + key.substring(4);
}
/**
     * Derives the data identifier name from an S3 key.
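     * Returns {@code null} when the key does not contain the expected dash;
     * e.g. key {@code 004c-b70c8f87d78f04da41e7547cb434094089ea} yields
     * {@code 004cb70c8f87d78f04da41e7547cb434094089ea}.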
*/
private static String getIdentifierName(String key) {
if (!key.contains(Utils.DASH)) {
return null;
}
return key.substring(0, 4) + key.substring(5);
}
/**
     * Renames an object key in S3 on a worker thread.
*/
private class KeyRenameThread implements Runnable {
private String oldKey;
public void run() {
ClassLoader contextClassLoader = Thread.currentThread().getContextClassLoader();
try {
Thread.currentThread().setContextClassLoader(
getClass().getClassLoader());
String newS3Key = convertKey(oldKey);
CopyObjectRequest copReq = new CopyObjectRequest(bucket,
oldKey, bucket, newS3Key);
Copy copy = tmx.copy(s3ReqDecorator.decorate(copReq));
try {
copy.waitForCopyResult();
LOG.debug("[{}] renamed to [{}] ", oldKey, newS3Key);
} catch (InterruptedException ie) {
                    LOG.error("Exception in renaming [{}] to [{}]",
                        new Object[] { oldKey, newS3Key, ie });
}
} finally {
if (contextClassLoader != null) {
Thread.currentThread().setContextClassLoader(
contextClassLoader);
}
}
}
public KeyRenameThread(String oldKey) {
this.oldKey = oldKey;
}
}
/**
* Listener which receives callback on status of S3 upload.
*/
private class S3UploadProgressListener implements ProgressListener {
private File file;
private DataIdentifier identifier;
private AsyncUploadCallback callback;
private Upload upload;
public S3UploadProgressListener(Upload upload, DataIdentifier identifier, File file,
AsyncUploadCallback callback) {
super();
this.identifier = identifier;
this.file = file;
this.callback = callback;
this.upload = upload;
}
public void progressChanged(ProgressEvent progressEvent) {
switch (progressEvent.getEventCode()) {
case ProgressEvent.COMPLETED_EVENT_CODE:
callback.onSuccess(new AsyncUploadResult(identifier, file));
break;
case ProgressEvent.FAILED_EVENT_CODE:
AsyncUploadResult result = new AsyncUploadResult(
identifier, file);
try {
AmazonClientException e = upload.waitForException();
if (e != null) {
result.setException(e);
}
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
}
callback.onFailure(result);
break;
default:
break;
}
}
}
/**
     * This class implements the {@link Runnable} interface to upload a
     * {@link File} to S3 asynchronously.
*/
private class AsyncUploadJob implements Runnable {
private DataIdentifier identifier;
private File file;
private AsyncUploadCallback callback;
public AsyncUploadJob(DataIdentifier identifier, File file,
AsyncUploadCallback callback) {
super();
this.identifier = identifier;
this.file = file;
this.callback = callback;
}
public void run() {
try {
write(identifier, file, true, callback);
} catch (DataStoreException e) {
LOG.error("Could not upload [" + identifier + "], file[" + file
+ "]", e);
}
}
}
}