
org.deeplearning4j.aws.s3.reader.S3Downloader Maven / Gradle / Ivy
/*
*
* * Copyright 2015 Skymind,Inc.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*/
package org.deeplearning4j.aws.s3.reader;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import com.amazonaws.services.s3.model.*;
import com.amazonaws.services.s3.transfer.MultipleFileDownload;
import com.amazonaws.services.s3.transfer.TransferManager;
import org.apache.commons.io.IOUtils;
import org.deeplearning4j.aws.s3.BaseS3;
import com.amazonaws.services.s3.AmazonS3;
/**
* Downloads files from S3
* @author Adam Gibson
*
*/
public class S3Downloader extends BaseS3 {
/**
* Return the keys for a bucket
* @param bucket the bucket to get the keys for
* @return the bucket's keys
*/
public List keysForBucket(String bucket) {
AmazonS3 s3 = getClient();
List ret = new ArrayList<>();
ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
.withBucketName(bucket);
ObjectListing objectListing;
do {
objectListing = s3.listObjects(listObjectsRequest);
for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
ret.add(objectSummary.getKey());
}
listObjectsRequest.setMarker(objectListing.getNextMarker());
} while (objectListing.isTruncated());
return ret;
}
/**
* Returns the list of buckets in s3
* @return the list of buckets
*/
public List buckets() {
List ret = new ArrayList<>();
AmazonS3 s3 = getClient();
List buckets = s3.listBuckets();
for(Bucket b : buckets)
ret.add(b.getName());
return ret;
}
/**
* Iterate over individual buckets.
* Returns input streams to each object.
* It is your responsibility to close the input streams
* @param bucket the bucket to iterate over
* @return an iterator over the objects in an s3 bucket
*/
public Iterator iterateBucket(String bucket) {
return new BucketIterator(bucket,this);
}
/**
* Iterator style one list at a time
* @param list the list to getFromOrigin the next batch for
* @return the next batch of objects or null if
* none are left
*/
public ObjectListing nextList(ObjectListing list) {
AmazonS3 s3 = getClient();
if(list.isTruncated())
return s3.listNextBatchOfObjects(list);
return null;
}
/**
* Simple way of retrieving the listings for a bucket
* @param bucket the bucket to retrieve listings for
* @return the object listing for this bucket
*/
public ObjectListing listObjects(String bucket) {
AmazonS3 s3 = getClient();
ObjectListing list = s3.listObjects(bucket);
return list;
}
/**
* Paginates through a bucket's keys invoking the listener
* at each key
* @param bucket the bucket to iterate
* @param listener the listener
*/
public void paginate(String bucket,BucketKeyListener listener) {
AmazonS3 s3 = getClient();
ObjectListing list = s3.listObjects(bucket);
for(S3ObjectSummary summary : list.getObjectSummaries()) {
if(listener != null)
listener.onKey(s3, bucket, summary.getKey());
}
while(list.isTruncated()) {
list = s3.listNextBatchOfObjects(list);
for(S3ObjectSummary summary : list.getObjectSummaries()) {
if(listener != null)
listener.onKey(s3, bucket, summary.getKey());
}
}
}
/**
* Returns an input stream for the given bucket and key
* @param bucket the bucket to retrieve from
* @param key the key of the objec t
* @return an input stream to the object
*/
public InputStream objectForKey(String bucket,String key) {
AmazonS3 s3 = getClient();
S3Object obj = s3.getObject(bucket, key);
InputStream is = obj.getObjectContent();
return is;
}
public void download(String bucket,String key,File to) throws IOException {
AmazonS3 s3 = getClient();
S3Object obj = s3.getObject(bucket, key);
InputStream is = obj.getObjectContent();
BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(to));
IOUtils.copy(is, bos);
bos.close();
is.close();
obj.close();
}
public void download(String bucket,String key,OutputStream to) throws IOException {
AmazonS3 s3 = getClient();
S3Object obj = s3.getObject(bucket, key);
InputStream is = obj.getObjectContent();
BufferedOutputStream bos = new BufferedOutputStream(to);
IOUtils.copy(is, bos);
bos.close();
is.close();
obj.close();
}
public MultipleFileDownload downloadFolder(String bucketName, String keyPrefix, File folderPath) {
TransferManager transfer = new TransferManager(getClient());
return transfer.downloadDirectory(bucketName, keyPrefix, folderPath);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy