// org.deeplearning4j.aws.s3.reader.S3Downloader (Maven / Gradle / Ivy)
package org.deeplearning4j.aws.s3.reader;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.io.IOUtils;
import org.deeplearning4j.aws.s3.BaseS3;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.Bucket;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
/**
 * Downloads files from S3.
 *
 * @author Adam Gibson
 */
public class S3Downloader extends BaseS3 {

    /**
     * Returns the names of the buckets in S3.
     *
     * @return the bucket names, in the order the client listed them
     */
    public List<String> buckets() {
        AmazonS3 s3 = getClient();
        List<Bucket> buckets = s3.listBuckets();
        List<String> ret = new ArrayList<>(buckets.size());
        for (Bucket b : buckets) {
            ret.add(b.getName());
        }
        return ret;
    }

    /**
     * Iterates over the objects of a single bucket.
     * Returns input streams to each object.
     * It is your responsibility to close the input streams.
     *
     * @param bucket the bucket to iterate over
     * @return an iterator over the objects in an S3 bucket
     */
    public Iterator<InputStream> iterateBucket(String bucket) {
        // NOTE(review): element type follows the contract documented above;
        // confirm BucketIterator is declared as Iterator<InputStream>.
        return new BucketIterator(bucket, this);
    }

    /**
     * Iterator-style access to a listing, one batch at a time.
     *
     * @param list the listing to get the next batch for
     * @return the next batch of objects, or {@code null} if none are left
     */
    public ObjectListing nextList(ObjectListing list) {
        AmazonS3 s3 = getClient();
        if (list.isTruncated()) {
            return s3.listNextBatchOfObjects(list);
        }
        return null;
    }

    /**
     * Simple way of retrieving the listings for a bucket.
     *
     * @param bucket the bucket to retrieve listings for
     * @return the (first) object listing for this bucket; use
     *         {@link #nextList(ObjectListing)} to fetch further batches
     */
    public ObjectListing listObjects(String bucket) {
        return getClient().listObjects(bucket);
    }

    /**
     * Paginates through a bucket's keys, invoking the listener at each key.
     * Every batch of the listing is fetched even when the listener is
     * {@code null}; in that case no callback is made.
     *
     * @param bucket   the bucket to iterate
     * @param listener the listener to notify for each key, may be {@code null}
     */
    public void paginate(String bucket, BucketKeyListener listener) {
        AmazonS3 s3 = getClient();
        ObjectListing list = s3.listObjects(bucket);
        while (true) {
            for (S3ObjectSummary summary : list.getObjectSummaries()) {
                if (listener != null) {
                    listener.onKey(s3, bucket, summary.getKey());
                }
            }
            // A non-truncated listing is the last batch.
            if (!list.isTruncated()) {
                break;
            }
            list = s3.listNextBatchOfObjects(list);
        }
    }

    /**
     * Returns an input stream for the given bucket and key.
     * The caller is responsible for closing the returned stream.
     *
     * @param bucket the bucket to retrieve from
     * @param key    the key of the object
     * @return an input stream to the object's content
     */
    public InputStream objectForKey(String bucket, String key) {
        AmazonS3 s3 = getClient();
        S3Object obj = s3.getObject(bucket, key);
        return obj.getObjectContent();
    }

    /**
     * Downloads the object stored under the given bucket and key into a file.
     * The S3 object, its content stream and the file stream are closed
     * whether or not the copy succeeds.
     *
     * @param bucket the bucket to retrieve from
     * @param key    the key of the object
     * @param to     the file to write the object's content to
     * @throws IOException if the file cannot be opened or the copy fails
     */
    public void download(String bucket, String key, File to) throws IOException {
        AmazonS3 s3 = getClient();
        // Resources are closed in reverse order: file stream, content stream, S3 object.
        try (S3Object obj = s3.getObject(bucket, key);
             InputStream is = obj.getObjectContent();
             OutputStream bos = new BufferedOutputStream(new FileOutputStream(to))) {
            IOUtils.copy(is, bos);
        }
    }

    /**
     * Downloads the object stored under the given bucket and key into an
     * output stream. The given stream is flushed and closed once the object
     * has been fetched, whether or not the copy succeeds; it is left untouched
     * if fetching the object itself fails.
     *
     * @param bucket the bucket to retrieve from
     * @param key    the key of the object
     * @param to     the stream to write the object's content to; closed by this method
     * @throws IOException if the copy fails
     */
    public void download(String bucket, String key, OutputStream to) throws IOException {
        AmazonS3 s3 = getClient();
        // Resources are closed in reverse order: output stream, content stream, S3 object.
        try (S3Object obj = s3.getObject(bucket, key);
             InputStream is = obj.getObjectContent();
             OutputStream bos = new BufferedOutputStream(to)) {
            IOUtils.copy(is, bos);
        }
    }
}