All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.epam.deltix.util.s3.S3DataStore Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2023 EPAM Systems, Inc
 *
 * See the NOTICE file distributed with this work for additional information
 * regarding copyright ownership. Licensed under the Apache License,
 * Version 2.0 (the "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.epam.deltix.util.s3;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.SdkClientException;
import com.amazonaws.auth.AWSStaticCredentialsProvider;
import com.amazonaws.auth.BasicAWSCredentials;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import com.amazonaws.services.s3.model.*;
import com.amazonaws.services.s3.transfer.TransferManager;
import com.amazonaws.services.s3.transfer.TransferManagerBuilder;
import com.amazonaws.services.s3.transfer.Upload;
import com.epam.deltix.gflog.api.Log;
import com.epam.deltix.gflog.api.LogFactory;
import com.epam.deltix.util.collections.Visitor;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.*;

/**
 * S3DataStore - used for uploading data files to S3 store
 */
public class S3DataStore {
    public static final String KEY_DELIMITER = "/";

    protected static final Log LOG = LogFactory.getLog(S3DataStore.class);

    private String bucket;
    private int maxBatchSize;
    private long maxBatchTime;

    private AmazonS3 s3Client;
    private TransferManager transferManager;

    public S3DataStore(S3StorageOptions options) {
        this.bucket = options.getBucket();
        this.maxBatchSize = options.getMaxBatchSize();
        this.maxBatchTime = options.getMaxBatchTime();
        this.s3Client = getS3Client(options);
        this.transferManager = TransferManagerBuilder.standard().withS3Client(s3Client).build();
    }

    public S3Writer createWriter(String dataKey) {
        return new S3GzipWriter(this, dataKey, maxBatchSize, maxBatchTime);
    }

    public S3Reader createReader(String dataKey) throws IOException {
        return new S3GzipReader(this, dataKey);
    }

    public void upload(String keyName, InputStream dataIn, Map metadata) throws IOException, InterruptedException {
        long beforeTime = System.currentTimeMillis();
        int contentLength = dataIn.available();

        for (int attemptsCount = 0; attemptsCount < 2; attemptsCount++) {
            if (attemptsCount > 0) {
                LOG.warn("Retrying %s upload").with(keyName);
                dataIn.reset();
            }
            ObjectMetadata objMetadata = new ObjectMetadata();
            objMetadata.setContentLength(contentLength);
            if (metadata != null)
                objMetadata.setUserMetadata(metadata);

            Upload upload = transferManager.upload(bucket, keyName, dataIn, objMetadata);
            try {
                upload.waitForCompletion();
                break;
            }
            catch (AmazonClientException ex) {
                LOG.warn("Upload failed with error: %s").with(ex.getMessage());
                if (attemptsCount > 0) throw ex;
            }
        }

        long afterTime = System.currentTimeMillis();
        LOG.trace("Uploaded %s Mb in %s sec to %s").with(contentLength/1000/1000.0).with((afterTime-beforeTime)/1000.0).with(keyName);
    }

    // downloads object to output stream and returns its user metadata
    public Map download(String keyName, OutputStream out) throws IOException {
        S3Object obj = s3Client.getObject(bucket, keyName);
        try (S3ObjectInputStream objStream = obj.getObjectContent()) {
            byte[] read_buf = new byte[1024];
            int read_len = 0;
            while ((read_len = objStream.read(read_buf)) > 0) {
                out.write(read_buf, 0, read_len);
            }
        }
        finally {
            out.close();
        }
        return obj.getObjectMetadata().getUserMetadata();
    }

    // creates or updates an empty object with value stored in its metadata
    public void setMetadata(String dataKey, String key, String value) {
        ObjectMetadata metadata = new ObjectMetadata();
        metadata.setContentLength(0);
        metadata.addUserMetadata(key, value);
        InputStream emptyContent = new ByteArrayInputStream(new byte[0]);

        PutObjectRequest putObjectRequest = new PutObjectRequest(bucket, dataKey, emptyContent, metadata);
        s3Client.putObject(putObjectRequest);

        /*
        ObjectMetadata metadata = new ObjectMetadata();
        metadata.addUserMetadata(metadataKey, value);
        CopyObjectRequest request = new CopyObjectRequest(bucket, objectKey, bucket, objectKey);
        request.setNewObjectMetadata(metadata);
        s3Client.copyObject(request);
         */
    }

    public boolean objectExists(String dataKey) {
        return s3Client.doesObjectExist(bucket, dataKey);
    }

    // returns
    public Map getMetadata(String dataKey) {
        if (!s3Client.doesObjectExist(bucket, dataKey))
            return null;

        ObjectMetadata metadata = s3Client.getObjectMetadata(bucket, dataKey);
        return metadata.getUserMetadata();
    }

    public List getObjectKeys(String keyPrefix, String keySuffix) {
        ArrayList keys = new ArrayList<>();
        ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket).withMaxKeys(1000).withPrefix(keyPrefix);
        ListObjectsV2Result result;
        do {
            result = s3Client.listObjectsV2(req);
            for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
                String key = objectSummary.getKey();
                if (keySuffix != null && key.endsWith(keySuffix))
                    keys.add(objectSummary.getKey());
            }
            req.setContinuationToken(result.getNextContinuationToken());
        } while (result.isTruncated());

        return keys;
    }

    /**
     * @param dataKey S3 Key that corresponds to the folder containing partitions
     * @return List of keys that correspond to S3 top level partition folders
     */
    public Set getPartitions(String dataKey, String extension) {
        assert !dataKey.endsWith(KEY_DELIMITER);
        String keyPrefix = dataKey + KEY_DELIMITER;

        Set keys = new HashSet<>();
        ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket).withMaxKeys(1000).withPrefix(keyPrefix);
        ListObjectsV2Result result;
        do {
            result = s3Client.listObjectsV2(req);
            for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
                String key = objectSummary.getKey();
                if (key.endsWith(extension)) {
                    int index = key.indexOf(KEY_DELIMITER, keyPrefix.length());
                    if (index > 0)
                        keys.add(key.substring(0, index));
                }
            }
            req.setContinuationToken(result.getNextContinuationToken());
        } while (result.isTruncated());

        return keys;
    }

    public void visitObjectKeys(String keyPrefix, Visitor visitor) {
        ListObjectsV2Request req = new ListObjectsV2Request().withBucketName(bucket).withMaxKeys(1000).withPrefix(keyPrefix);
        ListObjectsV2Result result;
        do {
            result = s3Client.listObjectsV2(req);

            for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
                String key = objectSummary.getKey();
                if (!visitor.visit(key))
                    return;
            }
            req.setContinuationToken(result.getNextContinuationToken());
        } while (result.isTruncated());
    }

    private static AmazonS3 getS3Client(S3StorageOptions options) {
        try {
            BasicAWSCredentials credentials = new BasicAWSCredentials(options.getAccessKeyId(), options.getAccessKey());
            return AmazonS3ClientBuilder.standard()
                    .withCredentials(new AWSStaticCredentialsProvider(credentials))
                    .withRegion(options.getRegion())
                    .build();
        }
        catch (AmazonServiceException ex) {
            throw new RuntimeException("Failed to login to AWS", ex);
        }
        catch (SdkClientException ex) {
            throw new RuntimeException("Failed to connect to AWS", ex);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy