All downloads are free. Search and download functionality uses the official Maven repository.

com.qubole.presto.kinesis.s3config.S3TableConfigClient Maven / Gradle / Ivy

There is a newer version: 1.3.0
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.qubole.presto.kinesis.s3config;

import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3URI;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;

import com.qubole.presto.kinesis.ConnectorShutdown;
import com.qubole.presto.kinesis.KinesisClientProvider;
import com.qubole.presto.kinesis.KinesisConnectorConfig;
import com.qubole.presto.kinesis.KinesisStreamDescription;
import com.facebook.presto.spi.SchemaTableName;

import com.google.common.collect.ImmutableMap;
import io.airlift.json.JsonCodec;
import io.airlift.log.Logger;
import com.google.inject.Inject;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;

import java.nio.charset.StandardCharsets;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import static java.util.Objects.requireNonNull;

/**
 * Utility class to retrieve table definitions from a common place on Amazon S3.
 *
 * This is so that we can add new tables in a central "metastore" location without
 * having to update every single node with the files.
 *
 * This makes calls to Amazon AWS using the S3 client.
 *
 * When an S3 location is configured, a single background thread re-reads the
 * directory periodically and stores every parsed definition in an internal map
 * keyed by the S3 object key. Entries are only ever added or replaced; a
 * definition whose object disappears from S3 stays in the map.
 */
public class S3TableConfigClient implements ConnectorShutdown
{
    private static final Logger log = Logger.get(S3TableConfigClient.class);

    /** Delay before the first refresh, in seconds. */
    private static final long INITIAL_DELAY_SECONDS = 5;
    /** Interval between refreshes, in seconds. */
    private static final long UPDATE_PERIOD_SECONDS = 600;
    /** Maximum number of keys requested per S3 listing page. */
    private static final int LIST_PAGE_SIZE = 25;

    public final KinesisConnectorConfig kinesisConnectorConfig;
    private final KinesisClientProvider clientManager;
    private final JsonCodec<KinesisStreamDescription> streamDescriptionCodec;

    private final String bucketUrl;
    // Only read and written by updateTablesFromS3().
    private long lastCheck = 0;
    private ScheduledExecutorService scheduler = null;
    private ScheduledFuture<?> updateTaskHandle = null;

    // S3 object key -> parsed table definition. Written by the updater, read by query threads.
    private final Map<String, KinesisStreamDescription> internalMap =
            Collections.synchronizedMap(new HashMap<>());

    /**
     * Creates the client and, if an S3 table-description location is configured,
     * starts the periodic refresh.
     *
     * @param aConnectorConfig connector configuration supplying the S3 location
     * @param aClientManager provider of the S3 client used for all requests
     * @param jsonCodec codec used to parse each S3 object into a table definition
     * @throws NullPointerException if any argument is null
     */
    @Inject
    public S3TableConfigClient(KinesisConnectorConfig aConnectorConfig,
                               KinesisClientProvider aClientManager,
                               JsonCodec<KinesisStreamDescription> jsonCodec)
    {
        this.kinesisConnectorConfig = requireNonNull(aConnectorConfig, "connector configuration object is null");
        this.clientManager = requireNonNull(aClientManager, "client manager object is null");
        this.streamDescriptionCodec = requireNonNull(jsonCodec, "JSON codec object is null");

        // Treat a missing setting like an empty one so the checks below cannot hit a null.
        String configuredUrl = this.kinesisConnectorConfig.getTableDescriptionsS3();
        this.bucketUrl = (configuredUrl == null) ? "" : configuredUrl;

        // If using S3 start thread that periodically looks for updates
        if (!this.bucketUrl.isEmpty()) {
            startS3Updates();
        }
    }

    /**
     * Indicates this class is being used and actively reading table definitions from S3.
     *
     * @return true when a non-empty S3 location was configured
     */
    public boolean isUsingS3()
    {
        return !this.bucketUrl.isEmpty();
    }

    /**
     * Main entry point to get table definitions from S3 using bucket and object directory
     * given in the configuration.
     *
     * For safety, an immutable copy built from the internal map is returned.
     *
     * @return immutable map from schema/table name to the definition most recently read from S3
     */
    public Map<SchemaTableName, KinesisStreamDescription> getTablesFromS3()
    {
        // Iterating a synchronizedMap view requires holding the map's monitor, otherwise a
        // concurrent put() from the updater can break the iteration. Copy under the lock.
        List<KinesisStreamDescription> snapshot;
        synchronized (this.internalMap) {
            snapshot = new ArrayList<>(this.internalMap.values());
        }

        ImmutableMap.Builder<SchemaTableName, KinesisStreamDescription> builder = ImmutableMap.builder();
        for (KinesisStreamDescription stream : snapshot) {
            builder.put(new SchemaTableName(stream.getSchemaName(), stream.getTableName()), stream);
        }
        return builder.build();
    }

    /** Shutdown any periodic update jobs and release the thread that runs them. */
    @Override
    public void shutdown()
    {
        if (updateTaskHandle != null) {
            updateTaskHandle.cancel(true);
        }
        // Cancelling the task alone leaves the executor's worker thread alive.
        if (scheduler != null) {
            scheduler.shutdownNow();
        }
    }

    /** Starts the single-threaded scheduler that periodically calls {@link #updateTablesFromS3()}. */
    protected void startS3Updates()
    {
        this.scheduler = Executors.newSingleThreadScheduledExecutor();
        this.updateTaskHandle = this.scheduler.scheduleAtFixedRate(() -> {
            try {
                updateTablesFromS3();
            }
            catch (RuntimeException e) {
                // An exception escaping a fixed-rate task suppresses every later run,
                // so log it here and let the next cycle try again.
                log.error(e, "Unexpected failure while updating table definitions from S3");
            }
        }, INITIAL_DELAY_SECONDS, UPDATE_PERIOD_SECONDS, TimeUnit.SECONDS);
    }

    /**
     * Call S3 to get the most recent object list.
     *
     * This is an object list request to AWS in the given "directory". AWS errors are
     * logged rather than thrown, so the result may be empty or partial.
     *
     * @return summaries of the objects directly under the configured directory
     */
    protected List<S3ObjectSummary> getObjectSummaries()
    {
        AmazonS3Client s3client = this.clientManager.getS3Client();
        AmazonS3URI directoryURI = new AmazonS3URI(this.bucketUrl);
        String prefix = directoryPrefix(directoryURI.getKey());

        List<S3ObjectSummary> returnList = new ArrayList<>();
        try {
            log.info("Getting the listing of objects in the S3 table config directory: bucket %s prefix %s :", directoryURI.getBucket(), directoryURI.getKey());
            ListObjectsRequest req = new ListObjectsRequest()
                    .withBucketName(directoryURI.getBucket())
                    .withPrefix(prefix)
                    .withDelimiter("/")
                    .withMaxKeys(LIST_PAGE_SIZE);
            ObjectListing result;

            // Follow the marker until S3 reports that the listing is complete.
            do {
                result = s3client.listObjects(req);

                returnList.addAll(result.getObjectSummaries());
                req.setMarker(result.getNextMarker());
            }
            while (result.isTruncated());

            log.info("Completed getting S3 object listing.");
        }
        catch (AmazonServiceException ase) {
            logServiceError(ase);
        }
        catch (AmazonClientException ace) {
            logClientError(ace);
        }

        return returnList;
    }

    /**
     * Connect to S3 directory to look for new or updated table definitions and then
     * update the map.
     *
     * An object is (re)read when its key is not yet in the map or when its last-modified
     * time is at or after the start of the previous run. A failure on one object is
     * logged and does not prevent the remaining objects from being processed.
     */
    protected void updateTablesFromS3()
    {
        long now = System.currentTimeMillis();

        List<S3ObjectSummary> objectList = this.getObjectSummaries();
        AmazonS3Client s3client = this.clientManager.getS3Client();

        for (S3ObjectSummary objInfo : objectList) {
            String key = objInfo.getKey();
            if (key.endsWith("/")) {
                // Directory placeholder, not a table definition.
                continue;
            }
            if (this.internalMap.containsKey(key) && objInfo.getLastModified().getTime() < this.lastCheck) {
                // Already loaded and unchanged since the previous run.
                continue;
            }

            // New or updated file, so we must read from AWS
            try {
                log.info("Getting : %s - %s", objInfo.getBucketName(), key);
                KinesisStreamDescription table = readTableDescription(s3client, objInfo);

                internalMap.put(key, table);
                log.info("Put table description into the map from %s", key);
            }
            catch (IOException iox) {
                log.error(iox, "Problem reading input stream from object.");
            }
            catch (AmazonServiceException ase) {
                logServiceError(ase);
            }
            catch (AmazonClientException ace) {
                logClientError(ace);
            }
            catch (RuntimeException e) {
                // e.g. content the codec cannot parse; skip this object, keep the rest.
                log.error(e, "Could not load table description from %s", key);
            }
        } // end loop through object descriptions

        log.info("Completed updating table definitions from S3.");
        this.lastCheck = now;
    }

    /** Downloads one S3 object as UTF-8 text and parses it with the stream description codec. */
    private KinesisStreamDescription readTableDescription(AmazonS3Client s3client, S3ObjectSummary objInfo)
            throws IOException
    {
        S3Object object = s3client.getObject(
                new GetObjectRequest(objInfo.getBucketName(), objInfo.getKey()));

        StringBuilder content = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(object.getObjectContent(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line);
            }
        }
        return streamDescriptionCodec.fromJson(content.toString());
    }

    /** Turns the key part of the configured URL into a listing prefix with exactly one trailing slash appended when needed. */
    private static String directoryPrefix(String key)
    {
        if (key == null || key.isEmpty()) {
            // Bucket-only URL: list from the bucket root instead of the literal "null/".
            return "";
        }
        return key.endsWith("/") ? key : key + "/";
    }

    /** Logs the diagnostic fields of an S3 service-side rejection together with its stack trace. */
    private static void logServiceError(AmazonServiceException ase)
    {
        String message = "Caught an AmazonServiceException, which means your request made it "
                + "to Amazon S3, but was rejected with an error response for some reason.\n"
                + "Error Message:    " + ase.getMessage() + "\n"
                + "HTTP Status Code: " + ase.getStatusCode() + "\n"
                + "AWS Error Code:   " + ase.getErrorCode() + "\n"
                + "Error Type:       " + ase.getErrorType() + "\n"
                + "Request ID:       " + ase.getRequestId();
        // Pass the text as an argument so '%' characters in it are not treated as format specifiers.
        log.error(ase, "%s", message);
    }

    /** Logs a client-side S3 failure together with its stack trace. */
    private static void logClientError(AmazonClientException ace)
    {
        String message = "Caught an AmazonClientException, "
                + "which means the client encountered "
                + "an internal error while trying to communicate"
                + " with S3, "
                + "such as not being able to access the network.\n"
                + "Error Message: " + ace.getMessage();
        log.error(ace, "%s", message);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy