
com.qubole.presto.kinesis.s3config.S3TableConfigClient Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of kinesis Show documentation
Show all versions of kinesis Show documentation
Presto - Kinesis Connector
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.qubole.presto.kinesis.s3config;
import com.amazonaws.AmazonClientException;
import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.AmazonS3URI;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.facebook.presto.spi.SchemaTableName;
import com.google.common.collect.ImmutableMap;
import com.google.inject.Inject;
import com.qubole.presto.kinesis.ConnectorShutdown;
import com.qubole.presto.kinesis.KinesisClientProvider;
import com.qubole.presto.kinesis.KinesisConnectorConfig;
import com.qubole.presto.kinesis.KinesisStreamDescription;
import io.airlift.json.JsonCodec;
import io.airlift.log.Logger;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import static java.util.Objects.requireNonNull;
/**
* Utility class to retrieve table definitions from a common place on Amazon S3.
*
* This is so that we can add new tables in a central "metastore" location without
* having to update every single node with the files.
*
* This makes calls to Amazon AWS using the S3 client.
*/
public class S3TableConfigClient implements ConnectorShutdown
{
private static final Logger log = Logger.get(S3TableConfigClient.class);
public final KinesisConnectorConfig kinesisConnectorConfig;
private final KinesisClientProvider clientManager;
private final JsonCodec streamDescriptionCodec;
private final String bucketUrl;
private long lastCheck = 0;
private ScheduledFuture> updateTaskHandle = null;
private Map internalMap =
Collections.synchronizedMap(new HashMap());
@Inject
public S3TableConfigClient(KinesisConnectorConfig aConnectorConfig,
KinesisClientProvider aClientManager,
JsonCodec jsonCodec)
{
this.kinesisConnectorConfig = requireNonNull(aConnectorConfig, "connector configuration object is null");
this.clientManager = requireNonNull(aClientManager, "client manager object is null");
this.streamDescriptionCodec = requireNonNull(jsonCodec, "JSON codec object is null");
// If using S3 start thread that periodically looks for updates
this.bucketUrl = this.kinesisConnectorConfig.getTableDescriptionsS3();
if (!this.bucketUrl.isEmpty()) {
startS3Updates();
}
}
/** Indicates this class is being used and actively reading table definitions from S3. */
public boolean isUsingS3()
{
return !this.bucketUrl.isEmpty();
}
/**
* Main entry point to get table definitions from S3 using bucket and object directory
* given in the configuration.
*
* For safety, an immutable copy built from the internal map is returned.
*
* @return
*/
public Map getTablesFromS3()
{
Collection streamValues = this.internalMap.values();
ImmutableMap.Builder builder = ImmutableMap.builder();
for (KinesisStreamDescription stream : streamValues) {
builder.put(new SchemaTableName(stream.getSchemaName(), stream.getTableName()), stream);
}
return builder.build();
}
/** Shutdown any periodic update jobs. */
@Override
public void shutdown()
{
if (isUsingS3() && updateTaskHandle != null) {
updateTaskHandle.cancel(true);
}
return;
}
protected void startS3Updates()
{
ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
this.updateTaskHandle =
scheduler.scheduleAtFixedRate(() -> updateTablesFromS3(), 5, 600, TimeUnit.SECONDS);
return;
}
/**
* Call S3 to get the most recent object list.
*
* This is an object list request to AWS in the given "directory".
*
* @return
*/
protected List getObjectSummaries()
{
AmazonS3Client s3client = this.clientManager.getS3Client();
AmazonS3URI directoryURI = new AmazonS3URI(this.bucketUrl);
ArrayList returnList = new ArrayList();
try {
log.info("Getting the listing of objects in the S3 table config directory: bucket %s prefix %s :", directoryURI.getBucket(), directoryURI.getKey());
ListObjectsRequest req = new ListObjectsRequest().withBucketName(directoryURI.getBucket()).
withPrefix(directoryURI.getKey() + "/").withDelimiter("/").withMaxKeys(25);
ObjectListing result;
do {
result = s3client.listObjects(req);
returnList.addAll(result.getObjectSummaries());
req.setMarker(result.getNextMarker());
}
while(result.isTruncated());
log.info("Completed getting S3 object listing.");
}
catch (AmazonServiceException ase) {
StringBuilder sb = new StringBuilder();
sb.append("Caught an AmazonServiceException, which means your request made it ");
sb.append("to Amazon S3, but was rejected with an error response for some reason.\n");
sb.append("Error Message: " + ase.getMessage());
sb.append("HTTP Status Code: " + ase.getStatusCode());
sb.append("AWS Error Code: " + ase.getErrorCode());
sb.append("Error Type: " + ase.getErrorType());
sb.append("Request ID: " + ase.getRequestId());
log.error(sb.toString(), ase);
}
catch (AmazonClientException ace) {
StringBuilder sb = new StringBuilder();
sb.append("Caught an AmazonClientException, " +
"which means the client encountered " +
"an internal error while trying to communicate" +
" with S3, " +
"such as not being able to access the network.");
sb.append("Error Message: " + ace.getMessage());
log.error(sb.toString(), ace);
}
return returnList;
}
/**
* Connect to S3 directory to look for new or updated table definitions and then
* update the map.
*/
protected void updateTablesFromS3()
{
long now = System.currentTimeMillis();
List objectList = this.getObjectSummaries();
AmazonS3Client s3client = this.clientManager.getS3Client();
AmazonS3URI directoryURI = new AmazonS3URI(this.bucketUrl);
for (S3ObjectSummary objInfo : objectList) {
if (!this.internalMap.containsKey(objInfo.getKey()) || objInfo.getLastModified().getTime() >= this.lastCheck) {
// New or updated file, so we must read from AWS
try {
if (objInfo.getKey().endsWith("/")) {
continue;
}
log.info("Getting : %s - %s", objInfo.getBucketName(), objInfo.getKey());
S3Object object = s3client.getObject(
new GetObjectRequest(objInfo.getBucketName(), objInfo.getKey()));
StringBuilder resultStr = new StringBuilder("");
try (BufferedReader reader = new BufferedReader(new InputStreamReader(object.getObjectContent()))) {
boolean hasMore = true;
while (hasMore) {
String line = reader.readLine();
if (line != null) {
resultStr.append(line);
}
else {
hasMore = false;
}
}
KinesisStreamDescription table = streamDescriptionCodec.fromJson(resultStr.toString());
internalMap.put(objInfo.getKey(), table);
log.info("Put table description into the map from %s", objInfo.getKey());
}
catch (IOException iox) {
log.error("Problem reading input stream from object.", iox);
}
}
catch (AmazonServiceException ase) {
StringBuilder sb = new StringBuilder();
sb.append("Caught an AmazonServiceException, which means your request made it ");
sb.append("to Amazon S3, but was rejected with an error response for some reason.\n");
sb.append("Error Message: " + ase.getMessage());
sb.append("HTTP Status Code: " + ase.getStatusCode());
sb.append("AWS Error Code: " + ase.getErrorCode());
sb.append("Error Type: " + ase.getErrorType());
sb.append("Request ID: " + ase.getRequestId());
log.error(sb.toString(), ase);
}
catch (AmazonClientException ace) {
StringBuilder sb = new StringBuilder();
sb.append("Caught an AmazonClientException, " +
"which means the client encountered " +
"an internal error while trying to communicate" +
" with S3, " +
"such as not being able to access the network.");
sb.append("Error Message: " + ace.getMessage());
log.error(sb.toString(), ace);
}
}
} // end loop through object descriptions
log.info("Completed updating table definitions from S3.");
this.lastCheck = now;
return;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy