// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
// You can buy this project and download or modify it as often as you want.
/*-
* #%L
* athena-cloudwatch
* %%
* Copyright (C) 2019 Amazon Web Services
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
package com.amazonaws.athena.connectors.cloudwatch;
import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
import com.amazonaws.athena.connector.lambda.domain.TableName;
import com.amazonaws.services.logs.AWSLogs;
import com.amazonaws.services.logs.model.DescribeLogGroupsRequest;
import com.amazonaws.services.logs.model.DescribeLogGroupsResult;
import com.amazonaws.services.logs.model.DescribeLogStreamsRequest;
import com.amazonaws.services.logs.model.DescribeLogStreamsResult;
import com.amazonaws.services.logs.model.LogGroup;
import com.amazonaws.services.logs.model.LogStream;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Locale;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeoutException;
import static com.amazonaws.athena.connectors.cloudwatch.CloudwatchMetadataHandler.ALL_LOG_STREAMS_TABLE;
/**
* This class helps with resolving the differences in casing between cloudwatch log and Presto. Presto expects all
* databases, tables, and columns to be lower case. This class allows us to use cloudwatch logGroups and logStreams
* which may have captial letters in them without issue. It does so by caching LogStreams and LogStreams and doing
* a case insentive search over them. It will first try to do a targeted get to reduce the penalty for LogGroups
* and LogStreams which don't have capitalization. It also has an optimization for LAMBDA which is a common
* cause of capitalized LogStreams by doing a targeted replace for LAMBDA's pattern.
*/
public class CloudwatchTableResolver
{
private static final Logger logger = LoggerFactory.getLogger(CloudwatchTableResolver.class);
private AWSLogs awsLogs;
//Used to handle Throttling events using an AIMD strategy for congestion control.
private ThrottlingInvoker invoker;
//The LogStream pattern that is capitalized by LAMBDA
private static final String LAMBDA_PATTERN = "$latest";
//The LogStream pattern to replace
private static final String LAMBDA_ACTUAL_PATTERN = "$LATEST";
//The schema cache that is presto casing to cloudwatch casing
private final LoadingCache schemaCache;
//The table cache that is presto casing to cloudwatch casing
private final LoadingCache tableCache;
/**
* Constructs an instance of the table resolver.
*
* @param invoker The ThrottlingInvoker to use to handle throttling events.
* @param awsLogs The AWSLogs client to use for cache misses.
* @param maxSchemaCacheSize The max number of schemas to cache.
* @param maxTableCacheSize The max tables to cache.
*/
public CloudwatchTableResolver(ThrottlingInvoker invoker, AWSLogs awsLogs, long maxSchemaCacheSize, long maxTableCacheSize)
{
this.invoker = invoker;
this.awsLogs = awsLogs;
this.tableCache = CacheBuilder.newBuilder()
.maximumSize(maxTableCacheSize)
.build(
new CacheLoader()
{
public CloudwatchTableName load(TableName schemaName)
throws TimeoutException
{
return loadLogStreams(schemaName.getSchemaName(), schemaName.getTableName());
}
});
this.schemaCache = CacheBuilder.newBuilder()
.maximumSize(maxSchemaCacheSize)
.build(
new CacheLoader()
{
public String load(String schemaName)
throws TimeoutException
{
return loadLogGroups(schemaName);
}
});
}
/**
* Loads the requested LogStream as identified by the TableName.
*
* @param logGroup The properly cased schema name.
* @param logStream The table name to validate.
* @return The CloudwatchTableName or null if not found.
* @note This method also primes the cache with other CloudwatchTableNames found along the way while scaning Cloudwatch.
*/
private CloudwatchTableName loadLogStreams(String logGroup, String logStream)
throws TimeoutException
{
//As an optimization, see if the table name is an exact match (meaning likely no casing issues)
CloudwatchTableName result = loadLogStream(logGroup, logStream);
if (result != null) {
return result;
}
logger.info("loadLogStreams: Did not find a match for the table, falling back to LogGroup scan for {}:{}",
logGroup, logStream);
DescribeLogStreamsRequest validateTableRequest = new DescribeLogStreamsRequest(logGroup);
DescribeLogStreamsResult validateTableResult;
do {
validateTableResult = invoker.invoke(() -> awsLogs.describeLogStreams(validateTableRequest));
for (LogStream nextStream : validateTableResult.getLogStreams()) {
String logStreamName = nextStream.getLogStreamName();
CloudwatchTableName nextCloudwatch = new CloudwatchTableName(logGroup, logStreamName);
tableCache.put(nextCloudwatch.toTableName(), nextCloudwatch);
if (nextCloudwatch.getLogStreamName().equalsIgnoreCase(logStream)) {
//We stop loading once we find the one we care about. This is an optimization that
//attempt to exploit the fact that we likely access more recent logstreams first.
logger.info("loadLogStreams: Matched {} for {}", nextCloudwatch, logStream);
return nextCloudwatch;
}
}
validateTableRequest.setNextToken(validateTableResult.getNextToken());
}
while (validateTableResult.getNextToken() != null);
//We could not find a match
throw new IllegalArgumentException("No such table " + logGroup + " " + logStream);
}
/**
* Optomizaiton that attempts to load a specific LogStream as identified by the TableName.
*
* @param logGroup The properly cased schema name.
* @param logStream The table name to validate.
* @return The CloudwatchTableName or null if not found.
* @note This method also primes the cache with other CloudwatchTableNames found along the way while scanning Cloudwatch.
*/
private CloudwatchTableName loadLogStream(String logGroup, String logStream)
throws TimeoutException
{
if (ALL_LOG_STREAMS_TABLE.equalsIgnoreCase(logStream)) {
return new CloudwatchTableName(logGroup, ALL_LOG_STREAMS_TABLE);
}
String effectiveTableName = logStream;
if (effectiveTableName.contains(LAMBDA_PATTERN)) {
logger.info("loadLogStream: Appears to be a lambda log_stream, substituting Lambda pattern {} for {}",
LAMBDA_PATTERN, effectiveTableName);
effectiveTableName = effectiveTableName.replace(LAMBDA_PATTERN, LAMBDA_ACTUAL_PATTERN);
}
DescribeLogStreamsRequest request = new DescribeLogStreamsRequest(logGroup)
.withLogStreamNamePrefix(effectiveTableName);
DescribeLogStreamsResult result = invoker.invoke(() -> awsLogs.describeLogStreams(request));
for (LogStream nextStream : result.getLogStreams()) {
String logStreamName = nextStream.getLogStreamName();
CloudwatchTableName nextCloudwatch = new CloudwatchTableName(logGroup, logStreamName);
if (nextCloudwatch.getLogStreamName().equalsIgnoreCase(logStream)) {
logger.info("loadLogStream: Matched {} for {}:{}", nextCloudwatch, logGroup, logStream);
return nextCloudwatch;
}
}
return null;
}
/**
* Loads the requested LogGroup as identified by the schemaName.
*
* @param schemaName The schemaName to load.
* @return The actual LogGroup name in cloudwatch.
* @note This method also primes the cache with other LogGroups found along the way while scanning Cloudwatch.
*/
private String loadLogGroups(String schemaName)
throws TimeoutException
{
//As an optimization, see if the table name is an exact match (meaning likely no casing issues)
String result = loadLogGroup(schemaName);
if (result != null) {
return result;
}
logger.info("loadLogGroups: Did not find a match for the schema, falling back to LogGroup scan for {}", schemaName);
DescribeLogGroupsRequest validateSchemaRequest = new DescribeLogGroupsRequest();
DescribeLogGroupsResult validateSchemaResult;
do {
validateSchemaResult = invoker.invoke(() -> awsLogs.describeLogGroups(validateSchemaRequest));
for (LogGroup next : validateSchemaResult.getLogGroups()) {
String nextLogGroupName = next.getLogGroupName();
schemaCache.put(schemaName, nextLogGroupName);
if (nextLogGroupName.equalsIgnoreCase(schemaName)) {
logger.info("loadLogGroups: Matched {} for {}", nextLogGroupName, schemaName);
return nextLogGroupName;
}
}
validateSchemaRequest.setNextToken(validateSchemaResult.getNextToken());
}
while (validateSchemaResult.getNextToken() != null);
//We could not find a match
throw new IllegalArgumentException("No such schema " + schemaName);
}
/**
* Optomizaiton that attempts to load a specific LogStream as identified by the TableName.
*
* @param schemaName The schemaName to load.
* @return The CloudwatchTableName or null if not found.
*/
private String loadLogGroup(String schemaName)
throws TimeoutException
{
DescribeLogGroupsRequest request = new DescribeLogGroupsRequest().withLogGroupNamePrefix(schemaName);
DescribeLogGroupsResult result = invoker.invoke(() -> awsLogs.describeLogGroups(request));
for (LogGroup next : result.getLogGroups()) {
String nextLogGroupName = next.getLogGroupName();
if (nextLogGroupName.equalsIgnoreCase(schemaName)) {
logger.info("loadLogGroup: Matched {} for {}", nextLogGroupName, schemaName);
return nextLogGroupName;
}
}
return null;
}
/**
* Used to validate and convert the given TableName to a properly cased and qualified CloudwatchTableName.
*
* @param tableName The TableName to validate and convert.
* @return The CloudwatchTableName for the provided TableName or throws if the TableName could not be resolved to a
* CloudwatchTableName. This method mostly handles resolving case mismatches and ensuring the input is a valid entity
* in Cloudwatch.
*/
public CloudwatchTableName validateTable(TableName tableName)
{
String actualSchema = validateSchema(tableName.getSchemaName());
CloudwatchTableName actual = null;
try {
actual = tableCache.get(new TableName(actualSchema, tableName.getTableName()));
if (actual == null) {
throw new IllegalArgumentException("Unknown table[" + tableName + "]");
}
return actual;
}
catch (ExecutionException ex) {
throw new RuntimeException("Exception while attempting to validate table " + tableName, ex);
}
}
/**
* Used to validate and convert the given schema name to a properly cased and qualified CloudwatchTableName.
*
* @param schema The TableName to validate and convert.
* @return The cloudwatch LogGroup (aka schema name) or throws if the schema name could not be resolved to a
* LogGroup. This method mostly handles resolving case mismatches and ensuring the input is a valid entity
* in Cloudwatch.
*/
public String validateSchema(String schema)
{
String actual = null;
try {
actual = schemaCache.get(schema);
if (actual == null) {
throw new IllegalArgumentException("Unknown schema[" + schema + "]");
}
return actual;
}
catch (ExecutionException ex) {
throw new RuntimeException("Exception while attempting to validate schema " + schema, ex);
}
}
}