
// com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsMetadataHandler (Maven / Gradle / Ivy)
/*-
* #%L
* athena-cloudwatch-metrics
* %%
* Copyright (C) 2019 Amazon Web Services
* %%
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* #L%
*/
package com.amazonaws.athena.connectors.cloudwatch.metrics;
import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
import com.amazonaws.athena.connector.lambda.data.BlockWriter;
import com.amazonaws.athena.connector.lambda.domain.Split;
import com.amazonaws.athena.connector.lambda.domain.TableName;
import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricSamplesTable;
import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricsTable;
import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table;
import com.amazonaws.services.athena.AmazonAthena;
import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder;
import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
import com.amazonaws.services.cloudwatch.model.Metric;
import com.amazonaws.services.cloudwatch.model.MetricStat;
import com.amazonaws.services.secretsmanager.AWSSecretsManager;
import com.amazonaws.util.CollectionUtils;
import com.google.common.collect.Lists;
import org.apache.arrow.util.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsExceptionFilter.EXCEPTION_FILTER;
import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;
/**
* Handles metadata requests for the Athena Cloudwatch Metrics Connector.
*
* For more detail, please see the module's README.md, some notable characteristics of this class include:
*
* 1. Provides two tables (metrics and metric_samples) for accessing Cloudwatch Metrics data via the "default" schema.
* 2. Supports Predicate Pushdown into Cloudwatch Metrics for most fields.
* 3. If multiple Metrics (namespace, metric, dimension(s), and statistic) are requested, they can be read in parallel.
*/
public class MetricsMetadataHandler
extends MetadataHandler
{
private static final Logger logger = LoggerFactory.getLogger(MetricsMetadataHandler.class);
//Used to log diagnostic info about this connector
private static final String SOURCE_TYPE = "metrics";
//List of available statistics (AVERAGE, p90, etc...).
protected static final List STATISTICS = new ArrayList<>();
//The schema (aka database) supported by this connector
protected static final String SCHEMA_NAME = "default";
//Schema for the metrics table
private static final Table METRIC_TABLE;
//Schema for the metric_samples table.
private static final Table METRIC_DATA_TABLE;
//Name of the table which contains details of available metrics.
private static final String METRIC_TABLE_NAME;
//Name of the table which contains metric samples.
private static final String METRIC_SAMPLES_TABLE_NAME;
//Lookup table for resolving table name to Schema.
private static final Map TABLES = new HashMap<>();
//The default metric period to query (60 seconds)
private static final int DEFAULT_PERIOD_SEC = 60;
//GetMetricData supports up to 100 Metrics per split
private static final int MAX_METRICS_PER_SPLIT = 100;
//The minimum number of splits we'd like to have for some parallelization
private static final int MIN_NUM_SPLITS_FOR_PARALLELIZATION = 3;
//Used to handle throttling events by applying AIMD congestion control
private final ThrottlingInvoker invoker;
private final AmazonCloudWatch metrics;
static {
//The statistics supported by Cloudwatch Metrics by default
STATISTICS.add("Average");
STATISTICS.add("Minimum");
STATISTICS.add("Maximum");
STATISTICS.add("Sum");
STATISTICS.add("SampleCount");
STATISTICS.add("p99");
STATISTICS.add("p95");
STATISTICS.add("p90");
STATISTICS.add("p50");
STATISTICS.add("p10");
METRIC_TABLE = new MetricsTable();
METRIC_DATA_TABLE = new MetricSamplesTable();
METRIC_TABLE_NAME = METRIC_TABLE.getName();
METRIC_SAMPLES_TABLE_NAME = METRIC_DATA_TABLE.getName();
TABLES.put(METRIC_TABLE_NAME, METRIC_TABLE);
TABLES.put(METRIC_SAMPLES_TABLE_NAME, METRIC_DATA_TABLE);
}
public MetricsMetadataHandler(java.util.Map configOptions)
{
super(SOURCE_TYPE, configOptions);
this.metrics = AmazonCloudWatchClientBuilder.standard().build();
this.invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER, configOptions).build();
}
@VisibleForTesting
protected MetricsMetadataHandler(
AmazonCloudWatch metrics,
EncryptionKeyFactory keyFactory,
AWSSecretsManager secretsManager,
AmazonAthena athena,
String spillBucket,
String spillPrefix,
java.util.Map configOptions)
{
super(keyFactory, secretsManager, athena, SOURCE_TYPE, spillBucket, spillPrefix, configOptions);
this.metrics = metrics;
this.invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER, configOptions).build();
}
/**
* Only supports a single, static, schema defined by SCHEMA_NAME.
*
* @see MetadataHandler
*/
@Override
public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest)
{
return new ListSchemasResponse(listSchemasRequest.getCatalogName(), Collections.singletonList(SCHEMA_NAME));
}
/**
* Supports a set of static tables defined by: TABLES
*
* @see MetadataHandler
*/
@Override
public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest)
{
List tables = new ArrayList<>();
TABLES.keySet().stream().forEach(next -> tables.add(new TableName(SCHEMA_NAME, next)));
return new ListTablesResponse(listTablesRequest.getCatalogName(), tables, null);
}
/**
* Returns the details of the requested static table.
*
* @see MetadataHandler
*/
@Override
public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest)
{
validateTable(getTableRequest.getTableName());
Table table = TABLES.get(getTableRequest.getTableName().getTableName());
return new GetTableResponse(getTableRequest.getCatalogName(),
getTableRequest.getTableName(),
table.getSchema(),
table.getPartitionColumns());
}
/**
* Our table doesn't support complex layouts or partitioning so we simply make this method a NoOp and the SDK will
* automatically generate a single placeholder partition for us since Athena needs at least 1 partition returned
* if there is potetnailly any data to read. We do this because Cloudwatch Metric's APIs do not support the kind of filtering we need to do
* reasonably scoped partition pruning. Instead we do the pruning at Split generation time and return a single
* partition here. The down side to doing it at Split generation time is that we sacrifice parallelizing Split
* generation. However this is not a significant performance detrement to this connector since we can
* generate Splits rather quickly and easily.
*
* @see MetadataHandler
*/
@Override
public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker)
throws Exception
{
validateTable(request.getTableName());
//NoOp as we do not support partitioning.
}
/**
* Each 'metric' in cloudwatch is uniquely identified by a quad of Namespace, List, MetricName, Statistic. If the
* query is for the METRIC_TABLE we return a single split. If the query is for actual metrics data, we start forming batches
* of metrics now that will form the basis of GetMetricData requests during readSplits.
*
* @see MetadataHandler
*/
@Override
public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest)
throws Exception
{
validateTable(getSplitsRequest.getTableName());
//Handle requests for the METRIC_TABLE which requires only 1 split to list available metrics.
if (METRIC_TABLE_NAME.equals(getSplitsRequest.getTableName().getTableName())) {
//The request is just for meta-data about what metrics exist.
Split metricsSplit = Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey()).build();
return new GetSplitsResponse(getSplitsRequest.getCatalogName(), metricsSplit);
}
//handle generating splits for reading actual metrics data.
try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(blockAllocator,
METRIC_DATA_TABLE.getSchema(),
getSplitsRequest.getConstraints())) {
ListMetricsRequest listMetricsRequest = new ListMetricsRequest();
MetricUtils.pushDownPredicate(getSplitsRequest.getConstraints(), listMetricsRequest);
listMetricsRequest.setNextToken(getSplitsRequest.getContinuationToken());
String period = getPeriodFromConstraint(getSplitsRequest.getConstraints());
Set splits = new HashSet<>();
ListMetricsResult result = invoker.invoke(() -> metrics.listMetrics(listMetricsRequest));
List metricStats = new ArrayList<>(100);
for (Metric nextMetric : result.getMetrics()) {
for (String nextStatistic : STATISTICS) {
if (MetricUtils.applyMetricConstraints(constraintEvaluator, nextMetric, nextStatistic)) {
metricStats.add(new MetricStat()
.withMetric(new Metric()
.withNamespace(nextMetric.getNamespace())
.withMetricName(nextMetric.getMetricName())
.withDimensions(nextMetric.getDimensions()))
.withPeriod(Integer.valueOf(period))
.withStat(nextStatistic));
}
}
}
String continuationToken = null;
if (result.getNextToken() != null &&
!result.getNextToken().equalsIgnoreCase(listMetricsRequest.getNextToken())) {
continuationToken = result.getNextToken();
}
if (CollectionUtils.isNullOrEmpty(metricStats)) {
logger.info("No metric stats present after filtering predicates.");
return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, continuationToken);
}
List> partitions = Lists.partition(metricStats, calculateSplitSize(metricStats.size()));
for (List partition : partitions) {
String serializedMetricStats = MetricStatSerDe.serialize(partition);
splits.add(Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey())
.add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, serializedMetricStats)
.build());
}
return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, continuationToken);
}
}
/**
* Resolved the metric period to query, using a default if no period constraint is found.
*/
private String getPeriodFromConstraint(Constraints constraints)
{
ValueSet period = constraints.getSummary().get(PERIOD_FIELD);
if (period != null && period.isSingleValue()) {
return String.valueOf(period.getSingleValue());
}
return String.valueOf(DEFAULT_PERIOD_SEC);
}
/**
* Validates that the requested schema and table exist in our static set of supported tables.
*/
private void validateTable(TableName tableName)
{
if (!SCHEMA_NAME.equals(tableName.getSchemaName())) {
throw new RuntimeException("Unknown table " + tableName);
}
if (TABLES.get(tableName.getTableName()) == null) {
throw new RuntimeException("Unknown table " + tableName);
}
}
/**
* Heuristically determines a split size by finding the minimum between:
* 1. a split size that will allow for some parallelization.
* 2. the maximum split size possible for a GetMetricData request.
*/
private int calculateSplitSize(int datapointCount)
{
int numDataPointsForParallelization = (int) Math.ceil((double) datapointCount / MIN_NUM_SPLITS_FOR_PARALLELIZATION);
return Math.min(numDataPointsForParallelization, MAX_METRICS_PER_SPLIT);
}
}