All downloads are free. Search and download functionality uses the official Maven repository.

com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsMetadataHandler Maven / Gradle / Ivy

/*-
 * #%L
 * athena-cloudwatch-metrics
 * %%
 * Copyright (C) 2019 Amazon Web Services
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package com.amazonaws.athena.connectors.cloudwatch.metrics;

import com.amazonaws.athena.connector.lambda.QueryStatusChecker;
import com.amazonaws.athena.connector.lambda.ThrottlingInvoker;
import com.amazonaws.athena.connector.lambda.data.BlockAllocator;
import com.amazonaws.athena.connector.lambda.data.BlockWriter;
import com.amazonaws.athena.connector.lambda.domain.Split;
import com.amazonaws.athena.connector.lambda.domain.TableName;
import com.amazonaws.athena.connector.lambda.domain.predicate.ConstraintEvaluator;
import com.amazonaws.athena.connector.lambda.domain.predicate.Constraints;
import com.amazonaws.athena.connector.lambda.domain.predicate.ValueSet;
import com.amazonaws.athena.connector.lambda.handlers.MetadataHandler;
import com.amazonaws.athena.connector.lambda.metadata.GetSplitsRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetSplitsResponse;
import com.amazonaws.athena.connector.lambda.metadata.GetTableLayoutRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetTableRequest;
import com.amazonaws.athena.connector.lambda.metadata.GetTableResponse;
import com.amazonaws.athena.connector.lambda.metadata.ListSchemasRequest;
import com.amazonaws.athena.connector.lambda.metadata.ListSchemasResponse;
import com.amazonaws.athena.connector.lambda.metadata.ListTablesRequest;
import com.amazonaws.athena.connector.lambda.metadata.ListTablesResponse;
import com.amazonaws.athena.connector.lambda.security.EncryptionKeyFactory;
import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricSamplesTable;
import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.MetricsTable;
import com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table;
import com.amazonaws.services.athena.AmazonAthena;
import com.amazonaws.services.cloudwatch.AmazonCloudWatch;
import com.amazonaws.services.cloudwatch.AmazonCloudWatchClientBuilder;
import com.amazonaws.services.cloudwatch.model.ListMetricsRequest;
import com.amazonaws.services.cloudwatch.model.ListMetricsResult;
import com.amazonaws.services.cloudwatch.model.Metric;
import com.amazonaws.services.cloudwatch.model.MetricStat;
import com.amazonaws.services.secretsmanager.AWSSecretsManager;
import com.amazonaws.util.CollectionUtils;
import com.google.common.collect.Lists;
import org.apache.arrow.util.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static com.amazonaws.athena.connectors.cloudwatch.metrics.MetricsExceptionFilter.EXCEPTION_FILTER;
import static com.amazonaws.athena.connectors.cloudwatch.metrics.tables.Table.PERIOD_FIELD;

/**
 * Handles metadata requests for the Athena Cloudwatch Metrics Connector.
 * 

* For more detail, please see the module's README.md, some notable characteristics of this class include: *

* 1. Provides two tables (metrics and metric_samples) for accessing Cloudwatch Metrics data via the "default" schema. * 2. Supports Predicate Pushdown into Cloudwatch Metrics for most fields. * 3. If multiple Metrics (namespace, metric, dimension(s), and statistic) are requested, they can be read in parallel. */ public class MetricsMetadataHandler extends MetadataHandler { private static final Logger logger = LoggerFactory.getLogger(MetricsMetadataHandler.class); //Used to log diagnostic info about this connector private static final String SOURCE_TYPE = "metrics"; //List of available statistics (AVERAGE, p90, etc...). protected static final List STATISTICS = new ArrayList<>(); //The schema (aka database) supported by this connector protected static final String SCHEMA_NAME = "default"; //Schema for the metrics table private static final Table METRIC_TABLE; //Schema for the metric_samples table. private static final Table METRIC_DATA_TABLE; //Name of the table which contains details of available metrics. private static final String METRIC_TABLE_NAME; //Name of the table which contains metric samples. private static final String METRIC_SAMPLES_TABLE_NAME; //Lookup table for resolving table name to Schema. 
private static final Map TABLES = new HashMap<>(); //The default metric period to query (60 seconds) private static final int DEFAULT_PERIOD_SEC = 60; //GetMetricData supports up to 100 Metrics per split private static final int MAX_METRICS_PER_SPLIT = 100; //The minimum number of splits we'd like to have for some parallelization private static final int MIN_NUM_SPLITS_FOR_PARALLELIZATION = 3; //Used to handle throttling events by applying AIMD congestion control private final ThrottlingInvoker invoker; private final AmazonCloudWatch metrics; static { //The statistics supported by Cloudwatch Metrics by default STATISTICS.add("Average"); STATISTICS.add("Minimum"); STATISTICS.add("Maximum"); STATISTICS.add("Sum"); STATISTICS.add("SampleCount"); STATISTICS.add("p99"); STATISTICS.add("p95"); STATISTICS.add("p90"); STATISTICS.add("p50"); STATISTICS.add("p10"); METRIC_TABLE = new MetricsTable(); METRIC_DATA_TABLE = new MetricSamplesTable(); METRIC_TABLE_NAME = METRIC_TABLE.getName(); METRIC_SAMPLES_TABLE_NAME = METRIC_DATA_TABLE.getName(); TABLES.put(METRIC_TABLE_NAME, METRIC_TABLE); TABLES.put(METRIC_SAMPLES_TABLE_NAME, METRIC_DATA_TABLE); } public MetricsMetadataHandler(java.util.Map configOptions) { super(SOURCE_TYPE, configOptions); this.metrics = AmazonCloudWatchClientBuilder.standard().build(); this.invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER, configOptions).build(); } @VisibleForTesting protected MetricsMetadataHandler( AmazonCloudWatch metrics, EncryptionKeyFactory keyFactory, AWSSecretsManager secretsManager, AmazonAthena athena, String spillBucket, String spillPrefix, java.util.Map configOptions) { super(keyFactory, secretsManager, athena, SOURCE_TYPE, spillBucket, spillPrefix, configOptions); this.metrics = metrics; this.invoker = ThrottlingInvoker.newDefaultBuilder(EXCEPTION_FILTER, configOptions).build(); } /** * Only supports a single, static, schema defined by SCHEMA_NAME. 
* * @see MetadataHandler */ @Override public ListSchemasResponse doListSchemaNames(BlockAllocator blockAllocator, ListSchemasRequest listSchemasRequest) { return new ListSchemasResponse(listSchemasRequest.getCatalogName(), Collections.singletonList(SCHEMA_NAME)); } /** * Supports a set of static tables defined by: TABLES * * @see MetadataHandler */ @Override public ListTablesResponse doListTables(BlockAllocator blockAllocator, ListTablesRequest listTablesRequest) { List tables = new ArrayList<>(); TABLES.keySet().stream().forEach(next -> tables.add(new TableName(SCHEMA_NAME, next))); return new ListTablesResponse(listTablesRequest.getCatalogName(), tables, null); } /** * Returns the details of the requested static table. * * @see MetadataHandler */ @Override public GetTableResponse doGetTable(BlockAllocator blockAllocator, GetTableRequest getTableRequest) { validateTable(getTableRequest.getTableName()); Table table = TABLES.get(getTableRequest.getTableName().getTableName()); return new GetTableResponse(getTableRequest.getCatalogName(), getTableRequest.getTableName(), table.getSchema(), table.getPartitionColumns()); } /** * Our table doesn't support complex layouts or partitioning so we simply make this method a NoOp and the SDK will * automatically generate a single placeholder partition for us since Athena needs at least 1 partition returned * if there is potetnailly any data to read. We do this because Cloudwatch Metric's APIs do not support the kind of filtering we need to do * reasonably scoped partition pruning. Instead we do the pruning at Split generation time and return a single * partition here. The down side to doing it at Split generation time is that we sacrifice parallelizing Split * generation. However this is not a significant performance detrement to this connector since we can * generate Splits rather quickly and easily. 
* * @see MetadataHandler */ @Override public void getPartitions(BlockWriter blockWriter, GetTableLayoutRequest request, QueryStatusChecker queryStatusChecker) throws Exception { validateTable(request.getTableName()); //NoOp as we do not support partitioning. } /** * Each 'metric' in cloudwatch is uniquely identified by a quad of Namespace, List, MetricName, Statistic. If the * query is for the METRIC_TABLE we return a single split. If the query is for actual metrics data, we start forming batches * of metrics now that will form the basis of GetMetricData requests during readSplits. * * @see MetadataHandler */ @Override public GetSplitsResponse doGetSplits(BlockAllocator blockAllocator, GetSplitsRequest getSplitsRequest) throws Exception { validateTable(getSplitsRequest.getTableName()); //Handle requests for the METRIC_TABLE which requires only 1 split to list available metrics. if (METRIC_TABLE_NAME.equals(getSplitsRequest.getTableName().getTableName())) { //The request is just for meta-data about what metrics exist. Split metricsSplit = Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey()).build(); return new GetSplitsResponse(getSplitsRequest.getCatalogName(), metricsSplit); } //handle generating splits for reading actual metrics data. 
try (ConstraintEvaluator constraintEvaluator = new ConstraintEvaluator(blockAllocator, METRIC_DATA_TABLE.getSchema(), getSplitsRequest.getConstraints())) { ListMetricsRequest listMetricsRequest = new ListMetricsRequest(); MetricUtils.pushDownPredicate(getSplitsRequest.getConstraints(), listMetricsRequest); listMetricsRequest.setNextToken(getSplitsRequest.getContinuationToken()); String period = getPeriodFromConstraint(getSplitsRequest.getConstraints()); Set splits = new HashSet<>(); ListMetricsResult result = invoker.invoke(() -> metrics.listMetrics(listMetricsRequest)); List metricStats = new ArrayList<>(100); for (Metric nextMetric : result.getMetrics()) { for (String nextStatistic : STATISTICS) { if (MetricUtils.applyMetricConstraints(constraintEvaluator, nextMetric, nextStatistic)) { metricStats.add(new MetricStat() .withMetric(new Metric() .withNamespace(nextMetric.getNamespace()) .withMetricName(nextMetric.getMetricName()) .withDimensions(nextMetric.getDimensions())) .withPeriod(Integer.valueOf(period)) .withStat(nextStatistic)); } } } String continuationToken = null; if (result.getNextToken() != null && !result.getNextToken().equalsIgnoreCase(listMetricsRequest.getNextToken())) { continuationToken = result.getNextToken(); } if (CollectionUtils.isNullOrEmpty(metricStats)) { logger.info("No metric stats present after filtering predicates."); return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, continuationToken); } List> partitions = Lists.partition(metricStats, calculateSplitSize(metricStats.size())); for (List partition : partitions) { String serializedMetricStats = MetricStatSerDe.serialize(partition); splits.add(Split.newBuilder(makeSpillLocation(getSplitsRequest), makeEncryptionKey()) .add(MetricStatSerDe.SERIALIZED_METRIC_STATS_FIELD_NAME, serializedMetricStats) .build()); } return new GetSplitsResponse(getSplitsRequest.getCatalogName(), splits, continuationToken); } } /** * Resolved the metric period to query, using a default if no 
period constraint is found. */ private String getPeriodFromConstraint(Constraints constraints) { ValueSet period = constraints.getSummary().get(PERIOD_FIELD); if (period != null && period.isSingleValue()) { return String.valueOf(period.getSingleValue()); } return String.valueOf(DEFAULT_PERIOD_SEC); } /** * Validates that the requested schema and table exist in our static set of supported tables. */ private void validateTable(TableName tableName) { if (!SCHEMA_NAME.equals(tableName.getSchemaName())) { throw new RuntimeException("Unknown table " + tableName); } if (TABLES.get(tableName.getTableName()) == null) { throw new RuntimeException("Unknown table " + tableName); } } /** * Heuristically determines a split size by finding the minimum between: * 1. a split size that will allow for some parallelization. * 2. the maximum split size possible for a GetMetricData request. */ private int calculateSplitSize(int datapointCount) { int numDataPointsForParallelization = (int) Math.ceil((double) datapointCount / MIN_NUM_SPLITS_FOR_PARALLELIZATION); return Math.min(numDataPointsForParallelization, MAX_METRICS_PER_SPLIT); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy