Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Copyright 2015 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package co.cask.cdap.metrics.store;
import co.cask.cdap.api.dataset.lib.cube.Cube;
import co.cask.cdap.api.dataset.lib.cube.CubeDeleteQuery;
import co.cask.cdap.api.dataset.lib.cube.CubeExploreQuery;
import co.cask.cdap.api.dataset.lib.cube.CubeFact;
import co.cask.cdap.api.dataset.lib.cube.CubeQuery;
import co.cask.cdap.api.dataset.lib.cube.DimensionValue;
import co.cask.cdap.api.dataset.lib.cube.MeasureType;
import co.cask.cdap.api.dataset.lib.cube.Measurement;
import co.cask.cdap.api.dataset.lib.cube.TimeSeries;
import co.cask.cdap.api.metrics.MetricDataQuery;
import co.cask.cdap.api.metrics.MetricDeleteQuery;
import co.cask.cdap.api.metrics.MetricSearchQuery;
import co.cask.cdap.api.metrics.MetricStore;
import co.cask.cdap.api.metrics.MetricTimeSeries;
import co.cask.cdap.api.metrics.MetricType;
import co.cask.cdap.api.metrics.MetricValue;
import co.cask.cdap.api.metrics.MetricValues;
import co.cask.cdap.api.metrics.MetricsContext;
import co.cask.cdap.api.metrics.MetricsMessageId;
import co.cask.cdap.api.metrics.MetricsProcessorStatus;
import co.cask.cdap.api.metrics.TagValue;
import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.conf.Constants;
import co.cask.cdap.data2.dataset2.lib.cube.Aggregation;
import co.cask.cdap.data2.dataset2.lib.cube.AggregationAlias;
import co.cask.cdap.data2.dataset2.lib.cube.DefaultAggregation;
import co.cask.cdap.data2.dataset2.lib.cube.DefaultCube;
import co.cask.cdap.data2.dataset2.lib.cube.FactTableSupplier;
import co.cask.cdap.data2.dataset2.lib.timeseries.FactTable;
import co.cask.cdap.messaging.data.MessageId;
import co.cask.cdap.metrics.process.MetricsConsumerMetaTable;
import co.cask.cdap.metrics.process.TopicIdMetaKey;
import co.cask.cdap.metrics.process.TopicProcessMeta;
import co.cask.cdap.proto.id.NamespaceId;
import co.cask.cdap.proto.id.TopicId;
import com.google.common.base.Function;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.inject.Inject;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Nullable;
/**
* Default implementation of {@link MetricStore}.
*/
public class DefaultMetricStore implements MetricStore {
public static final int TOTALS_RESOLUTION = Integer.MAX_VALUE;
static final Map AGGREGATIONS;
private static final String BY_NAMESPACE = "namespace";
private static final String BY_APP = "app";
private static final String BY_FLOW = "flow";
private static final String BY_FLOWLET_QUEUE = "flow.queue";
private static final String BY_MAPREDUCE = "mapreduce";
private static final String BY_SERVICE = "service";
private static final String BY_WORKER = "worker";
private static final String BY_WORKFLOW = "workflow";
private static final String BY_SPARK = "spark";
private static final String BY_STREAM = "stream";
private static final String BY_DATASET = "dataset";
private static final String BY_COMPONENT = "component";
private static final Map AGGREGATIONS_ALIAS_DIMENSIONS =
ImmutableMap.of(BY_WORKFLOW,
new AggregationAlias(ImmutableMap.of(Constants.Metrics.Tag.RUN_ID,
Constants.Metrics.Tag.WORKFLOW_RUN_ID)));
private final int resolutions[];
private final Supplier cube;
private final Supplier metaTableSupplier;
private MetricsContext metricsContext;
private final List metricsTopics;
static {
// NOTE: changing aggregations will require more work than just changing the below code. See CDAP-1466 for details.
Map aggs = Maps.newHashMap();
// Namespaces:
aggs.put(BY_NAMESPACE, new DefaultAggregation(ImmutableList.of(Constants.Metrics.Tag.NAMESPACE)));
// Applications:
aggs.put(BY_APP, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP, Constants.Metrics.Tag.DATASET),
// i.e. for programs only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP)));
// Programs:
// Note that dataset tag goes before runId and such. This is a trade-off between efficiency of two query types:
// * program metrics
// * dataset metrics per program
// It makes the former a bit slower, but bearable, as program usually doesn't access many datasets. While it speeds
// up the latter significantly, otherwise (if dataset tag is after runId and such) queries like
// "writes into dataset A per program" would be potentially scannig thru whole program history.
// flow
aggs.put(BY_FLOW, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.FLOW, Constants.Metrics.Tag.DATASET,
Constants.Metrics.Tag.RUN_ID, Constants.Metrics.Tag.FLOWLET,
Constants.Metrics.Tag.INSTANCE_ID, Constants.Metrics.Tag.FLOWLET_QUEUE),
// i.e. for flows only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.FLOW)));
// queue
aggs.put(BY_FLOWLET_QUEUE, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.FLOW, Constants.Metrics.Tag.CONSUMER,
Constants.Metrics.Tag.PRODUCER, Constants.Metrics.Tag.FLOWLET_QUEUE),
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.FLOW, Constants.Metrics.Tag.CONSUMER,
Constants.Metrics.Tag.PRODUCER, Constants.Metrics.Tag.FLOWLET_QUEUE)));
// mapreduce
aggs.put(BY_MAPREDUCE, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.MAPREDUCE, Constants.Metrics.Tag.DATASET,
Constants.Metrics.Tag.RUN_ID, Constants.Metrics.Tag.MR_TASK_TYPE,
Constants.Metrics.Tag.INSTANCE_ID),
// i.e. for mapreduce only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.MAPREDUCE)));
// service
aggs.put(BY_SERVICE, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.SERVICE, Constants.Metrics.Tag.DATASET,
Constants.Metrics.Tag.RUN_ID, Constants.Metrics.Tag.HANDLER,
Constants.Metrics.Tag.METHOD, Constants.Metrics.Tag.INSTANCE_ID,
Constants.Metrics.Tag.THREAD),
// i.e. for service only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.SERVICE)));
// worker
aggs.put(BY_WORKER, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.WORKER, Constants.Metrics.Tag.DATASET,
Constants.Metrics.Tag.RUN_ID, Constants.Metrics.Tag.INSTANCE_ID),
// i.e. for worker only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.WORKER)));
// workflow
aggs.put(BY_WORKFLOW, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.WORKFLOW, Constants.Metrics.Tag.DATASET,
Constants.Metrics.Tag.RUN_ID, Constants.Metrics.Tag.NODE),
// i.e. for workflow only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.WORKFLOW)));
// spark
aggs.put(BY_SPARK, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.SPARK, Constants.Metrics.Tag.DATASET,
Constants.Metrics.Tag.RUN_ID),
// i.e. for spark only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.APP,
Constants.Metrics.Tag.SPARK)));
// Streams:
aggs.put(BY_STREAM, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.STREAM),
// i.e. for streams only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.STREAM)));
// Datasets:
aggs.put(BY_DATASET, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.DATASET),
// i.e. for datasets only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.DATASET)));
// System components:
aggs.put(BY_COMPONENT, new DefaultAggregation(
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.COMPONENT,
Constants.Metrics.Tag.HANDLER, Constants.Metrics.Tag.METHOD),
// i.e. for components only
ImmutableList.of(Constants.Metrics.Tag.NAMESPACE, Constants.Metrics.Tag.COMPONENT)));
AGGREGATIONS = Collections.unmodifiableMap(aggs);
}
@Inject
public DefaultMetricStore(final MetricDatasetFactory dsFactory,
final CConfiguration cConf) {
this(dsFactory, new int[] {1, 60, 3600, TOTALS_RESOLUTION}, cConf);
}
// NOTE: should never be used apart from data migration during cdap upgrade
public DefaultMetricStore(final MetricDatasetFactory dsFactory, final int resolutions[],
final CConfiguration cConf) {
this.resolutions = resolutions;
final FactTableSupplier factTableSupplier = new FactTableSupplier() {
@Override
public FactTable get(int resolution, int ignoredRollTime) {
// roll time will be taken from configuration todo: clean this up
return dsFactory.getOrCreateFactTable(resolution);
}
};
this.cube = Suppliers.memoize(new Supplier() {
@Override
public Cube get() {
DefaultCube cube = new DefaultCube(resolutions, factTableSupplier, AGGREGATIONS, AGGREGATIONS_ALIAS_DIMENSIONS);
cube.setMetricsCollector(metricsContext);
return cube;
}
});
this.metaTableSupplier = Suppliers.memoize(new Supplier() {
@Override
public MetricsConsumerMetaTable get() {
return dsFactory.createConsumerMeta();
}
});
int topicNumbers = cConf.getInt(Constants.Metrics.MESSAGING_TOPIC_NUM);
String topicPrefix = cConf.get(Constants.Metrics.TOPIC_PREFIX);
metricsTopics = new ArrayList<>();
for (int i = 0; i < topicNumbers; i++) {
this.metricsTopics.add(NamespaceId.SYSTEM.topic(topicPrefix + i));
}
}
@Override
public void setMetricsContext(MetricsContext metricsContext) {
this.metricsContext = metricsContext;
}
@Override
public void add(MetricValues metricValues) throws Exception {
add(ImmutableList.of(metricValues));
}
@Override
public void add(Collection metricValues) throws Exception {
List facts = Lists.newArrayListWithCapacity(metricValues.size());
for (MetricValues metricValue : metricValues) {
String scope = metricValue.getTags().get(Constants.Metrics.Tag.SCOPE);
List metrics = Lists.newArrayList();
// todo improve this logic?
for (MetricValue metric : metricValue.getMetrics()) {
String measureName = (scope == null ? "system." : scope + ".") + metric.getName();
MeasureType type = metric.getType() == MetricType.COUNTER ? MeasureType.COUNTER : MeasureType.GAUGE;
metrics.add(new Measurement(measureName, type, metric.getValue()));
}
CubeFact fact = new CubeFact(metricValue.getTimestamp())
.addDimensionValues(metricValue.getTags())
.addMeasurements(metrics);
facts.add(fact);
}
cube.get().add(facts);
}
@Override
public Collection query(MetricDataQuery query) {
Collection cubeResult = cube.get().query(buildCubeQuery(query));
List result = Lists.newArrayList();
for (TimeSeries timeSeries : cubeResult) {
result.add(new MetricTimeSeries(timeSeries.getMeasureName(),
timeSeries.getDimensionValues(),
timeSeries.getTimeValues()));
}
return result;
}
private CubeQuery buildCubeQuery(MetricDataQuery query) {
String aggregation = getAggregation(query);
return new CubeQuery(aggregation, query.getStartTs(), query.getEndTs(),
query.getResolution(), query.getLimit(), query.getMetrics(),
query.getSliceByTags(), query.getGroupByTags(), query.getInterpolator());
}
@Nullable
private String getAggregation(MetricDataQuery query) {
// We mostly rely on auto-selection of aggregation during query (in which case null is returned from
// this method). In some specific cases we need to help resolve the aggregation though.
Set tagNames = ImmutableSet.builder()
.addAll(query.getSliceByTags().keySet()).addAll(query.getGroupByTags()).build();
if (tagNames.contains(Constants.Metrics.Tag.FLOW)) {
// NOTE: BY_FLOWLET_QUEUE agg has only producer and consumer metrics
if (tagNames.contains(Constants.Metrics.Tag.PRODUCER) || tagNames.contains(Constants.Metrics.Tag.CONSUMER)) {
return BY_FLOWLET_QUEUE;
} else {
return BY_FLOW;
}
}
return null;
}
@Override
public void deleteBefore(long timestamp) throws Exception {
// Delete all data before the timestamp. null for MeasureName indicates match any MeasureName.
for (int resolution : resolutions) {
// NOTE: we do not purge on TTL the "totals" currently, as there might be system components dependent on it
if (TOTALS_RESOLUTION == resolution) {
continue;
}
CubeDeleteQuery query = new CubeDeleteQuery(0, timestamp, resolution, Maps.newHashMap());
cube.get().delete(query);
}
}
@Override
public void delete(MetricDeleteQuery query) throws Exception {
cube.get().delete(buildCubeDeleteQuery(query));
}
@Override
public void deleteAll() throws Exception {
// this will delete all aggregates metrics data
delete(new MetricDeleteQuery(0, System.currentTimeMillis() / 1000, Maps.newHashMap()));
// this will delete all timeseries data
deleteBefore(System.currentTimeMillis() / 1000);
}
private CubeDeleteQuery buildCubeDeleteQuery(MetricDeleteQuery query) {
// note: delete query currently usually executed synchronously,
// so we only attempt to delete totals, to avoid timeout
return new CubeDeleteQuery(query.getStartTs(), query.getEndTs(), TOTALS_RESOLUTION,
query.getSliceByTags(), query.getMetricNames());
}
@Override
public Collection findNextAvailableTags(MetricSearchQuery query) throws Exception {
Collection tags = cube.get().findDimensionValues(buildCubeSearchQuery(query));
Collection result = Lists.newArrayList();
for (DimensionValue dimensionValue : tags) {
result.add(new TagValue(dimensionValue.getName(), dimensionValue.getValue()));
}
return result;
}
private CubeExploreQuery buildCubeSearchQuery(MetricSearchQuery query) {
return new CubeExploreQuery(query.getStartTs(), query.getEndTs(), query.getResolution(),
query.getLimit(), toTagValues(query.getTagValues()));
}
@Override
public Collection findMetricNames(MetricSearchQuery query) throws Exception {
return cube.get().findMeasureNames(buildCubeSearchQuery(query));
}
/**
* Read the metrics processing stats from meta table and return the map of topic information to stats
* @return Map of topic to metrics processing stats
* @throws Exception
*/
@Override
public Map getMetricsProcessorStats() throws Exception {
MetricsConsumerMetaTable metaTable = metaTableSupplier.get();
Map processMap = new HashMap<>();
for (TopicId topicId : metricsTopics) {
TopicProcessMeta topicProcessMeta = metaTable.getTopicProcessMeta(new TopicIdMetaKey(topicId));
if (topicProcessMeta != null) {
MessageId messageId = new MessageId(topicProcessMeta.getMessageId());
MetricsMessageId metricsMessageId = new MetricsMessageId(messageId.getPublishTimestamp(),
messageId.getSequenceId(),
messageId.getPayloadWriteTimestamp(),
messageId.getPayloadSequenceId());
processMap.put(
topicId.getTopic(), new MetricsProcessorStatus(metricsMessageId,
topicProcessMeta.getOldestMetricsTimestamp(),
topicProcessMeta.getLatestMetricsTimestamp(),
topicProcessMeta.getMessagesProcessed(),
topicProcessMeta.getLastProcessedTimestamp()));
}
}
return processMap;
}
private List toTagValues(List input) {
return Lists.transform(input, new Function() {
@Nullable
@Override
public DimensionValue apply(co.cask.cdap.api.metrics.TagValue input) {
if (input == null) {
// SHOULD NEVER happen
throw new NullPointerException();
}
return new DimensionValue(input.getName(), input.getValue());
}
});
}
}