alluxio.metrics.MetricsSystem Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of alluxio-core-common Show documentation
Show all versions of alluxio-core-common Show documentation
Common utilities shared in Alluxio core modules
/*
* The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
* (the "License"). You may not use this work except in compliance with the License, which is
* available at www.apache.org/licenses/LICENSE-2.0
*
* This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
* either express or implied, as more fully set forth in the License.
*
* See the NOTICE file distributed with this work for information regarding copyright ownership.
*/
package alluxio.metrics;
import alluxio.AlluxioURI;
import alluxio.conf.AlluxioConfiguration;
import alluxio.conf.Configuration;
import alluxio.conf.PropertyKey;
import alluxio.grpc.MetricType;
import alluxio.grpc.MetricValue;
import alluxio.metrics.sink.Sink;
import alluxio.util.CommonUtils;
import alluxio.util.network.NetworkAddressUtils;
import com.codahale.metrics.CachedGauge;
import com.codahale.metrics.Counter;
import com.codahale.metrics.Gauge;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SlidingTimeWindowMovingAverages;
import com.codahale.metrics.Timer;
import com.codahale.metrics.UniformReservoir;
import com.codahale.metrics.jvm.CachedThreadStatesGaugeSet;
import com.codahale.metrics.jvm.ClassLoadingGaugeSet;
import com.codahale.metrics.jvm.GarbageCollectorMetricSet;
import com.codahale.metrics.jvm.JvmAttributeGaugeSet;
import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.management.BufferPoolMXBean;
import java.lang.management.ManagementFactory;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.annotation.Nullable;
import javax.annotation.concurrent.GuardedBy;
import javax.annotation.concurrent.ThreadSafe;
/**
* A MetricsSystem is created by a specific instance (master, worker). It polls the metrics sources
* periodically and passes the data to the sinks.
*
* The syntax of the metrics configuration file is: sink.[name].[options]=[value]
*/
@ThreadSafe
public final class MetricsSystem {
private static final Logger LOG = LoggerFactory.getLogger(MetricsSystem.class);
// A cache from the metric name supplied by callers to its qualified registry name.
private static final ConcurrentHashMap<String, String> CACHED_METRICS =
    new ConcurrentHashMap<>();
private static int sResolveTimeout =
    (int) Configuration.getMs(PropertyKey.NETWORK_HOST_RESOLUTION_TIMEOUT_MS);
private static boolean sUniqueIDEnabled =
    Configuration.getBoolean(PropertyKey.METRICS_KEY_INCLUDING_UNIQUE_ID_ENABLED);
// A map from AlluxioURI to corresponding cached escaped path.
private static final ConcurrentHashMap<AlluxioURI, String> CACHED_ESCAPED_PATH =
    new ConcurrentHashMap<>();
// A map from the metric name to its previously reported value.
// This map is used for calculating the counter diff value that will be reported.
private static final Map<String, Long> LAST_REPORTED_METRICS = new HashMap<>();
// A map that records all the metrics that should be reported and aggregated at leading master,
// from full metric name to its metric type.
private static final Map<String, MetricType> SHOULD_REPORT_METRICS = new ConcurrentHashMap<>();
// A pattern whose first group captures the first two dot-separated components
// of a full metric name.
private static final Pattern METRIC_NAME_PATTERN = Pattern.compile("^(.*?[.].*?)[.].*");
// The instance types whose should-report metrics have already been initialized.
// Using this prevents us from initializing {@link #SHOULD_REPORT_METRICS} more than once
// per instance type.
private static Set<InstanceType> sReported = new HashSet<>();
// The source of the metrics in this metrics system.
// It can be set through property keys based on process types.
// The local hostname will be used if no related property key is found.
private static Supplier<String> sSourceNameSupplier =
    CommonUtils.memoize(() -> constructSourceName());
// The instrumented executor services created by this class, keyed by the caller-supplied name.
private static final Map<String, InstrumentedExecutorService>
    EXECUTOR_SERVICES = new ConcurrentHashMap<>();
private static final int SECONDS_IN_A_MINUTE = 60;
/**
 * An enum of supported instance type.
 */
public enum InstanceType {
  CLUSTER("Cluster"),
  SERVER("Server"),
  MASTER("Master"),
  WORKER("Worker"),
  SECURITY("Security"),
  JOB_MASTER("JobMaster"),
  JOB_WORKER("JobWorker"),
  PLUGIN("Plugin"),
  PROCESS("Process"),
  PROXY("Proxy"),
  CLIENT("Client"),
  FUSE("Fuse");

  /** The display value of the instance type; assigned once at construction. */
  private final String mValue;

  /**
   * Creates the instance type with value.
   *
   * @param value value of the instance type
   */
  InstanceType(String value) {
    mValue = value;
  }

  /**
   * @return the display value given to the constructor, not the constant name
   */
  @Override
  public String toString() {
    return mValue;
  }

  /**
   * Creates an instance type from the string. This method is case insensitive and matches
   * against the display value ({@link #toString()}), not the constant name.
   *
   * @param text the instance type in string
   * @return the created instance
   * @throws IllegalArgumentException if no instance type has the given display value
   */
  public static InstanceType fromString(String text) {
    for (InstanceType type : InstanceType.values()) {
      if (type.toString().equalsIgnoreCase(text)) {
        return type;
      }
    }
    throw new IllegalArgumentException("No constant with text " + text + " found");
  }
}
// Supported special instance names.
public static final String CLUSTER = "Cluster";
// The platform buffer pool named "direct"; null when the JVM exposes no such pool.
public static final BufferPoolMXBean DIRECT_BUFFER_POOL;
// The single registry that holds every metric created through this class.
public static final MetricRegistry METRIC_REGISTRY;
static {
// The registry must be assigned first: the gauge registration at the end of this
// initializer goes through registerGaugeIfAbsent, which reads METRIC_REGISTRY.
METRIC_REGISTRY = new MetricRegistry();
METRIC_REGISTRY.registerAll(new JvmAttributeGaugeSet());
METRIC_REGISTRY.registerAll(new GarbageCollectorMetricSet());
METRIC_REGISTRY.registerAll(new MemoryUsageGaugeSet());
METRIC_REGISTRY.registerAll(new ClassLoadingGaugeSet());
METRIC_REGISTRY.registerAll(new CachedThreadStatesGaugeSet(5, TimeUnit.SECONDS));
METRIC_REGISTRY.registerAll(new OperationSystemGaugeSet());
DIRECT_BUFFER_POOL = getDirectBufferPool();
// Expose the used direct memory as a gauge under the process-qualified metric name.
MetricsSystem.registerGaugeIfAbsent(
MetricsSystem.getMetricName(MetricKey.PROCESS_POOL_DIRECT_MEM_USED.getName()),
MetricsSystem::getDirectMemUsed);
}
/**
 * Looks up the platform buffer pool whose name is "direct".
 *
 * @return the direct buffer pool bean, or null if the platform reports no pool with that name
 */
private static BufferPoolMXBean getDirectBufferPool() {
  return ManagementFactory.getPlatformMXBeans(BufferPoolMXBean.class).stream()
      .filter(pool -> pool.getName().equals("direct"))
      .findFirst()
      .orElse(null);
}
/**
 * @return the memory used as reported by {@link #DIRECT_BUFFER_POOL}, or 0 when no direct
 *         buffer pool was found
 */
public static long getDirectMemUsed() {
  return DIRECT_BUFFER_POOL == null ? 0 : DIRECT_BUFFER_POOL.getMemoryUsed();
}
// The started sinks; null while the sinks are not started.
@GuardedBy("MetricsSystem")
private static List<Sink> sSinks;
// Matches sink configuration keys of the form sink.[name].[option].
public static final String SINK_REGEX = "^sink\\.(.+)\\.(.+)";
// The minimal polling period accepted by checkMinimalPollingPeriod is 1 second.
private static final TimeUnit MINIMAL_POLL_UNIT = TimeUnit.SECONDS;
private static final int MINIMAL_POLL_PERIOD = 1;
/**
 * Starts the sinks described by the given metrics configuration file.
 * Does nothing when sinks are already started or when the file location is empty.
 * Note: This has to be called after Alluxio configuration is initialized.
 *
 * @param metricsConfFile the location of the metrics configuration file
 */
public static void startSinks(String metricsConfFile) {
  boolean alreadyStarted;
  synchronized (MetricsSystem.class) {
    alreadyStarted = sSinks != null;
  }
  if (alreadyStarted) {
    LOG.debug("Sinks have already been started.");
    return;
  }
  if (metricsConfFile.isEmpty()) {
    LOG.info("Metrics is not enabled.");
    return;
  }
  // startSinksFromConfig re-checks the started state under the class lock.
  startSinksFromConfig(new MetricsConfig(metricsConfFile));
}
/**
 * Constructs the source name of metrics in this {@link MetricsSystem}.
 *
 * The name is the value of the process-type specific property key when that key is set;
 * otherwise it is the local host metric name, or "unknown" if resolving it fails.
 *
 * @return the source name
 */
private static String constructSourceName() {
  final PropertyKey sourceKey;
  switch (CommonUtils.PROCESS_TYPE.get()) {
    case CLIENT:
      sourceKey = PropertyKey.USER_APP_ID;
      break;
    case MASTER:
      sourceKey = PropertyKey.MASTER_HOSTNAME;
      break;
    case WORKER:
      sourceKey = PropertyKey.WORKER_HOSTNAME;
      break;
    case JOB_MASTER:
      sourceKey = PropertyKey.JOB_MASTER_HOSTNAME;
      break;
    case JOB_WORKER:
      sourceKey = PropertyKey.JOB_WORKER_HOSTNAME;
      break;
    default:
      // Other process types have no dedicated property and fall back to the host name.
      sourceKey = null;
      break;
  }
  AlluxioConfiguration conf = Configuration.global();
  if (sourceKey != null && conf.isSet(sourceKey)) {
    return conf.getString(sourceKey);
  }
  // Avoid throwing RuntimeException when the hostname cannot be resolved
  // during metrics reporting.
  try {
    return NetworkAddressUtils.getLocalHostMetricName(sResolveTimeout);
  } catch (RuntimeException e) {
    LOG.error("Can't find local host name", e);
    return "unknown";
  }
}
/**
 * Starts sinks from a given metrics configuration. This is made public for unit test.
 * This is a no-op if the sinks have already been started.
 *
 * Every sink configuration that declares a "class" property is instantiated reflectively
 * through its {@code (Properties, MetricRegistry)} constructor and started. A sink that fails
 * to be instantiated or started is logged and skipped; the remaining sinks are still started.
 *
 * @param config the metrics config
 */
public static synchronized void startSinksFromConfig(MetricsConfig config) {
  if (sSinks != null) {
    LOG.debug("Sinks have already been started.");
    return;
  }
  LOG.info("Starting sinks with config: {}.", config);
  sSinks = new ArrayList<>();
  Map<String, Properties> sinkConfigs =
      MetricsConfig.subProperties(config.getProperties(), SINK_REGEX);
  // Only the per-sink properties are needed; the sink name (map key) is not used.
  for (Properties sinkProperties : sinkConfigs.values()) {
    String classPath = sinkProperties.getProperty("class");
    if (classPath == null) {
      continue;
    }
    LOG.info("Starting sink {}.", classPath);
    try {
      Sink sink =
          (Sink) Class.forName(classPath).getConstructor(Properties.class, MetricRegistry.class)
              .newInstance(sinkProperties, METRIC_REGISTRY);
      sink.start();
      sSinks.add(sink);
    } catch (Exception e) {
      LOG.error("Sink class {} cannot be instantiated", classPath, e);
    }
  }
}
/**
 * Stops all the sinks.
 *
 * A failure while stopping one sink is logged and does not prevent the remaining sinks from
 * being stopped. After this call the sinks are always considered not started.
 */
public static synchronized void stopSinks() {
  if (sSinks != null) {
    for (Sink sink : sSinks) {
      try {
        sink.stop();
      } catch (RuntimeException e) {
        // Mirror startSinksFromConfig: a single misbehaving sink must not affect the others.
        LOG.error("Failed to stop sink {}", sink.getClass().getName(), e);
      }
    }
  }
  sSinks = null;
}
/**
 * Reports whether the sinks have been started, i.e. whether the sink list exists.
 *
 * @return true if the metric system is started, false otherwise
 */
@VisibleForTesting
public static synchronized boolean isStarted() {
  final boolean started = sSinks != null;
  return started;
}
/**
 * @return the number of sinks started, or 0 when the sinks are not started
 */
public static synchronized int getNumSinks() {
  return sSinks == null ? 0 : sSinks.size();
}
/**
 * Gets the qualified metric name for a resource pool, derived from the simple class name of
 * the given object prefixed with "ResourcePool.".
 *
 * @param obj object for the resource pool
 * @return metrics string
 */
public static String getResourcePoolMetricName(Object obj) {
  String poolClassName = obj.getClass().getSimpleName();
  return MetricsSystem.getMetricName("ResourcePool." + poolClassName);
}
/**
 * Converts a simple string to a qualified metric name based on the process type.
 * Names that already start with the cluster prefix are returned unchanged.
 *
 * @param name the name of the metric
 * @return the metric with instance and id tags
 * @throws IllegalStateException if the current process type has no metric name mapping
 */
public static String getMetricName(String name) {
  if (!name.startsWith(CLUSTER)) {
    switch (CommonUtils.PROCESS_TYPE.get()) {
      case MASTER:
        return getMasterMetricName(name);
      case WORKER:
        return getWorkerMetricName(name);
      case CLIENT:
        return getClientMetricName(name);
      case PROXY:
        return getProxyMetricName(name);
      case JOB_MASTER:
        return getJobMasterMetricName(name);
      case JOB_WORKER:
        return getJobWorkerMetricName(name);
      case SECURITY:
        return getSecurityMetricName(name);
      case PLUGIN:
        return getPluginMetricName(name);
      default:
        throw new IllegalStateException("Unknown process type");
    }
  }
  return name;
}
/**
 * Builds metric registry names for master instance. The pattern is instance.metricName.
 * A name that already starts with the master instance prefix is kept as is. Results are
 * cached by the given name.
 *
 * @param name the metric name
 * @return the metric registry name
 */
public static String getMasterMetricName(String name) {
  String cached = CACHED_METRICS.get(name);
  if (cached != null) {
    return cached;
  }
  final String masterPrefix = InstanceType.MASTER.toString();
  return CACHED_METRICS.computeIfAbsent(name,
      ignored -> name.startsWith(masterPrefix) ? name : masterPrefix + "." + name);
}
/**
 * Builds metric registry name for worker instance by delegating to
 * {@link #getMetricNameWithUniqueId} with the worker instance type. Results are cached by
 * the given name.
 *
 * @param name the metric name
 * @return the metric registry name
 */
private static String getWorkerMetricName(String name) {
  String cached = CACHED_METRICS.get(name);
  return cached != null
      ? cached
      : CACHED_METRICS.computeIfAbsent(name,
          ignored -> getMetricNameWithUniqueId(InstanceType.WORKER, name));
}
/**
 * Builds metric registry name for security instance by delegating to
 * {@link #getMetricNameWithUniqueId} with the security instance type. Results are cached by
 * the given name.
 *
 * @param name the metric name
 * @return the metric registry name
 */
private static String getSecurityMetricName(String name) {
  String cached = CACHED_METRICS.get(name);
  return cached != null
      ? cached
      : CACHED_METRICS.computeIfAbsent(name,
          ignored -> getMetricNameWithUniqueId(InstanceType.SECURITY, name));
}
/**
 * Builds metric registry name for client instance. Results are cached by the given name.
 *
 * Names that start with the master or cluster prefix are kept unchanged and names that start
 * with the worker prefix are resolved as worker metric names; every other name is qualified
 * through {@link #getMetricNameWithUniqueId} with the client instance type.
 *
 * @param name the metric name
 * @return the metric registry name
 */
private static String getClientMetricName(String name) {
  String cached = CACHED_METRICS.get(name);
  if (cached != null) {
    return cached;
  }
  // Added for integration tests where process type is always client
  boolean masterOrClusterName = name.startsWith(InstanceType.MASTER.toString())
      || name.startsWith(InstanceType.CLUSTER.toString());
  if (masterOrClusterName) {
    return CACHED_METRICS.computeIfAbsent(name, ignored -> name);
  }
  boolean workerName = name.startsWith(InstanceType.WORKER.toString());
  if (workerName) {
    return getWorkerMetricName(name);
  }
  return CACHED_METRICS.computeIfAbsent(name,
      ignored -> getMetricNameWithUniqueId(InstanceType.CLIENT, name));
}
/**
 * Builds metric registry name for a proxy instance by delegating to
 * {@link #getMetricNameWithUniqueId} with the proxy instance type.
 *
 * @param name the metric name
 * @return the metric registry name
 */
private static String getProxyMetricName(String name) {
  final InstanceType proxy = InstanceType.PROXY;
  return getMetricNameWithUniqueId(proxy, name);
}
/**
 * Builds metric registry names for the job master instance. The pattern is instance.metricName.
 * A name that already starts with the job master prefix is returned unchanged.
 *
 * @param name the metric name
 * @return the metric registry name
 */
private static String getJobMasterMetricName(String name) {
  final String jobMasterPrefix = InstanceType.JOB_MASTER.toString();
  return name.startsWith(jobMasterPrefix)
      ? name
      : Joiner.on(".").join(jobMasterPrefix, name);
}
/**
 * Builds metric registry name for job worker instance by delegating to
 * {@link #getMetricNameWithUniqueId} with the job worker instance type.
 *
 * @param name the metric name
 * @return the metric registry name
 */
public static String getJobWorkerMetricName(String name) {
  final InstanceType jobWorker = InstanceType.JOB_WORKER;
  return getMetricNameWithUniqueId(jobWorker, name);
}
/**
 * Builds metric registry name for plugin instance. The pattern is instance.metricName;
 * no unique id is appended. A name that already starts with the plugin prefix is returned
 * unchanged.
 *
 * @param name the metric name
 * @return the metric registry name
 */
public static String getPluginMetricName(String name) {
  final String pluginPrefix = InstanceType.PLUGIN.toString();
  if (!name.startsWith(pluginPrefix)) {
    return Joiner.on(".").join(pluginPrefix, name);
  }
  return name;
}
/**
 * Builds unique metric registry names with unique ID (set to the source name). The pattern is
 * instance.metricName.sourceName when unique IDs are enabled, and instance.metricName
 * otherwise.
 *
 * @param instance the instance name
 * @param name the metric name
 * @return the metric registry name
 */
private static String getMetricNameWithUniqueId(InstanceType instance, String name) {
  String qualifiedName = addInstanceTypeToMetricsName(instance, name);
  if (!sUniqueIDEnabled) {
    return qualifiedName;
  }
  String sourceName = sSourceNameSupplier.get();
  return Joiner.on(".").join(qualifiedName, sourceName);
}
/**
 * Prefixes the metric name with the instance type unless the name already starts with it.
 *
 * @param instance the instance type
 * @param name the metric name
 * @return the metric name that starts with the instance type
 */
private static String addInstanceTypeToMetricsName(InstanceType instance, String name) {
  final String instancePrefix = instance.toString();
  return name.startsWith(instancePrefix)
      ? name
      : Joiner.on(".").join(instancePrefix, name);
}
/**
 * Checks that the poll period is not smaller than the minimal poll period, which is 1 second.
 *
 * @param pollUnit the polling unit
 * @param pollPeriod the polling period
 * @throws IllegalArgumentException if the polling period is invalid
 */
public static void checkMinimalPollingPeriod(TimeUnit pollUnit, int pollPeriod)
    throws IllegalArgumentException {
  // Keep the converted value as a long: narrowing to int can overflow (and turn negative)
  // for large periods expressed in coarse units such as days.
  long period = MINIMAL_POLL_UNIT.convert(pollPeriod, pollUnit);
  // Guava's Preconditions only substitutes %s placeholders; %d would be left verbatim.
  Preconditions.checkArgument(period >= MINIMAL_POLL_PERIOD,
      "Polling period %s %s is below the minimal polling period", pollPeriod, pollUnit);
}
/**
 * Removes the instance and host from the given metric name, returning the result.
 *
 * The first dot-separated component is always dropped. The last component is dropped as well
 * when the name has more than two components and does not start with the master or cluster
 * instance type. A name without a dot separator is returned unchanged.
 *
 * @param metricsName the long metrics name with instance and host name
 * @return the metrics name without instance and host name
 */
public static String stripInstanceAndHost(String metricsName) {
  String[] parts = metricsName.split("\\.");
  if (parts.length <= 1) {
    return metricsName;
  }
  String instance = parts[0];
  // Master metrics doesn't have hostname included.
  boolean endsWithHost = parts.length > 2
      && !instance.equals(MetricsSystem.InstanceType.MASTER.toString())
      && !instance.equals(InstanceType.CLUSTER.toString());
  int end = endsWithHost ? parts.length - 1 : parts.length;
  StringBuilder stripped = new StringBuilder();
  for (int i = 1; i < end; i++) {
    if (i > 1) {
      stripped.append('.');
    }
    stripped.append(parts[i]);
  }
  return stripped.toString();
}
/**
 * Escapes a URI so that it can be embedded in a metric name.
 * Replaces "%" with "%25" first because "%" becomes the escape character,
 * then replaces "/" with "%2F" and "." with "%2E", since dots are used as separators
 * in metric names. The escaped form of each URI is cached.
 *
 * @param uri the URI to escape
 * @return the string representing the escaped URI
 */
public static String escape(AlluxioURI uri) {
  return CACHED_ESCAPED_PATH.computeIfAbsent(uri, key -> {
    String percentEscaped = key.toString().replace("%", "%25");
    String slashEscaped = percentEscaped.replace("/", "%2F");
    return slashEscaped.replace(".", "%2E");
  });
}
/**
 * Unescapes a URI, reverting it to its form before {@code escape}, to display it correctly.
 * "%2F" and "%2E" are reverted before "%25" so that an escaped percent sign is restored last.
 *
 * @param uri the escaped URI to unescape
 * @return the string representing the unescaped original URI
 */
public static String unescape(String uri) {
  String withSlashes = uri.replace("%2F", "/");
  String withDots = withSlashes.replace("%2E", ".");
  return withDots.replace("%25", "%");
}
// Some helper functions.
/**
 * Adds or replaces the instrumented executor service metrics for
 * the given name and executor service.
 *
 * The metrics are registered under the qualified metric name, while the created service is
 * tracked under the given (unqualified) name.
 *
 * @param delegate the executor service delegate that will be instrumented with metrics
 * @param name the name of the metric
 * @return the instrumented executor service
 */
public static InstrumentedExecutorService executorService(ExecutorService delegate, String name) {
  String qualifiedName = getMetricName(name);
  InstrumentedExecutorService instrumented =
      new InstrumentedExecutorService(delegate, METRIC_REGISTRY, qualifiedName);
  EXECUTOR_SERVICES.put(name, instrumented);
  return instrumented;
}
/**
 * Gets or adds the counter with the given name.
 * A counter stored in the metrics system is never removed but may be reset to zero.
 *
 * @param name the name of the metric
 * @return a counter object with the qualified metric name
 */
public static Counter counter(String name) {
  String qualifiedName = getMetricName(name);
  return METRIC_REGISTRY.counter(qualifiedName);
}
/**
 * Gets or adds the counter with the given name and tags.
 * A counter stored in the metrics system is never removed but may be reset to zero.
 * If this metric can be aggregated at cluster level and should report to leading master,
 * it is added to the should report metrics map.
 *
 * This method is added to add worker metrics with ufs tags into the should report metrics map.
 *
 * @param name the metric name
 * @param shouldReport whether this metric should be reported
 * @param tags the tag name and tag value pairs
 * @return a counter object with the qualified metric name
 */
public static Counter counterWithTags(String name, boolean shouldReport, String... tags) {
  String taggedName = Metric.getMetricNameWithTags(name, tags);
  String qualifiedName = getMetricName(taggedName);
  if (shouldReport) {
    SHOULD_REPORT_METRICS.putIfAbsent(qualifiedName, MetricType.COUNTER);
  }
  return METRIC_REGISTRY.counter(qualifiedName);
}
/**
 * Gets or adds the meter with the given name. A newly created meter uses
 * {@link SlidingTimeWindowMovingAverages}.
 * Please don't save the Meter instance since
 * the returned Meter instance may not be used due to {@link #resetAllMetrics}
 *
 * @param name the name of the metric
 * @return a meter object with the qualified metric name
 */
public static Meter meter(String name) {
  MetricRegistry.MetricSupplier<Meter> slidingWindowMeter =
      () -> new Meter(new SlidingTimeWindowMovingAverages());
  return METRIC_REGISTRY.meter(getMetricName(name), slidingWindowMeter);
}
/**
 * Gets or adds the meter with the given name and tags.
 * The returned meter may be changed due to {@link #resetAllMetrics}
 * If this metric can be aggregated at cluster level and should report to leading master,
 * it is added to the should report metrics map.
 *
 * This method is added to add worker metrics with ufs tags into the should report metrics map.
 *
 * @param name the name of the metric
 * @param shouldReport whether this metric should be reported
 * @param tags the tag name and tag value pairs
 * @return a meter object with the qualified metric name
 */
public static Meter meterWithTags(String name, boolean shouldReport, String... tags) {
  String taggedName = Metric.getMetricNameWithTags(name, tags);
  String qualifiedName = getMetricName(taggedName);
  if (shouldReport) {
    SHOULD_REPORT_METRICS.putIfAbsent(qualifiedName, MetricType.METER);
  }
  return METRIC_REGISTRY.meter(qualifiedName);
}
/**
 * Gets or adds the timer with the given name.
 * Please don't save the Timer instance since
 * the returned Timer instance may not be used due to {@link #resetAllMetrics}
 *
 * @param name the name of the metric
 * @return a timer object with the qualified metric name
 */
public static Timer timer(String name) {
  String qualifiedName = getMetricName(name);
  return METRIC_REGISTRY.timer(qualifiedName);
}
/**
 * Same as {@link #timer} but a newly created timer samples with a {@link UniformReservoir}.
 *
 * @param name the name of the metric
 * @return a timer object with the qualified metric name
 */
public static Timer uniformTimer(String name) {
  String qualifiedName = getMetricName(name);
  return METRIC_REGISTRY.timer(qualifiedName, () -> new Timer(new UniformReservoir()));
}
/**
 * Gets or adds the histogram with the given name.
 *
 * @param name the name of the metric
 * @return a histogram object with the qualified metric name
 */
public static Histogram histogram(String name) {
  String qualifiedName = getMetricName(name);
  return METRIC_REGISTRY.histogram(qualifiedName);
}
/**
 * Registers a gauge if no metric is registered under the given name yet.
 * The name is used as is; it is not qualified by this method.
 *
 * @param name the gauge name
 * @param metric the gauge
 * @param <T> the type of the gauge value
 */
public static synchronized <T> void registerGaugeIfAbsent(String name, Gauge<T> metric) {
  if (!METRIC_REGISTRY.getMetrics().containsKey(name)) {
    METRIC_REGISTRY.register(name, metric);
  }
}
/**
 * Registers a cached gauge with a cache timeout of 10 minutes if no metric is registered
 * under the given name yet.
 *
 * @param name the gauge name
 * @param metric the gauge
 * @param <T> the type of the gauge value
 */
public static synchronized <T> void registerCachedGaugeIfAbsent(String name, Gauge<T> metric) {
  registerCachedGaugeIfAbsent(name, metric, 10, TimeUnit.MINUTES);
}
/**
 * Registers a cached gauge if no metric is registered under the given name yet.
 * The registered gauge wraps the given one in a {@link CachedGauge} with the given timeout.
 *
 * @param name the gauge name
 * @param metric the gauge
 * @param timeout the cache gauge timeout
 * @param unit the unit of timeout
 * @param <T> the type of the gauge value
 */
public static synchronized <T> void registerCachedGaugeIfAbsent(
    String name, Gauge<T> metric, long timeout, TimeUnit unit) {
  if (METRIC_REGISTRY.getMetrics().containsKey(name)) {
    return;
  }
  METRIC_REGISTRY.register(name, new CachedGauge<T>(timeout, unit) {
    @Override
    protected T loadValue() {
      return metric.getValue();
    }
  });
}
/**
 * Creates a cached gauge that aggregates the value of existing gauges, if no metric is
 * registered under the given name yet.
 *
 * The aggregated value is the sum over the given keys of the metrics currently registered
 * under each key's name. Keys without a registered metric, or whose metric is not a gauge,
 * are ignored.
 *
 * @param name the gauge name
 * @param metrics the set of metric values to be aggregated
 * @param timeout the cached gauge timeout
 * @param timeUnit the unit of timeout
 */
public static synchronized void registerAggregatedCachedGaugeIfAbsent(
    String name, Set<MetricKey> metrics, long timeout, TimeUnit timeUnit) {
  if (METRIC_REGISTRY.getMetrics().containsKey(name)) {
    return;
  }
  METRIC_REGISTRY.register(name, new CachedGauge<Double>(timeout, timeUnit) {
    @Override
    protected Double loadValue() {
      double total = 0.0;
      for (MetricKey key : metrics) {
        Metric m = getMetricValue(key.getName());
        if (m == null || m.getMetricType() != MetricType.GAUGE) {
          continue;
        }
        total += m.getValue();
      }
      return total;
    }
  });
}
/**
 * Removes the metric registered under the given name from the metric registry.
 * The name is used as is; it is not qualified by this method.
 *
 * @param name the metric name
 * @return true if the metric was removed, false otherwise
 */
public static synchronized boolean removeMetrics(String name) {
  final boolean removed = METRIC_REGISTRY.remove(name);
  return removed;
}
/**
 * This method is used to return a list of RPC metric objects which will be sent to the
 * MetricsMaster.
 *
 * It is mainly useful for metrics which have a {@link MetricType} of COUNTER. The metric values
 * with this type will only include the difference since the last time that
 * {@code reportMetrics} was called.
 *
 * By sending only the difference since the last RPC, this method will allow the master to
 * track the metrics for a given worker, even if the worker is restarted.
 *
 * The synchronized keyword is added for correctness with {@link #resetAllMetrics}
 *
 * @param instanceType the instance type whose should-report metrics are initialized on the
 *        first call for that type
 * @return the metrics to report
 */
private static synchronized List<alluxio.grpc.Metric> reportMetrics(InstanceType instanceType) {
  if (!sReported.contains(instanceType)) {
    initShouldReportMetrics(instanceType);
    sReported.add(instanceType);
  }
  List<alluxio.grpc.Metric> rpcMetrics = new ArrayList<>(20);
  // Use the getMetrics() call instead of getGauges(),getCounters()... to avoid
  // unneeded metrics copy
  Map<String, com.codahale.metrics.Metric> metrics = METRIC_REGISTRY.getMetrics();
  for (Map.Entry<String, MetricType> entry : SHOULD_REPORT_METRICS.entrySet()) {
    String name = entry.getKey();
    com.codahale.metrics.Metric metric = metrics.get(name);
    if (metric == null) {
      // This metric is not registered in the metric registry
      continue;
    }
    // Currently all metrics that should be reported are all counters,
    // the logic here is to support reporting metrics of all types for future convenience
    if (metric instanceof Counter) {
      long value = ((Counter) metric).getCount();
      // On restarts counters will be reset to 0, so whatever the value is the first time
      // this method is called represents the value which should be added to the master's
      // counter.
      Long prev = LAST_REPORTED_METRICS.put(name, value);
      double diff = prev != null ? value - prev : value;
      if (diff != 0) { // Only report non-zero counter values
        rpcMetrics.add(Metric.from(name, diff, MetricType.COUNTER).toProto());
      }
    } else if (metric instanceof Gauge) {
      // Read the gauge exactly once: the value may change between reads, and a null value
      // must not be dereferenced when logging.
      Object value = ((Gauge<?>) metric).getValue();
      if (!(value instanceof Number)) {
        LOG.debug("The value of metric {} of type {} is not sent to metrics master,"
            + " only metrics value of number can be collected",
            name, value == null ? "null" : value.getClass().getSimpleName());
        continue;
      }
      rpcMetrics.add(
          Metric.from(name, ((Number) value).longValue(), MetricType.GAUGE).toProto());
    } else if (metric instanceof Meter) {
      Meter meter = (Meter) metric;
      // Note that one minute rate may return 0 in the first several seconds
      // that a value marked. For clients, especially short-life clients,
      // the minute rates will be zero for their whole life.
      // That's why all throughput meters are not aggregated at cluster level.
      rpcMetrics.add(Metric.from(name, meter.getOneMinuteRate() / SECONDS_IN_A_MINUTE,
          MetricType.METER).toProto());
    } else if (metric instanceof Timer) {
      Timer timer = (Timer) metric;
      rpcMetrics.add(Metric.from(name, timer.getCount(), MetricType.TIMER).toProto());
    } else {
      LOG.warn("Metric {} has invalid metric type {} which cannot be reported",
          name, entry.getValue());
    }
  }
  return rpcMetrics;
}
/**
 * Collects the worker metrics that should be reported and logs, at debug level, how many
 * metrics were collected and how long it took.
 *
 * @return the worker metrics to send via RPC
 */
public static List<alluxio.grpc.Metric> reportWorkerMetrics() {
  long start = System.currentTimeMillis();
  List<alluxio.grpc.Metric> metricsList = reportMetrics(InstanceType.WORKER);
  LOG.debug("Get the worker metrics list contains {} metrics to report to leading master in {}ms",
      metricsList.size(), System.currentTimeMillis() - start);
  return metricsList;
}
/**
 * Collects the client metrics that should be reported and logs, at debug level, how many
 * metrics were collected and how long it took.
 *
 * @return the client metrics to send via RPC
 */
public static List<alluxio.grpc.Metric> reportClientMetrics() {
  long start = System.currentTimeMillis();
  List<alluxio.grpc.Metric> metricsList = reportMetrics(InstanceType.CLIENT);
  LOG.debug("Get the client metrics list contains {} metrics to report to leading master in {}ms",
      metricsList.size(), System.currentTimeMillis() - start);
  return metricsList;
}
/**
* Gets all the master metrics belongs to the given metric names.
*
* @param metricNames the name of the metrics to get
* @return a metric map from metric name to metrics with this name
*/
public static Map> getMasterMetrics(Set metricNames) {
Map> res = new HashMap<>();
for (Map.Entry entry
: METRIC_REGISTRY.getMetrics().entrySet()) {
Matcher matcher = METRIC_NAME_PATTERN.matcher(entry.getKey());
if (matcher.matches()) {
String name = matcher.group(1);
if (metricNames.contains(name)) {
res.computeIfAbsent(name, m -> new HashSet<>())
.add(getAlluxioMetricFromCodahaleMetric(entry.getKey(), entry.getValue()));
}
}
}
return res;
}
/**
 * Gets the metric registered under the given full metric name.
 *
 * @param fullName the full name of the metric to get
 * @return the metric with the given name, or null if no metric is registered under that name
 *         or the registered metric cannot be converted
 */
@Nullable
public static Metric getMetricValue(String fullName) {
  com.codahale.metrics.Metric registered = METRIC_REGISTRY.getMetrics().get(fullName);
  return registered == null
      ? null
      : getAlluxioMetricFromCodahaleMetric(fullName, registered);
}
/**
 * Converts a registry metric into an Alluxio {@link Metric}.
 *
 * Gauges are converted from the long value of their numeric value, counters and timers from
 * their count, and meters from their one minute rate divided by
 * {@link #SECONDS_IN_A_MINUTE}.
 *
 * @param name the name to give to the converted metric
 * @param metric the registry metric to convert
 * @return the converted metric, or null if the metric type is not supported or the metric is a
 *         gauge whose value is null or not a number
 */
@Nullable
private static Metric getAlluxioMetricFromCodahaleMetric(String name,
    com.codahale.metrics.Metric metric) {
  if (metric instanceof Gauge) {
    // The registry also holds gauges with non-numeric values (e.g. JVM attribute gauges),
    // so the value must be checked before it is treated as a Number.
    Object value = ((Gauge<?>) metric).getValue();
    if (!(value instanceof Number)) {
      LOG.debug("Gauge {} has a non-numeric value of type {} which cannot be converted",
          name, value == null ? "null" : value.getClass().getSimpleName());
      return null;
    }
    return Metric.from(name, ((Number) value).longValue(), MetricType.GAUGE);
  } else if (metric instanceof Counter) {
    Counter counter = (Counter) metric;
    return Metric.from(name, counter.getCount(), MetricType.COUNTER);
  } else if (metric instanceof Meter) {
    Meter meter = (Meter) metric;
    return Metric.from(name, meter.getOneMinuteRate() / SECONDS_IN_A_MINUTE, MetricType.METER);
  } else if (metric instanceof Timer) {
    Timer timer = (Timer) metric;
    return Metric.from(name, timer.getCount(), MetricType.TIMER);
  }
  LOG.warn("Metric {} has invalid metric type {}", name, metric.getClass().getName());
  return null;
}
/**
 * Builds a snapshot of all the metrics in the metric registry. Metric names are unescaped in
 * the result. Metrics of unsupported types and gauges whose value is null are skipped.
 *
 * @return a map of all metrics stored in the current node
 *         from metric name to {@link MetricValue}
 */
public static Map<String, MetricValue> allMetrics() {
  Map<String, MetricValue> metricsMap = new HashMap<>();
  for (Map.Entry<String, com.codahale.metrics.Metric> entry
      : METRIC_REGISTRY.getMetrics().entrySet()) {
    MetricValue.Builder valueBuilder = MetricValue.newBuilder();
    com.codahale.metrics.Metric metric = entry.getValue();
    if (metric instanceof Gauge) {
      Object value = ((Gauge<?>) metric).getValue();
      if (value == null) {
        // A null value has neither a number nor a string form; calling toString() on it
        // would abort the whole snapshot with a NullPointerException.
        LOG.debug("Gauge {} returned a null value and is skipped", entry.getKey());
        continue;
      }
      if (value instanceof Number) {
        valueBuilder.setDoubleValue(((Number) value).doubleValue());
      } else {
        valueBuilder.setStringValue(value.toString());
      }
      valueBuilder.setMetricType(MetricType.GAUGE);
    } else if (metric instanceof Counter) {
      valueBuilder.setMetricType(MetricType.COUNTER)
          .setDoubleValue(((Counter) metric).getCount());
    } else if (metric instanceof Meter) {
      valueBuilder.setMetricType(MetricType.METER)
          .setDoubleValue(((Meter) metric).getOneMinuteRate() / SECONDS_IN_A_MINUTE);
    } else if (metric instanceof Timer) {
      valueBuilder.setMetricType(MetricType.TIMER)
          .setDoubleValue(((Timer) metric).getCount());
    } else {
      LOG.warn("Metric {} has invalid metric type {}",
          entry.getKey(), metric.getClass().getName());
      continue;
    }
    metricsMap.put(unescape(entry.getKey()), valueBuilder.build());
  }
  return metricsMap;
}
/**
 * Initializes the {@link #SHOULD_REPORT_METRICS} with every should-report metric key of the
 * given instance type. This should be called only once per instance type; entries that
 * already exist are left untouched.
 *
 * Note that this method is able to catch most of the should report metrics
 * except worker metrics with ufs tags.
 *
 * @param instanceType the instance type
 */
@VisibleForTesting
public static void initShouldReportMetrics(InstanceType instanceType) {
  for (MetricKey metricKey : MetricKey.allShouldReportMetricKeys(instanceType)) {
    String qualifiedName = getMetricNameWithUniqueId(instanceType, metricKey.getName());
    SHOULD_REPORT_METRICS.putIfAbsent(qualifiedName, metricKey.getMetricType());
  }
}
/**
 * Resets all the metrics in the MetricsSystem.
 *
 * Counters are decremented to zero, meters and timers are removed and registered again, the
 * instrumented executor services are reset and the last reported counter values are cleared.
 *
 * This method should be used sparingly.
 * NOTE(review): the original documentation describes this method as not thread-safe although
 * it is synchronized; presumably this refers to callers that keep using Meter/Timer instances
 * obtained before the reset - confirm.
 * NOTE(review): meters and timers are re-created through the registry defaults, not through
 * the suppliers used by {@link #meter} and {@link #uniformTimer} - confirm this is intended.
 */
public static synchronized void resetAllMetrics() {
  final long resetStartMs = System.currentTimeMillis();
  // Gauge metrics don't need to be changed because they calculate value when getting them
  // Counters can be reset to zero values.
  METRIC_REGISTRY.getCounters().values()
      .forEach(counter -> counter.dec(counter.getCount()));
  // No reset logic exist in Meter, a remove and add combination is needed
  METRIC_REGISTRY.getMeters().keySet().forEach(meterName -> {
    METRIC_REGISTRY.remove(meterName);
    METRIC_REGISTRY.meter(meterName);
  });
  // No reset logic exist in Timer, a remove and add combination is needed
  METRIC_REGISTRY.getTimers().keySet().forEach(timerName -> {
    METRIC_REGISTRY.remove(timerName);
    METRIC_REGISTRY.timer(timerName);
  });
  // Reset the InstrumentedExecutorServices last as it needs to keep the
  // reference to the new metrics objects
  for (InstrumentedExecutorService service : EXECUTOR_SERVICES.values()) {
    service.reset();
  }
  LAST_REPORTED_METRICS.clear();
  LOG.info("Reset all metrics in the metrics system in {}ms",
      System.currentTimeMillis() - resetStartMs);
}
/**
 * Removes every metric from the metric registry and forgets all the tracked instrumented
 * executor services.
 */
@VisibleForTesting
public static void clearAllMetrics() {
  METRIC_REGISTRY.getNames().forEach(metricName -> METRIC_REGISTRY.remove(metricName));
  EXECUTOR_SERVICES.clear();
}
/**
 * Resets all counters to 0 and unregisters gauges for testing.
 */
@VisibleForTesting
public static void resetCountersAndGauges() {
  for (Counter counter : METRIC_REGISTRY.getCounters().values()) {
    counter.dec(counter.getCount());
  }
  METRIC_REGISTRY.getGauges().keySet()
      .forEach(gaugeName -> METRIC_REGISTRY.remove(gaugeName));
}
/**
 * A timer context that updates several timers with the same elapsed time.
 */
public static class MultiTimerContext implements AutoCloseable {
  private final long mStartTime;
  private final Timer[] mTimers;

  /**
   * Creates the context and records the current time as its start time.
   *
   * @param timers timers associated with this context
   */
  public MultiTimerContext(Timer... timers) {
    mTimers = timers;
    mStartTime = System.nanoTime();
  }

  /**
   * Updates every timer with the difference between current and start time. Call to this method
   * will not reset the start time. Multiple calls result in multiple updates.
   */
  @Override
  public void close() {
    final long elapsedNanos = System.nanoTime() - mStartTime;
    for (int i = 0; i < mTimers.length; i++) {
      mTimers[i].update(elapsedNanos, TimeUnit.NANOSECONDS);
    }
  }
}
/**
 * Private constructor: this class only exposes static members and is never instantiated.
 */
private MetricsSystem() {} // prevent instantiation
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy