All Downloads are FREE. Search and download functionalities are using the official Maven repository.

alluxio.master.metrics.MetricsStore Maven / Gradle / Ivy

The newest version!
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.master.metrics;

import alluxio.grpc.MetricType;
import alluxio.metrics.Metric;
import alluxio.metrics.MetricInfo;
import alluxio.metrics.MetricKey;
import alluxio.metrics.MetricsSystem;
import alluxio.metrics.MetricsSystem.InstanceType;
import alluxio.resource.LockResource;

import com.codahale.metrics.Counter;
import com.google.common.base.Objects;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.time.Clock;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import javax.annotation.concurrent.GuardedBy;
import javax.annotation.concurrent.ThreadSafe;

/**
 * A store of metrics containing the metrics collected from workers and clients.
 *
 * <p>Reported values of type {@link MetricType#COUNTER} are added to cluster-level counters kept
 * in an internal map. The methods that report metrics acquire the read lock of an internal
 * {@link ReentrantReadWriteLock}, while {@link #clear()} acquires the write lock, so clearing
 * never overlaps with reporting.
 */
@ThreadSafe
public class MetricsStore {
  private static final Logger LOG = LoggerFactory.getLogger(MetricsStore.class);
  // The following fields are added for reducing the string processing
  // for MetricKey.WORKER_BYTES_READ_UFS and MetricKey.WORKER_BYTES_WRITTEN_UFS
  private static final String BYTES_READ_UFS = "BytesReadPerUfs";
  private static final String BYTES_WRITTEN_UFS = "BytesWrittenPerUfs";

  private final Clock mClock;

  // The lock to guarantee that only one thread is clearing the metrics
  // and no other interaction with worker/client metrics set is allowed during metrics clearing
  private final ReentrantReadWriteLock mLock;

  // The time of the most recent metrics store clearance.
  // This tracks when the cluster counters start aggregating from the reported metrics.
  @GuardedBy("mLock")
  private long mLastClearTime;

  /**
   * A map from the cluster counter key representing the metrics to be aggregated
   * to the corresponding aggregated cluster Counter.
   * For example, Counter of cluster.BytesReadRemote is aggregated from
   * the worker reported worker.BytesReadRemote.
   *
   * Exceptions are the BytesRead/WrittenUfs metrics which records
   * the actual cluster metrics name to its Counter directly.
   */
  @GuardedBy("mLock")
  private final ConcurrentHashMap<ClusterCounterKey, Counter> mClusterCounters;

  /**
   * Constructs a new {@link MetricsStore}.
   *
   * @param clock the clock to get time of
   */
  public MetricsStore(Clock clock) {
    mClock = clock;
    mLastClearTime = clock.millis();
    mLock = new ReentrantReadWriteLock();
    mClusterCounters = new ConcurrentHashMap<>();
  }

  /**
   * Put the metrics from a worker with a source name.
   *
   * <p>The call is a no-op when {@code source} is null or when {@code metrics} is null or empty.
   *
   * @param source the source name
   * @param metrics the new worker metrics
   */
  public void putWorkerMetrics(String source, List<Metric> metrics) {
    // Check for null before touching the list so a missing report cannot raise an NPE.
    if (source == null || metrics == null || metrics.isEmpty()) {
      return;
    }
    try (LockResource r = new LockResource(mLock.readLock())) {
      putReportedMetrics(InstanceType.WORKER, metrics);
    }
    LOG.debug("Put {} metrics of worker {}", metrics.size(), source);
  }

  /**
   * Put the metrics from a client with a source name.
   *
   * <p>The call is a no-op when {@code source} is null or when {@code metrics} is null or empty.
   *
   * @param source the source name
   * @param metrics the new metrics
   */
  public void putClientMetrics(String source, List<Metric> metrics) {
    // Check for null before touching the list so a missing report cannot raise an NPE.
    if (source == null || metrics == null || metrics.isEmpty()) {
      return;
    }
    try (LockResource r = new LockResource(mLock.readLock())) {
      putReportedMetrics(InstanceType.CLIENT, metrics);
    }
    LOG.debug("Put {} metrics of client {}", metrics.size(), source);
  }

  /**
   * Update the reported metrics received from a worker or client.
   *
   * Cluster metrics of {@link MetricType} COUNTER are directly incremented by the reported values.
   * Metrics of any other type, and metrics whose source is null, are skipped. A counter that has
   * no registered cluster counter is dropped unless it is one of the per-UFS worker metrics.
   *
   * @param instanceType the instance type that reports the metrics
   * @param reportedMetrics the metrics received by the RPC handler
   */
  private void putReportedMetrics(InstanceType instanceType, List<Metric> reportedMetrics) {
    for (Metric metric : reportedMetrics) {
      if (metric.getSource() == null) {
        continue; // ignore metrics whose source is null
      }
      // If a metric is COUNTER, the value sent via RPC should be the incremental value; i.e.
      // the amount the value has changed since the last RPC. The master should equivalently
      // increment its value based on the received metric rather than replacing it.
      if (metric.getMetricType() != MetricType.COUNTER) {
        continue;
      }

      String metricName = metric.getName();
      Counter counter = mClusterCounters.get(new ClusterCounterKey(instanceType, metricName));
      if (counter != null) {
        counter.inc((long) metric.getValue());
        continue;
      }
      // Only worker-reported metrics take part in the per-UFS aggregation below.
      if (instanceType.equals(InstanceType.CLIENT)) {
        continue;
      }
      // Need to increment two metrics: one for the specific ufs the current metric recorded from
      // and one to summarize values from all UFSes
      if (metricName.equals(BYTES_READ_UFS)) {
        incrementUfsRelatedCounters(metric, MetricKey.CLUSTER_BYTES_READ_UFS.getName(),
            MetricKey.CLUSTER_BYTES_READ_UFS_ALL.getName());
      } else if (metricName.equals(BYTES_WRITTEN_UFS)) {
        incrementUfsRelatedCounters(metric, MetricKey.CLUSTER_BYTES_WRITTEN_UFS.getName(),
            MetricKey.CLUSTER_BYTES_WRITTEN_UFS_ALL.getName());
      }
    }
  }

  /**
   * Increments the related counters of a specific metric.
   *
   * <p>Both the per-UFS counter and the all-UFS counter are created on demand, so this method
   * does not depend on {@link #initMetricKeys()} having been called first.
   *
   * @param metric the metric
   * @param perUfsMetricName the per ufs metric name prefix to increment counter value of
   * @param allUfsMetricName the all ufs metric name to increment counter value of
   */
  private void incrementUfsRelatedCounters(Metric metric,
      String perUfsMetricName, String allUfsMetricName) {
    String fullCounterName = Metric.getMetricNameWithTags(perUfsMetricName,
        MetricInfo.TAG_UFS, metric.getTags().get(MetricInfo.TAG_UFS));
    long counterValue = (long) metric.getValue();

    Counter perUfsCounter = mClusterCounters.computeIfAbsent(
        new ClusterCounterKey(InstanceType.CLUSTER, fullCounterName),
        key -> MetricsSystem.counter(fullCounterName));
    perUfsCounter.inc(counterValue);

    // Registers the same counter initMetricKeys() would have registered under this key;
    // a plain get() here would throw a NullPointerException if the key were still absent.
    Counter allUfsCounter = mClusterCounters.computeIfAbsent(
        new ClusterCounterKey(InstanceType.CLUSTER, allUfsMetricName),
        key -> MetricsSystem.counter(allUfsMetricName));
    allUfsCounter.inc(counterValue);
  }

  /**
   * Inits the metrics store.
   * Defines the cluster metrics and registers the cluster cache hit rate gauge.
   */
  public void initMetricKeys() {
    try (LockResource r = new LockResource(mLock.readLock())) {
      // worker metrics
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_BYTES_READ_DIRECT.getMetricName(),
          MetricKey.CLUSTER_BYTES_READ_DIRECT.getName());
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_BYTES_READ_REMOTE.getMetricName(),
          MetricKey.CLUSTER_BYTES_READ_REMOTE.getName());
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_BYTES_READ_DOMAIN.getMetricName(),
          MetricKey.CLUSTER_BYTES_READ_DOMAIN.getName());
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_BYTES_WRITTEN_REMOTE.getMetricName(),
          MetricKey.CLUSTER_BYTES_WRITTEN_REMOTE.getName());
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_BYTES_WRITTEN_DOMAIN.getMetricName(),
          MetricKey.CLUSTER_BYTES_WRITTEN_DOMAIN.getName());
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_ACTIVE_RPC_READ_COUNT.getMetricName(),
          MetricKey.CLUSTER_ACTIVE_RPC_READ_COUNT.getName());
      registerClusterCounter(InstanceType.WORKER,
          MetricKey.WORKER_ACTIVE_RPC_WRITE_COUNT.getMetricName(),
          MetricKey.CLUSTER_ACTIVE_RPC_WRITE_COUNT.getName());

      // client metrics
      registerClusterCounter(InstanceType.CLIENT,
          MetricKey.CLIENT_BUSY_EXCEPTION_COUNT.getMetricName(),
          MetricKey.CLIENT_BUSY_EXCEPTION_COUNT.getName());

      // special metrics that have multiple worker metrics to summarize from
      // always use the full name instead of metric name for those metrics
      registerClusterCounter(InstanceType.CLUSTER,
          MetricKey.CLUSTER_BYTES_READ_UFS_ALL.getName(),
          MetricKey.CLUSTER_BYTES_READ_UFS_ALL.getName());
      registerClusterCounter(InstanceType.CLUSTER,
          MetricKey.CLUSTER_BYTES_WRITTEN_UFS_ALL.getName(),
          MetricKey.CLUSTER_BYTES_WRITTEN_UFS_ALL.getName());

      // Cache hit rate = 1 - (bytes read from UFS) / (direct + remote + domain bytes read);
      // reported as 0 while nothing has been read yet.
      MetricsSystem.registerGaugeIfAbsent(
          MetricsSystem.getMetricName(MetricKey.CLUSTER_CACHE_HIT_RATE.getName()),
          () -> {
            long cacheMisses = MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_UFS_ALL.getName())
                .getCount();
            long total =
                MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_DIRECT.getName()).getCount()
                + MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_REMOTE.getName()).getCount()
                + MetricsSystem.counter(MetricKey.CLUSTER_BYTES_READ_DOMAIN.getName()).getCount();
            if (total > 0) {
              return 1 - cacheMisses / (1.0 * total);
            }
            return 0;
          });
    }
  }

  /**
   * Maps a reported metric to the cluster counter it aggregates into, unless a mapping for the
   * same key already exists.
   *
   * @param instanceType the instance type that reports the metric
   * @param reportedName the metric name used as lookup key for reported metrics
   * @param clusterCounterName the name of the counter that accumulates the reported values
   */
  private void registerClusterCounter(InstanceType instanceType, String reportedName,
      String clusterCounterName) {
    mClusterCounters.putIfAbsent(new ClusterCounterKey(instanceType, reportedName),
        MetricsSystem.counter(clusterCounterName));
  }

  /**
   * Clears all the metrics.
   *
   * This method should only be called when starting the {@link DefaultMetricsMaster}
   * and before starting the metrics updater to avoid conflicts with
   * other methods in this class which updates or accesses
   * the metrics inside metrics sets.
   */
  public void clear() {
    long start = System.currentTimeMillis();
    try (LockResource r = new LockResource(mLock.writeLock())) {
      // Bring every aggregated counter back to zero by subtracting its current count.
      for (Counter counter : mClusterCounters.values()) {
        counter.dec(counter.getCount());
      }
      mLastClearTime = mClock.millis();
      MetricsSystem.resetAllMetrics();
    }
    LOG.info("Cleared the metrics store and metrics system in {} ms",
        System.currentTimeMillis() - start);
  }

  /**
   * @return the last metrics store clear time in milliseconds
   */
  public long getLastClearTime() {
    try (LockResource r = new LockResource(mLock.readLock())) {
      return mLastClearTime;
    }
  }

  /**
   * The key for cluster counter map.
   * This class is added to reduce the string concatenation of instancetype + "." + metric name.
   * Instances are immutable, which keeps them safe to use as hash map keys.
   */
  public static class ClusterCounterKey {
    private final InstanceType mInstanceType;
    private final String mMetricName;

    /**
     * Construct a new {@link ClusterCounterKey}.
     *
     * @param instanceType the instance type
     * @param metricName the metric name
     */
    public ClusterCounterKey(InstanceType instanceType, String metricName) {
      mInstanceType = instanceType;
      mMetricName = metricName;
    }

    /**
     * @return the instance type
     */
    private InstanceType getInstanceType() {
      return mInstanceType;
    }

    /**
     * @return the metric name
     */
    private String getMetricName() {
      return mMetricName;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof ClusterCounterKey)) {
        return false;
      }
      ClusterCounterKey that = (ClusterCounterKey) o;
      return Objects.equal(mInstanceType, that.getInstanceType())
          && Objects.equal(mMetricName, that.getMetricName());
    }

    @Override
    public int hashCode() {
      return Objects.hashCode(mInstanceType, mMetricName);
    }

    @Override
    public String toString() {
      return mInstanceType + "." + mMetricName;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy