All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.hadoop.fs.gcs.GhfsGlobalStorageStatistics Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting Google Cloud Storage

There is a newer version: 3.1.3
Show newest version
/*
 * Copyright 2023 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.cloud.hadoop.fs.gcs;

import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.DIRECTORIES_CREATED;
import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.FILES_CREATED;
import static com.google.cloud.hadoop.fs.gcs.GhfsStatistic.FILES_DELETED;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.EXCEPTION_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_BAD_REQUEST_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_GONE_RESPONSE_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_NOT_FOUND_RESPONSE_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_PRECONDITION_FAILED_RESPONSE_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_RATE_LIMIT_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_REQUESTED_RANGE_NOT_SATISFIABLE_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_REQUEST_TIMEOUT_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_SIDE_ERROR_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_CLIENT_UNAUTHORIZED_RESPONSE_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_REQUEST_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_SERVER_BAD_GATEWAY_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_SERVER_INTERNAL_ERROR_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_SERVER_SERVICE_UNAVAILABLE_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_SERVER_SIDE_ERROR_COUNT;
import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics.GCS_API_SERVER_TIMEOUT_COUNT;
import static com.google.cloud.hadoop.gcsio.StatisticTypeEnum.TYPE_DURATION;
import static com.google.cloud.hadoop.gcsio.StatisticTypeEnum.TYPE_DURATION_TOTAL;
import static com.google.common.base.Preconditions.checkArgument;

import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStatistics;
import com.google.cloud.hadoop.gcsio.StatisticTypeEnum;
import com.google.cloud.hadoop.util.ITraceFactory;
import com.google.cloud.hadoop.util.ITraceOperation;
import com.google.common.base.Stopwatch;
import com.google.common.flogger.GoogleLogger;
import com.google.common.flogger.LazyArgs;
import com.google.common.util.concurrent.AtomicDouble;
import java.io.IOException;
import java.util.*;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import javax.annotation.Nonnull;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.StorageStatistics;
import org.apache.hadoop.fs.statistics.DurationTrackerFactory;
import org.apache.hadoop.fs.statistics.impl.IOStatisticsBinding;
import org.apache.hadoop.util.functional.CallableRaisingIOE;

/** Storage statistics for GCS */
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class GhfsGlobalStorageStatistics extends StorageStatistics {
  private static final GoogleLogger logger = GoogleLogger.forEnclosingClass();

  /** {@value} The key that stores all the registered metrics */
  public static final String NAME = "GhfsStorageStatistics";

  public static final int LATENCY_LOGGING_THRESHOLD_MS = 500;

  // Instance to be used if it encounters any error while registering to Global Statistics.
  // Error can happen for e.g. when different class loaders are used.
  // If this instance is used, the metrics will not be reported to metrics sinks.
  static final GhfsGlobalStorageStatistics DUMMY_INSTANCE = new GhfsGlobalStorageStatistics();

  // Initial requests are expected to take time due to warmup.
  private static final int WARMUP_THRESHOLD_SEC = 30;

  private static final GhfsThreadLocalStatistics threadLocalStatistics =
      new GhfsThreadLocalStatistics();

  private final Map opsCount = new HashMap<>();
  private final Map minimums = new HashMap<>();
  private final Map maximums = new HashMap<>();
  private final Map means = new HashMap<>();
  private final Map total = new HashMap<>();
  private static final Stopwatch UPTIME_STOP_WATCH = Stopwatch.createStarted();

  public GhfsGlobalStorageStatistics() {

    super(NAME);

    for (GoogleCloudStorageStatistics opType : GoogleCloudStorageStatistics.values()) {
      addStatistic(opType.getSymbol(), opType.getType());
    }

    for (GhfsStatistic opType : GhfsStatistic.values()) {
      addStatistic(opType.getSymbol(), opType.getType());
    }
  }

  static  B trackDuration(
      DurationTrackerFactory factory,
      @Nonnull GhfsGlobalStorageStatistics stats,
      GhfsStatistic statistic,
      Object context,
      ITraceFactory traceFactory,
      CallableRaisingIOE operation)
      throws IOException {
    Stopwatch stopwatch = Stopwatch.createStarted();

    try (ITraceOperation opFS =
        traceFactory.createRootWithLogging(statistic.getSymbol(), context)) {
      stats.increment(statistic);
      return IOStatisticsBinding.trackDuration(factory, statistic.getSymbol(), operation);
    } finally {
      long elapsedMs = stopwatch.elapsed().toMillis();
      stats.updateStats(statistic, elapsedMs, context);
      stats.updateConnectorHadoopApiTime(elapsedMs);
      logger.atFine().log("%s(%s); elapsed=%s", statistic.getSymbol(), context, elapsedMs);

      periodicallyLogMetrics(stats);
    }
  }

  private static void periodicallyLogMetrics(GhfsGlobalStorageStatistics stats) {
    if (UPTIME_STOP_WATCH.elapsed().getSeconds() > WARMUP_THRESHOLD_SEC) {
      // Periodically log the metrics. Once every 5 minutes.
      logger.atInfo().atMostEvery(5, TimeUnit.MINUTES).log(
          "periodic connector metrics: %s", LazyArgs.lazy(() -> stats.getNonZeroMetrics()));
    }
  }

  public GhfsThreadLocalStatistics getThreadLocalStatistics() {
    return threadLocalStatistics;
  }

  private String getNonZeroMetrics() {
    // TreeMap to keep the result sorted.
    TreeMap result = new TreeMap<>();
    for (Iterator it = this.getLongStatistics(); it.hasNext(); ) {
      LongStatistic metric = it.next();
      if (metric.getValue() != 0) {
        result.put(metric.getName(), metric.getValue());
      }
    }

    result.put("uptimeSeconds", UPTIME_STOP_WATCH.elapsed().toSeconds());

    return result.toString();
  }

  private void updateConnectorHadoopApiTime(long elapsedMs) {
    incrementCounter(GhfsStatistic.GCS_CONNECTOR_TIME, elapsedMs);
  }

  private long increment(GhfsStatistic statistic) {
    return incrementCounter(statistic, 1);
  }

  void increment(GoogleCloudStorageStatistics statistic) {
    incrementCounter(statistic, 1);
  }

  /**
   * Increment a specific counter.
   *
   * @param op operation
   * @param count increment value
   * @return the new value
   */
  long incrementCounter(GhfsStatistic op, long count) {
    threadLocalStatistics.increment(op, count);
    return opsCount.get(op.getSymbol()).addAndGet(count);
  }

  /**
   * Increment a specific counter.
   *
   * @param op operation
   * @param count increment value
   */
  void incrementCounter(GoogleCloudStorageStatistics op, long count) {
    opsCount.get(op.getSymbol()).addAndGet(count);
    threadLocalStatistics.increment(op, count);
  }

  @Override
  public void reset() {
    resetLongMetrics(opsCount);
    resetLongMetrics(maximums);
    resetDoubleMetrics(total);

    for (String ms : means.keySet()) {
      means.get(ms).reset();
    }

    for (String ms : minimums.keySet()) {
      minimums.put(ms, null);
    }
  }

  private void resetLongMetrics(Map metrics) {
    for (AtomicLong value : metrics.values()) {
      value.set(0);
    }
  }

  private void resetDoubleMetrics(Map metrics) {
    for (AtomicDouble value : metrics.values()) {
      value.set(0.0);
    }
  }

  void updateStats(GhfsStatistic statistic, long durationMs, Object context) {
    updateStats(statistic.getSymbol(), statistic.getType(), durationMs, context);
  }

  public void updateStats(GoogleCloudStorageStatistics statistic, long duration, Object context) {
    updateStats(statistic.getSymbol(), statistic.getType(), duration, context);
  }

  private void updateStats(
      String symbol, StatisticTypeEnum statType, long durationMs, Object context) {
    checkArgument(
        statType == TYPE_DURATION || statType == TYPE_DURATION_TOTAL,
        String.format("Unexpected instrumentation type %s", statType));

    updateMinMaxStats(durationMs, durationMs, context, symbol);
    addMeanStatistic(symbol, durationMs, 1);
  }

  private void addMeanStatistic(String symbol, long totalDurationMs, int count) {
    String meanKey = getMeanKey(symbol);
    if (means.containsKey(meanKey)) {
      means.get(meanKey).addSample(totalDurationMs, count);
    }
  }

  protected void addTotalTimeStatistic(String statistic) {
    assert (statistic.contains("_duration"));
    String parentCounterKey = statistic.replace("_duration", "");
    String parentMeanKey = getMeanKey(parentCounterKey);

    assert (means.containsKey(parentMeanKey) && opsCount.containsKey(parentCounterKey));

    total.get(statistic).set(means.get(parentMeanKey).sum);
  }

  void updateStats(
      GhfsStatistic statistic,
      long minLatency,
      long maxLatency,
      long totalDuration,
      int count,
      Object context) {

    String symbol = statistic.getSymbol();
    updateMinMaxStats(minLatency, maxLatency, context, symbol);
    addMeanStatistic(statistic.getSymbol(), totalDuration, count);
    incrementCounter(statistic, count);

    updateConnectorHadoopApiTime(totalDuration);
  }

  private void updateMinMaxStats(
      long minDurationMs, long maxDurationMs, Object context, String symbol) {
    String minKey = getMinKey(symbol);

    AtomicLong minVal = minimums.get(minKey);
    if (minVal == null) {
      // There can be race here. It is ok to have the last write win.
      minimums.put(minKey, new AtomicLong(minDurationMs));
    } else if (minDurationMs < minVal.get()) {
      minVal.set(minDurationMs);
    }

    String maxKey = getMaxKey(symbol);
    AtomicLong maxVal = maximums.get(maxKey);
    if (maxDurationMs > maxVal.get()) {
      if (maxDurationMs > LATENCY_LOGGING_THRESHOLD_MS
          && UPTIME_STOP_WATCH.elapsed().getSeconds() > WARMUP_THRESHOLD_SEC) {
        logger.atInfo().log(
            "Detected potential high latency for operation %s. latencyMs=%s; previousMaxLatencyMs=%s; operationCount=%s; context=%s; thread=%s",
            symbol,
            maxDurationMs,
            maxVal.get(),
            opsCount.get(symbol),
            context,
            Thread.currentThread().getName());
      }

      // There can be race here and can have some data points get missed. This is a corner case.
      // Since this function can be called quite frequently, opting for performance over consistency
      // here.
      maxVal.set(maxDurationMs);
    }
  }

  void incrementGcsExceptionCount() {
    increment(EXCEPTION_COUNT);
  }

  void incrementGcsTotalRequestCount() {
    increment(GCS_API_REQUEST_COUNT);
  }

  void incrementRateLimitingCounter() {
    increment(GCS_API_CLIENT_RATE_LIMIT_COUNT);
  }

  void incrementGcsClientSideCounter() {
    increment(GCS_API_CLIENT_SIDE_ERROR_COUNT);
  }

  void incrementGcsServerSideCounter() {
    increment(GCS_API_SERVER_SIDE_ERROR_COUNT);
  }

  void incrementGcsClientBadRequestCount() {
    increment(GCS_API_CLIENT_BAD_REQUEST_COUNT);
  }

  void incrementGcsClientUnauthorizedResponseCount() {
    increment(GCS_API_CLIENT_UNAUTHORIZED_RESPONSE_COUNT);
  }

  void incrementGcsClientNotFoundResponseCount() {
    increment(GCS_API_CLIENT_NOT_FOUND_RESPONSE_COUNT);
  }

  void incrementGcsClientRequestTimeoutCount() {
    increment(GCS_API_CLIENT_REQUEST_TIMEOUT_COUNT);
  }

  void incrementGcsClientGoneResponseCount() {
    increment(GCS_API_CLIENT_GONE_RESPONSE_COUNT);
  }

  void incrementGcsClientPreconditionFailedResponseCount() {
    increment(GCS_API_CLIENT_PRECONDITION_FAILED_RESPONSE_COUNT);
  }

  void incrementGcsClientRequestedRangeNotSatisfiableCount() {
    increment(GCS_API_CLIENT_REQUESTED_RANGE_NOT_SATISFIABLE_COUNT);
  }

  void incrementGcsServerInternalErrorCount() {
    increment(GCS_API_SERVER_INTERNAL_ERROR_COUNT);
  }

  void incrementGcsServerBadGatewayCount() {
    increment(GCS_API_SERVER_BAD_GATEWAY_COUNT);
  }

  void incrementGcsServerServiceUnavailableCount() {
    increment(GCS_API_SERVER_SERVICE_UNAVAILABLE_COUNT);
  }

  void incrementGcsServerTimeoutCount() {
    increment(GCS_API_SERVER_TIMEOUT_COUNT);
  }

  void streamReadBytes(int bytesRead) {
    incrementCounter(GhfsStatistic.STREAM_READ_BYTES, bytesRead);
  }

  /** If more data was requested than was actually returned, this was an incomplete read. */
  void streamReadOperationInComplete(int requested, int actual) {
    if (requested > actual) {
      increment(GhfsStatistic.STREAM_READ_OPERATIONS_INCOMPLETE);
    }
  }

  void streamReadSeekBackward(long negativeOffset) {
    increment(GhfsStatistic.STREAM_READ_SEEK_BACKWARD_OPERATIONS);
    incrementCounter(GhfsStatistic.STREAM_READ_SEEK_BYTES_BACKWARDS, -negativeOffset);
  }

  void streamReadSeekForward(long skipped) {
    if (skipped > 0) {
      incrementCounter(GhfsStatistic.STREAM_READ_SEEK_BYTES_SKIPPED, skipped);
    }

    increment(GhfsStatistic.STREAM_READ_SEEK_FORWARD_OPERATIONS);
  }

  void streamWriteBytes(int bytesWritten) {
    incrementCounter(GhfsStatistic.STREAM_WRITE_BYTES, bytesWritten);
  }

  void filesCreated() {
    increment(FILES_CREATED);
  }

  public void fileDeleted(int count) {
    incrementCounter(FILES_DELETED, count);
  }

  public void directoryCreated() {
    incrementCounter(DIRECTORIES_CREATED, 1);
  }

  private class LongIterator implements Iterator {
    private Iterator iterator = getMetricNames();

    private Iterator getMetricNames() {
      ArrayList metrics = new ArrayList<>();

      metrics.addAll(opsCount.keySet());
      metrics.addAll(minimums.keySet());
      metrics.addAll(maximums.keySet());
      metrics.addAll(means.keySet());
      for (String statistic : total.keySet()) {
        addTotalTimeStatistic(statistic);
      }

      metrics.addAll(total.keySet());

      return metrics.iterator();
    }

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public LongStatistic next() {
      if (!iterator.hasNext()) {
        throw new NoSuchElementException();
      }

      final String entry = iterator.next();
      return new LongStatistic(entry, getValue(entry));
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  private long getValue(String key) {
    if (opsCount.containsKey(key)) {
      return opsCount.get(key).longValue();
    }

    if (maximums.containsKey(key)) {
      return maximums.get(key).longValue();
    }

    if (minimums.containsKey(key) && minimums.get(key) != null) {
      return minimums.get(key).longValue();
    }

    if (means.containsKey(key)) {
      return Math.round(means.get(key).getValue());
    }

    if (total.containsKey(key)) {
      return total.get(key).longValue();
    }

    return 0L;
  }

  @Override
  public Iterator getLongStatistics() {
    return new LongIterator();
  }

  @Override
  public Long getLong(String key) {
    return this.getValue(key);
  }

  @Override
  public boolean isTracked(String key) {
    return opsCount.containsKey(key)
        || maximums.containsKey(key)
        || minimums.containsKey(key)
        || means.containsKey(key)
        || total.containsKey(key);
  }

  /**
   * To get the minimum value which is stored with MINIMUM extension
   *
   * @param symbol
   * @return minimum statistic value
   */
  public Long getMin(String symbol) {
    AtomicLong minValue = minimums.get(getMinKey(symbol));
    if (minValue == null) {
      return 0L;
    }

    return minValue.longValue();
  }

  private void addStatistic(String symbol, StatisticTypeEnum type) {
    opsCount.put(symbol, new AtomicLong(0));

    if (type == StatisticTypeEnum.TYPE_DURATION || type == StatisticTypeEnum.TYPE_DURATION_TOTAL) {
      minimums.put(getMinKey(symbol), null);
      maximums.put(getMaxKey(symbol), new AtomicLong(0));
      means.put(getMeanKey(symbol), new MeanStatistic());
      if (type == StatisticTypeEnum.TYPE_DURATION_TOTAL) {
        total.put(getTimeKey(symbol), new AtomicDouble(0.0));
      }
    }
  }

  private String getMinKey(String symbol) {
    return symbol + "_min";
  }

  private String getMaxKey(String symbol) {
    return symbol + "_max";
  }

  private String getMeanKey(String symbol) {
    return symbol + "_mean";
  }

  private String getTimeKey(String symbol) {
    return symbol + "_duration";
  }

  /**
   * To get the maximum value which is stored with MAXIMUM extension
   *
   * @param symbol
   * @return maximum statistic value
   */
  public Long getMax(String symbol) {
    AtomicLong maxValue = maximums.get(getMaxKey(symbol));
    if (maxValue == null) {
      return 0L;
    }

    return maxValue.longValue();
  }

  /**
   * To get the mean value which is stored with MEAN extension
   *
   * @param key
   * @return mean statistic value
   */
  public double getMean(String key) {
    MeanStatistic val = means.get(getMeanKey(key));
    if (val == null) {
      return 0;
    }

    return val.getValue();
  }

  /** This class keeps track of mean statistics by keeping track of sum and number of samples. */
  static class MeanStatistic {
    private int sample;

    private long sum;

    synchronized void addSample(long val) {
      addSample(val, 1);
    }

    synchronized void addSample(long val, int count) {
      sample += count;
      sum += val;
    }

    double getValue() {
      if (sample == 0) {
        return 0;
      }

      return sum / Math.max(1, sample); // to protect against the race with reset().
    }

    public void reset() {
      sum = 0;
      sample = 0;
    }
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    for (Iterator it = this.getLongStatistics(); it.hasNext(); ) {
      LongStatistic statistic = it.next();

      if (sb.length() != 0) {
        sb.append(", ");
      }
      sb.append(String.format("%s=%s", statistic.getName(), statistic.getValue()));
    }

    return String.format("[%s]", sb);
  }
}