All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hive.spark.client.MetricsCollection Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hive.spark.client;

import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.hadoop.hive.common.classification.InterfaceAudience;
import org.apache.hive.spark.client.metrics.DataReadMethod;
import org.apache.hive.spark.client.metrics.InputMetrics;
import org.apache.hive.spark.client.metrics.Metrics;
import org.apache.hive.spark.client.metrics.ShuffleReadMetrics;
import org.apache.hive.spark.client.metrics.ShuffleWriteMetrics;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.Collections2;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;

/**
 * Provides metrics collected for a submitted job.
 *
 * The collected metrics can be analysed at different levels of granularity:
 * - Global (all Spark jobs triggered by client job)
 * - Spark job
 * - Stage
 * - Task
 *
 * Only successful, non-speculative tasks are considered. Metrics are updated as tasks finish,
 * so snapshots can be retrieved before the whole job completes.
 */
@InterfaceAudience.Private
public class MetricsCollection {

  private final List taskMetrics = Lists.newArrayList();
  private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();

  public Metrics getAllMetrics() {
    return aggregate(Predicates.alwaysTrue());
  }

  public Set getJobIds() {
    Function fun = new Function() {
      @Override
      public Integer apply(TaskInfo input) {
        return input.jobId;
      }
    };
    return transform(Predicates.alwaysTrue(), fun);
  }

  public Metrics getJobMetrics(int jobId) {
    return aggregate(new JobFilter(jobId));
  }

  public Set getStageIds(int jobId) {
    Function fun = new Function() {
      @Override
      public Integer apply(TaskInfo input) {
        return input.stageId;
      }
    };
    return transform(new JobFilter(jobId), fun);
  }

  public Metrics getStageMetrics(final int jobId, final int stageId) {
    return aggregate(new StageFilter(jobId, stageId));
  }

  public Set getTaskIds(int jobId, int stageId) {
    Function fun = new Function() {
      @Override
      public Long apply(TaskInfo input) {
        return input.taskId;
      }
    };
    return transform(new StageFilter(jobId, stageId), fun);
  }

  public Metrics getTaskMetrics(final int jobId, final int stageId, final long taskId) {
    Predicate filter = new Predicate() {
      @Override
      public boolean apply(TaskInfo input) {
        return jobId == input.jobId && stageId == input.stageId && taskId == input.taskId;
      }
    };
    lock.readLock().lock();
    try {
      Iterator it = Collections2.filter(taskMetrics, filter).iterator();
      if (it.hasNext()) {
        return it.next().metrics;
      } else {
        throw new NoSuchElementException("Task not found.");
      }
    } finally {
      lock.readLock().unlock();
    }
  }

  public void addMetrics(int jobId, int stageId, long taskId, Metrics metrics) {
    lock.writeLock().lock();
    try {
      taskMetrics.add(new TaskInfo(jobId, stageId, taskId, metrics));
    } finally {
      lock.writeLock().unlock();
    }
  }

  private  Set transform(Predicate filter, Function fun) {
    lock.readLock().lock();
    try {
      Collection filtered = Collections2.filter(taskMetrics, filter);
      return Sets.newHashSet(Collections2.transform(filtered, fun));
    } finally {
      lock.readLock().unlock();
    }
  }

  private Metrics aggregate(Predicate filter) {
    lock.readLock().lock();
    try {
      // Task metrics.
      long executorDeserializeTime = 0L;
      long executorRunTime = 0L;
      long resultSize = 0L;
      long jvmGCTime = 0L;
      long resultSerializationTime = 0L;
      long memoryBytesSpilled = 0L;
      long diskBytesSpilled = 0L;

      // Input metrics.
      boolean hasInputMetrics = false;
      long bytesRead = 0L;

      // Shuffle read metrics.
      boolean hasShuffleReadMetrics = false;
      int remoteBlocksFetched = 0;
      int localBlocksFetched = 0;
      long fetchWaitTime = 0L;
      long remoteBytesRead = 0L;

      // Shuffle write metrics.
      long shuffleBytesWritten = 0L;
      long shuffleWriteTime = 0L;

      for (TaskInfo info : Collections2.filter(taskMetrics, filter)) {
        Metrics m = info.metrics;
        executorDeserializeTime += m.executorDeserializeTime;
        executorRunTime += m.executorRunTime;
        resultSize += m.resultSize;
        jvmGCTime += m.jvmGCTime;
        resultSerializationTime += m.resultSerializationTime;
        memoryBytesSpilled += m.memoryBytesSpilled;
        diskBytesSpilled += m.diskBytesSpilled;

        if (m.inputMetrics != null) {
          hasInputMetrics = true;
          bytesRead += m.inputMetrics.bytesRead;
        }

        if (m.shuffleReadMetrics != null) {
          hasShuffleReadMetrics = true;
          remoteBlocksFetched += m.shuffleReadMetrics.remoteBlocksFetched;
          localBlocksFetched += m.shuffleReadMetrics.localBlocksFetched;
          fetchWaitTime += m.shuffleReadMetrics.fetchWaitTime;
          remoteBytesRead += m.shuffleReadMetrics.remoteBytesRead;
        }

        if (m.shuffleWriteMetrics != null) {
          shuffleBytesWritten += m.shuffleWriteMetrics.shuffleBytesWritten;
          shuffleWriteTime += m.shuffleWriteMetrics.shuffleWriteTime;
        }
      }

      InputMetrics inputMetrics = null;
      if (hasInputMetrics) {
        inputMetrics = new InputMetrics(bytesRead);
      }

      ShuffleReadMetrics shuffleReadMetrics = null;
      if (hasShuffleReadMetrics) {
        shuffleReadMetrics = new ShuffleReadMetrics(
          remoteBlocksFetched,
          localBlocksFetched,
          fetchWaitTime,
          remoteBytesRead);
      }

      ShuffleWriteMetrics shuffleWriteMetrics = null;
      if (hasShuffleReadMetrics) {
        shuffleWriteMetrics = new ShuffleWriteMetrics(
          shuffleBytesWritten,
          shuffleWriteTime);
      }

      return new Metrics(
        executorDeserializeTime,
        executorRunTime,
        resultSize,
        jvmGCTime,
        resultSerializationTime,
        memoryBytesSpilled,
        diskBytesSpilled,
        inputMetrics,
        shuffleReadMetrics,
        shuffleWriteMetrics);
    } finally {
        lock.readLock().unlock();
    }
  }

  private static class TaskInfo {
    final int jobId;
    final int stageId;
    final long taskId;
    final Metrics metrics;

    TaskInfo(int jobId, int stageId, long taskId, Metrics metrics) {
      this.jobId = jobId;
      this.stageId = stageId;
      this.taskId = taskId;
      this.metrics = metrics;
    }

  }

  private static class JobFilter implements Predicate {

    private final int jobId;

    JobFilter(int jobId) {
      this.jobId = jobId;
    }

    @Override
    public boolean apply(TaskInfo input) {
      return jobId == input.jobId;
    }

  }

  private static class StageFilter implements Predicate {

    private final int jobId;
    private final int stageId;

    StageFilter(int jobId, int stageId) {
      this.jobId = jobId;
      this.stageId = stageId;
    }

    @Override
    public boolean apply(TaskInfo input) {
      return jobId == input.jobId && stageId == input.stageId;
    }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy