package com.pinterest.doctorkafka.stats;

import com.pinterest.doctorkafka.AvroTopicPartition;
import com.pinterest.doctorkafka.BrokerError;
import com.pinterest.doctorkafka.BrokerStats;
import com.pinterest.doctorkafka.ReplicaStat;
import com.pinterest.doctorkafka.util.OperatorUtil;
import com.pinterest.doctorkafka.util.KafkaUtils;

import kafka.common.TopicAndPartition;
import kafka.controller.ReassignedPartitionsContext;
import kafka.utils.ZkUtils;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.Node;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.security.auth.SecurityProtocol;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import javax.management.Attribute;
import javax.management.AttributeList;
import javax.management.InstanceNotFoundException;
import javax.management.MBeanServerConnection;
import javax.management.MalformedObjectNameException;
import javax.management.ObjectName;
import javax.management.ReflectionException;


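/**
 * Collects per-broker statistics (disk, network, CPU, replica distribution) by combining the
 * Kafka consumer API, ZooKeeper reassignment state, JMX metrics, and host-level sources such
 * as /proc/net/dev and ec2metadata.
 *
 * Minimal usage sketch (the config path, interface name, host, and JMX port below are
 * illustrative placeholders, not values shipped with this class):
 *
 * <pre>{@code
 * BrokerStatsRetriever retriever =
 *     new BrokerStatsRetriever("/etc/kafka/server.properties", "eth0");
 * BrokerStats stats = retriever.retrieveBrokerStats("localhost", "9999");
 * }</pre>
 */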
public class BrokerStatsRetriever {

  private static final String PROC_NET_DEV = "/proc/net/dev";
  private static final Logger LOG = LogManager.getLogger(BrokerStatsRetriever.class);
  private static final String VERSION = "0.1.15";
  private static final String KAFKA_LOG = "kafka.log";
  private static final String KAFKA_SERVER = "kafka.server";
  private static final String LOG_DIR = "log.dir";
  private static final String LOG_DIRS = "log.dirs";
  private static final String SECURITY_INTER_BROKER_PROTOCOL = "security.inter.broker.protocol";
  private static final String ZOOKEEPER_CONNECT = "zookeeper.connect";

  private String zkUrl = null;
  private String kafkaConfigPath;
  private BrokerStats brokerStats = null;
  private SecurityProtocol securityProtocol = SecurityProtocol.PLAINTEXT;
  
  private NetworkStats currentNetworkStats;
  private String primaryNetworkInterfaceName;

  public BrokerStatsRetriever(String kafkaConfigPath, String primaryNetworkInterfaceName) {
    this.kafkaConfigPath = kafkaConfigPath;
    this.primaryNetworkInterfaceName = primaryNetworkInterfaceName;
  }


  private Map<String, Future<KafkaMetricValue>> getTopicMetrics(MBeanServerConnection mbs,
                                                                Set<String> topics,
                                                                String metricTemplate,
                                                                String attributeName) {
    Map<String, Future<KafkaMetricValue>> futures = new HashMap<>();
    for (String topic : topics) {
      String metricName = String.format(metricTemplate, topic);
      LOG.info(metricName);
      Future<KafkaMetricValue> metricValueFuture =
          MetricsRetriever.getMetricValue(mbs, metricName, attributeName);
      futures.put(topic, metricValueFuture);
    }
    return futures;
  }


  private Map<String, Long> getTopicNetworkMetricFromFutures(
      Map<String, Future<KafkaMetricValue>> futures)
      throws ExecutionException, InterruptedException {
    Map<String, Long> metricValues = new HashMap<>();

    for (Map.Entry<String, Future<KafkaMetricValue>> entry : futures.entrySet()) {
      Future<KafkaMetricValue> future = entry.getValue();
      KafkaMetricValue metricValue = future.get();
      if (!metricValue.getException()) {
        metricValues.put(entry.getKey(), metricValue.toLong());
      } else {
        LOG.warn("Got exception for {}", entry.getKey(), metricValue.exception);
      }
    }
    return metricValues;
  }

  public static double getProcessCpuLoad(MBeanServerConnection mbs)
      throws MalformedObjectNameException, NullPointerException, InstanceNotFoundException,
             ReflectionException, IOException {
    ObjectName name = ObjectName.getInstance("java.lang:type=OperatingSystem");
    AttributeList list = mbs.getAttributes(name, new String[]{"ProcessCpuLoad"});

    if (list.isEmpty()) {
      return 0.0;
    }

    Attribute att = (Attribute) list.get(0);
    Double value = (Double) att.getValue();

    // usually takes a couple of seconds before we get real values
    if (value == -1.0) {
      return 0.0;
    }
    // returns a percentage value with 1 decimal point precision
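    // Example (illustrative input): a raw ProcessCpuLoad of 0.45678 becomes
    // (int) (0.45678 * 1000) / 10.0 == 45.6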
    return ((int) (value * 1000) / 10.0);
  }
  
  /**
   * Parse system network stats from /proc/net/dev and extract stats for each device
   * 
   * e.g. 
   * Inter-|   Receive                                                |  Transmit
   * face |bytes    packets errs drop fifo frame compressed multicast|bytes    packets errs drop fifo colls carrier compressed
   * eth0: 228953186524426 101855936261    0    0    0     0          0         0 243091098637101 104189136919    0    0    0     0       0          0
   * lo: 2866497873928 6603763924    0    0    0     0          0         0 2866497873928 6603763924    0    0    0     0       0          0
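   *
   * For the eth0 line above (values illustrative), the returned map would contain
   * "eth0" -> NetworkStats{rxBytes=228953186524426, rxPackets=101855936261,
   * txBytes=243091098637101, txPackets=104189136919}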
   * @return a map from network-device name (e.g. "eth0") to its current NetworkStats
   */
  public static Map<String, NetworkStats> getSystemNetworkStats() {
    Map<String, NetworkStats> deviceNetworkStatsMap = new HashMap<>();
    try {
      List<String> lines = Files.readAllLines(new File(PROC_NET_DEV).toPath());
      // ignore first two lines since they are header
      for (int i = 2; i < lines.size(); i++) {
        String line = lines.get(i);
        // data lines start with whitespace, so splits[0] is empty and splits[1] holds the
        // device name (e.g. "eth0:")
        String[] splits = line.split("\\s+");
        String deviceName = splits[1].replace(":", "");
        
        NetworkStats deviceStats = new NetworkStats();
        deviceStats.setTimestamp(System.currentTimeMillis());
        
        deviceStats.setRxBytes(Long.parseLong(splits[2]));
        deviceStats.setRxPackets(Long.parseLong(splits[3]));
        
        deviceStats.setTxBytes(Long.parseLong(splits[10]));
        deviceStats.setTxPackets(Long.parseLong(splits[11]));
        
        deviceNetworkStatsMap.put(deviceName, deviceStats);
      }
    } catch (IOException e) {
      LOG.error("Failed to fetch system network stats", e);
    }
    return deviceNetworkStatsMap;
  }

  /**
   *  Kafka jmx metrics only provide per-topic network inbound/outbound traffic stats. We need
   *  to compute per-replica network traffic from them. The key observation is that follower
   *  replicas should only have inbound traffic, while leader replicas have both inbound and
   *  outbound traffic. Also note that kafka only keeps track of the BytesInPerSec and
   *  BytesOutPerSec metrics for *leader* partitions. See https://github.com/apache/kafka/blob/trunk/core/src/\
   *   main/scala/kafka/server/ReplicaManager.scala#L546 for details.
   *  Specifically, we can find the code path as follows:
   *
   *    partition.appendRecordsToLeader(records, isFromClient, requiredAcks)
   *     ...
   *    BrokerTopicStats.getBrokerTopicStats(topicPartition.topic).bytesInRate.mark(...)
   *    BrokerTopicStats.getBrokerAllTopicsStats.bytesInRate.mark(records.sizeInBytes)
   *    BrokerTopicStats.getBrokerTopicStats(topicPartition.topic).messagesInRate.mark(...)
   *    BrokerTopicStats.getBrokerAllTopicsStats.messagesInRate.mark(numAppendedMessages)
   *
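   *  Worked example (illustrative numbers): if a topic's BytesInPerSec OneMinuteRate is
   *  1,000,000 bytes/sec and its two leader replicas on this broker hold 60 GB and 40 GB of
   *  log data, this method assigns them 600,000 and 400,000 bytes/sec respectively.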
   */
  private void computeTopicPartitionReplicaNetworkTraffic(
      List<ReplicaStat> replicaStats,
      Set<String> topics,
      Map<String, Long> topicsBytesIn1Min, Map<String, Long> topicsBytesOut1Min,
      Map<String, Long> topicsBytesIn5Min, Map<String, Long> topicsBytesOut5Min,
      Map<String, Long> topicsBytesIn15Min, Map<String, Long> topicsBytesOut15Min) {

    Map<String, List<ReplicaStat>> replicaStatsMap = new HashMap<>();
    Map<String, List<ReplicaStat>> leaderReplicaStatsMap = new HashMap<>();

    for (ReplicaStat replicaStat : replicaStats) {
      String topic = replicaStat.getTopic();
      replicaStatsMap.putIfAbsent(topic, new ArrayList<>());
      replicaStatsMap.get(topic).add(replicaStat);
      if (replicaStat.getIsLeader()) {
        leaderReplicaStatsMap.putIfAbsent(topic, new ArrayList<>());
        leaderReplicaStatsMap.get(topic).add(replicaStat);
      }
    }

    for (String topic : topics) {
      // Get the list of topic partitions
      if (!topicsBytesIn1Min.containsKey(topic) || !leaderReplicaStatsMap.containsKey(topic)) {
        LOG.info("{} does not have ByteInPerSec info", topic);
        continue;
      }
      List<ReplicaStat> leaderReplicaStats = leaderReplicaStatsMap.get(topic);

      long topicBytesIn1Min = topicsBytesIn1Min.get(topic);
      long topicBytesIn5Min = topicsBytesIn5Min.get(topic);
      long topicBytesIn15Min = topicsBytesIn15Min.get(topic);

      long leaderReplicasTotalBytes =
          leaderReplicaStats.stream().map(stat -> stat.getLogSizeInBytes()).reduce(0L, Long::sum);
      for (ReplicaStat stat : leaderReplicaStats) {
        double ratio = (double) stat.getLogSizeInBytes() / (double) leaderReplicasTotalBytes;
        long in1Min = Double.valueOf(topicBytesIn1Min * ratio).longValue();
        long in5Min = Double.valueOf(topicBytesIn5Min * ratio).longValue();
        long in15Min = Double.valueOf(topicBytesIn15Min * ratio).longValue();
        stat.setBytesIn1MinMeanRate(in1Min);
        stat.setBytesIn5MinMeanRate(in5Min);
        stat.setBytesIn15MinMeanRate(in15Min);
      }
    }

    for (String topic : topics) {
      if (!topicsBytesOut1Min.containsKey(topic) || topicsBytesOut1Min.get(topic) <= 0 ||
          !leaderReplicaStatsMap.containsKey(topic)) {
        continue;
      }
      long topicBytesOut1Min = topicsBytesOut1Min.get(topic);
      long topicBytesOut5Min = topicsBytesOut5Min.get(topic);
      long topicBytesOut15Min = topicsBytesOut15Min.get(topic);

      List<ReplicaStat> leaderReplicaStats = leaderReplicaStatsMap.get(topic);
      long replicasTotalBytes =
          leaderReplicaStats.stream().map(stat -> stat.getLogSizeInBytes()).reduce(0L, Long::sum);
      for (ReplicaStat stat : leaderReplicaStats) {
        double ratio = (double) stat.getLogSizeInBytes() / (double) replicasTotalBytes;
        long out1Min = Double.valueOf(topicBytesOut1Min * ratio).longValue();
        long out5Min = Double.valueOf(topicBytesOut5Min * ratio).longValue();
        long out15Min = Double.valueOf(topicBytesOut15Min * ratio).longValue();
        stat.setBytesOut1MinMeanRate(out1Min);
        stat.setBytesOut5MinMeanRate(out5Min);
        stat.setBytesOut15MinMeanRate(out15Min);
      }
    }
  }

  private void computeTopicPartitionReplicaCpuUsage(double totalCpuUsage,
                                                    List<ReplicaStat> replicaStats) {
    long totalTraffic = replicaStats.stream()
        .map(stat -> stat.getBytesIn1MinMeanRate() + stat.getBytesOut1MinMeanRate())
        .reduce(0L, Long::sum);

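    // Example (illustrative numbers): with a total CPU usage of 50.0 and two replicas whose
    // combined 1-min in+out rates are 3 MB/s and 1 MB/s, the replicas get 37.5 and 12.5.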
    replicaStats.stream().forEach(stat -> {
      double ratio = (double) (stat.getBytesIn1MinMeanRate() + stat.getBytesOut1MinMeanRate())
          / (double) totalTraffic;
      double tpCpuUsage = totalCpuUsage * ratio;
      stat.setCpuUsage(tpCpuUsage);
    });
  }


  private Set<TopicPartition> leaderReplicas = null;
  private Set<TopicPartition> replicas = null;
  private Set<TopicPartition> inSyncReplicas = null;
  private Set<String> topicNames = null;
  private Set<TopicPartition> inReassignReplicas = null;

  /**
   * Get stats on leader replicas, in-reassignment replica etc. through kafka api
   */
  private void retrieveStatsThroughKafkaApi() {
    Properties props = new Properties();
    String bootstrapBrokers = OperatorUtil.getBrokers(this.zkUrl, this.securityProtocol);
    props.put(KafkaUtils.BOOTSTRAP_SERVERS, bootstrapBrokers);
    props.put("group.id", "brokerstats_local");
    props.put("key.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    props.put("value.deserializer", "org.apache.kafka.common.serialization.ByteArrayDeserializer");
    KafkaConsumer<byte[], byte[]> kafkaConsumer = new KafkaConsumer<>(props);

    leaderReplicas = new HashSet<>();
    replicas = new HashSet<>();
    inSyncReplicas = new HashSet<>();
    topicNames = new HashSet<>();
    inReassignReplicas = new HashSet<>();

    try {
      Map<String, List<PartitionInfo>> partitionInfoMap = kafkaConsumer.listTopics();
      for (String topic : partitionInfoMap.keySet()) {
        List<PartitionInfo> partitionInfos = partitionInfoMap.get(topic);
        for (PartitionInfo partitionInfo : partitionInfos) {
          TopicPartition tp = new TopicPartition(partitionInfo.topic(), partitionInfo.partition());
          if (partitionInfo.leader().id() == brokerStats.getId()) {
            leaderReplicas.add(tp);
          }
          for (Node node : partitionInfo.replicas()) {
            if (node.id() == brokerStats.getId()) {
              replicas.add(tp);
              topicNames.add(partitionInfo.topic());
            }
          }
          for (Node node : partitionInfo.inSyncReplicas()) {
            if (node.id() == brokerStats.getId()) {
              inSyncReplicas.add(tp);
            }
          }
        }
      }

      brokerStats.setNumReplicas(replicas.size());
      brokerStats.setNumLeaders(leaderReplicas.size());
      brokerStats.setLeaderReplicas(new ArrayList<>());
      for (TopicPartition tp : leaderReplicas) {
        AvroTopicPartition avroTp = new AvroTopicPartition(tp.topic(), tp.partition());
        brokerStats.getLeaderReplicas().add(avroTp);
      }

      brokerStats.setFollowerReplicas(new ArrayList<>());
      for (TopicPartition tp : replicas) {
        if (!leaderReplicas.contains(tp)) {
          AvroTopicPartition avroTp = new AvroTopicPartition(tp.topic(), tp.partition());
          brokerStats.getFollowerReplicas().add(avroTp);
        }
      }
    } catch (Exception e) {
      LOG.error("Failed to get replica info", e);
    } finally {
      kafkaConsumer.close();
    }

    // get the on-going partition reassignment information
    ZkUtils zkUtils = OperatorUtil.getZkUtils(zkUrl);
    scala.collection.Map<TopicAndPartition, ReassignedPartitionsContext> scalaTpMap =
        zkUtils.getPartitionsBeingReassigned();

    Map<TopicAndPartition, ReassignedPartitionsContext> tpMap =
        scala.collection.JavaConverters.mapAsJavaMap(scalaTpMap);
    brokerStats.setInReassignmentReplicas(new ArrayList<>());
    for (Map.Entry<TopicAndPartition, ReassignedPartitionsContext> entry : tpMap.entrySet()) {
      TopicPartition tp = new TopicPartition(entry.getKey().topic(), entry.getKey().partition());
      if (replicas.contains(tp)) {
        AvroTopicPartition avroTp = new AvroTopicPartition(tp.topic(), tp.partition());
        ReassignedPartitionsContext context = entry.getValue();
        brokerStats.getInReassignmentReplicas().add(avroTp);
        inReassignReplicas.add(tp);
      }
    }
  }


  private void retrieveNetworkStats(MBeanServerConnection mbs, Set<String> topics)
      throws ExecutionException, InterruptedException {
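    // Each metric below is a Yammer Meter that Kafka exposes over JMX; OneMinuteRate,
    // FiveMinuteRate, and FifteenMinuteRate are its exponentially-weighted rate attributes.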
    Map<String, Future<KafkaMetricValue>> bytesIn1MinFutures = getTopicMetrics(mbs, topics,
        "kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec,topic=%s", "OneMinuteRate");
    Map<String, Future<KafkaMetricValue>> bytesOut1MinFutures = getTopicMetrics(mbs, topics,
        "kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec,topic=%s", "OneMinuteRate");

    Map<String, Future<KafkaMetricValue>> bytesIn5MinFutures = getTopicMetrics(mbs, topics,
        "kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec,topic=%s", "FiveMinuteRate");
    Map<String, Future<KafkaMetricValue>> bytesOut5MinFutures = getTopicMetrics(mbs, topics,
        "kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec,topic=%s", "FiveMinuteRate");

    Map<String, Future<KafkaMetricValue>> bytesIn15MinFutures = getTopicMetrics(mbs, topics,
        "kafka.server:type=BrokerTopicMetrics,name=BytesInPerSec,topic=%s", "FifteenMinuteRate");
    Map<String, Future<KafkaMetricValue>> bytesOut15MinFutures = getTopicMetrics(mbs, topics,
        "kafka.server:type=BrokerTopicMetrics,name=BytesOutPerSec,topic=%s", "FifteenMinuteRate");

    Map<String, Long> bytesIn1MinRate = getTopicNetworkMetricFromFutures(bytesIn1MinFutures);
    Map<String, Long> bytesOut1MinRate = getTopicNetworkMetricFromFutures(bytesOut1MinFutures);
    Map<String, Long> bytesIn5MinRate = getTopicNetworkMetricFromFutures(bytesIn5MinFutures);
    Map<String, Long> bytesOut5MinRate = getTopicNetworkMetricFromFutures(bytesOut5MinFutures);
    Map<String, Long> bytesIn15MinRate = getTopicNetworkMetricFromFutures(bytesIn15MinFutures);
    Map<String, Long> bytesOut15MinRate = getTopicNetworkMetricFromFutures(bytesOut15MinFutures);

    brokerStats.setTopicsBytesIn1MinRate(bytesIn1MinRate);
    brokerStats.setTopicsBytesOut1MinRate(bytesOut1MinRate);
    brokerStats.setTopicsBytesIn5MinRate(bytesIn5MinRate);
    brokerStats.setTopicsBytesOut5MinRate(bytesOut5MinRate);
    brokerStats.setTopicsBytesIn15MinRate(bytesIn15MinRate);
    brokerStats.setTopicsBytesOut15MinRate(bytesOut15MinRate);

    // set the sum of network in/out traffic in bytes
    long inOneMinuteRateSum = bytesIn1MinRate.values().stream().reduce(0L, (a, b) -> a + b);
    long outOneMinuteRateSum = bytesOut1MinRate.values().stream().reduce(0L, (a, b) -> a + b);

    long in5MinRateSum = bytesIn5MinRate.values().stream().reduce(0L, (a, b) -> a + b);
    long out5MinRateSum = bytesOut5MinRate.values().stream().reduce(0L, (a, b) -> a + b);

    long in15MinRateSum = bytesIn15MinRate.values().stream().reduce(0L, (a, b) -> a + b);
    long out15MinRateSum = bytesOut15MinRate.values().stream().reduce(0L, (a, b) -> a + b);

    brokerStats.setLeadersBytesIn1MinRate(inOneMinuteRateSum);
    brokerStats.setLeadersBytesOut1MinRate(outOneMinuteRateSum);

    brokerStats.setLeadersBytesIn5MinRate(in5MinRateSum);
    brokerStats.setLeadersBytesOut5MinRate(out5MinRateSum);

    brokerStats.setLeadersBytesIn15MinRate(in15MinRateSum);
    brokerStats.setLeadersBytesOut15MinRate(out15MinRateSum);
  }


  /**
   *  set information on availability zone, instance type, ami-id, hostname etc.
   */
  private void setBrokerInstanceInfo() {
    BufferedReader input = null;
    try {
      Process process = Runtime.getRuntime().exec("ec2metadata");
      input = new BufferedReader(new InputStreamReader(process.getInputStream()));

      String outputLine;
      while ((outputLine = input.readLine()) != null) {
        String[] elements = outputLine.split(":");
        if (elements.length != 2) {
          continue;
        }
        if (elements[0].equals("availability-zone")) {
          brokerStats.setAvailabilityZone(elements[1].trim());
        } else if (elements[0].equals("instance-type")) {
          brokerStats.setInstanceType(elements[1].trim());
        } else if (elements[0].equals("ami-id")) {
          brokerStats.setAmiId(elements[1].trim());
        } else if (elements[0].equals("hostname")) { // This only works if the hostname is explicitly set in AWS
          brokerStats.setName(elements[1].trim());
        }
      }
    } catch (Exception e) {
      LOG.error("Failed to get ec2 metadata", e);
    } finally {
      if (input != null) {
        try {
          input.close();
        } catch (IOException e) {
          LOG.error("Failed to close bufferReader", e);
        }
      }
    }
    // Not every Kafka cluster lives in AWS
    if (brokerStats.getName() == null) {
      brokerStats.setName(OperatorUtil.getHostname());
    }
    LOG.info("set hostname to {}", brokerStats.getName());
  }

  /**
   *  Set the broker id info for BrokerStats. First we try to find broker.id in
   *  kafka server.properties. If it is not found there, we check meta.properties under
   *  the kafka log directory to get the broker id information.
   */
  private void setBrokerIdInfo(BrokerStats brokerStats, Properties kafkaProperties)
      throws IOException {
    String brokerIdStr = kafkaProperties.getProperty("broker.id");
    if (brokerIdStr == null) {
      // load from $kafka_log_dir/meta.properties
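      // Example meta.properties content (illustrative; only broker.id is read here):
      //   version=0
      //   broker.id=1001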
      String logDirsStr = kafkaProperties.getProperty("log.dirs").split(",")[0];
      kafkaProperties.load(new FileInputStream(logDirsStr + "/meta.properties"));
      brokerIdStr = kafkaProperties.getProperty("broker.id");
    }

    int brokerId = Integer.parseInt(brokerIdStr);
    brokerStats.setId(brokerId);
  }


  /**
   *  This method is to fill up the information for kafka version, brokerId, etc.
   */
  private void setBrokerConfiguration() {
    BufferedReader input = null;
    try {
      String outputLine;
      // getRuntime: Returns the runtime object associated with the current Java application.
      // exec: Executes the specified string command in a separate process.
      Process process = Runtime.getRuntime().exec("ps -few");
      input = new BufferedReader(new InputStreamReader(process.getInputStream()));


      while ((outputLine = input.readLine()) != null) {
        if (outputLine.contains("kafka.Kafka")) {
          int cpIndex = outputLine.indexOf("-cp");
          outputLine = outputLine.substring(cpIndex);
          String[] args = outputLine.replace('\t', ' ').split(" ");

          // Derive the Kafka version from the kafka-clients jar on the classpath; e.g. a
          // classpath entry "kafka-clients-0.10.2.1.jar" (illustrative) yields "0.10.2.1".
          String classPathsStr = args[1];
          int clientJarBeginIndex = classPathsStr.indexOf("kafka-clients");
          int clientJarEndIndex = classPathsStr.indexOf(':', clientJarBeginIndex);
          String clientJar = classPathsStr.substring(clientJarBeginIndex, clientJarEndIndex);
          String[] substrs = clientJar.split("-");
          String version = substrs[2].replace(".jar", "");
          brokerStats.setKafkaVersion(version);
        }
      }

      Properties properties = new Properties();
      try {
        properties.load(new FileInputStream(kafkaConfigPath));
      } catch (IOException e) {
        LOG.error("Failed to load configuration file {}", kafkaConfigPath, e);
        brokerStats.setHasFailure(true);
        brokerStats.setFailureReason(BrokerError.KafkaServerProperitiesFailure);
        return;
      }

      setBrokerIdInfo(brokerStats, properties);
      zkUrl = properties.getProperty(ZOOKEEPER_CONNECT);
      brokerStats.setZkUrl(zkUrl);

      String securityProtocolStr = properties.containsKey(SECURITY_INTER_BROKER_PROTOCOL)
                                       ? properties.getProperty(SECURITY_INTER_BROKER_PROTOCOL)
                                       : "PLAINTEXT";
      securityProtocol = Enum.valueOf(SecurityProtocol.class, securityProtocolStr);

      // log.dirs specifies the directories in which the log data is kept.
      // If not set, the value in log.dir is used
      String logDirsStr = properties.containsKey(LOG_DIRS) ? properties.getProperty(LOG_DIRS)
                                                           : properties.getProperty(LOG_DIR);
      brokerStats.setLogFilesPath(logDirsStr);
      long freeSpaceInBytes = 0;
      long totalSpaceInBytes = 0;

      for (String logDir : logDirsStr.split(",")) {
        File tmpDir = new File(logDir);
        if (!tmpDir.exists()) {
          brokerStats.setHasFailure(true);
          brokerStats.setFailureReason(BrokerError.DiskFailure);
          return;
        } else {
          freeSpaceInBytes += tmpDir.getFreeSpace();
          totalSpaceInBytes += tmpDir.getTotalSpace();
        }
      }

      brokerStats.setFreeDiskSpaceInBytes(freeSpaceInBytes);
      brokerStats.setTotalDiskSpaceInBytes(totalSpaceInBytes);
    } catch (Exception e) {
      LOG.error("Failed to get broker configuration", e);
      brokerStats.setHasFailure(true);
      brokerStats.setFailureReason(BrokerError.UnknownError);
    } finally {
      try {
        if (input != null) {
          input.close();
        }
      } catch (Exception e) {
        LOG.error("Failed to close input stream", e);
      }
    }
  }

  private Set<ObjectName> getMetricObjectNames(MBeanServerConnection mbs) {
    Set<ObjectName> objectNames = null;
    try {
      objectNames = mbs.queryNames(null, null);
    } catch (IOException e) {
      LOG.error("Failed to get object names", e);
    }
    return objectNames;
  }


  /**
   *  The following are the steps for getting broker metrics:
   *     1. get all object names
   *     2. get topic partitions on the broker, and the leader brokers
   *     3. get BytesInPerSec metric for all topics
   *     4. get BytesOutPerSec metric for all topics
   *
   */
  public BrokerStats retrieveBrokerStats(String hostname, String jmxPort) throws Exception {
    // reset the broker stats object
    brokerStats = new BrokerStats();

    brokerStats.setHasFailure(false);
    long currentTime = System.currentTimeMillis();
    brokerStats.setTimestamp(currentTime);
    brokerStats.setStatsVersion(VERSION);

    // set information on availability zone, instance type, ami-id, hostname etc.
    setBrokerInstanceInfo();
    // set broker id, log dir, default retention hours etc.
    setBrokerConfiguration();

    if (brokerStats.getHasFailure()) {
      // return if we have encountered a failure.
      return brokerStats;
    }

    MBeanServerConnection mbs = OperatorUtil.getMBeanServerConnection(hostname, jmxPort);
    if (mbs == null) {
      brokerStats.setHasFailure(true);
      brokerStats.setFailureReason(BrokerError.JmxConnectionFailure);
      return brokerStats;
    }

    Set<ObjectName> objectNames = getMetricObjectNames(mbs);
    if (objectNames == null) {
      brokerStats.setHasFailure(true);
      brokerStats.setFailureReason(BrokerError.JmxQueryFailure);
      return brokerStats;
    }

    // get the reassignment partition info etc., and initialize fields like
    // leaderReplicas, replicas, inSyncReplicas, topicNames
    retrieveStatsThroughKafkaApi();

    // get bytesIn/bytesOut per second metrics
    retrieveNetworkStats(mbs, topicNames);

    // retrieve the stats of each replica
    List<Future<ReplicaStat>> replicaStatsFutures = new ArrayList<>();
    for (TopicPartition topicPartition : leaderReplicas) {
      boolean isLeader = leaderReplicas.contains(topicPartition);
      boolean inReassignment = inReassignReplicas.contains(topicPartition);
      Future<ReplicaStat> future = MetricsRetriever.getTopicPartitionReplicaStats(
          mbs, topicPartition, isLeader, inReassignment);
      replicaStatsFutures.add(future);
    }

    brokerStats.setLeaderReplicaStats(new ArrayList<>());
    for (Future<ReplicaStat> future : replicaStatsFutures) {
      ReplicaStat replicaStat = future.get();
      brokerStats.getLeaderReplicaStats().add(replicaStat);
    }

    // Kafka brokers only report network traffic at the topic level. If one leader replica
    // is involved in partition reassignment, the network traffic for that topic will be skewed.
    // Currently there is no way for us to distinguish the traffic among replicas.
    // Because of this, we are marking all leader replicas of the same topic as "InReassignment",
    // and have DoctorKafka service ignore these stats while computing the MaxIn/MaxOut stats
    // for those topic partitions.
    Set<String> topicsInReassignment = new HashSet<>();
    brokerStats.getLeaderReplicaStats().stream().filter(rs -> rs.getInReassignment())
        .forEach(replicaStat -> topicsInReassignment.add(replicaStat.getTopic()));
    for (ReplicaStat replicaStat : brokerStats.getLeaderReplicaStats()) {
      if (topicsInReassignment.contains(replicaStat.getTopic())) {
        replicaStat.setInReassignment(true);
      }
    }

    computeTopicPartitionReplicaNetworkTraffic(
        brokerStats.getLeaderReplicaStats(), topicNames,
        brokerStats.getTopicsBytesIn1MinRate(), brokerStats.getTopicsBytesOut1MinRate(),
        brokerStats.getTopicsBytesIn5MinRate(), brokerStats.getTopicsBytesOut5MinRate(),
        brokerStats.getTopicsBytesIn15MinRate(), brokerStats.getTopicsBytesOut15MinRate());

    double cpuLoad = getProcessCpuLoad(mbs);
    brokerStats.setCpuUsage(cpuLoad);
    computeTopicPartitionReplicaCpuUsage(cpuLoad, brokerStats.getLeaderReplicaStats());
    
    brokerStats.setHasFailure(false);
    return brokerStats;
  }
  
  private void computeNetworkStats(NetworkStats networkStats, BrokerStats brokerStats) {
    if (this.currentNetworkStats == null) {
      this.currentNetworkStats = networkStats;
    } else {
      // take delta of timestamp
      long deltaT = networkStats.getTimestamp() - currentNetworkStats.getTimestamp();
      // convert to seconds
      deltaT = deltaT / 1000;

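      // Example (illustrative numbers): two polls 60 seconds apart reporting rxBytes of
      // 1,000,000 and then 7,000,000 give sysBytesIn1MinRate = 6,000,000 / 60 = 100,000 B/s.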
      if (deltaT > 0) {
        long rxRate = networkStats.getRxBytes() - currentNetworkStats.getRxBytes();
        long txRate = networkStats.getTxBytes() - currentNetworkStats.getTxBytes();
        // per second average rate during the poll window
        brokerStats.setSysBytesIn1MinRate(rxRate / deltaT);
        brokerStats.setSysBytesOut1MinRate(txRate / deltaT);
      }
      // update the current for next poll
      currentNetworkStats = networkStats;
    }
  }

  public static class NetworkStats {
    
    private long timestamp;
    private long rxBytes;
    private long rxPackets;
    private long txBytes;
    private long txPackets;
    
    public NetworkStats() {
    }

    public NetworkStats(long rxBytes, long rxPackets, long txBytes,
        long txPackets) {
      this.rxBytes = rxBytes;
      this.rxPackets = rxPackets;
      this.txBytes = txBytes;
      this.txPackets = txPackets;
    }

    /**
     * @return the timestamp
     */
    public long getTimestamp() {
      return timestamp;
    }

    /**
     * @param timestamp the timestamp to set
     */
    public void setTimestamp(long timestamp) {
      this.timestamp = timestamp;
    }

    /**
     * @return the rxBytes
     */
    public long getRxBytes() {
      return rxBytes;
    }

    /**
     * @param rxBytes the rxBytes to set
     */
    public void setRxBytes(long rxBytes) {
      this.rxBytes = rxBytes;
    }

    /**
     * @return the rxPackets
     */
    public long getRxPackets() {
      return rxPackets;
    }

    /**
     * @param rxPackets the rxPackets to set
     */
    public void setRxPackets(long rxPackets) {
      this.rxPackets = rxPackets;
    }

    /**
     * @return the txBytes
     */
    public long getTxBytes() {
      return txBytes;
    }

    /**
     * @param txBytes the txBytes to set
     */
    public void setTxBytes(long txBytes) {
      this.txBytes = txBytes;
    }

    /**
     * @return the txPackets
     */
    public long getTxPackets() {
      return txPackets;
    }

    /**
     * @param txPackets the txPackets to set
     */
    public void setTxPackets(long txPackets) {
      this.txPackets = txPackets;
    }
    
  }
}