All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.ipc.DecayRpcScheduler Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.ipc;

import java.lang.ref.WeakReference;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicLongArray;
import java.util.concurrent.atomic.AtomicReference;

import javax.management.ObjectName;

import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;

import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.AtomicDoubleArray;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.hadoop.ipc.metrics.DecayRpcSchedulerDetailedMetrics;
import org.apache.hadoop.ipc.metrics.RpcMetrics;
import org.apache.hadoop.metrics2.MetricsCollector;
import org.apache.hadoop.metrics2.MetricsRecordBuilder;
import org.apache.hadoop.metrics2.MetricsSource;
import org.apache.hadoop.metrics2.lib.DefaultMetricsSystem;
import org.apache.hadoop.metrics2.lib.Interns;
import org.apache.hadoop.metrics2.util.MBeans;
import org.apache.hadoop.metrics2.util.Metrics2Util.NameValuePair;
import org.apache.hadoop.metrics2.util.Metrics2Util.TopN;

import org.apache.hadoop.classification.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.apache.hadoop.ipc.ProcessingDetails.Timing;

/**
 * The decay RPC scheduler tracks the cost of incoming requests in a map, then
 * decays the costs at a fixed time interval. The scheduler is optimized
 * for large periods (on the order of seconds), as it offloads work to the
 * decay sweep.
 */
public class DecayRpcScheduler implements RpcScheduler,
    DecayRpcSchedulerMXBean, MetricsSource {
  /**
   * Period controls how many milliseconds between each decay sweep.
   */
  public static final String IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_KEY =
      "decay-scheduler.period-ms";
  public static final long IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_DEFAULT =
      5000;
  @Deprecated
  public static final String IPC_FCQ_DECAYSCHEDULER_PERIOD_KEY =
    "faircallqueue.decay-scheduler.period-ms";

  /**
   * Decay factor controls how much each cost is suppressed by on each sweep.
   * Valid numbers are > 0 and < 1. Decay factor works in tandem with
   * period
   * to control how long the scheduler remembers an identity.
   */
  public static final String IPC_SCHEDULER_DECAYSCHEDULER_FACTOR_KEY =
      "decay-scheduler.decay-factor";
  public static final double IPC_SCHEDULER_DECAYSCHEDULER_FACTOR_DEFAULT =
      0.5;
  @Deprecated
  public static final String IPC_FCQ_DECAYSCHEDULER_FACTOR_KEY =
    "faircallqueue.decay-scheduler.decay-factor";

 /**
   * Thresholds are specified as integer percentages, and specify which usage
   * range each queue will be allocated to. For instance, specifying the list
   *  10, 40, 80
   * implies 4 queues, with
   * - q3 from 80% up
   * - q2 from 40 up to 80
   * - q1 from 10 up to 40
   * - q0 otherwise.
   */
  public static final String IPC_DECAYSCHEDULER_THRESHOLDS_KEY =
      "decay-scheduler.thresholds";
  @Deprecated
  public static final String IPC_FCQ_DECAYSCHEDULER_THRESHOLDS_KEY =
      "faircallqueue.decay-scheduler.thresholds";

  /**
   *  Service users will always be scheduled into the highest-priority queue.
   *  They are specified as a comma-separated list.
   */
  public static final String IPC_DECAYSCHEDULER_SERVICE_USERS_KEY =
      "decay-scheduler.service-users";

  // Specifies the identity to use when the IdentityProvider cannot handle
  // a schedulable.
  public static final String DECAYSCHEDULER_UNKNOWN_IDENTITY =
      "IdentityProvider.Unknown";

  public static final String
      IPC_DECAYSCHEDULER_BACKOFF_RESPONSETIME_ENABLE_KEY =
      "decay-scheduler.backoff.responsetime.enable";
  public static final Boolean
      IPC_DECAYSCHEDULER_BACKOFF_RESPONSETIME_ENABLE_DEFAULT = false;

  // Specifies the average response time (ms) thresholds of each
  // level to trigger backoff
  public static final String
      IPC_DECAYSCHEDULER_BACKOFF_RESPONSETIME_THRESHOLDS_KEY =
      "decay-scheduler.backoff.responsetime.thresholds";

  // Specifies the top N user's call count and scheduler decision
  // Metrics2 Source
  public static final String DECAYSCHEDULER_METRICS_TOP_USER_COUNT =
      "decay-scheduler.metrics.top.user.count";
  public static final int DECAYSCHEDULER_METRICS_TOP_USER_COUNT_DEFAULT = 10;

  public static final Logger LOG =
      LoggerFactory.getLogger(DecayRpcScheduler.class);

  private static final ObjectWriter WRITER = new ObjectMapper().writer();

  // Track the decayed and raw (no decay) number of calls for each schedulable
  // identity from all previous decay windows: idx 0 for decayed call cost and
  // idx 1 for the raw call cost
  private final ConcurrentHashMap> callCosts =
      new ConcurrentHashMap>();

  // Should be the sum of all AtomicLongs in decayed callCosts except
  // service-user.
  private final AtomicLong totalDecayedCallCost = new AtomicLong();
  // The sum of all AtomicLongs in raw callCosts except service-user.
  private final AtomicLong totalRawCallCost = new AtomicLong();
  // Should be the sum of all AtomicLongs in decayed callCosts of service-user.
  private final AtomicLong totalServiceUserDecayedCallCost = new AtomicLong();
  // The sum of all AtomicLongs in raw callCosts of service-user.
  private final AtomicLong totalServiceUserRawCallCost = new AtomicLong();


  // Track total call count and response time in current decay window
  private final AtomicLongArray responseTimeCountInCurrWindow;
  private final AtomicLongArray responseTimeTotalInCurrWindow;

  // Track average response time in previous decay window
  private final AtomicDoubleArray responseTimeAvgInLastWindow;
  private final AtomicLongArray responseTimeCountInLastWindow;

  // RPC queue time rates per queue
  private final DecayRpcSchedulerDetailedMetrics
      decayRpcSchedulerDetailedMetrics;

  // Pre-computed scheduling decisions during the decay sweep are
  // atomically swapped in as a read-only map
  private final AtomicReference> scheduleCacheRef =
    new AtomicReference>();

  // Tune the behavior of the scheduler
  private final long decayPeriodMillis; // How long between each tick
  private final double decayFactor; // nextCost = currentCost * decayFactor
  private final int numLevels;
  private final double[] thresholds;
  private final IdentityProvider identityProvider;
  private final boolean backOffByResponseTimeEnabled;
  private final long[] backOffResponseTimeThresholds;
  private final String namespace;
  private final int topUsersCount; // e.g., report top 10 users' metrics
  private static final double PRECISION = 0.0001;
  private final TimeUnit metricsTimeUnit;
  private MetricsProxy metricsProxy;
  private final CostProvider costProvider;
  private final Map staticPriorities = new HashMap<>();
  private Set serviceUserNames;

  /**
   * This TimerTask will call decayCurrentCosts until
   * the scheduler has been garbage collected.
   */
  public static class DecayTask extends TimerTask {
    private WeakReference schedulerRef;
    private Timer timer;

    public DecayTask(DecayRpcScheduler scheduler, Timer timer) {
      this.schedulerRef = new WeakReference(scheduler);
      this.timer = timer;
    }

    @Override
    public void run() {
      DecayRpcScheduler sched = schedulerRef.get();
      if (sched != null) {
        sched.decayCurrentCosts();
      } else {
        // Our scheduler was garbage collected since it is no longer in use,
        // so we should terminate the timer as well
        timer.cancel();
        timer.purge();
      }
    }
  }

  /**
   * Create a decay scheduler.
   * @param numLevels number of priority levels
   * @param ns config prefix, so that we can configure multiple schedulers
   *           in a single instance.
   * @param conf configuration to use.
   */
  public DecayRpcScheduler(int numLevels, String ns, Configuration conf) {
    if(numLevels < 1) {
      throw new IllegalArgumentException("Number of Priority Levels must be " +
          "at least 1");
    }
    this.numLevels = numLevels;
    this.namespace = ns;
    this.decayFactor = parseDecayFactor(ns, conf);
    this.decayPeriodMillis = parseDecayPeriodMillis(ns, conf);
    this.identityProvider = this.parseIdentityProvider(ns, conf);
    this.costProvider = this.parseCostProvider(ns, conf);
    this.thresholds = parseThresholds(ns, conf, numLevels);
    this.backOffByResponseTimeEnabled = parseBackOffByResponseTimeEnabled(ns,
        conf);
    this.backOffResponseTimeThresholds =
        parseBackOffResponseTimeThreshold(ns, conf, numLevels);
    this.serviceUserNames = this.parseServiceUserNames(ns, conf);

    // Setup response time metrics
    responseTimeTotalInCurrWindow = new AtomicLongArray(numLevels);
    responseTimeCountInCurrWindow = new AtomicLongArray(numLevels);
    responseTimeAvgInLastWindow = new AtomicDoubleArray(numLevels);
    responseTimeCountInLastWindow = new AtomicLongArray(numLevels);

    topUsersCount =
        conf.getInt(DECAYSCHEDULER_METRICS_TOP_USER_COUNT,
            DECAYSCHEDULER_METRICS_TOP_USER_COUNT_DEFAULT);
    Preconditions.checkArgument(topUsersCount > 0,
        "the number of top users for scheduler metrics must be at least 1");

    decayRpcSchedulerDetailedMetrics =
        DecayRpcSchedulerDetailedMetrics.create(ns);
    decayRpcSchedulerDetailedMetrics.init(numLevels);

    metricsTimeUnit = RpcMetrics.getMetricsTimeUnit(conf);

    // Setup delay timer
    Timer timer = new Timer(true);
    DecayTask task = new DecayTask(this, timer);
    timer.scheduleAtFixedRate(task, decayPeriodMillis, decayPeriodMillis);

    metricsProxy = MetricsProxy.getInstance(ns, numLevels, this);
    recomputeScheduleCache();
  }

  private CostProvider parseCostProvider(String ns, Configuration conf) {
    List providers = conf.getInstances(
        ns + "." + CommonConfigurationKeys.IPC_COST_PROVIDER_KEY,
        CostProvider.class);

    if (providers.size() < 1) {
      String[] nsPort = ns.split("\\.");
      if (nsPort.length == 2) {
        // Only if ns is split with ".", we can separate namespace and port.
        // In the absence of "ipc..cost-provider.impl" property,
        // we look up "ipc.cost-provider.impl" property.
        providers = conf.getInstances(
            nsPort[0] + "." + CommonConfigurationKeys.IPC_COST_PROVIDER_KEY,
            CostProvider.class);
      }
    }

    if (providers.size() < 1) {
      LOG.info("CostProvider not specified, defaulting to DefaultCostProvider");
      return new DefaultCostProvider();
    } else if (providers.size() > 1) {
      LOG.warn("Found multiple CostProviders; using: {}",
          providers.get(0).getClass());
    }

    CostProvider provider = providers.get(0); // use the first
    provider.init(ns, conf);
    return provider;
  }

  // Load configs
  private IdentityProvider parseIdentityProvider(String ns,
      Configuration conf) {
    List providers = conf.getInstances(
      ns + "." + CommonConfigurationKeys.IPC_IDENTITY_PROVIDER_KEY,
      IdentityProvider.class);

    if (providers.size() < 1) {
      String[] nsPort = ns.split("\\.");
      if (nsPort.length == 2) {
        // Only if ns is split with ".", we can separate namespace and port.
        // In the absence of "ipc..identity-provider.impl" property,
        // we look up "ipc.identity-provider.impl" property.
        providers = conf.getInstances(
            nsPort[0] + "." + CommonConfigurationKeys.IPC_IDENTITY_PROVIDER_KEY,
            IdentityProvider.class);
      }
    }

    if (providers.size() < 1) {
      LOG.info("IdentityProvider not specified, " +
        "defaulting to UserIdentityProvider");
      return new UserIdentityProvider();
    }

    return providers.get(0); // use the first
  }

  private static double parseDecayFactor(String ns, Configuration conf) {
    double factor = conf.getDouble(ns + "." +
        IPC_FCQ_DECAYSCHEDULER_FACTOR_KEY, 0.0);
    if (factor == 0.0) {
      factor = conf.getDouble(ns + "." +
          IPC_SCHEDULER_DECAYSCHEDULER_FACTOR_KEY,
          IPC_SCHEDULER_DECAYSCHEDULER_FACTOR_DEFAULT);
    } else if ((factor > 0.0) && (factor < 1)) {
      LOG.warn(IPC_FCQ_DECAYSCHEDULER_FACTOR_KEY +
          " is deprecated. Please use " +
          IPC_SCHEDULER_DECAYSCHEDULER_FACTOR_KEY + ".");
    }
    if (factor <= 0 || factor >= 1) {
      throw new IllegalArgumentException("Decay Factor " +
        "must be between 0 and 1");
    }

    return factor;
  }

  private static long parseDecayPeriodMillis(String ns, Configuration conf) {
    long period = conf.getLong(ns + "." +
        IPC_FCQ_DECAYSCHEDULER_PERIOD_KEY,
        0);
    if (period == 0) {
      period = conf.getLong(ns + "." +
          IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_KEY,
          IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_DEFAULT);
    } else if (period > 0) {
      LOG.warn((IPC_FCQ_DECAYSCHEDULER_PERIOD_KEY +
          " is deprecated. Please use " +
          IPC_SCHEDULER_DECAYSCHEDULER_PERIOD_KEY));
    }
    if (period <= 0) {
      throw new IllegalArgumentException("Period millis must be >= 0");
    }

    return period;
  }

  private static double[] parseThresholds(String ns, Configuration conf,
      int numLevels) {
    int[] percentages = conf.getInts(ns + "." +
        IPC_FCQ_DECAYSCHEDULER_THRESHOLDS_KEY);

    if (percentages.length == 0) {
      percentages = conf.getInts(ns + "." + IPC_DECAYSCHEDULER_THRESHOLDS_KEY);
      if (percentages.length == 0) {
        return getDefaultThresholds(numLevels);
      }
    } else {
      LOG.warn(IPC_FCQ_DECAYSCHEDULER_THRESHOLDS_KEY +
          " is deprecated. Please use " +
          IPC_DECAYSCHEDULER_THRESHOLDS_KEY);
    }

    if (percentages.length != numLevels-1) {
      throw new IllegalArgumentException("Number of thresholds should be " +
        (numLevels-1) + ". Was: " + percentages.length);
    }

    // Convert integer percentages to decimals
    double[] decimals = new double[percentages.length];
    for (int i = 0; i < percentages.length; i++) {
      decimals[i] = percentages[i] / 100.0;
    }

    return decimals;
  }

  private Set parseServiceUserNames(String ns, Configuration conf) {
    Collection collection = conf.getStringCollection(
        ns + "." + IPC_DECAYSCHEDULER_SERVICE_USERS_KEY);
    return new HashSet<>(collection);
  }

  /**
   * Generate default thresholds if user did not specify. Strategy is
   * to halve each time, since queue usage tends to be exponential.
   * So if numLevels is 4, we would generate: double[]{0.125, 0.25, 0.5}
   * which specifies the boundaries between each queue's usage.
   * @param numLevels number of levels to compute for
   * @return array of boundaries of length numLevels - 1
   */
  private static double[] getDefaultThresholds(int numLevels) {
    double[] ret = new double[numLevels - 1];
    double div = Math.pow(2, numLevels - 1);

    for (int i = 0; i < ret.length; i++) {
      ret[i] = Math.pow(2, i)/div;
    }
    return ret;
  }

  private static long[] parseBackOffResponseTimeThreshold(String ns,
      Configuration conf, int numLevels) {
    long[] responseTimeThresholds = conf.getTimeDurations(ns + "." +
            IPC_DECAYSCHEDULER_BACKOFF_RESPONSETIME_THRESHOLDS_KEY,
        TimeUnit.MILLISECONDS);
    // backoff thresholds not specified
    if (responseTimeThresholds.length == 0) {
      return getDefaultBackOffResponseTimeThresholds(numLevels);
    }
    // backoff thresholds specified but not match with the levels
    if (responseTimeThresholds.length != numLevels) {
      throw new IllegalArgumentException(
          "responseTimeThresholds must match with the number of priority " +
          "levels");
    }
    // invalid thresholds
    for (long responseTimeThreshold: responseTimeThresholds) {
      if (responseTimeThreshold <= 0) {
        throw new IllegalArgumentException(
            "responseTimeThreshold millis must be >= 0");
      }
    }
    return responseTimeThresholds;
  }

  // 10s for level 0, 20s for level 1, 30s for level 2, ...
  private static long[] getDefaultBackOffResponseTimeThresholds(int numLevels) {
    long[] ret = new long[numLevels];
    for (int i = 0; i < ret.length; i++) {
      ret[i] = 10000*(i+1);
    }
    return ret;
  }

  private static Boolean parseBackOffByResponseTimeEnabled(String ns,
      Configuration conf) {
    return conf.getBoolean(ns + "." +
        IPC_DECAYSCHEDULER_BACKOFF_RESPONSETIME_ENABLE_KEY,
        IPC_DECAYSCHEDULER_BACKOFF_RESPONSETIME_ENABLE_DEFAULT);
  }

  /**
   * Decay the stored costs for each user and clean as necessary.
   * This method should be called periodically in order to keep
   * costs current.
   */
  private void decayCurrentCosts() {
    LOG.debug("Start to decay current costs.");
    try {
      long totalDecayedCost = 0;
      long totalRawCost = 0;
      long totalServiceUserDecayedCost = 0;
      long totalServiceUserRawCost = 0;
      Iterator>> it =
          callCosts.entrySet().iterator();

      while (it.hasNext()) {
        Map.Entry> entry = it.next();
        AtomicLong decayedCost = entry.getValue().get(0);
        AtomicLong rawCost = entry.getValue().get(1);


        // Compute the next value by reducing it by the decayFactor
        long currentValue = decayedCost.get();
        long nextValue = (long) (currentValue * decayFactor);
        if (isServiceUser((String) entry.getKey())) {
          totalServiceUserRawCost += rawCost.get();
          totalServiceUserDecayedCost += nextValue;
        } else {
          totalRawCost += rawCost.get();
          totalDecayedCost += nextValue;
        }
        decayedCost.set(nextValue);

        LOG.debug(
            "Decaying costs for the user: {}, its decayedCost: {}, rawCost: {}",
            entry.getKey(), nextValue, rawCost.get());
        if (nextValue == 0) {
          LOG.debug("The decayed cost for the user {} is zero " +
              "and being cleaned.", entry.getKey());
          // We will clean up unused keys here. An interesting optimization
          // might be to have an upper bound on keyspace in callCosts and only
          // clean once we pass it.
          it.remove();
        }
      }

      // Update the total so that we remain in sync
      totalDecayedCallCost.set(totalDecayedCost);
      totalRawCallCost.set(totalRawCost);
      totalServiceUserDecayedCallCost.set(totalServiceUserDecayedCost);
      totalServiceUserRawCallCost.set(totalServiceUserRawCost);

      LOG.debug("After decaying the stored costs, totalDecayedCost: {}, "
              + "totalRawCallCost: {}, totalServiceUserDecayedCost: {},"
              + " totalServiceUserRawCost: {}.", totalDecayedCost, totalRawCost,
          totalServiceUserDecayedCost, totalServiceUserRawCost);
      // Now refresh the cache of scheduling decisions
      recomputeScheduleCache();

      // Update average response time with decay
      updateAverageResponseTime(true);
    } catch (Exception ex) {
      LOG.error("decayCurrentCosts exception: " +
          ExceptionUtils.getStackTrace(ex));
      throw ex;
    }
  }

  /**
   * Update the scheduleCache to match current conditions in callCosts.
   */
  private void recomputeScheduleCache() {
    Map nextCache = new HashMap();

    for (Map.Entry> entry : callCosts.entrySet()) {
      Object id = entry.getKey();
      AtomicLong value = entry.getValue().get(0);

      long snapshot = value.get();
      int computedLevel = computePriorityLevel(snapshot, id);

      nextCache.put(id, computedLevel);
    }

    // Swap in to activate
    scheduleCacheRef.set(Collections.unmodifiableMap(nextCache));
  }

  /**
   * Adjust the stored cost for a given identity.
   *
   * @param identity the identity of the user whose cost should be adjusted
   * @param costDelta the cost to add for the given identity
   */
  private void addCost(Object identity, long costDelta) {
    // We will increment the cost, or create it if no such cost exists
    List cost = this.callCosts.get(identity);
    if (cost == null) {
      // Create the costs since no such cost exists.
      // idx 0 for decayed call cost
      // idx 1 for the raw call cost
      cost = new ArrayList(2);
      cost.add(new AtomicLong(0));
      cost.add(new AtomicLong(0));

      // Put it in, or get the AtomicInteger that was put in by another thread
      List otherCost = callCosts.putIfAbsent(identity, cost);
      if (otherCost != null) {
        cost = otherCost;
      }
    }

    // Update the total
    if (!isServiceUser((String) identity)) {
      totalDecayedCallCost.getAndAdd(costDelta);
      totalRawCallCost.getAndAdd(costDelta);
    } else {
      totalServiceUserDecayedCallCost.getAndAdd(costDelta);
      totalServiceUserRawCallCost.getAndAdd(costDelta);
    }

    // At this point value is guaranteed to be not null. It may however have
    // been clobbered from callCosts. Nonetheless, we return what
    // we have.
    cost.get(1).getAndAdd(costDelta);
    cost.get(0).getAndAdd(costDelta);
  }

  /**
   * Given the cost for an identity, compute a scheduling decision.
   *
   * @param cost the cost for an identity
   * @param identity the identity of the user
   * @return scheduling decision from 0 to numLevels - 1
   */
  private int computePriorityLevel(long cost, Object identity) {
    // The priority for service users is always 0
    if (isServiceUser((String)identity)) {
      return 0;
    }
    Integer staticPriority = staticPriorities.get(identity);
    if (staticPriority != null) {
      return staticPriority.intValue();
    }
    long totalCallSnapshot = totalDecayedCallCost.get();

    double proportion = 0;
    if (totalCallSnapshot > 0) {
      proportion = (double) cost / totalCallSnapshot;
    }

    // Start with low priority levels, since they will be most common
    for(int i = (numLevels - 1); i > 0; i--) {
      if (proportion >= this.thresholds[i - 1]) {
        return i; // We've found our level number
      }
    }

    // If we get this far, we're at level 0
    return 0;
  }

  /**
   * Returns the priority level for a given identity by first trying the cache,
   * then computing it.
   * @param identity an object responding to toString and hashCode
   * @return integer scheduling decision from 0 to numLevels - 1
   */
  private int cachedOrComputedPriorityLevel(Object identity) {
    // Try the cache
    Map scheduleCache = scheduleCacheRef.get();
    if (scheduleCache != null) {
      Integer priority = scheduleCache.get(identity);
      if (priority != null) {
        LOG.debug("Cache priority for: {} with priority: {}", identity,
            priority);
        return priority;
      }
    }

    // Cache was no good, compute it
    List costList = callCosts.get(identity);
    long currentCost = costList == null ? 0 : costList.get(0).get();
    int priority = computePriorityLevel(currentCost, identity);
    LOG.debug("compute priority for identity: {}={}", identity,
        priority);
    return priority;
  }

  private String getIdentity(Schedulable obj) {
    String identity = this.identityProvider.makeIdentity(obj);
    if (identity == null) {
      // Identity provider did not handle this
      identity = DECAYSCHEDULER_UNKNOWN_IDENTITY;
    }
    return identity;
  }

  /**
   * Compute the appropriate priority for a schedulable based on past requests.
   * @param obj the schedulable obj to query and remember
   * @return the level index which we recommend scheduling in
   */
  @Override
  public int getPriorityLevel(Schedulable obj) {
    // First get the identity
    String identity = getIdentity(obj);
    // highest priority users may have a negative priority but their
    // calls will be priority 0.
    return Math.max(0, cachedOrComputedPriorityLevel(identity));
  }

  @VisibleForTesting
  int getPriorityLevel(UserGroupInformation ugi) {
    String identity = getIdentity(newSchedulable(ugi));
    // returns true priority of the user.
    return cachedOrComputedPriorityLevel(identity);
  }

  @VisibleForTesting
  void setPriorityLevel(UserGroupInformation ugi, int priority) {
    String identity = getIdentity(newSchedulable(ugi));
    priority = Math.min(numLevels - 1, priority);
    LOG.info("Setting priority for user: {}={}", identity, priority);
    staticPriorities.put(identity, priority);
  }

  // dummy instance to conform to identity provider api.
  private static Schedulable newSchedulable(UserGroupInformation ugi) {
    return new Schedulable() {
      @Override
      public UserGroupInformation getUserGroupInformation() {
        return ugi;
      }

      @Override
      public int getPriorityLevel() {
        return 0;
      }
    };
  }

  private boolean isServiceUser(String userName) {
    return this.serviceUserNames.contains(userName);
  }

  @Override
  public boolean shouldBackOff(Schedulable obj) {
    Boolean backOff = false;
    if (backOffByResponseTimeEnabled) {
      int priorityLevel = obj.getPriorityLevel();
      if (LOG.isDebugEnabled()) {
        double[] responseTimes = getAverageResponseTime();
        LOG.debug("Current Caller: {}  Priority: {} ",
            obj.getUserGroupInformation().getUserName(),
            obj.getPriorityLevel());
        for (int i = 0; i < numLevels; i++) {
          LOG.debug("Queue: {} responseTime: {} backoffThreshold: {}", i,
              responseTimes[i], backOffResponseTimeThresholds[i]);
        }
      }
      // High priority rpc over threshold triggers back off of low priority rpc
      for (int i = 0; i < priorityLevel + 1; i++) {
        if (responseTimeAvgInLastWindow.get(i) >
            backOffResponseTimeThresholds[i]) {
          backOff = true;
          break;
        }
      }
    }
    return backOff;
  }

  @Override
  public void addResponseTime(String callName, Schedulable schedulable,
      ProcessingDetails details) {
    String user = identityProvider.makeIdentity(schedulable);
    long processingCost = costProvider.getCost(details);
    addCost(user, processingCost);

    int priorityLevel = schedulable.getPriorityLevel();
    long queueTime = details.get(Timing.QUEUE, metricsTimeUnit);
    long processingTime = details.get(Timing.PROCESSING,
        metricsTimeUnit);

    this.decayRpcSchedulerDetailedMetrics.addQueueTime(
        priorityLevel, queueTime);
    this.decayRpcSchedulerDetailedMetrics.addProcessingTime(
        priorityLevel, processingTime);

    responseTimeCountInCurrWindow.getAndIncrement(priorityLevel);
    responseTimeTotalInCurrWindow.getAndAdd(priorityLevel,
        queueTime+processingTime);
    LOG.debug("addResponseTime for call: {}  priority: {} queueTime: {} " +
        "processingTime: {} ", callName, priorityLevel, queueTime,
        processingTime);
  }

  // Update the cached average response time at the end of the decay window
  void updateAverageResponseTime(boolean enableDecay) {
    for (int i = 0; i < numLevels; i++) {
      double averageResponseTime = 0;
      long totalResponseTime = responseTimeTotalInCurrWindow.get(i);
      long responseTimeCount = responseTimeCountInCurrWindow.get(i);
      if (responseTimeCount > 0) {
        averageResponseTime = (double) totalResponseTime / responseTimeCount;
      }
      final double lastAvg = responseTimeAvgInLastWindow.get(i);
      if (lastAvg > PRECISION || averageResponseTime > PRECISION) {
        if (enableDecay) {
          final double decayed = decayFactor * lastAvg + averageResponseTime;
          responseTimeAvgInLastWindow.set(i, decayed);
        } else {
          responseTimeAvgInLastWindow.set(i, averageResponseTime);
        }
      } else {
        responseTimeAvgInLastWindow.set(i, 0);
      }
      responseTimeCountInLastWindow.set(i, responseTimeCount);
      LOG.debug("updateAverageResponseTime queue: {} Average: {} Count: {}",
          i, averageResponseTime, responseTimeCount);
      // Reset for next decay window
      responseTimeTotalInCurrWindow.set(i, 0);
      responseTimeCountInCurrWindow.set(i, 0);
    }
  }

  // For testing
  @VisibleForTesting
  double getDecayFactor() {
    return decayFactor;
  }

  @VisibleForTesting
  long getDecayPeriodMillis() {
    return decayPeriodMillis;
  }

  @VisibleForTesting
  double[] getThresholds() {
    return thresholds;
  }

  @VisibleForTesting
  Set getServiceUserNames() {
    return serviceUserNames;
  }

  @VisibleForTesting
  void forceDecay() {
    decayCurrentCosts();
  }

  @VisibleForTesting
  Map getCallCostSnapshot() {
    HashMap snapshot = new HashMap();

    for (Map.Entry> entry : callCosts.entrySet()) {
      snapshot.put(entry.getKey(), entry.getValue().get(0).get());
    }

    return Collections.unmodifiableMap(snapshot);
  }

  @VisibleForTesting
  long getTotalCallSnapshot() {
    return totalDecayedCallCost.get();
  }

  /**
   * MetricsProxy is a singleton because we may init multiple schedulers and we
   * want to clean up resources when a new scheduler replaces the old one.
   */
  public static final class MetricsProxy implements DecayRpcSchedulerMXBean,
      MetricsSource {
    // One singleton per namespace
    private static final HashMap INSTANCES =
      new HashMap();

    // Weakref for delegate, so we don't retain it forever if it can be GC'd
    private WeakReference delegate;
    private double[] averageResponseTimeDefault;
    private long[] callCountInLastWindowDefault;
    private ObjectName decayRpcSchedulerInfoBeanName;

    private MetricsProxy(String namespace, int numLevels,
        DecayRpcScheduler drs) {
      averageResponseTimeDefault = new double[numLevels];
      callCountInLastWindowDefault = new long[numLevels];
      setDelegate(drs);
      decayRpcSchedulerInfoBeanName =
          MBeans.register(namespace, "DecayRpcScheduler", this);
      this.registerMetrics2Source(namespace);
    }

    public static synchronized MetricsProxy getInstance(String namespace,
        int numLevels, DecayRpcScheduler drs) {
      MetricsProxy mp = INSTANCES.get(namespace);
      if (mp == null) {
        // We must create one
        mp = new MetricsProxy(namespace, numLevels, drs);
        INSTANCES.put(namespace, mp);
      } else  if (drs != mp.delegate.get()){
        // in case of delegate is reclaimed, we should set it again
        mp.setDelegate(drs);
      }
      return mp;
    }

    public static synchronized void removeInstance(String namespace) {
      MetricsProxy.INSTANCES.remove(namespace);
    }

    public void setDelegate(DecayRpcScheduler obj) {
      this.delegate = new WeakReference(obj);
    }

    void registerMetrics2Source(String namespace) {
      final String name = "DecayRpcSchedulerMetrics2." + namespace;
      DefaultMetricsSystem.instance().register(name, name, this);
    }

    void unregisterSource(String namespace) {
      final String name = "DecayRpcSchedulerMetrics2." + namespace;
      DefaultMetricsSystem.instance().unregisterSource(name);
      if (decayRpcSchedulerInfoBeanName != null) {
        MBeans.unregister(decayRpcSchedulerInfoBeanName);
      }
    }

    @Override
    public String getSchedulingDecisionSummary() {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler == null) {
        return "No Active Scheduler";
      } else {
        return scheduler.getSchedulingDecisionSummary();
      }
    }

    @Override
    public String getCallVolumeSummary() {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler == null) {
        return "No Active Scheduler";
      } else {
        return scheduler.getCallVolumeSummary();
      }
    }

    @Override
    public int getUniqueIdentityCount() {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler == null) {
        return -1;
      } else {
        return scheduler.getUniqueIdentityCount();
      }
    }

    @Override
    public long getTotalCallVolume() {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler == null) {
        return -1;
      } else {
        return scheduler.getTotalCallVolume();
      }
    }

    @Override
    public double[] getAverageResponseTime() {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler == null) {
        return averageResponseTimeDefault;
      } else {
        return scheduler.getAverageResponseTime();
      }
    }

    public long[] getResponseTimeCountInLastWindow() {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler == null) {
        return callCountInLastWindowDefault;
      } else {
        return scheduler.getResponseTimeCountInLastWindow();
      }
    }

    @Override
    public void getMetrics(MetricsCollector collector, boolean all) {
      DecayRpcScheduler scheduler = delegate.get();
      if (scheduler != null) {
        scheduler.getMetrics(collector, all);
      }
    }
  }

  public int getUniqueIdentityCount() {
    return callCosts.size();
  }

  public long getTotalCallVolume() {
    return totalDecayedCallCost.get();
  }

  public long getTotalRawCallVolume() {
    return totalRawCallCost.get();
  }

  public long getTotalServiceUserCallVolume() {
    return totalServiceUserDecayedCallCost.get();
  }

  public long getTotalServiceUserRawCallVolume() {
    return totalServiceUserRawCallCost.get();
  }

  public long[] getResponseTimeCountInLastWindow() {
    long[] ret = new long[responseTimeCountInLastWindow.length()];
    for (int i = 0; i < responseTimeCountInLastWindow.length(); i++) {
      ret[i] = responseTimeCountInLastWindow.get(i);
    }
    return ret;
  }

  @Override
  public double[] getAverageResponseTime() {
    double[] ret = new double[responseTimeAvgInLastWindow.length()];
    for (int i = 0; i < responseTimeAvgInLastWindow.length(); i++) {
      ret[i] = responseTimeAvgInLastWindow.get(i);
    }
    return ret;
  }

  @Override
  public void getMetrics(MetricsCollector collector, boolean all) {
    // Metrics2 interface to act as a Metric source
    try {
      MetricsRecordBuilder rb = collector.addRecord(getClass().getName())
          .setContext(namespace);
      addDecayedCallVolume(rb);
      addUniqueIdentityCount(rb);
      addTopNCallerSummary(rb);
      addAvgResponseTimePerPriority(rb);
      addCallVolumePerPriority(rb);
      addRawCallVolume(rb);
      addServiceUserDecayedCallVolume(rb);
      addServiceUserRawCallVolume(rb);
    } catch (Exception e) {
      LOG.warn("Exception thrown while metric collection. Exception : "
          + e.getMessage());
    }
  }

  // Key: UniqueCallers
  private void addUniqueIdentityCount(MetricsRecordBuilder rb) {
    rb.addCounter(Interns.info("UniqueCallers", "Total unique callers"),
        getUniqueIdentityCount());
  }

  // Key: DecayedCallVolume
  private void addDecayedCallVolume(MetricsRecordBuilder rb) {
    rb.addCounter(Interns.info("DecayedCallVolume", "Decayed Total " +
        "incoming Call Volume"), getTotalCallVolume());
  }

  private void addRawCallVolume(MetricsRecordBuilder rb) {
    rb.addCounter(Interns.info("CallVolume", "Raw Total " +
        "incoming Call Volume"), getTotalRawCallVolume());
  }

  // Key: ServiceUserDecayedCallVolume.
  private void addServiceUserDecayedCallVolume(MetricsRecordBuilder rb) {
    rb.addCounter(Interns.info("ServiceUserDecayedCallVolume",
        "Service-user Decayed Total incoming Call Volume"),
        getTotalServiceUserCallVolume());
  }

  // Key: ServiceUserCallVolume.
  private void addServiceUserRawCallVolume(MetricsRecordBuilder rb) {
    rb.addCounter(Interns.info("ServiceUserCallVolume",
        "Service-user Raw Total incoming Call Volume"),
        getTotalServiceUserRawCallVolume());
  }

  // Key: Priority.0.CompletedCallVolume
  private void addCallVolumePerPriority(MetricsRecordBuilder rb) {
    for (int i = 0; i < responseTimeCountInLastWindow.length(); i++) {
      rb.addGauge(Interns.info("Priority." + i + ".CompletedCallVolume",
          "Completed Call volume " +
          "of priority "+ i), responseTimeCountInLastWindow.get(i));
    }
  }

  // Key: Priority.0.AvgResponseTime
  private void addAvgResponseTimePerPriority(MetricsRecordBuilder rb) {
    for (int i = 0; i < responseTimeAvgInLastWindow.length(); i++) {
      rb.addGauge(Interns.info("Priority." + i + ".AvgResponseTime", "Average" +
          " response time of priority " + i),
          responseTimeAvgInLastWindow.get(i));
    }
  }

  // Key: Caller(xyz).Volume and Caller(xyz).Priority
  private void addTopNCallerSummary(MetricsRecordBuilder rb) {
    TopN topNCallers = getTopCallers(topUsersCount);
    Map decisions = scheduleCacheRef.get();
    final int actualCallerCount = topNCallers.size();
    for (int i = 0; i < actualCallerCount; i++) {
      NameValuePair entry =  topNCallers.poll();
      String topCaller = "Caller(" + entry.getName() + ")";
      String topCallerVolume = topCaller + ".Volume";
      String topCallerPriority = topCaller + ".Priority";
      rb.addCounter(Interns.info(topCallerVolume, topCallerVolume),
          entry.getValue());
      Integer priority = decisions.get(entry.getName());
      if (priority != null) {
        rb.addCounter(Interns.info(topCallerPriority, topCallerPriority),
            priority);
      }
    }
  }

  // Get the top N callers' raw call cost and scheduler decision
  private TopN getTopCallers(int n) {
    TopN topNCallers = new TopN(n);
    Iterator>> it =
        callCosts.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry> entry = it.next();
      String caller = entry.getKey().toString();
      Long cost = entry.getValue().get(1).get();
      if (cost > 0) {
        topNCallers.offer(new NameValuePair(caller, cost));
      }
    }
    return topNCallers;
  }

  public String getSchedulingDecisionSummary() {
    Map decisions = scheduleCacheRef.get();
    if (decisions == null) {
      return "{}";
    } else {
      try {
        return WRITER.writeValueAsString(decisions);
      } catch (Exception e) {
        return "Error: " + e.getMessage();
      }
    }
  }

  public String getCallVolumeSummary() {
    try {
      return WRITER.writeValueAsString(getDecayedCallCosts());
    } catch (Exception e) {
      return "Error: " + e.getMessage();
    }
  }

  private Map getDecayedCallCosts() {
    Map decayedCallCosts = new HashMap<>(callCosts.size());
    Iterator>> it =
        callCosts.entrySet().iterator();
    while (it.hasNext()) {
      Map.Entry> entry = it.next();
      Object user = entry.getKey();
      Long decayedCost = entry.getValue().get(0).get();
      if (decayedCost > 0) {
        decayedCallCosts.put(user, decayedCost);
      }
    }
    return decayedCallCosts;
  }

  @VisibleForTesting
  public DecayRpcSchedulerDetailedMetrics
      getDecayRpcSchedulerDetailedMetrics() {
    return decayRpcSchedulerDetailedMetrics;
  }

  @Override
  public void stop() {
    metricsProxy.unregisterSource(namespace);
    MetricsProxy.removeInstance(namespace);
    decayRpcSchedulerDetailedMetrics.shutdown();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy