com.twitter.crunch.CRUSHPlacementAlgorithm Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of libcrunch Show documentation
A lightweight mapping framework that maps data objects to a number of nodes, subject to constraints
The newest version!
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.crunch;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Predicate;
import com.google.common.base.Predicates;

/**
 * The transcription of the CRUSH placement algorithm from the Weil paper. This is a fairly simple
 * adaptation, but a couple of important changes have been made to work with the crunch mapping.
 */
public class CRUSHPlacementAlgorithm implements PlacementAlgorithm {
  /**
   * In case the select() method fails to select after looping back to the origin of selection after
   * so many tries, we stop the search. This constant denotes the maximum number of retries after
   * looping back to the origin. It is expected that in most cases the selection will either succeed
   * with a small number of tries, or it will never succeed. So a reasonably large number to
   * distinguish these two cases should be sufficient.
   */
  private static final int MAX_LOOPBACK_COUNT = 50;
  private static final Logger logger = LoggerFactory.getLogger(CRUSHPlacementAlgorithm.class);

  /** Whether the last round number used for an input is remembered across select() calls. */
  private final boolean keepOffset;
  /**
   * Maps an input value to the last round number (rPrime) used for it. Only allocated when
   * {@link #keepOffset} is true; null otherwise.
   */
  private final Map<Long, Integer> roundOffset;
  /** Optional tracker consulted to reject nodes and notified of each selection; may be null. */
  private final AssignmentTracker assignmentTracker;

  /**
   * Creates the crush placement object without offset keeping and without assignment tracking.
   */
  public CRUSHPlacementAlgorithm() {
    this(false);
  }

  /**
   * Creates the crush placement algorithm with the indication whether the round offset should be
   * kept for the duration of this object for successive selection of the same input.
   *
   * @param keepOffset true to remember the round offset per input between calls
   */
  public CRUSHPlacementAlgorithm(boolean keepOffset) {
    this(keepOffset, null);
  }

  /**
   * Creates the crush placement algorithm object with the assignment tracking.
   *
   * @param assignmentTracker tracker used to reject and record assignments; may be null
   */
  public CRUSHPlacementAlgorithm(AssignmentTracker assignmentTracker) {
    this(false, assignmentTracker);
  }

  // TODO consider better constructors for these options
  /**
   * Creates the crush placement algorithm with both options specified explicitly.
   *
   * @param keepOffset true to remember the round offset per input between calls
   * @param assignmentTracker tracker used to reject and record assignments; may be null
   */
  public CRUSHPlacementAlgorithm(boolean keepOffset, AssignmentTracker assignmentTracker) {
    this.keepOffset = keepOffset;
    // the offset map is only needed (and only allocated) when offsets are kept
    roundOffset = keepOffset ? new HashMap<Long, Integer>() : null;
    this.assignmentTracker = assignmentTracker;
  }

  /**
   * Returns a list of (count) nodes of the desired type. If the count is more than the number of
   * available nodes, an exception is thrown. Note that it is possible for this method to return a
   * list whose size is smaller than the requested size (count) if it is unable to select all the
   * nodes for any reason. Callers should check the size of the returned list and take action if
   * needed.
   *
   * @param parent the node at which every selection round starts
   * @param input the data value to place
   * @param count the number of nodes requested
   * @param type the node type that selected nodes must have
   * @return the selected nodes, in selection order; possibly fewer than count
   * @throws IllegalArgumentException if parent reports fewer than count nodes of the given type
   */
  public List<Node> select(Node parent, long input, int count, int type) {
    return select(parent, input, count, type, Predicates.<Node>alwaysTrue());
  }

  /**
   * Same as {@link #select(Node, long, int, int)}, but additionally skips every candidate node of
   * the requested type for which the given predicate returns false.
   *
   * @param parent the node at which every selection round starts
   * @param input the data value to place
   * @param count the number of nodes requested
   * @param type the node type that selected nodes must have
   * @param nodePredicate filter applied to each candidate of the requested type; a candidate is
   *     rejected when {@code apply} returns false
   * @return the selected nodes, in selection order; possibly fewer than count
   * @throws IllegalArgumentException if parent reports fewer than count nodes of the given type
   */
  public List<Node> select(Node parent, long input, int count, int type,
      Predicate<Node> nodePredicate) {
    int childCount = parent.getChildrenCount(type);
    if (childCount < count) {
      throw new IllegalArgumentException(count + " nodes of type " + type +
          " were requested but the tree has only " + childCount + " nodes!");
    }

    List<Node> selected = new ArrayList<Node>(count);
    // use the index stored in the map
    int offset = 0;
    if (keepOffset) {
      Integer stored = roundOffset.get(input);
      if (stored == null) {
        roundOffset.put(input, 0);
      } else {
        offset = stored;
      }
    }

    // Start from the current offset rather than 0, so that a call that runs no selection round
    // (count <= 0) writes the same offset back below instead of resetting it to 0.
    int rPrime = offset;
    for (int r = 1; r <= count; r++) {
      // number of rejected candidates in this round; shifts rPrime to get a different pick
      int failure = 0;
      // number of times we had to loop back to the origin
      int loopbackCount = 0;
      // set when the search for this round is abandoned
      boolean escape = false;
      boolean retryOrigin;
      Node out = null;
      do {
        retryOrigin = false; // initialize at the outset
        Node in = parent;
        // nodes rejected by the predicate during this descent from the origin
        Set<Node> rejected = new HashSet<Node>();
        boolean retryNode;
        do {
          retryNode = false; // initialize at the outset
          rPrime = r + offset + failure;
          logger.trace("{}.select({}, {})", new Object[] {in, input, rPrime});
          out = in.select(input, rPrime);
          if (out.getType() != type) {
            logger.trace("selected output {} for data {} didn't match the type {}: walking down " +
                "the hierarchy...", new Object[] {out, input, type});
            in = out; // walk down the hierarchy
            retryNode = true; // stay within the node and walk down the tree
          } else { // type matches
            boolean predicateRejected = !nodePredicate.apply(out);
            if (selected.contains(out) || predicateRejected) {
              if (predicateRejected) {
                logger.trace("{} was rejected by the node predicate for data {}: rejecting and " +
                    "increasing rPrime", out, input);
                rejected.add(out);
              } else { // already selected
                logger.trace("{} was already selected for data {}: rejecting and increasing rPrime",
                    out, input);
              }

              // we need to see if we have selected all possible nodes from this parent, in which
              // case we should loop back to the origin and start over
              if (allChildNodesEliminated(in, selected, rejected)) {
                logger.trace("all child nodes of {} have been eliminated", in);
                if (loopbackCount == MAX_LOOPBACK_COUNT) {
                  // we looped back the maximum times we specified; we give up search, and exit
                  // (retryOrigin is still false here, so the outer loop terminates as well)
                  escape = true;
                  break;
                }
                loopbackCount++;
                logger.trace("looping back to the original parent node ({})", parent);
                retryOrigin = true;
              } else {
                retryNode = true; // go back and reselect on the same parent
              }
              failure++;
            } else if (nodeIsOut(out)) {
              logger.trace("{} is marked as out (failed or over the maximum assignment) for data " +
                  "{}! looping back to the original parent node", out, input);
              failure++;
              if (loopbackCount == MAX_LOOPBACK_COUNT) {
                // we looped back the maximum times we specified; we give up search, and exit
                // (retryOrigin is still false here, so the outer loop terminates as well)
                escape = true;
                break;
              }
              loopbackCount++;
              // re-selection on the same parent is detrimental in case of node failure: loop back
              // to the origin
              retryOrigin = true;
            } else {
              // we got a successful selection
              break;
            }
          }
        } while (retryNode);
      } while (retryOrigin);

      if (escape) {
        // cannot find a node under this parent; return a smaller set than was intended
        logger.debug("we could not select a node for data {} under parent {}; a smaller data set " +
            "than is requested will be returned", input, parent);
        continue;
      }

      logger.trace("{} was selected for data {}", out, input);
      selected.add(out);
      // track the assignment
      if (assignmentTracker != null) {
        assignmentTracker.trackAssignment(out);
      }
    }
    if (keepOffset) {
      // remember the last round number used so the next call for this input continues from it
      roundOffset.put(input, rPrime);
    }
    return selected;
  }


  /**
   * Tells whether the given node must not be selected: either it is a failed leaf, or an
   * assignment tracker is configured and it rejects the node.
   */
  private boolean nodeIsOut(Node node) {
    if (node.isLeaf() && node.isFailed()) {
      return true;
    }
    if (assignmentTracker != null) {
      return assignmentTracker.rejectAssignment(node);
    }
    return false;
  }

  /**
   * Examines the immediate child nodes of the given parent node, and sees if all of the children
   * that can be selected (i.e. not failed) are already selected. This is used to determine whether
   * this parent node should no longer be used in the selection.
   *
   * @param parent the node whose immediate children are examined
   * @param selected nodes already selected in the current select() call
   * @param rejected nodes rejected by the node predicate
   * @return true if no child remains that is neither out, selected nor rejected (also true when
   *     the parent has no children)
   */
  private boolean allChildNodesEliminated(Node parent, List<Node> selected, Set<Node> rejected) {
    List<Node> children = parent.getChildren();
    if (children != null) {
      for (Node child: children) {
        if (!nodeIsOut(child) && !selected.contains(child) && !rejected.contains(child)) {
          return false;
        }
      }
    }
    return true;
  }
}