All downloads are free. Search and download functionality uses the official Maven repository.

com.twitter.crunch.StrawSelector Maven / Gradle / Ivy

Go to download

A lightweight mapping framework that maps data objects to a number of nodes, subject to constraints

The newest version!
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.crunch;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Selection algorithm based on the "straw" bucket type as described in the CRUSH algorithm.
 */
class StrawSelector implements Selector {
  /**
   * Straw length of each child, stored as a fixed-point value (the floating point straw scaled by
   * 0x10000). Children whose weight is 0 are stored with a straw of 0. Left empty for leaf nodes.
   */
  private final Map<Node, Long> straws = new HashMap<Node, Long>();
  private final MultiInputHash hashFunction;

  /**
   * Builds the straw table for the children of the given node. If the node is a leaf, no straws
   * are computed and the table stays empty.
   *
   * @param node the node whose children are candidates for selection
   */
  public StrawSelector(Node node) {
    if (!node.isLeaf()) {
      // create a map from the nodes to their values
      // NOTE(review): the list is walked from the heaviest to the lightest child; the reference
      // CRUSH straw calculation appears to walk in ascending weight order -- confirm this is
      // intended before changing anything, since altering it would change existing placements.
      List<Node> sortedNodes = sortNodes(node.getChildren()); // do a reverse sort by weight

      int numLeft = sortedNodes.size();
      float straw = 1.0f;   // straw length assigned to the current weight group
      float wbelow = 0.0f;  // running accumulator used to derive the next straw length
      float lastw = 0.0f;   // weight of the previously processed weight group
      int i = 0;
      final int length = sortedNodes.size();
      while (i < length) {
        Node current = sortedNodes.get(i);
        if (current.getWeight() == 0) {
          // zero weight items get zero length straws
          straws.put(current, 0L);
          i++;
          continue;
        }
        // set this item's straw (fixed point, scaled by 0x10000)
        straws.put(current, (long)(straw*0x10000));
        i++;
        if (i == length) {
          break;
        }

        current = sortedNodes.get(i);
        Node previous = sortedNodes.get(i-1);
        if (current.getWeight() == previous.getWeight()) {
          // same weight as the previous item: it shares the same straw length
          continue;
        }
        // weight changed: adjust the straw for the next group of items
        wbelow += (float)(previous.getWeight() - lastw)*numLeft;
        for (int j = i; j < length; j++) {
          if (sortedNodes.get(j).getWeight() == current.getWeight()) {
            numLeft--;
          } else {
            break;
          }
        }
        float wnext = (float)(numLeft * (current.getWeight() - previous.getWeight()));
        float pbelow = wbelow/(wbelow + wnext);
        straw *= Math.pow(1.0/pbelow, 1.0/numLeft);
        lastw = previous.getWeight();
      }
    }
    hashFunction = new JenkinsHash();
  }

  /**
   * Returns a new list that's sorted in the reverse order of the weight. The list passed in is not
   * modified.
   */
  private List<Node> sortNodes(List<Node> nodes) {
    List<Node> ret = new ArrayList<Node>(nodes);
    sortNodesInPlace(ret);
    return ret;
  }

  /**
   * Sorts the list in place in the reverse order of the weight.
   */
  private void sortNodesInPlace(List<Node> nodes) {
    Collections.sort(nodes, new Comparator<Node>() {
      @Override
      public int compare(Node n1, Node n2) {
        // sort by weight only in the reverse order; compare the weights directly rather than
        // testing the sign of their difference, which could overflow
        if (n2.getWeight() > n1.getWeight()) {
          return 1;
        }
        if (n2.getWeight() < n1.getWeight()) {
          return -1;
        }
        return 0;
      }
    });
  }

  /**
   * Selects the child with the highest weighted score for the given input and round. When several
   * children share the highest score, the first one encountered while iterating the straw table
   * wins.
   *
   * @param input the value being mapped
   * @param round the selection round, passed through to the hash function
   * @return the child with the highest score
   * @throws IllegalStateException if no child could be selected (for example, the straw table is
   *         empty)
   */
  public Node select(long input, long round) {
    Node selected = null;
    long hiScore = -1;
    for (Map.Entry<Node, Long> e: straws.entrySet()) {
      Node child = e.getKey();
      long straw = e.getValue();
      long score = weightedScore(child, straw, input, round);
      if (score > hiScore) {
        selected = child;
        hiScore = score;
      }
    }
    if (selected == null) {
      throw new IllegalStateException("no child node could be selected");
    }
    return selected;
  }

  /**
   * Computes the score of a child: the hash of (input, child id, round) multiplied by the child's
   * straw length.
   */
  private long weightedScore(Node child, long straw, long input, long round) {
    long hash = hashFunction.hash(input, child.getId(), round);
    return hash*straw;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy