
com.twitter.crunch.StrawSelector Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of libcrunch Show documentation
Show all versions of libcrunch Show documentation
A lightweight mapping framework that maps data objects to a number of nodes, subject to constraints
The newest version!
/**
* Copyright 2013 Twitter, Inc.
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.twitter.crunch;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Selection algorithm based on the "straw" bucket type as described in the CRUSH algorithm.
 *
 * <p>At construction time every child of the given node is assigned a "straw" length
 * (stored as a fixed-point {@code long} scaled by {@code 0x10000}). On each call to
 * {@link #select(long, long)} every child draws a score equal to its hash multiplied by its
 * straw length, and the child with the highest score is returned.
 */
class StrawSelector implements Selector {
  /** Straw length per child node, scaled by 0x10000; zero-weight children map to 0. */
  private final Map<Node, Long> straws = new HashMap<Node, Long>();
  private final MultiInputHash hashFunction;

  /**
   * Builds the straw table for the children of {@code node}.
   *
   * <p>If {@code node} is a leaf, no straws are computed and the table stays empty; a later
   * {@link #select(long, long)} call will then throw {@link IllegalStateException}.
   *
   * @param node the parent node whose children are the selection candidates
   */
  public StrawSelector(Node node) {
    if (!node.isLeaf()) {
      // create a map from the nodes to their values
      List<Node> sortedNodes = sortNodes(node.getChildren()); // do a reverse sort by weight
      int numLeft = sortedNodes.size();
      float straw = 1.0f;
      float wbelow = 0.0f;
      float lastw = 0.0f;
      int i = 0;
      final int length = sortedNodes.size();
      while (i < length) {
        Node current = sortedNodes.get(i);
        if (current.getWeight() == 0) {
          // zero-weight nodes get a zero-length straw; the running straw value is untouched
          straws.put(current, 0L);
          i++;
          continue;
        }
        // store the current straw length as a fixed-point value (truncated toward zero)
        straws.put(current, (long)(straw*0x10000));
        i++;
        if (i == length) {
          break;
        }
        current = sortedNodes.get(i);
        Node previous = sortedNodes.get(i-1);
        if (current.getWeight() == previous.getWeight()) {
          // same weight as the node just processed: it shares the same straw length
          continue;
        }
        // weight changed between adjacent nodes: adjust the straw length for the next group
        wbelow += (float)(previous.getWeight() - lastw)*numLeft;
        // drop every node of the upcoming equal-weight group from the remaining count
        for (int j = i; j < length; j++) {
          if (sortedNodes.get(j).getWeight() == current.getWeight()) {
            numLeft--;
          } else {
            break;
          }
        }
        float wnext = (float)(numLeft * (current.getWeight() - previous.getWeight()));
        float pbelow = wbelow/(wbelow + wnext);
        straw *= Math.pow(1.0/pbelow, 1.0/numLeft);
        lastw = previous.getWeight();
      }
    }
    hashFunction = new JenkinsHash();
  }

  /**
   * Returns a new list that's sorted in the reverse order of the weight.
   * The input list is not modified.
   */
  private List<Node> sortNodes(List<Node> nodes) {
    List<Node> ret = new ArrayList<Node>(nodes);
    sortNodesInPlace(ret);
    return ret;
  }

  /**
   * Sorts the list in place in the reverse order of the weight.
   */
  private void sortNodesInPlace(List<Node> nodes) {
    Collections.sort(nodes, new Comparator<Node>() {
      @Override
      public int compare(Node n1, Node n2) {
        // sort by weight only in the reverse order
        if (n2.getWeight() == n1.getWeight()) {
          return 0;
        }
        // compare directly rather than via the sign of a subtraction, which can overflow
        return (n2.getWeight() > n1.getWeight()) ? 1 : -1;
      }
    });
  }

  /**
   * Picks the child whose weighted score is the highest for the given input and round.
   *
   * @param input the value being placed
   * @param round the selection round, passed through to the hash function
   * @return the child node with the highest score
   * @throws IllegalStateException if no child produced a score above -1, which includes the
   *     case where the straw table is empty
   */
  public Node select(long input, long round) {
    Node selected = null;
    long hiScore = -1;
    for (Map.Entry<Node, Long> e: straws.entrySet()) {
      Node child = e.getKey();
      long straw = e.getValue();
      long score = weightedScore(child, straw, input, round);
      // strict comparison: on a tie the entry encountered first is kept
      if (score > hiScore) {
        selected = child;
        hiScore = score;
      }
    }
    if (selected == null) {
      throw new IllegalStateException("no child node could be selected (straw count: "
          + straws.size() + ")");
    }
    return selected;
  }

  /**
   * Computes the score of a child: the hash of (input, child id, round) multiplied by the
   * child's straw length.
   */
  private long weightedScore(Node child, long straw, long input, long round) {
    long hash = hashFunction.hash(input, child.getId(), round);
    return hash*straw;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy