// hex.kmeans.KMeansSimplexSolver Maven / Gradle / Ivy
package hex.kmeans;
import water.Iced;
import water.MRTask;
import water.fvec.Chunk;
import water.fvec.Frame;
import water.fvec.Vec;
import water.util.ArrayUtils;
import java.util.ArrayList;
import java.util.Collections;
/**
 * Polynomial implementation on average, exponential in the worst case - slow performance.
 * Solves the Minimal Cost Flow (MCF) problem using the simplex method, traversing a spanning tree.
 * The smaller the sum of the constraints, the faster the computation - MCF is used until all constraints are satisfied, then standard K-means is used.
 */
// Holds the solver state: the input frame, graph sizes, demand/capacity readers and the spanning tree.
class KMeansSimplexSolver {
public Frame _weights; // input data + weight column + calculated distances from all points to all centres + edge indices + columns to store result of cluster assignments
public double _sumWeights; // calculated sum of all weights to calculate maximal capacity value
public boolean _hasWeightsColumn; // weight column existence flag
public long _numberOfNonZeroWeightPoints; // if the weights column is set, how many rows have a non-zero weight
public int _constraintsLength; // number of constraints (constrains.length in the constructor)
public long _numberOfPoints; // number of rows of the input frame
public long _edgeSize; // _numberOfPoints * _constraintsLength + _constraintsLength
public long _nodeSize; // _numberOfPoints + _constraintsLength + 1
public long _resultSize; // _numberOfPoints * _constraintsLength
// Input graph to store K-means configuration
public Vec.Reader _demandsReader; // demand of every node (-1 for data points, the constraint value for constraint nodes, the remainder for the last node)
public Vec.Reader _capacitiesReader; // capacities of all edges + of the edges between every node and the leader node
public double _maxAbsDemand; // maximal absolute demand to calculate maximal capacity value
// Spanning tree to calculate min cost flow
public SpanningTree tree;
/**
 * Construct the K-means simplex solver.
 * @param constrains array of constraints (minimal number of points per cluster)
 * @param weights input data + weight column + calculated distances from all points to all centres + edge indices + columns to store result of cluster assignments
 * @param sumDistances calculated sum of all weights, used to calculate the maximal capacity value
 * @param hasWeights weight column existence flag
 * @param numberOfNonZeroWeightPoints if the weights column is set, how many rows have a non-zero weight
 */
// Builds the minimal cost flow inputs: node demands, edge capacities, the edge indices stored
// in the frame, and the initial spanning tree.
// NOTE(review): closing braces (and possibly other single-token lines such as writer close() calls)
// appear to be missing from this listing - confirm against the upstream file before editing.
public KMeansSimplexSolver(int[] constrains, Frame weights, double sumDistances, boolean hasWeights, long numberOfNonZeroWeightPoints) {
this._numberOfPoints = weights.numRows();
// nodes: one per data point, one per constraint, plus one extra node
this._nodeSize = this._numberOfPoints + constrains.length + 1;
// edges: one per (data point, constraint) pair plus one per constraint
this._edgeSize = _numberOfPoints * constrains.length + constrains.length;
this._constraintsLength = constrains.length;
Vec demands = Vec.makeCon(0, _nodeSize, Vec.T_NUM);
// capacities also cover one extra edge per node (indices _edgeSize .. _edgeSize + _nodeSize - 1)
Vec capacities = Vec.makeCon(0, _edgeSize + _nodeSize, Vec.T_NUM);
this._resultSize = this._numberOfPoints * _constraintsLength;
this._hasWeightsColumn = hasWeights;
this._numberOfNonZeroWeightPoints = numberOfNonZeroWeightPoints;
this._weights = weights;
this._sumWeights = sumDistances;
long constraintsSum = 0;
// Double.MIN_VALUE is the smallest positive double, so any larger absolute demand replaces it
_maxAbsDemand = Double.MIN_VALUE;
Vec.Writer demandWriter = demands.open();
for (long i = 0; i < _nodeSize; i++) {
if (i < _numberOfPoints) {
// data point nodes have demand -1
demandWriter.set(i, -1);
} else {
long tmpDemand;
if (i < _nodeSize - 1) {
// constraint node: demand is the constraint value
tmpDemand = constrains[(int)(i - _numberOfPoints)];
constraintsSum += constrains[(int)(i - _numberOfPoints)];
} else {
// last node: non-zero-weight points minus the sum of all constraints
tmpDemand = _numberOfNonZeroWeightPoints - constraintsSum;
demandWriter.set(i, tmpDemand);
if (Math.abs(tmpDemand) > _maxAbsDemand) {
_maxAbsDemand = Math.abs(tmpDemand);
// edge index columns start at numCols - 3 - _constraintsLength
int edgeIndexStart = _weights.numCols() - 3 - _constraintsLength;
long edgeIndex = 0;
// number the (row, constraint) edges consecutively, row by row
for (long i = 0; i < _weights.numRows(); i++) {
for(int j=0; j < _constraintsLength; j++){
_weights.vec(edgeIndexStart + j).set(i, edgeIndex++);
Vec.Writer capacitiesWriter = capacities.open();
// Initialize graph and spanning tree.
// always start with infinity _capacities
for (long i = 0; i < _edgeSize; i++) {
capacitiesWriter.set(i, Long.MAX_VALUE);
// find maximum value for capacity
double maxCapacity = 3 * (_sumWeights > _maxAbsDemand ? _sumWeights : _maxAbsDemand);
// fill max capacity from the leader node to all others _nodes
for (long i = 0; i < _nodeSize; i++) {
capacitiesWriter.set(i + _edgeSize, maxCapacity);
this._capacitiesReader = capacities.new Reader();
//this._additiveWeightsReader = additiveWeights.new Reader();
this._demandsReader = demands.new Reader();
this.tree = new SpanningTree(_nodeSize, _edgeSize, _constraintsLength);
tree.init(_numberOfPoints, maxCapacity, demands);
/*
 * Get the weight based on the edge index: edges of data points are read from the weights data, all other edges have weight 0.
 * @param edgeIndex index of the edge
 * @return weight by edge index
 */
public double getWeight(long edgeIndex) {
long numberOfFrameWeights = this._numberOfPoints * this._constraintsLength;
if (edgeIndex < numberOfFrameWeights) {
int i = _weights.numCols() - 2 * _constraintsLength - 3 + (int)(edgeIndex % _constraintsLength);
long j = Math.round(edgeIndex / _constraintsLength);
return _weights.vec(i).at(j);
return 0;
/*
 * Check whether the data point that the edge belongs to has a non-zero weight.
 * @param edgeIndex index of the edge
 * @return true if the weight at edge index is not zero
 */
public boolean isNonZeroWeight(long edgeIndex) {
if(_hasWeightsColumn) {
long numberOfFrameWeights = this._numberOfPoints * this._constraintsLength;
if (edgeIndex < numberOfFrameWeights) {
long i = Math.round(edgeIndex / _constraintsLength);
int j = _weights.numCols() - 1 - 2 * _constraintsLength - 3;
return _weights.vec(j).at8(i) == 1;
return true;
/*
 * Find the edge which has the minimal reduced weight.
 * @return edge index
 */
public long findMinimalReducedWeight() {
FindMinimalWeightTask t = new FindMinimalWeightTask(tree, _hasWeightsColumn, _constraintsLength).doAll(_weights);
double minimalWeight = t.minimalWeight;
long minimalIndex = t.minimalIndex;
long additiveEdgesIndexStart = _weights.vec(0).length() * _constraintsLength;
// Iterate over number of constraints, it is size K, MR task is not optimal here
for(long i = additiveEdgesIndexStart; i < _edgeSize; i++){
double tmpWeight = tree.reduceWeight(i, getWeight(i));
boolean countValue = !_hasWeightsColumn || isNonZeroWeight(i);
if (countValue && tmpWeight < minimalWeight) {
minimalWeight = tmpWeight;
minimalIndex = i;
return minimalIndex;
/*
 * Find the next optimal entering edge, which is used to find a cycle.
 * @return the entering edge, or null if all constraints are satisfied
 */
public Edge findNextEnteringEdge() {
// Check if continue
if(!tree.areConstraintsSatisfied()) {
long minimalIndex = findMinimalReducedWeight();
if (tree.getFlowByEdgeIndex(minimalIndex) == 0) {
return new Edge(minimalIndex, tree._sources.at8(minimalIndex), tree._targets.at8(minimalIndex));
} else {
return new Edge(minimalIndex, tree._targets.at8(minimalIndex), tree._sources.at8(minimalIndex));
// if all constraints are satisfied, return null
return null;
/**
 * Find the cycle that leads from the edge defined by the source and target nodes to their common ancestor and back.
 * @param edgeIndex index of the entering edge
 * @param sourceIndex source node index
 * @param targetIndex target node index
 * @return cycle in spanning tree
 */
// Assembles the cycle for an entering edge from the tree paths source -> ancestor and target -> ancestor.
// NOTE(review): as listed, the `if` has no body and resultPathBack is computed but never used;
// statements and closing braces appear to have been lost - confirm against the upstream file.
public NodesEdgesObject getCycle(long edgeIndex, long sourceIndex, long targetIndex) {
// node where the parent chains of both end nodes meet
long ancestor = tree.findAncestor(sourceIndex, targetIndex);
NodesEdgesObject resultPath = tree.getPath(sourceIndex, ancestor);
// true unless the path consists of exactly the entering edge itself
if (resultPath.edgeSize() != 1 || resultPath.getEdge(0) != edgeIndex) {
NodesEdgesObject resultPathBack = tree.getPath(targetIndex, ancestor);
return resultPath;
/*
 * Find the leaving edge, i.e. the edge of the cycle with the minimal residual capacity.
 * @param cycle input cycle of edges and nodes to determine the leaving edge
 * @return the edge with minimal residual capacity
 */
public Edge getLeavingEdge(NodesEdgesObject cycle) {
double minResidualCapacity = Double.MAX_VALUE;
int minIndex = -1;
for (int i = 0; i < cycle.edgeSize(); i++) {
double tmpResidualCapacity = tree.getResidualCapacity(cycle.getEdge(i), cycle.getNode(i), _capacitiesReader.at(cycle.getEdge(i)));
boolean countValue = !_hasWeightsColumn || isNonZeroWeight(cycle.getEdge(i));
if (countValue && tmpResidualCapacity < minResidualCapacity) {
minResidualCapacity = tmpResidualCapacity;
minIndex = i;
assert minIndex != -1;
long nodeIndex = cycle.getNode(minIndex);
long edgeIndex = cycle.getEdge(minIndex);
return new Edge(edgeIndex, nodeIndex, nodeIndex == tree._sources.at8(edgeIndex) ? tree._targets.at8(edgeIndex) : tree._sources.at8(edgeIndex));
/**
 * Calculate the minimal cost flow using a pivot loop and the spanning tree:
 * - Loop over entering edges to find the minimal cost flow in the spanning tree.
 * - When an edge is found, update the spanning tree.
 * - If the constraints are satisfied or no edge is found, stop.
 */
// Pivot loop: repeatedly take an entering edge, find its cycle and leaving edge, push flow
// along the cycle and exchange the two edges in the spanning tree, until no entering edge is returned.
// NOTE(review): closing braces are missing from this listing, so the nesting of the `if` blocks
// cannot be confirmed here; a tree.makeRoot(...) call is presumably missing as well (makeRoot is
// defined on SpanningTree but not called anywhere in this listing) - confirm against the upstream file.
public void calculateMinimalCostFlow() {
Edge edge = findNextEnteringEdge();
// findNextEnteringEdge() returns null once all constraints are satisfied
while (edge != null) {
long enteringEdgeIndex = edge.getEdgeIndex();
long enteringEdgeSourceIndex = edge.getSourceIndex();
long enteringEdgeTargetIndex = edge.getTargetIndex();
NodesEdgesObject cycle = getCycle(enteringEdgeIndex, enteringEdgeSourceIndex, enteringEdgeTargetIndex);
Edge leavingEdge = getLeavingEdge(cycle);
long leavingEdgeIndex = leavingEdge.getEdgeIndex();
long leavingEdgeSourceIndex = leavingEdge.getSourceIndex();
long leavingEdgeTargetIndex = leavingEdge.getTargetIndex();
// amount of flow that can be pushed around the cycle
double residualCap = tree.getResidualCapacity(leavingEdgeIndex, leavingEdgeSourceIndex, _capacitiesReader.at(leavingEdgeIndex));
if(residualCap != 0) {
tree.augmentFlow(cycle, residualCap);
if (enteringEdgeIndex != leavingEdgeIndex) {
// orient the leaving edge so that its source is the tree parent of its target
if (leavingEdgeSourceIndex != tree._parents.at8(leavingEdgeTargetIndex)) {
long tmpS = leavingEdgeSourceIndex;
leavingEdgeSourceIndex = leavingEdgeTargetIndex;
leavingEdgeTargetIndex = tmpS;
// flip the entering edge when it comes before the leaving edge in the cycle
if (cycle.indexOfEdge(enteringEdgeIndex) < cycle.indexOfEdge(leavingEdgeIndex)) {
long tmpP = enteringEdgeSourceIndex;
enteringEdgeSourceIndex = enteringEdgeTargetIndex;
enteringEdgeTargetIndex = tmpP;
// exchange the leaving edge for the entering edge in the tree and refresh the potentials
tree.removeParentEdge(leavingEdgeSourceIndex, leavingEdgeTargetIndex);
tree.addEdge(enteringEdgeIndex, enteringEdgeSourceIndex, enteringEdgeTargetIndex);
tree.updatePotentials(enteringEdgeIndex, enteringEdgeSourceIndex, enteringEdgeTargetIndex, getWeight(enteringEdgeIndex));
edge = findNextEnteringEdge();
public void checkConstraintsCondition(int[] numberOfPointsInCluster){
for(int i = 0; i<_constraintsLength; i++){
assert numberOfPointsInCluster[i] >= _demandsReader.at8(_numberOfPoints+i) : String.format("Cluster %d has %d assigned points however should has assigned at least %d points.", i+1, numberOfPointsInCluster[i], _demandsReader.at8(_numberOfPoints+i));
/**
 * Calculate the minimal cost flow and, based on the flow, assign a cluster to every data point.
 * @return input data with new cluster assignments
 */
// Appends the flow columns to the frame, prepares the cluster assignment task and strips the
// helper columns again before returning the frame.
// NOTE(review): the comments below mention running the min cost flow calculation, checking the
// constraints and removing the flow columns, but the matching statements are absent from this
// listing (as are closing braces); `task` is created but never run here - confirm against the upstream file.
public Frame assignClusters() {
// run minimal cost flow calculation
// add flow columns to assign clusters
_weights = _weights.add(new Frame(tree._edgeFlowDataPoints));
// number of leading data columns: everything except the optional weight column, 3 * K helper columns and 3 result columns
int dataStopLength = _weights.numCols() - (_hasWeightsColumn ? 1 : 0) - 3 * _constraintsLength - 3;
// assign cluster based on calculated flow
AssignClusterTask task = new AssignClusterTask(_constraintsLength, _hasWeightsColumn, _weights.numCols());
// check constraints are satisfied
// remove distances columns + edge indices columns
for(int i = 0; i < 2 * _constraintsLength; i++) {
// always removes the same position: the first column after the data (and the weight column, if any)
_weights.remove(dataStopLength+(_hasWeightsColumn ? 1 : 0));
// remove flow columns
for(int i = 0; i < _constraintsLength; i++) {
return _weights;
/**
 * Class storing the structures used to calculate the flow for the minimal cost flow problem.
 */
// Flow, potentials and tree-threading vectors of the spanning tree; node index _nodeSize is used
// as an extra root node by init().
class SpanningTree extends Iced {
public long _nodeSize; // number of graph nodes (without the extra root)
public long _edgeSize; // number of graph edges (without the per-node edges to the extra root)
public int _secondLayerSize; // number of second-layer (constraint) nodes
public long _dataPointSize; // nodeSize - secondLayerSize - 1
public Vec[] _edgeFlowDataPoints; // [constraints size] nodeSize - secondLayerSize - 1 (number of data)
public Vec _edgeFlowRest; // secondLayerSize size + node size
public Vec _nodePotentials; // node size, long
public Vec _parents; // node size + 1, integer
public Vec _parentEdges; // node size + 1, integer
public Vec _subtreeSize; // node size + 1, integer
public Vec _nextDepthFirst; // node size + 1, integer
public Vec _previousNodes; // node size + 1, integer
public Vec _lastDescendants; // node size + 1, integer
public Vec _sources; // edge size + node size
public Vec _targets; // edge size + node size
SpanningTree(long nodeSize, long edgeSize, int secondLayerSize){
this._nodeSize = nodeSize;
this._edgeSize = edgeSize;
this._secondLayerSize = secondLayerSize;
this._dataPointSize = nodeSize - secondLayerSize - 1;
this._edgeFlowDataPoints = new Vec[secondLayerSize];
for(int i=0; i < secondLayerSize; i++){
this._edgeFlowDataPoints[i] = Vec.makeCon(0, _dataPointSize, Vec.T_NUM);
this._edgeFlowRest = Vec.makeCon(0, secondLayerSize + nodeSize, Vec.T_NUM);
this._nodePotentials = Vec.makeCon(0, nodeSize, Vec.T_NUM);
this._parents = Vec.makeCon(0, nodeSize+1, Vec.T_NUM);
this._parentEdges = Vec.makeCon(0, nodeSize+1, Vec.T_NUM);
this._subtreeSize = Vec.makeCon(1, nodeSize+1, Vec.T_NUM);
this._nextDepthFirst = Vec.makeCon(0, nodeSize+1, Vec.T_NUM);
this._previousNodes = Vec.makeCon(0, nodeSize+1, Vec.T_NUM);
this._lastDescendants = Vec.makeCon(0, nodeSize+1, Vec.T_NUM);
// Fills the edge endpoints and builds the starting spanning tree, in which every node hangs
// directly under the extra root node with index _nodeSize.
// NOTE(review): closing braces are missing from this listing, so the exact nesting of the
// blocks below cannot be confirmed here - confirm against the upstream file.
public void init(long numberOfPoints, double maxCapacity, Vec demands){
_sources = Vec.makeCon(0, _edgeSize + _nodeSize, Vec.T_NUM);
_targets = Vec.makeCon(0, _edgeSize + _nodeSize, Vec.T_NUM);
for (long i = 0; i < _nodeSize; i++) {
if (i < numberOfPoints) {
// data point i -> every second-layer node (edge index i * _secondLayerSize + j)
for (int j = 0; j < _secondLayerSize; j++) {
_sources.set(i * _secondLayerSize + j, i);
_targets.set(i * _secondLayerSize + j, numberOfPoints + j);
} else {
if (i < _nodeSize - 1) {
// second-layer node i -> last node (_nodeSize - 1)
_sources.set(numberOfPoints* _secondLayerSize +i-numberOfPoints, i);
_targets.set(numberOfPoints* _secondLayerSize +i-numberOfPoints, _nodeSize - 1);
for (long i = 0; i < _nodeSize; i++) {
long demand = demands.at8(i);
// extra edge _edgeSize + i connects node i with the root; its direction depends on the sign of the demand
if (demand >= 0) {
_sources.set(_edgeSize + i, _nodeSize);
_targets.set(_edgeSize + i, i);
} else {
_sources.set(_edgeSize + i, i);
_targets.set(_edgeSize + i, _nodeSize);
if (i < _nodeSize - 1) {
_nextDepthFirst.set(i, i + 1);
// the extra edge initially carries the absolute demand of its node
_edgeFlowRest.set(_secondLayerSize+i, Math.abs(demand));
_nodePotentials.set(i, demand < 0 ? maxCapacity : -maxCapacity);
_parents.set(i, _nodeSize);
_parentEdges.set(i, i + _edgeSize);
_previousNodes.set(i, i - 1);
_lastDescendants.set(i, i);
// entries of the root node itself and the wrap-around links of the node ordering
_parents.set(_nodeSize, -1);
_subtreeSize.set(_nodeSize, _nodeSize + 1);
_nextDepthFirst.set(_nodeSize - 1, _nodeSize);
_previousNodes.set(0, _nodeSize);
_previousNodes.set(_nodeSize, _nodeSize - 1);
_lastDescendants.set(_nodeSize, _nodeSize - 1);
/*
 * Check if the constraints are satisfied.
 * If yes, the algorithm can continue as standard K-means and save time. This is useful when the constraints are small numbers.
 * @return true if the constraints are satisfied
 */
public boolean areConstraintsSatisfied() {
Vec.Reader flowReader = _edgeFlowRest.new Reader();
long length = flowReader.length();
for(long i = 2; i < _secondLayerSize + 2; i++) {
if(flowReader.at8(length - i) > 0) {
return false;
return true;
public long findAncestor(long sourceIndex, long targetIndex) {
long subtreeSizeSource = _subtreeSize.at8(sourceIndex);
long subtreeSizeTarget = _subtreeSize.at8(targetIndex);
while (true) {
while (subtreeSizeSource < subtreeSizeTarget) {
sourceIndex = _parents.at8(sourceIndex);
subtreeSizeSource = _subtreeSize.at8(sourceIndex);
while (subtreeSizeSource > subtreeSizeTarget) {
targetIndex = _parents.at8(targetIndex);
subtreeSizeTarget = _subtreeSize.at8(targetIndex);
if (subtreeSizeSource == subtreeSizeTarget) {
if (sourceIndex !=targetIndex) {
sourceIndex = _parents.at8(sourceIndex);
subtreeSizeSource = _subtreeSize.at8(sourceIndex);
targetIndex = _parents.at8(targetIndex);
subtreeSizeTarget = _subtreeSize.at8(targetIndex);
} else {
return sourceIndex;
public long getFlowByEdgeIndex(long edgeIndex){
if(edgeIndex < _dataPointSize * _secondLayerSize) {
int i = (int)(edgeIndex % _secondLayerSize);
long j = Math.round(edgeIndex / _secondLayerSize);
return _edgeFlowDataPoints[i].at8(j);
} else {
return _edgeFlowRest.at8(edgeIndex-_dataPointSize * _secondLayerSize);
public void setFlowByEdgeIndex(long edgeIndex, long value){
if(edgeIndex < _dataPointSize * _secondLayerSize) {
int i = (int)(edgeIndex % _secondLayerSize);
long j = Math.round(edgeIndex / _secondLayerSize);
_edgeFlowDataPoints[i].set(j, value);
} else {
_edgeFlowRest.set(edgeIndex - _dataPointSize * _secondLayerSize, value);
public double reduceWeight(long edgeIndex, double weight) {
double newWeight = weight - _nodePotentials.at(_sources.at8(edgeIndex)) + _nodePotentials.at(_targets.at8(edgeIndex));
return getFlowByEdgeIndex(edgeIndex) == 0 ? newWeight : - newWeight;
// Walks from `node` up the parent chain until `ancestor` is reached and returns the collected path.
// NOTE(review): as listed, nothing is ever added to `result`; the statements recording the
// visited nodes/edges (and the closing braces) appear to have been lost - confirm against the upstream file.
public NodesEdgesObject getPath(long node, long ancestor) {
NodesEdgesObject result = new NodesEdgesObject();
while (node != ancestor) {
// step to the parent node
node = _parents.at8(node);
return result;
public double getResidualCapacity(long edgeIndex, long nodeIndex, double capacity) {
long flow = getFlowByEdgeIndex(edgeIndex);
return nodeIndex == _sources.at8(edgeIndex) ? capacity - flow : flow;
public void augmentFlow(NodesEdgesObject nodesEdges, double flow) {
for (int i = 0; i < nodesEdges.edgeSize(); i++) {
long edge = nodesEdges.getEdge(i);
long node = nodesEdges.getNode(i);
long edgeFlow = getFlowByEdgeIndex(edge);
if (node == _sources.at8(edge)) {
setFlowByEdgeIndex(edge, edgeFlow + (int)flow);
} else {
setFlowByEdgeIndex(edge, edgeFlow - (int)flow);
// Cuts the tree edge between sourceIndex (parent) and targetIndex (child): the child's subtree is
// unlinked from the node ordering and the subtree sizes / last descendants of all former ancestors are fixed up.
// NOTE(review): closing braces are missing from this listing - confirm block boundaries against the upstream file.
public void removeParentEdge(long sourceIndex, long targetIndex) {
long subtreeSizeTarget = _subtreeSize.at8(targetIndex);
long previousTargetIndex = _previousNodes.at8(targetIndex);
long lastTargetIndex = _lastDescendants.at8(targetIndex);
long nextTargetIndex = _nextDepthFirst.at8(lastTargetIndex);
// the child no longer has a parent
_parents.set(targetIndex, -1);
_parentEdges.set(targetIndex, -1);
// bypass the child's subtree in the remaining ordering
_nextDepthFirst.set(previousTargetIndex, nextTargetIndex);
_previousNodes.set(nextTargetIndex, previousTargetIndex);
// close the child's subtree into its own ring
_nextDepthFirst.set(lastTargetIndex, targetIndex);
_previousNodes.set(targetIndex, lastTargetIndex);
// walk up from the former parent; the chain ends at a node whose parent is -1
while (sourceIndex != -1) {
_subtreeSize.set(sourceIndex, _subtreeSize.at8(sourceIndex) - subtreeSizeTarget);
if (lastTargetIndex == _lastDescendants.at8(sourceIndex)) {
_lastDescendants.set(sourceIndex, previousTargetIndex);
sourceIndex = _parents.at8(sourceIndex);
// Presumably re-roots the (detached) subtree that contains nodeIndex so that nodeIndex becomes
// its root: for consecutive pairs of the ancestor chain the parent relation is reversed and the
// subtree sizes, node ordering and last descendants are updated.
// NOTE(review): as listed, nothing is ever added to `ancestors`, the list is a raw ArrayList
// (so `long x = ancestors.get(i)` would not compile) and closing braces are missing; statements
// and the element type appear to have been lost - confirm against the upstream file.
public void makeRoot(long nodeIndex) {
ArrayList ancestors = new ArrayList<>();
// walk up until a node without parent (-1) is reached
while (nodeIndex != -1) {
nodeIndex = _parents.at8(nodeIndex);
for (int i = 0; i < ancestors.size() - 1; i++) {
long sourceIndex = ancestors.get(i);
long targetIndex = ancestors.get(i + 1);
long subtreeSizeSource = _subtreeSize.at8(sourceIndex);
long lastSourceIndex = _lastDescendants.at8(sourceIndex);
long prevTargetIndex = _previousNodes.at8(targetIndex);
long lastTargetIndex = _lastDescendants.at8(targetIndex);
long nextTargetIndex = _nextDepthFirst.at8(lastTargetIndex);
// reverse the parent relation between the pair; the connecting edge moves with it
_parents.set(sourceIndex, targetIndex);
_parents.set(targetIndex, -1);
_parentEdges.set(sourceIndex, _parentEdges.at8(targetIndex));
_parentEdges.set(targetIndex, -1);
// targetIndex takes over the size of the whole subtree, sourceIndex keeps the rest
_subtreeSize.set(sourceIndex, subtreeSizeSource - _subtreeSize.at8(targetIndex));
_subtreeSize.set(targetIndex, subtreeSizeSource);
// take targetIndex's subtree out of the ordering and close it into its own ring
_nextDepthFirst.set(prevTargetIndex, nextTargetIndex);
_previousNodes.set(nextTargetIndex, prevTargetIndex);
_nextDepthFirst.set(lastTargetIndex, targetIndex);
_previousNodes.set(targetIndex, lastTargetIndex);
if (lastSourceIndex == lastTargetIndex) {
_lastDescendants.set(sourceIndex, prevTargetIndex);
lastSourceIndex = prevTargetIndex;
// splice sourceIndex's remaining nodes behind targetIndex's subtree
_previousNodes.set(sourceIndex, lastTargetIndex);
_nextDepthFirst.set(lastTargetIndex, sourceIndex);
_nextDepthFirst.set(lastSourceIndex, targetIndex);
_previousNodes.set(targetIndex, lastSourceIndex);
_lastDescendants.set(targetIndex, lastSourceIndex);
// Attaches the subtree rooted at targetIndex below sourceIndex via edgeIndex: the subtree is
// spliced into the node ordering right after sourceIndex's last descendant and the subtree
// sizes / last descendants of sourceIndex and its ancestors are updated.
// NOTE(review): closing braces are missing from this listing - confirm block boundaries against the upstream file.
public void addEdge(long edgeIndex, long sourceIndex, long targetIndex) {
long lastSourceIndex = _lastDescendants.at8(sourceIndex);
long nextSourceIndex = _nextDepthFirst.at8(lastSourceIndex);
long subtreeSizeTarget = _subtreeSize.at8(targetIndex);
long lastTargetIndex = _lastDescendants.at8(targetIndex);
_parents.set(targetIndex, sourceIndex);
_parentEdges.set(targetIndex, edgeIndex);
// link: last descendant of source -> target subtree -> former successor
_nextDepthFirst.set(lastSourceIndex, targetIndex);
_previousNodes.set(targetIndex, lastSourceIndex);
_previousNodes.set(nextSourceIndex, lastTargetIndex);
_nextDepthFirst.set(lastTargetIndex, nextSourceIndex);
// walk up from the new parent; the chain ends at a node whose parent is -1
while (sourceIndex != -1) {
_subtreeSize.set(sourceIndex, _subtreeSize.at8(sourceIndex) + subtreeSizeTarget);
if (lastSourceIndex == _lastDescendants.at8(sourceIndex)) {
_lastDescendants.set(sourceIndex, lastTargetIndex);
sourceIndex = _parents.at8(sourceIndex);
public void updatePotentials(long edgeIndex, long sourceIndex, long targetIndex, double weight) {
double potential;
if (targetIndex == _targets.at8(edgeIndex)) {
potential = _nodePotentials.at(sourceIndex) - weight - _nodePotentials.at(targetIndex);
} else {
potential = _nodePotentials.at(sourceIndex) + weight - _nodePotentials.at(targetIndex);
_nodePotentials.set(targetIndex, _nodePotentials.at(targetIndex) + potential);
long last = _lastDescendants.at8(targetIndex);
while (targetIndex != last) {
targetIndex = _nextDepthFirst.at8(targetIndex);
_nodePotentials.set(targetIndex, _nodePotentials.at(targetIndex) + potential);
/*
 * Helper class to store edges in the spanning tree net.
 */
/**
 * Immutable triple describing one directed edge: its index and its source and target node indices.
 */
class Edge {

    private final long _edgeIndex;
    private final long _sourceIndex;
    private final long _targetIndex;

    /**
     * @param edgeIndex index of the edge
     * @param sourceIndex index of the node the edge starts at
     * @param targetIndex index of the node the edge ends at
     */
    public Edge(long edgeIndex, long sourceIndex, long targetIndex) {
        this._edgeIndex = edgeIndex;
        this._sourceIndex = sourceIndex;
        this._targetIndex = targetIndex;
    }

    /** @return index of the edge */
    public long getEdgeIndex() {
        return _edgeIndex;
    }

    /** @return index of the source node */
    public long getSourceIndex() {
        return _sourceIndex;
    }

    /** @return index of the target node */
    public long getTargetIndex() {
        return _targetIndex;
    }

    /** @return {@code "<edge> <source> <target>"} */
    @Override
    public String toString() {
        return _edgeIndex+" "+_sourceIndex+" "+_targetIndex;
    }
}
/*
 * Helper class to store the edges and nodes of one cycle in the spanning tree net.
 */
/**
 * Ordered lists of node indices and edge indices describing a path or cycle.
 * <p>
 * The lists are typed as {@code ArrayList<Long>}; with raw lists the {@code long}-returning
 * accessors and the {@code toString} loops would not compile.
 */
class NodesEdgesObject {

    private ArrayList<Long> _nodes;
    private ArrayList<Long> _edges;

    public NodesEdgesObject() {
        this._nodes = new ArrayList<>();
        this._edges = new ArrayList<>();
    }

    /** Appends a node index. */
    public void addNode(long node){
        _nodes.add(node);
    }

    /**
     * Removes the most recently added node.
     * @throws IndexOutOfBoundsException if there are no nodes
     */
    public void removeLastNode(){
        _nodes.remove(_nodes.size() - 1);
    }

    /** @return node index stored at the given position */
    public long getNode(int index){
        return _nodes.get(index);
    }

    /** @return the live (not copied) list of node indices */
    public ArrayList<Long> getNodes() {
        return _nodes;
    }

    /** Appends an edge index. */
    public void addEdge(long edge){
        _edges.add(edge);
    }

    /** @return edge index stored at the given position */
    public long getEdge(int index){
        return _edges.get(index);
    }

    /** @return the live (not copied) list of edge indices */
    public ArrayList<Long> getEdges() {
        return _edges;
    }

    /** @return number of stored edges */
    public int edgeSize(){
        return _edges.size();
    }

    /** @return position of the first occurrence of the edge index, or -1 if it is not stored */
    public int indexOfEdge(long value){
        // value is boxed to Long, which is what the list stores, so equals() matches
        return _edges.indexOf(value);
    }

    /** Reverses the order of the stored nodes in place. */
    public void reverseNodes(){
        Collections.reverse(_nodes);
    }

    /** Reverses the order of the stored edges in place. */
    public void reverseEdges(){
        Collections.reverse(_edges);
    }

    /** Appends all given node indices in their order. */
    public void addAllNodes(ArrayList<Long> newNodes){
        _nodes.addAll(newNodes);
    }

    /** Appends all given edge indices in their order. */
    public void addAllEdges(ArrayList<Long> newEdges){
        _edges.addAll(newEdges);
    }

    /** @return {@code "NEO: nodes: n1 n2 ... edges: e1 e2 ... "} (each index followed by a space) */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder("NEO: nodes: ");
        for (long node : _nodes) {
            sb.append(node).append(' ');
        }
        sb.append("edges: ");
        for (long edge : _edges) {
            sb.append(edge).append(' ');
        }
        return sb.toString();
    }
}
/*
 * Map Reduce task to find the minimal reduced weight (distance).
 */
class FindMinimalWeightTask extends MRTask {
// IN
private SpanningTree _tree;
private boolean _hasWeightsColumn;
private int _constraintsLength;
double minimalWeight = Double.MAX_VALUE;
long minimalIndex = -1;
FindMinimalWeightTask(SpanningTree tree, boolean hasWeightsColumn, int constraintsLength) {
_tree = tree;
_hasWeightsColumn = hasWeightsColumn;
_constraintsLength = constraintsLength;
public void map(Chunk[] cs) {
int startDistancesIndex = cs.length - 2 * _constraintsLength - 3;
int startEdgeIndex = cs.length - 3 - _constraintsLength;
for (int i = 0; i < cs[0]._len; i++) {
for (int j = 0; j < _constraintsLength; j++) {
double weight = cs[startDistancesIndex + j].atd(i);
long edgeIndex = cs[startEdgeIndex + j].at8(i);
double tmpWeight = _tree.reduceWeight(edgeIndex, weight);
boolean countValue = !_hasWeightsColumn || cs[startDistancesIndex-1].at8(i) == 1;
if (countValue && tmpWeight < minimalWeight) {
minimalWeight = tmpWeight;
minimalIndex = edgeIndex;
public void reduce(FindMinimalWeightTask mrt) {
if (mrt.minimalWeight < minimalWeight) {
minimalIndex = mrt.minimalIndex;
minimalWeight = mrt.minimalWeight;
/**
 * Map Reduce task to assign the cluster index based on the calculated flow.
 * If no cluster is assigned, the cluster with the minimal distance is assigned.
 * Returns the number of points in each cluster and changes the input frame according to the new cluster assignment.
 */
// Assigns a cluster to every counted row, either from the flow columns or by minimal distance,
// and accumulates the per-cluster point counts.
// NOTE(review): this listing is incomplete - closing braces are missing, the supertype is a raw
// MRTask (with it reduce(AssignClusterTask) would not override), `assigned` is set but never
// read, and _numberOfPointsInCluster is never incremented; the lost statements presumably include
// the counter increments, a `break` and an `if(!assigned)` guard. Confirm against the upstream file.
class AssignClusterTask extends MRTask {
// IN
private int _constraintsLength;
private boolean _hasWeightsColumn;
private int _weightIndex;
private int _distanceIndexStart;
private int _flowIndexStart;
private int _oldAssignmentIndex;
private int _newAssignmentIndex;
private int _distanceAssignmentIndex;
private int _dataStopIndex;
// OUT
int[] _numberOfPointsInCluster;
// changed input chunks
AssignClusterTask(int constraintsLength, boolean hasWeightsColumn, int numCols){
// Input data structure should be:
// - data points (number of columns from training dataset)
// - weight (1 column if CV is enabled)
// - distances from data points to each cluster (k columns)
// - edge indices (k columns of columns, not useful here)
// - result distance (1 column, if the cluster is assigned there is distance to this cluster)
// - old assignment (1 column, assignment from the previous iteration)
// - new assignment (1 column, assignment form the current iteration)
// - flow (k columns, calculated assignment from the MCF algorithm)
// Based on this structure indices are calculated and used
_constraintsLength = constraintsLength;
_hasWeightsColumn = hasWeightsColumn;
_distanceAssignmentIndex = numCols - 3 - constraintsLength;
_oldAssignmentIndex = numCols - 2 - constraintsLength;
_newAssignmentIndex = numCols - 1 - constraintsLength;
_dataStopIndex = numCols - (_hasWeightsColumn ? 1 : 0) - 3 * _constraintsLength - 3;
// the weight column (when present) directly follows the data columns
_weightIndex = _dataStopIndex;
_distanceIndexStart = _dataStopIndex + (_hasWeightsColumn ? 1 : 0);
_flowIndexStart = numCols - constraintsLength;
// Writes the assignment of one row: shifts the current assignment into the "old" column, stores
// the new cluster index and copies the distance to that cluster into the result distance column.
public void assignCluster(Chunk[] cs, int row, int clusterIndex){
// old assignment
cs[_oldAssignmentIndex].set(row, cs[_newAssignmentIndex].at8(row));
// new assignment
cs[_newAssignmentIndex].set(row, clusterIndex);
// distances
cs[_distanceAssignmentIndex].set(row, cs[_dataStopIndex + (_hasWeightsColumn ? 1 : 0) + clusterIndex].atd(row));
public void map(Chunk[] cs) {
// per-chunk counters, merged in reduce()
_numberOfPointsInCluster = new int[_constraintsLength];
for (int i = 0; i < cs[0].len(); i++) {
if (!_hasWeightsColumn || cs[_weightIndex].at8(i) == 1) {
// CV is not enabled or weight is 1
boolean assigned = false;
for (int j = 0; j < _constraintsLength; j++) {
if (cs[_flowIndexStart + j].at8(i) == 1) {
// data point has assignment from MCF algorithm
assignCluster(cs, i, j);
assigned = true;
// data point has no assignment from MCF -> min distance is used
double minDistance = cs[_distanceIndexStart].atd(i);
int minIndex = 0;
for (int j = 1; j < _constraintsLength; j++) {
double tmpDistance = cs[_distanceIndexStart + j].atd(i);
if(minDistance > tmpDistance){
minDistance = tmpDistance;
minIndex = j;
assignCluster(cs, i, minIndex);
public void reduce(AssignClusterTask mrt) {
// element-wise sum of the per-cluster counters
ArrayUtils.add(this._numberOfPointsInCluster, mrt._numberOfPointsInCluster);
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy