org.apache.hadoop.hbase.util.MunkresAssignment Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-server Show documentation
Server functionality for HBase
There is a newer version: 3.0.0-beta-1
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.util;

import java.util.Arrays;
import java.util.Deque;
import java.util.LinkedList;

import org.apache.hadoop.hbase.classification.InterfaceAudience;

/**
 * Computes the optimal (minimal cost) assignment of jobs to workers (or other
 * analogous) concepts given a cost matrix of each pair of job and worker, using
 * the algorithm by James Munkres in "Algorithms for the Assignment and
 * Transportation Problems", with additional optimizations as described by Jin
 * Kue Wong in "A New Implementation of an Algorithm for the Optimal Assignment
 * Problem: An Improved Version of Munkres' Algorithm". The algorithm runs in
 * O(n^3) time and need O(n^2) auxiliary space where n is the number of jobs or
 * workers, whichever is greater.
 */
@InterfaceAudience.Private
public class MunkresAssignment {

  // The original algorithm by Munkres uses the terms STAR and PRIME to denote
  // different states of zero values in the cost matrix. These values are
  // represented as byte constants instead of enums to save space in the mask
  // matrix by a factor of 4n^2 where n is the size of the problem.
  private static final byte NONE = 0;
  private static final byte STAR = 1;
  private static final byte PRIME = 2;

  // The algorithm requires that the number of column is at least as great as
  // the number of rows. If that is not the case, then the cost matrix should
  // be transposed before computation, and the solution matrix transposed before
  // returning to the caller.
  private final boolean transposed;

  // The number of rows of internal matrices.
  private final int rows;

  // The number of columns of internal matrices.
  private final int cols;

  // The cost matrix, the cost of assigning each row index to column index.
  private float[][] cost;

  // Mask of zero cost assignment states.
  private byte[][] mask;

  // Covering some rows of the cost matrix.
  private boolean[] rowsCovered;

  // Covering some columns of the cost matrix.
  private boolean[] colsCovered;

  // The alternating path between starred zeroes and primed zeroes
  private Deque> path;

  // The solution, marking which rows should be assigned to which columns. The
  // positions of elements in this array correspond to the rows of the cost
  // matrix, and the value of each element correspond to the columns of the cost
  // matrix, i.e. assignments[i] = j indicates that row i should be assigned to
  // column j.
  private int[] assignments;

  // Improvements described by Jin Kue Wong cache the least value in each row,
  // as well as the column index of the least value in each row, and the pending
  // adjustments to each row and each column.
  private float[] leastInRow;
  private int[] leastInRowIndex;
  private float[] rowAdjust;
  private float[] colAdjust;

  /**
   * Construct a new problem instance with the specified cost matrix. The cost
   * matrix must be rectangular, though not necessarily square. If one dimension
   * is greater than the other, some elements in the greater dimension will not
   * be assigned. The input cost matrix will not be modified.
   * @param costMatrix
   */
  public MunkresAssignment(float[][] costMatrix) {
    // The algorithm assumes that the number of columns is at least as great as
    // the number of rows. If this is not the case of the input matrix, then
    // all internal structures must be transposed relative to the input.
    this.transposed = costMatrix.length > costMatrix[0].length;
    if (this.transposed) {
      this.rows = costMatrix[0].length;
      this.cols = costMatrix.length;
    } else {
      this.rows = costMatrix.length;
      this.cols = costMatrix[0].length;
    }

    cost = new float[rows][cols];
    mask = new byte[rows][cols];
    rowsCovered = new boolean[rows];
    colsCovered = new boolean[cols];
    path = new LinkedList>();

    leastInRow = new float[rows];
    leastInRowIndex = new int[rows];
    rowAdjust = new float[rows];
    colAdjust = new float[cols];

    assignments = null;

    // Copy cost matrix.
    if (transposed) {
      for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
          cost[r][c] = costMatrix[c][r];
        }
      }
    } else {
      for (int r = 0; r < rows; r++) {
        System.arraycopy(costMatrix[r], 0, cost[r], 0, cols);
      }
    }

    // Costs must be finite otherwise the matrix can get into a bad state where
    // no progress can be made. If your use case depends on a distinction
    // between costs of MAX_VALUE and POSITIVE_INFINITY, you're doing it wrong.
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        if (cost[r][c] == Float.POSITIVE_INFINITY) {
          cost[r][c] = Float.MAX_VALUE;
        }
      }
    }
  }

  /**
   * Get the optimal assignments. The returned array will have the same number
   * of elements as the number of elements as the number of rows in the input
   * cost matrix. Each element will indicate which column should be assigned to
   * that row or -1 if no column should be assigned, i.e. if result[i] = j then
   * row i should be assigned to column j. Subsequent invocations of this method
   * will simply return the same object without additional computation.
   * @return an array with the optimal assignments
   */
  public int[] solve() {
    // If this assignment problem has already been solved, return the known
    // solution
    if (assignments != null) {
      return assignments;
    }

    preliminaries();

    // Find the optimal assignments.
    while (!testIsDone()) {
      while (!stepOne()) {
        stepThree();
      }
      stepTwo();
    }

    // Extract the assignments from the mask matrix.
    if (transposed) {
      assignments = new int[cols];
      outer:
      for (int c = 0; c < cols; c++) {
        for (int r = 0; r < rows; r++) {
          if (mask[r][c] == STAR) {
            assignments[c] = r;
            continue outer;
          }
        }
        // There is no assignment for this row of the input/output.
        assignments[c] = -1;
      }
    } else {
      assignments = new int[rows];
      outer:
      for (int r = 0; r < rows; r++) {
        for (int c = 0; c < cols; c++) {
          if (mask[r][c] == STAR) {
            assignments[r] = c;
            continue outer;
          }
        }
      }
    }

    // Once the solution has been computed, there is no need to keep any of the
    // other internal structures. Clear all unnecessary internal references so
    // the garbage collector may reclaim that memory.
    cost = null;
    mask = null;
    rowsCovered = null;
    colsCovered = null;
    path = null;
    leastInRow = null;
    leastInRowIndex = null;
    rowAdjust = null;
    colAdjust = null;

    return assignments;
  }

  /**
   * Corresponds to the "preliminaries" step of the original algorithm.
   * Guarantees that the matrix is an equivalent non-negative matrix with at
   * least one zero in each row.
   */
  private void preliminaries() {
    for (int r = 0; r < rows; r++) {
      // Find the minimum cost of each row.
      float min = Float.POSITIVE_INFINITY;
      for (int c = 0; c < cols; c++) {
        min = Math.min(min, cost[r][c]);
      }

      // Subtract that minimum cost from each element in the row.
      for (int c = 0; c < cols; c++) {
        cost[r][c] -= min;

        // If the element is now zero and there are no zeroes in the same row
        // or column which are already starred, then star this one. There
        // must be at least one zero because of subtracting the min cost.
        if (cost[r][c] == 0 && !rowsCovered[r] && !colsCovered[c]) {
          mask[r][c] = STAR;
          // Cover this row and column so that no other zeroes in them can be
          // starred.
          rowsCovered[r] = true;
          colsCovered[c] = true;
        }
      }
    }

    // Clear the covered rows and columns.
    Arrays.fill(rowsCovered, false);
    Arrays.fill(colsCovered, false);
  }

  /**
   * Test whether the algorithm is done, i.e. we have the optimal assignment.
   * This occurs when there is exactly one starred zero in each row.
   * @return true if the algorithm is done
   */
  private boolean testIsDone() {
    // Cover all columns containing a starred zero. There can be at most one
    // starred zero per column. Therefore, a covered column has an optimal
    // assignment.
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        if (mask[r][c] == STAR) {
          colsCovered[c] = true;
        }
      }
    }

    // Count the total number of covered columns.
    int coveredCols = 0;
    for (int c = 0; c < cols; c++) {
      coveredCols += colsCovered[c] ? 1 : 0;
    }

    // Apply an row and column adjustments that are pending.
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        cost[r][c] += rowAdjust[r];
        cost[r][c] += colAdjust[c];
      }
    }

    // Clear the pending row and column adjustments.
    Arrays.fill(rowAdjust, 0);
    Arrays.fill(colAdjust, 0);

    // The covers on columns and rows may have been reset, recompute the least
    // value for each row.
    for (int r = 0; r < rows; r++) {
      leastInRow[r] = Float.POSITIVE_INFINITY;
      for (int c = 0; c < cols; c++) {
        if (!rowsCovered[r] && !colsCovered[c] && cost[r][c] < leastInRow[r]) {
          leastInRow[r] = cost[r][c];
          leastInRowIndex[r] = c;
        }
      }
    }

    // If all columns are covered, then we are done. Since there may be more
    // columns than rows, we are also done if the number of covered columns is
    // at least as great as the number of rows.
    return (coveredCols == cols || coveredCols >= rows);
  }

  /**
   * Corresponds to step 1 of the original algorithm.
   * @return false if all zeroes are covered
   */
  private boolean stepOne() {
    while (true) {
      Pair zero = findUncoveredZero();
      if (zero == null) {
        // No uncovered zeroes, need to manipulate the cost matrix in step
        // three.
        return false;
      } else {
        // Prime the uncovered zero and find a starred zero in the same row.
        mask[zero.getFirst()][zero.getSecond()] = PRIME;
        Pair star = starInRow(zero.getFirst());
        if (star != null) {
          // Cover the row with both the newly primed zero and the starred zero.
          // Since this is the only place where zeroes are primed, and we cover
          // it here, and rows are only uncovered when primes are erased, then
          // there can be at most one primed uncovered zero.
          rowsCovered[star.getFirst()] = true;
          colsCovered[star.getSecond()] = false;
          updateMin(star.getFirst(), star.getSecond());
        } else {
          // Will go to step two after, where a path will be constructed,
          // starting from the uncovered primed zero (there is only one). Since
          // we have already found it, save it as the first node in the path.
          path.clear();
          path.offerLast(new Pair(zero.getFirst(),
              zero.getSecond()));
          return true;
        }
      }
    }
  }

  /**
   * Corresponds to step 2 of the original algorithm.
   */
  private void stepTwo() {
    // Construct a path of alternating starred zeroes and primed zeroes, where
    // each starred zero is in the same column as the previous primed zero, and
    // each primed zero is in the same row as the previous starred zero. The
    // path will always end in a primed zero.
    while (true) {
      Pair star = starInCol(path.getLast().getSecond());
      if (star != null) {
        path.offerLast(star);
      } else {
        break;
      }
      Pair prime = primeInRow(path.getLast().getFirst());
      path.offerLast(prime);
    }

    // Augment path - unmask all starred zeroes and star all primed zeroes. All
    // nodes in the path will be either starred or primed zeroes. The set of
    // starred zeroes is independent and now one larger than before.
    for (Pair p : path) {
      if (mask[p.getFirst()][p.getSecond()] == STAR) {
        mask[p.getFirst()][p.getSecond()] = NONE;
      } else {
        mask[p.getFirst()][p.getSecond()] = STAR;
      }
    }

    // Clear all covers from rows and columns.
    Arrays.fill(rowsCovered, false);
    Arrays.fill(colsCovered, false);

    // Remove the prime mask from all primed zeroes.
    for (int r = 0; r < rows; r++) {
      for (int c = 0; c < cols; c++) {
        if (mask[r][c] == PRIME) {
          mask[r][c] = NONE;
        }
      }
    }
  }

  /**
   * Corresponds to step 3 of the original algorithm.
   */
  private void stepThree() {
    // Find the minimum uncovered cost.
    float min = leastInRow[0];
    for (int r = 1; r < rows; r++) {
      if (leastInRow[r] < min) {
        min = leastInRow[r];
      }
    }

    // Add the minimum cost to each of the costs in a covered row, or subtract
    // the minimum cost from each of the costs in an uncovered column. As an
    // optimization, do not actually modify the cost matrix yet, but track the
    // adjustments that need to be made to each row and column.
    for (int r = 0; r < rows; r++) {
      if (rowsCovered[r]) {
        rowAdjust[r] += min;
      }
    }
    for (int c = 0; c < cols; c++) {
      if (!colsCovered[c]) {
        colAdjust[c] -= min;
      }
    }

    // Since the cost matrix is not being updated yet, the minimum uncovered
    // cost per row must be updated.
    for (int r = 0; r < rows; r++) {
      if (!colsCovered[leastInRowIndex[r]]) {
        // The least value in this row was in an uncovered column, meaning that
        // it would have had the minimum value subtracted from it, and therefore
        // will still be the minimum value in that row.
        leastInRow[r] -= min;
      } else {
        // The least value in this row was in a covered column and would not
        // have had the minimum value subtracted from it, so the minimum value
        // could be some in another column.
        for (int c = 0; c < cols; c++) {
          if (cost[r][c] + colAdjust[c] + rowAdjust[r] < leastInRow[r]) {
            leastInRow[r] = cost[r][c] + colAdjust[c] + rowAdjust[r];
            leastInRowIndex[r] = c;
          }
        }
      }
    }
  }

  /**
   * Find a zero cost assignment which is not covered. If there are no zero cost
   * assignments which are uncovered, then null will be returned.
   * @return pair of row and column indices of an uncovered zero or null
   */
  private Pair findUncoveredZero() {
    for (int r = 0; r < rows; r++) {
      if (leastInRow[r] == 0) {
        return new Pair(r, leastInRowIndex[r]);
      }
    }
    return null;
  }

  /**
   * A specified row has become covered, and a specified column has become
   * uncovered. The least value per row may need to be updated.
   * @param row the index of the row which was just covered
   * @param col the index of the column which was just uncovered
   */
  private void updateMin(int row, int col) {
    // If the row is covered we want to ignore it as far as least values go.
    leastInRow[row] = Float.POSITIVE_INFINITY;

    for (int r = 0; r < rows; r++) {
      // Since the column has only just been uncovered, it could not have any
      // pending adjustments. Only covered rows can have pending adjustments
      // and covered costs do not count toward row minimums. Therefore, we do
      // not need to consider rowAdjust[r] or colAdjust[col].
      if (!rowsCovered[r] && cost[r][col] < leastInRow[r]) {
        leastInRow[r] = cost[r][col];
        leastInRowIndex[r] = col;
      }
    }
  }

  /**
   * Find a starred zero in a specified row. If there are no starred zeroes in
   * the specified row, then null will be returned.
   * @param r the index of the row to be searched
   * @return pair of row and column indices of starred zero or null
   */
  private Pair starInRow(int r) {
    for (int c = 0; c < cols; c++) {
      if (mask[r][c] == STAR) {
        return new Pair(r, c);
      }
    }
    return null;
  }

  /**
   * Find a starred zero in the specified column. If there are no starred zeroes
   * in the specified row, then null will be returned.
   * @param c the index of the column to be searched
   * @return pair of row and column indices of starred zero or null
   */
  private Pair starInCol(int c) {
    for (int r = 0; r < rows; r++) {
      if (mask[r][c] == STAR) {
        return new Pair(r, c);
      }
    }
    return null;
  }

  /**
   * Find a primed zero in the specified row. If there are no primed zeroes in
   * the specified row, then null will be returned.
   * @param r the index of the row to be searched
   * @return pair of row and column indices of primed zero or null
   */
  private Pair primeInRow(int r) {
    for (int c = 0; c < cols; c++) {
      if (mask[r][c] == PRIME) {
        return new Pair(r, c);
      }
    }
    return null;
  }
}