graphql.schema.diffing.HungarianAlgorithm Maven / Gradle / Ivy
package graphql.schema.diffing;
import com.google.common.util.concurrent.AtomicDoubleArray;
import graphql.Internal;
import java.util.Arrays;
/* Copyright (c) 2012 Kevin L. Stern
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
/**
* An implementation of the Hungarian algorithm for solving the assignment
* problem. An instance of the assignment problem consists of a number of
* workers along with a number of jobs and a cost matrix which gives the cost of
* assigning the i'th worker to the j'th job at position (i, j). The goal is to
* find an assignment of workers to jobs so that no job is assigned more than
* one worker and so that no worker is assigned to more than one job in such a
* manner so as to minimize the total cost of completing the jobs.
*
*
* An assignment for a cost matrix that has more workers than jobs will
* necessarily include unassigned workers, indicated by an assignment value of
* -1; in no other circumstance will there be unassigned workers. Similarly, an
* assignment for a cost matrix that has more jobs than workers will necessarily
* include unassigned jobs; in no other circumstance will there be unassigned
* jobs. For completeness, an assignment for a square cost matrix will give
* exactly one unique worker to each job.
*
*
* This version of the Hungarian algorithm runs in time O(n^3), where n is the
* maximum among the number of workers and the number of jobs.
*
* @author Kevin L. Stern
*/
@Internal
public class HungarianAlgorithm {
// changed by reduce
public final AtomicDoubleArray[] costMatrix;
// constant always
private final int rows;
private final int cols;
private final int dim;
// the assigned workers,jobs for the result
public final int[] matchJobByWorker;
private final int[] matchWorkerByJob;
// reset for each execute
private final int[] minSlackWorkerByJob;
private final double[] minSlackValueByJob;
private final int[] parentWorkerByCommittedJob;
// reset for worker
private final boolean[] committedWorkers;
// labels for both sides of the bipartite graph
private final double[] labelByWorker;
private final double[] labelByJob;
/**
* Construct an instance of the algorithm.
*
* @param costMatrix the cost matrix, where matrix[i][j] holds the cost of assigning
* worker i to job j, for all i, j. The cost matrix must not be
* irregular in the sense that all rows must be the same length; in
* addition, all entries must be non-infinite numbers.
*/
public HungarianAlgorithm(AtomicDoubleArray[] costMatrix) {
this.dim = Math.max(costMatrix.length, costMatrix[0].length());
this.rows = costMatrix.length;
this.cols = costMatrix[0].length();
this.costMatrix = costMatrix;
// for (int w = 0; w < this.dim; w++) {
// if (w < costMatrix.length) {
// if (costMatrix[w].length() != this.cols) {
// throw new IllegalArgumentException("Irregular cost matrix");
// }
//// for (int j = 0; j < this.cols; j++) {
//// if (Double.isInfinite(costMatrix[w].get(j))) {
//// throw new IllegalArgumentException("Infinite cost");
//// }
//// if (Double.isNaN(costMatrix[w].get(j))) {
//// throw new IllegalArgumentException("NaN cost");
//// }
//// }
// this.costMatrix[w] = costMatrix(costMatrix[w], this.dim);
// } else {
// this.costMatrix[w] = new double[this.dim];
// }
// }
labelByWorker = new double[this.dim];
labelByJob = new double[this.dim];
minSlackWorkerByJob = new int[this.dim];
minSlackValueByJob = new double[this.dim];
committedWorkers = new boolean[this.dim];
parentWorkerByCommittedJob = new int[this.dim];
matchJobByWorker = new int[this.dim];
Arrays.fill(matchJobByWorker, -1);
matchWorkerByJob = new int[this.dim];
Arrays.fill(matchWorkerByJob, -1);
}
/**
* Compute an initial feasible solution by assigning zero labels to the
* workers and by assigning to each job a label equal to the minimum cost
* among its incident edges.
*/
protected void computeInitialFeasibleSolution() {
for (int j = 0; j < dim; j++) {
labelByJob[j] = Double.POSITIVE_INFINITY;
}
for (int w = 0; w < dim; w++) {
for (int j = 0; j < dim; j++) {
if (costMatrix[w].get(j) < labelByJob[j]) {
labelByJob[j] = costMatrix[w].get(j);
}
}
}
}
/**
* Execute the algorithm.
*
* @return the minimum cost matching of workers to jobs based upon the
* provided cost matrix. A matching value of -1 indicates that the
* corresponding worker is unassigned.
*/
public int[] execute() {
/*
* Heuristics to improve performance: Reduce rows and columns by their
* smallest element, compute an initial non-zero dual feasible solution and
* create a greedy matching from workers to jobs of the cost matrix.
*/
reduce();
computeInitialFeasibleSolution();
greedyMatch();
int w = fetchUnmatchedWorker();
while (w < dim) {
initializePhase(w);
executePhase();
w = fetchUnmatchedWorker();
}
int[] result = Arrays.copyOf(matchJobByWorker, rows);
for (w = 0; w < result.length; w++) {
if (result[w] >= cols) {
result[w] = -1;
}
}
return result;
}
/**
* Execute a single phase of the algorithm. A phase of the Hungarian algorithm
* consists of building a set of committed workers and a set of committed jobs
* from a root unmatched worker by following alternating unmatched/matched
* zero-slack edges. If an unmatched job is encountered, then an augmenting
* path has been found and the matching is grown. If the connected zero-slack
* edges have been exhausted, the labels of committed workers are increased by
* the minimum slack among committed workers and non-committed jobs to create
* more zero-slack edges (the labels of committed jobs are simultaneously
* decreased by the same amount in order to maintain a feasible labeling).
*
*
* The runtime of a single phase of the algorithm is O(n^2), where n is the
* dimension of the internal square cost matrix, since each edge is visited at
* most once and since increasing the labeling is accomplished in time O(n) by
* maintaining the minimum slack values among non-committed jobs. When a phase
* completes, the matching will have increased in size.
*/
protected void executePhase() {
while (true) {
// the last worker we found
int minSlackWorker = -1;
int minSlackJob = -1;
double minSlackValue = Double.POSITIVE_INFINITY;
for (int j = 0; j < dim; j++) {
if (parentWorkerByCommittedJob[j] == -1) {
if (minSlackValueByJob[j] < minSlackValue) {
minSlackValue = minSlackValueByJob[j];
minSlackWorker = minSlackWorkerByJob[j];
minSlackJob = j;
}
}
}
if (minSlackValue > 0) {
updateLabeling(minSlackValue);
}
// adding (minSlackWorker, minSlackJob) to the path
parentWorkerByCommittedJob[minSlackJob] = minSlackWorker;
// check if minSlackJob is not assigned yet
// the requirement of an augmenting path is that start and end is not matched (assigned) yet
if (matchWorkerByJob[minSlackJob] == -1) {
/*
* An augmenting path has been found.
* Iterating via parentWorkerByCommittedJob and matchJobByWorker through the
* path and add/update matching.
* Job -> Parent worker via parentWorkerByCommittedJob
* and Worker -> Job via matchJobByWorker
*/
int committedJob = minSlackJob;
int parentWorker = parentWorkerByCommittedJob[committedJob];
while (true) {
int temp = matchJobByWorker[parentWorker];
match(parentWorker, committedJob);
committedJob = temp;
if (committedJob == -1) {
break;
}
parentWorker = parentWorkerByCommittedJob[committedJob];
}
return;
} else {
/*
* Update slack values since we increased the size of the committed
* workers set.
*/
// we checked above that minSlackJob is indeed assigned
int worker = matchWorkerByJob[minSlackJob];
// committedWorkers is used when slack is updated
committedWorkers[worker] = true;
for (int j = 0; j < dim; j++) {
if (parentWorkerByCommittedJob[j] == -1) {
double slack = costMatrix[worker].get(j) - labelByWorker[worker]
- labelByJob[j];
if (minSlackValueByJob[j] > slack) {
minSlackValueByJob[j] = slack;
minSlackWorkerByJob[j] = worker;
}
}
}
}
}
}
/**
* @return the first unmatched worker or {@link #dim} if none.
*/
protected int fetchUnmatchedWorker() {
int w;
for (w = 0; w < dim; w++) {
if (matchJobByWorker[w] == -1) {
break;
}
}
return w;
}
/**
* Find a valid matching by greedily selecting among zero-cost matchings. This
* is a heuristic to jump-start the augmentation algorithm.
*/
protected void greedyMatch() {
for (int w = 0; w < dim; w++) {
for (int j = 0; j < dim; j++) {
if (matchJobByWorker[w] == -1 && matchWorkerByJob[j] == -1
&& costMatrix[w].get(j) - labelByWorker[w] - labelByJob[j] == 0) {
match(w, j);
}
}
}
}
/**
* Initialize the next phase of the algorithm by clearing the committed
* workers and jobs sets and by initializing the slack arrays to the values
* corresponding to the specified root worker.
*
* @param w the worker at which to root the next phase.
*/
protected void initializePhase(int w) {
Arrays.fill(committedWorkers, false);
Arrays.fill(parentWorkerByCommittedJob, -1);
committedWorkers[w] = true;
for (int j = 0; j < dim; j++) {
minSlackValueByJob[j] = costMatrix[w].get(j) - labelByWorker[w] - labelByJob[j];
minSlackWorkerByJob[j] = w;
}
}
/**
* Helper method to record a matching between worker w and job j.
*/
protected void match(int w, int j) {
matchJobByWorker[w] = j;
matchWorkerByJob[j] = w;
}
/**
* Reduce the cost matrix by subtracting the smallest element of each row from
* all elements of the row as well as the smallest element of each column from
* all elements of the column. Note that an optimal assignment for a reduced
* cost matrix is optimal for the original cost matrix.
*/
protected void reduce() {
for (int w = 0; w < dim; w++) {
double min = Double.POSITIVE_INFINITY;
for (int j = 0; j < dim; j++) {
if (costMatrix[w].get(j) < min) {
min = costMatrix[w].get(j);
}
}
for (int j = 0; j < dim; j++) {
costMatrix[w].set(j, costMatrix[w].get(j) - min);
}
}
double[] min = new double[dim];
for (int j = 0; j < dim; j++) {
min[j] = Double.POSITIVE_INFINITY;
}
for (int w = 0; w < dim; w++) {
for (int j = 0; j < dim; j++) {
if (costMatrix[w].get(j) < min[j]) {
min[j] = costMatrix[w].get(j);
}
}
}
for (int w = 0; w < dim; w++) {
for (int j = 0; j < dim; j++) {
costMatrix[w].set(j, costMatrix[w].get(j) - min[j]);
}
}
}
/**
* Update labels with the specified slack by adding the slack value for
* committed workers and by subtracting the slack value for committed jobs. In
* addition, update the minimum slack values appropriately.
*/
protected void updateLabeling(double slack) {
for (int w = 0; w < dim; w++) {
if (committedWorkers[w]) {
labelByWorker[w] += slack;
}
}
for (int j = 0; j < dim; j++) {
if (parentWorkerByCommittedJob[j] != -1) {
labelByJob[j] -= slack;
} else {
minSlackValueByJob[j] -= slack;
}
}
}
public int[] nextChild() {
int currentJobAssigned = matchJobByWorker[0];
// we want to make currentJobAssigned not allowed,meaning we set the size to Infinity
costMatrix[0].set(currentJobAssigned, Integer.MAX_VALUE);
matchWorkerByJob[currentJobAssigned] = -1;
matchJobByWorker[0] = -1;
minSlackValueByJob[currentJobAssigned] = Integer.MAX_VALUE;
initializePhase(0);
executePhase();
int[] result = Arrays.copyOf(matchJobByWorker, rows);
return result;
}
}