org.carrot2.matrix.MatrixUtils Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of carrot2-mini Show documentation
Show all versions of carrot2-mini Show documentation
Carrot2 search results clustering framework. Minimal functional subset
(core algorithms and infrastructure, no document sources).
/*
* Carrot2 project.
*
* Copyright (C) 2002-2016, Dawid Weiss, Stanisław Osiński.
* All rights reserved.
*
* Refer to the full license file "carrot2.LICENSE"
* in the root folder of the repository checkout or at:
* http://www.carrot2.org/carrot2.LICENSE
*/
package org.carrot2.matrix;
import java.util.Arrays;
import org.carrot2.mahout.math.function.DoubleFunction;
import org.carrot2.mahout.math.function.Functions;
import org.carrot2.mahout.math.function.IntIntDoubleFunction;
import org.carrot2.mahout.math.matrix.DoubleMatrix2D;
import com.carrotsearch.hppc.sorting.IndirectComparator;
import com.carrotsearch.hppc.sorting.IndirectSort;
/**
* A set of DoubleMatrix2D
shorthands and utility methods.
*/
public class MatrixUtils
{
/**
* Normalizes column vectors of matrix A
so that their L2 norm (Euclidean
* distance) is equal to 1.0.
*
* @param A matrix to normalize
* @param work a temporary array of A.columns()
doubles that will be
* overwritten with column's original L2 norms. Supply a non-null pointer
* to avoid continuous allocation/freeing of memory when doing calculations
* in a loop. If this parameter is null
, a new array will be
* allocated every time this method is called.
* @return A with length-normalized columns (for convenience only)
*/
public static DoubleMatrix2D normalizeColumnL2(DoubleMatrix2D A, double [] work)
{
work = prepareWork(A, work);
// Calculate the L2 norm for each column
for (int r = 0; r < A.rows(); r++)
{
for (int c = 0; c < A.columns(); c++)
{
work[c] += A.getQuick(r, c) * A.getQuick(r, c);
}
}
// Take the square root
for (int c = 0; c < A.columns(); c++)
{
work[c] = Math.sqrt(work[c]);
}
// Normalize
normalizeColumns(A, work);
return A;
}
/**
* Normalizes column vectors of a sparse matrix A
so that their L2 norm
* (Euclidean distance) is equal to 1.0.
*
* @param A matrix to normalize
* @param work a temporary array of A.columns()
doubles that will be
* overwritten with column's original L2 norms. Supply a non-null pointer
* to avoid continuous allocation/freeing of memory when doing calculations
* in a loop. If this parameter is null
, a new array will be
* allocated every time this method is called.
* @return A with length-normalized columns (for convenience only)
*/
public static DoubleMatrix2D normalizeSparseColumnL2(final DoubleMatrix2D A,
final double [] work)
{
final double [] w = prepareWork(A, work);
A.forEachNonZero(new IntIntDoubleFunction()
{
@Override
public double apply(int row, int column, double value)
{
w[column] += value * value;
return value;
}
});
// Take the square root
for (int c = 0; c < A.columns(); c++)
{
w[c] = Math.sqrt(w[c]);
}
// Normalize
A.forEachNonZero(new IntIntDoubleFunction()
{
@Override
public double apply(int row, int column, double value)
{
A.setQuick(row, column, value / w[column]);
return 0;
}
});
return A;
}
/**
* Normalizes column vectors of matrix A
so that their L1 norm is equal
* to 1.0.
*
* @param A matrix to normalize
* @param work a temporary array of A.columns()
doubles that will be
* overwritten with column's original L1 norms. Supply a non-null pointer
* to avoid continuous allocation/freeing of memory when doing calculations
* in a loop. If this parameter is null
, a new array will be
* allocated every time this method is called.
* @return A with L1-normalized columns (for convenience only)
*/
public static DoubleMatrix2D normalizeColumnL1(DoubleMatrix2D A, double [] work)
{
work = prepareWork(A, work);
// Calculate the L1 norm for each column
for (int r = 0; r < A.rows(); r++)
{
for (int c = 0; c < A.columns(); c++)
{
work[c] += A.getQuick(r, c);
}
}
// Normalize
normalizeColumns(A, work);
return A;
}
/**
* Prepares a temporary array for normalizing matrix columns.
*/
private static double [] prepareWork(DoubleMatrix2D A, double [] work)
{
// Colt's dense matrices are stored in a row-major format, so the
// processor's cache will be better used when the rows counter is in the
// outer loop. To do that we need a temporary double vector
if (work == null || work.length != A.columns())
{
work = new double [A.columns()];
}
else
{
Arrays.fill(work, 0);
}
return work;
}
/**
* A common routine for normalizing columns of a matrix.
*/
private static void normalizeColumns(DoubleMatrix2D A, double [] work)
{
for (int r = A.rows() - 1; r >= 0; r--)
{
for (int c = 0; c < A.columns(); c++)
{
if (work[c] != 0)
{
A.setQuick(r, c, A.getQuick(r, c) / work[c]);
}
}
}
}
/**
* Computes the orthogonality of matrix A. The orthogonality is computed as a sum of
* k*(k-1)/2 inner products of A's column vectors, k being the number of columns of A,
* and then normalized to the 0.0 - 1.0 range.
*
* @param A matrix to compute orthogonality for, must be column length-normalized
* @return orthogonality of matrix A. 0.0 denotes a perfect orthogonality between
* every pair of A's column. 1.0 indicates that all columns of A are parallel.
*/
public static double computeOrthogonality(DoubleMatrix2D A)
{
double orthogonality = 0;
// Compute pairwise inner products
DoubleMatrix2D cosines = A.zMult(A, null, 1, 0, true, false);
for (int r = 0; r < cosines.rows(); r++)
{
for (int c = r + 1; c < cosines.columns(); c++)
{
orthogonality += cosines.getQuick(r, c);
}
}
return orthogonality / ((cosines.rows() - 1) * cosines.rows() / 2.0);
}
/**
* Computers sparseness of matrix A
as a fraction of non-zero elements to
* the total number of elements.
*
* @return sparseness of A
, which is a value between 0.0 (all elements
* are zero) and 1.0 (all elements are non-zero)
*/
public static double computeSparseness(DoubleMatrix2D A)
{
int count = 0;
for (int r = 0; r < A.rows(); r++)
{
for (int c = 0; c < A.columns(); c++)
{
if (A.getQuick(r, c) != 0)
{
count++;
}
}
}
return count / (double) (A.rows() * A.columns());
}
/**
* Finds the first minimum element in each column of matrix A. When calculating
* minimum values for each column this version should perform better than scanning
* each column separately.
*
* @param indices an array of A.columns()
integers in which indices of
* the first minimum element will be stored. If this parameter is
* null
a new array will be allocated.
* @param minValues an array of A.columns()
doubles in which values of
* each column's minimum elements will be stored. If this parameter is
* null
a new array will be allocated.
* @return for each column of A the index of the minimum element
*/
public static int [] minInColumns(DoubleMatrix2D A, int [] indices,
double [] minValues)
{
return inColumns(A, indices, minValues, DoubleComparators.REVERSED_ORDER,
Functions.IDENTITY);
}
/**
* Finds the first maximum element in each column of matrix A. When calculating
* maximum values for each column this version should perform better than scanning
* each column separately.
*
* @param indices an array of A.columns()
integers in which indices of
* the first maximum element will be stored. If this parameter is
* null
a new array will be allocated.
* @param maxValues an array of A.columns()
doubles in which values of
* each column's maximum elements will be stored. If this parameter is
* null
a new array will be
* allocated.
* @return for each column of A the index of the maximum element
*/
public static int [] maxInColumns(DoubleMatrix2D A, int [] indices,
double [] maxValues)
{
return maxInColumns(A, indices, maxValues, Functions.IDENTITY);
}
public static int [] maxInColumns(DoubleMatrix2D A, int [] indices,
double [] maxValues, DoubleFunction transform)
{
return inColumns(A, indices, maxValues, DoubleComparators.NATURAL_ORDER,
transform);
}
/**
* Common implementation of finding extreme elements in columns.
*/
private static int [] inColumns(DoubleMatrix2D A, int [] indices, double [] extValues,
DoubleComparator doubleComparator, DoubleFunction transform)
{
if (indices == null)
{
indices = new int [A.columns()];
}
if (A.columns() == 0 || A.rows() == 0)
{
return indices;
}
if (extValues == null)
{
extValues = new double [A.columns()];
}
for (int c = 0; c < A.columns(); c++)
{
extValues[c] = transform.apply(A.getQuick(0, c));
}
Arrays.fill(indices, 0);
for (int r = 1; r < A.rows(); r++)
{
for (int c = 0; c < A.columns(); c++)
{
final double transformed = transform.apply(A.getQuick(r, c));
if (doubleComparator.compare(transformed, extValues[c]) > 0)
{
extValues[c] = transformed;
indices[c] = r;
}
}
}
return indices;
}
private static interface DoubleComparator
{
public int compare(double a, double b);
}
private static final class DoubleComparators
{
/**
* Compares int
in their natural order.
*/
public static final DoubleComparator NATURAL_ORDER = new NaturalOrderDoubleComparator();
/**
* Compares int
in their reversed order.
*/
public static final DoubleComparator REVERSED_ORDER = new ReversedOrderDoubleComparator();
/**
* Natural order.
*/
private static class NaturalOrderDoubleComparator implements DoubleComparator
{
public int compare(double v1, double v2)
{
return Double.compare(v1, v2);
}
}
/**
* Reversed order.
*/
private static class ReversedOrderDoubleComparator implements DoubleComparator
{
public int compare(double v1, double v2)
{
return -Double.compare(v1, v2);
}
}
/**
* No instantiation.
*/
private DoubleComparators()
{
}
}
/**
* Finds the index of the first maximum element in given row of A
.
*
* @param A the matrix to search
* @param row the row to search
* @return index of the first maximum element or -1 if the input matrix is
* null
or has zero size.
*/
public static int maxInRow(DoubleMatrix2D A, int row)
{
int index = 0;
double max = A.getQuick(row, index);
for (int c = 1; c < A.columns(); c++)
{
if (max < A.getQuick(row, c))
{
max = A.getQuick(row, c);
index = c;
}
}
return index;
}
/**
* Calculates the sum of rows of matrix A
.
*
* @param sums an array to store the results. If the array is null
or
* does not match the number of rows in matrix A
, a new array
* will be created.
* @return sums of rows of A
*/
public static double [] sumRows(DoubleMatrix2D A, double [] sums)
{
if (sums == null || A.rows() != sums.length)
{
sums = new double [A.rows()];
}
else
{
Arrays.fill(sums, 0);
}
for (int r = 0; r < A.rows(); r++)
{
for (int c = 0; c < A.columns(); c++)
{
sums[r] += A.getQuick(r, c);
}
}
return sums;
}
/**
* Calculates the Frobenius norm of a matrix.
*
* @see Frobenius
* norm
*/
public static double frobeniusNorm(DoubleMatrix2D matrix)
{
return Math.sqrt(matrix.aggregate(Functions.PLUS, Functions.SQUARE));
}
/**
* Returns view of the provided matrix with rows permuted according to the order
* defined by the provided comparator.
*
* @param matrix to permute
* @param comparator to use
* @return view of the provided matrix with rows permuted according to the order
* defined by the provided comparator.
*/
public static DoubleMatrix2D sortedRowsView(DoubleMatrix2D matrix,
IndirectComparator comparator)
{
return matrix
.viewSelection(IndirectSort.mergesort(0, matrix.rows(), comparator), null);
}
}