All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.math.matrix.FloatSparseMatrix Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Smile.  If not, see .
 ******************************************************************************/

package smile.math.matrix;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

import smile.math.MathEx;
import smile.math.blas.Transpose;
import smile.util.Strings;

import static java.util.Spliterator.*;

/**
 * A sparse matrix is a matrix populated primarily with zeros. Conceptually,
 * sparsity corresponds to systems which are loosely coupled. Huge sparse
 * matrices often appear when solving partial differential equations.
 * 

* Operations using standard dense matrix structures and algorithms are slow * and consume large amounts of memory when applied to large sparse matrices. * Indeed, some very large sparse matrices are infeasible to manipulate with * the standard dense algorithms. Sparse data is by nature easily compressed, * and this compression almost always results in significantly less computer * data storage usage. *

* This class employs Harwell-Boeing column-compressed sparse matrix format. * Nonzero values are stored in an array (top-to-bottom, then * left-to-right-bottom). The row indices corresponding to the values are * also stored. Besides, a list of pointers are indexes where each column * starts. This format is efficient for arithmetic operations, column slicing, * and matrix-vector products. One typically uses SparseDataset for * construction of SparseMatrix. *

* For iteration through the elements of a matrix, this class provides * a functional API to iterate through the non-zero elements. This iteration * can be done by passing a lambda to be called on each non-zero element or * by processing a stream of objects representing each non-zero element. * The direct functional API is faster (and is about as fast as writing the * low-level loops against the internals of the matrix itself) while the * streaming interface is more flexible. * * @author Haifeng Li */ public class FloatSparseMatrix extends SMatrix implements Iterable { private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(SparseMatrix.class); private static final long serialVersionUID = 2L; /** * The number of rows. */ private final int m; /** * The number of columns. */ private final int n; /** * The index of the start of columns. */ private final int[] colIndex; /** * The row indices of nonzero values. */ private final int[] rowIndex; /** * The array of nonzero values stored column by column. */ private final float[] nonzeros; /** * Encapsulates an entry in a matrix for use in streaming. As typical stream object, * this object is immutable. But we can update the corresponding value in the matrix * through update method. This provides an efficient way to update the * non-zero entries of a sparse matrix. */ public class Entry { // these fields are exposed for direct access to simplify in-lining by the JVM /** The row index. */ public final int i; /** The column index. */ public final int j; /** The value. */ public final float x; /** The index to the matrix storage. */ public final int index; /** * Private constructor. Only the enclosure matrix can creates * the instances of entry. */ private Entry(int i, int j, int index) { this.i = i; this.j = j; this.x = nonzeros[index]; this.index = index; } /** * Update the value of entry in the matrix. Note that the field value * is final and thus not updated. */ public void update(float value) { nonzeros[index] = value; } @Override public String toString() { return String.format("(%d, %d):%s", i, j, Strings.format(x)); } } /** * Constructor. * @param m the number of rows in the matrix. * @param n the number of columns in the matrix. * @param nvals the number of nonzero entries in the matrix. */ private FloatSparseMatrix(int m, int n, int nvals) { this.m = m; this.n = n; rowIndex = new int[nvals]; colIndex = new int[n + 1]; nonzeros = new float[nvals]; } /** * Constructor. * @param m the number of rows in the matrix. * @param n the number of columns in the matrix. * @param rowIndex the row indices of nonzero values. * @param colIndex the index of the start of columns. * @param nonzeros the array of nonzero values stored column by column. */ public FloatSparseMatrix(int m, int n, float[] nonzeros, int[] rowIndex, int[] colIndex) { this.m = m; this.n = n; this.rowIndex = rowIndex; this.colIndex = colIndex; this.nonzeros = nonzeros; } /** * Constructor. * @param A a dense matrix to converted into sparse matrix format. */ public FloatSparseMatrix(float[][] A) { this(A, 100 * MathEx.FLOAT_EPSILON); } /** * Constructor. * @param A a dense matrix to converted into sparse matrix format. * @param tol the tolerance to regard a value as zero if |x| < tol. */ public FloatSparseMatrix(float[][] A, float tol) { m = A.length; n = A[0].length; int nvals = 0; // number of non-zero elements for (int i = 0; i < m; i++) { for (int j = 0; j < n; j++) { if (Math.abs(A[i][j]) >= tol) { nvals++; } } } nonzeros = new float[nvals]; rowIndex = new int[nvals]; colIndex = new int[n + 1]; colIndex[n] = nvals; int k = 0; for (int j = 0; j < n; j++) { colIndex[j] = k; for (int i = 0; i < m; i++) { if (Math.abs(A[i][j]) >= tol) { rowIndex[k] = i; nonzeros[k] = A[i][j]; k++; } } } } @Override public FloatSparseMatrix clone() { return new FloatSparseMatrix(m, n, nonzeros.clone(), rowIndex.clone(), colIndex.clone()); } @Override public int nrows() { return m; } @Override public int ncols() { return n; } @Override public long size() { return colIndex[n]; } /** * Provides a stream over all of the non-zero elements of a sparse matrix. */ public Stream nonzeros() { Spliterator spliterator = Spliterators.spliterator(iterator(), size(), ORDERED | SIZED | IMMUTABLE); return StreamSupport.stream(spliterator, false); } /** * Provides a stream over all of the non-zero elements of range of columns of a sparse matrix. * * @param beginColumn The beginning column, inclusive. * @param endColumn The end column, exclusive. */ public Stream nonzeros(int beginColumn, int endColumn) { Spliterator spliterator = Spliterators.spliterator(iterator(beginColumn, endColumn), colIndex[endColumn] - colIndex[beginColumn], ORDERED | SIZED | IMMUTABLE); return StreamSupport.stream(spliterator, false); } /** * Returns an iterator of nonzero entries. * @return an iterator of nonzero entries */ @Override public Iterator iterator() { return iterator(0, n); } /** * Returns an iterator of nonzero entries. * @param beginColumn The beginning column, inclusive. * @param endColumn The end column, exclusive. * @return an iterator of nonzero entries */ public Iterator iterator(int beginColumn, int endColumn) { if (beginColumn < 0 || beginColumn >= n) { throw new IllegalArgumentException("Invalid begin column: " + beginColumn); } if (endColumn <= beginColumn || endColumn > n) { throw new IllegalArgumentException("Invalid end column: " + endColumn); } return new Iterator() { int k = colIndex[beginColumn]; // entry index int j = beginColumn; // column @Override public boolean hasNext() { return k < colIndex[endColumn]; } @Override public Entry next() { int i = rowIndex[k]; while (k >= colIndex[j + 1]) j++; return new Entry(i, j, k++); } }; } /** * For each loop on non-zero elements. This will be a bit faster than iterator or stream * by avoiding boxing. But it will be considerably less general. *

* Note that the consumer could be called on values that are either effectively or actually * zero. The only guarantee is that no values that are known to be zero based on the * structure of the matrix will be processed. * * @param consumer The matrix element consumer. */ public void forEachNonZero(DoubleConsumer consumer) { for (int j = 0; j < n; j++) { for (int k = colIndex[j]; k < colIndex[j + 1]; k++) { int i = rowIndex[k]; consumer.accept(i, j, nonzeros[k]); } } } /** * For each loop on non-zero elements. This will be a bit faster than iterator or stream * by avoiding boxing. But it will be considerably less general. *

* Note that the consumer could be called on values that are either effectively or actually * zero. The only guarantee is that no values that are known to be zero based on the * structure of the matrix will be processed. * * @param beginColumn The beginning column, inclusive. * @param endColumn The end column, exclusive. * @param consumer The matrix element consumer. */ public void forEachNonZero(int beginColumn, int endColumn, FloatConsumer consumer) { if (beginColumn < 0 || beginColumn >= n) { throw new IllegalArgumentException("Invalid begin column: " + beginColumn); } if (endColumn <= beginColumn || endColumn > n) { throw new IllegalArgumentException("Invalid end column: " + endColumn); } for (int j = beginColumn; j < endColumn; j++) { for (int k = colIndex[j]; k < colIndex[j + 1]; k++) { int i = rowIndex[k]; consumer.accept(i, j, nonzeros[k]); } } } /** Returns the element at the storage index. */ public float get(int index) { return nonzeros[index]; } /** Sets the element at the storage index. */ public float set(int index, float value) { return nonzeros[index] = value; } @Override public float get(int i, int j) { if (i < 0 || i >= m || j < 0 || j >= n) { throw new IllegalArgumentException("Invalid index: row = " + i + " col = " + j); } for (int k = colIndex[j]; k < colIndex[j + 1]; k++) { if (rowIndex[k] == i) { return nonzeros[k]; } } return 0.0f; } @Override public FloatSparseMatrix set(int i, int j, float x) { throw new UnsupportedOperationException(); } @Override public void mv(Transpose trans, float alpha, float[] x, float beta, float[] y) { int k = trans == Transpose.NO_TRANSPOSE ? m : n; float[] ax = y; if (beta == 0.0f) { Arrays.fill(y, 0.0f); } else { ax = new float[k]; } if (trans == Transpose.NO_TRANSPOSE) { for (int j = 0; j < n; j++) { for (int i = colIndex[j]; i < colIndex[j + 1]; i++) { ax[rowIndex[i]] += nonzeros[i] * x[j]; } } } else { for (int i = 0; i < n; i++) { for (int j = colIndex[i]; j < colIndex[i + 1]; j++) { ax[i] += nonzeros[j] * x[rowIndex[j]]; } } } if (beta != 0.0 || alpha != 1.0) { for (int i = 0; i < k; i++) { y[i] = alpha * ax[i] + beta * y[i]; } } } @Override public void mv(float[] work, int inputOffset, int outputOffset) { Arrays.fill(work, outputOffset, outputOffset + m, 0.0f); for (int j = 0; j < n; j++) { for (int i = colIndex[j]; i < colIndex[j + 1]; i++) { work[outputOffset + rowIndex[i]] += nonzeros[i] * work[inputOffset + j]; } } } @Override public void tv(float[] work, int inputOffset, int outputOffset) { Arrays.fill(work, outputOffset, outputOffset + n, 0.0f); for (int i = 0; i < n; i++) { for (int j = colIndex[i]; j < colIndex[i + 1]; j++) { work[outputOffset + i] += nonzeros[j] * work[inputOffset + rowIndex[j]]; } } } /** * Returns the transpose of matrix. */ public FloatSparseMatrix transpose() { FloatSparseMatrix trans = new FloatSparseMatrix(n, m, nonzeros.length); int[] count = new int[m]; for (int i = 0; i < n; i++) { for (int j = colIndex[i]; j < colIndex[i + 1]; j++) { int k = rowIndex[j]; count[k]++; } } for (int j = 0; j < m; j++) { trans.colIndex[j + 1] = trans.colIndex[j] + count[j]; } Arrays.fill(count, 0); for (int i = 0; i < n; i++) { for (int j = colIndex[i]; j < colIndex[i + 1]; j++) { int k = rowIndex[j]; int index = trans.colIndex[k] + count[k]; trans.rowIndex[index] = i; trans.nonzeros[index] = nonzeros[j]; count[k]++; } } return trans; } /** * Returns the matrix multiplication C = A * B. */ public FloatSparseMatrix mm(FloatSparseMatrix B) { if (n != B.m) { throw new IllegalArgumentException(String.format("Matrix dimensions do not match for matrix multiplication: %d x %d vs %d x %d", nrows(), ncols(), B.nrows(), B.ncols())); } int n = B.n; int anz = colIndex[n]; int[] Bp = B.colIndex; int[] Bi = B.rowIndex; float[] Bx = B.nonzeros; int bnz = Bp[n]; int[] w = new int[m]; float[] abj = new float[m]; int nzmax = Math.max(anz + bnz, m); FloatSparseMatrix C = new FloatSparseMatrix(m, n, nzmax); int[] Cp = C.colIndex; int[] Ci = C.rowIndex; float[] Cx = C.nonzeros; int nz = 0; for (int j = 0; j < n; j++) { if (nz + m > nzmax) { nzmax = 2 * nzmax + m; float[] Cx2 = new float[nzmax]; int[] Ci2 = new int[nzmax]; System.arraycopy(Ci, 0, Ci2, 0, nz); System.arraycopy(Cx, 0, Cx2, 0, nz); Ci = Ci2; Cx = Cx2; C = new FloatSparseMatrix(m, n, Cx2, Ci2, Cp); } // column j of C starts here Cp[j] = nz; for (int p = Bp[j]; p < Bp[j + 1]; p++) { nz = scatter(this, Bi[p], Bx[p], w, abj, j + 1, C, nz); } for (int p = Cp[j]; p < nz; p++) { Cx[p] = abj[Ci[p]]; } } // finalize the last column of C Cp[n] = nz; return C; } /** * x = x + beta * A(:,j), where x is a dense vector and A(:,j) is sparse. */ private static int scatter(FloatSparseMatrix A, int j, float beta, int[] w, float[] x, int mark, FloatSparseMatrix C, int nz) { int[] Ap = A.colIndex; int[] Ai = A.rowIndex; float[] Ax = A.nonzeros; int[] Ci = C.rowIndex; for (int p = Ap[j]; p < Ap[j + 1]; p++) { int i = Ai[p]; // A(i,j) is nonzero if (w[i] < mark) { w[i] = mark; // i is new entry in column j Ci[nz++] = i; // add i to pattern of C(:,j) x[i] = beta * Ax[p]; // x(i) = beta*A(i,j) } else { x[i] += beta * Ax[p]; // i exists in C(:,j) already } } return nz; } /** * Returns A' * A */ public FloatSparseMatrix ata() { FloatSparseMatrix AT = transpose(); return AT.aat(this); } /** * Returns A * A' */ public FloatSparseMatrix aat() { FloatSparseMatrix AT = transpose(); return aat(AT); } /** Returns A * A' */ private FloatSparseMatrix aat(FloatSparseMatrix AT) { int[] done = new int[m]; for (int i = 0; i < m; i++) { done[i] = -1; } // First pass determines the number of nonzeros. int nvals = 0; // Outer loop over columns of A' in AA' for (int j = 0; j < m; j++) { for (int i = AT.colIndex[j]; i < AT.colIndex[j + 1]; i++) { int k = AT.rowIndex[i]; for (int l = colIndex[k]; l < colIndex[k + 1]; l++) { int h = rowIndex[l]; // Test if contribution already included. if (done[h] != j) { done[h] = j; nvals++; } } } } FloatSparseMatrix aat = new FloatSparseMatrix(m, m, nvals); nvals = 0; for (int i = 0; i < m; i++) { done[i] = -1; } // Second pass determines columns of aat. Code is identical to first // pass except colIndex and rowIndex get assigned at appropriate places. for (int j = 0; j < m; j++) { aat.colIndex[j] = nvals; for (int i = AT.colIndex[j]; i < AT.colIndex[j + 1]; i++) { int k = AT.rowIndex[i]; for (int l = colIndex[k]; l < colIndex[k + 1]; l++) { int h = rowIndex[l]; if (done[h] != j) { done[h] = j; aat.rowIndex[nvals] = h; nvals++; } } } } // Set last value. aat.colIndex[m] = nvals; // Sort columns. for (int j = 0; j < m; j++) { if (aat.colIndex[j + 1] - aat.colIndex[j] > 1) { Arrays.sort(aat.rowIndex, aat.colIndex[j], aat.colIndex[j + 1]); } } float[] temp = new float[m]; for (int i = 0; i < m; i++) { for (int j = AT.colIndex[i]; j < AT.colIndex[i + 1]; j++) { int k = AT.rowIndex[j]; for (int l = colIndex[k]; l < colIndex[k + 1]; l++) { int h = rowIndex[l]; temp[h] += AT.nonzeros[j] * nonzeros[l]; } } for (int j = aat.colIndex[i]; j < aat.colIndex[i + 1]; j++) { int k = aat.rowIndex[j]; aat.nonzeros[j] = temp[k]; temp[k] = 0.0f; } } return aat; } @Override public float[] diag() { int n = Math.min(nrows(), ncols()); float[] d = new float[n]; for (int i = 0; i < n; i++) { for (int j = colIndex[i]; j < colIndex[i + 1]; j++) { if (rowIndex[j] == i) { d[i] = nonzeros[j]; break; } } } return d; } /** * Reads a sparse matrix from a Harwell-Boeing Exchange Format file. * For details, see * http://people.sc.fsu.edu/~jburkardt/data/hb/hb.html. * * Note that our implementation supports only real-valued matrix and we * ignore the optional supplementary data (e.g. right hand side vectors). * * @param path the input file path. */ public static FloatSparseMatrix harwell(Path path) throws IOException { logger.info("Reads sparse matrix file '{}'", path.toAbsolutePath()); try (InputStream stream = Files.newInputStream(path); Scanner scanner = new Scanner(stream)) { // Ignore the title line. String line = scanner.nextLine(); logger.info(line); line = scanner.nextLine().trim(); logger.info(line); String[] tokens = line.split("\\s+"); int RHSCRD = Integer.parseInt(tokens[4]); line = scanner.nextLine().trim(); logger.info(line); if (!line.startsWith("R")) { throw new UnsupportedOperationException("SparseMatrix supports only real-valued matrix."); } tokens = line.split("\\s+"); int nrows = Integer.parseInt(tokens[1]); int ncols = Integer.parseInt(tokens[2]); int nz = Integer.parseInt(tokens[3]); line = scanner.nextLine(); logger.info(line); if (RHSCRD > 0) { line = scanner.nextLine(); logger.info(line); } int[] colIndex = new int[ncols + 1]; int[] rowIndex = new int[nz]; float[] data = new float[nz]; for (int i = 0; i <= ncols; i++) { colIndex[i] = scanner.nextInt() - 1; } for (int i = 0; i < nz; i++) { rowIndex[i] = scanner.nextInt() - 1; } for (int i = 0; i < nz; i++) { data[i] = scanner.nextFloat(); } return new FloatSparseMatrix(nrows, ncols, data, rowIndex, colIndex); } } /** * Reads a sparse matrix from a Rutherford-Boeing Exchange Format file. * The Rutherford Boeing format is an updated more flexible version of the * Harwell Boeing format. * For details, see * http://people.sc.fsu.edu/~jburkardt/data/rb/rb.html. * Especially, the supplementary data in the form of right-hand sides, * estimates or solutions are treated as separate files. * * Note that our implementation supports only real-valued matrix and we ignore * the optional supplementary data (e.g. right hand side vectors). * * @param path the input file path. */ public static FloatSparseMatrix rutherford(Path path) throws IOException { // As we ignore the supplementary data, the parsing process // is same as Harwell. return harwell(path); } /** * Reads a sparse matrix from a text file. * The first line contains three integers, which are the number of rows, * the number of columns, and the number of nonzero entries in the matrix. *

* Following the first line, there are m + 1 integers that are the indices of * columns, where m is the number of columns. Then there are n integers that * are the row indices of nonzero entries, where n is the number of nonzero * entries. Finally, there are n float numbers that are the values of nonzero * entries. * * @param path the input file path. */ public static FloatSparseMatrix text(Path path) throws IOException { try (InputStream stream = Files.newInputStream(path); Scanner scanner = new Scanner(stream)) { int nrows = scanner.nextInt(); int ncols = scanner.nextInt(); int nz = scanner.nextInt(); int[] colIndex = new int[ncols + 1]; int[] rowIndex = new int[nz]; float[] data = new float[nz]; for (int i = 0; i <= ncols; i++) { colIndex[i] = scanner.nextInt() - 1; } for (int i = 0; i < nz; i++) { rowIndex[i] = scanner.nextInt() - 1; } for (int i = 0; i < nz; i++) { data[i] = scanner.nextFloat(); } return new FloatSparseMatrix(nrows, ncols, data, rowIndex, colIndex); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy