All downloads are free. The search and download functionality uses the official Maven repository.

edu.ucla.sspace.matrix.AtomicGrowingSparseHashMatrix Maven / Gradle / Ivy

Go to download

The S-Space Package is a collection of algorithms for building Semantic Spaces as well as a highly-scalable library for designing new distributional semantics algorithms. Distributional algorithms process text corpora and represent the semantics of words as high-dimensional feature vectors. This package also includes matrices, vectors, and numerous clustering algorithms. These approaches are known by many names, such as word spaces, semantic spaces, or distributed semantics, and rest upon the Distributional Hypothesis: words that appear in similar contexts have similar meanings.

The newest version!
/*
 * Copyright 2010 David Jurgens 
 *
 * This file is part of the S-Space package and is covered under the terms and
 * conditions therein.
 *
 * The S-Space package is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as published
 * by the Free Software Foundation and distributed hereunder to you.
 *
 * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
 * EXPRESS OR IMPLIED ARE MADE.  BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
 * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
 * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
 * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
 * RIGHTS.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package edu.ucla.sspace.matrix;

import edu.ucla.sspace.vector.AtomicSparseVector;
import edu.ucla.sspace.vector.CompactSparseVector;
import edu.ucla.sspace.vector.DoubleVector;
import edu.ucla.sspace.vector.SparseDoubleVector;
import edu.ucla.sspace.vector.SparseHashDoubleVector;
import edu.ucla.sspace.vector.SparseVector;
import edu.ucla.sspace.vector.Vectors;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import java.util.concurrent.atomic.AtomicInteger;


/**
 * A concurrent, thread-safe, growable {@code SparseMatrix} class that is
 * optimized for operations only access one value of the matrix at a time.  This
 * class allows multiple threads to operate on the same matrix where all methods
 * are concurrent to the fullest extent possible.

* * This class offers different performance trade-offs than the {@link * AtomicGrowingSparseMatrix}. Specifically, this class supports concurrent * write access to the same row. Atomicity is localized to the specific matrix * entry, which ensures high concurrency for workloads consisting primarily of * {@code get}, {@code addAndGet}, {@code getAndAdd} and {@code set}. This * performance comes at a cost for the full row- or column-related operations. * Each time these operations are used after a modification to the matrix, a * full {@code O(m * n)} computation must be done to determine the structure of * the matrix. An subsequent calls until the next modification will not incur * this penalty and will operate in {@code O(k)} where {@code k} is the number * of non-zero entries in the row or column. It is therefore highly recommended * that vector method be used only during periods when the matrix is not likely * to be modified.

* * This class also provides support for access the matrix data without ensuring * data coherency (i.e. non-atomic) through the {@link #getRowVectorUnsafe(int)} * and {@link #getColumnVectorUnsafe(int)} methods. These methods are designed * to be used only in circumstances where the matrix is guaranteed to only be * accessed by one thread. In such circumstances, the overhead of locking row * or column data can be avoided with no side-effects. Use these methods with * great caution.

* * @author David Jurgens * * @see AtomicGrowingSparseMatrix * @see Matrices#synchronizedMatrix(Matrix) */ public class AtomicGrowingSparseHashMatrix implements AtomicMatrix, SparseMatrix { /** * The matrix entries that are currently being write-locked by some thread. * The value of this map is not meaningful. */ private final ConcurrentMap lockedEntries; /** * A mapping from row, column to the value at that entry in the matrix. */ private final ConcurrentMap matrixEntries; /** * The number of rows represented in this {@code AtomicGrowingSparseMatrix}. */ private final AtomicInteger rows; /** * The number of columns represented in this {@code * AtomicGrowingSparseMatrix}. */ private final AtomicInteger cols; /** * A counter of the number of modications to this matrix. This value is * compared with the {@link #lastVectorCacheUpdate} to determine whether the * vector cache is out of date. */ private final AtomicInteger modifications; /** * The value of {@link #modifications} when {@link #updateVectorCache()} was * last called. */ private final AtomicInteger lastVectorCacheUpdate; /** * A mapping from row to the columns that contain non-zero values. This * mapping is only valid when the vector-cache is valid. * * @see #updateVectorCache() */ private int[][] rowToColsCache; /** * A mapping from column to the rows that contain non-zero values. This * mapping is only valid when the vector-cache is valid. * * @see #updateVectorCache() */ private int[][] colToRowsCache; /** * Create an {@code AtomicGrowingSparseMatrix} with 0 rows and 0 columns. 
*/ public AtomicGrowingSparseHashMatrix() { this.rows = new AtomicInteger(0); this.cols = new AtomicInteger(0); modifications = new AtomicInteger(0); lastVectorCacheUpdate = new AtomicInteger(0); rowToColsCache = null; colToRowsCache = null; // Base the concurrency of the maps on the number of avaible processors, // which assumes that all use cases use this value as a hint for // concurrency int threads = Runtime.getRuntime().availableProcessors(); lockedEntries = new ConcurrentHashMap( 1000, .75f, threads * 16); matrixEntries = new ConcurrentHashMap( 10000, 4f, threads * 16); } /** * {@inheritDoc} */ public double add(int row, int col, double delta) { checkIndices(row, col, true); Entry e = new Entry(row, col); // Spin waiting for the entry to be unlocked while (lockedEntries.putIfAbsent(e, new Object()) != null) ; Double val = matrixEntries.get(e); double newVal = (val == null) ? delta : delta + val; if (newVal != 0) { matrixEntries.put(e, newVal); // Only invalidate the cache if the number of rows or columns // containing data has changed if (val == null) modifications.incrementAndGet(); } else { matrixEntries.remove(e); modifications.incrementAndGet(); } lockedEntries.remove(e); return (val == null) ? 0 : val; } /** * {@inheritDoc} */ public double addAndGet(int row, int col, double delta) { checkIndices(row, col, true); Entry e = new Entry(row, col); // Spin waiting for the entry to be unlocked while (lockedEntries.putIfAbsent(e, new Object()) != null) ; Double val = matrixEntries.get(e); double newVal = (val == null) ? 
delta : delta + val; if (newVal != 0) { matrixEntries.put(e, newVal); // Only invalidate the cache if the number of rows or columns // containing data has changed if (val == null) modifications.incrementAndGet(); } else { matrixEntries.remove(e); modifications.incrementAndGet(); } lockedEntries.remove(e); return newVal; } /** * Verify that the given row and column value is non-negative, and * optionally expand the size of the matrix if the row or column are outside * the current bounds. * * @param row the row index to check. * @param the the column index to check. * @param expand {@code true} if the current dimensions of the matrix should * be updated if either parameter exceeds the current values */ private void checkIndices(int row, int col, boolean expand) { if (row < 0 || col < 0) { throw new ArrayIndexOutOfBoundsException(); } if (expand) { int r = row + 1; int cur = 0; while (r > (cur = rows.get()) && !rows.compareAndSet(cur, r)) ; int c = col + 1; cur = 0; while (c > (cur = cols.get()) && !cols.compareAndSet(cur, c)) ; } } /** * {@inheritDoc} */ public int columns() { return cols.get(); } /** * {@inheritDoc} */ public double get(int row, int col) { checkIndices(row, col, false); Double val = matrixEntries.get(new Entry(row, col)); return (val == null) ? 0 : val; } /** * {@inheritDoc} The length of the returned column reflects the size of * matrix at the time of the call, which may be different from earlier calls * to {@link #rows()} */ public double[] getColumn(int column) { return getColumnVector(column).toArray(); } /** * {@inheritDoc} The length of the returned row vector reflects the size of * matrix at the time of the call, which may be different from earlier calls * to {@link #rows()} */ public SparseDoubleVector getColumnVector(int column) { return getColumnVector(column, true); } /** * Provides non-atomic access to the data at the specified column, which may * present an inconsistent view of the data if this matrix is being * concurrently modified. 
This method should only be used in special cases * where the vector is being accessed at a time when the matrix (or this * particular column) will not be modified. */ public SparseDoubleVector getColumnVectorUnsafe(int column) { return getColumnVector(column, false); } /** * Returns the column vector, locking the data if {@code shouldLock} is * {@code true}. */ private SparseDoubleVector getColumnVector(int column, boolean shouldLock) { int r = rows.get(); if (shouldLock) lockColumn(column, r); // Ensure that the column data is up to date while (lastVectorCacheUpdate.get() != modifications.get()) updateVectorCache(); int[] rowArr = colToRowsCache[column]; SparseDoubleVector colVec = new SparseHashDoubleVector(r); for (int row : rowArr) colVec.set(row, matrixEntries.get(new Entry(row, column))); if (shouldLock) unlockColumn(column, r); return colVec; } /** * {@inheritDoc} The length of the returned row reflects the size of matrix * at the time of the call, which may be different from earlier calls to * {@link #columns()}. */ public double[] getRow(int row) { return getRowVector(row).toArray(); } /** * {@inheritDoc} The length of the returned row vector reflects the size of * matrix at the time of the call, which may be different from earlier calls * to {@link #columns()}. */ public SparseDoubleVector getRowVector(int row) { return getRowVector(row, true); } /** * Provides non-atomic access to the data at the specified row, which may * present an inconsistent view of the data if this matrix is being * concurrently modified. This method should only be used in special cases * where the vector is being accessed at a time when the matrix (or this * particular row) will not be modified. */ public SparseDoubleVector getRowVectorUnsafe(int row) { return getRowVector(row, false); } /** * Returns the row vector, locking the data if {@code shouldLock} is {@code * true}. 
*/ private SparseDoubleVector getRowVector(int row, boolean shouldLock) { int c = cols.get(); if (shouldLock) lockRow(row, c); // Ensure that the column data is up to date while (lastVectorCacheUpdate.get() != modifications.get()) updateVectorCache(); int[] colArr = rowToColsCache[row]; SparseDoubleVector rowVec = new SparseHashDoubleVector(c); for (int column : colArr) rowVec.set(column, matrixEntries.get(new Entry(row, column))); if (shouldLock) unlockRow(row, c); return rowVec; } /** * Locks all the column entries for this row, thereby preventing write or * read access to the values. Note that the number of columns to lock * must be the same value used with {@link #unlockRow(int,int)}, * otherwise the unlock may potentially unlock matrix entries associated * with this lock call. * * @param row the row to lock * @param colsToLock the number of rows to lock. This value should be the * number of {@link #cols} at the time of the call. */ private void lockRow(int row, int colsToLock) { // Put in an entry for all the row's columns for (int col = 0; col < colsToLock; ++col) { Entry e = new Entry(row, col); // Spin waiting for the entry to be unlocked while (lockedEntries.putIfAbsent(e, new Object()) != null) ; } } /** * Locks all the row entries for this column, thereby preventing write or * read access to the values. Note that the number of rows to lock * must be the same value used with {@link #unlockColumn(int,int)}, * otherwise the unlock may potentially unlock matrix entries associated * with this lock call. * * @param col the column to lock * @param rowsToLock the number of rows to lock. This value should be the * number of {@link #rows} at the time of the call. 
*/ private void lockColumn(int col, int rowsToLock) { // Put in an entry for all the columns's rows for (int row = 0; row < rowsToLock; ++row) { Entry e = new Entry(row, col); // Spin waiting for the entry to be unlocked while (lockedEntries.putIfAbsent(e, new Object()) != null) ; } } /** * {@inheritDoc} */ public int rows() { return rows.get(); } /** * {@inheritDoc} */ public void set(int row, int col, double val) { checkIndices(row, col, true); Entry e = new Entry(row, col); // Spin waiting for the entry to be unlocked while (lockedEntries.putIfAbsent(e, new Object()) != null) ; boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed if (!present) modifications.incrementAndGet(); } else if (present) { matrixEntries.remove(e); modifications.incrementAndGet(); } lockedEntries.remove(e); } /** * {@inheritDoc} */ public void setColumn(int column, double[] values) { setColumn(column, Vectors.asVector(values)); } /** * {@inheritDoc} */ public void setColumn(int column, DoubleVector rowValues) { checkIndices(rowValues.length(), column, true); int r = rows.get(); lockColumn(column, r); boolean modified = false; for (int row = 0; row < r; ++row) { double val = rowValues.get(row); Entry e = new Entry(row, column); boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed modified = modified || !present; } else if (present) { matrixEntries.remove(e); modified = true; } } if (modified) modifications.incrementAndGet(); unlockColumn(column, r); } /** * {@inheritDoc} */ public void setRow(int row, double[] columns) { setRow(row, Vectors.asVector(columns)); } /** * {@inheritDoc} */ public void setRow(int row, DoubleVector colValues) { checkIndices(row, colValues.length(), true); int c = cols.get(); lockRow(row, c); boolean 
modified = false; for (int col = 0; col < c; ++col) { double val = colValues.get(col); Entry e = new Entry(row, col); boolean present = matrixEntries.containsKey(e); if (val != 0) { matrixEntries.put(e, val); // Only invalidate the cache if the number of rows or columns // containing data has changed modified = modified || !present; } else if (present) { matrixEntries.remove(e); modified = true; } } if (modified) modifications.incrementAndGet(); unlockRow(row, c); } /** * {@inheritDoc} */ public double[][] toDenseArray() { int r = rows.get(); int c = cols.get(); for (int i = 0; i < r; ++i) lockRow(i, c); double[][] m = new double[r][0]; for (int i = 0; i < r; ++i) { DoubleVector row = getRowVector(i); // Ensure that we see a consistent length for all the rows if (row.length() != c) row = Vectors.subview(row, 0, c); m[i] = row.toArray(); } for (int i = 0; i < r; ++i) unlockRow(i, c); return m; } /** * Unlocks the column for write access based on the number of rows that were * initially locked * * @param col the column to unlock * @param rowsToUnlock the number of rows locked by the previous {@link * #lockColumn(int, int) call} */ private void unlockColumn(int col, int rowsToUnlock) { // Remove the locks on all of the columns's columns for (int row = 0; row < rowsToUnlock; ++row) lockedEntries.remove(new Entry(row, col)); } /** * Unlocks the row for write access based on the number of columns that were * initially locked * * @param row the row to unlock * @param colsToUnlock the number of cols locked by the previous {@link * #lockRow(int, int) call} */ private void unlockRow(int row, int colsToUnlock) { // Remove the locks on all of the row's columns for (int col = 0; col < colsToUnlock; ++col) lockedEntries.remove(new Entry(row, col)); } /** * Updates the {@link Vector} cache data so that any call to {@link * #getRowVector(int)} or {@link #getColumnVector(int)} has an accurate * mapping of that row or column's non-zero values. 
Should the matrix be * modifed during this call, the cache will be repeatedly computed until it * is up-to-date. */ private synchronized void updateVectorCache() { // NOTE: this method is synchronized to prevent having mulitple threads // potentiall recomputing the cache at the same time. while (lastVectorCacheUpdate.get() != modifications.get()) { lastVectorCacheUpdate.set(modifications.get()); Map> rowVals = new HashMap>(); Map> colVals = new HashMap>(); for (Entry e : matrixEntries.keySet()) { int row = e.row; int col = e.col; List c = rowVals.get(row); if (c == null) { c = new ArrayList(); rowVals.put(row, c); } c.add(col); List r = colVals.get(col); if (r == null) { r = new ArrayList(); colVals.put(col, r); } r.add(row); } // Convert the object version to integer lists to save space and // improve access rowToColsCache = new int[rows.get()][0]; for (Map.Entry> e : rowVals.entrySet()) { int row = e.getKey(); List cols = e.getValue(); int[] colArr = new int[cols.size()]; for (int i = 0; i < cols.size(); ++i) colArr[i] = cols.get(i); rowToColsCache[row] = colArr; // null out the list for GC to try to cancel out the overhead of // having both the array and the list in memory at the same time e.setValue(null); } rowVals = null; colToRowsCache = new int[cols.get()][0]; for (Map.Entry> e : colVals.entrySet()) { int col = e.getKey(); List rows = e.getValue(); int[] rowArr = new int[rows.size()]; for (int i = 0; i < rows.size(); ++i) rowArr[i] = rows.get(i); colToRowsCache[col] = rowArr; // null out the list for GC to try to cancel out the overhead of // having both the array and the list in memory at the same time e.setValue(null); } } } /** * A utility class for holding the row-value pair */ private static class Entry { final int row; final int col; public Entry(int row, int col) { this.row = row; this.col = col; } public boolean equals(Object o) { if (o instanceof Entry) { Entry e = (Entry)o; return e.row == row && e.col == col; } return false; } public int 
hashCode() { return (row << 16) ^ col; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy