org.apache.mahout.math.SequentialAccessSparseVector Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-math Show documentation
Show all versions of mahout-math Show documentation
High performance scientific and technical computing data structures and methods,
mostly based on CERN's
Colt Java API
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math;
import java.util.Arrays;
import java.util.Iterator;
import java.util.NoSuchElementException;
import com.google.common.primitives.Doubles;
import org.apache.mahout.math.function.Functions;
/**
*
* Implements vector that only stores non-zero doubles as a pair of parallel arrays (OrderedIntDoubleMapping),
* one int[], one double[]. If there are k non-zero elements in the vector, this implementation has
* O(log(k)) random-access read performance, and O(k) random-access write performance, which is far below that
* of the hashmap based {@link org.apache.mahout.math.RandomAccessSparseVector RandomAccessSparseVector}. This
* class is primarily used for operations where the all the elements will be accessed in a read-only fashion
* sequentially: methods which operate not via get() or set(), but via iterateNonZero(), such as (but not limited
* to) :
*
* - dot(Vector)
* - addTo(Vector)
*
*
* See {@link OrderedIntDoubleMapping}
*/
public class SequentialAccessSparseVector extends AbstractVector {
private OrderedIntDoubleMapping values;
/** For serialization purposes only. */
public SequentialAccessSparseVector() {
super(0);
}
public SequentialAccessSparseVector(int cardinality) {
this(cardinality, Math.min(100, cardinality / 1000 < 10 ? 10 : cardinality / 1000)); // arbitrary estimate of
// 'sparseness'
}
public SequentialAccessSparseVector(int cardinality, int size) {
super(cardinality);
values = new OrderedIntDoubleMapping(size);
}
public SequentialAccessSparseVector(Vector other) {
this(other.size(), other.getNumNondefaultElements());
if (other.isSequentialAccess()) {
for (Element e : other.nonZeroes()) {
set(e.index(), e.get());
}
} else {
// If the incoming Vector to copy is random, then adding items
// from the Iterator can degrade performance dramatically if
// the number of elements is large as this Vector tries to stay
// in order as items are added, so it's better to sort the other
// Vector's elements by index and then add them to this
copySortedRandomAccessSparseVector(other);
}
}
// Sorts a RandomAccessSparseVectors Elements before adding them to this
private int copySortedRandomAccessSparseVector(Vector other) {
int elementCount = other.getNumNondefaultElements();
OrderedElement[] sortableElements = new OrderedElement[elementCount];
int s = 0;
for (Element e : other.nonZeroes()) {
sortableElements[s++] = new OrderedElement(e.index(), e.get());
}
Arrays.sort(sortableElements);
for (int i = 0; i < sortableElements.length; i++) {
values.setIndexAt(i, sortableElements[i].index);
values.setValueAt(i, sortableElements[i].value);
}
values = new OrderedIntDoubleMapping(values.getIndices(), values.getValues(), elementCount);
return elementCount;
}
public SequentialAccessSparseVector(SequentialAccessSparseVector other, boolean shallowCopy) {
super(other.size());
values = shallowCopy ? other.values : other.values.clone();
}
public SequentialAccessSparseVector(SequentialAccessSparseVector other) {
this(other.size(), other.getNumNondefaultElements());
values = other.values.clone();
}
private SequentialAccessSparseVector(int cardinality, OrderedIntDoubleMapping values) {
super(cardinality);
this.values = values;
}
@Override
protected Matrix matrixLike(int rows, int columns) {
//return new SparseRowMatrix(rows, columns);
return new SparseMatrix(rows, columns);
}
@SuppressWarnings("CloneDoesntCallSuperClone")
@Override
public SequentialAccessSparseVector clone() {
return new SequentialAccessSparseVector(size(), values.clone());
}
@Override
public void mergeUpdates(OrderedIntDoubleMapping updates) {
values.merge(updates);
}
@Override
public String toString() {
return sparseVectorToString();
}
/**
* @return false
*/
@Override
public boolean isDense() {
return false;
}
/**
* @return true
*/
@Override
public boolean isSequentialAccess() {
return true;
}
/**
* Warning! This takes O(log n) time as it does a binary search behind the scenes!
* Only use it when STRICTLY necessary.
* @param index an int index.
* @return the value at that position in the vector.
*/
@Override
public double getQuick(int index) {
return values.get(index);
}
/**
* Warning! This takes O(log n) time as it does a binary search behind the scenes!
* Only use it when STRICTLY necessary.
* @param index an int index.
*/
@Override
public void setQuick(int index, double value) {
invalidateCachedLength();
values.set(index, value);
}
@Override
public void incrementQuick(int index, double increment) {
invalidateCachedLength();
values.increment(index, increment);
}
@Override
public SequentialAccessSparseVector like() {
return new SequentialAccessSparseVector(size(), values.getNumMappings());
}
@Override
public Vector like(int cardinality) {
return new SequentialAccessSparseVector(cardinality);
}
@Override
public int getNumNondefaultElements() {
return values.getNumMappings();
}
@Override
public int getNumNonZeroElements() {
double[] elementValues = values.getValues();
int numMappedElements = values.getNumMappings();
int numNonZeros = 0;
for (int index = 0; index < numMappedElements; index++) {
if (elementValues[index] != 0) {
numNonZeros++;
}
}
return numNonZeros;
}
@Override
public double getLookupCost() {
return Math.max(1, Math.round(Functions.LOG2.apply(getNumNondefaultElements())));
}
@Override
public double getIteratorAdvanceCost() {
return 1;
}
@Override
public boolean isAddConstantTime() {
return false;
}
@Override
public Iterator iterateNonZero() {
// TODO: this is a bug, since nonDefaultIterator doesn't hold to non-zero contract.
return new NonDefaultIterator();
}
@Override
public Iterator iterator() {
return new AllIterator();
}
private final class NonDefaultIterator implements Iterator {
private final NonDefaultElement element = new NonDefaultElement();
@Override
public boolean hasNext() {
return element.getNextOffset() < values.getNumMappings();
}
@Override
public Element next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
element.advanceOffset();
return element;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
private final class AllIterator implements Iterator {
private final AllElement element = new AllElement();
@Override
public boolean hasNext() {
return element.getNextIndex() < SequentialAccessSparseVector.this.size();
}
@Override
public Element next() {
if (!hasNext()) {
throw new NoSuchElementException();
}
element.advanceIndex();
return element;
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
}
private final class NonDefaultElement implements Element {
private int offset = -1;
void advanceOffset() {
offset++;
}
int getNextOffset() {
return offset + 1;
}
@Override
public double get() {
return values.getValues()[offset];
}
@Override
public int index() {
return values.getIndices()[offset];
}
@Override
public void set(double value) {
invalidateCachedLength();
values.setValueAt(offset, value);
}
}
private final class AllElement implements Element {
private int index = -1;
private int nextOffset;
void advanceIndex() {
index++;
if (nextOffset < values.getNumMappings() && index > values.getIndices()[nextOffset]) {
nextOffset++;
}
}
int getNextIndex() {
return index + 1;
}
@Override
public double get() {
if (nextOffset < values.getNumMappings() && index == values.getIndices()[nextOffset]) {
return values.getValues()[nextOffset];
} else {
return OrderedIntDoubleMapping.DEFAULT_VALUE;
}
}
@Override
public int index() {
return index;
}
@Override
public void set(double value) {
invalidateCachedLength();
if (nextOffset < values.getNumMappings() && index == values.indexAt(nextOffset)) {
values.setValueAt(nextOffset, value);
} else {
// Yes, this works; the offset into indices of the new value's index will still be nextOffset
values.set(index, value);
}
}
}
// Comparable Element for sorting Elements by index
private static final class OrderedElement implements Comparable {
private final int index;
private final double value;
OrderedElement(int index, double value) {
this.index = index;
this.value = value;
}
@Override
public int compareTo(OrderedElement that) {
// both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be
// an array somewhere with Integer.MAX_VALUE + 1 elements)
return this.index - that.index;
}
@Override
public int hashCode() {
return index ^ Doubles.hashCode(value);
}
@Override
public boolean equals(Object o) {
if (!(o instanceof OrderedElement)) {
return false;
}
OrderedElement other = (OrderedElement) o;
return index == other.index && value == other.value;
}
}
}