org.apache.mahout.math.VectorBinaryAssign Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-math Show documentation
Show all versions of mahout-math Show documentation
High performance scientific and technical computing data structures and methods,
mostly based on CERN's
Colt Java API
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.set.OpenIntHashSet;
import java.util.Iterator;
/**
* Abstract class encapsulating different algorithms that perform the Vector operations assign().
* x.assign(y, f), for x and y Vectors and f a DoubleDouble function:
* - applies the function f to every element in x and y, f(xi, yi)
* - assigns xi = f(xi, yi) for all indices i
*
* The names of variables, methods and classes used here follow the following conventions:
* The vector being assigned to (the left hand side) is called this or x.
* The right hand side is called that or y.
* The function to be applied is called f.
*
* The different algorithms take into account the different characteristics of vector classes:
* - whether the vectors support sequential iteration (isSequential())
* - whether the vectors support constant-time additions (isAddConstantTime())
* - what the lookup cost is (getLookupCost())
* - what the iterator advancement cost is (getIteratorAdvanceCost())
*
* The names of the actual classes (they're nested in VectorBinaryAssign) describe the algorithm used for assignment.
* The most important optimization is iterating just through the nonzeros (only possible if f(0, 0) = 0).
* There are 4 main possibilities:
* - iterating through the nonzeros of just one vector and looking up the corresponding elements in the other
* - iterating through the intersection of nonzeros (those indices where both vectors have nonzero values)
* - iterating through the union of nonzeros (those indices where at least one of the vectors has a nonzero value)
* - iterating through all the elements in some way (either through both at the same time, both one after the other,
* looking up both, looking up just one).
* Then, there are two additional sub-possibilities:
* - if a new value can be added to x in constant time (isAddConstantTime()), the *Inplace updates are used
* - otherwise (really just for SequentialAccessSparseVectors right now), the *Merge updates are used, where
* a sorted list of (index, value) pairs is merged into the vector at the end.
*
* The internal details are not important and a particular algorithm should generally not be called explicitly.
* The best one will be selected through assignBest(), which is itself called through Vector.assign().
*
* See https://docs.google.com/document/d/1g1PjUuvjyh2LBdq2_rKLIcUiDbeOORA1sCJiSsz-JVU/edit# for a more detailed
* explanation.
*/
public abstract class VectorBinaryAssign {
/**
 * The candidate algorithms examined by getBestOperation().
 * The array is scanned from first to last and a later entry only replaces the current best when its estimated
 * cost is strictly lower, so among equally cheap valid algorithms the one listed first wins.
 */
public static final VectorBinaryAssign[] OPERATIONS = {
new AssignNonzerosIterateThisLookupThat(),
new AssignNonzerosIterateThatLookupThisMergeUpdates(),
new AssignNonzerosIterateThatLookupThisInplaceUpdates(),
new AssignIterateIntersection(),
new AssignIterateUnionSequentialMergeUpdates(),
new AssignIterateUnionSequentialInplaceUpdates(),
new AssignIterateUnionRandomMergeUpdates(),
new AssignIterateUnionRandomInplaceUpdates(),
new AssignAllIterateSequentialMergeUpdates(),
new AssignAllIterateSequentialInplaceUpdates(),
new AssignAllIterateThisLookupThatMergeUpdates(),
new AssignAllIterateThisLookupThatInplaceUpdates(),
new AssignAllIterateThatLookupThisMergeUpdates(),
new AssignAllIterateThatLookupThisInplaceUpdates(),
new AssignAllLoopMergeUpdates(),
new AssignAllLoopInplaceUpdates(),
};
/**
 * Returns true iff we can use this algorithm to apply f to x and y component-wise and assign the result to x.
 *
 * @param x the vector being assigned to (the left hand side)
 * @param y the right hand side vector
 * @param f the function applied to each pair of elements (xi, yi)
 * @return true iff this algorithm may be used for the given x, y and f
 */
public abstract boolean isValid(Vector x, Vector y, DoubleDoubleFunction f);
/**
 * Estimates the cost of using this algorithm to compute the assignment. The algorithm is assumed to be valid.
 * getBestOperation() compares these estimates and picks the valid algorithm with the lowest one.
 *
 * @param x the vector being assigned to (the left hand side)
 * @param y the right hand side vector
 * @param f the function applied to each pair of elements (xi, yi)
 * @return the estimated cost; lower is better
 */
public abstract double estimateCost(Vector x, Vector y, DoubleDoubleFunction f);
/**
 * Main method that applies f to x and y component-wise assigning the results to x. It returns the modified vector,
 * x.
 *
 * @param x the vector being assigned to (the left hand side); it is modified
 * @param y the right hand side vector
 * @param f the function applied to each pair of elements (xi, yi)
 * @return the modified vector x
 */
public abstract Vector assign(Vector x, Vector y, DoubleDoubleFunction f);
/**
 * Selects the best operation, i.e. the valid one with the lowest estimated cost.
 * Candidates are examined in the order in which they appear in OPERATIONS. A candidate only replaces the
 * current best when its cost is strictly lower, so ties are resolved in favour of the earlier entry.
 *
 * @param x the vector being assigned to (the left hand side)
 * @param y the right hand side vector
 * @param f the function applied to each pair of elements (xi, yi)
 * @return the cheapest valid operation
 * @throws IllegalStateException if no valid operation reports a cost below positive infinity; without this
 *     check the method would fail with an uninformative ArrayIndexOutOfBoundsException for index -1
 */
public static VectorBinaryAssign getBestOperation(Vector x, Vector y, DoubleDoubleFunction f) {
  VectorBinaryAssign bestOperation = null;
  double bestCost = Double.POSITIVE_INFINITY;
  for (VectorBinaryAssign operation : OPERATIONS) {
    if (operation.isValid(x, y, f)) {
      double cost = operation.estimateCost(x, y, f);
      // Strict comparison: NaN and infinite costs never win, and ties keep the earlier candidate.
      if (cost < bestCost) {
        bestCost = cost;
        bestOperation = operation;
      }
    }
  }
  if (bestOperation == null) {
    throw new IllegalStateException(
        "No valid VectorBinaryAssign operation with a cost below infinity was found");
  }
  return bestOperation;
}
/**
 * Entry point that should be used when assigning: it picks the best algorithm for (x, y, f) via
 * getBestOperation() and runs it.
 * Note that it does NOT invalidate the cached length of the Vector and should only be used through the wrappers
 * in AbstractVector.
 *
 * @param x the vector being assigned to (the left hand side)
 * @param y the right hand side vector
 * @param f the function applied to each pair of elements (xi, yi)
 * @return whatever the selected operation's assign() returns (documented as the modified vector x)
 */
public static Vector assignBest(Vector x, Vector y, DoubleDoubleFunction f) {
  VectorBinaryAssign bestOperation = getBestOperation(x, y, f);
  return bestOperation.assign(x, y, f);
}
/**
 * Used when f(0, y) = 0: the zeros in x stay zero, so it is enough to visit the nonzeros of x.
 * The matching element of y is fetched with a lookup.
 * There are no *Merge or *Inplace variants because f cannot make x more dense in this case; every change lands
 * on an index whose value is already nonzero.
 */
public static class AssignNonzerosIterateThisLookupThat extends VectorBinaryAssign {

  /** Valid whenever f reports isLikeLeftMult(). */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return f.isLikeLeftMult();
  }

  /** Cost of iterating the nondefault elements of x, scaled by the lookup cost of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterationCost = x.getNumNondefaultElements() * x.getIteratorAdvanceCost();
    return iterationCost * y.getLookupCost();
  }

  /** Overwrites each nonzero element of x with f(xi, yi) and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    for (Element current : x.nonZeroes()) {
      double thisValue = current.get();
      double thatValue = y.getQuick(current.index());
      current.set(f.apply(thisValue, thatValue));
    }
    return x;
  }
}
/**
 * Used when f(x, 0) = x: the zeros in y leave x untouched, so it is enough to visit the nonzeros of y.
 * The matching element of x is read through a lookup and written back in place.
 */
public static class AssignNonzerosIterateThatLookupThisInplaceUpdates extends VectorBinaryAssign {

  /** Valid whenever f reports isLikeRightPlus(). */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return f.isLikeRightPlus();
  }

  /**
   * Cost of iterating the nondefault elements of y, scaled by the lookup cost of x twice.
   * NOTE(review): the doubled factor presumably accounts for the getQuick() plus the setQuick() on x - confirm.
   */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterationCost = y.getNumNondefaultElements() * y.getIteratorAdvanceCost();
    return iterationCost * x.getLookupCost() * x.getLookupCost();
  }

  /** For each nonzero element of y, sets x at the same index to f(xi, yi); returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    for (Element that : y.nonZeroes()) {
      int index = that.index();
      double result = f.apply(x.getQuick(index), that.get());
      x.setQuick(index, result);
    }
    return x;
  }
}
/**
 * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate through the nonzeros of y.
 * We get the corresponding element of x through a lookup and update x by merging: the new values are collected
 * in an OrderedIntDoubleMapping and handed to x.mergeUpdates() once the iteration is finished.
 */
public static class AssignNonzerosIterateThatLookupThisMergeUpdates extends VectorBinaryAssign {

  /**
   * Valid when f reports isLikeRightPlus(), y supports sequential access and x does not support constant-time
   * additions.
   */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return f.isLikeRightPlus() && y.isSequentialAccess() && !x.isAddConstantTime();
  }

  /**
   * Cost of iterating the nondefault elements of y, scaled by the lookup cost of x.
   * The lookup cost is taken from x (not y) because assign() performs one x.getQuick() per nonzero of y and
   * never looks anything up in y; this also matches the other *LookupThis* estimates in this file.
   */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost();
  }

  /** Computes f(xi, yi) for each nonzero element of y, merges the results into x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
    for (Element ye : y.nonZeroes()) {
      updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
    }
    x.mergeUpdates(updates);
    return x;
  }
}
/**
 * If f(x, 0) = x and f(0, y) = 0 the zeros in x and y don't matter and we can iterate through the nonzeros
 * in both x and y, touching only the indices that are nonzero in both.
 * This is only possible if both x and y support sequential access.
 */
public static class AssignIterateIntersection extends VectorBinaryAssign {

  /** Valid when f reports isLikeLeftMult() and isLikeRightPlus() and both vectors support sequential access. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return f.isLikeLeftMult() && f.isLikeRightPlus() && x.isSequentialAccess() && y.isSequentialAccess();
  }

  /** The smaller of the two costs of iterating the nondefault elements of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    return Math.min(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(),
        y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
  }

  /**
   * Walks the nonzeros of x and y in parallel and sets xi = f(xi, yi) wherever both have an element at the same
   * index. The walk stops as soon as either iterator that needs to advance is exhausted. Returns x.
   */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    Iterator<Element> xi = x.nonZeroes().iterator();
    Iterator<Element> yi = y.nonZeroes().iterator();
    Element xe = null;
    Element ye = null;
    boolean advanceThis = true;
    boolean advanceThat = true;
    while (true) {
      if (advanceThis) {
        if (!xi.hasNext()) {
          break;
        }
        xe = xi.next();
      }
      if (advanceThat) {
        if (!yi.hasNext()) {
          break;
        }
        ye = yi.next();
      }
      int xIndex = xe.index();
      int yIndex = ye.index();
      if (xIndex == yIndex) {
        xe.set(f.apply(xe.get(), ye.get()));
        advanceThis = true;
        advanceThat = true;
      } else if (xIndex < yIndex) {
        // y is zero at xIndex and f(x, 0) = x, so x keeps its value; move on in x only.
        advanceThis = true;
        advanceThat = false;
      } else {
        // x is zero at yIndex and f(0, y) = 0, so x stays zero; move on in y only.
        advanceThis = false;
        advanceThat = true;
      }
    }
    return x;
  }
}
/**
 * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
 * In this case we iterate through them in parallel and update x by merging. Because we're iterating through
 * both vectors at the same time, x and y need to support sequential access.
 * Elements already present in x are overwritten through the iterator's element; indices that are nonzero only
 * in y are collected in an OrderedIntDoubleMapping and merged into x after the iteration.
 */
public static class AssignIterateUnionSequentialMergeUpdates extends VectorBinaryAssign {

  /**
   * Valid when f is not densifying, both vectors support sequential access and x does not support constant-time
   * additions.
   */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return !f.isDensifying() && x.isSequentialAccess() && y.isSequentialAccess() && !x.isAddConstantTime();
  }

  /** The larger of the two costs of iterating the nondefault elements of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(),
        y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
  }

  /** Applies f over the union of the nonzero indices of x and y, merges the new entries into x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    Iterator<Element> xi = x.nonZeroes().iterator();
    Iterator<Element> yi = y.nonZeroes().iterator();
    Element xe = null;
    Element ye = null;
    boolean advanceThis = true;
    boolean advanceThat = true;
    OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
    while (true) {
      // A null element marks an exhausted iterator.
      if (advanceThis) {
        xe = xi.hasNext() ? xi.next() : null;
      }
      if (advanceThat) {
        ye = yi.hasNext() ? yi.next() : null;
      }
      if (xe != null && ye != null) { // both vectors have nonzero elements
        if (xe.index() == ye.index()) {
          xe.set(f.apply(xe.get(), ye.get()));
          advanceThis = true;
          advanceThat = true;
        } else if (xe.index() < ye.index()) { // y is zero at this index: f(x, 0)
          xe.set(f.apply(xe.get(), 0));
          advanceThis = true;
          advanceThat = false;
        } else { // x is zero at this index: f(0, y), queued for the merge
          updates.set(ye.index(), f.apply(0, ye.get()));
          advanceThis = false;
          advanceThat = true;
        }
      } else if (xe != null) { // just the first one still has nonzeros
        xe.set(f.apply(xe.get(), 0));
        advanceThis = true;
        advanceThat = false;
      } else if (ye != null) { // just the second one has nonzeros
        updates.set(ye.index(), f.apply(0, ye.get()));
        advanceThis = false;
        advanceThat = true;
      } else { // we're done, both are empty
        break;
      }
    }
    x.mergeUpdates(updates);
    return x;
  }
}
/**
 * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
 * In this case we iterate through them in parallel and update x inplace. Because we're iterating through
 * both vectors at the same time, x and y need to support sequential access.
 * Elements already present in x are overwritten through the iterator's element; indices that are nonzero only
 * in y are written with x.setQuick().
 */
public static class AssignIterateUnionSequentialInplaceUpdates extends VectorBinaryAssign {

  /**
   * Valid when f is not densifying, both vectors support sequential access and x supports constant-time
   * additions.
   */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return !f.isDensifying() && x.isSequentialAccess() && y.isSequentialAccess() && x.isAddConstantTime();
  }

  /** The larger of the two costs of iterating the nondefault elements of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(),
        y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
  }

  /** Applies f over the union of the nonzero indices of x and y, writing the results into x; returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    Iterator<Element> xi = x.nonZeroes().iterator();
    Iterator<Element> yi = y.nonZeroes().iterator();
    Element xe = null;
    Element ye = null;
    boolean advanceThis = true;
    boolean advanceThat = true;
    while (true) {
      // A null element marks an exhausted iterator.
      if (advanceThis) {
        xe = xi.hasNext() ? xi.next() : null;
      }
      if (advanceThat) {
        ye = yi.hasNext() ? yi.next() : null;
      }
      if (xe != null && ye != null) { // both vectors have nonzero elements
        if (xe.index() == ye.index()) {
          xe.set(f.apply(xe.get(), ye.get()));
          advanceThis = true;
          advanceThat = true;
        } else if (xe.index() < ye.index()) { // y is zero at this index: f(x, 0)
          xe.set(f.apply(xe.get(), 0));
          advanceThis = true;
          advanceThat = false;
        } else { // x is zero at this index: f(0, y), written directly into x
          x.setQuick(ye.index(), f.apply(0, ye.get()));
          advanceThis = false;
          advanceThat = true;
        }
      } else if (xe != null) { // just the first one still has nonzeros
        xe.set(f.apply(xe.get(), 0));
        advanceThis = true;
        advanceThat = false;
      } else if (ye != null) { // just the second one has nonzeros
        x.setQuick(ye.index(), f.apply(0, ye.get()));
        advanceThis = false;
        advanceThat = true;
      } else { // we're done, both are empty
        break;
      }
    }
    return x;
  }
}
/**
 * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
 * Here the nonzeros of x are processed first and the nonzeros of y second (this works even when one of the
 * vectors doesn't support sequential access). Since the results of the second pass are merged into x, the
 * order of iteration through y matters and y must support sequential access.
 */
public static class AssignIterateUnionRandomMergeUpdates extends VectorBinaryAssign {

  /**
   * Valid when f is not densifying, x does not support constant-time additions and y supports sequential access.
   */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return !f.isDensifying() && !x.isAddConstantTime() && y.isSequentialAccess();
  }

  /**
   * The larger of: iterating the nondefault elements of x while looking up y, and iterating the nondefault
   * elements of y while looking up x.
   */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterateThisCost = x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost();
    double iterateThatCost = y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost();
    return Math.max(iterateThisCost, iterateThatCost);
  }

  /**
   * First pass: overwrite every nonzero of x with f(xi, yi) and remember its index.
   * Second pass: for every nonzero of y whose index was not seen in the first pass, queue f(xi, yi) and merge
   * the queued values into x at the end. Returns x.
   */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    OpenIntHashSet seen = new OpenIntHashSet();
    for (Element thisElement : x.nonZeroes()) {
      int index = thisElement.index();
      thisElement.set(f.apply(thisElement.get(), y.getQuick(index)));
      seen.add(index);
    }
    OrderedIntDoubleMapping pending = new OrderedIntDoubleMapping(false);
    for (Element thatElement : y.nonZeroes()) {
      int index = thatElement.index();
      if (seen.contains(index)) {
        continue;
      }
      pending.set(index, f.apply(x.getQuick(index), thatElement.get()));
    }
    x.mergeUpdates(pending);
    return x;
  }
}
/**
 * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
 * Here the nonzeros of x are processed first and the nonzeros of y second (this works even when one of the
 * vectors doesn't support sequential access). Because updates to x are inplace, neither x nor y needs to
 * support sequential access.
 */
public static class AssignIterateUnionRandomInplaceUpdates extends VectorBinaryAssign {

  /** Valid when f is not densifying and x supports constant-time additions. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return !f.isDensifying() && x.isAddConstantTime();
  }

  /**
   * The larger of: iterating the nondefault elements of x while looking up y, and iterating the nondefault
   * elements of y while looking up x.
   */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterateThisCost = x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost();
    double iterateThatCost = y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost();
    return Math.max(iterateThisCost, iterateThatCost);
  }

  /**
   * First pass: overwrite every nonzero of x with f(xi, yi) and remember its index.
   * Second pass: for every nonzero of y whose index was not seen in the first pass, write f(xi, yi) into x
   * directly. Returns x.
   */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    OpenIntHashSet seen = new OpenIntHashSet();
    for (Element thisElement : x.nonZeroes()) {
      int index = thisElement.index();
      thisElement.set(f.apply(thisElement.get(), y.getQuick(index)));
      seen.add(index);
    }
    for (Element thatElement : y.nonZeroes()) {
      int index = thatElement.index();
      if (seen.contains(index)) {
        continue;
      }
      x.setQuick(index, f.apply(x.getQuick(index), thatElement.get()));
    }
    return x;
  }
}
/**
 * Iterates through all elements of x and y (x.all() and y.all()) in lockstep, without relying on any property
 * of f, and updates x by merging: results are collected in an OrderedIntDoubleMapping that is handed to
 * x.mergeUpdates() at the end.
 * Elements are paired by iteration position; the index written is the one reported by x's element.
 * NOTE(review): this relies on both all() iterators yielding the same indices in the same order - confirm.
 */
public static class AssignAllIterateSequentialMergeUpdates extends VectorBinaryAssign {

  /**
   * Valid when both vectors support sequential access, x does not support constant-time additions and neither
   * vector is dense.
   */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return x.isSequentialAccess() && y.isSequentialAccess() && !x.isAddConstantTime()
        && !x.isDense() && !y.isDense();
  }

  /** The larger of the two costs of iterating over size() elements of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost());
  }

  /** Computes f(xi, yi) for every pair of elements until either iterator ends, merges into x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    Iterator<Element> xi = x.all().iterator();
    Iterator<Element> yi = y.all().iterator();
    OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
    while (xi.hasNext() && yi.hasNext()) {
      Element xe = xi.next();
      updates.set(xe.index(), f.apply(xe.get(), yi.next().get()));
    }
    x.mergeUpdates(updates);
    return x;
  }
}
/**
 * Iterates through all elements of x and y (x.all() and y.all()) in lockstep, without relying on any property
 * of f, and updates x inplace through x.setQuick().
 * Elements are paired by iteration position; the index written is the one reported by x's element.
 * NOTE(review): this relies on both all() iterators yielding the same indices in the same order - confirm.
 */
public static class AssignAllIterateSequentialInplaceUpdates extends VectorBinaryAssign {

  /**
   * Valid when both vectors support sequential access, x supports constant-time additions and neither vector
   * is dense.
   */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return x.isSequentialAccess() && y.isSequentialAccess() && x.isAddConstantTime()
        && !x.isDense() && !y.isDense();
  }

  /** The larger of the two costs of iterating over size() elements of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost());
  }

  /** Sets xi = f(xi, yi) for every pair of elements until either iterator ends; returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    Iterator<Element> xi = x.all().iterator();
    Iterator<Element> yi = y.all().iterator();
    while (xi.hasNext() && yi.hasNext()) {
      Element xe = xi.next();
      x.setQuick(xe.index(), f.apply(xe.get(), yi.next().get()));
    }
    return x;
  }
}
/**
 * Visits every element of x (x.all()), looks up the matching element of y, and updates x by merging: the
 * results are collected in an OrderedIntDoubleMapping that is handed to x.mergeUpdates() at the end.
 * The validity check does not depend on f.
 */
public static class AssignAllIterateThisLookupThatMergeUpdates extends VectorBinaryAssign {

  /** Valid when x neither supports constant-time additions nor is dense. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return !(x.isAddConstantTime() || x.isDense());
  }

  /** Cost of iterating over size() elements of x, scaled by the lookup cost of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterationCost = x.size() * x.getIteratorAdvanceCost();
    return iterationCost * y.getLookupCost();
  }

  /** Computes f(xi, yi) for every element of x, merges the results into x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    OrderedIntDoubleMapping pending = new OrderedIntDoubleMapping(false);
    for (Element thisElement : x.all()) {
      int index = thisElement.index();
      double result = f.apply(thisElement.get(), y.getQuick(index));
      pending.set(index, result);
    }
    x.mergeUpdates(pending);
    return x;
  }
}
/**
 * Visits every element of x (x.all()), looks up the matching element of y, and writes f(xi, yi) back into x
 * inplace through x.setQuick().
 * The validity check does not depend on f.
 */
public static class AssignAllIterateThisLookupThatInplaceUpdates extends VectorBinaryAssign {

  /** Valid when x supports constant-time additions and is not dense. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    if (!x.isAddConstantTime()) {
      return false;
    }
    return !x.isDense();
  }

  /** Cost of iterating over size() elements of x, scaled by the lookup cost of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterationCost = x.size() * x.getIteratorAdvanceCost();
    return iterationCost * y.getLookupCost();
  }

  /** Sets xi = f(xi, yi) for every element of x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    for (Element thisElement : x.all()) {
      int index = thisElement.index();
      double result = f.apply(thisElement.get(), y.getQuick(index));
      x.setQuick(index, result);
    }
    return x;
  }
}
/**
 * Visits every element of y (y.all()), looks up the matching element of x, and updates x by merging: the
 * results are collected in an OrderedIntDoubleMapping that is handed to x.mergeUpdates() at the end.
 * The validity check does not depend on f.
 */
public static class AssignAllIterateThatLookupThisMergeUpdates extends VectorBinaryAssign {

  /** Valid when x does not support constant-time additions and y is not dense. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    return !(x.isAddConstantTime() || y.isDense());
  }

  /** Cost of iterating over size() elements of y, scaled by the lookup cost of x. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterationCost = y.size() * y.getIteratorAdvanceCost();
    return iterationCost * x.getLookupCost();
  }

  /** Computes f(xi, yi) for every element of y, merges the results into x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    OrderedIntDoubleMapping pending = new OrderedIntDoubleMapping(false);
    for (Element thatElement : y.all()) {
      int index = thatElement.index();
      double result = f.apply(x.getQuick(index), thatElement.get());
      pending.set(index, result);
    }
    x.mergeUpdates(pending);
    return x;
  }
}
/**
 * Visits every element of y (y.all()), looks up the matching element of x, and writes f(xi, yi) back into x
 * inplace through x.setQuick().
 * The validity check does not depend on f.
 */
public static class AssignAllIterateThatLookupThisInplaceUpdates extends VectorBinaryAssign {

  /** Valid when x supports constant-time additions and y is not dense. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    if (!x.isAddConstantTime()) {
      return false;
    }
    return !y.isDense();
  }

  /** Cost of iterating over size() elements of y, scaled by the lookup cost of x. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double iterationCost = y.size() * y.getIteratorAdvanceCost();
    return iterationCost * x.getLookupCost();
  }

  /** Sets xi = f(xi, yi) for every element of y and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    for (Element thatElement : y.all()) {
      int index = thatElement.index();
      double result = f.apply(x.getQuick(index), thatElement.get());
      x.setQuick(index, result);
    }
    return x;
  }
}
/**
 * Plain index loop over 0 .. x.size() - 1 that looks up both x and y at every index and updates x by merging:
 * the results are collected in an OrderedIntDoubleMapping that is handed to x.mergeUpdates() at the end.
 * The validity check does not depend on y or f.
 */
public static class AssignAllLoopMergeUpdates extends VectorBinaryAssign {

  /** Valid when x does not support constant-time additions. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    boolean constantTimeAdd = x.isAddConstantTime();
    return !constantTimeAdd;
  }

  /** size() of x scaled by the lookup costs of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double thisCost = x.size() * x.getLookupCost();
    return thisCost * y.getLookupCost();
  }

  /** Computes f(xi, yi) for every index of x, merges the results into x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    OrderedIntDoubleMapping pending = new OrderedIntDoubleMapping(false);
    int index = 0;
    while (index < x.size()) {
      double result = f.apply(x.getQuick(index), y.getQuick(index));
      pending.set(index, result);
      index++;
    }
    x.mergeUpdates(pending);
    return x;
  }
}
/**
 * Plain index loop over 0 .. x.size() - 1 that looks up both x and y at every index and writes f(xi, yi) back
 * into x inplace through x.setQuick().
 * The validity check does not depend on y or f.
 */
public static class AssignAllLoopInplaceUpdates extends VectorBinaryAssign {

  /** Valid when x supports constant-time additions. */
  @Override
  public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
    boolean constantTimeAdd = x.isAddConstantTime();
    return constantTimeAdd;
  }

  /** size() of x scaled by the lookup costs of x and of y. */
  @Override
  public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
    double thisCost = x.size() * x.getLookupCost();
    return thisCost * y.getLookupCost();
  }

  /** Sets xi = f(xi, yi) for every index of x and returns x. */
  @Override
  public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
    int index = 0;
    while (index < x.size()) {
      double result = f.apply(x.getQuick(index), y.getQuick(index));
      x.setQuick(index, result);
      index++;
    }
    return x;
  }
}
}