org.apache.mahout.math.Vector Maven / Gradle / Ivy
Show all versions of mahout-math Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math;
import org.apache.mahout.math.function.DoubleDoubleFunction;
import org.apache.mahout.math.function.DoubleFunction;
/**
* The basic interface including numerous convenience functions NOTE: All implementing classes must have a
* constructor that takes an int for cardinality and a no-arg constructor that can be used for marshalling the Writable
* instance NOTE: Implementations may choose to reuse the Vector.Element in the Iterable methods
*/
public interface Vector extends Cloneable {
/** @return a formatted String suitable for output */
String asFormatString();
/**
* Assign the value to all elements of the receiver
*
* @param value a double value
* @return the modified receiver
*/
Vector assign(double value);
/**
* Assign the values to the receiver
*
* @param values a double[] of values
* @return the modified receiver
* @throws CardinalityException if the cardinalities differ
*/
Vector assign(double[] values);
/**
* Assign the other vector values to the receiver
*
* @param other a Vector
* @return the modified receiver
* @throws CardinalityException if the cardinalities differ
*/
Vector assign(Vector other);
/**
* Apply the function to each element of the receiver
*
* @param function a DoubleFunction to apply
* @return the modified receiver
*/
Vector assign(DoubleFunction function);
/**
* Apply the function to each element of the receiver and the corresponding element of the other argument
*
* @param other a Vector containing the second arguments to the function
* @param function a DoubleDoubleFunction to apply
* @return the modified receiver
* @throws CardinalityException if the cardinalities differ
*/
Vector assign(Vector other, DoubleDoubleFunction function);
/**
* Apply the function to each element of the receiver, using the y value as the second argument of the
* DoubleDoubleFunction
*
* @param f a DoubleDoubleFunction to be applied
* @param y a double value to be argument to the function
* @return the modified receiver
*/
Vector assign(DoubleDoubleFunction f, double y);
/**
* Return the cardinality of the recipient (the maximum number of values)
*
* @return an int
*/
int size();
/**
* @return true iff this implementation should be considered dense -- that it explicitly
* represents every value
*/
boolean isDense();
/**
* @return true iff this implementation should be considered to be iterable in index order in an efficient way.
* In particular this implies that {@link #all()} and {@link #nonZeroes()} ()} return elements
* in ascending order by index.
*/
boolean isSequentialAccess();
/**
* Return a copy of the recipient
*
* @return a new Vector
*/
@SuppressWarnings("CloneDoesntDeclareCloneNotSupportedException")
Vector clone();
Iterable all();
Iterable nonZeroes();
/**
* Return an object of Vector.Element representing an element of this Vector. Useful when designing new iterator
* types.
*
* @param index Index of the Vector.Element required
* @return The Vector.Element Object
*/
Element getElement(int index);
/**
* Merge a set of (index, value) pairs into the vector.
* @param updates an ordered mapping of indices to values to be merged in.
*/
void mergeUpdates(OrderedIntDoubleMapping updates);
/**
* A holder for information about a specific item in the Vector. When using with an Iterator, the implementation
* may choose to reuse this element, so you may need to make a copy if you want to keep it
*/
interface Element {
/** @return the value of this vector element. */
double get();
/** @return the index of this vector element. */
int index();
/** @param value Set the current element to value. */
void set(double value);
}
/**
* Return a new vector containing the values of the recipient divided by the argument
*
* @param x a double value
* @return a new Vector
*/
Vector divide(double x);
/**
* Return the dot product of the recipient and the argument
*
* @param x a Vector
* @return a new Vector
* @throws CardinalityException if the cardinalities differ
*/
double dot(Vector x);
/**
* Return the value at the given index
*
* @param index an int index
* @return the double at the index
* @throws IndexException if the index is out of bounds
*/
double get(int index);
/**
* Return the value at the given index, without checking bounds
*
* @param index an int index
* @return the double at the index
*/
double getQuick(int index);
/**
* Return an empty vector of the same underlying class as the receiver
*
* @return a Vector
*/
Vector like();
/**
* Return a new empty vector of the same underlying class as the receiver with given cardinality
*
* @param cardinality
* @return
*/
Vector like(int cardinality);
/**
* Return a new vector containing the element by element difference of the recipient and the argument
*
* @param x a Vector
* @return a new Vector
* @throws CardinalityException if the cardinalities differ
*/
Vector minus(Vector x);
/**
* Return a new vector containing the normalized (L_2 norm) values of the recipient
*
* @return a new Vector
*/
Vector normalize();
/**
* Return a new Vector containing the normalized (L_power norm) values of the recipient. See
* http://en.wikipedia.org/wiki/Lp_space Technically, when 0 < power < 1, we don't have a norm, just a metric,
* but we'll overload this here. Also supports power == 0 (number of non-zero elements) and power = {@link
* Double#POSITIVE_INFINITY} (max element). Again, see the Wikipedia page for more info
*
* @param power The power to use. Must be >= 0. May also be {@link Double#POSITIVE_INFINITY}. See the Wikipedia link
* for more on this.
* @return a new Vector x such that norm(x, power) == 1
*/
Vector normalize(double power);
/**
* Return a new vector containing the log(1 + entry)/ L_2 norm values of the recipient
*
* @return a new Vector
*/
Vector logNormalize();
/**
* Return a new Vector with a normalized value calculated as log_power(1 + entry)/ L_power norm.
*
* @param power The power to use. Must be > 1. Cannot be {@link Double#POSITIVE_INFINITY}.
* @return a new Vector
*/
Vector logNormalize(double power);
/**
* Return the k-norm of the vector. See http://en.wikipedia.org/wiki/Lp_space Technically, when 0 > power
* < 1, we don't have a norm, just a metric, but we'll overload this here. Also supports power == 0 (number of
* non-zero elements) and power = {@link Double#POSITIVE_INFINITY} (max element). Again, see the Wikipedia page for
* more info.
*
* @param power The power to use.
* @see #normalize(double)
*/
double norm(double power);
/** @return The minimum value in the Vector */
double minValue();
/** @return The index of the minimum value */
int minValueIndex();
/** @return The maximum value in the Vector */
double maxValue();
/** @return The index of the maximum value */
int maxValueIndex();
/**
* Return a new vector containing the sum of each value of the recipient and the argument
*
* @param x a double
* @return a new Vector
*/
Vector plus(double x);
/**
* Return a new vector containing the element by element sum of the recipient and the argument
*
* @param x a Vector
* @return a new Vector
* @throws CardinalityException if the cardinalities differ
*/
Vector plus(Vector x);
/**
* Set the value at the given index
*
* @param index an int index into the receiver
* @param value a double value to set
* @throws IndexException if the index is out of bounds
*/
void set(int index, double value);
/**
* Set the value at the given index, without checking bounds
*
* @param index an int index into the receiver
* @param value a double value to set
*/
void setQuick(int index, double value);
/**
* Increment the value at the given index by the given value.
*
* @param index an int index into the receiver
* @param increment sets the value at the given index to value + increment;
*/
void incrementQuick(int index, double increment);
/**
* Return the number of values in the recipient which are not the default value. For instance, for a
* sparse vector, this would be the number of non-zero values.
*
* @return an int
*/
int getNumNondefaultElements();
/**
* Return the number of non zero elements in the vector.
*
* @return an int
*/
int getNumNonZeroElements();
/**
* Return a new vector containing the product of each value of the recipient and the argument
*
* @param x a double argument
* @return a new Vector
*/
Vector times(double x);
/**
* Return a new vector containing the element-wise product of the recipient and the argument
*
* @param x a Vector argument
* @return a new Vector
* @throws CardinalityException if the cardinalities differ
*/
Vector times(Vector x);
/**
* Return a new vector containing the subset of the recipient
*
* @param offset an int offset into the receiver
* @param length the cardinality of the desired result
* @return a new Vector
* @throws CardinalityException if the length is greater than the cardinality of the receiver
* @throws IndexException if the offset is negative or the offset+length is outside of the receiver
*/
Vector viewPart(int offset, int length);
/**
* Return the sum of all the elements of the receiver
*
* @return a double
*/
double zSum();
/**
* Return the cross product of the receiver and the other vector
*
* @param other another Vector
* @return a Matrix
*/
Matrix cross(Vector other);
/*
* Need stories for these but keeping them here for now.
*/
// void getNonZeros(IntArrayList jx, DoubleArrayList values);
// void foreachNonZero(IntDoubleFunction f);
// DoubleDoubleFunction map);
// NewVector assign(Vector y, DoubleDoubleFunction function, IntArrayList
// nonZeroIndexes);
/**
* Examples speak louder than words: aggregate(plus, pow(2)) is another way to say
* getLengthSquared(), aggregate(max, abs) is norm(Double.POSITIVE_INFINITY). To sum all of the positive values,
* aggregate(plus, max(0)).
* @param aggregator used to combine the current value of the aggregation with the result of map.apply(nextValue)
* @param map a function to apply to each element of the vector in turn before passing to the aggregator
* @return the final aggregation
*/
double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map);
/**
* Generalized inner product - take two vectors, iterate over them both, using the combiner to combine together
* (and possibly map in some way) each pair of values, which are then aggregated with the previous accumulated
* value in the combiner.
*
* Example: dot(other) could be expressed as aggregate(other, Plus, Times), and kernelized inner products (which
* are symmetric on the indices) work similarly.
* @param other a vector to aggregate in combination with
* @param aggregator function we're aggregating with; fa
* @param combiner function we're combining with; fc
* @return the final aggregation; if r0 = fc(this[0], other[0]), ri = fa(r_{i-1}, fc(this[i], other[i]))
* for all i > 0
*/
double aggregate(Vector other, DoubleDoubleFunction aggregator, DoubleDoubleFunction combiner);
/** Return the sum of squares of all elements in the vector. Square root of this value is the length of the vector. */
double getLengthSquared();
/** Get the square of the distance between this vector and the other vector. */
double getDistanceSquared(Vector v);
/**
* Gets an estimate of the cost (in number of operations) it takes to lookup a random element in this vector.
*/
double getLookupCost();
/**
* Gets an estimate of the cost (in number of operations) it takes to advance an iterator through the nonzero
* elements of this vector.
*/
double getIteratorAdvanceCost();
/**
* Return true iff adding a new (nonzero) element takes constant time for this vector.
*/
boolean isAddConstantTime();
}