org.apache.mahout.math.hadoop.stochasticsvd.Omega Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mahout-mr Show documentation
Show all versions of mahout-mr Show documentation
Scalable machine learning libraries
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.math.hadoop.stochasticsvd;
import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import com.google.common.collect.Lists;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
/**
* simplistic implementation for Omega matrix in Stochastic SVD method
*/
public class Omega {
private static final double UNIFORM_DIVISOR = Math.pow(2.0, 64);
private final long seed;
private final int kp;
public Omega(long seed, int kp) {
this.seed = seed;
this.kp = kp;
}
/**
* Get omega element at (x,y) uniformly distributed within [-1...1)
*
* @param row
* omega row
* @param column
* omega column
*/
public double getQuick(int row, int column) {
long hash = murmur64((long) row << Integer.SIZE | column, 8, seed);
return hash / UNIFORM_DIVISOR;
}
/**
* compute YRow=ARow*Omega.
*
* @param aRow
* row of matrix A (size n)
* @param yRow
* row of matrix Y (result) must be pre-allocated to size of (k+p)
*/
@Deprecated
public void computeYRow(Vector aRow, double[] yRow) {
// assert yRow.length == kp;
Arrays.fill(yRow, 0.0);
if (aRow.isDense()) {
int n = aRow.size();
for (int j = 0; j < n; j++) {
accumDots(j, aRow.getQuick(j), yRow);
}
} else {
for (Element el : aRow.nonZeroes()) {
accumDots(el.index(), el.get(), yRow);
}
}
}
/**
* A version to compute yRow as a sparse vector in case of extremely sparse
* matrices
*
* @param aRow
* @param yRowOut
*/
public void computeYRow(Vector aRow, Vector yRowOut) {
yRowOut.assign(0.0);
if (aRow.isDense()) {
int n = aRow.size();
for (int j = 0; j < n; j++) {
accumDots(j, aRow.getQuick(j), yRowOut);
}
} else {
for (Element el : aRow.nonZeroes()) {
accumDots(el.index(), el.get(), yRowOut);
}
}
}
/*
* computes t(Omega) %*% v in multithreaded fashion
*/
public Vector mutlithreadedTRightMultiply(final Vector v) {
int nThreads = Runtime.getRuntime().availableProcessors();
ExecutorService es =
new ThreadPoolExecutor(nThreads,
nThreads,
1,
TimeUnit.SECONDS,
new ArrayBlockingQueue(kp));
try {
List> dotFutures = Lists.newArrayListWithCapacity(kp);
for (int i = 0; i < kp; i++) {
final int index = i;
Future dotFuture = es.submit(new Callable() {
@Override
public Double call() throws Exception {
double result = 0.0;
if (v.isDense()) {
for (int k = 0; k < v.size(); k++) {
// it's ok, this is reentrant
result += getQuick(k, index) * v.getQuick(k);
}
} else {
for (Element el : v.nonZeroes()) {
int k = el.index();
result += getQuick(k, index) * el.get();
}
}
return result;
}
});
dotFutures.add(dotFuture);
}
try {
Vector res = new DenseVector(kp);
for (int i = 0; i < kp; i++) {
res.setQuick(i, dotFutures.get(i).get());
}
return res;
} catch (InterruptedException exc) {
throw new IllegalStateException("Interrupted", exc);
} catch (ExecutionException exc) {
if (exc.getCause() instanceof RuntimeException) {
throw (RuntimeException) exc.getCause();
} else {
throw new IllegalStateException(exc.getCause());
}
}
} finally {
es.shutdown();
}
}
protected void accumDots(int aIndex, double aElement, double[] yRow) {
for (int i = 0; i < kp; i++) {
yRow[i] += getQuick(aIndex, i) * aElement;
}
}
protected void accumDots(int aIndex, double aElement, Vector yRow) {
for (int i = 0; i < kp; i++) {
yRow.setQuick(i, yRow.getQuick(i) + getQuick(aIndex, i) * aElement);
}
}
/**
* Shortened version for data < 8 bytes packed into {@code len} lowest bytes
* of {@code val}.
*
* @param val
* the value
* @param len
* the length of data packed into this many low bytes of {@code val}
* @param seed
* the seed to use
* @return murmur hash
*/
public static long murmur64(long val, int len, long seed) {
// assert len > 0 && len <= 8;
long m = 0xc6a4a7935bd1e995L;
long h = seed ^ len * m;
long k = val;
k *= m;
int r = 47;
k ^= k >>> r;
k *= m;
h ^= k;
h *= m;
h ^= h >>> r;
h *= m;
h ^= h >>> r;
return h;
}
public static long murmur64(byte[] val, int offset, int len, long seed) {
long m = 0xc6a4a7935bd1e995L;
int r = 47;
long h = seed ^ (len * m);
int lt = len >>> 3;
for (int i = 0; i < lt; i++, offset += 8) {
long k = 0;
for (int j = 0; j < 8; j++) {
k <<= 8;
k |= val[offset + j] & 0xff;
}
k *= m;
k ^= k >>> r;
k *= m;
h ^= k;
h *= m;
}
if (offset < len) {
long k = 0;
while (offset < len) {
k <<= 8;
k |= val[offset] & 0xff;
offset++;
}
h ^= k;
h *= m;
}
h ^= h >>> r;
h *= m;
h ^= h >>> r;
return h;
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy