Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.cf.taste.impl.recommender.svd;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.common.RandomWrapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** Minimalistic implementation of Parallel SGD factorizer based on
*
* "Scalable Collaborative Filtering Approaches for Large Recommender Systems"
* and
*
* "Hogwild!: A Lock-Free Approach to Parallelizing Stochastic Gradient Descent" */
public class ParallelSGDFactorizer extends AbstractFactorizer {
private final DataModel dataModel;
/** Parameter used to prevent overfitting. */
private final double lambda;
/** Number of features used to compute this factorization */
private final int rank;
/** Number of iterations */
private final int numEpochs;
private int numThreads;
// these next two control decayFactor^steps exponential type of annealing learning rate and decay factor
private double mu0 = 0.01;
private double decayFactor = 1;
// these next two control 1/steps^forget type annealing
private int stepOffset = 0;
// -1 equals even weighting of all examples, 0 means only use exponential annealing
private double forgettingExponent = 0;
// The following two should be inversely proportional :)
private double biasMuRatio = 0.5;
private double biasLambdaRatio = 0.1;
/** TODO: this is not safe as += is not atomic on many processors, can be replaced with AtomicDoubleArray
* but it works just fine right now */
/** user features */
protected volatile double[][] userVectors;
/** item features */
protected volatile double[][] itemVectors;
private final PreferenceShuffler shuffler;
private int epoch = 1;
/** place in user vector where the bias is stored */
private static final int USER_BIAS_INDEX = 1;
/** place in item vector where the bias is stored */
private static final int ITEM_BIAS_INDEX = 2;
private static final int FEATURE_OFFSET = 3;
/** Standard deviation for random initialization of features */
private static final double NOISE = 0.02;
private static final Logger logger = LoggerFactory.getLogger(ParallelSGDFactorizer.class);
protected static class PreferenceShuffler {
private Preference[] preferences;
private Preference[] unstagedPreferences;
protected final RandomWrapper random = RandomUtils.getRandom();
public PreferenceShuffler(DataModel dataModel) throws TasteException {
cachePreferences(dataModel);
shuffle();
stage();
}
private int countPreferences(DataModel dataModel) throws TasteException {
int numPreferences = 0;
LongPrimitiveIterator userIDs = dataModel.getUserIDs();
while (userIDs.hasNext()) {
PreferenceArray preferencesFromUser = dataModel.getPreferencesFromUser(userIDs.nextLong());
numPreferences += preferencesFromUser.length();
}
return numPreferences;
}
private void cachePreferences(DataModel dataModel) throws TasteException {
int numPreferences = countPreferences(dataModel);
preferences = new Preference[numPreferences];
LongPrimitiveIterator userIDs = dataModel.getUserIDs();
int index = 0;
while (userIDs.hasNext()) {
long userID = userIDs.nextLong();
PreferenceArray preferencesFromUser = dataModel.getPreferencesFromUser(userID);
for (Preference preference : preferencesFromUser) {
preferences[index++] = preference;
}
}
}
public final void shuffle() {
unstagedPreferences = preferences.clone();
/* Durstenfeld shuffle */
for (int i = unstagedPreferences.length - 1; i > 0; i--) {
int rand = random.nextInt(i + 1);
swapCachedPreferences(i, rand);
}
}
//merge this part into shuffle() will make compiler-optimizer do some real absurd stuff, test on OpenJDK7
private void swapCachedPreferences(int x, int y) {
Preference p = unstagedPreferences[x];
unstagedPreferences[x] = unstagedPreferences[y];
unstagedPreferences[y] = p;
}
public final void stage() {
preferences = unstagedPreferences;
}
public Preference get(int i) {
return preferences[i];
}
public int size() {
return preferences.length;
}
}
public ParallelSGDFactorizer(DataModel dataModel, int numFeatures, double lambda, int numEpochs)
throws TasteException {
super(dataModel);
this.dataModel = dataModel;
this.rank = numFeatures + FEATURE_OFFSET;
this.lambda = lambda;
this.numEpochs = numEpochs;
shuffler = new PreferenceShuffler(dataModel);
//max thread num set to n^0.25 as suggested by hogwild! paper
numThreads = Math.min(Runtime.getRuntime().availableProcessors(), (int) Math.pow((double) shuffler.size(), 0.25));
}
public ParallelSGDFactorizer(DataModel dataModel, int numFeatures, double lambda, int numIterations,
double mu0, double decayFactor, int stepOffset, double forgettingExponent) throws TasteException {
this(dataModel, numFeatures, lambda, numIterations);
this.mu0 = mu0;
this.decayFactor = decayFactor;
this.stepOffset = stepOffset;
this.forgettingExponent = forgettingExponent;
}
public ParallelSGDFactorizer(DataModel dataModel, int numFeatures, double lambda, int numIterations,
double mu0, double decayFactor, int stepOffset, double forgettingExponent, int numThreads) throws TasteException {
this(dataModel, numFeatures, lambda, numIterations, mu0, decayFactor, stepOffset, forgettingExponent);
this.numThreads = numThreads;
}
public ParallelSGDFactorizer(DataModel dataModel, int numFeatures, double lambda, int numIterations,
double mu0, double decayFactor, int stepOffset, double forgettingExponent,
double biasMuRatio, double biasLambdaRatio) throws TasteException {
this(dataModel, numFeatures, lambda, numIterations, mu0, decayFactor, stepOffset, forgettingExponent);
this.biasMuRatio = biasMuRatio;
this.biasLambdaRatio = biasLambdaRatio;
}
public ParallelSGDFactorizer(DataModel dataModel, int numFeatures, double lambda, int numIterations,
double mu0, double decayFactor, int stepOffset, double forgettingExponent,
double biasMuRatio, double biasLambdaRatio, int numThreads) throws TasteException {
this(dataModel, numFeatures, lambda, numIterations, mu0, decayFactor, stepOffset, forgettingExponent, biasMuRatio,
biasLambdaRatio);
this.numThreads = numThreads;
}
protected void initialize() throws TasteException {
RandomWrapper random = RandomUtils.getRandom();
userVectors = new double[dataModel.getNumUsers()][rank];
itemVectors = new double[dataModel.getNumItems()][rank];
double globalAverage = getAveragePreference();
for (int userIndex = 0; userIndex < userVectors.length; userIndex++) {
userVectors[userIndex][0] = globalAverage;
userVectors[userIndex][USER_BIAS_INDEX] = 0; // will store user bias
userVectors[userIndex][ITEM_BIAS_INDEX] = 1; // corresponding item feature contains item bias
for (int feature = FEATURE_OFFSET; feature < rank; feature++) {
userVectors[userIndex][feature] = random.nextGaussian() * NOISE;
}
}
for (int itemIndex = 0; itemIndex < itemVectors.length; itemIndex++) {
itemVectors[itemIndex][0] = 1; // corresponding user feature contains global average
itemVectors[itemIndex][USER_BIAS_INDEX] = 1; // corresponding user feature contains user bias
itemVectors[itemIndex][ITEM_BIAS_INDEX] = 0; // will store item bias
for (int feature = FEATURE_OFFSET; feature < rank; feature++) {
itemVectors[itemIndex][feature] = random.nextGaussian() * NOISE;
}
}
}
//TODO: needs optimization
private double getMu(int i) {
return mu0 * Math.pow(decayFactor, i - 1) * Math.pow(i + stepOffset, forgettingExponent);
}
@Override
public Factorization factorize() throws TasteException {
initialize();
if (logger.isInfoEnabled()) {
logger.info("starting to compute the factorization...");
}
for (epoch = 1; epoch <= numEpochs; epoch++) {
shuffler.stage();
final double mu = getMu(epoch);
int subSize = shuffler.size() / numThreads + 1;
ExecutorService executor=Executors.newFixedThreadPool(numThreads);
try {
for (int t = 0; t < numThreads; t++) {
final int iStart = t * subSize;
final int iEnd = Math.min((t + 1) * subSize, shuffler.size());
executor.execute(new Runnable() {
@Override
public void run() {
for (int i = iStart; i < iEnd; i++) {
update(shuffler.get(i), mu);
}
}
});
}
} finally {
executor.shutdown();
shuffler.shuffle();
try {
boolean terminated = executor.awaitTermination(numEpochs * shuffler.size(), TimeUnit.MICROSECONDS);
if (!terminated) {
logger.error("subtasks takes forever, return anyway");
}
} catch (InterruptedException e) {
throw new TasteException("waiting fof termination interrupted", e);
}
}
}
return createFactorization(userVectors, itemVectors);
}
double getAveragePreference() throws TasteException {
RunningAverage average = new FullRunningAverage();
LongPrimitiveIterator it = dataModel.getUserIDs();
while (it.hasNext()) {
for (Preference pref : dataModel.getPreferencesFromUser(it.nextLong())) {
average.addDatum(pref.getValue());
}
}
return average.getAverage();
}
/** TODO: this is the vanilla sgd by Tacaks 2009, I speculate that using scaling technique proposed in:
* Towards Optimal One Pass Large Scale Learning with Averaged Stochastic Gradient Descent section 5, page 6
* can be beneficial in term s of both speed and accuracy.
*
* Tacaks' method doesn't calculate gradient of regularization correctly, which has non-zero elements everywhere of
* the matrix. While Tacaks' method can only updates a single row/column, if one user has a lot of recommendation,
* her vector will be more affected by regularization using an isolated scaling factor for both user vectors and
* item vectors can remove this issue without inducing more update cost it even reduces it a bit by only performing
* one addition and one multiplication.
*
* BAD SIDE1: the scaling factor decreases fast, it has to be scaled up from time to time before dropped to zero or
* caused roundoff error
* BAD SIDE2: no body experiment on it before, and people generally use very small lambda
* so it's impact on accuracy may still be unknown.
* BAD SIDE3: don't know how to make it work for L1-regularization or
* "pseudorank?" (sum of singular values)-regularization */
protected void update(Preference preference, double mu) {
int userIndex = userIndex(preference.getUserID());
int itemIndex = itemIndex(preference.getItemID());
double[] userVector = userVectors[userIndex];
double[] itemVector = itemVectors[itemIndex];
double prediction = dot(userVector, itemVector);
double err = preference.getValue() - prediction;
// adjust features
for (int k = FEATURE_OFFSET; k < rank; k++) {
double userFeature = userVector[k];
double itemFeature = itemVector[k];
userVector[k] += mu * (err * itemFeature - lambda * userFeature);
itemVector[k] += mu * (err * userFeature - lambda * itemFeature);
}
// adjust user and item bias
userVector[USER_BIAS_INDEX] += biasMuRatio * mu * (err - biasLambdaRatio * lambda * userVector[USER_BIAS_INDEX]);
itemVector[ITEM_BIAS_INDEX] += biasMuRatio * mu * (err - biasLambdaRatio * lambda * itemVector[ITEM_BIAS_INDEX]);
}
private double dot(double[] userVector, double[] itemVector) {
double sum = 0;
for (int k = 0; k < rank; k++) {
sum += userVector[k] * itemVector[k];
}
return sum;
}
}