org.lenskit.predict.ordrec.OrdRecRatingPredictor Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lenskit-predict Show documentation
Show all versions of lenskit-predict Show documentation
Advanced rating prediction support for LensKit.
/*
* LensKit, an open source recommender systems toolkit.
* Copyright 2010-2014 LensKit Contributors. See CONTRIBUTORS.md.
* Work on LensKit has been funded by the National Science Foundation under
* grants IIS 05-34939, 08-08692, 08-12148, and 10-17697.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.lenskit.predict.ordrec;
import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.longs.LongIterators;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;
import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealVector;
import org.lenskit.data.dao.UserEventDAO;
import org.lenskit.data.ratings.Rating;
import org.grouplens.lenskit.data.history.RatingVectorUserHistorySummarizer;
import org.lenskit.data.history.UserHistory;
import org.grouplens.lenskit.iterative.IterationCount;
import org.grouplens.lenskit.iterative.LearningRate;
import org.grouplens.lenskit.iterative.RegularizationTerm;
import org.grouplens.lenskit.vectors.MutableSparseVector;
import org.grouplens.lenskit.vectors.SparseVector;
import org.grouplens.lenskit.vectors.VectorEntry;
import org.lenskit.api.ItemScorer;
import org.lenskit.api.Result;
import org.lenskit.api.ResultMap;
import org.lenskit.basic.AbstractRatingPredictor;
import org.lenskit.results.AbstractResult;
import org.lenskit.results.Results;
import org.lenskit.transform.quantize.Quantizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nonnull;
import javax.inject.Inject;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
/**
 * OrdRec implementation of rating prediction.
 *
 * <p>The model views user feedback as ordinal. The framework is based on
 * a pointwise (rather than pairwise) ordinal approach; it can wrap existing
 * CF methods and upgrade them so that they can handle ordinal feedback.
 * The implementation is based on Koren and Sill's paper, <i>OrdRec: An Ordinal Model for
 * Predicting Personalized Item Rating Distributions</i> (RecSys 2011).
 *
 * <p>A fresh {@link OrdRecModel} is trained for the user on every prediction request, using the
 * user's ratings and the wrapped {@link ItemScorer}'s scores for the rated items.
 *
 * @since 2.1
 */
public class OrdRecRatingPredictor extends AbstractRatingPredictor {
    private static final Logger logger = LoggerFactory.getLogger(OrdRecRatingPredictor.class);

    private final ItemScorer itemScorer;
    private final UserEventDAO userEventDao;
    private final Quantizer quantizer;
    private final double learningRate;
    private final double regTerm;
    private final int iterationCount;

    /**
     * Construct a new OrdRec rating predictor.
     *
     * @param scorer    The ItemScorer to produce the underlying scores.
     * @param dao       The DAO to access user events.
     * @param quantizer The quantizer to which ratings should be constrained.
     * @param rate      The learning rate for user profile training.
     * @param reg       Regularization term for user profile training.
     * @param niters    The number of passes over the user's ratings made when training the
     *                  user's model.
     */
    @Inject
    public OrdRecRatingPredictor(ItemScorer scorer, UserEventDAO dao, Quantizer quantizer,
                                 @LearningRate double rate,
                                 @RegularizationTerm double reg,
                                 @IterationCount int niters) {
        this.userEventDao = dao;
        this.itemScorer = scorer;
        this.quantizer = quantizer;
        this.learningRate = rate;
        this.regTerm = reg;
        this.iterationCount = niters;
    }

    /**
     * Convenience constructor for testing.  Uses a learning rate of 1e-3, a regularization
     * term of 0.015, and 1000 training iterations.
     *
     * @param scorer The item scorer.
     * @param dao    The user event DAO.
     * @param q      The quantizer.
     */
    OrdRecRatingPredictor(ItemScorer scorer, UserEventDAO dao, Quantizer q) {
        this(scorer, dao, q, 1e-3, 0.015, 1000);
    }

    /**
     * Build the user's rating vector from the rating events in a UserEventDAO.
     *
     * @param uid The user ID.
     * @param dao The UserEventDAO.
     * @return The user's rating vector, or {@code null} if the DAO returns no history for
     *         the user.
     */
    private SparseVector makeUserVector(long uid, UserEventDAO dao) {
        UserHistory<Rating> history = dao.getEventsForUser(uid, Rating.class);
        if (history == null) {
            return null;
        }
        return RatingVectorUserHistorySummarizer.makeRatingVector(history);
    }

    /**
     * Helper function to calculate the derivative of a threshold with respect to a parameter.
     * This function computes $\frac{d}{dx} (t_r - y_{ui})$, where r specifies
     * which t_r is used, and k specifies x (with k=0, $x = t_1$; for k > 0, it is $x = β_k$).
     *
     * <p>Concretely: for k = 0 the result is 1 whenever r >= 0; for k > 0 the result is
     * {@code exp(beta)} whenever r >= k; in every other case it is 0.
     *
     * @param r    The index of the threshold (the rth threshold).
     * @param k    The index of the parameter to differentiate with respect to.
     * @param beta The current value of that parameter (only used when k > 0).
     * @return The derivative with respect to the kth parameter.
     */
    private static double dBeta(int r, int k, double beta) {
        if (r >= 0 && k == 0) {
            return 1.0;
        } else if (k > 0 && r >= k) {
            return Math.exp(beta);
        } else {
            return 0;
        }
    }

    /**
     * The train function of OrdRec.  Runs {@code iterationCount} passes over the user's ratings,
     * updating the model's parameters after every rating.
     *
     * <p>Rated items for which {@code scores} has no value are skipped, since there is no
     * underlying score to train against.
     *
     * @param model   The model to train; it is updated in place.
     * @param ratings The user's ratings.
     * @param scores  The underlying item scores for the user.
     */
    @SuppressWarnings("ConstantConditions")
    private void trainModel(OrdRecModel model, SparseVector ratings, MutableSparseVector scores) {
        RealVector beta = model.getBeta();
        RealVector deltaBeta = new ArrayRealVector(beta.getDimension());

        for (int j = 0; j < iterationCount; j++) {
            for (VectorEntry rating : ratings) {
                long iid = rating.getKey();
                // The item scorer is not guaranteed to score every rated item; an unchecked
                // get(iid) would throw for such items and abort the whole prediction.
                double score = scores.get(iid, Double.NaN);
                if (Double.isNaN(score)) {
                    continue;
                }

                int r = quantizer.index(rating.getValue());

                double probEqualR = model.getProbEQ(score, r);
                double probLessR = model.getProbLE(score, r);
                double probLessR_1 = model.getProbLE(score, r - 1);

                // NOTE(review): probEqualR is used as a divisor; a zero probability would make
                // the updates non-finite -- confirm OrdRecModel keeps it strictly positive.
                double step = learningRate / probEqualR;
                double slopeR = probLessR * (1 - probLessR);
                double slopeR_1 = probLessR_1 * (1 - probLessR_1);

                double t1 = model.getT1();
                double dt1 = step * (slopeR * dBeta(r, 0, t1)
                                     - slopeR_1 * dBeta(r - 1, 0, t1)
                                     - regTerm * t1);

                for (int k = 0; k < beta.getDimension(); k++) {
                    double betaK = beta.getEntry(k);
                    double dbetaK = step * (slopeR * dBeta(r, k + 1, betaK)
                                            - slopeR_1 * dBeta(r - 1, k + 1, betaK)
                                            - regTerm * betaK);
                    deltaBeta.setEntry(k, dbetaK);
                }
                model.update(dt1, deltaBeta);
            }
        }
    }

    /**
     * Predict ratings for a set of items.
     *
     * @param user  The user ID for whom to generate predictions.
     * @param items The items to predict for.
     * @return A map from item IDs to predicted ratings; items that the underlying scorer
     *         cannot score are omitted.
     */
    @Nonnull
    @Override
    public Map<Long, Double> predict(long user, @Nonnull Collection<Long> items) {
        return computePredictions(user, items, false).scoreMap();
    }

    /**
     * Compute detailed predictions for the user.
     *
     * @param user  The user ID for whom to generate predictions.
     * @param items The items to predict for.
     * @return The detailed results; each result is an instance of {@link OrdRecRatingPredictor.FullResult}.
     */
    @Nonnull
    @Override
    public ResultMap predictWithDetails(long user, @Nonnull Collection<Long> items) {
        return computePredictions(user, items, true);
    }

    /**
     * Score the requested and rated items, train a model for the user, and predict the most
     * probable rating level for each requested item that has an underlying score.
     *
     * @param user           The user ID.
     * @param items          The items to predict for.
     * @param includeDetails Whether to produce {@link FullResult}s (with the original result and
     *                       the probability distribution) rather than plain results.
     * @return The predictions.
     */
    @Nonnull
    private ResultMap computePredictions(long user, @Nonnull Collection<Long> items, boolean includeDetails) {
        logger.debug("predicting {} items for {}", items.size(), user);
        // ratings is null when the DAO has no history for this user
        SparseVector ratings = makeUserVector(user, userEventDao);

        // Scores are needed both for the rated items (training) and the requested items.
        LongSet allItems = (ratings != null)
                ? new LongOpenHashSet(ratings.keySet())
                : new LongOpenHashSet();
        allItems.addAll(items);

        ResultMap baseResults = null;
        Map<Long, Double> scores;
        if (includeDetails) {
            baseResults = itemScorer.scoreWithDetails(user, allItems);
            scores = baseResults.scoreMap();
        } else {
            scores = itemScorer.score(user, allItems);
        }
        MutableSparseVector scoreVector = MutableSparseVector.create(scores);

        OrdRecModel params = new OrdRecModel(quantizer);
        if (ratings != null) {
            trainModel(params, ratings, scoreVector);
        } else {
            // Nothing to train on: predictions come from the model's initial parameters.
            logger.debug("no rating history for {}, using untrained parameters", user);
        }
        logger.debug("trained parameters for {}: {}", user, params);

        // Scratch vector reused across items; it is copied before being stored in a result.
        RealVector probabilities = new ArrayRealVector(params.getLevelCount());
        List<Result> results = new ArrayList<>();

        LongIterator iter = LongIterators.asLongIterator(items.iterator());
        while (iter.hasNext()) {
            final long item = iter.nextLong();
            double score = scoreVector.get(item, Double.NaN);
            if (Double.isNaN(score)) {
                // no underlying score, so no prediction for this item
                continue;
            }

            params.getProbDistribution(score, probabilities);
            int mlIdx = probabilities.getMaxIndex();
            double pred = quantizer.getIndexValue(mlIdx);
            if (includeDetails) {
                results.add(new FullResult(baseResults.get(item), pred,
                                           new ArrayRealVector(probabilities)));
            } else {
                results.add(Results.create(item, pred));
            }
        }

        return Results.newResultMap(results);
    }

    /**
     * The result type of OrdRec rating predictions.
     */
    public static class FullResult extends AbstractResult implements Serializable {
        private static final long serialVersionUID = 1L;

        private final Result original;
        private final RealVector distribution;

        /**
         * Create a new full result.
         *
         * @param orig  The original result from the underlying scorer; its ID is used as this
         *              result's ID.
         * @param score The estimated score.
         * @param probs The full probability distribution (defensive copy will not be taken).
         */
        FullResult(Result orig, double score, RealVector probs) {
            super(orig.getId(), score);
            original = orig;
            distribution = probs;
        }

        /**
         * Get the original result.
         *
         * @return The original result.
         */
        public Result getOriginalResult() {
            return original;
        }

        /**
         * Get the probability distribution from this result.
         *
         * @return The probability distribution. It is <strong>not</strong> copied, so this vector
         *         should not be modified.
         */
        public RealVector getDistribution() {
            return distribution;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy