All downloads are free. The search and download functionalities use the official Maven repository.

net.librec.recommender.cf.rating.URPRecommender Maven / Gradle / Ivy

/**
 * Copyright (C) 2016 LibRec
 * 

* This file is part of LibRec. * LibRec is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. *

* LibRec is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. *

* You should have received a copy of the GNU General Public License * along with LibRec. If not, see . */ package net.librec.recommender.cf.rating; import com.google.common.collect.HashBasedTable; import com.google.common.collect.Table; import net.librec.common.LibrecException; import net.librec.math.algorithm.Randoms; import net.librec.math.structure.DenseMatrix; import net.librec.math.structure.DenseVector; import net.librec.math.structure.MatrixEntry; import net.librec.recommender.ProbabilisticGraphicalRecommender; import static net.librec.math.algorithm.Gamma.digamma; /** * User Rating Profile: a LDA model for rating prediction.
*

* Benjamin Marlin, Modeling user rating profiles for collaborative filtering, NIPS 2003.
*

* Nicola Barbieri, Regularized gibbs sampling for user profiling with soft constraints, ASONAM 2011. * * @author Guo Guibing and Haidong Zhang */ public class URPRecommender extends ProbabilisticGraphicalRecommender { private double preRMSE; /** * number of occurrentces of entry (user, topic) */ private DenseMatrix userTopicNum; /** * number of occurences of users */ private DenseVector userNum; /** * number of occurrences of entry (topic, item) */ private DenseMatrix topicItemNum; /** * P(k | u) */ private DenseMatrix userTopicProbs, userTopicSumProbs; /** * user parameters */ private DenseVector alpha; /** * item parameters */ private DenseVector beta; /** * */ protected Table topics; /** * number of topics */ protected int numTopics; /** * */ protected int numRatingLevels; /** * number of occurrences of entry (t, i, r) */ private int[][][] topicItemRatingNum; // Nkir /** * cumulative statistics of probabilities of (t, i, r) */ private double[][][] topicItemRatingSumProbs; //PkirSum; /** * posterior probabilities of parameters phi_{k, i, r} */ protected double[][][] topicItemRatingProbs; //Pkir; @Override protected void setup() throws LibrecException { super.setup(); numTopics = conf.getInt("rec.pgm.number", 10); numRatingLevels = trainMatrix.getValueSet().size(); // cumulative parameters userTopicSumProbs = new DenseMatrix(numUsers, numTopics); topicItemRatingSumProbs = new double[numTopics][numItems][numRatingLevels]; // initialize count variables userTopicNum = new DenseMatrix(numUsers, numTopics); userNum = new DenseVector(numUsers); topicItemRatingNum = new int[numTopics][numItems][numRatingLevels]; topicItemNum = new DenseMatrix(numTopics, numItems); alpha = new DenseVector(numTopics); double initAlpha = conf.getDouble("rec.pgm.bucm.alpha", 1.0 / numTopics); alpha.setAll(initAlpha); beta = new DenseVector(numRatingLevels); double initBeta = conf.getDouble("rec.pgm.bucm.beta", 1.0 / numTopics); beta.setAll(initBeta); // initialize topics topics = 
HashBasedTable.create(); for (MatrixEntry me : trainMatrix) { int u = me.row(); int i = me.column(); double rui = me.get(); int r = ratingScale.indexOf(rui); // rating level 0 ~ numLevels int t = (int) (Randoms.uniform() * numTopics); // 0 ~ k-1 // Assign a topic t to pair (u, i) topics.put(u, i, t); // number of pairs (u, t) in (u, i, t) userTopicNum.add(u, t, 1); // total number of items of user u userNum.add(u, 1); // number of pairs (t, i, r) topicItemRatingNum[t][i][r]++; // total number of words assigned to topic t topicItemNum.add(t, i, 1); } } @Override protected void eStep() { double sumAlpha = alpha.sum(); double sumBeta = beta.sum(); // collapse Gibbs sampling for (MatrixEntry me : trainMatrix) { int u = me.row(); int i = me.column(); double rui = me.get(); int r = ratingScale.indexOf(rui); // rating level 0 ~ numLevels int t = topics.get(u, i); userTopicNum.add(u, t, -1); userNum.add(u, -1); topicItemRatingNum[t][i][r]--; topicItemNum.add(t, i, -1); // do multinomial sampling via cumulative method: double[] p = new double[numTopics]; for (int k = 0; k < numTopics; k++) { p[k] = (userTopicNum.get(u, k) + alpha.get(k)) / (userNum.get(u) + sumAlpha) * (topicItemRatingNum[k][i][r] + beta.get(r)) / (topicItemNum.get(k, i) + sumBeta); } // cumulate multinomial parameters for (int k = 1; k < p.length; k++) { p[k] += p[k - 1]; } // scaled sample because of unnormalized p[], randomly sampled a new topic t double rand = Randoms.uniform() * p[numTopics - 1]; for (t = 0; t < p.length; t++) { if (rand < p[t]) break; } // new topic t topics.put(u, i, t); // add newly estimated z_i to count variables userTopicNum.add(u, t, 1); userNum.add(u, 1); topicItemRatingNum[t][i][r]++; topicItemNum.add(t, i, 1); } } /** * Thomas P. 
Minka, Estimating a Dirichlet distribution, see Eq.(55) */ @Override protected void mStep() { double sumAlpha = alpha.sum(); double sumBeta = beta.sum(); double ak, br; // update alpha vector for (int k = 0; k < numTopics; k++) { ak = alpha.get(k); double numerator = 0, denominator = 0; for (int u = 0; u < numUsers; u++) { numerator += digamma(userTopicNum.get(u, k) + ak) - digamma(ak); denominator += digamma(userNum.get(u) + sumAlpha) - digamma(sumAlpha); } if (numerator != 0) alpha.set(k, ak * (numerator / denominator)); } // update beta_k for (int r = 0; r < numRatingLevels; r++) { br = beta.get(r); double numerator = 0, denominator = 0; for (int i = 0; i < numItems; i++) { for (int k = 0; k < numTopics; k++) { numerator += digamma(topicItemRatingNum[k][i][r] + br) - digamma(br); denominator += digamma(topicItemNum.get(k, i) + sumBeta) - digamma(sumBeta); } } if (numerator != 0) beta.set(r, br * (numerator / denominator)); } } protected void readoutParams() { double val = 0; double sumAlpha = alpha.sum(); for (int u = 0; u < numUsers; u++) { for (int k = 0; k < numTopics; k++) { val = (userTopicNum.get(u, k) + alpha.get(k)) / (userNum.get(u) + sumAlpha); userTopicSumProbs.add(u, k, val); } } double sumBeta = beta.sum(); for (int k = 0; k < numTopics; k++) { for (int i = 0; i < numItems; i++) { for (int r = 0; r < numRatingLevels; r++) { val = (topicItemRatingNum[k][i][r] + beta.get(r)) / (topicItemNum.get(k, i) + sumBeta); topicItemRatingSumProbs[k][i][r] += val; } } } numStats++; } @Override protected void estimateParams() { userTopicProbs = userTopicSumProbs.scale(1.0 / numStats); topicItemRatingProbs = new double[numTopics][numItems][numRatingLevels]; for (int k = 0; k < numTopics; k++) { for (int i = 0; i < numItems; i++) { for (int r = 0; r < numRatingLevels; r++) { topicItemRatingProbs[k][i][r] = topicItemRatingSumProbs[k][i][r] / numStats; } } } } @Override protected boolean isConverged(int iter) { if (validMatrix == null) return false; // get posterior 
probability distribution first estimateParams(); // compute current RMSE int numCount = 0; double sum = 0; for (MatrixEntry me : validMatrix) { double rate = me.get(); int u = me.row(); int j = me.column(); double pred = 0; try { pred = predict(u, j, true); } catch (LibrecException e) { e.printStackTrace(); } if (Double.isNaN(pred)) continue; double err = rate - pred; sum += err * err; numCount++; } double RMSE = Math.sqrt(sum / numCount); double delta = RMSE - preRMSE; if (numStats > 1 && delta > 0) return true; preRMSE = RMSE; return false; } @Override protected double predict(int userIdx, int itemIdx) throws LibrecException { double pred = 0; for (int r = 0; r < numRatingLevels; r++) { double rate = ratingScale.get(r); double prob = 0; for (int k = 0; k < numTopics; k++) { prob += userTopicProbs.get(userIdx, k) * topicItemRatingProbs[k][itemIdx][r]; } pred += prob * rate; } return pred; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy