// All downloads are free. Search and download functionality uses the official Maven repository.
//
// com.github.chen0040.sparkml.recommender.ItemCorrelationRecommender (Maven / Gradle / Ivy)
//
// There is a newer version of this artifact: 1.0.5
// (Show newest version)
package com.github.chen0040.sparkml.recommender;


import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import scala.Tuple2;
import scala.Tuple3;
import scala.Tuple7;

import java.util.ArrayList;
import java.util.List;


/**
 * Created by xschen on 5/6/2017.
 */
public class ItemCorrelationRecommender {

   public JavaRDD fitAndTransform(JavaRDD cells){
      JavaPairRDD> rdd2 = cells.mapToPair(cell -> {

            String user = cell.getUser();
            String movie = cell.getItem();
            double rating = cell.getValue();
            return new Tuple2<>(user, new Tuple2<>(movie, rating));
         });

      JavaPairRDD>> rdd3 = rdd2.groupByKey();

      JavaPairRDD>> rdd4 = rdd3.mapValues(s -> {
            List> values = new ArrayList<>();
            for(Tuple2 entry : s) {
               values.add(entry);
            }

            int count = values.size(); // number of ratings given by a user
            List> result = new ArrayList<>();
            for(Tuple2 entry : values) {
               result.add(new Tuple3<>(entry._1(), entry._2(), count));
            }

            return result;
         });

      JavaPairRDD, Tuple7> rdd5 = rdd4.flatMapToPair(s -> {
            String user = s._1();
            Iterable> user_ratings = s._2();
            List> values = new ArrayList<>();
            for(Tuple3 entry : user_ratings) {
               values.add(entry);
            }

            List, Tuple7>> result = new ArrayList<>();

            for(int i=0; i < values.size()-1; ++i) {
               Tuple3 user_rating_i = values.get(i);
               String movie1 = user_rating_i._1();
               double rating1 = user_rating_i._2();
               int numRater1 = user_rating_i._3();

               for(int j=i+1; j < values.size(); ++j) {
                  Tuple3 user_rating_j = values.get(j);
                  String movie2 = user_rating_j._1();
                  double rating2 = user_rating_j._2();
                  int numRater2 = user_rating_j._3();

                  double ratingProd = rating1 * rating2;
                  double ratingSqr1 = rating1 * rating1;
                  double ratingSqr2 = rating2 * rating2;

                  result.add(new Tuple2<>(new Tuple2<>(movie1, movie2),
                          new Tuple7<>(rating1, rating2, numRater1, numRater2, ratingProd, ratingSqr1, ratingSqr2)));
               }
            }

            return result;

         });

      JavaPairRDD, Iterable>> rdd6 = rdd5.groupByKey();

      JavaPairRDD, Iterable>> rdd7 = rdd6.filter(s -> {
            Tuple2 moviePair = s._1();
            return moviePair._1().compareTo(moviePair._2()) < 0;
         });

      return rdd7.map(t -> {

         Iterable> s = t._2();
         Tuple2 movie_pair = t._1();
            double sumRating1 = 0;
            double sumRating2 = 0;
            double sumSqrRating1 = 0;
            double sumSqrRating2 = 0;
            double sumRatingProd = 0;

            int N = 0;
            int maxNumRaters1 = 0;
            int maxNumRaters2 = 0;
            for(Tuple7 entry : s) {
               sumRating1 += entry._1();
               sumRating2 += entry._2();
               maxNumRaters1 = Math.max(maxNumRaters1, entry._3());
               maxNumRaters2 = Math.max(maxNumRaters2, entry._4());

               sumRatingProd += entry._5();

               sumSqrRating1 += entry._6();
               sumSqrRating2 += entry._7();
               N++;
            }



            double pearson = (N * sumRatingProd - sumRating1 * sumRating2) / (Math.sqrt(N * sumSqrRating1 - sumRating1 * sumRating1) * Math.sqrt(N * sumSqrRating2 - sumRating2 * sumRating2));
            double jaccard = (double)N / (maxNumRaters1 + maxNumRaters2 - N);
            double cosine = sumRatingProd / (Math.sqrt(sumSqrRating1) * Math.sqrt(sumSqrRating2));

            ItemCorrelation result = new ItemCorrelation();
            result.setCosine(cosine);
            result.setPearson(pearson);
            result.setJaccard(jaccard);
            result.setItem1(movie_pair._1());
            result.setItem2(movie_pair._2());

            return result;

         });


   }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy