rortega.cf4j-recsys.1.1.0.source-code.DatasetSplitters Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cf4j-recsys Show documentation
Show all versions of cf4j-recsys Show documentation
A Java Collaborative Filtering library for carrying out experiments in research on Collaborative Filtering based Recommender Systems. The library has been designed by researchers, for researchers.
The newest version!
package cf4j;
import java.util.List;
import java.util.Map;
import java.util.function.BiFunction;
/**
 * Built-in lambda functions used to split a dataset into test and training sets.
 *
 * <p>Every factory method returns a function that receives the code of an element and its
 * ratings, and answers whether that element belongs to the test set ({@code true}) or to the
 * training set ({@code false}).
 *
 * @author Fernando Ortega
 */
public class DatasetSplitters {

    // NOTE(review): the type arguments were missing in this copy of the file. They have been
    // restored as <Integer, Map<Integer, Double>, Boolean> (and List<Integer> for explicit),
    // based on the imports and on how the lambdas use their arguments; confirm against callers.

    /**
     * Divides the set based on a random probability. If a random number is lower than the
     * probability, the element will be defined as test. Otherwise, the element will be defined
     * as training.
     *
     * <p>A probability of 0 (or less) never selects an element and a probability of 1 (or more)
     * always selects it.
     *
     * @param probability Probability of the element to be defined as test.
     * @return Function that returns true if the element is test; false otherwise.
     */
    public static BiFunction<Integer, Map<Integer, Double>, Boolean> random(double probability) {
        // Strict comparison: Math.random() may return exactly 0.0, which must not
        // select an element when the probability is 0.
        return (code, ratings) -> Math.random() < probability;
    }

    /**
     * Divides the set based on the number of ratings of each element. If the element has at
     * least minNumberOfRatings ratings it will be defined as test. Otherwise, the element will
     * be defined as training.
     *
     * @param minNumberOfRatings Minimum number of ratings of an element to be defined as test.
     * @return Function that returns true if the element is test; false otherwise.
     */
    public static BiFunction<Integer, Map<Integer, Double>, Boolean> minNumberOfRatings(int minNumberOfRatings) {
        return (code, ratings) -> ratings.size() >= minNumberOfRatings;
    }

    /**
     * Divides the set based on the number of ratings of each element. If the element has
     * maxNumberOfRatings ratings or fewer it will be defined as test. Otherwise, the element
     * will be defined as training. Useful for cold start situations.
     *
     * @param maxNumberOfRatings Maximum number of ratings of an element to be defined as test.
     * @return Function that returns true if the element is test; false otherwise.
     */
    public static BiFunction<Integer, Map<Integer, Double>, Boolean> maxNumberOfRatings(int maxNumberOfRatings) {
        return (code, ratings) -> ratings.size() <= maxNumberOfRatings;
    }

    /**
     * Divides the set based on an explicit list of codes. If the element code is included in
     * the list, it will be defined as test. Otherwise, the element will be defined as training.
     *
     * @param codes List of test codes.
     * @return Function that returns true if the element is test; false otherwise.
     */
    public static BiFunction<Integer, Map<Integer, Double>, Boolean> explicit(List<Integer> codes) {
        return (code, ratings) -> codes.contains(code);
    }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy