org.grouplens.lenskit.knn.item.model.ItemItemModelBuilder Maven / Gradle / Ivy
/*
* LensKit, an open source recommender systems toolkit.
* Copyright 2010-2013 Regents of the University of Minnesota and contributors
* Work on LensKit has been funded by the National Science Foundation under
* grants IIS 05-34939, 08-08692, 08-12148, and 10-17697.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 51
* Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.grouplens.lenskit.knn.item.model;
import com.google.common.base.Preconditions;
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongIterator;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import org.grouplens.lenskit.core.Transient;
import org.grouplens.lenskit.knn.item.ItemSimilarity;
import org.grouplens.lenskit.knn.item.ModelSize;
import org.grouplens.lenskit.scored.ScoredId;
import org.grouplens.lenskit.transform.threshold.Threshold;
import org.grouplens.lenskit.util.ScoredItemAccumulator;
import org.grouplens.lenskit.util.TopNScoredItemAccumulator;
import org.grouplens.lenskit.util.UnlimitedScoredItemAccumulator;
import org.grouplens.lenskit.vectors.SparseVector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.concurrent.NotThreadSafe;
import javax.inject.Inject;
import javax.inject.Provider;
import java.util.List;
/**
* Build an item-item CF model from rating data.
* This builder takes a very simple approach. It does not allow for vector
* normalization and truncates on the fly.
*
* @author GroupLens Research
*/
@NotThreadSafe
public class ItemItemModelBuilder implements Provider {
private static final Logger logger = LoggerFactory.getLogger(ItemItemModelBuilder.class);
private final ItemSimilarity itemSimilarity;
private final ItemItemBuildContextFactory contextFactory;
private final Threshold threshold;
private final int modelSize;
@Inject
public ItemItemModelBuilder(@Transient ItemSimilarity similarity,
@Transient ItemItemBuildContextFactory ctxFactory,
@Transient Threshold thresh,
@ModelSize int size) {
itemSimilarity = similarity;
contextFactory = ctxFactory;
threshold = thresh;
modelSize = size;
}
@Override
public SimilarityMatrixModel get() {
logger.debug("building item-item model");
ItemItemBuildContext buildContext = contextFactory.buildContext();
Accumulator accumulator = new Accumulator(buildContext.getItems(), threshold, modelSize);
for (long itemId1 : buildContext.getItems()) {
SparseVector vec1 = buildContext.itemVector(itemId1);
LongIterator itemIter;
if (itemSimilarity.isSparse()) {
if (itemSimilarity.isSymmetric()) {
itemIter = buildContext.getUserItems(vec1.keySet()).iterator(itemId1);
} else {
itemIter = buildContext.getUserItems(vec1.keySet()).iterator();
}
} else {
if (itemSimilarity.isSymmetric()) {
itemIter = buildContext.getItems().iterator(itemId1);
} else {
itemIter = buildContext.getItems().iterator();
}
}
while (itemIter.hasNext()) {
long itemId2 = itemIter.nextLong();
if (itemId1 != itemId2) {
SparseVector vec2 = buildContext.itemVector(itemId2);
double sim = itemSimilarity.similarity(itemId1, vec1, itemId2, vec2);
accumulator.put(itemId1, itemId2, sim);
if (itemSimilarity.isSymmetric()) {
accumulator.put(itemId2, itemId1, sim);
}
}
}
}
return accumulator.build();
}
static class Accumulator {
private final Threshold threshold;
private Long2ObjectMap rows;
private final LongSortedSet itemUniverse;
public Accumulator(LongSortedSet entities, Threshold threshold, int modelSize) {
logger.debug("Using simple accumulator with modelSize {} for {} items", modelSize, entities.size());
this.threshold = threshold;
itemUniverse = entities;
rows = new Long2ObjectOpenHashMap(entities.size());
for (long itemId : itemUniverse) {
if (modelSize == 0) {
rows.put(itemId, new UnlimitedScoredItemAccumulator());
} else {
rows.put(itemId, new TopNScoredItemAccumulator(modelSize));
}
}
}
public void put(long i, long j, double sim) {
Preconditions.checkState(rows != null, "model already built");
if (!threshold.retain(sim)) {
return;
}
ScoredItemAccumulator q = rows.get(i);
q.put(j, sim);
}
public SimilarityMatrixModel build() {
Long2ObjectMap> data = new Long2ObjectOpenHashMap>(rows.size());
for (Long2ObjectMap.Entry row : rows.long2ObjectEntrySet()) {
List similarities = row.getValue().finish();
data.put(row.getLongKey(), similarities);
}
SimilarityMatrixModel model = new SimilarityMatrixModel(itemUniverse, data);
rows = null; // Mark that this model has already been built.
return model;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy