All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.grouplens.lenskit.knn.item.model.ItemItemBuildContextProvider Maven / Gradle / Ivy

/*
 * LensKit, an open source recommender systems toolkit.
 * Copyright 2010-2014 LensKit Contributors.  See CONTRIBUTORS.md.
 * Work on LensKit has been funded by the National Science Foundation under
 * grants IIS 05-34939, 08-08692, 08-12148, and 10-17697.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 51
 * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */
package org.grouplens.lenskit.knn.item.model;

import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongSortedSet;
import org.grouplens.lenskit.collections.LongKeyDomain;
import org.grouplens.lenskit.collections.LongUtils;
import org.grouplens.lenskit.core.Transient;
import org.grouplens.lenskit.cursors.Cursor;
import org.grouplens.lenskit.data.dao.UserEventDAO;
import org.grouplens.lenskit.data.event.Event;
import org.grouplens.lenskit.data.history.UserHistory;
import org.grouplens.lenskit.data.history.UserHistorySummarizer;
import org.grouplens.lenskit.scored.ScoredIdListBuilder;
import org.grouplens.lenskit.scored.ScoredIds;
import org.grouplens.lenskit.transform.normalize.UserVectorNormalizer;
import org.grouplens.lenskit.vectors.MutableSparseVector;
import org.grouplens.lenskit.vectors.SparseVector;
import org.grouplens.lenskit.vectors.VectorEntry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.inject.Provider;

/**
 * Provider that sets up an {@link ItemItemBuildContext}.
 *
 * <p>The provider streams every user's event history from the injected
 * {@link UserEventDAO}, summarizes and normalizes it, and transposes the
 * resulting user vectors into per-item vectors keyed by user ID. It also
 * records, for each user, the set of items present in that user's summary.
 *
 * @author GroupLens Research
 */
public class ItemItemBuildContextProvider implements Provider<ItemItemBuildContext> {

    private static final Logger logger = LoggerFactory.getLogger(ItemItemBuildContextProvider.class);

    private final UserEventDAO userEventDAO;
    private final UserVectorNormalizer normalizer;
    private final UserHistorySummarizer userSummarizer;

    /**
     * Creates a provider from its collaborators.
     *
     * @param edao           the DAO used to stream events grouped by user.
     * @param normalizer     the normalizer applied to each user's summary vector.
     * @param userSummarizer the summarizer that turns a user history into a vector.
     */
    @Inject
    public ItemItemBuildContextProvider(@Transient UserEventDAO edao,
                                        @Transient UserVectorNormalizer normalizer,
                                        @Transient UserHistorySummarizer userSummarizer) {
        userEventDAO = edao;
        this.normalizer = normalizer;
        this.userSummarizer = userSummarizer;
    }

    /**
     * Constructs and returns a new ItemItemBuildContext.
     *
     * @return a new ItemItemBuildContext.
     */
    @Override
    public ItemItemBuildContext get() {
        logger.info("constructing build context");
        logger.debug("using normalizer {}", normalizer);
        logger.debug("using summarizer {}", userSummarizer);

        logger.debug("Building item data");
        Long2ObjectMap<ScoredIdListBuilder> itemRatingData =
                new Long2ObjectOpenHashMap<ScoredIdListBuilder>(1000);
        Long2ObjectMap<LongSortedSet> userItems =
                new Long2ObjectOpenHashMap<LongSortedSet>(1000);
        buildItemRatings(itemRatingData, userItems);

        LongKeyDomain items = LongKeyDomain.fromCollection(itemRatingData.keySet(), true);
        final int n = items.domainSize();
        assert n == itemRatingData.size();
        // finalize the item data into vectors, one per index of the item key domain
        SparseVector[] itemRatings = new SparseVector[n];

        for (int i = 0; i < n; i++) {
            final long item = items.getKey(i);
            ScoredIdListBuilder ratings = itemRatingData.get(item);
            SparseVector v = ratings.buildVector();
            assert v.size() == ratings.size();
            itemRatings[i] = v;
            // release some memory: the accumulator is no longer needed once the vector exists
            ratings.clear();
        }

        logger.debug("item data completed");
        return new ItemItemBuildContext(items, itemRatings, userItems);
    }

    /**
     * Transpose the user matrix so we have a matrix of item ids to ratings, and record the
     * set of items in each user's summary vector.
     *
     * <p>The user cursor is always closed, even if processing a user fails.
     *
     * @param itemRatings mapping from item ids to accumulators of (userId, normalized value)
     *                    pairs (to be filled).
     * @param userItems   mapping of user IDs to rated item sets (to be filled).
     */
    private void buildItemRatings(Long2ObjectMap<ScoredIdListBuilder> itemRatings,
                                  Long2ObjectMap<LongSortedSet> userItems) {
        Cursor<UserHistory<Event>> users = userEventDAO.streamEventsByUser();
        try {
            for (UserHistory<Event> user : users) {
                long uid = user.getUserId();
                SparseVector summary = userSummarizer.summarize(user);
                // normalize into a copy so the unnormalized summary stays available below
                MutableSparseVector normed = summary.mutableCopy();
                normalizer.normalize(uid, summary, normed);

                for (VectorEntry rating : normed) {
                    final long item = rating.getKey();
                    // get the item's rating accumulator, creating it on first sight of the item
                    ScoredIdListBuilder ivect = itemRatings.get(item);
                    if (ivect == null) {
                        ivect = ScoredIds.newListBuilder(100);
                        itemRatings.put(item, ivect);
                    }
                    ivect.add(uid, rating.getValue());
                }

                // remember which items this user has in their summary vector
                userItems.put(uid, LongUtils.packedSet(summary.keySet()));
            }
        } finally {
            users.close();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy