All downloads are free. Search and download functionality uses the official Maven repository.

net.myrrix.online.generation.Generation Maven / Gradle / Ivy

/*
 * Copyright Myrrix Ltd
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.myrrix.online.generation;

import java.util.List;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import org.apache.commons.math3.linear.RealMatrix;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.myrrix.common.collection.FastByIDMap;
import net.myrrix.common.collection.FastIDSet;
import net.myrrix.common.math.IllConditionedSolverException;
import net.myrrix.common.math.MatrixUtils;
import net.myrrix.common.math.Solver;
import net.myrrix.online.candidate.CandidateFilter;
import net.myrrix.online.candidate.CandidateFilterFactory;

/**
 * Encapsulates one generation of the underlying recommender's model. The data in this object is quite
 * specific to the underlying Alternating Least Squares algorithm. A generation contains:
 *
 * 
    *
  • X, the user-feature matrix
  • *
  • Y, the item-feature matrix
  • *
  • XLeftInverse, X's left inverse
  • *
  • YTRightInverse, the right inverse of Y transpose
  • *
  • numFeatures, the column dimension of X and Y
  • *
  • knownItemIDs, the item IDs already associated to each user
  • *
  • itemTagIDs, IDs of "users" that are really item tags
  • *
  • userTagIDs, IDs of "items" that are really user tags
  • *
  • clusters of item IDs (in distributed mode), with centroids
  • *
  • clusters of user IDs (in distributed mode), with centroids
  • *
* * @author Sean Owen * @since 1.0 * @see net.myrrix.online.factorizer.MatrixFactorizer */ public final class Generation { private static final Logger log = LoggerFactory.getLogger(Generation.class); public static final String NO_KNOWN_ITEMS_KEY = "model.noKnownItems"; private final FastByIDMap knownItemIDs; private final FastByIDMap knownUserIDs; private final FastByIDMap X; private Solver XTXsolver; private final FastByIDMap Y; private Solver YTYsolver; private final FastIDSet itemTagIDs; private final FastIDSet userTagIDs; private final List userClusters; private final List itemClusters; private CandidateFilter candidateFilter; private final ReadWriteLock xLock; private final ReadWriteLock yLock; private final ReadWriteLock knownItemLock; private final ReadWriteLock knownUserLock; private final ReadWriteLock userClustersLock; private final ReadWriteLock itemClustersLock; public Generation(FastByIDMap knownItemIDs, FastByIDMap X, FastByIDMap Y) { this(knownItemIDs, X, Y, new FastIDSet(1000, 1.25f), new FastIDSet(1000, 1.25f)); } public Generation(FastByIDMap knownItemIDs, FastByIDMap X, FastByIDMap Y, FastIDSet itemTagIDs, FastIDSet userTagIDs) { this(knownItemIDs, X, Y, itemTagIDs, userTagIDs, Lists.newArrayList(), Lists.newArrayList()); } public Generation(FastByIDMap knownItemIDs, FastByIDMap X, FastByIDMap Y, FastIDSet itemTagIDs, FastIDSet userTagIDs, List userClusters, List itemClusters) { // knownItemIDs may be null, specially Preconditions.checkNotNull(X); Preconditions.checkNotNull(Y); Preconditions.checkNotNull(itemTagIDs); Preconditions.checkNotNull(userTagIDs); Preconditions.checkNotNull(userClusters); Preconditions.checkNotNull(itemClusters); this.knownItemIDs = knownItemIDs; this.knownUserIDs = null; // Not used yet this.X = X; this.XTXsolver = null; this.Y = Y; this.YTYsolver = null; this.itemTagIDs = itemTagIDs; this.userTagIDs = userTagIDs; this.userClusters = userClusters; this.itemClusters = itemClusters; this.candidateFilter = null; this.xLock 
= new ReentrantReadWriteLock(); this.yLock = new ReentrantReadWriteLock(); this.knownItemLock = new ReentrantReadWriteLock(); this.knownUserLock = null; // Not used yet this.userClustersLock = new ReentrantReadWriteLock(); this.itemClustersLock = new ReentrantReadWriteLock(); recomputeState(); } void recomputeState() { if (Boolean.parseBoolean(System.getProperty("model.solver.xtx.compute", "true"))) { XTXsolver = recomputeSolver(X, xLock.readLock()); } if (Boolean.parseBoolean(System.getProperty("model.solver.yty.compute", "true"))) { YTYsolver = recomputeSolver(Y, yLock.readLock()); } candidateFilter = CandidateFilterFactory.buildCandidateFilter(Y, yLock.readLock()); } private static Solver recomputeSolver(FastByIDMap M, Lock readLock) { readLock.lock(); try { if (M == null || M.isEmpty()) { return null; } RealMatrix MTM = MatrixUtils.transposeTimesSelf(M); double infNorm = MTM.getNorm(); if (infNorm < 1.0) { log.warn("X'*X or Y'*Y has small inf norm ({}); try decreasing model.als.lambda", infNorm); throw new IllConditionedSolverException("infNorm: " + infNorm); } return MatrixUtils.getSolver(MTM); } finally { readLock.unlock(); } } /** * @return the number of "users" in the model (rows of {@link #getX()} */ public int getNumUsers() { return X.size(); } /** * @return the number of "items" in the model (rows of {@link #getY()} */ public int getNumItems() { return Y.size(); } /** * @return the user-feature matrix, implemented as a map from row number (user ID) to feature array */ public FastByIDMap getX() { return X; } /** * @return {@link Solver} for the matrix X' * X */ public Solver getXTXSolver() { return XTXsolver; } /** * @return the item-feature matrix, implemented as a map from row number (item ID) to feature array */ public FastByIDMap getY() { return Y; } /** * @return {@link Solver} for the matrix Y' * Y */ public Solver getYTYSolver() { return YTYsolver; } /** * @return the item IDs already associated to each user, as a map from user IDs to a set of item 
IDs */ public FastByIDMap getKnownItemIDs() { return knownItemIDs; } /** * Not used, yet. * @return null */ public FastByIDMap getKnownUserIDs() { return knownUserIDs; } /** * @return "user" IDs which are actually item tags */ public FastIDSet getItemTagIDs() { return itemTagIDs; } /** * @return "item" IDs which are actually user tags */ public FastIDSet getUserTagIDs() { return userTagIDs; } /** * @return clusters of user IDs, or {@code null} if not in distributed mode */ public List getUserClusters() { return userClusters; } /** * @return clusters of item IDs, or {@code null} if not in distributed mode */ public List getItemClusters() { return itemClusters; } public CandidateFilter getCandidateFilter() { return candidateFilter; } /** * Acquire this read/write lock before using {@link #getX()} or {@link #getItemTagIDs()}. */ public ReadWriteLock getXLock() { return xLock; } /** * Acquire this read/write lock before using {@link #getY()} or {@link #getUserTagIDs()}. */ public ReadWriteLock getYLock() { return yLock; } /** * Acquire this read/write lock before using {@link #getKnownItemIDs()}. */ public ReadWriteLock getKnownItemLock() { return knownItemLock; } /** * Not used, yet. * @return null */ public ReadWriteLock getKnownUserLock() { return knownUserLock; } /** * Acquire this read/write lock before using {@link #getUserClusters()}. */ public ReadWriteLock getUserClustersLock() { return userClustersLock; } /** * Acquire this read/write lock before using {@link #getItemClusters()}. */ public ReadWriteLock getItemClustersLock() { return itemClustersLock; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy