All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scripts.algorithms.ALS_predict.dml Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

#  
# THIS SCRIPT COMPUTES THE RATING/SCORE FOR A GIVEN LIST OF PAIRS: (USER-ID, ITEM-ID) USING 2 FACTOR MATRICES L AND R
# WE ASSUME THAT ALL USERS HAVE RATED AT LEAST ONCE AND ALL ITEMS HAVE BEEN RATED AT LEAST ONCE.
# INPUT   PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME    TYPE     DEFAULT  MEANING
# ---------------------------------------------------------------------------------------------
# X       String   ---      The input user-id/item-id list
# Y       String   ---      Location to write the output user-id/item-id/score
# L       String   ---      Location of the factor matrix L: user-id x feature-id 
# R       String   ---      Location of the factor matrix R: feature-id x item-id
# Vrows   Integer  ---      The number of rows in the original matrix
# Vcols   Integer  ---      The number of columns in the original matrix
# fmt     String   "text"   The output format of the predicted ratings Y (user-id/item-id/score)
# ---------------------------------------------------------------------------------------------
# OUTPUT: Matrix Y containing the predicted ratings for users and items specified in input matrix X
#
# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
# hadoop jar SystemML.jar -f ALS_predict.dml -nvargs Vrows=100000 Vcols=10000 X=INPUT_DIR/X L=OUTPUT_DIR/L R=OUTPUT_DIR/R 
#											  Y=OUTPUT_DIR/Y fmt=csv

# ---- Named command-line arguments -------------------------------------------
fileX = $X;                           # location of the (user-id, item-id) list
fileY = $Y;                           # location the predictions are written to
fileL = $L;                           # location of factor matrix L
fileR = $R;                           # location of factor matrix R
Vrows = $Vrows;                       # number of rows of the original matrix V
Vcols = $Vcols;                       # number of columns of the original matrix V
fmtO  = ifdef ($fmt, "text");         # output format; "text" when fmt is not supplied

# ---- Inputs -----------------------------------------------------------------
X = read (fileX);
L = read (fileL);
R = read (fileR);

# ---- Validation -------------------------------------------------------------
# X must have exactly 2 columns: user-id and item-id
n = nrow (X);
m = ncol (X);

if (m != 2) {
	stop("The input matrix must have 2 columns: user-id and item-id");
}

Lrows = nrow (L);
Rcols = ncol (R);

X_user_max = max (X[,1]);
X_item_max = max (X[,2]);

# every requested id must address a row/column of V
if (X_user_max > Vrows | X_item_max > Vcols) {
	stop ("Predictions cannot be provided. Maximum user-id (item-id) exceed the number of rows (columns) of V.");
}
# the factors must have the same outer dimensions as V
if (Lrows != Vrows | Rcols != Vcols) {
	stop ("Predictions cannot be provided. Number of rows of L (columns of R) does not match the number of rows (column) of V.");
}

# ---- Prediction -------------------------------------------------------------
# user2item table: one cell per requested (user-id, item-id) pair
ones = matrix (1, rows = n, cols = 1);
UI = table (X[,1], X[,2], ones, Vrows, Vcols);

# table() adds up the weights of repeated pairs, so a pair listed k times in X
# ends up with the value k. UI is used as a multiplicative mask below, which
# would scale that pair's prediction by k; clamp it to a 0/1 indicator instead.
UI = UI >= 1;

# summing up over all items for all users
U = rowSums (UI);

# replacing all entries >= 1 with 1: U marks the users that appear in X
U = U >= 1;

# selecting users from factor L (rows of users not in X become zero)
U_prime = L * U;

# predicted ratings of the selected users for all items
V_prime = U_prime %*% R;

# applying items filter: keep only the requested (user-id, item-id) cells
V_prime = UI * V_prime;

write (V_prime, fileY, format = fmtO);




© 2015 - 2024 Weber Informatics LLC | Privacy Policy