scripts.algorithms.l2-svm-predict.dml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
# This script can be used to compute label predictions
# Meant for use with a model learnt using l2-svm.dml
#
# Given ground truth labels, the script will compute an
# accuracy (%) for the predictions
#
# INPUT PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# ---------------------------------------------------------------------------------------------
# X String --- Location to read the matrix X of feature vectors
# model String --- Location of the existing model generated by l2-svm
# fmt String "text" Format used when writing the scores, such as "text" or "csv"
# Y String --- [OPTIONAL] Location to read the true label matrix Y. Only needed
# for evaluating performance (accuracy, confusion) of the model.
# confusion String --- [OPTIONAL] Location to write confusion matrix, valid if Y supplied
# accuracy String --- [OPTIONAL] Location to write accuracy matrix, valid if Y supplied
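# scores String --- [OPTIONAL] Location to write the raw decision scores (X %*% w + b)
# scoring_only Boolean FALSE [OPTIONAL] If TRUE, skip reading Y and computing accuracy/confusion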
# ---------------------------------------------------------------------------------------------
#
# Example Usage:
# hadoop jar SystemML.jar -f l2-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
#
# Note about inputs:
# labels (entries in Y) should either be set to +1/-1
# or be the result of recoding
# anything else may prompt an error message from this script
# ---- Command-line arguments ------------------------------------------------
# Optional string arguments fall back to a single blank (" "), which the rest
# of the script treats as "not supplied".
cmdLine_fmt = ifdef($fmt, "text")
cmdLine_scoring_only = ifdef($scoring_only, FALSE)
cmdLine_scores = ifdef($scores, " ")
cmdLine_accuracy = ifdef($accuracy, " ")
cmdLine_confusion = ifdef($confusion, " ")
cmdLine_Y = ifdef($Y, " ")

# ---- Inputs ----------------------------------------------------------------
X = read($X)
model_vec = read($model)

num_features = ncol(X)
num_model_rows = nrow(model_vec)

# ---- Unpack the model ------------------------------------------------------
# The last four rows of the model column are read as metadata, from the bottom
# up: feature count, intercept flag, negative label, positive label.
model_num_features = as.scalar(model_vec[num_model_rows, 1])
if (model_num_features != num_features) {
    stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
}

has_intercept = as.scalar(model_vec[num_model_rows - 1, 1])
negative_label = as.scalar(model_vec[num_model_rows - 2, 1])
positive_label = as.scalar(model_vec[num_model_rows - 3, 1])

# Everything above the metadata rows is the coefficient vector; when the
# intercept flag is 1, its last entry is used as the bias term.
coeffs = model_vec[1:(num_model_rows - 4), ]

bias = 0.0
if (has_intercept == 1) {
    bias = as.scalar(coeffs[nrow(coeffs), 1])
}

# ---- Score -----------------------------------------------------------------
# Only the first ncol(X) coefficients multiply the features; the bias (if any)
# is added as a scalar.
scores = bias + (X %*% coeffs[1:num_features, ])

if (cmdLine_scores != " ") {
    write(scores, cmdLine_scores, format=cmdLine_fmt)
}
# ---- Evaluation against ground-truth labels --------------------------------
# Runs only when evaluation has not been disabled via scoring_only AND a label
# location was actually supplied. Y defaults to the " " sentinel when omitted,
# so without the second check the script would attempt read(" ") and fail even
# though Y is an optional argument.
if ((!cmdLine_scoring_only) & (cmdLine_Y != " ")) {
    Y = read(cmdLine_Y)

    # Threshold the scores at 0 (pred is a 0/1 indicator) and map the
    # indicator onto the label values stored in the model.
    pred = (scores >= 0)
    pred_labels = pred*positive_label + (1-pred)*negative_label

    # Accuracy as a percentage of the rows of X.
    num_correct = sum(pred_labels == Y)
    acc = 100*num_correct/nrow(X)

    acc_str = "Accuracy (%): " + acc
    print(acc_str)
    if (cmdLine_accuracy != " ") {
        write(acc_str, cmdLine_accuracy)
    }

    if (cmdLine_confusion != " ") {
        # Move predictions from {0,1} to {-1,+1}.
        pred = 2*pred - 1

        # If the model's labels are not already -1/+1, linearly rescale Y so
        # that negative_label maps to -1 and positive_label maps to +1.
        if (negative_label != -1 | positive_label != +1) {
            Y = 2/(positive_label - negative_label)*Y - (negative_label + positive_label)/(positive_label - negative_label)
        }

        # 0/1 indicators; anything that is not exactly -1 counts as "plus".
        pred_is_minus = (pred == -1)
        pred_is_plus = 1 - pred_is_minus
        y_is_minus = (Y == -1)
        y_is_plus = 1 - y_is_minus

        # Cell counts: rows index the predicted class (minus, plus),
        # columns index the true class (minus, plus).
        check_min_y_minus = sum(pred_is_minus*y_is_minus)
        check_min_y_plus = sum(pred_is_minus*y_is_plus)
        check_max_y_minus = sum(pred_is_plus*y_is_minus)
        check_max_y_plus = sum(pred_is_plus*y_is_plus)

        confusion_mat = matrix(0, rows=2, cols=2)
        confusion_mat[1,1] = check_min_y_minus
        confusion_mat[1,2] = check_min_y_plus
        confusion_mat[2,1] = check_max_y_minus
        confusion_mat[2,2] = check_max_y_plus

        # The confusion matrix is always written as csv, independent of fmt.
        write(confusion_mat, cmdLine_confusion, format="csv")
    }
}