scripts.algorithms.l2-svm-predict.dml Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Declarative Machine Learning
There is a newer version: 1.2.0
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

# This script can be used to compute label predictions
# Meant for use with a model learnt using l2-svm.dml
#
# Given ground truth labels, the script will compute an 
# accuracy (%) for the predictions
#
# INPUT PARAMETERS:
# ---------------------------------------------------------------------------------------------
# NAME      TYPE        DEFAULT     MEANING
# ---------------------------------------------------------------------------------------------
# X         String      ---         Location to read the matrix X of feature vectors
# model     String      ---         Location of the existing model generated by l2-svm
# fmt       String      "text"      Format used to write the scores, such as "text" or "csv"
# Y         String      ---         [OPTIONAL] Location to read the true label matrix Y. Only needed
#                                   for evaluating performance (accuracy, confusion) of the model.
# confusion String      ---         [OPTIONAL] Location to write confusion matrix, valid if Y supplied
# accuracy  String      ---         [OPTIONAL] Location to write the accuracy string, valid if Y supplied
# scores    String      ---         [OPTIONAL] Location to write model predictions
# ---------------------------------------------------------------------------------------------
#
# Example Usage:
# hadoop jar SystemML.jar -f l2-svm-predict.dml -nvargs X=data Y=labels model=model scores=scores accuracy=accuracy confusion=confusion fmt="text"
#
# Note about inputs: 
# labels (entries in Y) should either be set to +1/-1
# or be the result of recoding
# anything else may prompt an error message from this script

# ---- Command-line arguments ----
# Optional locations default to a single blank " ", which the rest of the
# script treats as "not supplied".
cmdLine_fmt = ifdef($fmt, "text")
cmdLine_scores = ifdef($scores, " ")
cmdLine_Y = ifdef($Y, " ")
cmdLine_accuracy = ifdef($accuracy, " ")
cmdLine_confusion = ifdef($confusion, " ")
cmdLine_scoring_only = ifdef($scoring_only, FALSE)

# ---- Inputs ----
X = read($X)
model = read($model)

# The last four rows of the model column hold metadata, read here from the
# bottom up: number of features, intercept flag, negative label, positive label.
model_rows = nrow(model)

num_features = as.scalar(model[model_rows, 1])
if(num_features != ncol(X)){
  stop("Stopping due to invalid input: Model dimensions do not seem to match input data dimensions")
}

has_intercept = as.scalar(model[model_rows - 1, 1])
negative_label = as.scalar(model[model_rows - 2, 1])
positive_label = as.scalar(model[model_rows - 3, 1])

# Everything above the metadata rows is the weight vector
# (its last entry is the bias term when the intercept flag is 1).
weights = model[1:(model_rows - 4), ]

bias = 0.0
if(has_intercept == 1){
  bias = as.scalar(weights[nrow(weights), 1])
}

# ---- Scoring ----
# Only the first ncol(X) weights multiply the features; the bias is added separately.
scores = bias + (X %*% weights[1:ncol(X), ])

if(cmdLine_scores != " "){
  write(scores, cmdLine_scores, format=cmdLine_fmt)
}

# ---- Evaluation (accuracy and optional confusion matrix) ----
# Runs only when scoring_only is not set AND ground truth labels Y were supplied.
# Y is an optional argument; without this guard the script would try to read
# the placeholder path " " and fail even though only scores were requested.
if(!cmdLine_scoring_only){
  if(cmdLine_Y == " "){
    print("No ground truth labels (Y) supplied: skipping accuracy and confusion matrix computation")
  }
  else{
    Y = read(cmdLine_Y)

    # pred is a 0/1 indicator: 1 where the score is non-negative (positive class)
    pred = (scores >= 0)
    # Map the indicator back to the label values stored in the model
    pred_labels = pred*positive_label + (1-pred)*negative_label
    num_correct = sum(pred_labels == Y)
    acc = 100*num_correct/nrow(X)

    acc_str = "Accuracy (%): " + acc
    print(acc_str)

    if(cmdLine_accuracy != " ")
      write(acc_str, cmdLine_accuracy)

    if(cmdLine_confusion != " "){

      # Class indicators for predictions come straight from the 0/1 pred matrix
      pred_is_plus = pred
      pred_is_minus = 1 - pred

      # Compare Y against the stored negative label directly instead of first
      # rescaling Y to -1/+1: the rescaling is an affine floating-point
      # transform, and an exact "== -1" test on its result can miss due to rounding.
      # As before, every entry of Y that is not the negative label counts as positive.
      y_is_minus = (Y == negative_label)
      y_is_plus = 1 - y_is_minus

      check_min_y_minus = sum(pred_is_minus*y_is_minus)
      check_min_y_plus = sum(pred_is_minus*y_is_plus)
      check_max_y_minus = sum(pred_is_plus*y_is_minus)
      check_max_y_plus = sum(pred_is_plus*y_is_plus)

      # Rows: predicted class (negative, positive); columns: true class (negative, positive)
      confusion_mat = matrix(0, rows=2, cols=2)
      confusion_mat[1,1] = check_min_y_minus
      confusion_mat[1,2] = check_min_y_plus
      confusion_mat[2,1] = check_max_y_minus
      confusion_mat[2,2] = check_max_y_plus

      write(confusion_mat, cmdLine_confusion, format="csv")
    }
  }
}