All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scripts.algorithms.CsplineDS.dml Maven / Gradle / Ivy

There is a newer version: 1.2.0
Show newest version
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
# 
#   http://www.apache.org/licenses/LICENSE-2.0
# 
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
#
# THIS SCRIPT SOLVES CUBIC SPLINE INTERPOLATION USING THE DIRECT SOLVER
#
# INPUT PARAMETERS:
# --------------------------------------------------------------------------------------------
# NAME  TYPE   DEFAULT  MEANING
# --------------------------------------------------------------------------------------------
# X     String  ---     Location (on HDFS) to read the 1-column matrix of x values knots
# Y     String  ---     Location (on HDFS) to read the 1-column matrix of corresponding y values knots
# K     String  ---     Location to store the $k_{i}$ -file for the calculated k vectors. the default is to print it to the standard output
# O     String  ---     Location to store the output predicted y the default is to print it to the standard output
# inp_x Double  ---     the given input x, for which the cspline will find predicted y.
#
# fmt   String "text"   Matrix file output format, such as `text`,`mm`, or `csv`
# --------------------------------------------------------------------------------------------
# OUTPUT: Matrix of k parameters (the betas)
#
# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
# hadoop jar SystemML.jar -f CsplineDS.dml -nvargs X=INPUT_DIR/X Y= INPUT_DIR/Y O=OUTPUT_DIR/Out
# -inp_x=4.5 fmt=csv




#Assumptions:
# - The inputs xs are monotonically increasing,
# - there is no duplicates points in x

#Algorithms: It implement the https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline
#it usages natural spline with q1''(x0) == qn''(xn) == 0.0



##BEGIN Main Func
fileX = $X;
fileY = $Y;
fileK = $K;
fileO = ifdef($O, " ");
inp_x = $inp_x
fmtO = ifdef ($fmt, "text");

print ("BEGIN CUBIC SPLINE SCRIPT");

print ("Reading X and Y ...");
xs = read (fileX);
ys = read (fileY);

print("Calculating Ks ...")
ks = xs
ks = calcKnotsDerivKs(xs, ys)
print("Writing Ks ...")
write (ks, fileK, format=fmtO);

print("Interpolating ...")
x = inp_x
y = interpSpline(x, xs, ys, ks)

print("For inp_x = " + $inp_x + " Calculated y = " + y)

y_mat = matrix(y, 1, 1)

if (fileO != " ") {
   write (y_mat, fileO);
 } else {
   print(y)
}


print ("END CUBIC SPLINE REGRESSION SCRIPT");

##END Main Func



#given X and corresponding Y values for the function. where each (xi,yi) represents a knot.
#it calculates the first derivates i.e k of the interpolated polynomial
calcKnotsDerivKs = function (
  matrix[double] X, matrix[double] Y
) return (matrix[double] K) {
  nx = nrow(X)
  ny = nrow(Y)
  if (nx != ny) {
      stop("X and Y vectors are of different size")
  }

  Xu = trunc(X, 1, "up") # Xu is (X where X[0] is removed)
  Xd = trunc(X, 1, "down") # Xd is (X where X[n] is removed)

  Bx=1/(Xu-Xd) # The expr => 1/Delta(X(i) = 1/(X(i)-X(i-1))


  Bxd = resize(Bx, 1, 0, "tr") # Bxd is (0, Bx) vector
  Bxu = resize(Bx, 1, 0, "br") # Bxu is (Bx, 0) vector
  Dx = 2*(Bxd + Bxu) # the major diagonal entry 2(1/Delta(X(i) + 1/Delta(X(i+1)))

  MDx = diag(Dx) # convert vector to diagonal matrix

  MBx = diag(Bx) # this is the temp diagonal matrix, which will form the bands of the tri-diagonal matrix
  MUx = resize(MBx, 1, 1, "bl") # the upper diagonal matrix of the band
  MLx = resize(MBx, 1, 1, "tr") # the lower diagonal matrix of the band

  A=MUx+MDx+MLx # create the complete tri-diagonal matrix

  #calculate b matrix
  Yu = trunc(Y, 1, "up") # Yu is (Y where Y[0] is removed)
  Yd = trunc(Y, 1, "down") # Yd is (Y where Y[n] is removed)
  By=(Yu-Yd)/(Bx*Bx) # the expr => Delta(Y(i))/Delta(X(i))*Delta(X(i))

  By1=resize(By, 1, 0, "tr") # By1 is (0, By) vector
  By2=resize(By, 1, 0, "br") # By2 is (By, 0) vector
  b=3*(By1+By2) # the b entries 3*(Delta(Y(i))/Delta(X(i))*Delta(X(i)) + Delta(Y(i+1))/Delta(X(i+1))*Delta(X(i+1)))


  K = solve(A, b)  /* solve Ax = b for x vector and assign it to K*/

}

#given the X and Y n sample points and K (the first derivative of the interp polynomial), it calculate the
#  y for the given x using the cubic spline interpolation
interpSpline = function(
  double x, matrix[double] X, matrix[double] Y, matrix[double] K
) return (double q) {

  #first find the right knots for interpolation
  i = as.integer(nrow(X) - sum(X >= x) + 1)

  #calc the y as per the algo docs
  t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1])

  a =  K[i-1,1]*(X[i,1]-X[i-1,1]) - (Y[i,1]-Y[i-1,1])
  b = -K[i,1]*(X[i,1]-X[i-1,1]) + (Y[i,1]-Y[i-1,1])

  qm = (1-t)*Y[i-1,1] + t*Y[i,1] + t*(1-t)*(a*(1-t)+b*t)

  q = as.scalar(qm)

}

#
# trunc the matrix by the specified amount in the specified direction.
# The shifted cells are discarded, the matrix is smaller in size
#
trunc = function (
  matrix[double] X, # nxm matrix
  int by, # shift amount
  String dir # currently only 'up' is supported. but should handle 'up', 'down', 'left', 'right'
) return (matrix[double] Y) # Y is
{
    r = nrow(X); c = ncol(X);
    if (by > r) {
      stop("can't pop matrix more than number of rows")
    }
    Y = matrix(0.0, r-by, c)

    if (r != by ) {
      if (dir == "up") {
        Y[1:r-by,] = X[1+by:r,]
      } else if (dir == "down") {
        Y[1:r-by,] = X[1:r-by,]
      } else {
        stop("trunc unsupported direction " + dir)
      }
    }
}

# resize (only grow and not truncate) the matrix by the specified amount in the specified direction
resize = function(
  matrix[double] X, #nxm matrix
  int rby, # row resize count
  int cby,  # col resize count
  String dir
) return (matrix[double] Y) # Y is
{
   r = nrow(X); c = ncol(X);
   rn = r + rby; cn = c + cby;
   Y = matrix(0.0, rn, cn)
   if (dir == "tr") { # top right
     Y[1+rby:rn, 1:c] = X
   } else if (dir == "bl") { # bottom left
     Y[1:r, 1+cby:cn] = X
   } else if (dir == "tl") { # top left
     Y[1+rby:rn, 1+cby:cn ] = X
   } else if (dir == "br") { # bottom right
     Y[1:r, 1:c] = X
   } else {
     stop("Unknown direction dir => " + dir)
   }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy