scripts.algorithms.CsplineDS.dml Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of systemml Show documentation
Show all versions of systemml Show documentation
Declarative Machine Learning
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------
#
# THIS SCRIPT SOLVES CUBIC SPLINE INTERPOLATION USING THE DIRECT SOLVER
#
# INPUT PARAMETERS:
# --------------------------------------------------------------------------------------------
# NAME TYPE DEFAULT MEANING
# --------------------------------------------------------------------------------------------
# X String --- Location (on HDFS) to read the 1-column matrix of x values knots
# Y String --- Location (on HDFS) to read the 1-column matrix of corresponding y values knots
# K String --- Location to store the $k_{i}$ -file for the calculated k vectors. the default is to print it to the standard output
# O String --- Location to store the output predicted y the default is to print it to the standard output
# inp_x Double --- the given input x, for which the cspline will find predicted y.
#
# fmt String "text" Matrix file output format, such as `text`,`mm`, or `csv`
# --------------------------------------------------------------------------------------------
# OUTPUT: Matrix of k parameters (the betas)
#
# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
# hadoop jar SystemML.jar -f CsplineDS.dml -nvargs X=INPUT_DIR/X Y= INPUT_DIR/Y O=OUTPUT_DIR/Out
# -inp_x=4.5 fmt=csv
#Assumptions:
# - The inputs xs are monotonically increasing,
# - there is no duplicates points in x
#Algorithms: It implement the https://en.wikipedia.org/wiki/Spline_interpolation#Algorithm_to_find_the_interpolating_cubic_spline
#it usages natural spline with q1''(x0) == qn''(xn) == 0.0
##BEGIN Main Func
fileX = $X;
fileY = $Y;
fileK = $K;
fileO = ifdef($O, " ");
inp_x = $inp_x
fmtO = ifdef ($fmt, "text");
print ("BEGIN CUBIC SPLINE SCRIPT");
print ("Reading X and Y ...");
xs = read (fileX);
ys = read (fileY);
print("Calculating Ks ...")
ks = xs
ks = calcKnotsDerivKs(xs, ys)
print("Writing Ks ...")
write (ks, fileK, format=fmtO);
print("Interpolating ...")
x = inp_x
y = interpSpline(x, xs, ys, ks)
print("For inp_x = " + $inp_x + " Calculated y = " + y)
y_mat = matrix(y, 1, 1)
if (fileO != " ") {
write (y_mat, fileO);
} else {
print(y)
}
print ("END CUBIC SPLINE REGRESSION SCRIPT");
##END Main Func
#given X and corresponding Y values for the function. where each (xi,yi) represents a knot.
#it calculates the first derivates i.e k of the interpolated polynomial
calcKnotsDerivKs = function (
matrix[double] X, matrix[double] Y
) return (matrix[double] K) {
nx = nrow(X)
ny = nrow(Y)
if (nx != ny) {
stop("X and Y vectors are of different size")
}
Xu = trunc(X, 1, "up") # Xu is (X where X[0] is removed)
Xd = trunc(X, 1, "down") # Xd is (X where X[n] is removed)
Bx=1/(Xu-Xd) # The expr => 1/Delta(X(i) = 1/(X(i)-X(i-1))
Bxd = resize(Bx, 1, 0, "tr") # Bxd is (0, Bx) vector
Bxu = resize(Bx, 1, 0, "br") # Bxu is (Bx, 0) vector
Dx = 2*(Bxd + Bxu) # the major diagonal entry 2(1/Delta(X(i) + 1/Delta(X(i+1)))
MDx = diag(Dx) # convert vector to diagonal matrix
MBx = diag(Bx) # this is the temp diagonal matrix, which will form the bands of the tri-diagonal matrix
MUx = resize(MBx, 1, 1, "bl") # the upper diagonal matrix of the band
MLx = resize(MBx, 1, 1, "tr") # the lower diagonal matrix of the band
A=MUx+MDx+MLx # create the complete tri-diagonal matrix
#calculate b matrix
Yu = trunc(Y, 1, "up") # Yu is (Y where Y[0] is removed)
Yd = trunc(Y, 1, "down") # Yd is (Y where Y[n] is removed)
By=(Yu-Yd)/(Bx*Bx) # the expr => Delta(Y(i))/Delta(X(i))*Delta(X(i))
By1=resize(By, 1, 0, "tr") # By1 is (0, By) vector
By2=resize(By, 1, 0, "br") # By2 is (By, 0) vector
b=3*(By1+By2) # the b entries 3*(Delta(Y(i))/Delta(X(i))*Delta(X(i)) + Delta(Y(i+1))/Delta(X(i+1))*Delta(X(i+1)))
K = solve(A, b) /* solve Ax = b for x vector and assign it to K*/
}
#given the X and Y n sample points and K (the first derivative of the interp polynomial), it calculate the
# y for the given x using the cubic spline interpolation
interpSpline = function(
double x, matrix[double] X, matrix[double] Y, matrix[double] K
) return (double q) {
#first find the right knots for interpolation
i = as.integer(nrow(X) - sum(X >= x) + 1)
#calc the y as per the algo docs
t = (x - X[i-1,1]) / ( X[i,1] - X[i-1,1])
a = K[i-1,1]*(X[i,1]-X[i-1,1]) - (Y[i,1]-Y[i-1,1])
b = -K[i,1]*(X[i,1]-X[i-1,1]) + (Y[i,1]-Y[i-1,1])
qm = (1-t)*Y[i-1,1] + t*Y[i,1] + t*(1-t)*(a*(1-t)+b*t)
q = as.scalar(qm)
}
#
# trunc the matrix by the specified amount in the specified direction.
# The shifted cells are discarded, the matrix is smaller in size
#
trunc = function (
matrix[double] X, # nxm matrix
int by, # shift amount
String dir # currently only 'up' is supported. but should handle 'up', 'down', 'left', 'right'
) return (matrix[double] Y) # Y is
{
r = nrow(X); c = ncol(X);
if (by > r) {
stop("can't pop matrix more than number of rows")
}
Y = matrix(0.0, r-by, c)
if (r != by ) {
if (dir == "up") {
Y[1:r-by,] = X[1+by:r,]
} else if (dir == "down") {
Y[1:r-by,] = X[1:r-by,]
} else {
stop("trunc unsupported direction " + dir)
}
}
}
# resize (only grow and not truncate) the matrix by the specified amount in the specified direction
resize = function(
matrix[double] X, #nxm matrix
int rby, # row resize count
int cby, # col resize count
String dir
) return (matrix[double] Y) # Y is
{
r = nrow(X); c = ncol(X);
rn = r + rby; cn = c + cby;
Y = matrix(0.0, rn, cn)
if (dir == "tr") { # top right
Y[1+rby:rn, 1:c] = X
} else if (dir == "bl") { # bottom left
Y[1:r, 1+cby:cn] = X
} else if (dir == "tl") { # top left
Y[1+rby:rn, 1+cby:cn ] = X
} else if (dir == "br") { # bottom right
Y[1:r, 1:c] = X
} else {
stop("Unknown direction dir => " + dir)
}
}