
JCublasJNI.src.JCublas.cpp Maven / Gradle / Ivy
/*
* JCublas - Java bindings for CUBLAS, the NVIDIA CUDA BLAS library,
* to be used with JCuda
*
* Copyright (c) 2008-2015 Marco Hutter - http://www.jcuda.org
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "JCublas.hpp"
#include "JCublas_common.hpp"
#include
#include
#include
/**
*
* void
* cublasDrotm (int n, double *x, int incx, double *y, int incy,
* const double* sparam)
*
* applies the modified Givens transformation, h, to the 2 x n matrix
*
* ( transpose(x) )
* ( transpose(y) )
*
* The elements of x are in x[lx + i * incx], i = 0 to n-1, where lx = 1 if
* incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
* incy. With sparam[0] = sflag, h has one of the following forms:
*
*        sflag = -1.0        sflag = 0.0         sflag = 1.0         sflag = -2.0
*
*        (sh00  sh01)        (1.0   sh01)        (sh00   1.0)        (1.0   0.0)
*    h = (          )        (          )        (          )        (         )
*        (sh10  sh11)        (sh10   1.0)        (-1.0  sh11)        (0.0   1.0)
*
* Input
* -----
* n number of elements in input vectors
* x double-precision vector with n elements
* incx storage spacing between elements of x
* y double-precision vector with n elements
* incy storage spacing between elements of y
* sparam 5-element vector. sparam[0] is sflag described above. sparam[1]
* through sparam[4] contain the 2x2 rotation matrix h: sparam[1]
* contains sh00, sparam[2] contains sh10, sparam[3] contains sh01,
* and sparam[4] contains sh11.
*
* Output
* ------
* x rotated vector x (unchanged if n <= 0)
* y rotated vector y (unchanged if n <= 0)
*
* Reference: http://www.netlib.org/blas/drotm.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
*/
// JNI entry point backing JCublas.cublasDrotmNative.
//
// Validates the object arguments, resolves 'x' and 'y' to native pointers
// via getPointer, obtains the 5 elements of 'sparam' and forwards everything
// to cublasDrotm. A java.lang.NullPointerException is raised through
// ThrowByName (and the call returns without invoking CUBLAS) when 'x', 'y'
// or 'sparam' is null. If getDoubleArrayElements reports failure, the call
// returns without invoking CUBLAS.
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDrotmNative
  (JNIEnv *env, jclass cla, jint n, jobject x, jint incx, jobject y, jint incy, jdoubleArray sparam)
{
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDrotm");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDrotm");
        return;
    }
    if (sparam == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'sparam' is null for cublasDrotm");
        return;
    }

    void *deviceMemoryX = getPointer(env, x);
    void *deviceMemoryY = getPointer(env, y);

    // Only one array is acquired in this function, so an early return here
    // cannot leave a previously acquired array unreleased.
    double *sparamArrayElements = NULL;
    if (!getDoubleArrayElements(env, sparam, sparamArrayElements, 5)) return;

    // The trace message names the function that is actually executed
    // (it previously reported "cublasSrotm").
    Logger::log(LOG_TRACE, "Executing cublasDrotm(%d, '%s', %d, '%s', %d, [%lf, %lf, %lf, %lf, %lf])\n",
        n, "x", incx, "y", incy, sparamArrayElements[0],
        sparamArrayElements[1], sparamArrayElements[2],
        sparamArrayElements[3], sparamArrayElements[4]);

    cublasDrotm(n, ((double*)deviceMemoryX), incx, ((double*)deviceMemoryY), incy, sparamArrayElements);

    // sparam is passed to cublasDrotm as 'const double*', so the elements
    // are released with JNI_ABORT (no copy-back to the Java array).
    env->ReleaseDoubleArrayElements(sparam, sparamArrayElements, JNI_ABORT);
}
/**
*
* void
* cublasDrotmg (double *psd1, double *psd2, double *psx1, const double *psy1,
* double *sparam)
*
* constructs the modified Givens transformation matrix h which zeros
* the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*sy1).
* With sparam[0] = sflag, h has one of the following forms:
*
*        sflag = -1.0        sflag = 0.0         sflag = 1.0         sflag = -2.0
*
*        (sh00  sh01)        (1.0   sh01)        (sh00   1.0)        (1.0   0.0)
*    h = (          )        (          )        (          )        (         )
*        (sh10  sh11)        (sh10   1.0)        (-1.0  sh11)        (0.0   1.0)
*
* sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11,
* respectively. Values of 1.0, -1.0, or 0.0 implied by the value
* of sflag are not stored in sparam.
*
* Input
* -----
* sd1 double precision scalar
* sd2 double precision scalar
* sx1 double precision scalar
* sy1 double precision scalar
*
* Output
* ------
* sd1 changed to represent the effect of the transformation
* sd2 changed to represent the effect of the transformation
* sx1 changed to represent the effect of the transformation
* sparam 5-element vector. sparam[0] is sflag described above. sparam[1]
* through sparam[4] contain the 2x2 rotation matrix h: sparam[1]
* contains sh00, sparam[2] contains sh10, sparam[3] contains sh01,
* and sparam[4] contains sh11.
*
* Reference: http://www.netlib.org/blas/drotmg.f
*
* This function does not set any error status.
*
*/
// JNI entry point backing JCublas.cublasDrotmgNative.
//
// 'sd1', 'sd2' and 'sx1' are single-element Java arrays used as in/out
// scalars, 'sy1' is passed by value and 'sparam' is a 5-element array.
// A java.lang.NullPointerException is raised through ThrowByName (and the
// call returns without invoking CUBLAS) when any array argument is null.
// After cublasDrotmg ran, all four arrays are released with mode 0 so the
// values are copied back to the Java arrays.
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDrotmgNative
  (JNIEnv *env, jclass cla, jdoubleArray sd1, jdoubleArray sd2, jdoubleArray sx1, jdouble sy1, jdoubleArray sparam)
{
    if (sd1 == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'sd1' is null for cublasDrotmg");
        return;
    }
    if (sd2 == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'sd2' is null for cublasDrotmg");
        return;
    }
    if (sx1 == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'sx1' is null for cublasDrotmg");
        return;
    }
    if (sparam == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'sparam' is null for cublasDrotmg");
        return;
    }

    double *sd1ArrayElements = NULL;
    double *sd2ArrayElements = NULL;
    double *sx1ArrayElements = NULL;
    double *sparamArrayElements = NULL;

    // Acquire the arrays one by one. When a later acquisition fails, the
    // arrays obtained so far are handed back with JNI_ABORT (nothing was
    // modified yet) instead of being leaked by the early return.
    if (!getDoubleArrayElements(env, sd1, sd1ArrayElements, 1)) return;
    if (!getDoubleArrayElements(env, sd2, sd2ArrayElements, 1))
    {
        env->ReleaseDoubleArrayElements(sd1, sd1ArrayElements, JNI_ABORT);
        return;
    }
    if (!getDoubleArrayElements(env, sx1, sx1ArrayElements, 1))
    {
        env->ReleaseDoubleArrayElements(sd2, sd2ArrayElements, JNI_ABORT);
        env->ReleaseDoubleArrayElements(sd1, sd1ArrayElements, JNI_ABORT);
        return;
    }
    if (!getDoubleArrayElements(env, sparam, sparamArrayElements, 5))
    {
        env->ReleaseDoubleArrayElements(sx1, sx1ArrayElements, JNI_ABORT);
        env->ReleaseDoubleArrayElements(sd2, sd2ArrayElements, JNI_ABORT);
        env->ReleaseDoubleArrayElements(sd1, sd1ArrayElements, JNI_ABORT);
        return;
    }

    // The trace message names the function that is actually executed
    // (it previously reported "cublasSrotmg").
    Logger::log(LOG_TRACE, "Executing cublasDrotmg(%lf, %lf, %lf, %lf, [%lf, %lf, %lf, %lf, %lf])\n",
        sd1ArrayElements[0], sd2ArrayElements[0], sx1ArrayElements[0], sy1,
        sparamArrayElements[0], sparamArrayElements[1], sparamArrayElements[2],
        sparamArrayElements[3], sparamArrayElements[4]);

    cublasDrotmg(sd1ArrayElements, sd2ArrayElements,
        sx1ArrayElements, &sy1, sparamArrayElements);

    // Mode 0: copy the (possibly modified) elements back and free the buffers.
    env->ReleaseDoubleArrayElements(sparam, sparamArrayElements, 0);
    env->ReleaseDoubleArrayElements(sd1, sd1ArrayElements, 0);
    env->ReleaseDoubleArrayElements(sd2, sd2ArrayElements, 0);
    env->ReleaseDoubleArrayElements(sx1, sx1ArrayElements, 0);
}
//============================================================================
// Auto-generated part:
/**
* int
* cublasIsamax (int n, const float *x, int incx)
*
* Finds the smallest index of the maximum magnitude element of the single
* precision vector x; that is, the result is the first i, i = 0 to n - 1,
* that maximizes abs(x[1 + i * incx]).
*
* Input
* -----
* n      number of elements in input vector
* x      single precision vector with n elements
* incx   storage spacing between elements of x
*
* Output
* ------
* returns the smallest index (0 if n <= 0 or incx <= 0)
*
* Reference: http://www.netlib.org/blas/isamax.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: raises java.lang.NullPointerException via ThrowByName and
* returns 0 when 'x' is null.
*/
JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIsamaxNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx)
{
    if (x == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIsamax");
        return 0;
    }

    // Resolve the Java-side pointer object to the native address.
    float* const vectorX = (float*)getPointer(env, x);

    Logger::log(LOG_TRACE, "Executing cublasIsamax(%d, '%s', %d)\n", n, "x", incx);

    const jint maxIndex = cublasIsamax(n, vectorX, incx);
    return maxIndex;
}
/**
*
* int * cublasIsamin (int n, const float *x, int incx) * * finds the smallest index of the minimum magnitude element of single * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that minimizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/scilib/blass.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIsaminNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIsamin"); return 0; } float* nativeX; nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasIsamin(%d, '%s', %d)\n", n, "x", incx); return cublasIsamin(n, nativeX, incx); } /** *
* float * cublasSasum (int n, const float *x, int incx) * * computes the sum of the absolute values of the elements of single * precision vector x; that is, the result is the sum from i = 0 to n - 1 of * abs(x[1 + i * incx]). * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the single precision sum of absolute values * (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/sasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jfloat JNICALL Java_jcuda_jcublas_JCublas_cublasSasumNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSasum"); return 0.0; } float* nativeX; nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasSasum(%d, '%s', %d)\n", n, "x", incx); return cublasSasum(n, nativeX, incx); } /** *
* void * cublasSaxpy (int n, float alpha, const float *x, int incx, float *y, * int incy) * * multiplies single precision vector x by single precision scalar alpha * and adds the result to single precision vector y; that is, it overwrites * single precision y with single precision alpha * x + y. For i = 0 to n - 1, * it replaces y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i * * incy], where lx = 1 if incx >= 0, else lx = 1 +(1 - n) * incx, and ly is * defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y single precision result (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/saxpy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSaxpyNative (JNIEnv *env, jclass cls, jint n, jfloat alpha, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSaxpy"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSaxpy"); return; } float* nativeX; float* nativeY; nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSaxpy(%d, %f, '%s', %d, '%s', %d)\n", n, alpha, "x", incx, "y", incy); cublasSaxpy(n, alpha, nativeX, incx, nativeY, incy); } /** *
* void * cublasScopy (int n, const float *x, int incx, float *y, int incy) * * copies the single precision vector x to the single precision vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains single precision vector x * * Reference: http://www.netlib.org/blas/scopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasScopyNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasScopy"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasScopy"); return; } float* nativeX; float* nativeY; nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasScopy(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasScopy(n, nativeX, incx, nativeY, incy); } /** *
* float * cublasSdot (int n, const float *x, int incx, const float *y, int incy) * * computes the dot product of two single precision vectors. It returns the * dot product of the single precision vectors x and y if successful, and * 0.0f otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * * incx] * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * *incx, and ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns single precision dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/sdot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ JNIEXPORT jfloat JNICALL Java_jcuda_jcublas_JCublas_cublasSdotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSdot"); return 0.0; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSdot"); return 0.0; } float* nativeX; float* nativeY; nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSdot(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); return cublasSdot(n, nativeX, incx, nativeY, incy); } /** *
* float * cublasSnrm2 (int n, const float *x, int incx) * * computes the Euclidean norm of the single precision n-vector x (with * storage increment incx). This code uses a multiphase model of * accumulation to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/snrm2.f * Reference: http://www.netlib.org/slatec/lin/snrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jfloat JNICALL Java_jcuda_jcublas_JCublas_cublasSnrm2Native (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSnrm2"); return 0.0; } float* nativeX; nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasSnrm2(%d, '%s', %d)\n", n, "x", incx); return cublasSnrm2(n, nativeX, incx); } /** *
* void * cublasSrot (int n, float *x, int incx, float *y, int incy, float sc, * float ss) * * multiplies a 2x2 matrix ( sc ss) with the 2xn matrix ( transpose(x) ) * (-ss sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * sc element of rotation matrix * ss element of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/srot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSrotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy, jfloat sc, jfloat ss) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSrot"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSrot"); return; } float* nativeX; float* nativeY; nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSrot(%d, '%s', %d, '%s', %d, %f, %f)\n", n, "x", incx, "y", incy, sc, ss); cublasSrot(n, nativeX, incx, nativeY, incy, sc, ss); } /** *
* void
* cublasSrotg (float *host_sa, float *host_sb, float *host_sc, float *host_ss)
*
* Constructs the Givens transformation
*
*        ( sc  ss )
*    G = (        ) ,  sc^2 + ss^2 = 1,
*        (-ss  sc )
*
* which zeros the second entry of the 2-vector transpose(sa, sb).
*
* The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The
* value of sb is overwritten by a value z which allows sc and ss to be
* recovered by the following algorithm:
*
*    if z=1          set sc = 0.0 and ss = 1.0
*    if abs(z) < 1   set sc = sqrt(1-z^2) and ss = z
*    if abs(z) > 1   set sc = 1/z and ss = sqrt(1-sc^2)
*
* The function srot (n, x, incx, y, incy, sc, ss) normally is called next
* to apply the transformation to a 2 x n matrix.
* Note that this function is provided for completeness and runs
* exclusively on the Host.
*
* Input
* -----
* sa     single precision scalar
* sb     single precision scalar
*
* Output
* ------
* sa     single precision r
* sb     single precision z
* sc     single precision result
* ss     single precision result
*
* Reference: http://www.netlib.org/blas/srotg.f
*
* This function does not set any error status.
*
* JNI wrapper: raises java.lang.NullPointerException via ThrowByName and
* returns without calling CUBLAS when any of the four arguments is null.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSrotgNative
  (JNIEnv *env, jclass cls, jobject host_sa, jobject host_sb, jobject host_sc, jobject host_ss)
{
    if (host_sa == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sa' is null for cublasSrotg");
        return;
    }
    if (host_sb == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sb' is null for cublasSrotg");
        return;
    }
    if (host_sc == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sc' is null for cublasSrotg");
        return;
    }
    if (host_ss == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_ss' is null for cublasSrotg");
        return;
    }

    // Resolve the Java-side pointer objects to native addresses.
    float* const saPtr = (float*)getPointer(env, host_sa);
    float* const sbPtr = (float*)getPointer(env, host_sb);
    float* const scPtr = (float*)getPointer(env, host_sc);
    float* const ssPtr = (float*)getPointer(env, host_ss);

    Logger::log(LOG_TRACE, "Executing cublasSrotg('%s', '%s', '%s', '%s')\n",
        "host_sa", "host_sb", "host_sc", "host_ss");

    cublasSrotg(saPtr, sbPtr, scPtr, ssPtr);
}
/**
*
* void * sscal (int n, float alpha, float *x, int incx) * * replaces single precision vector x with single precision alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single precision result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/sscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSscalNative (JNIEnv *env, jclass cls, jint n, jfloat alpha, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSscal"); return; } float* nativeX; nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasSscal(%d, %f, '%s', %d)\n", n, alpha, "x", incx); cublasSscal(n, alpha, nativeX, incx); } /** *
* void * cublasSswap (int n, float *x, int incx, float *y, int incy) * * replaces single precision vector x with single precision alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single precision result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/sscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSswapNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSswap"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSswap"); return; } float* nativeX; float* nativeY; nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSswap(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasSswap(n, nativeX, incx, nativeY, incy); } /** *
* void
* cublasCaxpy (int n, cuComplex alpha, const cuComplex *x, int incx,
*              cuComplex *y, int incy)
*
* Multiplies the single-complex vector x by the single-complex scalar alpha
* and adds the result to the single-complex vector y; that is, it
* overwrites single-complex y with single-complex alpha * x + y. For
* i = 0 to n - 1, it replaces y[ly + i * incy] with
* alpha * x[lx + i * incx] + y[ly + i * incy], where lx = 0 if incx >= 0,
* else lx = 1 + (1 - n) * incx, and ly is defined in a similar way using
* incy.
*
* Input
* -----
* n      number of elements in input vectors
* alpha  single-complex scalar multiplier
* x      single-complex vector with n elements
* incx   storage spacing between elements of x
* y      single-complex vector with n elements
* incy   storage spacing between elements of y
*
* Output
* ------
* y      single-complex result (unchanged if n <= 0)
*
* Reference: http://www.netlib.org/blas/caxpy.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: raises java.lang.NullPointerException via ThrowByName and
* returns without calling CUBLAS when 'x', 'y' or 'alpha' is null.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCaxpyNative
  (JNIEnv *env, jclass cls, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy)
{
    if (x == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCaxpy");
        return;
    }
    if (y == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCaxpy");
        return;
    }
    // 'alpha' is dereferenced through GetFloatField below, which must not be
    // handed a null object. Checked after 'x' and 'y' so that the exceptions
    // raised for those parameters are unchanged.
    if (alpha == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasCaxpy");
        return;
    }

    // Resolve the Java-side pointer objects to native addresses.
    cuComplex* nativeX = (cuComplex*)getPointer(env, x);
    cuComplex* nativeY = (cuComplex*)getPointer(env, y);

    // Copy the fields of the Java complex object into a native cuComplex.
    cuComplex complexAlpha;
    complexAlpha.x = env->GetFloatField(alpha, cuComplex_x);
    complexAlpha.y = env->GetFloatField(alpha, cuComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasCaxpy(%d, [%f,%f], '%s', %d, '%s', %d)\n",
        n, complexAlpha.x, complexAlpha.y, "x", incx, "y", incy);

    cublasCaxpy(n, complexAlpha, nativeX, incx, nativeY, incy);
}
/**
*
* void * cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y, int incy) * * copies the single-complex vector x to the single-complex vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains single complex vector x * * Reference: http://www.netlib.org/blas/ccopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCcopyNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCcopy"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCcopy"); return; } cuComplex* nativeX; cuComplex* nativeY; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasCcopy(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasCcopy(n, nativeX, incx, nativeY, incy); } /** *
* void * cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) * * copies the double-complex vector x to the double-complex vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x double-complex vector with n elements * incx storage spacing between elements of x * y double-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains double complex vector x * * Reference: http://www.netlib.org/blas/zcopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZcopyNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZcopy"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZcopy"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasZcopy(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasZcopy(n, nativeX, incx, nativeY, incy); } /** *
* void * cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx) * * replaces single-complex vector x with single-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single-complex scalar multiplier * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/cscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCscalNative (JNIEnv *env, jclass cls, jint n, jobject alpha, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCscal"); return; } cuComplex* nativeX; cuComplex complexAlpha; nativeX = (cuComplex*)getPointer(env, x); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCscal(%d, [%f,%f], '%s', %d)\n", n, complexAlpha.x, complexAlpha.y, "x", incx); cublasCscal(n, complexAlpha, nativeX, incx); } /** *
* void
* cublasCrotg (cuComplex *host_ca, cuComplex cb, float *host_sc,
*              cuComplex *host_cs)
*
* Constructs the complex Givens transformation
*
*        ( sc  cs )
*    G = (        ) ,  sc^2 + cabs(cs)^2 = 1,
*        (-cs  sc )
*
* which zeros the second entry of the complex 2-vector transpose(ca, cb).
*
* The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The
* function crot (n, x, incx, y, incy, sc, cs) is normally called next
* to apply the transformation to a 2 x n matrix.
* Note that this function is provided for completeness and runs
* exclusively on the Host.
*
* Input
* -----
* ca     single-precision complex scalar
* cb     single-precision complex scalar
*
* Output
* ------
* ca     single-precision complex ca/cabs(ca)*norm(ca,cb)
* sc     single-precision cosine component of rotation matrix
* cs     single-precision complex sine component of rotation matrix
*
* Reference: http://www.netlib.org/blas/crotg.f
*
* This function does not set any error status.
*
* JNI wrapper: raises java.lang.NullPointerException via ThrowByName and
* returns without calling CUBLAS when 'host_ca', 'host_sc', 'host_cs' or
* 'cb' is null.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCrotgNative
  (JNIEnv *env, jclass cls, jobject host_ca, jobject cb, jobject host_sc, jobject host_cs)
{
    if (host_ca == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_ca' is null for cublasCrotg");
        return;
    }
    if (host_sc == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sc' is null for cublasCrotg");
        return;
    }
    if (host_cs == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_cs' is null for cublasCrotg");
        return;
    }
    // 'cb' is dereferenced through GetFloatField below, which must not be
    // handed a null object. Checked last so that the exceptions raised for
    // the other parameters are unchanged.
    if (cb == NULL) {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'cb' is null for cublasCrotg");
        return;
    }

    // Resolve the Java-side pointer objects to native addresses.
    cuComplex* nativeHOST_CA = (cuComplex*)getPointer(env, host_ca);
    float* nativeHOST_SC = (float*)getPointer(env, host_sc);
    cuComplex* nativeHOST_CS = (cuComplex*)getPointer(env, host_cs);

    // Copy the fields of the Java complex object into a native cuComplex.
    cuComplex complexCb;
    complexCb.x = env->GetFloatField(cb, cuComplex_x);
    complexCb.y = env->GetFloatField(cb, cuComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasCrotg('%s', [%f,%f], '%s', '%s')\n",
        "host_ca", complexCb.x, complexCb.y, "host_sc", "host_cs");

    cublasCrotg(nativeHOST_CA, complexCb, nativeHOST_SC, nativeHOST_CS);
}
/**
*
* void
 * cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, int incy, float sc,
 *             cuComplex cs)
 *
 * multiplies a 2x2 matrix ( sc       cs) with the 2xn matrix ( transpose(x) )
 *                         (-conj(cs) sc)                     ( transpose(y) )
 *
 * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if
 * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and
 * incy.
 *
 * Input
 * -----
 * n      number of elements in input vectors
 * x      single-precision complex vector with n elements
 * incx   storage spacing between elements of x
 * y      single-precision complex vector with n elements
 * incy   storage spacing between elements of y
 * sc     single-precision cosine component of rotation matrix
 * cs     single-precision complex sine component of rotation matrix
 *
 * Output
 * ------
 * x      rotated single-precision complex vector x (unchanged if n <= 0)
 * y      rotated single-precision complex vector y (unchanged if n <= 0)
 *
 * Reference: http://netlib.org/lapack/explore-html/crot.f.html
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: the JNI parameters 'c' and 's' are passed to cublasCrot in
 * the positions of 'sc' and 'cs'. Throws java.lang.NullPointerException,
 * without calling CUBLAS, when 'x', 'y' or 's' is null.
 */
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCrotNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy, jfloat c, jobject s)
{
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCrot");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCrot");
        return;
    }
    // The fields of 's' are read through JNI below; a null reference
    // must be rejected first instead of being handed to GetFloatField.
    if (s == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 's' is null for cublasCrot");
        return;
    }

    cuComplex* nativeX = (cuComplex*)getPointer(env, x);
    cuComplex* nativeY = (cuComplex*)getPointer(env, y);

    // Copy the Java cuComplex object into a native cuComplex value.
    cuComplex complexS;
    complexS.x = env->GetFloatField(s, cuComplex_x);
    complexS.y = env->GetFloatField(s, cuComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasCrot(%d, '%s', %d, '%s', %d, %f, [%f,%f])\n",
        n, "x", incx, "y", incy, c, complexS.x, complexS.y);

    cublasCrot(n, nativeX, incx, nativeY, incy, c, complexS);
}

/**
 *
* void * csrot (int n, cuComplex *x, int incx, cuCumplex *y, int incy, float c, * float s) * * multiplies a 2x2 rotation matrix ( c s) with a 2xn matrix ( transpose(x) ) * (-s c) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x single-precision complex vector with n elements * incx storage spacing between elements of x * y single-precision complex vector with n elements * incy storage spacing between elements of y * c cosine component of rotation matrix * s sine component of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/csrot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCsrotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy, jfloat c, jfloat s) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCsrot"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCsrot"); return; } cuComplex* nativeX; cuComplex* nativeY; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasCsrot(%d, '%s', %d, '%s', %d, %f, %f)\n", n, "x", incx, "y", incy, c, s); cublasCsrot(n, nativeX, incx, nativeY, incy, c, s); } /** *
* void * cublasCsscal (int n, float alpha, cuComplex *x, int incx) * * replaces single-complex vector x with single-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/csscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCsscalNative (JNIEnv *env, jclass cls, jint n, jfloat alpha, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCsscal"); return; } cuComplex* nativeX; nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCsscal(%d, %f, '%s', %d)\n", n, alpha, "x", incx); cublasCsscal(n, alpha, nativeX, incx); } /** *
* void * cublasCswap (int n, const cuComplex *x, int incx, cuComplex *y, int incy) * * interchanges the single-complex vector x with the single-complex vector y. * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], where * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * x contains-single complex vector y * y contains-single complex vector x * * Reference: http://www.netlib.org/blas/cswap.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCswapNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCswap"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCswap"); return; } cuComplex* nativeX; cuComplex* nativeY; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasCswap(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasCswap(n, nativeX, incx, nativeY, incy); } /** *
* void * cublasZswap (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) * * interchanges the double-complex vector x with the double-complex vector y. * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], where * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * x double-complex vector with n elements * incx storage spacing between elements of x * y double-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * x contains-double complex vector y * y contains-double complex vector x * * Reference: http://www.netlib.org/blas/zswap.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZswapNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZswap"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZswap"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasZswap(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasZswap(n, nativeX, incx, nativeY, incy); } /** *
* cuComplex
 * cublasCdotu (int n, const cuComplex *x, int incx, const cuComplex *y, int incy)
 *
 * computes the dot product of two single-complex vectors. It returns the
 * dot product of the single-complex vectors x and y if successful, and complex
 * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * incx] *
 * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx;
 * ly is defined in a similar way using incy.
 *
 * Input
 * -----
 * n      number of elements in input vectors
 * x      single-complex vector with n elements
 * incx   storage spacing between elements of x
 * y      single-complex vector with n elements
 * incy   storage spacing between elements of y
 *
 * Output
 * ------
 * returns single-complex dot product (zero if n <= 0)
 *
 * Reference: http://www.netlib.org/blas/cdotu.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU
 *
 * JNI wrapper: the result is returned as a newly created Java cuComplex
 * object. Returns NULL after throwing java.lang.NullPointerException when
 * 'x' or 'y' is null, and NULL when an exception is pending after creating
 * the result object.
 */
JNIEXPORT jobject JNICALL Java_jcuda_jcublas_JCublas_cublasCdotuNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy)
{
    // Reject null vector arguments before resolving them to native pointers.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCdotu");
        return NULL;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCdotu");
        return NULL;
    }

    cuComplex* xPtr = (cuComplex*)getPointer(env, x);
    cuComplex* yPtr = (cuComplex*)getPointer(env, y);

    Logger::log(LOG_TRACE, "Executing cublasCdotu(%d, '%s', %d, '%s', %d)\n",
        n, "x", incx, "y", incy);

    cuComplex dot = cublasCdotu(n, xPtr, incx, yPtr, incy);

    // Box the native value into a Java cuComplex instance.
    jobject javaDot = env->NewObject(cuComplex_Class, cuComplex_Constructor);
    if (env->ExceptionCheck())
    {
        return NULL;
    }
    env->SetFloatField(javaDot, cuComplex_x, dot.x);
    env->SetFloatField(javaDot, cuComplex_y, dot.y);
    return javaDot;
}
/**
 * cuComplex
 * cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y,
 *              int incy)
 *
 * computes the dot product of two single-complex vectors. It returns the
 * dot product of the single-complex vectors x and y if successful, and complex
 * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * incx] *
 * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx;
 * ly is defined in a similar way using incy.
 *
 * NOTE(review): the text above does not mention conjugation, while the
 * referenced cdotc.f (and the cublasZdotc description in this file) use
 * conjugate(x) - confirm against the CUBLAS documentation.
 *
 * Input
 * -----
 * n      number of elements in input vectors
 * x      single-complex vector with n elements
 * incx   storage spacing between elements of x
 * y      single-complex vector with n elements
 * incy   storage spacing between elements of y
 *
 * Output
 * ------
 * returns single-complex dot product (zero if n <= 0)
 *
 * Reference: http://www.netlib.org/blas/cdotc.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU
 *
 * JNI wrapper: the result is returned as a newly created Java cuComplex
 * object. Returns NULL after throwing java.lang.NullPointerException when
 * 'x' or 'y' is null, and NULL when an exception is pending after creating
 * the result object.
 */
JNIEXPORT jobject JNICALL Java_jcuda_jcublas_JCublas_cublasCdotcNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy)
{
    // Reject null vector arguments before resolving them to native pointers.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCdotc");
        return NULL;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCdotc");
        return NULL;
    }

    cuComplex* xPtr = (cuComplex*)getPointer(env, x);
    cuComplex* yPtr = (cuComplex*)getPointer(env, y);

    Logger::log(LOG_TRACE, "Executing cublasCdotc(%d, '%s', %d, '%s', %d)\n",
        n, "x", incx, "y", incy);

    cuComplex dot = cublasCdotc(n, xPtr, incx, yPtr, incy);

    // Box the native value into a Java cuComplex instance.
    jobject javaDot = env->NewObject(cuComplex_Class, cuComplex_Constructor);
    if (env->ExceptionCheck())
    {
        return NULL;
    }
    env->SetFloatField(javaDot, cuComplex_x, dot.x);
    env->SetFloatField(javaDot, cuComplex_y, dot.y);
    return javaDot;
}

/**
 *
* int * cublasIcamax (int n, const float *x, int incx) * * finds the smallest index of the element having maximum absolute value * in single-complex vector x; that is, the result is the first i, i = 0 * to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/icamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIcamaxNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIcamax"); return 0; } cuComplex* nativeX; nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasIcamax(%d, '%s', %d)\n", n, "x", incx); return cublasIcamax(n, nativeX, incx); } /** *
* int
 * cublasIcamin (int n, const cuComplex *x, int incx)
 *
 * finds the smallest index of the element having minimum absolute value
 * in single-complex vector x; that is, the result is the first i, i = 0
 * to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])).
 *
 * Input
 * -----
 * n      number of elements in input vector
 * x      single-complex vector with n elements
 * incx   storage spacing between elements of x
 *
 * Output
 * ------
 * returns the smallest index (0 if n <= 0 or incx <= 0)
 *
 * Reference: see ICAMAX.
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: returns 0 after throwing java.lang.NullPointerException,
 * without calling CUBLAS, when 'x' is null.
 */
JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIcaminNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx)
{
    // Reject a null vector argument before resolving it to a native pointer.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIcamin");
        return 0;
    }

    cuComplex* xPtr = (cuComplex*)getPointer(env, x);

    Logger::log(LOG_TRACE, "Executing cublasIcamin(%d, '%s', %d)\n",
        n, "x", incx);

    return cublasIcamin(n, xPtr, incx);
}

/**
 *
* float * cublasScasum (int n, const cuDouble *x, int incx) * * takes the sum of the absolute values of a complex vector and returns a * single precision result. Note that this is not the L1 norm of the vector. * The result is the sum from 0 to n-1 of abs(real(x[ix+i*incx])) + * abs(imag(x(ix+i*incx))), where ix = 1 if incx <= 0, else ix = 1+(1-n)*incx. * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the single precision sum of absolute values of real and imaginary * parts (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/scasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jfloat JNICALL Java_jcuda_jcublas_JCublas_cublasScasumNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasScasum"); return 0.0; } cuComplex* nativeX; nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasScasum(%d, '%s', %d)\n", n, "x", incx); return cublasScasum(n, nativeX, incx); } /** *
* float * cublasScnrm2 (int n, const cuComplex *x, int incx) * * computes the Euclidean norm of the single-complex n-vector x. This code * uses simple scaling to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/scnrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jfloat JNICALL Java_jcuda_jcublas_JCublas_cublasScnrm2Native (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasScnrm2"); return 0.0; } cuComplex* nativeX; nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasScnrm2(%d, '%s', %d)\n", n, "x", incx); return cublasScnrm2(n, nativeX, incx); } /** *
* void
 * cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx,
 *              cuDoubleComplex *y, int incy)
 *
 * multiplies double-complex vector x by double-complex scalar alpha and adds
 * the result to double-complex vector y; that is, it overwrites double-complex
 * y with double-complex alpha * x + y. For i = 0 to n - 1, it replaces
 * y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i * incy], where
 * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a
 * similar way using incy.
 *
 * Input
 * -----
 * n      number of elements in input vectors
 * alpha  double-complex scalar multiplier
 * x      double-complex vector with n elements
 * incx   storage spacing between elements of x
 * y      double-complex vector with n elements
 * incy   storage spacing between elements of y
 *
 * Output
 * ------
 * y      double-complex result (unchanged if n <= 0)
 *
 * Reference: http://www.netlib.org/blas/zaxpy.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: throws java.lang.NullPointerException, without calling
 * CUBLAS, when 'x', 'y' or 'alpha' is null.
 */
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZaxpyNative
  (JNIEnv *env, jclass cls, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy)
{
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZaxpy");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZaxpy");
        return;
    }
    // The fields of 'alpha' are read through JNI below; a null reference
    // must be rejected first instead of being handed to GetDoubleField.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasZaxpy");
        return;
    }

    cuDoubleComplex* nativeX = (cuDoubleComplex*)getPointer(env, x);
    cuDoubleComplex* nativeY = (cuDoubleComplex*)getPointer(env, y);

    // Copy the Java cuDoubleComplex object into a native cuDoubleComplex value.
    cuDoubleComplex doubleComplexAlpha;
    doubleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x);
    doubleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasZaxpy(%d, [%lf,%lf], '%s', %d, '%s', %d)\n",
        n, doubleComplexAlpha.x, doubleComplexAlpha.y, "x", incx, "y", incy);

    cublasZaxpy(n, doubleComplexAlpha, nativeX, incx, nativeY, incy);
}

/**
 *
* cuDoubleComplex
 * cublasZdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, int incy)
 *
 * computes the dot product of two double-complex vectors. It returns the
 * dot product of the double-complex vectors x and y if successful, and double-complex
 * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * incx] *
 * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx;
 * ly is defined in a similar way using incy.
 *
 * Input
 * -----
 * n      number of elements in input vectors
 * x      double-complex vector with n elements
 * incx   storage spacing between elements of x
 * y      double-complex vector with n elements
 * incy   storage spacing between elements of y
 *
 * Output
 * ------
 * returns double-complex dot product (zero if n <= 0)
 *
 * Reference: http://www.netlib.org/blas/zdotu.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU
 *
 * JNI wrapper: the result is returned as a newly created Java
 * cuDoubleComplex object. Returns NULL after throwing
 * java.lang.NullPointerException when 'x' or 'y' is null, and NULL when an
 * exception is pending after creating the result object.
 */
JNIEXPORT jobject JNICALL Java_jcuda_jcublas_JCublas_cublasZdotuNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy)
{
    // Reject null vector arguments before resolving them to native pointers.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZdotu");
        return NULL;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZdotu");
        return NULL;
    }

    cuDoubleComplex* xPtr = (cuDoubleComplex*)getPointer(env, x);
    cuDoubleComplex* yPtr = (cuDoubleComplex*)getPointer(env, y);

    Logger::log(LOG_TRACE, "Executing cublasZdotu(%d, '%s', %d, '%s', %d)\n",
        n, "x", incx, "y", incy);

    cuDoubleComplex dot = cublasZdotu(n, xPtr, incx, yPtr, incy);

    // Box the native value into a Java cuDoubleComplex instance.
    jobject javaDot = env->NewObject(cuDoubleComplex_Class, cuDoubleComplex_Constructor);
    if (env->ExceptionCheck())
    {
        return NULL;
    }
    env->SetDoubleField(javaDot, cuDoubleComplex_x, dot.x);
    env->SetDoubleField(javaDot, cuDoubleComplex_y, dot.y);
    return javaDot;
}

/**
 *
* cuDoubleComplex
 * cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, int incy)
 *
 * computes the dot product of two double-precision complex vectors. It returns the
 * dot product of the double-precision complex vectors conjugate(x) and y if successful,
 * and double-precision complex zero otherwise. It computes the
 * sum for i = 0 to n - 1 of conjugate(x[lx + i * incx]) * y[ly + i * incy],
 * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx;
 * ly is defined in a similar way using incy.
 *
 * Input
 * -----
 * n      number of elements in input vectors
 * x      double-precision complex vector with n elements
 * incx   storage spacing between elements of x
 * y      double-precision complex vector with n elements
 * incy   storage spacing between elements of y
 *
 * Output
 * ------
 * returns double-complex dot product (zero if n <= 0)
 *
 * Reference: http://www.netlib.org/blas/zdotc.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU
 *
 * JNI wrapper: the result is returned as a newly created Java
 * cuDoubleComplex object. Returns NULL after throwing
 * java.lang.NullPointerException when 'x' or 'y' is null, and NULL when an
 * exception is pending after creating the result object.
 */
JNIEXPORT jobject JNICALL Java_jcuda_jcublas_JCublas_cublasZdotcNative
  (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy)
{
    // Reject null vector arguments before resolving them to native pointers.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZdotc");
        return NULL;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZdotc");
        return NULL;
    }

    cuDoubleComplex* xPtr = (cuDoubleComplex*)getPointer(env, x);
    cuDoubleComplex* yPtr = (cuDoubleComplex*)getPointer(env, y);

    Logger::log(LOG_TRACE, "Executing cublasZdotc(%d, '%s', %d, '%s', %d)\n",
        n, "x", incx, "y", incy);

    cuDoubleComplex dot = cublasZdotc(n, xPtr, incx, yPtr, incy);

    // Box the native value into a Java cuDoubleComplex instance.
    jobject javaDot = env->NewObject(cuDoubleComplex_Class, cuDoubleComplex_Constructor);
    if (env->ExceptionCheck())
    {
        return NULL;
    }
    env->SetDoubleField(javaDot, cuDoubleComplex_x, dot.x);
    env->SetDoubleField(javaDot, cuDoubleComplex_y, dot.y);
    return javaDot;
}

/**
 *
* void * cublasZscal (int n, cuComplex alpha, cuComplex *x, int incx) * * replaces double-complex vector x with double-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha double-complex scalar multiplier * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/zscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZscalNative (JNIEnv *env, jclass cls, jint n, jobject alpha, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZscal"); return; } cuDoubleComplex* nativeX; cuDoubleComplex dobuleComplexAlpha; nativeX = (cuDoubleComplex*)getPointer(env, x); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZscal(%d, [%lf,%lf], '%s', %d)\n", n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "x", incx); cublasZscal(n, dobuleComplexAlpha, nativeX, incx); } /** *
* void * cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx) * * replaces double-complex vector x with double-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha double precision scalar multiplier * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/zdscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZdscalNative (JNIEnv *env, jclass cls, jint n, jdouble alpha, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZdscal"); return; } cuDoubleComplex* nativeX; nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZdscal(%d, %lf, '%s', %d)\n", n, alpha, "x", incx); cublasZdscal(n, alpha, nativeX, incx); } /** *
* double * cublasDznrm2 (int n, const cuDoubleComplex *x, int incx) * * computes the Euclidean norm of the double precision complex n-vector x. This code * uses simple scaling to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dznrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jdouble JNICALL Java_jcuda_jcublas_JCublas_cublasDznrm2Native (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDznrm2"); return 0.0; } cuDoubleComplex* nativeX; nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDznrm2(%d, '%s', %d)\n", n, "x", incx); return cublasDznrm2(n, nativeX, incx); } /** *
* void * cublasZrotg (cuDoubleComplex *host_ca, cuDoubleComplex cb, double *host_sc, double *host_cs) * * constructs the complex Givens tranformation * * ( sc cs ) * G = ( ) , sc^2 + cabs(cs)^2 = 1, * (-cs sc ) * * which zeros the second entry of the complex 2-vector transpose(ca, cb). * * The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The * function crot (n, x, incx, y, incy, sc, cs) is normally called next * to apply the transformation to a 2 x n matrix. * Note that is function is provided for completeness and run exclusively * on the Host. * * Input * ----- * ca double-precision complex precision scalar * cb double-precision complex scalar * * Output * ------ * ca double-precision complex ca/cabs(ca)*norm(ca,cb) * sc double-precision cosine component of rotation matrix * cs double-precision complex sine component of rotation matrix * * Reference: http://www.netlib.org/blas/zrotg.f * * This function does not set any error status. **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZrotgNative (JNIEnv *env, jclass cls, jobject host_ca, jobject cb, jobject host_sc, jobject host_cs) { if (host_ca == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_ca' is null for cublasZrotg"); return; } if (host_sc == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sc' is null for cublasZrotg"); return; } if (host_cs == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_cs' is null for cublasZrotg"); return; } cuDoubleComplex* nativeHOST_CA; double* nativeHOST_SC; cuDoubleComplex* nativeHOST_CS; cuDoubleComplex dobuleComplexCb; nativeHOST_CA = (cuDoubleComplex*)getPointer(env, host_ca); nativeHOST_SC = (double*)getPointer(env, host_sc); nativeHOST_CS = (cuDoubleComplex*)getPointer(env, host_cs); dobuleComplexCb.x = env->GetDoubleField(cb, cuDoubleComplex_x); dobuleComplexCb.y = env->GetDoubleField(cb, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZrotg('%s', 
[%lf,%lf], '%s', '%s')\n", "host_ca", dobuleComplexCb.x, dobuleComplexCb.y, "host_sc", "host_cs"); cublasZrotg(nativeHOST_CA, dobuleComplexCb, nativeHOST_SC, nativeHOST_CS); } /** *
* cublasZrot (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy, double sc, * cuDoubleComplex cs) * * multiplies a 2x2 matrix ( sc cs) with the 2xn matrix ( transpose(x) ) * (-conj(cs) sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x double-precision complex vector with n elements * incx storage spacing between elements of x * y double-precision complex vector with n elements * incy storage spacing between elements of y * sc double-precision cosine component of rotation matrix * cs double-precision complex sine component of rotation matrix * * Output * ------ * x rotated double-precision complex vector x (unchanged if n <= 0) * y rotated double-precision complex vector y (unchanged if n <= 0) * * Reference: http://netlib.org/lapack/explore-html/zrot.f.html * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZrotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy, jdouble sc, jobject cs) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZrot"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZrot"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex dobuleComplexCs; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); dobuleComplexCs.x = env->GetDoubleField(cs, cuDoubleComplex_x); dobuleComplexCs.y = env->GetDoubleField(cs, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZrot(%d, '%s', %d, '%s', %d, %lf, [%lf,%lf])\n", n, "x", incx, "y", incy, sc, dobuleComplexCs.x, dobuleComplexCs.y); cublasZrot(n, nativeX, incx, nativeY, incy, sc, dobuleComplexCs); } /** *
* void * zdrot (int n, cuDoubleComplex *x, int incx, cuCumplex *y, int incy, double c, * double s) * * multiplies a 2x2 matrix ( c s) with the 2xn matrix ( transpose(x) ) * (-s c) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x double-precision complex vector with n elements * incx storage spacing between elements of x * y double-precision complex vector with n elements * incy storage spacing between elements of y * c cosine component of rotation matrix * s sine component of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/zdrot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZdrotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy, jdouble c, jdouble s) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZdrot"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZdrot"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasZdrot(%d, '%s', %d, '%s', %d, %lf, %lf)\n", n, "x", incx, "y", incy, c, s); cublasZdrot(n, nativeX, incx, nativeY, incy, c, s); } /** *
* int * cublasIzamax (int n, const double *x, int incx) * * finds the smallest index of the element having maximum absolute value * in double-complex vector x; that is, the result is the first i, i = 0 * to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/izamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIzamaxNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIzamax"); return 0; } cuDoubleComplex* nativeX; nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasIzamax(%d, '%s', %d)\n", n, "x", incx); return cublasIzamax(n, nativeX, incx); } /** *
* int
 * cublasIzamin (int n, const cuDoubleComplex *x, int incx)
 *
 * finds the smallest index of the element having minimum absolute value
 * in double-complex vector x; that is, the result is the first i, i = 0
 * to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])).
 *
 * Input
 * -----
 * n      number of elements in input vector
 * x      double-complex vector with n elements
 * incx   storage spacing between elements of x
 *
 * Output
 * ------
 * returns the smallest index (0 if n <= 0 or incx <= 0)
 *
 * Reference: Analogous to IZAMAX, see there.
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: when x is null a java.lang.NullPointerException is thrown
 * and 0 is returned without calling cublasIzamin.
 **/
JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIzaminNative(
    JNIEnv *env, jclass cls,
    jint n, jobject x, jint incx)
{
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIzamin");
        return 0;
    }

    // Resolve the Java pointer object to a native address.
    cuDoubleComplex* const vecX = (cuDoubleComplex*)getPointer(env, x);

    Logger::log(LOG_TRACE,
        "Executing cublasIzamin(%d, '%s', %d)\n",
        n, "x", incx);

    return cublasIzamin(n, vecX, incx);
}

/**
 *
* double * cublasDzasum (int n, const cuDoubleComplex *x, int incx) * * takes the sum of the absolute values of a complex vector and returns a * double precision result. Note that this is not the L1 norm of the vector. * The result is the sum from 0 to n-1 of abs(real(x[ix+i*incx])) + * abs(imag(x(ix+i*incx))), where ix = 1 if incx <= 0, else ix = 1+(1-n)*incx. * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the double precision sum of absolute values of real and imaginary * parts (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dzasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jdouble JNICALL Java_jcuda_jcublas_JCublas_cublasDzasumNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDzasum"); return 0.0; } cuDoubleComplex* nativeX; nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDzasum(%d, '%s', %d)\n", n, "x", incx); return cublasDzasum(n, nativeX, incx); } /** *
* void * cublasSgbmv (char trans, int m, int n, int kl, int ku, float alpha, * const float *A, int lda, const float *x, int incx, float beta, * float *y, int incy) * * performs one of the matrix-vector operations * * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) * * alpha and beta are single precision scalars. x and y are single precision * vectors. A is an m by n band matrix consisting of single precision elements * with kl sub-diagonals and ku super-diagonals. * * Input * ----- * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * kl specifies the number of sub-diagonals of matrix A. It must be at * least zero. * ku specifies the number of super-diagonals of matrix A. It must be at * least zero. * alpha single precision scalar multiplier applied to op(A). * A single precision array of dimensions (lda, n). The leading * (kl + ku + 1) x n part of the array A must contain the band matrix A, * supplied column by column, with the leading diagonal of the matrix * in row (ku + 1) of the array, the first super-diagonal starting at * position 2 in row ku, the first sub-diagonal starting at position 1 * in row (ku + 2), and so on. Elements in the array A that do not * correspond to elements in the band matrix (such as the top left * ku x ku triangle) are not referenced. * lda leading dimension of A. lda must be at least (kl + ku + 1). * x single precision array of length at least (1+(n-1)*abs(incx)) when * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y. If beta is * zero, y is not read. 
* y single precision array of length at least (1+(m-1)*abs(incy)) when * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If * beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*op(A)*x + beta*y * * Reference: http://www.netlib.org/blas/sgbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n, kl, or ku < 0; if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSgbmvNative (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jint kl, jint ku, jfloat alpha, jobject A, jint lda, jobject x, jint incx, jfloat beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSgbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSgbmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSgbmv"); return; } float* nativeA; float* nativeX; float* nativeY; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSgbmv(%c, %d, %d, %d, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n", trans, m, n, kl, ku, alpha, "A", lda, "x", incx, beta, "y", incy); cublasSgbmv((char)trans, m, n, kl, ku, alpha, nativeA, lda, nativeX, incx, beta, nativeY, incy); } /** *
* cublasSgemv (char trans, int m, int n, float alpha, const float *A, int lda,
 *              const float *x, int incx, float beta, float *y, int incy)
 *
 * performs one of the matrix-vector operations
 *
 *    y = alpha * op(A) * x + beta * y,
 *
 * where op(A) is one of
 *
 *    op(A) = A   or   op(A) = transpose(A)
 *
 * where alpha and beta are single precision scalars, x and y are single
 * precision vectors, and A is an m x n matrix consisting of single precision
 * elements. Matrix A is stored in column major format, and lda is the leading
 * dimension of the two-dimensional array in which A is stored.
 *
 * Input
 * -----
 * trans  specifies op(A). If trans = 'n' or 'N', op(A) = A. If
 *        trans = 't', 'T', 'c', or 'C', op(A) = transpose(A)
 * m      specifies the number of rows of the matrix A. m must be at least
 *        zero.
 * n      specifies the number of columns of the matrix A. n must be at least
 *        zero.
 * alpha  single precision scalar multiplier applied to op(A).
 * A      single precision array of dimensions (lda, n) if trans = 'n' or
 *        'N'), and of dimensions (lda, m) otherwise. lda must be at least
 *        max(1, m) and at least max(1, n) otherwise.
 * lda    leading dimension of two-dimensional array used to store matrix A
 * x      single precision array of length at least (1 + (n - 1) * abs(incx))
 *        when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx))
 *        otherwise.
 * incx   specifies the storage spacing between elements of x. incx must not
 *        be zero.
 * beta   single precision scalar multiplier applied to vector y. If beta
 *        is zero, y is not read.
 * y      single precision array of length at least (1 + (m - 1) * abs(incy))
 *        when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy))
 *        otherwise.
 * incy   specifies the storage spacing between elements of y. incy must not
 *        be zero.
 *
 * Output
 * ------
 * y      updated according to alpha * op(A) * x + beta * y
 *
 * Reference: http://www.netlib.org/blas/sgemv.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if m or n are < 0, or if incx or incy == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: throws java.lang.NullPointerException (without calling
 * cublasSgemv) when A, x or y is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSgemvNative(
    JNIEnv *env, jclass cls,
    jchar trans, jint m, jint n, jfloat alpha,
    jobject A, jint lda, jobject x, jint incx, jfloat beta,
    jobject y, jint incy)
{
    // All three pointer arguments are mandatory.
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSgemv");
        return;
    }
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSgemv");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSgemv");
        return;
    }

    // Resolve the Java pointer objects to native addresses.
    float* const matA = (float*)getPointer(env, A);
    float* const vecX = (float*)getPointer(env, x);
    float* const vecY = (float*)getPointer(env, y);

    Logger::log(LOG_TRACE,
        "Executing cublasSgemv(%c, %d, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n",
        trans, m, n, alpha, "A", lda, "x", incx, beta, "y", incy);

    cublasSgemv((char)trans, m, n, alpha, matA, lda, vecX, incx, beta, vecY, incy);
}

/**
 *
* cublasSger (int m, int n, float alpha, const float *x, int incx,
 *             const float *y, int incy, float *A, int lda)
 *
 * performs the rank 1 operation
 *
 *    A = alpha * x * transpose(y) + A,
 *
 * where alpha is a single precision scalar, x is an m element single
 * precision vector, y is an n element single precision vector, and A
 * is an m by n matrix consisting of single precision elements. Matrix A
 * is stored in column major format, and lda is the leading dimension of
 * the two-dimensional array used to store A.
 *
 * Input
 * -----
 * m      specifies the number of rows of the matrix A. It must be at least
 *        zero.
 * n      specifies the number of columns of the matrix A. It must be at
 *        least zero.
 * alpha  single precision scalar multiplier applied to x * transpose(y)
 * x      single precision array of length at least (1 + (m - 1) * abs(incx))
 * incx   specifies the storage spacing between elements of x. incx must not
 *        be zero.
 * y      single precision array of length at least (1 + (n - 1) * abs(incy))
 * incy   specifies the storage spacing between elements of y. incy must not
 *        be zero.
 * A      single precision array of dimensions (lda, n).
 * lda    leading dimension of two-dimensional array used to store matrix A
 *
 * Output
 * ------
 * A      updated according to A = alpha * x * transpose(y) + A
 *
 * Reference: http://www.netlib.org/blas/sger.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n < 0, incx == 0, incy == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: throws java.lang.NullPointerException (without calling
 * cublasSger) when x, y or A is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSgerNative(
    JNIEnv *env, jclass cls,
    jint m, jint n, jfloat alpha,
    jobject x, jint incx, jobject y, jint incy,
    jobject A, jint lda)
{
    // All three pointer arguments are mandatory.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSger");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSger");
        return;
    }
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSger");
        return;
    }

    // Resolve the Java pointer objects to native addresses.
    float* const vecX = (float*)getPointer(env, x);
    float* const vecY = (float*)getPointer(env, y);
    float* const matA = (float*)getPointer(env, A);

    Logger::log(LOG_TRACE,
        "Executing cublasSger(%d, %d, %f, '%s', %d, '%s', %d, '%s', %d)\n",
        m, n, alpha, "x", incx, "y", incy, "A", lda);

    cublasSger(m, n, alpha, vecX, incx, vecY, incy, matA, lda);
}

/**
 *
* void * cublasSsbmv (char uplo, int n, int k, float alpha, const float *A, int lda, * const float *x, int incx, float beta, float *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are single precision scalars. x and y are single precision * vectors with n elements. A is an n x n symmetric band matrix consisting * of single precision elements, with k super-diagonals and the same number * of sub-diagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the symmetric * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is symmetric, this is also the number of sub-diagonals. k must be at * least zero. * alpha single precision scalar multiplier applied to A*x. * A single precision array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the symmetric matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the symmetric matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. * lda leading dimension of A. lda must be at least (k + 1). * x single precision array of length at least (1 + (n - 1) * abs(incx)). 
* incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y. If beta is * zero, y is not read. * y single precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/ssbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsbmvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jint k, jfloat alpha, jobject A, jint lda, jobject x, jint incx, jfloat beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSsbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSsbmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSsbmv"); return; } float* nativeA; float* nativeX; float* nativeY; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSsbmv(%c, %d, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n", uplo, n, k, alpha, "A", lda, "x", incx, beta, "y", incy); cublasSsbmv((char)uplo, n, k, alpha, nativeA, lda, nativeX, incx, beta, nativeY, incy); } /** *
* void * cublasSspmv (char uplo, int n, float alpha, const float *AP, const float *x, * int incx, float beta, float *y, int incy) * * performs the matrix-vector operation * * y = alpha * A * x + beta * y * * Alpha and beta are single precision scalars, and x and y are single * precision vectors with n elements. A is a symmetric n x n matrix * consisting of single precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision scalar multiplier applied to A*x. * AP single precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y; * y single precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/sspmv.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSspmvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jfloat alpha, jobject AP, jobject x, jint incx, jfloat beta, jobject y, jint incy) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasSspmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSspmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSspmv"); return; } float* nativeAP; float* nativeX; float* nativeY; nativeAP = (float*)getPointer(env, AP); nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSspmv(%c, %d, %f, '%s', '%s', %d, %f, '%s', %d)\n", uplo, n, alpha, "AP", "x", incx, beta, "y", incy); cublasSspmv((char)uplo, n, alpha, nativeAP, nativeX, incx, beta, nativeY, incy); } /** *
* void
 * cublasSspr (char uplo, int n, float alpha, const float *x, int incx,
 *             float *AP)
 *
 * performs the symmetric rank 1 operation
 *
 *    A = alpha * x * transpose(x) + A,
 *
 * where alpha is a single precision scalar and x is an n element single
 * precision vector. A is a symmetric n x n matrix consisting of single
 * precision elements that is supplied in packed form.
 *
 * Input
 * -----
 * uplo   specifies whether the matrix data is stored in the upper or the lower
 *        triangular part of array AP. If uplo == 'U' or 'u', then the upper
 *        triangular part of A is supplied in AP. If uplo == 'L' or 'l', then
 *        the lower triangular part of A is supplied in AP.
 * n      specifies the number of rows and columns of the matrix A. It must be
 *        at least zero.
 * alpha  single precision scalar multiplier applied to x * transpose(x).
 * x      single precision array of length at least (1 + (n - 1) * abs(incx)).
 * incx   storage spacing between elements of x. incx must not be zero.
 * AP     single precision array with at least ((n * (n + 1)) / 2) elements. If
 *        uplo == 'U' or 'u', the array AP contains the upper triangular part
 *        of the symmetric matrix A, packed sequentially, column by column;
 *        that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If
 *        uplo == 'L' or 'l', the array AP contains the lower triangular part
 *        of the symmetric matrix A, packed sequentially, column by column;
 *        that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2].
 *
 * Output
 * ------
 * A      updated according to A = alpha * x * transpose(x) + A
 *
 * Reference: http://www.netlib.org/blas/sspr.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n < 0, or incx == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: throws java.lang.NullPointerException (without calling
 * cublasSspr) when x or AP is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsprNative(
    JNIEnv *env, jclass cls,
    jchar uplo, jint n, jfloat alpha,
    jobject x, jint incx, jobject AP)
{
    // Both pointer arguments are mandatory.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSspr");
        return;
    }
    if (AP == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasSspr");
        return;
    }

    // Resolve the Java pointer objects to native addresses.
    float* const vecX = (float*)getPointer(env, x);
    float* const packedA = (float*)getPointer(env, AP);

    Logger::log(LOG_TRACE,
        "Executing cublasSspr(%c, %d, %f, '%s', %d, '%s')\n",
        uplo, n, alpha, "x", incx, "AP");

    cublasSspr((char)uplo, n, alpha, vecX, incx, packedA);
}

/**
 *
* void
 * cublasSspr2 (char uplo, int n, float alpha, const float *x, int incx,
 *              const float *y, int incy, float *AP)
 *
 * performs the symmetric rank 2 operation
 *
 *    A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
 *
 * where alpha is a single precision scalar, and x and y are n element single
 * precision vectors. A is a symmetric n x n matrix consisting of single
 * precision elements that is supplied in packed form.
 *
 * Input
 * -----
 * uplo   specifies whether the matrix data is stored in the upper or the lower
 *        triangular part of array A. If uplo == 'U' or 'u', then only the
 *        upper triangular part of A may be referenced and the lower triangular
 *        part of A is inferred. If uplo == 'L' or 'l', then only the lower
 *        triangular part of A may be referenced and the upper triangular part
 *        of A is inferred.
 * n      specifies the number of rows and columns of the matrix A. It must be
 *        at least zero.
 * alpha  single precision scalar multiplier applied to x * transpose(y) +
 *        y * transpose(x).
 * x      single precision array of length at least (1 + (n - 1) * abs (incx)).
 * incx   storage spacing between elements of x. incx must not be zero.
 * y      single precision array of length at least (1 + (n - 1) * abs (incy)).
 * incy   storage spacing between elements of y. incy must not be zero.
 * AP     single precision array with at least ((n * (n + 1)) / 2) elements. If
 *        uplo == 'U' or 'u', the array AP contains the upper triangular part
 *        of the symmetric matrix A, packed sequentially, column by column;
 *        that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If
 *        uplo == 'L' or 'l', the array AP contains the lower triangular part
 *        of the symmetric matrix A, packed sequentially, column by column;
 *        that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2].
 *
 * Output
 * ------
 * A      updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A
 *
 * Reference: http://www.netlib.org/blas/sspr2.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n < 0, incx == 0, incy == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: throws java.lang.NullPointerException (without calling
 * cublasSspr2) when x, y or AP is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSspr2Native(
    JNIEnv *env, jclass cls,
    jchar uplo, jint n, jfloat alpha,
    jobject x, jint incx, jobject y, jint incy,
    jobject AP)
{
    // All three pointer arguments are mandatory.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSspr2");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSspr2");
        return;
    }
    if (AP == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasSspr2");
        return;
    }

    // Resolve the Java pointer objects to native addresses.
    float* const vecX = (float*)getPointer(env, x);
    float* const vecY = (float*)getPointer(env, y);
    float* const packedA = (float*)getPointer(env, AP);

    Logger::log(LOG_TRACE,
        "Executing cublasSspr2(%c, %d, %f, '%s', %d, '%s', %d, '%s')\n",
        uplo, n, alpha, "x", incx, "y", incy, "AP");

    cublasSspr2((char)uplo, n, alpha, vecX, incx, vecY, incy, packedA);
}

/**
 *
* void * cublasSsymv (char uplo, int n, float alpha, const float *A, int lda, * const float *x, int incx, float beta, float *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are single precision scalars, and x and y are single * precision vectors, each with n elements. A is a symmetric n x n matrix * consisting of single precision elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the symmetric matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the symmetric matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * alpha single precision scalar multiplier applied to A*x. * A single precision array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the symmetric matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the symmetric matrix and the strictly * upper triangular part of A is not referenced. * lda leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y. * y single precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. 
* incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/ssymv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsymvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jfloat alpha, jobject A, jint lda, jobject x, jint incx, jfloat beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSsymv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSsymv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasSsymv"); return; } float* nativeA; float* nativeX; float* nativeY; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); nativeY = (float*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasSsymv(%c, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n", uplo, n, alpha, "A", lda, "x", incx, beta, "y", incy); cublasSsymv((char)uplo, n, alpha, nativeA, lda, nativeX, incx, beta, nativeY, incy); } /** *
* void
 * cublasSsyr (char uplo, int n, float alpha, const float *x, int incx,
 *             float *A, int lda)
 *
 * performs the symmetric rank 1 operation
 *
 *    A = alpha * x * transpose(x) + A,
 *
 * where alpha is a single precision scalar, x is an n element single
 * precision vector and A is an n x n symmetric matrix consisting of
 * single precision elements. Matrix A is stored in column major format,
 * and lda is the leading dimension of the two-dimensional array
 * containing A.
 *
 * Input
 * -----
 * uplo   specifies whether the matrix data is stored in the upper or
 *        the lower triangular part of array A. If uplo = 'U' or 'u',
 *        then only the upper triangular part of A may be referenced.
 *        If uplo = 'L' or 'l', then only the lower triangular part of
 *        A may be referenced.
 * n      specifies the number of rows and columns of the matrix A. It
 *        must be at least 0.
 * alpha  single precision scalar multiplier applied to x * transpose(x)
 * x      single precision array of length at least (1 + (n - 1) * abs(incx))
 * incx   specifies the storage spacing between elements of x. incx must
 *        not be zero.
 * A      single precision array of dimensions (lda, n). If uplo = 'U' or
 *        'u', then A must contain the upper triangular part of a symmetric
 *        matrix, and the strictly lower triangular part is not referenced.
 *        If uplo = 'L' or 'l', then A contains the lower triangular part
 *        of a symmetric matrix, and the strictly upper triangular part is
 *        not referenced.
 * lda    leading dimension of the two-dimensional array containing A. lda
 *        must be at least max(1, n).
 *
 * Output
 * ------
 * A      updated according to A = alpha * x * transpose(x) + A
 *
 * Reference: http://www.netlib.org/blas/ssyr.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n < 0, or incx == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper: throws java.lang.NullPointerException (without calling
 * cublasSsyr) when x or A is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsyrNative(
    JNIEnv *env, jclass cls,
    jchar uplo, jint n, jfloat alpha,
    jobject x, jint incx, jobject A, jint lda)
{
    // Both pointer arguments are mandatory.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasSsyr");
        return;
    }
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSsyr");
        return;
    }

    // Resolve the Java pointer objects to native addresses.
    float* const vecX = (float*)getPointer(env, x);
    float* const matA = (float*)getPointer(env, A);

    Logger::log(LOG_TRACE,
        "Executing cublasSsyr(%c, %d, %f, '%s', %d, '%s', %d)\n",
        uplo, n, alpha, "x", incx, "A", lda);

    cublasSsyr((char)uplo, n, alpha, vecX, incx, matA, lda);
}

/**
 *
* void
* cublasSsyr2 (char uplo, int n, float alpha, const float *x, int incx,
*              const float *y, int incy, float *A, int lda)
*
* performs the symmetric rank 2 operation
*
*    A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
*
* where alpha is a single precision scalar, x and y are n element single
* precision vectors and A is an n by n symmetric matrix consisting of single
* precision elements.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or the lower
*        triangular part of array A. If uplo == 'U' or 'u', then only the
*        upper triangular part of A may be referenced and the lower triangular
*        part of A is inferred. If uplo == 'L' or 'l', then only the lower
*        triangular part of A may be referenced and the upper triangular part
*        of A is inferred.
* n      specifies the number of rows and columns of the matrix A. It must be
*        at least zero.
* alpha  single precision scalar multiplier applied to x * transpose(y) +
*        y * transpose(x).
* x      single precision array of length at least (1 + (n - 1) * abs (incx)).
* incx   storage spacing between elements of x. incx must not be zero.
* y      single precision array of length at least (1 + (n - 1) * abs (incy)).
* incy   storage spacing between elements of y. incy must not be zero.
* A      single precision array of dimensions (lda, n). If uplo == 'U' or 'u',
*        then A must contain the upper triangular part of a symmetric matrix,
*        and the strictly lower triangular part is not referenced. If uplo ==
*        'L' or 'l', then A contains the lower triangular part of a symmetric
*        matrix, and the strictly upper triangular part is not referenced.
* lda    leading dimension of A. It must be at least max(1, n).
*
* Output
* ------
* A      updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A
*
* Reference: http://www.netlib.org/blas/ssyr2.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, incx == 0, incy == 0
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding notes
* -----------------
* If x, y or A is null, ThrowByName is invoked for
* java/lang/NullPointerException and the function returns without
* calling cublasSsyr2.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsyr2Native
    (JNIEnv *env, jclass cls, jchar uplo, jint n, jfloat alpha,
     jobject x, jint incx, jobject y, jint incy, jobject A, jint lda)
{
    // Guard clauses: every pointer object must be present.
    if (!x)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasSsyr2");
        return;
    }
    if (!y)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'y' is null for cublasSsyr2");
        return;
    }
    if (!A)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'A' is null for cublasSsyr2");
        return;
    }

    // Obtain the native pointers behind the Java pointer objects.
    float* const vectorX = (float*)getPointer(env, x);
    float* const vectorY = (float*)getPointer(env, y);
    float* const matrixA = (float*)getPointer(env, A);

    Logger::log(LOG_TRACE,
        "Executing cublasSsyr2(%c, %d, %f, '%s', %d, '%s', %d, '%s', %d)\n",
        uplo, n, alpha, "x", incx, "y", incy, "A", lda);

    cublasSsyr2((char)uplo, n, alpha, vectorX, incx, vectorY, incy,
        matrixA, lda);
}

/**
*
* void * cublasStbmv (char uplo, char trans, char diag, int n, int k, const float *A, * int lda, float *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A * or op(A) = transpose(A). x is an n-element single precision vector, and A is * an n x n, unit or non-unit upper or lower triangular band matrix consisting * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A single precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. 
The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/stbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, k < 0, or incx == 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStbmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasStbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasStbmv"); return; } float* nativeA; float* nativeX; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasStbmv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasStbmv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void cublasStbsv (char uplo, char trans, char diag, int n, int k, * const float *A, int lda, float *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n-element vectors, and A is * an n x n unit or non-unit, upper or lower triangular band matrix with k + 1 * diagonals. No test for singularity or near-singularity is included in this * function. Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must be at least * zero. * A single precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. 
If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n-element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx storage spacing between elements of x. incx must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/stbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 4070 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStbsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasStbsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasStbsv"); return; } float* nativeA; float* nativeX; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasStbsv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasStbsv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void
* cublasStpmv (char uplo, char trans, char diag, int n, const float *AP,
*              float *x, int incx);
*
* performs one of the matrix-vector operations x = op(A) * x, where op(A) = A,
* or op(A) = transpose(A). x is an n element single precision vector, and A
* is an n x n, unit or non-unit, upper or lower triangular matrix composed
* of single precision elements.
*
* Input
* -----
* uplo   specifies whether the matrix A is an upper or lower triangular
*        matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix.
*        If uplo == 'L' or 'l', then A is a lower triangular matrix.
* trans  specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T',
*        't', 'C', or 'c', op(A) = transpose(A)
* diag   specifies whether or not matrix A is unit triangular. If diag == 'U'
*        or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A
*        is not assumed to be unit triangular.
* n      specifies the number of rows and columns of the matrix A. n must be
*        at least zero.
* AP     single precision array with at least ((n * (n + 1)) / 2) elements. If
*        uplo == 'U' or 'u', the array AP contains the upper triangular part
*        of the symmetric matrix A, packed sequentially, column by column;
*        that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If
*        uplo == 'L' or 'l', the array AP contains the lower triangular part
*        of the symmetric matrix A, packed sequentially, column by column;
*        that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2].
* x      single precision array of length at least (1 + (n - 1) * abs(incx)).
*        On entry, x contains the source vector. On exit, x is overwritten
*        with the result vector.
* incx   specifies the storage spacing for elements of x. incx must not be
*        zero.
*
* Output
* ------
* x      updated according to x = op(A) * x,
*
* Reference: http://www.netlib.org/blas/stpmv.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if incx == 0 or if n < 0
* CUBLAS_STATUS_ALLOC_FAILED     if function cannot allocate enough internal
*                                scratch vector memory
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding notes
* -----------------
* If AP or x is null, ThrowByName is invoked for
* java/lang/NullPointerException and the function returns without
* calling cublasStpmv.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStpmvNative
    (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag,
     jint n, jobject AP, jobject x, jint incx)
{
    // Guard clauses: every pointer object must be present.
    if (!AP)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'AP' is null for cublasStpmv");
        return;
    }
    if (!x)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasStpmv");
        return;
    }

    // Obtain the native pointers behind the Java pointer objects.
    float* const packedA = (float*)getPointer(env, AP);
    float* const vectorX = (float*)getPointer(env, x);

    Logger::log(LOG_TRACE,
        "Executing cublasStpmv(%c, %c, %c, %d, '%s', '%s', %d)\n",
        uplo, trans, diag, n, "AP", "x", incx);

    cublasStpmv((char)uplo, (char)trans, (char)diag, n,
        packedA, vectorX, incx);
}

/**
*
* void * cublasStpsv (char uplo, char trans, char diag, int n, const float *AP, * float *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is * an n x n unit or non-unit, upper or lower triangular matrix. No test for * singularity or near-singularity is included in this function. Such tests * must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP single precision array with at least ((n*(n+1))/2) elements. If uplo * == 'U' or 'u', the array AP contains the upper triangular matrix A, * packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n-element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx storage spacing between elements of x. It must not be zero. 
* * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/stpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0, or n > 4070 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStpsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasStpsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasStpsv"); return; } float* nativeAP; float* nativeX; nativeAP = (float*)getPointer(env, AP); nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasStpsv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasStpsv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* void * cublasStrmv (char uplo, char trans, char diag, int n, const float *A, * int lda, float *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = = A, or op(A) = transpose(A). x is an n-element single precision vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa = 'N' or 'n', op(A) = A. If trans = 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A single precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. 
* * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/strmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStrmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasStrmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasStrmv"); return; } float* nativeA; float* nativeX; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasStrmv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasStrmv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void * cublasStrsv (char uplo, char trans, char diag, int n, const float *A, * int lda, float *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A or * transpose(A). b and x are single precision vectors consisting of n * elements, and A is an n x n matrix composed of a unit or non-unit, upper * or lower triangular matrix. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array containing * A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a single precision array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. On exit, * it is overwritten with the solution vector x. 
* incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/strsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStrsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasStrsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasStrsv"); return; } float* nativeA; float* nativeX; nativeA = (float*)getPointer(env, A); nativeX = (float*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasStrsv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasStrsv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void * cublasZtrmv (char uplo, char trans, char diag, int n, const cuDoubleComplex *A, * int lda, cuDoubleComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, * where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * x is an n-element double precision complex vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of double precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' or * 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = * conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A double precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x double precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. 
* * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ztrmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtrmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZtrmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZtrmv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZtrmv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasZtrmv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void
* cublasZgbmv (char trans, int m, int n, int kl, int ku, cuDoubleComplex alpha,
*              const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta,
*              cuDoubleComplex *y, int incy);
*
* performs one of the matrix-vector operations
*
*    y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A)
*
* alpha and beta are double precision complex scalars. x and y are double precision
* complex vectors. A is an m by n band matrix consisting of double precision complex elements
* with kl sub-diagonals and ku super-diagonals.
*
* Input
* -----
* trans  specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T',
*        or 't', op(A) = transpose(A). If trans == 'C' or 'c',
*        op(A) = conjugate(transpose(A)).
* m      specifies the number of rows of the matrix A. m must be at least
*        zero.
* n      specifies the number of columns of the matrix A. n must be at least
*        zero.
* kl     specifies the number of sub-diagonals of matrix A. It must be at
*        least zero.
* ku     specifies the number of super-diagonals of matrix A. It must be at
*        least zero.
* alpha  double precision complex scalar multiplier applied to op(A).
* A      double precision complex array of dimensions (lda, n). The leading
*        (kl + ku + 1) x n part of the array A must contain the band matrix A,
*        supplied column by column, with the leading diagonal of the matrix
*        in row (ku + 1) of the array, the first super-diagonal starting at
*        position 2 in row ku, the first sub-diagonal starting at position 1
*        in row (ku + 2), and so on. Elements in the array A that do not
*        correspond to elements in the band matrix (such as the top left
*        ku x ku triangle) are not referenced.
* lda    leading dimension of A. lda must be at least (kl + ku + 1).
* x      double precision complex array of length at least (1+(n-1)*abs(incx)) when
*        trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise.
* incx   specifies the increment for the elements of x. incx must not be zero.
* beta   double precision complex scalar multiplier applied to vector y. If beta is
*        zero, y is not read.
* y      double precision complex array of length at least (1+(m-1)*abs(incy)) when
*        trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If
*        beta is zero, y is not read.
* incy   On entry, incy specifies the increment for the elements of y. incy
*        must not be zero.
*
* Output
* ------
* y      updated according to y = alpha*op(A)*x + beta*y
*
* Reference: http://www.netlib.org/blas/zgbmv.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, or if incx or incy == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding notes
* -----------------
* If A, x, y, alpha or beta is null, ThrowByName is invoked for
* java/lang/NullPointerException and the function returns without
* calling cublasZgbmv. The complex scalars alpha and beta are Java
* objects whose components are read with GetDoubleField through the
* cuDoubleComplex_x and cuDoubleComplex_y field IDs.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZgbmvNative
    (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jint kl, jint ku,
     jobject alpha, jobject A, jint lda, jobject x, jint incx,
     jobject beta, jobject y, jint incy)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'A' is null for cublasZgbmv");
        return;
    }
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasZgbmv");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'y' is null for cublasZgbmv");
        return;
    }
    // alpha and beta are objects too: passing a null reference to
    // GetDoubleField is invalid JNI usage, so reject it here. These checks
    // come after the A/x/y checks so that the exception raised for inputs
    // that were already rejected stays the same.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'alpha' is null for cublasZgbmv");
        return;
    }
    if (beta == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'beta' is null for cublasZgbmv");
        return;
    }

    // Obtain the native pointers behind the Java pointer objects.
    cuDoubleComplex* nativeA = (cuDoubleComplex*)getPointer(env, A);
    cuDoubleComplex* nativeX = (cuDoubleComplex*)getPointer(env, x);
    cuDoubleComplex* nativeY = (cuDoubleComplex*)getPointer(env, y);

    // Copy the Java-side complex scalars into native structs.
    cuDoubleComplex doubleComplexAlpha;
    doubleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x);
    doubleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y);

    cuDoubleComplex doubleComplexBeta;
    doubleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x);
    doubleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y);

    Logger::log(LOG_TRACE,
        "Executing cublasZgbmv(%c, %d, %d, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n",
        trans, m, n, kl, ku,
        doubleComplexAlpha.x, doubleComplexAlpha.y, "A", lda, "x", incx,
        doubleComplexBeta.x, doubleComplexBeta.y, "y", incy);

    cublasZgbmv((char)trans, m, n, kl, ku, doubleComplexAlpha,
        nativeA, lda, nativeX, incx, doubleComplexBeta, nativeY, incy);
}

/**
*
* void * cublasZtbmv (char uplo, char trans, char diag, int n, int k, const cuDoubleComplex *A, * int lda, cuDoubleComplex *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-element * double precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular band matrix composed of double precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A double precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. 
If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/ztbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtbmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZtbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZtbmv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZtbmv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasZtbmv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void cublasZtbsv (char uplo, char trans, char diag, int n, int k, * const cuDoubleComplex *A, int lda, cuDoubleComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * b and x are n element vectors, and A is an n x n unit or non-unit, * upper or lower triangular band matrix with k + 1 diagonals. No test * for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A double precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. 
If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x double precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ztbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 1016 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtbsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZtbsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZtbsv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZtbsv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasZtbsv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void * cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are double precision complex scalars, and x and y are double * precision complex vectors, each with n elements. A is a hermitian n x n matrix * consisting of double precision complex elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the hermitian matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the hermitian matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * hermitian matrix A. n must be at least zero. * alpha double precision complex scalar multiplier applied to A*x. * A double precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the hermitian matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the hermitian matrix and the strictly * upper triangular part of A is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * lda leading dimension of A. It must be at least max (1, n). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. 
incx must not be zero. * beta double precision complex scalar multiplier applied to vector y. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/zhemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZhemvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject A, jint lda, jobject x, jint incx, jobject beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZhemv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZhemv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZhemv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZhemv(%c, %d, [%lf,%lf], '%s', %d, 
'%s', %d, [%lf,%lf], '%s', %d)\n", uplo, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "x", incx, dobuleComplexBeta.x, dobuleComplexBeta.y, "y", incy); cublasZhemv((char)uplo, n, dobuleComplexAlpha, nativeA, lda, nativeX, incx, dobuleComplexBeta, nativeY, incy); } /** *
* void * cublasZhpmv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *AP, const cuDoubleComplex *x, * int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs the matrix-vector operation * * y = alpha * A * x + beta * y * * Alpha and beta are double precision complex scalars, and x and y are double * precision complex vectors with n elements. A is an hermitian n x n matrix * consisting of double precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision complex scalar multiplier applied to A*x. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero. * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision complex scalar multiplier applied to vector y; * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. 
* * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/zhpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZhpmvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject AP, jobject x, jint incx, jobject beta, jobject y, jint incy) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasZhpmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZhpmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZhpmv"); return; } cuDoubleComplex* nativeAP; cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeAP = (cuDoubleComplex*)getPointer(env, AP); nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZhpmv(%c, %d, [%lf,%lf], '%s', '%s', %d, [%lf,%lf], '%s', %d)\n", uplo, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "AP", "x", incx, dobuleComplexBeta.x, dobuleComplexBeta.y, "y", incy); cublasZhpmv((char)uplo, n, dobuleComplexAlpha, nativeAP, nativeX, incx, dobuleComplexBeta, nativeY, incy); } /** *
* cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs one of the matrix-vector operations * * y = alpha * op(A) * x + beta * y, * * where op(A) is one of * * op(A) = A or op(A) = transpose(A) * * where alpha and beta are double precision scalars, x and y are double * precision vectors, and A is an m x n matrix consisting of double precision * elements. Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array in which A is stored. * * Input * ----- * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If trans = * trans = 't', 'T', 'c', or 'C', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * alpha double precision scalar multiplier applied to op(A). * A double precision array of dimensions (lda, n) if trans = 'n' or * 'N'), and of dimensions (lda, m) otherwise. lda must be at least * max(1, m) and at least max(1, n) otherwise. * lda leading dimension of two-dimensional array used to store matrix A * x double precision array of length at least (1 + (n - 1) * abs(incx)) * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) * otherwise. * incx specifies the storage spacing between elements of x. incx must not * be zero. * beta double precision scalar multiplier applied to vector y. If beta * is zero, y is not read. * y double precision array of length at least (1 + (m - 1) * abs(incy)) * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) * otherwise. * incy specifies the storage spacing between elements of x. incx must not * be zero. 
* * Output * ------ * y updated according to alpha * op(A) * x + beta * y * * Reference: http://www.netlib.org/blas/zgemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZgemvNative (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jobject alpha, jobject A, jint lda, jobject x, jint incx, jobject beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZgemv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZgemv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZgemv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZgemv(%c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n", trans, m, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "x", incx, dobuleComplexBeta.x, dobuleComplexBeta.y, "y", incy); cublasZgemv((char)trans, m, n, dobuleComplexAlpha, nativeA, lda, nativeX, incx, dobuleComplexBeta, nativeY, incy); } /** *
* void * cublasZtpmv (char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, * cuDoubleComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n element * double precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular matrix composed of double precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. 
* * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ztpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtpmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasZtpmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZtpmv"); return; } cuDoubleComplex* nativeAP; cuDoubleComplex* nativeX; nativeAP = (cuDoubleComplex*)getPointer(env, AP); nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZtpmv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasZtpmv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* void * cublasZtpsv (char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, * cuDoubleComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose)). b and * x are n element complex vectors, and A is an n x n unit or non-unit, * upper or lower triangular matrix. No test for singularity or near-singularity * is included in this routine. Such tests must be performed before calling this routine. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T' * or 't', op(A) = transpose(A). If trans == 'C' or 'c', op(A) = * conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP double precision complex array with at least ((n*(n+1))/2) elements. * If uplo == 'U' or 'u', the array AP contains the upper triangular * matrix A, packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x double precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. 
* * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ztpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtpsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasZtpsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZtpsv"); return; } cuDoubleComplex* nativeAP; cuDoubleComplex* nativeX; nativeAP = (cuDoubleComplex*)getPointer(env, AP); nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZtpsv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasZtpsv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* cublasCgemv (char trans, int m, int n, cuComplex alpha, const cuComplex *A, * int lda, const cuComplex *x, int incx, cuComplex beta, cuComplex *y, * int incy) * * performs one of the matrix-vector operations * * y = alpha * op(A) * x + beta * y, * * where op(A) is one of * * op(A) = A or op(A) = transpose(A) or op(A) = conjugate(transpose(A)) * * where alpha and beta are single precision scalars, x and y are single * precision vectors, and A is an m x n matrix consisting of single precision * elements. Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array in which A is stored. * * Input * ----- * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If trans = * trans = 't' or 'T', op(A) = transpose(A). If trans = 'c' or 'C', * op(A) = conjugate(transpose(A)) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * alpha single precision scalar multiplier applied to op(A). * A single precision array of dimensions (lda, n) if trans = 'n' or * 'N'), and of dimensions (lda, m) otherwise. lda must be at least * max(1, m) and at least max(1, n) otherwise. * lda leading dimension of two-dimensional array used to store matrix A * x single precision array of length at least (1 + (n - 1) * abs(incx)) * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) * otherwise. * incx specifies the storage spacing between elements of x. incx must not * be zero. * beta single precision scalar multiplier applied to vector y. If beta * is zero, y is not read. * y single precision array of length at least (1 + (m - 1) * abs(incy)) * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) * otherwise. * incy specifies the storage spacing between elements of y. incy must not * be zero. 
* * Output * ------ * y updated according to alpha * op(A) * x + beta * y * * Reference: http://www.netlib.org/blas/cgemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCgemvNative (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jobject alpha, jobject A, jint lda, jobject x, jint incx, jobject beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCgemv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCgemv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCgemv"); return; } cuComplex* nativeA; cuComplex* nativeX; cuComplex* nativeY; cuComplex complexAlpha; cuComplex complexBeta; nativeA = (cuComplex*)getPointer(env, A); nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); complexBeta.x = env->GetFloatField(beta, cuComplex_x); complexBeta.y = env->GetFloatField(beta, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCgemv(%c, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n", trans, m, n, complexAlpha.x, complexAlpha.y, "A", lda, "x", incx, complexBeta.x, complexBeta.y, "y", incy); cublasCgemv((char)trans, m, n, complexAlpha, nativeA, lda, nativeX, incx, complexBeta, nativeY, incy); } /** *
* void
* cublasCgbmv (char trans, int m, int n, int kl, int ku, cuComplex alpha,
*              const cuComplex *A, int lda, const cuComplex *x, int incx, cuComplex beta,
*              cuComplex *y, int incy);
*
* performs one of the matrix-vector operations
*
*    y = alpha*op(A)*x + beta*y,
*
* where op(A) = A, op(A) = transpose(A) or op(A) = conjugate(transpose(A))
* (see 'trans' below).
*
* alpha and beta are single precision complex scalars. x and y are single
* precision complex vectors. A is an m by n band matrix consisting of single
* precision complex elements with kl sub-diagonals and ku super-diagonals.
*
* Input
* -----
* trans  specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T'
*        or 't', op(A) = transpose(A). If trans == 'C' or 'c',
*        op(A) = conjugate(transpose(A)).
* m      specifies the number of rows of the matrix A. m must be at least
*        zero.
* n      specifies the number of columns of the matrix A. n must be at least
*        zero.
* kl     specifies the number of sub-diagonals of matrix A. It must be at
*        least zero.
* ku     specifies the number of super-diagonals of matrix A. It must be at
*        least zero.
* alpha  single precision complex scalar multiplier applied to op(A).
* A      single precision complex array of dimensions (lda, n). The leading
*        (kl + ku + 1) x n part of the array A must contain the band matrix A,
*        supplied column by column, with the leading diagonal of the matrix
*        in row (ku + 1) of the array, the first super-diagonal starting at
*        position 2 in row ku, the first sub-diagonal starting at position 1
*        in row (ku + 2), and so on. Elements in the array A that do not
*        correspond to elements in the band matrix (such as the top left
*        ku x ku triangle) are not referenced.
* lda    leading dimension of A. lda must be at least (kl + ku + 1).
* x      single precision complex array of length at least
*        (1 + (n - 1) * abs(incx)) when trans == 'N' or 'n' and at least
*        (1 + (m - 1) * abs(incx)) otherwise.
* incx   specifies the increment for the elements of x. incx must not be zero.
* beta   single precision complex scalar multiplier applied to vector y. If
*        beta is zero, y is not read.
* y      single precision complex array of length at least
*        (1 + (m - 1) * abs(incy)) when trans == 'N' or 'n' and at least
*        (1 + (n - 1) * abs(incy)) otherwise. If beta is zero, y is not read.
* incy   On entry, incy specifies the increment for the elements of y. incy
*        must not be zero.
*
* Output
* ------
* y      updated according to y = alpha*op(A)*x + beta*y
*
* Reference: http://www.netlib.org/blas/cgbmv.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, or if incx or incy == 0
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: if 'A', 'x', 'y', 'alpha' or 'beta' is null (checked in that
* order), a java.lang.NullPointerException is raised through ThrowByName and
* CUBLAS is not called. Otherwise A, x and y are resolved with getPointer,
* the real and imaginary parts of alpha and beta are read from the fields
* identified by cuComplex_x / cuComplex_y, the call is logged at LOG_TRACE
* level and everything is forwarded to cublasCgbmv.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCgbmvNative
  (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jint kl, jint ku, jobject alpha, jobject A, jint lda, jobject x, jint incx, jobject beta, jobject y, jint incy)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCgbmv");
        return;
    }
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCgbmv");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCgbmv");
        return;
    }
    // Reading a field of a null object through JNI is undefined, so the
    // scalar holders are validated as well. These checks come after the
    // pre-existing ones so that previously reported errors are unchanged.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasCgbmv");
        return;
    }
    if (beta == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'beta' is null for cublasCgbmv");
        return;
    }

    cuComplex* nativeA = (cuComplex*)getPointer(env, A);
    cuComplex* nativeX = (cuComplex*)getPointer(env, x);
    cuComplex* nativeY = (cuComplex*)getPointer(env, y);

    // Copy the complex scalars out of their Java objects.
    cuComplex complexAlpha;
    cuComplex complexBeta;
    complexAlpha.x = env->GetFloatField(alpha, cuComplex_x);
    complexAlpha.y = env->GetFloatField(alpha, cuComplex_y);
    complexBeta.x = env->GetFloatField(beta, cuComplex_x);
    complexBeta.y = env->GetFloatField(beta, cuComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasCgbmv(%c, %d, %d, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n",
        trans, m, n, kl, ku, complexAlpha.x, complexAlpha.y, "A", lda, "x", incx, complexBeta.x, complexBeta.y, "y", incy);

    cublasCgbmv((char)trans, m, n, kl, ku, complexAlpha, nativeA, lda, nativeX, incx, complexBeta, nativeY, incy);
}

/**
*
* void
 * cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A, int lda,
 *              const cuComplex *x, int incx, cuComplex beta, cuComplex *y, int incy)
 *
 * performs the matrix-vector operation
 *
 *     y = alpha*A*x + beta*y
 *
 * Alpha and beta are single precision complex scalars, and x and y are single
 * precision complex vectors, each with n elements. A is a hermitian n x n matrix
 * consisting of single precision complex elements that is stored in either upper or
 * lower storage mode.
 *
 * Input
 * -----
 * uplo   specifies whether the upper or lower triangular part of the array A
 *        is to be referenced. If uplo == 'U' or 'u', the hermitian matrix A
 *        is stored in upper storage mode, i.e. only the upper triangular part
 *        of A is to be referenced while the lower triangular part of A is to
 *        be inferred. If uplo == 'L' or 'l', the hermitian matrix A is stored
 *        in lower storage mode, i.e. only the lower triangular part of A is
 *        to be referenced while the upper triangular part of A is to be
 *        inferred.
 * n      specifies the number of rows and the number of columns of the
 *        hermitian matrix A. n must be at least zero.
 * alpha  single precision complex scalar multiplier applied to A*x.
 * A      single precision complex array of dimensions (lda, n). If uplo == 'U' or 'u',
 *        the leading n x n upper triangular part of the array A must contain
 *        the upper triangular part of the hermitian matrix and the strictly
 *        lower triangular part of A is not referenced. If uplo == 'L' or 'l',
 *        the leading n x n lower triangular part of the array A must contain
 *        the lower triangular part of the hermitian matrix and the strictly
 *        upper triangular part of A is not referenced. The imaginary parts
 *        of the diagonal elements need not be set, they are assumed to be zero.
 * lda    leading dimension of A. It must be at least max (1, n).
 * x      single precision complex array of length at least (1 + (n - 1) * abs(incx)).
 * incx   storage spacing between elements of x. incx must not be zero.
 * beta   single precision complex scalar multiplier applied to vector y.
 * y      single precision complex array of length at least (1 + (n - 1) * abs(incy)).
 *        If beta is zero, y is not read.
 * incy   storage spacing between elements of y. incy must not be zero.
 *
 * Output
 * ------
 * y      updated according to y = alpha*A*x + beta*y
 *
 * Reference: http://www.netlib.org/blas/chemv.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if n < 0, or if incx or incy == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper
 * -----------
 * Raises java.lang.NullPointerException through ThrowByName, and returns
 * without calling cublasChemv, if A, x, y, alpha or beta is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasChemvNative
    (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject A, jint lda,
     jobject x, jint incx, jobject beta, jobject y, jint incy)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasChemv");
        return;
    }
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasChemv");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasChemv");
        return;
    }
    // The scalar objects are read with GetFloatField below; handing a null
    // object to that JNI call is invalid, so reject it like the arrays above.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasChemv");
        return;
    }
    if (beta == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'beta' is null for cublasChemv");
        return;
    }

    // Convert the Java-side objects into native cuComplex pointers.
    cuComplex* nativeA = (cuComplex*)getPointer(env, A);
    cuComplex* nativeX = (cuComplex*)getPointer(env, x);
    cuComplex* nativeY = (cuComplex*)getPointer(env, y);

    // Copy the complex scalars field by field out of the Java objects.
    cuComplex complexAlpha;
    complexAlpha.x = env->GetFloatField(alpha, cuComplex_x);
    complexAlpha.y = env->GetFloatField(alpha, cuComplex_y);
    cuComplex complexBeta;
    complexBeta.x = env->GetFloatField(beta, cuComplex_x);
    complexBeta.y = env->GetFloatField(beta, cuComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasChemv(%c, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n",
        uplo, n, complexAlpha.x, complexAlpha.y, "A", lda, "x", incx, complexBeta.x, complexBeta.y, "y", incy);

    cublasChemv((char)uplo, n, complexAlpha, nativeA, lda, nativeX, incx, complexBeta, nativeY, incy);
}

/**
 *
* void
 * cublasChbmv (char uplo, int n, int k, cuComplex alpha, const cuComplex *A, int lda,
 *              const cuComplex *x, int incx, cuComplex beta, cuComplex *y, int incy)
 *
 * performs the matrix-vector operation
 *
 *     y := alpha*A*x + beta*y
 *
 * alpha and beta are single precision complex scalars. x and y are single precision
 * complex vectors with n elements. A is an n by n hermitian band matrix consisting
 * of single precision complex elements, with k super-diagonals and the same number
 * of subdiagonals.
 *
 * Input
 * -----
 * uplo   specifies whether the upper or lower triangular part of the hermitian
 *        band matrix A is being supplied. If uplo == 'U' or 'u', the upper
 *        triangular part is being supplied. If uplo == 'L' or 'l', the lower
 *        triangular part is being supplied.
 * n      specifies the number of rows and the number of columns of the
 *        hermitian matrix A. n must be at least zero.
 * k      specifies the number of super-diagonals of matrix A. Since the matrix
 *        is hermitian, this is also the number of sub-diagonals. k must be at
 *        least zero.
 * alpha  single precision complex scalar multiplier applied to A*x.
 * A      single precision complex array of dimensions (lda, n). When uplo == 'U' or
 *        'u', the leading (k + 1) x n part of array A must contain the upper
 *        triangular band of the hermitian matrix, supplied column by column,
 *        with the leading diagonal of the matrix in row (k+1) of the array,
 *        the first super-diagonal starting at position 2 in row k, and so on.
 *        The top left k x k triangle of the array A is not referenced. When
 *        uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must
 *        contain the lower triangular band part of the hermitian matrix,
 *        supplied column by column, with the leading diagonal of the matrix in
 *        row 1 of the array, the first sub-diagonal starting at position 1 in
 *        row 2, and so on. The bottom right k x k triangle of the array A is
 *        not referenced. The imaginary parts of the diagonal elements need
 *        not be set, they are assumed to be zero.
 * lda    leading dimension of A. lda must be at least (k + 1).
 * x      single precision complex array of length at least (1 + (n - 1) * abs(incx)).
 * incx   storage spacing between elements of x. incx must not be zero.
 * beta   single precision complex scalar multiplier applied to vector y. If beta is
 *        zero, y is not read.
 * y      single precision complex array of length at least (1 + (n - 1) * abs(incy)).
 *        If beta is zero, y is not read.
 * incy   storage spacing between elements of y. incy must not be zero.
 *
 * Output
 * ------
 * y      updated according to y = alpha*A*x + beta*y
 *
 * Reference: http://www.netlib.org/blas/chbmv.f
 *
 * Error status for this function can be retrieved via cublasGetError().
 *
 * Error Status
 * ------------
 * CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
 * CUBLAS_STATUS_INVALID_VALUE    if k or n < 0, or if incx or incy == 0
 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
 *
 * JNI wrapper
 * -----------
 * Raises java.lang.NullPointerException through ThrowByName, and returns
 * without calling cublasChbmv, if A, x, y, alpha or beta is null.
 **/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasChbmvNative
    (JNIEnv *env, jclass cls, jchar uplo, jint n, jint k, jobject alpha, jobject A, jint lda,
     jobject x, jint incx, jobject beta, jobject y, jint incy)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasChbmv");
        return;
    }
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasChbmv");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasChbmv");
        return;
    }
    // The scalar objects are read with GetFloatField below; handing a null
    // object to that JNI call is invalid, so reject it like the arrays above.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasChbmv");
        return;
    }
    if (beta == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'beta' is null for cublasChbmv");
        return;
    }

    // Convert the Java-side objects into native cuComplex pointers.
    cuComplex* nativeA = (cuComplex*)getPointer(env, A);
    cuComplex* nativeX = (cuComplex*)getPointer(env, x);
    cuComplex* nativeY = (cuComplex*)getPointer(env, y);

    // Copy the complex scalars field by field out of the Java objects.
    cuComplex complexAlpha;
    complexAlpha.x = env->GetFloatField(alpha, cuComplex_x);
    complexAlpha.y = env->GetFloatField(alpha, cuComplex_y);
    cuComplex complexBeta;
    complexBeta.x = env->GetFloatField(beta, cuComplex_x);
    complexBeta.y = env->GetFloatField(beta, cuComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasChbmv(%c, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n",
        uplo, n, k, complexAlpha.x, complexAlpha.y, "A", lda, "x", incx, complexBeta.x, complexBeta.y, "y", incy);

    cublasChbmv((char)uplo, n, k, complexAlpha, nativeA, lda, nativeX, incx, complexBeta, nativeY, incy);
}

/**
 *
* * cublasCtrmv (char uplo, char trans, char diag, int n, const cuComplex *A, * int lda, cuComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, * where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * x is an n-element signle precision complex vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of single precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' or * 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = * conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A single precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. 
* * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ctrmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtrmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCtrmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCtrmv"); return; } cuComplex* nativeA; cuComplex* nativeX; nativeA = (cuComplex*)getPointer(env, A); nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCtrmv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasCtrmv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void * cublasCtbmv (char uplo, char trans, char diag, int n, int k, const cuComplex *A, * int lda, cuComplex *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-element * single precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular band matrix composed of single precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A single precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. 
If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/ctbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtbmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCtbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCtbmv"); return; } cuComplex* nativeA; cuComplex* nativeX; nativeA = (cuComplex*)getPointer(env, A); nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCtbmv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasCtbmv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void * cublasCtpmv (char uplo, char trans, char diag, int n, const cuComplex *AP, * cuComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n element * single precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular matrix composed of single precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP single precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. 
* * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ctpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtpmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasCtpmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCtpmv"); return; } cuComplex* nativeAP; cuComplex* nativeX; nativeAP = (cuComplex*)getPointer(env, AP); nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCtpmv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasCtpmv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* void * cublasCtrsv (char uplo, char trans, char diag, int n, const cuComplex *A, * int lda, cuComplex *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A, * transpose(A) or conjugate(transpose(A)). b and x are single precision * complex vectors consisting of n elements, and A is an n x n matrix * composed of a unit or non-unit, upper or lower triangular matrix. * Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array containing A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a single precision complex array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. 
On exit, * it is overwritten with the solution vector x. * incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ctrsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtrsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCtrsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCtrsv"); return; } cuComplex* nativeA; cuComplex* nativeX; nativeA = (cuComplex*)getPointer(env, A); nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCtrsv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasCtrsv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void cublasCtbsv (char uplo, char trans, char diag, int n, int k, * const cuComplex *A, int lda, cuComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * b and x are n element vectors, and A is an n x n unit or non-unit, * upper or lower triangular band matrix with k + 1 diagonals. No test * for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A single precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. 
If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x single precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ctbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 2035 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtbsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCtbsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCtbsv"); return; } cuComplex* nativeA; cuComplex* nativeX; nativeA = (cuComplex*)getPointer(env, A); nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCtbsv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasCtbsv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void * cublasCtpsv (char uplo, char trans, char diag, int n, const cuComplex *AP, * cuComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose)). b and * x are n element complex vectors, and A is an n x n unit or non-unit, * upper or lower triangular matrix. No test for singularity or near-singularity * is included in this routine. Such tests must be performed before calling this routine. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T' * or 't', op(A) = transpose(A). If trans == 'C' or 'c', op(A) = * conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP single precision complex array with at least ((n*(n+1))/2) elements. * If uplo == 'U' or 'u', the array AP contains the upper triangular * matrix A, packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x single precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. 
* * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ctpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtpsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasCtpsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCtpsv"); return; } cuComplex* nativeAP; cuComplex* nativeX; nativeAP = (cuComplex*)getPointer(env, AP); nativeX = (cuComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasCtpsv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasCtpsv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(y) + A, * * where alpha is a single precision complex scalar, x is an m element single * precision complex vector, y is an n element single precision complex vector, and A * is an m by n matrix consisting of single precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha single precision complex scalar multiplier applied to x * transpose(y) * x single precision complex array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y single precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A single precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * transpose(y) + A * * Reference: http://www.netlib.org/blas/cgeru.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m <0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCgeruNative (JNIEnv *env, jclass cls, jint m, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCgeru"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCgeru"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCgeru"); return; } cuComplex* nativeX; cuComplex* nativeY; cuComplex* nativeA; cuComplex complexAlpha; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); nativeA = (cuComplex*)getPointer(env, A); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCgeru(%d, %d, [%f,%f], '%s', %d, '%s', %d, '%s', %d)\n", m, n, complexAlpha.x, complexAlpha.y, "x", incx, "y", incy, "A", lda); cublasCgeru(m, n, complexAlpha, nativeX, incx, nativeY, incy, nativeA, lda); } /** *
* cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * conjugate(transpose(y)) + A, * * where alpha is a single precision complex scalar, x is an m element single * precision complex vector, y is an n element single precision complex vector, and A * is an m by n matrix consisting of single precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha single precision complex scalar multiplier applied to x * transpose(y) * x single precision complex array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y single precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A single precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(y)) + A * * Reference: http://www.netlib.org/blas/cgerc.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m <0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCgercNative (JNIEnv *env, jclass cls, jint m, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCgerc"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCgerc"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCgerc"); return; } cuComplex* nativeX; cuComplex* nativeY; cuComplex* nativeA; cuComplex complexAlpha; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); nativeA = (cuComplex*)getPointer(env, A); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCgerc(%d, %d, [%f,%f], '%s', %d, '%s', %d, '%s', %d)\n", m, n, complexAlpha.x, complexAlpha.y, "x", incx, "y", incy, "A", lda); cublasCgerc(m, n, complexAlpha, nativeX, incx, nativeY, incy, nativeA, lda); } /** *
* void * cublasCher (char uplo, int n, float alpha, const cuComplex *x, int incx, * cuComplex *A, int lda) * * performs the hermitian rank 1 operation * * A = alpha * x * conjugate(transpose(x)) + A, * * where alpha is a single precision real scalar, x is an n element single * precision complex vector and A is an n x n hermitian matrix consisting of * single precision complex elements. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array * containing A. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or * the lower triangular part of array A. If uplo = 'U' or 'u', * then only the upper triangular part of A may be referenced. * If uplo = 'L' or 'l', then only the lower triangular part of * A may be referenced. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * alpha single precision real scalar multiplier applied to * x * conjugate(transpose(x)) * x single precision complex array of length at least (1 + (n - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must * not be zero. * A single precision complex array of dimensions (lda, n). If uplo = 'U' or * 'u', then A must contain the upper triangular part of a hermitian * matrix, and the strictly lower triangular part is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part * of a hermitian matrix, and the strictly upper triangular part is * not referenced. The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * lda leading dimension of the two-dimensional array containing A. lda * must be at least max(1, n). * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(x)) + A * * Reference: http://www.netlib.org/blas/cher.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCherNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jfloat alpha, jobject x, jint incx, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCher"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCher"); return; } cuComplex* nativeX; cuComplex* nativeA; nativeX = (cuComplex*)getPointer(env, x); nativeA = (cuComplex*)getPointer(env, A); Logger::log(LOG_TRACE, "Executing cublasCher(%c, %d, %f, '%s', %d, '%s', %d)\n", uplo, n, alpha, "x", incx, "A", lda); cublasCher((char)uplo, n, alpha, nativeX, incx, nativeA, lda); } /** *
* void * cublasChpr (char uplo, int n, float alpha, const cuComplex *x, int incx, * cuComplex *AP) * * performs the hermitian rank 1 operation * * A = alpha * x * conjugate(transpose(x)) + A, * * where alpha is a single precision real scalar and x is an n element single * precision complex vector. A is a hermitian n x n matrix consisting of single * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision real scalar multiplier applied to x * conjugate(transpose(x)). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * AP single precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'l', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(x)) + A * * Reference: http://www.netlib.org/blas/chpr.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasChprNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jfloat alpha, jobject x, jint incx, jobject AP) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasChpr"); return; } if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasChpr"); return; } cuComplex* nativeX; cuComplex* nativeAP; nativeX = (cuComplex*)getPointer(env, x); nativeAP = (cuComplex*)getPointer(env, AP); Logger::log(LOG_TRACE, "Executing cublasChpr(%c, %d, %f, '%s', %d, '%s')\n", uplo, n, alpha, "x", incx, "AP"); cublasChpr((char)uplo, n, alpha, nativeX, incx, nativeAP); } /** *
* void * cublasChpr2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *AP) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a single precision complex scalar, and x and y are n element single * precision complex vectors. A is a hermitian n x n matrix consisting of single * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x single precision complex array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y single precision complex array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * AP single precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'l', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. 
* The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/chpr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasChpr2Native (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject AP) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasChpr2"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasChpr2"); return; } if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasChpr2"); return; } cuComplex* nativeX; cuComplex* nativeY; cuComplex* nativeAP; cuComplex complexAlpha; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); nativeAP = (cuComplex*)getPointer(env, AP); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasChpr2(%c, %d, [%f,%f], '%s', %d, '%s', %d, '%s')\n", uplo, n, complexAlpha.x, complexAlpha.y, "x", incx, "y", incy, "AP"); cublasChpr2((char)uplo, n, complexAlpha, nativeX, incx, nativeY, incy, nativeAP); } /** *
* void cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *A, int lda) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a single precision complex scalar, x and y are n element single * precision complex vector and A is an n by n hermitian matrix consisting of single * precision complex elements. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x single precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y single precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * A single precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * then A must contains the upper triangular part of a hermitian matrix, * and the strictly lower triangular parts is not referenced. If uplo == * 'L' or 'l', then A contains the lower triangular part of a hermitian * matrix, and the strictly upper triangular part is not referenced. * The imaginary parts of the diagonal elements need not be set, * they are assumed to be zero, and on exit they are set to zero. * * lda leading dimension of A. It must be at least max(1, n). 
* * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/cher2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCher2Native (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasCher2"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasCher2"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCher2"); return; } cuComplex* nativeX; cuComplex* nativeY; cuComplex* nativeA; cuComplex complexAlpha; nativeX = (cuComplex*)getPointer(env, x); nativeY = (cuComplex*)getPointer(env, y); nativeA = (cuComplex*)getPointer(env, A); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCher2(%c, %d, [%f,%f], '%s', %d, '%s', %d, '%s', %d)\n", uplo, n, complexAlpha.x, complexAlpha.y, "x", incx, "y", incy, "A", lda); cublasCher2((char)uplo, n, complexAlpha, nativeX, incx, nativeY, incy, nativeA, lda); } /** *
* void * cublasSgemm (char transa, char transb, int m, int n, int k, float alpha, * const float *A, int lda, const float *B, int ldb, float beta, * float *C, int ldc) * * computes the product of matrix A and matrix B, multiplies the result * by a scalar alpha, and adds the sum to the product of matrix C and * scalar beta. sgemm() performs one of the matrix-matrix operations: * * C = alpha * op(A) * op(B) + beta * C, * * where op(X) is one of * * op(X) = X or op(X) = transpose(X) * * alpha and beta are single precision scalars, and A, B and C are * matrices consisting of single precision elements, with op(A) an m x k * matrix, op(B) a k x n matrix, and C an m x n matrix. Matrices A, B, * and C are stored in column major format, and lda, ldb, and ldc are * the leading dimensions of the two-dimensional arrays containing A, * B, and C. * * Input * ----- * transa specifies op(A). If transa = 'n' or 'N', op(A) = A. If * transa = 't', 'T', 'c', or 'C', op(A) = transpose(A) * transb specifies op(B). If transb = 'n' or 'N', op(B) = B. If * transb = 't', 'T', 'c', or 'C', op(B) = transpose(B) * m number of rows of matrix op(A) and rows of matrix C * n number of columns of matrix op(B) and number of columns of C * k number of columns of matrix op(A) and number of rows of op(B) * alpha single precision scalar multiplier applied to op(A)op(B) * A single precision array of dimensions (lda, k) if transa = * 'n' or 'N'), and of dimensions (lda, m) otherwise. When transa = * 'N' or 'n' then lda must be at least max( 1, m ), otherwise lda * must be at least max(1, k). * lda leading dimension of two-dimensional array used to store matrix A * B single precision array of dimensions (ldb, n) if transb = * 'n' or 'N'), and of dimensions (ldb, k) otherwise. When transb = * 'N' or 'n' then ldb must be at least max (1, k), otherwise ldb * must be at least max (1, n). 
* ldb leading dimension of two-dimensional array used to store matrix B * beta single precision scalar multiplier applied to C. If 0, C does * not have to be a valid input * C single precision array of dimensions (ldc, n). ldc must be at * least max (1, m). * ldc leading dimension of two-dimensional array used to store matrix C * * Output * ------ * C updated based on C = alpha * op(A)*op(B) + beta * C * * Reference: http://www.netlib.org/blas/sgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSgemmNative (JNIEnv *env, jclass cls, jchar transa, jchar transb, jint m, jint n, jint k, jfloat alpha, jobject A, jint lda, jobject B, jint ldb, jfloat beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSgemm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasSgemm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasSgemm"); return; } float* nativeA; float* nativeB; float* nativeC; nativeA = (float*)getPointer(env, A); nativeB = (float*)getPointer(env, B); nativeC = (float*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasSgemm(%c, %c, %d, %d, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n", transa, transb, m, n, k, alpha, "A", lda, "B", ldb, beta, "C", ldc); cublasSgemm((char)transa, (char)transb, m, n, k, alpha, nativeA, lda, nativeB, ldb, beta, nativeC, ldc); } /** *
* void * cublasSsymm (char side, char uplo, int m, int n, float alpha, * const float *A, int lda, const float *B, int ldb, * float beta, float *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are single precision scalars, A is a symmetric matrix * consisting of single precision elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of single precision * elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha single precision scalar multiplier applied to A * B, or B * A * A single precision array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. 
If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'L' or 'l', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'L' or 'l', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B single precision array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/ssymm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsymmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jint m, jint n, jfloat alpha, jobject A, jint lda, jobject B, jint ldb, jfloat beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSsymm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasSsymm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasSsymm"); return; } float* nativeA; float* nativeB; float* nativeC; nativeA = (float*)getPointer(env, A); nativeB = (float*)getPointer(env, B); nativeC = (float*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasSsymm(%c, %c, %d, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n", side, uplo, m, n, alpha, "A", lda, "B", ldb, beta, "C", ldc); cublasSsymm((char)side, (char)uplo, m, n, alpha, nativeA, lda, nativeB, ldb, beta, nativeC, ldc); } /** *
* void * cublasSsyrk (char uplo, char trans, int n, int k, float alpha, * const float *A, int lda, float beta, float *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are single precision scalars. C is an n x n symmetric matrix * consisting of single precision elements and stored in either lower or * upper storage mode. A is a matrix consisting of single precision elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be inferred from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * inferred from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * A * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = alpha * transpose(A) * A + beta * C. * n specifies the number of rows and the number of columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision scalar multiplier applied to A * transpose(A) or * transpose(A) * A. * A single precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. 
When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/ssyrk.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsyrkNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jfloat alpha, jobject A, jint lda, jfloat beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSsyrk"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasSsyrk"); return; } float* nativeA; float* nativeC; nativeA = (float*)getPointer(env, A); nativeC = (float*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasSsyrk(%c, %c, %d, %d, %f, '%s', %d, %f, '%s', %d)\n", uplo, trans, n, k, alpha, "A", lda, beta, "C", ldc); cublasSsyrk((char)uplo, (char)trans, n, k, alpha, nativeA, lda, beta, nativeC, ldc); } /** *
* void * cublasSsyr2k (char uplo, char trans, int n, int k, float alpha, * const float *A, int lda, const float *B, int ldb, * float beta, float *C, int ldc) * * performs one of the symmetric rank 2k operations * * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. * * Alpha and beta are single precision scalars. C is an n x n symmetric matrix * consisting of single precision elements and stored in either lower or upper * storage mode. A and B are matrices consisting of single precision elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be inferred from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * inferred from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + * alpha * transpose(B) * A + beta * C. * n specifies the number of rows and the number of columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision scalar multiplier. 
* A single precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B single precision array of dimensions (ldb, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of B. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) + * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C * * Reference: http://www.netlib.org/blas/ssyr2k.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasSsyr2kNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jfloat alpha, jobject A, jint lda, jobject B, jint ldb, jfloat beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasSsyr2k"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasSsyr2k"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasSsyr2k"); return; } float* nativeA; float* nativeB; float* nativeC; nativeA = (float*)getPointer(env, A); nativeB = (float*)getPointer(env, B); nativeC = (float*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasSsyr2k(%c, %c, %d, %d, %f, '%s', %d, '%s', %d, %f, '%s', %d)\n", uplo, trans, n, k, alpha, "A", lda, "B", ldb, beta, "C", ldc); cublasSsyr2k((char)uplo, (char)trans, n, k, alpha, nativeA, lda, nativeB, ldb, beta, nativeC, ldc); } /** *
* void * cublasStrmm (char side, char uplo, char transa, char diag, int m, int n, * float alpha, const float *A, int lda, float *B, int ldb) * * performs one of the matrix-matrix operations * * B = alpha * op(A) * B, or B = alpha * B * op(A) * * where alpha is a single-precision scalar, B is an m x n matrix composed * of single precision elements, and A is a unit or non-unit, upper or lower, * triangular matrix composed of single precision elements. op(A) is one of * * op(A) = A or op(A) = transpose(A) * * Matrices A and B are stored in column major format, and lda and ldb are * the leading dimensions of the two-dimensional arrays that contain A and * B, respectively. * * Input * ----- * side specifies whether op(A) multiplies B from the left or right. * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = * 'R' or 'r', then B = alpha * B * op(A). * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. * If uplo = 'L' or 'l', A is a lower triangular matrix. * transa specifies the form of op(A) to be used in the matrix * multiplication. If transa = 'N' or 'n', then op(A) = A. If * transa = 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or * 'n', A is not assumed to be unit triangular. * m the number of rows of matrix B. m must be at least zero. * n the number of columns of matrix B. n must be at least zero. * alpha single precision scalar multiplier applied to op(A)*B, or * B*op(A), respectively. If alpha is zero no accesses are made * to matrix A, and no read accesses are made to matrix B. * A single precision array of dimensions (lda, k). k = m if side = * 'L' or 'l', k = n if side = 'R' or 'r'. 
If uplo = 'U' or 'u' * the leading k x k upper triangular part of the array A must * contain the upper triangular matrix, and the strictly lower * triangular part of A is not referenced. If uplo = 'L' or 'l' * the leading k x k lower triangular part of the array A must * contain the lower triangular matrix, and the strictly upper * triangular part of A is not referenced. When diag = 'U' or 'u' * the diagonal elements of A are not referenced and are assumed * to be unity. * lda leading dimension of A. When side = 'L' or 'l', it must be at * least max(1,m) and at least max(1,n) otherwise * B single precision array of dimensions (ldb, n). On entry, the * leading m x n part of the array contains the matrix B. It is * overwritten with the transformed matrix on exit. * ldb leading dimension of B. It must be at least max (1, m). * * Output * ------ * B updated according to B = alpha * op(A) * B or B = alpha * B * op(A) * * Reference: http://www.netlib.org/blas/strmm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStrmmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jfloat alpha, jobject A, jint lda, jobject B, jint ldb) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasStrmm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasStrmm"); return; } float* nativeA; float* nativeB; nativeA = (float*)getPointer(env, A); nativeB = (float*)getPointer(env, B); Logger::log(LOG_TRACE, "Executing cublasStrmm(%c, %c, %c, %c, %d, %d, %f, '%s', %d, '%s', %d)\n", side, uplo, transa, diag, m, n, alpha, "A", lda, "B", ldb); cublasStrmm((char)side, (char)uplo, (char)transa, (char)diag, m, n, alpha, nativeA, lda, nativeB, ldb); } /** *
* void * cublasStrsm (char side, char uplo, char transa, char diag, int m, int n, * float alpha, const float *A, int lda, float *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a single precision scalar, and X and B are m x n matrices * that are composed of single precision elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensional arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'n', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a single precision scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. * A is a single precision array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. 
If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular part of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a single precision array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/strsm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasStrsmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jfloat alpha, jobject A, jint lda, jobject B, jint ldb) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasStrsm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasStrsm"); return; } float* nativeA; float* nativeB; nativeA = (float*)getPointer(env, A); nativeB = (float*)getPointer(env, B); Logger::log(LOG_TRACE, "Executing cublasStrsm(%c, %c, %c, %c, %d, %d, %f, '%s', %d, '%s', %d)\n", side, uplo, transa, diag, m, n, alpha, "A", lda, "B", ldb); cublasStrsm((char)side, (char)uplo, (char)transa, (char)diag, m, n, alpha, nativeA, lda, nativeB, ldb); } /** *
* void cublasCgemm (char transa, char transb, int m, int n, int k, * cuComplex alpha, const cuComplex *A, int lda, * const cuComplex *B, int ldb, cuComplex beta, * cuComplex *C, int ldc) * * performs one of the matrix-matrix operations * * C = alpha * op(A) * op(B) + beta*C, * * where op(X) is one of * * op(X) = X or op(X) = transpose(X) or op(X) = conjg(transpose(X)) * * alpha and beta are single-complex scalars, and A, B and C are matrices * consisting of single-complex elements, with op(A) an m x k matrix, op(B) * a k x n matrix and C an m x n matrix. * * Input * ----- * transa specifies op(A). If transa == 'N' or 'n', op(A) = A. If transa == * 'T' or 't', op(A) = transpose(A). If transa == 'C' or 'c', op(A) = * conjg(transpose(A)). * transb specifies op(B). If transb == 'N' or 'n', op(B) = B. If transb == * 'T' or 't', op(B) = transpose(B). If transb == 'C' or 'c', op(B) = * conjg(transpose(B)). * m number of rows of matrix op(A) and rows of matrix C. It must be at * least zero. * n number of columns of matrix op(B) and number of columns of C. It * must be at least zero. * k number of columns of matrix op(A) and number of rows of op(B). It * must be at least zero. * alpha single-complex scalar multiplier applied to op(A)op(B) * A single-complex array of dimensions (lda, k) if transa == 'N' or * 'n', and of dimensions (lda, m) otherwise. * lda leading dimension of A. When transa == 'N' or 'n', it must be at * least max(1, m) and at least max(1, k) otherwise. * B single-complex array of dimensions (ldb, n) if transb == 'N' or 'n', * and of dimensions (ldb, k) otherwise * ldb leading dimension of B. When transb == 'N' or 'n', it must be at * least max(1, k) and at least max(1, n) otherwise. * beta single-complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single-complex array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m). 
* * Output * ------ * C updated according to C = alpha*op(A)*op(B) + beta*C * * Reference: http://www.netlib.org/blas/cgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCgemmNative (JNIEnv *env, jclass cls, jchar transa, jchar transb, jint m, jint n, jint k, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCgemm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasCgemm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasCgemm"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex* nativeC; cuComplex complexAlpha; cuComplex complexBeta; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); nativeC = (cuComplex*)getPointer(env, C); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); complexBeta.x = env->GetFloatField(beta, cuComplex_x); complexBeta.y = env->GetFloatField(beta, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCgemm(%c, %c, %d, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n", transa, transb, m, n, k, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb, complexBeta.x, complexBeta.y, "C", ldc); cublasCgemm((char)transa, (char)transb, m, n, k, complexAlpha, nativeA, lda, nativeB, ldb, complexBeta, nativeC, ldc); } /** *
* void * cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *B, int ldb, * cuComplex beta, cuComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are single precision complex scalars, A is a symmetric matrix * consisting of single precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of single precision * complex elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be inferred from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * inferred from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha single precision complex scalar multiplier applied to A * B, or B * A * A single precision complex array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. 
If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'L' or 'l', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'L' or 'l', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B single precision complex array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta single precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision complex array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/csymm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCsymmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCsymm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasCsymm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasCsymm"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex* nativeC; cuComplex complexAlpha; cuComplex complexBeta; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); nativeC = (cuComplex*)getPointer(env, C); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); complexBeta.x = env->GetFloatField(beta, cuComplex_x); complexBeta.y = env->GetFloatField(beta, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCsymm(%c, %c, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n", side, uplo, m, n, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb, complexBeta.x, complexBeta.y, "C", ldc); cublasCsymm((char)side, (char)uplo, m, n, complexAlpha, nativeA, lda, nativeB, ldb, complexBeta, nativeC, ldc); } /** *
* void * cublasChemm (char side, char uplo, int m, int n, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *B, int ldb, * cuComplex beta, cuComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are single precision complex scalars, A is a hermitian matrix * consisting of single precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of single precision * complex elements. * * Input * ----- * side specifies whether the hermitian matrix A appears on the left * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the hermitian matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be inferred from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * inferred from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of hermitian matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of hermitian * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha single precision complex scalar multiplier applied to A * B, or B * A * A single precision complex array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. 
If side == 'L' or 'l' the * leading m x m part of array A must contain the hermitian matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the hermitian matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'L' or 'l', * the leading m x m part stores the lower triangular part of the * hermitian matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the hermitian matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * hermitian matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'L' or 'l', the leading n x n part * stores the lower triangular part of the hermitian matrix and the * strictly upper triangular part is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B single precision complex array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta single precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision complex array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/chemm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasChemmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasChemm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasChemm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasChemm"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex* nativeC; cuComplex complexAlpha; cuComplex complexBeta; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); nativeC = (cuComplex*)getPointer(env, C); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); complexBeta.x = env->GetFloatField(beta, cuComplex_x); complexBeta.y = env->GetFloatField(beta, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasChemm(%c, %c, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n", side, uplo, m, n, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb, complexBeta.x, complexBeta.y, "C", ldc); cublasChemm((char)side, (char)uplo, m, n, complexAlpha, nativeA, lda, nativeB, ldb, complexBeta, nativeC, ldc); } /** *
* void * cublasCsyrk (char uplo, char trans, int n, int k, cuComplex alpha, * const cuComplex *A, int lda, cuComplex beta, cuComplex *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are single precision complex scalars. C is an n x n symmetric matrix * consisting of single precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of single precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be inferred from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * inferred from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * A * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = alpha * transpose(A) * A + beta * C. * n specifies the number of rows and the number of columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision complex scalar multiplier applied to A * transpose(A) or * transpose(A) * A. 
* A single precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta single precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/csyrk.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCsyrkNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jobject alpha, jobject A, jint lda, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCsyrk"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasCsyrk"); return; } cuComplex* nativeA; cuComplex* nativeC; cuComplex complexAlpha; cuComplex complexBeta; nativeA = (cuComplex*)getPointer(env, A); nativeC = (cuComplex*)getPointer(env, C); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); complexBeta.x = env->GetFloatField(beta, cuComplex_x); complexBeta.y = env->GetFloatField(beta, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCsyrk(%c, %c, %d, %d, [%f,%f], '%s', %d, [%f,%f], '%s', %d)\n", uplo, trans, n, k, complexAlpha.x, complexAlpha.y, "A", lda, complexBeta.x, complexBeta.y, "C", ldc); cublasCsyrk((char)uplo, (char)trans, n, k, complexAlpha, nativeA, lda, complexBeta, nativeC, ldc); } /** *
* void * cublasCherk (char uplo, char trans, int n, int k, float alpha, * const cuComplex *A, int lda, float beta, cuComplex *C, int ldc) * * performs one of the hermitian rank k operations * * C = alpha * A * conjugate(transpose(A)) + beta * C, or * C = alpha * conjugate(transpose(A)) * A + beta * C. * * Alpha and beta are single precision real scalars. C is an n x n hermitian matrix * consisting of single precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of single precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be inferred from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * inferred from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * A * conjugate(transpose(A)) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * A + beta * C. * n specifies the number of rows and the number of columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision scalar multiplier applied to A * conjugate(transpose(A)) or * conjugate(transpose(A)) * A. 
* A single precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * conjugate(transpose(A)) + beta * C, or C = * alpha * conjugate(transpose(A)) * A + beta * C * * Reference: http://www.netlib.org/blas/cherk.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCherkNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jfloat alpha, jobject A, jint lda, jfloat beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCherk"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasCherk"); return; } cuComplex* nativeA; cuComplex* nativeC; nativeA = (cuComplex*)getPointer(env, A); nativeC = (cuComplex*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasCherk(%c, %c, %d, %d, %f, '%s', %d, %f, '%s', %d)\n", uplo, trans, n, k, alpha, "A", lda, beta, "C", ldc); cublasCherk((char)uplo, (char)trans, n, k, alpha, nativeA, lda, beta, nativeC, ldc); } /** *
* void
* cublasCsyr2k (char uplo, char trans, int n, int k, cuComplex alpha,
*               const cuComplex *A, int lda, const cuComplex *B, int ldb,
*               cuComplex beta, cuComplex *C, int ldc)
*
* performs one of the symmetric rank 2k operations
*
*    C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
*    C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
*
* Alpha and beta are single precision complex scalars. C is an n x n symmetric matrix
* consisting of single precision complex elements and stored in either lower or upper
* storage mode. A and B are matrices consisting of single precision complex elements
* with dimension of n x k in the first case, and k x n in the second case.
*
* Input
* -----
* uplo   specifies whether the symmetric matrix C is stored in upper or lower
*        storage mode, as follows. If uplo == 'U' or 'u', only the upper
*        triangular part of the symmetric matrix is to be referenced, and the
*        elements of the strictly lower triangular part are to be inferred from
*        those in the upper triangular part. If uplo == 'L' or 'l', only the
*        lower triangular part of the symmetric matrix is to be referenced,
*        and the elements of the strictly upper triangular part are to be
*        inferred from those in the lower triangular part.
* trans  specifies the operation to be performed. If trans == 'N' or 'n',
*        C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C,
*        If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B +
*        alpha * transpose(B) * A + beta * C.
* n      specifies the number of rows and the number of columns of matrix C. If
*        trans == 'N' or 'n', n specifies the number of rows of matrix A. If
*        trans == 'T', 't', 'C', or 'c', n specifies the number of columns of
*        matrix A. n must be at least zero.
* k      If trans == 'N' or 'n', k specifies the number of columns of matrix A.
*        If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of
*        matrix A. k must be at least zero.
* alpha  single precision complex scalar multiplier.
* A      single precision complex array of dimensions (lda, ka), where ka is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array A must contain the matrix A,
*        otherwise the leading k x n part of the array must contain the matrix
*        A.
* lda    leading dimension of A. When trans == 'N' or 'n' then lda must be at
*        least max(1, n). Otherwise lda must be at least max(1,k).
* B      single precision complex array of dimensions (ldb, kb), where kb is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array B must contain the matrix B,
*        otherwise the leading k x n part of the array must contain the matrix
*        B.
* ldb    leading dimension of B. When trans == 'N' or 'n' then ldb must be at
*        least max(1, n). Otherwise ldb must be at least max(1, k).
* beta   single precision complex scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input.
* C      single precision complex array of dimensions (ldc, n). If uplo == 'U' or 'u',
*        the leading n x n triangular part of the array C must contain the
*        upper triangular part of the symmetric matrix C and the strictly
*        lower triangular part of C is not referenced. On exit, the upper
*        triangular part of C is overwritten by the upper triangular part of
*        the updated matrix. If uplo == 'L' or 'l', the leading n x n
*        triangular part of the array C must contain the lower triangular part
*        of the symmetric matrix C and the strictly upper triangular part of C
*        is not referenced. On exit, the lower triangular part of C is
*        overwritten by the lower triangular part of the updated matrix.
* ldc    leading dimension of C. Must be at least max(1, n).
*
* Output
* ------
* C      updated according to alpha*A*transpose(B) + alpha*B*transpose(A) +
*        beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C
*
* Reference: http://www.netlib.org/blas/csyr2k.f
*
* Error status for this function can be retrieved via cublasGetError().
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCsyr2kNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCsyr2k"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasCsyr2k"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasCsyr2k"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex* nativeC; cuComplex complexAlpha; cuComplex complexBeta; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); nativeC = (cuComplex*)getPointer(env, C); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); complexBeta.x = env->GetFloatField(beta, cuComplex_x); complexBeta.y = env->GetFloatField(beta, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCsyr2k(%c, %c, %d, %d, [%f,%f], '%s', %d, '%s', %d, [%f,%f], '%s', %d)\n", uplo, trans, n, k, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb, complexBeta.x, complexBeta.y, "C", ldc); cublasCsyr2k((char)uplo, (char)trans, n, k, complexAlpha, nativeA, lda, nativeB, ldb, complexBeta, nativeC, ldc); } /** *
* void
* cublasCher2k (char uplo, char trans, int n, int k, cuComplex alpha,
*               const cuComplex *A, int lda, const cuComplex *B, int ldb,
*               float beta, cuComplex *C, int ldc)
*
* performs one of the hermitian rank 2k operations
*
*    C =   alpha * A * conjugate(transpose(B))
*        + conjugate(alpha) * B * conjugate(transpose(A))
*        + beta * C ,
*    or
*    C =  alpha * conjugate(transpose(A)) * B
*       + conjugate(alpha) * conjugate(transpose(B)) * A
*       + beta * C.
*
* Alpha is a single precision complex scalar whereas Beta is a single precision real scalar.
* C is an n x n hermitian matrix consisting of single precision complex elements
* and stored in either lower or upper storage mode. A and B are matrices consisting
* of single precision complex elements with dimension of n x k in the first case,
* and k x n in the second case.
*
* Input
* -----
* uplo   specifies whether the hermitian matrix C is stored in upper or lower
*        storage mode, as follows. If uplo == 'U' or 'u', only the upper
*        triangular part of the hermitian matrix is to be referenced, and the
*        elements of the strictly lower triangular part are to be inferred from
*        those in the upper triangular part. If uplo == 'L' or 'l', only the
*        lower triangular part of the hermitian matrix is to be referenced,
*        and the elements of the strictly upper triangular part are to be
*        inferred from those in the lower triangular part.
* trans  specifies the operation to be performed. If trans == 'N' or 'n',
*        C =   alpha * A * conjugate(transpose(B))
*            + conjugate(alpha) * B * conjugate(transpose(A))
*            + beta * C .
*        If trans == 'T', 't', 'C', or 'c',
*        C =  alpha * conjugate(transpose(A)) * B
*          + conjugate(alpha) * conjugate(transpose(B)) * A
*          + beta * C.
* n      specifies the number of rows and the number of columns of matrix C. If
*        trans == 'N' or 'n', n specifies the number of rows of matrix A. If
*        trans == 'T', 't', 'C', or 'c', n specifies the number of columns of
*        matrix A. n must be at least zero.
* k      If trans == 'N' or 'n', k specifies the number of columns of matrix A.
*        If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of
*        matrix A. k must be at least zero.
* alpha  single precision complex scalar multiplier.
* A      single precision complex array of dimensions (lda, ka), where ka is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array A must contain the matrix A,
*        otherwise the leading k x n part of the array must contain the matrix
*        A.
* lda    leading dimension of A. When trans == 'N' or 'n' then lda must be at
*        least max(1, n). Otherwise lda must be at least max(1,k).
* B      single precision complex array of dimensions (ldb, kb), where kb is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array B must contain the matrix B,
*        otherwise the leading k x n part of the array must contain the matrix
*        B.
* ldb    leading dimension of B. When trans == 'N' or 'n' then ldb must be at
*        least max(1, n). Otherwise ldb must be at least max(1, k).
* beta   single precision scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input.
* C      single precision complex array of dimensions (ldc, n). If uplo == 'U' or 'u',
*        the leading n x n triangular part of the array C must contain the
*        upper triangular part of the hermitian matrix C and the strictly
*        lower triangular part of C is not referenced. On exit, the upper
*        triangular part of C is overwritten by the upper triangular part of
*        the updated matrix. If uplo == 'L' or 'l', the leading n x n
*        triangular part of the array C must contain the lower triangular part
*        of the hermitian matrix C and the strictly upper triangular part of C
*        is not referenced. On exit, the lower triangular part of C is
*        overwritten by the lower triangular part of the updated matrix.
* The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*conjugate(transpose(B)) + * + conjugate(alpha)*B*conjugate(transpose(A)) + beta*C or * alpha*conjugate(transpose(A))*B + conjugate(alpha)*conjugate(transpose(B))*A * + beta*C. * * Reference: http://www.netlib.org/blas/cher2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCher2kNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jfloat beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCher2k"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasCher2k"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasCher2k"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex* nativeC; cuComplex complexAlpha; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); nativeC = (cuComplex*)getPointer(env, C); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCher2k(%c, %c, %d, %d, [%f,%f], '%s', %d, '%s', %d, %f, '%s', %d)\n", uplo, trans, n, k, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb, beta, "C", ldc); cublasCher2k((char)uplo, (char)trans, n, k, complexAlpha, nativeA, lda, nativeB, ldb, beta, nativeC, ldc); } /** *
* void
* cublasCtrmm (char side, char uplo, char transa, char diag, int m, int n,
*              cuComplex alpha, const cuComplex *A, int lda, const cuComplex *B,
*              int ldb)
*
* performs one of the matrix-matrix operations
*
*   B = alpha * op(A) * B,  or  B = alpha * B * op(A)
*
* where alpha is a single-precision complex scalar, B is an m x n matrix composed
* of single precision complex elements, and A is a unit or non-unit, upper or lower,
* triangular matrix composed of single precision complex elements. op(A) is one of
*
*   op(A) = A  , op(A) = transpose(A) or op(A) = conjugate(transpose(A))
*
* Matrices A and B are stored in column major format, and lda and ldb are
* the leading dimensions of the two-dimensional arrays that contain A and
* B, respectively.
*
* Input
* -----
* side   specifies whether op(A) multiplies B from the left or right.
*        If side = 'L' or 'l', then B = alpha * op(A) * B. If side =
*        'R' or 'r', then B = alpha * B * op(A).
* uplo   specifies whether the matrix A is an upper or lower triangular
*        matrix. If uplo = 'U' or 'u', A is an upper triangular matrix.
*        If uplo = 'L' or 'l', A is a lower triangular matrix.
* transa specifies the form of op(A) to be used in the matrix
*        multiplication. If transa = 'N' or 'n', then op(A) = A. If
*        transa = 'T' or 't', then op(A) = transpose(A).
*        If transa = 'C' or 'c', then op(A) = conjugate(transpose(A)).
* diag   specifies whether or not A is unit triangular. If diag = 'U'
*        or 'u', A is assumed to be unit triangular. If diag = 'N' or
*        'n', A is not assumed to be unit triangular.
* m      the number of rows of matrix B. m must be at least zero.
* n      the number of columns of matrix B. n must be at least zero.
* alpha  single precision complex scalar multiplier applied to op(A)*B, or
*        B*op(A), respectively. If alpha is zero no accesses are made
*        to matrix A, and no read accesses are made to matrix B.
* A      single precision complex array of dimensions (lda, k). k = m if side =
*        'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u'
*        the leading k x k upper triangular part of the array A must
*        contain the upper triangular matrix, and the strictly lower
*        triangular part of A is not referenced. If uplo = 'L' or 'l'
*        the leading k x k lower triangular part of the array A must
*        contain the lower triangular matrix, and the strictly upper
*        triangular part of A is not referenced. When diag = 'U' or 'u'
*        the diagonal elements of A are not referenced and are assumed
*        to be unity.
* lda    leading dimension of A. When side = 'L' or 'l', it must be at
*        least max(1,m) and at least max(1,n) otherwise
* B      single precision complex array of dimensions (ldb, n). On entry, the
*        leading m x n part of the array contains the matrix B. It is
*        overwritten with the transformed matrix on exit.
* ldb    leading dimension of B. It must be at least max (1, m).
*
* Output
* ------
* B      updated according to B = alpha * op(A) * B  or B = alpha * B * op(A)
*
* Reference: http://www.netlib.org/blas/ctrmm.f
*
* Error status for this function can be retrieved via cublasGetError().
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtrmmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCtrmm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasCtrmm"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex complexAlpha; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCtrmm(%c, %c, %c, %c, %d, %d, [%f,%f], '%s', %d, '%s', %d)\n", side, uplo, transa, diag, m, n, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb); cublasCtrmm((char)side, (char)uplo, (char)transa, (char)diag, m, n, complexAlpha, nativeA, lda, nativeB, ldb); } /** *
* void
* cublasCtrsm (char side, char uplo, char transa, char diag, int m, int n,
*              cuComplex alpha, const cuComplex *A, int lda,
*              cuComplex *B, int ldb)
*
* solves one of the matrix equations
*
*    op(A) * X = alpha * B,   or   X * op(A) = alpha * B,
*
* where alpha is a single precision complex scalar, and X and B are m x n matrices
* that are composed of single precision complex elements. A is a unit or non-unit,
* upper or lower triangular matrix, and op(A) is one of
*
*    op(A) = A  or  op(A) = transpose(A)  or  op( A ) = conj( A' ).
*
* The result matrix X overwrites input matrix B; that is, on exit the result
* is stored in B. Matrices A and B are stored in column major format, and
* lda and ldb are the leading dimensions of the two-dimensional arrays that
* contain A and B, respectively.
*
* Input
* -----
* side   specifies whether op(A) appears on the left or right of X as
*        follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B.
*        side = 'R' or 'r' indicates solve X * op(A) = alpha * B.
* uplo   specifies whether the matrix A is an upper or lower triangular
*        matrix as follows: uplo = 'U' or 'u' indicates A is an upper
*        triangular matrix. uplo = 'L' or 'l' indicates A is a lower
*        triangular matrix.
* transa specifies the form of op(A) to be used in matrix multiplication
*        as follows: If transa = 'N' or 'n', then op(A) = A. If transa =
*        'T', 't', 'C', or 'c', then op(A) = transpose(A).
* diag   specifies whether or not A is a unit triangular matrix like so:
*        if diag = 'U' or 'u', A is assumed to be unit triangular. If
*        diag = 'N' or 'n', then A is not assumed to be unit triangular.
* m      specifies the number of rows of B. m must be at least zero.
* n      specifies the number of columns of B. n must be at least zero.
* alpha  is a single precision complex scalar to be multiplied with B. When alpha is
*        zero, then A is not referenced and B need not be set before entry.
* A      is a single precision complex array of dimensions (lda, k), where k is
*        m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If
*        uplo = 'U' or 'u', the leading k x k upper triangular part of
*        the array A must contain the upper triangular matrix and the
*        strictly lower triangular part of A is not referenced. When
*        uplo = 'L' or 'l', the leading k x k lower triangular part of
*        the array A must contain the lower triangular matrix and the
*        strictly upper triangular part of A is not referenced. Note that
*        when diag = 'U' or 'u', the diagonal elements of A are not
*        referenced, and are assumed to be unity.
* lda    is the leading dimension of the two dimensional array containing A.
*        When side = 'L' or 'l' then lda must be at least max(1, m), when
*        side = 'R' or 'r' then lda must be at least max(1, n).
* B      is a single precision complex array of dimensions (ldb, n). ldb must be
*        at least max (1,m). The leading m x n part of the array B must
*        contain the right-hand side matrix B. On exit B is overwritten
*        by the solution matrix X.
* ldb    is the leading dimension of the two dimensional array containing B.
*        ldb must be at least max(1, m).
*
* Output
* ------
* B      contains the solution matrix X satisfying op(A) * X = alpha * B,
*        or X * op(A) = alpha * B
*
* Reference: http://www.netlib.org/blas/ctrsm.f
*
* Error status for this function can be retrieved via cublasGetError().
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasCtrsmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasCtrsm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasCtrsm"); return; } cuComplex* nativeA; cuComplex* nativeB; cuComplex complexAlpha; nativeA = (cuComplex*)getPointer(env, A); nativeB = (cuComplex*)getPointer(env, B); complexAlpha.x = env->GetFloatField(alpha, cuComplex_x); complexAlpha.y = env->GetFloatField(alpha, cuComplex_y); Logger::log(LOG_TRACE, "Executing cublasCtrsm(%c, %c, %c, %c, %d, %d, [%f,%f], '%s', %d, '%s', %d)\n", side, uplo, transa, diag, m, n, complexAlpha.x, complexAlpha.y, "A", lda, "B", ldb); cublasCtrsm((char)side, (char)uplo, (char)transa, (char)diag, m, n, complexAlpha, nativeA, lda, nativeB, ldb); } /** *
* double * cublasDasum (int n, const double *x, int incx) * * computes the sum of the absolute values of the elements of double * precision vector x; that is, the result is the sum from i = 0 to n - 1 of * abs(x[1 + i * incx]). * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the double-precision sum of absolute values * (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jdouble JNICALL Java_jcuda_jcublas_JCublas_cublasDasumNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDasum"); return 0.0; } double* nativeX; nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDasum(%d, '%s', %d)\n", n, "x", incx); return cublasDasum(n, nativeX, incx); } /** *
* void * cublasDaxpy (int n, double alpha, const double *x, int incx, double *y, * int incy) * * multiplies double-precision vector x by double-precision scalar alpha * and adds the result to double-precision vector y; that is, it overwrites * double-precision y with double-precision alpha * x + y. For i = 0 to n-1, * it replaces y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i*incy], * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * alpha double-precision scalar multiplier * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y double-precision result (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/daxpy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library was not initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDaxpyNative (JNIEnv *env, jclass cls, jint n, jdouble alpha, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDaxpy"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDaxpy"); return; } double* nativeX; double* nativeY; nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDaxpy(%d, %lf, '%s', %d, '%s', %d)\n", n, alpha, "x", incx, "y", incy); cublasDaxpy(n, alpha, nativeX, incx, nativeY, incy); } /** *
* void * cublasDcopy (int n, const double *x, int incx, double *y, int incy) * * copies the double-precision vector x to the double-precision vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains double precision vector x * * Reference: http://www.netlib.org/blas/dcopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDcopyNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDcopy"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDcopy"); return; } double* nativeX; double* nativeY; nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDcopy(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasDcopy(n, nativeX, incx, nativeY, incy); } /** *
* double * cublasDdot (int n, const double *x, int incx, const double *y, int incy) * * computes the dot product of two double-precision vectors. It returns the * dot product of the double precision vectors x and y if successful, and * 0.0f otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * * incx] * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * *incx, and ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns double-precision dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/ddot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ JNIEXPORT jdouble JNICALL Java_jcuda_jcublas_JCublas_cublasDdotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDdot"); return 0.0; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDdot"); return 0.0; } double* nativeX; double* nativeY; nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDdot(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); return cublasDdot(n, nativeX, incx, nativeY, incy); } /** *
* double * dnrm2 (int n, const double *x, int incx) * * computes the Euclidean norm of the double-precision n-vector x (with * storage increment incx). This code uses a multiphase model of * accumulation to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dnrm2.f * Reference: http://www.netlib.org/slatec/lin/dnrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jdouble JNICALL Java_jcuda_jcublas_JCublas_cublasDnrm2Native (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDnrm2"); return 0.0; } double* nativeX; nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDnrm2(%d, '%s', %d)\n", n, "x", incx); return cublasDnrm2(n, nativeX, incx); } /** *
* void * cublasDrot (int n, double *x, int incx, double *y, int incy, double sc, * double ss) * * multiplies a 2x2 matrix ( sc ss) with the 2xn matrix ( transpose(x) ) * (-ss sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * sc element of rotation matrix * ss element of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/drot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDrotNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy, jdouble sc, jdouble ss) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDrot"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDrot"); return; } double* nativeX; double* nativeY; nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDrot(%d, '%s', %d, '%s', %d, %lf, %lf)\n", n, "x", incx, "y", incy, sc, ss); cublasDrot(n, nativeX, incx, nativeY, incy, sc, ss); } /** *
* void
* cublasDrotg (double *host_sa, double *host_sb, double *host_sc, double *host_ss)
*
* constructs the Givens transformation
*
*        ( sc  ss )
*    G = (        ) ,  sc^2 + ss^2 = 1,
*        (-ss  sc )
*
* which zeros the second entry of the 2-vector transpose(sa, sb).
*
* The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The
* value of sb is overwritten by a value z which allows sc and ss to be
* recovered by the following algorithm:
*
*    if z=1          set sc = 0.0 and ss = 1.0
*    if abs(z) < 1   set sc = sqrt(1-z^2) and ss = z
*    if abs(z) > 1   set sc = 1/z and ss = sqrt(1-sc^2)
*
* The function drot (n, x, incx, y, incy, sc, ss) normally is called next
* to apply the transformation to a 2 x n matrix.
* Note that this function is provided for completeness and runs exclusively
* on the Host.
*
* Input
* -----
* sa     double-precision scalar
* sb     double-precision scalar
*
* Output
* ------
* sa     double-precision r
* sb     double-precision z
* sc     double-precision result
* ss     double-precision result
*
* Reference: http://www.netlib.org/blas/drotg.f
*
* This function does not set any error status.
*/
// JNI entry point behind JCublas.cublasDrotgNative. Each of the four
// arguments must be non-null; the first null one found (in the order
// host_sa, host_sb, host_sc, host_ss) is reported through ThrowByName as a
// java.lang.NullPointerException and the call is abandoned. Otherwise the
// request is forwarded to cublasDrotg.
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDrotgNative
    (JNIEnv *env, jclass cls, jobject host_sa, jobject host_sb, jobject host_sc, jobject host_ss)
{
    if (host_sa == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sa' is null for cublasDrotg");
        return;
    }
    if (host_sb == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sb' is null for cublasDrotg");
        return;
    }
    if (host_sc == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_sc' is null for cublasDrotg");
        return;
    }
    if (host_ss == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'host_ss' is null for cublasDrotg");
        return;
    }

    // Resolve the Java-side pointer objects, in argument order.
    double* const ptrSa = (double*)getPointer(env, host_sa);
    double* const ptrSb = (double*)getPointer(env, host_sb);
    double* const ptrSc = (double*)getPointer(env, host_sc);
    double* const ptrSs = (double*)getPointer(env, host_ss);

    Logger::log(LOG_TRACE, "Executing cublasDrotg('%s', '%s', '%s', '%s')\n",
        "host_sa", "host_sb", "host_sc", "host_ss");

    cublasDrotg(ptrSa, ptrSb, ptrSc, ptrSs);
}

/**
*
* void * cublasDscal (int n, double alpha, double *x, int incx) * * replaces double-precision vector x with double-precision alpha * x. For * i = 0 to n-1, it replaces x[lx + i * incx] with alpha * x[lx + i * incx], * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vector * alpha double-precision scalar multiplier * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double-precision result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/dscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library was not initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDscalNative (JNIEnv *env, jclass cls, jint n, jdouble alpha, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDscal"); return; } double* nativeX; nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDscal(%d, %lf, '%s', %d)\n", n, alpha, "x", incx); cublasDscal(n, alpha, nativeX, incx); } /** *
* void * cublasDswap (int n, double *x, int incx, double *y, int incy) * * replaces double-precision vector x with double-precision alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha double-precision scalar multiplier * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double precision result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/dswap.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDswapNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx, jobject y, jint incy) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDswap"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDswap"); return; } double* nativeX; double* nativeY; nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDswap(%d, '%s', %d, '%s', %d)\n", n, "x", incx, "y", incy); cublasDswap(n, nativeX, incx, nativeY, incy); } /** *
* int * idamax (int n, const double *x, int incx) * * finds the smallest index of the maximum magnitude element of double- * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that maximizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/idamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIdamaxNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIdamax"); return 0; } double* nativeX; nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasIdamax(%d, '%s', %d)\n", n, "x", incx); return cublasIdamax(n, nativeX, incx); } /** *
* int * idamin (int n, const double *x, int incx) * * finds the smallest index of the minimum magnitude element of double- * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that minimizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/scilib/blass.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT jint JNICALL Java_jcuda_jcublas_JCublas_cublasIdaminNative (JNIEnv *env, jclass cls, jint n, jobject x, jint incx) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasIdamin"); return 0; } double* nativeX; nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasIdamin(%d, '%s', %d)\n", n, "x", incx); return cublasIdamin(n, nativeX, incx); } /** *
* cublasDgemv (char trans, int m, int n, double alpha, const double *A,
*              int lda, const double *x, int incx, double beta, double *y,
*              int incy)
*
* performs one of the matrix-vector operations
*
*    y = alpha * op(A) * x + beta * y,
*
* where op(A) is one of
*
*    op(A) = A   or   op(A) = transpose(A)
*
* where alpha and beta are double precision scalars, x and y are double
* precision vectors, and A is an m x n matrix consisting of double precision
* elements. Matrix A is stored in column major format, and lda is the leading
* dimension of the two-dimensional array in which A is stored.
*
* Input
* -----
* trans  specifies op(A). If trans = 'n' or 'N', op(A) = A. If
*        trans = 't', 'T', 'c', or 'C', op(A) = transpose(A)
* m      specifies the number of rows of the matrix A. m must be at least
*        zero.
* n      specifies the number of columns of the matrix A. n must be at least
*        zero.
* alpha  double precision scalar multiplier applied to op(A).
* A      double precision array of dimensions (lda, n) if trans = 'n' or
*        'N', and of dimensions (lda, m) otherwise. lda must be at least
*        max(1, m) and at least max(1, n) otherwise.
* lda    leading dimension of two-dimensional array used to store matrix A
* x      double precision array of length at least (1 + (n - 1) * abs(incx))
*        when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx))
*        otherwise.
* incx   specifies the storage spacing between elements of x. incx must not
*        be zero.
* beta   double precision scalar multiplier applied to vector y. If beta
*        is zero, y is not read.
* y      double precision array of length at least (1 + (m - 1) * abs(incy))
*        when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy))
*        otherwise.
* incy   specifies the storage spacing between elements of y. incy must not
*        be zero.
*
* Output
* ------
* y      updated according to alpha * op(A) * x + beta * y
*
* Reference: http://www.netlib.org/blas/dgemv.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if m or n are < 0, or if incx or incy == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: A, x and y are checked for null, in that order. The first
* null argument is reported as a java.lang.NullPointerException through
* ThrowByName and the wrapper returns without calling cublasDgemv. The
* jchar trans is narrowed to char for the native call.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDgemvNative
  (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jdouble alpha, jobject A, jint lda, jobject x, jint incx, jdouble beta, jobject y, jint incy)
{
    // Argument validation: bail out on the first null parameter.
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'A' is null for cublasDgemv");
        return;
    }
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasDgemv");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'y' is null for cublasDgemv");
        return;
    }

    // Obtain the native double* behind each Java argument.
    double* const aPtr = (double*)getPointer(env, A);
    double* const xPtr = (double*)getPointer(env, x);
    double* const yPtr = (double*)getPointer(env, y);

    Logger::log(LOG_TRACE, "Executing cublasDgemv(%c, %d, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n",
        trans, m, n, alpha, "A", lda, "x", incx, beta, "y", incy);

    cublasDgemv((char)trans, m, n, alpha, aPtr, lda, xPtr, incx, beta, yPtr, incy);
}

/**
*
* cublasDger (int m, int n, double alpha, const double *x, int incx,
*             const double *y, int incy, double *A, int lda)
*
* performs the rank 1 operation
*
*    A = alpha * x * transpose(y) + A,
*
* where alpha is a double precision scalar, x is an m element double
* precision vector, y is an n element double precision vector, and A
* is an m by n matrix consisting of double precision elements. Matrix A
* is stored in column major format, and lda is the leading dimension of
* the two-dimensional array used to store A.
*
* Input
* -----
* m      specifies the number of rows of the matrix A. It must be at least
*        zero.
* n      specifies the number of columns of the matrix A. It must be at
*        least zero.
* alpha  double precision scalar multiplier applied to x * transpose(y)
* x      double precision array of length at least (1 + (m - 1) * abs(incx))
* incx   specifies the storage spacing between elements of x. incx must not
*        be zero.
* y      double precision array of length at least (1 + (n - 1) * abs(incy))
* incy   specifies the storage spacing between elements of y. incy must not
*        be zero.
* A      double precision array of dimensions (lda, n).
* lda    leading dimension of two-dimensional array used to store matrix A
*
* Output
* ------
* A      updated according to A = alpha * x * transpose(y) + A
*
* Reference: http://www.netlib.org/blas/dger.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, incx == 0, incy == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: x, y and A are checked for null, in that order. The first
* null argument is reported as a java.lang.NullPointerException through
* ThrowByName and the wrapper returns without calling cublasDger.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDgerNative
  (JNIEnv *env, jclass cls, jint m, jint n, jdouble alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda)
{
    // Argument validation: bail out on the first null parameter.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasDger");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'y' is null for cublasDger");
        return;
    }
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'A' is null for cublasDger");
        return;
    }

    // Obtain the native double* behind each Java argument.
    double* const xPtr = (double*)getPointer(env, x);
    double* const yPtr = (double*)getPointer(env, y);
    double* const aPtr = (double*)getPointer(env, A);

    Logger::log(LOG_TRACE, "Executing cublasDger(%d, %d, %lf, '%s', %d, '%s', %d, '%s', %d)\n",
        m, n, alpha, "x", incx, "y", incy, "A", lda);

    cublasDger(m, n, alpha, xPtr, incx, yPtr, incy, aPtr, lda);
}

/**
*
* void
* cublasDsyr (char uplo, int n, double alpha, const double *x, int incx,
*             double *A, int lda)
*
* performs the symmetric rank 1 operation
*
*    A = alpha * x * transpose(x) + A,
*
* where alpha is a double precision scalar, x is an n element double
* precision vector and A is an n x n symmetric matrix consisting of
* double precision elements. Matrix A is stored in column major format,
* and lda is the leading dimension of the two-dimensional array
* containing A.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or
*        the lower triangular part of array A. If uplo = 'U' or 'u',
*        then only the upper triangular part of A may be referenced.
*        If uplo = 'L' or 'l', then only the lower triangular part of
*        A may be referenced.
* n      specifies the number of rows and columns of the matrix A. It
*        must be at least 0.
* alpha  double precision scalar multiplier applied to x * transpose(x)
* x      double precision array of length at least (1 + (n - 1) * abs(incx))
* incx   specifies the storage spacing between elements of x. incx must
*        not be zero.
* A      double precision array of dimensions (lda, n). If uplo = 'U' or
*        'u', then A must contain the upper triangular part of a symmetric
*        matrix, and the strictly lower triangular part is not referenced.
*        If uplo = 'L' or 'l', then A contains the lower triangular part
*        of a symmetric matrix, and the strictly upper triangular part is
*        not referenced.
* lda    leading dimension of the two-dimensional array containing A. lda
*        must be at least max(1, n).
*
* Output
* ------
* A      updated according to A = alpha * x * transpose(x) + A
*
* Reference: http://www.netlib.org/blas/dsyr.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, or incx == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: x and A are checked for null, in that order. The first null
* argument is reported as a java.lang.NullPointerException through
* ThrowByName and the wrapper returns without calling cublasDsyr. The
* jchar uplo is narrowed to char for the native call.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsyrNative
  (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha, jobject x, jint incx, jobject A, jint lda)
{
    // Argument validation: bail out on the first null parameter.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasDsyr");
        return;
    }
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'A' is null for cublasDsyr");
        return;
    }

    // Obtain the native double* behind each Java argument.
    double* const xPtr = (double*)getPointer(env, x);
    double* const aPtr = (double*)getPointer(env, A);

    Logger::log(LOG_TRACE, "Executing cublasDsyr(%c, %d, %lf, '%s', %d, '%s', %d)\n",
        uplo, n, alpha, "x", incx, "A", lda);

    cublasDsyr((char)uplo, n, alpha, xPtr, incx, aPtr, lda);
}

/**
*
* void cublasDsyr2 (char uplo, int n, double alpha, const double *x, int incx,
*                   const double *y, int incy, double *A, int lda)
*
* performs the symmetric rank 2 operation
*
*    A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
*
* where alpha is a double precision scalar, x and y are n element double
* precision vectors and A is an n by n symmetric matrix consisting of double
* precision elements.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or the lower
*        triangular part of array A. If uplo == 'U' or 'u', then only the
*        upper triangular part of A may be referenced and the lower triangular
*        part of A is inferred. If uplo == 'L' or 'l', then only the lower
*        triangular part of A may be referenced and the upper triangular part
*        of A is inferred.
* n      specifies the number of rows and columns of the matrix A. It must be
*        at least zero.
* alpha  double precision scalar multiplier applied to x * transpose(y) +
*        y * transpose(x).
* x      double precision array of length at least (1 + (n - 1) * abs (incx)).
* incx   storage spacing between elements of x. incx must not be zero.
* y      double precision array of length at least (1 + (n - 1) * abs (incy)).
* incy   storage spacing between elements of y. incy must not be zero.
* A      double precision array of dimensions (lda, n). If uplo == 'U' or 'u',
*        then A must contain the upper triangular part of a symmetric matrix,
*        and the strictly lower triangular part is not referenced. If uplo ==
*        'L' or 'l', then A contains the lower triangular part of a symmetric
*        matrix, and the strictly upper triangular part is not referenced.
* lda    leading dimension of A. It must be at least max(1, n).
*
* Output
* ------
* A      updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A
*
* Reference: http://www.netlib.org/blas/dsyr2.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, incx == 0, incy == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: x, y and A are checked for null, in that order. The first
* null argument is reported as a java.lang.NullPointerException through
* ThrowByName and the wrapper returns without calling cublasDsyr2. The
* jchar uplo is narrowed to char for the native call.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsyr2Native
  (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda)
{
    // Argument validation: bail out on the first null parameter.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasDsyr2");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'y' is null for cublasDsyr2");
        return;
    }
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'A' is null for cublasDsyr2");
        return;
    }

    // Obtain the native double* behind each Java argument.
    double* const xPtr = (double*)getPointer(env, x);
    double* const yPtr = (double*)getPointer(env, y);
    double* const aPtr = (double*)getPointer(env, A);

    Logger::log(LOG_TRACE, "Executing cublasDsyr2(%c, %d, %lf, '%s', %d, '%s', %d, '%s', %d)\n",
        uplo, n, alpha, "x", incx, "y", incy, "A", lda);

    cublasDsyr2((char)uplo, n, alpha, xPtr, incx, yPtr, incy, aPtr, lda);
}

/**
*
* void
* cublasDspr (char uplo, int n, double alpha, const double *x, int incx,
*             double *AP)
*
* performs the symmetric rank 1 operation
*
*    A = alpha * x * transpose(x) + A,
*
* where alpha is a double precision scalar and x is an n element double
* precision vector. A is a symmetric n x n matrix consisting of double
* precision elements that is supplied in packed form.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or the lower
*        triangular part of array AP. If uplo == 'U' or 'u', then the upper
*        triangular part of A is supplied in AP. If uplo == 'L' or 'l', then
*        the lower triangular part of A is supplied in AP.
* n      specifies the number of rows and columns of the matrix A. It must be
*        at least zero.
* alpha  double precision scalar multiplier applied to x * transpose(x).
* x      double precision array of length at least (1 + (n - 1) * abs(incx)).
* incx   storage spacing between elements of x. incx must not be zero.
* AP     double precision array with at least ((n * (n + 1)) / 2) elements. If
*        uplo == 'U' or 'u', the array AP contains the upper triangular part
*        of the symmetric matrix A, packed sequentially, column by column;
*        that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If
*        uplo == 'L' or 'l', the array AP contains the lower triangular part
*        of the symmetric matrix A, packed sequentially, column by column;
*        that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2].
*
* Output
* ------
* A      updated according to A = alpha * x * transpose(x) + A
*
* Reference: http://www.netlib.org/blas/dspr.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, or incx == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: x and AP are checked for null, in that order. The first null
* argument is reported as a java.lang.NullPointerException through
* ThrowByName and the wrapper returns without calling cublasDspr. The
* jchar uplo is narrowed to char for the native call.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsprNative
  (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha, jobject x, jint incx, jobject AP)
{
    // Argument validation: bail out on the first null parameter.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasDspr");
        return;
    }
    if (AP == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'AP' is null for cublasDspr");
        return;
    }

    // Obtain the native double* behind each Java argument.
    double* const xPtr = (double*)getPointer(env, x);
    double* const apPtr = (double*)getPointer(env, AP);

    Logger::log(LOG_TRACE, "Executing cublasDspr(%c, %d, %lf, '%s', %d, '%s')\n",
        uplo, n, alpha, "x", incx, "AP");

    cublasDspr((char)uplo, n, alpha, xPtr, incx, apPtr);
}

/**
*
* void
* cublasDspr2 (char uplo, int n, double alpha, const double *x, int incx,
*              const double *y, int incy, double *AP)
*
* performs the symmetric rank 2 operation
*
*    A = alpha*x*transpose(y) + alpha*y*transpose(x) + A,
*
* where alpha is a double precision scalar, and x and y are n element double
* precision vectors. A is a symmetric n x n matrix consisting of double
* precision elements that is supplied in packed form.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or the lower
*        triangular part of array A. If uplo == 'U' or 'u', then only the
*        upper triangular part of A may be referenced and the lower triangular
*        part of A is inferred. If uplo == 'L' or 'l', then only the lower
*        triangular part of A may be referenced and the upper triangular part
*        of A is inferred.
* n      specifies the number of rows and columns of the matrix A. It must be
*        at least zero.
* alpha  double precision scalar multiplier applied to x * transpose(y) +
*        y * transpose(x).
* x      double precision array of length at least (1 + (n - 1) * abs (incx)).
* incx   storage spacing between elements of x. incx must not be zero.
* y      double precision array of length at least (1 + (n - 1) * abs (incy)).
* incy   storage spacing between elements of y. incy must not be zero.
* AP     double precision array with at least ((n * (n + 1)) / 2) elements. If
*        uplo == 'U' or 'u', the array AP contains the upper triangular part
*        of the symmetric matrix A, packed sequentially, column by column;
*        that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If
*        uplo == 'L' or 'l', the array AP contains the lower triangular part
*        of the symmetric matrix A, packed sequentially, column by column;
*        that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2].
*
* Output
* ------
* A      updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A
*
* Reference: http://www.netlib.org/blas/dspr2.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, incx == 0, incy == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper: x, y and AP are checked for null, in that order. The first
* null argument is reported as a java.lang.NullPointerException through
* ThrowByName and the wrapper returns without calling cublasDspr2. The
* jchar uplo is narrowed to char for the native call.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDspr2Native
  (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha, jobject x, jint incx, jobject y, jint incy, jobject AP)
{
    // Argument validation: bail out on the first null parameter.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'x' is null for cublasDspr2");
        return;
    }
    if (y == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'y' is null for cublasDspr2");
        return;
    }
    if (AP == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException",
            "Parameter 'AP' is null for cublasDspr2");
        return;
    }

    // Obtain the native double* behind each Java argument.
    double* const xPtr = (double*)getPointer(env, x);
    double* const yPtr = (double*)getPointer(env, y);
    double* const apPtr = (double*)getPointer(env, AP);

    Logger::log(LOG_TRACE, "Executing cublasDspr2(%c, %d, %lf, '%s', %d, '%s', %d, '%s')\n",
        uplo, n, alpha, "x", incx, "y", incy, "AP");

    cublasDspr2((char)uplo, n, alpha, xPtr, incx, yPtr, incy, apPtr);
}

/**
*
* void * cublasDtrsv (char uplo, char trans, char diag, int n, const double *A, * int lda, double *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A or * transpose(A). b and x are double precision vectors consisting of n * elements, and A is an n x n matrix composed of a unit or non-unit, upper * or lower triangular matrix. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array containing * A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a double precision array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. On exit, * it is overwritten with the solution vector x. 
* incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/dtrsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtrsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDtrsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDtrsv"); return; } double* nativeA; double* nativeX; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDtrsv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasDtrsv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void * cublasDtrmv (char uplo, char trans, char diag, int n, const double *A, * int lda, double *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = = A, or op(A) = transpose(A). x is an n-element single precision vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa = 'N' or 'n', op(A) = A. If trans = 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A single precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. 
* * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/dtrmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtrmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDtrmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDtrmv"); return; } double* nativeA; double* nativeX; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDtrmv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasDtrmv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void * cublasDgbmv (char trans, int m, int n, int kl, int ku, double alpha, * const double *A, int lda, const double *x, int incx, double beta, * double *y, int incy); * * performs one of the matrix-vector operations * * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) * * alpha and beta are double precision scalars. x and y are double precision * vectors. A is an m by n band matrix consisting of double precision elements * with kl sub-diagonals and ku super-diagonals. * * Input * ----- * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * kl specifies the number of sub-diagonals of matrix A. It must be at * least zero. * ku specifies the number of super-diagonals of matrix A. It must be at * least zero. * alpha double precision scalar multiplier applied to op(A). * A double precision array of dimensions (lda, n). The leading * (kl + ku + 1) x n part of the array A must contain the band matrix A, * supplied column by column, with the leading diagonal of the matrix * in row (ku + 1) of the array, the first super-diagonal starting at * position 2 in row ku, the first sub-diagonal starting at position 1 * in row (ku + 2), and so on. Elements in the array A that do not * correspond to elements in the band matrix (such as the top left * ku x ku triangle) are not referenced. * lda leading dimension of A. lda must be at least (kl + ku + 1). * x double precision array of length at least (1+(n-1)*abs(incx)) when * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. * incx specifies the increment for the elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. If beta is * zero, y is not read. 
* y double precision array of length at least (1+(m-1)*abs(incy)) when * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If * beta is zero, y is not read. * incy On entry, incy specifies the increment for the elements of y. incy * must not be zero. * * Output * ------ * y updated according to y = alpha*op(A)*x + beta*y * * Reference: http://www.netlib.org/blas/dgbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDgbmvNative (JNIEnv *env, jclass cls, jchar trans, jint m, jint n, jint kl, jint ku, jdouble alpha, jobject A, jint lda, jobject x, jint incx, jdouble beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDgbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDgbmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDgbmv"); return; } double* nativeA; double* nativeX; double* nativeY; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDgbmv(%c, %d, %d, %d, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n", trans, m, n, kl, ku, alpha, "A", lda, "x", incx, beta, "y", incy); cublasDgbmv((char)trans, m, n, kl, ku, alpha, nativeA, lda, nativeX, incx, beta, nativeY, incy); } /** *
* void * cublasDtbmv (char uplo, char trans, char diag, int n, int k, const double *A, * int lda, double *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * or op(A) = transpose(A). x is an n-element double precision vector, and A is * an n x n, unit or non-unit, upper or lower triangular band matrix composed * of double precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must be at least * zero. * A double precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must contain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. 
* lda is the leading dimension of A. It must be at least (k + 1). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/dtbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtbmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDtbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDtbmv"); return; } double* nativeA; double* nativeX; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDtbmv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasDtbmv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void * cublasDtpmv (char uplo, char trans, char diag, int n, const double *AP, * double *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * or op(A) = transpose(A). x is an n element double precision vector, and A * is an n x n, unit or non-unit, upper or lower triangular matrix composed * of double precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP double precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular * matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'l', the array AP contains the lower triangular * matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x double precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/dtpmv.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtpmvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasDtpmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDtpmv"); return; } double* nativeAP; double* nativeX; nativeAP = (double*)getPointer(env, AP); nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDtpmv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasDtpmv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* void * cublasDtpsv (char uplo, char trans, char diag, int n, const double *AP, * double *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is * an n x n unit or non-unit, upper or lower triangular matrix. No test for * singularity or near-singularity is included in this routine. Such tests * must be performed before calling this routine. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangular matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; that is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP double precision array with at least ((n*(n+1))/2) elements. If uplo * == 'U' or 'u', the array AP contains the upper triangular matrix A, * packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored in AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'l', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x double precision array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. 
* * Reference: http://www.netlib.org/blas/dtpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtpsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject AP, jobject x, jint incx) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasDtpsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDtpsv"); return; } double* nativeAP; double* nativeX; nativeAP = (double*)getPointer(env, AP); nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDtpsv(%c, %c, %c, %d, '%s', '%s', %d)\n", uplo, trans, diag, n, "AP", "x", incx); cublasDtpsv((char)uplo, (char)trans, (char)diag, n, nativeAP, nativeX, incx); } /** *
* void cublasDtbsv (char uplo, char trans, char diag, int n, int k, * const double *A, int lda, double *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is * an n x n unit or non-unit, upper or lower triangular band matrix with k + 1 * diagonals. No test for singularity or near-singularity is included in this * function. Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; that is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must be at least * zero. * A double precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. 
If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x double precision array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/dtbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 2035 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtbsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jint k, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDtbsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDtbsv"); return; } double* nativeA; double* nativeX; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasDtbsv(%c, %c, %c, %d, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, k, "A", lda, "x", incx); cublasDtbsv((char)uplo, (char)trans, (char)diag, n, k, nativeA, lda, nativeX, incx); } /** *
* void * cublasDsymv (char uplo, int n, double alpha, const double *A, int lda, * const double *x, int incx, double beta, double *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are double precision scalars, and x and y are double * precision vectors, each with n elements. A is a symmetric n x n matrix * consisting of double precision elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the symmetric matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the symmetric matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * alpha double precision scalar multiplier applied to A*x. * A double precision array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the symmetric matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the symmetric matrix and the strictly * upper triangular part of A is not referenced. * lda leading dimension of A. It must be at least max (1, n). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. 
* incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/dsymv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsymvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha, jobject A, jint lda, jobject x, jint incx, jdouble beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDsymv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDsymv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDsymv"); return; } double* nativeA; double* nativeX; double* nativeY; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDsymv(%c, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n", uplo, n, alpha, "A", lda, "x", incx, beta, "y", incy); cublasDsymv((char)uplo, n, alpha, nativeA, lda, nativeX, incx, beta, nativeY, incy); } /** *
* void * cublasDsbmv (char uplo, int n, int k, double alpha, const double *A, int lda, * const double *x, int incx, double beta, double *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are double precision scalars. x and y are double precision * vectors with n elements. A is an n by n symmetric band matrix consisting * of double precision elements, with k super-diagonals and the same number * of subdiagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the symmetric * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is symmetric, this is also the number of sub-diagonals. k must be at * least zero. * alpha double precision scalar multiplier applied to A*x. * A double precision array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the symmetric matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the symmetric matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. * lda leading dimension of A. lda must be at least (k + 1). * x double precision array of length at least (1 + (n - 1) * abs(incx)). 
* incx storage spacing between elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. If beta is * zero, y is not read. * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/dsbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsbmvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jint k, jdouble alpha, jobject A, jint lda, jobject x, jint incx, jdouble beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDsbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDsbmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDsbmv"); return; } double* nativeA; double* nativeX; double* nativeY; nativeA = (double*)getPointer(env, A); nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDsbmv(%c, %d, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n", uplo, n, k, alpha, "A", lda, "x", incx, beta, "y", incy); cublasDsbmv((char)uplo, n, k, alpha, nativeA, lda, nativeX, incx, beta, nativeY, incy); } /** *
* void * cublasDspmv (char uplo, int n, double alpha, const double *AP, const double *x, * int incx, double beta, double *y, int incy) * * performs the matrix-vector operation * * y = alpha * A * x + beta * y * * Alpha and beta are double precision scalars, and x and y are double * precision vectors with n elements. A is a symmetric n x n matrix * consisting of double precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision scalar multiplier applied to A*x. * AP double precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'l', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/dspmv.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDspmvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha, jobject AP, jobject x, jint incx, jdouble beta, jobject y, jint incy) { if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasDspmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasDspmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasDspmv"); return; } double* nativeAP; double* nativeX; double* nativeY; nativeAP = (double*)getPointer(env, AP); nativeX = (double*)getPointer(env, x); nativeY = (double*)getPointer(env, y); Logger::log(LOG_TRACE, "Executing cublasDspmv(%c, %d, %lf, '%s', '%s', %d, %lf, '%s', %d)\n", uplo, n, alpha, "AP", "x", incx, beta, "y", incy); cublasDspmv((char)uplo, n, alpha, nativeAP, nativeX, incx, beta, nativeY, incy); } /** *
* void * cublasDgemm (char transa, char transb, int m, int n, int k, double alpha, * const double *A, int lda, const double *B, int ldb, * double beta, double *C, int ldc) * * computes the product of matrix A and matrix B, multiplies the result * by scalar alpha, and adds the sum to the product of matrix C and * scalar beta. It performs one of the matrix-matrix operations: * * C = alpha * op(A) * op(B) + beta * C, * where op(X) = X or op(X) = transpose(X), * * and alpha and beta are double-precision scalars. A, B and C are matrices * consisting of double-precision elements, with op(A) an m x k matrix, * op(B) a k x n matrix, and C an m x n matrix. Matrices A, B, and C are * stored in column-major format, and lda, ldb, and ldc are the leading * dimensions of the two-dimensional arrays containing A, B, and C. * * Input * ----- * transa specifies op(A). If transa == 'N' or 'n', op(A) = A. * If transa == 'T', 't', 'C', or 'c', op(A) = transpose(A). * transb specifies op(B). If transb == 'N' or 'n', op(B) = B. * If transb == 'T', 't', 'C', or 'c', op(B) = transpose(B). * m number of rows of matrix op(A) and rows of matrix C; m must be at * least zero. * n number of columns of matrix op(B) and number of columns of C; * n must be at least zero. * k number of columns of matrix op(A) and number of rows of op(B); * k must be at least zero. * alpha double-precision scalar multiplier applied to op(A) * op(B). * A double-precision array of dimensions (lda, k) if transa == 'N' or * 'n', and of dimensions (lda, m) otherwise. If transa == 'N' or * 'n' lda must be at least max(1, m), otherwise lda must be at * least max(1, k). * lda leading dimension of two-dimensional array used to store matrix A. * B double-precision array of dimensions (ldb, n) if transb == 'N' or * 'n', and of dimensions (ldb, k) otherwise. If transb == 'N' or * 'n' ldb must be at least max (1, k), otherwise ldb must be at * least max(1, n). 
* ldb leading dimension of two-dimensional array used to store matrix B. * beta double-precision scalar multiplier applied to C. If zero, C does not * have to be a valid input * C double-precision array of dimensions (ldc, n); ldc must be at least * max(1, m). * ldc leading dimension of two-dimensional array used to store matrix C. * * Output * ------ * C updated based on C = alpha * op(A)*op(B) + beta * C. * * Reference: http://www.netlib.org/blas/sgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS was not initialized * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDgemmNative (JNIEnv *env, jclass cls, jchar transa, jchar transb, jint m, jint n, jint k, jdouble alpha, jobject A, jint lda, jobject B, jint ldb, jdouble beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDgemm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasDgemm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasDgemm"); return; } double* nativeA; double* nativeB; double* nativeC; nativeA = (double*)getPointer(env, A); nativeB = (double*)getPointer(env, B); nativeC = (double*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasDgemm(%c, %c, %d, %d, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n", transa, transb, m, n, k, alpha, "A", lda, "B", ldb, beta, "C", ldc); cublasDgemm((char)transa, (char)transb, m, n, k, alpha, nativeA, lda, nativeB, ldb, beta, nativeC, ldc); } /** *
* void * cublasDtrsm (char side, char uplo, char transa, char diag, int m, int n, * double alpha, const double *A, int lda, double *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a double precision scalar, and X and B are m x n matrices * that are composed of double precision elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensional arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'n', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a double precision scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. * A is a double precision array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. 
If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular matrix of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a double precision array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/dtrsm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtrsmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jdouble alpha, jobject A, jint lda, jobject B, jint ldb) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDtrsm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasDtrsm"); return; } double* nativeA; double* nativeB; nativeA = (double*)getPointer(env, A); nativeB = (double*)getPointer(env, B); Logger::log(LOG_TRACE, "Executing cublasDtrsm(%c, %c, %c, %c, %d, %d, %lf, '%s', %d, '%s', %d)\n", side, uplo, transa, diag, m, n, alpha, "A", lda, "B", ldb); cublasDtrsm((char)side, (char)uplo, (char)transa, (char)diag, m, n, alpha, nativeA, lda, nativeB, ldb); } /** *
* void * cublasZtrsm (char side, char uplo, char transa, char diag, int m, int n, * cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * cuDoubleComplex *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a double precision complex scalar, and X and B are m x n matrices * that are composed of double precision complex elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) or op( A ) = conj( A' ). * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensonials arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'N', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a double precision complex scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. 
* A is a double precision complex array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular matrix of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a double precision complex array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/ztrsm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtrsmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZtrsm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZtrsm"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeB; cuDoubleComplex dobuleComplexAlpha; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeB = (cuDoubleComplex*)getPointer(env, B); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZtrsm(%c, %c, %c, %c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d)\n", side, uplo, transa, diag, m, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "B", ldb); cublasZtrsm((char)side, (char)uplo, (char)transa, (char)diag, m, n, dobuleComplexAlpha, nativeA, lda, nativeB, ldb); } /** *
* void
* cublasDtrmm (char side, char uplo, char transa, char diag, int m, int n,
*              double alpha, const double *A, int lda, double *B, int ldb)
*
* performs one of the matrix-matrix operations
*
*   B = alpha * op(A) * B,  or  B = alpha * B * op(A)
*
* where alpha is a double-precision scalar, B is an m x n matrix composed
* of double precision elements, and A is a unit or non-unit, upper or lower,
* triangular matrix composed of double precision elements. op(A) is one of
*
*   op(A) = A  or  op(A) = transpose(A)
*
* Matrices A and B are stored in column major format, and lda and ldb are
* the leading dimensions of the two-dimensional arrays that contain A and
* B, respectively.
*
* Input
* -----
* side   specifies whether op(A) multiplies B from the left or right.
*        If side = 'L' or 'l', then B = alpha * op(A) * B. If side =
*        'R' or 'r', then B = alpha * B * op(A).
* uplo   specifies whether the matrix A is an upper or lower triangular
*        matrix. If uplo = 'U' or 'u', A is an upper triangular matrix.
*        If uplo = 'L' or 'l', A is a lower triangular matrix.
* transa specifies the form of op(A) to be used in the matrix
*        multiplication. If transa = 'N' or 'n', then op(A) = A. If
*        transa = 'T', 't', 'C', or 'c', then op(A) = transpose(A).
* diag   specifies whether or not A is unit triangular. If diag = 'U'
*        or 'u', A is assumed to be unit triangular. If diag = 'N' or
*        'n', A is not assumed to be unit triangular.
* m      the number of rows of matrix B. m must be at least zero.
* n      the number of columns of matrix B. n must be at least zero.
* alpha  double precision scalar multiplier applied to op(A)*B, or
*        B*op(A), respectively. If alpha is zero no accesses are made
*        to matrix A, and no read accesses are made to matrix B.
* A      double precision array of dimensions (lda, k). k = m if side =
*        'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u'
*        the leading k x k upper triangular part of the array A must
*        contain the upper triangular matrix, and the strictly lower
*        triangular part of A is not referenced. If uplo = 'L' or 'l'
*        the leading k x k lower triangular part of the array A must
*        contain the lower triangular matrix, and the strictly upper
*        triangular part of A is not referenced. When diag = 'U' or 'u'
*        the diagonal elements of A are not referenced and are assumed
*        to be unity.
* lda    leading dimension of A. When side = 'L' or 'l', it must be at
*        least max(1,m) and at least max(1,n) otherwise
* B      double precision array of dimensions (ldb, n). On entry, the
*        leading m x n part of the array contains the matrix B. It is
*        overwritten with the transformed matrix on exit.
* ldb    leading dimension of B. It must be at least max (1, m).
*
* Output
* ------
* B      updated according to B = alpha * op(A) * B  or B = alpha * B * op(A)
*
* Reference: http://www.netlib.org/blas/dtrmm.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if m or n < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper
* -----------
* The native pointers for A and B are obtained through getPointer(). A
* java.lang.NullPointerException is thrown, and CUBLAS is not called, if
* A or B is null.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDtrmmNative
  (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag, jint m, jint n, jdouble alpha, jobject A, jint lda, jobject B, jint ldb)
{
    // Reject missing matrix arguments before touching them.
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDtrmm");
        return;
    }
    if (B == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasDtrmm");
        return;
    }

    double* ptrA = (double*)getPointer(env, A);
    double* ptrB = (double*)getPointer(env, B);

    Logger::log(LOG_TRACE, "Executing cublasDtrmm(%c, %c, %c, %c, %d, %d, %lf, '%s', %d, '%s', %d)\n",
        side, uplo, transa, diag, m, n, alpha, "A", lda, "B", ldb);

    // The Java side passes 16-bit chars; CUBLAS expects plain C chars.
    const char sideArg = (char)side;
    const char uploArg = (char)uplo;
    const char transArg = (char)transa;
    const char diagArg = (char)diag;

    cublasDtrmm(sideArg, uploArg, transArg, diagArg, m, n, alpha, ptrA, lda, ptrB, ldb);
}

/**
*
* void
* cublasDsymm (char side, char uplo, int m, int n, double alpha,
*              const double *A, int lda, const double *B, int ldb,
*              double beta, double *C, int ldc);
*
* performs one of the matrix-matrix operations
*
*   C = alpha * A * B + beta * C, or
*   C = alpha * B * A + beta * C,
*
* where alpha and beta are double precision scalars, A is a symmetric matrix
* consisting of double precision elements and stored in either lower or upper
* storage mode, and B and C are m x n matrices consisting of double precision
* elements.
*
* Input
* -----
* side   specifies whether the symmetric matrix A appears on the left
*        hand side or right hand side of matrix B, as follows. If side == 'L'
*        or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r',
*        then C = alpha * B * A + beta * C.
* uplo   specifies whether the symmetric matrix A is stored in upper or lower
*        storage mode, as follows. If uplo == 'U' or 'u', only the upper
*        triangular part of the symmetric matrix is to be referenced, and the
*        elements of the strictly lower triangular part are to be inferred
*        from those in the upper triangular part. If uplo == 'L' or 'l', only
*        the lower triangular part of the symmetric matrix is to be
*        referenced, and the elements of the strictly upper triangular part
*        are to be inferred from those in the lower triangular part.
* m      specifies the number of rows of the matrix C, and the number of rows
*        of matrix B. It also specifies the dimensions of symmetric matrix A
*        when side == 'L' or 'l'. m must be at least zero.
* n      specifies the number of columns of the matrix C, and the number of
*        columns of matrix B. It also specifies the dimensions of symmetric
*        matrix A when side == 'R' or 'r'. n must be at least zero.
* alpha  double precision scalar multiplier applied to A * B, or B * A
* A      double precision array of dimensions (lda, ka), where ka is m when
*        side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the
*        leading m x m part of array A must contain the symmetric matrix,
*        such that when uplo == 'U' or 'u', the leading m x m part stores the
*        upper triangular part of the symmetric matrix, and the strictly lower
*        triangular part of A is not referenced, and when uplo == 'L' or 'l',
*        the leading m x m part stores the lower triangular part of the
*        symmetric matrix and the strictly upper triangular part is not
*        referenced. If side == 'R' or 'r' the leading n x n part of array A
*        must contain the symmetric matrix, such that when uplo == 'U' or 'u',
*        the leading n x n part stores the upper triangular part of the
*        symmetric matrix and the strictly lower triangular part of A is not
*        referenced, and when uplo == 'L' or 'l', the leading n x n part
*        stores the lower triangular part of the symmetric matrix and the
*        strictly upper triangular part is not referenced.
* lda    leading dimension of A. When side == 'L' or 'l', it must be at least
*        max(1, m) and at least max(1, n) otherwise.
* B      double precision array of dimensions (ldb, n). On entry, the leading
*        m x n part of the array contains the matrix B.
* ldb    leading dimension of B. It must be at least max (1, m).
* beta   double precision scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input
* C      double precision array of dimensions (ldc, n)
* ldc    leading dimension of C. Must be at least max(1, m)
*
* Output
* ------
* C      updated according to C = alpha * A * B + beta * C, or C = alpha *
*        B * A + beta * C
*
* Reference: http://www.netlib.org/blas/dsymm.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if m or n are < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper
* -----------
* The native pointers for A, B and C are obtained through getPointer(). A
* java.lang.NullPointerException is thrown, and CUBLAS is not called, if
* A, B or C is null.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsymmNative
  (JNIEnv *env, jclass cls, jchar side, jchar uplo, jint m, jint n, jdouble alpha, jobject A, jint lda, jobject B, jint ldb, jdouble beta, jobject C, jint ldc)
{
    // Reject missing matrix arguments before touching them.
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDsymm");
        return;
    }
    if (B == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasDsymm");
        return;
    }
    if (C == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasDsymm");
        return;
    }

    double* ptrA = (double*)getPointer(env, A);
    double* ptrB = (double*)getPointer(env, B);
    double* ptrC = (double*)getPointer(env, C);

    Logger::log(LOG_TRACE, "Executing cublasDsymm(%c, %c, %d, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n",
        side, uplo, m, n, alpha, "A", lda, "B", ldb, beta, "C", ldc);

    // The Java side passes 16-bit chars; CUBLAS expects plain C chars.
    const char sideArg = (char)side;
    const char uploArg = (char)uplo;

    cublasDsymm(sideArg, uploArg, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc);
}

/**
*
* void * cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * cuDoubleComplex beta, cuDoubleComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are double precision complex scalars, A is a symmetric matrix * consisting of double precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of double precision * complex elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha double precision scalar multiplier applied to A * B, or B * A * A double precision array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. 
If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B double precision array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/zsymm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZsymmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZsymm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZsymm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZsymm"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeB; cuDoubleComplex* nativeC; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeB = (cuDoubleComplex*)getPointer(env, B); nativeC = (cuDoubleComplex*)getPointer(env, C); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZsymm(%c, %c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n", side, uplo, m, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "B", ldb, dobuleComplexBeta.x, dobuleComplexBeta.y, "C", ldc); cublasZsymm((char)side, (char)uplo, m, n, dobuleComplexAlpha, nativeA, lda, nativeB, ldb, dobuleComplexBeta, nativeC, ldc); } /** *
* void
* cublasDsyrk (char uplo, char trans, int n, int k, double alpha,
*              const double *A, int lda, double beta, double *C, int ldc)
*
* performs one of the symmetric rank k operations
*
*   C = alpha * A * transpose(A) + beta * C, or
*   C = alpha * transpose(A) * A + beta * C.
*
* Alpha and beta are double precision scalars. C is an n x n symmetric matrix
* consisting of double precision elements and stored in either lower or
* upper storage mode. A is a matrix consisting of double precision elements
* with dimension of n x k in the first case, and k x n in the second case.
*
* Input
* -----
* uplo   specifies whether the symmetric matrix C is stored in upper or lower
*        storage mode as follows. If uplo == 'U' or 'u', only the upper
*        triangular part of the symmetric matrix is to be referenced, and the
*        elements of the strictly lower triangular part are to be inferred
*        from those in the upper triangular part. If uplo == 'L' or 'l', only
*        the lower triangular part of the symmetric matrix is to be
*        referenced, and the elements of the strictly upper triangular part
*        are to be inferred from those in the lower triangular part.
* trans  specifies the operation to be performed. If trans == 'N' or 'n', C =
*        alpha * A * transpose(A) + beta * C. If trans == 'T', 't', 'C', or
*        'c', C = alpha * transpose(A) * A + beta * C.
* n      specifies the number of rows and the number of columns of matrix C.
*        If trans == 'N' or 'n', n specifies the number of rows of matrix A.
*        If trans == 'T', 't', 'C', or 'c', n specifies the columns of
*        matrix A. n must be at least zero.
* k      If trans == 'N' or 'n', k specifies the number of columns of matrix
*        A. If trans == 'T', 't', 'C', or 'c', k specifies the number of rows
*        of matrix A. k must be at least zero.
* alpha  double precision scalar multiplier applied to A * transpose(A) or
*        transpose(A) * A.
* A      double precision array of dimensions (lda, ka), where ka is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array A must contain the matrix A,
*        otherwise the leading k x n part of the array must contain the
*        matrix A.
* lda    leading dimension of A. When trans == 'N' or 'n' then lda must be at
*        least max(1, n). Otherwise lda must be at least max(1, k).
* beta   double precision scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input
* C      double precision array of dimensions (ldc, n). If uplo = 'U' or 'u',
*        the leading n x n triangular part of the array C must contain the
*        upper triangular part of the symmetric matrix C and the strictly
*        lower triangular part of C is not referenced. On exit, the upper
*        triangular part of C is overwritten by the upper triangular part of
*        the updated matrix. If uplo = 'L' or 'l', the leading n x n
*        triangular part of the array C must contain the lower triangular part
*        of the symmetric matrix C and the strictly upper triangular part of C
*        is not referenced. On exit, the lower triangular part of C is
*        overwritten by the lower triangular part of the updated matrix.
* ldc    leading dimension of C. It must be at least max(1, n).
*
* Output
* ------
* C      updated according to C = alpha * A * transpose(A) + beta * C, or C =
*        alpha * transpose(A) * A + beta * C
*
* Reference: http://www.netlib.org/blas/dsyrk.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0 or k < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI wrapper
* -----------
* The native pointers for A and C are obtained through getPointer(). A
* java.lang.NullPointerException is thrown, and CUBLAS is not called, if
* A or C is null.
*/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsyrkNative
  (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jdouble alpha, jobject A, jint lda, jdouble beta, jobject C, jint ldc)
{
    // Reject missing matrix arguments before touching them.
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDsyrk");
        return;
    }
    if (C == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasDsyrk");
        return;
    }

    double* ptrA = (double*)getPointer(env, A);
    double* ptrC = (double*)getPointer(env, C);

    Logger::log(LOG_TRACE, "Executing cublasDsyrk(%c, %c, %d, %d, %lf, '%s', %d, %lf, '%s', %d)\n",
        uplo, trans, n, k, alpha, "A", lda, beta, "C", ldc);

    // The Java side passes 16-bit chars; CUBLAS expects plain C chars.
    const char uploArg = (char)uplo;
    const char transArg = (char)trans;

    cublasDsyrk(uploArg, transArg, n, k, alpha, ptrA, lda, beta, ptrC, ldc);
}

/**
*
* void * cublasZsyrk (char uplo, char trans, int n, int k, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, cuDoubleComplex beta, cuDoubleComplex *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are double precision complex scalars. C is an n x n symmetric matrix * consisting of double precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of double precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = transpose(A) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision complex scalar multiplier applied to A * transpose(A) or * transpose(A) * A. 
* A double precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta double precision complex scalar multiplier applied to C. If beta izs zero, C * does not have to be a valid input * C double precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/zsyrk.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZsyrkNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jobject alpha, jobject A, jint lda, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZsyrk"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZsyrk"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeC; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeC = (cuDoubleComplex*)getPointer(env, C); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZsyrk(%c, %c, %d, %d, [%lf,%lf], '%s', %d, [%lf,%lf], '%s', %d)\n", uplo, trans, n, k, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, dobuleComplexBeta.x, dobuleComplexBeta.y, "C", ldc); cublasZsyrk((char)uplo, (char)trans, n, k, dobuleComplexAlpha, nativeA, lda, dobuleComplexBeta, nativeC, ldc); } /** *
* void
* cublasZsyr2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha,
*               const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb,
*               cuDoubleComplex beta, cuDoubleComplex *C, int ldc)
*
* performs one of the symmetric rank 2k operations
*
*    C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
*    C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
*
* Alpha and beta are double precision complex scalars. C is an n x n symmetric matrix
* consisting of double precision complex elements and stored in either lower or upper
* storage mode. A and B are matrices consisting of double precision complex elements
* with dimension of n x k in the first case, and k x n in the second case.
*
* Input
* -----
* uplo   specifies whether the symmetric matrix C is stored in upper or lower
*        storage mode, as follows. If uplo == 'U' or 'u', only the upper
*        triangular part of the symmetric matrix is to be referenced, and the
*        elements of the strictly lower triangular part are to be inferred from
*        those in the upper triangular part. If uplo == 'L' or 'l', only the
*        lower triangular part of the symmetric matrix is to be referenced,
*        and the elements of the strictly upper triangular part are to be
*        inferred from those in the lower triangular part.
* trans  specifies the operation to be performed. If trans == 'N' or 'n',
*        C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C,
*        If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B +
*        alpha * transpose(B) * A + beta * C.
* n      specifies the number of rows and the number columns of matrix C. If
*        trans == 'N' or 'n', n specifies the number of rows of matrix A. If
*        trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A.
*        n must be at least zero.
* k      If trans == 'N' or 'n', k specifies the number of columns of matrix A.
*        If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of
*        matrix A. k must be at least zero.
* alpha  double precision complex scalar multiplier.
* A      double precision complex array of dimensions (lda, ka), where ka is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array A must contain the matrix A,
*        otherwise the leading k x n part of the array must contain the matrix
*        A.
* lda    leading dimension of A. When trans == 'N' or 'n' then lda must be at
*        least max(1, n). Otherwise lda must be at least max(1, k).
* B      double precision complex array of dimensions (ldb, kb), where kb is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array B must contain the matrix B,
*        otherwise the leading k x n part of the array must contain the matrix
*        B.
* ldb    leading dimension of B. When trans == 'N' or 'n' then ldb must be at
*        least max(1, n). Otherwise ldb must be at least max(1, k).
* beta   double precision complex scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input.
* C      double precision complex array of dimensions (ldc, n). If uplo == 'U' or 'u',
*        the leading n x n triangular part of the array C must contain the
*        upper triangular part of the symmetric matrix C and the strictly
*        lower triangular part of C is not referenced. On exit, the upper
*        triangular part of C is overwritten by the upper triangular part of
*        the updated matrix. If uplo == 'L' or 'l', the leading n x n
*        triangular part of the array C must contain the lower triangular part
*        of the symmetric matrix C and the strictly upper triangular part of C
*        is not referenced. On exit, the lower triangular part of C is
*        overwritten by the lower triangular part of the updated matrix.
* ldc    leading dimension of C. Must be at least max(1, n).
*
* Output
* ------
* C      updated according to alpha*A*transpose(B) + alpha*B*transpose(A) +
*        beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C
*
* Reference: http://www.netlib.org/blas/zsyr2k.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0 or k < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding
* -----------
* A, B and C are Java objects that are converted to native addresses with
* getPointer(). alpha and beta are Java objects whose real and imaginary
* parts are read through the cuDoubleComplex_x and cuDoubleComplex_y field
* IDs. If any of these five objects is null, a java.lang.NullPointerException
* is thrown via ThrowByName and cublasZsyr2k is not called.
**/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZsyr2kNative
  (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k,
   jobject alpha, jobject A, jint lda, jobject B, jint ldb,
   jobject beta, jobject C, jint ldc)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZsyr2k");
        return;
    }
    if (B == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZsyr2k");
        return;
    }
    if (C == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZsyr2k");
        return;
    }
    // GetDoubleField must not be handed a null object reference, so the
    // complex scalars are validated in the same way as the pointer objects.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasZsyr2k");
        return;
    }
    if (beta == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'beta' is null for cublasZsyr2k");
        return;
    }

    cuDoubleComplex* nativeA = (cuDoubleComplex*)getPointer(env, A);
    cuDoubleComplex* nativeB = (cuDoubleComplex*)getPointer(env, B);
    cuDoubleComplex* nativeC = (cuDoubleComplex*)getPointer(env, C);

    // Copy the Java-side complex scalars into native cuDoubleComplex values.
    cuDoubleComplex nativeAlpha;
    nativeAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x);
    nativeAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y);
    cuDoubleComplex nativeBeta;
    nativeBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x);
    nativeBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasZsyr2k(%c, %c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n",
        uplo, trans, n, k, nativeAlpha.x, nativeAlpha.y, "A", lda, "B", ldb, nativeBeta.x, nativeBeta.y, "C", ldc);

    cublasZsyr2k((char)uplo, (char)trans, n, k, nativeAlpha, nativeA, lda, nativeB, ldb, nativeBeta, nativeC, ldc);
}

/**
*
* void * cublasZher2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * double beta, cuDoubleComplex *C, int ldc) * * performs one of the hermitian rank 2k operations * * C = alpha * A * conjugate(transpose(B)) * + conjugate(alpha) * B * conjugate(transpose(A)) * + beta * C , * or * C = alpha * conjugate(transpose(A)) * B * + conjugate(alpha) * conjugate(transpose(B)) * A * + beta * C. * * Alpha is double precision complex scalar whereas Beta is a double precision real scalar. * C is an n x n hermitian matrix consisting of double precision complex elements and * stored in either lower or upper storage mode. A and B are matrices consisting of * double precision complex elements with dimension of n x k in the first case, * and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * conjugate(transpose(B)) * + conjugate(alpha) * B * conjugate(transpose(A)) * + beta * C . * If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * B * + conjugate(alpha) * conjugate(transpose(B)) * A * + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. 
* k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier. * A double precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B double precision array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C double precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. 
* ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*conjugate(transpose(B)) + * + conjugate(alpha)*B*conjugate(transpose(A)) + beta*C or * alpha*conjugate(transpose(A))*B + conjugate(alpha)*conjugate(transpose(B))*A * + beta*C. * * Reference: http://www.netlib.org/blas/zher2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZher2kNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jdouble beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZher2k"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZher2k"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZher2k"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeB; cuDoubleComplex* nativeC; cuDoubleComplex dobuleComplexAlpha; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeB = (cuDoubleComplex*)getPointer(env, B); nativeC = (cuDoubleComplex*)getPointer(env, C); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZher2k(%c, %c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, %lf, '%s', %d)\n", uplo, trans, n, k, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "B", ldb, beta, "C", ldc); cublasZher2k((char)uplo, (char)trans, n, k, dobuleComplexAlpha, nativeA, lda, 
nativeB, ldb, beta, nativeC, ldc); } /** *
* void
* cublasZher (char uplo, int n, double alpha, const cuDoubleComplex *x, int incx,
*             cuDoubleComplex *A, int lda)
*
* performs the hermitian rank 1 operation
*
*    A = alpha * x * conjugate(transpose(x)) + A,
*
* where alpha is a double precision real scalar, x is an n element double
* precision complex vector and A is an n x n hermitian matrix consisting of
* double precision complex elements. Matrix A is stored in column major format,
* and lda is the leading dimension of the two-dimensional array
* containing A.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or
*        the lower triangular part of array A. If uplo = 'U' or 'u',
*        then only the upper triangular part of A may be referenced.
*        If uplo = 'L' or 'l', then only the lower triangular part of
*        A may be referenced.
* n      specifies the number of rows and columns of the matrix A. It
*        must be at least 0.
* alpha  double precision real scalar multiplier applied to
*        x * conjugate(transpose(x))
* x      double precision complex array of length at least
*        (1 + (n - 1) * abs(incx))
* incx   specifies the storage spacing between elements of x. incx must
*        not be zero.
* A      double precision complex array of dimensions (lda, n). If uplo = 'U' or
*        'u', then A must contain the upper triangular part of a hermitian
*        matrix, and the strictly lower triangular part is not referenced.
*        If uplo = 'L' or 'l', then A contains the lower triangular part
*        of a hermitian matrix, and the strictly upper triangular part is
*        not referenced. The imaginary parts of the diagonal elements need
*        not be set, they are assumed to be zero, and on exit they
*        are set to zero.
* lda    leading dimension of the two-dimensional array containing A. lda
*        must be at least max(1, n).
*
* Output
* ------
* A      updated according to A = alpha * x * conjugate(transpose(x)) + A
*
* Reference: http://www.netlib.org/blas/zher.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, or incx == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding
* -----------
* x and A are Java objects that are converted to native addresses with
* getPointer(). If either is null, a java.lang.NullPointerException is
* thrown via ThrowByName and cublasZher is not called.
**/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZherNative
  (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha,
   jobject x, jint incx, jobject A, jint lda)
{
    // Reject missing pointer objects before touching the native library.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZher");
        return;
    }
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZher");
        return;
    }

    cuDoubleComplex* const vectorX = (cuDoubleComplex*)getPointer(env, x);
    cuDoubleComplex* const matrixA = (cuDoubleComplex*)getPointer(env, A);

    Logger::log(LOG_TRACE, "Executing cublasZher(%c, %d, %lf, '%s', %d, '%s', %d)\n",
        uplo, n, alpha, "x", incx, "A", lda);

    cublasZher(static_cast<char>(uplo), n, alpha, vectorX, incx, matrixA, lda);
}

/**
*
* void
* cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x, int incx,
*             cuDoubleComplex *AP)
*
* performs the hermitian rank 1 operation
*
*    A = alpha * x * conjugate(transpose(x)) + A,
*
* where alpha is a double precision real scalar and x is an n element double
* precision complex vector. A is a hermitian n x n matrix consisting of double
* precision complex elements that is supplied in packed form.
*
* Input
* -----
* uplo   specifies whether the matrix data is stored in the upper or the lower
*        triangular part of array AP. If uplo == 'U' or 'u', then the upper
*        triangular part of A is supplied in AP. If uplo == 'L' or 'l', then
*        the lower triangular part of A is supplied in AP.
* n      specifies the number of rows and columns of the matrix A. It must be
*        at least zero.
* alpha  double precision real scalar multiplier applied to
*        x * conjugate(transpose(x)).
* x      double precision complex array of length at least
*        (1 + (n - 1) * abs(incx)).
* incx   storage spacing between elements of x. incx must not be zero.
* AP     double precision complex array with at least ((n * (n + 1)) / 2) elements. If
*        uplo == 'U' or 'u', the array AP contains the upper triangular part
*        of the hermitian matrix A, packed sequentially, column by column;
*        that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If
*        uplo == 'L' or 'l', the array AP contains the lower triangular part
*        of the hermitian matrix A, packed sequentially, column by column;
*        that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2].
*        The imaginary parts of the diagonal elements need not be set, they
*        are assumed to be zero, and on exit they are set to zero.
*
* Output
* ------
* A      updated according to A = alpha * x * conjugate(transpose(x)) + A
*
* Reference: http://www.netlib.org/blas/zhpr.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0, or incx == 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding
* -----------
* x and AP are Java objects that are converted to native addresses with
* getPointer(). If either is null, a java.lang.NullPointerException is
* thrown via ThrowByName and cublasZhpr is not called.
**/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZhprNative
  (JNIEnv *env, jclass cls, jchar uplo, jint n, jdouble alpha,
   jobject x, jint incx, jobject AP)
{
    // Reject missing pointer objects before touching the native library.
    if (x == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZhpr");
        return;
    }
    if (AP == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasZhpr");
        return;
    }

    cuDoubleComplex* const vectorX = (cuDoubleComplex*)getPointer(env, x);
    cuDoubleComplex* const packedA = (cuDoubleComplex*)getPointer(env, AP);

    Logger::log(LOG_TRACE, "Executing cublasZhpr(%c, %d, %lf, '%s', %d, '%s')\n",
        uplo, n, alpha, "x", incx, "AP");

    cublasZhpr(static_cast<char>(uplo), n, alpha, vectorX, incx, packedA);
}

/**
*
* void * cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *AP) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a double precision complex scalar, and x and y are n element double * precision complex vectors. A is a hermitian n x n matrix consisting of double * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x double precision complex array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y double precision complex array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. 
* The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/zhpr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZhpr2Native (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject AP) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZhpr2"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZhpr2"); return; } if (AP == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'AP' is null for cublasZhpr2"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex* nativeAP; cuDoubleComplex dobuleComplexAlpha; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); nativeAP = (cuDoubleComplex*)getPointer(env, AP); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZhpr2(%c, %d, [%lf,%lf], '%s', %d, '%s', %d, '%s')\n", uplo, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "x", incx, "y", incy, "AP"); cublasZhpr2((char)uplo, n, dobuleComplexAlpha, nativeX, incx, nativeY, incy, nativeAP); } /** *
* void cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a double precision complex scalar, x and y are n element double * precision complex vector and A is an n by n hermitian matrix consisting of double * precision complex elements. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x double precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y double precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * A double precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * then A must contains the upper triangular part of a hermitian matrix, * and the strictly lower triangular parts is not referenced. If uplo == * 'L' or 'l', then A contains the lower triangular part of a hermitian * matrix, and the strictly upper triangular part is not referenced. * The imaginary parts of the diagonal elements need not be set, * they are assumed to be zero, and on exit they are set to zero. * * lda leading dimension of A. It must be at least max(1, n). 
* * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/zher2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZher2Native (JNIEnv *env, jclass cls, jchar uplo, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZher2"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZher2"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZher2"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex* nativeA; cuDoubleComplex dobuleComplexAlpha; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); nativeA = (cuDoubleComplex*)getPointer(env, A); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZher2(%c, %d, [%lf,%lf], '%s', %d, '%s', %d, '%s', %d)\n", uplo, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "x", incx, "y", incy, "A", lda); cublasZher2((char)uplo, n, dobuleComplexAlpha, nativeX, incx, nativeY, incy, nativeA, lda); } /** *
* void
* cublasDsyr2k (char uplo, char trans, int n, int k, double alpha,
*               const double *A, int lda, const double *B, int ldb,
*               double beta, double *C, int ldc)
*
* performs one of the symmetric rank 2k operations
*
*    C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or
*    C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C.
*
* Alpha and beta are double precision scalars. C is an n x n symmetric matrix
* consisting of double precision elements and stored in either lower or upper
* storage mode. A and B are matrices consisting of double precision elements
* with dimension of n x k in the first case, and k x n in the second case.
*
* Input
* -----
* uplo   specifies whether the symmetric matrix C is stored in upper or lower
*        storage mode, as follows. If uplo == 'U' or 'u', only the upper
*        triangular part of the symmetric matrix is to be referenced, and the
*        elements of the strictly lower triangular part are to be inferred from
*        those in the upper triangular part. If uplo == 'L' or 'l', only the
*        lower triangular part of the symmetric matrix is to be referenced,
*        and the elements of the strictly upper triangular part are to be
*        inferred from those in the lower triangular part.
* trans  specifies the operation to be performed. If trans == 'N' or 'n',
*        C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C,
*        If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B +
*        alpha * transpose(B) * A + beta * C.
* n      specifies the number of rows and the number columns of matrix C. If
*        trans == 'N' or 'n', n specifies the number of rows of matrix A. If
*        trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A.
*        n must be at least zero.
* k      If trans == 'N' or 'n', k specifies the number of columns of matrix A.
*        If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of
*        matrix A. k must be at least zero.
* alpha  double precision scalar multiplier.
* A      double precision array of dimensions (lda, ka), where ka is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array A must contain the matrix A,
*        otherwise the leading k x n part of the array must contain the matrix
*        A.
* lda    leading dimension of A. When trans == 'N' or 'n' then lda must be at
*        least max(1, n). Otherwise lda must be at least max(1, k).
* B      double precision array of dimensions (ldb, kb), where kb is k when
*        trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n',
*        the leading n x k part of array B must contain the matrix B,
*        otherwise the leading k x n part of the array must contain the matrix
*        B.
* ldb    leading dimension of B. When trans == 'N' or 'n' then ldb must be at
*        least max(1, n). Otherwise ldb must be at least max(1, k).
* beta   double precision scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input.
* C      double precision array of dimensions (ldc, n). If uplo == 'U' or 'u',
*        the leading n x n triangular part of the array C must contain the
*        upper triangular part of the symmetric matrix C and the strictly
*        lower triangular part of C is not referenced. On exit, the upper
*        triangular part of C is overwritten by the upper triangular part of
*        the updated matrix. If uplo == 'L' or 'l', the leading n x n
*        triangular part of the array C must contain the lower triangular part
*        of the symmetric matrix C and the strictly upper triangular part of C
*        is not referenced. On exit, the lower triangular part of C is
*        overwritten by the lower triangular part of the updated matrix.
* ldc    leading dimension of C. Must be at least max(1, n).
*
* Output
* ------
* C      updated according to alpha*A*transpose(B) + alpha*B*transpose(A) +
*        beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C
*
* Reference: http://www.netlib.org/blas/dsyr2k.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if n < 0 or k < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding
* -----------
* A, B and C are Java objects that are converted to native addresses with
* getPointer(). If any of them is null, a java.lang.NullPointerException is
* thrown via ThrowByName and cublasDsyr2k is not called.
**/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasDsyr2kNative
  (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k,
   jdouble alpha, jobject A, jint lda, jobject B, jint ldb,
   jdouble beta, jobject C, jint ldc)
{
    // Reject missing pointer objects before touching the native library.
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasDsyr2k");
        return;
    }
    if (B == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasDsyr2k");
        return;
    }
    if (C == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasDsyr2k");
        return;
    }

    double* const matrixA = (double*)getPointer(env, A);
    double* const matrixB = (double*)getPointer(env, B);
    double* const matrixC = (double*)getPointer(env, C);

    Logger::log(LOG_TRACE, "Executing cublasDsyr2k(%c, %c, %d, %d, %lf, '%s', %d, '%s', %d, %lf, '%s', %d)\n",
        uplo, trans, n, k, alpha, "A", lda, "B", ldb, beta, "C", ldc);

    cublasDsyr2k(static_cast<char>(uplo), static_cast<char>(trans), n, k,
        alpha, matrixA, lda, matrixB, ldb, beta, matrixC, ldc);
}

/**
*
* void cublasZgemm (char transa, char transb, int m, int n, int k,
*                   cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
*                   const cuDoubleComplex *B, int ldb, cuDoubleComplex beta,
*                   cuDoubleComplex *C, int ldc)
*
* zgemm performs one of the matrix-matrix operations
*
*    C = alpha * op(A) * op(B) + beta*C,
*
* where op(X) is one of
*
*    op(X) = X   or   op(X) = transpose(X)   or   op(X) = conjg(transpose(X))
*
* alpha and beta are double-complex scalars, and A, B and C are matrices
* consisting of double-complex elements, with op(A) an m x k matrix, op(B)
* a k x n matrix and C an m x n matrix.
*
* Input
* -----
* transa specifies op(A). If transa == 'N' or 'n', op(A) = A. If transa ==
*        'T' or 't', op(A) = transpose(A). If transa == 'C' or 'c', op(A) =
*        conjg(transpose(A)).
* transb specifies op(B). If transb == 'N' or 'n', op(B) = B. If transb ==
*        'T' or 't', op(B) = transpose(B). If transb == 'C' or 'c', op(B) =
*        conjg(transpose(B)).
* m      number of rows of matrix op(A) and rows of matrix C. It must be at
*        least zero.
* n      number of columns of matrix op(B) and number of columns of C. It
*        must be at least zero.
* k      number of columns of matrix op(A) and number of rows of op(B). It
*        must be at least zero.
* alpha  double-complex scalar multiplier applied to op(A)op(B)
* A      double-complex array of dimensions (lda, k) if transa == 'N' or
*        'n', and of dimensions (lda, m) otherwise.
* lda    leading dimension of A. When transa == 'N' or 'n', it must be at
*        least max(1, m) and at least max(1, k) otherwise.
* B      double-complex array of dimensions (ldb, n) if transb == 'N' or 'n',
*        and of dimensions (ldb, k) otherwise
* ldb    leading dimension of B. When transb == 'N' or 'n', it must be at
*        least max(1, k) and at least max(1, n) otherwise.
* beta   double-complex scalar multiplier applied to C. If beta is zero, C
*        does not have to be a valid input.
* C      double-complex array of dimensions (ldc, n)
* ldc    leading dimension of C. Must be at least max(1, m).
*
* Output
* ------
* C      updated according to C = alpha*op(A)*op(B) + beta*C
*
* Reference: http://www.netlib.org/blas/zgemm.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if any of m, n, or k are < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding
* -----------
* A, B and C are Java objects that are converted to native addresses with
* getPointer(). alpha and beta are Java objects whose real and imaginary
* parts are read through the cuDoubleComplex_x and cuDoubleComplex_y field
* IDs. If any of these five objects is null, a java.lang.NullPointerException
* is thrown via ThrowByName and cublasZgemm is not called.
**/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZgemmNative
  (JNIEnv *env, jclass cls, jchar transa, jchar transb, jint m, jint n, jint k,
   jobject alpha, jobject A, jint lda, jobject B, jint ldb,
   jobject beta, jobject C, jint ldc)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZgemm");
        return;
    }
    if (B == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZgemm");
        return;
    }
    if (C == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZgemm");
        return;
    }
    // GetDoubleField must not be handed a null object reference, so the
    // complex scalars are validated in the same way as the pointer objects.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasZgemm");
        return;
    }
    if (beta == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'beta' is null for cublasZgemm");
        return;
    }

    cuDoubleComplex* nativeA = (cuDoubleComplex*)getPointer(env, A);
    cuDoubleComplex* nativeB = (cuDoubleComplex*)getPointer(env, B);
    cuDoubleComplex* nativeC = (cuDoubleComplex*)getPointer(env, C);

    // Copy the Java-side complex scalars into native cuDoubleComplex values.
    cuDoubleComplex nativeAlpha;
    nativeAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x);
    nativeAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y);
    cuDoubleComplex nativeBeta;
    nativeBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x);
    nativeBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasZgemm(%c, %c, %d, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n",
        transa, transb, m, n, k, nativeAlpha.x, nativeAlpha.y, "A", lda, "B", ldb, nativeBeta.x, nativeBeta.y, "C", ldc);

    cublasZgemm((char)transa, (char)transb, m, n, k, nativeAlpha, nativeA, lda, nativeB, ldb, nativeBeta, nativeC, ldc);
}

/**
*
* void
* cublasZtrmm (char side, char uplo, char transa, char diag, int m, int n,
*              cuDoubleComplex alpha, const cuDoubleComplex *A, int lda,
*              cuDoubleComplex *B, int ldb)
*
* performs one of the matrix-matrix operations
*
*    B = alpha * op(A) * B,  or  B = alpha * B * op(A)
*
* where alpha is a double-precision complex scalar, B is an m x n matrix composed
* of double precision complex elements, and A is a unit or non-unit, upper or lower,
* triangular matrix composed of double precision complex elements. op(A) is one of
*
*    op(A) = A  , op(A) = transpose(A) or op(A) = conjugate(transpose(A))
*
* Matrices A and B are stored in column major format, and lda and ldb are
* the leading dimensions of the two-dimensional arrays that contain A and
* B, respectively.
*
* Input
* -----
* side   specifies whether op(A) multiplies B from the left or right.
*        If side = 'L' or 'l', then B = alpha * op(A) * B. If side =
*        'R' or 'r', then B = alpha * B * op(A).
* uplo   specifies whether the matrix A is an upper or lower triangular
*        matrix. If uplo = 'U' or 'u', A is an upper triangular matrix.
*        If uplo = 'L' or 'l', A is a lower triangular matrix.
* transa specifies the form of op(A) to be used in the matrix
*        multiplication. If transa = 'N' or 'n', then op(A) = A. If
*        transa = 'T' or 't', then op(A) = transpose(A).
*        If transa = 'C' or 'c', then op(A) = conjugate(transpose(A)).
* diag   specifies whether or not A is unit triangular. If diag = 'U'
*        or 'u', A is assumed to be unit triangular. If diag = 'N' or
*        'n', A is not assumed to be unit triangular.
* m      the number of rows of matrix B. m must be at least zero.
* n      the number of columns of matrix B. n must be at least zero.
* alpha  double precision complex scalar multiplier applied to op(A)*B, or
*        B*op(A), respectively. If alpha is zero no accesses are made
*        to matrix A, and no read accesses are made to matrix B.
* A      double precision complex array of dimensions (lda, k). k = m if side =
*        'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u'
*        the leading k x k upper triangular part of the array A must
*        contain the upper triangular matrix, and the strictly lower
*        triangular part of A is not referenced. If uplo = 'L' or 'l'
*        the leading k x k lower triangular part of the array A must
*        contain the lower triangular matrix, and the strictly upper
*        triangular part of A is not referenced. When diag = 'U' or 'u'
*        the diagonal elements of A are not referenced and are assumed
*        to be unity.
* lda    leading dimension of A. When side = 'L' or 'l', it must be at
*        least max(1,m) and at least max(1,n) otherwise
* B      double precision complex array of dimensions (ldb, n). On entry, the
*        leading m x n part of the array contains the matrix B. It is
*        overwritten with the transformed matrix on exit.
* ldb    leading dimension of B. It must be at least max (1, m).
*
* Output
* ------
* B      updated according to B = alpha * op(A) * B  or B = alpha * B * op(A)
*
* Reference: http://www.netlib.org/blas/ztrmm.f
*
* Error status for this function can be retrieved via cublasGetError().
*
* Error Status
* ------------
* CUBLAS_STATUS_NOT_INITIALIZED  if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE    if m or n < 0
* CUBLAS_STATUS_ARCH_MISMATCH    if invoked on device without DP support
* CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU
*
* JNI binding
* -----------
* A and B are Java objects that are converted to native addresses with
* getPointer(). alpha is a Java object whose real and imaginary parts are
* read through the cuDoubleComplex_x and cuDoubleComplex_y field IDs. If any
* of these three objects is null, a java.lang.NullPointerException is thrown
* via ThrowByName and cublasZtrmm is not called.
**/
JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtrmmNative
  (JNIEnv *env, jclass cls, jchar side, jchar uplo, jchar transa, jchar diag,
   jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb)
{
    if (A == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZtrmm");
        return;
    }
    if (B == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZtrmm");
        return;
    }
    // GetDoubleField must not be handed a null object reference, so the
    // complex scalar is validated in the same way as the pointer objects.
    if (alpha == NULL)
    {
        ThrowByName(env, "java/lang/NullPointerException", "Parameter 'alpha' is null for cublasZtrmm");
        return;
    }

    cuDoubleComplex* nativeA = (cuDoubleComplex*)getPointer(env, A);
    cuDoubleComplex* nativeB = (cuDoubleComplex*)getPointer(env, B);

    // Copy the Java-side complex scalar into a native cuDoubleComplex value.
    cuDoubleComplex nativeAlpha;
    nativeAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x);
    nativeAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y);

    Logger::log(LOG_TRACE, "Executing cublasZtrmm(%c, %c, %c, %c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d)\n",
        side, uplo, transa, diag, m, n, nativeAlpha.x, nativeAlpha.y, "A", lda, "B", ldb);

    cublasZtrmm((char)side, (char)uplo, (char)transa, (char)diag, m, n, nativeAlpha, nativeA, lda, nativeB, ldb);
}

/**
*
* cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(y) + A, * * where alpha is a double precision complex scalar, x is an m element double * precision complex vector, y is an n element double precision complex vector, and A * is an m by n matrix consisting of double precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha double precision complex scalar multiplier applied to x * transpose(y) * x double precision complex array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A double precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * transpose(y) + A * * Reference: http://www.netlib.org/blas/zgeru.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZgeruNative (JNIEnv *env, jclass cls, jint m, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZgeru"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZgeru"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZgeru"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex* nativeA; cuDoubleComplex dobuleComplexAlpha; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); nativeA = (cuDoubleComplex*)getPointer(env, A); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZgeru(%d, %d, [%lf,%lf], '%s', %d, '%s', %d, '%s', %d)\n", m, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "x", incx, "y", incy, "A", lda); cublasZgeru(m, n, dobuleComplexAlpha, nativeX, incx, nativeY, incy, nativeA, lda); } /** *
* cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * conjugate(transpose(y)) + A, * * where alpha is a double precision complex scalar, x is an m element double * precision complex vector, y is an n element double precision complex vector, and A * is an m by n matrix consisting of double precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha double precision complex scalar multiplier applied to x * conjugate(transpose(y)) * x double precision array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A double precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(y)) + A * * Reference: http://www.netlib.org/blas/zgerc.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZgercNative (JNIEnv *env, jclass cls, jint m, jint n, jobject alpha, jobject x, jint incx, jobject y, jint incy, jobject A, jint lda) { if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZgerc"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZgerc"); return; } if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZgerc"); return; } cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex* nativeA; cuDoubleComplex dobuleComplexAlpha; nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); nativeA = (cuDoubleComplex*)getPointer(env, A); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZgerc(%d, %d, [%lf,%lf], '%s', %d, '%s', %d, '%s', %d)\n", m, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "x", incx, "y", incy, "A", lda); cublasZgerc(m, n, dobuleComplexAlpha, nativeX, incx, nativeY, incy, nativeA, lda); } /** *
* void * cublasZherk (char uplo, char trans, int n, int k, double alpha, * const cuDoubleComplex *A, int lda, double beta, cuDoubleComplex *C, int ldc) * * performs one of the hermitian rank k operations * * C = alpha * A * conjugate(transpose(A)) + beta * C, or * C = alpha * conjugate(transpose(A)) * A + beta * C. * * Alpha and beta are double precision scalars. C is an n x n hermitian matrix * consisting of double precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of double precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * A * conjugate(transpose(A)) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier applied to A * conjugate(transpose(A)) or * conjugate(transpose(A)) * A. 
* A double precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * conjugate(transpose(A)) + beta * C, or C = * alpha * conjugate(transpose(A)) * A + beta * C * * Reference: http://www.netlib.org/blas/zherk.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZherkNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jint n, jint k, jdouble alpha, jobject A, jint lda, jdouble beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZherk"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZherk"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeC; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeC = (cuDoubleComplex*)getPointer(env, C); Logger::log(LOG_TRACE, "Executing cublasZherk(%c, %c, %d, %d, %lf, '%s', %d, %lf, '%s', %d)\n", uplo, trans, n, k, alpha, "A", lda, beta, "C", ldc); cublasZherk((char)uplo, (char)trans, n, k, alpha, nativeA, lda, beta, nativeC, ldc); } /** *
* void * cublasZhemm (char side, char uplo, int m, int n, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * cuDoubleComplex beta, cuDoubleComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are double precision complex scalars, A is a hermitian matrix * consisting of double precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of double precision * complex elements. * * Input * ----- * side specifies whether the hermitian matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the hermitian matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of hermitian matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of hermitian * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha double precision scalar multiplier applied to A * B, or B * A * A double precision complex array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. 
If side == 'L' or 'l' the * leading m x m part of array A must contain the hermitian matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the hermitian matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * hermitian matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the hermitian matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * hermitian matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the hermitian matrix and the * strictly upper triangular part is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B double precision complex array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta double precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision complex array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/zhemm.f * * Error status for this function can be retrieved via cublasGetError(). 
* * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZhemmNative (JNIEnv *env, jclass cls, jchar side, jchar uplo, jint m, jint n, jobject alpha, jobject A, jint lda, jobject B, jint ldb, jobject beta, jobject C, jint ldc) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZhemm"); return; } if (B == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'B' is null for cublasZhemm"); return; } if (C == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'C' is null for cublasZhemm"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeB; cuDoubleComplex* nativeC; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeB = (cuDoubleComplex*)getPointer(env, B); nativeC = (cuDoubleComplex*)getPointer(env, C); dobuleComplexAlpha.x = env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZhemm(%c, %c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n", side, uplo, m, n, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "B", ldb, dobuleComplexBeta.x, dobuleComplexBeta.y, "C", ldc); cublasZhemm((char)side, (char)uplo, m, n, dobuleComplexAlpha, nativeA, lda, nativeB, ldb, dobuleComplexBeta, nativeC, ldc); } /** *
* void * cublasZtrsv (char uplo, char trans, char diag, int n, const cuDoubleComplex *A, * int lda, cuDoubleComplex *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A, * transpose(A) or conjugate(transpose(A)). b and x are double precision * complex vectors consisting of n elements, and A is an n x n matrix * composed of a unit or non-unit, upper or lower triangular matrix. * Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array containing A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a double precision complex array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. 
On exit, * it is overwritten with the solution vector x. * incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ztrsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZtrsvNative (JNIEnv *env, jclass cls, jchar uplo, jchar trans, jchar diag, jint n, jobject A, jint lda, jobject x, jint incx) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZtrsv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZtrsv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); Logger::log(LOG_TRACE, "Executing cublasZtrsv(%c, %c, %c, %d, '%s', %d, '%s', %d)\n", uplo, trans, diag, n, "A", lda, "x", incx); cublasZtrsv((char)uplo, (char)trans, (char)diag, n, nativeA, lda, nativeX, incx); } /** *
* void * cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are double precision complex scalars. x and y are double precision * complex vectors with n elements. A is an n by n hermitian band matrix consisting * of double precision complex elements, with k super-diagonals and the same number * of subdiagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the hermitian * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * hermitian matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is hermitian, this is also the number of sub-diagonals. k must be at * least zero. * alpha double precision complex scalar multiplier applied to A*x. * A double precision complex array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the hermitian matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the hermitian matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. 
The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero. * lda leading dimension of A. lda must be at least (k + 1). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision complex scalar multiplier applied to vector y. If beta is * zero, y is not read. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/zhbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ JNIEXPORT void JNICALL Java_jcuda_jcublas_JCublas_cublasZhbmvNative (JNIEnv *env, jclass cls, jchar uplo, jint n, jint k, jobject alpha, jobject A, jint lda, jobject x, jint incx, jobject beta, jobject y, jint incy) { if (A == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'A' is null for cublasZhbmv"); return; } if (x == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'x' is null for cublasZhbmv"); return; } if (y == NULL) { ThrowByName(env, "java/lang/NullPointerException", "Parameter 'y' is null for cublasZhbmv"); return; } cuDoubleComplex* nativeA; cuDoubleComplex* nativeX; cuDoubleComplex* nativeY; cuDoubleComplex dobuleComplexAlpha; cuDoubleComplex dobuleComplexBeta; nativeA = (cuDoubleComplex*)getPointer(env, A); nativeX = (cuDoubleComplex*)getPointer(env, x); nativeY = (cuDoubleComplex*)getPointer(env, y); dobuleComplexAlpha.x = 
env->GetDoubleField(alpha, cuDoubleComplex_x); dobuleComplexAlpha.y = env->GetDoubleField(alpha, cuDoubleComplex_y); dobuleComplexBeta.x = env->GetDoubleField(beta, cuDoubleComplex_x); dobuleComplexBeta.y = env->GetDoubleField(beta, cuDoubleComplex_y); Logger::log(LOG_TRACE, "Executing cublasZhbmv(%c, %d, %d, [%lf,%lf], '%s', %d, '%s', %d, [%lf,%lf], '%s', %d)\n", uplo, n, k, dobuleComplexAlpha.x, dobuleComplexAlpha.y, "A", lda, "x", incx, dobuleComplexBeta.x, dobuleComplexBeta.y, "y", incy); cublasZhbmv((char)uplo, n, k, dobuleComplexAlpha, nativeA, lda, nativeX, incx, dobuleComplexBeta, nativeY, incy); }
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy