jcuda.jcublas.JCublas Maven / Gradle / Ivy
/*
*
* * Copyright 2015 Skymind,Inc.
* *
* * Licensed under the Apache License, Version 2.0 (the "License");
* * you may not use this file except in compliance with the License.
* * You may obtain a copy of the License at
* *
* * http://www.apache.org/licenses/LICENSE-2.0
* *
* * Unless required by applicable law or agreed to in writing, software
* * distributed under the License is distributed on an "AS IS" BASIS,
* * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* * See the License for the specific language governing permissions and
* * limitations under the License.
*
*
*/
package jcuda.jcublas;
import jcuda.*;
import jcuda.runtime.cudaStream_t;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.DoubleBuffer;
import java.nio.FloatBuffer;
/**
* Java bindings for CUBLAS, the NVIDIA CUDA BLAS library.
*
* Most comments are taken from the cublas.h header file.
*
*/
public class JCublas
{
/**
* The flag that indicates whether the native library has been
* loaded
*/
private static boolean initialized = false;
/**
* Whether a CudaException should be thrown if a method is about
* to set a result code that is not cublasStatus.CUBLAS_STATUS_SUCCESS
*/
private static boolean exceptionsEnabled = false;
/**
* The last result code that was set by any of the BLAS functions.
* This will be stored in the checkResultBLAS() method if
* exceptions are enabled.
*/
private static int lastResult = cublasStatus.CUBLAS_STATUS_SUCCESS;
/* Private constructor to prevent instantiation */
private JCublas()
{
}
// Initialize the native library.
static
{
initialize();
}
/**
* Initializes the native library. Note that this method
* does not have to be called explicitly, since it will
* be called automatically when this class is loaded.
*/
public static void initialize()
{
if (!initialized)
{
LibUtils.loadLibrary("JCublas");
initialized = true;
}
}
/**
* Set the specified log level for the JCublas library.
*
* Currently supported log levels:
*
* LOG_QUIET: Never print anything
* LOG_ERROR: Print error messages
* LOG_TRACE: Print a trace of all native function calls
*
* @param logLevel The log level to use.
*/
public static void setLogLevel(LogLevel logLevel)
{
setLogLevelNative(logLevel.ordinal());
}
private static native void setLogLevelNative(int logLevel);
/**
* Enables or disables exceptions. By default, the methods of this class
* only set the result status which may be queried with
* {@link JCublas#cublasGetError()}.
* If exceptions are enabled, a CudaException with a detailed error
* message will be thrown if a method is about to set a result code
* that is not cublasStatus.CUBLAS_STATUS_SUCCESS
*
* @param enabled Whether exceptions are enabled
*/
public static void setExceptionsEnabled(boolean enabled)
{
exceptionsEnabled = enabled;
}
/**
* If the given result is different to cublasStatus.CUBLAS_STATUS_SUCCESS
* and exceptions have been enabled, this method will throw a
* CudaException with an error message that corresponds to the
* given result code. Otherwise, the given result is simply
* returned.
*
* @param result The result to check
* @return The result that was given as the parameter
* @throws CudaException If exceptions have been enabled and
* the given result code is not cublasStatus.CUBLAS_STATUS_SUCCESS
*/
private static int checkResult(int result)
{
if (exceptionsEnabled && result != cublasStatus.CUBLAS_STATUS_SUCCESS)
{
throw new CudaException(cublasStatus.stringFor(result));
}
return result;
}
/**
* Obtain the current CUBLAS status by calling cublasGetErrorNative,
* and store the result as the lastResult. If the obtained result
* code is not cublasStatus.CUBLAS_STATUS_SUCCESS and exceptions
* have been enabled, an CudaException will be thrown.
*/
private static void checkResultBLAS()
{
if (exceptionsEnabled)
{
lastResult = cublasGetErrorNative();
if (lastResult != cublasStatus.CUBLAS_STATUS_SUCCESS)
{
throw new CudaException(cublasStatus.stringFor(lastResult));
}
}
}
/**
* Wrapper for CUBLAS function.
*
* cublasStatus
* cublasInit()
*
* initializes the CUBLAS library and must be called before any other
* CUBLAS API function is invoked. It allocates hardware resources
* necessary for accessing the GPU.
*
* Return Values
* -------------
* CUBLAS_STATUS_ALLOC_FAILED if resources could not be allocated
* CUBLAS_STATUS_SUCCESS if CUBLAS library initialized successfully
*/
public static int cublasInit()
{
return checkResult(cublasInitNative());
}
private static native int cublasInitNative();
/**
* Wrapper for CUBLAS function.
*
* cublasStatus
* cublasShutdown()
*
* releases CPU-side resources used by the CUBLAS library. The release of
* GPU-side resources may be deferred until the application shuts down.
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_SUCCESS if CUBLAS library shut down successfully
*/
public static int cublasShutdown()
{
return checkResult(cublasShutdownNative());
}
private static native int cublasShutdownNative();
/**
* Wrapper for CUBLAS function.
*
* cublasStatus
* cublasGetError()
*
* returns the last error that occurred on invocation of any of the
* CUBLAS BLAS functions. While the CUBLAS helper functions return status
* directly, the BLAS functions do not do so for improved
* compatibility with existing environments that do not expect BLAS
* functions to return status. Reading the error status via
* cublasGetError() resets the internal error state to
* CUBLAS_STATUS_SUCCESS.
*/
public static int cublasGetError()
{
if (exceptionsEnabled)
{
int returnedResult = lastResult;
lastResult = cublasStatus.CUBLAS_STATUS_SUCCESS;
return returnedResult;
}
return cublasGetErrorNative();
}
private static native int cublasGetErrorNative();
/**
* Wrapper for CUBLAS function.
*
* cublasStatus
* cublasAlloc (int n, int elemSize, void **devicePtr)
*
* creates an object in GPU memory space capable of holding an array of
* n elements, where each element requires elemSize bytes of storage. If
* the function call is successful, a pointer to the object in GPU memory
* space is placed in devicePtr. Note that this is a device pointer that
* cannot be dereferenced in host code.
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE if n <= 0, or elemSize <= 0
* CUBLAS_STATUS_ALLOC_FAILED if the object could not be allocated due to
* lack of resources.
* CUBLAS_STATUS_SUCCESS if storage was successfully allocated
*/
public static int cublasAlloc(int n, int elemSize, Pointer ptr)
{
return checkResult(cublasAllocNative(n, elemSize, ptr));
}
private static native int cublasAllocNative(int n, int elemSize, Pointer ptr);
/**
* Wrapper for CUBLAS function.
*
* cublasStatus
* cublasFree (const void *devicePtr)
*
* destroys the object in GPU memory space pointed to by devicePtr.
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_INTERNAL_ERROR if the object could not be deallocated
* CUBLAS_STATUS_SUCCESS if object was destroyed successfully
*/
public static int cublasFree(Pointer ptr)
{
return checkResult(cublasFreeNative(ptr));
}
private static native int cublasFreeNative(Pointer ptr);
// Debug method
public static native void printVector(int n, Pointer x);
// Debug method
public static native void printMatrix(int cols, Pointer A, int lda);
/*
* Internal method to which all calls to an implementation of
* cublasSetVector are finally delegated
*/
private static native int cublasSetVectorNative(int n, int elemSize, Pointer x, int incx, Pointer y, int incy);
/*
* Internal method to which all calls to an implementation of
* cublasGetVector are finally delegated
*/
private static native int cublasGetVectorNative(int n, int elemSize, Pointer x, int incx, Pointer y, int incy);
/*
* Internal method to which all calls to an implementation of
* cublasSetMatrix are finally delegated
*/
private static native int cublasSetMatrixNative(int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb);
/*
* Internal method to which all calls to an implementation of
* cublasGetMatrix are finally delegated
*/
private static native int cublasGetMatrixNative(int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb);
/*
* Internal method to which all calls to an implementation of
* cublasSetVectorAsync are finally delegated
*/
private static native int cublasSetVectorAsyncNative(int n, int elemSize, Pointer x, int incx, Pointer y, int incy, cudaStream_t stream);
/*
* Internal method to which all calls to an implementation of
* cublasGetVectorAsync are finally delegated
*/
private static native int cublasGetVectorAsyncNative(int n, int elemSize, Pointer x, int incx, Pointer y, int incy, cudaStream_t stream);
/*
* Internal method to which all calls to an implementation of
* cublasSetMatrixAsync are finally delegated
*/
private static native int cublasSetMatrixAsyncNative(int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb, cudaStream_t stream);
/*
* Internal method to which all calls to an implementation of
* cublasGetMatrix are finally delegated
*/
private static native int cublasGetMatrixAsyncNative(int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb, cudaStream_t stream);
//============================================================================
// Memory management methods for single precision data:
/**
* Wrapper for CUBLAS function.
*
* cublasStatus
* cublasSetVector (int n, int elemSize, const void *x, int incx,
* void *y, int incy)
*
* copies n elements from a vector x in CPU memory space to a vector y
* in GPU memory space. Elements in both vectors are assumed to have a
* size of elemSize bytes. Storage spacing between consecutive elements
* is incx for the source vector x and incy for the destination vector
* y. In general, y points to an object, or part of an object, allocated
* via cublasAlloc(). Column major format for two-dimensional matrices
* is assumed throughout CUBLAS. Therefore, if the increment for a vector
* is equal to 1, this access a column vector while using an increment
* equal to the leading dimension of the respective matrix accesses a
* row vector.
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library not been initialized
* CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0
* CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory
* CUBLAS_STATUS_SUCCESS if the operation completed successfully
*/
public static int cublasSetVector(int n, int elemSize, Pointer x, int incx, Pointer y, int incy)
{
return checkResult(cublasSetVectorNative(n, elemSize, x, incx, y, incy));
}
/**
* Extended wrapper for arrays of cuComplex values. Note that this method
* only exists for convenience and compatibility with native C code. It
* is much more efficient to provide a Pointer to a float array containing
* the complex numbers, where each pair of consecutive numbers in the array
* describes the real- and imaginary part of one complex number.
*
* @see JCublas#cublasSetVector(int, int, Pointer, int, Pointer, int)
*/
public static int cublasSetVector (int n, cuComplex x[], int offsetx, int incx, Pointer y, int incy)
{
ByteBuffer byteBufferx = ByteBuffer.allocateDirect(x.length * 4 * 2);
byteBufferx.order(ByteOrder.nativeOrder());
FloatBuffer floatBufferx = byteBufferx.asFloatBuffer();
int indexx = offsetx;
for (int i=0; i
*
* cublasStatus
* cublasGetVector (int n, int elemSize, const void *x, int incx,
* void *y, int incy)
*
* copies n elements from a vector x in GPU memory space to a vector y
* in CPU memory space. Elements in both vectors are assumed to have a
* size of elemSize bytes. Storage spacing between consecutive elements
* is incx for the source vector x and incy for the destination vector
* y. In general, x points to an object, or part of an object, allocated
* via cublasAlloc(). Column major format for two-dimensional matrices
* is assumed throughout CUBLAS. Therefore, if the increment for a vector
* is equal to 1, this access a column vector while using an increment
* equal to the leading dimension of the respective matrix accesses a
* row vector.
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library not been initialized
* CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0
* CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory
* CUBLAS_STATUS_SUCCESS if the operation completed successfully
*/
public static int cublasGetVector (int n, int elemSize, Pointer x, int incx, Pointer y, int incy)
{
return checkResult(cublasGetVectorNative(n, elemSize, x, incx, y, incy));
}
/**
* Extended wrapper for arrays of cuComplex values. Note that this method
* only exists for convenience and compatibility with native C code. It
* is much more efficient to provide a Pointer to a float array that may
* store the complex numbers, where each pair of consecutive numbers in
* the array describes the real- and imaginary part of one complex number.
*
* @see JCublas#cublasGetVector(int, int, Pointer, int, Pointer, int)
*/
public static int cublasGetVector (int n, Pointer x, int incx, cuComplex y[], int offsety, int incy)
{
ByteBuffer byteBuffery = ByteBuffer.allocateDirect(y.length * 4 * 2);
byteBuffery.order(ByteOrder.nativeOrder());
FloatBuffer floatBuffery = byteBuffery.asFloatBuffer();
int status = cublasGetVectorNative(n, 8, x, incx, Pointer.to(floatBuffery).withByteOffset(offsety * 4 * 2), incy);
if (status == cublasStatus.CUBLAS_STATUS_SUCCESS)
{
floatBuffery.rewind();
int indexy = offsety;
for (int i=0; i
*
* cublasStatus
* cublasSetMatrix (int rows, int cols, int elemSize, const void *A,
* int lda, void *B, int ldb)
*
* copies a tile of rows x cols elements from a matrix A in CPU memory
* space to a matrix B in GPU memory space. Each element requires storage
* of elemSize bytes. Both matrices are assumed to be stored in column
* major format, with the leading dimension (i.e. number of rows) of
* source matrix A provided in lda, and the leading dimension of matrix B
* provided in ldb. In general, B points to an object, or part of an
* object, that was allocated via cublasAlloc().
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE if rows or cols < 0, or elemSize, lda, or
* ldb <= 0
* CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory
* CUBLAS_STATUS_SUCCESS if the operation completed successfully
*/
public static int cublasSetMatrix (int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb)
{
return checkResult(cublasSetMatrixNative(rows, elemSize, cols, A, lda, B, ldb));
}
/**
* Extended wrapper for arrays of cuComplex values. Note that this method
* only exists for convenience and compatibility with native C code. It
* is much more efficient to provide a Pointer to a float array containing
* the complex numbers, where each pair of consecutive numbers in the array
* describes the real- and imaginary part of one complex number.
*
* @see JCublas#cublasSetMatrix(int, int, int, Pointer, int, Pointer, int)
*/
public static int cublasSetMatrix (int rows, int cols, cuComplex A[], int offsetA, int lda, Pointer B, int ldb)
{
ByteBuffer byteBufferA = ByteBuffer.allocateDirect(A.length * 4 * 2);
byteBufferA.order(ByteOrder.nativeOrder());
FloatBuffer floatBufferA = byteBufferA.asFloatBuffer();
for (int i=0; i
*
* cublasStatus
* cublasGetMatrix (int rows, int cols, int elemSize, const void *A,
* int lda, void *B, int ldb)
*
* copies a tile of rows x cols elements from a matrix A in GPU memory
* space to a matrix B in CPU memory space. Each element requires storage
* of elemSize bytes. Both matrices are assumed to be stored in column
* major format, with the leading dimension (i.e. number of rows) of
* source matrix A provided in lda, and the leading dimension of matrix B
* provided in ldb. In general, A points to an object, or part of an
* object, that was allocated via cublasAlloc().
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_INVALID_VALUE if rows, cols, eleSize, lda, or ldb <= 0
* CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory
* CUBLAS_STATUS_SUCCESS if the operation completed successfully
*/
public static int cublasGetMatrix (int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb)
{
return checkResult(cublasGetMatrixNative(rows, cols, elemSize, A, lda, B, ldb));
}
/**
* Extended wrapper for arrays of cuComplex values. Note that this method
* only exists for convenience and compatibility with native C code. It
* is much more efficient to provide a Pointer to a float array that may
* store the complex numbers, where each pair of consecutive numbers in
* the array describes the real- and imaginary part of one complex number.
*
* @see JCublas#cublasGetMatrix(int, int, int, Pointer, int, Pointer, int)
*/
public static int cublasGetMatrix (int rows, int cols, Pointer A, int lda, cuComplex B[], int offsetB, int ldb)
{
ByteBuffer byteBufferB = ByteBuffer.allocateDirect(B.length * 4 * 2);
byteBufferB.order(ByteOrder.nativeOrder());
FloatBuffer floatBufferB = byteBufferB.asFloatBuffer();
int status = cublasGetMatrixNative(rows, cols, 8, A, lda, Pointer.to(floatBufferB).withByteOffset(offsetB * 4 * 2), ldb);
if (status == cublasStatus.CUBLAS_STATUS_SUCCESS)
{
floatBufferB.rewind();
for (int c=0; c
* Set the CUBLAS stream in which all subsequent CUBLAS kernel launches will run.
*
* cublasStatus
* cublasSetKernelStream ( cudaStream_t stream )
*
* set the CUBLAS stream in which all subsequent CUBLAS kernel launches will run.
* By default, if the CUBLAS stream is not set, all kernels will use the NULL
* stream. This routine can be used to change the stream between kernels launches
* and can be used also to set the CUBLAS stream back to NULL.
*
* Return Values
* -------------
* CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized
* CUBLAS_STATUS_SUCCESS if stream set successfully
*
*/
public static int cublasSetKernelStream (cudaStream_t stream)
{
return checkResult(cublasSetKernelStreamNative(stream));
}
private static native int cublasSetKernelStreamNative(cudaStream_t stream);
/*
* Wrapper for CUBLAS function.
* * cublasStatus * cublasSetVectorAsync ( int n, int elemSize, const void *x, int incx, * void *y, int incy, cudaStream_t stream ); * * cublasSetVectorAsync has the same functionnality as cublasSetVector * but the transfer is done asynchronously within the CUDA stream passed * in parameter. * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully **/ public static int cublasSetVectorAsync (int n, int elemSize, Pointer hostPtr, int incx, Pointer devicePtr, int incy, cudaStream_t stream) { return checkResult(cublasSetVectorAsyncNative(n, elemSize, hostPtr, incx, devicePtr, incy, stream)); } /* * Wrapper for CUBLAS function. *
* cublasStatus * cublasGetVectorAsync( int n, int elemSize, const void *x, int incx, * void *y, int incy, cudaStream_t stream) * * cublasGetVectorAsync has the same functionnality as cublasGetVector * but the transfer is done asynchronously within the CUDA stream passed * in parameter. * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx, incy, or elemSize <= 0 * CUBLAS_STATUS_MAPPING_ERROR if an error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully **/ public static int cublasGetVectorAsync(int n, int elemSize, Pointer devicePtr, int incx, Pointer hostPtr, int incy, cudaStream_t stream) { return checkResult(cublasGetVectorAsyncNative(n, elemSize, devicePtr, incx, hostPtr, incy, stream)); } /* * Wrapper for CUBLAS function. *
* cublasStatus * cublasSetMatrixAsync (int rows, int cols, int elemSize, const void *A, * int lda, void *B, int ldb, cudaStream_t stream) * * cublasSetMatrixAsync has the same functionnality as cublasSetMatrix * but the transfer is done asynchronously within the CUDA stream passed * in parameter. * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if rows or cols < 0, or elemSize, lda, or * ldb <= 0 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully **/ public static int cublasSetMatrixAsync (int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb, cudaStream_t stream) { return checkResult(cublasSetMatrixAsyncNative(rows, cols, elemSize, A, lda, B, ldb, stream)); } /* * Wrapper for CUBLAS function. *
* cublasStatus * cublasGetMatrixAsync (int rows, int cols, int elemSize, const void *A, * int lda, void *B, int ldb, cudaStream_t stream) * * cublasGetMatrixAsync has the same functionnality as cublasGetMatrix * but the transfer is done asynchronously within the CUDA stream passed * in parameter. * * Return Values * ------------- * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if rows, cols, eleSize, lda, or ldb <= 0 * CUBLAS_STATUS_MAPPING_ERROR if error occurred accessing GPU memory * CUBLAS_STATUS_SUCCESS if the operation completed successfully **/ public static int cublasGetMatrixAsync (int rows, int cols, int elemSize, Pointer A, int lda, Pointer B, int ldb, cudaStream_t stream) { return checkResult(cublasGetMatrixAsyncNative(rows, cols, elemSize, A, lda, B, ldb, stream)); } //============================================================================ // Methods that are not handled by the code generator: /** * Wrapper for CUBLAS function. *
* void * cublasSrotm (int n, float *x, int incx, float *y, int incy, * const float* sparam) * * applies the modified Givens transformation, h, to the 2 x n matrix * * ( transpose(x) ) * ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 to n-1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. With sparam[0] = sflag, h has one of the following forms: * * sflag = -1.0f sflag = 0.0f sflag = 1.0f sflag = -2.0f * * (sh00 sh01) (1.0f sh01) (sh00 1.0f) (1.0f 0.0f) * h = ( ) ( ) ( ) ( ) * (sh10 sh11) (sh10 1.0f) (-1.0f sh11) (0.0f 1.0f) * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, * and sprams[4] contains sh11. * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/srotm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSrotm(int n, Pointer x, int incx, Pointer y, int incy, float sparam[]) { cublasSrotmNative(n, x, incx, y, incy, sparam); checkResultBLAS(); } private static native void cublasSrotmNative(int n, Pointer x, int incx, Pointer y, int incy, float sparam[]); /** * Wrapper for CUBLAS function. *
* void * cublasSrotmg (float *psd1, float *psd2, float *psx1, const float *psy1, * float *sparam) * * constructs the modified Givens transformation matrix h which zeros * the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*sy1). * With sparam[0] = sflag, h has one of the following forms: * * sflag = -1.0f sflag = 0.0f sflag = 1.0f sflag = -2.0f * * (sh00 sh01) (1.0f sh01) (sh00 1.0f) (1.0f 0.0f) * h = ( ) ( ) ( ) ( ) * (sh10 sh11) (sh10 1.0f) (-1.0f sh11) (0.0f 1.0f) * * sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11, * respectively. Values of 1.0f, -1.0f, or 0.0f implied by the value * of sflag are not stored in sparam. * * Input * ----- * sd1 single precision scalar * sd2 single precision scalar * sx1 single precision scalar * sy1 single precision scalar * * Output * ------ * sd1 changed to represent the effect of the transformation * sd2 changed to represent the effect of the transformation * sx1 changed to represent the effect of the transformation * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, * and sprams[4] contains sh11. * * Reference: http://www.netlib.org/blas/srotmg.f * * This functions does not set any error status. **/ public static void cublasSrotmg(float sd1[], float sd2[], float sx1[], float sy1, float sparam[]) { cublasSrotmgNative(sd1, sd2, sx1, sy1, sparam); checkResultBLAS(); } private static native void cublasSrotmgNative(float sd1[], float sd2[], float sx1[], float sy1, float sparam[]); /** * Wrapper for CUBLAS function. *
* void * cublasDrotm (int n, double *x, int incx, double *y, int incy, * const double* sparam) * * applies the modified Givens transformation, h, to the 2 x n matrix * * ( transpose(x) ) * ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 to n-1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. With sparam[0] = sflag, h has one of the following forms: * * sflag = -1.0 sflag = 0.0 sflag = 1.0 sflag = -2.0 * * (sh00 sh01) (1.0 sh01) (sh00 1.0) (1.0 0.0) * h = ( ) ( ) ( ) ( ) * (sh10 sh11) (sh10 1.0) (-1.0 sh11) (0.0 1.0) * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, * and sprams[4] contains sh11. * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/drotm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDrotm(int n, Pointer x, int incx, Pointer y, int incy, double sparam[]) { cublasDrotmNative(n, x, incx, y, incy, sparam); checkResultBLAS(); } private static native void cublasDrotmNative(int n, Pointer x, int incx, Pointer y, int incy, double sparam[]); /** * Wrapper for CUBLAS function. *
* void * cublasDrotmg (double *psd1, double *psd2, double *psx1, const double *psy1, * double *sparam) * * constructs the modified Givens transformation matrix h which zeros * the second component of the 2-vector transpose(sqrt(sd1)*sx1,sqrt(sd2)*sy1). * With sparam[0] = sflag, h has one of the following forms: * * sflag = -1.0 sflag = 0.0 sflag = 1.0 sflag = -2.0 * * (sh00 sh01) (1.0 sh01) (sh00 1.0) (1.0 0.0) * h = ( ) ( ) ( ) ( ) * (sh10 sh11) (sh10 1.0) (-1.0 sh11) (0.0 1.0) * * sparam[1] through sparam[4] contain sh00, sh10, sh01, sh11, * respectively. Values of 1.0, -1.0, or 0.0 implied by the value * of sflag are not stored in sparam. * * Input * ----- * sd1 single precision scalar * sd2 single precision scalar * sx1 single precision scalar * sy1 single precision scalar * * Output * ------ * sd1 changed to represent the effect of the transformation * sd2 changed to represent the effect of the transformation * sx1 changed to represent the effect of the transformation * sparam 5-element vector. sparam[0] is sflag described above. sparam[1] * through sparam[4] contain the 2x2 rotation matrix h: sparam[1] * contains sh00, sparam[2] contains sh10, sparam[3] contains sh01, * and sprams[4] contains sh11. * * Reference: http://www.netlib.org/blas/drotmg.f * * This functions does not set any error status. * **/ public static void cublasDrotmg(double sd1[], double sd2[], double sx1[], double sy1, double sparam[]) { cublasDrotmgNative(sd1, sd2, sx1, sy1, sparam); checkResultBLAS(); } private static native void cublasDrotmgNative(double sd1[], double sd2[], double sx1[], double sy1, double sparam[]); //============================================================================ // Auto-generated part: /** *
* int * cublasIsamax (int n, const float *x, int incx) * * finds the smallest index of the maximum magnitude element of single * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that maximizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/isamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIsamax(int n, Pointer x, int incx) { int result = cublasIsamaxNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIsamaxNative(int n, Pointer x, int incx); /** *
* int * cublasIsamin (int n, const float *x, int incx) * * finds the smallest index of the minimum magnitude element of single * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that minimizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/scilib/blass.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIsamin(int n, Pointer x, int incx) { int result = cublasIsaminNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIsaminNative(int n, Pointer x, int incx); /** *
* float * cublasSasum (int n, const float *x, int incx) * * computes the sum of the absolute values of the elements of single * precision vector x; that is, the result is the sum from i = 0 to n - 1 of * abs(x[1 + i * incx]). * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the single precision sum of absolute values * (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/sasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static float cublasSasum(int n, Pointer x, int incx) { float result = cublasSasumNative(n, x, incx); checkResultBLAS(); return result; } private static native float cublasSasumNative(int n, Pointer x, int incx); /** *
* void * cublasSaxpy (int n, float alpha, const float *x, int incx, float *y, * int incy) * * multiplies single precision vector x by single precision scalar alpha * and adds the result to single precision vector y; that is, it overwrites * single precision y with single precision alpha * x + y. For i = 0 to n - 1, * it replaces y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i * * incy], where lx = 1 if incx >= 0, else lx = 1 +(1 - n) * incx, and ly is * defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y single precision result (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/saxpy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSaxpy(int n, float alpha, Pointer x, int incx, Pointer y, int incy) { cublasSaxpyNative(n, alpha, x, incx, y, incy); checkResultBLAS(); } private static native void cublasSaxpyNative(int n, float alpha, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasScopy (int n, const float *x, int incx, float *y, int incy) * * copies the single precision vector x to the single precision vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains single precision vector x * * Reference: http://www.netlib.org/blas/scopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasScopy(int n, Pointer x, int incx, Pointer y, int incy) { cublasScopyNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasScopyNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* float * cublasSdot (int n, const float *x, int incx, const float *y, int incy) * * computes the dot product of two single precision vectors. It returns the * dot product of the single precision vectors x and y if successful, and * 0.0f otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * * incx] * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * *incx, and ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns single precision dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/sdot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ public static float cublasSdot(int n, Pointer x, int incx, Pointer y, int incy) { float result = cublasSdotNative(n, x, incx, y, incy); checkResultBLAS(); return result; } private static native float cublasSdotNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* float * cublasSnrm2 (int n, const float *x, int incx) * * computes the Euclidean norm of the single precision n-vector x (with * storage increment incx). This code uses a multiphase model of * accumulation to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/snrm2.f * Reference: http://www.netlib.org/slatec/lin/snrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static float cublasSnrm2(int n, Pointer x, int incx) { float result = cublasSnrm2Native(n, x, incx); checkResultBLAS(); return result; } private static native float cublasSnrm2Native(int n, Pointer x, int incx); /** *
* void * cublasSrot (int n, float *x, int incx, float *y, int incy, float sc, * float ss) * * multiplies a 2x2 matrix ( sc ss) with the 2xn matrix ( transpose(x) ) * (-ss sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * sc element of rotation matrix * ss element of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/srot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSrot(int n, Pointer x, int incx, Pointer y, int incy, float sc, float ss) { cublasSrotNative(n, x, incx, y, incy, sc, ss); checkResultBLAS(); } private static native void cublasSrotNative(int n, Pointer x, int incx, Pointer y, int incy, float sc, float ss); /** *
* void * cublasSrotg (float *host_sa, float *host_sb, float *host_sc, float *host_ss) * * constructs the Givens tranformation * * ( sc ss ) * G = ( ) , sc^2 + ss^2 = 1, * (-ss sc ) * * which zeros the second entry of the 2-vector transpose(sa, sb). * * The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The * value of sb is overwritten by a value z which allows sc and ss to be * recovered by the following algorithm: * * if z=1 set sc = 0.0 and ss = 1.0 * if abs(z) < 1 set sc = sqrt(1-z^2) and ss = z * if abs(z) > 1 set sc = 1/z and ss = sqrt(1-sc^2) * * The function srot (n, x, incx, y, incy, sc, ss) normally is called next * to apply the transformation to a 2 x n matrix. * Note that is function is provided for completeness and run exclusively * on the Host. * * Input * ----- * sa single precision scalar * sb single precision scalar * * Output * ------ * sa single precision r * sb single precision z * sc single precision result * ss single precision result * * Reference: http://www.netlib.org/blas/srotg.f * * This function does not set any error status. **/ public static void cublasSrotg(Pointer host_sa, Pointer host_sb, Pointer host_sc, Pointer host_ss) { cublasSrotgNative(host_sa, host_sb, host_sc, host_ss); checkResultBLAS(); } private static native void cublasSrotgNative(Pointer host_sa, Pointer host_sb, Pointer host_sc, Pointer host_ss); /** *
* void * sscal (int n, float alpha, float *x, int incx) * * replaces single precision vector x with single precision alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single precision result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/sscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSscal(int n, float alpha, Pointer x, int incx) { cublasSscalNative(n, alpha, x, incx); checkResultBLAS(); } private static native void cublasSscalNative(int n, float alpha, Pointer x, int incx); /** *
* void * cublasSswap (int n, float *x, int incx, float *y, int incy) * * interchanges the single-precision vector x with the single-precision vector y. * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], where * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * x single precision vector with n elements * incx storage spacing between elements of x * y single precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * x contains single precision vector y * y contains single precision vector x * * Reference: http://www.netlib.org/blas/sscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSswap(int n, Pointer x, int incx, Pointer y, int incy) { cublasSswapNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasSswapNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasCaxpy (int n, cuComplex alpha, const cuComplex *x, int incx, * cuComplex *y, int incy) * * multiplies single-complex vector x by single-complex scalar alpha and adds * the result to single-complex vector y; that is, it overwrites single-complex * y with single-complex alpha * x + y. For i = 0 to n - 1, it replaces * y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i * incy], where * lx = 0 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * alpha single-complex scalar multiplier * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * y single-complex result (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/caxpy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCaxpy(int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy) { cublasCaxpyNative(n, alpha, x, incx, y, incy); checkResultBLAS(); } private static native void cublasCaxpyNative(int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasCcopy (int n, const cuComplex *x, int incx, cuComplex *y, int incy) * * copies the single-complex vector x to the single-complex vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains single complex vector x * * Reference: http://www.netlib.org/blas/ccopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCcopy(int n, Pointer x, int incx, Pointer y, int incy) { cublasCcopyNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasCcopyNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasZcopy (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) * * copies the double-complex vector x to the double-complex vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x double-complex vector with n elements * incx storage spacing between elements of x * y double-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains double complex vector x * * Reference: http://www.netlib.org/blas/zcopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZcopy(int n, Pointer x, int incx, Pointer y, int incy) { cublasZcopyNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasZcopyNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasCscal (int n, cuComplex alpha, cuComplex *x, int incx) * * replaces single-complex vector x with single-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single-complex scalar multiplier * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/cscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCscal(int n, cuComplex alpha, Pointer x, int incx) { cublasCscalNative(n, alpha, x, incx); checkResultBLAS(); } private static native void cublasCscalNative(int n, cuComplex alpha, Pointer x, int incx); /** *
* void * cublasCrotg (cuComplex *host_ca, cuComplex cb, float *host_sc, cuComplex *host_cs) * * constructs the complex Givens tranformation * * ( sc cs ) * G = ( ) , sc^2 + cabs(cs)^2 = 1, * (-cs sc ) * * which zeros the second entry of the complex 2-vector transpose(ca, cb). * * The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The * function crot (n, x, incx, y, incy, sc, cs) is normally called next * to apply the transformation to a 2 x n matrix. * Note that is function is provided for completeness and run exclusively * on the Host. * * Input * ----- * ca single-precision complex precision scalar * cb single-precision complex scalar * * Output * ------ * ca single-precision complex ca/cabs(ca)*norm(ca,cb) * sc single-precision cosine component of rotation matrix * cs single-precision complex sine component of rotation matrix * * Reference: http://www.netlib.org/blas/crotg.f * * This function does not set any error status. **/ public static void cublasCrotg(Pointer host_ca, cuComplex cb, Pointer host_sc, Pointer host_cs) { cublasCrotgNative(host_ca, cb, host_sc, host_cs); checkResultBLAS(); } private static native void cublasCrotgNative(Pointer host_ca, cuComplex cb, Pointer host_sc, Pointer host_cs); /** *
* void * cublasCrot (int n, cuComplex *x, int incx, cuComplex *y, int incy, float sc, * cuComplex cs) * * multiplies a 2x2 matrix ( sc cs) with the 2xn matrix ( transpose(x) ) * (-conj(cs) sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x single-precision complex vector with n elements * incx storage spacing between elements of x * y single-precision complex vector with n elements * incy storage spacing between elements of y * sc single-precision cosine component of rotation matrix * cs single-precision complex sine component of rotation matrix * * Output * ------ * x rotated single-precision complex vector x (unchanged if n <= 0) * y rotated single-precision complex vector y (unchanged if n <= 0) * * Reference: http://netlib.org/lapack/explore-html/crot.f.html * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCrot(int n, Pointer x, int incx, Pointer y, int incy, float c, cuComplex s) { cublasCrotNative(n, x, incx, y, incy, c, s); checkResultBLAS(); } private static native void cublasCrotNative(int n, Pointer x, int incx, Pointer y, int incy, float c, cuComplex s); /** *
* void * csrot (int n, cuComplex *x, int incx, cuCumplex *y, int incy, float c, * float s) * * multiplies a 2x2 rotation matrix ( c s) with a 2xn matrix ( transpose(x) ) * (-s c) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x single-precision complex vector with n elements * incx storage spacing between elements of x * y single-precision complex vector with n elements * incy storage spacing between elements of y * c cosine component of rotation matrix * s sine component of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/csrot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCsrot(int n, Pointer x, int incx, Pointer y, int incy, float c, float s) { cublasCsrotNative(n, x, incx, y, incy, c, s); checkResultBLAS(); } private static native void cublasCsrotNative(int n, Pointer x, int incx, Pointer y, int incy, float c, float s); /** *
* void * cublasCsscal (int n, float alpha, cuComplex *x, int incx) * * replaces single-complex vector x with single-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha single precision scalar multiplier * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x single-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/csscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCsscal(int n, float alpha, Pointer x, int incx) { cublasCsscalNative(n, alpha, x, incx); checkResultBLAS(); } private static native void cublasCsscalNative(int n, float alpha, Pointer x, int incx); /** *
* void * cublasCswap (int n, const cuComplex *x, int incx, cuComplex *y, int incy) * * interchanges the single-complex vector x with the single-complex vector y. * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], where * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * x contains-single complex vector y * y contains-single complex vector x * * Reference: http://www.netlib.org/blas/cswap.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCswap(int n, Pointer x, int incx, Pointer y, int incy) { cublasCswapNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasCswapNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasZswap (int n, const cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy) * * interchanges the double-complex vector x with the double-complex vector y. * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], where * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * x double-complex vector with n elements * incx storage spacing between elements of x * y double-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * x contains-double complex vector y * y contains-double complex vector x * * Reference: http://www.netlib.org/blas/zswap.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZswap(int n, Pointer x, int incx, Pointer y, int incy) { cublasZswapNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasZswapNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* cuComplex * cdotu (int n, const cuComplex *x, int incx, const cuComplex *y, int incy) * * computes the dot product of two single-complex vectors. It returns the * dot product of the single-complex vectors x and y if successful, and complex * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * incx] * * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; * ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns single-complex dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/cdotu.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ public static cuComplex cublasCdotu(int n, Pointer x, int incx, Pointer y, int incy) { cuComplex result = cublasCdotuNative(n, x, incx, y, incy); checkResultBLAS(); return result; } private static native cuComplex cublasCdotuNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* cuComplex * cublasCdotc (int n, const cuComplex *x, int incx, const cuComplex *y, * int incy) * * computes the dot product of two single-complex vectors. It returns the * dot product of the single-complex vectors x and y if successful, and complex * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * incx] * * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; * ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x single-complex vector with n elements * incx storage spacing between elements of x * y single-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns single-complex dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/cdotc.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ public static cuComplex cublasCdotc(int n, Pointer x, int incx, Pointer y, int incy) { cuComplex result = cublasCdotcNative(n, x, incx, y, incy); checkResultBLAS(); return result; } private static native cuComplex cublasCdotcNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* int * cublasIcamax (int n, const float *x, int incx) * * finds the smallest index of the element having maximum absolute value * in single-complex vector x; that is, the result is the first i, i = 0 * to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/icamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIcamax(int n, Pointer x, int incx) { int result = cublasIcamaxNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIcamaxNative(int n, Pointer x, int incx); /** *
* int * cublasIcamin (int n, const float *x, int incx) * * finds the smallest index of the element having minimum absolute value * in single-complex vector x; that is, the result is the first i, i = 0 * to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: see ICAMAX. * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIcamin(int n, Pointer x, int incx) { int result = cublasIcaminNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIcaminNative(int n, Pointer x, int incx); /** *
* float * cublasScasum (int n, const cuDouble *x, int incx) * * takes the sum of the absolute values of a complex vector and returns a * single precision result. Note that this is not the L1 norm of the vector. * The result is the sum from 0 to n-1 of abs(real(x[ix+i*incx])) + * abs(imag(x(ix+i*incx))), where ix = 1 if incx <= 0, else ix = 1+(1-n)*incx. * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the single precision sum of absolute values of real and imaginary * parts (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/scasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static float cublasScasum(int n, Pointer x, int incx) { float result = cublasScasumNative(n, x, incx); checkResultBLAS(); return result; } private static native float cublasScasumNative(int n, Pointer x, int incx); /** *
* float * cublasScnrm2 (int n, const cuComplex *x, int incx) * * computes the Euclidean norm of the single-complex n-vector x. This code * uses simple scaling to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x single-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/scnrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static float cublasScnrm2(int n, Pointer x, int incx) { float result = cublasScnrm2Native(n, x, incx); checkResultBLAS(); return result; } private static native float cublasScnrm2Native(int n, Pointer x, int incx); /** *
* void * cublasZaxpy (int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * cuDoubleComplex *y, int incy) * * multiplies double-complex vector x by double-complex scalar alpha and adds * the result to double-complex vector y; that is, it overwrites double-complex * y with double-complex alpha * x + y. For i = 0 to n - 1, it replaces * y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i * incy], where * lx = 0 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * alpha double-complex scalar multiplier * x double-complex vector with n elements * incx storage spacing between elements of x * y double-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * y double-complex result (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/zaxpy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZaxpy(int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy) { cublasZaxpyNative(n, alpha, x, incx, y, incy); checkResultBLAS(); } private static native void cublasZaxpyNative(int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy); /** *
* cuDoubleComplex * zdotu (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, int incy) * * computes the dot product of two double-complex vectors. It returns the * dot product of the double-complex vectors x and y if successful, and double-complex * zero otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * incx] * * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; * ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x double-complex vector with n elements * incx storage spacing between elements of x * y double-complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns double-complex dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/zdotu.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ public static cuDoubleComplex cublasZdotu(int n, Pointer x, int incx, Pointer y, int incy) { cuDoubleComplex result = cublasZdotuNative(n, x, incx, y, incy); checkResultBLAS(); return result; } private static native cuDoubleComplex cublasZdotuNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* cuDoubleComplex * cublasZdotc (int n, const cuDoubleComplex *x, int incx, const cuDoubleComplex *y, int incy) * * computes the dot product of two double-precision complex vectors. It returns the * dot product of the double-precision complex vectors conjugate(x) and y if successful, * and double-precision complex zero otherwise. It computes the * sum for i = 0 to n - 1 of conjugate(x[lx + i * incx]) * y[ly + i * incy], * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; * ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x double-precision complex vector with n elements * incx storage spacing between elements of x * y double-precision complex vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns double-complex dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/zdotc.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ public static cuDoubleComplex cublasZdotc(int n, Pointer x, int incx, Pointer y, int incy) { cuDoubleComplex result = cublasZdotcNative(n, x, incx, y, incy); checkResultBLAS(); return result; } private static native cuDoubleComplex cublasZdotcNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasZscal (int n, cuComplex alpha, cuComplex *x, int incx) * * replaces double-complex vector x with double-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha double-complex scalar multiplier * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/zscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZscal(int n, cuDoubleComplex alpha, Pointer x, int incx) { cublasZscalNative(n, alpha, x, incx); checkResultBLAS(); } private static native void cublasZscalNative(int n, cuDoubleComplex alpha, Pointer x, int incx); /** *
* void * cublasZdscal (int n, double alpha, cuDoubleComplex *x, int incx) * * replaces double-complex vector x with double-complex alpha * x. For i * = 0 to n - 1, it replaces x[ix + i * incx] with alpha * x[ix + i * incx], * where ix = 1 if incx >= 0, else ix = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vectors * alpha double precision scalar multiplier * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double-complex result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/zdscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZdscal(int n, double alpha, Pointer x, int incx) { cublasZdscalNative(n, alpha, x, incx); checkResultBLAS(); } private static native void cublasZdscalNative(int n, double alpha, Pointer x, int incx); /** *
* double * cublasDznrm2 (int n, const cuDoubleComplex *x, int incx) * * computes the Euclidean norm of the double precision complex n-vector x. This code * uses simple scaling to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dznrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static double cublasDznrm2(int n, Pointer x, int incx) { double result = cublasDznrm2Native(n, x, incx); checkResultBLAS(); return result; } private static native double cublasDznrm2Native(int n, Pointer x, int incx); /** *
* void * cublasZrotg (cuDoubleComplex *host_ca, cuDoubleComplex cb, double *host_sc, double *host_cs) * * constructs the complex Givens tranformation * * ( sc cs ) * G = ( ) , sc^2 + cabs(cs)^2 = 1, * (-cs sc ) * * which zeros the second entry of the complex 2-vector transpose(ca, cb). * * The quantity ca/cabs(ca)*norm(ca,cb) overwrites ca in storage. The * function crot (n, x, incx, y, incy, sc, cs) is normally called next * to apply the transformation to a 2 x n matrix. * Note that is function is provided for completeness and run exclusively * on the Host. * * Input * ----- * ca double-precision complex precision scalar * cb double-precision complex scalar * * Output * ------ * ca double-precision complex ca/cabs(ca)*norm(ca,cb) * sc double-precision cosine component of rotation matrix * cs double-precision complex sine component of rotation matrix * * Reference: http://www.netlib.org/blas/zrotg.f * * This function does not set any error status. **/ public static void cublasZrotg(Pointer host_ca, cuDoubleComplex cb, Pointer host_sc, Pointer host_cs) { cublasZrotgNative(host_ca, cb, host_sc, host_cs); checkResultBLAS(); } private static native void cublasZrotgNative(Pointer host_ca, cuDoubleComplex cb, Pointer host_sc, Pointer host_cs); /** *
* cublasZrot (int n, cuDoubleComplex *x, int incx, cuDoubleComplex *y, int incy, double sc, * cuDoubleComplex cs) * * multiplies a 2x2 matrix ( sc cs) with the 2xn matrix ( transpose(x) ) * (-conj(cs) sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x double-precision complex vector with n elements * incx storage spacing between elements of x * y double-precision complex vector with n elements * incy storage spacing between elements of y * sc double-precision cosine component of rotation matrix * cs double-precision complex sine component of rotation matrix * * Output * ------ * x rotated double-precision complex vector x (unchanged if n <= 0) * y rotated double-precision complex vector y (unchanged if n <= 0) * * Reference: http://netlib.org/lapack/explore-html/zrot.f.html * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZrot(int n, Pointer x, int incx, Pointer y, int incy, double sc, cuDoubleComplex cs) { cublasZrotNative(n, x, incx, y, incy, sc, cs); checkResultBLAS(); } private static native void cublasZrotNative(int n, Pointer x, int incx, Pointer y, int incy, double sc, cuDoubleComplex cs); /** *
* void * zdrot (int n, cuDoubleComplex *x, int incx, cuCumplex *y, int incy, double c, * double s) * * multiplies a 2x2 matrix ( c s) with the 2xn matrix ( transpose(x) ) * (-s c) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x double-precision complex vector with n elements * incx storage spacing between elements of x * y double-precision complex vector with n elements * incy storage spacing between elements of y * c cosine component of rotation matrix * s sine component of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/zdrot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZdrot(int n, Pointer x, int incx, Pointer y, int incy, double c, double s) { cublasZdrotNative(n, x, incx, y, incy, c, s); checkResultBLAS(); } private static native void cublasZdrotNative(int n, Pointer x, int incx, Pointer y, int incy, double c, double s); /** *
* int * cublasIzamax (int n, const double *x, int incx) * * finds the smallest index of the element having maximum absolute value * in double-complex vector x; that is, the result is the first i, i = 0 * to n - 1 that maximizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/izamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIzamax(int n, Pointer x, int incx) { int result = cublasIzamaxNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIzamaxNative(int n, Pointer x, int incx); /** *
* int * cublasIzamin (int n, const cuDoubleComplex *x, int incx) * * finds the smallest index of the element having minimum absolute value * in double-complex vector x; that is, the result is the first i, i = 0 * to n - 1 that minimizes abs(real(x[1+i*incx]))+abs(imag(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: Analogous to IZAMAX, see there. * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIzamin(int n, Pointer x, int incx) { int result = cublasIzaminNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIzaminNative(int n, Pointer x, int incx); /** *
* double * cublasDzasum (int n, const cuDoubleComplex *x, int incx) * * takes the sum of the absolute values of a complex vector and returns a * double precision result. Note that this is not the L1 norm of the vector. * The result is the sum from 0 to n-1 of abs(real(x[ix+i*incx])) + * abs(imag(x(ix+i*incx))), where ix = 1 if incx <= 0, else ix = 1+(1-n)*incx. * * Input * ----- * n number of elements in input vector * x double-complex vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the double precision sum of absolute values of real and imaginary * parts (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dzasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static double cublasDzasum(int n, Pointer x, int incx) { double result = cublasDzasumNative(n, x, incx); checkResultBLAS(); return result; } private static native double cublasDzasumNative(int n, Pointer x, int incx); /** *
* void * cublasSgbmv (char trans, int m, int n, int kl, int ku, float alpha, * const float *A, int lda, const float *x, int incx, float beta, * float *y, int incy) * * performs one of the matrix-vector operations * * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) * * alpha and beta are single precision scalars. x and y are single precision * vectors. A is an m by n band matrix consisting of single precision elements * with kl sub-diagonals and ku super-diagonals. * * Input * ----- * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * kl specifies the number of sub-diagonals of matrix A. It must be at * least zero. * ku specifies the number of super-diagonals of matrix A. It must be at * least zero. * alpha single precision scalar multiplier applied to op(A). * A single precision array of dimensions (lda, n). The leading * (kl + ku + 1) x n part of the array A must contain the band matrix A, * supplied column by column, with the leading diagonal of the matrix * in row (ku + 1) of the array, the first super-diagonal starting at * position 2 in row ku, the first sub-diagonal starting at position 1 * in row (ku + 2), and so on. Elements in the array A that do not * correspond to elements in the band matrix (such as the top left * ku x ku triangle) are not referenced. * lda leading dimension of A. lda must be at least (kl + ku + 1). * x single precision array of length at least (1+(n-1)*abs(incx)) when * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y. If beta is * zero, y is not read. * y single precision array of length at least (1+(m-1)*abs(incy)) when * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If * beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*op(A)*x + beta*y * * Reference: http://www.netlib.org/blas/sgbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n, kl, or ku < 0; if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSgbmv(char trans, int m, int n, int kl, int ku, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy) { cublasSgbmvNative(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasSgbmvNative(char trans, int m, int n, int kl, int ku, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy); /** *
* cublasSgemv (char trans, int m, int n, float alpha, const float *A, int lda, * const float *x, int incx, float beta, float *y, int incy) * * performs one of the matrix-vector operations * * y = alpha * op(A) * x + beta * y, * * where op(A) is one of * * op(A) = A or op(A) = transpose(A) * * where alpha and beta are single precision scalars, x and y are single * precision vectors, and A is an m x n matrix consisting of single precision * elements. Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array in which A is stored. * * Input * ----- * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If trans = * trans = 't', 'T', 'c', or 'C', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * alpha single precision scalar multiplier applied to op(A). * A single precision array of dimensions (lda, n) if trans = 'n' or * 'N'), and of dimensions (lda, m) otherwise. lda must be at least * max(1, m) and at least max(1, n) otherwise. * lda leading dimension of two-dimensional array used to store matrix A * x single precision array of length at least (1 + (n - 1) * abs(incx)) * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) * otherwise. * incx specifies the storage spacing between elements of x. incx must not * be zero. * beta single precision scalar multiplier applied to vector y. If beta * is zero, y is not read. * y single precision array of length at least (1 + (m - 1) * abs(incy)) * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) * otherwise. * incy specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * y updated according to alpha * op(A) * x + beta * y * * Reference: http://www.netlib.org/blas/sgemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSgemv(char trans, int m, int n, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy) { cublasSgemvNative(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasSgemvNative(char trans, int m, int n, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy); /** *
* cublasSger (int m, int n, float alpha, const float *x, int incx, * const float *y, int incy, float *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(y) + A, * * where alpha is a single precision scalar, x is an m element single * precision vector, y is an n element single precision vector, and A * is an m by n matrix consisting of single precision elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha single precision scalar multiplier applied to x * transpose(y) * x single precision array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y single precision array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A single precision array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * transpose(y) + A * * Reference: http://www.netlib.org/blas/sger.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSger(int m, int n, float alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasSgerNative(m, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasSgerNative(int m, int n, float alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasSsbmv (char uplo, int n, int k, float alpha, const float *A, int lda, * const float *x, int incx, float beta, float *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are single precision scalars. x and y are single precision * vectors with n elements. A is an n x n symmetric band matrix consisting * of single precision elements, with k super-diagonals and the same number * of sub-diagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the symmetric * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is symmetric, this is also the number of sub-diagonals. k must be at * least zero. * alpha single precision scalar multiplier applied to A*x. * A single precision array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the symmetric matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the symmetric matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. * lda leading dimension of A. lda must be at least (k + 1). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y. If beta is * zero, y is not read. * y single precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/ssbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsbmv(char uplo, int n, int k, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy) { cublasSsbmvNative(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasSsbmvNative(char uplo, int n, int k, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy); /** *
* void * cublasSspmv (char uplo, int n, float alpha, const float *AP, const float *x, * int incx, float beta, float *y, int incy) * * performs the matrix-vector operation * * y = alpha * A * x + beta * y * * Alpha and beta are single precision scalars, and x and y are single * precision vectors with n elements. A is a symmetric n x n matrix * consisting of single precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision scalar multiplier applied to A*x. * AP single precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y; * y single precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/sspmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSspmv(char uplo, int n, float alpha, Pointer AP, Pointer x, int incx, float beta, Pointer y, int incy) { cublasSspmvNative(uplo, n, alpha, AP, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasSspmvNative(char uplo, int n, float alpha, Pointer AP, Pointer x, int incx, float beta, Pointer y, int incy); /** *
* void * cublasSspr (char uplo, int n, float alpha, const float *x, int incx, * float *AP) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(x) + A, * * where alpha is a single precision scalar and x is an n element single * precision vector. A is a symmetric n x n matrix consisting of single * precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision scalar multiplier applied to x * transpose(x). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * AP single precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * * Output * ------ * A updated according to A = alpha * x * transpose(x) + A * * Reference: http://www.netlib.org/blas/sspr.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSspr(char uplo, int n, float alpha, Pointer x, int incx, Pointer AP) { cublasSsprNative(uplo, n, alpha, x, incx, AP); checkResultBLAS(); } private static native void cublasSsprNative(char uplo, int n, float alpha, Pointer x, int incx, Pointer AP); /** *
* void * cublasSspr2 (char uplo, int n, float alpha, const float *x, int incx, * const float *y, int incy, float *AP) * * performs the symmetric rank 2 operation * * A = alpha*x*transpose(y) + alpha*y*transpose(x) + A, * * where alpha is a single precision scalar, and x and y are n element single * precision vectors. A is a symmetric n x n matrix consisting of single * precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision scalar multiplier applied to x * transpose(y) + * y * transpose(x). * x single precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y single precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * AP single precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * * Output * ------ * A updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A * * Reference: http://www.netlib.org/blas/sspr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSspr2(char uplo, int n, float alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP) { cublasSspr2Native(uplo, n, alpha, x, incx, y, incy, AP); checkResultBLAS(); } private static native void cublasSspr2Native(char uplo, int n, float alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP); /** *
* void * cublasSsymv (char uplo, int n, float alpha, const float *A, int lda, * const float *x, int incx, float beta, float *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are single precision scalars, and x and y are single * precision vectors, each with n elements. A is a symmetric n x n matrix * consisting of single precision elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the symmetric matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the symmetric matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * alpha single precision scalar multiplier applied to A*x. * A single precision array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the symmetric matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the symmetric matrix and the strictly * upper triangular part of A is not referenced. * lda leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision scalar multiplier applied to vector y. * y single precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/ssymv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsymv(char uplo, int n, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy) { cublasSsymvNative(uplo, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasSsymvNative(char uplo, int n, float alpha, Pointer A, int lda, Pointer x, int incx, float beta, Pointer y, int incy); /** *
* void * cublasSsyr (char uplo, int n, float alpha, const float *x, int incx, * float *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(x) + A, * * where alpha is a single precision scalar, x is an n element single * precision vector and A is an n x n symmetric matrix consisting of * single precision elements. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array * containing A. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or * the lower triangular part of array A. If uplo = 'U' or 'u', * then only the upper triangular part of A may be referenced. * If uplo = 'L' or 'l', then only the lower triangular part of * A may be referenced. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * alpha single precision scalar multiplier applied to x * transpose(x) * x single precision array of length at least (1 + (n - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must * not be zero. * A single precision array of dimensions (lda, n). If uplo = 'U' or * 'u', then A must contain the upper triangular part of a symmetric * matrix, and the strictly lower triangular part is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part * of a symmetric matrix, and the strictly upper triangular part is * not referenced. * lda leading dimension of the two-dimensional array containing A. lda * must be at least max(1, n). * * Output * ------ * A updated according to A = alpha * x * transpose(x) + A * * Reference: http://www.netlib.org/blas/ssyr.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsyr(char uplo, int n, float alpha, Pointer x, int incx, Pointer A, int lda) { cublasSsyrNative(uplo, n, alpha, x, incx, A, lda); checkResultBLAS(); } private static native void cublasSsyrNative(char uplo, int n, float alpha, Pointer x, int incx, Pointer A, int lda); /** *
* void * cublasSsyr2 (char uplo, int n, float alpha, const float *x, int incx, * const float *y, int incy, float *A, int lda) * * performs the symmetric rank 2 operation * * A = alpha*x*transpose(y) + alpha*y*transpose(x) + A, * * where alpha is a single precision scalar, x and y are n element single * precision vector and A is an n by n symmetric matrix consisting of single * precision elements. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision scalar multiplier applied to x * transpose(y) + * y * transpose(x). * x single precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y single precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * A single precision array of dimensions (lda, n). If uplo == 'U' or 'u', * then A must contains the upper triangular part of a symmetric matrix, * and the strictly lower triangular parts is not referenced. If uplo == * 'L' or 'l', then A contains the lower triangular part of a symmetric * matrix, and the strictly upper triangular part is not referenced. * lda leading dimension of A. It must be at least max(1, n). * * Output * ------ * A updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A * * Reference: http://www.netlib.org/blas/ssyr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsyr2(char uplo, int n, float alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasSsyr2Native(uplo, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasSsyr2Native(char uplo, int n, float alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasStbmv (char uplo, char trans, char diag, int n, int k, const float *A, * int lda, float *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A * or op(A) = transpose(A). x is an n-element single precision vector, and A is * an n x n, unit or non-unit upper or lower triangular band matrix consisting * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A single precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/stbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, k < 0, or incx == 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStbmv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasStbmvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasStbmvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void cublasStbsv (char uplo, char trans, char diag, int n, int k, * const float *A, int lda, float *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n-element vectors, and A is * an n x n unit or non-unit, upper or lower triangular band matrix with k + 1 * diagonals. No test for singularity or near-singularity is included in this * function. Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must be at least * zero. * A single precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n-element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx storage spacing between elements of x. incx must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/stbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 4070 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStbsv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasStbsvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasStbsvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasStpmv (char uplo, char trans, char diag, int n, const float *AP, * float *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * or op(A) = transpose(A). x is an n element single precision vector, and A * is an n x n, unit or non-unit, upper or lower triangular matrix composed * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP single precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/stpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStpmv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasStpmvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasStpmvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* void * cublasStpsv (char uplo, char trans, char diag, int n, const float *AP, * float *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is * an n x n unit or non-unit, upper or lower triangular matrix. No test for * singularity or near-singularity is included in this function. Such tests * must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP single precision array with at least ((n*(n+1))/2) elements. If uplo * == 'U' or 'u', the array AP contains the upper triangular matrix A, * packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n-element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/stpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0, or n > 4070 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStpsv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasStpsvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasStpsvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* void * cublasStrmv (char uplo, char trans, char diag, int n, const float *A, * int lda, float *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = = A, or op(A) = transpose(A). x is an n-element single precision vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa = 'N' or 'n', op(A) = A. If trans = 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A single precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/strmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStrmv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasStrmvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasStrmvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasStrsv (char uplo, char trans, char diag, int n, const float *A, * int lda, float *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A or * transpose(A). b and x are single precision vectors consisting of n * elements, and A is an n x n matrix composed of a unit or non-unit, upper * or lower triangular matrix. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array containing * A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a single precision array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/strsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStrsv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasStrsvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasStrsvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasZtrmv (char uplo, char trans, char diag, int n, const cuDoubleComplex *A, * int lda, cuDoubleComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, * where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * x is an n-element double precision complex vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of double precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' or * 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = * conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A double precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x double precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ztrmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtrmv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasZtrmvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasZtrmvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasZgbmv (char trans, int m, int n, int kl, int ku, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *x, int incx, cuDoubleComplex beta, * cuDoubleComplex *y, int incy); * * performs one of the matrix-vector operations * * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) * * alpha and beta are double precision complex scalars. x and y are double precision * complex vectors. A is an m by n band matrix consisting of double precision complex elements * with kl sub-diagonals and ku super-diagonals. * * Input * ----- * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * kl specifies the number of sub-diagonals of matrix A. It must be at * least zero. * ku specifies the number of super-diagonals of matrix A. It must be at * least zero. * alpha double precision complex scalar multiplier applied to op(A). * A double precision complex array of dimensions (lda, n). The leading * (kl + ku + 1) x n part of the array A must contain the band matrix A, * supplied column by column, with the leading diagonal of the matrix * in row (ku + 1) of the array, the first super-diagonal starting at * position 2 in row ku, the first sub-diagonal starting at position 1 * in row (ku + 2), and so on. Elements in the array A that do not * correspond to elements in the band matrix (such as the top left * ku x ku triangle) are not referenced. * lda leading dimension of A. lda must be at least (kl + ku + 1). * x double precision complex array of length at least (1+(n-1)*abs(incx)) when * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. * incx specifies the increment for the elements of x. incx must not be zero. * beta double precision complex scalar multiplier applied to vector y. If beta is * zero, y is not read. * y double precision complex array of length at least (1+(m-1)*abs(incy)) when * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If * beta is zero, y is not read. * incy On entry, incy specifies the increment for the elements of y. incy * must not be zero. * * Output * ------ * y updated according to y = alpha*op(A)*x + beta*y * * Reference: http://www.netlib.org/blas/zgbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZgbmv(char trans, int m, int n, int kl, int ku, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy) { cublasZgbmvNative(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasZgbmvNative(char trans, int m, int n, int kl, int ku, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy); /** *
* void * cublasZtbmv (char uplo, char trans, char diag, int n, int k, const cuDoubleComplex *A, * int lda, cuDoubleComplex *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-element * double precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular band matrix composed of double precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A double precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/ztbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtbmv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasZtbmvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasZtbmvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void cublasZtbsv (char uplo, char trans, char diag, int n, int k, * const cuDoubleComplex *A, int lda, cuDoubleComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * b and x are n element vectors, and A is an n x n unit or non-unit, * upper or lower triangular band matrix with k + 1 diagonals. No test * for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A double precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x double precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ztbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 1016 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtbsv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasZtbsvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasZtbsvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasZhemv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are double precision complex scalars, and x and y are double * precision complex vectors, each with n elements. A is a hermitian n x n matrix * consisting of double precision complex elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the hermitian matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the hermitian matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * hermitian matrix A. n must be at least zero. * alpha double precision complex scalar multiplier applied to A*x. * A double precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the hermitian matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the hermitian matrix and the strictly * upper triangular part of A is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * lda leading dimension of A. It must be at least max (1, n). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision complex scalar multiplier applied to vector y. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/zhemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZhemv(char uplo, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy) { cublasZhemvNative(uplo, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasZhemvNative(char uplo, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy); /** *
* void * cublasZhpmv (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *AP, const cuDoubleComplex *x, * int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs the matrix-vector operation * * y = alpha * A * x + beta * y * * Alpha and beta are double precision complex scalars, and x and y are double * precision complex vectors with n elements. A is an hermitian n x n matrix * consisting of double precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision complex scalar multiplier applied to A*x. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero. * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision complex scalar multiplier applied to vector y; * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/zhpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZhpmv(char uplo, int n, cuDoubleComplex alpha, Pointer AP, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy) { cublasZhpmvNative(uplo, n, alpha, AP, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasZhpmvNative(char uplo, int n, cuDoubleComplex alpha, Pointer AP, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy); /** *
* cublasZgemv (char trans, int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs one of the matrix-vector operations * * y = alpha * op(A) * x + beta * y, * * where op(A) is one of * * op(A) = A or op(A) = transpose(A) * * where alpha and beta are double precision scalars, x and y are double * precision vectors, and A is an m x n matrix consisting of double precision * elements. Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array in which A is stored. * * Input * ----- * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If trans = * trans = 't', 'T', 'c', or 'C', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * alpha double precision scalar multiplier applied to op(A). * A double precision array of dimensions (lda, n) if trans = 'n' or * 'N'), and of dimensions (lda, m) otherwise. lda must be at least * max(1, m) and at least max(1, n) otherwise. * lda leading dimension of two-dimensional array used to store matrix A * x double precision array of length at least (1 + (n - 1) * abs(incx)) * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) * otherwise. * incx specifies the storage spacing between elements of x. incx must not * be zero. * beta double precision scalar multiplier applied to vector y. If beta * is zero, y is not read. * y double precision array of length at least (1 + (m - 1) * abs(incy)) * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) * otherwise. * incy specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * y updated according to alpha * op(A) * x + beta * y * * Reference: http://www.netlib.org/blas/zgemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZgemv(char trans, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy) { cublasZgemvNative(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasZgemvNative(char trans, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy); /** *
* void * cublasZtpmv (char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, * cuDoubleComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n element * double precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular matrix composed of double precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ztpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtpmv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasZtpmvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasZtpmvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* void * cublasZtpsv (char uplo, char trans, char diag, int n, const cuDoubleComplex *AP, * cuDoubleComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose)). b and * x are n element complex vectors, and A is an n x n unit or non-unit, * upper or lower triangular matrix. No test for singularity or near-singularity * is included in this routine. Such tests must be performed before calling this routine. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T' * or 't', op(A) = transpose(A). If trans == 'C' or 'c', op(A) = * conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP double precision complex array with at least ((n*(n+1))/2) elements. * If uplo == 'U' or 'u', the array AP contains the upper triangular * matrix A, packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x double precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ztpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtpsv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasZtpsvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasZtpsvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* cublasCgemv (char trans, int m, int n, cuComplex alpha, const cuComplex *A, * int lda, const cuComplex *x, int incx, cuComplex beta, cuComplex *y, * int incy) * * performs one of the matrix-vector operations * * y = alpha * op(A) * x + beta * y, * * where op(A) is one of * * op(A) = A or op(A) = transpose(A) or op(A) = conjugate(transpose(A)) * * where alpha and beta are single precision scalars, x and y are single * precision vectors, and A is an m x n matrix consisting of single precision * elements. Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array in which A is stored. * * Input * ----- * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If trans = * trans = 't' or 'T', op(A) = transpose(A). If trans = 'c' or 'C', * op(A) = conjugate(transpose(A)) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * alpha single precision scalar multiplier applied to op(A). * A single precision array of dimensions (lda, n) if trans = 'n' or * 'N'), and of dimensions (lda, m) otherwise. lda must be at least * max(1, m) and at least max(1, n) otherwise. * lda leading dimension of two-dimensional array used to store matrix A * x single precision array of length at least (1 + (n - 1) * abs(incx)) * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) * otherwise. * incx specifies the storage spacing between elements of x. incx must not * be zero. * beta single precision scalar multiplier applied to vector y. If beta * is zero, y is not read. * y single precision array of length at least (1 + (m - 1) * abs(incy)) * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) * otherwise. * incy specifies the storage spacing between elements of y. incy must not * be zero. * * Output * ------ * y updated according to alpha * op(A) * x + beta * y * * Reference: http://www.netlib.org/blas/cgemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCgemv(char trans, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy) { cublasCgemvNative(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasCgemvNative(char trans, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy); /** *
* void * cublasCgbmv (char trans, int m, int n, int kl, int ku, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *x, int incx, cuComplex beta, * cuComplex *y, int incy); * * performs one of the matrix-vector operations * * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) * * alpha and beta are single precision complex scalars. x and y are single precision * complex vectors. A is an m by n band matrix consisting of single precision complex elements * with kl sub-diagonals and ku super-diagonals. * * Input * ----- * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * kl specifies the number of sub-diagonals of matrix A. It must be at * least zero. * ku specifies the number of super-diagonals of matrix A. It must be at * least zero. * alpha single precision complex scalar multiplier applied to op(A). * A single precision complex array of dimensions (lda, n). The leading * (kl + ku + 1) x n part of the array A must contain the band matrix A, * supplied column by column, with the leading diagonal of the matrix * in row (ku + 1) of the array, the first super-diagonal starting at * position 2 in row ku, the first sub-diagonal starting at position 1 * in row (ku + 2), and so on. Elements in the array A that do not * correspond to elements in the band matrix (such as the top left * ku x ku triangle) are not referenced. * lda leading dimension of A. lda must be at least (kl + ku + 1). * x single precision complex array of length at least (1+(n-1)*abs(incx)) when * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. * incx specifies the increment for the elements of x. incx must not be zero. * beta single precision complex scalar multiplier applied to vector y. If beta is * zero, y is not read. * y single precision complex array of length at least (1+(m-1)*abs(incy)) when * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If * beta is zero, y is not read. * incy On entry, incy specifies the increment for the elements of y. incy * must not be zero. * * Output * ------ * y updated according to y = alpha*op(A)*x + beta*y * * Reference: http://www.netlib.org/blas/cgbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCgbmv(char trans, int m, int n, int kl, int ku, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy) { cublasCgbmvNative(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasCgbmvNative(char trans, int m, int n, int kl, int ku, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy); /** *
* void * cublasChemv (char uplo, int n, cuComplex alpha, const cuComplex *A, int lda, * const cuComplex *x, int incx, cuComplex beta, cuComplex *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are single precision complex scalars, and x and y are single * precision complex vectors, each with n elements. A is a hermitian n x n matrix * consisting of single precision complex elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the hermitian matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the hermitian matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * hermitian matrix A. n must be at least zero. * alpha single precision complex scalar multiplier applied to A*x. * A single precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the hermitian matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the hermitian matrix and the strictly * upper triangular part of A is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * lda leading dimension of A. It must be at least max (1, n). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision complex scalar multiplier applied to vector y. * y single precision complex array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/chemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasChemv(char uplo, int n, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy) { cublasChemvNative(uplo, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasChemvNative(char uplo, int n, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy); /** *
* void * cublasChbmv (char uplo, int n, int k, cuComplex alpha, const cuComplex *A, int lda, * const cuComplex *x, int incx, cuComplex beta, cuComplex *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are single precision complex scalars. x and y are single precision * complex vectors with n elements. A is an n by n hermitian band matrix consisting * of single precision complex elements, with k super-diagonals and the same number * of subdiagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the hermitian * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * hermitian matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is hermitian, this is also the number of sub-diagonals. k must be at * least zero. * alpha single precision complex scalar multiplier applied to A*x. * A single precision complex array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the hermitian matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the hermitian matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero. * lda leading dimension of A. lda must be at least (k + 1). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta single precision complex scalar multiplier applied to vector y. If beta is * zero, y is not read. * y single precision complex array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/chbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasChbmv(char uplo, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy) { cublasChbmvNative(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasChbmvNative(char uplo, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer x, int incx, cuComplex beta, Pointer y, int incy); /** *
* * cublasCtrmv (char uplo, char trans, char diag, int n, const cuComplex *A, * int lda, cuComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, * where op(A) = A, or op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * x is an n-element signle precision complex vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of single precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If trans = 'n' or 'N', op(A) = A. If trans = 't' or * 'T', op(A) = transpose(A). If trans = 'c' or 'C', op(A) = * conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A single precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ctrmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtrmv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasCtrmvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasCtrmvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasCtbmv (char uplo, char trans, char diag, int n, int k, const cuComplex *A, * int lda, cuComplex *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)). x is an n-element * single precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular band matrix composed of single precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A single precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/ctbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtbmv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasCtbmvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasCtbmvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasCtpmv (char uplo, char trans, char diag, int n, const cuComplex *AP, * cuComplex *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * op(A) = transpose(A) or op(A) = conjugate(transpose(A)) . x is an n element * single precision complex vector, and A is an n x n, unit or non-unit, upper * or lower triangular matrix composed of single precision complex elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * or 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP single precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/ctpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtpmv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasCtpmvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasCtpmvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* void * cublasCtrsv (char uplo, char trans, char diag, int n, const cuComplex *A, * int lda, cuComplex *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A, * transpose(A) or conjugate(transpose(A)). b and x are single precision * complex vectors consisting of n elements, and A is an n x n matrix * composed of a unit or non-unit, upper or lower triangular matrix. * Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array containing A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a single precision complex array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x single precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ctrsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtrsv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasCtrsvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasCtrsvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void cublasCtbsv (char uplo, char trans, char diag, int n, int k, * const cuComplex *A, int lda, cuComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)). * b and x are n element vectors, and A is an n x n unit or non-unit, * upper or lower triangular band matrix with k + 1 diagonals. No test * for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', op(A) = transpose(A). If trans == 'C' or 'c', * op(A) = conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A single precision complex array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x single precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ctbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 2035 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtbsv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasCtbsvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasCtbsvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasCtpsv (char uplo, char trans, char diag, int n, const cuComplex *AP, * cuComplex *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose)). b and * x are n element complex vectors, and A is an n x n unit or non-unit, * upper or lower triangular matrix. No test for singularity or near-singularity * is included in this routine. Such tests must be performed before calling this routine. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T' * or 't', op(A) = transpose(A). If trans == 'C' or 'c', op(A) = * conjugate(transpose(A)). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP single precision complex array with at least ((n*(n+1))/2) elements. * If uplo == 'U' or 'u', the array AP contains the upper triangular * matrix A, packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x single precision complex array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ctpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtpsv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasCtpsvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasCtpsvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* cublasCgeru (int m, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(y) + A, * * where alpha is a single precision complex scalar, x is an m element single * precision complex vector, y is an n element single precision complex vector, and A * is an m by n matrix consisting of single precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha single precision complex scalar multiplier applied to x * transpose(y) * x single precision complex array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y single precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A single precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * transpose(y) + A * * Reference: http://www.netlib.org/blas/cgeru.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m <0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCgeru(int m, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasCgeruNative(m, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasCgeruNative(int m, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* cublasCgerc (int m, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * conjugate(transpose(y)) + A, * * where alpha is a single precision complex scalar, x is an m element single * precision complex vector, y is an n element single precision complex vector, and A * is an m by n matrix consisting of single precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha single precision complex scalar multiplier applied to x * transpose(y) * x single precision complex array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y single precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A single precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(y)) + A * * Reference: http://www.netlib.org/blas/cgerc.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m <0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCgerc(int m, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasCgercNative(m, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasCgercNative(int m, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasCher (char uplo, int n, float alpha, const cuComplex *x, int incx, * cuComplex *A, int lda) * * performs the hermitian rank 1 operation * * A = alpha * x * conjugate(transpose(x)) + A, * * where alpha is a single precision real scalar, x is an n element single * precision complex vector and A is an n x n hermitian matrix consisting of * single precision complex elements. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array * containing A. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or * the lower triangular part of array A. If uplo = 'U' or 'u', * then only the upper triangular part of A may be referenced. * If uplo = 'L' or 'l', then only the lower triangular part of * A may be referenced. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * alpha single precision real scalar multiplier applied to * x * conjugate(transpose(x)) * x single precision complex array of length at least (1 + (n - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must * not be zero. * A single precision complex array of dimensions (lda, n). If uplo = 'U' or * 'u', then A must contain the upper triangular part of a hermitian * matrix, and the strictly lower triangular part is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part * of a hermitian matrix, and the strictly upper triangular part is * not referenced. The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * lda leading dimension of the two-dimensional array containing A. lda * must be at least max(1, n). * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(x)) + A * * Reference: http://www.netlib.org/blas/cher.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCher(char uplo, int n, float alpha, Pointer x, int incx, Pointer A, int lda) { cublasCherNative(uplo, n, alpha, x, incx, A, lda); checkResultBLAS(); } private static native void cublasCherNative(char uplo, int n, float alpha, Pointer x, int incx, Pointer A, int lda); /** *
* void * cublasChpr (char uplo, int n, float alpha, const cuComplex *x, int incx, * cuComplex *AP) * * performs the hermitian rank 1 operation * * A = alpha * x * conjugate(transpose(x)) + A, * * where alpha is a single precision real scalar and x is an n element single * precision complex vector. A is a hermitian n x n matrix consisting of single * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision real scalar multiplier applied to x * conjugate(transpose(x)). * x single precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * AP single precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(x)) + A * * Reference: http://www.netlib.org/blas/chpr.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasChpr(char uplo, int n, float alpha, Pointer x, int incx, Pointer AP) { cublasChprNative(uplo, n, alpha, x, incx, AP); checkResultBLAS(); } private static native void cublasChprNative(char uplo, int n, float alpha, Pointer x, int incx, Pointer AP); /** *
* void * cublasChpr2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *AP) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a single precision complex scalar, and x and y are n element single * precision complex vectors. A is a hermitian n x n matrix consisting of single * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x single precision complex array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y single precision complex array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * AP single precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/chpr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasChpr2(char uplo, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP) { cublasChpr2Native(uplo, n, alpha, x, incx, y, incy, AP); checkResultBLAS(); } private static native void cublasChpr2Native(char uplo, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP); /** *
* void cublasCher2 (char uplo, int n, cuComplex alpha, const cuComplex *x, int incx, * const cuComplex *y, int incy, cuComplex *A, int lda) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a single precision complex scalar, x and y are n element single * precision complex vector and A is an n by n hermitian matrix consisting of single * precision complex elements. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha single precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x single precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y single precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * A single precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * then A must contains the upper triangular part of a hermitian matrix, * and the strictly lower triangular parts is not referenced. If uplo == * 'L' or 'l', then A contains the lower triangular part of a hermitian * matrix, and the strictly upper triangular part is not referenced. * The imaginary parts of the diagonal elements need not be set, * they are assumed to be zero, and on exit they are set to zero. * * lda leading dimension of A. It must be at least max(1, n). * * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/cher2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCher2(char uplo, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasCher2Native(uplo, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasCher2Native(char uplo, int n, cuComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasSgemm (char transa, char transb, int m, int n, int k, float alpha, * const float *A, int lda, const float *B, int ldb, float beta, * float *C, int ldc) * * computes the product of matrix A and matrix B, multiplies the result * by a scalar alpha, and adds the sum to the product of matrix C and * scalar beta. sgemm() performs one of the matrix-matrix operations: * * C = alpha * op(A) * op(B) + beta * C, * * where op(X) is one of * * op(X) = X or op(X) = transpose(X) * * alpha and beta are single precision scalars, and A, B and C are * matrices consisting of single precision elements, with op(A) an m x k * matrix, op(B) a k x n matrix, and C an m x n matrix. Matrices A, B, * and C are stored in column major format, and lda, ldb, and ldc are * the leading dimensions of the two-dimensional arrays containing A, * B, and C. * * Input * ----- * transa specifies op(A). If transa = 'n' or 'N', op(A) = A. If * transa = 't', 'T', 'c', or 'C', op(A) = transpose(A) * transb specifies op(B). If transb = 'n' or 'N', op(B) = B. If * transb = 't', 'T', 'c', or 'C', op(B) = transpose(B) * m number of rows of matrix op(A) and rows of matrix C * n number of columns of matrix op(B) and number of columns of C * k number of columns of matrix op(A) and number of rows of op(B) * alpha single precision scalar multiplier applied to op(A)op(B) * A single precision array of dimensions (lda, k) if transa = * 'n' or 'N'), and of dimensions (lda, m) otherwise. When transa = * 'N' or 'n' then lda must be at least max( 1, m ), otherwise lda * must be at least max(1, k). * lda leading dimension of two-dimensional array used to store matrix A * B single precision array of dimensions (ldb, n) if transb = * 'n' or 'N'), and of dimensions (ldb, k) otherwise. When transb = * 'N' or 'n' then ldb must be at least max (1, k), otherwise ldb * must be at least max (1, n). * ldb leading dimension of two-dimensional array used to store matrix B * beta single precision scalar multiplier applied to C. If 0, C does * not have to be a valid input * C single precision array of dimensions (ldc, n). ldc must be at * least max (1, m). * ldc leading dimension of two-dimensional array used to store matrix C * * Output * ------ * C updated based on C = alpha * op(A)*op(B) + beta * C * * Reference: http://www.netlib.org/blas/sgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSgemm(char transa, char transb, int m, int n, int k, float alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc) { cublasSgemmNative(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasSgemmNative(char transa, char transb, int m, int n, int k, float alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc); /** *
* void * cublasSsymm (char side, char uplo, int m, int n, float alpha, * const float *A, int lda, const float *B, int ldb, * float beta, float *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are single precision scalars, A is a symmetric matrix * consisting of single precision elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of single precision * elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha single precision scalar multiplier applied to A * B, or B * A * A single precision array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B single precision array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/ssymm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsymm(char side, char uplo, int m, int n, float alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc) { cublasSsymmNative(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasSsymmNative(char side, char uplo, int m, int n, float alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc); /** *
* void * cublasSsyrk (char uplo, char trans, int n, int k, float alpha, * const float *A, int lda, float beta, float *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are single precision scalars. C is an n x n symmetric matrix * consisting of single precision elements and stored in either lower or * upper storage mode. A is a matrix consisting of single precision elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = transpose(A) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision scalar multiplier applied to A * transpose(A) or * transpose(A) * A. * A single precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta izs zero, C * does not have to be a valid input * C single precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/ssyrk.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsyrk(char uplo, char trans, int n, int k, float alpha, Pointer A, int lda, float beta, Pointer C, int ldc) { cublasSsyrkNative(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); checkResultBLAS(); } private static native void cublasSsyrkNative(char uplo, char trans, int n, int k, float alpha, Pointer A, int lda, float beta, Pointer C, int ldc); /** *
* void * cublasSsyr2k (char uplo, char trans, int n, int k, float alpha, * const float *A, int lda, const float *B, int ldb, * float beta, float *C, int ldc) * * performs one of the symmetric rank 2k operations * * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. * * Alpha and beta are single precision scalars. C is an n x n symmetric matrix * consisting of single precision elements and stored in either lower or upper * storage mode. A and B are matrices consisting of single precision elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + * alpha * transpose(B) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision scalar multiplier. * A single precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B single precision array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) + * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C * * Reference: http://www.netlib.org/blas/ssyr2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasSsyr2k(char uplo, char trans, int n, int k, float alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc) { cublasSsyr2kNative(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasSsyr2kNative(char uplo, char trans, int n, int k, float alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc); /** *
* void * cublasStrmm (char side, char uplo, char transa, char diag, int m, int n, * float alpha, const float *A, int lda, const float *B, int ldb) * * performs one of the matrix-matrix operations * * B = alpha * op(A) * B, or B = alpha * B * op(A) * * where alpha is a single-precision scalar, B is an m x n matrix composed * of single precision elements, and A is a unit or non-unit, upper or lower, * triangular matrix composed of single precision elements. op(A) is one of * * op(A) = A or op(A) = transpose(A) * * Matrices A and B are stored in column major format, and lda and ldb are * the leading dimensions of the two-dimensonials arrays that contain A and * B, respectively. * * Input * ----- * side specifies whether op(A) multiplies B from the left or right. * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = * 'R' or 'r', then B = alpha * B * op(A). * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. * If uplo = 'L' or 'l', A is a lower triangular matrix. * transa specifies the form of op(A) to be used in the matrix * multiplication. If transa = 'N' or 'n', then op(A) = A. If * transa = 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or * 'n', A is not assumed to be unit triangular. * m the number of rows of matrix B. m must be at least zero. * n the number of columns of matrix B. n must be at least zero. * alpha single precision scalar multiplier applied to op(A)*B, or * B*op(A), respectively. If alpha is zero no accesses are made * to matrix A, and no read accesses are made to matrix B. * A single precision array of dimensions (lda, k). k = m if side = * 'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u' * the leading k x k upper triangular part of the array A must * contain the upper triangular matrix, and the strictly lower * triangular part of A is not referenced. If uplo = 'L' or 'l' * the leading k x k lower triangular part of the array A must * contain the lower triangular matrix, and the strictly upper * triangular part of A is not referenced. When diag = 'U' or 'u' * the diagonal elements of A are no referenced and are assumed * to be unity. * lda leading dimension of A. When side = 'L' or 'l', it must be at * least max(1,m) and at least max(1,n) otherwise * B single precision array of dimensions (ldb, n). On entry, the * leading m x n part of the array contains the matrix B. It is * overwritten with the transformed matrix on exit. * ldb leading dimension of B. It must be at least max (1, m). * * Output * ------ * B updated according to B = alpha * op(A) * B or B = alpha * B * op(A) * * Reference: http://www.netlib.org/blas/strmm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStrmm(char side, char uplo, char transa, char diag, int m, int n, float alpha, Pointer A, int lda, Pointer B, int ldb) { cublasStrmmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasStrmmNative(char side, char uplo, char transa, char diag, int m, int n, float alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* void * cublasStrsm (char side, char uplo, char transa, char diag, int m, int n, * float alpha, const float *A, int lda, float *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a single precision scalar, and X and B are m x n matrices * that are composed of single precision elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensonials arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'N', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a single precision scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. * A is a single precision array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular matrix of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a single precision array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/strsm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasStrsm(char side, char uplo, char transa, char diag, int m, int n, float alpha, Pointer A, int lda, Pointer B, int ldb) { cublasStrsmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasStrsmNative(char side, char uplo, char transa, char diag, int m, int n, float alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* void cublasCgemm (char transa, char transb, int m, int n, int k, * cuComplex alpha, const cuComplex *A, int lda, * const cuComplex *B, int ldb, cuComplex beta, * cuComplex *C, int ldc) * * performs one of the matrix-matrix operations * * C = alpha * op(A) * op(B) + beta*C, * * where op(X) is one of * * op(X) = X or op(X) = transpose or op(X) = conjg(transpose(X)) * * alpha and beta are single-complex scalars, and A, B and C are matrices * consisting of single-complex elements, with op(A) an m x k matrix, op(B) * a k x n matrix and C an m x n matrix. * * Input * ----- * transa specifies op(A). If transa == 'N' or 'n', op(A) = A. If transa == * 'T' or 't', op(A) = transpose(A). If transa == 'C' or 'c', op(A) = * conjg(transpose(A)). * transb specifies op(B). If transa == 'N' or 'n', op(B) = B. If transb == * 'T' or 't', op(B) = transpose(B). If transb == 'C' or 'c', op(B) = * conjg(transpose(B)). * m number of rows of matrix op(A) and rows of matrix C. It must be at * least zero. * n number of columns of matrix op(B) and number of columns of C. It * must be at least zero. * k number of columns of matrix op(A) and number of rows of op(B). It * must be at least zero. * alpha single-complex scalar multiplier applied to op(A)op(B) * A single-complex array of dimensions (lda, k) if transa == 'N' or * 'n'), and of dimensions (lda, m) otherwise. * lda leading dimension of A. When transa == 'N' or 'n', it must be at * least max(1, m) and at least max(1, k) otherwise. * B single-complex array of dimensions (ldb, n) if transb == 'N' or 'n', * and of dimensions (ldb, k) otherwise * ldb leading dimension of B. When transb == 'N' or 'n', it must be at * least max(1, k) and at least max(1, n) otherwise. * beta single-complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m). * * Output * ------ * C updated according to C = alpha*op(A)*op(B) + beta*C * * Reference: http://www.netlib.org/blas/cgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCgemm(char transa, char transb, int m, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc) { cublasCgemmNative(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasCgemmNative(char transa, char transb, int m, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc); /** *
* void * cublasCsymm (char side, char uplo, int m, int n, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *B, int ldb, * cuComplex beta, cuComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are single precision complex scalars, A is a symmetric matrix * consisting of single precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of single precision * complex elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha single precision scalar multiplier applied to A * B, or B * A * A single precision array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B single precision array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/csymm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCsymm(char side, char uplo, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc) { cublasCsymmNative(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasCsymmNative(char side, char uplo, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc); /** *
* void * cublasChemm (char side, char uplo, int m, int n, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *B, int ldb, * cuComplex beta, cuComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are single precision complex scalars, A is a hermitian matrix * consisting of single precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of single precision * complex elements. * * Input * ----- * side specifies whether the hermitian matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the hermitian matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of hermitian matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of hermitian * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha single precision complex scalar multiplier applied to A * B, or B * A * A single precision complex array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the * leading m x m part of array A must contain the hermitian matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the hermitian matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * hermitian matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the hermitian matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * hermitian matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the hermitian matrix and the * strictly upper triangular part is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B single precision complex array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta single precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C single precision complex array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/chemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasChemm(char side, char uplo, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc) { cublasChemmNative(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasChemmNative(char side, char uplo, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc); /** *
* void * cublasCsyrk (char uplo, char trans, int n, int k, cuComplex alpha, * const cuComplex *A, int lda, cuComplex beta, cuComplex *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are single precision complex scalars. C is an n x n symmetric matrix * consisting of single precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of single precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = transpose(A) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision complex scalar multiplier applied to A * transpose(A) or * transpose(A) * A. * A single precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta single precision complex scalar multiplier applied to C. If beta izs zero, C * does not have to be a valid input * C single precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/csyrk.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCsyrk(char uplo, char trans, int n, int k, cuComplex alpha, Pointer A, int lda, cuComplex beta, Pointer C, int ldc) { cublasCsyrkNative(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); checkResultBLAS(); } private static native void cublasCsyrkNative(char uplo, char trans, int n, int k, cuComplex alpha, Pointer A, int lda, cuComplex beta, Pointer C, int ldc); /** *
* void * cublasCherk (char uplo, char trans, int n, int k, float alpha, * const cuComplex *A, int lda, float beta, cuComplex *C, int ldc) * * performs one of the hermitian rank k operations * * C = alpha * A * conjugate(transpose(A)) + beta * C, or * C = alpha * conjugate(transpose(A)) * A + beta * C. * * Alpha and beta are single precision real scalars. C is an n x n hermitian matrix * consisting of single precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of single precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * A * conjugate(transpose(A)) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision scalar multiplier applied to A * conjugate(transpose(A)) or * conjugate(transpose(A)) * A. * A single precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * conjugate(transpose(A)) + beta * C, or C = * alpha * conjugate(transpose(A)) * A + beta * C * * Reference: http://www.netlib.org/blas/cherk.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCherk(char uplo, char trans, int n, int k, float alpha, Pointer A, int lda, float beta, Pointer C, int ldc) { cublasCherkNative(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); checkResultBLAS(); } private static native void cublasCherkNative(char uplo, char trans, int n, int k, float alpha, Pointer A, int lda, float beta, Pointer C, int ldc); /** *
* void * cublasCsyr2k (char uplo, char trans, int n, int k, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *B, int ldb, * cuComplex beta, cuComplex *C, int ldc) * * performs one of the symmetric rank 2k operations * * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. * * Alpha and beta are single precision complex scalars. C is an n x n symmetric matrix * consisting of single precision complex elements and stored in either lower or upper * storage mode. A and B are matrices consisting of single precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + * alpha * transpose(B) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision complex scalar multiplier. * A single precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B single precision complex array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta single precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision complex array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) + * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C * * Reference: http://www.netlib.org/blas/csyr2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCsyr2k(char uplo, char trans, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc) { cublasCsyr2kNative(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasCsyr2kNative(char uplo, char trans, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuComplex beta, Pointer C, int ldc); /** *
* void * cublasCher2k (char uplo, char trans, int n, int k, cuComplex alpha, * const cuComplex *A, int lda, const cuComplex *B, int ldb, * float beta, cuComplex *C, int ldc) * * performs one of the hermitian rank 2k operations * * C = alpha * A * conjugate(transpose(B)) * + conjugate(alpha) * B * conjugate(transpose(A)) * + beta * C , * or * C = alpha * conjugate(transpose(A)) * B * + conjugate(alpha) * conjugate(transpose(B)) * A * + beta * C. * * Alpha is single precision complex scalar whereas Beta is a single preocision real scalar. * C is an n x n hermitian matrix consisting of single precision complex elements * and stored in either lower or upper storage mode. A and B are matrices consisting * of single precision complex elements with dimension of n x k in the first case, * and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * conjugate(transpose(B)) * + conjugate(alpha) * B * conjugate(transpose(A)) * + beta * C . * If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * B * + conjugate(alpha) * conjugate(transpose(B)) * A * + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha single precision complex scalar multiplier. * A single precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B single precision complex array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta single precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C single precision complex array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*conjugate(transpose(B)) + * + conjugate(alpha)*B*conjugate(transpose(A)) + beta*C or * alpha*conjugate(transpose(A))*B + conjugate(alpha)*conjugate(transpose(B))*A * + beta*C. * * Reference: http://www.netlib.org/blas/cher2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCher2k(char uplo, char trans, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc) { cublasCher2kNative(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasCher2kNative(char uplo, char trans, int n, int k, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb, float beta, Pointer C, int ldc); /** *
* void * cublasCtrmm (char side, char uplo, char transa, char diag, int m, int n, * cuComplex alpha, const cuComplex *A, int lda, const cuComplex *B, * int ldb) * * performs one of the matrix-matrix operations * * B = alpha * op(A) * B, or B = alpha * B * op(A) * * where alpha is a single-precision complex scalar, B is an m x n matrix composed * of single precision complex elements, and A is a unit or non-unit, upper or lower, * triangular matrix composed of single precision complex elements. op(A) is one of * * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)) * * Matrices A and B are stored in column major format, and lda and ldb are * the leading dimensions of the two-dimensonials arrays that contain A and * B, respectively. * * Input * ----- * side specifies whether op(A) multiplies B from the left or right. * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = * 'R' or 'r', then B = alpha * B * op(A). * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. * If uplo = 'L' or 'l', A is a lower triangular matrix. * transa specifies the form of op(A) to be used in the matrix * multiplication. If transa = 'N' or 'n', then op(A) = A. If * transa = 'T' or 't', then op(A) = transpose(A). * If transa = 'C' or 'c', then op(A) = conjugate(transpose(A)). * diag specifies whether or not A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or * 'n', A is not assumed to be unit triangular. * m the number of rows of matrix B. m must be at least zero. * n the number of columns of matrix B. n must be at least zero. * alpha single precision complex scalar multiplier applied to op(A)*B, or * B*op(A), respectively. If alpha is zero no accesses are made * to matrix A, and no read accesses are made to matrix B. * A single precision complex array of dimensions (lda, k). k = m if side = * 'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u' * the leading k x k upper triangular part of the array A must * contain the upper triangular matrix, and the strictly lower * triangular part of A is not referenced. If uplo = 'L' or 'l' * the leading k x k lower triangular part of the array A must * contain the lower triangular matrix, and the strictly upper * triangular part of A is not referenced. When diag = 'U' or 'u' * the diagonal elements of A are no referenced and are assumed * to be unity. * lda leading dimension of A. When side = 'L' or 'l', it must be at * least max(1,m) and at least max(1,n) otherwise * B single precision complex array of dimensions (ldb, n). On entry, the * leading m x n part of the array contains the matrix B. It is * overwritten with the transformed matrix on exit. * ldb leading dimension of B. It must be at least max (1, m). * * Output * ------ * B updated according to B = alpha * op(A) * B or B = alpha * B * op(A) * * Reference: http://www.netlib.org/blas/ctrmm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtrmm(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb) { cublasCtrmmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasCtrmmNative(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* void * cublasCtrsm (char side, char uplo, char transa, char diag, int m, int n, * cuComplex alpha, const cuComplex *A, int lda, * cuComplex *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a single precision complex scalar, and X and B are m x n matrices * that are composed of single precision complex elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) or op( A ) = conj( A' ). * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensonials arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'N', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a single precision complex scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. * A is a single precision complex array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular matrix of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a single precision complex array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/ctrsm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasCtrsm(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb) { cublasCtrsmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasCtrsmNative(char side, char uplo, char transa, char diag, int m, int n, cuComplex alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* double * cublasDasum (int n, const double *x, int incx) * * computes the sum of the absolute values of the elements of double * precision vector x; that is, the result is the sum from i = 0 to n - 1 of * abs(x[1 + i * incx]). * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the double-precision sum of absolute values * (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dasum.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static double cublasDasum(int n, Pointer x, int incx) { double result = cublasDasumNative(n, x, incx); checkResultBLAS(); return result; } private static native double cublasDasumNative(int n, Pointer x, int incx); /** *
* void * cublasDaxpy (int n, double alpha, const double *x, int incx, double *y, * int incy) * * multiplies double-precision vector x by double-precision scalar alpha * and adds the result to double-precision vector y; that is, it overwrites * double-precision y with double-precision alpha * x + y. For i = 0 to n-1, * it replaces y[ly + i * incy] with alpha * x[lx + i * incx] + y[ly + i*incy], * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx; ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * alpha double-precision scalar multiplier * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y double-precision result (unchanged if n <= 0) * * Reference: http://www.netlib.org/blas/daxpy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library was not initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDaxpy(int n, double alpha, Pointer x, int incx, Pointer y, int incy) { cublasDaxpyNative(n, alpha, x, incx, y, incy); checkResultBLAS(); } private static native void cublasDaxpyNative(int n, double alpha, Pointer x, int incx, Pointer y, int incy); /** *
* void * cublasDcopy (int n, const double *x, int incx, double *y, int incy) * * copies the double-precision vector x to the double-precision vector y. For * i = 0 to n-1, copies x[lx + i * incx] to y[ly + i * incy], where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a similar * way using incy. * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * y contains double precision vector x * * Reference: http://www.netlib.org/blas/dcopy.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDcopy(int n, Pointer x, int incx, Pointer y, int incy) { cublasDcopyNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasDcopyNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* double * cublasDdot (int n, const double *x, int incx, const double *y, int incy) * * computes the dot product of two double-precision vectors. It returns the * dot product of the double precision vectors x and y if successful, and * 0.0f otherwise. It computes the sum for i = 0 to n - 1 of x[lx + i * * incx] * y[ly + i * incy], where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * *incx, and ly is defined in a similar way using incy. * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * returns double-precision dot product (zero if n <= 0) * * Reference: http://www.netlib.org/blas/ddot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has nor been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to execute on GPU **/ public static double cublasDdot(int n, Pointer x, int incx, Pointer y, int incy) { double result = cublasDdotNative(n, x, incx, y, incy); checkResultBLAS(); return result; } private static native double cublasDdotNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* double * dnrm2 (int n, const double *x, int incx) * * computes the Euclidean norm of the double-precision n-vector x (with * storage increment incx). This code uses a multiphase model of * accumulation to avoid intermediate underflow and overflow. * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns Euclidian norm (0 if n <= 0 or incx <= 0, or if an error occurs) * * Reference: http://www.netlib.org/blas/dnrm2.f * Reference: http://www.netlib.org/slatec/lin/dnrm2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static double cublasDnrm2(int n, Pointer x, int incx) { double result = cublasDnrm2Native(n, x, incx); checkResultBLAS(); return result; } private static native double cublasDnrm2Native(int n, Pointer x, int incx); /** *
* void * cublasDrot (int n, double *x, int incx, double *y, int incy, double sc, * double ss) * * multiplies a 2x2 matrix ( sc ss) with the 2xn matrix ( transpose(x) ) * (-ss sc) ( transpose(y) ) * * The elements of x are in x[lx + i * incx], i = 0 ... n - 1, where lx = 1 if * incx >= 0, else lx = 1 + (1 - n) * incx, and similarly for y using ly and * incy. * * Input * ----- * n number of elements in input vectors * x double-precision vector with n elements * incx storage spacing between elements of x * y double-precision vector with n elements * incy storage spacing between elements of y * sc element of rotation matrix * ss element of rotation matrix * * Output * ------ * x rotated vector x (unchanged if n <= 0) * y rotated vector y (unchanged if n <= 0) * * Reference http://www.netlib.org/blas/drot.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDrot(int n, Pointer x, int incx, Pointer y, int incy, double sc, double ss) { cublasDrotNative(n, x, incx, y, incy, sc, ss); checkResultBLAS(); } private static native void cublasDrotNative(int n, Pointer x, int incx, Pointer y, int incy, double sc, double ss); /** *
* void * cublasDrotg (double *host_sa, double *host_sb, double *host_sc, double *host_ss) * * constructs the Givens tranformation * * ( sc ss ) * G = ( ) , sc^2 + ss^2 = 1, * (-ss sc ) * * which zeros the second entry of the 2-vector transpose(sa, sb). * * The quantity r = (+/-) sqrt (sa^2 + sb^2) overwrites sa in storage. The * value of sb is overwritten by a value z which allows sc and ss to be * recovered by the following algorithm: * * if z=1 set sc = 0.0 and ss = 1.0 * if abs(z) < 1 set sc = sqrt(1-z^2) and ss = z * if abs(z) > 1 set sc = 1/z and ss = sqrt(1-sc^2) * * The function drot (n, x, incx, y, incy, sc, ss) normally is called next * to apply the transformation to a 2 x n matrix. * Note that is function is provided for completeness and run exclusively * on the Host. * * Input * ----- * sa double-precision scalar * sb double-precision scalar * * Output * ------ * sa double-precision r * sb double-precision z * sc double-precision result * ss double-precision result * * Reference: http://www.netlib.org/blas/drotg.f * * This function does not set any error status. **/ public static void cublasDrotg(Pointer host_sa, Pointer host_sb, Pointer host_sc, Pointer host_ss) { cublasDrotgNative(host_sa, host_sb, host_sc, host_ss); checkResultBLAS(); } private static native void cublasDrotgNative(Pointer host_sa, Pointer host_sb, Pointer host_sc, Pointer host_ss); /** *
* void * cublasDscal (int n, double alpha, double *x, int incx) * * replaces double-precision vector x with double-precision alpha * x. For * i = 0 to n-1, it replaces x[lx + i * incx] with alpha * x[lx + i * incx], * where lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx. * * Input * ----- * n number of elements in input vector * alpha double-precision scalar multiplier * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * x double-precision result (unchanged if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/dscal.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library was not initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDscal(int n, double alpha, Pointer x, int incx) { cublasDscalNative(n, alpha, x, incx); checkResultBLAS(); } private static native void cublasDscalNative(int n, double alpha, Pointer x, int incx); /** *
* void * cublasDswap (int n, double *x, int incx, double *y, int incy) * * interchanges the double-precision vector x with the double-precision vector y. * For i = 0 to n-1, interchanges x[lx + i * incx] with y[ly + i * incy], where * lx = 1 if incx >= 0, else lx = 1 + (1 - n) * incx, and ly is defined in a * similar way using incy. * * Input * ----- * n number of elements in input vectors * x double precision vector with n elements * incx storage spacing between elements of x * y double precision vector with n elements * incy storage spacing between elements of y * * Output * ------ * x contains double precision vector y * y contains double precision vector x * * Reference: http://www.netlib.org/blas/dswap.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDswap(int n, Pointer x, int incx, Pointer y, int incy) { cublasDswapNative(n, x, incx, y, incy); checkResultBLAS(); } private static native void cublasDswapNative(int n, Pointer x, int incx, Pointer y, int incy); /** *
* int * idamax (int n, const double *x, int incx) * * finds the smallest index of the maximum magnitude element of double- * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that maximizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/blas/idamax.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIdamax(int n, Pointer x, int incx) { int result = cublasIdamaxNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIdamaxNative(int n, Pointer x, int incx); /** *
* int * idamin (int n, const double *x, int incx) * * finds the smallest index of the minimum magnitude element of double- * precision vector x; that is, the result is the first i, i = 0 to n - 1, * that minimizes abs(x[1 + i * incx])). * * Input * ----- * n number of elements in input vector * x double-precision vector with n elements * incx storage spacing between elements of x * * Output * ------ * returns the smallest index (0 if n <= 0 or incx <= 0) * * Reference: http://www.netlib.org/scilib/blass.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static int cublasIdamin(int n, Pointer x, int incx) { int result = cublasIdaminNative(n, x, incx); checkResultBLAS(); return result; } private static native int cublasIdaminNative(int n, Pointer x, int incx); /** *
* cublasDgemv (char trans, int m, int n, double alpha, const double *A, * int lda, const double *x, int incx, double beta, double *y, * int incy) * * performs one of the matrix-vector operations * * y = alpha * op(A) * x + beta * y, * * where op(A) is one of * * op(A) = A or op(A) = transpose(A) * * where alpha and beta are double precision scalars, x and y are double * precision vectors, and A is an m x n matrix consisting of double precision * elements. Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array in which A is stored. * * Input * ----- * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If trans = * trans = 't', 'T', 'c', or 'C', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * alpha double precision scalar multiplier applied to op(A). * A double precision array of dimensions (lda, n) if trans = 'n' or * 'N'), and of dimensions (lda, m) otherwise. lda must be at least * max(1, m) and at least max(1, n) otherwise. * lda leading dimension of two-dimensional array used to store matrix A * x double precision array of length at least (1 + (n - 1) * abs(incx)) * when trans = 'N' or 'n' and at least (1 + (m - 1) * abs(incx)) * otherwise. * incx specifies the storage spacing between elements of x. incx must not * be zero. * beta double precision scalar multiplier applied to vector y. If beta * is zero, y is not read. * y double precision array of length at least (1 + (m - 1) * abs(incy)) * when trans = 'N' or 'n' and at least (1 + (n - 1) * abs(incy)) * otherwise. * incy specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * y updated according to alpha * op(A) * x + beta * y * * Reference: http://www.netlib.org/blas/dgemv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDgemv(char trans, int m, int n, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy) { cublasDgemvNative(trans, m, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasDgemvNative(char trans, int m, int n, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy); /** *
* cublasDger (int m, int n, double alpha, const double *x, int incx, * const double *y, int incy, double *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(y) + A, * * where alpha is a double precision scalar, x is an m element double * precision vector, y is an n element double precision vector, and A * is an m by n matrix consisting of double precision elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha double precision scalar multiplier applied to x * transpose(y) * x double precision array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y double precision array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A double precision array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * transpose(y) + A * * Reference: http://www.netlib.org/blas/dger.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDger(int m, int n, double alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasDgerNative(m, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasDgerNative(int m, int n, double alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasDsyr (char uplo, int n, double alpha, const double *x, int incx, * double *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(x) + A, * * where alpha is a double precision scalar, x is an n element double * precision vector and A is an n x n symmetric matrix consisting of * double precision elements. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array * containing A. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or * the lower triangular part of array A. If uplo = 'U' or 'u', * then only the upper triangular part of A may be referenced. * If uplo = 'L' or 'l', then only the lower triangular part of * A may be referenced. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * alpha double precision scalar multiplier applied to x * transpose(x) * x double precision array of length at least (1 + (n - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must * not be zero. * A double precision array of dimensions (lda, n). If uplo = 'U' or * 'u', then A must contain the upper triangular part of a symmetric * matrix, and the strictly lower triangular part is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part * of a symmetric matrix, and the strictly upper triangular part is * not referenced. * lda leading dimension of the two-dimensional array containing A. lda * must be at least max(1, n). * * Output * ------ * A updated according to A = alpha * x * transpose(x) + A * * Reference: http://www.netlib.org/blas/dsyr.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsyr(char uplo, int n, double alpha, Pointer x, int incx, Pointer A, int lda) { cublasDsyrNative(uplo, n, alpha, x, incx, A, lda); checkResultBLAS(); } private static native void cublasDsyrNative(char uplo, int n, double alpha, Pointer x, int incx, Pointer A, int lda); /** *
* void cublasDsyr2 (char uplo, int n, double alpha, const double *x, int incx, * const double *y, int incy, double *A, int lda) * * performs the symmetric rank 2 operation * * A = alpha*x*transpose(y) + alpha*y*transpose(x) + A, * * where alpha is a double precision scalar, x and y are n element double * precision vector and A is an n by n symmetric matrix consisting of double * precision elements. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision scalar multiplier applied to x * transpose(y) + * y * transpose(x). * x double precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y double precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * A double precision array of dimensions (lda, n). If uplo == 'U' or 'u', * then A must contains the upper triangular part of a symmetric matrix, * and the strictly lower triangular parts is not referenced. If uplo == * 'L' or 'l', then A contains the lower triangular part of a symmetric * matrix, and the strictly upper triangular part is not referenced. * lda leading dimension of A. It must be at least max(1, n). * * Output * ------ * A updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A * * Reference: http://www.netlib.org/blas/dsyr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsyr2(char uplo, int n, double alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasDsyr2Native(uplo, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasDsyr2Native(char uplo, int n, double alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasDspr (char uplo, int n, double alpha, const double *x, int incx, * double *AP) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(x) + A, * * where alpha is a double precision scalar and x is an n element double * precision vector. A is a symmetric n x n matrix consisting of double * precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision scalar multiplier applied to x * transpose(x). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * AP double precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * * Output * ------ * A updated according to A = alpha * x * transpose(x) + A * * Reference: http://www.netlib.org/blas/dspr.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDspr(char uplo, int n, double alpha, Pointer x, int incx, Pointer AP) { cublasDsprNative(uplo, n, alpha, x, incx, AP); checkResultBLAS(); } private static native void cublasDsprNative(char uplo, int n, double alpha, Pointer x, int incx, Pointer AP); /** *
* void * cublasDspr2 (char uplo, int n, double alpha, const double *x, int incx, * const double *y, int incy, double *AP) * * performs the symmetric rank 2 operation * * A = alpha*x*transpose(y) + alpha*y*transpose(x) + A, * * where alpha is a double precision scalar, and x and y are n element double * precision vectors. A is a symmetric n x n matrix consisting of double * precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision scalar multiplier applied to x * transpose(y) + * y * transpose(x). * x double precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y double precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * AP double precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * * Output * ------ * A updated according to A = alpha*x*transpose(y)+alpha*y*transpose(x)+A * * Reference: http://www.netlib.org/blas/dspr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDspr2(char uplo, int n, double alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP) { cublasDspr2Native(uplo, n, alpha, x, incx, y, incy, AP); checkResultBLAS(); } private static native void cublasDspr2Native(char uplo, int n, double alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP); /** *
* void * cublasDtrsv (char uplo, char trans, char diag, int n, const double *A, * int lda, double *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A or * transpose(A). b and x are double precision vectors consisting of n * elements, and A is an n x n matrix composed of a unit or non-unit, upper * or lower triangular matrix. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array containing * A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a double precision array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/dtrsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtrsv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasDtrsvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasDtrsvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasDtrmv (char uplo, char trans, char diag, int n, const double *A, * int lda, double *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = = A, or op(A) = transpose(A). x is an n-element single precision vector, and * A is an n x n, unit or non-unit, upper or lower, triangular matrix composed * of single precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', then A is an upper triangular matrix. * If uplo = 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa = 'N' or 'n', op(A) = A. If trans = 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * A single precision array of dimension (lda, n). If uplo = 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular matrix and the strictly lower triangular part * of A is not referenced. If uplo = 'L' or 'l', the leading n x n lower * triangular part of the array A must contain the lower triangular * matrix and the strictly upper triangular part of A is not referenced. * When diag = 'U' or 'u', the diagonal elements of A are not referenced * either, but are are assumed to be unity. * lda is the leading dimension of A. It must be at least max (1, n). * x single precision array of length at least (1 + (n - 1) * abs(incx) ). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/dtrmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtrmv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasDtrmvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasDtrmvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasDgbmv (char trans, int m, int n, int kl, int ku, double alpha, * const double *A, int lda, const double *x, int incx, double beta, * double *y, int incy); * * performs one of the matrix-vector operations * * y = alpha*op(A)*x + beta*y, op(A)=A or op(A) = transpose(A) * * alpha and beta are double precision scalars. x and y are double precision * vectors. A is an m by n band matrix consisting of double precision elements * with kl sub-diagonals and ku super-diagonals. * * Input * ----- * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * m specifies the number of rows of the matrix A. m must be at least * zero. * n specifies the number of columns of the matrix A. n must be at least * zero. * kl specifies the number of sub-diagonals of matrix A. It must be at * least zero. * ku specifies the number of super-diagonals of matrix A. It must be at * least zero. * alpha double precision scalar multiplier applied to op(A). * A double precision array of dimensions (lda, n). The leading * (kl + ku + 1) x n part of the array A must contain the band matrix A, * supplied column by column, with the leading diagonal of the matrix * in row (ku + 1) of the array, the first super-diagonal starting at * position 2 in row ku, the first sub-diagonal starting at position 1 * in row (ku + 2), and so on. Elements in the array A that do not * correspond to elements in the band matrix (such as the top left * ku x ku triangle) are not referenced. * lda leading dimension of A. lda must be at least (kl + ku + 1). * x double precision array of length at least (1+(n-1)*abs(incx)) when * trans == 'N' or 'n' and at least (1+(m-1)*abs(incx)) otherwise. * incx specifies the increment for the elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. If beta is * zero, y is not read. * y double precision array of length at least (1+(m-1)*abs(incy)) when * trans == 'N' or 'n' and at least (1+(n-1)*abs(incy)) otherwise. If * beta is zero, y is not read. * incy On entry, incy specifies the increment for the elements of y. incy * must not be zero. * * Output * ------ * y updated according to y = alpha*op(A)*x + beta*y * * Reference: http://www.netlib.org/blas/dgbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDgbmv(char trans, int m, int n, int kl, int ku, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy) { cublasDgbmvNative(trans, m, n, kl, ku, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasDgbmvNative(char trans, int m, int n, int kl, int ku, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy); /** *
* void * cublasDtbmv (char uplo, char trans, char diag, int n, int k, const double *A, * int lda, double *x, int incx) * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * or op(A) = transpose(A). x is an n-element double precision vector, and A is * an n x n, unit or non-unit, upper or lower triangular band matrix composed * of double precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular band * matrix. If uplo == 'U' or 'u', A is an upper triangular band matrix. * If uplo == 'L' or 'l', A is a lower triangular band matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A double precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first * super-diagonal starting at position 2 in row k, and so on. The top * left k x k triangle of the array A is not referenced. If uplo == 'L' * or 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal startingat position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * lda is the leading dimension of A. It must be at least (k + 1). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x * * Reference: http://www.netlib.org/blas/dtbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n or k < 0, or if incx == 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtbmv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasDtbmvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasDtbmvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasDtpmv (char uplo, char trans, char diag, int n, const double *AP, * double *x, int incx); * * performs one of the matrix-vector operations x = op(A) * x, where op(A) = A, * or op(A) = transpose(A). x is an n element double precision vector, and A * is an n x n, unit or non-unit, upper or lower triangular matrix composed * of double precision elements. * * Input * ----- * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo == 'U' or 'u', then A is an upper triangular matrix. * If uplo == 'L' or 'l', then A is a lower triangular matrix. * trans specifies op(A). If transa == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A) * diag specifies whether or not matrix A is unit triangular. If diag == 'U' * or 'u', A is assumed to be unit triangular. If diag == 'N' or 'n', A * is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. In the current implementation n must not exceed 4070. * AP double precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored in AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x double precision array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the source vector. On exit, x is overwritten * with the result vector. * incx specifies the storage spacing for elements of x. incx must not be * zero. * * Output * ------ * x updated according to x = op(A) * x, * * Reference: http://www.netlib.org/blas/dtpmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or n < 0 * CUBLAS_STATUS_ALLOC_FAILED if function cannot allocate enough internal scratch vector memory * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtpmv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasDtpmvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasDtpmvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* void * cublasDtpsv (char uplo, char trans, char diag, int n, const double *AP, * double *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is * an n x n unit or non-unit, upper or lower triangular matrix. No test for * singularity or near-singularity is included in this routine. Such tests * must be performed before calling this routine. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular matrix * as follows: If uplo == 'U' or 'u', A is an upper triangluar matrix. * If uplo == 'L' or 'l', A is a lower triangular matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * AP double precision array with at least ((n*(n+1))/2) elements. If uplo * == 'U' or 'u', the array AP contains the upper triangular matrix A, * packed sequentially, column by column; that is, if i <= j, then * A[i,j] is stored is AP[i+(j*(j+1)/2)]. If uplo == 'L' or 'L', the * array AP contains the lower triangular matrix A, packed sequentially, * column by column; that is, if i >= j, then A[i,j] is stored in * AP[i+((2*n-j+1)*j)/2]. When diag = 'U' or 'u', the diagonal elements * of A are not referenced and are assumed to be unity. * x double precision array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/dtpsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 or n > 2035 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtpsv(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx) { cublasDtpsvNative(uplo, trans, diag, n, AP, x, incx); checkResultBLAS(); } private static native void cublasDtpsvNative(char uplo, char trans, char diag, int n, Pointer AP, Pointer x, int incx); /** *
* void cublasDtbsv (char uplo, char trans, char diag, int n, int k, * const double *A, int lda, double *X, int incx) * * solves one of the systems of equations op(A)*x = b, where op(A) is either * op(A) = A or op(A) = transpose(A). b and x are n element vectors, and A is * an n x n unit or non-unit, upper or lower triangular band matrix with k + 1 * diagonals. No test for singularity or near-singularity is included in this * function. Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix is an upper or lower triangular band * matrix as follows: If uplo == 'U' or 'u', A is an upper triangular * band matrix. If uplo == 'L' or 'l', A is a lower triangular band * matrix. * trans specifies op(A). If trans == 'N' or 'n', op(A) = A. If trans == 'T', * 't', 'C', or 'c', op(A) = transpose(A). * diag specifies whether A is unit triangular. If diag == 'U' or 'u', A is * assumed to be unit triangular; thas is, diagonal elements are not * read and are assumed to be unity. If diag == 'N' or 'n', A is not * assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. n must be * at least zero. * k specifies the number of super- or sub-diagonals. If uplo == 'U' or * 'u', k specifies the number of super-diagonals. If uplo == 'L' or * 'l', k specifies the number of sub-diagonals. k must at least be * zero. * A double precision array of dimension (lda, n). If uplo == 'U' or 'u', * the leading (k + 1) x n part of the array A must contain the upper * triangular band matrix, supplied column by column, with the leading * diagonal of the matrix in row (k + 1) of the array, the first super- * diagonal starting at position 2 in row k, and so on. The top left * k x k triangle of the array A is not referenced. If uplo == 'L' or * 'l', the leading (k + 1) x n part of the array A must constain the * lower triangular band matrix, supplied column by column, with the * leading diagonal of the matrix in row 1 of the array, the first * sub-diagonal starting at position 1 in row 2, and so on. The bottom * right k x k triangle of the array is not referenced. * x double precision array of length at least (1+(n-1)*abs(incx)). * incx storage spacing between elements of x. It must not be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/dtbsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0, n < 0 or n > 2035 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtbsv(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx) { cublasDtbsvNative(uplo, trans, diag, n, k, A, lda, x, incx); checkResultBLAS(); } private static native void cublasDtbsvNative(char uplo, char trans, char diag, int n, int k, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasDsymv (char uplo, int n, double alpha, const double *A, int lda, * const double *x, int incx, double beta, double *y, int incy) * * performs the matrix-vector operation * * y = alpha*A*x + beta*y * * Alpha and beta are double precision scalars, and x and y are double * precision vectors, each with n elements. A is a symmetric n x n matrix * consisting of double precision elements that is stored in either upper or * lower storage mode. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the array A * is to be referenced. If uplo == 'U' or 'u', the symmetric matrix A * is stored in upper storage mode, i.e. only the upper triangular part * of A is to be referenced while the lower triangular part of A is to * be inferred. If uplo == 'L' or 'l', the symmetric matrix A is stored * in lower storage mode, i.e. only the lower triangular part of A is * to be referenced while the upper triangular part of A is to be * inferred. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * alpha double precision scalar multiplier applied to A*x. * A double precision array of dimensions (lda, n). If uplo == 'U' or 'u', * the leading n x n upper triangular part of the array A must contain * the upper triangular part of the symmetric matrix and the strictly * lower triangular part of A is not referenced. If uplo == 'L' or 'l', * the leading n x n lower triangular part of the array A must contain * the lower triangular part of the symmetric matrix and the strictly * upper triangular part of A is not referenced. * lda leading dimension of A. It must be at least max (1, n). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/dsymv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsymv(char uplo, int n, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy) { cublasDsymvNative(uplo, n, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasDsymvNative(char uplo, int n, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy); /** *
* void * cublasDsbmv (char uplo, int n, int k, double alpha, const double *A, int lda, * const double *x, int incx, double beta, double *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are double precision scalars. x and y are double precision * vectors with n elements. A is an n by n symmetric band matrix consisting * of double precision elements, with k super-diagonals and the same number * of subdiagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the symmetric * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * symmetric matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is symmetric, this is also the number of sub-diagonals. k must be at * least zero. * alpha double precision scalar multiplier applied to A*x. * A double precision array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the symmetric matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the symmetric matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. * lda leading dimension of A. lda must be at least (k + 1). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y. If beta is * zero, y is not read. * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/dsbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsbmv(char uplo, int n, int k, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy) { cublasDsbmvNative(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasDsbmvNative(char uplo, int n, int k, double alpha, Pointer A, int lda, Pointer x, int incx, double beta, Pointer y, int incy); /** *
* void * cublasDspmv (char uplo, int n, double alpha, const double *AP, const double *x, * int incx, double beta, double *y, int incy) * * performs the matrix-vector operation * * y = alpha * A * x + beta * y * * Alpha and beta are double precision scalars, and x and y are double * precision vectors with n elements. A is a symmetric n x n matrix * consisting of double precision elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision scalar multiplier applied to A*x. * AP double precision array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the symmetric matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision scalar multiplier applied to vector y; * y double precision array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to y = alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/dspmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDspmv(char uplo, int n, double alpha, Pointer AP, Pointer x, int incx, double beta, Pointer y, int incy) { cublasDspmvNative(uplo, n, alpha, AP, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasDspmvNative(char uplo, int n, double alpha, Pointer AP, Pointer x, int incx, double beta, Pointer y, int incy); /** *
* void * cublasDgemm (char transa, char transb, int m, int n, int k, double alpha, * const double *A, int lda, const double *B, int ldb, * double beta, double *C, int ldc) * * computes the product of matrix A and matrix B, multiplies the result * by scalar alpha, and adds the sum to the product of matrix C and * scalar beta. It performs one of the matrix-matrix operations: * * C = alpha * op(A) * op(B) + beta * C, * where op(X) = X or op(X) = transpose(X), * * and alpha and beta are double-precision scalars. A, B and C are matrices * consisting of double-precision elements, with op(A) an m x k matrix, * op(B) a k x n matrix, and C an m x n matrix. Matrices A, B, and C are * stored in column-major format, and lda, ldb, and ldc are the leading * dimensions of the two-dimensional arrays containing A, B, and C. * * Input * ----- * transa specifies op(A). If transa == 'N' or 'n', op(A) = A. * If transa == 'T', 't', 'C', or 'c', op(A) = transpose(A). * transb specifies op(B). If transb == 'N' or 'n', op(B) = B. * If transb == 'T', 't', 'C', or 'c', op(B) = transpose(B). * m number of rows of matrix op(A) and rows of matrix C; m must be at * least zero. * n number of columns of matrix op(B) and number of columns of C; * n must be at least zero. * k number of columns of matrix op(A) and number of rows of op(B); * k must be at least zero. * alpha double-precision scalar multiplier applied to op(A) * op(B). * A double-precision array of dimensions (lda, k) if transa == 'N' or * 'n', and of dimensions (lda, m) otherwise. If transa == 'N' or * 'n' lda must be at least max(1, m), otherwise lda must be at * least max(1, k). * lda leading dimension of two-dimensional array used to store matrix A. * B double-precision array of dimensions (ldb, n) if transb == 'N' or * 'n', and of dimensions (ldb, k) otherwise. If transb == 'N' or * 'n' ldb must be at least max (1, k), otherwise ldb must be at * least max(1, n). * ldb leading dimension of two-dimensional array used to store matrix B. * beta double-precision scalar multiplier applied to C. If zero, C does not * have to be a valid input * C double-precision array of dimensions (ldc, n); ldc must be at least * max(1, m). * ldc leading dimension of two-dimensional array used to store matrix C. * * Output * ------ * C updated based on C = alpha * op(A)*op(B) + beta * C. * * Reference: http://www.netlib.org/blas/sgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS was not initialized * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDgemm(char transa, char transb, int m, int n, int k, double alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc) { cublasDgemmNative(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasDgemmNative(char transa, char transb, int m, int n, int k, double alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc); /** *
* void * cublasDtrsm (char side, char uplo, char transa, char diag, int m, int n, * double alpha, const double *A, int lda, double *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a double precision scalar, and X and B are m x n matrices * that are composed of double precision elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensonials arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'N', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a double precision scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. * A is a double precision array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular matrix of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a double precision array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/dtrsm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtrsm(char side, char uplo, char transa, char diag, int m, int n, double alpha, Pointer A, int lda, Pointer B, int ldb) { cublasDtrsmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasDtrsmNative(char side, char uplo, char transa, char diag, int m, int n, double alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* void * cublasZtrsm (char side, char uplo, char transa, char diag, int m, int n, * cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * cuDoubleComplex *B, int ldb) * * solves one of the matrix equations * * op(A) * X = alpha * B, or X * op(A) = alpha * B, * * where alpha is a double precision complex scalar, and X and B are m x n matrices * that are composed of double precision complex elements. A is a unit or non-unit, * upper or lower triangular matrix, and op(A) is one of * * op(A) = A or op(A) = transpose(A) or op( A ) = conj( A' ). * * The result matrix X overwrites input matrix B; that is, on exit the result * is stored in B. Matrices A and B are stored in column major format, and * lda and ldb are the leading dimensions of the two-dimensonials arrays that * contain A and B, respectively. * * Input * ----- * side specifies whether op(A) appears on the left or right of X as * follows: side = 'L' or 'l' indicates solve op(A) * X = alpha * B. * side = 'R' or 'r' indicates solve X * op(A) = alpha * B. * uplo specifies whether the matrix A is an upper or lower triangular * matrix as follows: uplo = 'U' or 'u' indicates A is an upper * triangular matrix. uplo = 'L' or 'l' indicates A is a lower * triangular matrix. * transa specifies the form of op(A) to be used in matrix multiplication * as follows: If transa = 'N' or 'N', then op(A) = A. If transa = * 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * m specifies the number of rows of B. m must be at least zero. * n specifies the number of columns of B. n must be at least zero. * alpha is a double precision complex scalar to be multiplied with B. When alpha is * zero, then A is not referenced and B need not be set before entry. * A is a double precision complex array of dimensions (lda, k), where k is * m when side = 'L' or 'l', and is n when side = 'R' or 'r'. If * uplo = 'U' or 'u', the leading k x k upper triangular part of * the array A must contain the upper triangular matrix and the * strictly lower triangular matrix of A is not referenced. When * uplo = 'L' or 'l', the leading k x k lower triangular part of * the array A must contain the lower triangular matrix and the * strictly upper triangular part of A is not referenced. Note that * when diag = 'U' or 'u', the diagonal elements of A are not * referenced, and are assumed to be unity. * lda is the leading dimension of the two dimensional array containing A. * When side = 'L' or 'l' then lda must be at least max(1, m), when * side = 'R' or 'r' then lda must be at least max(1, n). * B is a double precision complex array of dimensions (ldb, n). ldb must be * at least max (1,m). The leading m x n part of the array B must * contain the right-hand side matrix B. On exit B is overwritten * by the solution matrix X. * ldb is the leading dimension of the two dimensional array containing B. * ldb must be at least max(1, m). * * Output * ------ * B contains the solution matrix X satisfying op(A) * X = alpha * B, * or X * op(A) = alpha * B * * Reference: http://www.netlib.org/blas/ztrsm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtrsm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb) { cublasZtrsmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasZtrsmNative(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* void * cublasDtrmm (char side, char uplo, char transa, char diag, int m, int n, * double alpha, const double *A, int lda, const double *B, int ldb) * * performs one of the matrix-matrix operations * * B = alpha * op(A) * B, or B = alpha * B * op(A) * * where alpha is a double-precision scalar, B is an m x n matrix composed * of double precision elements, and A is a unit or non-unit, upper or lower, * triangular matrix composed of double precision elements. op(A) is one of * * op(A) = A or op(A) = transpose(A) * * Matrices A and B are stored in column major format, and lda and ldb are * the leading dimensions of the two-dimensonials arrays that contain A and * B, respectively. * * Input * ----- * side specifies whether op(A) multiplies B from the left or right. * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = * 'R' or 'r', then B = alpha * B * op(A). * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. * If uplo = 'L' or 'l', A is a lower triangular matrix. * transa specifies the form of op(A) to be used in the matrix * multiplication. If transa = 'N' or 'n', then op(A) = A. If * transa = 'T', 't', 'C', or 'c', then op(A) = transpose(A). * diag specifies whether or not A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or * 'n', A is not assumed to be unit triangular. * m the number of rows of matrix B. m must be at least zero. * n the number of columns of matrix B. n must be at least zero. * alpha double precision scalar multiplier applied to op(A)*B, or * B*op(A), respectively. If alpha is zero no accesses are made * to matrix A, and no read accesses are made to matrix B. * A double precision array of dimensions (lda, k). k = m if side = * 'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u' * the leading k x k upper triangular part of the array A must * contain the upper triangular matrix, and the strictly lower * triangular part of A is not referenced. If uplo = 'L' or 'l' * the leading k x k lower triangular part of the array A must * contain the lower triangular matrix, and the strictly upper * triangular part of A is not referenced. When diag = 'U' or 'u' * the diagonal elements of A are no referenced and are assumed * to be unity. * lda leading dimension of A. When side = 'L' or 'l', it must be at * least max(1,m) and at least max(1,n) otherwise * B double precision array of dimensions (ldb, n). On entry, the * leading m x n part of the array contains the matrix B. It is * overwritten with the transformed matrix on exit. * ldb leading dimension of B. It must be at least max (1, m). * * Output * ------ * B updated according to B = alpha * op(A) * B or B = alpha * B * op(A) * * Reference: http://www.netlib.org/blas/dtrmm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDtrmm(char side, char uplo, char transa, char diag, int m, int n, double alpha, Pointer A, int lda, Pointer B, int ldb) { cublasDtrmmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasDtrmmNative(char side, char uplo, char transa, char diag, int m, int n, double alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* void * cublasDsymm (char side, char uplo, int m, int n, double alpha, * const double *A, int lda, const double *B, int ldb, * double beta, double *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are double precision scalars, A is a symmetric matrix * consisting of double precision elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of double precision * elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha double precision scalar multiplier applied to A * B, or B * A * A double precision array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B double precision array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/dsymm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsymm(char side, char uplo, int m, int n, double alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc) { cublasDsymmNative(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasDsymmNative(char side, char uplo, int m, int n, double alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc); /** *
* void * cublasZsymm (char side, char uplo, int m, int n, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * cuDoubleComplex beta, cuDoubleComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are double precision complex scalars, A is a symmetric matrix * consisting of double precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of double precision * complex elements. * * Input * ----- * side specifies whether the symmetric matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the symmetric matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of symmetric matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of symmetric * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha double precision scalar multiplier applied to A * B, or B * A * A double precision array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the * leading m x m part of array A must contain the symmetric matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the symmetric matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * symmetric matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the symmetric matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * symmetric matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the symmetric matrix and the * strictly upper triangular part is not referenced. * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B double precision array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/zsymm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZsymm(char side, char uplo, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc) { cublasZsymmNative(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasZsymmNative(char side, char uplo, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc); /** *
* void * cublasDsyrk (char uplo, char trans, int n, int k, double alpha, * const double *A, int lda, double beta, double *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are double precision scalars. C is an n x n symmetric matrix * consisting of double precision elements and stored in either lower or * upper storage mode. A is a matrix consisting of double precision elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = transpose(A) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier applied to A * transpose(A) or * transpose(A) * A. * A double precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta izs zero, C * does not have to be a valid input * C double precision array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/dsyrk.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsyrk(char uplo, char trans, int n, int k, double alpha, Pointer A, int lda, double beta, Pointer C, int ldc) { cublasDsyrkNative(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); checkResultBLAS(); } private static native void cublasDsyrkNative(char uplo, char trans, int n, int k, double alpha, Pointer A, int lda, double beta, Pointer C, int ldc); /** *
* void * cublasZsyrk (char uplo, char trans, int n, int k, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, cuDoubleComplex beta, cuDoubleComplex *C, int ldc) * * performs one of the symmetric rank k operations * * C = alpha * A * transpose(A) + beta * C, or * C = alpha * transpose(A) * A + beta * C. * * Alpha and beta are double precision complex scalars. C is an n x n symmetric matrix * consisting of double precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of double precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * transpose(A) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = transpose(A) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision complex scalar multiplier applied to A * transpose(A) or * transpose(A) * A. * A double precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta double precision complex scalar multiplier applied to C. If beta izs zero, C * does not have to be a valid input * C double precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * transpose(A) + beta * C, or C = * alpha * transpose(A) * A + beta * C * * Reference: http://www.netlib.org/blas/zsyrk.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZsyrk(char uplo, char trans, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, cuDoubleComplex beta, Pointer C, int ldc) { cublasZsyrkNative(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); checkResultBLAS(); } private static native void cublasZsyrkNative(char uplo, char trans, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, cuDoubleComplex beta, Pointer C, int ldc); /** *
* void * cublasZsyr2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * cuDoubleComplex beta, cuDoubleComplex *C, int ldc) * * performs one of the symmetric rank 2k operations * * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. * * Alpha and beta are double precision complex scalars. C is an n x n symmetric matrix * consisting of double precision complex elements and stored in either lower or upper * storage mode. A and B are matrices consisting of double precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + * alpha * transpose(B) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier. * A double precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B double precision array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C double precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) + * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C * * Reference: http://www.netlib.org/blas/zsyr2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZsyr2k(char uplo, char trans, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc) { cublasZsyr2kNative(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasZsyr2kNative(char uplo, char trans, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc); /** *
* void * cublasZher2k (char uplo, char trans, int n, int k, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * double beta, cuDoubleComplex *C, int ldc) * * performs one of the hermitian rank 2k operations * * C = alpha * A * conjugate(transpose(B)) * + conjugate(alpha) * B * conjugate(transpose(A)) * + beta * C , * or * C = alpha * conjugate(transpose(A)) * B * + conjugate(alpha) * conjugate(transpose(B)) * A * + beta * C. * * Alpha is double precision complex scalar whereas Beta is a double precision real scalar. * C is an n x n hermitian matrix consisting of double precision complex elements and * stored in either lower or upper storage mode. A and B are matrices consisting of * double precision complex elements with dimension of n x k in the first case, * and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * conjugate(transpose(B)) * + conjugate(alpha) * B * conjugate(transpose(A)) * + beta * C . * If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * B * + conjugate(alpha) * conjugate(transpose(B)) * A * + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier. * A double precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B double precision array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C double precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*conjugate(transpose(B)) + * + conjugate(alpha)*B*conjugate(transpose(A)) + beta*C or * alpha*conjugate(transpose(A))*B + conjugate(alpha)*conjugate(transpose(B))*A * + beta*C. * * Reference: http://www.netlib.org/blas/zher2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZher2k(char uplo, char trans, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc) { cublasZher2kNative(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasZher2kNative(char uplo, char trans, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc); /** *
* void * cublasZher (char uplo, int n, double alpha, const cuDoubleComplex *x, int incx, * cuDoubleComplex *A, int lda) * * performs the hermitian rank 1 operation * * A = alpha * x * conjugate(transpose(x) + A, * * where alpha is a double precision real scalar, x is an n element double * precision complex vector and A is an n x n hermitian matrix consisting of * double precision complex elements. Matrix A is stored in column major format, * and lda is the leading dimension of the two-dimensional array * containing A. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or * the lower triangular part of array A. If uplo = 'U' or 'u', * then only the upper triangular part of A may be referenced. * If uplo = 'L' or 'l', then only the lower triangular part of * A may be referenced. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * alpha double precision real scalar multiplier applied to * x * conjugate(transpose(x)) * x double precision complex array of length at least (1 + (n - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must * not be zero. * A double precision complex array of dimensions (lda, n). If uplo = 'U' or * 'u', then A must contain the upper triangular part of a hermitian * matrix, and the strictly lower triangular part is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part * of a hermitian matrix, and the strictly upper triangular part is * not referenced. The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * lda leading dimension of the two-dimensional array containing A. lda * must be at least max(1, n). * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(x)) + A * * Reference: http://www.netlib.org/blas/zher.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZher(char uplo, int n, double alpha, Pointer x, int incx, Pointer A, int lda) { cublasZherNative(uplo, n, alpha, x, incx, A, lda); checkResultBLAS(); } private static native void cublasZherNative(char uplo, int n, double alpha, Pointer x, int incx, Pointer A, int lda); /** *
* void * cublasZhpr (char uplo, int n, double alpha, const cuDoubleComplex *x, int incx, * cuDoubleComplex *AP) * * performs the hermitian rank 1 operation * * A = alpha * x * conjugate(transpose(x)) + A, * * where alpha is a double precision real scalar and x is an n element double * precision complex vector. A is a hermitian n x n matrix consisting of double * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array AP. If uplo == 'U' or 'u', then the upper * triangular part of A is supplied in AP. If uplo == 'L' or 'l', then * the lower triangular part of A is supplied in AP. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision real scalar multiplier applied to x * conjugate(transpose(x)). * x double precision array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(x)) + A * * Reference: http://www.netlib.org/blas/zhpr.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, or incx == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZhpr(char uplo, int n, double alpha, Pointer x, int incx, Pointer AP) { cublasZhprNative(uplo, n, alpha, x, incx, AP); checkResultBLAS(); } private static native void cublasZhprNative(char uplo, int n, double alpha, Pointer x, int incx, Pointer AP); /** *
* void * cublasZhpr2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *AP) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a double precision complex scalar, and x and y are n element double * precision complex vectors. A is a hermitian n x n matrix consisting of double * precision complex elements that is supplied in packed form. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x double precision complex array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y double precision complex array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * AP double precision complex array with at least ((n * (n + 1)) / 2) elements. If * uplo == 'U' or 'u', the array AP contains the upper triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i <= j, then A[i,j] is stored is AP[i+(j*(j+1)/2)]. If * uplo == 'L' or 'L', the array AP contains the lower triangular part * of the hermitian matrix A, packed sequentially, column by column; * that is, if i >= j, then A[i,j] is stored in AP[i+((2*n-j+1)*j)/2]. * The imaginary parts of the diagonal elements need not be set, they * are assumed to be zero, and on exit they are set to zero. * * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/zhpr2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZhpr2(char uplo, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP) { cublasZhpr2Native(uplo, n, alpha, x, incx, y, incy, AP); checkResultBLAS(); } private static native void cublasZhpr2Native(char uplo, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer AP); /** *
* void cublasZher2 (char uplo, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda) * * performs the hermitian rank 2 operation * * A = alpha*x*conjugate(transpose(y)) + conjugate(alpha)*y*conjugate(transpose(x)) + A, * * where alpha is a double precision complex scalar, x and y are n element double * precision complex vector and A is an n by n hermitian matrix consisting of double * precision complex elements. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the lower * triangular part of array A. If uplo == 'U' or 'u', then only the * upper triangular part of A may be referenced and the lower triangular * part of A is inferred. If uplo == 'L' or 'l', then only the lower * triangular part of A may be referenced and the upper triangular part * of A is inferred. * n specifies the number of rows and columns of the matrix A. It must be * at least zero. * alpha double precision complex scalar multiplier applied to x * conjugate(transpose(y)) + * y * conjugate(transpose(x)). * x double precision array of length at least (1 + (n - 1) * abs (incx)). * incx storage spacing between elements of x. incx must not be zero. * y double precision array of length at least (1 + (n - 1) * abs (incy)). * incy storage spacing between elements of y. incy must not be zero. * A double precision complex array of dimensions (lda, n). If uplo == 'U' or 'u', * then A must contains the upper triangular part of a hermitian matrix, * and the strictly lower triangular parts is not referenced. If uplo == * 'L' or 'l', then A contains the lower triangular part of a hermitian * matrix, and the strictly upper triangular part is not referenced. * The imaginary parts of the diagonal elements need not be set, * they are assumed to be zero, and on exit they are set to zero. * * lda leading dimension of A. It must be at least max(1, n). * * Output * ------ * A updated according to A = alpha*x*conjugate(transpose(y)) * + conjugate(alpha)*y*conjugate(transpose(x))+A * * Reference: http://www.netlib.org/blas/zher2.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZher2(char uplo, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasZher2Native(uplo, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasZher2Native(char uplo, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasDsyr2k (char uplo, char trans, int n, int k, double alpha, * const double *A, int lda, const double *B, int ldb, * double beta, double *C, int ldc) * * performs one of the symmetric rank 2k operations * * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, or * C = alpha * transpose(A) * B + alpha * transpose(B) * A + beta * C. * * Alpha and beta are double precision scalars. C is an n x n symmetric matrix * consisting of double precision elements and stored in either lower or upper * storage mode. A and B are matrices consisting of double precision elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the symmetric matrix C is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the symmetric matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the symmetric matrix is to be references, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', * C = alpha * A * transpose(B) + alpha * B * transpose(A) + beta * C, * If trans == 'T', 't', 'C', or 'c', C = alpha * transpose(A) * B + * alpha * transpose(B) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of rows of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier. * A double precision array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contain the matrix * A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1,k). * B double precision array of dimensions (lda, kb), where kb is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array B must contain the matrix B, * otherwise the leading k x n part of the array must contain the matrix * B. * ldb leading dimension of N. When trans == 'N' or 'n' then ldb must be at * least max(1, n). Otherwise ldb must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C double precision array of dimensions (ldc, n). If uplo == 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the symmetric matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo == 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the symmetric matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * ldc leading dimension of C. Must be at least max(1, n). * * Output * ------ * C updated according to alpha*A*transpose(B) + alpha*B*transpose(A) + * beta*C or alpha*transpose(A)*B + alpha*transpose(B)*A + beta*C * * Reference: http://www.netlib.org/blas/dsyr2k.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasDsyr2k(char uplo, char trans, int n, int k, double alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc) { cublasDsyr2kNative(uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasDsyr2kNative(char uplo, char trans, int n, int k, double alpha, Pointer A, int lda, Pointer B, int ldb, double beta, Pointer C, int ldc); /** *
* void cublasZgemm (char transa, char transb, int m, int n, int k, * cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *B, int ldb, cuDoubleComplex beta, * cuDoubleComplex *C, int ldc) * * zgemm performs one of the matrix-matrix operations * * C = alpha * op(A) * op(B) + beta*C, * * where op(X) is one of * * op(X) = X or op(X) = transpose or op(X) = conjg(transpose(X)) * * alpha and beta are double-complex scalars, and A, B and C are matrices * consisting of double-complex elements, with op(A) an m x k matrix, op(B) * a k x n matrix and C an m x n matrix. * * Input * ----- * transa specifies op(A). If transa == 'N' or 'n', op(A) = A. If transa == * 'T' or 't', op(A) = transpose(A). If transa == 'C' or 'c', op(A) = * conjg(transpose(A)). * transb specifies op(B). If transa == 'N' or 'n', op(B) = B. If transb == * 'T' or 't', op(B) = transpose(B). If transb == 'C' or 'c', op(B) = * conjg(transpose(B)). * m number of rows of matrix op(A) and rows of matrix C. It must be at * least zero. * n number of columns of matrix op(B) and number of columns of C. It * must be at least zero. * k number of columns of matrix op(A) and number of rows of op(B). It * must be at least zero. * alpha double-complex scalar multiplier applied to op(A)op(B) * A double-complex array of dimensions (lda, k) if transa == 'N' or * 'n'), and of dimensions (lda, m) otherwise. * lda leading dimension of A. When transa == 'N' or 'n', it must be at * least max(1, m) and at least max(1, k) otherwise. * B double-complex array of dimensions (ldb, n) if transb == 'N' or 'n', * and of dimensions (ldb, k) otherwise * ldb leading dimension of B. When transb == 'N' or 'n', it must be at * least max(1, k) and at least max(1, n) otherwise. * beta double-complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input. * C double precision array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m). * * Output * ------ * C updated according to C = alpha*op(A)*op(B) + beta*C * * Reference: http://www.netlib.org/blas/zgemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if any of m, n, or k are < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZgemm(char transa, char transb, int m, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc) { cublasZgemmNative(transa, transb, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasZgemmNative(char transa, char transb, int m, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc); /** *
* void * cublasZtrmm (char side, char uplo, char transa, char diag, int m, int n, * cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, * int ldb) * * performs one of the matrix-matrix operations * * B = alpha * op(A) * B, or B = alpha * B * op(A) * * where alpha is a double-precision complex scalar, B is an m x n matrix composed * of double precision complex elements, and A is a unit or non-unit, upper or lower, * triangular matrix composed of double precision complex elements. op(A) is one of * * op(A) = A , op(A) = transpose(A) or op(A) = conjugate(transpose(A)) * * Matrices A and B are stored in column major format, and lda and ldb are * the leading dimensions of the two-dimensonials arrays that contain A and * B, respectively. * * Input * ----- * side specifies whether op(A) multiplies B from the left or right. * If side = 'L' or 'l', then B = alpha * op(A) * B. If side = * 'R' or 'r', then B = alpha * B * op(A). * uplo specifies whether the matrix A is an upper or lower triangular * matrix. If uplo = 'U' or 'u', A is an upper triangular matrix. * If uplo = 'L' or 'l', A is a lower triangular matrix. * transa specifies the form of op(A) to be used in the matrix * multiplication. If transa = 'N' or 'n', then op(A) = A. If * transa = 'T' or 't', then op(A) = transpose(A). * If transa = 'C' or 'c', then op(A) = conjugate(transpose(A)). * diag specifies whether or not A is unit triangular. If diag = 'U' * or 'u', A is assumed to be unit triangular. If diag = 'N' or * 'n', A is not assumed to be unit triangular. * m the number of rows of matrix B. m must be at least zero. * n the number of columns of matrix B. n must be at least zero. * alpha double precision complex scalar multiplier applied to op(A)*B, or * B*op(A), respectively. If alpha is zero no accesses are made * to matrix A, and no read accesses are made to matrix B. * A double precision complex array of dimensions (lda, k). k = m if side = * 'L' or 'l', k = n if side = 'R' or 'r'. If uplo = 'U' or 'u' * the leading k x k upper triangular part of the array A must * contain the upper triangular matrix, and the strictly lower * triangular part of A is not referenced. If uplo = 'L' or 'l' * the leading k x k lower triangular part of the array A must * contain the lower triangular matrix, and the strictly upper * triangular part of A is not referenced. When diag = 'U' or 'u' * the diagonal elements of A are no referenced and are assumed * to be unity. * lda leading dimension of A. When side = 'L' or 'l', it must be at * least max(1,m) and at least max(1,n) otherwise * B double precision complex array of dimensions (ldb, n). On entry, the * leading m x n part of the array contains the matrix B. It is * overwritten with the transformed matrix on exit. * ldb leading dimension of B. It must be at least max (1, m). * * Output * ------ * B updated according to B = alpha * op(A) * B or B = alpha * B * op(A) * * Reference: http://www.netlib.org/blas/ztrmm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtrmm(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb) { cublasZtrmmNative(side, uplo, transa, diag, m, n, alpha, A, lda, B, ldb); checkResultBLAS(); } private static native void cublasZtrmmNative(char side, char uplo, char transa, char diag, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb); /** *
* cublasZgeru (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * transpose(y) + A, * * where alpha is a double precision complex scalar, x is an m element double * precision complex vector, y is an n element double precision complex vector, and A * is an m by n matrix consisting of double precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha double precision complex scalar multiplier applied to x * transpose(y) * x double precision complex array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A double precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * transpose(y) + A * * Reference: http://www.netlib.org/blas/zgeru.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZgeru(int m, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasZgeruNative(m, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasZgeruNative(int m, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* cublasZgerc (int m, int n, cuDoubleComplex alpha, const cuDoubleComplex *x, int incx, * const cuDoubleComplex *y, int incy, cuDoubleComplex *A, int lda) * * performs the symmetric rank 1 operation * * A = alpha * x * conjugate(transpose(y)) + A, * * where alpha is a double precision complex scalar, x is an m element double * precision complex vector, y is an n element double precision complex vector, and A * is an m by n matrix consisting of double precision complex elements. Matrix A * is stored in column major format, and lda is the leading dimension of * the two-dimensional array used to store A. * * Input * ----- * m specifies the number of rows of the matrix A. It must be at least * zero. * n specifies the number of columns of the matrix A. It must be at * least zero. * alpha double precision complex scalar multiplier applied to x * conjugate(transpose(y)) * x double precision array of length at least (1 + (m - 1) * abs(incx)) * incx specifies the storage spacing between elements of x. incx must not * be zero. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)) * incy specifies the storage spacing between elements of y. incy must not * be zero. * A double precision complex array of dimensions (lda, n). * lda leading dimension of two-dimensional array used to store matrix A * * Output * ------ * A updated according to A = alpha * x * conjugate(transpose(y)) + A * * Reference: http://www.netlib.org/blas/zgerc.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m < 0, n < 0, incx == 0, incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZgerc(int m, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda) { cublasZgercNative(m, n, alpha, x, incx, y, incy, A, lda); checkResultBLAS(); } private static native void cublasZgercNative(int m, int n, cuDoubleComplex alpha, Pointer x, int incx, Pointer y, int incy, Pointer A, int lda); /** *
* void * cublasZherk (char uplo, char trans, int n, int k, double alpha, * const cuDoubleComplex *A, int lda, double beta, cuDoubleComplex *C, int ldc) * * performs one of the hermitian rank k operations * * C = alpha * A * conjugate(transpose(A)) + beta * C, or * C = alpha * conjugate(transpose(A)) * A + beta * C. * * Alpha and beta are double precision scalars. C is an n x n hermitian matrix * consisting of double precision complex elements and stored in either lower or * upper storage mode. A is a matrix consisting of double precision complex elements * with dimension of n x k in the first case, and k x n in the second case. * * Input * ----- * uplo specifies whether the hermitian matrix C is stored in upper or lower * storage mode as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * trans specifies the operation to be performed. If trans == 'N' or 'n', C = * alpha * A * conjugate(transpose(A)) + beta * C. If trans == 'T', 't', 'C', or 'c', * C = alpha * conjugate(transpose(A)) * A + beta * C. * n specifies the number of rows and the number columns of matrix C. If * trans == 'N' or 'n', n specifies the number of rows of matrix A. If * trans == 'T', 't', 'C', or 'c', n specifies the columns of matrix A. * n must be at least zero. * k If trans == 'N' or 'n', k specifies the number of columns of matrix A. * If trans == 'T', 't', 'C', or 'c', k specifies the number of rows of * matrix A. k must be at least zero. * alpha double precision scalar multiplier applied to A * conjugate(transpose(A)) or * conjugate(transpose(A)) * A. * A double precision complex array of dimensions (lda, ka), where ka is k when * trans == 'N' or 'n', and is n otherwise. When trans == 'N' or 'n', * the leading n x k part of array A must contain the matrix A, * otherwise the leading k x n part of the array must contains the * matrix A. * lda leading dimension of A. When trans == 'N' or 'n' then lda must be at * least max(1, n). Otherwise lda must be at least max(1, k). * beta double precision scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision complex array of dimensions (ldc, n). If uplo = 'U' or 'u', * the leading n x n triangular part of the array C must contain the * upper triangular part of the hermitian matrix C and the strictly * lower triangular part of C is not referenced. On exit, the upper * triangular part of C is overwritten by the upper triangular part of * the updated matrix. If uplo = 'L' or 'l', the leading n x n * triangular part of the array C must contain the lower triangular part * of the hermitian matrix C and the strictly upper triangular part of C * is not referenced. On exit, the lower triangular part of C is * overwritten by the lower triangular part of the updated matrix. * The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero, and on exit they * are set to zero. * ldc leading dimension of C. It must be at least max(1, n). * * Output * ------ * C updated according to C = alpha * A * conjugate(transpose(A)) + beta * C, or C = * alpha * conjugate(transpose(A)) * A + beta * C * * Reference: http://www.netlib.org/blas/zherk.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if n < 0 or k < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZherk(char uplo, char trans, int n, int k, double alpha, Pointer A, int lda, double beta, Pointer C, int ldc) { cublasZherkNative(uplo, trans, n, k, alpha, A, lda, beta, C, ldc); checkResultBLAS(); } private static native void cublasZherkNative(char uplo, char trans, int n, int k, double alpha, Pointer A, int lda, double beta, Pointer C, int ldc); /** *
* void * cublasZhemm (char side, char uplo, int m, int n, cuDoubleComplex alpha, * const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, * cuDoubleComplex beta, cuDoubleComplex *C, int ldc); * * performs one of the matrix-matrix operations * * C = alpha * A * B + beta * C, or * C = alpha * B * A + beta * C, * * where alpha and beta are double precision complex scalars, A is a hermitian matrix * consisting of double precision complex elements and stored in either lower or upper * storage mode, and B and C are m x n matrices consisting of double precision * complex elements. * * Input * ----- * side specifies whether the hermitian matrix A appears on the left side * hand side or right hand side of matrix B, as follows. If side == 'L' * or 'l', then C = alpha * A * B + beta * C. If side = 'R' or 'r', * then C = alpha * B * A + beta * C. * uplo specifies whether the hermitian matrix A is stored in upper or lower * storage mode, as follows. If uplo == 'U' or 'u', only the upper * triangular part of the hermitian matrix is to be referenced, and the * elements of the strictly lower triangular part are to be infered from * those in the upper triangular part. If uplo == 'L' or 'l', only the * lower triangular part of the hermitian matrix is to be referenced, * and the elements of the strictly upper triangular part are to be * infered from those in the lower triangular part. * m specifies the number of rows of the matrix C, and the number of rows * of matrix B. It also specifies the dimensions of hermitian matrix A * when side == 'L' or 'l'. m must be at least zero. * n specifies the number of columns of the matrix C, and the number of * columns of matrix B. It also specifies the dimensions of hermitian * matrix A when side == 'R' or 'r'. n must be at least zero. * alpha double precision scalar multiplier applied to A * B, or B * A * A double precision complex array of dimensions (lda, ka), where ka is m when * side == 'L' or 'l' and is n otherwise. If side == 'L' or 'l' the * leading m x m part of array A must contain the hermitian matrix, * such that when uplo == 'U' or 'u', the leading m x m part stores the * upper triangular part of the hermitian matrix, and the strictly lower * triangular part of A is not referenced, and when uplo == 'U' or 'u', * the leading m x m part stores the lower triangular part of the * hermitian matrix and the strictly upper triangular part is not * referenced. If side == 'R' or 'r' the leading n x n part of array A * must contain the hermitian matrix, such that when uplo == 'U' or 'u', * the leading n x n part stores the upper triangular part of the * hermitian matrix and the strictly lower triangular part of A is not * referenced, and when uplo == 'U' or 'u', the leading n x n part * stores the lower triangular part of the hermitian matrix and the * strictly upper triangular part is not referenced. The imaginary parts * of the diagonal elements need not be set, they are assumed to be zero. * * lda leading dimension of A. When side == 'L' or 'l', it must be at least * max(1, m) and at least max(1, n) otherwise. * B double precision complex array of dimensions (ldb, n). On entry, the leading * m x n part of the array contains the matrix B. * ldb leading dimension of B. It must be at least max (1, m). * beta double precision complex scalar multiplier applied to C. If beta is zero, C * does not have to be a valid input * C double precision complex array of dimensions (ldc, n) * ldc leading dimension of C. Must be at least max(1, m) * * Output * ------ * C updated according to C = alpha * A * B + beta * C, or C = alpha * * B * A + beta * C * * Reference: http://www.netlib.org/blas/zhemm.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if m or n are < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZhemm(char side, char uplo, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc) { cublasZhemmNative(side, uplo, m, n, alpha, A, lda, B, ldb, beta, C, ldc); checkResultBLAS(); } private static native void cublasZhemmNative(char side, char uplo, int m, int n, cuDoubleComplex alpha, Pointer A, int lda, Pointer B, int ldb, cuDoubleComplex beta, Pointer C, int ldc); /** *
* void * cublasZtrsv (char uplo, char trans, char diag, int n, const cuDoubleComplex *A, * int lda, cuDoubleComplex *x, int incx) * * solves a system of equations op(A) * x = b, where op(A) is either A, * transpose(A) or conjugate(transpose(A)). b and x are double precision * complex vectors consisting of n elements, and A is an n x n matrix * composed of a unit or non-unit, upper or lower triangular matrix. * Matrix A is stored in column major format, and lda is the leading * dimension of the two-dimensional array containing A. * * No test for singularity or near-singularity is included in this function. * Such tests must be performed before calling this function. * * Input * ----- * uplo specifies whether the matrix data is stored in the upper or the * lower triangular part of array A. If uplo = 'U' or 'u', then only * the upper triangular part of A may be referenced. If uplo = 'L' or * 'l', then only the lower triangular part of A may be referenced. * trans specifies op(A). If transa = 'n' or 'N', op(A) = A. If transa = 't', * 'T', 'c', or 'C', op(A) = transpose(A) * diag specifies whether or not A is a unit triangular matrix like so: * if diag = 'U' or 'u', A is assumed to be unit triangular. If * diag = 'N' or 'n', then A is not assumed to be unit triangular. * n specifies the number of rows and columns of the matrix A. It * must be at least 0. * A is a double precision complex array of dimensions (lda, n). If uplo = 'U' * or 'u', then A must contains the upper triangular part of a symmetric * matrix, and the strictly lower triangular parts is not referenced. * If uplo = 'L' or 'l', then A contains the lower triangular part of * a symmetric matrix, and the strictly upper triangular part is not * referenced. * lda is the leading dimension of the two-dimensional array containing A. * lda must be at least max(1, n). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * On entry, x contains the n element right-hand side vector b. On exit, * it is overwritten with the solution vector x. * incx specifies the storage spacing between elements of x. incx must not * be zero. * * Output * ------ * x updated to contain the solution vector x that solves op(A) * x = b. * * Reference: http://www.netlib.org/blas/ztrsv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if incx == 0 or if n < 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZtrsv(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx) { cublasZtrsvNative(uplo, trans, diag, n, A, lda, x, incx); checkResultBLAS(); } private static native void cublasZtrsvNative(char uplo, char trans, char diag, int n, Pointer A, int lda, Pointer x, int incx); /** *
* void * cublasZhbmv (char uplo, int n, int k, cuDoubleComplex alpha, const cuDoubleComplex *A, int lda, * const cuDoubleComplex *x, int incx, cuDoubleComplex beta, cuDoubleComplex *y, int incy) * * performs the matrix-vector operation * * y := alpha*A*x + beta*y * * alpha and beta are double precision complex scalars. x and y are double precision * complex vectors with n elements. A is an n by n hermitian band matrix consisting * of double precision complex elements, with k super-diagonals and the same number * of subdiagonals. * * Input * ----- * uplo specifies whether the upper or lower triangular part of the hermitian * band matrix A is being supplied. If uplo == 'U' or 'u', the upper * triangular part is being supplied. If uplo == 'L' or 'l', the lower * triangular part is being supplied. * n specifies the number of rows and the number of columns of the * hermitian matrix A. n must be at least zero. * k specifies the number of super-diagonals of matrix A. Since the matrix * is hermitian, this is also the number of sub-diagonals. k must be at * least zero. * alpha double precision complex scalar multiplier applied to A*x. * A double precision complex array of dimensions (lda, n). When uplo == 'U' or * 'u', the leading (k + 1) x n part of array A must contain the upper * triangular band of the hermitian matrix, supplied column by column, * with the leading diagonal of the matrix in row (k+1) of the array, * the first super-diagonal starting at position 2 in row k, and so on. * The top left k x k triangle of the array A is not referenced. When * uplo == 'L' or 'l', the leading (k + 1) x n part of the array A must * contain the lower triangular band part of the hermitian matrix, * supplied column by column, with the leading diagonal of the matrix in * row 1 of the array, the first sub-diagonal starting at position 1 in * row 2, and so on. The bottom right k x k triangle of the array A is * not referenced. The imaginary parts of the diagonal elements need * not be set, they are assumed to be zero. * lda leading dimension of A. lda must be at least (k + 1). * x double precision complex array of length at least (1 + (n - 1) * abs(incx)). * incx storage spacing between elements of x. incx must not be zero. * beta double precision complex scalar multiplier applied to vector y. If beta is * zero, y is not read. * y double precision complex array of length at least (1 + (n - 1) * abs(incy)). * If beta is zero, y is not read. * incy storage spacing between elements of y. incy must not be zero. * * Output * ------ * y updated according to alpha*A*x + beta*y * * Reference: http://www.netlib.org/blas/zhbmv.f * * Error status for this function can be retrieved via cublasGetError(). * * Error Status * ------------ * CUBLAS_STATUS_NOT_INITIALIZED if CUBLAS library has not been initialized * CUBLAS_STATUS_INVALID_VALUE if k or n < 0, or if incx or incy == 0 * CUBLAS_STATUS_ARCH_MISMATCH if invoked on device without DP support * CUBLAS_STATUS_EXECUTION_FAILED if function failed to launch on GPU **/ public static void cublasZhbmv(char uplo, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy) { cublasZhbmvNative(uplo, n, k, alpha, A, lda, x, incx, beta, y, incy); checkResultBLAS(); } private static native void cublasZhbmvNative(char uplo, int n, int k, cuDoubleComplex alpha, Pointer A, int lda, Pointer x, int incx, cuDoubleComplex beta, Pointer y, int incy); }
© 2015 - 2024 Weber Informatics LLC | Privacy Policy